From e0dc63202034266fc8442e9a52814155c7589d32 Mon Sep 17 00:00:00 2001
From: Michael Christen <mc@yacy.net>
Date: Tue, 19 Jun 2018 00:42:23 +0200
Subject: [PATCH] removed transformer; it was not used any more

---
 htroot/Crawler_p.java                         |   2 +-
 .../yacy/crawler/FileCrawlStarterTask.java    |   2 +-
 source/net/yacy/data/BookmarkHelper.java      |   2 +-
 .../parser/html/AbstractTransformer.java      |  78 ---------
 .../document/parser/html/ContentScraper.java  |   4 +-
 .../parser/html/ContentTransformer.java       | 148 ------------------
 .../parser/html/ScraperInputStream.java       |   3 +-
 .../document/parser/html/Transformer.java     |  59 -------
 .../parser/html/TransformerWriter.java        |  42 +----
 .../net/yacy/document/parser/htmlParser.java  |   4 +-
 10 files changed, 16 insertions(+), 328 deletions(-)
 delete mode 100644 source/net/yacy/document/parser/html/AbstractTransformer.java
 delete mode 100644 source/net/yacy/document/parser/html/ContentTransformer.java
 delete mode 100644 source/net/yacy/document/parser/html/Transformer.java
diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 197f0b0d1..3a2c4f377 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -806,7 +806,7 @@ public class Crawler_p {
 		List<AnchorURL> hyperlinks_from_file;
 		// check if the crawl filter works correctly
 		final ContentScraper scraper = new ContentScraper(new DigestURL(crawlingFile), 10000000, new HashSet<String>(), new VocabularyScraper(), timezoneOffset);
-		final Writer writer = new TransformerWriter(null, null, scraper, null, false);
+		final Writer writer = new TransformerWriter(null, null, scraper, false);
 		if((crawlingFileContent == null || crawlingFileContent.isEmpty()) && crawlingFile != null) {
 			/* Let's report here detailed error to help user when he selected a wrong file */
 			if(!crawlingFile.exists()) {
diff --git a/source/net/yacy/crawler/FileCrawlStarterTask.java b/source/net/yacy/crawler/FileCrawlStarterTask.java
index 3eabdd81f..2caec9740 100644
--- a/source/net/yacy/crawler/FileCrawlStarterTask.java
+++ b/source/net/yacy/crawler/FileCrawlStarterTask.java
@@ -143,7 +143,7 @@ public class FileCrawlStarterTask extends Thread {
 				this.profile, true);
 		this.scraper.registerHtmlFilterEventListener(anchorListener);
 
-		final Writer writer = new TransformerWriter(null, null, this.scraper, null, false);
+		final Writer writer = new TransformerWriter(null, null, this.scraper, false);
 		FileInputStream inStream = null;
 
 		try {
diff --git a/source/net/yacy/data/BookmarkHelper.java b/source/net/yacy/data/BookmarkHelper.java
index 120d03766..a3836de04 100644
--- a/source/net/yacy/data/BookmarkHelper.java
+++ b/source/net/yacy/data/BookmarkHelper.java
@@ -137,7 +137,7 @@ public class BookmarkHelper {
             //load the links
             final ContentScraper scraper = new ContentScraper(baseURL, 10000, new HashSet<String>(), new VocabularyScraper(), 0);
             //OutputStream os = new htmlFilterOutputStream(null, scraper, null, false);
-            final Writer writer = new TransformerWriter(null, null, scraper, null, false);
+            final Writer writer = new TransformerWriter(null, null, scraper, false);
             FileUtils.copy(input,writer);
             writer.close();
             links = scraper.getAnchors();
diff --git a/source/net/yacy/document/parser/html/AbstractTransformer.java b/source/net/yacy/document/parser/html/AbstractTransformer.java
deleted file mode 100644
index e1850c78d..000000000
--- a/source/net/yacy/document/parser/html/AbstractTransformer.java
+++ /dev/null
@@ -1,78 +0,0 @@
-// AbstractTransformer.java
-// ----------------------------------
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-package net.yacy.document.parser.html;
-
-import java.util.TreeSet;
-
-@Deprecated
-// TODO: delete candidate, because not in use, (noticed 2014-12-02)
-public abstract class AbstractTransformer implements Transformer {
-
-    private TreeSet<String> tags0;
-    private TreeSet<String> tags1;
-
-    public AbstractTransformer(final TreeSet<String> tags0, final TreeSet<String> tags1) {
-        this.tags0  = tags0;
-        this.tags1  = tags1;
-    }
-
-    @Override
-    public boolean isTag0(final String tag) {
-        return this.tags0.contains(tag);
-    }
-
-    @Override
-    public boolean isTag1(final String tag) {
-        return this.tags1.contains(tag);
-    }
-
-    //the 'missing' method that shall be implemented:
-    @Override
-    public abstract char[] transformText(char[] text);
-    /* could be easily implemented as:
-    {
-	return text;
-    }
-    */
-
-    // the other methods must take into account to construct the return value correctly
-    @Override
-    public char[] transformTag0(final ContentScraper.Tag tag, final char quotechar) {
-        return TransformerWriter.genTag0(tag.name, tag.opts, quotechar);
-    }
-
-    @Override
-    public char[] transformTag1(final ContentScraper.Tag tag, final char quotechar) {
-        return TransformerWriter.genTag1(tag.name, tag.opts, tag.content.getChars(), quotechar);
-    }
-
-    @Override
-    public synchronized void close() {
-        // free resources
-        this.tags0 = null;
-        this.tags1 = null;
-    }
-
-}
diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index 36fb1e34c..9455c988d 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -1603,14 +1603,14 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         if (page == null) throw new IOException("no content in file " + file.toString());
 
         // scrape document to look up charset
-        final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), StandardCharsets.UTF_8.name(), new HashSet<String>(), new VocabularyScraper(), new DigestURL("http://localhost"), null, false, maxLinks, timezoneOffset);
+        final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), StandardCharsets.UTF_8.name(), new HashSet<String>(), new VocabularyScraper(), new DigestURL("http://localhost"), false, maxLinks, timezoneOffset);
         String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
         htmlFilter.close();
         if (charset == null) charset = Charset.defaultCharset().toString();
 
         // scrape content
         final ContentScraper scraper = new ContentScraper(new DigestURL("http://localhost"), maxLinks, new HashSet<String>(), new VocabularyScraper(), timezoneOffset);
-        final Writer writer = new TransformerWriter(null, null, scraper, null, false);
+        final Writer writer = new TransformerWriter(null, null, scraper, false);
         FileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName(charset));
         writer.close();
         return scraper;
diff --git a/source/net/yacy/document/parser/html/ContentTransformer.java b/source/net/yacy/document/parser/html/ContentTransformer.java
deleted file mode 100644
index d20be732e..000000000
--- a/source/net/yacy/document/parser/html/ContentTransformer.java
+++ /dev/null
@@ -1,148 +0,0 @@
-// ContentTransformer.java
-// ---------------------------------
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-package net.yacy.document.parser.html;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.TreeSet;
-
-import net.yacy.cora.document.encoding.ASCII;
-import net.yacy.kelondro.io.CharBuffer;
-
-@Deprecated
-// TODO: delete candidate, because not in use, (noticed 2014-12-02)
-public class ContentTransformer extends AbstractTransformer implements Transformer {
-
-    // statics: for initialization of the HTMLFilterAbstractTransformer
-    private static final TreeSet<String> linkTags0 = new TreeSet<String>(ASCII.insensitiveASCIIComparator);
-    private static final TreeSet<String> linkTags1 = new TreeSet<String>(ASCII.insensitiveASCIIComparator);
-
-    static {
-        linkTags0.add("img");
-        linkTags0.add("input");
-
-        linkTags1.add("a");
-    }
-
-    private ArrayList<String> bluelist = null;
-
-    public ContentTransformer() {
-        super(linkTags0, linkTags1);
-    }
-
-    @Override
-    public void init(final String initarg) {
-        if (this.bluelist == null) {
-            // here, the init arg is used to load a list of blue-listed words
-            this.bluelist = new ArrayList<String>();
-            final File f = new File(initarg);
-            if (f.canRead()) {
-                try {
-                    final BufferedReader r = new BufferedReader(new FileReader(f));
-                    String s;
-                    while ((s = r.readLine()) != null) {
-                        if (!s.isEmpty() && s.charAt(0) != '#') this.bluelist.add(s.toLowerCase());
-                    }
-                    r.close();
-                } catch (final IOException e) {
-                }
-                // if (bluelist.isEmpty()) System.out.println("BLUELIST is empty");
-            }
-        }
-    }
-
-    @Override
-    public boolean isIdentityTransformer() {
-        return this.bluelist.isEmpty();
-    }
-
-    private static char[] genBlueLetters(int length) {
-            final CharBuffer bb = new CharBuffer(ContentScraper.MAX_DOCSIZE, " <FONT COLOR=#0000FF>".toCharArray());
-            length = length / 2;
-            if (length > 10) length = 7;
-            while (length-- > 0) {
-                bb.append('X');
-            }
-            bb.append("</FONT> ");
-            final char[] result = bb.getChars();
-            bb.close();
-            return result;
-    }
-
-    private boolean bluelistHit(final char[] text) {
-        if (text == null || this.bluelist == null) return false;
-        final String lc = new String(text).toLowerCase();
-        for (int i = 0; i < this.bluelist.size(); i++) {
-            if (lc.indexOf(this.bluelist.get(i)) >= 0) return true;
-        }
-        return false;
-    }
-
-    @Override
-    public char[] transformText(final char[] text) {
-        if (this.bluelist != null) {
-            if (bluelistHit(text)) {
-                // System.out.println("FILTERHIT: " + text);
-                return genBlueLetters(text.length);
-            }
-            return text;
-        }
-        return text;
-    }
-
-    @Override
-    public char[] transformTag0(final ContentScraper.Tag tag, final char quotechar) {
-        if (tag.name.equals("img")) {
-            // check bluelist
-            if (bluelistHit(tag.opts.getProperty("src", "").toCharArray())) return genBlueLetters(5);
-            if (bluelistHit(tag.opts.getProperty("alt", "").toCharArray())) return genBlueLetters(5);
-
-            // replace image alternative name
-            tag.opts.setProperty("alt", new String(transformText(tag.opts.getProperty("alt", "").toCharArray())));
-        }
-        if (tag.name.equals("input") && (tag.opts.getProperty("type") != null && tag.opts.getProperty("type").equals("submit"))) {
-            // rewrite button name
-            tag.opts.setProperty("value", new String(transformText(tag.opts.getProperty("value", "").toCharArray())));
-        }
-        return TransformerWriter.genTag0(tag.name, tag.opts, quotechar);
-    }
-
-    @Override
-    public char[] transformTag1(final ContentScraper.Tag tag, final char quotechar) {
-        if (bluelistHit(tag.opts.getProperty("href","").toCharArray())) return genBlueLetters(tag.content.length());
-        if (bluelistHit(tag.content.getChars())) return genBlueLetters(tag.content.length());
-        return TransformerWriter.genTag1(tag.name, tag.opts, tag.content.getChars(), quotechar);
-    }
-
-    @Override
-    public synchronized void close() {
-        // free resources
-        super.close();
-    }
-
-}
diff --git a/source/net/yacy/document/parser/html/ScraperInputStream.java b/source/net/yacy/document/parser/html/ScraperInputStream.java
index 68a2602e6..776891eca 100644
--- a/source/net/yacy/document/parser/html/ScraperInputStream.java
+++ b/source/net/yacy/document/parser/html/ScraperInputStream.java
@@ -65,7 +65,6 @@ public class ScraperInputStream extends InputStream implements ScraperListener {
             final Set<String> ignore_class_name,
             final VocabularyScraper vocabularyScraper,
             final DigestURL rooturl,
-            final Transformer transformer,
             final boolean passbyIfBinarySuspect,
             final int maxLinks,
             final int timezoneOffset
@@ -82,7 +81,7 @@ public class ScraperInputStream extends InputStream implements ScraperListener {
 	} catch (final UnsupportedEncodingException e) {
 		this.reader = new InputStreamReader(this, StandardCharsets.UTF_8);
 	}
-        this.writer = new TransformerWriter(null,null,scraper,transformer,passbyIfBinarySuspect);
+        this.writer = new TransformerWriter(null,null,scraper,passbyIfBinarySuspect);
     }
 
     private static String extractCharsetFromMimetypeHeader(final String mimeType) {
diff --git a/source/net/yacy/document/parser/html/Transformer.java b/source/net/yacy/document/parser/html/Transformer.java
deleted file mode 100644
index 9b605340e..000000000
--- a/source/net/yacy/document/parser/html/Transformer.java
+++ /dev/null
@@ -1,59 +0,0 @@
-// Transformer.java 
-// ---------------------------
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-package net.yacy.document.parser.html;
-
-public interface Transformer {
-
-    // the init method is used to initialize the transformer with some values
-    // i.e. the initarg - String can be the name of a file which may contain
-    // more specific transformation rules
-    public void init(String initarg);
-
-    // ask if this transformer will do any transformation whatsoever
-    // this may return true if the initialization resulted in a status
-    // that does not allow any transformation
-    public boolean isIdentityTransformer();
-    
-    // tests, if a given body-less tag (i.e. <br> shall be supervised)
-    // only tags that are defined here will be cached and not streamed
-    public boolean isTag0(String tag);
-
-    // tests if a given tag that may have a body (i.e. <tt> ..body.. </tt>)
-    // shall be supervised
-    public boolean isTag1(String tag);
-
-    // method that is called with any text between tags
-    // the returned text replaces the given text
-    // if the text shall not be changed, it must be returned as called
-    public char[] transformText(char[] text);
-
-    // method that is called when a body-less tag occurs
-    public char[] transformTag0(ContentScraper.Tag tag, char quotechar);
-
-    // method that is called when a body-containing text occurs
-    public char[] transformTag1(ContentScraper.Tag tag, char quotechar);
-
-    public void close();
-}
diff --git a/source/net/yacy/document/parser/html/TransformerWriter.java b/source/net/yacy/document/parser/html/TransformerWriter.java
index 1bf300e5e..a6d6a9189 100644
--- a/source/net/yacy/document/parser/html/TransformerWriter.java
+++ b/source/net/yacy/document/parser/html/TransformerWriter.java
@@ -59,7 +59,6 @@ public final class TransformerWriter extends Writer {
     private CharBuffer buffer;
     private Stack<ContentScraper.Tag> tagStack;
     private final Scraper scraper;
-    private final Transformer transformer;
     private boolean inSingleQuote;
     private boolean inDoubleQuote;
     private boolean inComment;
@@ -70,23 +69,20 @@ public final class TransformerWriter extends Writer {
             final OutputStream outStream,
             final Charset charSet,
             final Scraper scraper,
-            final Transformer transformer,
             final boolean passbyIfBinarySuspect
     ) {
-    	this(outStream, charSet, scraper, transformer, passbyIfBinarySuspect, 64);
+    	this(outStream, charSet, scraper, passbyIfBinarySuspect, 64);
     }
 
     public TransformerWriter(
             final OutputStream outStream,
             final Charset charSet,
             final Scraper scraper,
-            final Transformer transformer,
             final boolean passbyIfBinarySuspect,
             final int initialBufferSize
     ) {
         this.outStream     = outStream;
         this.scraper       = scraper;
-        this.transformer   = transformer;
         this.buffer        = new CharBuffer(ContentScraper.MAX_DOCSIZE, initialBufferSize);
         this.tagStack      = new Stack<ContentScraper.Tag>();
         this.inSingleQuote = false;
@@ -235,9 +231,6 @@ public final class TransformerWriter extends Writer {
             if (this.scraper != null && content.length > 0) {
             	this.scraper.scrapeText(content, null);
             }
-            if (this.transformer != null) {
-            	return this.transformer.transformText(content);
-            }
             return content;
         }
 
@@ -246,11 +239,7 @@ public final class TransformerWriter extends Writer {
         if (this.scraper != null) {
             this.scraper.scrapeText(content, this.tagStack.lastElement());
         }
-        if (this.transformer != null) {
-            this.tagStack.lastElement().content.append(this.transformer.transformText(content));
-        } else {
-            this.tagStack.lastElement().content.append(content);
-        }
+        this.tagStack.lastElement().content.append(content);
         return new char[0];
     }
             
@@ -318,32 +307,21 @@ public final class TransformerWriter extends Writer {
             // this single tag is collected at once here
             this.scraper.scrapeTag0(tag);
         }
-        if (this.transformer != null && this.transformer.isTag0(tagname)) {
-            // this single tag is collected at once here
-            char[] b = this.transformer.transformTag0(tag, quotechar);
-            return b;
-        } else if ((this.scraper != null && this.scraper.isTag1(tagname)) ||
-                   (this.transformer != null && this.transformer.isTag1(tagname))) {
+        if (this.scraper != null && this.scraper.isTag1(tagname)) {
             // ok, start collecting; we don't push this here to the scraper or transformer; we do that when the tag is closed.
             this.tagStack.push(tag);
             return new char[0];
-        } else {
-             // we ignore that thing and return it again
-             return genTag0raw(tagname, true, content);
         }
+        // we ignore that thing and return it again
+        return genTag0raw(tagname, true, content);
     }
 
     private char[] filterTagCloseing(final char quotechar) {
         char[] ret;
         ContentScraper.Tag tag = this.tagStack.lastElement();
         if (this.scraper != null) this.scraper.scrapeTag1(tag);
-        if (this.transformer != null) {
-            ret = this.transformer.transformTag1(tag, quotechar);
-        } else {
-            ret = genTag1(tag.name, tag.opts, tag.content.getChars(), quotechar);
-        }
-        if ((this.scraper != null && this.scraper.isTag1(tag.name)) ||
-            (this.transformer != null && this.transformer.isTag1(tag.name))) {
+        ret = genTag1(tag.name, tag.opts, tag.content.getChars(), quotechar);
+        if (this.scraper != null && this.scraper.isTag1(tag.name)) {
             // remove the tag from the stack as soon as the tag is processed
             this.tagStack.pop();
             // at this point the characters from the recently processed tag must be attached to the previous tag
@@ -360,11 +338,7 @@ public final class TransformerWriter extends Writer {
         // it's our closing tag! return complete result.
         char[] ret;
         if (this.scraper != null) this.scraper.scrapeTag1(this.tagStack.lastElement());
-        if (this.transformer != null) {
-            ret = this.transformer.transformTag1(this.tagStack.lastElement(), quotechar);
-        } else {
-            ret = genTag1(this.tagStack.lastElement().name, this.tagStack.lastElement().opts, this.tagStack.lastElement().content.getChars(), quotechar);
-        }
+        ret = genTag1(this.tagStack.lastElement().name, this.tagStack.lastElement().opts, this.tagStack.lastElement().content.getChars(), quotechar);
         this.tagStack.pop();
         return ret;
     }
diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java
index 0b0ee1296..9b0f5c3de 100644
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@@ -276,7 +276,7 @@ public class htmlParser extends AbstractParser implements Parser {
         if (charset == null) {
             ScraperInputStream htmlFilter = null;
             try {
-                htmlFilter = new ScraperInputStream(sourceStream, documentCharset, ignore_class_name, vocabularyScraper, location, null, false, maxLinks, timezoneOffset);
+                htmlFilter = new ScraperInputStream(sourceStream, documentCharset, ignore_class_name, vocabularyScraper, location, false, maxLinks, timezoneOffset);
                 sourceStream = htmlFilter;
                 charset = htmlFilter.detectCharset();
             } catch (final IOException e1) {
@@ -312,7 +312,7 @@ public class htmlParser extends AbstractParser implements Parser {
         // parsing the content
         // for this static method no need to init local this.scraperObject here
         final ContentScraper scraper = new ContentScraper(location, maxAnchors, maxLinks, ignore_class_name, vocabularyScraper, timezoneOffset);
-        final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false, Math.max(64, Math.min(4096, sourceStream.available())));
+        final TransformerWriter writer = new TransformerWriter(null, null, scraper, false, Math.max(64, Math.min(4096, sourceStream.available())));
         try {
         	final long maxChars = (long)(maxBytes * detectedcharsetcontainer[0].newDecoder().averageCharsPerByte());
         	final Reader sourceReader = new InputStreamReader(sourceStream, detectedcharsetcontainer[0]);