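removed the unused Transformer API

The Transformer interface and its implementations (AbstractTransformer,
ContentTransformer) were not used any more: all call sites touched by
this commit passed null as the transformer argument.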
7 years ago · e0dc632020
parent 495ca57f61
commit e0dc632020
10 changed files with 16 additions and 328 deletions
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -806,7 +806,7 @@ public class Crawler_p {
 		List<AnchorURL> hyperlinks_from_file;
 		// check if the crawl filter works correctly
 		final ContentScraper scraper = new ContentScraper(new DigestURL(crawlingFile), 10000000, new HashSet<String>(), new VocabularyScraper(), timezoneOffset);
-		final Writer writer = new TransformerWriter(null, null, scraper, null, false);
+		final Writer writer = new TransformerWriter(null, null, scraper, false);
 		if((crawlingFileContent == null || crawlingFileContent.isEmpty()) && crawlingFile != null) {
 			/* Let's report here detailed error to help user when he selected a wrong file */
 			if(!crawlingFile.exists()) {
--- a/source/net/yacy/crawler/FileCrawlStarterTask.java
+++ b/source/net/yacy/crawler/FileCrawlStarterTask.java
@@ -143,7 +143,7 @@ public class FileCrawlStarterTask extends Thread {
 				this.profile, true);
 		this.scraper.registerHtmlFilterEventListener(anchorListener);

-		final Writer writer = new TransformerWriter(null, null, this.scraper, null, false);
+		final Writer writer = new TransformerWriter(null, null, this.scraper, false);
 		FileInputStream inStream = null;

 		try {
--- a/source/net/yacy/data/BookmarkHelper.java
+++ b/source/net/yacy/data/BookmarkHelper.java
@@ -137,7 +137,7 @@ public class BookmarkHelper {
            //load the links
            final ContentScraper scraper = new ContentScraper(baseURL, 10000, new HashSet<String>(), new VocabularyScraper(), 0);
            //OutputStream os = new htmlFilterOutputStream(null, scraper, null, false);
-            final Writer writer = new TransformerWriter(null, null, scraper, null, false);
+            final Writer writer = new TransformerWriter(null, null, scraper, false);
            FileUtils.copy(input,writer);
            writer.close();
            links = scraper.getAnchors();
--- a/source/net/yacy/document/parser/html/AbstractTransformer.java
+++ b/source/net/yacy/document/parser/html/AbstractTransformer.java
@@ -1,78 +0,0 @@
-// AbstractTransformer.java
-// ----------------------------------
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-package net.yacy.document.parser.html;
-
-import java.util.TreeSet;
-
-@Deprecated
-// TODO: delete candidate, because not in use, (noticed 2014-12-02)
-public abstract class AbstractTransformer implements Transformer {
-
-    private TreeSet<String> tags0;
-    private TreeSet<String> tags1;
-
-    public AbstractTransformer(final TreeSet<String> tags0, final TreeSet<String> tags1) {
-        this.tags0  = tags0;
-        this.tags1  = tags1;
-    }
-
-    @Override
-    public boolean isTag0(final String tag) {
-        return this.tags0.contains(tag);
-    }
-
-    @Override
-    public boolean isTag1(final String tag) {
-        return this.tags1.contains(tag);
-    }
-
-    //the 'missing' method that shall be implemented:
-    @Override
-    public abstract char[] transformText(char[] text);
-    /* could be easily implemented as:
-    {
-	return text;
-    }
-    */
-
-    // the other methods must take into account to construct the return value correctly
-    @Override
-    public char[] transformTag0(final ContentScraper.Tag tag, final char quotechar) {
-        return TransformerWriter.genTag0(tag.name, tag.opts, quotechar);
-    }
-
-    @Override
-    public char[] transformTag1(final ContentScraper.Tag tag, final char quotechar) {
-        return TransformerWriter.genTag1(tag.name, tag.opts, tag.content.getChars(), quotechar);
-    }
-
-    @Override
-    public synchronized void close() {
-        // free resources
-        this.tags0 = null;
-        this.tags1 = null;
-    }
-
-}
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -1603,14 +1603,14 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        if (page == null) throw new IOException("no content in file " + file.toString());

        // scrape document to look up charset
-        final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), StandardCharsets.UTF_8.name(), new HashSet<String>(), new VocabularyScraper(), new DigestURL("http://localhost"), null, false, maxLinks, timezoneOffset);
+        final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), StandardCharsets.UTF_8.name(), new HashSet<String>(), new VocabularyScraper(), new DigestURL("http://localhost"), false, maxLinks, timezoneOffset);
        String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
        htmlFilter.close();
        if (charset == null) charset = Charset.defaultCharset().toString();

        // scrape content
        final ContentScraper scraper = new ContentScraper(new DigestURL("http://localhost"), maxLinks, new HashSet<String>(), new VocabularyScraper(), timezoneOffset);
-        final Writer writer = new TransformerWriter(null, null, scraper, null, false);
+        final Writer writer = new TransformerWriter(null, null, scraper, false);
        FileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName(charset));
        writer.close();
        return scraper;
--- a/source/net/yacy/document/parser/html/ContentTransformer.java
+++ b/source/net/yacy/document/parser/html/ContentTransformer.java
@@ -1,148 +0,0 @@
-// ContentTransformer.java
-// ---------------------------------
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-package net.yacy.document.parser.html;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.TreeSet;
-
-import net.yacy.cora.document.encoding.ASCII;
-import net.yacy.kelondro.io.CharBuffer;
-
-@Deprecated
-// TODO: delete candidate, because not in use, (noticed 2014-12-02)
-public class ContentTransformer extends AbstractTransformer implements Transformer {
-
-    // statics: for initialization of the HTMLFilterAbstractTransformer
-    private static final TreeSet<String> linkTags0 = new TreeSet<String>(ASCII.insensitiveASCIIComparator);
-    private static final TreeSet<String> linkTags1 = new TreeSet<String>(ASCII.insensitiveASCIIComparator);
-
-    static {
-        linkTags0.add("img");
-        linkTags0.add("input");
-
-        linkTags1.add("a");
-    }
-
-    private ArrayList<String> bluelist = null;
-
-    public ContentTransformer() {
-        super(linkTags0, linkTags1);
-    }
-
-    @Override
-    public void init(final String initarg) {
-        if (this.bluelist == null) {
-            // here, the init arg is used to load a list of blue-listed words
-            this.bluelist = new ArrayList<String>();
-            final File f = new File(initarg);
-            if (f.canRead()) {
-                try {
-                    final BufferedReader r = new BufferedReader(new FileReader(f));
-                    String s;
-                    while ((s = r.readLine()) != null) {
-                        if (!s.isEmpty() && s.charAt(0) != '#') this.bluelist.add(s.toLowerCase());
-                    }
-                    r.close();
-                } catch (final IOException e) {
-                }
-                // if (bluelist.isEmpty()) System.out.println("BLUELIST is empty");
-            }
-        }
-    }
-
-    @Override
-    public boolean isIdentityTransformer() {
-        return this.bluelist.isEmpty();
-    }
-
-    private static char[] genBlueLetters(int length) {
-            final CharBuffer bb = new CharBuffer(ContentScraper.MAX_DOCSIZE, " <FONT COLOR=#0000FF>".toCharArray());
-            length = length / 2;
-            if (length > 10) length = 7;
-            while (length-- > 0) {
-                bb.append('X');
-            }
-            bb.append("</FONT> ");
-            final char[] result = bb.getChars();
-            bb.close();
-            return result;
-    }
-
-    private boolean bluelistHit(final char[] text) {
-        if (text == null || this.bluelist == null) return false;
-        final String lc = new String(text).toLowerCase();
-        for (int i = 0; i < this.bluelist.size(); i++) {
-            if (lc.indexOf(this.bluelist.get(i)) >= 0) return true;
-        }
-        return false;
-    }
-
-    @Override
-    public char[] transformText(final char[] text) {
-        if (this.bluelist != null) {
-            if (bluelistHit(text)) {
-                // System.out.println("FILTERHIT: " + text);
-                return genBlueLetters(text.length);
-            }
-            return text;
-        }
-        return text;
-    }
-
-    @Override
-    public char[] transformTag0(final ContentScraper.Tag tag, final char quotechar) {
-        if (tag.name.equals("img")) {
-            // check bluelist
-            if (bluelistHit(tag.opts.getProperty("src", "").toCharArray())) return genBlueLetters(5);
-            if (bluelistHit(tag.opts.getProperty("alt", "").toCharArray())) return genBlueLetters(5);
-
-            // replace image alternative name
-            tag.opts.setProperty("alt", new String(transformText(tag.opts.getProperty("alt", "").toCharArray())));
-        }
-        if (tag.name.equals("input") && (tag.opts.getProperty("type") != null && tag.opts.getProperty("type").equals("submit"))) {
-            // rewrite button name
-            tag.opts.setProperty("value", new String(transformText(tag.opts.getProperty("value", "").toCharArray())));
-        }
-        return TransformerWriter.genTag0(tag.name, tag.opts, quotechar);
-    }
-
-    @Override
-    public char[] transformTag1(final ContentScraper.Tag tag, final char quotechar) {
-        if (bluelistHit(tag.opts.getProperty("href","").toCharArray())) return genBlueLetters(tag.content.length());
-        if (bluelistHit(tag.content.getChars())) return genBlueLetters(tag.content.length());
-        return TransformerWriter.genTag1(tag.name, tag.opts, tag.content.getChars(), quotechar);
-    }
-
-    @Override
-    public synchronized void close() {
-        // free resources
-        super.close();
-    }
-
-}
--- a/source/net/yacy/document/parser/html/ScraperInputStream.java
+++ b/source/net/yacy/document/parser/html/ScraperInputStream.java
@@ -65,7 +65,6 @@ public class ScraperInputStream extends InputStream implements ScraperListener {
            final Set<String> ignore_class_name,
            final VocabularyScraper vocabularyScraper,
            final DigestURL rooturl,
-            final Transformer transformer,
            final boolean passbyIfBinarySuspect,
            final int maxLinks,
            final int timezoneOffset
@@ -82,7 +81,7 @@ public class ScraperInputStream extends InputStream implements ScraperListener {
 	} catch (final UnsupportedEncodingException e) {
 		this.reader = new InputStreamReader(this, StandardCharsets.UTF_8);
 	}
-        this.writer = new TransformerWriter(null,null,scraper,transformer,passbyIfBinarySuspect);
+        this.writer = new TransformerWriter(null,null,scraper,passbyIfBinarySuspect);
    }

    private static String extractCharsetFromMimetypeHeader(final String mimeType) {
--- a/source/net/yacy/document/parser/html/Transformer.java
+++ b/source/net/yacy/document/parser/html/Transformer.java
@@ -1,59 +0,0 @@
-// Transformer.java 
-// ---------------------------
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-package net.yacy.document.parser.html;
-
-public interface Transformer {
-
-    // the init method is used to initialize the transformer with some values
-    // i.e. the initarg - String can be the name of a file which may contain
-    // more specific transformation rules
-    public void init(String initarg);
-
-    // ask if this transformer will do any transformation whatsoever
-    // this may return true if the initialization resulted in a status
-    // that does not allow any transformation
-    public boolean isIdentityTransformer();
-    
-    // tests, if a given body-less tag (i.e. <br> shall be supervised)
-    // only tags that are defined here will be cached and not streamed
-    public boolean isTag0(String tag);
-
-    // tests if a given tag that may have a body (i.e. <tt> ..body.. </tt>)
-    // shall be supervised
-    public boolean isTag1(String tag);
-
-    // method that is called with any text between tags
-    // the returned text replaces the given text
-    // if the text shall not be changed, it must be returned as called
-    public char[] transformText(char[] text);
-
-    // method that is called when a body-less tag occurs
-    public char[] transformTag0(ContentScraper.Tag tag, char quotechar);
-
-    // method that is called when a body-containing text occurs
-    public char[] transformTag1(ContentScraper.Tag tag, char quotechar);
-
-    public void close();
-}
--- a/source/net/yacy/document/parser/html/TransformerWriter.java
+++ b/source/net/yacy/document/parser/html/TransformerWriter.java
@@ -59,7 +59,6 @@ public final class TransformerWriter extends Writer {
    private CharBuffer buffer;
    private Stack<ContentScraper.Tag> tagStack;
    private final Scraper scraper;
-    private final Transformer transformer;
    private boolean inSingleQuote;
    private boolean inDoubleQuote;
    private boolean inComment;
@@ -70,23 +69,20 @@ public final class TransformerWriter extends Writer {
            final OutputStream outStream,
            final Charset charSet,
            final Scraper scraper,
-            final Transformer transformer,
            final boolean passbyIfBinarySuspect
    ) {
-    	this(outStream, charSet, scraper, transformer, passbyIfBinarySuspect, 64);
+    	this(outStream, charSet, scraper, passbyIfBinarySuspect, 64);
    }

    public TransformerWriter(
            final OutputStream outStream,
            final Charset charSet,
            final Scraper scraper,
-            final Transformer transformer,
            final boolean passbyIfBinarySuspect,
            final int initialBufferSize
    ) {
        this.outStream     = outStream;
        this.scraper       = scraper;
-        this.transformer   = transformer;
        this.buffer        = new CharBuffer(ContentScraper.MAX_DOCSIZE, initialBufferSize);
        this.tagStack      = new Stack<ContentScraper.Tag>();
        this.inSingleQuote = false;
@@ -235,9 +231,6 @@ public final class TransformerWriter extends Writer {
            if (this.scraper != null && content.length > 0) {
            	this.scraper.scrapeText(content, null);
            }
-            if (this.transformer != null) {
-            	return this.transformer.transformText(content);
-            }
            return content;
        }

@@ -246,11 +239,7 @@ public final class TransformerWriter extends Writer {
        if (this.scraper != null) {
            this.scraper.scrapeText(content, this.tagStack.lastElement());
        }
-        if (this.transformer != null) {
-            this.tagStack.lastElement().content.append(this.transformer.transformText(content));
-        } else {
-            this.tagStack.lastElement().content.append(content);
-        }
+        this.tagStack.lastElement().content.append(content);
        return new char[0];
    }
            
@@ -318,32 +307,21 @@ public final class TransformerWriter extends Writer {
            // this single tag is collected at once here
            this.scraper.scrapeTag0(tag);
        }
-        if (this.transformer != null && this.transformer.isTag0(tagname)) {
-            // this single tag is collected at once here
-            char[] b = this.transformer.transformTag0(tag, quotechar);
-            return b;
-        } else if ((this.scraper != null && this.scraper.isTag1(tagname)) ||
-                   (this.transformer != null && this.transformer.isTag1(tagname))) {
+        if (this.scraper != null && this.scraper.isTag1(tagname)) {
            // ok, start collecting; we don't push this here to the scraper or transformer; we do that when the tag is closed.
            this.tagStack.push(tag);
            return new char[0];
-        } else {
-             // we ignore that thing and return it again
-             return genTag0raw(tagname, true, content);
        }
+        // we ignore that thing and return it again
+        return genTag0raw(tagname, true, content);
    }

    private char[] filterTagCloseing(final char quotechar) {
        char[] ret;
        ContentScraper.Tag tag = this.tagStack.lastElement();
        if (this.scraper != null) this.scraper.scrapeTag1(tag);
-        if (this.transformer != null) {
-            ret = this.transformer.transformTag1(tag, quotechar);
-        } else {
-            ret = genTag1(tag.name, tag.opts, tag.content.getChars(), quotechar);
-        }
-        if ((this.scraper != null && this.scraper.isTag1(tag.name)) ||
-            (this.transformer != null && this.transformer.isTag1(tag.name))) {
+        ret = genTag1(tag.name, tag.opts, tag.content.getChars(), quotechar);
+        if (this.scraper != null && this.scraper.isTag1(tag.name)) {
            // remove the tag from the stack as soon as the tag is processed
            this.tagStack.pop();
            // at this point the characters from the recently processed tag must be attached to the previous tag
@@ -360,11 +338,7 @@ public final class TransformerWriter extends Writer {
        // it's our closing tag! return complete result.
        char[] ret;
        if (this.scraper != null) this.scraper.scrapeTag1(this.tagStack.lastElement());
-        if (this.transformer != null) {
-            ret = this.transformer.transformTag1(this.tagStack.lastElement(), quotechar);
-        } else {
-            ret = genTag1(this.tagStack.lastElement().name, this.tagStack.lastElement().opts, this.tagStack.lastElement().content.getChars(), quotechar);
-        }
+        ret = genTag1(this.tagStack.lastElement().name, this.tagStack.lastElement().opts, this.tagStack.lastElement().content.getChars(), quotechar);
        this.tagStack.pop();
        return ret;
    }
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@@ -276,7 +276,7 @@ public class htmlParser extends AbstractParser implements Parser {
        if (charset == null) {
            ScraperInputStream htmlFilter = null;
            try {
-                htmlFilter = new ScraperInputStream(sourceStream, documentCharset, ignore_class_name, vocabularyScraper, location, null, false, maxLinks, timezoneOffset);
+                htmlFilter = new ScraperInputStream(sourceStream, documentCharset, ignore_class_name, vocabularyScraper, location, false, maxLinks, timezoneOffset);
                sourceStream = htmlFilter;
                charset = htmlFilter.detectCharset();
            } catch (final IOException e1) {
@@ -312,7 +312,7 @@ public class htmlParser extends AbstractParser implements Parser {
        // parsing the content
        // for this static method no need to init local this.scraperObject here
        final ContentScraper scraper = new ContentScraper(location, maxAnchors, maxLinks, ignore_class_name, vocabularyScraper, timezoneOffset);
-        final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false, Math.max(64, Math.min(4096, sourceStream.available())));
+        final TransformerWriter writer = new TransformerWriter(null, null, scraper, false, Math.max(64, Math.min(4096, sourceStream.available())));
        try {
        	final long maxChars = (long)(maxBytes * detectedcharsetcontainer[0].newDecoder().averageCharsPerByte());
        	final Reader sourceReader = new InputStreamReader(sourceStream, detectedcharsetcontainer[0]);