From 3005be734931bed06e8a4667e18bc43fa0d34c88 Mon Sep 17 00:00:00 2001 From: reger Date: Sat, 15 Jul 2017 00:19:23 +0200 Subject: [PATCH] Clean up unmaintained and unused AugmentParser trail. --- .../parser/augment/AugmentParser.java | 92 ------------------- 1 file changed, 92 deletions(-) delete mode 100644 source/net/yacy/document/parser/augment/AugmentParser.java diff --git a/source/net/yacy/document/parser/augment/AugmentParser.java b/source/net/yacy/document/parser/augment/AugmentParser.java deleted file mode 100644 index dc95e4a24..000000000 --- a/source/net/yacy/document/parser/augment/AugmentParser.java +++ /dev/null @@ -1,92 +0,0 @@ -package net.yacy.document.parser.augment; - -import java.io.IOException; -import java.io.InputStream; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; - -import net.yacy.cora.document.id.DigestURL; -import net.yacy.cora.util.ConcurrentLog; -import net.yacy.data.ymark.YMarkUtil; -import net.yacy.document.AbstractParser; -import net.yacy.document.Document; -import net.yacy.document.Parser; -import net.yacy.document.VocabularyScraper; -import net.yacy.document.parser.rdfa.impl.RDFaParser; -import net.yacy.kelondro.blob.Tables; -import net.yacy.search.Switchboard; - - -public class AugmentParser extends AbstractParser implements Parser { - - RDFaParser rdfaParser; - - public AugmentParser() { - super("AugmentParser"); - this.rdfaParser = new RDFaParser(); - - ConcurrentLog.info("AugmentedParser", "augmented parser was initialized"); - - this.SUPPORTED_EXTENSIONS.add("html"); - this.SUPPORTED_EXTENSIONS.add("htm"); - this.SUPPORTED_EXTENSIONS.add("xhtml"); - this.SUPPORTED_EXTENSIONS.add("php"); - this.SUPPORTED_MIME_TYPES.add("text/html"); - this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml"); - } - - @Override - public Document[] parse( - final DigestURL location, - final String mimeType, - final String charset, - final VocabularyScraper scraper, - final int timezoneOffset, - final InputStream source) throws Parser.Failure, InterruptedException { - - Document[] htmlDocs = this.rdfaParser.parse(location, mimeType, charset, scraper, timezoneOffset, source); - - for (final Document doc : htmlDocs) { - /* analyze(doc, url, mimeType, charset); // enrich document text */ - parseAndAugment(doc, location, mimeType, charset); // enrich document with additional tags - } - return htmlDocs; - } - -/* TODO: not implemented yet - * - private void analyze(Document origDoc, DigestURI url, - String mimeType, String charset) { - // if the magic word appears in the document, perform extra actions. - if (origDoc.getKeywords().contains("magicword")) { - String all = ""; - all = "yacylatest"; - // TODO: append content of string all to origDoc.text, maybe use Document.mergeDocuments() to do so - } - } -*/ - private void parseAndAugment(Document origDoc, DigestURL url, @SuppressWarnings("unused") String mimeType, @SuppressWarnings("unused") String charset) { - - Iterator it; - try { - it = Switchboard.getSwitchboard().tables.iterator("aggregatedtags"); - it = Tables.orderBy(it, -1, "timestamp_creation").iterator(); - while (it.hasNext()) { - net.yacy.kelondro.blob.Tables.Row r = it.next(); - if (r.get("url", "").equals(url.toNormalform(false))) { - Set tags = new HashSet(); - for (String s : YMarkUtil.keysStringToSet(r.get("scitag", ""))) { - tags.add(s); - } - origDoc.addTags(tags); - } - } - - } catch (final IOException e) { - ConcurrentLog.logException(e); - } - } - - -}