From 67cd4c37bd5f46c84a89d7a76283e2621e46904d Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 24 Sep 2014 13:32:58 +0200 Subject: [PATCH] activated the new apk parser which was already ready but not included in the parser initialization. To make the apk parser usable, the handling of application type links had to be modified. Now all documents which do not have a parser attached are placed in the noload queue while all other documents are parsed using the associated parser class. This may have side effects on other parsers and the display of different file classes (images, apps, videos). --- source/net/yacy/crawler/CrawlStacker.java | 8 ++------ source/net/yacy/crawler/retrieval/Request.java | 4 ++++ source/net/yacy/document/TextParser.java | 2 ++ source/net/yacy/document/parser/apkParser.java | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index cc415b688..a2bb4ab80 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -325,12 +325,8 @@ public final class CrawlStacker { // check availability of parser and maxfilesize String warning = null; - ContentDomain contentDomain = entry.url().getContentDomainFromExt(); - if (contentDomain == ContentDomain.APP || - (contentDomain == ContentDomain.IMAGE && TextParser.supportsExtension(entry.url()) != null) || - contentDomain == ContentDomain.AUDIO || - contentDomain == ContentDomain.VIDEO || - contentDomain == ContentDomain.CTRL) { + //ContentDomain contentDomain = entry.url().getContentDomainFromExt(); + if (TextParser.supportsExtension(entry.url()) != null) { warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.NOLOAD, entry, profile, this.robots); //if (warning != null && this.log.isFine()) this.log.logFine("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true, false) + " - not pushed: " + warning); return null; diff --git 
a/source/net/yacy/crawler/retrieval/Request.java b/source/net/yacy/crawler/retrieval/Request.java index 1d027e5bc..6f5c9bd52 100644 --- a/source/net/yacy/crawler/retrieval/Request.java +++ b/source/net/yacy/crawler/retrieval/Request.java @@ -288,4 +288,8 @@ public class Request extends WorkflowJob return this.profileHandle; } + @Override + public String toString() { + return this.url.toNormalform(true); + } } \ No newline at end of file diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java index 9aa0ccb20..eb236c551 100644 --- a/source/net/yacy/document/TextParser.java +++ b/source/net/yacy/document/TextParser.java @@ -36,6 +36,7 @@ import java.util.concurrent.ConcurrentHashMap; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.MultiProtocolURL; +import net.yacy.document.parser.apkParser; import net.yacy.document.parser.audioTagParser; import net.yacy.document.parser.bzipParser; import net.yacy.document.parser.csvParser; @@ -82,6 +83,7 @@ public final class TextParser { private static final Map denyExtensionx = new ConcurrentHashMap(); static { + initParser(new apkParser()); initParser(new bzipParser()); initParser(new csvParser()); initParser(new docParser()); diff --git a/source/net/yacy/document/parser/apkParser.java b/source/net/yacy/document/parser/apkParser.java index 9fe4bfeb4..938bad85c 100644 --- a/source/net/yacy/document/parser/apkParser.java +++ b/source/net/yacy/document/parser/apkParser.java @@ -335,7 +335,7 @@ public class apkParser extends AbstractParser implements Parser { } sb.append('>'); } - System.out.println(sb.toString()); + //System.out.println(sb.toString()); } // evaluate the content