Activated the new apk parser, which was already ready but not included in

the parser initialization. To make the apk parser usable, the handling
of application-type links had to be modified. Now all documents that
do not have a parser attached are placed in the noload queue, while all
other documents are parsed using the associated parser class. This may
have side effects on other parsers and the display of different file
classes (images, apps, videos).
pull/1/head
Michael Peter Christen 10 years ago
parent 309d978520
commit 67cd4c37bd

@ -325,12 +325,8 @@ public final class CrawlStacker {
// check availability of parser and maxfilesize // check availability of parser and maxfilesize
String warning = null; String warning = null;
ContentDomain contentDomain = entry.url().getContentDomainFromExt(); //ContentDomain contentDomain = entry.url().getContentDomainFromExt();
if (contentDomain == ContentDomain.APP || if (TextParser.supportsExtension(entry.url()) != null) {
(contentDomain == ContentDomain.IMAGE && TextParser.supportsExtension(entry.url()) != null) ||
contentDomain == ContentDomain.AUDIO ||
contentDomain == ContentDomain.VIDEO ||
contentDomain == ContentDomain.CTRL) {
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.NOLOAD, entry, profile, this.robots); warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.NOLOAD, entry, profile, this.robots);
//if (warning != null && this.log.isFine()) this.log.logFine("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true, false) + " - not pushed: " + warning); //if (warning != null && this.log.isFine()) this.log.logFine("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true, false) + " - not pushed: " + warning);
return null; return null;

@ -288,4 +288,8 @@ public class Request extends WorkflowJob
return this.profileHandle; return this.profileHandle;
} }
/**
 * Returns the string form of this request, which is the result of
 * {@code this.url.toNormalform(true)}.
 */
@Override
public String toString() {
return this.url.toNormalform(true);
}
} }

@ -36,6 +36,7 @@ import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.document.parser.apkParser;
import net.yacy.document.parser.audioTagParser; import net.yacy.document.parser.audioTagParser;
import net.yacy.document.parser.bzipParser; import net.yacy.document.parser.bzipParser;
import net.yacy.document.parser.csvParser; import net.yacy.document.parser.csvParser;
@ -82,6 +83,7 @@ public final class TextParser {
private static final Map<String, Object> denyExtensionx = new ConcurrentHashMap<String, Object>(); private static final Map<String, Object> denyExtensionx = new ConcurrentHashMap<String, Object>();
static { static {
initParser(new apkParser());
initParser(new bzipParser()); initParser(new bzipParser());
initParser(new csvParser()); initParser(new csvParser());
initParser(new docParser()); initParser(new docParser());

@ -335,7 +335,7 @@ public class apkParser extends AbstractParser implements Parser {
} }
sb.append('>'); sb.append('>');
} }
System.out.println(sb.toString()); //System.out.println(sb.toString());
} }
// evaluate the content // evaluate the content

Loading…
Cancel
Save