diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index b6c2c41dd..ea1b4c84a 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -143,7 +143,7 @@ public class yacysearchitem { if (authenticated) { // only needed if authorized boolean bookmarkexists; // check url exists in bookkmarks - bookmarkexists = sb.bookmarksDB.getBookmark(urlhash) != null; + bookmarkexists = sb.bookmarksDB.getBookmark(urlhash) != null; prop.put("content_authorized_bookmark", !bookmarkexists); // bookmark icon check for YMarks //prop.put("content_authorized_bookmark", sb.tables.bookmarks.hasBookmark("admin", urlhash) ? "0" : "1"); @@ -188,7 +188,6 @@ public class yacysearchitem { prop.putXML("content_link", resultUrlstring); // putXML for rss } -// prop.putHTML("content_value", Interaction.TripleGet(result.urlstring(), "http://virtual.x/hasvalue", "anonymous")); // END interaction boolean isAtomFeed = header.get(HeaderFramework.CONNECTION_PROP_EXT, "").equals("atom"); @@ -303,7 +302,7 @@ public class yacysearchitem { boolean stealthmode = p2pmode && theSearch.query.isLocal(); if ((sb.getConfigBool(SwitchboardConstants.HEURISTIC_SEARCHRESULTS, false) || (sb.getConfigBool(SwitchboardConstants.GREEDYLEARNING_ACTIVE, false) && sb.getConfigBool(SwitchboardConstants.GREEDYLEARNING_ENABLED, false) && Memory.load() < 1.0)) && - !stealthmode) sb.heuristicSearchResults(resultUrlstring); + !stealthmode) sb.heuristicSearchResults(result); theSearch.query.transmitcount = item + 1; return prop; } diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java index cc6f57265..acbcf0641 100644 --- a/source/net/yacy/crawler/retrieval/Response.java +++ b/source/net/yacy/crawler/retrieval/Response.java @@ -859,7 +859,7 @@ public class Response { final String supportError = TextParser.supports(url(), this.responseHeader == null ? 
null : this.responseHeader.getContentType()); if (supportError != null) throw new Parser.Failure("no parser support:" + supportError, url()); try { - return TextParser.parseSource(new AnchorURL(url()), this.responseHeader == null ? null : this.responseHeader.getContentType(), this.responseHeader == null ? StandardCharsets.UTF_8.name() : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content); + return TextParser.parseSource(url(), this.responseHeader == null ? null : this.responseHeader.getContentType(), this.responseHeader == null ? StandardCharsets.UTF_8.name() : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content); } catch (final Exception e) { return null; } diff --git a/source/net/yacy/document/Parser.java b/source/net/yacy/document/Parser.java index b9139340a..0b278ab69 100644 --- a/source/net/yacy/document/Parser.java +++ b/source/net/yacy/document/Parser.java @@ -26,7 +26,7 @@ package net.yacy.document; import java.io.InputStream; import java.util.Set; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; public interface Parser { @@ -55,7 +55,7 @@ public interface Parser { * @throws InterruptedException */ public Document[] parse( - AnchorURL url, + DigestURL url, String mimeType, String charset, VocabularyScraper scraper, diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java index 66f679f69..d0b3d286a 100644 --- a/source/net/yacy/document/TextParser.java +++ b/source/net/yacy/document/TextParser.java @@ -34,7 +34,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import net.yacy.cora.document.encoding.UTF8; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; 
import net.yacy.cora.util.CommonPattern; import net.yacy.document.parser.apkParser; @@ -161,7 +161,7 @@ public final class TextParser { } public static Document[] parseSource( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -194,7 +194,7 @@ public final class TextParser { } public static Document[] parseSource( - final AnchorURL location, + final DigestURL location, String mimeType, final String charset, final VocabularyScraper scraper, @@ -220,7 +220,7 @@ public final class TextParser { } public static Document[] parseSource( - final AnchorURL location, + final DigestURL location, String mimeType, final String charset, final VocabularyScraper scraper, @@ -262,7 +262,7 @@ public final class TextParser { } private static Document[] parseSource( - final AnchorURL location, + final DigestURL location, final String mimeType, final Parser parser, final String charset, @@ -285,7 +285,7 @@ public final class TextParser { } private static Document[] parseSource( - final AnchorURL location, + final DigestURL location, final String mimeType, final Set parsers, final String charset, diff --git a/source/net/yacy/document/parser/apkParser.java b/source/net/yacy/document/parser/apkParser.java index af4d1f4e0..8dd5b0daf 100644 --- a/source/net/yacy/document/parser/apkParser.java +++ b/source/net/yacy/document/parser/apkParser.java @@ -40,6 +40,7 @@ import java.util.jar.JarEntry; import java.util.jar.JarFile; import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -56,7 +57,7 @@ public class apkParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -91,7 +92,7 @@ public class 
apkParser extends AbstractParser implements Parser { return docs; } - public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final JarFile jf) { + public Document[] parse(final DigestURL location, final String mimeType, final String charset, final JarFile jf) { StringBuilder sb = new StringBuilder(); String title = location.getFileName(); AndroidManifestParser manifest = null; @@ -142,11 +143,11 @@ public class apkParser extends AbstractParser implements Parser { null, null, singleList(title), - "", + null, manifest == null ? "" : manifest.packageName, null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, sb.toString(), links, null, diff --git a/source/net/yacy/document/parser/audioTagParser.java b/source/net/yacy/document/parser/audioTagParser.java index 747a0ebf5..3bc5c4f68 100644 --- a/source/net/yacy/document/parser/audioTagParser.java +++ b/source/net/yacy/document/parser/audioTagParser.java @@ -35,7 +35,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; @@ -71,7 +71,7 @@ public class audioTagParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -172,7 +172,7 @@ public class audioTagParser extends AbstractParser implements Parser { location.getHost(), // publisher null, // sections descriptions, // abstrct - 0.0f, 0.0f, // lon, lat + 0.0d, 0.0d, // lon, lat text.toString(), // text null, null, @@ -191,11 +191,11 @@ public class audioTagParser extends AbstractParser implements Parser { null, null, singleList(filename), // title - "", // author + null, // author location.getHost(), null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, 
location.toTokens(), null, null, diff --git a/source/net/yacy/document/parser/augment/AugmentParser.java b/source/net/yacy/document/parser/augment/AugmentParser.java index aa4dcf3df..dc95e4a24 100644 --- a/source/net/yacy/document/parser/augment/AugmentParser.java +++ b/source/net/yacy/document/parser/augment/AugmentParser.java @@ -6,7 +6,6 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.data.ymark.YMarkUtil; @@ -39,7 +38,7 @@ public class AugmentParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/bzipParser.java b/source/net/yacy/document/parser/bzipParser.java index 681a97b95..922eba542 100644 --- a/source/net/yacy/document/parser/bzipParser.java +++ b/source/net/yacy/document/parser/bzipParser.java @@ -32,7 +32,6 @@ import java.io.FileOutputStream; import java.io.InputStream; import java.util.Date; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; @@ -65,7 +64,7 @@ public class bzipParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/csvParser.java b/source/net/yacy/document/parser/csvParser.java index 25bba2fff..cfee1758f 100644 --- a/source/net/yacy/document/parser/csvParser.java +++ b/source/net/yacy/document/parser/csvParser.java @@ -33,7 +33,7 @@ import java.util.ArrayList; import java.util.Date; import java.util.List; -import 
net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.CommonPattern; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -54,7 +54,7 @@ public class csvParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -77,11 +77,11 @@ public class csvParser extends AbstractParser implements Parser { null, null, singleList(concatRow(table.get(0))), - "", + null, "", null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, sb.toString(), null, null, diff --git a/source/net/yacy/document/parser/docParser.java b/source/net/yacy/document/parser/docParser.java index 5dbae9848..4ac59423a 100644 --- a/source/net/yacy/document/parser/docParser.java +++ b/source/net/yacy/document/parser/docParser.java @@ -32,7 +32,7 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.CommonPattern; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -60,7 +60,7 @@ public class docParser extends AbstractParser implements Parser { @SuppressWarnings("deprecation") @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -121,7 +121,7 @@ public class docParser extends AbstractParser implements Parser { extractor.getDocSummaryInformation().getCompany(), // publisher null, descriptions, - 0.0f, 0.0f, + 0.0d, 0.0d, contents.toString(), null, null, diff --git a/source/net/yacy/document/parser/dwgParser.java b/source/net/yacy/document/parser/dwgParser.java index 25c2d29b6..ae047e4c2 100644 --- a/source/net/yacy/document/parser/dwgParser.java +++ 
b/source/net/yacy/document/parser/dwgParser.java @@ -25,7 +25,7 @@ package net.yacy.document.parser; import java.io.InputStream; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -62,7 +62,7 @@ public class dwgParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/genericParser.java b/source/net/yacy/document/parser/genericParser.java index 2ff09475d..0d6d64d6b 100644 --- a/source/net/yacy/document/parser/genericParser.java +++ b/source/net/yacy/document/parser/genericParser.java @@ -25,9 +25,8 @@ package net.yacy.document.parser; import java.io.InputStream; -import java.util.Date; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -47,7 +46,7 @@ public class genericParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -63,17 +62,17 @@ public class genericParser extends AbstractParser implements Parser { null, null, singleList(filename.isEmpty() ? 
location.toTokens() : MultiProtocolURL.unescape(filename)), // title - "", // author + null, // author location.getHost(), null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, location.toTokens(), null, null, null, false, - new Date())}; + null)}; return docs; } } diff --git a/source/net/yacy/document/parser/gzipParser.java b/source/net/yacy/document/parser/gzipParser.java index e7119a110..95a6ad77a 100644 --- a/source/net/yacy/document/parser/gzipParser.java +++ b/source/net/yacy/document/parser/gzipParser.java @@ -33,7 +33,6 @@ import java.io.InputStream; import java.util.Date; import java.util.zip.GZIPInputStream; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; @@ -64,7 +63,7 @@ public class gzipParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index 4fd1dc24c..f668d3001 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -39,7 +39,6 @@ import java.util.LinkedHashMap; import java.util.Set; import net.yacy.cora.document.encoding.UTF8; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.protocol.ClientIdentification; @@ -88,7 +87,7 @@ public class htmlParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String documentCharset, final VocabularyScraper vocscraper, @@ -382,9 +381,9 @@ public class htmlParser extends AbstractParser implements Parser { public static void main(final String[] 
args) { // test parsing of a url - AnchorURL url; + DigestURL url; try { - url = new AnchorURL(args[0]); + url = new DigestURL(args[0]); final byte[] content = url.get(ClientIdentification.yacyInternetCrawlerAgent, null, null); final Document[] document = new htmlParser().parse(url, "text/html", StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, new ByteArrayInputStream(content)); final String title = document[0].dc_title(); diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java index 6e656c3a0..4daa0c14b 100644 --- a/source/net/yacy/document/parser/images/genericImageParser.java +++ b/source/net/yacy/document/parser/images/genericImageParser.java @@ -84,7 +84,7 @@ public class genericImageParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -213,7 +213,7 @@ public class genericImageParser extends AbstractParser implements Parser { } private ImageInfo parseJavaImage( - final AnchorURL location, + final DigestURL location, final InputStream sourceStream) throws Parser.Failure { BufferedImage image = null; try { @@ -228,7 +228,7 @@ public class genericImageParser extends AbstractParser implements Parser { } private ImageInfo parseJavaImage( - final AnchorURL location, + final DigestURL location, final BufferedImage image) { final ImageInfo ii = new ImageInfo(location); ii.image = image; @@ -265,12 +265,12 @@ public class genericImageParser extends AbstractParser implements Parser { } private class ImageInfo { - public AnchorURL location; + public DigestURL location; public BufferedImage image; public StringBuilder info; public int height; public int width; - public ImageInfo(final AnchorURL location) { + public ImageInfo(final DigestURL location) { this.location = location; this.image = null; this.info = new 
StringBuilder(); diff --git a/source/net/yacy/document/parser/images/metadataImageParser.java b/source/net/yacy/document/parser/images/metadataImageParser.java index d36a39cdd..1a374887b 100644 --- a/source/net/yacy/document/parser/images/metadataImageParser.java +++ b/source/net/yacy/document/parser/images/metadataImageParser.java @@ -44,7 +44,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -85,7 +85,7 @@ public class metadataImageParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -167,7 +167,7 @@ public class metadataImageParser extends AbstractParser implements Parser { new HashSet(0), // languages keywords == null ? new String[]{} : keywords.split(keywords.indexOf(',') > 0 ? "," : " "), // keywords singleList(title), // title - author == null ? "" : author, // author + author == null ? 
null : author, // author location.getHost(), // Publisher null, // sections descriptions, // description diff --git a/source/net/yacy/document/parser/images/svgParser.java b/source/net/yacy/document/parser/images/svgParser.java index 920b85401..70882e967 100644 --- a/source/net/yacy/document/parser/images/svgParser.java +++ b/source/net/yacy/document/parser/images/svgParser.java @@ -28,7 +28,6 @@ import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.util.ConcurrentLog; @@ -80,7 +79,7 @@ public class svgParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -128,7 +127,7 @@ public class svgParser extends AbstractParser implements Parser { "", null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, docDescription, // text - for this image description is best text we have null, null, diff --git a/source/net/yacy/document/parser/linkScraperParser.java b/source/net/yacy/document/parser/linkScraperParser.java index f0ccbe4d9..dabe3f631 100644 --- a/source/net/yacy/document/parser/linkScraperParser.java +++ b/source/net/yacy/document/parser/linkScraperParser.java @@ -21,9 +21,8 @@ package net.yacy.document.parser; import java.io.InputStream; -import java.util.Date; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -60,7 +59,7 @@ public class linkScraperParser extends AbstractParser implements Parser { } @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final 
String charset, final VocabularyScraper scraper, @@ -81,17 +80,17 @@ public class linkScraperParser extends AbstractParser implements Parser { null, null, singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title - "", // author + null, // author location.getHost(), null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, location.toTokens(), htmlParserDoc == null ? null : htmlParserDoc.getAnchors(), htmlParserDoc == null ? null : htmlParserDoc.getRSS(), htmlParserDoc == null ? null : htmlParserDoc.getImages(), false, - new Date())}; + null)}; return docs; } } diff --git a/source/net/yacy/document/parser/mmParser.java b/source/net/yacy/document/parser/mmParser.java index 0799ca97c..aef0f8f8a 100644 --- a/source/net/yacy/document/parser/mmParser.java +++ b/source/net/yacy/document/parser/mmParser.java @@ -36,7 +36,7 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import net.yacy.cora.document.encoding.UTF8; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -73,7 +73,7 @@ public class mmParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -119,7 +119,7 @@ public class mmParser extends AbstractParser implements Parser { null, null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, content, null, null, diff --git a/source/net/yacy/document/parser/odtParser.java b/source/net/yacy/document/parser/odtParser.java index 2e96ffbeb..03c35e188 100644 --- a/source/net/yacy/document/parser/odtParser.java +++ b/source/net/yacy/document/parser/odtParser.java @@ -44,7 +44,6 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import net.yacy.cora.document.encoding.UTF8; -import 
net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -220,7 +219,7 @@ public class odtParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/ooxmlParser.java b/source/net/yacy/document/parser/ooxmlParser.java index c8cc2505c..c543ebf01 100644 --- a/source/net/yacy/document/parser/ooxmlParser.java +++ b/source/net/yacy/document/parser/ooxmlParser.java @@ -44,7 +44,7 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import net.yacy.cora.document.encoding.UTF8; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -93,7 +93,7 @@ public class ooxmlParser extends AbstractParser implements Parser { return parser; } - private Document[] parse(final AnchorURL location, final String mimeType, @SuppressWarnings("unused") final String charset, final File dest) throws Parser.Failure, InterruptedException { + private Document[] parse(final DigestURL location, final String mimeType, @SuppressWarnings("unused") final String charset, final File dest) throws Parser.Failure, InterruptedException { CharBuffer writer = null; try { @@ -206,7 +206,7 @@ public class ooxmlParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java index 5d969f3f1..9291bdb25 100644 --- a/source/net/yacy/document/parser/pdfParser.java +++ 
b/source/net/yacy/document/parser/pdfParser.java @@ -55,6 +55,7 @@ import org.apache.pdfbox.util.PDFTextStripper; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; @@ -89,7 +90,7 @@ public class pdfParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -253,7 +254,7 @@ public class pdfParser extends AbstractParser implements Parser { docPublisher, null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, contentBytes, pdflinksCombined, null, diff --git a/source/net/yacy/document/parser/pptParser.java b/source/net/yacy/document/parser/pptParser.java index c3c8c3bdf..83a4d7221 100644 --- a/source/net/yacy/document/parser/pptParser.java +++ b/source/net/yacy/document/parser/pptParser.java @@ -33,7 +33,7 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; @@ -65,7 +65,7 @@ public class pptParser extends AbstractParser implements Parser { */ @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -114,7 +114,7 @@ public class pptParser extends AbstractParser implements Parser { pptExtractor.getDocSummaryInformation().getCompany(), null, descriptions, - 0.0f, 0.0f, + 0.0d, 0.0d, contents, null, null, diff --git a/source/net/yacy/document/parser/psParser.java b/source/net/yacy/document/parser/psParser.java index 10f1fd5e1..86b4ca4d9 100644 --- 
a/source/net/yacy/document/parser/psParser.java +++ b/source/net/yacy/document/parser/psParser.java @@ -37,7 +37,6 @@ import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.util.Date; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -110,7 +109,7 @@ public class psParser extends AbstractParser implements Parser { null, // languages null, // keywords null, // title - "", // author + null, // author "", // publisher null, // sections null, // abstract @@ -259,7 +258,7 @@ public class psParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/rdfParser.java b/source/net/yacy/document/parser/rdfParser.java index dba55415b..c2302f7c2 100644 --- a/source/net/yacy/document/parser/rdfParser.java +++ b/source/net/yacy/document/parser/rdfParser.java @@ -30,7 +30,7 @@ import java.util.ArrayList; import java.util.Date; import java.util.List; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -47,7 +47,7 @@ public class rdfParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -65,8 +65,8 @@ public class rdfParser extends AbstractParser implements Parser { Document doc; String all = "rdfdatasource"; - doc = new Document(location, mimeType, charset, null, null, null, singleList(""), "", - "", null, new ArrayList(0), 0, 0, all, null, null, null, false, new Date()); + doc = new Document(location, mimeType, charset, 
null, null, null, singleList(""), null, + "", null, null, 0, 0, all, null, null, null, false, new Date()); docs.add(doc); diff --git a/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java b/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java index f95cca2ae..2d3cd48f8 100644 --- a/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java +++ b/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java @@ -17,7 +17,6 @@ import java.util.Date; import java.util.HashSet; import java.util.Set; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; @@ -49,7 +48,7 @@ public class RDFaParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL url, + final DigestURL url, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -103,7 +102,7 @@ public class RDFaParser extends AbstractParser implements Parser { } private Document[] parseHtml( - final AnchorURL url, + final DigestURL url, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -190,7 +189,7 @@ public class RDFaParser extends AbstractParser implements Parser { if (aReader != null) { RDFaParser aParser = new RDFaParser(); try { - aParser.parse(new AnchorURL(args[0]), "", "", new VocabularyScraper(), 0, aURL.openStream()); + aParser.parse(new DigestURL(args[0]), "", "", new VocabularyScraper(), 0, aURL.openStream()); } catch (final FileNotFoundException e) { e.printStackTrace(); } catch (final IOException e) { diff --git a/source/net/yacy/document/parser/rssParser.java b/source/net/yacy/document/parser/rssParser.java index 8f0952bfb..0897c805c 100644 --- a/source/net/yacy/document/parser/rssParser.java +++ b/source/net/yacy/document/parser/rssParser.java @@ -37,7 +37,6 @@ import java.util.Set; import net.yacy.cora.document.feed.Hit; import net.yacy.cora.document.feed.RSSFeed; import 
net.yacy.cora.document.feed.RSSReader; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -60,7 +59,7 @@ public class rssParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -77,11 +76,11 @@ public class rssParser extends AbstractParser implements Parser { final RSSFeed feed = rssReader.getFeed(); //RSSMessage channel = feed.getChannel(); final List docs = new ArrayList(); - AnchorURL itemuri; + DigestURL itemuri; Set languages; Document doc; for (final Hit item: feed) try { - itemuri = new AnchorURL(item.getLink()); + itemuri = new DigestURL(item.getLink()); languages = new HashSet(); languages.add(item.getLanguage()); doc = new Document( diff --git a/source/net/yacy/document/parser/rtfParser.java b/source/net/yacy/document/parser/rtfParser.java index 63a8c7bbc..c84fb0bfe 100644 --- a/source/net/yacy/document/parser/rtfParser.java +++ b/source/net/yacy/document/parser/rtfParser.java @@ -29,12 +29,11 @@ package net.yacy.document.parser; import java.io.InputStream; import java.nio.charset.StandardCharsets; -import java.util.Date; import javax.swing.text.DefaultStyledDocument; import javax.swing.text.rtf.RTFEditorKit; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -55,7 +54,7 @@ public class rtfParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -83,17 +82,17 @@ public class rtfParser extends AbstractParser implements Parser { replaceAll("\n"," "). 
replaceAll("\r"," "). replaceAll("\t"," ")), - "", // TODO: AUTHOR + null, // TODO: AUTHOR "", // TODO: publisher null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, bodyText, null, null, null, false, - new Date())}; + null)}; } catch (final Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; if (e instanceof Parser.Failure) throw (Parser.Failure) e; diff --git a/source/net/yacy/document/parser/sevenzipParser.java b/source/net/yacy/document/parser/sevenzipParser.java index c5cde469f..d78cad1ef 100644 --- a/source/net/yacy/document/parser/sevenzipParser.java +++ b/source/net/yacy/document/parser/sevenzipParser.java @@ -35,6 +35,7 @@ import java.io.OutputStream; import java.util.Date; import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; @@ -58,7 +59,7 @@ public class sevenzipParser extends AbstractParser implements Parser { } public Document parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final int timezoneOffset, @@ -110,7 +111,7 @@ public class sevenzipParser extends AbstractParser implements Parser { } public Document parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final int timezoneOffset, @@ -120,7 +121,7 @@ public class sevenzipParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/sidAudioParser.java b/source/net/yacy/document/parser/sidAudioParser.java index e0c164e21..eb64dfaa0 100644 --- a/source/net/yacy/document/parser/sidAudioParser.java +++ b/source/net/yacy/document/parser/sidAudioParser.java @@ -31,7 +31,7 @@ import java.util.Date; import 
java.util.HashMap; import java.util.Map; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -59,7 +59,7 @@ public class sidAudioParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -97,7 +97,7 @@ public class sidAudioParser extends AbstractParser implements Parser { header.get("publisher"), null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, null, null, null, diff --git a/source/net/yacy/document/parser/sitemapParser.java b/source/net/yacy/document/parser/sitemapParser.java index 5297d9893..65b1d07b6 100644 --- a/source/net/yacy/document/parser/sitemapParser.java +++ b/source/net/yacy/document/parser/sitemapParser.java @@ -40,7 +40,6 @@ import java.util.zip.GZIPInputStream; import javax.xml.parsers.DocumentBuilderFactory; import net.yacy.cora.date.ISO8601Formatter; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.RequestHeader; @@ -52,7 +51,6 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; -import net.yacy.document.parser.html.ImageEntry; import net.yacy.kelondro.io.ByteCountInputStream; import org.w3c.dom.CharacterData; @@ -71,7 +69,7 @@ public class sitemapParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -94,15 +92,15 @@ public class sitemapParser extends AbstractParser implements Parser { null, null, singleList(""), + null, "", - "", null, - new ArrayList(), - 0.0f, 
0.0f, + null, + 0.0d, 0.0d, + null, null, null, null, - new LinkedHashMap(), false, new Date()); docs.add(doc); diff --git a/source/net/yacy/document/parser/swfParser.java b/source/net/yacy/document/parser/swfParser.java index a25efa74f..81bd0473d 100644 --- a/source/net/yacy/document/parser/swfParser.java +++ b/source/net/yacy/document/parser/swfParser.java @@ -31,7 +31,7 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -56,7 +56,7 @@ public class swfParser extends AbstractParser implements Parser { */ @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/tarParser.java b/source/net/yacy/document/parser/tarParser.java index ae25b4776..815497beb 100644 --- a/source/net/yacy/document/parser/tarParser.java +++ b/source/net/yacy/document/parser/tarParser.java @@ -34,6 +34,7 @@ import java.util.zip.GZIPInputStream; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -65,7 +66,7 @@ public class tarParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/torrentParser.java b/source/net/yacy/document/parser/torrentParser.java index bed4d56d3..72f82bd0a 100644 --- a/source/net/yacy/document/parser/torrentParser.java +++ 
b/source/net/yacy/document/parser/torrentParser.java @@ -34,7 +34,7 @@ import java.util.List; import java.util.Map; import net.yacy.cora.document.encoding.UTF8; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Condenser; @@ -59,7 +59,7 @@ public class torrentParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -114,7 +114,7 @@ public class torrentParser extends AbstractParser implements Parser { location.getHost(), null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, filenames.toString(), null, null, @@ -127,7 +127,7 @@ public class torrentParser extends AbstractParser implements Parser { try { byte[] b = FileUtils.read(new File(args[0])); torrentParser parser = new torrentParser(); - Document[] d = parser.parse(new AnchorURL("http://localhost/test.torrent"), null, StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, new ByteArrayInputStream(b)); + Document[] d = parser.parse(new DigestURL("http://localhost/test.torrent"), null, StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, new ByteArrayInputStream(b)); Condenser c = new Condenser(d[0], null, true, true, LibraryProvider.dymLib, false, false, 0); Map w = c.words(); for (Map.Entry e: w.entrySet()) System.out.println("Word: " + e.getKey() + " - " + e.getValue().posInText); diff --git a/source/net/yacy/document/parser/vcfParser.java b/source/net/yacy/document/parser/vcfParser.java index 0676153f5..7dc1bf39e 100644 --- a/source/net/yacy/document/parser/vcfParser.java +++ b/source/net/yacy/document/parser/vcfParser.java @@ -43,6 +43,7 @@ import java.util.List; import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.AnchorURL; 
+import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.order.Base64Order; import net.yacy.cora.util.CommonPattern; import net.yacy.document.AbstractParser; @@ -69,7 +70,7 @@ public class vcfParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -235,11 +236,11 @@ public class vcfParser extends AbstractParser implements Parser { null, // set of languages null, // a list of extracted keywords singleList(parsedTitle.toString()), // a long document title - "", // TODO: AUTHOR + null, // TODO: AUTHOR "", // the publisher sections, // an array of section headlines descriptions, // an abstract - 0.0f, 0.0f, + 0.0d, 0.0d, text, // the parsed document text anchors, // a map of extracted anchors null, diff --git a/source/net/yacy/document/parser/vsdParser.java b/source/net/yacy/document/parser/vsdParser.java index 94f036ed1..85e2e7a4b 100644 --- a/source/net/yacy/document/parser/vsdParser.java +++ b/source/net/yacy/document/parser/vsdParser.java @@ -33,7 +33,7 @@ import java.util.ArrayList; import java.util.Date; import java.util.List; -import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -69,7 +69,7 @@ public class vsdParser extends AbstractParser implements Parser { */ @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/document/parser/xlsParser.java b/source/net/yacy/document/parser/xlsParser.java index 3005c0bab..749019134 100644 --- a/source/net/yacy/document/parser/xlsParser.java +++ b/source/net/yacy/document/parser/xlsParser.java @@ -31,7 +31,6 @@ import java.io.InputStream; import 
java.nio.charset.StandardCharsets; import java.util.Date; -import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; @@ -70,7 +69,7 @@ public class xlsParser extends AbstractParser implements Parser { */ @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, @@ -129,11 +128,11 @@ public class xlsParser extends AbstractParser implements Parser { null, null, singleList(location.getFile()), - "", // TODO: AUTHOR + null, // TODO: AUTHOR "", // TODO: publisher null, null, - 0.0f, 0.0f, + 0.0d, 0.0d, contents, null, null, diff --git a/source/net/yacy/document/parser/zipParser.java b/source/net/yacy/document/parser/zipParser.java index a6718aea1..040a33873 100644 --- a/source/net/yacy/document/parser/zipParser.java +++ b/source/net/yacy/document/parser/zipParser.java @@ -67,7 +67,7 @@ public class zipParser extends AbstractParser implements Parser { @Override public Document[] parse( - final AnchorURL location, + final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index f4b534baa..323305fdd 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -560,7 +560,7 @@ public final class LoaderDispatcher { * @return a map from URLs to the anchor texts of the urls * @throws IOException */ - public final Map loadLinks(final AnchorURL url, final CacheStrategy cacheStrategy, BlacklistType blacklistType, final ClientIdentification.Agent agent, final int timezoneOffset) throws IOException { + public final Map loadLinks(final DigestURL url, final CacheStrategy cacheStrategy, BlacklistType blacklistType, final ClientIdentification.Agent 
agent, final int timezoneOffset) throws IOException { final Response response = load(request(url, true, false), cacheStrategy, Integer.MAX_VALUE, blacklistType, agent); if (response == null) throw new IOException("response == null"); final ResponseHeader responseHeader = response.getResponseHeader(); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index c26e2ecde..eb7ef4ec7 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -3755,42 +3755,57 @@ public final class Switchboard extends serverSwitch { }.start(); } - public final void heuristicSearchResults(final String url) { + /** + * Get the outbound links of the result and add each unique link to the crawler queue. + * If the input resulturl is a full index document with outbound links, these will be used; + * otherwise the url is loaded and links are extracted/parsed. + * + * @param resulturl the result doc whose outbound links are added to the crawler + */ + public final void heuristicSearchResults(final URIMetadataNode resulturl) { new Thread() { @Override public void run() { // get the links for a specific site - final AnchorURL startUrl; - try { - startUrl = new AnchorURL(url); - } catch (final MalformedURLException e) { - ConcurrentLog.logException(e); - return; - } + final DigestURL startUrl = resulturl.url(); + + // result might be rich metadata, try to get outbound links directly from result + Set urls; + Iterator outlinkit = URIMetadataNode.getLinks(resulturl, false); + if (outlinkit.hasNext()) { + urls = new HashSet(); + while (outlinkit.hasNext()) { + try { + urls.add(new DigestURL(outlinkit.next())); + } catch (MalformedURLException ex) { } + } + } else { // otherwise get links from loader + urls = null; - final Map links; - DigestURL url; - try { - links = Switchboard.this.loader.loadLinks(startUrl, CacheStrategy.IFFRESH, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent, 0); - if (links != null) { - if
(links.size() < 1000) { // limit to 1000 to skip large index pages - final Iterator i = links.keySet().iterator(); - final boolean globalcrawljob = Switchboard.this.getConfigBool(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL,false); - Collection urls = new ArrayList(); - while (i.hasNext()) { - url = i.next(); - boolean islocal = (url.getHost() == null && startUrl.getHost() == null) || (url.getHost() != null && startUrl.getHost() != null && url.getHost().contentEquals(startUrl.getHost())); - // add all external links or links to different page to crawler - if ( !islocal ) {// || (!startUrl.getPath().endsWith(url.getPath()))) { - urls.add(url); + try { + final Map links; + links = Switchboard.this.loader.loadLinks(startUrl, CacheStrategy.IFFRESH, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent, 0); + if (links != null) { + if (links.size() < 1000) { // limit to 1000 to skip large index pages + final Iterator i = links.keySet().iterator(); + if (urls == null) urls = new HashSet(); + while (i.hasNext()) { + DigestURL url = i.next(); + boolean islocal = (url.getHost() == null && startUrl.getHost() == null) || (url.getHost() != null && startUrl.getHost() != null && url.getHost().contentEquals(startUrl.getHost())); + // add all external links or links to different page to crawler + if ( !islocal ) {// || (!startUrl.getPath().endsWith(url.getPath()))) { + urls.add(url); + } } } - addToCrawler(urls, globalcrawljob); } - } - } catch (final Throwable e) { + } catch (final Throwable e) { } + } + if (urls != null && urls.size() > 0) { + final boolean globalcrawljob = Switchboard.this.getConfigBool(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL,false); + addToCrawler(urls, globalcrawljob); } } }.start();