diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java index ddf4aa27e..e0241a89e 100644 --- a/htroot/HostBrowser.java +++ b/htroot/HostBrowser.java @@ -161,7 +161,7 @@ public class HostBrowser { sb.peers.mySeed().hash.getBytes(), url, null, load, new Date(), sb.crawler.defaultProxyProfile.handle(), - 0, 0, 0 + 0 )); prop.putHTML("result", reasonString == null ? ("added url to indexer: " + load) : ("not indexed url '" + load + "': " + reasonString)); if (wait) waitloop: for (int i = 0; i < 30; i++) { diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java index 7f288c821..f0331493f 100644 --- a/htroot/QuickCrawlLink_p.java +++ b/htroot/QuickCrawlLink_p.java @@ -174,8 +174,6 @@ public class QuickCrawlLink_p { (title==null)?"CRAWLING-ROOT":title, new Date(), pe.handle(), - 0, - 0, 0 )); diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 92ddcb2d9..7debb3c53 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -178,7 +178,8 @@ public class ViewFile { Response response = null; try { ClientIdentification.Agent agent = ClientIdentification.getAgent(post.get("agentName", ClientIdentification.yacyInternetCrawlerAgentName)); - response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, agent); + // use sb.loader.request( , , global=true) to use crawlprofile to allow index update + response = sb.loader.load(sb.loader.request(url, true, true), authorized ? 
CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, agent); } catch (final IOException e) { prop.put("error", "4"); prop.put("error_errorText", "error loading resource: " + e.getMessage()); @@ -374,6 +375,10 @@ public class ViewFile { prop.put("showSnippet_teasertext", desc); prop.put("showSnippet", 1); } + // update index with parsed resource if index entry is older + if (urlEntry.loaddate().before(response.lastModified())) { + Switchboard.getSwitchboard().toIndexer(response); + } if (document != null) document.close(); } prop.put("error", "0"); diff --git a/htroot/api/push_p.java b/htroot/api/push_p.java index d5bf6f14f..f05f1911c 100644 --- a/htroot/api/push_p.java +++ b/htroot/api/push_p.java @@ -102,8 +102,6 @@ public class push_p { "", // the name of the document to crawl new Date(), // current date profile.handle(), // the name of the prefetch profile. This must not be null! - 0, // depth the crawling depth of the entry - 0, // anchors number of anchors of the parent 0); // forkfactor sum of anchors of all ancestors Response response = new Response( request, diff --git a/htroot/rct_p.java b/htroot/rct_p.java index f355f7d11..e32092485 100644 --- a/htroot/rct_p.java +++ b/htroot/rct_p.java @@ -78,8 +78,6 @@ public class rct_p { "REMOTE-CRAWLING", loaddate, sb.crawler.defaultRemoteProfile.handle(), - 0, - 0, 0)); } else { env.getLog().warn("crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason); diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index e620d56d8..56fe28bfb 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -209,8 +209,6 @@ public final class CrawlStacker { url.getNameProperty(), new Date(), profileHandle, - 0, - 0, 0 )); } @@ -250,8 +248,6 @@ public final class CrawlStacker { MultiProtocolURL.unescape(entry.name), entry.date, profileHandle, - 0, - 0, 0)); } } catch (final IOException e1) { @@ -276,8 
+272,6 @@ public final class CrawlStacker { "CRAWLING-ROOT", new Date(), pe.handle(), - 0, - 0, 0)); } diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java index 8ff9ff6c3..b02a38a95 100644 --- a/source/net/yacy/crawler/data/CrawlQueues.java +++ b/source/net/yacy/crawler/data/CrawlQueues.java @@ -530,8 +530,6 @@ public class CrawlQueues { item.getDescriptions().size() > 0 ? item.getDescriptions().get(0) : "", loaddate, this.sb.crawler.defaultRemoteProfile.handle(), - 0, - 0, 0 )); } else { diff --git a/source/net/yacy/crawler/retrieval/Request.java b/source/net/yacy/crawler/retrieval/Request.java index 6f5c9bd52..fe8d9ce10 100644 --- a/source/net/yacy/crawler/retrieval/Request.java +++ b/source/net/yacy/crawler/retrieval/Request.java @@ -119,7 +119,7 @@ public class Request extends WorkflowJob * @param referrerhash */ public Request(final DigestURL url, final byte[] referrerhash) { - this(null, url, referrerhash, null, null, null, 0, 0, 0); + this(null, url, referrerhash, null, null, null, 0); } /** @@ -132,8 +132,6 @@ public class Request extends WorkflowJob * @param appdate the time when the url was first time appeared * @param profileHandle the name of the prefetch profile. This must not be null! 
* @param depth the crawling depth of the entry - * @param anchors number of anchors of the parent - * @param forkfactor sum of anchors of all ancestors */ public Request( final byte[] initiator, @@ -142,9 +140,7 @@ public class Request extends WorkflowJob final String name, final Date appdate, final String profileHandle, - final int depth, - final int anchors, - final int forkfactor) { + final int depth) { // create new entry and store it into database assert url != null; assert profileHandle == null || profileHandle.length() == Word.commonHashLength : profileHandle diff --git a/source/net/yacy/crawler/retrieval/SitemapImporter.java b/source/net/yacy/crawler/retrieval/SitemapImporter.java index 69ae673e8..936123360 100644 --- a/source/net/yacy/crawler/retrieval/SitemapImporter.java +++ b/source/net/yacy/crawler/retrieval/SitemapImporter.java @@ -108,8 +108,6 @@ public class SitemapImporter extends Thread { entry.url(), entry.lastmod(new Date()), this.crawlingProfile.handle(), - 0, - 0, 0 )); logger.info("New URL '" + entry.url() + "' added for loading."); diff --git a/source/net/yacy/data/ymark/YMarkCrawlStart.java b/source/net/yacy/data/ymark/YMarkCrawlStart.java index 103af2add..1f3491c54 100644 --- a/source/net/yacy/data/ymark/YMarkCrawlStart.java +++ b/source/net/yacy/data/ymark/YMarkCrawlStart.java @@ -197,7 +197,7 @@ public class YMarkCrawlStart extends HashMap{ null, "CRAWLING-ROOT", new Date(), - pe.handle(), 0, 0, 0 + pe.handle(), 0 )); } } diff --git a/source/net/yacy/http/ProxyCacheHandler.java b/source/net/yacy/http/ProxyCacheHandler.java index 392372937..d5417b641 100644 --- a/source/net/yacy/http/ProxyCacheHandler.java +++ b/source/net/yacy/http/ProxyCacheHandler.java @@ -74,8 +74,6 @@ public class ProxyCacheHandler extends AbstractRemoteHandler implements Handler "", cachedResponseHeader.lastModified(), sb.crawler.defaultProxyProfile.handle(), - 0, - 0, 0); final Response cachedResponse = new Response( diff --git 
a/source/net/yacy/http/ProxyHandler.java b/source/net/yacy/http/ProxyHandler.java index c4d1ab927..b55754b92 100644 --- a/source/net/yacy/http/ProxyHandler.java +++ b/source/net/yacy/http/ProxyHandler.java @@ -180,8 +180,6 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler { "", responseHeaderLegacy.lastModified(), sb.crawler.defaultProxyProfile.handle(), - 0, - 0, 0); //sizeBeforeDelete < 0 ? 0 : sizeBeforeDelete); final Response yacyResponse = new Response( yacyRequest, diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 566eca773..e871ba2be 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -128,8 +128,6 @@ public final class LoaderDispatcher { ((global) ? this.sb.crawler.defaultMediaSnippetGlobalProfile.handle() : this.sb.crawler.defaultMediaSnippetLocalProfile.handle()), // crawl profile - 0, - 0, 0); } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 1caf73b09..b1f57d594 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -1941,8 +1941,6 @@ public final class Switchboard extends serverSwitch { "", surrogate.getDate(), this.crawler.defaultSurrogateProfile.handle(), - 0, - 0, 0); response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false, null); final IndexingQueueEntry queueEntry = @@ -2673,9 +2671,7 @@ public final class Switchboard extends serverSwitch { nextEntry.getValue(), new Date(), response.profile().handle(), - nextdepth, - 0, - 0)); + nextdepth)); } catch (final MalformedURLException e ) { ConcurrentLog.logException(e); } @@ -3078,8 +3074,6 @@ public final class Switchboard extends serverSwitch { "CRAWLING-ROOT", new Date(), profile.handle(), - 0, - 0, 0 )); diff --git a/source/net/yacy/server/http/HTTPDProxyHandler.java 
b/source/net/yacy/server/http/HTTPDProxyHandler.java index c38ad62f6..b735372c5 100644 --- a/source/net/yacy/server/http/HTTPDProxyHandler.java +++ b/source/net/yacy/server/http/HTTPDProxyHandler.java @@ -357,8 +357,6 @@ public final class HTTPDProxyHandler { "", cachedResponseHeader.lastModified(), sb.crawler.defaultProxyProfile.handle(), - 0, - 0, 0); final Response response = new Response( request, @@ -474,8 +472,6 @@ public final class HTTPDProxyHandler { "", responseHeader.lastModified(), sb.crawler.defaultProxyProfile.handle(), - 0, - 0, 0);