From 73f03e05ee062fb4d65b694ccd8b0128b6b3558e Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 18 Jun 2010 15:25:25 +0000 Subject: [PATCH] fixed a bug in snippet fetch strategy: cache only does not help if resource can only be found in web git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6930 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/crawler/CrawlProfile.java | 3 +++ source/de/anomic/crawler/CrawlSwitchboard.java | 5 ++++- source/de/anomic/search/TextSnippet.java | 5 +---- source/net/yacy/repository/LoaderDispatcher.java | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java index 2ffe20319..99cb89562 100644 --- a/source/de/anomic/crawler/CrawlProfile.java +++ b/source/de/anomic/crawler/CrawlProfile.java @@ -396,6 +396,9 @@ public class CrawlProfile { return CacheStrategy.IFFRESH; } } + public void setCacheStrategy(CacheStrategy newStrategy) { + mem.put(CACHE_STRAGEGY, newStrategy.toString()); + } public long recrawlIfOlder() { // returns a long (millis) that is the minimum age that // an entry must have to be re-crawled diff --git a/source/de/anomic/crawler/CrawlSwitchboard.java b/source/de/anomic/crawler/CrawlSwitchboard.java index 71d6817da..4be91cdc3 100644 --- a/source/de/anomic/crawler/CrawlSwitchboard.java +++ b/source/de/anomic/crawler/CrawlSwitchboard.java @@ -30,6 +30,8 @@ import java.io.File; import java.io.IOException; import java.util.Iterator; +import de.anomic.crawler.CrawlProfile.CacheStrategy; + import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.kelondroException; @@ -183,8 +185,9 @@ public final class CrawlSwitchboard { if (this.defaultTextSnippetGlobalProfile == null) { // generate new default entry for snippet fetch and optional crawling defaultTextSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0, - this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, -1, true, true, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.CACHEONLY); + this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, -1, true, true, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST); } + this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST); if (this.defaultMediaSnippetLocalProfile == null) { // generate new default entry for snippet fetch and optional crawling defaultMediaSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0, diff --git a/source/de/anomic/search/TextSnippet.java b/source/de/anomic/search/TextSnippet.java index 264bfbe23..b76dc5d6b 100644 --- a/source/de/anomic/search/TextSnippet.java +++ b/source/de/anomic/search/TextSnippet.java @@ -352,10 +352,7 @@ public class TextSnippet implements Comparable, Comparator maxDocLen) { - // content may be too large to be parsed here. To be fast, we omit calculation of snippet here - return new TextSnippet(url, null, ERROR_SOURCE_LOADING, queryhashes, "resource available, but too large: " + resContent.length + " bytes"); - } else if (fetchOnline) { + if ((resContent == null || responseHeader == null) && fetchOnline) { // if not found try to download it // download resource using the crawler and keep resource in memory if possible diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index a0f8e1148..fe02384c4 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -233,7 +233,7 @@ public final class LoaderDispatcher { // check case where we want results from the cache exclusively, and never from the internet (offline mode) if (cacheStrategy == CrawlProfile.CacheStrategy.CACHEONLY) { // we had a chance to get the content from the cache .. its over. We don't have it. - return null; + throw new IOException("cache only strategy"); } // now forget about the cache, nothing there. Try to load the content from the internet