From 625c2ce6b1e74fa034dbff9ca3b0a94b4613ea19 Mon Sep 17 00:00:00 2001 From: theli Date: Fri, 22 Sep 2006 11:55:28 +0000 Subject: [PATCH] *) bugfix for snippet fetching problem if content but not http header is available in cache See: http://www.yacy-forum.de/viewtopic.php?p=25748 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2651 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/plasmaHTCache.java | 4 ++++ .../de/anomic/plasma/plasmaSnippetCache.java | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index c1b3be482..1b7aea8b0 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -487,6 +487,10 @@ public final class plasmaHTCache { IResourceInfo cachedObj = this.objFactory.buildResourceInfoObj(url, hdb); return cachedObj; } + + public ResourceInfoFactory getResourceInfoFactory() { + return this.objFactory; + } public boolean full() { return (this.cacheStack.size() > stackLimit); diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 4c6da8f24..acc3af272 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -57,6 +57,8 @@ import java.util.Set; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySearch; +import de.anomic.http.httpHeader; +import de.anomic.http.httpc; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexURL; @@ -439,9 +441,21 @@ public class plasmaSnippetCache { // try to get the header from the htcache directory try { docInfo = this.cacheManager.loadResourceInfo(url); - } catch (Exception e) {} + } catch (Exception e) { + // ignore this. 
resource info loading failed + } - // TODO: try to load it from web + // TODO: we need a better solution here + // encapsulate this in the crawlLoader class + if (url.getProtocol().startsWith("http")) { + // getting URL mimeType + try { + httpHeader header = httpc.whead(url, url.getHost(), 10000, null, null, this.sb.remoteProxyConfig); + docInfo = this.cacheManager.getResourceInfoFactory().buildResourceInfoObj(url, header); + } catch (Exception e) { + // ignore this. http header download failed + } + } }