*) bugfix for a snippet fetching problem that occurs when the content, but not the HTTP header, is available in the cache

See: http://www.yacy-forum.de/viewtopic.php?p=25748

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2651 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 18 years ago
parent 813a8a8179
commit 625c2ce6b1

@ -488,6 +488,10 @@ public final class plasmaHTCache {
return cachedObj; return cachedObj;
} }
/**
 * Exposes the resource-info factory held by this cache.
 *
 * @return the factory stored in the {@code objFactory} field of this instance
 */
public ResourceInfoFactory getResourceInfoFactory() {
    return objFactory;
}
public boolean full() { public boolean full() {
return (this.cacheStack.size() > stackLimit); return (this.cacheStack.size() > stackLimit);
} }

@ -57,6 +57,8 @@ import java.util.Set;
import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySearch; import de.anomic.yacy.yacySearch;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL; import de.anomic.index.indexURL;
@ -439,9 +441,21 @@ public class plasmaSnippetCache {
// try to get the header from the htcache directory // try to get the header from the htcache directory
try { try {
docInfo = this.cacheManager.loadResourceInfo(url); docInfo = this.cacheManager.loadResourceInfo(url);
} catch (Exception e) {} } catch (Exception e) {
// ignore this. resource info loading failed
}
// TODO: try to load it from web // TODO: we need a better solution here
// encapsulate this in the crawlLoader class
if (url.getProtocol().startsWith("http")) {
// getting URL mimeType
try {
httpHeader header = httpc.whead(url, url.getHost(), 10000, null, null, this.sb.remoteProxyConfig);
docInfo = this.cacheManager.getResourceInfoFactory().buildResourceInfoObj(url, header);
} catch (Exception e) {
// ignore this. http header download failed
}
}
} }

Loading…
Cancel
Save