diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index c1b3be482..1b7aea8b0 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -487,6 +487,10 @@ public final class plasmaHTCache { IResourceInfo cachedObj = this.objFactory.buildResourceInfoObj(url, hdb); return cachedObj; } + + public ResourceInfoFactory getResourceInfoFactory() { + return this.objFactory; + } public boolean full() { return (this.cacheStack.size() > stackLimit); diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 4c6da8f24..acc3af272 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -57,6 +57,8 @@ import java.util.Set; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySearch; +import de.anomic.http.httpHeader; +import de.anomic.http.httpc; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexURL; @@ -439,9 +441,21 @@ public class plasmaSnippetCache { // try to get the header from the htcache directory try { docInfo = this.cacheManager.loadResourceInfo(url); - } catch (Exception e) {} + } catch (Exception e) { + // ignore this. resource info loading failed + } - // TODO: try to load it from web + // TODO: we need a better solution here + // encapsulate this in the crawlLoader class + if (url.getProtocol().startsWith("http")) { + // getting URL mimeType + try { + httpHeader header = httpc.whead(url, url.getHost(), 10000, null, null, this.sb.remoteProxyConfig); + docInfo = this.cacheManager.getResourceInfoFactory().buildResourceInfoObj(url, header); + } catch (Exception e) { + // ignore this. http header download failed + } + } }