diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index c621a92ea..e5d9469aa 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -91,6 +91,7 @@ public final class search { final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE); final String prefer = post.get("prefer", ""); final String filter = post.get("filter", ".*"); + final boolean includesnippet = post.get("includesnippet", "false").equals("true"); // final boolean global = ((String) post.get("resource", "global")).equals("global"); // if true, then result may consist of answers from other peers // Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time @@ -200,11 +201,15 @@ public final class search { plasmaSnippetCache.Snippet snippet; while ((acc.hasMoreElements()) && (i < squery.wantedResults)) { urlentry = acc.nextElement(); - snippet = sb.snippetCache.retrieveSnippet(urlentry.url(), squery.queryHashes, false, 260); - if (snippet.getSource() == plasmaSnippetCache.ERROR_NO_MATCH) { + if (includesnippet) { + snippet = sb.snippetCache.retrieveSnippet(urlentry.url(), squery.queryHashes, false, 260); + } else { + snippet = null; + } + if ((snippet != null) && (snippet.getSource() == plasmaSnippetCache.ERROR_NO_MATCH)) { // suppress line: there is no match in that resource } else { - if (snippet.exists()) { + if ((snippet != null) && (snippet.exists())) { resource = urlentry.toString(snippet.getLineRaw()); } else { resource = urlentry.toString(); diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 10e06f0bf..c0ad8218a 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -192,7 +192,6 @@ public class plasmaSnippetCache { try { // trying to load the resource from the cache resource = this.cacheManager.loadResourceContent(url); - docInfo = this.cacheManager.loadResourceInfo(url); // if not found try to download it if ((resource == null) && (fetchOnline)) { @@ -200,22 +199,21 @@ public class plasmaSnippetCache { plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000); // getting resource metadata (e.g. the http headers for http resources) - if (entry != null) { - docInfo = entry.getDocumentInfo(); - } + if (entry != null) docInfo = entry.getDocumentInfo(); - // now the resource should be stored in the cache, load body - resource = this.cacheManager.loadResourceContent(url); + // read resource body + resource = entry.cacheArray(); if (resource == null) { - //System.out.println("cannot load document for URL " + url); - return new Snippet(null, ERROR_RESOURCE_LOADING, "error loading resource from web, cacheManager returned NULL"); + return new Snippet(null, ERROR_RESOURCE_LOADING, "error loading resource, plasmaHTCache.Entry cache is NULL"); } source = SOURCE_WEB; } } catch (Exception e) { if (!(e instanceof plasmaCrawlerException)) e.printStackTrace(); - return new Snippet(null, ERROR_SOURCE_LOADING, "error loading resource from web: " + e.getMessage()); + return new Snippet(null, ERROR_SOURCE_LOADING, "error loading resource: " + e.getMessage()); } + + if (resource == null) return new Snippet(null, ERROR_SOURCE_LOADING, "no resource available"); /* =========================================================================== * PARSING RESOURCE @@ -459,11 +457,12 @@ public class plasmaSnippetCache { docInfo = this.cacheManager.loadResourceInfo(url); } catch (Exception e) { // ignore this. resource info loading failed - } - + } + } + // TODO: we need a better solution here // encapsulate this in the crawlLoader class - if (url.getProtocol().startsWith("http")) { + if ((docInfo == null) && (url.getProtocol().startsWith("http"))) { // getting URL mimeType try { httpHeader header = httpc.whead(url, url.getHost(), 10000, null, null, this.sb.remoteProxyConfig); @@ -472,8 +471,6 @@ public class plasmaSnippetCache { // ingore this. http header download failed } } - - } if (docInfo == null) { String filename = this.cacheManager.getCachePath(url).getName(); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 798e5e8fd..8b33ed27a 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -2033,9 +2033,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser String host, hash, address, descr = ""; yacySeed seed; plasmaSnippetCache.Snippet snippet; + boolean includeSnippets = false; String formerSearch = query.words(" "); long targetTime = timestamp + query.maximumTime; - if (targetTime < System.currentTimeMillis()) targetTime = System.currentTimeMillis() + 5000; + if (targetTime < System.currentTimeMillis()) targetTime = System.currentTimeMillis() + 1000; while ((acc.hasMoreElements()) && (i < query.wantedResults) && (System.currentTimeMillis() < targetTime)) { urlentry = acc.nextElement(); url = urlentry.url(); @@ -2076,8 +2077,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser //addScoreForked(ref, gs, urlstring.split("/")); URL wordURL; if (urlstring.matches(query.urlMask)) { //.* is default - snippet = snippetCache.retrieveSnippet(url, query.queryHashes, false, 260); - if (snippet.getSource() == plasmaSnippetCache.ERROR_NO_MATCH) { + if (includeSnippets) { + snippet = snippetCache.retrieveSnippet(url, query.queryHashes, false, 260); + } else { + snippet = null; + } + if ((snippet != null) && (snippet.getSource() == plasmaSnippetCache.ERROR_NO_MATCH)) { // suppress line: there is no match in that resource } else { prop.put("type_results_" + i + "_recommend", (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, "stippadd", "url", urlstring) == null) ? 1 : 0); @@ -2097,7 +2102,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser ((indexURL.probablyRootURL(urlhash)) ? ", probablyRootURL" : "") + (((wordURL = indexURL.probablyWordURL(urlhash, query.words(""))) != null) ? ", probablyWordURL=" + wordURL.toNormalform() : "")); // adding snippet if available - if (snippet.exists()) { + if ((snippet != null) && (snippet.exists())) { prop.put("type_results_" + i + "_snippet", 1); prop.put("type_results_" + i + "_snippet_text", snippet.getLineMarked(query.queryHashes)); } else {