From ebd0be2cea2bf06a5bafa3e521d7b797f2df78c1 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 10 Sep 2014 14:24:03 +0200 Subject: [PATCH] fixes and speed updates for search process --- source/net/yacy/crawler/retrieval/FTPLoader.java | 4 ++-- source/net/yacy/repository/LoaderDispatcher.java | 2 +- source/net/yacy/search/query/QueryParams.java | 2 +- source/net/yacy/search/query/SearchEvent.java | 14 ++++++++------ 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/source/net/yacy/crawler/retrieval/FTPLoader.java b/source/net/yacy/crawler/retrieval/FTPLoader.java index ae2b893dc..d0ab75b4e 100644 --- a/source/net/yacy/crawler/retrieval/FTPLoader.java +++ b/source/net/yacy/crawler/retrieval/FTPLoader.java @@ -242,9 +242,9 @@ public class FTPLoader { // only the metadata is returned if (parserError != null) { - this.log.info("No parser available in FTP crawler: '" + parserError + "' for URL " + request.url().toString() + ": parsing only metadata"); + this.log.info("No parser available in FTP crawler: '" + parserError + "' for URL " + request.url().toNormalform(true) + ": parsing only metadata"); } else { - this.log.info("Too big file in FTP crawler with size = " + size + " Bytes for URL " + request.url().toString() + ": parsing only metadata"); + this.log.info("Too big file in FTP crawler with size = " + size + " Bytes for URL " + request.url().toNormalform(true) + ": parsing only metadata"); } // create response with metadata only diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 004907dcc..0520972d7 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -294,7 +294,7 @@ public final class LoaderDispatcher { throw new IOException("no response (NULL) for url " + url); } if (response.getContent() == null) { - throw new IOException("empty response (code " + response.getStatus() + ") for url " + url); + throw new IOException("empty response (code " + response.getStatus() + ") for url " + url.toNormalform(true)); } // we got something. Now check if we want to store that to the cache diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index ff4136d69..e1a93ba31 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -208,7 +208,7 @@ public final class QueryParams { this.constraint = constraint; this.allofconstraint = allofconstraint; this.siteexcludes = siteexcludes != null && siteexcludes.isEmpty() ? null: siteexcludes; - this.snippetCacheStrategy = contentdom == ContentDomain.TEXT ? snippetCacheStrategy : contentdom == null ? null : CacheStrategy.CACHEONLY; + this.snippetCacheStrategy = snippetCacheStrategy; this.clienthost = host; this.remotepeer = null; this.starttime = Long.valueOf(System.currentTimeMillis()); diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 7db61e4c8..cf1df2ab9 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -1290,7 +1290,7 @@ public final class SearchEvent { success = true; } } else { - new Thread() { + Thread t = new Thread() { @Override public void run() { SearchEvent.this.oneFeederStarted(); @@ -1300,7 +1300,9 @@ public final class SearchEvent { SearchEvent.this.snippetFetchAlive.incrementAndGet(); try { addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy)); - } catch (final Throwable e) {} finally { + } catch (final Throwable e) { + ConcurrentLog.logException(e); + } finally { SearchEvent.this.snippetFetchAlive.decrementAndGet(); } } @@ -1308,7 +1310,8 @@ public final class SearchEvent { SearchEvent.this.oneFeederTerminated(); } } - }.start(); + }; + if (SearchEvent.this.query.snippetCacheStrategy == null) t.run(); else t.start(); //no need for concurrency if there is no latency } return success; } @@ -1398,7 +1401,7 @@ public final class SearchEvent { 180, !this.query.isLocal()); final long snippetComputationTime = System.currentTimeMillis() - startTime; - SearchEvent.log.info("text snippet load time for " + page.url() + ": " + snippetComputationTime + ", " + (!snippet.getErrorCode().fail() ? "snippet found" : ("no snippet found (" + snippet.getError() + ")"))); + SearchEvent.log.info("text snippet load time for " + page.url().toNormalform(true) + ": " + snippetComputationTime + ", " + (!snippet.getErrorCode().fail() ? "snippet found" : ("no snippet found (" + snippet.getError() + ")"))); if (!snippet.getErrorCode().fail()) { // we loaded the file and found the snippet @@ -1429,7 +1432,6 @@ public final class SearchEvent { // (happens if a search pages is accessed a second time) final long finishTime = timeout == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + timeout; EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "started, item = " + item + ", available = " + this.getResultCount(), 0, 0), false); - // wait until a local solr is finished, we must do that to be able to check if we need more if (this.localsolrsearch != null && this.localsolrsearch.isAlive()) {try {this.localsolrsearch.join(100);} catch (final InterruptedException e) {}} if (item >= this.localsolroffset && this.local_solr_stored.get() == 0 && this.localsolrsearch.isAlive()) {try {this.localsolrsearch.join();} catch (final InterruptedException e) {}} @@ -1456,7 +1458,7 @@ public final class SearchEvent { // we have the wanted result already in the result array .. return that final ResultEntry re = this.resultList.element(item).getElement(); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "fetched, item = " + item + ", available = " + this.getResultCount() + ": " + re.urlstring(), 0, 0), false); - + if (this.localsolrsearch == null || !this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) { // at the end of a list, trigger a next solr search if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {