diff --git a/htroot/gsa/searchresult.java b/htroot/gsa/searchresult.java index 7ce843ce7..dc3bc5953 100644 --- a/htroot/gsa/searchresult.java +++ b/htroot/gsa/searchresult.java @@ -115,7 +115,7 @@ public class searchresult { post.put(CommonParams.ROWS, post.remove("num")); post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 5000 : 100)); post.put("defType", "edismax"); - post.put("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^100000.0"); // a bost query that moves double content to the back + post.put("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^100000.0"); // a boost query that moves double content to the back post.put(CommonParams.FL, YaCySchema.content_type.getSolrFieldName() + ',' + YaCySchema.id.getSolrFieldName() + ',' + @@ -145,7 +145,7 @@ public class searchresult { // add sites operator if (site != null && site.length() > 0) { - String[] s0 = site.split(Pattern.quote("|")); + String[] s0 = Pattern.compile(Pattern.quote("|")).split(site, 0); ArrayList sites = new ArrayList(2); for (String s: s0) { s = s.trim().toLowerCase(); diff --git a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java index f1dd68ecc..b98bfb557 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java @@ -83,9 +83,10 @@ public class GSAResponseWriter implements QueryResponseWriter { // pre-select a set of YaCy schema fields for the solr searcher which should cause a better caching private static final YaCySchema[] extrafields = new YaCySchema[]{ - YaCySchema.id, YaCySchema.title, YaCySchema.description, YaCySchema.text_t, - YaCySchema.h1_txt, YaCySchema.h2_txt, YaCySchema.h3_txt, YaCySchema.h4_txt, YaCySchema.h5_txt, YaCySchema.h6_txt, - }; + YaCySchema.id, YaCySchema.sku, YaCySchema.title, YaCySchema.description, + YaCySchema.last_modified, YaCySchema.load_date_dt, YaCySchema.size_i, YaCySchema.language_s + }; + private static final Set SOLR_FIELDS = new HashSet(); static { field2tag.put(YaCySchema.language_s.getSolrFieldName(), GSAToken.LANG.name()); @@ -278,17 +279,6 @@ public class GSAResponseWriter implements QueryResponseWriter { //texts.add(value.stringValue()); continue; } - if (YaCySchema.text_t.getSolrFieldName().equals(fieldName)) { - //texts.add(value.stringValue()); - continue; - } - if (YaCySchema.h1_txt.getSolrFieldName().equals(fieldName) || YaCySchema.h2_txt.getSolrFieldName().equals(fieldName) || - YaCySchema.h3_txt.getSolrFieldName().equals(fieldName) || YaCySchema.h4_txt.getSolrFieldName().equals(fieldName) || - YaCySchema.h5_txt.getSolrFieldName().equals(fieldName) || YaCySchema.h6_txt.getSolrFieldName().equals(fieldName)) { - // because these are multi-valued fields, there can be several of each - //texts.add(value.stringValue()); - continue; - } if (YaCySchema.size_i.getSolrFieldName().equals(fieldName)) { size = value.stringValue() != null && value.stringValue().length() > 0 ? Integer.parseInt(value.stringValue()) : -1; continue; diff --git a/source/net/yacy/crawler/CrawlSwitchboard.java b/source/net/yacy/crawler/CrawlSwitchboard.java index 3233c1e6f..75bf503e4 100644 --- a/source/net/yacy/crawler/CrawlSwitchboard.java +++ b/source/net/yacy/crawler/CrawlSwitchboard.java @@ -513,7 +513,7 @@ public final class CrawlSwitchboard { String handle = r.profileHandle(); RowHandleSet us = this.profilesActiveCrawlsCounter.get(handle); if (us == null) {us = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); this.profilesActiveCrawlsCounter.put(handle, us);} - us.put(r.url().hash()); + if (us.size() < 100) us.put(r.url().hash()); // store the hash, but not too many deletionCandidate.remove(handle); if (deletionCandidate.size() == 0) return 0; if (System.currentTimeMillis() > timeout) return 0; // give up; this is too large diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 2bfbbf8cd..c6f33d4a5 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -1030,6 +1030,7 @@ public final class Protocol final SearchEvent event, final int offset, final int count, + boolean getFacets, final Seed target, final Blacklist blacklist) { @@ -1044,11 +1045,13 @@ public final class Protocol solrQuery.setRows(count); // set facet query attributes - if (event.query.facetfields.length > 0) { + if (getFacets && event.query.facetfields.length > 0) { solrQuery.setFacet(true); solrQuery.setFacetLimit(event.query.maxfacets); solrQuery.setFacetSort(FacetParams.FACET_SORT_COUNT); for (String field: event.query.facetfields) solrQuery.addFacetField(field); + } else { + solrQuery.setFacet(false); } // set highlightning query attributes diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 7e0873ae9..82dd85144 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -253,36 +253,40 @@ public class RemoteSearch extends Thread { final Seed targetPeer, final Blacklist blacklist) { - // check own peer status if (event.peers.mySeed() == null || event.peers.mySeed().getPublicAddress() == null) { return null; } - // prepare seed targets and threads if (targetPeer != null && targetPeer.hash != null && event.preselectedPeerHashes != null) targetPeer.setAlternativeAddress(event.preselectedPeerHashes.get(ASCII.getBytes(targetPeer.hash))); Thread solr = new Thread() { @Override public void run() { - event.rankingProcess.oneFeederStarted(); - try { - int urls = Protocol.solrQuery( - event, - 0, - count, - targetPeer, - blacklist); - if (urls >= 0) { - // urls is an array of url hashes. this is only used for log output - event.peers.mySeed().incRI(urls); - event.peers.mySeed().incRU(urls); - } else { - if (targetPeer != null) { - Network.log.logInfo("REMOTE SEARCH - no answer from remote peer " + targetPeer.hash + ":" + targetPeer.getName()); + int tmpoffset = 0; + int tmpcount = 10; + while (tmpoffset + tmpcount <= count) { + try { + event.rankingProcess.oneFeederStarted(); + int urls = Protocol.solrQuery( + event, + tmpoffset, + tmpcount, + tmpoffset == 0, + targetPeer, + blacklist); + if (urls >= 0) { + // urls is an array of url hashes. this is only used for log output + event.peers.mySeed().incRI(urls); + event.peers.mySeed().incRU(urls); + } else { + if (targetPeer != null) { + Network.log.logInfo("REMOTE SEARCH - no answer from remote peer " + targetPeer.hash + ":" + targetPeer.getName()); + } } + } catch (final Exception e) { + Log.logException(e); + } finally { + event.rankingProcess.oneFeederTerminated(); } - } catch (final Exception e) { - Log.logException(e); - } finally { - event.rankingProcess.oneFeederTerminated(); + tmpoffset += tmpcount; } } }; diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 58449951e..44006cbc3 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -430,7 +430,7 @@ public final class QueryParams { // construct query final SolrQuery params = new SolrQuery(); params.setParam("defType", "edismax"); - params.setParam("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^100000.0"); // a bost query that moves double content to the back + params.setParam("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^100000.0"); // a boost query that moves double content to the back params.setStart(this.offset); params.setRows(this.itemsPerPage); params.setFacet(false); diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index d99254809..2d4c908d9 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -878,7 +878,12 @@ public final class SearchEvent { } public ResultEntry oneResult(final int item, final long timeout) { - if (this.localsearch != null && this.localsearch.isAlive()) try {this.localsearch.join();} catch (InterruptedException e) {} + // if there is not yet a worker alive, start one + if (!anyWorkerAlive()) { + deployWorker(Math.min(SNIPPET_WORKER_THREADS, this.query.itemsPerPage), this.query.neededResults()); + } + // wait until local data is there + while (this.localsearch != null && this.localsearch.isAlive() && this.result.sizeAvailable() < item) try {this.localsearch.join(10);} catch (InterruptedException e) {} // check if we already retrieved this item // (happens if a search pages is accessed a second time) final long finishTime = System.currentTimeMillis() + timeout;