From 562183932b8d8e0634f20ecc2997b2163f832c91 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 26 Sep 2012 13:38:04 +0200 Subject: [PATCH] - removed ip_s from default profile since that needs a DNS lookup to create a document entry. This makes remote search much slower. - removed synchronization of add method if ip_s is activated to prevent a user configuration from causing bad behavior. The disadvantage is that an index dump can cause data loss if indexing is running during the dump - caught more exceptions and more NPEs - better abstraction in MirrorSolrConnector - slight performance enhancement when only the index count is requested (rows=0 is sufficient to get a total count) --- defaults/solr.keys.list | 10 ++-- htroot/IndexFederated_p.java | 14 ++++-- .../solr/connector/EmbeddedSolrConnector.java | 1 + .../solr/connector/MirrorSolrConnector.java | 48 +++++++------------ .../solr/connector/SolrServerConnector.java | 2 +- source/net/yacy/peers/Protocol.java | 4 +- source/net/yacy/peers/RemoteSearch.java | 2 +- source/net/yacy/search/index/Fulltext.java | 22 +++++---- 8 files changed, 53 insertions(+), 50 deletions(-) diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list index ae4e97ed2..e3d951a6d 100644 --- a/defaults/solr.keys.list +++ b/defaults/solr.keys.list @@ -77,12 +77,12 @@ applinkscount_i ### optional but highly recommended values, not part of the index distribution process +## tags that are attached to crawls/index generation to separate the search result into user-defined subsets +collection_sxt + ## point in degrees of latitude,longitude as declared in WSG84, location coordinate_p -## ip of host of url (after DNS lookup), string -ip_s - ## content of author-tag, texgen author @@ -140,8 +140,8 @@ h6_txt ### optional values, not part of standard YaCy handling (but useful for external applications) -## tags that are attached to crawls/index generation to separate the search result into user-defined subsets 
-#collection_sxt +## ip of host of url (after DNS lookup), string +#ip_s ## tags of css entries, normalized with absolute URL #css_tag_txt diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java index 771c9b043..7c0eb6ef9 100644 --- a/htroot/IndexFederated_p.java +++ b/htroot/IndexFederated_p.java @@ -119,7 +119,11 @@ public class IndexFederated_p { if (solrRemoteWasOn && !solrRemoteIsOnAfterwards) { // switch off - sb.index.fulltext().disconnectRemoteSolr(); + try { + sb.index.fulltext().disconnectRemoteSolr(); + } catch (Throwable e) { + Log.logException(e); + } } if (solrRemoteIsOnAfterwards) { @@ -134,9 +138,13 @@ public class IndexFederated_p { } else { sb.index.fulltext().disconnectRemoteSolr(); } - } catch (final IOException e) { + } catch (final Throwable e) { Log.logException(e); - sb.index.fulltext().disconnectRemoteSolr(); + try { + sb.index.fulltext().disconnectRemoteSolr(); + } catch (Throwable ee) { + Log.logException(ee); + } } } diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java index abc700517..bb47d366a 100644 --- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java @@ -166,6 +166,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo @Override public QueryResponse query(SolrParams params) throws IOException { + if (this.server == null) throw new IOException("server disconnected"); try { return this.server.query(params); } catch (SolrServerException e) { diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index af54083d7..cde987811 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ 
-205,18 +205,13 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo return false; } cacheMiss_Miss++; - if (this.solr0 != null) { - if (this.solr0.exists(id)) { - this.hitCache.put(id, EXIST); - cacheHit_Insert++; - return true; - } - } - if (this.solr1 != null) { - if (this.solr1.exists(id)) { - this.hitCache.put(id, EXIST); - cacheHit_Insert++; - return true; + for (SolrConnector solr: new SolrConnector[]{this.solr0, this.solr1}) { + if (solr != null) { + if (solr.exists(id)) { + this.hitCache.put(id, EXIST); + cacheHit_Insert++; + return true; + } } } this.missCache.put(id, EXIST); @@ -237,24 +232,17 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo return null; } cacheMiss_Miss++; - if (this.solr0 != null) { - doc = this.solr0.get(id); - if (doc != null) { - this.hitCache.put(id, EXIST); - cacheHit_Insert++; - this.documentCache.put(id, doc); - cacheDocument_Insert++; - return doc; - } - } - if (this.solr1 != null) { - doc = this.solr1.get(id); - if (doc != null) { - this.hitCache.put(id, EXIST); - cacheHit_Insert++; - this.documentCache.put(id, doc); - cacheDocument_Insert++; - return doc; + + for (SolrConnector solr: new SolrConnector[]{this.solr0, this.solr1}) { + if (solr != null) { + doc = solr.get(id); + if (doc != null) { + this.hitCache.put(id, EXIST); + cacheHit_Insert++; + this.documentCache.put(id, doc); + cacheDocument_Insert++; + return doc; + } } } this.missCache.put(id, EXIST); diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java index c9fd6f495..abe928e7d 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java @@ -227,7 +227,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen // construct query final SolrQuery params = new SolrQuery(); 
params.setQuery(querystring); - params.setRows(1); + params.setRows(0); params.setStart(0); // query the server diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 4ecd937b3..111e8a57b 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -1059,7 +1059,7 @@ public final class Protocol // evaluate result if (docList.size() > 0) {// create containers - Network.log.logInfo("SEARCH (solr), returned " + docList.size() + " documents from peer " + target.hash + ":" + target.getName()); + Network.log.logInfo("SEARCH (solr), returned " + docList.size() + " documents from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))) ; final List> container = new ArrayList>(wordhashes.size()); for (byte[] hash: wordhashes) { try { @@ -1087,7 +1087,7 @@ public final class Protocol if (localsearch) { Network.log.logInfo("local search (solr): filtered blacklisted url " + urlEntry.url()); } else { - Network.log.logInfo("remote search (solr): filtered blacklisted url " + urlEntry.url() + " from peer " + target.getName()); + Network.log.logInfo("remote search (solr): filtered blacklisted url " + urlEntry.url() + " from " + (target == null ? 
"shard" : ("peer " + target.hash + ":" + target.getName()))); } } continue; // block with backlist diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 3613c7a7d..0574289cc 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -170,7 +170,7 @@ public class RemoteSearch extends Thread { for (Seed s: nodes) { solrRemoteSearch(event, count, time, s, blacklist); } - + // start search to YaCy peers final int targets = targetPeers.length; if (targets == 0) return; diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 8d3d194e7..50f96838b 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -293,11 +293,14 @@ public final class Fulltext implements Iterable { if (this.connectedSolr()) { try { if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); - synchronized (this.solr) { - SolrDocument sd = this.solr.get(id); - if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) { - this.solr.add(doc); - } + SolrDocument sd = this.solr.get(id); + if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) { + if (this.solrScheme.contains(YaCySchema.ip_s)) { + // ip_s needs a dns lookup which causes blockings during search here + this.solr.add(doc); + } else synchronized (this.solr) { + this.solr.add(doc); + } } } catch (SolrException e) { throw new IOException(e.getMessage(), e); @@ -336,9 +339,12 @@ public final class Fulltext implements Iterable { if (this.connectedSolr()) { try { if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); - synchronized (this.solr) { - SolrDocument sd = this.solr.get(id); - if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) { + SolrDocument sd = this.solr.get(id); + if (sd == 
null || (new URIMetadataNode(sd)).isOlder(row)) { + if (this.solrScheme.contains(YaCySchema.ip_s)) { + // ip_s needs a dns lookup which causes blockings during search here + this.solr.add(getSolrScheme().metadata2solr(row)); + } else synchronized (this.solr) { this.solr.add(getSolrScheme().metadata2solr(row)); } }