diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java index f5f04d92a..dc35a8b71 100644 --- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java @@ -152,8 +152,10 @@ public abstract class AbstractSolrConnector implements SolrConnector { final int buffersize, final int concurrency, final String ... fields) { + assert buffersize > 0; final BlockingQueue queue = buffersize <= 0 ? new LinkedBlockingQueue() : new ArrayBlockingQueue(buffersize); final long endtime = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; // we know infinity! + final int ps = Math.min(pagesize, buffersize); final Thread t = new Thread() { @Override public void run() { @@ -162,12 +164,12 @@ public abstract class AbstractSolrConnector implements SolrConnector { int count = 0; while (System.currentTimeMillis() < endtime && count < maxcount) { try { - SolrDocumentList sdl = getDocumentListByQuery(querystring, sort, o, Math.min(maxcount, pagesize), fields); + SolrDocumentList sdl = getDocumentListByQuery(querystring, sort, o, Math.min(maxcount, ps), fields); for (SolrDocument d: sdl) { try {queue.put(d);} catch (final InterruptedException e) {break;} count++; } - if (sdl.size() < pagesize) { + if (sdl.size() < ps) { //System.out.println("sdl.size() = " + sdl.size() + ", pagesize = " + pagesize); break; } diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java index f14964390..e9f69fdc2 100644 --- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java @@ -239,18 +239,24 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo DocList response = resultContext == null ? new DocSlice(0, 0, new int[0], new float[0], 0, 0.0f) : resultContext.docs; sdl.setNumFound(response == null ? 0 : response.matches()); sdl.setStart(response == null ? 0 : response.offset()); + String originalName = Thread.currentThread().getName(); if (response != null) { try { SolrIndexSearcher searcher = req.getSearcher(); final int responseCount = response.size(); DocIterator iterator = response.iterator(); for (int i = 0; i < responseCount; i++) { - sdl.add(doc2SolrDoc(searcher.doc(iterator.nextDoc(), (Set) null))); + int docid = iterator.nextDoc(); + Thread.currentThread().setName("EmbeddedSolrConnector.SolrQueryResponse2SolrDocumentList: " + docid); + Document responsedoc = searcher.doc(docid, (Set) null); + SolrDocument sordoc = doc2SolrDoc(responsedoc); + sdl.add(sordoc); } } catch (IOException e) { ConcurrentLog.logException(e); } } + Thread.currentThread().setName(originalName); return sdl; } diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index adc8531dc..88b3ddf5f 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -762,7 +762,7 @@ public final class Protocol { // the search-result-url transports all the attributes of word indexes if ( !Base64Order.enhancedCoder.equal(entry.urlhash(), urlEntry.hash()) ) { - Network.log.info("remote search: url-hash " + ASCII.String(urlEntry.hash()) + " does not belong to word-attached-hash " + ASCII.String(entry.urlhash()) + "; url = " + urlEntry.url() + " from peer " + target.getName()); + Network.log.info("remote search: url-hash " + ASCII.String(urlEntry.hash()) + " does not belong to word-attached-hash " + ASCII.String(entry.urlhash()) + "; url = " + urlEntry.url().toNormalform(true) + " from peer " + target.getName()); continue; // spammed } diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index bec249e35..6e5114f63 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -1055,7 +1055,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM; long patchquerycount = collectionConnector.getCountByQuery(patchquery); - BlockingQueue documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, 200, 1, + BlockingQueue documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, 20, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName()); SolrDocument doc_B; int patchquerycountcheck = 0; @@ -1151,7 +1151,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri final long count = segment.fulltext().getWebgraphConnector().getCountByQuery(patchquery); int concurrency = Math.min((int) count, Math.max(1, Runtime.getRuntime().availableProcessors() / 4)); ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the webgraph, concurrency = " + concurrency); - final BlockingQueue docs = segment.fulltext().getWebgraphConnector().concurrentDocumentsByQuery(patchquery, WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, 200, concurrency); + final BlockingQueue docs = segment.fulltext().getWebgraphConnector().concurrentDocumentsByQuery(patchquery, WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, concurrency + 1, concurrency); final AtomicInteger proccount = new AtomicInteger(0); Thread[] t = new Thread[concurrency]; for (final AtomicInteger i = new AtomicInteger(0); i.get() < t.length; i.incrementAndGet()) { @@ -1235,7 +1235,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri omitFields.add(CollectionSchema.harvestkey_s.getSolrFieldName()); final long count = collectionConnector.getCountByQuery(collection1query); final long start = System.currentTimeMillis(); - final int concurrency = Runtime.getRuntime().availableProcessors(); + final int concurrency = Math.max(1, Math.min((int) (MemoryControl.available() / (100L * 1024L * 1024L)), Runtime.getRuntime().availableProcessors())); + //final int concurrency = 1; final boolean reference_computation = this.contains(CollectionSchema.references_i) && this.contains(CollectionSchema.references_internal_i) && this.contains(CollectionSchema.references_external_i) && @@ -1248,7 +1249,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri CollectionSchema.host_subdomain_s.getSolrFieldName() + " asc," + // sort on subdomain to get hosts without subdomain first; that gives an opportunity to set www_unique_b flag to false CollectionSchema.url_protocol_s.getSolrFieldName() + " asc" // sort on protocol to get http before https; that gives an opportunity to set http_unique_b flag to false : null, // null sort is faster! - 0, 100000000, Long.MAX_VALUE, 100, concurrency); + 0, 100000000, Long.MAX_VALUE, concurrency + 1, concurrency); final AtomicInteger proccount = new AtomicInteger(); final AtomicInteger proccount_referencechange = new AtomicInteger(); final AtomicInteger proccount_citationchange = new AtomicInteger();