diff --git a/htroot/IndexDeletion_p.java b/htroot/IndexDeletion_p.java index 31a17f580..e755df417 100644 --- a/htroot/IndexDeletion_p.java +++ b/htroot/IndexDeletion_p.java @@ -161,7 +161,7 @@ public class IndexDeletion_p { String regexquery = CollectionSchema.sku.getSolrFieldName() + ":/" + urldelete + "/"; if (simulate) { try { - count = (int) defaultConnector.getCountByQuery(regexquery); + count = (int) defaultConnector.getCountByQuery("{!cache=false}" + regexquery); } catch (final IOException e) { } prop.put("urldelete-active", count == 0 ? 2 : 1); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 72fa47dbc..3166e521c 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2558,7 +2558,7 @@ public final class Switchboard extends serverSwitch { throw new Parser.Failure("Parser returned null.", response.url()); } } catch (final Parser.Failure e ) { - this.log.warn("Unable to parse the resource '" + response.url() + "'. " + e.getMessage()); + this.log.warn("Unable to parse the resource '" + response.url().toNormalform(true) + "'. " + e.getMessage()); // create a new errorURL DB entry this.crawlQueues.errorURL.push(response.url(), response.depth(), response.profile(), FailCategory.FINAL_PROCESS_CONTEXT, e.getMessage(), -1); return null; diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index d271aa7c9..7bb610c61 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -401,7 +401,7 @@ public final class Fulltext { for (String hosthash: subset) { if (query.length() > 0) query.append(" OR "); //query.append(CollectionSchema.host_id_s.getSolrFieldName()).append(":\"").append(hosthash).append(":\""); - query.append("({!raw f=").append(fieldname).append('}').append(hosthash).append(")"); + query.append("({!cache=false raw f=").append(fieldname).append('}').append(hosthash).append(")"); } if (constraintQuery == null) connector.deleteByQuery(query.toString()); else connector.deleteByQuery("(" + query.toString() + ") AND " + constraintQuery); } catch (final IOException e) { diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 063858c09..b95a52656 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -275,7 +275,7 @@ public class Segment { if ((internalIDs.size() == 0 || !connectedCitation()) && Segment.this.fulltext.useWebgraph()) { // reqd the references from the webgraph SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector(); - BlockingQueue docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, false, WebgraphSchema.source_id_s.getSolrFieldName()); + BlockingQueue docs = webgraph.concurrentDocumentsByQuery("{!cache=false raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, false, WebgraphSchema.source_id_s.getSolrFieldName()); SolrDocument doc; try { while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index e98f3ab69..4be0b9b5f 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -1078,7 +1078,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C. // To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM; - long patchquerycount = collectionConnector.getCountByQuery(patchquery); + long patchquerycount = collectionConnector.getCountByQuery("{!cache=false}" + patchquery); BlockingQueue documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, 20, 1, true, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName()); SolrDocument doc_B; @@ -1172,7 +1172,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri postprocessingActivity = "writing cr values to webgraph for host " + host; ConcurrentLog.info("CollectionConfiguration", postprocessingActivity); String patchquery = WebgraphSchema.source_host_s.getSolrFieldName() + ":\"" + host + "\" AND " + WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM; - final long count = segment.fulltext().getWebgraphConnector().getCountByQuery(patchquery); + final long count = segment.fulltext().getWebgraphConnector().getCountByQuery("{!cache=false}" + patchquery); int concurrency = Math.min((int) count, Math.max(1, Runtime.getRuntime().availableProcessors() / 4)); ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the webgraph, concurrency = " + concurrency); final BlockingQueue docs = segment.fulltext().getWebgraphConnector().concurrentDocumentsByQuery( @@ -1587,7 +1587,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri doccountterm.addOperand(new Negation(new StringLiteral(CollectionSchema.id, urlhash))); doccountterm.addOperand(new StringLiteral(CollectionSchema.host_id_s, hostid)); doccountterm.addOperand(new LongLiteral(signaturefield, signature)); - long doccount = segment.fulltext().getDefaultConnector().getCountByQuery(doccountterm.toString()); + long doccount = segment.fulltext().getDefaultConnector().getCountByQuery("{!cache=false}" + doccountterm.toString()); sid.setField(uniquefield.getSolrFieldName(), doccount == 0); } catch (final IOException e) {} } @@ -1682,7 +1682,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri this.crt = new ConcurrentHashMap(); try { // select all documents for each host - BlockingQueue ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, 86400000, 200, 1); + BlockingQueue ids = connector.concurrentIDsByQuery("{!cache=false raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, 86400000, 200, 1); String id; while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) { this.crt.put(id, new double[]{0.0d,0.0d}); //{old value, new value}