From d14114697cbc4280caad5dd70570a6f311d863e4 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sun, 21 Dec 2014 17:31:51 +0100 Subject: [PATCH] the miss cache does not seem to work, it sometimes contains urlhashes from documents which actually are inside the index. This can be reproduced using the crawl result table at http://localhost:8090/CrawlResults.html?process=5 The cache is temporarily disabled to remove the bad behaviour; however, a later reactivation of that feature may be possible. --- defaults/yacy.init | 6 +++++ .../ConcurrentUpdateSolrConnector.java | 26 +++++++++---------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index 3e418ac15..055d0f99c 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -266,6 +266,12 @@ parser.mime.deny= parser.extensions.deny= parser.enableAudioTags=false +# experimental single-page parser for pdf files: split one pdf into individual pages; +# the key is the property name in the post arguments that gets a page number assigned, +# page numbers start with 1 +parser.pdf.individualpages=false +parser.pdf.individualpages.key=page + # Promotion Strings # These strings appear in the Web Mask of the YACY search client # Set these Strings to cusomize your peer and give any message to diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java index 984819beb..c5695f794 100644 --- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java @@ -29,9 +29,7 @@ import java.util.concurrent.BlockingQueue; import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.storage.ARC; -import net.yacy.cora.storage.ARH; import net.yacy.cora.storage.ConcurrentARC; -import net.yacy.cora.storage.ConcurrentARH; import net.yacy.cora.util.ConcurrentLog; 
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.util.MemoryControl; @@ -73,7 +71,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { private SolrConnector connector; private ARC metadataCache; - private final ARH missCache; + //private final ARH missCache; private final LinkedHashMap docBuffer; private CommitHandler processHandler; private final int updateCapacity; @@ -83,7 +81,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { this.connector = connector; this.updateCapacity = updateCapacity; this.metadataCache = new ConcurrentARC<>(idCacheCapacity, concurrency); - this.missCache = new ConcurrentARH<>(idCacheCapacity, concurrency); + //this.missCache = new ConcurrentARH<>(idCacheCapacity, concurrency); this.docBuffer = new LinkedHashMap<>(); this.processHandler = null; this.commitProcessRunning = true; @@ -130,17 +128,17 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { public void clearCaches() { this.connector.clearCaches(); this.metadataCache.clear(); - this.missCache.clear(); + //this.missCache.clear(); } private void updateCache(final String id, final LoadTimeURL md) { if (id == null) return; if (MemoryControl.shortStatus()) { this.metadataCache.clear(); - this.missCache.clear(); + //this.missCache.clear(); } this.metadataCache.put(id, md); - this.missCache.delete(id); + //this.missCache.delete(id); } public void ensureAliveProcessHandler() { @@ -200,13 +198,13 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { this.docBuffer.clear(); this.connector.clear(); this.metadataCache.clear(); - this.missCache.clear(); + //this.missCache.clear(); } @Override public synchronized void deleteById(String id) throws IOException { this.metadataCache.remove(id); - this.missCache.add(id); + //this.missCache.add(id); synchronized (this.docBuffer) { this.docBuffer.remove(id); } @@ -217,7 +215,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { 
public synchronized void deleteByIds(Collection ids) throws IOException { for (String id: ids) { this.metadataCache.remove(id); - this.missCache.add(id); + //this.missCache.add(id); } synchronized (this.docBuffer) { for (String id: ids) { @@ -240,7 +238,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { @Override public LoadTimeURL getLoadTimeURL(String id) throws IOException { - if (this.missCache.contains(id)) return null; + //if (this.missCache.contains(id)) return null; LoadTimeURL md = this.metadataCache.get(id); if (md != null) { //System.out.println("*** metadata cache hit; metadataCache.size() = " + metadataCache.size()); @@ -254,7 +252,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { return AbstractSolrConnector.getLoadTimeURL(doc); } md = this.connector.getLoadTimeURL(id); - if (md == null) {this.missCache.add(id); return null;} + if (md == null) {/*this.missCache.add(id);*/ return null;} updateCache(id, md); return md; } @@ -296,7 +294,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { @Override public SolrDocument getDocumentById(final String id, String... fields) throws IOException { assert id.length() == Word.commonHashLength : "wrong id: " + id; - if (this.missCache.contains(id)) return null; + //if (this.missCache.contains(id)) return null; SolrInputDocument idoc = this.docBuffer.get(id); if (idoc != null) { //System.out.println("*** docBuffer cache hit; docBuffer.size() = " + docBuffer.size()); @@ -305,7 +303,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { } SolrDocument solrdoc = this.connector.getDocumentById(id, AbstractSolrConnector.ensureEssentialFieldsIncluded(fields)); if (solrdoc == null) { - this.missCache.add(id); + //this.missCache.add(id); this.metadataCache.remove(id); } else { updateCache(id, AbstractSolrConnector.getLoadTimeURL(solrdoc));