From d8e79731df7b2778b31cec1a4c1cd6c0133b4d96 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 25 Feb 2014 13:38:39 +0100 Subject: [PATCH 1/5] fixed wrong used memory display --- htroot/Status.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htroot/Status.java b/htroot/Status.java index f8f773e3e..324fcc7ba 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -318,7 +318,7 @@ public class Status } // memory usage and system attributes - prop.put("usedMemory", Formatter.bytesToString(MemoryControl.total())); + prop.put("usedMemory", Formatter.bytesToString(MemoryControl.used())); prop.put("maxMemory", Formatter.bytesToString(MemoryControl.maxMemory())); prop.put("usedDisk", Formatter.bytesToString(sb.observer.getSizeOfDataPath(true))); prop.put("freeDisk", Formatter.bytesToString(sb.observer.getUsableSpace())); From de8f7994ab6b4e64df88d0ccfaec7a97195a33fa Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 25 Feb 2014 14:17:33 +0100 Subject: [PATCH 2/5] as crawling has a low-cpu demand, we want it to run even if the CPU load is VERY high. 
This also applies when the CPU load is high because of in-cache crawling; in that case we want the CPU load to be as high as possible --- defaults/yacy.init | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index e073a959f..c9af37465 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -613,7 +613,7 @@ collection=user 50_localcrawl_idlesleep=2000 50_localcrawl_busysleep=10 50_localcrawl_memprereq=12582912 -50_localcrawl_loadprereq=8.0 +50_localcrawl_loadprereq=32.0 50_localcrawl_isPaused=false 60_remotecrawlloader_idlesleep=4000 60_remotecrawlloader_busysleep=800 From 9f6be762a6657a6c8495b2f0341cc650614d7f7c Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 25 Feb 2014 14:37:30 +0100 Subject: [PATCH 3/5] - better logging for postprocessing - fixed collection bug in postprocessing --- source/net/yacy/search/Switchboard.java | 2 +- .../schema/CollectionConfiguration.java | 26 ++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index f65c3dfb1..6aebd9c49 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2057,7 +2057,7 @@ public final class Switchboard extends serverSwitch { // set a random password if no password is configured if ( getConfigBool(SwitchboardConstants.ADMIN_ACCOUNT_FOR_LOCALHOST, false) && getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").isEmpty() ) { - // make a 'random' password + // make a 'random' password, this will keep the ability to log in from localhost without password setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "0000" + this.genRandomPassword()); setConfig(SwitchboardConstants.ADMIN_ACCOUNT, ""); } diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 2ae516356..81f44784b 100644 --- 
a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -923,10 +923,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // collect hosts from index which shall take part in citation computation String query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + - CollectionSchema.process_sxt.getSolrFieldName() + ":" + ProcessType.CITATION.toString(); + CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]"; ReversibleScoreMap hostscore; try { - hostscore = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName()); + Map> hostfacet = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName()); + hostscore = hostfacet.get(CollectionSchema.host_s.getSolrFieldName()); } catch (final IOException e2) { ConcurrentLog.logException(e2); hostscore = new ClusteredScoreMap(); @@ -988,6 +989,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri CRHost crh = new CRHost(segment, rrCache, host, 0.85d, 6); int convergence_attempts = 0; while (convergence_attempts++ < 30) { + ConcurrentLog.info("CollectionConfiguration", "convergence step " + convergence_attempts + " for host " + host + " ..."); if (crh.convergenceStep()) break; } ConcurrentLog.info("CollectionConfiguration", "convergence for host " + host + " after " + convergence_attempts + " steps"); @@ -1005,8 +1007,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // process all documents at the webgraph for the outgoing links of this document SolrDocument doc; + int allcount = 0; if (segment.fulltext().useWebgraph()) { try { + int proccount = 0; + long start = System.currentTimeMillis(); 
for (String host: hostscore.keyList(true)) { if (hostscore.get(host) <= 0) continue; // select all webgraph edges and modify their cr value @@ -1041,7 +1046,13 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri ConcurrentLog.logException(e); } countcheck++; + proccount++; allcount++; + if (proccount % 1000 == 0) ConcurrentLog.info( + "CollectionConfiguration", "webgraph - postprocessed " + proccount + " from " + count + " documents; " + + (proccount * 1000 / (System.currentTimeMillis() - start)) + " docs/second; " + + ((System.currentTimeMillis() - start) * (count - proccount) / proccount / 60000) + " minutes remaining"); } + if (count != countcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous webgraph document count for host " + host + ": expected=" + count + ", counted=" + countcheck); } } catch (final IOException e2) { @@ -1054,10 +1065,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // process all documents in collection query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? 
"" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]"; - int proccount = 0, proccount_clickdepthchange = 0, proccount_referencechange = 0, proccount_citationchange = 0, proccount_uniquechange = 0; Map hostExtentCache = new HashMap(); // a mapping from the host id to the number of documents which contain this host-id Set uniqueURLs = new HashSet(); try { + int proccount = 0, proccount_clickdepthchange = 0, proccount_referencechange = 0, proccount_citationchange = 0, proccount_uniquechange = 0; long count = collectionConnector.getCountByQuery(query); long start = System.currentTimeMillis(); ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the collection for harvestkey " + harvestkey); @@ -1119,8 +1130,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri //connector.deleteById(ASCII.String(id)); collectionConnector.add(sid); - proccount++; - if (proccount % 100 == 0) ConcurrentLog.info("CollectionConfiguration", "postprocessed " + proccount + " from " + count + " documents; " + (proccount * 1000 / (System.currentTimeMillis() - start)) + " docs/second; " + ((System.currentTimeMillis() - start) * (count - proccount) / proccount / 60000) + " minutes remaining"); + proccount++; allcount++; + if (proccount % 100 == 0) ConcurrentLog.info( + "CollectionConfiguration", "collection - postprocessed " + proccount + " from " + count + " documents; " + + (proccount * 1000 / (System.currentTimeMillis() - start)) + " docs/second; " + + ((System.currentTimeMillis() - start) * (count - proccount) / proccount / 60000) + " minutes remaining"); } catch (final Throwable e1) { ConcurrentLog.logException(e1); failids.add(i); @@ -1142,7 +1156,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri } catch (IOException e3) { ConcurrentLog.warn("CollectionConfiguration", e3.getMessage(), e3); } - 
return proccount; + return allcount; } private static final class CRV { From 907db8b7a6c6ab4daa37ffcf47ec14ff8bdba832 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 25 Feb 2014 15:19:04 +0100 Subject: [PATCH 4/5] fix for bad query shortcut hack --- .../solr/connector/AbstractSolrConnector.java | 3 ++- .../federate/solr/connector/CachedSolrConnector.java | 6 +++++- .../solr/connector/ConcurrentUpdateSolrConnector.java | 11 +++++++++++ .../federate/solr/connector/MirrorSolrConnector.java | 6 +++++- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java index 0aaad4f2e..521433886 100644 --- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java @@ -37,6 +37,7 @@ import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.sorting.ClusteredScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.util.LookAheadIterator; +import net.yacy.kelondro.data.word.Word; import net.yacy.search.schema.CollectionSchema; import org.apache.solr.client.solrj.SolrQuery; @@ -336,8 +337,8 @@ public abstract class AbstractSolrConnector implements SolrConnector { @Override public SolrDocument getDocumentById(final String id, final String ... 
fields) throws IOException { + assert id.length() == Word.commonHashLength : "wrong id: " + id; final SolrQuery query = new SolrQuery(); - assert id.length() == 12; // construct query StringBuilder sb = new StringBuilder(23); sb.append("{!raw f=").append(CollectionSchema.id.getSolrFieldName()).append('}').append(id); diff --git a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java index cf28b3021..deb6a5096 100644 --- a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java @@ -27,6 +27,7 @@ import java.util.Map; import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.storage.ARC; import net.yacy.cora.storage.ConcurrentARC; +import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.util.MemoryControl; import net.yacy.search.schema.CollectionSchema; @@ -140,6 +141,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo @Override public SolrDocument getDocumentById(final String id, final String ... fields) throws IOException { + assert id.length() == Word.commonHashLength : "wrong id: " + id; String q = idQuery(id); SolrDocument doc = fields.length == 0 ? this.documentCache.get(q) : null; if (doc != null) { @@ -209,7 +211,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo */ @Override public SolrDocumentList getDocumentListByQuery(final String querystring, final int offset, final int count, final String ... 
fields) throws IOException { - if (offset == 0 && count == 1 && querystring.startsWith("id:")) { + if (offset == 0 && count == 1 && querystring.startsWith("id:") && + ((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') || + querystring.length() == 15)) { final SolrDocumentList list = new SolrDocumentList(); SolrDocument doc = getDocumentById(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields); list.add(doc); diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java index 5ebb3c7c2..feee3484e 100644 --- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java @@ -35,6 +35,7 @@ import net.yacy.cora.storage.ARH; import net.yacy.cora.storage.ConcurrentARC; import net.yacy.cora.storage.ConcurrentARH; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.util.MemoryControl; import net.yacy.search.schema.CollectionSchema; @@ -411,6 +412,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { @Override public SolrDocument getDocumentById(final String id, String... fields) throws IOException { + assert id.length() == Word.commonHashLength : "wrong id: " + id; if (this.missCache.contains(id)) return null; if (existIdFromDeleteQueue(id)) return null; SolrInputDocument idoc = getFromUpdateQueue(id); @@ -443,6 +445,15 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { @Override public SolrDocumentList getDocumentListByQuery(String querystring, int offset, int count, String... 
fields) throws IOException, SolrException { + if (offset == 0 && count == 1 && querystring.startsWith("id:") && + ((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') || + querystring.length() == 15)) { + final SolrDocumentList list = new SolrDocumentList(); + SolrDocument doc = getDocumentById(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields); + list.add(doc); + return list; + } + SolrDocumentList sdl = this.connector.getDocumentListByQuery(querystring, offset, count, AbstractSolrConnector.ensureEssentialFieldsIncluded(fields)); /* Iterator i = sdl.iterator(); diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index a745a0841..2d42d9039 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -28,6 +28,7 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.atomic.AtomicLong; import net.yacy.cora.sorting.ReversibleScoreMap; +import net.yacy.kelondro.data.word.Word; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; @@ -172,6 +173,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo @Override public SolrDocument getDocumentById(final String key, final String ... 
fields) throws IOException { + assert key.length() == Word.commonHashLength : "wrong id: " + key; SolrDocument doc; if ((solr0 != null && ((doc = solr0.getDocumentById(key, fields)) != null)) || (solr1 != null && ((doc = solr1.getDocumentById(key, fields)) != null))) { return doc; @@ -205,7 +207,9 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo @Override public SolrDocumentList getDocumentListByQuery(final String querystring, final int offset, final int count, final String ... fields) throws IOException { if (this.solr0 == null && this.solr1 == null) return new SolrDocumentList(); - if (offset == 0 && count == 1 && querystring.startsWith("id:")) { + if (offset == 0 && count == 1 && querystring.startsWith("id:") && + ((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') || + querystring.length() == 15)) { final SolrDocumentList list = new SolrDocumentList(); SolrDocument doc = getDocumentById(querystring.charAt(3) == '"' ? 
querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields); list.add(doc); From c57026e24266912919fd0d053d8bb93cdb572441 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 25 Feb 2014 15:23:45 +0100 Subject: [PATCH 5/5] recover from OOM --- .../solr/connector/ConcurrentUpdateSolrConnector.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java index feee3484e..ea97b893b 100644 --- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java @@ -108,6 +108,14 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { //ConcurrentLog.info("ConcurrentUpdateSolrConnector", "sending " + docs.size() + " documents to solr"); try { ConcurrentUpdateSolrConnector.this.connector.add(docs); + } catch (final OutOfMemoryError e) { + // clear and try again... + clearCaches(); + try { + ConcurrentUpdateSolrConnector.this.connector.add(docs); + } catch (final IOException ee) { + ConcurrentLog.logException(e); + } } catch (final IOException e) { ConcurrentLog.logException(e); }