From b3f75e48faa26c67c3750566604c1ee5797252a4 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Wed, 18 Mar 2009 20:21:19 +0000
Subject: [PATCH] - enhanced balancer: auto-solving of waiting-deadlocks
 - removed deprecated cache-init size value
 - more debug lines for IndexCell cache dump merge

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5728 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 defaults/yacy.init                            |  2 -
 htroot/PerformanceQueues_p.html               |  9 -----
 htroot/PerformanceQueues_p.java               |  4 --
 htroot/PerformanceQueues_p.xml                |  1 -
 source/de/anomic/crawler/Balancer.java        | 36 +++++++++++------
 source/de/anomic/kelondro/blob/BLOBArray.java | 40 +++++++++----------
 .../text/ReferenceContainerArray.java         |  5 +++
 .../de/anomic/plasma/plasmaSwitchboard.java   |  3 +-
 .../plasma/plasmaSwitchboardConstants.java    |  1 -
 9 files changed, 49 insertions(+), 52 deletions(-)

diff --git a/defaults/yacy.init b/defaults/yacy.init
index 65be3fbda..c8cb23058 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -626,9 +626,7 @@ javastart_priority=10
 # ram cache during indexing. When YaCy is shut down, this cache must be
 # flushed to disc; this may last some minutes.
 wordCacheMaxCount = 30000
-wordCacheInitCount = 30000
 wordCacheMaxCount__pro = 100000
-wordCacheInitCount__pro = 100000
 
 # Specifies if yacy can be used as transparent http proxy.
 #
diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html
index 1f2cfcd57..057c91b3c 100644
--- a/htroot/PerformanceQueues_p.html
+++ b/htroot/PerformanceQueues_p.html
@@ -122,15 +122,6 @@
 flushed to disc; this may last some minutes.
-
-      Initial space of words in cache:
-
-
-
-
-      This is is the init size of space for words in cache.
-
-
diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java
index 5332a7aa1..7601a5829 100644
--- a/htroot/PerformanceQueues_p.java
+++ b/htroot/PerformanceQueues_p.java
@@ -230,9 +230,6 @@ public class PerformanceQueues_p {
         final int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 20000);
         switchboard.setConfig(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount));
         switchboard.webIndex.index().setBufferMaxWordCount(wordCacheMaxCount);
-
-        final int wordCacheInitCount = post.getInt(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000);
-        switchboard.setConfig(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, Integer.toString(wordCacheInitCount));
     }
 
     if ((post != null) && (post.containsKey("poolConfig"))) {
@@ -295,7 +292,6 @@ public class PerformanceQueues_p {
         prop.putNum("minAgeOfCache", switchboard.webIndex.index().getBufferMinAge() / 1000 / 60); // minutes
         prop.putNum("maxWaitingWordFlush", switchboard.getConfigLong("maxWaitingWordFlush", 180));
         prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000));
-        prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000));
         prop.put("crawlPauseProxy", switchboard.getConfigLong(plasmaSwitchboardConstants.PROXY_ONLINE_CAUTION_DELAY, 30000));
         prop.put("crawlPauseLocalsearch", switchboard.getConfigLong(plasmaSwitchboardConstants.LOCALSEACH_ONLINE_CAUTION_DELAY, 30000));
         prop.put("crawlPauseRemotesearch", switchboard.getConfigLong(plasmaSwitchboardConstants.REMOTESEARCH_ONLINE_CAUTION_DELAY, 30000));
diff --git a/htroot/PerformanceQueues_p.xml b/htroot/PerformanceQueues_p.xml
index a1f95c117..8535e76a9 100644
--- a/htroot/PerformanceQueues_p.xml
+++ b/htroot/PerformanceQueues_p.xml
@@ -30,7 +30,6 @@
 #[maxAgeOfCache]#
 #[minAgeOfCache]#
 #[wordOutCacheMaxCount]#
-#[wordCacheInitCount]#
 #[wordFlushSize]#
diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java
index 800b4977f..fbab98ae9 100644
--- a/source/de/anomic/crawler/Balancer.java
+++ b/source/de/anomic/crawler/Balancer.java
@@ -53,13 +53,14 @@ public class Balancer {
     private final ConcurrentHashMap> domainStacks; // a map from domain name part to Lists with url hashs
     private final ArrayList urlRAMStack; // a list that is flushed first
-    private Stack urlFileStack; // a file with url hashes
-    private ObjectIndex urlFileIndex;
+    private Stack urlFileStack; // a file with url hashes
+    private ObjectIndex urlFileIndex;
     private final File cacheStacksPath;
     private final String stackname;
     private boolean top; // to alternate between top and bottom of the file stack
     private long minimumLocalDelta;
     private long minimumGlobalDelta;
+    private long lastPrepare;
 
     public Balancer(final File cachePath, final String stackname, final boolean fullram,
                     final long minimumLocalDelta, final long minimumGlobalDelta) {
@@ -72,6 +73,7 @@ public class Balancer {
         this.top = true;
         this.minimumLocalDelta = minimumLocalDelta;
         this.minimumGlobalDelta = minimumGlobalDelta;
+        this.lastPrepare = System.currentTimeMillis();
 
         // create a stack for newly entered entries
         if (!(cachePath.exists())) cachePath.mkdir(); // make the path
@@ -577,6 +579,10 @@ public class Balancer {
             // this is only to protection against the worst case, where the crawler could
             // behave in a DoS-manner
             Log.logInfo("BALANCER", "forcing crawl-delay of " + sleeptime + " milliseconds for " + crawlEntry.url().getHost() + ((sleeptime > Math.max(minimumLocalDelta, minimumGlobalDelta)) ? " (caused by robots.txt)" : ""));
+            if (System.currentTimeMillis() - this.lastPrepare > 10000) {
+                prepare(100);
+                this.lastPrepare = System.currentTimeMillis();
+            }
             try {synchronized(this) { this.wait(sleeptime); }} catch (final InterruptedException e) {}
         }
@@ -595,9 +601,22 @@ public class Balancer {
      * @throws IOException
      */
     public synchronized ArrayList top(int count) throws IOException {
+        // construct a list using the urlRAMStack which was filled with this procedure
+        count = prepare(count);
+        final ArrayList list = new ArrayList();
+        for (int i = 0; i < count; i++) {
+            final String urlhash = urlRAMStack.get(i);
+            final Row.Entry entry = urlFileIndex.get(urlhash.getBytes());
+            if (entry == null) break;
+            list.add(new CrawlEntry(entry));
+        }
+        return list;
+    }
+
+    private int prepare(int count) throws IOException {
         // if we need to flush anything, then flush the domain stack first,
         // to avoid that new urls get hidden by old entries from the file stack
-        if (urlRAMStack == null) return null;
+        if (urlRAMStack == null) return 0;
 
         // ensure that the domain stacks are filled enough
         shiftFileToDomStacks(count);
@@ -614,16 +633,7 @@ public class Balancer {
         // if the ram is still not full enough, use the file stack
         shiftFileToRAM(count);
 
-        // finally, construct a list using the urlRAMStack which was filled with this procedure
-        count = Math.min(count, urlRAMStack.size());
-        final ArrayList list = new ArrayList();
-        for (int i = 0; i < count; i++) {
-            final String urlhash = urlRAMStack.get(i);
-            final Row.Entry entry = urlFileIndex.get(urlhash.getBytes());
-            if (entry == null) break;
-            list.add(new CrawlEntry(entry));
-        }
-        return list;
+        return Math.min(count, urlRAMStack.size());
     }
 
     public synchronized Iterator iterator() throws IOException {
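
A rough illustration of the waiting-deadlock fix in the Balancer change above: while the crawler is forced to wait out a crawl delay, the queue is re-prepared at most every ten seconds so that entries arriving in the meantime can resolve the wait instead of leaving the waiter stuck on a queue nobody refills. The sketch below uses illustrative names (DelayedQueueSketch, prepareQueue, PREPARE_INTERVAL_MS); it is not the actual YaCy Balancer code.

    // sketch of the "re-prepare while waiting" idea; names are illustrative
    public class DelayedQueueSketch {

        private static final long PREPARE_INTERVAL_MS = 10000; // mirrors the 10s threshold used in the patch
        private long lastPrepare = System.currentTimeMillis();

        // hypothetical stand-in for Balancer.prepare(int): refill the in-memory queue
        private synchronized int prepareQueue(final int count) {
            // ...shift entries from the file/domain stacks into the RAM stack...
            return count;
        }

        public void waitCrawlDelay(final long sleeptime) {
            // refresh the queue periodically so newly arrived entries can end the wait
            if (System.currentTimeMillis() - this.lastPrepare > PREPARE_INTERVAL_MS) {
                prepareQueue(100);
                this.lastPrepare = System.currentTimeMillis();
            }
            try {
                synchronized (this) { this.wait(sleeptime); }
            } catch (final InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }
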
diff --git a/source/de/anomic/kelondro/blob/BLOBArray.java b/source/de/anomic/kelondro/blob/BLOBArray.java
index e03af11aa..bac514baa 100755
--- a/source/de/anomic/kelondro/blob/BLOBArray.java
+++ b/source/de/anomic/kelondro/blob/BLOBArray.java
@@ -136,7 +136,7 @@ public class BLOBArray implements BLOB {
      * @param location
      * @throws IOException
      */
-    public void mountBLOB(File location) throws IOException {
+    public synchronized void mountBLOB(File location) throws IOException {
         Date d;
         try {
             d = DateFormatter.parseShortSecond(location.getName().substring(0, 14));
@@ -147,7 +147,7 @@ public class BLOBArray implements BLOB {
         blobs.add(new blobItem(d, location, oneBlob));
     }
 
-    public void unmountBLOB(File location, boolean writeIDX) {
+    public synchronized void unmountBLOB(File location, boolean writeIDX) {
         Iterator i = this.blobs.iterator();
         blobItem b;
         while (i.hasNext()) {
@@ -160,7 +160,7 @@ public class BLOBArray implements BLOB {
         }
     }
 
-    public File unmountOldestBLOB() {
+    public synchronized File unmountOldestBLOB() {
         if (this.blobs.size() == 0) return null;
         blobItem b = this.blobs.remove(0);
         b.blob.close(false);
@@ -171,7 +171,7 @@ public class BLOBArray implements BLOB {
      * return the number of BLOB files in this array
      * @return
      */
-    public int entries() {
+    public synchronized int entries() {
         return this.blobs.size();
     }
 
@@ -181,7 +181,7 @@ public class BLOBArray implements BLOB {
      * @param creation
      * @return
      */
-    public File newBLOB(Date creation) {
+    public synchronized File newBLOB(Date creation) {
         return new File(heapLocation, DateFormatter.formatShortSecond(creation) + "." + blobSalt + ".blob");
     }
 
@@ -221,9 +221,9 @@ public class BLOBArray implements BLOB {
     }
 
     /*
-     * return the size of the repository
+     * return the size of the repository (in bytes)
      */
-    public long length() {
+    public synchronized long length() {
         long s = 0;
         for (int i = 0; i < blobs.size(); i++) s += blobs.get(i).location.length();
         return s;
@@ -262,16 +262,16 @@ public class BLOBArray implements BLOB {
      * clears the content of the database
      * @throws IOException
      */
-    public void clear() throws IOException {
+    public synchronized void clear() throws IOException {
         for (blobItem bi: blobs) bi.blob.clear();
         blobs.clear();
     }
 
     /**
-     * ask for the number of entries
+     * ask for the number of blob entries
      * @return the number of entries in the table
      */
-    public int size() {
+    public synchronized int size() {
         int s = 0;
         for (blobItem bi: blobs) s += bi.blob.size();
         return s;
@@ -284,7 +284,7 @@ public class BLOBArray implements BLOB {
      * @return
      * @throws IOException
      */
-    public CloneableIterator keys(boolean up, boolean rotating) throws IOException {
+    public synchronized CloneableIterator keys(boolean up, boolean rotating) throws IOException {
         assert rotating = false;
         final List> c = new ArrayList>(blobs.size());
         final Iterator i = blobs.iterator();
@@ -301,7 +301,7 @@ public class BLOBArray implements BLOB {
      * @return
      * @throws IOException
      */
-    public CloneableIterator keys(boolean up, byte[] firstKey) throws IOException {
+    public synchronized CloneableIterator keys(boolean up, byte[] firstKey) throws IOException {
         final List> c = new ArrayList>(blobs.size());
         final Iterator i = blobs.iterator();
         while (i.hasNext()) {
@@ -316,7 +316,7 @@ public class BLOBArray implements BLOB {
      * @return
      * @throws IOException
      */
-    public boolean has(byte[] key) {
+    public synchronized boolean has(byte[] key) {
         for (blobItem bi: blobs) if (bi.blob.has(key)) return true;
         return false;
     }
@@ -327,7 +327,7 @@ public class BLOBArray implements BLOB {
      * @return
      * @throws IOException
      */
-    public byte[] get(byte[] key) throws IOException {
+    public synchronized byte[] get(byte[] key) throws IOException {
         byte[] b;
         for (blobItem bi: blobs) {
             b = bi.blob.get(key);
@@ -343,7 +343,7 @@ public class BLOBArray implements BLOB {
      * @return
      * @throws IOException
      */
-    public List getAll(byte[] key) throws IOException {
+    public synchronized List getAll(byte[] key) throws IOException {
         byte[] b;
         ArrayList l = new ArrayList(blobs.size());
         for (blobItem bi: blobs) {
@@ -359,7 +359,7 @@ public class BLOBArray implements BLOB {
      * @return the size of the BLOB or -1 if the BLOB does not exist
      * @throws IOException
      */
-    public long length(byte[] key) throws IOException {
+    public synchronized long length(byte[] key) throws IOException {
         long l;
         for (blobItem bi: blobs) {
             l = bi.blob.length(key);
@@ -374,7 +374,7 @@ public class BLOBArray implements BLOB {
      * @param b
      * @throws IOException
      */
-    public void put(byte[] key, byte[] b) throws IOException {
+    public synchronized void put(byte[] key, byte[] b) throws IOException {
         blobItem bi = (blobs.size() == 0) ? null : blobs.get(blobs.size() - 1);
         if (bi == null)
             System.out.println("bi == null");
@@ -397,7 +397,7 @@ public class BLOBArray implements BLOB {
      * @param key the primary key
      * @throws IOException
      */
-    public int replace(byte[] key, Rewriter rewriter) throws IOException {
+    public synchronized int replace(byte[] key, Rewriter rewriter) throws IOException {
         int d = 0;
         for (blobItem bi: blobs) {
             d += bi.blob.replace(key, rewriter);
@@ -410,14 +410,14 @@ public class BLOBArray implements BLOB {
      * @param key the primary key
      * @throws IOException
      */
-    public void remove(byte[] key) throws IOException {
+    public synchronized void remove(byte[] key) throws IOException {
         for (blobItem bi: blobs) bi.blob.remove(key);
     }
 
     /**
      * close the BLOB
      */
-    public void close(boolean writeIDX) {
+    public synchronized void close(boolean writeIDX) {
         for (blobItem bi: blobs) bi.blob.close(writeIDX);
         blobs.clear();
         blobs = null;
diff --git a/source/de/anomic/kelondro/text/ReferenceContainerArray.java b/source/de/anomic/kelondro/text/ReferenceContainerArray.java
index 05601d0df..3cb01717e 100644
--- a/source/de/anomic/kelondro/text/ReferenceContainerArray.java
+++ b/source/de/anomic/kelondro/text/ReferenceContainerArray.java
@@ -248,6 +248,9 @@ public final class ReferenceContainerArray {
         if (this.array.entries() < 2) return false;
         File f1 = this.array.unmountOldestBLOB();
         File f2 = this.array.unmountOldestBLOB();
+        System.out.println("*** DEBUG mergeOldest: vvvvvvvvv array has " + this.array.entries() + " entries vvvvvvvvv");
+        System.out.println("*** DEBUG mergeOldest: unmounted " + f1.getName());
+        System.out.println("*** DEBUG mergeOldest: unmounted " + f2.getName());
 
         // iterate both files and write a new one
         CloneableIterator i1 = new blobFileEntries(f1, this.payloadrow);
@@ -340,6 +343,8 @@ public final class ReferenceContainerArray {
         if (!f1.delete()) f1.deleteOnExit();
         if (!f2.delete()) f2.deleteOnExit();
         this.array.mountBLOB(newFile);
+        System.out.println("*** DEBUG mergeOldest: mounted " + newFile.getName());
+        System.out.println("*** DEBUG mergeOldest: ^^^^^^^^^^^ array has " + this.array.entries() + " entries ^^^^^^^^^^^");
         return true;
     }
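
The BLOBArray change above makes every public method synchronized, so the IndexCell dump merge in ReferenceContainerArray.mergeOldest can unmount and remount blob files without racing concurrent readers and writers. Below is a minimal sketch of that coarse-grained monitor pattern under assumed names; SynchronizedBlobFacadeSketch and its methods are illustrative, not the YaCy BLOBArray API.

    import java.util.ArrayList;
    import java.util.List;

    // every public operation locks the same monitor (the instance itself),
    // so an unmount/mount sequence during a merge cannot interleave with lookups
    class SynchronizedBlobFacadeSketch {
        private final List<String> mountedBlobNames = new ArrayList<String>();

        public synchronized void mount(final String name) {
            mountedBlobNames.add(name);
        }

        public synchronized String unmountOldest() {
            return mountedBlobNames.isEmpty() ? null : mountedBlobNames.remove(0);
        }

        public synchronized int entries() {
            return mountedBlobNames.size();
        }

        public synchronized boolean has(final String name) {
            return mountedBlobNames.contains(name);
        }
    }
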
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 3ad99c491..fadbd6ad5 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -288,8 +288,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch
     public static final String WORDCACHE_MAX_COUNT = "wordCacheMaxCount"
     *
     Name of the setting how many words the word-cache (or DHT-Out cache) shall contain maximal. Indexing pages if the
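
On the removed wordCacheInitCount setting: the indexing word cache only needs an upper bound (wordCacheMaxCount), because the buffer grows on demand and is flushed to disk once that bound is reached, so a separate initial size adds nothing. The sketch below illustrates that behaviour under assumed names; WordBufferSketch is an invented class, not YaCy's actual IndexCell/buffer API.

    import java.util.LinkedHashMap;
    import java.util.Map;

    // a word buffer that needs only a maximum size, not an initial size:
    // it grows on demand and is flushed once the configured limit is reached
    class WordBufferSketch {
        private final int maxWordCount; // e.g. the wordCacheMaxCount setting
        private final Map<String, Object> buffer = new LinkedHashMap<String, Object>();

        WordBufferSketch(final int maxWordCount) {
            this.maxWordCount = maxWordCount;
        }

        synchronized void add(final String wordHash, final Object references) {
            buffer.put(wordHash, references);
            if (buffer.size() >= maxWordCount) flush();
        }

        private void flush() {
            // dump the buffered words to the on-disk index here, then start over
            buffer.clear();
        }
    }
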