- enhanced balancer: auto-solving of waiting-deadlocks

- removed deprecated cache-init size value
- more debug lines for IndexCell cache dump merge

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5728 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 9a90ea05e0
commit b3f75e48fa

@ -626,9 +626,7 @@ javastart_priority=10
# ram cache during indexing. When YaCy is shut down, this cache must be # ram cache during indexing. When YaCy is shut down, this cache must be
# flushed to disk; this may take several minutes. # flushed to disk; this may take several minutes.
wordCacheMaxCount = 30000 wordCacheMaxCount = 30000
wordCacheInitCount = 30000
wordCacheMaxCount__pro = 100000 wordCacheMaxCount__pro = 100000
wordCacheInitCount__pro = 100000
# Specifies if yacy can be used as transparent http proxy. # Specifies if yacy can be used as transparent http proxy.
# #

@ -122,15 +122,6 @@
flushed to disk; this may take several minutes. flushed to disk; this may take several minutes.
</td> </td>
</tr> </tr>
<tr valign="top" class="TableCellDark">
<td>Initial space of words in cache:</td>
<td>
<input name="wordCacheInitCount" type="text" size="10" maxlength="100" value="#[wordCacheInitCount]#" />
</td>
<td>
This is the initial size of the space for words in the cache.
</td>
</tr>
<tr valign="top" class="TableCellLight"> <tr valign="top" class="TableCellLight">
<td colspan="4"> <td colspan="4">
<input type="submit" name="cacheSizeSubmit" value="Enter New Cache Size" /> <input type="submit" name="cacheSizeSubmit" value="Enter New Cache Size" />

@ -230,9 +230,6 @@ public class PerformanceQueues_p {
final int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 20000); final int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 20000);
switchboard.setConfig(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount)); switchboard.setConfig(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount));
switchboard.webIndex.index().setBufferMaxWordCount(wordCacheMaxCount); switchboard.webIndex.index().setBufferMaxWordCount(wordCacheMaxCount);
final int wordCacheInitCount = post.getInt(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000);
switchboard.setConfig(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, Integer.toString(wordCacheInitCount));
} }
if ((post != null) && (post.containsKey("poolConfig"))) { if ((post != null) && (post.containsKey("poolConfig"))) {
@ -295,7 +292,6 @@ public class PerformanceQueues_p {
prop.putNum("minAgeOfCache", switchboard.webIndex.index().getBufferMinAge() / 1000 / 60); // minutes prop.putNum("minAgeOfCache", switchboard.webIndex.index().getBufferMinAge() / 1000 / 60); // minutes
prop.putNum("maxWaitingWordFlush", switchboard.getConfigLong("maxWaitingWordFlush", 180)); prop.putNum("maxWaitingWordFlush", switchboard.getConfigLong("maxWaitingWordFlush", 180));
prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000)); prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000));
prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000));
prop.put("crawlPauseProxy", switchboard.getConfigLong(plasmaSwitchboardConstants.PROXY_ONLINE_CAUTION_DELAY, 30000)); prop.put("crawlPauseProxy", switchboard.getConfigLong(plasmaSwitchboardConstants.PROXY_ONLINE_CAUTION_DELAY, 30000));
prop.put("crawlPauseLocalsearch", switchboard.getConfigLong(plasmaSwitchboardConstants.LOCALSEACH_ONLINE_CAUTION_DELAY, 30000)); prop.put("crawlPauseLocalsearch", switchboard.getConfigLong(plasmaSwitchboardConstants.LOCALSEACH_ONLINE_CAUTION_DELAY, 30000));
prop.put("crawlPauseRemotesearch", switchboard.getConfigLong(plasmaSwitchboardConstants.REMOTESEARCH_ONLINE_CAUTION_DELAY, 30000)); prop.put("crawlPauseRemotesearch", switchboard.getConfigLong(plasmaSwitchboardConstants.REMOTESEARCH_ONLINE_CAUTION_DELAY, 30000));

@ -30,7 +30,6 @@
<maxAgeOfCache>#[maxAgeOfCache]#</maxAgeOfCache> <maxAgeOfCache>#[maxAgeOfCache]#</maxAgeOfCache>
<minAgeOfCache>#[minAgeOfCache]#</minAgeOfCache> <minAgeOfCache>#[minAgeOfCache]#</minAgeOfCache>
<wordCacheMaxCount>#[wordOutCacheMaxCount]#</wordCacheMaxCount> <wordCacheMaxCount>#[wordOutCacheMaxCount]#</wordCacheMaxCount>
<wordCacheInitCount>#[wordCacheInitCount]#</wordCacheInitCount>
<wordFlushSize>#[wordFlushSize]#</wordFlushSize> <wordFlushSize>#[wordFlushSize]#</wordFlushSize>
</Cache> </Cache>
<ThreadPools> <ThreadPools>

@ -53,13 +53,14 @@ public class Balancer {
private final ConcurrentHashMap<String, LinkedList<String>> private final ConcurrentHashMap<String, LinkedList<String>>
domainStacks; // a map from domain name part to Lists with url hashs domainStacks; // a map from domain name part to Lists with url hashs
private final ArrayList<String> urlRAMStack; // a list that is flushed first private final ArrayList<String> urlRAMStack; // a list that is flushed first
private Stack urlFileStack; // a file with url hashes private Stack urlFileStack; // a file with url hashes
private ObjectIndex urlFileIndex; private ObjectIndex urlFileIndex;
private final File cacheStacksPath; private final File cacheStacksPath;
private final String stackname; private final String stackname;
private boolean top; // to alternate between top and bottom of the file stack private boolean top; // to alternate between top and bottom of the file stack
private long minimumLocalDelta; private long minimumLocalDelta;
private long minimumGlobalDelta; private long minimumGlobalDelta;
private long lastPrepare;
public Balancer(final File cachePath, final String stackname, final boolean fullram, public Balancer(final File cachePath, final String stackname, final boolean fullram,
final long minimumLocalDelta, final long minimumGlobalDelta) { final long minimumLocalDelta, final long minimumGlobalDelta) {
@ -72,6 +73,7 @@ public class Balancer {
this.top = true; this.top = true;
this.minimumLocalDelta = minimumLocalDelta; this.minimumLocalDelta = minimumLocalDelta;
this.minimumGlobalDelta = minimumGlobalDelta; this.minimumGlobalDelta = minimumGlobalDelta;
this.lastPrepare = System.currentTimeMillis();
// create a stack for newly entered entries // create a stack for newly entered entries
if (!(cachePath.exists())) cachePath.mkdir(); // make the path if (!(cachePath.exists())) cachePath.mkdir(); // make the path
@ -577,6 +579,10 @@ public class Balancer {
// this is only a protection against the worst case, where the crawler could // this is only a protection against the worst case, where the crawler could
// behave in a DoS-manner // behave in a DoS-manner
Log.logInfo("BALANCER", "forcing crawl-delay of " + sleeptime + " milliseconds for " + crawlEntry.url().getHost() + ((sleeptime > Math.max(minimumLocalDelta, minimumGlobalDelta)) ? " (caused by robots.txt)" : "")); Log.logInfo("BALANCER", "forcing crawl-delay of " + sleeptime + " milliseconds for " + crawlEntry.url().getHost() + ((sleeptime > Math.max(minimumLocalDelta, minimumGlobalDelta)) ? " (caused by robots.txt)" : ""));
if (System.currentTimeMillis() - this.lastPrepare > 10000) {
prepare(100);
this.lastPrepare = System.currentTimeMillis();
}
try {synchronized(this) { this.wait(sleeptime); }} catch (final InterruptedException e) {} try {synchronized(this) { this.wait(sleeptime); }} catch (final InterruptedException e) {}
} }
@ -595,9 +601,22 @@ public class Balancer {
* @throws IOException * @throws IOException
*/ */
public synchronized ArrayList<CrawlEntry> top(int count) throws IOException { public synchronized ArrayList<CrawlEntry> top(int count) throws IOException {
// construct a list using the urlRAMStack which was filled with this procedure
count = prepare(count);
final ArrayList<CrawlEntry> list = new ArrayList<CrawlEntry>();
for (int i = 0; i < count; i++) {
final String urlhash = urlRAMStack.get(i);
final Row.Entry entry = urlFileIndex.get(urlhash.getBytes());
if (entry == null) break;
list.add(new CrawlEntry(entry));
}
return list;
}
private int prepare(int count) throws IOException {
// if we need to flush anything, then flush the domain stack first, // if we need to flush anything, then flush the domain stack first,
// to avoid that new urls get hidden by old entries from the file stack // to avoid that new urls get hidden by old entries from the file stack
if (urlRAMStack == null) return null; if (urlRAMStack == null) return 0;
// ensure that the domain stacks are filled enough // ensure that the domain stacks are filled enough
shiftFileToDomStacks(count); shiftFileToDomStacks(count);
@ -614,16 +633,7 @@ public class Balancer {
// if the ram is still not full enough, use the file stack // if the ram is still not full enough, use the file stack
shiftFileToRAM(count); shiftFileToRAM(count);
// finally, construct a list using the urlRAMStack which was filled with this procedure return Math.min(count, urlRAMStack.size());
count = Math.min(count, urlRAMStack.size());
final ArrayList<CrawlEntry> list = new ArrayList<CrawlEntry>();
for (int i = 0; i < count; i++) {
final String urlhash = urlRAMStack.get(i);
final Row.Entry entry = urlFileIndex.get(urlhash.getBytes());
if (entry == null) break;
list.add(new CrawlEntry(entry));
}
return list;
} }
public synchronized Iterator<CrawlEntry> iterator() throws IOException { public synchronized Iterator<CrawlEntry> iterator() throws IOException {

@ -136,7 +136,7 @@ public class BLOBArray implements BLOB {
* @param location * @param location
* @throws IOException * @throws IOException
*/ */
public void mountBLOB(File location) throws IOException { public synchronized void mountBLOB(File location) throws IOException {
Date d; Date d;
try { try {
d = DateFormatter.parseShortSecond(location.getName().substring(0, 14)); d = DateFormatter.parseShortSecond(location.getName().substring(0, 14));
@ -147,7 +147,7 @@ public class BLOBArray implements BLOB {
blobs.add(new blobItem(d, location, oneBlob)); blobs.add(new blobItem(d, location, oneBlob));
} }
public void unmountBLOB(File location, boolean writeIDX) { public synchronized void unmountBLOB(File location, boolean writeIDX) {
Iterator<blobItem> i = this.blobs.iterator(); Iterator<blobItem> i = this.blobs.iterator();
blobItem b; blobItem b;
while (i.hasNext()) { while (i.hasNext()) {
@ -160,7 +160,7 @@ public class BLOBArray implements BLOB {
} }
} }
public File unmountOldestBLOB() { public synchronized File unmountOldestBLOB() {
if (this.blobs.size() == 0) return null; if (this.blobs.size() == 0) return null;
blobItem b = this.blobs.remove(0); blobItem b = this.blobs.remove(0);
b.blob.close(false); b.blob.close(false);
@ -171,7 +171,7 @@ public class BLOBArray implements BLOB {
* return the number of BLOB files in this array * return the number of BLOB files in this array
* @return * @return
*/ */
public int entries() { public synchronized int entries() {
return this.blobs.size(); return this.blobs.size();
} }
@ -181,7 +181,7 @@ public class BLOBArray implements BLOB {
* @param creation * @param creation
* @return * @return
*/ */
public File newBLOB(Date creation) { public synchronized File newBLOB(Date creation) {
return new File(heapLocation, DateFormatter.formatShortSecond(creation) + "." + blobSalt + ".blob"); return new File(heapLocation, DateFormatter.formatShortSecond(creation) + "." + blobSalt + ".blob");
} }
@ -221,9 +221,9 @@ public class BLOBArray implements BLOB {
} }
/* /*
* return the size of the repository * return the size of the repository (in bytes)
*/ */
public long length() { public synchronized long length() {
long s = 0; long s = 0;
for (int i = 0; i < blobs.size(); i++) s += blobs.get(i).location.length(); for (int i = 0; i < blobs.size(); i++) s += blobs.get(i).location.length();
return s; return s;
@ -262,16 +262,16 @@ public class BLOBArray implements BLOB {
* clears the content of the database * clears the content of the database
* @throws IOException * @throws IOException
*/ */
public void clear() throws IOException { public synchronized void clear() throws IOException {
for (blobItem bi: blobs) bi.blob.clear(); for (blobItem bi: blobs) bi.blob.clear();
blobs.clear(); blobs.clear();
} }
/** /**
* ask for the number of entries * ask for the number of blob entries
* @return the number of entries in the table * @return the number of entries in the table
*/ */
public int size() { public synchronized int size() {
int s = 0; int s = 0;
for (blobItem bi: blobs) s += bi.blob.size(); for (blobItem bi: blobs) s += bi.blob.size();
return s; return s;
@ -284,7 +284,7 @@ public class BLOBArray implements BLOB {
* @return * @return
* @throws IOException * @throws IOException
*/ */
public CloneableIterator<byte[]> keys(boolean up, boolean rotating) throws IOException { public synchronized CloneableIterator<byte[]> keys(boolean up, boolean rotating) throws IOException {
assert rotating = false; assert rotating = false;
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(blobs.size()); final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(blobs.size());
final Iterator<blobItem> i = blobs.iterator(); final Iterator<blobItem> i = blobs.iterator();
@ -301,7 +301,7 @@ public class BLOBArray implements BLOB {
* @return * @return
* @throws IOException * @throws IOException
*/ */
public CloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException { public synchronized CloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(blobs.size()); final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(blobs.size());
final Iterator<blobItem> i = blobs.iterator(); final Iterator<blobItem> i = blobs.iterator();
while (i.hasNext()) { while (i.hasNext()) {
@ -316,7 +316,7 @@ public class BLOBArray implements BLOB {
* @return * @return
* @throws IOException * @throws IOException
*/ */
public boolean has(byte[] key) { public synchronized boolean has(byte[] key) {
for (blobItem bi: blobs) if (bi.blob.has(key)) return true; for (blobItem bi: blobs) if (bi.blob.has(key)) return true;
return false; return false;
} }
@ -327,7 +327,7 @@ public class BLOBArray implements BLOB {
* @return * @return
* @throws IOException * @throws IOException
*/ */
public byte[] get(byte[] key) throws IOException { public synchronized byte[] get(byte[] key) throws IOException {
byte[] b; byte[] b;
for (blobItem bi: blobs) { for (blobItem bi: blobs) {
b = bi.blob.get(key); b = bi.blob.get(key);
@ -343,7 +343,7 @@ public class BLOBArray implements BLOB {
* @return * @return
* @throws IOException * @throws IOException
*/ */
public List<byte[]> getAll(byte[] key) throws IOException { public synchronized List<byte[]> getAll(byte[] key) throws IOException {
byte[] b; byte[] b;
ArrayList<byte[]> l = new ArrayList<byte[]>(blobs.size()); ArrayList<byte[]> l = new ArrayList<byte[]>(blobs.size());
for (blobItem bi: blobs) { for (blobItem bi: blobs) {
@ -359,7 +359,7 @@ public class BLOBArray implements BLOB {
* @return the size of the BLOB or -1 if the BLOB does not exist * @return the size of the BLOB or -1 if the BLOB does not exist
* @throws IOException * @throws IOException
*/ */
public long length(byte[] key) throws IOException { public synchronized long length(byte[] key) throws IOException {
long l; long l;
for (blobItem bi: blobs) { for (blobItem bi: blobs) {
l = bi.blob.length(key); l = bi.blob.length(key);
@ -374,7 +374,7 @@ public class BLOBArray implements BLOB {
* @param b * @param b
* @throws IOException * @throws IOException
*/ */
public void put(byte[] key, byte[] b) throws IOException { public synchronized void put(byte[] key, byte[] b) throws IOException {
blobItem bi = (blobs.size() == 0) ? null : blobs.get(blobs.size() - 1); blobItem bi = (blobs.size() == 0) ? null : blobs.get(blobs.size() - 1);
if (bi == null) if (bi == null)
System.out.println("bi == null"); System.out.println("bi == null");
@ -397,7 +397,7 @@ public class BLOBArray implements BLOB {
* @param key the primary key * @param key the primary key
* @throws IOException * @throws IOException
*/ */
public int replace(byte[] key, Rewriter rewriter) throws IOException { public synchronized int replace(byte[] key, Rewriter rewriter) throws IOException {
int d = 0; int d = 0;
for (blobItem bi: blobs) { for (blobItem bi: blobs) {
d += bi.blob.replace(key, rewriter); d += bi.blob.replace(key, rewriter);
@ -410,14 +410,14 @@ public class BLOBArray implements BLOB {
* @param key the primary key * @param key the primary key
* @throws IOException * @throws IOException
*/ */
public void remove(byte[] key) throws IOException { public synchronized void remove(byte[] key) throws IOException {
for (blobItem bi: blobs) bi.blob.remove(key); for (blobItem bi: blobs) bi.blob.remove(key);
} }
/** /**
* close the BLOB * close the BLOB
*/ */
public void close(boolean writeIDX) { public synchronized void close(boolean writeIDX) {
for (blobItem bi: blobs) bi.blob.close(writeIDX); for (blobItem bi: blobs) bi.blob.close(writeIDX);
blobs.clear(); blobs.clear();
blobs = null; blobs = null;

@ -248,6 +248,9 @@ public final class ReferenceContainerArray {
if (this.array.entries() < 2) return false; if (this.array.entries() < 2) return false;
File f1 = this.array.unmountOldestBLOB(); File f1 = this.array.unmountOldestBLOB();
File f2 = this.array.unmountOldestBLOB(); File f2 = this.array.unmountOldestBLOB();
System.out.println("*** DEBUG mergeOldest: vvvvvvvvv array has " + this.array.entries() + " entries vvvvvvvvv");
System.out.println("*** DEBUG mergeOldest: unmounted " + f1.getName());
System.out.println("*** DEBUG mergeOldest: unmounted " + f2.getName());
// iterate both files and write a new one // iterate both files and write a new one
CloneableIterator<ReferenceContainer> i1 = new blobFileEntries(f1, this.payloadrow); CloneableIterator<ReferenceContainer> i1 = new blobFileEntries(f1, this.payloadrow);
@ -340,6 +343,8 @@ public final class ReferenceContainerArray {
if (!f1.delete()) f1.deleteOnExit(); if (!f1.delete()) f1.deleteOnExit();
if (!f2.delete()) f2.deleteOnExit(); if (!f2.delete()) f2.deleteOnExit();
this.array.mountBLOB(newFile); this.array.mountBLOB(newFile);
System.out.println("*** DEBUG mergeOldest: mounted " + newFile.getName());
System.out.println("*** DEBUG mergeOldest: ^^^^^^^^^^^ array has " + this.array.entries() + " entries ^^^^^^^^^^^");
return true; return true;
} }

@ -288,8 +288,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
this.log.logConfig("Work Path: " + this.workPath.toString()); this.log.logConfig("Work Path: " + this.workPath.toString());
// set a high maximum cache size to current size; this is adapted later automatically // set a high maximum cache size to current size; this is adapted later automatically
final int wordCacheMaxCount = Math.max((int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000), final int wordCacheMaxCount = (int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000);
(int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000));
setConfig(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount)); setConfig(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount));
// set network-specific performance attributes // set network-specific performance attributes

@ -317,7 +317,6 @@ public final class plasmaSwitchboardConstants {
*/ */
public static final String CRAWLER_THREADS_ACTIVE_MAX = "crawler.MaxActiveThreads"; public static final String CRAWLER_THREADS_ACTIVE_MAX = "crawler.MaxActiveThreads";
public static final String YACY_MODE_DEBUG = "yacyDebugMode"; public static final String YACY_MODE_DEBUG = "yacyDebugMode";
public static final String WORDCACHE_INIT_COUNT = "wordCacheInitCount";
/** /**
* <p><code>public static final String <strong>WORDCACHE_MAX_COUNT</strong> = "wordCacheMaxCount"</code></p> * <p><code>public static final String <strong>WORDCACHE_MAX_COUNT</strong> = "wordCacheMaxCount"</code></p>
* <p>Name of the setting for the maximum number of words the word-cache (or DHT-Out cache) shall contain. Indexing pages if the * <p>Name of the setting for the maximum number of words the word-cache (or DHT-Out cache) shall contain. Indexing pages if the

Loading…
Cancel
Save