- enhanced balancer: auto-solving of waiting-deadlocks

- removed deprecated cache-init size value
- more debug lines for IndexCell cache dump merge

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5728 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 9a90ea05e0
commit b3f75e48fa

@ -626,9 +626,7 @@ javastart_priority=10
# ram cache during indexing. When YaCy is shut down, this cache must be
# flushed to disk; this may take several minutes.
wordCacheMaxCount = 30000
wordCacheInitCount = 30000
wordCacheMaxCount__pro = 100000
wordCacheInitCount__pro = 100000
# Specifies if yacy can be used as transparent http proxy.
#

@ -122,15 +122,6 @@
flushed to disc; this may last some minutes.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td>Initial space of words in cache:</td>
<td>
<input name="wordCacheInitCount" type="text" size="10" maxlength="100" value="#[wordCacheInitCount]#" />
</td>
<td>
This is the init size of space for words in cache.
</td>
</tr>
<tr valign="top" class="TableCellLight">
<td colspan="4">
<input type="submit" name="cacheSizeSubmit" value="Enter New Cache Size" />

@ -230,9 +230,6 @@ public class PerformanceQueues_p {
final int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 20000);
switchboard.setConfig(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount));
switchboard.webIndex.index().setBufferMaxWordCount(wordCacheMaxCount);
final int wordCacheInitCount = post.getInt(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000);
switchboard.setConfig(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, Integer.toString(wordCacheInitCount));
}
if ((post != null) && (post.containsKey("poolConfig"))) {
@ -295,7 +292,6 @@ public class PerformanceQueues_p {
prop.putNum("minAgeOfCache", switchboard.webIndex.index().getBufferMinAge() / 1000 / 60); // minutes
prop.putNum("maxWaitingWordFlush", switchboard.getConfigLong("maxWaitingWordFlush", 180));
prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000));
prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000));
prop.put("crawlPauseProxy", switchboard.getConfigLong(plasmaSwitchboardConstants.PROXY_ONLINE_CAUTION_DELAY, 30000));
prop.put("crawlPauseLocalsearch", switchboard.getConfigLong(plasmaSwitchboardConstants.LOCALSEACH_ONLINE_CAUTION_DELAY, 30000));
prop.put("crawlPauseRemotesearch", switchboard.getConfigLong(plasmaSwitchboardConstants.REMOTESEARCH_ONLINE_CAUTION_DELAY, 30000));

@ -30,7 +30,6 @@
<maxAgeOfCache>#[maxAgeOfCache]#</maxAgeOfCache>
<minAgeOfCache>#[minAgeOfCache]#</minAgeOfCache>
<wordCacheMaxCount>#[wordOutCacheMaxCount]#</wordCacheMaxCount>
<wordCacheInitCount>#[wordCacheInitCount]#</wordCacheInitCount>
<wordFlushSize>#[wordFlushSize]#</wordFlushSize>
</Cache>
<ThreadPools>

@ -53,13 +53,14 @@ public class Balancer {
private final ConcurrentHashMap<String, LinkedList<String>>
domainStacks; // a map from domain name part to Lists with url hashs
private final ArrayList<String> urlRAMStack; // a list that is flushed first
private Stack urlFileStack; // a file with url hashes
private ObjectIndex urlFileIndex;
private Stack urlFileStack; // a file with url hashes
private ObjectIndex urlFileIndex;
private final File cacheStacksPath;
private final String stackname;
private boolean top; // to alternate between top and bottom of the file stack
private long minimumLocalDelta;
private long minimumGlobalDelta;
private long lastPrepare;
public Balancer(final File cachePath, final String stackname, final boolean fullram,
final long minimumLocalDelta, final long minimumGlobalDelta) {
@ -72,6 +73,7 @@ public class Balancer {
this.top = true;
this.minimumLocalDelta = minimumLocalDelta;
this.minimumGlobalDelta = minimumGlobalDelta;
this.lastPrepare = System.currentTimeMillis();
// create a stack for newly entered entries
if (!(cachePath.exists())) cachePath.mkdir(); // make the path
@ -577,6 +579,10 @@ public class Balancer {
// this is only a protection against the worst case, where the crawler could
// behave in a DoS-manner
Log.logInfo("BALANCER", "forcing crawl-delay of " + sleeptime + " milliseconds for " + crawlEntry.url().getHost() + ((sleeptime > Math.max(minimumLocalDelta, minimumGlobalDelta)) ? " (caused by robots.txt)" : ""));
if (System.currentTimeMillis() - this.lastPrepare > 10000) {
prepare(100);
this.lastPrepare = System.currentTimeMillis();
}
try {synchronized(this) { this.wait(sleeptime); }} catch (final InterruptedException e) {}
}
@ -595,9 +601,22 @@ public class Balancer {
* @throws IOException
*/
public synchronized ArrayList<CrawlEntry> top(int count) throws IOException {
    // prepare() refills the RAM stack and returns how many of its leading
    // entries may be read (never more than urlRAMStack.size())
    final int available = prepare(count);
    final ArrayList<CrawlEntry> result = new ArrayList<CrawlEntry>();
    int pos = 0;
    while (pos < available) {
        // resolve the url hash from the RAM stack to its row in the file index
        final Row.Entry row = urlFileIndex.get(urlRAMStack.get(pos).getBytes());
        if (row == null) {
            // stop at the first hash that has no row in the index
            break;
        }
        result.add(new CrawlEntry(row));
        pos++;
    }
    return result;
}
private int prepare(int count) throws IOException {
// if we need to flush anything, then flush the domain stack first,
// to avoid that new urls get hidden by old entries from the file stack
if (urlRAMStack == null) return null;
if (urlRAMStack == null) return 0;
// ensure that the domain stacks are filled enough
shiftFileToDomStacks(count);
@ -614,16 +633,7 @@ public class Balancer {
// if the ram is still not full enough, use the file stack
shiftFileToRAM(count);
// finally, construct a list using the urlRAMStack which was filled with this procedure
count = Math.min(count, urlRAMStack.size());
final ArrayList<CrawlEntry> list = new ArrayList<CrawlEntry>();
for (int i = 0; i < count; i++) {
final String urlhash = urlRAMStack.get(i);
final Row.Entry entry = urlFileIndex.get(urlhash.getBytes());
if (entry == null) break;
list.add(new CrawlEntry(entry));
}
return list;
return Math.min(count, urlRAMStack.size());
}
public synchronized Iterator<CrawlEntry> iterator() throws IOException {

@ -136,7 +136,7 @@ public class BLOBArray implements BLOB {
* @param location
* @throws IOException
*/
public void mountBLOB(File location) throws IOException {
public synchronized void mountBLOB(File location) throws IOException {
Date d;
try {
d = DateFormatter.parseShortSecond(location.getName().substring(0, 14));
@ -147,7 +147,7 @@ public class BLOBArray implements BLOB {
blobs.add(new blobItem(d, location, oneBlob));
}
public void unmountBLOB(File location, boolean writeIDX) {
public synchronized void unmountBLOB(File location, boolean writeIDX) {
Iterator<blobItem> i = this.blobs.iterator();
blobItem b;
while (i.hasNext()) {
@ -160,7 +160,7 @@ public class BLOBArray implements BLOB {
}
}
public File unmountOldestBLOB() {
public synchronized File unmountOldestBLOB() {
if (this.blobs.size() == 0) return null;
blobItem b = this.blobs.remove(0);
b.blob.close(false);
@ -171,7 +171,7 @@ public class BLOBArray implements BLOB {
* return the number of BLOB files in this array
* @return
*/
public int entries() {
public synchronized int entries() {
    // the blobs list holds one item per mounted BLOB file
    final int mounted = this.blobs.size();
    return mounted;
}
@ -181,7 +181,7 @@ public class BLOBArray implements BLOB {
* @param creation
* @return
*/
public File newBLOB(Date creation) {
public synchronized File newBLOB(Date creation) {
    // file name layout: <formatted creation date>.<blobSalt>.blob, placed in heapLocation
    final String fileName = DateFormatter.formatShortSecond(creation) + "." + blobSalt + ".blob";
    return new File(heapLocation, fileName);
}
@ -221,9 +221,9 @@ public class BLOBArray implements BLOB {
}
/*
* return the size of the repository
* return the size of the repository (in bytes)
*/
public long length() {
public synchronized long length() {
long s = 0;
for (int i = 0; i < blobs.size(); i++) s += blobs.get(i).location.length();
return s;
@ -262,16 +262,16 @@ public class BLOBArray implements BLOB {
* clears the content of the database
* @throws IOException
*/
public void clear() throws IOException {
public synchronized void clear() throws IOException {
    // first clear the content of every mounted BLOB ...
    final Iterator<blobItem> it = blobs.iterator();
    while (it.hasNext()) {
        it.next().blob.clear();
    }
    // ... then drop all items from the array itself
    blobs.clear();
}
/**
* ask for the number of entries
* ask for the number of blob entries
* @return the number of entries in the table
*/
public int size() {
public synchronized int size() {
int s = 0;
for (blobItem bi: blobs) s += bi.blob.size();
return s;
@ -284,7 +284,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
public CloneableIterator<byte[]> keys(boolean up, boolean rotating) throws IOException {
public synchronized CloneableIterator<byte[]> keys(boolean up, boolean rotating) throws IOException {
assert rotating = false;
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(blobs.size());
final Iterator<blobItem> i = blobs.iterator();
@ -301,7 +301,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
public CloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
public synchronized CloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(blobs.size());
final Iterator<blobItem> i = blobs.iterator();
while (i.hasNext()) {
@ -316,7 +316,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
public boolean has(byte[] key) {
public synchronized boolean has(byte[] key) {
    // the key counts as present as soon as any mounted BLOB reports it
    for (int idx = 0; idx < blobs.size(); idx++) {
        if (blobs.get(idx).blob.has(key)) {
            return true;
        }
    }
    return false;
}
@ -327,7 +327,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
public byte[] get(byte[] key) throws IOException {
public synchronized byte[] get(byte[] key) throws IOException {
byte[] b;
for (blobItem bi: blobs) {
b = bi.blob.get(key);
@ -343,7 +343,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
public List<byte[]> getAll(byte[] key) throws IOException {
public synchronized List<byte[]> getAll(byte[] key) throws IOException {
byte[] b;
ArrayList<byte[]> l = new ArrayList<byte[]>(blobs.size());
for (blobItem bi: blobs) {
@ -359,7 +359,7 @@ public class BLOBArray implements BLOB {
* @return the size of the BLOB or -1 if the BLOB does not exist
* @throws IOException
*/
public long length(byte[] key) throws IOException {
public synchronized long length(byte[] key) throws IOException {
long l;
for (blobItem bi: blobs) {
l = bi.blob.length(key);
@ -374,7 +374,7 @@ public class BLOBArray implements BLOB {
* @param b
* @throws IOException
*/
public void put(byte[] key, byte[] b) throws IOException {
public synchronized void put(byte[] key, byte[] b) throws IOException {
blobItem bi = (blobs.size() == 0) ? null : blobs.get(blobs.size() - 1);
if (bi == null)
System.out.println("bi == null");
@ -397,7 +397,7 @@ public class BLOBArray implements BLOB {
* @param key the primary key
* @throws IOException
*/
public int replace(byte[] key, Rewriter rewriter) throws IOException {
public synchronized int replace(byte[] key, Rewriter rewriter) throws IOException {
int d = 0;
for (blobItem bi: blobs) {
d += bi.blob.replace(key, rewriter);
@ -410,14 +410,14 @@ public class BLOBArray implements BLOB {
* @param key the primary key
* @throws IOException
*/
public void remove(byte[] key) throws IOException {
public synchronized void remove(byte[] key) throws IOException {
    // ask every mounted BLOB to remove the key
    final Iterator<blobItem> it = blobs.iterator();
    while (it.hasNext()) {
        it.next().blob.remove(key);
    }
}
/**
* close the BLOB
*/
public void close(boolean writeIDX) {
public synchronized void close(boolean writeIDX) {
for (blobItem bi: blobs) bi.blob.close(writeIDX);
blobs.clear();
blobs = null;

@ -248,6 +248,9 @@ public final class ReferenceContainerArray {
if (this.array.entries() < 2) return false;
File f1 = this.array.unmountOldestBLOB();
File f2 = this.array.unmountOldestBLOB();
System.out.println("*** DEBUG mergeOldest: vvvvvvvvv array has " + this.array.entries() + " entries vvvvvvvvv");
System.out.println("*** DEBUG mergeOldest: unmounted " + f1.getName());
System.out.println("*** DEBUG mergeOldest: unmounted " + f2.getName());
// iterate both files and write a new one
CloneableIterator<ReferenceContainer> i1 = new blobFileEntries(f1, this.payloadrow);
@ -340,6 +343,8 @@ public final class ReferenceContainerArray {
if (!f1.delete()) f1.deleteOnExit();
if (!f2.delete()) f2.deleteOnExit();
this.array.mountBLOB(newFile);
System.out.println("*** DEBUG mergeOldest: mounted " + newFile.getName());
System.out.println("*** DEBUG mergeOldest: ^^^^^^^^^^^ array has " + this.array.entries() + " entries ^^^^^^^^^^^");
return true;
}

@ -288,8 +288,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
this.log.logConfig("Work Path: " + this.workPath.toString());
// set a high maximum cache size to current size; this is adapted later automatically
final int wordCacheMaxCount = Math.max((int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000),
(int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000));
final int wordCacheMaxCount = (int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000);
setConfig(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount));
// set network-specific performance attributes

@ -317,7 +317,6 @@ public final class plasmaSwitchboardConstants {
*/
public static final String CRAWLER_THREADS_ACTIVE_MAX = "crawler.MaxActiveThreads";
public static final String YACY_MODE_DEBUG = "yacyDebugMode";
public static final String WORDCACHE_INIT_COUNT = "wordCacheInitCount";
/**
* <p><code>public static final String <strong>WORDCACHE_MAX_COUNT</strong> = "wordCacheMaxCount"</code></p>
* <p>Name of the setting for the maximum number of words the word-cache (or DHT-Out cache) shall contain. Indexing pages if the

Loading…
Cancel
Save