diff --git a/defaults/yacy.init b/defaults/yacy.init
index 65be3fbda..c8cb23058 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -626,9 +626,7 @@ javastart_priority=10
# ram cache during indexing. When YaCy is shut down, this cache must be
# flushed to disc; this may last some minutes.
wordCacheMaxCount = 30000
-wordCacheInitCount = 30000
wordCacheMaxCount__pro = 100000
-wordCacheInitCount__pro = 100000
# Specifies if yacy can be used as transparent http proxy.
#
diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html
index 1f2cfcd57..057c91b3c 100644
--- a/htroot/PerformanceQueues_p.html
+++ b/htroot/PerformanceQueues_p.html
@@ -122,15 +122,6 @@
flushed to disc; this may last some minutes.
-
-
Initial space of words in cache:
-
-
-
-
- This is is the init size of space for words in cache.
-
-
diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java
index 5332a7aa1..7601a5829 100644
--- a/htroot/PerformanceQueues_p.java
+++ b/htroot/PerformanceQueues_p.java
@@ -230,9 +230,6 @@ public class PerformanceQueues_p {
final int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 20000);
switchboard.setConfig(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount));
switchboard.webIndex.index().setBufferMaxWordCount(wordCacheMaxCount);
-
- final int wordCacheInitCount = post.getInt(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000);
- switchboard.setConfig(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, Integer.toString(wordCacheInitCount));
}
if ((post != null) && (post.containsKey("poolConfig"))) {
@@ -295,7 +292,6 @@ public class PerformanceQueues_p {
prop.putNum("minAgeOfCache", switchboard.webIndex.index().getBufferMinAge() / 1000 / 60); // minutes
prop.putNum("maxWaitingWordFlush", switchboard.getConfigLong("maxWaitingWordFlush", 180));
prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000));
- prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000));
prop.put("crawlPauseProxy", switchboard.getConfigLong(plasmaSwitchboardConstants.PROXY_ONLINE_CAUTION_DELAY, 30000));
prop.put("crawlPauseLocalsearch", switchboard.getConfigLong(plasmaSwitchboardConstants.LOCALSEACH_ONLINE_CAUTION_DELAY, 30000));
prop.put("crawlPauseRemotesearch", switchboard.getConfigLong(plasmaSwitchboardConstants.REMOTESEARCH_ONLINE_CAUTION_DELAY, 30000));
diff --git a/htroot/PerformanceQueues_p.xml b/htroot/PerformanceQueues_p.xml
index a1f95c117..8535e76a9 100644
--- a/htroot/PerformanceQueues_p.xml
+++ b/htroot/PerformanceQueues_p.xml
@@ -30,7 +30,6 @@
#[maxAgeOfCache]##[minAgeOfCache]##[wordOutCacheMaxCount]#
- #[wordCacheInitCount]##[wordFlushSize]#
diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java
index 800b4977f..fbab98ae9 100644
--- a/source/de/anomic/crawler/Balancer.java
+++ b/source/de/anomic/crawler/Balancer.java
@@ -53,13 +53,14 @@ public class Balancer {
private final ConcurrentHashMap>
domainStacks; // a map from domain name part to Lists with url hashs
private final ArrayList urlRAMStack; // a list that is flushed first
- private Stack urlFileStack; // a file with url hashes
- private ObjectIndex urlFileIndex;
+ private Stack urlFileStack; // a file with url hashes
+ private ObjectIndex urlFileIndex;
private final File cacheStacksPath;
private final String stackname;
private boolean top; // to alternate between top and bottom of the file stack
private long minimumLocalDelta;
private long minimumGlobalDelta;
+ private long lastPrepare;
public Balancer(final File cachePath, final String stackname, final boolean fullram,
final long minimumLocalDelta, final long minimumGlobalDelta) {
@@ -72,6 +73,7 @@ public class Balancer {
this.top = true;
this.minimumLocalDelta = minimumLocalDelta;
this.minimumGlobalDelta = minimumGlobalDelta;
+ this.lastPrepare = System.currentTimeMillis();
// create a stack for newly entered entries
if (!(cachePath.exists())) cachePath.mkdir(); // make the path
@@ -577,6 +579,10 @@ public class Balancer {
// this is only to protection against the worst case, where the crawler could
// behave in a DoS-manner
Log.logInfo("BALANCER", "forcing crawl-delay of " + sleeptime + " milliseconds for " + crawlEntry.url().getHost() + ((sleeptime > Math.max(minimumLocalDelta, minimumGlobalDelta)) ? " (caused by robots.txt)" : ""));
+ if (System.currentTimeMillis() - this.lastPrepare > 10000) {
+ prepare(100);
+ this.lastPrepare = System.currentTimeMillis();
+ }
try {synchronized(this) { this.wait(sleeptime); }} catch (final InterruptedException e) {}
}
@@ -595,9 +601,22 @@ public class Balancer {
* @throws IOException
*/
public synchronized ArrayList top(int count) throws IOException {
+ // construct a list from the first entries of the urlRAMStack, which prepare() has filled
+ count = prepare(count);
+ final ArrayList list = new ArrayList();
+ for (int i = 0; i < count; i++) {
+ final String urlhash = urlRAMStack.get(i);
+ final Row.Entry entry = urlFileIndex.get(urlhash.getBytes());
+ if (entry == null) break;
+ list.add(new CrawlEntry(entry));
+ }
+ return list;
+ }
+
+ private int prepare(int count) throws IOException {
// if we need to flush anything, then flush the domain stack first,
// to avoid that new urls get hidden by old entries from the file stack
- if (urlRAMStack == null) return null;
+ if (urlRAMStack == null) return 0;
// ensure that the domain stacks are filled enough
shiftFileToDomStacks(count);
@@ -614,16 +633,7 @@ public class Balancer {
// if the ram is still not full enough, use the file stack
shiftFileToRAM(count);
- // finally, construct a list using the urlRAMStack which was filled with this procedure
- count = Math.min(count, urlRAMStack.size());
- final ArrayList list = new ArrayList();
- for (int i = 0; i < count; i++) {
- final String urlhash = urlRAMStack.get(i);
- final Row.Entry entry = urlFileIndex.get(urlhash.getBytes());
- if (entry == null) break;
- list.add(new CrawlEntry(entry));
- }
- return list;
+ return Math.min(count, urlRAMStack.size());
}
public synchronized Iterator iterator() throws IOException {
diff --git a/source/de/anomic/kelondro/blob/BLOBArray.java b/source/de/anomic/kelondro/blob/BLOBArray.java
index e03af11aa..bac514baa 100755
--- a/source/de/anomic/kelondro/blob/BLOBArray.java
+++ b/source/de/anomic/kelondro/blob/BLOBArray.java
@@ -136,7 +136,7 @@ public class BLOBArray implements BLOB {
* @param location
* @throws IOException
*/
- public void mountBLOB(File location) throws IOException {
+ public synchronized void mountBLOB(File location) throws IOException {
Date d;
try {
d = DateFormatter.parseShortSecond(location.getName().substring(0, 14));
@@ -147,7 +147,7 @@ public class BLOBArray implements BLOB {
blobs.add(new blobItem(d, location, oneBlob));
}
- public void unmountBLOB(File location, boolean writeIDX) {
+ public synchronized void unmountBLOB(File location, boolean writeIDX) {
Iterator i = this.blobs.iterator();
blobItem b;
while (i.hasNext()) {
@@ -160,7 +160,7 @@ public class BLOBArray implements BLOB {
}
}
- public File unmountOldestBLOB() {
+ public synchronized File unmountOldestBLOB() {
if (this.blobs.size() == 0) return null;
blobItem b = this.blobs.remove(0);
b.blob.close(false);
@@ -171,7 +171,7 @@ public class BLOBArray implements BLOB {
* return the number of BLOB files in this array
* @return
*/
- public int entries() {
+ public synchronized int entries() {
return this.blobs.size();
}
@@ -181,7 +181,7 @@ public class BLOBArray implements BLOB {
* @param creation
* @return
*/
- public File newBLOB(Date creation) {
+ public synchronized File newBLOB(Date creation) {
return new File(heapLocation, DateFormatter.formatShortSecond(creation) + "." + blobSalt + ".blob");
}
@@ -221,9 +221,9 @@ public class BLOBArray implements BLOB {
}
/*
- * return the size of the repository
+ * return the size of the repository: the sum of the file lengths (in bytes) of all mounted BLOB files
*/
- public long length() {
+ public synchronized long length() {
long s = 0;
for (int i = 0; i < blobs.size(); i++) s += blobs.get(i).location.length();
return s;
@@ -262,16 +262,16 @@ public class BLOBArray implements BLOB {
* clears the content of the database
* @throws IOException
*/
- public void clear() throws IOException {
+ public synchronized void clear() throws IOException {
for (blobItem bi: blobs) bi.blob.clear();
blobs.clear();
}
/**
- * ask for the number of entries
+ * ask for the total number of entries, summed over all BLOB files in this array
* @return the number of entries in the table
*/
- public int size() {
+ public synchronized int size() {
int s = 0;
for (blobItem bi: blobs) s += bi.blob.size();
return s;
@@ -284,7 +284,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
- public CloneableIterator keys(boolean up, boolean rotating) throws IOException {
+ public synchronized CloneableIterator keys(boolean up, boolean rotating) throws IOException {
assert rotating = false;
final List> c = new ArrayList>(blobs.size());
final Iterator i = blobs.iterator();
@@ -301,7 +301,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
- public CloneableIterator keys(boolean up, byte[] firstKey) throws IOException {
+ public synchronized CloneableIterator keys(boolean up, byte[] firstKey) throws IOException {
final List> c = new ArrayList>(blobs.size());
final Iterator i = blobs.iterator();
while (i.hasNext()) {
@@ -316,7 +316,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
- public boolean has(byte[] key) {
+ public synchronized boolean has(byte[] key) {
for (blobItem bi: blobs) if (bi.blob.has(key)) return true;
return false;
}
@@ -327,7 +327,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
- public byte[] get(byte[] key) throws IOException {
+ public synchronized byte[] get(byte[] key) throws IOException {
byte[] b;
for (blobItem bi: blobs) {
b = bi.blob.get(key);
@@ -343,7 +343,7 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
- public List getAll(byte[] key) throws IOException {
+ public synchronized List getAll(byte[] key) throws IOException {
byte[] b;
ArrayList l = new ArrayList(blobs.size());
for (blobItem bi: blobs) {
@@ -359,7 +359,7 @@ public class BLOBArray implements BLOB {
* @return the size of the BLOB or -1 if the BLOB does not exist
* @throws IOException
*/
- public long length(byte[] key) throws IOException {
+ public synchronized long length(byte[] key) throws IOException {
long l;
for (blobItem bi: blobs) {
l = bi.blob.length(key);
@@ -374,7 +374,7 @@ public class BLOBArray implements BLOB {
* @param b
* @throws IOException
*/
- public void put(byte[] key, byte[] b) throws IOException {
+ public synchronized void put(byte[] key, byte[] b) throws IOException {
blobItem bi = (blobs.size() == 0) ? null : blobs.get(blobs.size() - 1);
if (bi == null)
System.out.println("bi == null");
@@ -397,7 +397,7 @@ public class BLOBArray implements BLOB {
* @param key the primary key
* @throws IOException
*/
- public int replace(byte[] key, Rewriter rewriter) throws IOException {
+ public synchronized int replace(byte[] key, Rewriter rewriter) throws IOException {
int d = 0;
for (blobItem bi: blobs) {
d += bi.blob.replace(key, rewriter);
@@ -410,14 +410,14 @@ public class BLOBArray implements BLOB {
* @param key the primary key
* @throws IOException
*/
- public void remove(byte[] key) throws IOException {
+ public synchronized void remove(byte[] key) throws IOException {
for (blobItem bi: blobs) bi.blob.remove(key);
}
/**
* close the BLOB
*/
- public void close(boolean writeIDX) {
+ public synchronized void close(boolean writeIDX) {
for (blobItem bi: blobs) bi.blob.close(writeIDX);
blobs.clear();
blobs = null;
diff --git a/source/de/anomic/kelondro/text/ReferenceContainerArray.java b/source/de/anomic/kelondro/text/ReferenceContainerArray.java
index 05601d0df..3cb01717e 100644
--- a/source/de/anomic/kelondro/text/ReferenceContainerArray.java
+++ b/source/de/anomic/kelondro/text/ReferenceContainerArray.java
@@ -248,6 +248,9 @@ public final class ReferenceContainerArray {
if (this.array.entries() < 2) return false;
File f1 = this.array.unmountOldestBLOB();
File f2 = this.array.unmountOldestBLOB();
+ System.out.println("*** DEBUG mergeOldest: vvvvvvvvv array has " + this.array.entries() + " entries vvvvvvvvv");
+ System.out.println("*** DEBUG mergeOldest: unmounted " + f1.getName());
+ System.out.println("*** DEBUG mergeOldest: unmounted " + f2.getName());
// iterate both files and write a new one
CloneableIterator i1 = new blobFileEntries(f1, this.payloadrow);
@@ -340,6 +343,8 @@ public final class ReferenceContainerArray {
if (!f1.delete()) f1.deleteOnExit();
if (!f2.delete()) f2.deleteOnExit();
this.array.mountBLOB(newFile);
+ System.out.println("*** DEBUG mergeOldest: mounted " + newFile.getName());
+ System.out.println("*** DEBUG mergeOldest: ^^^^^^^^^^^ array has " + this.array.entries() + " entries ^^^^^^^^^^^");
return true;
}
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 3ad99c491..fadbd6ad5 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -288,8 +288,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitchpublic static final String WORDCACHE_MAX_COUNT = "wordCacheMaxCount"
*
Name of the setting how many words the word-cache (or DHT-Out cache) shall contain maximal. Indexing pages if the