- fixed re-search bug: after a search with several words, a second search could not
  find the same words as before. This was caused by indexContainer storing the url
  references in a hashtable; a tree is needed to work with the index
  conjunction-by-enumeration
- added permanent ram cache flush (again)
- removed direct flush of ram cache after a large container is added.
  This happens especially during DHT transmission, so this fix should
  speed up DHT transmission on the server side.
- removed unused and out-dated methods

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1765 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 88c0e1da1e
commit 3703f76866
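
Background on the re-search fix: the conjunction of several search words is computed by enumerating the url references of each word index in a common order and merging them. A HashMap enumerates in no defined order, so the merge silently skipped matching url hashes on a repeated search; a tree (sorted map) makes the enumeration repeatable. A minimal sketch of the idea, using plain java.util.TreeMap in place of the kelondro classes (the class and method names here are hypothetical):

    import java.util.Iterator;
    import java.util.TreeMap;

    final class ConjunctionSketch {
        // intersect two url-hash maps by enumerating both in ascending key order
        static TreeMap conjunction(TreeMap a, TreeMap b) {
            TreeMap result = new TreeMap();
            Iterator ia = a.keySet().iterator(), ib = b.keySet().iterator();
            if (!ia.hasNext() || !ib.hasNext()) return result;
            String ka = (String) ia.next(), kb = (String) ib.next();
            while (true) {
                int c = ka.compareTo(kb); // valid only because both sides ascend
                if (c < 0) { if (ia.hasNext()) ka = (String) ia.next(); else break; }
                else if (c > 0) { if (ib.hasNext()) kb = (String) ib.next(); else break; }
                else { // url hash occurs in both word indexes
                    result.put(ka, a.get(ka));
                    if (!ia.hasNext() || !ib.hasNext()) break;
                    ka = (String) ia.next(); kb = (String) ib.next();
                }
            }
            return result;
        }
    }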

@@ -13,8 +13,6 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
public class snippet {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) throws MalformedURLException {

@@ -862,6 +862,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
return false;
}
// flush some entries from the RAM cache
// (new permanent cache flushing)
wordIndex.flushCacheSome();
boolean doneSomething = false;
// possibly delete entries from last chunk
@@ -883,7 +887,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
doneSomething = true;
}
synchronized (sbQueue) {
if (sbQueue.size() == 0) {
@@ -929,6 +932,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
processResourceStack(nextentry);
}
// ready & finished
return true;
}
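
The switchboard hunks above wire the permanent flush into the indexing loop: every pass of the dequeue cycle now calls wordIndex.flushCacheSome() before processing the next queue entry, so the ram cache drains steadily instead of in bursts. A rough sketch of the pattern (the loop body is simplified and processNextQueueEntry is a hypothetical stand-in):

    public boolean deQueueJob() {
        wordIndex.flushCacheSome(); // small, steady flush on every cycle
        // ... then take at most one entry from sbQueue and process it,
        // as in the hunk above
        return processNextQueueEntry();
    }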

@@ -118,28 +118,41 @@ public final class plasmaWordIndex {
int added = ramCache.addEntries(entries, updateTime, highPriority);
// force flush
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}
if (highPriority) {
if (ramCache.size() > ramCache.getMaxWordsHigh()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}}
while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
flushCache(1);
}
}
} else {
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
flushCache(1);
}
if (ramCache.size() > ramCache.getMaxWordsLow()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}}
while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
flushCache(1);
}
}
}
return added;
}
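
Note the shape of the new flush loops above: the busy-wait (sleep, then flushCacheToBackend) is gone, and the +500 margin gives the loop hysteresis, so a cache that hits its word limit is drained to 500 words below the limit rather than flushing again on every subsequent add; the 10 ms pause now lives inside flushCache(int) below.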
private synchronized void flushCacheToBackend(String wordHash) {
public synchronized void flushCacheSome() {
int flushCount = ramCache.size() / 500;
if (flushCount > 50) flushCount = 50;
if (flushCount < 5) flushCount = 5;
flushCache(flushCount);
}
public synchronized void flushCache(int count) {
for (int i = 0; i < count; i++) {
if (ramCache.size() == 0) break;
flushCache(ramCache.bestFlushWordHash());
try {Thread.sleep(10);} catch (InterruptedException e) {}
}
}
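
The permanent flush is self-throttling: each flushCacheSome() call flushes roughly 0.2% of the cached words, clamped to between 5 and 50 containers, pausing 10 ms after each. A hypothetical illustration of the sizing:

    int size = 10000;            // words currently in the ram cache
    int flushCount = size / 500; // -> 20
    if (flushCount > 50) flushCount = 50;
    if (flushCount < 5) flushCount = 5;
    // flushCacheSome() would flush 20 containers, ~200 ms including the sleeps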
private synchronized void flushCache(String wordHash) {
plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash);
if (c != null) {
plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(wordHash, c);
@@ -149,15 +162,6 @@ public final class plasmaWordIndex {
}
}
private int addEntriesBackend(plasmaWordIndexEntryContainer entries) {
plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(entries.wordHash(), entries);
if (feedback == null) {
return entries.size();
} else {
return backend.addEntries(feedback, -1, true);
}
}
private static final int hour = 3600000;
private static final int day = 86400000;
@@ -259,22 +263,6 @@ public final class plasmaWordIndex {
return container;
}
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
// this possibly creates an index file in the back-end
// the index file is opened and returned as entity object
long start = System.currentTimeMillis();
flushCacheToBackend(wordHash);
if (maxTime < 0) {
flushFromAssortmentCluster(wordHash, -1);
} else {
long remaining = maxTime - (System.currentTimeMillis() - start);
if (remaining > 0)
flushFromAssortmentCluster(wordHash, remaining);
}
long r = maxTime - (System.currentTimeMillis() - start);
return backend.getEntity(wordHash, deleteIfEmpty, (r < 0) ? 0 : r);
}
public Set getContainers(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
// retrieve entities that belong to the hashes
@@ -351,19 +339,6 @@ public final class plasmaWordIndex {
return removed;
}
private boolean flushFromAssortmentCluster(String key, long maxTime) {
// this should only be called if the assortment shall be deleted or returned in an index entity
if (maxTime > 0) maxTime = 8 * maxTime / 10; // reserve time for later adding to backend
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key, maxTime);
if (container == null) {
return false;
} else {
// we have a non-empty entry-container
// integrate it to the backend
return backend.addEntries(container, container.updated(), true) > 0;
}
}
public static final int RL_RAMCACHE = 0;
public static final int RL_FILECACHE = 1;
public static final int RL_ASSORTMENTS = 2;
@@ -485,121 +460,6 @@ public final class plasmaWordIndex {
}
} // class rotatingWordIterator
/*
public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) {
return new iterateFiles(startHash, up, deleteEmpty);
}
public final class iterateFiles implements Iterator {
// Iterator of hash-strings in WORDS path
private final ArrayList hierarchy; // contains TreeSet elements, each TreeSet contains File Entries
private final Comparator comp; // for string-compare
private String buffer; // the prefetch-buffer
private final boolean delete;
public iterateFiles(String startHash, boolean up, boolean deleteEmpty) {
this.hierarchy = new ArrayList();
this.comp = kelondroNaturalOrder.naturalOrder; // this is the wrong ordering but must be used as long as the assortments use the same ordering
//this.comp = new kelondroBase64Order(up, false);
this.delete = deleteEmpty;
// then we initially fill the hierarchy with the content of the root folder
String path = "WORDS";
TreeSet list = list(new File(databaseRoot, path));
// if we have a start hash then we find the appropriate subdirectory to start
if ((startHash != null) && (startHash.length() == yacySeedDB.commonHashLength)) {
delete(startHash.substring(0, 1), list);
if (list.size() > 0) {
hierarchy.add(list);
String[] paths = new String[]{startHash.substring(0, 1), startHash.substring(1, 2), startHash.substring(2, 4), startHash.substring(4, 6)};
int pathc = 0;
while ((pathc < paths.length) &&
(comp.compare((String) list.first(), paths[pathc]) == 0)) {
path = path + "/" + paths[pathc];
list = list(new File(databaseRoot, path));
delete(paths[pathc], list);
if (list.size() == 0) break;
hierarchy.add(list);
pathc++;
}
}
while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {};
} else {
hierarchy.add(list);
buffer = next0();
}
}
private synchronized void delete(String pattern, TreeSet names) {
String name;
while ((names.size() > 0) && (comp.compare((new File(name = (String) names.first())).getName(), pattern) < 0)) names.remove(name);
}
private TreeSet list(File path) {
// System.out.println("PATH: " + path);
TreeSet t = new TreeSet(comp);
String[] l = path.list();
if (l != null) for (int i = 0; i < l.length; i++) t.add(path + "/" + l[i]);
// else System.out.println("DEBUG: wrong path " + path);
// System.out.println(t);
return t;
}
private synchronized String next0() {
// the object is a File pointing to the corresponding file
File f;
String n;
TreeSet t;
do {
t = null;
while ((t == null) && (hierarchy.size() > 0)) {
t = (TreeSet) hierarchy.get(hierarchy.size() - 1);
if (t.size() == 0) {
hierarchy.remove(hierarchy.size() - 1); // we step up one hierarchy
t = null;
}
}
if ((hierarchy.size() == 0) || (t.size() == 0)) return null; // this is the end
// fetch value
f = new File(n = (String) t.first());
t.remove(n);
// if the value represents another folder, we step into the next hierarchy
if (f.isDirectory()) {
t = list(f);
if (t.size() == 0) {
if (delete) f.delete();
} else {
hierarchy.add(t);
}
f = null;
}
} while (f == null);
// that's it
if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) {
return null;
} else {
return n.substring(0, yacySeedDB.commonHashLength);
}
}
public boolean hasNext() {
return buffer != null;
}
public Object next() {
String r = buffer;
while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {};
return r;
}
public void remove() {
}
}
*/
public Object migrateWords2Assortment(String wordhash) throws IOException {
// returns the number of entries that had been added to the assortments
// can be negative if some assortments have been moved to the backend

@@ -194,7 +194,7 @@ public class plasmaWordIndexClassicDB {
}
return container;
} else {
return new plasmaWordIndexEntryContainer(wordHash, 0);
return new plasmaWordIndexEntryContainer(wordHash);
}
}

@@ -52,27 +52,28 @@
package de.anomic.plasma;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
public final class plasmaWordIndexEntryContainer implements Comparable {
private String wordHash;
private final HashMap container; // urlHash/plasmaWordIndexEntry - Mapping
private final TreeMap container; // urlHash/plasmaWordIndexEntry - Mapping
private long updateTime;
public plasmaWordIndexEntryContainer(String wordHash) {
this(wordHash,16);
this(wordHash, new kelondroNaturalOrder(true));
}
public plasmaWordIndexEntryContainer(String wordHash, int initContainerSize) {
public plasmaWordIndexEntryContainer(String wordHash, kelondroOrder ordering) {
this.wordHash = wordHash;
this.updateTime = 0;
container = new HashMap(initContainerSize); // a urlhash/plasmaWordIndexEntry - relation
container = new TreeMap(ordering); // a urlhash/plasmaWordIndexEntry - relation
}
public void setWordHash(String newWordHash) {
@@ -158,7 +159,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
}
public static plasmaWordIndexEntryContainer instantContainer(String wordHash, long creationTime, plasmaWordIndexEntry entry) {
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash,1);
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash);
c.add(entry);
c.updateTime = creationTime;
return c;
@@ -283,6 +284,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = ie1.getUrlHash().compareTo(ie2.getUrlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = (plasmaWordIndexEntry) e1.next(); else break;
} else if (c > 0) {
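
This compareTo-driven merge is the conjunction-by-enumeration from the commit message: it only finds all common url hashes because entries now arrive in ascending kelondroNaturalOrder from the TreeMap. With the old HashMap the hashes arrived unordered and the two-pointer walk skipped matches. A hypothetical demonstration of the hazard (the url hashes below are made up):

    // HashMap iteration order is unspecified, so a merge over it can skip keys
    java.util.HashMap m = new java.util.HashMap();
    m.put("BwXPVsaN3LSz", "x");
    m.put("AAd3Jtfysx_c", "y");
    System.out.println(m.keySet()); // not guaranteed ascending -> merge breaks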

@@ -956,14 +956,14 @@ public final class yacy {
String wordChunkStartHash = "------------", wordChunkEndHash;
while (wordHashIterator.hasNext()) {
plasmaWordIndexEntity wordIdxEntity = null;
plasmaWordIndexEntryContainer wordIdxContainer = null;
try {
wordCounter++;
wordhash = (String) wordHashIterator.next();
wordIdxEntity = wordIndex.getEntity(wordhash, true, -1);
wordIdxContainer = wordIndex.getContainer(wordhash, true, -1);
// the combined container will fit, read the container
Iterator wordIdxEntries = wordIdxEntity.elements(true);
Iterator wordIdxEntries = wordIdxContainer.entries();
plasmaWordIndexEntry wordIdxEntry;
while (wordIdxEntries.hasNext()) {
wordIdxEntry = (plasmaWordIndexEntry) wordIdxEntries.next();
@@ -978,7 +978,7 @@ public final class yacy {
} catch (IOException e) {}
}
// we have read all elements, now we can close it
wordIdxEntity.close(); wordIdxEntity = null;
wordIdxContainer = null;
if (wordCounter%500 == 0) {
wordChunkEndHash = wordhash;
@@ -997,7 +997,7 @@ public final class yacy {
} catch (Exception e) {
e.printStackTrace();
} finally {
if (wordIdxEntity != null) try { wordIdxEntity.close(); } catch (Exception e) {}
if (wordIdxContainer != null) try { wordIdxContainer = null; } catch (Exception e) {}
}
}
currentUrlDB.close();
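
The migration loop above now reads plasmaWordIndexEntryContainer objects instead of opening plasmaWordIndexEntity index files. Containers live in ram, so there is nothing to close(), which is why the finally block merely drops the reference. The consumption pattern, condensed from the hunk:

    plasmaWordIndexEntryContainer c = wordIndex.getContainer(wordhash, true, -1);
    Iterator i = c.entries();          // in-memory, ordered by url hash
    while (i.hasNext()) {
        plasmaWordIndexEntry e = (plasmaWordIndexEntry) i.next();
        // check the referenced url, then transfer or delete the entry
    }
    c = null;                          // no close() needed, unlike entity files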
