diff --git a/htroot/xml/snippet.java b/htroot/xml/snippet.java
index 2a1784b21..d76afbfc8 100644
--- a/htroot/xml/snippet.java
+++ b/htroot/xml/snippet.java
@@ -13,8 +13,6 @@ import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.plasma.plasmaURL;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
-import de.anomic.yacy.yacyCore;
-import de.anomic.yacy.yacySeed;
 
 public class snippet {
     public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) throws MalformedURLException {
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index c72d57ff7..6f5da0e53 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -862,6 +862,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             return false;
         }
 
+        // flush some entries from the RAM cache
+        // (new permanent cache flushing)
+        wordIndex.flushCacheSome();
+
         boolean doneSomething = false;
 
         // possibly delete entries from last chunk
@@ -883,7 +887,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             doneSomething = true;
         }
 
-
        synchronized (sbQueue) {
 
            if (sbQueue.size() == 0) {
@@ -929,6 +932,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 
            processResourceStack(nextentry);
        }
+
+        // ready & finished
        return true;
     }
 
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 22be23037..95464270d 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -118,28 +118,41 @@ public final class plasmaWordIndex {
         int added = ramCache.addEntries(entries, updateTime, highPriority);
 
         // force flush
-        while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
-            try { Thread.sleep(10); } catch (InterruptedException e) { }
-            flushCacheToBackend(ramCache.bestFlushWordHash());
-        }
-
         if (highPriority) {
             if (ramCache.size() > ramCache.getMaxWordsHigh()) {
-                while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
-                    try { Thread.sleep(10); } catch (InterruptedException e) { }
-                    flushCacheToBackend(ramCache.bestFlushWordHash());
-            }}
+                while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
+                    flushCache(1);
+                }
+            }
         } else {
+            while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
+                flushCache(1);
+            }
             if (ramCache.size() > ramCache.getMaxWordsLow()) {
-                while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
-                    try { Thread.sleep(10); } catch (InterruptedException e) { }
-                    flushCacheToBackend(ramCache.bestFlushWordHash());
-            }}
+                while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
+                    flushCache(1);
+                }
+            }
         }
         return added;
     }
 
-    private synchronized void flushCacheToBackend(String wordHash) {
+    public synchronized void flushCacheSome() {
+        int flushCount = ramCache.size() / 500;
+        if (flushCount > 50) flushCount = 50;
+        if (flushCount < 5) flushCount = 5;
+        flushCache(flushCount);
+    }
+
+    public synchronized void flushCache(int count) {
+        for (int i = 0; i < count; i++) {
+            if (ramCache.size() == 0) break;
+            flushCache(ramCache.bestFlushWordHash());
+            try {Thread.sleep(10);} catch (InterruptedException e) {}
+        }
+    }
+
+    private synchronized void flushCache(String wordHash) {
         plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash);
         if (c != null) {
             plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(wordHash, c);
@@ -149,15 +162,6 @@ public final class plasmaWordIndex {
         }
     }
 
-    private int addEntriesBackend(plasmaWordIndexEntryContainer entries) {
-        plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(entries.wordHash(), entries);
-        if (feedback == null) {
-            return entries.size();
-        } else {
-            return backend.addEntries(feedback, -1, true);
-        }
-    }
-
     private static final int hour = 3600000;
     private static final int day = 86400000;
 
@@ -259,22 +263,6 @@ public final class plasmaWordIndex {
         return container;
     }
 
-    public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
-        // this possibly creates an index file in the back-end
-        // the index file is opened and returned as entity object
-        long start = System.currentTimeMillis();
-        flushCacheToBackend(wordHash);
-        if (maxTime < 0) {
-            flushFromAssortmentCluster(wordHash, -1);
-        } else {
-            long remaining = maxTime - (System.currentTimeMillis() - start);
-            if (remaining > 0)
-                flushFromAssortmentCluster(wordHash, remaining);
-        }
-        long r = maxTime - (System.currentTimeMillis() - start);
-        return backend.getEntity(wordHash, deleteIfEmpty, (r < 0) ? 0 : r);
-    }
-
     public Set getContainers(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
 
         // retrieve entities that belong to the hashes
@@ -351,19 +339,6 @@ public final class plasmaWordIndex {
         return removed;
     }
 
-    private boolean flushFromAssortmentCluster(String key, long maxTime) {
-        // this should only be called if the assortment shall be deleted or returned in an index entity
-        if (maxTime > 0) maxTime = 8 * maxTime / 10; // reserve time for later adding to backend
-        plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key, maxTime);
-        if (container == null) {
-            return false;
-        } else {
-            // we have a non-empty entry-container
-            // integrate it to the backend
-            return backend.addEntries(container, container.updated(), true) > 0;
-        }
-    }
-
     public static final int RL_RAMCACHE = 0;
     public static final int RL_FILECACHE = 1;
     public static final int RL_ASSORTMENTS = 2;
@@ -485,121 +460,6 @@ public final class plasmaWordIndex {
         }
     } // class rotatingWordIterator
 
-/*
-    public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) {
-        return new iterateFiles(startHash, up, deleteEmpty);
-    }
-
-    public final class iterateFiles implements Iterator {
-        // Iterator of hash-strings in WORDS path
-
-        private final ArrayList hierarchy; // contains TreeSet elements, earch TreeSet contains File Entries
-        private final Comparator comp; // for string-compare
-        private String buffer; // the prefetch-buffer
-        private final boolean delete;
-
-        public iterateFiles(String startHash, boolean up, boolean deleteEmpty) {
-            this.hierarchy = new ArrayList();
-            this.comp = kelondroNaturalOrder.naturalOrder; // this is the wrong ordering but mut be used as long as the assortments uses the same ordering
-            //this.comp = new kelondroBase64Order(up, false);
-            this.delete = deleteEmpty;
-
-            // the we initially fill the hierarchy with the content of the root folder
-            String path = "WORDS";
-            TreeSet list = list(new File(databaseRoot, path));
-
-            // if we have a start hash then we find the appropriate subdirectory to start
-            if ((startHash != null) && (startHash.length() == yacySeedDB.commonHashLength)) {
-                delete(startHash.substring(0, 1), list);
-                if (list.size() > 0) {
-                    hierarchy.add(list);
-                    String[] paths = new String[]{startHash.substring(0, 1), startHash.substring(1, 2), startHash.substring(2, 4), startHash.substring(4, 6)};
-                    int pathc = 0;
-                    while ((pathc < paths.length) &&
-                           (comp.compare((String) list.first(), paths[pathc]) == 0)) {
-                        path = path + "/" + paths[pathc];
-                        list = list(new File(databaseRoot, path));
-                        delete(paths[pathc], list);
-                        if (list.size() == 0) break;
-                        hierarchy.add(list);
-                        pathc++;
-                    }
-                }
-                while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {};
-            } else {
-                hierarchy.add(list);
-                buffer = next0();
-            }
-        }
-
-        private synchronized void delete(String pattern, TreeSet names) {
-            String name;
-            while ((names.size() > 0) && (comp.compare((new File(name = (String) names.first())).getName(), pattern) < 0)) names.remove(name);
-        }
-
-        private TreeSet list(File path) {
-// System.out.println("PATH: " + path);
-            TreeSet t = new TreeSet(comp);
-            String[] l = path.list();
-            if (l != null) for (int i = 0; i < l.length; i++) t.add(path + "/" + l[i]);
-// else System.out.println("DEBUG: wrong path " + path);
-// System.out.println(t);
-            return t;
-        }
-
-        private synchronized String next0() {
-            // the object is a File pointing to the corresponding file
-            File f;
-            String n;
-            TreeSet t;
-            do {
-                t = null;
-                while ((t == null) && (hierarchy.size() > 0)) {
-                    t = (TreeSet) hierarchy.get(hierarchy.size() - 1);
-                    if (t.size() == 0) {
-                        hierarchy.remove(hierarchy.size() - 1); // we step up one hierarchy
-                        t = null;
-                    }
-                }
-                if ((hierarchy.size() == 0) || (t.size() == 0)) return null; // this is the end
-                // fetch value
-                f = new File(n = (String) t.first());
-                t.remove(n);
-                // if the value represents another folder, we step into the next hierarchy
-                if (f.isDirectory()) {
-                    t = list(f);
-                    if (t.size() == 0) {
-                        if (delete) f.delete();
-                    } else {
-                        hierarchy.add(t);
-                    }
-                    f = null;
-                }
-            } while (f == null);
-            // thats it
-            if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) {
-                return null;
-            } else {
-                return n.substring(0, yacySeedDB.commonHashLength);
-            }
-        }
-
-        public boolean hasNext() {
-            return buffer != null;
-        }
-
-        public Object next() {
-            String r = buffer;
-            while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {};
-            return r;
-        }
-
-        public void remove() {
-        }
-    }
-*/
-
-
     public Object migrateWords2Assortment(String wordhash) throws IOException {
         // returns the number of entries that had been added to the assortments
         // can be negative if some assortments have been moved to the backend
diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
index 39d325df3..7b96e1e8a 100644
--- a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
+++ b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
@@ -194,7 +194,7 @@ public class plasmaWordIndexClassicDB {
             }
             return container;
         } else {
-            return new plasmaWordIndexEntryContainer(wordHash, 0);
+            return new plasmaWordIndexEntryContainer(wordHash);
         }
     }
 
diff --git a/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java b/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
index 831e22d68..dffcfd37b 100644
--- a/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
+++ b/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
@@ -52,27 +52,28 @@
 
 package de.anomic.plasma;
 
-import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Set;
 import java.util.TreeMap;
 
 import de.anomic.kelondro.kelondroBase64Order;
+import de.anomic.kelondro.kelondroNaturalOrder;
+import de.anomic.kelondro.kelondroOrder;
 
 public final class plasmaWordIndexEntryContainer implements Comparable {
 
     private String wordHash;
-    private final HashMap container; // urlHash/plasmaWordIndexEntry - Mapping
+    private final TreeMap container; // urlHash/plasmaWordIndexEntry - Mapping
     private long updateTime;
 
     public plasmaWordIndexEntryContainer(String wordHash) {
-        this(wordHash,16);
+        this(wordHash, new kelondroNaturalOrder(true));
     }
 
-    public plasmaWordIndexEntryContainer(String wordHash, int initContainerSize) {
+    public plasmaWordIndexEntryContainer(String wordHash, kelondroOrder ordering) {
         this.wordHash = wordHash;
         this.updateTime = 0;
-        container = new HashMap(initContainerSize); // a urlhash/plasmaWordIndexEntry - relation
+        container = new TreeMap(ordering); // a urlhash/plasmaWordIndexEntry - relation
     }
 
     public void setWordHash(String newWordHash) {
@@ -158,7 +159,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
     }
 
     public static plasmaWordIndexEntryContainer instantContainer(String wordHash, long creationTime, plasmaWordIndexEntry entry) {
-        plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash,1);
+        plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash);
         c.add(entry);
         c.updateTime = creationTime;
         return c;
@@ -283,6 +284,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
         long stamp = System.currentTimeMillis();
         while ((System.currentTimeMillis() - stamp) < time) {
             c = ie1.getUrlHash().compareTo(ie2.getUrlHash());
+            //System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
             if (c < 0) {
                 if (e1.hasNext()) ie1 = (plasmaWordIndexEntry) e1.next(); else break;
             } else if (c > 0) {
diff --git a/source/yacy.java b/source/yacy.java
index 5782409ac..af82cf475 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -956,14 +956,14 @@ public final class yacy {
             String wordChunkStartHash = "------------", wordChunkEndHash;
 
             while (wordHashIterator.hasNext()) {
-                plasmaWordIndexEntity wordIdxEntity = null;
+                plasmaWordIndexEntryContainer wordIdxContainer = null;
                 try {
                     wordCounter++;
                     wordhash = (String) wordHashIterator.next();
-                    wordIdxEntity = wordIndex.getEntity(wordhash, true, -1);
+                    wordIdxContainer = wordIndex.getContainer(wordhash, true, -1);
 
                     // the combined container will fit, read the container
-                    Iterator wordIdxEntries = wordIdxEntity.elements(true);
+                    Iterator wordIdxEntries = wordIdxContainer.entries();
                     plasmaWordIndexEntry wordIdxEntry;
                     while (wordIdxEntries.hasNext()) {
                         wordIdxEntry = (plasmaWordIndexEntry) wordIdxEntries.next();
@@ -978,7 +978,7 @@ public final class yacy {
                         } catch (IOException e) {}
                     }
                     // we have read all elements, now we can close it
-                    wordIdxEntity.close(); wordIdxEntity = null;
+                    wordIdxContainer = null;
 
                     if (wordCounter%500 == 0) {
                         wordChunkEndHash = wordhash;
@@ -997,7 +997,7 @@ public final class yacy {
                 } catch (Exception e) {
                     e.printStackTrace();
                 } finally {
-                    if (wordIdxEntity != null) try { wordIdxEntity.close(); } catch (Exception e) {}
+                    if (wordIdxContainer != null) try { wordIdxContainer = null; } catch (Exception e) {}
                 }
             }
             currentUrlDB.close();
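
The HashMap-to-TreeMap change in plasmaWordIndexEntryContainer makes each container iterate its URL hashes in one fixed sort order, which the compareTo-driven join loop in the last plasmaWordIndexEntryContainer hunk depends on: it only advances the iterator holding the smaller key, so both sides must already be sorted the same way. As a reading aid only, and not part of the patch, here is a minimal, self-contained sketch of that sorted merge-join pattern; the class name and the sample hashes are made up, and plain java.util ordering stands in for the kelondroOrder used in the patch:

    import java.util.Iterator;
    import java.util.TreeMap;

    // Illustrative sketch: intersect two sorted key sets in a single forward pass.
    public class SortedMergeJoinSketch {
        public static void main(String[] args) {
            // two "containers" keyed by URL hash, both sorted by natural String order
            TreeMap a = new TreeMap();
            TreeMap b = new TreeMap();
            a.put("AAAAAAAAAAAA", "entry-a1"); a.put("CCCCCCCCCCCC", "entry-a2"); a.put("EEEEEEEEEEEE", "entry-a3");
            b.put("BBBBBBBBBBBB", "entry-b1"); b.put("CCCCCCCCCCCC", "entry-b2"); b.put("EEEEEEEEEEEE", "entry-b3");

            Iterator ia = a.keySet().iterator();
            Iterator ib = b.keySet().iterator();
            if (!(ia.hasNext() && ib.hasNext())) return;
            String ka = (String) ia.next();
            String kb = (String) ib.next();
            while (true) {
                int c = ka.compareTo(kb);
                if (c < 0) {
                    // left side is behind: advance it
                    if (ia.hasNext()) ka = (String) ia.next(); else break;
                } else if (c > 0) {
                    // right side is behind: advance it
                    if (ib.hasNext()) kb = (String) ib.next(); else break;
                } else {
                    // keys match: this URL hash occurs in both containers
                    System.out.println("joined: " + ka);
                    if (ia.hasNext() && ib.hasNext()) {
                        ka = (String) ia.next();
                        kb = (String) ib.next();
                    } else break;
                }
            }
        }
    }

Run as is, the sketch prints the two hashes present in both maps, in ascending order. A HashMap cannot give that iteration guarantee, which appears to be why the container was switched to a TreeMap driven by an explicit ordering.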