- fixed re-search bug: after a search with several words, a second search could not
  find the same words as before. This was caused by indexContainer storing the url
  references in a hashtable; a tree is needed to work with the index
  conjunction-by-enumeration
- added permanent ram cache flush (again)
- removed direct flush of ram cache after a large container is added.
  This happens especially during DHT transmission, so this fix should
  speed up DHT transmission on the server side.
- removed unused and out-dated methods

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1765 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 88c0e1da1e
commit 3703f76866
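
Background on the re-search fix: the conjunction of several search words is computed by enumerating the url references of each word index in a common order and merging them. A HashMap enumerates in no defined order, so the merge silently skipped matching url hashes on a repeated search; a tree (sorted map) makes the enumeration repeatable. A minimal sketch of the idea, using plain java.util.TreeMap in place of the kelondro classes (the class and method names here are hypothetical):

    import java.util.Iterator;
    import java.util.TreeMap;

    final class ConjunctionSketch {
        // intersect two url-hash maps by enumerating both in ascending key order
        static TreeMap conjunction(TreeMap a, TreeMap b) {
            TreeMap result = new TreeMap();
            Iterator ia = a.keySet().iterator(), ib = b.keySet().iterator();
            if (!ia.hasNext() || !ib.hasNext()) return result;
            String ka = (String) ia.next(), kb = (String) ib.next();
            while (true) {
                int c = ka.compareTo(kb); // valid only because both sides ascend
                if (c < 0) { if (ia.hasNext()) ka = (String) ia.next(); else break; }
                else if (c > 0) { if (ib.hasNext()) kb = (String) ib.next(); else break; }
                else { // url hash occurs in both word indexes
                    result.put(ka, a.get(ka));
                    if (!ia.hasNext() || !ib.hasNext()) break;
                    ka = (String) ia.next(); kb = (String) ib.next();
                }
            }
            return result;
        }
    }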

@@ -13,8 +13,6 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
public class snippet {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) throws MalformedURLException {

@@ -862,6 +862,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
return false;
}
// flush some entries from the RAM cache
// (new permanent cache flushing)
wordIndex.flushCacheSome();
boolean doneSomething = false;
// possibly delete entries from last chunk
@@ -883,7 +887,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
doneSomething = true;
}
synchronized (sbQueue) {
if (sbQueue.size() == 0) {
@@ -929,6 +932,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
processResourceStack(nextentry);
}
// ready & finished
return true;
}
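
The switchboard hunks above wire the permanent flush into the indexing loop: every pass of the dequeue cycle now calls wordIndex.flushCacheSome() before processing the next queue entry, so the ram cache drains steadily instead of in bursts. A rough sketch of the pattern (the loop body is simplified and processNextQueueEntry is a hypothetical stand-in):

    public boolean deQueueJob() {
        wordIndex.flushCacheSome(); // small, steady flush on every cycle
        // ... then take at most one entry from sbQueue and process it,
        // as in the hunk above
        return processNextQueueEntry();
    }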

@@ -118,28 +118,41 @@ public final class plasmaWordIndex {
int added = ramCache.addEntries(entries, updateTime, highPriority);
// force flush
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}
if (highPriority) {
if (ramCache.size() > ramCache.getMaxWordsHigh()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}}
while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
flushCache(1);
}
}
} else {
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
flushCache(1);
}
if (ramCache.size() > ramCache.getMaxWordsLow()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}}
while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
flushCache(1);
}
}
}
return added;
}
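
Note the shape of the new flush loops above: the busy-wait (sleep, then flushCacheToBackend) is gone, and the +500 margin gives the loop hysteresis, so a cache that hits its word limit is drained to 500 words below the limit rather than flushing again on every subsequent add; the 10 ms pause now lives inside flushCache(int) below.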
private synchronized void flushCacheToBackend(String wordHash) {
public synchronized void flushCacheSome() {
int flushCount = ramCache.size() / 500;
if (flushCount > 50) flushCount = 50;
if (flushCount < 5) flushCount = 5;
flushCache(flushCount);
}
public synchronized void flushCache(int count) {
for (int i = 0; i < count; i++) {
if (ramCache.size() == 0) break;
flushCache(ramCache.bestFlushWordHash());
try {Thread.sleep(10);} catch (InterruptedException e) {}
}
}
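
The permanent flush is self-throttling: each flushCacheSome() call flushes roughly 0.2% of the cached words, clamped to between 5 and 50 containers, pausing 10 ms after each. A hypothetical illustration of the sizing:

    int size = 10000;            // words currently in the ram cache
    int flushCount = size / 500; // -> 20
    if (flushCount > 50) flushCount = 50;
    if (flushCount < 5) flushCount = 5;
    // flushCacheSome() would flush 20 containers, ~200 ms including the sleeps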
private synchronized void flushCache(String wordHash) {
plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash);
if (c != null) {
plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(wordHash, c);
@@ -149,15 +162,6 @@ public final class plasmaWordIndex {
}
}
private int addEntriesBackend(plasmaWordIndexEntryContainer entries) {
plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(entries.wordHash(), entries);
if (feedback == null) {
return entries.size();
} else {
return backend.addEntries(feedback, -1, true);
}
}
private static final int hour = 3600000;
private static final int day = 86400000;
@@ -259,22 +263,6 @@ public final class plasmaWordIndex {
return container;
}
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
// this possibly creates an index file in the back-end
// the index file is opened and returned as entity object
long start = System.currentTimeMillis();
flushCacheToBackend(wordHash);
if (maxTime < 0) {
flushFromAssortmentCluster(wordHash, -1);
} else {
long remaining = maxTime - (System.currentTimeMillis() - start);
if (remaining > 0)
flushFromAssortmentCluster(wordHash, remaining);
}
long r = maxTime - (System.currentTimeMillis() - start);
return backend.getEntity(wordHash, deleteIfEmpty, (r < 0) ? 0 : r);
}
public Set getContainers(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
// retrieve entities that belong to the hashes
@@ -351,19 +339,6 @@ public final class plasmaWordIndex {
return removed;
}
private boolean flushFromAssortmentCluster(String key, long maxTime) {
// this should only be called if the assortment shall be deleted or returned in an index entity
if (maxTime > 0) maxTime = 8 * maxTime / 10; // reserve time for later adding to backend
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key, maxTime);
if (container == null) {
return false;
} else {
// we have a non-empty entry-container
// integrate it to the backend
return backend.addEntries(container, container.updated(), true) > 0;
}
}
public static final int RL_RAMCACHE = 0;
public static final int RL_FILECACHE = 1;
public static final int RL_ASSORTMENTS = 2;
@@ -485,121 +460,6 @@ public final class plasmaWordIndex {
}
} // class rotatingWordIterator
/*
public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) {
return new iterateFiles(startHash, up, deleteEmpty);
}
public final class iterateFiles implements Iterator {
// Iterator of hash-strings in WORDS path
private final ArrayList hierarchy; // contains TreeSet elements, each TreeSet contains File Entries
private final Comparator comp; // for string-compare
private String buffer; // the prefetch-buffer
private final boolean delete;
public iterateFiles(String startHash, boolean up, boolean deleteEmpty) {
this.hierarchy = new ArrayList();
this.comp = kelondroNaturalOrder.naturalOrder; // this is the wrong ordering but must be used as long as the assortments use the same ordering
//this.comp = new kelondroBase64Order(up, false);
this.delete = deleteEmpty;
// then we initially fill the hierarchy with the content of the root folder
String path = "WORDS";
TreeSet list = list(new File(databaseRoot, path));
// if we have a start hash then we find the appropriate subdirectory to start
if ((startHash != null) && (startHash.length() == yacySeedDB.commonHashLength)) {
delete(startHash.substring(0, 1), list);
if (list.size() > 0) {
hierarchy.add(list);
String[] paths = new String[]{startHash.substring(0, 1), startHash.substring(1, 2), startHash.substring(2, 4), startHash.substring(4, 6)};
int pathc = 0;
while ((pathc < paths.length) &&
(comp.compare((String) list.first(), paths[pathc]) == 0)) {
path = path + "/" + paths[pathc];
list = list(new File(databaseRoot, path));
delete(paths[pathc], list);
if (list.size() == 0) break;
hierarchy.add(list);
pathc++;
}
}
while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {};
} else {
hierarchy.add(list);
buffer = next0();
}
}
private synchronized void delete(String pattern, TreeSet names) {
String name;
while ((names.size() > 0) && (comp.compare((new File(name = (String) names.first())).getName(), pattern) < 0)) names.remove(name);
}
private TreeSet list(File path) {
// System.out.println("PATH: " + path);
TreeSet t = new TreeSet(comp);
String[] l = path.list();
if (l != null) for (int i = 0; i < l.length; i++) t.add(path + "/" + l[i]);
// else System.out.println("DEBUG: wrong path " + path);
// System.out.println(t);
return t;
}
private synchronized String next0() {
// the object is a File pointing to the corresponding file
File f;
String n;
TreeSet t;
do {
t = null;
while ((t == null) && (hierarchy.size() > 0)) {
t = (TreeSet) hierarchy.get(hierarchy.size() - 1);
if (t.size() == 0) {
hierarchy.remove(hierarchy.size() - 1); // we step up one hierarchy
t = null;
}
}
if ((hierarchy.size() == 0) || (t.size() == 0)) return null; // this is the end
// fetch value
f = new File(n = (String) t.first());
t.remove(n);
// if the value represents another folder, we step into the next hierarchy
if (f.isDirectory()) {
t = list(f);
if (t.size() == 0) {
if (delete) f.delete();
} else {
hierarchy.add(t);
}
f = null;
}
} while (f == null);
// that's it
if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) {
return null;
} else {
return n.substring(0, yacySeedDB.commonHashLength);
}
}
public boolean hasNext() {
return buffer != null;
}
public Object next() {
String r = buffer;
while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {};
return r;
}
public void remove() {
}
}
*/
public Object migrateWords2Assortment(String wordhash) throws IOException {
// returns the number of entries that had been added to the assortments
// can be negative if some assortments have been moved to the backend

@@ -194,7 +194,7 @@ public class plasmaWordIndexClassicDB {
}
return container;
} else {
return new plasmaWordIndexEntryContainer(wordHash, 0);
return new plasmaWordIndexEntryContainer(wordHash);
}
}

@@ -52,27 +52,28 @@
package de.anomic.plasma;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
public final class plasmaWordIndexEntryContainer implements Comparable {
private String wordHash;
private final HashMap container; // urlHash/plasmaWordIndexEntry - Mapping
private final TreeMap container; // urlHash/plasmaWordIndexEntry - Mapping
private long updateTime;
public plasmaWordIndexEntryContainer(String wordHash) {
this(wordHash,16);
this(wordHash, new kelondroNaturalOrder(true));
}
public plasmaWordIndexEntryContainer(String wordHash, int initContainerSize) {
public plasmaWordIndexEntryContainer(String wordHash, kelondroOrder ordering) {
this.wordHash = wordHash;
this.updateTime = 0;
container = new HashMap(initContainerSize); // a urlhash/plasmaWordIndexEntry - relation
container = new TreeMap(ordering); // a urlhash/plasmaWordIndexEntry - relation
}
public void setWordHash(String newWordHash) {
@@ -158,7 +159,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
}
public static plasmaWordIndexEntryContainer instantContainer(String wordHash, long creationTime, plasmaWordIndexEntry entry) {
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash,1);
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash);
c.add(entry);
c.updateTime = creationTime;
return c;
@@ -283,6 +284,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = ie1.getUrlHash().compareTo(ie2.getUrlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = (plasmaWordIndexEntry) e1.next(); else break;
} else if (c > 0) {
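
This compareTo-driven merge is the conjunction-by-enumeration from the commit message: it only finds all common url hashes because entries now arrive in ascending kelondroNaturalOrder from the TreeMap. With the old HashMap the hashes arrived unordered and the two-pointer walk skipped matches. A hypothetical demonstration of the hazard (the url hashes below are made up):

    // HashMap iteration order is unspecified, so a merge over it can skip keys
    java.util.HashMap m = new java.util.HashMap();
    m.put("BwXPVsaN3LSz", "x");
    m.put("AAd3Jtfysx_c", "y");
    System.out.println(m.keySet()); // not guaranteed ascending -> merge breaks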

@@ -956,14 +956,14 @@ public final class yacy {
String wordChunkStartHash = "------------", wordChunkEndHash;
while (wordHashIterator.hasNext()) {
plasmaWordIndexEntity wordIdxEntity = null;
plasmaWordIndexEntryContainer wordIdxContainer = null;
try {
wordCounter++;
wordhash = (String) wordHashIterator.next();
wordIdxEntity = wordIndex.getEntity(wordhash, true, -1);
wordIdxContainer = wordIndex.getContainer(wordhash, true, -1);
// the combined container will fit, read the container
Iterator wordIdxEntries = wordIdxEntity.elements(true);
Iterator wordIdxEntries = wordIdxContainer.entries();
plasmaWordIndexEntry wordIdxEntry;
while (wordIdxEntries.hasNext()) {
wordIdxEntry = (plasmaWordIndexEntry) wordIdxEntries.next();
@@ -978,7 +978,7 @@ public final class yacy {
} catch (IOException e) {}
}
// we have read all elements, now we can close it
wordIdxEntity.close(); wordIdxEntity = null;
wordIdxContainer = null;
if (wordCounter%500 == 0) {
wordChunkEndHash = wordhash;
@@ -997,7 +997,7 @@ public final class yacy {
} catch (Exception e) {
e.printStackTrace();
} finally {
if (wordIdxEntity != null) try { wordIdxEntity.close(); } catch (Exception e) {}
if (wordIdxContainer != null) try { wordIdxContainer = null; } catch (Exception e) {}
}
}
currentUrlDB.close();
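
The migration loop above now reads plasmaWordIndexEntryContainer objects instead of opening plasmaWordIndexEntity index files. Containers live in ram, so there is nothing to close(), which is why the finally block merely drops the reference. The consumption pattern, condensed from the hunk:

    plasmaWordIndexEntryContainer c = wordIndex.getContainer(wordhash, true, -1);
    Iterator i = c.entries();          // in-memory, ordered by url hash
    while (i.hasNext()) {
        plasmaWordIndexEntry e = (plasmaWordIndexEntry) i.next();
        // check the referenced url, then transfer or delete the entry
    }
    c = null;                          // no close() needed, unlike entity files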
