Bugfix for an index remove bug that appeared after a search where snippet-loading triggered word removal.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2869 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 18 years ago
parent 0e79f2fd7e
commit 78b7f6f7fd
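The core of the fix is in indexRAMCacheRI.removeEntry / removeEntries (see the hunks below): the old code deleted the word container from the cache first and only re-added it when the removal failed, so a successful URL removal (as triggered by snippet loading) silently dropped the whole word index from the RAM cache. The patched code reads the container in place, removes the entry, and deletes the container only when it has become empty. What follows is a minimal stand-alone sketch of that difference, using a plain HashMap and Set in place of YaCy's cache and indexContainer; the class and method names here are illustrative, not YaCy API.

import java.util.*;

public class RemoveEntrySketch {
    private final Map<String, Set<String>> cache = new HashMap<String, Set<String>>();

    // Old behaviour (simplified): the container is removed from the cache first and
    // only re-inserted when the entry removal fails, so a successful removal loses
    // the whole word container.
    public synchronized boolean removeEntryOld(String wordHash, String urlHash) {
        Set<String> c = cache.remove(wordHash);      // container is gone from the cache now
        if (c != null) {
            if (c.remove(urlHash)) return true;      // bug: container never put back
            cache.put(wordHash, c);                  // only restored when removal failed
        }
        return false;
    }

    // New behaviour (simplified): mutate the container in place and drop it only
    // when it is empty, mirroring the patched indexRAMCacheRI.removeEntry.
    public synchronized boolean removeEntryNew(String wordHash, String urlHash) {
        Set<String> c = cache.get(wordHash);
        if (c != null && c.remove(urlHash)) {
            if (c.isEmpty()) cache.remove(wordHash); else cache.put(wordHash, c);
            return true;
        }
        return false;
    }

    public static void main(String[] args) {
        RemoveEntrySketch s = new RemoveEntrySketch();
        s.cache.put("word", new HashSet<String>(Arrays.asList("url1", "url2")));
        s.removeEntryOld("word", "url1");
        System.out.println(s.cache.containsKey("word")); // false: the whole word entry was lost
        s.cache.put("word", new HashSet<String>(Arrays.asList("url1", "url2")));
        s.removeEntryNew("word", "url1");
        System.out.println(s.cache.get("word"));         // [url2]: only url1 was removed
    }
}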

@ -114,13 +114,13 @@ public final class transferRWI {
granted = false; // don't accept more words if there are too many words to flush
result = "busy";
pause = 60000;
} else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) {
} /* else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) {
// we are too busy flushing the ramCache to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (wordcachesize=" + sb.wordIndex.dhtOutCacheSize() + ").");
granted = false; // don't accept more words if there are too many words to flush
result = "busy";
pause = 300000;
} else {
} */ else {
// we want and can receive indexes
// log value status (currently added to find outOfMemory error
sb.getLog().logFine("Processing " + indexes.length + " bytes / " + wordc + " words / " + entryc + " entries from " + otherPeerName);

@ -1,44 +0,0 @@
// indexAsbtractRI.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 26.05.2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.index;
public abstract class indexAbstractRI implements indexRI {
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
}

@ -40,7 +40,7 @@ import de.anomic.kelondro.kelondroRowCollection;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.server.logging.serverLog;
public class indexCollectionRI extends indexAbstractRI implements indexRI {
public class indexCollectionRI implements indexRI {
kelondroCollectionIndex collectionIndex;
@ -61,7 +61,13 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public int size() {
public long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
public synchronized int size() {
try {
return collectionIndex.size();
} catch (IOException e) {
@ -70,7 +76,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public int indexSize(String wordHash) {
public synchronized int indexSize(String wordHash) {
try {
return collectionIndex.indexSize(wordHash.getBytes());
} catch (IOException e) {
@ -78,7 +84,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public Iterator wordContainers(String startWordHash, boolean rot) {
public synchronized Iterator wordContainers(String startWordHash, boolean rot) {
return new wordContainersIterator(startWordHash, rot);
}
@ -108,7 +114,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) {
public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) {
try {
kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
if (collection != null) collection.select(urlselection);
@ -119,7 +125,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public indexContainer deleteContainer(String wordHash) {
public synchronized indexContainer deleteContainer(String wordHash) {
try {
kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes());
if (collection == null) return null;
@ -129,13 +135,13 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
HashSet hs = new HashSet();
hs.add(urlHash.getBytes());
return removeEntries(wordHash, hs, deleteComplete) == 1;
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
try {
return collectionIndex.remove(wordHash.getBytes(), urlHashes, deleteComplete);
} catch (kelondroOutOfLimitsException e) {
@ -147,7 +153,13 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public synchronized indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
String wordHash = newEntries.getWordHash();
try {
collectionIndex.merge(wordHash.getBytes(), (kelondroRowCollection) newEntries);
@ -160,7 +172,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public void close(int waitingSeconds) {
public synchronized void close(int waitingSeconds) {
try {
collectionIndex.close();
} catch (IOException e) {

@ -42,7 +42,7 @@ import de.anomic.plasma.plasmaWordIndexAssortment;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
public final class indexRAMCacheRI implements indexRI {
// environment constants
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
@ -87,6 +87,12 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
}
}
public synchronized long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
private void dump(int waitingSeconds) throws IOException {
log.logConfig("creating dump for index cache '" + indexArrayFileName + "', " + cache.size() + " words (and much more urls)");
File indexDumpFile = new File(databaseRoot, indexArrayFileName);
@ -217,18 +223,18 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
return this.cacheMaxCount;
}
public int size() {
public synchronized int size() {
return cache.size();
}
public int indexSize(String wordHash) {
public synchronized int indexSize(String wordHash) {
int size = 0;
indexContainer cacheIndex = (indexContainer) cache.get(wordHash);
if (cacheIndex != null) size += cacheIndex.size();
return size;
}
public Iterator wordContainers(String startWordHash, boolean rot) {
public synchronized Iterator wordContainers(String startWordHash, boolean rot) {
// we return an iterator object that creates top-level-clones of the indexContainers
// in the cache, so that manipulations of the iterated objects do not change
// objects in the cache.
@ -276,14 +282,13 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
}
public String bestFlushWordHash() {
public synchronized String bestFlushWordHash() {
// select appropriate hash
// we have 2 different methods to find a good hash:
// - the oldest entry in the cache
// - the entry with maximum count
if (cache.size() == 0) return null;
try {
synchronized (cache) {
String hash = null;
int count = hashScore.getMaxScore();
if ((count >= cacheReferenceLimit) &&
@ -307,7 +312,6 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
hash = (String) hashDate.getMinObject(); // flush oldest entries
}
return hash;
}
} catch (Exception e) {
log.logSevere("flushFromMem: " + e.getMessage(), e);
}
@ -322,7 +326,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
return (((long) intTime) * (long) 1000) + initTime;
}
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
// retrieve container
indexContainer container = (indexContainer) cache.get(wordHash);
@ -339,46 +343,53 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
return container;
}
public indexContainer deleteContainer(String wordHash) {
public synchronized indexContainer deleteContainer(String wordHash) {
// returns the index that had been deleted
synchronized (cache) {
indexContainer container = (indexContainer) cache.remove(wordHash);
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
return container;
}
indexContainer container = (indexContainer) cache.remove(wordHash);
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
return container;
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
synchronized (cache) {
indexContainer c = (indexContainer) deleteContainer(wordHash);
if (c != null) {
if (c.removeEntry(wordHash, urlHash, deleteComplete)) return true;
this.addEntries(c, System.currentTimeMillis(), false);
public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
indexContainer c = (indexContainer) cache.get(wordHash);
if ((c != null) && (c.removeEntry(wordHash, urlHash, deleteComplete))) {
// removal successful
if ((c.size() == 0) && (deleteComplete)) {
deleteContainer(wordHash);
} else {
cache.put(wordHash, c);
hashScore.decScore(wordHash);
hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
}
return true;
}
return false;
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
if (urlHashes.size() == 0) return 0;
int count = 0;
synchronized (cache) {
indexContainer c = (indexContainer) deleteContainer(wordHash);
if (c != null) {
count = c.removeEntries(wordHash, urlHashes, deleteComplete);
if (c.size() != 0) this.addEntries(c, System.currentTimeMillis(), false);
indexContainer c = (indexContainer) cache.get(wordHash);
int count;
if ((c != null) && ((count = c.removeEntries(wordHash, urlHashes, deleteComplete)) > 0)) {
// removal successful
if ((c.size() == 0) && (deleteComplete)) {
deleteContainer(wordHash);
} else {
cache.put(wordHash, c);
hashScore.setScore(wordHash, c.size());
hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
}
return count;
}
return count;
return 0;
}
public int tryRemoveURLs(String urlHash) {
public synchronized int tryRemoveURLs(String urlHash) {
// this tries to delete an index from the cache that has this
// urlHash assigned. This can only work if the entry is really fresh
// Such entries must be searched in the latest entries
int delCount = 0;
synchronized (cache) {
Iterator i = cache.entrySet().iterator();
Map.Entry entry;
String wordhash;
@ -398,16 +409,14 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
delCount++;
}
}
}
return delCount;
}
public indexContainer addEntries(indexContainer container, long updateTime, boolean dhtCase) {
public synchronized indexContainer addEntries(indexContainer container, long updateTime, boolean dhtCase) {
// this puts the entries into the cache, not into the assortment directly
int added = 0;
// put new words into cache
synchronized (cache) {
// put container into wCache
String wordHash = container.getWordHash();
indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
@ -419,12 +428,10 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
hashDate.setScore(wordHash, intTime(updateTime));
}
entries = null;
}
return null;
}
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
synchronized (cache) {
public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash);
indexEntry[] entries = new indexEntry[] { newEntry };
@ -437,10 +444,9 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
container = null;
entries = null;
return null;
}
}
public void close(int waitingSeconds) {
public synchronized void close(int waitingSeconds) {
// dump cache
try {
dump(waitingSeconds);

@ -37,7 +37,6 @@ import java.util.Set;
import java.util.TreeSet;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexCollectionRI;
import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder;
@ -56,7 +55,7 @@ import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyDHTAction;
public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public final class plasmaWordIndex implements indexRI {
private static final String indexAssortmentClusterPath = "ACLUSTER";
private static final int assortmentCount = 64;
@ -185,7 +184,13 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
}
}
}
public long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtInCase) {
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true;

@ -52,7 +52,6 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder;
import de.anomic.index.indexEntry;
@ -63,7 +62,7 @@ import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.server.logging.serverLog;
public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI implements indexRI {
public final class plasmaWordIndexAssortmentCluster implements indexRI {
// class variables
private int clusterCount; // number of cluster files
@ -174,6 +173,18 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
}
}
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
public indexContainer addEntries(indexContainer newContainer, long creationTime, boolean dhtCase) {
// this is called by the index ram cache flush process
// it returnes NULL if the storage was successful

@ -50,7 +50,6 @@ import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexRI;
@ -58,7 +57,7 @@ import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
public class plasmaWordIndexFileCluster extends indexAbstractRI implements indexRI {
public class plasmaWordIndexFileCluster implements indexRI {
// class variables
private final File databaseRoot;
@ -107,7 +106,6 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index
}
public Iterator wordHashes(String startHash, boolean rot) {
// outdated method: to be replaced by wordContainers
return wordHashes(startHash, true, rot);
@ -301,6 +299,12 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index
} else return 0;
}
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public indexContainer addEntries(indexContainer container, long creationTime, boolean highPriority) {
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
// fetch the index cache
