fixed near-deadlock situation when deleting crawl profiles

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4721 6c8d7289-2bf4-0310-a012-ef5d649a1542
17 years ago · d7e89c2aca
parent 5e3ce46339
commit d7e89c2aca
3 changed files with 42 additions and 36 deletions
--- a/build.properties
+++ b/build.properties
@@ -3,7 +3,7 @@ javacSource=1.5
 javacTarget=1.5

 # Release Configuration
-releaseVersion=0.58
+releaseVersion=0.581
 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
--- a/source/de/anomic/plasma/plasmaCrawlBalancer.java
+++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java
@@ -46,6 +46,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.Map;
@@ -172,7 +173,7 @@ public class plasmaCrawlBalancer {
        
        // first find a list of url hashes that shall be deleted
        Iterator<kelondroRow.Entry> i = urlFileIndex.rows(true, null);
-        ArrayList<String> urlHashes = new ArrayList<String>();
+        HashSet<String> urlHashes = new HashSet<String>();
        kelondroRow.Entry rowEntry;
        plasmaCrawlEntry crawlEntry;
        while (i.hasNext()) {
@@ -184,31 +185,35 @@ public class plasmaCrawlBalancer {
        }
        
        // then delete all these urls from the queues and the file index
-        Iterator<String> j = urlHashes.iterator();
-        while (j.hasNext()) this.remove(j.next());
-        return urlHashes.size();
+        return this.remove(urlHashes);
    }
    
-    public synchronized plasmaCrawlEntry remove(String urlhash) throws IOException {
-        // this method is only here, because so many import/export methods need it
-        // and it was implemented in the previous architecture
-        // however, usage is not recommended
-    	int s = urlFileIndex.size();
-       kelondroRow.Entry entry = urlFileIndex.remove(urlhash.getBytes(), false);
-       if (entry == null) return null;
-       assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s;
+    /**
+     * This method is only here because so many import/export methods need it
+     * and it was implemented in the previous architecture;
+     * however, usage is not recommended.
+     * @param urlHashes a set of url hashes that shall be removed
+     * @return number of entries that had been removed
+     * @throws IOException
+     */
+    public synchronized int remove(HashSet<String> urlHashes) throws IOException {
+        int s = urlFileIndex.size();
+        int removedCounter = 0;
+        for (String urlhash: urlHashes) {
+            kelondroRow.Entry entry = urlFileIndex.remove(urlhash.getBytes(), false);
+            if (entry != null) removedCounter++;
+        }
+        if (removedCounter == 0) return 0;
+        assert urlFileIndex.size() + removedCounter == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s;
       
-       // now delete that thing also from the queues
+        // now delete these hashes also from the queues

-       // iterate through the RAM stack
-       Iterator<String> i = urlRAMStack.iterator();
-       String h;
-       while (i.hasNext()) {
+        // iterate through the RAM stack
+        Iterator<String> i = urlRAMStack.iterator();
+        String h;
+        while (i.hasNext()) {
           h = (String) i.next();
-           if (h.equals(urlhash)) {
-               i.remove();
-               return new plasmaCrawlEntry(entry);
-           }
+           if (urlHashes.contains(h)) i.remove();
       }
       
       // iterate through the file stack
@@ -216,16 +221,10 @@ public class plasmaCrawlBalancer {
       Iterator<kelondroRow.Entry> j = urlFileStack.stackIterator(true);
       while (j.hasNext()) {
           h = new String(j.next().getColBytes(0));
-           if (h.equals(urlhash)) {
-               j.remove();
-               return new plasmaCrawlEntry(entry);
-           }
+           if (urlHashes.contains(h)) j.remove();
       }
       
-       if (kelondroAbstractRecords.debugmode) {
-           serverLog.logWarning("PLASMA BALANCER", "remove: not found urlhash " + urlhash + " in " + stackname);
-       }
-       return new plasmaCrawlEntry(entry);
+       return removedCounter;
    }
    
    public synchronized boolean has(String urlhash) {
--- a/source/de/anomic/plasma/plasmaCrawlNURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlNURL.java
@@ -157,12 +157,19 @@ public class plasmaCrawlNURL {
        return null;
    }
    
-    public plasmaCrawlEntry removeByURLHash(String urlhash) {
-        plasmaCrawlEntry entry = null;
-        try {if ((entry = coreStack.remove(urlhash)) != null) return entry;} catch (IOException e) {}
-        try {if ((entry = limitStack.remove(urlhash)) != null) return entry;} catch (IOException e) {}
-        try {if ((entry = remoteStack.remove(urlhash)) != null) return entry;} catch (IOException e) {}
-        return null;
+    /**
+     * Remove a plasmaCrawlEntry by a given hash. Usage of this method is not encouraged,
+     * because the underlying data structure (crawl stacks) cannot handle removals very well.
+     * @param urlhash the url hash to remove; core, limit and remote stack are tried in this order
+     * @return true, if the entry was removed from one of the stacks; false if not
+     */
+    public boolean removeByURLHash(String urlhash) {
+        HashSet<String> urlHashes = new HashSet<String>();
+        urlHashes.add(urlhash);
+        try {if (coreStack.remove(urlHashes) > 0) return true;} catch (IOException e) {} // nothing removed or I/O error: try next stack
+        try {if (limitStack.remove(urlHashes) > 0) return true;} catch (IOException e) {}
+        try {if (remoteStack.remove(urlHashes) > 0) return true;} catch (IOException e) {}
+        return false;
     }
    
    public int removeByProfileHandle(String handle) {