fixed near-deadlock situation when deleting crawl profiles

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4721 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 17 years ago
parent 5e3ce46339
commit d7e89c2aca

@@ -3,7 +3,7 @@ javacSource=1.5
 javacTarget=1.5
 
 # Release Configuration
-releaseVersion=0.58
+releaseVersion=0.581
 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@@ -46,6 +46,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.Map;
@@ -172,7 +173,7 @@ public class plasmaCrawlBalancer {
 
         // first find a list of url hashes that shall be deleted
         Iterator<kelondroRow.Entry> i = urlFileIndex.rows(true, null);
-        ArrayList<String> urlHashes = new ArrayList<String>();
+        HashSet<String> urlHashes = new HashSet<String>();
         kelondroRow.Entry rowEntry;
         plasmaCrawlEntry crawlEntry;
         while (i.hasNext()) {
@@ -184,31 +185,35 @@ public class plasmaCrawlBalancer {
         }
 
         // then delete all these urls from the queues and the file index
-        Iterator<String> j = urlHashes.iterator();
-        while (j.hasNext()) this.remove(j.next());
-        return urlHashes.size();
+        return this.remove(urlHashes);
     }
 
-    public synchronized plasmaCrawlEntry remove(String urlhash) throws IOException {
-        // this method is only here, because so many import/export methods need it
-        // and it was implemented in the previous architecture
-        // however, usage is not recommended
+    /**
+     * this method is only here, because so many import/export methods need it
+     * and it was implemented in the previous architecture
+     * however, usage is not recommended
+     * @param urlHashes, a list of hashes that shall be removed
+     * @return number of entries that had been removed
+     * @throws IOException
+     */
+    public synchronized int remove(HashSet<String> urlHashes) throws IOException {
         int s = urlFileIndex.size();
-        kelondroRow.Entry entry = urlFileIndex.remove(urlhash.getBytes(), false);
-        if (entry == null) return null;
-        assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s;
+        int removedCounter = 0;
+        for (String urlhash: urlHashes) {
+            kelondroRow.Entry entry = urlFileIndex.remove(urlhash.getBytes(), false);
+            if (entry != null) removedCounter++;
+        }
+        if (removedCounter == 0) return 0;
+        assert urlFileIndex.size() + removedCounter == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s;
 
-        // now delete that thing also from the queues
+        // now delete these hashes also from the queues
 
         // iterate through the RAM stack
         Iterator<String> i = urlRAMStack.iterator();
         String h;
         while (i.hasNext()) {
             h = (String) i.next();
-            if (h.equals(urlhash)) {
-                i.remove();
-                return new plasmaCrawlEntry(entry);
-            }
+            if (urlHashes.contains(h)) i.remove();
         }
         // iterate through the file stack
@@ -216,16 +221,10 @@ public class plasmaCrawlBalancer {
         Iterator<kelondroRow.Entry> j = urlFileStack.stackIterator(true);
         while (j.hasNext()) {
             h = new String(j.next().getColBytes(0));
-            if (h.equals(urlhash)) {
-                j.remove();
-                return new plasmaCrawlEntry(entry);
-            }
+            if (urlHashes.contains(h)) j.remove();
         }
-        if (kelondroAbstractRecords.debugmode) {
-            serverLog.logWarning("PLASMA BALANCER", "remove: not found urlhash " + urlhash + " in " + stackname);
-        }
-        return new plasmaCrawlEntry(entry);
+        return removedCounter;
     }
 
     public synchronized boolean has(String urlhash) {
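
The core of the fix is visible in the hunks above: the old remove(String) scanned the whole RAM stack and the whole file stack once per hash, inside a synchronized method, so deleting a profile with N queued URLs meant N full scans while holding the balancer's monitor; every other crawler thread blocked on that monitor for the duration, which is the reported near-deadlock. The new remove(HashSet) takes the lock once and removes all hashes in a single pass. Below is a minimal standalone sketch of that batching pattern; the class and field names are illustrative, not YaCy's actual code.

import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;

// Minimal sketch of the batching pattern in this commit (illustrative names).
// Removing N hashes one by one means N synchronized calls, each scanning the
// whole stack: O(N * M) work under the lock. Collecting the hashes in a
// HashSet and doing a single pass reduces this to one scan with O(1)
// contains() checks, and the lock is taken only once.
public class BatchRemovalSketch {
    private final LinkedList<String> stack = new LinkedList<String>();

    // old style: one full scan per hash, lock acquired N times
    public synchronized boolean removeOne(String hash) {
        Iterator<String> it = stack.iterator();
        while (it.hasNext()) {
            if (it.next().equals(hash)) { it.remove(); return true; }
        }
        return false;
    }

    // new style: one scan for all hashes, lock acquired once
    public synchronized int removeAll(HashSet<String> hashes) {
        int removed = 0;
        Iterator<String> it = stack.iterator();
        while (it.hasNext()) {
            if (hashes.contains(it.next())) { it.remove(); removed++; }
        }
        return removed;
    }
}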

@@ -157,12 +157,19 @@ public class plasmaCrawlNURL {
         return null;
     }
 
-    public plasmaCrawlEntry removeByURLHash(String urlhash) {
-        plasmaCrawlEntry entry = null;
-        try {if ((entry = coreStack.remove(urlhash)) != null) return entry;} catch (IOException e) {}
-        try {if ((entry = limitStack.remove(urlhash)) != null) return entry;} catch (IOException e) {}
-        try {if ((entry = remoteStack.remove(urlhash)) != null) return entry;} catch (IOException e) {}
-        return null;
+    /**
+     * remove a plasmaCrawlEntry by a given hash. Usage of this method is not encouraged,
+     * because the underlying data structure (crawl stacks) cannot handle removals very good.
+     * @param urlhash
+     * @return true, if the entry was removed; false if not
+     */
+    public boolean removeByURLHash(String urlhash) {
+        HashSet<String> urlHashes = new HashSet<String>();
+        urlHashes.add(urlhash);
+        try {return coreStack.remove(urlHashes) > 0;} catch (IOException e) {}
+        try {return limitStack.remove(urlHashes) > 0;} catch (IOException e) {}
+        try {return remoteStack.remove(urlHashes) > 0;} catch (IOException e) {}
+        return false;
     }
 
     public int removeByProfileHandle(String handle) {
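
A hypothetical caller of the reworked plasmaCrawlNURL API, just to show the changed contract: the method now answers whether a removal happened instead of handing back the removed entry.

// hypothetical caller; 'noticedURLs' stands in for wherever the
// plasmaCrawlNURL instance lives in the surrounding code
boolean removed = noticedURLs.removeByURLHash(urlhash);
if (!removed) {
    // nothing was deleted: the hash was on none of the crawl stacks
}

Wrapping the single hash in a one-element HashSet keeps a single removal code path for both the single-hash and the batch case.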
