From cf36c1614f7f05f146a314fba0727d0159866459 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Sun, 12 May 2013 21:37:45 +0200
Subject: [PATCH] prevent that concurrent deletion process causes wrong
 double-check in crawl start

---
 source/net/yacy/search/Switchboard.java | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 9b5c72607..f3563e896 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2844,6 +2844,16 @@ public final class Switchboard extends serverSwitch {
         // remove url from the index to be prepared for a re-crawl
         final byte[] urlhash = url.hash();
         remove(urlhash);
+        // because the removal is done concurrently, it is possible that the crawl
+        // stacking may fail because of double occurrences of that url. Therefore
+        // we must wait here until the url has actually disappeared
+        int t = 100;
+        while (t-- > 0 && this.index.exists(ASCII.String(urlhash))) {
+            try {Thread.sleep(100);} catch (InterruptedException e) {}
+            Log.logFine("Switchboard", "STACKURL: waiting for deletion, t=" + t);
+            if (t == 20) this.index.fulltext().commit(true);
+            if (t == 50) this.index.fulltext().commit(false);
+        }
 
         // special handling of ftp protocol
         if (url.isFTP()) {
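
The added loop is a bounded polling wait: after remove(urlhash) hands the deletion to a concurrent process, the caller polls this.index.exists() up to 100 times at 100 ms intervals (roughly ten seconds) instead of assuming the deletion is already visible, and it triggers fulltext commits partway through (at t == 50 and t == 20) to help the deletion surface. The sketch below shows the same pattern as a self-contained Java program; DummyIndex is a hypothetical stand-in for the index (the YaCy types Switchboard, Fulltext, ASCII and Log are not reproduced), so treat it as an illustration of the technique under those assumptions, not as YaCy code.

    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.TimeUnit;

    public class BoundedDeletionWait {

        // Hypothetical stand-in for the index: deletions are requested now
        // but only become visible after an artificial delay, like the
        // concurrent deletion process behind remove(urlhash).
        static class DummyIndex {
            private final ConcurrentHashMap<String, Boolean> docs = new ConcurrentHashMap<>();
            private final ScheduledExecutorService pool = Executors.newSingleThreadScheduledExecutor();

            void add(String urlhash) { docs.put(urlhash, Boolean.TRUE); }

            // schedule the actual removal; the caller returns immediately
            void removeAsync(String urlhash, long delayMillis) {
                pool.schedule(() -> { docs.remove(urlhash); }, delayMillis, TimeUnit.MILLISECONDS);
            }

            boolean exists(String urlhash) { return docs.containsKey(urlhash); }

            void shutdown() { pool.shutdown(); }
        }

        // Poll until the url hash has disappeared or the attempt budget is
        // used up, mirroring the loop in the patch (without the commit()
        // nudges). Returns true if the deletion became visible in time.
        static boolean waitForDeletion(DummyIndex index, String urlhash) throws InterruptedException {
            int t = 100; // at most 100 polls of 100 ms each, i.e. about 10 seconds
            while (t-- > 0 && index.exists(urlhash)) {
                Thread.sleep(100);
            }
            return !index.exists(urlhash);
        }

        public static void main(String[] args) throws InterruptedException {
            DummyIndex index = new DummyIndex();
            index.add("hash-1");
            index.removeAsync("hash-1", 350); // becomes visible after about 350 ms

            System.out.println("deletion visible: " + waitForDeletion(index, "hash-1")); // expected: true
            index.shutdown();
        }
    }

The attempt budget is the point of the pattern: without the upper bound, a deletion that never completes would stall the crawl start indefinitely, and without the sleep the loop would busy-wait. The interim fulltext commits in the actual patch are an extra nudge to make the pending deletion visible sooner, so in practice the wait should usually end well before the budget is exhausted.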