diff --git a/htroot/CrawlProfileEditor_p.java b/htroot/CrawlProfileEditor_p.java index ef7dd8100..ad0b6777d 100644 --- a/htroot/CrawlProfileEditor_p.java +++ b/htroot/CrawlProfileEditor_p.java @@ -91,7 +91,7 @@ public class CrawlProfileEditor_p { if (entry != null) sb.profilesPassiveCrawls.newEntry(entry.map()); sb.profilesActiveCrawls.removeEntry(handle); // delete all entries from the crawl queue that are deleted here - sb.crawlQueues.noticeURL.removeByProfileHandle(handle); + sb.crawlQueues.noticeURL.removeByProfileHandle(handle, 10000); } if (post.containsKey("delete")) { // deletion of a terminated crawl profile diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java index 9436795de..450f31c0a 100644 --- a/source/de/anomic/crawler/Balancer.java +++ b/source/de/anomic/crawler/Balancer.java @@ -167,7 +167,7 @@ public class Balancer { return new CrawlEntry(entry); } - public synchronized int removeAllByProfileHandle(String profileHandle) throws IOException { + public synchronized int removeAllByProfileHandle(String profileHandle, long timeout) throws IOException { // removes all entries with a specific profile hash. // this may last some time // returns number of deletions @@ -177,7 +177,8 @@ public class Balancer { HashSet urlHashes = new HashSet(); kelondroRow.Entry rowEntry; CrawlEntry crawlEntry; - while (i.hasNext()) { + long terminate = (timeout > 0) ? System.currentTimeMillis() + timeout : Long.MAX_VALUE; + while (i.hasNext() && (System.currentTimeMillis() < terminate)) { rowEntry = (kelondroRow.Entry) i.next(); crawlEntry = new CrawlEntry(rowEntry); if (crawlEntry.profileHandle().equals(profileHandle)) { diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java index f597bf891..a23bba5eb 100644 --- a/source/de/anomic/crawler/CrawlProfile.java +++ b/source/de/anomic/crawler/CrawlProfile.java @@ -72,7 +72,7 @@ public class CrawlProfile { profileTable = new kelondroMapObjects(dyn, 500); } - void resetDatabase() { + public void resetDatabase() { // deletes the profile database and creates a new one if (profileTable != null) profileTable.close(); if (!(profileTableFile.delete())) throw new RuntimeException("cannot delete crawl profile database"); diff --git a/source/de/anomic/crawler/NoticedURL.java b/source/de/anomic/crawler/NoticedURL.java index 8300b125f..d113e60ca 100755 --- a/source/de/anomic/crawler/NoticedURL.java +++ b/source/de/anomic/crawler/NoticedURL.java @@ -174,11 +174,11 @@ public class NoticedURL { return false; } - public int removeByProfileHandle(String handle) { + public int removeByProfileHandle(String handle, long timeout) { int removed = 0; - try {removed += coreStack.removeAllByProfileHandle(handle);} catch (IOException e) {} - try {removed += limitStack.removeAllByProfileHandle(handle);} catch (IOException e) {} - try {removed += remoteStack.removeAllByProfileHandle(handle);} catch (IOException e) {} + try {removed += coreStack.removeAllByProfileHandle(handle, timeout);} catch (IOException e) {} + try {removed += limitStack.removeAllByProfileHandle(handle, timeout);} catch (IOException e) {} + try {removed += remoteStack.removeAllByProfileHandle(handle, timeout);} catch (IOException e) {} return removed; } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 8e0a5c0ff..f44040411 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1475,16 +1475,27 @@ public final class plasmaSwitchboard extends serverAbstractSwitch i = this.profilesActiveCrawls.profiles(true); CrawlProfile.entry profile; String name; - while (i.hasNext()) { - profile = i.next(); - name = profile.name(); - if (name.equals(CRAWL_PROFILE_PROXY)) this.defaultProxyProfile = profile; - if (name.equals(CRAWL_PROFILE_REMOTE)) this.defaultRemoteProfile = profile; - if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) this.defaultTextSnippetLocalProfile = profile; - if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)) this.defaultTextSnippetGlobalProfile = profile; - if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)) this.defaultMediaSnippetLocalProfile = profile; - if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) this.defaultMediaSnippetGlobalProfile = profile; + try { + while (i.hasNext()) { + profile = i.next(); + name = profile.name(); + if (name.equals(CRAWL_PROFILE_PROXY)) this.defaultProxyProfile = profile; + if (name.equals(CRAWL_PROFILE_REMOTE)) this.defaultRemoteProfile = profile; + if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) this.defaultTextSnippetLocalProfile = profile; + if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)) this.defaultTextSnippetGlobalProfile = profile; + if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)) this.defaultMediaSnippetLocalProfile = profile; + if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) this.defaultMediaSnippetGlobalProfile = profile; + } + } catch (Exception e) { + this.profilesActiveCrawls.resetDatabase(); + this.defaultProxyProfile = null; + this.defaultRemoteProfile = null; + this.defaultTextSnippetLocalProfile = null; + this.defaultTextSnippetGlobalProfile = null; + this.defaultMediaSnippetLocalProfile = null; + this.defaultMediaSnippetGlobalProfile = null; } + if (this.defaultProxyProfile == null) { // generate new default entry for proxy crawling this.defaultProxyProfile = this.profilesActiveCrawls.newEntry("proxy", null, ".*", ".*",