From 85ca07b90e8354dc9584bc0bbe37edf64f6ccab9 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 25 Oct 2012 10:20:55 +0200 Subject: [PATCH] when a new crawl is started, an equal crawl, if still running, is terminated and the corresponding crawl profile is deleted (this also clears the crawl queue entries for that crawl profile); crawl profile names longer than 256 characters (previously 60) are cut to their first 256 characters --- htroot/Crawler_p.java | 6 ++++++ source/net/yacy/crawler/data/CrawlProfile.java | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index dd607bfa8..615393757 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -372,6 +372,12 @@ public class Crawler_p { collection); byte[] handle = ASCII.getBytes(profile.handle()); + // before we fire up a new crawl, we make sure that another crawl with the same name is not running + sb.crawler.removeActive(handle); + sb.crawler.removePassive(handle); + try {sb.crawlQueues.noticeURL.removeByProfileHandle(profile.handle(), 10000);} catch (SpaceExceededException e1) {/* NOTE(review): exception is silently ignored here - confirm this queue clean-up is meant to be best-effort */} + + // start the crawl if ("url".equals(crawlingMode)) { if (rootURLs.size() == 0) { prop.put("info", "5"); //Crawling failed diff --git a/source/net/yacy/crawler/data/CrawlProfile.java b/source/net/yacy/crawler/data/CrawlProfile.java index 52a4b663c..2f6aa3d92 100644 --- a/source/net/yacy/crawler/data/CrawlProfile.java +++ b/source/net/yacy/crawler/data/CrawlProfile.java @@ -155,7 +155,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M if (name == null || name.isEmpty()) { throw new NullPointerException("name must not be null or empty"); } - if (name.length() > 60) name = name.substring(0, 60); + if (name.length() > 256) name = name.substring(0, 256); this.doms = new ConcurrentHashMap(); final String handle = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(name)).substring(0, Word.commonHashLength); put(HANDLE, handle);