- added a cache for active crawl profiles to the crawl switchboard

- moved the per-domain counter cache from the crawl stacker to the crawl profiles. the crawl domain counter is therefore now relative to each crawl start, not to the whole crawler.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8018 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 14 years ago
parent 37e35f2741
commit 3a807e10cf
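
Background on the second change: each CrawlProfile now carries its own domain counter (the DomProfile map), so the domMaxPages limit resets with every crawl start. Below is a minimal standalone sketch of that pattern, not YaCy code; the names DomProfile, domInc and domMaxPages come from the diff, everything else is invented for illustration.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class DomainCounterSketch {

    // per-domain bookkeeping, mirroring CrawlProfile.DomProfile after this commit
    static final class DomProfile {
        final String referrer;
        final int depth;
        int count;
        DomProfile(final String referrer, final int depth) {
            this.referrer = referrer;
            this.depth = depth;
            this.count = 1;
        }
    }

    // one map per crawl profile, so counts are scoped to a single crawl start
    private final Map<String, DomProfile> doms = new ConcurrentHashMap<String, DomProfile>();
    private final int domMaxPages;

    DomainCounterSketch(final int domMaxPages) {
        this.domMaxPages = domMaxPages;
    }

    void domInc(final String domain, final String referrer, final int depth) {
        final DomProfile dp = this.doms.get(domain);
        if (dp == null) {
            this.doms.put(domain, new DomProfile(referrer, depth)); // new domain
        } else {
            dp.count++; // domain seen again within this crawl
        }
    }

    // mirrors the CrawlStacker check that now reads profile.getDom(host)
    boolean domainLimitExceeded(final String domain) {
        final DomProfile dp = this.doms.get(domain);
        return dp != null && dp.count >= this.domMaxPages;
    }
}

With one such counter held by the CrawlStacker for the whole crawler (the old layout), a domain would be throttled across unrelated crawls; hanging the map off the profile scopes the limit to the crawl it belongs to.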

@@ -261,7 +261,7 @@ public class CrawlProfileEditor_p {
         if (active && profile.domMaxPages() > 0
                 && profile.domMaxPages() != Integer.MAX_VALUE) {
             String item;
-            while (i <= domlistlength && !(item = crawlStacker.domName(true, i)).isEmpty()){
+            while (i <= domlistlength && !(item = profile.domName(true, i)).isEmpty()){
                 if (i == domlistlength) {
                     item += " ...";
                 }

@@ -25,6 +25,7 @@
 package de.anomic.crawler;
 
+import java.util.Iterator;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.regex.Pattern;
@@ -72,6 +73,25 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
     private Pattern urlmustmatch = null, urlmustnotmatch = null, ipmustmatch = null, ipmustnotmatch = null;
 
+    public final static class DomProfile {
+
+        public String referrer;
+        public int depth, count;
+
+        public DomProfile(final String ref, final int d) {
+            this.referrer = ref;
+            this.depth = d;
+            this.count = 1;
+        }
+
+        public void inc() {
+            this.count++;
+        }
+    }
+
+    private final Map<String, DomProfile> doms;
+
     /**
      * Constructor which creates CrawlPofile from parameters.
      * @param name name of the crawl profile
@@ -121,6 +141,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
         if (name == null || name.isEmpty()) {
             throw new NullPointerException("name must not be null or empty");
         }
+        this.doms = new ConcurrentHashMap<String, DomProfile>();
+
         final String handle = (startURL == null)
             ? Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(name)).substring(0, Word.commonHashLength)
             : ASCII.String(startURL.hash());
@@ -154,6 +176,45 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
     public CrawlProfile(final Map<String, String> ext) {
         super(ext == null ? 1 : ext.size());
         if (ext != null) putAll(ext);
+        this.doms = new ConcurrentHashMap<String, DomProfile>();
+    }
+
+    public void domInc(final String domain, final String referrer, final int depth) {
+        final DomProfile dp = this.doms.get(domain);
+        if (dp == null) {
+            // new domain
+            this.doms.put(domain, new DomProfile(referrer, depth));
+        } else {
+            // increase counter
+            dp.inc();
+        }
+    }
+
+    public String domName(final boolean attr, final int index) {
+        final Iterator<Map.Entry<String, DomProfile>> domnamesi = this.doms.entrySet().iterator();
+        String domname = "";
+        Map.Entry<String, DomProfile> ey;
+        DomProfile dp;
+        int i = 0;
+        while ((domnamesi.hasNext()) && (i < index)) {
+            ey = domnamesi.next();
+            i++;
+        }
+        if (domnamesi.hasNext()) {
+            ey = domnamesi.next();
+            dp = ey.getValue();
+            domname = ey.getKey() + ((attr) ? ("/r=" + dp.referrer + ", d=" + dp.depth + ", c=" + dp.count) : " ");
+        }
+        return domname;
+    }
+
+    public void clearDoms() {
+        this.doms.clear();
+    }
+
+    public DomProfile getDom(final String domain) {
+        return this.doms.get(domain);
     }
 
     /**

@@ -33,12 +33,10 @@ import java.net.InetAddress;
 import java.net.MalformedURLException;
 import java.net.UnknownHostException;
 import java.util.Date;
-import java.util.Iterator;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
 import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.ConcurrentHashMap;
 
 import net.yacy.cora.document.ASCII;
 import net.yacy.cora.document.MultiProtocolURI;
@@ -55,6 +53,7 @@ import net.yacy.repository.Blacklist;
 import net.yacy.repository.FilterEngine;
 import net.yacy.search.Switchboard;
 import net.yacy.search.index.Segment;
+import de.anomic.crawler.CrawlProfile.DomProfile;
 import de.anomic.crawler.ResultURLs.EventOrigin;
 import de.anomic.crawler.ZURL.FailCategory;
 import de.anomic.crawler.retrieval.FTPLoader;
@@ -75,25 +74,6 @@ public final class CrawlStacker {
     private final boolean acceptLocalURLs, acceptGlobalURLs;
     private final FilterEngine domainList;
 
-    public final static class DomProfile {
-
-        public String referrer;
-        public int depth, count;
-
-        public DomProfile(final String ref, final int d) {
-            this.referrer = ref;
-            this.depth = d;
-            this.count = 1;
-        }
-
-        public void inc() {
-            this.count++;
-        }
-    }
-
-    private final Map<String, DomProfile> doms;
-
     // this is the process that checks url for double-occurrences and for allowance/disallowance by robots.txt
 
     public CrawlStacker(
@@ -116,37 +96,9 @@ public final class CrawlStacker {
         this.fastQueue = new WorkflowProcessor<Request>("CrawlStackerFast", "This process checks new urls before they are enqueued into the balancer (proper, double-check, correct domain, filter)", new String[]{"Balancer"}, this, "job", 10000, null, 2);
         this.slowQueue = new WorkflowProcessor<Request>("CrawlStackerSlow", "This is like CrawlStackerFast, but does additionaly a DNS lookup. The CrawlStackerFast does not need this because it can use the DNS cache.", new String[]{"Balancer"}, this, "job", 1000, null, 5);
-        this.doms = new ConcurrentHashMap<String, DomProfile>();
 
         this.log.logInfo("STACKCRAWL thread initialized.");
     }
 
-    private void domInc(final String domain, final String referrer, final int depth) {
-        final DomProfile dp = this.doms.get(domain);
-        if (dp == null) {
-            // new domain
-            this.doms.put(domain, new DomProfile(referrer, depth));
-        } else {
-            // increase counter
-            dp.inc();
-        }
-    }
-
-    public String domName(final boolean attr, final int index) {
-        final Iterator<Map.Entry<String, DomProfile>> domnamesi = this.doms.entrySet().iterator();
-        String domname = "";
-        Map.Entry<String, DomProfile> ey;
-        DomProfile dp;
-        int i = 0;
-        while ((domnamesi.hasNext()) && (i < index)) {
-            ey = domnamesi.next();
-            i++;
-        }
-        if (domnamesi.hasNext()) {
-            ey = domnamesi.next();
-            dp = ey.getValue();
-            domname = ey.getKey() + ((attr) ? ("/r=" + dp.referrer + ", d=" + dp.depth + ", c=" + dp.count) : " ");
-        }
-        return domname;
-    }
-
     public int size() {
         return this.fastQueue.queueSize() + this.slowQueue.queueSize();
@@ -160,7 +112,6 @@ public final class CrawlStacker {
     public void clear() {
         this.fastQueue.clear();
         this.slowQueue.clear();
-        this.doms.clear();
     }
 
     public void announceClose() {
@@ -412,7 +363,7 @@ public final class CrawlStacker {
 
         // add domain to profile domain list
         if (profile.domMaxPages() != Integer.MAX_VALUE) {
-            domInc(entry.url().getHost(), (referrerURL == null) ? null : referrerURL.getHost().toLowerCase(), entry.depth());
+            profile.domInc(entry.url().getHost(), (referrerURL == null) ? null : referrerURL.getHost().toLowerCase(), entry.depth());
         }
 
         if (global) {
@@ -520,7 +471,7 @@ public final class CrawlStacker {
         // deny urls that exceed allowed number of occurrences
         final int maxAllowedPagesPerDomain = profile.domMaxPages();
         if (maxAllowedPagesPerDomain < Integer.MAX_VALUE) {
-            final DomProfile dp = this.doms.get(url.getHost());
+            final DomProfile dp = profile.getDom(url.getHost());
             if (dp != null && dp.count >= maxAllowedPagesPerDomain) {
                 if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + profile.domMaxPages() + " is allowed.");
                 return "crawl stack domain counter exceeded";

@@ -28,14 +28,18 @@ package de.anomic.crawler;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeMap;
 
 import net.yacy.cora.document.UTF8;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
 import net.yacy.kelondro.blob.MapHeap;
 import net.yacy.kelondro.data.word.Word;
+import net.yacy.kelondro.index.RowSpaceExceededException;
 import net.yacy.kelondro.logging.Log;
+import net.yacy.kelondro.order.Base64Order;
 import net.yacy.kelondro.order.NaturalOrder;
 import net.yacy.kelondro.util.FileUtils;
 import net.yacy.kelondro.util.kelondroException;
@@ -53,7 +57,6 @@ public final class CrawlSwitchboard {
 
     public static final String DBFILE_ACTIVE_CRAWL_PROFILES = "crawlProfilesActive.heap";
     public static final String DBFILE_PASSIVE_CRAWL_PROFILES = "crawlProfilesPassive.heap";
-    public static final String DBFILE_INVALID_CRAWL_PROFILES = "crawlProfilesInvalid.heap";
 
     public static final long CRAWL_PROFILE_PROXY_RECRAWL_CYCLE = 60L * 24L;
     public static final long CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE = 60L * 24L * 30L;
@@ -63,8 +66,9 @@ public final class CrawlSwitchboard {
     public static final long CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE = 60L * 24L * 30L;
 
     private final Log log;
-    private Map<byte[], Map<String, String>> profilesActiveCrawls;
-    private final Map<byte[], Map<String, String>> profilesPassiveCrawls, profilesInvalidCrawls;
+    private MapHeap profilesActiveCrawls;
+    private final MapHeap profilesPassiveCrawls;
+    private final Map<byte[], CrawlProfile> profilesActiveCrawlsCache; //TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
     public CrawlProfile defaultProxyProfile;
     public CrawlProfile defaultRemoteProfile;
     public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
@@ -84,28 +88,31 @@ public final class CrawlSwitchboard {
             System.exit(0);
         }
         this.log = log;
+        this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));
 
         // make crawl profiles database and default profiles
         this.queuesRoot = queuesRoot;
         this.queuesRoot.mkdirs();
         this.log.logConfig("Initializing Crawl Profiles");
-        final File profilesInvalidFile = new File(queuesRoot, DBFILE_INVALID_CRAWL_PROFILES);
-        this.profilesInvalidCrawls = loadFromDB(profilesInvalidFile);
         final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
         this.profilesActiveCrawls = loadFromDB(profilesActiveFile);
         for (final byte[] handle : this.profilesActiveCrawls.keySet()) {
-            final CrawlProfile p;
+            CrawlProfile p;
+            try {
                 p = new CrawlProfile(this.profilesActiveCrawls.get(handle));
+            } catch (final IOException e) {
+                p = null;
+            } catch (final RowSpaceExceededException e) {
+                p = null;
+            }
+            if (p == null) continue;
             if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH))) {
                 removeActive(handle);
-                putInvalid(handle, p);
                 Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
                         + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTMATCH
                         + " is no valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTMATCH));
             } else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH))) {
-                putInvalid(handle, p);
                 removeActive(handle);
                 Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
                         + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTNOTMATCH
@@ -121,8 +128,15 @@ public final class CrawlSwitchboard {
 
         final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES);
         this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile);
         for (final byte[] handle : this.profilesPassiveCrawls.keySet()) {
-            final CrawlProfile p = new CrawlProfile(this.profilesPassiveCrawls.get(handle));
+            CrawlProfile p;
+            try {
+                p = new CrawlProfile(this.profilesPassiveCrawls.get(handle));
                 Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
+            } catch (final IOException e) {
+                continue;
+            } catch (final RowSpaceExceededException e) {
+                continue;
+            }
         }
         log.logInfo("Loaded passive crawl profiles from file " + profilesPassiveFile.getName() +
                 ", " + this.profilesPassiveCrawls.size() + " entries" +
@@ -131,21 +145,35 @@ public final class CrawlSwitchboard {
 
     public CrawlProfile getActive(final byte[] profileKey) {
         if (profileKey == null) return null;
-        final Map<String, String> m = this.profilesActiveCrawls.get(profileKey);
-        if (m == null) return null;
-        return new CrawlProfile(m);
-    }
-
-    public CrawlProfile getInvalid(final byte[] profileKey) {
-        if (profileKey == null) return null;
-        final Map<String, String> m = this.profilesInvalidCrawls.get(profileKey);
+        // get from cache
+        CrawlProfile p = this.profilesActiveCrawlsCache.get(profileKey);
+        if (p != null) return p;
+
+        // get from db
+        Map<String, String> m;
+        try {
+            m = this.profilesActiveCrawls.get(profileKey);
+        } catch (final IOException e) {
+            m = null;
+        } catch (final RowSpaceExceededException e) {
+            m = null;
+        }
         if (m == null) return null;
-        return new CrawlProfile(m);
+        p = new CrawlProfile(m);
+        this.profilesActiveCrawlsCache.put(profileKey, p);
+        return p;
     }
 
     public CrawlProfile getPassive(final byte[] profileKey) {
         if (profileKey == null) return null;
-        final Map<String, String> m = this.profilesPassiveCrawls.get(profileKey);
+        Map<String, String> m;
+        try {
+            m = this.profilesPassiveCrawls.get(profileKey);
+        } catch (final IOException e) {
+            m = null;
+        } catch (final RowSpaceExceededException e) {
+            m = null;
+        }
         if (m == null) return null;
         return new CrawlProfile(m);
     }
@@ -154,24 +182,16 @@ public final class CrawlSwitchboard {
         return this.profilesActiveCrawls.keySet();
     }
 
-    public Set<byte[]> getInvalid() {
-        return this.profilesInvalidCrawls.keySet();
-    }
-
     public Set<byte[]> getPassive() {
         return this.profilesPassiveCrawls.keySet();
     }
 
     public void removeActive(final byte[] profileKey) {
         if (profileKey == null) return;
+        this.profilesActiveCrawlsCache.remove(profileKey);
         this.profilesActiveCrawls.remove(profileKey);
     }
 
-    public void removeInvalid(final byte[] profileKey) {
-        if (profileKey == null) return;
-        this.profilesInvalidCrawls.remove(profileKey);
-    }
-
     public void removePassive(final byte[] profileKey) {
         if (profileKey == null) return;
         this.profilesPassiveCrawls.remove(profileKey);
@@ -179,19 +199,13 @@ public final class CrawlSwitchboard {
 
     public void putActive(final byte[] profileKey, final CrawlProfile profile) {
         this.profilesActiveCrawls.put(profileKey, profile);
-    }
-
-    public void putInvalid(final byte[] profileKey, final CrawlProfile profile) {
-        this.profilesInvalidCrawls.put(profileKey, profile);
+        this.profilesActiveCrawlsCache.put(profileKey, profile);
     }
 
     public void putPassive(final byte[] profileKey, final CrawlProfile profile) {
         this.profilesPassiveCrawls.put(profileKey, profile);
     }
 
-    public void clear() {
-    }
-
     private void initActiveCrawlProfiles() {
         this.defaultProxyProfile = null;
         this.defaultRemoteProfile = null;
@@ -282,6 +296,7 @@ public final class CrawlSwitchboard {
     }
 
     private void resetProfiles() {
+        this.profilesActiveCrawlsCache.clear();
        final File pdb = new File(this.queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
         if (pdb.exists()) FileUtils.deletedelete(pdb);
         try {
@@ -293,7 +308,8 @@ public final class CrawlSwitchboard {
         initActiveCrawlProfiles();
     }
 
-    public boolean cleanProfiles() throws InterruptedException {
+    public boolean clear() throws InterruptedException {
+        this.profilesActiveCrawlsCache.clear();
         CrawlProfile entry;
         boolean hasDoneSomething = false;
         try {
@@ -302,7 +318,13 @@ public final class CrawlSwitchboard {
                 if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress");
 
                 // getting next profile
+                try {
                     entry = new CrawlProfile(this.profilesActiveCrawls.get(handle));
+                } catch (final IOException e) {
+                    continue;
+                } catch (final RowSpaceExceededException e) {
+                    continue;
+                }
                 if (!((entry.name().equals(CRAWL_PROFILE_PROXY)) ||
                       (entry.name().equals(CRAWL_PROFILE_REMOTE)) ||
                       (entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) ||
@@ -325,9 +347,9 @@ public final class CrawlSwitchboard {
 
     public void close() {
-        ((MapHeap) this.profilesActiveCrawls).close();
-        ((MapHeap) this.profilesInvalidCrawls).close();
-        ((MapHeap) this.profilesPassiveCrawls).close();
+        this.profilesActiveCrawlsCache.clear();
+        this.profilesActiveCrawls.close();
+        this.profilesPassiveCrawls.close();
     }
@@ -336,8 +358,8 @@ public final class CrawlSwitchboard {
      * @param file DB file
      * @return crawl profile data
      */
-    private Map<byte[], Map<String, String>> loadFromDB(final File file) {
-        Map<byte[], Map<String, String>> ret;
+    private MapHeap loadFromDB(final File file) {
+        MapHeap ret;
         try {
             ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
         } catch (final IOException e) {

@@ -1283,7 +1283,7 @@ public final class Switchboard extends serverSwitch {
             (this.crawlStacker != null && !this.crawlStacker.isEmpty()) ||
             this.crawlQueues.noticeURL.notEmpty())
             return false;
-        return this.crawler.cleanProfiles();
+        return this.crawler.clear();
     }
 
     public void close() {
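
The first change of the commit, the profilesActiveCrawlsCache, is a cache-aside lookup in front of the on-disk MapHeap, as the getActive() hunk above shows. A simplified sketch of the same pattern follows; the Store interface is a hypothetical stand-in for MapHeap, and the byte-array comparator approximates what Base64Order.enhancedCoder provides in YaCy.

import java.util.Collections;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;

public class ProfileCacheSketch {

    // byte[] has no value-based equals/hashCode, hence the TreeMap with an
    // explicit comparator (YaCy uses Base64Order.enhancedCoder here)
    static final Comparator<byte[]> BYTES = new Comparator<byte[]>() {
        public int compare(final byte[] a, final byte[] b) {
            final int n = Math.min(a.length, b.length);
            for (int i = 0; i < n; i++) {
                if (a[i] != b[i]) return (a[i] & 0xff) - (b[i] & 0xff);
            }
            return a.length - b.length;
        }
    };

    interface Store { // hypothetical stand-in for the on-disk MapHeap
        Map<String, String> get(byte[] key) throws Exception;
    }

    private final Map<byte[], Map<String, String>> cache =
            Collections.synchronizedMap(new TreeMap<byte[], Map<String, String>>(BYTES));
    private final Store db;

    ProfileCacheSketch(final Store db) {
        this.db = db;
    }

    Map<String, String> getActive(final byte[] profileKey) {
        if (profileKey == null) return null;
        Map<String, String> p = this.cache.get(profileKey); // 1. cache hit?
        if (p != null) return p;
        try {
            p = this.db.get(profileKey);                    // 2. fall back to disk
        } catch (final Exception e) {
            p = null;                                       // treat I/O errors as a miss
        }
        if (p == null) return null;
        this.cache.put(profileKey, p);                      // 3. fill cache for next time
        return p;
    }
}

The cache only stays consistent because every write path keeps it in sync: putActive() populates it, while removeActive(), resetProfiles(), clear() and close() evict or flush it, exactly as the hunks above do.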
