Automatically delete entries from the crawl profile list if the crawl is terminated.
pull/1/head
Michael Peter Christen 12 years ago
parent 15d1460b40
commit 158732af37

@ -29,16 +29,22 @@ package net.yacy.crawler;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.data.NoticedURL.StackType;
import net.yacy.crawler.retrieval.Request;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.logging.Log;
@ -158,7 +164,7 @@ public final class CrawlSwitchboard {
m = null;
}
if ( m == null ) {
return null;
return getPassive(profileKey);
}
p = new CrawlProfile(m);
this.profilesActiveCrawlsCache.put(profileKey, p);
@ -464,6 +470,56 @@ public final class CrawlSwitchboard {
return hasDoneSomething;
}
/**
 * Retires crawl profiles whose crawls have finished: every active profile
 * that is not one of the permanent default profiles and no longer has any
 * pending request in any of the crawler queues is moved from the active
 * profile store to the passive one.
 * NOTE(review): name looks like it was intended as "cleanFinishedProfiles";
 * kept as-is because callers reference this spelling.
 *
 * @param crawlQueues the crawl queues to scan for still-pending requests
 * @return the number of profiles that were moved to the passive store, or 0
 *         if nothing was retired or the scan was aborted (time-out / error)
 */
public int cleanFinishesProfiles(CrawlQueues crawlQueues) {
    // Step 1: collect handles of all non-default profiles; only those are
    // candidates for retirement. The default profiles must stay active forever.
    final Set<String> candidates = new HashSet<String>();
    for (final byte[] handle : this.getActive()) {
        final CrawlProfile profile = new CrawlProfile(this.getActive(handle));
        final String name = profile.name();
        final boolean isDefaultProfile =
               name.equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY)
            || name.equals(CrawlSwitchboard.CRAWL_PROFILE_REMOTE)
            || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)
            || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)
            || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)
            || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)
            || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE);
        if (!isDefaultProfile) {
            candidates.add(ASCII.String(handle));
        }
    }
    if (candidates.isEmpty()) {
        return 0;
    }
    // Step 2: walk every queue and drop each candidate that still has work
    // queued. This scan can be expensive, so it is bounded by a deadline;
    // hitting the deadline (or any failure) aborts without retiring anything.
    final long deadline = System.currentTimeMillis() + 60000L; // one minute
    try {
        for (final StackType stack : StackType.values()) {
            final Iterator<Request> requests = crawlQueues.noticeURL.iterator(stack);
            if (requests == null) {
                continue;
            }
            while (requests.hasNext()) {
                candidates.remove(requests.next().profileHandle());
                if (candidates.isEmpty()) {
                    return 0;
                }
                if (System.currentTimeMillis() > deadline) {
                    return 0; // give up; this is too large
                }
            }
            if (candidates.isEmpty()) {
                return 0;
            }
        }
    } catch (Throwable e) {
        // best-effort cleanup: on any failure, retire nothing this round
        return 0;
    }
    // Step 3: every surviving candidate has no queued work left; move it
    // from the active to the passive profile store.
    for (final String candidate : candidates) {
        final byte[] handle = ASCII.getBytes(candidate);
        final CrawlProfile profile = this.getActive(handle);
        if (profile != null) {
            this.putPassive(handle, profile);
            this.removeActive(handle);
        }
    }
    return candidates.size();
}
public synchronized void close() {
this.profilesActiveCrawlsCache.clear();
this.profilesActiveCrawls.close();

@ -1625,7 +1625,7 @@ public final class Switchboard extends serverSwitch {
}
return this.crawler.clear();
}
public synchronized void close() {
this.log.logConfig("SWITCHBOARD SHUTDOWN STEP 1: sending termination signal to managed threads:");
MemoryTracker.stopSystemProfiling();
@ -2124,8 +2124,10 @@ public final class Switchboard extends serverSwitch {
// clean up profiles
checkInterruption();
cleanProfiles();
//cleanProfiles();
int cleanup = this.crawler.cleanFinishesProfiles(this.crawlQueues);
if (cleanup > 0) log.logInfo("cleanup removed " + cleanup + " crawl profiles");
// clean up news
checkInterruption();
try {

@ -916,8 +916,7 @@ public final class SearchEvent {
// deploy worker to get more results
if (!anyWorkerAlive()) {
final int neededInclPrefetch = this.query.neededResults() + ((MemoryControl.available() > 100 * 1024 * 1024 && SNIPPET_WORKER_THREADS >= 8) ? this.query.itemsPerPage : 0);
deployWorker(Math.min(SNIPPET_WORKER_THREADS, this.query.itemsPerPage), neededInclPrefetch);
deployWorker(Math.min(SNIPPET_WORKER_THREADS, this.query.itemsPerPage), this.query.neededResults());
}
try {entry = this.result.element(item, 50);} catch (final InterruptedException e) {break;}

Loading…
Cancel
Save