added debug code to crawler monitor

pull/1/head
Michael Peter Christen 13 years ago
parent 205f8b222b
commit eca68fa197

@ -159,7 +159,7 @@
<!-- crawl profile list -->
#(crawlProfilesShow)#::
<fieldset>
<legend>Running Crawls</legend>
<legend>Running Crawls (#[count]#)</legend>
<table border="0" cellpadding="2" cellspacing="1" summary="A list of crawl profiles and their current settings.">
<colgroup>
<col width="16" />
@ -167,11 +167,13 @@
</colgroup>
<tr class="TableHeader">
<td><strong>Name</strong></td>
#(debug)#::<td><strong>Count</strong></td>#(/debug)#
<td><strong>Status</strong></td>
</tr>
#{list}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[name]#</td>
#(debug)#::<td>#[count]#</td>#(/debug)#
<td>#(terminateButton)#::
<div style="text-decoration:blink;float:left;">Running</div>
<form style="float:left;" action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8"><div>

@ -49,6 +49,7 @@ import net.yacy.document.Document;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.TransformerWriter;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.NewsPool;
@ -87,7 +88,8 @@ public class Crawler_p {
prop.put("forwardToCrawlStart", "0");
prop.put("info", "0");
boolean debug = (post != null && post.containsKey("debug"));
if (post != null) {
String c = post.toString();
if (c.length() < 1000) Log.logInfo("Crawl Start", c);
@ -520,13 +522,20 @@ public class Crawler_p {
profile = sb.crawler.getActive(h);
if (CrawlProfile.ignoreNames.contains(profile.name())) continue;
profile.putProfileEntry("crawlProfilesShow_list_", prop, true, dark, count, domlistlength);
prop.put("crawlProfilesShow_list_" + count + "_debug", debug ? 1 : 0);
if (debug) {
RowHandleSet urlhashes = sb.crawler.getURLHashes(h);
prop.put("crawlProfilesShow_list_" + count + "_debug_count", urlhashes == null ? "unknown" : Integer.toString(urlhashes.size()));
}
if (profile.urlMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN) {
hosts = hosts + "," + profile.name();
}
dark = !dark;
count++;
}
prop.put("crawlProfilesShow_debug", debug ? 1 : 0);
prop.put("crawlProfilesShow_list", count);
prop.put("crawlProfilesShow_count", count);
prop.put("crawlProfilesShow", count == 0 ? 0 : 1);
if (count > 0) {

@ -34,6 +34,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
@ -46,7 +47,9 @@ import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.data.NoticedURL.StackType;
import net.yacy.crawler.retrieval.Request;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.kelondroException;
@ -75,6 +78,7 @@ public final class CrawlSwitchboard {
private MapHeap profilesActiveCrawls;
private final MapHeap profilesPassiveCrawls;
private final Map<byte[], CrawlProfile> profilesActiveCrawlsCache; //TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
private final Map<String, RowHandleSet> profilesActiveCrawlsCounter;
public CrawlProfile defaultProxyProfile;
public CrawlProfile defaultRemoteProfile;
public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
@ -91,8 +95,8 @@ public final class CrawlSwitchboard {
System.exit(0);
}
this.log = log;
this.profilesActiveCrawlsCache =
Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));
this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));
this.profilesActiveCrawlsCounter = new ConcurrentHashMap<String, RowHandleSet>();
// make crawl profiles database and default profiles
this.queuesRoot = queuesRoot;
@ -229,6 +233,11 @@ public final class CrawlSwitchboard {
this.profilesPassiveCrawls.put(profileKey, profile);
}
/**
 * Returns the set of URL hashes that have been counted for the crawl
 * profile identified by the given profile key, or {@code null} if no
 * counter entry exists for that profile (the counter map is populated
 * lazily while crawl queues are scanned).
 *
 * @param profileKey the handle of the crawl profile, as raw bytes
 * @return the {@link RowHandleSet} of URL hashes for the profile, or {@code null} if unknown
 */
public RowHandleSet getURLHashes(final byte[] profileKey) {
    // the counter map is keyed by the String form of the profile handle
    final String key = ASCII.String(profileKey);
    return this.profilesActiveCrawlsCounter.get(key);
}
private void initActiveCrawlProfiles() {
// generate new default entry for proxy crawling
this.defaultProxyProfile =
@ -470,7 +479,10 @@ public final class CrawlSwitchboard {
return hasDoneSomething;
}
public int cleanFinishesProfiles(CrawlQueues crawlQueues) {
public int cleanFinishesProfiles(CrawlQueues crawlQueues) {
// clear the counter cache
this.profilesActiveCrawlsCounter.clear();
// find all profiles that are candidates for deletion
Set<String> deletionCandidate = new HashSet<String>();
for (final byte[] handle: this.getActive()) {
@ -498,7 +510,11 @@ public final class CrawlSwitchboard {
Request r;
while (sei.hasNext()) {
r = sei.next();
deletionCandidate.remove(r.profileHandle());
String handle = r.profileHandle();
RowHandleSet us = this.profilesActiveCrawlsCounter.get(handle);
if (us == null) {us = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); this.profilesActiveCrawlsCounter.put(handle, us);}
us.put(r.url().hash());
deletionCandidate.remove(handle);
if (deletionCandidate.size() == 0) return 0;
if (System.currentTimeMillis() > timeout) return 0; // give up; this is too large
}

Loading…
Cancel
Save