added debug code to crawler monitor

pull/1/head
Michael Peter Christen 13 years ago
parent 205f8b222b
commit eca68fa197

@ -159,7 +159,7 @@
<!-- crawl profile list --> <!-- crawl profile list -->
#(crawlProfilesShow)#:: #(crawlProfilesShow)#::
<fieldset> <fieldset>
<legend>Running Crawls</legend> <legend>Running Crawls (#[count]#)</legend>
<table border="0" cellpadding="2" cellspacing="1" summary="A list of crawl profiles and their current settings."> <table border="0" cellpadding="2" cellspacing="1" summary="A list of crawl profiles and their current settings.">
<colgroup> <colgroup>
<col width="16" /> <col width="16" />
@ -167,11 +167,13 @@
</colgroup> </colgroup>
<tr class="TableHeader"> <tr class="TableHeader">
<td><strong>Name</strong></td> <td><strong>Name</strong></td>
#(debug)#::<td><strong>Count</strong></td>#(/debug)#
<td><strong>Status</strong></td> <td><strong>Status</strong></td>
</tr> </tr>
#{list}# #{list}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#"> <tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[name]#</td> <td>#[name]#</td>
#(debug)#::<td>#[count]#</td>#(/debug)#
<td>#(terminateButton)#:: <td>#(terminateButton)#::
<div style="text-decoration:blink;float:left;">Running</div> <div style="text-decoration:blink;float:left;">Running</div>
<form style="float:left;" action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8"><div> <form style="float:left;" action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8"><div>

@ -49,6 +49,7 @@ import net.yacy.document.Document;
import net.yacy.document.parser.html.ContentScraper; import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.TransformerWriter; import net.yacy.document.parser.html.TransformerWriter;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.NewsPool; import net.yacy.peers.NewsPool;
@ -87,7 +88,8 @@ public class Crawler_p {
prop.put("forwardToCrawlStart", "0"); prop.put("forwardToCrawlStart", "0");
prop.put("info", "0"); prop.put("info", "0");
boolean debug = (post != null && post.containsKey("debug"));
if (post != null) { if (post != null) {
String c = post.toString(); String c = post.toString();
if (c.length() < 1000) Log.logInfo("Crawl Start", c); if (c.length() < 1000) Log.logInfo("Crawl Start", c);
@ -520,13 +522,20 @@ public class Crawler_p {
profile = sb.crawler.getActive(h); profile = sb.crawler.getActive(h);
if (CrawlProfile.ignoreNames.contains(profile.name())) continue; if (CrawlProfile.ignoreNames.contains(profile.name())) continue;
profile.putProfileEntry("crawlProfilesShow_list_", prop, true, dark, count, domlistlength); profile.putProfileEntry("crawlProfilesShow_list_", prop, true, dark, count, domlistlength);
prop.put("crawlProfilesShow_list_" + count + "_debug", debug ? 1 : 0);
if (debug) {
RowHandleSet urlhashes = sb.crawler.getURLHashes(h);
prop.put("crawlProfilesShow_list_" + count + "_debug_count", urlhashes == null ? "unknown" : Integer.toString(urlhashes.size()));
}
if (profile.urlMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN) { if (profile.urlMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN) {
hosts = hosts + "," + profile.name(); hosts = hosts + "," + profile.name();
} }
dark = !dark; dark = !dark;
count++; count++;
} }
prop.put("crawlProfilesShow_debug", debug ? 1 : 0);
prop.put("crawlProfilesShow_list", count); prop.put("crawlProfilesShow_list", count);
prop.put("crawlProfilesShow_count", count);
prop.put("crawlProfilesShow", count == 0 ? 0 : 1); prop.put("crawlProfilesShow", count == 0 ? 0 : 1);
if (count > 0) { if (count > 0) {

@ -34,6 +34,7 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
@ -46,7 +47,9 @@ import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.data.NoticedURL.StackType; import net.yacy.crawler.data.NoticedURL.StackType;
import net.yacy.crawler.retrieval.Request; import net.yacy.crawler.retrieval.Request;
import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.kelondroException; import net.yacy.kelondro.util.kelondroException;
@ -75,6 +78,7 @@ public final class CrawlSwitchboard {
private MapHeap profilesActiveCrawls; private MapHeap profilesActiveCrawls;
private final MapHeap profilesPassiveCrawls; private final MapHeap profilesPassiveCrawls;
private final Map<byte[], CrawlProfile> profilesActiveCrawlsCache; //TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder); private final Map<byte[], CrawlProfile> profilesActiveCrawlsCache; //TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
private final Map<String, RowHandleSet> profilesActiveCrawlsCounter;
public CrawlProfile defaultProxyProfile; public CrawlProfile defaultProxyProfile;
public CrawlProfile defaultRemoteProfile; public CrawlProfile defaultRemoteProfile;
public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile; public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
@ -91,8 +95,8 @@ public final class CrawlSwitchboard {
System.exit(0); System.exit(0);
} }
this.log = log; this.log = log;
this.profilesActiveCrawlsCache = this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));
Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder)); this.profilesActiveCrawlsCounter = new ConcurrentHashMap<String, RowHandleSet>();
// make crawl profiles database and default profiles // make crawl profiles database and default profiles
this.queuesRoot = queuesRoot; this.queuesRoot = queuesRoot;
@ -229,6 +233,11 @@ public final class CrawlSwitchboard {
this.profilesPassiveCrawls.put(profileKey, profile); this.profilesPassiveCrawls.put(profileKey, profile);
} }
/**
 * Looks up the URL-hash set recorded for a crawl profile in the
 * active-crawls counter map.
 *
 * @param profileKey the profile handle as bytes; it is converted with
 *        {@code ASCII.String} to form the lookup key
 * @return the {@code RowHandleSet} stored under that key, or {@code null}
 *         when the key is {@code null} or the map holds no entry for it
 */
public RowHandleSet getURLHashes(final byte[] profileKey) {
    // The counter map is instantiated as a ConcurrentHashMap, which throws a
    // NullPointerException for null keys; report "no entry" instead.
    if (profileKey == null) {
        return null;
    }
    final String handle = ASCII.String(profileKey);
    if (handle == null) {
        return null;
    }
    return this.profilesActiveCrawlsCounter.get(handle);
}
private void initActiveCrawlProfiles() { private void initActiveCrawlProfiles() {
// generate new default entry for proxy crawling // generate new default entry for proxy crawling
this.defaultProxyProfile = this.defaultProxyProfile =
@ -470,7 +479,10 @@ public final class CrawlSwitchboard {
return hasDoneSomething; return hasDoneSomething;
} }
public int cleanFinishesProfiles(CrawlQueues crawlQueues) { public int cleanFinishesProfiles(CrawlQueues crawlQueues) {
// clear the counter cache
this.profilesActiveCrawlsCounter.clear();
// find all profiles that are candidates for deletion // find all profiles that are candidates for deletion
Set<String> deletionCandidate = new HashSet<String>(); Set<String> deletionCandidate = new HashSet<String>();
for (final byte[] handle: this.getActive()) { for (final byte[] handle: this.getActive()) {
@ -498,7 +510,11 @@ public final class CrawlSwitchboard {
Request r; Request r;
while (sei.hasNext()) { while (sei.hasNext()) {
r = sei.next(); r = sei.next();
deletionCandidate.remove(r.profileHandle()); String handle = r.profileHandle();
RowHandleSet us = this.profilesActiveCrawlsCounter.get(handle);
if (us == null) {us = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); this.profilesActiveCrawlsCounter.put(handle, us);}
us.put(r.url().hash());
deletionCandidate.remove(handle);
if (deletionCandidate.size() == 0) return 0; if (deletionCandidate.size() == 0) return 0;
if (System.currentTimeMillis() > timeout) return 0; // give up; this is too large if (System.currentTimeMillis() > timeout) return 0; // give up; this is too large
} }

Loading…
Cancel
Save