diff --git a/htroot/IndexCreateWWWGlobalQueue_p.html b/htroot/IndexCreateWWWGlobalQueue_p.html index d247781dc..83dbb214e 100644 --- a/htroot/IndexCreateWWWGlobalQueue_p.html +++ b/htroot/IndexCreateWWWGlobalQueue_p.html @@ -25,6 +25,7 @@ There are #[num]# entries in the global crawler queue. Showing #[show-num]# most + @@ -33,6 +34,7 @@ There are #[num]# entries in the global crawler queue. Showing #[show-num]# most #{list}# + diff --git a/htroot/IndexCreateWWWGlobalQueue_p.java b/htroot/IndexCreateWWWGlobalQueue_p.java index bf50ee8a4..1d6f6d26c 100644 --- a/htroot/IndexCreateWWWGlobalQueue_p.java +++ b/htroot/IndexCreateWWWGlobalQueue_p.java @@ -49,6 +49,7 @@ import java.util.Locale; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlNURL; +import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -96,13 +97,18 @@ public class IndexCreateWWWGlobalQueue_p { plasmaCrawlNURL.Entry urle; boolean dark = true; yacySeed initiator; + String profileHandle; + plasmaCrawlProfile.entry profileEntry; int i; for (i = 0; i < crawlerList.length; i++) { urle = crawlerList[i]; if (urle != null) { initiator = yacyCore.seedDB.getConnected(urle.initiator()); + profileHandle = urle.profileHandle(); + profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle); prop.put("crawler-queue_list_"+i+"_dark", ((dark) ? 1 : 0) ); prop.put("crawler-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.put("crawler-queue_list_"+i+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+i+"_depth", urle.depth()); prop.put("crawler-queue_list_"+i+"_modified", daydate(urle.loaddate()) ); prop.put("crawler-queue_list_"+i+"_anchor", urle.name()); diff --git a/htroot/IndexCreateWWWLocalQueue_p.html b/htroot/IndexCreateWWWLocalQueue_p.html index 4274b572d..f46d34f1d 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.html +++ b/htroot/IndexCreateWWWLocalQueue_p.html @@ -25,6 +25,7 @@ There are #[num]# entries in the local crawler queue. Showing #[show-num]# most
InitiatorProfile Depth Modified Date Anchor Name
#[initiator]##[profile]# #[depth]# #[modified]# #[anchor]#
+ @@ -34,6 +35,7 @@ There are #[num]# entries in the local crawler queue. Showing #[show-num]# most #{list}# + diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java index 126d5cac8..cc144066b 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.java +++ b/htroot/IndexCreateWWWLocalQueue_p.java @@ -49,6 +49,7 @@ import java.util.Locale; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlNURL; +import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -94,13 +95,18 @@ public class IndexCreateWWWLocalQueue_p { plasmaCrawlNURL.Entry urle; boolean dark = true; yacySeed initiator; + String profileHandle; + plasmaCrawlProfile.entry profileEntry; int i; for (i = 0; (i < crawlerList.length) && (showNum < 100); i++) { urle = crawlerList[i]; if ((urle != null)&&(urle.url()!=null)) { initiator = yacyCore.seedDB.getConnected(urle.initiator()); + profileHandle = urle.profileHandle(); + profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle); prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) ); prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); prop.put("crawler-queue_list_"+showNum+"_anchor", urle.name()); diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index a9f1f5c79..135182a7e 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -339,7 +339,7 @@ public class plasmaCrawlNURL extends plasmaURL { this.referrer = (referrer == null) ? dummyHash : referrer; this.name = (name == null) ? "" : name; this.loaddate = (loaddate == null) ? new Date() : loaddate; - this.profileHandle = profileHandle; + this.profileHandle = profileHandle; // must not be null this.depth = depth; this.anchors = anchors; this.forkfactor = forkfactor; @@ -380,7 +380,7 @@ public class plasmaCrawlNURL extends plasmaURL { this.referrer = (entry[3]==null) ? dummyHash : new String(entry[3]); this.name = (entry[4] == null) ? "" : new String(entry[4]).trim(); this.loaddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[5]))); - this.profileHandle = new String(entry[6]).trim(); + this.profileHandle = (entry[6] == null) ? null : new String(entry[6]).trim(); this.depth = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[7])); this.anchors = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8])); this.forkfactor = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[9])); @@ -408,7 +408,7 @@ public class plasmaCrawlNURL extends plasmaURL { this.referrer.getBytes(), this.name.getBytes(), loaddatestr.getBytes(), - this.profileHandle.getBytes(), + (this.profileHandle == null) ? null : this.profileHandle.getBytes(), serverCodings.enhancedCoder.encodeBase64Long(this.depth, urlCrawlDepthLength).getBytes(), serverCodings.enhancedCoder.encodeBase64Long(this.anchors, urlParentBranchesLength).getBytes(), serverCodings.enhancedCoder.encodeBase64Long(this.forkfactor, urlForkFactorLength).getBytes(), diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 6fef44133..6f2bdda7f 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1230,7 +1230,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser loadDate, /* load date */ referrerHash, /* last url in crawling queue */ name, /* the anchor name */ - (profile == null) ? null : profile.handle(), + (profile == null) ? null : profile.handle(), // profile must not be null! currentdepth, /*depth so far*/ 0, /*anchors, default value */ 0, /*forkfactor, default value */
InitiatorProfile Depth Modified Date Anchor Name
#[initiator]##[profile]# #[depth]# #[modified]# #[anchor]#