From 33aaffbfc6528409e651b3c98cc567ffe984d0e8 Mon Sep 17 00:00:00 2001 From: theli Date: Fri, 2 Sep 2005 08:22:11 +0000 Subject: [PATCH] *) Displaying content size of each entry in indexing queue git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@639 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexCreateIndexingQueue_p.html | 2 ++ htroot/IndexCreateIndexingQueue_p.java | 23 +++++++++++++++----- source/de/anomic/plasma/plasmaCrawlLURL.java | 8 +++---- source/de/anomic/plasma/plasmaCrawlNURL.java | 6 ++--- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/htroot/IndexCreateIndexingQueue_p.html b/htroot/IndexCreateIndexingQueue_p.html index 62254cd6e..c7d58f101 100644 --- a/htroot/IndexCreateIndexingQueue_p.html +++ b/htroot/IndexCreateIndexingQueue_p.html @@ -22,6 +22,7 @@ There are #[num]# entries in the indexing queue:
Modified Date Anchor Name URL +Size #{list}# @@ -30,6 +31,7 @@ There are #[num]# entries in the indexing queue:
#[modified]# #[anchor]# #[url]# +#[size]# #{/list}# diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java index f2d09c5ea..57989f5bc 100644 --- a/htroot/IndexCreateIndexingQueue_p.java +++ b/htroot/IndexCreateIndexingQueue_p.java @@ -81,6 +81,9 @@ public class IndexCreateIndexingQueue_p { if (post.containsKey("moreRejected")) { showRejectedCount = Integer.parseInt(post.get("showRejected", "10")); } + if (post.containsKey("clearsbqueue")) { + //switchboard.sbQueue. + } } yacySeed initiator; @@ -90,33 +93,41 @@ public class IndexCreateIndexingQueue_p { if ((switchboard.sbQueue.size() == 0) && (switchboard.indexingTasksInProcess.size() == 0)) { prop.put("indexing-queue", 0); //is empty } else { - prop.put("indexing-queue", 1); - prop.put("indexing-queue_num", switchboard.sbQueue.size() + switchboard.indexingTasksInProcess.size());//num entries in queue + prop.put("indexing-queue", 1); // there are entries in the queue or in process + dark = true; plasmaSwitchboardQueue.Entry pcentry; + int entryCount = 0; try { ArrayList entryList = new ArrayList(); + // getting all entries that are currently in process synchronized (switchboard.indexingTasksInProcess) { entryList.addAll(switchboard.indexingTasksInProcess.values()); } - entryList.addAll(switchboard.sbQueue.list(0)); + // getting all enqueued entries + entryList.addAll(switchboard.sbQueue.list(0)); + for (i = 0; i < entryList.size(); i++) { pcentry = (plasmaSwitchboardQueue.Entry) entryList.get(i); - if (pcentry != null) { + if ((pcentry != null)&&(pcentry.url() != null)) { initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); prop.put("indexing-queue_list_"+i+"_dark", ((dark) ? 1 : 0)); prop.put("indexing-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.put("indexing-queue_list_"+i+"_depth", pcentry.depth()); - prop.put("indexing-queue_list_"+i+"_modified", (pcentry.responseHeader() == null) ? "null" : daydate(pcentry.responseHeader().lastModified())); + prop.put("indexing-queue_list_"+i+"_modified", (pcentry.responseHeader() == null) ? "" : daydate(pcentry.responseHeader().lastModified())); prop.put("indexing-queue_list_"+i+"_anchor", (pcentry.anchorName()==null)?"":pcentry.anchorName()); prop.put("indexing-queue_list_"+i+"_url", pcentry.normalizedURLString()); + prop.put("indexing-queue_list_"+i+"_size", Status.bytesToString(pcentry.size())); dark = !dark; + entryCount++; } } } catch (IOException e) {} - prop.put("indexing-queue_list", i); + + prop.put("indexing-queue_num", entryCount);//num entries in queue + prop.put("indexing-queue_list", entryCount); } // failure cases diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index 170fe6f3c..41158d7d7 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -342,14 +342,14 @@ public class plasmaCrawlLURL extends plasmaURL { // create new entry and store it into database this.urlHash = urlHash(url); this.url = url; - this.descr = descr; + this.descr = (descr==null)?this.url.toString():descr; this.moddate = moddate; this.loaddate = loaddate; this.referrerHash = (referrerHash == null) ? dummyHash : referrerHash; this.copyCount = copyCount; // the number of remote (global) copies of this object without this one this.flags = (localNeed) ? "L " : " "; this.quality = quality; - this.language = language; + this.language = (language==null)?"uk":language; this.doctype = doctype; this.size = size; this.wordCount = wordCount; @@ -370,10 +370,10 @@ public class plasmaCrawlLURL extends plasmaURL { byte[][] entry = urlHashCache.get(urlHash.getBytes()); if (entry != null) { this.url = new URL(new String(entry[1]).trim()); - this.descr = (entry[2] == null) ? "" : new String(entry[2]).trim(); + this.descr = (entry[2] == null) ? this.url.toString() : new String(entry[2]).trim(); this.moddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[3]))); this.loaddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[4]))); - this.referrerHash = new String(entry[5]); + this.referrerHash = (entry[5]==null)?dummyHash:new String(entry[5]); this.copyCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[6])); this.flags = new String(entry[7]); this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8])); diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index 9cac8843d..8bfc7618c 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -334,7 +334,7 @@ public class plasmaCrawlNURL extends plasmaURL { this.hash = urlHash(url); this.initiator = initiator; this.url = url; - this.referrer = (referrer == null) ? "------------" : referrer; + this.referrer = (referrer == null) ? dummyHash : referrer; this.name = (name == null) ? "" : name; this.loaddate = (loaddate == null) ? new Date() : loaddate; this.profileHandle = profileHandle; @@ -352,7 +352,7 @@ public class plasmaCrawlNURL extends plasmaURL { str.append("hash: ").append(url==null ? "null" : urlHash(url)).append(" | ") .append("initiator: ").append(initiator==null?"null":initiator).append(" | ") .append("url: ").append(url==null?"null":url.toString()).append(" | ") - .append("referrer: ").append((referrer == null) ? "------------" : referrer).append(" | ") + .append("referrer: ").append((referrer == null) ? dummyHash : referrer).append(" | ") .append("name: ").append((name == null) ? "null" : name).append(" | ") .append("loaddate: ").append((loaddate == null) ? new Date() : loaddate).append(" | ") .append("profile: ").append(profileHandle==null?"null":profileHandle).append(" | ") @@ -378,7 +378,7 @@ public class plasmaCrawlNURL extends plasmaURL { if (entry != null) { this.initiator = new String(entry[1]); this.url = new URL(new String(entry[2]).trim()); - this.referrer = new String(entry[3]); + this.referrer = (entry[3]==null) ? dummyHash : new String(entry[3]); this.name = (entry[4] == null) ? "" : new String(entry[4]).trim(); this.loaddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[5]))); this.profileHandle = new String(entry[6]).trim();