diff --git a/htroot/IndexCreateIndexingQueue_p.html b/htroot/IndexCreateIndexingQueue_p.html new file mode 100644 index 000000000..9698621bc --- /dev/null +++ b/htroot/IndexCreateIndexingQueue_p.html @@ -0,0 +1,76 @@ + + + +YaCy: Index Creation/Indexing Queue +#[metas]# + + +#[header]# +#[submenuIndexCreate]# +
+

Index Creation: Indexing Queue

+ +

+#(indexing-queue)# +The indexing queue is empty
+:: +There are #[num]# entries in the indexing queue:
+ + + + + + + + +#{list}# + + + + + + + + +#{/list}# +
InitiatorDepthModified Date#HREFAnchor Name +URL
#[initiator]##[depth]##[modified]##[href]##[anchor]##[url]#
+#(/indexing-queue)# +


+ +

+#(rejected)# +:: +

+Rejected URL List: There are #[num]# entries in the rejected-urls list. +#(only-latest)# +:: +Showing latest #[num]# entries. + +  +#(/only-latest)# + +
+There are #[num]# entries in the rejected-queue:
+ + + + + + + +#{list}# + + + + + + +#{/list}# +
InitiatorExecutorURLFail-Reason
#[initiator]##[executor]##[url]##[failreason]#
+#(/rejected)# +

+ +#[footer]# + + diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java new file mode 100644 index 000000000..7586b1e17 --- /dev/null +++ b/htroot/IndexCreateIndexingQueue_p.java @@ -0,0 +1,161 @@ +// IndexCreateIndexingQueue_p.java +// ------------------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 04.07.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// You must compile this file with +// javac -classpath .:../classes IndexCreateIndexingQueue_p.java +// if the shell's current path is HTROOT + +import java.net.MalformedURLException; +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Enumeration; +import java.util.Iterator; +import java.util.Locale; + +import de.anomic.http.httpHeader; +import de.anomic.plasma.plasmaCrawlEURL; +import de.anomic.plasma.plasmaCrawlLoaderMessage; +import de.anomic.plasma.plasmaCrawlNURL; +import de.anomic.plasma.plasmaCrawlProfile; +import de.anomic.plasma.plasmaCrawlWorker; +import de.anomic.plasma.plasmaHTCache; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.plasmaURL; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacySeed; + +public class IndexCreateIndexingQueue_p { + + private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); // SimpleDateFormat is not thread-safe: only use it through daydate() + private static synchronized String daydate(Date date) { // formats date as yyyy/MM/dd; a null date yields the empty string + if (date == null) return ""; else return 
dayFormatter.format(date); + } + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { // fills the template properties for the indexing queue and the rejected-URL list + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + prop.put("rejected", 0); + int showRejectedCount = 10; + + if (post != null) { + if (post.containsKey("clearRejected")) { + switchboard.urlPool.errorURL.clearStack(); + } + if (post.containsKey("moreRejected")) { + try { showRejectedCount = Integer.parseInt(post.get("showRejected", "10")); } catch (NumberFormatException e) { showRejectedCount = 10; } if (showRejectedCount < 1) showRejectedCount = 10; // malformed or non-positive request value: keep the default of 10 + } + } + + yacySeed initiator; + boolean dark; + int i, shown = 0; // shown: number of indexing-queue rows actually written (null entries are skipped) + + if (switchboard.queueStack.size() == 0) { + prop.put("indexing-queue", 0); //is empty + } else { + prop.put("indexing-queue", 1); + prop.put("indexing-queue_num", switchboard.queueStack.size());//num entries in queue + dark = true; + plasmaHTCache.Entry pcentry; + for (i = 0; i < switchboard.queueStack.size(); i++) { + pcentry = (plasmaHTCache.Entry) switchboard.queueStack.get(i); + if (pcentry != null) { + initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); + prop.put("indexing-queue_list_"+shown+"_dark", ((dark) ? 1 : 0)); + prop.put("indexing-queue_list_"+shown+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); + prop.put("indexing-queue_list_"+shown+"_depth", pcentry.depth); + prop.put("indexing-queue_list_"+shown+"_modified", daydate(pcentry.lastModified)); + prop.put("indexing-queue_list_"+shown+"_href",((pcentry.scraper == null) ? "0" : ("" + pcentry.scraper.getAnchors().size()))); + prop.put("indexing-queue_list_"+shown+"_anchor", ((pcentry.scraper == null) ? 
"-" : pcentry.scraper.getHeadline()) ); + prop.put("indexing-queue_list_"+shown+"_url", pcentry.nomalizedURLString); + dark = !dark; shown++; // row keys stay dense even when null entries were skipped + } + } + prop.put("indexing-queue_list", shown); + } + + // failure cases + if (switchboard.urlPool.errorURL.stackSize() != 0) { + if (showRejectedCount > switchboard.urlPool.errorURL.stackSize()) showRejectedCount = switchboard.urlPool.errorURL.stackSize(); + prop.put("rejected", 1); + prop.put("rejected_num", switchboard.urlPool.errorURL.stackSize()); + if (showRejectedCount != switchboard.urlPool.errorURL.stackSize()) { + prop.put("rejected_only-latest", 1); + prop.put("rejected_only-latest_num", showRejectedCount); + prop.put("rejected_only-latest_newnum", ((int) (showRejectedCount * 1.5))); + }else{ + prop.put("rejected_only-latest", 0); + } + dark = true; + String url, initiatorHash, executorHash; + plasmaCrawlEURL.entry entry; + yacySeed initiatorSeed, executorSeed; + int j=0; + for (i = switchboard.urlPool.errorURL.stackSize() - 1; i >= (switchboard.urlPool.errorURL.stackSize() - showRejectedCount); i--) { + entry = (plasmaCrawlEURL.entry) switchboard.urlPool.errorURL.getStack(i); + initiatorHash = entry.initiator(); + executorHash = entry.executor(); + url = entry.url().toString(); + initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash); + executorSeed = yacyCore.seedDB.getConnected(executorHash); + prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName())); + prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName())); + prop.put("rejected_list_"+j+"_url", url); + prop.put("rejected_list_"+j+"_failreason", entry.failreason()); + prop.put("rejected_list_"+j+"_dark", ((dark) ? 
1 : 0)); + dark = !dark; + j++; + } + prop.put("rejected_list", j); + } + + // return rewrite properties + return prop; + } + +} + + + diff --git a/htroot/IndexCreateLoaderQueue_p.html b/htroot/IndexCreateLoaderQueue_p.html new file mode 100644 index 000000000..30d5196d0 --- /dev/null +++ b/htroot/IndexCreateLoaderQueue_p.html @@ -0,0 +1,37 @@ + + + +YaCy: Index Creation / Loader Queue +#[metas]# + + +#[header]# +#[submenuIndexCreate]# +
+

Index Creation: Loader Queue

+ +

+#(loader-set)# +The loader set is empty
+:: +There are #[num]# entries in the loader set:
+ + + + + +#{list}# + + + + + +#{/list}# +
InitiatorDepth +URL
#[initiator]##[depth]##[url]#
+#(/loader-set)# +

+ +#[footer]# + + diff --git a/htroot/IndexCreateLoaderQueue_p.java b/htroot/IndexCreateLoaderQueue_p.java new file mode 100644 index 000000000..dfb9d46ec --- /dev/null +++ b/htroot/IndexCreateLoaderQueue_p.java @@ -0,0 +1,117 @@ +// IndexCreateLoaderQueue_p.java +// ----------------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 04.07.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// You must compile this file with +// javac -classpath .:../classes IndexCreate_p.java +// if the shell's current path is HTROOT + +import java.net.MalformedURLException; +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Enumeration; +import java.util.Iterator; +import java.util.Locale; + +import de.anomic.http.httpHeader; +import de.anomic.plasma.plasmaCrawlEURL; +import de.anomic.plasma.plasmaCrawlLoaderMessage; +import de.anomic.plasma.plasmaCrawlNURL; +import de.anomic.plasma.plasmaCrawlProfile; +import de.anomic.plasma.plasmaCrawlWorker; +import de.anomic.plasma.plasmaHTCache; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.plasmaURL; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacySeed; + +public class IndexCreateLoaderQueue_p { + + private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); + private static String daydate(Date date) { + if (date == null) return ""; else return 
dayFormatter.format(date); + } + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + + if (switchboard.cacheLoader.size() == 0) { + prop.put("loader-set", 0); + } else { + prop.put("loader-set", 1); + prop.put("loader-set_num", switchboard.cacheLoader.size()); + boolean dark = true; + + ThreadGroup loaderThreads = switchboard.cacheLoader.threadStatus(); + + int threadCount = loaderThreads.activeCount(); + Thread[] threadList = new Thread[threadCount*2]; + threadCount = loaderThreads.enumerate(threadList); + yacySeed initiator; + int i; + for (i = 0; i < threadCount; i++) { + plasmaCrawlWorker theWorker = (plasmaCrawlWorker)threadList[i]; + plasmaCrawlLoaderMessage theMsg = theWorker.theMsg; + if (theMsg == null) continue; + + initiator = yacyCore.seedDB.getConnected(theMsg.initiator); + prop.put("loader-set_list_"+i+"_dark", ((dark) ? 1 : 0) ); + prop.put("loader-set_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.put("loader-set_list_"+i+"_depth", theMsg.depth ); + prop.put("loader-set_list_"+i+"_url", theMsg.url ); // null pointer exception here !!! maybe url = null; check reason. + dark = !dark; + } + prop.put("loader-set_list", i ); + } + + // return rewrite properties + return prop; + } + +} + + + diff --git a/htroot/IndexCreateWWWLocalCrawlQueue_p.html b/htroot/IndexCreateWWWLocalCrawlQueue_p.html new file mode 100644 index 000000000..cf84ef40c --- /dev/null +++ b/htroot/IndexCreateWWWLocalCrawlQueue_p.html @@ -0,0 +1,45 @@ + + + +YaCy: Index Creation / WWW Crawl Queue +#[metas]# + + +#[header]# +#[submenuIndexCreate]# +
+

Index Creation: WWW Crawl Queue

+ +

+#(crawler-queue)# +The crawler queue is empty

+:: +

+ +
+
+There are #[num]# entries in the crawler queue. Showing #[show-num]# most recent entries: + + + + + + + + +#{list}# + + + + + + + +#{/list}# +
InitiatorDepthModified DateAnchor NameURL
#[initiator]##[depth]##[modified]##[anchor]##[url]#
+#(/crawler-queue)# +

+ +#[footer]# + + diff --git a/htroot/IndexCreateWWWLocalCrawlQueue_p.java b/htroot/IndexCreateWWWLocalCrawlQueue_p.java new file mode 100644 index 000000000..54810e15c --- /dev/null +++ b/htroot/IndexCreateWWWLocalCrawlQueue_p.java @@ -0,0 +1,136 @@ +// IndexCreateWWWCrawlQueue_p.java +// ------------------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 04.07.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// You must compile this file with +// javac -classpath .:../classes IndexCreateWWWLocalCrawlQueue_p.java +// if the shell's current path is HTROOT + +import java.net.MalformedURLException; +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Enumeration; +import java.util.Iterator; +import java.util.Locale; + +import de.anomic.http.httpHeader; +import de.anomic.plasma.plasmaCrawlEURL; +import de.anomic.plasma.plasmaCrawlLoaderMessage; +import de.anomic.plasma.plasmaCrawlNURL; +import de.anomic.plasma.plasmaCrawlProfile; +import de.anomic.plasma.plasmaCrawlWorker; +import de.anomic.plasma.plasmaHTCache; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.plasmaURL; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacySeed; + +public class IndexCreateWWWLocalCrawlQueue_p { + + private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); // SimpleDateFormat is not thread-safe: only use it through daydate() + private static synchronized String daydate(Date date) { // formats date as yyyy/MM/dd; a null date yields the empty string + if (date == null) return ""; else return 
dayFormatter.format(date); + } + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { // optionally clears all crawl stacks, then fills the template properties for the local (core) crawl queue + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + if (post != null) { + if (post.containsKey("clearcrawlqueue")) { + String urlHash; + int c = 0; + while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) > 0) { + urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE).hash(); + if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; } + } + while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) > 0) { + urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_LIMIT).hash(); + if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; } + } + while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) > 0) { + urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_REMOTE).hash(); // pop from the same stack whose size is tested above; popping the already emptied LIMIT stack never drained REMOTE + if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; } + } + prop.put("info", 3);//crawling queue cleared + prop.put("info_numEntries", c); + } + } + + int localStackSize = switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE); + if (localStackSize == 0) { + prop.put("crawler-queue", 0); + } else { + prop.put("crawler-queue", 1); + plasmaCrawlNURL.entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_CORE, 100); + prop.put("crawler-queue_num", localStackSize);//num Entries + prop.put("crawler-queue_show-num", crawlerList.length); //showing show-num most recent entries + plasmaCrawlNURL.entry urle; + boolean dark = true; + yacySeed initiator; + int i, shown = 0; // shown: number of rows actually written (null entries are skipped) + for (i = 0; i < crawlerList.length; i++) { + urle = crawlerList[i]; + if (urle != null) { + initiator = yacyCore.seedDB.getConnected(urle.initiator()); + 
prop.put("crawler-queue_list_"+shown+"_dark", ((dark) ? 1 : 0) ); + prop.put("crawler-queue_list_"+shown+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.put("crawler-queue_list_"+shown+"_depth", urle.depth()); + prop.put("crawler-queue_list_"+shown+"_modified", daydate(urle.loaddate()) ); + prop.put("crawler-queue_list_"+shown+"_anchor", urle.name()); + prop.put("crawler-queue_list_"+shown+"_url", urle.url()); + dark = !dark; shown++; // row keys stay dense even when null entries were skipped + } + } + prop.put("crawler-queue_list", shown); + } + + // return rewrite properties + return prop; + } + +} + + + diff --git a/htroot/IndexCreate_p.html b/htroot/IndexCreate_p.html index aea740b2f..c2d3dcce3 100644 --- a/htroot/IndexCreate_p.html +++ b/htroot/IndexCreate_p.html @@ -6,29 +6,9 @@ #[header]# -

+#[submenuIndexCreate]# +

Index Creation

-

- - - - - - - - - - - - - - - - - - -
downStart new crawling job downRejected URL List
downDistributed IndexingdownIndexing Queue
downCrawl Profile ListdownLoader Queue
downRemote Crawling PeersdownCrawling Queue
-

Start Crawling Job:  @@ -237,129 +217,18 @@ No remote crawl peers availible.
#(/remoteCrawlPeers)# - - -
-#(rejected)# -:: -
-Rejected URL List: There are #[num]# entries in the rejected-urls list. -#(only-latest)# -:: -Showing latest #[num]# entries. - -  -#(/only-latest)# - -
-There are #[num]# entries in the rejected-queue:
- - - - - - - -#{list}# - - - - - - -#{/list}# -
InitiatorExecutorURLFail-Reason
#[initiator]##[executor]##[url]##[failreason]#
-#(/rejected)# - - -
-Indexing Queue:  -#(indexing-queue)# -The indexing queue is empty
-:: -There are #[num]# entries in the indexing queue:
- - - - - - - - -#{list}# - - - - - - - - -#{/list}# -
InitiatorDepthModified Date#HREFAnchor Name -URL
#[initiator]##[depth]##[modified]##[href]##[anchor]##[url]#
-#(/indexing-queue)# - - -
-Loader Set:  -#(loader-set)# -The loader set is empty
-:: -There are #[num]# entries in the loader set:
- - - - - -#{list}# - - - - - -#{/list}# -
InitiatorDepth -URL
#[initiator]##[depth]##[url]#
-#(/loader-set)# - +

-
-Crawler Queue:  -#(crawler-queue)# -The crawler queue is empty

-:: -There are #[num]# entries in the crawler queue. Showing #[show-num]# most recent entries: - - - - - - - - -#{list}# - - - - - - - -#{/list}# -
InitiatorDepthModified DateAnchor NameURL
#[initiator]##[depth]##[modified]##[anchor]##[url]#
-
+

- -#(paused)# +#(crawler-paused)# :: -#(/paused)# +#(/crawler-paused)#
-#(/crawler-queue)# -

+ #[footer]# diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java index f109ce4ab..0ac1b6afa 100644 --- a/htroot/IndexCreate_p.java +++ b/htroot/IndexCreate_p.java @@ -80,8 +80,7 @@ public class IndexCreate_p { prop.put("error", 0); prop.put("info", 0); prop.put("refreshbutton", 0); - prop.put("rejected", 0); - int showRejectedCount = 10; + int i; if (post != null) { @@ -156,34 +155,12 @@ public class IndexCreate_p { } } } - if (post.containsKey("clearRejected")) { - switchboard.urlPool.errorURL.clearStack(); - } - if (post.containsKey("moreRejected")) { - showRejectedCount = Integer.parseInt(post.get("showRejected", "10")); - } + if (post.containsKey("distributedcrawling")) { boolean crawlResponse = ((String) post.get("crawlResponse", "")).equals("on"); env.setConfig("crawlResponse", (crawlResponse) ? "true" : "false"); } - if (post.containsKey("clearcrawlqueue")) { - String urlHash; - int c = 0; - while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) > 0) { - urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE).hash(); - if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; } - } - while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) > 0) { - urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_LIMIT).hash(); - if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; } - } - while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) > 0) { - urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_LIMIT).hash(); - if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; } - } - prop.put("info", 3);//crawling queue cleared - prop.put("info_numEntries", c); - } + if (post.containsKey("pausecrawlqueue")) { switchboard.pauseCrawling(); @@ -277,137 +254,12 @@ public class IndexCreate_p { prop.put("remoteCrawlPeers_busy", pendicount); prop.put("remoteCrawlPeers_num", 
(availcount + pendicount)); } - - // failure cases - if (switchboard.urlPool.errorURL.stackSize() != 0) { - if (showRejectedCount > switchboard.urlPool.errorURL.stackSize()) showRejectedCount = switchboard.urlPool.errorURL.stackSize(); - prop.put("rejected", 1); - prop.put("rejected_num", switchboard.urlPool.errorURL.stackSize()); - if (showRejectedCount != switchboard.urlPool.errorURL.stackSize()) { - prop.put("rejected_only-latest", 1); - prop.put("rejected_only-latest_num", showRejectedCount); - prop.put("rejected_only-latest_newnum", ((int) (showRejectedCount * 1.5))); - }else{ - prop.put("rejected_only-latest", 0); - } - dark = true; - String url, initiatorHash, executorHash; - plasmaCrawlEURL.entry entry; - yacySeed initiatorSeed, executorSeed; - int j=0; - for (i = switchboard.urlPool.errorURL.stackSize() - 1; i >= (switchboard.urlPool.errorURL.stackSize() - showRejectedCount); i--) { - entry = (plasmaCrawlEURL.entry) switchboard.urlPool.errorURL.getStack(i); - initiatorHash = entry.initiator(); - executorHash = entry.executor(); - url = entry.url().toString(); - initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash); - executorSeed = yacyCore.seedDB.getConnected(executorHash); - prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName())); - prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName())); - prop.put("rejected_list_"+j+"_url", url); - prop.put("rejected_list_"+j+"_failreason", entry.failreason()); - prop.put("rejected_list_"+j+"_dark", ((dark) ? 
1 : 0)); - dark = !dark; - j++; - } - prop.put("rejected_list", j); - } - - // now about the current processes - if (completequeue > 0) { - - yacySeed initiator; - - if (switchboard.queueStack.size() == 0) { - prop.put("indexing-queue", 0); //is empty - } else { - prop.put("indexing-queue", 1); - prop.put("indexing-queue_num", switchboard.queueStack.size());//num entries in queue - dark = true; - plasmaHTCache.Entry pcentry; - for (i = 0; i < switchboard.queueStack.size(); i++) { - pcentry = (plasmaHTCache.Entry) switchboard.queueStack.get(i); - if (pcentry != null) { - initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); - prop.put("indexing-queue_list_"+i+"_dark", ((dark) ? 1 : 0)); - prop.put("indexing-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); - prop.put("indexing-queue_list_"+i+"_depth", pcentry.depth); - prop.put("indexing-queue_list_"+i+"_modified", daydate(pcentry.lastModified)); - prop.put("indexing-queue_list_"+i+"_href",((pcentry.scraper == null) ? "0" : ("" + pcentry.scraper.getAnchors().size()))); - prop.put("indexing-queue_list_"+i+"_anchor", ((pcentry.scraper == null) ? "-" : pcentry.scraper.getHeadline()) ); - prop.put("indexing-queue_list_"+i+"_url", pcentry.nomalizedURLString); - dark = !dark; - } - } - prop.put("indexing-queue_list", i); - } - - if (loaderThreadsSize == 0) { - prop.put("loader-set", 0); - } else { - prop.put("loader-set", 1); - prop.put("loader-set_num", loaderThreadsSize); - dark = true; - //plasmaCrawlLoader.Exec[] loaderThreads = switchboard.cacheLoader.threadStatus(); -// for (i = 0; i < loaderThreads.length; i++) { -// initiator = yacyCore.seedDB.getConnected(loaderThreads[i].initiator); -// prop.put("loader-set_list_"+i+"_dark", ((dark) ? 1 : 0) ); -// prop.put("loader-set_list_"+i+"_initiator", ((initiator == null) ? 
"proxy" : initiator.getName()) ); -// prop.put("loader-set_list_"+i+"_depth", loaderThreads[i].depth ); -// prop.put("loader-set_list_"+i+"_url", loaderThreads[i].url ); // null pointer exception here !!! maybe url = null; check reason. -// dark = !dark; -// } -// prop.put("loader-set_list", i ); - - ThreadGroup loaderThreads = switchboard.cacheLoader.threadStatus(); - - int threadCount = loaderThreads.activeCount(); - Thread[] threadList = new Thread[threadCount*2]; - threadCount = loaderThreads.enumerate(threadList); - - for (i = 0; i < threadCount; i++) { - plasmaCrawlWorker theWorker = (plasmaCrawlWorker)threadList[i]; - plasmaCrawlLoaderMessage theMsg = theWorker.theMsg; - if (theMsg == null) continue; - - initiator = yacyCore.seedDB.getConnected(theMsg.initiator); - prop.put("loader-set_list_"+i+"_dark", ((dark) ? 1 : 0) ); - prop.put("loader-set_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); - prop.put("loader-set_list_"+i+"_depth", theMsg.depth ); - prop.put("loader-set_list_"+i+"_url", theMsg.url ); // null pointer exception here !!! maybe url = null; check reason. - dark = !dark; - } - prop.put("loader-set_list", i ); - } - - int localStackSize = switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE); - if (localStackSize == 0) { - prop.put("crawler-queue", 0); - } else { - prop.put("crawler-queue", 1); - plasmaCrawlNURL.entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_CORE, 20); - prop.put("crawler-queue_num", localStackSize);//num Entries - prop.put("crawler-queue_show-num", crawlerList.length); //showin sjow-num most recent - plasmaCrawlNURL.entry urle; - dark = true; - for (i = 0; i < crawlerList.length; i++) { - urle = crawlerList[i]; - if (urle != null) { - initiator = yacyCore.seedDB.getConnected(urle.initiator()); - prop.put("crawler-queue_list_"+i+"_dark", ((dark) ? 1 : 0) ); - prop.put("crawler-queue_list_"+i+"_initiator", ((initiator == null) ? 
"proxy" : initiator.getName()) ); - prop.put("crawler-queue_list_"+i+"_depth", urle.depth()); - prop.put("crawler-queue_list_"+i+"_modified", daydate(urle.loaddate()) ); - prop.put("crawler-queue_list_"+i+"_anchor", urle.name()); - prop.put("crawler-queue_list_"+i+"_url", urle.url()); - dark = !dark; - } - } - prop.put("crawler-queue_list", i); - } - prop.put("crawler-queue_paused",(switchboard.crawlingIsPaused())?0:1); - } + } + + + prop.put("crawler-paused",(switchboard.crawlingIsPaused())?0:1); + // return rewrite properties return prop; } diff --git a/htroot/env/style.css b/htroot/env/style.css index a66fe4773..e0a1761e0 100644 --- a/htroot/env/style.css +++ b/htroot/env/style.css @@ -103,6 +103,14 @@ font-size:11px; padding-top: 2; padding-bottom: 2; } +.MenuTopItem { +background-color: #bdcdd4; +font-weight:bold; +text-decoration:none; +font-size:11px; +padding-top: 3; +padding-bottom: 3; +} .MenuSubItem { background-color: #bdcdd4; font-weight:bold; diff --git a/htroot/env/templates/submenuIndexCreate.template b/htroot/env/templates/submenuIndexCreate.template new file mode 100644 index 000000000..c8d4a92ae --- /dev/null +++ b/htroot/env/templates/submenuIndexCreate.template @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file