diff --git a/htroot/IndexCreateIndexingQueue_p.html b/htroot/IndexCreateIndexingQueue_p.html
new file mode 100644
index 000000000..9698621bc
--- /dev/null
+++ b/htroot/IndexCreateIndexingQueue_p.html
@@ -0,0 +1,76 @@
+
+
+
+YaCy: Index Creation/Indexing Queue
+#[metas]#
+
+
+#[header]#
+#[submenuIndexCreate]#
+
+Index Creation: Indexing Queue
+
+
+#(indexing-queue)#
+The indexing queue is empty
+::
+There are #[num]# entries in the indexing queue:
+
+
+#{list}#
+
+#[initiator]# |
+#[depth]# |
+#[modified]# |
+#[href]# |
+#[anchor]# |
+#[url]# |
+
+#{/list}#
+
+#(/indexing-queue)#
+
+
+
+#(rejected)#
+::
+
+There are #[num]# entries in the rejected-queue:
+
+
+#{list}#
+
+#[initiator]# |
+#[executor]# |
+#[url]# |
+#[failreason]# |
+
+#{/list}#
+
+#(/rejected)#
+
+
+#[footer]#
+
+
diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java
new file mode 100644
index 000000000..7586b1e17
--- /dev/null
+++ b/htroot/IndexCreateIndexingQueue_p.java
@@ -0,0 +1,161 @@
+// IndexCreateIndexingQueue_p.java
+// -------------------------------
+// part of the AnomicHTTPD caching proxy
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004, 2005
+// last major change: 04.07.2005
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+// You must compile this file with
+// javac -classpath .:../classes IndexCreateIndexingQueue_p.java
+// if the shell's current path is HTROOT
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.Locale;
+
+import de.anomic.http.httpHeader;
+import de.anomic.plasma.plasmaCrawlEURL;
+import de.anomic.plasma.plasmaCrawlLoaderMessage;
+import de.anomic.plasma.plasmaCrawlNURL;
+import de.anomic.plasma.plasmaCrawlProfile;
+import de.anomic.plasma.plasmaCrawlWorker;
+import de.anomic.plasma.plasmaHTCache;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.plasmaURL;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+import de.anomic.yacy.yacyCore;
+import de.anomic.yacy.yacySeed;
+
+/** Page backend for IndexCreateIndexingQueue_p.html: lists the indexing queue and the rejected URLs. */
+public class IndexCreateIndexingQueue_p {
+    private static final SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
+
+    /** Formats a date as yyyy/MM/dd ("" for null); locks the formatter, which is not thread-safe. */
+    private static String daydate(Date date) {
+        if (date == null) return "";
+        synchronized (dayFormatter) { return dayFormatter.format(date); }
+    }
+
+    /**
+     * Builds the replacement properties for the page.
+     * @param header request header; not read here
+     * @param post request arguments, may be null ("clearRejected", "moreRejected", "showRejected")
+     * @param env the server switch, cast to plasmaSwitchboard
+     * @return the properties for the "indexing-queue" and "rejected" template sections
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
+        serverObjects prop = new serverObjects();
+        prop.put("rejected", 0);
+        int showRejectedCount = 10;
+        if (post != null) {
+            if (post.containsKey("clearRejected")) switchboard.urlPool.errorURL.clearStack();
+            if (post.containsKey("moreRejected")) {
+                try {
+                    showRejectedCount = Math.max(0, Integer.parseInt(post.get("showRejected", "10")));
+                } catch (NumberFormatException e) {
+                    showRejectedCount = 10; // malformed request value: fall back to the default
+                }
+            }
+        }
+
+        boolean dark = true;
+        if (switchboard.queueStack.size() == 0) {
+            prop.put("indexing-queue", 0); // is empty
+        } else {
+            prop.put("indexing-queue", 1);
+            prop.put("indexing-queue_num", switchboard.queueStack.size()); // num entries in queue
+            int rows = 0; // own row counter, so skipped null entries leave no gaps in the list
+            for (int i = 0; i < switchboard.queueStack.size(); i++) {
+                plasmaHTCache.Entry pcentry = (plasmaHTCache.Entry) switchboard.queueStack.get(i);
+                if (pcentry == null) continue;
+                yacySeed initiator = yacyCore.seedDB.getConnected(pcentry.initiator());
+                String row = "indexing-queue_list_" + rows + "_";
+                prop.put(row + "dark", ((dark) ? 1 : 0));
+                prop.put(row + "initiator", ((initiator == null) ? "proxy" : initiator.getName()));
+                prop.put(row + "depth", pcentry.depth);
+                prop.put(row + "modified", daydate(pcentry.lastModified));
+                prop.put(row + "href", ((pcentry.scraper == null) ? "0" : ("" + pcentry.scraper.getAnchors().size())));
+                prop.put(row + "anchor", ((pcentry.scraper == null) ? "-" : pcentry.scraper.getHeadline()));
+                prop.put(row + "url", pcentry.nomalizedURLString);
+                dark = !dark;
+                rows++;
+            }
+            prop.put("indexing-queue_list", rows);
+        }
+
+        // failure cases: list the newest showRejectedCount entries of the error stack
+        int stackSize = switchboard.urlPool.errorURL.stackSize(); // read once so all bounds agree
+        if (stackSize != 0) {
+            if (showRejectedCount > stackSize) showRejectedCount = stackSize;
+            prop.put("rejected", 1);
+            prop.put("rejected_num", stackSize);
+            if (showRejectedCount != stackSize) {
+                prop.put("rejected_only-latest", 1);
+                prop.put("rejected_only-latest_num", showRejectedCount);
+                prop.put("rejected_only-latest_newnum", ((int) (showRejectedCount * 1.5)));
+            } else {
+                prop.put("rejected_only-latest", 0);
+            }
+            dark = true;
+            int j = 0;
+            for (int i = stackSize - 1; i >= (stackSize - showRejectedCount); i--) {
+                plasmaCrawlEURL.entry entry = (plasmaCrawlEURL.entry) switchboard.urlPool.errorURL.getStack(i);
+                yacySeed initiatorSeed = yacyCore.seedDB.getConnected(entry.initiator());
+                yacySeed executorSeed = yacyCore.seedDB.getConnected(entry.executor());
+                prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
+                prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
+                prop.put("rejected_list_"+j+"_url", entry.url().toString());
+                prop.put("rejected_list_"+j+"_failreason", entry.failreason());
+                prop.put("rejected_list_"+j+"_dark", ((dark) ? 1 : 0));
+                dark = !dark;
+                j++;
+            }
+            prop.put("rejected_list", j);
+        }
+
+        return prop;
+    }
+}
+
+
+
diff --git a/htroot/IndexCreateLoaderQueue_p.html b/htroot/IndexCreateLoaderQueue_p.html
new file mode 100644
index 000000000..30d5196d0
--- /dev/null
+++ b/htroot/IndexCreateLoaderQueue_p.html
@@ -0,0 +1,37 @@
+
+
+
+YaCy: Index Creation / Loader Queue
+#[metas]#
+
+
+#[header]#
+#[submenuIndexCreate]#
+
+Index Creation: Loader Queue
+
+
+#(loader-set)#
+The loader set is empty
+::
+There are #[num]# entries in the loader set:
+
+
+#{list}#
+
+#[initiator]# |
+#[depth]# |
+#[url]# |
+
+#{/list}#
+
+#(/loader-set)#
+
+
+#[footer]#
+
+
diff --git a/htroot/IndexCreateLoaderQueue_p.java b/htroot/IndexCreateLoaderQueue_p.java
new file mode 100644
index 000000000..dfb9d46ec
--- /dev/null
+++ b/htroot/IndexCreateLoaderQueue_p.java
@@ -0,0 +1,117 @@
+// IndexCreateLoaderQueue_p.java
+// -----------------------------
+// part of the AnomicHTTPD caching proxy
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004, 2005
+// last major change: 04.07.2005
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+// You must compile this file with
+// javac -classpath .:../classes IndexCreateLoaderQueue_p.java
+// if the shell's current path is HTROOT
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.Locale;
+
+import de.anomic.http.httpHeader;
+import de.anomic.plasma.plasmaCrawlEURL;
+import de.anomic.plasma.plasmaCrawlLoaderMessage;
+import de.anomic.plasma.plasmaCrawlNURL;
+import de.anomic.plasma.plasmaCrawlProfile;
+import de.anomic.plasma.plasmaCrawlWorker;
+import de.anomic.plasma.plasmaHTCache;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.plasmaURL;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+import de.anomic.yacy.yacyCore;
+import de.anomic.yacy.yacySeed;
+
+/** Page backend for IndexCreateLoaderQueue_p.html: lists the messages held by the loader threads. */
+public class IndexCreateLoaderQueue_p {
+
+    /**
+     * Builds the replacement properties for the page.
+     * @param header request header; not read here
+     * @param post request arguments; not read here
+     * @param env the server switch, cast to plasmaSwitchboard
+     * @return the properties for the "loader-set" template section
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
+        serverObjects prop = new serverObjects();
+
+        if (switchboard.cacheLoader.size() == 0) {
+            prop.put("loader-set", 0);
+        } else {
+            prop.put("loader-set", 1);
+            prop.put("loader-set_num", switchboard.cacheLoader.size());
+            boolean dark = true;
+            // activeCount() is only an estimate, so enumerate into an array of twice that size
+            ThreadGroup loaderThreads = switchboard.cacheLoader.threadStatus();
+            Thread[] threadList = new Thread[loaderThreads.activeCount() * 2];
+            int threadCount = loaderThreads.enumerate(threadList);
+            int rows = 0; // own row counter, so skipped threads leave no gaps in the list
+            for (int i = 0; i < threadCount; i++) {
+                if (!(threadList[i] instanceof plasmaCrawlWorker)) continue; // not a crawl worker
+                plasmaCrawlLoaderMessage theMsg = ((plasmaCrawlWorker) threadList[i]).theMsg;
+                if (theMsg == null) continue; // worker currently holds no message
+                yacySeed initiator = yacyCore.seedDB.getConnected(theMsg.initiator);
+                String row = "loader-set_list_" + rows + "_";
+                prop.put(row + "dark", ((dark) ? 1 : 0));
+                prop.put(row + "initiator", ((initiator == null) ? "proxy" : initiator.getName()));
+                prop.put(row + "depth", theMsg.depth);
+                // guard against a null url, the suspected cause of an NPE seen on this line
+                prop.put(row + "url", (theMsg.url == null) ? "" : theMsg.url.toString());
+                dark = !dark;
+                rows++;
+            }
+            prop.put("loader-set_list", rows);
+        }
+
+        // return rewrite properties
+        return prop;
+    }
+
+}
+
+
+
diff --git a/htroot/IndexCreateWWWLocalCrawlQueue_p.html b/htroot/IndexCreateWWWLocalCrawlQueue_p.html
new file mode 100644
index 000000000..cf84ef40c
--- /dev/null
+++ b/htroot/IndexCreateWWWLocalCrawlQueue_p.html
@@ -0,0 +1,45 @@
+
+
+
+YaCy: Index Creation / WWW Crawl Queue
+#[metas]#
+
+
+#[header]#
+#[submenuIndexCreate]#
+
+Index Creation: WWW Crawl Queue
+
+
+#(crawler-queue)#
+The crawler queue is empty
+::
+
+
+There are #[num]# entries in the crawler queue. Showing #[show-num]# most recent entries:
+
+
+#{list}#
+
+#[initiator]# |
+#[depth]# |
+#[modified]# |
+#[anchor]# |
+#[url]# |
+
+#{/list}#
+
+#(/crawler-queue)#
+
+
+#[footer]#
+
+
diff --git a/htroot/IndexCreateWWWLocalCrawlQueue_p.java b/htroot/IndexCreateWWWLocalCrawlQueue_p.java
new file mode 100644
index 000000000..54810e15c
--- /dev/null
+++ b/htroot/IndexCreateWWWLocalCrawlQueue_p.java
@@ -0,0 +1,136 @@
+// IndexCreateWWWCrawlQueue_p.java
+// -------------------------------
+// part of the AnomicHTTPD caching proxy
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004, 2005
+// last major change: 04.07.2005
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+// You must compile this file with
+// javac -classpath .:../classes IndexCreateWWWLocalCrawlQueue_p.java
+// if the shell's current path is HTROOT
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.Locale;
+
+import de.anomic.http.httpHeader;
+import de.anomic.plasma.plasmaCrawlEURL;
+import de.anomic.plasma.plasmaCrawlLoaderMessage;
+import de.anomic.plasma.plasmaCrawlNURL;
+import de.anomic.plasma.plasmaCrawlProfile;
+import de.anomic.plasma.plasmaCrawlWorker;
+import de.anomic.plasma.plasmaHTCache;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.plasmaURL;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+import de.anomic.yacy.yacyCore;
+import de.anomic.yacy.yacySeed;
+
+/** Page backend for IndexCreateWWWLocalCrawlQueue_p.html: shows and clears the local crawl queue. */
+public class IndexCreateWWWLocalCrawlQueue_p {
+    private static final SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
+    /** Formats a date as yyyy/MM/dd ("" for null); locks the formatter, which is not thread-safe. */
+    private static String daydate(Date date) {
+        if (date == null) return "";
+        synchronized (dayFormatter) { return dayFormatter.format(date); }
+    }
+
+    /**
+     * Builds the replacement properties for the page.
+     * @param header request header; not read here
+     * @param post request arguments, may be null; "clearcrawlqueue" drains the core, limit and remote stacks
+     * @param env the server switch, cast to plasmaSwitchboard
+     * @return the properties for the "crawler-queue" template section, plus "info" after a clear
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
+        serverObjects prop = new serverObjects();
+        if (post != null && post.containsKey("clearcrawlqueue")) {
+            int c = 0;
+            while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) > 0) {
+                String urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE).hash();
+                if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; }
+            }
+            while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) > 0) {
+                String urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_LIMIT).hash();
+                if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; }
+            }
+            // pop from the remote stack itself: popping another stack here would never empty this one
+            while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) > 0) {
+                String urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_REMOTE).hash();
+                if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; }
+            }
+            prop.put("info", 3); // crawling queue cleared
+            prop.put("info_numEntries", c);
+        }
+
+        int localStackSize = switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE);
+        prop.put("crawler-queue", (localStackSize == 0) ? 0 : 1);
+        if (localStackSize != 0) {
+            plasmaCrawlNURL.entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_CORE, 100);
+            prop.put("crawler-queue_num", localStackSize); // num entries
+            prop.put("crawler-queue_show-num", crawlerList.length); // showing show-num most recent
+            boolean dark = true;
+            int rows = 0; // own row counter, so skipped null entries leave no gaps in the list
+            for (int i = 0; i < crawlerList.length; i++) {
+                plasmaCrawlNURL.entry urle = crawlerList[i];
+                if (urle == null) continue;
+                yacySeed initiator = yacyCore.seedDB.getConnected(urle.initiator());
+                String row = "crawler-queue_list_" + rows + "_";
+                prop.put(row + "dark", ((dark) ? 1 : 0));
+                prop.put(row + "initiator", ((initiator == null) ? "proxy" : initiator.getName()));
+                prop.put(row + "depth", urle.depth());
+                prop.put(row + "modified", daydate(urle.loaddate()));
+                prop.put(row + "anchor", urle.name());
+                prop.put(row + "url", urle.url());
+                dark = !dark;
+                rows++;
+            }
+            prop.put("crawler-queue_list", rows);
+        }
+
+        return prop;
+    }
+}
+
+
+
diff --git a/htroot/IndexCreate_p.html b/htroot/IndexCreate_p.html
index aea740b2f..c2d3dcce3 100644
--- a/htroot/IndexCreate_p.html
+++ b/htroot/IndexCreate_p.html
@@ -6,29 +6,9 @@
#[header]#
-
+#[submenuIndexCreate]#
+
Index Creation
-
-
-
Start Crawling Job:
@@ -237,129 +217,18 @@ No remote crawl peers availible.
#(/remoteCrawlPeers)#
-
-
-
-#(rejected)#
-::
-
-There are #[num]# entries in the rejected-queue:
-
-
-#{list}#
-
-#[initiator]# |
-#[executor]# |
-#[url]# |
-#[failreason]# |
-
-#{/list}#
-
-#(/rejected)#
-
-
-
-
Indexing Queue:
-#(indexing-queue)#
-The indexing queue is empty
-::
-There are #[num]# entries in the indexing queue:
-
-
-#{list}#
-
-#[initiator]# |
-#[depth]# |
-#[modified]# |
-#[href]# |
-#[anchor]# |
-#[url]# |
-
-#{/list}#
-
-#(/indexing-queue)#
-
-
-
-
Loader Set:
-#(loader-set)#
-The loader set is empty
-::
-There are #[num]# entries in the loader set:
-
-
-#{list}#
-
-#[initiator]# |
-#[depth]# |
-#[url]# |
-
-#{/list}#
-
-#(/loader-set)#
-
+
-
-
Crawler Queue:
-#(crawler-queue)#
-The crawler queue is empty
-::
-There are #[num]# entries in the crawler queue. Showing #[show-num]# most recent entries:
-
-
-#{list}#
-
-#[initiator]# |
-#[depth]# |
-#[modified]# |
-#[anchor]# |
-#[url]# |
-
-#{/list}#
-
-
+
-#(/crawler-queue)#
-
+
#[footer]#