diff --git a/htroot/IndexCreate_p.html b/htroot/CrawlStartExpert_p.html
similarity index 78%
rename from htroot/IndexCreate_p.html
rename to htroot/CrawlStartExpert_p.html
index f946416cd..5066f61f6 100644
--- a/htroot/IndexCreate_p.html
+++ b/htroot/CrawlStartExpert_p.html
@@ -1,7 +1,7 @@
- YaCy '#[clientname]#': Index Creation
+ YaCy '#[clientname]#': Crawl Start (expert)
#%env/templates/metas.template%#
@@ -9,7 +9,7 @@
#%env/templates/header.template%#
#%env/templates/submenuIndexCreate.template%#
- Index Creation
+ Expert Crawl Start
Start Crawling Job:
@@ -218,100 +218,6 @@
-
- #(info)#
- ::
- Crawling paused successfully.
- ::
- Continue crawling.
- #(/info)#
-
-
- #(refreshbutton)#
- ::
-
- #(/refreshbutton)#
-
-
- Recently started remote crawls in progress:
-
-
- #{otherCrawlStartInProgress}#
-
- #[cre]# |
- #[peername]# |
- #[startURL]# |
- #[intention]# |
- #[generalDepth]# |
- #(crawlingQ)#no::yes#(/crawlingQ)# |
-
- #{/otherCrawlStartInProgress}#
-
- Recently started remote crawls, finished:
-
-
- #{otherCrawlStartFinished}#
-
- #[cre]# |
- #[peername]# |
- #[startURL]# |
- #[intention]# |
- #[generalDepth]# |
- #(crawlingQ)#no::yes#(/crawlingQ)# |
-
- #{/otherCrawlStartFinished}#
-
- Remote Crawling Peers:
- #(remoteCrawlPeers)#
- No remote crawl peers available.
- ::
- #[num]# peers available for remote crawling.
-
-
-
-
-
-
- Idle Peers |
-
- #{available}##[name]# (#[due]# seconds due) #{/available}#
- |
-
-
- Busy Peers |
-
- #{busy}##[name]# (#[due]# seconds due) #{/busy}#
- |
-
-
- #(/remoteCrawlPeers)#
-
#%env/templates/footer.template%#
diff --git a/htroot/CrawlStartExpert_p.java b/htroot/CrawlStartExpert_p.java
new file mode 100644
index 000000000..dc825b869
--- /dev/null
+++ b/htroot/CrawlStartExpert_p.java
@@ -0,0 +1,99 @@
+// CrawlStartExpert_p.java
+// (C) 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 02.12.2004 as IndexCreate_p.java on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import de.anomic.http.httpHeader;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public class CrawlStartExpert_p {
+
+    // The "crawlingIfOlder" setting is treated as a number of minutes (values below 60 are
+    // shown with the minute unit selected); these are the larger units expressed in minutes.
+    private static final int MINUTES_PER_HOUR = 60;
+    private static final int MINUTES_PER_DAY = 60 * 24;
+    private static final int MINUTES_PER_MONTH = 60 * 24 * 30;
+    private static final int MINUTES_PER_YEAR = 60 * 24 * 365;
+
+    // unit names as they appear inside the "crawlingIfOlderUnit<name>Check" property keys
+    private static final String[] AGE_UNITS = {"Year", "Month", "Day", "Hour", "Minute"};
+
+    /**
+     * Fills the template properties of the expert crawl start page with the
+     * crawl settings currently stored in the configuration.
+     *
+     * @param header request header; not read by this method
+     * @param post   submitted form values; not read by this method
+     * @param env    switch environment the configuration values are read from
+     * @return the properties that replace the template placeholders
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        // return variable that accumulates replacements
+        serverObjects prop = new serverObjects();
+
+        // define visible variables
+        prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
+        prop.put("crawlingDepth", env.getConfig("crawlingDepth", "0"));
+        prop.put("crawlingFilter", env.getConfig("crawlingFilter", "0"));
+
+        putCrawlingIfOlder(prop, (int) env.getConfigLong("crawlingIfOlder", -1));
+
+        // -1 means "not configured": the checkbox stays off and a default value is offered
+        int crawlingDomFilterDepth = (int) env.getConfigLong("crawlingDomFilterDepth", -1);
+        prop.put("crawlingDomFilterCheck", (crawlingDomFilterDepth == -1) ? 0 : 1);
+        prop.put("crawlingDomFilterDepth", (crawlingDomFilterDepth == -1) ? 1 : crawlingDomFilterDepth);
+        int crawlingDomMaxPages = (int) env.getConfigLong("crawlingDomMaxPages", -1);
+        prop.put("crawlingDomMaxCheck", (crawlingDomMaxPages == -1) ? 0 : 1);
+        prop.put("crawlingDomMaxPages", (crawlingDomMaxPages == -1) ? 10000 : crawlingDomMaxPages);
+
+        prop.put("crawlingQChecked", checked(env, "crawlingQ"));
+        prop.put("storeHTCacheChecked", checked(env, "storeHTCache"));
+        prop.put("indexingTextChecked", checked(env, "indexText"));
+        prop.put("indexingMediaChecked", checked(env, "indexMedia"));
+        prop.put("crawlOrderChecked", checked(env, "crawlOrder"));
+
+        putCrawlingSpeed(prop, env);
+
+        prop.put("xsstopwChecked", checked(env, "xsstopw"));
+        prop.put("xdstopwChecked", checked(env, "xdstopw"));
+        prop.put("xpstopwChecked", checked(env, "xpstopw"));
+
+        // return rewrite properties
+        return prop;
+    }
+
+    /** Returns 1 if the configuration value stored under key is exactly "true", otherwise 0. */
+    private static int checked(serverSwitch env, String key) {
+        return env.getConfig(key, "").equals("true") ? 1 : 0;
+    }
+
+    /**
+     * Writes the "re-crawl if older than" properties: the on/off flag, the number
+     * to display and which of the five unit selectors is active.
+     */
+    private static void putCrawlingIfOlder(serverObjects prop, int crawlingIfOlder) {
+        prop.put("crawlingIfOlderCheck", (crawlingIfOlder == -1) ? 0 : 1);
+
+        final int number;
+        final String unit;
+        if ((crawlingIfOlder == -1) || (crawlingIfOlder == Integer.MAX_VALUE)) {
+            // unset or unlimited: show -1 with the year unit preselected
+            number = -1;
+            unit = "Year";
+        } else if (crawlingIfOlder >= MINUTES_PER_YEAR) {
+            number = Math.round((float) crawlingIfOlder / (float) MINUTES_PER_YEAR);
+            unit = "Year";
+        } else if (crawlingIfOlder >= MINUTES_PER_MONTH) {
+            number = Math.round((float) crawlingIfOlder / (float) MINUTES_PER_MONTH);
+            unit = "Month";
+        } else if (crawlingIfOlder >= MINUTES_PER_DAY) {
+            number = Math.round((float) crawlingIfOlder / (float) MINUTES_PER_DAY);
+            unit = "Day";
+        } else if (crawlingIfOlder >= MINUTES_PER_HOUR) {
+            number = Math.round((float) crawlingIfOlder / (float) MINUTES_PER_HOUR);
+            unit = "Hour";
+        } else {
+            number = crawlingIfOlder;
+            unit = "Minute";
+        }
+
+        prop.put("crawlingIfOlderNumber", number);
+        // exactly one unit selector is switched on, all others are explicitly switched off
+        for (int i = 0; i < AGE_UNITS.length; i++) {
+            prop.put("crawlingIfOlderUnit" + AGE_UNITS[i] + "Check", AGE_UNITS[i].equals(unit) ? 1 : 0);
+        }
+    }
+
+    /**
+     * Derives the crawl speed (pages per minute) from the configured local-crawl
+     * busy-sleep value and writes the max/custom/min speed selector properties.
+     */
+    private static void putCrawlingSpeed(serverObjects prop, serverSwitch env) {
+        long busySleep;
+        try {
+            busySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP, "100"));
+        } catch (NumberFormatException e) {
+            // a malformed configuration entry must not break the page; use the default instead
+            busySleep = 100;
+        }
+        // a busy-sleep of 0 would divide by zero and is shown as maximum speed
+        int ppm = (busySleep == 0) ? 1000 : (int) (60000L / busySleep);
+        boolean custom = (ppm > 10) && (ppm < 1000);
+        prop.put("crawlingSpeedMaxChecked", (ppm >= 1000) ? 1 : 0);
+        prop.put("crawlingSpeedCustChecked", custom ? 1 : 0);
+        prop.put("crawlingSpeedMinChecked", (ppm <= 10) ? 1 : 0);
+        prop.put("customPPMdefault", custom ? Integer.toString(ppm) : "");
+    }
+
+}
+
+
+
diff --git a/htroot/CrawlStartSimple_p.html b/htroot/CrawlStartSimple_p.html
new file mode 100644
index 000000000..57b85eb1e
--- /dev/null
+++ b/htroot/CrawlStartSimple_p.html
@@ -0,0 +1,135 @@
+
+
+
+ YaCy '#[clientname]#': Crawl Start (easy)
+ #%env/templates/metas.template%#
+
+
+
+
+ #%env/templates/header.template%#
+ #%env/templates/submenuIndexCreate.template%#
+ Easy Crawl Start
+
+
+ Start Crawling Job:
+ You can define URLs as start points for Web page crawling and start crawling here.
+ "Crawling" means that YaCy will download the given website, extract all links in it
+ and then download the content behind these links.
+ This is repeated as long as specified under "Crawling Depth".
+
+
+
+
+ Recently started remote crawls in progress:
+
+
+ #{otherCrawlStartInProgress}#
+
+ #[cre]# |
+ #[peername]# |
+ #[startURL]# |
+ #[intention]# |
+ #[generalDepth]# |
+ #(crawlingQ)#no::yes#(/crawlingQ)# |
+
+ #{/otherCrawlStartInProgress}#
+
+ Recently started remote crawls, finished:
+
+
+ #{otherCrawlStartFinished}#
+
+ #[cre]# |
+ #[peername]# |
+ #[startURL]# |
+ #[intention]# |
+ #[generalDepth]# |
+ #(crawlingQ)#no::yes#(/crawlingQ)# |
+
+ #{/otherCrawlStartFinished}#
+
+ Remote Crawling Peers:
+ #(remoteCrawlPeers)#
+ No remote crawl peers available.
+ ::
+ #[num]# peers available for remote crawling.
+
+
+
+
+
+
+ Idle Peers |
+
+ #{available}##[name]# (#[due]# seconds due) #{/available}#
+ |
+
+
+ Busy Peers |
+
+ #{busy}##[name]# (#[due]# seconds due) #{/busy}#
+ |
+
+
+ #(/remoteCrawlPeers)#
+
+ #%env/templates/footer.template%#
+
+
diff --git a/htroot/IndexCreate_p.java b/htroot/CrawlStartSimple_p.java
similarity index 77%
rename from htroot/IndexCreate_p.java
rename to htroot/CrawlStartSimple_p.java
index 921ac3d5e..ecdf849ad 100644
--- a/htroot/IndexCreate_p.java
+++ b/htroot/CrawlStartSimple_p.java
@@ -1,245 +1,197 @@
-// IndexCreate_p.java
-// -----------------------
-// part of the AnomicHTTPD caching proxy
-// (C) by Michael Peter Christen; mc@anomic.de
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-// last major change: 02.12.2004
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-//
-// Using this software in any meaning (reading, learning, copying, compiling,
-// running) means that you agree that the Author(s) is (are) not responsible
-// for cost, loss of data or any harm that may be caused directly or indirectly
-// by usage of this softare or this documentation. The usage of this software
-// is on your own risk. The installation and usage (starting/running) of this
-// software may allow other people or application to access your computer and
-// any attached devices and is highly dependent on the configuration of the
-// software which must be done by the user of the software; the author(s) is
-// (are) also not responsible for proper configuration and usage of the
-// software, even if provoked by documentation provided together with
-// the software.
-//
-// Any changes to this file according to the GPL as documented in the file
-// gpl.txt aside this file in the shipment you received can be done to the
-// lines that follows this copyright notice here, but changes must not be
-// done inside the copyright notive above. A re-distribution must contain
-// the intact and unchanged copyright notice.
-// Contributions and changes to the program code must be marked as such.
-
-// You must compile this file with
-// javac -classpath .:../classes IndexCreate_p.java
-// if the shell's current path is HTROOT
-
-import java.io.IOException;
-import java.util.Enumeration;
-
-import de.anomic.http.httpHeader;
-import de.anomic.plasma.plasmaURL;
-import de.anomic.plasma.plasmaSwitchboard;
-import de.anomic.server.serverObjects;
-import de.anomic.server.serverSwitch;
-import de.anomic.yacy.yacyCore;
-import de.anomic.yacy.yacyNewsPool;
-import de.anomic.yacy.yacyNewsRecord;
-import de.anomic.yacy.yacySeed;
-
-public class IndexCreate_p {
-
- public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
- // return variable that accumulates replacements
- plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
- serverObjects prop = new serverObjects();
-
- prop.put("info", 0);
- prop.put("refreshbutton", 0);
-
- if (post != null) {
-
- if (post.containsKey("pausecrawlqueue")) {
- switchboard.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL);
- prop.put("info", 1);//crawling paused
- }
-
- if (post.containsKey("continuecrawlqueue")) {
- switchboard.continueCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL);
- prop.put("info", 2);//crawling continued
- }
-
- }
-
- // define visible variables
- prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
- prop.put("crawlingDepth", env.getConfig("crawlingDepth", "0"));
- prop.put("crawlingFilter", env.getConfig("crawlingFilter", "0"));
-
- int crawlingIfOlder = (int) env.getConfigLong("crawlingIfOlder", -1);
- prop.put("crawlingIfOlderCheck", (crawlingIfOlder == -1) ? 0 : 1);
- prop.put("crawlingIfOlderUnitYearCheck", 0);
- prop.put("crawlingIfOlderUnitMonthCheck", 0);
- prop.put("crawlingIfOlderUnitDayCheck", 0);
- prop.put("crawlingIfOlderUnitHourCheck", 0);
- prop.put("crawlingIfOlderUnitMinuteCheck", 0);
- if ((crawlingIfOlder == -1) || (crawlingIfOlder == Integer.MAX_VALUE)) {
- prop.put("crawlingIfOlderNumber", -1);
- prop.put("crawlingIfOlderUnitYearCheck", 1);
- } else if (crawlingIfOlder >= 60*24*365) {
- prop.put("crawlingIfOlderNumber", Math.round((float)crawlingIfOlder / (float)(60*24*365)));
- prop.put("crawlingIfOlderUnitYearCheck", 1);
- } else if (crawlingIfOlder >= 60*24*30) {
- prop.put("crawlingIfOlderNumber", Math.round((float)crawlingIfOlder / (float)(60*24*30)));
- prop.put("crawlingIfOlderUnitMonthCheck", 1);
- } else if (crawlingIfOlder >= 60*24) {
- prop.put("crawlingIfOlderNumber", Math.round((float)crawlingIfOlder / (float)(60*24)));
- prop.put("crawlingIfOlderUnitDayCheck", 1);
- } else if (crawlingIfOlder >= 60) {
- prop.put("crawlingIfOlderNumber", Math.round((float)crawlingIfOlder / 60f));
- prop.put("crawlingIfOlderUnitHourCheck", 1);
- } else {
- prop.put("crawlingIfOlderNumber", crawlingIfOlder);
- prop.put("crawlingIfOlderUnitMinuteCheck", 1);
- }
- int crawlingDomFilterDepth = (int) env.getConfigLong("crawlingDomFilterDepth", -1);
- prop.put("crawlingDomFilterCheck", (crawlingDomFilterDepth == -1) ? 0 : 1);
- prop.put("crawlingDomFilterDepth", (crawlingDomFilterDepth == -1) ? 1 : crawlingDomFilterDepth);
- int crawlingDomMaxPages = (int) env.getConfigLong("crawlingDomMaxPages", -1);
- prop.put("crawlingDomMaxCheck", (crawlingDomMaxPages == -1) ? 0 : 1);
- prop.put("crawlingDomMaxPages", (crawlingDomMaxPages == -1) ? 10000 : crawlingDomMaxPages);
- prop.put("crawlingQChecked", env.getConfig("crawlingQ", "").equals("true") ? 1 : 0);
- prop.put("storeHTCacheChecked", env.getConfig("storeHTCache", "").equals("true") ? 1 : 0);
- prop.put("indexingTextChecked", env.getConfig("indexText", "").equals("true") ? 1 : 0);
- prop.put("indexingMediaChecked", env.getConfig("indexMedia", "").equals("true") ? 1 : 0);
- prop.put("crawlOrderChecked", env.getConfig("crawlOrder", "").equals("true") ? 1 : 0);
-
- long LCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP, "100"));
- int LCppm = (LCbusySleep == 0) ? 1000 : (int) (60000L / LCbusySleep);
- prop.put("crawlingSpeedMaxChecked", (LCppm >= 1000) ? 1 : 0);
- prop.put("crawlingSpeedCustChecked", ((LCppm > 10) && (LCppm < 1000)) ? 1 : 0);
- prop.put("crawlingSpeedMinChecked", (LCppm <= 10) ? 1 : 0);
- prop.put("customPPMdefault", ((LCppm > 10) && (LCppm < 1000)) ? Integer.toString(LCppm) : "");
-
- prop.put("xsstopwChecked", env.getConfig("xsstopw", "").equals("true") ? 1 : 0);
- prop.put("xdstopwChecked", env.getConfig("xdstopw", "").equals("true") ? 1 : 0);
- prop.put("xpstopwChecked", env.getConfig("xpstopw", "").equals("true") ? 1 : 0);
-
- int queueStackSize = switchboard.sbQueue.size();
- int loaderThreadsSize = switchboard.cacheLoader.size();
- int crawlerListSize = switchboard.noticeURL.stackSize();
- int completequeue = queueStackSize + loaderThreadsSize + crawlerListSize;
-
- if ((completequeue > 0) || ((post != null) && (post.containsKey("refreshpage")))) {
- prop.put("refreshbutton", 1);
- }
-
- // create prefetch table
- boolean dark = true;
-
- // create other peer crawl table using YaCyNews
- int availableNews = yacyCore.newsPool.size(yacyNewsPool.INCOMING_DB);
- int showedCrawl = 0;
- yacyNewsRecord record;
- yacySeed peer;
- String peername;
- try {
- for (int c = 0; c < availableNews; c++) {
- record = yacyCore.newsPool.get(yacyNewsPool.INCOMING_DB, c);
- if (record == null) continue;
- if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) {
- peer = yacyCore.seedDB.get(record.originator());
- if (peer == null) peername = record.originator(); else peername = peer.getName();
- prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", ((dark) ? 1 : 0));
- prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created());
- prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername);
- prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString());
- prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention").toString());
- prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth"));
- prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0);
- showedCrawl++;
- if (showedCrawl > 20) break;
- }
-
- }
- } catch (IOException e) {}
- prop.put("otherCrawlStartInProgress", showedCrawl);
-
- // finished remote crawls
- availableNews = yacyCore.newsPool.size(yacyNewsPool.PROCESSED_DB);
- showedCrawl = 0;
- try {
- for (int c = 0; c < availableNews; c++) {
- record = yacyCore.newsPool.get(yacyNewsPool.PROCESSED_DB, c);
- if (record == null) continue;
- if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) {
- peer = yacyCore.seedDB.get(record.originator());
- if (peer == null) peername = record.originator(); else peername = peer.getName();
- prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", ((dark) ? 1 : 0));
- prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created());
- prop.put("otherCrawlStartFinished_" + showedCrawl + "_peername", peername);
- prop.put("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString());
- prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention").toString());
- prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth"));
- prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0);
- showedCrawl++;
- if (showedCrawl > 20) break;
- }
-
- }
- } catch (IOException e) {}
- prop.put("otherCrawlStartFinished", showedCrawl);
-
-
- // remote crawl peers
- if ((yacyCore.seedDB == null) || (yacyCore.seedDB.mySeed.isVirgin()) || (yacyCore.seedDB.mySeed.isJunior())) {
- prop.put("remoteCrawlPeers", 0);
- } else {
- Enumeration crawlavail = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, true);
- Enumeration crawlpendi = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, false);
- if ((!(crawlavail.hasMoreElements())) && (!(crawlpendi.hasMoreElements()))) {
- prop.put("remoteCrawlPeers", 0); //no peers availible
- } else {
- prop.put("remoteCrawlPeers", 1);
- int maxcount = 100;
- int availcount = 0;
- yacySeed seed;
- while ((availcount < maxcount) && (crawlavail.hasMoreElements())) {
- seed = (yacySeed) crawlavail.nextElement();
- prop.put("remoteCrawlPeers_available_" + availcount + "_name", seed.getName());
- prop.put("remoteCrawlPeers_available_" + availcount + "_due", (yacyCore.yacyTime() - seed.available));
- availcount++;
- }
- prop.put("remoteCrawlPeers_available", availcount);
- int pendicount = 0;
- while ((pendicount < maxcount) && (crawlpendi.hasMoreElements())) {
- seed = (yacySeed) crawlpendi.nextElement();
- prop.put("remoteCrawlPeers_busy_" + pendicount + "_name", seed.getName());
- prop.put("remoteCrawlPeers_busy_" + pendicount + "_due", (yacyCore.yacyTime() - seed.available));
- pendicount++;
- }
- prop.put("remoteCrawlPeers_busy", pendicount);
- prop.put("remoteCrawlPeers_num", (availcount + pendicount));
- }
-
- }
-
- prop.put("crawler-paused",(switchboard.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL))?0:1);
-
- // return rewrite properties
- return prop;
- }
-
-}
-
-
-
+// CrawlStartSimple_p.java
+// (C) 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 02.12.2004 as IndexCreate_p.java on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import java.io.IOException;
+import java.util.Enumeration;
+
+import de.anomic.http.httpHeader;
+import de.anomic.plasma.plasmaURL;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+import de.anomic.yacy.yacyCore;
+import de.anomic.yacy.yacyNewsPool;
+import de.anomic.yacy.yacyNewsRecord;
+import de.anomic.yacy.yacySeed;
+
+public class CrawlStartSimple_p {
+
+    // The "crawlingIfOlder" setting is treated as a number of minutes (values below 60 are
+    // shown with the minute unit selected); these are the larger units expressed in minutes.
+    private static final int MINUTES_PER_HOUR = 60;
+    private static final int MINUTES_PER_DAY = 60 * 24;
+    private static final int MINUTES_PER_MONTH = 60 * 24 * 30;
+    private static final int MINUTES_PER_YEAR = 60 * 24 * 365;
+
+    // unit names as they appear inside the "crawlingIfOlderUnit<name>Check" property keys
+    private static final String[] AGE_UNITS = {"Year", "Month", "Day", "Hour", "Minute"};
+
+    // upper bound for each of the idle/busy remote crawl peer lists
+    private static final int MAX_LISTED_PEERS = 100;
+
+    /**
+     * Fills the template properties of the simple crawl start page: the stored
+     * crawl settings, the tables of crawl starts announced by other peers
+     * (in progress and finished) and the lists of remote crawl peers.
+     *
+     * @param header request header; not read by this method
+     * @param post   submitted form values; not read by this method
+     * @param env    switch environment the configuration values are read from
+     * @return the properties that replace the template placeholders
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        // return variable that accumulates replacements
+        serverObjects prop = new serverObjects();
+
+        // define visible variables
+        prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
+        prop.put("crawlingDepth", env.getConfig("crawlingDepth", "0"));
+        prop.put("crawlingFilter", env.getConfig("crawlingFilter", "0"));
+
+        putCrawlingIfOlder(prop, (int) env.getConfigLong("crawlingIfOlder", -1));
+
+        // -1 means "not configured": the checkbox stays off and a default value is offered
+        int crawlingDomFilterDepth = (int) env.getConfigLong("crawlingDomFilterDepth", -1);
+        prop.put("crawlingDomFilterCheck", (crawlingDomFilterDepth == -1) ? 0 : 1);
+        prop.put("crawlingDomFilterDepth", (crawlingDomFilterDepth == -1) ? 1 : crawlingDomFilterDepth);
+        int crawlingDomMaxPages = (int) env.getConfigLong("crawlingDomMaxPages", -1);
+        prop.put("crawlingDomMaxCheck", (crawlingDomMaxPages == -1) ? 0 : 1);
+        prop.put("crawlingDomMaxPages", (crawlingDomMaxPages == -1) ? 10000 : crawlingDomMaxPages);
+
+        prop.put("crawlingQChecked", checked(env, "crawlingQ"));
+        prop.put("storeHTCacheChecked", checked(env, "storeHTCache"));
+        prop.put("indexingTextChecked", checked(env, "indexText"));
+        prop.put("indexingMediaChecked", checked(env, "indexMedia"));
+        prop.put("crawlOrderChecked", checked(env, "crawlOrder"));
+
+        putCrawlingSpeed(prop, env);
+
+        prop.put("xsstopwChecked", checked(env, "xsstopw"));
+        prop.put("xdstopwChecked", checked(env, "xdstopw"));
+        prop.put("xpstopwChecked", checked(env, "xpstopw"));
+
+        // create other peer crawl tables using YaCyNews
+        putRemoteCrawlStarts(prop, "otherCrawlStartInProgress", false);
+        putRemoteCrawlStarts(prop, "otherCrawlStartFinished", true);
+
+        // remote crawl peers
+        putRemoteCrawlPeers(prop);
+
+        // return rewrite properties
+        return prop;
+    }
+
+    /** Returns 1 if the configuration value stored under key is exactly "true", otherwise 0. */
+    private static int checked(serverSwitch env, String key) {
+        return env.getConfig(key, "").equals("true") ? 1 : 0;
+    }
+
+    /**
+     * Writes the "re-crawl if older than" properties: the on/off flag, the number
+     * to display and which of the five unit selectors is active.
+     */
+    private static void putCrawlingIfOlder(serverObjects prop, int crawlingIfOlder) {
+        prop.put("crawlingIfOlderCheck", (crawlingIfOlder == -1) ? 0 : 1);
+
+        final int number;
+        final String unit;
+        if ((crawlingIfOlder == -1) || (crawlingIfOlder == Integer.MAX_VALUE)) {
+            // unset or unlimited: show -1 with the year unit preselected
+            number = -1;
+            unit = "Year";
+        } else if (crawlingIfOlder >= MINUTES_PER_YEAR) {
+            number = Math.round((float) crawlingIfOlder / (float) MINUTES_PER_YEAR);
+            unit = "Year";
+        } else if (crawlingIfOlder >= MINUTES_PER_MONTH) {
+            number = Math.round((float) crawlingIfOlder / (float) MINUTES_PER_MONTH);
+            unit = "Month";
+        } else if (crawlingIfOlder >= MINUTES_PER_DAY) {
+            number = Math.round((float) crawlingIfOlder / (float) MINUTES_PER_DAY);
+            unit = "Day";
+        } else if (crawlingIfOlder >= MINUTES_PER_HOUR) {
+            number = Math.round((float) crawlingIfOlder / (float) MINUTES_PER_HOUR);
+            unit = "Hour";
+        } else {
+            number = crawlingIfOlder;
+            unit = "Minute";
+        }
+
+        prop.put("crawlingIfOlderNumber", number);
+        // exactly one unit selector is switched on, all others are explicitly switched off
+        for (int i = 0; i < AGE_UNITS.length; i++) {
+            prop.put("crawlingIfOlderUnit" + AGE_UNITS[i] + "Check", AGE_UNITS[i].equals(unit) ? 1 : 0);
+        }
+    }
+
+    /**
+     * Derives the crawl speed (pages per minute) from the configured local-crawl
+     * busy-sleep value and writes the max/custom/min speed selector properties.
+     */
+    private static void putCrawlingSpeed(serverObjects prop, serverSwitch env) {
+        long busySleep;
+        try {
+            busySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP, "100"));
+        } catch (NumberFormatException e) {
+            // a malformed configuration entry must not break the page; use the default instead
+            busySleep = 100;
+        }
+        // a busy-sleep of 0 would divide by zero and is shown as maximum speed
+        int ppm = (busySleep == 0) ? 1000 : (int) (60000L / busySleep);
+        boolean custom = (ppm > 10) && (ppm < 1000);
+        prop.put("crawlingSpeedMaxChecked", (ppm >= 1000) ? 1 : 0);
+        prop.put("crawlingSpeedCustChecked", custom ? 1 : 0);
+        prop.put("crawlingSpeedMinChecked", (ppm <= 10) ? 1 : 0);
+        prop.put("customPPMdefault", custom ? Integer.toString(ppm) : "");
+    }
+
+    /**
+     * Writes one table of crawl-start news records into prop.
+     *
+     * @param prop     properties to write the rows and the row count into
+     * @param table    property name of the table; rows are stored as table_index_column
+     * @param finished true to read the processed news database, false for the incoming one
+     */
+    private static void putRemoteCrawlStarts(serverObjects prop, String table, boolean finished) {
+        int availableNews = yacyCore.newsPool.size(finished ? yacyNewsPool.PROCESSED_DB : yacyNewsPool.INCOMING_DB);
+        int showedCrawl = 0;
+        boolean dark = true;
+        yacyNewsRecord record;
+        yacySeed peer;
+        String peername;
+        String row;
+        try {
+            for (int c = 0; c < availableNews; c++) {
+                record = yacyCore.newsPool.get(finished ? yacyNewsPool.PROCESSED_DB : yacyNewsPool.INCOMING_DB, c);
+                if (record == null) continue;
+                if (!record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) continue;
+
+                // skip records that lack an attribute shown in the table; dereferencing a
+                // missing attribute would otherwise abort the whole page
+                if ((record.attributes().get("startURL") == null)
+                        || (record.attributes().get("intention") == null)
+                        || (record.attributes().get("generalDepth") == null)
+                        || (record.attributes().get("crawlingQ") == null)) continue;
+
+                // fall back to the originator value when the peer cannot be looked up
+                peer = (yacyCore.seedDB == null) ? null : yacyCore.seedDB.get(record.originator());
+                peername = (peer == null) ? record.originator() : peer.getName();
+
+                row = table + "_" + showedCrawl + "_";
+                prop.put(row + "dark", ((dark) ? 1 : 0));
+                prop.put(row + "cre", record.created());
+                prop.put(row + "peername", peername);
+                prop.put(row + "startURL", record.attributes().get("startURL").toString());
+                prop.put(row + "intention", record.attributes().get("intention").toString());
+                prop.put(row + "generalDepth", record.attributes().get("generalDepth"));
+                prop.put(row + "crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0);
+
+                // alternate the flag so that consecutive rows differ
+                dark = !dark;
+                showedCrawl++;
+                // checked after the increment, so at most 21 rows are written
+                if (showedCrawl > 20) break;
+            }
+        } catch (IOException e) {
+            // deliberately best-effort: the rows collected before the failure are still shown
+        }
+        prop.put(table, showedCrawl);
+    }
+
+    /**
+     * Writes the idle and busy remote crawl peer lists, or marks them as
+     * unavailable when there is no seed database, the own seed is virgin or
+     * junior, or no peer is offered.
+     */
+    private static void putRemoteCrawlPeers(serverObjects prop) {
+        if ((yacyCore.seedDB == null) || (yacyCore.seedDB.mySeed.isVirgin()) || (yacyCore.seedDB.mySeed.isJunior())) {
+            prop.put("remoteCrawlPeers", 0);
+            return;
+        }
+
+        Enumeration crawlavail = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, true);
+        Enumeration crawlpendi = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, false);
+        if ((!(crawlavail.hasMoreElements())) && (!(crawlpendi.hasMoreElements()))) {
+            prop.put("remoteCrawlPeers", 0); // no peers available
+            return;
+        }
+
+        prop.put("remoteCrawlPeers", 1);
+        int availcount = putPeerList(prop, "remoteCrawlPeers_available", crawlavail);
+        int pendicount = putPeerList(prop, "remoteCrawlPeers_busy", crawlpendi);
+        prop.put("remoteCrawlPeers_num", (availcount + pendicount));
+    }
+
+    /**
+     * Writes name and due time of up to MAX_LISTED_PEERS seeds taken from the
+     * enumeration under the given list name and returns how many were written.
+     */
+    private static int putPeerList(serverObjects prop, String list, Enumeration seeds) {
+        int count = 0;
+        yacySeed seed;
+        while ((count < MAX_LISTED_PEERS) && (seeds.hasMoreElements())) {
+            seed = (yacySeed) seeds.nextElement();
+            prop.put(list + "_" + count + "_name", seed.getName());
+            prop.put(list + "_" + count + "_due", (yacyCore.yacyTime() - seed.available));
+            count++;
+        }
+        prop.put(list, count);
+        return count;
+    }
+
+}
+
+
+
diff --git a/htroot/env/templates/header.template b/htroot/env/templates/header.template
index b1769816b..a9f007a6a 100644
--- a/htroot/env/templates/header.template
+++ b/htroot/env/templates/header.template
@@ -29,7 +29,7 @@