diff --git a/htroot/ConfigPHPBB3Search.html b/htroot/ConfigPHPBB3Search.html
index 39013786c..575258602 100644
--- a/htroot/ConfigPHPBB3Search.html
+++ b/htroot/ConfigPHPBB3Search.html
@@ -6,7 +6,7 @@
#%env/templates/header.template%#
- #%env/templates/submenuPortalIntegration.template%#
+ #%env/templates/submenuIndexCreate.template%#
Integration in phpBB3
It is possible to insert forum pages into the YaCy index using a database import of forum postings.
diff --git a/htroot/ConfigWikiSearch.html b/htroot/ConfigWikiSearch.html
index d156e9e0e..f4bd8d35b 100644
--- a/htroot/ConfigWikiSearch.html
+++ b/htroot/ConfigWikiSearch.html
@@ -6,7 +6,7 @@
#%env/templates/header.template%#
- #%env/templates/submenuPortalIntegration.template%#
+ #%env/templates/submenuIndexCreate.template%#
Integration in MediaWiki
It is possible to insert wiki pages into the YaCy index using a web crawl on those pages.
diff --git a/htroot/CrawlProfileEditor_p.html b/htroot/CrawlProfileEditor_p.html
index df04fae36..b2ddb86e0 100644
--- a/htroot/CrawlProfileEditor_p.html
+++ b/htroot/CrawlProfileEditor_p.html
@@ -6,7 +6,7 @@
#%env/templates/header.template%#
- #%env/templates/submenuIndexCreate.template%#
+ #%env/templates/submenuCrawlMonitor.template%#
Crawl Profile Editor
Crawl profiles hold information about a specific URL and are used internally to perform the crawl it belongs to.
diff --git a/htroot/IndexCreateLoaderQueue_p.html b/htroot/IndexCreateLoaderQueue_p.html
index c68116386..5257fecf3 100644
--- a/htroot/IndexCreateLoaderQueue_p.html
+++ b/htroot/IndexCreateLoaderQueue_p.html
@@ -6,7 +6,7 @@
#%env/templates/header.template%#
- #%env/templates/submenuIndexCreate.template%#
+ #%env/templates/submenuCrawlMonitor.template%#
Loader Queue
diff --git a/htroot/IndexCreateParserErrors_p.html b/htroot/IndexCreateParserErrors_p.html
index 1cffbc24e..b3e8c60b1 100644
--- a/htroot/IndexCreateParserErrors_p.html
+++ b/htroot/IndexCreateParserErrors_p.html
@@ -6,7 +6,7 @@
#%env/templates/header.template%#
- #%env/templates/submenuIndexCreate.template%#
+ #%env/templates/submenuCrawlMonitor.template%#
Parser Errors
#(rejected)#
diff --git a/htroot/IndexCreateWWWGlobalQueue_p.html b/htroot/IndexCreateWWWGlobalQueue_p.html
index b2c8d8dec..f5258fbc2 100644
--- a/htroot/IndexCreateWWWGlobalQueue_p.html
+++ b/htroot/IndexCreateWWWGlobalQueue_p.html
@@ -6,7 +6,7 @@
#%env/templates/header.template%#
- #%env/templates/submenuIndexCreate.template%#
+ #%env/templates/submenuCrawlMonitor.template%#
Global Crawl Queue
This queue stores the urls that shall be sent to other peers to perform a remote crawl.
diff --git a/htroot/IndexCreateWWWLocalQueue_p.html b/htroot/IndexCreateWWWLocalQueue_p.html
index fbfd4d714..06c82a121 100644
--- a/htroot/IndexCreateWWWLocalQueue_p.html
+++ b/htroot/IndexCreateWWWLocalQueue_p.html
@@ -6,7 +6,7 @@
#%env/templates/header.template%#
- #%env/templates/submenuIndexCreate.template%#
+ #%env/templates/submenuCrawlMonitor.template%#
Local Crawl Queue
This queue stores the urls that shall be crawled locally by this peer.
diff --git a/htroot/IndexCreateWWWRemoteQueue_p.html b/htroot/IndexCreateWWWRemoteQueue_p.html
index cdde8bb54..d771bf227 100644
--- a/htroot/IndexCreateWWWRemoteQueue_p.html
+++ b/htroot/IndexCreateWWWRemoteQueue_p.html
@@ -6,7 +6,7 @@
#%env/templates/header.template%#
- #%env/templates/submenuIndexCreate.template%#
+ #%env/templates/submenuCrawlMonitor.template%#
Remote Crawl Queue
This queue stores the urls that other peers sent to you in order to perform a remote crawl for them.
diff --git a/htroot/IndexImport_p.html b/htroot/IndexImport_p.html
deleted file mode 100644
index 172b7957b..000000000
--- a/htroot/IndexImport_p.html
+++ /dev/null
@@ -1,270 +0,0 @@
-
-
-
-
- YaCy '#[clientname]#': Crawling Queue Import
- #%env/templates/metas.template%#
-
-
-
- #%env/templates/header.template%#
- #%env/templates/submenuIndexControl.template%#
-
- Crawling Queue Import
- #(error)#
- ::
- #[error_msg]#
- ::
- Import Job with the same path already started.
- ::
- #[error_msg]#
- #[error_stackTrace]#
- #(/error)#
-
-
-
- Currently running jobs
-
-
-
-
-
-
-
- #{running.jobs}#
-
- #{/running.jobs}#
-
-
-
-
-
- Last Refresh: #[date]#
-
- Usage Examples:
-
-
-
- Crawling Queue Import:
-
- Example Path: E:\PLASMADB\
-
-
- Requirements:
-
-
- You need to have at least the following directories and files in this path:
-
-
-
- Name |
- Type |
- Writeable |
- Description |
-
-
- crawlProfiles0.db |
- File |
- No |
- Contains data about the crawljob an URL belongs to |
-
-
- urlNotice1.db |
- File |
- Yes |
- The crawling queue |
-
-
- urlNoticeImage0.stack |
- File |
- Yes |
- Various stack files that belong to the crawling queue |
-
- urlNoticeImage0.stack |
- urlNoticeLimit0.stack |
- urlNoticeLocal0.stack |
- urlNoticeMovie0.stack |
- urlNoticeMusic0.stack |
- urlNoticeOverhang0.stack |
- urlNoticeRemote0.stack |
-
-
- #%env/templates/footer.template%#
-
-
\ No newline at end of file
diff --git a/htroot/IndexImport_p.java b/htroot/IndexImport_p.java
deleted file mode 100644
index 327c34dac..000000000
--- a/htroot/IndexImport_p.java
+++ /dev/null
@@ -1,197 +0,0 @@
-//IndexTransfer_p.java
-//-----------------------
-//part of the AnomicHTTPD caching proxy
-//(C) by Michael Peter Christen; mc@yacy.net
-//first published on http://www.anomic.de
-//Frankfurt, Germany, 2005
-//
-//This file is contributed by Martin Thelian
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-//This program is free software; you can redistribute it and/or modify
-//it under the terms of the GNU General Public License as published by
-//the Free Software Foundation; either version 2 of the License, or
-//(at your option) any later version.
-//
-//This program is distributed in the hope that it will be useful,
-//but WITHOUT ANY WARRANTY; without even the implied warranty of
-//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-//GNU General Public License for more details.
-//
-//You should have received a copy of the GNU General Public License
-//along with this program; if not, write to the Free Software
-//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-//You must compile this file with
-//javac -classpath .:../Classes IndexControl_p.java
-//if the shell's current path is HTROOT
-
-import java.io.PrintStream;
-import java.util.Date;
-
-import net.yacy.kelondro.logging.Log;
-import net.yacy.kelondro.util.ByteBuffer;
-import net.yacy.kelondro.util.DateFormatter;
-
-import de.anomic.crawler.Importer;
-import de.anomic.crawler.NoticeURLImporter;
-import de.anomic.http.server.RequestHeader;
-import de.anomic.search.Segment;
-import de.anomic.search.Segments;
-import de.anomic.search.Switchboard;
-import de.anomic.server.serverObjects;
-import de.anomic.server.serverSwitch;
-
-public final class IndexImport_p {
-
- public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
- // return variable that accumulates replacements
- final Switchboard sb = (Switchboard) env;
- final serverObjects prop = new serverObjects();
-
- int activeCount = 0;
-
- // get segment
- Segment indexSegment = null;
- if (post != null && post.containsKey("segment")) {
- String segmentName = post.get("segment");
- if (sb.indexSegments.segmentExist(segmentName)) {
- indexSegment = sb.indexSegments.segment(segmentName);
- }
- } else {
- // take default segment
- indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
- }
-
- if (post != null) {
- if (post.containsKey("startIndexDbImport")) {
- try {
- final boolean startImport = true;
- if (startImport) {
- final Importer importerThread = new NoticeURLImporter(
- sb.queuesRoot,
- sb.crawlQueues,
- sb.crawler.profilesActiveCrawls,
- sb.dbImportManager);
-
- if (importerThread != null) {
- importerThread.setJobID(sb.dbImportManager.generateUniqueJobID());
- importerThread.startIt();
- }
- prop.put("LOCATION","");
- return prop;
- }
- } catch (final Exception e) {
- final ByteBuffer errorMsg = new ByteBuffer(100);
- final PrintStream errorOut = new PrintStream(errorMsg);
- Log.logException(e);
-
- prop.put("error", "3");
- prop.putHTML("error_error_msg",e.toString());
- prop.putHTML("error_error_stackTrace",errorMsg.toString().replaceAll("\n","
"));
-
- errorOut.close();
- }
- } else if (post.containsKey("clearFinishedJobList")) {
- sb.dbImportManager.finishedJobs.clear();
- prop.put("LOCATION", "");
- return prop;
- } else if (
- (post.containsKey("stopIndexDbImport")) ||
- (post.containsKey("pauseIndexDbImport")) ||
- (post.containsKey("continueIndexDbImport"))
- ) {
- // get the job nr of the thread
- final String jobID = post.get("jobNr");
- final Importer importer = sb.dbImportManager.getImporterByID(Integer.valueOf(jobID).intValue());
- if (importer != null) {
- if (post.containsKey("stopIndexDbImport")) {
- try {
- importer.stopIt();
- } catch (final InterruptedException e) {
- // TODO Auto-generated catch block
- Log.logException(e);
- }
- } else if (post.containsKey("pauseIndexDbImport")) {
- importer.pauseIt();
- } else if (post.containsKey("continueIndexDbImport")) {
- importer.continueIt();
- }
- }
- prop.put("LOCATION","");
- return prop;
- }
- }
-
- prop.putNum("wcount", indexSegment.termIndex().sizesMax());
- prop.putNum("ucount", indexSegment.urlMetadata().size());
-
- /*
- * Loop over all currently running jobs
- */
- final Importer[] importThreads = sb.dbImportManager.getRunningImporter();
- activeCount = importThreads.length;
-
- for (int i=0; i < activeCount; i++) {
- final Importer currThread = importThreads[i];
-
- // get import type
- prop.put("running.jobs_" + i + "_type", currThread.getJobType());
-
- // root path of the source db
- final String fullName = currThread.getJobName();
- final String shortName = (fullName.length()>30)?fullName.substring(0,12) + "..." + fullName.substring(fullName.length()-22,fullName.length()):fullName;
- prop.put("running.jobs_" + i + "_fullName",fullName);
- prop.put("running.jobs_" + i + "_shortName",shortName);
-
- // specifies if the importer is still running
- prop.put("running.jobs_" + i + "_stopped", currThread.isStopped() ? "0" : "1");
-
- // specifies if the importer was paused
- prop.put("running.jobs_" + i + "_paused", currThread.isPaused() ? "1" : "0");
-
- // setting the status
- prop.put("running.jobs_" + i + "_runningStatus", currThread.isPaused() ? "2" : currThread.isStopped() ? "0" : "1");
-
- // other information
- prop.putNum("running.jobs_" + i + "_percent", currThread.getProcessingStatusPercent());
- prop.put("running.jobs_" + i + "_elapsed", DateFormatter.formatInterval(currThread.getElapsedTime()));
- prop.put("running.jobs_" + i + "_estimated", DateFormatter.formatInterval(currThread.getEstimatedTime()));
- prop.putHTML("running.jobs_" + i + "_status", currThread.getStatus().replaceAll("\n", "
"));
-
- // job number of the importer thread
- prop.put("running.jobs_" + i + "_job_nr", currThread.getJobID());
- }
- prop.put("running.jobs", activeCount);
-
- /*
- * Loop over all finished jobs
- */
- final Importer[] finishedJobs = sb.dbImportManager.getFinishedImporter();
- for (int i=0; i<finishedJobs.length; i++) {
- final Importer currThread = finishedJobs[i];
- final String error = currThread.getError();
- final String fullName = currThread.getJobName();
- final String shortName = (fullName.length()>30)?fullName.substring(0,12) + "..." + fullName.substring(fullName.length()-22,fullName.length()):fullName;
- prop.put("finished.jobs_" + i + "_type", currThread.getJobType());
- prop.put("finished.jobs_" + i + "_fullName", fullName);
- prop.put("finished.jobs_" + i + "_shortName", shortName);
- if (error != null) {
- prop.put("finished.jobs_" + i + "_runningStatus", "1");
- prop.putHTML("finished.jobs_" + i + "_runningStatus_errorMsg", error.replaceAll("\n", "
"));
- } else {
- prop.put("finished.jobs_" + i + "_runningStatus", "0");
- }
- prop.putNum("finished.jobs_" + i + "_percent", currThread.getProcessingStatusPercent());
- prop.put("finished.jobs_" + i + "_elapsed", DateFormatter.formatInterval(currThread.getElapsedTime()));
- prop.putHTML("finished.jobs_" + i + "_status", currThread.getStatus().replaceAll("\n", "
"));
- }
- prop.put("finished.jobs",finishedJobs.length);
-
- prop.put("date",(new Date()).toString());
- return prop;
- }
-}
diff --git a/htroot/ProxyIndexingMonitor_p.html b/htroot/ProxyIndexingMonitor_p.html
index 7cd786a5f..865fd1c31 100644
--- a/htroot/ProxyIndexingMonitor_p.html
+++ b/htroot/ProxyIndexingMonitor_p.html
@@ -6,14 +6,13 @@
#%env/templates/header.template%#
+ #%env/templates/submenuIndexCreate.template%#
Indexing with Proxy
- This is the control page for web pages that your peer has indexed during the current application run-time
- as result of proxy fetch/prefetch.
- No personal or protected page is indexed;
+ YaCy can be used to 'scrape' content from pages that pass the integrated caching HTTP proxy.
+ When scraping proxy pages, no personal or protected page is indexed;
those pages are detected by properties in the HTTP header (like Cookie-Use, or HTTP Authorization)
- or by POST-Parameters (either in URL or as HTTP protocol)
- and automatically excluded from indexing.
+ or by POST-Parameters (either in URL or as HTTP protocol) and automatically excluded from indexing.