Start Crawling Job:
-You can define URLs as start points for Web page crawling and start that crawling here.
+You can define URLs as start points for Web page crawling and start crawling here. "Crawling" means that YaCy will download the given website, extract all links in it and then download the content behind these links. This process is repeated up to the depth specified under "Crawling Depth".
-
Existing start URL's are re-crawled.
- Other already visited URL's are sorted out as 'double'.
+
Existing start URLs are re-crawled.
+ Other already visited URLs are sorted out as 'double'.
A complete re-crawl will be available soon.
@@ -146,7 +147,7 @@ Your peer can search and index for other peers and they can search for you.
Accept remote crawling requests and perform crawl at maximum of
- Pages Per Minute (minimum is 1, low system load at PPM <= 30)
+ Pages Per Minute (minimum is 1, low system load usually at PPM <= 30)
@@ -165,17 +166,17 @@ Your peer can search and index for other peers and they can search for you.
#(error)#
::
-Error with profile management. Please stop yacy, delete the File DATA/PLASMADB/crawlProfiles0.db and restart.
+Error with profile management. Please stop YaCy, delete the file DATA/PLASMADB/crawlProfiles0.db and restart.
::
Error: #[errmsg]#
::
Application not yet initialized. Sorry. Please wait some seconds and repeat the request.
::
-ERROR: Crawl filter "#[newcrawlingfilter]#" does not match with crawl root "#[crawlingStart]#". Please try again with different filter
+ERROR: Crawl filter "#[newcrawlingfilter]#" does not match with crawl root "#[crawlingStart]#". Please try again with a different filter.
::
Crawling of "#[crawlingURL]#" failed. Reason: #[reasonString]#
::
-Error with url input "#[crawlingStart]#": #[error]#
+Error with URL input "#[crawlingStart]#": #[error]#
::
Error with file input "#[crawlingStart]#": #[error]#
#(/error)#
@@ -192,13 +193,13 @@ You can monitor the crawling progress either by watching the URL queues
indexing queue)
or see the fill/process count of all queues on the
performance page.
-Please wait some seconds, because the request is enqueued and delayed until the http server is idle for a certain time.
-The indexing result is presented on the
+Please wait some seconds, because the request is enqueued and delayed until the proxy/HTTP-server is idle for a certain time.
+The indexing results are presented on the
Index Monitor-page.
It will take at least 30 seconds until the first result appears there. Please be patient, the crawling will pause each time you use the proxy or web server to ensure maximum availability.
If you crawl any un-wanted pages, you can delete them here.
::
-Removed #[numEntries]# entries from crawl queue. This queue may fill again if the loading and indexing queue is not empty
+Removed #[numEntries]# entries from crawl queue. This queue may fill again if the loading and indexing queue is not empty.
::
Crawling paused successfully.
::
@@ -227,7 +228,7 @@ Continue crawling.
Start URL
Depth
Filter
-
Accept '?'
+
Accept '?' URLs
Fill Proxy Cache
Local Indexing
Remote Indexing
@@ -254,7 +255,7 @@ Continue crawling.
Start URL
Intention/Description
Depth
-
Accept '?'
+
Accept '?' URLs
#{otherCrawlStartInProgress}#
@@ -276,7 +277,7 @@ Continue crawling.
Start URL
Intention/Description
Depth
-
Accept '?'
+
Accept '?' URLs
#{otherCrawlStartFinished}#
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 9267cea40..6939df82c 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -679,10 +679,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
this.log.logFine("Unknown host in URL '" + entry.url + "'. Will not be indexed.");
doIndexing = false;
} else if (hostAddress.isSiteLocalAddress()) {
- this.log.logFine("Host in URL '" + entry.url + "' has private ip address.. Will not be indexed.");
+ this.log.logFine("Host in URL '" + entry.url + "' has private ip address. Will not be indexed.");
doIndexing = false;
} else if (hostAddress.isLoopbackAddress()) {
- this.log.logFine("Host in URL '" + entry.url + "' has loopback ip address.. Will not be indexed.");
+ this.log.logFine("Host in URL '" + entry.url + "' has loopback ip address. Will not be indexed.");
doIndexing = false;
}
@@ -733,7 +733,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public void close() {
log.logConfig("SWITCHBOARD SHUTDOWN STEP 1: sending termination signal to managed threads:");
terminateAllThreads(true);
- log.logConfig("SWITCHBOARD SHUTDOWN STEP 2: sending termination signal to threaded indexing (stand by..)");
+ log.logConfig("SWITCHBOARD SHUTDOWN STEP 2: sending termination signal to threaded indexing (stand by...)");
int waitingBoundSeconds = Integer.parseInt(getConfig("maxWaitingWordFlush", "120"));
wordIndex.close(waitingBoundSeconds);
log.logConfig("SWITCHBOARD SHUTDOWN STEP 3: sending termination signal to database manager");
@@ -1607,7 +1607,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// fetch snippets
//if (query.domType != plasmaSearchQuery.SEARCHDOM_GLOBALDHT) snippetCache.fetch(acc.cloneSmart(), query.queryHashes, query.urlMask, 10, 1000);
- log.logFine("SEARCH TIME AFTER ORDERING OF SEARCH RESULT: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
+ log.logFine("SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
// result is a List of urlEntry elements: prepare answer
if (acc == null) {