diff --git a/htroot/ConfigRobotsTxt_p.html b/htroot/ConfigRobotsTxt_p.html
index 91f2b5045..472e85d1a 100644
--- a/htroot/ConfigRobotsTxt_p.html
+++ b/htroot/ConfigRobotsTxt_p.html
@@ -17,7 +17,7 @@
::

Unable to access the local file: #[msg]#

::

Deletion of htroot/robots.txt failed

#(/error)#
-
Restrict access for
+
Restrict access for
@@ -43,7 +43,7 @@
 
-
+
#%env/templates/footer.template%#
diff --git a/htroot/CrawlURLFetchStack_p.html b/htroot/CrawlURLFetchStack_p.html
index 28c1f2cf0..152c1a683 100644
--- a/htroot/CrawlURLFetchStack_p.html
+++ b/htroot/CrawlURLFetchStack_p.html
@@ -12,16 +12,16 @@
#(addedUrls)#::Added #[added]# URLs!#(/addedUrls)#
Statistics -
+
Currently stacked URLs:
#[urlCount]#
Total fetched / added URLs:
#[totalFetched]# / #[totalAdded]#
#{peers}#
Fetched from #[peer]#
#[amount]#
#{/peers}# -
+
Settings -
+
:
@@ -30,11 +30,11 @@
Set max. size for each transfer to #[value]#:: Setting max. size for each transfer to #[value]# was unsuccessful: may not be negative#(/set)#
-
+
Add URLs to stack -
+
:
@@ -60,7 +60,7 @@
Added #[added]# and rejected #[failed]# URLs from uploaded file successfully:: An internal error occurred processing the uploaded file: #[error]##(/upload)#
-
+
#%env/templates/footer.template%#
diff --git a/htroot/CrawlURLFetch_p.html b/htroot/CrawlURLFetch_p.html
index 60f15f004..251d4ea9f 100644
--- a/htroot/CrawlURLFetch_p.html
+++ b/htroot/CrawlURLFetch_p.html
@@ -15,7 +15,7 @@
The Re-Crawl option isn't used and the sites won't be stored in the Proxy Cache. Text and media types will be indexed. Since these URLs will be requested explicitly from another peer, they won't be distributed for remote indexing.

-
+
:
@@ -62,7 +62,7 @@
#(freqError)#:: Invalid period, fetching only once#(/freqError)#
-
+
@@ -73,7 +73,7 @@
#(runs)#::
Thread to fetch URLs is #(status)#running::stopped::paused#(/status)# -
+
Total runs:
#[totalRuns]#
Total fetched URLs:
#[totalFetchedURLs]#
Total failed URLs:
#[totalFailedURLs]#
@@ -98,7 +98,7 @@
#(/status)#
-
+
#(/runs)#
diff --git a/htroot/DetailedSearch.html b/htroot/DetailedSearch.html
index b0b47e44b..78683e853 100644
--- a/htroot/DetailedSearch.html
+++ b/htroot/DetailedSearch.html
@@ -18,7 +18,7 @@
Query Attributes -
+
@@ -29,27 +29,27 @@
-
+
Pre-Ranking -
#{attrPre}# +
#{attrPre}#
:
#{select}# #{/select}# #[value]#
#{/attrPre}# -
+
Post-Ranking -
#{attrPost}# +
#{attrPost}#
#{select}# #{/select}# #[value]#
#{/attrPost}# -
+
diff --git a/htroot/IndexCreate_p.html.new-layout b/htroot/IndexCreate_p.html.new-layout
new file mode 100644
index 000000000..2bf9a7999
--- /dev/null
+++ b/htroot/IndexCreate_p.html.new-layout
@@ -0,0 +1,355 @@
+
+
+
+ YaCy '#[clientname]#': Index Creation
+ #%env/templates/metas.template%#
+
+
+
+
+ #%env/templates/header.template%#
+ #%env/templates/submenuIndexCreate.template%#

Index Creation

+ +

+ Start Crawling Job:
+ You can define URLs as start points for Web page crawling and start crawling here. "Crawling" means that
+ YaCy will download the given website, extract all links in it and then download the content behind these
+ links. This is repeated as long as specified under "Crawling Depth".
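
To make the crawl procedure concrete, here is a purely illustrative Java sketch of such a depth-limited crawl loop; fetchAndExtractLinks() is a placeholder and none of the names are taken from YaCy's code:

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.HashSet;
    import java.util.Set;

    // Conceptual sketch of the crawl loop described above: fetch a page, extract
    // its links, and follow them until the configured depth is reached.
    public final class CrawlLoopSketch {
        static final class Task {
            final String url; final int depth;
            Task(final String url, final int depth) { this.url = url; this.depth = depth; }
        }

        public static void crawl(final String startUrl, final int maxDepth) {
            final Deque<Task> queue = new ArrayDeque<Task>();
            final Set<String> seen = new HashSet<String>();
            queue.add(new Task(startUrl, 0));
            seen.add(startUrl);
            while (!queue.isEmpty()) {
                final Task task = queue.poll();
                for (final String link : fetchAndExtractLinks(task.url)) {   // download + parse
                    if (task.depth < maxDepth && seen.add(link)) {
                        queue.add(new Task(link, task.depth + 1));
                    }
                }
            }
        }

        private static Iterable<String> fetchAndExtractLinks(final String url) {
            return new HashSet<String>();   // placeholder: real code would download and parse the page
        }
    }
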

+ +
+
Crawling Depth +

+ This defines how often the Crawler will follow links embedded in websites.
+ A minimum of 0 is recommended and means that the page you enter under "Starting Point" will be added
+ to the index, but no linked content is indexed. 2-4 is good for normal indexing.
+ Be careful with the depth: assuming an average branching factor of 20,
+ a prefetch depth of 8 would already index 20^8 = 25,600,000,000 pages, which may well be the whole WWW.
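
The arithmetic behind that warning, as a small illustrative Java sketch (the branching factor of 20 is the assumption stated above, not a measured value):

    // Illustrative estimate only: pages reachable per crawl depth, assuming
    // every page links to about 20 previously unseen pages.
    public final class CrawlDepthEstimate {
        public static void main(final String[] args) {
            final long branching = 20L;
            long newPages = 1L;                      // the single start page at depth 0
            for (int depth = 1; depth <= 8; depth++) {
                newPages *= branching;               // 20^depth pages newly reachable
                System.out.println("depth " + depth + ": about " + newPages + " new pages");
            }
            // depth 8 alone contributes 20^8 = 25,600,000,000 pages
        }
    }
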

+
+
:
+
+
+
+ +
Crawling Filter +

+ This is an emacs-like regular expression that must match the URLs that are to be crawled.
+ Use this, for example, to restrict the crawl to a single domain. If you set this filter,
+ it makes sense to increase the crawling depth.
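
A minimal sketch of how such a URL filter might be applied, assuming a Java-style regular expression and an invented single-domain pattern; this is not the actual crawler code:

    import java.util.regex.Pattern;

    // Sketch: keep only URLs that match the crawl filter expression.
    public final class CrawlFilterSketch {
        public static boolean accept(final String url, final String filter) {
            return Pattern.matches(filter, url);
        }

        public static void main(final String[] args) {
            final String filter = ".*\\.example\\.org.*";   // restrict the crawl to one (made-up) domain
            System.out.println(accept("http://www.example.org/page.html", filter)); // true
            System.out.println(accept("http://other.net/page.html", filter));       // false
        }
    }
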

+
+
:
+
+
+
+ +
Re-Crawl Option +

+ If you use this option, web pages that already exist in your database are crawled and indexed again.
+ Whether this happens depends on the age of the last crawl: if the last crawl is older than the given
+ date, the page is crawled again; otherwise it is treated as a 'double' and not loaded or indexed again.
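
The decision rule can be summarised in a few lines; the method and parameter names below are illustrative assumptions, not YaCy's API:

    import java.util.Date;

    // Sketch of the re-crawl decision: load the page again only if its last
    // crawl is older than the configured cut-off date.
    public final class RecrawlRuleSketch {
        public static boolean shouldLoad(final Date lastCrawl, final Date recrawlIfOlderThan) {
            if (lastCrawl == null) {
                return true;                          // never crawled before: load it
            }
            // stale entry: crawl again; otherwise treat the URL as a 'double' and skip it
            return lastCrawl.before(recrawlIfOlderThan);
        }
    }
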

+
+
:
+
+
:
+
+ + +
+
+
+ +
Auto-Dom-Filter +

+ This option automatically creates a domain filter which limits the crawl to the domains the crawler
+ finds at the given depth. You can use this option, for example, to crawl a page with bookmarks while
+ restricting the crawl to only those domains that appear on the bookmark page. A suitable depth
+ for this example would be 1.
+ The default value 0 gives no restrictions. +
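
A rough sketch of the idea, assuming the crawler records every domain it sees up to the filter depth; all names are invented for illustration:

    import java.util.HashSet;
    import java.util.Set;

    // Sketch of the auto-dom-filter: domains seen up to the filter depth are
    // collected, and deeper URLs are only accepted if their domain is in that set.
    public final class AutoDomFilterSketch {
        private final Set<String> allowedDomains = new HashSet<String>();
        private final int filterDepth;

        public AutoDomFilterSketch(final int filterDepth) {
            this.filterDepth = filterDepth;          // 0 = no restriction
        }

        public void seen(final String domain, final int depth) {
            if (filterDepth > 0 && depth <= filterDepth) allowedDomains.add(domain);
        }

        public boolean accept(final String domain, final int depth) {
            if (filterDepth == 0 || depth <= filterDepth) return true;
            return allowedDomains.contains(domain);
        }
    }
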

+
+
:
+
+
:
+
+
+
+ +
Maximum Pages per Domain +

+ With this option you can limit the maximum number of pages that are fetched and indexed from a single domain.
+ You can combine this limitation with the 'Auto-Dom-Filter', so that the limit is applied to all domains within
+ the given depth. Domains outside the given depth are sorted out anyway.
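
One possible shape for such a per-domain limit, sketched with invented names; the real crawler may count differently:

    import java.util.HashMap;
    import java.util.Map;

    // Sketch: stop fetching from a domain once the configured maximum is reached.
    public final class DomainLimitSketch {
        private final Map<String, Integer> pagesPerDomain = new HashMap<String, Integer>();
        private final int maxPagesPerDomain;

        public DomainLimitSketch(final int maxPagesPerDomain) {
            this.maxPagesPerDomain = maxPagesPerDomain;
        }

        // returns true if the page may still be fetched, and counts it
        public synchronized boolean tryFetch(final String domain) {
            final int count = pagesPerDomain.containsKey(domain) ? pagesPerDomain.get(domain) : 0;
            if (count >= maxPagesPerDomain) return false;
            pagesPerDomain.put(domain, count + 1);
            return true;
        }
    }
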

+
+
:
+
+
:
+
+
+
+ +
Accept dynamic URLs +

+ A question mark is usually a hint for a dynamic page. URLs pointing to dynamic content should usually
+ not be crawled. However, there are sometimes web pages with static content that are accessed with
+ URLs containing question marks. If you are unsure, do not check this, to avoid crawl loops.
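
The check itself is simple; a hedged sketch (not the actual implementation):

    // A '?' in the URL is taken as a hint for dynamically generated content;
    // such URLs are skipped unless the "accept dynamic URLs" option is enabled.
    public final class DynamicUrlCheck {
        public static boolean accept(final String url, final boolean acceptDynamicURLs) {
            final boolean looksDynamic = url.indexOf('?') >= 0;
            return acceptDynamicURLs || !looksDynamic;
        }
    }
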

+
+
:
+
+
+
+ +
Store to Proxy Cache +

+ This option is used by default for proxy prefetch, but is not needed for explicit crawling.
+ We recommend leaving this switched off unless you want to inspect the crawl results with the
+ Cache Monitor.

+
+
:
+
+
+
+ +
Local Indexing +

+ This enables indexing of the web pages the crawler downloads. This should be switched on by default,
+ unless you want to crawl only to fill the Proxy Cache without indexing.

+
+
:
+
+
:
+
+
+
+ +
Remote Indexing +

+ If checked, the crawler will contact other peers and use them as remote indexers for your crawl.
+ If you need your crawling results locally, you should switch this off.
+ Only senior and principal peers can initiate or receive remote crawls.
+ A YaCyNews message will be created to inform all peers about a global crawl,
+ so that they can avoid starting a crawl with the same start point.

+
+
:
+
+
:
+
+ + This message will appear in the 'Other Peer Crawl Start' table of other peers. +
+
+
+ +
Exclude static Stop-Words +

+ This can be useful to prevent extremely common words, e.g. "the", "he", "she", "it", from being added to the database.
+ To exclude all words given in the file yacy.stopwords from indexing, check this box.
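
A sketch of how such a stop-word list could be loaded and consulted, assuming one word per line in yacy.stopwords; the file format and method names here are assumptions:

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.HashSet;
    import java.util.Set;

    // Sketch of stop-word exclusion: words listed in the stop-word file are skipped during indexing.
    public final class StopWordSketch {
        public static Set<String> loadStopWords(final String path) throws IOException {
            final Set<String> stopWords = new HashSet<String>();
            final BufferedReader reader = new BufferedReader(new FileReader(path));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    line = line.trim().toLowerCase();
                    if (line.length() > 0) stopWords.add(line);
                }
            } finally {
                reader.close();
            }
            return stopWords;
        }

        public static boolean shouldIndex(final String word, final Set<String> stopWords) {
            return !stopWords.contains(word.toLowerCase());
        }
    }
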

+
+
:
+
+
+
+ + +
Starting Point +

+ Existing start URLs are re-crawled.
+ Other already visited URLs are sorted out as "double".
+ A complete re-crawl will be available soon.

+
+
:
+
+ + +
+
:
+
+ + +
+
 
+
+ + +
+
+
+ +
+ +
+
+ +
+

+ Distributed Indexing:
+ Crawling and indexing can be done by remote peers.
+ Your peer can search and index for other peers and they can search for you.

+ + + + + + + + + + + + + + + + + + + + + + +
+ + + Accept remote crawling requests and perform crawl at maximum load +
+ + + Accept remote crawling requests and perform crawl at maximum of + Pages Per Minute (minimum is 1, low system load usually at PPM ≥ 30) +
+ + + Do not accept remote crawling requests (please set this only if you cannot accept to crawl only one page per minute; see option above) +
+ + +
+
+ +

+ #(info)#
+ ::
+ Crawling paused successfully.
+ ::
+ Continue crawling.
+ #(/info)#
+

+ + #(refreshbutton)# + :: +
+
+ +
+
+ #(/refreshbutton)# +
+
+ #(crawler-paused)# + + :: + + #(/crawler-paused)# +
+
+ +

Recently started remote crawls in progress:

+ + + + + + + + + + #{otherCrawlStartInProgress}# + + + + + + + + + #{/otherCrawlStartInProgress}# +
Start Time | Peer Name | Start URL | Intention/Description | Depth | Accept '?' URLs
#[cre]# | #[peername]# | #[startURL]# | #[intention]# | #[generalDepth]# | #(crawlingQ)#no::yes#(/crawlingQ)#
+

Recently started remote crawls, finished:

+ + + + + + + + + + #{otherCrawlStartFinished}# + + + + + + + + + #{/otherCrawlStartFinished}# +
Start Time | Peer Name | Start URL | Intention/Description | Depth | Accept '?' URLs
#[cre]# | #[peername]# | #[startURL]# | #[intention]# | #[generalDepth]# | #(crawlingQ)#no::yes#(/crawlingQ)#
+

Remote Crawling Peers: 

+ #(remoteCrawlPeers)# +

No remote crawl peers available.

+ :: +

#[num]# peers available for remote crawling.

+ + + + + + + + + + + + + +
Idle Peers + #{available}##[name]# (#[due]# seconds due)   #{/available}# +
Busy Peers + #{busy}##[name]# (#[due]# seconds due)  #{/busy}# +
+ #(/remoteCrawlPeers)#
+
+ #%env/templates/footer.template%#
+
+
diff --git a/htroot/LogStatistics_p.html b/htroot/LogStatistics_p.html
index 130fef3c5..b68bef165 100644
--- a/htroot/LogStatistics_p.html
+++ b/htroot/LogStatistics_p.html
@@ -15,7 +15,7 @@
DHT
- URLs
+ URLs
URLs Received:
#[urlSum]#
URLs Requested:
#[urlReqSum]#
@@ -23,10 +23,10 @@
Total time:
#[urlTimeSum]# #[urlTimeSumUnit]#
URLs Sent:
#[DHTSendURLs]#
-
+
- RWIs / Words
+ RWIs / Words
Words received:
#[wordsSum]#
RWIs received:
#[rwiSum]#
@@ -34,11 +34,11 @@
Total time:
#[rwiTimeSum]# #[rwiTimeSumUnit]#
RWIs selected:
#[DHTSelectionWordsCount]#
Selection time:
#[DHTSelectionWordsTimeCount]#
-
+
- Chunks / Protocol
+ Chunks / Protocol
Sent traffic:
#[DHTSendTraffic]# #[DHTSendTrafficUnit]#
Total peers selected:
#[DHTSelectionTargetCount]#
@@ -65,19 +65,19 @@
#(/useDHTPeers)#
-
+
-
Ranking Distribution
+
Ranking Distribution
Submitted ranking files:
#[rankingDistributionCount]#
Total time submitting ranking files:
#[rankingDistributionTime]#
Failed ranking distributions:
#[rankingDistributionFailCount]#
-
+
-
Indexing
+
Indexing
Indexed sites:
#[indexedSites]#
Size of indexed sites:
#[indexedSiteSizeSum]# #[indexedSiteSizeSumUnit]#
@@ -87,15 +87,15 @@
Total parsing time:
#[indexedParsingTime]# #[indexedParsingTimeUnit]#
Total indexing time:
#[indexedIndexingTime]# #[indexedIndexingTimeUnit]#
Total storage time:
#[indexedStorageTime]# #[indexedStorageTimeUnit]#
-
+
-
Errors
+
Errors
Tried to create left child node twice
#[leftChildTwiceCount]#
Tried to create right child node twice
#[rightChildTwiceCount]#
Malformed URL Exception
#[malformedURLCount]#
-
+
#(/results)#
diff --git a/htroot/Messages_p.html b/htroot/Messages_p.html
index 1ea863579..e3111c64f 100644
--- a/htroot/Messages_p.html
+++ b/htroot/Messages_p.html
@@ -11,7 +11,7 @@
#(peersKnown)#::
Compose Message -
+
:
-
+
#(/peersKnown)#
#(mode)#
diff --git a/htroot/User_p.html b/htroot/User_p.html
index a31f5cf40..5a49bc587 100644
--- a/htroot/User_p.html
+++ b/htroot/User_p.html
@@ -20,7 +20,7 @@
#(/error)#
-
Select user
+
Select user
:
@@ -34,12 +34,12 @@
-
+
-
Edit current user: #[username]#
+
Edit current user: #[username]#
@@ -71,6 +71,6 @@
 
-
+
diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html
index 8a2f10343..6db33bec0 100644
--- a/htroot/ViewFile.html
+++ b/htroot/ViewFile.html
@@ -10,7 +10,7 @@
#(error)#
-
View URL Content
+
View URL Content
URL:
#[url]#
Hash:
#[hash]#
@@ -29,7 +29,7 @@
-
+
diff --git a/htroot/Wiki.java b/htroot/Wiki.java
index c0f52f0c1..2e949ed73 100644
--- a/htroot/Wiki.java
+++ b/htroot/Wiki.java
@@ -130,7 +130,7 @@ public class Wiki {
         HashMap map = new HashMap();
         map.put("page", pagename);
         map.put("author", author.replace(',', ' '));
-        if (!page.page().equals(content))
+        if (post.get("content", "").trim().length() > 0 && !page.page().equals(content))
             yacyCore.newsPool.publishMyNews(new yacyNewsRecord("wiki_upd", map));
         page = newEntry;
         prop.put("LOCATION", "/Wiki.html?page=" + pagename);