|
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
|
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
|
|
<head>
|
|
|
|
<title>YaCy '#[clientname]#': Crawl Start (easy)</title>
|
|
|
|
#%env/templates/metas.template%#
|
|
|
|
<script type="text/javascript" src="/js/ajax.js"></script>
|
|
|
|
<script type="text/javascript" src="/js/IndexCreate.js"></script>
|
|
|
|
</head>
|
|
|
|
<body id="IndexCreate">
|
|
|
|
#%env/templates/header.template%#
|
|
|
|
#%env/templates/submenuIndexCreate.template%#
|
|
|
|
<h2>Easy Crawl Start</h2>
|
|
|
|
|
|
|
|
<p id="startCrawling">
|
|
|
|
<strong>Start Crawling Job:</strong>
|
|
|
|
You can define URLs as start points for Web page crawling and start crawling here.
|
|
|
|
"Crawling" means that YaCy will download the given website, extract all links in it
|
|
|
|
and then download the content behind these links.
|
|
|
|
This is repeated as long as specified under "Crawling Depth".
|
|
|
|
</p>
|
|
|
|
|
|
|
|
<form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
|
|
|
|
<input type="hidden" name="crawlingFilter" value=".*" />
|
|
|
|
<input type="hidden" name="crawlingIfOlderCheck" value="off" />
|
|
|
|
<input type="hidden" name="crawlingDomFilterCheck" value="off" />
|
|
|
|
<input type="hidden" name="crawlingDomMaxCheck" value="off" />
|
|
|
|
<input type="hidden" name="crawlingQ" value="off" />
|
|
|
|
<input type="hidden" name="storeHTCache" value="on" />
|
|
|
|
<input type="hidden" name="indexText" value="on" />
|
|
|
|
<input type="hidden" name="indexMedia" value="on" />
|
|
|
|
<input type="hidden" name="crawlOrder" value="on" />
|
|
|
|
<input type="hidden" name="intention" value="simple web crawl" />
|
|
|
|
<input type="hidden" name="xsstopw" value="off" />
|
|
|
|
<table border="0" cellpadding="5" cellspacing="1">
|
|
|
|
<!-- Column headings of the crawl settings table: attribute name, input value, explanatory text. -->
<tr class="TableHeader">
|
|
|
|
<td><strong>Attribute</strong></td>
|
|
|
|
<td><strong>Value</strong></td>
|
|
|
|
<td><strong>Description</strong></td>
|
|
|
|
</tr>
|
|
|
|
<!--
  Start URL row. The text field is submitted as "crawlingURL" and calls changed() on every key press.
  NOTE(review): changed() and whatever fills the "robotsOK" / "title" spans and the image named
  "ajax" are presumably provided by /js/IndexCreate.js; confirm there before renaming any of them.
-->
<tr valign="top" class="TableCellSummary">
|
|
|
|
<td><label for="crawlingURL">Starting Point:</label></td>
|
|
|
|
<td>
|
|
|
|
<input name="crawlingURL" id="crawlingURL" type="text" size="41" maxlength="256" value="http://" onkeypress="changed()" />
|
|
|
|
<span id="robotsOK"></span><br />
|
|
|
|
<span id="title"><br/></span>
|
|
|
|
<img src="/env/grafics/empty.gif" name="ajax" alt="empty" />
|
|
|
|
</td>
|
|
|
|
<td>
|
|
|
|
Enter here the start url of the web crawl.
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr valign="top" class="TableCellLight">
|
|
|
|
<td><label for="crawlingDepth">Crawling Depth</label>:</td>
|
|
|
|
<td><input name="crawlingDepth" id="crawlingDepth" type="text" size="2" maxlength="2" value="#[crawlingDepth]#" /></td>
|
|
|
|
<td>
|
|
|
|
The depth defines how deep the Crawler will follow links of links (...) and so on.
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
|
|
|
|
<tr valign="top" class="TableCellLight">
|
|
|
|
<td colspan="3"><input type="submit" name="crawlingstart" value="Start New Distributed Crawl (will be visible at other peers)" /></td>
|
|
|
|
</tr>
|
|
|
|
</table>
|
|
|
|
</form>
|
|
|
|
|
|
|
|
<p id="crawlingStarts"><strong>Recently started remote crawls in progress:</strong></p>
|
|
|
|
<table border="0" cellpadding="2" cellspacing="1">
|
|
|
|
<tr class="TableHeader">
|
|
|
|
<td><strong>Start Time</strong></td>
|
|
|
|
<td><strong>Peer Name</strong></td>
|
|
|
|
<td><strong>Start URL</strong></td>
|
|
|
|
<td><strong>Intention/Description</strong></td>
|
|
|
|
<td><strong>Depth</strong></td>
|
|
|
|
<td><strong>Accept '?' URLs</strong></td>
|
|
|
|
</tr>
|
|
|
|
<!--
  One table row per entry of the otherCrawlStartInProgress list; the row class switches between
  TableCellLight and TableCellDark via the "dark" switch.
  NOTE(review): startURL is written both into an href attribute and into element text, and the
  table lists crawls started by other peers, so these values are presumably untrusted. Confirm that
  the servlet or template engine HTML-escapes cre, peername, startURL and intention before output.
-->
#{otherCrawlStartInProgress}#
|
|
|
|
<tr class="TableCell#(dark)#Light::Dark#(/dark)#" >
|
|
|
|
<td>#[cre]#</td>
|
|
|
|
<td>#[peername]#</td>
|
|
|
|
<td><a href="#[startURL]#">#[startURL]#</a></td>
|
|
|
|
<td>#[intention]#</td>
|
|
|
|
<td>#[generalDepth]#</td>
|
|
|
|
<td>#(crawlingQ)#no::yes#(/crawlingQ)#</td>
|
|
|
|
</tr>
|
|
|
|
#{/otherCrawlStartInProgress}#
|
|
|
|
</table>
|
|
|
|
<p><strong>Recently started remote crawls, finished:</strong></p>
|
|
|
|
<table border="0" cellpadding="2" cellspacing="1">
|
|
|
|
<tr class="TableHeader">
|
|
|
|
<td><strong>Start Time</strong></td>
|
|
|
|
<td><strong>Peer Name</strong></td>
|
|
|
|
<td><strong>Start URL</strong></td>
|
|
|
|
<td><strong>Intention/Description</strong></td>
|
|
|
|
<td><strong>Depth</strong></td>
|
|
|
|
<td><strong>Accept '?' URLs</strong></td>
|
|
|
|
</tr>
|
|
|
|
<!--
  One table row per entry of the otherCrawlStartFinished list; the row class switches between
  TableCellLight and TableCellDark via the "dark" switch.
  NOTE(review): startURL is written both into an href attribute and into element text, and the
  table lists crawls started by other peers, so these values are presumably untrusted. Confirm that
  the servlet or template engine HTML-escapes cre, peername, startURL and intention before output.
-->
#{otherCrawlStartFinished}#
|
|
|
|
<tr class="TableCell#(dark)#Light::Dark#(/dark)#" >
|
|
|
|
<td>#[cre]#</td>
|
|
|
|
<td>#[peername]#</td>
|
|
|
|
<td><a href="#[startURL]#">#[startURL]#</a></td>
|
|
|
|
<td>#[intention]#</td>
|
|
|
|
<td>#[generalDepth]#</td>
|
|
|
|
<td>#(crawlingQ)#no::yes#(/crawlingQ)#</td>
|
|
|
|
</tr>
|
|
|
|
#{/otherCrawlStartFinished}#
|
|
|
|
</table>
|
|
|
|
<p id="remoteCrawlPeers"><strong>Remote Crawling Peers:</strong> </p>
|
|
|
|
#(remoteCrawlPeers)#
|
|
|
|
<p>No remote crawl peers available.</p>
|
|
|
|
::
|
|
|
|
<p>#[num]# peers available for remote crawling.</p>
|
|
|
|
<table border="0" cellpadding="2" cellspacing="1">
|
|
|
|
<colgroup>
|
|
|
|
<col width="60" />
|
|
|
|
<col />
|
|
|
|
</colgroup>
|
|
|
|
<!--
  Two rows of the remote crawl peer table. Each row has a header cell (scoped to its row) followed
  by one data cell that lists every peer of the "available" or "busy" list as "name (due seconds due)".
-->
<tr class="TableCellDark">
|
|
|
|
<th scope="row">Idle Peers</th>
|
|
|
|
<td>
|
|
|
|
#{available}##[name]# (#[due]# seconds due) #{/available}#
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<th scope="row">Busy Peers</th>
|
|
|
|
<td>
|
|
|
|
#{busy}##[name]# (#[due]# seconds due) #{/busy}#
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
</table>
|
|
|
|
#(/remoteCrawlPeers)#
|
|
|
|
|
|
|
|
#%env/templates/footer.template%#
|
|
|
|
</body>
|
|
|
|
</html>
|