You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
yacy_search_server/htroot/CrawlStart_p.html

109 lines
5.8 KiB

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!-- Page title; the clientname placeholder between the quotes is presumably substituted server-side (TODO confirm). -->
<title>YaCy '#[clientname]#': Crawl Start</title>
<!-- Shared meta tags, pulled in from a template file (apparently a server-side include; confirm). -->
#%env/templates/metas.template%#
<!-- External scripts. changed(), which the start URL field calls on keypress, is not defined in this file and presumably comes from one of these (verify). -->
<script type="text/javascript" src="/js/ajax.js"></script>
<script type="text/javascript" src="/js/IndexCreate.js"></script>
<script type="text/javascript">
/**
 * Mark the form control with the given element id as checked.
 *
 * Used by the start URL text field (onfocus) to select the matching
 * crawlingMode radio button.
 *
 * The checked property is a boolean, so it is set to true rather than to
 * the attribute-style string 'checked'. An unknown id is ignored instead
 * of raising a TypeError inside the event handler.
 *
 * @param {string} key - id attribute of the radio button or checkbox.
 */
function check(key) {
  var element = document.getElementById(key);
  if (element !== null) {
    element.checked = true;
  }
}
</script>
<style type="text/css">
/* Utility class: keeps the element's text on one line (no wrapping).
   Not referenced by the markup in this file; possibly used by included
   templates or scripts (verify before removing). */
.nobr { white-space: nowrap; }
</style>
</head>
<body id="IndexCreate">
<!-- Shared page header and the sub-menu, pulled in from template files. -->
#%env/templates/header.template%#
#%env/templates/submenuIndexCreate.template%#
<h2>Site Crawling</h2>
<p id="startCrawling">
<strong>Site Crawler:</strong>&nbsp;
Download all web pages from a given domain or base URL.
</p>
<!-- The whole crawl-start form sits in one fieldset and is posted as multipart/form-data to Crawler_p.html. -->
<fieldset>
<legend>
<label>Site Crawl Start</label>
</legend>
<form name="Crawler" id="Crawler" method="post" action="Crawler_p.html" enctype="multipart/form-data" accept-charset="UTF-8">
<!-- Each setting below is a dt (caption) / dd (controls) pair of this definition list. -->
<dl>
<dt><label>Site</label></dt>
<dd>
<!-- Layout table: row 1 is the start URL mode, row 2 the sitemap mode (its radio button is disabled and its field read-only). -->
<table border="0" cellpadding="0" cellspacing="0"><tr valign="top">
<td valign="top"><input type="radio" name="crawlingMode" id="url" value="url" checked="checked" />Start URL</td>
<td valign="top">
<!-- Focusing the URL field selects the "url" radio via check(); changed() is not defined in this file and presumably comes from an included script (verify). -->
<input name="crawlingURL" type="text" size="50" maxlength="256" value="#[starturl]#" onkeypress="changed()" onfocus="check('url')" style="font-size:16px"/><br/>
<!-- Read-only, borderless field; presumably filled in by script once the URL has been looked up (TODO confirm). -->
<input name="bookmarkTitle" id="bookmarkTitle" type="text" size="50" maxlength="256" value="" readonly="readonly" style="background:transparent; border:0px"/>
</td>
<td>
<!-- Empty placeholders; presumably updated by script with the robots.txt result and a progress image (confirm). -->
<span id="robotsOK"></span>
<img align="top" src="/env/grafics/empty.gif" name="ajax" alt="empty" />
</td></tr><tr>
<td><input type="radio" name="crawlingMode" id="sitemap" value="sitemap" disabled="disabled"/>Sitemap URL</td>
<td><input name="sitemapURL" type="text" size="41" maxlength="256" value="" readonly="readonly" style="background:transparent; border:0px"/></td>
</tr></table><br/>
<!-- The crawl depth is submitted as a fixed value of 99. The field is self-closed and kept inside this dd because the document declares XHTML, where empty elements must be closed and a dl may only contain dt and dd children. -->
<input type="hidden" name="crawlingDepth" id="crawlingDepth" value="99" />
</dd>
<dt><label>Scheduler</label></dt>
<dd>
  <!-- Choice between a one-shot crawl and a periodic re-crawl; the interval is a number plus a unit (defaults: 7 days). -->
  <!-- NOTE(review): only the first radio button carries a template-controlled checked attribute; confirm that the "scheduler" option never needs to be pre-selected. -->
  <input type="radio" name="recrawl" value="nodoubles" #(crawlingIfOlderCheck)#checked="checked"::#(/crawlingIfOlderCheck)#/>run this crawl once<br/>
  <input type="radio" name="recrawl" value="scheduler"/>scheduled, repeat the crawl every
  <select name="repeat_time">
    <option value="1">1</option>
    <option value="2">2</option>
    <option value="3">3</option>
    <option value="4">4</option>
    <option value="5">5</option>
    <option value="6">6</option>
    <option value="7" selected="selected">7</option>
    <option value="8">8</option>
    <option value="9">9</option>
    <option value="10">10</option>
    <option value="12">12</option>
    <option value="14">14</option>
    <option value="21">21</option>
    <option value="28">28</option>
    <option value="30">30</option>
  </select>
  <select name="repeat_unit">
    <option value="selminutes">minutes</option>
    <option value="selhours">hours</option>
    <option value="seldays" selected="selected">days</option>
  </select> automatically.
</dd>
<dt><label>Path in Domain</label></dt>
<dd>
<!-- Crawl scope: the whole domain (default) or only the sub-path of the start URL. -->
<input type="radio" name="range" value="domain" checked="checked"/>full domain<br />
<input type="radio" name="range" value="subpath" />only sub-path of given url
<!-- Settings that are submitted with the form but not shown to the user. They are self-closed and kept inside this dd because the document declares XHTML, where empty elements must be closed and a dl may only contain dt and dd children. -->
<input type="hidden" name="mustnotmatch" id="mustnotmatch" value="" />
<input type="hidden" name="crawlingDomFilterCheck" id="crawlingDomFilterCheck" value="off" />
<input type="hidden" name="crawlingDomFilterDepth" id="crawlingDomFilterDepth" value="#[crawlingDomFilterDepth]#" />
</dd>
<dt><label>Limitation</label></dt>
<dd>
<!-- Optional page limit: a checkbox enabling the limit plus the maximum number of documents. The single-row layout table opens exactly one tr; a duplicated opening tag would leave a row unclosed and render an extra empty row. -->
<table border="0" cellpadding="0" cellspacing="0"><tr valign="top">
<td valign="top"><input type="checkbox" name="crawlingDomMaxCheck" id="crawlingDomMaxCheck" #(crawlingDomMaxCheck)#::checked="checked"#(/crawlingDomMaxCheck)# /> not more than </td>
<td valign="top"><input name="crawlingDomMaxPages" id="crawlingDomMaxPages" type="text" size="6" maxlength="6" value="#[crawlingDomMaxPages]#" /></td>
<td valign="top">documents</td>
</tr></table>
</dd>
<dt><label>Dynamic URLs</label></dt>
<dd><input type="checkbox" name="crawlingQ" id="crawlingQ" #(crawlingQChecked)#::checked="checked"#(/crawlingQChecked)# /> allow '?' in path
<!-- Settings that are submitted with fixed values and not shown to the user. They are self-closed and kept inside this dd because the document declares XHTML, where empty elements must be closed and a dl may only contain dt and dd children. -->
<input type="hidden" name="storeHTCache" id="storeHTCache" value="on" />
<input type="hidden" name="cachePolicy" id="cachePolicy" value="iffresh" />
<input type="hidden" name="indexText" id="indexText" value="on" />
<input type="hidden" name="indexMedia" id="indexMedia" value="on" />
<input type="hidden" name="intention" id="intention" value="" />
<input type="hidden" name="xsstopw" id="xsstopw" value="off" />
<input type="hidden" name="xdstopw" id="xdstopw" value="off" />
<input type="hidden" name="xpstopw" id="xpstopw" value="off" />
<input type="hidden" name="createBookmark" id="createBookmark" value="off" />
</dd>
<!-- Two empty term/description pairs, apparently used as vertical spacing above the submit row (confirm before removing). -->
<dt></dt>
<dd></dd>
<dt></dt>
<dd></dd>
<!-- Submit button; its name/value pair (crawlingstart) is sent with the form. -->
<dt><label>Start</label></dt>
<dd>
  <input type="submit" name="crawlingstart" value="Start New Crawl" />
</dd>
</dl>
</form>
</fieldset>
#%env/templates/footer.template%#
</body>
</html>