- fix for last commit

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3366 6c8d7289-2bf4-0310-a012-ef5d649a1542
karlchenofhell 18 years ago
parent c5a2ba3a23
commit 67d96249b4

@@ -12,18 +12,22 @@
<p>
The newly added URLs will be crawled without any filter restrictions except for the <em>static</em> stop-words.
The Re-Crawl option isn't used and the sites won't be stored in the Proxy Cache. Text and media types will be indexed.
Since these URLs are explicitly requested from another peer, they won't be distributed for remote indexing.
Since these URLs will be requested explicitly from another peer, they won't be distributed for remote indexing.
</p>
<dl>
<dt><label for="url">Fetch from URL</label>:</dt>
<dd>
<input type="radio" name="source" value="url" id="url" checked="checked" />
<input type="text" id="host" name="host" size="60" value="#[host]#" />
#(saved)#::<br />
Or select previously entered URL: <select name="savedURL">#{urls}#
<option>#[url]#</option>
</select>#(/saved)#
#(hostError)#::&nbsp;<span class="error">Malformed URL</span>#(/hostError)#
#(saved)#::
</dd>
<dt><label for="savedURL">Or select previously entered URL</label>:</dt>
<dd>
<input type="radio" name="source" id="savedURL" value="saved" />
<select name="saved">#{urls}#
<option>#[url]#</option>#{/urls}#
</select>#(/saved)#
</dd>
#(peersKnown)#::
@@ -56,36 +60,39 @@
<dt><input type="submit" name="start" value="Fetch URLs" /></dt>
</dl>
</fieldset>
</form>
#(threadError)#::
<span class="error">Error on stopping thread, it isn't alive anymore</span>::
<span class="error">Error on restarting thread, it isn't alive anymore</span>#(/threadError)#
#(runs)#::
<form method="post" action="CrawlURLFetch_p.html" enctype="multipart/form-data">
<fieldset><legend>Thread to fetch URLs is #(status)#running::stopped::paused#(/status)#</legend>
<dl>
<dt>Total runs:</dt><dd>#[totalRuns]#</dd>
<dt>Last run duration:</dt><dd>#[lastRun]#</dd>
<dt>Last server response:</dt><dd>#[lastServerResponse]#</dd>
<dt>Total fetched URLs:</dt><dd>#[totalFetchedURLs]#</dd>
<dt>Total failed URLs:</dt><dd>#[totalFailedURLs]#</dd>
<dt>Last run duration:</dt><dd>#[lastRun]# ms</dd>
<dt>Last server response:</dt><dd>#[lastServerResponse]#</dd>
<dt>Last fetched URLs:</dt><dd>#[lastFetchedURLs]#</dd>
<dt>Failed URLs:</dt>
<dd>
#[error]#
<ul>#{error}#
<li><span class="error">#[reason]#</span>: #[url]#</li>#{/error}#
<li><span class="error">#[reason]#</span>: <a href="#[url]#">#[url]#</a></li>#{/error}#
</ul>
</dd>
<dt>#(status)#
<input type="submit" name="stop" value="Stop Thread" />::
<input type="submit" name="restart" value="Restart Thread" />::
<input type="submit" name="resume" value="Resume Thread" />#(/status)#
<input type="submit" name="stop" value="Stop Thread" />
<input type="submit" name="restart" value="Restart Thread" />#(/status)#
</dt>
</dl>
</fieldset>
#(/runs)#
</form>
#(/runs)#
#%env/templates/footer.template%#
</body>
</html>
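For context, the placeholders in the template above follow YaCy's servlet template conventions: #[key]# substitutes a single value, #(key)#a::b::c#(/key)# renders one alternative selected by a numeric property, and #{key}#...#{/key}# repeats its body over numbered entries. A hedged sketch of how the servlet side would feed these placeholders, reconstructed from the keys visible in this diff (the serverObjects class name is assumed from YaCy's servlet layout):

    // Sketch only: keys taken from the template above, class name assumed.
    serverObjects prop = new serverObjects();
    prop.put("runs", 1);        // 0 hides the status fieldset, 1 shows it (#(runs)#)
    prop.put("runs_status", 2); // selects running::stopped::paused in #(status)#
    prop.put("runs_totalRuns", URLFetcher.totalRuns); // fills #[totalRuns]#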

@@ -74,6 +74,7 @@ public class CrawlURLFetch_p {
}
}
if (fetcher != null) fetcher.interrupt();
fetcher = null;
if (post.get("source", "").equals("peer") &&
post.get("peerhash", "").equals("random")) {
@@ -93,6 +94,12 @@ public class CrawlURLFetch_p {
prop.put("host", post.get("host", ""));
prop.put("hostError", ERR_HOST_MALFORMED_URL);
}
} else if (post.get("source", "").equals("savedURL")) {
try {
url = new URL(post.get("saved", ""));
} catch (MalformedURLException e) {
/* should never appear, except for invalid input, see above */
}
} else if (post.get("source", "").equals("peer")) {
yacySeed ys = null;
try {
@@ -128,8 +135,9 @@ public class CrawlURLFetch_p {
prop.put("threadError", ERR_THREAD_STOP);
}
}
else if (post.containsKey("restart") || post.containsKey("resume")) {
else if (post.containsKey("restart")) {
if (fetcher != null) {
fetcher.interrupt();
if (fetcher.url == null) {
fetcher = new URLFetcher(
env,
@@ -151,8 +159,9 @@ public class CrawlURLFetch_p {
if (fetcher != null) {
prop.put("runs", 1);
prop.put("runs_status", (fetcher.isAlive()) ? STAT_THREAD_ALIVE :
(fetcher.paused) ? STAT_THREAD_PAUSED : STAT_THREAD_STOPPED);
prop.put("runs_status",
((fetcher.paused && fetcher.isAlive()) ? STAT_THREAD_PAUSED :
(fetcher.isAlive()) ? STAT_THREAD_ALIVE : STAT_THREAD_STOPPED));
prop.put("runs_totalRuns", URLFetcher.totalRuns);
prop.put("runs_totalFetchedURLs", URLFetcher.totalFetchedURLs);
prop.put("runs_totalFailedURLs", URLFetcher.totalFailed);
@@ -180,7 +189,7 @@ public class CrawlURLFetch_p {
if (savedURLs.size() == 0) return 0;
prop.put("saved", 1);
for (int i=0; i<savedURLs.size(); i++)
prop.put("saved_urls_" + i + "url", savedURLs.get(i));
prop.put("saved_urls_" + i + "_url", savedURLs.get(i));
prop.put("saved_urls", savedURLs.size());
return savedURLs.size();
}
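The one-character change above is the whole fix: the #{urls}# loop in the template resolves its per-iteration #[url]# from numbered keys of the form saved_urls_<i>_url, so the old key saved_urls_0url never matched and the saved-URL dropdown stayed empty. A sketch of the key layout the loop expects (example URLs are illustrative only):

    prop.put("saved_urls", 2);   // iteration count read by #{urls}#
    prop.put("saved_urls_0_url", "http://example.net/urls.txt");
    prop.put("saved_urls_1_url", "http://example.org/more.txt");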
@@ -218,7 +227,7 @@ public class CrawlURLFetch_p {
if (count != null && count.matches("\\d+")) r = Long.parseLong(count);
if (r < 1) return -1;
r *= 3600;
r *= 3600000;
if (type.equals("weeks")) return r * 24 * 7;
else if (type.equals("days")) return r * 24;
else if (type.equals("hours")) return r;
@@ -285,22 +294,24 @@ public class CrawlURLFetch_p {
url = getDLURL();
if (url == null) {
serverLog.logSevere(this.getName(), "canceled because no valid URL for the URL-list could be determined");
break;
return;
}
totalFetchedURLs += stackURLs(getURLs(url));
lastRun = System.currentTimeMillis() - start;
totalRuns++;
if (this.delay < 0) {
break;
} else if (this.delay == 0) {
this.paused = true;
while (this.paused) this.wait();
} else {
this.paused = true;
this.wait(this.delay);
if (this.delay < 0 || isInterrupted()) {
return;
} else synchronized (this) {
if (this.delay == 0) {
this.paused = true;
while (this.paused) this.wait();
} else {
this.paused = true;
this.wait(this.delay);
}
}
this.paused = false;
} catch (InterruptedException e) { break; }
} catch (InterruptedException e) { return; }
}
}
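Moving the pause logic into a synchronized block is a correctness fix, not a cleanup: Object.wait() throws IllegalMonitorStateException unless the calling thread owns the object's monitor. A minimal sketch of the resulting pause/resume handshake (the waking side is assumed, based on the paused flag used above):

    // Inside run(): wait() releases the monitor while blocked.
    synchronized (this) {
        this.paused = true;
        while (this.paused) this.wait();   // loop guards against spurious wakeups
    }

    // From the servlet thread, to resume (sketch):
    synchronized (fetcher) {
        fetcher.paused = false;
        fetcher.notifyAll();               // wakes the waiting run() loop
    }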
@@ -327,7 +338,7 @@ public class CrawlURLFetch_p {
this.lastFailed = 0;
if (urls == null) return 0;
String reason;
for (int i=0; i<urls.length; i++) {
for (int i=0; i<urls.length && !isInterrupted(); i++) {
serverLog.logFinest(this.getName(), "stacking " + urls[i]);
reason = this.sb.sbStackCrawlThread.stackCrawl(
urls[i],
@@ -339,6 +350,7 @@ public class CrawlURLFetch_p {
this.profile);
if (reason != null) {
this.lastFailed++;
totalFailed++;
this.failed.put(urls[i], reason);
try {
plasmaCrawlEURL.Entry ee = this.sb.errorURL.newEntry(
@@ -346,7 +358,7 @@ public class CrawlURLFetch_p {
null,
yacyCore.seedDB.mySeed.hash,
yacyCore.seedDB.mySeed.hash,
null,
"",
reason,
new kelondroBitfield());
ee.store();
