Fixed and enhanced some details of starting a crawl from a file

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4120 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 16e101f135
commit 1b42152a76

@ -215,7 +215,7 @@
<td style="background-color:#E7B7AF">&nbsp;</td>
<td>lightred font</td>
<td>:</td>
<td>passiv peers ( &lt; 5 hour passiv time)</td>
<td>passiv peers ( &lt; 5 hour passive time)</td>
</tr>
<tr>
<td style="background-color:#A7A75F">&nbsp;</td>

@ -271,11 +271,14 @@ public class WatchCrawler_p {
yacyURL crawlURL = new yacyURL("file://" + file.toString(), null);
plasmaCrawlProfile.entry profile = switchboard.profilesActiveCrawls.newEntry(fileName, crawlURL, newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingIfOlder, crawlingDomFilterDepth, crawlingDomMaxPages, crawlingQ, indexText, indexMedia, storeHTCache, true, crawlOrder, xsstopw, xdstopw, xpstopw);
// pause local crawl here
switchboard.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL);
// loop through the contained links
Iterator interator = hyperlinks.entrySet().iterator();
Iterator linkiterator = hyperlinks.entrySet().iterator();
int c = 0;
while (interator.hasNext()) {
Map.Entry e = (Map.Entry) interator.next();
while (linkiterator.hasNext()) {
Map.Entry e = (Map.Entry) linkiterator.next();
String nexturlstring = (String) e.getKey();
if (nexturlstring == null) continue;
@ -296,7 +299,7 @@ public class WatchCrawler_p {
}
// enqueuing the url for crawling
String rejectReason = switchboard.sbStackCrawlThread.stackCrawl(nexturlstring, null, yacyCore.seedDB.mySeed().hash, (String)e.getValue(), new Date(), 1, profile);
String rejectReason = switchboard.sbStackCrawlThread.stackCrawl(nexturlstring, null, yacyCore.seedDB.mySeed().hash, (String)e.getValue(), new Date(), 0, profile);
// if something failed add the url into the errorURL list
if (rejectReason == null) {
@ -320,7 +323,8 @@ public class WatchCrawler_p {
prop.put("info_error", e.getMessage());
e.printStackTrace();
}
}
switchboard.continueCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL);
}
} else if (crawlingMode.equals(CRAWLING_MODE_SITEMAP)) {
String sitemapURLStr = null;
try {

@ -1,7 +1,7 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy: Proxy Message</title>
<title>YaCy: Error Message</title>
#%env/templates/metas.template%#
</head>
<body id="error">

@ -288,9 +288,7 @@ public final class plasmaCrawlStacker {
// check if ip is local ip address
checkInterruption(); // TODO: this is protocol specific
InetAddress hostAddress = serverDomains.dnsResolve(nexturl.getHost());
if(this.sb.getConfig("yacyDebugMode", "true").equals("true")){
//just ignore the check in debugmode (useful for tor(.eff.org)
}else if (hostAddress == null) {
if (hostAddress == null) {
// if a http proxy is configured name resolution may not work
if (this.sb.remoteProxyConfig == null || !this.sb.remoteProxyConfig.useProxy()) {
reason = plasmaCrawlEURL.DENIED_UNKNOWN_HOST;

Loading…
Cancel
Save