URLs that have no protocol but start with "www." will be treated as http://www...

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4369 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 634430c48a
commit 002a109c4d

@ -465,10 +465,12 @@ public class plasmaCrawlBalancer {
assert delta >= 0: "delta = " + delta;
int s = urlFileIndex.size();
kelondroRow.Entry rowEntry = urlFileIndex.remove(result.getBytes(), false);
assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result;
assert (rowEntry != null) && (urlFileIndex.size() + 1 == s) : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result;
if (rowEntry == null) {
serverLog.logSevere("PLASMA BALANCER", "get() found a valid urlhash, but failed to fetch the corresponding url entry - total size = " + size() + ", fileStack.size() = " + urlFileStack.size() + ", ramStack.size() = " + urlRAMStack.size() + ", domainStacks.size() = " + domainStacks.size());
return null;
} else {
assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result;
}
plasmaCrawlEntry crawlEntry = new plasmaCrawlEntry(rowEntry);
long minimumDelta = (crawlEntry.url().isLocal()) ? minimumLocalDelta : minimumGlobalDelta;

@ -403,7 +403,14 @@ public class yacyURL {
assert (url != null);
url = url.trim();
int p = url.indexOf(':');
if (p < 0) throw new MalformedURLException("protocol is not given in '" + url + "'");
if (p < 0) {
if (url.startsWith("www.")) {
url = "http://" + url;
p = 4;
} else {
throw new MalformedURLException("protocol is not given in '" + url + "'");
}
}
this.protocol = url.substring(0, p).toLowerCase().trim();
if (url.length() < p + 4) throw new MalformedURLException("URL not parseable: '" + url + "'");
if (url.substring(p + 1, p + 3).equals("//")) {

Loading…
Cancel
Save