diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index 27db45186..bc29b019c 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -465,10 +465,12 @@ public class plasmaCrawlBalancer { assert delta >= 0: "delta = " + delta; int s = urlFileIndex.size(); kelondroRow.Entry rowEntry = urlFileIndex.remove(result.getBytes(), false); - assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result; + assert (rowEntry != null) && (urlFileIndex.size() + 1 == s) : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result; if (rowEntry == null) { serverLog.logSevere("PLASMA BALANCER", "get() found a valid urlhash, but failed to fetch the corresponding url entry - total size = " + size() + ", fileStack.size() = " + urlFileStack.size() + ", ramStack.size() = " + urlRAMStack.size() + ", domainStacks.size() = " + domainStacks.size()); return null; + } else { + assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result; } plasmaCrawlEntry crawlEntry = new plasmaCrawlEntry(rowEntry); long minimumDelta = (crawlEntry.url().isLocal()) ? minimumLocalDelta : minimumGlobalDelta; diff --git a/source/de/anomic/yacy/yacyURL.java b/source/de/anomic/yacy/yacyURL.java index d63e343ac..2e456eb2d 100644 --- a/source/de/anomic/yacy/yacyURL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -403,7 +403,14 @@ public class yacyURL { assert (url != null); url = url.trim(); int p = url.indexOf(':'); - if (p < 0) throw new MalformedURLException("protocol is not given in '" + url + "'"); + if (p < 0) { + if (url.startsWith("www.")) { + url = "http://" + url; + p = 4; + } else { + throw new MalformedURLException("protocol is not given in '" + url + "'"); + } + } this.protocol = url.substring(0, p).toLowerCase().trim(); if (url.length() < p + 4) throw new MalformedURLException("URL not parseable: '" + url + "'"); if (url.substring(p + 1, p + 3).equals("//")) {