From 002a109c4d8b5488ed25406525f190e2a012c045 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 22 Jan 2008 20:49:26 +0000 Subject: [PATCH] patch for http://forum.yacy-websuche.de/viewtopic.php?p=4597#p4597 (urls that have no protocol but start with www will be treated as http://www...) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4369 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/plasmaCrawlBalancer.java | 4 +++- source/de/anomic/yacy/yacyURL.java | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index 27db45186..bc29b019c 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -465,10 +465,12 @@ public class plasmaCrawlBalancer { assert delta >= 0: "delta = " + delta; int s = urlFileIndex.size(); kelondroRow.Entry rowEntry = urlFileIndex.remove(result.getBytes(), false); - assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result; + assert (rowEntry != null) && (urlFileIndex.size() + 1 == s) : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result; if (rowEntry == null) { serverLog.logSevere("PLASMA BALANCER", "get() found a valid urlhash, but failed to fetch the corresponding url entry - total size = " + size() + ", fileStack.size() = " + urlFileStack.size() + ", ramStack.size() = " + urlRAMStack.size() + ", domainStacks.size() = " + domainStacks.size()); return null; + } else { + assert urlFileIndex.size() + 1 == s : "urlFileIndex.size() = " + urlFileIndex.size() + ", s = " + s + ", result = " + result; } plasmaCrawlEntry crawlEntry = new plasmaCrawlEntry(rowEntry); long minimumDelta = (crawlEntry.url().isLocal()) ? 
minimumLocalDelta : minimumGlobalDelta; diff --git a/source/de/anomic/yacy/yacyURL.java b/source/de/anomic/yacy/yacyURL.java index d63e343ac..2e456eb2d 100644 --- a/source/de/anomic/yacy/yacyURL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -403,7 +403,14 @@ public class yacyURL { assert (url != null); url = url.trim(); int p = url.indexOf(':'); - if (p < 0) throw new MalformedURLException("protocol is not given in '" + url + "'"); + if (p < 0) { + if (url.startsWith("www.")) { + url = "http://" + url; + p = 4; + } else { + throw new MalformedURLException("protocol is not given in '" + url + "'"); + } + } this.protocol = url.substring(0, p).toLowerCase().trim(); if (url.length() < p + 4) throw new MalformedURLException("URL not parseable: '" + url + "'"); if (url.substring(p + 1, p + 3).equals("//")) {