fix for crawling of mailto-links

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5906 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 9c6ac43f66
commit eacf95213a

@@ -1661,6 +1661,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
// fetching the next hyperlink
nextEntry = i.next();
nextUrl = nextEntry.getKey();
String u = nextUrl.toNormalform(true, true);
if (!(u.startsWith("http") || u.startsWith("ftp"))) continue;
// enqueue the hyperlink into the pre-notice-url db
crawlStacker.enqueueEntry(new CrawlEntry(
entry.initiator(),

@@ -159,6 +159,9 @@ public class yacyURL implements Serializable {
public static yacyURL newURL(final String baseURL, final String relPath) throws MalformedURLException {
if ((baseURL == null) ||
(relPath.startsWith("mailto:")) ||
(relPath.startsWith("aim:")) ||
(relPath.startsWith("icq:")) ||
(relPath.startsWith("http://")) ||
(relPath.startsWith("https://")) ||
(relPath.startsWith("ftp://")) ||
@@ -925,6 +928,8 @@ public class yacyURL implements Serializable {
new String[]{"http://www.anomic.de/home/index.html", "mailto:abcdefg@nomailnomail.com"},
new String[]{null, "news:de.test"},
new String[]{"http://www.anomic.de/home", "news:de.test"},
new String[]{null, "mailto:bob@web.com"},
new String[]{"http://www.anomic.de/home", "mailto:bob@web.com"},
new String[]{"http://www.anomic.de/home", "ftp://ftp.anomic.de/src"},
new String[]{null, "ftp://ftp.delegate.org/"},
new String[]{"http://www.anomic.de/home", "ftp://ftp.delegate.org/"},

Loading…
Cancel
Save