From c3bf17a3a19f55a958b5fa17f9a8378093be7c92 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 5 Oct 2010 00:05:08 +0000 Subject: [PATCH] fixed must-match filter for smb crawling git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7222 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Crawler_p.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 1f5f48877..03b205727 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -144,7 +144,13 @@ public class Crawler_p { if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = CrawlProfile.MATCH_ALL; // avoid that all urls are filtered out if bad value was submitted // special cases: if (crawlingStartURL!= null && fullDomain) { - newcrawlingMustMatch = crawlingStartURL.isFile() ? "file://" + crawlingStartURL.getPath() + ".*" : crawlingStartURL.isSMB() ? "smb://" + crawlingStartURL.getPath() + ".*" : ".*" + crawlingStartURL.getHost() + ".*"; + if (crawlingStartURL.isFile()) { + newcrawlingMustMatch = "file://" + crawlingStartURL.getPath() + ".*"; + } else if (crawlingStartURL.isSMB()) { + newcrawlingMustMatch = "smb://.*" + crawlingStartURL.getHost() + ".*" + crawlingStartURL.getPath() + ".*"; + } else { + newcrawlingMustMatch = ".*" + crawlingStartURL.getHost() + ".*"; + } } if (crawlingStart!= null && subPath && (pos = crawlingStart.lastIndexOf('/')) > 0) { newcrawlingMustMatch = crawlingStart.substring(0, pos + 1) + ".*";