adapted isListed to the new behavior as discussed (it now accepts a URL and matches against url.getFile())

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1940 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 59fc55ea1e
commit 1f4412a146

@ -460,7 +460,7 @@ public class IndexControl_p {
} else {
url = new URL(us);
if (plasmaSwitchboard.urlBlacklist.isListed(url.getHost().toLowerCase(), url.getPath())) {
if (plasmaSwitchboard.urlBlacklist.isListed(url)) {
result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" checked value=\"").append(uh[0]).append("\" align=\"top\">");
} else {
result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" value=\"").append(uh[0]).append("\" align=\"top\">");

@ -96,7 +96,7 @@ public final class transferURL {
lEntry = sb.urlPool.loadedURL.newEntry(urls, true);
if ((lEntry != null) && (lEntry.url() != null)) {
if ((blockBlacklist) &&
(plasmaSwitchboard.urlBlacklist.isListed( lEntry.url().getHost().toLowerCase(), lEntry.url().getPath()))) {
(plasmaSwitchboard.urlBlacklist.isListed(lEntry.url()))) {
int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash());
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
lEntry = null;

@ -826,7 +826,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
plasmaCrawlLURL.Entry entry = (plasmaCrawlLURL.Entry) eiter.next();
totalSearchedUrls++;
if (plasmaSwitchboard.urlBlacklist.isListed(entry.url().getHost().toLowerCase(),entry.url().getPath())==true) {
if (plasmaSwitchboard.urlBlacklist.isListed(entry.url())==true) {
lastBlacklistedUrl = entry.url().toString();
lastBlacklistedHash = entry.hash();
serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double)blacklistedUrls/totalSearchedUrls)*100 + "%): " + entry.hash() + " " + entry.url());

@ -271,8 +271,7 @@ public final class plasmaCrawlStacker {
}
// check blacklist
String hostlow = nexturl.getHost().toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, nexturl.getPath())) {
if (plasmaSwitchboard.urlBlacklist.isListed(nexturl)) {
reason = "denied_(url_in_blacklist)";
this.log.logFine("URL '" + nexturlString + "' is in blacklist. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime));

@ -187,7 +187,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public plasmaCrawlStacker sbStackCrawlThread;
public messageBoard messageDB;
public wikiBoard wikiDB;
public blogBoard blogDB;
public blogBoard blogDB;
public static plasmaCrawlRobotsTxt robots;
public plasmaCrawlProfile profiles;
public plasmaCrawlProfile.entry defaultProxyProfile;

@ -42,6 +42,7 @@
package de.anomic.plasma;
import java.io.File;
import java.net.URL;
import java.util.HashMap;
import de.anomic.kelondro.kelondroMSetTools;
@ -84,6 +85,10 @@ public class plasmaURLPattern {
hostpaths.put(host.toLowerCase(), path);
}
/**
 * Convenience overload that takes a complete URL.
 * Splits the URL into its lower-cased host and its file part
 * (per java.net.URL#getFile(): the path plus the query string, if any)
 * and delegates to isListed(String, String).
 *
 * @param url the URL to check; must not be null
 * @return the result of isListed(String, String) for this URL's host and file part
 */
public boolean isListed(URL url) {
    final String hostlow = url.getHost().toLowerCase();
    final String file = url.getFile();
    return isListed(hostlow, file);
}
public boolean isListed(String hostlow, String path) {
if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1);
String pp = ""; // path-pattern

@ -559,7 +559,7 @@ public final class plasmaWordIndex {
// "+entry.getUrlHash());
try {
url = lurl.getEntry(entry.getUrlHash(), null).url();
if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(url.getHost().toLowerCase(), url.getPath()) == true)) {
if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(url) == true)) {
urlHashs.add(entry.getUrlHash());
}
} catch (IOException e) {

@ -465,7 +465,7 @@ public final class yacyClient {
for (int n = 0; n < results; n++) {
// get one single search result
urlEntry = urlManager.newEntry((String) result.get("resource" + n), true);
if (urlEntry != null && blacklist.isListed(urlEntry.url().getHost().toLowerCase(), urlEntry.url().getPath())) { continue; } // block with backlist
if (urlEntry != null && blacklist.isListed(urlEntry.url())) { continue; } // block with backlist
urlEntry.store();
int urlLength = urlEntry.url().toString().length();
int urlComps = htmlFilterContentScraper.urlComps(urlEntry.url().toString()).length;

Loading…
Cancel
Save