*) Fixed logging for urldbcleanup: the logging configuration is now loaded from DATA/LOG/yacy.logging before the cleanup runs

*) Changed exception handling in urldbcleanup so that a RuntimeException without a message (e.g. a NullPointerException) is logged instead of triggering a further NullPointerException
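*) Added more blacklisting to the URL cleaner: URLs listed in the DHT blacklist are now treated as blacklisted as well as those in the crawler blacklist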

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2436 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
hydrox 19 years ago
parent 135e019883
commit 1c99b5a484

@ -803,8 +803,12 @@ public final class plasmaCrawlLURL extends indexURL {
eiter.next();
iteratorCount++;
} catch (RuntimeException e) {
String m = e.getMessage();
damagedURLS.add(m.substring(m.length() - 12));
if(e.getMessage() != null) {
String m = e.getMessage();
damagedURLS.add(m.substring(m.length() - 12));
} else {
log.logSevere("RuntimeException:", e);
}
}
try { Thread.sleep(1000); } catch (InterruptedException e) { }
log.logInfo("URLs vorher: " + size() + " Entries loaded during Iteratorloop: " + iteratorCount + " kaputte URLs: " + damagedURLS.size());
@ -899,7 +903,7 @@ public final class plasmaCrawlLURL extends indexURL {
plasmaCrawlLURL.Entry entry = (plasmaCrawlLURL.Entry) eiter.next();
totalSearchedUrls++;
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER,entry.url())==true) {
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER,entry.url())==true || plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT,entry.url())==true) {
lastBlacklistedUrl = entry.url().toString();
lastBlacklistedHash = entry.hash();
serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double)blacklistedUrls/totalSearchedUrls)*100 + "%): " + entry.hash() + " " + entry.url());

@ -1131,6 +1131,7 @@ public final class yacy {
File root = new File(homePath);
File dbroot = new File(root, "DATA/PLASMADB");
serverLog log = new serverLog("URLDBCLEANUP");
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
try {
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), 4194304, 10000);
currentUrlDB.urldbcleanup();

Loading…
Cancel
Save