diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index b2e035d88..424d9f223 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -803,8 +803,12 @@ public final class plasmaCrawlLURL extends indexURL { eiter.next(); iteratorCount++; } catch (RuntimeException e) { - String m = e.getMessage(); - damagedURLS.add(m.substring(m.length() - 12)); + if(e.getMessage() != null) { + String m = e.getMessage(); + damagedURLS.add(m.substring(m.length() - 12)); + } else { + log.logSevere("RuntimeException:", e); + } } try { Thread.sleep(1000); } catch (InterruptedException e) { } log.logInfo("URLs vorher: " + size() + " Entries loaded during Iteratorloop: " + iteratorCount + " kaputte URLs: " + damagedURLS.size()); @@ -899,7 +903,7 @@ public final class plasmaCrawlLURL extends indexURL { plasmaCrawlLURL.Entry entry = (plasmaCrawlLURL.Entry) eiter.next(); totalSearchedUrls++; - if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER,entry.url())==true) { + if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER,entry.url())==true || plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT,entry.url())==true) { lastBlacklistedUrl = entry.url().toString(); lastBlacklistedHash = entry.hash(); serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double)blacklistedUrls/totalSearchedUrls)*100 + "%): " + entry.hash() + " " + entry.url()); diff --git a/source/yacy.java b/source/yacy.java index 121217e07..eb8e5c747 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -1131,6 +1131,7 @@ public final class yacy { File root = new File(homePath); File dbroot = new File(root, "DATA/PLASMADB"); serverLog log = new serverLog("URLDBCLEANUP"); + try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {} try { plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), 4194304, 10000); currentUrlDB.urldbcleanup();