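Other bugfix: guard the LURL entry iterator's next() with hasNext(), and check for a null hash before reading an entry's components in the URL DB cleaner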

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3048 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
borg-0300 18 years ago
parent ad65cc9d2f
commit 15381cbf73

@ -388,8 +388,9 @@ public final class plasmaCrawlLURL {
} }
public Object next() throws RuntimeException { public Object next() throws RuntimeException {
kelondroRow.Entry e = (kelondroRow.Entry) i.next(); kelondroRow.Entry e = null;
if (e == null) return null; if (i.hasNext()) { e = (kelondroRow.Entry) i.next(); }
if (e == null) { return null; }
return new indexURLEntryNew(e, null); return new indexURLEntryNew(e, null);
} }
@ -499,7 +500,7 @@ public final class plasmaCrawlLURL {
public void run() { public void run() {
try { try {
serverLog.logInfo("URLDBCLEANER", "UrldbCleaner-Thread startet"); serverLog.logInfo("URLDBCLEANER", "UrldbCleaner-Thread startet");
Iterator eiter = entries(true, false, null); final Iterator eiter = entries(true, false, null);
while (eiter.hasNext() && run) { while (eiter.hasNext() && run) {
synchronized (this) { synchronized (this) {
if (this.pause) { if (this.pause) {
@ -512,19 +513,17 @@ public final class plasmaCrawlLURL {
} }
} }
} }
if (eiter.hasNext()) { final indexURLEntry entry = (indexURLEntry) eiter.next();
indexURLEntry entry = (indexURLEntry) eiter.next();
if (entry == null) { if (entry == null) {
serverLog.logFine("URLDBCLEANER", "entry == null"); serverLog.logFine("URLDBCLEANER", "entry == null");
} else if (entry.hash() == null) {
serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double) blacklistedUrls / totalSearchedUrls) * 100 + "%): " + "hash == null");
} else { } else {
indexURLEntry.Components comp = entry.comp(); final indexURLEntry.Components comp = entry.comp();
totalSearchedUrls++; totalSearchedUrls++;
if (entry.hash() == null) { if (comp.url() == null) {
serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double)blacklistedUrls/totalSearchedUrls)*100 + "%): " + " hash == null");
} else if (comp.url() == null) {
serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double) blacklistedUrls / totalSearchedUrls) * 100 + "%): " + entry.hash() + "URL == null"); serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double) blacklistedUrls / totalSearchedUrls) * 100 + "%): " + entry.hash() + "URL == null");
remove(entry.hash()); remove(entry.hash());
lastHash = entry.hash();
} else if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, comp.url()) || } else if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, comp.url()) ||
plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, comp.url())) { plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, comp.url())) {
lastBlacklistedUrl = comp.url().toNormalform(); lastBlacklistedUrl = comp.url().toNormalform();
@ -535,11 +534,8 @@ public final class plasmaCrawlLURL {
serverLog.logInfo("URLDBCLEANER", "Deleted " + blacklistedUrls + " URLs until now. Last deleted URL-Hash: " + lastBlacklistedUrl); serverLog.logInfo("URLDBCLEANER", "Deleted " + blacklistedUrls + " URLs until now. Last deleted URL-Hash: " + lastBlacklistedUrl);
} }
lastUrl = comp.url().toNormalform(); lastUrl = comp.url().toNormalform();
lastHash = entry.hash();
}
} }
} else { lastHash = entry.hash();
serverLog.logFine("URLDBCLEANER", "Iterator == null");
} }
} }
} catch (RuntimeException e) { } catch (RuntimeException e) {

Loading…
Cancel
Save