From 243a2f831ba6e48c0aefe8eb2ead63ab8199ce02 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 9 Mar 2007 11:07:20 +0000 Subject: [PATCH] fixed problem with not found NURL-hashes The cause for this problem could still not be found, but the effect is handled much better. The NURL-pop will continue automatically until it found a hash that can be found. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3458 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/plasma/plasmaCrawlBalancer.java | 4 +- source/de/anomic/plasma/plasmaCrawlNURL.java | 72 ++++++++++++------- 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index 354c3a19d..556d07081 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -184,7 +184,7 @@ public class plasmaCrawlBalancer { fileStack.push(fileStack.row().newEntry(new byte[][]{((String) ramStack.get(ramStack.size() / 2)).getBytes()})); } - public synchronized void add(String urlhash) throws IOException { + public synchronized void push(String urlhash) throws IOException { assert urlhash != null; if (ramIndex.contains(urlhash)) { serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + urlhash + " - fixed"); @@ -211,7 +211,7 @@ public class plasmaCrawlBalancer { } } - public synchronized String get(long minimumDelta, long maximumAge) throws IOException { + public synchronized String pop(long minimumDelta, long maximumAge) throws IOException { // returns an url-hash from the stack and ensures minimum delta times // we have 3 sources to choose from: the ramStack, the domainStacks and the fileStack diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index a4e81a52c..1feb2dcce 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -293,16 +293,16 @@ public class plasmaCrawlNURL { try { switch (stackType) { case STACK_TYPE_CORE: - coreStack.add(urlhash); + coreStack.push(urlhash); break; case STACK_TYPE_LIMIT: - limitStack.add(urlhash); + limitStack.push(urlhash); break; case STACK_TYPE_OVERHANG: - overhangStack.add(urlhash); + overhangStack.push(urlhash); break; case STACK_TYPE_REMOTE: - remoteStack.add(urlhash); + remoteStack.push(urlhash); break; case STACK_TYPE_IMAGE: imageStack.push(imageStack.row().newEntry(new byte[][] {urlhash.getBytes()})); @@ -385,30 +385,54 @@ public class plasmaCrawlNURL { private Entry pop(kelondroStack stack) throws IOException { // this is a filo - pop - if (stack.size() > 0) { - Entry e = new Entry(new String(stack.pop().getColBytes(0))); - imageStackIndex.remove(e.hash); - movieStackIndex.remove(e.hash); - musicStackIndex.remove(e.hash); - return e; - } else { - throw new IOException("crawl stack is empty"); + int s; + Entry entry; + kelondroRow.Entry re; + while ((s = stack.size()) > 0) { + re = stack.pop(); + if (re == null) { + if (s > stack.size()) continue; + throw new IOException("hash is null"); + } + try { + entry = new Entry(new String(re.getColBytes(0))); + } catch (IOException e) { + serverLog.logWarning("NURL", e.getMessage()); + if (s > stack.size()) continue; + throw new IOException(e.getMessage()); + } + imageStackIndex.remove(entry.hash); + movieStackIndex.remove(entry.hash); + musicStackIndex.remove(entry.hash); + return entry; } + throw new IOException("crawl stack is empty"); } private Entry pop(plasmaCrawlBalancer balancer) throws IOException { // this is a filo - pop - if (balancer.size() > 0) { - String hash = balancer.get(minimumDelta, maximumDomAge); - if (hash == null) throw new IOException("hash is null"); - Entry e = new Entry(hash); - imageStackIndex.remove(e.hash); - movieStackIndex.remove(e.hash); - musicStackIndex.remove(e.hash); - return e; - } else { - throw new IOException("balancer stack is empty"); - } + String hash; + int s; + Entry entry; + while ((s = balancer.size()) > 0) { + hash = balancer.pop(minimumDelta, maximumDomAge); + if (hash == null) { + if (s > balancer.size()) continue; + throw new IOException("hash is null"); + } + try { + entry = new Entry(hash); + } catch (IOException e) { + serverLog.logWarning("NURL", e.getMessage()); + if (s > balancer.size()) continue; + throw new IOException(e.getMessage()); + } + imageStackIndex.remove(entry.hash); + movieStackIndex.remove(entry.hash); + musicStackIndex.remove(entry.hash); + return entry; + } + throw new IOException("balancer stack is empty"); } private Entry[] top(kelondroStack stack, int count) { @@ -505,7 +529,7 @@ public class plasmaCrawlNURL { return; } else { // show that we found nothing - throw new IOException("NURL: hash " + hash + " not found"); + throw new IOException("NURL: hash " + hash + " not found during initialization of entry object"); //this.url = null; } }