Fixed problem with NURL hashes that could not be found

The cause of this problem has still not been found, but its effect
is now handled much better: the NURL pop continues automatically until
it pops a hash whose entry can be found.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3458 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 6ad39bae1e
commit 243a2f831b

@ -184,7 +184,7 @@ public class plasmaCrawlBalancer {
fileStack.push(fileStack.row().newEntry(new byte[][]{((String) ramStack.get(ramStack.size() / 2)).getBytes()})); fileStack.push(fileStack.row().newEntry(new byte[][]{((String) ramStack.get(ramStack.size() / 2)).getBytes()}));
} }
public synchronized void add(String urlhash) throws IOException { public synchronized void push(String urlhash) throws IOException {
assert urlhash != null; assert urlhash != null;
if (ramIndex.contains(urlhash)) { if (ramIndex.contains(urlhash)) {
serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + urlhash + " - fixed"); serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + urlhash + " - fixed");
@ -211,7 +211,7 @@ public class plasmaCrawlBalancer {
} }
} }
public synchronized String get(long minimumDelta, long maximumAge) throws IOException { public synchronized String pop(long minimumDelta, long maximumAge) throws IOException {
// returns an url-hash from the stack and ensures minimum delta times // returns an url-hash from the stack and ensures minimum delta times
// we have 3 sources to choose from: the ramStack, the domainStacks and the fileStack // we have 3 sources to choose from: the ramStack, the domainStacks and the fileStack

@ -293,16 +293,16 @@ public class plasmaCrawlNURL {
try { try {
switch (stackType) { switch (stackType) {
case STACK_TYPE_CORE: case STACK_TYPE_CORE:
coreStack.add(urlhash); coreStack.push(urlhash);
break; break;
case STACK_TYPE_LIMIT: case STACK_TYPE_LIMIT:
limitStack.add(urlhash); limitStack.push(urlhash);
break; break;
case STACK_TYPE_OVERHANG: case STACK_TYPE_OVERHANG:
overhangStack.add(urlhash); overhangStack.push(urlhash);
break; break;
case STACK_TYPE_REMOTE: case STACK_TYPE_REMOTE:
remoteStack.add(urlhash); remoteStack.push(urlhash);
break; break;
case STACK_TYPE_IMAGE: case STACK_TYPE_IMAGE:
imageStack.push(imageStack.row().newEntry(new byte[][] {urlhash.getBytes()})); imageStack.push(imageStack.row().newEntry(new byte[][] {urlhash.getBytes()}));
@ -385,30 +385,54 @@ public class plasmaCrawlNURL {
private Entry pop(kelondroStack stack) throws IOException { private Entry pop(kelondroStack stack) throws IOException {
// this is a filo - pop // this is a filo - pop
if (stack.size() > 0) { int s;
Entry e = new Entry(new String(stack.pop().getColBytes(0))); Entry entry;
imageStackIndex.remove(e.hash); kelondroRow.Entry re;
movieStackIndex.remove(e.hash); while ((s = stack.size()) > 0) {
musicStackIndex.remove(e.hash); re = stack.pop();
return e; if (re == null) {
} else { if (s > stack.size()) continue;
throw new IOException("crawl stack is empty"); throw new IOException("hash is null");
}
try {
entry = new Entry(new String(re.getColBytes(0)));
} catch (IOException e) {
serverLog.logWarning("NURL", e.getMessage());
if (s > stack.size()) continue;
throw new IOException(e.getMessage());
}
imageStackIndex.remove(entry.hash);
movieStackIndex.remove(entry.hash);
musicStackIndex.remove(entry.hash);
return entry;
} }
throw new IOException("crawl stack is empty");
} }
private Entry pop(plasmaCrawlBalancer balancer) throws IOException { private Entry pop(plasmaCrawlBalancer balancer) throws IOException {
// this is a filo - pop // this is a filo - pop
if (balancer.size() > 0) { String hash;
String hash = balancer.get(minimumDelta, maximumDomAge); int s;
if (hash == null) throw new IOException("hash is null"); Entry entry;
Entry e = new Entry(hash); while ((s = balancer.size()) > 0) {
imageStackIndex.remove(e.hash); hash = balancer.pop(minimumDelta, maximumDomAge);
movieStackIndex.remove(e.hash); if (hash == null) {
musicStackIndex.remove(e.hash); if (s > balancer.size()) continue;
return e; throw new IOException("hash is null");
} else {
throw new IOException("balancer stack is empty");
} }
try {
entry = new Entry(hash);
} catch (IOException e) {
serverLog.logWarning("NURL", e.getMessage());
if (s > balancer.size()) continue;
throw new IOException(e.getMessage());
}
imageStackIndex.remove(entry.hash);
movieStackIndex.remove(entry.hash);
musicStackIndex.remove(entry.hash);
return entry;
}
throw new IOException("balancer stack is empty");
} }
private Entry[] top(kelondroStack stack, int count) { private Entry[] top(kelondroStack stack, int count) {
@ -505,7 +529,7 @@ public class plasmaCrawlNURL {
return; return;
} else { } else {
// show that we found nothing // show that we found nothing
throw new IOException("NURL: hash " + hash + " not found"); throw new IOException("NURL: hash " + hash + " not found during initialization of entry object");
//this.url = null; //this.url = null;
} }
} }

Loading…
Cancel
Save