*) fixed a bug that caused entries not to be deleted when deleting by URL on IndexCreateWWWLocalQueue_p.html (I hope this did not break anything else; a sketch of the fix follows below)

*) cleaned up code a little bit

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7493 6c8d7289-2bf4-0310-a012-ef5d649a1542
low012 14 years ago
parent d58071947a
commit c5051c4020
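
The headline fix sits in NoticedURL.removeByURLHash() (last file in the diff below): the old code returned from inside the first try block whose remove() call did not throw, so a URL hash that lived on one of the later stacks (core, limit, remote) was never removed and the method reported false. The following minimal, self-contained sketch is illustrative only and not YaCy code; it reproduces the failure mode and the accumulate-with-OR fix:

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class EarlyReturnDemo {

    // stand-in for one of the four crawler stacks (noload/core/limit/remote)
    static final class Stack {
        private final Set<String> hashes = new HashSet<String>();
        Stack(final String... h) { this.hashes.addAll(Arrays.asList(h)); }
        int remove(final String hash) throws IOException {
            return this.hashes.remove(hash) ? 1 : 0;
        }
    }

    // old behaviour: the first remove() that does not throw ends the method
    static boolean removeBuggy(final List<Stack> stacks, final String hash) {
        for (final Stack s : stacks) {
            try { return s.remove(hash) > 0; } catch (final IOException e) {}
        }
        return false;
    }

    // fixed behaviour: probe every stack and OR the results together
    static boolean removeFixed(final List<Stack> stacks, final String hash) {
        boolean ret = false;
        for (final Stack s : stacks) {
            try { ret |= s.remove(hash) > 0; } catch (final IOException e) {}
        }
        return ret;
    }

    public static void main(final String[] args) {
        // the hash lives on the SECOND stack, like a URL waiting on the core
        // stack while the noload stack is probed first
        System.out.println(removeBuggy(Arrays.asList(new Stack(), new Stack("hash1")), "hash1")); // false
        System.out.println(removeFixed(Arrays.asList(new Stack(), new Stack("hash1")), "hash1")); // true
    }
}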

IndexCreateWWWGlobalQueue_p.java

@@ -28,8 +28,8 @@
 // if the shell's current path is HTROOT
 
 import java.text.SimpleDateFormat;
-import java.util.ArrayList;
 import java.util.Date;
+import java.util.List;
 import java.util.Locale;
 
 import net.yacy.cora.protocol.RequestHeader;
@@ -89,7 +89,7 @@ public class IndexCreateWWWGlobalQueue_p {
             prop.put("crawler-queue", "0");
         } else {
             prop.put("crawler-queue", "1");
-            final ArrayList<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.LIMIT, showLimit);
+            final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.LIMIT, showLimit);
 
             Request urle;
             boolean dark = true;

IndexCreateWWWLocalQueue_p.java

@@ -55,7 +55,7 @@ public class IndexCreateWWWLocalQueue_p {
         if (date == null) return "";
         return dayFormatter.format(date);
     }
 
     private static final int INVALID = 0;
     private static final int URL = 1;
     private static final int ANCHOR = 2;
@@ -63,7 +63,7 @@ public class IndexCreateWWWLocalQueue_p {
     private static final int DEPTH = 4;
     private static final int INITIATOR = 5;
     private static final int MODIFIED = 6;
 
     public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
         // return variable that accumulates replacements
         final Switchboard sb = (Switchboard) env;
@@ -82,21 +82,20 @@ public class IndexCreateWWWLocalQueue_p {
             final String pattern = post.get("pattern", ".*").trim();
             final int option = post.getInt("option", INVALID);
-            if (pattern.equals(".*")) {
+            if (".*".equals(pattern)) {
                 c = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.CORE);
                 sb.crawlQueues.noticeURL.clear(NoticedURL.StackType.CORE);
                 try { sb.cleanProfiles(); } catch (final InterruptedException e) {/* ignore this */}
             } else if (option > INVALID) {
-                Pattern compiledPattern = null;
                 try {
                     // compiling the regular expression
-                    compiledPattern = Pattern.compile(pattern);
+                    final Pattern compiledPattern = Pattern.compile(pattern);
                     if (option == PROFILE) {
                         // search and delete the crawl profile (_much_ faster, independant of queue size)
                         // XXX: what to do about the annoying LOST PROFILE messages in the log?
                         CrawlProfile entry;
-                        for (byte[] handle: sb.crawler.getActive()) {
+                        for (final byte[] handle: sb.crawler.getActive()) {
                             entry = sb.crawler.getActive(handle);
                             final String name = entry.name();
                             if (name.equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY) ||
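
Two of the edits in this hunk are stylistic: the loop variable becomes final, and the equals comparison is flipped so the string literal comes first. Here pattern cannot actually be null (post.get() supplies the ".*" default), so the flip is defensive style rather than a bug fix. The point of the idiom, shown in a standalone demo that is not YaCy code, is that literal-first equals can never throw a NullPointerException:

public class ConstantFirstEquals {
    public static void main(final String[] args) {
        final String pattern = null; // imagine a missing request parameter
        System.out.println(".*".equals(pattern)); // false, no exception
        try {
            System.out.println(pattern.equals(".*"));
        } catch (final NullPointerException e) {
            System.out.println("variable-first equals throws on null");
        }
    }
}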
@@ -113,7 +112,7 @@ public class IndexCreateWWWLocalQueue_p {
                         // iterating through the list of URLs
                         final Iterator<Request> iter = sb.crawlQueues.noticeURL.iterator(NoticedURL.StackType.CORE);
                         Request entry;
-                        List<byte[]> removehashes = new ArrayList<byte[]>();
+                        final List<byte[]> removehashes = new ArrayList<byte[]>();
                         while (iter.hasNext()) {
                             if ((entry = iter.next()) == null) continue;
                             String value = null;
@@ -129,10 +128,10 @@ public class IndexCreateWWWLocalQueue_p {
                                 default: value = null; break location;
                             }
-                            if (value != null && compiledPattern.matcher(value).find()) removehashes.add(entry.url().hash());
+                            if (value != null && compiledPattern.matcher(value).matches()) removehashes.add(entry.url().hash());
                         }
 
                         Log.logInfo("IndexCreateWWWLocalQueue", "created a remove list with " + removehashes.size() + " entries for pattern '" + pattern + "'");
-                        for (byte[] b: removehashes) {
+                        for (final byte[] b: removehashes) {
                             sb.crawlQueues.noticeURL.removeByURLHash(b);
                         }
                     }
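
The find()-to-matches() switch in this hunk is a behavioural change, not cleanup: Matcher.find() succeeds on any substring hit, while matches() requires the regular expression to cover the entire value, so a delete pattern now has to describe the whole URL (or anchor, title, etc.). A standalone demo, not YaCy code:

import java.util.regex.Pattern;

public class FindVsMatches {
    public static void main(final String[] args) {
        final Pattern p = Pattern.compile("example\\.org");
        final String url = "http://example.org/index.html";
        System.out.println(p.matcher(url).find());    // true  - substring found
        System.out.println(p.matcher(url).matches()); // false - whole string must match
        // with matches(), the old substring behaviour needs a widened pattern:
        System.out.println(Pattern.compile(".*example\\.org.*").matcher(url).matches()); // true
    }
}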
@@ -156,7 +155,7 @@ public class IndexCreateWWWLocalQueue_p {
             prop.put("crawler-queue", "0");
         } else {
             prop.put("crawler-queue", "1");
-            final ArrayList<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.CORE, (int) (showLimit * 1.20));
+            final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.CORE, (int) (showLimit * 1.20));
 
             Request urle;
             boolean dark = true;

IndexCreateWWWRemoteQueue_p.java

@@ -25,8 +25,8 @@
 // if the shell's current path is HTROOT
 
 import java.text.SimpleDateFormat;
-import java.util.ArrayList;
 import java.util.Date;
+import java.util.List;
 import java.util.Locale;
 
 import net.yacy.cora.protocol.RequestHeader;
@@ -86,7 +86,7 @@ public class IndexCreateWWWRemoteQueue_p {
             prop.put("crawler-queue", "0");
         } else {
             prop.put("crawler-queue", "1");
-            final ArrayList<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.REMOTE, showLimit);
+            final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.REMOTE, showLimit);
 
             Request urle;
             boolean dark = true;

queues_p.java

@@ -1,6 +1,6 @@
 import java.text.SimpleDateFormat;
-import java.util.ArrayList;
 import java.util.Date;
+import java.util.List;
 import java.util.Locale;
 
 import net.yacy.cora.protocol.RequestHeader;
@@ -43,37 +43,37 @@ public class queues_p {
         yacySeed initiator;
 
         // index size
         prop.putNum("urlpublictextSize", segment.urlMetadata().size());
         prop.putNum("rwipublictextSize", segment.termIndex().sizesMax());
 
         // loader queue
         prop.putNum("loaderSize", sb.crawlQueues.workerSize());
         prop.putNum("loaderMax", sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10));
         if (sb.crawlQueues.workerSize() == 0) {
             prop.put("list-loader", "0");
         } else {
             final Request[] w = sb.crawlQueues.activeWorkerEntries();
             int count = 0;
-            for (int i = 0; i < w.length; i++) {
-                if (w[i] == null) continue;
-                prop.put("list-loader_"+count+"_profile", w[i].profileHandle());
-                initiator = sb.peers.getConnected((w[i].initiator() == null) ? "" : new String(w[i].initiator()));
+            for (final Request r : w) {
+                if (r == null) continue;
+                prop.put("list-loader_"+count+"_profile", r.profileHandle());
+                initiator = sb.peers.getConnected((r.initiator() == null) ? "" : new String(r.initiator()));
                 prop.putHTML("list-loader_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
-                prop.put("list-loader_"+count+"_depth", w[i].depth());
-                prop.putXML("list-loader_"+count+"_url", w[i].url().toString());
+                prop.put("list-loader_"+count+"_depth", r.depth());
+                prop.putXML("list-loader_"+count+"_url", r.url().toString());
                 count++;
             }
             prop.put("list-loader", count);
         }
 
         //local crawl queue
         prop.putNum("localCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount());
         prop.put("localCrawlState", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? STATE_PAUSED : STATE_RUNNING);
         int stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.CORE);
         addNTable(sb, prop, "list-local", sb.crawlQueues.noticeURL.top(NoticedURL.StackType.CORE, Math.min(10, stackSize)));
 
         //global crawl queue
         prop.putNum("limitCrawlSize", sb.crawlQueues.limitCrawlJobSize());
         prop.put("limitCrawlState", STATE_RUNNING);
         stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);
@@ -82,7 +82,7 @@ public class queues_p {
         prop.putNum("remoteCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
         prop.put("remoteCrawlState", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? STATE_PAUSED : STATE_RUNNING);
         stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);
         if (stackSize == 0) {
             prop.put("list-remote", "0");
         } else {
@@ -94,13 +94,11 @@ public class queues_p {
     }
 
-    public static final void addNTable(final Switchboard sb, final serverObjects prop, final String tableName, final ArrayList<Request> crawlerList) {
+    public static final void addNTable(final Switchboard sb, final serverObjects prop, final String tableName, final List<Request> crawlerList) {
         int showNum = 0;
-        Request urle;
         yacySeed initiator;
-        for (int i = 0; i < crawlerList.size(); i++) {
-            urle = crawlerList.get(i);
+        for (final Request urle : crawlerList) {
             if ((urle != null) && (urle.url() != null)) {
                 initiator = sb.peers.getConnected((urle.initiator() == null) ? "" : new String(urle.initiator()));
                 prop.put(tableName + "_" + showNum + "_profile", urle.profileHandle());

Balancer.java

@@ -6,6 +6,10 @@
 // Frankfurt, Germany, 2005
 // created: 24.09.2005
 //
+//$LastChangedDate$
+//$LastChangedRevision$
+//$LastChangedBy$
+//
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
 // the Free Software Foundation; either version 2 of the License, or
@@ -28,6 +32,7 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentLinkedQueue;
@@ -45,6 +50,7 @@ import net.yacy.kelondro.util.ByteBuffer;
 import de.anomic.crawler.retrieval.Request;
 import de.anomic.http.client.Cache;
+import java.util.concurrent.ConcurrentMap;
 
 public class Balancer {
@@ -54,9 +60,9 @@ public class Balancer {
     private static final String localhost = "localhost";
 
     // class variables
-    private final ConcurrentHashMap<String, HandleSet> domainStacks; // a map from host name to lists with url hashs
+    private final ConcurrentMap<String, HandleSet> domainStacks; // a map from host name to lists with url hashs
     private final ConcurrentLinkedQueue<byte[]> top; // a list of url-hashes that shall be taken next
-    private final TreeMap<Long, byte[]> delayed;
+    private final SortedMap<Long, byte[]> delayed;
     private final HandleSet ddc;
     private final HandleSet double_push_check; // for debugging
     private BufferedObjectIndex urlFileIndex;
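
The two field retypings above (ConcurrentHashMap to ConcurrentMap, TreeMap to SortedMap) are the program-to-an-interface idiom: only the constructor call names a concrete class, so every user of the field depends on the narrower contract and the implementation can be swapped later without touching them. A minimal sketch, not YaCy code:

import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class InterfaceTypedFields {
    // fields declared by interface; only the initializers pick a class
    private final ConcurrentMap<String, Integer> domainStacks = new ConcurrentHashMap<String, Integer>();
    private final SortedMap<Long, String> delayed = new TreeMap<Long, String>();

    public static void main(final String[] args) {
        final InterfaceTypedFields f = new InterfaceTypedFields();
        f.domainStacks.putIfAbsent("example.org", 1); // ConcurrentMap contract preserved
        f.delayed.put(42L, "urlhash");                // SortedMap keeps the ordering guarantee
        System.out.println(f.domainStacks + " first key: " + f.delayed.firstKey());
    }
}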
@@ -67,8 +73,8 @@ public class Balancer {
     private int domStackInitSize;
 
     public Balancer(
             final File cachePath,
             final String stackname,
             final long minimumLocalDelta,
             final long minimumGlobalDelta,
             final boolean useTailCache,
@@ -189,16 +195,16 @@ public class Balancer {
         final Iterator<byte[]> j = top.iterator();
         byte[] urlhash;
         while (j.hasNext()) {
             urlhash = j.next();
             if (urlHashes.has(urlhash)) j.remove();
         }
 
         // remove from delayed
         synchronized (this.delayed) {
             final Iterator<Map.Entry<Long, byte[]>> k = this.delayed.entrySet().iterator();
             while (k.hasNext()) {
                 if (urlHashes.has(k.next().getValue())) k.remove();
             }
         }
 
         // iterate through the domain stacks
@@ -206,7 +212,7 @@ public class Balancer {
         HandleSet stack;
         while (q.hasNext()) {
             stack = q.next().getValue();
-            for (byte[] handle: urlHashes) stack.remove(handle);
+            for (final byte[] handle: urlHashes) stack.remove(handle);
             if (stack.isEmpty()) q.remove();
         }
@@ -234,7 +240,7 @@ public class Balancer {
     private boolean domainStacksNotEmpty() {
         if (domainStacks == null) return false;
         synchronized (domainStacks) {
-            for (HandleSet l: domainStacks.values()) {
+            for (final HandleSet l: domainStacks.values()) {
                 if (!l.isEmpty()) return true;
             }
         }
@@ -255,7 +261,7 @@ public class Balancer {
         // add to index
         final int s = this.urlFileIndex.size();
         this.urlFileIndex.put(entry.toRow());
         assert s < this.urlFileIndex.size() : "hash = " + new String(hash) + ", s = " + s + ", size = " + this.urlFileIndex.size();
         assert this.urlFileIndex.has(hash) : "hash = " + new String(hash);
@@ -288,15 +294,15 @@ public class Balancer {
             return;
         }
         domainList.remove(urlhash);
-        if (domainList.size() == 0) domainStacks.remove(host);
+        if (domainList.isEmpty()) domainStacks.remove(host);
     }
 
     private byte[] nextFromDelayed() {
         if (this.delayed.isEmpty()) return null;
         final Long first = this.delayed.firstKey();
         if (first.longValue() < System.currentTimeMillis()) {
             return this.delayed.remove(first);
         }
         return null;
     }
@@ -465,37 +471,37 @@ public class Balancer {
         byte[] besturlhash = null;
         String besthost = null;
         while (i.hasNext()) {
             entry = i.next();
 
             // clean up empty entries
             if (entry.getValue().isEmpty()) {
                 i.remove();
                 continue;
             }
 
             byte[] n = entry.getValue().removeOne();
             if (n == null) continue;
             if (delay) {
                 final long w = Latency.waitingRemainingGuessed(entry.getKey(), minimumLocalDelta, minimumGlobalDelta);
                 if (w > maximumwaiting) {
                     if (w < smallestWaiting) {
                         smallestWaiting = w;
                         besturlhash = n;
                         besthost = entry.getKey();
                     }
                     entry.getValue().put(n); // put entry back
                     continue;
                 }
             }
 
             this.top.add(n);
             if (entry.getValue().isEmpty()) i.remove();
         }
 
         // if we could not find any entry, then take the best we have seen so far
         if (acceptonebest && !this.top.isEmpty() && besturlhash != null) {
             removeHashFromDomainStacks(besthost, besturlhash);
             this.top.add(besturlhash);
         }
     }
@@ -525,21 +531,21 @@ public class Balancer {
         this.domStackInitSize = this.domainStacks.size();
     }
 
-    public ArrayList<Request> top(int count) {
-        final ArrayList<Request> cel = new ArrayList<Request>();
+    public List<Request> top(int count) {
+        final List<Request> cel = new ArrayList<Request>();
         if (count == 0) return cel;
         byte[][] ta = new byte[Math.min(count, top.size())][];
         ta = top.toArray(ta);
-        for (byte[] n: ta) {
+        for (final byte[] n: ta) {
             if (n == null) break;
             try {
                 final Row.Entry rowEntry = urlFileIndex.get(n);
                 if (rowEntry == null) continue;
                 final Request crawlEntry = new Request(rowEntry);
                 cel.add(crawlEntry);
                 count--;
                 if (count <= 0) break;
             } catch (IOException e) {}
         }
 
         int depth = 0;
@@ -565,7 +571,7 @@ public class Balancer {
         if (cel.size() < count) try {
             List<Row.Entry> list = urlFileIndex.top(count - cel.size());
-            for (Row.Entry entry: list) cel.add(new Request(entry));
+            for (final Row.Entry entry: list) cel.add(new Request(entry));
         } catch (IOException e) { }
 
         return cel;
     }

NoticedURL.java

@@ -1,10 +1,13 @@
-// plasmaNURL.java
+// NoticedURL.java
 // -----------------------
 // part of YaCy
 // (C) by Michael Peter Christen; mc@yacy.net
 // first published on http://www.anomic.de
 // Frankfurt, Germany, 2004
-// last major change: 09.08.2004
+//
+//$LastChangedDate$
+//$LastChangedRevision$
+//$LastChangedBy$
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -26,9 +29,9 @@ package de.anomic.crawler;
 import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
 
 import net.yacy.kelondro.index.HandleSet;
 import net.yacy.kelondro.index.RowSpaceExceededException;
@@ -52,7 +55,7 @@ public class NoticedURL {
     private Balancer noloadStack; // links that are not passed to a loader; the index will be generated from the Request entry
 
     public NoticedURL(
             final File cachePath,
             final boolean useTailCache,
             final boolean exceed134217727) {
         Log.logInfo("NoticedURL", "CREATING STACKS at " + cachePath.toString());
@@ -107,11 +110,13 @@ public class NoticedURL {
         }
     }
 
-    protected void finalize() {
+    @Override
+    protected void finalize() throws Throwable {
         if ((coreStack != null) || (limitStack != null) || (remoteStack != null)) {
             Log.logWarning("plasmaCrawlNURL", "NURL stack closed by finalizer");
             close();
         }
+        super.finalize();
     }
 
     public boolean notEmpty() {
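
The finalize() override above now declares throws Throwable and chains super.finalize(), matching Object.finalize()'s contract. The commonly recommended variant goes one step further and chains it in a finally block, so the superclass finalizer still runs when the cleanup itself throws; a sketch of that idiom, not the YaCy code:

public class FinalizerDemo {
    @Override
    protected void finalize() throws Throwable {
        try {
            System.out.println("closing stacks"); // cleanup that might throw
        } finally {
            super.finalize(); // always give the superclass its turn
        }
    }

    public static void main(final String[] args) {
        new FinalizerDemo();
        System.gc(); // may or may not trigger finalization; JVM-dependent
    }
}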
@@ -195,13 +200,14 @@ public class NoticedURL {
      */
     public boolean removeByURLHash(final byte[] urlhashBytes) {
         try {
-            HandleSet urlHashes = Base64Order.enhancedCoder.getHandleSet(12, 1);
+            final HandleSet urlHashes = Base64Order.enhancedCoder.getHandleSet(12, 1);
             urlHashes.put(urlhashBytes);
-            try {return noloadStack.remove(urlHashes) > 0;} catch (final IOException e) {}
-            try {return coreStack.remove(urlHashes) > 0;} catch (final IOException e) {}
-            try {return limitStack.remove(urlHashes) > 0;} catch (final IOException e) {}
-            try {return remoteStack.remove(urlHashes) > 0;} catch (final IOException e) {}
-            return false;
+            boolean ret = false;
+            try {ret |= noloadStack.remove(urlHashes) > 0;} catch (final IOException e) {}
+            try {ret |= coreStack.remove(urlHashes) > 0;} catch (final IOException e) {}
+            try {ret |= limitStack.remove(urlHashes) > 0;} catch (final IOException e) {}
+            try {ret |= remoteStack.remove(urlHashes) > 0;} catch (final IOException e) {}
+            return ret;
         } catch (RowSpaceExceededException e) {
             Log.logException(e);
             return false;
@@ -217,7 +223,7 @@ public class NoticedURL {
         return removed;
     }
 
-    public ArrayList<Request> top(final StackType stackType, final int count) {
+    public List<Request> top(final StackType stackType, final int count) {
         switch (stackType) {
             case CORE: return top(coreStack, count);
             case LIMIT: return top(limitStack, count);
@@ -279,12 +285,10 @@ public class NoticedURL {
         return null;
     }
 
-    private ArrayList<Request> top(final Balancer balancer, int count) {
+    private List<Request> top(final Balancer balancer, int count) {
         // this is a filo - top
         if (count > balancer.size()) count = balancer.size();
-        ArrayList<Request> list;
-        list = balancer.top(count);
-        return list;
+        return balancer.top(count);
     }
 
     public Iterator<Request> iterator(final StackType stackType) {
