diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java index 2cc1b5caf..55f2c4204 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.java +++ b/htroot/IndexCreateWWWLocalQueue_p.java @@ -31,9 +31,9 @@ import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.Iterator; +import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -114,6 +114,7 @@ public class IndexCreateWWWLocalQueue_p { // iterating through the list of URLs final Iterator iter = sb.crawlQueues.noticeURL.iterator(NoticedURL.STACK_TYPE_CORE); Request entry; + List removehashes = new ArrayList(); while (iter.hasNext()) { if ((entry = iter.next()) == null) continue; String value = null; @@ -129,12 +130,11 @@ public class IndexCreateWWWLocalQueue_p { default: value = null; break location; } - if (value != null) { - final Matcher matcher = compiledPattern.matcher(value); - if (matcher.find()) { - sb.crawlQueues.noticeURL.removeByURLHash(entry.url().hash()); - } - } + if (value != null && compiledPattern.matcher(value).find()) removehashes.add(entry.url().hash()); + } + Log.logInfo("IndexCreateWWWLocalQueue", "created a remove list with " + removehashes.size() + " entries for pattern '" + pattern + "'"); + for (byte[] b: removehashes) { + sb.crawlQueues.noticeURL.removeByURLHash(b); } } } catch (final PatternSyntaxException e) { diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java index 6ff684db6..ccd27a0c7 100644 --- a/source/de/anomic/crawler/Balancer.java +++ b/source/de/anomic/crawler/Balancer.java @@ -592,6 +592,7 @@ public class Balancer { try { return (entry == null) ? null : new Request(entry); } catch (final IOException e) { + Log.logException(e); rowIterator = null; return null; } diff --git a/source/de/anomic/crawler/NoticedURL.java b/source/de/anomic/crawler/NoticedURL.java index 13cf3dbb1..652552d28 100755 --- a/source/de/anomic/crawler/NoticedURL.java +++ b/source/de/anomic/crawler/NoticedURL.java @@ -193,6 +193,7 @@ public class NoticedURL { try {return remoteStack.remove(urlHashes) > 0;} catch (final IOException e) {} return false; } catch (RowSpaceExceededException e) { + Log.logException(e); return false; } }