From e1c2d8ec5fbf2b6da448cb29cc1779ad99851b7a Mon Sep 17 00:00:00 2001 From: theli Date: Wed, 7 Dec 2005 11:27:44 +0000 Subject: [PATCH] *) Speedup "removed from queue" See: http://www.yacy-forum.de/viewtopic.php?p=13442#12188 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1183 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/plasma/plasmaSwitchboard.java | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 9e33c6638..01223a88c 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -943,12 +943,25 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // do a local crawl - plasmaCrawlNURL.Entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE); - String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]"; - if ((urlEntry.url() == null) || (urlEntry.url().toString().length() < 10)) { - log.logInfo(stats + ": URL with hash " + ((urlEntry.hash()==null)?"Unknown":urlEntry.hash()) + " already removed from queue."); - return true; - } + String stats = null; + boolean validEntry = false; + plasmaCrawlNURL.Entry urlEntry = null; + do { + urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE); + stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]"; + + // if the queue is empty + if (urlEntry == null) return false; + + // if we have received a new entry + if ((urlEntry.url() == null) || (urlEntry.url().toString().length() < 10)) { + log.logInfo(stats + ": URL with hash " + ((urlEntry.hash()==null)?"Unknown":urlEntry.hash()) + " already removed from queue."); + validEntry = false; + } else { + validEntry = true; + } + } while(!validEntry); + String profileHandle = urlEntry.profileHandle(); //System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url()); if (profileHandle == null) {