From d2c4e9a55e2ba59a2cf4e22f83464807b076dab7 Mon Sep 17 00:00:00 2001 From: theli Date: Sat, 14 May 2005 09:41:05 +0000 Subject: [PATCH] *) Implementing yacy forum wishlist item: "Pause Crawling" see: http://www.yacy-forum.de/viewtopic.php?t=48 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@118 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexCreate_p.html | 9 ++ htroot/IndexCreate_p.java | 11 ++ .../de/anomic/plasma/plasmaSwitchboard.java | 150 ++++++++++++------ 3 files changed, 121 insertions(+), 49 deletions(-) diff --git a/htroot/IndexCreate_p.html b/htroot/IndexCreate_p.html index 2ff329b8a..3316f85f4 100644 --- a/htroot/IndexCreate_p.html +++ b/htroot/IndexCreate_p.html @@ -157,6 +157,10 @@ The indexing result is presented on the If you crawl any un-wanted pages, you can delete them here.
:: Removed #[numEntries]# entries from crawl queue. This queue may fill again if the loading and indexing queue is not empty +:: +Crawling paused successfully. +:: +Continue crawling. #(/info)#
#(refreshbutton)# @@ -314,6 +318,11 @@ There are #[num]# entries in the crawler queue. Showing #[show-num]# most recent
+#(paused)# + +:: + +#(/paused)#
#(/crawler-queue)#

diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java index f1aa6785b..507e3b35c 100644 --- a/htroot/IndexCreate_p.java +++ b/htroot/IndexCreate_p.java @@ -179,6 +179,16 @@ public class IndexCreate_p { prop.put("info", 3);//crawling queue cleared prop.put("info_numEntries", c); } + + if (post.containsKey("pausecrawlqueue")) { + switchboard.pauseCrawling(); + prop.put("info", 4);//crawling paused + } + + if (post.containsKey("continuecrawlqueue")) { + switchboard.continueCrawling(); + prop.put("info", 5);//crawling continued + } } // define visible variables @@ -389,6 +399,7 @@ public class IndexCreate_p { } prop.put("crawler-queue_list", i); } + prop.put("crawler-queue_paused",(switchboard.crawlingIsPaused())?0:1); } } // return rewrite properties diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 30786cb92..05a625f22 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -148,7 +148,7 @@ import de.anomic.yacy.yacySearch; import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeedDB; -public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch { +public final class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch { // load slots @@ -187,6 +187,9 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi private serverSemaphore shutdownSync = new serverSemaphore(0); private boolean terminate = false; + private Object crawlingPausedSync = new Object(); + private boolean crawlingIsPaused = false; + public plasmaSwitchboard(String rootPath, String initPath, String configPath) throws IOException { super(rootPath, initPath, configPath); @@ -488,33 +491,73 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi return hasDoneSomething; } + /** + * With this function the crawling process can be paused + */ + public void 
pauseCrawling() { + synchronized(this.crawlingPausedSync) { + this.crawlingIsPaused = true; + } + } + + /** + * Continue the previously paused crawling + */ + public void continueCrawling() { + synchronized(this.crawlingPausedSync) { + if (this.crawlingIsPaused) { + this.crawlingIsPaused = false; + this.crawlingPausedSync.notifyAll(); + } + } + } + + /** + * @return true if crawling was paused or false otherwise + */ + public boolean crawlingIsPaused() { + synchronized(this.crawlingPausedSync) { + return this.crawlingIsPaused; + } + } + public int localCrawlJobSize() { return noticeURL.localStackSize(); } public boolean localCrawlJob() { if (noticeURL.localStackSize() == 0) { - //log.logDebug("LocalCrawl: queue is empty"); - return false; - } + //log.logDebug("LocalCrawl: queue is empty"); + return false; + } if (processStack.size() >= crawlSlots) { - log.logDebug("LocalCrawl: too many processes in queue, dismissed (" + - "processStack=" + processStack.size() + ")"); - return false; - } + log.logDebug("LocalCrawl: too many processes in queue, dismissed (" + + "processStack=" + processStack.size() + ")"); + return false; + } if (cacheLoader.size() >= crawlSlots) { - log.logDebug("LocalCrawl: too many loader in queue, dismissed (" + - "cacheLoader=" + cacheLoader.size() + ")"); - return false; - } - - // if the server is busy, we do crawling more slowly + log.logDebug("LocalCrawl: too many loader in queue, dismissed (" + + "cacheLoader=" + cacheLoader.size() + ")"); + return false; + } + + // if the server is busy, we do crawling more slowly if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {} - - // do a local crawl (may start a global crawl) - plasmaCrawlNURL.entry nex = noticeURL.localPop(); - processCrawling(nex, nex.initiator()); - return true; + + // if crawling was paused we have to wait until we were notified to continue (loop guards against spurious wakeups) + synchronized(this.crawlingPausedSync) { + while (this.crawlingIsPaused) { + try { + 
this.crawlingPausedSync.wait(); + } + catch (InterruptedException e){ return false;} + } + } + + // do a local crawl (may start a global crawl) + plasmaCrawlNURL.entry nex = noticeURL.localPop(); + processCrawling(nex, nex.initiator()); + return true; } public int globalCrawlJobSize() { @@ -522,32 +565,42 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi } public boolean globalCrawlJob() { - // work off crawl requests that had been placed by other peers to our crawl stack - - // do nothing if either there are private processes to be done - // or there is no global crawl on the stack + // work off crawl requests that had been placed by other peers to our crawl stack + + // do nothing if either there are private processes to be done + // or there is no global crawl on the stack if (noticeURL.remoteStackSize() == 0) { - //log.logDebug("GlobalCrawl: queue is empty"); - return false; - } + //log.logDebug("GlobalCrawl: queue is empty"); + return false; + } if (processStack.size() > 0) { - log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" + - "processStack=" + processStack.size() + ")"); - return false; - } - if (noticeURL.localStackSize() > 0) { - log.logDebug("GlobalCrawl: any local crawl is in queue, dismissed (" + - "localStackSize=" + noticeURL.localStackSize() + ")"); - return false; - } - - // if the server is busy, we do this more slowly + log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" + + "processStack=" + processStack.size() + ")"); + return false; + } + if (noticeURL.localStackSize() > 0) { + log.logDebug("GlobalCrawl: any local crawl is in queue, dismissed (" + + "localStackSize=" + noticeURL.localStackSize() + ")"); + return false; + } + + // if the server is busy, we do this more slowly if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {} - - // we don't want to crawl a global URL globally, since WE are the global part. 
(from this point of view) - plasmaCrawlNURL.entry nex = noticeURL.remotePop(); - processCrawling(nex, nex.initiator()); - return true; + + // if crawling was paused we have to wait until we were notified to continue (loop guards against spurious wakeups) + synchronized(this.crawlingPausedSync) { + while (this.crawlingIsPaused) { + try { + this.crawlingPausedSync.wait(); + } + catch (InterruptedException e){ return false; } + } + } + + // we don't want to crawl a global URL globally, since WE are the global part. (from this point of view) + plasmaCrawlNURL.entry nex = noticeURL.remotePop(); + processCrawling(nex, nex.initiator()); + return true; } private void processResourceStack(plasmaHTCache.Entry entry) { @@ -1099,7 +1152,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi } else { prop.put("totalcount", "" + acc.sizeOrdered()); int i = 0; - String links = ""; + StringBuffer links = new StringBuffer(); String resource = ""; //plasmaIndexEntry pie; plasmaCrawlLURL.entry urlentry; @@ -1107,19 +1160,18 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi urlentry = acc.nextElement(); resource = urlentry.toString(); if (resource != null) { - links += "resource" + i + "=" + resource + serverCore.crlfString; + links.append("resource").append(i).append("=").append(resource).append(serverCore.crlfString); i++; } } - prop.put("links", links); + prop.put("links", links.toString()); prop.put("linkcount", "" + i); // prepare reference hints Object[] ws = acc.getReferences(16); - String refstr = ""; - for (int j = 0; j < ws.length; j++) refstr += "," + (String) ws[j]; - if (refstr.length() > 0) refstr = refstr.substring(1); - prop.put("references", refstr); + StringBuffer refstr = new StringBuffer(); + for (int j = 0; j < ws.length; j++) refstr.append(",").append((String) ws[j]); + prop.put("references", (refstr.length() > 0)?refstr.substring(1):refstr.toString()); } // add information about forward peers