*) Implementing yacy forum wishlist item: "Pause Crawling"

see: http://www.yacy-forum.de/viewtopic.php?t=48



git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@118 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 2d751ba831
commit d2c4e9a55e

@ -157,6 +157,10 @@ The indexing result is presented on the
If you crawl any un-wanted pages, you can delete them <a href="IndexDelete_p.html">here</a>.<br>
::
Removed #[numEntries]# entries from crawl queue. This queue may fill again if the loading and indexing queue is not empty
::
Crawling paused successfully.
::
Continue crawling.
#(/info)#
<br>
#(refreshbutton)#
@ -314,6 +318,11 @@ There are #[num]# entries in the crawler queue. Showing #[show-num]# most recent
<br>
<form action="IndexCreate_p.html" method="post" enctype="multipart/form-data">
<input type="submit" name="clearcrawlqueue" value="clear crawl queue">
#(paused)#
<input type="submit" name="continuecrawlqueue" value="continue crawling">
::
<input type="submit" name="pausecrawlqueue" value="pause crawling">
#(/paused)#
</form>
#(/crawler-queue)#
</p>

@ -179,6 +179,16 @@ public class IndexCreate_p {
prop.put("info", 3);//crawling queue cleared
prop.put("info_numEntries", c);
}
// "pause crawling" button was pressed: pause the crawler and report it
if (post.containsKey("pausecrawlqueue")) {
switchboard.pauseCrawling();
prop.put("info", 4);//crawling paused
}
// "continue crawling" button was pressed: resume the crawler and report it
if (post.containsKey("continuecrawlqueue")) {
switchboard.continueCrawling();
prop.put("info", 5);//crawling continued
}
}
// define visible variables
@ -389,6 +399,7 @@ public class IndexCreate_p {
}
prop.put("crawler-queue_list", i);
}
prop.put("crawler-queue_paused",(switchboard.crawlingIsPaused())?0:1);
}
}
// return rewrite properties

@ -148,7 +148,7 @@ import de.anomic.yacy.yacySearch;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch {
public final class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch {
// load slots
@ -187,6 +187,9 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
private serverSemaphore shutdownSync = new serverSemaphore(0);
private boolean terminate = false;
private Object crawlingPausedSync = new Object();
private boolean crawlingIsPaused = false;
public plasmaSwitchboard(String rootPath, String initPath, String configPath) throws IOException {
super(rootPath, initPath, configPath);
@ -488,33 +491,73 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
return hasDoneSomething;
}
/**
 * Marks crawling as paused. The flag is set while holding the
 * <code>crawlingPausedSync</code> monitor.
 */
public void pauseCrawling() {
    final Object lock = this.crawlingPausedSync;
    synchronized (lock) {
        crawlingIsPaused = true;
    }
}
/**
 * Resumes crawling after a previous pause. If crawling is not paused
 * this is a no-op; otherwise the paused flag is cleared and every thread
 * waiting on <code>crawlingPausedSync</code> is notified.
 */
public void continueCrawling() {
    synchronized (this.crawlingPausedSync) {
        // nothing to resume
        if (!this.crawlingIsPaused) return;

        this.crawlingIsPaused = false;
        this.crawlingPausedSync.notifyAll();
    }
}
/**
 * Reports the current pause state, read while holding the
 * <code>crawlingPausedSync</code> monitor.
 *
 * @return <code>true</code> if crawling is currently paused, <code>false</code> otherwise
 */
public boolean crawlingIsPaused() {
    final boolean paused;
    synchronized (this.crawlingPausedSync) {
        paused = this.crawlingIsPaused;
    }
    return paused;
}
/**
 * @return the value reported by <code>noticeURL.localStackSize()</code>
 */
public int localCrawlJobSize() {
    final int size = noticeURL.localStackSize();
    return size;
}
public boolean localCrawlJob() {
if (noticeURL.localStackSize() == 0) {
//log.logDebug("LocalCrawl: queue is empty");
return false;
}
//log.logDebug("LocalCrawl: queue is empty");
return false;
}
if (processStack.size() >= crawlSlots) {
log.logDebug("LocalCrawl: too many processes in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
return false;
}
log.logDebug("LocalCrawl: too many processes in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
return false;
}
if (cacheLoader.size() >= crawlSlots) {
log.logDebug("LocalCrawl: too many loader in queue, dismissed (" +
"cacheLoader=" + cacheLoader.size() + ")");
return false;
}
log.logDebug("LocalCrawl: too many loader in queue, dismissed (" +
"cacheLoader=" + cacheLoader.size() + ")");
return false;
}
// if the server is busy, we do crawling more slowly
// if the server is busy, we do crawling more slowly
if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {}
// do a local crawl (may start a global crawl)
plasmaCrawlNURL.entry nex = noticeURL.localPop();
processCrawling(nex, nex.initiator());
return true;
// If crawling is paused, block here until continueCrawling() notifies us.
// The flag is re-checked in a loop because Object.wait() may return
// spuriously, i.e. without crawling having been resumed.
synchronized(this.crawlingPausedSync) {
    while (this.crawlingIsPaused) {
        try {
            this.crawlingPausedSync.wait();
        } catch (InterruptedException e) {
            // interrupted while paused: leave without crawling anything
            return false;
        }
    }
}
// do a local crawl (may start a global crawl)
plasmaCrawlNURL.entry nex = noticeURL.localPop();
processCrawling(nex, nex.initiator());
return true;
}
public int globalCrawlJobSize() {
@ -522,32 +565,42 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
}
public boolean globalCrawlJob() {
// work off crawl requests that had been placed by other peers to our crawl stack
// work off crawl requests that had been placed by other peers to our crawl stack
// do nothing if either there are private processes to be done
// or there is no global crawl on the stack
// do nothing if either there are private processes to be done
// or there is no global crawl on the stack
if (noticeURL.remoteStackSize() == 0) {
//log.logDebug("GlobalCrawl: queue is empty");
return false;
}
//log.logDebug("GlobalCrawl: queue is empty");
return false;
}
if (processStack.size() > 0) {
log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
return false;
}
if (noticeURL.localStackSize() > 0) {
log.logDebug("GlobalCrawl: any local crawl is in queue, dismissed (" +
"localStackSize=" + noticeURL.localStackSize() + ")");
return false;
}
log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
return false;
}
if (noticeURL.localStackSize() > 0) {
log.logDebug("GlobalCrawl: any local crawl is in queue, dismissed (" +
"localStackSize=" + noticeURL.localStackSize() + ")");
return false;
}
// if the server is busy, we do this more slowly
// if the server is busy, we do this more slowly
if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {}
// we don't want to crawl a global URL globally, since WE are the global part. (from this point of view)
plasmaCrawlNURL.entry nex = noticeURL.remotePop();
processCrawling(nex, nex.initiator());
return true;
// If crawling is paused, block here until continueCrawling() notifies us.
// The flag is re-checked in a loop because Object.wait() may return
// spuriously, i.e. without crawling having been resumed.
synchronized(this.crawlingPausedSync) {
    while (this.crawlingIsPaused) {
        try {
            this.crawlingPausedSync.wait();
        } catch (InterruptedException e) {
            // interrupted while paused: leave without crawling anything
            return false;
        }
    }
}
// we don't want to crawl a global URL globally, since WE are the global part. (from this point of view)
plasmaCrawlNURL.entry nex = noticeURL.remotePop();
processCrawling(nex, nex.initiator());
return true;
}
private void processResourceStack(plasmaHTCache.Entry entry) {
@ -1099,7 +1152,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
} else {
prop.put("totalcount", "" + acc.sizeOrdered());
int i = 0;
String links = "";
StringBuffer links = new StringBuffer();
String resource = "";
//plasmaIndexEntry pie;
plasmaCrawlLURL.entry urlentry;
@ -1107,19 +1160,18 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
urlentry = acc.nextElement();
resource = urlentry.toString();
if (resource != null) {
links += "resource" + i + "=" + resource + serverCore.crlfString;
// emit one "resource<i>=<resource>" line per result; the key prefix is the literal "resource"
links.append("resource").append(i).append("=").append(resource).append(serverCore.crlfString);
i++;
}
}
prop.put("links", links);
prop.put("links", links.toString());
prop.put("linkcount", "" + i);
// prepare reference hints
Object[] ws = acc.getReferences(16);
String refstr = "";
for (int j = 0; j < ws.length; j++) refstr += "," + (String) ws[j];
if (refstr.length() > 0) refstr = refstr.substring(1);
prop.put("references", refstr);
StringBuffer refstr = new StringBuffer();
for (int j = 0; j < ws.length; j++) refstr.append(",").append((String) ws[j]);
prop.put("references", (refstr.length() > 0)?refstr.substring(1):refstr.toString());
}
// add information about forward peers

Loading…
Cancel
Save