- some fixes to prevent blocking situations
- better logging for the crawler
- better default values for the crawler

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6377 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 16 years ago
parent 51f2bbf04b
commit 6e0dc39a7d

@@ -570,7 +570,7 @@ filterOutStopwordsFromTopwords=true
 40_peerseedcycle_busysleep=1200000
 40_peerseedcycle_memprereq=4194304
 50_localcrawl_idlesleep=2000
-50_localcrawl_busysleep=30
+50_localcrawl_busysleep=20
 50_localcrawl_memprereq=12582912
 50_localcrawl_isPaused=false
 60_remotecrawlloader_idlesleep=60000
@@ -694,7 +694,7 @@ crawler.http.maxFileSize=1048576
 crawler.ftp.maxFileSize=1048576
 # maximum number of crawler threads
-crawler.MaxActiveThreads = 50
+crawler.MaxActiveThreads = 200
 # maximum size of indexing queue
 indexer.slots = 100
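
The two defaults changed above shorten the local crawler's busy pause from 30 ms to 20 ms and raise the crawler thread cap from 50 to 200. As a rough illustration of what an idlesleep/busysleep pair like this typically means, here is a minimal, self-contained sketch of a periodic job thread that consumes such values; the class and field names (JobLoop, idleSleep, busySleep) are assumptions made for this example and are not YaCy's actual scheduler code.

// Illustrative sketch only: how idlesleep/busysleep defaults are commonly
// consumed by a periodic job thread. Not YaCy's serverBusyThread.
public final class JobLoop extends Thread {
    private final long idleSleep;  // pause when the last cycle found no work
    private final long busySleep;  // pause when the last cycle did work
    private volatile boolean running = true;
    private final java.util.function.BooleanSupplier job; // returns true if work was done

    public JobLoop(final long idleSleep, final long busySleep,
                   final java.util.function.BooleanSupplier job) {
        this.idleSleep = idleSleep;
        this.busySleep = busySleep;
        this.job = job;
    }

    @Override
    public void run() {
        while (running) {
            final boolean didWork = job.getAsBoolean();
            try {
                // a lower busy pause (here: 30 ms -> 20 ms) lets the loop cycle faster under load
                Thread.sleep(didWork ? busySleep : idleSleep);
            } catch (InterruptedException e) {
                running = false;
            }
        }
    }
}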

@@ -212,12 +212,12 @@ public class CrawlQueues {
         String queueCheck = crawlIsPossible(NoticedURL.STACK_TYPE_CORE, "Core");
         if (queueCheck != null) {
-            if (log.isFinest()) log.logFinest("omitting de-queue/local: " + queueCheck);
+            log.logInfo("omitting de-queue/local: " + queueCheck);
             return false;
         }
 
         if (isPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
-            if (log.isFinest()) log.logFinest("omitting de-queue/local: paused");
+            log.logInfo("omitting de-queue/local: paused");
             return false;
         }
@@ -569,9 +569,9 @@ public class CrawlQueues {
                     result = "no content (possibly caused by cache policy)";
                 } else {
                     request.setStatus("loaded", serverProcessorJob.STATUS_RUNNING);
-                    final boolean stored = sb.toIndexer(response);
-                    request.setStatus("enqueued-" + ((stored) ? "ok" : "fail"), serverProcessorJob.STATUS_FINISHED);
-                    result = (stored) ? null : "not enqueued to indexer";
+                    final String storedFailMessage = sb.toIndexer(response);
+                    request.setStatus("enqueued-" + ((storedFailMessage == null) ? "ok" : "fail"), serverProcessorJob.STATUS_FINISHED);
+                    result = (storedFailMessage == null) ? null : "not enqueued to indexer: " + storedFailMessage;
                 }
             } catch (IOException e) {
                 request.setStatus("error", serverProcessorJob.STATUS_FINISHED);

@@ -181,10 +181,11 @@ public class MapView {
         assert key != null;
         if (cache == null) return false; // case may appear during shutdown
         key = normalizeKey(key);
+        boolean h = false;
         synchronized (this) {
-            if (this.cache.containsKey(key)) return true;
-            return this.blob.has(key.getBytes());
+            h = this.cache.containsKey(key) || this.blob.has(key.getBytes());
         }
+        return h;
     }
 
     /**
@@ -199,6 +200,7 @@ public class MapView {
     }
 
     private String normalizeKey(String key) {
+        if (blob == null) return key;
         if (key.length() > blob.keylength()) key = key.substring(0, blob.keylength());
         while (key.length() < blob.keylength()) key += fillchar;
         return key;
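
The two MapView hunks above combine a defensive null check with a small change to the locking shape of has(): the lookup result is collected inside the synchronized block and the method returns only after the monitor has been released. Below is a self-contained sketch of that pattern with made-up names (TwoLevelLookup, backend); it is not YaCy's MapView class, only an illustration of the same structure.

import java.util.HashMap;
import java.util.Map;

// Sketch of the locking pattern: evaluate the lookup under the monitor,
// return after leaving the synchronized block, and guard against a missing
// backend (e.g. during shutdown).
public class TwoLevelLookup {
    private final Map<String, String> cache = new HashMap<String, String>();
    private final Map<String, String> backend = new HashMap<String, String>();

    public boolean has(String key) {
        if (key == null) return false;   // defensive guard, as in normalizeKey()
        boolean h = false;
        synchronized (this) {
            h = cache.containsKey(key) || backend.containsKey(key);
        }
        return h;                        // monitor already released here
    }
}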

@@ -86,9 +86,15 @@ public class IODispatcher extends Thread {
         } else {
             DumpJob<? extends Reference> job = (DumpJob<? extends Reference>)new DumpJob(cache, file, array);
             try {
-                this.dumpQueue.put(job);
-                this.controlQueue.release();
-                Log.logInfo("IODispatcher", "appended dump job for file " + file.getName());
+                // check if the dispatcher is running
+                if (this.isAlive()) {
+                    this.dumpQueue.put(job);
+                    this.controlQueue.release();
+                    Log.logInfo("IODispatcher", "appended dump job for file " + file.getName());
+                } else {
+                    job.dump();
+                    Log.logWarning("IODispatcher", "dispatcher is not alive, just dumped file " + file.getName());
+                }
             } catch (InterruptedException e) {
                 e.printStackTrace();
                 cache.dump(file, (int) Math.min(MemoryControl.available() / 3, writeBufferSize));
@@ -111,9 +117,14 @@ public class IODispatcher extends Thread {
         } else {
             MergeJob job = new MergeJob(f1, f2, factory, array, payloadrow, newFile);
             try {
-                this.mergeQueue.put(job);
-                this.controlQueue.release();
-                Log.logInfo("IODispatcher", "appended merge job of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
+                if (this.isAlive()) {
+                    this.mergeQueue.put(job);
+                    this.controlQueue.release();
+                    Log.logInfo("IODispatcher", "appended merge job of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
+                } else {
+                    job.merge();
+                    Log.logWarning("IODispatcher", "dispatcher not running, merged files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
+                }
             } catch (InterruptedException e) {
                 Log.logWarning("IODispatcher", "interrupted: " + e.getMessage(), e);
                 try {
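
Both IODispatcher hunks apply the same anti-blocking idea: only hand a job to the dispatcher's queue if the dispatcher thread is actually alive, otherwise execute the work synchronously so it is neither lost nor stuck on a queue nobody drains. Here is a self-contained sketch of that pattern; the names (Worker, submit) are illustrative, not YaCy's API.

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

// Sketch: hand off to the worker if it is alive, otherwise do the work inline.
// A job placed on the queue of a dead consumer would never be processed.
public class Worker extends Thread {
    private final BlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>();

    public void submit(Runnable task) throws InterruptedException {
        if (this.isAlive()) {
            this.queue.put(task);   // normal case: asynchronous processing
        } else {
            task.run();             // fallback: worker is gone, run inline
        }
    }

    @Override
    public void run() {
        try {
            while (!isInterrupted()) {
                queue.take().run();
            }
        } catch (InterruptedException e) {
            // terminate quietly
        }
    }
}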

@@ -65,6 +65,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
     private final long targetFileSize, maxFileSize;
     private final int writeBufferSize;
     private final SimpleARC<ByteArray, Integer> countCache;
+    private boolean cleanerRunning = false;
 
     public IndexCell(
             final File cellPath,
@@ -340,15 +341,19 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
         }
 
         // clean-up the cache
-        if (this.array.entries() > 50 || (this.lastCleanup + cleanupCycle < System.currentTimeMillis())) synchronized (this) {
-            if (this.array.entries() > 50 || (this.lastCleanup + cleanupCycle < System.currentTimeMillis())) {
+        if (!this.cleanerRunning && (this.array.entries() > 50 || this.lastCleanup + cleanupCycle < System.currentTimeMillis())) synchronized (this) {
+            if (this.array.entries() > 50 || (this.lastCleanup + cleanupCycle < System.currentTimeMillis())) try {
+                this.cleanerRunning = true;
                 //System.out.println("----cleanup check");
                 this.array.shrink(this.targetFileSize, this.maxFileSize);
                 this.lastCleanup = System.currentTimeMillis();
+            } finally {
+                this.cleanerRunning = false;
             }
         }
     }
 
     public File newContainerBLOBFile() {
         // for migration of cache files
         return this.array.newContainerBLOBFile();
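
The cleanerRunning flag above keeps additional callers from queueing up behind a shrink/merge that is already in progress, and the try/finally guarantees the flag is reset even if the cleanup throws. A minimal sketch of the same guard pattern follows; it uses an AtomicBoolean and invented names (GuardedCleaner, maybeCleanup) rather than IndexCell's synchronized block, purely for illustration.

import java.util.concurrent.atomic.AtomicBoolean;

// Sketch of a re-entrancy guard around a long-running cleanup step.
public class GuardedCleaner {
    private final AtomicBoolean cleanerRunning = new AtomicBoolean(false);

    public void maybeCleanup(Runnable cleanup) {
        // skip entirely if another thread is already cleaning up
        if (!cleanerRunning.compareAndSet(false, true)) return;
        try {
            cleanup.run();
        } finally {
            cleanerRunning.set(false);   // always release the guard
        }
    }
}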

@@ -1128,17 +1128,22 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi
         log.logConfig("SWITCHBOARD SHUTDOWN TERMINATED");
     }
 
-    public boolean toIndexer(final Response response) {
+    /**
+     * pass a response to the indexer
+     * @param response
+     * @return null if successful, an error message otherwise
+     */
+    public String toIndexer(final Response response) {
         assert response != null;
 
         // get next queue entry and start a queue processing
         if (response == null) {
             if (this.log.isFine()) log.logFine("deQueue: queue entry is null");
-            return false;
+            return "queue entry is null";
         }
         if (response.profile() == null) {
             if (this.log.isFine()) log.logFine("deQueue: profile is null");
-            return false;
+            return "profile is null";
         }
 
         // check if the document should be indexed based on proxy/crawler rules
@@ -1176,17 +1181,17 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi
             if (log.isFine()) log.logFine("deQueue: not indexed any word in URL " + response.url() + "; cause: " + noIndexReason);
             addURLtoErrorDB(response.url(), (referrerURL == null) ? "" : referrerURL.hash(), response.initiator(), response.name(), noIndexReason);
             // finish this entry
-            return false;
+            return "not indexed any word in URL " + response.url() + "; cause: " + noIndexReason;
         }
 
         // put document into the concurrent processing queue
         if (log.isFinest()) log.logFinest("deQueue: passing to indexing queue: " + response.url().toNormalform(true, false));
         try {
             this.indexingDocumentProcessor.enQueue(new indexingQueueEntry(response, null, null));
-            return true;
+            return null;
         } catch (InterruptedException e) {
             e.printStackTrace();
-            return false;
+            return "interrupted: " + e.getMessage();
         }
     }
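
With this change toIndexer() follows a "null means success, any non-null String is the failure reason" convention, which is what the CrawlQueues hunk further up consumes when it builds its status and result messages. Below is a tiny self-contained sketch of that convention; enqueue() is a stand-in method invented for the example, not a YaCy API.

// Sketch of the return convention: null = success, String = error message.
public class ErrorMessageConvention {
    static String enqueue(final Object entry) {
        if (entry == null) return "queue entry is null";
        // ... hand the entry to the processing queue here ...
        return null; // null signals success
    }

    public static void main(String[] args) {
        final String failMessage = enqueue(null);
        System.out.println(failMessage == null
                ? "enqueued-ok"
                : "enqueued-fail: " + failMessage);
    }
}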
