added memory protection for postprocessing

pull/1/head
Michael Peter Christen 11 years ago
parent 412d55523c
commit 3d474a843e

@@ -2015,6 +2015,25 @@ public final class Switchboard extends serverSwitch {
         // do nothing
     }
+    public static void clearCaches() {
+        // flush caches in used libraries
+        pdfParser.clean_up_idiotic_PDFParser_font_cache_which_eats_up_tons_of_megabytes(); // eats up megabytes, see http://markmail.org/thread/quk5odee4hbsauhu
+        // clear caches
+        if (WordCache.sizeCommonWords() > 1000) WordCache.clearCommonWords();
+        Word.clearCache();
+        // Domains.clear();
+        FieldCache.DEFAULT.purgeAllCaches();
+        // clean up image stack
+        ResultImages.clearQueues();
+        // flush the document compressor cache
+        Cache.commit();
+        Digest.cleanup(); // don't let caches become permanent memory leaks
+    }
     public int cleanupJobSize() {
         int c = 1; // "es gibt immer was zu tun" (German: "there is always something to do")
         if ( (this.crawlQueues.delegatedURL.size() > 1000) ) {
@@ -2040,21 +2059,7 @@ public final class Switchboard extends serverSwitch {
         ConcurrentLog.ensureWorkerIsRunning();
         try {
-            // flush caches in used libraries
-            pdfParser.clean_up_idiotic_PDFParser_font_cache_which_eats_up_tons_of_megabytes(); // eats up megabytes, see http://markmail.org/thread/quk5odee4hbsauhu
-            // clear caches
-            if (WordCache.sizeCommonWords() > 1000) WordCache.clearCommonWords();
-            Word.clearCache();
-            // Domains.clear();
-            FieldCache.DEFAULT.purgeAllCaches();
-            // clean up image stack
-            ResultImages.clearQueues();
-            // flush the document compressor cache
-            Cache.commit();
-            Digest.cleanup(); // don't let caches become permanent memory leaks
+            clearCaches();
             // clear caches if necessary
             if ( !MemoryControl.request(128000000L, false) ) {
@@ -2304,7 +2309,7 @@ public final class Switchboard extends serverSwitch {
         Fulltext fulltext = index.fulltext();
         CollectionConfiguration collection1Configuration = fulltext.getDefaultConfiguration();
         WebgraphConfiguration webgraphConfiguration = fulltext.getWebgraphConfiguration();
-        if (!this.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
+        if (!this.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) && MemoryControl.request(256000000L, false) && Memory.load() < 1.0f) {
             // we optimize first because that is useful for postprocessing
             int proccount = 0;
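
The changed condition only lets the postprocessing pass start when roughly 256 MB of heap can still be requested and the short-term system load is below 1.0. A minimal standalone sketch of the same guard pattern using only standard JDK calls; the class name, the headroom heuristic, and the thresholds are illustrative stand-ins for YaCy's MemoryControl.request and Memory.load, not YaCy API:

    import java.lang.management.ManagementFactory;

    public class MemoryGuard {
        // Rough stand-in for MemoryControl.request(bytes, force): true if
        // approximately 'bytes' of heap headroom is still available.
        static boolean request(long bytes) {
            Runtime rt = Runtime.getRuntime();
            long available = rt.maxMemory() - rt.totalMemory() + rt.freeMemory();
            return available >= bytes;
        }

        // Rough stand-in for Memory.load(): the one-minute system load
        // average (returns -1.0 on platforms where it is unavailable).
        static double load() {
            return ManagementFactory.getOperatingSystemMXBean().getSystemLoadAverage();
        }

        public static void main(String[] args) {
            if (request(256000000L) && load() < 1.0) {
                System.out.println("enough headroom: run the postprocessing pass");
            } else {
                System.out.println("low memory or high load: skip this cycle");
            }
        }
    }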

@@ -76,6 +76,7 @@ import net.yacy.kelondro.rwi.ReferenceContainer;
 import net.yacy.kelondro.rwi.ReferenceFactory;
 import net.yacy.kelondro.util.Bitfield;
 import net.yacy.kelondro.util.ISO639;
+import net.yacy.kelondro.util.MemoryControl;
 import net.yacy.kelondro.workflow.WorkflowProcessor;
 import net.yacy.repository.LoaderDispatcher;
 import net.yacy.search.StorageQueueEntry;
@@ -304,6 +305,7 @@ public class Segment {
     }
     public ReferenceReport getReferenceReport(final byte[] id, final boolean acceptSelfReference) throws IOException {
         ReferenceReport rr = cache.get(id);
+        if (MemoryControl.shortStatus()) cache.clear();
         if (rr != null) return rr;
         try {
             rr = new ReferenceReport(id, acceptSelfReference);
@@ -329,6 +331,7 @@ public class Segment {
     }
     public int getClickdepth(final DigestURL url, int maxtime) throws IOException {
         Integer clickdepth = cache.get(url.hash());
+        if (MemoryControl.shortStatus()) cache.clear();
         if (clickdepth != null) {
             //ConcurrentLog.info("Segment", "get clickdepth of url " + url.toNormalform(true) + ": " + clickdepth + " CACHE HIT");
             return clickdepth.intValue();
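
Both Segment lookups above now drop their whole cache as soon as MemoryControl.shortStatus() reports memory pressure, trading recomputation for immediate headroom; the cached value is read before the clear, so a hit that was already fetched is still returned. A generic sketch of that pattern, assuming a map-backed cache and a hypothetical shortStatus() headroom heuristic in place of YaCy's MemoryControl:

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.function.Function;

    public class PressureSensitiveCache<K, V> {
        private final Map<K, V> cache = new ConcurrentHashMap<>();

        // Hypothetical stand-in for MemoryControl.shortStatus():
        // true when less than ~10% of the maximum heap is still available.
        private static boolean shortStatus() {
            Runtime rt = Runtime.getRuntime();
            long available = rt.maxMemory() - rt.totalMemory() + rt.freeMemory();
            return available < rt.maxMemory() / 10;
        }

        public V get(K key, Function<K, V> compute) {
            V v = cache.get(key);
            // same ordering as the patch: read first, then clear under pressure
            if (shortStatus()) cache.clear();
            if (v != null) return v;
            v = compute.apply(key);
            if (v != null) cache.put(key, v); // ConcurrentHashMap rejects null values
            return v;
        }
    }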
@@ -386,6 +389,7 @@ public class Segment {
         SolrDocument doc;
         try {
             while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
+                if (MemoryControl.shortStatus()) break;
                 String refid = (String) doc.getFieldValue(WebgraphSchema.source_id_s.getSolrFieldName());
                 if (refid == null) continue;
                 byte[] refidh = ASCII.getBytes(refid);
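
The loop change above abandons the remaining queued documents under memory pressure instead of draining the queue to its end. A compact sketch of the same bail-out pattern for a poison-terminated BlockingQueue; the POISON sentinel and shortStatus() are illustrative stand-ins for AbstractSolrConnector.POISON_DOCUMENT and MemoryControl.shortStatus():

    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    public class DrainUntilPressure {
        // end-of-stream marker, compared by reference like POISON_DOCUMENT
        static final String POISON = new String("POISON");

        static boolean shortStatus() { // hypothetical pressure heuristic
            Runtime rt = Runtime.getRuntime();
            return rt.maxMemory() - rt.totalMemory() + rt.freeMemory() < rt.maxMemory() / 10;
        }

        static void drain(BlockingQueue<String> docs) throws InterruptedException {
            String doc;
            while ((doc = docs.take()) != POISON) {
                if (shortStatus()) break; // give up on the remaining entries
                System.out.println("processed " + doc);
            }
        }

        public static void main(String[] args) throws InterruptedException {
            BlockingQueue<String> docs = new LinkedBlockingQueue<>();
            docs.add("doc-1");
            docs.add("doc-2");
            docs.add(POISON);
            drain(docs);
        }
    }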

@@ -1059,6 +1059,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Serializable {
         Set<String> uniqueURLs = new HashSet<String>();
         try {
             long count = collectionConnector.getCountByQuery(query);
+            long start = System.currentTimeMillis();
             ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the collection for harvestkey " + harvestkey);
             BlockingQueue<SolrDocument> docs = collectionConnector.concurrentDocumentsByQuery(query, 0, 10000000, 1800000, 100);
             int countcheck = 0;
@@ -1117,6 +1118,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Serializable {
                     collectionConnector.add(sid);
                     proccount++;
+                    if (proccount % 100 == 0) ConcurrentLog.info("CollectionConfiguration", "postprocessed " + proccount + " from " + count + " documents; " + (proccount * 1000 / (System.currentTimeMillis() - start)) + " docs/second; " + ((System.currentTimeMillis() - start) * (count - proccount) / proccount / 60000) + " minutes remaining");
                 } catch (final Throwable e1) {
                     ConcurrentLog.logException(e1);
                 }
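
The added progress line computes throughput as proccount * 1000 / elapsed milliseconds and estimates the time left by scaling the elapsed time per processed document to the documents still outstanding (integer division, so both values truncate). A worked example with fixed illustrative numbers:

    public class EtaExample {
        public static void main(String[] args) {
            long elapsedMs = 40000L;  // time since 'start'
            long count = 10000L;      // documents selected for postprocessing
            long proccount = 2000L;   // documents already postprocessed
            long docsPerSecond = proccount * 1000L / elapsedMs;                           // 2000*1000/40000 = 50
            long minutesRemaining = elapsedMs * (count - proccount) / proccount / 60000L; // 40000*8000/2000/60000 = 2
            System.out.println(docsPerSecond + " docs/second; " + minutesRemaining + " minutes remaining");
        }
    }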
