From 1d4fb680ce1e38fd2f272c372aca87404990f6b4 Mon Sep 17 00:00:00 2001 From: theli Date: Tue, 3 Oct 2006 12:16:25 +0000 Subject: [PATCH] *) CrawlWorker.java: only keep content in memory if size is equal or less than 5MB TODO: make this limit configurable git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2703 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../anomic/plasma/crawler/http/CrawlWorker.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/source/de/anomic/plasma/crawler/http/CrawlWorker.java b/source/de/anomic/plasma/crawler/http/CrawlWorker.java index ebb064048..65369ce3a 100644 --- a/source/de/anomic/plasma/crawler/http/CrawlWorker.java +++ b/source/de/anomic/plasma/crawler/http/CrawlWorker.java @@ -248,9 +248,21 @@ public final class CrawlWorker extends AbstractCrawlWorker { // creating an output stream fos = new FileOutputStream(cacheFile); + // getting content length + long contentLength = (res.isGzipped()) ? res.getGzippedLength() : res.responseHeader.contentLength(); + + // check if the file is too large to keep it in memory + if (this.keepInMemory) { + // if the content length is unknown or larger than 5MB we + // do not keep resource in memory + // TODO: make MAX_KEEP_IN_MEMORY_SIZE configurable + if ((contentLength == -1) || (contentLength > 5 * 1024 * 1024)) { + this.keepInMemory = false; + } + } + // check the maximum allowed file size - if (this.maxFileSize > -1) { - long contentLength = (res.isGzipped()) ? res.getGzippedLength() : res.responseHeader.contentLength(); + if (this.maxFileSize > -1) { if (contentLength == -1) { fos = new httpdBoundedSizeOutputStream(fos,this.maxFileSize); } else if (contentLength > this.maxFileSize) {