From 755efac17d8a2088bceca2d53baf81735e38ab8b Mon Sep 17 00:00:00 2001 From: luc Date: Fri, 20 Nov 2015 19:35:39 +0100 Subject: [PATCH] Use same max file size when loading all resource bytes or opening stream content --- source/net/yacy/crawler/retrieval/HTTPLoader.java | 2 +- source/net/yacy/crawler/retrieval/Response.java | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java index 9cf9ce1a7..4f427a1bf 100644 --- a/source/net/yacy/crawler/retrieval/HTTPLoader.java +++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java @@ -191,7 +191,7 @@ public final class HTTPLoader { * When content is not large (less than 1MB), we have better cache it if cache is enabled and url is not local */ long contentLength = client.getHttpResponse().getEntity().getContentLength(); - if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (1024 * 1024) && !url.isLocal()) { + if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (Response.CRAWLER_MAX_SIZE_TO_CACHE) && !url.isLocal()) { byte[] content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize); try { diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java index 4e1acb6ef..642994a4a 100644 --- a/source/net/yacy/crawler/retrieval/Response.java +++ b/source/net/yacy/crawler/retrieval/Response.java @@ -69,6 +69,9 @@ public class Response { private byte[] content; private int status; // tracker indexing status, see status defs below private final boolean fromCache; + + /** Maximum file size to put in cache for crawler */ + public static final long CRAWLER_MAX_SIZE_TO_CACHE = 10 * 1024L * 1024L; /** * doctype calculation by file extension @@ -387,7 +390,7 @@ public class Response { public String shallStoreCacheForCrawler() { // check storage size: all files will be handled in RAM before storage, so they must not exceed // a given size, which we consider as 1MB - if (size() > 10 * 1024L * 1024L) return "too_large_for_caching_" + size(); + if (size() > CRAWLER_MAX_SIZE_TO_CACHE) return "too_large_for_caching_" + size(); // check status code if (!validResponseStatus()) {