From 8100c033a23d2f205e784dca6280f0b332ffb12a Mon Sep 17 00:00:00 2001 From: luccioman Date: Sun, 16 Jul 2017 14:37:06 +0200 Subject: [PATCH] URL Viewer : apply crawler size limits when adding to local index. This allows parsing and previewing large files, while preventing unwanted OutOfMemory errors which are likely to occur when resources larger than the configured crawler limits are added to the Solr index. --- htroot/ViewFile.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index bfa6c4a65..5a8037637 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -363,10 +363,15 @@ public class ViewFile { prop.put("showSnippet_teasertext", desc); prop.put("showSnippet", 1); } - // update index with parsed resouce if index entry is older or missing - if (urlEntry == null || urlEntry.loaddate().before(response.lastModified())) { - Switchboard.getSwitchboard().toIndexer(response); - } + // update index with parsed resource if index entry is older or missing + final long responseSize = response.size(); + if (urlEntry == null || urlEntry.loaddate().before(response.lastModified())) { + /* Also check resource size is lower than configured crawler limits */ + if (responseSize >= 0 + && responseSize <= Switchboard.getSwitchboard().loader.protocolMaxFileSize(response.url())) { + Switchboard.getSwitchboard().toIndexer(response); + } + } if (document != null) document.close(); } prop.put("error", "0");