From a9cb083fa135819b56c98561d60f2ab39d2fd8f6 Mon Sep 17 00:00:00 2001
From: luccioman
Date: Fri, 2 Jun 2017 01:46:06 +0200
Subject: [PATCH] Improved consistency between loader openInputStream and load functions

---
 .../net/yacy/crawler/retrieval/FTPLoader.java      |  24 ++++
 .../yacy/crawler/retrieval/FileLoader.java         |  40 ++++--
 .../yacy/crawler/retrieval/HTTPLoader.java         |  34 +++--
 .../net/yacy/crawler/retrieval/Response.java       |  11 ++
 .../net/yacy/crawler/retrieval/SMBLoader.java      |  38 +++++-
 .../crawler/retrieval/StreamResponse.java          | 120 ++++++++++++++++++
 .../net/yacy/repository/LoaderDispatcher.java      |  80 +++++++++---
 .../net/yacy/visualization/ImageViewer.java        |   4 +-
 8 files changed, 300 insertions(+), 51 deletions(-)
 create mode 100644 source/net/yacy/crawler/retrieval/StreamResponse.java

diff --git a/source/net/yacy/crawler/retrieval/FTPLoader.java b/source/net/yacy/crawler/retrieval/FTPLoader.java
index 0d2dfeb17..c0994d0d7 100644
--- a/source/net/yacy/crawler/retrieval/FTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/FTPLoader.java
@@ -27,6 +27,7 @@ package net.yacy.crawler.retrieval;
 
+import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -166,6 +167,29 @@ public class FTPLoader {
         Latency.updateAfterLoad(request.url(), System.currentTimeMillis() - start);
         return response;
     }
+
+    /**
+     * Open a stream on the entry content from an FTP server
+     *
+     * @param request the request to process
+     * @param acceptOnlyParseable when true and no parser can be found to handle the detected MIME type, open a stream on the URL tokens
+     * @return a response with full meta data and embedding an open input stream on the content. Don't forget to close the stream.
+     */
+    public StreamResponse openInputStream(final Request request, final boolean acceptOnlyParseable) throws IOException {
+
+        final Response response = load(request, acceptOnlyParseable);
+        // TODO implement a true ftp content stream instead of a simple ByteArrayInputStream encapsulation
+        final StreamResponse streamResponse;
+        if (response.getContent() != null) {
+            streamResponse = new StreamResponse(response,
+                    new ByteArrayInputStream(response.getContent()));
+        } else {
+            /* content can be null when no parser can handle it: then return the URL tokens as content */
+            streamResponse = new StreamResponse(response,
+                    new ByteArrayInputStream(UTF8.getBytes(request.url().toTokens())));
+        }
+        return streamResponse;
+    }
 
     /**
      * @param ftpClient
diff --git a/source/net/yacy/crawler/retrieval/FileLoader.java b/source/net/yacy/crawler/retrieval/FileLoader.java
index 96495b470..5c07a5cd5 100644
--- a/source/net/yacy/crawler/retrieval/FileLoader.java
+++ b/source/net/yacy/crawler/retrieval/FileLoader.java
@@ -24,6 +24,7 @@ package net.yacy.crawler.retrieval;
 
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
@@ -58,7 +59,31 @@ public class FileLoader {
         this.maxFileSize = (int) sb.getConfigLong("crawler.file.maxFileSize", -1l);
     }
 
+    /**
+     * Fully load the requested file into a byte buffer
+     *
+     * @param request the request to process
+     * @param acceptOnlyParseable when true and no parser can be found to handle the detected MIME type, the response content buffer contains only URL tokens
+     * @return a response with full meta data and embedding the content as a byte buffer
+     */
     public Response load(final Request request, boolean acceptOnlyParseable) throws IOException {
+        StreamResponse streamResponse = openInputStream(request, acceptOnlyParseable);
+
+        /* Fully read the stream and update the response */
+        byte[] content = FileUtils.read(streamResponse.getContentStream());
+        Response response = streamResponse.getResponse();
+        response.setContent(content);
+        return response;
+    }
+
+    /**
+     * Open a stream on the requested file
+     *
+     * @param request the request to process
+     * @param acceptOnlyParseable when true and no parser can be found to handle the detected MIME type, open a stream on the URL tokens
+     * @return a response with full meta data and embedding an open input stream on the content. Don't forget to close the stream.
+     */
+    public StreamResponse openInputStream(final Request request, final boolean acceptOnlyParseable) throws IOException {
         DigestURL url = request.url();
         if (!url.getProtocol().equals("file")) throw new IOException("wrong protocol for FileLoader: " + url.getProtocol());
@@ -93,9 +118,9 @@ public class FileLoader {
             responseHeader,
             profile,
             false,
-            UTF8.getBytes(content.toString()));
+            null);
 
-        return response;
+        return new StreamResponse(response, new ByteArrayInputStream(UTF8.getBytes(content.toString())));
     }
 
     // create response header
@@ -133,13 +158,12 @@ public class FileLoader {
             responseHeader,
             profile,
             false,
-            UTF8.getBytes(url.toTokens()));
-        return response;
+            null);
+        return new StreamResponse(response, new ByteArrayInputStream(UTF8.getBytes(url.toTokens())));
     }
 
     // load the resource
-    InputStream is = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent);
-    byte[] b = FileUtils.read(is);
+    final InputStream is = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent);
 
     // create response with loaded content
     final CrawlProfile profile = this.sb.crawler.get(ASCII.getBytes(request.profileHandle()));
@@ -149,7 +173,7 @@ public class FileLoader {
         responseHeader,
         profile,
         false,
-        b);
-    return response;
+        null);
+    return new StreamResponse(response, is);
     }
 }
diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java
index cea02f278..d795be108 100644
--- a/source/net/yacy/crawler/retrieval/HTTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java
@@ -28,6 +28,7 @@
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 
+import org.apache.http.HttpStatus;
 import org.apache.http.StatusLine;
 
 import net.yacy.cora.document.id.DigestURL;
@@ -82,18 +83,19 @@ public final class HTTPLoader {
         return doc;
     }
 
-    /**
-     * Open input stream on a requested HTTP resource. When resource is small, fully load it and returns a ByteArrayInputStream instance.
+    /**
+     * Open an input stream on a requested HTTP resource. When the resource content size is small
+     * (lower than {@link Response#CRAWLER_MAX_SIZE_TO_CACHE}), fully load it and use a ByteArrayInputStream instance.
      * @param request
      * @param profile crawl profile
      * @param retryCount remaining redirect retries count
      * @param maxFileSize max file size to load. -1 means no limit.
      * @param blacklistType blacklist type to use
      * @param agent agent identifier
-     * @return an open input stream. Don't forget to close it.
-     * @throws IOException when an error occured
+     * @return a response with full meta data and embedding an open input stream on the content. Don't forget to close the stream.
+     * @throws IOException when an error occurred
      */
-    public InputStream openInputStream(final Request request, CrawlProfile profile, final int retryCount,
+    public StreamResponse openInputStream(final Request request, CrawlProfile profile, final int retryCount,
             final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent)
             throws IOException {
         if (retryCount < 0) {
@@ -200,13 +202,14 @@ public final class HTTPLoader {
                 FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
             throw new IOException("REJECTED UNWANTED REDIRECTION '" + statusline + "' for URL '" + requestURLString + "'$");
-        } else if (statusCode == 200 || statusCode == 203) {
+        } else if (statusCode == HttpStatus.SC_OK || statusCode == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION) {
             // the transfer is ok
 
             /*
              * When content is not large (less than Response.CRAWLER_MAX_SIZE_TO_CACHE), we have better cache it if cache is enabled and url is not local
              */
             long contentLength = client.getHttpResponse().getEntity().getContentLength();
+            final InputStream contentStream;
             if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (Response.CRAWLER_MAX_SIZE_TO_CACHE) && !url.isLocal()) {
                 byte[] content = null;
                 try {
@@ -218,14 +221,17 @@ public final class HTTPLoader {
                     client.finish();
                 }
 
-                return new ByteArrayInputStream(content);
+                contentStream = new ByteArrayInputStream(content);
+            } else {
+                /*
+                 * Create a HTTPInputStream delegating to
+                 * client.getContentstream(). Close method will ensure client is
+                 * properly closed.
+                 */
+                contentStream = new HTTPInputStream(client);
             }
-            /*
-             * Returns a HTTPInputStream delegating to
-             * client.getContentstream(). Close method will ensure client is
-             * properly closed.
-             */
-            return new HTTPInputStream(client);
+
+            return new StreamResponse(new Response(request, requestHeader, responseHeader, profile, false, null), contentStream);
         } else {
             client.finish();
             // if the response has not the right response type then reject file
diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java
index 2b7edc334..e4d1f41d0 100644
--- a/source/net/yacy/crawler/retrieval/Response.java
+++ b/source/net/yacy/crawler/retrieval/Response.java
@@ -225,10 +225,21 @@ public class Response {
     public void updateStatus(final int newStatus) {
         this.status = newStatus;
     }
+
+    /**
+     * @return the original request that produced this response
+     */
+    public Request getRequest() {
+        return this.request;
+    }
 
     public ResponseHeader getResponseHeader() {
         return this.responseHeader;
     }
+
+    public RequestHeader getRequestHeader() {
+        return this.requestHeader;
+    }
 
     public boolean fromCache() {
         return this.fromCache;
diff --git a/source/net/yacy/crawler/retrieval/SMBLoader.java b/source/net/yacy/crawler/retrieval/SMBLoader.java
index 79661ee71..4cf4d37d5 100644
--- a/source/net/yacy/crawler/retrieval/SMBLoader.java
+++ b/source/net/yacy/crawler/retrieval/SMBLoader.java
@@ -27,6 +27,7 @@ package net.yacy.crawler.retrieval;
 
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.MalformedURLException;
@@ -69,7 +70,31 @@ public class SMBLoader {
     }
 
+    /**
+     * Fully load the requested file into a byte buffer
+     *
+     * @param request the request to process
+     * @param acceptOnlyParseable when true and no parser can be found to handle the detected MIME type, the response content buffer contains only URL tokens
+     * @return a response with full meta data and embedding the content as a byte buffer
+     */
     public Response load(final Request request, boolean acceptOnlyParseable) throws IOException {
+        StreamResponse streamResponse = openInputStream(request, acceptOnlyParseable);
+
+        /* Fully read the stream and update the response */
+        byte[] content = FileUtils.read(streamResponse.getContentStream());
+        Response response = streamResponse.getResponse();
+        response.setContent(content);
+        return response;
+    }
+
+    /**
+     * Open a stream on the requested file
+     *
+     * @param request the request to process
+     * @param acceptOnlyParseable when true, do not open a stream on the content when no parser can be found to handle the detected MIME type
+     * @return a response with full meta data and embedding an open input stream on the content. Don't forget to close the stream.
+     */
+    public StreamResponse openInputStream(final Request request, final boolean acceptOnlyParseable) throws IOException {
         DigestURL url = request.url();
         if (!url.getProtocol().equals("smb")) throw new IOException("wrong loader for SMBLoader: " + url.getProtocol());
@@ -111,9 +136,9 @@ public class SMBLoader {
             responseHeader,
             profile,
             false,
-            UTF8.getBytes(content.toString()));
+            null);
 
-        return response;
+        return new StreamResponse(response, new ByteArrayInputStream(UTF8.getBytes(content.toString())));
     }
 
     // create response header
@@ -151,13 +176,12 @@ public class SMBLoader {
             responseHeader,
             profile,
             false,
-            url.toTokens().getBytes());
-        return response;
+            null);
+        return new StreamResponse(response, new ByteArrayInputStream(url.toTokens().getBytes()));
     }
 
     // load the resource
     InputStream is = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent);
-    byte[] b = FileUtils.read(is);
 
     // create response with loaded content
     final CrawlProfile profile = this.sb.crawler.get(request.profileHandle().getBytes());
@@ -167,8 +191,8 @@ public class SMBLoader {
         responseHeader,
         profile,
         false,
-        b);
-    return response;
+        null);
+    return new StreamResponse(response, is);
     }
 
     public static void main(String[] args) {
diff --git a/source/net/yacy/crawler/retrieval/StreamResponse.java b/source/net/yacy/crawler/retrieval/StreamResponse.java
new file mode 100644
index 000000000..19a78501b
--- /dev/null
+++ b/source/net/yacy/crawler/retrieval/StreamResponse.java
@@ -0,0 +1,120 @@
+// StreamResponse.java
+// ---------------------------
+// Copyright 2017 by luccioman; https://github.com/luccioman
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+package net.yacy.crawler.retrieval;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+
+import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.document.Document;
+import net.yacy.document.Parser;
+import net.yacy.document.TextParser;
+import net.yacy.document.VocabularyScraper;
+
+/**
+ * A crawler load response, holding content as a stream.
+ */
+public class StreamResponse {
+
+    /** Logger */
+    private final static ConcurrentLog log = new ConcurrentLog(StreamResponse.class.getSimpleName());
+
+    /**
+     * Content as a stream.
+     */
+    private InputStream contentStream;
+
+    /**
+     * The response details, notably including the request and response headers.
+     */
+    private Response response;
+
+    /**
+     * @param response
+     *            contains the complete crawler response details
+     * @param contentStream
+     *            an open input stream on the response content
+     * @throws IllegalArgumentException
+     *             when response is null
+     */
+    public StreamResponse(final Response response, final InputStream contentStream) {
+        if (response == null) {
+            throw new IllegalArgumentException("response parameter must not be null");
+        }
+        this.response = response;
+        this.contentStream = contentStream;
+    }
+
+    /**
+     * @return the content stream. Don't forget to close it when processing is
+     *         terminated.
+     */
+    public InputStream getContentStream() {
+        return this.contentStream;
+    }
+
+    /**
+     * @return the crawler response with complete details
+     */
+    public Response getResponse() {
+        return this.response;
+    }
+
+    /**
+     * Parse and close the content stream and return the parsed documents when
+     * possible
+     *
+     * @return the parsed documents or null when an error occurred
+     * @throws Parser.Failure
+     *             when no parser supports the content
+     */
+    public Document[] parse() throws Parser.Failure {
+        final String supportError = TextParser.supports(this.response.url(),
+                this.response.getResponseHeader() == null ? null : this.response.getResponseHeader().getContentType());
+        if (supportError != null) {
+            throw new Parser.Failure("no parser support:" + supportError, this.response.url());
+        }
+        try {
+            return TextParser.parseSource(this.response.url(),
+                    this.response.getResponseHeader() == null ? null
+                            : this.response.getResponseHeader().getContentType(),
+                    this.response.getResponseHeader() == null ? StandardCharsets.UTF_8.name()
+                            : this.response.getResponseHeader().getCharacterEncoding(),
+                    new VocabularyScraper(), this.response.getRequest().timezoneOffset(),
+                    this.response.getRequest().depth(), this.response.size(), this.contentStream);
+        } catch (final Exception e) {
+            return null;
+        } finally {
+            if (this.contentStream != null) {
+                try {
+                    this.contentStream.close();
+                } catch (IOException ignored) {
+                    log.warn("Could not close content stream on url " + this.response.url());
+                }
+            }
+        }
+
+    }
+
+}
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index e8f04318f..32aa2b4d9 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -29,7 +29,6 @@ package net.yacy.repository;
 import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.util.Arrays;
 import java.util.Date;
@@ -59,6 +58,7 @@ import net.yacy.crawler.retrieval.HTTPLoader;
 import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.crawler.retrieval.SMBLoader;
+import net.yacy.crawler.retrieval.StreamResponse;
 import net.yacy.document.Document;
 import net.yacy.document.Parser;
 import net.yacy.document.TextParser;
@@ -347,7 +347,7 @@ public final class LoaderDispatcher {
-     * @return an open ImageInputStream. Don't forget to close it once used!
+     * @return a response with full meta data and an open input stream on the content. Don't forget to close the stream once used!
      * @throws IOException when url is malformed, blacklisted, or CacheStrategy is CACHEONLY and content is unavailable
      */
-    private InputStream openInputStreamInternal(final Request request, CacheStrategy cacheStrategy, final int maxFileSize, final BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
+    private StreamResponse openInputStreamInternal(final Request request, CacheStrategy cacheStrategy, final int maxFileSize, final BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
         // get the protocol of the next URL
         final DigestURL url = request.url();
         if (url.isFile() || url.isSMB()) {
@@ -366,9 +366,9 @@ public final class LoaderDispatcher {
 
         // check if we have the page in the cache
         Response cachedResponse = loadFromCache(request, cacheStrategy, agent, url, crawlProfile);
-        if(cachedResponse != null) {
-            return new ByteArrayInputStream(cachedResponse.getContent());
-        }
+        if (cachedResponse != null) {
+            return new StreamResponse(cachedResponse, new ByteArrayInputStream(cachedResponse.getContent()));
+        }
 
         // check case where we want results from the cache exclusively, and never from the Internet (offline mode)
         if (cacheStrategy == CacheStrategy.CACHEONLY) {
@@ -389,20 +389,20 @@ public final class LoaderDispatcher {
         }
 
         // load resource from the internet
-        InputStream inStream = null;
+        StreamResponse response;
         if (protocol.equals("http") || protocol.equals("https")) {
-            inStream = this.httpLoader.openInputStream(request, crawlProfile, 1, maxFileSize, blacklistType, agent);
-        } else if (protocol.equals("ftp") || protocol.equals("smb") || protocol.equals("file")) {
-            // may also open directly stream with ftp loader
-            inStream = url.getInputStream(agent);
+            response = this.httpLoader.openInputStream(request, crawlProfile, 1, maxFileSize, blacklistType, agent);
+        } else if (protocol.equals("ftp")) {
+            response = this.ftpLoader.openInputStream(request, true);
+        } else if (protocol.equals("smb")) {
+            response = this.smbLoader.openInputStream(request, true);
+        } else if (protocol.equals("file")) {
+            response = this.fileLoader.openInputStream(request, true);
         } else {
             throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
         }
-        if (inStream == null) {
-            throw new IOException("Unable to open content stream");
-        }
-        return inStream;
+        return response;
     }
 
@@ -464,18 +464,18 @@ public final class LoaderDispatcher {
     }
 
     /**
-     * Open url as InputStream from the web or the cache
+     * Open a stream on the URL content, from the web or the cache
      * @param request must be not null
      * @param cacheStrategy cache strategy to use
     * @param blacklistType black list
      * @param agent agent identification for HTTP requests
-     * @return an open InputStream on content. Don't forget to close it once used.
+     * @return a response with full meta data and embedding an open input stream on the content. Don't forget to close the stream.
      * @throws IOException when url is malformed or blacklisted
      */
-    public InputStream openInputStream(final Request request, final CacheStrategy cacheStrategy,
+    public StreamResponse openInputStream(final Request request, final CacheStrategy cacheStrategy,
             BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException {
         final int maxFileSize = protocolMaxFileSize(request.url());
-        InputStream stream = null;
+        StreamResponse response;
 
         Semaphore check = this.loaderSteering.get(request.url());
         if (check != null && cacheStrategy != CacheStrategy.NOCACHE) {
@@ -493,9 +493,9 @@ public final class LoaderDispatcher {
             this.loaderSteering.put(request.url(), new Semaphore(0));
             try {
-                stream = openInputStreamInternal(request, cacheStrategy, maxFileSize, blacklistType, agent);
+                response = openInputStreamInternal(request, cacheStrategy, maxFileSize, blacklistType, agent);
             } catch(IOException ioe) {
-                /* Do not re encapsulate eventual IOException in an IOException */
+                /* Do not re-encapsulate an IOException in another IOException */
                 throw ioe;
             } catch (final Throwable e) {
                 throw new IOException(e);
@@ -507,7 +507,7 @@ public final class LoaderDispatcher {
             }
         }
 
-        return stream;
+        return response;
     }
 
     public Document[] loadDocuments(final Request request, final CacheStrategy cacheStrategy, final int maxFileSize, BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException, Parser.Failure {
@@ -554,6 +554,44 @@ public final class LoaderDispatcher {
             throw new IOException(e.getMessage());
         }
     }
+
+    /**
+     * Similar to the loadDocument method, but streams the resource content when possible instead of fully loading it into memory.
+     * @param location URL of the resource to load
+     * @param cachePolicy cache policy strategy
+     * @param blacklistType blacklist to use
+     * @param agent user agent identifier
+     * @return the parsed document or null when an error occurred while parsing
+     * @throws IOException when the content cannot be fetched or no parser supports it
+     */
+    public Document loadDocumentAsStream(final DigestURL location, final CacheStrategy cachePolicy, BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException {
+        // load resource
+        Request request = request(location, true, false);
+        final StreamResponse streamResponse = this.openInputStream(request, cachePolicy, blacklistType, agent);
+        final Response response = streamResponse.getResponse();
+        final DigestURL url = request.url();
+        if (response == null) throw new IOException("no Response for url " + url);
+
+        // if it is still not available, report an error
+        if (streamResponse.getContentStream() == null || response.getResponseHeader() == null) {
+            throw new IOException("no Content available for url " + url);
+        }
+
+        // parse resource
+        try {
+            Document[] documents = streamResponse.parse();
+            Document merged = Document.mergeDocuments(location, response.getMimeType(), documents);
+
+            String x_robots_tag = response.getResponseHeader().getXRobotsTag();
+            if (x_robots_tag.indexOf("noindex", 0) >= 0) {
+                merged.setIndexingDenied(true);
+            }
+
+            return merged;
+        } catch (final Parser.Failure e) {
+            throw new IOException(e.getMessage());
+        }
+    }
 
     /**
      * load all links from a resource
diff --git a/source/net/yacy/visualization/ImageViewer.java b/source/net/yacy/visualization/ImageViewer.java
index f701e72dd..1b886972b 100644
--- a/source/net/yacy/visualization/ImageViewer.java
+++ b/source/net/yacy/visualization/ImageViewer.java
@@ -44,6 +44,7 @@ import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.Domains;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.crawler.retrieval.StreamResponse;
 import net.yacy.data.InvalidURLLicenceException;
 import net.yacy.data.URLLicense;
 import net.yacy.http.servlets.TemplateMissingParameterException;
@@ -122,8 +123,9 @@ public class ImageViewer {
             String agentName = post.get("agentName", auth ? ClientIdentification.yacyIntranetCrawlerAgentName
                     : ClientIdentification.yacyInternetCrawlerAgentName);
             ClientIdentification.Agent agent = ClientIdentification.getAgent(agentName);
-            inStream = loader.openInputStream(loader.request(url, false, true), CacheStrategy.IFEXIST,
+            final StreamResponse response = loader.openInputStream(loader.request(url, false, true), CacheStrategy.IFEXIST,
                     BlacklistType.SEARCH, agent);
+            inStream = response.getContentStream();
         } catch (final IOException e) {
             /** No need to log full stack trace (in most cases resource is not available because of a network error) */
             ConcurrentLog.fine("ImageViewer", "cannot load image. URL : " + url.toNormalform(true));
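
Usage sketch (not part of the patch; the loader, url and agent variables are assumed to be
wired up as elsewhere in YaCy, and the blacklist type is chosen only for illustration):
callers that previously received a bare InputStream from LoaderDispatcher.openInputStream
now receive a StreamResponse bundling the stream with the crawler response meta data:

    // Stream and parse a resource without buffering it fully in memory.
    // StreamResponse.parse() closes the content stream itself when done.
    final StreamResponse streamResponse = loader.openInputStream(
            loader.request(url, true, false), CacheStrategy.IFEXIST,
            BlacklistType.SEARCH, agent);
    try {
        final Document[] documents = streamResponse.parse();
    } catch (final Parser.Failure e) {
        // thrown when no parser supports the detected MIME type
    }

    // When only the raw bytes are needed, read the stream directly and close
    // it explicitly, as the javadoc of openInputStream requires.
    final InputStream contentStream = loader.openInputStream(
            loader.request(url, true, false), CacheStrategy.IFEXIST,
            BlacklistType.SEARCH, agent).getContentStream();
    try {
        // consume contentStream incrementally here
    } finally {
        contentStream.close();
    }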