From 4a611ac6a39fca86f4b4d1108a5ffa403c8f3d31 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 15 Nov 2023 23:45:53 +0100 Subject: [PATCH] another possible fix for https://github.com/yacy/yacy_search_server/issues/500 --- source/net/yacy/document/parser/sitemapParser.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/net/yacy/document/parser/sitemapParser.java b/source/net/yacy/document/parser/sitemapParser.java index 2dd6ebdeb..bfc49ac90 100644 --- a/source/net/yacy/document/parser/sitemapParser.java +++ b/source/net/yacy/document/parser/sitemapParser.java @@ -25,6 +25,7 @@ package net.yacy.document.parser; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; @@ -50,6 +51,7 @@ import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; +import org.apache.commons.io.IOUtils; import org.w3c.dom.CharacterData; import org.w3c.dom.Element; import org.w3c.dom.Node; @@ -131,7 +133,9 @@ public class sitemapParser extends AbstractParser implements Parser { if ((contentMimeType != null && (contentMimeType.equals("application/x-gzip") || contentMimeType.equals("application/gzip"))) || url.endsWith(".gz")) { contentStream = new GZIPInputStream(contentStream); } - return new SitemapReader(contentStream, agent); + byte[] bytes = IOUtils.toByteArray(contentStream); + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + return new SitemapReader(bais, agent); } catch (final IOException e) { throw e; }