From cff0991d850123dd5b9a7062b6df991c50fb26f5 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 13 Nov 2023 16:41:19 +0100 Subject: [PATCH] test if this is helpful for https://github.com/yacy/yacy_search_server/issues/500 --- source/net/yacy/document/parser/sitemapParser.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/source/net/yacy/document/parser/sitemapParser.java b/source/net/yacy/document/parser/sitemapParser.java index be52f72e7..2dd6ebdeb 100644 --- a/source/net/yacy/document/parser/sitemapParser.java +++ b/source/net/yacy/document/parser/sitemapParser.java @@ -49,7 +49,6 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; -import net.yacy.kelondro.io.ByteCountInputStream; import org.w3c.dom.CharacterData; import org.w3c.dom.Element; @@ -116,7 +115,8 @@ public class sitemapParser extends AbstractParser implements Parser { ConcurrentLog.info("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true)); // client.setHeader(requestHeader.entrySet()); try (final HTTPClient client = new HTTPClient(agent)) { - client.GET(sitemapURL.toNormalform(false), false); + String url = sitemapURL.toNormalform(false); + client.GET(url, false); if (client.getStatusCode() != 200) { throw new IOException("Unable to download the sitemap file " + sitemapURL + "\nServer returned status: " + client.getHttpResponse().getStatusLine()); @@ -128,11 +128,10 @@ public class sitemapParser extends AbstractParser implements Parser { final String contentMimeType = header.mime(); InputStream contentStream = client.getContentstream(); - if (contentMimeType != null && (contentMimeType.equals("application/x-gzip") || contentMimeType.equals("application/gzip"))) { + if ((contentMimeType != null && (contentMimeType.equals("application/x-gzip") || contentMimeType.equals("application/gzip"))) || url.endsWith(".gz")) { contentStream = new GZIPInputStream(contentStream); } - final ByteCountInputStream counterStream = new ByteCountInputStream(contentStream, null); - return new SitemapReader(counterStream, agent); + return new SitemapReader(contentStream, agent); } catch (final IOException e) { throw e; }