Michael Peter Christen 1 year ago
parent ceb07a5218
commit cff0991d85

@ -49,7 +49,6 @@ import net.yacy.document.Document;
import net.yacy.document.Parser; import net.yacy.document.Parser;
import net.yacy.document.TextParser; import net.yacy.document.TextParser;
import net.yacy.document.VocabularyScraper; import net.yacy.document.VocabularyScraper;
import net.yacy.kelondro.io.ByteCountInputStream;
import org.w3c.dom.CharacterData; import org.w3c.dom.CharacterData;
import org.w3c.dom.Element; import org.w3c.dom.Element;
@ -116,7 +115,8 @@ public class sitemapParser extends AbstractParser implements Parser {
ConcurrentLog.info("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true)); ConcurrentLog.info("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true));
// client.setHeader(requestHeader.entrySet()); // client.setHeader(requestHeader.entrySet());
try (final HTTPClient client = new HTTPClient(agent)) { try (final HTTPClient client = new HTTPClient(agent)) {
client.GET(sitemapURL.toNormalform(false), false); String url = sitemapURL.toNormalform(false);
client.GET(url, false);
if (client.getStatusCode() != 200) { if (client.getStatusCode() != 200) {
throw new IOException("Unable to download the sitemap file " + sitemapURL + throw new IOException("Unable to download the sitemap file " + sitemapURL +
"\nServer returned status: " + client.getHttpResponse().getStatusLine()); "\nServer returned status: " + client.getHttpResponse().getStatusLine());
@ -128,11 +128,10 @@ public class sitemapParser extends AbstractParser implements Parser {
final String contentMimeType = header.mime(); final String contentMimeType = header.mime();
InputStream contentStream = client.getContentstream(); InputStream contentStream = client.getContentstream();
if (contentMimeType != null && (contentMimeType.equals("application/x-gzip") || contentMimeType.equals("application/gzip"))) { if ((contentMimeType != null && (contentMimeType.equals("application/x-gzip") || contentMimeType.equals("application/gzip"))) || url.endsWith(".gz")) {
contentStream = new GZIPInputStream(contentStream); contentStream = new GZIPInputStream(contentStream);
} }
final ByteCountInputStream counterStream = new ByteCountInputStream(contentStream, null); return new SitemapReader(contentStream, agent);
return new SitemapReader(counterStream, agent);
} catch (final IOException e) { } catch (final IOException e) {
throw e; throw e;
} }

Loading…
Cancel
Save