Michael Peter Christen 1 year ago
parent ceb07a5218
commit cff0991d85

@@ -49,7 +49,6 @@ import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.TextParser;
import net.yacy.document.VocabularyScraper;
-import net.yacy.kelondro.io.ByteCountInputStream;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Element;
@@ -116,7 +115,8 @@ public class sitemapParser extends AbstractParser implements Parser {
ConcurrentLog.info("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true));
// client.setHeader(requestHeader.entrySet());
try (final HTTPClient client = new HTTPClient(agent)) {
-client.GET(sitemapURL.toNormalform(false), false);
+String url = sitemapURL.toNormalform(false);
+client.GET(url, false);
if (client.getStatusCode() != 200) {
throw new IOException("Unable to download the sitemap file " + sitemapURL +
"\nServer returned status: " + client.getHttpResponse().getStatusLine());
@@ -128,11 +128,10 @@ public class sitemapParser extends AbstractParser implements Parser {
final String contentMimeType = header.mime();
InputStream contentStream = client.getContentstream();
-if (contentMimeType != null && (contentMimeType.equals("application/x-gzip") || contentMimeType.equals("application/gzip"))) {
+if ((contentMimeType != null && (contentMimeType.equals("application/x-gzip") || contentMimeType.equals("application/gzip"))) || url.endsWith(".gz")) {
contentStream = new GZIPInputStream(contentStream);
}
-final ByteCountInputStream counterStream = new ByteCountInputStream(contentStream, null);
-return new SitemapReader(counterStream, agent);
+return new SitemapReader(contentStream, agent);
} catch (final IOException e) {
throw e;
}

Loading…
Cancel
Save