From 20e18d79f8927a8b9c7d7f9e389b82077495ced2 Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 10 Nov 2015 01:29:13 +0100 Subject: [PATCH] harmonize document title for archive parsers --- source/net/yacy/document/parser/bzipParser.java | 5 +++-- source/net/yacy/document/parser/gzipParser.java | 4 +++- source/net/yacy/document/parser/sevenzipParser.java | 7 +++++-- source/net/yacy/document/parser/tarParser.java | 7 ++++--- source/net/yacy/document/parser/zipParser.java | 6 ++++-- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/source/net/yacy/document/parser/bzipParser.java b/source/net/yacy/document/parser/bzipParser.java index 0dc0daad6..dca586f3c 100644 --- a/source/net/yacy/document/parser/bzipParser.java +++ b/source/net/yacy/document/parser/bzipParser.java @@ -34,6 +34,7 @@ import java.util.Date; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -92,7 +93,7 @@ public class bzipParser extends AbstractParser implements Parser { } zippedContent.close(); out.close(); - + final String filename = location.getFileName(); // create maindoc for this bzip container, register with supplied url & mime maindoc = new Document( location, @@ -101,7 +102,7 @@ public class bzipParser extends AbstractParser implements Parser { this, null, null, - null, + AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title null, null, null, diff --git a/source/net/yacy/document/parser/gzipParser.java b/source/net/yacy/document/parser/gzipParser.java index 504dd1116..c8590530f 100644 --- a/source/net/yacy/document/parser/gzipParser.java +++ b/source/net/yacy/document/parser/gzipParser.java @@ -35,6 +35,7 @@ import java.util.zip.GZIPInputStream; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -90,6 +91,7 @@ public class gzipParser extends AbstractParser implements Parser { } zippedContent.close(); out.close(); + final String filename = location.getFileName(); // create maindoc for this gzip container, register with supplied url & mime maindoc = new Document( location, @@ -98,7 +100,7 @@ public class gzipParser extends AbstractParser implements Parser { this, null, null, - null, + AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title null, null, null, diff --git a/source/net/yacy/document/parser/sevenzipParser.java b/source/net/yacy/document/parser/sevenzipParser.java index ddfdd8153..c5cde469f 100644 --- a/source/net/yacy/document/parser/sevenzipParser.java +++ b/source/net/yacy/document/parser/sevenzipParser.java @@ -35,6 +35,7 @@ import java.io.OutputStream; import java.util.Date; import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -62,6 +63,8 @@ public class sevenzipParser extends AbstractParser implements Parser { final String charset, final int timezoneOffset, final IInStream source) throws Parser.Failure, InterruptedException { + + final String filename = location.getFileName(); final Document doc = new Document( location, mimeType, @@ -69,12 +72,12 @@ public class sevenzipParser extends AbstractParser implements Parser { this, null, null, + AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title, null, null, null, null, - null, - 0.0f, 0.0f, + 0.0d, 0.0d, (Object)null, null, null, diff --git a/source/net/yacy/document/parser/tarParser.java b/source/net/yacy/document/parser/tarParser.java index c5a5fbd03..ae25b4776 100644 --- a/source/net/yacy/document/parser/tarParser.java +++ b/source/net/yacy/document/parser/tarParser.java @@ -72,7 +72,8 @@ public class tarParser extends AbstractParser implements Parser { final int timezoneOffset, InputStream source) throws Parser.Failure, InterruptedException { - final String ext = MultiProtocolURL.getFileExtension(location.getFileName()); + final String filename = location.getFileName(); + final String ext = MultiProtocolURL.getFileExtension(filename); if (ext.equals("gz") || ext.equals("tgz")) { try { source = new GZIPInputStream(source); @@ -84,14 +85,14 @@ public class tarParser extends AbstractParser implements Parser { final TarArchiveInputStream tis = new TarArchiveInputStream(source); // create maindoc for this bzip container - Document maindoc = new Document( + final Document maindoc = new Document( location, mimeType, charset, this, null, null, - null, + AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title null, null, null, diff --git a/source/net/yacy/document/parser/zipParser.java b/source/net/yacy/document/parser/zipParser.java index 155d669ba..a6718aea1 100644 --- a/source/net/yacy/document/parser/zipParser.java +++ b/source/net/yacy/document/parser/zipParser.java @@ -33,6 +33,7 @@ import java.util.zip.ZipInputStream; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -79,15 +80,16 @@ public class zipParser extends AbstractParser implements Parser { ZipEntry entry; final ZipInputStream zis = new ZipInputStream(source); + final String filename = location.getFileName(); // create maindoc for this zip container with supplied url and mime - Document maindoc = new Document( + final Document maindoc = new Document( location, mimeType, charset, this, null, null, - null, + AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title null, null, null,