From 902d16cf6c836f53125620b1f39b0267249515ac Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 17 Sep 2009 22:00:19 +0000 Subject: [PATCH] fixes to parser git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6323 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/document/Document.java | 2 -- source/de/anomic/document/parser/tarParser.java | 1 + source/de/anomic/document/parser/zipParser.java | 1 + source/de/anomic/kelondro/text/DocumentIndex.java | 8 ++++++-- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/source/de/anomic/document/Document.java b/source/de/anomic/document/Document.java index dd4bbad35..6d90c1bea 100644 --- a/source/de/anomic/document/Document.java +++ b/source/de/anomic/document/Document.java @@ -555,12 +555,10 @@ dc_rights } public int inboundLinks() { - assert this.inboundLinks >= 0; return (this.inboundLinks < 0) ? 0 : this.inboundLinks; } public int outboundLinks() { - assert this.outboundLinks >= 0; return (this.outboundLinks < 0) ? 0 : this.outboundLinks; } diff --git a/source/de/anomic/document/parser/tarParser.java b/source/de/anomic/document/parser/tarParser.java index 5c932ab63..045020374 100644 --- a/source/de/anomic/document/parser/tarParser.java +++ b/source/de/anomic/document/parser/tarParser.java @@ -124,6 +124,7 @@ public class tarParser extends AbstractParser implements Idiom { // skip directories if (entry.isDirectory()) continue; + if (entry.getSize() <= 0) continue; // Get the short entry name final String entryName = entry.getName(); diff --git a/source/de/anomic/document/parser/zipParser.java b/source/de/anomic/document/parser/zipParser.java index 0a874e7e6..252420a90 100644 --- a/source/de/anomic/document/parser/zipParser.java +++ b/source/de/anomic/document/parser/zipParser.java @@ -113,6 +113,7 @@ public class zipParser extends AbstractParser implements Idiom { // skip directories if (entry.isDirectory()) continue; + if (entry.getSize() <= 0) continue; // Get the entry name final String entryName = entry.getName(); diff --git a/source/de/anomic/kelondro/text/DocumentIndex.java b/source/de/anomic/kelondro/text/DocumentIndex.java index 84526e665..e745aba69 100644 --- a/source/de/anomic/kelondro/text/DocumentIndex.java +++ b/source/de/anomic/kelondro/text/DocumentIndex.java @@ -82,7 +82,7 @@ public class DocumentIndex extends Segment { throw new IOException("cannot parse " + file.toString() + ": " + e.getMessage()); } final Condenser condenser = new Condenser(document, true, true); - return super.storeDocument( + return super.storeDocument( url, null, new Date(file.lastModified()), @@ -119,7 +119,7 @@ public class DocumentIndex extends Segment { try { add(w); } catch (IOException e) { - e.printStackTrace(); + if (e.getMessage().indexOf("cannot parse") < 0) e.printStackTrace(); } } } @@ -154,6 +154,10 @@ public class DocumentIndex extends Segment { return find(querystring, 0, 20); } + public void close() { + super.close(); + } + public static void main(String[] args) { // first argument: path to segment // second argument: either 'add' or 'search'