From 92d3f71b1643df8e5798e9e98c4694b26a6e0c78 Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 28 Jul 2013 03:41:09 +0200 Subject: [PATCH] htmlParser: closes input stream -> changed it to leave it open for a reset (used by AugmentParser - even if this is practically not used), note: stream.close is done by caller (Textparser.parseSource) - removed unnecessary reset in AugmentParser - added stream.mark in RDFaTripleImpl to make stream.reset work here --- source/net/yacy/document/parser/augment/AugmentParser.java | 5 ----- source/net/yacy/document/parser/htmlParser.java | 2 +- .../net/yacy/document/parser/rdfa/impl/RDFaTripleImpl.java | 1 + 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/source/net/yacy/document/parser/augment/AugmentParser.java b/source/net/yacy/document/parser/augment/AugmentParser.java index 46f17796b..9304c68c5 100644 --- a/source/net/yacy/document/parser/augment/AugmentParser.java +++ b/source/net/yacy/document/parser/augment/AugmentParser.java @@ -37,11 +37,6 @@ public class AugmentParser extends AbstractParser implements Parser { public Document[] parse(DigestURI url, String mimeType, String charset, InputStream source) throws Parser.Failure, InterruptedException { Document[] htmlDocs = this.rdfaParser.parse(url, mimeType, charset, source); - try { - source.reset(); - } catch (final IOException e) { - ConcurrentLog.logException(e); - } for (final Document doc : htmlDocs) { /* analyze(doc, url, mimeType, charset); // enrich document text */ diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index a8d84a39b..3133a8f74 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -206,7 +206,7 @@ public class htmlParser extends AbstractParser implements Parser { throw new Parser.Failure("IO error:" + e.getMessage(), location); } finally { writer.flush(); - sourceStream.close(); + //sourceStream.close(); keep open for 
multiple parsing (close done by caller) writer.close(); } //OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false); diff --git a/source/net/yacy/document/parser/rdfa/impl/RDFaTripleImpl.java b/source/net/yacy/document/parser/rdfa/impl/RDFaTripleImpl.java index b651da6ec..0f7562e15 100644 --- a/source/net/yacy/document/parser/rdfa/impl/RDFaTripleImpl.java +++ b/source/net/yacy/document/parser/rdfa/impl/RDFaTripleImpl.java @@ -38,6 +38,7 @@ public class RDFaTripleImpl{ TransformerException, TransformerConfigurationException { BufferedReader bufReader = new BufferedReader(in); + bufReader.mark(2048); // mark position for following reset String readLine = bufReader.readLine(); if (!readLine.toLowerCase().contains("