htmlParser: previously closed the input stream -> changed it to leave the stream open so it can be reset (used by AugmentParser, even though this is practically unused).

Note: stream.close() is done by the caller (Textparser.parseSource).
- removed unnecessary reset in AugmentParser
- added stream.mark in RDFaTripleImpl to make stream.reset work here
pull/1/head
reger 12 years ago
parent f117ea0492
commit 92d3f71b16

@ -37,11 +37,6 @@ public class AugmentParser extends AbstractParser implements Parser {
public Document[] parse(DigestURI url, String mimeType, String charset, InputStream source) throws Parser.Failure, InterruptedException {
Document[] htmlDocs = this.rdfaParser.parse(url, mimeType, charset, source);
try {
source.reset();
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
for (final Document doc : htmlDocs) {
/* analyze(doc, url, mimeType, charset); // enrich document text */

@ -206,7 +206,7 @@ public class htmlParser extends AbstractParser implements Parser {
throw new Parser.Failure("IO error:" + e.getMessage(), location);
} finally {
writer.flush();
sourceStream.close();
//sourceStream.close(); keep open for multiple parsing (close done by caller)
writer.close();
}
//OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);

@ -38,6 +38,7 @@ public class RDFaTripleImpl{
TransformerException, TransformerConfigurationException {
BufferedReader bufReader = new BufferedReader(in);
bufReader.mark(2048); // mark position for following reset
String readLine = bufReader.readLine();
if (!readLine.toLowerCase().contains("<!doctype")){
bufReader.reset();

Loading…
Cancel
Save