universal handling for crashed parsers

reverting r6090/1

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6176 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
lotus 16 years ago
parent 535aee4425
commit 4320f69574

@ -76,7 +76,6 @@ public class pdfParser extends AbstractParser implements Parser {
public plasmaParserDocument parse(final yacyURL location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
PDDocument theDocument = null;
plasmaParserDocument theDoc = null;
Writer writer = null;
File writerFile = null;
try {
@ -136,6 +135,8 @@ public class pdfParser extends AbstractParser implements Parser {
String[] docKeywords = null;
if (docKeywordStr != null) docKeywords = docKeywordStr.split(" |,");
plasmaParserDocument theDoc = null;
if (writer instanceof serverCharBuffer) {
final byte[] contentBytes = ((serverCharBuffer)writer).toString().getBytes("UTF-8");
theDoc = new plasmaParserDocument(
@ -186,7 +187,6 @@ public class pdfParser extends AbstractParser implements Parser {
if (theDocument != null) try { theDocument.close(); } catch (final Exception e) {/* ignore this */}
if (writer != null) try { writer.close(); } catch (final Exception e) {/* ignore this */}
Thread.currentThread().setPriority(Thread.NORM_PRIORITY);
if (theDoc == null) throw new ParserException("Unexpected error while parsing pdf file. possibly out of memory",location);
}
}

@ -75,8 +75,6 @@ public class swfParser extends AbstractParser implements Parser {
*/
public plasmaParserDocument parse(final yacyURL location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
plasmaParserDocument theDoc = null;
try {
final SWF2HTML swf2html = new SWF2HTML();
String contents = "";
@ -114,7 +112,7 @@ public class swfParser extends AbstractParser implements Parser {
}
// As the result of parsing, this function must return a plasmaParserDocument object
theDoc = new plasmaParserDocument(
final plasmaParserDocument theDoc = new plasmaParserDocument(
location, // url of the source document
mimeType, // the documents mime type
"UTF-8", // charset of the document text
@ -139,13 +137,6 @@ public class swfParser extends AbstractParser implements Parser {
final String errorMsg = "Unable to parse the swf document '" + location + "':" + e.getMessage();
this.theLogger.logSevere(errorMsg);
throw new ParserException(errorMsg, location);
} finally {
if (theDoc == null) {
// if an unexpected error occurs, just log the error and raise a new ParserException
final String errorMsg = "Unable to parse the swf document '" + location + "': possibly out of memory";
this.theLogger.logSevere(errorMsg);
throw new ParserException(errorMsg, location);
}
}
}

@ -1693,7 +1693,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
// parse the document
document = parser.parseSource(entry.url(), entry.getMimeType(), entry.getCharacterEncoding(), plasmaHTCache.getResourceContent(entry.url()));
assert(document != null) : "Unexpected error. Parser returned null.";
if (document == null) return null;
} catch (final ParserException e) {
this.log.logInfo("Unable to parse the resource '" + entry.url() + "'. " + e.getMessage());
addURLtoErrorDB(entry.url(), entry.referrerHash(), entry.initiator(), entry.anchorName(), e.getErrorCode());
@ -1702,6 +1701,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
document = null;
}
return null;
} finally {
if (document == null) { // if you get here, comment this part out and you will possibly see a OOM in the log
this.log.logInfo("Unable to parse the resource '" + entry.url() + "'. " + "no parser result");
addURLtoErrorDB(entry.url(), entry.referrerHash(), entry.initiator(), entry.anchorName(), "no parser result");
return null;
}
}
final long parsingEndTime = System.currentTimeMillis();

Loading…
Cancel
Save