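Avoid reporting a duplicate, misleading "no parser result" error when a ParserException has already been logged and recorded for the same resource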

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6189 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
lotus 16 years ago
parent 21b8704fb4
commit e15d27bc63

@ -1674,6 +1674,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
private Document parseDocument(final IndexingStack.QueueEntry entry) throws InterruptedException { private Document parseDocument(final IndexingStack.QueueEntry entry) throws InterruptedException {
Document document = null; Document document = null;
boolean parserException = false;
final int processCase = entry.processCase(); final int processCase = entry.processCase();
if (this.log.isFine()) log.logFine("processResourceStack processCase=" + processCase + if (this.log.isFine()) log.logFine("processResourceStack processCase=" + processCase +
@ -1693,6 +1694,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
document = Parser.parseSource(entry.url(), entry.getMimeType(), entry.getCharacterEncoding(), plasmaHTCache.getResourceContent(entry.url())); document = Parser.parseSource(entry.url(), entry.getMimeType(), entry.getCharacterEncoding(), plasmaHTCache.getResourceContent(entry.url()));
assert(document != null) : "Unexpected error. Parser returned null."; assert(document != null) : "Unexpected error. Parser returned null.";
} catch (final ParserException e) { } catch (final ParserException e) {
parserException = true;
this.log.logWarning("Unable to parse the resource '" + entry.url() + "'. " + e.getMessage()); this.log.logWarning("Unable to parse the resource '" + entry.url() + "'. " + e.getMessage());
addURLtoErrorDB(entry.url(), entry.referrerHash(), entry.initiator(), entry.anchorName(), e.getErrorCode()); addURLtoErrorDB(entry.url(), entry.referrerHash(), entry.initiator(), entry.anchorName(), e.getErrorCode());
if (document != null) { if (document != null) {
@ -1701,7 +1703,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
} }
return null; return null;
} finally { } finally {
if (document == null) { // if you get here, comment this part out and you will possibly see a OOM in the log if (document == null && !parserException) { // if you get here, comment this part out and you will possibly see a OOM in the log
this.log.logWarning("Unable to parse the resource '" + entry.url() + "'. " + "no parser result"); this.log.logWarning("Unable to parse the resource '" + entry.url() + "'. " + "no parser result");
addURLtoErrorDB(entry.url(), entry.referrerHash(), entry.initiator(), entry.anchorName(), "no parser result"); addURLtoErrorDB(entry.url(), entry.referrerHash(), entry.initiator(), entry.anchorName(), "no parser result");
return null; return null;

Loading…
Cancel
Save