From 08f1cbb125ba00000038dbc85ce50f9a5c5b4ab0 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 6 Nov 2009 22:41:37 +0000 Subject: [PATCH] another update to the pdf parser git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6463 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../net/yacy/document/parser/pdfParser.java | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java index 23f4e647b..8d40a43e5 100644 --- a/source/net/yacy/document/parser/pdfParser.java +++ b/source/net/yacy/document/parser/pdfParser.java @@ -88,24 +88,13 @@ public class pdfParser extends AbstractParser implements Idiom { public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException { - PDDocument theDocument = null; - Writer writer = null; - File writerFile = null; - - String docTitle = null, docSubject = null, docAuthor = null, docKeywordStr = null; - - // check for interruption - checkInterruption(); - - // creating a pdf parser + // create a pdf parser + final PDDocument theDocument; final PDFParser parser; - final PDFTextStripper stripper; try { Thread.currentThread().setPriority(Thread.MIN_PRIORITY); parser = new PDFParser(source); parser.parse(); - checkInterruption(); - stripper = new PDFTextStripper(); theDocument = parser.getPDDocument(); } catch (IOException e) { Log.logException(e); @@ -113,6 +102,8 @@ public class pdfParser extends AbstractParser implements Idiom { } finally { Thread.currentThread().setPriority(Thread.NORM_PRIORITY); } + + checkInterruption(); if (theDocument.isEncrypted()) { try { @@ -134,6 +125,7 @@ public class pdfParser extends AbstractParser implements Idiom { // extracting some metadata final PDDocumentInformation theDocInfo = theDocument.getDocumentInformation(); + String docTitle = null, docSubject = null, docAuthor = null, docKeywordStr = null; if (theDocInfo != null) { docTitle = theDocInfo.getTitle(); docSubject = theDocInfo.getSubject(); @@ -141,6 +133,8 @@ public class pdfParser extends AbstractParser implements Idiom { docKeywordStr = theDocInfo.getKeywords(); } + Writer writer = null; + File writerFile = null; try { // creating a writer for output if ((this.contentLength == -1) || (this.contentLength > Idiom.MAX_KEEP_IN_MEMORY_SIZE)) { @@ -149,13 +143,9 @@ public class pdfParser extends AbstractParser implements Idiom { } else { writer = new CharBuffer(); } - try { - stripper.writeText(theDocument, writer ); // may throw a NPE - } catch (Exception e) { - Log.logException(e); - Log.logWarning("pdfParser", e.getMessage()); - } - theDocument.close(); theDocument = null; + final PDFTextStripper stripper = new PDFTextStripper(); + stripper.writeText(theDocument, writer); // may throw a NPE + theDocument.close(); writer.close(); } catch (IOException e) { Log.logException(e);