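Fix the "not closed" warning from PDFBox in pdfParser

The warning occurred on exit for encrypted PDFs that were rejected due to a missing permission to extract content; the document is now closed before the failure is thrown.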
11 years ago · 09f73b790f
parent c798a9d1bb
commit 09f73b790f
1 changed files with 11 additions and 9 deletions
--- a/source/net/yacy/document/parser/pdfParser.java
+++ b/source/net/yacy/document/parser/pdfParser.java
@ -107,8 +107,10 @@ public class pdfParser extends AbstractParser implements Parser {
                throw new Parser.Failure("Document is encrypted (3): " + e.getMessage(), location);
            }
            final AccessPermission perm = pdfDoc.getCurrentAccessPermission();
-            if (perm == null || !perm.canExtractContent())
+            if (perm == null || !perm.canExtractContent()) {
+                try {pdfDoc.close();} catch (final IOException ee) {}
                throw new Parser.Failure("Document is encrypted and cannot be decrypted", location);
+            }
        }

        // extracting some metadata
@ -131,16 +133,16 @@ public class pdfParser extends AbstractParser implements Parser {
        if (docTitle == null || docTitle.isEmpty()) {
            docTitle = MultiProtocolURL.unescape(location.getFileName());
        }
-        final CharBuffer writer = new CharBuffer(odtParser.MAX_DOCSIZE);
-        byte[] contentBytes = new byte[0];
+        final CharBuffer writer = new CharBuffer(odtParser.MAX_DOCSIZE);
+        byte[] contentBytes = new byte[0];
        try {
            // create a writer for output
-            final PDFTextStripper  stripper = new PDFTextStripper();
+            final PDFTextStripper  stripper = new PDFTextStripper();

            stripper.setEndPage(3); // get first 3 pages (always)
            writer.append(stripper.getText(pdfDoc));
            contentBytes = writer.getBytes(); // remember text in case of interrupting thread
-
+
            stripper.setStartPage(4); // continue with page 4 (terminated, resulting in no text)
            stripper.setEndPage(Integer.MAX_VALUE); // set to default
            // we start the pdf parsing in a separate thread to ensure that it can be terminated
@ -149,14 +151,14 @@ public class pdfParser extends AbstractParser implements Parser {
                @Override
                public void run() {
                    Thread.currentThread().setName("pdfParser.getText:" + location);
-                    try {
-                        writer.append(stripper.getText(pdfDocC));
+                    try {
+                        writer.append(stripper.getText(pdfDocC));
                    } catch (final Throwable e) {}
                }
            };
            t.start();
            t.join(3000);
-            if (t.isAlive()) t.interrupt();
+            if (t.isAlive()) t.interrupt();
            pdfDoc.close();
            contentBytes = writer.getBytes(); // get final text before closing writer
        } catch (final Throwable e) {
@ -176,7 +178,7 @@ public class pdfParser extends AbstractParser implements Parser {
        if (docTitle == null) {
            docTitle = docSubject;
        }
-
+
        // clear resources in pdfbox. they say that is resolved but it's not. see:
        // https://issues.apache.org/jira/browse/PDFBOX-313
        // https://issues.apache.org/jira/browse/PDFBOX-351