diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java index c61536f69..af2d0e200 100644 --- a/source/net/yacy/document/parser/pdfParser.java +++ b/source/net/yacy/document/parser/pdfParser.java @@ -143,23 +143,24 @@ public class pdfParser extends AbstractParser implements Parser { writer.append(stripper.getText(pdfDoc)); contentBytes = writer.getBytes(); // remember text in case of interrupting thread - stripper.setStartPage(4); // continue with page 4 (terminated, resulting in no text) - stripper.setEndPage(Integer.MAX_VALUE); // set to default - // we start the pdf parsing in a separate thread to ensure that it can be terminated - final PDDocument pdfDocC = pdfDoc; - final Thread t = new Thread() { - @Override - public void run() { - Thread.currentThread().setName("pdfParser.getText:" + location); - try { - writer.append(stripper.getText(pdfDocC)); - } catch (final Throwable e) {} - } - }; - t.start(); - t.join(3000); - if (t.isAlive()) t.interrupt(); - pdfDoc.close(); + if (pdfDoc.getNumberOfPages() > 3) { // spare creating/starting thread if all pages read + stripper.setStartPage(4); // continue with page 4 (terminated, resulting in no text) + stripper.setEndPage(Integer.MAX_VALUE); // set to default + // we start the pdf parsing in a separate thread to ensure that it can be terminated + final PDDocument pdfDocC = pdfDoc; + final Thread t = new Thread() { + @Override + public void run() { + Thread.currentThread().setName("pdfParser.getText:" + location); + try { + writer.append(stripper.getText(pdfDocC)); + } catch (final Throwable e) {} + } + }; + t.start(); + t.join(3000); + if (t.isAlive()) t.interrupt(); + } contentBytes = writer.getBytes(); // get final text before closing writer } catch (final Throwable e) { // close the writer