optimize pdfParser

Skip starting the reader thread if all content has already been read.
pull/1/head
reger 11 years ago
parent 09f73b790f
commit 3b559e7846

@ -143,6 +143,7 @@ public class pdfParser extends AbstractParser implements Parser {
writer.append(stripper.getText(pdfDoc));
contentBytes = writer.getBytes(); // remember text in case of interrupting thread
if (pdfDoc.getNumberOfPages() > 3) { // spare creating/starting thread if all pages read
stripper.setStartPage(4); // continue with page 4 (terminated, resulting in no text)
stripper.setEndPage(Integer.MAX_VALUE); // set to default
// we start the pdf parsing in a separate thread to ensure that it can be terminated
@ -159,7 +160,7 @@ public class pdfParser extends AbstractParser implements Parser {
t.start();
t.join(3000);
if (t.isAlive()) t.interrupt();
pdfDoc.close();
}
contentBytes = writer.getBytes(); // get final text before closing writer
} catch (final Throwable e) {
// close the writer

Loading…
Cancel
Save