diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java index 703f632e4..307d96274 100644 --- a/source/net/yacy/document/parser/pdfParser.java +++ b/source/net/yacy/document/parser/pdfParser.java @@ -36,7 +36,6 @@ import java.io.UnsupportedEncodingException; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.exceptions.CryptographyException; -import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; @@ -71,12 +70,13 @@ public class pdfParser extends AbstractParser implements Parser { // create a pdf parser PDDocument pdfDoc = null; - final PDFParser pdfParser; + //final PDFParser pdfParser; try { Thread.currentThread().setPriority(Thread.MIN_PRIORITY); - pdfParser = new PDFParser(source); - pdfParser.parse(); - pdfDoc = pdfParser.getPDDocument(); + pdfDoc = PDDocument.load(source); + //pdfParser = new PDFParser(source); + //pdfParser.parse(); + //pdfDoc = pdfParser.getPDDocument(); } catch (IOException e) { if (pdfDoc != null) try {pdfDoc.close();} catch (IOException ee) {} throw new Parser.Failure(e.getMessage(), location); @@ -103,15 +103,20 @@ public class pdfParser extends AbstractParser implements Parser { } // extracting some metadata - final PDDocumentInformation theDocInfo = pdfDoc.getDocumentInformation(); + final PDDocumentInformation info = pdfDoc.getDocumentInformation(); String docTitle = null, docSubject = null, docAuthor = null, docPublisher = null, docKeywordStr = null; - if (theDocInfo != null) { - docTitle = theDocInfo.getTitle(); - docSubject = theDocInfo.getSubject(); - docAuthor = theDocInfo.getAuthor(); - docPublisher = theDocInfo.getProducer(); - docKeywordStr = theDocInfo.getKeywords(); - } + if (info != null) { + docTitle = info.getTitle(); + docSubject = info.getSubject(); + docAuthor = info.getAuthor(); + docPublisher = info.getProducer(); + if (docPublisher == null || docPublisher.length() == 0) docPublisher = info.getCreator(); + docKeywordStr = info.getKeywords(); + // unused: + // info.getTrapped()); + // info.getCreationDate()); + // info.getModificationDate(); + } CharBuffer writer = null; try { @@ -175,7 +180,7 @@ public class pdfParser extends AbstractParser implements Parser { null, false)}; } - + /** * test * @param args