diff --git a/libx/PDFBox-0.7.2.License b/libx/PDFBox-0.7.3.License similarity index 100% rename from libx/PDFBox-0.7.2.License rename to libx/PDFBox-0.7.3.License diff --git a/libx/PDFBox-0.7.2.jar b/libx/PDFBox-0.7.3.jar similarity index 85% rename from libx/PDFBox-0.7.2.jar rename to libx/PDFBox-0.7.3.jar index 6d4b513fc..f821d6447 100644 Binary files a/libx/PDFBox-0.7.2.jar and b/libx/PDFBox-0.7.3.jar differ diff --git a/source/de/anomic/plasma/parser/pdf/build.xml b/source/de/anomic/plasma/parser/pdf/build.xml index 16c9a2b6c..24a8c51b3 100644 --- a/source/de/anomic/plasma/parser/pdf/build.xml +++ b/source/de/anomic/plasma/parser/pdf/build.xml @@ -16,7 +16,7 @@ - + @@ -25,7 +25,7 @@ - + diff --git a/source/de/anomic/plasma/parser/pdf/pdfParser.java b/source/de/anomic/plasma/parser/pdf/pdfParser.java index 2a2eab041..2736e1fb8 100644 --- a/source/de/anomic/plasma/parser/pdf/pdfParser.java +++ b/source/de/anomic/plasma/parser/pdf/pdfParser.java @@ -53,6 +53,7 @@ import java.util.Hashtable; import org.pdfbox.pdfparser.PDFParser; import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.pdmodel.PDDocumentInformation; +import org.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; import org.pdfbox.util.PDFTextStripper; import de.anomic.plasma.plasmaCrawlEURL; @@ -77,7 +78,7 @@ public class pdfParser extends AbstractParser implements Parser { * @see Parser#getLibxDependences() */ private static final String[] LIBX_DEPENDENCIES = new String[] { - "PDFBox-0.7.2.jar" + "PDFBox-0.7.3.jar" }; public pdfParser() { @@ -119,7 +120,9 @@ public class pdfParser extends AbstractParser implements Parser { theDocument = parser.getPDDocument(); if (theDocument.isEncrypted()) { - throw new ParserException("Document is encrypted",location,plasmaCrawlEURL.DENIED_DOCUMENT_ENCRYPTED); + theDocument.openProtection(new StandardDecryptionMaterial("")); + if (!theDocument.getCurrentAccessPermission().canExtractContent()) + throw new ParserException("Document is encrypted",location,plasmaCrawlEURL.DENIED_DOCUMENT_ENCRYPTED); } // extracting some metadata