diff --git a/source/net/yacy/cora/util/Html2Image.java b/source/net/yacy/cora/util/Html2Image.java
index a96c85574..d9adeb5bb 100644
--- a/source/net/yacy/cora/util/Html2Image.java
+++ b/source/net/yacy/cora/util/Html2Image.java
@@ -31,6 +31,7 @@ import java.beans.PropertyChangeListener;
import java.io.File;
import java.io.IOException;
import java.util.List;
+import java.util.concurrent.TimeUnit;
import javax.imageio.ImageIO;
import javax.swing.JEditorPane;
@@ -74,8 +75,10 @@ public class Html2Image {
private final static File convertMac1 = new File("/opt/local/bin/convert");
private final static File convertMac2 = new File("/opt/ImageMagick/bin/convert");
- // debian
- // to install: apt-get install wkhtmltopdf imagemagick xvfb ghostscript
+ /* Debian packages to install: apt-get install wkhtmltopdf imagemagick xvfb ghostscript
+ The ImageMagick policy file under /etc (e.g. /etc/ImageMagick-6/policy.xml) should also be checked:
+ if it contains a line such as <policy domain="coder" rights="none" pattern="PDF" />, it must be edited with rights="read" at minimum
+ */
private final static File wkhtmltopdfDebian = new File("/usr/bin/wkhtmltopdf"); // there is no wkhtmltoimage, use convert to create images
private final static File convertDebian = new File("/usr/bin/convert");
@@ -93,17 +96,102 @@ public class Html2Image {
*/
private static final File WKHTMLTOPDF_WINDOWS_X86 = new File(
"C:\\Program Files (x86)\\wkhtmltopdf\\bin\\wkhtmltopdf.exe");
+
+ /**
+ * Bare wkhtmltopdf command name, used instead of an absolute file path
+ * when wkhtmltopdf is included in the system Path
+ */
+ private static final String WKHTMLTOPDF_COMMAND = "wkhtmltopdf";
+
+ /**
+ * Bare imagemagick convert command name, used instead of an absolute file path
+ * when convert is included in the system Path
+ */
+ private static final String CONVERT_COMMAND = "convert";
private static boolean usexvfb = false;
+ /**
+ * Tell whether wkhtmltopdf can be used on this system.
+ * The common installation paths are checked first; the system Path is
+ * probed only when none of them holds the executable.
+ * @return true when the wkhtmltopdf command is detected as available in the system
+ */
public static boolean wkhtmltopdfAvailable() {
- return OS.isWindows ? (WKHTMLTOPDF_WINDOWS.exists() || WKHTMLTOPDF_WINDOWS_X86.exists())
- : (wkhtmltopdfMac.exists() || wkhtmltopdfDebian.exists());
+ /* Check wkhtmltopdf common installation paths and system Path */
+ return wkhtmltopdfExecutable() != null || wkhtmltopdfAvailableInPath();
+ }
+
+    /**
+     * Look for wkhtmltopdf at its common installation paths for the current platform.
+     * @return the first wkhtmltopdf executable file that exists, null when none can be found
+     */
+    private static File wkhtmltopdfExecutable() {
+        final File[] candidates = OS.isWindows
+                ? new File[] { WKHTMLTOPDF_WINDOWS, WKHTMLTOPDF_WINDOWS_X86 }
+                : new File[] { wkhtmltopdfMac, wkhtmltopdfDebian };
+        /* Candidates are probed in order : the first existing one wins */
+        for (final File candidate : candidates) {
+            if (candidate.exists()) {
+                return candidate;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Probe the system path by running "wkhtmltopdf -V" and waiting at most
+     * 2 seconds for the command to terminate.
+     * @return true when wkhtmltopdf is available in system path
+     */
+    private static boolean wkhtmltopdfAvailableInPath() {
+        boolean available = false;
+        Process p = null;
+        try {
+            p = Runtime.getRuntime().exec(WKHTMLTOPDF_COMMAND + " -V");
+            /* exitValue() is only evaluated once waitFor() reported the process as terminated */
+            available = p.waitFor(2, TimeUnit.SECONDS) && p.exitValue() == 0;
+        } catch (final IOException e) {
+            ConcurrentLog.fine("Html2Image", "wkhtmltopdf is not included in system path.");
+        } catch (final InterruptedException e) {
+            Thread.currentThread().interrupt(); // preserve thread interrupted state
+        } finally {
+            if (p != null) {
+                /* Do not leave the probe process behind : it may still be running after a timeout or an interruption */
+                p.destroyForcibly();
+            }
+        }
+        return available;
+    }
+
+    /**
+     * Look for the imagemagick convert tool at its common installation paths.
+     * No path is checked on MS Windows.
+     * @return the first imagemagick convert executable file that exists, null when none can be found
+     */
+    private static File convertExecutable() {
+        if (OS.isWindows) {
+            return null;
+        }
+        /* Candidates are probed in order : the first existing one wins */
+        for (final File candidate : new File[] { convertMac1, convertMac2, convertDebian }) {
+            if (candidate.exists()) {
+                return candidate;
+            }
+        }
+        return null;
}
+ /**
+ * Tell whether the imagemagick convert command can be used on this system.
+ * The common installation paths are checked first; the system Path is
+ * probed only when none of them holds the executable.
+ * @return true when the imagemagick convert command is detected as available in the system
+ */
public static boolean convertAvailable() {
- return convertMac1.exists() || convertMac2.exists() || convertDebian.exists();
+ /* Check convert common installation paths and system Path */
+ return convertExecutable() != null || convertAvailableInPath();
}
+
+    /**
+     * Probe the system path by running "convert -version" and waiting at most
+     * 2 seconds for the command to terminate. Nothing is run on MS Windows.
+     * @return true when imagemagick convert is available in system path
+     */
+    private static boolean convertAvailableInPath() {
+        boolean available = false;
+        if(!OS.isWindows) { // on MS Windows convert is a system tool to convert volumes from FAT to NTFS
+            Process p = null;
+            try {
+                p = Runtime.getRuntime().exec(CONVERT_COMMAND + " -version");
+                /* exitValue() is only evaluated once waitFor() reported the process as terminated */
+                available = p.waitFor(2, TimeUnit.SECONDS) && p.exitValue() == 0;
+            } catch (final IOException e) {
+                ConcurrentLog.fine("Html2Image", "convert is not included in system path.");
+            } catch (final InterruptedException e) {
+                Thread.currentThread().interrupt(); // preserve thread interrupted state
+            } finally {
+                if (p != null) {
+                    /* Do not leave the probe process behind : it may still be running after a timeout or an interruption */
+                    p.destroyForcibly();
+                }
+            }
+        }
+        return available;
+    }
/**
* write a pdf of a web page
@@ -132,11 +220,18 @@ public class Html2Image {
}
private static boolean writeWkhtmltopdfInternal(final String url, final String proxy, final File destination, final String userAgent, final String acceptLanguage, final boolean ignoreErrors) {
- final File wkhtmltopdf = OS.isWindows
- ? (WKHTMLTOPDF_WINDOWS.exists() ? WKHTMLTOPDF_WINDOWS : WKHTMLTOPDF_WINDOWS_X86)
- : (wkhtmltopdfMac.exists() ? wkhtmltopdfMac : wkhtmltopdfDebian);
+ final String wkhtmltopdfCmd;
+ final File wkhtmltopdf = wkhtmltopdfExecutable();
+ if(wkhtmltopdf != null) {
+ wkhtmltopdfCmd = wkhtmltopdf.getAbsolutePath();
+ } else if(wkhtmltopdfAvailableInPath()) {
+ wkhtmltopdfCmd = WKHTMLTOPDF_COMMAND;
+ } else {
+ ConcurrentLog.warn("Html2Pdf", "Unable to locate wkhtmltopdf executable on this system!");
+ return false;
+ }
String commandline =
- wkhtmltopdf.getAbsolutePath() + " -q --title '" + url + "' " +
+ wkhtmltopdfCmd + " -q --title '" + url + "' " +
//acceptLanguage == null ? "" : "--custom-header 'Accept-Language' '" + acceptLanguage + "' " +
//(userAgent == null ? "" : "--custom-header \"User-Agent\" \"" + userAgent + "\" --custom-header-propagation ") +
(proxy == null ? "" : "--proxy " + proxy + " ") +
@@ -185,24 +280,36 @@ public class Html2Image {
/* Use JPEG as a default fallback */
imageFormat = "jpg";
}
- final File convert = convertMac1.exists() ? convertMac1 : convertMac2.exists() ? convertMac2 : convertDebian;
+ String convertCmd = null;
+ final File convert = convertExecutable();
+ if(convert != null) {
+ convertCmd = convert.getAbsolutePath();
+ } else if(convertAvailableInPath()) {
+ convertCmd = CONVERT_COMMAND;
+ } else {
+ ConcurrentLog.info("Html2Image", "Unable to locate convert executable on this system!");
+ }
// convert pdf to jpg using internal pdfbox capability
- if (OS.isWindows || !convert.exists()) {
+ if (convertCmd == null) {
try {
PDDocument pdoc = PDDocument.load(pdf);
BufferedImage bi = new PDFRenderer(pdoc).renderImageWithDPI(0, density, ImageType.RGB);
return ImageIO.write(bi, imageFormat, image);
- } catch (IOException ex) { }
+ } catch (final IOException ex) {
+ ConcurrentLog.warn("Html2Image", "Failed to create image with pdfbox"
+ + (ex.getMessage() != null ? " : " + ex.getMessage() : ""));
+ return false;
+ }
}
- // convert on mac or linux using external command line utility
+ // convert using external command line utility
try {
// i.e. convert -density 300 -trim yacy.pdf[0] -trim -resize 1024x -crop x1024+0+0 -quality 75% yacy-convert-300.jpg
// note: both -trim are necessary, otherwise it is trimmed only on one side. The [0] selects the first page of the pdf
- String command = convert.getAbsolutePath() + " -alpha remove -density " + density + " -trim " + pdf.getAbsolutePath() + "[0] -trim -resize " + width + "x -crop x" + height + "+0+0 -quality " + quality + "% " + image.getAbsolutePath();
+ String command = convertCmd + " -alpha remove -density " + density + " -trim " + pdf.getAbsolutePath() + "[0] -trim -resize " + width + "x -crop x" + height + "+0+0 -quality " + quality + "% " + image.getAbsolutePath();
List message = OS.execSynchronous(command);
if (image.exists()) return true;
ConcurrentLog.warn("Html2Image", "failed to create image with command: " + command);
@@ -327,28 +434,73 @@ public class Html2Image {
*
*/
public static void main(String[] args) {
+ final String usageMessage = "Usage : java " + Html2Image.class.getName()
+ + " [wkhtmltopdf|swing]";
+ int exitStatus = 0;
try {
if (args.length < 2) {
System.out.println("Missing required parameter(s).");
- System.out.println("Usage : java " + Html2Image.class.getName()
- + " [wkhtmltopdf|swing]");
+ System.out.println(usageMessage);
+ exitStatus = 1;
return;
}
+ final String targetPath = args[1];
if (args.length < 3 || "wkhtmltopdf".equals(args[2])) {
if(Html2Image.wkhtmltopdfAvailable()) {
- Html2Image.writeWkhtmltopdf(args[0], null, ClientIdentification.yacyInternetCrawlerAgent.userAgent,
- "en-us,en;q=0.5", new File(args[1]));
+ final File targetPdfFile;
+ if(targetPath.endsWith(".jpg") || targetPath.endsWith(".png")) {
+ targetPdfFile = new File(targetPath.substring(0, targetPath.length() - 4) + ".pdf");
+ } else if(targetPath.endsWith(".pdf")) {
+ targetPdfFile = new File(targetPath);
+ } else {
+ System.out.println("Unsupported output format");
+ System.out.println(usageMessage);
+ exitStatus = 1;
+ return;
+ }
+ if(Html2Image.writeWkhtmltopdf(args[0], null, ClientIdentification.yacyInternetCrawlerAgent.userAgent,
+ "en-us,en;q=0.5", targetPdfFile)) {
+ if(targetPath.endsWith(".jpg") || targetPath.endsWith(".png")) {
+ if(Html2Image.pdf2image(targetPdfFile, new File(targetPath), 1024, 1024, 300, 75)) {
+ ConcurrentLog.info("Html2Image", "wrote " + targetPath + " converted from " + targetPdfFile);
+ } else {
+ exitStatus = 1;
+ return;
+ }
+ }
+ } else {
+ exitStatus = 1;
+ return;
+ }
} else {
System.out.println("Unable to locate wkhtmltopdf executable on this system!");
+ exitStatus = 1;
+ return;
}
} else if ("swing".equals(args[2])) {
+ if(targetPath.endsWith(".pdf")) {
+ System.out.println("Pdf output format is not supported with swing method.");
+ exitStatus = 1;
+ return;
+ }
+ if(!targetPath.endsWith(".jpg") && !targetPath.endsWith(".png")) {
+ System.out.println("Unsupported output format");
+ System.out.println(usageMessage);
+ exitStatus = 1;
+ return;
+ }
+
try {
- Html2Image.writeSwingImage(args[0], new Dimension(1200, 2000), new File(args[1]));
+ Html2Image.writeSwingImage(args[0], new Dimension(1200, 2000), new File(targetPath));
} catch (final IOException e) {
e.printStackTrace();
+ exitStatus = 1;
+ return;
}
} else {
- System.out.println("Unknown method : please specify either wkhtmltopdf or swing");
+ System.out.println("Unknown method : please specify either wkhtmltopdf or swing.");
+ exitStatus = 1;
+ return;
}
} finally {
/* Shutdown running threads */
@@ -359,6 +511,9 @@ public class Html2Image {
Thread.currentThread().interrupt(); // restore interrupted state
}
ConcurrentLog.shutdown();
+ if(exitStatus != 0) {
+ System.exit(exitStatus);
+ }
}
}