Extended detection of external tools used for Snapshots generation

This enable detecting wkhtmltopdf and Imagemagick convert executables
when they are at system Path in addition to common installation paths.
pull/260/head
luccioman 6 years ago
parent dbacff2044
commit 73a6e45524

@ -31,6 +31,7 @@ import java.beans.PropertyChangeListener;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeUnit;
import javax.imageio.ImageIO;
import javax.swing.JEditorPane;
@ -74,8 +75,10 @@ public class Html2Image {
private final static File convertMac1 = new File("/opt/local/bin/convert");
private final static File convertMac2 = new File("/opt/ImageMagick/bin/convert");
// debian
// to install: apt-get install wkhtmltopdf imagemagick xvfb ghostscript
/* Debian packages to install: apt-get install wkhtmltopdf imagemagick xvfb ghostscript
The imagemagick policy at /etc should also be checked :
if it contains a line such as <policy domain="coder" rights="none" pattern="PDF" /> it must be edited with rights="read" at minimum
*/
private final static File wkhtmltopdfDebian = new File("/usr/bin/wkhtmltopdf"); // there is no wkhtmltoimage, use convert to create images
private final static File convertDebian = new File("/usr/bin/convert");
@ -93,17 +96,102 @@ public class Html2Image {
*/
private static final File WKHTMLTOPDF_WINDOWS_X86 = new File(
"C:\\Program Files (x86)\\wkhtmltopdf\\bin\\wkhtmltopdf.exe");
/** Command to use when wkhtmltopdf is included in the system Path */
private static final String WKHTMLTOPDF_COMMAND = "wkhtmltopdf";
/** Command to use when imagemagick convert is included in the system Path */
private static final String CONVERT_COMMAND = "convert";
private static boolean usexvfb = false;
/**
* @return when the wkhtmltopdf command is detected as available in the system
*/
public static boolean wkhtmltopdfAvailable() {
return OS.isWindows ? (WKHTMLTOPDF_WINDOWS.exists() || WKHTMLTOPDF_WINDOWS_X86.exists())
: (wkhtmltopdfMac.exists() || wkhtmltopdfDebian.exists());
/* Check wkhtmltopdf common installation paths and system Path */
return wkhtmltopdfExecutable() != null || wkhtmltopdfAvailableInPath();
}
/**
* @return a wkhtmltopdf executable file when one can be found, null otherwise
*/
private static File wkhtmltopdfExecutable() {
File executable = null;
if(OS.isWindows) {
if(WKHTMLTOPDF_WINDOWS.exists()) {
executable = WKHTMLTOPDF_WINDOWS;
} else if(WKHTMLTOPDF_WINDOWS_X86.exists()) {
executable = WKHTMLTOPDF_WINDOWS_X86;
}
} else {
if(wkhtmltopdfMac.exists()) {
executable = wkhtmltopdfMac;
} else if(wkhtmltopdfDebian.exists()) {
executable = wkhtmltopdfDebian;
}
}
return executable;
}
/**
* @return true when wkhtmltopdf is available in system path
*/
private static boolean wkhtmltopdfAvailableInPath() {
boolean available = false;
try {
final Process p = Runtime.getRuntime().exec(WKHTMLTOPDF_COMMAND + " -V");
available = p.waitFor(2, TimeUnit.SECONDS) && p.exitValue() == 0;
} catch (final IOException e) {
ConcurrentLog.fine("Html2Image", "wkhtmltopdf is not included in system path.");
} catch (final InterruptedException e) {
Thread.currentThread().interrupt(); // preserve thread interrupted state
}
return available;
}
/**
* @return a imagemagick convert executable file when one can be found, null otherwise
*/
private static File convertExecutable() {
File executable = null;
if(!OS.isWindows) {
if(convertMac1.exists()) {
executable = convertMac1;
} else if(convertMac2.exists()) {
executable = convertMac2;
} else if(convertDebian.exists()) {
executable = convertDebian;
}
}
return executable;
}
/**
* @return when the imagemagick convert command is detected as available in the system
*/
public static boolean convertAvailable() {
return convertMac1.exists() || convertMac2.exists() || convertDebian.exists();
/* Check convert common installation paths and system Path */
return convertExecutable() != null || convertAvailableInPath();
}
/**
* @return when imagemagick convert is available in system path
*/
private static boolean convertAvailableInPath() {
boolean available = false;
if(!OS.isWindows) { // on MS Windows convert is a system tool to convert volumes from FAT to NTFS
try {
final Process p = Runtime.getRuntime().exec(CONVERT_COMMAND + " -version");
available = p.waitFor(2, TimeUnit.SECONDS) && p.exitValue() == 0;
} catch (final IOException e) {
ConcurrentLog.fine("Html2Image", "convert is not included in system path.");
} catch (final InterruptedException e) {
Thread.currentThread().interrupt(); // preserve thread interrupted state
}
}
return available;
}
/**
* write a pdf of a web page
@ -132,11 +220,18 @@ public class Html2Image {
}
private static boolean writeWkhtmltopdfInternal(final String url, final String proxy, final File destination, final String userAgent, final String acceptLanguage, final boolean ignoreErrors) {
final File wkhtmltopdf = OS.isWindows
? (WKHTMLTOPDF_WINDOWS.exists() ? WKHTMLTOPDF_WINDOWS : WKHTMLTOPDF_WINDOWS_X86)
: (wkhtmltopdfMac.exists() ? wkhtmltopdfMac : wkhtmltopdfDebian);
final String wkhtmltopdfCmd;
final File wkhtmltopdf = wkhtmltopdfExecutable();
if(wkhtmltopdf != null) {
wkhtmltopdfCmd = wkhtmltopdf.getAbsolutePath();
} else if(wkhtmltopdfAvailableInPath()) {
wkhtmltopdfCmd = WKHTMLTOPDF_COMMAND;
} else {
ConcurrentLog.warn("Html2Pdf", "Unable to locate wkhtmltopdf executable on this system!");
return false;
}
String commandline =
wkhtmltopdf.getAbsolutePath() + " -q --title '" + url + "' " +
wkhtmltopdfCmd + " -q --title '" + url + "' " +
//acceptLanguage == null ? "" : "--custom-header 'Accept-Language' '" + acceptLanguage + "' " +
//(userAgent == null ? "" : "--custom-header \"User-Agent\" \"" + userAgent + "\" --custom-header-propagation ") +
(proxy == null ? "" : "--proxy " + proxy + " ") +
@ -185,24 +280,36 @@ public class Html2Image {
/* Use JPEG as a default fallback */
imageFormat = "jpg";
}
final File convert = convertMac1.exists() ? convertMac1 : convertMac2.exists() ? convertMac2 : convertDebian;
String convertCmd = null;
final File convert = convertExecutable();
if(convert != null) {
convertCmd = convert.getAbsolutePath();
} else if(convertAvailableInPath()) {
convertCmd = CONVERT_COMMAND;
} else {
ConcurrentLog.info("Html2Image", "Unable to locate convert executable on this system!");
}
// convert pdf to jpg using internal pdfbox capability
if (OS.isWindows || !convert.exists()) {
if (convertCmd == null) {
try {
PDDocument pdoc = PDDocument.load(pdf);
BufferedImage bi = new PDFRenderer(pdoc).renderImageWithDPI(0, density, ImageType.RGB);
return ImageIO.write(bi, imageFormat, image);
} catch (IOException ex) { }
} catch (final IOException ex) {
ConcurrentLog.warn("Html2Image", "Failed to create image with pdfbox"
+ (ex.getMessage() != null ? " : " + ex.getMessage() : ""));
return false;
}
}
// convert on mac or linux using external command line utility
// convert using external command line utility
try {
// i.e. convert -density 300 -trim yacy.pdf[0] -trim -resize 1024x -crop x1024+0+0 -quality 75% yacy-convert-300.jpg
// note: both -trim are necessary, otherwise it is trimmed only on one side. The [0] selects the first page of the pdf
String command = convert.getAbsolutePath() + " -alpha remove -density " + density + " -trim " + pdf.getAbsolutePath() + "[0] -trim -resize " + width + "x -crop x" + height + "+0+0 -quality " + quality + "% " + image.getAbsolutePath();
String command = convertCmd + " -alpha remove -density " + density + " -trim " + pdf.getAbsolutePath() + "[0] -trim -resize " + width + "x -crop x" + height + "+0+0 -quality " + quality + "% " + image.getAbsolutePath();
List<String> message = OS.execSynchronous(command);
if (image.exists()) return true;
ConcurrentLog.warn("Html2Image", "failed to create image with command: " + command);
@ -327,28 +434,73 @@ public class Html2Image {
* </ol>
*/
public static void main(String[] args) {
final String usageMessage = "Usage : java " + Html2Image.class.getName()
+ " <url> <target-file[.pdf|.jpg|.png]> [wkhtmltopdf|swing]";
int exitStatus = 0;
try {
if (args.length < 2) {
System.out.println("Missing required parameter(s).");
System.out.println("Usage : java " + Html2Image.class.getName()
+ " <url> <target-file[.pdf|.jpg|.png]> [wkhtmltopdf|swing]");
System.out.println(usageMessage);
exitStatus = 1;
return;
}
final String targetPath = args[1];
if (args.length < 3 || "wkhtmltopdf".equals(args[2])) {
if(Html2Image.wkhtmltopdfAvailable()) {
Html2Image.writeWkhtmltopdf(args[0], null, ClientIdentification.yacyInternetCrawlerAgent.userAgent,
"en-us,en;q=0.5", new File(args[1]));
final File targetPdfFile;
if(targetPath.endsWith(".jpg") || targetPath.endsWith(".png")) {
targetPdfFile = new File(targetPath.substring(0, targetPath.length() - 4) + ".pdf");
} else if(targetPath.endsWith(".pdf")) {
targetPdfFile = new File(targetPath);
} else {
System.out.println("Unsupported output format");
System.out.println(usageMessage);
exitStatus = 1;
return;
}
if(Html2Image.writeWkhtmltopdf(args[0], null, ClientIdentification.yacyInternetCrawlerAgent.userAgent,
"en-us,en;q=0.5", targetPdfFile)) {
if(targetPath.endsWith(".jpg") || targetPath.endsWith(".png")) {
if(Html2Image.pdf2image(targetPdfFile, new File(targetPath), 1024, 1024, 300, 75)) {
ConcurrentLog.info("Html2Image", "wrote " + targetPath + " converted from " + targetPdfFile);
} else {
exitStatus = 1;
return;
}
}
} else {
exitStatus = 1;
return;
}
} else {
System.out.println("Unable to locate wkhtmltopdf executable on this system!");
exitStatus = 1;
return;
}
} else if ("swing".equals(args[2])) {
if(targetPath.endsWith(".pdf")) {
System.out.println("Pdf output format is not supported with swing method.");
exitStatus = 1;
return;
}
if(!targetPath.endsWith(".jpg") && !targetPath.endsWith(".png")) {
System.out.println("Unsupported output format");
System.out.println(usageMessage);
exitStatus = 1;
return;
}
try {
Html2Image.writeSwingImage(args[0], new Dimension(1200, 2000), new File(args[1]));
Html2Image.writeSwingImage(args[0], new Dimension(1200, 2000), new File(targetPath));
} catch (final IOException e) {
e.printStackTrace();
exitStatus = 1;
return;
}
} else {
System.out.println("Unknown method : please specify either wkhtmltopdf or swing");
System.out.println("Unknown method : please specify either wkhtmltopdf or swing.");
exitStatus = 1;
return;
}
} finally {
/* Shutdown running threads */
@ -359,6 +511,9 @@ public class Html2Image {
Thread.currentThread().interrupt(); // restore interrupted state
}
ConcurrentLog.shutdown();
if(exitStatus != 0) {
System.exit(exitStatus);
}
}
}

Loading…
Cancel
Save