diff --git a/source/net/yacy/cora/util/Html2Image.java b/source/net/yacy/cora/util/Html2Image.java
index 3821f6b42..e845a8a0f 100644
--- a/source/net/yacy/cora/util/Html2Image.java
+++ b/source/net/yacy/cora/util/Html2Image.java
@@ -71,17 +71,22 @@ public class Html2Image {
* @param destination
* @return
*/
- public static boolean writeWkhtmltopdf(String url, String proxy, File destination) {
- boolean success = writeWkhtmltopdfInternal(url, proxy, destination);
+ public static boolean writeWkhtmltopdf(String url, String proxy, String userAgent, File destination) { // userAgent may be null; then no User-Agent header option is put on the command line
+ boolean success = writeWkhtmltopdfInternal(url, proxy, destination, userAgent, false); // first attempt: pass the user agent here as well (otherwise it is lost whenever proxy == null); load errors are not ignored
if (success) return true;
if (proxy == null) return false;
ConcurrentLog.warn("Html2Image", "trying to load without proxy: " + url);
- return writeWkhtmltopdfInternal(url, null, destination);
+ return writeWkhtmltopdfInternal(url, null, destination, userAgent, true); // fallback: same user agent, no proxy, load errors ignored
}
- private static boolean writeWkhtmltopdfInternal(String url, String proxy, File destination) {
+ private static boolean writeWkhtmltopdfInternal(String url, String proxy, File destination, String userAgent, boolean ignoreErrors) {
final File wkhtmltopdf = wkhtmltopdfMac.exists() ? wkhtmltopdfMac : wkhtmltopdfDebian;
- String commandline = wkhtmltopdf.getAbsolutePath() + " -q --title " + url + (proxy == null ? " " : " --proxy " + proxy + " ") + (OS.isMacArchitecture ? "--load-error-handling ignore " : "--ignore-load-errors ") + url + " " + destination.getAbsolutePath();
+ String commandline = // NOTE(review): the single quotes below only group words if this string is parsed by a shell - confirm how it is executed
+ wkhtmltopdf.getAbsolutePath() + " -q --title " + url +
+ (userAgent == null ? "" : " --custom-header 'User-Agent' '" + userAgent + "' --custom-header-propagation") + // leading space keeps the option from fusing with the title url before it
+ (proxy == null ? " " : " --proxy " + proxy + " ") + // both branches end with a space, separating this part from what follows
+ (ignoreErrors ? (OS.isMacArchitecture ? "--load-error-handling ignore " : "--ignore-load-errors ") : "") + // the option spelling is chosen by OS.isMacArchitecture
+ url + " " + destination.getAbsolutePath();
try {
List message;
if (!usexvfb) {
diff --git a/source/net/yacy/crawler/data/Snapshots.java b/source/net/yacy/crawler/data/Snapshots.java
index fb9fe3f34..394d14f73 100644
--- a/source/net/yacy/crawler/data/Snapshots.java
+++ b/source/net/yacy/crawler/data/Snapshots.java
@@ -70,14 +70,14 @@ public class Snapshots {
* @param proxy - a string of the form 'http://:
* @return
*/
- public File downloadPDFSnapshot(final DigestURL url, final int depth, final Date date, boolean replaceOld, String proxy) {
+ public File downloadPDFSnapshot(final DigestURL url, final int depth, final Date date, boolean replaceOld, String proxy, String userAgent) { // userAgent is handed on unchanged to Html2Image.writeWkhtmltopdf
Collection oldPaths = findPaths(url, depth);
if (replaceOld) {
for (File oldPath: oldPaths) oldPath.delete();
}
File path = definePath(url, "pdf", depth, date);
path.getParentFile().mkdirs();
- boolean success = Html2Image.writeWkhtmltopdf(url.toNormalform(true), proxy, path);
+ boolean success = Html2Image.writeWkhtmltopdf(url.toNormalform(true), proxy, userAgent, path); // path is the destination file; the method returns path on success and null otherwise
return success ? path : null;
}
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index bdfad4ed2..566eca773 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -217,7 +217,7 @@ public final class LoaderDispatcher {
String ext = MultiProtocolURL.getFileExtension(file).toLowerCase();
boolean extok = ext.length() == 0 || file.length() <= 1 || htmlParser.htmlExtensionsSet.contains(ext);
if (depthok && extok) {
- File snapshotFile = sb.snapshots.downloadPDFSnapshot(request.url(), request.depth(), new Date(), crawlProfile.snapshotReplaceold(), sb.getConfigBool("isTransparentProxy", false) ? "http://127.0.0.1:" + sb.getConfigInt("port", 8090) : null);
+ File snapshotFile = sb.snapshots.downloadPDFSnapshot(request.url(), request.depth(), new Date(), crawlProfile.snapshotReplaceold(), sb.getConfigBool("isTransparentProxy", false) ? "http://127.0.0.1:" + sb.getConfigInt("port", 8090) : null, agent.userAgent);
log.info("SNAPSHOT - " + (snapshotFile == null ? "could not generate snapshot for " + request.url().toNormalform(true) : "wrote " + snapshotFile + " for " + request.url().toNormalform(true)));
} else {
//if (!depthok) log.warn("SNAPSHOT: depth not ok, " + (crawlProfile == null ? "profile = null" : "entry.depth() = " + request.depth() + ", profile.snapshotMaxdepth() = " + crawlProfile.snapshotMaxdepth()));