added a servlet which can create preview images, preview tumbnails and

preview pdfs from web pages, i.e.:
http://localhost:8090/api/snapshot.png?url=http://yacy.net/en/&width=128&height=128
http://localhost:8090/api/snapshot.jpg?url=http://yacy.net/en/&width=128&height=128
http://localhost:8090/api/snapshot.pdf?url=http://yacy.net/en/

This supports also an on-the-fly generation of the preview documents if
the user is an administrator. Otherwise, the servlet fails.
To enable this, you must add wkhtmltopdf, imagemagick and (on headless
servers) xvfb to your operation system.

for detailed instructions, see
97f6089a41
pull/1/head
Michael Peter Christen 10 years ago
parent 28456dfc09
commit 226aea5914

@ -0,0 +1,137 @@
/**
* snapshot
* Copyright 2014 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 02.12.2014 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.awt.Container;
import java.awt.Image;
import java.awt.MediaTracker;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Collection;
import java.util.Date;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.Html2Image;
import net.yacy.document.ImageParser;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
public class snapshot {
//width = 1024, height = 1024, density = 300, quality = 75
private final static int DEFAULT_WIDTH = 1024;
private final static int DEFAULT_HEIGHT = 1024;
private final static int DEFAULT_DENSITY = 300;
private final static int DEFAULT_QUALITY = 75;
private final static String DEFAULT_EXT = "jpg";
public static Object respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
if (post == null) return null;
final String ext = header.get("EXT", "");
final boolean pdf = ext.equals("pdf");
final boolean authenticated = sb.adminAuthenticated(header) >= 2;
if (pdf && !authenticated) return null;
final boolean pngjpg = ext.equals("png") || ext.equals("jpg");
String urlhash = post.get("urlhash", "");
String url = post.get("url", "");
if (url.length() == 0 && urlhash.length() == 0) return null;
DigestURL durl = null;
if (urlhash.length() == 0) {
try {
durl = new DigestURL(url);
urlhash = ASCII.String(durl.hash());
} catch (MalformedURLException e) {
}
}
if (durl == null) {
try {
durl = sb.index.fulltext().getURL(urlhash);
} catch (IOException e) {
ConcurrentLog.logException(e);
}
}
if (durl == null) return null;
url = durl.toNormalform(true);
Collection<File> snapshots = sb.snapshots.findPaths(durl, "pdf");
File pdfFile = null;
if (snapshots.size() == 0) {
// if the client is authenticated, we create the pdf on the fly!
if (!authenticated) return null;
pdfFile = sb.snapshots.downloadPDFSnapshot(durl, 99, new Date(), true, sb.getConfigBool("isTransparentProxy", false) ? "http://127.0.0.1:" + sb.getConfigInt("port", 8090) : null, ClientIdentification.yacyProxyAgent.userAgent);
} else {
pdfFile = snapshots.iterator().next();
}
if (pdfFile == null) return null;
if (pdf) {
try {
byte[] pdfBinary = FileUtils.read(pdfFile);
return new ByteArrayInputStream(pdfBinary);
} catch (IOException e) {
ConcurrentLog.logException(e);
return null;
}
}
if (pngjpg) {
int width = Math.min(post.getInt("width", DEFAULT_WIDTH), DEFAULT_WIDTH);
int height = Math.min(post.getInt("height", DEFAULT_HEIGHT), DEFAULT_HEIGHT);
String imageFileStub = pdfFile.getAbsolutePath(); imageFileStub = imageFileStub.substring(0, imageFileStub.length() - 3); // cut off extension
File imageFile = new File(imageFileStub + DEFAULT_WIDTH + "." + DEFAULT_HEIGHT + "." + DEFAULT_EXT);
if (!imageFile.exists() && authenticated) {
Html2Image.pdf2image(pdfFile, imageFile, DEFAULT_WIDTH, DEFAULT_HEIGHT, DEFAULT_DENSITY, DEFAULT_QUALITY);
}
if (!imageFile.exists()) return null;
if (width == DEFAULT_WIDTH && height == DEFAULT_HEIGHT) {
try {
byte[] imageBinary = FileUtils.read(imageFile);
return new ByteArrayInputStream(imageBinary);
} catch (IOException e) {
ConcurrentLog.logException(e);
return null;
}
}
// lets read the file and scale
Image image;
try {
image = ImageParser.parse(imageFile.getAbsolutePath(), FileUtils.read(imageFile));
final Image scaled = image.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING);
final MediaTracker mediaTracker = new MediaTracker(new Container());
mediaTracker.addImage(scaled, 0);
try {mediaTracker.waitForID(0);} catch (final InterruptedException e) {}
return scaled;
} catch (IOException e) {
ConcurrentLog.logException(e);
return null;
}
}
return null;
}
}

@ -72,7 +72,7 @@ public class Html2Image {
* @return
*/
public static boolean writeWkhtmltopdf(String url, String proxy, String userAgent, File destination) {
boolean success = writeWkhtmltopdfInternal(url, proxy, destination, null, false);
boolean success = writeWkhtmltopdfInternal(url, proxy, destination, null, true);
if (success) return true;
if (proxy == null) return false;
ConcurrentLog.warn("Html2Image", "trying to load without proxy: " + url);

@ -71,7 +71,7 @@ public class Snapshots {
* @return
*/
public File downloadPDFSnapshot(final DigestURL url, final int depth, final Date date, boolean replaceOld, String proxy, String userAgent) {
Collection<File> oldPaths = findPaths(url, depth);
Collection<File> oldPaths = findPaths(url, depth, "pdf");
if (replaceOld) {
for (File oldPath: oldPaths) oldPath.delete();
}
@ -126,9 +126,9 @@ public class Snapshots {
* @param ext
* @return a set of files for snapshots of the url
*/
public Collection<File> findPaths(final DigestURL url) {
public Collection<File> findPaths(final DigestURL url, final String ext) {
for (int i = 0; i < 100; i++) {
Collection<File> paths = findPaths(url, i);
Collection<File> paths = findPaths(url, i, ext);
if (paths.size() > 0) return paths;
}
return new ArrayList<>(0);
@ -142,13 +142,12 @@ public class Snapshots {
* @param depth
* @return a set of files for snapshots of the url
*/
public Collection<File> findPaths(final DigestURL url, final int depth) {
public Collection<File> findPaths(final DigestURL url, final int depth, final String ext) {
String id = ASCII.String(url.hash());
File pathToShard = pathToShard(url, depth);
String[] list = pathToShard.exists() && pathToShard.isDirectory() ? pathToShard.list() : null; // may be null if path does not exist
ArrayList<File> paths = new ArrayList<>();
if (list != null) {
final String ext = MultiProtocolURL.getFileExtension(url.getFileName());
for (String f: list) {
if (f.startsWith(id) && f.endsWith(ext)) paths.add(new File(pathToShard, f));
}

@ -853,6 +853,19 @@ public class YaCyDefaultServlet extends HttpServlet {
return;
}
if (tmp instanceof InputStream) {
final InputStream is = (InputStream) tmp;
final String mimeType = Classification.ext2mime(targetExt, TEXT_HTML.asString());
response.setContentType(mimeType);
response.setStatus(HttpServletResponse.SC_OK);
byte[] buffer = new byte[4096];
int l, size = 0;
while ((l = is.read(buffer)) > 0) {response.getOutputStream().write(buffer, 0, l); size += l;}
response.setContentLength(size);
is.close();
return;
}
servletProperties templatePatterns;
if (tmp == null) {
// if no args given, then tp will be an empty Hashtable object (not null)

Loading…
Cancel
Save