Merge pull request from luccioman/master

Favicon retrieval and image preview enhancements.
More details on mantis 629 (http://mantis.tokeek.de/view.php?id=629)
pull/91/head
luccioman 9 years ago committed by GitHub
commit d16e57b41e

@ -278,6 +278,18 @@ outboundlinks_urlstub_sxt
## external links, the visible anchor text
outboundlinks_anchortext_txt
## all icon links without the protocol and '://'
icons_urlstub_sxt
## protocols of all icon links: kept separate from icons_urlstub to provide some compression, as the http protocol is implied as default and not stored
icons_protocol_sxt
## all icon links relationships space separated (e.g. 'icon apple-touch-icon')
icons_rel_sxt
## all icon sizes space separated (e.g. '16x16 32x32')
icons_sizes_sxt
## all text/words appearing in image alt texts or the tokenized url
images_text_t

@ -0,0 +1,195 @@
// ViewFavicon.java
// -----------------------
// part of YaCy
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.peers.graphics.EncodedImage;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.visualization.ImageViewer;
/**
* Extends ViewImage behavior : add a specific favicon cache and use of a
* default image on loading error.
*
* @author luc
*
*/
public class ViewFavicon {

    /** Single instance of ImageViewer */
    private static final ImageViewer VIEWER = new ImageViewer();

    /** Icons cache, keyed by normalized icon URL, holding png encoded bytes */
    private static final Map<String, byte[]> pngIconCache = new ConcurrentARC<String, byte[]>(1000,
            Math.max(10, Math.min(32, WorkflowProcessor.availableCPU * 2)));

    /** Default icon local file, relative to the application path */
    private static final String defaulticon = "htroot/env/grafics/dfltfvcn.ico";

    /**
     * Default icon encoded as png : we use a byte array as it is thread-safe
     * instead of a ByteBuffer in EncodedImage
     */
    private static byte[] defaultPNGEncodedIcon = null;

    /**
     * Try parsing image from post "url" parameter (authenticated users) or from
     * "code" parameter (non authenticated users). When image could be parsed,
     * try encoding to target format specified by header "EXT". When an
     * IOException occurs, or when the icon could not be encoded, the default
     * icon is returned instead.
     *
     * @param header
     *            request header
     * @param post
     *            post parameters
     * @param env
     *            Switchboard instance
     * @return an InputStream on the icon bytes encoded in the format specified
     *         by the "EXT" header (or on the default icon), or an InputStream
     *         opened directly on the original image data when the requested
     *         format equals the source one and is rendered by browsers.
     */
    public static Object respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final Switchboard sb = (Switchboard) env;
        final String ext = header.get("EXT", null);
        final boolean isPNGTarget = "png".equalsIgnoreCase(ext);

        ImageInputStream imageInStream = null;
        InputStream inStream = null;
        byte[] resultBytes = null;
        try {
            /* Clear icon cache when running out of memory */
            if (MemoryControl.shortStatus()) {
                pngIconCache.clear();
            }

            final boolean auth = Domains.isLocalhost(header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP, ""))
                    || sb.verifyAuthentication(header); // handle access rights

            final DigestURL url = VIEWER.parseURL(post, auth);
            final String normalizedURL = url.toNormalform(false);

            if (isPNGTarget) {
                resultBytes = pngIconCache.get(normalizedURL);
            }

            /* Icon is not already in cache */
            if (resultBytes == null) {
                final String urlExt = MultiProtocolURL.getFileExtension(url.getFileName());
                if (ext != null && ext.equalsIgnoreCase(urlExt) && ImageViewer.isBrowserRendered(urlExt)) {
                    /*
                     * The raw stream is handed to the caller : no default icon
                     * has to be prepared on this path
                     */
                    return VIEWER.openInputStream(post, sb.loader, auth, url);
                }

                /*
                 * When opening a file, the most efficient is to open
                 * ImageInputStream directly on file
                 */
                if (url.isFile()) {
                    imageInStream = ImageIO.createImageInputStream(url.getFSFile());
                } else {
                    inStream = VIEWER.openInputStream(post, sb.loader, auth, url);
                    imageInStream = ImageIO.createImageInputStream(inStream);
                }

                // read image
                final EncodedImage encodedIcon = VIEWER.parseAndScale(post, auth, url, ext, imageInStream);

                if (encodedIcon != null && !encodedIcon.getImage().isEmpty()) {
                    resultBytes = encodedIcon.getImage().getBytes();
                    if (isPNGTarget && encodedIcon.getImage().length() <= 10240) {
                        /* Only store in cache icon images below 10KB, png encoded */
                        pngIconCache.put(normalizedURL, resultBytes);
                    }
                }
            }
        } catch (IOException e) {
            ConcurrentLog.fine("ViewFavicon", "Error loading favicon, default one wille be used : " + e);
        } finally {
            /*
             * imageInStream.close() method doesn't close source input stream
             */
            if (inStream != null) {
                try {
                    inStream.close();
                } catch (IOException ignored) {
                    /* nothing more can be done when closing fails */
                }
            }
        }

        /*
         * The fallback is deliberately done after (and not inside) the finally
         * block : it must not run when the raw stream has been returned above
         * or when an unchecked exception is propagating.
         */
        if (resultBytes == null) {
            resultBytes = defaultIconBytes(post, sb, ext, isPNGTarget);
        }

        return new ByteArrayInputStream(resultBytes);
    }

    /**
     * Provide the default icon bytes for the requested target format. The
     * result is kept in memory when no format or the png format is requested;
     * for any other format the default icon is encoded on each call.
     *
     * @param post
     *            post parameters
     * @param sb
     *            Switchboard instance
     * @param ext
     *            target image format. May be null.
     * @param isPNGTarget
     *            true when ext is the png format
     * @return default icon encoded bytes
     */
    private static byte[] defaultIconBytes(final serverObjects post, final Switchboard sb, final String ext,
            final boolean isPNGTarget) {
        if (ext != null && !isPNGTarget) {
            return loadDefaultIcon(post, sb, ext);
        }
        /* Load default icon only once */
        byte[] icon = defaultPNGEncodedIcon;
        if (icon == null) {
            icon = loadDefaultIcon(post, sb, ext);
            defaultPNGEncodedIcon = icon;
        }
        return icon;
    }

    /**
     * Load default icon and encode it to ext format
     *
     * @param post
     *            post parameters. When null, the icon is treated as not static.
     * @param sb
     *            Switchboard instance
     * @param ext
     *            target image format
     * @return icon encoded bytes, built from an empty source when the default
     *         icon file could not be read
     */
    private static byte[] loadDefaultIcon(final serverObjects post, final Switchboard sb, String ext) {
        byte[] defaultBytes;
        try {
            defaultBytes = FileUtils.read(new File(sb.getAppPath(), defaulticon));
        } catch (final IOException initicon) {
            ConcurrentLog.fine("ViewFavicon", "Default icon could not be read : " + initicon);
            defaultBytes = new byte[0];
        }
        final boolean isStatic = post != null && post.getBoolean("isStatic");
        return new EncodedImage(defaultBytes, ext, isStatic).getImage().getBytes();
    }
}

@ -53,6 +53,7 @@ import net.yacy.document.SentenceReader;
import net.yacy.document.Tokenizer;
import net.yacy.document.WordTokenizer;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.document.parser.html.IconEntry;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.search.Switchboard;
@ -322,54 +323,7 @@ public class ViewFile {
prop.put("viewMode_words", i);
} else if (viewMode.equals("links")) {
prop.put("viewMode", VIEW_MODE_AS_LINKLIST);
boolean dark = true;
int i = 0;
if (document.getEmaillinks() != null) {
Iterator<AnchorURL> emailit = document.getEmaillinks().iterator();
while (emailit.hasNext()) {
AnchorURL eentry = emailit.next();
prop.put("viewMode_links_" + i + "_nr", i);
prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
prop.put("viewMode_links_" + i + "_type", "email");
prop.put("viewMode_links_" + i + "_text", (eentry.getTextProperty().isEmpty()) ? "&nbsp;" : eentry.getTextProperty());
prop.put("viewMode_links_" + i + "_url", "#");
prop.put("viewMode_links_" + i + "_link", eentry.toNormalform(true));
prop.put("viewMode_links_" + i + "_rel", "");
prop.put("viewMode_links_" + i + "_name", eentry.getNameProperty());
dark = !dark;
i++;
}
}
i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
dark = (i % 2 == 0);
final Map<DigestURL, ImageEntry> ts = document.getImages();
final Iterator<ImageEntry> tsi = ts.values().iterator();
ImageEntry entry;
while (tsi.hasNext()) {
entry = tsi.next();
prop.put("viewMode_links_" + i + "_nr", i);
prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
prop.put("viewMode_links_" + i + "_type", "image");
prop.put("viewMode_links_" + i + "_text", (entry.alt().isEmpty()) ? "&nbsp;" : markup(wordArray, entry.alt()));
prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(true));
prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(true)));
if (entry.width() > 0 && entry.height() > 0) {
prop.put("viewMode_links_" + i + "_rel", entry.width() + "x" + entry.height() + " Pixel");
} else {
prop.put("viewMode_links_" + i + "_rel", "");
}
prop.put("viewMode_links_" + i + "_name", "");
dark = !dark;
i++;
}
i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getHyperlinks(), "link", (i % 2 == 0));
prop.put("viewMode_links", i);
putLinks(prop, wordArray, document);
}
// optional: generate snippet
@ -458,6 +412,65 @@ public class ViewFile {
return prop;
}
/**
 * Write every link of the document into prop as "viewMode_links_N_*" entries,
 * in this order : email links, video links, audio links, icons, images,
 * application links and hyperlinks. The total number of entries is finally
 * stored under the "viewMode_links" key.
 * @param prop object to be filled. Must not be null
 * @param wordArray array of words from word post parameter
 * @param document document to process
 */
private static void putLinks(final serverObjects prop, final String[] wordArray, Document document) {
    prop.put("viewMode", VIEW_MODE_AS_LINKLIST);

    /* Rows are shaded alternately : a row is "dark" exactly when its index is even */
    int count = 0;

    if (document.getEmaillinks() != null) {
        for (final AnchorURL email : document.getEmaillinks()) {
            final String key = "viewMode_links_" + count;
            prop.put(key + "_nr", count);
            prop.put(key + "_dark", (count % 2 == 0) ? "1" : "0");
            prop.put(key + "_type", "email");
            prop.put(key + "_text", (email.getTextProperty().isEmpty()) ? "&nbsp;" : email.getTextProperty());
            prop.put(key + "_url", "#");
            prop.put(key + "_link", email.toNormalform(true));
            prop.put(key + "_rel", "");
            prop.put(key + "_name", email.getNameProperty());
            count++;
        }
    }

    count += putMediaInfo(prop, wordArray, count, document.getVideolinks(), "video", (count % 2 == 0));
    count += putMediaInfo(prop, wordArray, count, document.getAudiolinks(), "audio", (count % 2 == 0));
    count += putIconsInfos(prop, wordArray, count, document.getIcons().values(), (count % 2 == 0));

    for (final ImageEntry image : document.getImages().values()) {
        final String key = "viewMode_links_" + count;
        prop.put(key + "_nr", count);
        prop.put(key + "_dark", (count % 2 == 0) ? "1" : "0");
        prop.put(key + "_type", "image");
        prop.put(key + "_text", (image.alt().isEmpty()) ? "&nbsp;" : markup(wordArray, image.alt()));
        prop.put(key + "_url", image.url().toNormalform(true));
        prop.put(key + "_link", markup(wordArray, image.url().toNormalform(true)));
        if (image.width() > 0 && image.height() > 0) {
            prop.put(key + "_rel", image.width() + "x" + image.height() + " Pixel");
        } else {
            prop.put(key + "_rel", "");
        }
        prop.put(key + "_name", "");
        count++;
    }

    count += putMediaInfo(prop, wordArray, count, document.getApplinks(), "app", (count % 2 == 0));
    count += putMediaInfo(prop, wordArray, count, document.getHyperlinks(), "link", (count % 2 == 0));
    prop.put("viewMode_links", count);
}
private static final String[] wordArray(String words) {
String[] w = new String[0];
if (words == null || words.isEmpty()) return w;
@ -489,6 +502,16 @@ public class ViewFile {
return message;
}
/**
* Fill prop object with media links.
* @param prop object to be filled
* @param wordArray words array
* @param c current links count
* @param media media links
* @param type type of media link
* @param dark current result line style
* @return number of links added to prop
*/
private static int putMediaInfo(
final serverObjects prop,
final String[] wordArray,
@ -516,5 +539,41 @@ public class ViewFile {
}
return i;
}
/**
 * Write one "viewMode_links_N_*" entry of type "icon" into prop for each
 * icon link.
 * @param prop object to be filled
 * @param wordArray words array
 * @param c current links count : index used for the first added entry
 * @param icons icon links
 * @param dark line style of the first added entry, alternated for each following one
 * @return number of links added to prop
 */
private static int putIconsInfos(
        final serverObjects prop,
        final String[] wordArray,
        int c,
        final Collection<IconEntry> icons,
        boolean dark) {
    final int firstIndex = c;
    for (final IconEntry icon : icons) {
        final String key = "viewMode_links_" + c;
        final String relations = icon.relToString(); // the rel-attribute
        prop.put(key + "_nr", c);
        prop.put(key + "_dark", ((dark) ? 1 : 0));
        prop.putHTML(key + "_type", "icon");
        prop.put(key + "_text", ""); // icons have no text between the <a></a> tag
        prop.put(key + "_link", markup(wordArray, icon.getUrl().toNormalform(true)));
        prop.put(key + "_url", icon.getUrl().toNormalform(true));
        prop.put(key + "_rel", relations);
        prop.put(key + "_name", ""); // icons have no name attribute
        dark = !dark;
        c++;
    }
    return c - firstIndex;
}
}

@ -21,55 +21,31 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.awt.Container;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.MediaTracker;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.awt.image.Raster;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.Map;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.URLLicense;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.http.servlets.TemplateMissingParameterException;
import net.yacy.peers.graphics.EncodedImage;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.visualization.ImageViewer;
public class ViewImage {
private static Map<String, Image> iconcache = new ConcurrentARC<String, Image>(1000,
Math.max(10, Math.min(32, WorkflowProcessor.availableCPU * 2)));
private static String defaulticon = "htroot/env/grafics/dfltfvcn.ico";
private static byte[] defaulticonb = null;
/** Single instance of ImageViewer */
private static final ImageViewer VIEWER = new ImageViewer();
/**
* Try parsing image from post "url" parameter or from "code" parameter.
* Try parsing image from post "url" parameter (authenticated users) or from "code" parameter (non authenticated users).
* When image format is not supported, return directly image data. When
* image could be parsed, try encoding to target format specified by header
* "EXT".
@ -89,506 +65,69 @@ public class ViewImage {
* Sould end in a HTTP 500 error whose processing is more
* consistent across browsers than a response with zero content
* bytes.
* @throws TemplateMissingParameterException when one required parameter is missing
*/
public static Object respond(final RequestHeader header, final serverObjects post, final serverSwitch env)
throws IOException {
final Switchboard sb = (Switchboard) env;
// the url to the image can be either submitted with an url in clear
// text, or using a license key
// if the url is given as clear text, the user must be authorized as
// admin
// the license can be used also from non-authorized users
if(post == null) {
throw new TemplateMissingParameterException("please fill at least url or code parameter");
}
String urlString = post.get("url", "");
final String urlLicense = post.get("code", "");
String ext = header.get("EXT", null);
final boolean auth = Domains.isLocalhost(header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP, ""))
|| sb.verifyAuthentication(header); // handle access rights
DigestURL url = null;
if ((urlString.length() > 0) && (auth)) {
url = new DigestURL(urlString);
}
if ((url == null) && (urlLicense.length() > 0)) {
urlString = URLLicense.releaseLicense(urlLicense);
if (urlString != null) {
url = new DigestURL(urlString);
} else { // license is gone (e.g. released/remove in prev calls)
ConcurrentLog.fine("ViewImage", "image urlLicense not found key=" + urlLicense);
/* Return an empty EncodedImage. Caller is responsible for handling this correctly (500 status code response) */
return new EncodedImage(new byte[0], ext, post.getBoolean("isStatic")); // TODO: maybe favicon accessed again, check
// iconcache
}
}
DigestURL url = VIEWER.parseURL(post, auth);
// get the image as stream
if (MemoryControl.shortStatus()) {
iconcache.clear();
}
EncodedImage encodedImage = null;
Image image = iconcache.get(urlString);
if (image != null) {
encodedImage = new EncodedImage(image, ext, post.getBoolean("isStatic"));
} else {
ImageInputStream imageInStream = null;
InputStream inStream = null;
try {
String urlExt = MultiProtocolURL.getFileExtension(url.getFileName());
if (ext != null && ext.equalsIgnoreCase(urlExt) && isBrowserRendered(urlExt)) {
return openInputStream(post, sb.loader, auth, url);
}
/*
* When opening a file, the most efficient is to open
* ImageInputStream directly on file
*/
if (url.isFile()) {
imageInStream = ImageIO.createImageInputStream(url.getFSFile());
} else {
inStream = openInputStream(post, sb.loader, auth, url);
imageInStream = ImageIO.createImageInputStream(inStream);
}
// read image
encodedImage = parseAndScale(post, auth, urlString, ext, imageInStream);
} catch(Exception e) {
/* Exceptions are not propagated here : many error causes are possible, network errors,
* incorrect or unsupported format, bad ImageIO plugin...
* Instead return an empty EncodedImage. Caller is responsible for handling this correctly (500 status code response) */
if (url != null && "favicon.ico".equalsIgnoreCase(url.getFileName())) { // but on missing favicon just present a default (occures frequently by call from searchitem.html)
// currently yacysearchitem assigns "hosturl/favicon.ico" (to look for the filename should not much interfere with other situatios)
if (defaulticonb == null) { // load the default icon once
try {
defaulticonb = FileUtils.read(new File(sb.getAppPath(), defaulticon));
} catch (final IOException initicon) {
defaulticonb = new byte[0];
}
}
encodedImage = new EncodedImage(defaulticonb, ext, post.getBoolean("isStatic"));
} else {
encodedImage = new EncodedImage(new byte[0], ext, post.getBoolean("isStatic"));
}
} finally {
/*
* imageInStream.close() method doesn't close source input
* stream
*/
if (inStream != null) {
try {
inStream.close();
} catch (IOException ignored) {
}
}
}
}
return encodedImage;
}
/**
 * Open an input stream on the image url using the provided loader. The user
 * agent is taken from the "agentName" post parameter, defaulting to the
 * intranet crawler agent name when auth is true and to the internet crawler
 * agent name otherwise.
 *
 * @param post
 *            post parameters. Must not be null.
 * @param loader
 *            resources loader. Must not be null.
 * @param auth
 *            true when user has credentials to load full images.
 * @param url
 *            image url.
 * @return an open input stream instance (don't forget to close it).
 * @throws IOException
 *             when url is null, when the loader fails or when it provides
 *             no stream.
 */
private static InputStream openInputStream(final serverObjects post, final LoaderDispatcher loader,
        final boolean auth, DigestURL url) throws IOException {
    if (url == null) {
        throw new IOException("Input stream could no be open");
    }

    final InputStream stream;
    try {
        final String agentName = post.get("agentName", auth ? ClientIdentification.yacyIntranetCrawlerAgentName
                : ClientIdentification.yacyInternetCrawlerAgentName);
        stream = loader.openInputStream(loader.request(url, false, true), CacheStrategy.IFEXIST,
                BlacklistType.SEARCH, ClientIdentification.getAgent(agentName));
    } catch (final IOException e) {
        /* No need to log full stack trace (in most cases resource is not available because of a network error) */
        ConcurrentLog.fine("ViewImage", "cannot load image. URL : " + url.toNormalform(true));
        throw e;
    }

    if (stream == null) {
        throw new IOException("Input stream could no be open");
    }
    return stream;
}
/**
 * Tell whether an image format is left to the browser instead of being
 * parsed by ViewImage internals. The comparison ignores case.
 *
 * @param formatName
 *            informal file format name. For example : "png". May be null.
 * @return true when image format is rendered by browser and not by
 *         ViewImage internals
 */
public static boolean isBrowserRendered(String formatName) {
    /*
     * gif : not loaded because of an animated gif bug within jvm which
     * sends java into an endless loop with high CPU.
     * svg : not supported by jdk, but by most browsers, so just the content
     * is delivered (without crop/scale).
     */
    final String[] browserFormats = { "gif", "svg" };
    for (final String browserFormat : browserFormats) {
        if (browserFormat.equalsIgnoreCase(formatName)) {
            return true;
        }
    }
    return false;
}
/**
* Process source image to try to produce an EncodedImage instance
* eventually scaled and clipped depending on post parameters. When
* processed, imageInStream is closed.
*
* @param post
* request post parameters. Must not be null.
* @param auth
* true when access rigths are OK.
* @param urlString
* image source URL as String. Must not be null.
* @param ext
* target image file format. May be null.
* @param imageInStream
* open stream on image content. Must not be null.
* @return an EncodedImage instance.
* @throws IOException
* when image could not be parsed or encoded to specified format.
*/
protected static EncodedImage parseAndScale(serverObjects post, boolean auth, String urlString, String ext,
ImageInputStream imageInStream) throws IOException {
EncodedImage encodedImage;
// BufferedImage image = ImageIO.read(imageInStream);
Iterator<ImageReader> readers = ImageIO.getImageReaders(imageInStream);
if (!readers.hasNext()) {
try {
/* When no reader can be found, we have to close the stream */
imageInStream.close();
} catch (IOException ignoredException) {
ImageInputStream imageInStream = null;
InputStream inStream = null;
try {
String urlExt = MultiProtocolURL.getFileExtension(url.getFileName());
if (ext != null && ext.equalsIgnoreCase(urlExt) && ImageViewer.isBrowserRendered(urlExt)) {
return VIEWER.openInputStream(post, sb.loader, auth, url);
}
String errorMessage = "Image format (" + ext + ") is not supported.";
ConcurrentLog.fine("ViewImage", errorMessage + "Image URL : " + urlString);
/*
* Throw an exception, wich will end in a HTTP 500 response, better
* handled by browsers than an empty image
* When opening a file, the most efficient is to open
* ImageInputStream directly on file
*/
throw new IOException(errorMessage);
}
ImageReader reader = readers.next();
reader.setInput(imageInStream, true, true);
int maxwidth = post.getInt("maxwidth", 0);
int maxheight = post.getInt("maxheight", 0);
final boolean quadratic = post.containsKey("quadratic");
boolean isStatic = post.getBoolean("isStatic");
BufferedImage image = null;
boolean returnRaw = true;
if (!auth || maxwidth != 0 || maxheight != 0) {
// find original size
final int originWidth = reader.getWidth(0);
final int originHeigth = reader.getHeight(0);
// in case of not-authorized access shrink the image to
// prevent
// copyright problems, so that images are not larger than
// thumbnails
Dimension maxDimensions = calculateMaxDimensions(auth, originWidth, originHeigth, maxwidth, maxheight);
// if a quadratic flag is set, we cut the image out to be in
// quadratic shape
int w = originWidth;
int h = originHeigth;
if (quadratic && originWidth != originHeigth) {
Rectangle square = getMaxSquare(originHeigth, originWidth);
h = square.height;
w = square.width;
}
Dimension finalDimensions = calculateDimensions(w, h, maxDimensions);
if (originWidth != finalDimensions.width || originHeigth != finalDimensions.height) {
returnRaw = false;
image = readImage(reader);
if (quadratic && originWidth != originHeigth) {
image = makeSquare(image);
}
image = scale(finalDimensions.width, finalDimensions.height, image);
}
if (finalDimensions.width == 16 && finalDimensions.height == 16) {
// this might be a favicon, store image to cache for
// faster
// re-load later on
if (image == null) {
returnRaw = false;
image = readImage(reader);
}
iconcache.put(urlString, image);
}
}
/* Image do not need to be scaled or cropped */
if (returnRaw) {
if (!reader.getFormatName().equalsIgnoreCase(ext) || imageInStream.getFlushedPosition() != 0) {
/*
* image parsing and reencoding is only needed when source image
* and target formats differ, or when first bytes have been discarded
*/
returnRaw = false;
image = readImage(reader);
if (url.isFile()) {
imageInStream = ImageIO.createImageInputStream(url.getFSFile());
} else {
inStream = VIEWER.openInputStream(post, sb.loader, auth, url);
imageInStream = ImageIO.createImageInputStream(inStream);
}
}
if (returnRaw) {
byte[] imageData = readRawImage(imageInStream);
encodedImage = new EncodedImage(imageData, ext, isStatic);
} else {
// read image
encodedImage = VIEWER.parseAndScale(post, auth, url, ext, imageInStream);
} catch (Exception e) {
/*
* An error can still occur when transcoding from buffered image to
* target ext : in that case EncodedImage.getImage() is empty.
* Exceptions are not propagated here : many error causes are
* possible, network errors, incorrect or unsupported format, bad
* ImageIO plugin... Instead return an empty EncodedImage. Caller is
* responsible for handling this correctly (500 status code
* response)
*/
encodedImage = new EncodedImage(image, ext, isStatic);
if (encodedImage.getImage().length() == 0) {
String errorMessage = "Image could not be encoded to format : " + ext;
ConcurrentLog.fine("ViewImage", errorMessage + ". Image URL : " + urlString);
throw new IOException(errorMessage);
}
}
return encodedImage;
}
/**
* Read image using specified reader and close ImageInputStream source.
* Input must have bean set before using
* {@link ImageReader#setInput(Object)}
*
* @param reader
* image reader. Must not be null.
* @return buffered image
* @throws IOException
* when an error occured
*/
private static BufferedImage readImage(ImageReader reader) throws IOException {
BufferedImage image;
try {
image = reader.read(0);
encodedImage = new EncodedImage(new byte[0], ext, post.getBoolean("isStatic"));
} finally {
reader.dispose();
Object input = reader.getInput();
if (input instanceof ImageInputStream) {
try {
((ImageInputStream) input).close();
} catch (IOException ignoredException) {
}
}
}
return image;
}
/**
 * Read image data without parsing : the stream is rewound to its start,
 * fully read, then closed.
 *
 * @param inStream
 *            image source. Must not be null. First bytes must not have been marked discarded ({@link ImageInputStream#getFlushedPosition()} must be zero)
 * @return image data as bytes
 * @throws IOException
 *             when a read/write error occured.
 */
private static byte[] readRawImage(ImageInputStream inStream) throws IOException {
    final byte[] buffer = new byte[4096];
    final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    try {
        /*
         * Rewinding is done inside the try block so that the stream also
         * gets closed when seek fails (e.g. first bytes already discarded)
         */
        inStream.seek(0);
        int l;
        while ((l = inStream.read(buffer)) >= 0) {
            outStream.write(buffer, 0, l);
        }
        return outStream.toByteArray();
    } finally {
        try {
            inStream.close();
        } catch (IOException ignored) {
            /* data has already been read or an error is already propagating */
        }
    }
}
/**
 * Calculate the dimensions to render an image with, from its original
 * dimensions and the maximum allowed ones. An image exceeding the maximum
 * width or height is shrunk with one single factor applied to both sides;
 * an image fitting inside the maximum dimensions is left unchanged.
 *
 * @param originWidth
 *            original image width
 * @param originHeight
 *            original image height
 * @param max
 *            maximum dimensions. Must not be null.
 * @return dimensions to render image
 */
protected static Dimension calculateDimensions(final int originWidth, final int originHeight, final Dimension max) {
    final boolean fitsInside = max.width >= originWidth && max.height >= originHeight;
    if (fitsInside) {
        // do not scale
        return new Dimension(originWidth, originHeight);
    }

    // horizontal and vertical ratios : 1.0 on a side that already fits
    double horizontalRatio = 1.0;
    if (originWidth > max.width) {
        horizontalRatio = max.width / (double) originWidth;
    }
    double verticalRatio = 1.0;
    if (originHeight > max.height) {
        verticalRatio = max.height / (double) originHeight;
    }

    final double ratio = Math.min(horizontalRatio, verticalRatio);
    int width = originWidth;
    int height = originHeight;
    if (ratio < 1.0) {
        width = (int) (originWidth * ratio);
        height = (int) (originHeight * ratio);
    }
    // a scaled side is never smaller than one pixel
    return new Dimension(Math.max(1, width), Math.max(1, height));
}
/**
 * Calculate image maximum dimensions from original and specified maximum
 * dimensions.
 *
 * @param auth
 *            true when access rights are OK.
 * @param originWidth
 *            original image width
 * @param originHeight
 *            original image height
 * @param maxWidth
 *            requested maximum width, zero meaning no limit. Only used
 *            when auth is true.
 * @param maxHeight
 *            requested maximum height, zero meaning no limit. Only used
 *            when auth is true.
 * @return maximum dimensions to render image
 */
protected static Dimension calculateMaxDimensions(final boolean auth, final int originWidth, final int originHeight,
        final int maxWidth, final int maxHeight) {
    if (auth) {
        final int width = (maxWidth != 0) ? maxWidth : originWidth;
        final int height = (maxHeight != 0) ? maxHeight : originHeight;
        return new Dimension(width, height);
    }

    // in case of not-authorized access shrink the image to prevent
    // copyright problems, so that images are not larger than thumbnails
    final boolean iconSized = originWidth <= 16 && originHeight <= 16;
    if (iconSized) {
        return new Dimension(16, 16);
    }
    return new Dimension(Math.min(96, originWidth), Math.min(96, originHeight));
}
/**
* Scale image to specified dimensions
*
* @param width
* target width
* @param height
* target height
* @param image
* image to scale. Must not be null.
* @return a scaled image
*/
protected static BufferedImage scale(final int width, final int height, final BufferedImage image) {
// compute scaled image
Image scaled = image.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING);
final MediaTracker mediaTracker = new MediaTracker(new Container());
mediaTracker.addImage(scaled, 0);
try {
mediaTracker.waitForID(0);
} catch (final InterruptedException e) {
}
// make a BufferedImage out of that
BufferedImage result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
try {
result.createGraphics().drawImage(scaled, 0, 0, width, height, null);
// check outcome
final Raster raster = result.getData();
int[] pixel = new int[raster.getSampleModel().getNumBands()];
pixel = raster.getPixel(0, 0, pixel);
} catch (final Exception e) {
/*
* Exception may be caused by source image color model : try now to
* convert to RGB before scaling
* imageInStream.close() method doesn't close source input stream
*/
try {
BufferedImage converted = EncodedImage.convertToRGB(image);
scaled = converted.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING);
mediaTracker.addImage(scaled, 1);
if (inStream != null) {
try {
mediaTracker.waitForID(1);
} catch (final InterruptedException e2) {
inStream.close();
} catch (IOException ignored) {
}
result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
result.createGraphics().drawImage(scaled, 0, 0, width, height, null);
// check outcome
final Raster raster = result.getData();
int[] pixel = new int[result.getSampleModel().getNumBands()];
pixel = raster.getPixel(0, 0, pixel);
} catch (Exception e2) {
result = image;
}
ConcurrentLog.fine("ViewImage", "Image could not be scaled");
}
return result;
}
/**
 * Compute the largest square fitting inside the given dimensions, centered
 * along the longer side.
 *
 * @param h
 *            image height
 * @param w
 *            image width
 * @return max square area fitting inside dimensions
 */
protected static Rectangle getMaxSquare(final int h, final int w) {
    if (w > h) {
        /* landscape : keep full height, center horizontally */
        return new Rectangle((w - h) / 2, 0, h, h);
    }
    /* portrait or already square : keep full width, center vertically */
    return new Rectangle(0, (h - w) / 2, w, w);
}
/**
 * Crop image to make a square : the largest centered square area of the
 * source is copied, without scaling, into a new image.
 *
 * @param image
 *            image to crop. Must not be null.
 * @return a new square image of type {@link BufferedImage#TYPE_INT_ARGB}
 *         whose side is the smaller dimension of the source image
 */
protected static BufferedImage makeSquare(BufferedImage image) {
    final int w = image.getWidth();
    final int h = image.getHeight();
    final boolean landscape = w > h;
    final int side = landscape ? h : w;
    /* offset of the cropped area along the longer side */
    final int offsetX = landscape ? (w - h) / 2 : 0;
    final int offsetY = landscape ? 0 : (h - w) / 2;

    final BufferedImage dst = new BufferedImage(side, side, BufferedImage.TYPE_INT_ARGB);
    final Graphics2D g = dst.createGraphics();
    try {
        /*
         * drawImage takes the second corner of each rectangle as an exclusive
         * bound : use side (and not side - 1) so that the last row and column
         * are copied too and source and destination areas have the same size
         */
        g.drawImage(image, 0, 0, side, side, offsetX, offsetY, offsetX + side, offsetY + side, null);
    } finally {
        g.dispose();
    }
    return dst;
}
}

@ -65,7 +65,7 @@ public class getpageinfo {
prop.put("lang", "");
prop.put("robots-allowed", "3"); //unknown
prop.put("robotsInfo", ""); //unknown
prop.put("favicon","");
prop.put("icons","0");
prop.put("sitelist", "");
prop.put("filter", ".*");
prop.put("oai", 0);
@ -110,8 +110,15 @@ public class getpageinfo {
// put the document title
prop.putXML("title", removelinebreaks(scraper.dc_title()));
// put the favicon that belongs to the document
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
Set<DigestURL> iconURLs = scraper.getIcons().keySet();
int i = 0;
for (DigestURL iconURL : iconURLs) {
prop.putXML("icons_" + i + "_icon", iconURL.toNormalform(false));
prop.put("icons_" + i + "_eol", 1);
i++;
}
prop.put("icons_" + (i - 1) + "_eol", 0);
prop.put("icons", iconURLs.size());
// put keywords
final Set<String> list = scraper.dc_subject();

@ -6,7 +6,9 @@
"robots": "#(robots-allowed)#0::1::#(/robots-allowed)#",
"robotsInfo": "#[robotsInfo]#",
"favicon": "#[favicon]#",
"icons": [#{icons}#
"#[icon]#"#(eol)#::,#(/eol)#
#{/icons}#],
"filter": "#[filter]#",
"tags": "#{tags}##[tag]#,#{/tags}#",

@ -8,7 +8,11 @@
#{sitemaps}#
<sitemap>#[sitemap]#</sitemap>
#{/sitemaps}#
<favicon>#[favicon]#</favicon>
<icons>
#{icons}#
<icon>#[icon]#</icon>
#{/icons}#
</icons>
<sitelist>#[sitelist]#</sitelist>
<filter>#[filter]#</filter>
<tags>

@ -35,6 +35,11 @@ import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
@ -47,11 +52,6 @@ import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class getpageinfo_p {
@ -66,7 +66,7 @@ public class getpageinfo_p {
prop.put("robots-allowed", "3"); //unknown
prop.put("robotsInfo", ""); //unknown
prop.put("sitemap", "");
prop.put("favicon","");
prop.put("icons","0");
prop.put("sitelist", "");
prop.put("filter", ".*");
prop.put("oai", 0);
@ -110,8 +110,16 @@ public class getpageinfo_p {
// put the document title
prop.putXML("title", scraper.dc_title());
// put the favicon that belongs to the document
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
// put the icons that belongs to the document
Set<DigestURL> iconURLs = scraper.getIcons().keySet();
int i = 0;
for (DigestURL iconURL : iconURLs) {
prop.putXML("icons_" + i + "_icon", iconURL.toNormalform(false));
prop.put("icons_" + i + "_eol", 1);
i++;
}
prop.put("icons_" + (i - 1) + "_eol", 0);
prop.put("icons", iconURLs.size());
// put keywords
final Set<String> list = scraper.dc_subject();

@ -8,7 +8,11 @@
#{sitemaps}#
<sitemap>#[sitemap]#</sitemap>
#{/sitemaps}#
<favicon>#[favicon]#</favicon>
<icons>
#{icons}#
<icon>#[icon]#</icon>
#{/icons}#
</icons>
<sitelist>#[sitelist]#</sitelist>
<filter>#[filter]#</filter>
<tags>

@ -294,7 +294,7 @@ function yacysearch(clear) {
data.channels[0].items,
function(i,item) {
if (item) {
var favicon = "<img src='"+yconf.url+"/ViewImage.png?width=16&amp;height=16&amp;code="+item.faviconCode+"' class='favicon'/>";
var favicon = "<img src='"+item.faviconUrl+"' class='favicon'/>";
var title = "<h3 class='linktitle'>"+favicon+"<a href='"+item.link+"' target='_blank'>"+item.title+"</a></h3>";
var url = "<p class='url'><a href='"+item.link+"' target='_blank'>"+item.link+"</a></p>"
var desc = "<p class='desc'>"+item.description+"</p>";

@ -264,7 +264,7 @@ function yacysearch(global, clear) {
data.channels[0].items,
function(i,item) {
if (item) {
var favicon = "<img src='"+yconf.url+"/ViewImage.png?width=16&amp;height=16&amp;code="+item.faviconCode+"' class='favicon'/>";
var favicon = "<img src='"+item.faviconUrl+"' class='favicon'/>";
var title = "<h3 class='linktitle'>"+favicon+"<a href='"+item.link+"' target='_blank'>"+item.title+"</a></h3>";
var url = "<p class='url'><a href='"+item.link+"' target='_blank'>"+item.link+"</a></p>"
var desc = "<p class='desc'>"+item.description+"</p>";

@ -1,7 +1,7 @@
#(content)#::
<div class="searchresults">
<h4 class="linktitle">
<img width="16" height="16" src="ViewImage.png?width=16&amp;height=16&amp;code=#[faviconCode]#&amp;isStatic=true" id="f#[urlhash]#" class="favicon" style="width:16px; height:16px;" alt="" />
<img width="16" height="16" src="#[faviconUrl]#" id="f#[urlhash]#" class="favicon" style="width:16px; height:16px;" alt="" />
<a href="#[link]#" target="#[target]#">#[title]#</a></h4>
<div class="urlactions">
#(heuristic)#::

@ -24,6 +24,7 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.awt.Dimension;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
@ -51,6 +52,7 @@ import net.yacy.crawler.data.Transactions;
import net.yacy.crawler.data.Transactions.State;
import net.yacy.crawler.retrieval.Response;
import net.yacy.data.URLLicense;
import net.yacy.document.parser.html.IconEntry;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.util.Formatter;
import net.yacy.peers.NewsPool;
@ -69,6 +71,7 @@ import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.utils.crypt;
import net.yacy.utils.nxTools;
import net.yacy.visualization.ImageViewer;
public class yacysearchitem {
@ -130,14 +133,6 @@ public class yacysearchitem {
final DigestURL resultURL = result.url();
final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");
final int port = resultURL.getPort();
DigestURL faviconURL = null;
if ((fileType == FileType.HTML || fileType == FileType.JSON) && !sb.isIntranetMode()) try {
faviconURL = new DigestURL(resultURL.getProtocol() + "://" + resultURL.getHost() + ((port != -1) ? (":" + port) : "") + "/favicon.ico");
} catch (final MalformedURLException e1) {
ConcurrentLog.logException(e1);
faviconURL = null;
}
final String resource = theSearch.query.domType.toString();
final String origQ = theSearch.query.getQueryGoal().getQueryString(true);
prop.put("content", 1); // switch on specific content
@ -186,8 +181,11 @@ public class yacysearchitem {
boolean isAtomFeed = header.get(HeaderFramework.CONNECTION_PROP_EXT, "").equals("atom");
String resultFileName = resultURL.getFileName();
prop.putHTML("content_target", target);
//if (faviconURL != null && fileType == FileType.HTML) sb.loader.loadIfNotExistBackground(faviconURL, 1024 * 1024 * 10, null, ClientIdentification.yacyIntranetCrawlerAgent);
prop.putHTML("content_faviconCode", URLLicense.aquireLicense(faviconURL)); // acquire license for favicon url loading
DigestURL faviconURL = null;
if ((fileType == FileType.HTML || fileType == FileType.JSON) && !sb.isIntranetMode()) {
faviconURL = getFaviconURL(result, new Dimension(16, 16));
}
prop.putHTML("content_faviconUrl", processFaviconURL(authenticated, faviconURL));
prop.put("content_urlhash", urlhash);
prop.put("content_ranking", Float.toString(result.score()));
Date[] events = result.events();
@ -302,7 +300,7 @@ public class yacysearchitem {
if (theSearch.query.contentdom == Classification.ContentDomain.IMAGE) {
// image search; shows thumbnails
processImage(sb, prop, item, theSearch, target_special_pattern, timeout);
processImage(sb, prop, item, theSearch, target_special_pattern, timeout, authenticated);
theSearch.query.transmitcount = item + 1;
return prop;
}
@ -335,7 +333,66 @@ public class yacysearchitem {
return prop;
}
/**
 * Looks up the favicon URL of a search result. The icon registered on the
 * solr result document is used when there is one; otherwise the default
 * favicon location (i.e. "http://host/favicon.ico") is built from the
 * protocol, host and port of the result URL.
 *
 * @param result
 *            solr document result. Must not be null.
 * @param preferredSize
 *            preferred icon size, handed over to
 *            {@code result.getFavicon(Dimension)}
 * @return favicon URL, or null when even the default favicon URL can not be
 *         generated
 * @throws NullPointerException
 *             when one requested parameter is null
 */
protected static DigestURL getFaviconURL(final URIMetadataNode result, Dimension preferredSize) {
    /* Icon selection (preferred size and fallbacks) is delegated to the result document */
    final IconEntry indexedIcon = result.getFavicon(preferredSize);
    if (indexedIcon != null) {
        return indexedIcon.getUrl();
    }

    /* No icon known for this document : fall back to the conventional location at the host root */
    try {
        final StringBuilder defaultLocation = new StringBuilder();
        defaultLocation.append(result.url().getProtocol()).append("://").append(result.url().getHost());
        if (result.url().getPort() != -1) {
            defaultLocation.append(":").append(result.url().getPort());
        }
        defaultLocation.append("/favicon.ico");
        return new DigestURL(defaultLocation.toString());
    } catch (final MalformedURLException malformed) {
        ConcurrentLog.logException(malformed);
        return null;
    }
}
/**
 * Builds the relative ViewFavicon servlet URL used to display a result icon.
 *
 * @param authenticated
 *            true when current user is authenticated
 * @param faviconURL
 *            url icon of web site. May be null.
 * @return url to propose in search result or empty string when faviconURL
 *         is null
 */
private static String processFaviconURL(final boolean authenticated, DigestURL faviconURL) {
    /* Check for null first : the caller passes null when no favicon applies to the result */
    if (faviconURL == null) {
        return "";
    }

    final String iconUrlExt = MultiProtocolURL.getFileExtension(faviconURL.getFileName());
    /* Image format output for ViewFavicon servlet : default is png, except for icons the browser renders itself */
    final String viewFaviconExt = !iconUrlExt.isEmpty() && ImageViewer.isBrowserRendered(iconUrlExt) ? iconUrlExt : "png";

    final StringBuilder contentFaviconURL = new StringBuilder();
    contentFaviconURL.append("ViewFavicon.").append(viewFaviconExt).append("?maxwidth=16&maxheight=16&isStatic=true&quadratic");
    /* Only use a licence code for non authenticated users. For authenticated users the licence would never be released and would unnecessarily fill URLLicense.permissions. */
    if (authenticated) {
        contentFaviconURL.append("&url=").append(faviconURL.toNormalform(true));
    } else {
        contentFaviconURL.append("&code=").append(URLLicense.aquireLicense(faviconURL));
    }
    return contentFaviconURL.toString();
}
/**
* Add action links reserved to authorized users. All parameters must be non null.
* @param sb the main Switchboard instance
@ -409,9 +466,10 @@ public class yacysearchitem {
* @param theSearch search event
* @param target_special_pattern
* @param timeout result getting timeOut
* @param authenticated set to true when user authentication is ok
*/
private static void processImage(final Switchboard sb, final serverObjects prop, final int item,
final SearchEvent theSearch, final String target_special_pattern, long timeout) {
final SearchEvent theSearch, final String target_special_pattern, long timeout, boolean authenticated) {
prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content
try {
SearchEvent.ImageResult image = theSearch.oneImageResult(item, timeout);
@ -421,11 +479,26 @@ public class yacysearchitem {
final String license = URLLicense.aquireLicense(image.imageUrl); // this is just the license key to get the image forwarded through the YaCy thumbnail viewer, not an actual lawful license
/* Image format ouput for ViewImage servlet : default is png, except with gif and svg images */
final String viewImageExt = !imageUrlExt.isEmpty() && ViewImage.isBrowserRendered(imageUrlExt) ? imageUrlExt : "png";
final String viewImageExt = !imageUrlExt.isEmpty() && ImageViewer.isBrowserRendered(imageUrlExt) ? imageUrlExt : "png";
/* Thumb URL */
prop.putHTML("content_item_hrefCache", "ViewImage." + viewImageExt + "?maxwidth=" + DEFAULT_IMG_WIDTH + "&maxheight=" + DEFAULT_IMG_HEIGHT + "&code="+license+"&isStatic=true&quadratic=&url=" + imageUrlstring);
StringBuilder thumbURLBuilder = new StringBuilder("ViewImage.").append(viewImageExt).append("?maxwidth=")
.append(DEFAULT_IMG_WIDTH).append("&maxheight=").append(DEFAULT_IMG_HEIGHT)
.append("&isStatic=true&quadratic");
/* Only use licence code for non authentified users. For authenticated users licence would never be released and would unnecessarily fill URLLicense.permissions. */
if(authenticated) {
thumbURLBuilder.append("&url=").append(imageUrlstring);
} else {
thumbURLBuilder.append("&code=").append(URLLicense.aquireLicense(image.imageUrl));
}
String thumbURL = thumbURLBuilder.toString();
prop.putHTML("content_item_hrefCache", thumbURL);
/* Full size preview URL */
prop.putHTML("content_item_hrefFullPreview", "ViewImage." + viewImageExt + "?code="+license+"&isStatic=true&url=" + imageUrlstring);
if(authenticated) {
prop.putHTML("content_item_hrefFullPreview", "ViewImage." + viewImageExt + "?isStatic=true&url=" + imageUrlstring);
} else {
/* Not authenticated : full preview URL must be the same as thumb URL */
prop.putHTML("content_item_hrefFullPreview", thumbURL);
}
prop.putHTML("content_item_href", imageUrlstring);
prop.putHTML("content_item_target", target);
prop.put("content_item_code", license);
@ -437,7 +510,7 @@ public class yacysearchitem {
/* When image content is rendered by browser :
* - set smaller dimension to 100% in order to crop image on other dimension with CSS style 'overflow:hidden' on image container
* - set negative margin top behave like ViewImage which sets an offset when cutting to square */
if (ViewImage.isBrowserRendered(imageUrlExt)) {
if (ImageViewer.isBrowserRendered(imageUrlExt)) {
if (image.width > image.height) {
/* Landscape orientation */
itemWidth = "";

@ -9,7 +9,7 @@
"size": "#[size]#",
"sizename": "#[sizename]#",
"guid": "#[urlhash]#",
"faviconCode": "#[faviconCode]#",
"faviconUrl": "#[faviconUrl]#",
"host": "#[host]#",
"path": "#[path]#",
"file": "#[file]#",

@ -30,13 +30,11 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.responsewriter.OpensearchResponseWriter.ResHead;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.JSONObject;
import net.yacy.data.URLLicense;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
@ -51,7 +49,6 @@ import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
/**
* write the opensearch result in YaCys special way to include as much as in opensearch is included.
* This will also include YaCy facets.
@ -162,8 +159,6 @@ public class YJsonResponseWriter implements QueryResponseWriter {
String filename = url.getFileName();
solitaireTag(writer, "link", u);
solitaireTag(writer, "file", filename);
// get image license
if (Classification.isImageExtension(MultiProtocolURL.getFileExtension(filename))) URLLicense.aquireLicense(urlhash, url.toNormalform(true));
} catch (final MalformedURLException e) {}
continue;
}

@ -0,0 +1,51 @@
// InvalidURLLicenceException.java
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.data;
/**
 * Unchecked exception signalling that a URLLicense code is not valid.
 *
 * @author luc
 */
public class InvalidURLLicenceException extends RuntimeException {

    private static final long serialVersionUID = 388769934848447613L;

    /**
     * Creates the exception with the generic detail message.
     */
    public InvalidURLLicenceException() {
        this("Url license code is not valid or empty");
    }

    /**
     * Creates the exception with a specific detail message.
     *
     * @param message
     *            detail message
     */
    public InvalidURLLicenceException(String message) {
        super(message);
    }
}

@ -28,36 +28,53 @@ package net.yacy.data;
import java.util.Collections;
import java.util.Map;
import java.util.UUID;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.storage.SizeLimitedMap;
/**
* This class defines a license-generation for URLs.
* It is used in case of preview-Image-fetching to grant also non-authorized users the usage of a image-fetcher servlet,
* but to prevent them to use this servlet as a proxy.
*/
public class URLLicense {
// this class defines a license-generation for URLs
// it is used in case of snippet- and preview-Image-fetching to grant also non-authorized users the usage of a image-fetcher servlet
private static final int maxQueue = 10000;
/** Map URLs by licence keys */
private static final Map<String, String> permissions = Collections.synchronizedMap(new SizeLimitedMap<String, String>(maxQueue));
/**
* Generates and stores a unique licence key for delayed url data fetching.
* @param url URL whose data should be fetched later
* @return licence key generated or null when url is null
*/
public static String aquireLicense(final DigestURL url) {
if (url == null) return "";
// generate license key
String license = ASCII.String(url.hash());
// store reference to url with license key
permissions.put(license, url.toNormalform(true));
// return the license key
return license;
}
if (url == null) return null;
/* Generate license key : it must absolutely be a unique key, not related to url parameter (thus url.hash can not be used).
* If the same key were generated for each call of this method with the same url parameter,
* problems may occur when concurrent non authorized users try to fetch the same url content.
* Example scenario (emulated in URLLicenseConcurrentTest) :
* 1 - userA aquireLicence for url
* 2 - userB aquireLicence for same url as A
* 3 - userA releaseLicense : he can now fetch url content
* 4 - userB releaseLicense : if the same license was generated, it has been already released and url content can not be fetched! */
String license = UUID.randomUUID().toString();
public static String aquireLicense(final String license, final String url) {
// store reference to url with license key
permissions.put(license, url);
permissions.put(license, url.toNormalform(true));
// return the license key
return license;
}
/**
 * Retrieves the source url mapped to a licence code and removes the mapping,
 * so that a YaCy url containing this licence code can not be reused by
 * non-authorized users.
 *
 * @param license
 *            unique code associated to source url
 * @return source url, or null when the licence is no longer valid
 * @throws NullPointerException
 *             when license is null (NOTE(review): depends on how the backing
 *             map handles null keys - confirm)
 */
public static String releaseLicense(final String license) {
    /* removing the entry is what makes the licence usable only once */
    final String licensedUrl = permissions.remove(license);
    return licensedUrl;
}

@ -32,7 +32,6 @@ import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
@ -62,6 +61,7 @@ import net.yacy.cora.util.ByteBuffer;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.retrieval.Request;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.IconEntry;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.util.FileUtils;
@ -87,7 +87,8 @@ public class Document {
private LinkedHashMap<AnchorURL, String> audiolinks, videolinks, applinks, hyperlinks; // TODO: check if redundant value (set to key.getNameProperty()) is needed
private LinkedHashMap<DigestURL, String> inboundlinks, outboundlinks;
private Set<AnchorURL> emaillinks; // mailto: links
private MultiProtocolURL favicon;
/** links to icons that belongs to the document (mapped by absolute URL) */
private Map<DigestURL, IconEntry> icons;
private boolean resorted;
private final Set<String> languages;
private boolean indexingDenied;
@ -139,6 +140,7 @@ public class Document {
this.videolinks = null;
this.applinks = null;
this.emaillinks = null;
this.icons = new HashMap<>();
this.resorted = false;
this.inboundlinks = null;
this.outboundlinks = null;
@ -592,6 +594,14 @@ dc_rights
// expand the hyperlinks:
// we add artificial hyperlinks to the hyperlink set
// that can be calculated from given hyperlinks and imagelinks
/*
* Should we also include icons, with
* this.hyperlinks.putAll(allReflinks(this.icons.keySet()))? It is
* problematic, as allReflinks will modify the icons set, removing those whose URL
* starts with "/www", which is not desired for icons such as
* www.wikipedia.org/static/favicon/wikipedia.ico
*/
this.hyperlinks.putAll(allReflinks(this.images.values()));
this.hyperlinks.putAll(allReflinks(this.audiolinks.keySet()));
@ -679,6 +689,8 @@ dc_rights
url = new AnchorURL((String) o);
else if (o instanceof ImageEntry)
url = new AnchorURL(((ImageEntry) o).url());
else if (o instanceof IconEntry)
url = new AnchorURL(((IconEntry) o).getUrl());
else {
assert false;
continue loop;
@ -755,20 +767,26 @@ dc_rights
this.images.putAll(doc.getImages());
}
}
/**
* @return the {@link URL} to the favicon that belongs to the document
* @return links to icons that belongs to the document (mapped by absolute URL)
*/
public MultiProtocolURL getFavicon() {
return this.favicon;
}
public Map<DigestURL, IconEntry> getIcons() {
    /* the internal map itself is returned, not a copy */
    return this.icons;
}
/**
* @param faviconURL the {@link URL} to the favicon that belongs to the document
* Set links to icons that belongs to the document (mapped by absolute URL)
* @param icons
*/
public void setFavicon(final MultiProtocolURL faviconURL) {
this.favicon = faviconURL;
}
public void setIcons(Map<DigestURL, IconEntry> icons) {
    /* Never store null : fall back to a new empty map so that the icons property is always set */
    this.icons = (icons != null) ? icons : new HashMap<DigestURL, IconEntry>();
}
public int inboundLinkNofollowCount() {
if (this.inboundlinks == null) resortLinks();
@ -873,9 +891,13 @@ dc_rights
}
/**
* merge documents: a helper method for all parsers that return multiple documents
* @param docs
* @return
* merge documents: a helper method for all parsers that return multiple documents.
* Note : when docs contains more than one item, eventual icons in each docs are not merged in result doc,
* as their scope is limited to only one document.
* @param location url of merged document
* @param globalMime Mime type of merged document
* @param docs documents to merge
* @return document resulting of merge, or original document when docs contains only one item.
*/
public static Document mergeDocuments(final DigestURL location, final String globalMime, final Document[] docs) {
if (docs == null || docs.length == 0) return null;

@ -20,6 +20,7 @@
package net.yacy.document.parser.html;
import java.awt.Dimension;
import java.io.ByteArrayInputStream;
import java.io.CharArrayReader;
import java.io.File;
@ -31,12 +32,16 @@ import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
@ -52,6 +57,7 @@ import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.storage.SizeLimitedMap;
import net.yacy.cora.storage.SizeLimitedSet;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.NumberTools;
import net.yacy.document.SentenceReader;
@ -66,7 +72,7 @@ import net.yacy.kelondro.util.ISO639;
public class ContentScraper extends AbstractScraper implements Scraper {
private final static int MAX_TAGSIZE = 1024 * 1024;
public static final int MAX_DOCSIZE = 40 * 1024 * 1024;
public static final int MAX_DOCSIZE = 40 * 1024 * 1024;
private final char degree = '\u00B0';
private final char[] minuteCharsHTML = "&#039;".toCharArray();
@ -194,10 +200,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
private int breadcrumbs;
/**
* {@link MultiProtocolURL} to the favicon that belongs to the document
*/
private MultiProtocolURL favicon;
/** links to icons that belongs to the document (mapped by absolute URL)*/
private final Map<DigestURL, IconEntry> icons;
/**
* The document root {@link MultiProtocolURL}
@ -230,6 +234,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
this.css = new SizeLimitedMap<DigestURL, String>(maxLinks);
this.anchors = new ArrayList<AnchorURL>();
this.images = new ArrayList<ImageEntry>();
this.icons = new HashMap<>();
this.embeds = new SizeLimitedMap<AnchorURL, EmbedEntry>(maxLinks);
this.frames = new SizeLimitedSet<AnchorURL>(maxLinks);
this.iframes = new SizeLimitedSet<AnchorURL>(maxLinks);
@ -409,6 +414,69 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
}
}
/**
 * Parses sizes icon link attribute. (see
 * http://www.w3.org/TR/html5/links.html#attr-link-sizes) Eventual
 * duplicates are removed. Tokens which are not a valid "WIDTHxHEIGHT" size
 * (such as the "any" keyword), or whose values do not fit in an int, are
 * ignored.
 *
 * @param sizesAttr
 *            sizes attribute string, may be null
 * @return a set of sizes eventually empty.
 */
public static Set<Dimension> parseSizes(String sizesAttr) {
    final Set<Dimension> sizes = new HashSet<Dimension>();
    for (final String token : parseSpaceSeparatedTokens(sizesAttr)) {
        if (token == null) {
            continue;
        }
        final Matcher matcher = IconEntry.SIZE_PATTERN.matcher(token);
        if (!matcher.matches()) {
            /* "any" keyword may be present, but doesn't have to produce a dimension result */
            continue;
        }
        try {
            sizes.add(new Dimension(Integer.parseInt(matcher.group(1)), Integer.parseInt(matcher.group(2))));
        } catch (final NumberFormatException ignored) {
            /*
             * The pattern only guarantees digits, not that the value fits in
             * an int : such an oversized value can not be a real icon size,
             * so the token is skipped rather than failing the whole parsing
             */
        }
    }
    return sizes;
}
/**
 * Splits a space separated tokens attribute value into its tokens (see
 * http://www.w3.org/TR/html5/infrastructure.html#space-separated-tokens).
 * Eventual duplicates are removed.
 *
 * @param attr
 *            attribute string, may be null
 * @return a set of tokens eventually empty
 */
public static Set<String> parseSpaceSeparatedTokens(String attr) {
    final Set<String> tokens = new HashSet<>();
    if (attr == null) {
        return tokens;
    }
    final String trimmed = attr.trim();
    /* A blank attribute must give an empty set, and not a set holding a single empty string */
    if (trimmed.isEmpty()) {
        return tokens;
    }
    for (final String item : trimmed.split(CommonPattern.SPACES.pattern())) {
        tokens.add(item);
    }
    return tokens;
}
/**
 * Filters relationship tokens, keeping only the ones accepted by
 * {@code IconLinkRelations.isIconRel}. Retained tokens are converted to
 * lower case.
 *
 * @param relTokens
 *            relationship tokens (parsed from a rel attribute). Must not be null.
 * @return a Set of icon relations, eventually empty
 */
public Set<String> retainIconRelations(Collection<String> relTokens) {
    final HashSet<String> retained = new HashSet<>();
    for (final String candidate : relTokens) {
        if (!IconLinkRelations.isIconRel(candidate)) {
            continue;
        }
        /* English locale : lower casing must not depend on the platform default locale */
        retained.add(candidate.toLowerCase(Locale.ENGLISH));
    }
    return retained;
}
@Override
public void scrapeTag0(Tag tag) {
@ -481,14 +549,28 @@ public class ContentScraper extends AbstractScraper implements Scraper {
if (newLink != null) {
tag.opts.put("href", newLink.toNormalform(true));
String rel = tag.opts.getProperty("rel", EMPTY_STRING);
/* Rel attribute is supposed to be a set of space-separated tokens */
Set<String> relTokens = parseSpaceSeparatedTokens(rel);
final String linktitle = tag.opts.getProperty("title", EMPTY_STRING);
final String type = tag.opts.getProperty("type", EMPTY_STRING);
final String hreflang = tag.opts.getProperty("hreflang", EMPTY_STRING);
if (rel.equalsIgnoreCase("shortcut icon") || rel.equalsIgnoreCase("icon")) { // html5 -> rel="icon")
final ImageEntry ie = new ImageEntry(newLink, linktitle, -1, -1, -1);
this.images.add(ie);
this.favicon = newLink;
Set<String> iconRels = retainIconRelations(relTokens);
/* Distinguish icons from images. It will enable for example to later search only images and no icons */
if (!iconRels.isEmpty()) {
String sizesAttr = tag.opts.getProperty("sizes", EMPTY_STRING);
Set<Dimension> sizes = parseSizes(sizesAttr);
IconEntry icon = this.icons.get(newLink);
/* There is already an icon with same URL for this document :
* they may have different rel attribute or different sizes (multi sizes ico file) or this may be a duplicate */
if(icon != null) {
icon.getRel().addAll(iconRels);
icon.getSizes().addAll(sizes);
} else {
icon = new IconEntry(newLink, iconRels, sizes);
this.icons.put(newLink, icon);
}
} else if (rel.equalsIgnoreCase("canonical")) {
tag.opts.put("name", this.titles.size() == 0 ? "" : this.titles.iterator().next());
newLink.setAll(tag.opts);
@ -890,10 +972,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
/**
* @return the {@link MultiProtocolURL} to the favicon that belongs to the document
* @return all icons links
*/
public MultiProtocolURL getFavicon() {
return this.favicon;
public Map<DigestURL, IconEntry> getIcons() {
return this.icons;
}
/*
@ -950,7 +1032,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
private final static Pattern commaSepPattern = Pattern.compile(" |,");
private final static Pattern semicSepPattern = Pattern.compile(" |;");
public Set<String> getContentLanguages() {
// i.e. <meta name="DC.language" content="en" scheme="DCTERMS.RFC3066">
// or <meta http-equiv="content-language" content="en">
@ -1107,6 +1189,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
this.iframes.clear();
this.embeds.clear();
this.images.clear();
this.icons.clear();
this.metas.clear();
this.hreflang.clear();
this.navigation.clear();

@ -0,0 +1,197 @@
/**
* IconEntry
* Copyright 2016 by luccioman; https://github.com/luccioman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.parser.html;
import java.awt.Dimension;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import net.yacy.cora.document.id.DigestURL;
/**
 * Describes one icon referenced by a document : its URL, the link relations
 * under which it was declared and the sizes announced for it.
 *
 * @author luc
 *
 */
public class IconEntry {

    /** Pattern matching a single token of a HTML link sizes attribute (e.g. "16x16") */
    public static final Pattern SIZE_PATTERN = Pattern.compile("([1-9][0-9]*)[xX]([1-9][0-9]*)");

    /** Location of the icon */
    private final DigestURL url;

    /**
     * Link relations of the icon (the same url may be declared with several
     * icon relations in one document)
     */
    private final Set<String> rel;

    /** Announced sizes of the icon, never null but possibly empty */
    private final Set<Dimension> sizes;

    /**
     * Creates an entry from its parts. The given sets are kept by reference,
     * they are not copied.
     *
     * @param url
     *            icon location, must not be null.
     * @param rel
     *            link relations, must not be null and must hold at least one
     *            item.
     * @param sizes
     *            announced sizes, optional : null is replaced by an empty set.
     * @throws IllegalArgumentException
     *             when url is null, or when rel is null or empty
     */
    public IconEntry(final DigestURL url, Set<String> rel, Set<Dimension> sizes) {
        if (url == null) {
            throw new IllegalArgumentException("url must not be null.");
        }
        if (rel == null || rel.isEmpty()) {
            throw new IllegalArgumentException("rel must be specified");
        }
        this.url = url;
        this.rel = rel;
        this.sizes = (sizes == null) ? new HashSet<Dimension>() : sizes;
    }

    /**
     * @return true when at least one relation of this entry is the standard
     *         IANA registered icon link relation
     */
    public boolean isStandardIcon() {
        for (String relation : this.rel) {
            if (IconLinkRelations.isStandardIconRel(relation)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Computes the mean of the absolute width and height differences.
     *
     * @param size1
     *            first size, may be null
     * @param size2
     *            second size, may be null
     * @return distance between the two sizes, or Double.MAX_VALUE when one of
     *         them is null
     */
    public static double getDistance(Dimension size1, Dimension size2) {
        if (size1 == null || size2 == null) {
            return Double.MAX_VALUE;
        }
        final int widthGap = Math.abs(size1.width - size2.width);
        final int heightGap = Math.abs(size1.height - size2.height);
        return (widthGap + heightGap) / 2.0;
    }

    /**
     * @param preferredSize
     *            the size to get close to
     * @return the item of the sizes property having the smallest distance to
     *         preferredSize, or null when sizes is empty or preferredSize is
     *         null.
     */
    public Dimension getClosestSize(Dimension preferredSize) {
        if (preferredSize == null) {
            return null;
        }
        Dimension best = null;
        double bestDistance = Double.MAX_VALUE;
        for (Dimension candidate : this.sizes) {
            final double distance = getDistance(candidate, preferredSize);
            /* the first candidate is always taken, later ones only when strictly closer */
            if (best == null || distance < bestDistance) {
                best = candidate;
                bestDistance = distance;
            }
        }
        return best;
    }

    /**
     * @return this entry rendered as a HTML link tag
     */
    @Override
    public String toString() {
        final StringBuilder link = new StringBuilder("<link");
        link.append(" href=\"").append(this.url.toNormalform(false)).append("\"");
        link.append(" rel=\"").append(relToString()).append("\"");
        if (!this.sizes.isEmpty()) {
            link.append(" sizes=\"").append(sizesToString()).append("\"");
        }
        return link.append(">").toString();
    }

    /**
     * @return icon URL
     */
    public DigestURL getUrl() {
        return this.url;
    }

    /**
     * @return icon link relations (the internal set, not a copy)
     */
    public Set<String> getRel() {
        return this.rel;
    }

    /**
     * @return icon sizes, eventually empty (the internal set, not a copy)
     */
    public Set<Dimension> getSizes() {
        return this.sizes;
    }

    /**
     * @return the sizes property formatted as a HTML link tag sizes attribute
     *         value (e.g. "16x16 64x64")
     */
    public String sizesToString() {
        final StringBuilder joined = new StringBuilder();
        for (Dimension size : this.sizes) {
            if (joined.length() > 0) {
                joined.append(" ");
            }
            joined.append(size.width).append("x").append(size.height);
        }
        return joined.toString();
    }

    /**
     * @return the rel property formatted as a HTML link tag rel attribute
     *         value (e.g. "icon apple-touch-icon")
     */
    public String relToString() {
        final StringBuilder joined = new StringBuilder();
        for (String relation : this.rel) {
            if (joined.length() > 0) {
                joined.append(" ");
            }
            joined.append(relation);
        }
        return joined.toString();
    }
}

@ -0,0 +1,87 @@
/**
* IconLinkRelations
* Copyright 2016 by luccioman; https://github.com/luccioman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.parser.html;
/**
 * Enumeration of HTML link relationships (rel attribute) designating an icon.
 * @author luc
 *
 */
public enum IconLinkRelations {

    /** Standard IANA registered icon link relation (see https://www.iana.org/assignments/link-relations/link-relations.xhtml) */
    ICON("icon", "Standard favicon"),

    /** Icon for IOS app shortcut */
    APPLE_TOUCH_ICON("apple-touch-icon", "IOS app shortcut icon"),

    /** Icon for IOS app shortcut (deprecated but still used by major websites in 2015) */
    APPLE_TOUCH_ICON_PRECOMPOSED("apple-touch-icon-precomposed", "Deprecated IOS app shortcut icon"),

    /** Icon for Safari pinned tab */
    MASK_ICON("mask-icon", "Safari browser pinned tab icon"),

    /** Icon for Fluid web app */
    FLUID_ICON("fluid-icon", "Fluid app icon");

    /** HTML rel attribute value. Final : enum constants are shared and must stay immutable. */
    private final String relValue;

    /** Human readable description. Final for the same reason as relValue. */
    private final String description;

    /**
     * @param relValue HTML rel attribute value
     * @param description human readable description
     */
    private IconLinkRelations(String relValue, String description) {
        this.relValue = relValue;
        this.description = description;
    }

    /**
     * @return HTML rel attribute value
     */
    public String getRelValue() {
        return this.relValue;
    }

    /**
     * @return Human readable description of icon rel attribute
     */
    public String getDescription() {
        return this.description;
    }

    /**
     * Case insensitive check against every relation of this enumeration.
     *
     * @param relToken HTML rel attribute token, may be null
     * @return true when relToken is an icon relationship (standard or non-standard)
     */
    public static boolean isIconRel(String relToken) {
        for (IconLinkRelations iconRel : IconLinkRelations.values()) {
            if (iconRel.getRelValue().equalsIgnoreCase(relToken)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Case insensitive check against the {@link #ICON} relation only.
     *
     * @param relToken HTML rel attribute token, may be null
     * @return true when relToken is the standard IANA registered icon link relation
     */
    public static boolean isStandardIconRel(String relToken) {
        return ICON.getRelValue().equalsIgnoreCase(relToken);
    }
}

@ -172,7 +172,7 @@ public class htmlParser extends AbstractParser implements Parser {
noDoubleImages,
scraper.indexingDenied(),
scraper.getDate());
ppd.setFavicon(scraper.getFavicon());
ppd.setIcons(scraper.getIcons());
return ppd;
}

@ -104,7 +104,7 @@ public class SolrSelectServlet extends HttpServlet {
RESPONSE_WRITER.put("grephtml", new GrepHTMLResponseWriter());
RESPONSE_WRITER.put("rss", opensearchResponseWriter); //try http://localhost:8090/solr/select?wt=rss&q=olympia&hl=true&hl.fl=text_t,h1,h2
RESPONSE_WRITER.put("opensearch", opensearchResponseWriter); //try http://localhost:8090/solr/select?wt=rss&q=olympia&hl=true&hl.fl=text_t,h1,h2
RESPONSE_WRITER.put("yjson", new YJsonResponseWriter()); //try http://localhost:8090/solr/select?wt=json&q=olympia&hl=true&hl.fl=text_t,h1,h2
RESPONSE_WRITER.put("yjson", new YJsonResponseWriter()); //try http://localhost:8090/solr/select?wt=yjson&q=olympia&hl=true&hl.fl=text_t,h1,h2
RESPONSE_WRITER.put("gsa", new GSAResponseWriter());
}

@ -0,0 +1,47 @@
// TemplateMissingParameterException.java
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.http.servlets;
/**
 * Signals that a parameter required by a template is missing. Having a
 * dedicated type allows finer grained exception handling.
 * @author luc
 *
 */
public class TemplateMissingParameterException extends IllegalArgumentException {

    private static final long serialVersionUID = -3443324572847193267L;

    /**
     * Creates an instance carrying a generic detail message.
     */
    public TemplateMissingParameterException() {
        this("Missing required parameters");
    }

    /**
     * Creates an instance carrying a specific detail message.
     * @param message detail message
     */
    public TemplateMissingParameterException(String message) {
        super(message);
    }
}

@ -66,6 +66,7 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ByteBuffer;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.InvalidURLLicenceException;
import net.yacy.data.UserDB.AccessRight;
import net.yacy.data.UserDB.Entry;
import net.yacy.kelondro.util.FileUtils;
@ -873,7 +874,20 @@ public class YaCyDefaultServlet extends HttpServlet {
} else {
tmp = invokeServlet(targetClass, legacyRequestHeader, args);
}
} catch (InvocationTargetException | IllegalArgumentException | IllegalAccessException e) {
} catch(InvocationTargetException e) {
if(e.getCause() instanceof InvalidURLLicenceException) {
/* An unauthorized user is trying to fetch an image with a bad or already released license code */
response.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getCause().getMessage());
return;
}
if(e.getCause() instanceof TemplateMissingParameterException) {
/* A template is used but some required parameter is missing */
response.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getCause().getMessage());
return;
}
ConcurrentLog.logException(e);
throw new ServletException(targetFile.getAbsolutePath());
} catch (IllegalArgumentException | IllegalAccessException e) {
ConcurrentLog.logException(e);
throw new ServletException(targetFile.getAbsolutePath());
}

@ -22,18 +22,24 @@
package net.yacy.kelondro.data.meta;
import java.awt.Dimension;
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.solr.common.SolrDocument;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.MicroDate;
import net.yacy.cora.document.analysis.Classification;
@ -49,6 +55,9 @@ import net.yacy.crawler.retrieval.Response;
import net.yacy.document.SentenceReader;
import net.yacy.document.Tokenizer;
import net.yacy.document.parser.pdfParser;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.IconEntry;
import net.yacy.document.parser.html.IconLinkRelations;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.data.word.WordReferenceVars;
@ -64,8 +73,6 @@ import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.utils.crypt;
import org.apache.solr.common.SolrDocument;
/**
* This is the URIMetadata object implementation for Solr documents.
@ -90,6 +97,12 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
private String alternative_urlname;
private TextSnippet textSnippet = null;
/**
* Creates an instance from encoded properties.
* @param prop encoded properties
* @param collection collection origin (e.g. "dht")
* @throws MalformedURLException
*/
public URIMetadataNode(final Properties prop, String collection) throws MalformedURLException {
// generates an plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
@ -159,7 +172,13 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
if (prop.containsKey("wi")) {
this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))), false);
}
if (prop.containsKey("favicon")) {
final String rawFaviconURL = crypt.simpleDecode(prop.getProperty("favicon", ""));
DigestURL faviconURL = new DigestURL(rawFaviconURL);
this.setIconsFields(faviconURL);
}
}
public URIMetadataNode(final SolrDocument doc) throws MalformedURLException {
super();
@ -512,7 +531,145 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
}
return list.iterator();
}
/**
 * Extracts icon entries from this solr document.
 * Each icon URL is rebuilt from the icons_protocol_sxt and icons_urlstub_sxt
 * fields; relations and sizes are read at the same index from icons_rel_sxt
 * and icons_sizes_sxt. Url stubs which are not strings or which do not form a
 * valid URL are skipped.
 * @return icon entries collection eventually empty
 */
public final Collection<IconEntry> getIcons() {
    final Collection<IconEntry> icons = new ArrayList<>();
    final List<?> urlStubs = getFieldValuesAsList(CollectionSchema.icons_urlstub_sxt.getSolrFieldName());
    if (urlStubs == null || urlStubs.isEmpty()) {
        return icons;
    }
    final List<String> protocols = CollectionConfiguration.indexedList2protocolList(
            getFieldValues(CollectionSchema.icons_protocol_sxt.getSolrFieldName()), urlStubs.size());
    final List<?> allSizes = getFieldValuesAsList(CollectionSchema.icons_sizes_sxt.getSolrFieldName());
    final List<?> allRels = getFieldValuesAsList(CollectionSchema.icons_rel_sxt.getSolrFieldName());

    for (int index = 0; index < urlStubs.size(); index++) {
        final Object stub = urlStubs.get(index);
        if (!(stub instanceof String)) {
            continue;
        }
        /* http is the implied default when no protocol is stored for this index */
        final String protocol = (protocols != null && protocols.size() > index) ? protocols.get(index) : "http";
        final DigestURL iconURL;
        try {
            iconURL = new DigestURL(protocol + "://" + (String) stub);
        } catch (final MalformedURLException e) {
            /* an unusable URL only discards this icon, not the whole list */
            continue;
        }

        Set<String> rels = null;
        if (allRels.size() > index) {
            final Object relItem = allRels.get(index);
            if (relItem instanceof String) {
                rels = ContentScraper.parseSpaceSeparatedTokens((String) relItem);
            }
        }
        /*
         * No relation may be available when icons_rel_sxt field has been
         * disabled in solr schema, or when the stored value holds no token.
         * IconEntry rejects a null or empty relations set, so fall back to a
         * placeholder relation in both cases.
         */
        if (rels == null || rels.isEmpty()) {
            rels = new HashSet<>();
            rels.add("unknown");
        }

        Set<Dimension> sizes = null;
        if (allSizes.size() > index) {
            final Object sizesItem = allSizes.get(index);
            if (sizesItem instanceof String) {
                sizes = ContentScraper.parseSizes((String) sizesItem);
            }
        }

        icons.add(new IconEntry(iconURL, rels, sizes));
    }
    return icons;
}
/**
 * Selects among the icons of this solr document the one best suited as a favicon.
 * A standard icon is preferred; other icons are only accepted as long as no
 * standard icon has been found. Among eligible icons the one whose closest
 * size is nearest to the preferred size wins.
 * @param preferredSize preferred size
 * @return icon entry or null
 */
public IconEntry getFavicon(Dimension preferredSize) {
    IconEntry best = null;
    boolean bestIsStandard = false;
    double bestDistance = Double.MAX_VALUE;

    for (IconEntry candidate : this.getIcons()) {
        final boolean standard = candidate.isStandardIcon();
        final double distance = IconEntry.getDistance(candidate.getClosestSize(preferredSize), preferredSize);

        /*
         * Once a standard icon is held, only a standard icon with a strictly
         * closer size may replace it. Before that, any standard icon is
         * accepted, as well as any icon which is not farther than the
         * current one.
         */
        final boolean better = bestIsStandard
                ? (standard && distance < bestDistance)
                : (standard || distance <= bestDistance);
        if (!better) {
            continue;
        }

        best = candidate;
        bestDistance = distance;
        bestIsStandard = standard;

        /* a standard icon with exactly the preferred size can not be beaten */
        if (standard && distance == 0.0) {
            break;
        }
    }
    return best;
}
/**
 * Use iconURL to set icons related fields on this solr document.
 * When iconURL is null, the four icons fields are set to empty lists.
 *
 * @param iconURL icon URL, may be null
 */
private void setIconsFields(DigestURL iconURL) {
    final List<String> protocols = new ArrayList<String>(1);
    final List<String> sizes = new ArrayList<String>(1);
    final List<String> stubs = new ArrayList<String>(1);
    final List<String> rels = new ArrayList<String>(1);
    if (iconURL != null) {
        final String protocol = iconURL.getProtocol();
        /*
         * The protocols field holds an indexed list ("<index>-<protocol>",
         * e.g. "001-https") where http is implied and not stored : this is
         * the only form getIcons() is able to decode. Storing the bare
         * protocol name would make any non http icon be read back as http.
         * The single icon handled here always sits at index 0.
         */
        if (!"http".equals(protocol)) {
            protocols.add("000-" + protocol);
        }
        sizes.add("");
        stubs.add(iconURL.toString().substring(protocol.length() + 3));
        rels.add(IconLinkRelations.ICON.getRelValue());
    }
    this.setField(CollectionSchema.icons_protocol_sxt.name(), protocols);
    this.setField(CollectionSchema.icons_urlstub_sxt.name(), stubs);
    this.setField(CollectionSchema.icons_rel_sxt.name(), rels);
    this.setField(CollectionSchema.icons_sizes_sxt.name(), sizes);
}
/**
 * Gives access to the values of a field as a list.
 * @param name field name
 * @return the field values when they are held in a List, otherwise (no values, or values not held in a List) the immutable empty list
 */
public List<?> getFieldValuesAsList(String name) {
    final Collection<Object> values = getFieldValues(name);
    if (values instanceof List<?>) {
        return (List<?>) values;
    }
    return Collections.emptyList();
}
public static Date getDate(SolrDocument doc, final CollectionSchema key) {
Date x = doc == null ? null : (Date) doc.getFieldValue(key.getSolrFieldName());
Date now = new Date();
@ -601,6 +758,13 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
final String wprop = this.word().toPropertyForm();
s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wprop));
}
/* Add favicon URL with preferred size being 16x16 pixels if known */
if(!this.getIcons().isEmpty()) {
IconEntry faviconEntry = this.getFavicon(new Dimension(16, 16));
if(faviconEntry != null) {
s.append(",favicon=").append(crypt.simpleEncode(faviconEntry.getUrl().toNormalform(false)));
}
}
return s;
} catch (final Throwable e) {
ConcurrentLog.logException(e);

@ -1671,6 +1671,8 @@ public final class SearchEvent {
// boolean fakeImageHost = ms.url().getHost() != null && ms.url().getHost().indexOf("wikipedia") > 0; // pages with image extension from wikipedia do not contain image files but html files... I know this is a bad hack, but many results come from wikipedia and we must handle that
// generalize above hack (regarding url with file extension but beeing a html (with html mime)
if (doc.doctype() == Response.DT_IMAGE) {
/* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents,
* or documents coming from previous versions peers */
if (!doc.url().getFileName().endsWith(".ico")) { // we don't want favicons
final String id = ASCII.String(doc.hash());
// check image size
@ -1695,6 +1697,8 @@ public final class SearchEvent {
List<Object> width = widthO == null ? null : (List<Object>) widthO;
for (int c = 0; c < img.size(); c++) {
String image_urlstub = (String) img.get(c);
/* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents,
* or documents coming from previous versions peers */
if (image_urlstub.endsWith(".ico")) continue; // we don't want favicons, makes the result look idiotic
try {
int h = height == null ? 0 : (Integer) height.get(c);

@ -95,6 +95,7 @@ import net.yacy.document.SentenceReader;
import net.yacy.document.Tokenizer;
import net.yacy.document.content.DCEntry;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.IconEntry;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.citation.CitationReference;
import net.yacy.kelondro.data.meta.URIMetadataNode;
@ -315,6 +316,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
add(doc, CollectionSchema.keywords, keywords);
}
/* Metadata node may contain one favicon url when transmitted as dht chunk */
processIcons(doc, allAttr, md.getIcons());
if (allAttr || contains(CollectionSchema.imagescount_i)) add(doc, CollectionSchema.imagescount_i, md.limage());
if (allAttr || contains(CollectionSchema.linkscount_i)) add(doc, CollectionSchema.linkscount_i, md.llocal() + md.lother());
if (allAttr || contains(CollectionSchema.inboundlinkscount_i)) add(doc, CollectionSchema.inboundlinkscount_i, md.llocal());
@ -525,6 +528,9 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
final Object scraper = document.getScraperObject();
boolean containsCanonical = false;
DigestURL canonical = null;
processIcons(doc, allAttr, inboundLinks, outboundLinks, document.getIcons().values());
if (scraper instanceof ContentScraper) {
final ContentScraper html = (ContentScraper) scraper;
List<ImageEntry> images = html.getImages();
@ -648,45 +654,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if (articles.size() > 0) add(doc, CollectionSchema.article_txt, articles);
// images
final ArrayList<String> imgprots = new ArrayList<String>(images.size());
final Integer[] imgheights = new Integer[images.size()];
final Integer[] imgwidths = new Integer[images.size()];
final Integer[] imgpixels = new Integer[images.size()];
final String[] imgstubs = new String[images.size()];
final String[] imgalts = new String[images.size()];
int withalt = 0;
int i = 0;
LinkedHashSet<String> images_text_map = new LinkedHashSet<String>();
for (final ImageEntry ie: images) {
final MultiProtocolURL uri = ie.url();
inboundLinks.remove(uri);
outboundLinks.remove(uri);
imgheights[i] = ie.height();
imgwidths[i] = ie.width();
imgpixels[i] = ie.height() < 0 || ie.width() < 0 ? -1 : ie.height() * ie.width();
String protocol = uri.getProtocol();
imgprots.add(protocol);
imgstubs[i] = uri.toString().substring(protocol.length() + 3);
imgalts[i] = ie.alt();
for (String it: CommonPattern.SPACE.split(uri.toTokens())) images_text_map.add(it);
if (ie.alt() != null && ie.alt().length() > 0) {
SentenceReader sr = new SentenceReader(ie.alt());
while (sr.hasNext()) images_text_map.add(sr.next().toString());
withalt++;
}
i++;
}
StringBuilder images_text = new StringBuilder(images_text_map.size() * 6 + 1);
for (String s: images_text_map) images_text.append(s.trim()).append(' ');
if (allAttr || contains(CollectionSchema.imagescount_i)) add(doc, CollectionSchema.imagescount_i, images.size());
if (allAttr || contains(CollectionSchema.images_protocol_sxt)) add(doc, CollectionSchema.images_protocol_sxt, protocolList2indexedList(imgprots));
if (allAttr || contains(CollectionSchema.images_urlstub_sxt)) add(doc, CollectionSchema.images_urlstub_sxt, imgstubs);
if (allAttr || contains(CollectionSchema.images_alt_sxt)) add(doc, CollectionSchema.images_alt_sxt, imgalts);
if (allAttr || contains(CollectionSchema.images_height_val)) add(doc, CollectionSchema.images_height_val, imgheights);
if (allAttr || contains(CollectionSchema.images_width_val)) add(doc, CollectionSchema.images_width_val, imgwidths);
if (allAttr || contains(CollectionSchema.images_pixel_val)) add(doc, CollectionSchema.images_pixel_val, imgpixels);
if (allAttr || contains(CollectionSchema.images_withalt_i)) add(doc, CollectionSchema.images_withalt_i, withalt);
if (allAttr || contains(CollectionSchema.images_text_t)) add(doc, CollectionSchema.images_text_t, images_text.toString().trim());
processImages(doc, allAttr, inboundLinks, outboundLinks, images);
// style sheets
if (allAttr || contains(CollectionSchema.css_tag_sxt)) {
@ -1013,6 +981,137 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
return doc;
}
/**
 * Add icons metadata to Solr doc when corresponding schema attributes are
 * enabled. Convenience overload for callers having no link maps to clean
 * up : it delegates to the five parameters variant with null inbound and
 * outbound links.
 *
 * @param doc
 * solr document to fill. Must not be null.
 * @param allAttr
 * all attributes are enabled.
 * @param icons
 * document icon entries.
 */
private void processIcons(SolrInputDocument doc, boolean allAttr, Collection<IconEntry> icons) {
processIcons(doc, allAttr, null, null, icons);
}
/**
 * Add icons metadata to Solr doc when corresponding schema attributes are
 * enabled. Icons urls are also removed from inboundLinks and outboundLinks
 * when these maps are provided. Nothing is done when icons is null.
 *
 * @param doc
 *            solr document to fill. Must not be null.
 * @param allAttr
 *            all attributes are enabled.
 * @param inboundLinks
 *            all document inbound links. May be null.
 * @param outboundLinks
 *            all document outbound links. May be null.
 * @param icons
 *            document icon entries.
 */
private void processIcons(SolrInputDocument doc, boolean allAttr, LinkedHashMap<DigestURL, String> inboundLinks,
        LinkedHashMap<DigestURL, String> outboundLinks, Collection<IconEntry> icons) {
    if (icons == null) {
        return;
    }

    final int count = icons.size();
    final List<String> iconProtocols = new ArrayList<String>(count);
    final String[] iconSizes = new String[count];
    final String[] iconStubs = new String[count];
    final String[] iconRels = new String[count];

    /* Build one value per icon for each solr field */
    int pos = 0;
    for (final IconEntry icon : icons) {
        final DigestURL iconUrl = icon.getUrl();
        /* an icon must not be counted as a regular link */
        if (inboundLinks != null) {
            inboundLinks.remove(iconUrl);
        }
        if (outboundLinks != null) {
            outboundLinks.remove(iconUrl);
        }

        final String scheme = iconUrl.getProtocol();
        iconProtocols.add(scheme);

        /*
         * One icon may have several sizes and several rels : each of these
         * is flattened to a single string because solr currently has no
         * multidimensional array fields
         */
        iconSizes[pos] = icon.sizesToString();
        /* url stub : the url without its protocol and the following "://" */
        iconStubs[pos] = iconUrl.toString().substring(scheme.length() + 3);
        iconRels[pos] = icon.relToString();
        pos++;
    }

    if (allAttr || contains(CollectionSchema.icons_protocol_sxt)) {
        add(doc, CollectionSchema.icons_protocol_sxt, protocolList2indexedList(iconProtocols));
    }
    if (allAttr || contains(CollectionSchema.icons_urlstub_sxt)) {
        add(doc, CollectionSchema.icons_urlstub_sxt, iconStubs);
    }
    if (allAttr || contains(CollectionSchema.icons_rel_sxt)) {
        add(doc, CollectionSchema.icons_rel_sxt, iconRels);
    }
    if (allAttr || contains(CollectionSchema.icons_sizes_sxt)) {
        add(doc, CollectionSchema.icons_sizes_sxt, iconSizes);
    }
}
/**
 * Add images metadata to Solr doc when corresponding schema attributes are enabled.
 * Images urls are also removed from inboundLinks and outboundLinks.
 * @param doc solr document to fill
 * @param allAttr all attributes are enabled
 * @param inboundLinks all document inbound links
 * @param outboundLinks all document outbound links
 * @param images document images
 */
private void processImages(SolrVector doc, boolean allAttr, LinkedHashMap<DigestURL, String> inboundLinks,
        LinkedHashMap<DigestURL, String> outboundLinks, List<ImageEntry> images) {
    final int imageCount = images.size();
    final ArrayList<String> protocols = new ArrayList<String>(imageCount);
    final Integer[] heights = new Integer[imageCount];
    final Integer[] widths = new Integer[imageCount];
    final Integer[] pixels = new Integer[imageCount];
    final String[] stubs = new String[imageCount];
    final String[] alts = new String[imageCount];
    /* insertion ordered and duplicate free collection of the images text parts */
    final LinkedHashSet<String> textParts = new LinkedHashSet<String>();
    int altCount = 0;

    /* Build one value per image for each flat solr field */
    int pos = 0;
    for (final ImageEntry image : images) {
        final MultiProtocolURL imageUrl = image.url();
        /* an image must not be counted as a regular link */
        inboundLinks.remove(imageUrl);
        outboundLinks.remove(imageUrl);

        heights[pos] = image.height();
        widths[pos] = image.width();
        /* -1 marks a pixel count which can not be computed because a dimension is negative */
        if (image.height() < 0 || image.width() < 0) {
            pixels[pos] = -1;
        } else {
            pixels[pos] = image.height() * image.width();
        }

        final String scheme = imageUrl.getProtocol();
        protocols.add(scheme);
        /* url stub : the url without its protocol and the following "://" */
        stubs[pos] = imageUrl.toString().substring(scheme.length() + 3);
        alts[pos] = image.alt();

        for (String token : CommonPattern.SPACE.split(imageUrl.toTokens())) {
            textParts.add(token);
        }
        if (image.alt() != null && image.alt().length() > 0) {
            final SentenceReader sentences = new SentenceReader(image.alt());
            while (sentences.hasNext()) {
                textParts.add(sentences.next().toString());
            }
            altCount++;
        }
        pos++;
    }

    final StringBuilder imagesText = new StringBuilder(textParts.size() * 6 + 1);
    for (String part : textParts) {
        imagesText.append(part.trim()).append(' ');
    }

    if (allAttr || contains(CollectionSchema.imagescount_i)) {
        add(doc, CollectionSchema.imagescount_i, images.size());
    }
    if (allAttr || contains(CollectionSchema.images_protocol_sxt)) {
        add(doc, CollectionSchema.images_protocol_sxt, protocolList2indexedList(protocols));
    }
    if (allAttr || contains(CollectionSchema.images_urlstub_sxt)) {
        add(doc, CollectionSchema.images_urlstub_sxt, stubs);
    }
    if (allAttr || contains(CollectionSchema.images_alt_sxt)) {
        add(doc, CollectionSchema.images_alt_sxt, alts);
    }
    if (allAttr || contains(CollectionSchema.images_height_val)) {
        add(doc, CollectionSchema.images_height_val, heights);
    }
    if (allAttr || contains(CollectionSchema.images_width_val)) {
        add(doc, CollectionSchema.images_width_val, widths);
    }
    if (allAttr || contains(CollectionSchema.images_pixel_val)) {
        add(doc, CollectionSchema.images_pixel_val, pixels);
    }
    if (allAttr || contains(CollectionSchema.images_withalt_i)) {
        add(doc, CollectionSchema.images_withalt_i, altCount);
    }
    if (allAttr || contains(CollectionSchema.images_text_t)) {
        add(doc, CollectionSchema.images_text_t, imagesText.toString().trim());
    }
}
/**
* attach additional information to the document to enable navigation features
* @param doc the document to be enriched
@ -1993,14 +2092,24 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
return a;
}
/**
 * Uncompress indexed iplist of protocol names to a list of specified dimension.
 * Every position of the result defaults to "http"; each well formed entry of
 * iplist ("&lt;index&gt;-&lt;protocol&gt;") overrides the protocol at its index.
 * Badly formatted entries are silently ignored : entries which are not
 * strings, which have no '-' separator, an empty index or an empty protocol,
 * whose index is not a number, or whose index is outside of the target list.
 * @param iplist indexed list typically produced by protocolList2indexedList. May be null.
 * @param dimension size of target list
 * @return a list of protocol names
 */
public static List<String> indexedList2protocolList(Collection<Object> iplist, int dimension) {
    List<String> a = new ArrayList<String>(dimension);
    for (int i = 0; i < dimension; i++) a.add("http");
    if (iplist == null) return a;
    for (Object ip : iplist) {
        if (!(ip instanceof String)) {
            continue;
        }
        // ip format is 001-https but can be 4 digits 1011-https
        final String indexedProtocol = (String) ip;
        final int separator = indexedProtocol.indexOf('-');
        if (separator <= 0 || indexedProtocol.length() <= (separator + 1)) {
            continue;
        }
        final int index;
        try {
            index = Integer.parseInt(indexedProtocol.substring(0, separator));
        } catch (final NumberFormatException ignored) {
            /* index part is not a number : badly formatted entry */
            continue;
        }
        if (index >= 0 && index < dimension) {
            a.set(index, indexedProtocol.substring(separator + 1));
        }
    }
    return a;
}

@ -139,6 +139,12 @@ public enum CollectionSchema implements SchemaDeclaration {
outboundlinks_urlstub_sxt(SolrType.string, true, true, true, false, true, "external links, the url only without the protocol"),
outboundlinks_anchortext_txt(SolrType.text_general, true, true, true, false, true, "external links, the visible anchor text"),
icons_urlstub_sxt(SolrType.string, true, true, true, false, true, "all icon links without the protocol and '://'"),
/** All icon links protocols : split from icons_urlstub to provide some compression, as http protocol is implied as default and not stored */
icons_protocol_sxt(SolrType.string, true, true, true, false, false, "all icon links protocols"),
/** All icon links relationships : one space separated tokens string per icon */
icons_rel_sxt(SolrType.string, true, true, true, false, false, "all icon links relationships space separated (e.g.. 'icon apple-touch-icon')"),
/** All icon sizes : one space separated tokens string per icon (e.g. "16x16 32x32"), so this is a string field like the other icons fields and not a number */
icons_sizes_sxt(SolrType.string, true, true, true, false, false, "all icon sizes space separated (e.g. '16x16 32x32')"),
images_text_t(SolrType.text_general, true, true, false, false, true, "all text/words appearing in image alt texts or the tokenized url"),
images_urlstub_sxt(SolrType.string, true, true, true, false, true, "all image links without the protocol and '://'"),
images_protocol_sxt(SolrType.string, true, true, true, false, false, "all image link protocols"),

@ -0,0 +1,490 @@
/**
* ImageViewer
* Copyright 2016 by luccioman; https://github.com/luccioman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.visualization;
import java.awt.Container;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.MediaTracker;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.awt.image.Raster;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.Iterator;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.InvalidURLLicenceException;
import net.yacy.data.URLLicense;
import net.yacy.http.servlets.TemplateMissingParameterException;
import net.yacy.peers.graphics.EncodedImage;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.server.serverObjects;
/**
* Provides methods for image or favicon viewing in YaCy servlets.
* @author luc
*
*/
public class ImageViewer {
/**
 * Resolve the image URL from the request parameters.
 * An authenticated user provides the URL directly in the "url" parameter,
 * whereas a non authenticated user must provide in the "code" parameter a
 * license key which is released to obtain the URL.
 *
 * @param post post parameters. Must not be null.
 * @param auth true when current user is authenticated
 * @return DigestURL instance
 * @throws MalformedURLException when url is malformed
 * @throws TemplateMissingParameterException when the "url" parameter (authenticated user)
 *             or the "code" parameter (non authenticated user) is missing
 * @throws InvalidURLLicenceException when no URL is registered for the provided license code
 */
public DigestURL parseURL(final serverObjects post, final boolean auth)
        throws MalformedURLException {
    final String requestedUrl = post.get("url", "");
    final String licenseCode = post.get("code", "");

    if (auth) {
        /* Authenticated user : rely on url parameter */
        if (requestedUrl.length() <= 0) {
            throw new TemplateMissingParameterException("missing required url parameter");
        }
        return new DigestURL(requestedUrl);
    }

    /* Non authenticated user : rely on urlLicense parameter */
    if (licenseCode.length() <= 0) {
        throw new TemplateMissingParameterException("missing required code parameter");
    }
    final String licensedURL = URLLicense.releaseLicense(licenseCode);
    if (licensedURL == null) {
        // license is gone (e.g. released/removed in previous calls)
        ConcurrentLog.fine("ImageViewer", "image urlLicense not found key=" + licenseCode);
        /* Caller is responsible for handling this with appropriate HTTP status code */
        throw new InvalidURLLicenceException();
    }
    return new DigestURL(licensedURL);
}
/**
 * Open an input stream on the image URL with the provided loader. The
 * crawler agent is taken from the "agentName" post parameter, defaulting to
 * the intranet agent for an authenticated user and to the internet agent
 * otherwise.
 *
 * @param post
 *            post parameters. Must not be null.
 * @param loader
 *            resources loader. Must not be null.
 * @param auth
 *            true when user has credentials to load full images.
 * @param url
 *            image url.
 * @return an open input stream instance (don't forget to close it).
 * @throws IOException
 *             when url is null, when the loader fails or when it returns no stream.
 */
public InputStream openInputStream(final serverObjects post, final LoaderDispatcher loader,
        final boolean auth, DigestURL url) throws IOException {
    if (url == null) {
        throw new IOException("Input stream could no be open");
    }

    InputStream stream;
    try {
        final String defaultAgentName;
        if (auth) {
            defaultAgentName = ClientIdentification.yacyIntranetCrawlerAgentName;
        } else {
            defaultAgentName = ClientIdentification.yacyInternetCrawlerAgentName;
        }
        final ClientIdentification.Agent agent = ClientIdentification
                .getAgent(post.get("agentName", defaultAgentName));
        stream = loader.openInputStream(loader.request(url, false, true), CacheStrategy.IFEXIST,
                BlacklistType.SEARCH, agent);
    } catch (final IOException e) {
        /* No need to log full stack trace (in most cases resource is not available because of a network error) */
        ConcurrentLog.fine("ImageViewer", "cannot load image. URL : " + url.toNormalform(true));
        throw e;
    }

    if (stream == null) {
        throw new IOException("Input stream could no be open");
    }
    return stream;
}
/**
 * Tell whether an image format is left to the browser instead of being
 * processed by a YaCy service. The comparison ignores case.
 *
 * @param formatName
 *            informal file format name. For example : "png". May be null.
 * @return true when image format will be rendered by browser and not by a YaCy service
 */
public static boolean isBrowserRendered(String formatName) {
    /*
     * gif images are not loaded because of an animated gif bug within jvm
     * which sends java into an endless loop with high CPU
     */
    if ("gif".equalsIgnoreCase(formatName)) {
        return true;
    }
    /*
     * svg images are not supported by the jdk, but by most browsers :
     * deliver just the content (without crop/scale)
     */
    return "svg".equalsIgnoreCase(formatName);
}
/**
* Process source image to try to produce an EncodedImage instance
* eventually scaled and clipped depending on post parameters.
* <p>
* Post parameters read here : "maxwidth" and "maxheight" (integers, 0 when
* absent), "quadratic" (presence only) and "isStatic" (boolean).
* </p>
* <p>
* The image is only decoded when it has to be scaled, cropped or converted
* to another format; otherwise its bytes are returned unparsed. The stream
* is closed when no reader is found, and by the helpers which decode the
* image or copy its raw bytes.
* </p>
*
* @param post
* request post parameters. Must not be null.
* @param auth
* true when access rights are OK.
* @param url
* image source URL. Must not be null.
* @param ext
* target image file format. May be null.
* @param imageInStream
* open stream on image content. Must not be null.
* @return an EncodedImage instance.
* @throws IOException
* when no image reader supports the content, or when image could
* not be parsed or encoded to specified format.
*/
public EncodedImage parseAndScale(serverObjects post, boolean auth, DigestURL url, String ext,
ImageInputStream imageInStream) throws IOException {
EncodedImage encodedImage;
// BufferedImage image = ImageIO.read(imageInStream);
Iterator<ImageReader> readers = ImageIO.getImageReaders(imageInStream);
if (!readers.hasNext()) {
try {
/* When no reader can be found, we have to close the stream */
imageInStream.close();
} catch (IOException ignoredException) {
/* Closing is best effort : the unsupported format is reported below */
}
String urlString = url.toNormalform(false);
String errorMessage = "Image format (" + MultiProtocolURL.getFileExtension(urlString) + ") is not supported.";
ConcurrentLog.fine("ImageViewer", errorMessage + "Image URL : " + urlString);
/*
* Throw an exception, which will end in a HTTP 500 response, better
* handled by browsers than an empty image
*/
throw new IOException(errorMessage);
}
/* Only the first matching reader is used */
ImageReader reader = readers.next();
/* seekForwardOnly and ignoreMetadata are both set to true */
reader.setInput(imageInStream, true, true);
int maxwidth = post.getInt("maxwidth", 0);
int maxheight = post.getInt("maxheight", 0);
final boolean quadratic = post.containsKey("quadratic");
boolean isStatic = post.getBoolean("isStatic");
BufferedImage image = null;
/* Stays true as long as the source bytes can be delivered unchanged */
boolean returnRaw = true;
if (!auth || maxwidth != 0 || maxheight != 0) {
// find original size
// NOTE(review): if reading the dimensions throws, the stream does not
// appear to be closed by this method - confirm callers handle it
final int originWidth = reader.getWidth(0);
final int originHeigth = reader.getHeight(0);
// in case of not-authorized access shrink the image to
// prevent
// copyright problems, so that images are not larger than
// thumbnails
Dimension maxDimensions = calculateMaxDimensions(auth, originWidth, originHeigth, maxwidth, maxheight);
// if a quadratic flag is set, we cut the image out to be in
// quadratic shape
int w = originWidth;
int h = originHeigth;
if (quadratic && originWidth != originHeigth) {
Rectangle square = getMaxSquare(originHeigth, originWidth);
h = square.height;
w = square.width;
}
Dimension finalDimensions = calculateDimensions(w, h, maxDimensions);
/* Decode only when the rendered size differs from the original one */
if (originWidth != finalDimensions.width || originHeigth != finalDimensions.height) {
returnRaw = false;
image = readImage(reader);
if (quadratic && originWidth != originHeigth) {
image = makeSquare(image);
}
image = scale(finalDimensions.width, finalDimensions.height, image);
}
}
/* Image do not need to be scaled or cropped */
if (returnRaw) {
if (!reader.getFormatName().equalsIgnoreCase(ext) || imageInStream.getFlushedPosition() != 0) {
/*
* image parsing and reencoding is only needed when source image
* and target formats differ, or when first bytes have been discarded
*/
returnRaw = false;
image = readImage(reader);
}
}
if (returnRaw) {
// NOTE(review): on this path the stream is closed by readRawImage but the
// reader itself does not appear to be disposed - confirm this is intended
byte[] imageData = readRawImage(imageInStream);
encodedImage = new EncodedImage(imageData, ext, isStatic);
} else {
/*
* An error can still occur when transcoding from buffered image to
* target ext : in that case EncodedImage.getImage() is empty.
*/
encodedImage = new EncodedImage(image, ext, isStatic);
if (encodedImage.getImage().length() == 0) {
String errorMessage = "Image could not be encoded to format : " + ext;
ConcurrentLog.fine("ImageViewer", errorMessage + ". Image URL : " + url.toNormalform(false));
throw new IOException(errorMessage);
}
}
return encodedImage;
}
/**
 * Read the first image using specified reader, then dispose the reader and
 * close its ImageInputStream source, whether reading succeeded or not.
 * Input must have been set before using
 * {@link ImageReader#setInput(Object)}
 *
 * @param reader
 *            image reader. Must not be null.
 * @return buffered image
 * @throws IOException
 *             when an error occured
 */
private BufferedImage readImage(ImageReader reader) throws IOException {
    /*
     * Retrieve the input source BEFORE disposing the reader : the result
     * of calling any method on a reader after dispose() is undefined.
     */
    final Object input = reader.getInput();
    try {
        return reader.read(0);
    } finally {
        try {
            reader.dispose();
        } finally {
            if (input instanceof ImageInputStream) {
                try {
                    ((ImageInputStream) input).close();
                } catch (IOException ignored) {
                    /* Closing is best effort : nothing more can be done here */
                }
            }
        }
    }
}
/**
 * Read image data without parsing : the stream is rewound to its beginning,
 * fully copied, and always closed, even when rewinding or reading fails.
 *
 * @param inStream
 *            image source. Must not be null. First bytes must not have been marked discarded ({@link ImageInputStream#getFlushedPosition()} must be zero)
 * @return image data as bytes
 * @throws IOException
 *             when a read/write error occured.
 */
private byte[] readRawImage(ImageInputStream inStream) throws IOException {
    final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    final byte[] buffer = new byte[4096];
    try {
        /* Rewind inside the try block, so that the stream is closed even when seeking fails */
        inStream.seek(0);
        int read;
        while ((read = inStream.read(buffer)) >= 0) {
            outStream.write(buffer, 0, read);
        }
        return outStream.toByteArray();
    } finally {
        try {
            inStream.close();
        } catch (IOException ignored) {
            /* Closing is best effort : data has already been read or an error is already propagating */
        }
    }
}
/**
 * Calculate the dimensions to render an image from its original dimensions
 * and the maximum allowed ones. An image larger than the maximum on any
 * axis is shrunk with a single ratio applied to both axes, and each
 * resulting dimension is at least 1.
 *
 * @param originWidth
 *            original image width
 * @param originHeight
 *            original image height
 * @param max
 *            maximum dimensions. Must not be null.
 * @return dimensions to render image
 */
protected Dimension calculateDimensions(final int originWidth, final int originHeight, final Dimension max) {
    final boolean fitsInMax = max.width >= originWidth && max.height >= originHeight;
    if (fitsInMax) {
        // do not scale
        return new Dimension(originWidth, originHeight);
    }

    // scale image : the most restrictive axis gives the ratio
    final double widthRatio = (originWidth > max.width) ? ((double) max.width) / ((double) originWidth) : 1.0;
    final double heightRatio = (originHeight > max.height) ? ((double) max.height) / ((double) originHeight) : 1.0;
    final double ratio = Math.min(widthRatio, heightRatio);

    if (ratio < 1.0) {
        return new Dimension(Math.max(1, (int) (originWidth * ratio)), Math.max(1, (int) (originHeight * ratio)));
    }
    return new Dimension(Math.max(1, originWidth), Math.max(1, originHeight));
}
/**
 * Calculate image maximum dimensions from original and specified maximum
 * dimensions. With access rights, a maximum of zero means the original
 * value is used for that axis. Without access rights the specified
 * maximums are ignored : the result is limited to 96 per axis, or is 16x16
 * when the original image is not larger than 16 on any axis.
 *
 * @param auth
 *            true when access rights are OK.
 * @param originWidth
 *            original image width
 * @param originHeight
 *            original image height
 * @param maxWidth
 *            requested maximum width (0 for none)
 * @param maxHeight
 *            requested maximum height (0 for none)
 * @return maximum dimensions to render image
 */
protected Dimension calculateMaxDimensions(final boolean auth, final int originWidth, final int originHeight,
        final int maxWidth, final int maxHeight) {
    if (auth) {
        final int width = (maxWidth != 0) ? maxWidth : originWidth;
        final int height = (maxHeight != 0) ? maxHeight : originHeight;
        return new Dimension(width, height);
    }

    // in case of not-authorized access shrink the image to prevent
    // copyright problems, so that images are not larger than thumbnails
    if (originWidth <= 16 && originHeight <= 16) {
        return new Dimension(16, 16);
    }
    return new Dimension(Math.min(96, originWidth), Math.min(96, originHeight));
}
/**
 * Scale image to specified dimensions. When drawing the scaled image
 * fails, a second attempt is made after converting the source image to RGB;
 * when this also fails the source image is returned unchanged.
 *
 * @param width
 *            target width
 * @param height
 *            target height
 * @param image
 *            image to scale. Must not be null.
 * @return a scaled image, or the source image when it could not be scaled
 */
public BufferedImage scale(final int width, final int height, final BufferedImage image) {
    // compute scaled image
    Image scaled = image.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING);
    final MediaTracker mediaTracker = new MediaTracker(new Container());
    mediaTracker.addImage(scaled, 0);
    try {
        mediaTracker.waitForID(0);
    } catch (final InterruptedException ignored) {
        /* Best effort : continue with the scaled image in its current state */
    }
    // make a BufferedImage out of that
    BufferedImage result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
    try {
        drawAndCheck(scaled, result, width, height);
    } catch (final Exception e) {
        /*
         * Exception may be caused by source image color model : try now to
         * convert to RGB before scaling
         */
        try {
            final BufferedImage converted = EncodedImage.convertToRGB(image);
            scaled = converted.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING);
            mediaTracker.addImage(scaled, 1);
            try {
                mediaTracker.waitForID(1);
            } catch (final InterruptedException ignored) {
                /* Best effort : continue with the scaled image in its current state */
            }
            result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
            drawAndCheck(scaled, result, width, height);
        } catch (final Exception e2) {
            result = image;
        }
        ConcurrentLog.fine("ImageViewer", "Image could not be scaled");
    }
    return result;
}

/**
 * Draw the scaled image on target, always releasing the graphics context,
 * then read the first pixel of target to check the outcome.
 */
private void drawAndCheck(final Image scaled, final BufferedImage target, final int width, final int height) {
    final Graphics2D graphics = target.createGraphics();
    try {
        graphics.drawImage(scaled, 0, 0, width, height, null);
    } finally {
        graphics.dispose();
    }
    // check outcome
    final Raster raster = target.getData();
    raster.getPixel(0, 0, new int[raster.getSampleModel().getNumBands()]);
}
/**
 * Compute the largest square, centered on the longest axis, which fits
 * inside the given dimensions.
 *
 * @param h
 *            image height
 * @param w
 *            image width
 * @return max square area fitting inside dimensions
 */
public Rectangle getMaxSquare(final int h, final int w) {
    if (w <= h) {
        /* Portrait or already square : full width, vertically centered */
        return new Rectangle(0, (h - w) / 2, w, w);
    }
    /* Landscape : full height, horizontally centered */
    return new Rectangle((w - h) / 2, 0, h, h);
}
/**
 * Crop image to make a square : the largest square centered on the longest
 * axis is copied, without any scaling, to a new ARGB image.
 *
 * @param image
 *            image to crop. Must not be null.
 * @return a new square image whose side is the smallest dimension of the source image
 */
public BufferedImage makeSquare(BufferedImage image) {
    final int w = image.getWidth();
    final int h = image.getHeight();
    final int side = Math.min(w, h);
    /* Only the longest axis is cropped, equally on both sides */
    final int offsetX = (w > h) ? (w - h) / 2 : 0;
    final int offsetY = (w > h) ? 0 : (h - w) / 2;

    final BufferedImage dst = new BufferedImage(side, side, BufferedImage.TYPE_INT_ARGB);
    final Graphics2D g = dst.createGraphics();
    try {
        /*
         * drawImage takes the corners of the destination and source
         * rectangles : the second corner is (first corner + size), so that
         * both rectangles are exactly side x side and the last row and
         * column are also copied.
         */
        g.drawImage(image, 0, 0, side, side, offsetX, offsetY, offsetX + side, offsetY + side, null);
    } finally {
        g.dispose();
    }
    return dst;
}
}

@ -1,3 +1,22 @@
// ImageViewerPerfTest.java
// -----------------------
// part of YaCy
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
@ -11,39 +30,20 @@ import java.util.TreeMap;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.peers.graphics.EncodedImage;
import net.yacy.server.serverObjects;
// ViewImagePerfTest.java
// -----------------------
// part of YaCy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2006
// created 03.04.2006
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* Test to measure image render performance by ViewImage
* Test to measure image render performance by ImageViewer
*
* @author luc
*
*/
public class ViewImagePerfTest extends ViewImageTest {
public class ImageViewerPerfTest extends ImageViewerTest {
/** Default minimum measurement time */
private static final int DEFAULT_MIN_MEASURE_TIME = 10;
@ -56,7 +56,7 @@ public class ViewImagePerfTest extends ViewImageTest {
* main parameters : args[7] may contain minimum measurement time
* in seconds. Default : 10.
*/
public ViewImagePerfTest(String args[]) {
public ImageViewerPerfTest(String args[]) {
this.minMeasureTime = getMinMeasurementTime(args);
}
@ -87,7 +87,7 @@ public class ViewImagePerfTest extends ViewImageTest {
* @param outDir
* output directory
* @param post
* ViewImage post parameters
* post parameters
* @param failures
* map failed file urls to eventual exception
* @param inFile
@ -100,7 +100,7 @@ public class ViewImagePerfTest extends ViewImageTest {
File inFile) throws IOException {
/* Delete eventual previous result file */
System.out
.println("Measuring ViewImage render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext);
.println("Measuring render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext);
File outFile = new File(outDir, inFile.getName() + "." + ext);
if (outFile.exists()) {
outFile.delete();
@ -115,7 +115,7 @@ public class ViewImagePerfTest extends ViewImageTest {
beginTime = System.nanoTime();
ImageInputStream inStream = ImageIO.createImageInputStream(inFile);
try {
img = ViewImage.parseAndScale(post, true, urlString, ext, inStream);
img = this.VIEWER.parseAndScale(post, true, new DigestURL(urlString), ext, inStream);
} catch (Exception e) {
error = e;
}
@ -131,7 +131,7 @@ public class ViewImagePerfTest extends ViewImageTest {
}
PrintWriter resultsWriter = new PrintWriter(new FileWriter(new File(outDir, "results_perfs.txt"), true));
try {
writeMessage("Measured ViewImage render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext,
writeMessage("Measured render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext,
resultsWriter);
if(img == null) {
writeMessage("Image could not be rendered! Measurement show time needed to read and parse image data until error detection.", resultsWriter);
@ -176,7 +176,7 @@ public class ViewImagePerfTest extends ViewImageTest {
* <li>args[1] : output format name (for example : "jpg") for
* rendered image. Defaut : "png".</li>
* <li>args[2] : ouput folder URL. Default :
* "[system tmp dir]/ViewImageTest".</li>
* "[system tmp dir]/Test".</li>
* <li>args[3] : max width (in pixels) for rendered image. May be
* set to zero to specify no max width. Default : no value.</li>
* <li>args[4] : max height (in pixels) for rendered image. May
@ -194,7 +194,7 @@ public class ViewImagePerfTest extends ViewImageTest {
* when a read/write error occured
*/
public static void main(String args[]) throws IOException {
ViewImagePerfTest test = new ViewImagePerfTest(args);
ImageViewerPerfTest test = new ImageViewerPerfTest(args);
File inFile = test.getInputURL(args);
String ext = test.getEncodingExt(args);
File outDir = test.getOuputDir(args);
@ -207,10 +207,10 @@ public class ViewImagePerfTest extends ViewImageTest {
inFiles = new File[1];
inFiles[0] = inFile;
System.out.println(
"Measuring ViewImage render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext);
"Measuring render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext);
} else if (inFile.isDirectory()) {
inFiles = inFile.listFiles();
System.out.println("Measuring ViewImage render with files in folder : " + inFile.getAbsolutePath()
System.out.println("Measuring render with files in folder : " + inFile.getAbsolutePath()
+ " encoded To : " + ext);
} else {
inFiles = new File[0];

@ -1,3 +1,22 @@
// ImageViewerTest.java
// -----------------------
// part of YaCy
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
@ -13,45 +32,28 @@ import java.util.TreeMap;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.peers.graphics.EncodedImage;
import net.yacy.server.serverObjects;
// ViewImageTest.java
// -----------------------
// part of YaCy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2006
// created 03.04.2006
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import net.yacy.visualization.ImageViewer;
/**
* Test rendering of one or more image files by ViewImage
* Test rendering of one or more image files by ImageViewer
*
* @author luc
*
*/
public class ViewImageTest {
public class ImageViewerTest {
/** Default image */
private static final String DEFAULT_IMG_RESOURCES = "/viewImageTest/test";
/** Default output encoding format */
private static final String DEFAULT_OUT_EXT = "png";
/** Viewer instance */
protected final ImageViewer VIEWER = new ImageViewer();
/**
* @param args
@ -65,7 +67,7 @@ public class ViewImageTest {
if (args != null && args.length > 0) {
fileURL = args[0];
} else {
URL defaultURL = ViewImageTest.class.getResource(DEFAULT_IMG_RESOURCES);
URL defaultURL = ImageViewerTest.class.getResource(DEFAULT_IMG_RESOURCES);
if (defaultURL == null) {
throw new IllegalArgumentException("File not found : " + DEFAULT_IMG_RESOURCES);
}
@ -97,7 +99,7 @@ public class ViewImageTest {
}
/**
* Build post parameters to use with ViewImage
* Build post parameters to use with ImageViewer
*
* @param args
* main parameters : args[3] and args[4] may respectively contain
@ -238,7 +240,7 @@ public class ViewImageTest {
* @param outDir
* output directory
* @param post
* ViewImage post parameters
* ImageViewer post parameters
* @param inFiles
* files or directories to process
* @param processedFiles
@ -269,7 +271,7 @@ public class ViewImageTest {
* parameters must not be null.
* @param ext output encoding image format
* @param outDir output directory
* @param post ViewImage post parameters
* @param post ImageViewer post parameters
* @param failures map failed file urls to eventual exception
* @param inFile file image to process
* @throws IOException when an read/write error occured
@ -287,7 +289,7 @@ public class ViewImageTest {
EncodedImage img = null;
Throwable error = null;
try {
img = ViewImage.parseAndScale(post, true, urlString, ext, inStream);
img = this.VIEWER.parseAndScale(post, true, new DigestURL(urlString), ext, inStream);
} catch (Throwable e) {
error = e;
}
@ -322,7 +324,7 @@ public class ViewImageTest {
* <li>args[1] : output format name (for example : "jpg") for
* rendered image. Defaut : "png".</li>
* <li>args[2] : ouput folder URL. Default :
* "[system tmp dir]/ViewImageTest".</li>
* "[system tmp dir]/ImageViewerTest".</li>
* <li>args[3] : max width (in pixels) for rendered image. May be
* set to zero to specify no max width. Default : no value.</li>
* <li>args[4] : max height (in pixels) for rendered image. May
@ -338,7 +340,7 @@ public class ViewImageTest {
* when a read/write error occured
*/
public static void main(String args[]) throws IOException {
ViewImageTest test = new ViewImageTest();
ImageViewerTest test = new ImageViewerTest();
File inFile = test.getInputURL(args);
String ext = test.getEncodingExt(args);
File outDir = test.getOuputDir(args);
@ -350,11 +352,11 @@ public class ViewImageTest {
if (inFile.isFile()) {
inFiles = new File[1];
inFiles[0] = inFile;
System.out.println("Testing ViewImage rendering with input file : " + inFile.getAbsolutePath()
System.out.println("Testing ImageViewer rendering with input file : " + inFile.getAbsolutePath()
+ " encoded To : " + ext);
} else if (inFile.isDirectory()) {
inFiles = inFile.listFiles();
System.out.println("Testing ViewImage rendering with input files in folder : " + inFile.getAbsolutePath()
System.out.println("Testing ImageViewer rendering with input files in folder : " + inFile.getAbsolutePath()
+ " encoded To : " + ext);
} else {
inFiles = new File[0];

@ -0,0 +1,125 @@
/**
* ContentScraperTest
* part of YaCy
* Copyright 2016 by luccioman; https://github.com/luccioman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.parser.html;
import java.awt.Dimension;
import java.util.Set;
import org.junit.Assert;
import org.junit.Test;
/**
* Unit tests for ContentScraper class.
* @author luc
*
*/
public class ContentScraperTest {
/**
 * Check sizes parsing on valid, duplicated, badly formatted, empty and null
 * attribute values.
 */
@Test
public final void testParseSizes() {
    /* Normal case : one valid size */
    final Set<Dimension> single = ContentScraper.parseSizes("96x128");
    Assert.assertEquals(1, single.size());
    Assert.assertTrue(single.contains(new Dimension(96, 128)));

    /* "any" keyword produces no size */
    final Set<Dimension> anyKeyword = ContentScraper.parseSizes("any");
    Assert.assertEquals(0, anyKeyword.size());

    /* Multiple valid sizes, lower and upper case separator */
    final Set<Dimension> multiple = ContentScraper.parseSizes("96x128 16X16 1X2 1024x768");
    Assert.assertEquals(4, multiple.size());
    for (final Dimension expected : new Dimension[] { new Dimension(96, 128), new Dimension(16, 16),
            new Dimension(1, 2), new Dimension(1024, 768) }) {
        Assert.assertTrue(multiple.contains(expected));
    }

    /* Duplicate entries are counted once */
    final Set<Dimension> duplicates = ContentScraper.parseSizes("96x128 96X128 1X2 96x128");
    Assert.assertEquals(2, duplicates.size());
    for (final Dimension expected : new Dimension[] { new Dimension(96, 128), new Dimension(1, 2) }) {
        Assert.assertTrue(duplicates.contains(expected));
    }

    /* Multiple inner and trailing spaces */
    final Set<Dimension> spaced = ContentScraper.parseSizes(" 96x128 16X16 ");
    Assert.assertEquals(2, spaced.size());
    for (final Dimension expected : new Dimension[] { new Dimension(96, 128), new Dimension(16, 16) }) {
        Assert.assertTrue(spaced.contains(expected));
    }

    /* Empty string */
    final Set<Dimension> fromEmpty = ContentScraper.parseSizes("");
    Assert.assertEquals(0, fromEmpty.size());

    /* null string */
    final Set<Dimension> fromNull = ContentScraper.parseSizes(null);
    Assert.assertEquals(0, fromNull.size());

    /* Invalid sizes only */
    final Set<Dimension> invalid = ContentScraper.parseSizes("096x0128 -16x-16 0x0 x768 78x axb 1242");
    Assert.assertEquals(0, invalid.size());

    /* Mix of valid and invalid sizes : only the valid ones are kept */
    final Set<Dimension> mixed = ContentScraper.parseSizes("96x128 16X16 axb 123 78x32");
    Assert.assertEquals(3, mixed.size());
    for (final Dimension expected : new Dimension[] { new Dimension(96, 128), new Dimension(16, 16),
            new Dimension(78, 32) }) {
        Assert.assertTrue(mixed.contains(expected));
    }
}
/**
 * Check space separated tokens parsing on regular, duplicated, empty and
 * null values.
 */
@Test
public final void testParseSpaceSeparatedTokens() {
    /* Normal case : two tokens */
    final Set<String> pair = ContentScraper.parseSpaceSeparatedTokens("abc de");
    Assert.assertEquals(2, pair.size());
    for (final String expected : new String[] { "abc", "de" }) {
        Assert.assertTrue(pair.contains(expected));
    }

    /* One item only */
    final Set<String> single = ContentScraper.parseSpaceSeparatedTokens("abc");
    Assert.assertEquals(1, single.size());
    Assert.assertTrue(single.contains("abc"));

    /* Multiple inner and trailing spaces */
    final Set<String> spaced = ContentScraper.parseSpaceSeparatedTokens(" abc d efff fgj ");
    Assert.assertEquals(4, spaced.size());
    for (final String expected : new String[] { "abc", "d", "efff", "fgj" }) {
        Assert.assertTrue(spaced.contains(expected));
    }

    /* Duplicate entries : ignoring case is not the purpose of this function */
    final Set<String> duplicates = ContentScraper.parseSpaceSeparatedTokens("abc bb abc abc ABC");
    Assert.assertEquals(3, duplicates.size());
    for (final String expected : new String[] { "abc", "ABC", "bb" }) {
        Assert.assertTrue(duplicates.contains(expected));
    }

    /* Empty string */
    final Set<String> fromEmpty = ContentScraper.parseSpaceSeparatedTokens("");
    Assert.assertEquals(0, fromEmpty.size());

    /* Null string */
    final Set<String> fromNull = ContentScraper.parseSpaceSeparatedTokens(null);
    Assert.assertEquals(0, fromNull.size());
}
}

@ -0,0 +1,192 @@
/**
* IconEntryTest
* part of YaCy
* Copyright 2016 by luccioman; https://github.com/luccioman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.parser.html;
import java.awt.Dimension;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Set;
import org.junit.Assert;
import org.junit.Test;
import net.yacy.cora.document.id.DigestURL;
/**
* Unit tests for IconEntry class.
* @author luc
*
*/
public class IconEntryTest {
/**
 * Check the distance between two sizes on regular, equal, negative and null
 * values.
 */
@Test
public final void testGetDistance() {
    final Dimension reference = new Dimension(5, 8);

    /* Normal case : one size has both width and height greater */
    final Dimension greater = new Dimension(7, 12);
    Assert.assertEquals(3.0, IconEntry.getDistance(reference, greater), 0.0);
    /* Inverted parameters should produce the same result */
    Assert.assertEquals(3.0, IconEntry.getDistance(greater, reference), 0.0);

    /* Equal sizes */
    final Dimension same = new Dimension(5, 8);
    Assert.assertEquals(0.0, IconEntry.getDistance(reference, same), 0.0);
    /* Equal sizes, checked a second time with a new instance */
    final Dimension sameAgain = new Dimension(5, 8);
    Assert.assertEquals(0.0, IconEntry.getDistance(reference, sameAgain), 0.0);

    /* Only one dimension differs */
    final Dimension higher = new Dimension(5, 12);
    Assert.assertEquals(2.0, IconEntry.getDistance(reference, higher), 0.0);
    final Dimension wider = new Dimension(10, 8);
    Assert.assertEquals(2.5, IconEntry.getDistance(reference, wider), 0.0);

    /* width lower, height upper */
    final Dimension narrowerAndHigher = new Dimension(3, 12);
    Assert.assertEquals(3.0, IconEntry.getDistance(reference, narrowerAndHigher), 0.0);

    /* negative values */
    final Dimension negativeFirst = new Dimension(-5, -8);
    final Dimension negativeSecond = new Dimension(-7, -12);
    Assert.assertEquals(3.0, IconEntry.getDistance(negativeFirst, negativeSecond), 0.0);

    /* one null */
    final Dimension missing = null;
    final Dimension present = new Dimension(-7, -12);
    Assert.assertEquals(Double.MAX_VALUE, IconEntry.getDistance(missing, present), 0.0);
}
@Test
public final void testGetClosestSize() throws MalformedURLException {
/* Preferred size in sizes set */
Set<String> rels = new HashSet<>();
rels.add(IconLinkRelations.ICON.getRelValue());
Set<Dimension> sizes = new HashSet<>();
sizes.add(new Dimension(128,128));
sizes.add(new Dimension(256,512));
sizes.add(new Dimension(16,16));
Dimension preferredSize = new Dimension(16, 16);
IconEntry icon = new IconEntry(new DigestURL("http://yacy.net"), rels, sizes);
Dimension result = icon.getClosestSize(preferredSize);
Assert.assertEquals(preferredSize, result);
/* Preferred size lower than all sizes in set */
preferredSize = new Dimension(12, 12);
result = icon.getClosestSize(preferredSize);
Assert.assertEquals(new Dimension(16,16), result);
/* Preferred size over than all sizes in set */
preferredSize = new Dimension(1992, 1024);
result = icon.getClosestSize(preferredSize);
Assert.assertEquals(new Dimension(256, 512), result);
/* Preferred size between sizes in set */
preferredSize = new Dimension(17, 18);
result = icon.getClosestSize(preferredSize);
Assert.assertEquals(new Dimension(16, 16), result);
/* Sizes set contains only one item */
sizes = new HashSet<>();
sizes.add(new Dimension(128,128));
icon = new IconEntry(new DigestURL("http://yacy.net"), rels, sizes);
preferredSize = new Dimension(1992, 1024);
result = icon.getClosestSize(preferredSize);
Assert.assertEquals(new Dimension(128, 128), result);
/* Empty sizes set */
sizes = new HashSet<>();
icon = new IconEntry(new DigestURL("http://yacy.net"), rels, sizes);
preferredSize = new Dimension(16, 16);
result = icon.getClosestSize(preferredSize);
Assert.assertNull(result);
/* Null preferred size */
sizes = new HashSet<>();
sizes.add(new Dimension(128,128));
sizes.add(new Dimension(256,512));
sizes.add(new Dimension(16,16));
icon = new IconEntry(new DigestURL("http://yacy.net"), rels, sizes);
preferredSize = null;
result = icon.getClosestSize(preferredSize);
Assert.assertNull(result);
}
@Test
public final void testSizesToString() throws MalformedURLException {
/* Multiple values in sizes set */
Set<String> rels = new HashSet<>();
rels.add(IconLinkRelations.ICON.getRelValue());
Set<Dimension> sizes = new HashSet<>();
sizes.add(new Dimension(128,128));
sizes.add(new Dimension(256,512));
sizes.add(new Dimension(16,16));
IconEntry icon = new IconEntry(new DigestURL("http://yacy.net"), rels, sizes);
String sizesStr = icon.sizesToString();
/* The set is not ordered, only check result contains what we expect */
Assert.assertTrue(sizesStr.contains("128x128"));
Assert.assertTrue(sizesStr.contains("256x512"));
Assert.assertTrue(sizesStr.contains("16x16"));
Assert.assertTrue(sizesStr.contains(" "));
/* One value in sizes set */
sizes = new HashSet<>();
sizes.add(new Dimension(128,128));
icon = new IconEntry(new DigestURL("http://yacy.net"), rels, sizes);
sizesStr = icon.sizesToString();
Assert.assertEquals("128x128", sizesStr);
/* Empty sizes set */
sizes = new HashSet<>();
icon = new IconEntry(new DigestURL("http://yacy.net"), rels, sizes);
sizesStr = icon.sizesToString();
Assert.assertTrue(sizesStr.isEmpty());
}
@Test
public final void testRelToString() throws MalformedURLException {
/* Multiple values in rel set */
Set<String> rels = new HashSet<>();
rels.add(IconLinkRelations.ICON.getRelValue());
rels.add(IconLinkRelations.APPLE_TOUCH_ICON.getRelValue());
rels.add(IconLinkRelations.MASK_ICON.getRelValue());
Set<Dimension> sizes = new HashSet<>();
sizes.add(new Dimension(128,128));
IconEntry icon = new IconEntry(new DigestURL("http://yacy.net"), rels, sizes);
String relStr = icon.relToString();
/* The set is not ordered, only check result contains what we expect */
Assert.assertTrue(relStr.contains(IconLinkRelations.ICON.getRelValue()));
Assert.assertTrue(relStr.contains(IconLinkRelations.APPLE_TOUCH_ICON.getRelValue()));
Assert.assertTrue(relStr.contains(IconLinkRelations.MASK_ICON.getRelValue()));
Assert.assertTrue(relStr.contains(" "));
/* One value in rel set */
rels = new HashSet<>();
rels.add(IconLinkRelations.ICON.getRelValue());
icon = new IconEntry(new DigestURL("http://yacy.net"), rels, sizes);
relStr = icon.relToString();
Assert.assertEquals(IconLinkRelations.ICON.getRelValue(), relStr);
}
}

@ -0,0 +1,213 @@
/**
* URIMetadataNodeTest
* part of YaCy
* Copyright 2016 by luccioman; https://github.com/luccioman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.data.meta;
import java.awt.Dimension;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.junit.Assert;
import org.junit.Test;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.document.parser.html.IconEntry;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
/**
 * Unit tests for URIMetadataNode class.
 *
 * @author luc
 */
public class URIMetadataNodeTest {

    /**
     * Builds a metadata node on http://somehost.org with the four icon fields
     * (URL stubs, protocols, rel and sizes) valued for four icons.
     *
     * @return the valued metadata node
     * @throws MalformedURLException when the document URL can not be parsed
     */
    private static URIMetadataNode newFourIconsNode() throws MalformedURLException {
        final URIMetadataNode node = new URIMetadataNode(new DigestURL("http://somehost.org"));

        final String[] urlStubs = new String[] { "somehost.org/static/images/icon16.png",
                "somehost.org/static/images/icon32.png", "somehost.org/static/images/icon64.png",
                "somehost.org/static/images/iconApple128.png" };
        node.setField(CollectionSchema.icons_urlstub_sxt.getSolrFieldName(), urlStubs);

        final List<String> indexedProtocols = CollectionConfiguration
                .protocolList2indexedList(Arrays.asList(new String[] { "http", "https", "https", "http" }));
        node.setField(CollectionSchema.icons_protocol_sxt.getSolrFieldName(), indexedProtocols);

        final String[] relations = new String[] { "icon", "icon", "icon", "apple-touch-icon" };
        node.setField(CollectionSchema.icons_rel_sxt.getSolrFieldName(), relations);

        final String[] dimensions = new String[] { "16x24", "32x32", "58x64", "128x128" };
        node.setField(CollectionSchema.icons_sizes_sxt.getSolrFieldName(), dimensions);

        return node;
    }

    /**
     * Asserts that the icon has exactly one size and one rel value, equal to the expected ones.
     *
     * @param icon the icon entry to check
     * @param expectedWidth the expected width of the only size
     * @param expectedHeight the expected height of the only size
     * @param expectedRel the expected only rel value
     */
    private static void assertSingleSizeAndRel(final IconEntry icon, final int expectedWidth,
            final int expectedHeight, final String expectedRel) {
        Assert.assertEquals(1, icon.getSizes().size());
        final Dimension onlySize = icon.getSizes().iterator().next();
        Assert.assertEquals(expectedWidth, onlySize.width);
        Assert.assertEquals(expectedHeight, onlySize.height);
        Assert.assertEquals(1, icon.getRel().size());
        Assert.assertEquals(expectedRel, icon.getRel().iterator().next());
    }

    /**
     * Three standard icons with different sizes, one non-standard with a larger
     * size
     */
    @Test
    public final void testGetIcons4Items() throws MalformedURLException {
        final Collection<IconEntry> icons = newFourIconsNode().getIcons();

        /* Count the icons recognized by their URL, checking each one's size and rel */
        int recognized = 0;
        for (final IconEntry icon : icons) {
            final String iconUrl = icon.getUrl().toNormalform(false);
            if ("http://somehost.org/static/images/icon16.png".equals(iconUrl)) {
                assertSingleSizeAndRel(icon, 16, 24, "icon");
                recognized++;
            } else if ("https://somehost.org/static/images/icon32.png".equals(iconUrl)) {
                assertSingleSizeAndRel(icon, 32, 32, "icon");
                recognized++;
            } else if ("https://somehost.org/static/images/icon64.png".equals(iconUrl)) {
                assertSingleSizeAndRel(icon, 58, 64, "icon");
                recognized++;
            } else if ("http://somehost.org/static/images/iconApple128.png".equals(iconUrl)) {
                assertSingleSizeAndRel(icon, 128, 128, "apple-touch-icon");
                recognized++;
            }
        }
        Assert.assertEquals(4, recognized);
    }

    /**
     * Only icons_urlstub_sxt field valued
     */
    @Test
    public final void testGetIconsOnlyIconsUrlstubSxt() throws MalformedURLException {
        final URIMetadataNode node = new URIMetadataNode(new DigestURL("http://somehost.org"));
        final String[] urlStubs = new String[] { "somehost.org/static/images/icon16.png",
                "somehost.org/static/images/icon32.png", "somehost.org/static/images/icon64.png",
                "somehost.org/static/images/iconApple124.png" };
        node.setField(CollectionSchema.icons_urlstub_sxt.getSolrFieldName(), urlStubs);

        Assert.assertEquals(4, node.getIcons().size());
    }

    /**
     * Only one standard icon
     */
    @Test
    public final void testGetIcons1Item() throws MalformedURLException {
        final URIMetadataNode node = new URIMetadataNode(new DigestURL("http://somehost.org"));
        node.setField(CollectionSchema.icons_urlstub_sxt.getSolrFieldName(),
                new String[] { "somehost.org/static/images/icon16.png" });
        final List<String> indexedProtocols = CollectionConfiguration
                .protocolList2indexedList(Arrays.asList(new String[] { "http" }));
        node.setField(CollectionSchema.icons_protocol_sxt.getSolrFieldName(), indexedProtocols);
        node.setField(CollectionSchema.icons_rel_sxt.getSolrFieldName(), new String[] { "icon" });
        node.setField(CollectionSchema.icons_sizes_sxt.getSolrFieldName(), new String[] { "16x16" });

        final Collection<IconEntry> icons = node.getIcons();
        Assert.assertEquals(1, icons.size());

        final IconEntry onlyIcon = icons.iterator().next();
        Assert.assertEquals(1, onlyIcon.getSizes().size());

        final Dimension onlySize = onlyIcon.getSizes().iterator().next();
        Assert.assertEquals(16.0, onlySize.getWidth(), 0.0);
        Assert.assertEquals(16.0, onlySize.getHeight(), 0.0);
    }

    /**
     * No Icon
     */
    @Test
    public final void testGetIconsNoIcon() throws MalformedURLException {
        final URIMetadataNode node = new URIMetadataNode(new DigestURL("http://somehost.org"));
        Assert.assertEquals(0, node.getIcons().size());
    }

    /**
     * Check encoding/decoding consistency
     *
     * @throws MalformedURLException
     */
    @Test
    public final void testEncodeDecode() throws MalformedURLException {
        final String serialized = newFourIconsNode().toString();
        final URIMetadataNode restored = URIMetadataNode.importEntry(serialized, "dht");
        final Collection<IconEntry> icons = restored.getIcons();

        /*
         * A single icon is expected after the round trip : per the original
         * author's note, only the icon closest to 16x16 pixels is encoded. Its
         * sizes come back empty and its rel holds the single value "icon".
         */
        Assert.assertEquals(1, icons.size());
        final IconEntry restoredIcon = icons.iterator().next();
        Assert.assertEquals(0, restoredIcon.getSizes().size());
        Assert.assertEquals("http://somehost.org/static/images/icon16.png",
                restoredIcon.getUrl().toNormalform(false));
        Assert.assertEquals(1, restoredIcon.getRel().size());
        Assert.assertEquals("icon", restoredIcon.getRel().iterator().next());
    }

    /**
     * Check encoding/decoding consistency when document has no indexed icon
     *
     * @throws MalformedURLException
     */
    @Test
    public final void testEncodeDecodeNoIcon() throws MalformedURLException {
        final URIMetadataNode node = new URIMetadataNode(new DigestURL("http://somehost.org"));
        final String serialized = node.toString();
        final URIMetadataNode restored = URIMetadataNode.importEntry(serialized, "dht");
        Assert.assertEquals(0, restored.getIcons().size());
    }
}

@ -0,0 +1,192 @@
/**
* yacysearchitemTest
* Copyright 2016 by luccioman; https://github.com/luccioman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.awt.Dimension;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.List;
import org.junit.Assert;
import org.junit.Test;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
/**
 * Unit tests for yacysearchitem class.
 *
 * @author luc
 */
public class yacysearchitemTest {

    /**
     * Builds a metadata node on http://somehost.org with its icon fields valued.
     * Fields are set in this order : URL stubs, protocols, rel, then sizes.
     *
     * @param urlStubs icon URLs without protocol
     * @param protocols protocol of each icon URL
     * @param rels rel attribute value of each icon
     * @param sizes sizes attribute value of each icon, or null to leave the sizes field unset
     * @return the valued metadata node
     * @throws MalformedURLException when the document URL can not be parsed
     */
    private static URIMetadataNode newNodeWithIcons(final String[] urlStubs, final String[] protocols,
            final String[] rels, final String[] sizes) throws MalformedURLException {
        final URIMetadataNode metadataNode = new URIMetadataNode(new DigestURL("http://somehost.org"));
        metadataNode.setField(CollectionSchema.icons_urlstub_sxt.getSolrFieldName(), urlStubs);
        final List<String> indexedProtocols = CollectionConfiguration
                .protocolList2indexedList(Arrays.asList(protocols));
        metadataNode.setField(CollectionSchema.icons_protocol_sxt.getSolrFieldName(), indexedProtocols);
        metadataNode.setField(CollectionSchema.icons_rel_sxt.getSolrFieldName(), rels);
        if (sizes != null) {
            metadataNode.setField(CollectionSchema.icons_sizes_sxt.getSolrFieldName(), sizes);
        }
        return metadataNode;
    }

    /**
     * Three standard icons with different sizes, one non-standard with a larger
     * size
     *
     * @throws MalformedURLException
     */
    @Test
    public final void testGetFaviconURL() throws MalformedURLException {
        final URIMetadataNode metadataNode = newNodeWithIcons(
                new String[] { "someHost.org/static/images/icon16.png", "somehost.org/static/images/icon32.png",
                        "somehost.org/static/images/icon64.png",
                        "somehost.org/static/images/iconApple124.png" },
                new String[] { "http", "http", "http", "http" },
                new String[] { "icon", "icon", "icon", "apple-touch-icon" },
                new String[] { "16x16", "32x32", "64x64", "128x128" });

        /* Search for a size present in icons collection */
        DigestURL faviconURL = yacysearchitem.getFaviconURL(metadataNode, new Dimension(32, 32));
        Assert.assertNotNull(faviconURL);
        Assert.assertEquals("http://somehost.org/static/images/icon32.png", faviconURL.toNormalform(false));

        /* Search for a size not in icons collection */
        faviconURL = yacysearchitem.getFaviconURL(metadataNode, new Dimension(40, 40));
        Assert.assertNotNull(faviconURL);
        Assert.assertEquals("http://somehost.org/static/images/icon32.png", faviconURL.toNormalform(false));

        /*
         * Search for a size equal to the non-standard icon's one : a standard
         * icon is still preferred
         */
        faviconURL = yacysearchitem.getFaviconURL(metadataNode, new Dimension(128, 128));
        Assert.assertNotNull(faviconURL);
        Assert.assertEquals("http://somehost.org/static/images/icon64.png", faviconURL.toNormalform(false));
    }

    /**
     * Only non-standard icons
     *
     * @throws MalformedURLException
     */
    @Test
    public final void testGetFaviconURLNonStandard() throws MalformedURLException {
        final URIMetadataNode metadataNode = newNodeWithIcons(
                new String[] { "somehost.org/static/images/mask32.png",
                        "somehost.org/static/images/fluid.64.png",
                        "somehost.org/static/images/iconApple124.png" },
                new String[] { "http", "http", "http" },
                new String[] { "mask-icon", "fluid-icon", "apple-touch-icon" },
                new String[] { "32x32", "64x64", "128x128" });

        /* Non standard icon is returned as fallback */
        final DigestURL faviconURL = yacysearchitem.getFaviconURL(metadataNode, new Dimension(32, 32));
        Assert.assertNotNull(faviconURL);
        Assert.assertEquals("http://somehost.org/static/images/mask32.png", faviconURL.toNormalform(false));
    }

    /**
     * One standard icon with multiple sizes
     *
     * @throws MalformedURLException
     */
    @Test
    public final void testGetFaviconURLMultiSizes() throws MalformedURLException {
        final URIMetadataNode metadataNode = newNodeWithIcons(
                new String[] { "somehost.org/static/images/favicon.ico" },
                new String[] { "http" },
                new String[] { "icon" },
                new String[] { "16x16 32x32 64x64" });

        /* Search for a size in sizes set */
        DigestURL faviconURL = yacysearchitem.getFaviconURL(metadataNode, new Dimension(32, 32));
        Assert.assertNotNull(faviconURL);
        Assert.assertEquals("http://somehost.org/static/images/favicon.ico", faviconURL.toNormalform(false));

        /* Search for a size not in sizes set */
        faviconURL = yacysearchitem.getFaviconURL(metadataNode, new Dimension(40, 40));
        Assert.assertNotNull(faviconURL);
        Assert.assertEquals("http://somehost.org/static/images/favicon.ico", faviconURL.toNormalform(false));
    }

    /**
     * One standard icon with no size
     *
     * @throws MalformedURLException
     */
    @Test
    public final void testGetFaviconURLNoSize() throws MalformedURLException {
        final URIMetadataNode metadataNode = newNodeWithIcons(
                new String[] { "somehost.org/static/images/favicon.ico" },
                new String[] { "http" },
                new String[] { "icon" },
                null);

        final DigestURL faviconURL = yacysearchitem.getFaviconURL(metadataNode, new Dimension(32, 32));
        Assert.assertNotNull(faviconURL);
        Assert.assertEquals("http://somehost.org/static/images/favicon.ico", faviconURL.toNormalform(false));
    }

    /**
     * One non-standard icon with no size
     *
     * @throws MalformedURLException
     */
    @Test
    public final void testGetFaviconURLNonStandardNoSize() throws MalformedURLException {
        /* rel value was misspelled "appel-touch-icon" : use the same value as the other tests of this class */
        final URIMetadataNode metadataNode = newNodeWithIcons(
                new String[] { "somehost.org/static/images/favicon.png" },
                new String[] { "http" },
                new String[] { "apple-touch-icon" },
                null);

        final DigestURL faviconURL = yacysearchitem.getFaviconURL(metadataNode, new Dimension(32, 32));
        Assert.assertNotNull(faviconURL);
        Assert.assertEquals("http://somehost.org/static/images/favicon.png", faviconURL.toNormalform(false));
    }

    /**
     * No icon in document
     *
     * @throws MalformedURLException
     */
    @Test
    public final void testGetFaviconURLNoIcon() throws MalformedURLException {
        final URIMetadataNode metadataNode = new URIMetadataNode(new DigestURL("http://someHost.org"));

        /* Default fallback favicon URL should be generated */
        final DigestURL faviconURL = yacysearchitem.getFaviconURL(metadataNode, new Dimension(32, 32));
        /* Fail with a clear assertion rather than a NullPointerException, as the other tests do */
        Assert.assertNotNull(faviconURL);
        Assert.assertEquals("http://somehost.org/favicon.ico", faviconURL.toNormalform(false));
    }
}

@ -0,0 +1,96 @@
// URLLicenseConcurrentTest.java
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.data;
import java.net.MalformedURLException;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.ConcurrentLog;
/**
 * Test URLLicense reliability when used by concurrent threads
 *
 * @author luc
 */
public class URLLicenseConcurrentTest {

    /**
     * Thread emulating a client who tries to fetch some url content : it
     * repeatedly acquires a license for a URL, then releases it and checks the
     * URL given back is the one the license was acquired for.
     *
     * @author luc
     */
    private static class ClientThread extends Thread {

        /** URL each license is acquired for */
        private final String testURL = "http://yacy.net";

        /** Number of acquire/release cycles run by the thread */
        private final int steps = 100000;

        @Override
        public void run() {
            System.out.println(this.getName() + " started...");
            final DigestURL url;
            try {
                url = new DigestURL(this.testURL);
            } catch (MalformedURLException e1) {
                e1.printStackTrace();
                /* Nothing can be tested without a URL : stop here instead of failing later on a null reference */
                return;
            }
            final String normalizedURL = url.toNormalform(true);
            for (int step = 0; step < this.steps; step++) {
                final String license = URLLicense.aquireLicense(url);
                // Thread.sleep() may optionally be called here
                final String retrievedURL = URLLicense.releaseLicense(license);
                if (!normalizedURL.equals(retrievedURL)) {
                    System.err.println("Licence lost! license : " + license + ", step : " + step + ", Thread : " + this.getName());
                }
            }
            System.out.println(this.getName() + " finished!");
        }
    }

    /**
     * Runs clients concurrently : until the end, no error message should be displayed in console.
     *
     * @param args command line arguments (not used)
     */
    public static void main(String[] args) {
        final long beginTime = System.nanoTime();
        try {
            final ClientThread[] threads = new ClientThread[10];
            for (int i = 0; i < threads.length; i++) {
                threads[i] = new URLLicenseConcurrentTest.ClientThread();
                threads[i].setName("ClientThread" + i);
                threads[i].start();
            }
            for (final ClientThread thread : threads) {
                try {
                    thread.join();
                } catch (InterruptedException e) {
                    /* Keep the interrupt status visible to the caller and stop waiting for the clients */
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        } finally {
            final long time = System.nanoTime() - beginTime;
            System.out.println("Test run in " + time / 1000000 + "ms");
            ConcurrentLog.shutdown();
        }
    }
}
Loading…
Cancel
Save