Added favicon url transmission in RWI chunks.

pull/39/head
luc 9 years ago
parent 480772c070
commit 07222b3e1a

@ -356,49 +356,21 @@ public class yacysearchitem {
* We look preferably for a standard icon with preferred size, but
* accept as a fallback other icons below 128x128 or with no known size
*/
IconEntry faviconEntry = null;
boolean foundStandard = false;
double closestDistance = Double.MAX_VALUE;
for (IconEntry icon : result.getIcons()) {
boolean isStandard = icon.isStandardIcon();
double distance = IconEntry.getDistance(icon.getClosestSize(preferredSize), preferredSize);
boolean match = false;
if (foundStandard) {
/*
* Already found a standard icon : now must find a standard icon
* with closer size
*/
match = isStandard && distance < closestDistance;
} else {
/*
* No standard icon yet found : prefer a standard icon, or check
* size
*/
match = isStandard || distance < closestDistance;
}
if (match) {
faviconEntry = icon;
closestDistance = distance;
foundStandard = isStandard;
if (isStandard && distance == 0.0) {
break;
}
}
}
IconEntry faviconEntry = result.getFavicon(preferredSize);
DigestURL faviconURL;
try {
if (faviconEntry == null) {
if (faviconEntry == null) {
try {
String defaultFaviconURL = result.url().getProtocol() + "://" + result.url().getHost()
+ ((result.url().getPort() != -1) ? (":" + result.url().getPort()) : "") + "/favicon.ico";
faviconURL = new DigestURL(defaultFaviconURL);
} else {
faviconURL = faviconEntry.getUrl();
} catch (final MalformedURLException e1) {
ConcurrentLog.logException(e1);
faviconURL = null;
}
} catch (final MalformedURLException e1) {
ConcurrentLog.logException(e1);
faviconURL = null;
} else {
faviconURL = faviconEntry.getUrl();
}
return faviconURL;
}

@ -57,6 +57,7 @@ import net.yacy.document.Tokenizer;
import net.yacy.document.parser.pdfParser;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.IconEntry;
import net.yacy.document.parser.html.IconLinkRelations;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.data.word.WordReferenceVars;
@ -96,6 +97,12 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
private String alternative_urlname;
private TextSnippet textSnippet = null;
/**
* Creates an instance from encoded properties.
* @param prop encoded properties
* @param collection collection origin (e.g. "dht")
* @throws MalformedURLException
*/
public URIMetadataNode(final Properties prop, String collection) throws MalformedURLException {
// generates a plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
@ -165,7 +172,13 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
if (prop.containsKey("wi")) {
this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))), false);
}
if (prop.containsKey("favicon")) {
final String rawFaviconURL = crypt.simpleDecode(prop.getProperty("favicon", ""));
DigestURL faviconURL = new DigestURL(rawFaviconURL);
this.setIconsFields(faviconURL);
}
}
public URIMetadataNode(final SolrDocument doc) throws MalformedURLException {
super();
@ -569,7 +582,73 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
}
return icons;
}
/**
 * Picks the icon entry of this solr document that best fits the preferred size.
 * A standard icon (see {@link IconEntry#isStandardIcon()}) is always preferred:
 * as long as none is held, any standard icon, or any icon strictly closer to the
 * preferred size than the current candidate, is accepted as a fallback; once a
 * standard icon is held, only a standard icon with a strictly smaller distance
 * replaces it. The search ends early on a standard icon at distance 0.0.
 *
 * @param preferredSize preferred size
 * @return the selected icon entry, or null when no icon was selected
 */
public IconEntry getFavicon(Dimension preferredSize) {
    IconEntry best = null;
    boolean bestIsStandard = false;
    double bestDistance = Double.MAX_VALUE;

    for (final IconEntry candidate : this.getIcons()) {
        final boolean standard = candidate.isStandardIcon();
        final double candidateDistance = IconEntry.getDistance(candidate.getClosestSize(preferredSize),
                preferredSize);
        final boolean closer = candidateDistance < bestDistance;

        /*
         * Holding a standard icon : only a closer standard icon may replace it.
         * Otherwise : take any standard icon, or any icon with a closer size.
         */
        final boolean accepted = bestIsStandard ? (standard && closer) : (standard || closer);
        if (!accepted) {
            continue;
        }

        best = candidate;
        bestDistance = candidateDistance;
        bestIsStandard = standard;

        if (standard && candidateDistance == 0.0) {
            /* Standard icon at distance zero : no better candidate can follow */
            return best;
        }
    }
    return best;
}
/**
 * Fills the icons related fields of this solr document from a single icon URL.
 * Each of the four fields (protocol, url stub, rel, sizes) receives a list
 * holding one value, or an empty list when iconURL is null.
 *
 * @param iconURL icon URL, may be null
 */
private void setIconsFields(DigestURL iconURL) {
    final List<String> protocolValues = new ArrayList<String>(1);
    final List<String> sizeValues = new ArrayList<String>(1);
    final List<String> stubValues = new ArrayList<String>(1);
    final List<String> relValues = new ArrayList<String>(1);

    if (iconURL != null) {
        final String scheme = iconURL.getProtocol();
        /*
         * Skip the protocol and the 3 characters following it (presumably
         * "://") so that only the URL stub remains
         */
        final String stub = iconURL.toString().substring(scheme.length() + 3);

        protocolValues.add(scheme);
        /* Only the URL is given here : the sizes value is stored empty */
        sizeValues.add("");
        stubValues.add(stub);
        relValues.add(IconLinkRelations.ICON.getRelValue());
    }

    this.setField(CollectionSchema.icons_protocol_sxt.name(), protocolValues);
    this.setField(CollectionSchema.icons_urlstub_sxt.name(), stubValues);
    this.setField(CollectionSchema.icons_rel_sxt.name(), relValues);
    this.setField(CollectionSchema.icons_sizes_sxt.name(), sizeValues);
}
/**
* @param name field name
* @return field values from field name, possibly an immutable empty list when the field has no values or is not a List
@ -673,6 +752,13 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
final String wprop = this.word().toPropertyForm();
s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wprop));
}
/* Add favicon URL with preferred size being 16x16 pixels if known */
if(!this.getIcons().isEmpty()) {
IconEntry faviconEntry = this.getFavicon(new Dimension(16, 16));
if(faviconEntry != null) {
s.append(",favicon=").append(crypt.simpleEncode(faviconEntry.getUrl().toNormalform(false)));
}
}
return s;
} catch (final Throwable e) {
ConcurrentLog.logException(e);

@ -1633,6 +1633,8 @@ public final class SearchEvent {
// boolean fakeImageHost = ms.url().getHost() != null && ms.url().getHost().indexOf("wikipedia") > 0; // pages with image extension from wikipedia do not contain image files but html files... I know this is a bad hack, but many results come from wikipedia and we must handle that
// generalize above hack (regarding url with file extension but being a html (with html mime))
if (doc.doctype() == Response.DT_IMAGE) {
/* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents,
* or documents coming from peers running previous versions */
if (!doc.url().getFileName().endsWith(".ico")) { // we don't want favicons
final String id = ASCII.String(doc.hash());
// check image size
@ -1657,6 +1659,8 @@ public final class SearchEvent {
List<Object> width = widthO == null ? null : (List<Object>) widthO;
for (int c = 0; c < img.size(); c++) {
String image_urlstub = (String) img.get(c);
/* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents,
* or documents coming from peers running previous versions */
if (image_urlstub.endsWith(".ico")) continue; // we don't want favicons, makes the result look idiotic
try {
int h = height == null ? 0 : (Integer) height.get(c);

@ -65,8 +65,8 @@ public class URIMetadataNodeTest {
Collection<IconEntry> icons = metadataNode.getIcons();
int nb = 0;
/* Check results consistency */
for(IconEntry icon : icons) {
if("http://somehost.org/static/images/icon16.png".equals(icon.getUrl().toNormalform(false))) {
for (IconEntry icon : icons) {
if ("http://somehost.org/static/images/icon16.png".equals(icon.getUrl().toNormalform(false))) {
Assert.assertEquals(1, icon.getSizes().size());
Dimension size = icon.getSizes().iterator().next();
Assert.assertEquals(16, size.width);
@ -74,7 +74,7 @@ public class URIMetadataNodeTest {
Assert.assertEquals(1, icon.getRel().size());
Assert.assertEquals("icon", icon.getRel().iterator().next());
nb++;
} else if("https://somehost.org/static/images/icon32.png".equals(icon.getUrl().toNormalform(false))) {
} else if ("https://somehost.org/static/images/icon32.png".equals(icon.getUrl().toNormalform(false))) {
Assert.assertEquals(1, icon.getSizes().size());
Dimension size = icon.getSizes().iterator().next();
Assert.assertEquals(32, size.width);
@ -82,7 +82,7 @@ public class URIMetadataNodeTest {
Assert.assertEquals(1, icon.getRel().size());
Assert.assertEquals("icon", icon.getRel().iterator().next());
nb++;
} else if("https://somehost.org/static/images/icon64.png".equals(icon.getUrl().toNormalform(false))) {
} else if ("https://somehost.org/static/images/icon64.png".equals(icon.getUrl().toNormalform(false))) {
Assert.assertEquals(1, icon.getSizes().size());
Dimension size = icon.getSizes().iterator().next();
Assert.assertEquals(58, size.width);
@ -90,7 +90,7 @@ public class URIMetadataNodeTest {
Assert.assertEquals(1, icon.getRel().size());
Assert.assertEquals("icon", icon.getRel().iterator().next());
nb++;
} else if("http://somehost.org/static/images/iconApple128.png".equals(icon.getUrl().toNormalform(false))) {
} else if ("http://somehost.org/static/images/iconApple128.png".equals(icon.getUrl().toNormalform(false))) {
Assert.assertEquals(1, icon.getSizes().size());
Dimension size = icon.getSizes().iterator().next();
Assert.assertEquals(128, size.width);
@ -154,4 +154,60 @@ public class URIMetadataNodeTest {
Assert.assertEquals(0, icons.size());
}
/**
 * Checks that an entry holding several icons, once encoded with toString()
 * and imported again, exposes exactly one icon with the expected URL and rel.
 *
 * @throws MalformedURLException when a URL used by the test is malformed
 */
@Test
public final void testEncodeDecode() throws MalformedURLException {
    final URIMetadataNode original = new URIMetadataNode(new DigestURL("http://somehost.org"));

    /* Four icons described by parallel field values : url stub, protocol, rel and sizes */
    final String[] urlStubs = { "somehost.org/static/images/icon16.png", "somehost.org/static/images/icon32.png",
            "somehost.org/static/images/icon64.png", "somehost.org/static/images/iconApple128.png" };
    original.setField(CollectionSchema.icons_urlstub_sxt.getSolrFieldName(), urlStubs);

    final List<String> indexedProtocols = CollectionConfiguration
            .protocolList2indexedList(Arrays.asList("http", "https", "https", "http"));
    original.setField(CollectionSchema.icons_protocol_sxt.getSolrFieldName(), indexedProtocols);

    final String[] relValues = { "icon", "icon", "icon", "apple-touch-icon" };
    original.setField(CollectionSchema.icons_rel_sxt.getSolrFieldName(), relValues);

    final String[] sizeValues = { "16x24", "32x32", "58x64", "128x128" };
    original.setField(CollectionSchema.icons_sizes_sxt.getSolrFieldName(), sizeValues);

    /* Round trip : encode to the string form, then import it again */
    final URIMetadataNode reimported = URIMetadataNode.importEntry(original.toString(), "dht");
    final Collection<IconEntry> reimportedIcons = reimported.getIcons();

    /*
     * Only the icon closest to 16x16 pixels is encoded, and the sizes and
     * rel attributes are not encoded
     */
    Assert.assertEquals(1, reimportedIcons.size());
    final IconEntry onlyIcon = reimportedIcons.iterator().next();
    Assert.assertEquals(0, onlyIcon.getSizes().size());
    Assert.assertEquals("http://somehost.org/static/images/icon16.png", onlyIcon.getUrl().toNormalform(false));
    Assert.assertEquals(1, onlyIcon.getRel().size());
    Assert.assertEquals("icon", onlyIcon.getRel().iterator().next());
}
/**
 * Checks that an entry without any indexed icon, once encoded with
 * toString() and imported again, still exposes no icon.
 *
 * @throws MalformedURLException when a URL used by the test is malformed
 */
@Test
public final void testEncodeDecodeNoIcon() throws MalformedURLException {
    final URIMetadataNode original = new URIMetadataNode(new DigestURL("http://somehost.org"));

    /* Round trip : encode to the string form, then import it again */
    final URIMetadataNode reimported = URIMetadataNode.importEntry(original.toString(), "dht");

    final Collection<IconEntry> reimportedIcons = reimported.getIcons();
    Assert.assertEquals(0, reimportedIcons.size());
}
}

Loading…
Cancel
Save