diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 5e300680a..c1e163c93 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -201,7 +201,7 @@ public class yacysearchitem { prop.putHTML("content_sizename", sizename(result.filesize())); prop.putHTML("content_showSize_sizename", sizename(result.filesize())); prop.putHTML("content_host", resultURL.getHost() == null ? "" : resultURL.getHost()); - prop.putHTML("content_file", resultURL.getFile()); + prop.putHTML("content_file", resultURL.getFileName()); prop.putHTML("content_path", resultURL.getPath()); prop.put("content_nl", (item == theQuery.offset) ? 0 : 1); prop.putHTML("content_publisher", result.publisher()); diff --git a/source/de/anomic/crawler/ResultImages.java b/source/de/anomic/crawler/ResultImages.java index 28c16ce8a..e7dd3f0ec 100644 --- a/source/de/anomic/crawler/ResultImages.java +++ b/source/de/anomic/crawler/ResultImages.java @@ -69,13 +69,12 @@ public class ResultImages { if (doubleCheck.contains(url)) continue; doubleCheck.add(url); - final String name = image.url().getFile(); boolean good = false; if (image.width() > 120 && image.height() > 100 && image.width() < 1200 && image.height() < 1000 && - name.lastIndexOf(".gif") == -1) { + !"gif".equals(image.url().getFileExtension())) { // && ((urlString.lastIndexOf(".jpg") != -1)) || // ((urlString.lastIndexOf(".png") != -1)){ diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java index c735b76c6..71f3bf371 100644 --- a/source/net/yacy/cora/document/MultiProtocolURI.java +++ b/source/net/yacy/cora/document/MultiProtocolURI.java @@ -632,16 +632,29 @@ public class MultiProtocolURI implements Serializable, Comparable fields = doc.getFields(); int fieldc = fields.size(); List texts = new ArrayList(); + MultiProtocolURI url = null; String description = "", title = ""; StringBuilder path = new StringBuilder(80); for (int j = 0; j < fieldc; j++) { Fieldable value = fields.get(j); String fieldName = value.name(); - if (YaCySchema.title.name().equals(fieldName)) { - title = value.stringValue(); - texts.add(title); + + // apply generic matching rule + String stag = field2tag.get(fieldName); + if (stag != null) { + solitaireTag(writer, stag, value.stringValue()); continue; } + + // some special handling here if (YaCySchema.sku.name().equals(fieldName)) { - solitaireTag(writer, "link", value.stringValue()); + String u = value.stringValue(); + try { + url = new MultiProtocolURI(u); + solitaireTag(writer, "link", u); + solitaireTag(writer, "file", url.getFileName()); + } catch (MalformedURLException e) {} + continue; + } + if (YaCySchema.title.name().equals(fieldName)) { + title = value.stringValue(); + texts.add(title); continue; } if (YaCySchema.description.name().equals(fieldName)) { @@ -133,18 +159,10 @@ public class JsonResponseWriter implements QueryResponseWriter { solitaireTag(writer, "guid", urlhash); continue; } - if (YaCySchema.host_s.name().equals(fieldName)) { - solitaireTag(writer, "host", value.stringValue()); - continue; - } if (YaCySchema.url_paths_sxt.name().equals(fieldName)) { path.append('/').append(value.stringValue()); continue; } - if (YaCySchema.url_file_ext_s.name().equals(fieldName)) { - solitaireTag(writer, "ext", value.stringValue()); - continue; - } if (YaCySchema.last_modified.name().equals(fieldName)) { Date d = new Date(Long.parseLong(value.stringValue())); solitaireTag(writer, "pubDate", HeaderFramework.formatRFC1123(d)); @@ -169,9 +187,11 @@ public class JsonResponseWriter implements QueryResponseWriter { texts.add(value.stringValue()); continue; } - } - // compute snippet from texts + //missing: "code","faviconCode" + } + + // compute snippet from texts solitaireTag(writer, "path", path.toString()); solitaireTag(writer, "title", title.length() == 0 ? (texts.size() == 0 ? path.toString() : texts.get(0)) : title); List snippet = urlhash == null ? null : snippets.get(urlhash); diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index b11ef58ec..c253dc483 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -447,7 +447,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { final String href = tagopts.getProperty("href", EMPTY_STRING); MultiProtocolURI url; if ((href.length() > 0) && ((url = absolutePath(href)) != null)) { - final String f = url.getFile(); + final String f = url.getFileName(); final int p = f.lastIndexOf('.'); final String type = (p < 0) ? EMPTY_STRING : f.substring(p + 1); if (type.equals("png") || type.equals("gif") || type.equals("jpg") || type.equals("jpeg") || type.equals("tiff") || type.equals("tif")) { diff --git a/source/net/yacy/search/snippet/ResultEntry.java b/source/net/yacy/search/snippet/ResultEntry.java index 63bf30743..9ec0614b8 100644 --- a/source/net/yacy/search/snippet/ResultEntry.java +++ b/source/net/yacy/search/snippet/ResultEntry.java @@ -82,7 +82,7 @@ public class ResultEntry implements Comparable, Comparator, Comparator, Comparator 0) this.alternative_urlname = this.alternative_urlname.substring(0, p); } }