From 24d2ee3c52615b7de0af067475a7e7dbca20e9e3 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 26 Sep 2012 18:36:32 +0200 Subject: [PATCH] - better date ranking - more protection against NPE and time travel effects --- htroot/yacysearch.java | 2 + source/net/yacy/document/content/DCEntry.java | 4 +- .../kelondro/data/meta/URIMetadataNode.java | 2 +- source/net/yacy/peers/Protocol.java | 13 ++---- .../yacy/search/index/SolrConfiguration.java | 8 ++-- .../yacy/search/ranking/RankingProfile.java | 40 +++++++++++++++++++ 6 files changed, 55 insertions(+), 14 deletions(-) diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index a73a0e4c0..377fc3bdf 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -364,11 +364,13 @@ public class yacysearch { if ( querystring.indexOf("/near", 0) >= 0 ) { querystring = querystring.replace("/near", ""); + ranking.allZero(); // switch off all attributes ranking.coeff_worddistance = RankingProfile.COEFF_MAX; modifier.append("/near "); } if ( querystring.indexOf("/date", 0) >= 0 ) { querystring = querystring.replace("/date", ""); + ranking.allZero(); // switch off all attributes ranking.coeff_date = RankingProfile.COEFF_MAX; modifier.append("/date "); } diff --git a/source/net/yacy/document/content/DCEntry.java b/source/net/yacy/document/content/DCEntry.java index 0c12f8665..73dfd3d5b 100644 --- a/source/net/yacy/document/content/DCEntry.java +++ b/source/net/yacy/document/content/DCEntry.java @@ -102,7 +102,9 @@ public class DCEntry extends TreeMap { if (d == null) return null; if (d.isEmpty()) return null; try { - return ISO8601Formatter.FORMATTER.parse(d); + Date x = ISO8601Formatter.FORMATTER.parse(d); + Date now = new Date(); + return x.after(now) ? now : x; } catch (ParseException e) { Log.logException(e); return new Date(); diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index 852f90f07..01eb3fc18 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -130,7 +130,7 @@ public class URIMetadataNode implements URIMetadata { Date x = (Date) this.doc.getFieldValue(field.name()); if (x == null) return new Date(0); Date now = new Date(); - return (x.after(now)) ? now : x; + return x.after(now) ? now : x; } private String getString(YaCySchema field) { diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 111e8a57b..529ed6459 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -1063,12 +1063,8 @@ public final class Protocol final List> container = new ArrayList>(wordhashes.size()); for (byte[] hash: wordhashes) { try { - container.add(ReferenceContainer.emptyContainer( - Segment.wordReferenceFactory, - hash, - count)); - } catch (SpaceExceededException e) { - } // throws SpaceExceededException + container.add(ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, hash, count)); + } catch (SpaceExceededException e) {} // throws SpaceExceededException } int term = count; @@ -1090,11 +1086,10 @@ public final class Protocol Network.log.logInfo("remote search (solr): filtered blacklisted url " + urlEntry.url() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))); } } - continue; // block with backlist + continue; // block with blacklist } - final String urlRejectReason = - Switchboard.getSwitchboard().crawlStacker.urlInAcceptedDomain(urlEntry.url()); + final String urlRejectReason = Switchboard.getSwitchboard().crawlStacker.urlInAcceptedDomain(urlEntry.url()); if ( urlRejectReason != null ) { if ( Network.log.isInfo() ) { if (localsearch) { diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 568fb56a7..fd8ded0ac 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -175,12 +175,14 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable public Date getDate(SolrInputDocument doc, final YaCySchema key) { Date x = (Date) doc.getFieldValue(key.name()); - return (x == null) ? new Date(0) : x; + Date now = new Date(); + return (x == null) ? new Date(0) : x.after(now) ? now : x; } public Date getDate(SolrDocument doc, final YaCySchema key) { - Date x = (Date) doc.getFieldValue(key.name()); - return (x == null) ? new Date(0) : x; + Date x = doc == null ? null : (Date) doc.getFieldValue(key.name()); + Date now = new Date(); + return (x == null) ? new Date(0) : x.after(now) ? now : x; } /** diff --git a/source/net/yacy/search/ranking/RankingProfile.java b/source/net/yacy/search/ranking/RankingProfile.java index 42700c6dd..2e21fbdb9 100644 --- a/source/net/yacy/search/ranking/RankingProfile.java +++ b/source/net/yacy/search/ranking/RankingProfile.java @@ -195,6 +195,46 @@ public class RankingProfile { return (coeff.get(attr)).intValue(); } + /** + * set all ranking attributes to zero + * This is usually used when a specific value is set to maximum + */ + public void allZero() { + this.coeff_domlength = 0; + this.coeff_ybr = 0; + this.coeff_date = 0; + this.coeff_wordsintitle = 0; + this.coeff_wordsintext = 0; + this.coeff_phrasesintext = 0; + this.coeff_llocal = 0; + this.coeff_lother = 0; + this.coeff_urllength = 0; + this.coeff_urlcomps = 0; + this.coeff_hitcount = 0; + this.coeff_posintext = 0; + this.coeff_posofphrase = 0; + this.coeff_posinphrase = 0; + this.coeff_authority = 0; + this.coeff_worddistance = 0; + this.coeff_appurl = 0; + this.coeff_app_dc_title = 0; + this.coeff_app_dc_creator = 0; + this.coeff_app_dc_subject = 0; + this.coeff_app_dc_description = 0; + this.coeff_appemph = 0; + this.coeff_catindexof = 0; + this.coeff_cathasimage = 0; + this.coeff_cathasaudio = 0; + this.coeff_cathasvideo = 0; + this.coeff_cathasapp = 0; + this.coeff_termfrequency = 0; + this.coeff_urlcompintoplist = 0; + this.coeff_descrcompintoplist = 0; + this.coeff_prefer = 0; + this.coeff_language = 0; + this.coeff_citation = 0; + } + private String externalStringCache = null; public String toExternalString() { if (this.externalStringCache != null) return this.externalStringCache;