From ca313e404f1ef0930d9c0bb98f40404aa039a2de Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 26 Sep 2012 16:56:33 +0200 Subject: [PATCH] - if a "/date" modifier is used, the solr remote query applies an ordering by date (descending, most recent first) - added also some 'anti-timetravel' protection (check if date is in the future within any metadata date field) --- .../yacy/cora/protocol/ResponseHeader.java | 6 ++-- .../kelondro/data/meta/URIMetadataNode.java | 3 +- .../kelondro/data/meta/URIMetadataRow.java | 4 ++- source/net/yacy/search/index/Fulltext.java | 8 ++++- source/net/yacy/search/index/Segment.java | 2 +- .../yacy/search/index/SolrConfiguration.java | 4 ++- source/net/yacy/search/query/QueryParams.java | 36 ++++++++++++++----- 7 files changed, 48 insertions(+), 15 deletions(-) diff --git a/source/net/yacy/cora/protocol/ResponseHeader.java b/source/net/yacy/cora/protocol/ResponseHeader.java index e4185ad4a..ada7d2270 100644 --- a/source/net/yacy/cora/protocol/ResponseHeader.java +++ b/source/net/yacy/cora/protocol/ResponseHeader.java @@ -73,7 +73,8 @@ public class ResponseHeader extends HeaderFramework { public Date date() { final Date d = headerDate(HeaderFramework.DATE); - return (d == null) ? new Date() : d; + final Date now = new Date(); + return (d == null) ? now : d.after(now) ? now : d; } public Date expires() { @@ -82,7 +83,8 @@ public class ResponseHeader extends HeaderFramework { public Date lastModified() { final Date d = headerDate(LAST_MODIFIED); - return (d == null) ? date() : d; + final Date now = new Date(); + return (d == null) ? date() : d.after(now) ? 
now : d; } public long age() { diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index d4b0dc983..852f90f07 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -129,7 +129,8 @@ public class URIMetadataNode implements URIMetadata { assert field.getType() == SolrType.date; Date x = (Date) this.doc.getFieldValue(field.name()); if (x == null) return new Date(0); - return x; + Date now = new Date(); + return (x.after(now)) ? now : x; } private String getString(YaCySchema field) { diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index 54612d335..f95310993 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -174,7 +174,9 @@ public class URIMetadataRow implements URIMetadata { private void encodeDate(final int col, final Date d) { // calculates the number of days since 1.1.1970 and returns this as 4-byte array // 86400000 is the number of milliseconds in one day - this.entry.setCol(col, NaturalOrder.encodeLong(d.getTime() / 86400000L, 4)); + long time = d.getTime(); + long now = System.currentTimeMillis(); + this.entry.setCol(col, NaturalOrder.encodeLong((time > now ? 
now : time) / 86400000L, 4)); } private Date decodeDate(final int col) { diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 166f3a016..5e4fb1a25 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -28,6 +28,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -279,7 +280,12 @@ public final class Fulltext implements Iterable { try { if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); SolrDocument sd = this.solr.get(id); - if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) { + Date now = new Date(); + Date sdDate = this.solrScheme.getDate(sd, YaCySchema.last_modified); + if (sdDate.after(now)) sdDate = now; + Date docDate = this.solrScheme.getDate(doc, YaCySchema.last_modified); + if (docDate.after(now)) docDate = now; + if (sd == null || sdDate.before(docDate)) { if (this.solrScheme.contains(YaCySchema.ip_s)) { // ip_s needs a dns lookup which causes blockings during search here this.solr.add(doc); diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 7723c57d5..756b3c23b 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -198,7 +198,7 @@ public class Segment { } public int getQueryCount(String word) { - if (word == null || word.indexOf(':') >= 0 || word.indexOf(' ') >= 0) return 0; + if (word == null || word.indexOf(':') >= 0 || word.indexOf(' ') >= 0 || word.indexOf('/') >= 0) return 0; int count = this.termIndex == null ? 
0 : this.termIndex.count(Word.word2hash(word)); try {count += this.fulltext.getSolr().getQueryCount(YaCySchema.text_t.name() + ':' + word);} catch (IOException e) {} return count; diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 8331b7a09..568fb56a7 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -835,7 +835,9 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable } public Date solrGetDate(final SolrDocument solr) { - return (Date) solr.getFieldValue(YaCySchema.last_modified.getSolrFieldName()); + Date date = (Date) solr.getFieldValue(YaCySchema.last_modified.getSolrFieldName()); + Date now = new Date(); + return date.after(now) ? now : date; } public Collection solrGetKeywords(final SolrDocument solr) { diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index c62f8e096..020f316e9 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -39,6 +39,8 @@ import java.util.SortedSet; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import org.apache.solr.common.params.CommonParams; + import net.yacy.cora.document.ASCII; import net.yacy.cora.document.Classification; import net.yacy.cora.document.Classification.ContentDomain; @@ -481,6 +483,18 @@ public final class QueryParams { boosts.put(YaCySchema.keywords, 2.0f); boosts.put(YaCySchema.text_t, 1.0f); } + + /* + public static final String QT ="qt"; + public static final String WT ="wt"; + public static final String Q ="q"; + public static final String START ="start"; + public static final String ROWS ="rows"; + public static final String XSL ="xsl"; + public static final String VERSION ="version"; + public static final String FL = "fl"; + public static final String DF = "df"; + */ public String 
solrQueryString() { if (this.solrQueryString != null) return this.solrQueryString; @@ -533,18 +547,24 @@ public final class QueryParams { if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) { // localtion search, no special ranking - q.append("&fq={!bbox sfield=").append(YaCySchema.coordinate_p.name()).append("}&pt="); + q.append('&').append(CommonParams.FQ).append("={!bbox sfield=").append(YaCySchema.coordinate_p.name()).append("}&pt="); q.append(Double.toString(this.lat)).append(',').append(Double.toString(this.lon)).append("&d=").append(GeoLocation.degreeToKm(this.radius)); } else { - // boost fields - q.append("&defType=edismax&qf="); - int c = 0; - for (Map.Entry boost: boosts.entrySet()) { - if (c++ > 0) q.append(','); - q.append(boost.getKey().name()).append('^').append(boost.getValue().toString()); + // set ranking + if (this.ranking.coeff_date == RankingProfile.COEFF_MAX) { + // set a most-recent ordering + q.append('&').append(CommonParams.SORT).append('=').append(YaCySchema.last_modified.name()).append(" desc"); + } else { + // boost fields + q.append("&defType=edismax&qf="); + int c = 0; + for (Map.Entry boost: boosts.entrySet()) { + if (c++ > 0) q.append(','); + q.append(boost.getKey().name()).append('^').append(boost.getValue().toString()); + } } } - + // prepare result this.solrQueryString = q.toString(); Log.logInfo("Protocol", "SOLR QUERY: " + this.solrQueryString);