diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list index 757605930..4eba5aa6d 100644 --- a/defaults/solr.keys.list +++ b/defaults/solr.keys.list @@ -80,6 +80,9 @@ lon_coordinate ## longitude of location as declared in WSG84, tdouble lat_coordinate +## point in degrees of latitude,longitude as declared in WSG84, location +coordinate_p + ## ip of host of url (after DNS lookup), string ip_s diff --git a/source/net/yacy/cora/services/federated/solr/SolrType.java b/source/net/yacy/cora/services/federated/solr/SolrType.java index 36320494e..773eff247 100644 --- a/source/net/yacy/cora/services/federated/solr/SolrType.java +++ b/source/net/yacy/cora/services/federated/solr/SolrType.java @@ -29,6 +29,7 @@ public enum SolrType { string, text_general, text_en_splitting_tight, + location, date, integer("int"), bool("boolean"), diff --git a/source/net/yacy/document/geolocation/GeoLocation.java b/source/net/yacy/document/geolocation/GeoLocation.java index c639a7d2a..3940b5d90 100644 --- a/source/net/yacy/document/geolocation/GeoLocation.java +++ b/source/net/yacy/document/geolocation/GeoLocation.java @@ -92,4 +92,8 @@ public class GeoLocation extends IntegerGeoPoint implements Comparable 0) { + this.lat = Double.parseDouble(latlon.substring(0, p)); + this.lon = Double.parseDouble(latlon.substring(p + 1)); + } + } this.flags = new Bitfield(); if (this.keywords != null && this.keywords.indexOf("indexof") >= 0) this.flags.set(Condenser.flag_cat_indexof, true); if (this.lon != 0.0d || this.lat != 0.0d) this.flags.set(Condenser.flag_cat_haslocation, true); diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index d764185d8..4f37c4b01 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -1019,18 +1019,21 @@ public final class Protocol public static int solrQuery( final SearchEvent event, - final HandleSet wordhashes, final int offset, final int count, final long time, final Seed target, final 
Blacklist blacklist) { + + final HandleSet wordhashes = event.getQuery().query_include_hashes; + if (event.getQuery().queryString == null || event.getQuery().queryString.length() == 0) { return -1; // we cannot query solr only with word hashes, there is no clear text string } event.rankingProcess.addExpectedRemoteReferences(count); SolrDocumentList docList = null; - final String solrQuerystring = "{!lucene q.op=AND df=text_t}" + event.getQuery().solrQueryString(false); + final String solrQuerystring = event.getQuery().solrQueryString(false); + Log.logInfo("Protocol", "SOLR QUERY: " + solrQuerystring); boolean localsearch = target == null || target.equals(event.peers.mySeed()); if (localsearch) { // search the local index diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 8f0b452ce..c17076772 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -169,7 +169,7 @@ public class RemoteSearch extends Thread { for (Seed s: targetPeers) omit.add(s); Seed[] nodes = PeerSelection.selectNodeSearchTargets(event.peers, 20, omit); for (Seed s: nodes) { - solrRemoteSearch(event, count, event.getQuery().query_include_hashes, time, s, blacklist); + solrRemoteSearch(event, count, time, s, blacklist); } // start search to YaCy peers @@ -258,11 +258,11 @@ public class RemoteSearch extends Thread { public static Thread solrRemoteSearch( final SearchEvent event, final int count, - final HandleSet wordhashes, final long time, final Seed targetPeer, final Blacklist blacklist) { + // check own peer status if (event.peers.mySeed() == null || event.peers.mySeed().getPublicAddress() == null) { return null; } @@ -275,7 +275,6 @@ public class RemoteSearch extends Thread { try { int urls = Protocol.solrQuery( event, - wordhashes, 0, count, time, diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 9ab298dda..a712ce7b7 100644 --- 
a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -156,12 +156,12 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable Date x = (Date) doc.getFieldValue(key.name()); return (x == null) ? new Date(0) : x; } - + public Date getDate(SolrDocument doc, final YaCySchema key) { Date x = (Date) doc.getFieldValue(key.name()); return (x == null) ? new Date(0) : x; } - + /** * save configuration to file and update enum SolrFields * @throws IOException @@ -186,8 +186,8 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable public SolrInputDocument metadata2solr(final URIMetadata md) { if (md instanceof URIMetadataNode) { return ClientUtils.toSolrInputDocument(((URIMetadataNode) md).getDocument()); - } - + } + final SolrInputDocument doc = new SolrInputDocument(); final DigestURI digestURI = new DigestURI(md.url()); boolean allAttr = this.isEmpty(); @@ -206,7 +206,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable if (allAttr || contains(YaCySchema.content_type)) add(doc, YaCySchema.content_type, Response.doctype2mime(digestURI.getFileExtension(), md.doctype())); if (allAttr || contains(YaCySchema.last_modified)) add(doc, YaCySchema.last_modified, md.moddate()); if (allAttr || contains(YaCySchema.wordcount_i)) add(doc, YaCySchema.wordcount_i, md.wordCount()); - + String keywords = md.dc_subject(); Bitfield flags = md.flags(); if (flags.get(Condenser.flag_cat_indexof)) { @@ -214,7 +214,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable if (keywords.indexOf(',') > 0) keywords += ", indexof"; else keywords += " indexof"; } } - if (allAttr || contains(YaCySchema.keywords)) { + if (allAttr || contains(YaCySchema.keywords)) { add(doc, YaCySchema.keywords, keywords); } @@ -233,7 +233,8 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable // coordinates if (md.lat() != 
0.0f && md.lon() != 0.0f) { if (allAttr || contains(YaCySchema.lat_coordinate)) add(doc, YaCySchema.lat_coordinate, md.lat()); - if (allAttr || contains(YaCySchema.lon_coordinate)) add(doc, YaCySchema.lon_coordinate, md.lon()); + if (allAttr || contains(YaCySchema.lon_coordinate)) add(doc, YaCySchema.lon_coordinate, md.lon()); + if (allAttr || contains(YaCySchema.coordinate_p)) add(doc, YaCySchema.coordinate_p, Double.toString(md.lat()) + "," + Double.toString(md.lon())); } if (allAttr || contains(YaCySchema.httpstatus_i)) add(doc, YaCySchema.httpstatus_i, 200); @@ -261,10 +262,10 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable accText(sb, keywords); add(doc, YaCySchema.text_t, sb.toString()); } - + return doc; } - + private static void accText(final StringBuilder sb, String text) { if (text == null || text.length() == 0) return; if (sb.length() != 0) sb.append(' '); @@ -616,6 +617,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable if (yacydoc.lat() != 0.0f && yacydoc.lon() != 0.0f) { if (allAttr || contains(YaCySchema.lat_coordinate)) add(doc, YaCySchema.lat_coordinate, yacydoc.lat()); if (allAttr || contains(YaCySchema.lon_coordinate)) add(doc, YaCySchema.lon_coordinate, yacydoc.lon()); + if (allAttr || contains(YaCySchema.coordinate_p)) add(doc, YaCySchema.coordinate_p, Double.toString(yacydoc.lat()) + "," + Double.toString(yacydoc.lon())); } if (allAttr || contains(YaCySchema.httpstatus_i)) add(doc, YaCySchema.httpstatus_i, header == null ? 
200 : header.getStatusCode()); diff --git a/source/net/yacy/search/index/YaCySchema.java b/source/net/yacy/search/index/YaCySchema.java index 0ad95732b..fd7c2e1cf 100644 --- a/source/net/yacy/search/index/YaCySchema.java +++ b/source/net/yacy/search/index/YaCySchema.java @@ -27,11 +27,11 @@ package net.yacy.search.index; import java.util.Date; import java.util.List; -import org.apache.solr.common.SolrInputDocument; - import net.yacy.cora.services.federated.solr.Schema; import net.yacy.cora.services.federated.solr.SolrType; +import org.apache.solr.common.SolrInputDocument; + public enum YaCySchema implements Schema { // mandatory @@ -46,7 +46,7 @@ public enum YaCySchema implements Schema { process_s(SolrType.string, true, true, "index creation comment"), failreason_t(SolrType.text_general, true, true, "fail reason if a page was not loaded. if the page was loaded then this field is empty"), httpstatus_i(SolrType.integer, true, true, "html status return code (i.e. \"200\" for ok), -1 if not loaded"), - + // optional but recommended, part of index distribution load_date_dt(SolrType.date, true, true, "time when resource was loaded"), fresh_date_dt(SolrType.date, true, true, "date until resource shall be considered as fresh"), @@ -56,10 +56,11 @@ public enum YaCySchema implements Schema { audiolinkscount_i(SolrType.integer, true, true, "number of links to audio resources"),// int laudio(); videolinkscount_i(SolrType.integer, true, true, "number of links to video resources"),// int lvideo(); applinkscount_i(SolrType.integer, true, true, "number of links to application resources"),// int lapp(); - + // optional but recommended - lon_coordinate(SolrType.tdouble, true, true, "longitude of location as declared in WSG84"), - lat_coordinate(SolrType.tdouble, true, true, "latitude of location as declared in WSG84"), + lon_coordinate(SolrType.tdouble, true, true, "longitude of location as declared in WSG84"), // deprecated + lat_coordinate(SolrType.tdouble, true, true, "latitude 
of location as declared in WSG84"), // deprecated + coordinate_p(SolrType.location, true, true, "point in degrees of latitude,longitude as declared in WSG84"), ip_s(SolrType.string, true, true, "ip of host of url (after DNS lookup)"), author(SolrType.text_general, true, true, "content of author-tag"), description(SolrType.text_general, true, true, "content of description-tag"), @@ -73,8 +74,8 @@ public enum YaCySchema implements Schema { imagescount_i(SolrType.integer, true, true, "number of images"), responsetime_i(SolrType.integer, true, true, "response time of target server in milliseconds"), text_t(SolrType.text_general, true, true, "all visible text"), - - // optional values + + // optional values csscount_i(SolrType.integer, true, true, "number of entries in css_tag_txt and css_url_txt"), css_tag_txt(SolrType.text_general, true, true, true, "full css tag with normalized url"), css_url_txt(SolrType.text_general, true, true, true, "normalized urls within a css tag"), @@ -146,7 +147,7 @@ public enum YaCySchema implements Schema { ext_tracker_val(SolrType.integer, true, true, true, "number of attribute counts in ext_tracker_txt"), ext_title_txt(SolrType.text_general, true, true, true, "names matching title expressions"), ext_title_val(SolrType.integer, true, true, true, "number of matching title expressions"); - + private String solrFieldName = null; // solr field name in custom solr schema, defaults to solcell schema field name (= same as this.name() ) private final SolrType type; private final boolean indexed, stored; @@ -222,7 +223,7 @@ public enum YaCySchema implements Schema { @Override public final String getComment() { return this.comment; - } + } public final void add(final SolrInputDocument doc, final String value) { doc.setField(this.getSolrFieldName(), value); diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 61d1ee05e..ac063c6a1 100644 --- a/source/net/yacy/search/query/QueryParams.java 
+++ b/source/net/yacy/search/query/QueryParams.java @@ -48,6 +48,7 @@ import net.yacy.cora.services.federated.yacy.CacheStrategy; import net.yacy.cora.storage.HandleSet; import net.yacy.cora.util.SpaceExceededException; import net.yacy.document.Condenser; +import net.yacy.document.geolocation.GeoLocation; import net.yacy.document.parser.html.AbstractScraper; import net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.data.meta.DigestURI; @@ -60,6 +61,7 @@ import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.util.SetTools; import net.yacy.peers.Seed; import net.yacy.search.index.Segment; +import net.yacy.search.index.YaCySchema; import net.yacy.search.ranking.RankingProfile; public final class QueryParams { @@ -323,15 +325,6 @@ public final class QueryParams { return this.domType == Searchdom.LOCAL; } - public String solrQuery() { - if (this.query_include_words == null || this.query_include_words.size() == 0) return null; - StringBuilder sb = new StringBuilder(80); - for (String s: this.query_include_words) {sb.append('+'); sb.append(s);} - for (String s: this.query_exclude_words) {sb.append("+-"); sb.append(s);} - if (sb.length() == 0) return null; - return "text_t:" + sb.substring(1, sb.length()); - } - public static HandleSet hashes2Set(final String query) { final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0); if (query != null) { @@ -475,13 +468,42 @@ public final class QueryParams { } public String solrQueryString(boolean urlencoded) { - final StringBuilder q = new StringBuilder(); - if (this.query_include_words != null) { - for (String s: this.query_include_words) q.append(urlencoded ? '+' : ' ').append(s); - for (String s: this.query_exclude_words) q.append(urlencoded ?
"+-" : " -").append(s); + if (this.query_include_words == null || this.query_include_words.size() == 0) return null; + final StringBuilder q = new StringBuilder(80); + q.append("{!lucene q.op=AND df=text_t}"); + + // add text query; df=text_t in the local params keeps the second and later terms in text_t, the "text_t:" prefix alone binds only the first term + q.append("text_t:"); + int wc = 0; + for (String s: this.query_include_words) { + if (wc > 0) q.append(urlencoded ? '+' : ' '); + q.append(s); + wc++; } - if (urlencoded) return CharacterCoding.unicode2html(q.length() > 0 ? q.substring(1) : q.toString(), true); - return q.length() > 0 ? q.substring(1) : q.toString(); + for (String s: this.query_exclude_words){ + if (wc > 0) q.append(urlencoded ? "+-" : " -"); + q.append(s); + wc++; + } + + // add constraints + if ( this.sitehash == null ) { + if (this.siteexcludes != null) { + for (String ex: this.siteexcludes) { + q.append(urlencoded ? "+AND+-host_id_s:" : " AND -host_id_s:").append(ex); + } + } + } else { + q.append(urlencoded ? "+AND+host_id_s:" : " AND host_id_s:").append(this.sitehash); + } + // add geo constraint: only lat == 0.0 together with lon == 0.0 means "no location", a centre on the equator or the prime meridian is valid + if (this.radius > 0.0d && (this.lat != 0.0d || this.lon != 0.0d)) { + q.append("&fq={!bbox sfield=").append(YaCySchema.coordinate_p.name()).append("}&pt="); + q.append(Double.toString(this.lat)).append(',').append(Double.toString(this.lon)).append("&d=").append(GeoLocation.degreeToKm(this.radius)); + } + + // prepare result + return (urlencoded) ?
CharacterCoding.unicode2html(q.toString(), true) : q.toString(); } public String queryStringForUrl() { diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 3bd50cfd4..a2bb30c96 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -150,7 +150,7 @@ public final class SearchEvent { this.rankingProcess = new RWIProcess(this.query, this.order, remote); // start a local solr search - RemoteSearch.solrRemoteSearch(this, 100, this.query.query_include_hashes, 10000, null, Switchboard.urlBlacklist); + RemoteSearch.solrRemoteSearch(this, 100, 10000, null /*this peer*/, Switchboard.urlBlacklist); // start a local RWI search concurrently this.rankingProcess.start();