- added faceted drill-down for host and geolocation to Solr queries

- added a new geolocation field to the index schema; the old values are
migrated where possible
pull/1/head
Michael Peter Christen 13 years ago
parent f00168ecc5
commit e8acd542b5

@ -80,6 +80,9 @@ lon_coordinate
## longitude of location as declared in WGS84, tdouble ## longitude of location as declared in WGS84, tdouble
lat_coordinate lat_coordinate
## point in degrees of latitude,longitude as declared in WGS84, location
coordinate_p
## ip of host of url (after DNS lookup), string ## ip of host of url (after DNS lookup), string
ip_s ip_s

@ -29,6 +29,7 @@ public enum SolrType {
string, string,
text_general, text_general,
text_en_splitting_tight, text_en_splitting_tight,
location,
date, date,
integer("int"), integer("int"),
bool("boolean"), bool("boolean"),

@ -92,4 +92,8 @@ public class GeoLocation extends IntegerGeoPoint implements Comparable<GeoLocati
return o1.compareTo(o2); return o1.compareTo(o2);
} }
/**
 * Converts an angular distance in degrees into whole kilometers.
 * Uses a fixed factor of 111.32 km per degree (roughly one degree of arc
 * at the equator); the fractional part of the result is truncated toward zero.
 *
 * @param degree angular distance in degrees
 * @return the distance in kilometers, truncated to an int
 */
public static int degreeToKm(double degree) {
    final double kilometers = degree * 111.32d;
    return (int) kilometers;
}
} }

@ -56,7 +56,7 @@ public class URIMetadataNode implements URIMetadata {
private DigestURI url; private DigestURI url;
Bitfield flags; Bitfield flags;
private final int imagec, audioc, videoc, appc; private final int imagec, audioc, videoc, appc;
private final double lon, lat; private double lat, lon;
private long ranking; // during generation of a search result this value is set private long ranking; // during generation of a search result this value is set
private final SolrDocument doc; private final SolrDocument doc;
private final String snippet; private final String snippet;
@ -84,6 +84,14 @@ public class URIMetadataNode implements URIMetadata {
this.appc = getInt(YaCySchema.videolinkscount_i); this.appc = getInt(YaCySchema.videolinkscount_i);
this.lon = getDouble(YaCySchema.lon_coordinate); this.lon = getDouble(YaCySchema.lon_coordinate);
this.lat = getDouble(YaCySchema.lat_coordinate); this.lat = getDouble(YaCySchema.lat_coordinate);
String latlon = (String) this.doc.getFieldValue(YaCySchema.coordinate_p.name());
if (latlon != null) {
int p = latlon.indexOf(',');
if (p > 0) {
this.lat = Double.parseDouble(latlon.substring(0, p));
this.lon = Double.parseDouble(latlon.substring(p + 1));
}
}
this.flags = new Bitfield(); this.flags = new Bitfield();
if (this.keywords != null && this.keywords.indexOf("indexof") >= 0) this.flags.set(Condenser.flag_cat_indexof, true); if (this.keywords != null && this.keywords.indexOf("indexof") >= 0) this.flags.set(Condenser.flag_cat_indexof, true);
if (this.lon != 0.0d || this.lat != 0.0d) this.flags.set(Condenser.flag_cat_haslocation, true); if (this.lon != 0.0d || this.lat != 0.0d) this.flags.set(Condenser.flag_cat_haslocation, true);

@ -1019,18 +1019,21 @@ public final class Protocol
public static int solrQuery( public static int solrQuery(
final SearchEvent event, final SearchEvent event,
final HandleSet wordhashes,
final int offset, final int offset,
final int count, final int count,
final long time, final long time,
final Seed target, final Seed target,
final Blacklist blacklist) { final Blacklist blacklist) {
final HandleSet wordhashes = event.getQuery().query_include_hashes;
if (event.getQuery().queryString == null || event.getQuery().queryString.length() == 0) { if (event.getQuery().queryString == null || event.getQuery().queryString.length() == 0) {
return -1; // we cannot query solr only with word hashes, there is no clear text string return -1; // we cannot query solr only with word hashes, there is no clear text string
} }
event.rankingProcess.addExpectedRemoteReferences(count); event.rankingProcess.addExpectedRemoteReferences(count);
SolrDocumentList docList = null; SolrDocumentList docList = null;
final String solrQuerystring = "{!lucene q.op=AND df=text_t}" + event.getQuery().solrQueryString(false); final String solrQuerystring = event.getQuery().solrQueryString(false);
Log.logInfo("Protocol", "SOLR QUERY: " + solrQuerystring);
boolean localsearch = target == null || target.equals(event.peers.mySeed()); boolean localsearch = target == null || target.equals(event.peers.mySeed());
if (localsearch) { if (localsearch) {
// search the local index // search the local index

@ -169,7 +169,7 @@ public class RemoteSearch extends Thread {
for (Seed s: targetPeers) omit.add(s); for (Seed s: targetPeers) omit.add(s);
Seed[] nodes = PeerSelection.selectNodeSearchTargets(event.peers, 20, omit); Seed[] nodes = PeerSelection.selectNodeSearchTargets(event.peers, 20, omit);
for (Seed s: nodes) { for (Seed s: nodes) {
solrRemoteSearch(event, count, event.getQuery().query_include_hashes, time, s, blacklist); solrRemoteSearch(event, count, time, s, blacklist);
} }
// start search to YaCy peers // start search to YaCy peers
@ -258,11 +258,11 @@ public class RemoteSearch extends Thread {
public static Thread solrRemoteSearch( public static Thread solrRemoteSearch(
final SearchEvent event, final SearchEvent event,
final int count, final int count,
final HandleSet wordhashes,
final long time, final long time,
final Seed targetPeer, final Seed targetPeer,
final Blacklist blacklist) { final Blacklist blacklist) {
// check own peer status // check own peer status
if (event.peers.mySeed() == null || event.peers.mySeed().getPublicAddress() == null) { return null; } if (event.peers.mySeed() == null || event.peers.mySeed().getPublicAddress() == null) { return null; }
@ -275,7 +275,6 @@ public class RemoteSearch extends Thread {
try { try {
int urls = Protocol.solrQuery( int urls = Protocol.solrQuery(
event, event,
wordhashes,
0, 0,
count, count,
time, time,

@ -233,7 +233,8 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
// coordinates // coordinates
if (md.lat() != 0.0f && md.lon() != 0.0f) { if (md.lat() != 0.0f && md.lon() != 0.0f) {
if (allAttr || contains(YaCySchema.lat_coordinate)) add(doc, YaCySchema.lat_coordinate, md.lat()); if (allAttr || contains(YaCySchema.lat_coordinate)) add(doc, YaCySchema.lat_coordinate, md.lat());
if (allAttr || contains(YaCySchema.lon_coordinate)) add(doc, YaCySchema.lon_coordinate, md.lon()); if (allAttr || contains(YaCySchema.lon_coordinate)) add(doc, YaCySchema.lon_coordinate, md.lon());
if (allAttr || contains(YaCySchema.coordinate_p)) add(doc, YaCySchema.coordinate_p, Double.toString(md.lat()) + "," + Double.toString(md.lon()));
} }
if (allAttr || contains(YaCySchema.httpstatus_i)) add(doc, YaCySchema.httpstatus_i, 200); if (allAttr || contains(YaCySchema.httpstatus_i)) add(doc, YaCySchema.httpstatus_i, 200);
@ -616,6 +617,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (yacydoc.lat() != 0.0f && yacydoc.lon() != 0.0f) { if (yacydoc.lat() != 0.0f && yacydoc.lon() != 0.0f) {
if (allAttr || contains(YaCySchema.lat_coordinate)) add(doc, YaCySchema.lat_coordinate, yacydoc.lat()); if (allAttr || contains(YaCySchema.lat_coordinate)) add(doc, YaCySchema.lat_coordinate, yacydoc.lat());
if (allAttr || contains(YaCySchema.lon_coordinate)) add(doc, YaCySchema.lon_coordinate, yacydoc.lon()); if (allAttr || contains(YaCySchema.lon_coordinate)) add(doc, YaCySchema.lon_coordinate, yacydoc.lon());
if (allAttr || contains(YaCySchema.coordinate_p)) add(doc, YaCySchema.coordinate_p, Double.toString(yacydoc.lat()) + "," + Double.toString(yacydoc.lon()));
} }
if (allAttr || contains(YaCySchema.httpstatus_i)) add(doc, YaCySchema.httpstatus_i, header == null ? 200 : header.getStatusCode()); if (allAttr || contains(YaCySchema.httpstatus_i)) add(doc, YaCySchema.httpstatus_i, header == null ? 200 : header.getStatusCode());

@ -27,11 +27,11 @@ package net.yacy.search.index;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import org.apache.solr.common.SolrInputDocument;
import net.yacy.cora.services.federated.solr.Schema; import net.yacy.cora.services.federated.solr.Schema;
import net.yacy.cora.services.federated.solr.SolrType; import net.yacy.cora.services.federated.solr.SolrType;
import org.apache.solr.common.SolrInputDocument;
public enum YaCySchema implements Schema { public enum YaCySchema implements Schema {
// mandatory // mandatory
@ -58,8 +58,9 @@ public enum YaCySchema implements Schema {
applinkscount_i(SolrType.integer, true, true, "number of links to application resources"),// int lapp(); applinkscount_i(SolrType.integer, true, true, "number of links to application resources"),// int lapp();
// optional but recommended // optional but recommended
lon_coordinate(SolrType.tdouble, true, true, "longitude of location as declared in WSG84"), lon_coordinate(SolrType.tdouble, true, true, "longitude of location as declared in WSG84"), // deprecated
lat_coordinate(SolrType.tdouble, true, true, "latitude of location as declared in WSG84"), lat_coordinate(SolrType.tdouble, true, true, "latitude of location as declared in WSG84"), // deprecated
coordinate_p(SolrType.location, true, true, "point in degrees of latitude,longitude as declared in WSG84"),
ip_s(SolrType.string, true, true, "ip of host of url (after DNS lookup)"), ip_s(SolrType.string, true, true, "ip of host of url (after DNS lookup)"),
author(SolrType.text_general, true, true, "content of author-tag"), author(SolrType.text_general, true, true, "content of author-tag"),
description(SolrType.text_general, true, true, "content of description-tag"), description(SolrType.text_general, true, true, "content of description-tag"),

@ -48,6 +48,7 @@ import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.cora.storage.HandleSet; import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException; import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser; import net.yacy.document.Condenser;
import net.yacy.document.geolocation.GeoLocation;
import net.yacy.document.parser.html.AbstractScraper; import net.yacy.document.parser.html.AbstractScraper;
import net.yacy.document.parser.html.CharacterCoding; import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
@ -60,6 +61,7 @@ import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.util.SetTools; import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
import net.yacy.search.index.YaCySchema;
import net.yacy.search.ranking.RankingProfile; import net.yacy.search.ranking.RankingProfile;
public final class QueryParams { public final class QueryParams {
@ -323,15 +325,6 @@ public final class QueryParams {
return this.domType == Searchdom.LOCAL; return this.domType == Searchdom.LOCAL;
} }
/**
 * Builds a Solr query against the {@code text_t} field from the include and
 * exclude word sets of this query.
 * Each include word is joined with '+', each exclude word is appended as
 * "+-word"; the leading '+' is stripped before the field prefix is added.
 *
 * @return the query string prefixed with "text_t:", or null if there are no
 *         include words
 */
public String solrQuery() {
    final boolean noIncludeWords = this.query_include_words == null || this.query_include_words.size() == 0;
    if (noIncludeWords) {
        return null;
    }
    final StringBuilder terms = new StringBuilder(80);
    for (final String word : this.query_include_words) {
        terms.append('+').append(word);
    }
    for (final String word : this.query_exclude_words) {
        terms.append("+-").append(word);
    }
    if (terms.length() == 0) {
        return null;
    }
    // skip the leading '+' separator written before the first word
    return "text_t:" + terms.substring(1);
}
public static HandleSet hashes2Set(final String query) { public static HandleSet hashes2Set(final String query) {
final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0); final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
if (query != null) { if (query != null) {
@ -475,13 +468,42 @@ public final class QueryParams {
} }
public String solrQueryString(boolean urlencoded) { public String solrQueryString(boolean urlencoded) {
final StringBuilder q = new StringBuilder(); if (this.query_include_words == null || this.query_include_words.size() == 0) return null;
if (this.query_include_words != null) { final StringBuilder q = new StringBuilder(80);
for (String s: this.query_include_words) q.append(urlencoded ? '+' : ' ').append(s); q.append("{!lucene q.op=AND}");
for (String s: this.query_exclude_words) q.append(urlencoded ? "+-" : " -").append(s);
// add text query
q.append("text_t:");
int wc = 0;
for (String s: this.query_include_words) {
if (wc > 0) q.append(urlencoded ? '+' : ' ');
q.append(s);
wc++;
} }
if (urlencoded) return CharacterCoding.unicode2html(q.length() > 0 ? q.substring(1) : q.toString(), true); for (String s: this.query_exclude_words){
return q.length() > 0 ? q.substring(1) : q.toString(); if (wc > 0) q.append(urlencoded ? "+-" : " -");
q.append(s);
wc++;
}
// add constraints
if ( this.sitehash == null ) {
if (this.siteexcludes != null) {
for (String ex: this.siteexcludes) {
q.append(urlencoded ? "+AND+-host_id_s:" : " AND -host_id_s:").append(ex);
}
}
} else {
q.append(urlencoded ? "+AND+host_id_s:" : " AND host_id_s:").append(this.sitehash);
}
if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) {
q.append("&fq={!bbox sfield=").append(YaCySchema.coordinate_p.name()).append("}&pt=");
q.append(Double.toString(this.lat)).append(',').append(Double.toString(this.lon)).append("&d=").append(GeoLocation.degreeToKm(this.radius));
}
// prepare result
return (urlencoded) ? CharacterCoding.unicode2html(q.toString(), true) : q.toString();
} }
public String queryStringForUrl() { public String queryStringForUrl() {

@ -150,7 +150,7 @@ public final class SearchEvent {
this.rankingProcess = new RWIProcess(this.query, this.order, remote); this.rankingProcess = new RWIProcess(this.query, this.order, remote);
// start a local solr search // start a local solr search
RemoteSearch.solrRemoteSearch(this, 100, this.query.query_include_hashes, 10000, null, Switchboard.urlBlacklist); RemoteSearch.solrRemoteSearch(this, 100, 10000, null /*this peer*/, Switchboard.urlBlacklist);
// start a local RWI search concurrently // start a local RWI search concurrently
this.rankingProcess.start(); this.rankingProcess.start();

Loading…
Cancel
Save