- added faceted drill-down for host and geolocation to solr queries

- added a new geolocation field to index schema, the old values are
migrated if possible
pull/1/head
Michael Peter Christen 13 years ago
parent f00168ecc5
commit e8acd542b5

@ -80,6 +80,9 @@ lon_coordinate
## latitude of location as declared in WGS84, tdouble
lat_coordinate
## point in degrees of latitude,longitude as declared in WGS84, location
coordinate_p
## ip of host of url (after DNS lookup), string
ip_s

@ -29,6 +29,7 @@ public enum SolrType {
string,
text_general,
text_en_splitting_tight,
location,
date,
integer("int"),
bool("boolean"),

@ -92,4 +92,8 @@ public class GeoLocation extends IntegerGeoPoint implements Comparable<GeoLocati
return o1.compareTo(o2);
}
/**
 * Converts an angular distance given in degrees into whole kilometers.
 * The input is scaled by the factor 111.32 and the fractional part is
 * dropped by the int cast (truncation toward zero, not rounding).
 * NOTE(review): 111.32 is presumably the length of one degree in km
 * along a great circle of the earth - confirm against callers.
 *
 * @param degree angular distance in degrees
 * @return the scaled distance, truncated to an int
 */
public static int degreeToKm(double degree) {
    final double kilometers = 111.32d * degree;
    return (int) kilometers;
}
}

@ -56,7 +56,7 @@ public class URIMetadataNode implements URIMetadata {
private DigestURI url;
Bitfield flags;
private final int imagec, audioc, videoc, appc;
private final double lon, lat;
private double lat, lon;
private long ranking; // during generation of a search result this value is set
private final SolrDocument doc;
private final String snippet;
@ -84,6 +84,14 @@ public class URIMetadataNode implements URIMetadata {
this.appc = getInt(YaCySchema.videolinkscount_i);
this.lon = getDouble(YaCySchema.lon_coordinate);
this.lat = getDouble(YaCySchema.lat_coordinate);
String latlon = (String) this.doc.getFieldValue(YaCySchema.coordinate_p.name());
if (latlon != null) {
int p = latlon.indexOf(',');
if (p > 0) {
this.lat = Double.parseDouble(latlon.substring(0, p));
this.lon = Double.parseDouble(latlon.substring(p + 1));
}
}
this.flags = new Bitfield();
if (this.keywords != null && this.keywords.indexOf("indexof") >= 0) this.flags.set(Condenser.flag_cat_indexof, true);
if (this.lon != 0.0d || this.lat != 0.0d) this.flags.set(Condenser.flag_cat_haslocation, true);

@ -1019,18 +1019,21 @@ public final class Protocol
public static int solrQuery(
final SearchEvent event,
final HandleSet wordhashes,
final int offset,
final int count,
final long time,
final Seed target,
final Blacklist blacklist) {
final HandleSet wordhashes = event.getQuery().query_include_hashes;
if (event.getQuery().queryString == null || event.getQuery().queryString.length() == 0) {
return -1; // we cannot query solr only with word hashes, there is no clear text string
}
event.rankingProcess.addExpectedRemoteReferences(count);
SolrDocumentList docList = null;
final String solrQuerystring = "{!lucene q.op=AND df=text_t}" + event.getQuery().solrQueryString(false);
final String solrQuerystring = event.getQuery().solrQueryString(false);
Log.logInfo("Protocol", "SOLR QUERY: " + solrQuerystring);
boolean localsearch = target == null || target.equals(event.peers.mySeed());
if (localsearch) {
// search the local index

@ -169,7 +169,7 @@ public class RemoteSearch extends Thread {
for (Seed s: targetPeers) omit.add(s);
Seed[] nodes = PeerSelection.selectNodeSearchTargets(event.peers, 20, omit);
for (Seed s: nodes) {
solrRemoteSearch(event, count, event.getQuery().query_include_hashes, time, s, blacklist);
solrRemoteSearch(event, count, time, s, blacklist);
}
// start search to YaCy peers
@ -258,11 +258,11 @@ public class RemoteSearch extends Thread {
public static Thread solrRemoteSearch(
final SearchEvent event,
final int count,
final HandleSet wordhashes,
final long time,
final Seed targetPeer,
final Blacklist blacklist) {
// check own peer status
if (event.peers.mySeed() == null || event.peers.mySeed().getPublicAddress() == null) { return null; }
@ -275,7 +275,6 @@ public class RemoteSearch extends Thread {
try {
int urls = Protocol.solrQuery(
event,
wordhashes,
0,
count,
time,

@ -234,6 +234,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (md.lat() != 0.0f && md.lon() != 0.0f) {
if (allAttr || contains(YaCySchema.lat_coordinate)) add(doc, YaCySchema.lat_coordinate, md.lat());
if (allAttr || contains(YaCySchema.lon_coordinate)) add(doc, YaCySchema.lon_coordinate, md.lon());
if (allAttr || contains(YaCySchema.coordinate_p)) add(doc, YaCySchema.coordinate_p, Double.toString(md.lat()) + "," + Double.toString(md.lon()));
}
if (allAttr || contains(YaCySchema.httpstatus_i)) add(doc, YaCySchema.httpstatus_i, 200);
@ -616,6 +617,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (yacydoc.lat() != 0.0f && yacydoc.lon() != 0.0f) {
if (allAttr || contains(YaCySchema.lat_coordinate)) add(doc, YaCySchema.lat_coordinate, yacydoc.lat());
if (allAttr || contains(YaCySchema.lon_coordinate)) add(doc, YaCySchema.lon_coordinate, yacydoc.lon());
if (allAttr || contains(YaCySchema.coordinate_p)) add(doc, YaCySchema.coordinate_p, Double.toString(yacydoc.lat()) + "," + Double.toString(yacydoc.lon()));
}
if (allAttr || contains(YaCySchema.httpstatus_i)) add(doc, YaCySchema.httpstatus_i, header == null ? 200 : header.getStatusCode());

@ -27,11 +27,11 @@ package net.yacy.search.index;
import java.util.Date;
import java.util.List;
import org.apache.solr.common.SolrInputDocument;
import net.yacy.cora.services.federated.solr.Schema;
import net.yacy.cora.services.federated.solr.SolrType;
import org.apache.solr.common.SolrInputDocument;
public enum YaCySchema implements Schema {
// mandatory
@ -58,8 +58,9 @@ public enum YaCySchema implements Schema {
applinkscount_i(SolrType.integer, true, true, "number of links to application resources"),// int lapp();
// optional but recommended
lon_coordinate(SolrType.tdouble, true, true, "longitude of location as declared in WSG84"),
lat_coordinate(SolrType.tdouble, true, true, "latitude of location as declared in WSG84"),
lon_coordinate(SolrType.tdouble, true, true, "longitude of location as declared in WSG84"), // deprecated
lat_coordinate(SolrType.tdouble, true, true, "latitude of location as declared in WSG84"), // deprecated
coordinate_p(SolrType.location, true, true, "point in degrees of latitude,longitude as declared in WSG84"),
ip_s(SolrType.string, true, true, "ip of host of url (after DNS lookup)"),
author(SolrType.text_general, true, true, "content of author-tag"),
description(SolrType.text_general, true, true, "content of description-tag"),

@ -48,6 +48,7 @@ import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.geolocation.GeoLocation;
import net.yacy.document.parser.html.AbstractScraper;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
@ -60,6 +61,7 @@ import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.Seed;
import net.yacy.search.index.Segment;
import net.yacy.search.index.YaCySchema;
import net.yacy.search.ranking.RankingProfile;
public final class QueryParams {
@ -323,15 +325,6 @@ public final class QueryParams {
return this.domType == Searchdom.LOCAL;
}
/**
 * Builds a query expression on the text_t field from the include and
 * exclude words of this query.
 * Include words are joined with '+', exclude words are appended with
 * a "+-" prefix each; the leading '+' of the joined list is dropped.
 *
 * @return the expression prefixed with "text_t:", or null if there are
 *         no include words
 */
public String solrQuery() {
    // without include words there is nothing to build a query from
    final boolean noIncludes = this.query_include_words == null || this.query_include_words.size() == 0;
    if (noIncludes) {
        return null;
    }
    final StringBuilder terms = new StringBuilder(80);
    for (final String include : this.query_include_words) {
        terms.append('+').append(include);
    }
    for (final String exclude : this.query_exclude_words) {
        terms.append("+-").append(exclude);
    }
    if (terms.length() == 0) {
        return null;
    }
    // skip the separator that was written in front of the first word
    return "text_t:" + terms.substring(1);
}
public static HandleSet hashes2Set(final String query) {
final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
if (query != null) {
@ -475,13 +468,42 @@ public final class QueryParams {
}
public String solrQueryString(boolean urlencoded) {
final StringBuilder q = new StringBuilder();
if (this.query_include_words != null) {
for (String s: this.query_include_words) q.append(urlencoded ? '+' : ' ').append(s);
for (String s: this.query_exclude_words) q.append(urlencoded ? "+-" : " -").append(s);
if (this.query_include_words == null || this.query_include_words.size() == 0) return null;
final StringBuilder q = new StringBuilder(80);
q.append("{!lucene q.op=AND}");
// add text query
q.append("text_t:");
int wc = 0;
for (String s: this.query_include_words) {
if (wc > 0) q.append(urlencoded ? '+' : ' ');
q.append(s);
wc++;
}
for (String s: this.query_exclude_words){
if (wc > 0) q.append(urlencoded ? "+-" : " -");
q.append(s);
wc++;
}
// add constraints
if ( this.sitehash == null ) {
if (this.siteexcludes != null) {
for (String ex: this.siteexcludes) {
q.append(urlencoded ? "+AND+-host_id_s:" : " AND -host_id_s:").append(ex);
}
}
} else {
q.append(urlencoded ? "+AND+host_id_s:" : " AND host_id_s:").append(this.sitehash);
}
if (urlencoded) return CharacterCoding.unicode2html(q.length() > 0 ? q.substring(1) : q.toString(), true);
return q.length() > 0 ? q.substring(1) : q.toString();
if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) {
q.append("&fq={!bbox sfield=").append(YaCySchema.coordinate_p.name()).append("}&pt=");
q.append(Double.toString(this.lat)).append(',').append(Double.toString(this.lon)).append("&d=").append(GeoLocation.degreeToKm(this.radius));
}
// prepare result
return (urlencoded) ? CharacterCoding.unicode2html(q.toString(), true) : q.toString();
}
public String queryStringForUrl() {

@ -150,7 +150,7 @@ public final class SearchEvent {
this.rankingProcess = new RWIProcess(this.query, this.order, remote);
// start a local solr search
RemoteSearch.solrRemoteSearch(this, 100, this.query.query_include_hashes, 10000, null, Switchboard.urlBlacklist);
RemoteSearch.solrRemoteSearch(this, 100, 10000, null /*this peer*/, Switchboard.urlBlacklist);
// start a local RWI search concurrently
this.rankingProcess.start();

Loading…
Cancel
Save