added a 'collection' property attribute in yacysearch.html which can be
used to select between different collections as defined during a crawl
start with the 'collection' attribute. This actually implements the
ability to prepare search tenants which restrict their search results to
a specific collection. The main use for this is to provide tenants to
the yaml4 interface (at this time).
pull/1/head
orbiter 12 years ago
parent 3e79bd4b1f
commit f7571386a3

@ -22,7 +22,6 @@ import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
import java.io.Writer; import java.io.Writer;
import java.util.ArrayList;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.Ranking;
@ -30,11 +29,11 @@ import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter; import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter;
import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.CommonPattern;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.query.AccessTracker; import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryGoal; import net.yacy.search.query.QueryGoal;
import net.yacy.search.query.QueryModifier;
import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEvent;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
@ -150,28 +149,13 @@ public class searchresult {
post.put("hl.simple.post", "</b>"); post.put("hl.simple.post", "</b>");
post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH)); post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH));
String[] site = post.remove("site"); // example: col1|col2
//String[] access = post.remove("access"); //String[] access = post.remove("access");
//String[] entqr = post.remove("entqr"); //String[] entqr = post.remove("entqr");
// add sites operator // add sites operator
String[] site = post.remove("site"); // example: col1|col2
if (site != null && site[0].length() > 0) { if (site != null && site[0].length() > 0) {
String[] s0 = CommonPattern.VERTICALBAR.split(site[0]); post.put(CommonParams.FQ, QueryModifier.parseCollectionExpression(site[0]));
ArrayList<String> sites = new ArrayList<String>(2);
for (String s: s0) {
s = s.trim().toLowerCase();
if (s.length() > 0) sites.add(s);
}
StringBuilder fq = new StringBuilder(20);
if (sites.size() > 1) {
fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(0));
for (int i = 1; i < sites.size(); i++) {
fq.append(" OR ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(i));
}
} else if (sites.size() == 1) {
fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(0));
}
post.put(CommonParams.FQ, fq.toString());
} }
// get the embedded connector // get the embedded connector

@ -358,6 +358,9 @@ public class yacysearch {
final RankingProfile ranking = sb.getRanking(); final RankingProfile ranking = sb.getRanking();
final QueryModifier modifier = new QueryModifier(); final QueryModifier modifier = new QueryModifier();
querystring = modifier.parse(querystring); querystring = modifier.parse(querystring);
// read collection
modifier.collection = post.get("collection", "");
int stp = querystring.indexOf('*'); int stp = querystring.indexOf('*');
if (stp >= 0) { if (stp >= 0) {

@ -25,20 +25,27 @@
package net.yacy.cora.document; package net.yacy.cora.document;
import java.text.ParseException; import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Comparator; import java.util.Comparator;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Properties;
import java.util.Set; import java.util.Set;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.lod.vocabulary.DublinCore; import net.yacy.cora.lod.vocabulary.DublinCore;
import net.yacy.cora.lod.vocabulary.Geo; import net.yacy.cora.lod.vocabulary.Geo;
import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.CommonPattern;
import net.yacy.document.Document;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.meta.DigestURI;
public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMessage> { public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMessage> {
@ -335,4 +342,29 @@ public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMe
size = size / 1024; size = size / 1024;
return size + " gbyte"; return size + " gbyte";
} }
/*
public Document toDocument() {
DigestURI url = new DigestURI(this.getLink());
List<String> titles = new ArrayList<String>();
titles.add(this.getTitle());
return new Document(
url,
Classification.ext2mime(url.getFileExtension(), "text/plain"),
"UTF8",
null,
this.getLanguage(),
Token.subject.valueFrom(this.map, ""),
titles,
this.getAuthor(),
this.getCopyright(),
null,
this.getDescription(),
0.0d, 0.0d,
this.getFulltext(),
null,
null,
null,
false);
}
*/
} }

@ -20,8 +20,11 @@
package net.yacy.search.query; package net.yacy.search.query;
import java.util.ArrayList;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import net.yacy.cora.util.CommonPattern;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
@ -30,7 +33,7 @@ import net.yacy.server.serverObjects;
public class QueryModifier { public class QueryModifier {
private final StringBuilder modifier; private final StringBuilder modifier;
public String sitehost, sitehash, filetype, protocol, author; public String sitehost, sitehash, filetype, protocol, author, collection;
public QueryModifier() { public QueryModifier() {
this.sitehash = null; this.sitehash = null;
@ -38,6 +41,7 @@ public class QueryModifier {
this.filetype = null; this.filetype = null;
this.protocol = null; this.protocol = null;
this.author = null; this.author = null;
this.collection = null;
this.modifier = new StringBuilder(20); this.modifier = new StringBuilder(20);
} }
@ -183,4 +187,30 @@ public class QueryModifier {
} }
} }
/**
 * Parse a GSA site description string and create a filter query string
 * which is used to restrict the search result to collections as named with the site attributes.
 * <p>
 * The description is split with {@link CommonPattern#VERTICALBAR} (example input: {@code col1|col2});
 * every part is trimmed and lower-cased, empty parts are dropped. Each remaining name becomes a
 * {@code <collection field>:<name>} term. Several terms are joined with {@code OR} and enclosed in
 * parentheses so that the expression keeps its meaning when a caller concatenates it with other
 * filter clauses using {@code AND}.
 * <p>
 * NOTE(review): collection names are inserted into the query without escaping query syntax
 * characters; confirm that callers only pass plain collection names.
 *
 * @param collectionDescription the collection names separated by vertical bars; may be null
 * @return a solr query string which shall be used for a filter query; an empty string if the
 *         description is null or contains no collection name
 */
public static String parseCollectionExpression(String collectionDescription) {
    if (collectionDescription == null) return "";

    // collect the non-empty, normalized collection names
    final ArrayList<String> sites = new ArrayList<String>(2);
    for (String s: CommonPattern.VERTICALBAR.split(collectionDescription)) {
        // Locale.ROOT keeps the lower-casing independent of the default locale of the JVM
        s = s.trim().toLowerCase(java.util.Locale.ROOT);
        if (s.length() > 0) sites.add(s);
    }
    if (sites.isEmpty()) return "";

    final String field = CollectionSchema.collection_sxt.getSolrFieldName();
    if (sites.size() == 1) return field + ':' + sites.get(0);

    // more than one collection: group the OR-terms so they bind together as a single clause
    final StringBuilder filterQuery = new StringBuilder(20 * sites.size());
    filterQuery.append('(').append(field).append(':').append(sites.get(0));
    for (int i = 1; i < sites.size(); i++) {
        filterQuery.append(" OR ").append(field).append(':').append(sites.get(i));
    }
    filterQuery.append(')');
    return filterQuery.toString();
}
} }

@ -503,6 +503,11 @@ public final class QueryParams {
//params.setSortField(CollectionSchema.last_modified.getSolrFieldName(), ORDER.desc); // deprecated in Solr 4.2 //params.setSortField(CollectionSchema.last_modified.getSolrFieldName(), ORDER.desc); // deprecated in Solr 4.2
} }
} }
if (this.modifier.collection != null && this.modifier.collection.length() > 0) {
fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.modifier.collection));
}
if (fq.length() > 0) { if (fq.length() > 0) {
params.setFilterQueries(fq.substring(5)); params.setFilterQueries(fq.substring(5));
} }
@ -570,15 +575,16 @@ public final class QueryParams {
context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())).append(asterisk); context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())).append(asterisk);
context.append(Base64Order.enhancedCoder.encodeString(this.urlMask.toString())).append(asterisk); context.append(Base64Order.enhancedCoder.encodeString(this.urlMask.toString())).append(asterisk);
context.append(this.modifier.sitehash).append(asterisk); context.append(this.modifier.sitehash).append(asterisk);
context.append(this.siteexcludes).append(asterisk);
context.append(this.modifier.author).append(asterisk); context.append(this.modifier.author).append(asterisk);
context.append(this.modifier.protocol).append(asterisk);
context.append(this.modifier.filetype).append(asterisk);
context.append(this.modifier.collection).append(asterisk);
context.append(this.modifier.toString()).append(asterisk);
context.append(this.siteexcludes).append(asterisk);
context.append(this.targetlang).append(asterisk); context.append(this.targetlang).append(asterisk);
context.append(this.constraint).append(asterisk); context.append(this.constraint).append(asterisk);
context.append(this.maxDistance).append(asterisk); context.append(this.maxDistance).append(asterisk);
context.append(this.modifier.toString()).append(asterisk);
context.append(this.modifier.protocol).append(asterisk);
context.append(this.tld).append(asterisk); context.append(this.tld).append(asterisk);
context.append(this.modifier.filetype).append(asterisk);
context.append(this.inlink).append(asterisk); context.append(this.inlink).append(asterisk);
context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius).append(asterisk); context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius).append(asterisk);
context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name()); context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name());

Loading…
Cancel
Save