Added collection navigation:

The collection field (can be filled e.g. in Crawl Start) can be used to
add categories to YaCy index entries. The usage of that field was
restricted to solr searches and post argument filters as implemented in
commit f7571386a3.
This commit extends collections to a full navigation option in the
standard YaCy search interface. The field is not active by default but
can be activated easily in the /ConfigSearchPage_p.html servlet (just
check the 'Collection' facet field). Collections can now be used for (at
least) two purposes:
- to provide search tenants (through post argument collection)
- to provide self-made category navigation
Search requests may now have (regardless of whether the collection
facet is switched on or off) a "collection:<collection-name>" modifier
attached; furthermore, collection names may use disjunctions using the
'|' pipe symbol. For example, this is a valid search request:
www collection:user|proxy
pull/1/head
Michael Peter Christen 11 years ago
parent 74c249288a
commit d2151857f1

@ -825,6 +825,7 @@ search.result.show.tags = false
# can be temporarily different if the search string is given with different navigation values
# assigning no value(s) means that no navigation is shown
search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language
#search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language,collections
# search result verification and snippet fetch caching rules
# each search result can be verified by loading the link from the web

@ -170,6 +170,7 @@ public class ConfigPortal {
prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0);
prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0);
prop.put("search.navigation.collections", sb.getConfig("search.navigation", "").indexOf("collections",0) >= 0 ? 1 : 0);
prop.put("search.navigation.namespace", sb.getConfig("search.navigation", "").indexOf("namespace",0) >= 0 ? 1 : 0);
prop.put("search.navigation.topics", sb.getConfig("search.navigation", "").indexOf("topics",0) >= 0 ? 1 : 0);

@ -125,13 +125,19 @@
<td><ul class="nav nav-sidebar menugroup" id="sidebarLanguages" style="padding-left:15px; padding-right:10px;">
<li><h3>Language</h3></li>
</ul></td>
</tr>
</tr>
<tr>
<td><input type="checkbox" name="search.navigation.authors" value="true" #(search.navigation.authors)#::checked="checked" #(/search.navigation.authors)# /></td>
<td><ul class="nav nav-sidebar menugroup" id="sidebarAuthors" style="padding-left:15px; padding-right:10px;">
<li><h3>Author</h3></li>
</ul></td>
</tr>
<tr>
<td><input type="checkbox" name="search.navigation.collections" value="true" #(search.navigation.collections)#::checked="checked" #(/search.navigation.collections)# /></td>
<td><ul class="nav nav-sidebar menugroup" id="sidebarCollections" style="padding-left:15px; padding-right:10px;">
<li><h3>Collection</h3></li>
</ul></td>
</tr>
<tr>
<td></td>
<td><ul class="nav nav-sidebar menugroup" id="vocabulary" style="padding-left:15px; padding-right:10px;">

@ -85,6 +85,7 @@ public class ConfigSearchPage_p {
if (post.getBoolean("search.navigation.hosts")) nav += "hosts,";
if (post.getBoolean("search.navigation.language")) nav += "language,";
if (post.getBoolean("search.navigation.authors")) nav += "authors,";
if (post.getBoolean("search.navigation.collections")) nav += "collections,";
if (post.getBoolean("search.navigation.namespace")) nav += "namespace,";
if (post.getBoolean("search.navigation.topics")) nav += "topics,";
if (nav.endsWith(",")) nav = nav.substring(0, nav.length() - 1);
@ -162,6 +163,7 @@ public class ConfigSearchPage_p {
prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0);
prop.put("search.navigation.language", sb.getConfig("search.navigation", "").indexOf("language",0) >= 0 ? 1 : 0);
prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0);
prop.put("search.navigation.collections", sb.getConfig("search.navigation", "").indexOf("collections",0) >= 0 ? 1 : 0);
prop.put("search.navigation.namespace", sb.getConfig("search.navigation", "").indexOf("namespace",0) >= 0 ? 1 : 0);
prop.put("search.navigation.topics", sb.getConfig("search.navigation", "").indexOf("topics",0) >= 0 ? 1 : 0);

@ -121,6 +121,7 @@ public final class search {
modifier.sitehost = post.get("sitehost", ""); if (modifier.sitehost.isEmpty()) modifier.sitehost = null;
modifier.sitehash = post.get("sitehash", ""); if (modifier.sitehash.isEmpty()) modifier.sitehash = null;
modifier.author = post.get("author", ""); if (modifier.author.isEmpty()) modifier.author = null;
modifier.collection = post.get("collection", ""); if (modifier.collection.isEmpty()) modifier.collection = null;
modifier.filetype = post.get("filetype", ""); if (modifier.filetype.isEmpty()) modifier.filetype = null;
modifier.protocol = post.get("protocol", ""); if (modifier.protocol.isEmpty()) modifier.protocol = null;
modifier.parse(post.get("modifier", "").trim());

@ -364,7 +364,7 @@ public class yacysearch {
querystring = modifier.parse(querystring);
// read collection
modifier.collection = post.get("collection", "");
modifier.collection = post.get("collection", modifier.collection); // post arguments may overrule parsed collection values
int stp = querystring.indexOf('*');
if (stp >= 0) {

@ -163,6 +163,21 @@ $(function() { $("#sidebarAuthors").accordion('activate', false); });
::#(/activate)#
#(/nav-authors)#
#(nav-collections)#::
<ul class="nav nav-sidebar menugroup">
<li><h3>Collection</h3></li>
#{element}#
<li><a href="#[url]#" class="MenuItemLink"><input type="checkbox" onchange="window.location.href='#[url]#'"#(on)# checked="checked"::#(/on)#/> #[name]# (#[count]#)</a></li>
#{/element}#</ul>
#(activate)#
<script type="text/javascript">
//<![CDATA[
$(function() { $("#sidebarCollections").accordion('activate', false); });
//]]>
</script>
::#(/activate)#
#(/nav-collections)#
#{nav-vocabulary}#
<ul class="nav nav-sidebar menugroup">
<li><h3>#[navname]#</h3></li>

@ -265,6 +265,47 @@ public class yacysearchtrailer {
prop.put("nav-authors", 0); // this navigation is not useful
}
}
// collection navigators
if (theSearch.collectionNavigator == null || theSearch.collectionNavigator.isEmpty()) {
prop.put("nav-collections", 0);
} else {
prop.put("nav-collections", 1);
navigatorIterator = theSearch.collectionNavigator.keys(false);
int i = 0, pos = 0, neg = 0;
String nav;
while (i < 10 && navigatorIterator.hasNext()) {
name = navigatorIterator.next().trim();
count = theSearch.collectionNavigator.get(name);
if (count == 0) {
break;
}
nav = (name.indexOf(' ', 0) < 0) ? "collection%3A" + name : "collection%3A%28" + name.replace(" ", "+") + "%29";
if (theSearch.query.modifier.collection == null || !theSearch.query.modifier.collection.contains(name)) {
pos++;
prop.put("nav-collections_element_" + i + "_on", 1);
prop.put(fileType, "nav-collections_element_" + i + "_modifier", nav);
} else {
neg++;
prop.put("nav-collections_element_" + i + "_on", 0);
prop.put(fileType, "nav-collections_element_" + i + "_modifier", "-" + nav);
nav="";
}
prop.put(fileType, "nav-collections_element_" + i + "_name", name);
prop.put(fileType, "nav-collections_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, nav, false).toString());
prop.put("nav-collections_element_" + i + "_count", count);
prop.put("nav-collections_element_" + i + "_nl", 1);
i++;
}
prop.put("nav-collections_element", i);
prop.put("nav-collections_activate", neg > 0 ? 1 : 0); // by default off
i--;
prop.put("nav-collections_element_" + i + "_nl", 0);
if (pos == 1 && neg == 0)
{
prop.put("nav-collections", 0); // this navigation is not useful
}
}
// topics navigator
final ScoreMap<String> topicNavigator = sb.index.connectedRWI() ? theSearch.getTopicNavigator(TOPWORDS_MAXCOUNT) : null;

@ -63,7 +63,20 @@
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#(/nav-authors)##{nav-vocabulary}#
},#(/nav-authors)##(nav-collections)#::
{
"facetname": "collections",
"displayname": "Collections",
"type": "String",
"min": "0",
"max": "0",
"mean": "0",
"elements": [
#{element}#
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#(/nav-collections)##{nav-vocabulary}#
{
"facetname": "#[navname]#",
"displayname": "#[navname]#",

@ -20,6 +20,13 @@
#{/element}#
</yacy:facet>
#(/nav-authors)#
#(nav-collections)#::
<yacy:facet name="collections" displayname="Collections" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" url="#[url]#"/>
#{/element}#
</yacy:facet>
#(/nav-collections)#
#(nav-filetypes)#::
<yacy:facet name="filetypes" displayname="Filetypes" type="String" min="0" max="0" mean="0">
#{element}#

@ -267,6 +267,8 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
NamedList<Integer> protocols = facetFields == null ? null : (NamedList<Integer>) facetFields.get(CollectionSchema.url_protocol_s.getSolrFieldName());
@SuppressWarnings("unchecked")
NamedList<Integer> authors = facetFields == null ? null : (NamedList<Integer>) facetFields.get(CollectionSchema.author_sxt.getSolrFieldName());
@SuppressWarnings("unchecked")
NamedList<Integer> collections = facetFields == null ? null : (NamedList<Integer>) facetFields.get(CollectionSchema.collection_sxt.getSolrFieldName());
if (domains != null) {
openTag(writer, "yacy:facet name=\"domains\" displayname=\"Domains\" type=\"String\" min=\"0\" max=\"0\" mean=\"0\"");
@ -288,6 +290,11 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
for (Map.Entry<String, Integer> entry: authors) facetEntry(writer, "author", entry.getKey(), Integer.toString(entry.getValue()));
closeTag(writer, "yacy:facet");
}
if (collections != null) {
openTag(writer, "yacy:facet name=\"collections\" displayname=\"Collections\" type=\"String\" min=\"0\" max=\"0\" mean=\"0\"");
for (Map.Entry<String, Integer> entry: collections) facetEntry(writer, "collection", entry.getKey(), Integer.toString(entry.getValue()));
closeTag(writer, "yacy:facet");
}
closeTag(writer, "yacy:navigation");
closeTag(writer, "channel");

@ -236,6 +236,8 @@ public class YJsonResponseWriter implements QueryResponseWriter {
NamedList<Integer> protocols = facetFields == null ? null : (NamedList<Integer>) facetFields.get(CollectionSchema.url_protocol_s.getSolrFieldName());
@SuppressWarnings("unchecked")
NamedList<Integer> authors = facetFields == null ? null : (NamedList<Integer>) facetFields.get(CollectionSchema.author_sxt.getSolrFieldName());
@SuppressWarnings("unchecked")
NamedList<Integer> collections = facetFields == null ? null : (NamedList<Integer>) facetFields.get(CollectionSchema.collection_sxt.getSolrFieldName());
if (domains != null) {
writer.write("{\"facetname\":\"domains\",\"displayname\":\"Provider\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray());
@ -279,6 +281,15 @@ public class YJsonResponseWriter implements QueryResponseWriter {
}
writer.write("]},\n".toCharArray());
}
if (collections != null) {
writer.write("{\"facetname\":\"collections\",\"displayname\":\"Collections\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray());
for (int i = 0; i < collections.size(); i++) {
facetEntry(writer, "collection", collections.getName(i), Integer.toString(collections.getVal(i)));
if (i < collections.size() - 1) writer.write(',');
writer.write("\n");
}
writer.write("]},\n".toCharArray());
}
writer.write("]}]}\n".toCharArray());
if (jsonp != null) {

@ -78,7 +78,7 @@ import org.apache.solr.util.FastWriter;
/*
* taken from the Solr 3.6.0 code, which is now deprecated;
* this is now done in Solr 4.x.x with org.apache.solr.servlet.SolrDispatchFilter
* implemented as servlet (we don't use multicore)
* implemented as servlet
*/
public class SolrSelectServlet extends HttpServlet {
private static final long serialVersionUID = 1L;

@ -55,26 +55,26 @@ public class QueryModifier {
// parse protocol
if ( querystring.indexOf("/https", 0) >= 0 ) {
querystring = querystring.replace("/https", "");
protocol = "https";
this.protocol = "https";
add("/https");
} else if ( querystring.indexOf("/http", 0) >= 0 ) {
querystring = querystring.replace("/http", "");
protocol = "http";
this.protocol = "http";
add("/http");
}
if ( querystring.indexOf("/ftp", 0) >= 0 ) {
querystring = querystring.replace("/ftp", "");
protocol = "ftp";
this.protocol = "ftp";
add("/ftp");
}
if ( querystring.indexOf("/smb", 0) >= 0 ) {
querystring = querystring.replace("/smb", "");
protocol = "smb";
this.protocol = "smb";
add("/smb");
}
if ( querystring.indexOf("/file", 0) >= 0 ) {
querystring = querystring.replace("/file", "");
protocol = "file";
this.protocol = "file";
add("/file");
}
@ -91,21 +91,21 @@ public class QueryModifier {
if ( ftb == -1 ) {
ftb = querystring.length();
}
sitehost = querystring.substring(sp + 5, ftb);
querystring = querystring.replace("site:" + sitehost, "");
while ( sitehost.length() > 0 && sitehost.charAt(0) == '.' ) {
sitehost = sitehost.substring(1);
this.sitehost = querystring.substring(sp + 5, ftb);
querystring = querystring.replace("site:" + this.sitehost, "");
while ( this.sitehost.length() > 0 && this.sitehost.charAt(0) == '.' ) {
this.sitehost = this.sitehost.substring(1);
}
while ( sitehost.endsWith(".") ) {
sitehost = sitehost.substring(0, sitehost.length() - 1);
this.sitehost = this.sitehost.substring(0, this.sitehost.length() - 1);
}
try {
sitehash = DigestURL.hosthash(sitehost, sitehost.startsWith("ftp.") ? 21 : 80);
this.sitehash = DigestURL.hosthash(this.sitehost, this.sitehost.startsWith("ftp.") ? 21 : 80);
} catch (MalformedURLException e) {
sitehash = "";
this.sitehash = "";
ConcurrentLog.logException(e);
}
add("site:" + sitehost);
add("site:" + this.sitehost);
}
// parse author
@ -116,31 +116,43 @@ public class QueryModifier {
if ( quotes ) {
int ftb = querystring.indexOf(')', authori + 8);
if (ftb == -1) ftb = querystring.length() + 1;
author = querystring.substring(authori + 8, ftb);
querystring = querystring.replace("author:(" + author + ")", "");
this.author = querystring.substring(authori + 8, ftb);
querystring = querystring.replace("author:(" + this.author + ")", "");
add("author:(" + author + ")");
} else {
int ftb = querystring.indexOf(' ', authori);
if ( ftb == -1 ) {
ftb = querystring.length();
}
author = querystring.substring(authori + 7, ftb);
querystring = querystring.replace("author:" + author, "");
this.author = querystring.substring(authori + 7, ftb);
querystring = querystring.replace("author:" + this.author, "");
add("author:" + author);
}
}
// parse collection
final int collectioni = querystring.indexOf("collection:", 0);
if ( collectioni >= 0 ) {
int ftb = querystring.indexOf(' ', collectioni);
if ( ftb == -1 ) {
ftb = querystring.length();
}
this.collection = querystring.substring(collectioni + 11, ftb);
querystring = querystring.replace("collection:" + this.collection, "");
add("collection:" + this.collection);
}
// parse language
final int langi = querystring.indexOf("/language/");
if (langi >= 0) {
if (querystring.length() >= (langi + 12)) {
language = querystring.substring(langi + 10, langi + 12);
querystring = querystring.replace("/language/" + language, "");
if (language.length() == 2 && ISO639.exists(language)) { // only 2-digit codes valid
language = language.toLowerCase();
add("/language/" + language);
this.language = querystring.substring(langi + 10, langi + 12);
querystring = querystring.replace("/language/" + this.language, "");
if (this.language.length() == 2 && ISO639.exists(this.language)) { // only 2-digit codes valid
this.language = this.language.toLowerCase();
add("/language/" + this.language);
} else {
language = null;
this.language = null;
}
}
}
@ -223,12 +235,17 @@ public class QueryModifier {
fq.append(" AND ").append(CollectionSchema.author_sxt.getSolrFieldName()).append(":\"").append(this.author).append('\"');
}
if (this.collection != null && this.collection.length() > 0 && fq.indexOf(CollectionSchema.collection_sxt.getSolrFieldName()) < 0) {
fq.append(" AND ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(this.collection).append('\"');
}
if (this.protocol != null && this.protocol.length() > 0 && fq.indexOf(CollectionSchema.url_protocol_s.getSolrFieldName()) < 0) {
fq.append(" AND ").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(":\"").append(this.protocol).append('\"');
}
return fq;
}
public void apply(serverObjects post) {
final StringBuilder fq = apply(post.get(CommonParams.FQ,""));

@ -88,6 +88,7 @@ public final class QueryParams {
defaultfacetfields.put("protocol", CollectionSchema.url_protocol_s);
defaultfacetfields.put("filetype", CollectionSchema.url_file_ext_s);
defaultfacetfields.put("authors", CollectionSchema.author_sxt);
defaultfacetfields.put("collections", CollectionSchema.collection_sxt);
defaultfacetfields.put("language", CollectionSchema.language_s);
//missing: namespace
}
@ -226,7 +227,7 @@ public final class QueryParams {
this.solrSchema = indexSegment.fulltext().getDefaultConfiguration();
for (String navkey: search_navigation) {
CollectionSchema f = defaultfacetfields.get(navkey);
if (f != null && solrSchema.contains(f)) facetfields.add(f.getSolrFieldName());
if (f != null && solrSchema.contains(f)) this.facetfields.add(f.getSolrFieldName());
}
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(CollectionSchema.VOCABULARY_PREFIX + v.getName() + CollectionSchema.VOCABULARY_SUFFIX);
this.maxfacets = defaultmaxfacets;
@ -463,6 +464,11 @@ public final class QueryParams {
if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrSchema.contains(CollectionSchema.author_sxt)) {
fq.append(" AND ").append(CollectionSchema.author_sxt.getSolrFieldName()).append(":\"").append(this.modifier.author).append('\"');
}
// add collection facets
if (this.modifier.collection != null && this.modifier.collection.length() > 0 && this.solrSchema.contains(CollectionSchema.collection_sxt)) {
fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.modifier.collection));
}
if (this.modifier.protocol != null) {
fq.append(" AND {!tag=").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append("}").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(':').append(this.modifier.protocol);
@ -504,10 +510,6 @@ public final class QueryParams {
//params.setRows(Integer.MAX_VALUE);
}
if (this.modifier.collection != null && this.modifier.collection.length() > 0) {
fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.modifier.collection));
}
return fq.length() > 0 ? fq.substring(5) : fq.toString();
}

@ -141,6 +141,7 @@ public final class SearchEvent {
public final ScoreMap<String> locationNavigator; // a counter for the appearance of location coordinates
public final ScoreMap<String> hostNavigator; // a counter for the appearance of host names
public final ScoreMap<String> authorNavigator; // a counter for the appearances of authors
public final ScoreMap<String> collectionNavigator; // a counter for the appearances of collections
public final ScoreMap<String> namespaceNavigator; // a counter for name spaces
public final ScoreMap<String> protocolNavigator; // a counter for protocol types
public final ScoreMap<String> filetypeNavigator; // a counter for file types
@ -229,6 +230,7 @@ public final class SearchEvent {
final String navcfg = Switchboard.getSwitchboard().getConfig("search.navigation", "")+",language";
this.locationNavigator = navcfg.contains("location") ? new ConcurrentScoreMap<String>() : null;
this.authorNavigator = navcfg.contains("authors") ? new ConcurrentScoreMap<String>() : null;
this.collectionNavigator = navcfg.contains("collections") ? new ConcurrentScoreMap<String>() : null;
this.namespaceNavigator = navcfg.contains("namespace") ? new ConcurrentScoreMap<String>() : null;
this.hostNavigator = navcfg.contains("hosts") ? new ConcurrentScoreMap<String>() : null;
this.protocolNavigator = navcfg.contains("protocol") ? new ConcurrentScoreMap<String>() : null;
@ -809,6 +811,11 @@ public final class SearchEvent {
if (fcts != null) this.authorNavigator.inc(fcts);
}
if (this.collectionNavigator != null) {
fcts = facets.get(CollectionSchema.collection_sxt.getSolrFieldName());
if (fcts != null) this.collectionNavigator.inc(fcts);
}
if (this.protocolNavigator != null) {
fcts = facets.get(CollectionSchema.url_protocol_s.getSolrFieldName());
if (fcts != null) {

Loading…
Cancel
Save