From f307d65dcfd3cefd9bdde84b2b42fa5ce8cb8282 Mon Sep 17 00:00:00 2001 From: reger Date: Fri, 24 Jan 2014 03:11:25 +0100 Subject: [PATCH] prepare for a language navigator works fine to restrict language for local solrSearches. More work needs to be done to make rwi/remote searches respect the modifier.language restriction. --- htroot/ConfigSearchPage_p.html | 43 ++++++++++-------- htroot/ConfigSearchPage_p.java | 2 + htroot/yacysearch.html | 35 ++++++++------- htroot/yacysearch.java | 18 ++------ htroot/yacysearchtrailer.html | 17 +++++++ htroot/yacysearchtrailer.java | 45 +++++++++++++++++++ .../net/yacy/search/query/QueryModifier.java | 19 +++++++- source/net/yacy/search/query/QueryParams.java | 7 +++ source/net/yacy/search/query/SearchEvent.java | 26 ++++++++++- 9 files changed, 161 insertions(+), 51 deletions(-) diff --git a/htroot/ConfigSearchPage_p.html b/htroot/ConfigSearchPage_p.html index a3f9a17fb..3fa4daaaa 100644 --- a/htroot/ConfigSearchPage_p.html +++ b/htroot/ConfigSearchPage_p.html @@ -3,17 +3,17 @@ YaCy '#[clientname]#': Search Page #%env/templates/metas.template%# - - - + + + - - - - - - - + + + + + + + @@ -32,8 +32,8 @@ $(function() { $("#sidebarDomains").accordion({}); $("#sidebarProtocols").accordion({}); $("#sidebarFiletypes").accordion({}); + $("#sidebarLanguages").accordion({}); $("#sidebarAuthors").accordion({}); - $("#sidebarLanguages").accordion({}); $("#sidebarNameSpace").accordion({}); $("#sidebarTopics").tagcloud({type:"sphere",power:.25,seed:0,sizemin:10,sizemax:20,height:80,colormin:"682",colormax:"20C"}).find("li").tsort(); $("#sidebarAbout").accordion({}); @@ -58,11 +58,11 @@ $(function() { @@ -143,6 +143,12 @@ $(function() {

Wiki Name Space Navigator

+ + +
+

Language Navigator

+
+
@@ -185,9 +191,9 @@ $(function() { #[content_showDate_date]#  | 42 kbyte -  | Metadata +  | Metadata  | Parser -  | Citation +  | Citation  | Pictures  | Cache  | Augmented Browsing @@ -236,6 +242,7 @@ $(function() { $("#sidebarProtocols").accordion('activate', false); }); $(function() { $("#sidebarFiletypes").accordion('activate', false); }); $(function() { $("#sidebarDomains").accordion('activate', false); }); $(function() { $("#sidebarNameSpace").accordion('activate', false); }); +$(function() { $("#sidebarLanguages").accordion('activate', false); }); $(function() { $("#sidebarAuthors").accordion('activate', false); }); $(function() { $("#vocabulary").accordion('activate', false); }); $(function() { $("#sidebarAbout").accordion('activate', false); }); diff --git a/htroot/ConfigSearchPage_p.java b/htroot/ConfigSearchPage_p.java index 108845e35..d07f9f48d 100644 --- a/htroot/ConfigSearchPage_p.java +++ b/htroot/ConfigSearchPage_p.java @@ -84,6 +84,7 @@ public class ConfigSearchPage_p { if (post.getBoolean("search.navigation.filetype")) nav += "filetype,"; if (post.getBoolean("search.navigation.protocol")) nav += "protocol,"; if (post.getBoolean("search.navigation.hosts")) nav += "hosts,"; + if (post.getBoolean("search.navigation.language")) nav += "language,"; if (post.getBoolean("search.navigation.authors")) nav += "authors,"; if (post.getBoolean("search.navigation.namespace")) nav += "namespace,"; if (post.getBoolean("search.navigation.topics")) nav += "topics,"; @@ -162,6 +163,7 @@ public class ConfigSearchPage_p { prop.put("search.navigation.filetype", sb.getConfig("search.navigation", "").indexOf("filetype",0) >= 0 ? 1 : 0); prop.put("search.navigation.protocol", sb.getConfig("search.navigation", "").indexOf("protocol",0) >= 0 ? 1 : 0); prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0); + prop.put("search.navigation.language", sb.getConfig("search.navigation", "").indexOf("language",0) >= 0 ? 1 : 0); prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0); prop.put("search.navigation.namespace", sb.getConfig("search.navigation", "").indexOf("namespace",0) >= 0 ? 1 : 0); prop.put("search.navigation.topics", sb.getConfig("search.navigation", "").indexOf("topics",0) >= 0 ? 1 : 0); diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index 327d38c58..b389f76fa 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -6,19 +6,19 @@ #%env/templates/interactionheader.template%# - - - - - + + + + + - - - - - - - + + + + + + + - + +::#(/activate)# +#(/nav-languages)# + #(nav-namespace)#::

Name Space Navigator

diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 5d6c30123..d26d94000 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -35,6 +35,7 @@ import net.yacy.cora.lod.vocabulary.Tagging; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.sorting.ScoreMap; import net.yacy.document.LibraryProvider; +import net.yacy.kelondro.util.ISO639; import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; @@ -155,6 +156,50 @@ public class yacysearchtrailer { } } + // host navigators + final ScoreMap languageNavigator = theSearch.languageNavigator; + if (languageNavigator == null || languageNavigator.isEmpty()) { + prop.put("nav-languages", 0); + } else { + prop.put("nav-languages", 1); + navigatorIterator = languageNavigator.keys(false); + int i = 0, pos = 0, neg = 0; + String nav; + while (i < 20 && navigatorIterator.hasNext()) { + name = navigatorIterator.next(); + count = languageNavigator.get(name); + if (count == 0) { + break; + } + nav = "%2Flanguage%2F" + name; + String queryStringForUrl = theSearch.query.getQueryGoal().getQueryString(true); + if (theSearch.query.modifier.language == null || !theSearch.query.modifier.language.contains(name)) { + pos++; + queryStringForUrl += "+" + nav; + prop.put("nav-languages_element_" + i + "_on", 1); + prop.put(fileType, "nav-languages_element_" + i + "_modifier", nav); + } else { + neg++; + prop.put("nav-languages_element_" + i + "_on", 0); + prop.put(fileType, "nav-languages_element_" + i + "_modifier", "-" + nav); + } + String longname = ISO639.country(name); + prop.put(fileType, "nav-languages_element_" + i + "_name", longname == null ? name : longname); + prop.put(fileType, "nav-languages_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl).toString()); + prop.put("nav-languages_element_" + i + "_count", count); + prop.put("nav-languages_element_" + i + "_nl", 1); + i++; + } + prop.put("nav-languages_element", i); + prop.put("nav-languages_activate", on(pos, neg, MAXLIMIT_NAV_HIGH) ? 1 : 0); + i--; + prop.put("nav-languages_element_" + i + "_nl", 0); + if (pos == 1 && neg == 0) + { + prop.put("nav-languages", 0); // this navigation is not useful + } + } + // author navigators if (theSearch.authorNavigator == null || theSearch.authorNavigator.isEmpty()) { prop.put("nav-authors", 0); diff --git a/source/net/yacy/search/query/QueryModifier.java b/source/net/yacy/search/query/QueryModifier.java index 6255ff7af..85f3fe799 100644 --- a/source/net/yacy/search/query/QueryModifier.java +++ b/source/net/yacy/search/query/QueryModifier.java @@ -27,6 +27,7 @@ import org.apache.solr.common.params.MultiMapSolrParams; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.CommonPattern; +import net.yacy.kelondro.util.ISO639; import net.yacy.search.schema.CollectionSchema; import net.yacy.server.serverObjects; @@ -34,13 +35,14 @@ import net.yacy.server.serverObjects; public class QueryModifier { private final StringBuilder modifier; - public String sitehost, sitehash, filetype, protocol, author, collection; + public String sitehost, sitehash, filetype, protocol, language, author, collection; public QueryModifier() { this.sitehash = null; this.sitehost = null; this.filetype = null; this.protocol = null; + this.language = null; this.author = null; this.collection = null; this.modifier = new StringBuilder(20); @@ -120,6 +122,21 @@ public class QueryModifier { add("author:" + author); } } + + // parse language + final int langi = querystring.indexOf("/language/"); + if (langi >= 0) { + if (querystring.length() >= (langi + 12)) { + language = querystring.substring(langi + 10, langi + 12); + querystring = querystring.replace("/language/" + language, ""); + if (language.length() == 2 && ISO639.exists(language)) { // only 2-digit codes valid + language = language.toLowerCase(); + add("/language/" + language); + } else { + language = null; + } + } + } // check the number of quotes in the string; if there is only one double-quote, add another one. this will prevent error messages in int p = querystring.indexOf('"'); diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 0787e6b44..1d1830c85 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -87,6 +87,7 @@ public final class QueryParams { defaultfacetfields.put("protocol", CollectionSchema.url_protocol_s); defaultfacetfields.put("filetype", CollectionSchema.url_file_ext_s); defaultfacetfields.put("authors", CollectionSchema.author_sxt); + defaultfacetfields.put("language", CollectionSchema.language_s); //missing: namespace } @@ -446,6 +447,12 @@ public final class QueryParams { fq.append(" AND ").append(CollectionSchema.VOCABULARY_PREFIX).append(tag.getVocabularyName()).append(CollectionSchema.VOCABULARY_SUFFIX).append(":\"").append(tag.getObject()).append('\"'); } } + + // add language facet + if (this.modifier.language != null && this.modifier.language.length() > 0 && this.solrSchema.contains((CollectionSchema.language_s))) { + fq.append(" AND ").append(CollectionSchema.language_s.getSolrFieldName()).append(":\"").append(this.modifier.language).append('\"'); + } + // add author facets if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrSchema.contains(CollectionSchema.author_sxt)) { fq.append(" AND ").append(CollectionSchema.author_sxt.getSolrFieldName()).append(":\"").append(this.modifier.author).append('\"'); diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index aa83def5d..846d3d055 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -78,6 +78,7 @@ import net.yacy.kelondro.index.RowHandleSet; import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.TermSearch; import net.yacy.kelondro.util.Bitfield; +import net.yacy.kelondro.util.ISO639; import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.SetTools; import net.yacy.peers.RemoteSearch; @@ -142,6 +143,7 @@ public final class SearchEvent { public final ScoreMap namespaceNavigator; // a counter for name spaces public final ScoreMap protocolNavigator; // a counter for protocol types public final ScoreMap filetypeNavigator; // a counter for file types + public final ScoreMap languageNavigator; // a counter for appearance of languages public final Map> vocabularyNavigator; // counters for Vocabularies; key is metatag.getVocabularyName() private final int topicNavigatorCount; // if 0 no topicNavigator, holds expected number of terms for the topicNavigator private final LoaderDispatcher loader; @@ -223,7 +225,7 @@ public final class SearchEvent { this.expectedRemoteReferences = new AtomicInteger(0); this.excludeintext_image = Switchboard.getSwitchboard().getConfigBool("search.excludeintext.image", true); // prepare configured search navigation - final String navcfg = Switchboard.getSwitchboard().getConfig("search.navigation", ""); + final String navcfg = Switchboard.getSwitchboard().getConfig("search.navigation", "")+",language"; this.locationNavigator = navcfg.contains("location") ? new ConcurrentScoreMap() : null; this.authorNavigator = navcfg.contains("authors") ? new ConcurrentScoreMap() : null; this.namespaceNavigator = navcfg.contains("namespace") ? new ConcurrentScoreMap() : null; @@ -231,6 +233,7 @@ public final class SearchEvent { this.protocolNavigator = navcfg.contains("protocol") ? new ConcurrentScoreMap() : null; this.filetypeNavigator = navcfg.contains("filetype") ? new ConcurrentScoreMap() : null; this.topicNavigatorCount = navcfg.contains("topics") ? MAX_TOPWORDS : 0; + this.languageNavigator = navcfg.contains("language") ? new ConcurrentScoreMap() : null; this.vocabularyNavigator = new ConcurrentHashMap>(); this.snippets = new ConcurrentHashMap(); this.secondarySearchSuperviser = (this.query.getQueryGoal().getIncludeHashes().size() > 1) ? new SecondarySearchSuperviser(this) : null; // generate abstracts only for combined searches @@ -782,6 +785,21 @@ public final class SearchEvent { } } + if (this.languageNavigator != null) { + fcts = facets.get(CollectionSchema.language_s.getSolrFieldName()); + if (fcts != null) { + // remove unknown languages + Iterator i = fcts.iterator(); + while (i.hasNext()) { + String lang = i.next(); + if (!ISO639.exists(lang)) { + i.remove(); + } + } + this.languageNavigator.inc(fcts); + } + } + if (this.authorNavigator != null) { fcts = facets.get(CollectionSchema.author_sxt.getSolrFieldName()); if (fcts != null) this.authorNavigator.inc(fcts); @@ -875,6 +893,12 @@ public final class SearchEvent { } } + if (this.query.modifier.language != null) { + if (!this.query.modifier.language.equals(UTF8.String(iEntry.language()))) { + if (log.isFine()) log.fine("dropped Node: language"); + continue pollloop; + } + } // finally extend the double-check and insert result to stack this.urlhashes.putUnique(iEntry.hash()); rankingtryloop: while (true) {