From a814f3d8858437428e6caad714893e71bc7c26e0 Mon Sep 17 00:00:00 2001 From: reger Date: Fri, 2 Jun 2017 01:00:21 +0200 Subject: [PATCH] Introduce keyword query parameter This enables keyword navigator to filter on keywords. Added search page output and layout config for keywords, allowing e.g. in Intranet use to display the keywords. No styling or links applied to the keyword text (but is desirable possibly in combination with bootstrap-tagsinput for future/intranet). --- defaults/yacy.init | 1 + htroot/ConfigSearchPage_p.html | 5 ++++- htroot/ConfigSearchPage_p.java | 3 +++ htroot/index.html | 2 ++ htroot/yacysearchitem.html | 1 + htroot/yacysearchitem.java | 2 ++ .../search/navigator/StringNavigator.java | 3 +++ .../net/yacy/search/query/QueryModifier.java | 21 ++++++++++++++++--- source/net/yacy/search/query/QueryParams.java | 5 +++++ source/net/yacy/search/query/SearchEvent.java | 8 +++++++ 10 files changed, 47 insertions(+), 4 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index 23e323805..f62f0971e 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -851,6 +851,7 @@ search.result.noreferrer=false # search result lines may show additional information for each search hit # these information pieces may be switched on or off +search.result.show.keywords = false search.result.show.date = true search.result.show.size = false search.result.show.metadata = false diff --git a/htroot/ConfigSearchPage_p.html b/htroot/ConfigSearchPage_p.html index 1e98cc0ed..fb8f3bce9 100644 --- a/htroot/ConfigSearchPage_p.html +++ b/htroot/ConfigSearchPage_p.html @@ -6,7 +6,7 @@ - + @@ -224,6 +224,9 @@ var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=

http://url-of-the-search-result.net

+

+ Tags: keyword subject keyword2 keyword3 +

diff --git a/htroot/ConfigSearchPage_p.java b/htroot/ConfigSearchPage_p.java index 9ae5b5249..b86b5879b 100644 --- a/htroot/ConfigSearchPage_p.java +++ b/htroot/ConfigSearchPage_p.java @@ -72,6 +72,7 @@ public class ConfigSearchPage_p { sb.setConfig("search.video", post.getBoolean("search.video")); sb.setConfig("search.app", post.getBoolean("search.app")); + sb.setConfig("search.result.show.keywords", post.getBoolean("search.result.show.keywords")); sb.setConfig("search.result.show.date", post.getBoolean("search.result.show.date")); sb.setConfig("search.result.show.size", post.getBoolean("search.result.show.size")); sb.setConfig("search.result.show.metadata", post.getBoolean("search.result.show.metadata")); @@ -156,6 +157,7 @@ public class ConfigSearchPage_p { sb.setConfig("search.audio", config.getProperty("search.audio","false")); sb.setConfig("search.video", config.getProperty("search.video","false")); sb.setConfig("search.app", config.getProperty("search.app","false")); + sb.setConfig("search.result.show.keywords", config.getProperty("search.result.show.keywords","false")); sb.setConfig("search.result.show.date", config.getProperty("search.result.show.date","true")); sb.setConfig("search.result.show.size", config.getProperty("search.result.show.size","false")); sb.setConfig("search.result.show.metadata", config.getProperty("search.result.show.metadata","false")); @@ -184,6 +186,7 @@ public class ConfigSearchPage_p { prop.put("search.video", sb.getConfigBool("search.video", false) ? 1 : 0); prop.put("search.app", sb.getConfigBool("search.app", false) ? 1 : 0); + prop.put("search.result.show.keywords", sb.getConfigBool("search.result.show.keywords", false) ? 1 : 0); prop.put("search.result.show.date", sb.getConfigBool("search.result.show.date", false) ? 1 : 0); prop.put("search.result.show.size", sb.getConfigBool("search.result.show.size", false) ? 1 : 0); prop.put("search.result.show.metadata", sb.getConfigBool("search.result.show.metadata", false) ? 
1 : 0); diff --git a/htroot/index.html b/htroot/index.html index 7da5b4f54..ad1cf55f7 100644 --- a/htroot/index.html +++ b/htroot/index.html @@ -161,6 +161,8 @@
from:<date1> to:<date2>
only pages with a date between <date1> and <date2> in content
#(/datesincontent)# +
keyword:<phrase>
+
only pages with keyword annotation containing <phrase>
/http
only resources from http or https servers
/ftp
diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html index 5130c3734..d44347bfb 100644 --- a/htroot/yacysearchitem.html +++ b/htroot/yacysearchitem.html @@ -25,6 +25,7 @@

#[description]#

#[urlname]#

+ #(showKeywords)#::

Tags: #[subject]#

#(/showKeywords)#

#(showDate)#::#[date]##(/showDate)# #(showEvent)#::on #[date]##(/showEvent)# diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 2f03a82f3..7cbdcea58 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -205,6 +205,7 @@ public class yacysearchitem { prop.put("content_showEvent", showEvent ? 1 : 0); Collection snapshotPaths = sb.getConfigBool("search.result.show.snapshots", true) ? Transactions.findPaths(result.url(), null, State.ANY) : null; if (fileType == FileType.HTML) { // html template specific settings + prop.put("content_showKeywords", (sb.getConfigBool("search.result.show.keywords", false) && !result.dc_subject().isEmpty()) ? 1 : 0); prop.put("content_showDate", sb.getConfigBool("search.result.show.date", true) && !showEvent ? 1 : 0); prop.put("content_showSize", sb.getConfigBool("search.result.show.size", true) ? 1 : 0); prop.put("content_showMetadata", sb.getConfigBool("search.result.show.metadata", true) ? 1 : 0); @@ -219,6 +220,7 @@ public class yacysearchitem { prop.put("content_showRanking", sb.getConfigBool("search.result.show.ranking", false) ? 
1 : 0); if (showEvent) prop.put("content_showEvent_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(events[0])); + prop.putHTML("content_showKeywords_subject", result.dc_subject()); prop.put("content_showDate_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(result.moddate())); prop.putHTML("content_showSize_sizename", RSSMessage.sizename(result.filesize())); prop.put("content_showMetadata_urlhash", urlhash); diff --git a/source/net/yacy/search/navigator/StringNavigator.java b/source/net/yacy/search/navigator/StringNavigator.java index c0d5c78eb..c42dc7104 100644 --- a/source/net/yacy/search/navigator/StringNavigator.java +++ b/source/net/yacy/search/navigator/StringNavigator.java @@ -68,6 +68,9 @@ public class StringNavigator extends ConcurrentScoreMap implements Navi mod = "author:" + key; } break; + case keywords: + mod = "keyword:" + key; + break; case url_protocol_s: mod = "/" + key; break; diff --git a/source/net/yacy/search/query/QueryModifier.java b/source/net/yacy/search/query/QueryModifier.java index c2194b565..4e3dffdd6 100644 --- a/source/net/yacy/search/query/QueryModifier.java +++ b/source/net/yacy/search/query/QueryModifier.java @@ -40,7 +40,7 @@ import net.yacy.server.serverObjects; public class QueryModifier { private final StringBuilder modifier; - public String sitehost, sitehash, filetype, protocol, language, author, collection, on, from, to; + public String sitehost, sitehash, filetype, protocol, language, author, keyword, collection, on, from, to; public int timezoneOffset; public QueryModifier(final int timezoneOffset) { @@ -51,6 +51,7 @@ public class QueryModifier { this.protocol = null; this.language = null; this.author = null; + this.keyword = null; this.collection = null; this.on = null; this.from = null; @@ -145,7 +146,17 @@ public class QueryModifier { add("author:" + author); } } - + + // parse keyword + final int keywordi = querystring.indexOf("keyword:", 0); + if (keywordi >= 0) { + // TODO: should we handle quoted 
keywords (to allow space) and comma separated list ? + int ftb = querystring.indexOf(' ', keywordi); + this.keyword = querystring.substring(keywordi + 8, ftb == -1 ? querystring.length() : ftb); + querystring = querystring.replace("keyword:" + this.keyword, "").replace(" ", " ").trim(); + add("keyword:" + this.keyword); + } + // parse collection int collectioni = querystring.indexOf("collection:", 0); while (collectioni >= 0) { // due to possible collision with "on:" modifier make sure no "collection:" remains @@ -281,7 +292,11 @@ public class QueryModifier { if (this.author != null && this.author.length() > 0 && fq.indexOf(CollectionSchema.author_sxt.getSolrFieldName()) < 0) { fq.append(" AND ").append(CollectionSchema.author_sxt.getSolrFieldName()).append(":\"").append(this.author).append('\"'); } - + + if (this.keyword != null && this.keyword.length() > 0 && fq.indexOf(CollectionSchema.keywords.getSolrFieldName()) < 0) { + fq.append(" AND ").append(CollectionSchema.keywords.getSolrFieldName()).append(":\"").append(this.keyword).append('\"'); + } + if (this.collection != null && this.collection.length() > 0 && fq.indexOf(CollectionSchema.collection_sxt.getSolrFieldName()) < 0) { fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.collection)); } diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index ed8455385..378d284b8 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -527,6 +527,11 @@ public final class QueryParams { fqs.add(CollectionSchema.author_sxt.getSolrFieldName() + ":\"" + this.modifier.author + '\"'); } + // add keyword filter + if (this.modifier.keyword != null && this.modifier.keyword.length() > 0 && this.solrSchema.contains(CollectionSchema.keywords)) { + fqs.add(CollectionSchema.keywords.getSolrFieldName() + ":\"" + this.modifier.keyword + '\"'); + } + // add collection facets if (this.modifier.collection != 
null && this.modifier.collection.length() > 0 && this.solrSchema.contains(CollectionSchema.collection_sxt)) { fqs.add(QueryModifier.parseCollectionExpression(this.modifier.collection)); diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index e3aa44de6..24129ef3a 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -964,6 +964,14 @@ public final class SearchEvent { continue pollloop; } } + + if (this.query.modifier.keyword != null) { + if (iEntry.dc_subject().indexOf(this.query.modifier.keyword) < 0) { + if (log.isFine()) log.fine ("dropped Node: keyword"); + continue pollloop; + } + } + // finally extend the double-check and insert result to stack this.urlhashes.putUnique(iEntry.hash()); rankingtryloop: while (true) {