Introduce a Keyword search navigator using the index field keywords.

The keywords field string is split into words, which become the navigator entries. A keyword navigator facet is essential for search appliance usage, where documents and metadata often use specialized keyword vocabularies to filter search results. This navigator can be used without a custom index schema. As we have not yet defined a search query command to filter on "keywords", filtering is currently limited to adding the keyword to the search query.
8 years ago · b7417ac329
parent eddb7a9804
commit b7417ac329
2 changed files with 82 additions and 0 deletions
--- a/source/net/yacy/search/navigator/NavigatorPlugins.java
+++ b/source/net/yacy/search/navigator/NavigatorPlugins.java
@ -48,6 +48,7 @@ public class NavigatorPlugins {
        defaultnavplugins.put("namespace", "Wiki Name Space");
        defaultnavplugins.put("year", "Year");
        // defaultnavplugins.put("year:dates_in_content_dts:Event","Event");
+        defaultnavplugins.put("keywords", "Keywords");
        return defaultnavplugins;
    }

@ -118,6 +119,10 @@ public class NavigatorPlugins {
                    navigatorPlugins.put("year", new YearNavigator("Year", CollectionSchema.last_modified));
                }
            }
+            
+            if (navname.contains("keywords")) {
+                navigatorPlugins.put("keywords", new TokenizedStringNavigator("Keywords", CollectionSchema.keywords));
+            }
        }
        return navigatorPlugins;
    }
--- a/source/net/yacy/search/navigator/TokenizedStringNavigator.java
+++ b/source/net/yacy/search/navigator/TokenizedStringNavigator.java
@ -0,0 +1,77 @@
+/**
+ * TokenizedStringNavigator.java
+ * (C) 2017 by reger24; https://github.com/reger24
+ *
+ * This is a part of YaCy, a peer-to-peer based web search engine
+ *
+ * LICENSE
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+package net.yacy.search.navigator;
+
+import java.util.Collection;
+import java.util.Locale;
+import java.util.StringTokenizer;
+import net.yacy.kelondro.data.meta.URIMetadataNode;
+import net.yacy.search.Switchboard;
+import net.yacy.search.schema.CollectionSchema;
+
+/**
+ * Search navigator for string entries based on ScoreMap to count and
+ * order the result list by counted occurrence. The string values are tokenized
+ * and each word is added (lowercased) to the score map.
+ */
+public class TokenizedStringNavigator  extends StringNavigator implements Navigator {
+
+    /** Characters that separate the individual words inside a field value. */
+    private static final String TOKEN_DELIMITERS = " ,;";
+
+    /**
+     * Creates a navigator that counts the single words found in the given field.
+     *
+     * @param title display title, handed through to the parent navigator
+     * @param field index field whose content is tokenized and counted
+     */
+    public TokenizedStringNavigator(String title, CollectionSchema field) {
+        super(title, field);
+    }
+
+    /**
+     * Reads the configured field from the doc, splits its content into words
+     * (delimiters: space, comma, semicolon) and calls {@code inc} for every
+     * lowercased word that is longer than one character and not contained in
+     * {@code Switchboard.stopwords}.
+     * <p>
+     * The field value may be a single String or a Collection; Collection
+     * entries that are null or not a String are skipped, as is a scalar value
+     * that is not a String. Nothing happens if no field is configured or the
+     * doc has no value for it.
+     *
+     * @param doc Solrdocument with field for the key content
+     */
+    @Override
+    public void incDoc(URIMetadataNode doc) {
+        if (field == null) {
+            return;
+        }
+        final Object val = doc.getFieldValue(field.getSolrFieldName());
+        if (val instanceof Collection) {
+            // wildcard type + instanceof check instead of an unchecked cast to
+            // Collection<String>: null or non-String entries must not abort counting
+            for (final Object entry : (Collection<?>) val) {
+                if (entry instanceof String) {
+                    incTokens((String) entry);
+                }
+            }
+        } else if (val instanceof String) {
+            incTokens((String) val);
+        }
+    }
+
+    /**
+     * Tokenizes the text and counts every accepted word.
+     *
+     * @param text raw field content; an empty string yields no tokens
+     */
+    private void incTokens(final String text) {
+        // Locale.ROOT keeps the lowercased form independent of the JVM default
+        // locale (e.g. a Turkish locale would turn "I" into a dotless "ı").
+        // StringTokenizer is faster than a regex pattern split.
+        final StringTokenizer tokens = new StringTokenizer(text.toLowerCase(Locale.ROOT), TOKEN_DELIMITERS);
+        while (tokens.hasMoreTokens()) {
+            final String word = tokens.nextToken();
+            if (word.length() > 1 && !Switchboard.stopwords.contains(word)) {
+                this.inc(word);
+            }
+        }
+    }
+}