From ba03ca8620b36c07720783090ec594560cbb0ae9 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 21 Mar 2011 07:50:34 +0000 Subject: [PATCH] added more configuration options for search: - removed configuration button for 'search only for admin' from index.html and added this to ConfigPortal - added configuration of link verification options (iffresh, cacheonly, nocache, ifexist) to ConfigPortal - added configuration of navigation options to ConfigPortal - added an option to switch off automatic index cleaning in case that a link verification method fails git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7613 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- defaults/yacy.init | 24 ++++++ htroot/ConfigPortal.html | 26 ++++++- htroot/ConfigPortal.java | 29 ++++++++ htroot/index.html | 30 +++----- htroot/index.java | 6 +- htroot/yacysearch.html | 4 +- htroot/yacysearch.java | 8 +- source/de/anomic/search/ResultEntry.java | 2 +- source/de/anomic/search/ResultFetcher.java | 13 ++-- source/de/anomic/search/SearchEvent.java | 7 +- source/de/anomic/search/SearchEventCache.java | 3 +- source/de/anomic/search/Switchboard.java | 4 +- source/de/anomic/search/TextSnippet.java | 73 +++++++++++-------- 13 files changed, 155 insertions(+), 74 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index e6abc2195..48e3b626e 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -723,6 +723,30 @@ search.result.show.metadata = true search.result.show.parser = true search.result.show.pictures = true +# search navigators: comma-separated list of default values for search navigation. +# can be temporary different if search string is given with differen navigation values +# assigning no value(s) means that no navigation is shown +search.navigation=hosts,authors,namespace,topics + +# search result verification and snippet fetch caching rules +# each search result can be verified byloading the link from the web +# this can be enhanced using a cache. 
In some cases it may be appropriate +# to not verify the link at all and do not compute a snippet +# the possible cases are: +# nocache: no use of web cache, load all snippets online +# iffresh: use the cache if the cache exists and is fresh otherwise load online +# ifexist: use the cache if the cache exist or load online +# cacheonly: never go online, use all content from cache. If no cache entry exist, +# consider content nevertheless as available and show result without snippet +# false: no link verification and not snippet generation: + all search results are valid without verification +search.verify = iffresh + +# in case that a link verification fails then the corresponding index reference can be +# deleted to clean up the index. If this property is set then failed index verification in +# the cases of nocache, iffresh and ifexist causes an index deletion +search.verify.delete = true + # remote search details remotesearch.maxcount = 20 remotesearch.maxtime = 1000 diff --git a/htroot/ConfigPortal.html b/htroot/ConfigPortal.html index 872fa27bd..3d728faca 100644 --- a/htroot/ConfigPortal.html +++ b/htroot/ConfigPortal.html @@ -15,7 +15,7 @@ and a link to a home page that is reached when the 'corporate identity'-images are clicked. To change also colours and styles use the Appearance Servlet for different skins and languages.

-
+
Greeting Line
@@ -30,6 +30,12 @@
URL of a Large Corporate Image
+
Enable Search for Everyone?
+
+ Search is available for everyone  + Only the administrator is allowed to search +
+
Show Navigation Bar on Search Page?
Show Navigation Top-Menu  @@ -42,6 +48,16 @@ do not show Advanced Search
+
Snippet Fetch Strategy & Link Verification
+
+ NOCACHE: no use of web cache, load all snippets online
+ IFFRESH: use the cache if the cache exists and is fresh otherwise load online
+ IFEXIST: use the cache if the cache exists, otherwise load online
+ If verification fails, delete index reference

+ CACHEONLY: never go online, use all content from cache. If no cache entry exists, consider the content as available nevertheless and show the result without a snippet
+ FALSE: no link verification and no snippet generation: all search results are valid without verification +
+
Show Information Links for each Search Result Entry
Date  @@ -51,6 +67,14 @@ Pictures
+
Show Navigation on Side-Bar
+
+ Host Navigation  + Author Navigation  + Wiki Name-Space Navigation  + Topics (Tag-Cloud) Navigation  +
+
Default Pop-Up Page
Status Page  diff --git a/htroot/ConfigPortal.java b/htroot/ConfigPortal.java index 2c3c44700..022b47548 100644 --- a/htroot/ConfigPortal.java +++ b/htroot/ConfigPortal.java @@ -71,12 +71,23 @@ public class ConfigPortal { sb.setConfig(SwitchboardConstants.INDEX_FORWARD, post.get(SwitchboardConstants.INDEX_FORWARD, "")); HTTPDFileHandler.indexForward = post.get(SwitchboardConstants.INDEX_FORWARD, ""); sb.setConfig("publicTopmenu", post.getBoolean("publicTopmenu", true)); + sb.setConfig("publicSearchpage", post.getBoolean("publicSearchpage", true)); sb.setConfig("search.options", post.getBoolean("search.options", false)); sb.setConfig("search.result.show.date", post.getBoolean("search.result.show.date", false)); sb.setConfig("search.result.show.size", post.getBoolean("search.result.show.size", false)); sb.setConfig("search.result.show.metadata", post.getBoolean("search.result.show.metadata", false)); sb.setConfig("search.result.show.parser", post.getBoolean("search.result.show.parser", false)); sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures", false)); + sb.setConfig("search.verify", post.get("search.verify", "ifexist")); + sb.setConfig("search.verify.delete", post.getBoolean("search.verify.delete", false)); + // construct navigation String + String nav = ""; + if (post.getBoolean("search.navigation.hosts", false)) nav += "hosts,"; + if (post.getBoolean("search.navigation.authors", false)) nav += "authors,"; + if (post.getBoolean("search.navigation.namespace", false)) nav += "namespace,"; + if (post.getBoolean("search.navigation.topics", false)) nav += "topics,"; + if (nav.endsWith(",")) nav = nav.substring(0, nav.length() - 1); + sb.setConfig("search.navigation", nav); } if (post.containsKey("searchpage_default")) { sb.setConfig(SwitchboardConstants.GREETING, "P2P Web Search"); @@ -88,12 +99,16 @@ public class ConfigPortal { HTTPDFileHandler.indexForward = ""; sb.setConfig(SwitchboardConstants.SEARCH_TARGET, "_self"); 
sb.setConfig("publicTopmenu", true); + sb.setConfig("publicSearchpage", true); + sb.setConfig("search.navigation", "hosts,authors,namespace,topics"); sb.setConfig("search.options", true); sb.setConfig("search.result.show.date", true); sb.setConfig("search.result.show.size", true); sb.setConfig("search.result.show.metadata", true); sb.setConfig("search.result.show.parser", true); sb.setConfig("search.result.show.pictures", true); + sb.setConfig("search.verify", "iffresh"); + sb.setConfig("search.verify.delete", "true"); } } @@ -103,13 +118,27 @@ public class ConfigPortal { prop.putHTML(SwitchboardConstants.GREETING_SMALL_IMAGE, sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, "")); prop.putHTML(SwitchboardConstants.INDEX_FORWARD, sb.getConfig(SwitchboardConstants.INDEX_FORWARD, "")); prop.put("publicTopmenu", sb.getConfigBool("publicTopmenu", false) ? 1 : 0); + prop.put("publicSearchpage", sb.getConfigBool("publicSearchpage", false) ? 1 : 0); prop.put("search.options", sb.getConfigBool("search.options", false) ? 1 : 0); + prop.put("search.result.show.date", sb.getConfigBool("search.result.show.date", false) ? 1 : 0); prop.put("search.result.show.size", sb.getConfigBool("search.result.show.size", false) ? 1 : 0); prop.put("search.result.show.metadata", sb.getConfigBool("search.result.show.metadata", false) ? 1 : 0); prop.put("search.result.show.parser", sb.getConfigBool("search.result.show.parser", false) ? 1 : 0); prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0); + prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts") >= 0 ? 1 : 0); + prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors") >= 0 ? 1 : 0); + prop.put("search.navigation.namespace", sb.getConfig("search.navigation", "").indexOf("namespace") >= 0 ? 1 : 0); + prop.put("search.navigation.topics", sb.getConfig("search.navigation", "").indexOf("topics") >= 0 ? 
1 : 0); + + prop.put("search.verify.nocache", sb.getConfig("search.verify", "").equals("nocache") ? 1 : 0); + prop.put("search.verify.iffresh", sb.getConfig("search.verify", "").equals("iffresh") ? 1 : 0); + prop.put("search.verify.ifexist", sb.getConfig("search.verify", "").equals("ifexist") ? 1 : 0); + prop.put("search.verify.cacheonly", sb.getConfig("search.verify", "").equals("cacheonly") ? 1 : 0); + prop.put("search.verify.false", sb.getConfig("search.verify", "").equals("false") ? 1 : 0); + prop.put("search.verify.delete", sb.getConfigBool("search.verify.delete", true) ? 1 : 0); + final String browserPopUpPage = sb.getConfig(SwitchboardConstants.BROWSER_POP_UP_PAGE, "ConfigBasic.html"); prop.put("popupFront", 0); prop.put("popupSearch", 0); diff --git a/htroot/index.html b/htroot/index.html index 76109cc31..cbe1a5f44 100644 --- a/htroot/index.html +++ b/htroot/index.html @@ -49,7 +49,7 @@
- + #(searchdomswitches)#::
#(searchtext)#::  #(/searchtext)# @@ -59,7 +59,7 @@ #(searchapp)#::#(/searchapp)#
#(/searchdomswitches)# - + @@ -70,7 +70,6 @@ #(searchoptions)#::

more options...

-

advanced parameters

::
@@ -132,26 +131,17 @@ + + + + advanced parameters + + + + #(/searchoptions)#
- #(searchoptions)#:: - - #(/searchoptions)# #(topmenu)# #%env/templates/embeddedfooter.template%# :: diff --git a/htroot/index.java b/htroot/index.java index d94b41239..4ab2f9707 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -51,15 +51,12 @@ public class index { } // access control - boolean publicPage = sb.getConfigBool("publicSearchpage", true); final boolean authorizedAccess = sb.verifyAuthentication(header, false); if ((post != null) && (post.containsKey("publicPage"))) { if (!authorizedAccess) { prop.put("AUTHENTICATE", "admin log-in"); // force log-in return prop; } - publicPage = post.get("publicPage", "0").equals("1"); - sb.setConfig("publicSearchpage", publicPage); } final boolean global = (post == null) ? true : post.get("resource", "global").equals("global"); @@ -114,7 +111,6 @@ public class index { prop.put("searchoptions_prefermaskoptions", "0"); prop.putHTML("searchoptions_prefermaskoptions_prefermaskfilter", prefermaskfilter); prop.put("searchoptions_indexofChecked", ""); - prop.put("searchoptions_publicSearchpage", (publicPage) ? "0" : "1"); prop.put("results", ""); prop.putHTML("cat", cat); prop.put("type", type); @@ -132,6 +128,8 @@ public class index { prop.put("searchdomswitches_searchvideo_check", (contentdom == ContentDomain.VIDEO) ? "1" : "0"); prop.put("searchdomswitches_searchimage_check", (contentdom == ContentDomain.IMAGE) ? "1" : "0"); prop.put("searchdomswitches_searchapp_check", (contentdom == ContentDomain.APP) ? 
"1" : "0"); + prop.put("search.navigation", sb.getConfig("search.navigation", "all") ); + prop.put("search.verify", sb.getConfig("search.verify", "iffresh") ); // online caution timing sb.localSearchLastAccess = System.currentTimeMillis(); diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index 88aa6d3c9..c9e17e618 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -121,9 +121,9 @@ $(function() { - + - + diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 845ccdd7d..00bb2d382 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -131,7 +131,8 @@ public class yacysearch { prop.put("constraint", ""); prop.put("cat", "href"); prop.put("depth", "0"); - prop.put("verify", (post == null) ? "true" : post.get("verify", "true")); + prop.put("search.verify", (post == null) ? sb.getConfig("search.verify", "iffresh") : post.get("verify", "iffresh")); + prop.put("search.navigation", (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all")); prop.put("contentdom", "text"); prop.put("contentdomCheckText", "1"); prop.put("contentdomCheckAudio", "0"); @@ -403,7 +404,7 @@ public class yacysearch { } // navigation - final String navigation = (post == null) ? "" : post.get("nav", ""); + final String navigation = (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", ""); // the query final TreeSet[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute @@ -707,7 +708,8 @@ public class yacysearch { prop.putHTML("prefermaskfilter", prefermask); prop.put("indexof", (indexof) ? "on" : "off"); prop.put("constraint", (constraint == null) ? "" : constraint.exportB64()); - prop.put("verify", snippetFetchStrategy == null ? "false" : snippetFetchStrategy.toName()); + prop.put("search.verify", snippetFetchStrategy == null ? sb.getConfig("search.verify", "iffresh") : snippetFetchStrategy.toName()); + prop.put("search.navigation", (post == null) ? 
sb.getConfig("search.navigation", "all") : post.get("nav", "all")); prop.put("contentdom", (post == null ? "text" : post.get("contentdom", "text"))); prop.put("searchdomswitches", sb.getConfigBool("search.text", true) || sb.getConfigBool("search.audio", true) || sb.getConfigBool("search.video", true) || sb.getConfigBool("search.image", true) || sb.getConfigBool("search.app", true) ? 1 : 0); prop.put("searchdomswitches_searchtext", sb.getConfigBool("search.text", true) ? 1 : 0); diff --git a/source/de/anomic/search/ResultEntry.java b/source/de/anomic/search/ResultEntry.java index 2a4584947..a0c764506 100644 --- a/source/de/anomic/search/ResultEntry.java +++ b/source/de/anomic/search/ResultEntry.java @@ -174,7 +174,7 @@ public class ResultEntry implements Comparable, Comparator, Comparator\\A[^\\p{L}\\p{N}].+ @@ -118,12 +107,32 @@ public class TextSnippet implements Comparable, Comparator, Comparator, Comparator, Comparator, Comparator, Comparator sentences = document.getSentences(pre); if (sentences == null) { - init(url.hash(), null, ERROR_PARSER_NO_LINES, "parser returned no sentences"); + init(url.hash(), null, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences"); return; } final SnippetExtractor tsr; @@ -235,7 +244,7 @@ public class TextSnippet implements Comparable, Comparator, Comparator" + textline; if (snippetLine == null || !remainingHashes.isEmpty()) { - init(url.hash(), null, ERROR_NO_MATCH, "no matching snippet found"); + init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found"); return; } if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength); @@ -266,10 +275,10 @@ public class TextSnippet implements Comparable, Comparator, Comparator