From 97e84439fb749a624c00ec01a6caa076c9c1c32f Mon Sep 17 00:00:00 2001 From: reger Date: Mon, 20 Jan 2014 00:58:17 +0100 Subject: [PATCH] adjusted ConfigHeuristic and changed QueryGoal.getOriginalQueryString to .getQueryString - since specific heuristic Twitter & Blekko are no longer available or redundant with OpenSearchHeuristic, adjusted ConfigHeuristic to use OpensearchHeuristic settings only. For this the default OSD search target list is made available (copied) by default and the other configs are removed. - the return of QueryGoal.getOriginalQueryString includes the queryModifier, which are held separately in a modifier object, but in most (all) cases just the query term is expected, clarified and renamed it to QueryGoal.getQueryString which returns just the search term (if needed a .getOriginalQueryString could be implemented in QueryParams, adding the modifiers) - started to adjust internal html href references from absolute to relative (currently it is mixed). For future development we should prefer relative href targets (less trouble with context aware servlets) --- defaults/yacy.init | 2 - htroot/AccessTracker_p.java | 2 +- htroot/ConfigHeuristics_p.html | 39 ++++--------------- htroot/ConfigHeuristics_p.java | 10 +---- htroot/ConfigNetwork_p.java | 3 +- htroot/api/timeline.java | 2 +- htroot/index.html | 22 ++++++----- htroot/index.java | 6 ++- htroot/yacysearch.java | 19 +++------ htroot/yacysearchitem.java | 2 +- htroot/yacysearchtrailer.java | 18 ++++----- .../yacy/http/servlets/GSAsearchServlet.java | 2 +- .../net/yacy/http/servlets/SolrServlet.java | 2 +- source/net/yacy/peers/Protocol.java | 2 +- source/net/yacy/search/Switchboard.java | 15 +++++-- .../net/yacy/search/SwitchboardConstants.java | 2 - .../net/yacy/search/query/AccessTracker.java | 4 +- source/net/yacy/search/query/QueryGoal.java | 22 ++++++++--- source/net/yacy/search/query/QueryParams.java | 4 +- source/net/yacy/search/query/SearchEvent.java | 2 +- 20 files changed, 81 
insertions(+), 99 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index 618119cf9..393fcaf48 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -1060,8 +1060,6 @@ donation.iframetarget=env/donate.html # search heuristics heuristic.site = false -heuristic.blekko = false -heuristic.twitter = false heuristic.searchresults = false heuristic.searchresults.crawlglobal = false heuristic.opensearch = false diff --git a/htroot/AccessTracker_p.java b/htroot/AccessTracker_p.java index aaa42141a..a59840153 100644 --- a/htroot/AccessTracker_p.java +++ b/htroot/AccessTracker_p.java @@ -170,7 +170,7 @@ public class AccessTracker_p { if (page == 2) { // local search prop.putNum("page_list_" + m + "_offset", query.offset); - prop.putHTML("page_list_" + m + "_querystring", query.getQueryGoal().getOriginalQueryString(false)); + prop.putHTML("page_list_" + m + "_querystring", query.getQueryGoal().getQueryString(false)); } else { // remote search prop.putHTML("page_list_" + m + "_peername", (query.remotepeer == null) ? "" : query.remotepeer.getName()); diff --git a/htroot/ConfigHeuristics_p.html b/htroot/ConfigHeuristics_p.html index 83b70b7e1..5d127c80b 100644 --- a/htroot/ConfigHeuristics_p.html +++ b/htroot/ConfigHeuristics_p.html @@ -14,16 +14,16 @@

- The success of heuristics are marked with an image (heuristic:<name> (redundant)/heuristic:<name> (new link)) below the favicon left from the search result entry: + The success of heuristics are marked with an image (heuristic:<name> (redundant)/heuristic:<name> (new link)) below the favicon left from the search result entry:
- heuristic:<name> (redundant) + heuristic:<name> (redundant)
The search result was discovered by a heuristic, but the link was already known by YaCy
- heuristic:<name> (new link) + heuristic:<name> (new link)
The search result was discovered by a heuristic, not previously known by YaCy @@ -70,33 +70,7 @@

- -
-
- - - - -

- When using this heuristic, then every search request line is used for a call to twitter. - 50 results are taken from twitter and loaded simultanously, parsed and indexed immediately. -

-
-
- -
-
- - - - -

- When using this heuristic, then every search request line is used for a call to blekko. - 20 results are taken from blekko and loaded simultanously, parsed and indexed immediately. -

-
-
- +
@@ -137,10 +111,13 @@ + + + +
- #[osderrmsg]#

diff --git a/htroot/ConfigHeuristics_p.java b/htroot/ConfigHeuristics_p.java index 0c7fe7062..0f0cfa6ee 100644 --- a/htroot/ConfigHeuristics_p.java +++ b/htroot/ConfigHeuristics_p.java @@ -61,10 +61,6 @@ public class ConfigHeuristics_p { if (post.containsKey("searchresult_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS, false); if (post.containsKey("searchresultglobal_on")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, true); if (post.containsKey("searchresultglobal_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, false); - if (post.containsKey("blekko_on")) sb.setConfig(SwitchboardConstants.HEURISTIC_BLEKKO, true); - if (post.containsKey("blekko_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_BLEKKO, false); - if (post.containsKey("twitter_on")) sb.setConfig(SwitchboardConstants.HEURISTIC_TWITTER, true); - if (post.containsKey("twitter_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_TWITTER, false); if (post.containsKey("opensearch_on")) { sb.setConfig(SwitchboardConstants.HEURISTIC_OPENSEARCH, true); // re-read config (and create work table) @@ -137,11 +133,11 @@ public class ConfigHeuristics_p { } // copy default opensearch heuristic config with sample entries - if (post.containsKey("copydefaultosdconfig")) { + if (post.containsKey("copydefaultosdconfig") || post.containsKey("resettodefaultosdlist")) { // prepare a solr index profile switch list final File osdDefaultConfig = new File(sb.getDataPath(), "defaults/heuristicopensearch.conf"); final File osdConfig = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf"); - if (!osdConfig.exists() && osdDefaultConfig.exists()) { + if ((post.containsKey("resettodefaultosdlist") || !osdConfig.exists()) && osdDefaultConfig.exists()) { try { Files.copy(osdDefaultConfig, osdConfig); } catch (final IOException ex) { @@ -158,8 +154,6 @@ public class ConfigHeuristics_p { prop.put("site.checked", 
sb.getConfigBool(SwitchboardConstants.HEURISTIC_SITE, false) ? 1 : 0); prop.put("searchresult.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_SEARCHRESULTS, false) ? 1 : 0); prop.put("searchresultglobal.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, false) ? 1 : 0); - prop.put("blekko.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_BLEKKO, false) ? 1 : 0); - prop.put("twitter.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_TWITTER, false) ? 1 : 0); prop.put("opensearch.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) ? 1 : 0); // display config file content diff --git a/htroot/ConfigNetwork_p.java b/htroot/ConfigNetwork_p.java index cfc66a097..42ef47a7d 100644 --- a/htroot/ConfigNetwork_p.java +++ b/htroot/ConfigNetwork_p.java @@ -93,8 +93,7 @@ public class ConfigNetwork_p if ( !indexReceive ) { // remove heuristics sb.setConfig(SwitchboardConstants.HEURISTIC_SITE, false); - sb.setConfig(SwitchboardConstants.HEURISTIC_BLEKKO, false); - sb.setConfig(SwitchboardConstants.HEURISTIC_TWITTER, false); + sb.setConfig(SwitchboardConstants.HEURISTIC_OPENSEARCH, false); } final boolean robinsonmode = "robinson".equals(post.get("network", "")); if ( robinsonmode ) { diff --git a/htroot/api/timeline.java b/htroot/api/timeline.java index ce068ecf6..f96cf08ea 100644 --- a/htroot/api/timeline.java +++ b/htroot/api/timeline.java @@ -67,7 +67,7 @@ public final class timeline { language = (agent == null) ? 
"en" : ISO639.userAgentLanguageDetection(agent); if (language == null) language = "en"; } - final QueryGoal qg = new QueryGoal(querystring, querystring); + final QueryGoal qg = new QueryGoal(querystring); HandleSet q = qg.getIncludeHashes(); // tell all threads to do nothing for a specific time diff --git a/htroot/index.html b/htroot/index.html index 72a5bfe07..b1f2446ab 100644 --- a/htroot/index.html +++ b/htroot/index.html @@ -6,14 +6,14 @@ #%env/templates/metas.template%# - - + + - - - + + + - +