diff --git a/defaults/web.xml b/defaults/web.xml
index a13e090aa..91d8323da 100644
--- a/defaults/web.xml
+++ b/defaults/web.xml
@@ -55,7 +55,23 @@
YaCy stop proxy
+  <servlet>
+    <servlet-name>ClickServlet</servlet-name>
+    <servlet-class>net.yacy.http.servlets.ClickServlet</servlet-class>
+    <init-param>
+      <param-name>clickaction</param-name>
+      <param-value>index</param-value>
+      <description>defines the action to perform with supplied url</description>
+    </init-param>
+  </servlet>
+
+  <servlet-mapping>
+    <servlet-name>ClickServlet</servlet-name>
+    <url-pattern>/click</url-pattern>
+  </servlet-mapping>
+
+ SolrSelectServlet
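The clickaction init-param above selects what happens when a result link is clicked; the servlet added later in this patch understands three values. A short reference, taken from the action codes defined in ClickServlet.java below (editor's note, not part of the patch):

    // accepted values for the "clickaction" init-param registered above
    //   index      - add the clicked url to the local index (default)
    //   crawl      - add the clicked url to the crawler with crawl depth 0
    //   crawllinks - add the clicked url to the crawler with crawl depth 1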
diff --git a/defaults/yacy.init b/defaults/yacy.init
index 14d1b426d..626eadf6a 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -826,6 +826,8 @@ search.result.show.proxy = false
search.result.show.hostbrowser = true
search.result.show.vocabulary = false
+search.result.useclickservlet = true
+
# search navigators: comma-separated list of default values for search navigation.
# can be temporary different if search string is given with differen navigation values
# assigning no value(s) means that no navigation is shown
@@ -857,6 +859,7 @@ search.verify.delete = true
# remote search details
remotesearch.maxcount = 10
remotesearch.maxtime = 3000
+remotesearch.result.store = true
# specifies if yacy should set it's own referer if no referer URL
# was set by the client.
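Both new keys are read with the same defaults that the Java code below assumes (remotesearch.result.store defaults to true, search.result.useclickservlet to false). A minimal sketch of how they are consumed at runtime, using the constants added in SwitchboardConstants.java further down:

    final Switchboard sb = Switchboard.getSwitchboard();
    // add remote search results to the local index (read in SearchEventCache)
    final boolean storeRemoteResults = sb.getConfigBool(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, true);
    // route result links through the click servlet (read in yacysearchitem and ClickServlet)
    final boolean useClickServlet = sb.getConfigBool(SwitchboardConstants.SEARCH_USECLICKSERVLET, false);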
diff --git a/htroot/ConfigPortal.html b/htroot/ConfigPortal.html
index c5236e50a..b2f7bcd0a 100644
--- a/htroot/ConfigPortal.html
+++ b/htroot/ConfigPortal.html
@@ -51,19 +51,31 @@
Snippet Fetch Strategy & Link Verification
Speed up search results with this option! (use CACHEONLY or FALSE to switch off verification)
- NOCACHE: no use of web cache, load all snippets online
- IFFRESH: use the cache if the cache exists and is fresh otherwise load online
- IFEXIST: use the cache if the cache exist or load online
- If verification fails, delete index reference
- CACHEONLY: never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available and show result without snippet
- FALSE: no link verification and not snippet generation: all search results are valid without verification
+ NOCACHE: no use of web cache, load all snippets online
+ IFFRESH: use the cache if the cache exists and is fresh, otherwise load online
+ IFEXIST: use the cache if the cache exists, otherwise load online
+ If verification fails, delete index reference
+ CACHEONLY: never go online, use all content from cache. If no cache entry exists, consider the content nevertheless as available and show the result without a snippet
+ FALSE: no link verification and no snippet generation: all search results are valid without verification
Greedy Learning Mode
- load documents linked in search results, will be deactivated automatically when index size > #[greedylearning.limit.doccount]# (see Heuristics: search-result to use this permanent)
+ load documents linked in search results, will be deactivated automatically when index size > #[greedylearning.limit.doccount]# (see Heuristics: search-result to use this permanently)
+
Index remote results
+
+ add remote search results to the local index (default=on, it is recommended to enable this option!)
+
+
+ #(remotesearch.result.store)#
+
Use Click-Servlet for search result links
+
+ allows YaCy to perform some actions if the user clicks on a search result (by default the clicked link is added to the local index)
+
+ ::#(/remotesearch.result.store)#
+
Default Pop-Up Page
Status Page
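A note on the template markup used above: in YaCy servlet templates, #(key)#A::B#(/key)# renders A when the property is set to 0 and B when it is set to 1, and properties inside such a block are addressed with the outer key as prefix. The click-servlet checkbox therefore only appears when remotesearch.result.store is 0 (remote results are not stored), which matches the interlock applied in ConfigPortal.java below.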
diff --git a/htroot/ConfigPortal.java b/htroot/ConfigPortal.java
index e450b65a7..2c618c730 100644
--- a/htroot/ConfigPortal.java
+++ b/htroot/ConfigPortal.java
@@ -90,6 +90,11 @@ public class ConfigPortal {
sb.setConfig("search.options", post.getBoolean("search.options"));
sb.setConfig(SwitchboardConstants.GREEDYLEARNING_ACTIVE, post.getBoolean(SwitchboardConstants.GREEDYLEARNING_ACTIVE));
+
+ final boolean storeresult = post.getBoolean(SwitchboardConstants.REMOTESEARCH_RESULT_STORE);
+ sb.setConfig(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, storeresult);
+ // the click servlet is (currently) only meaningful if remote results are not stored to the local index
+ sb.setConfig(SwitchboardConstants.SEARCH_USECLICKSERVLET, !storeresult && post.getBoolean(SwitchboardConstants.SEARCH_USECLICKSERVLET));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, post.get("search.verify", "ifexist"));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, post.getBoolean("search.verify.delete"));
@@ -144,6 +149,8 @@ public class ConfigPortal {
sb.setConfig("search.navigation", config.getProperty("search.navigation","hosts,authors,namespace,topics"));
sb.setConfig("search.options", config.getProperty("search.options","true"));
sb.setConfig(SwitchboardConstants.GREEDYLEARNING_ACTIVE, config.getProperty(SwitchboardConstants.GREEDYLEARNING_ACTIVE));
+ sb.setConfig(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, config.getProperty(SwitchboardConstants.REMOTESEARCH_RESULT_STORE));
+ sb.setConfig(SwitchboardConstants.SEARCH_USECLICKSERVLET, config.getProperty(SwitchboardConstants.SEARCH_USECLICKSERVLET));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, config.getProperty(SwitchboardConstants.SEARCH_VERIFY,"iffresh"));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, config.getProperty(SwitchboardConstants.SEARCH_VERIFY_DELETE,"true"));
sb.setConfig("about.headline", config.getProperty("about.headline",""));
@@ -165,6 +172,9 @@ public class ConfigPortal {
prop.put(SwitchboardConstants.GREEDYLEARNING_ACTIVE, sb.getConfigBool(SwitchboardConstants.GREEDYLEARNING_ACTIVE, false) ? 1 : 0);
prop.put(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, sb.getConfig(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, "0"));
+ prop.put(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, sb.getConfigBool(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, true) ? 1 : 0);
+ prop.put(SwitchboardConstants.REMOTESEARCH_RESULT_STORE+"_"+SwitchboardConstants.SEARCH_USECLICKSERVLET, sb.getConfigBool(SwitchboardConstants.SEARCH_USECLICKSERVLET, false) ? 1 : 0);
+
prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0);
prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0);
prop.put("search.navigation.collections", sb.getConfig("search.navigation", "").indexOf("collections",0) >= 0 ? 1 : 0);
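Because the checkbox for the click servlet lives inside the #(remotesearch.result.store)# block of ConfigPortal.html, its template property name is the concatenation built above. For orientation, the two template keys expand to the following (editor's illustration; storeRemoteResults/useClickServlet are placeholder names):

    prop.put("remotesearch.result.store", storeRemoteResults ? 1 : 0);
    prop.put("remotesearch.result.store_search.result.useclickservlet", useClickServlet ? 1 : 0);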
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index eff60c80f..9ac02ddc4 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -181,6 +181,8 @@ public class yacysearchitem {
}
}
prop.putXML("content_link", modifyURL); // putXML for rss
+ } else if (sb.getConfigBool(SwitchboardConstants.SEARCH_USECLICKSERVLET, false)) {
+ prop.putXML("content_link", "click?url="+resultUrlstring); // putXML for rss
} else {
prop.putXML("content_link", resultUrlstring); // putXML for rss
}
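With the option enabled, a result entry first links to the local peer (click?url=<target url>) and the ClickServlet in the next file forwards the browser to the target. Since resultUrlstring is embedded into the query string as-is, URL-encoding it would be a safer variant; a sketch of that hypothetical hardening (not part of the patch):

    try {
        // hypothetical: encode the target before embedding it as a query parameter
        final String clickLink = "click?url=" + java.net.URLEncoder.encode(resultUrlstring, "UTF-8");
        prop.putXML("content_link", clickLink); // putXML for rss
    } catch (final java.io.UnsupportedEncodingException e) {
        prop.putXML("content_link", resultUrlstring); // UTF-8 is always available, this cannot happen
    }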
diff --git a/source/net/yacy/http/servlets/ClickServlet.java b/source/net/yacy/http/servlets/ClickServlet.java
new file mode 100644
index 000000000..5e4d40e82
--- /dev/null
+++ b/source/net/yacy/http/servlets/ClickServlet.java
@@ -0,0 +1,144 @@
+/**
+ * ClickServlet Copyright 2014 by Michael Peter Christen First released
+ * 25.12.2014 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt If not, see
+ * .
+ */
+package net.yacy.http.servlets;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Collection;
+
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.search.Switchboard;
+import net.yacy.search.SwitchboardConstants;
+
+/**
+ * The ClickServlet is used as search result link to perform additional actions
+ * upon a click on the link by the user. The actual target url is given as a parameter;
+ * the servlet forwards the user to the target page and performs additional
+ * actions with the target url (basically an alternative to using javascript
+ * href.onClick() ).
+ *
+ * Request parameter: url= the target url. The user's browser is forwarded to the target
+ * using a html refresh header or javascript; afterwards the configured actions are performed.
+ *
+ * Actions e.g. (0- = not implemented yet)
+ * - crawl/recrawl the url
+ * - crawl all links on page (with depth) / site
+ * 0- increase/create rating
+ * 0- add to a collection
+ * 0- connect query and url
+ * 0- learn and classify content - promote rating
+ * 0- add to click statistic url/cnt (maybe to use for boost)
+ */
+public class ClickServlet extends HttpServlet {
+
+ private static final long serialVersionUID = 1L;
+
+ // the configured action to perform on click (set via the "clickaction" init parameter)
+ String _actionCode = "index";
+
+ static final String crawlaction = "crawl"; // actionCode to add url to crawler with crawldepth=0
+ static final String indexaction = "index"; // actionCode to add url to index (=default)
+ static final String crawllinksaction = "crawllinks"; // actionCode to add url to crawler with crawldepth=1
+
+ @Override
+ public void init() {
+ if (this.getInitParameter("clickaction") != null) {
+ _actionCode = this.getInitParameter("clickaction");
+ }
+ }
+
+ @Override
+ public void service(ServletRequest request, ServletResponse response) throws IOException, ServletException {
+
+ HttpServletRequest hrequest = (HttpServletRequest) request;
+ HttpServletResponse hresponse = (HttpServletResponse) response;
+
+ final String strUrl = hrequest.getParameter("url");
+ if (strUrl == null) {
+ hresponse.sendError(HttpServletResponse.SC_NOT_FOUND, "url parameter missing");
+ return;
+ }
+
+ try {
+ hresponse.setStatus(HttpServletResponse.SC_OK);
+ /* alternative to use javascript / http-equiv header
+ hresponse.setStatus(HttpServletResponse.SC_TEMPORARY_REDIRECT);
+ hresponse.setHeader(HeaderFramework.LOCATION, strUrl);
+ */
+
+ // output a small html page that forwards the browser to the target url
+ response.setContentType("text/html");
+ PrintWriter pw = response.getWriter();
+
+ pw.println("<html>");
+ pw.println("<head>");
+ // forward via html refresh header
+ pw.print("<meta http-equiv=\"refresh\" content=\"0; url=" + strUrl + "\">");
+ // fallback: forward via javascript
+ pw.print("<script type=\"text/javascript\">window.location.href=\"" + strUrl + "\";</script>");
+ pw.println("</head>");
+ pw.println("<body></body></html>");
+ pw.close();
+
+ if (Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_USECLICKSERVLET, false)) {
+
+ // do click event action
+ if (_actionCode != null) {
+ switch (_actionCode) {
+ case crawlaction: {
+ final Collection<DigestURL> urls = new ArrayList<DigestURL>();
+ urls.add(new DigestURL(strUrl));
+ Switchboard.getSwitchboard().addToCrawler(urls, false);
+ break;
+ }
+ case indexaction: {
+ final Collection<DigestURL> urls = new ArrayList<DigestURL>();
+ urls.add(new DigestURL(strUrl));
+
+ Switchboard.getSwitchboard().addToIndex(urls, null, null, null, true);
+ break;
+ }
+ case crawllinksaction: {
+ final Collection<DigestURL> urls = new ArrayList<DigestURL>();
+ urls.add(new DigestURL(strUrl));
+ Switchboard.getSwitchboard().addToCrawler(urls, false);
+ Switchboard.getSwitchboard().heuristicSearchResults(strUrl);
+ break;
+ }
+ }
+ }
+ }
+ } catch (Exception e) {
+ ConcurrentLog.logException(e);
+ }
+ }
+
+}
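One caveat for the forward page above: the url request parameter is echoed into the generated html. A hypothetical hardening (editor's sketch, not part of the patch) would parse and re-serialize the target inside the existing try block before printing it, reusing the DigestURL that is needed for the click actions anyway:

    // assumption: toNormalform(true) yields the canonical url string, as used elsewhere in YaCy
    final DigestURL target = new DigestURL(strUrl); // malformed input throws and is caught by the surrounding catch
    final String safeUrl = target.toNormalform(true); // embed this instead of the raw parameter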
diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java
index 012fa969b..15915dbb4 100644
--- a/source/net/yacy/peers/Protocol.java
+++ b/source/net/yacy/peers/Protocol.java
@@ -652,7 +652,13 @@ public final class Protocol {
// insert results to containers
int term = count;
- Collection<URIMetadataNode> storeDocs = new ArrayList<URIMetadataNode>(result.links.size());
+ Map<String, LinkedHashSet<String>> snip;
+ if (event.addResultsToLocalIndex) {
+ snip = null;
+ } else {
+ snip = new HashMap<String, LinkedHashSet<String>>(); // needed to display the nodestack results
+ }
+ List<URIMetadataNode> storeDocs = new ArrayList<URIMetadataNode>(result.links.size());
for ( final URIMetadataNode urlEntry : result.links ) {
if ( term-- <= 0 ) {
break; // do not process more that requested (in case that evil peers fill us up with rubbish)
@@ -713,6 +719,13 @@ public final class Protocol {
// instead, they are placed in a snipped-search cache.
// System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'");
TextSnippet.snippetsCache.put(wordhashes, ASCII.String(urlEntry.hash()), urlEntry.snippet());
+ // add the snippet for snippet handling of nodestack entries (used if results are not stored to the local index)
+ if (!event.addResultsToLocalIndex) {
+ // TODO: a snippet is required even to get the snippetcache entry back when adding to the nodestack
+ LinkedHashSet<String> sniptxt = new LinkedHashSet<String>();
+ sniptxt.add(urlEntry.snippet());
+ snip.put(ASCII.String(urlEntry.hash()), sniptxt);
+ }
}
// add the url entry to the word indexes
@@ -725,19 +738,25 @@ public final class Protocol {
}
}
}
-
- for (URIMetadataNode entry: storeDocs) {
- try {
- event.query.getSegment().fulltext().putMetadata(entry);
- } catch (final IOException e) {
- ConcurrentLog.logException(e);
- }
- }
// store remote result to local result container
// insert one container into the search result buffer
// one is enough, only the references are used, not the word
- event.addRWIs(container.get(0), false, target.getName() + "/" + target.hash, result.totalCount, time);
+ if (event.addResultsToLocalIndex) {
+ for (URIMetadataNode entry : storeDocs) {
+ try {
+ event.query.getSegment().fulltext().putMetadata(entry);
+ } catch (final IOException e) {
+ ConcurrentLog.logException(e);
+ }
+ }
+ event.addRWIs(container.get(0), false, target.getName() + "/" + target.hash, result.totalCount, time);
+ } else {
+ // feed the results as nodes (Solr-style results) which carry their metadata,
+ // to avoid a getMetadata lookup for RWI results, which would fail (no metadata in the local index) so the results could not be displayed
+ Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(); // no facets available here
+ event.addNodes(storeDocs, facets, snip, false, target.getName() + "/" + target.hash, count);
+ }
event.addFinalize();
event.addExpectedRemoteReferences(-count);
@@ -1053,7 +1072,10 @@ public final class Protocol {
List<URIMetadataNode> container = new ArrayList<URIMetadataNode>();
Network.log.info("SEARCH (solr), returned " + docList[0].size() + " out of " + docList[0].getNumFound() + " documents and " + facets.size() + " facets " + facets.keySet().toString() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())));
int term = count;
- Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(docList[0].size());
+ Collection<SolrInputDocument> docs;
+ if (event.addResultsToLocalIndex) { // only needed to store remote results
+ docs = new ArrayList<SolrInputDocument>(docList[0].size());
+ } else docs = null;
for (final SolrDocument doc: docList[0]) {
if ( term-- <= 0 ) {
break; // do not process more that requested (in case that evil peers fill us up with rubbish)
@@ -1092,15 +1114,18 @@ public final class Protocol {
event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis()));
// put the remote documents to the local index. We must convert the solr document to a solr input document:
- SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(doc);
+ if (event.addResultsToLocalIndex) {
+ final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(doc);
+
+ // the input document stays untouched because it contains top-level cloned objects
+ if (event.addResultsToLocalIndex) docs.add(sid);
+ }
// after this conversion we can remove the largest and not used field text_t and synonyms_sxt from the document
// because that goes into a search cache and would take a lot of memory in the search cache
//doc.removeFields(CollectionSchema.text_t.getSolrFieldName());
doc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName());
- // the input document stays untouched because it contains top-level cloned objects
- docs.add(sid);
ResultURLs.stack(
ASCII.String(urlEntry.url().hash()),
urlEntry.url().getHost(),
@@ -1122,10 +1147,12 @@ public final class Protocol {
event.addExpectedRemoteReferences(-count);
Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + numFound + " references");
} else {
- for (SolrInputDocument doc: docs) {
- event.query.getSegment().putDocument(doc);
+ if (event.addResultsToLocalIndex) {
+ for (SolrInputDocument doc: docs) {
+ event.query.getSegment().putDocument(doc);
+ }
+ docs.clear(); docs = null;
}
- docs.clear(); docs = null;
event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound);
event.addFinalize();
event.addExpectedRemoteReferences(-count);
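Both hunks branch on the same flag; for orientation, this is the plumbing the patch introduces (editor's summary, names as in the patch):

    // yacy.init: remotesearch.result.store = true|false
    //   -> SearchEventCache: addToLocalIdx = getConfigBool(REMOTESEARCH_RESULT_STORE, true)
    //   -> SearchEvent.addResultsToLocalIndex (new constructor parameter)
    //   -> Protocol: flag true  - putMetadata()/putDocument() into the local index, then addRWIs()/addNodes() as before
    //      Protocol: flag false - skip the local index and call addNodes() with the documents and their snippets only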
diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java
index c90eeec96..26b476aa1 100644
--- a/source/net/yacy/search/SwitchboardConstants.java
+++ b/source/net/yacy/search/SwitchboardConstants.java
@@ -331,6 +331,8 @@ public final class SwitchboardConstants {
public static final String REMOTESEARCH_MAXCOUNT_USER = "remotesearch.maxcount";
public static final String REMOTESEARCH_MAXTIME_USER = "remotesearch.maxtime";
+ public static final String REMOTESEARCH_RESULT_STORE = "remotesearch.result.store"; // add remote results to local index
+
public static final String FEDERATED_SERVICE_SOLR_INDEXING_ENABLED = "federated.service.solr.indexing.enabled";
public static final String FEDERATED_SERVICE_SOLR_INDEXING_URL = "federated.service.solr.indexing.url";
public static final String FEDERATED_SERVICE_SOLR_INDEXING_SHARDING = "federated.service.solr.indexing.sharding";
@@ -524,6 +526,8 @@ public final class SwitchboardConstants {
public static final String SEARCH_VERIFY = "search.verify";
public static final String SEARCH_VERIFY_DELETE = "search.verify.delete";
+ public static final String SEARCH_USECLICKSERVLET = "search.result.useclickservlet"; // resultlink via click servlet
+
/**
* ranking+evaluation
*/
diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java
index 6fcf8d378..4c7a2f2f4 100644
--- a/source/net/yacy/search/query/SearchEvent.java
+++ b/source/net/yacy/search/query/SearchEvent.java
@@ -158,6 +158,7 @@ public final class SearchEvent {
private long snippetComputationAllTime;
private ConcurrentHashMap<String, LinkedHashSet<String>> snippets;
private final boolean remote;
+ public final boolean addResultsToLocalIndex; // add received results to the local index (default=true)
private SortedMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private final ScoreMap<String> ref; // reference score computation for the commonSense heuristic
private final long maxtime;
@@ -204,7 +205,8 @@ public final class SearchEvent {
final LoaderDispatcher loader,
final int remote_maxcount,
final long remote_maxtime,
- final boolean deleteIfSnippetFail) {
+ final boolean deleteIfSnippetFail,
+ final boolean addResultsToLocalIdx) {
long ab = MemoryControl.available();
if (ab < 1024 * 1024 * 200) {
@@ -255,6 +257,7 @@ public final class SearchEvent {
this.IAmaxcounthash = null;
this.IAneardhthash = null;
this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW_SEARCH, false)));
+ this.addResultsToLocalIndex = addResultsToLocalIdx;
this.local_rwi_available = new AtomicInteger(0); // the number of results in the local peer after filtering
this.local_rwi_stored = new AtomicInteger(0);
this.local_solr_available = new AtomicInteger(0);
diff --git a/source/net/yacy/search/query/SearchEventCache.java b/source/net/yacy/search/query/SearchEventCache.java
index 9458bca07..bdae26b28 100644
--- a/source/net/yacy/search/query/SearchEventCache.java
+++ b/source/net/yacy/search/query/SearchEventCache.java
@@ -171,7 +171,8 @@ public class SearchEventCache {
// start a new event
Switchboard sb = Switchboard.getSwitchboard();
final boolean delete = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true);
- event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, delete);
+ final boolean addToLocalIdx = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, true);
+ event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, delete, addToLocalIdx);
MemoryControl.request(100 * 1024 * 1024, false); // this may trigger a short memory status which causes a reducing of cache space of other threads
}