Added a “don't store remote search results” option

This is intended for peers who want to participate in the P2P network but don't wish to load/fill-up their index with metadata of every received search result. 
The DHT transfer is not affected by this option (it works as usual, so a peer that disables the new store-to-index switch still receives and holds metadata according to the DHT rules).
The downside for the local peer is that search speed will not improve if search terms are only available remotely or by quick hits in the local index.

To still be able to improve the local index, a Click-Servlet option was added as well.
If switched on, all search result links point to this servlet, which forwards the user's browser (via an HTML header) to the desired page and feeds the page to the fulltext index.
The servlet accepts a parameter defining the action to perform (see defaults/web.xml, index, crawl, crawllinks)

The option check-boxes are placed in ConfigPortal.html
pull/1/head
reger 10 years ago
parent d729386787
commit d44d8996d0

@ -55,7 +55,23 @@
<param-value>YaCy stop proxy</param-value> <param-value>YaCy stop proxy</param-value>
</init-param> </init-param>
</servlet> </servlet>
<servlet>
<servlet-name>ClickServlet</servlet-name>
<servlet-class>net.yacy.http.servlets.ClickServlet</servlet-class>
<init-param>
<param-name>clickaction</param-name>
<param-value>index</param-value> <!-- options: index crawl crawllinks -->
<description>defines the action to perform with supplied url</description>
</init-param>
</servlet>
<servlet-mapping>
<servlet-name>ClickServlet</servlet-name>
<url-pattern>/click</url-pattern>
</servlet-mapping>
<!-- mappings activated by the application - hardcoded (added here for completness) --> <!-- mappings activated by the application - hardcoded (added here for completness) -->
<servlet-mapping> <servlet-mapping>
<servlet-name>SolrSelectServlet</servlet-name> <servlet-name>SolrSelectServlet</servlet-name>

@ -826,6 +826,8 @@ search.result.show.proxy = false
search.result.show.hostbrowser = true search.result.show.hostbrowser = true
search.result.show.vocabulary = false search.result.show.vocabulary = false
search.result.useclickservlet = true
# search navigators: comma-separated list of default values for search navigation. # search navigators: comma-separated list of default values for search navigation.
# can be temporary different if search string is given with differen navigation values # can be temporary different if search string is given with differen navigation values
# assigning no value(s) means that no navigation is shown # assigning no value(s) means that no navigation is shown
@ -857,6 +859,7 @@ search.verify.delete = true
# remote search details # remote search details
remotesearch.maxcount = 10 remotesearch.maxcount = 10
remotesearch.maxtime = 3000 remotesearch.maxtime = 3000
remotesearch.result.store=true
# specifies if yacy should set it's own referer if no referer URL # specifies if yacy should set it's own referer if no referer URL
# was set by the client. # was set by the client.

@ -51,19 +51,31 @@
<dt>Snippet Fetch Strategy &amp; Link Verification</dt> <dt>Snippet Fetch Strategy &amp; Link Verification</dt>
<dd> <dd>
<img src="env/grafics/idea.png" width="32" height="32" alt="idea" align="center"/>Speed up search results with this option! (use CACHEONLY or FALSE to switch off verification)<br/> <img src="env/grafics/idea.png" width="32" height="32" alt="idea" align="center"/>Speed up search results with this option! (use CACHEONLY or FALSE to switch off verification)<br/>
<input type="radio" name="search.verify" value="nocache" #(search.verify.nocache)#::checked="checked"#(/search.verify.nocache)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/>NOCACHE: no use of web cache, load all snippets online<br/> <input type="radio" name="search.verify" value="nocache" #(search.verify.nocache)#::checked="checked"#(/search.verify.nocache)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/> NOCACHE: no use of web cache, load all snippets online<br/>
<input type="radio" name="search.verify" value="iffresh" #(search.verify.iffresh)#::checked="checked"#(/search.verify.iffresh)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/>IFFRESH: use the cache if the cache exists and is fresh otherwise load online<br/> <input type="radio" name="search.verify" value="iffresh" #(search.verify.iffresh)#::checked="checked"#(/search.verify.iffresh)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/> IFFRESH: use the cache if the cache exists and is fresh otherwise load online<br/>
<input type="radio" name="search.verify" value="ifexist" #(search.verify.ifexist)#::checked="checked"#(/search.verify.ifexist)# />IFEXIST: use the cache if the cache exist or load online<br/> <input type="radio" name="search.verify" value="ifexist" #(search.verify.ifexist)#::checked="checked"#(/search.verify.ifexist)# /> IFEXIST: use the cache if the cache exist or load online<br/>
<input type="checkbox" name="search.verify.delete" id="search_verify_delete" value="true" #(search.verify.delete)#::checked="checked"#(/search.verify.delete)# />If verification fails, delete index reference<br/><br/> <input type="checkbox" name="search.verify.delete" id="search_verify_delete" value="true" #(search.verify.delete)#::checked="checked"#(/search.verify.delete)# /> If verification fails, delete index reference<br/><br/>
<input type="radio" name="search.verify" value="cacheonly" #(search.verify.cacheonly)#::checked="checked"#(/search.verify.cacheonly)# onclick="document.getElementById('search_verify_delete').disabled=true;document.getElementById('search_verify_delete').checked=false;"/>CACHEONLY: never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available and show result without snippet<br/> <input type="radio" name="search.verify" value="cacheonly" #(search.verify.cacheonly)#::checked="checked"#(/search.verify.cacheonly)# onclick="document.getElementById('search_verify_delete').disabled=true;document.getElementById('search_verify_delete').checked=false;"/> CACHEONLY: never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available and show result without snippet<br/>
<input type="radio" name="search.verify" value="false" #(search.verify.false)#::checked="checked"#(/search.verify.false)# onclick="document.getElementById('search_verify_delete').disabled=true;document.getElementById('search_verify_delete').checked=false;"/>FALSE: no link verification and not snippet generation: all search results are valid without verification <input type="radio" name="search.verify" value="false" #(search.verify.false)#::checked="checked"#(/search.verify.false)# onclick="document.getElementById('search_verify_delete').disabled=true;document.getElementById('search_verify_delete').checked=false;"/> FALSE: no link verification and not snippet generation: all search results are valid without verification
</dd> </dd>
<dt>Greedy Learning Mode</dt> <dt>Greedy Learning Mode</dt>
<dd> <dd>
<input type="checkbox" name="greedylearning.active" value="true" #(greedylearning.active)#::checked="checked"#(/greedylearning.active)# />load documents linked in search results, will be deactivated automatically when index size &gt; #[greedylearning.limit.doccount]# <small> (see <a href="ConfigHeuristics_p.html">Heuristics: search-result</a> to use this permanent)</small> <input type="checkbox" name="greedylearning.active" value="true" #(greedylearning.active)#::checked="checked"#(/greedylearning.active)# /> load documents linked in search results, will be deactivated automatically when index size &gt; #[greedylearning.limit.doccount]# <small> (see <a href="ConfigHeuristics_p.html">Heuristics: search-result</a> to use this permanent)</small>
</dd> </dd>
<dt>Index remote results</dt>
<dd>
<input type="checkbox" name="remotesearch.result.store" value="true" #(remotesearch.result.store)#::checked="checked"#(/remotesearch.result.store)# /> add remote search results to the local index <b>( default=on, it is recommended to enable this option ! )</b>
</dd>
#(remotesearch.result.store)#
<dt>Use Click-Servlet for search result links</dt>
<dd>
<input type="checkbox" name="search.result.useclickservlet" value="true" #(search.result.useclickservlet)#::checked="checked"#(/search.result.useclickservlet)# /> allows YaCy to perform some actions if user clicks on a search result (by default add the clicked link to the index)
</dd>
::#(/remotesearch.result.store)#
<dt>Default Pop-Up Page</dt> <dt>Default Pop-Up Page</dt>
<dd> <dd>
<input type="radio" name="popup" value="status" #(popupStatus)#::checked="checked"#(/popupStatus)# />Status Page&nbsp; <input type="radio" name="popup" value="status" #(popupStatus)#::checked="checked"#(/popupStatus)# />Status Page&nbsp;

@ -90,6 +90,11 @@ public class ConfigPortal {
sb.setConfig("search.options", post.getBoolean("search.options")); sb.setConfig("search.options", post.getBoolean("search.options"));
sb.setConfig(SwitchboardConstants.GREEDYLEARNING_ACTIVE, post.getBoolean(SwitchboardConstants.GREEDYLEARNING_ACTIVE)); sb.setConfig(SwitchboardConstants.GREEDYLEARNING_ACTIVE, post.getBoolean(SwitchboardConstants.GREEDYLEARNING_ACTIVE));
final boolean storeresult = post.getBoolean(SwitchboardConstants.REMOTESEARCH_RESULT_STORE);
sb.setConfig(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, storeresult);
// click servlet only meaningful if result not stored (currently)
sb.setConfig(SwitchboardConstants.SEARCH_USECLICKSERVLET, !storeresult && post.getBoolean(SwitchboardConstants.SEARCH_USECLICKSERVLET));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, post.get("search.verify", "ifexist")); sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, post.get("search.verify", "ifexist"));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, post.getBoolean("search.verify.delete")); sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, post.getBoolean("search.verify.delete"));
@ -144,6 +149,8 @@ public class ConfigPortal {
sb.setConfig("search.navigation", config.getProperty("search.navigation","hosts,authors,namespace,topics")); sb.setConfig("search.navigation", config.getProperty("search.navigation","hosts,authors,namespace,topics"));
sb.setConfig("search.options", config.getProperty("search.options","true")); sb.setConfig("search.options", config.getProperty("search.options","true"));
sb.setConfig(SwitchboardConstants.GREEDYLEARNING_ACTIVE, config.getProperty(SwitchboardConstants.GREEDYLEARNING_ACTIVE)); sb.setConfig(SwitchboardConstants.GREEDYLEARNING_ACTIVE, config.getProperty(SwitchboardConstants.GREEDYLEARNING_ACTIVE));
sb.setConfig(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, config.getProperty(SwitchboardConstants.REMOTESEARCH_RESULT_STORE));
sb.setConfig(SwitchboardConstants.REMOTESEARCH_RESULT_STORE+"_"+SwitchboardConstants.SEARCH_USECLICKSERVLET, config.getProperty(SwitchboardConstants.SEARCH_USECLICKSERVLET));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, config.getProperty(SwitchboardConstants.SEARCH_VERIFY,"iffresh")); sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, config.getProperty(SwitchboardConstants.SEARCH_VERIFY,"iffresh"));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, config.getProperty(SwitchboardConstants.SEARCH_VERIFY_DELETE,"true")); sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, config.getProperty(SwitchboardConstants.SEARCH_VERIFY_DELETE,"true"));
sb.setConfig("about.headline", config.getProperty("about.headline","")); sb.setConfig("about.headline", config.getProperty("about.headline",""));
@ -165,6 +172,9 @@ public class ConfigPortal {
prop.put(SwitchboardConstants.GREEDYLEARNING_ACTIVE, sb.getConfigBool(SwitchboardConstants.GREEDYLEARNING_ACTIVE, false) ? 1 : 0); prop.put(SwitchboardConstants.GREEDYLEARNING_ACTIVE, sb.getConfigBool(SwitchboardConstants.GREEDYLEARNING_ACTIVE, false) ? 1 : 0);
prop.put(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, sb.getConfig(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, "0")); prop.put(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, sb.getConfig(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, "0"));
prop.put(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, sb.getConfigBool(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, true) ? 1 : 0);
prop.put(SwitchboardConstants.REMOTESEARCH_RESULT_STORE+"_"+SwitchboardConstants.SEARCH_USECLICKSERVLET, sb.getConfigBool(SwitchboardConstants.SEARCH_USECLICKSERVLET, false) ? 1 : 0);
prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0); prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0);
prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0); prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0);
prop.put("search.navigation.collections", sb.getConfig("search.navigation", "").indexOf("collections",0) >= 0 ? 1 : 0); prop.put("search.navigation.collections", sb.getConfig("search.navigation", "").indexOf("collections",0) >= 0 ? 1 : 0);

@ -181,6 +181,8 @@ public class yacysearchitem {
} }
} }
prop.putXML("content_link", modifyURL); // putXML for rss prop.putXML("content_link", modifyURL); // putXML for rss
} else if (sb.getConfigBool(SwitchboardConstants.SEARCH_USECLICKSERVLET, false)) {
prop.putXML("content_link", "click?url="+resultUrlstring); // putXML for rss
} else { } else {
prop.putXML("content_link", resultUrlstring); // putXML for rss prop.putXML("content_link", resultUrlstring); // putXML for rss
} }

@ -0,0 +1,144 @@
/**
* ClickServlet Copyright 2014 by Michael Peter Christen First released
* 25.12.2014 at http://yacy.net
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt If not, see
* <http://www.gnu.org/licenses/>.
*/
package net.yacy.http.servlets;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
/**
 * The ClickServlet is used as search result link target so that additional
 * actions can be performed when the user clicks on a result. The actual target
 * url is given as request parameter; the servlet answers with a tiny html page
 * that forwards the browser to the target (javascript, with a meta refresh
 * fallback) and afterwards performs the configured action with the target url
 * (basically an alternative to using javascript href.onClick()).
 *
 * Request parameter:
 *   url = the target url the browser is forwarded to
 *
 * Servlet init parameter:
 *   clickaction = one of "index" (default), "crawl", "crawllinks"
 *
 * Actions e.g. (0- = not implemented yet)
 * - crawl/recrawl the url
 * - crawl all links on page (with depth) / site
 * 0- increase/create rating
 * 0- add to a collection
 * 0- connect query and url
 * 0- learn and classify content - promote rating
 * 0- add to click statistic url/cnt (maybe to use for boost)
 *
 * Security note: the url parameter is supplied by the client and is echoed
 * into the generated page, therefore it is escaped for the javascript and the
 * html context, and urls with a script capable scheme are rejected.
 */
public class ClickServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

    // config switches to remember actions to perform
    String _actionCode = "index";
    static final String crawlaction = "crawl"; // actionCode to add url to crawler with crawldepth=0
    static final String indexaction = "index"; // actionCode to add url to index (=default)
    static final String crawllinksaction = "crawllinks"; // actionCode to add url to crawler with crawldepth=1

    /** url schemes a browser executes/renders in the context of the forwarding page */
    private static final String[] SCRIPT_SCHEMES = {"javascript:", "vbscript:", "data:"};

    /** characters (besides ascii letters and digits) copied verbatim into a javascript string literal */
    private static final String JS_SAFE_URL_CHARS = "-._~:/?#[]@!$()*+,;=%";

    /**
     * Reads the optional init parameter "clickaction" which selects the action
     * performed on a click; the default "index" is kept if it is not set.
     */
    @Override
    public void init() {
        final String configured = this.getInitParameter("clickaction");
        if (configured != null) {
            _actionCode = configured;
        }
    }

    /**
     * Forwards the browser to the url given in the "url" request parameter and,
     * if the click servlet is enabled in the configuration, performs the
     * configured click action with that url.
     *
     * @param request the request, expected to carry the parameter "url"
     * @param response receives the forwarding html page; 404 if the url
     *        parameter is missing, 400 if the url uses a script scheme
     * @throws IOException if sending the error response fails
     * @throws ServletException declared by the servlet contract
     */
    @Override
    public void service(ServletRequest request, ServletResponse response) throws IOException, ServletException {
        final HttpServletRequest hrequest = (HttpServletRequest) request;
        final HttpServletResponse hresponse = (HttpServletResponse) response;

        final String strUrl = hrequest.getParameter("url");
        if (strUrl == null) {
            hresponse.sendError(HttpServletResponse.SC_NOT_FOUND, "url parameter missing");
            return;
        }
        // forwarding to e.g. a javascript: url would run attacker supplied script within this peer's origin
        if (isScriptUrl(strUrl)) {
            hresponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "url scheme not allowed");
            return;
        }

        try {
            hresponse.setStatus(HttpServletResponse.SC_OK);
            // set the content type before the writer is obtained
            hresponse.setContentType("text/html");

            // output html forward to url header
            // (an http redirect with a Location header would be an alternative to javascript / http-equiv)
            final PrintWriter pw = hresponse.getWriter();
            try {
                pw.println("<html>");
                pw.println("<head>");
                pw.print("<script>window.location.replace(\"");
                pw.print(escapeForJsString(strUrl));
                pw.println("\");</script>");
                pw.print("<noscript><META http-equiv=\"refresh\" content=\"0; URL=");
                pw.print(escapeForHtmlAttribute(strUrl));
                pw.println("\"></noscript>");
                pw.println("</head></html>");
            } finally {
                // the page is finished before the (possibly slow) click action starts
                pw.close();
            }

            final Switchboard sb = Switchboard.getSwitchboard();
            if (sb.getConfigBool(SwitchboardConstants.SEARCH_USECLICKSERVLET, false) && _actionCode != null) {
                // do click event action
                switch (_actionCode) {
                    case crawlaction:
                        sb.addToCrawler(singleUrl(strUrl), false);
                        break;
                    case indexaction:
                        sb.addToIndex(singleUrl(strUrl), null, null, null, true);
                        break;
                    case crawllinksaction:
                        sb.addToCrawler(singleUrl(strUrl), false);
                        sb.heuristicSearchResults(strUrl);
                        break;
                    default:
                        // unknown action code: forward only
                        break;
                }
            }
        } catch (Exception e) {
            // the browser has been served already (or cannot be served), so only log the problem
            ConcurrentLog.logException(e);
        }
    }

    /**
     * Wraps the given url string into a one element collection as expected by
     * the Switchboard crawl/index methods.
     *
     * @param url the url string to parse
     * @return a new mutable collection containing the parsed url
     * @throws IOException if the string cannot be parsed as url
     */
    private static Collection<DigestURL> singleUrl(final String url) throws IOException {
        final Collection<DigestURL> urls = new ArrayList<DigestURL>(1);
        urls.add(new DigestURL(url));
        return urls;
    }

    /**
     * Checks if the url starts with a scheme that makes the browser run script
     * or render attacker controlled content instead of navigating away.
     * Whitespace and control characters are ignored for the comparison because
     * browsers strip leading ones and tab/newline inside the scheme.
     *
     * @param url the client supplied url
     * @return true if the url uses one of the SCRIPT_SCHEMES
     */
    private static boolean isScriptUrl(final String url) {
        final int maxHead = 16; // longer than any entry of SCRIPT_SCHEMES
        final StringBuilder head = new StringBuilder(maxHead);
        for (int i = 0; i < url.length() && head.length() < maxHead; i++) {
            final char c = url.charAt(i);
            if (c > ' ') {
                head.append(c);
            }
        }
        final String compact = head.toString();
        for (final String scheme : SCRIPT_SCHEMES) {
            // regionMatches with ignoreCase compares per character, independent of the default locale
            if (compact.regionMatches(true, 0, scheme, 0, scheme.length())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Escapes a text for use inside a double quoted javascript string literal
     * that is embedded in a html script element. Everything except ascii
     * letters, digits and a few url punctuation characters is written as
     * unicode escape sequence, so quotes, backslashes, line terminators and
     * a closing script tag cannot break out of the literal. The javascript
     * engine decodes the literal back to the original text.
     *
     * @param text the raw text
     * @return the escaped text
     */
    private static String escapeForJsString(final String text) {
        final StringBuilder out = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            final char c = text.charAt(i);
            final boolean asciiAlnum = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
            if (asciiAlnum || JS_SAFE_URL_CHARS.indexOf(c) >= 0) {
                out.append(c);
            } else {
                final String hex = Integer.toHexString(c);
                out.append("\\u");
                for (int pad = hex.length(); pad < 4; pad++) {
                    out.append('0');
                }
                out.append(hex);
            }
        }
        return out.toString();
    }

    /**
     * Escapes a text for use inside a double quoted html attribute value.
     * Markup characters, quotes, control characters and all non ascii
     * characters are written as numeric character references, which makes the
     * output independent of the response character encoding.
     *
     * @param text the raw text
     * @return the escaped text
     */
    private static String escapeForHtmlAttribute(final String text) {
        final StringBuilder out = new StringBuilder(text.length() + 16);
        int i = 0;
        while (i < text.length()) {
            final int cp = text.codePointAt(i);
            i += Character.charCount(cp);
            final boolean markup = cp == '&' || cp == '<' || cp == '>' || cp == '"' || cp == '\'';
            if (markup || cp < 0x20 || cp > 0x7e) {
                out.append("&#").append(cp).append(';');
            } else {
                out.append((char) cp);
            }
        }
        return out.toString();
    }
}

@ -652,7 +652,13 @@ public final class Protocol {
// insert results to containers // insert results to containers
int term = count; int term = count;
Collection<URIMetadataNode> storeDocs = new ArrayList<URIMetadataNode>(result.links.size()); Map<String, LinkedHashSet<String>> snip;
if (event.addResultsToLocalIndex) {
snip = null;
} else {
snip = new HashMap<String, LinkedHashSet<String>>(); // needed to display nodestack results
}
List<URIMetadataNode> storeDocs = new ArrayList<URIMetadataNode>(result.links.size());
for ( final URIMetadataNode urlEntry : result.links ) { for ( final URIMetadataNode urlEntry : result.links ) {
if ( term-- <= 0 ) { if ( term-- <= 0 ) {
break; // do not process more that requested (in case that evil peers fill us up with rubbish) break; // do not process more that requested (in case that evil peers fill us up with rubbish)
@ -713,6 +719,13 @@ public final class Protocol {
// instead, they are placed in a snipped-search cache. // instead, they are placed in a snipped-search cache.
// System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'"); // System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'");
TextSnippet.snippetsCache.put(wordhashes, ASCII.String(urlEntry.hash()), urlEntry.snippet()); TextSnippet.snippetsCache.put(wordhashes, ASCII.String(urlEntry.hash()), urlEntry.snippet());
// add snippet for snippethandling for nodestack entries (used if not stored to index)
if (!event.addResultsToLocalIndex) {
// TODO: must have a snippet even to get the snippetcache entry back when adding to nodestack
LinkedHashSet<String> sniptxt = new LinkedHashSet<String>();
sniptxt.add(urlEntry.snippet());
snip.put(ASCII.String(urlEntry.hash()), sniptxt);
}
} }
// add the url entry to the word indexes // add the url entry to the word indexes
@ -725,19 +738,25 @@ public final class Protocol {
} }
} }
} }
for (URIMetadataNode entry: storeDocs) {
try {
event.query.getSegment().fulltext().putMetadata(entry);
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
}
// store remote result to local result container // store remote result to local result container
// insert one container into the search result buffer // insert one container into the search result buffer
// one is enough, only the references are used, not the word // one is enough, only the references are used, not the word
event.addRWIs(container.get(0), false, target.getName() + "/" + target.hash, result.totalCount, time); if (event.addResultsToLocalIndex) {
for (URIMetadataNode entry : storeDocs) {
try {
event.query.getSegment().fulltext().putMetadata(entry);
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
}
event.addRWIs(container.get(0), false, target.getName() + "/" + target.hash, result.totalCount, time);
} else {
// feed results as nodes (SolrQuery results) which carry metadata,
// to prevent a call to getMetaData for RWI results, which would fail (if no metadata in index and no display of these results)
Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>();
event.addNodes(storeDocs, facets, snip, false, target.getName() + "/" + target.hash, count);
}
event.addFinalize(); event.addFinalize();
event.addExpectedRemoteReferences(-count); event.addExpectedRemoteReferences(-count);
@ -1053,7 +1072,10 @@ public final class Protocol {
List<URIMetadataNode> container = new ArrayList<URIMetadataNode>(); List<URIMetadataNode> container = new ArrayList<URIMetadataNode>();
Network.log.info("SEARCH (solr), returned " + docList[0].size() + " out of " + docList[0].getNumFound() + " documents and " + facets.size() + " facets " + facets.keySet().toString() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))); Network.log.info("SEARCH (solr), returned " + docList[0].size() + " out of " + docList[0].getNumFound() + " documents and " + facets.size() + " facets " + facets.keySet().toString() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())));
int term = count; int term = count;
Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(docList[0].size()); Collection<SolrInputDocument> docs;
if (event.addResultsToLocalIndex) { // only needed to store remote results
docs = new ArrayList<SolrInputDocument>(docList[0].size());
} else docs = null;
for (final SolrDocument doc: docList[0]) { for (final SolrDocument doc: docList[0]) {
if ( term-- <= 0 ) { if ( term-- <= 0 ) {
break; // do not process more that requested (in case that evil peers fill us up with rubbish) break; // do not process more that requested (in case that evil peers fill us up with rubbish)
@ -1092,15 +1114,18 @@ public final class Protocol {
event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis())); event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis()));
// put the remote documents to the local index. We must convert the solr document to a solr input document: // put the remote documents to the local index. We must convert the solr document to a solr input document:
SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(doc); if (event.addResultsToLocalIndex) {
final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(doc);
// the input document stays untouched because it contains top-level cloned objects
if (event.addResultsToLocalIndex) docs.add(sid);
}
// after this conversion we can remove the largest and not used field text_t and synonyms_sxt from the document // after this conversion we can remove the largest and not used field text_t and synonyms_sxt from the document
// because that goes into a search cache and would take a lot of memory in the search cache // because that goes into a search cache and would take a lot of memory in the search cache
//doc.removeFields(CollectionSchema.text_t.getSolrFieldName()); //doc.removeFields(CollectionSchema.text_t.getSolrFieldName());
doc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName()); doc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName());
// the input document stays untouched because it contains top-level cloned objects
docs.add(sid);
ResultURLs.stack( ResultURLs.stack(
ASCII.String(urlEntry.url().hash()), ASCII.String(urlEntry.url().hash()),
urlEntry.url().getHost(), urlEntry.url().getHost(),
@ -1122,10 +1147,12 @@ public final class Protocol {
event.addExpectedRemoteReferences(-count); event.addExpectedRemoteReferences(-count);
Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + numFound + " references"); Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + numFound + " references");
} else { } else {
for (SolrInputDocument doc: docs) { if (event.addResultsToLocalIndex) {
event.query.getSegment().putDocument(doc); for (SolrInputDocument doc: docs) {
event.query.getSegment().putDocument(doc);
}
docs.clear(); docs = null;
} }
docs.clear(); docs = null;
event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound); event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound);
event.addFinalize(); event.addFinalize();
event.addExpectedRemoteReferences(-count); event.addExpectedRemoteReferences(-count);

@ -331,6 +331,8 @@ public final class SwitchboardConstants {
public static final String REMOTESEARCH_MAXCOUNT_USER = "remotesearch.maxcount"; public static final String REMOTESEARCH_MAXCOUNT_USER = "remotesearch.maxcount";
public static final String REMOTESEARCH_MAXTIME_USER = "remotesearch.maxtime"; public static final String REMOTESEARCH_MAXTIME_USER = "remotesearch.maxtime";
public static final String REMOTESEARCH_RESULT_STORE = "remotesearch.result.store"; // add remote results to local index
public static final String FEDERATED_SERVICE_SOLR_INDEXING_ENABLED = "federated.service.solr.indexing.enabled"; public static final String FEDERATED_SERVICE_SOLR_INDEXING_ENABLED = "federated.service.solr.indexing.enabled";
public static final String FEDERATED_SERVICE_SOLR_INDEXING_URL = "federated.service.solr.indexing.url"; public static final String FEDERATED_SERVICE_SOLR_INDEXING_URL = "federated.service.solr.indexing.url";
public static final String FEDERATED_SERVICE_SOLR_INDEXING_SHARDING = "federated.service.solr.indexing.sharding"; public static final String FEDERATED_SERVICE_SOLR_INDEXING_SHARDING = "federated.service.solr.indexing.sharding";
@ -524,6 +526,8 @@ public final class SwitchboardConstants {
public static final String SEARCH_VERIFY = "search.verify"; public static final String SEARCH_VERIFY = "search.verify";
public static final String SEARCH_VERIFY_DELETE = "search.verify.delete"; public static final String SEARCH_VERIFY_DELETE = "search.verify.delete";
public static final String SEARCH_USECLICKSERVLET = "search.result.useclickservlet"; // resultlink via click servlet
/** /**
* ranking+evaluation * ranking+evaluation
*/ */

@ -158,6 +158,7 @@ public final class SearchEvent {
private long snippetComputationAllTime; private long snippetComputationAllTime;
private ConcurrentHashMap<String, LinkedHashSet<String>> snippets; private ConcurrentHashMap<String, LinkedHashSet<String>> snippets;
private final boolean remote; private final boolean remote;
public final boolean addResultsToLocalIndex; // add received results to local index (defult=true)
private SortedMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion; private SortedMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private final ScoreMap<String> ref; // reference score computation for the commonSense heuristic private final ScoreMap<String> ref; // reference score computation for the commonSense heuristic
private final long maxtime; private final long maxtime;
@ -204,7 +205,8 @@ public final class SearchEvent {
final LoaderDispatcher loader, final LoaderDispatcher loader,
final int remote_maxcount, final int remote_maxcount,
final long remote_maxtime, final long remote_maxtime,
final boolean deleteIfSnippetFail) { final boolean deleteIfSnippetFail,
final boolean addResultsToLocalIdx) {
long ab = MemoryControl.available(); long ab = MemoryControl.available();
if (ab < 1024 * 1024 * 200) { if (ab < 1024 * 1024 * 200) {
@ -255,6 +257,7 @@ public final class SearchEvent {
this.IAmaxcounthash = null; this.IAmaxcounthash = null;
this.IAneardhthash = null; this.IAneardhthash = null;
this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW_SEARCH, false))); this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW_SEARCH, false)));
this.addResultsToLocalIndex = addResultsToLocalIdx;
this.local_rwi_available = new AtomicInteger(0); // the number of results in the local peer after filtering this.local_rwi_available = new AtomicInteger(0); // the number of results in the local peer after filtering
this.local_rwi_stored = new AtomicInteger(0); this.local_rwi_stored = new AtomicInteger(0);
this.local_solr_available = new AtomicInteger(0); this.local_solr_available = new AtomicInteger(0);

@ -171,7 +171,8 @@ public class SearchEventCache {
// start a new event // start a new event
Switchboard sb = Switchboard.getSwitchboard(); Switchboard sb = Switchboard.getSwitchboard();
final boolean delete = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true); final boolean delete = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true);
event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, delete); final boolean addToLocalIdx = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, true);
event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, delete, addToLocalIdx);
MemoryControl.request(100 * 1024 * 1024, false); // this may trigger a short memory status which causes a reducing of cache space of other threads MemoryControl.request(100 * 1024 * 1024, false); // this may trigger a short memory status which causes a reducing of cache space of other threads
} }

Loading…
Cancel
Save