Because we have the inurl:<term> search modifier, we don't actually

need regular expressions as search attributes. They have now been removed
from the advanced search page, although they are still created internally.
The filter is then expressed against Solr as a regular expression filter
query. If the expression selects a specific protocol,
host or filetype, this is then translated into a faceted query.
pull/1/head
Michael Peter Christen 13 years ago
parent b55ea2197f
commit d64445c3cb

@ -27,6 +27,7 @@
import java.util.Collection; import java.util.Collection;
import java.util.Date; import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
@ -68,7 +69,7 @@ public final class timeline {
language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent); language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent);
if (language == null) language = "en"; if (language == null) language = "en";
} }
final Collection<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute final List<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute
HandleSet q = Word.words2hashesHandles(query[0]); HandleSet q = Word.words2hashesHandles(query[0]);
// tell all threads to do nothing for a specific time // tell all threads to do nothing for a specific time

@ -24,6 +24,7 @@ import java.io.OutputStreamWriter;
import java.io.Writer; import java.io.Writer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -110,7 +111,7 @@ public class searchresult {
post.put("originalQuery", q); post.put("originalQuery", q);
// get a solr query string // get a solr query string
Collection<String>[] cq = QueryParams.cleanQuery(q); List<String>[] cq = QueryParams.cleanQuery(q);
q = QueryParams.solrQueryString(cq[0], cq[1], sb.index.fulltext().getSolrScheme()).toString(); q = QueryParams.solrQueryString(cq[0], cq[1], sb.index.fulltext().getSolrScheme()).toString();
post.put(CommonParams.ROWS, post.remove("num")); post.put(CommonParams.ROWS, post.remove("num"));

@ -71,7 +71,6 @@
<input type="hidden" name="meanCount" value="5" /> <input type="hidden" name="meanCount" value="5" />
#(searchoptions)# #(searchoptions)#
<input type="hidden" name="resource" value="global" /> <input type="hidden" name="resource" value="global" />
<input type="hidden" name="urlmaskfilter" value=".*" />
<input type="hidden" name="prefermaskfilter" value="" /> <input type="hidden" name="prefermaskfilter" value="" />
<input type="hidden" name="maximumRecords" value="#[maximumRecords]#" /> <input type="hidden" name="maximumRecords" value="#[maximumRecords]#" />
</fieldset> </fieldset>
@ -96,10 +95,6 @@
</td> </td>
#(/resource-select)# #(/resource-select)#
</tr> </tr>
<tr>
<td><label for="urlmaskfilter">URL mask</label>:</td>
<td><input id="urlmaskfilter" name="urlmaskfilter" type="text" size="12" maxlength="80" value="#[urlmaskfilter]#" /></td>
</tr>
<tr> <tr>
<td> <td>
<label for="prefermaskfilter">Prefer mask</label>: <label for="prefermaskfilter">Prefer mask</label>:
@ -189,7 +184,7 @@
<dt>browser integration</dt> <dt>browser integration</dt>
<dd>after searching, click-open on the default search engine in the upper right search field of your browser and select 'Add "YaCy Search.."'</dd> <dd>after searching, click-open on the default search engine in the upper right search field of your browser and select 'Add "YaCy Search.."'</dd>
<dt>search as rss feed</dt> <dt>search as rss feed</dt>
<dd>click on the red icon in the upper right after a search. this works good in combination with the '/date' ranking modifier. See an <a href="http://localhost:8090/yacysearch.rss?query=news+%2Fdate&Enter=Search&verify=cacheonly&contentdom=text&nav=hosts%2Cauthors%2Cnamespace%2Ctopics%2Cfiletype%2Cprotocol&startRecord=0&indexof=off&meanCount=5&maximumRecords=10&resource=global&urlmaskfilter=.*&prefermaskfilter=">example</a>.</dd> <dd>click on the red icon in the upper right after a search. this works good in combination with the '/date' ranking modifier. See an <a href="http://localhost:8090/yacysearch.rss?query=news+%2Fdate&Enter=Search&verify=cacheonly&contentdom=text&nav=hosts%2Cauthors%2Cnamespace%2Ctopics%2Cfiletype%2Cprotocol&startRecord=0&indexof=off&meanCount=5&maximumRecords=10&resource=global&prefermaskfilter=">example</a>.</dd>
<dt>json search results</dt> <dt>json search results</dt>
<dd>for ajax developers: get the search rss feed and replace the '.rss' extension in the search result url with '.json'</dd> <dd>for ajax developers: get the search rss feed and replace the '.rss' extension in the search result url with '.json'</dd>
</dl> </dl>

@ -74,7 +74,6 @@ public class index {
final String former = (post == null) ? "" : post.get("former", ""); final String former = (post == null) ? "" : post.get("former", "");
final int count = Math.min(100, (post == null) ? 10 : post.getInt("count", 10)); final int count = Math.min(100, (post == null) ? 10 : post.getInt("count", 10));
final int maximumRecords = sb.getConfigInt(SwitchboardConstants.SEARCH_ITEMS, 10); final int maximumRecords = sb.getConfigInt(SwitchboardConstants.SEARCH_ITEMS, 10);
final String urlmaskfilter = (post == null) ? ".*" : post.get("urlmaskfilter", ".*");
final String prefermaskfilter = (post == null) ? "" : post.get("prefermaskfilter", ""); final String prefermaskfilter = (post == null) ? "" : post.get("prefermaskfilter", "");
final String constraint = (post == null) ? "" : post.get("constraint", ""); final String constraint = (post == null) ? "" : post.get("constraint", "");
final String cat = (post == null) ? "href" : post.get("cat", "href"); final String cat = (post == null) ? "href" : post.get("cat", "href");
@ -114,7 +113,6 @@ public class index {
prop.put("searchoptions_resource-select_global", global ? "1" : "0"); prop.put("searchoptions_resource-select_global", global ? "1" : "0");
prop.put("searchoptions_resource-select_global-disabled", indexReceiveGranted ? "0" : "1"); prop.put("searchoptions_resource-select_global-disabled", indexReceiveGranted ? "0" : "1");
prop.put("searchoptions_resource-select_local", global ? "0" : "1"); prop.put("searchoptions_resource-select_local", global ? "0" : "1");
prop.putHTML("searchoptions_urlmaskfilter", urlmaskfilter);
prop.put("searchoptions_prefermaskoptions", "0"); prop.put("searchoptions_prefermaskoptions", "0");
prop.putHTML("searchoptions_prefermaskoptions_prefermaskfilter", prefermaskfilter); prop.putHTML("searchoptions_prefermaskoptions_prefermaskfilter", prefermaskfilter);
prop.put("searchoptions_indexofChecked", ""); prop.put("searchoptions_indexofChecked", "");

@ -120,7 +120,7 @@ $(function() {
#(searchvideo)#::<input type="radio" id="video" name="contentdom" value="video" #(check)#::checked="checked"#(/check)# /><label for="video">Video</label>&nbsp;&nbsp;#(/searchvideo)# #(searchvideo)#::<input type="radio" id="video" name="contentdom" value="video" #(check)#::checked="checked"#(/check)# /><label for="video">Video</label>&nbsp;&nbsp;#(/searchvideo)#
#(searchapp)#::<input type="radio" id="app" name="contentdom" value="app" #(check)#::checked="checked"#(/check)# /><label for="app">Applications</label>#(/searchapp)# #(searchapp)#::<input type="radio" id="app" name="contentdom" value="app" #(check)#::checked="checked"#(/check)# /><label for="app">Applications</label>#(/searchapp)#
&nbsp; &nbsp;
<a href="index.html?searchoptions=2&amp;count=#[count]#&amp;urlmaskfilter=#[urlmaskfilter]#&amp;prefermaskfilter=#[prefermaskfilter]#&amp;cat=#[cat]#&amp;constraint=#[constraint]#&amp;contentdom=#[contentdom]#&amp;former=#[former]#&amp;meanCount=#[meanCount]#">more options</a> <a href="index.html?searchoptions=2&amp;count=#[count]#&amp;prefermaskfilter=#[prefermaskfilter]#&amp;cat=#[cat]#&amp;constraint=#[constraint]#&amp;contentdom=#[contentdom]#&amp;former=#[former]#&amp;meanCount=#[meanCount]#">more options</a>
</div> </div>
#(/searchdomswitches)# #(/searchdomswitches)#
<input type="hidden" name="former" value="#[former]#" /> <input type="hidden" name="former" value="#[former]#" />
@ -129,7 +129,6 @@ $(function() {
<input type="hidden" name="verify" value="#[search.verify]#" /> <input type="hidden" name="verify" value="#[search.verify]#" />
<input type="hidden" name="resource" value="#[resource]#" /> <input type="hidden" name="resource" value="#[resource]#" />
<input type="hidden" name="nav" value="#[search.navigation]#" /> <input type="hidden" name="nav" value="#[search.navigation]#" />
<input type="hidden" name="urlmaskfilter" value="#[urlmaskfilter]#" />
<input type="hidden" name="prefermaskfilter" value="#[prefermaskfilter]#" /> <input type="hidden" name="prefermaskfilter" value="#[prefermaskfilter]#" />
<input type="hidden" name="depth" value="#[depth]#" /> <input type="hidden" name="depth" value="#[depth]#" />
<input type="hidden" name="cat" value="#[cat]#" /> <input type="hidden" name="cat" value="#[cat]#" />

@ -34,6 +34,7 @@ import java.util.Collection;
import java.util.ConcurrentModificationException; import java.util.ConcurrentModificationException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
@ -217,13 +218,6 @@ public class yacysearch {
boolean global = post.get("resource", "local").equals("global") && sb.peers.sizeConnected() > 0; boolean global = post.get("resource", "local").equals("global") && sb.peers.sizeConnected() > 0;
final boolean indexof = (post != null && post.get("indexof", "").equals("on")); final boolean indexof = (post != null && post.get("indexof", "").equals("on"));
final String originalUrlMask;
if ( post.containsKey("urlmaskfilter") ) {
originalUrlMask = post.get("urlmaskfilter", ".*");
} else {
originalUrlMask = ".*";
}
String prefermask = (post == null) ? "" : post.get("prefermaskfilter", ""); String prefermask = (post == null) ? "" : post.get("prefermaskfilter", "");
if ( !prefermask.isEmpty() && prefermask.indexOf(".*", 0) < 0 ) { if ( !prefermask.isEmpty() && prefermask.indexOf(".*", 0) < 0 ) {
prefermask = ".*" + prefermask + ".*"; prefermask = ".*" + prefermask + ".*";
@ -578,7 +572,7 @@ public class yacysearch {
} }
} }
if ( urlmask == null || urlmask.isEmpty() ) { if ( urlmask == null || urlmask.isEmpty() ) {
urlmask = originalUrlMask; urlmask = ".*";
} //if no urlmask was given } //if no urlmask was given
// read the language from the language-restrict option 'lr' // read the language from the language-restrict option 'lr'
@ -600,7 +594,7 @@ public class yacysearch {
} }
// the query // the query
final Collection<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute final List<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute
final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? query.length - 1 : Integer.MAX_VALUE; final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? query.length - 1 : Integer.MAX_VALUE;
@ -863,8 +857,7 @@ public class yacysearch {
"html", "html",
0, 0,
theQuery, theQuery,
suggestion, suggestion).toString());
originalUrlMask.toString()).toString());
prop.put("didYouMean_suggestions_" + meanCount + "_sep", "|"); prop.put("didYouMean_suggestions_" + meanCount + "_sep", "|");
meanCount++; meanCount++;
} catch (ConcurrentModificationException e) {break meanCollect;} } catch (ConcurrentModificationException e) {break meanCollect;}
@ -936,12 +929,7 @@ public class yacysearch {
.append("<img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" />&nbsp;"); .append("<img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" />&nbsp;");
} else { } else {
resnav.append("<a id=\"prevpage\" href=\""); resnav.append("<a id=\"prevpage\" href=\"");
resnav.append(QueryParams.navurl( resnav.append(QueryParams.navurl("html", thispage - 1, theQuery, null).toString());
"html",
thispage - 1,
theQuery,
null,
originalUrlMask).toString());
resnav resnav
.append("\"><img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /></a>&nbsp;"); .append("\"><img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /></a>&nbsp;");
} }
@ -956,9 +944,7 @@ public class yacysearch {
resnav.append("\" width=\"16\" height=\"16\" />&nbsp;"); resnav.append("\" width=\"16\" height=\"16\" />&nbsp;");
} else { } else {
resnav.append("<a href=\""); resnav.append("<a href=\"");
resnav.append(QueryParams resnav.append(QueryParams.navurl("html", i, theQuery, null).toString());
.navurl("html", i, theQuery, null, originalUrlMask)
.toString());
resnav.append("\"><img src=\"env/grafics/navd"); resnav.append("\"><img src=\"env/grafics/navd");
resnav.append(i + 1); resnav.append(i + 1);
resnav.append(".gif\" alt=\"page"); resnav.append(".gif\" alt=\"page");
@ -971,14 +957,8 @@ public class yacysearch {
.append("<img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" />"); .append("<img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" />");
} else { } else {
resnav.append("<a id=\"nextpage\" href=\""); resnav.append("<a id=\"nextpage\" href=\"");
resnav.append(QueryParams.navurl( resnav.append(QueryParams.navurl("html", thispage + 1, theQuery, null).toString());
"html", resnav.append("\"><img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" /></a>");
thispage + 1,
theQuery,
null,
originalUrlMask).toString());
resnav
.append("\"><img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" /></a>");
} }
final String resnavs = resnav.toString(); final String resnavs = resnav.toString();
prop.put("num-results_resnav", resnavs); prop.put("num-results_resnav", resnavs);
@ -1035,7 +1015,6 @@ public class yacysearch {
prop.put("count", itemsPerPage); prop.put("count", itemsPerPage);
prop.put("offset", startRecord); prop.put("offset", startRecord);
prop.put("resource", global ? "global" : "local"); prop.put("resource", global ? "global" : "local");
prop.putHTML("urlmaskfilter", originalUrlMask);
prop.putHTML("prefermaskfilter", prefermask); prop.putHTML("prefermaskfilter", prefermask);
prop.put("indexof", (indexof) ? "on" : "off"); prop.put("indexof", (indexof) ? "on" : "off");
prop.put("constraint", (constraint == null) ? "" : constraint.exportB64()); prop.put("constraint", (constraint == null) ? "" : constraint.exportB64());

@ -100,7 +100,7 @@ public class yacysearchitem {
prop.put("remoteResourceSize", Formatter.number(theSearch.query.remote_stored.get(), true)); prop.put("remoteResourceSize", Formatter.number(theSearch.query.remote_stored.get(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.query.remote_available.get(), true)); prop.put("remoteIndexCount", Formatter.number(theSearch.query.remote_available.get(), true));
prop.put("remotePeerCount", Formatter.number(theSearch.query.remote_peerCount.get(), true)); prop.put("remotePeerCount", Formatter.number(theSearch.query.remote_peerCount.get(), true));
prop.put("navurlBase", QueryParams.navurlBase("html", theSearch.query, null, theSearch.query.urlMask.toString()).toString()); prop.put("navurlBase", QueryParams.navurlBase("html", theSearch.query, null));
final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, ""); final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, "");
if (theSearch.query.contentdom == Classification.ContentDomain.TEXT || theSearch.query.contentdom == Classification.ContentDomain.ALL) { if (theSearch.query.contentdom == Classification.ContentDomain.TEXT || theSearch.query.contentdom == Classification.ContentDomain.ALL) {

@ -42,7 +42,7 @@ public class yacysearchlatestinfo {
prop.put("remoteResourceSize", Formatter.number(theSearch.query.remote_stored.get(), true)); prop.put("remoteResourceSize", Formatter.number(theSearch.query.remote_stored.get(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.query.remote_available.get(), true)); prop.put("remoteIndexCount", Formatter.number(theSearch.query.remote_available.get(), true));
prop.put("remotePeerCount", Formatter.number(theSearch.query.remote_peerCount.get(), true)); prop.put("remotePeerCount", Formatter.number(theSearch.query.remote_peerCount.get(), true));
prop.putJSON("navurlBase", QueryParams.navurlBase("html", theSearch.query, null, theSearch.query.urlMask.toString()).toString()); prop.putJSON("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString());
return prop; return prop;
} }

@ -93,7 +93,7 @@ public class yacysearchtrailer {
queryStringForUrl = (queryStringForUrl.substring(0, p) + queryStringForUrl.substring(p + nav.length())).trim(); queryStringForUrl = (queryStringForUrl.substring(0, p) + queryStringForUrl.substring(p + nav.length())).trim();
} }
prop.put(fileType, "nav-namespace_element_" + i + "_name", name); prop.put(fileType, "nav-namespace_element_" + i + "_name", name);
prop.put(fileType, "nav-namespace_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl, theSearch.query.urlMask.toString()).toString()); prop.put(fileType, "nav-namespace_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl).toString());
prop.put("nav-namespace_element_" + i + "_count", count); prop.put("nav-namespace_element_" + i + "_count", count);
prop.put("nav-namespace_element_" + i + "_nl", 1); prop.put("nav-namespace_element_" + i + "_nl", 1);
i++; i++;
@ -133,7 +133,7 @@ public class yacysearchtrailer {
prop.put(fileType, "nav-authors_element_" + i + "_modifier", "-" + nav); prop.put(fileType, "nav-authors_element_" + i + "_modifier", "-" + nav);
} }
prop.put(fileType, "nav-domains_element_" + i + "_name", name); prop.put(fileType, "nav-domains_element_" + i + "_name", name);
prop.put(fileType, "nav-domains_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl, theSearch.query.urlMask.toString()).toString()); prop.put(fileType, "nav-domains_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl).toString());
prop.put("nav-domains_element_" + i + "_count", count); prop.put("nav-domains_element_" + i + "_count", count);
prop.put("nav-domains_element_" + i + "_nl", 1); prop.put("nav-domains_element_" + i + "_nl", 1);
i++; i++;
@ -172,7 +172,7 @@ public class yacysearchtrailer {
prop.put(fileType, "nav-authors_element_" + i + "_modifier", "-" + nav); prop.put(fileType, "nav-authors_element_" + i + "_modifier", "-" + nav);
} }
prop.put(fileType, "nav-authors_element_" + i + "_name", name); prop.put(fileType, "nav-authors_element_" + i + "_name", name);
prop.put(fileType, "nav-authors_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl, theSearch.query.urlMask.toString()).toString()); prop.put(fileType, "nav-authors_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl).toString());
prop.put("nav-authors_element_" + i + "_count", count); prop.put("nav-authors_element_" + i + "_count", count);
prop.put("nav-authors_element_" + i + "_nl", 1); prop.put("nav-authors_element_" + i + "_nl", 1);
i++; i++;
@ -203,7 +203,7 @@ public class yacysearchtrailer {
prop.put("nav-topics_element_" + i + "_on", 1); prop.put("nav-topics_element_" + i + "_on", 1);
prop.put(fileType, "nav-topics_element_" + i + "_modifier", name); prop.put(fileType, "nav-topics_element_" + i + "_modifier", name);
prop.put(fileType, "nav-topics_element_" + i + "_name", name); prop.put(fileType, "nav-topics_element_" + i + "_name", name);
prop.put(fileType, "nav-topics_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl + "+" + name, theSearch.query.urlMask.toString()).toString()); prop.put(fileType, "nav-topics_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl + "+" + name).toString());
prop.put("nav-topics_element_" + i + "_count", count); prop.put("nav-topics_element_" + i + "_count", count);
prop.put("nav-topics_element_" + i + "_nl", 1); prop.put("nav-topics_element_" + i + "_nl", 1);
i++; i++;
@ -241,7 +241,7 @@ public class yacysearchtrailer {
prop.put(fileType, "nav-protocols_element_" + i + "_modifier", "-" + nav); prop.put(fileType, "nav-protocols_element_" + i + "_modifier", "-" + nav);
} }
prop.put(fileType, "nav-protocols_element_" + i + "_name", name); prop.put(fileType, "nav-protocols_element_" + i + "_name", name);
prop.put(fileType, "nav-protocols_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl, (p >= 0 && theSearch.query.urlMask.toString().startsWith(name)) ? ".*" : theSearch.query.urlMask.toString()).toString()); prop.put(fileType, "nav-protocols_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl).toString());
prop.put("nav-protocols_element_" + i + "_count", count); prop.put("nav-protocols_element_" + i + "_count", count);
prop.put("nav-protocols_element_" + i + "_nl", 1); prop.put("nav-protocols_element_" + i + "_nl", 1);
i++; i++;
@ -280,7 +280,7 @@ public class yacysearchtrailer {
prop.put(fileType, "nav-filetypes_element_" + i + "_modifier", "-" + nav); prop.put(fileType, "nav-filetypes_element_" + i + "_modifier", "-" + nav);
} }
prop.put(fileType, "nav-filetypes_element_" + i + "_name", name); prop.put(fileType, "nav-filetypes_element_" + i + "_name", name);
prop.put(fileType, "nav-filetypes_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl, (p >= 0 && theSearch.query.urlMask.toString().endsWith(name)) ? ".*" : theSearch.query.urlMask.toString()).toString()); prop.put(fileType, "nav-filetypes_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl).toString());
prop.put("nav-filetypes_element_" + i + "_count", count); prop.put("nav-filetypes_element_" + i + "_count", count);
prop.put("nav-filetypes_element_" + i + "_nl", 1); prop.put("nav-filetypes_element_" + i + "_nl", 1);
i++; i++;
@ -322,7 +322,7 @@ public class yacysearchtrailer {
prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_modifier", "-" + nav); prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_modifier", "-" + nav);
} }
prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_name", name); prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_name", name);
prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl, theSearch.query.urlMask.toString()).toString()); prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_url", QueryParams.navurl(fileType.name().toLowerCase(), 0, theSearch.query, queryStringForUrl).toString());
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_count", count); prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_count", count);
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_nl", 1); prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_nl", 1);
i++; i++;

@ -1081,7 +1081,7 @@ public final class Protocol
docList = rsp.getResults(); docList = rsp.getResults();
// no need to close this here because that sends a commit to remote solr which is not wanted here // no need to close this here because that sends a commit to remote solr which is not wanted here
} catch (IOException e) { } catch (IOException e) {
Network.log.logInfo("SEARCH failed (solr), Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + ")", e); Network.log.logInfo("SEARCH failed (solr), Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")", e);
return -1; return -1;
} }
} }

@ -34,6 +34,7 @@ import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
@ -114,7 +115,7 @@ public final class QueryParams {
public final String queryString; public final String queryString;
public final HandleSet query_include_hashes, query_exclude_hashes, query_all_hashes; public final HandleSet query_include_hashes, query_exclude_hashes, query_all_hashes;
private final Collection<String> query_include_words, query_exclude_words, query_all_words; private final List<String> query_include_words, query_exclude_words, query_all_words;
public int itemsPerPage; public int itemsPerPage;
public int offset; public int offset;
public final Pattern urlMask, prefer; public final Pattern urlMask, prefer;
@ -183,7 +184,7 @@ public final class QueryParams {
} }
} else { } else {
this.queryString = queryString; this.queryString = queryString;
final Collection<String>[] cq = cleanQuery(queryString); final List<String>[] cq = cleanQuery(queryString);
this.query_include_words = cq[0]; this.query_include_words = cq[0];
this.query_exclude_words = cq[1]; this.query_exclude_words = cq[1];
this.query_all_words = cq[2]; this.query_all_words = cq[2];
@ -237,9 +238,9 @@ public final class QueryParams {
public QueryParams( public QueryParams(
final String queryString, final String queryString,
final Collection<String> queryWords, final List<String> queryWords,
final Collection<String> excludeWords, final List<String> excludeWords,
final Collection<String> fullqueryWords, final List<String> fullqueryWords,
final HandleSet queryHashes, final HandleSet queryHashes,
final HandleSet excludeHashes, final HandleSet excludeHashes,
final HandleSet fullqueryHashes, final HandleSet fullqueryHashes,
@ -445,11 +446,11 @@ public final class QueryParams {
private static String seps = "'.,/&_"; static {seps += '"';} private static String seps = "'.,/&_"; static {seps += '"';}
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static Collection<String>[] cleanQuery(String querystring) { public static List<String>[] cleanQuery(String querystring) {
// returns three sets: a query set, an exclude set and a full query set // returns three sets: a query set, an exclude set and a full query set
final Collection<String> query_include_words = new ArrayList<String>(); final List<String> query_include_words = new ArrayList<String>();
final Collection<String> query_exclude_words = new ArrayList<String>(); final List<String> query_exclude_words = new ArrayList<String>();
final Collection<String> query_all_words = new ArrayList<String>(); final List<String> query_all_words = new ArrayList<String>();
if ((querystring != null) && (!querystring.isEmpty())) { if ((querystring != null) && (!querystring.isEmpty())) {
@ -484,7 +485,7 @@ public final class QueryParams {
} }
} }
} }
return new Collection[]{query_include_words, query_exclude_words, query_all_words}; return new List[]{query_include_words, query_exclude_words, query_all_words};
} }
public String queryString(final boolean encodeHTML) { public String queryString(final boolean encodeHTML) {
@ -533,29 +534,41 @@ public final class QueryParams {
else else
q.append(" AND ").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.nav_sitehash).append('\"'); q.append(" AND ").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.nav_sitehash).append('\"');
} }
String urlMaskPattern = this.urlMask.pattern();
// translate filetype navigation
int extm = urlMaskPattern.indexOf(".*\\.");
if (extm >= 0) {
String ext = urlMaskPattern.substring(extm + 4);
q.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(':').append(ext);
}
// translate protocol navigation
if (urlMaskPattern.startsWith("http://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("http");
else if (urlMaskPattern.startsWith("https://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("https");
else if (urlMaskPattern.startsWith("ftp://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("ftp");
else if (urlMaskPattern.startsWith("smb://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("smb");
else if (urlMaskPattern.startsWith("file://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("file");
// construct query // construct query
final SolrQuery params = new SolrQuery(); final SolrQuery params = new SolrQuery();
params.setQuery(q.toString());
params.setStart(this.offset); params.setStart(this.offset);
params.setRows(this.itemsPerPage); params.setRows(this.itemsPerPage);
params.setFacet(false); params.setFacet(false);
if (!this.urlMask_isCatchall) {
String urlMaskPattern = this.urlMask.pattern();
// translate filetype navigation
int extm = urlMaskPattern.indexOf(".*\\.");
if (extm >= 0) {
String ext = urlMaskPattern.substring(extm + 4);
q.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(':').append(ext);
}
// translate protocol navigation
if (urlMaskPattern.startsWith("http://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("http");
else if (urlMaskPattern.startsWith("https://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("https");
else if (urlMaskPattern.startsWith("ftp://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("ftp");
else if (urlMaskPattern.startsWith("smb://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("smb");
else if (urlMaskPattern.startsWith("file://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("file");
// add a filter query on urls
// solr doesn't like slashes, backslashes or doublepoints; remove them
int p;
while ((p = urlMaskPattern.indexOf("\\")) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 2);
while ((p = urlMaskPattern.indexOf(':')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1);
while ((p = urlMaskPattern.indexOf('/')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1);
params.setFilterQueries(YaCySchema.sku.getSolrFieldName() + ":/" + urlMaskPattern + "/");
}
params.setQuery(q.toString());
if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) { if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) {
// localtion search, no special ranking // localtion search, no special ranking
// try http://localhost:8090/solr/select?q=*:*&fq={!bbox sfield=coordinate_p pt=50.17,8.65 d=1} // try http://localhost:8090/solr/select?q=*:*&fq={!bbox sfield=coordinate_p pt=50.17,8.65 d=1}
@ -579,9 +592,15 @@ public final class QueryParams {
return params; return params;
} }
public static StringBuilder solrQueryString(Collection<String> include, Collection<String> exclude, SolrConfiguration configuration) { public static StringBuilder solrQueryString(List<String> include, List<String> exclude, SolrConfiguration configuration) {
final StringBuilder q = new StringBuilder(80); final StringBuilder q = new StringBuilder(80);
// parse special requests
if (include.size() == 1 && exclude.size() == 0) {
String w = include.get(0);
if (Segment.catchallString.equals(w)) return new StringBuilder("*:*");
}
// add text query // add text query
int wc = 0; int wc = 0;
StringBuilder w = new StringBuilder(80); StringBuilder w = new StringBuilder(80);
@ -628,7 +647,7 @@ public final class QueryParams {
} }
} }
public Collection<String>[] queryWords() { public List<String>[] queryWords() {
return cleanQuery(this.queryString); return cleanQuery(this.queryString);
} }
@ -719,11 +738,9 @@ public final class QueryParams {
* @param addToQuery * @param addToQuery
* @return * @return
*/ */
public static StringBuilder navurl( public static StringBuilder navurl(final String ext, final int page, final QueryParams theQuery, final String newQueryString) {
final String ext, final int page, final QueryParams theQuery,
final String newQueryString, final String originalUrlMask) {
final StringBuilder sb = navurlBase(ext, theQuery, newQueryString, originalUrlMask); final StringBuilder sb = navurlBase(ext, theQuery, newQueryString);
sb.append(ampersand); sb.append(ampersand);
sb.append("startRecord="); sb.append("startRecord=");
@ -732,9 +749,7 @@ public final class QueryParams {
return sb; return sb;
} }
public static StringBuilder navurlBase( public static StringBuilder navurlBase(final String ext, final QueryParams theQuery, final String newQueryString) {
final String ext, final QueryParams theQuery,
final String newQueryString, final String originalUrlMask) {
final StringBuilder sb = new StringBuilder(120); final StringBuilder sb = new StringBuilder(120);
sb.append("/yacysearch."); sb.append("/yacysearch.");
@ -754,10 +769,6 @@ public final class QueryParams {
sb.append("verify="); sb.append("verify=");
sb.append(theQuery.snippetCacheStrategy == null ? "false" : theQuery.snippetCacheStrategy.toName()); sb.append(theQuery.snippetCacheStrategy == null ? "false" : theQuery.snippetCacheStrategy.toName());
sb.append(ampersand);
sb.append("urlmaskfilter=");
sb.append(originalUrlMask);
sb.append(ampersand); sb.append(ampersand);
sb.append("prefermaskfilter="); sb.append("prefermaskfilter=");
sb.append(theQuery.prefer); sb.append(theQuery.prefer);

Loading…
Cancel
Save