From c34af7fe9446c6b76f3efcbe977707baebf727ac Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 12 Feb 2013 03:42:46 +0100 Subject: [PATCH] extended JSON Response Writer and Opensearch Response Writer for the Solr search interface in such a way that it is possible to use this interface for the yacyinteractive search. This search interface is now much faster using the Solr search directly. For the Solr interface it was necessary to create a translation from the YaCy search modifiers to the Solr facet selection. This was added in such a way that it becomes generic for the normal YaCy search and as an on-top evaluation for Solr queries. --- htroot/js/yacyinteractive.js | 5 +- htroot/solr/select.java | 12 +- htroot/yacy/search.java | 39 ++-- htroot/yacyinteractive.html | 3 +- htroot/yacysearch.java | 121 ++---------- source/net/yacy/cora/document/UTF8.java | 2 +- .../responsewriter/JsonResponseWriter.java | 74 ++++++-- .../OpensearchResponseWriter.java | 19 +- source/net/yacy/peers/Protocol.java | 23 +-- source/net/yacy/peers/RemoteSearch.java | 15 +- .../net/yacy/search/query/QueryModifier.java | 179 ++++++++++++++++++ source/net/yacy/search/query/QueryParams.java | 107 +++++------ .../net/yacy/search/query/RankingProcess.java | 4 +- source/net/yacy/search/query/SearchEvent.java | 4 +- 14 files changed, 363 insertions(+), 244 deletions(-) create mode 100644 source/net/yacy/search/query/QueryModifier.java diff --git a/htroot/js/yacyinteractive.js b/htroot/js/yacyinteractive.js index 9c99d37b0..87c07b7ec 100644 --- a/htroot/js/yacyinteractive.js +++ b/htroot/js/yacyinteractive.js @@ -36,7 +36,8 @@ function search(search, count, offset) { } else if (window.ActiveXObject) { // IE self.xmlHttpReq = new ActiveXObject("Microsoft.XMLHTTP"); } - self.xmlHttpReq.open('GET', "yacysearch.json?verify=false&resource=local&nav=all&contentdom=all&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord + "&query=" + query, true); + //self.xmlHttpReq.open('GET', 
"yacysearch.json?verify=false&resource=local&nav=all&contentdom=all&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord + "&query=" + query, true); + self.xmlHttpReq.open('GET', "solr/select?hl=false&wt=yjson&facet=true&facet.mincount=1&facet.field=host_s&facet.field=url_file_ext_s&facet.field=url_protocol_s&facet.field=author_sxt&start=" + startRecord + "&rows=" + maximumRecords + "&startRecord=" + startRecord + "&query=" + query, true); self.xmlHttpReq.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded'); self.xmlHttpReq.onreadystatechange = function() { if (self.xmlHttpReq.readyState == 4) { @@ -143,7 +144,7 @@ function resultNavigation() { } else { // check if there is a filetype constraint and offer a removal if (modifier != "") { - html += "remove the filter '" + modifier + "'"; + html += "remove the filter '" + modifier + "'"; } } diff --git a/htroot/solr/select.java b/htroot/solr/select.java index 5fd789126..70950269c 100644 --- a/htroot/solr/select.java +++ b/htroot/solr/select.java @@ -41,6 +41,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.query.AccessTracker; +import net.yacy.search.query.QueryModifier; import net.yacy.search.query.SearchEvent; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -144,7 +145,14 @@ public class select { sb.intermissionAllThreads(3000); // tell all threads to do nothing for a specific time // rename post fields according to result style - if (!post.containsKey(CommonParams.Q)) post.put(CommonParams.Q, post.remove("query")); // sru patch + if (!post.containsKey(CommonParams.Q) && post.containsKey("query")) { + String querystring = post.get("query", ""); + post.remove("query"); + QueryModifier modifier = new QueryModifier(); + querystring = modifier.parse(querystring); + modifier.apply(post); + post.put(CommonParams.Q, querystring); // sru patch + } String q = 
post.get(CommonParams.Q, ""); if (!post.containsKey(CommonParams.START)) post.put(CommonParams.START, post.remove("startRecord")); // sru patch post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, post.getInt("maximumRecords", 10)), (authenticated) ? 5000 : 100)); @@ -164,7 +172,7 @@ public class select { } // if this is a call to YaCys special search formats, enhance the query with field assignments - if (responseWriter instanceof JsonResponseWriter || responseWriter instanceof OpensearchResponseWriter) { + if ((responseWriter instanceof JsonResponseWriter || responseWriter instanceof OpensearchResponseWriter) && "true".equals(post.get("hl", "true"))) { // add options for snippet generation post.put("hl", "true"); post.put("hl.fl", "text_t,h1,h2"); diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 696eb4680..9272db816 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -69,6 +69,7 @@ import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; import net.yacy.search.query.AccessTracker; import net.yacy.search.query.QueryGoal; +import net.yacy.search.query.QueryModifier; import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; @@ -108,16 +109,20 @@ public final class search { final String query = post.get("query", ""); // a string of word hashes that shall be searched and combined final String exclude= post.get("exclude", "");// a string of word hashes that shall not be within the search result final String urls = post.get("urls", ""); // a string of url hashes that are preselected for the search: no other may be returned - final String abstracts = post.get("abstracts", ""); // a string of word hashes for abstracts that shall be generated, or 'auto' (for maxcount-word), or '' (for none) + final String abstracts = post.get("abstracts", ""); // a string of word hashes for abstracts that shall be generated, or 'auto' (for 
maxcount-word), or '' (for none) final int count = Math.min((int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_DEFAULT, 100), post.getInt("count", 10)); // maximum number of wanted results final long maxtime = Math.min((int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000), post.getLong("time", 3000)); // maximum waiting time final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE); final String prefer = post.get("prefer", ""); - final String modifier = post.get("modifier", "").trim(); final String contentdom = post.get("contentdom", "all"); final String filter = post.get("filter", ".*"); // a filter on the url - String sitehash = post.get("sitehash", ""); if (sitehash.isEmpty()) sitehash = null; - String author = post.get("author", ""); if (author.isEmpty()) author = null; + QueryModifier modifier = new QueryModifier(); + modifier.sitehost = post.get("sitehost", ""); if (modifier.sitehost.isEmpty()) modifier.sitehost = null; + modifier.sitehash = post.get("sitehash", ""); if (modifier.sitehash.isEmpty()) modifier.sitehash = null; + modifier.author = post.get("author", ""); if (modifier.author.isEmpty()) modifier.author = null; + modifier.filetype = post.get("filetype", ""); if (modifier.filetype.isEmpty()) modifier.filetype = null; + modifier.protocol = post.get("protocol", ""); if (modifier.protocol.isEmpty()) modifier.protocol = null; + modifier.parse(post.get("modifier", "").trim()); String language = post.get("language", ""); if (language == null || language.isEmpty() || !ISO639.exists(language)) { // take language from the user agent @@ -228,22 +233,25 @@ public final class search { null, // no snippet computation count, 0, - filter, null, null, null, null, + filter, + null, + null, QueryParams.Searchdom.LOCAL, -1, null, false, - sitehash, - null, null, - author, DigestURI.TLD_any_zone_filter, client, false, indexSegment, rankingProfile, header.get(RequestHeader.USER_AGENT, ""), - false, false, 0.0d, 0.0d, 0.0d + 
false, + false, + 0.0d, + 0.0d, + 0.0d ); Network.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links"); @@ -290,22 +298,25 @@ public final class search { null, // no snippet computation count, 0, - filter, null, null, null, null, + filter, + null, + null, QueryParams.Searchdom.LOCAL, -1, constraint, false, - sitehash, - null, null, - author, DigestURI.TLD_any_zone_filter, client, false, sb.index, rankingProfile, header.get(RequestHeader.USER_AGENT, ""), - false, false, 0.0d, 0.0d, 0.0d + false, + false, + 0.0d, + 0.0d, + 0.0d ); Network.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links"); EventChannel.channels(EventChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()), "")); diff --git a/htroot/yacyinteractive.html b/htroot/yacyinteractive.html index dd1a3b1a6..a6d0668f2 100644 --- a/htroot/yacyinteractive.html +++ b/htroot/yacyinteractive.html @@ -36,7 +36,8 @@
API diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index f7f42da98..c6f5a7841 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -78,6 +78,7 @@ import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; import net.yacy.search.query.AccessTracker; import net.yacy.search.query.QueryGoal; +import net.yacy.search.query.QueryModifier; import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; @@ -345,9 +346,7 @@ public class yacysearch { if ( !block && (post == null || post.get("cat", "href").equals("href")) ) { String urlmask = null; - String protocol = null; String tld = null; - String ext = null; String inlink = null; // check available memory and clean up if necessary @@ -357,8 +356,9 @@ public class yacysearch { } final RankingProfile ranking = sb.getRanking(); - final StringBuilder modifier = new StringBuilder(20); - + final QueryModifier modifier = new QueryModifier(); + querystring = modifier.parse(querystring); + int stp = querystring.indexOf('*'); if (stp >= 0) { querystring = querystring.substring(0, stp) + Segment.catchallString + querystring.substring(stp + 1); @@ -367,38 +367,13 @@ public class yacysearch { querystring = querystring.replace("/near", ""); ranking.allZero(); // switch off all attributes ranking.coeff_worddistance = RankingProfile.COEFF_MAX; - modifier.append("/near "); + modifier.add("/near"); } if ( querystring.indexOf("/date", 0) >= 0 ) { querystring = querystring.replace("/date", ""); ranking.allZero(); // switch off all attributes ranking.coeff_date = RankingProfile.COEFF_MAX; - modifier.append("/date "); - } - if ( querystring.indexOf("/https", 0) >= 0 ) { - querystring = querystring.replace("/https", ""); - protocol = "https"; - modifier.append("/https "); - } else if ( querystring.indexOf("/http", 0) >= 0 ) { - querystring = querystring.replace("/http", ""); - protocol = "http"; - modifier.append("/http "); - 
} - if ( querystring.indexOf("/ftp", 0) >= 0 ) { - querystring = querystring.replace("/ftp", ""); - protocol = "ftp"; - modifier.append("/ftp "); - } - if ( querystring.indexOf("/smb", 0) >= 0 ) { - querystring = querystring.replace("/smb", ""); - protocol = "smb"; - modifier.append("/smb "); - } - - if ( querystring.indexOf("/file", 0) >= 0 ) { - querystring = querystring.replace("/file", ""); - protocol = "file"; - modifier.append("/file "); + modifier.add("/date"); } if ( querystring.indexOf("/location", 0) >= 0 ) { @@ -407,7 +382,7 @@ public class yacysearch { constraint = new Bitfield(4); } constraint.set(Condenser.flag_cat_haslocation, true); - modifier.append("/location "); + modifier.add("/location"); } final int lrp = querystring.indexOf("/language/", 0); @@ -418,7 +393,7 @@ public class yacysearch { } querystring = querystring.replace("/language/" + language, ""); language = language.toLowerCase(); - modifier.append("/language/").append(language).append(' '); + modifier.add("/language/" + language); } final int inurlp = querystring.indexOf("inurl:", 0); @@ -432,7 +407,7 @@ public class yacysearch { if ( !urlstr.isEmpty() ) { urlmask = urlmask == null ? ".*" + urlstr + ".*" : urlmask + urlstr + ".*"; } - modifier.append("inurl:").append(urlstr).append(' '); + modifier.add("inurl:" + urlstr); } final int inlinkp = querystring.indexOf("inlink:", 0); @@ -443,22 +418,7 @@ public class yacysearch { } inlink = querystring.substring(inlinkp + 7, ftb); querystring = querystring.replace("inlink:" + inlink, ""); - modifier.append("inlink:").append(inlink).append(' '); - } - - final int filetype = querystring.indexOf("filetype:", 0); - if ( filetype >= 0 ) { - int ftb = querystring.indexOf(' ', filetype); - if ( ftb == -1 ) { - ftb = querystring.length(); - } - ext = querystring.substring(filetype + 9, ftb); - querystring = querystring.replace("filetype:" + ext, ""); - while ( !ext.isEmpty() && ext.charAt(0) == '.' 
) { - ext = ext.substring(1); - } - modifier.append("filetype:").append(ext).append(' '); - if (ext.isEmpty()) ext = null; + modifier.add("inlink:" + inlink); } int voc = 0; @@ -473,7 +433,7 @@ public class yacysearch { vocabulary = querystring.substring(voc, ve); querystring = querystring.substring(0, voc) + querystring.substring(ve); } - modifier.append(vocabulary).append(' '); + modifier.add(vocabulary); vocabulary = vocabulary.substring(12); int p = vocabulary.indexOf('/'); if (p > 0) { @@ -506,58 +466,16 @@ public class yacysearch { } } - final int site = querystring.indexOf("site:", 0); - String sitehash = null; - String sitehost = null; - if ( site >= 0 ) { - int ftb = querystring.indexOf(' ', site); - if ( ftb == -1 ) { - ftb = querystring.length(); - } - sitehost = querystring.substring(site + 5, ftb); - querystring = querystring.replace("site:" + sitehost, ""); - while ( sitehost.length() > 0 && sitehost.charAt(0) == '.' ) { - sitehost = sitehost.substring(1); - } - while ( sitehost.endsWith(".") ) { - sitehost = sitehost.substring(0, sitehost.length() - 1); - } - sitehash = DigestURI.hosthash(sitehost); - modifier.append("site:").append(sitehost).append(' '); - } - final int heuristicBlekko = querystring.indexOf("/heuristic/blekko", 0); if ( heuristicBlekko >= 0 ) { querystring = querystring.replace("/heuristic/blekko", ""); - modifier.append("/heuristic/blekko "); + modifier.add("/heuristic/blekko"); } final int heuristicTwitter = querystring.indexOf("/heuristic/twitter", 0); if ( heuristicBlekko >= 0 ) { querystring = querystring.replace("/heuristic/twitter", ""); - modifier.append("/heuristic/twitter "); - } - - final int authori = querystring.indexOf("author:", 0); - String author = null; - if ( authori >= 0 ) { - // check if the author was given with single quotes or without - final boolean quotes = (querystring.charAt(authori + 7) == '('); - if ( quotes ) { - int ftb = querystring.indexOf(')', authori + 8); - if (ftb == -1) ftb = 
querystring.length() + 1; - author = querystring.substring(authori + 8, ftb); - querystring = querystring.replace("author:(" + author + ")", ""); - modifier.append("author:(").append(author).append(") "); - } else { - int ftb = querystring.indexOf(' ', authori); - if ( ftb == -1 ) { - ftb = querystring.length(); - } - author = querystring.substring(authori + 7, ftb); - querystring = querystring.replace("author:" + author, ""); - modifier.append("author:").append(author).append(' '); - } + modifier.add("/heuristic/twitter"); } final int tldp = querystring.indexOf("tld:", 0); @@ -566,7 +484,7 @@ public class yacysearch { if (ftb == -1) ftb = querystring.length(); tld = querystring.substring(tldp + 4, ftb); querystring = querystring.replace("tld:" + tld, ""); - modifier.append("tld:").append(tld).append(' '); + modifier.add("tld:" + tld); while ( tld.length() > 0 && tld.charAt(0) == '.' ) { tld = tld.substring(1); } @@ -714,7 +632,7 @@ public class yacysearch { final QueryParams theQuery = new QueryParams( qg, - modifier.toString().trim(), + modifier, maxDistance, prefermask, contentdom, @@ -723,17 +641,14 @@ public class yacysearch { snippetFetchStrategy, itemsPerPage, startRecord, - urlmask, protocol, tld, ext, inlink, + urlmask, tld, inlink, clustersearch && global ? QueryParams.Searchdom.CLUSTER : (global && indexReceiveGranted ? 
QueryParams.Searchdom.GLOBAL : QueryParams.Searchdom.LOCAL), 20, constraint, true, - sitehash, - sitehost, DigestURI.hosthashess(sb.getConfig("search.excludehosth", "")), - author, DigestURI.TLD_any_zone_filter, client, authenticated, @@ -798,8 +713,8 @@ public class yacysearch { (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0)); if ( startRecord == 0 ) { - if ( sitehost != null && sb.getConfigBool("heuristic.site", false) && authenticated ) { - sb.heuristicSite(theSearch, sitehost); + if ( modifier.sitehost != null && sb.getConfigBool("heuristic.site", false) && authenticated ) { + sb.heuristicSite(theSearch, modifier.sitehost); } if ( (heuristicBlekko >= 0 || sb.getConfigBool("heuristic.blekko", false)) && authenticated ) { sb.heuristicRSS("http://blekko.com/ws/$+/rss", theSearch, "blekko"); diff --git a/source/net/yacy/cora/document/UTF8.java b/source/net/yacy/cora/document/UTF8.java index 053d36edd..7c0babcbc 100644 --- a/source/net/yacy/cora/document/UTF8.java +++ b/source/net/yacy/cora/document/UTF8.java @@ -104,7 +104,7 @@ public class UTF8 implements Comparator { public final static StringBody StringBody(final String s) { try { - return new StringBody(s, charset); + return new StringBody(s == null ? 
"" : s, charset); } catch (final UnsupportedEncodingException e) { e.printStackTrace(); return null; diff --git a/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java index 1f3c01f75..160f9aad8 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java @@ -207,30 +207,78 @@ public class JsonResponseWriter implements QueryResponseWriter { writer.write(",\n".toCharArray()); } } - writer.write("]\n".toCharArray()); - writer.write(",\n\"navigation\":[\n"); + writer.write("],\n".toCharArray()); + + + writer.write("\"navigation\":[\n"); + // the facets can be created with the options &facet=true&facet.mincount=1&facet.field=host_s&facet.field=url_file_ext_s&facet.field=url_protocol_s&facet.field=author_sxt @SuppressWarnings("unchecked") - NamedList hosts = facetFields == null ? null : (NamedList) facetFields.get(YaCySchema.host_s.getSolrFieldName()); + NamedList domains = facetFields == null ? null : (NamedList) facetFields.get(YaCySchema.host_s.getSolrFieldName()); @SuppressWarnings("unchecked") - NamedList exts = facetFields == null ? null : (NamedList) facetFields.get(YaCySchema.url_file_ext_s.getSolrFieldName()); + NamedList filetypes = facetFields == null ? null : (NamedList) facetFields.get(YaCySchema.url_file_ext_s.getSolrFieldName()); @SuppressWarnings("unchecked") - NamedList prots = facetFields == null ? 
null : (NamedList) facetFields.get(YaCySchema.url_protocol_s.getSolrFieldName()); - - writer.write("{\"facetname\":\"filetypes\",\"displayname\":\"Filetypes\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[]},\n".toCharArray()); - writer.write("{\"facetname\":\"protocols\",\"displayname\":\"Protocol\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[]},\n".toCharArray()); - writer.write("{\"facetname\":\"domains\",\"displayname\":\"Domains\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[]},\n".toCharArray()); - writer.write("{\"facetname\":\"topics\",\"displayname\":\"Topics\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[]}\n".toCharArray()); + NamedList protocols = facetFields == null ? null : (NamedList) facetFields.get(YaCySchema.url_protocol_s.getSolrFieldName()); + @SuppressWarnings("unchecked") + NamedList authors = facetFields == null ? null : (NamedList) facetFields.get(YaCySchema.author_sxt.getSolrFieldName()); + + if (domains != null) { + writer.write("{\"facetname\":\"domains\",\"displayname\":\"Domains\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[".toCharArray()); + for (int i = 0; i < domains.size(); i++) { + facetEntry(writer, "site", domains.getName(i), Integer.toString(domains.getVal(i))); + if (i < domains.size() - 1) writer.write(','); + writer.write("\n"); + } + writer.write("]},\n".toCharArray()); + } + if (filetypes != null) { + writer.write("{\"facetname\":\"filetypes\",\"displayname\":\"Filetypes\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[".toCharArray()); + List> l = new ArrayList>(); + for (Map.Entry e: filetypes) { + if (e.getKey().length() <= 6) l.add(e); + if (l.size() >= 16) break; + } + for (int i = 0; i < l.size(); i++) { + Map.Entry e = l.get(i); + facetEntry(writer, "filetype", e.getKey(), Integer.toString(e.getValue())); + if (i < l.size() - 1) 
writer.write(','); + writer.write("\n"); + } + writer.write("]},\n".toCharArray()); + } + if (protocols != null) { + writer.write("{\"facetname\":\"protocols\",\"displayname\":\"Protocol\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[".toCharArray()); + for (int i = 0; i < protocols.size(); i++) { + facetEntry(writer, "protocol", protocols.getName(i), Integer.toString(protocols.getVal(i))); + if (i < protocols.size() - 1) writer.write(','); + writer.write("\n"); + } + writer.write("]},\n".toCharArray()); + } + if (authors != null) { + writer.write("{\"facetname\":\"authors\",\"displayname\":\"Authors\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[".toCharArray()); + for (int i = 0; i < authors.size(); i++) { + facetEntry(writer, "author", authors.getName(i), Integer.toString(authors.getVal(i))); + if (i < authors.size() - 1) writer.write(','); + writer.write("\n"); + } + writer.write("]},\n".toCharArray()); + } writer.write("]}]}\n".toCharArray()); } public static void solitaireTag(final Writer writer, final String tagname, String value) throws IOException { - if (value == null || value.length() == 0) return; + if (value == null) return; writer.write('"'); writer.write(tagname); writer.write("\":\""); writer.write(serverObjects.toJSON(value)); writer.write("\","); writer.write('\n'); } + private static void facetEntry(final Writer writer, final String modifier, final String propname, String value) throws IOException { + writer.write("{\"name\": \""); writer.write(propname); + writer.write("\", \"count\": \""); writer.write(value); + writer.write("\", \"modifier\": \""); writer.write(modifier); writer.write("%3A"); writer.write(propname); + writer.write("\"}"); + } } - /** { "channels": [{ @@ -246,9 +294,7 @@ public class JsonResponseWriter implements QueryResponseWriter { "startIndex": "0", "itemsPerPage": "10", "searchTerms": "uni-mainz", - "items": [ - { "title": "From dark matter to school 
experiments: Physicists meet in Mainz", "link": "http://www.phmi.uni-mainz.de/5305.php", diff --git a/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java index 33c2f8eb5..1422a23ea 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java @@ -243,6 +243,8 @@ public class OpensearchResponseWriter implements QueryResponseWriter { closeTag(writer, "item"); } + openTag(writer, "yacy:navigation"); + // the facets can be created with the options &facet=true&facet.mincount=1&facet.field=host_s&facet.field=url_file_ext_s&facet.field=url_protocol_s&facet.field=author_sxt @SuppressWarnings("unchecked") NamedList domains = facetFields == null ? null : (NamedList) facetFields.get(YaCySchema.host_s.getSolrFieldName()); @@ -253,25 +255,24 @@ public class OpensearchResponseWriter implements QueryResponseWriter { @SuppressWarnings("unchecked") NamedList authors = facetFields == null ? 
null : (NamedList) facetFields.get(YaCySchema.author_sxt.getSolrFieldName()); - openTag(writer, "yacy:navigation"); if (domains != null) { openTag(writer, "yacy:facet name=\"domains\" displayname=\"Domains\" type=\"String\" min=\"0\" max=\"0\" mean=\"0\""); - for (Map.Entry entry: domains) facetEntry(writer, entry.getKey(), Integer.toString(entry.getValue())); + for (Map.Entry entry: domains) facetEntry(writer, "site", entry.getKey(), Integer.toString(entry.getValue())); closeTag(writer, "yacy:facet"); } if (filetypes != null) { openTag(writer, "yacy:facet name=\"filetypes\" displayname=\"Filetypes\" type=\"String\" min=\"0\" max=\"0\" mean=\"0\""); - for (Map.Entry entry: filetypes) facetEntry(writer, entry.getKey(), Integer.toString(entry.getValue())); + for (Map.Entry entry: filetypes) facetEntry(writer, "filetype", entry.getKey(), Integer.toString(entry.getValue())); closeTag(writer, "yacy:facet"); } if (protocols != null) { openTag(writer, "yacy:facet name=\"protocols\" displayname=\"Protocols\" type=\"String\" min=\"0\" max=\"0\" mean=\"0\""); - for (Map.Entry entry: protocols) facetEntry(writer, entry.getKey(), Integer.toString(entry.getValue())); + for (Map.Entry entry: protocols) facetEntry(writer, "protocol", entry.getKey(), Integer.toString(entry.getValue())); closeTag(writer, "yacy:facet"); } if (authors != null) { openTag(writer, "yacy:facet name=\"authors\" displayname=\"Authors\" type=\"String\" min=\"0\" max=\"0\" mean=\"0\""); - for (Map.Entry entry: authors) facetEntry(writer, entry.getKey(), Integer.toString(entry.getValue())); + for (Map.Entry entry: authors) facetEntry(writer, "author", entry.getKey(), Integer.toString(entry.getValue())); closeTag(writer, "yacy:facet"); } closeTag(writer, "yacy:navigation"); @@ -334,10 +335,10 @@ public class OpensearchResponseWriter implements QueryResponseWriter { writer.write("\n"); } - private static void facetEntry(final Writer writer, final String propname, String value) throws IOException { - 
writer.write("\n"); } diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index d7a1bb3db..b6e4fd058 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -105,6 +105,7 @@ import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; +import net.yacy.search.query.QueryModifier; import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SecondarySearchSuperviser; import net.yacy.search.snippet.TextSnippet; @@ -597,10 +598,7 @@ public final class Protocol { final SearchEvent event, final String wordhashes, final String excludehashes, - final String modifier, final String language, - final String sitehash, - final String author, final String contentdom, final int count, final long time, @@ -635,10 +633,7 @@ public final class Protocol { wordhashes, excludehashes, "", - modifier, language, - sitehash, - author, contentdom, count, time, @@ -716,9 +711,6 @@ public final class Protocol { "", urlhashes, "", - "", - "", - "", contentdom, count, time, @@ -889,10 +881,7 @@ public final class Protocol { final String wordhashes, final String excludehashes, final String urlhashes, - final String modifier, final String language, - final String sitehash, - final String author, final String contentdom, final int count, final long time, @@ -931,8 +920,8 @@ public final class Protocol { String filter = event.query.urlMask.pattern().toString(); if (event.query.tld != null) filter = ".*" + event.query.tld + ".*" + filter; - if (event.query.protocol != null) filter = ".*" + event.query.protocol + ".*" + filter; - if (event.query.ext != null) filter = filter + ".*" + event.query.ext + ".*"; + if (event.query.modifier.protocol != null) filter = ".*" + event.query.modifier.protocol + ".*" + filter; + if (event.query.modifier.filetype != null) filter = filter + ".*" + event.query.modifier.filetype + ".*"; 
parts.put("myseed", UTF8.StringBody((event.peers.mySeed() == null) ? "" : event.peers.mySeed().genSeedStr(key))); parts.put("count", UTF8.StringBody(Integer.toString(Math.max(10, count)))); parts.put("time", UTF8.StringBody(Long.toString(Math.max(3000, time)))); @@ -943,10 +932,10 @@ public final class Protocol { parts.put("urls", UTF8.StringBody(urlhashes)); parts.put("prefer", UTF8.StringBody(event.query.prefer.pattern())); parts.put("filter", UTF8.StringBody(filter)); - parts.put("modifier", UTF8.StringBody(modifier)); + parts.put("modifier", UTF8.StringBody(event.query.modifier.toString())); parts.put("language", UTF8.StringBody(language)); - parts.put("sitehash", UTF8.StringBody(sitehash)); - parts.put("author", UTF8.StringBody(author)); + parts.put("sitehash", UTF8.StringBody(event.query.modifier.sitehash)); + parts.put("author", UTF8.StringBody(event.query.modifier.author)); parts.put("contentdom", UTF8.StringBody(contentdom)); parts.put("ttl", UTF8.StringBody("0")); parts.put("maxdist", UTF8.StringBody(Integer.toString(maxDistance))); diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 567ba04d5..2574f84de 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -44,7 +44,7 @@ public class RemoteSearch extends Thread { private static final ThreadGroup ysThreadGroup = new ThreadGroup("yacySearchThreadGroup"); final private SearchEvent event; - final private String wordhashes, excludehashes, sitehash, author, contentdom; + final private String wordhashes, excludehashes, contentdom; final private int partitions; final private SecondarySearchSuperviser secondarySearchSuperviser; final private Blacklist blacklist; @@ -52,17 +52,13 @@ public class RemoteSearch extends Thread { private int urls; private final int count, maxDistance; private final long time; - final private QueryParams.Modifier modifier; final private String language; public RemoteSearch( final SearchEvent 
event, final String wordhashes, final String excludehashes, - final QueryParams.Modifier modifier, final String language, - final String sitehash, - final String author, final String contentdom, final int count, final long time, @@ -75,10 +71,7 @@ public class RemoteSearch extends Thread { this.event = event; this.wordhashes = wordhashes; this.excludehashes = excludehashes; - this.modifier = modifier; this.language = language; - this.sitehash = sitehash; - this.author = author; this.contentdom = contentdom; this.partitions = partitions; this.secondarySearchSuperviser = secondarySearchSuperviser; @@ -98,10 +91,7 @@ public class RemoteSearch extends Thread { this.event, this.wordhashes, this.excludehashes, - this.modifier.getModifier(), this.language, - this.sitehash, - this.author, this.contentdom, this.count, this.time, @@ -187,10 +177,7 @@ public class RemoteSearch extends Thread { event, QueryParams.hashSet2hashString(event.query.getQueryGoal().getIncludeHashes()), QueryParams.hashSet2hashString(event.query.getQueryGoal().getExcludeHashes()), - event.query.modifier, event.query.targetlang == null ? "" : event.query.targetlang, - event.query.nav_sitehash == null ? "" : event.query.nav_sitehash, - event.query.author == null ? "" : event.query.author, event.query.contentdom == null ? "all" : event.query.contentdom.toString(), count, time, diff --git a/source/net/yacy/search/query/QueryModifier.java b/source/net/yacy/search/query/QueryModifier.java new file mode 100644 index 000000000..44491fec7 --- /dev/null +++ b/source/net/yacy/search/query/QueryModifier.java @@ -0,0 +1,179 @@ +/** + * QueryModifier + * Copyright 2013 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany + * First published 12.02.2013 on http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. + */ + +package net.yacy.search.query; + +import org.apache.solr.common.params.CommonParams; + +import net.yacy.cora.federate.solr.YaCySchema; +import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.server.serverObjects; + + +public class QueryModifier { + + private final StringBuilder modifier; + public String sitehost, sitehash, filetype, protocol, author; + + public QueryModifier() { + this.sitehash = null; + this.sitehost = null; + this.filetype = null; + this.protocol = null; + this.author = null; + this.modifier = new StringBuilder(20); + } + + public String parse(String querystring) { + + // parse protocol + if ( querystring.indexOf("/https", 0) >= 0 ) { + querystring = querystring.replace("/https", ""); + protocol = "https"; + add("/https"); + } else if ( querystring.indexOf("/http", 0) >= 0 ) { + querystring = querystring.replace("/http", ""); + protocol = "http"; + add("/http"); + } + if ( querystring.indexOf("/ftp", 0) >= 0 ) { + querystring = querystring.replace("/ftp", ""); + protocol = "ftp"; + add("/ftp"); + } + if ( querystring.indexOf("/smb", 0) >= 0 ) { + querystring = querystring.replace("/smb", ""); + protocol = "smb"; + add("/smb"); + } + if ( querystring.indexOf("/file", 0) >= 0 ) { + querystring =
querystring.replace("/file", ""); + protocol = "file"; + add("/file"); + } + + // parse filetype + final int ftp = querystring.indexOf("filetype:", 0); + if ( ftp >= 0 ) { + int ftb = querystring.indexOf(' ', ftp); + if ( ftb == -1 ) { + ftb = querystring.length(); + } + filetype = querystring.substring(ftp + 9, ftb); + querystring = querystring.replace("filetype:" + filetype, ""); + while ( !filetype.isEmpty() && filetype.charAt(0) == '.' ) { + filetype = filetype.substring(1); + } + add("filetype:" + filetype); + if (filetype.isEmpty()) filetype = null; + } + + // parse site + final int sp = querystring.indexOf("site:", 0); + if ( sp >= 0 ) { + int ftb = querystring.indexOf(' ', sp); + if ( ftb == -1 ) { + ftb = querystring.length(); + } + sitehost = querystring.substring(sp + 5, ftb); + querystring = querystring.replace("site:" + sitehost, ""); + while ( sitehost.length() > 0 && sitehost.charAt(0) == '.' ) { + sitehost = sitehost.substring(1); + } + while ( sitehost.endsWith(".") ) { + sitehost = sitehost.substring(0, sitehost.length() - 1); + } + sitehash = DigestURI.hosthash(sitehost); + add("site:" + sitehost); + } + + // parse author + final int authori = querystring.indexOf("author:", 0); + if ( authori >= 0 ) { + // check if the author was given with single quotes or without + final boolean quotes = (querystring.charAt(authori + 7) == '('); + if ( quotes ) { + int ftb = querystring.indexOf(')', authori + 8); + if (ftb == -1) ftb = querystring.length() + 1; + author = querystring.substring(authori + 8, ftb); + querystring = querystring.replace("author:(" + author + ")", ""); + add("author:(" + author + ")"); + } else { + int ftb = querystring.indexOf(' ', authori); + if ( ftb == -1 ) { + ftb = querystring.length(); + } + author = querystring.substring(authori + 7, ftb); + querystring = querystring.replace("author:" + author, ""); + add("author:" + author); + } + } + + return querystring.trim(); + } + + public void add(String m) { + if (modifier.length() > 0 
&& modifier.charAt(modifier.length() - 1) != ' ' && m != null && m.length() > 0) modifier.append(' '); + if (m != null) modifier.append(m); + } + + public String toString() { + return this.modifier.toString(); + } + + public void apply(serverObjects post) { + + final StringBuilder fq = new StringBuilder(post.get(CommonParams.FQ,"")); + + if (this.sitehost != null && this.sitehost.length() > 0 && fq.indexOf(YaCySchema.host_s.getSolrFieldName()) < 0) { + // consider to search for hosts with 'www'-prefix, if not already part of the host name + if (this.sitehost.startsWith("www.")) { + fq.append(" AND (").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.sitehost.substring(4)).append('\"'); + fq.append(" OR ").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.sitehost).append("\")"); + } else { + fq.append(" AND (").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.sitehost).append('\"'); + fq.append(" OR ").append(YaCySchema.host_s.getSolrFieldName()).append(":\"www.").append(this.sitehost).append("\")"); + } + } + if (this.sitehash != null && this.sitehash.length() > 0 && fq.indexOf(YaCySchema.host_id_s.getSolrFieldName()) < 0) { + fq.append(" AND ").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.sitehash).append('\"'); + } + + if (this.filetype != null && this.filetype.length() > 0 && fq.indexOf(YaCySchema.url_file_ext_s.getSolrFieldName()) < 0) { + fq.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(":\"").append(this.filetype).append('\"'); + } + + if (this.author != null && this.author.length() > 0 && fq.indexOf(YaCySchema.author_sxt.getSolrFieldName()) < 0) { + fq.append(" AND ").append(YaCySchema.author_sxt.getSolrFieldName()).append(":\"").append(this.author).append('\"'); + } + + if (this.protocol != null && this.protocol.length() > 0 && fq.indexOf(YaCySchema.url_protocol_s.getSolrFieldName()) < 0) { + fq.append(" AND 
").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(":\"").append(this.protocol).append('\"'); + } + + if (fq.length() > 0) { + String fqs = fq.toString(); + if (fqs.startsWith(" AND ")) fqs = fqs.substring(5); + post.remove(CommonParams.FQ); + post.put(CommonParams.FQ, fqs); + } + } + +} diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index a5bfc79b5..8e37c6c56 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -88,20 +88,7 @@ public final class QueryParams { YaCySchema.host_s, YaCySchema.url_protocol_s, YaCySchema.url_file_ext_s, YaCySchema.author_sxt}; private static final int defaultmaxfacets = 30; - private static final String ampersand = "&"; - - public static class Modifier { - private String s; - private Modifier(final String modifier) { - this.s = modifier; - } - public String getModifier() { - return this.s; - } - } - - public static final Bitfield empty_constraint = new Bitfield(4, "AAAAAA"); public static final Pattern catchall_pattern = Pattern.compile(".*"); private static final Pattern matchnothing_pattern = Pattern.compile(""); @@ -112,7 +99,7 @@ public final class QueryParams { public Pattern urlMask; public final Pattern prefer; - public final String protocol, tld, ext, inlink; + public final String tld, inlink; boolean urlMask_isCatchall; public final Classification.ContentDomain contentdom; public final String targetlang; @@ -126,11 +113,8 @@ public final class QueryParams { public final RankingProfile ranking; private final Segment indexSegment; public final String clienthost; // this is the client host that starts the query, not a site operator - public final String nav_sitehost; // this is a domain name which is used to navigate to that host - public final String nav_sitehash; // this is a domain hash, 6 bytes long or null protected final Set siteexcludes; // set of domain hashes that are excluded if not 
included by sitehash - public final String author; - public final Modifier modifier; + public final QueryModifier modifier; public Seed remotepeer; public final long starttime; // the time when the query started, how long it should take and the time when the timeout is reached (milliseconds) protected final long maxtime; @@ -166,13 +150,11 @@ public final class QueryParams { final String userAgent) { this.queryGoal = new QueryGoal(query_original, query_words); this.ranking = ranking; - this.modifier = new Modifier(""); + this.modifier = new QueryModifier(); this.maxDistance = Integer.MAX_VALUE; this.urlMask = catchall_pattern; this.urlMask_isCatchall = true; - this.protocol = null; this.tld = null; - this.ext = null; this.inlink = null; this.prefer = matchnothing_pattern; this.contentdom = ContentDomain.ALL; @@ -186,10 +168,7 @@ public final class QueryParams { this.allofconstraint = false; this.snippetCacheStrategy = null; this.clienthost = null; - this.nav_sitehash = null; - this.nav_sitehost = null; this.siteexcludes = null; - this.author = null; this.remotepeer = null; this.starttime = Long.valueOf(System.currentTimeMillis()); this.maxtime = 10000; @@ -223,19 +202,23 @@ public final class QueryParams { public QueryParams( final QueryGoal queryGoal, - final String modifier, - final int maxDistance, final String prefer, final ContentDomain contentdom, + final QueryModifier modifier, + final int maxDistance, + final String prefer, + final ContentDomain contentdom, final String language, final Collection metatags, final CacheStrategy snippetCacheStrategy, - final int itemsPerPage, final int offset, - final String urlMask, final String protocol, final String tld, final String ext, final String inlink, - final Searchdom domType, final int domMaxTargets, - final Bitfield constraint, final boolean allofconstraint, - final String nav_sitehash, - final String nav_sitehost, + final int itemsPerPage, + final int offset, + final String urlMask, + final String tld, + final 
String inlink, + final Searchdom domType, + final int domMaxTargets, + final Bitfield constraint, + final boolean allofconstraint, final Set siteexcludes, - final String author, final int domainzone, final String host, final boolean specialRights, @@ -244,9 +227,12 @@ public final class QueryParams { final String userAgent, final boolean filterfailurls, final boolean filterscannerfail, - final double lat, final double lon, final double radius) { + final double lat, + final double lon, + final double radius + ) { this.queryGoal = queryGoal; - this.modifier = new Modifier(modifier == null ? "" : modifier); + this.modifier = modifier; this.ranking = ranking; this.maxDistance = maxDistance; this.contentdom = contentdom; @@ -259,22 +245,20 @@ public final class QueryParams { } this.urlMask_isCatchall = this.urlMask.toString().equals(catchall_pattern.toString()); if (this.urlMask_isCatchall) { - if (protocol != null) { - this.urlMask = Pattern.compile(protocol + ".*"); + if (modifier.protocol != null) { + this.urlMask = Pattern.compile(modifier.protocol + ".*"); this.urlMask_isCatchall = false; } if (tld != null) { this.urlMask = Pattern.compile(".*" + tld + ".*"); this.urlMask_isCatchall = false; } - if (ext != null) { - this.urlMask = Pattern.compile(".*" + ext + ".*"); + if (modifier.filetype != null) { + this.urlMask = Pattern.compile(".*" + modifier.filetype + ".*"); this.urlMask_isCatchall = false; } } - this.protocol = protocol; this.tld = tld; - this.ext = ext; this.inlink = inlink; try { this.prefer = Pattern.compile(prefer); @@ -289,10 +273,7 @@ public final class QueryParams { this.zonecode = domainzone; this.constraint = constraint; this.allofconstraint = allofconstraint; - this.nav_sitehash = nav_sitehash; assert nav_sitehash == null || nav_sitehash.length() == 6; - this.nav_sitehost = nav_sitehost; this.siteexcludes = siteexcludes != null && siteexcludes.isEmpty() ? 
null: siteexcludes; - this.author = author; assert author == null || !author.isEmpty(); this.snippetCacheStrategy = snippetCacheStrategy; this.clienthost = host; this.remotepeer = null; @@ -461,24 +442,24 @@ public final class QueryParams { // add site facets final StringBuilder fq = new StringBuilder(); - if (this.nav_sitehash == null && this.nav_sitehost == null) { + if (this.modifier.sitehash == null && this.modifier.sitehost == null) { if (this.siteexcludes != null) { for (String ex: this.siteexcludes) { fq.append(" AND -").append(YaCySchema.host_id_s.getSolrFieldName()).append(':').append(ex); } } } else { - if (this.nav_sitehost != null) { + if (this.modifier.sitehost != null) { // consider to search for hosts with 'www'-prefix, if not already part of the host name - if (this.nav_sitehost.startsWith("www.")) { - fq.append(" AND (").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.nav_sitehost.substring(4)).append('\"'); - fq.append(" OR ").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.nav_sitehost).append("\")"); + if (this.modifier.sitehost.startsWith("www.")) { + fq.append(" AND (").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.modifier.sitehost.substring(4)).append('\"'); + fq.append(" OR ").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.modifier.sitehost).append("\")"); } else { - fq.append(" AND (").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.nav_sitehost).append('\"'); - fq.append(" OR ").append(YaCySchema.host_s.getSolrFieldName()).append(":\"www.").append(this.nav_sitehost).append("\")"); + fq.append(" AND (").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.modifier.sitehost).append('\"'); + fq.append(" OR ").append(YaCySchema.host_s.getSolrFieldName()).append(":\"www.").append(this.modifier.sitehost).append("\")"); } } else - fq.append(" AND 
").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.nav_sitehash).append('\"'); + fq.append(" AND ").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.modifier.sitehash).append('\"'); } // add vocabulary facets @@ -487,20 +468,20 @@ public final class QueryParams { } // add author facets - if (this.author != null && this.author.length() > 0 && this.solrScheme.contains(YaCySchema.author_sxt)) { - fq.append(" AND ").append(YaCySchema.author_sxt.getSolrFieldName()).append(":\"").append(this.author).append('\"'); + if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrScheme.contains(YaCySchema.author_sxt)) { + fq.append(" AND ").append(YaCySchema.author_sxt.getSolrFieldName()).append(":\"").append(this.modifier.author).append('\"'); } - if (this.protocol != null) { - fq.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append(this.protocol); + if (this.modifier.protocol != null) { + fq.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append(this.modifier.protocol); } if (this.tld != null) { fq.append(" AND ").append(YaCySchema.host_dnc_s.getSolrFieldName()).append(":\"").append(this.tld).append('\"'); } - if (this.ext != null) { - fq.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(":\"").append(this.ext).append('\"'); + if (this.modifier.filetype != null) { + fq.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(":\"").append(this.modifier.filetype).append('\"'); } if (this.inlink != null) { @@ -602,16 +583,16 @@ public final class QueryParams { context.append(ASCII.String(Word.word2hash(this.ranking.toExternalString()))).append(asterisk); context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())).append(asterisk); context.append(Base64Order.enhancedCoder.encodeString(this.urlMask.toString())).append(asterisk); - 
context.append(this.nav_sitehash).append(asterisk); + context.append(this.modifier.sitehash).append(asterisk); context.append(this.siteexcludes).append(asterisk); - context.append(this.author).append(asterisk); + context.append(this.modifier.author).append(asterisk); context.append(this.targetlang).append(asterisk); context.append(this.constraint).append(asterisk); context.append(this.maxDistance).append(asterisk); - context.append(this.modifier.s).append(asterisk); - context.append(this.protocol).append(asterisk); + context.append(this.modifier.toString()).append(asterisk); + context.append(this.modifier.protocol).append(asterisk); context.append(this.tld).append(asterisk); - context.append(this.ext).append(asterisk); + context.append(this.modifier.filetype).append(asterisk); context.append(this.inlink).append(asterisk); context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius).append(asterisk); context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name()); diff --git a/source/net/yacy/search/query/RankingProcess.java b/source/net/yacy/search/query/RankingProcess.java index be1ebf714..5f52a8e3f 100644 --- a/source/net/yacy/search/query/RankingProcess.java +++ b/source/net/yacy/search/query/RankingProcess.java @@ -321,11 +321,11 @@ public final class RankingProcess extends Thread { // check site constraints final String hosthash = iEntry.hosthash(); - if ( this.query.nav_sitehash == null ) { + if ( this.query.modifier.sitehash == null ) { if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) continue pollloop; } else { // filter out all domains that do not match with the site constraint - if (!hosthash.equals(this.query.nav_sitehash)) continue pollloop; + if (!hosthash.equals(this.query.modifier.sitehash)) continue pollloop; } // finally extend the double-check and insert result to stack diff --git a/source/net/yacy/search/query/SearchEvent.java 
b/source/net/yacy/search/query/SearchEvent.java index f9f6d2ead..81ad6b708 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -593,13 +593,13 @@ public final class SearchEvent { // check site constraints final String hosthash = iEntry.hosthash(); - if ( this.query.nav_sitehash == null ) { + if ( this.query.modifier.sitehash == null ) { if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) { continue pollloop; } } else { // filter out all domains that do not match with the site constraint - if (iEntry.url().getHost().indexOf(this.query.nav_sitehost) < 0) continue pollloop; + if (iEntry.url().getHost().indexOf(this.query.modifier.sitehost) < 0) continue pollloop; } // finally extend the double-check and insert result to stack