diff --git a/defaults/solr.webgraph.schema b/defaults/solr.webgraph.schema index f7fb37f76..dec0dcb02 100644 --- a/defaults/solr.webgraph.schema +++ b/defaults/solr.webgraph.schema @@ -156,7 +156,7 @@ target_path_folders_sxt ## the values from key-value pairs in the search part of the url (target) #target_parameter_value_sxt -## "depth of web page according to number of clicks from the 'main' page, which is the page that appears if only the host is entered as url (target) +## depth of web page according to number of clicks from the 'main' page, which is the page that appears if only the host is entered as url (target) #target_clickdepth_i ## host of the url (target) diff --git a/htroot/portalsearch/yacy-portalsearch.js b/htroot/portalsearch/yacy-portalsearch.js index dbaf9d6ed..8121347e5 100644 --- a/htroot/portalsearch/yacy-portalsearch.js +++ b/htroot/portalsearch/yacy-portalsearch.js @@ -207,7 +207,7 @@ function yrun() { function yacysearch(clear) { var url = yconf.url + '/yacysearch.json?callback=?' // JSONP (cross domain) request URL - //var url = yconf.url + '/solr/select?wt=yjson&jsonp=?' // JSONP (cross domain) request URL + //var url = yconf.url + '/solr/select?wt=yjson&callback=?' 
// JSONP (cross domain) request URL if(clear) { $('#ypopup').empty(); diff --git a/htroot/solr/select.java b/htroot/solr/select.java index b39638fcf..1b1d2ba8c 100644 --- a/htroot/solr/select.java +++ b/htroot/solr/select.java @@ -177,11 +177,12 @@ public class select { // if this is a call to YaCys special search formats, enhance the query with field assignments if ((responseWriter instanceof JsonResponseWriter || responseWriter instanceof OpensearchResponseWriter) && "true".equals(post.get("hl", "true"))) { // add options for snippet generation - post.put("hl", "true"); - post.put("hl.fl", "text_t,h1,h2"); - post.put("hl.simple.pre", ""); - post.put("hl.simple.post", ""); - post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH)); + if (!post.containsKey("hl.q")) post.put("hl.q", q); + if (!post.containsKey("hl.fl")) post.put("hl.fl", CollectionSchema.h1_txt.getSolrFieldName() + "," + CollectionSchema.h2_txt.getSolrFieldName() + "," + CollectionSchema.text_t.getSolrFieldName()); + if (!post.containsKey("hl.alternateField")) post.put("hl.alternateField", CollectionSchema.description.getSolrFieldName()); + if (!post.containsKey("hl.simple.pre")) post.put("hl.simple.pre", ""); + if (!post.containsKey("hl.simple.post")) post.put("hl.simple.post", ""); + if (!post.containsKey("hl.fragsize")) post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH)); } // get the embedded connector diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index cfcd28a8d..46ec5dcb7 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -212,8 +212,8 @@ public class yacysearch { ? 100 : 5000) : (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline() ? 
20 : 1000), - post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative - int startRecord = post.getInt("startRecord", post.getInt("offset", 0)); + post.getInt("maximumRecords", post.getInt("count", post.getInt("rows", 10)))); // SRU syntax with old property as alternative + int startRecord = post.getInt("startRecord", post.getInt("offset", post.getInt("start", 0))); boolean global = post.get("resource", "local").equals("global") && sb.peers.sizeConnected() > 0; final boolean indexof = (post != null && post.get("indexof", "").equals("on")); diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index b96dd8028..2f4486092 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -104,11 +104,13 @@ public class yacysearchitem { prop.put("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString()); final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, ""); + long timeout = item == 0 ? 10000 : (theSearch.query.isLocal() ? 1000 : 3000); + if (theSearch.query.contentdom == Classification.ContentDomain.TEXT || theSearch.query.contentdom == Classification.ContentDomain.ALL) { // text search // generate result object - final ResultEntry result = theSearch.oneResult(item, theSearch.query.isLocal() ? 1000 : 3000); + final ResultEntry result = theSearch.oneResult(item, timeout); if (result == null) return prop; // no content final String resultUrlstring = result.urlstring(); final DigestURI resultURL = result.url(); @@ -261,7 +263,7 @@ public class yacysearchitem { prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content //final MediaSnippet ms = theSearch.result().oneImage(item); - final ResultEntry ms = theSearch.oneResult(item, theSearch.query.isLocal() ? 
1000 : 5000); + final ResultEntry ms = theSearch.oneResult(item, timeout); if (ms == null) { prop.put("content_item", "0"); } else { @@ -297,7 +299,7 @@ public class yacysearchitem { // any other media content // generate result object - final ResultEntry ms = theSearch.oneResult(item, theSearch.query.isLocal() ? 1000 : 5000); + final ResultEntry ms = theSearch.oneResult(item, timeout); prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content if (ms == null) { prop.put("content_item", "0"); diff --git a/source/net/yacy/cora/document/RSSFeed.java b/source/net/yacy/cora/document/RSSFeed.java index bb65405da..cb688e39a 100644 --- a/source/net/yacy/cora/document/RSSFeed.java +++ b/source/net/yacy/cora/document/RSSFeed.java @@ -31,7 +31,7 @@ import java.util.Set; public class RSSFeed implements Iterable { - public static final int DEFAULT_MAXSIZE = 1000; + public static final int DEFAULT_MAXSIZE = 10000; // class variables private RSSMessage channel; diff --git a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java index 776894c67..a16469788 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java @@ -164,21 +164,26 @@ public class GSAResponseWriter implements QueryResponseWriter { // write header writer.write(XML_START); String query = request.getParams().get("originalQuery"); - String site = (String) context.get("site"); + String site = getContextString(context, "site", ""); + String sort = getContextString(context, "sort", ""); + String client = getContextString(context, "client", ""); + String ip = getContextString(context, "ip", ""); + String access = getContextString(context, "access", ""); + String entqr = getContextString(context, "entqr", ""); OpensearchResponseWriter.solitaireTag(writer, "TM", 
Long.toString(System.currentTimeMillis() - start)); OpensearchResponseWriter.solitaireTag(writer, "Q", query); - paramTag(writer, "sort", (String) context.get("sort")); + paramTag(writer, "sort", sort); paramTag(writer, "output", "xml_no_dtd"); paramTag(writer, "ie", "UTF-8"); paramTag(writer, "oe", "UTF-8"); - paramTag(writer, "client", (String) context.get("client")); + paramTag(writer, "client", client); paramTag(writer, "q", query); paramTag(writer, "site", site); paramTag(writer, "start", Integer.toString(resHead.offset)); paramTag(writer, "num", Integer.toString(resHead.rows)); - paramTag(writer, "ip", (String) context.get("ip")); - paramTag(writer, "access", (String) context.get("access")); // p - search only public content, s - search only secure content, a - search all content, both public and secure - paramTag(writer, "entqr", (String) context.get("entqr")); // query expansion policy; (entqr=1) -- Uses only the search appliance's synonym file, (entqr=1) -- Uses only the search appliance's synonym file, (entqr=3) -- Uses both standard and local synonym files. + paramTag(writer, "ip", ip); + paramTag(writer, "access", access); // p - search only public content, s - search only secure content, a - search all content, both public and secure + paramTag(writer, "entqr", entqr); // query expansion policy; (entqr=0) -- no query expansion, (entqr=1) -- Uses only the search appliance's synonym file, (entqr=2) -- Uses only the local synonym files, (entqr=3) -- Uses both standard and local synonym files.
// body introduction final int responseCount = response.size(); @@ -192,16 +197,16 @@ public class GSAResponseWriter implements QueryResponseWriter { writer.write(""); if (prevStart >= 0) { writer.write(""); - XML.escapeCharData("/gsa/search?q=" + request.getParams().get("q") + "&site=" + (String) context.get("site") + - "&lr=&ie=UTF-8&oe=UTF-8&output=xml_no_dtd&client=" + (String) context.get("client") + "&access=" + (String) context.get("access") + - "&sort=" + (String) context.get("sort") + "&start=" + prevStart + "&sa=N", writer); // a relative URL pointing to the NEXT results page. + XML.escapeCharData("/gsa/search?q=" + request.getParams().get("q") + "&site=" + site + + "&lr=&ie=UTF-8&oe=UTF-8&output=xml_no_dtd&client=" + client + "&access=" + access + + "&sort=" + sort + "&start=" + prevStart + "&sa=N", writer); // a relative URL pointing to the PREVIOUS results page. writer.write(""); } if (nextNum > 0) { writer.write(""); - XML.escapeCharData("/gsa/search?q=" + request.getParams().get("q") + "&site=" + (String) context.get("site") + - "&lr=&ie=UTF-8&oe=UTF-8&output=xml_no_dtd&client=" + (String) context.get("client") + "&access=" + (String) context.get("access") + - "&sort=" + (String) context.get("sort") + "&start=" + nextStart + "&num=" + nextNum + "&sa=N", writer); // a relative URL pointing to the NEXT results page. + XML.escapeCharData("/gsa/search?q=" + request.getParams().get("q") + "&site=" + site + + "&lr=&ie=UTF-8&oe=UTF-8&output=xml_no_dtd&client=" + client + "&access=" + access + + "&sort=" + sort + "&start=" + nextStart + "&num=" + nextNum + "&sa=N", writer); // a relative URL pointing to the NEXT results page.
writer.write(""); } writer.write(""); @@ -296,6 +301,17 @@ public class GSAResponseWriter implements QueryResponseWriter { writer.write(XML_STOP); } + private static String getContextString(Map context, String key, String dflt) { + Object v = context.get(key); + if (v == null) return dflt; + if (v instanceof String) return (String) v; + if (v instanceof String[]) { + String[] va = (String[]) v; + return va.length == 0 ? dflt : va[0]; + } + return dflt; + } + public static void paramTag(final Writer writer, final String tagname, String value) throws IOException { if (value == null || value.length() == 0) return; writer.write(" authors = facetFields == null ? null : (NamedList) facetFields.get(CollectionSchema.author_sxt.getSolrFieldName()); if (domains != null) { - writer.write("{\"facetname\":\"domains\",\"displayname\":\"Domains\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray()); + writer.write("{\"facetname\":\"domains\",\"displayname\":\"Provider\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray()); for (int i = 0; i < domains.size(); i++) { facetEntry(writer, "site", domains.getName(i), Integer.toString(domains.getVal(i))); if (i < domains.size() - 1) writer.write(','); diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index fe0bd8184..a8d84a39b 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -51,7 +51,7 @@ import com.ibm.icu.text.CharsetDetector; public class htmlParser extends AbstractParser implements Parser { private static final Pattern patternUnderline = Pattern.compile("_"); - private static final int maxLinks = 1000; + private static final int maxLinks = 10000; public htmlParser() { super("Streaming HTML Parser"); diff --git a/source/net/yacy/kelondro/index/RowCollection.java b/source/net/yacy/kelondro/index/RowCollection.java index 
523bad0e4..065c641a0 100644 --- a/source/net/yacy/kelondro/index/RowCollection.java +++ b/source/net/yacy/kelondro/index/RowCollection.java @@ -54,7 +54,7 @@ public class RowCollection implements Sortable, Iterable, private static final byte[] EMPTY_CACHE = new byte[0]; public static final long growfactorLarge100 = 140L; - public static final long growfactorSmall100 = 120L; + public static final long growfactorSmall100 = 110L; private static final int isortlimit = 20; private static final int exp_chunkcount = 0; @@ -246,12 +246,11 @@ public class RowCollection implements Sortable, Iterable, long allocram = needed * growfactorLarge100 / 100L; allocram -= allocram % this.rowdef.objectsize; assert allocram > 0 : "elements = " + elements + ", new = " + allocram; - if (allocram <= Integer.MAX_VALUE && MemoryControl.request(allocram, false)) return allocram; + if (allocram <= Integer.MAX_VALUE && MemoryControl.request(allocram, forcegc)) return allocram; allocram = needed * growfactorSmall100 / 100L; allocram -= allocram % this.rowdef.objectsize; assert allocram >= 0 : "elements = " + elements + ", new = " + allocram; - if (allocram <= Integer.MAX_VALUE && MemoryControl.request(allocram, forcegc)) return allocram; - return needed; + return allocram; } private final void ensureSize(final int elements) throws SpaceExceededException {