From 17ae51e741abb7d60593a2e7e378895324e67457 Mon Sep 17 00:00:00 2001
From: orbiter <mc@yacy.net>
Date: Sun, 17 Mar 2013 22:13:56 +0100
Subject: [PATCH 1/6] increased the limit on the number of links from 1000 to
 10000 for rss feeds and html documents

---
 defaults/solr.webgraph.schema                   | 2 +-
 source/net/yacy/cora/document/RSSFeed.java      | 2 +-
 source/net/yacy/document/parser/htmlParser.java | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/defaults/solr.webgraph.schema b/defaults/solr.webgraph.schema
index f7fb37f76..dec0dcb02 100644
--- a/defaults/solr.webgraph.schema
+++ b/defaults/solr.webgraph.schema
@@ -156,7 +156,7 @@ target_path_folders_sxt
 ## the values from key-value pairs in the search part of the url (target)
 #target_parameter_value_sxt
 
-## "depth of web page according to number of clicks from the 'main' page, which is the page that appears if only the host is entered as url (target)
+## depth of web page according to number of clicks from the 'main' page, which is the page that appears if only the host is entered as url (target)
 #target_clickdepth_i
 
 ## host of the url (target)
diff --git a/source/net/yacy/cora/document/RSSFeed.java b/source/net/yacy/cora/document/RSSFeed.java
index bb65405da..cb688e39a 100644
--- a/source/net/yacy/cora/document/RSSFeed.java
+++ b/source/net/yacy/cora/document/RSSFeed.java
@@ -31,7 +31,7 @@ import java.util.Set;
 
 public class RSSFeed implements Iterable<RSSMessage> {
 
-    public static final int DEFAULT_MAXSIZE = 1000;
+    public static final int DEFAULT_MAXSIZE = 10000;
 
     // class variables
     private RSSMessage channel;
diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java
index fe0bd8184..a8d84a39b 100644
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@@ -51,7 +51,7 @@ import com.ibm.icu.text.CharsetDetector;
 public class htmlParser extends AbstractParser implements Parser {
 
     private static final Pattern patternUnderline = Pattern.compile("_");
-    private static final int maxLinks = 1000;
+    private static final int maxLinks = 10000;
 
     public htmlParser() {
         super("Streaming HTML Parser");

From 65d73e56523785aa423fd9d23bacd7551a9ddb52 Mon Sep 17 00:00:00 2001
From: orbiter <mc@yacy.net>
Date: Tue, 19 Mar 2013 00:59:47 +0100
Subject: [PATCH 2/6] renamed the JSONP request parameter to 'callback' because
 that is the standard name for JSONP, which is also used in backbone.js/jquery

---
 .../cora/federate/solr/responsewriter/JsonResponseWriter.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
index 7f3a2ca3a..9f14cf6d2 100644
--- a/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
+++ b/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
@@ -107,7 +107,7 @@ public class JsonResponseWriter implements QueryResponseWriter {
         resHead.offset = response.offset(); // equal to 'start'
         resHead.numFound = response.matches();
 
-        String jsonp = request.getParams().get("jsonp"); // check for JSONP
+        String jsonp = request.getParams().get("callback"); // check for JSONP
         if (jsonp != null) {
             writer.write(jsonp.toCharArray());
             writer.write("([".toCharArray());

From 342ba1049b1465e65430a5eeef6e7bc6a6be90d2 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Tue, 19 Mar 2013 10:32:01 +0100
Subject: [PATCH 3/6] - callback fix - memory allocation problem in
 RowCollection: if memory is too low, do not try to increase by 1 because
 this leads to very long execution time and at the end to the same OOM as if
 we allocate the memory at the moment we need it even if the resource observer
 states that this memory is not there. To compensate this, the increase size
 is reduced.

---
 htroot/portalsearch/yacy-portalsearch.js          | 2 +-
 source/net/yacy/kelondro/index/RowCollection.java | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/htroot/portalsearch/yacy-portalsearch.js b/htroot/portalsearch/yacy-portalsearch.js
index dbaf9d6ed..8121347e5 100644
--- a/htroot/portalsearch/yacy-portalsearch.js
+++ b/htroot/portalsearch/yacy-portalsearch.js
@@ -207,7 +207,7 @@ function yrun() {
 
 function yacysearch(clear) {	
 	var url = yconf.url + '/yacysearch.json?callback=?'    // JSONP (cross domain) request URL
-	//var url = yconf.url + '/solr/select?wt=yjson&jsonp=?'    // JSONP (cross domain) request URL
+	//var url = yconf.url + '/solr/select?wt=yjson&callback=?'    // JSONP (cross domain) request URL
 
 	if(clear) {
 		$('#ypopup').empty();
diff --git a/source/net/yacy/kelondro/index/RowCollection.java b/source/net/yacy/kelondro/index/RowCollection.java
index 523bad0e4..065c641a0 100644
--- a/source/net/yacy/kelondro/index/RowCollection.java
+++ b/source/net/yacy/kelondro/index/RowCollection.java
@@ -54,7 +54,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
     private static final byte[] EMPTY_CACHE = new byte[0];
 
     public  static final long growfactorLarge100 = 140L;
-    public  static final long growfactorSmall100 = 120L;
+    public  static final long growfactorSmall100 = 110L;
     private static final int isortlimit = 20;
 
     private static final int exp_chunkcount  = 0;
@@ -246,12 +246,11 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
         long allocram = needed * growfactorLarge100 / 100L;
         allocram -= allocram % this.rowdef.objectsize;
         assert allocram > 0 : "elements = " + elements + ", new = " + allocram;
-        if (allocram <= Integer.MAX_VALUE && MemoryControl.request(allocram, false)) return allocram;
+        if (allocram <= Integer.MAX_VALUE && MemoryControl.request(allocram, forcegc)) return allocram;
         allocram = needed * growfactorSmall100 / 100L;
         allocram -= allocram % this.rowdef.objectsize;
         assert allocram >= 0 : "elements = " + elements + ", new = " + allocram;
-        if (allocram <= Integer.MAX_VALUE && MemoryControl.request(allocram, forcegc)) return allocram;
-        return needed;
+        return allocram;
     }
 
     private final void ensureSize(final int elements) throws SpaceExceededException {

From 5512be6673734a9f017b13a52737a71d2df8a112 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Tue, 19 Mar 2013 10:33:35 +0100
Subject: [PATCH 4/6] fix in GSA result writer which evaluates result context
 fields as String. After the migration to Solr 4.1.0 'some' of these fields
 suddenly are stored as String[]; this patch compensates for this confusion.

---
 .../responsewriter/GSAResponseWriter.java     | 40 +++++++++++++------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java
index 776894c67..a16469788 100644
--- a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java
+++ b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java
@@ -164,21 +164,26 @@ public class GSAResponseWriter implements QueryResponseWriter {
         // write header
         writer.write(XML_START);
         String query = request.getParams().get("originalQuery");
-        String site  = (String) context.get("site");
+        String site  = getContextString(context, "site", "");
+        String sort  = getContextString(context, "sort", "");
+        String client  = getContextString(context, "client", "");
+        String ip  = getContextString(context, "ip", "");
+        String access  = getContextString(context, "access", "");
+        String entqr  = getContextString(context, "entqr", "");
         OpensearchResponseWriter.solitaireTag(writer, "TM", Long.toString(System.currentTimeMillis() - start));
         OpensearchResponseWriter.solitaireTag(writer, "Q", query);
-        paramTag(writer, "sort", (String) context.get("sort"));
+        paramTag(writer, "sort", sort);
         paramTag(writer, "output", "xml_no_dtd");
         paramTag(writer, "ie", "UTF-8");
         paramTag(writer, "oe", "UTF-8");
-        paramTag(writer, "client", (String) context.get("client"));
+        paramTag(writer, "client", client);
         paramTag(writer, "q", query);
         paramTag(writer, "site", site);
         paramTag(writer, "start", Integer.toString(resHead.offset));
         paramTag(writer, "num", Integer.toString(resHead.rows));
-        paramTag(writer, "ip", (String) context.get("ip"));
-        paramTag(writer, "access", (String) context.get("access")); // p - search only public content, s - search only secure content, a - search all content, both public and secure
-        paramTag(writer, "entqr", (String) context.get("entqr")); // query expansion policy; (entqr=1) -- Uses only the search appliance's synonym file, (entqr=1) -- Uses only the search appliance's synonym file, (entqr=3) -- Uses both standard and local synonym files.
+        paramTag(writer, "ip", ip);
+        paramTag(writer, "access", access); // p - search only public content, s - search only secure content, a - search all content, both public and secure
+        paramTag(writer, "entqr", entqr); // query expansion policy; (entqr=1) -- Uses only the search appliance's synonym file, (entqr=1) -- Uses only the search appliance's synonym file, (entqr=3) -- Uses both standard and local synonym files.
 
         // body introduction
         final int responseCount = response.size();
@@ -192,16 +197,16 @@ public class GSAResponseWriter implements QueryResponseWriter {
             writer.write("<NB>");
             if (prevStart >= 0) {
                 writer.write("<PU>");
-                XML.escapeCharData("/gsa/search?q=" + request.getParams().get("q") + "&site=" + (String) context.get("site") +
-                         "&lr=&ie=UTF-8&oe=UTF-8&output=xml_no_dtd&client=" + (String) context.get("client") + "&access=" + (String) context.get("access") +
-                         "&sort=" + (String) context.get("sort") + "&start=" + prevStart + "&sa=N", writer); // a relative URL pointing to the NEXT results page.
+                XML.escapeCharData("/gsa/search?q=" + request.getParams().get("q") + "&site=" + site +
+                         "&lr=&ie=UTF-8&oe=UTF-8&output=xml_no_dtd&client=" + client + "&access=" + access +
+                         "&sort=" + sort + "&start=" + prevStart + "&sa=N", writer); // a relative URL pointing to the PREVIOUS results page.
                 writer.write("</PU>");
             }
             if (nextNum > 0) {
                 writer.write("<NU>");
-                XML.escapeCharData("/gsa/search?q=" + request.getParams().get("q") + "&site=" + (String) context.get("site") +
-                         "&lr=&ie=UTF-8&oe=UTF-8&output=xml_no_dtd&client=" + (String) context.get("client") + "&access=" + (String) context.get("access") +
-                         "&sort=" + (String) context.get("sort") + "&start=" + nextStart + "&num=" + nextNum + "&sa=N", writer); // a relative URL pointing to the NEXT results page.
+                XML.escapeCharData("/gsa/search?q=" + request.getParams().get("q") + "&site=" + site +
+                         "&lr=&ie=UTF-8&oe=UTF-8&output=xml_no_dtd&client=" + client + "&access=" + access +
+                         "&sort=" + sort + "&start=" + nextStart + "&num=" + nextNum + "&sa=N", writer); // a relative URL pointing to the NEXT results page.
                 writer.write("</NU>");
             }
             writer.write("</NB>");
@@ -296,6 +301,17 @@ public class GSAResponseWriter implements QueryResponseWriter {
         writer.write(XML_STOP);
     }
 
+    private static String getContextString(Map<Object,Object> context, String key, String dflt) { // reads a context value as a single String; dflt is returned whenever no String can be obtained
+        Object v = context.get(key);
+        if (v == null) return dflt; // nothing stored under this key
+        if (v instanceof String) return (String) v; // plain String value: use as-is
+        if (v instanceof String[]) { // array value: take the first element
+            String[] va = (String[]) v;
+            return va.length == 0 ? dflt : va[0]; // an empty array falls back to dflt
+        }
+        return dflt; // any other value type is not converted
+    }
+    
     public static void paramTag(final Writer writer, final String tagname, String value) throws IOException {
         if (value == null || value.length() == 0) return;
         writer.write("<PARAM name=\"");

From 735eb70525abeb8e35bc369e3cc4f7418d0b59bb Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Tue, 19 Mar 2013 11:23:18 +0100
Subject: [PATCH 5/6] better search timing; prevents '0 results' for very large
 local indexes (>> 10 million documents)

---
 htroot/yacysearchitem.java | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index b96dd8028..2f4486092 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -104,11 +104,13 @@ public class yacysearchitem {
         prop.put("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString());
         final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, "");
 
+        long timeout = item == 0 ? 10000 : (theSearch.query.isLocal() ? 1000 : 3000);
+        
         if (theSearch.query.contentdom == Classification.ContentDomain.TEXT || theSearch.query.contentdom == Classification.ContentDomain.ALL) {
             // text search
 
             // generate result object
-            final ResultEntry result = theSearch.oneResult(item, theSearch.query.isLocal() ? 1000 : 3000);
+            final ResultEntry result = theSearch.oneResult(item, timeout);
             if (result == null) return prop; // no content
             final String resultUrlstring = result.urlstring();
             final DigestURI resultURL = result.url();
@@ -261,7 +263,7 @@ public class yacysearchitem {
 
             prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content
             //final MediaSnippet ms = theSearch.result().oneImage(item);
-            final ResultEntry ms = theSearch.oneResult(item, theSearch.query.isLocal() ? 1000 : 5000);
+            final ResultEntry ms = theSearch.oneResult(item, timeout);
             if (ms == null) {
                 prop.put("content_item", "0");
             } else {
@@ -297,7 +299,7 @@ public class yacysearchitem {
             // any other media content
 
             // generate result object
-            final ResultEntry ms = theSearch.oneResult(item, theSearch.query.isLocal() ? 1000 : 5000);
+            final ResultEntry ms = theSearch.oneResult(item, timeout);
             prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content
             if (ms == null) {
                 prop.put("content_item", "0");

From 870aedf3c6d55536e2f176c6737f9ecc60e472ab Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Wed, 20 Mar 2013 16:19:49 +0100
Subject: [PATCH 6/6] fixes for better search interface integration in yaml
 templates

---
 htroot/solr/select.java                               | 11 ++++++-----
 htroot/yacysearch.java                                |  4 ++--
 .../solr/responsewriter/JsonResponseWriter.java       |  4 +++-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/htroot/solr/select.java b/htroot/solr/select.java
index b39638fcf..1b1d2ba8c 100644
--- a/htroot/solr/select.java
+++ b/htroot/solr/select.java
@@ -177,11 +177,12 @@ public class select {
         // if this is a call to YaCys special search formats, enhance the query with field assignments
         if ((responseWriter instanceof JsonResponseWriter || responseWriter instanceof OpensearchResponseWriter) && "true".equals(post.get("hl", "true"))) {
             // add options for snippet generation
-            post.put("hl", "true");
-            post.put("hl.fl", "text_t,h1,h2");
-            post.put("hl.simple.pre", "");
-            post.put("hl.simple.post", "");
-            post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH));
+            if (!post.containsKey("hl.q")) post.put("hl.q", q);
+            if (!post.containsKey("hl.fl")) post.put("hl.fl", CollectionSchema.h1_txt.getSolrFieldName() + "," + CollectionSchema.h2_txt.getSolrFieldName() + "," + CollectionSchema.text_t.getSolrFieldName());
+            if (!post.containsKey("hl.alternateField")) post.put("hl.alternateField", CollectionSchema.description.getSolrFieldName());
+            if (!post.containsKey("hl.simple.pre")) post.put("hl.simple.pre", "<b>");
+            if (!post.containsKey("hl.simple.post")) post.put("hl.simple.post", "</b>");
+            if (!post.containsKey("hl.fragsize")) post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH));
         }
 
         // get the embedded connector
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index cfcd28a8d..46ec5dcb7 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -212,8 +212,8 @@ public class yacysearch {
                         ? 100
                         : 5000) : (snippetFetchStrategy != null
                         && snippetFetchStrategy.isAllowedToFetchOnline() ? 20 : 1000),
-                post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative
-        int startRecord = post.getInt("startRecord", post.getInt("offset", 0));
+                post.getInt("maximumRecords", post.getInt("count", post.getInt("rows", 10)))); // SRU syntax with old property as alternative
+        int startRecord = post.getInt("startRecord", post.getInt("offset", post.getInt("start", 0)));
 
         boolean global = post.get("resource", "local").equals("global") && sb.peers.sizeConnected() > 0;
         final boolean indexof = (post != null && post.get("indexof", "").equals("on"));
diff --git a/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
index 9f14cf6d2..1dfd38d1c 100644
--- a/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
+++ b/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
@@ -127,6 +127,7 @@ public class JsonResponseWriter implements QueryResponseWriter {
         SolrIndexSearcher searcher = request.getSearcher();
         DocIterator iterator = response.iterator();
         for (int i = 0; i < responseCount; i++) {
+            try {
             writer.write("{\n".toCharArray());
             int id = iterator.nextDoc();
             Document doc = searcher.doc(id, OpensearchResponseWriter.SOLR_FIELDS);
@@ -216,6 +217,7 @@ public class JsonResponseWriter implements QueryResponseWriter {
             if (i < responseCount - 1) {
                 writer.write(",\n".toCharArray());
             }
+            } catch (Throwable ee) {}
         }
         writer.write("],\n".toCharArray());
         
@@ -233,7 +235,7 @@ public class JsonResponseWriter implements QueryResponseWriter {
         NamedList<Integer> authors = facetFields == null ? null : (NamedList<Integer>) facetFields.get(CollectionSchema.author_sxt.getSolrFieldName());
 
         if (domains != null) {
-            writer.write("{\"facetname\":\"domains\",\"displayname\":\"Domains\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray());
+            writer.write("{\"facetname\":\"domains\",\"displayname\":\"Provider\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray());
             for (int i = 0; i < domains.size(); i++) {
                 facetEntry(writer, "site", domains.getName(i), Integer.toString(domains.getVal(i)));
                 if (i < domains.size() - 1) writer.write(',');