diff --git a/defaults/heuristicopensearch.conf b/defaults/heuristicopensearch.conf
index ffd7030ef..143f25a03 100644
--- a/defaults/heuristicopensearch.conf
+++ b/defaults/heuristicopensearch.conf
@@ -18,3 +18,4 @@
#WordPress.com = http://en.search.wordpress.com/?q={searchTerms}&f=feed&page={startPage?} #Search WordPress.com Blogs
#Sueddeutsche.de = http://suche.sueddeutsche.de/query/{searchTerms}?output=rss # Sueddeutsche Zeitung Artikel Archiv
#Los Angeles Times = http://framework.latimes.com/?s={searchTerms}&feed=rss2
+#Archive-It = http://archive-it.org/seam/resource/opensearch?q={searchTerms}&n=20 # archiving cultural heritage on the web
diff --git a/htroot/index.html b/htroot/index.html
index 39c309195..97ff4a39f 100644
--- a/htroot/index.html
+++ b/htroot/index.html
@@ -175,8 +175,8 @@
heuristics
- - /heuristic/blekko
- - add search results from blekko
+ - /heuristic
+ - add search results from #[count]# external opensearch systems
#(/heuristic)#
diff --git a/htroot/index.java b/htroot/index.java
index 7f1f268e1..c35928d25 100644
--- a/htroot/index.java
+++ b/htroot/index.java
@@ -28,6 +28,7 @@
// javac -classpath .:../classes index.java
// if the shell's current path is HTROOT
+import java.io.IOException;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.protocol.RequestHeader;
@@ -66,7 +67,12 @@ public class index {
if (!sb.getConfigBool("search.options", true)) {
searchoptions = 0;
} else { // show heuristic hint on search option screen
- prop.put("searchoptions_heuristic", sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false));
+ int osdcnt = 0; // (only if some are active and heuristic is not ON by config)
+ try {
+ osdcnt = sb.tables.size("opensearchsys");
+ } catch (IOException ex) { }
+ prop.put("searchoptions_heuristic", !sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) && osdcnt > 0);
+ prop.put("searchoptions_heuristic_count", osdcnt);
}
final String former = (post == null) ? "" : post.get("former", "");
final int count = Math.min(100, (post == null) ? 10 : post.getInt("count", 10));
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index e3ec46db4..6d4d420e4 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -467,10 +467,10 @@ public class yacysearch {
}
}
- final int heuristicBlekko = querystring.indexOf("/heuristic/blekko", 0);
+ final int heuristicBlekko = querystring.indexOf("/heuristic", 0);
if ( heuristicBlekko >= 0 ) {
- querystring = querystring.replace("/heuristic/blekko", "");
- modifier.add("/heuristic/blekko");
+ querystring = querystring.replace("/heuristic", "");
+ modifier.add("/heuristic");
}
final int tldp = querystring.indexOf("tld:", 0);
@@ -708,7 +708,7 @@ public class yacysearch {
sb.heuristicSite(theSearch, modifier.sitehost);
}
if ( heuristicBlekko >= 0 && authenticated && !stealthmode ) {
- sb.heuristicRSS("http://blekko.com/ws/$+/rss", theSearch, "blekko");
+ OpenSearchConnector.query(sb, theSearch);
}
if (sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) && authenticated && !stealthmode) {
OpenSearchConnector.query(sb, theSearch);
diff --git a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java
index 9590da53b..68fee2161 100644
--- a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java
+++ b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java
@@ -107,9 +107,7 @@ public class OpenSearchConnector {
Tables.Row row = ossysworktable.next();
String osurl = row.get("url", "");
String name = row.get("title", "");
- // to reuse existing heuristicRSS procedure replace querystring with "$"
- // querystring is inserted/replaced inside heuristicRSS
- sb.heuristicRSS(parseSearchTemplate(osurl, "$", 0, theSearch.query.itemsPerPage), theSearch, "opensearch:" + name);
+ sb.heuristicRSS(parseSearchTemplate(osurl, theSearch.query.getQueryGoal().getQueryString(false), 0, theSearch.query.itemsPerPage), theSearch, name);
}
} catch (final IOException ex) {
ConcurrentLog.warn("OpenSearchConnector.query", "failed reading table opensearchsys");
diff --git a/source/net/yacy/cora/federate/opensearch/SRURSSConnector.java b/source/net/yacy/cora/federate/opensearch/SRURSSConnector.java
index b1f11427d..3a0d83d18 100644
--- a/source/net/yacy/cora/federate/opensearch/SRURSSConnector.java
+++ b/source/net/yacy/cora/federate/opensearch/SRURSSConnector.java
@@ -116,7 +116,7 @@ public class SRURSSConnector extends Thread implements SearchAccumulator {
final long st = System.currentTimeMillis();
RSSFeed feed;
try {
- feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global, agent);
+ feed = loadSRURSS(urlBase, query, startRecord, recordsPerSession, verify, global, agent);
} catch (final IOException e1) {
//e1.printStackTrace();
break mainloop;
@@ -151,13 +151,11 @@ public class SRURSSConnector extends Thread implements SearchAccumulator {
* @param maximumRecords maximum number of records
* @param verify if true, result entries are verified using the snippet fetch (slow); if false simply the result is returned
* @param global if true also search results from other peers are included
- * @param timeout milliseconds that are waited at maximum for a search result
* @return
*/
public static RSSFeed loadSRURSS(
final String rssSearchServiceURL,
final String query,
- final long timeout,
final int startRecord,
final int maximumRecords,
final CacheStrategy cacheStrategy,
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index ea48acef2..b4afb9065 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -3545,39 +3545,29 @@ public final class Switchboard extends serverSwitch {
}.start();
}
- // blekko pattern: http://blekko.com/ws/$+/rss
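+ /**
+ * Queries a remote OpenSearch system in a separate thread, expects an RSS feed as the response, parses the feed and
+ * - adds the results to the results of the searchEvent
+ * - adds the results to the local index
+ *
+ * @param urlpattern the complete search query URL with the search terms already inserted (e.g. http://search.org?query=searchword); it is used as given, no placeholder is substituted here
+ * @param searchEvent the search event the results are added to; oneFeederTerminated() is called on it after the feed load attempt
+ * @param feedName short/internal name of the remote system; also used to name the worker thread
+ */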
public final void heuristicRSS(
final String urlpattern,
final SearchEvent searchEvent,
final String feedName) {
- final int p = urlpattern.indexOf('$');
- if ( p < 0 ) {
- return;
- }
+
new Thread() {
@Override
public void run() {
- String queryString = searchEvent.query.getQueryGoal().getQueryString(false);
- Thread.currentThread().setName("Switchboard.heuristicRSS:" + queryString);
- final int meta = queryString.indexOf("heuristic:", 0);
- if ( meta >= 0 ) {
- final int q = queryString.indexOf(' ', meta);
- if ( q >= 0 ) {
- queryString = queryString.substring(0, meta) + queryString.substring(q + 1);
- } else {
- queryString = queryString.substring(0, meta);
- }
- }
-
- final String urlString =
- urlpattern.substring(0, p)
- + queryString.trim().replaceAll(" ", "+")
- + urlpattern.substring(p + 1);
+ Thread.currentThread().setName("heuristicRSS:" + feedName);
final DigestURL url;
try {
- url = new DigestURL(MultiProtocolURL.unescape(urlString));
+ url = new DigestURL(MultiProtocolURL.unescape(urlpattern));
} catch (final MalformedURLException e1 ) {
- ConcurrentLog.warn("heuristicRSS", "url not well-formed: '" + urlString + "'");
+ ConcurrentLog.warn("heuristicRSS", "url not well-formed: '" + urlpattern + "'");
return;
}
@@ -3588,7 +3578,6 @@ public final class Switchboard extends serverSwitch {
final Response response =
Switchboard.this.loader.load(Switchboard.this.loader.request(url, true, false), CacheStrategy.NOCACHE, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent);
final byte[] resource = (response == null) ? null : response.getContent();
- //System.out.println("BLEKKO: " + UTF8.String(resource));
rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
if ( rss != null ) {
final Map links = new TreeMap<>();
@@ -3610,7 +3599,6 @@ public final class Switchboard extends serverSwitch {
addAllToIndex(null, links, searchEvent, feedName, CrawlProfile.collectionParser("rss"), true);
}
} catch (final Throwable e ) {
- //Log.logException(e);
} finally {
searchEvent.oneFeederTerminated();
}