Combine /heuristic search modifier with opensearch configured targets

- with the search modifier /heuristic a request is sent to all configured opensearch target systems (the old /heuristic/blekko modifier is no longer valid)
- this allows using the opensearch heuristic on an individual search request (in contrast to the configuration HEURISTIC_OPENSEARCH=true, which sends an osd request on all global searches)
- the index.html searchoption text is adjusted to be displayed only if the option is configured
- add Archive-It to predefined systems
pull/1/head
reger 11 years ago
parent 2de159719b
commit a2cb366b25

@ -18,3 +18,4 @@
#WordPress.com = http://en.search.wordpress.com/?q={searchTerms}&f=feed&page={startPage?} #Search WordPress.com Blogs #WordPress.com = http://en.search.wordpress.com/?q={searchTerms}&f=feed&page={startPage?} #Search WordPress.com Blogs
#Sueddeutsche.de = http://suche.sueddeutsche.de/query/{searchTerms}?output=rss # Sueddeutsche Zeitung Artikel Archiv #Sueddeutsche.de = http://suche.sueddeutsche.de/query/{searchTerms}?output=rss # Sueddeutsche Zeitung Artikel Archiv
#Los Angeles Times = http://framework.latimes.com/?s={searchTerms}&feed=rss2 #Los Angeles Times = http://framework.latimes.com/?s={searchTerms}&feed=rss2
#Archive-It = http://archive-it.org/seam/resource/opensearch?q={searchTerms}&n=20 # archiving cultural heritage on the web

@ -175,8 +175,8 @@
<dt style="width:100px">heuristics</dt> <dt style="width:100px">heuristics</dt>
<dd> <dd>
<dl style="width:500px"> <dl style="width:500px">
<dt>/heuristic/blekko</dt> <dt>/heuristic</dt>
<dd>add search results from blekko</dd> <dd>add search results from #[count]# external opensearch systems</dd>
</dl> </dl>
</dd> </dd>
#(/heuristic)# #(/heuristic)#

@ -28,6 +28,7 @@
// javac -classpath .:../classes index.java // javac -classpath .:../classes index.java
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import java.io.IOException;
import net.yacy.cora.document.analysis.Classification; import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain; import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
@ -66,7 +67,12 @@ public class index {
if (!sb.getConfigBool("search.options", true)) { if (!sb.getConfigBool("search.options", true)) {
searchoptions = 0; searchoptions = 0;
} else { // show heuristic hint on search option screen } else { // show heuristic hint on search option screen
prop.put("searchoptions_heuristic", sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false)); int osdcnt = 0; // (only if some are active and heuristic is not ON by config)
try {
osdcnt = sb.tables.size("opensearchsys");
} catch (IOException ex) { }
prop.put("searchoptions_heuristic", !sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) && osdcnt > 0);
prop.put("searchoptions_heuristic_count", osdcnt);
} }
final String former = (post == null) ? "" : post.get("former", ""); final String former = (post == null) ? "" : post.get("former", "");
final int count = Math.min(100, (post == null) ? 10 : post.getInt("count", 10)); final int count = Math.min(100, (post == null) ? 10 : post.getInt("count", 10));

@ -467,10 +467,10 @@ public class yacysearch {
} }
} }
final int heuristicBlekko = querystring.indexOf("/heuristic/blekko", 0); final int heuristicBlekko = querystring.indexOf("/heuristic", 0);
if ( heuristicBlekko >= 0 ) { if ( heuristicBlekko >= 0 ) {
querystring = querystring.replace("/heuristic/blekko", ""); querystring = querystring.replace("/heuristic", "");
modifier.add("/heuristic/blekko"); modifier.add("/heuristic");
} }
final int tldp = querystring.indexOf("tld:", 0); final int tldp = querystring.indexOf("tld:", 0);
@ -708,7 +708,7 @@ public class yacysearch {
sb.heuristicSite(theSearch, modifier.sitehost); sb.heuristicSite(theSearch, modifier.sitehost);
} }
if ( heuristicBlekko >= 0 && authenticated && !stealthmode ) { if ( heuristicBlekko >= 0 && authenticated && !stealthmode ) {
sb.heuristicRSS("http://blekko.com/ws/$+/rss", theSearch, "blekko"); OpenSearchConnector.query(sb, theSearch);
} }
if (sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) && authenticated && !stealthmode) { if (sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) && authenticated && !stealthmode) {
OpenSearchConnector.query(sb, theSearch); OpenSearchConnector.query(sb, theSearch);

@ -107,9 +107,7 @@ public class OpenSearchConnector {
Tables.Row row = ossysworktable.next(); Tables.Row row = ossysworktable.next();
String osurl = row.get("url", ""); String osurl = row.get("url", "");
String name = row.get("title", ""); String name = row.get("title", "");
// to reuse existing heuristicRSS procedure replace querystring with "$" sb.heuristicRSS(parseSearchTemplate(osurl, theSearch.query.getQueryGoal().getQueryString(false), 0, theSearch.query.itemsPerPage), theSearch, name);
// querystring is inserted/replaced inside heuristicRSS
sb.heuristicRSS(parseSearchTemplate(osurl, "$", 0, theSearch.query.itemsPerPage), theSearch, "opensearch:" + name);
} }
} catch (final IOException ex) { } catch (final IOException ex) {
ConcurrentLog.warn("OpenSearchConnector.query", "failed reading table opensearchsys"); ConcurrentLog.warn("OpenSearchConnector.query", "failed reading table opensearchsys");

@ -116,7 +116,7 @@ public class SRURSSConnector extends Thread implements SearchAccumulator {
final long st = System.currentTimeMillis(); final long st = System.currentTimeMillis();
RSSFeed feed; RSSFeed feed;
try { try {
feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global, agent); feed = loadSRURSS(urlBase, query, startRecord, recordsPerSession, verify, global, agent);
} catch (final IOException e1) { } catch (final IOException e1) {
//e1.printStackTrace(); //e1.printStackTrace();
break mainloop; break mainloop;
@ -151,13 +151,11 @@ public class SRURSSConnector extends Thread implements SearchAccumulator {
* @param maximumRecords maximum number of records * @param maximumRecords maximum number of records
* @param verify if true, result entries are verified using the snippet fetch (slow); if false simply the result is returned * @param verify if true, result entries are verified using the snippet fetch (slow); if false simply the result is returned
* @param global if true also search results from other peers are included * @param global if true also search results from other peers are included
* @param timeout milliseconds that are waited at maximum for a search result
* @return * @return
*/ */
public static RSSFeed loadSRURSS( public static RSSFeed loadSRURSS(
final String rssSearchServiceURL, final String rssSearchServiceURL,
final String query, final String query,
final long timeout,
final int startRecord, final int startRecord,
final int maximumRecords, final int maximumRecords,
final CacheStrategy cacheStrategy, final CacheStrategy cacheStrategy,

@ -3545,39 +3545,29 @@ public final class Switchboard extends serverSwitch {
}.start(); }.start();
} }
// blekko pattern: http://blekko.com/ws/$+/rss /**
* Queries a remote opensearch system, expects RSS feed as response, parses the RSS feed and
* - adds the results to the results of the searchEvent
* - adds the results to the local index
*
* @param urlpattern the search query url (e.g. http://search.org?query=searchword)
* @param searchEvent
* @param feedName short/internal name of the remote system
*/
public final void heuristicRSS( public final void heuristicRSS(
final String urlpattern, final String urlpattern,
final SearchEvent searchEvent, final SearchEvent searchEvent,
final String feedName) { final String feedName) {
final int p = urlpattern.indexOf('$');
if ( p < 0 ) {
return;
}
new Thread() { new Thread() {
@Override @Override
public void run() { public void run() {
String queryString = searchEvent.query.getQueryGoal().getQueryString(false); Thread.currentThread().setName("heuristicRSS:" + feedName);
Thread.currentThread().setName("Switchboard.heuristicRSS:" + queryString);
final int meta = queryString.indexOf("heuristic:", 0);
if ( meta >= 0 ) {
final int q = queryString.indexOf(' ', meta);
if ( q >= 0 ) {
queryString = queryString.substring(0, meta) + queryString.substring(q + 1);
} else {
queryString = queryString.substring(0, meta);
}
}
final String urlString =
urlpattern.substring(0, p)
+ queryString.trim().replaceAll(" ", "+")
+ urlpattern.substring(p + 1);
final DigestURL url; final DigestURL url;
try { try {
url = new DigestURL(MultiProtocolURL.unescape(urlString)); url = new DigestURL(MultiProtocolURL.unescape(urlpattern));
} catch (final MalformedURLException e1 ) { } catch (final MalformedURLException e1 ) {
ConcurrentLog.warn("heuristicRSS", "url not well-formed: '" + urlString + "'"); ConcurrentLog.warn("heuristicRSS", "url not well-formed: '" + urlpattern + "'");
return; return;
} }
@ -3588,7 +3578,6 @@ public final class Switchboard extends serverSwitch {
final Response response = final Response response =
Switchboard.this.loader.load(Switchboard.this.loader.request(url, true, false), CacheStrategy.NOCACHE, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent); Switchboard.this.loader.load(Switchboard.this.loader.request(url, true, false), CacheStrategy.NOCACHE, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent);
final byte[] resource = (response == null) ? null : response.getContent(); final byte[] resource = (response == null) ? null : response.getContent();
//System.out.println("BLEKKO: " + UTF8.String(resource));
rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource); rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
if ( rss != null ) { if ( rss != null ) {
final Map<AnchorURL, String> links = new TreeMap<>(); final Map<AnchorURL, String> links = new TreeMap<>();
@ -3610,7 +3599,6 @@ public final class Switchboard extends serverSwitch {
addAllToIndex(null, links, searchEvent, feedName, CrawlProfile.collectionParser("rss"), true); addAllToIndex(null, links, searchEvent, feedName, CrawlProfile.collectionParser("rss"), true);
} }
} catch (final Throwable e ) { } catch (final Throwable e ) {
//Log.logException(e);
} finally { } finally {
searchEvent.oneFeederTerminated(); searchEvent.oneFeederTerminated();
} }

Loading…
Cancel
Save