Merge branch 'master' of ssh://gitorious.org/yacy/rc1

pull/1/head
Michael Peter Christen 11 years ago
commit 970368359b

@ -18,3 +18,4 @@
#WordPress.com = http://en.search.wordpress.com/?q={searchTerms}&f=feed&page={startPage?} #Search WordPress.com Blogs
#Sueddeutsche.de = http://suche.sueddeutsche.de/query/{searchTerms}?output=rss # Sueddeutsche Zeitung Artikel Archiv
#Los Angeles Times = http://framework.latimes.com/?s={searchTerms}&feed=rss2
#Archive-It = http://archive-it.org/seam/resource/opensearch?q={searchTerms}&n=20 # archiving cultural heritage on the web

@ -29,6 +29,8 @@ import net.yacy.search.query.SearchEventCache;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.DisMaxParams;
public class RankingSolr_p {
@ -54,8 +56,10 @@ public class RankingSolr_p {
if (fieldValue == null || fieldValue.length() == 0) continue;
try {
float boost = Float.parseFloat(fieldValue);
if (boost > 0.0f) { // don't allow <= 0
if (boostString.length() > 0) boostString.append(',');
boostString.append(field.getSolrFieldName()).append('^').append(Float.toString(boost));
}
} catch (final NumberFormatException e) {
continue;
}
@ -74,7 +78,7 @@ public class RankingSolr_p {
}
if (post != null && post.containsKey("EnterBQ")) {
String bq = post.get("bq");
String bq = post.get(DisMaxParams.BQ);
if (bq != null) {
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + profileNr, bq);
sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).setBoostQuery(bq);
@ -89,7 +93,7 @@ public class RankingSolr_p {
}
if (post != null && post.containsKey("EnterFQ")) {
String fq = post.get("fq");
String fq = post.get(CommonParams.FQ);
if (fq != null) {
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_FILTERQUERY_ + profileNr, fq);
sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).setFilterQuery(fq);
@ -104,7 +108,7 @@ public class RankingSolr_p {
}
if (post != null && post.containsKey("EnterBF")) {
String bf = post.get("bf");
String bf = post.get(DisMaxParams.BF);
if (bf != null) {
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ + profileNr, bf);
sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).setBoostFunction(bf);
@ -139,9 +143,9 @@ public class RankingSolr_p {
i++;
}
prop.put("boosts", i);
prop.put("fq", ranking.getFilterQuery());
prop.put("bq", ranking.getBoostQuery());
prop.put("bf", ranking.getBoostFunction());
prop.put(CommonParams.FQ, ranking.getFilterQuery());
prop.put(DisMaxParams.BQ, ranking.getBoostQuery());
prop.put(DisMaxParams.BF, ranking.getBoostFunction());
for (int j = 0; j < 4; j++) {
prop.put("profiles_" + j + "_nr", j);

@ -175,8 +175,8 @@
<dt style="width:100px">heuristics</dt>
<dd>
<dl style="width:500px">
<dt>/heuristic/blekko</dt>
<dd>add search results from blekko</dd>
<dt>/heuristic</dt>
<dd>add search results from #[count]# external opensearch systems</dd>
</dl>
</dd>
#(/heuristic)#

@ -28,6 +28,7 @@
// javac -classpath .:../classes index.java
// if the shell's current path is HTROOT
import java.io.IOException;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.protocol.RequestHeader;
@ -66,7 +67,12 @@ public class index {
if (!sb.getConfigBool("search.options", true)) {
searchoptions = 0;
} else { // show heuristic hint on search option screen
prop.put("searchoptions_heuristic", sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false));
int osdcnt = 0; // (only if some are active and heuristic is not ON by config)
try {
osdcnt = sb.tables.size("opensearchsys");
} catch (IOException ex) { }
prop.put("searchoptions_heuristic", !sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) && osdcnt > 0);
prop.put("searchoptions_heuristic_count", osdcnt);
}
final String former = (post == null) ? "" : post.get("former", "");
final int count = Math.min(100, (post == null) ? 10 : post.getInt("count", 10));

@ -467,10 +467,10 @@ public class yacysearch {
}
}
final int heuristicBlekko = querystring.indexOf("/heuristic/blekko", 0);
final int heuristicBlekko = querystring.indexOf("/heuristic", 0);
if ( heuristicBlekko >= 0 ) {
querystring = querystring.replace("/heuristic/blekko", "");
modifier.add("/heuristic/blekko");
querystring = querystring.replace("/heuristic", "");
modifier.add("/heuristic");
}
final int tldp = querystring.indexOf("tld:", 0);
@ -708,7 +708,7 @@ public class yacysearch {
sb.heuristicSite(theSearch, modifier.sitehost);
}
if ( heuristicBlekko >= 0 && authenticated && !stealthmode ) {
sb.heuristicRSS("http://blekko.com/ws/$+/rss", theSearch, "blekko");
OpenSearchConnector.query(sb, theSearch);
}
if (sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) && authenticated && !stealthmode) {
OpenSearchConnector.query(sb, theSearch);

@ -107,9 +107,7 @@ public class OpenSearchConnector {
Tables.Row row = ossysworktable.next();
String osurl = row.get("url", "");
String name = row.get("title", "");
// to reuse existing heuristicRSS procedure replace querystring with "$"
// querystring is inserted/replaced inside heuristicRSS
sb.heuristicRSS(parseSearchTemplate(osurl, "$", 0, theSearch.query.itemsPerPage), theSearch, "opensearch:" + name);
sb.heuristicRSS(parseSearchTemplate(osurl, theSearch.query.getQueryGoal().getQueryString(false), 0, theSearch.query.itemsPerPage), theSearch, name);
}
} catch (final IOException ex) {
ConcurrentLog.warn("OpenSearchConnector.query", "failed reading table opensearchsys");

@ -116,7 +116,7 @@ public class SRURSSConnector extends Thread implements SearchAccumulator {
final long st = System.currentTimeMillis();
RSSFeed feed;
try {
feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global, agent);
feed = loadSRURSS(urlBase, query, startRecord, recordsPerSession, verify, global, agent);
} catch (final IOException e1) {
//e1.printStackTrace();
break mainloop;
@ -151,13 +151,11 @@ public class SRURSSConnector extends Thread implements SearchAccumulator {
* @param maximumRecords maximum number of records
* @param verify if true, result entries are verified using the snippet fetch (slow); if false simply the result is returned
* @param global if true also search results from other peers are included
* @param timeout milliseconds that are waited at maximum for a search result
* @return the RSS feed loaded from the given search service URL
*/
public static RSSFeed loadSRURSS(
final String rssSearchServiceURL,
final String query,
final long timeout,
final int startRecord,
final int maximumRecords,
final CacheStrategy cacheStrategy,

@ -68,11 +68,9 @@ import net.yacy.cora.document.feed.RSSFeed;
import net.yacy.cora.document.feed.RSSMessage;
import net.yacy.cora.document.feed.RSSReader;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.opensearch.SRURSSConnector;
import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.solr.instance.RemoteInstance;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.Digest;
import net.yacy.cora.protocol.ClientIdentification;
@ -556,29 +554,6 @@ public final class Protocol {
}
}
public static RSSFeed search(
final Seed targetSeed,
final String query,
final CacheStrategy verify,
final boolean global,
final long timeout,
final int startRecord,
final int maximumRecords) throws IOException {
final String address =
(targetSeed == null || targetSeed == Switchboard.getSwitchboard().peers.mySeed()) ? "localhost:"
+ Switchboard.getSwitchboard().getConfig("port", "8090") : targetSeed.getClusterAddress();
final String urlBase = "http://" + address + "/yacysearch.rss";
return SRURSSConnector.loadSRURSS(
urlBase,
query,
timeout,
startRecord,
maximumRecords,
verify,
global,
ClientIdentification.yacyInternetCrawlerAgent);
}
protected static int primarySearch(
final SearchEvent event,
final String wordhashes,

@ -398,19 +398,19 @@ public final class yacyRelease extends yacyVersion {
try{
ConcurrentLog.info("RESTART", "INITIATED");
final String script =
"@echo off" + serverCore.LF_STRING +
"title YaCy restarter" + serverCore.LF_STRING +
"set loading=YACY RESTARTER" + serverCore.LF_STRING +
"echo %loading%" + serverCore.LF_STRING +
"cd \"" + sb.getDataPath().toString() + "/DATA/RELEASE/".replace("/", File.separator) + "\"" + serverCore.LF_STRING +
":WAIT" + serverCore.LF_STRING +
"set loading=%loading%." + serverCore.LF_STRING +
"cls" + serverCore.LF_STRING +
"echo %loading%" + serverCore.LF_STRING +
"ping -n 2 127.0.0.1 >nul" + serverCore.LF_STRING +
"IF exist ..\\yacy.running goto WAIT" + serverCore.LF_STRING +
"cd \"" + sb.getAppPath().toString() + "\"" + serverCore.LF_STRING +
"start /MIN CMD /C " + starterFile + serverCore.LF_STRING;
"@echo off" + serverCore.CRLF_STRING +
"title YaCy restarter" + serverCore.CRLF_STRING +
"set loading=YACY RESTARTER" + serverCore.CRLF_STRING +
"echo %loading%" + serverCore.CRLF_STRING +
"cd \"" + sb.getDataPath().toString() + "/DATA/RELEASE/".replace("/", File.separator) + "\"" + serverCore.CRLF_STRING +
":WAIT" + serverCore.CRLF_STRING +
"set loading=%loading%." + serverCore.CRLF_STRING +
"cls" + serverCore.CRLF_STRING +
"echo %loading%" + serverCore.CRLF_STRING +
"ping -n 2 127.0.0.1 >nul" + serverCore.CRLF_STRING +
"IF exist ..\\yacy.running goto WAIT" + serverCore.CRLF_STRING +
"cd \"" + sb.getAppPath().toString() + "\"" + serverCore.CRLF_STRING +
"start /MIN CMD /C " + starterFile + serverCore.CRLF_STRING;
final File scriptFile = new File(sb.getDataPath(), "DATA/RELEASE/restart.bat".replace("/", File.separator));
OS.deployScript(scriptFile, script);
ConcurrentLog.info("RESTART", "wrote restart-script to " + scriptFile.getAbsolutePath());
@ -495,38 +495,38 @@ public final class yacyRelease extends yacyVersion {
if (startType.exists()) starterFile = "startYACY.bat"; // startType noconsole
if (startParameter.startsWith("-gui")) starterFile += " " + startParameter;
script =
"@echo off" + serverCore.LF_STRING +
"title YaCy updater" + serverCore.LF_STRING +
"set loading=YACY UPDATER" + serverCore.LF_STRING +
"echo %loading%" + serverCore.LF_STRING +
"cd \"" + sb.getDataPath().toString() + "/DATA/RELEASE/".replace("/", File.separator) + "\"" + serverCore.LF_STRING +
":WAIT" + serverCore.LF_STRING +
"set loading=%loading%." + serverCore.LF_STRING +
"cls" + serverCore.LF_STRING +
"echo %loading%" + serverCore.LF_STRING +
"ping -n 2 127.0.0.1 >nul" + serverCore.LF_STRING +
"IF exist ..\\yacy.running goto WAIT" + serverCore.LF_STRING +
"IF not exist yacy goto NODATA" + serverCore.LF_STRING +
"cd yacy" + serverCore.LF_STRING +
"del /Q \"" + sb.getAppPath().toString() + "\\lib\\*\" >nul" + serverCore.LF_STRING +
"xcopy *.* \"" + sb.getAppPath().toString() + "\" /E /Y >nul" + serverCore.LF_STRING +
"@echo off" + serverCore.CRLF_STRING +
"title YaCy updater" + serverCore.CRLF_STRING +
"set loading=YACY UPDATER" + serverCore.CRLF_STRING +
"echo %loading%" + serverCore.CRLF_STRING +
"cd \"" + sb.getDataPath().toString() + "/DATA/RELEASE/".replace("/", File.separator) + "\"" + serverCore.CRLF_STRING +
":WAIT" + serverCore.CRLF_STRING +
"set loading=%loading%." + serverCore.CRLF_STRING +
"cls" + serverCore.CRLF_STRING +
"echo %loading%" + serverCore.CRLF_STRING +
"ping -n 2 127.0.0.1 >nul" + serverCore.CRLF_STRING +
"IF exist ..\\yacy.running goto WAIT" + serverCore.CRLF_STRING +
"IF not exist yacy goto NODATA" + serverCore.CRLF_STRING +
"cd yacy" + serverCore.CRLF_STRING +
"del /Q \"" + sb.getAppPath().toString() + "\\lib\\*\" >nul" + serverCore.CRLF_STRING +
"xcopy *.* \"" + sb.getAppPath().toString() + "\" /E /Y >nul" + serverCore.CRLF_STRING +
// /E - all subdirectories
// /Y - don't ask
"cd .." + serverCore.LF_STRING +
"rd yacy /S /Q" + serverCore.LF_STRING +
"cd .." + serverCore.CRLF_STRING +
"rd yacy /S /Q" + serverCore.CRLF_STRING +
// /S delete tree
// /Q don't ask
"goto END" + serverCore.LF_STRING +
"goto END" + serverCore.CRLF_STRING +
":NODATA" + serverCore.LF_STRING +
"echo YACY UPDATER ERROR: NO UPDATE SOURCE FILES ON FILESYSTEM" + serverCore.LF_STRING +
"pause" + serverCore.LF_STRING +
":NODATA" + serverCore.CRLF_STRING +
"echo YACY UPDATER ERROR: NO UPDATE SOURCE FILES ON FILESYSTEM" + serverCore.CRLF_STRING +
"pause" + serverCore.CRLF_STRING +
":END" + serverCore.LF_STRING +
"cd \"" + sb.getAppPath().toString() + "\"" + serverCore.LF_STRING +
"start /MIN CMD /C " + starterFile + serverCore.LF_STRING;
":END" + serverCore.CRLF_STRING +
"cd \"" + sb.getAppPath().toString() + "\"" + serverCore.CRLF_STRING +
"start /MIN CMD /C " + starterFile + serverCore.CRLF_STRING;
scriptFileName = "update.bat";
} else { // unix/linux
script =

@ -3545,39 +3545,29 @@ public final class Switchboard extends serverSwitch {
}.start();
}
// blekko pattern: http://blekko.com/ws/$+/rss
/**
* Queries a remote OpenSearch system, expects an RSS feed as the response, parses the RSS feed and
* - adds the results to the results of the searchEvent
* - adds the results to the local index
*
* @param urlpattern the search query URL (e.g. http://search.org?query=searchword)
* @param searchEvent the search event to which the results are added
* @param feedName short/internal name of the remote system
*/
public final void heuristicRSS(
final String urlpattern,
final SearchEvent searchEvent,
final String feedName) {
final int p = urlpattern.indexOf('$');
if ( p < 0 ) {
return;
}
new Thread() {
@Override
public void run() {
String queryString = searchEvent.query.getQueryGoal().getQueryString(false);
Thread.currentThread().setName("Switchboard.heuristicRSS:" + queryString);
final int meta = queryString.indexOf("heuristic:", 0);
if ( meta >= 0 ) {
final int q = queryString.indexOf(' ', meta);
if ( q >= 0 ) {
queryString = queryString.substring(0, meta) + queryString.substring(q + 1);
} else {
queryString = queryString.substring(0, meta);
}
}
final String urlString =
urlpattern.substring(0, p)
+ queryString.trim().replaceAll(" ", "+")
+ urlpattern.substring(p + 1);
Thread.currentThread().setName("heuristicRSS:" + feedName);
final DigestURL url;
try {
url = new DigestURL(MultiProtocolURL.unescape(urlString));
url = new DigestURL(MultiProtocolURL.unescape(urlpattern));
} catch (final MalformedURLException e1 ) {
ConcurrentLog.warn("heuristicRSS", "url not well-formed: '" + urlString + "'");
ConcurrentLog.warn("heuristicRSS", "url not well-formed: '" + urlpattern + "'");
return;
}
@ -3588,7 +3578,6 @@ public final class Switchboard extends serverSwitch {
final Response response =
Switchboard.this.loader.load(Switchboard.this.loader.request(url, true, false), CacheStrategy.NOCACHE, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent);
final byte[] resource = (response == null) ? null : response.getContent();
//System.out.println("BLEKKO: " + UTF8.String(resource));
rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
if ( rss != null ) {
final Map<AnchorURL, String> links = new TreeMap<>();
@ -3610,7 +3599,6 @@ public final class Switchboard extends serverSwitch {
addAllToIndex(null, links, searchEvent, feedName, CrawlProfile.collectionParser("rss"), true);
}
} catch (final Throwable e ) {
//Log.logException(e);
} finally {
searchEvent.oneFeederTerminated();
}

Loading…
Cancel
Save