added ranking and evaluation of language type in a search

the wanted language is taken from the browser user-agent string

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5192 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent e201ad0e9f
commit 00c1535f84

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5
# Release Configuration
releaseVersion=0.601
releaseVersion=0.602
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@ -51,6 +51,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverProfiling;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
import de.anomic.tools.iso639;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage;
import de.anomic.yacy.yacyCore;
@ -86,6 +87,13 @@ public final class search {
final String prefer = post.get("prefer", "");
final String contentdom = post.get("contentdom", "text");
final String filter = post.get("filter", ".*");
String language = post.get("language", "");
if (!iso639.exists(language)) {
// take language from the user agent
String agent = header.get("User-Agent");
if (agent == null) agent = System.getProperty("user.language");
language = (agent == null) ? "en" : iso639.userAgentLanguageDetection(agent);
}
final int partitions = post.getInt("partitions", 30);
String profile = post.get("profile", ""); // remote profile hand-over
if (profile.length() > 0) profile = crypt.simpleDecode(profile, null);
@ -174,7 +182,7 @@ public final class search {
plasmaSearchEvent theSearch = null;
if ((query.length() == 0) && (abstractSet != null)) {
// this is _not_ a normal search, only a request for index abstracts
theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet<String>(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, yacyURL.TLD_any_zone_filter, client, false);
theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet<String>(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), language, false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, yacyURL.TLD_any_zone_filter, client, false);
theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
@ -200,7 +208,7 @@ public final class search {
} else {
// retrieve index containers from search request
theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, yacyURL.TLD_any_zone_filter, client, false);
theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), language, false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, yacyURL.TLD_any_zone_filter, client, false);
theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
RSSFeed.channels(RSSFeed.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes), ""));

@ -40,6 +40,7 @@ import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.iso639;
import de.anomic.tools.yFormatter;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage;
@ -66,6 +67,7 @@ public class ysearch {
// get query
String querystring = (post == null) ? "" : post.get("search", "").trim();
boolean fetchSnippets = (post != null && post.get("verify", "false").equals("true"));
final serverObjects prop = new serverObjects();
final boolean rss = (post == null) ? false : post.get("rss", "false").equals("true");
@ -132,6 +134,11 @@ public class ysearch {
final int domainzone = (post == null ? yacyURL.TLD_any_zone_filter : post.getInt("zone", yacyURL.TLD_any_zone_filter));
// determine the user's language from the User-Agent string
String agent = header.get("User-Agent");
if (agent == null) agent = System.getProperty("user.language");
String language = (agent == null) ? "en" : iso639.userAgentLanguageDetection(agent);
// SEARCH
//final boolean indexDistributeGranted = sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true").equals("true");
//final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true");
@ -152,18 +159,23 @@ public class ysearch {
TreeSet<Long> trackerHandles = sb.localSearchTracker.get(client);
if (trackerHandles == null) trackerHandles = new TreeSet<Long>();
boolean block = false;
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size() > 1) try {
Thread.sleep(3000);
block = true;
} catch (final InterruptedException e) { e.printStackTrace(); }
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size() > 12) try {
Thread.sleep(10000);
block = true;
} catch (final InterruptedException e) { e.printStackTrace(); }
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size() > 36) try {
Thread.sleep(30000);
block = true;
} catch (final InterruptedException e) { e.printStackTrace(); }
if (global || fetchSnippets) {
// in case that we do a global search or we want to fetch snippets, we check for DoS cases
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size() > 1) {
global = false;
fetchSnippets = false;
}
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size() > 30) {
global = false;
fetchSnippets = false;
block = true;
}
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size() > 100) {
global = false;
fetchSnippets = false;
block = true;
}
}
if ((!block) && (post == null || post.get("cat", "href").equals("href"))) {
@ -198,6 +210,7 @@ public class ysearch {
maxDistance,
prefermask,
contentdomCode,
language,
true,
itemsPerPage,
offset,

@ -50,6 +50,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverProfiling;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.iso639;
import de.anomic.tools.yFormatter;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage;
@ -76,7 +77,7 @@ public class yacysearch {
// get query
String querystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim(); // SRU compliance
final boolean fetchSnippets = (post != null && post.get("verify", "false").equals("true"));
boolean fetchSnippets = (post != null && post.get("verify", "false").equals("true"));
final serverObjects prop = new serverObjects();
final boolean rss = (post == null) ? false : post.get("rss", "false").equals("true");
@ -137,6 +138,11 @@ public class yacysearch {
constraint.set(plasmaCondenser.flag_cat_indexof, true);
}
// determine the user's language from the User-Agent string
String agent = header.get("User-Agent");
if (agent == null) agent = System.getProperty("user.language");
String language = (agent == null) ? "en" : iso639.userAgentLanguageDetection(agent);
// SEARCH
//final boolean indexDistributeGranted = sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true").equals("true");
//final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true");
@ -159,23 +165,23 @@ public class yacysearch {
boolean block = false;
if (global || fetchSnippets) {
// in case that we do a global search or we want to fetch snippets, we check for DoS cases
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size() > 1) try {
Thread.sleep(3000);
block = true;
} catch (final InterruptedException e) {
e.printStackTrace();
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 1000)).size() > 2) {
global = false;
fetchSnippets = false;
}
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size() > 1) {
global = false;
fetchSnippets = false;
}
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size() > 12) try {
Thread.sleep(10000);
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size() > 200) {
global = false;
fetchSnippets = false;
block = true;
} catch (final InterruptedException e) {
e.printStackTrace();
}
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size() > 36) try {
Thread.sleep(30000);
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size() > 600) {
global = false;
fetchSnippets = false;
block = true;
} catch (final InterruptedException e) {
e.printStackTrace();
}
}
@ -265,6 +271,7 @@ public class yacysearch {
maxDistance,
prefermask,
contentdomCode,
language,
fetchSnippets,
itemsPerPage,
offset,

@ -44,13 +44,15 @@ public class indexRWIEntryOrder {
private final plasmaSearchRankingProfile ranking;
private final kelondroMScoreCluster<String> doms; // collected for "authority" heuristic
private int maxdomcount;
private String language;
public indexRWIEntryOrder(final plasmaSearchRankingProfile profile) {
/**
 * Creates an entry order for the given ranking profile and target language.
 *
 * @param profile  the ranking profile; its coefficients are read when a ranking value is computed
 * @param language the language wanted by the searcher; entries whose language equals it
 *                 receive the language bonus in the ranking sum
 */
public indexRWIEntryOrder(final plasmaSearchRankingProfile profile, String language) {
    // configuration handed in by the caller
    this.ranking = profile;
    this.language = language;
    // no boundary entries yet (presumably filled in during normalization - confirm)
    this.min = null;
    this.max = null;
    // the domain statistics start out empty
    this.doms = new kelondroMScoreCluster<String>();
    this.maxdomcount = 0;
}
public ArrayList<indexRWIVarEntry> normalizeWith(final indexContainer container) {
@ -134,23 +136,29 @@ public class indexRWIEntryOrder {
+ ((max.hitcount() == min.hitcount()) ? 0 : (((t.hitcount() - min.hitcount() ) << 8) / (max.hitcount() - min.hitcount()) ) << ranking.coeff_hitcount)
+ tf
+ ((ranking.coeff_authority > 12) ? (authority(t.urlHash()) << ranking.coeff_authority) : 0)
+ (((flags.get(indexRWIEntry.flag_app_dc_identifier)) ? 255 << ranking.coeff_appurl : 0))
+ (((flags.get(indexRWIEntry.flag_app_dc_title)) ? 255 << ranking.coeff_app_dc_title : 0))
+ (((flags.get(indexRWIEntry.flag_app_dc_creator)) ? 255 << ranking.coeff_app_dc_creator : 0))
+ (((flags.get(indexRWIEntry.flag_app_dc_subject)) ? 255 << ranking.coeff_app_dc_subject : 0))
+ (((flags.get(indexRWIEntry.flag_app_dc_description)) ? 255 << ranking.coeff_app_dc_description : 0))
+ (((flags.get(indexRWIEntry.flag_app_emphasized)) ? 255 << ranking.coeff_appemph : 0))
+ (((flags.get(plasmaCondenser.flag_cat_indexof)) ? 255 << ranking.coeff_catindexof : 0))
+ (((flags.get(plasmaCondenser.flag_cat_hasimage)) ? 255 << ranking.coeff_cathasimage : 0))
+ (((flags.get(plasmaCondenser.flag_cat_hasaudio)) ? 255 << ranking.coeff_cathasaudio : 0))
+ (((flags.get(plasmaCondenser.flag_cat_hasvideo)) ? 255 << ranking.coeff_cathasvideo : 0))
+ (((flags.get(plasmaCondenser.flag_cat_hasapp)) ? 255 << ranking.coeff_cathasapp : 0))
+ (((yacyURL.probablyRootURL(t.urlHash())) ? 15 << ranking.coeff_urllength : 0));
+ ((flags.get(indexRWIEntry.flag_app_dc_identifier)) ? 255 << ranking.coeff_appurl : 0)
+ ((flags.get(indexRWIEntry.flag_app_dc_title)) ? 255 << ranking.coeff_app_dc_title : 0)
+ ((flags.get(indexRWIEntry.flag_app_dc_creator)) ? 255 << ranking.coeff_app_dc_creator : 0)
+ ((flags.get(indexRWIEntry.flag_app_dc_subject)) ? 255 << ranking.coeff_app_dc_subject : 0)
+ ((flags.get(indexRWIEntry.flag_app_dc_description)) ? 255 << ranking.coeff_app_dc_description : 0)
+ ((flags.get(indexRWIEntry.flag_app_emphasized)) ? 255 << ranking.coeff_appemph : 0)
+ ((flags.get(plasmaCondenser.flag_cat_indexof)) ? 255 << ranking.coeff_catindexof : 0)
+ ((flags.get(plasmaCondenser.flag_cat_hasimage)) ? 255 << ranking.coeff_cathasimage : 0)
+ ((flags.get(plasmaCondenser.flag_cat_hasaudio)) ? 255 << ranking.coeff_cathasaudio : 0)
+ ((flags.get(plasmaCondenser.flag_cat_hasvideo)) ? 255 << ranking.coeff_cathasvideo : 0)
+ ((flags.get(plasmaCondenser.flag_cat_hasapp)) ? 255 << ranking.coeff_cathasapp : 0)
+ ((patchUK(t.language).equals(this.language)) ? 255 << ranking.coeff_language : 0)
+ ((yacyURL.probablyRootURL(t.urlHash())) ? 15 << ranking.coeff_urllength : 0);
//if (searchWords != null) r += (yacyURL.probablyWordURL(t.urlHash(), searchWords) != null) ? 256 << ranking.coeff_appurl : 0;
return Long.MAX_VALUE - r; // returns a reversed number: the lower the number the better the ranking. This is used for simple sorting with a TreeMap
}
/**
 * Maps the legacy language name "uk" to "en" and passes every other name through.
 * Releases up to 0.60 stored "uk" as a (bad) language name, so such entries
 * are treated as English here.
 *
 * @param l the language name of an index entry; must not be null
 * @return "en" if l is "uk", otherwise l itself
 */
private static final String patchUK(String l) {
    return l.equals("uk") ? "en" : l;
}
public static class minmaxfinder extends Thread {
indexRWIVarEntry entryMin;

@ -142,6 +142,7 @@ public final class plasmaSearchEvent {
"",
query.prefer,
query.urlMask,
query.targetlang,
query.displayResults(),
query.maxDistance,
wordIndex,

@ -62,6 +62,7 @@ public final class plasmaSearchQuery {
public String prefer;
public int contentdom;
public String urlMask;
public String targetlang;
public int domType;
public int zonecode;
public int domMaxTargets;
@ -100,6 +101,7 @@ public final class plasmaSearchQuery {
this.linesPerPage = lines;
this.offset = 0;
this.urlMask = ".*";
this.targetlang = "en";
this.domType = SEARCHDOM_LOCAL;
this.zonecode = yacyURL.TLD_any_zone_filter;
this.domMaxTargets = 0;
@ -117,6 +119,7 @@ public final class plasmaSearchQuery {
final TreeSet<String> excludeHashes,
final plasmaSearchRankingProfile ranking,
final int maxDistance, final String prefer, final int contentdom,
final String language,
final boolean onlineSnippetFetch,
final int lines, final int offset, final String urlMask,
final int domType, final String domGroupName, final int domMaxTargets,
@ -134,7 +137,8 @@ public final class plasmaSearchQuery {
this.linesPerPage = Math.min((specialRights) ? 1000 : 10, lines);
this.offset = Math.min((specialRights) ? 10000 : 100, offset);
this.urlMask = urlMask;
this.domType = domType;
this.targetlang = language;
this.domType = domType;
this.zonecode = domainzone;
this.domMaxTargets = domMaxTargets;
this.constraint = constraint;
@ -286,6 +290,7 @@ public final class plasmaSearchQuery {
"*" + indexWord.word2hash(this.ranking.toExternalString()) +
"*" + this.prefer +
"*" + this.urlMask +
"*" + this.targetlang +
"*" + this.constraint +
"*" + this.maxDistance;
if (anonymized)

@ -81,7 +81,7 @@ public final class plasmaSearchRankingProcess {
this.stack = new kelondroSortStack<indexRWIVarEntry>(maxentries);
this.doubleDomCache = new HashMap<String, kelondroSortStack<indexRWIVarEntry>>();
this.handover = new HashMap<String, String>();
this.order = (query == null) ? null : new indexRWIEntryOrder(query.ranking);
this.order = (query == null) ? null : new indexRWIEntryOrder(query.ranking, query.targetlang);
this.query = query;
this.maxentries = maxentries;
this.remote_peerCount = 0;

@ -64,6 +64,7 @@ import de.anomic.kelondro.kelondroRowCollection;
import de.anomic.server.serverMemory;
import de.anomic.server.serverProfiling;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.iso639;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage;
import de.anomic.yacy.yacyDHTAction;
@ -815,31 +816,49 @@ public final class plasmaWordIndex implements indexRI {
final long startTime = System.currentTimeMillis();
// CREATE INDEX
// load some document metadata
final String dc_title = document.dc_title();
final yacyURL referrerURL = entry.referrerURL();
final Date docDate = entry.getModificationDate();
String language = condenser.language();
// identify the language of the document
String language = condenser.language(); // this is a statistical analysation of the content: will be compared with other attributes
String bymetadata = document.languageByMetadata(); // the languageByMetadata may return null if there was no declaration
if (language == null) {
// no statistics available, we take either the metadata (if given) or the TLD
language = (bymetadata == null) ? entry.url().language() : bymetadata;
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " FAILED, taking " + ((bymetadata == null) ? "TLD" : "metadata") + ": " + language);
} else {
if (language.equals("pl")) {
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " HAS BUG: " + language);
language = (bymetadata == null) ? entry.url().language() : bymetadata; // extra handling of this case: overwrite with bymetadata
} else {
if (bymetadata == null) {
if (language.equals(entry.url().language()))
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFIRMED - TLD IDENTICAL: " + language);
else {
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFLICTING: " + language + " (the language given by the TLD is " + entry.url().language() + ")");
if (bymetadata == null) {
// two possible results: compare and report conflicts
if (language.equals(entry.url().language()))
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFIRMED - TLD IDENTICAL: " + language);
else {
String error = "*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFLICTING: " + language + " (the language given by the TLD is " + entry.url().language() + ")";
// see if we have a hint in the url that the statistic was right
String u = entry.url().toNormalform(true, false).toLowerCase();
if (!u.contains("/" + language + "/") && !u.contains("/" + iso639.country(language).toLowerCase() + "/")) {
// no confirmation using the url, use the TLD
language = entry.url().language();
System.out.println(error + ", corrected using the TLD");
} else {
// this is a strong hint that the statistic was in fact correct
System.out.println(error + ", but the url proves that the statistic is correct");
}
}
} else {
// here we have three results: we can do a voting
if (language.equals(bymetadata)) {
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFIRMED - METADATA IDENTICAL: " + language);
} else if (language.equals(entry.url().language())) {
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFIRMED - TLD IS IDENTICAL: " + language);
} else if (bymetadata.equals(entry.url().language())) {
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFLICTING: " + language + " BUT METADATA AND TLD ARE IDENTICAL: " + bymetadata + ")");
language = bymetadata;
} else {
if (language.equals(bymetadata))
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFIRMED - METADATA IDENTICAL: " + language);
else
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFLICTING: " + language + " (the language given by metadata is " + bymetadata + ")");
System.out.println("*** DEBUG LANGUAGE-BY-STATISTICS: " + entry.url() + " CONFLICTING: ALL DIFFERENT! statistic: " + language + ", metadata: " + bymetadata + ", TLD: + " + entry.url().language() + ". taking metadata.");
language = bymetadata;
}
}
}

@ -194,4 +194,28 @@ public class iso639 {
return mapping.containsKey(code.toLowerCase());
}
/**
 * Analyses a user-agent string and returns the language declared in it.
 * <p>
 * The following forms are recognised, in this order (matching is case-insensitive):
 * <ol>
 * <li>the whole string is a known two-letter language code, e.g. "en"</li>
 * <li>the whole string is a five-character locale such as "en-US" or "en_US"</li>
 * <li>a known code preceded by a space and followed by '-', e.g. " en-US"</li>
 * <li>a known code preceded by a space and followed by ';', e.g. " de;"</li>
 * </ol>
 *
 * @param userAgent the user-agent string (or a plain language/locale string); may be null
 * @return the lower-case language code, or null if the string is null, shorter than
 *         two characters, or contains no recognisable code; callers must handle null
 */
public static final String userAgentLanguageDetection(String userAgent) {
    if (userAgent == null || userAgent.length() < 2) return null;
    // use a fixed locale: with the JVM default locale the result would depend on the
    // host configuration (e.g. "IT" does not become "it" under a Turkish locale)
    userAgent = userAgent.toLowerCase(java.util.Locale.ENGLISH);
    final int length = userAgent.length();
    if (length == 2 && mapping.containsKey(userAgent)) return userAgent;
    // accept a bare locale like "en-us" / "en_us", but not an arbitrary five-letter word
    if (length == 5 && (userAgent.charAt(2) == '-' || userAgent.charAt(2) == '_')) {
        final String prefix = userAgent.substring(0, 2);
        if (mapping.containsKey(prefix)) return prefix;
    }
    // search for entries like ' en-' first, then for entries like ' en;'
    String code = codeBeforeSeparator(userAgent, '-');
    if (code == null) code = codeBeforeSeparator(userAgent, ';');
    return code;
}

/**
 * Finds the first known language code that is preceded by a space and directly
 * followed by the given separator character.
 *
 * @param agent     the lower-cased user-agent string
 * @param separator the character that must follow the two-letter code
 * @return the language code, or null if there is no such token
 */
private static String codeBeforeSeparator(final String agent, final char separator) {
    // a match needs three characters in front of the separator (space + code), so the
    // scan starts at index 3; a separator at an earlier position must not stop the search
    for (int p = agent.indexOf(separator, 3); p >= 0; p = agent.indexOf(separator, p + 1)) {
        if (agent.charAt(p - 3) != ' ') continue;
        final String candidate = agent.substring(p - 2, p);
        if (mapping.containsKey(candidate)) return candidate;
    }
    return null;
}
}

@ -422,6 +422,7 @@ public final class yacyClient {
final String urlhashes,
final String prefer,
final String filter,
final String language,
final int count,
final int maxDistance,
final boolean global,
@ -464,6 +465,7 @@ public final class yacyClient {
post.add(new DefaultCharsetStringPart("urls", urlhashes));
post.add(new DefaultCharsetStringPart("prefer", prefer));
post.add(new DefaultCharsetStringPart("filter", filter));
post.add(new DefaultCharsetStringPart("language", language));
post.add(new DefaultCharsetStringPart("ttl", "0"));
post.add(new DefaultCharsetStringPart("maxdist", Integer.toString(maxDistance)));
post.add(new DefaultCharsetStringPart("profile", crypt.simpleEncode(rankingProfile.toExternalString())));

@ -73,11 +73,14 @@ public class yacySearch extends Thread {
private String[] urls;
private final int count, maxDistance;
final private plasmaSearchRankingProfile rankingProfile;
final private String prefer, filter;
final private String prefer, filter, language;
final private kelondroBitfield constraint;
ResultURLs crawlResults;
public yacySearch(final String wordhashes, final String excludehashes, final String urlhashes, final String prefer, final String filter, final int count, final int maxDistance,
public yacySearch(final String wordhashes, final String excludehashes, final String urlhashes,
final String prefer, final String filter, final String language,
final int count, final int maxDistance,
final boolean global, final int partitions, final yacySeed targetPeer, final plasmaWordIndex wordIndex,
final ResultURLs crawlResults,
final plasmaSearchRankingProcess containerCache,
@ -92,6 +95,7 @@ public class yacySearch extends Thread {
this.urlhashes = urlhashes;
this.prefer = prefer;
this.filter = filter;
this.language = language;
this.global = global;
this.partitions = partitions;
this.wordIndex = wordIndex;
@ -110,7 +114,7 @@ public class yacySearch extends Thread {
public void run() {
this.urls = yacyClient.search(
wordIndex.seedDB.mySeed(),
wordhashes, excludehashes, urlhashes, prefer, filter, count, maxDistance, global, partitions,
wordhashes, excludehashes, urlhashes, prefer, filter, language, count, maxDistance, global, partitions,
targetPeer, wordIndex, crawlResults, containerCache, abstractCache,
blacklist, rankingProfile, constraint);
if (urls != null) {
@ -276,7 +280,8 @@ public class yacySearch extends Thread {
public static yacySearch[] primaryRemoteSearches(
final String wordhashes, final String excludehashes, final String urlhashes,
final String prefer, final String filter, final int count, final int maxDist,
final String prefer, final String filter, String language,
final int count, final int maxDist,
final plasmaWordIndex wordIndex,
final ResultURLs crawlResults,
final plasmaSearchRankingProcess containerCache,
@ -297,7 +302,7 @@ public class yacySearch extends Thread {
final yacySearch[] searchThreads = new yacySearch[targets];
for (int i = 0; i < targets; i++) {
if (targetPeers[i] == null || targetPeers[i].hash == null) continue;
searchThreads[i] = new yacySearch(wordhashes, excludehashes, urlhashes, prefer, filter, count, maxDist, true, targets, targetPeers[i],
searchThreads[i] = new yacySearch(wordhashes, excludehashes, urlhashes, prefer, filter, language, count, maxDist, true, targets, targetPeers[i],
wordIndex, crawlResults, containerCache, abstractCache, blacklist, rankingProfile, constraint);
searchThreads[i].start();
//try {Thread.sleep(20);} catch (InterruptedException e) {}
@ -305,7 +310,8 @@ public class yacySearch extends Thread {
return searchThreads;
}
public static yacySearch secondaryRemoteSearch(final String wordhashes, final String excludehashes, final String urlhashes,
public static yacySearch secondaryRemoteSearch(
final String wordhashes, final String excludehashes, final String urlhashes,
final plasmaWordIndex wordIndex,
final ResultURLs crawlResults,
final plasmaSearchRankingProcess containerCache,
@ -319,7 +325,7 @@ public class yacySearch extends Thread {
final yacySeed targetPeer = wordIndex.seedDB.getConnected(targethash);
if (targetPeer == null || targetPeer.hash == null) return null;
if (clusterselection != null) targetPeer.setAlternativeAddress(clusterselection.get(targetPeer.hash));
final yacySearch searchThread = new yacySearch(wordhashes, excludehashes, urlhashes, "", "", 0, 9999, true, 0, targetPeer,
final yacySearch searchThread = new yacySearch(wordhashes, excludehashes, urlhashes, "", "", "en", 0, 9999, true, 0, targetPeer,
wordIndex, crawlResults, containerCache, new TreeMap<String, TreeMap<String, String>>(), blacklist, rankingProfile, constraint);
searchThread.start();
return searchThread;

Loading…
Cancel
Save