First attempt to implement a secondary search.

This introduces a set of search processes that enrich the search results
with specialized follow-up requests, so that results gathered from
different peers can be combined.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2571 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 2a06ce5538
commit cf9884e22b
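
The mechanism added in this commit works roughly as follows: each remote peer answers the primary search with an "index abstract", i.e. for every query word hash a map of url hash to a concatenated list of 12-character peer hashes. These per-word maps are joined constructively over all query words (concatenating the peer lists of the urls that survive), and the joined urls are then grouped by peer so that each peer can be asked again, in a secondary search, for exactly the urls it is known to hold. The following is a minimal, self-contained sketch of that join-and-group step using plain java.util collections; the class and variable names are illustrative and not part of the committed code.

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class SecondarySearchSketch {

    // Join the per-word abstracts: keep only url hashes that occur for every word,
    // concatenating the peer lists of the surviving urls.
    // Assumes a non-empty list of abstracts.
    static TreeMap<String, String> joinAbstracts(List<TreeMap<String, String>> abstracts) {
        TreeMap<String, String> join = new TreeMap<String, String>(abstracts.get(0));
        for (int i = 1; i < abstracts.size(); i++) {
            TreeMap<String, String> next = abstracts.get(i);
            TreeMap<String, String> step = new TreeMap<String, String>();
            for (Map.Entry<String, String> e : join.entrySet()) {
                String peers = next.get(e.getKey());
                if (peers != null) step.put(e.getKey(), e.getValue() + peers);
            }
            join = step;
        }
        return join;
    }

    // Group the joined urls by peer: peer hash -> concatenated 12-character url hashes.
    static TreeMap<String, String> urlsPerPeer(TreeMap<String, String> abstractJoin) {
        TreeMap<String, String> secondarySearchURLs = new TreeMap<String, String>();
        for (Map.Entry<String, String> e : abstractJoin.entrySet()) {
            String url = e.getKey(), peers = e.getValue();
            for (int j = 0; j + 12 <= peers.length(); j += 12) {
                String peer = peers.substring(j, j + 12);
                if (peers.indexOf(peer) < j) continue; // skip duplicate peer entries
                String urls = secondarySearchURLs.get(peer);
                secondarySearchURLs.put(peer, (urls == null) ? url : urls + url);
            }
        }
        return secondarySearchURLs;
    }
}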

@ -47,14 +47,12 @@
// javac -classpath .:../../Classes search.java
// if the shell's current path is htroot/yacy
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchEvent;
@ -108,10 +106,7 @@ public final class search {
}
// prepare search
final HashSet keyhashes = new HashSet(query.length() / indexEntryAttribute.wordHashLength);
for (int i = 0; i < (query.length() / indexEntryAttribute.wordHashLength); i++) {
keyhashes.add(query.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength));
}
final Set keyhashes = plasmaSearchQuery.hashes2Set(query);
final long timestamp = System.currentTimeMillis();
plasmaSearchQuery squery = new plasmaSearchQuery(keyhashes, maxdist, prefer, count, duetime, filter);
@ -129,11 +124,7 @@ public final class search {
// retrieve index containers from search request
plasmaSearchEvent theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.urlPool.loadedURL, sb.snippetCache);
Set urlselection = null;
if ((urls.length() > 0) && (urls.length() % 12 == 0)) {
for (int i = 0; i < (urls.length() / 12); i++) urlselection.add(urls.substring(i * 12, (i + 1 * 12)));
}
Map containers = theSearch.localSearchContainers(urlselection);
Map containers = theSearch.localSearchContainers(plasmaSearchQuery.hashes2Set(urls));
// set statistic details of search result and find best result index set
String maxcounthash = null, neardhthash = null;
@ -168,7 +159,7 @@ public final class search {
indexContainer localResults = theSearch.localSearchJoin(containers.values());
int joincount = localResults.size();
prop.put("joincount", Integer.toString(joincount));
plasmaSearchResult acc = theSearch.order(localResults);
plasmaSearchResult acc = theSearch.orderFinal(localResults);
// generate compressed index for maxcounthash
// this is not needed if the search is restricted to specific urls, because it is a re-search

@ -48,8 +48,8 @@ import java.io.FileInputStream;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
@ -85,7 +85,7 @@ public class kelondroMSetTools {
// - join by iterative tests (where we distinguish left-right and right-left tests)
public static TreeMap joinConstructive(Collection maps) {
public static TreeMap joinConstructive(Collection maps, boolean concatStrings) {
// this joins all TreeMap(s) contained in maps
// first order entities by their size
@ -116,7 +116,7 @@ public class kelondroMSetTools {
k = (Long) orderMap.firstKey(); // the next smallest...
mapA = joinResult;
mapB = (TreeMap) orderMap.remove(k);
joinResult = joinConstructiveByTestSetInMap(mapB, mapA.keySet());
joinResult = joinConstructiveByTest(mapA, mapB, concatStrings);
// free resources
mapA = null;
mapB = null;
@ -127,72 +127,63 @@ public class kelondroMSetTools {
return joinResult;
}
public static TreeMap joinConstructive(TreeMap map, TreeSet set) {
// comparators must be equal
if ((map == null) || (set == null)) return null;
if (map.comparator() != set.comparator()) return null;
if ((map.size() == 0) || (set.size() == 0)) return new TreeMap(map.comparator());
// decide which method to use
int high = ((map.size() > set.size()) ? map.size() : set.size());
int low = ((map.size() > set.size()) ? set.size() : map.size());
int stepsEnum = 10 * (high + low - 1);
int stepsTest = 12 * log2a(high) * low;
public static TreeMap joinConstructive(TreeMap map1, TreeMap map2, boolean concatStrings) {
// comparators must be equal
if ((map1 == null) || (map2 == null)) return null;
if (map1.comparator() != map2.comparator()) return null;
if ((map1.size() == 0) || (map2.size() == 0)) return new TreeMap(map1.comparator());
// start most efficient method
if (stepsEnum > stepsTest) {
if (map.size() > set.size()) return joinConstructiveByTestSetInMap(map, set);
return joinConstructiveByTestMapInSet(map, set);
}
return joinConstructiveByEnumeration(map, set);
}
// decide which method to use
int high = ((map1.size() > map2.size()) ? map1.size() : map2.size());
int low = ((map1.size() > map2.size()) ? map2.size() : map1.size());
int stepsEnum = 10 * (high + low - 1);
int stepsTest = 12 * log2a(high) * low;
private static TreeMap joinConstructiveByTestSetInMap(TreeMap map, Set set) {
Iterator si = set.iterator();
TreeMap result = new TreeMap(map.comparator());
Object o;
while (si.hasNext()) {
o = si.next();
if (map.containsKey(o)) result.put(o, map.get(o));
}
return result;
// start most efficient method
if (stepsEnum > stepsTest) {
if (map1.size() > map2.size()) return joinConstructiveByTest(map2, map1, concatStrings);
return joinConstructiveByTest(map1, map2, concatStrings);
}
return joinConstructiveByEnumeration(map1, map2, concatStrings);
}
private static TreeMap joinConstructiveByTestMapInSet(Map map, TreeSet set) {
Iterator mi = map.keySet().iterator();
TreeMap result = new TreeMap(set.comparator());
Object o;
while (mi.hasNext()) {
o = mi.next();
if (set.contains(o)) result.put(o, map.get(o));
}
return result;
private static TreeMap joinConstructiveByTest(TreeMap small, TreeMap large, boolean concatStrings) {
Iterator mi = small.entrySet().iterator();
TreeMap result = new TreeMap(large.comparator());
Map.Entry mentry1;
Object mobj2;
while (mi.hasNext()) {
mentry1 = (Map.Entry) mi.next();
mobj2 = large.get(mentry1.getKey());
if (mobj2 != null) result.put(mentry1.getKey(), (concatStrings) ? ((String) mentry1.getValue() + (String) mobj2) : mentry1.getValue());
}
return result;
}
private static TreeMap joinConstructiveByEnumeration(TreeMap map, TreeSet set) {
// implement pairwise enumeration
Comparator comp = map.comparator();
Iterator mi = map.keySet().iterator();
Iterator si = set.iterator();
TreeMap result = new TreeMap(map.comparator());
int c;
if ((mi.hasNext()) && (si.hasNext())) {
Object mobj = mi.next();
Object sobj = si.next();
while (true) {
c = compare(mobj, sobj, comp);
if (c < 0) {
if (mi.hasNext()) mobj = mi.next(); else break;
} else if (c > 0) {
if (si.hasNext()) sobj = si.next(); else break;
} else {
result.put(mobj, map.get(mobj));
if (mi.hasNext()) mobj = mi.next(); else break;
if (si.hasNext()) sobj = si.next(); else break;
}
}
}
return result;
private static TreeMap joinConstructiveByEnumeration(TreeMap map1, TreeMap map2, boolean concatStrings) {
// implement pairwise enumeration
Comparator comp = map1.comparator();
Iterator mi1 = map1.entrySet().iterator();
Iterator mi2 = map2.entrySet().iterator();
TreeMap result = new TreeMap(map1.comparator());
int c;
if ((mi1.hasNext()) && (mi2.hasNext())) {
Map.Entry mentry1 = (Map.Entry) mi1.next();
Map.Entry mentry2 = (Map.Entry) mi2.next();
while (true) {
c = compare(mentry1.getKey(), mentry2.getKey(), comp);
if (c < 0) {
if (mi1.hasNext()) mentry1 = (Map.Entry) mi1.next(); else break;
} else if (c > 0) {
if (mi2.hasNext()) mentry2 = (Map.Entry) mi2.next(); else break;
} else {
result.put(mentry1.getKey(), (concatStrings) ? ((String) mentry1.getValue() + (String) mentry2.getValue()) : mentry1.getValue());
if (mi1.hasNext()) mentry1 = (Map.Entry) mi1.next(); else break;
if (mi2.hasNext()) mentry2 = (Map.Entry) mi2.next(); else break;
}
}
}
return result;
}
// now the same for set-set
@ -268,7 +259,7 @@ public class kelondroMSetTools {
// return excludeConstructiveByEnumeration(map, set);
}
private static TreeMap excludeConstructiveByTestMapInSet(TreeMap map, TreeSet set) {
private static TreeMap excludeConstructiveByTestMapInSet(TreeMap map, Set set) {
Iterator mi = map.keySet().iterator();
TreeMap result = new TreeMap(map.comparator());
Object o;
@ -279,7 +270,8 @@ public class kelondroMSetTools {
return result;
}
private static TreeMap excludeConstructiveByEnumeration(TreeMap map, TreeSet set) {
/*
private static TreeMap excludeConstructiveByEnumeration(TreeMap map, TreeSet set) {
// returns map without the elements in set
// enumerates objects
Comparator comp = map.comparator();
@ -317,7 +309,7 @@ public class kelondroMSetTools {
}
return result;
}
*/
public static void excludeDestructive(TreeMap map, TreeSet set) {
// comparators must be equal
if (map == null) return;
@ -411,7 +403,7 @@ public class kelondroMSetTools {
public static void main(String[] args) {
TreeMap m = new TreeMap();
TreeSet s = new TreeSet();
TreeMap s = new TreeMap();
m.put("a", "a");
m.put("x", "x");
m.put("f", "f");
@ -422,26 +414,26 @@ public class kelondroMSetTools {
m.put("k", "k");
m.put("y", "y");
m.put("z", "z");
s.add("a");
s.add("b");
s.add("c");
s.add("k");
s.add("l");
s.add("m");
s.add("n");
s.add("o");
s.add("p");
s.add("q");
s.add("r");
s.add("s");
s.add("t");
s.add("x");
s.put("a", "a");
s.put("b", "b");
s.put("c", "c");
s.put("k", "k");
s.put("l", "l");
s.put("m", "m");
s.put("n", "n");
s.put("o", "o");
s.put("p", "p");
s.put("q", "q");
s.put("r", "r");
s.put("s", "s");
s.put("t", "t");
s.put("x", "x");
System.out.println("Compare " + m.toString() + " with " + s.toString());
System.out.println("Join=" + joinConstructiveByEnumeration(m, s));
System.out.println("Join=" + joinConstructiveByTestMapInSet(m, s));
System.out.println("Join=" + joinConstructiveByTestSetInMap(m, s));
System.out.println("Join=" + joinConstructive(m, s));
System.out.println("Exclude=" + excludeConstructiveByEnumeration(m, s));
System.out.println("Join=" + joinConstructiveByEnumeration(m, s, true));
System.out.println("Join=" + joinConstructiveByTest(m, s, true));
System.out.println("Join=" + joinConstructiveByTest(m, s, true));
System.out.println("Join=" + joinConstructive(m, s, true));
System.out.println("Exclude=" + excludeConstructiveByTestMapInSet(m, s.keySet()));
/*
for (int low = 0; low < 10; low++)
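
The reworked joinConstructive now joins two TreeMaps (instead of a map and a set) and can concatenate the values of keys present in both, which is what allows the per-word url-to-peerlist abstracts to be merged. A minimal sketch of that join-by-key-test, assuming String values and illustrative names rather than the committed API:

import java.util.Map;
import java.util.TreeMap;

public class JoinSketch {

    // Join two maps on their keys; for keys present in both, optionally concatenate the values.
    // Iterating over the smaller map keeps the number of lookups low, as in the original.
    static TreeMap<String, String> joinByTest(TreeMap<String, String> small,
                                              TreeMap<String, String> large,
                                              boolean concatValues) {
        TreeMap<String, String> result = new TreeMap<String, String>(large.comparator());
        for (Map.Entry<String, String> e : small.entrySet()) {
            String other = large.get(e.getKey());
            if (other != null) result.put(e.getKey(), concatValues ? e.getValue() + other : e.getValue());
        }
        return result;
    }

    public static void main(String[] args) {
        TreeMap<String, String> a = new TreeMap<String, String>();
        TreeMap<String, String> b = new TreeMap<String, String>();
        a.put("url1", "peerA"); a.put("url2", "peerB");
        b.put("url2", "peerC"); b.put("url3", "peerD");
        System.out.println(joinByTest(a, b, true)); // prints {url2=peerBpeerC}
    }
}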

@ -67,8 +67,9 @@ public class plasmaGrafics {
public static ymagePainter getSearchEventPicture() {
if (plasmaSearchEvent.lastEvent == null) return null;
yacySearch[] searches = plasmaSearchEvent.lastEvent.getSearchThreads();
if (searches == null) return null; // this was a local search and there are no threads
yacySearch[] primarySearches = plasmaSearchEvent.lastEvent.getPrimarySearchThreads();
yacySearch[] secondarySearches = plasmaSearchEvent.lastEvent.getSecondarySearchThreads();
if (primarySearches == null) return null; // this was a local search and there are no threads
// get a copy of a recent network picture
ymagePainter eventPicture = getNetworkPicture(120000);
@ -82,14 +83,25 @@ public class plasmaGrafics {
String hash;
int angle;
// draw in the search peers
for (int j = 0; j < searches.length; j++) {
eventPicture.setColor((searches[j].isAlive()) ? ymageMatrix.ADDITIVE_RED : ymageMatrix.ADDITIVE_GREEN);
hash = searches[j].target().hash;
// draw in the primary search peers
for (int j = 0; j < primarySearches.length; j++) {
eventPicture.setColor((primarySearches[j].isAlive()) ? ymageMatrix.ADDITIVE_RED : ymageMatrix.ADDITIVE_GREEN);
hash = primarySearches[j].target().hash;
angle = (int) ((long) 360 * (yacySeed.dhtPosition(hash) / (yacySeed.maxDHTDistance / (long) 10000)) / (long) 10000);
eventPicture.arcLine(cx, cy, cr - 20, cr, angle);
}
// draw in the secondary search peers
if (secondarySearches != null) {
for (int j = 0; j < secondarySearches.length; j++) {
eventPicture.setColor((secondarySearches[j].isAlive()) ? ymageMatrix.ADDITIVE_RED : ymageMatrix.ADDITIVE_GREEN);
hash = secondarySearches[j].target().hash;
angle = (int) ((long) 360 * (yacySeed.dhtPosition(hash) / (yacySeed.maxDHTDistance / (long) 10000)) / (long) 10000);
eventPicture.arcLine(cx, cy, cr - 10, cr, angle - 1);
eventPicture.arcLine(cx, cy, cr - 10, cr, angle + 1);
}
}
// draw in the search target
plasmaSearchQuery query = plasmaSearchEvent.lastEvent.getQuery();
Iterator i = query.queryHashes.iterator();
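
The drawing code above places each contacted peer on the network circle by converting its DHT position into an angle; the staged division by 10000 keeps the intermediate products inside the long range. A tiny sketch of that mapping, assuming a position already given in the range [0, maxDistance):

public class DhtAngleSketch {

    // Map a DHT position in [0, maxDistance) to a degree angle in [0, 360).
    // The two /10000 steps mirror the overflow-avoiding integer math of the original.
    static int dhtAngle(long position, long maxDistance) {
        return (int) (360L * (position / (maxDistance / 10000L)) / 10000L);
    }

    public static void main(String[] args) {
        long max = Long.MAX_VALUE; // example range only
        System.out.println(dhtAngle(max / 4, max)); // prints 90
    }
}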

@ -73,7 +73,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
private Map rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
private plasmaSearchTimingProfile profileLocal, profileGlobal;
private boolean postsort;
private yacySearch[] searchThreads;
private yacySearch[] primarySearchThreads, secondarySearchThreads;
public plasmaSearchEvent(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
@ -96,7 +96,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
this.profileLocal = localTiming;
this.profileGlobal = remoteTiming;
this.postsort = postsort;
this.searchThreads = null;
this.primarySearchThreads = null;
this.secondarySearchThreads = null;
}
public plasmaSearchQuery getQuery() {
@ -107,8 +108,11 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
return profileLocal;
}
public yacySearch[] getSearchThreads() {
return searchThreads;
public yacySearch[] getPrimarySearchThreads() {
return primarySearchThreads;
}
public yacySearch[] getSecondarySearchThreads() {
return secondarySearchThreads;
}
public plasmaSearchResult search() {
@ -134,7 +138,9 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 2;
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
searchThreads = yacySearch.searchHashes(query.queryHashes, query.prefer, query.urlMask, query.maxDistance, urlStore, rcContainers, rcAbstracts, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking);
primarySearchThreads = yacySearch.primaryRemoteSearches(plasmaSearchQuery.hashSet2hashString(query.queryHashes), "",
query.prefer, query.urlMask, query.maxDistance, urlStore, rcContainers, rcAbstracts,
fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking);
// meanwhile do a local search
Map searchContainerMap = localSearchContainers(null);
@ -144,35 +150,16 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// evaluate index abstracts and start a secondary search
// this is temporary debugging code to learn that the index abstracts are fetched correctly
while (System.currentTimeMillis() < secondaryTimeout + 10000) {
if (yacySearch.remainingWaiting(searchThreads) == 0) break; // all threads have finished
if (yacySearch.remainingWaiting(primarySearchThreads) == 0) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
System.out.println("DEBUG-INDEXABSTRACT: " + rcAbstracts.size() + " word references catched, " + query.size() + " needed");
/*
Iterator i = rcAbstracts.entrySet().iterator();
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
System.out.println("DEBUG-INDEXABSTRACT: hash " + (String) entry.getKey() + ": " + ((query.queryHashes.contains((String) entry.getKey())) ? "NEEDED" : "NOT NEEDED") + "; " + ((TreeMap) entry.getValue()).size() + " entries");
}
*/
TreeMap abstractJoin = (rcAbstracts.size() == query.size()) ? kelondroMSetTools.joinConstructive(rcAbstracts.values()) : new TreeMap();
if (abstractJoin.size() == 0) {
System.out.println("DEBUG-INDEXABSTRACT: no success using index abstracts from remote peers");
} else {
System.out.println("DEBUG-INDEXABSTRACT: index abstracts delivered " + abstractJoin.size() + " additional results for secondary search");
Iterator i = abstractJoin.entrySet().iterator();
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
System.out.println("DEBUG-INDEXABSTRACT: url " + (String) entry.getKey() + ": from peers " + (String) entry.getValue());
}
}
prepareSecondarySearch();
// catch up global results:
// wait until primary timeout passed
while (System.currentTimeMillis() < primaryTimeout) {
if (yacySearch.remainingWaiting(searchThreads) == 0) break; // all threads have finished
if ((yacySearch.remainingWaiting(primarySearchThreads) == 0) &&
((secondarySearchThreads == null) || (yacySearch.remainingWaiting(secondarySearchThreads) == 0))) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
int globalContributions = rcContainers.size();
@ -181,7 +168,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
log.logFine("SEARCH TIME AFTER GLOBAL-TRIGGER TO " + fetchpeers + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
// combine the result and order
plasmaSearchResult result = ((globalContributions == 0) && (localResult.sizeOrdered() != 0)) ? localResult : order(rcLocal);
plasmaSearchResult result = ((globalContributions == 0) && (localResult.sizeOrdered() != 0)) ? localResult : orderFinal(rcLocal);
result.globalContributions = globalContributions;
result.localContributions = rcLocal.size();
@ -195,7 +182,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
} else {
Map searchContainerMap = localSearchContainers(null);
indexContainer rcLocal = localSearchJoin((searchContainerMap == null) ? null : searchContainerMap.values());
plasmaSearchResult result = order(rcLocal);
plasmaSearchResult result = orderFinal(rcLocal);
result.localContributions = rcLocal.size();
// return search result
@ -206,6 +193,91 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
}
}
private void prepareSecondarySearch() {
// catch up index abstracts and join them; then call peers again to submit their urls
System.out.println("DEBUG-INDEXABSTRACT: " + rcAbstracts.size() + " word references catched, " + query.size() + " needed");
if (rcAbstracts.size() != query.size()) return; // secondary search not possible
Iterator i = rcAbstracts.entrySet().iterator();
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
System.out.println("DEBUG-INDEXABSTRACT: hash " + (String) entry.getKey() + ": " + ((query.queryHashes.contains((String) entry.getKey())) ? "NEEDED" : "NOT NEEDED") + "; " + ((TreeMap) entry.getValue()).size() + " entries");
}
TreeMap abstractJoin = (rcAbstracts.size() == query.size()) ? kelondroMSetTools.joinConstructive(rcAbstracts.values(), true) : new TreeMap();
if (abstractJoin.size() == 0) {
System.out.println("DEBUG-INDEXABSTRACT: no success using index abstracts from remote peers");
} else {
System.out.println("DEBUG-INDEXABSTRACT: index abstracts delivered " + abstractJoin.size() + " additional results for secondary search");
// generate query for secondary search
TreeMap secondarySearchURLs = new TreeMap(); // a (peerhash:urlhash-liststring) mapping
Iterator i1 = abstractJoin.entrySet().iterator();
Map.Entry entry1;
String url, urls, peer, peers;
while (i1.hasNext()) {
entry1 = (Map.Entry) i1.next();
url = (String) entry1.getKey();
peers = (String) entry1.getValue();
System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peers);
for (int j = 0; j < peers.length(); j = j + 12) {
peer = peers.substring(j, j + 12);
if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin
urls = (String) secondarySearchURLs.get(peer);
urls = (urls == null) ? url : urls + url;
secondarySearchURLs.put(peer, urls);
}
}
// compute words for secondary search and start the secondary searches
i1 = secondarySearchURLs.entrySet().iterator();
String words;
secondarySearchThreads = new yacySearch[secondarySearchURLs.size()];
int c = 0;
while (i1.hasNext()) {
entry1 = (Map.Entry) i1.next();
peer = (String) entry1.getKey();
urls = (String) entry1.getValue();
words = wordsFromPeer(peer, urls);
System.out.println("DEBUG-INDEXABSTRACT: peer " + peer + " has urls: " + urls);
System.out.println("DEBUG-INDEXABSTRACT: peer " + peer + " from words: " + words);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, urls, urlStore, rcContainers, peer, plasmaSwitchboard.urlBlacklist, snippetCache,
profileGlobal, ranking);
}
}
}
private String wordsFromPeer(String peerhash, String urls) {
Map.Entry entry;
String word, peerlist, url, wordlist = "";
TreeMap urlPeerlist;
int p;
boolean hasURL;
synchronized (rcAbstracts) {
Iterator i = rcAbstracts.entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
word = (String) entry.getKey();
urlPeerlist = (TreeMap) entry.getValue();
hasURL = true;
for (int j = 0; j < urls.length(); j = j + 12) {
url = urls.substring(j, j + 12);
peerlist = (String) urlPeerlist.get(url);
p = (peerlist == null) ? -1 : peerlist.indexOf(peerhash);
if ((p < 0) || (p % 12 != 0)) {
hasURL = false;
break;
}
}
if (hasURL) wordlist += word;
}
}
return wordlist;
}
public Map localSearchContainers(Set urlselection) {
// search for the set of hashes and return a map of wordhash:indexContainer containing the search result
@ -243,7 +315,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
return rcLocal;
}
public plasmaSearchResult order(indexContainer rcLocal) {
public plasmaSearchResult orderFinal(indexContainer rcLocal) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
@ -263,6 +335,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// start url-fetch
long postorderTime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_POSTSORT);
System.out.println("DEBUG: postorder-final (urlfetch) maxtime = " + postorderTime);
long postorderLimitTime = (postorderTime < 0) ? Long.MAX_VALUE : (System.currentTimeMillis() + postorderTime);
profileLocal.startTimer();
plasmaSearchResult acc = new plasmaSearchResult(query, ranking);
@ -307,20 +380,17 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
return acc;
}
private plasmaSearchResult orderLocal(indexContainer rcLocal, long maxtime) {
private plasmaSearchResult orderLocal(indexContainer rcLocal, long timeout) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
profileLocal.startTimer();
if (maxtime < 0) maxtime = 200;
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, rcLocal, maxtime);
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, rcLocal, timeout - System.currentTimeMillis());
preorder.remove(true, true);
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_PRESORT);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_PRESORT, rcLocal.size());
// start url-fetch
maxtime = Math.max(200, maxtime - profileLocal.getYieldTime(plasmaSearchTimingProfile.PROCESS_PRESORT));
long postorderLimitTime = System.currentTimeMillis() + maxtime;
profileLocal.startTimer();
plasmaSearchResult acc = new plasmaSearchResult(query, ranking);
@ -330,7 +400,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
Object[] preorderEntry;
try {
while (preorder.hasNext()) {
if (System.currentTimeMillis() >= postorderLimitTime) break;
if (System.currentTimeMillis() >= timeout) break;
preorderEntry = preorder.next();
entry = (indexEntry) preorderEntry[0];
preranking = (Long) preorderEntry[1];
@ -368,15 +438,21 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// it is wise to call this within a separate thread because
// this method waits until all threads are finished
int remaining;
int remaining = 0;
if (primarySearchThreads == null) return;
long starttime = System.currentTimeMillis();
while ((searchThreads != null) && ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0)) {
while (true) {
remaining = yacySearch.remainingWaiting(primarySearchThreads);
if (secondarySearchThreads != null) remaining += yacySearch.remainingWaiting(secondarySearchThreads);
if (remaining == 0) break;
flushGlobalResults();
// wait a little bit before trying again
try {Thread.sleep(3000);} catch (InterruptedException e) {}
try {Thread.sleep(1000);} catch (InterruptedException e) {}
if (System.currentTimeMillis() - starttime > 90000) {
yacySearch.interruptAlive(searchThreads);
yacySearch.interruptAlive(primarySearchThreads);
if (secondarySearchThreads != null) yacySearch.interruptAlive(secondarySearchThreads);
log.logFine("SEARCH FLUSH: " + remaining + " PEERS STILL BUSY; ABANDONED; SEARCH WAS " + query.queryWords);
break;
}
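
wordsFromPeer above decides which of the query words a given peer can serve for all of the joined url hashes: a word qualifies only if the peer hash appears, aligned to a 12-character boundary, in the peer list of every url. A compact sketch of that check with plain collections and illustrative names:

import java.util.Map;
import java.util.TreeMap;

public class WordsFromPeerSketch {

    // abstracts: word hash -> (url hash -> concatenated 12-character peer hashes)
    // Returns the concatenation of all word hashes the peer holds for every url in 'urls'.
    static String wordsFromPeer(String peerHash, String urls,
                                Map<String, TreeMap<String, String>> abstracts) {
        StringBuilder wordlist = new StringBuilder();
        for (Map.Entry<String, TreeMap<String, String>> e : abstracts.entrySet()) {
            boolean hasAllUrls = true;
            for (int j = 0; j + 12 <= urls.length(); j += 12) {
                String url = urls.substring(j, j + 12);
                String peerlist = e.getValue().get(url);
                int p = (peerlist == null) ? -1 : peerlist.indexOf(peerHash);
                if (p < 0 || p % 12 != 0) { hasAllUrls = false; break; } // must start at a hash boundary
            }
            if (hasAllUrls) wordlist.append(e.getKey());
        }
        return wordlist.toString();
    }
}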

@ -42,6 +42,7 @@
package de.anomic.plasma;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.Iterator;
@ -59,8 +60,7 @@ public final class plasmaSearchQuery {
public static final int SEARCHDOM_GLOBALDHT = 3;
public static final int SEARCHDOM_GLOBALALL = 4;
public Set queryWords;
public Set queryHashes;
public Set queryWords, queryHashes;
public int wantedResults;
public String prefer;
public long maximumTime;
@ -99,12 +99,18 @@ public final class plasmaSearchQuery {
this.domMaxTargets = -1;
}
public static Set words2hashes(String[] words) {
public static Set words2hashSet(String[] words) {
TreeSet hashes = new TreeSet();
for (int i = 0; i < words.length; i++) hashes.add(indexEntryAttribute.word2hash(words[i]));
return hashes;
}
public static String words2hashString(String[] words) {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < words.length; i++) sb.append(indexEntryAttribute.word2hash(words[i]));
return new String(sb);
}
public static Set words2hashes(Set words) {
Iterator i = words.iterator();
TreeSet hashes = new TreeSet();
@ -112,6 +118,22 @@ public final class plasmaSearchQuery {
return hashes;
}
public static Set hashes2Set(String query) {
if (query == null) return new HashSet();
final HashSet keyhashes = new HashSet(query.length() / indexEntryAttribute.wordHashLength);
for (int i = 0; i < (query.length() / indexEntryAttribute.wordHashLength); i++) {
keyhashes.add(query.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength));
}
return keyhashes;
}
public static String hashSet2hashString(Set words) {
Iterator i = words.iterator();
StringBuffer sb = new StringBuffer(words.size() * indexEntryAttribute.wordHashLength);
while (i.hasNext()) sb.append((String) i.next());
return new String(sb);
}
public static TreeSet cleanQuery(String words) {
// convert Umlaute
words = htmlFilterAbstractScraper.convertUmlaute(new serverByteBuffer(words.getBytes())).toString();
@ -148,6 +170,7 @@ public final class plasmaSearchQuery {
return result.toString();
}
/*
public String hashes(String separator) {
StringBuffer result = new StringBuffer(8 * queryHashes.size());
Iterator i = queryHashes.iterator();
@ -158,7 +181,8 @@ public final class plasmaSearchQuery {
}
return result.toString();
}
*/
public void filterOut(Set blueList) {
// filter out words that appear in this set
Iterator it = queryWords.iterator();
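
The new hashes2Set and hashSet2hashString helpers convert between the transport form (all hashes concatenated into one string) and the in-memory Set. A minimal round-trip sketch, assuming a fixed hash length of 12 characters purely for illustration (the committed code uses indexEntryAttribute.wordHashLength):

import java.util.Set;
import java.util.TreeSet;

public class HashStringSketch {

    static final int HASH_LENGTH = 12; // illustrative fixed hash length

    // Split a concatenated hash string into its fixed-length pieces.
    static Set<String> hashes2Set(String concatenated) {
        Set<String> hashes = new TreeSet<String>();
        if (concatenated == null) return hashes;
        for (int i = 0; i + HASH_LENGTH <= concatenated.length(); i += HASH_LENGTH) {
            hashes.add(concatenated.substring(i, i + HASH_LENGTH));
        }
        return hashes;
    }

    // The reverse direction: join all hashes into one string for the request parameter.
    static String set2hashString(Set<String> hashes) {
        StringBuilder sb = new StringBuilder(hashes.size() * HASH_LENGTH);
        for (String h : hashes) sb.append(h);
        return sb.toString();
    }

    public static void main(String[] args) {
        Set<String> s = hashes2Set("AAAAAAAAAAAABBBBBBBBBBBB");
        System.out.println(s);                 // prints [AAAAAAAAAAAA, BBBBBBBBBBBB]
        System.out.println(set2hashString(s)); // prints AAAAAAAAAAAABBBBBBBBBBBB
    }
}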

@ -209,8 +209,8 @@ public class plasmaSearchRankingProfile {
}
// apply query-in-result matching
Set urlcomph = plasmaSearchQuery.words2hashes(urlcomps);
Set descrcomph = plasmaSearchQuery.words2hashes(descrcomps);
Set urlcomph = plasmaSearchQuery.words2hashSet(urlcomps);
Set descrcomph = plasmaSearchQuery.words2hashSet(descrcomps);
Iterator shi = query.queryHashes.iterator();
String queryhash;
while (shi.hasNext()) {

@ -366,6 +366,7 @@ public final class yacyClient {
public static int search(
String wordhashes,
String urlhashes,
String prefer,
String filter,
int maxDistance,
@ -422,6 +423,7 @@ public final class yacyClient {
obj.put("count", timingProfile.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT));
obj.put("resource", ((global) ? "global" : "local"));
obj.put("query", wordhashes);
obj.put("urls", urlhashes);
obj.put("prefer", prefer);
obj.put("filter", filter);
obj.put("ttl", "0");
@ -448,6 +450,11 @@ public final class yacyClient {
)
);
if (result.size() == 0) {
yacyCore.log.logFine("SEARCH failed FROM " + targetPeer.hash + ":" + targetPeer.getName() + ", score=" + targetPeer.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(targetPeer.hash, wordhashes));
return 0;
}
// compute all computation times
final long totalrequesttime = System.currentTimeMillis() - timestamp;
String returnProfile = (String) result.get("profile");
@ -470,7 +477,7 @@ public final class yacyClient {
// references : references (search hints) that was calculated during search
// now create a plasmaIndex out of this result
//System.out.println("yacyClient: search result = " + result.toString()); // debug
System.out.println("yacyClient: " + ((urlhashes.length() == 0) ? "primary" : "secondary")+ " search result = " + result.toString()); // debug
final int results = Integer.parseInt((String) result.get("count"));
//System.out.println("***result count " + results);

@ -48,10 +48,12 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import de.anomic.index.indexContainer;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
@ -60,7 +62,7 @@ import de.anomic.server.logging.serverLog;
public class yacySearch extends Thread {
final private Set wordhashes;
final private String wordhashes, urlhashes;
final private boolean global;
final private plasmaCrawlLURL urlManager;
final private indexContainer containerCache;
@ -74,13 +76,14 @@ public class yacySearch extends Thread {
final private plasmaSearchRankingProfile rankingProfile;
final private String prefer, filter;
public yacySearch(Set wordhashes, String prefer, String filter, int maxDistance,
public yacySearch(String wordhashes, String urlhashes, String prefer, String filter, int maxDistance,
boolean global, yacySeed targetPeer, plasmaCrawlLURL urlManager,
indexContainer containerCache, Map abstractCache,
plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile) {
super("yacySearch_" + targetPeer.getName());
this.wordhashes = wordhashes;
this.urlhashes = urlhashes;
this.prefer = prefer;
this.filter = filter;
this.global = global;
@ -97,7 +100,7 @@ public class yacySearch extends Thread {
}
public void run() {
this.links = yacyClient.search(set2string(wordhashes), prefer, filter, maxDistance, global, targetPeer, urlManager, containerCache, abstractCache, blacklist, snippetCache, timingProfile, rankingProfile);
this.links = yacyClient.search(wordhashes, urlhashes, prefer, filter, maxDistance, global, targetPeer, urlManager, containerCache, abstractCache, blacklist, snippetCache, timingProfile, rankingProfile);
if (links != 0) {
//yacyCore.log.logInfo("REMOTE SEARCH - remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + links + " links for word hash " + wordhashes);
yacyCore.seedDB.mySeed.incRI(links);
@ -186,7 +189,7 @@ public class yacySearch extends Thread {
return result;
}
public static yacySearch[] searchHashes(Set wordhashes, String prefer, String filter, int maxDist, plasmaCrawlLURL urlManager,
public static yacySearch[] primaryRemoteSearches(String wordhashes, String urlhashes, String prefer, String filter, int maxDist, plasmaCrawlLURL urlManager,
indexContainer containerCache, Map abstractCache,
int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile) {
@ -195,13 +198,13 @@ public class yacySearch extends Thread {
// prepare seed targets and threads
//Set wordhashes = plasmaSearch.words2hashes(querywords);
final yacySeed[] targetPeers = selectPeers(wordhashes, targets);
final yacySeed[] targetPeers = selectPeers(plasmaSearchQuery.hashes2Set(wordhashes), targets);
if (targetPeers == null) return null;
targets = targetPeers.length;
if (targets == 0) return null;
yacySearch[] searchThreads = new yacySearch[targets];
for (int i = 0; i < targets; i++) {
searchThreads[i]= new yacySearch(wordhashes, prefer, filter, maxDist, true, targetPeers[i],
searchThreads[i]= new yacySearch(wordhashes, urlhashes, prefer, filter, maxDist, true, targetPeers[i],
urlManager, containerCache, abstractCache, blacklist, snippetCache, timingProfile, rankingProfile);
searchThreads[i].start();
//try {Thread.sleep(20);} catch (InterruptedException e) {}
@ -209,6 +212,22 @@ public class yacySearch extends Thread {
return searchThreads;
}
public static yacySearch secondaryRemoteSearch(String wordhashes, String urlhashes, plasmaCrawlLURL urlManager, indexContainer containerCache,
String targethash, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile) {
// check own peer status
if (yacyCore.seedDB.mySeed == null || yacyCore.seedDB.mySeed.getAddress() == null) { return null; }
// prepare seed targets and threads
//Set wordhashes = plasmaSearch.words2hashes(querywords);
final yacySeed targetPeer = yacyCore.seedDB.getConnected(targethash);
if (targetPeer == null) return null;
yacySearch searchThread = new yacySearch(wordhashes, urlhashes, "", "", 9999, true, targetPeer,
urlManager, containerCache, new TreeMap(), blacklist, snippetCache, timingProfile, rankingProfile);
searchThread.start();
return searchThread;
}
public static int remainingWaiting(yacySearch[] searchThreads) {
if (searchThreads == null) return 0;
int alive = 0;
