more changes towards better join-search

- fixed problems with index-abstract generation
- added analysis output for received index abstracts

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2551 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 8219ce6c67
commit 4f9e42d5ed

@ -68,6 +68,7 @@ import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyDHTAction;
import de.anomic.yacy.yacySeed;
public final class search {
@ -135,20 +136,29 @@ public final class search {
Map containers = theSearch.localSearchContainers(urlselection);
// set statistic details of search result and find best result index set
String maxcounthash = null;
String maxcounthash = null, neardhthash = null;
if (containers == null) {
prop.put("indexcount", "");
} else {
Iterator ci = containers.entrySet().iterator();
StringBuffer indexcount = new StringBuffer();
Map.Entry entry;
String wordhash;
int maxcount = -1;
double mindhtdistance = 1.1, d;
String wordhash;
while (ci.hasNext()) {
entry = (Map.Entry) ci.next();
wordhash = (String) entry.getKey();
indexContainer container = (indexContainer) entry.getValue();
if (container.size() > maxcount) maxcounthash = wordhash;
if (container.size() > maxcount) {
maxcounthash = wordhash;
maxcount = container.size();
}
d = yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhash);
if (d < mindhtdistance) {
mindhtdistance = d;
neardhthash = wordhash;
}
indexcount.append("indexcount.").append(container.getWordHash()).append('=').append(Integer.toString(container.size())).append(serverCore.crlfString);
}
prop.put("indexcount", new String(indexcount));
@ -159,7 +169,10 @@ public final class search {
if ((maxcounthash == null) || (urls.length() != 0)) {
prop.put("indexabstract","");
} else {
String indexabstract = "indexabstract." + maxcounthash + "=" + indexURL.compressIndex(((indexContainer) containers.get(maxcounthash)), 1000).toString();
String indexabstract = "indexabstract." + maxcounthash + "=" + indexURL.compressIndex(((indexContainer) containers.get(maxcounthash)), 1000).toString() + serverCore.crlfString;
if ((neardhthash != null) && (!(neardhthash.equals(maxcounthash)))) {
indexabstract += "indexabstract." + neardhthash + "=" + indexURL.compressIndex(((indexContainer) containers.get(neardhthash)), 1000).toString() + serverCore.crlfString;
}
//yacyCore.log.logFine("DEBUG HASH SEARCH: " + indexabstract);
prop.put("indexabstract", indexabstract);
}

@ -679,12 +679,13 @@ public class indexURL {
public static final void decompressIndex(TreeMap target, serverByteBuffer ci, String peerhash) {
// target is a mapping from url-hashes to a string of peer-hashes
if ((ci.byteAt(0) == '{') && (ci.byteAt(ci.length() - 1) == '}')) {
//System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString());
ci = ci.trim(1, ci.length() - 1);
String dom, url, peers;
while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) {
dom = ci.toString(0, 6);
ci.trim(7);
while ((ci.length() == 6) || ((ci.length() > 6) && (ci.byteAt(6) != ','))) {
while ((ci.length() > 0) && (ci.byteAt(0) != ',')) {
url = ci.toString(0, 6) + dom;
ci.trim(6);
peers = (String) target.get(url);
@ -693,6 +694,7 @@ public class indexURL {
} else {
target.put(url, peers + peerhash);
}
//System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url));
}
if (ci.byteAt(0) == ',') ci.trim(1);
}

@ -70,7 +70,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
private plasmaSnippetCache snippetCache;
private indexContainer rcContainers; // cache for results
private int rcContainerCount;
private Map rcAbstracts; // cache for index abstracts
private Map rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
private plasmaSearchTimingProfile profileLocal, profileGlobal;
private boolean postsort;
private yacySearch[] searchThreads;
@ -132,21 +132,34 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
long timeout = System.currentTimeMillis() + profileGlobal.duetime();
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 2;
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
searchThreads = yacySearch.searchHashes(query.queryHashes, query.prefer, query.urlMask, query.maxDistance, urlStore, rcContainers, rcAbstracts, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking);
// meanwhile do a local search
Map searchContainerMap = localSearchContainers(null);
indexContainer rcLocal = localSearchJoin((searchContainerMap == null) ? null : searchContainerMap.values());
plasmaSearchResult localResult = orderLocal(rcLocal, timeout);
plasmaSearchResult localResult = orderLocal(rcLocal, secondaryTimeout);
// evaluate index abstracts and start a secondary search
// this is temporary debugging code to verify that the index abstracts are fetched correctly
while (System.currentTimeMillis() < secondaryTimeout + 10000) {
if (yacySearch.remainingWaiting(searchThreads) == 0) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
System.out.println("DEBUG-INDEXABSTRACT: " + rcAbstracts.size() + " word references catched, " + query.size() + " needed");
Iterator i = rcAbstracts.entrySet().iterator();
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
System.out.println("DEBUG-INDEXABSTRACT: hash " + (String) entry.getKey() + ": " + ((query.queryHashes.contains((String) entry.getKey())) ? "NEEDED" : "NOT NEEDED") + "; " + ((TreeMap) entry.getValue()).size() + " entries");
}
// catch up global results:
// wait until wanted delay passed or wanted result appeared
while (System.currentTimeMillis() < timeout) {
// check if all threads have been finished or results so far are enough
//if (rcGlobal.size() >= profileGlobal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT) * 5) break; // we have enough
if (yacySearch.remainingWaiting(searchThreads) == 0) break; // we cannot expect more
// wait a little time ..
// wait until primary timeout passed
while (System.currentTimeMillis() < primaryTimeout) {
if (yacySearch.remainingWaiting(searchThreads) == 0) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
int globalContributions = rcContainers.size();

@ -537,14 +537,19 @@ public final class yacyClient {
Map.Entry entry;
TreeMap singleAbstract;
String wordhash;
serverByteBuffer ci;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
if (((String) entry.getKey()).startsWith("indexabstract.")) {
wordhash = ((String) entry.getKey()).substring(14);
singleAbstract = (TreeMap) abstractCache.get(wordhash);
if (singleAbstract == null) singleAbstract = new TreeMap();
indexURL.decompressIndex(singleAbstract, new serverByteBuffer(((String) entry.getValue()).getBytes()), targetPeer.hash);
abstractCache.put(wordhash, singleAbstract);
synchronized (abstractCache) {
singleAbstract = (TreeMap) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap();
ci = new serverByteBuffer(((String) entry.getValue()).getBytes());
System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
indexURL.decompressIndex(singleAbstract, ci, targetPeer.hash);
abstractCache.put(wordhash, singleAbstract);
}
}
}

Loading…
Cancel
Save