diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index fec0346c4..2f3bfe4b2 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -68,6 +68,7 @@ import de.anomic.server.serverCore; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyDHTAction; import de.anomic.yacy.yacySeed; public final class search { @@ -135,20 +136,29 @@ public final class search { Map containers = theSearch.localSearchContainers(urlselection); // set statistic details of search result and find best result index set - String maxcounthash = null; + String maxcounthash = null, neardhthash = null; if (containers == null) { prop.put("indexcount", ""); } else { Iterator ci = containers.entrySet().iterator(); StringBuffer indexcount = new StringBuffer(); Map.Entry entry; - String wordhash; int maxcount = -1; + double mindhtdistance = 1.1, d; + String wordhash; while (ci.hasNext()) { entry = (Map.Entry) ci.next(); wordhash = (String) entry.getKey(); indexContainer container = (indexContainer) entry.getValue(); - if (container.size() > maxcount) maxcounthash = wordhash; + if (container.size() > maxcount) { + maxcounthash = wordhash; + maxcount = container.size(); + } + d = yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhash); + if (d < mindhtdistance) { + mindhtdistance = d; + neardhthash = wordhash; + } indexcount.append("indexcount.").append(container.getWordHash()).append('=').append(Integer.toString(container.size())).append(serverCore.crlfString); } prop.put("indexcount", new String(indexcount)); @@ -159,7 +169,10 @@ public final class search { if ((maxcounthash == null) || (urls.length() != 0)) { prop.put("indexabstract",""); } else { - String indexabstract = "indexabstract." + maxcounthash + "=" + indexURL.compressIndex(((indexContainer) containers.get(maxcounthash)), 1000).toString(); + String indexabstract = "indexabstract." + maxcounthash + "=" + indexURL.compressIndex(((indexContainer) containers.get(maxcounthash)), 1000).toString() + serverCore.crlfString; + if ((neardhthash != null) && (!(neardhthash.equals(maxcounthash)))) { + indexabstract += "indexabstract." + neardhthash + "=" + indexURL.compressIndex(((indexContainer) containers.get(neardhthash)), 1000).toString() + serverCore.crlfString; + } //yacyCore.log.logFine("DEBUG HASH SEARCH: " + indexabstract); prop.put("indexabstract", indexabstract); } diff --git a/source/de/anomic/index/indexURL.java b/source/de/anomic/index/indexURL.java index 947d527a0..27a5a2534 100644 --- a/source/de/anomic/index/indexURL.java +++ b/source/de/anomic/index/indexURL.java @@ -679,12 +679,13 @@ public class indexURL { public static final void decompressIndex(TreeMap target, serverByteBuffer ci, String peerhash) { // target is a mapping from url-hashes to a string of peer-hashes if ((ci.byteAt(0) == '{') && (ci.byteAt(ci.length() - 1) == '}')) { + //System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString()); ci = ci.trim(1, ci.length() - 1); String dom, url, peers; while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) { dom = ci.toString(0, 6); ci.trim(7); - while ((ci.length() == 6) || ((ci.length() > 6) && (ci.byteAt(6) != ','))) { + while ((ci.length() > 0) && (ci.byteAt(0) != ',')) { url = ci.toString(0, 6) + dom; ci.trim(6); peers = (String) target.get(url); @@ -693,6 +694,7 @@ public class indexURL { } else { target.put(url, peers + peerhash); } + //System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url)); } if (ci.byteAt(0) == ',') ci.trim(1); } diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 8e5bb0c5f..6c7766d7d 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -70,7 +70,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { private plasmaSnippetCache snippetCache; private indexContainer rcContainers; // cache for results private int rcContainerCount; - private Map rcAbstracts; // cache for index abstracts + private Map rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation private plasmaSearchTimingProfile profileLocal, profileGlobal; private boolean postsort; private yacySearch[] searchThreads; @@ -132,21 +132,34 @@ public final class plasmaSearchEvent extends Thread implements Runnable { log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS"); - long timeout = System.currentTimeMillis() + profileGlobal.duetime(); + long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 2; + long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime(); searchThreads = yacySearch.searchHashes(query.queryHashes, query.prefer, query.urlMask, query.maxDistance, urlStore, rcContainers, rcAbstracts, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking); // meanwhile do a local search Map searchContainerMap = localSearchContainers(null); indexContainer rcLocal = localSearchJoin((searchContainerMap == null) ? null : searchContainerMap.values()); - plasmaSearchResult localResult = orderLocal(rcLocal, timeout); + plasmaSearchResult localResult = orderLocal(rcLocal, secondaryTimeout); + + // evaluate index abstracts and start a secondary search + // this is temporary debugging code to learn that the index abstracts are fetched correctly + while (System.currentTimeMillis() < secondaryTimeout + 10000) { + if (yacySearch.remainingWaiting(searchThreads) == 0) break; // all threads have finished + try {Thread.sleep(100);} catch (InterruptedException e) {} + } + System.out.println("DEBUG-INDEXABSTRACT: " + rcAbstracts.size() + " word references catched, " + query.size() + " needed"); + Iterator i = rcAbstracts.entrySet().iterator(); + Map.Entry entry; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + System.out.println("DEBUG-INDEXABSTRACT: hash " + (String) entry.getKey() + ": " + ((query.queryHashes.contains((String) entry.getKey())) ? "NEEDED" : "NOT NEEDED") + "; " + ((TreeMap) entry.getValue()).size() + " entries"); + } + // catch up global results: - // wait until wanted delay passed or wanted result appeared - while (System.currentTimeMillis() < timeout) { - // check if all threads have been finished or results so far are enough - //if (rcGlobal.size() >= profileGlobal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT) * 5) break; // we have enough - if (yacySearch.remainingWaiting(searchThreads) == 0) break; // we cannot expect more - // wait a little time .. + // wait until primary timeout passed + while (System.currentTimeMillis() < primaryTimeout) { + if (yacySearch.remainingWaiting(searchThreads) == 0) break; // all threads have finished try {Thread.sleep(100);} catch (InterruptedException e) {} } int globalContributions = rcContainers.size(); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 1fa7ab752..261a9aa0f 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -537,14 +537,19 @@ public final class yacyClient { Map.Entry entry; TreeMap singleAbstract; String wordhash; + serverByteBuffer ci; while (i.hasNext()) { entry = (Map.Entry) i.next(); if (((String) entry.getKey()).startsWith("indexabstract.")) { wordhash = ((String) entry.getKey()).substring(14); - singleAbstract = (TreeMap) abstractCache.get(wordhash); - if (singleAbstract == null) singleAbstract = new TreeMap(); - indexURL.decompressIndex(singleAbstract, new serverByteBuffer(((String) entry.getValue()).getBytes()), targetPeer.hash); - abstractCache.put(wordhash, singleAbstract); + synchronized (abstractCache) { + singleAbstract = (TreeMap) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes + if (singleAbstract == null) singleAbstract = new TreeMap(); + ci = new serverByteBuffer(((String) entry.getValue()).getBytes()); + System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString()); + indexURL.decompressIndex(singleAbstract, ci, targetPeer.hash); + abstractCache.put(wordhash, singleAbstract); + } } }