diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java
index 9b1a9c5ff..62fee8b59 100644
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@@ -32,6 +32,9 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.TimeUnit;
 
 import net.yacy.kelondro.data.word.WordReference;
 import net.yacy.kelondro.data.word.WordReferenceVars;
@@ -93,7 +96,7 @@ public final class SearchEvent {
         this.crawlResults = crawlResults;
         this.query = query;
         this.secondarySearchSuperviser = (query.queryHashes.size() > 1) ? new SecondarySearchSuperviser() : null; // generate abstracts only for combined searches
-        //if (this.secondarySearchSuperviser != null) this.secondarySearchSuperviser.start();
+        if (this.secondarySearchSuperviser != null) this.secondarySearchSuperviser.start();
         this.primarySearchThreads = null;
         this.secondarySearchThreads = null;
         this.preselectedPeerHashes = preselectedPeerHashes;
@@ -122,7 +125,6 @@ public final class SearchEvent {
             this.primarySearchThreads = (query.queryHashes.isEmpty()) ? null : yacySearch.primaryRemoteSearches(
                     QueryParams.hashSet2hashString(query.queryHashes),
                     QueryParams.hashSet2hashString(query.excludeHashes),
-                    "",
                     query.prefer,
                     query.urlMask,
                     query.targetlang == null ? "" : query.targetlang,
@@ -336,9 +338,13 @@ public final class SearchEvent {
         // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
         // this relation contains the information where specific urls can be found in specific peers
         TreeMap<String, TreeMap<String, String>> abstractsCache;
+        TreeSet<String> checkedPeers;
+        Semaphore trigger;
 
         public SecondarySearchSuperviser() {
             this.abstractsCache = new TreeMap<String, TreeMap<String, String>>();
+            this.checkedPeers = new TreeSet<String>();
+            this.trigger = new Semaphore(0);
         }
 
         /**
@@ -369,6 +375,10 @@ public final class SearchEvent {
             }
         }
 
+        public void commitAbstract() {
+            this.trigger.release();
+        }
+
         private String wordsFromPeer(final String peerhash, final String urls) {
             Map.Entry<String, TreeMap<String, String>> entry;
             String word, peerlist, url, wordlist = "";
@@ -398,41 +408,50 @@ public final class SearchEvent {
         }
 
         public void run() {
-            try {Thread.sleep(5000);} catch (InterruptedException e) {}
-            prepareSecondarySearch();
+            try {
+                while (this.trigger.tryAcquire(10000, TimeUnit.MILLISECONDS)) {
+                    // a trigger was released
+                    prepareSecondarySearch();
+                }
+            } catch (InterruptedException e) {
+                // the thread was interrupted
+                // do nothing
+            }
+            // the time-out was reached
         }
 
         private void prepareSecondarySearch() {
-            if (abstractsCache == null || abstractsCache.size() != query.queryHashes.size()) return; // secondary search not possible (yet)
 
-            // catch up index abstracts and join them; then call peers again to submit their urls
+            /*
             System.out.println("DEBUG-INDEXABSTRACT: " + abstractsCache.size() + " word references caught, " + query.queryHashes.size() + " needed");
             for (Map.Entry<String, TreeMap<String, String>> entry: abstractsCache.entrySet()) {
                 System.out.println("DEBUG-INDEXABSTRACT: hash " + entry.getKey() + ": " + ((query.queryHashes.has(entry.getKey().getBytes()) ? "NEEDED" : "NOT NEEDED") + "; " + entry.getValue().size() + " entries"));
             }
+            */
 
-            final TreeMap<String, String> abstractJoin = (abstractsCache.size() == query.queryHashes.size()) ? SetTools.joinConstructive(abstractsCache.values(), true) : new TreeMap<String, String>();
+            // find out if there are enough references for all words that are searched
+            if (abstractsCache.size() != query.queryHashes.size()) return;
+
+            // join all the urlhash:peerlist relations: the resulting map has values with a combined peer-list list
+            final TreeMap<String, String> abstractJoin = SetTools.joinConstructive(abstractsCache.values(), true);
             if (abstractJoin.isEmpty()) return;
+            // the join result is now a urlhash: peer-list relation
 
-            //System.out.println("DEBUG-INDEXABSTRACT: index abstracts delivered " + abstractJoin.size() + " additional results for secondary search");
-            // generate query for secondary search
+            // generate a list of peers that have the urls for the joined search result
             final TreeMap<String, String> secondarySearchURLs = new TreeMap<String, String>(); // a (peerhash:urlhash-liststring) mapping
 
-            Iterator<Map.Entry<String, String>> i1 = abstractJoin.entrySet().iterator();
-            Map.Entry<String, String> entry1;
-            String url, urls, peer, ps;
+            String url, urls, peer, peerlist;
             final String mypeerhash = peers.mySeed().hash;
             boolean mypeerinvolved = false;
             int mypeercount;
 
-            while (i1.hasNext()) {
-                entry1 = i1.next();
-                url = entry1.getKey();
-                ps = entry1.getValue();
-                System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peers);
+            for (Map.Entry<String, String> entry: abstractJoin.entrySet()) {
+                url = entry.getKey();
+                peerlist = entry.getValue();
+                //System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peerlist);
                 mypeercount = 0;
-                for (int j = 0; j < ps.length(); j = j + 12) {
-                    peer = ps.substring(j, j + 12);
+                for (int j = 0; j < peerlist.length(); j += 12) {
+                    peer = peerlist.substring(j, j + 12);
                     if ((peer.equals(mypeerhash)) && (mypeercount++ > 1)) continue;
                     //if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin
                     urls = secondarySearchURLs.get(peer);
@@ -443,21 +462,21 @@ public final class SearchEvent {
             }
 
             // compute words for secondary search and start the secondary searches
-            i1 = secondarySearchURLs.entrySet().iterator();
             String words;
             secondarySearchThreads = new yacySearch[(mypeerinvolved) ? secondarySearchURLs.size() - 1 : secondarySearchURLs.size()];
             int c = 0;
-            while (i1.hasNext()) {
-                entry1 = i1.next();
-                peer = entry1.getKey();
-                if (peer.equals(mypeerhash)) continue; // we dont need to ask ourself
-                urls = entry1.getValue();
+            for (Map.Entry<String, String> entry: secondarySearchURLs.entrySet()) {
+                peer = entry.getKey();
+                if (peer.equals(mypeerhash)) continue; // we don't need to ask ourself
+                if (checkedPeers.contains(peer)) continue; // do not ask a peer again
+                urls = entry.getValue();
                 words = wordsFromPeer(peer, urls);
                 assert words.length() >= 12 : "words = " + words;
-                System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
-                System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
+                //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls + " from words: " + words);
+                rankedCache.moreFeeders(1);
+                checkedPeers.add(peer);
                 secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
-                        words, "", urls, query.getSegment(), peers, crawlResults, rankedCache, peer, Switchboard.urlBlacklist,
+                        words, urls, query.getSegment(), peers, crawlResults, rankedCache, peer, Switchboard.urlBlacklist,
                         query.ranking, query.constraint, preselectedPeerHashes);
             }
 
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index b24d9bfe6..173fd71f6 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -429,6 +429,8 @@ public final class yacyClient {
         post.add(new DefaultCharsetStringPart("constraint", (constraint == null) ? "" : constraint.exportB64()));
         if (secondarySearchSuperviser != null) post.add(new DefaultCharsetStringPart("abstracts", "auto"));
         final long timestamp = System.currentTimeMillis();
+        boolean thisIsASecondarySearch = urlhashes.length() > 0;
+        assert !thisIsASecondarySearch || secondarySearchSuperviser == null;
 
         // send request
         Map<String, String> result = null;
@@ -565,7 +567,7 @@ public final class yacyClient {
             } catch (Exception e) {
                 Log.logException(e);
             }
-            yacyCore.log.logInfo("remote search: peer " + target.getName() + " sent " + container[0].size() + "/" + joincount + " references for joined word queries");
+            yacyCore.log.logInfo("remote search: peer " + target.getName() + " sent " + container[0].size() + "/" + joincount + " references for " + (thisIsASecondarySearch ? "a secondary search" : "joined word queries"));
"a secondary search" : "joined word queries")); // integrate remote top-words/topics final String references = result.get("references"); @@ -604,7 +606,10 @@ public final class yacyClient { ac++; } } - if (ac > 0) yacyCore.log.logInfo("remote search: peer " + target.getName() + " sent " + ac + " index abstracts for words "+ whacc); + if (ac > 0) { + secondarySearchSuperviser.commitAbstract(); + yacyCore.log.logInfo("remote search: peer " + target.getName() + " sent " + ac + " index abstracts for words "+ whacc); + } } // generate statistics diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java index dade258c3..8922a53aa 100644 --- a/source/de/anomic/yacy/yacySearch.java +++ b/source/de/anomic/yacy/yacySearch.java @@ -252,7 +252,7 @@ public class yacySearch extends Thread { } public static yacySearch[] primaryRemoteSearches( - final String wordhashes, final String excludehashes, final String urlhashes, + final String wordhashes, final String excludehashes, final Pattern prefer, final Pattern filter, String language, final String sitehash, final String authorhash, @@ -288,7 +288,7 @@ public class yacySearch extends Thread { for (int i = 0; i < targets; i++) { if (targetPeers[i] == null || targetPeers[i].hash == null) continue; searchThreads[i] = new yacySearch( - wordhashes, excludehashes, urlhashes, prefer, filter, language, + wordhashes, excludehashes, "", prefer, filter, language, sitehash, authorhash, count, maxDist, true, targets, targetPeers[i], indexSegment, peers, crawlResults, containerCache, secondarySearchSuperviser, blacklist, rankingProfile, constraint); @@ -298,7 +298,7 @@ public class yacySearch extends Thread { } public static yacySearch secondaryRemoteSearch( - final String wordhashes, final String excludehashes, final String urlhashes, + final String wordhashes, final String urlhashes, final Segment indexSegment, final yacySeedDB peers, final ResultURLs crawlResults, @@ -310,13 +310,15 @@ public class yacySearch extends Thread { // check own peer status if (peers.mySeed() == null || peers.mySeed().getPublicAddress() == null) { return null; } - + assert urlhashes != null; + assert urlhashes.length() > 0; + // prepare seed targets and threads final yacySeed targetPeer = peers.getConnected(targethash); if (targetPeer == null || targetPeer.hash == null) return null; if (clusterselection != null) targetPeer.setAlternativeAddress(clusterselection.get(targetPeer.hash.getBytes())); final yacySearch searchThread = new yacySearch( - wordhashes, excludehashes, urlhashes, Pattern.compile(""), Pattern.compile(".*"), "", "", "", 0, 9999, true, 0, targetPeer, + wordhashes, "", urlhashes, Pattern.compile(""), Pattern.compile(".*"), "", "", "", 0, 9999, true, 0, targetPeer, indexSegment, peers, crawlResults, containerCache, null, blacklist, rankingProfile, constraint); searchThread.start(); return searchThread;