diff --git a/source/net/yacy/kelondro/data/word/WordReferenceFactory.java b/source/net/yacy/kelondro/data/word/WordReferenceFactory.java index 85d57e7d4..c70b17f54 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceFactory.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceFactory.java @@ -28,8 +28,10 @@ package net.yacy.kelondro.data.word; import java.io.Serializable; import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -116,16 +118,16 @@ public class WordReferenceFactory implements ReferenceFactory, Se * decompress an index abstract that was generated from a word index and transmitted over a network connection * @param ci * @param peerhash - * @return + * @return a urlhash -> peerlist map: this shows in which peers an url is stored */ - public static final SortedMap decompressIndex(ByteBuffer ci, final String peerhash) { - SortedMap target = Collections.synchronizedSortedMap(new TreeMap()); + public static final SortedMap> decompressIndex(ByteBuffer ci, final String peerhash) { + SortedMap> target = Collections.synchronizedSortedMap(new TreeMap>()); // target is a mapping from url-hashes to a string of peer-hashes if (ci.byteAt(0) != '{' || ci.byteAt(ci.length() - 1) != '}') return target; //System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString()); ci = ci.trim(1, ci.length() - 2); String dom, url; - StringBuilder peers; + Set peers; StringBuilder urlsb; while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) { assert ci.length() >= 6 : "ci.length() = " + ci.length(); @@ -140,16 +142,15 @@ public class WordReferenceFactory implements ReferenceFactory, Se peers = target.get(url); if (peers == null) { - peers = new StringBuilder(24); - peers.append(peerhash); + peers = new HashSet(); target.put(url, peers); - } else { - peers.append(peerhash); } + peers.add(peerhash); //System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url)); } if (ci.byteAt(0) == ',') ci.trim(1); } + //System.out.println("DEBUG-DECOMPRESS: " + target); return target; } } diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 0db60280c..9178a7f34 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -26,6 +26,7 @@ package net.yacy.peers; import java.util.Iterator; import java.util.List; +import java.util.Set; import java.util.SortedMap; import java.util.regex.Pattern; @@ -215,7 +216,7 @@ public class RemoteSearch extends Thread { } public static RemoteSearch secondaryRemoteSearch( - final String wordhashes, final String urlhashes, + final Set wordhashes, final String urlhashes, final long time, final Segment indexSegment, final SeedDB peers, @@ -223,7 +224,6 @@ public class RemoteSearch extends Thread { final String targethash, final Blacklist blacklist, final RankingProfile rankingProfile, final Bitfield constraint, final SortedMap clusterselection) { - assert wordhashes.length() >= 12 : "wordhashes = " + wordhashes; // check own peer status if (peers.mySeed() == null || peers.mySeed().getPublicAddress() == null) { return null; } @@ -234,8 +234,10 @@ public class RemoteSearch extends Thread { final Seed targetPeer = peers.getConnected(targethash); if (targetPeer == null || targetPeer.hash == null) return null; if (clusterselection != null) targetPeer.setAlternativeAddress(clusterselection.get(ASCII.getBytes(targetPeer.hash))); + StringBuilder whs = new StringBuilder(24); + for (String s: wordhashes) whs.append(s); final RemoteSearch searchThread = new RemoteSearch( - wordhashes, "", urlhashes, QueryParams.matchnothing_pattern, QueryParams.catchall_pattern, QueryParams.catchall_pattern, new QueryParams.Modifier(""), "", "", "", "all", 20, time, 9999, true, 0, targetPeer, + whs.toString(), "", urlhashes, QueryParams.matchnothing_pattern, QueryParams.catchall_pattern, QueryParams.catchall_pattern, new QueryParams.Modifier(""), "", "", "", "all", 20, time, 9999, true, 0, targetPeer, indexSegment, peers, containerCache, null, blacklist, rankingProfile, constraint); searchThread.start(); return searchThread; diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 39fe53b86..8a691c1aa 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -28,9 +28,11 @@ package net.yacy.search.query; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; @@ -521,17 +523,16 @@ public final class SearchEvent } } - public class SecondarySearchSuperviser extends Thread - { + public class SecondarySearchSuperviser extends Thread { // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation // this relation contains the information where specific urls can be found in specific peers - private final SortedMap> abstractsCache; + private final SortedMap>> abstractsCache; private final SortedSet checkedPeers; private final Semaphore trigger; public SecondarySearchSuperviser() { - this.abstractsCache = Collections.synchronizedSortedMap(new TreeMap>()); + this.abstractsCache = Collections.synchronizedSortedMap(new TreeMap>>()); this.checkedPeers = Collections.synchronizedSortedSet(new TreeSet()); this.trigger = new Semaphore(0); } @@ -542,25 +543,24 @@ public final class SearchEvent * @param wordhash * @param singleAbstract // a mapping from url-hashes to a string of peer-hashes */ - public void addAbstract(final String wordhash, final SortedMap singleAbstract) { - final SortedMap oldAbstract; - oldAbstract = this.abstractsCache.get(wordhash); - if ( oldAbstract == null ) { - // new abstracts in the cache - this.abstractsCache.put(wordhash, singleAbstract); - return; - } + public void addAbstract(final String wordhash, final SortedMap> singleAbstract) { + final SortedMap> oldAbstract = this.abstractsCache.get(wordhash); + if ( oldAbstract == null ) { + // new abstracts in the cache + this.abstractsCache.put(wordhash, singleAbstract); + return; + } // extend the abstracts in the cache: join the single abstracts new Thread() { @Override public void run() { - Thread.currentThread().setName("SearchEvent.paddAbstract:" + wordhash); - for ( final Map.Entry oneref : singleAbstract.entrySet() ) { + Thread.currentThread().setName("SearchEvent.addAbstract:" + wordhash); + for ( final Map.Entry> oneref : singleAbstract.entrySet() ) { final String urlhash = oneref.getKey(); - final StringBuilder peerlistNew = oneref.getValue(); - final StringBuilder peerlistOld = oldAbstract.put(urlhash, peerlistNew); + final Set peerlistNew = oneref.getValue(); + final Set peerlistOld = oldAbstract.put(urlhash, peerlistNew); if ( peerlistOld != null ) { - peerlistOld.append(peerlistNew); + peerlistOld.addAll(peerlistNew); } } } @@ -572,32 +572,21 @@ public final class SearchEvent this.trigger.release(); } - private String wordsFromPeer(final String peerhash, final StringBuilder urls) { - Map.Entry> entry; - String word, url, wordlist = ""; - StringBuilder peerlist; - SortedMap urlPeerlist; - int p; - boolean hasURL; - final Iterator>> i = - this.abstractsCache.entrySet().iterator(); - while ( i.hasNext() ) { - entry = i.next(); + private Set wordsFromPeer(final String peerhash, final Set urls) { + Set wordlist = new HashSet(); + String word; + Set peerlist; + SortedMap> urlPeerlist; // urlhash:peerlist + for ( Map.Entry>> entry: this.abstractsCache.entrySet()) { word = entry.getKey(); urlPeerlist = entry.getValue(); - hasURL = true; - for ( int j = 0; j < urls.length(); j = j + 12 ) { - url = urls.substring(j, j + 12); + for (String url: urls) { peerlist = urlPeerlist.get(url); - p = (peerlist == null) ? -1 : peerlist.indexOf(peerhash); - if ( (p < 0) || (p % 12 != 0) ) { - hasURL = false; + if (peerlist != null && peerlist.contains(peerhash)) { + wordlist.add(word); break; } } - if ( hasURL ) { - wordlist += word; - } } return wordlist; } @@ -605,36 +594,36 @@ public final class SearchEvent @Override public void run() { try { - int t = 0; - while ( this.trigger.tryAcquire(10000, TimeUnit.MILLISECONDS) ) { - // a trigger was released - prepareSecondarySearch(); - t++; - if ( t > 10 ) { + boolean aquired; + while ( aquired = this.trigger.tryAcquire(3000, TimeUnit.MILLISECONDS) ) { + if ( !aquired || MemoryControl.shortStatus()) { break; } + // a trigger was released + prepareSecondarySearch(); } } catch ( final InterruptedException e ) { // the thread was interrupted // do nothing } - // the time-out was reached + // the time-out was reached: + // as we will never again prepare another secondary search, we can flush all cached data + this.abstractsCache.clear(); + this.checkedPeers.clear(); } private void prepareSecondarySearch() { - if ( this.abstractsCache == null - || this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size() ) { + if ( this.abstractsCache == null || this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size() ) { return; // secondary search not possible (yet) } // catch up index abstracts and join them; then call peers again to submit their urls - /* System.out.println("DEBUG-INDEXABSTRACT: " + this.abstractsCache.size() + " word references caught, " + SearchEvent.this.query.queryHashes.size() + " needed"); - for (final Map.Entry> entry: this.abstractsCache.entrySet()) { + for (final Map.Entry>> entry: this.abstractsCache.entrySet()) { System.out.println("DEBUG-INDEXABSTRACT: hash " + entry.getKey() + ": " + ((SearchEvent.this.query.queryHashes.has(entry.getKey().getBytes()) ? "NEEDED" : "NOT NEEDED") + "; " + entry.getValue().size() + " entries")); } - */ + */ // find out if there are enough references for all words that are searched if ( this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size() ) { @@ -642,38 +631,37 @@ public final class SearchEvent } // join all the urlhash:peerlist relations: the resulting map has values with a combined peer-list list - final SortedMap abstractJoin = - SetTools.joinConstructive(this.abstractsCache.values(), true); + final SortedMap> abstractJoin = SetTools.joinConstructive(this.abstractsCache.values(), true); if ( abstractJoin.isEmpty() ) { return; // the join result is now a urlhash: peer-list relation } // generate a list of peers that have the urls for the joined search result - final SortedMap secondarySearchURLs = new TreeMap(); // a (peerhash:urlhash-liststring) mapping - String url, peer; - StringBuilder urls, peerlist; + final SortedMap> secondarySearchURLs = new TreeMap>(); // a (peerhash:urlhash-liststring) mapping + String url; + Set urls; + Set peerlist; final String mypeerhash = SearchEvent.this.peers.mySeed().hash; boolean mypeerinvolved = false; int mypeercount; - for ( final Map.Entry entry : abstractJoin.entrySet() ) { + for ( final Map.Entry> entry : abstractJoin.entrySet() ) { url = entry.getKey(); peerlist = entry.getValue(); //System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peerlist); mypeercount = 0; - for ( int j = 0; j < peerlist.length(); j += 12 ) { - peer = peerlist.substring(j, j + 12); + for (String peer: peerlist) { if ( (peer.equals(mypeerhash)) && (mypeercount++ > 1) ) { continue; } //if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin urls = secondarySearchURLs.get(peer); if ( urls == null ) { - urls = new StringBuilder(24); - urls.append(url); + urls = new HashSet(); + urls.add(url); secondarySearchURLs.put(peer, urls); } else { - urls.append(url); + urls.add(url); } secondarySearchURLs.put(peer, urls); } @@ -683,13 +671,12 @@ public final class SearchEvent } // compute words for secondary search and start the secondary searches - String words; + Set words; SearchEvent.this.secondarySearchThreads = - new RemoteSearch[(mypeerinvolved) ? secondarySearchURLs.size() - 1 : secondarySearchURLs - .size()]; + new RemoteSearch[(mypeerinvolved) ? secondarySearchURLs.size() - 1 : secondarySearchURLs.size()]; int c = 0; - for ( final Map.Entry entry : secondarySearchURLs.entrySet() ) { - peer = entry.getKey(); + for ( final Map.Entry> entry : secondarySearchURLs.entrySet() ) { + String peer = entry.getKey(); if ( peer.equals(mypeerhash) ) { continue; // we don't need to ask ourself } @@ -698,11 +685,10 @@ public final class SearchEvent } urls = entry.getValue(); words = wordsFromPeer(peer, urls); - if ( words.length() == 0 ) { + if ( words.size() == 0 ) { continue; // ??? } - assert words.length() >= 12 : "words = " + words; - //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls + " from words: " + words); + Log.logInfo("SearchEvent.SecondarySearchSuperviser", "asking peer " + peer + " for urls: " + urls + " from words: " + words); this.checkedPeers.add(peer); SearchEvent.this.secondarySearchThreads[c++] = RemoteSearch.secondaryRemoteSearch(