From c9216d5adf25079699806ec002ff88afd6e412fd Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 24 Nov 2011 22:45:31 +0000 Subject: [PATCH] fixed secondary remote search (the process that finds distributed join situations) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8098 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacy/search.java | 3 +- source/net/yacy/kelondro/util/ByteBuffer.java | 16 +---- source/net/yacy/peers/Protocol.java | 7 +- source/net/yacy/peers/RemoteSearch.java | 68 +++++++++---------- source/net/yacy/search/query/SearchEvent.java | 23 ++++--- 5 files changed, 53 insertions(+), 64 deletions(-) diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 445d76799..77898468f 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -38,7 +38,6 @@ import java.util.regex.Pattern; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.RSSMessage; -import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; @@ -333,7 +332,7 @@ public final class search { final Iterator> i = theSearch.abstractsCount(); while (i.hasNext()) { entry = i.next(); - indexcount.append("indexcount.").append(UTF8.String(entry.getKey())).append('=').append((entry.getValue()).toString()).append(serverCore.CRLF_STRING); + indexcount.append("indexcount.").append(ASCII.String(entry.getKey())).append('=').append((entry.getValue()).toString()).append(serverCore.CRLF_STRING); } if (abstractSet != null) { // if a specific index-abstract is demanded, attach it here diff --git a/source/net/yacy/kelondro/util/ByteBuffer.java b/source/net/yacy/kelondro/util/ByteBuffer.java index 6edb528d8..051467e61 100644 --- a/source/net/yacy/kelondro/util/ByteBuffer.java +++ b/source/net/yacy/kelondro/util/ByteBuffer.java @@ -403,7 +403,7 @@ public final class ByteBuffer extends OutputStream { @Override public String toString() { - return UTF8.String(this.buffer, this.offset, this.length); + return UTF8.String(this.buffer, this.offset, this.length); } public String toString(final int left, final int length) { @@ -415,7 +415,7 @@ public final class ByteBuffer extends OutputStream { final StringBuilder sb = new StringBuilder(sblength); int i = 0; sb.setLength(length); - for (int j = left; j < left + length; j++) sb.setCharAt(i++, (char) this.buffer[j]); + for (int j = left; j < left + length; j++) sb.setCharAt(i++, (char) this.buffer[this.offset + j]); return sb; } @@ -506,18 +506,6 @@ public final class ByteBuffer extends OutputStream { this.offset = 0; } - public void reset(final int newSize) { - resize(newSize); - this.reset(); - } - - public void resize(final int newSize) { - if(newSize < 0) throw new IllegalArgumentException("Illegal array size: " + newSize); - final byte[] v = new byte[newSize]; - System.arraycopy(this.buffer,0,v,0,newSize > this.buffer.length ? this.buffer.length : newSize); - this.buffer = v; - } - public void writeTo(final OutputStream dest) throws IOException { dest.write(this.buffer, this.offset, this.length); dest.flush(); diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 2bed8d4f2..33b5c1ed6 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -482,6 +482,9 @@ public final class Protocol { // computation time final long totalrequesttime = System.currentTimeMillis() - timestamp; + final boolean thisIsASecondarySearch = urlhashes.length() > 0; + assert !thisIsASecondarySearch || secondarySearchSuperviser == null; + // create containers final int words = wordhashes.length() / Word.commonHashLength; assert words > 0 : "wordhashes = " + wordhashes; @@ -568,8 +571,6 @@ public final class Protocol { } catch (final Exception e) { Log.logException(e); } - final boolean thisIsASecondarySearch = urlhashes.length() > 0; - assert !thisIsASecondarySearch || secondarySearchSuperviser == null; Network.log.logInfo("remote search: peer " + target.getName() + " sent " + container[0].size() + "/" + result.joincount + " references for " + (thisIsASecondarySearch ? "a secondary search" : "joined word queries")); @@ -651,7 +652,7 @@ public final class Protocol { final boolean global, final int partitions, final String hostname, - String hostaddress, + final String hostaddress, final SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser, final RankingProfile rankingProfile, final Bitfield constraint) throws IOException { diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 3ed52d57d..c9c79581b 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -1,4 +1,4 @@ -// yacySearch.java +// yacySearch.java // ------------------------------------- // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de @@ -44,7 +44,7 @@ import net.yacy.search.ranking.RankingProfile; public class RemoteSearch extends Thread { private static final ThreadGroup ysThreadGroup = new ThreadGroup("yacySearchThreadGroup"); - + final private String wordhashes, excludehashes, urlhashes, sitehash, authorhash; final private boolean global; final private int partitions; @@ -61,16 +61,16 @@ public class RemoteSearch extends Thread { final private String language; final private Bitfield constraint; final private SeedDB peers; - + public RemoteSearch( final String wordhashes, final String excludehashes, - final String urlhashes, + final String urlhashes, // this is the field that is filled during a secondary search to restrict to specific urls that are to be retrieved final Pattern prefer, final Pattern filter, final Pattern snippet, final String language, final String sitehash, final String authorhash, - final int count, final long time, final int maxDistance, + final int count, final long time, final int maxDistance, final boolean global, final int partitions, final Seed targetPeer, final Segment indexSegment, @@ -112,30 +112,30 @@ public class RemoteSearch extends Thread { public void run() { try { this.urls = Protocol.search( - peers.mySeed(), - wordhashes, excludehashes, urlhashes, - prefer, filter, snippet, - language, sitehash, authorhash, - count, time, maxDistance, global, partitions, - targetPeer, indexSegment, containerCache, secondarySearchSuperviser, - blacklist, rankingProfile, constraint); - if (urls >= 0) { + this.peers.mySeed(), + this.wordhashes, this.excludehashes, this.urlhashes, + this.prefer, this.filter, this.snippet, + this.language, this.sitehash, this.authorhash, + this.count, this.time, this.maxDistance, this.global, this.partitions, + this.targetPeer, this.indexSegment, this.containerCache, this.secondarySearchSuperviser, + this.blacklist, this.rankingProfile, this.constraint); + if (this.urls >= 0) { // urls is an array of url hashes. this is only used for log output - if (urlhashes != null && urlhashes.length() > 0) Network.log.logInfo("SECONDARY REMOTE SEARCH - remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + this.urls + " links for word hash " + wordhashes); - peers.mySeed().incRI(urls); - peers.mySeed().incRU(urls); + if (this.urlhashes != null && this.urlhashes.length() > 0) Network.log.logInfo("SECONDARY REMOTE SEARCH - remote peer " + this.targetPeer.hash + ":" + this.targetPeer.getName() + " contributed " + this.urls + " links for word hash " + this.wordhashes); + this.peers.mySeed().incRI(this.urls); + this.peers.mySeed().incRU(this.urls); } else { - Network.log.logInfo("REMOTE SEARCH - no answer from remote peer " + targetPeer.hash + ":" + targetPeer.getName()); + Network.log.logInfo("REMOTE SEARCH - no answer from remote peer " + this.targetPeer.hash + ":" + this.targetPeer.getName()); } } catch (final Exception e) { Log.logException(e); } finally { - containerCache.oneFeederTerminated(); + this.containerCache.oneFeederTerminated(); } } - + public static String set2string(final HandleSet hashes) { - StringBuilder wh = new StringBuilder(hashes.size() * 12); + final StringBuilder wh = new StringBuilder(hashes.size() * 12); final Iterator iter = hashes.iterator(); while (iter.hasNext()) { wh.append(ASCII.String(iter.next())); } return wh.toString(); @@ -144,22 +144,22 @@ public class RemoteSearch extends Thread { public int links() { return this.urls; } - + public int count() { return this.count; } - + public Seed target() { - return targetPeer; + return this.targetPeer; } - + public static RemoteSearch[] primaryRemoteSearches( final String wordhashes, final String excludehashes, final Pattern prefer, final Pattern filter, final Pattern snippet, final String language, final String sitehash, final String authorhash, - final int count, long time, final int maxDist, + final int count, final long time, final int maxDist, final Segment indexSegment, final SeedDB peers, final RWIProcess containerCache, @@ -186,7 +186,7 @@ public class RemoteSearch extends Thread { burstMultiwordPercent) : PeerSelection.selectClusterPeers(peers, clusterselection); if (targetPeers == null) return new RemoteSearch[0]; - int targets = targetPeers.length; + final int targets = targetPeers.length; if (targets == 0) return new RemoteSearch[0]; final RemoteSearch[] searchThreads = new RemoteSearch[targets]; for (int i = 0; i < targets; i++) { @@ -198,14 +198,14 @@ public class RemoteSearch extends Thread { count, time, maxDist, true, targets, targetPeers[i], indexSegment, peers, containerCache, secondarySearchSuperviser, blacklist, rankingProfile, constraint); searchThreads[i].start(); - } catch (OutOfMemoryError e) { + } catch (final OutOfMemoryError e) { Log.logException(e); break; } } return searchThreads; } - + public static RemoteSearch secondaryRemoteSearch( final String wordhashes, final String urlhashes, final long time, @@ -216,12 +216,12 @@ public class RemoteSearch extends Thread { final RankingProfile rankingProfile, final Bitfield constraint, final SortedMap clusterselection) { assert wordhashes.length() >= 12 : "wordhashes = " + wordhashes; - + // check own peer status if (peers.mySeed() == null || peers.mySeed().getPublicAddress() == null) { return null; } assert urlhashes != null; assert urlhashes.length() > 0; - + // prepare seed targets and threads final Seed targetPeer = peers.getConnected(targethash); if (targetPeer == null || targetPeer.hash == null) return null; @@ -232,7 +232,7 @@ public class RemoteSearch extends Thread { searchThread.start(); return searchThread; } - + public static int remainingWaiting(final RemoteSearch[] searchThreads) { if (searchThreads == null) return 0; int alive = 0; @@ -241,7 +241,7 @@ public class RemoteSearch extends Thread { } return alive; } - + public static int collectedLinks(final RemoteSearch[] searchThreads) { int links = 0; for (final RemoteSearch searchThread : searchThreads) { @@ -251,11 +251,11 @@ public class RemoteSearch extends Thread { } return links; } - + public static void interruptAlive(final RemoteSearch[] searchThreads) { for (final RemoteSearch searchThread : searchThreads) { if (searchThread.isAlive()) searchThread.interrupt(); } } - + } diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index c087f915d..8780a764b 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -383,11 +383,11 @@ public final class SearchEvent { return this.resultFetcher.oneResult(item, timeout); } - boolean secondarySearchStartet = false; + //boolean secondarySearchStartet = false; public static class HeuristicResult /*implements Comparable*/ { - public final byte[] urlhash; public final String heuristicName; public final boolean redundant; - public HeuristicResult(final byte[] urlhash, final String heuristicName, final boolean redundant) { + private final byte[] urlhash; public final String heuristicName; public final boolean redundant; + private HeuristicResult(final byte[] urlhash, final String heuristicName, final boolean redundant) { this.urlhash = urlhash; this.heuristicName = heuristicName; this.redundant = redundant; }/* public int compareTo(HeuristicResult o) { @@ -405,9 +405,9 @@ public final class SearchEvent { // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation // this relation contains the information where specific urls can be found in specific peers - SortedMap> abstractsCache; - SortedSet checkedPeers; - Semaphore trigger; + private final SortedMap> abstractsCache; + private final SortedSet checkedPeers; + private final Semaphore trigger; public SecondarySearchSuperviser() { this.abstractsCache = new TreeMap>(); @@ -491,7 +491,7 @@ public final class SearchEvent { } } catch (final InterruptedException e) { // the thread was interrupted - // do nohing + // do nothing } // the time-out was reached } @@ -500,12 +500,13 @@ public final class SearchEvent { if (this.abstractsCache == null || this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size()) return; // secondary search not possible (yet) // catch up index abstracts and join them; then call peers again to submit their urls + /* - System.out.println("DEBUG-INDEXABSTRACT: " + abstractsCache.size() + " word references caught, " + query.queryHashes.size() + " needed"); - for (Map.Entry> entry: abstractsCache.entrySet()) { - System.out.println("DEBUG-INDEXABSTRACT: hash " + entry.getKey() + ": " + ((query.queryHashes.has(entry.getKey().getBytes()) ? "NEEDED" : "NOT NEEDED") + "; " + entry.getValue().size() + " entries")); + System.out.println("DEBUG-INDEXABSTRACT: " + this.abstractsCache.size() + " word references caught, " + SearchEvent.this.query.queryHashes.size() + " needed"); + for (final Map.Entry> entry: this.abstractsCache.entrySet()) { + System.out.println("DEBUG-INDEXABSTRACT: hash " + entry.getKey() + ": " + ((SearchEvent.this.query.queryHashes.has(entry.getKey().getBytes()) ? "NEEDED" : "NOT NEEDED") + "; " + entry.getValue().size() + " entries")); } - */ + */ // find out if there are enough references for all words that are searched if (this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size()) return;