fixed secondary remote search (the process that finds distributed join situations)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8098 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 13 years ago
parent 64fd20b857
commit c9216d5adf

@ -38,7 +38,6 @@ import java.util.regex.Pattern;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
@ -333,7 +332,7 @@ public final class search {
final Iterator<Map.Entry<byte[], Integer>> i = theSearch.abstractsCount();
while (i.hasNext()) {
entry = i.next();
indexcount.append("indexcount.").append(UTF8.String(entry.getKey())).append('=').append((entry.getValue()).toString()).append(serverCore.CRLF_STRING);
indexcount.append("indexcount.").append(ASCII.String(entry.getKey())).append('=').append((entry.getValue()).toString()).append(serverCore.CRLF_STRING);
}
if (abstractSet != null) {
// if a specific index-abstract is demanded, attach it here

@ -403,7 +403,7 @@ public final class ByteBuffer extends OutputStream {
@Override
public String toString() {
return UTF8.String(this.buffer, this.offset, this.length);
return UTF8.String(this.buffer, this.offset, this.length);
}
public String toString(final int left, final int length) {
@ -415,7 +415,7 @@ public final class ByteBuffer extends OutputStream {
final StringBuilder sb = new StringBuilder(sblength);
int i = 0;
sb.setLength(length);
for (int j = left; j < left + length; j++) sb.setCharAt(i++, (char) this.buffer[j]);
for (int j = left; j < left + length; j++) sb.setCharAt(i++, (char) this.buffer[this.offset + j]);
return sb;
}
@ -506,18 +506,6 @@ public final class ByteBuffer extends OutputStream {
this.offset = 0;
}
/**
 * Re-allocates the backing array with the given size and afterwards
 * delegates to the parameterless {@code reset()}.
 *
 * @param newSize size of the new backing array in bytes; must not be negative
 * @throws IllegalArgumentException if {@code newSize} is negative (thrown by {@code resize})
 */
public void reset(final int newSize) {
    this.resize(newSize);
    reset();
}
/**
 * Replaces the backing array with a newly allocated one of exactly
 * {@code newSize} bytes.
 * <p>
 * Bytes are carried over starting at index 0 of the old array: if the new
 * array is shorter the content is truncated, if it is longer the tail keeps
 * its zero initialisation. The offset and length fields are not modified here.
 *
 * @param newSize size of the new backing array in bytes; must not be negative
 * @throws IllegalArgumentException if {@code newSize} is negative
 */
public void resize(final int newSize) {
    if (newSize < 0) {
        throw new IllegalArgumentException("Illegal array size: " + newSize);
    }
    final byte[] resized = new byte[newSize];
    // copy only as many bytes as fit into both the old and the new array
    final int keep = Math.min(newSize, this.buffer.length);
    System.arraycopy(this.buffer, 0, resized, 0, keep);
    this.buffer = resized;
}
public void writeTo(final OutputStream dest) throws IOException {
dest.write(this.buffer, this.offset, this.length);
dest.flush();

@ -482,6 +482,9 @@ public final class Protocol {
// computation time
final long totalrequesttime = System.currentTimeMillis() - timestamp;
final boolean thisIsASecondarySearch = urlhashes.length() > 0;
assert !thisIsASecondarySearch || secondarySearchSuperviser == null;
// create containers
final int words = wordhashes.length() / Word.commonHashLength;
assert words > 0 : "wordhashes = " + wordhashes;
@ -568,8 +571,6 @@ public final class Protocol {
} catch (final Exception e) {
Log.logException(e);
}
final boolean thisIsASecondarySearch = urlhashes.length() > 0;
assert !thisIsASecondarySearch || secondarySearchSuperviser == null;
Network.log.logInfo("remote search: peer " + target.getName() + " sent " + container[0].size() + "/" + result.joincount + " references for " + (thisIsASecondarySearch ? "a secondary search" : "joined word queries"));
@ -651,7 +652,7 @@ public final class Protocol {
final boolean global,
final int partitions,
final String hostname,
String hostaddress,
final String hostaddress,
final SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser,
final RankingProfile rankingProfile,
final Bitfield constraint) throws IOException {

@ -1,4 +1,4 @@
// yacySearch.java
// yacySearch.java
// -------------------------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
@ -44,7 +44,7 @@ import net.yacy.search.ranking.RankingProfile;
public class RemoteSearch extends Thread {
private static final ThreadGroup ysThreadGroup = new ThreadGroup("yacySearchThreadGroup");
final private String wordhashes, excludehashes, urlhashes, sitehash, authorhash;
final private boolean global;
final private int partitions;
@ -61,16 +61,16 @@ public class RemoteSearch extends Thread {
final private String language;
final private Bitfield constraint;
final private SeedDB peers;
public RemoteSearch(
final String wordhashes, final String excludehashes,
final String urlhashes,
final String urlhashes, // this is the field that is filled during a secondary search to restrict to specific urls that are to be retrieved
final Pattern prefer,
final Pattern filter,
final Pattern snippet,
final String language,
final String sitehash, final String authorhash,
final int count, final long time, final int maxDistance,
final int count, final long time, final int maxDistance,
final boolean global, final int partitions,
final Seed targetPeer,
final Segment indexSegment,
@ -112,30 +112,30 @@ public class RemoteSearch extends Thread {
public void run() {
try {
this.urls = Protocol.search(
peers.mySeed(),
wordhashes, excludehashes, urlhashes,
prefer, filter, snippet,
language, sitehash, authorhash,
count, time, maxDistance, global, partitions,
targetPeer, indexSegment, containerCache, secondarySearchSuperviser,
blacklist, rankingProfile, constraint);
if (urls >= 0) {
this.peers.mySeed(),
this.wordhashes, this.excludehashes, this.urlhashes,
this.prefer, this.filter, this.snippet,
this.language, this.sitehash, this.authorhash,
this.count, this.time, this.maxDistance, this.global, this.partitions,
this.targetPeer, this.indexSegment, this.containerCache, this.secondarySearchSuperviser,
this.blacklist, this.rankingProfile, this.constraint);
if (this.urls >= 0) {
// urls is the number of links the remote peer contributed (a negative value means no answer); urlhashes is only inspected here to decide on the log output
if (urlhashes != null && urlhashes.length() > 0) Network.log.logInfo("SECONDARY REMOTE SEARCH - remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + this.urls + " links for word hash " + wordhashes);
peers.mySeed().incRI(urls);
peers.mySeed().incRU(urls);
if (this.urlhashes != null && this.urlhashes.length() > 0) Network.log.logInfo("SECONDARY REMOTE SEARCH - remote peer " + this.targetPeer.hash + ":" + this.targetPeer.getName() + " contributed " + this.urls + " links for word hash " + this.wordhashes);
this.peers.mySeed().incRI(this.urls);
this.peers.mySeed().incRU(this.urls);
} else {
Network.log.logInfo("REMOTE SEARCH - no answer from remote peer " + targetPeer.hash + ":" + targetPeer.getName());
Network.log.logInfo("REMOTE SEARCH - no answer from remote peer " + this.targetPeer.hash + ":" + this.targetPeer.getName());
}
} catch (final Exception e) {
Log.logException(e);
} finally {
containerCache.oneFeederTerminated();
this.containerCache.oneFeederTerminated();
}
}
public static String set2string(final HandleSet hashes) {
StringBuilder wh = new StringBuilder(hashes.size() * 12);
final StringBuilder wh = new StringBuilder(hashes.size() * 12);
final Iterator<byte[]> iter = hashes.iterator();
while (iter.hasNext()) { wh.append(ASCII.String(iter.next())); }
return wh.toString();
@ -144,22 +144,22 @@ public class RemoteSearch extends Thread {
/**
 * Returns the value that {@code run()} stored from {@code Protocol.search}.
 * {@code run()} logs this value as the number of links the remote peer
 * contributed and treats a negative value as "no answer from remote peer".
 *
 * @return the link count stored in {@code urls}
 */
public int links() {
return this.urls;
}
/**
 * Returns the {@code count} field, i.e. the value that {@code run()} hands
 * to {@code Protocol.search} as its count argument.
 * NOTE(review): presumably the number of results requested from the peer - confirm against the constructor.
 *
 * @return the value of the {@code count} field
 */
public int count() {
return this.count;
}
public Seed target() {
return targetPeer;
return this.targetPeer;
}
public static RemoteSearch[] primaryRemoteSearches(
final String wordhashes, final String excludehashes,
final Pattern prefer, final Pattern filter, final Pattern snippet,
final String language,
final String sitehash,
final String authorhash,
final int count, long time, final int maxDist,
final int count, final long time, final int maxDist,
final Segment indexSegment,
final SeedDB peers,
final RWIProcess containerCache,
@ -186,7 +186,7 @@ public class RemoteSearch extends Thread {
burstMultiwordPercent)
: PeerSelection.selectClusterPeers(peers, clusterselection);
if (targetPeers == null) return new RemoteSearch[0];
int targets = targetPeers.length;
final int targets = targetPeers.length;
if (targets == 0) return new RemoteSearch[0];
final RemoteSearch[] searchThreads = new RemoteSearch[targets];
for (int i = 0; i < targets; i++) {
@ -198,14 +198,14 @@ public class RemoteSearch extends Thread {
count, time, maxDist, true, targets, targetPeers[i],
indexSegment, peers, containerCache, secondarySearchSuperviser, blacklist, rankingProfile, constraint);
searchThreads[i].start();
} catch (OutOfMemoryError e) {
} catch (final OutOfMemoryError e) {
Log.logException(e);
break;
}
}
return searchThreads;
}
public static RemoteSearch secondaryRemoteSearch(
final String wordhashes, final String urlhashes,
final long time,
@ -216,12 +216,12 @@ public class RemoteSearch extends Thread {
final RankingProfile rankingProfile,
final Bitfield constraint, final SortedMap<byte[], String> clusterselection) {
assert wordhashes.length() >= 12 : "wordhashes = " + wordhashes;
// check own peer status
if (peers.mySeed() == null || peers.mySeed().getPublicAddress() == null) { return null; }
assert urlhashes != null;
assert urlhashes.length() > 0;
// prepare seed targets and threads
final Seed targetPeer = peers.getConnected(targethash);
if (targetPeer == null || targetPeer.hash == null) return null;
@ -232,7 +232,7 @@ public class RemoteSearch extends Thread {
searchThread.start();
return searchThread;
}
public static int remainingWaiting(final RemoteSearch[] searchThreads) {
if (searchThreads == null) return 0;
int alive = 0;
@ -241,7 +241,7 @@ public class RemoteSearch extends Thread {
}
return alive;
}
public static int collectedLinks(final RemoteSearch[] searchThreads) {
int links = 0;
for (final RemoteSearch searchThread : searchThreads) {
@ -251,11 +251,11 @@ public class RemoteSearch extends Thread {
}
return links;
}
/**
 * Interrupts every search thread of the given array that is still alive.
 * <p>
 * The array may legitimately contain {@code null} slots: {@code primaryRemoteSearches}
 * stops filling its result array when an {@code OutOfMemoryError} occurs and
 * returns the partially filled array. Such slots, as well as a {@code null}
 * array, are skipped instead of causing a {@code NullPointerException}.
 *
 * @param searchThreads the search threads to interrupt; may be {@code null}
 *                      and may contain {@code null} elements
 */
public static void interruptAlive(final RemoteSearch[] searchThreads) {
    if (searchThreads == null) return;
    for (final RemoteSearch searchThread : searchThreads) {
        // unfilled slots have no thread to interrupt
        if (searchThread != null && searchThread.isAlive()) searchThread.interrupt();
    }
}
}

@ -383,11 +383,11 @@ public final class SearchEvent {
return this.resultFetcher.oneResult(item, timeout);
}
boolean secondarySearchStartet = false;
//boolean secondarySearchStartet = false;
public static class HeuristicResult /*implements Comparable<HeuristicResult>*/ {
public final byte[] urlhash; public final String heuristicName; public final boolean redundant;
public HeuristicResult(final byte[] urlhash, final String heuristicName, final boolean redundant) {
private final byte[] urlhash; public final String heuristicName; public final boolean redundant;
private HeuristicResult(final byte[] urlhash, final String heuristicName, final boolean redundant) {
this.urlhash = urlhash; this.heuristicName = heuristicName; this.redundant = redundant;
}/*
public int compareTo(HeuristicResult o) {
@ -405,9 +405,9 @@ public final class SearchEvent {
// cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
// this relation contains the information where specific urls can be found in specific peers
SortedMap<String, SortedMap<String, StringBuilder>> abstractsCache;
SortedSet<String> checkedPeers;
Semaphore trigger;
private final SortedMap<String, SortedMap<String, StringBuilder>> abstractsCache;
private final SortedSet<String> checkedPeers;
private final Semaphore trigger;
public SecondarySearchSuperviser() {
this.abstractsCache = new TreeMap<String, SortedMap<String, StringBuilder>>();
@ -491,7 +491,7 @@ public final class SearchEvent {
}
} catch (final InterruptedException e) {
// the thread was interrupted
// do nohing
// do nothing
}
// the time-out was reached
}
@ -500,12 +500,13 @@ public final class SearchEvent {
if (this.abstractsCache == null || this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size()) return; // secondary search not possible (yet)
// catch up index abstracts and join them; then call peers again to submit their urls
/*
System.out.println("DEBUG-INDEXABSTRACT: " + abstractsCache.size() + " word references caught, " + query.queryHashes.size() + " needed");
for (Map.Entry<String, TreeMap<String, String>> entry: abstractsCache.entrySet()) {
System.out.println("DEBUG-INDEXABSTRACT: hash " + entry.getKey() + ": " + ((query.queryHashes.has(entry.getKey().getBytes()) ? "NEEDED" : "NOT NEEDED") + "; " + entry.getValue().size() + " entries"));
System.out.println("DEBUG-INDEXABSTRACT: " + this.abstractsCache.size() + " word references caught, " + SearchEvent.this.query.queryHashes.size() + " needed");
for (final Map.Entry<String, SortedMap<String, StringBuilder>> entry: this.abstractsCache.entrySet()) {
System.out.println("DEBUG-INDEXABSTRACT: hash " + entry.getKey() + ": " + ((SearchEvent.this.query.queryHashes.has(entry.getKey().getBytes()) ? "NEEDED" : "NOT NEEDED") + "; " + entry.getValue().size() + " entries"));
}
*/
*/
// find out if there are enough references for all words that are searched
if (this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size()) return;

Loading…
Cancel
Save