Do not hold an expensive cache of references for DHT-out, but load them

on demand
see: http://forum.yacy-websuche.de/viewtopic.php?f=8&t=4530
pull/1/head
sixcooler 13 years ago
parent ef937af35d
commit 57ddd63888

@ -32,13 +32,11 @@ import java.io.PrintWriter;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
@ -269,19 +267,20 @@ public class IndexControlRWIs_p {
index = segment.termIndex().get(keyhash, null);
// built urlCache
final Iterator<WordReference> urlIter = index.entries();
final TreeMap<byte[], URIMetadataNode> knownURLs =
new TreeMap<byte[], URIMetadataNode>(Base64Order.enhancedCoder);
final HandleSet knownURLs =
new RowHandleSet(
WordReferenceRow.urlEntryRow.primaryKeyLength,
WordReferenceRow.urlEntryRow.objectOrder,
index.size());
final HandleSet unknownURLEntries =
new RowHandleSet(
WordReferenceRow.urlEntryRow.primaryKeyLength,
WordReferenceRow.urlEntryRow.objectOrder,
index.size());
Reference iEntry;
URIMetadataNode lurl;
while (urlIter.hasNext()) {
iEntry = urlIter.next();
lurl = segment.fulltext().getMetadata(iEntry.urlhash());
if (lurl == null) {
if (!segment.fulltext().exists(iEntry.urlhash())) {
try {
unknownURLEntries.put(iEntry.urlhash());
} catch (final SpaceExceededException e) {
@ -289,7 +288,11 @@ public class IndexControlRWIs_p {
}
urlIter.remove();
} else {
knownURLs.put(iEntry.urlhash(), lurl);
try {
knownURLs.put(iEntry.urlhash());
} catch (final SpaceExceededException e) {
Log.logException(e);
}
}
}
@ -308,7 +311,7 @@ public class IndexControlRWIs_p {
// transport to other peer
final boolean gzipBody = sb.getConfigBool("indexControl.gzipBody", false);
final int timeout = (int) sb.getConfigLong("indexControl.timeout", 60000);
final String error = Protocol.transferIndex(seed, icc, knownURLs, gzipBody, timeout);
final String error = Protocol.transferIndex(seed, icc, knownURLs, segment, gzipBody, timeout);
prop.put("result", (error == null) ? ("Successfully transferred "
+ knownURLs.size()
+ " words in "

@ -54,7 +54,6 @@ import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import net.yacy.migration;
@ -78,6 +77,7 @@ import net.yacy.cora.order.Digest;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.data.ResultURLs;
import net.yacy.crawler.data.ResultURLs.EventOrigin;
@ -1266,7 +1266,8 @@ public final class Protocol
public static String transferIndex(
final Seed targetSeed,
final ReferenceContainerCache<WordReference> indexes,
final SortedMap<byte[], URIMetadataNode> urlCache,
final HandleSet urlRefs,
final Segment segment,
final boolean gzipBody,
final int timeout) {
@ -1277,7 +1278,7 @@ public final class Protocol
eenum = ic.entries();
while ( eenum.hasNext() ) {
entry = eenum.next();
if ( urlCache.get(entry.urlhash()) == null ) {
if ( !urlRefs.has(entry.urlhash()) ) {
if ( Network.log.isFine() ) {
Network.log.logFine("DEBUG transferIndex: to-send url hash '"
+ ASCII.String(entry.urlhash())
@ -1328,8 +1329,10 @@ public final class Protocol
// extract the urlCache from the result
final URIMetadataNode[] urls = new URIMetadataNode[uhs.length];
byte[] key;
for ( int i = 0; i < uhs.length; i++ ) {
urls[i] = urlCache.get(ASCII.getBytes(uhs[i]));
key = ASCII.getBytes(uhs[i]);
if ( urlRefs.has(key) ) urls[i] = segment.fulltext().getMetadata(key);
if ( urls[i] == null ) {
if ( Network.log.isFine() ) {
Network.log.logFine("DEBUG transferIndex: requested url hash '"

@ -28,14 +28,10 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow;
@ -88,7 +84,7 @@ public class Transmission {
*/
private final byte[] primaryTarget;
private final ReferenceContainerCache<WordReference> containers;
private final SortedMap<byte[], URIMetadataNode> references;
private final HandleSet references;
private final HandleSet badReferences;
private final List<Seed> targets;
private int hit, miss;
@ -104,7 +100,7 @@ public class Transmission {
super();
this.primaryTarget = primaryTarget;
this.containers = new ReferenceContainerCache<WordReference>(Segment.wordReferenceFactory, Segment.wordOrder, Word.commonHashLength);
this.references = new TreeMap<byte[], URIMetadataNode>(Base64Order.enhancedCoder);
this.references = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
this.badReferences = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
this.targets = targets;
this.hit = 0;
@ -168,17 +164,16 @@ public class Transmission {
final List<byte[]> notFoundx = new ArrayList<byte[]>();
while (i.hasNext()) {
final WordReference e = i.next();
if (this.references.containsKey(e.urlhash())) continue;
if (this.references.has(e.urlhash())) continue;
if (this.badReferences.has(e.urlhash())) {
notFoundx.add(e.urlhash());
continue;
}
final URIMetadataNode r = Transmission.this.segment.fulltext().getMetadata(e.urlhash());
if (r == null) {
if (!Transmission.this.segment.fulltext().exists(e.urlhash())) {
notFoundx.add(e.urlhash());
this.badReferences.put(e.urlhash());
} else {
this.references.put(e.urlhash(), r);
this.references.put(e.urlhash());
}
}
// now delete all references that were not found
@ -243,7 +238,7 @@ public class Transmission {
}
Transmission.this.log.logInfo("starting new index transmission request to " + ASCII.String(this.primaryTarget));
final long start = System.currentTimeMillis();
final String error = Protocol.transferIndex(target, this.containers, this.references, Transmission.this.gzipBody4Transfer, Transmission.this.timeout4Transfer);
final String error = Protocol.transferIndex(target, this.containers, this.references, Transmission.this.segment, Transmission.this.gzipBody4Transfer, Transmission.this.timeout4Transfer);
if (error == null) {
// words successfully transferred
final long transferTime = System.currentTimeMillis() - start;

Loading…
Cancel
Save