- reduced memory usage in index transmission by transforming Node objects into Row objects
- removed the peerDeparture call in solr remote search for the case that the
peer does not answer (this may be normal because a peer is allowed to switch this off)
pull/1/head
Michael Peter Christen 13 years ago
parent af764c106c
commit 4716546ef5

@ -45,6 +45,7 @@ import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadata;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
@ -309,8 +310,8 @@ public class IndexControlRWIs_p {
index = segment.termIndex().get(keyhash, null);
// built urlCache
final Iterator<WordReference> urlIter = index.entries();
final TreeMap<byte[], URIMetadata> knownURLs =
new TreeMap<byte[], URIMetadata>(Base64Order.enhancedCoder);
final TreeMap<byte[], URIMetadataRow> knownURLs =
new TreeMap<byte[], URIMetadataRow>(Base64Order.enhancedCoder);
final HandleSet unknownURLEntries =
new RowHandleSet(
WordReferenceRow.urlEntryRow.primaryKeyLength,
@ -329,7 +330,11 @@ public class IndexControlRWIs_p {
}
urlIter.remove();
} else {
knownURLs.put(iEntry.urlhash(), lurl);
if (lurl instanceof URIMetadataRow) {
knownURLs.put(iEntry.urlhash(), (URIMetadataRow) lurl);
} else if (lurl instanceof URIMetadataNode) {
knownURLs.put(iEntry.urlhash(), ((URIMetadataNode) lurl).toRow());
}
}
}

@ -99,6 +99,10 @@ public class URIMetadataNode implements URIMetadata {
this.ranking = ranking;
}
/**
 * Converts this metadata node into a {@link URIMetadataRow}.
 * <p>
 * The conversion goes through the textual form of this object: the result
 * of {@link #toString()} is handed to {@code URIMetadataRow.importEntry}.
 *
 * @return whatever {@code URIMetadataRow.importEntry} produces for this
 *         node's string form
 */
public URIMetadataRow toRow() {
    // NOTE(review): relies on toString() emitting a format that importEntry can parse - confirm
    final String serialized = this.toString();
    return URIMetadataRow.importEntry(serialized);
}
/**
 * Gives access to the Solr document stored in this node's {@code doc} field.
 *
 * @return the {@code SolrDocument} held by this node (the same reference, not a copy)
 */
public SolrDocument getDocument() {
    return doc;
}

@ -954,7 +954,7 @@ public final class Protocol
//resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
}
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), 20000);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), 8000);
byte[] a = httpClient.POSTbytes(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), hostname, parts, false);
if (a != null && a.length > 200000) {
// there is something wrong. This is too large, maybe a hack on the other side?
@ -1051,7 +1051,6 @@ public final class Protocol
// no need to close this here because that sends a commit to remote solr which is not wanted here
} catch (IOException e) {
Network.log.logInfo("SEARCH failed (solr), Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + ")");
event.peers.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
return -1;
}
}
@ -1282,7 +1281,7 @@ public final class Protocol
public static String transferIndex(
final Seed targetSeed,
final ReferenceContainerCache<WordReference> indexes,
final SortedMap<byte[], URIMetadata> urlCache,
final SortedMap<byte[], URIMetadataRow> urlCache,
final boolean gzipBody,
final int timeout) {

@ -35,6 +35,8 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.data.meta.URIMetadata;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow;
@ -91,7 +93,7 @@ public class Transmission {
*/
private final byte[] primaryTarget;
private final ReferenceContainerCache<WordReference> containers;
private final SortedMap<byte[], URIMetadata> references;
private final SortedMap<byte[], URIMetadataRow> references;
private final HandleSet badReferences;
private final List<Seed> targets;
private int hit, miss;
@ -107,7 +109,7 @@ public class Transmission {
super();
this.primaryTarget = primaryTarget;
this.containers = new ReferenceContainerCache<WordReference>(Segment.wordReferenceFactory, Segment.wordOrder, Word.commonHashLength);
this.references = new TreeMap<byte[], URIMetadata>(Base64Order.enhancedCoder);
this.references = new TreeMap<byte[], URIMetadataRow>(Base64Order.enhancedCoder);
this.badReferences = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
this.targets = targets;
this.hit = 0;
@ -181,7 +183,11 @@ public class Transmission {
notFoundx.add(e.urlhash());
this.badReferences.put(e.urlhash());
} else {
this.references.put(e.urlhash(), r);
if (r instanceof URIMetadataRow) {
this.references.put(e.urlhash(), (URIMetadataRow) r);
} else if (r instanceof URIMetadataNode) {
this.references.put(e.urlhash(), ((URIMetadataNode) r).toRow());
}
}
}
// now delete all references that were not found

@ -303,11 +303,10 @@ public final class Fulltext implements Iterable<byte[]> {
Log.logException(e);
oldEntry = null;
}
URIMetadata entry = new URIMetadataNode(ClientUtils.toSolrDocument(doc));
URIMetadataNode entry = new URIMetadataNode(ClientUtils.toSolrDocument(doc));
if (oldEntry == null || oldEntry.isOlder(entry)) {
try {
URIMetadataRow row = URIMetadataRow.importEntry(entry.toString());
this.urlIndexFile.put(row.toRowEntry());
this.urlIndexFile.put(entry.toRow().toRowEntry());
} catch (final SpaceExceededException e) {
throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage());
}
@ -356,7 +355,7 @@ public final class Fulltext implements Iterable<byte[]> {
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCache();
}
public boolean remove(final byte[] urlHash) {
if (urlHash == null) return false;
try {
@ -446,7 +445,7 @@ public final class Fulltext implements Iterable<byte[]> {
}
};
}
// export methods
public Export export(final File f, final String filter, final HandleSet set, final int format, final boolean dom) {
if ((this.exportthread != null) && (this.exportthread.isAlive())) {
@ -723,7 +722,7 @@ public final class Fulltext implements Iterable<byte[]> {
assert hosthash.length() == 6;
// delete in solr
this.solr.deleteByQuery(YaCySchema.host_id_s.name() + ":\"" + hosthash + "\"");
// delete in old metadata structure
final ArrayList<String> l = new ArrayList<String>();
synchronized (this) {

Loading…
Cancel
Save