Enhancements to DHT

- speed up deletion of containers when selected from whole index
- correctly eliminate all references to unavailable URLs, not just the first encountered



git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6381 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
hermens 16 years ago
parent e49e2d75fe
commit 9324b5b6c5

@ -191,20 +191,34 @@ public class Dispatcher {
containers.add(container);
}
// then remove the container from the backend
HashSet<String> urlHashes = new HashSet<String>();
Iterator<WordReference> it;
for (ReferenceContainer<WordReference> c: containers) {
urlHashes.clear();
it = c.entries();
while (it.hasNext()) {
urlHashes.add(it.next().metadataHash());
final ArrayList<ReferenceContainer<WordReference>> rc;
if (ram) {
// selection was only from ram, so we have to carefully remove only the selected entries
HashSet<String> urlHashes = new HashSet<String>();
Iterator<WordReference> it;
for (ReferenceContainer<WordReference> c: containers) {
urlHashes.clear();
it = c.entries();
while (it.hasNext()) {
urlHashes.add(it.next().metadataHash());
}
if (this.log.isFine()) this.log.logFine("selected " + urlHashes.size() + " urls for word '" + c.getTermHashAsString() + "'");
if (urlHashes.size() > 0) this.backend.remove(c.getTermHash(), urlHashes);
}
rc = containers;
} else {
// selection was from whole index, so we can just delete the whole container
// but to avoid race conditions return the results from the deletes
rc = new ArrayList<ReferenceContainer<WordReference>>(containers.size());
for (ReferenceContainer<WordReference> c: containers) {
container = this.backend.delete(c.getTermHash());
if (this.log.isFine()) this.log.logFine("selected " + container.size() + " urls for word '" + c.getTermHashAsString() + "'");
if (container.size() != 0) rc.add(container);
}
if (this.log.isFine()) this.log.logFine("selected " + urlHashes.size() + " urls for word '" + c.getTermHashAsString() + "'");
if (urlHashes.size() > 0) this.backend.remove(c.getTermHash(), urlHashes);
}
// finished. The caller must take care of the containers and must put them back if not needed
return containers;
return rc;
}
/**

@ -126,7 +126,11 @@ public class Transmission {
ArrayList<String> notFound = new ArrayList<String>();
while (i.hasNext()) {
WordReference e = i.next();
if (references.containsKey(e.metadataHash()) || badReferences.contains(e.metadataHash())) continue;
if (references.containsKey(e.metadataHash())) continue;
if (badReferences.contains(e.metadataHash())) {
notFound.add(e.metadataHash());
continue;
}
URLMetadataRow r = repository.load(e.metadataHash(), null, 0);
if (r == null) {
notFound.add(e.metadataHash());

Loading…
Cancel
Save