added content iterator for corrupted database files

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1406 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent ecdc1f7547
commit d6581c445b

@ -71,6 +71,7 @@ package de.anomic.kelondro;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import java.util.Random; import java.util.Random;
import java.util.StringTokenizer; import java.util.StringTokenizer;
@ -537,22 +538,23 @@ public class kelondroRecords {
this.tailChanged = true; this.tailChanged = true;
} }
/* private Node(Handle handle) throws IOException {
private Node(Handle handle) throws IOException { // this creates an entry with an pre-reserved entry position
// this creates an entry with an pre-reserved entry position // values can be written using the setValues() method
// values can be written using the setValues() method // but we expect that values are already there in the file ready to
// but we expect that values are already there in the file ready to be read which we do not here // be read which we do not here
if (handle == null) throw new IllegalArgumentException("INTERNAL ERROR: node handle is null."); if (handle == null)
if (handle.index >= USEDC + FREEC) throw new kelondroException(filename, "INTERNAL ERROR: node handle index exceeds size."); throw new IllegalArgumentException("INTERNAL ERROR: node handle is null.");
if (handle.index >= USAGE.allCount())
throw new kelondroException(filename, "INTERNAL ERROR: node handle index exceeds size.");
// use given handle // use given handle
this.handle = new Handle(handle.index); this.handle = new Handle(handle.index);
// init the content // init the content
initContent(); initContent();
} }
*/
private Node(Handle handle, Node parentNode, int referenceInParent) throws IOException { private Node(Handle handle, Node parentNode, int referenceInParent) throws IOException {
// this creates an entry with an pre-reserved entry position values can be written // this creates an entry with an pre-reserved entry position values can be written
// using the setValues() method but we expect that values are already there in the file // using the setValues() method but we expect that values are already there in the file
@ -561,8 +563,7 @@ public class kelondroRecords {
assert (handle.index >= 0): "node handle too low: " + handle.index; assert (handle.index >= 0): "node handle too low: " + handle.index;
//assert (handle.index < USAGE.allCount()) : "node handle too high: " + handle.index + ", USEDC=" + USAGE.USEDC + ", FREEC=" + USAGE.FREEC; //assert (handle.index < USAGE.allCount()) : "node handle too high: " + handle.index + ", USEDC=" + USAGE.USEDC + ", FREEC=" + USAGE.FREEC;
// the parentNode can be given if an auto-fix in the following case // the parentNode can be given if an auto-fix in the following case is wanted
// is wanted
if (handle.index >= USAGE.allCount()) { if (handle.index >= USAGE.allCount()) {
if (parentNode == null) { if (parentNode == null) {
throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index exceeds size. No auto-fix node was submitted. This is a serious failure."); throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index exceeds size. No auto-fix node was submitted. This is a serious failure.");
@ -1036,6 +1037,57 @@ public class kelondroRecords {
} }
} }
public Iterator content() {
    // Hands out an iterator over every record that is physically stored in
    // the file and not listed as deleted (see contentIterator).
    // Best effort: if the iterator cannot be set up because reading the
    // file fails, the caller gets an iterator with no elements instead of
    // an exception.
    Iterator records;
    try {
        records = new contentIterator();
    } catch (IOException e) {
        records = new HashSet().iterator();
    }
    return records;
}
public class contentIterator implements Iterator {
    // Iterator over the values of all records in the file, visiting the
    // records in ascending handle-index order from 0 up to USAGE.allCount().
    // All records that are reachable through the free list (i.e. marked as
    // deleted) are omitted.
    // According to the original author this is probably also the fastest
    // way to iterate all objects; each element is whatever Node.getValues()
    // returns (presumably a byte[][] -- confirm against Node).

    // Integer indexes of all records found in the free list. Plain integers
    // are used as keys so that the lookup does not depend on how Handle
    // implements equals()/hashCode().
    private HashSet markedDeleted;

    // handle of the record that the next call to next() will return
    private Handle pos;

    public contentIterator() throws IOException {
        pos = new Handle(0);
        markedDeleted = new HashSet();
        synchronized (USAGE) {
            if (USAGE.FREEC != 0) {
                // walk the chain of free records: it starts at USAGE.FREEH,
                // every free record holds the index of its successor, and
                // the chain ends with NUL
                Handle h = USAGE.FREEH;
                while (h.index != NUL) {
                    // add() returns false if this index was collected before;
                    // that means the chain loops back onto itself (damaged
                    // file) and following it further would never terminate
                    if (!markedDeleted.add(new Integer(h.index))) break;
                    h = new Handle(entryFile.readInt(seekpos(h)));
                }
            }
        }
        // position on the first record that is not deleted
        skipDeleted();
    }

    public boolean hasNext() {
        return pos.index < USAGE.allCount();
    }

    // Returns the values of the current record and moves on to the next
    // record that is not deleted.
    // Throws NoSuchElementException if all records have been visited and
    // kelondroException if the record cannot be read from the file.
    public Object next() {
        if (!hasNext()) throw new NoSuchElementException();
        // remember the record to load and advance before reading, so that
        // every call makes progress, even if reading this record fails
        Handle current = new Handle(pos.index);
        pos.index++;
        skipDeleted();
        try {
            Node n = new Node(current);
            return n.getValues();
        } catch (IOException e) {
            throw new kelondroException(filename, e.getMessage());
        }
    }

    public void remove() {
        throw new UnsupportedOperationException();
    }

    // moves pos forward until it points to a record that is not marked as
    // deleted, or until it has passed the last record
    private void skipDeleted() {
        while ((pos.index < USAGE.allCount()) && (markedDeleted.contains(new Integer(pos.index)))) pos.index++;
    }
}
public void close() throws IOException { public void close() throws IOException {
if (this.entryFile != null) this.entryFile.close(); if (this.entryFile != null) this.entryFile.close();
this.entryFile = null; this.entryFile = null;

@ -432,34 +432,38 @@ public class plasmaURL {
} }
public int size() { public int size() {
return urlHashCache.size(); return urlHashCache.size();
} }
public void close() throws IOException { public void close() throws IOException {
if (urlHashCache != null) urlHashCache.close(); if (urlHashCache != null) urlHashCache.close();
} }
public boolean exists(String urlHash) { public boolean exists(String urlHash) {
if (existsIndex.contains(urlHash)) return true; synchronized (existsIndex) {
try { if (existsIndex.contains(urlHash)) return true;
if (urlHashCache.get(urlHash.getBytes()) != null) { try {
existsIndex.add(urlHash); if (urlHashCache.get(urlHash.getBytes()) != null) {
return true; existsIndex.add(urlHash);
} else { return true;
} else {
return false;
}
} catch (IOException e) {
return false; return false;
} }
} catch (IOException e) { }
return false;
}
} }
public boolean remove(String urlHash) { public boolean remove(String urlHash) {
try { synchronized (existsIndex) {
boolean existsInIndex = this.existsIndex.remove(urlHash); try {
boolean existsInCache = (this.urlHashCache.remove(urlHash.getBytes())!= null); boolean existsInIndex = this.existsIndex.remove(urlHash);
return existsInIndex || existsInCache; boolean existsInCache = (this.urlHashCache.remove(urlHash.getBytes()) != null);
} catch (IOException e) { return existsInIndex || existsInCache;
return false; } catch (IOException e) {
return false;
}
} }
} }

Loading…
Cancel
Save