Start of the redesign of the kelondroTree iterator

- new way of accessing the iterator
- added IOException handling in many other classes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1914 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent d2f34a2e94
commit dba02f399f

@ -284,19 +284,21 @@ public class IndexControl_p {
// generate list
if (post.containsKey("keyhashsimilar")) {
try {
final Iterator hashIt = switchboard.wordIndex.wordHashes(keyhash, plasmaWordIndex.RL_WORDFILES, true, 256).iterator();
StringBuffer result = new StringBuffer("Sequential List of Word-Hashes:<br>");
String hash;
int i = 0;
while (hashIt.hasNext() && i < 256) {
hash = (String) hashIt.next();
result.append("<a href=\"/IndexControl_p.html?")
.append("keyhash=").append(hash).append("&keyhashsearch=")
.append("\" class=\"tt\">").append(hash).append("</a> ")
.append(((i + 1) % 8 == 0) ? "<br>" : "");
i++;
StringBuffer result = new StringBuffer("Sequential List of Word-Hashes:<br>");
String hash;
int i = 0;
while (hashIt.hasNext() && i < 256) {
hash = (String) hashIt.next();
result.append("<a href=\"/IndexControl_p.html?").append("keyhash=").append(hash).append("&keyhashsearch=")
.append("\" class=\"tt\">").append(hash).append("</a> ").append(((i + 1) % 8 == 0) ? "<br>" : "");
i++;
}
prop.put("result", result);
} catch (IOException e) {
prop.put("result", "unknown keys: " + e.getMessage());
}
prop.put("result", result);
}
if (post.containsKey("urlstringsearch")) {
@ -329,19 +331,23 @@ public class IndexControl_p {
// generate list
if (post.containsKey("urlhashsimilar")) {
final Iterator hashIt = switchboard.urlPool.loadedURL.urlHashes(urlhash, true);
StringBuffer result = new StringBuffer("Sequential List of URL-Hashes:<br>");
String hash;
int i = 0;
while (hashIt.hasNext() && i < 256) {
hash = (String) hashIt.next();
result.append("<a href=\"/IndexControl_p.html?")
.append("urlhash=").append(hash).append("&urlhashsearch=")
.append("\" class=\"tt\">").append(hash).append("</a> ")
.append(((i + 1) % 8 == 0) ? "<br>" : "");
i++;
try {
final Iterator hashIt = switchboard.urlPool.loadedURL.urlHashes(urlhash, true);
StringBuffer result = new StringBuffer(
"Sequential List of URL-Hashes:<br>");
String hash;
int i = 0;
while (hashIt.hasNext() && i < 256) {
hash = (String) hashIt.next();
result.append("<a href=\"/IndexControl_p.html?").append("urlhash=").append(hash).append("&urlhashsearch=")
.append("\" class=\"tt\">").append(hash).append("</a> ").append(((i + 1) % 8 == 0) ? "<br>" : "");
i++;
}
prop.put("result", result.toString());
} catch (IOException e) {
prop.put("result", "No Entries for URL hash " + urlhash);
}
prop.put("result", result.toString());
}
// list known hosts

@ -187,13 +187,13 @@ public class kelondroDyn extends kelondroTree {
}
public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating) throws IOException {
// iterates only the keys of the Nodes
// enumerated objects are of type String
return new dynKeyIterator(super.rows(up, rotating));
// iterates only the keys of the Nodes
// enumerated objects are of type String
return new dynKeyIterator(super.rows(up, rotating));
}
public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating, byte[] firstKey) {
return new dynKeyIterator(super.rows(up, rotating, firstKey));
public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating, byte[] firstKey) throws IOException {
return new dynKeyIterator(super.rows(up, rotating, firstKey));
}
private byte[] getValueCached(byte[] key) throws IOException {

@ -287,7 +287,7 @@ public class kelondroMap {
return dyn.dynKeys(up, rotating);
}
public synchronized kelondroDyn.dynKeyIterator keys(final boolean up, final boolean rotating, final byte[] firstKey) {
public synchronized kelondroDyn.dynKeyIterator keys(final boolean up, final boolean rotating, final byte[] firstKey) throws IOException {
// simple enumeration of key names without special ordering
return dyn.dynKeys(up, rotating, firstKey);
}
@ -305,7 +305,7 @@ public class kelondroMap {
return new mapIterator(keys(up, rotating));
}
public synchronized mapIterator maps(final boolean up, final boolean rotating, final byte[] firstKey) {
public synchronized mapIterator maps(final boolean up, final boolean rotating, final byte[] firstKey) throws IOException {
return new mapIterator(keys(up, rotating, firstKey));
}

@ -151,7 +151,7 @@ public class kelondroTables {
return table.maps(up, rotating);
}
public synchronized kelondroMap.mapIterator /* of Map-Elements */ maps(String tablename, boolean up, boolean rotating, byte[] firstKey) {
public synchronized kelondroMap.mapIterator /* of Map-Elements */ maps(String tablename, boolean up, boolean rotating, byte[] firstKey) throws IOException {
kelondroMap table = (kelondroMap) mTables.get(tablename);
if (table == null) throw new RuntimeException("kelondroTables.maps: map table '" + tablename + "' does not exist.");
return table.maps(up, rotating, firstKey);
@ -163,7 +163,7 @@ public class kelondroTables {
return table.maps(up, field);
}
public synchronized Iterator /* of byte[][]-Elements */ rows(String tablename, boolean up, boolean rotating, byte[] firstKey) {
public synchronized Iterator /* of byte[][]-Elements */ rows(String tablename, boolean up, boolean rotating, byte[] firstKey) throws IOException {
kelondroTree tree = (kelondroTree) tTables.get(tablename);
if (tree == null) throw new RuntimeException("kelondroTables.bytes: tree table '" + tablename + "' does not exist.");
return tree.rows(up, rotating, firstKey);

@ -807,6 +807,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
return node;
}
/*
private synchronized Iterator nodeIterator(boolean up, boolean rotating) {
// iterates the elements in a sorted way. returns Node - type Objects
try {
@ -824,6 +825,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
throw new RuntimeException("error creating an iteration: " + e.getMessage());
}
}
*/
private class nodeIterator implements Iterator {
// we implement an iteration! (not a recursive function as the structure would suggest...)
@ -1011,11 +1013,11 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
// iterates the rows of the Nodes
// enumerated objects are of type byte[][]
// iterates the elements in a sorted way.
return new rowIterator(nodeIterator(up, rotating));
return new rowIterator(new nodeIterator(up, rotating));
}
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) {
return new rowIterator((firstKey == null) ? nodeIterator(up, rotating) : nodeIterator(up, rotating, firstKey));
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
return new rowIterator((firstKey == null) ? new nodeIterator(up, rotating) : new nodeIterator(up, rotating, firstKey, true));
}
public class rowIterator implements Iterator {
@ -1045,15 +1047,15 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
}
public synchronized keyIterator keys(boolean up, boolean rotating) {
// iterates only the keys of the Nodes
// enumerated objects are of type String
public synchronized keyIterator keys(boolean up, boolean rotating) throws IOException {
// iterates only the keys of the Nodes
// enumerated objects are of type String
// iterates the elements in a sorted way.
return new keyIterator(nodeIterator(up, rotating));
return new keyIterator(new nodeIterator(up, rotating));
}
public Iterator keys(boolean up, boolean rotating, byte[] firstKey) {
return new keyIterator(nodeIterator(up, rotating, firstKey));
public Iterator keys(boolean up, boolean rotating, byte[] firstKey) throws IOException {
return new keyIterator(new nodeIterator(up, rotating, firstKey, true));
}
public class keyIterator implements Iterator {
@ -1448,9 +1450,9 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
b = testWord('L'); tt.put(b, b);
int c = countElements(tt);
System.out.println("elements: " + c);
Iterator i = tt.nodeIterator(true, true, testWord('G'));
Iterator i = tt.rows(true, true, testWord('G'));
for (int j = 0; j < c; j++) {
System.out.println("Node " + j + ": " + new String(((Node) i.next()).getKey()));
System.out.println("Row " + j + ": " + new String(((byte[][]) i.next())[0]));
}
System.out.println("TERMINATED");
} catch (IOException e) {
@ -1555,13 +1557,17 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
public static int countElements(kelondroTree t) {
int count = 0;
Iterator iter = t.nodeIterator(true, false);
Node n;
while (iter.hasNext()) {
count++;
n = (Node) iter.next();
if (n == null) System.out.println("ERROR! null element found");
//else System.out.println("counted element: " + new String(n.getKey()));
try {
Iterator iter = t.rows(true, false);
byte[][] row;
while (iter.hasNext()) {
count++;
row = (byte[][]) iter.next();
if (row == null) System.out.println("ERROR! null element found");
// else System.out.println("counted element: " + new
// String(n.getKey()));
}
} catch (IOException e) {
}
return count;
}

@ -555,7 +555,15 @@ public final class plasmaCrawlStacker {
}
} catch (kelondroException e) {
/* if we have an error, we start with a fresh database */
plasmaCrawlStacker.this.log.logSevere("Unable to initialize crawl stacker queue. Reseting DB.\n",e);
plasmaCrawlStacker.this.log.logSevere("Unable to initialize crawl stacker queue, kelondroException:" + e.getMessage() + ". Reseting DB.\n",e);
// deleting old db and creating a new db
try {this.urlEntryCache.close();}catch(Exception ex){}
cacheFile.delete();
this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, plasmaCrawlNURL.ce, true);
} catch (IOException e) {
/* if we have an error, we start with a fresh database */
plasmaCrawlStacker.this.log.logSevere("Unable to initialize crawl stacker queue, IOException:" + e.getMessage() + ". Reseting DB.\n",e);
// deleting old db and creating a new db
try {this.urlEntryCache.close();}catch(Exception ex){}

@ -239,9 +239,13 @@ public class plasmaDHTChunk {
log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e);
indexContainers = new plasmaWordIndexEntryContainer[0];
urlCache = new HashMap();
this.status = chunkStatus_FAILED;
return 0;
} catch (IOException e) {
log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e);
indexContainers = new plasmaWordIndexEntryContainer[0];
urlCache = new HashMap();
this.status = chunkStatus_FAILED;
return 0;
}
}

@ -564,7 +564,7 @@ public class plasmaURL {
return hash;
}
public Iterator urlHashes(String urlHash, boolean up) {
public Iterator urlHashes(String urlHash, boolean up) throws IOException {
return urlHashCache.keys(up, false, urlHash.getBytes());
}

@ -378,7 +378,7 @@ public final class plasmaWordIndex {
public static final int RL_ASSORTMENTS = 2;
public static final int RL_WORDFILES = 3;
public synchronized TreeSet wordHashes(String startHash, int resourceLevel, boolean rot, int count) {
public synchronized TreeSet wordHashes(String startHash, int resourceLevel, boolean rot, int count) throws IOException {
kelondroOrder hashOrder = (kelondroOrder) indexOrder.clone();
if (rot) hashOrder.rotate(startHash.getBytes()); else hashOrder.rotate(null);
TreeSet hashes = new TreeSet(hashOrder);
@ -391,12 +391,12 @@ public final class plasmaWordIndex {
return hashes;
}
public Iterator wordHashes(String startHash, int resourceLevel, boolean rot) {
public Iterator wordHashes(String startHash, int resourceLevel, boolean rot) throws IOException {
if (rot) return new rotatingWordIterator(startHash, resourceLevel);
else return new correctedWordIterator(startHash, resourceLevel, rot); // use correction until bug is found
}
private Iterator wordHashesX(String startWordHash, int resourceLevel, boolean rot) {
private Iterator wordHashesX(String startWordHash, int resourceLevel, boolean rot) throws IOException {
if (resourceLevel == plasmaWordIndex.RL_RAMCACHE) {
return ramCache.wordHashes(startWordHash, rot);
}
@ -431,7 +431,7 @@ public final class plasmaWordIndex {
Iterator iter;
String nextWord;
public correctedWordIterator(String firstWord, int resourceLevel, boolean rotating) {
public correctedWordIterator(String firstWord, int resourceLevel, boolean rotating) throws IOException {
iter = wordHashesX(firstWord, resourceLevel, rotating);
try {
nextWord = (iter.hasNext()) ? (String) iter.next() : null;
@ -481,7 +481,7 @@ public final class plasmaWordIndex {
Iterator i;
int resourceLevel;
public rotatingWordIterator(String startWordHash, int resourceLevel) {
public rotatingWordIterator(String startWordHash, int resourceLevel) throws IOException {
this.resourceLevel = resourceLevel;
i = new correctedWordIterator(startWordHash, resourceLevel, false);
}
@ -492,9 +492,11 @@ public final class plasmaWordIndex {
public boolean hasNext() {
if (i.hasNext()) return true;
else {
else try {
i = new correctedWordIterator("------------", resourceLevel, false);
return i.hasNext();
} catch (IOException e) {
return false;
}
}
@ -588,36 +590,42 @@ public final class plasmaWordIndex {
plasmaWordIndexEntry entry = null;
URL url = null;
HashSet urlHashs = new HashSet();
Iterator wordHashIterator = wordHashes(startHash, plasmaWordIndex.RL_WORDFILES, false);
while (wordHashIterator.hasNext() && run) {
waiter();
wordHash = (String) wordHashIterator.next();
wordContainer = getContainer(wordHash, true, -1);
Iterator containerIterator = wordContainer.entries();
wordHashNow = wordHash;
while (containerIterator.hasNext() && run) {
try {
Iterator wordHashIterator = wordHashes(startHash, plasmaWordIndex.RL_WORDFILES, false);
while (wordHashIterator.hasNext() && run) {
waiter();
entry = (plasmaWordIndexEntry) containerIterator.next();
//System.out.println("Wordhash: "+wordHash+" UrlHash: "+entry.getUrlHash());
try {
url = lurl.getEntry(entry.getUrlHash(), null).url();
if ((url == null) ||
(plasmaSwitchboard.urlBlacklist.isListed(url.getHost().toLowerCase(),url.getPath())==true)) {
wordHash = (String) wordHashIterator.next();
wordContainer = getContainer(wordHash, true, -1);
Iterator containerIterator = wordContainer.entries();
wordHashNow = wordHash;
while (containerIterator.hasNext() && run) {
waiter();
entry = (plasmaWordIndexEntry) containerIterator.next();
// System.out.println("Wordhash: "+wordHash+" UrlHash:
// "+entry.getUrlHash());
try {
url = lurl.getEntry(entry.getUrlHash(), null).url();
if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(url.getHost().toLowerCase(), url.getPath()) == true)) {
urlHashs.add(entry.getUrlHash());
}
} catch (IOException e) {
urlHashs.add(entry.getUrlHash());
}
} catch (IOException e) {
urlHashs.add(entry.getUrlHash());
}
if (urlHashs.size() > 0) {
String[] urlArray;
urlArray = (String[]) urlHashs.toArray(new String[0]);
int removed = removeEntries(wordHash, urlArray, true);
serverLog.logFine("INDEXCLEANER", wordHash + ": " + removed + " of " + wordContainer.size() + " URL-entries deleted");
lastWordHash = wordHash;
lastDeletionCounter = urlHashs.size();
urlHashs.clear();
}
}
if (urlHashs.size()>0) {
String [] urlArray;
urlArray = (String[]) urlHashs.toArray(new String[0]);
int removed = removeEntries(wordHash, urlArray, true);
serverLog.logFine("INDEXCLEANER", wordHash + ": " + removed + " of " + wordContainer.size() + " URL-entries deleted");
lastWordHash = wordHash;
lastDeletionCounter = urlHashs.size();
urlHashs.clear();
}
} catch (IOException e) {
serverLog.logSevere("INDEXCLEANER",
"IndexCleaner-Thread: unable to start: "
+ e.getMessage());
}
serverLog.logInfo("INDEXCLEANER", "IndexCleaner-Thread stopped");
}
@ -667,9 +675,13 @@ public final class plasmaWordIndex {
// System.out.println(new Date(reverseMicroDateDays(microDateDays(System.currentTimeMillis()))));
plasmaWordIndex index = new plasmaWordIndex(new File("D:\\dev\\proxy\\DATA\\PLASMADB"), 555, new serverLog("TESTAPP"));
Iterator iter = index.wordHashes("5A8yhZMh_Kmv", plasmaWordIndex.RL_WORDFILES, true);
while (iter.hasNext()) {
System.out.println("File: " + (String) iter.next());
try {
Iterator iter = index.wordHashes("5A8yhZMh_Kmv", plasmaWordIndex.RL_WORDFILES, true);
while (iter.hasNext()) {
System.out.println("File: " + (String) iter.next());
}
} catch (IOException e) {
e.printStackTrace();
}
}

@ -243,7 +243,7 @@ public final class plasmaWordIndexAssortment {
assortments = new kelondroTree(assortmentFile, bufferSize, bufferStructure(assortmentLength), true);
}
public Iterator hashes(String startWordHash, boolean up, boolean rot) {
public Iterator hashes(String startWordHash, boolean up, boolean rot) throws IOException {
try {
return assortments.keys(up, rot, startWordHash.getBytes());
} catch (kelondroException e) {

@ -47,6 +47,7 @@
package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
@ -235,7 +236,7 @@ public final class plasmaWordIndexAssortmentCluster {
return size;
}
public Iterator hashConjunction(String startWordHash, boolean up, boolean rot) {
public Iterator hashConjunction(String startWordHash, boolean up, boolean rot) throws IOException {
HashSet iterators = new HashSet();
//if (rot) System.out.println("WARNING: kelondroMergeIterator does not work correctly when individual iterators rotate on their own!");
for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, rot));

Loading…
Cancel
Save