enhancements in kelondroCollectionIndex:

* synchronized array and index objects
* auto-fix function for slightly corrupted index entries
* generalized internal access methods

also extended kelondroIndex interface to support ordering access
which is used in kelondroCollectionIndex for string comparisons

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2366 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent ec5149ff3b
commit 718fbc2dae

@ -287,7 +287,7 @@ public class PerformanceMemory_p {
prop.put("namecache.hit",Long.toString(amount));
amount = httpc.nameCacheNoCachingListSize();
prop.put("namecache.noCache",Long.toString(amount));
amount = sb.urlBlacklist.blacklistCacheSize();
amount = plasmaSwitchboard.urlBlacklist.blacklistCacheSize();
prop.put("blacklistcache.size",Long.toString(amount));
// return rewrite values for templates
return prop;

@ -15,6 +15,8 @@ import java.util.Iterator;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
import de.anomic.kelondro.kelondroProfile;
import de.anomic.kelondro.kelondroSplittedTree;
import de.anomic.kelondro.kelondroTree;
@ -375,7 +377,8 @@ final class dbTable implements kelondroIndex {
private final String db_usr_str = "yacy";
private final String db_pwd_str = "yacy";
private Connection theDBConnection = null;
private Connection theDBConnection = null;
private final kelondroOrder order = new kelondroNaturalOrder(true);
private kelondroRow rowdef;
public dbTable(String dbType, kelondroRow rowdef) throws Exception {
@ -540,6 +543,10 @@ final class dbTable implements kelondroIndex {
// TODO Auto-generated method stub
return 0;
}
/**
 * Returns the ordering object held in this table's {@code order} field.
 *
 * @return the kelondroOrder stored in {@code this.order}
 */
public kelondroOrder order() {
return this.order;
}
}

@ -45,8 +45,8 @@ public interface indexContainer {
public String getWordHash();
public void setOrdering(kelondroOrder newOrder, int newColumn);
public kelondroOrder getOrdering();
public int getOrderColumn();
public kelondroOrder order();
public int orderColumn();
public int add(indexEntry entry);
public int add(indexEntry entry, long updateTime);

@ -296,8 +296,8 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain
private static indexContainer joinConstructiveByEnumeration(indexContainer i1, indexContainer i2, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY ENUMERATION");
indexContainer conj = new indexRowSetContainer(null); // start with empty search result
if (!((i1.getOrdering().signature().equals(i2.getOrdering().signature())) &&
(i1.getOrderColumn() == i2.getOrderColumn()))) return conj; // ordering must be equal
if (!((i1.order().signature().equals(i2.order().signature())) &&
(i1.orderColumn() == i2.orderColumn()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();
Iterator e2 = i2.entries();
int c;
@ -309,7 +309,7 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain
long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = i1.getOrdering().compare(ie1.urlHash(), ie2.urlHash());
c = i1.order().compare(ie1.urlHash(), ie2.urlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break;

@ -70,4 +70,8 @@ public class kelondroBytesIntMap {
return ki.rows(up, rotating, firstKey);
}
/**
 * Returns the ordering of the wrapped index by delegating to {@code ki.order()}.
 *
 * @return whatever {@code ki.order()} returns
 */
public kelondroOrder order() {
return ki.order();
}
}

@ -163,109 +163,98 @@ public class kelondroCollectionIndex {
return 0;
}
// first find an old entry, if one exists
kelondroRow.Entry oldindexrow = index.get(key);
synchronized (index) {
// first find an old entry, if one exists
kelondroRow.Entry oldindexrow = index.get(key);
if (oldindexrow == null) {
if ((collection != null) && (collection.size() > 0)) {
// the collection is new
overwrite(key, collection);
}
return 0;
} else {
// overwrite the old collection
// read old information
int oldchunksize = (int) oldindexrow.getColLongB256(idx_col_chunksize); // needed only for migration
int oldchunkcount = (int) oldindexrow.getColLongB256(idx_col_chunkcount);
int oldrownumber = (int) oldindexrow.getColLongB256(idx_col_indexpos);
int oldPartitionNumber = arrayIndex(oldchunkcount);
int oldSerialNumber = 0;
if (oldindexrow == null) {
if ((collection != null) && (collection.size() > 0)) {
// the collection is new
overwrite(key, collection);
}
return 0;
} else {
// overwrite the old collection
// read old information
int oldchunksize = (int) oldindexrow.getColLongB256(idx_col_chunksize); // needed only for migration
int oldchunkcount = (int) oldindexrow.getColLongB256(idx_col_chunkcount);
int oldrownumber = (int) oldindexrow.getColLongB256(idx_col_indexpos);
int oldPartitionNumber = arrayIndex(oldchunkcount);
int oldSerialNumber = 0;
if (merge) {
// load the old collection and join it with the old
// open array entry
kelondroFixedWidthArray oldarray = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
//System.out.println("joining for key " + new String(key) + ", oldrow=" + oldrownumber + ", oldchunkcount=" + oldchunkcount + ", array file=" + oldarray.filename);
kelondroRow.Entry oldarrayrow = oldarray.get(oldrownumber);
if (oldarrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, oldchunksize, oldPartitionNumber, oldSerialNumber).toString(), "array does not contain expected row");
// read the row and define a collection
kelondroRowSet oldcollection = new kelondroRowSet(this.rowdef, oldarrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()
// join with new collection
oldcollection.addAll(collection);
collection = oldcollection;
}
if (merge) {
// load the old collection and join it with the old
kelondroRowSet oldcollection = getdelete(oldindexrow, false, false);
// join with new collection
oldcollection.addAll(collection);
collection = oldcollection;
}
int removed = 0;
if (removekeys != null) {
// load the old collection and remove keys
// open array entry
kelondroFixedWidthArray oldarray = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
kelondroRow.Entry oldarrayrow = oldarray.get(oldrownumber);
if (oldarrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, oldchunksize, oldPartitionNumber, oldSerialNumber).toString(), "array does not contain expected row");
int removed = 0;
if (removekeys != null) {
// load the old collection and remove keys
kelondroRowSet oldcollection = getdelete(oldindexrow, false, false);
// read the row and define a collection
kelondroRowSet oldcollection = new kelondroRowSet(this.rowdef, oldarrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()
// remove the keys from the set
Iterator i = removekeys.iterator();
Object k;
while (i.hasNext()) {
k = i.next();
if (k instanceof byte[]) {if (oldcollection.remove((byte[]) k) != null) removed++;}
if (k instanceof String) {if (oldcollection.remove(((String) k).getBytes()) != null) removed++;}
// remove the keys from the set
Iterator i = removekeys.iterator();
Object k;
while (i.hasNext()) {
k = i.next();
if (k instanceof byte[]) {if (oldcollection.remove((byte[]) k) != null) removed++;}
if (k instanceof String) {if (oldcollection.remove(((String) k).getBytes()) != null) removed++;}
}
collection = oldcollection;
}
collection = oldcollection;
}
if (collection.size() == 0) {
if (deletecomplete) {
kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
array.remove(oldrownumber);
if (collection.size() == 0) {
if (deletecomplete) {
kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
array.remove(oldrownumber);
}
return removed;
}
return removed;
}
int newPartitionNumber = arrayIndex(collection.size());
int newSerialNumber = 0;
int newPartitionNumber = arrayIndex(collection.size());
int newSerialNumber = 0;
// see if we need new space or if we can overwrite the old space
if (oldPartitionNumber == newPartitionNumber) {
// we don't need a new slot, just write into the old one
// see if we need new space or if we can overwrite the old space
if (oldPartitionNumber == newPartitionNumber) {
// we don't need a new slot, just write into the old one
// find array file
kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.rowdef.objectsize());
// find array file
kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.rowdef.objectsize());
// define row
kelondroRow.Entry arrayEntry = array.row().newEntry();
arrayEntry.setCol(0, key);
arrayEntry.setCol(1, collection.exportCollection());
// define row
kelondroRow.Entry arrayEntry = array.row().newEntry();
arrayEntry.setCol(0, key);
arrayEntry.setCol(1, collection.exportCollection());
// overwrite entry in this array
array.set(oldrownumber, arrayEntry);
// overwrite entry in this array
array.set(oldrownumber, arrayEntry);
// update the index entry
oldindexrow.setColLongB256(idx_col_chunkcount, collection.size());
oldindexrow.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
index.put(oldindexrow);
} else {
// we need a new slot, that means we must first delete the old entry
// find array file
kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
// update the index entry
oldindexrow.setColLongB256(idx_col_chunkcount, collection.size());
oldindexrow.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
index.put(oldindexrow);
} else {
// we need a new slot, that means we must first delete the old entry
// find array file
kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
// delete old entry
array.remove(oldrownumber);
// delete old entry
array.remove(oldrownumber);
// write a new entry in the other array
overwrite(key, collection);
// write a new entry in the other array
overwrite(key, collection);
}
return removed;
}
return removed;
}
}
private void overwrite(byte[] key, kelondroRowCollection collection) throws IOException {
// helper method, should not be called directly
// helper method, should not be called directly and only within a synchronized(index) environment
// simply store a collection without check if the collection existed before
// find array file
@ -292,19 +281,25 @@ public class kelondroCollectionIndex {
/**
 * Looks up the collection stored under the given key.
 * The index row for the key is fetched with index.get(key); if there is none, null is returned,
 * otherwise the result of getdelete(indexrow, false, deleteIfEmpty) is returned.
 *
 * NOTE(review): this span appears to show both the earlier unsynchronized lookup and the
 * synchronized(index) replacement one after the other; confirm which lines are live.
 *
 * @param key           the key whose collection is requested
 * @param deleteIfEmpty passed through to getdelete as its deleteIfEmpty argument
 * @return the collection returned by getdelete, or null if the index has no row for the key
 * @throws IOException declared by this method; presumably raised by the index/array access
 */
public kelondroRowSet get(byte[] key, boolean deleteIfEmpty) throws IOException {
// find an entry, if one exists
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, false, deleteIfEmpty);
synchronized (index) {
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, false, deleteIfEmpty);
}
}
/**
 * Removes the collection stored under the given key and returns it.
 * The index row for the key is fetched with index.get(key); if there is none, null is returned,
 * otherwise the result of getdelete(indexrow, true, false) is returned (remove flag set).
 *
 * NOTE(review): this span appears to show both the earlier unsynchronized lookup and the
 * synchronized(index) replacement one after the other; confirm which lines are live.
 *
 * @param key the key whose collection is to be deleted
 * @return the collection returned by getdelete, or null if the index has no row for the key
 * @throws IOException declared by this method; presumably raised by the index/array access
 */
public kelondroRowSet delete(byte[] key) throws IOException {
// find an entry, if one exists
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, true, false);
synchronized (index) {
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, true, false);
}
}
private kelondroRowSet getdelete(kelondroRow.Entry indexrow, boolean remove, boolean deleteIfEmpty) throws IOException {
// call this only within a synchronized(index) environment
// read values
int chunksize = (int) indexrow.getColLongB256(idx_col_chunksize);
int chunkcount = (int) indexrow.getColLongB256(idx_col_chunkcount);
@ -319,8 +314,17 @@ public class kelondroCollectionIndex {
// read the row and define a collection
kelondroRowSet collection = new kelondroRowSet(this.rowdef, arrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()
// consistency check: the key stored in column 0 of the array row must equal the key of the
// index row that pointed to it, compared with the index's own ordering
if (index.order().compare(arrayrow.getColBytes(0), indexrow.getColBytes(idx_col_key)) != 0) {
// check if we got the right row; this row is wrong
// both keys are converted to String before concatenation so the message shows the key text
// rather than the default toString() of a byte array
throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString(), "array contains wrong row '" + new String(arrayrow.getColBytes(0)) + "', expected is '" + new String(indexrow.getColBytes(idx_col_key)) + "'");
}
int chunkcountInArray = collection.size();
if (chunkcountInArray != chunkcount) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString(), "array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray);
// the chunk count recorded in the index row disagrees with the size of the collection that was
// actually read from the array: take the array's count, rewrite the index row and log the
// inconsistency
if (chunkcountInArray != chunkcount) {
// fix the entry in index
// NOTE(review): this column is read with getColLongB256 and written elsewhere in this file with
// setColLongB256; confirm that setColLong stores the value in the same encoding
indexrow.setColLong(idx_col_chunkcount, chunkcountInArray);
index.put(indexrow);
array.logFailure("INCONSISTENCY in " + arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString() + ": array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray + "; the index has been auto-fixed");
}
if ((remove) || ((chunkcountInArray == 0) && (deleteIfEmpty))) array.remove(rownumber);

@ -191,4 +191,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
}
/**
 * Returns the ordering of this table by delegating to {@code index.order()}.
 *
 * @return whatever {@code index.order()} returns
 */
public kelondroOrder order() {
return index.order();
}
}

@ -55,6 +55,7 @@ import java.util.Iterator;
public interface kelondroIndex {
public kelondroOrder order();
public int size() throws IOException;
public kelondroRow row() throws IOException;
public kelondroRow.Entry get(byte[] key) throws IOException;

@ -220,11 +220,11 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
}
}
public kelondroOrder getOrdering() {
public kelondroOrder order() {
return this.sortOrder;
}
public int getOrderColumn() {
public int orderColumn() {
return this.sortColumn;
}

@ -231,5 +231,9 @@ public class kelondroSplittedTree implements kelondroIndex {
}
/**
 * Returns the ordering object held in this tree's {@code order} field.
 *
 * @return the kelondroOrder stored in {@code this.order}
 */
public kelondroOrder order() {
return this.order;
}
}

Loading…
Cancel
Save