- tried to remove a deadlock

- reduced search time in kelondroRowSet
- enhanced uniq(): reverse enumeration takes less time when mass-removing doubles

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4207 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 17 years ago
parent a4010f7dc8
commit 0abf33ed03

@@ -71,7 +71,7 @@ public class indexCollectionRI implements indexRI {
return collectionIndex.size();
}
public synchronized int indexSize(String wordHash) {
public int indexSize(String wordHash) {
try {
return collectionIndex.indexSize(wordHash.getBytes());
} catch (IOException e) {
@@ -122,7 +122,7 @@ public class indexCollectionRI implements indexRI {
}
public synchronized boolean hasContainer(String wordHash) {
public boolean hasContainer(String wordHash) {
try {
return collectionIndex.has(wordHash.getBytes());
} catch (IOException e) {
@@ -130,7 +130,7 @@ public class indexCollectionRI implements indexRI {
}
}
public synchronized indexContainer getContainer(String wordHash, Set urlselection) {
public indexContainer getContainer(String wordHash, Set urlselection) {
try {
kelondroRowSet collection = collectionIndex.get(wordHash.getBytes());
if (collection != null) collection.select(urlselection);
@@ -141,7 +141,7 @@ public class indexCollectionRI implements indexRI {
}
}
public synchronized indexContainer deleteContainer(String wordHash) {
public indexContainer deleteContainer(String wordHash) {
try {
kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes());
if (collection == null) return null;
@@ -151,13 +151,13 @@ public class indexCollectionRI implements indexRI {
}
}
public synchronized boolean removeEntry(String wordHash, String urlHash) {
public boolean removeEntry(String wordHash, String urlHash) {
HashSet hs = new HashSet();
hs.add(urlHash.getBytes());
return removeEntries(wordHash, hs) == 1;
}
public synchronized int removeEntries(String wordHash, Set urlHashes) {
public int removeEntries(String wordHash, Set urlHashes) {
try {
return collectionIndex.remove(wordHash.getBytes(), urlHashes);
} catch (kelondroOutOfLimitsException e) {
@@ -169,7 +169,7 @@ public class indexCollectionRI implements indexRI {
}
}
public synchronized void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
public void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
try {
collectionIndex.merge(newEntries);
} catch (kelondroOutOfLimitsException e) {
@@ -179,10 +179,10 @@ public class indexCollectionRI implements indexRI {
}
}
public synchronized void addMultipleEntries(List /*of indexContainer*/ containerList) {
public void addMultipleEntries(List /*of indexContainer*/ containerList) {
try {
//for (int i = 0; i < containerList.size(); i++) collectionIndex.merge((indexContainer) containerList.get(i));
synchronized (containerList) {collectionIndex.mergeMultiple(containerList);}
collectionIndex.mergeMultiple(containerList);
} catch (kelondroOutOfLimitsException e) {
e.printStackTrace();
} catch (IOException e) {
@@ -190,7 +190,7 @@ public class indexCollectionRI implements indexRI {
}
}
public synchronized void close() {
public void close() {
collectionIndex.close();
}
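
The indexCollectionRI hunks above drop the wrapper-level synchronized modifiers and leave locking to the underlying collectionIndex. A plausible reading, given the "tried to remove a deadlock" note, is that holding the wrapper's monitor while calling into another lock-guarded object stacks two locks per call, which can deadlock once a second thread acquires the same locks in the opposite order. A minimal before/after sketch of that shape (hypothetical classes, not the YaCy types):

// Hypothetical sketch: Inner guards its own state; Outer used to add a second,
// nested monitor on top of every call. Dropping Outer's synchronized keyword
// leaves exactly one lock per call path and removes the lock-ordering hazard.
final class Inner {
    private int size = 0;
    synchronized int size()      { return size; }
    synchronized void add(int n) { size += n; }
}

final class Outer {
    private final Inner inner = new Inner();

    // before: public synchronized int size() { return inner.size(); }  // two nested locks
    public int size() { return inner.size(); }                          // after: one lock

    // before: public synchronized void add(int n) { inner.add(n); }
    public void add(int n) { inner.add(n); }
}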

@@ -288,7 +288,7 @@ public final class indexRAMRI implements indexRI {
public class wordContainerIterator implements kelondroCloneableIterator {
// this class exists, because the wCache cannot be iterated with rotation
// and because every indeContainer Object that is iterated must be returned as top-level-clone
// and because every indexContainer Object that is iterated must be returned as top-level-clone
// so this class simulates wCache.tailMap(startWordHash).values().iterator()
// plus the mentioned features
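
The comment above describes the intended behaviour: iterate the cache like wCache.tailMap(startWordHash).values().iterator(), but hand out top-level clones instead of the cached objects and support rotation back to the first key. A minimal standalone sketch of that contract (hypothetical names and value type, not the indexRAMRI code):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeMap;

// Hypothetical sketch: start at a given key, return copies of the cached values,
// and when the tail is exhausted optionally wrap around to the first key.
final class RotatingCloneIterator implements Iterator<ArrayList<String>> {
    private final TreeMap<String, ArrayList<String>> cache;
    private final boolean rotating;
    private Iterator<ArrayList<String>> inner;

    RotatingCloneIterator(TreeMap<String, ArrayList<String>> cache, String startKey, boolean rotating) {
        this.cache = cache;
        this.rotating = rotating;
        this.inner = cache.tailMap(startKey).values().iterator();
    }

    public boolean hasNext() {
        if (inner.hasNext()) return true;
        if (!rotating || cache.isEmpty()) return false;
        inner = cache.values().iterator(); // rotation: continue from the first key
        return inner.hasNext();
    }

    public ArrayList<String> next() {
        return new ArrayList<String>(inner.next()); // top-level clone, caller may modify it freely
    }

    public void remove() { throw new UnsupportedOperationException(); }
}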

@@ -56,7 +56,7 @@ public class kelondroRowCollection {
private static final int exp_order_bound = 5;
private static final int exp_collection = 6;
private static int processors = 1; //Runtime.getRuntime().availableProcessors();
private static int processors = Runtime.getRuntime().availableProcessors();
public kelondroRowCollection(kelondroRowCollection rc) {
this.rowdef = rc.rowdef;
@@ -427,7 +427,7 @@ public class kelondroRowCollection {
qsort(p, this.chunkcount, 0, swapspace);
}
this.sortBound = this.chunkcount;
assert this.isSorted();
//assert this.isSorted();
}
private class qsortthread extends Thread {
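
The processors constant above is switched back to Runtime.getRuntime().availableProcessors(), and the qsortthread class suggests the quicksort work can be handed to separate threads when more than one core is available. A rough, simplified sketch of sorting on two cores (hypothetical helper; it splits and merges rather than quicksorting both partitions in place):

import java.util.Arrays;

final class ParallelSortSketch {
    private static final int processors = Runtime.getRuntime().availableProcessors();

    static void sort(final int[] a) throws InterruptedException {
        if (processors > 1 && a.length > 10000) {
            // sort two halves concurrently, then merge them back into a
            final int[] left = Arrays.copyOfRange(a, 0, a.length / 2);
            final int[] right = Arrays.copyOfRange(a, a.length / 2, a.length);
            Thread helper = new Thread(new Runnable() {
                public void run() { Arrays.sort(left); }
            });
            helper.start();
            Arrays.sort(right);
            helper.join();
            merge(left, right, a);
        } else {
            Arrays.sort(a); // single core: plain sequential sort
        }
    }

    private static void merge(int[] l, int[] r, int[] out) {
        int i = 0, j = 0, k = 0;
        while (i < l.length && j < r.length) out[k++] = (l[i] <= r[j]) ? l[i++] : r[j++];
        while (i < l.length) out[k++] = l[i++];
        while (j < r.length) out[k++] = r[j++];
    }
}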
@@ -528,14 +528,13 @@ public class kelondroRowCollection {
// then this method may run a long time with 100% CPU load which is caused
// by the large number of memory movements. Therefore it is possible
// to assign a runtime limitation
if (chunkcount <= 1) return;
int i = 0;
while (i < chunkcount - 1) {
if (chunkcount < 2) return;
int i = chunkcount - 2;
while (i >= 0) {
if (compare(i, i + 1) == 0) {
removeRow(i, true); // this decreases the chunkcount
} else {
i++;
removeRow(i, true);
}
i--;
}
}
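
The rewritten uniq() above enumerates the sorted collection from the end towards the start. Removing row i then only moves rows that were already visited, so the loop can simply decrement i and never compares the same position twice, which is what the commit message means by reverse enumeration taking less time for mass removal of doubles. A minimal sketch of the same idea on a plain sorted list (hypothetical helper, not the kelondro code):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

final class UniqSketch {
    // remove doubles from an already sorted list by enumerating in reverse
    static <T extends Comparable<T>> void uniq(List<T> sorted) {
        if (sorted.size() < 2) return;
        int i = sorted.size() - 2;
        while (i >= 0) {
            if (sorted.get(i).compareTo(sorted.get(i + 1)) == 0) {
                sorted.remove(i); // shifts only positions that were already visited
            }
            i--;
        }
    }

    public static void main(String[] args) {
        List<Integer> l = new ArrayList<Integer>(Arrays.asList(1, 1, 1, 2, 3, 3, 4));
        uniq(l);
        System.out.println(l); // prints [1, 2, 3, 4]
    }
}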
@@ -580,8 +579,7 @@ public class kelondroRowCollection {
return c;
}
private final byte[] compilePivot(int i) {
assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length;
protected final byte[] compilePivot(int i) {
assert (i >= 0) && (i < chunkcount) : "i = " + i + ", chunkcount = " + chunkcount;
assert (this.rowdef.objectOrder != null);
assert (this.rowdef.objectOrder instanceof kelondroBase64Order);
@@ -591,7 +589,14 @@ public class kelondroRowCollection {
return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(chunkcache, i * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength);
}
private final int comparePivot(byte[] compiledPivot, int j) {
protected final byte[] compilePivot(byte[] a, int astart, int alength) {
assert (this.rowdef.objectOrder != null);
assert (this.rowdef.objectOrder instanceof kelondroBase64Order);
assert (this.rowdef.primaryKeyIndex == 0) : "this.sortColumn = " + this.rowdef.primaryKeyIndex;
return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(a, astart, alength);
}
protected final int comparePivot(byte[] compiledPivot, int j) {
assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length;
assert (j >= 0) && (j < chunkcount) : "j = " + j + ", chunkcount = " + chunkcount;
assert (this.rowdef.objectOrder != null);
@@ -693,7 +698,26 @@ public class kelondroRowCollection {
boolean eis = e.isSorted();
long t12 = System.currentTimeMillis();
System.out.println("e isSorted = " + ((eis) ? "true" : "false") + ": " + (t12 - t11) + " milliseconds");
System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size());
random = new Random(0);
boolean allfound = true;
for (int i = 0; i < testsize; i++) {
if (e.get(randomHash().getBytes()) == null) {
allfound = false;
break;
}
}
long t13 = System.currentTimeMillis();
System.out.println("e allfound = " + ((allfound) ? "true" : "false") + ": " + (t13 - t12) + " milliseconds");
boolean noghosts = true;
for (int i = 0; i < testsize; i++) {
if (e.get(randomHash().getBytes()) != null) {
noghosts = false;
break;
}
}
long t14 = System.currentTimeMillis();
System.out.println("e noghosts = " + ((noghosts) ? "true" : "false") + ": " + (t14 - t13) + " milliseconds");
System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size());
System.out.println();
}
@@ -702,8 +726,6 @@ public class kelondroRowCollection {
test(10000);
test(100000);
//test(1000000);
// 368, 12029
/*
System.out.println(new java.util.Date(10957 * day));
@@ -711,4 +733,40 @@ public class kelondroRowCollection {
System.out.println(daysSince2000(System.currentTimeMillis()));
*/
}
/*
kelondroRowCollection test with size = 10000
create c : 134 milliseconds, 74 entries/millisecond
copy c -> d: 47 milliseconds, 212 entries/millisecond
sort c (1) : 66 milliseconds, 151 entries/millisecond
sort d (2) : 23 milliseconds, 434 entries/millisecond
uniq c : 3 milliseconds, 3333 entries/millisecond
uniq d : 2 milliseconds, 5000 entries/millisecond
create e : 528 milliseconds, 18 entries/millisecond
sort e (2) : 13 milliseconds, 769 entries/millisecond
uniq e : 2 milliseconds, 5000 entries/millisecond
c isSorted = true: 2 milliseconds
d isSorted = true: 3 milliseconds
e isSorted = true: 2 milliseconds
e allfound = true: 85 milliseconds
e noghosts = true: 75 milliseconds
Result size: c = 10000, d = 10000, e = 10000
kelondroRowCollection test with size = 100000
create c : 589 milliseconds, 169 entries/millisecond
copy c -> d: 141 milliseconds, 709 entries/millisecond
sort c (1) : 268 milliseconds, 373 entries/millisecond
sort d (2) : 187 milliseconds, 534 entries/millisecond
uniq c : 13 milliseconds, 7692 entries/millisecond
uniq d : 14 milliseconds, 7142 entries/millisecond
create e : 22068 milliseconds, 4 entries/millisecond
sort e (2) : 167 milliseconds, 598 entries/millisecond
uniq e : 14 milliseconds, 7142 entries/millisecond
c isSorted = true: 13 milliseconds
d isSorted = true: 14 milliseconds
e isSorted = true: 13 milliseconds
e allfound = true: 815 milliseconds
e noghosts = true: 787 milliseconds
Result size: c = 100000, d = 100000, e = 100000
*/
}

@@ -141,29 +141,37 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
if (rowdef.objectOrder == null) return iterativeSearch(a, astart, alength, 0, this.chunkcount);
// check if a re-sorting make sense
// check if a re-sorting makes sense
if ((this.chunkcount - this.sortBound) > collectionReSortLimit) {
sort();
}
if ((this.rowdef.objectOrder != null) && (this.rowdef.objectOrder instanceof kelondroBase64Order) && (this.sortBound > 4000)) {
// first try to find in sorted area
final byte[] compiledPivot = compilePivot(a, astart, alength);
int p = binarySearchCompiledPivot(compiledPivot);
if (p >= 0) return p;
// then find in unsorted area
return iterativeSearchCompiledPivot(compiledPivot, this.sortBound, this.chunkcount);
} else {
// first try to find in sorted area
int p = binarySearch(a, astart, alength);
if (p >= 0) return p;
// first try to find in sorted area
int p = binarySearch(a, astart, alength);
if (p >= 0) return p;
// then find in unsorted area
return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount);
// then find in unsorted area
return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount);
}
}
private int iterativeSearch(byte[] key, int astart, int alength, int leftBorder, int rightBound) {
// returns the chunknumber
// returns the chunknumber
if (rowdef.objectOrder == null) {
for (int i = leftBorder; i < rightBound; i++) {
if (match(key, astart, alength, i)) return i;
}
return -1;
} else {
// we don't do special handling of kelondroBase64Order here, because tests showed that it produces too much overhead
for (int i = leftBorder; i < rightBound; i++) {
if (compare(key, astart, alength, i) == 0) return i;
}
@@ -171,6 +179,16 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
}
}
private int iterativeSearchCompiledPivot(byte[] compiledPivot, int leftBorder, int rightBound) {
// returns the chunknumber
assert (rowdef.objectOrder != null);
assert (rowdef.objectOrder instanceof kelondroBase64Order);
for (int i = leftBorder; i < rightBound; i++) {
if (comparePivot(compiledPivot, i) == 0) return i;
}
return -1;
}
private int binarySearch(byte[] key, int astart, int alength) {
// returns the exact position of the key if the key exists,
// or -1 if the key does not exist
@@ -183,8 +201,25 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
p = l + ((rbound - l) >> 1);
d = compare(key, astart, alength, p);
if (d == 0) return p;
else if (d < 0) rbound = p;
else l = p + 1;
if (d < 0) rbound = p; else l = p + 1;
}
return -1;
}
private int binarySearchCompiledPivot(byte[] compiledPivot) {
// returns the exact position of the key if the key exists,
// or -1 if the key does not exist
assert (rowdef.objectOrder != null);
assert (rowdef.objectOrder instanceof kelondroBase64Order);
int l = 0;
int rbound = this.sortBound;
int p = 0;
int d;
while (l < rbound) {
p = l + ((rbound - l) >> 1);
d = comparePivot(compiledPivot, p);
if (d == 0) return p;
if (d < 0) rbound = p; else l = p + 1;
}
return -1;
}
@@ -202,8 +237,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
p = l + ((rbound - l) >> 1);
d = compare(key, astart, alength, p);
if (d == 0) return p;
else if (d < 0) rbound = p;
else l = p + 1;
if (d < 0) rbound = p; else l = p + 1;
}
return l;
}
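
The lookup path above first binary-searches the sorted region [0, sortBound) and only then scans the unsorted tail [sortBound, chunkcount); when the order is kelondroBase64Order and the sorted region is large, the key is additionally compiled into a pivot once so that every comparison during the search can reuse it. A minimal sketch of the sorted-region/unsorted-tail split on a plain long[] (hypothetical, leaving out the pivot compilation and the row encoding):

// Hypothetical sketch: entries below sortBound are sorted, entries from sortBound
// to count were appended later and are still unsorted, mirroring the lookup order
// used by kelondroRowSet.
final class HybridSearchSketch {
    static int find(long[] keys, int sortBound, int count, long key) {
        // 1) binary search in the sorted region [0, sortBound)
        int l = 0, rbound = sortBound;
        while (l < rbound) {
            int p = l + ((rbound - l) >> 1);
            if (keys[p] == key) return p;
            if (key < keys[p]) rbound = p; else l = p + 1;
        }
        // 2) linear scan in the unsorted tail [sortBound, count)
        for (int i = sortBound; i < count; i++) {
            if (keys[i] == key) return i;
        }
        return -1;
    }

    public static void main(String[] args) {
        long[] keys = {2, 5, 9, 14, 7, 3}; // sorted up to index 4, then two appended keys
        System.out.println(find(keys, 4, keys.length, 9));  // 2  (found by binary search)
        System.out.println(find(keys, 4, keys.length, 3));  // 5  (found in the unsorted tail)
        System.out.println(find(keys, 4, keys.length, 11)); // -1 (not present)
    }
}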

@@ -155,14 +155,15 @@ public final class plasmaWordIndex implements indexRI {
public void dhtFlushControl(indexRAMRI theCache) {
// check for forced flush
int count = -1;
synchronized (theCache) {
if ((theCache.maxURLinCache() > wCacheMaxChunk ) ||
(theCache.size() > theCache.getMaxWordCount()) ||
(serverMemory.available() < collections.minMem())) {
int count = theCache.size() + flushsize - theCache.getMaxWordCount();
flushCache(theCache, (count > 0) ? count : 1);
count = theCache.size() + flushsize - theCache.getMaxWordCount();
}
}
if (count >= 0) flushCache(theCache, (count > 0) ? count : 1);
}
public long getUpdateTime(String wordHash) {
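
The dhtFlushControl change above computes the flush count while holding the cache monitor but calls flushCache only after leaving the synchronized block, so the flush can take further locks without the cache lock still being held. A compact sketch of that decide-under-the-lock, act-outside-the-lock pattern (hypothetical names and thresholds):

final class FlushControlSketch {
    private final Object cacheLock = new Object();
    private int cacheSize = 0;
    private static final int MAX_WORDS = 1000;
    private static final int FLUSH_CHUNK = 200;

    void add(int entries) {
        int toFlush = -1;
        synchronized (cacheLock) {
            cacheSize += entries;
            if (cacheSize > MAX_WORDS) {
                toFlush = cacheSize + FLUSH_CHUNK - MAX_WORDS; // only the decision is made here
            }
        }
        if (toFlush >= 0) flush(Math.max(toFlush, 1)); // the expensive action runs outside the monitor
    }

    private void flush(int count) {
        // in the real code this also touches the collection index, which has its own locking;
        // because the cache lock was already released, the two locks are never nested here
        synchronized (cacheLock) { cacheSize = Math.max(0, cacheSize - count); }
    }
}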
@@ -216,11 +217,11 @@ public final class plasmaWordIndex implements indexRI {
busyCacheFlush = true;
String wordHash;
ArrayList containerList = new ArrayList();
synchronized (ram) {
count = Math.min(5000, Math.min(count, ram.size()));
boolean collectMax = true;
indexContainer c;
while (collectMax) {
count = Math.min(5000, Math.min(count, ram.size()));
boolean collectMax = true;
indexContainer c;
while (collectMax) {
synchronized (ram) {
wordHash = ram.maxScoreWordHash();
c = ram.getContainer(wordHash, null);
if ((c != null) && (c.size() > wCacheMaxChunk)) {
@@ -230,17 +231,20 @@ public final class plasmaWordIndex implements indexRI {
collectMax = false;
}
}
count = count - containerList.size();
for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
}
count = count - containerList.size();
for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
synchronized (ram) {
if (ram.size() == 0) break;
if (serverMemory.available() < collections.minMem()) break; // protect memory during flush
// select one word to flush
wordHash = ram.bestFlushWordHash();
// move one container from ram to flush list
c = ram.deleteContainer(wordHash);
if (c != null) containerList.add(c);
}
if (c != null) containerList.add(c);
}
// flush the containers
collections.addMultipleEntries(containerList);
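
flushCache above no longer wraps the whole selection loop in one synchronized (ram) block; the cache lock is re-acquired for each container that is moved, so other threads can use the cache between iterations, and the collected containers are merged into the collection index after the loop, outside the cache lock. A compact sketch of that loop shape (hypothetical types, not the plasmaWordIndex code):

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;

final class FlushLoopSketch {
    private final ArrayDeque<String> ram = new ArrayDeque<String>();

    void flush(int count, List<String> target) {
        List<String> batch = new ArrayList<String>();
        for (int i = 0; i < count; i++) {
            String c;
            synchronized (ram) {      // short critical section per moved item
                if (ram.isEmpty()) break;
                c = ram.poll();
            }
            batch.add(c);             // collected outside the lock
        }
        target.addAll(batch);         // the merge step runs without the cache lock
    }
}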
@@ -540,7 +544,7 @@ public final class plasmaWordIndex implements indexRI {
public synchronized kelondroCloneableIterator wordContainers(String startWordHash, boolean ram) {
kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone());
containerOrder.rotate(startWordHash.getBytes());
containerOrder.rotate(startWordHash.getBytes());
if (ram) {
return dhtOutCache.wordContainers(startWordHash, false);
} else {
