- tried to remove deadlock

- enhanced search time in kelondroRowSets
- enhanced uniq() - reverse enumeration takes less time in case of mass removal of doubles

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4207 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent a4010f7dc8
commit 0abf33ed03

@@ -71,7 +71,7 @@ public class indexCollectionRI implements indexRI {
         return collectionIndex.size();
     }
 
-    public synchronized int indexSize(String wordHash) {
+    public int indexSize(String wordHash) {
         try {
             return collectionIndex.indexSize(wordHash.getBytes());
         } catch (IOException e) {
@@ -122,7 +122,7 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized boolean hasContainer(String wordHash) {
+    public boolean hasContainer(String wordHash) {
         try {
             return collectionIndex.has(wordHash.getBytes());
         } catch (IOException e) {
@@ -130,7 +130,7 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized indexContainer getContainer(String wordHash, Set urlselection) {
+    public indexContainer getContainer(String wordHash, Set urlselection) {
         try {
             kelondroRowSet collection = collectionIndex.get(wordHash.getBytes());
             if (collection != null) collection.select(urlselection);
@@ -141,7 +141,7 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized indexContainer deleteContainer(String wordHash) {
+    public indexContainer deleteContainer(String wordHash) {
         try {
             kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes());
             if (collection == null) return null;
@@ -151,13 +151,13 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized boolean removeEntry(String wordHash, String urlHash) {
+    public boolean removeEntry(String wordHash, String urlHash) {
         HashSet hs = new HashSet();
         hs.add(urlHash.getBytes());
         return removeEntries(wordHash, hs) == 1;
     }
 
-    public synchronized int removeEntries(String wordHash, Set urlHashes) {
+    public int removeEntries(String wordHash, Set urlHashes) {
         try {
             return collectionIndex.remove(wordHash.getBytes(), urlHashes);
         } catch (kelondroOutOfLimitsException e) {
@@ -169,7 +169,7 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
+    public void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
         try {
             collectionIndex.merge(newEntries);
         } catch (kelondroOutOfLimitsException e) {
@@ -179,10 +179,10 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized void addMultipleEntries(List /*of indexContainer*/ containerList) {
+    public void addMultipleEntries(List /*of indexContainer*/ containerList) {
         try {
             //for (int i = 0; i < containerList.size(); i++) collectionIndex.merge((indexContainer) containerList.get(i));
-            synchronized (containerList) {collectionIndex.mergeMultiple(containerList);}
+            collectionIndex.mergeMultiple(containerList);
         } catch (kelondroOutOfLimitsException e) {
             e.printStackTrace();
         } catch (IOException e) {
@@ -190,7 +190,7 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized void close() {
+    public void close() {
         collectionIndex.close();
     }

@@ -288,7 +288,7 @@ public final class indexRAMRI implements indexRI {
     public class wordContainerIterator implements kelondroCloneableIterator {
         // this class exists, because the wCache cannot be iterated with rotation
-        // and because every indeContainer Object that is iterated must be returned as top-level-clone
+        // and because every indexContainer Object that is iterated must be returned as top-level-clone
         // so this class simulates wCache.tailMap(startWordHash).values().iterator()
         // plus the mentioned features

@@ -56,7 +56,7 @@ public class kelondroRowCollection {
     private static final int exp_order_bound = 5;
     private static final int exp_collection = 6;
 
-    private static int processors = 1; //Runtime.getRuntime().availableProcessors();
+    private static int processors = Runtime.getRuntime().availableProcessors();
 
     public kelondroRowCollection(kelondroRowCollection rc) {
         this.rowdef = rc.rowdef;
@@ -427,7 +427,7 @@ public class kelondroRowCollection {
             qsort(p, this.chunkcount, 0, swapspace);
         }
         this.sortBound = this.chunkcount;
-        assert this.isSorted();
+        //assert this.isSorted();
     }
 
     private class qsortthread extends Thread {
@@ -528,14 +528,13 @@ public class kelondroRowCollection {
         // then this method may run a long time with 100% CPU load which is caused
         // by the large number of memory movements. Therefore it is possible
         // to assign a runtime limitation
-        if (chunkcount <= 1) return;
-        int i = 0;
-        while (i < chunkcount - 1) {
+        if (chunkcount < 2) return;
+        int i = chunkcount - 2;
+        while (i >= 0) {
             if (compare(i, i + 1) == 0) {
-                removeRow(i, true); // this decreases the chunkcount
-            } else {
-                i++;
+                removeRow(i, true);
             }
+            i--;
         }
     }
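The reversed loop above follows the usual pattern for mass removal from an array-backed collection: removing element i only shifts the elements behind it, so walking from the end means the positions still to be visited never move and no index bookkeeping is needed after a removal. With a forward loop, every removal needed the extra else branch of the old code to keep the index in step. A minimal standalone sketch of the same idea, using a plain ArrayList rather than the chunkcache of kelondroRowCollection (the names here are illustrative only, not YaCy code):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ReverseUniqDemo {

    // removes doubles from a sorted list; walking from the end means that
    // remove(i) only shifts elements that have already been visited
    static void uniq(List<String> sorted) {
        for (int i = sorted.size() - 2; i >= 0; i--) {
            if (sorted.get(i).equals(sorted.get(i + 1))) sorted.remove(i);
        }
    }

    public static void main(String[] args) {
        List<String> l = new ArrayList<String>(Arrays.asList("a", "a", "b", "b", "b", "c"));
        uniq(l);
        System.out.println(l); // prints [a, b, c]
    }
}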
@@ -580,8 +579,7 @@ public class kelondroRowCollection {
         return c;
     }
 
-    private final byte[] compilePivot(int i) {
-        assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length;
+    protected final byte[] compilePivot(int i) {
         assert (i >= 0) && (i < chunkcount) : "i = " + i + ", chunkcount = " + chunkcount;
         assert (this.rowdef.objectOrder != null);
         assert (this.rowdef.objectOrder instanceof kelondroBase64Order);
@@ -591,7 +589,14 @@ public class kelondroRowCollection {
         return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(chunkcache, i * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength);
     }
 
-    private final int comparePivot(byte[] compiledPivot, int j) {
+    protected final byte[] compilePivot(byte[] a, int astart, int alength) {
+        assert (this.rowdef.objectOrder != null);
+        assert (this.rowdef.objectOrder instanceof kelondroBase64Order);
+        assert (this.rowdef.primaryKeyIndex == 0) : "this.sortColumn = " + this.rowdef.primaryKeyIndex;
+        return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(a, astart, alength);
+    }
+
+    protected final int comparePivot(byte[] compiledPivot, int j) {
         assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length;
         assert (j >= 0) && (j < chunkcount) : "j = " + j + ", chunkcount = " + chunkcount;
         assert (this.rowdef.objectOrder != null);
@@ -693,7 +698,26 @@ public class kelondroRowCollection {
         boolean eis = e.isSorted();
         long t12 = System.currentTimeMillis();
         System.out.println("e isSorted = " + ((eis) ? "true" : "false") + ": " + (t12 - t11) + " milliseconds");
+        random = new Random(0);
+        boolean allfound = true;
+        for (int i = 0; i < testsize; i++) {
+            if (e.get(randomHash().getBytes()) == null) {
+                allfound = false;
+                break;
+            }
+        }
+        long t13 = System.currentTimeMillis();
+        System.out.println("e allfound = " + ((allfound) ? "true" : "false") + ": " + (t13 - t12) + " milliseconds");
+        boolean noghosts = true;
+        for (int i = 0; i < testsize; i++) {
+            if (e.get(randomHash().getBytes()) != null) {
+                noghosts = false;
+                break;
+            }
+        }
+        long t14 = System.currentTimeMillis();
+        System.out.println("e noghosts = " + ((noghosts) ? "true" : "false") + ": " + (t14 - t13) + " milliseconds");
         System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size());
         System.out.println();
     }
@@ -703,12 +727,46 @@ public class kelondroRowCollection {
         test(100000);
         //test(1000000);
-        // 368, 12029
         /*
         System.out.println(new java.util.Date(10957 * day));
         System.out.println(new java.util.Date(0));
         System.out.println(daysSince2000(System.currentTimeMillis()));
         */
     }
+
+    /*
+    kelondroRowCollection test with size = 10000
+    create c : 134 milliseconds, 74 entries/millisecond
+    copy c -> d: 47 milliseconds, 212 entries/millisecond
+    sort c (1) : 66 milliseconds, 151 entries/millisecond
+    sort d (2) : 23 milliseconds, 434 entries/millisecond
+    uniq c : 3 milliseconds, 3333 entries/millisecond
+    uniq d : 2 milliseconds, 5000 entries/millisecond
+    create e : 528 milliseconds, 18 entries/millisecond
+    sort e (2) : 13 milliseconds, 769 entries/millisecond
+    uniq e : 2 milliseconds, 5000 entries/millisecond
+    c isSorted = true: 2 milliseconds
+    d isSorted = true: 3 milliseconds
+    e isSorted = true: 2 milliseconds
+    e allfound = true: 85 milliseconds
+    e noghosts = true: 75 milliseconds
+    Result size: c = 10000, d = 10000, e = 10000
+    kelondroRowCollection test with size = 100000
+    create c : 589 milliseconds, 169 entries/millisecond
+    copy c -> d: 141 milliseconds, 709 entries/millisecond
+    sort c (1) : 268 milliseconds, 373 entries/millisecond
+    sort d (2) : 187 milliseconds, 534 entries/millisecond
+    uniq c : 13 milliseconds, 7692 entries/millisecond
+    uniq d : 14 milliseconds, 7142 entries/millisecond
+    create e : 22068 milliseconds, 4 entries/millisecond
+    sort e (2) : 167 milliseconds, 598 entries/millisecond
+    uniq e : 14 milliseconds, 7142 entries/millisecond
+    c isSorted = true: 13 milliseconds
+    d isSorted = true: 14 milliseconds
+    e isSorted = true: 13 milliseconds
+    e allfound = true: 815 milliseconds
+    e noghosts = true: 787 milliseconds
+    Result size: c = 100000, d = 100000, e = 100000
+    */
 }

@@ -141,29 +141,37 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
         if (rowdef.objectOrder == null) return iterativeSearch(a, astart, alength, 0, this.chunkcount);
 
-        // check if a re-sorting make sense
+        // check if a re-sorting makes sense
         if ((this.chunkcount - this.sortBound) > collectionReSortLimit) {
             sort();
         }
 
-        // first try to find in sorted area
-        int p = binarySearch(a, astart, alength);
-        if (p >= 0) return p;
-        // then find in unsorted area
-        return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount);
+        if ((this.rowdef.objectOrder != null) && (this.rowdef.objectOrder instanceof kelondroBase64Order) && (this.sortBound > 4000)) {
+            // first try to find in sorted area
+            final byte[] compiledPivot = compilePivot(a, astart, alength);
+            int p = binarySearchCompiledPivot(compiledPivot);
+            if (p >= 0) return p;
+            // then find in unsorted area
+            return iterativeSearchCompiledPivot(compiledPivot, this.sortBound, this.chunkcount);
+        } else {
+            // first try to find in sorted area
+            int p = binarySearch(a, astart, alength);
+            if (p >= 0) return p;
+            // then find in unsorted area
+            return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount);
+        }
     }
 
     private int iterativeSearch(byte[] key, int astart, int alength, int leftBorder, int rightBound) {
         // returns the chunknumber
         if (rowdef.objectOrder == null) {
             for (int i = leftBorder; i < rightBound; i++) {
                 if (match(key, astart, alength, i)) return i;
             }
             return -1;
         } else {
+            // we dont do a special handling of kelondroBase64Order here, because tests showed that this produces too much overhead
             for (int i = leftBorder; i < rightBound; i++) {
                 if (compare(key, astart, alength, i) == 0) return i;
             }
@@ -171,6 +179,16 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
         }
     }
 
+    private int iterativeSearchCompiledPivot(byte[] compiledPivot, int leftBorder, int rightBound) {
+        // returns the chunknumber
+        assert (rowdef.objectOrder != null);
+        assert (rowdef.objectOrder instanceof kelondroBase64Order);
+        for (int i = leftBorder; i < rightBound; i++) {
+            if (comparePivot(compiledPivot, i) == 0) return i;
+        }
+        return -1;
+    }
+
     private int binarySearch(byte[] key, int astart, int alength) {
         // returns the exact position of the key if the key exists,
         // or -1 if the key does not exist
@@ -183,8 +201,25 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
             p = l + ((rbound - l) >> 1);
             d = compare(key, astart, alength, p);
             if (d == 0) return p;
-            else if (d < 0) rbound = p;
-            else l = p + 1;
+            if (d < 0) rbound = p; else l = p + 1;
+        }
+        return -1;
+    }
+
+    private int binarySearchCompiledPivot(byte[] compiledPivot) {
+        // returns the exact position of the key if the key exists,
+        // or -1 if the key does not exist
+        assert (rowdef.objectOrder != null);
+        assert (rowdef.objectOrder instanceof kelondroBase64Order);
+        int l = 0;
+        int rbound = this.sortBound;
+        int p = 0;
+        int d;
+        while (l < rbound) {
+            p = l + ((rbound - l) >> 1);
+            d = comparePivot(compiledPivot, p);
+            if (d == 0) return p;
+            if (d < 0) rbound = p; else l = p + 1;
         }
         return -1;
     }
@@ -202,8 +237,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
             p = l + ((rbound - l) >> 1);
             d = compare(key, astart, alength, p);
             if (d == 0) return p;
-            else if (d < 0) rbound = p;
-            else l = p + 1;
+            if (d < 0) rbound = p; else l = p + 1;
         }
         return l;
     }
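The kelondroRowSet changes above split a lookup into a binary search over the sorted prefix (below sortBound) followed by a linear scan of the unsorted tail, and for large sorted areas (sortBound > 4000) the search key is compiled once with kelondroBase64Order.compilePivot() so the comparison inside both loops stays cheap; the idea is to transform the key once up front instead of re-deriving its comparable form on every comparison. A simplified, self-contained sketch of the sorted-prefix-plus-tail lookup, using plain long keys instead of compiled Base64 pivots (class and method names are illustrative, not the kelondro API):

public class PrefixTailSearchDemo {

    // keys[0..sortBound) is kept sorted; keys[sortBound..size) is an unsorted tail
    private final long[] keys;
    private final int sortBound;
    private final int size;

    PrefixTailSearchDemo(long[] sorted, long[] tail) {
        this.sortBound = sorted.length;
        this.size = sorted.length + tail.length;
        this.keys = new long[size];
        System.arraycopy(sorted, 0, keys, 0, sorted.length);
        System.arraycopy(tail, 0, keys, sortBound, tail.length);
    }

    // binary search over the sorted prefix, then linear scan of the unsorted tail
    int find(long key) {
        int l = 0, r = sortBound;
        while (l < r) {
            int p = l + ((r - l) >> 1);
            if (keys[p] == key) return p;
            if (key < keys[p]) r = p; else l = p + 1;
        }
        for (int i = sortBound; i < size; i++) {
            if (keys[i] == key) return i;
        }
        return -1; // not present
    }

    public static void main(String[] args) {
        PrefixTailSearchDemo d = new PrefixTailSearchDemo(new long[] {2, 5, 9, 14}, new long[] {7, 3});
        System.out.println(d.find(9));  // 2 (found in sorted prefix)
        System.out.println(d.find(3));  // 5 (found in unsorted tail)
        System.out.println(d.find(11)); // -1
    }
}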

@@ -155,14 +155,15 @@ public final class plasmaWordIndex implements indexRI {
     public void dhtFlushControl(indexRAMRI theCache) {
         // check for forced flush
+        int count = -1;
         synchronized (theCache) {
             if ((theCache.maxURLinCache() > wCacheMaxChunk ) ||
                 (theCache.size() > theCache.getMaxWordCount()) ||
                 (serverMemory.available() < collections.minMem())) {
-                int count = theCache.size() + flushsize - theCache.getMaxWordCount();
-                flushCache(theCache, (count > 0) ? count : 1);
+                count = theCache.size() + flushsize - theCache.getMaxWordCount();
             }
         }
+        if (count >= 0) flushCache(theCache, (count > 0) ? count : 1);
     }
 
     public long getUpdateTime(String wordHash) {
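The dhtFlushControl() rewrite above is a standard deadlock-avoidance move: the decision whether (and how much) to flush is still made while holding the cache monitor, but the flushCache() call itself now happens after the synchronized block has been left, so the flush can take further locks without the cache lock being held. A minimal sketch of that pattern, with a hypothetical Cache class and flush() method standing in for the YaCy objects:

public class FlushControlDemo {

    static class Cache {
        private int entries = 1200;
        synchronized int size() { return entries; }
        synchronized void remove(int n) { entries = Math.max(0, entries - n); }
    }

    private static final int MAX_ENTRIES = 1000;

    static void flushControl(Cache cache) {
        int count = -1;
        synchronized (cache) {
            // only compute the decision while holding the cache monitor
            if (cache.size() > MAX_ENTRIES) count = cache.size() - MAX_ENTRIES;
        }
        // the actual flush runs without the monitor, so it can acquire other
        // locks (e.g. the on-disk index) without risking a lock-order deadlock
        if (count >= 0) flush(cache, Math.max(count, 1));
    }

    static void flush(Cache cache, int count) {
        cache.remove(count);
        System.out.println("flushed " + count + " entries, " + cache.size() + " left");
    }

    public static void main(String[] args) {
        flushControl(new Cache()); // flushed 200 entries, 1000 left
    }
}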
@@ -216,11 +217,11 @@ public final class plasmaWordIndex implements indexRI {
         busyCacheFlush = true;
         String wordHash;
         ArrayList containerList = new ArrayList();
-        synchronized (ram) {
-            count = Math.min(5000, Math.min(count, ram.size()));
-            boolean collectMax = true;
-            indexContainer c;
-            while (collectMax) {
+        count = Math.min(5000, Math.min(count, ram.size()));
+        boolean collectMax = true;
+        indexContainer c;
+        while (collectMax) {
+            synchronized (ram) {
                 wordHash = ram.maxScoreWordHash();
                 c = ram.getContainer(wordHash, null);
                 if ((c != null) && (c.size() > wCacheMaxChunk)) {
@@ -230,17 +231,20 @@ public final class plasmaWordIndex implements indexRI {
                     collectMax = false;
                 }
             }
-            count = count - containerList.size();
-            for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
+        }
+        count = count - containerList.size();
+        for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
+            synchronized (ram) {
                 if (ram.size() == 0) break;
                 if (serverMemory.available() < collections.minMem()) break; // protect memory during flush
                 // select one word to flush
                 wordHash = ram.bestFlushWordHash();
                 // move one container from ram to flush list
                 c = ram.deleteContainer(wordHash);
-                if (c != null) containerList.add(c);
             }
+            if (c != null) containerList.add(c);
         }
         // flush the containers
         collections.addMultipleEntries(containerList);
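flushCache() now takes the ram monitor inside the loops, once per iteration, instead of wrapping the whole collection phase, so other threads (searches, DHT transfers) can use the RAM cache while a large flush is being assembled and the lock is never held across an entire loop. A small sketch contrasting the two lock granularities; Cache here is an illustrative stand-in, not indexRAMRI:

import java.util.ArrayList;
import java.util.List;

public class LockGranularityDemo {

    static class Cache {
        private final List<String> words = new ArrayList<String>();
        Cache() { for (int i = 0; i < 10; i++) words.add("word" + i); }
        int size() { return words.size(); }
        String removeBest() { return words.remove(words.size() - 1); }
    }

    // coarse: the cache stays locked for the whole collection phase
    static List<String> flushCoarse(Cache cache, int count) {
        List<String> collected = new ArrayList<String>();
        synchronized (cache) {
            for (int i = 0; i < count && cache.size() > 0; i++) {
                collected.add(cache.removeBest());
            }
        }
        return collected;
    }

    // fine: the lock is taken per iteration, so other threads can interleave
    static List<String> flushFine(Cache cache, int count) {
        List<String> collected = new ArrayList<String>();
        for (int i = 0; i < count; i++) {
            String w = null;
            synchronized (cache) {
                if (cache.size() > 0) w = cache.removeBest();
            }
            if (w == null) break; // cache drained
            collected.add(w);     // work done outside the lock
        }
        return collected;
    }

    public static void main(String[] args) {
        System.out.println(flushCoarse(new Cache(), 3)); // [word9, word8, word7]
        System.out.println(flushFine(new Cache(), 3));   // [word9, word8, word7]
    }
}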
@@ -540,7 +544,7 @@ public final class plasmaWordIndex implements indexRI {
     public synchronized kelondroCloneableIterator wordContainers(String startWordHash, boolean ram) {
         kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone());
         containerOrder.rotate(startWordHash.getBytes());
         if (ram) {
             return dhtOutCache.wordContainers(startWordHash, false);
         } else {
