bugfixes and performance hacks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5824 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 4e97a31009
commit 29e96c1a60

@ -29,6 +29,7 @@ import java.util.Set;
import java.util.TreeSet;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.order.NaturalOrder;
import de.anomic.kelondro.util.SetTools;
import de.anomic.plasma.plasmaProfiling;
import de.anomic.plasma.plasmaSearchEvent;
@ -64,7 +65,7 @@ public class yacysearchtrailer {
final Set<String> references = theSearch.references(20);
if (references.size() > 0) {
// get the topwords
final TreeSet<String> topwords = new TreeSet<String>();
final TreeSet<String> topwords = new TreeSet<String>(NaturalOrder.naturalComparator);
String tmp = "";
final Iterator<String> i = references.iterator();
while (i.hasNext()) {

@ -30,6 +30,8 @@ import java.util.Map;
/**
* This is a simple cache that uses two generations of hashtables to store its content with an LFU strategy.
* A slightly more complex version of this algorithm is known as the Adaptive Replacement Cache ("ARC").
* For details see http://www.almaden.ibm.com/cs/people/dmodha/ARC.pdf
* or http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache
* This version omits the ghost entry handling which is described in ARC, and keeps both cache levels
* at the same size.
*/

@ -58,6 +58,7 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Cod
private final boolean rfc1113compliant;
private final char[] alpha;
private final byte[] ahpla;
private final short[] ab; // decision table for comparisons: ab[(ac << 7) | bc] holds -1, 0 or 1 for the alphabet characters ac and bc
public Base64Order(final boolean up, final boolean rfc1113compliant) {
// if we choose not to be rfc1113compliant,
@ -66,7 +67,20 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Cod
this.asc = up;
alpha = (rfc1113compliant) ? alpha_standard : alpha_enhanced;
ahpla = (rfc1113compliant) ? ahpla_standard : ahpla_enhanced;
ab = new short[1 << 14];
byte acc, bcc;
short c;
for (char ac: alpha) {
for (char bc: alpha) {
acc = ahpla[ac];
bcc = ahpla[bc];
c = 0;
if (acc > bcc) c = 1;
if (acc < bcc) c = -1;
ab[(ac << 7) | bc] = c;
}
}
this.log = new Log("BASE64");
}
@ -387,9 +401,11 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Cod
int i = 0;
final int al = Math.min(alength, a.length - aoffset);
final int bl = Math.min(blength, b.length - boffset);
final int ml = Math.min(al, bl);
byte ac, bc;
byte acc, bcc;
while ((i < al) && (i < bl)) {
//byte acc, bcc;
//int c = 0;
while (i < ml) {
assert (i + aoffset < a.length) : "i = " + i + ", aoffset = " + aoffset + ", a.length = " + a.length + ", a = " + NaturalOrder.arrayList(a, aoffset, al);
assert (i + boffset < b.length) : "i = " + i + ", boffset = " + boffset + ", b.length = " + b.length + ", b = " + NaturalOrder.arrayList(b, boffset, al);
ac = a[aoffset + i];
@ -402,14 +418,16 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Cod
i++;
continue;
}
acc = ahpla[ac];
assert (acc >= 0) : "acc = " + acc + ", a = " + NaturalOrder.arrayList(a, aoffset, al) + "/" + new String(a, aoffset, al) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + NaturalOrder.table(a, 16, aoffset);
bcc = ahpla[bc];
assert (bcc >= 0) : "bcc = " + bcc + ", b = " + NaturalOrder.arrayList(b, boffset, bl) + "/" + new String(b, boffset, bl) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + NaturalOrder.table(b, 16, boffset);
if (acc > bcc) return 1;
if (acc < bcc) return -1;
// else the bytes are equal and it may go on yet undecided
i++;
//acc = ahpla[ac];
//assert (acc >= 0) : "acc = " + acc + ", a = " + NaturalOrder.arrayList(a, aoffset, al) + "/" + new String(a, aoffset, al) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + NaturalOrder.table(a, 16, aoffset);
//bcc = ahpla[bc];
//assert (bcc >= 0) : "bcc = " + bcc + ", b = " + NaturalOrder.arrayList(b, boffset, bl) + "/" + new String(b, boffset, bl) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + NaturalOrder.table(b, 16, boffset);
//if (acc > bcc) c = 1;
//if (acc < bcc) c = -1;
//assert c != 0;
//assert ab[(ac << 7) | bc] == c;
//return c;
return ab[(ac << 7) | bc];
}
// compare length
if (al > bl) return 1;

@ -36,11 +36,12 @@ import de.anomic.kelondro.index.SimpleARC;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.Bitfield;
import de.anomic.kelondro.order.Digest;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.yacy.yacySeedDB;
public class Word {
public static final int hashCacheSize = 20000;
public static final int hashCacheSize = (int) (MemoryControl.available() / 10000L);
private static final SimpleARC<String, byte[]> hashCache = new SimpleARC<String, byte[]>(hashCacheSize);
// object carries statistics for words and sentences

@ -125,6 +125,9 @@ public final class plasmaWordIndex {
final int redundancy,
final int partitionExponent,
final boolean useCell) throws IOException {
log.logInfo("Initializing Word Index for the network '" + networkName + "', word hash cache size is " + Word.hashCacheSize + ".");
if (networkName == null || networkName.length() == 0) {
log.logSevere("no network name given - shutting down");
System.exit(0);

Loading…
Cancel
Save