bugfixes and performance hacks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5824 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 4e97a31009
commit 29e96c1a60

@ -29,6 +29,7 @@ import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import de.anomic.http.httpRequestHeader; import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.order.NaturalOrder;
import de.anomic.kelondro.util.SetTools; import de.anomic.kelondro.util.SetTools;
import de.anomic.plasma.plasmaProfiling; import de.anomic.plasma.plasmaProfiling;
import de.anomic.plasma.plasmaSearchEvent; import de.anomic.plasma.plasmaSearchEvent;
@ -64,7 +65,7 @@ public class yacysearchtrailer {
final Set<String> references = theSearch.references(20); final Set<String> references = theSearch.references(20);
if (references.size() > 0) { if (references.size() > 0) {
// get the topwords // get the topwords
final TreeSet<String> topwords = new TreeSet<String>(); final TreeSet<String> topwords = new TreeSet<String>(NaturalOrder.naturalComparator);
String tmp = ""; String tmp = "";
final Iterator<String> i = references.iterator(); final Iterator<String> i = references.iterator();
while (i.hasNext()) { while (i.hasNext()) {

@ -30,6 +30,8 @@ import java.util.Map;
/** /**
* This is a simple cache using two generations of hashtables to store the content with a LFU strategy. * This is a simple cache using two generations of hashtables to store the content with a LFU strategy.
* The Algorithm is described in a slightly more complex version as Adaptive Replacement Cache, "ARC". * The Algorithm is described in a slightly more complex version as Adaptive Replacement Cache, "ARC".
* For details see http://www.almaden.ibm.com/cs/people/dmodha/ARC.pdf
* or http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache
* This version omits the ghost entry handling which is described in ARC, and keeps both cache levels * This version omits the ghost entry handling which is described in ARC, and keeps both cache levels
* at the same size. * at the same size.
*/ */

@ -58,6 +58,7 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Cod
private final boolean rfc1113compliant; private final boolean rfc1113compliant;
private final char[] alpha; private final char[] alpha;
private final byte[] ahpla; private final byte[] ahpla;
private final short[] ab; // decision table for comparisons
public Base64Order(final boolean up, final boolean rfc1113compliant) { public Base64Order(final boolean up, final boolean rfc1113compliant) {
// if we choose not to be rfc1113compliant, // if we choose not to be rfc1113compliant,
@ -66,7 +67,20 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Cod
this.asc = up; this.asc = up;
alpha = (rfc1113compliant) ? alpha_standard : alpha_enhanced; alpha = (rfc1113compliant) ? alpha_standard : alpha_enhanced;
ahpla = (rfc1113compliant) ? ahpla_standard : ahpla_enhanced; ahpla = (rfc1113compliant) ? ahpla_standard : ahpla_enhanced;
ab = new short[1 << 14];
byte acc, bcc;
short c;
for (char ac: alpha) {
for (char bc: alpha) {
acc = ahpla[ac];
bcc = ahpla[bc];
c = 0;
if (acc > bcc) c = 1;
if (acc < bcc) c = -1;
ab[(ac << 7) | bc] = c;
}
}
this.log = new Log("BASE64"); this.log = new Log("BASE64");
} }
@ -387,9 +401,11 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Cod
int i = 0; int i = 0;
final int al = Math.min(alength, a.length - aoffset); final int al = Math.min(alength, a.length - aoffset);
final int bl = Math.min(blength, b.length - boffset); final int bl = Math.min(blength, b.length - boffset);
final int ml = Math.min(al, bl);
byte ac, bc; byte ac, bc;
byte acc, bcc; //byte acc, bcc;
while ((i < al) && (i < bl)) { //int c = 0;
while (i < ml) {
assert (i + aoffset < a.length) : "i = " + i + ", aoffset = " + aoffset + ", a.length = " + a.length + ", a = " + NaturalOrder.arrayList(a, aoffset, al); assert (i + aoffset < a.length) : "i = " + i + ", aoffset = " + aoffset + ", a.length = " + a.length + ", a = " + NaturalOrder.arrayList(a, aoffset, al);
assert (i + boffset < b.length) : "i = " + i + ", boffset = " + boffset + ", b.length = " + b.length + ", b = " + NaturalOrder.arrayList(b, boffset, al); assert (i + boffset < b.length) : "i = " + i + ", boffset = " + boffset + ", b.length = " + b.length + ", b = " + NaturalOrder.arrayList(b, boffset, al);
ac = a[aoffset + i]; ac = a[aoffset + i];
@ -402,14 +418,16 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Cod
i++; i++;
continue; continue;
} }
acc = ahpla[ac]; //acc = ahpla[ac];
assert (acc >= 0) : "acc = " + acc + ", a = " + NaturalOrder.arrayList(a, aoffset, al) + "/" + new String(a, aoffset, al) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + NaturalOrder.table(a, 16, aoffset); //assert (acc >= 0) : "acc = " + acc + ", a = " + NaturalOrder.arrayList(a, aoffset, al) + "/" + new String(a, aoffset, al) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + NaturalOrder.table(a, 16, aoffset);
bcc = ahpla[bc]; //bcc = ahpla[bc];
assert (bcc >= 0) : "bcc = " + bcc + ", b = " + NaturalOrder.arrayList(b, boffset, bl) + "/" + new String(b, boffset, bl) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + NaturalOrder.table(b, 16, boffset); //assert (bcc >= 0) : "bcc = " + bcc + ", b = " + NaturalOrder.arrayList(b, boffset, bl) + "/" + new String(b, boffset, bl) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + NaturalOrder.table(b, 16, boffset);
if (acc > bcc) return 1; //if (acc > bcc) c = 1;
if (acc < bcc) return -1; //if (acc < bcc) c = -1;
// else the bytes are equal and it may go on yet undecided //assert c != 0;
i++; //assert ab[(ac << 7) | bc] == c;
//return c;
return ab[(ac << 7) | bc];
} }
// compare length // compare length
if (al > bl) return 1; if (al > bl) return 1;

@ -36,11 +36,12 @@ import de.anomic.kelondro.index.SimpleARC;
import de.anomic.kelondro.order.Base64Order; import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.Bitfield; import de.anomic.kelondro.order.Bitfield;
import de.anomic.kelondro.order.Digest; import de.anomic.kelondro.order.Digest;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
public class Word { public class Word {
public static final int hashCacheSize = 20000; public static final int hashCacheSize = (int) (MemoryControl.available() / 10000L);
private static final SimpleARC<String, byte[]> hashCache = new SimpleARC<String, byte[]>(hashCacheSize); private static final SimpleARC<String, byte[]> hashCache = new SimpleARC<String, byte[]>(hashCacheSize);
// object carries statistics for words and sentences // object carries statistics for words and sentences

@ -125,6 +125,9 @@ public final class plasmaWordIndex {
final int redundancy, final int redundancy,
final int partitionExponent, final int partitionExponent,
final boolean useCell) throws IOException { final boolean useCell) throws IOException {
log.logInfo("Initializing Word Index for the network '" + networkName + "', word hash cache size is " + Word.hashCacheSize + ".");
if (networkName == null || networkName.length() == 0) { if (networkName == null || networkName.length() == 0) {
log.logSevere("no network name given - shutting down"); log.logSevere("no network name given - shutting down");
System.exit(0); System.exit(0);

Loading…
Cancel
Save