Enhanced the parsing of exported rows during row import

This affects the speed of search and of DHT receive.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7994 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 204e98db3a
commit 5af9598bd1

@ -34,7 +34,6 @@ import java.util.Map;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
@ -51,7 +50,7 @@ import net.yacy.kelondro.util.kelondroException;
public final class Row { public final class Row {
private final static Pattern commaPattern = Pattern.compile(","); //private final static Pattern commaPattern = Pattern.compile(",");
protected final Column[] row; protected final Column[] row;
public final int[] colstart; public final int[] colstart;
@ -291,43 +290,46 @@ public final class Row {
public Entry(String external, final boolean decimalCardinal) { public Entry(String external, final boolean decimalCardinal) {
// parse external form // parse external form
if (external.length() > 0 && external.charAt(0) == '{') external = external.substring(1, external.length() - 1); if (external.length() > 0 && external.charAt(0) == '{') external = external.substring(1, external.length() - 1);
final String[] elts = commaPattern.split(external); //final String[] elts = commaPattern.split(external);
final StringTokenizer st = new StringTokenizer(external, ",");
if (Row.this.nickref == null) genNickRef(); if (Row.this.nickref == null) genNickRef();
String nick; String nick;
int p; int p;
this.rowinstance = new byte[Row.this.objectsize]; this.rowinstance = new byte[Row.this.objectsize];
this.offset = 0; this.offset = 0;
for (int i = 0; i < elts.length; i++) { String token;
p = elts[i].indexOf('='); while (st.hasMoreTokens()) {
if (p < 0) p = elts[i].indexOf(':'); token = st.nextToken();
p = token.indexOf('=');
if (p < 0) p = token.indexOf(':');
if (p > 0) { if (p > 0) {
nick = elts[i].substring(0, p).trim(); nick = token.substring(0, p).trim();
if (nick.charAt(0) == '"' && nick.charAt(nick.length() - 1) == '"') nick = nick.substring(1, nick.length() - 1); if (nick.charAt(0) == '"' && nick.charAt(nick.length() - 1) == '"') nick = nick.substring(1, nick.length() - 1);
final Object[] ref = Row.this.nickref.get(nick); final Object[] ref = Row.this.nickref.get(nick);
final Column col = (Column) ref[0]; final Column col = (Column) ref[0];
final int clstrt = ((Integer) ref[1]).intValue(); final int clstrt = ((Integer) ref[1]).intValue();
if (p + 1 == elts[i].length()) { if (p + 1 == token.length()) {
setCol(clstrt, col.cellwidth, null); setCol(clstrt, col.cellwidth, null);
} else { } else {
if ((decimalCardinal) && (col.celltype == Column.celltype_cardinal)) { if ((decimalCardinal) && (col.celltype == Column.celltype_cardinal)) {
try { try {
setCol(col.encoder, this.offset + clstrt, col.cellwidth, Long.parseLong(elts[i].substring(p + 1).trim())); setCol(col.encoder, this.offset + clstrt, col.cellwidth, Long.parseLong(token.substring(p + 1).trim()));
} catch (final NumberFormatException e) { } catch (final NumberFormatException e) {
Log.logSevere("kelondroRow", "NumberFormatException for celltype_cardinal; row = " + i + ", celltype = " + col.celltype + ", encoder = " + col.encoder + ", value = '" + elts[i].substring(p + 1).trim() + "'"); Log.logSevere("kelondroRow", "NumberFormatException for celltype_cardinal, celltype = " + col.celltype + ", encoder = " + col.encoder + ", value = '" + token.substring(p + 1).trim() + "'");
setCol(col.encoder, this.offset + clstrt, col.cellwidth, 0); setCol(col.encoder, this.offset + clstrt, col.cellwidth, 0);
} }
} else if ((decimalCardinal) && (col.celltype == Column.celltype_binary)) { } else if ((decimalCardinal) && (col.celltype == Column.celltype_binary)) {
assert col.cellwidth == 1; assert col.cellwidth == 1;
try { try {
setCol(clstrt, col.cellwidth, new byte[]{(byte) Integer.parseInt(elts[i].substring(p + 1).trim())}); setCol(clstrt, col.cellwidth, new byte[]{(byte) Integer.parseInt(token.substring(p + 1).trim())});
} catch (final NumberFormatException e) { } catch (final NumberFormatException e) {
Log.logSevere("kelondroRow", "NumberFormatException for celltype_binary; row = " + i + ", celltype = " + col.celltype + ", encoder = " + col.encoder + ", value = '" + elts[i].substring(p + 1).trim() + "'"); Log.logSevere("kelondroRow", "NumberFormatException for celltype_binary, celltype = " + col.celltype + ", encoder = " + col.encoder + ", value = '" + token.substring(p + 1).trim() + "'");
setCol(clstrt, col.cellwidth, new byte[]{0}); setCol(clstrt, col.cellwidth, new byte[]{0});
} }
} else if ((decimalCardinal) && (col.celltype == Column.celltype_bitfield)) { } else if ((decimalCardinal) && (col.celltype == Column.celltype_bitfield)) {
setCol(clstrt, col.cellwidth, (new Bitfield(col.cellwidth, elts[i].substring(p + 1).trim())).bytes()); setCol(clstrt, col.cellwidth, (new Bitfield(col.cellwidth, token.substring(p + 1).trim())).bytes());
} else { } else {
setCol(clstrt, col.cellwidth, UTF8.getBytes(elts[i].substring(p + 1).trim())); setCol(clstrt, col.cellwidth, UTF8.getBytes(token.substring(p + 1).trim()));
} }
} }
} }

@ -520,20 +520,20 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Com
} }
private final int compares(final byte[] a, final byte[] b, final int length) { private final int compares(final byte[] a, final byte[] b, final int length) {
assert (length <= a.length) : "a.length = " + a.length + ", alength = " + length; //assert (length <= a.length) : "a.length = " + a.length + ", alength = " + length;
assert (length <= b.length) : "b.length = " + b.length + ", blength = " + length; //assert (length <= b.length) : "b.length = " + b.length + ", blength = " + length;
assert (this.ahpla.length == 128); //assert (this.ahpla.length == 128);
short i = 0; short i = 0;
byte ac, bc; byte ac, bc;
while (i < length) { while (i < length) {
assert (i < a.length) : "i = " + i + ", a.length = " + a.length + ", a = " + NaturalOrder.arrayList(a, 0, length); //assert (i < a.length) : "i = " + i + ", a.length = " + a.length + ", a = " + NaturalOrder.arrayList(a, 0, length);
assert (i < b.length) : "i = " + i + ", b.length = " + b.length + ", b = " + NaturalOrder.arrayList(b, 0, length); //assert (i < b.length) : "i = " + i + ", b.length = " + b.length + ", b = " + NaturalOrder.arrayList(b, 0, length);
ac = a[i]; ac = a[i];
assert (ac >= 0) && (ac < 128) : "ac = " + ac + ", a = " + NaturalOrder.arrayList(a, 0, length); //assert (ac >= 0) && (ac < 128) : "ac = " + ac + ", a = " + NaturalOrder.arrayList(a, 0, length);
bc = b[i]; bc = b[i];
assert (bc >= 0) && (bc < 128) : "bc = " + bc + ", b = " + NaturalOrder.arrayList(b, 0, length); //assert (bc >= 0) && (bc < 128) : "bc = " + bc + ", b = " + NaturalOrder.arrayList(b, 0, length);
assert ac != 0; //assert ac != 0;
assert bc != 0; //assert bc != 0;
if (ac != bc) return this.ab[(ac << 7) | bc]; if (ac != bc) return this.ab[(ac << 7) | bc];
i++; i++;
} }

@ -189,7 +189,7 @@ public final class Switchboard extends serverSwitch {
public static int xstackCrawlSlots = 2000; public static int xstackCrawlSlots = 2000;
public static long lastPPMUpdate = System.currentTimeMillis()- 30000; public static long lastPPMUpdate = System.currentTimeMillis()- 30000;
private static final int dhtMaxContainerCount = 500; private static final int dhtMaxContainerCount = 500;
private int dhtMaxReferenceCount = 1000; private int dhtMaxReferenceCount = 1000;
// colored list management // colored list management
public static SortedSet<String> badwords = new TreeSet<String>(NaturalOrder.naturalComparator); public static SortedSet<String> badwords = new TreeSet<String>(NaturalOrder.naturalComparator);

Loading…
Cancel
Save