From 5af9598bd12b41df5a1f95aa8432acd935a1898b Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 10 Oct 2011 09:46:38 +0000 Subject: [PATCH] enhanced exported row parsing during row import this affects the search and dht receive speed git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7994 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/net/yacy/kelondro/index/Row.java | 30 ++++++++++--------- .../net/yacy/kelondro/order/Base64Order.java | 18 +++++------ source/net/yacy/search/Switchboard.java | 2 +- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/source/net/yacy/kelondro/index/Row.java b/source/net/yacy/kelondro/index/Row.java index 0b7ab9413..b3032857f 100644 --- a/source/net/yacy/kelondro/index/Row.java +++ b/source/net/yacy/kelondro/index/Row.java @@ -34,7 +34,6 @@ import java.util.Map; import java.util.StringTokenizer; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ConcurrentHashMap; -import java.util.regex.Pattern; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.UTF8; @@ -51,7 +50,7 @@ import net.yacy.kelondro.util.kelondroException; public final class Row { - private final static Pattern commaPattern = Pattern.compile(","); + //private final static Pattern commaPattern = Pattern.compile(","); protected final Column[] row; public final int[] colstart; @@ -291,43 +290,46 @@ public final class Row { public Entry(String external, final boolean decimalCardinal) { // parse external form if (external.length() > 0 && external.charAt(0) == '{') external = external.substring(1, external.length() - 1); - final String[] elts = commaPattern.split(external); + //final String[] elts = commaPattern.split(external); + final StringTokenizer st = new StringTokenizer(external, ","); if (Row.this.nickref == null) genNickRef(); String nick; int p; this.rowinstance = new byte[Row.this.objectsize]; this.offset = 0; - for (int i = 0; i < elts.length; i++) { - p = elts[i].indexOf('='); - if (p < 0) p = elts[i].indexOf(':'); + String token; + while (st.hasMoreTokens()) { + token = st.nextToken(); + p = token.indexOf('='); + if (p < 0) p = token.indexOf(':'); if (p > 0) { - nick = elts[i].substring(0, p).trim(); + nick = token.substring(0, p).trim(); if (nick.charAt(0) == '"' && nick.charAt(nick.length() - 1) == '"') nick = nick.substring(1, nick.length() - 1); final Object[] ref = Row.this.nickref.get(nick); final Column col = (Column) ref[0]; final int clstrt = ((Integer) ref[1]).intValue(); - if (p + 1 == elts[i].length()) { + if (p + 1 == token.length()) { setCol(clstrt, col.cellwidth, null); } else { if ((decimalCardinal) && (col.celltype == Column.celltype_cardinal)) { try { - setCol(col.encoder, this.offset + clstrt, col.cellwidth, Long.parseLong(elts[i].substring(p + 1).trim())); + setCol(col.encoder, this.offset + clstrt, col.cellwidth, Long.parseLong(token.substring(p + 1).trim())); } catch (final NumberFormatException e) { - Log.logSevere("kelondroRow", "NumberFormatException for celltype_cardinal; row = " + i + ", celltype = " + col.celltype + ", encoder = " + col.encoder + ", value = '" + elts[i].substring(p + 1).trim() + "'"); + Log.logSevere("kelondroRow", "NumberFormatException for celltype_cardinal, celltype = " + col.celltype + ", encoder = " + col.encoder + ", value = '" + token.substring(p + 1).trim() + "'"); setCol(col.encoder, this.offset + clstrt, col.cellwidth, 0); } } else if ((decimalCardinal) && (col.celltype == Column.celltype_binary)) { assert col.cellwidth == 1; try { - setCol(clstrt, col.cellwidth, new byte[]{(byte) Integer.parseInt(elts[i].substring(p + 1).trim())}); + setCol(clstrt, col.cellwidth, new byte[]{(byte) Integer.parseInt(token.substring(p + 1).trim())}); } catch (final NumberFormatException e) { - Log.logSevere("kelondroRow", "NumberFormatException for celltype_binary; row = " + i + ", celltype = " + col.celltype + ", encoder = " + col.encoder + ", value = '" + elts[i].substring(p + 1).trim() + "'"); + Log.logSevere("kelondroRow", "NumberFormatException for celltype_binary, celltype = " + col.celltype + ", encoder = " + col.encoder + ", value = '" + token.substring(p + 1).trim() + "'"); setCol(clstrt, col.cellwidth, new byte[]{0}); } } else if ((decimalCardinal) && (col.celltype == Column.celltype_bitfield)) { - setCol(clstrt, col.cellwidth, (new Bitfield(col.cellwidth, elts[i].substring(p + 1).trim())).bytes()); + setCol(clstrt, col.cellwidth, (new Bitfield(col.cellwidth, token.substring(p + 1).trim())).bytes()); } else { - setCol(clstrt, col.cellwidth, UTF8.getBytes(elts[i].substring(p + 1).trim())); + setCol(clstrt, col.cellwidth, UTF8.getBytes(token.substring(p + 1).trim())); } } } diff --git a/source/net/yacy/kelondro/order/Base64Order.java b/source/net/yacy/kelondro/order/Base64Order.java index b67f5b5d0..e9e86f7dd 100644 --- a/source/net/yacy/kelondro/order/Base64Order.java +++ b/source/net/yacy/kelondro/order/Base64Order.java @@ -520,20 +520,20 @@ public class Base64Order extends AbstractOrder implements ByteOrder, Com } private final int compares(final byte[] a, final byte[] b, final int length) { - assert (length <= a.length) : "a.length = " + a.length + ", alength = " + length; - assert (length <= b.length) : "b.length = " + b.length + ", blength = " + length; - assert (this.ahpla.length == 128); + //assert (length <= a.length) : "a.length = " + a.length + ", alength = " + length; + //assert (length <= b.length) : "b.length = " + b.length + ", blength = " + length; + //assert (this.ahpla.length == 128); short i = 0; byte ac, bc; while (i < length) { - assert (i < a.length) : "i = " + i + ", a.length = " + a.length + ", a = " + NaturalOrder.arrayList(a, 0, length); - assert (i < b.length) : "i = " + i + ", b.length = " + b.length + ", b = " + NaturalOrder.arrayList(b, 0, length); + //assert (i < a.length) : "i = " + i + ", a.length = " + a.length + ", a = " + NaturalOrder.arrayList(a, 0, length); + //assert (i < b.length) : "i = " + i + ", b.length = " + b.length + ", b = " + NaturalOrder.arrayList(b, 0, length); ac = a[i]; - assert (ac >= 0) && (ac < 128) : "ac = " + ac + ", a = " + NaturalOrder.arrayList(a, 0, length); + //assert (ac >= 0) && (ac < 128) : "ac = " + ac + ", a = " + NaturalOrder.arrayList(a, 0, length); bc = b[i]; - assert (bc >= 0) && (bc < 128) : "bc = " + bc + ", b = " + NaturalOrder.arrayList(b, 0, length); - assert ac != 0; - assert bc != 0; + //assert (bc >= 0) && (bc < 128) : "bc = " + bc + ", b = " + NaturalOrder.arrayList(b, 0, length); + //assert ac != 0; + //assert bc != 0; if (ac != bc) return this.ab[(ac << 7) | bc]; i++; } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index f3fe45ca3..604c5fbc4 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -189,7 +189,7 @@ public final class Switchboard extends serverSwitch { public static int xstackCrawlSlots = 2000; public static long lastPPMUpdate = System.currentTimeMillis()- 30000; private static final int dhtMaxContainerCount = 500; - private int dhtMaxReferenceCount = 1000; + private int dhtMaxReferenceCount = 1000; // colored list management public static SortedSet badwords = new TreeSet(NaturalOrder.naturalComparator);