diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java index 87fa6e5a3..6868bde6e 100644 --- a/source/de/anomic/http/httpdFileHandler.java +++ b/source/de/anomic/http/httpdFileHandler.java @@ -646,7 +646,7 @@ public final class httpdFileHandler { StringBuilder stringBuffer = new StringBuilder(1024); while (is.available() > 0) { - stringBuffer.append((char) is.read()); + stringBuffer.append((char) is.read()); } String cgiReturn = stringBuffer.toString(); diff --git a/source/de/anomic/kelondro/index/RowCollection.java b/source/de/anomic/kelondro/index/RowCollection.java index 61214c9f0..6a0abcc43 100644 --- a/source/de/anomic/kelondro/index/RowCollection.java +++ b/source/de/anomic/kelondro/index/RowCollection.java @@ -285,7 +285,7 @@ public class RowCollection implements Iterable { } public synchronized void add(final byte[] a) { - assert a.length == this.rowdef.objectsize; + assert a.length == this.rowdef.objectsize : "a.length = " + a.length + ", objectsize = " + this.rowdef.objectsize; addUnique(a, 0, a.length); } @@ -623,27 +623,22 @@ public class RowCollection implements Iterable { int p = L; int q = R - 1; int pivot = pivot(L, R, S, swapspace); - int oldpivot = -1; - byte[] compiledPivot = null; if (this.rowdef.objectOrder instanceof Base64Order) { while (p <= q) { // wenn pivot < S: pivot befindet sich in sortierter Sequenz von L bis S - 1 // d.h. alle Werte von L bis pivot sind kleiner als das pivot - // zu finden ist ein minimales p <= q so dass chunk[p] >= pivot - if (compiledPivot == null) compiledPivot = compilePivot(pivot); + // zu finden ist ein minimales p <= q so dass chunk[p] >= pivot if ((pivot < S) && (p < pivot)) { //System.out.println("+++ saved " + (pivot - p) + " comparisments"); p = pivot; S = 0; } else { - while ((p < R - 1) && (comparePivot(compiledPivot, p) >= 0)) p++; // chunkAt[p] < pivot + while ((p < R - 1) && (compare(pivot, p) >= 0)) p++; // chunkAt[p] < pivot } // nun gilt chunkAt[p] >= pivot - while ((q > L) && (comparePivot(compiledPivot, q) <= 0)) q--; // chunkAt[q] > pivot + while ((q > L) && (compare(pivot, q) <= 0)) q--; // chunkAt[q] > pivot if (p <= q) { - oldpivot = pivot; pivot = swap(p, q, pivot, swapspace); - if (pivot != oldpivot && compiledPivot != null) compiledPivot = null; // must be computed again p++; q--; } @@ -867,34 +862,6 @@ public class RowCollection implements Iterable { this.rowdef.primaryKeyLength); return c; } - - protected final byte[] compilePivot(final int i) { - assert (i >= 0) && (i < chunkcount) : "i = " + i + ", chunkcount = " + chunkcount; - assert (this.rowdef.objectOrder != null); - assert (this.rowdef.objectOrder instanceof Base64Order); - //assert (!bugappearance(chunkcache, i * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength)); - return ((Base64Order) this.rowdef.objectOrder).compilePivot(chunkcache, i * this.rowdef.objectsize, this.rowdef.primaryKeyLength); - } - - protected final byte[] compilePivot(final byte[] a, final int astart, final int alength) { - assert (this.rowdef.objectOrder != null); - assert (this.rowdef.objectOrder instanceof Base64Order); - return ((Base64Order) this.rowdef.objectOrder).compilePivot(a, astart, alength); - } - - protected final int comparePivot(final byte[] compiledPivot, final int j) { - assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length; - assert (j >= 0) && (j < chunkcount) : "j = " + j + ", chunkcount = " + chunkcount; - assert (this.rowdef.objectOrder != null); - assert (this.rowdef.objectOrder instanceof Base64Order); - //assert (!bugappearance(chunkcache, j * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength)); - final int c = ((Base64Order) this.rowdef.objectOrder).comparePivot( - compiledPivot, - chunkcache, - j * this.rowdef.objectsize, - this.rowdef.primaryKeyLength); - return c; - } protected synchronized int compare(final byte[] a, final int astart, final int alength, final int chunknumber) { assert (chunknumber < chunkcount); diff --git a/source/de/anomic/kelondro/index/RowSet.java b/source/de/anomic/kelondro/index/RowSet.java index f505c1b96..9ecf5637a 100644 --- a/source/de/anomic/kelondro/index/RowSet.java +++ b/source/de/anomic/kelondro/index/RowSet.java @@ -198,8 +198,7 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable 4000)) { // first try to find in sorted area assert this.rowdef.objectOrder.wellformed(a, astart, alength) : "not wellformed: " + new String(a, astart, alength); - final byte[] compiledPivot = compilePivot(a, astart, alength); - final int p = binarySearchCompiledPivot(compiledPivot); + final int p = binarySearch(a, astart, alength); if (p >= 0) return p; // then find in unsorted area @@ -238,24 +237,6 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable> 1); - d = comparePivot(compiledPivot, p); - if (d == 0) return p; - if (d < 0) rbound = p; else l = p + 1; - } - return -1; - } private int binaryPosition(final byte[] key, final int astart, final int alength) { // returns the exact position of the key if the key exists, @@ -489,7 +470,17 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable implements ByteOrder, Cod // b64-Strings // we will do that by grouping each three input bytes to four output bytes. public final String encode(final byte[] in) { - if (in.length == 0) return ""; + if (in == null || in.length == 0) return ""; int lene = in.length / 3 * 4 + 3; StringBuilder out = new StringBuilder(lene); int pos = 0; @@ -509,7 +509,7 @@ public class Base64Order extends AbstractOrder implements ByteOrder, Cod // they are equal return 0; } - + /* public final int comparePivot(final byte[] compiledPivot, final byte[] b, final int boffset, final int blength) { assert zero == null; assert asc; @@ -556,7 +556,7 @@ public class Base64Order extends AbstractOrder implements ByteOrder, Cod } return cp; } - +*/ public static void main(final String[] s) { // java -classpath classes de.anomic.kelondro.kelondroBase64Order final Base64Order b64 = new Base64Order(true, true); diff --git a/source/de/anomic/tools/mediawikiIndex.java b/source/de/anomic/tools/mediawikiIndex.java index 9438e50ed..a7446f227 100644 --- a/source/de/anomic/tools/mediawikiIndex.java +++ b/source/de/anomic/tools/mediawikiIndex.java @@ -292,17 +292,17 @@ public class mediawikiIndex { this.end = end; } } - public wikiparserrecord newRecord(String title, StringBuffer sb) { + public wikiparserrecord newRecord(String title, StringBuilder sb) { return new wikiparserrecord(title, sb); } public class wikiparserrecord { public String title; - StringBuffer source; + StringBuilder source; String html; yacyURL url; plasmaParserDocument document; - public wikiparserrecord(String title, StringBuffer sb) { + public wikiparserrecord(String title, StringBuilder sb) { this.title = title; this.source = sb; } @@ -426,7 +426,7 @@ public class mediawikiIndex { } // example: - // java -Xmx1000m -cp classes:lib/bzip2.jar de.anomic.tools.mediawikiIndex -convert DATA\HTCACHE\dewiki-20090311-pages-articles.xml.bz2 DATA\SURROGATES\in\ http://de.wikipedia.org/wiki/ + // java -Xmx2000m -cp classes:lib/bzip2.jar de.anomic.tools.mediawikiIndex -convert DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2 DATA/SURROGATES/in/ http://de.wikipedia.org/wiki/ if (s[0].equals("-convert") && s.length > 2 && s[1].endsWith(".xml.bz2") && s[3].startsWith("http://")) { File sourcefile = new File(s[1]); @@ -444,9 +444,9 @@ public class mediawikiIndex { if (b != 'Z') throw new IOException("Invalid bz2 content."); is = new CBZip2InputStream(is); } - BufferedReader r = new BufferedReader(new java.io.InputStreamReader(is)); + BufferedReader r = new BufferedReader(new java.io.InputStreamReader(is, "UTF-8")); String t; - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); boolean page = false, text = false; String title = null; plasmaParser.initHTMLParsableMimeTypes("text/html"); @@ -456,7 +456,7 @@ public class mediawikiIndex { int fc = 0; int rc = 0; String outputfilename = targetstub + "." + fc + ".xml.tmp"; - OutputStreamWriter osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename)))); + OutputStreamWriter osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8"); osw.write("\n\n"); while ((t = r.readLine()) != null) { if (t.indexOf(pagestart) >= 0) { @@ -484,7 +484,7 @@ public class mediawikiIndex { rc = 0; fc++; outputfilename = targetstub + "." + fc + ".xml.tmp"; - osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename)))); + osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8"); osw.write("\n\n"); } } catch (InterruptedException e) {