From c40fca08a2432f9114f80b5c0935244543124eaf Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 2 Oct 2006 10:21:14 +0000 Subject: [PATCH] fixed bad handling of string separation. You can now use a new encoding parameter to create strings from byte arrays. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2693 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/tools/nxTools.java | 33 ++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/source/de/anomic/tools/nxTools.java b/source/de/anomic/tools/nxTools.java index 2eaf2664b..086080927 100644 --- a/source/de/anomic/tools/nxTools.java +++ b/source/de/anomic/tools/nxTools.java @@ -43,6 +43,7 @@ package de.anomic.tools; +import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Enumeration; import java.util.HashMap; @@ -149,13 +150,39 @@ public class nxTools { } public static ArrayList strings(byte[] a) { + return strings(a, null); + } + + public static ArrayList strings(byte[] a, String encoding) { int s = 0; int e; ArrayList v = new ArrayList(); + byte b; while (s < a.length) { - e = s; while (e < a.length) if (a[e++] < 32) {e--; break;} - v.add(new String(a, s, e - s)); - s = e; while (s < a.length) if (a[s++] >= 32) {s--; break;} + // find eol + e = s; + while (e < a.length) { + b = a[e]; + if ((b == 10) || (b == 13)) break; + e++; + } + + // read line + if (encoding == null) { + v.add(new String(a, s, e - s)); + } else try { + v.add(new String(a, s, e - s, encoding)); + } catch (UnsupportedEncodingException xcptn) { + return v; + } + + // eat up additional eol bytes + s = e + 1; + while (s < a.length) { + b = a[s]; + if ((b != 10) && (b != 13)) break; + s++; + } } return v; }