From 4149939c02ef9fc2e1b59ddb1a66f8b5843fd946 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 19 Jul 2006 23:18:06 +0000 Subject: [PATCH] better handling of whitespace for gettext quotation git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2310 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../htmlFilterContentTransformer.java | 12 +++++---- source/de/anomic/server/serverByteBuffer.java | 25 +++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java b/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java index d2429a2cc..a33eb7166 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java @@ -139,24 +139,26 @@ public class htmlFilterContentTransformer extends htmlFilterAbstractTransformer public byte[] transformText(byte[] text) { if (gettext) { serverByteBuffer sbb = new serverByteBuffer(text); - sbb.trim(); //if (sbb.length() > 0) System.out.println(" TEXT: " + sbb.toString()); serverByteBuffer[] sbbs = httpTemplate.splitQuotations(sbb); sbb = new serverByteBuffer(); for (int i = 0; i < sbbs.length; i++) { - sbbs[i].trim(); - if (sbbs[i].length() == 0) { - sbb.append(' '); + if (sbbs[i].isWhitespace()) { + sbb.append(sbbs[i]); } else if ((sbbs[i].byteAt(0) == httpTemplate.hash) || (sbbs[i].startsWith(httpTemplate.dpdpa))) { // this is a template or a part of a template sbb.append(sbbs[i]); } else { // this is a text fragment, generate gettext quotation + int ws = sbbs[i].whitespaceStart(); + int we = sbbs[i].whitespaceEnd(); + sbb.append(sbbs[i].getBytes(0, ws)); sbb.append('_'); sbb.append('('); - sbb.append(sbbs[i]); + sbb.append(sbbs[i].getBytes(ws, we)); sbb.append(')'); + sbb.append(sbbs[i].getBytes(we)); } } //if (sbb.length() > 0) System.out.println("GETTEXT: " + sbb.toString()); diff --git a/source/de/anomic/server/serverByteBuffer.java 
b/source/de/anomic/server/serverByteBuffer.java
index 829bfbdac..adc879364 100644
--- a/source/de/anomic/server/serverByteBuffer.java
+++ b/source/de/anomic/server/serverByteBuffer.java
@@ -290,6 +290,56 @@ public final class serverByteBuffer extends OutputStream {
         return trim(l, r);
     }
 
+    /**
+     * Checks whether the buffer content consists solely of whitespace bytes.
+     * A byte is whitespace here when its unsigned value is 32 (space) or lower.
+     * NOTE(review): originally described as "true if trim() would result in an
+     * empty serverByteBuffer"; trim() is not visible here - confirm it uses the
+     * same unsigned classification.
+     *
+     * @return true if no byte has an unsigned value above 32 (also for length 0)
+     */
+    public boolean isWhitespace() {
+        for (int i = 0; i < length; i++) {
+            // mask to 0..255: Java bytes are signed, so bytes >= 0x80 (e.g. UTF-8
+            // multi-byte sequences) would otherwise compare as <= 32
+            if ((buffer[offset + i] & 0xff) > 32) return false;
+        }
+        return true;
+    }
+
+    /**
+     * Counts the whitespace bytes (unsigned value <= 32) at the beginning of the
+     * buffer content.
+     *
+     * @return index of the first non-whitespace byte relative to the content
+     *         start, or length if every byte is whitespace
+     */
+    public int whitespaceStart() {
+        for (int i = 0; i < length; i++) {
+            // unsigned comparison, so bytes >= 0x80 count as text
+            if ((buffer[offset + i] & 0xff) > 32) return i;
+        }
+        return length;
+    }
+
+    /**
+     * Finds where the trailing whitespace (bytes with unsigned value <= 32)
+     * begins.
+     *
+     * @return index just past the last non-whitespace byte, relative to the
+     *         content start, or 0 if every byte is whitespace (whitespaceStart()
+     *         returns length in that case, so test isWhitespace() first)
+     */
+    public int whitespaceEnd() {
+        for (int i = length - 1; i >= 0; i--) {
+            // unsigned comparison, so bytes >= 0x80 count as text
+            if ((buffer[offset + i] & 0xff) > 32) return i + 1;
+        }
+        return 0;
+    }
+
+
     public String toString() {
         return new String(buffer, offset, length);
     }