diff --git a/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java b/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java index a33eb7166..26e315935 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java @@ -143,7 +143,7 @@ public class htmlFilterContentTransformer extends htmlFilterAbstractTransformer serverByteBuffer[] sbbs = httpTemplate.splitQuotations(sbb); sbb = new serverByteBuffer(); for (int i = 0; i < sbbs.length; i++) { - if (sbbs[i].isWhitespace()) { + if (sbbs[i].isWhitespace(true)) { sbb.append(sbbs[i]); } else if ((sbbs[i].byteAt(0) == httpTemplate.hash) || (sbbs[i].startsWith(httpTemplate.dpdpa))) { @@ -151,8 +151,8 @@ public class htmlFilterContentTransformer extends htmlFilterAbstractTransformer sbb.append(sbbs[i]); } else { // this is a text fragment, generate gettext quotation - int ws = sbbs[i].whitespaceStart(); - int we = sbbs[i].whitespaceEnd(); + int ws = sbbs[i].whitespaceStart(true); + int we = sbbs[i].whitespaceEnd(true); sbb.append(sbbs[i].getBytes(0, ws)); sbb.append('_'); sbb.append('('); diff --git a/source/de/anomic/server/serverByteBuffer.java b/source/de/anomic/server/serverByteBuffer.java index adc879364..e481eb323 100644 --- a/source/de/anomic/server/serverByteBuffer.java +++ b/source/de/anomic/server/serverByteBuffer.java @@ -290,26 +290,44 @@ public final class serverByteBuffer extends OutputStream { return trim(l, r); } - public boolean isWhitespace() { + public boolean isWhitespace(boolean includeNonLetterBytes) { // returns true, if trim() would result in an empty serverByteBuffer - for (int i = 0; i < length; i++) { - if (buffer[offset + i] > 32) return false; + if (includeNonLetterBytes) { + byte b; + for (int i = 0; i < length; i++) { + b = buffer[offset + i]; + if (((b >= '0') && (b <= '9')) || ((b >= 'A') && (b <= 'Z')) || ((b >= 'a') && (b <= 'z'))) return false; + } + } else { + for (int i = 0; i < length; i++) if (buffer[offset + i] > 32) return false; } return true; } - public int whitespaceStart() { + public int whitespaceStart(boolean includeNonLetterBytes) { // returns number of whitespace bytes at the beginning of text - for (int i = 0; i < length; i++) { - if (buffer[offset + i] > 32) return i; + if (includeNonLetterBytes) { + byte b; + for (int i = 0; i < length; i++) { + b = buffer[offset + i]; + if (((b >= '0') && (b <= '9')) || ((b >= 'A') && (b <= 'Z')) || ((b >= 'a') && (b <= 'z'))) return i; + } + } else { + for (int i = 0; i < length; i++) if (buffer[offset + i] > 32) return i; } return length; } - public int whitespaceEnd() { + public int whitespaceEnd(boolean includeNonLetterBytes) { // returns position of whitespace at the end of text - for (int i = length - 1; i >= 0; i--) { - if (buffer[offset + i] > 32) return i + 1; + if (includeNonLetterBytes) { + byte b; + for (int i = length - 1; i >= 0; i--) { + b = buffer[offset + i]; + if (((b >= '0') && (b <= '9')) || ((b >= 'A') && (b <= 'Z')) || ((b >= 'a') && (b <= 'z'))) return i + 1; + } + } else { + for (int i = length - 1; i >= 0; i--) if (buffer[offset + i] > 32) return i + 1; } return 0; }