better handling of whitespace for gettext quotation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2310 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 97fa6788a1
commit 4149939c02

@ -139,24 +139,26 @@ public class htmlFilterContentTransformer extends htmlFilterAbstractTransformer
public byte[] transformText(byte[] text) { public byte[] transformText(byte[] text) {
if (gettext) { if (gettext) {
serverByteBuffer sbb = new serverByteBuffer(text); serverByteBuffer sbb = new serverByteBuffer(text);
sbb.trim();
//if (sbb.length() > 0) System.out.println(" TEXT: " + sbb.toString()); //if (sbb.length() > 0) System.out.println(" TEXT: " + sbb.toString());
serverByteBuffer[] sbbs = httpTemplate.splitQuotations(sbb); serverByteBuffer[] sbbs = httpTemplate.splitQuotations(sbb);
sbb = new serverByteBuffer(); sbb = new serverByteBuffer();
for (int i = 0; i < sbbs.length; i++) { for (int i = 0; i < sbbs.length; i++) {
sbbs[i].trim(); if (sbbs[i].isWhitespace()) {
if (sbbs[i].length() == 0) { sbb.append(sbbs[i]);
sbb.append(' ');
} else if ((sbbs[i].byteAt(0) == httpTemplate.hash) || } else if ((sbbs[i].byteAt(0) == httpTemplate.hash) ||
(sbbs[i].startsWith(httpTemplate.dpdpa))) { (sbbs[i].startsWith(httpTemplate.dpdpa))) {
// this is a template or a part of a template // this is a template or a part of a template
sbb.append(sbbs[i]); sbb.append(sbbs[i]);
} else { } else {
// this is a text fragment, generate gettext quotation // this is a text fragment, generate gettext quotation
int ws = sbbs[i].whitespaceStart();
int we = sbbs[i].whitespaceEnd();
sbb.append(sbbs[i].getBytes(0, ws));
sbb.append('_'); sbb.append('_');
sbb.append('('); sbb.append('(');
sbb.append(sbbs[i]); sbb.append(sbbs[i].getBytes(ws, we));
sbb.append(')'); sbb.append(')');
sbb.append(sbbs[i].getBytes(we));
} }
} }
//if (sbb.length() > 0) System.out.println("GETTEXT: " + sbb.toString()); //if (sbb.length() > 0) System.out.println("GETTEXT: " + sbb.toString());

@ -290,6 +290,31 @@ public final class serverByteBuffer extends OutputStream {
return trim(l, r); return trim(l, r);
} }
public boolean isWhitespace() {
    // returns true if every byte of this buffer is whitespace, i.e. has an
    // unsigned value <= 32 (space and the ASCII control bytes below it);
    // an empty buffer also yields true.
    // Java bytes are signed: without the 0xFF mask every byte >= 0x80
    // (e.g. the bytes of a UTF-8 multi-byte character) is negative and
    // would wrongly be classified as whitespace.
    // NOTE(review): trim() is not visible here; if it compares signed
    // bytes it will disagree with this method for bytes >= 0x80 - confirm.
    for (int i = 0; i < length; i++) {
        if ((buffer[offset + i] & 0xFF) > 32) return false;
    }
    return true;
}
public int whitespaceStart() {
    // returns the number of whitespace bytes (unsigned value <= 32) at the
    // beginning of the text, which is also the index of the first
    // non-whitespace byte; returns length if there is no such byte.
    // The 0xFF mask keeps bytes >= 0x80 from being seen as negative
    // and therefore counted as leading whitespace.
    for (int i = 0; i < length; i++) {
        if ((buffer[offset + i] & 0xFF) > 32) return i;
    }
    return length;
}
public int whitespaceEnd() {
    // returns the position where the trailing whitespace begins, i.e. the
    // index directly behind the last non-whitespace byte (unsigned value
    // > 32); returns 0 if the text contains no non-whitespace byte.
    // The 0xFF mask keeps bytes >= 0x80 from being seen as negative
    // and therefore counted as trailing whitespace.
    for (int i = length - 1; i >= 0; i--) {
        if ((buffer[offset + i] & 0xFF) > 32) return i + 1;
    }
    return 0;
}
public String toString() { public String toString() {
return new String(buffer, offset, length); return new String(buffer, offset, length);
} }

Loading…
Cancel
Save