diff --git a/source/net/yacy/data/Translator.java b/source/net/yacy/data/Translator.java index 08aae9e06..8df94b6c5 100644 --- a/source/net/yacy/data/Translator.java +++ b/source/net/yacy/data/Translator.java @@ -49,6 +49,7 @@ import java.util.Set; import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.document.SentenceReader; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.Formatter; import net.yacy.peers.Seed; @@ -71,34 +72,55 @@ public class Translator { * @param translationTable translation entries : text to translate -> translation * @return source translated */ - public String translate(final StringBuilder source, - final Map translationTable) { - final Set> entries = translationTable.entrySet(); - StringBuilder builder = new StringBuilder(source); - for (final Entry entry: entries) { - String key = entry.getKey(); - /* We have to check key is not empty or indexOf would always return a positive value */ - if (key != null && !key.isEmpty()) { - String translation = entry.getValue(); - int index = builder.indexOf(key); - if (index < 0) { - // Filename not available, but it will be printed in Log - // after all untranslated Strings as "Translated file: " - if (ConcurrentLog.isFine("TRANSLATOR")) - ConcurrentLog.fine("TRANSLATOR", "Unused String: " - + key); - } else { - while (index >= 0) { - builder.replace(index, index + key.length(), - translation); - index = builder.indexOf(key, - index + translation.length()); - } - } - } - } - return builder.toString(); - } + public String translate(final StringBuilder source, + final Map translationTable) { + final Set> entries = translationTable.entrySet(); + StringBuilder builder = new StringBuilder(source); + for (final Entry entry : entries) { + String key = entry.getKey(); + /* We have to check key is not empty or indexOf would always return a positive value */ + if (key != null && !key.isEmpty()) { + String translation = entry.getValue(); + int index = builder.indexOf(key); + if (index < 0) { + // Filename not available, but it will be printed in Log + // after all untranslated Strings as "Translated file: " + if (ConcurrentLog.isFine("TRANSLATOR")) + ConcurrentLog.fine("TRANSLATOR", "Unused String: " + key); + } else { + while (index >= 0) { + + // check for word boundary before and after translation key + // to avoid translation just on char sequence e.g. as in key="bug" source="mybugfix" + boolean boundary = index + key.length() >= builder.length(); // eof text = end-bondary + + if (!boundary) { + char c = builder.charAt(index + key.length() - 1); + char lc = builder.charAt(index + key.length()); + boundary |= (SentenceReader.punctuation(c) || SentenceReader.invisible(c)); // special case, basically last char of key + boundary |= (SentenceReader.punctuation(lc) || SentenceReader.invisible(lc)); // char after key = end-boundary + } + + // if end-boundary ok check begin-boundary + if (boundary && index > 0) { + char c = builder.charAt(index - 1); // char before key = begin-boundary + boundary = (SentenceReader.punctuation(c) || SentenceReader.invisible(c)); + char fc = builder.charAt(index); // special case for key >name< , currently to allow translate + builder.replace(index, index + key.length(), translation); + index = builder.indexOf(key, index + translation.length()); + } else { // otherwise just skip to next occurence + index = builder.indexOf(key, index + key.length()); + } + } + } + } + } + return builder.toString(); + } /** * Load multiple translationLists from one File. Each List starts with #File: relative/path/to/file diff --git a/test/java/net/yacy/data/TranslatorTest.java b/test/java/net/yacy/data/TranslatorTest.java new file mode 100644 index 000000000..df5380d59 --- /dev/null +++ b/test/java/net/yacy/data/TranslatorTest.java @@ -0,0 +1,63 @@ +package net.yacy.data; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import org.junit.Test; +import static org.junit.Assert.*; + +public class TranslatorTest { + + /** + * Test of translate method, of class Translator. + */ + @Test + public void testTranslate() { + // test that translator respects word bondaries ( e.g. key=bug not translate "mybugfix" + Translator t = new Translator(); + final Map translationTable = new HashMap(); + translationTable.put("MIST", "Nebel"); // key upper case just to easy identify it in test strings + translationTable.put(">MIST", ">Nebel"); + translationTable.put("BY", "bei"); + translationTable.put(">BY", ">bei"); + translationTable.put("BY<", "bei<"); + translationTable.put(">BY<", ">bei<"); + + // source test text, expected not to be translated + Set noChange = new HashSet(); + noChange.add("MISTer wong "); + noChange.add("make no MISTake"); + noChange.add("value=\"MISTake\" "); + noChange.add("MISTral"); + noChange.add("value=\"#[MISTake]#\" "); + noChange.add(" optiMIST "); + noChange.add("goodBY."); + noChange.add(" BYte"); + noChange.add(""); + //noChange.add(" BY_BY "); // this translates + + // source test text, to be translated + Set doChange = new HashSet(); + doChange.add("Queen of the MIST "); + doChange.add("value=\"#[MIST]#\" "); + doChange.add("text#[MIST]#text "); + doChange.add("MIST in the forrest"); + doChange.add("MIST\nin the forrest"); + doChange.add(""); + + String result; + for (String stringToExamine : noChange) { + StringBuilder source = new StringBuilder(stringToExamine); + result = t.translate(source, translationTable); + assertEquals(result, stringToExamine); + } + + for (String stringToExamine : doChange) { + StringBuilder source = new StringBuilder(stringToExamine); + result = t.translate(source, translationTable); + assertNotEquals(result, stringToExamine); + } + } + +}