You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
392 lines
17 KiB
392 lines
17 KiB
// TranslatorXliff.java
|
|
// -------------------------------------
|
|
// part of YACY
|
|
// (C) by Michael Peter Christen; mc@yacy.net
|
|
// first published on http://www.anomic.de
|
|
// Frankfurt, Germany, 2004
|
|
//
|
|
// This file ist contributed by Burkhard Buelte
|
|
// last major change: 2016-03-28
|
|
//
|
|
// $LastChangedDate$
|
|
// $LastChangedRevision$
|
|
// $LastChangedBy$
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
package net.yacy.utils.translation;
|
|
|
|
import static javax.xml.stream.XMLStreamConstants.END_ELEMENT;
|
|
import static javax.xml.stream.XMLStreamConstants.START_ELEMENT;
|
|
|
|
import java.io.File;
|
|
import java.io.FileInputStream;
|
|
import java.io.FileOutputStream;
|
|
import java.io.IOException;
|
|
import java.io.OutputStreamWriter;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.util.LinkedHashMap;
|
|
import java.util.Locale;
|
|
import java.util.Map;
|
|
import java.util.TreeMap;
|
|
|
|
import javax.xml.stream.XMLInputFactory;
|
|
import javax.xml.stream.XMLStreamException;
|
|
import javax.xml.stream.XMLStreamReader;
|
|
|
|
import net.yacy.cora.util.ConcurrentLog;
|
|
import net.yacy.data.Translator;
|
|
import net.yacy.search.Switchboard;
|
|
|
|
|
|
/**
|
|
* Wordlist based translator
|
|
*
|
|
* Translator which can read and write translation lists from a
|
|
* <a href="http://docs.oasis-open.org/xliff/v1.2/os/xliff-core.html">XLIFF 1.2</a>
|
|
* file with phrases or single words to translate a string or a file.
|
|
*
|
|
* On loading of translation files loaded data is merged with local (modified or downloaded)
|
|
* translation data in DATA/LOCALE/
|
|
*/
|
|
public class TranslatorXliff extends Translator {
|
|
|
|
/**
|
|
* Load translationLists for one language from a Xliff File.
|
|
*
|
|
* @param xliffFile the File, which contains the Lists
|
|
* @return a HashMap, which contains for each File a HashMap with
|
|
* translations.
|
|
*/
|
|
public Map<String, Map<String, String>> loadTranslationsListsFromXliff(final File xliffFile) {
|
|
|
|
final Map<String, Map<String, String>> lngLists = new TreeMap<String, Map<String, String>>(); //list of translationLists for different files.
|
|
/**
|
|
* read xliff xml file into a xliff object
|
|
* <xliff>
|
|
* <file original="filename">
|
|
* <body>
|
|
* <trans-unit>
|
|
* <source>text</source>
|
|
* <target>text</target>
|
|
* </trans-unit>
|
|
* <trans-unit>....
|
|
* </body>
|
|
* </file>
|
|
* <file>.....
|
|
* </xliff>
|
|
*/
|
|
|
|
try (FileInputStream fis = new FileInputStream(xliffFile)) { // try-with-resource to close inputstream
|
|
|
|
XMLInputFactory factory = XMLInputFactory.newInstance();
|
|
XMLStreamReader xmlreader = factory.createXMLStreamReader(fis);
|
|
|
|
Map<String, String> translationList = null; //current Translation Table (maintaining input order)
|
|
String source = null;
|
|
String target = null;
|
|
String state = null;
|
|
while (xmlreader.hasNext()) {
|
|
int eventtype = xmlreader.next();
|
|
|
|
if (eventtype == START_ELEMENT) {
|
|
String ename = xmlreader.getLocalName();
|
|
|
|
// setup for 'file' section (get or add translationlist for this file)
|
|
if (ename.equalsIgnoreCase("file")) {
|
|
String forFile = xmlreader.getAttributeValue(null, "original");
|
|
if (lngLists.containsKey(forFile)) {
|
|
translationList = lngLists.get(forFile);
|
|
} else {
|
|
translationList = new LinkedHashMap<String, String>(); //current Translation Table (maintaining input order)
|
|
lngLists.put(forFile, translationList);
|
|
}
|
|
source = null;
|
|
target = null;
|
|
} else if (ename.equalsIgnoreCase("trans-unit")) { // prepare for trans-unit
|
|
source = null;
|
|
target = null;
|
|
} else if (ename.equalsIgnoreCase("source")) { // get source text
|
|
source = xmlreader.getElementText();
|
|
} else if (ename.equalsIgnoreCase("target")) { // get target text
|
|
state = xmlreader.getAttributeValue(null, "state");
|
|
target = xmlreader.getElementText(); // TODO: in full blown xliff, target may contain sub-xml elements (but we use only text)
|
|
}
|
|
} else if (eventtype == END_ELEMENT) {
|
|
String ename = xmlreader.getLocalName();
|
|
|
|
// store source/target on finish of trans-unit
|
|
if (ename.equalsIgnoreCase("trans-unit") && translationList != null) {
|
|
if (source != null) {
|
|
if (target != null) {
|
|
if ("translated".equals(state)) {
|
|
translationList.put(source, target);
|
|
} else {
|
|
translationList.put(source, null);
|
|
}
|
|
} else {
|
|
translationList.put(source, null);
|
|
}
|
|
source = null;
|
|
}
|
|
target = null;
|
|
}
|
|
// on file end-tag make sure nothing is added (on error in xml)
|
|
if (ename.equalsIgnoreCase("file")) {
|
|
translationList = null;
|
|
}
|
|
}
|
|
}
|
|
xmlreader.close();
|
|
} catch (IOException | XMLStreamException ex) {
|
|
ConcurrentLog.warn("TRANSLATOR", "error reading " + xliffFile.getAbsolutePath() + " -> " + ex.getMessage());
|
|
}
|
|
return lngLists;
|
|
}
|
|
|
|
/**
|
|
* Maps (overrides) Translator.loadTranslationsLists to read from xliff file
|
|
* if file extension is .xlf or .xliff (otherwise load xx.lng file).
|
|
* Additionally if localy modified translation exists in DATA/LOCALE content
|
|
* is merged into given translation.
|
|
*
|
|
* @param xliffFile
|
|
* @return translation map
|
|
*/
|
|
@Override
|
|
public Map<String, Map<String, String>> loadTranslationsLists(final File xliffFile) {
|
|
File locallng = getScratchFile(xliffFile);
|
|
if (xliffFile.getName().toLowerCase(Locale.ROOT).endsWith(".xlf") || xliffFile.getName().toLowerCase(Locale.ROOT).endsWith(".xliff")) {
|
|
if (locallng.exists()) {
|
|
Map<String, Map<String, String>> mergedList = loadTranslationsListsFromXliff(xliffFile);
|
|
Map<String, Map<String, String>> tmplist = loadTranslationsListsFromXliff(locallng);
|
|
return mergeTranslationLists(mergedList, tmplist);
|
|
}
|
|
return loadTranslationsListsFromXliff(xliffFile);
|
|
} else if (locallng.exists()) {
|
|
Map<String, Map<String, String>> mergedList = super.loadTranslationsLists(xliffFile);
|
|
Map<String, Map<String, String>> tmplist = super.loadTranslationsLists(locallng);
|
|
return mergeTranslationLists(mergedList, tmplist);
|
|
} else {
|
|
return super.loadTranslationsLists(xliffFile);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Merges translations, values from localTrans overwrite entries in masterTrans.
|
|
*
|
|
* @param masterTrans master translation
|
|
* @param localTrans translation to be merged to master
|
|
* @return resulting map with all entries from master and localTrans
|
|
*/
|
|
protected Map<String, Map<String, String>> mergeTranslationLists(Map<String, Map<String, String>> masterTrans, Map<String, Map<String, String>> localTrans) {
|
|
if (localTrans != null && !localTrans.isEmpty()) {
|
|
for (String transfilename : localTrans.keySet()) { // get translation filename
|
|
|
|
Map<String, String> origList = localTrans.get(transfilename);
|
|
if (masterTrans.containsKey(transfilename)) {
|
|
Map<String, String> xliffList = masterTrans.get(transfilename);
|
|
xliffList.putAll(origList);
|
|
} else {
|
|
masterTrans.put(transfilename, origList);
|
|
}
|
|
}
|
|
}
|
|
return masterTrans;
|
|
}
|
|
|
|
/**
|
|
* Saves the internal translation map as XLIFF 1.2 file
|
|
*
|
|
* @param targetLanguage the target language code, if null target is omitted
|
|
* in output file and only source text stored
|
|
* @param xliffFile name of the output XLIFF file (typically with .xlf
|
|
* extension)
|
|
* @param lng the YaCy translation for one language
|
|
*
|
|
* @return true on success
|
|
*/
|
|
public boolean saveAsXliff(final String targetLanguageCode, File xliffFile, Map<String, Map<String, String>> lng) {
|
|
|
|
final String sourceLanguage = "en"; // source language is always English
|
|
|
|
try (
|
|
/* Resources automatically closed by this try-with-resources statement */
|
|
final FileOutputStream fileOutStream = new FileOutputStream(xliffFile);
|
|
final OutputStreamWriter output = new OutputStreamWriter(fileOutStream, StandardCharsets.UTF_8.name());
|
|
) {
|
|
output.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
|
|
output.write("<xliff version='1.2' xmlns='urn:oasis:names:tc:xliff:document:1.2'> \n");
|
|
for (String afilemap : lng.keySet()) {
|
|
output.write("<file original=\"" + afilemap + "\" " // original required in xliff 1.2
|
|
+ " source-language=\"" + sourceLanguage + "\" "); // required in xliff 1.2
|
|
if (targetLanguageCode != null && !targetLanguageCode.isEmpty()) {
|
|
output.write(" target-language=\"" + targetLanguageCode + "\" "); // required in xliff 1.2
|
|
}
|
|
output.write(" datatype=\"html\">\n"); // required in xliff 1.2
|
|
|
|
output.write(" <body>\n");
|
|
Map<String, String> txtmap = lng.get(afilemap);
|
|
for (String source : txtmap.keySet()) {
|
|
String target = txtmap.get(source);
|
|
// we use hashCode of source string to get same id in different xliff files for same translation text
|
|
output.write(" <trans-unit id=\"" + Integer.toHexString(source.hashCode()) + "\" xml:space=\"preserve\" approved=\"no\"");
|
|
if (target == null || target.isEmpty()) { // omitt target text if not available
|
|
output.write(" translate=\"yes\">\n");
|
|
output.write(" <source>" + toXmlStr(source) + "</source>\n");
|
|
} else {
|
|
output.write(">\n");
|
|
output.write(" <source>" + toXmlStr(source) + "</source>\n");
|
|
output.write(" <target" + (target.equals(source) ? "" : " state='translated'") + ">" + toXmlStr(target) + "</target>\n");
|
|
}
|
|
output.write(" </trans-unit>\n");
|
|
}
|
|
output.write(" </body>\n");
|
|
output.write("</file>\n\n");
|
|
}
|
|
|
|
output.write("</xliff>\n");
|
|
} catch (Exception e) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Helper to write translation entries for one file
|
|
*
|
|
* @param filename relative path file name
|
|
* @param textlist the translation list for filename
|
|
* @param output output file
|
|
* @throws IOException
|
|
*/
|
|
private void writeFileSection(final String filename, final Map<String, String> textlist, OutputStreamWriter output) throws IOException {
|
|
if (!filename.isEmpty()) {
|
|
output.write("#File: " + filename + "\n"
|
|
+ "#---------------------------\n");
|
|
|
|
for (String source : textlist.keySet()) {
|
|
String target = textlist.get(source);
|
|
if (target != null && !target.isEmpty()) { // omitt target text if not available
|
|
if (source.equals(target)) {
|
|
output.write("#" + source + "==" + target + "\n"); // no translation needed (mark #)
|
|
} else {
|
|
output.write(source + "==" + target + "\n");
|
|
}
|
|
} else {
|
|
output.write("#" + source + "==" + source + "\n"); // no translation available (mark #)
|
|
}
|
|
}
|
|
output.write("#-----------------------------\n\n");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Saves the internal translation map as XLIFF 1.2 file
|
|
*
|
|
* @param targetLanguageCode the target language code, if null target is omitted
|
|
* in output file and only source text stored
|
|
* @param lngFile name of the output XLIFF file (typically with .xlf
|
|
* extension)
|
|
* @param lng the YaCy translation for one language
|
|
*
|
|
* @return true on success
|
|
*/
|
|
public boolean saveAsLngFile(final String targetLanguageCode, File lngFile, Map<String, Map<String, String>> lng) {
|
|
|
|
try (
|
|
/* Resources automatically closed by this try-with-resources statement */
|
|
final FileOutputStream fileOutStream = new FileOutputStream(lngFile);
|
|
final OutputStreamWriter output = new OutputStreamWriter(fileOutStream, StandardCharsets.UTF_8.name());
|
|
) {
|
|
output.write("# " + (targetLanguageCode == null ? "master" : targetLanguageCode) + ".lng\n");
|
|
output.write("# -----------------------\n");
|
|
output.write("# This is a part of YaCy, a peer-to-peer based web search engine\n\n");
|
|
output.write("# Each translation list starts with #File: relative/path/to/file\n");
|
|
output.write("# followed by the translations OriginalText==TranslatedText (in one line)\n");
|
|
output.write("# Comment lines or not translated lines start with #\n\n");
|
|
|
|
// special handling of "ConfigLanguage_p.html" to list on top of all other
|
|
// because of some important identifier
|
|
Map<String, String> txtmap = lng.get("ConfigLanguage_p.html");
|
|
if (txtmap != null) writeFileSection("ConfigLanguage_p.html", txtmap, output);
|
|
|
|
for (String afilemap : lng.keySet()) {
|
|
txtmap = lng.get(afilemap);
|
|
if (!"ConfigLanguage_p.html".equals(afilemap)) {
|
|
writeFileSection(afilemap, txtmap, output);
|
|
}
|
|
}
|
|
output.write("# EOF");
|
|
} catch (Exception e) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Helper to make valid xml content text as text may contain html markup
|
|
* (the reverse on read is done automatically)
|
|
* @param s input string
|
|
* @return xml string
|
|
*/
|
|
private String toXmlStr(String s) {
|
|
|
|
int control = s.indexOf("&");
|
|
while (control >= 0) {
|
|
s = s.substring(0, control) + "&" + s.substring(control + 1);
|
|
if (control < s.length()) {
|
|
control++;
|
|
}
|
|
control = s.indexOf("&", control);
|
|
}
|
|
|
|
control = s.indexOf("<");
|
|
while (control >= 0) {
|
|
s = s.substring(0, control) + "<" + s.substring(control + 1);
|
|
if (control < s.length()) {
|
|
control++;
|
|
}
|
|
control = s.indexOf("<", control);
|
|
}
|
|
|
|
control = s.indexOf(">");
|
|
while (control >= 0) {
|
|
s = s.substring(0, control) + ">" + s.substring(control + 1);
|
|
if (control < s.length()) {
|
|
control++;
|
|
}
|
|
control = s.indexOf(">", control);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
/**
|
|
* Get the path to a work/scratch file in the DATA/LOCALE directory with the
|
|
* same name as given in the langPath
|
|
*
|
|
* @param langFile the path with filename to the language file
|
|
* @return a path to DATA/LOCALE/langFile.filename()
|
|
*/
|
|
public File getScratchFile(final File langFile) {
|
|
if (Switchboard.getSwitchboard() != null) { // for debug and testing were switchboard is null
|
|
File f = Switchboard.getSwitchboard().getDataPath("locale.translated_html", "DATA/LOCALE");
|
|
return new File(f.getParentFile(), langFile.getName());
|
|
}
|
|
return langFile;
|
|
}
|
|
}
|