// TranslatorXliff.java // ------------------------------------- // part of YACY // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de // Frankfurt, Germany, 2004 // // This file ist contributed by Burkhard Buelte // last major change: 2016-03-28 // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package net.yacy.utils.translation; import static javax.xml.stream.XMLStreamConstants.END_ELEMENT; import static javax.xml.stream.XMLStreamConstants.START_ELEMENT; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.util.LinkedHashMap; import java.util.Locale; import java.util.Map; import java.util.TreeMap; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import net.yacy.cora.util.ConcurrentLog; import net.yacy.data.Translator; import net.yacy.search.Switchboard; /** * Wordlist based translator * * Translator which can read and write translation lists from a * XLIFF 1.2 * file with phrases or single words to translate a string or a file. * * On loading of translation files loaded data is merged with local (modified or downloaded) * translation data in DATA/LOCALE/ */ public class TranslatorXliff extends Translator { /** * Load translationLists for one language from a Xliff File. * * @param xliffFile the File, which contains the Lists * @return a HashMap, which contains for each File a HashMap with * translations. */ public Map> loadTranslationsListsFromXliff(final File xliffFile) { final Map> lngLists = new TreeMap>(); //list of translationLists for different files. /** * read xliff xml file into a xliff object * * * * * text * text * * .... * * * ..... * */ try (FileInputStream fis = new FileInputStream(xliffFile)) { // try-with-resource to close inputstream XMLInputFactory factory = XMLInputFactory.newInstance(); XMLStreamReader xmlreader = factory.createXMLStreamReader(fis); Map translationList = null; //current Translation Table (maintaining input order) String source = null; String target = null; String state = null; while (xmlreader.hasNext()) { int eventtype = xmlreader.next(); if (eventtype == START_ELEMENT) { String ename = xmlreader.getLocalName(); // setup for 'file' section (get or add translationlist for this file) if (ename.equalsIgnoreCase("file")) { String forFile = xmlreader.getAttributeValue(null, "original"); if (lngLists.containsKey(forFile)) { translationList = lngLists.get(forFile); } else { translationList = new LinkedHashMap(); //current Translation Table (maintaining input order) lngLists.put(forFile, translationList); } source = null; target = null; } else if (ename.equalsIgnoreCase("trans-unit")) { // prepare for trans-unit source = null; target = null; } else if (ename.equalsIgnoreCase("source")) { // get source text source = xmlreader.getElementText(); } else if (ename.equalsIgnoreCase("target")) { // get target text state = xmlreader.getAttributeValue(null, "state"); target = xmlreader.getElementText(); // TODO: in full blown xliff, target may contain sub-xml elements (but we use only text) } } else if (eventtype == END_ELEMENT) { String ename = xmlreader.getLocalName(); // store source/target on finish of trans-unit if (ename.equalsIgnoreCase("trans-unit") && translationList != null) { if (source != null) { if (target != null) { if ("translated".equals(state)) { translationList.put(source, target); } else { translationList.put(source, null); } } else { translationList.put(source, null); } source = null; } target = null; } // on file end-tag make sure nothing is added (on error in xml) if (ename.equalsIgnoreCase("file")) { translationList = null; } } } xmlreader.close(); } catch (IOException | XMLStreamException ex) { ConcurrentLog.warn("TRANSLATOR", "error reading " + xliffFile.getAbsolutePath() + " -> " + ex.getMessage()); } return lngLists; } /** * Maps (overrides) Translator.loadTranslationsLists to read from xliff file * if file extension is .xlf or .xliff (otherwise load xx.lng file). * Additionally if localy modified translation exists in DATA/LOCALE content * is merged into given translation. * * @param xliffFile * @return translation map */ @Override public Map> loadTranslationsLists(final File xliffFile) { File locallng = getScratchFile(xliffFile); if (xliffFile.getName().toLowerCase(Locale.ROOT).endsWith(".xlf") || xliffFile.getName().toLowerCase(Locale.ROOT).endsWith(".xliff")) { if (locallng.exists()) { Map> mergedList = loadTranslationsListsFromXliff(xliffFile); Map> tmplist = loadTranslationsListsFromXliff(locallng); return mergeTranslationLists(mergedList, tmplist); } return loadTranslationsListsFromXliff(xliffFile); } else if (locallng.exists()) { Map> mergedList = super.loadTranslationsLists(xliffFile); Map> tmplist = super.loadTranslationsLists(locallng); return mergeTranslationLists(mergedList, tmplist); } else { return super.loadTranslationsLists(xliffFile); } } /** * Merges translations, values from localTrans overwrite entries in masterTrans. * * @param masterTrans master translation * @param localTrans translation to be merged to master * @return resulting map with all entries from master and localTrans */ protected Map> mergeTranslationLists(Map> masterTrans, Map> localTrans) { if (localTrans != null && !localTrans.isEmpty()) { for (String transfilename : localTrans.keySet()) { // get translation filename Map origList = localTrans.get(transfilename); if (masterTrans.containsKey(transfilename)) { Map xliffList = masterTrans.get(transfilename); xliffList.putAll(origList); } else { masterTrans.put(transfilename, origList); } } } return masterTrans; } /** * Saves the internal translation map as XLIFF 1.2 file * * @param targetLanguage the target language code, if null target is omitted * in output file and only source text stored * @param xliffFile name of the output XLIFF file (typically with .xlf * extension) * @param lng the YaCy translation for one language * * @return true on success */ public boolean saveAsXliff(final String targetLanguageCode, File xliffFile, Map> lng) { final String sourceLanguage = "en"; // source language is always English try ( /* Resources automatically closed by this try-with-resources statement */ final FileOutputStream fileOutStream = new FileOutputStream(xliffFile); final OutputStreamWriter output = new OutputStreamWriter(fileOutStream, StandardCharsets.UTF_8.name()); ) { output.write("\n"); output.write(" \n"); for (String afilemap : lng.keySet()) { output.write("\n"); // required in xliff 1.2 output.write(" \n"); Map txtmap = lng.get(afilemap); for (String source : txtmap.keySet()) { String target = txtmap.get(source); // we use hashCode of source string to get same id in different xliff files for same translation text output.write(" \n"); output.write(" " + toXmlStr(source) + "\n"); } else { output.write(">\n"); output.write(" " + toXmlStr(source) + "\n"); output.write(" " + toXmlStr(target) + "\n"); } output.write(" \n"); } output.write(" \n"); output.write("\n\n"); } output.write("\n"); } catch (Exception e) { return false; } return true; } /** * Helper to write translation entries for one file * * @param filename relative path file name * @param textlist the translation list for filename * @param output output file * @throws IOException */ private void writeFileSection(final String filename, final Map textlist, OutputStreamWriter output) throws IOException { if (!filename.isEmpty()) { output.write("#File: " + filename + "\n" + "#---------------------------\n"); for (String source : textlist.keySet()) { String target = textlist.get(source); if (target != null && !target.isEmpty()) { // omitt target text if not available if (source.equals(target)) { output.write("#" + source + "==" + target + "\n"); // no translation needed (mark #) } else { output.write(source + "==" + target + "\n"); } } else { output.write("#" + source + "==" + source + "\n"); // no translation available (mark #) } } output.write("#-----------------------------\n\n"); } } /** * Saves the internal translation map as XLIFF 1.2 file * * @param targetLanguageCode the target language code, if null target is omitted * in output file and only source text stored * @param lngFile name of the output XLIFF file (typically with .xlf * extension) * @param lng the YaCy translation for one language * * @return true on success */ public boolean saveAsLngFile(final String targetLanguageCode, File lngFile, Map> lng) { try ( /* Resources automatically closed by this try-with-resources statement */ final FileOutputStream fileOutStream = new FileOutputStream(lngFile); final OutputStreamWriter output = new OutputStreamWriter(fileOutStream, StandardCharsets.UTF_8.name()); ) { output.write("# " + (targetLanguageCode == null ? "master" : targetLanguageCode) + ".lng\n"); output.write("# -----------------------\n"); output.write("# This is a part of YaCy, a peer-to-peer based web search engine\n\n"); output.write("# Each translation list starts with #File: relative/path/to/file\n"); output.write("# followed by the translations OriginalText==TranslatedText (in one line)\n"); output.write("# Comment lines or not translated lines start with #\n\n"); // special handling of "ConfigLanguage_p.html" to list on top of all other // because of some important identifier Map txtmap = lng.get("ConfigLanguage_p.html"); if (txtmap != null) writeFileSection("ConfigLanguage_p.html", txtmap, output); for (String afilemap : lng.keySet()) { txtmap = lng.get(afilemap); if (!"ConfigLanguage_p.html".equals(afilemap)) { writeFileSection(afilemap, txtmap, output); } } output.write("# EOF"); } catch (Exception e) { return false; } return true; } /** * Helper to make valid xml content text as text may contain html markup * (the reverse on read is done automatically) * @param s input string * @return xml string */ private String toXmlStr(String s) { int control = s.indexOf("&"); while (control >= 0) { s = s.substring(0, control) + "&" + s.substring(control + 1); if (control < s.length()) { control++; } control = s.indexOf("&", control); } control = s.indexOf("<"); while (control >= 0) { s = s.substring(0, control) + "<" + s.substring(control + 1); if (control < s.length()) { control++; } control = s.indexOf("<", control); } control = s.indexOf(">"); while (control >= 0) { s = s.substring(0, control) + ">" + s.substring(control + 1); if (control < s.length()) { control++; } control = s.indexOf(">", control); } return s; } /** * Get the path to a work/scratch file in the DATA/LOCALE directory with the * same name as given in the langPath * * @param langFile the path with filename to the language file * @return a path to DATA/LOCALE/langFile.filename() */ public File getScratchFile(final File langFile) { if (Switchboard.getSwitchboard() != null) { // for debug and testing were switchboard is null File f = Switchboard.getSwitchboard().getDataPath("locale.translated_html", "DATA/LOCALE"); return new File(f.getParentFile(), langFile.getName()); } return langFile; } }