diff --git a/build.xml b/build.xml index faeb82bc0..879296ef4 100644 --- a/build.xml +++ b/build.xml @@ -242,7 +242,6 @@ - diff --git a/lib/xliff-core-1.2-1.1.jar b/lib/xliff-core-1.2-1.1.jar deleted file mode 100644 index 6d5f6b203..000000000 Binary files a/lib/xliff-core-1.2-1.1.jar and /dev/null differ diff --git a/pom.xml b/pom.xml index 15d68243b..191ced648 100644 --- a/pom.xml +++ b/pom.xml @@ -606,11 +606,6 @@ xerces xercesImpl 2.11.0 - - - com.collaborne - xliff-core-1.2 - 1.1 com.adobe.xmp diff --git a/source/net/yacy/utils/translation/TranslatorXliff.java b/source/net/yacy/utils/translation/TranslatorXliff.java index d0ccc423c..e44628709 100644 --- a/source/net/yacy/utils/translation/TranslatorXliff.java +++ b/source/net/yacy/utils/translation/TranslatorXliff.java @@ -29,27 +29,22 @@ package net.yacy.utils.translation; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; import java.util.TreeMap; -import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBException; -import javax.xml.bind.Unmarshaller; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import javax.xml.stream.events.XMLEvent; import net.yacy.cora.util.ConcurrentLog; import net.yacy.data.Translator; import net.yacy.search.Switchboard; -import org.oasis.xliff.core_12.Body; -import org.oasis.xliff.core_12.Target; -import org.oasis.xliff.core_12.TransUnit; -import org.oasis.xliff.core_12.Xliff; /** * Wordlist based translator @@ -88,55 +83,70 @@ public class TranslatorXliff extends Translator { * ..... * */ - Xliff xliffTranslation; - try (FileInputStream fis = new FileInputStream(xliffFile)){ // try-with-resource to close inputstream - JAXBContext ctx = JAXBContext.newInstance(org.oasis.xliff.core_12.Xliff.class); - Unmarshaller un = ctx.createUnmarshaller(); - Object obj = un.unmarshal(fis); - if (obj instanceof org.oasis.xliff.core_12.Xliff) { - xliffTranslation = (org.oasis.xliff.core_12.Xliff) obj; - } else { - return null; - } - List xlfFileList = xliffTranslation.getAnyAndFile(); - for (Object xlfobj : xlfFileList) { - org.oasis.xliff.core_12.File xlfFileNode = (org.oasis.xliff.core_12.File) xlfobj; - Map translationList; //current Translation Table (maintaining input order) - String forFile = xlfFileNode.getOriginal(); - if (lngLists.containsKey(forFile)) { - translationList = lngLists.get(forFile); - } else { - translationList = new LinkedHashMap(); //current Translation Table (maintaining input order) - lngLists.put(forFile, translationList); - } + try (FileInputStream fis = new FileInputStream(xliffFile)) { // try-with-resource to close inputstream + + XMLInputFactory factory = XMLInputFactory.newInstance(); + XMLStreamReader xmlreader = factory.createXMLStreamReader(fis); + + Map translationList = null; //current Translation Table (maintaining input order) + String source = null; + String target = null; + String state = null; + while (xmlreader.hasNext()) { + int eventtype = xmlreader.next(); + + if (eventtype == XMLEvent.START_ELEMENT) { + String ename = xmlreader.getLocalName(); - Body xlfBody = xlfFileNode.getBody(); - List xlfTransunitList = xlfBody.getGroupOrTransUnitOrBinUnit(); - for (Object xlfTransunit : xlfTransunitList) { - if (xlfTransunit instanceof TransUnit) { - String source = ((TransUnit) xlfTransunit).getSource().getContent().get(0).toString(); - Target target = ((TransUnit) xlfTransunit).getTarget(); - if (target != null) { - if ("translated".equals(target.getState())) { - List targetContentList = target.getContent(); - String targetContent = targetContentList.get(0).toString(); - translationList.put(source, targetContent); + // setup for 'file' section (get or add translationlist for this file) + if (ename.equalsIgnoreCase("file")) { + String forFile = xmlreader.getAttributeValue(null, "original"); + if (lngLists.containsKey(forFile)) { + translationList = lngLists.get(forFile); + } else { + translationList = new LinkedHashMap(); //current Translation Table (maintaining input order) + lngLists.put(forFile, translationList); + } + source = null; + target = null; + } else if (ename.equalsIgnoreCase("trans-unit")) { // prepare for trans-unit + source = null; + target = null; + } else if (ename.equalsIgnoreCase("source")) { // get source text + source = xmlreader.getElementText(); + } else if (ename.equalsIgnoreCase("target")) { // get target text + state = xmlreader.getAttributeValue(null, "state"); + target = xmlreader.getElementText(); // TODO: in full blown xliff, target may contain sub-xml elements (but we use only text) + } + } else if (eventtype == XMLEvent.END_ELEMENT) { + String ename = xmlreader.getLocalName(); + + // store source/target on finish of trans-unit + if (ename.equalsIgnoreCase("trans-unit") && translationList != null) { + if (source != null) { + if (target != null) { + if ("translated".equals(state)) { + translationList.put(source, target); + } else { + translationList.put(source, null); + } } else { translationList.put(source, null); } - } else { - translationList.put(source, null); + source = null; } + target = null; + } + // on file end-tag make sure nothing is added (on error in xml) + if (ename.equalsIgnoreCase("file")) { + translationList = null; } } } - } catch (JAXBException je) { - ConcurrentLog.warn("TRANSLATOR", je.getMessage()); - } catch (FileNotFoundException ex) { - ConcurrentLog.warn("TRANSLATOR", "File not found: " + xliffFile.getAbsolutePath()); - } catch (IOException ex) { - ConcurrentLog.warn("TRANSLATOR", ex.getMessage()); + xmlreader.close(); + } catch (IOException | XMLStreamException ex) { + ConcurrentLog.warn("TRANSLATOR", "error reading " + xliffFile.getAbsolutePath() + " -> " + ex.getMessage()); } return lngLists; }