Speed up reading of XLIFF language files by using a streaming XML parser (StAX) instead of JAXB

The xliff-core-1.2-1.1.jar library is now obsolete and has been removed.
pull/71/head
reger 9 years ago
parent 5ab8afd4d8
commit 19b4509d54

@ -242,7 +242,6 @@
<pathelement location="${lib}/weupnp-0.1.4.jar" /> <pathelement location="${lib}/weupnp-0.1.4.jar" />
<pathelement location="${lib}/woodstox-core-asl-4.4.1.jar" /> <pathelement location="${lib}/woodstox-core-asl-4.4.1.jar" />
<pathelement location="${lib}/xercesImpl.jar" /> <pathelement location="${lib}/xercesImpl.jar" />
<pathelement location="${lib}/xliff-core-1.2-1.1.jar" />
<pathelement location="${lib}/xml-apis.jar" /> <pathelement location="${lib}/xml-apis.jar" />
<pathelement location="${lib}/xmpcore-5.1.2.jar" /> <pathelement location="${lib}/xmpcore-5.1.2.jar" />
<pathelement location="${lib}/zookeeper-3.4.6.jar" /> <pathelement location="${lib}/zookeeper-3.4.6.jar" />

Binary file not shown.

@ -606,11 +606,6 @@
<groupId>xerces</groupId> <groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId> <artifactId>xercesImpl</artifactId>
<version>2.11.0</version> <version>2.11.0</version>
</dependency>
<dependency>
<groupId>com.collaborne</groupId>
<artifactId>xliff-core-1.2</artifactId>
<version>1.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.adobe.xmp</groupId> <groupId>com.adobe.xmp</groupId>

@ -29,27 +29,22 @@ package net.yacy.utils.translation;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import javax.xml.bind.JAXBContext; import javax.xml.stream.XMLInputFactory;
import javax.xml.bind.JAXBException; import javax.xml.stream.XMLStreamException;
import javax.xml.bind.Unmarshaller; import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.events.XMLEvent;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.Translator; import net.yacy.data.Translator;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import org.oasis.xliff.core_12.Body;
import org.oasis.xliff.core_12.Target;
import org.oasis.xliff.core_12.TransUnit;
import org.oasis.xliff.core_12.Xliff;
/** /**
* Wordlist based translator * Wordlist based translator
@ -88,55 +83,70 @@ public class TranslatorXliff extends Translator {
* <file>..... * <file>.....
* </xliff> * </xliff>
*/ */
Xliff xliffTranslation;
try (FileInputStream fis = new FileInputStream(xliffFile)){ // try-with-resource to close inputstream
JAXBContext ctx = JAXBContext.newInstance(org.oasis.xliff.core_12.Xliff.class);
Unmarshaller un = ctx.createUnmarshaller();
Object obj = un.unmarshal(fis);
if (obj instanceof org.oasis.xliff.core_12.Xliff) {
xliffTranslation = (org.oasis.xliff.core_12.Xliff) obj;
} else {
return null;
}
List<Object> xlfFileList = xliffTranslation.getAnyAndFile(); try (FileInputStream fis = new FileInputStream(xliffFile)) { // try-with-resource to close inputstream
for (Object xlfobj : xlfFileList) {
org.oasis.xliff.core_12.File xlfFileNode = (org.oasis.xliff.core_12.File) xlfobj; XMLInputFactory factory = XMLInputFactory.newInstance();
Map<String, String> translationList; //current Translation Table (maintaining input order) XMLStreamReader xmlreader = factory.createXMLStreamReader(fis);
String forFile = xlfFileNode.getOriginal();
if (lngLists.containsKey(forFile)) { Map<String, String> translationList = null; //current Translation Table (maintaining input order)
translationList = lngLists.get(forFile); String source = null;
} else { String target = null;
translationList = new LinkedHashMap<String, String>(); //current Translation Table (maintaining input order) String state = null;
lngLists.put(forFile, translationList); while (xmlreader.hasNext()) {
} int eventtype = xmlreader.next();
if (eventtype == XMLEvent.START_ELEMENT) {
String ename = xmlreader.getLocalName();
Body xlfBody = xlfFileNode.getBody(); // setup for 'file' section (get or add translationlist for this file)
List<Object> xlfTransunitList = xlfBody.getGroupOrTransUnitOrBinUnit(); if (ename.equalsIgnoreCase("file")) {
for (Object xlfTransunit : xlfTransunitList) { String forFile = xmlreader.getAttributeValue(null, "original");
if (xlfTransunit instanceof TransUnit) { if (lngLists.containsKey(forFile)) {
String source = ((TransUnit) xlfTransunit).getSource().getContent().get(0).toString(); translationList = lngLists.get(forFile);
Target target = ((TransUnit) xlfTransunit).getTarget(); } else {
if (target != null) { translationList = new LinkedHashMap<String, String>(); //current Translation Table (maintaining input order)
if ("translated".equals(target.getState())) { lngLists.put(forFile, translationList);
List<Object> targetContentList = target.getContent(); }
String targetContent = targetContentList.get(0).toString(); source = null;
translationList.put(source, targetContent); target = null;
} else if (ename.equalsIgnoreCase("trans-unit")) { // prepare for trans-unit
source = null;
target = null;
} else if (ename.equalsIgnoreCase("source")) { // get source text
source = xmlreader.getElementText();
} else if (ename.equalsIgnoreCase("target")) { // get target text
state = xmlreader.getAttributeValue(null, "state");
target = xmlreader.getElementText(); // TODO: in full blown xliff, target may contain sub-xml elements (but we use only text)
}
} else if (eventtype == XMLEvent.END_ELEMENT) {
String ename = xmlreader.getLocalName();
// store source/target on finish of trans-unit
if (ename.equalsIgnoreCase("trans-unit") && translationList != null) {
if (source != null) {
if (target != null) {
if ("translated".equals(state)) {
translationList.put(source, target);
} else {
translationList.put(source, null);
}
} else { } else {
translationList.put(source, null); translationList.put(source, null);
} }
} else { source = null;
translationList.put(source, null);
} }
target = null;
}
// on file end-tag make sure nothing is added (on error in xml)
if (ename.equalsIgnoreCase("file")) {
translationList = null;
} }
} }
} }
} catch (JAXBException je) { xmlreader.close();
ConcurrentLog.warn("TRANSLATOR", je.getMessage()); } catch (IOException | XMLStreamException ex) {
} catch (FileNotFoundException ex) { ConcurrentLog.warn("TRANSLATOR", "error reading " + xliffFile.getAbsolutePath() + " -> " + ex.getMessage());
ConcurrentLog.warn("TRANSLATOR", "File not found: " + xliffFile.getAbsolutePath());
} catch (IOException ex) {
ConcurrentLog.warn("TRANSLATOR", ex.getMessage());
} }
return lngLists; return lngLists;
} }

Loading…
Cancel
Save