@ -14,13 +14,14 @@ It will do the following automatically:
TODO :
- auto-add new translations to the build system according to the translation process
- remove 'unfinished' translation items
'''
from __future__ import division , print_function
import subprocess
import re
import sys
import os
import io
import xml . etree . ElementTree as ET
# Name of transifex tool (the command-line client executable).
# Must not carry surrounding whitespace, otherwise the command cannot be resolved.
TX = 'tx'
@ -40,24 +41,143 @@ def fetch_all_translations():
print ( ' Error while fetching translations ' , file = sys . stderr )
exit ( 1 )
# NOTE(review): this zero-argument definition looks like the superseded header of
# postprocess_translations left behind by the diff. A later definition of the same
# name (taking reduce_diff_hacks) rebinds it, so this one appears to be dead -- confirm
# and drop it.
def postprocess_translations ( ) :
print ( ' Postprocessing... ' )
def find_format_specifiers(s):
    '''Find all format specifiers in a string.

    A specifier is the single character that directly follows a '%'.
    Scanning resumes after that character, so '%%' is reported once as '%'.

    Returns the list of specifier characters in order of appearance.
    Raises IndexError when the string ends with a lone '%'; the caller
    check_format_specifiers relies on this to flag unparsable translations.
    '''
    specifiers = []
    pos = 0
    while True:
        percent = s.find('%', pos)
        if percent < 0:
            break
        # deliberately unguarded: a trailing '%' must raise IndexError
        specifiers.append(s[percent + 1])
        # skip the '%' and its specifier character
        pos = percent + 2
    return specifiers
def split_format_specifiers(specifiers):
    '''Split format specifiers between numeric (Qt) and others (strprintf)

    specifiers is an iterable of single-character specifiers.

    Returns a tuple (numeric, other): numeric is the set of digit
    specifiers '1'..'9', other is the list of all remaining specifiers in
    their original order.
    '''
    qt_digits = set('123456789')
    numeric = set()
    other = []
    for s in specifiers:
        if s in qt_digits:
            numeric.add(s)
        else:
            other.append(s)
    # numeric (Qt) can be present in any order, others (strprintf) must be in specified order
    return numeric, other
def sanitize_string(s):
    '''Sanitize string for printing

    Replaces every newline with a single space so the string fits on one
    line of output.
    '''
    return s.replace('\n', ' ')
def check_format_specifiers(source, translation, errors):
    '''Check that a translation uses the same format specifiers as its source.

    source and translation are the message strings to compare. errors is a
    list; a human-readable description is appended to it for every problem
    found.

    Returns True when the specifiers match, False otherwise.
    Raises AssertionError when the source message itself mixes Qt and
    strprintf specifiers.
    '''
    source_f = split_format_specifiers(find_format_specifiers(source))
    # no source message may contain both Qt and strprintf format specifiers;
    # if this fails, go change the source as this is hacky and confusing!
    # Raised explicitly rather than with an assert statement so the check
    # is not stripped when running under python -O.
    if source_f[0] and source_f[1]:
        raise AssertionError("Source message mixes Qt and strprintf format specifiers: '%s'" % sanitize_string(source))
    try:
        translation_f = split_format_specifiers(find_format_specifiers(translation))
    except IndexError:
        # find_format_specifiers hit a '%' at the very end of the translation
        errors.append("Parse error in translation '%s'" % sanitize_string(translation))
        return False
    if source_f != translation_f:
        errors.append("Mismatch between '%s' and '%s'" % (sanitize_string(source), sanitize_string(translation)))
        return False
    return True
def all_ts_files(suffix=''):
    '''Iterate over the translation (.ts) files in LOCALE_DIR.

    suffix is an optional extra extension (for example '.orig') that the
    files on disk carry after '.ts'. Only names ending in '.ts' + suffix
    are considered, and the source language file is skipped.

    Yields (filename, filepath) tuples with the suffix already removed
    from both.
    '''
    for filename in os.listdir(LOCALE_DIR):
        # process only language files, and do not process source language
        if not filename.endswith('.ts' + suffix) or filename == SOURCE_LANG + suffix:
            continue
        if suffix:  # remove provided suffix
            filename = filename[:-len(suffix)]
        filepath = os.path.join(LOCALE_DIR, filename)
        # nothing is opened here: callers decide how to read or rename the file
        yield (filename, filepath)
# Control characters stripped from translation files: everything below 0x20
# except LF (0x0a) and CR (0x0d).
# NOTE(review): the range also strips TAB (0x09), which XML itself permits -- confirm
# this is intended before narrowing it.
FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]')
def remove_invalid_characters(s):
    '''Remove invalid characters from translation string

    s is a bytes object; the return value is a new bytes object with every
    byte matched by FIX_RE deleted.
    '''
    return FIX_RE.sub(b'', s)
# Override cdata escape function to make our output match Qt's (optional, just for cleaner diffs for
# comparison, disable by default)
# Holds ElementTree's original escape function once the override has been installed.
_orig_escape_cdata = None
def escape_cdata(text):
    '''Escape character data the way Qt writes it.

    Applies the saved ElementTree escape function first, then additionally
    turns single and double quotes into the &apos; and &quot; entities.
    If the override has not been installed yet, ElementTree's current
    escape function is used directly instead of calling None.
    '''
    base_escape = _orig_escape_cdata if _orig_escape_cdata is not None else ET._escape_cdata
    text = base_escape(text)
    text = text.replace("'", '&apos;')
    text = text.replace('"', '&quot;')
    return text
def postprocess_translations(reduce_diff_hacks=False):
    '''Check and clean up every fetched translation file.

    Each translation file is first renamed to '<name>.orig', then parsed
    from that copy and written back under its original name with:
    - control characters stripped from the raw bytes,
    - translations whose format specifiers do not match the source cleared
      and marked as unfinished,
    - <location> tags removed,
    - unfinished messages removed entirely.

    reduce_diff_hacks: when true, tweak the XML serialization (quote
    escaping, spacing of self-closing tags) to resemble Qt's own output.

    Returns True when at least one translation failed the format
    specifier check, False otherwise.
    '''
    print('Checking and postprocessing...')

    # Install the escape override at most once: patching again would save the
    # override as the "original" and make it call itself.
    if reduce_diff_hacks and ET._escape_cdata is not escape_cdata:
        global _orig_escape_cdata
        _orig_escape_cdata = ET._escape_cdata
        ET._escape_cdata = escape_cdata

    # keep the fetched files around as .orig; the cleaned result takes the original name
    for (filename, filepath) in all_ts_files():
        os.rename(filepath, filepath + '.orig')

    have_errors = False
    for (filename, filepath) in all_ts_files('.orig'):
        # pre-fixups to cope with transifex output
        parser = ET.XMLParser(encoding='utf-8')  # need to override encoding because 'utf8' is not understood only 'utf-8'
        with open(filepath + '.orig', 'rb') as f:
            data = f.read()
        # remove control characters; this must be done over the entire file otherwise the XML parser will fail
        data = remove_invalid_characters(data)
        tree = ET.parse(io.BytesIO(data), parser=parser)

        # iterate over all messages in file
        root = tree.getroot()
        for context in root.findall('context'):
            for message in context.findall('message'):
                numerus = message.get('numerus') == 'yes'
                source = message.find('source').text
                translation_node = message.find('translation')
                # pick all numerusforms
                if numerus:
                    translations = [i.text for i in translation_node.findall('numerusform')]
                else:
                    translations = [translation_node.text]

                for translation in translations:
                    if translation is None:
                        continue
                    errors = []
                    valid = check_format_specifiers(source, translation, errors)

                    for error in errors:
                        print('%s: %s' % (filename, error))

                    if not valid:  # set type to unfinished and clear string if invalid
                        translation_node.clear()
                        translation_node.set('type', 'unfinished')
                        have_errors = True

                # Remove location tags
                for location in message.findall('location'):
                    message.remove(location)

                # Remove entire message if it is an unfinished translation
                if translation_node.get('type') == 'unfinished':
                    context.remove(message)

        # write fixed-up tree
        # if diff reduction requested, replace some XML to 'sanitize' to qt formatting
        if reduce_diff_hacks:
            out = io.BytesIO()
            tree.write(out, encoding='utf-8')
            out = out.getvalue()
            # drop the space ElementTree puts before the end of a self-closing tag
            out = out.replace(b' />', b'/>')
            with open(filepath, 'wb') as f:
                f.write(out)
        else:
            tree.write(filepath, encoding='utf-8')
    return have_errors
if __name__ == ' __main__ ' :
check_at_repository_root ( )