forked from IDEMSInternational/indesign-translate
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_xliff.py
More file actions
88 lines (61 loc) · 3.15 KB
/
parse_xliff.py
File metadata and controls
88 lines (61 loc) · 3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from lxml import etree
import json
import re
# Translated XLIFF file that the translations are read from.
f_xlf_path = "./translate_json/tip_sheets/zu/Zulu_tip_sheets_7to12.xlf"
# iterparse is handed the path directly, so no file handle has to be opened
# or closed here; the result is iterated later as (event, element) pairs.
transl_xlf_tree = etree.iterparse(f_xlf_path)

# JSON list of entries (each with a "sourceText" key) that will receive the
# translations in their "text" key.  The context manager guarantees the file
# is closed even when json.load raises on malformed input.
with open("./translate_json/tip_sheets/en/multiple_tipsheet_same_json/Zulu_tip_sheets_7to12.json", "r", encoding="utf8") as f_original_json:
    original_json = json.load(f_original_json)
# Walk the translated XLIFF and copy every <target> translation onto the JSON
# entries whose "sourceText" corresponds to the unit's <source>.
#
# Two kinds of unit are handled:
#   * plain units   - <source> has direct text; entries are matched by exact
#                     equality of "sourceText" and receive the <target> text.
#   * "hyper" units - <source> has no direct text, only child elements; the
#                     children's texts are looked up inside entries whose
#                     "sourceText" starts with '<span id="item-0">' and are
#                     replaced fragment by fragment.
for _action, elem in transl_xlf_tree:
    if not elem.tag.endswith("trans-unit"):
        continue

    # Reset all per-unit state up front so that a unit lacking <source> or
    # <target> can never reuse values left over from the previous unit.
    source_text = None
    transl_text = None
    source_hyper = []
    transl_hyper = []
    has_hyper = False
    has_source = False
    has_target = False

    for child in elem:
        if child.tag.endswith("source"):
            has_source = True
            source_hyper = []
            has_hyper = False
            source_text = child.text
            if not child.text:
                # No direct text: collect the text of each child element.
                has_hyper = True
                source_hyper = [grandchild.text for grandchild in child]
                print(source_hyper)
        elif child.tag.endswith("target"):
            has_target = True
            transl_hyper = []
            transl_text = child.text
            if not child.text:
                transl_hyper = [grandchild.text for grandchild in child]
                print(transl_hyper)

    if not (has_source and has_target):
        # Without both halves there is nothing reliable to copy.
        print(f"skipping trans-unit without source and target: {elem.get('id')}")
        continue

    match = False
    if has_hyper:
        # Fragments can only be paired when there is at least one, every
        # source fragment has text, and each has a translated counterpart with
        # text at the same position.  Anything else is reported as "no match"
        # instead of crashing in min()/find()/replace() or on an index.
        wanted = len(source_hyper)
        fragments_usable = (
            wanted > 0
            and len(transl_hyper) >= wanted
            and all(isinstance(atom, str) for atom in source_hyper)
            and all(isinstance(atom, str) for atom in transl_hyper[:wanted])
        )
        if fragments_usable:
            for bit in original_json:
                if not bit["sourceText"].startswith("<span id=\"item-0\">"):
                    continue
                # As written, each replace maps a character to itself.
                # NOTE(review): presumably meant to normalise HTML entities to
                # these punctuation characters - confirm the intended patterns.
                source_replace_special = bit["sourceText"].replace("”" , "”").replace("“", "“").replace("–", "–").replace("’" ,"’")
                find_test = [source_replace_special.find(atom) for atom in source_hyper]
                print(find_test)
                if min(find_test) < 0:
                    # At least one fragment is absent from this entry.
                    continue
                # Split between adjacent tags so that the i-th fragment is
                # replaced only inside the i-th piece of markup.
                transl_text_split = source_replace_special.split("><")
                if len(transl_text_split) < wanted:
                    # Fewer markup pieces than fragments: cannot align them.
                    continue
                print("match")
                match = True
                for i, (source_atom, transl_atom) in enumerate(zip(source_hyper, transl_hyper)):
                    transl_text_split[i] = transl_text_split[i].replace(source_atom, transl_atom)
                bit["text"] = "><".join(transl_text_split)
    else:
        for bit in original_json:
            if bit["sourceText"] == source_text:
                bit["text"] = transl_text
                match = True

    if not match:
        # Hyper units have no plain source text, so report their fragments;
        # formatting (rather than concatenating) also copes with None.
        unmatched = source_hyper if has_hyper else source_text
        print(f"no match for {unmatched}")
# Write the updated entries to a new JSON file.  The context manager closes
# the file even if serialisation fails; json.dump's default ASCII escaping is
# kept, so the explicit encoding does not change the bytes written.
with open("./translate_json/tip_sheets/zu/Zulu_tip_sheets_7to12_new_tag.json", "w", encoding="utf8") as translated_json:
    json.dump(original_json, translated_json, indent=2)