1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Fill localization files with suggested translations based on
22 translation memory and existing translations.
23 """
24
25 from translate.storage import factory
26 from translate.storage import xliff, po
27 from translate.search import match
28
29
# Module-level cache for the translation-memory matcher built by memory().
tmmatcher = None


def memory(tmfiles, max_candidates=1, min_similarity=75, max_length=1000):
    """Return the shared translation-memory matcher, building it on first use.

    The matcher is stored in the module-level ``tmmatcher``.  Once it has been
    created, every later call returns that same object and ignores all of its
    arguments.

    :param tmfiles: a single TM file, or a list of TM files, each loaded
        through ``factory.getobject``.
    :param max_candidates: forwarded to ``match.matcher``.
    :param min_similarity: forwarded to ``match.matcher``.
    :param max_length: forwarded to ``match.matcher``.
    :return: the cached matcher object.
    """
    global tmmatcher

    # Fast path: the matcher has already been built by an earlier call.
    if tmmatcher is not None:
        return tmmatcher

    if isinstance(tmfiles, list):
        stores = [factory.getobject(path) for path in tmfiles]
    else:
        stores = factory.getobject(tmfiles)

    tmmatcher = match.matcher(
        stores,
        max_candidates=max_candidates,
        min_similarity=min_similarity,
        max_length=max_length,
    )
    return tmmatcher
44
45
def pretranslate_file(input_file, output_file, template_file, tm=None, min_similarity=75, fuzzymatching=True):
    """Pretranslate one file and write the result to ``output_file``.

    The input (and, when given, the template) is loaded with
    ``factory.getobject``; the work itself is delegated to
    ``pretranslate_store``.

    :param input_file: file to pretranslate.
    :param output_file: object with a ``write`` method receiving ``str(store)``.
    :param template_file: file holding existing translations, or ``None``.
    :param tm: translation memory passed through to ``pretranslate_store``.
    :param min_similarity: passed through to ``pretranslate_store``.
    :param fuzzymatching: passed through to ``pretranslate_store``.
    :return: always ``1``.  NOTE(review): presumably the success flag expected
        by the convert framework -- confirm against the caller.
    """
    source_store = factory.getobject(input_file)

    if template_file is None:
        reference_store = None
    else:
        reference_store = factory.getobject(template_file)

    result = pretranslate_store(source_store, reference_store, tm, min_similarity, fuzzymatching)
    output_file.write(str(result))
    return 1
56
57
def match_template_location(input_unit, template_store):
    """Return the template unit that shares a location with ``input_unit``.

    When the unit has no locations, or its first location contains a ``:``,
    the lookup is delegated to ``match_template_id`` instead.
    NOTE(review): presumably a ``:`` marks gettext-style ``file:line``
    locations, which are not usable as identifiers -- confirm.

    Otherwise each location is looked up in ``template_store.locationindex``
    and the first hit that has the same source text and a non-empty target is
    returned.

    :param input_unit: unit to find a translation for.
    :param template_store: store exposing a ``locationindex`` mapping.
    :return: the matching unit, or ``None`` when no location matches.
    """
    locs = input_unit.getlocations()
    location_based = bool(locs) and ":" not in locs[0]
    if not location_based:
        return match_template_id(input_unit, template_store)

    index = template_store.locationindex
    for loc in locs:
        candidate = index.get(loc, None)
        if candidate is None:
            continue
        # A location hit only counts when the source text is unchanged and
        # the template unit actually carries a translation.
        if candidate.source == input_unit.source and candidate.gettargetlen() > 0:
            return candidate
    return None
74
def match_template_id(input_unit, template_store):
    """Return the template unit whose id equals the id of ``input_unit``.

    :param input_unit: unit providing ``getid()``.
    :param template_store: store providing ``findid()``.
    :return: whatever ``template_store.findid`` returns for that id.
    """
    unit_id = input_unit.getid()
    return template_store.findid(unit_id)
79
def match_source(input_unit, template_store):
    """Return the template unit whose source text equals that of ``input_unit``.

    Matching is done on the source string via ``template_store.findunit``
    (not on the unit id).

    :param input_unit: unit whose ``source`` is looked up.
    :param template_store: store providing ``findunit()``; may be ``None``
        when pretranslating from a translation memory only, in which case
        nothing can match.
    :return: the unit returned by ``findunit``, or ``None`` when there is no
        template store or the source is at most one character long.
    """
    # Without a template there is nothing to search; callers may reach this
    # point with only TM matchers configured.
    if template_store is None:
        return None

    # Sources of a single character (or empty) are never matched by text.
    # NOTE(review): presumably because such strings are too ambiguous to
    # match on text alone -- confirm.
    if len(input_unit.source) > 1:
        return template_store.findunit(input_unit.source)
    return None
87
def match_fuzzy(input_unit, matchers):
    """Return the best fuzzy candidate from the first matcher that has one.

    The matchers are consulted in order; the first one whose ``matches()``
    result is non-empty wins and its first candidate is returned.

    :param input_unit: unit whose ``source`` is matched.
    :param matchers: iterable of objects providing ``matches(source)``.
    :return: the first candidate found, or ``None`` when no matcher yields any.
    """
    for engine in matchers:
        candidates = engine.matches(input_unit.source)
        if not candidates:
            continue
        return candidates[0]
    return None
94
95
def pretranslate_unit(input_unit, template_store, matchers=None, mark_reused=False, match_locations=False):
    """Pretranslate a unit or return unchanged if no translation was found.

    Lookup order:

    1. If a template store is given, look for an exact match by location
       (``match_locations=True``) or by unit id.
    2. If that yields no translated unit and ``matchers`` is non-empty, try an
       exact source-text match in the template (only when a template exists),
       then fall back to fuzzy matching through ``matchers``.

    :param input_unit: the unit to fill in; it is modified in place.
    :param template_store: store with existing translations, or ``None``.
    :param matchers: sequence of fuzzy matchers, or ``None``/empty to disable
        the source/fuzzy fallback.
    :param mark_reused: when true, set ``reused = True`` on the template unit
        found by ``template_store.findunit`` for the matched source.
    :param match_locations: match template units by location instead of id.
    :return: ``input_unit`` (the same object that was passed in).
    """
    matching_unit = None

    # Step 1: exact template matching.
    if template_store:
        if match_locations:
            matching_unit = match_template_location(input_unit, template_store)
        else:
            matching_unit = match_template_id(input_unit, template_store)

    if matching_unit and matching_unit.gettargetlen() > 0:
        input_unit.merge(matching_unit, authoritative=True)
    elif matchers:
        # Step 2a: exact source match.  This needs a template store; when
        # pretranslating from a translation memory alone there is none, and
        # calling match_source() with None would raise AttributeError.
        if template_store is not None:
            matching_unit = match_source(input_unit, template_store)

        # Step 2b: fuzzy matching when the source match gave no translation.
        if not matching_unit or not matching_unit.gettargetlen():
            matching_unit = match_fuzzy(input_unit, matchers)

        if matching_unit and matching_unit.gettargetlen() > 0:
            if isinstance(input_unit, xliff.xliffunit):
                # XLIFF units record the suggestion as an alternative
                # translation instead of merging it into the target.
                input_unit.addalttrans(matching_unit.target, origin="fish", sourcetxt=matching_unit.source)
            else:
                input_unit.merge(matching_unit, authoritative=True)

    # Optionally flag the template unit that supplied the translation.
    if mark_reused and matching_unit and template_store:
        original_unit = template_store.findunit(matching_unit.source)
        if original_unit is not None:
            original_unit.reused = True

    return input_unit
134
135
def prepare_template_pofile(template_store):
    """PO format specific template preparation logic.

    Every unit of ``template_store`` that reports itself as obsolete is
    resurrected, so that it takes part in the matching that follows.

    :param template_store: store whose ``units`` are inspected and updated
        in place.
    """
    for entry in template_store.units:
        if not entry.isobsolete():
            continue
        entry.resurrect()
142
143
def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True):
    """Pretranslate every translatable unit of ``input_store`` in place.

    :param input_store: store whose units are to be filled in.
    :param template_store: store with existing translations, or ``None``.
    :param tm: translation memory file(s) handed to ``memory()``; only used
        when ``fuzzymatching`` is true.
    :param min_similarity: minimum similarity passed to the matchers.
    :param fuzzymatching: when false, no matchers are created at all.
    :return: ``input_store`` (the same object that was passed in).
    """
    matchers = []

    # --- template preparation -------------------------------------------
    if template_store is not None:
        template_store.makeindex()

        # Format-specific hook: a module-level function named after the
        # store's class (``prepare_template_<ClassName>``), if one exists.
        hook_name = "prepare_template_%s" % template_store.__class__.__name__
        module_names = globals()
        if hook_name in module_names:
            module_names[hook_name](template_store)

        if fuzzymatching:
            template_matcher = match.matcher(
                template_store,
                max_candidates=1,
                min_similarity=min_similarity,
                max_length=3000,
                usefuzzy=True,
            )
            template_matcher.addpercentage = False
            matchers.append(template_matcher)

    # --- translation memory ---------------------------------------------
    if fuzzymatching and tm:
        tm_matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
        tm_matcher.addpercentage = False
        matchers.append(tm_matcher)

    # --- main loop ------------------------------------------------------
    # Location-based matching is only used for PO files whose header declares
    # an accelerator marker of '&' or '~'.
    use_locations = False
    if isinstance(input_store, po.pofile):
        marker = input_store.parseheader().get('X-Accelerator-Marker')
        use_locations = marker in ('&', '~')

    for unit in input_store.units:
        if not unit.istranslatable():
            continue
        pretranslate_unit(unit, template_store, matchers, match_locations=use_locations)

    return input_store
178
179
def main(argv=None):
    """Command-line entry point: build the option parser and run it.

    :param argv: argument list handed to ``parser.run``; ``None`` is passed
        through unchanged.
    """
    from translate.convert import convert

    # Each supported input extension is registered both on its own and paired
    # with its template extension; all of them are handled by
    # pretranslate_file.
    formats = {}
    for in_ext, out_ext in (("pot", "po"), ("po", "po"), ("xlf", "xlf")):
        handler = (out_ext, pretranslate_file)
        formats[in_ext] = handler
        formats[(in_ext, out_ext)] = handler

    parser = convert.ConvertOptionParser(
        formats,
        usetemplates=True,
        allowmissingtemplate=True,
        description=__doc__,
    )

    parser.add_option(
        "", "--tm", dest="tm", default=None,
        help="The file to use as translation memory when fuzzy matching",
    )
    parser.passthrough.append("tm")

    default_similarity = 75
    similarity_help = "The minimum similarity for inclusion (default: %d%%)" % default_similarity
    parser.add_option(
        "-s", "--similarity", dest="min_similarity", default=default_similarity,
        type="float", help=similarity_help,
    )
    parser.passthrough.append("min_similarity")

    parser.add_option(
        "--nofuzzymatching", dest="fuzzymatching", action="store_false",
        default=True, help="Disable fuzzy matching",
    )
    parser.passthrough.append("fuzzymatching")

    parser.run(argv)


if __name__ == "__main__":
    main()
203