Package translate :: Package tools :: Module pretranslate
[hide private]
[frames] | no frames]

Source Code for Module translate.tools.pretranslate

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2008 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Fill localization files with suggested translations based on 
 22  translation memory and existing translations. 
 23  """ 
 24   
 25  from translate.storage import factory 
 26  from translate.storage import xliff, po 
 27  from translate.search import match 
 28   
 29  # We don't want to reinitialise the TM each time, so let's store it here. 
 30  tmmatcher = None 
 31   
 32   
def memory(tmfiles, max_candidates=1, min_similarity=75, max_length=1000):
    """Return the shared translation-memory matcher, creating it on first use.

    The matcher is cached in the module-level ``tmmatcher``. Once it has been
    created, later calls return the cached object unchanged and every
    argument passed to this function is ignored.

    ``tmfiles`` is either a single TM file or a list of TM files; each one is
    loaded with ``factory.getobject``. The remaining arguments are forwarded
    unchanged to ``match.matcher``.
    """
    global tmmatcher
    if tmmatcher is not None:
        # An earlier call already built the matcher; reuse it.
        return tmmatcher

    if isinstance(tmfiles, list):
        stores = [factory.getobject(tmfile) for tmfile in tmfiles]
    else:
        stores = factory.getobject(tmfiles)

    tmmatcher = match.matcher(
        stores,
        max_candidates=max_candidates,
        min_similarity=min_similarity,
        max_length=max_length,
    )
    return tmmatcher
44 45
def pretranslate_file(input_file, output_file, template_file, tm=None, min_similarity=75, fuzzymatching=True):
    """Pretranslate one file using old translations and translation memory.

    ``input_file`` (and ``template_file``, unless it is None) are loaded with
    ``factory.getobject``. The stores are handed to ``pretranslate_store``
    together with ``tm``, ``min_similarity`` and ``fuzzymatching``, and the
    string form of the result is written to ``output_file``.

    Always returns 1.
    """
    source_store = factory.getobject(input_file)

    if template_file is None:
        old_store = None
    else:
        old_store = factory.getobject(template_file)

    result = pretranslate_store(source_store, old_store, tm, min_similarity, fuzzymatching)
    output_file.write(str(result))
    return 1
56 57
def match_template_location(input_unit, template_store):
    """Return the template unit matching *input_unit* by location, or None.

    PO files generated by moz2po and oo2po use locations as unique string
    identifiers, so for those the location is checked first and the source
    text second. Such files are detected cheaply: the unit has locations and
    the first one contains no ':'. Any other unit (no locations, or a ':' in
    the first location) is matched by id via ``match_template_id`` instead,
    because location matching makes no sense for normal gettext files.
    """
    locations = input_unit.getlocations()

    if locations and ":" not in locations[0]:
        for location in locations:
            candidate = template_store.locationindex.get(location, None)
            if candidate is None:
                continue
            # Only accept a unit that has the same source text and actually
            # carries a translation.
            same_source = candidate.source == input_unit.source
            if same_source and candidate.gettargetlen() > 0:
                return candidate
        return None

    return match_template_id(input_unit, template_store)
74
def match_template_id(input_unit, template_store):
    """Return the template unit that has the same id as *input_unit*.

    The result is whatever ``template_store.findid`` returns for the id
    reported by ``input_unit.getid()``.
    """
    return template_store.findid(input_unit.getid())
79
def match_source(input_unit, template_store):
    """Return the template unit whose source text equals that of *input_unit*.

    Matching is done on the source string through ``template_store.findunit``
    (not on the unit id).

    Returns None, without consulting the store, when:

    - ``template_store`` is None (there is no template to search), or
    - the source is empty or a single character.
    """
    if template_store is None:
        return None

    source = input_unit.source
    # hack for weird mozilla single letter strings, we don't want to
    # match them by anything but locations
    if not source or len(source) <= 1:
        return None

    return template_store.findunit(source)
87
def match_fuzzy(input_unit, matchers):
    """Return the best fuzzy match for *input_unit* from a queue of matchers.

    The matchers are tried in order. The first one whose ``matches()`` call
    yields a non-empty result wins, and its first candidate is returned.
    Returns None when no matcher produces a candidate.
    """
    for engine in matchers:
        candidates = engine.matches(input_unit.source)
        if not candidates:
            continue
        return candidates[0]
    return None
94 95
def pretranslate_unit(input_unit, template_store, matchers=None, mark_reused=False, match_locations=False):
    """Pretranslate a unit or return it unchanged if no translation was found.

    Lookup order:

    1. If a template store is given, look the unit up in it, by location
       when ``match_locations`` is true and by id otherwise. A hit that has
       a target is merged into ``input_unit``.
    2. Otherwise, if ``matchers`` is non-empty, try an exact source match in
       the template store (when there is one) and then fall back to fuzzy
       matching. A hit that has a target is added as an alt-trans for XLIFF
       units and merged for every other unit type.

    When ``mark_reused`` is true and a match was found, the template unit
    with the same source gets ``reused = True`` set on it.

    Returns ``input_unit``, which is modified in place.
    """
    matching_unit = None

    # do template matching
    if template_store:
        if match_locations:
            matching_unit = match_template_location(input_unit, template_store)
        else:
            matching_unit = match_template_id(input_unit, template_store)

    if matching_unit and matching_unit.gettargetlen() > 0:
        input_unit.merge(matching_unit, authoritative=True)
    elif matchers:
        # Quickly try an exact match by source. This needs a template store:
        # a TM-only run has matchers but no template, and looking up a unit
        # on a missing store would raise AttributeError.
        if template_store:
            matching_unit = match_source(input_unit, template_store)

        if not matching_unit or not matching_unit.gettargetlen():
            # do fuzzy matching
            matching_unit = match_fuzzy(input_unit, matchers)

        if matching_unit and matching_unit.gettargetlen() > 0:
            # FIXME: should we dispatch here instead of this crude type check
            if isinstance(input_unit, xliff.xliffunit):
                # FIXME: what about origin, lang and matchquality
                input_unit.addalttrans(matching_unit.target, origin="fish", sourcetxt=matching_unit.source)
            else:
                input_unit.merge(matching_unit, authoritative=True)

    # FIXME: ugly hack required by pot2po to mark old translations as
    # reused for the new file.
    if mark_reused and matching_unit and template_store:
        original_unit = template_store.findunit(matching_unit.source)
        if original_unit is not None:
            original_unit.reused = True

    return input_unit
134 135
def prepare_template_pofile(template_store):
    """PO-specific template preparation: resurrect obsolete units.

    Calls ``resurrect()`` on every unit of ``template_store`` whose
    ``isobsolete()`` is true. Works in place and returns nothing.
    """
    # do we want to consider obsolete translations?
    obsolete_units = (unit for unit in template_store.units if unit.isobsolete())
    for stale in obsolete_units:
        stale.resurrect()
142 143
def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True):
    """Do the actual pretranslation of a whole store.

    Preparation:

    - If a template store is given, it is indexed, and a format-specific
      hook named ``prepare_template_<ClassName>`` is run when this module
      defines one (for example ``prepare_template_pofile``). With
      ``fuzzymatching`` enabled, a fuzzy matcher over the template is
      queued.
    - If ``tm`` is given and ``fuzzymatching`` is enabled, the shared TM
      matcher from ``memory()`` is queued after the template matcher.

    Every translatable unit of ``input_store`` is then passed through
    ``pretranslate_unit``. Location-based template matching is used only
    when the input is a PO file whose header has an ``X-Accelerator-Marker``
    of ``&`` or ``~``.

    Returns ``input_store``, whose units are modified in place.
    """
    matchers = []

    # prepare template
    if template_store is not None:
        template_store.makeindex()

        # template preparation based on type
        hook_name = "prepare_template_%s" % template_store.__class__.__name__
        if hook_name in globals():
            prepare_hook = globals()[hook_name]
            prepare_hook(template_store)

        if fuzzymatching:
            # create template matcher
            # FIXME: max_length hardcoded
            template_matcher = match.matcher(
                template_store,
                max_candidates=1,
                min_similarity=min_similarity,
                max_length=3000,
                usefuzzy=True,
            )
            template_matcher.addpercentage = False
            matchers.append(template_matcher)

    # prepare tm
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        tm_matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
        tm_matcher.addpercentage = False
        matchers.append(tm_matcher)

    # Only PO files carrying a known accelerator marker are matched by location.
    match_locations = False
    if isinstance(input_store, po.pofile):
        accelerator = input_store.parseheader().get('X-Accelerator-Marker')
        match_locations = accelerator in ('&', '~')

    # main loop
    for unit in input_store.units:
        if not unit.istranslatable():
            continue
        pretranslate_unit(unit, template_store, matchers, match_locations=match_locations)

    return input_store
178 179
def main(argv=None):
    """Command-line entry point: build the option parser and run it on *argv*.

    Registers ``pretranslate_file`` as the handler for pot/po -> po and
    xlf -> xlf, with and without a template, and adds the ``--tm``,
    ``-s/--similarity`` and ``--nofuzzymatching`` options, all of which are
    passed through to the handler.
    """
    from translate.convert import convert

    # Each input format is registered twice: on its own (no template) and
    # paired with the matching template format.
    formats = {}
    for input_ext, output_ext in (("pot", "po"), ("po", "po"), ("xlf", "xlf")):
        handler = (output_ext, pretranslate_file)
        formats[input_ext] = handler
        formats[(input_ext, output_ext)] = handler

    parser = convert.ConvertOptionParser(
        formats,
        usetemplates=True,
        allowmissingtemplate=True,
        description=__doc__,
    )

    defaultsimilarity = 75
    similarity_help = "The minimum similarity for inclusion (default: %d%%)" % defaultsimilarity

    parser.add_option(
        "", "--tm", dest="tm", default=None,
        help="The file to use as translation memory when fuzzy matching")
    parser.add_option(
        "-s", "--similarity", dest="min_similarity", default=defaultsimilarity,
        type="float", help=similarity_help)
    parser.add_option(
        "--nofuzzymatching", dest="fuzzymatching", action="store_false",
        default=True, help="Disable fuzzy matching")

    for option_name in ("tm", "min_similarity", "fuzzymatching"):
        parser.passthrough.append(option_name)

    parser.run(argv)
# Run the command-line interface when this module is executed as a script.
if __name__ == '__main__':
    main()