Package translate :: Package storage :: Package xml_extract :: Module extract
[hide private]
[frames | no frames]

Source Code for Module translate.storage.xml_extract.extract

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2008-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  from lxml import etree 
 22   
 23  from translate.storage import base 
 24  from translate.misc.typecheck import accepts, Self, IsCallable, IsOneOf, Any, Class 
 25  from translate.misc.typecheck.typeclasses import Number 
 26  from translate.misc.contextlib import contextmanager, nested 
 27  from translate.misc.context import with_ 
 28  from translate.storage.xml_extract import xpath_breadcrumb 
 29  from translate.storage.xml_extract import misc 
 30  from translate.storage.placeables import xliff, StringElem 
def Nullable(t):
    """Build a typecheck predicate matching either ``t`` or ``None``.

    The result is whatever ``IsOneOf`` produces for the pair
    ``(t, NoneType)``; it is meant to be used inside ``@accepts(...)``
    signatures for arguments that may legitimately be ``None``.
    """
    none_type = type(None)
    return IsOneOf(t, none_type)
# Typecheck handle for the Translatable class, created before the class
# itself exists so that the constructor's @accepts signature can mention it.
# NOTE(review): presumably Class() resolves the name lazily -- confirm in
# translate.misc.typecheck.
TranslatableClass = Class('Translatable')


class Translatable(object):
    """A DOM node that corresponds to a translatable element.

    ``source`` is an ordered list mixing unicode text chunks with nested
    ``Translatable`` objects; the nested objects are this node's placeables.
    """

    @accepts(Self(), unicode, unicode, etree._Element, [IsOneOf(TranslatableClass, unicode)])
    def __init__(self, placeable_name, xpath, dom_node, source):
        self.dom_node = dom_node
        self.xpath = xpath
        self.placeable_name = placeable_name
        self.source = source
        # Always starts out False; code that builds translatables may
        # overwrite the flag after construction.
        self.is_inline = False

    def _get_placeables(self):
        """Return the nested Translatable objects of ``source``, in order."""
        found = []
        for chunk in self.source:
            if isinstance(chunk, Translatable):
                found.append(chunk)
        return found

    # Read-only view of the nested translatables, recomputed on every access.
    placeables = property(_get_placeables)
55
@accepts(IsCallable(), Translatable, state=[Any()])
def reduce_unit_tree(f, unit_node, *state):
    """Reduce the tree of translatables rooted at ``unit_node`` with ``f``.

    This is a thin wrapper around ``misc.reduce_tree``: ``unit_node`` is
    passed as both the second and third argument, a node's children are its
    ``placeables``, and any extra ``state`` values are forwarded unchanged.
    The return value is whatever ``misc.reduce_tree`` returns.
    """

    def get_children(node):
        return node.placeables

    return misc.reduce_tree(f, unit_node, unit_node, get_children, *state)
60
class ParseState(object):
    """Constants and variables shared while walking a DOM tree.

    An instance is threaded through ``find_translatable_dom_nodes`` and its
    helpers, which read the configuration attributes and temporarily
    overwrite the per-element ones.

    Configuration attributes:
      - ``no_translate_content_elements``: container of ``(namespace, tag)``
        pairs; tested with ``in``.
      - ``inline_elements``: container of ``(namespace, tag)`` pairs that
        mark an element as inline; tested with ``in``.
      - ``nsmap``: mapping from namespace to prefix, used to build compact
        tag names for the XPath breadcrumb.

    Per-element attributes: ``is_inline``, ``xpath_breadcrumb`` and
    ``placeable_name``.
    """

    def __init__(self, no_translate_content_elements, inline_elements=None, nsmap=None):
        """Set up the parse state.

        ``inline_elements`` and ``nsmap`` default to a fresh empty dict for
        every instance. ``None`` is used as the sentinel so that instances
        never share one mutable default object; a container passed in by
        the caller (even an empty one) is stored as-is.
        """
        if inline_elements is None:
            inline_elements = {}
        if nsmap is None:
            nsmap = {}

        self.no_translate_content_elements = no_translate_content_elements
        self.inline_elements = inline_elements
        self.is_inline = False
        self.xpath_breadcrumb = xpath_breadcrumb.XPathBreadcrumb()
        self.placeable_name = u"<top-level>"
        self.nsmap = nsmap
@accepts(etree._Element, ParseState)
def _process_placeable(dom_node, state):
    """Turn ``dom_node`` into exactly one Translatable.

    Runs ``find_translatable_dom_nodes`` on the node. A single result is
    returned as-is. An empty result is replaced by a Translatable named
    "placeable" with an empty source. More than one result is treated as an
    internal error and raises ``Exception``.
    """
    found = find_translatable_dom_nodes(dom_node, state)
    count = len(found)

    # The ideal situation: exactly one translatable came back.
    if count == 1:
        return found[0]

    # No recognized child tags, so nothing was returned; substitute a
    # generic placeable for this node.
    if count == 0:
        return Translatable(u"placeable", state.xpath_breadcrumb.xpath, dom_node, [])

    raise Exception("BUG: find_translatable_dom_nodes should never return more than a single translatable")
@accepts(etree._Element, ParseState)
def _process_placeables(dom_node, state):
    """Return the alternating placeable/text list for ``dom_node``'s children.

    For every child, in document order, the list receives the child's
    Translatable followed by the child's tail text as unicode (``u""`` when
    there is no tail). The list is what the translatable string of the
    parent is later built from.
    """
    source = []
    for child in dom_node:
        source.append(_process_placeable(child, state))
        source.append(unicode(child.tail or u""))
    return source
@accepts(etree._Element, ParseState)
def _process_translatable(dom_node, state):
    """Wrap ``dom_node`` in a Translatable and return it in a one-item list.

    The source starts with the node's leading text (``u""`` if absent) and
    continues with the alternating placeable/tail-text entries of its
    children. Name, XPath and inline flag are taken from ``state``.
    """
    source = [unicode(dom_node.text or u"")]
    source.extend(_process_placeables(dom_node, state))

    # The breadcrumb is read only after the children have been processed.
    node_xpath = state.xpath_breadcrumb.xpath
    translatable = Translatable(state.placeable_name, node_xpath, dom_node, source)
    translatable.is_inline = state.is_inline
    return [translatable]
@accepts(etree._Element, ParseState)
def _process_children(dom_node, state):
    """Collect the translatables found beneath ``dom_node``.

    Each child is searched with ``find_translatable_dom_nodes`` and the
    results are concatenated. When more than one translatable is found they
    are grouped under a single intermediate Translatable named after
    ``dom_node``'s tag; otherwise the (empty or one-item) list is returned
    directly.
    """
    _namespace, tag = misc.parse_tag(dom_node.tag)

    # Flatten the per-child result lists into one list.
    children = []
    for child in dom_node:
        children.extend(find_translatable_dom_nodes(child, state))

    if len(children) <= 1:
        return children
    return [Translatable(tag, state.xpath_breadcrumb.xpath, dom_node, children)]
124
def compact_tag(nsmap, namespace, tag):
    """Return a short textual name for ``tag`` in ``namespace``.

    If ``namespace`` is a key of ``nsmap`` the result is ``prefix:tag``,
    where the prefix is the mapped value formatted with ``%s``. Otherwise
    the result is the expanded form ``{namespace}tag``.
    """
    if namespace not in nsmap:
        return u'{%s}%s' % (namespace, tag)
    prefix = nsmap[namespace]
    return u'%s:%s' % (prefix, tag)
@accepts(etree._Element, ParseState)
def find_translatable_dom_nodes(dom_node, state):
    """Return the list of Translatables found at or below ``dom_node``.

    Nodes that are not XML elements, and processing instructions, give an
    empty list. For any other element the parse state is adjusted for the
    duration of the call (XPath breadcrumb, placeable name, inline flag)
    and the element is handled in one of two ways:

      - its ``(namespace, tag)`` is listed in
        ``state.no_translate_content_elements``: only its children are
        searched;
      - otherwise: the element itself becomes a Translatable.
    """
    # For now only XML elements are handled. Processing instructions are
    # XML elements in the inheritance hierarchy, so they are excluded
    # explicitly.
    is_element = isinstance(dom_node, etree._Element)
    if not is_element or isinstance(dom_node, etree._ProcessingInstruction):
        return []

    namespace, tag = misc.parse_tag(dom_node.tag)
    qualified_tag = (namespace, tag)

    @contextmanager
    def xpath_scope():
        # Push this element onto the breadcrumb, pop it afterwards.
        state.xpath_breadcrumb.start_tag(compact_tag(state.nsmap, namespace, tag))
        yield state.xpath_breadcrumb
        state.xpath_breadcrumb.end_tag()

    @contextmanager
    def placeable_scope():
        # Use this element's tag as the placeable name, then restore.
        saved_name = state.placeable_name
        state.placeable_name = tag
        yield state.placeable_name
        state.placeable_name = saved_name

    @contextmanager
    def inline_scope():
        # Flag the state as inline iff this element is an inline element,
        # then restore the previous flag.
        saved_inline = state.is_inline
        state.is_inline = qualified_tag in state.inline_elements
        yield state.is_inline
        state.is_inline = saved_inline

    def with_block(xpath_breadcrumb, placeable_name, is_inline):
        # The arguments are the values yielded by the three managers; the
        # work itself reads everything it needs from ``state``.
        if qualified_tag in state.no_translate_content_elements:
            return _process_children(dom_node, state)
        return _process_translatable(dom_node, state)

    managers = nested(xpath_scope(), placeable_scope(), inline_scope())
    return with_(managers, with_block)
class IdMaker(object):
    """Assign consecutive integer ids, starting at 0, to hashable objects.

    The first time an object is seen it receives the next free id; later
    requests for an equal object return the same id.
    """

    def __init__(self):
        self._obj_id_map = {}
        self._max_id = 0

    def get_id(self, obj):
        """Return the id of ``obj``, allocating a new one if necessary."""
        if self.has_id(obj):
            return self._obj_id_map[obj]
        new_id = self._max_id
        self._obj_id_map[obj] = new_id
        self._max_id = new_id + 1
        return new_id

    def has_id(self, obj):
        """Return True if ``obj`` has already been given an id."""
        return obj in self._obj_id_map
189
@accepts(Nullable(Translatable), Translatable, IdMaker)
def _to_placeables(parent_translatable, translatable, id_maker):
    """Convert ``translatable.source`` into text chunks and XLIFF placeables.

    Unicode chunks are copied through unchanged. Every nested Translatable
    gets an id from ``id_maker`` (as unicode) and becomes either an
    ``xliff.G`` wrapping its own recursively converted content (inline
    chunks) or an ``xliff.X`` that refers to the chunk's XPath (all others).
    ``parent_translatable`` is only passed along to the recursive calls.
    """
    result = []
    for chunk in translatable.source:
        if isinstance(chunk, unicode):
            result.append(chunk)
            continue

        chunk_id = unicode(id_maker.get_id(chunk))
        if not chunk.is_inline:
            result.append(xliff.X(id=chunk_id, xid=chunk.xpath))
        else:
            inner = _to_placeables(parent_translatable, chunk, id_maker)
            result.append(xliff.G(sub=inner, id=chunk_id))
    return result
@accepts(base.TranslationStore, Nullable(Translatable), Translatable, IdMaker)
def _add_translatable_to_store(store, parent_translatable, translatable, id_maker):
    """Create a unit for ``translatable`` and append it to ``store``.

    The unit is built with ``store.UnitClass``, its rich source is a single
    ``StringElem`` holding the converted chunks of the translatable, and the
    translatable's XPath is recorded as the unit's location.
    """
    unit = store.UnitClass(u'')
    chunks = _to_placeables(parent_translatable, translatable, id_maker)
    unit.rich_source = [StringElem(chunks)]
    unit.addlocation(translatable.xpath)
    store.addunit(unit)
215
@accepts(Translatable)
def _contains_translatable_text(translatable):
    """Tell whether ``translatable`` directly holds any non-blank text.

    Only the unicode chunks of ``translatable.source`` are inspected;
    nested translatables are not descended into. If every text chunk is
    empty or whitespace there is nothing to translate and False is
    returned.
    """
    for chunk in translatable.source:
        if isinstance(chunk, unicode) and chunk.strip() != u"":
            return True
    return False
@accepts(base.TranslationStore)
def _make_store_adder(store):
    """Return a callback that adds one unit to ``store`` per call.

    The callback has the signature
    ``(parent_translatable, translatable, rid)``. All calls share a single
    ``IdMaker``, so placeable ids are unique across every unit added through
    the same callback.
    """
    shared_ids = IdMaker()

    def add_to_store(parent_translatable, translatable, rid):
        # ``rid`` is part of the callback signature but is not needed here.
        _add_translatable_to_store(store, parent_translatable, translatable, shared_ids)

    return add_to_store
@accepts([Translatable], IsCallable(), Nullable(Translatable), Number)
def _walk_translatable_tree(translatables, f, parent_translatable, rid):
    """Call ``f`` for every unit-worthy translatable, depth first.

    A translatable is reported when it directly contains non-blank text and
    is not inline. ``f`` is then called as
    ``f(parent_translatable, translatable, rid)`` with ``rid`` incremented
    beforehand, and the translatable becomes the parent for its own
    placeables. Otherwise the current parent is kept. The placeables of
    every translatable are walked either way.
    """
    for translatable in translatables:
        next_parent = parent_translatable
        is_unit = _contains_translatable_text(translatable) and not translatable.is_inline
        if is_unit:
            rid += 1
            next_parent = translatable
            f(parent_translatable, translatable, rid)

        _walk_translatable_tree(translatable.placeables, f, next_parent, rid)
254
def reverse_map(a_map):
    """Return a new dict that maps every value of ``a_map`` to its key.

    The values of ``a_map`` must be hashable. If several keys share one
    value, the pair seen last during iteration wins.

    ``items()`` is used instead of ``iteritems()`` so that the function
    works on mappings under both Python 2 and Python 3.
    """
    return dict((value, key) for key, value in a_map.items())
@accepts(lambda obj: hasattr(obj, "read"), base.TranslationStore, ParseState, Nullable(IsCallable()))
def build_store(odf_file, store, parse_state, store_adder=None):
    """Extract the translatable content of an XML file into ``store``.

    ``odf_file`` is a file-like object (it must have a ``read`` attribute)
    that is parsed with lxml. ``parse_state.nsmap`` is overwritten with the
    reversed namespace map of the document root, the translatables are
    located, and ``store_adder`` is invoked for each one that yields a
    unit. When no ``store_adder`` is supplied, a default one that adds
    units to ``store`` is created.

    Returns the parsed lxml tree.
    """
    if not store_adder:
        store_adder = _make_store_adder(store)

    tree = etree.parse(odf_file)
    root = tree.getroot()
    # lxml maps prefix -> namespace; the parse state needs namespace -> prefix.
    parse_state.nsmap = reverse_map(root.nsmap)

    translatables = find_translatable_dom_nodes(root, parse_state)
    _walk_translatable_tree(translatables, store_adder, None, 0)
    return tree
270