source: subversion/applications/editors/django/osmeditor/third/ElementTree.py

Last change on this file was 13349, checked in by crschmidt, 11 years ago

Commit first pass at a web UI for doing simplified OSM object editing.

File size: 41.8 KB
Line 
1#
2# ElementTree
3# $Id: ElementTree.py 3224 2007-08-27 21:23:39Z fredrik $
4#
5# light-weight XML support for Python 1.5.2 and later.
6#
7# history:
8# 2001-10-20 fl   created (from various sources)
9# 2001-11-01 fl   return root from parse method
10# 2002-02-16 fl   sort attributes in lexical order
11# 2002-04-06 fl   TreeBuilder refactoring, added PythonDoc markup
12# 2002-05-01 fl   finished TreeBuilder refactoring
13# 2002-07-14 fl   added basic namespace support to ElementTree.write
14# 2002-07-25 fl   added QName attribute support
15# 2002-10-20 fl   fixed encoding in write
16# 2002-11-24 fl   changed default encoding to ascii; fixed attribute encoding
17# 2002-11-27 fl   accept file objects or file names for parse/write
18# 2002-12-04 fl   moved XMLTreeBuilder back to this module
19# 2003-01-11 fl   fixed entity encoding glitch for us-ascii
20# 2003-02-13 fl   added XML literal factory
21# 2003-02-21 fl   added ProcessingInstruction/PI factory
22# 2003-05-11 fl   added tostring/fromstring helpers
23# 2003-05-26 fl   added ElementPath support
24# 2003-07-05 fl   added makeelement factory method
25# 2003-07-28 fl   added more well-known namespace prefixes
26# 2003-08-15 fl   fixed typo in ElementTree.findtext (Thomas Dartsch)
27# 2003-09-04 fl   fall back on emulator if ElementPath is not installed
28# 2003-10-31 fl   markup updates
29# 2003-11-15 fl   fixed nested namespace bug
30# 2004-03-28 fl   added XMLID helper
31# 2004-06-02 fl   added default support to findtext
32# 2004-06-08 fl   fixed encoding of non-ascii element/attribute names
33# 2004-08-23 fl   take advantage of post-2.1 expat features
34# 2005-02-01 fl   added iterparse implementation
35# 2005-03-02 fl   fixed iterparse support for pre-2.2 versions
36# 2006-11-18 fl   added parser support for IronPython (ElementIron)
37# 2007-08-27 fl   fixed newlines in attributes
38#
39# Copyright (c) 1999-2007 by Fredrik Lundh.  All rights reserved.
40#
41# fredrik@pythonware.com
42# http://www.pythonware.com
43#
44# --------------------------------------------------------------------
45# The ElementTree toolkit is
46#
47# Copyright (c) 1999-2007 by Fredrik Lundh
48#
49# By obtaining, using, and/or copying this software and/or its
50# associated documentation, you agree that you have read, understood,
51# and will comply with the following terms and conditions:
52#
53# Permission to use, copy, modify, and distribute this software and
54# its associated documentation for any purpose and without fee is
55# hereby granted, provided that the above copyright notice appears in
56# all copies, and that both that copyright notice and this permission
57# notice appear in supporting documentation, and that the name of
58# Secret Labs AB or the author not be used in advertising or publicity
59# pertaining to distribution of the software without specific, written
60# prior permission.
61#
62# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
63# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
64# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
65# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
66# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
67# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
68# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
69# OF THIS SOFTWARE.
70# --------------------------------------------------------------------
71
72__all__ = [
73    # public symbols
74    "Comment",
75    "dump",
76    "Element", "ElementTree",
77    "fromstring",
78    "iselement", "iterparse",
79    "parse",
80    "PI", "ProcessingInstruction",
81    "QName",
82    "SubElement",
83    "tostring",
84    "TreeBuilder",
85    "VERSION", "XML",
86    "XMLTreeBuilder",
87    ]
88
89# parser api override (None = use default)
90parser_api = None
91
92# TODO: add support for custom namespace resolvers/default namespaces
93# TODO: add improved support for incremental parsing
94
95VERSION = "1.2.7"
96
97##
98# The <b>Element</b> type is a flexible container object, designed to
99# store hierarchical data structures in memory. The type can be
100# described as a cross between a list and a dictionary.
101# <p>
102# Each element has a number of properties associated with it:
103# <ul>
104# <li>a <i>tag</i>. This is a string identifying what kind of data
105# this element represents (the element type, in other words).</li>
106# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
107# <li>a <i>text</i> string.</li>
108# <li>an optional <i>tail</i> string.</li>
109# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
110# </ul>
111#
112# To create an element instance, use the {@link #Element} or {@link
113# #SubElement} factory functions.
114# <p>
115# The {@link #ElementTree} class can be used to wrap an element
116# structure, and convert it from and to XML.
117##
118
119import sys, re
120
try:
    import string
except ImportError:
    # The string module could not be imported (the original note here
    # mentions IronPython), so emulate the few string-module functions
    # with an object that forwards to the equivalent string methods.
    # Only ImportError is handled: any other failure is a real error
    # and should not be hidden behind the emulation.
    class string(object):
        def join(self, seq, sep):
            return sep.join(seq)
        def replace(self, text, *args):
            return text.replace(*args)
        def split(self, text, *args):
            return text.split(*args)
        def strip(self, text, *args):
            return text.strip(*args)
    # replace the class by a single instance so that the helpers can
    # be called as string.join(...), just like the module functions
    string = string()
135
136class _SimpleElementPath:
137    # emulate pre-1.2 find/findtext/findall behaviour
138    def find(self, element, tag):
139        for elem in element:
140            if elem.tag == tag:
141                return elem
142        return None
143    def findtext(self, element, tag, default=None):
144        for elem in element:
145            if elem.tag == tag:
146                return elem.text or ""
147        return default
148    def findall(self, element, tag):
149        if tag[:3] == ".//":
150            return element.getiterator(tag[3:])
151        result = []
152        for elem in element:
153            if elem.tag == tag:
154                result.append(elem)
155        return result
156
157try:
158    import ElementPath
159except ImportError:
160    # FIXME: issue warning in this case?
161    ElementPath = _SimpleElementPath()
162
class DefaultParserAPI:
    # Default parser backend.  Unless the caller supplies a parser,
    # documents are fed to a fresh XMLTreeBuilder instance.

    def parse(self, source, parser=None):
        # Parse a whole document.
        #
        # source is either an object with a read method or a file name
        # (opened here in binary mode); parser is an optional object
        # with feed/close methods.  Returns whatever parser.close()
        # returns.
        opened_here = not hasattr(source, "read")
        if opened_here:
            source = open(source, "rb")
        try:
            if not parser:
                parser = XMLTreeBuilder()
            # feed the document in chunks rather than reading it all
            # into memory at once
            while 1:
                data = source.read(32768)
                if not data:
                    break
                parser.feed(data)
            return parser.close()
        finally:
            # close only what was opened here, also when parsing
            # fails; file objects passed in stay under the caller's
            # control
            if opened_here:
                source.close()

    def iterparse(self, source, events):
        # incremental parsing is delegated to the _iterparse helper
        return _iterparse(source, events)

    def fromstring(self, text):
        # parse a document held in a string and return the result of
        # the tree builder's close method
        parser = XMLTreeBuilder()
        parser.feed(text)
        return parser.close()
184
185parser_api = default_parser_api = DefaultParserAPI()
186
187##
188# Internal element class.  This class defines the Element interface,
189# and provides a reference implementation of this interface.
190# <p>
191# You should not create instances of this class directly.  Use the
192# appropriate factory functions instead, such as {@link #Element}
193# and {@link #SubElement}.
194#
195# @see Element
196# @see SubElement
197# @see Comment
198# @see ProcessingInstruction
199
200class _ElementInterface:
201    # <tag attrib>text<child/>...</tag>tail
202
203    ##
204    # (Attribute) Element tag.
205
206    tag = None
207
208    ##
209    # (Attribute) Element attribute dictionary.  Where possible, use
210    # {@link #_ElementInterface.get},
211    # {@link #_ElementInterface.set},
212    # {@link #_ElementInterface.keys}, and
213    # {@link #_ElementInterface.items} to access
214    # element attributes.
215
216    attrib = None
217
218    ##
219    # (Attribute) Text before first subelement.  This is either a
220    # string or the value None, if there was no text.
221
222    text = None
223
224    ##
225    # (Attribute) Text after this element's end tag, but before the
226    # next sibling element's start tag.  This is either a string or
227    # the value None, if there was no text.
228
229    tail = None # text after end tag, if any
230
231    def __init__(self, tag, attrib):
232        self.tag = tag
233        self.attrib = attrib
234        self._children = []
235
236    def __repr__(self):
237        return "<Element %s at %x>" % (self.tag, id(self))
238
239    ##
240    # Creates a new element object of the same type as this element.
241    #
242    # @param tag Element tag.
243    # @param attrib Element attributes, given as a dictionary.
244    # @return A new element instance.
245
246    def makeelement(self, tag, attrib):
247        return Element(tag, attrib)
248
249    ##
250    # Returns the number of subelements.
251    #
252    # @return The number of subelements.
253
254    def __len__(self):
255        return len(self._children)
256
257    ##
258    # Returns the given subelement.
259    #
260    # @param index What subelement to return.
261    # @return The given subelement.
262    # @exception IndexError If the given element does not exist.
263
264    def __getitem__(self, index):
265        return self._children[index]
266
267    ##
268    # Replaces the given subelement.
269    #
270    # @param index What subelement to replace.
271    # @param element The new element value.
272    # @exception IndexError If the given element does not exist.
273    # @exception AssertionError If element is not a valid object.
274
275    def __setitem__(self, index, element):
276        assert iselement(element)
277        self._children[index] = element
278
279    ##
280    # Deletes the given subelement.
281    #
282    # @param index What subelement to delete.
283    # @exception IndexError If the given element does not exist.
284
285    def __delitem__(self, index):
286        del self._children[index]
287
288    ##
289    # Returns a list containing subelements in the given range.
290    #
291    # @param start The first subelement to return.
292    # @param stop The first subelement that shouldn't be returned.
293    # @return A sequence object containing subelements.
294
295    def __getslice__(self, start, stop):
296        return self._children[start:stop]
297
298    ##
299    # Replaces a number of subelements with elements from a sequence.
300    #
301    # @param start The first subelement to replace.
302    # @param stop The first subelement that shouldn't be replaced.
303    # @param elements A sequence object with zero or more elements.
304    # @exception AssertionError If a sequence member is not a valid object.
305
306    def __setslice__(self, start, stop, elements):
307        for element in elements:
308            assert iselement(element)
309        self._children[start:stop] = list(elements)
310
311    ##
312    # Deletes a number of subelements.
313    #
314    # @param start The first subelement to delete.
315    # @param stop The first subelement to leave in there.
316
317    def __delslice__(self, start, stop):
318        del self._children[start:stop]
319
320    ##
321    # Adds a subelement to the end of this element.
322    #
323    # @param element The element to add.
324    # @exception AssertionError If a sequence member is not a valid object.
325
326    def append(self, element):
327        assert iselement(element)
328        self._children.append(element)
329
330    ##
331    # Inserts a subelement at the given position in this element.
332    #
333    # @param index Where to insert the new subelement.
334    # @exception AssertionError If the element is not a valid object.
335
336    def insert(self, index, element):
337        assert iselement(element)
338        self._children.insert(index, element)
339
340    ##
341    # Removes a matching subelement.  Unlike the <b>find</b> methods,
342    # this method compares elements based on identity, not on tag
343    # value or contents.
344    #
345    # @param element What element to remove.
346    # @exception ValueError If a matching element could not be found.
347    # @exception AssertionError If the element is not a valid object.
348
349    def remove(self, element):
350        assert iselement(element)
351        self._children.remove(element)
352
353    ##
354    # Returns all subelements.  The elements are returned in document
355    # order.
356    #
357    # @return A list of subelements.
358    # @defreturn list of Element instances
359
360    def getchildren(self):
361        return self._children
362
363    ##
364    # Finds the first matching subelement, by tag name or path.
365    #
366    # @param path What element to look for.
367    # @return The first matching element, or None if no element was found.
368    # @defreturn Element or None
369
370    def find(self, path):
371        return ElementPath.find(self, path)
372
373    ##
374    # Finds text for the first matching subelement, by tag name or path.
375    #
376    # @param path What element to look for.
377    # @param default What to return if the element was not found.
378    # @return The text content of the first matching element, or the
379    #     default value no element was found.  Note that if the element
380    #     has is found, but has no text content, this method returns an
381    #     empty string.
382    # @defreturn string
383
384    def findtext(self, path, default=None):
385        return ElementPath.findtext(self, path, default)
386
387    ##
388    # Finds all matching subelements, by tag name or path.
389    #
390    # @param path What element to look for.
391    # @return A list or iterator containing all matching elements,
392    #    in document order.
393    # @defreturn list of Element instances
394
395    def findall(self, path):
396        return ElementPath.findall(self, path)
397
398    ##
399    # Resets an element.  This function removes all subelements, clears
400    # all attributes, and sets the text and tail attributes to None.
401
402    def clear(self):
403        self.attrib.clear()
404        self._children = []
405        self.text = self.tail = None
406
407    ##
408    # Gets an element attribute.
409    #
410    # @param key What attribute to look for.
411    # @param default What to return if the attribute was not found.
412    # @return The attribute value, or the default value, if the
413    #     attribute was not found.
414    # @defreturn string or None
415
416    def get(self, key, default=None):
417        return self.attrib.get(key, default)
418
419    ##
420    # Sets an element attribute.
421    #
422    # @param key What attribute to set.
423    # @param value The attribute value.
424
425    def set(self, key, value):
426        self.attrib[key] = value
427
428    ##
429    # Gets a list of attribute names.  The names are returned in an
430    # arbitrary order (just like for an ordinary Python dictionary).
431    #
432    # @return A list of element attribute names.
433    # @defreturn list of strings
434
435    def keys(self):
436        return self.attrib.keys()
437
438    ##
439    # Gets element attributes, as a sequence.  The attributes are
440    # returned in an arbitrary order.
441    #
442    # @return A list of (name, value) tuples for all attributes.
443    # @defreturn list of (string, string) tuples
444
445    def items(self):
446        return self.attrib.items()
447
448    ##
449    # Creates a tree iterator.  The iterator loops over this element
450    # and all subelements, in document order, and returns all elements
451    # with a matching tag.
452    # <p>
453    # If the tree structure is modified during iteration, the result
454    # is undefined.
455    #
456    # @param tag What tags to look for (default is to return all elements).
457    # @return A list or iterator containing all the matching elements.
458    # @defreturn list or iterator
459
460    def getiterator(self, tag=None):
461        nodes = []
462        if tag == "*":
463            tag = None
464        if tag is None or self.tag == tag:
465            nodes.append(self)
466        for node in self._children:
467            nodes.extend(node.getiterator(tag))
468        return nodes
469
470# compatibility
471_Element = _ElementInterface
472
##
# Element factory.  Returns an object that implements the standard
# Element interface.  The exact class or type of that object is
# implementation dependent, but it is always compatible with the
# {@link #_ElementInterface} class in this module.
# <p>
# The element name, attribute names, and attribute values can be
# either 8-bit ASCII strings or Unicode strings.
#
# @param tag The element name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @return An element instance.
# @defreturn Element

def Element(tag, attrib={}, **extra):
    # merge into a private copy, so that neither the caller's
    # dictionary nor the shared default value is ever modified
    merged = attrib.copy()
    merged.update(extra)
    return _ElementInterface(tag, merged)
492
##
# Subelement factory.  Creates an element instance and appends it to
# an existing element.
# <p>
# The element name, attribute names, and attribute values can be
# either 8-bit ASCII strings or Unicode strings.
#
# @param parent The parent element.
# @param tag The subelement name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @return An element instance.
# @defreturn Element

def SubElement(parent, tag, attrib={}, **extra):
    # merge into a private copy, so that neither the caller's
    # dictionary nor the shared default value is ever modified
    merged = attrib.copy()
    merged.update(extra)
    # the parent decides what kind of element object gets created
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
513
##
# Comment element factory.  Creates a special element that will be
# serialized as an XML comment.
# <p>
# The comment string can be either an 8-bit ASCII string or a Unicode
# string.
#
# @param text A string containing the comment string.
# @return An element instance, representing a comment.
# @defreturn Element

def Comment(text=None):
    # the factory function itself is used as the tag of the element
    node = Element(Comment)
    node.text = text
    return node
529
##
# PI element factory.  Creates a special element that will be
# serialized as an XML processing instruction.
#
# @param target A string containing the PI target.
# @param text A string containing the PI contents, if any.
# @return An element instance, representing a PI.
# @defreturn Element

def ProcessingInstruction(target, text=None):
    # the factory function itself is used as the tag of the element
    node = Element(ProcessingInstruction)
    # target and contents share the text attribute, separated by a
    # single space
    if text:
        node.text = target + " " + text
    else:
        node.text = target
    return node

PI = ProcessingInstruction
547
##
# QName wrapper.  This can be used to wrap a QName attribute value, in
# order to get proper namespace handling on output.
# <p>
# QName objects hash and compare like their text, so a QName is equal
# both to another QName with the same text and to the plain string.
#
# @param text A string containing the QName value, in the form {uri}local,
#     or, if the tag argument is given, the URI part of a QName.
# @param tag Optional tag.  If given, the first argument is interpreted as
#     an URI, and this argument is interpreted as a local name.
# @return An opaque object, representing the QName.

class QName:
    def __init__(self, text_or_uri, tag=None):
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri
    def __str__(self):
        return self.text
    def __hash__(self):
        return hash(self.text)
    def __cmp__(self, other):
        # three-way comparison for interpreters that consult __cmp__;
        # spelled out with < and > so that it does not depend on the
        # cmp() builtin
        if isinstance(other, QName):
            other = other.text
        return (self.text > other) - (self.text < other)
    # Rich comparisons, for interpreters that ignore __cmp__.  Each
    # one compares the wrapped text with the other operand's text (if
    # it is a QName) or with the other operand itself.
    def __eq__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text == other
    def __ne__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text != other
    def __lt__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text < other
    def __le__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text <= other
    def __gt__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text > other
    def __ge__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text >= other
571
##
# ElementTree wrapper class.  This class represents an entire element
# hierarchy, and adds some extra support for serialization to and from
# standard XML.
#
# @param element Optional root element.
# @keyparam file Optional file handle or name.  If given, the
#     tree is initialized with the contents of this XML file.

class ElementTree:

    def __init__(self, element=None, file=None):
        assert element is None or iselement(element)
        self._root = element # first node
        if file:
            # parsing replaces the root element given above, if any
            self.parse(file)

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Replaces the root element for this tree.  This discards the
    # current contents of the tree, and replaces it with the given
    # element.  Use with care.
    #
    # @param element An element instance.

    def _setroot(self, element):
        assert iselement(element)
        self._root = element

    ##
    # Loads an external XML document into this element tree.
    #
    # @param source A file name or file object.
    # @param parser An optional parser instance.  If not given, the
    #     standard {@link XMLTreeBuilder} parser is used.
    # @return The document root element.
    # @defreturn Element

    def parse(self, source, parser=None):
        # an explicit parser is always driven by the default parser
        # API; otherwise the currently selected parser API is used
        if parser:
            tree = default_parser_api.parse(source, parser)
        else:
            tree = parser_api.parse(source)
        self._root = tree
        return tree

    ##
    # Creates a tree iterator for the root element.  The iterator loops
    # over all elements in this tree, in document order.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def getiterator(self, tag=None):
        assert self._root is not None
        return self._root.getiterator(tag)

    ##
    # Finds the first toplevel element with given tag.
    # Same as getroot().find(path).
    #
    # @param path What element to look for.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path):
        assert self._root is not None
        # a path starting with "/" is made relative to the root element
        if path[:1] == "/":
            path = "." + path
        return self._root.find(path)

    ##
    # Finds the element text for the first toplevel element with given
    # tag.  Same as getroot().findtext(path).
    #
    # @param path What toplevel element to look for.
    # @param default What to return if the element was not found.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None):
        assert self._root is not None
        # a path starting with "/" is made relative to the root element
        if path[:1] == "/":
            path = "." + path
        return self._root.findtext(path, default)

    ##
    # Finds all toplevel elements with the given tag.
    # Same as getroot().findall(path).
    #
    # @param path What element to look for.
    # @return A list or iterator containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path):
        assert self._root is not None
        # a path starting with "/" is made relative to the root element
        if path[:1] == "/":
            path = "." + path
        return self._root.findall(path)

    ##
    # Writes the element tree to a file, as XML.  If a file name is
    # given, the file is opened here and closed again before this
    # method returns; a file object passed in is left open.
    #
    # @param file A file name, or a file object opened for writing.
    # @param encoding Optional output encoding (default is US-ASCII).

    def write(self, file, encoding="us-ascii"):
        assert self._root is not None
        opened_here = not hasattr(file, "write")
        if opened_here:
            file = open(file, "wb")
        try:
            if not encoding:
                encoding = "us-ascii"
            elif encoding != "utf-8" and encoding != "us-ascii":
                # an XML declaration is written only for encodings
                # other than utf-8 and us-ascii
                file.write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            self._write(file, self._root, encoding, {})
        finally:
            # close (and thereby flush) only what was opened here,
            # also when serialization fails
            if opened_here:
                file.close()

    def _write(self, file, node, encoding, namespaces):
        # write XML to file
        #
        # Serializes node and, recursively, its subelements.
        # namespaces maps the namespace URIs that are in scope to
        # their prefixes; it is updated while this element is written
        # and restored before returning.
        tag = node.tag
        if tag is Comment:
            file.write("<!-- %s -->" % _escape_cdata(node.text, encoding))
        elif tag is ProcessingInstruction:
            file.write("<?%s?>" % _escape_cdata(node.text, encoding))
        else:
            # copy into a list of our own, so that sorting works
            # whatever kind of sequence items() returns, and never
            # reorders a sequence owned by the element
            items = list(node.items())
            xmlns_items = [] # new namespaces in this scope
            try:
                if isinstance(tag, QName) or tag[:1] == "{":
                    tag, xmlns = fixtag(tag, namespaces)
                    if xmlns: xmlns_items.append(xmlns)
            except TypeError:
                _raise_serialization_error(tag)
            file.write("<" + _encode(tag, encoding))
            if items or xmlns_items:
                items.sort() # lexical order
                for k, v in items:
                    try:
                        if isinstance(k, QName) or k[:1] == "{":
                            k, xmlns = fixtag(k, namespaces)
                            if xmlns: xmlns_items.append(xmlns)
                    except TypeError:
                        _raise_serialization_error(k)
                    try:
                        if isinstance(v, QName):
                            v, xmlns = fixtag(v, namespaces)
                            if xmlns: xmlns_items.append(xmlns)
                    except TypeError:
                        _raise_serialization_error(v)
                    file.write(" %s=\"%s\"" % (_encode(k, encoding),
                                               _escape_attrib(v, encoding)))
                # namespace declarations go after the ordinary attributes
                for k, v in xmlns_items:
                    file.write(" %s=\"%s\"" % (_encode(k, encoding),
                                               _escape_attrib(v, encoding)))
            if node.text or len(node):
                file.write(">")
                if node.text:
                    file.write(_escape_cdata(node.text, encoding))
                for n in node:
                    self._write(file, n, encoding, namespaces)
                file.write("</" + _encode(tag, encoding) + ">")
            else:
                # no text and no subelements: use the empty-element form
                file.write(" />")
            # the declarations made on this element go out of scope
            # here; v is the namespace URI
            for k, v in xmlns_items:
                del namespaces[v]
        if node.tail:
            file.write(_escape_cdata(node.tail, encoding))
751
752# --------------------------------------------------------------------
753# helpers
754
##
# Checks if an object appears to be a valid element object.
#
# @param element An element instance.
# @return A true value if this is an element object.
# @defreturn flag

def iselement(element):
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    #
    # instances of the reference implementation qualify right away;
    # any other object is accepted as long as it has a tag attribute
    is_reference_type = isinstance(element, _ElementInterface)
    return is_reference_type or hasattr(element, "tag")
766
##
# Writes an element tree or element structure to sys.stdout.  This
# function should be used for debugging only.
# <p>
# The exact output format is implementation dependent.  In this
# version, it's written as an ordinary XML file.
#
# @param elem An element tree or an individual element.

def dump(elem):
    # debugging
    if isinstance(elem, ElementTree):
        tree = elem
    else:
        # wrap a bare element, so that the tree writer can be used
        tree = ElementTree(elem)
    tree.write(sys.stdout)
    # finish the output with a newline, unless the tail of the root
    # element already ends with one
    tail = tree.getroot().tail
    if tail and tail[-1] == "\n":
        return
    sys.stdout.write("\n")
784
785def _encode(s, encoding):
786    try:
787        return s.encode(encoding)
788    except AttributeError:
789        return s # 1.5.2: assume the string uses the right encoding
790
791if sys.version[:3] == "1.5":
792    _escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2
793else:
794    _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"'))
795
796_escape_map = {
797    "&": "&amp;",
798    "<": "&lt;",
799    ">": "&gt;",
800    '"': "&quot;",
801}
802
803_namespace_map = {
804    # "well-known" namespace prefixes
805    "http://www.w3.org/XML/1998/namespace": "xml",
806    "http://www.w3.org/1999/xhtml": "html",
807    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
808    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
809}
810
811def _raise_serialization_error(text):
812    raise TypeError(
813        "cannot serialize %r (type %s)" % (text, type(text).__name__)
814        )
815
def _encode_entity(text, pattern=_escape):
    # map reserved and non-ascii characters to numerical entities
    def escape_entities(m, entity_map=_escape_map):
        # translate every character of the matched run: characters
        # listed in the escape map get their named entity, all others
        # become numeric character references
        pieces = []
        for char in m.group():
            entity = entity_map.get(char)
            if entity is None:
                entity = "&#%d;" % ord(char)
            pieces.append(entity)
        return string.join(pieces, "")
    try:
        substituted = pattern.sub(escape_entities, text)
        return _encode(substituted, "ascii")
    except TypeError:
        # text is not something the pattern can be applied to
        _raise_serialization_error(text)
831
832#
833# the following functions assume an ascii-compatible encoding
834# (or "utf-16")
835
def _escape_cdata(text, encoding=None, replace=string.replace):
    # escape character data
    #
    # The text is encoded first, if an encoding is given; text that
    # cannot be represented in that encoding is returned as
    # entity-encoded text instead.  After that, "&", "<" and ">" are
    # replaced, in that order.
    try:
        if encoding:
            try:
                text = _encode(text, encoding)
            except UnicodeError:
                return _encode_entity(text)
        for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            # replace is called only if the character actually occurs
            if char in text:
                text = replace(text, char, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
853
def _escape_attrib(text, encoding=None, replace=string.replace):
    # escape attribute value
    #
    # The text is encoded first, if an encoding is given; text that
    # cannot be represented in that encoding is returned as
    # entity-encoded text instead.  After that, the characters below
    # are replaced in the listed order ("&" comes first).
    try:
        if encoding:
            try:
                text = _encode(text, encoding)
            except UnicodeError:
                return _encode_entity(text)
        replacements = (
            ("&", "&amp;"),
            ("\"", "&quot;"),
            ("<", "&lt;"),
            (">", "&gt;"),
            ("\n", "&#10;"),
            )
        for char, entity in replacements:
            # replace is called only if the character actually occurs
            if char in text:
                text = replace(text, char, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
875
def fixtag(tag, namespaces):
    # given a decorated tag (of the form {uri}tag), return prefixed
    # tag and namespace declaration, if any
    #
    # namespaces maps namespace URIs to prefixes, and is updated in
    # place when a new prefix is assigned.  The declaration is an
    # (attribute name, uri) tuple, or None if nothing has to be
    # declared.
    if isinstance(tag, QName):
        tag = tag.text
    namespace_uri, local_name = string.split(tag[1:], "}", 1)
    xmlns = None
    prefix = namespaces.get(namespace_uri)
    if prefix is None:
        # not in scope yet: use the well-known prefix if there is
        # one, or generate a new prefix otherwise
        prefix = _namespace_map.get(namespace_uri)
        if prefix is None:
            prefix = "ns%d" % len(namespaces)
        namespaces[namespace_uri] = prefix
        # no declaration is emitted for the "xml" prefix
        if prefix != "xml":
            xmlns = ("xmlns:%s" % prefix, namespace_uri)
    return "%s:%s" % (prefix, local_name), xmlns
895
896##
897# Parses an XML document into an element tree.
898#
899# @param source A filename or file object containing XML data.
900# @param parser An optional parser instance.  If not given, the
901#     standard {@link XMLTreeBuilder} parser is used.
902# @return An ElementTree instance
903
def parse(source, parser=None):
    # An explicitly given parser is handed to default_parser_api;
    # without one, the module-level parser_api does the parsing.  The
    # result is wrapped in an ElementTree either way.
    if not parser:
        root = parser_api.parse(source)
    else:
        root = default_parser_api.parse(source, parser)
    return ElementTree(root)
910
911##
912# Parses an XML document into an element tree incrementally, and reports
913# what's going on to the user.
914#
915# @param source A filename or file object containing XML data.
916# @param events A list of events to report back.  If omitted, only "end"
917#     events are reported.
918# @return A (event, elem) iterator.
919
def iterparse(source, events=None):
    # incremental parsing is delegated to the module-level parser_api;
    # events is passed through unchanged (including None)
    event_stream = parser_api.iterparse(source, events)
    return event_stream
922
923class _iterparse:
924
925    def __init__(self, source, events):
926        if not hasattr(source, "read"):
927            source = open(source, "rb")
928        self._file = source
929        self._events = []
930        self._index = 0
931        self.root = self._root = None
932        self._parser = XMLTreeBuilder()
933        # wire up the parser for event reporting
934        parser = self._parser._parser
935        append = self._events.append
936        if events is None:
937            events = ["end"]
938        for event in events:
939            if event == "start":
940                try:
941                    parser.ordered_attributes = 1
942                    parser.specified_attributes = 1
943                    def handler(tag, attrib_in, event=event, append=append,
944                                start=self._parser._start_list):
945                        append((event, start(tag, attrib_in)))
946                    parser.StartElementHandler = handler
947                except AttributeError:
948                    def handler(tag, attrib_in, event=event, append=append,
949                                start=self._parser._start):
950                        append((event, start(tag, attrib_in)))
951                    parser.StartElementHandler = handler
952            elif event == "end":
953                def handler(tag, event=event, append=append,
954                            end=self._parser._end):
955                    append((event, end(tag)))
956                parser.EndElementHandler = handler
957            elif event == "start-ns":
958                def handler(prefix, uri, event=event, append=append):
959                    try:
960                        uri = _encode(uri, "ascii")
961                    except UnicodeError:
962                        pass
963                    append((event, (prefix or "", uri)))
964                parser.StartNamespaceDeclHandler = handler
965            elif event == "end-ns":
966                def handler(prefix, event=event, append=append):
967                    append((event, None))
968                parser.EndNamespaceDeclHandler = handler
969
970    def next(self):
971        while 1:
972            try:
973                item = self._events[self._index]
974            except IndexError:
975                if self._parser is None:
976                    self.root = self._root
977                    try:
978                        raise StopIteration
979                    except NameError:
980                        raise IndexError
981                # load event buffer
982                del self._events[:]
983                self._index = 0
984                data = self._file.read(16384)
985                if data:
986                    self._parser.feed(data)
987                else:
988                    self._root = self._parser.close()
989                    self._parser = None
990            else:
991                self._index = self._index + 1
992                return item
993
994    try:
995        iter
996        def __iter__(self):
997            return self
998    except NameError:
999        def __getitem__(self, index):
1000            return self.next()
1001
##
# Parses an XML document from a string constant.  This function can
# be used to embed "XML literals" in Python code.
#
# @param text A string containing XML data.
# @return An Element instance.
# @defreturn Element
1009
def XML(text):
    # use the module-level parser_api if one is set, and fall back on
    # default_parser_api otherwise
    return (parser_api or default_parser_api).fromstring(text)
1013
##
# Parses an XML document from a string constant, and also returns
# a dictionary which maps from element ids to elements.
#
# @param text A string containing XML data.
# @return A tuple containing an Element instance and a dictionary.
# @defreturn (Element, dictionary)
1021
def XMLID(text):
    # parse with the module-level parser_api if one is set, and with
    # default_parser_api otherwise
    root = (parser_api or default_parser_api).fromstring(text)
    # map "id" attribute values to elements.  elements without an id
    # (or with an empty one) are left out; if an id occurs more than
    # once, the element visited last wins
    elements_by_id = {}
    for node in root.getiterator():
        key = node.get("id")
        if key:
            elements_by_id[key] = node
    return root, elements_by_id
1031
##
# Parses an XML document from a string constant.  Same as {@link #XML}.
#
# @def fromstring(text)
# @param text A string containing XML data.
# @return An Element instance.
# @defreturn Element
1039
# plain alias: fromstring is bound to the same function object as XML
fromstring = XML
1041
##
# Generates a string representation of an XML element, including all
# subelements.
#
# @param element An Element instance.
# @param encoding Optional output encoding.  This value is passed on
#     to the write method of {@link #ElementTree}.
# @return An encoded string containing the XML data.
# @defreturn string
1049
def tostring(element, encoding=None):
    # The tree is written to a minimal file-like object: an empty
    # instance whose write attribute is the append method of a list.
    # The collected fragments are joined into the result string.
    class _collector:
        pass
    chunks = []
    sink = _collector()
    sink.write = chunks.append
    ElementTree(element).write(sink, encoding)
    return string.join(chunks, "")
1058
1059##
1060# Generic element structure builder.  This builder converts a sequence
1061# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1062# #TreeBuilder.end} method calls to a well-formed element structure.
1063# <p>
1064# You can use this class to build an element structure using a custom XML
1065# parser, or a parser for some other XML-like format.
1066#
1067# @param element_factory Optional element factory.  This factory
1068#    is called to create new Element instances, as necessary.
1069
class TreeBuilder:

    def __init__(self, element_factory=None):
        self._data = [] # data collector
        self._elem = [] # element stack
        self._last = None # last element
        self._tail = None # true if we're after an end tag
        if element_factory is None:
            element_factory = _ElementInterface
        # called as factory(tag, attrs) for every start call
        self._factory = element_factory

    ##
    # Flushes the parser buffers, and returns the toplevel document
    # element.
    #
    # @return An Element instance.
    # @defreturn Element
    # @exception AssertionError If elements are still open, or if no
    #     element was created at all.

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        # identity test; an equality test against None would run the
        # comparison operators of whatever the element factory returned
        assert self._last is not None, "missing toplevel element"
        return self._last

    # Moves collected character data to the element it belongs to: the
    # tail of the last element if that element has been closed, its
    # text otherwise.  Data collected before the first element is
    # discarded.

    def _flush(self):
        if self._data:
            if self._last is not None:
                text = "".join(self._data)
                if self._tail:
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            self._data = []

    ##
    # Adds text to the current element.  The text is only collected
    # here; it is attached to an element by the next start, end, or
    # data flush.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.  The element is appended to the innermost
    # open element, if there is one.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        self._last = elem = self._factory(tag, attrs)
        if self._elem:
            self._elem[-1].append(elem)
        self._elem.append(elem)
        self._tail = 0 # data that follows is element text
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element
    # @exception AssertionError If the name does not match the
    #     innermost open element.

    def end(self, tag):
        self._flush()
        self._last = self._elem.pop()
        assert self._last.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   self._last.tag, tag)
        self._tail = 1 # data that follows is tail text
        return self._last
1146
1147##
1148# Element structure builder for XML source data, based on the
1149# <b>expat</b> parser.
1150#
1151# @keyparam target Target object.  If omitted, the builder uses an
1152#     instance of the standard {@link #TreeBuilder} class.
1153# @keyparam html Predefine HTML entities.  This flag is not supported
1154#     by the current implementation.
1155# @see #ElementTree
1156# @see #TreeBuilder
1157
class XMLTreeBuilder:

    # @param html Unused by this implementation.
    # @param target Object that receives the start, data, and end
    #     calls, and whose close method provides the result.  A new
    #     TreeBuilder instance is used if omitted.
    # @exception ImportError If xml.parsers.expat cannot be imported.

    def __init__(self, html=0, target=None):
        try:
            from xml.parsers import expat
        except ImportError:
            raise ImportError(
                "No module named expat; use SimpleXMLTreeBuilder instead"
                )
        # "}" is the namespace separator: names in a namespace are
        # reported as uri + "}" + local name, and _fixname prepends the
        # "{" to get the {uri}local form
        self._parser = parser = expat.ParserCreate(None, "}")
        if target is None:
            target = TreeBuilder()
        self._target = target
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        # state of the doctype scanner in _default: None when outside
        # a declaration, a list of collected tokens when inside one
        self._doctype = None
        # replacement text for entities that expat does not know; to
        # be filled in by the user, keyed by entity name
        self.entity = {}

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return _encode(text, "ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    def _start(self, tag, attrib_in):
        # start handler for the old-style interface, where expat
        # passes the attributes as a dictionary
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = self._fixtext(value)
        return self._target.start(tag, attrib)

    def _start_list(self, tag, attrib_in):
        # start handler for the ordered_attributes interface, where
        # expat passes a flat [name, value, name, value, ...] list
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
        return self._target.start(tag, attrib)

    def _data(self, text):
        return self._target.data(self._fixtext(text))

    def _end(self, tag):
        return self._target.end(self._fixname(tag))

    def _default(self, text):
        # default handler: resolves entities through the entity
        # dictionary, and scans doctype declarations
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities.  only the dictionary lookup
            # is guarded, so that a KeyError raised inside the target
            # is not mistaken for a missing entity
            try:
                value = self.entity[text[1:-1]]
            except KeyError:
                from xml.parsers import expat
                err = expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                # provide the same details as errors raised by expat
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
            self._target.data(value)
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                # tokens so far: name, PUBLIC or SYSTEM, and then the
                # quoted identifiers
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                # strip the quotes
                if pubid:
                    pubid = pubid[1:-1]
                self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # Handles a doctype declaration.  The default implementation does
    # nothing.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        pass

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        self._parser.Parse(data, 0)

    ##
    # Finishes feeding data to the parser.  The builder cannot be used
    # after this call.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        self._parser.Parse("", 1) # end of data
        tree = self._target.close()
        del self._target, self._parser # get rid of circular references
        return tree
1305
1306
1307# --------------------------------------------------------------------
1308# load platform specific extensions
1309
# Replace the module-level parser_api with a platform-specific
# implementation, when one can be imported.  In both branches, a failed
# import is ignored, and parser_api is left untouched.

# "cli": presumably IronPython, going by the ElementIron module name
if sys.platform == "cli":
    try:
        import ElementIron
    except ImportError:
        pass # fall back on optional pyexpat emulation
    else:
        parser_api = ElementIron.ParserAPI(TreeBuilder)

# "java...": presumably Jython, going by the ElementJava module name
elif sys.platform.startswith("java"):
    try:
        import ElementJava
    except ImportError:
        pass
    else:
        parser_api = ElementJava.ParserAPI(TreeBuilder)
Note: See TracBrowser for help on using the repository browser.