source: subversion/applications/rendering/parking/mapnik/pxdom.py @ 26267

Last change on this file since 26267 was 23613, checked in by kdrangmeister, 9 years ago

initial version of mapnik-to-bw

File size: 191.0 KB
Line 
1"""pxdom - stand-alone embeddable pure-Python DOM implementation
2
3Fully-compliant with DOM Level 3 Core/XML and Load/Save Recommendations.
4Includes pure-Python non-validating parser.
5"""
6
# Module metadata. The version and the release date are tuples of integers.
__version__= 1,6
__author__= 'Andrew Clover <and@doxdesk.com>'
__date__= 2008,5,1
# Names exported by a star-import of this module.
# NOTE(review): 'getDOMImplementations' is listed here, but the function
# defined further down is called getDOMImplementationList - confirm that every
# exported name really exists, otherwise a star-import will fail.
__all__= ['getDOMImplementation', 'getDOMImplementations', 'parse', 'parseString', 'DOMException']


# Setup, utility functions
# ============================================================================

import os, sys, string, StringIO, urlparse, urllib, httplib
# Short alias for the string-module replace function.
r= string.replace
18
def _insertMethods():
    """Monkey-patch specially-named methods into classes

    In this source, not all members are defined directly inside their class
    definitions; some are organised into aspects and defined together later
    in the file, to improve readability. This function is called at the end to
    combine the externally-defined members, whose names are in the format
    _class__member, into the classes they are meant to be in.

    Every module global whose name starts with a single underscore and
    contains a later double underscore is split at the first double
    underscore into a class name and a member name; the value is then set as
    that member on the global of that class name. Raises KeyError if no global
    with the class name exists.
    """
    # Work on a snapshot so the loop never walks a live view of the namespace.
    for key, value in list(globals().items()):
        # find()>=1 excludes names beginning with a double underscore
        # (__version__, __all__, ...), where the match is at index 0.
        if key[:1]=='_' and key.find('__')>=1:
            class_, method= key[1:].split('__', 1)
            setattr(globals()[class_], method, value)
32
33
# Backwards-compatibility boolean type (<2.2.1)
#
# Evaluating the bare name True raises NameError on interpreters that lack the
# boolean constants; in that case equivalent values are produced by identity
# comparisons and stored through globals().
try:
    True
except NameError:
    globals()['True'], globals()['False']= None is None, None is not None

# Use sets where available for low-level character matching
#
# If the sets module cannot be imported, ImmutableSet becomes an identity
# function, so the character classes built with it stay plain strings.
try:
    from sets import ImmutableSet
except ImportError:
    ImmutableSet= lambda x: x

# Check unicode is supported (Python 1.6+), provide dummy class to use with
# isinstance
#
# Without unicodedata the global name 'unicode' is set to None (code further
# down tests 'unicode is not None') and Unicode is an empty placeholder class.
# Otherwise Unicode is the type of unicode strings.
try:
    import unicodedata
except ImportError:
    globals()['unicode']= None
    class Unicode: pass
else:
    Unicode= type(unicode(''))
    import unicodedata, codecs

# Allow thread-specific storage when threading is available
#
# Without the thread module get_ident always returns None.
try:
    from thread import get_ident
except ImportError:
    get_ident= lambda: None
66
# XML character classes. Provide only an XML 1.1 character model for NAMEs, as
# 1.0's rules are insanely complex.
#
DEC= ImmutableSet('0123456789')
HEX= ImmutableSet('0123456789abcdefABCDEF')
LS= ('\r\n', '\r')
WHITE= ' \t\n\r'

# Control characters that may not appear literally: 0x00-0x08, 0x0B, 0x0C,
# 0x0E-0x1F and 0x7F. Tab, LF and CR are the only C0 controls left out of the
# set. The whole 0x0E-0x1F run must be listed, including 0x1A-0x1F.
NOTCHAR= ImmutableSet(
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F'
)
# ASCII characters that may not start a name.
NOTFIRST= ImmutableSet('.-0123456789')
# ASCII characters that may not appear anywhere in a name.
NOTNAME= ImmutableSet(' \t\n\r!"#$%&\'()*+,/;<=>?@[\\]^`{|}~')
# Characters that _encodeURI %-encodes: codes 0-32 and 127-255, plus a few
# printable ASCII characters. The backslash is written as an explicit escape.
NOTURI= ImmutableSet(
    ''.join(map(chr, list(range(0, 33))+list(range(127, 256))))+'<>"{}\\^`'
)

if unicode is not None:
    # Additional line separators and white space outside ASCII.
    LSU= unichr(0x85), unichr(0x2028)
    WHITEU= unichr(0x85)+unichr(0x2028)
    # Non-ASCII characters that may not appear literally: 0x80-0x84,
    # 0x86-0x9F, 0xFFFE and 0xFFFF.
    NOTCHARU= ImmutableSet(
        unicode('\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F', 'iso-8859-1')
       +unichr(0xFFFE)+unichr(0xFFFF)
    )
    # Both tables hold (start, end) code point pairs; _checkName treats each
    # pair as a half-open range, start included and end excluded.
    NOTFIRSTU= (0xB7,0xB8), (0x300,0x370), (0x203F,0x2041)
    NOTNAMEU= (
        (0x80,0xB7), (0xB8,0xC0), (0xD7,0xD8), (0xF7,0xF8), (0x037E,0x037F), (0x2000,0x200C), (0x200E,0x203F),
        (0x2041,0x2070), (0x2190,0x2C00), (0x2FF0,0x3001), (0xE000,0xF900), (0xFDD0,0xFDF0), (0xFFFE, 0x10000)
    )
94
# Unicode character normalisation (>=2.3). Also includes a kludge for
# composing-characters that we can't check through unicodedata, see
# 'Character Model for the World Wide Web', Appendix C
#
# CNORM stays False unless unicode strings exist and unicodedata offers
# normalize(); EXTRACOMPOSERS is only defined when CNORM is True.
CNORM= False
if unicode is not None and hasattr(unicodedata, 'normalize'):
    CNORM= True
    EXTRACOMPOSERS= ''.join(map(
        unichr,
        [
            0x09BE, 0x09D7, 0x0B3E, 0x0B56, 0x0B57, 0x0BBE, 0x0BD7, 0x0CC2,
            0x0CD5, 0x0CD6, 0x0D3E, 0x0D57, 0x0DCF, 0x0DDF, 0x0FB5, 0x0FB7,
            0x102E
        ] + range(0x1161, 0x1176) + range(0x11A8, 0x11C2)
    ))
107
def dictadd(a, b):
    """Return a copy of mapping a updated with the items of b

    Neither argument is modified. Where a key occurs in both, the value from
    b is the one kept.
    """
    merged= a.copy()
    merged.update(b)
    return merged
112
# Limits for repr() output. REPR_MAX_LIST is the number of items that
# DOMList.__repr__ shows before appending '...'.
# NOTE(review): REPR_MAX_LEN is presumably the matching limit for string
# lengths; its use is not visible in this part of the file - confirm.
REPR_MAX_LEN= 12
REPR_MAX_LIST=3

# Special namespace URIs
#
XMNS= 'http://www.w3.org/XML/1998/namespace'
NSNS= 'http://www.w3.org/2000/xmlns/'
HTNS= 'http://www.w3.org/1999/xhtml'
DTNS= 'http://www.w3.org/TR/REC-xml'
# Maps the prefixes 'xmlns' and 'xml' to their namespace URIs.
FIXEDNS= {'xmlns': NSNS, 'xml': XMNS}
123
class _NONS:
    """Type of the internal 'no namespaces' marker value

    The single instance, NONS, is used internally as a namespaceURI value to
    signal that the non-namespace version of a node or method is in use; the
    localName that goes with it is then the complete nodeName. This is not
    the same as None, which is the null namespace.
    """
    def __str__(self):
        return '(non-namespace)'

# The one shared marker instance.
NONS= _NONS()
135
# Media types to allow in addition to anything labelled '...+xml' when using
# parameter supported-media-types-only
#
# Every entry needs its own trailing comma: adjacent string literals are
# joined into one string by the compiler.
XMLTYPES= [
    'text/xml', 'application/xml', 'application/xml-dtd', 'text/xsl',
    'text/xml-external-parsed-entity', 'application/xml-external-parsed-entity'
]

# Elements defined as EMPTY in XHTML for parameter pxdom-html-compatible
#
HTMLEMPTY= [
    'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img',
    'input', 'isindex', 'link', 'meta', 'param'
]
150
def _checkName(name, nc= False):
    """Check name string, raise exception if not well-formed

    Optionally check it also matches NCName (no colons)

    Raises InvalidCharacterErr for an empty name, for a first character that
    may not start a name and for any character that may not appear in a name.
    Raises NamespaceErr when nc is true and the name contains a colon.
    """
    if name=='':
        raise InvalidCharacterErr(name, '')
    first= name[0]
    if first in NOTFIRST:
        raise InvalidCharacterErr(name, first)
    if isinstance(name, Unicode):
        for c0, c1 in NOTFIRSTU:
            if ord(first)>=c0 and ord(first)<c1:
                # Report the offending first character. The loop variable
                # used further down does not exist yet at this point.
                raise InvalidCharacterErr(name, first)
    for char in name:
        if char in NOTNAME or char in NOTCHAR:
            raise InvalidCharacterErr(name, char)
        if isinstance(char, Unicode):
            if char in NOTCHARU:
                raise InvalidCharacterErr(name, char)
            for c0, c1 in NOTNAMEU:
                if ord(char)>=c0 and ord(char)<c1:
                    raise InvalidCharacterErr(name, char)
    if nc and ':' in name:
        raise NamespaceErr(name, None)
175
def _splitName(name):
    """Split a qualified name into prefix and localName

    prefix may be None if no prefix is used; both will be None if the name
    is not a valid qualified name, that is if it has more than one colon or
    an empty part on either side of a colon (the empty string included).
    """
    # Splitting at most twice yields three parts for any name with two or
    # more colons, which is then rejected by the length checks.
    parts= name.split(':', 2)
    if '' not in parts:
        if len(parts)==2:
            return tuple(parts)
        if len(parts)==1:
            return (None, name)
    return (None, None)
189
def _encodeURI(s):
    """Turn a string from a SYSTEM ID or xml:base attribute into a URI string

    %-encode disallowed characters. A unicode string is encoded to UTF-8
    first, so each byte of a multi-byte character is escaped separately.
    """
    if isinstance(s, Unicode):
        s= s.encode('utf-8')
    # Collect the pieces and join once instead of growing a string per
    # character.
    pieces= []
    for c in s:
        if c in NOTURI:
            pieces.append('%%%02X'%ord(c))
        else:
            pieces.append(c)
    return ''.join(pieces)
204
205
class DOMObject:
    """Base class for objects implementing DOM interfaces

    Provide properties in a way compatible with old versions of Python:
    subclass should provide method _get_propertyName to make a read-only
    property, and also _set_propertyName for a writable. If the readonly
    property is set, all other properties become immutable.
    """
    def __init__(self, readonly= False):
        self._readonly= readonly
    def _get_readonly(self):
        return self._readonly
    def _set_readonly(self, value):
        self._readonly= value

    def __getattr__(self, key):
        """Resolve a missing attribute through its _get_ method

        Raises AttributeError for names starting with an underscore and for
        names that have no getter.
        """
        # Private names are never properties. Refusing them here also stops
        # the getter lookup below from recursing on '_get_...' names.
        if key[:1]=='_':
            raise AttributeError(key)
        try:
            getter= getattr(self, '_get_'+key)
        except AttributeError:
            # Report the property name that was asked for, not the getter's.
            raise AttributeError(key)
        return getter()

    def __setattr__(self, key, value):
        """Store private names directly, route others through _set_ methods

        Raises NoModificationAllowedErr when the object is readonly or the
        property only has a getter, and AttributeError when the property
        does not exist at all.
        """
        if key[:1]=='_':
            self.__dict__[key]= value
            return

        # When an object is readonly, there are a few attributes that can be set
        # regardless. Readonly is one (obviously), but due to a wart in the DOM
        # spec it must also be possible to set nodeValue and textContent to
        # anything on nodes where these properties are defined to be null (with no
        # effect). Check specifically for these property names as a nasty hack
        # to conform exactly to the spec.
        #
        if self._readonly and key not in ('readonly', 'nodeValue', 'textContent'):
            raise NoModificationAllowedErr(self, key)
        try:
            setter= getattr(self, '_set_'+key)
        except AttributeError:
            if hasattr(self, '_get_'+key):
                raise NoModificationAllowedErr(self, key)
            raise AttributeError(key)
        setter(value)
251
252
253# Node-structure classes
254# ============================================================================
255
class DOMList(DOMObject):
    """Read-only sequence usable through both the DOM and the Python protocols

    The items are kept in the plain list self._list. If a list is passed to
    the constructor it is used directly, not copied.
    """
    def __init__(self, initial= None):
        DOMObject.__init__(self)
        if initial is not None:
            self._list= initial
        else:
            self._list= []

    def __repr__(self):
        # Show at most REPR_MAX_LIST items, with a marker if more exist.
        shown= repr(self._list[:REPR_MAX_LIST])
        if len(self._list)>REPR_MAX_LIST:
            shown= shown+'...'
        return '<pxdom.%s %s>' % (self.__class__.__name__, shown)

    # DOM-style methods
    #
    def _get_length(self):
        return len(self._list)

    def item(self, index):
        # Unlike Python indexing, an out-of-range or negative index gives
        # None.
        outOfRange= index<0 or index>=len(self._list)
        if outOfRange:
            return None
        return self._list[index]

    def contains(self, str):
        return str in self._list

    # Python-style methods
    #
    def __len__(self):
        return len(self._list)

    def __getitem__(self, index):
        return self._list[index]

    def __setitem__(self, index, value):
        # Public item assignment is always refused.
        position= str(index)
        raise NoModificationAllowedErr(self, 'item(%s)' % position)

    def __delitem__(self, index):
        # Public item deletion is always refused.
        position= str(index)
        raise NoModificationAllowedErr(self, 'item(%s)' % position)

    # Mutable sequence convenience methods for internal use
    #
    def _index(self, value):
        return self._list.index(value)

    def _append(self, value):
        if self._readonly:
            position= str(len(self._list))
            raise NoModificationAllowedErr(self, 'item(%s)' % position)
        self._list.append(value)

    def _insertseq(self, index, values):
        if self._readonly:
            raise NoModificationAllowedErr(self, 'item(%s)' % str(index))
        # Assigning to an empty slice inserts without replacing anything.
        self._list[index:index]= values
312
313
class NodeList(DOMList):
    """Abstract DOMList of nodes that belongs to an owner node

    The list starts out empty; the owner is kept in self._ownerNode.
    """
    def __init__(self, ownerNode= None):
        DOMList.__init__(self, None)
        self._ownerNode= ownerNode
320
321
class ChildNodeList(NodeList):
    """NodeList of children of a parent node

    Python-style alterations to the list result in calls to the parent's
    corresponding DOM methods. This seems to be required by a literal reading of
    the Python DOM bindings spec, but tends not to be relied on in practice.
    """
    def __setitem__(self, index, value):
        # Item assignment is a replaceChild on the owner.
        replaceChild= self._ownerNode.replaceChild
        replaceChild(value, self._list[index])

    def __delitem__(self, index):
        # Item deletion is a removeChild on the owner.
        removeChild= self._ownerNode.removeChild
        removeChild(self._list[index])
334
335
class NodeListByTagName(NodeList):
    """Live NodeList returned by Element.getElementsByTagName[NS] methods

    The internal _list is only a cache of the matching elements. It is
    stamped with the owner's _sequence value when built and rebuilt on the
    next access if the owner's _sequence differs from that stamp.
    """
    def __init__(self, ownerNode, namespaceURI, localName):
        NodeList.__init__(self, ownerNode)
        self._namespaceURI= namespaceURI
        self._localName= localName
        # No stamp yet.
        self._sequence= None

    def _sync(self):
        """Rebuild the cache if the owner's sequence stamp has moved on
        """
        if self._sequence!=self._ownerNode._sequence:
            self._calculate()

    def _get_length(self):
        self._sync()
        return NodeList._get_length(self)

    def item(self, index):
        self._sync()
        return NodeList.item(self, index)

    def __getitem__(self, index):
        self._sync()
        return NodeList.__getitem__(self, index)

    def __len__(self):
        self._sync()
        return NodeList.__len__(self)

    def __repr__(self):
        # Always recalculate for display; a DOMException while doing so is
        # ignored and whatever is in the list is shown.
        try:
            self._calculate()
        except DOMException:
            pass
        return NodeList.__repr__(self)

    def _calculate(self):
        """Recalculate the list

        This method does the actual work of the getElementsByTagName call
        """
        self._list= []
        self._walk(self._ownerNode)
        self._sequence= self._ownerNode._sequence

    def _walk(self, element):
        """Recurse through child elements looking for matches
        """
        for childNode in element.childNodes:
            if childNode.nodeType==Node.ELEMENT_NODE:
                wantedNS= self._namespaceURI
                wantedName= self._localName
                if wantedName=='*' and wantedNS in ('*', NONS, childNode.namespaceURI):
                    # Any name, in a matching or wildcard namespace.
                    matched= True
                elif wantedNS=='*' and wantedName==childNode.localName:
                    # Given local name, in any namespace.
                    matched= True
                elif wantedNS is NONS and wantedName==childNode.nodeName:
                    # Non-namespace lookup compares the complete nodeName.
                    matched= True
                else:
                    matched= (
                        wantedNS==childNode.namespaceURI and
                        wantedName==childNode.localName
                    )
                if matched:
                    self._list.append(childNode)
            # Descend into elements and into entity references.
            if childNode.nodeType in (Node.ELEMENT_NODE,Node.ENTITY_REFERENCE_NODE):
                self._walk(childNode)
402
403
class NamedNodeMap(NodeList):
    """Abstract dictionary-style object used for mappings

    The nodes are held in an ordered list and looked up by a linear search.
    A subclass initialises with the nodeType of the nodes it is going to hold
    as values; nodes of any other type are rejected when written.
    """
    def __init__(self, ownerNode, childType):
        NodeList.__init__(self, ownerNode)
        self._childTypes= (childType,)

    def getNamedItemNS(self, namespaceURI, localName):
        # An empty-string namespace means the null namespace.
        if namespaceURI=='':
            namespaceURI= None
        for candidate in self._list:
            # With the NONS marker, localName is compared to the whole
            # nodeName.
            byNodeName= namespaceURI is NONS and localName==candidate.nodeName
            if byNodeName or (
                namespaceURI==candidate.namespaceURI and
                localName==candidate.localName
            ):
                return candidate
        return None

    def setNamedItemNS(self, arg):
        replaced= self.getNamedItemNS(arg.namespaceURI, arg.localName)
        self._writeItem(replaced, arg)
        return replaced

    def removeNamedItemNS(self, namespaceURI, localName):
        removed= self.getNamedItemNS(namespaceURI, localName)
        if removed is None:
            raise NotFoundErr(self, namespaceURI, localName)
        self._writeItem(removed, None)
        return removed

    def getNamedItem(self, name):
        return self.getNamedItemNS(NONS, name)

    def setNamedItem(self, arg):
        replaced= self.getNamedItemNS(NONS, arg.nodeName)
        self._writeItem(replaced, arg)
        return replaced

    def removeNamedItem(self, name):
        return self.removeNamedItemNS(NONS, name)

    def _writeItem(self, oldItem, newItem):
        """Internal backend for all add/remove/replace operations

        If oldItem is not None it is removed; if newItem is not None it is
        added; if both are not None the new item is written to the previous
        position of the oldItem.
        """
        if self._readonly:
            raise NoModificationAllowedErr(self, 'namedItem')
        adding= newItem is not None
        if adding:
            if newItem.nodeType not in self._childTypes:
                raise HierarchyRequestErr(newItem, self)
            if newItem.ownerDocument is not self._ownerNode.ownerDocument:
                raise WrongDocumentErr(self._ownerNode.ownerDocument, newItem)

        # Without an old item the write position is the end of the list.
        if oldItem is None:
            index= len(self._list)
        else:
            try:
                index= self._list.index(oldItem)
            except ValueError:
                raise NotFoundErr(self, NONS, oldItem.nodeName)
            oldItem._containerNode= None

        if adding:
            newItem._containerNode= self._ownerNode
            replacement= [newItem]
        else:
            replacement= []
        # A one-item slice replaces or removes the old item; at the end of
        # the list the slice is empty, so the new item is appended.
        self._list[index:index+1]= replacement

    # Python dictionary-style methods. This is inconsistent with how Python
    # dictionaries normally work; it is only here for compatibility with
    # minidom and the behaviour is not guaranteed. Use the standard DOM methods
    # getNamedItem etc. instead.
    #
    # In all three methods an integer key is a list position, a tuple key is
    # (namespaceURI, localName) and any other key is a node name.
    #
    def __getitem__(self, key):
        if isinstance(key, type(0)):
            return self._list[key]
        if isinstance(key, type(())):
            return self.getNamedItemNS(key[0], key[1])
        return self.getNamedItem(key)

    def __delitem__(self, key):
        if isinstance(key, type(0)):
            self._writeItem(self._list[key], None)
            return None
        if isinstance(key, type(())):
            self.removeNamedItemNS(key[0], key[1])
            return None
        return self.removeNamedItem(key)

    def __setitem__(self, key, value):
        byPosition= isinstance(key, type(0))
        byNSName= isinstance(key, type(()))
        if isinstance(value, Attr):
            # An Attr node is stored as it is.
            if byPosition:
                self._writeItem(self._list[key], value)
            elif byNSName:
                self._ownerNode.setAttributeNodeNS(value)
            else:
                self._ownerNode.setAttributeNode(value)
            return None
        # Anything else is taken as a new attribute value.
        if byPosition:
            self._list[key].value= value
            return None
        if byNSName:
            return self._ownerNode.setAttributeNS(key[0], key[1], value)
        return self._ownerNode.setAttribute(key, value)

    def values(self):
        return self._list[:]
    def keys(self):
        return [node.nodeName for node in self._list]
    def items(self):
        return [(node.nodeName, node.value) for node in self._list]
    def keysNS(self):
        return [(node.namespaceURI, node.localName) for node in self._list]
    def itemsNS(self):
        return [
            ((node.namespaceURI, node.localName), node.value)
            for node in self._list
        ]
523
524
class AttrMap(NamedNodeMap):
    """A node mapping for an Element's attributes

    When an attribute is removed, or replaced by one of another name, and
    the doctype declares a default or fixed value for it, the declared
    attribute is created again on the owner element.
    """
    def __init__(self, ownerNode):
        NamedNodeMap.__init__(self, ownerNode, Node.ATTRIBUTE_NODE)

    def _writeItem(self, oldItem, newItem):
        # An attribute that is held by another node may not be added.
        if newItem is not None and newItem.nodeType==Node.ATTRIBUTE_NODE:
            if newItem._containerNode not in (None, self._ownerNode):
                raise InuseAttributeErr(newItem)
        NamedNodeMap._writeItem(self, oldItem, newItem)

        # Nothing went away, or the replacement carries the same name: no
        # default can be needed.
        if oldItem is None:
            return
        if newItem is not None and newItem.nodeName==oldItem.nodeName:
            return

        ownerDocument= self._ownerNode.ownerDocument
        if ownerDocument is None:
            return
        doctype= ownerDocument.doctype
        if doctype is None:
            return
        declarationList= doctype._attlists.getNamedItem(
            self._ownerNode.nodeName
        )
        if declarationList is None:
            return
        declaration= declarationList.declarations.getNamedItem(oldItem.nodeName)
        if declaration is None:
            return
        if declaration.defaultType in (
            AttributeDeclaration.DEFAULT_VALUE, AttributeDeclaration.FIXED_VALUE
        ):
            declaration._createAttribute(self._ownerNode)
553
554
555# Core non-node classes
556# ============================================================================
557
class DOMImplementation(DOMObject):
    """Main pxdom implementation interface

    The pxdom module itself implements the DOMImplementationSource interface,
    so you can get hold of the module's singleton implementation using
    pxdom.getDOMImplementation('').
    """
    # Mode constants, numbered 1 and 2.
    [MODE_SYNCHRONOUS,MODE_ASYNCHRONOUS
    ]=range(1, 3)

    # Supported feature names (lower case) and their supported versions.
    _features= {
        'xml':  ['1.0', '2.0', '3.0'],
        'core':        ['2.0', '3.0'],
        'ls':                 ['3.0'],
        'xmlversion':  ['1.0', '1.1']
    }
    def hasFeature(self, feature, version):
        """Return whether the named feature is supported in the given version

        The feature name is matched without regard to case and one leading
        '+' is ignored. An empty string or None as version matches any
        supported version.
        """
        f= feature.lower()
        if f[:1]=='+':
            f= f[1:]
        if f in self._features:
            if version in self._features[f]+['', None]:
                return True
        return False
    def getFeature(self, feature, version):
        """Return this object if the feature is supported, otherwise None
        """
        if self.hasFeature(feature, version):
            return self
        return None

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a Document, optionally with a doctype and a root element

        An empty namespaceURI is treated as None. The doctype, if given, is
        appended before the root element, which is only created when
        qualifiedName is not None.
        """
        if namespaceURI=='':
            namespaceURI= None
        document= Document()
        if doctype is not None:
            document.appendChild(doctype)
        if qualifiedName is not None:
            root= document.createElementNS(namespaceURI, qualifiedName)
            document.appendChild(root)
        return document

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Create a DocumentType with read-only entity and notation maps

        Raises whatever _checkName raises for a malformed name, and
        NamespaceErr if the name cannot be split into prefix and local name.
        """
        _checkName(qualifiedName)
        if _splitName(qualifiedName)[1] is None:
            raise NamespaceErr(qualifiedName, None)
        doctype= DocumentType(None, qualifiedName, publicId, systemId)
        doctype.entities.readonly= True
        doctype.notations.readonly= True
        return doctype

# The module's single implementation object.
_implementation= DOMImplementation()
607
def getDOMImplementation(features= ''):
    """DOM 3 Core hook to get the Implementation object

    features is a whitespace-separated list of feature names, each of which
    may be followed by a version number. If features is supplied, only return
    the implementation if all features are satisfied; otherwise return None.

    A token counts as the version of the preceding feature only when it
    starts with a digit. Every feature is therefore checked, including one
    whose version is left out and one that comes last in the list.
    """
    tokens= features.split()
    index= 0
    while index<len(tokens):
        feature= tokens[index]
        index= index+1
        # An omitted version is passed as '', which hasFeature accepts as
        # 'any version'.
        version= ''
        if index<len(tokens) and tokens[index][:1] in '0123456789':
            version= tokens[index]
            index= index+1
        if not _implementation.hasFeature(feature, version):
            return None
    return _implementation
619
def getDOMImplementationList(features= ''):
    """DOM 3 Core method to get implementations in a list wrapper

    The returned list is read-only. It holds the module's implementation if
    that satisfies the requested features, and is empty otherwise.
    """
    found= DOMImplementationList()
    candidate= getDOMImplementation(features)
    if candidate is not None:
        found._append(candidate)
    # Lock the list only after it has been filled.
    found.readonly= True
    return found
631
632
class DOMImplementationList(DOMList):
    """List of DOMImplementation objects

    Adds nothing to DOMList; it exists only because the spec expects an
    interface of this name.
    """
639
640
class DOMConfiguration(DOMObject):
    """Mapping of DOM-related option name strings to values

    The 'infoset' and 'canonical-form' parameters map to multiple other
    parameters rather than working independently.

    Some DOM parameters are optional features that may not be supported
    and in this case cannot be set. pxdom adds some extra parameters for
    extended pxdom-specific configuration, these are prefixed 'pxdom-'.
    """
    # Parameter name -> (default value, whether the value may be changed).
    _defaults= {
        # Core configuration
        'canonical-form':                            (False, True ),
        'cdata-sections':                            (True,  True ),
        'check-character-normalization':             (False, CNORM),
        'comments':                                  (True,  True ),
        'datatype-normalization':                    (False, False),
        'element-content-whitespace':                (True,  True ),
        'entities':                                  (True,  True ),
        'error-handler':                             (None,  True ),
        'ignore-unknown-character-denormalizations': (True,  False),
        'namespaces':                                (True,  True ),
        'namespace-declarations':                    (True,  True ),
        'normalize-characters':                      (False, CNORM),
        'schema-location':                           (None,  False),
        'schema-type':                               (None,  False),
        'split-cdata-sections':                      (True,  True ),
        'validate':                                  (False, False),
        'validate-if-schema':                        (False, False),
        'well-formed':                               (True,  True ),
        # LSParser-specific configuration
        'charset-overrides-xml-encoding':            (True,  True ),
        'disallow-doctype':                          (False, True ),
        'resource-resolver':                         (None,  True ),
        'supported-media-types-only':                (False, True),
        # LSSerializer-specific configuration
        'discard-default-content':                   (True,  True ),
        'format-pretty-print':                       (False, True ),
        'xml-declaration':                           (True,  True ),
        # Non-standard extensions
        'pxdom-assume-element-content':              (False, True ),
        'pxdom-resolve-resources':                   (True,  True ),
        'pxdom-html-compatible':                     (False, True ),
        # Switches to make required normalizeDocument operations optional
        'pxdom-normalize-text':                      (True,  True ),
        'pxdom-reset-identity':                      (True,  True ),
        'pxdom-update-entities':                     (True,  True ),
        'pxdom-preserve-base-uri':                   (True,  True ),
        'pxdom-examine-cdata-sections':              (True,  True ),
        # Normally used only inside an entity reference
        'pxdom-fix-unbound-namespaces':              (False, True )
    }

    # Each complex parameter is a pair of name tuples indexed by a boolean:
    # entry [False] (the first tuple) lists the parameters that must be
    # False, entry [True] (the second tuple) those that must be True.
    _complexparameters= {
        'infoset': (
            ('cdata-sections', 'datatype-normalization', 'entities', 'validate-if-schema'), # must be False
            ('comments', 'element-content-whitespace', 'namespace-declarations', 'namespaces', 'well-formed') # must be True
        ),
        'canonical-form': (
            ('cdata-sections', 'entities', 'format-pretty-print', 'normalize-characters', 'discard-default-content', 'xml-declaration', 'pxdom-html-compatible'), # must be False
            ('element-content-whitespace', 'namespace-declarations', 'namespaces', 'well-formed') # must be True
        ),
    }

    def __init__(self, copyFrom= None):
        """Make a new DOMConfiguration mapping

        Use either default values, or the current values of another
        DOMConfiguration (copy-constructor)
        """
        DOMObject.__init__(self)
        self._parameters= {}
        # The parameter names always come from this class's own defaults.
        for name in self._defaults.keys():
            if copyFrom is not None:
                self._parameters[name]= copyFrom._parameters[name]
            else:
                self._parameters[name]= self._defaults[name][0]

    def canSetParameter(self, name, value):
        """Return whether setParameter(name, value) would be accepted

        'infoset' can always be set. Any other known parameter can be set to
        its current value, or to any value if it is marked as changeable.
        An unknown parameter name gives False.
        """
        name= name.lower()
        if name=='infoset':
            return True
        # Only look at the current value of a parameter that exists, so an
        # unknown name falls through to the False default below.
        if name in self._parameters and self._parameters[name]==value:
            return True
        return self._defaults.get(name, (None, False))[1]

    def getParameter(self, name):
        """Return the value of a parameter

        A complex parameter is False as soon as one of the parameters it
        stands for differs from the required value. Otherwise 'infoset' is
        True and any other complex parameter has its own stored value.
        Raises NotFoundErr for an unknown name.
        """
        name= name.lower()
        if name in self._complexparameters:
            for b in False, True:
                for p in self._complexparameters[name][b]:
                    if self._parameters[p]!=b:
                        return False
            if name=='infoset':
                return True
        if name not in self._parameters:
            raise NotFoundErr(self, None, name)
        return self._parameters[name]

    def setParameter(self, name, value):
        """Set the value of a parameter

        Setting a complex parameter to a true value first sets all the
        parameters it stands for; 'infoset' has no stored value of its own.
        Raises NotFoundErr for an unknown name and NotSupportedErr when a
        parameter that cannot be changed is given a different value.
        """
        name= name.lower()
        if name in self._complexparameters:
            if value:
                for b in False, True:
                    for p in self._complexparameters[name][b]:
                        self._parameters[p]= b
            if name=='infoset':
                return
        if name not in self._defaults:
            raise NotFoundErr(self, None, name)
        if self._parameters[name]!=value:
            if not self._defaults[name][1]:
                raise NotSupportedErr(self, name)
            self._parameters[name]= value

    def _get_parameterNames(self):
        # 'infoset' has no stored value, so it is added by hand.
        return DOMList(list(self._parameters.keys())+['infoset'])

    # Convenience method to do character normalization and/or check character
    # normalization on a string, depending on the parameters set on the config
    #
    def _cnorm(self, text, node, isParse= False):
        nc= self._parameters['normalize-characters']
        cn= self._parameters['check-character-normalization']
        # Nothing to do unless one of the parameters is on and the text is a
        # non-empty unicode string.
        if (not nc and not cn) or text=='' or not isinstance(text, Unicode):
            return text
        normal= unicodedata.normalize('NFC', text)
        if nc:
            text= normal
        # Report text that is not normalised (when it was not just
        # normalised here) and, when checking, text that begins with a
        # composing character.
        if (not nc and text!=normal) or (cn and (
          unicodedata.combining(text[0])!=0 or text[0] in EXTRACOMPOSERS
        )):
            self._handleError(CheckNormErr(node, isParse))
        return text

    # Convenience method for pxdom to callback the error-handler if one is set
    # on the DOMConfiguration, and raise an exception if the error or handler
    # says processing should not continue.
    #
    def _handleError(self, error):
        handler= self._parameters['error-handler']
        cont= None
        if handler is not None:
            cont= handler.handleError(error)
        if not error.allowContinue(cont):
            raise error
787
788
# LSParsers can't have well-formed set to False, and default entities and
# cdata-sections to False instead of True
#
class ParserConfiguration(DOMConfiguration):
    """DOMConfiguration with the parameter defaults used for parsers
    """
    # Start from a copy of the base defaults and overlay the differences; the
    # base class dictionary itself is left untouched.
    _defaults= DOMConfiguration._defaults.copy()
    _defaults.update({
        'well-formed': (True, False),
        'entities': (False, True),
        'cdata-sections': (False, True)
    })
798
799
# Predefined configurations for simple normalisation processes outside of the
# normalizeDocument method
#
# DOMCONFIG_NONE has each of the parameters below switched off; the other
# presets are copies of it with single parameters switched back on.
DOMCONFIG_NONE= DOMConfiguration()
for _pxparam in (
    'well-formed', 'namespaces', 'pxdom-normalize-text',
    'pxdom-update-entities', 'pxdom-examine-cdata-sections',
    'pxdom-reset-identity'
):
    DOMCONFIG_NONE.setParameter(_pxparam, False)
del _pxparam

# Entity updating only, and the same with unbound namespace fixing added.
DOMCONFIG_ENTS= DOMConfiguration(DOMCONFIG_NONE)
DOMCONFIG_ENTS.setParameter('pxdom-update-entities', True)
DOMCONFIG_ENTS_BIND= DOMConfiguration(DOMCONFIG_ENTS)
DOMCONFIG_ENTS_BIND.setParameter('pxdom-fix-unbound-namespaces', True)

# Text normalisation only. The character-normalising variant is defined only
# when character normalisation is available.
DOMCONFIG_TEXT= DOMConfiguration(DOMCONFIG_NONE)
DOMCONFIG_TEXT.setParameter('pxdom-normalize-text', True)
if CNORM:
    DOMCONFIG_TEXT_CANONICAL= DOMConfiguration(DOMCONFIG_TEXT)
    DOMCONFIG_TEXT_CANONICAL.setParameter('normalize-characters', True)
821
822
class TypeInfo(DOMObject):
    """Value type giving information about schema type information

    Belongs to Element or Attribute. Since only DTD schema information is
    supported, this returns nulls except for Attribute typeNames, which might
    be grabbable from the doctype's attlists.
    """
    # DerivationMethods bit flags. The DOM Level 3 Core IDL gives these the
    # values 1, 2, 4 and 8; they were previously generated one bit too high
    # (2, 4, 8, 16), so they are now spelled out explicitly.
    #
    DERIVATION_RESTRICTION= 0x00000001
    DERIVATION_EXTENSION= 0x00000002
    DERIVATION_UNION= 0x00000004
    DERIVATION_LIST= 0x00000008

    def __init__(self, ownerNode):
        """Create the type information object for ownerNode.
        """
        DOMObject.__init__(self, False)
        self._ownerNode= ownerNode
    def _get_typeNamespace(self):
        """Return the namespace half of the pair computed by _getType.
        """
        return self._getType()[0]
    def _get_typeName(self):
        """Return the name half of the pair computed by _getType.
        """
        return self._getType()[1]

    def _getType(self):
        """Work out the (typeNamespace, typeName) pair for the owner node.

        Only attribute nodes can have a type. For an attribute that is
        attached to an element in a document with a doctype, the type is
        looked up in the doctype's attribute lists by element tag name and
        attribute name. Failing that, an attribute called 'xml:id' is typed
        as ID. Everything else gives (None, None).
        """
        owner= self._ownerNode
        if owner.nodeType==Node.ATTRIBUTE_NODE:
            if (
                owner.ownerElement is not None and
                owner.ownerDocument is not None and
                owner.ownerDocument.doctype is not None
            ):
                attlist= owner.ownerDocument.doctype._attlists.getNamedItem(
                    owner.ownerElement.tagName
                )
                if attlist is not None:
                    attdecl= attlist.declarations.getNamedItem(owner.name)
                    if attdecl is not None:
                        return (
                            DTNS,
                            AttributeDeclaration.ATTR_NAMES[attdecl.attributeType]
                        )
            # No declaration found: xml:id is an ID regardless
            #
            if (owner.name=='xml:id'):
                return (DTNS, 'ID')
        return (None, None)

    def isDerivedFrom(self, typeNamespaceArg, typeNameArg, derivationMethod):
        """DOM 3 required method, otherwise not useful

        pxdom does not support XML Schema; for DTD schema this method always
        returns false.
        """
        return False
864
865
class DOMLocator(DOMObject):
    """Value type used to return source document/position information

    Used in the standard DOM to locate DOMErrors; pxdom also allows any Node
    to be located this way.
    """
    def __init__(self, node= None, lineNumber= -1, columnNumber= -1, uri= None):
        """Record a location.

        node: the related node, or None.
        lineNumber, columnNumber: position values, -1 by default.
        uri: explicit URI. When omitted it is taken from the documentURI of
          the node's owner document; when there is no node, or the node has
          no owner document, it is the empty string.
        """
        self._relatedNode= node
        self._lineNumber= lineNumber
        self._columnNumber= columnNumber
        if uri is not None:
            self._uri= uri
        elif node is not None and node._ownerDocument is not None:
            self._uri= node._ownerDocument.documentURI
        else:
            # Nodes can exist without an owner document (Node's constructor
            # defaults it to None); locating one of those must not fail.
            #
            self._uri= ''
    def _get_lineNumber(self):
        """Return the line number given at construction.
        """
        return self._lineNumber
    def _get_columnNumber(self):
        """Return the column number given at construction.
        """
        return self._columnNumber
    def _get_byteOffset(self):
        """Byte offsets are not tracked; always -1.
        """
        return -1
    def _get_utf16Offset(self):
        """UTF-16 offsets are not tracked; always -1.
        """
        return -1
    def _get_relatedNode(self):
        """Return the node given at construction, possibly None.
        """
        return self._relatedNode
    def _get_uri(self):
        """Return the URI worked out at construction.
        """
        return self._uri
894
895
class UserDataHandler:
    """Holder for the UserDataHandler operation-type constants

    pxdom accepts any Python object with a 'handle' method wherever the DOM
    expects a UserDataHandler, so this class has no behaviour of its own.
    NB. handlers are never called with NODE_DELETED: as the DOM Core spec
    itself notes, there is no telling when Python will delete an object, and
    no __del__ hook is provided to find out because __del__ has negative
    implications for garbage collection.
    """
    NODE_CLONED= 1
    NODE_IMPORTED= 2
    NODE_DELETED= 3
    NODE_RENAMED= 4
    NODE_ADOPTED= 5
908
909
910# Core node classes
911# ============================================================================
912
class Node(DOMObject):
  """ Abstract base class for all DOM Nodes.

      Holds the state shared by every node (owner document, container node,
      namespace and name parts, child list, user data, source position) and
      the child-list manipulation used by all node types. Subclasses restrict
      what they may contain through _childTypes and override the _get_*
      accessors as needed.
  """
  # Standard DOM nodeType values
  #
  [ELEMENT_NODE,ATTRIBUTE_NODE,TEXT_NODE,CDATA_SECTION_NODE,
  ENTITY_REFERENCE_NODE,ENTITY_NODE,PROCESSING_INSTRUCTION_NODE,COMMENT_NODE,
  DOCUMENT_NODE,DOCUMENT_TYPE_NODE,DOCUMENT_FRAGMENT_NODE,NOTATION_NODE
  ]= range(1,13)

  # Additional node types for declaration nodes, numbered from 301
  #
  [ELEMENT_DECLARATION_NODE,ATTRIBUTE_DECLARATION_NODE,ATTRIBUTE_LIST_NODE
  ]= range(301, 304)

  # compareDocumentPosition bit flags: 1, 2, 4, 8, 16, 32
  #
  [DOCUMENT_POSITION_DISCONNECTED,DOCUMENT_POSITION_PRECEDING,
  DOCUMENT_POSITION_FOLLOWING,DOCUMENT_POSITION_CONTAINS,
  DOCUMENT_POSITION_CONTAINED_BY,DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC
  ]= map(lambda n: 1<<n, range(6))

  # Node properties
  #
  def __init__(self,
    ownerDocument= None, namespaceURI= None, localName= None, prefix= None
  ):
    """ Initialise the common node state. The node starts with no container,
        an empty child list (locked against direct modification), no
        attribute map, and an unknown (-1, -1) source position.
    """
    DOMObject.__init__(self)
    self._ownerDocument= ownerDocument
    self._containerNode= None
    self._namespaceURI= namespaceURI
    self._localName= localName
    self._prefix= prefix
    self._childNodes= ChildNodeList(self)
    self._attributes= None
    self._userData= {}
    self._childNodes.readonly= True
    self._sequence= 0
    self._row= -1
    self._col= -1
  def _cloneTo(self, node):
    """ Copy owner document, name parts and source position onto node.
        Children, attributes and user data are not copied here.
    """
    node._ownerDocument= self._ownerDocument
    node._namespaceURI= self._namespaceURI
    node._localName= self._localName
    node._prefix= self._prefix
    node._row= self._row
    node._col= self._col

  def _get_ownerDocument(self): return self._ownerDocument
  def _get_parentNode(self): return self._containerNode
  def _get_nodeType(self): return None
  def _get_nodeName(self): return '#abstract-node'
  def _get_nodeValue(self): return None
  def _get_namespaceURI(self): return self._namespaceURI
  def _get_localName(self): return self._localName
  def _get_prefix(self): return self._prefix
  def _get_childNodes(self): return self._childNodes
  def _get_attributes(self): return self._attributes
  def _set_nodeValue(self, value):
    # Setting nodeValue on a node type that has none is silently ignored
    pass

  def __repr__(self):
    """ Show the class name and node name, truncating long names and dropping
        the 'u' prefix of a unicode repr.
    """
    t= repr(self.nodeName)
    if len(t)>REPR_MAX_LEN:
      t= t[:REPR_MAX_LEN-2]+'...'
    if t[:1]=='u':
      t= t[1:]
    return '<pxdom.%s %s>' % (self.__class__.__name__, t)

  # Hierarchy access
  #
  def _get_firstChild(self):
    """ Return the first child, or None if there are no children.
    """
    if self.childNodes.length>0:
      return self.childNodes.item(0)
    return None

  def _get_lastChild(self):
    """ Return the last child, or None if there are no children.
    """
    if self.childNodes.length>0:
      return self._childNodes.item(self.childNodes.length-1)
    return None

  def _get_previousSibling(self):
    """ Return the node just before this one in its parent's child list, or
        None if there is no parent, this node is not found in that list, or
        it is the first child.
    """
    if self.parentNode is None:
      return None
    try:
      index= self.parentNode.childNodes._index(self)
    except ValueError:
      return None
    if index<1:
      return None
    return self.parentNode.childNodes.item(index-1)

  def _get_nextSibling(self):
    """ Return the node just after this one in its parent's child list, or
        None if there is no parent, this node is not found in that list, or
        it is the last child.
    """
    if self.parentNode is None:
      return None
    try:
      index= self.parentNode.childNodes._index(self)
    except ValueError:
      return None
    if index>=self.parentNode.childNodes.length-1:
      return None
    return self.parentNode.childNodes.item(index+1)

  def hasAttributes(self):
    """ Return whether the node has an attribute map with anything in it.
    """
    if self._attributes is not None:
      if self._attributes.length>0:
        return True
    return False

  def hasChildNodes(self):
    """ Return whether the node has at least one child.
    """
    return self._childNodes.length>0

  # Hierarchy alteration
  #
  # Node types accepted as children; subclasses replace this tuple.
  #
  _childTypes= (
    ELEMENT_NODE, COMMENT_NODE, ENTITY_REFERENCE_NODE,TEXT_NODE,
    CDATA_SECTION_NODE, PROCESSING_INSTRUCTION_NODE
  )

  def appendChild(self, newChild):
    """ Add newChild after the existing children and return it. Raises
        NotFoundErr if newChild is None.
    """
    if newChild is None:
      raise NotFoundErr(self, None, None)
    self._writeChild(newChild, None, False)
    return newChild
  def insertBefore(self, newChild, oldChild):
    """ Insert newChild in front of oldChild (or at the end when oldChild is
        None) and return it. Raises NotFoundErr if newChild is None.
    """
    if newChild is None:
      raise NotFoundErr(self, None, None)
    self._writeChild(newChild, oldChild, False)
    return newChild
  def replaceChild(self, newChild, refChild):
    """ Put newChild in the place of refChild and return refChild. Raises
        NotFoundErr if either argument is None.
    """
    if newChild is None or refChild is None:
      raise NotFoundErr(self, None, None)
    self._writeChild(newChild, refChild, True)
    return refChild
  def removeChild(self, oldChild):
    """ Remove oldChild from the children and return it. Raises NotFoundErr
        if oldChild is None.
    """
    if oldChild is None:
      raise NotFoundErr(self, None, None)
    self._writeChild(None, oldChild, True)
    return oldChild

  def _writeChild(self, newChild, oldChild, removeOld):
    """ Single implementation behind append/insert/replace/removeChild.

        newChild: node (or document fragment whose children are) to insert,
          or None to insert nothing.
        oldChild: existing child marking the position, or None for the end.
        removeOld: whether oldChild is taken out of the child list.

        Raises NoModificationAllowedErr if this node is readonly, NotFoundErr
        if oldChild is not a child, WrongDocumentErr if newChild belongs to
        another document, and HierarchyRequestErr if newChild is this node or
        one of its ancestors, or any inserted node is of a type not listed in
        _childTypes. Does nothing when oldChild and newChild are the same
        object.
    """
    if self._readonly:
      raise NoModificationAllowedErr(self, 'Child')
    if oldChild is not None and oldChild not in self._childNodes:
      raise NotFoundErr(self, oldChild.namespaceURI, oldChild.localName)
    if oldChild is newChild:
      return

    if newChild is not None:
      if newChild.ownerDocument not in (self._ownerDocument, None):
        raise WrongDocumentErr(newChild, self._ownerDocument)
      ancestor= self
      while ancestor is not None:
        if newChild is ancestor:
          raise HierarchyRequestErr(newChild, self)
        ancestor= ancestor.parentNode
      if newChild.nodeType==Node.DOCUMENT_FRAGMENT_NODE:
        # Copy: the fragment's own list shrinks as its children are detached
        newNodes= list(newChild._childNodes._list)
      else:
        newNodes= [newChild]

      # Check every incoming node before detaching any of them, so that a
      # rejected node part-way through a fragment does not leave the nodes
      # before it already torn out of their parent.
      #
      for node in newNodes:
        if node.nodeType not in self._childTypes:
          raise HierarchyRequestErr(node, self)
      for node in newNodes:
        if node.parentNode is not None:
          node.parentNode.removeChild(node)

    # The child list is only unlocked for the duration of the update; the
    # finally makes sure it is locked again even if the update fails.
    #
    self._childNodes.readonly= False
    try:
      if oldChild is None:
        index= self._childNodes.length
      else:
        index= self._childNodes._index(oldChild)
      if removeOld:
        oldChild._containerNode= None
        del self._childNodes._list[index]
      if newChild is not None:
        if newChild.ownerDocument is None:
          newChild._recurse(True, ownerDocument= self._ownerDocument)
        self._childNodes._insertseq(index, newNodes)
        for node in newNodes:
          node._containerNode= self
    finally:
      self._childNodes.readonly= True
    self._changed()

  def isSupported(self, feature, version):
    """ Return whether the implementation has the given feature/version.
    """
    return _implementation.hasFeature(feature, version)

  def getFeature(self, feature, version):
    """ Return this node if the implementation has the given feature/version,
        else None.
    """
    if _implementation.hasFeature(feature, version):
      return self
    return None

  def _get_pxdomLocation(self):
    """ Return a DOMLocator for this node's recorded row and column.
    """
    return DOMLocator(self, self._row, self._col)
  def _setLocation(self, location):
    """ Record the source position. location is a (row, col) pair; it is
        unpacked here rather than in the signature, since tuple parameters
        are not available in later Python versions.
    """
    row, col= location
    self._row= row
    self._col= col

  def _renameNode(self, namespaceURI, qualifiedName):
    """ Renaming is unsupported unless a subclass overrides this; always
        raises NotSupportedErr.
    """
    raise NotSupportedErr(self, 'renameNode')

  def _changed(self):
    """Backend sequence number update

    Works iteratively over parents, not recursively. This is possible because
    no node subclass changes the behaviour of _changed, allowing node updates
    over the recursion limit.
    """
    node= self
    while node is not None:
      node._sequence= node._sequence+1
      node= node._containerNode

  def _getDescendants(self, descendants):
    """ Append every descendant of this node to the descendants list, each
        child followed by its own descendants.
    """
    for child in self._childNodes:
      descendants.append(child)
      child._getDescendants(descendants)

  def _containsUnboundPrefix(self):
    """ Return whether this node, any of its attributes or any descendant
        has a prefix but no namespace URI.
    """
    if self._prefix is not None and self._namespaceURI is None:
      return True
    if self._attributes is not None:
      for attr in self._attributes:
        if attr._containsUnboundPrefix():
          return True
    for child in self._childNodes:
      if child._containsUnboundPrefix():
        return True
    return False
1133
1134
class NamedNode(Node):
  """ Common base for node types that carry a plain, fixed node name and have
      no namespace support (entity references and the like).
  """
  def __init__(self, ownerDocument= None, nodeName= None):
    """ Set up a node without namespace, local name or prefix. A nodeName
        other than None is passed through _checkName before being stored.
    """
    Node.__init__(self, ownerDocument, None, None, None)
    hasName= nodeName is not None
    if hasName:
      _checkName(nodeName)
    self._nodeName= nodeName

  def _get_nodeName(self):
    """ Return the name stored at construction.
    """
    return self._nodeName

  def _cloneTo(self, node):
    """ Copy the base node state and the node name onto node.
    """
    Node._cloneTo(self, node)
    node._nodeName= self._nodeName
1149
1150
class NamedNodeNS(Node):
  """ Common base for Element and Attribute, whose node name is assembled
      from a prefix and a local name. A node made through the Level 1 methods
      has the NONS marker as its internal namespaceURI and keeps its whole
      name in the internal localName; reading namespaceURI or localName on
      such a node gives a null value, as DOM Level 2 Core specifies.
  """
  def __init__(self,
    ownerDocument= None, namespaceURI= None, localName= None, prefix= None
  ):
    """ Check whichever of prefix and localName are given with _checkName
        (its nc flag is set unless the node is a NONS one), then initialise
        the base node.
    """
    noColons= namespaceURI is not NONS
    if prefix is not None:
      _checkName(prefix, nc= noColons)
    if localName is not None:
      _checkName(localName, nc= noColons)
    Node.__init__(self, ownerDocument, namespaceURI, localName, prefix)

  def _get_nodeName(self):
    """ Return 'prefix:localName' for a namespaced node with a prefix, and
        the stored local name alone otherwise.
    """
    isLevel1= self._namespaceURI is NONS
    if not isLevel1 and self._prefix is not None:
      return '%s:%s' % (self._prefix, self._localName)
    return self._localName

  def _get_localName(self):
    """ Return the local name, or None for a NONS node.
    """
    if self._namespaceURI is not NONS:
      return self._localName
    return None

  def _get_namespaceURI(self):
    """ Return the namespace URI, or None for a NONS node.
    """
    if self._namespaceURI is not NONS:
      return self._namespaceURI
    return None

  def _get_schemaTypeInfo(self):
    """ Return a new TypeInfo object for this node.
    """
    return TypeInfo(self)

  def _set_prefix(self, value):
    """ Change the prefix; an empty string counts as None. The value is
        checked with _checkName, and NamespaceErr is raised if it contains a
        colon, if the node has no namespace, if it is 'xml' outside the XMNS
        namespace, or if being 'xmlns' and being in the NSNS namespace do not
        go together.
    """
    if value=='':
      value= None
    namespace= self._namespaceURI

    if value is None:
      invalid= False
    else:
      _checkName(value, True)
      invalid= (
        ':' in value or
        namespace in (None, NONS) or
        (value=='xml' and namespace!=XMNS)
      )
    if not invalid:
      # Applies to a None prefix too: nodes in the NSNS namespace may only
      # have the prefix 'xmlns'
      invalid= (value=='xmlns') != (namespace==NSNS)

    if invalid:
      raise NamespaceErr((value or '')+':'+self._localName,self._namespaceURI)
    self._prefix= value
    self._changed()

  def _renameNode(self, namespaceURI, qualifiedName):
    """ Give the node a new namespace and name. A qualifiedName for which
        _splitName yields no local name is only accepted without a namespace
        and makes this a NONS node; otherwise the same namespace rules as at
        creation apply. Raises NamespaceErr on a bad combination; nothing is
        changed in that case.
    """
    prefix, localName= _splitName(qualifiedName)

    if localName is None:
      _checkName(qualifiedName)
      if namespaceURI is not None:
        raise NamespaceErr(qualifiedName, namespaceURI)
      self._namespaceURI= NONS
      self._prefix= None
      self._localName= qualifiedName
      return

    _checkName(localName, nc= True)
    if prefix is not None:
      _checkName(prefix, nc= True)

    prefixUnbound= namespaceURI is None and prefix is not None
    xmlMisused= prefix=='xml' and namespaceURI!=XMNS
    xmlnsMismatch= (namespaceURI==NSNS) != ('xmlns' in (prefix, qualifiedName))
    if prefixUnbound or xmlMisused or xmlnsMismatch:
      raise NamespaceErr(qualifiedName, namespaceURI)

    self._namespaceURI= namespaceURI
    self._prefix= prefix
    self._localName= localName
1217
1218
class Document(Node):
  """ Implementation of DOM 3 Document interface.
  """
  def __init__(self):
    """ Create an empty document. The document is stored as its own internal
        owner document; XML version defaults to '1.0', strict error checking
        is on and a fresh DOMConfiguration is attached.
    """
    Node.__init__(self, self, None, None, None)
    self._xmlStandalone= False
    self._xmlVersion= '1.0'
    self._xmlEncoding= None
    self._inputEncoding= None
    self._documentURI= None
    self._strictErrorChecking= True
    self._domConfig= DOMConfiguration()
    self._userdatacalls= {}
  def _cloneTo(self, node):
    """ Copy the base node state and the document-level properties onto node,
        giving it its own copy of the DOMConfiguration.
    """
    Node._cloneTo(self, node)
    node._xmlStandalone= self._xmlStandalone
    node._xmlVersion= self._xmlVersion
    node._xmlEncoding= self._xmlEncoding
    node._inputEncoding= self._inputEncoding
    node._documentURI= self._documentURI
    node._strictErrorChecking= self._strictErrorChecking
    node._domConfig= DOMConfiguration(self._domConfig)

  def _get_nodeType(self):
    return Node.DOCUMENT_NODE
  def _get_nodeName(self):
    return '#document'
  def _get_ownerDocument(self):
    # Publicly a document has no owner, whatever is stored internally
    return None

  # A document takes elements, comments, PIs and a doctype as children
  #
  _childTypes= (
    Node.ELEMENT_NODE, Node.COMMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
    Node.DOCUMENT_TYPE_NODE
  )

  def _get_implementation(self):
    return _implementation
  def _get_documentElement(self):
    """ Return the first element child, or None.
    """
    for child in self._childNodes:
      if child.nodeType==Node.ELEMENT_NODE:
        return child
    return None
  def _get_doctype(self):
    """ Return the first document type child, or None.
    """
    for child in self._childNodes:
      if child.nodeType==Node.DOCUMENT_TYPE_NODE:
        return child
    return None
  def _get_domConfig(self):
    return self._domConfig

  def _get_xmlStandalone(self):
    return self._xmlStandalone
  def _set_xmlStandalone(self, value):
    self._xmlStandalone= value
  def _get_xmlVersion(self):
    return self._xmlVersion
  def _set_xmlVersion(self, value):
    """ Only '1.0' and '1.1' are accepted; anything else raises
        NotSupportedErr.
    """
    if value not in ('1.0', '1.1'):
      # Format instead of concatenating, so a non-string value such as None
      # still produces NotSupportedErr rather than a TypeError.
      raise NotSupportedErr(self, 'xmlVersion %s' % (value,))
    self._xmlVersion= value
  def _get_xmlEncoding(self):
    return self._xmlEncoding
  def _get_inputEncoding(self):
    return self._inputEncoding
  def _get_documentURI(self):
    return self._documentURI
  def _set_documentURI(self, value):
    self._documentURI= value
  def _get_strictErrorChecking(self):
    return self._strictErrorChecking
  def _set_strictErrorChecking(self, value):
    self._strictErrorChecking= value

  def createElement(self, tagName):
    """ Create a Level 1 (NONS) element owned by this document, with its
        default attributes set.
    """
    element= Element(self, NONS, tagName, None)
    element._setDefaultAttributes()
    return element
  def createElementNS(self, namespaceURI, qualifiedName):
    """ Create a namespaced element owned by this document, with its default
        attributes set. An empty namespaceURI counts as None. Raises
        NamespaceErr if the name does not split into a valid prefix and local
        name or breaks the rules for the 'xml' and 'xmlns' names.
    """
    if namespaceURI=='':
      namespaceURI= None
    _checkName(qualifiedName)
    prefix, localName= _splitName(qualifiedName)
    if (
      localName is None or
      namespaceURI is None and prefix is not None or
      prefix=='xml' and namespaceURI!=XMNS or
      (namespaceURI==NSNS) != ('xmlns' in (prefix, qualifiedName))
    ):
      raise NamespaceErr(qualifiedName, namespaceURI)
    element= Element(self, namespaceURI, localName, prefix)
    element._setDefaultAttributes()
    return element
  def createAttribute(self, name):
    """ Create a Level 1 (NONS) attribute owned by this document.
    """
    return Attr(self, NONS, name, None, True)
  def createAttributeNS(self, namespaceURI, qualifiedName):
    """ Create a namespaced attribute owned by this document. The same
        checks as in createElementNS apply.
    """
    if namespaceURI=='':
      namespaceURI= None
    _checkName(qualifiedName)
    prefix, localName= _splitName(qualifiedName)
    if (
      localName is None or
      namespaceURI is None and prefix is not None or
      prefix=='xml' and namespaceURI!=XMNS or
      (namespaceURI==NSNS) != ('xmlns' in (prefix, qualifiedName))
    ):
      raise NamespaceErr(qualifiedName, namespaceURI)
    return Attr(self, namespaceURI, localName, prefix, True)
  def createTextNode(self, data):
    node= Text(self)
    node.data= data
    return node
  def createComment(self, data):
    node= Comment(self)
    node.data= data
    return node
  def createCDATASection(self, data):
    node= CDATASection(self)
    node.data= data
    return node
  def createProcessingInstruction(self, target, data):
    node= ProcessingInstruction(self, target)
    node.data= data
    return node
  def createDocumentFragment(self):
    return DocumentFragment(self)
  def createEntityReference(self, name):
    node= EntityReference(self, name)
    node._normalize(DOMCONFIG_ENTS) # will also set readonly
    return node

  def getElementsByTagName(self, name):
    return NodeListByTagName(self, NONS, name)
  def getElementsByTagNameNS(self, namespaceURI, localName):
    if namespaceURI=='':
      namespaceURI= None
    return NodeListByTagName(self, namespaceURI, localName)
  def getElementById(self, elementId):
    """ Return the first node, in document order, having an ID attribute
        whose value is elementId, or None.
    """
    return self._getElementById(self, elementId)
  def _getElementById(self, node, elementId):
    """ Recursive worker for getElementById: test node's own attributes, then
        search its children if its type can contain elements at all.
    """
    if node._attributes is not None:
      for attr in node._attributes:
        if attr.isId and attr.value==elementId:
          return node
    if Node.ELEMENT_NODE in node._childTypes:
      for child in node._childNodes:
        element= self._getElementById(child, elementId)
        if element is not None:
          return element
    return None

  def renameNode(self, n, namespaceURI, qualifiedName):
    """ Rename node n and return it. An empty namespaceURI counts as None.
        Raises NoModificationAllowedErr if the document is readonly and
        WrongDocumentErr if n belongs to another document; the node's own
        _renameNode may raise further errors. NODE_RENAMED user data handlers
        are called after the rename.
    """
    if namespaceURI=='':
      namespaceURI= None
    if self._readonly:
      raise NoModificationAllowedErr(self, 'renameNode')
    if n._ownerDocument is not self:
      raise WrongDocumentErr(n, self)
    n._renameNode(namespaceURI, qualifiedName)
    n._changed()
    self._initUserData()
    n._callUserDataHandlers(UserDataHandler.NODE_RENAMED, n, None)
    self._flushUserData()
    return n

  def _writeChild(self, newChild, oldChild, removeOld):
    """ Before allowing a child hierarchy change to go ahead, check that
        allowing it wouldn't leave the document containing two Element or two
        DocumentType children.
    """
    if newChild is not None:
      if newChild.nodeType==Node.DOCUMENT_FRAGMENT_NODE:
        newNodes= newChild._childNodes._list
      else:
        newNodes= [newChild]
      doctype= None
      documentElement= None
      # The children as they would be after the change, less the new nodes
      afterNodes= list(self._childNodes._list)
      if removeOld and oldChild in afterNodes:
        afterNodes.remove(oldChild)
      for node in afterNodes+newNodes:
        if node.nodeType==Node.DOCUMENT_TYPE_NODE:
          # Seeing the same node twice (a move within the document) is fine
          if doctype not in (node, None):
            raise HierarchyRequestErr(node, self)
          doctype= node
        if node.nodeType==Node.ELEMENT_NODE:
          if documentElement not in (node, None):
            raise HierarchyRequestErr(node, self)
          documentElement= node
    Node._writeChild(self, newChild, oldChild, removeOld)

  def __repr__(self):
    if self.documentURI is not None:
      return '<pxdom.Document %s>' % repr(self.documentURI)
    else:
      return '<pxdom.Document>'
1413
1414
class DocumentFragment(Node):
  """ Lightweight container node; has neither namespace nor name parts.
  """
  def __init__(self, ownerDocument= None):
    """ Create an empty fragment belonging to ownerDocument.
    """
    Node.__init__(self, ownerDocument, None, None, None)

  def __repr__(self):
    return '<pxdom.DocumentFragment>'

  def _get_nodeName(self):
    return '#document-fragment'

  def _get_nodeType(self):
    return Node.DOCUMENT_FRAGMENT_NODE
1424
1425
class Element(NamedNodeNS):
  """ Implementation of DOM 3 Element interface.
  """
  def __init__(self,
    ownerDocument= None, namespaceURI= None, localName= None, prefix= None
  ):
    """ Create an element with an empty attribute map.
    """
    NamedNodeNS.__init__(self, ownerDocument, namespaceURI, localName, prefix)
    self._attributes= AttrMap(self)
  def _get_nodeType(self):
    return Node.ELEMENT_NODE
  def _get_tagName(self):
    return self.nodeName

  # Level 1 attribute access, by node name
  #
  def hasAttribute(self, name):
    return self._attributes.getNamedItem(name) is not None
  def getAttribute(self, name):
    """ Return the value of the named attribute, or '' if there is none.
    """
    attr= self._attributes.getNamedItem(name)
    if attr is None:
      return ''
    return attr.value
  def setAttribute(self, name, value):
    """ Set the named attribute's value, creating a NONS attribute first if
        needed; an existing attribute is marked as specified. Raises
        NoModificationAllowedErr if the element is readonly.
    """
    if self._readonly:
      raise NoModificationAllowedErr(self, 'setAttribute')
    attr= self._attributes.getNamedItem(name)
    if attr is None:
      attr= Attr(self._ownerDocument, NONS, name, None, True)
      self._attributes.setNamedItem(attr)
    else:
      attr._specified= True
    attr.value= value
  def removeAttribute(self, name):
    """ Remove the named attribute; a missing attribute is not an error.
        Raises NoModificationAllowedErr if the element is readonly.
    """
    if self._readonly:
      raise NoModificationAllowedErr(self, 'removeAttribute')
    try:
      self._attributes.removeNamedItem(name)
    except NotFoundErr:
      pass
  def getAttributeNode(self, name):
    return self._attributes.getNamedItem(name)
  def setAttributeNode(self, node):
    if self._readonly:
      raise NoModificationAllowedErr(self, 'setAttributeNode')
    return self._attributes.setNamedItem(node)
  def removeAttributeNode(self, node):
    if self._readonly:
      raise NoModificationAllowedErr(self, 'removeAttributeNode')
    self._attributes._writeItem(node, None)
    return node

  # Level 2 attribute access, by namespace and local name
  #
  def hasAttributeNS(self, namespaceURI, localName):
    return self._attributes.getNamedItemNS(namespaceURI,localName) is not None
  def getAttributeNS(self, namespaceURI, localName):
    """ Return the value of the given attribute, or '' if there is none.
    """
    attr= self._attributes.getNamedItemNS(namespaceURI, localName)
    if attr is None:
      return ''
    return attr.value
  def setAttributeNS(self, namespaceURI, qualifiedName, value):
    """ Set the given attribute's value, creating the attribute through the
        owner document's createAttributeNS if the lookup finds nothing.
        Raises NoModificationAllowedErr if the element is readonly.
    """
    if self._readonly:
      raise NoModificationAllowedErr(self, 'setAttributeNS')
    # NOTE(review): the lookup is given the qualified name where the other
    # callers of getNamedItemNS give a local name - confirm that a prefixed
    # name finds the existing attribute.
    attr= self._attributes.getNamedItemNS(namespaceURI, qualifiedName)
    if attr is None:
      attr= self._ownerDocument.createAttributeNS(namespaceURI, qualifiedName)
      self._attributes.setNamedItemNS(attr)
    else:
      attr._specified= True
    attr.value= value
  def removeAttributeNS(self, namespaceURI, localName):
    """ Remove the given attribute; a missing attribute is not an error.
        Raises NoModificationAllowedErr if the element is readonly.
    """
    if self._readonly:
      raise NoModificationAllowedErr(self, 'removeAttributeNS')
    try:
      self._attributes.removeNamedItemNS(namespaceURI, localName)
    except NotFoundErr:
      pass
  def getAttributeNodeNS(self, namespaceURI, localName):
    return self._attributes.getNamedItemNS(namespaceURI, localName)
  def setAttributeNodeNS(self, node):
    if self._readonly:
      raise NoModificationAllowedErr(self, 'setAttributeNodeNS')
    return self._attributes.setNamedItemNS(node)

  def getElementsByTagName(self, name):
    return NodeListByTagName(self, NONS, name)
  def getElementsByTagNameNS(self, namespaceURI, localName):
    if namespaceURI=='':
      namespaceURI= None
    return NodeListByTagName(self, namespaceURI, localName)

  def setIdAttribute(self, name, isId):
    """ Flag the named attribute as an ID (or not). Raises NotFoundErr if
        the element has no such attribute.
    """
    node= self.getAttributeNode(name)
    if node is None:
      raise NotFoundErr(self._attributes, NONS, name)
    self.setIdAttributeNode(node, isId)
  def setIdAttributeNS(self, namespaceURI, localName, isId):
    """ Namespace-aware version of setIdAttribute.
    """
    node= self.getAttributeNodeNS(namespaceURI, localName)
    if node is None:
      raise NotFoundErr(self._attributes,namespaceURI, localName)
    self.setIdAttributeNode(node, isId)
  def setIdAttributeNode(self, idAttr, isId):
    """ Flag the given attribute node as an ID (or not). Raises
        NoModificationAllowedErr if the element is readonly and NotFoundErr
        if the node is not one of this element's attributes.
    """
    if self._readonly:
      raise NoModificationAllowedErr(self, 'setIdAttribute')
    if idAttr not in self._attributes._list:
      raise NotFoundErr(self._attributes, NONS, idAttr.name)
    idAttr._isId= isId

  def _renameNode(self, namespaceURI, qualifiedName):
    """ Rename the element, swapping the default attributes declared for the
        old name for those declared for the new one.
    """
    self._setDefaultAttributes(False)
    try:
      NamedNodeNS._renameNode(self, namespaceURI, qualifiedName)
    finally:
      # Also reached when the rename is refused: the element then still has
      # its old name, and the defaults just removed are put back instead of
      # being lost.
      self._setDefaultAttributes()


  def _setDefaultAttributes(self, set= True):
    """ Set or remove an element's default attributes.

        Does nothing without an owner document that has a doctype. For each
        attribute declared for this tag name with a default or fixed value:
        when set is true the attribute is created if absent; when false it is
        removed if present and not marked as specified.
    """
    if self._ownerDocument is None or self._ownerDocument.doctype is None:
      return
    declarationList= self._ownerDocument.doctype._attlists.getNamedItem(
      self.tagName
    )
    if declarationList is not None:
      for declaration in declarationList.declarations:
        if declaration.defaultType in (
          AttributeDeclaration.DEFAULT_VALUE, AttributeDeclaration.FIXED_VALUE
        ):
          oldNode= self.getAttributeNode(declaration.nodeName)
          if set:
            if oldNode is None:
              declaration._createAttribute(self)
          elif oldNode is not None and not oldNode.specified:
            self.removeAttributeNode(oldNode)

1555
class Attr(NamedNodeNS):
  """ Attribute node. Its value is held as child nodes (text and entity
      references); its container node is the owning element rather than a
      parent.
  """
  def __init__(self,
    ownerDocument= None,
    namespaceURI= None, localName= None, prefix= None, specified= True
  ):
    """ Create an attribute that is not flagged as an ID.
    """
    NamedNodeNS.__init__(self, ownerDocument, namespaceURI, localName, prefix)
    self._specified= specified
    self._isId= False
  def _cloneTo(self, node):
    """ Copy the base node state plus the ID and specified flags onto node.
    """
    NamedNodeNS._cloneTo(self, node)
    node._isId= self._isId
    node._specified= self._specified

  def _get_nodeType(self):
    return Node.ATTRIBUTE_NODE
  def _get_nodeValue(self):
    return self.textContent
  def _get_name(self):
    return self.nodeName
  def _get_value(self):
    """ Return the attribute value: the data of a sole text child, else the
        text content. Unless the attribute's type name is 'CDATA' or None,
        the value is split on spaces and rejoined with single spaces, which
        drops leading, trailing and repeated spaces.
    """
    c= self._childNodes
    if c.length==1 and c[0].nodeType==Node.TEXT_NODE:
      value= c[0].data
    else:
      value= self.textContent
    if self.schemaTypeInfo.typeName in ('CDATA', None):
      return value
    else:
      return string.join(
        filter(lambda s: s!='', string.split(value, ' ')),
      ' ')
  def _set_nodeValue(self, value):
    self.value= value

  def _set_value(self, value):
    """ Replace all children with a single text node holding value (or with
        nothing for an empty value) and mark the attribute as specified.
    """
    while self.firstChild is not None:
      self.removeChild(self.firstChild)
    if value!='':
      self.appendChild(self._ownerDocument.createTextNode(value))
    self._specified= True

  _childTypes= (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
  def _get_parentNode(self):
    # Attributes are not children of their element
    return None
  def _get_ownerElement(self):
    return self._containerNode
  def _get_schemaTypeInfo(self):
    return TypeInfo(self)

  def _get_specified(self):
    return self._specified
  def _get_isId(self):
    """ An attribute is an ID if flagged as one or if its type name is 'ID'.
    """
    return self._isId or self.schemaTypeInfo.typeName=='ID'

  def _renameNode(self, namespaceURI, qualifiedName):
    """ Rename the attribute. An attribute attached to an element is taken
        out of the element for the duration of the rename and put back
        afterwards.
    """
    owner= self._containerNode
    if owner is not None:
      owner.removeAttributeNode(self)
    try:
      NamedNodeNS._renameNode(self, namespaceURI, qualifiedName)
    finally:
      # Also reached when the rename is refused: the attribute then still
      # has its old name and goes back onto its element instead of being
      # left detached.
      if owner is not None:
        owner.setAttributeNodeNS(self)
1617
1618
class CharacterData(Node):
  """ Base for the node types whose content is a single string of data.
      Such nodes accept no children.
  """
  _childTypes= ()

  def __init__(self, ownerDocument= None):
    """ Create a node holding an empty string.
    """
    Node.__init__(self, ownerDocument, None, None, None)
    self._data= ''

  def _cloneTo(self, node):
    """ Copy the base node state and the data onto node.
    """
    Node._cloneTo(self, node)
    node._data= self._data

  def _get_nodeName(self):
    return '#character-data'

  # nodeValue is simply another name for data
  #
  def _get_nodeValue(self):
    return self.data
  def _set_nodeValue(self, value):
    self.data= value

  def _get_data(self):
    return self._data
  def _set_data(self, value):
    self._data= value
  def _get_length(self):
    return len(self._data)

  def substringData(self, offset, count):
    """ Return up to count characters of the data starting at offset. Raises
        IndexSizeErr for a negative offset or count, or an offset beyond the
        end of the data.
    """
    content= self._data
    outOfRange= offset<0 or count<0 or offset>len(content)
    if outOfRange:
      raise IndexSizeErr(content, offset)
    return content[offset:offset+count]

  def appendData(self, arg):
    """ Add arg to the end of the data. Raises NoModificationAllowedErr if
        the node is readonly.
    """
    if not self._readonly:
      self._data= self._data+arg
      return
    raise NoModificationAllowedErr(self, 'data')

  def insertData(self, offset, arg):
    """ Insert arg at offset; a replacement of zero characters.
    """
    self.replaceData(offset, 0, arg)

  def deleteData(self, offset, count):
    """ Delete count characters at offset; a replacement by nothing.
    """
    self.replaceData(offset, count, '')

  def replaceData(self, offset, count, arg):
    """ Replace count characters of the data at offset with arg. Raises
        NoModificationAllowedErr if the node is readonly, and IndexSizeErr
        for a negative offset or count or an offset beyond the end of the
        data.
    """
    if self._readonly:
      raise NoModificationAllowedErr(self, 'data')
    content= self._data
    outOfRange= offset<0 or count<0 or offset>len(content)
    if outOfRange:
      raise IndexSizeErr(content, offset)
    kept= content[:offset]
    rest= content[offset+count:]
    self._data= kept+arg+rest

  def __repr__(self):
    """ Show the class name and node value, truncating long values and
        dropping the 'u' prefix of a unicode repr.
    """
    shown= repr(self.nodeValue)
    if len(shown)>REPR_MAX_LEN:
      shown= shown[:REPR_MAX_LEN-2]+'...'
    if shown[:1]=='u':
      shown= shown[1:]
    return '<pxdom.%s %s>' % (self.__class__.__name__, shown)
1668
1669
class Comment(CharacterData):
  """ Character data node reporting itself as a comment.
  """
  def _get_nodeName(self):
    return '#comment'

  def _get_nodeType(self):
    return Node.COMMENT_NODE
1675
1676
class Text(CharacterData):
  """ Character data node reporting itself as text; adds the element-content
      whitespace test and splitText.
  """
  def _get_nodeName(self):
    return '#text'

  def _get_nodeType(self):
    return Node.TEXT_NODE

  def _get_isElementContentWhitespace(self, config=None):
    """ Return whether a node is whitespace in an element whose content model
        is declared in the document type as element-only (not ANY). If we
        don't know the content model, guess either ANY (by default), or
        element-only (if the appropriate config parameter is set).
    """
    # Climb through any entity references to the nearest real ancestor; it
    # has to be an element for the question to make sense.
    #
    ancestor = self.parentNode
    while (
      ancestor is not None and
      ancestor.nodeType == Node.ENTITY_REFERENCE_NODE
    ):
      ancestor = ancestor.parentNode
    if ancestor is None or ancestor.nodeType != Node.ELEMENT_NODE:
      return False

    # The configuration (the owner document's unless the caller supplied one)
    # decides what content model to assume for undeclared elements.
    #
    if config is None:
      config = self._ownerDocument.domConfig
    if config.getParameter('pxdom-assume-element-content'):
      contentType = ElementDeclaration.ELEMENT_CONTENT
    else:
      contentType = ElementDeclaration.ANY_CONTENT

    # A declaration for the element in the doctype overrides the assumption.
    #
    doctype = self._ownerDocument.doctype
    if doctype is not None:
      declared = doctype._elements.getNamedItem(ancestor.nodeName)
      if declared is not None:
        contentType = declared.contentType
    if contentType != ElementDeclaration.ELEMENT_CONTENT:
      return False

    # Only whitespace characters may be present. (Anything else would be
    # invalid, but still well-formed.)
    #
    for char in self._data:
      if char in WHITE:
        continue
      if isinstance(char, Unicode) and char in WHITEU:
        continue
      return False
    return True

  def splitText(self, offset):
    """ Cut this node's data at `offset`: this node keeps the first part, and
        a shallow clone (so of the same class, e.g. CDATASection) holding the
        rest is inserted as next sibling when this node has a parent. The new
        node is returned.
    """
    tailNode = self.cloneNode(False)
    self.deleteData(offset, len(self._data) - offset)
    tailNode.deleteData(0, offset)
    parent = self.parentNode
    if parent is not None:
      parent.insertBefore(tailNode, self.nextSibling)
    return tailNode
1741
1742
class CDATASection(Text):
  """ Text node variant reporting itself as a CDATA section.
  """
  def _get_nodeName(self):
    return '#cdata-section'

  def _get_nodeType(self):
    return Node.CDATA_SECTION_NODE
1748
1749
class ProcessingInstruction(NamedNode):
  """ Processing instruction: the node name is the target, and the content
      is a string kept in `_data` (also exposed as nodeValue). No children.
  """
  _childTypes = ()

  def __init__(self, ownerDocument=None, target=None):
    NamedNode.__init__(self, ownerDocument, target)
    self._data = ''

  def _cloneTo(self, node):
    """ Copy NamedNode state plus the instruction data onto `node`.
    """
    NamedNode._cloneTo(self, node)
    node._data = self._data

  def _get_nodeType(self):
    return Node.PROCESSING_INSTRUCTION_NODE

  def _get_target(self):
    return self.nodeName

  def _get_data(self):
    return self._data

  def _set_data(self, value):
    self._data = value

  # nodeValue simply forwards to the data property.
  #
  def _get_nodeValue(self):
    return self.data

  def _set_nodeValue(self, value):
    self.data = value
1772
1773
class EntityReference(NamedNode):
  """ Named node reporting itself as an entity reference; the node name is
      passed straight to NamedNode.
  """
  def __init__(self, ownerDocument=None, nodeName=None):
    NamedNode.__init__(self, ownerDocument, nodeName)

  def _get_nodeType(self):
    return Node.ENTITY_REFERENCE_NODE
1779
1780
class DocumentType(NamedNode):
  """ Implementation of DocumentType interface. Goes a little beyond the DOM 3
      interface in providing maps for attlists and entity declarations of the
      internal subset (attlists are required internally to support attribute
      defaulting).
  """
  def __init__(self,
    ownerDocument=None, name=None, publicId=None, systemId=None
  ):
    NamedNode.__init__(self, ownerDocument, name)
    self._publicId = publicId
    self._systemId = systemId
    self._internalSubset = None
    # One map per kind of declaration held by the doctype.
    self._entities = NamedNodeMap(self, Node.ENTITY_NODE)
    self._notations = NamedNodeMap(self, Node.NOTATION_NODE)
    self._elements = NamedNodeMap(self, Node.ELEMENT_DECLARATION_NODE)
    self._attlists = NamedNodeMap(self, Node.ATTRIBUTE_LIST_NODE)
    self._processed = True

  def _cloneTo(self, node):
    """ Copy NamedNode state and the identifiers/internal subset onto `node`.
        The declaration maps are not copied here.
    """
    NamedNode._cloneTo(self, node)
    node._publicId = self._publicId
    node._systemId = self._systemId
    node._internalSubset = self._internalSubset

  def _get_nodeType(self):
    return Node.DOCUMENT_TYPE_NODE

  # Standard DocumentType properties
  #
  def _get_name(self):
    return self.nodeName

  def _get_publicId(self):
    return self._publicId

  def _get_systemId(self):
    return self._systemId

  def _get_internalSubset(self):
    return self._internalSubset

  def _set_internalSubset(self, value):
    self._internalSubset = value

  def _get_entities(self):
    return self._entities

  def _get_notations(self):
    return self._notations

  # pxdom extension properties
  #
  def _get_pxdomElements(self):
    return self._elements

  def _get_pxdomAttlists(self):
    return self._attlists

  def _get_pxdomProcessed(self):
    return self._processed
1827
1828
class Entity(NamedNode):
  """ Entity declaration node. Besides the standard identifiers it records
      the base URI it was constructed with, the XML/encoding details and
      document URI of the entity, and an 'available' flag (exposed as
      pxdomAvailable).
  """
  def __init__(self,
    ownerDocument=None, nodeName=None, publicId=None, systemId=None,
    notationName=None, baseURI=None
  ):
    NamedNode.__init__(self, ownerDocument, nodeName)
    self._publicId = publicId
    self._systemId = systemId
    self._notationName = notationName
    self._baseURI = baseURI
    self._xmlVersion = None
    self._xmlEncoding = None
    self._inputEncoding = None
    self._documentURI = None
    self._available = False

  def _cloneTo(self, node):
    """ Copy NamedNode state and all entity fields onto `node`.

        `_baseURI` is included: the copy is created without constructor
        arguments, so leaving it out made every cloned entity report a None
        baseURI even though its documentURI was carried over.
    """
    NamedNode._cloneTo(self, node)
    node._publicId = self._publicId
    node._systemId = self._systemId
    node._notationName = self._notationName
    node._baseURI = self._baseURI
    node._available = self._available
    node._xmlVersion = self._xmlVersion
    node._xmlEncoding = self._xmlEncoding
    node._inputEncoding = self._inputEncoding
    node._documentURI = self._documentURI

  def _get_nodeType(self):
    return Node.ENTITY_NODE
  def _get_parentNode(self):
    return None
  def _get_publicId(self):
    return self._publicId
  def _get_systemId(self):
    return self._systemId
  def _get_notationName(self):
    return self._notationName
  def _get_xmlVersion(self):
    return self._xmlVersion
  def _get_xmlEncoding(self):
    return self._xmlEncoding
  def _get_inputEncoding(self):
    return self._inputEncoding
  def _get_pxdomAvailable(self):
    return self._available
  def _get_pxdomDocumentURI(self):
    return self._documentURI
1874
class Notation(NamedNode):
  """ Notation declaration node: public/system identifiers plus the base URI
      it was constructed with.
  """
  def __init__(self, ownerDocument=None,
    nodeName=None, publicId=None, systemId=None, baseURI=None
  ):
    NamedNode.__init__(self, ownerDocument, nodeName)
    self._publicId = publicId
    self._systemId = systemId
    self._baseURI = baseURI

  def _cloneTo(self, node):
    """ Copy NamedNode state and the notation fields onto `node`.

        `_baseURI` is included: the copy is created without constructor
        arguments, so leaving it out made every cloned notation report a None
        baseURI.
    """
    NamedNode._cloneTo(self, node)
    node._publicId = self._publicId
    node._systemId = self._systemId
    node._baseURI = self._baseURI

  def _get_nodeType(self):
    return Node.NOTATION_NODE
  def _get_parentNode(self):
    return None
  def _get_publicId(self):
    return self._publicId
  def _get_systemId(self):
    return self._systemId
1895
1896
1897# Extended pxdom node classes for doctype parts not currently modelled in the
1898# standard DOM
1899# ============================================================================
1900
class ElementDeclaration(NamedNode):
  """ Node representing an <!ELEMENT> declaration in document type. Prescribed
      content is described by 'contentType' and 'elements', which is null for
      EMPTY and ANY content, or a ContentDeclaration for Mixed and element
      content.
  """
  # Content model kinds
  EMPTY_CONTENT = 1
  ANY_CONTENT = 2
  MIXED_CONTENT = 3
  ELEMENT_CONTENT = 4

  def __init__(
    self, ownerDocument=None, nodeName=None,
    contentType=ANY_CONTENT, elements=None
  ):
    NamedNode.__init__(self, ownerDocument, nodeName)
    self._contentType = contentType
    self._elements = elements

  def _cloneTo(self, node):
    """ Copy NamedNode state, the content type and the content declaration
        reference (shared, not duplicated) onto `node`.
    """
    NamedNode._cloneTo(self, node)
    node._contentType = self._contentType
    node._elements = self._elements

  def _get_nodeType(self):
    return Node.ELEMENT_DECLARATION_NODE

  def _get_parentNode(self):
    return None

  def _get_contentType(self):
    return self._contentType

  def _get_elements(self):
    return self._elements
1928
1929
class ContentDeclaration(DOMList):
  """ A list representing part of the content model given in an <!ELEMENT>
      declaration. Apart from normal DOMList accessors, has flags specifying
      whether the group is optional, can be included more than once (or both),
      and whether it's a sequence or a choice. List items are element name
      strings or, in the case of element content, ContentDeclarations. In
      mixed content the initial #PCDATA is omitted and nesting is not used.
  """
  def __init__(self):
    DOMList.__init__(self)
    # All three flags start out false.
    self._isOptional = False
    self._isMultiple = False
    self._isSequence = False

  # isOptional
  #
  def _get_isOptional(self):
    return self._isOptional

  def _set_isOptional(self, value):
    self._isOptional = value

  # isMultiple
  #
  def _get_isMultiple(self):
    return self._isMultiple

  def _set_isMultiple(self, value):
    self._isMultiple = value

  # isSequence
  #
  def _get_isSequence(self):
    return self._isSequence

  def _set_isSequence(self, value):
    self._isSequence = value
1955
1956
class AttributeListDeclaration(NamedNode):
  """ Named node holding a map of attribute declaration nodes, exposed
      through the 'declarations' property.
  """
  def __init__(self, ownerDocument=None, nodeName=None):
    NamedNode.__init__(self, ownerDocument, nodeName)
    self._declarations = NamedNodeMap(self, Node.ATTRIBUTE_DECLARATION_NODE)

  def _cloneTo(self, node):
    """ Only the NamedNode state is copied; the declarations map is not.
    """
    NamedNode._cloneTo(self, node)

  def _get_nodeType(self):
    return Node.ATTRIBUTE_LIST_NODE

  def _get_parentNode(self):
    return None

  def _get_declarations(self):
    return self._declarations
1969
1970
class AttributeDeclaration(NamedNode):
  """ Node representing the declaration of a single attribute in an attlist.
      The type of attribute is made known, along with a list of values or
      notation names if the type is Enumeration or Notation. The defaulting
      is made known; if it is #FIXED or defaulted, the child Nodes of the
      declaration are the child nodes to be used by the attribute.
  """
  # Defaulting kinds
  [REQUIRED_VALUE,IMPLIED_VALUE,DEFAULT_VALUE,FIXED_VALUE
  ]= range(1,5)
  # Attribute types; ATTR_NAMES maps each value to its name (index 0 unused)
  [ID_ATTR,IDREF_ATTR,IDREFS_ATTR,ENTITY_ATTR,ENTITIES_ATTR,NMTOKEN_ATTR,
  NMTOKENS_ATTR,NOTATION_ATTR,CDATA_ATTR,ENUMERATION_ATTR
  ]= range(1,11)
  ATTR_NAMES= [ None,
    'ID', 'IDREF', 'IDREFS', 'ENTITY', 'ENTITIES', 'NMTOKEN', 'NMTOKENS',
    'NOTATION', 'CDATA', 'ENUMERATION'
  ]
  _childTypes= (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

  def __init__(self,
    ownerDocument= None, nodeName= None, attributeType= None,
    typeValues= None, defaultType= None
  ):
    NamedNode.__init__(self, ownerDocument, nodeName)
    self._attributeType= attributeType
    self._typeValues= typeValues
    self._defaultType= defaultType

  def _cloneTo(self, node):
    """ Copy NamedNode state and the declaration fields onto `node`.

        Goes through NamedNode._cloneTo, as every other NamedNode subclass in
        this module does; calling Node._cloneTo directly skipped whatever
        NamedNode itself copies (presumably the node name - confirm against
        NamedNode._cloneTo).
    """
    NamedNode._cloneTo(self, node)
    node._attributeType= self._attributeType
    node._typeValues= self._typeValues
    node._defaultType= self._defaultType

  def _get_nodeType(self):
    return Node.ATTRIBUTE_DECLARATION_NODE
  def _get_parentNode(self):
    return None
  def _get_attributeType(self):
    return self._attributeType
  def _get_typeValues(self):
    return self._typeValues
  def _get_defaultType(self):
    return self._defaultType
  def _get_nodeValue(self):
    return self.textContent

  def _createAttribute(self, element):
    """ Create an attribute from this declaration on `element`: the
        attribute takes deep clones of the declaration's children as its
        content, is attached with setAttributeNodeNS and is then marked as
        not specified.
    """
    prefix, localName= _splitName(self.nodeName)
    attr= element.ownerDocument.createAttribute(self.nodeName)
    if localName is not None:
      # The name could be split, so fill in the namespace details by hand.
      if 'xmlns' in (prefix, self.nodeName):
        namespaceURI= NSNS
      elif prefix=='xml':
        namespaceURI= XMNS
      elif prefix is None:
        namespaceURI= None
      else:
        namespaceURI= element.lookupNamespaceURI(prefix)
      attr._namespaceURI= namespaceURI
      attr._prefix, attr._localName= prefix, localName
    self._ownerDocument._initUserData()
    for child in self._childNodes:
      attr.appendChild(child.cloneNode(True))
    self._ownerDocument._initUserData() # don't flush! should not call handlers
    element.setAttributeNodeNS(attr)
    attr._specified= False
2036
2037
2038# Recursive node operations: clone, adopt, import (=clone+adopt) and, for
# entity-reference purposes, recursive-set-readonly.
2040# ============================================================================
2041
def _Node__cloneNode(self, deep):
  """ Return a copy of this node, including its descendants when `deep` is
      true. User data callbacks are collected on the owner document while
      copying and delivered once the copy is complete.
  """
  document = self._ownerDocument
  document._initUserData()
  copy = self._recurse(deep, clone=True)
  document._flushUserData()
  return copy
2049
def _Attr__cloneNode(self, deep):
  """ Copy an attribute. A directly cloned attribute is always marked as
      specified (unlike attributes copied as part of an ancestor's deep
      clone). The flag is set before the queued user data callbacks run.
  """
  document = self._ownerDocument
  document._initUserData()
  copy = self._recurse(deep, clone=True)
  copy._specified = True
  document._flushUserData()
  return copy
2059
def _Document__cloneNode(self, deep):
  """ Make a copy of a document. This is 'implementation dependent' in the
      spec; we allow it and make a new document in response, copying any child
      nodes in importNode-style if deep is True, otherwise just making an
      empty documentElement.

      A shallow clone of a document that has no document element yields an
      empty document instead of failing on the missing element.
  """
  doc = Document()
  self._cloneTo(doc)
  doc._ownerDocument = doc
  if deep:
    # The child list is unlocked only while the copies are appended.
    doc._childNodes.readonly = False
    self._initUserData()
    for child in self._childNodes:
      copy = child._recurse(True, clone=True, ownerDocument=doc)
      doc._childNodes._append(copy)
      copy._containerNode = doc
    doc._childNodes.readonly = True
    self._flushUserData()
  else:
    root = self.documentElement
    if root is not None:
      doc.appendChild(doc.createElementNS(root.namespaceURI, root.nodeName))
  return doc
2082
def _Document__adoptNode(self, source):
  """ Take over a node and its descendants from a potentially different
      document. Returns the adopted node; raises NotSupportedErr for
      Document and DocumentType nodes.
  """
  # Adoption of Documents and - for some reason - DocumentTypes is explicitly
  # disallowed by the spec.
  #
  if source.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE):
    raise NotSupportedErr(source, 'beAdopted')

  # Try to remove the node from wherever it is in the current document. If it
  # has a proper parent node this is easy; otherwise we have to guess which
  # of its owner's NamedNodeMaps it is part of. Note that removing an Entity
  # or Notation will generally fail as these maps are by default readonly.
  #
  # The doctype's extension maps are published as 'pxdomAttlists' and
  # 'pxdomElements' (there is no plain 'attlists'/'elements' property on
  # DocumentType), so those are the names looked up here.
  #
  if source.parentNode is not None:
    source.parentNode.removeChild(source)
  elif source._containerNode is not None:
    mapNames = {
      Node.ATTRIBUTE_NODE: 'attributes',
      Node.ENTITY_NODE: 'entities',
      Node.NOTATION_NODE: 'notations',
      Node.ATTRIBUTE_LIST_NODE: 'pxdomAttlists',
      Node.ATTRIBUTE_DECLARATION_NODE: 'declarations',
      Node.ELEMENT_DECLARATION_NODE: 'pxdomElements'
    }
    nodeMap = getattr(source._containerNode, mapNames[source.nodeType])
    nodeMap._writeItem(source, None)

  if source.nodeType==Node.ATTRIBUTE_NODE:
    source._specified = True
  self._initUserData()
  dest = source._recurse(True, ownerDocument=self)
  self._flushUserData()
  dest._normalize(DOMCONFIG_ENTS)
  return dest
2116
2117
def _Document__importNode(self, importedNode, deep):
  """ Copy a node from another pxdom document into this one, with all its
      descendants if `deep` is true. Document and DocumentType nodes are
      refused with NotSupportedErr. User data callbacks are collected on this
      document during the copy and delivered afterwards.
  """
  refused = (Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE)
  if importedNode.nodeType in refused:
    raise NotSupportedErr(importedNode, 'beImported')
  self._initUserData()
  copy = importedNode._recurse(deep, clone=True, ownerDocument=self)
  self._flushUserData()
  return copy
2128
def _Node___recurse(self, deep, clone=False, ownerDocument=None, readonly=None):
  """ Apply an operation to this node and, when `deep`, to its descendants.

      clone          work on (and return) a fresh copy instead of this node
      ownerDocument  if given, becomes the owner of the resulting node
      readonly       if given, sets the readonly state of the resulting node
                     and of its attribute map

      Returns the node operated on (the copy when cloning, else this node).
  """
  if clone:
    node = self.__class__()
    self._cloneTo(node)
  else:
    node = self

  if ownerDocument is not None:
    origowner = node._ownerDocument
    node._ownerDocument = ownerDocument
  # Let the node type pass the operation on to nodes outside the child list.
  self._recurseTo(node, clone, ownerDocument, readonly)

  if deep:
    # The child list is unlocked while copies are appended to it.
    node._childNodes.readonly = False
    for child in self._childNodes:
      result = child._recurse(deep, clone, ownerDocument, readonly)
      if clone:
        node._childNodes._append(result)
        result._containerNode = node
    node._childNodes.readonly = True

  if readonly is not None:
    node.readonly = readonly
    if node._attributes is not None:
      node._attributes.readonly = readonly

  # Queue the user data notification matching what was done.
  if clone:
    if ownerDocument is None:
      operation = UserDataHandler.NODE_CLONED
    else:
      operation = UserDataHandler.NODE_IMPORTED
    self._callUserDataHandlers(operation, self, node)
  elif ownerDocument is not None and origowner is not None:
    self._callUserDataHandlers(UserDataHandler.NODE_ADOPTED, self, None)
  return node
2166
def _Attr___recurse(self,
  deep, clone=False, ownerDocument=None, readonly=None
):
  """ Attribute version of the recursive operation: always deep, whatever
      `deep` says. When a new owner document is involved (import/adopt) the
      result is marked specified and loses any user-set ID flag.
  """
  result = Node._recurse(self, True, clone, ownerDocument, readonly)
  if ownerDocument is None:
    return result
  result._specified = True
  result._isId = False
  return result
2179
def _EntityReference___recurse(self,
  deep, clone=False, ownerDocument=None, readonly=None
):
  """ Entity reference version of the recursive operation. For a clone,
      import or adoption the children are not recursed into; the result is
      normalised with DOMCONFIG_ENTS_BIND instead. Any other operation is
      passed through unchanged.
  """
  if clone or ownerDocument is not None:
    result = Node._recurse(self, False, clone, ownerDocument, readonly)
    result._normalize(DOMCONFIG_ENTS_BIND)
    return result
  return Node._recurse(self, deep, clone, ownerDocument, readonly)
2193
def _Node___recurseTo(self, node, clone, ownerDocument, readonly):
  """ Hook for passing a recursive operation on to nodes that are held
      outside the child list. The generic node has none, so nothing happens;
      specific node types supply their own version.
  """
  return None
2200
def _Element___recurseTo(self, node, clone, ownerDocument, readonly):
  """ Pass a recursive operation on to an element's attributes. Attributes
      that are not specified are skipped when a new owner document is
      involved, and additionally removed from this element when adopting
      (new owner, no clone). Finally the target's default attributes are set.
  """
  changingOwner = ownerDocument is not None
  # Work on a snapshot, as attributes may be removed along the way.
  for attr in list(self._attributes._list):
    if changingOwner and not attr.specified:
      if not clone:
        self.removeAttributeNode(attr)
      continue
    result = attr._recurse(True, clone, ownerDocument, readonly)
    if clone:
      node._attributes._append(result)
      result._containerNode = node
  node._setDefaultAttributes()
2217
def _DocumentType___recurseTo(self, node, clone, ownerDocument, readonly):
  """ Pass a recursive operation on to the nodes held in a doctype's four
      declaration maps. Each target map is unlocked while it is processed,
      then left in its previous readonly state, or in the requested one if
      `readonly` was given.
  """
  for mapName in ('_entities', '_notations', '_elements', '_attlists'):
    sourceMap = getattr(self, mapName)
    targetMap = getattr(node, mapName)
    finalState = targetMap._readonly
    if readonly is not None:
      finalState = readonly
    targetMap._readonly = False
    for namedNode in sourceMap._list:
      result = namedNode._recurse(True, clone, ownerDocument, readonly)
      if clone:
        targetMap._append(result)
    targetMap._readonly = finalState
2234
def _AttributeListDeclaration____recurseTo(
  self, node, clone, ownerDocument, readonly
):
  """ Pass a recursive operation on to each attribute declaration node,
      adding the results to the target's declarations when cloning.
  """
  # NOTE(review): this name has four underscores after the class name, so
  # the name-splitting in _insertMethods would install it as '__recurseTo'
  # rather than '_recurseTo' - confirm whether that is intended.
  copying = clone
  for declaration in self._declarations:
    result = declaration._recurse(True, clone, ownerDocument, readonly)
    if copying:
      node._declarations._append(result)
2244
2245
2246# DOM 3 UserData
2247#
def _Node__getUserData(self, key):
  """ Return the data object stored under `key`, or None if there is none.
      Entries are (data, handler) pairs; only the data is returned.
  """
  data, handler = self._userData.get(key, (None, None))
  return data
2250
def _Node__setUserData(self, key, data, handler):
  """ Associate `data` (and an optional UserDataHandler) with `key` on this
      node, returning the data previously stored under that key, or None.

      Passing None as data removes any existing association, as DOM Level 3
      specifies, rather than leaving a (None, handler) entry behind whose
      handler would still be queued on later clone/import/adopt operations.
  """
  oldData = self.getUserData(key)
  if data is None:
    try:
      del self._userData[key]
    except KeyError:
      pass
  else:
    self._userData[key] = (data, handler)
  return oldData
2255
def _Document___initUserData(self):
    """Start a fresh, empty callback queue for the calling thread.

    Any queue already present for this thread is replaced.
    """
    threadid = get_ident()
    self._userdatacalls[threadid] = []
2258
def _Node___callUserDataHandlers(self, operation, src, dst):
    """Queue callbacks to all registered UserDataHandlers.

    The operation is added to a list per document/thread and will be flushed
    to the actual UserDataHandlers when Document._flushUserData() is called.
    Entries without a handler are skipped; nodes without any user data do
    nothing at all.
    """
    if self._userData == {}:
        return
    queue = self._ownerDocument._userdatacalls[get_ident()]
    for key, entry in self._userData.items():
        data, handler = entry
        if handler is None:
            continue
        queue.append((handler, operation, key, data, src, dst))
2271
def _Document___flushUserData(self):
    """Deliver the callbacks queued for the calling thread.

    The thread's queue is removed from the document first, then each queued
    handler is invoked in order.
    """
    queues = self._userdatacalls
    threadid = get_ident()
    pending = queues[threadid]
    del queues[threadid]
    for entry in pending:
        handler, operation, key, data, src, dst = entry
        handler.handle(operation, key, data, src, dst)
2280
2281
2282# XML Base (DOM 3 baseURI)
2283# ============================================================================
2284
2285# Most nodes have null baseURIs. PIs always have the same baseURI as their
2286# parents. Document nodes at the top duplicate documentURI.
2287#
def _Node___get_baseURI(self):
  """ Generic baseURI getter: nodes have no base URI unless their type
      supplies its own getter.
  """
  return None
def _ProcessingInstruction___get_baseURI(self):
  """ A processing instruction's baseURI is whatever _getParentURI reports.
  """
  inherited = self._getParentURI()
  return inherited
def _Document___get_baseURI(self):
  """ A document's baseURI is its stored document URI.
  """
  uri = self._documentURI
  return uri
2294
2295# Check elements for xml:base attributes that might affect the baseURI.
2296# Absolute values can be returned directly; relative ones may be affected by
2297# baseURI of parent.
2298#
def _Element___get_baseURI(self):
  """ Work out an element's base URI. Without an xml:base attribute this is
      the inherited (parent) URI. With one, the encoded attribute value is
      returned as-is if it has a scheme, otherwise it is resolved against
      the inherited URI.

      (A leftover debugging statement that stored the element in a module
      global on every call has been removed; it kept the last queried
      element alive for no purpose.)
  """
  base = self._attributes.getNamedItemNS(XMNS, 'base')
  if base is None:
    return self._getParentURI()
  uri = _encodeURI(base.value)
  # A non-empty scheme means the value is already absolute.
  if urlparse.urlparse(uri)[0]!='':
    return uri
  return urlparse.urljoin(self._getParentURI(), uri)
2309
2310# Declaration baseURIs are the URIs of the entity they're defined in, stored
2311# in a static internal property.
2312#
def _Entity___get_baseURI(self):
  """ An entity's baseURI is the value stored on it as `_baseURI`.
  """
  uri = self._baseURI
  return uri
def _Notation___get_baseURI(self):
  """ A notation's baseURI is the value stored on it as `_baseURI`.
  """
  uri = self._baseURI
  return uri
2317
2318# Entity references have the same baseURI as their associated definition,
2319# regardless of where they are in the document.
2320#
def _EntityReference___get_baseURI(self):
  """ An entity reference takes the baseURI of the entity declared under its
      name in the owner document's doctype. Without a doctype, or without
      such an entity, there is no base URI.
  """
  doctype = self._ownerDocument.doctype
  if doctype is None:
    return None
  entity = doctype.entities.getNamedItem(self.nodeName)
  if entity is None:
    return None
  return entity._get_baseURI()
2329
2330# Elements and PIs can inherit baseURIs from their parents. Step up the DOM
2331# hierarchy to a parent or, if unattached, the Document itself. If the parent
2332# is an entity/reference it overrides the parent URI, but with the absolute
2333# URI of the document it was read from, which is not the same as its baseURI.
2334#
def _Node___getParentURI(self):
  """ Find the URI a node inherits from where it sits. A node without a
      container inherits the owner document's documentURI. If the container
      is an entity, or an entity reference whose entity is declared in the
      doctype, and that entity has a document URI recorded, that URI is used.
      Otherwise the container's baseURI is inherited.
  """
  parent = self._containerNode
  document = self._ownerDocument
  if parent is None:
    return document.documentURI

  kind = parent.nodeType
  entity = None
  if kind == Node.ENTITY_NODE:
    entity = parent
  elif kind == Node.ENTITY_REFERENCE_NODE:
    doctype = document.doctype
    if doctype is not None:
      entity = doctype.entities.getNamedItem(parent.nodeName)

  if entity is None or entity._documentURI is None:
    return parent.baseURI
  return entity._documentURI
2349
2350
2351# DOM 3 namespace inspection
2352# ============================================================================
2353
2354# Public lookup interface
2355#
def _Node__lookupNamespaceURI(self, prefix):
  """ Return the namespace URI bound to `prefix` among the in-scope
      namespaces gathered for this node, or None if the prefix is unbound.
  """
  inScope = self._getNamespaces({})
  return inScope.get(prefix, None)
2358
def _Node__lookupPrefix(self, namespaceURI):
  """ Return a prefix bound to `namespaceURI` among the in-scope namespaces
      gathered for this node (using the inverted lookup), or None. A None or
      empty namespace URI never has a prefix.
  """
  if namespaceURI is None or namespaceURI == '':
    return None
  byNamespace = self._getNamespaces({}, True)
  return byNamespace.get(namespaceURI, None)
2363
def _Node__isDefaultNamespace(self, namespaceURI):
  """ Return whether `namespaceURI` equals what is bound to the None (no
      prefix) key among the in-scope namespaces gathered for this node; NONS
      stands in when nothing is bound to that key.
  """
  inScope = self._getNamespaces({})
  default = inScope.get(None, NONS)
  return default==namespaceURI
2366
2367
2368# Public lookup on Document node redirects to document root element
2369#
def _Document__lookupNamespaceURI(self, prefix):
  """ Namespace lookup on a document is answered by its document element.
      A document without a document element has no namespaces in scope, so
      the result is None rather than an error.
  """
  root = self.documentElement
  if root is None:
    return None
  return root.lookupNamespaceURI(prefix)
2372
def _Document__lookupPrefix(self, namespaceURI):
  """ Prefix lookup on a document is answered by its document element. A
      document without a document element has no prefixes in scope, so the
      result is None rather than an error.
  """
  root = self.documentElement
  if root is None:
    return None
  return root.lookupPrefix(namespaceURI)
2375
def _Document__isDefaultNamespace(self, namespaceURI):
  """ The default-namespace test on a document is answered by its document
      element. A document without a document element has no default
      namespace to compare with, so the answer is False rather than an
      error.
  """
  root = self.documentElement
  if root is None:
    return False
  return root.isDefaultNamespace(namespaceURI)
2378
2379
def _Node___getNamespaces(self, store, inverted=False):
  """ Fill `store` with the in-scope namespaces. A generic node contributes
      nothing itself and just asks its container, if it has one. The same
      `store` dictionary is returned.
  """
  container = self._containerNode
  if container is None:
    return store
  container._getNamespaces(store, inverted)
  return store
2386
def _Element___getNamespaces(self, store, inverted=False, ignoreSelf=False):
  """ Add this element's contribution to the in-scope namespace lookup in
      `store`, then continue with the NamedNodeNS version. Entries already in
      the store are never overwritten. With `inverted` the store maps
      namespace to prefix instead of prefix to namespace. Elements whose
      localName is None contribute nothing.
  """
  if self.localName is not None:
    # The element's own prefix/namespace pair, unless told to ignore it.
    if not ignoreSelf:
      if inverted:
        key, value = self.namespaceURI, self.prefix
      else:
        key, value = self.prefix, self.namespaceURI
      if not store.has_key(key):
        store[key] = value

    # Namespace declaration attributes: an attribute without a prefix
    # declares under the None key, others under their local name. An empty
    # value is recorded as None.
    for attr in self.attributes:
      if attr.namespaceURI==NSNS:
        declared = attr.localName
        if attr.prefix is None:
          declared = None
        uri = attr.value or None
        if inverted:
          key, value = uri, declared
        else:
          key, value = declared, uri
        if not store.has_key(key):
          store[key] = value
  return NamedNodeNS._getNamespaces(self, store, inverted)
2404
2405 
2406# Namespace normalisation
2407#
def _Element___getFixups(self, nsframe):
  """ For an element with a given in-scope-namespace lookup, return a list of
      new namespace declaration attributes to add, and a list of prefix
      changes to existing attributes. Note the nsframe of in-scope namespaces
      will be updated in-place.

      nsframe is a dictionary of prefix -> namespaceURI. The result is a
      pair (create, reprefix): create is a list of (prefix, namespaceURI)
      tuples, reprefix a list of (attribute, new prefix) tuples.
  """
  # Ensure element's prefix maps to element's namespaceURI
  #
  create, reprefix= [], []
  if self._namespaceURI not in (NONS, nsframe.get(self.prefix, None)):
    create.append((self._prefix, self._namespaceURI))
    nsframe[self._prefix]= self._namespaceURI

  # Fix up each attribute
  #
  for attr in self._attributes:
    # Attributes in these three namespaces are left alone.
    if attr._namespaceURI in (NONS, NSNS, XMNS):
      continue
    # What the attribute's prefix currently resolves to (None if it has no
    # prefix or the prefix is not in scope).
    namespaceURI= None
    if attr._prefix is not None:
      namespaceURI= nsframe.get(attr._prefix, None)

    # If attribute prefix does not map to its namespace, will need new prefix.
    # Find one that matches the namespace
    #
    if attr._namespaceURI!=namespaceURI:
      prefix= None
      if attr._namespaceURI is not None:
        # Search the frame by value; which prefix is found when several map
        # to the same namespace depends on the dictionary's ordering.
        try:
          ix= nsframe.values().index(attr._namespaceURI)
          prefix= nsframe.keys()[ix]
        except ValueError:
          prefix= None

        # No match, have to create a new namespace declaration for it. Use
        # existing prefix if we can, else make up a new arbitrary name
        #
        # (A match on the None key also ends up here, as the found prefix is
        # then None.)
        if prefix is None:
          prefix= attr._prefix
          nsSuffix= 0
          while prefix is None or nsframe.has_key(prefix):
            nsSuffix= nsSuffix+1
            prefix= 'NS'+str(nsSuffix)
        # NOTE(review): a declaration is queued even when an existing prefix
        # was found in the frame - presumably harmless redeclaration; confirm.
        create.append((prefix, attr._namespaceURI))
        nsframe[prefix]= attr._namespaceURI

      # An attribute with a None namespace is reprefixed to None.
      reprefix.append((attr, prefix))
  return create, reprefix
2456
2457
2458# DOM 3 node comparison
2459# ============================================================================
2460
def _Node__isSameNode(self, other):
  """ Two nodes are the same node only if they are the same object.
  """
  return other is self
2463
def _Node__isEqualNode(self, other):
  """ Check two nodes have the same properties and content: the basic node
      properties must match, both must agree on having an attribute map (and
      the maps must be equal), and their children must be pairwise equal in
      order.
  """
  compared = (
    'nodeType', 'nodeName', 'localName', 'namespaceURI', 'prefix', 'nodeValue'
  )
  for name in compared:
    if getattr(self, name)!=getattr(other, name):
      return False

  mine, theirs = self.attributes, other.attributes
  if (mine is None)!=(theirs is None):
    return False
  if mine is not None and not mine._isEqualMap(theirs):
    return False

  count = self.childNodes.length
  if count!=other.childNodes.length:
    return False
  for index in range(count):
    child = self.childNodes.item(index)
    if not child.isEqualNode(other.childNodes.item(index)):
      return False
  return True
2484
def _DocumentType__isEqualNode(self, other):
  """ Doctype nodes have additional properties that must match to be equal.
      The extension attlists and elements maps are not checked for equality as
      they are not part of the standard.
  """
  if not NamedNode.isEqualNode(self, other):
    return False
  for name in ('publicId', 'systemId', 'internalSubset'):
    if getattr(self, name)!=getattr(other, name):
      return False
  # Both standard maps have to match; entities are compared first.
  if not self._entities._isEqualMap(other._entities):
    return False
  if not self._notations._isEqualMap(other._notations):
    return False
  return True
2501
2502
def _NamedNodeMap___isEqualMap(self, other):
  """ Test whether two maps have equal contents, though possibly in a
      different order. A missing other map is never equal. Every item of this
      map must have an equal item somewhere in the other map, and both maps
      must hold the same number of items.
  """
  if other is None:
    return False
  if len(self._list)!=len(other._list):
    return False
  for mine in self._list:
    matched = False
    for theirs in other._list:
      if mine.isEqualNode(theirs):
        matched = True
        break
    if not matched:
      return False
  return True
2518
2519
def _canonicalAttrSort(self, other):
  """ cmp-style comparison of two Attrs for canonical-form ordering.
      Namespace declarations sort before all other attributes, with the
      default (prefixless) declaration first and the rest by localName.
      Other attributes are ordered by (namespaceURI, localName).
  """
  selfIsDecl= self.namespaceURI==NSNS
  otherIsDecl= other.namespaceURI==NSNS

  if selfIsDecl and otherIsDecl:
    selfIsDefault= self.prefix is None
    if selfIsDefault!=(other.prefix is None):
      if selfIsDefault:
        return -1
      return 1
    return cmp(self.localName, other.localName)

  if selfIsDecl!=otherIsDecl:
    if selfIsDecl:
      return -1
    return 1

  selfKey= (self.namespaceURI, self.localName)
  otherKey= (other.namespaceURI, other.localName)
  return cmp(selfKey, otherKey)
2534
2535
def _Node__compareDocumentPosition(self, other):
  """ Get flags describing the document position of one node relative to
      another.

      Returns 0 when other is this very node, otherwise a sum of
      Node.DOCUMENT_POSITION_* flags saying where other lies relative to
      this node. Ancestry is followed through _containerNode. When the two
      nodes share no container the result is DISCONNECTED and
      IMPLEMENTATION_SPECIFIC, ordered by id(). When the deciding nodes are
      both attributes, or are otherwise non-child nodes of the same type,
      the result is IMPLEMENTATION_SPECIFIC, ordered by attribute map index
      or id() respectively.
  """
  if other is self:
    return 0

  # Collect this node and each of its containers, innermost first
  #
  containers= []
  container= self
  while container is not None:
    containers.append(container)
    container= container._containerNode

  # Walk up from other until a container shared with self is found. The
  # 'determining' nodes are those directly below the shared container on
  # each side (or the node itself, when it is the shared container).
  #
  container= other
  other_determining= other
  while container is not None:
    if container in containers:
      index= containers.index(container)
      if index<1:
        index= 1
      self_determining= containers[index-1]
      break
    other_determining= container
    container= container._containerNode
  else:

    # No shared container: the nodes are disconnected. Use id() so that the
    # answer is at least consistent between calls.
    #
    if id(other)>id(self):
      return (
        Node.DOCUMENT_POSITION_DISCONNECTED +
        Node.DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC +
        Node.DOCUMENT_POSITION_FOLLOWING
      )
    return (
      Node.DOCUMENT_POSITION_DISCONNECTED +
      Node.DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC +
      Node.DOCUMENT_POSITION_PRECEDING
    )

  # One node contains the other
  #
  if container is other:
    return (
      Node.DOCUMENT_POSITION_CONTAINS + Node.DOCUMENT_POSITION_PRECEDING
    )
  if container is self:
    return (
      Node.DOCUMENT_POSITION_CONTAINED_BY + Node.DOCUMENT_POSITION_FOLLOWING
    )

  # Both determining nodes are children: use child order. If only one is a
  # child, the non-child is reported as coming first.
  #
  if (other_determining in container._childNodes):
    if (self_determining in container._childNodes):
      if (
        container._childNodes._index(other_determining) >
        container._childNodes._index(self_determining)
      ):
        return Node.DOCUMENT_POSITION_FOLLOWING
      return Node.DOCUMENT_POSITION_PRECEDING
    return Node.DOCUMENT_POSITION_FOLLOWING
  if (self_determining in container._childNodes):
    return Node.DOCUMENT_POSITION_PRECEDING

  # Neither is a child. Different kinds of node are ordered by nodeType.
  #
  if other_determining.nodeType!=self_determining.nodeType:
    if other_determining.nodeType>self_determining.nodeType:
      return Node.DOCUMENT_POSITION_FOLLOWING
    return Node.DOCUMENT_POSITION_PRECEDING

  # Two attributes of the same element are ordered by their index in the
  # attribute map. (The original compared other's index with itself, so
  # FOLLOWING could never be returned here.)
  #
  if self_determining.nodeType==Node.ATTRIBUTE_NODE:
    attrs= container.attributes
    if attrs._index(other_determining)>attrs._index(self_determining):
      return (
        Node.DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC +
        Node.DOCUMENT_POSITION_FOLLOWING
      )
    return (
      Node.DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC +
      Node.DOCUMENT_POSITION_PRECEDING
    )

  # Any other same-typed non-child nodes: fall back to id() ordering
  #
  if id(other_determining)>id(self_determining):
    return (
      Node.DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC +
      Node.DOCUMENT_POSITION_FOLLOWING
    )
  return (
    Node.DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC +
    Node.DOCUMENT_POSITION_PRECEDING
  )
2613
2614
2615# DOM 3 textual content access
2616# ============================================================================
2617
def _Node___set_textContent(self, value):
  """ Generic textContent setter. Replace all children of this node with a
      single Text node holding the given value. Raises
      NoModificationAllowedErr on a readonly node and HierarchyRequestErr if
      this kind of node cannot have Text children.
  """
  if self.readonly:
    raise NoModificationAllowedErr(self, 'textContent')
  if not (Node.TEXT_NODE in self._childTypes):
    raise HierarchyRequestErr(self, Text())

  # Empty the node, then add the one replacement text node
  #
  while self._childNodes.length>0:
    self.removeChild(self.firstChild)
  replacement= Text(self._ownerDocument)
  replacement.data= value
  self.appendChild(replacement)
2628
2629def _CharacterData___set_textContent(self, value):
2630  if self.readonly:
2631    raise NoModificationAllowedErr(self, 'textContent')
2632  self.data= value
2633def _ProcessingInstruction___set_textContent(self, value):
2634  if self.readonly:
2635    raise NoModificationAllowedErr(self, 'textContent')
2636  self.data= value
2637
2638def _Document___set_textContent(self, value):
2639  return
2640def _DocumentType___set_textContent(self, value):
2641  return
2642def _Notation___set_textContent(self, value):
2643  return
2644
2645
2646def _Node___get_textContent(self):
2647  value= ''
2648  for index in range(self._childNodes.length):
2649    child= self._childNodes.item(index)
2650    if child.nodeType not in [
2651      Node.COMMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE
2652    ]:
2653      value= value+child.textContent
2654  return value
2655
2656def _Attr___get_textContent(self):
2657  value= ''
2658  for index in range(self._childNodes.length):
2659    child= self._childNodes.item(index)
2660    if child.nodeType==Node.TEXT_NODE:
2661      value= value+child.textContent
2662    elif child.nodeType==Node.ENTITY_REFERENCE_NODE:
2663      value= value+r(r(r(child.textContent, '\n',' '), '\t',' '),'\r',' ')
2664  return value
2665
2666def _CharacterData___get_textContent(self):
2667  return self.data
2668
2669def _ProcessingInstruction___get_textContent(self):
2670  return self.data
2671
2672def _Text___get_textContent(self):
2673  if self.isElementContentWhitespace:
2674    return ''
2675  return CharacterData._get_textContent(self)
2676
2677def _Document___get_textContent(self):
2678  return None
2679def _DocumentType___get_textContent(self):
2680  return None
2681def _Notation___get_textContent(self):
2682  return None
2683
2684
2685def _Text___get_wholeText(self):
2686  value= ''
2687  for node in self._getLogicallyAdjacentTextNodes():
2688    value= value+node.data
2689  return value
2690
2691def _Text__replaceWholeText(self, value):
2692  replacement= None
2693  haveReplaced= False
2694  if self._readonly and value!='':
2695    replacement= self._ownerDocument.createTextNode(value)
2696  nodes= self._getLogicallyAdjacentTextNodes()
2697  removables= []
2698  for node in nodes:
2699    if node is self and not (value=='' or self._readonly):
2700      continue
2701    while node.parentNode is not None:
2702      if not node.parentNode.readonly:
2703        if node not in removables:
2704          removables.append(node)
2705        break
2706      node= node.parentNode
2707  for removable in removables:
2708    descendants= []
2709    removable._getDescendants(descendants)
2710    for node in descendants:
2711      if node.nodeType!=Node.ENTITY_REFERENCE_NODE and node not in nodes:
2712        raise NoModificationAllowedErr(node.parentNode, 'removeChild')
2713    if replacement is not None and not haveReplaced:
2714      removable.parentNode.replaceChild(replacement, removable)
2715    else:
2716      removable.parentNode.removeChild(removable)
2717  if replacement is not None:
2718    return replacement
2719  if value=='':
2720    return None
2721  self._data= value
2722  return self
2723
def _Text___getLogicallyAdjacentTextNodes(self):
  """ Return a list of the text and CDATA section nodes that are logically
      adjacent to this node, in forward order. The search first walks
      backwards from this node to find where the run of text starts, then
      walks forwards from there collecting nodes. Entity reference nodes are
      walked through (into their children and back out to their parents) but
      are not themselves put in the list.
  """
  # Node types that can be stepped over without ending the run of text
  #
  ok= (Node.TEXT_NODE, Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE)

  # Backward pass. 'goin' is set after stepping sideways to a sibling, so
  # that on the next iteration that sibling's last child is tried first.
  #
  node= self
  goin= False
  while True:
    previous= None
    if goin:
      previous= node.lastChild
    if previous is None:
      previous= node.previousSibling
      goin= True
    if previous is None:

      # Nothing earlier at this level. Step out to the parent, but carry on
      # only if that parent is an entity reference.
      #
      previous= node.parentNode
      goin= False
      if previous is None or previous.nodeType!=Node.ENTITY_REFERENCE_NODE:
        break
    if previous.nodeType not in ok:
      break
    node= previous

  # Forward pass, starting at the node the backward pass stopped on. Same
  # stepping rules in the other direction: first child, then next sibling,
  # then out to an entity reference parent.
  #
  nodes= []
  goin= True
  while True:
    if node.nodeType!=Node.ENTITY_REFERENCE_NODE:
      nodes.append(node)
    next= None
    if goin:
      next= node.firstChild
    if next is None:
      next= node.nextSibling
      goin= True
    if next is None:
      next= node.parentNode
      goin= False
      if next is None or next.nodeType!=Node.ENTITY_REFERENCE_NODE:
        break
    if next.nodeType not in ok:
      break
    node= next
  return nodes
2763
2764
2765# Normalization and canonicalization
2766# ============================================================================
2767
def _Node__normalize(self):
  """ DOM normalize(). Concatenate text nodes and, if the owner document's
      domConfig has normalize-characters enabled, also character-normalise.
      Works around the fact that check-character-normalization apparently
      shouldn't do anything here. Raises NoModificationAllowedErr on a
      readonly node.
  """
  if self._readonly:
    raise NoModificationAllowedErr(self, 'normalize')

  # Pick the canned configuration to normalise with
  #
  config= DOMCONFIG_TEXT
  if self._ownerDocument.domConfig.getParameter('normalize-characters'):
    config= DOMCONFIG_TEXT_CANONICAL
  self._normalize(config)
  self._changed()
2780
2781
2782def _Document__normalizeDocument(self):
2783  """ Perform all normalisations specified by the domConfig across the whole
2784      document.
2785  """
2786  # normalizeDocument doesn't return exceptions, even NO_MOD. Although there
2787  # is no reason a Document should ever be readonly anyway.
2788  #
2789  if self._readonly:
2790    return
2791
2792  # Recursively normalise the document. Throw away DOMErrors, this method does
2793  # not return them other than to the error-handler.
2794  #
2795  try:
2796    self._normalize(self.domConfig)
2797  except DOMException:
2798    pass
2799
2800  # In canonical-form mode, chuck away the doctype at the end
2801  #
2802  if self.domConfig.getParameter('canonical-form'):
2803    if self.doctype is not None:
2804      self.removeChild(self.doctype)
2805  self._changed()
2806
2807
def _Node___normalize(self, config):
  """ Normalisation back-end. Perform a number of different normalisations on
      child nodes, in the appropriate order.

      config is the configuration object deciding which normalisations are
      done (read through getParameter) and receiving any errors (through
      _handleError). In order: available entity references are replaced by
      their contents if 'entities' is off; then each child is itself
      normalised, unwanted comments removed, unwanted CDATA sections turned
      into text, text nodes merged or dropped, and CDATA sections containing
      ']]>' split or reported.
  """
  p= config.getParameter

  # If entities are off, do a first pass replacing available entities with
  # their contents. Their contents may include other entity references so keep
  # doing this until there are no more available entity children. When
  # replacing try to preserve the baseURI.
  #
  if not p('entities'):
    doctype=self._ownerDocument.doctype
    if doctype is not None:

      # Each replacement changes the child list, so after one is made the
      # scan is restarted from the top (break out of the for, go round the
      # while again). The while ends when a full scan replaces nothing.
      #
      while True:
        for child in self._childNodes._list[:]:
          if child.nodeType==Node.ENTITY_REFERENCE_NODE:
            entity= doctype.entities.getNamedItem(child.nodeName)
            if entity is not None and entity._available:

              # Normalise the reference's content, make it writable, then
              # move each of its children out to sit just before it.
              #
              child._normalize(DOMCONFIG_ENTS_BIND)
              child._recurse(True, readonly= False)
              for grandchild in child.childNodes._list[:]:
                if grandchild.nodeType not in self._childTypes:
                  config._handleError(InvalidEntityForAttrErr(child, False))
                else:
                  baseURI= grandchild.baseURI
                  self.insertBefore(grandchild, child)

                  # If moving the node changed its baseURI, pin the old one
                  # with an xml:base attribute where possible (elements
                  # only); otherwise report that it has been lost.
                  #
                  if config.getParameter('pxdom-preserve-base-uri'):
                    if baseURI!=grandchild.baseURI:
                      if grandchild.nodeType==Node.ELEMENT_NODE:
                        baseAttr= self._ownerDocument.createAttributeNS(
                          XMNS, 'xml:base'
                        )
                        baseAttr.value= baseURI
                        specified= grandchild.hasAttributeNS(XMNS, 'xml:base')
                        grandchild.setAttributeNodeNS(baseAttr)
                        baseAttr._specified= specified
                      else:
                        config._handleError(
                          PIBaseURILostErr(grandchild, False)
                        )
              self.removeChild(child)
              break
        else:
          break

  # Main loop. Begin by normalising the children themselves
  #
  for child in self._childNodes._list[:]:
    child._normalize(config)

    # Remove comments if unwanted
    #
    if child.nodeType==Node.COMMENT_NODE and not p('comments'):
      self.removeChild(child)
      continue

    # If unwanted, change CDATA sections to text nodes
    #
    if child.nodeType==Node.CDATA_SECTION_NODE and not p('cdata-sections'):
      newChild= self.ownerDocument.createTextNode(child.data)
      self.replaceChild(newChild, child)
      child= newChild

    # Concatenate adjacent text nodes, remove ignorable whitespace
    #
    if child.nodeType==Node.TEXT_NODE:
      if (
        p('pxdom-normalize-text') and child.data=='' or not
        p('element-content-whitespace') and child.isElementContentWhitespace
      ):
        self.removeChild(child)
        continue
      elif p('pxdom-normalize-text'):

        # Fold this text into a directly preceding text node. The removed
        # child is a text node, so the CDATA check below does not apply.
        #
        previous= child.previousSibling
        if previous is not None and previous.nodeType==Node.TEXT_NODE:
          previous.data= config._cnorm(previous.data+child.data, child)
          self.removeChild(child)

    # Split CDATA sections including string ']]>'
    #
    if (
      child.nodeType==Node.CDATA_SECTION_NODE
      and p('pxdom-examine-cdata-sections')
      and string.find(child.data, ']]>')!=-1
    ):
      if not config.getParameter('split-cdata-sections'):
        config._handleError(WfInvalidCharacterErr(child))
      else:

        # Cut at each ']]>' so that ']]' ends one section and '>' starts
        # the next. The new sections go in directly after the original.
        #
        datas= string.split(child.data, ']]>')
        child.data= datas[0]+']]'
        refChild= child.nextSibling
        for data in datas[1:-1]:
          newChild= self._ownerDocument.createCDATASection('>'+data+']]')
          self.insertBefore(newChild, refChild)
        newChild= self._ownerDocument.createCDATASection('>'+datas[-1])
        self.insertBefore(newChild, refChild)
        config._handleError(CdataSectionsSplittedErr(child))


  # Some forms of normalisation might require NodeListByTagNames recalculated.
  # Don't bother bounce up to parents as with the normal _changed() method, as
  # they will already know about the normalization, but make sure our own
  # change count is updated.
  #
  self._sequence= self._sequence+1
2914
2915
def _NamedNode___normalize(self, config):
  """ Normalisation for nodes that have a name. The node name is
      character-normalised, then the general node normalisations are run.
      Normalising names could in theory leave two nodes of the same name in
      one NamedNodeMap; the DOM spec doesn't seem to say what should happen
      then, so for now nothing is done about it.
  """
  normalisedName= config._cnorm(self._nodeName, self)
  self._nodeName= normalisedName
  Node._normalize(self, config)
2925
2926
def _NamedNodeNS___normalize(self, config):
  """ Normalisation for namespace-aware named nodes: character-normalise
      the name parts, complain about Level 1 nodes, run the general node
      normalisations and optionally bind a null namespace from the context.
  """
  cnorm= config._cnorm

  # The local name always exists; the prefix may not
  #
  self._localName= cnorm(self._localName, self)
  if self._prefix is not None:
    self._prefix= cnorm(self._prefix, self)

  # Level 1 nodes get what is really a warning, though the spec makes it an
  # ERROR-severity one.
  #
  if config.getParameter('namespaces'):
    if self._namespaceURI is NONS:
      config._handleError(Level1NodeErr(self))

  Node._normalize(self, config)

  # Inside an entity reference a null namespace may just mean 'unbound'. See
  # whether a namespace inherited from outside is in scope for our prefix.
  #
  if not config.getParameter('pxdom-fix-unbound-namespaces'):
    return
  if self._namespaceURI is None and self._containerNode is not None:
    inScope= self._containerNode._getNamespaces({})
    self._namespaceURI= inScope.get(self._prefix, None)
2953
2954
def _Element___normalize(self, config):
  """ Normalisation for elements. As well as the usual named node
      normalisations, an element may need namespace declarations added to
      make it namespace-well-formed, and may have some of its attributes
      normalised, re-prefixed or removed.
  """
  p= config.getParameter

  # The element first, then every attribute; canonical-form also wants the
  # attributes in canonical order.
  #
  NamedNodeNS._normalize(self, config)
  for attr in self._attributes:
    attr._normalize(config)
  if p('canonical-form'):
    self._attributes._list.sort(_canonicalAttrSort)

  # Namespace fixup: add the declarations that are missing and change any
  # attribute prefixes that have to change.
  #
  if p('namespaces'):
    inherited= self._getNamespaces(FIXEDNS.copy(), ignoreSelf= True)
    create, reprefix= self._getFixups(inherited)
    for prefix, namespaceURI in create:
      if prefix is None:
        name= 'xmlns'
      else:
        name= 'xmlns'+':'+prefix
      self.setAttributeNS(NSNS, name, namespaceURI or '')
    for attr, prefix in reprefix:
      attr._prefix= prefix

  # Drop namespace declarations: all of them when namespace-declarations is
  # off, otherwise (in canonical-form) just those repeating what is already
  # in scope from the container.
  #
  if p('canonical-form'):
    nsframe= {}
    if self._containerNode is not None:
      nsframe= self._containerNode._getNamespaces({})
  for attr in self.attributes._list[:]:
    if attr.namespaceURI!=NSNS:
      continue
    if not p('namespace-declarations'):
      self.removeAttributeNode(attr)
    elif p('canonical-form'):
      if attr.prefix is None:
        prefix= None
      else:
        prefix= attr.localName
      inScope= nsframe.get(prefix, None) or ''
      if attr.value==inScope:
        self.removeAttributeNode(attr)
2999
3000
def _Attr___normalize(self, config):
  """ Normalisation for attributes: the namespace-aware named node
      normalisations, after which any user-determined ID-ness is thrown away
      if pxdom-reset-identity is set.
  """
  NamedNodeNS._normalize(self, config)
  if not config.getParameter('pxdom-reset-identity'):
    return
  self._isId= False
3007
def _CharacterData___normalize(self, config):
  """ Normalisation for text-based nodes. After the general node
      normalisations all that is left to do is character-normalise the data.
  """
  Node._normalize(self, config)
  normalised= config._cnorm(self._data, self)
  self._data= normalised
3013
3014
def _Comment___normalize(self, config):
  """ Normalisation for comments. On top of the character data
      normalisations, when well-formed is set, report a comment whose data
      ends with '-' or contains '--'.
  """
  CharacterData._normalize(self, config)
  if not config.getParameter('well-formed'):
    return
  data= self._data
  if data[-1:]=='-' or string.find(data, '--')!=-1:
    config._handleError(WfInvalidCharacterErr(self))
3023
3024
def _ProcessingInstruction___normalize(self, config):
  """ Normalisation for processing instructions. On top of the named node
      normalisations, when well-formed is set, report a PI whose data
      contains '?>'.
  """
  NamedNode._normalize(self, config)
  if not config.getParameter('well-formed'):
    return
  if string.find(self._data, '?>')!=-1:
    config._handleError(WfInvalidCharacterErr(self))
3031
3032
def _EntityReference___normalize(self, config):
  """ Normalisation for entity references. When pxdom-update-entities is
      set, throw the current children away and rebuild them as copies of the
      replacement nodes currently held in the doctype's entity list.
      Otherwise nothing is done.
  """
  if not config.getParameter('pxdom-update-entities'):
    return
  self._readonly= False

  # Empty out, then refill from the matching entity (if there is one)
  #
  while self._childNodes.length>0:
    self.removeChild(self._childNodes.item(0))
  doctype= self._ownerDocument.doctype
  if doctype:
    entity= doctype.entities.getNamedItem(self.nodeName)
    if entity is not None:
      for child in entity.childNodes:
        self.appendChild(child._recurse(True, clone= True, readonly= False))

  # Normalise the new content with unbound-namespace fixing forced on,
  # putting the previous setting back afterwards whatever happens.
  #
  option= 'pxdom-fix-unbound-namespaces'
  previous= config.getParameter(option)
  config.setParameter(option, True)
  try:
    NamedNode._normalize(self, config)
  finally:
    config.setParameter(option, previous)
  self._recurse(True, readonly= True)
3056
3057
3058# DOM 3 LS Load features
3059# ============================================================================
3060
def _DOMImplementation__createLSParser(
  self, mode= DOMImplementation.MODE_SYNCHRONOUS, schemaType= None
):
  """ DOM 3 LS createLSParser. Make a new LSParser. Only synchronous mode
      is supported, and only the DTD schema type (or none at all); anything
      else raises NotSupportedErr.
  """
  if mode!=DOMImplementation.MODE_SYNCHRONOUS:
    raise NotSupportedErr(self, 'createLSParser.mode')
  if schemaType is not None:
    if schemaType!=DTNS:
      raise NotSupportedErr(self, 'createLSParser.schemaType')
  return LSParser()
3069
def _DOMImplementation__createLSInput(self):
  """ DOM 3 LS createLSInput. Make a new, empty LSInput.
  """
  return LSInput()
3072
3073
class LSInput(DOMObject):
  """ A possible source of serialised XML data. Data can come from an
      attached character or byte stream (in Python terms, an object with a
      read() method returning Unicode or narrow strings respectively), from
      plain string data (of either type), or from a resolvable Id/URI.
  """
  def __init__(self):
    DOMObject.__init__(self)

    # Where the data comes from
    #
    self._characterStream= None
    self._byteStream= None
    self._stringData= None

    # How to find and interpret it
    #
    self._systemId= None
    self._publicId= None
    self._baseURI= None
    self._encoding= None
    self._certifiedText= False

  # Property accessors, in getter/setter pairs
  #
  def _get_characterStream(self):
    return self._characterStream
  def _set_characterStream(self, value):
    self._characterStream= value

  def _get_byteStream(self):
    return self._byteStream
  def _set_byteStream(self, value):
    self._byteStream= value

  def _get_stringData(self):
    return self._stringData
  def _set_stringData(self, value):
    self._stringData= value

  def _get_systemId(self):
    return self._systemId
  def _set_systemId(self, value):
    self._systemId= value

  def _get_publicId(self):
    return self._publicId
  def _set_publicId(self, value):
    self._publicId= value

  def _get_baseURI(self):
    return self._baseURI
  def _set_baseURI(self, value):
    self._baseURI= value

  def _get_encoding(self):
    return self._encoding
  def _set_encoding(self, value):
    self._encoding= value

  def _get_certifiedText(self):
    return self._certifiedText
  def _set_certifiedText(self, value):
    self._certifiedText= value
3117
3118
class InputBuffer:
  """ Wrapper for reading from an LSInput (or user object implementing this
      interface) or other resource with possible encoding change if an XML
      declaration is encountered.

      The constructor reads all of the data at once. Afterwards 'chars' holds
      the text to parse and 'index' the current position in it; 'bytes' is
      kept alongside only while the encoding might still be changed by
      setEncoding().
  """
  def __init__(self, input, offset, config, isDocument):
    """ Read the data from input into the buffer.

        input: object with the LSInput properties. characterStream,
          byteStream, stringData and the URI made from systemId/baseURI are
          tried in that order.
        offset: (line, column) tuple that getLocation() adds to positions
          within this buffer.
        config: configuration object, used for getParameter and for
          reporting problems through _handleError.
        isDocument: if true, and supported-media-types-only is set, the
          media type of data fetched from a URI is checked.
    """
    self.config= config
    charsetCertain= config.getParameter('charset-overrides-xml-encoding')
    checkMT= isDocument and config.getParameter('supported-media-types-only')

    # URI of input source
    #
    self.uri= None
    if input.systemId is not None:
      self.uri= _encodeURI(input.systemId)
      if input.baseURI is not None:
        self.uri= urlparse.urljoin(input.baseURI, self.uri)

    # Hold encoding currently in use, and bytes or chars read from the input
    # source. If both bytes and chars are non-None, we are uncertain that the
    # encoding will prove to be correct; an XML declaration could override it.
    #
    self.bytes= None
    self.encoding= None
    self.chars= None

    # Whilst parsing, keep pointer into character data. Keep an offset into
    # data from uri so that we can know what the 'real' index was when dealing
    # with internal entity values. Store pointer to parent buffer as a hack
    # for parameter entity parsing.
    #
    self.offset= offset
    self.parent= None
    self.reset()

    # Read data from the input source as characters or bytes. If we come out
    # of this with bytes and an encoding, that encoding's certainty is
    # dependent on the charset-overrides-xml-encoding parameter.
    #
    if input.characterStream is not None:
      self.chars= input.characterStream.read()
      if unicode is not None:
        self.encoding= 'utf-16'
      else:
        self.encoding= 'utf-8'
    elif input.byteStream is not None:
      self.bytes= input.byteStream.read()
    elif input.stringData not in (None, ''):

      # Hack. Allow string data to be a blank string by hiding it in a tuple.
      #
      if isinstance(input.stringData, type(())):
        data= input.stringData[0]
      else:
        data= input.stringData

      # Treat stringData as bytes if it's a narrow string, or chars in Unicode
      #
      if isinstance(data, Unicode):
        self.chars= data
        self.encoding= 'utf-16'
      else:
        self.bytes= data
        self.encoding= 'utf-8'

    elif self.uri is not None:

      # NOTE(review): if _handleError returns here instead of raising,
      # 'stream' is unbound in the lines that follow - confirm that an
      # IOErrorErr is always fatal.
      #
      try:
        stream= urllib.urlopen(self.uri)
      except IOError, e:
        self.config._handleError(IOErrorErr(e))
      if checkMT:
        contentType= stream.info().type
        if contentType not in XMLTYPES and contentType[-4:]!='+xml':
          self.config._handleError(UnsupportedMediaTypeErr(None))
      self.encoding= stream.info().getparam('charset')
      self.bytes= stream.read()
      stream.close()
    else:
      self.config._handleError(NoInputErr(None))

    # If we have bytes, attempt to convert them to characters. If we are
    # certain of the encoding, drop the original bytes on the floor.
    #
    if self.chars is None:
      certain= self.encoding is not None and charsetCertain
      if self.encoding is None:

        # No encoding given: guess UTF-16 from a byte order mark, else UTF-8
        #
        if self.bytes[:2] in ('\xff\xfe', '\xfe\xff'):
          self.encoding= 'utf-16'
        else:
          self.encoding= 'utf-8'
      self.decode(True)
      if certain:
        self.bytes= None
    else:
      self.decode(False)

  def setEncoding(self, xmlEncoding= None):
    """ Finished checking for encoding in possible XML declaration. If we were
        uncertain about the character encoding to use before, update the chars
        from the bytes again with the new encoding.

        xmlEncoding: encoding name from the XML declaration, or None if
          there was none. Whatever the encoding, the resulting characters
          are then checked against NOTCHAR (and NOTCHARU for Unicode), with
          a ParseErr reported at the position of any that are found.
    """
    if self.bytes is not None:
      if xmlEncoding is not None and xmlEncoding!=self.encoding:
        self.encoding= xmlEncoding
        self.decode(True)
      self.bytes= None
    for ch in NOTCHAR:
      if ch in self.chars:
        self.index= string.find(self.chars, ch)
        self.config._handleError(ParseErr(self,'Invalid chr '+hex(ord(ch))))
    if isinstance(self.chars, Unicode):
      for ch in NOTCHARU:
        if ch in self.chars:
          self.index= string.find(self.chars, ch)
          self.config._handleError(ParseErr(self,'Invalid chr '+hex(ord(ch))))

  def decode(self, fromBytes= True):
    """ Take input from chars or bytes (decoding through encoding property),
        send result with normalised newlines and no BOM.

        fromBytes: if true, chars is first rebuilt from bytes; otherwise the
          existing chars are cleaned up in place.
    """
    if fromBytes:
      if unicode is not None:

        # NOTE(review): as with urlopen in the constructor, 'codec' is
        # unbound below if _handleError returns after a LookupError.
        #
        try:
          codec= codecs.lookup(self.encoding)
        except LookupError:
          self.config._handleError(UnsupportedEncodingErr(None))

        # For UTF-16 with a byte order mark, pick the explicit little- or
        # big-endian encoding to match it.
        #
        if codec==codecs.lookup('utf-16'):
          if self.bytes[:2]=='\xff\xfe':
            self.encoding= 'utf-16le'
          elif self.bytes[:2]=='\xfe\xff':
            self.encoding= 'utf-16be'
        self.chars= unicode(self.bytes, self.encoding, 'replace')
      else:
        self.chars= self.bytes

    # Turn every kind of line separator into a plain newline
    #
    for ls in LS:
      self.chars= r(self.chars, ls, '\n')
    if isinstance(self.chars, Unicode):
      for ls in LSU:
        self.chars= r(self.chars, ls, '\n')
      if self.chars[:1]==unichr(0xFEFF):
        self.chars= self.chars[1:]

  def getLocation(self):
    """ Return (line, column) position corresponding to the current index.
    """
    # Get (line, col) position relative to start of entity, caching the
    # calculated location for the given index to reduce the amount of
    # string to search through next time
    #
    line= string.count(self.chars, '\n', self.cIndex, self.index)
    if line==0:
      line= self.cLocation[0]
      col= self.cLocation[1]+self.index-self.cIndex
    else:
      line= self.cLocation[0]+line
      col= self.index-string.rfind(self.chars, '\n', self.cIndex,self.index)-1
    self.cLocation= (line, col)
    self.cIndex= self.index

    # Return the relative-index added to the entity offset. (1-based)
    #
    if line==0:
      col= col+self.offset[1]
    else:
      col= col+1
    line= line+self.offset[0]
    return (line, col)

  def reset(self):
    """ Set the index point back to the beginning of this buffer in order to
        allow it to be read again.
    """
    self.index= 0
    self.cIndex= 0
    self.cLocation= (0, 0)

  def swallow(self):
    """ Throw away any previously-parsed part of this buffer.
    """
    # The location reached so far becomes the new offset, so that positions
    # reported later still refer to the original data.
    #
    if self.index!=0:
      self.offset= self.getLocation()
      self.chars= self.chars[self.index:]
      self.index= 0
3302
3303
3304# Convenience method for parsers to get an InputBuffer object for a resource
3305# with possible resourceResolver redirection.
3306#
3307def _DOMConfiguration___resolveResource(self, publicId, systemId, baseURI):
3308  if not self._parameters['pxdom-resolve-resources']:
3309    return None
3310  input= None
3311  if self._parameters['resource-resolver'] is not None:
3312    input= self._parameters['resource-resolver'].resolveResource(
3313      DTNS, None, publicId, systemId, baseURI
3314    )
3315  if input is None:
3316    input= LSInput()
3317    input.publicId= publicId
3318    input.systemId= systemId
3319    input.baseURI= baseURI
3320  return InputBuffer(input, (1, 1), self, False)
3321
3322
class NodeFilter(DOMObject):
  """ Base class for node filters, holding the standard SHOW_* and FILTER_*
      constants. The whatToShow property is a bitmask of SHOW_* flags saying
      which node types the filter wants to be shown. acceptNode is called
      with each such node and returns one of the FILTER_* results. This base
      implementation accepts everything.
  """
  # The flag for a node type is 1<<(nodeType-1), which is what _acceptNode
  # tests against, so SHOW_ELEMENT must be 1. (These used to start at 2,
  # one bit out for every node type.)
  #
  [
    SHOW_ELEMENT,SHOW_ATTRIBUTE,SHOW_TEXT,SHOW_CDATA_SECTION,
    SHOW_ENTITY_REFERENCE,SHOW_ENTITY,SHOW_PROCESSING_INSTRUCTION,SHOW_COMMENT,
    SHOW_DOCUMENT,SHOW_DOCUMENT_TYPE,SHOW_DOCUMENT_FRAGMENT,SHOW_NOTATION
  ]= map(lambda n: 2**n, range(12))
  SHOW_ALL= 2**13-1
  FILTER_RESULTS= [
    FILTER_ACCEPT,FILTER_REJECT,FILTER_SKIP,FILTER_INTERRUPT
  ]= range(1, 5)

  def __init__(self, whatToShow= SHOW_ALL):
    """ whatToShow: bitmask of SHOW_* flags; defaults to showing everything.
    """
    # The original signature had no self and called DOMObject.__init__()
    # without an instance, so construction always raised TypeError.
    #
    DOMObject.__init__(self)
    self._whatToShow= whatToShow
  def _get_whatToShow(self):
    return self._whatToShow
  def _set_whatToShow(self, value):
    self._whatToShow= value
  def acceptNode(self, n):
    """ Decide what to do with node n. Subclasses override this.
    """
    return NodeFilter.FILTER_ACCEPT
3342
def _acceptNode(filter, node, startElement= False):
  """ Convenience function to show a node to a filter and hand back its
      verdict. The node counts as accepted without the filter being asked
      when there is no filter (None or False) or when the filter's
      whatToShow does not cover this type of node. If startElement is set
      the filter's startElement method is asked instead of acceptNode.

      Raises ValueError if the filter returns something that is not a known
      result, and LSFilterInterrupt if it returns FILTER_INTERRUPT.
  """
  if filter is None or filter is False:
    return NodeFilter.FILTER_ACCEPT

  # Node types from 32 up have no bit in whatToShow at all
  #
  nodeType= node.nodeType
  if nodeType>=32:
    return NodeFilter.FILTER_ACCEPT
  mask= 1<<(nodeType-1)
  if filter.whatToShow & mask == 0:
    return NodeFilter.FILTER_ACCEPT

  if startElement:
    verdict= filter.startElement(node)
  else:
    verdict= filter.acceptNode(node)

  if verdict not in NodeFilter.FILTER_RESULTS:
    raise ValueError('NodeFilter returned unknown result %r' % verdict)
  if verdict==NodeFilter.FILTER_INTERRUPT:
    raise LSFilterInterrupt()
  return verdict
3360
3361
class LSFilterInterrupt(Exception):
  """ Raised when a filter answers FILTER_INTERRUPT, so that processing can
      be abandoned and control handed back to the caller.
  """
3367
3368
3369class LSParser(DOMObject):
3370  """ DOM Level 3 LS  XML parser.
3371  """
3372  [ACTION_APPEND_AS_CHILDREN,ACTION_REPLACE_CHILDREN,ACTION_INSERT_BEFORE,
3373  ACTION_INSERT_AFTER,ACTION_REPLACE
3374  ]= range(1, 6)
3375  _CHARCHUNK= 1024
3376  def __init__(self, config= None):
3377    DOMObject.__init__(self)
3378    if config is None:
3379      config= ParserConfiguration()
3380    self._domConfig= config
3381    self._filter= None
  def _get_domConfig(self):
    """ Return the configuration object held by this parser.
    """
    return self._domConfig
  def _get_filter(self):
    """ Return the filter attached to this parser (None if there is none).
    """
    return self._filter
  def _set_filter(self, value):
    """ Attach a filter to this parser, replacing any previous one.
    """
    self._filter= value
  def _get_async(self):
    """ Always False.
    """
    return False
  def _get_busy(self):
    """ Always False.
    """
    return False
  def abort(self):
    """ Does nothing.
    """
    pass
3394
3395  # Standard public parse interfaces
3396  #
3397  def parse(self, input):
3398    """ Parse complete document from an InputSource.
3399    """
3400    document= Document()
3401    self.pxdomParseBefore(input, document, None)
3402    return document
3403
3404  def parseURI(self, uri):
3405    """ Parse complete document from a URI.
3406    """
3407    input= LSInput()
3408    input.systemId= uri
3409    document= Document()
3410    self.pxdomParseBefore(input, document, None)
3411    return document
3412   
3413  def parseWithContext(self, input, contextArg, action):
3414    """ Parse a fragment of document (pxdom interprets this as being the
3415        same as an external parsed entity) into a point described by a node
3416        and relationship.
3417    """
3418    # Find the node that will contain the new content, either the contextArg
3419    # or, for certain actions, its parent. Check it can receive content.
3420    #
3421    pnode= [contextArg.parentNode, contextArg][action in (
3422      LSParser.ACTION_APPEND_AS_CHILDREN, LSParser.ACTION_REPLACE_CHILDREN
3423    )]
3424    if pnode is None or pnode.nodeType not in (
3425      Node.DOCUMENT_NODE, Node.ELEMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE
3426    ):
3427      raise NotSupportedErr([pnode,contextArg][pnode is None], 'context')
3428
3429    # Determine where to put the newly-parsed nodes
3430    #
3431    if action==LSParser.ACTION_INSERT_BEFORE:
3432      parentNode= contextArg.parentNode
3433      nextSibling= contextArg
3434    elif action in [LSParser.ACTION_INSERT_AFTER, LSParser.ACTION_REPLACE]:
3435      parentNode= contextArg.parentNode
3436      nextSibling= contextArg.nextSibling
3437    elif action in [
3438      LSParser.ACTION_REPLACE_CHILDREN, LSParser.ACTION_APPEND_AS_CHILDREN
3439    ]:
3440      parentNode= contextArg
3441      nextSibling= None
3442
3443    if action==LSParser.ACTION_REPLACE:
3444      parentNode.removeChild(contextArg)
3445    elif action==LSParser.ACTION_REPLACE_CHILDREN:
3446      while contextArg.childNodes.length>0:
3447        contextArg.removeChild(contextArg.childNodes.item(0))
3448
3449    if nextSibling is None:
3450      previousSibling= parentNode.lastChild
3451    else:
3452      previousSibling= nextSibling.previousSibling
3453
3454    # Mysteriously, according to spec, whitespace removal shouldn't work in
3455    # parseWithContext.
3456    #
3457    ws= self._domConfig.getParameter('element-content-whitespace')
3458    self._domConfig.setParameter('element-content-whitespace', True)
3459    try:
3460      self.pxdomParseBefore(input, parentNode, nextSibling)
3461    finally:
3462      self._domConfig.setParameter('element-content-whitespace', ws)
3463
3464    # Return the first generated node (if there was one)
3465    #
3466    if previousSibling is None:
3467      refChild= parentNode.firstChild
3468    else:
3469      refChild= previousSibling.nextSibling
3470    if refChild not in [None, nextSibling]:
3471      return refChild
3472    return None
3473
3474
3475  def pxdomParseBefore(self, input, parentNode, refChild):
3476    """Main backend parsing entry point
3477
3478    Allows content to be parsed into a node specified in the same way as with
3479    node.insertBefore. (A slightly saner interface than the parseWithContext
3480    call uses.)
3481    """
3482    p= self._domConfig.getParameter
3483
3484    # Entity state: lookups for parameter and general entities, pointing to
3485    # InputBuffers for each; list of entity nesting depth to detect circular
3486    # entity definitions.
3487    #
3488    self._parameterEntities= self._generalEntities= {}
3489    self._entityNest= []
3490    self._dofilter= True
3491
3492    # If the input source is certified, ignore normalisation options
3493    #
3494    if input.certifiedText:
3495      nc= p('normalize-characters')
3496      ccn= p('check-character-normalization')
3497      self._domConfig.setParameter('normalize-characters', False)
3498      self._domConfig.setParameter('check-character-normalization', False)
3499    try:
3500
3501      # Dispatch into internal node parsing interfaces
3502      #
3503      namespaces= parentNode._getNamespaces(FIXEDNS.copy())
3504      self._queue= ''
3505      try:
3506        self._buffer= InputBuffer(input, (1, 1), self._domConfig, True)
3507        self._inEntity= False
3508        self._Declaration(parentNode)
3509        self._Content(parentNode, refChild, namespaces)
3510        self._end()
3511
3512      except LSFilterInterrupt:
3513        pass
3514    finally:
3515      self._buffer= None
3516      del self._parameterEntities
3517      del self._generalEntities
3518      del self._entityNest
3519      if input.certifiedText:
3520        self._domConfig.setParameter('normalize-characters', nc)
3521        self._domConfig.setParameter('check-character-normalization', ccn)
3522
3523
3524  # Parsing utility functions
3525  #
3526  def _push(self, text):
3527    self._queue= self._queue+text
3528
3529  def _flush(self, parentNode, refChild):
3530    """ Write any text that has been read and queued into a new Text node.
3531    """
3532    if self._queue=='':
3533      return None
3534    text= self._domConfig._cnorm(self._queue, parentNode, True)
3535    self._queue= ''
3536    node= parentNode._ownerDocument.createTextNode(text)
3537    node._setLocation(self._buffer.getLocation())
3538
3539    # If whitespace removal is required, must put the node in place to test
3540    # whether it is element content whitespace.
3541    #
3542    if not self._domConfig.getParameter('element-content-whitespace'):
3543      node._containerNode= parentNode
3544      if node._get_isElementContentWhitespace(self._domConfig):
3545        return
3546      node._containerNode= None
3547
3548    self._insert(node, parentNode, refChild)
3549
3550  def _insert(self, newNode, parentNode, refChild, preserve= False):
3551    """ Utility method to insert a node into a specific place in the document
3552        and then find out the filter's point of view if any, possibly removing
3553        or skipping it afterwards. If skipping, optionally try to preserve
3554        the base URI. If the node is already in the parent we assume it's in
3555        the right place and don't try to re-insert it, for performance.
3556    """
3557    if newNode._containerNode is not parentNode:
3558      parentNode.insertBefore(newNode, refChild)
3559    accepted= _acceptNode(self._dofilter and self._filter, newNode)
3560    if accepted==NodeFilter.FILTER_REJECT:
3561      parentNode.removeChild(newNode)
3562    elif accepted==NodeFilter.FILTER_SKIP:
3563      for grandchild in newNode.childNodes._list[:]:
3564        baseURI= grandchild.baseURI
3565        parentNode.insertBefore(grandchild, newNode)
3566        if grandchild.baseURI!=baseURI and preserve:
3567          if grandchild.nodeType==Node.ELEMENT_NODE:
3568            baseAttr= self._ownerDocument.createAttributeNS(XMNS, 'xml:base')
3569            baseAttr.value= baseURI
3570            specified= grandchild.hasAttributeNS(XMNS, 'xml:base')
3571            grandchild.setAttributeNodeNS(baseAttr)
3572            baseAttr._specified= specified
3573          else:
3574            self._domConfig._handleError(PIBaseURILostErr(grandchild, True))
3575      parentNode.removeChild(newNode)
3576
3577  def _error(self, message):
3578    self._domConfig._handleError(ParseErr(self._buffer, message))
3579
3580
3581  # Low-level parsing
3582  #
3583  def _match(self, chars, stepPast= True):
3584    """ Check if a string is the next thing in the queue. Optionally and by
3585        default step over it if it is.
3586    """
3587    index= self._buffer.index
3588    matches= self._buffer.chars[index:index+len(chars)]==chars
3589    if stepPast and matches:
3590      self._buffer.index= index+len(chars)
3591    return matches
3592
3593  def _upto(self, chars):
3594    """ Read text up until the next occurance of one of a range of characters
3595        or strings.
3596    """
3597    end= len(self._buffer.chars)
3598    for s in chars:
3599      index= string.find(self._buffer.chars, s, self._buffer.index, end)
3600      if index!=-1 and index<end:
3601        end= index
3602    try:
3603      return self._buffer.chars[self._buffer.index:end]
3604    finally:
3605      self._buffer.index= end
3606
3607  def _white(self, required= True):
3608    """ Parse white space.
3609    """
3610    start= self._buffer.index
3611    l= len(self._buffer.chars)
3612    while True:
3613      index= self._buffer.index
3614      if index>=l:
3615        break
3616      c= self._buffer.chars[index]
3617      if not (c in WHITE or isinstance(c, Unicode) and c in WHITEU):
3618        break
3619      self._buffer.index= index+1
3620    if required and index<=start:
3621      self._error('Expected whitespace')
3622
3623  def _quote(self):
3624    """ Parse and return a quote character.
3625    """
3626    for quote in '"\'':
3627      if self._match(quote):
3628        return quote
3629    self._error('Expected open-quote')
3630
3631  def _equal(self):
3632    """ Parse an equals sign with possible white space.
3633    """
3634    self._white(False)
3635    if not self._match('='):
3636      self._error('Expected equals sign')
3637    self._white(False)
3638
3639  def _literal(self):
3640    """ Parse and return a quoted literal value.
3641    """
3642    quote= self._quote()
3643    value= self._upto(quote)
3644    if not self._match(quote):
3645      self._error('Quoted literal left open')
3646    return self._domConfig._cnorm(value, None, True)
3647
3648  def _hex(self):
3649    """ Parse and return a hexadecimal number.
3650    """
3651    start= self._buffer.index
3652    l= len(self._buffer.chars)
3653    while True:
3654      index= self._buffer.index
3655      if index>=l or self._buffer.chars[index] not in HEX:
3656        break
3657      self._buffer.index= index+1
3658    if index==start:
3659      self._error('Expected hex number')
3660    return eval('0x'+str(self._buffer.chars[start:self._buffer.index]))
3661
3662  def _dec(self):
3663    """ Parse and return a decimal number.
3664    """
3665    start= self._buffer.index
3666    l= len(self._buffer.chars)
3667    while True:
3668      index= self._buffer.index
3669      if index>=l or self._buffer.chars[index] not in HEX:
3670        break
3671      self._buffer.index= index+1
3672    if index==start:
3673      self._error('Expected decimal number')
3674    return int(self._buffer.chars[start:self._buffer.index])
3675
3676  def _name(self):
3677    """ Parse and return an XML name.
3678    """
3679    index= self._buffer.index
3680    if index>=len(self._buffer.chars):
3681      self._error('Expected name')
3682    char= self._buffer.chars[index]
3683    if char in NOTFIRST:
3684      self._error('Expected name')
3685    if isinstance(char, Unicode):
3686      for c0, c1 in NOTFIRSTU:
3687        if ord(char)>=c0 and ord(char)<c1:
3688          self._error('Expected name')
3689    return self._nmtokens()
3690  def _nmtokens(self):
3691    start= self._buffer.index
3692    l= len(self._buffer.chars)
3693    while True:
3694      index= self._buffer.index
3695      if index>=l:
3696        break
3697      char= self._buffer.chars[index]
3698      if char in NOTNAME or char in NOTCHAR:
3699        break
3700      if isinstance(char, Unicode):
3701        if char in NOTCHARU:
3702          break
3703        bad= False
3704        for c0, c1 in NOTNAMEU:
3705          if ord(char)>=c0 and ord(char)<c1:
3706            bad= True
3707        if bad:
3708          break
3709      self._buffer.index= index+1
3710    if index==start:
3711      self._error('Expected name tokens')
3712    return self._domConfig._cnorm(self._buffer.chars[start:index], None, True)
3713
3714  def _end(self):
3715    """ Check there is no more input to come.
3716    """
3717    if self._buffer.index<len(self._buffer.chars):
3718      self._error('Expected end of input')
3719
3720
3721  # Main structure-parsing methods
3722  #
3723  def _Declaration(self, parentNode):
3724    """ Parse the XML/text declaration, if present.
3725    """
3726    xmlVersion= None
3727    xmlEncoding= None
3728    xmlStandalone= None
3729
3730    if self._match('<?xml'):
3731      self._white()
3732      if not self._match('version'):
3733        self._error('Expected version declaration')
3734      self._equal()
3735      xmlVersion= self._literal()
3736      self._white(False)
3737      if self._match('encoding'):
3738        self._equal()
3739        xmlEncoding= self._literal()
3740        self._white(False)
3741      if self._match('standalone'):
3742        self._equal()
3743        standalone= self._literal()
3744        if standalone not in ('no', 'yes'):
3745          self._error('Expected yes or no')
3746        xmlStandalone= (standalone=='yes')
3747        self._white(False)
3748      if not self._match('?>'):
3749        self._error('Expected ?> to close XML/text declaration')
3750
3751    # Let the buffer know we are now sure about the encoding. This might
3752    # change the encoding being used to read the file.
3753    #
3754    self._buffer.setEncoding(xmlEncoding)
3755
3756    # If the parentNode is a document or external parsed entity, can record
3757    # the above details.
3758    #
3759    if parentNode is not None:
3760      if parentNode.nodeType in (Node.DOCUMENT_NODE, Node.ENTITY_NODE):
3761        parentNode._xmlVersion= xmlVersion or '1.0'
3762        parentNode._xmlEncoding= xmlEncoding
3763        parentNode._inputEncoding= self._buffer.encoding
3764        parentNode._documentURI= self._buffer.uri
3765      if parentNode.nodeType==Node.DOCUMENT_NODE:
3766        parentNode._xmlStandalone= xmlStandalone
3767
3768
3769  def _Content(self, parentNode, refChild, namespaces, inheritURI= None, flush= True):
3770    """Parse general content
3771
3772    Optionally, fix up base URI (for when entity references are off). If flush
3773    is set false, potentially leave trailing text content on the queue for
3774    later possible concatenation.
3775    """
3776    # Context stack. Have to remember namespace setups, baseURI and insertion
3777    # point for each level of element nesting encountered. (Insertion points
3778    # can't be implied by tree traversal because filters can choose to discard
3779    # or reparent elements on the fly.)
3780    #
3781    stack= [(None, NodeFilter.FILTER_ACCEPT, parentNode, refChild, namespaces, inheritURI)]
3782    origfilter= self.filter
3783
3784    while True:
3785      etagname, filtering, parentNode, refChild, namespaces, inheritURI= stack[-1]
3786      isDoc= parentNode.nodeType==Node.DOCUMENT_NODE
3787
3788      # Get text up until next markup character and push it onto the text
3789      # queue
3790      #
3791      text= self._upto('<&')
3792      if text!='':
3793        if isDoc:
3794          for c in text:
3795            if not (c in WHITE or isinstance(c, Unicode) and c in WHITEU):
3796              self._error('Text not allowed at document level')
3797        else:
3798          self._push(text)
3799
3800      # Dispatch to character and entity reference handlers
3801      #
3802      elif self._match('&'):
3803        if isDoc:
3804          self._error('References are not allowed at document level')
3805        if self._match('#'):
3806          self._Charref(parentNode, refChild, namespaces)
3807        else:
3808          self._Entref(parentNode, refChild, namespaces)
3809
3810      # Dispatch to non-element node handlers
3811      #
3812      elif self._match('<'):
3813        if self._match('?'):
3814          self._PI(parentNode, refChild, namespaces, inheritURI)
3815        elif self._match('!'):
3816          if self._match('['):
3817            if self._match('CDATA['):
3818              if isDoc:
3819                self._error('CDATA not allowed at document level')
3820              self._CDATA(parentNode, refChild, namespaces)
3821            else:
3822              self._error('Expected \'CDATA[...]\'')
3823          elif self._match('DOCTYPE'):
3824            if (not isDoc or
3825              parentNode.documentElement is not None or
3826              parentNode._ownerDocument.doctype is not None
3827            ):
3828              self._error('Doctype in unexpected position')
3829            if self._domConfig.getParameter('disallow-doctype'):
3830              self._domConfig._handleError(DoctypeNotAllowedErr(None))
3831            self._Doctype(parentNode, refChild, namespaces)
3832          elif self._match('--'):
3833            self._Comment(parentNode, refChild, namespaces)
3834          else:
3835            self._error('Expected comment, doctype or CDATA')
3836
3837        # Start tag
3838        #
3839        elif not self._match('/'):
3840          if isDoc and parentNode.documentElement is not None:
3841            self._error('Only one root element is allowed')
3842          element, empty, newspaces, baseURI= self._Element(parentNode, refChild, namespaces, inheritURI)
3843
3844          # Check the filter's initial opinion of whether it wants the element.
3845          # (Always accept the document root element as per spec.) If a filter
3846          # rejects nodes it's possible that the parsed text nodes will be
3847          # non-normalised. There is no obvious way around this.
3848          #
3849          parentNode.insertBefore(element, refChild)
3850          if parentNode.nodeType==Node.DOCUMENT_NODE:
3851            accepted= NodeFilter.FILTER_ACCEPT
3852          else:
3853            accepted= _acceptNode(self._dofilter and self._filter, element, startElement= True)
3854
3855          if accepted in (NodeFilter.FILTER_SKIP, NodeFilter.FILTER_REJECT):
3856            parentNode.removeChild(element)
3857          if accepted in (NodeFilter.FILTER_ACCEPT, NodeFilter.FILTER_REJECT):
3858            parentNode, refChild= element, None
3859
3860          # Push state onto stack. Hack: if the filter has completely rejected
3861          # the element, we still have to go through the process of parsing it
3862          # all, but we can't let the filter know about any of it.
3863          #
3864          if not empty:
3865            stack.append((element.tagName, accepted, parentNode, refChild, newspaces, baseURI))
3866            if accepted==NodeFilter.FILTER_REJECT:
3867               self._dofilter= False
3868
3869        # End tag
3870        #
3871        else:
3872          name= self._name()
3873          if etagname is None:
3874            self._error('Unexpected %s end-tag' % name)
3875          if name!=etagname:
3876            self._error('Expected %s end-tag, got %s' % (etagname, name))
3877          self._white(False)
3878          if not self._match('>'):
3879            self._error('Expected close angle bracket')
3880          self._flush(parentNode, refChild)
3881          del stack[-1]
3882
3883          # Give filter a chance to reject the completed element (unless it's root)
3884          #
3885          if filtering==NodeFilter.FILTER_ACCEPT:
3886            element= parentNode
3887            etagname, filtering, parentNode, refChild, namespaces, inheritURI= stack[-1]
3888            if parentNode.nodeType==Node.DOCUMENT_NODE:
3889              parentNode.insertBefore(element, refChild)
3890            else:
3891              self._insert(element, parentNode, refChild, self._domConfig.getParameter('pxdom-preserve-base-uri'))
3892
3893          # Revert reject-filter-off hack
3894          #
3895          if filtering==NodeFilter.FILTER_REJECT:
3896            self._dofilter= False
3897
3898      else: # eof
3899        if len(stack)!=1:
3900          self._error('%r element left open' % parentNode.tagName)
3901        break
3902    if flush:
3903      self._flush(parentNode, refChild)
3904
3905
3906  def _Element(self, parentNode, refChild, namespaces, baseURI= None):
3907    """Parse element start-tag
3908    """
3909    self._flush(parentNode, refChild)
3910    doc= parentNode._ownerDocument
3911    newspaces= namespaces.copy()
3912    ns= self._domConfig.getParameter('namespaces')
3913
3914    # Create element. Check for any default attributes that might introduce
3915    # namespaces into scope.
3916    #
3917    element= doc.createElement(self._name())
3918    element._setLocation(self._buffer.getLocation())
3919    if ns:
3920      for attr in element.attributes:
3921        if attr.namespaceURI==NSNS:
3922          newspaces[
3923            [attr.localName, None][attr.prefix is None]
3924          ]= attr.value or None
3925
3926    # First pass (parse) over attributes.
3927    #
3928    empty= False
3929    while True:
3930      if self._match('>'):
3931        break
3932      if self._match('/>'):
3933        empty= True
3934        break
3935      self._white()
3936      if self._match('>'):
3937        break
3938      if self._match('/>'):
3939        empty= True
3940        break
3941
3942      name= self._name()
3943      attr= element.getAttributeNode(name)
3944      if attr is not None and attr.specified:
3945        self._error('Duplicate attribute %s' % name)
3946
3947      # Add attribute node with parsed value. Take note of added namespace
3948      # declarations for next pass.
3949      #
3950      attr= doc.createAttribute(name)
3951      attr._setLocation(self._buffer.getLocation())
3952      self._equal()
3953      self._Attr(attr, None, namespaces)
3954
3955      prefix, localName= _splitName(name)
3956      if ns and 'xmlns' in (name, prefix):
3957        newspaces[[localName, None][prefix is None]]= attr.value or None
3958      if not ns or self._domConfig.getParameter('namespace-declarations'):
3959        element.setAttributeNode(attr)
3960
3961      if attr.schemaTypeInfo.typeName=='ID':
3962        element.setIdAttributeNode(attr, True)
3963
3964    # If namespace parsing, use the new in-scope namespaces to work out the
3965    # namespaceURIs of the element and its attributes, converting them up to
3966    # level 2 nodes.
3967    #
3968    if ns:
3969      prefix, localName= _splitName(element.nodeName)
3970      if localName is None:
3971        self._error('Element %s not namespace-well-formed' % element.nodeName)
3972      element._prefix= prefix
3973      element._localName= localName
3974      if newspaces.has_key(prefix):
3975        element._namespaceURI= newspaces[prefix]
3976      else:
3977        element._namespaceURI= None
3978        if prefix is not None:
3979          self._domConfig._handleError(UnboundNSErr(element, self._inEntity))
3980
3981      for attr in element.attributes:
3982        prefix, localName= _splitName(attr.nodeName)
3983        if localName is None:
3984          self._error('Attr %s not namespace-well-formed' % attr.nodeName)
3985        attr._prefix= prefix
3986        attr._localName= localName
3987        if prefix is None and localName=='xmlns':
3988          attr._namespaceURI= NSNS
3989        elif prefix is None:
3990          attr._namespaceURI= None
3991        elif newspaces.has_key(prefix):
3992          attr._namespaceURI= newspaces[prefix]
3993        else:
3994          attr._namespaceURI= None
3995          self._domConfig._handleError(UnboundNSErr(element, self._inEntity))
3996
3997    # If we are inheriting a skipped baseURI and the element doesn't completely
3998    # override it with an absolute URI, fix it up
3999    #
4000    if baseURI is not None:
4001      if element.hasAttributeNS(XMNS, 'base'):
4002        baseURI= urlparse.urljoin(baseURI,element.getAttributeNS(XMNS,'base'))
4003      if element.baseURI!=baseURI:
4004        baseAttr=parentNode._ownerDocument.createAttributeNS(XMNS, 'xml:base')
4005        baseAttr.value= baseURI
4006        specified= element.hasAttributeNS(XMNS, 'xml:base')
4007        element.setAttributeNodeNS(baseAttr)
4008        baseAttr._specified= specified
4009
4010    return element, empty, newspaces, baseURI
4011
4012
4013  def _Attr(self, parentNode, refChild, namespaces):
4014    """ Parse quoted attribute value. Turn non-escaped whitespace characters
4015        into actual spaces as XML mysteriously requires.
4016    """
4017    # Attr children are never passed to filter.
4018    #
4019    filter= self._filter
4020    self._filter= None
4021    quote= self._quote()
4022
4023    while True:
4024      text= self._upto(quote+'<&')
4025      if text!='':
4026        for white in WHITE:
4027          text= r(text, white, ' ')
4028        if isinstance(text, Unicode):
4029          for white in WHITEU:
4030            text= r(text, white, ' ')
4031        self._push(text)
4032      if self._match('&'):
4033        if self._match('#'):
4034          self._Charref(parentNode, refChild, namespaces)
4035        else:
4036          self._Entref(parentNode, refChild, namespaces)
4037      elif self._match('<'):
4038        self._error('Expected close quote, found < in attribute value')
4039      else:
4040        break
4041    if not self._match(quote):
4042      self._error('Attr value left open, expected close quote')
4043    self._flush(parentNode, refChild)
4044    self._filter= filter
4045
4046
4047  def _Charref(self, parentNode, refChild, namespaces, textonly= False):
4048    """ Parse character references.
4049    """
4050    # Read character number from hex or decimal.
4051    #
4052    if self._match('x'):
4053      value= self._hex()
4054    else:
4055      value= self._dec()
4056    if not self._match(';'):
4057      self._error('Expected semicolon after character reference')
4058    if value in (0, 0xFFFE, 0xFFFF) or 0xD800<=value<0xE000:
4059      self._error('Invalid character referenced')
4060    elif parentNode.ownerDocument.xmlVersion=='1.0':
4061      if (value<256 and chr(value) in NOTCHAR) or (
4062        unicode is not None and unichr(value) in NOTCHARU
4063      ):
4064        self._error('Invalid character reference for XML 1.0 character model')
4065
4066    # On pre-Unicode Pythons, store non-ASCII character references as fake
4067    # unbound entity references, unless we're parsing an EntityValue, in which
4068    # case we can only pass through an escaped &#...; as a last attempt
4069    #
4070    if unicode is None:
4071      if value>=128:
4072        if textonly:
4073          self._push('&#%d;' % value)
4074        else:
4075          self._flush(parentNode, refChild)
4076          ent= EntityReference(parentNode._ownerDocument, 'x')
4077          ent._nodeName= '#x%x' % value
4078          ent._setLocation(self._buffer.getLocation())
4079          ent._recurse(True, readonly= True)
4080          self._insert(ent, parentNode, refChild)
4081
4082      # Otherwise add as text to the queue. On 'narrow' Python builds
4083      # character references outside the BMP will cause unichr to not work,
4084      # convert to two surrogate UTF-16 characters manually.
4085      #
4086      else:
4087        self._push(chr(value))
4088    else:
4089      try:
4090        unichr(value)
4091      except ValueError:
4092        self._push(unichr( 0xD800+((value-0x10000 >>10)&0x3FF) ))
4093        self._push(unichr( 0xDC00+((value-0x10000)&0x3FF) ))
4094      else:
4095        if unichr(value) in NOTCHARU:
4096          self._error('Invalid character referenced')
4097        if value>=0xD800 and value<0xE000:
4098          self._error('Invalid surrogate character reference')
4099        self._push(unichr(value))
4100
4101
4102  def _Entref(self, parentNode, refChild, namespaces):
4103    name= self._name()
4104    if not self._match(';'):
4105      self._error('Expected semicolon after entity reference')
4106
4107    # Replace built-in entity references with plain text
4108    #
4109    char= {'amp':'&','lt':'<','gt':'>','quot':'"','apos':"'"}.get(name, None)
4110    if char is not None:
4111      self._push(char)
4112      return
4113
4114    # Check for unparsed and circular entities
4115    #
4116    doctype= parentNode._ownerDocument.doctype
4117    if doctype is not None:
4118      ent= doctype.entities.getNamedItem(name)
4119      if ent is not None and ent.notationName is not None:
4120        self._error('Reference to unparsed entity')
4121    isCircular= name in self._entityNest
4122    self._entityNest.append(name)
4123    if isCircular:
4124      self._error('Circular entref: '+string.join(self._entityNest,'>'))
4125
4126    # Look for the InputBuffer for this general entity
4127    #
4128    buffer= None
4129    if not self._generalEntities.has_key(name):
4130      self._domConfig._handleError(UnboundEntityErr())
4131    else:
4132      buffer= self._generalEntities[name]
4133
4134    # If entities is on, create an EntityReference node and parse the
4135    # replacement text into it. If there is no replacement text available
4136    # create an empty EntityReference regardless of the state of entities.
4137    #
4138    if buffer is None or self._domConfig.getParameter('entities'):
4139      self._flush(parentNode, refChild)
4140      ent= EntityReference(parentNode.ownerDocument, name)
4141      if buffer is not None:
4142        parentNode.insertBefore(ent, refChild)
4143        oldbuffer= self._buffer
4144        self._buffer= buffer
4145        self._Content(ent, None, namespaces)
4146        self._buffer= oldbuffer
4147        buffer.reset()
4148      ent._recurse(True, readonly= True)
4149      self._insert(ent, parentNode, refChild,
4150        self._domConfig.getParameter('pxdom-preserve-base-uri')
4151      )
4152
4153    # If entities is off, parse the replacement text from the InputBuffer
4154    # directly into the current node
4155    #
4156    else:
4157      if self._domConfig.getParameter('pxdom-preserve-base-uri'):
4158        inheritURI= None
4159        if buffer.uri!=parentNode.baseURI:
4160          inheritURI= buffer.uri
4161        oldbuffer= self._buffer
4162        self._buffer= buffer
4163        self._Content(parentNode, refChild, namespaces, inheritURI, flush= False)
4164        self._buffer= oldbuffer
4165        buffer.reset()
4166
4167    del self._entityNest[-1]
4168
4169
4170  def _Comment(self, parentNode, refChild, namespaces):
4171    data= self._upto(['--'])
4172    if not self._match('-->'):
4173      self._error('Expected --> to close comment')
4174    if self._domConfig.getParameter('comments'):
4175      self._flush(parentNode, refChild)
4176      comment= parentNode._ownerDocument.createComment(data)
4177      comment._setLocation(self._buffer.getLocation())
4178      self._insert(comment, parentNode, refChild)
4179
4180
4181  def _PI(self, parentNode, refChild, namespaces, inheritURI= None):
4182    target= self._name()
4183    data= ''
4184    if not self._match('?>'):
4185      self._white()
4186      data= self._upto(['?>'])
4187      if not self._match('?>'):
4188        self._error('Expected ?> to close processing instruction')
4189    pi= parentNode._ownerDocument.createProcessingInstruction(target, data)
4190    pi._setLocation(self._buffer.getLocation())
4191    self._flush(parentNode, refChild)
4192    self._insert(pi, parentNode, refChild)
4193    if inheritURI is not None:
4194      self._domConfig._handleError(PIBaseURILostErr(pi, True))
4195
4196
4197  def _CDATA(self, parentNode, refChild, namespaces):
4198    data= self._upto([']]>'])
4199    if not self._match(']]>'):
4200      self._error('CDATA left open, expected ]]> to close')
4201    if not self._domConfig.getParameter('cdata-sections'):
4202      self._push(data)
4203    else:
4204      cdata= parentNode._ownerDocument.createCDATASection(data)
4205      cdata._setLocation(self._buffer.getLocation())
4206
4207      # Depending on configuration parameter, possibly throw away CDATA
4208      # sections in element content that contain only whitespace. It is
4209      # currently unclear from spec whether this is the right thing.
4210      #
4211      if not self._domConfig.getParameter('element-content-whitespace'):
4212        cdata._containerNode= parentNode
4213        if cdata._get_isElementContentWhitespace(self._domConfig):
4214          cdata= None
4215        else:
4216          cdata._containerNode= None
4217
4218      if cdata is not None:
4219        self._flush(parentNode, refChild)
4220        self._insert(cdata, parentNode, refChild)
4221
4222
  def _Doctype(self, parentNode, refChild, namespaces):
    """ Parse a document type declaration and build its DocumentType node.

        Reads the root element name and optional external ID, inserts a new
        DocumentType into parentNode before refChild, parses the internal
        subset (if a '[' follows) and then the external subset (if a system
        ID was given and the resource can be resolved). Finally the
        replacement text of each parsed general entity is parsed into
        children of its Entity node, and the doctype is made read-only.
        namespaces is passed on to _Content when filling in entities.
    """
    self._white()
    name= self._name()
    # Whitespace is only demanded when the declaration does not close
    # straight after the name (the '>' is peeked at, not consumed).
    if not self._match('>', False):
      self._white()
    publicId=systemId= None
    publicId, systemId= self._externalId(None)

    # Create and insert doctype node. Make it temporarily not readonly.
    # With namespace processing on, a name that _splitName cannot split
    # (second item None) is reported as not namespace-well-formed.
    #
    if self._domConfig.getParameter('namespaces'):
        if _splitName(name)[1] is None:
            self._error('Doctype root element name not namespace-well-formed')
    doctype= DocumentType(None, name, publicId, systemId)
    parentNode.insertBefore(doctype, refChild)

    # Parse internal subset if given. The raw characters between the square
    # brackets are kept as doctype.internalSubset, but only when non-empty.
    #
    self._white(False)
    if self._match('['):
      start= self._buffer.index
      self._DTD(doctype, False)
      if start<self._buffer.index:
        doctype.internalSubset= self._buffer.chars[start:self._buffer.index]
      if not self._match(']'):
        self._error('Internal subset left open, expected ]')
      self._white(False)
    if not self._match('>'):
      self._error('Doctype left open, expected >')

    # Resolve and parse external DTD subset. The current buffer is set aside
    # while the resolved resource is parsed and put back afterwards. If the
    # resource resolves to None the doctype is flagged as not processed.
    #
    if systemId is not None:
      baseURI= parentNode.documentURI
      buffer= self._buffer
      self._buffer=self._domConfig._resolveResource(publicId,systemId,baseURI)
      if self._buffer is None:
        doctype._processed= False
      else:
        self._Declaration(None)
        self._DTD(doctype, True)
        self._end()
      self._buffer= buffer

    # Fill in the children of available parsed general entities from the
    # replacement text in the InputBuffer we made at <!ENTITY> stage.
    # Entities with a notation name (unparsed) and entities with no stored
    # buffer are left alone. The entity name is pushed on _entityNest for
    # the duration of the parse - presumably to detect circular references;
    # the check itself is not visible here.
    #
    oldbuffer= self._buffer
    self._inEntity= True
    for ent in doctype.entities._list:
      if ent.notationName is None:
        buffer= self._generalEntities.get(ent.nodeName, None)
        if buffer is not None:
          self._buffer= buffer
          self._entityNest.append(ent.nodeName)
          self._Content(ent, None, namespaces)
          del self._entityNest[-1]
          ent._available= True
          buffer.reset()
          self._buffer= oldbuffer
    self._inEntity= False

    # Finished, make doctype read-only as per DOM spec
    #
    doctype._recurse(True, readonly= True)
4288
4289
4290  # Parameter entity handling for DTD parsing.
4291  #
4292  def _checkPE(self, doctype, white= True, ignorePercent= False):
4293    """ Check whether the buffer contains a parameter entity reference, or
4294        whether the buffer is coming to an end inside a parameter entity. In
4295        either case return a different buffer object to the caller to carry on
4296        parsing. Optionally skip white spaces at the edges of references and
4297        replacements. Optionally allow and ignore a % followed by whitespace,
4298        for the construct <!ENTITY % ...> which is the only place this can
4299        occur.
4300    """
4301    while True:
4302      if white:
4303        self._white(False)
4304
4305      # Step into PE
4306      #
4307      if self._match('%'):
4308        if ignorePercent:
4309          index= self._buffer.index
4310          if self._buffer.chars[index:index+1] in WHITE+'%':
4311            self._buffer.index= index-1
4312            return
4313
4314        name= self._name()
4315        if not self._match(';'):
4316          self._error('Expected ; to end parameter reference')
4317        if doctype is not None and doctype._processed:
4318          if not self._parameterEntities.has_key(name):
4319            self._error(self._buffer, 'Undefined parameter entity referenced')
4320          par= self._parameterEntities[name]
4321          if par is None:
4322            doctype._processed= False
4323          else:
4324            if par.parent is not None:
4325              self._error('Circular reference in parameter '+name)
4326            par.parent= self._buffer
4327            self._buffer= par
4328        continue
4329
4330      # Step out of PE
4331      #
4332      l= len(self._buffer.chars)
4333      if self._buffer.index>=l and self._buffer.parent is not None:
4334        par= self._buffer.parent
4335        self._buffer.parent= None
4336        self._buffer.index= 0
4337        self._buffer= par
4338        continue
4339      break
4340
4341
4342  def _externalId(self, doctype, isNotation= False):
4343    """ Parse optional PUBLIC/SYSTEM ID as used by external entity/DTD subset.
4344        For notation declarations, allow a PUBLIC ID on its own.
4345    """
4346    if self._match('PUBLIC'):
4347      self._checkPE(doctype)
4348      publicId= self._literal()
4349      self._checkPE(doctype)
4350      systemId= None
4351      if not isNotation or not self._match('>', stepPast= False):
4352        systemId= self._literal()
4353      return (publicId, systemId)
4354    elif self._match('SYSTEM'):
4355      self._checkPE(doctype)
4356      return (None, self._literal())
4357    return (None, None)
4358
4359
4360  # DTD structure-parsing methods
4361  #
  def _DTD(self, doctype, external):
    """ Parse DTD declarations from internal or external subset or a DeclSep
        parameter entity reference.

        doctype is the DocumentType handed on to the declaration parsers.
        external is a flag: conditional sections (<![INCLUDE[ / <![IGNORE[)
        are reported as an error when it is false. Parsing stops at the end
        of the buffer or in front of a ']', which is left unconsumed for the
        caller to match. Returns nothing.
    """
    while True:
      self._checkPE(doctype)
      if (
        self._buffer.index==len(self._buffer.chars) or
        self._match(']', stepPast=False)
      ):
        break

      # Dispatch declarations to appropriate parsing method. Ignore PIs and
      # comments; they do not appear in the DOM as DocumentType never has
      # any children. For some reason.
      #
      if self._match('<'):
        if self._match('?'):
          self._upto(['?>'])
          if not self._match('?>'):
            self._error('Expected ?> to close PI')
          continue

        if self._match('!'):
          if self._match('--'):
            self._upto(['--'])
            if not self._match('-->'):
              self._error('Expected --> to close comment')
            continue

          # Conditional section. INCLUDE recurses to parse its contents as
          # ordinary declarations; IGNORE skips text while tracking nested
          # '<![' ... ']]>' pairs so that the matching close is found.
          #
          if self._match('['):
            self._checkPE(doctype)
            if not external:
              self._error('Cannot use conditionals in doctype')
            if self._match('INCLUDE'):
              self._checkPE(doctype)
              if not self._match('['):
                self._error('Expected open square bracket')
              self._DTD(doctype, external)
              if not self._match(']]>'):
                self._error('Expected ]]> to close conditional')
              continue
            if self._match('IGNORE'):
              self._checkPE(doctype)
              if not self._match('['):
                self._error('Expected open square bracket')
              nest= 1
              while nest>0:
                self._upto([']]>', '<!['])
                if self._match(']]>'):
                  nest= nest-1
                elif self._match('<!['):
                  nest= nest+1
                else:
                  self._error('Expected ]]> to close conditional')
              continue

          # Markup declaration: pick the parser by keyword, run it, then
          # require the closing '>'.
          #
          decl= None
          if self._match('NOTATION'):
            decl= self._NotationD
          elif self._match('ENTITY'):
            decl= self._EntityD
          elif self._match('ATTLIST'):
            decl= self._AttlistD
          elif self._match('ELEMENT'):
            decl= self._ElementD
          if decl is not None:
            decl(doctype)
            self._checkPE(doctype)
            if not self._match('>'):
              self._error('Expected close angle bracket')
            continue
      # Reached only when none of the branches above recognised the input
      # and continued the loop.
      self._error('Expected markup declaration')
4435
4436
4437  def _NotationD(self, doctype):
4438    """ Parse notation declaration.
4439    """
4440    self._checkPE(doctype)
4441    name= self._name()
4442    self._checkPE(doctype)
4443    publicId, systemId= self._externalId(doctype, True)
4444    if doctype._processed and doctype._notations.getNamedItem(name) is None:
4445      doctype.notations.setNamedItem(Notation(
4446        doctype._ownerDocument, name, publicId, systemId, self._buffer.uri
4447      ))
4448
4449
  def _EntityD(self, doctype):
    """ Parse entity declaration.

        Handles both general and parameter entities (the latter marked by a
        '%' before the name), each either internal (a quoted entity value)
        or external (an external ID, optionally followed by NDATA and a
        notation name). The outcome is an InputBuffer holding the
        replacement text - or None when there is none available - stored in
        the parser's parameter or general entity map, plus, for general
        entities, an Entity node stored on the doctype. Nothing is stored if
        the doctype is no longer being processed or the name is already
        declared. Returns nothing.
    """
    # ignorePercent lets the '%' of a parameter entity declaration survive
    # the parameter-entity check so that it can be matched just below.
    self._checkPE(doctype, ignorePercent= True)
    isParameter= self._match('%')
    self._checkPE(doctype)
    name= self._name()
    self._checkPE(doctype)
    publicId, systemId= self._externalId(doctype)
    self._checkPE(doctype)

    # Internal entities: read the literal entity value into a temporary input
    # buffer and parse only character references and parameter entity
    # references from it - *not* any other type of entity reference, even the
    # built-ins. The queued text from this parse will be the replacement text
    # to be stored in another InputBuffer for later use.
    #
    if systemId is None:
      quote= self._quote()
      location= self._buffer.getLocation()
      literal= self._upto(quote)
      if not self._match(quote):
        self._error('EntityValue left open')
      realbuf= self._buffer

      input= LSInput()
      input.stringData= (literal,) # hack to allow empty string
      input.systemId= self._buffer.uri
      self._buffer= InputBuffer(input, location, self._domConfig, False)

      # Copy plain text to the queue, expanding character references and
      # stepping through parameter entity references, until the temporary
      # buffer is used up.
      while True:
        self._push(self._upto(('&#', '%')))
        if self._match('&#'):
          self._Charref(doctype, None, None, textonly= True)
        else:
          self._checkPE(doctype, white= False)
          if self._buffer.index==len(self._buffer.chars):
            break

      # Take the accumulated text out of the queue and go back to the buffer
      # the declaration itself is being read from.
      replacement= self._queue
      self._queue= ''
      self._buffer=realbuf

      input= LSInput()
      input.stringData= (replacement,)
      input.systemId= self._buffer.uri
      extbuf= InputBuffer(input, location, self._domConfig, False)
      entity= Entity(
        doctype._ownerDocument, name, None, None, None, self._buffer.uri
      )

    # External entities: check for notation (which makes it an unparsed
    # entity) otherwise create LSInput representing external resource and
    # pass it through any LSResourceResolver in use, then make a buffer and
    # read then remove any text-declaration from it.
    #
    else:
      notation= None
      if not self._match('>', stepPast= False):
        self._checkPE(doctype)
      if self._match('NDATA'):
        self._checkPE(doctype)
        notation= self._name()
      entity= Entity(
        doctype._ownerDocument,name,
        publicId,systemId,notation,self._buffer.uri
      )

      # Unparsed entities (those with a notation) are never fetched, so
      # their buffer stays None.
      extbuf= None
      if notation is None:
        extbuf= self._domConfig._resolveResource(
          publicId, systemId, self._buffer.uri
        )
      if extbuf is not None:
        buffer= self._buffer
        self._buffer= extbuf
        self._Declaration(entity)
        self._buffer= buffer
        extbuf.swallow()

    # Store the InputBuffer in one of the parser's maps (general or parameter
    # depending on type). For general entities store the Entity object in the
    # doctype NamedNodeMap, but do not parse it and fill in its children yet;
    # that doesn't happen until the doctype is complete.
    #
    if isParameter:
      if entity.notationName is not None:
        self._error('Parameter entities must be parsed entities')
      if doctype._processed and not self._parameterEntities.has_key(name):
        self._parameterEntities[name]= extbuf
    else:
      if doctype._processed and not self._generalEntities.has_key(name):
        doctype._entities.setNamedItem(entity)
        self._generalEntities[name]= extbuf
4544
4545
4546  def _AttlistD(self, doctype):
4547    """ Parse attribute list declaration.
4548    """
4549    # Get attlist object to write attributes to. Can re-use an existing one
4550    # to add attributes to a previously-declared attlist. If some declarations
4551    # have not been processed must ignore this, so write to a dummy attlist
4552    # we won't do anything with.
4553    #
4554    self._checkPE(doctype)
4555    name= self._name()
4556    if doctype._processed:
4557      attlist= doctype._attlists.getNamedItem(name)
4558      if attlist is None:
4559        attlist= AttributeListDeclaration(doctype._ownerDocument, name)
4560        doctype._attlists.setNamedItem(attlist)
4561    else:
4562      attlist= AttributeListDeclaration(doctype._ownerDocument, name)
4563
4564    # Loop over declared attributes
4565    #
4566    while True:
4567      self._checkPE(doctype)
4568      if self._match('>', stepPast= False):
4569        break
4570      name= self._name()
4571      self._checkPE(doctype)
4572      typeValues= None
4573
4574      # Look for known attribute value type names (CDATA etc.) but do it in
4575      # reverse order as a nasty hack to ensure 'NMTOKENS' is detected before
4576      # the substring 'NMTOKEN' (and similarly for ID[REF[S]]). If no name
4577      # found, must be an enum type.
4578      #
4579      for ix in range(len(AttributeDeclaration.ATTR_NAMES)-1, 0, -1):
4580        if self._match(AttributeDeclaration.ATTR_NAMES[ix]):
4581          attributeType= ix
4582          self._checkPE(doctype)
4583          break
4584      else:
4585        attributeType= AttributeDeclaration.ENUMERATION_ATTR
4586
4587      # For enumeration types, parse list of names. For notation enums, must
4588      # be proper names, not just nmtokens
4589      #
4590      if attributeType in (
4591        AttributeDeclaration.NOTATION_ATTR,
4592        AttributeDeclaration.ENUMERATION_ATTR
4593      ):
4594        if not self._match('('):
4595          self._error(self._buffer,'Expected open bracket to start values')
4596        typeValues= []
4597        while True:
4598          self._checkPE(doctype)
4599          typeValues.append([self._nmtokens, self._name][
4600            attributeType==AttributeDeclaration.NOTATION_ATTR
4601          ]())
4602          self._checkPE(doctype)
4603          if not self._match('|'):
4604            break
4605        if not self._match(')'):
4606          self._error('Expected close bracket to end values')
4607        self._checkPE(doctype)
4608
4609      # Read defaulting type.
4610      #
4611      if self._match('#REQUIRED'):
4612        defaultType= AttributeDeclaration.REQUIRED_VALUE
4613      elif self._match('#IMPLIED'):
4614        defaultType= AttributeDeclaration.IMPLIED_VALUE
4615      elif self._match('#FIXED'):
4616        defaultType= AttributeDeclaration.FIXED_VALUE
4617        self._checkPE(doctype)
4618      else:
4619        defaultType= AttributeDeclaration.DEFAULT_VALUE
4620
4621      # Create attribute declaration object. Add to attlist if not already
4622      # declared. For attributes with default values, parse the attribute
4623      # value into the childNodes.
4624      #
4625      attdef= AttributeDeclaration(
4626        doctype._ownerDocument, name, attributeType, typeValues, defaultType
4627      )
4628
4629      if attlist.declarations.getNamedItem(name) is None:
4630        attlist.declarations.setNamedItem(attdef)
4631      if defaultType in (
4632        AttributeDeclaration.FIXED_VALUE, AttributeDeclaration.DEFAULT_VALUE
4633      ):
4634        self._Attr(attdef, None, FIXEDNS.copy())
4635
4636
4637  def _ElementD(self, doctype):
4638    """ Parse element content declaration.
4639    """
4640    self._checkPE(doctype)
4641    name= self._name()
4642    self._checkPE(doctype)
4643    elements= None
4644    if self._match('EMPTY'):
4645      contentType= ElementDeclaration.EMPTY_CONTENT
4646    elif self._match('ANY'):
4647      contentType= ElementDeclaration.ANY_CONTENT
4648    else:
4649      if not self._match('('):
4650        self._error('Expected open bracket start content model')
4651      self._checkPE(doctype)
4652      if not self._match('#PCDATA'):
4653        contentType= ElementDeclaration.ELEMENT_CONTENT
4654        elements= self._ContentD(doctype)
4655
4656      else:
4657        contentType= ElementDeclaration.MIXED_CONTENT
4658        elements= ContentDeclaration()
4659        self._checkPE(doctype)
4660        while True:
4661          if not self._match('|'):
4662            break
4663          self._checkPE(doctype)
4664          elements._append(self._name())
4665          self._checkPE(doctype)
4666        if not self._match(')'):
4667          self._error('Expected close bracket end content model')
4668        if not self._match('*') and elements.length!=0:
4669          self._error('Expected asterisk ending mixed content')
4670
4671    if doctype._processed and doctype._elements.getNamedItem(name) is None:
4672      doctype._elements.setNamedItem(ElementDeclaration(
4673        doctype._ownerDocument, name, contentType,elements
4674      ))
4675
4676
4677  def _ContentD(self, doctype):
4678    """ Parse (recursively) the content model in an element declaration, minus
4679        the leading open bracket. Return a ContentDeclaration object.
4680    """
4681    elements= ContentDeclaration()
4682    elements.isSequence= None
4683
4684    while True:
4685      if self._match('('):
4686        self._checkPE(doctype)
4687        element= self._ContentD(doctype)
4688      else:
4689        element= self._name()
4690        element= self._SuffixD(element)
4691      elements._append(element)
4692
4693      self._checkPE(doctype)
4694      if self._match(')'):
4695        break
4696      if self._match('|'):
4697        sequence= False
4698      elif self._match(','):
4699        sequence= True
4700      else:
4701        self._error('Expected comma or pipe separator')
4702      if elements.isSequence not in (None, sequence):
4703        self._error('Cannot mix comma and pipe separators')
4704      elements.isSequence= sequence
4705      self._checkPE(doctype)
4706    if elements.isSequence is None:
4707      elements.isSequence= False
4708    return self._SuffixD(elements)
4709
4710  def _SuffixD(self, cp):
4711    """ Parse suffix that appears on content particles in element content
4712         declarations. Return altered version of cp.
4713    """
4714    isOptional= False
4715    isMultiple= False
4716    if self._match('*'):
4717      isOptional= True
4718      isMultiple= True
4719    elif self._match('+'):
4720      isMultiple= True
4721    elif self._match('?'):
4722      isOptional= True
4723    if not isinstance(cp, ContentDeclaration) and (isOptional or isMultiple):
4724      c= ContentDeclaration()
4725      c._append(cp)
4726      cp= c
4727    if isOptional:
4728      cp.isOptional= True
4729    if isMultiple:
4730      cp.isMultiple= True
4731    return cp
4732
4733
4734# Convenience parsing functions. The default parameters for these functions
4735# are slightly different than those of a standard LSParser, to emulate the
4736# minidom functions of the same name. Other DOMConfiguration parameters may be
4737# passed in an optional mapping.
4738#
def parse(fileorpath, parameters= None):
  """ Get a Document object from a file.

      fileorpath is either an object with a read method, used as the byte
      stream to parse, or a filesystem path, which is turned into a file: URL.
      parameters is an optional mapping of DOMConfiguration parameter names to
      values, applied after the defaults set here ('cdata-sections' on and
      'pxdom-resolve-resources' off). Returns the parsed document.
  """
  parser= LSParser()
  parser.domConfig.setParameter('cdata-sections', True)
  parser.domConfig.setParameter('pxdom-resolve-resources', False)
  # A None default is used rather than a shared mutable {} default.
  if parameters is not None:
    for (key, value) in parameters.items():
      parser.domConfig.setParameter(key, value)
  src= _implementation.createLSInput()
  if hasattr(fileorpath, 'read'):
    src.byteStream= fileorpath
  else:
    # pathname2url is inconsistent across platforms, Windows adds // to rooted
    # where Unix does not
    url= urllib.pathname2url(os.path.abspath(fileorpath))
    if url[:2]!='//':
      url= '//'+url
    src.systemId= 'file:'+url
  doc= parser.parse(src)
  return doc
4759
def parseString(content, parameters= None):
  """ Get a Document object from a string.

      content is the markup to parse, passed to the parser as string data.
      parameters is an optional mapping of DOMConfiguration parameter names to
      values, applied after the defaults set here ('cdata-sections' on and
      'pxdom-resolve-resources' off). Returns the parsed document.
  """
  parser= LSParser()
  parser.domConfig.setParameter('cdata-sections', True)
  parser.domConfig.setParameter('pxdom-resolve-resources', False)
  # A None default is used rather than a shared mutable {} default.
  if parameters is not None:
    for (key, value) in parameters.items():
      parser.domConfig.setParameter(key, value)
  src= _implementation.createLSInput()
  src.stringData= content
  return parser.parse(src)
4771
4772
4773# DOM 3 LS Save features
4774# ============================================================================
4775
def _DOMImplementation__createLSOutput(self):
  """ Return a new, empty LSOutput. Installed on DOMImplementation as
      createLSOutput by _insertMethods.
  """
  output= LSOutput()
  return output
def _DOMImplementation__createLSSerializer(self):
  """ Return a new LSSerializer built with no arguments. Installed on
      DOMImplementation as createLSSerializer by _insertMethods.
  """
  serializer= LSSerializer()
  return serializer
4780
4781# Markup content as a property, a convenience interface that was in the June
4782# WD as ElementLS.markupContent. It is no longer in the standard, but is
4783# included in pxdom for its convenience, extended to appear on all node types
4784# (though it is not always writable).
4785#
def _Node___get_pxdomContent(self):
  """ Return this node serialized to a string, using a DOMConfiguration
      constructed from the owner document's one and '\\n' as the newline.
      Installed on Node as _get_pxdomContent by _insertMethods.
  """
  serializer= LSSerializer(DOMConfiguration(self._ownerDocument.domConfig))
  serializer.newLine= '\n'
  return serializer.writeToString(self)
4791
def _Node___set_pxdomContent(self, value):
  """ Parse value as markup, with this node as the context and the replace
      action, using the owner document's configuration. Installed on Node
      as _set_pxdomContent by _insertMethods.
  """
  source= LSInput()
  source.stringData= value
  LSParser(self._ownerDocument.domConfig).parseWithContext(
    source, self, LSParser.ACTION_REPLACE
  )
4797
4798
class LSOutput(DOMObject):
  """ Describes where an LSSerializer should send its output: a character
      stream, a byte stream or a system ID (URI), plus an optional encoding.
      Beyond the standard-defined options, characterStream= True is used
      internally to mean 'return data as string'. All four values start out
      as None. The _get_/_set_ pairs are presumably exposed as plain
      attributes by DOMObject, which is defined elsewhere.
  """
  def __init__(self):
    DOMObject.__init__(self)
    self._characterStream= self._byteStream= None
    self._systemId= self._encoding= None

  # characterStream
  #
  def _get_characterStream(self):
    return self._characterStream
  def _set_characterStream(self, value):
    self._characterStream= value

  # byteStream
  #
  def _get_byteStream(self):
    return self._byteStream
  def _set_byteStream(self, value):
    self._byteStream= value

  # systemId
  #
  def _get_systemId(self):
    return self._systemId
  def _set_systemId(self, value):
    self._systemId= value

  # encoding
  #
  def _get_encoding(self):
    return self._encoding
  def _set_encoding(self, value):
    self._encoding= value
4820
4821
4822class OutputBuffer:
4823  def __init__(self, output, document):
4824    self._output= output
4825    self._buffer= StringIO.StringIO()
4826    self._separator= None
4827    if (
4828      output.characterStream is None and output.byteStream is None
4829      and output.systemId is None
4830    ):
4831      raise NoOutputErr()
4832
4833    # Work out which charsets to use (a) for detecting unencodable characters
4834    # and escaping them (and also putting in the XML declaration if there is
4835    # one) and (b) encoding the final output.
4836    #
4837    if output.characterStream is None:
4838      self.encoding=self.outputEncoding= (
4839        output.encoding or document.inputEncoding or document.xmlEncoding
4840        or 'utf-8'
4841      )
4842    else:
4843      if output.encoding is not None:
4844        self.encoding= output.encoding
4845      elif unicode is not None:
4846        self.encoding= 'utf-16'
4847      else:
4848        self.encoding= 'utf-8'
4849      self.outputEncoding= None
4850
4851    # Ignore endianness in the declared version of the encoding, and check it
4852    # actually exists.
4853    #
4854    if self.encoding is not None:
4855      if (
4856        string.lower(self.encoding)[:6] in ('utf-16', 'utf-32') and
4857        self.encoding[6:-2] in ('', '-', '_') and
4858        string.lower(self.encoding)[-2:] in ('le', 'be')
4859      ):
4860        self.encoding= self.encoding[:6]
4861      if unicode is not None:
4862        try:
4863          unicode('').encode(self.encoding)
4864        except LookupError:
4865          document.domConfig._handleError(UnsupportedEncodingErr())
4866
4867
4868  def flush(self):
4869    """ Finish output, sending buffer contents to the nominated destination
4870        (optionally encoding it). In the special case where characterStream
4871        was 'True' return the buffer as a string, else return a success flag,
4872        which is always True since we raise an exception when there is an
4873        fatal error and don't attempt to carry on.
4874    """
4875    data= self._buffer.getvalue()
4876    self._buffer= None
4877    bs, cs= self._output.byteStream, self._output.characterStream
4878    try:
4879
4880      # Unless outputting to byte-based destination with no outputEncoding,
4881      # try to coerce collected string to unicode. Leave the string narrow if
4882      # it contains characters than cannot be coerced into unicode.
4883      #
4884      if unicode is not None and not isinstance(data, Unicode) and not (
4885        cs is None and self.outputEncoding is None
4886      ):
4887        try:
4888          data= unicode(data, self.outputEncoding or 'us-ascii')
4889        except UnicodeError:
4890          pass
4891
4892      # If outputting character string or stream, return the probably-unicode
4893      # data
4894      #
4895      if cs is True:
4896        return data
4897      elif cs is not None:
4898        cs.write(data)
4899        return True
4900
4901      # If outputting to byte stream/URI, encode if necessary. May fail if
4902      # data still contains non-ascii byte character.
4903      #
4904      if unicode is not None and self.outputEncoding is not None:
4905        try:
4906          data= data.encode(self.outputEncoding)
4907        except UnicodeError:
4908          pass
4909
4910      if bs is True:
4911        return data
4912      if self._output.byteStream is not None:
4913        self._output.byteStream.write(data)
4914        return True
4915
4916      if self._output.systemId is not None:
4917        urlparts= urlparse.urlparse(self._output.systemId, 'file')
4918        scheme= string.lower(urlparts[0])
4919        if scheme=='file':
4920          stream= open(urllib.url2pathname(urlparts[2]), 'wb')
4921          stream.write(data)
4922          stream.close()
4923          return True
4924        elif scheme in ('http', 'https'):
4925          if scheme=='https':
4926            conn= httplib.HTTPSConnection(urlparts[1])
4927          else:
4928            conn= httplib.HTTPConnection(urlparts[1])
4929          conn.request('PUT', urlparts[2], data, {
4930            'Content-Type': 'text/xml', 'Content-Length': str(len(data))
4931          })
4932          response= conn.getresponse()
4933          conn.close()
4934          if not (response.status>=200 and response.status<300):
4935            raise IOErrorErr(IOError(
4936              'HTTP response %d %s' % (response.status, response.reason)
4937            ))
4938          return True
4939        else:
4940          raise IOErrorErr(
4941            ValueError('Can\'t write to URI type %s' % urlparts[0])
4942          )
4943    except IOError, e:
4944      raise IOErrorErr(e)
4945
4946  def setSeparator(self, separator):
4947    """ A separator can be set (or cleared by passing None) on the output,
4948        causing that string to be written the next time write() is called with
4949        a non-empty string. This is useful for eg. attributes, where the
4950        serializer won't know if a whitespace character is needed until the
4951        attribute markup arrives.
4952    """
4953    self._separator= separator
4954
4955  def write(self, data, escaper= None):
4956    """ Accumulate string parts, calling an escaper function back for any
4957        characters that cannot be output in the desired encoding. Note that
4958        even though we do an encode step here, it is only to detect tricky
4959        characters - it is the plain, unencoded versions that are sent to the
4960        output buffer, they will be encoded in the final output encoding in
4961        the flush() step.
4962    """
4963    if self._separator is not None:
4964      self._buffer.write(self._separator)
4965      self._separator= None
4966
4967    # Call the escaper for any restrictedChars in the string.
4968    #
4969    if escaper is not None:
4970      for ch in NOTCHAR:
4971        if ch in data:
4972          data= r(data, ch, escaper.escape(ord(ch)))
4973      if isinstance(data, Unicode):
4974        for ch in NOTCHARU:
4975          if ch in data:
4976            data= r(data, ch, escaper.escape(ord(ch)))
4977
4978    # Try to unicode-encode if we will need to and the result isn't going to
4979    # be a UTF encoding - by definition, all possible characters are encodable
4980    # in a UTF form.
4981    #
4982    if not isinstance(data,Unicode) or string.lower(self.encoding[:3])=='utf':
4983      self._buffer.write(data)
4984    else:
4985      chars= unicode(data)
4986
4987      # See if there are any characters that we can't encode in the string. If
4988      # not, just slap it into the buffer as-is, otherwise we'll need to
4989      # handle the string character-by-character, because up until Python 2.3
4990      # and UnicodeEncodeError it's impossible to tell where the error was.
4991      #
4992      try:
4993        chars.encode(self.encoding, 'strict')
4994      except UnicodeError:
4995
4996        # Iterate over characters. If Python is not compiled in wide-mode
4997        # (UTF-32), there may be surrogates in there; detect and deal with
4998        # two characters at a time in this case.
4999        #
5000        ix= 0
5001        while ix<len(chars):
5002          isSurrogate= 0xD800<=ord(chars[ix])<0xDC00 and (
5003            ix<len(chars)-1 and 0xDC00<=ord(chars[ix+1])<0xE000
5004          )
5005          try:
5006            chars[ix:ix+1+isSurrogate].encode(self.encoding, 'strict')
5007          except UnicodeError:
5008            if escaper is not None:
5009              if isSurrogate:
5010                c= 0x10000+(
5011                  (ord(chars[ix])&0x3FF)<<10)+(
5012                  ord(chars[ix+1])&0x3FF
5013                )
5014              else:
5015                c= ord(chars[ix])
5016              self._buffer.write(escaper.escape(c))
5017          else:
5018            self._buffer.write(chars[ix:ix+1+isSurrogate])
5019          ix= ix+1+isSurrogate
5020
5021      else:
5022        self._buffer.write(chars)
5023
5024
# OutputBuffer escapers
5026#
class _Complainer:
  """ Escaper for OutputBuffer that never produces a replacement: each
      character it is asked to escape is reported through the
      DOMConfiguration as an error, and the empty string is handed back.

      config is the object whose _handleError receives the error, node is
      passed to the error's constructor, and isName says whether the text
      being written is a name rather than ordinary content.
  """
  def __init__(self, config, node, isName= False):
    # These two used to be the wrong way round. A bad character inside a
    # name is the DOM 3 'wf-invalid-character-in-node-name' case; anywhere
    # else it is plain 'wf-invalid-character'.
    #
    if isName:
      self._exn= InvalidCharacterInNodeNameErr
    else:
      self._exn= WfInvalidCharacterErr
    self._node= node
    self._domConfig= config

  def escape(self, c):
    """ Report the character with code c as unwritable. If _handleError
        returns, the character is dropped: '' is written in its place.
    """
    self._domConfig._handleError(self._exn(self._node))
    return ''
5041
5042class _Charreffer:
5043  """ Holds an escaper method that outputs a character reference, optionally
5044      in hex for canonical-form.
5045  """
5046  def __init__(self, hexref= False):
5047    self._hexref= hexref
5048  def escape(self, c):
5049    return ('&#%d;', '&#x%x;')[self._hexref] % c
5050
5051class _CdataSplitter:
5052  """ Holds an escaper method that outputs a CDATA-end-section then a charref,
5053      then re-opens CDATA, as long as the DOMConfiguration allows it. Config
5054      is only called back once per node, after that we null the reference. No
5055      hex option, as canonical-form allows no CDATASections.
5056  """
5057  def __init__(self, config, node):
5058    self._domConfig= config
5059    self._node= node
5060  def escape(self, c):
5061    config= self._domConfig
5062    if config is not None:
5063      if config.getParameter('split-cdata-sections'):
5064        config._handleError(CdataSectionsSplittedErr(self._node))
5065      else:
5066        config._handleError(WfInvalidCharacterErr(self._node))
5067      self._domConfig= None
5068    return ']]>&#%d;<![CDATA[' % c
5069
5070
class LSSerializer(DOMObject):
  """ Serialiser object: writes a node and its descendants out as markup,
      under the control of a DOMConfiguration, an optional filter and a
      newline string.
  """
  def __init__(self, config= None):
    """ config is the DOMConfiguration to serialise with. When omitted a new
        one is made, with 'normalize-characters' turned on if the module's
        CNORM flag is set.
    """
    DOMObject.__init__(self)
    if config is None:
      config= DOMConfiguration()
      if CNORM:
        config.setParameter('normalize-characters', True)
    self._domConfig= config
    self._newLine= os.linesep
    self._filter= None

  # Property accessors
  #
  def _get_domConfig(self): return self._domConfig
  def _get_filter(self): return self._filter
  def _get_newLine(self): return self._newLine

  def _set_filter(self, value): self._filter= value
  def _set_newLine(self, value):
    # Setting None puts the platform's own line separator back.
    #
    if value is None:
      self._newLine= os.linesep
    else:
      self._newLine= value

  def write(self, node, destination):
    """ Serialise node to destination, the output description handed to
        OutputBuffer. Returns whatever the buffer's flush() returns, or
        False if the buffer could not be created and the error handler chose
        to carry on regardless.
    """
    try:
      buffer= OutputBuffer(destination, node._ownerDocument)
    except DOMException:
      # sys.exc_info() rather than 'except X, e', which not every Python
      # version can parse.
      #
      self._domConfig._handleError(sys.exc_info()[1])

      # Only reached when _handleError returned instead of raising. There is
      # no buffer to write into, so give up here; falling through used to
      # die on the unbound name 'buffer'.
      #
      return False

    # Seed the prefix lookup from the parent node, where there is one, on
    # top of a private copy of the fixed namespaces.
    #
    if node.parentNode is not None:
      namespaces= node.parentNode._getNamespaces(FIXEDNS.copy())
    else:
      namespaces= FIXEDNS.copy()
    node._writeTo(
      buffer, self._domConfig, self._filter, self._newLine, namespaces
    )
    return buffer.flush()

  def writeToString(self, node):
    """ Serialise node through an LSOutput whose characterStream is set to
        True, and return the result of write().
    """
    destination= LSOutput()
    destination.characterStream= True
    return self.write(node, destination)

  def writeToURI(self, node, uri):
    """ Serialise node through an LSOutput whose systemId is uri, and return
        the result of write().
    """
    destination= LSOutput()
    destination.systemId= uri
    return self.write(node, destination)
5116
5117
5118def _Node___writeTo(self, dest, config, filter, newLine, namespaces):
5119  """ Markup production, for various node types. The default node behaviour is
5120      just to recurse to all children.
5121  """
5122  for child in self._childNodes:
5123    child._writeTo(dest, config, filter, newLine, namespaces)
5124
5125
5126def _Document___writeTo(self,dest,config,filter,newLine,namespaces):
5127  if config.getParameter('canonical-form') and self._xmlVersion=='1.1':
5128    config._handleError(CanonicalXmlErr(self))
5129
5130  # Output XML preamble
5131  #
5132  if config.getParameter('xml-declaration'):
5133    dest.write('<?xml version="')
5134    dest.write(self._xmlVersion or '1.0', _Complainer(config, self))
5135    if dest.encoding is not None:
5136      dest.write('" encoding="')
5137      dest.write(dest.encoding)
5138    if self._xmlStandalone:
5139      dest.write('" standalone="yes')
5140    dest.write('"?>'+newLine)
5141  elif (self._xmlVersion not in ('1.0', None, '') or self._xmlStandalone):
5142    config._handleError(XmlDeclarationNeededErr(self))
5143
5144  # Put a single newline between each document-level child, as there are no
5145  # whitespace nodes
5146  #
5147  for child in self._childNodes:
5148    child._writeTo(dest, config, filter, newLine, namespaces)
5149    dest.setSeparator(newLine)
5150
5151
def _Element___writeTo(self, dest, config, filter, newLine, namespaces):
  """ Markup production for an element: start-tag with attributes, then
      either an empty-element close or the children followed by an end-tag.

      dest is the output buffer, config the DOMConfiguration consulted for
      serialisation parameters, filter an optional node filter, newLine the
      string written for line breaks (two spaces are added to it for the
      children when pretty-printing) and namespaces a prefix-to-URI mapping;
      this function works on a copy of it.
  """
  accepted= _acceptNode(filter, self)
  if accepted==NodeFilter.FILTER_SKIP:
    # A skipped element writes no tags of its own; hand over to the
    # inherited _writeTo (presumably the default recursion into children).
    #
    NamedNodeNS._writeTo(self, dest, config, filter, newLine, namespaces)
  if accepted!=NodeFilter.FILTER_ACCEPT:
    return

  # Get list of attributes. If doing namespace fixup at output stage, update
  # the namespaces lookup table from namespace declaration attributes then
  # from fixups.
  #
  attrs= self._attributes._list[:]
  newspaces= namespaces.copy()
  reprefix= []
  if config.getParameter('namespaces'):
    for attr in attrs:
      if attr.namespaceURI==NSNS:
        prefix= [attr.localName, None][attr.prefix is None]
        newspaces[prefix]= attr.value or None
    create, reprefix= self._getFixups(newspaces)

    # Each fixup becomes an xmlns declaration, replacing any existing
    # attribute of the same name.
    #
    for prefix, namespaceURI in create:
      name= 'xmlns'
      if prefix is not None:
        name= name+':'+prefix
      for attr in attrs:
        if attr.nodeName==name:
          attrs.remove(attr)
          break
      attr= self._ownerDocument.createAttributeNS(NSNS, name)
      attr.value= namespaceURI or ''
      attrs.append(attr)
      newspaces[prefix]= namespaceURI

  # If outputting canonically, put the attribute list in order. attrs is
  # already a private plain list at this point; the old code tried to take
  # attrs._list again here, which a list does not have.
  #
  canonical= config.getParameter('canonical-form')
  if canonical:
    attrs.sort(_canonicalAttrSort)

  # Write beginning of start-tag. The processed name is kept so that the
  # end-tag is written with exactly the same text.
  #
  escaper= _Complainer(config, self, True)
  tagName= config._cnorm(self.tagName, self)
  dest.write('<')
  dest.write(tagName, escaper)
  dest.setSeparator(' ')

  # Write attributes. Where we remembered that a changed prefix would be
  # required, ask Attr._writeTo to override the actual prefix.
  #
  for attr in attrs:
    for pattr, prefix in reprefix:
      if attr is pattr:
        attr._writeTo(dest,config,filter,newLine,namespaces,prefix)
        break
    else:
      attr._writeTo(dest, config, filter, newLine, namespaces)
    dest.setSeparator(' ')
  dest.setSeparator(None)

  # Canonical form never uses the empty-element syntax. In HTML-compatible
  # mode only childless elements named in HTMLEMPTY may use it.
  #
  if canonical:
    empty= False
  else:
    empty= self._childNodes.length==0
    if config.getParameter('pxdom-html-compatible'):
      empty= empty and (
        self.namespaceURI in (HTNS, None) and self.localName in HTMLEMPTY
      )

  if empty:
    if config.getParameter('pxdom-html-compatible'):
      dest.write(' ')
    dest.write('/>')
  else:
    dest.write('>')
    if self._childNodes.length!=0:

      # Write children, reformatting them in pretty-print mode. A lone
      # single-line text child stays inline even when pretty-printing.
      #
      if not config.getParameter('format-pretty-print') or (
        self._childNodes.length==1 and
        self._childNodes[0].nodeType==Node.TEXT_NODE and
        '\n' not in self._childNodes[0].data
      ):
        NamedNodeNS._writeTo(
          self, dest, config, filter, newLine, newspaces
        )
      else:
        dest.write(newLine+'  ')
        NamedNodeNS._writeTo(
          self, dest, config, filter, newLine+'  ', newspaces
        )
        dest.write(newLine)

    dest.write('</')
    dest.write(tagName, escaper)
    dest.write('>')
5249
5250
def _Attr___writeTo(
  self, dest, config, filter, newLine, namespaces, prefix= NONS
):
  """ Markup production for an attribute: name="value".

      prefix overrides the attribute's own prefix when it is anything other
      than the NONS sentinel; an override of None means no prefix at all.
      The attribute may be left out entirely, depending on the filter and on
      the namespace, default-content and canonical-form parameters.
  """
  # Apply LSSerializerFiltering to non-namespace-declaring attributes only
  #
  isNsDecl= self.namespaceURI==NSNS and config.getParameter('namespaces')
  if (isNsDecl and not config.getParameter('namespace-declarations')):
    return
  if not isNsDecl and _acceptNode(filter, self)!=NodeFilter.FILTER_ACCEPT:
    return

  # Possibly discard default and redundant attributes depending on config
  #
  if not self._specified and config.getParameter('discard-default-content'):
    return
  if self.namespaceURI==NSNS and config.getParameter('canonical-form'):
    # In canonical form, a declaration that merely repeats what the parent
    # of the owning element already has in scope is left out.
    #
    prefix= [self.localName, None][self.prefix is None]
    value= None
    if self._containerNode is not None:
      if self._containerNode.parentNode is not None:
        value= self._containerNode.parentNode._lookupNamespaceURI(prefix)
    if self.value==(value or ''):
      return

  # Output attribute name, with possible overridden prefix
  #
  name= self.nodeName
  if prefix is not NONS:
    name= self.localName
    if prefix is not None:
      name= prefix+':'+name
  dest.write(config._cnorm(name, self),_Complainer(config, self, True))

  # In canonical form mode, output actual attribute value (suitably encoded)
  # no entrefs
  #
  dest.write('="')
  if config.getParameter('canonical-form'):
    # '&' has to be dealt with first, or it would mangle the references
    # produced by the later replacements.
    #
    s= self.value
    for raw, ref in (
      ('&', '&amp;'), ('<', '&lt;'), ('"', '&quot;'),
      ('\x0D', '&#xD;'), ('\n', '&#xA;'), ('\t', '&#x9;')
    ):
      s= r(s, raw, ref)

    # The two non-ASCII line-end characters can only turn up in a Unicode
    # string. This used to test and assign an undefined name 'm', and
    # replaced U+2028 with itself.
    #
    if isinstance(s, Unicode):
      s= r(r(s, unichr(0x85), '&#x85;'), unichr(0x2028), '&#x2028;')
    dest.write(s, _Charreffer(True))

  # Otherwise, iterate into children, but replacing " marks. Don't filter
  # children.
  #
  else:
    for child in self._childNodes:
      child._writeTo(dest, config, None,'&#10;', namespaces, attr=True)
  dest.write('"')
5302
5303
5304def _Comment___writeTo(self,dest,config,filter,newLine,namespaces):
5305  if (not config.getParameter('comments') or
5306    _acceptNode(filter, self)!=NodeFilter.FILTER_ACCEPT
5307  ):
5308    return
5309  if self.data[-1:]=='-' or string.find(self.data, '--')!=-1:
5310    config._handleError(WfInvalidCharacterErr(self))
5311  dest.write('<!--')
5312  pretty= config.getParameter('format-pretty-print')
5313  if pretty and '\n' in string.strip(self.data):
5314    for line in string.split(self.data, '\n'):
5315      line= string.strip(line)
5316      if line!='':
5317        dest.write(newLine+'  ')
5318        dest.write(line, _Complainer(config, self))
5319    dest.write(newLine)
5320  else:
5321    dest.write(r(self.data, '\n', newLine), _Complainer(config, self))
5322  dest.write('-->')
5323
5324def _Text___writeTo(
5325  self, dest, config, filter, newLine, namespaces, attr= False
5326):
5327  if (
5328    not config.getParameter('element-content-whitespace')
5329    and self._get_isElementContentWhitespace(config)
5330  ) or _acceptNode(filter, self)!=NodeFilter.FILTER_ACCEPT:
5331    return
5332
5333  m= r(r(config._cnorm(self.data, self), '&', '&amp;'), '<', '&lt;')
5334  if config.getParameter('canonical-form'): # attr always false here
5335    dest.write(r(r(r(m, '>', '&gt;'), '\r', '&#xD;'), '\n', newLine),
5336      _Charreffer(True)
5337    )
5338  else:
5339    if attr:
5340      m= r(r(m, '"', '&quot;'), '\t', '&#9;')
5341    m= r(r(m, ']]>', ']]&gt;'), '\r', '&#13;')
5342    if isinstance(m, Unicode):
5343      m= r(r(m, unichr(0x85), '&#133;'), unichr(0x2028), '&#8232;')
5344    if config.getParameter('format-pretty-print'):
5345      m= string.join(map(string.strip, string.split(m, '\n')), newLine)
5346    else:
5347      m= r(m, '\n', newLine)
5348    dest.write(m, _Charreffer())
5349
5350def _CDATASection___writeTo(
5351  self, dest, config, filter, newLine, namespaces
5352):
5353  if not config.getParameter('cdata-sections'):
5354    return Text._writeTo(self,dest,config,filter,newLine,namespaces)
5355  if (
5356    not config.getParameter('element-content-whitespace')
5357    and self.isElementContentWhitespace(config)
5358  ) or _acceptNode(filter, self)!=NodeFilter.FILTER_ACCEPT:
5359    return
5360
5361  m= config._cnorm(self.data, self)
5362  escaper= _CdataSplitter(config, self)
5363  dest.write('<![CDATA[')
5364  if string.find(m, ']]>')!=-1 or string.find(m, '\r')!=-1:
5365    escaper.escape(32)
5366    dest.write(r(r(r(m,
5367      ']]>',']]]]><![CDATA[>'), '\r',']]>&#13;<![CDATA['), '\n', newLine),
5368      escaper
5369    )
5370  else:
5371    dest.write(r(m, '\n', newLine), escaper)
5372  dest.write(']]>')
5373
def _ProcessingInstruction___writeTo(
  self, dest, config, filter, newLine, namespaces
):
  """ Markup production for a processing instruction: <?target data?>,
      with the space and data left out when the data is empty.
  """
  if _acceptNode(filter, self)!=NodeFilter.FILTER_ACCEPT:
    return

  dest.write('<?')
  dest.write(self._nodeName, _Complainer(config, self, True))

  content= self._data
  if content!='':
    dest.write(' ')

    # Neither the closing delimiter nor a carriage return may appear in
    # the data; report once if either does.
    #
    for forbidden in ('?>', '\r'):
      if string.find(content, forbidden)>=0:
        config._handleError(WfInvalidCharacterErr(self))
        break

    normalised= config._cnorm(content, self)
    dest.write(r(normalised, '\n', newLine), _Complainer(config, self))

  dest.write('?>')
5389
def _EntityReference___writeTo(self,
  dest, config, filter, newLine, namespaces, attr= False
):
  """ Markup production for an entity reference: either the reference
      itself (&name;) or, when the reference is being skipped, the children
      of the entity it names. attr is true when writing inside an attribute
      value.
  """
  # If entities parameter is false, skip all bound available entity references
  # otherwise pass to filter as normal
  #
  doctype= self._ownerDocument.doctype
  entity= None
  if doctype is not None:
    entity= doctype.entities.getNamedItem(self.nodeName)
  accepted= NodeFilter.FILTER_ACCEPT
  if not config.getParameter('entities'):
    if entity is not None and entity.pxdomAvailable:
      accepted= NodeFilter.FILTER_SKIP
  if accepted==NodeFilter.FILTER_ACCEPT:
    accepted= _acceptNode(filter, self)

  if accepted==NodeFilter.FILTER_ACCEPT:
    dest.write('&')
    dest.write(config._cnorm(self._nodeName, self),
      _Complainer(config, self, True)
    )
    dest.write(';')

  # A filter may answer FILTER_SKIP for a reference that has no matching
  # entity in the doctype. There is then no entity to take children from,
  # so write nothing rather than fail on entity being None.
  #
  elif accepted==NodeFilter.FILTER_SKIP and entity is not None:
    for child in entity._childNodes:
      if attr:
        # Report content of a kind an attribute cannot hold; the child is
        # still written afterwards if the error handler returns.
        #
        if child.nodeType not in Attr._childTypes:
          config._handleError(InvalidEntityForAttrErr(self))
        child._writeTo(dest, config, filter, newLine, namespaces, True)
      else:
        child._writeTo(dest, config, filter, newLine, namespaces)
5422
5423
def _DocumentType___writeTo(
  self, dest, config, filter, newLine, namespaces
):
  """ Markup production for a document type declaration: the name, then
      whichever of the public ID, system ID and internal subset are not
      None.
  """
  dest.write('<!DOCTYPE ')
  dest.write(
    config._cnorm(self._nodeName, self),
    _Complainer(config, self, True)
  )
  escaper= _Complainer(config, self)

  # Work out which delimited pieces follow the name. A system ID is
  # introduced by SYSTEM only when there is no public ID before it.
  #
  pieces= []
  publicId, systemId= self._publicId, self._systemId
  if publicId is not None:
    pieces.append((' PUBLIC "', publicId, '"'))
    if systemId is not None:
      pieces.append((' "', systemId, '"'))
  elif systemId is not None:
    pieces.append((' SYSTEM "', systemId, '"'))
  if self._internalSubset is not None:
    pieces.append((' [', self._internalSubset, ']'))

  for opener, value, closer in pieces:
    dest.write(opener)
    dest.write(config._cnorm(value, self), escaper)
    dest.write(closer)
  dest.write('>')
5450
5451
5452# Exceptions
5453# ============================================================================
5454
class DOMException(Exception):
  """ One class standing in for the DOMException, DOMError and LSException
      interfaces. The _get methods are provided, but the values live in
      ordinary, writable attributes that are read directly, because
      Exception behaves oddly when its getter/setter is overridden.
  """
  # DOMException codes
  #
  INDEX_SIZE_ERR= 1
  DOMSTRING_SIZE_ERR= 2
  HIERARCHY_REQUEST_ERR= 3
  WRONG_DOCUMENT_ERR= 4
  INVALID_CHARACTER_ERR= 5
  NO_DATA_ALLOWED_ERR= 6
  NO_MODIFICATION_ALLOWED_ERR= 7
  NOT_FOUND_ERR= 8
  NOT_SUPPORTED_ERR= 9
  INUSE_ATTRIBUTE_ERR= 10
  INVALID_STATE_ERR= 11
  SYNTAX_ERR= 12
  INVALID_MODIFICATION_ERR= 13
  NAMESPACE_ERR= 14
  INVALID_ACCESS_ERR= 15
  VALIDATION_ERR= 16
  TYPE_MISMATCH_ERR= 17

  # LSException codes
  #
  PARSE_ERR= 81
  SERIALIZE_ERR= 82

  # DOMError severities; SEVERITY_NAMES is indexed by severity
  #
  SEVERITY_WARNING= 1
  SEVERITY_ERROR= 2
  SEVERITY_FATAL_ERROR= 3
  SEVERITY_NAMES= ('', 'Warning', 'Error', 'Fatal error')

  # Defaults, overridden by subclasses or per instance
  #
  code= 0
  type= 'pxdom-exception'
  severity= SEVERITY_FATAL_ERROR
  message= 'pxdom exception'
  relatedData= None
  location= None

  def __init__(self, related= None):
    """ related, if given, becomes relatedData and supplies the location
        from its pxdomLocation attribute.
    """
    if related is not None:
      self.relatedData= related
      self.location= related.pxdomLocation
    self.relatedException= self
    label= self.SEVERITY_NAMES[self.severity]
    self.message= '%s \'%s\'' %(label, self.type)

  def __str__(self):
    return self.message
  def __repr__(self):
    return self.message

  def _get_code(self):
    return self.code
  def _get_relatedData(self):
    return self.relatedData
  def _get_location(self):
    return self.location

  def allowContinue(self, cont):
    """ Decide whether processing may carry on after this error. cont is
        the error handler's answer, or None if there was none. Warnings
        continue and errors stop unless told otherwise; anything else
        always stops.
    """
    if self.severity==DOMException.SEVERITY_WARNING:
      default= True
    elif self.severity==DOMException.SEVERITY_ERROR:
      default= False
    else:
      return False
    if cont is None:
      return default
    return cont
5505
5506
5507# Traditional DOMExceptions
5508#
class IndexSizeErr(DOMException):
  """ DOMException with code INDEX_SIZE_ERR. The message records the index
      and the length of the data it was given with.
  """
  code= DOMException.INDEX_SIZE_ERR
  def __init__(self, data, index):
    DOMException.__init__(self)
    size= len(data)
    self.message= 'index %s in data of length %s' % (index, size)
5514
class HierarchyRequestErr(DOMException):
  """ DOMException with code HIERARCHY_REQUEST_ERR. The message is chosen
      from the child and parent given: a child type the parent does not
      list in _childTypes, a surplus child of a Document, or otherwise a
      node being put inside itself.
  """
  code= DOMException.HIERARCHY_REQUEST_ERR
  def __init__(self, child, parent):
    DOMException.__init__(self)
    childKind= child.__class__.__name__
    parentKind= parent.__class__.__name__
    if child.nodeType not in parent._childTypes:
      self.message= 'putting %s inside %s' % (childKind, parentKind)
    elif parent.nodeType==Node.DOCUMENT_NODE:
      self.message= 'putting extra %s in Document' % childKind
    else:
      self.message= 'putting %s inside itself' % parentKind
5527
class WrongDocumentErr(DOMException):
  """ DOMException with code WRONG_DOCUMENT_ERR. The message names the
      class of the child; the document argument is accepted but not used.
  """
  code= DOMException.WRONG_DOCUMENT_ERR
  def __init__(self, child, document):
    DOMException.__init__(self)
    kind= child.__class__.__name__
    self.message= '%s from foreign Document' % kind
5533
5534class InvalidCharacterErr(DOMException):
5535</