/
opt
/
cloudlinux
/
venv
/
lib64
/
python3.11
/
site-packages
/
lxml
/
Upload Filee
HOME
# Configurable Element class lookup ################################################################################ # Custom Element classes cdef public class ElementBase(_Element) [ type LxmlElementBaseType, object LxmlElementBase ]: u"""ElementBase(*children, attrib=None, nsmap=None, **_extra) The public Element class. All custom Element classes must inherit from this one. To create an Element, use the `Element()` factory. BIG FAT WARNING: Subclasses *must not* override __init__ or __new__ as it is absolutely undefined when these objects will be created or destroyed. All persistent state of Elements must be stored in the underlying XML. If you really need to initialize the object after creation, you can implement an ``_init(self)`` method that will be called directly after object creation. Subclasses of this class can be instantiated to create a new Element. By default, the tag name will be the class name and the namespace will be empty. You can modify this with the following class attributes: * TAG - the tag name, possibly containing a namespace in Clark notation * NAMESPACE - the default namespace URI, unless provided as part of the TAG attribute. * HTML - flag if the class is an HTML tag, as opposed to an XML tag. This only applies to un-namespaced tags and defaults to false (i.e. XML). * PARSER - the parser that provides the configuration for the newly created document. Providing an HTML parser here will default to creating an HTML element. In user code, the latter three are commonly inherited in class hierarchies that implement a common namespace. """ def __init__(self, *children, attrib=None, nsmap=None, **_extra): u"""ElementBase(*children, attrib=None, nsmap=None, **_extra) """ cdef bint is_html = 0 cdef _BaseParser parser cdef _Element last_child # don't use normal attribute access as it might be overridden _getattr = object.__getattribute__ try: namespace = _utf8(_getattr(self, 'NAMESPACE')) except AttributeError: namespace = None try: ns, tag = _getNsTag(_getattr(self, 'TAG')) if ns is not None: namespace = ns except AttributeError: tag = _utf8(_getattr(_getattr(self, '__class__'), '__name__')) if b'.' in tag: tag = tag.split(b'.')[-1] try: parser = _getattr(self, 'PARSER') except AttributeError: parser = None for child in children: if isinstance(child, _Element): parser = (<_Element>child)._doc._parser break if isinstance(parser, HTMLParser): is_html = 1 if namespace is None: try: is_html = _getattr(self, 'HTML') except AttributeError: pass _initNewElement(self, is_html, tag, namespace, parser, attrib, nsmap, _extra) last_child = None for child in children: if _isString(child): if last_child is None: _setNodeText(self._c_node, (_collectText(self._c_node.children) or '') + child) else: _setTailText(last_child._c_node, (_collectText(last_child._c_node.next) or '') + child) elif isinstance(child, _Element): last_child = child _appendChild(self, last_child) elif isinstance(child, type) and issubclass(child, ElementBase): last_child = child() _appendChild(self, last_child) else: raise TypeError, f"Invalid child type: {type(child)!r}" cdef class CommentBase(_Comment): u"""All custom Comment classes must inherit from this one. To create an XML Comment instance, use the ``Comment()`` factory. Subclasses *must not* override __init__ or __new__ as it is absolutely undefined when these objects will be created or destroyed. All persistent state of Comments must be stored in the underlying XML. If you really need to initialize the object after creation, you can implement an ``_init(self)`` method that will be called after object creation. """ def __init__(self, text): # copied from Comment() factory cdef _Document doc cdef xmlDoc* c_doc if text is None: text = b'' else: text = _utf8(text) c_doc = _newXMLDoc() doc = _documentFactory(c_doc, None) self._c_node = _createComment(c_doc, _xcstr(text)) if self._c_node is NULL: raise MemoryError() tree.xmlAddChild(<xmlNode*>c_doc, self._c_node) _registerProxy(self, doc, self._c_node) self._init() cdef class PIBase(_ProcessingInstruction): u"""All custom Processing Instruction classes must inherit from this one. To create an XML ProcessingInstruction instance, use the ``PI()`` factory. Subclasses *must not* override __init__ or __new__ as it is absolutely undefined when these objects will be created or destroyed. All persistent state of PIs must be stored in the underlying XML. If you really need to initialize the object after creation, you can implement an ``_init(self)`` method that will be called after object creation. """ def __init__(self, target, text=None): # copied from PI() factory cdef _Document doc cdef xmlDoc* c_doc target = _utf8(target) if text is None: text = b'' else: text = _utf8(text) c_doc = _newXMLDoc() doc = _documentFactory(c_doc, None) self._c_node = _createPI(c_doc, _xcstr(target), _xcstr(text)) if self._c_node is NULL: raise MemoryError() tree.xmlAddChild(<xmlNode*>c_doc, self._c_node) _registerProxy(self, doc, self._c_node) self._init() cdef class EntityBase(_Entity): u"""All custom Entity classes must inherit from this one. To create an XML Entity instance, use the ``Entity()`` factory. Subclasses *must not* override __init__ or __new__ as it is absolutely undefined when these objects will be created or destroyed. All persistent state of Entities must be stored in the underlying XML. If you really need to initialize the object after creation, you can implement an ``_init(self)`` method that will be called after object creation. """ def __init__(self, name): cdef _Document doc cdef xmlDoc* c_doc name_utf = _utf8(name) c_name = _xcstr(name_utf) if c_name[0] == c'#': if not _characterReferenceIsValid(c_name + 1): raise ValueError, f"Invalid character reference: '{name}'" elif not _xmlNameIsValid(c_name): raise ValueError, f"Invalid entity reference: '{name}'" c_doc = _newXMLDoc() doc = _documentFactory(c_doc, None) self._c_node = _createEntity(c_doc, c_name) if self._c_node is NULL: raise MemoryError() tree.xmlAddChild(<xmlNode*>c_doc, self._c_node) _registerProxy(self, doc, self._c_node) self._init() cdef int _validateNodeClass(xmlNode* c_node, cls) except -1: if c_node.type == tree.XML_ELEMENT_NODE: expected = ElementBase elif c_node.type == tree.XML_COMMENT_NODE: expected = CommentBase elif c_node.type == tree.XML_ENTITY_REF_NODE: expected = EntityBase elif c_node.type == tree.XML_PI_NODE: expected = PIBase else: assert False, f"Unknown node type: {c_node.type}" if not (isinstance(cls, type) and issubclass(cls, expected)): raise TypeError( f"result of class lookup must be subclass of {type(expected)}, got {type(cls)}") return 0 ################################################################################ # Element class lookup ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # class to store element class lookup functions cdef public class ElementClassLookup [ type LxmlElementClassLookupType, object LxmlElementClassLookup ]: u"""ElementClassLookup(self) Superclass of Element class lookups. """ cdef _element_class_lookup_function _lookup_function cdef public class FallbackElementClassLookup(ElementClassLookup) \ [ type LxmlFallbackElementClassLookupType, object LxmlFallbackElementClassLookup ]: u"""FallbackElementClassLookup(self, fallback=None) Superclass of Element class lookups with additional fallback. """ cdef readonly ElementClassLookup fallback cdef _element_class_lookup_function _fallback_function def __cinit__(self): # fall back to default lookup self._fallback_function = _lookupDefaultElementClass def __init__(self, ElementClassLookup fallback=None): if fallback is not None: self._setFallback(fallback) else: self._fallback_function = _lookupDefaultElementClass cdef void _setFallback(self, ElementClassLookup lookup): u"""Sets the fallback scheme for this lookup method. """ self.fallback = lookup self._fallback_function = lookup._lookup_function if self._fallback_function is NULL: self._fallback_function = _lookupDefaultElementClass def set_fallback(self, ElementClassLookup lookup not None): u"""set_fallback(self, lookup) Sets the fallback scheme for this lookup method. """ self._setFallback(lookup) cdef inline object _callLookupFallback(FallbackElementClassLookup lookup, _Document doc, xmlNode* c_node): return lookup._fallback_function(lookup.fallback, doc, c_node) ################################################################################ # default lookup scheme cdef class ElementDefaultClassLookup(ElementClassLookup): u"""ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None) Element class lookup scheme that always returns the default Element class. The keyword arguments ``element``, ``comment``, ``pi`` and ``entity`` accept the respective Element classes. """ cdef readonly object element_class cdef readonly object comment_class cdef readonly object pi_class cdef readonly object entity_class def __cinit__(self): self._lookup_function = _lookupDefaultElementClass def __init__(self, element=None, comment=None, pi=None, entity=None): if element is None: self.element_class = _Element elif issubclass(element, ElementBase): self.element_class = element else: raise TypeError, u"element class must be subclass of ElementBase" if comment is None: self.comment_class = _Comment elif issubclass(comment, CommentBase): self.comment_class = comment else: raise TypeError, u"comment class must be subclass of CommentBase" if entity is None: self.entity_class = _Entity elif issubclass(entity, EntityBase): self.entity_class = entity else: raise TypeError, u"Entity class must be subclass of EntityBase" if pi is None: self.pi_class = None # special case, see below elif issubclass(pi, PIBase): self.pi_class = pi else: raise TypeError, u"PI class must be subclass of PIBase" cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node): u"Trivial class lookup function that always returns the default class." if c_node.type == tree.XML_ELEMENT_NODE: if state is not None: return (<ElementDefaultClassLookup>state).element_class else: return _Element elif c_node.type == tree.XML_COMMENT_NODE: if state is not None: return (<ElementDefaultClassLookup>state).comment_class else: return _Comment elif c_node.type == tree.XML_ENTITY_REF_NODE: if state is not None: return (<ElementDefaultClassLookup>state).entity_class else: return _Entity elif c_node.type == tree.XML_PI_NODE: if state is None or (<ElementDefaultClassLookup>state).pi_class is None: # special case XSLT-PI if c_node.name is not NULL and c_node.content is not NULL: if tree.xmlStrcmp(c_node.name, <unsigned char*>"xml-stylesheet") == 0: if tree.xmlStrstr(c_node.content, <unsigned char*>"text/xsl") is not NULL or \ tree.xmlStrstr(c_node.content, <unsigned char*>"text/xml") is not NULL: return _XSLTProcessingInstruction return _ProcessingInstruction else: return (<ElementDefaultClassLookup>state).pi_class else: assert False, f"Unknown node type: {c_node.type}" ################################################################################ # attribute based lookup scheme cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): u"""AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None) Checks an attribute of an Element and looks up the value in a class dictionary. Arguments: - attribute name - '{ns}name' style string - class mapping - Python dict mapping attribute values to Element classes - fallback - optional fallback lookup mechanism A None key in the class mapping will be checked if the attribute is missing. """ cdef object _class_mapping cdef tuple _pytag cdef const_xmlChar* _c_ns cdef const_xmlChar* _c_name def __cinit__(self): self._lookup_function = _attribute_class_lookup def __init__(self, attribute_name, class_mapping, ElementClassLookup fallback=None): self._pytag = _getNsTag(attribute_name) ns, name = self._pytag if ns is None: self._c_ns = NULL else: self._c_ns = _xcstr(ns) self._c_name = _xcstr(name) self._class_mapping = dict(class_mapping) FallbackElementClassLookup.__init__(self, fallback) cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node): cdef AttributeBasedElementClassLookup lookup cdef python.PyObject* dict_result lookup = <AttributeBasedElementClassLookup>state if c_node.type == tree.XML_ELEMENT_NODE: value = _attributeValueFromNsName( c_node, lookup._c_ns, lookup._c_name) dict_result = python.PyDict_GetItem(lookup._class_mapping, value) if dict_result is not NULL: cls = <object>dict_result _validateNodeClass(c_node, cls) return cls return _callLookupFallback(lookup, doc, c_node) ################################################################################ # per-parser lookup scheme cdef class ParserBasedElementClassLookup(FallbackElementClassLookup): u"""ParserBasedElementClassLookup(self, fallback=None) Element class lookup based on the XML parser. """ def __cinit__(self): self._lookup_function = _parser_class_lookup cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node): if doc._parser._class_lookup is not None: return doc._parser._class_lookup._lookup_function( doc._parser._class_lookup, doc, c_node) return _callLookupFallback(<FallbackElementClassLookup>state, doc, c_node) ################################################################################ # custom class lookup based on node type, namespace, name cdef class CustomElementClassLookup(FallbackElementClassLookup): u"""CustomElementClassLookup(self, fallback=None) Element class lookup based on a subclass method. You can inherit from this class and override the method:: lookup(self, type, doc, namespace, name) to lookup the element class for a node. Arguments of the method: * type: one of 'element', 'comment', 'PI', 'entity' * doc: document that the node is in * namespace: namespace URI of the node (or None for comments/PIs/entities) * name: name of the element/entity, None for comments, target for PIs If you return None from this method, the fallback will be called. """ def __cinit__(self): self._lookup_function = _custom_class_lookup def lookup(self, type, doc, namespace, name): u"lookup(self, type, doc, namespace, name)" return None cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node): cdef CustomElementClassLookup lookup lookup = <CustomElementClassLookup>state if c_node.type == tree.XML_ELEMENT_NODE: element_type = u"element" elif c_node.type == tree.XML_COMMENT_NODE: element_type = u"comment" elif c_node.type == tree.XML_PI_NODE: element_type = u"PI" elif c_node.type == tree.XML_ENTITY_REF_NODE: element_type = u"entity" else: element_type = u"element" if c_node.name is NULL: name = None else: name = funicode(c_node.name) c_str = tree._getNs(c_node) ns = funicode(c_str) if c_str is not NULL else None cls = lookup.lookup(element_type, doc, ns, name) if cls is not None: _validateNodeClass(c_node, cls) return cls return _callLookupFallback(lookup, doc, c_node) ################################################################################ # read-only tree based class lookup cdef class PythonElementClassLookup(FallbackElementClassLookup): u"""PythonElementClassLookup(self, fallback=None) Element class lookup based on a subclass method. This class lookup scheme allows access to the entire XML tree in read-only mode. To use it, re-implement the ``lookup(self, doc, root)`` method in a subclass:: from lxml import etree, pyclasslookup class MyElementClass(etree.ElementBase): honkey = True class MyLookup(pyclasslookup.PythonElementClassLookup): def lookup(self, doc, root): if root.tag == "sometag": return MyElementClass else: for child in root: if child.tag == "someothertag": return MyElementClass # delegate to default return None If you return None from this method, the fallback will be called. The first argument is the opaque document instance that contains the Element. The second argument is a lightweight Element proxy implementation that is only valid during the lookup. Do not try to keep a reference to it. Once the lookup is done, the proxy will be invalid. Also, you cannot wrap such a read-only Element in an ElementTree, and you must take care not to keep a reference to them outside of the `lookup()` method. Note that the API of the Element objects is not complete. It is purely read-only and does not support all features of the normal `lxml.etree` API (such as XPath, extended slicing or some iteration methods). See https://lxml.de/element_classes.html """ def __cinit__(self): self._lookup_function = _python_class_lookup def lookup(self, doc, element): u"""lookup(self, doc, element) Override this method to implement your own lookup scheme. """ return None cdef object _python_class_lookup(state, _Document doc, tree.xmlNode* c_node): cdef PythonElementClassLookup lookup cdef _ReadOnlyProxy proxy lookup = <PythonElementClassLookup>state proxy = _newReadOnlyProxy(None, c_node) cls = lookup.lookup(doc, proxy) _freeReadOnlyProxies(proxy) if cls is not None: _validateNodeClass(c_node, cls) return cls return _callLookupFallback(lookup, doc, c_node) ################################################################################ # Global setup cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS cdef object ELEMENT_CLASS_LOOKUP_STATE cdef void _setElementClassLookupFunction( _element_class_lookup_function function, object state): global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE if function is NULL: state = DEFAULT_ELEMENT_CLASS_LOOKUP function = DEFAULT_ELEMENT_CLASS_LOOKUP._lookup_function ELEMENT_CLASS_LOOKUP_STATE = state LOOKUP_ELEMENT_CLASS = function def set_element_class_lookup(ElementClassLookup lookup = None): u"""set_element_class_lookup(lookup = None) Set the global element class lookup method. This defines the main entry point for looking up element implementations. The standard implementation uses the :class:`ParserBasedElementClassLookup` to delegate to different lookup schemes for each parser. .. warning:: This should only be changed by applications, not by library packages. In most cases, parser specific lookups should be preferred, which can be configured via :meth:`~lxml.etree.XMLParser.set_element_class_lookup` (and the same for HTML parsers). Globally replacing the element class lookup by something other than a :class:`ParserBasedElementClassLookup` will prevent parser specific lookup schemes from working. Several tools rely on parser specific lookups, including :mod:`lxml.html` and :mod:`lxml.objectify`. """ if lookup is None or lookup._lookup_function is NULL: _setElementClassLookupFunction(NULL, None) else: _setElementClassLookupFunction(lookup._lookup_function, lookup) # default setup: parser delegation cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup() set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP)