| /* | |
| * ElementTree | |
| * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $ | |
| * | |
| * elementtree accelerator | |
| * | |
| * History: | |
| * 1999-06-20 fl created (as part of sgmlop) | |
| * 2001-05-29 fl effdom edition | |
| * 2003-02-27 fl elementtree edition (alpha) | |
| * 2004-06-03 fl updates for elementtree 1.2 | |
| * 2005-01-05 fl major optimization effort | |
| * 2005-01-11 fl first public release (cElementTree 0.8) | |
| * 2005-01-12 fl split element object into base and extras | |
| * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9) | |
| * 2005-01-17 fl added treebuilder close method | |
| * 2005-01-17 fl fixed crash in getchildren | |
| * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3) | |
| * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8) | |
| * 2005-01-26 fl added VERSION module property (cElementTree 1.0) | |
| * 2005-01-28 fl added remove method (1.0.1) | |
| * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2) | |
| * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers | |
| * 2005-03-26 fl added Comment and PI support to XMLParser | |
| * 2005-03-27 fl event optimizations; complain about bogus events | |
| * 2005-08-08 fl fixed read error handling in parse | |
| * 2005-08-11 fl added runtime test for copy workaround (1.0.3) | |
| * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4) | |
| * 2005-12-16 fl added support for non-standard encodings | |
| * 2006-03-08 fl fixed a couple of potential null-refs and leaks | |
| * 2006-03-12 fl merge in 2.5 ssize_t changes | |
| * 2007-08-25 fl call custom builder's close method from XMLParser | |
| * 2007-08-31 fl added iter, extend from ET 1.3 | |
| * 2007-09-01 fl fixed ParseError exception, setslice source type, etc | |
| * 2007-09-03 fl fixed handling of negative insert indexes | |
| * 2007-09-04 fl added itertext from ET 1.3 | |
| * 2007-09-06 fl added position attribute to ParseError exception | |
| * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic) | |
| * | |
| * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved. | |
| * Copyright (c) 1999-2009 by Fredrik Lundh. | |
| * | |
| * info@pythonware.com | |
| * http://www.pythonware.com | |
| */ | |
| /* Licensed to PSF under a Contributor Agreement. */ | |
| /* See http://www.python.org/psf/license for licensing details. */ | |
| #include "Python.h" | |
/* Version string of this accelerator module. */
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to build the expat-based XMLParser type. */
#define USE_EXPAT

/* Enable this to route every expat call through the expat library
   that is embedded in pyexpat. */
/* #define USE_PYEXPAT_CAPI */

/* Number of child slots stored inline in an element; up to this many
   children can be held without any extra memory allocation. */
#define STATIC_CHILDREN 4
| /* For best performance, choose a value so that 80-90% of all nodes | |
| have no more than the given number of children. Set this to zero | |
| to minimize the size of the element structure itself (this only | |
| helps if you have lots of leaf nodes with attributes). */ | |
| /* Also note that pymalloc always allocates blocks in multiples of | |
| eight bytes. For the current version of cElementTree, this means | |
| that the number of children should be an even number, at least on | |
| 32-bit platforms. */ | |
| /* -------------------------------------------------------------------- */ | |
/* Allocation tracing hooks.  Change "#if 0" to "#if 1" to keep a running
   byte count in `memory` and print it on every ALLOC/RELEASE.  In the
   default configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
/* compiler tweaks */

/* LOCAL(type) introduces a file-private helper returning `type`.  With
   Microsoft's compiler the helper is additionally declared __inline
   and __fastcall; everywhere else it is a plain static function. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
/* compatibility macros */

/* Python < 2.6: supply the Py_REFCNT / Py_TYPE accessors. */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Python < 2.5: no Py_ssize_t / lenfunc; fall back to int / inquiry. */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* Python < 2.4: no exact dict check; supply Py_RETURN_NONE if missing. */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check
#ifndef Py_RETURN_NONE
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
/* Tagged-pointer helpers.  The lowest bit of the text and tail pointers
   carries a 'join' flag, so every use of text or tail as an object
   pointer has to go through JOIN_OBJ.  The comments in the
   ElementObject definition explain what the flag means.

   JOIN_OBJ(p)        object pointer with the flag bit cleared
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  pointer to the same object, carrying `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
| /* glue functions (see the init function for details) */ | |
| static PyObject* elementtree_parseerror_obj; | |
| static PyObject* elementtree_copyelement_obj; | |
| static PyObject* elementtree_deepcopy_obj; | |
| static PyObject* elementtree_iter_obj; | |
| static PyObject* elementtree_itertext_obj; | |
| static PyObject* elementpath_obj; | |
| /* helpers */ | |
| LOCAL(PyObject*) | |
| deepcopy(PyObject* object, PyObject* memo) | |
| { | |
| /* do a deep copy of the given object */ | |
| PyObject* args; | |
| PyObject* result; | |
| if (!elementtree_deepcopy_obj) { | |
| PyErr_SetString( | |
| PyExc_RuntimeError, | |
| "deepcopy helper not found" | |
| ); | |
| return NULL; | |
| } | |
| args = PyTuple_New(2); | |
| if (!args) | |
| return NULL; | |
| Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object); | |
| Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo); | |
| result = PyObject_CallObject(elementtree_deepcopy_obj, args); | |
| Py_DECREF(args); | |
| return result; | |
| } | |
| LOCAL(PyObject*) | |
| list_join(PyObject* list) | |
| { | |
| /* join list elements (destroying the list in the process) */ | |
| PyObject* joiner; | |
| PyObject* function; | |
| PyObject* args; | |
| PyObject* result; | |
| switch (PyList_GET_SIZE(list)) { | |
| case 0: | |
| Py_DECREF(list); | |
| return PyString_FromString(""); | |
| case 1: | |
| result = PyList_GET_ITEM(list, 0); | |
| Py_INCREF(result); | |
| Py_DECREF(list); | |
| return result; | |
| } | |
| /* two or more elements: slice out a suitable separator from the | |
| first member, and use that to join the entire list */ | |
| joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0); | |
| if (!joiner) | |
| return NULL; | |
| function = PyObject_GetAttrString(joiner, "join"); | |
| if (!function) { | |
| Py_DECREF(joiner); | |
| return NULL; | |
| } | |
| args = PyTuple_New(1); | |
| if (!args) | |
| return NULL; | |
| PyTuple_SET_ITEM(args, 0, list); | |
| result = PyObject_CallObject(function, args); | |
| Py_DECREF(args); /* also removes list */ | |
| Py_DECREF(function); | |
| Py_DECREF(joiner); | |
| return result; | |
| } | |
| /* -------------------------------------------------------------------- */ | |
| /* the element type */ | |
| typedef struct { | |
| /* attributes (a dictionary object), or None if no attributes */ | |
| PyObject* attrib; | |
| /* child elements */ | |
| int length; /* actual number of items */ | |
| int allocated; /* allocated items */ | |
| /* this either points to _children or to a malloced buffer */ | |
| PyObject* *children; | |
| PyObject* _children[STATIC_CHILDREN]; | |
| } ElementObjectExtra; | |
| typedef struct { | |
| PyObject_HEAD | |
| /* element tag (a string). */ | |
| PyObject* tag; | |
| /* text before first child. note that this is a tagged pointer; | |
| use JOIN_OBJ to get the object pointer. the join flag is used | |
| to distinguish lists created by the tree builder from lists | |
| assigned to the attribute by application code; the former | |
| should be joined before being returned to the user, the latter | |
| should be left intact. */ | |
| PyObject* text; | |
| /* text after this element, in parent. note that this is a tagged | |
| pointer; use JOIN_OBJ to get the object pointer. */ | |
| PyObject* tail; | |
| ElementObjectExtra* extra; | |
| } ElementObject; | |
| staticforward PyTypeObject Element_Type; | |
| #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type) | |
| /* -------------------------------------------------------------------- */ | |
| /* element constructor and destructor */ | |
| LOCAL(int) | |
| element_new_extra(ElementObject* self, PyObject* attrib) | |
| { | |
| self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); | |
| if (!self->extra) | |
| return -1; | |
| if (!attrib) | |
| attrib = Py_None; | |
| Py_INCREF(attrib); | |
| self->extra->attrib = attrib; | |
| self->extra->length = 0; | |
| self->extra->allocated = STATIC_CHILDREN; | |
| self->extra->children = self->extra->_children; | |
| return 0; | |
| } | |
| LOCAL(void) | |
| element_dealloc_extra(ElementObject* self) | |
| { | |
| int i; | |
| Py_DECREF(self->extra->attrib); | |
| for (i = 0; i < self->extra->length; i++) | |
| Py_DECREF(self->extra->children[i]); | |
| if (self->extra->children != self->extra->_children) | |
| PyObject_Free(self->extra->children); | |
| PyObject_Free(self->extra); | |
| } | |
| LOCAL(PyObject*) | |
| element_new(PyObject* tag, PyObject* attrib) | |
| { | |
| ElementObject* self; | |
| self = PyObject_New(ElementObject, &Element_Type); | |
| if (self == NULL) | |
| return NULL; | |
| /* use None for empty dictionaries */ | |
| if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib)) | |
| attrib = Py_None; | |
| self->extra = NULL; | |
| if (attrib != Py_None) { | |
| if (element_new_extra(self, attrib) < 0) { | |
| PyObject_Del(self); | |
| return NULL; | |
| } | |
| self->extra->length = 0; | |
| self->extra->allocated = STATIC_CHILDREN; | |
| self->extra->children = self->extra->_children; | |
| } | |
| Py_INCREF(tag); | |
| self->tag = tag; | |
| Py_INCREF(Py_None); | |
| self->text = Py_None; | |
| Py_INCREF(Py_None); | |
| self->tail = Py_None; | |
| ALLOC(sizeof(ElementObject), "create element"); | |
| return (PyObject*) self; | |
| } | |
| LOCAL(int) | |
| element_resize(ElementObject* self, int extra) | |
| { | |
| int size; | |
| PyObject* *children; | |
| /* make sure self->children can hold the given number of extra | |
| elements. set an exception and return -1 if allocation failed */ | |
| if (!self->extra) | |
| element_new_extra(self, NULL); | |
| size = self->extra->length + extra; | |
| if (size > self->extra->allocated) { | |
| /* use Python 2.4's list growth strategy */ | |
| size = (size >> 3) + (size < 9 ? 3 : 6) + size; | |
| /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children" | |
| * which needs at least 4 bytes. | |
| * Although it's a false alarm always assume at least one child to | |
| * be safe. | |
| */ | |
| size = size ? size : 1; | |
| if (self->extra->children != self->extra->_children) { | |
| /* Coverity CID #182 size_error: Allocating 1 bytes to pointer | |
| * "children", which needs at least 4 bytes. Although it's a | |
| * false alarm always assume at least one child to be safe. | |
| */ | |
| children = PyObject_Realloc(self->extra->children, | |
| size * sizeof(PyObject*)); | |
| if (!children) | |
| goto nomemory; | |
| } else { | |
| children = PyObject_Malloc(size * sizeof(PyObject*)); | |
| if (!children) | |
| goto nomemory; | |
| /* copy existing children from static area to malloc buffer */ | |
| memcpy(children, self->extra->children, | |
| self->extra->length * sizeof(PyObject*)); | |
| } | |
| self->extra->children = children; | |
| self->extra->allocated = size; | |
| } | |
| return 0; | |
| nomemory: | |
| PyErr_NoMemory(); | |
| return -1; | |
| } | |
| LOCAL(int) | |
| element_add_subelement(ElementObject* self, PyObject* element) | |
| { | |
| /* add a child element to a parent */ | |
| if (element_resize(self, 1) < 0) | |
| return -1; | |
| Py_INCREF(element); | |
| self->extra->children[self->extra->length] = element; | |
| self->extra->length++; | |
| return 0; | |
| } | |
| LOCAL(PyObject*) | |
| element_get_attrib(ElementObject* self) | |
| { | |
| /* return borrowed reference to attrib dictionary */ | |
| /* note: this function assumes that the extra section exists */ | |
| PyObject* res = self->extra->attrib; | |
| if (res == Py_None) { | |
| Py_DECREF(res); | |
| /* create missing dictionary */ | |
| res = PyDict_New(); | |
| if (!res) | |
| return NULL; | |
| self->extra->attrib = res; | |
| } | |
| return res; | |
| } | |
| LOCAL(PyObject*) | |
| element_get_text(ElementObject* self) | |
| { | |
| /* return borrowed reference to text attribute */ | |
| PyObject* res = self->text; | |
| if (JOIN_GET(res)) { | |
| res = JOIN_OBJ(res); | |
| if (PyList_CheckExact(res)) { | |
| res = list_join(res); | |
| if (!res) | |
| return NULL; | |
| self->text = res; | |
| } | |
| } | |
| return res; | |
| } | |
| LOCAL(PyObject*) | |
| element_get_tail(ElementObject* self) | |
| { | |
| /* return borrowed reference to text attribute */ | |
| PyObject* res = self->tail; | |
| if (JOIN_GET(res)) { | |
| res = JOIN_OBJ(res); | |
| if (PyList_CheckExact(res)) { | |
| res = list_join(res); | |
| if (!res) | |
| return NULL; | |
| self->tail = res; | |
| } | |
| } | |
| return res; | |
| } | |
| static PyObject* | |
| element(PyObject* self, PyObject* args, PyObject* kw) | |
| { | |
| PyObject* elem; | |
| PyObject* tag; | |
| PyObject* attrib = NULL; | |
| if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, | |
| &PyDict_Type, &attrib)) | |
| return NULL; | |
| if (attrib || kw) { | |
| attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); | |
| if (!attrib) | |
| return NULL; | |
| if (kw) | |
| PyDict_Update(attrib, kw); | |
| } else { | |
| Py_INCREF(Py_None); | |
| attrib = Py_None; | |
| } | |
| elem = element_new(tag, attrib); | |
| Py_DECREF(attrib); | |
| return elem; | |
| } | |
| static PyObject* | |
| subelement(PyObject* self, PyObject* args, PyObject* kw) | |
| { | |
| PyObject* elem; | |
| ElementObject* parent; | |
| PyObject* tag; | |
| PyObject* attrib = NULL; | |
| if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", | |
| &Element_Type, &parent, &tag, | |
| &PyDict_Type, &attrib)) | |
| return NULL; | |
| if (attrib || kw) { | |
| attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); | |
| if (!attrib) | |
| return NULL; | |
| if (kw) | |
| PyDict_Update(attrib, kw); | |
| } else { | |
| Py_INCREF(Py_None); | |
| attrib = Py_None; | |
| } | |
| elem = element_new(tag, attrib); | |
| Py_DECREF(attrib); | |
| if (element_add_subelement(parent, elem) < 0) { | |
| Py_DECREF(elem); | |
| return NULL; | |
| } | |
| return elem; | |
| } | |
| static void | |
| element_dealloc(ElementObject* self) | |
| { | |
| if (self->extra) | |
| element_dealloc_extra(self); | |
| /* discard attributes */ | |
| Py_DECREF(self->tag); | |
| Py_DECREF(JOIN_OBJ(self->text)); | |
| Py_DECREF(JOIN_OBJ(self->tail)); | |
| RELEASE(sizeof(ElementObject), "destroy element"); | |
| PyObject_Del(self); | |
| } | |
| /* -------------------------------------------------------------------- */ | |
| /* methods (in alphabetical order) */ | |
| static PyObject* | |
| element_append(ElementObject* self, PyObject* args) | |
| { | |
| PyObject* element; | |
| if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element)) | |
| return NULL; | |
| if (element_add_subelement(self, element) < 0) | |
| return NULL; | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject* | |
| element_clear(ElementObject* self, PyObject* args) | |
| { | |
| if (!PyArg_ParseTuple(args, ":clear")) | |
| return NULL; | |
| if (self->extra) { | |
| element_dealloc_extra(self); | |
| self->extra = NULL; | |
| } | |
| Py_INCREF(Py_None); | |
| Py_DECREF(JOIN_OBJ(self->text)); | |
| self->text = Py_None; | |
| Py_INCREF(Py_None); | |
| Py_DECREF(JOIN_OBJ(self->tail)); | |
| self->tail = Py_None; | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject* | |
| element_copy(ElementObject* self, PyObject* args) | |
| { | |
| int i; | |
| ElementObject* element; | |
| if (!PyArg_ParseTuple(args, ":__copy__")) | |
| return NULL; | |
| element = (ElementObject*) element_new( | |
| self->tag, (self->extra) ? self->extra->attrib : Py_None | |
| ); | |
| if (!element) | |
| return NULL; | |
| Py_DECREF(JOIN_OBJ(element->text)); | |
| element->text = self->text; | |
| Py_INCREF(JOIN_OBJ(element->text)); | |
| Py_DECREF(JOIN_OBJ(element->tail)); | |
| element->tail = self->tail; | |
| Py_INCREF(JOIN_OBJ(element->tail)); | |
| if (self->extra) { | |
| if (element_resize(element, self->extra->length) < 0) { | |
| Py_DECREF(element); | |
| return NULL; | |
| } | |
| for (i = 0; i < self->extra->length; i++) { | |
| Py_INCREF(self->extra->children[i]); | |
| element->extra->children[i] = self->extra->children[i]; | |
| } | |
| element->extra->length = self->extra->length; | |
| } | |
| return (PyObject*) element; | |
| } | |
| static PyObject* | |
| element_deepcopy(ElementObject* self, PyObject* args) | |
| { | |
| int i; | |
| ElementObject* element; | |
| PyObject* tag; | |
| PyObject* attrib; | |
| PyObject* text; | |
| PyObject* tail; | |
| PyObject* id; | |
| PyObject* memo; | |
| if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) | |
| return NULL; | |
| tag = deepcopy(self->tag, memo); | |
| if (!tag) | |
| return NULL; | |
| if (self->extra) { | |
| attrib = deepcopy(self->extra->attrib, memo); | |
| if (!attrib) { | |
| Py_DECREF(tag); | |
| return NULL; | |
| } | |
| } else { | |
| Py_INCREF(Py_None); | |
| attrib = Py_None; | |
| } | |
| element = (ElementObject*) element_new(tag, attrib); | |
| Py_DECREF(tag); | |
| Py_DECREF(attrib); | |
| if (!element) | |
| return NULL; | |
| text = deepcopy(JOIN_OBJ(self->text), memo); | |
| if (!text) | |
| goto error; | |
| Py_DECREF(element->text); | |
| element->text = JOIN_SET(text, JOIN_GET(self->text)); | |
| tail = deepcopy(JOIN_OBJ(self->tail), memo); | |
| if (!tail) | |
| goto error; | |
| Py_DECREF(element->tail); | |
| element->tail = JOIN_SET(tail, JOIN_GET(self->tail)); | |
| if (self->extra) { | |
| if (element_resize(element, self->extra->length) < 0) | |
| goto error; | |
| for (i = 0; i < self->extra->length; i++) { | |
| PyObject* child = deepcopy(self->extra->children[i], memo); | |
| if (!child) { | |
| element->extra->length = i; | |
| goto error; | |
| } | |
| element->extra->children[i] = child; | |
| } | |
| element->extra->length = self->extra->length; | |
| } | |
| /* add object to memo dictionary (so deepcopy won't visit it again) */ | |
| id = PyInt_FromLong((Py_uintptr_t) self); | |
| if (!id) | |
| goto error; | |
| i = PyDict_SetItem(memo, id, (PyObject*) element); | |
| Py_DECREF(id); | |
| if (i < 0) | |
| goto error; | |
| return (PyObject*) element; | |
| error: | |
| Py_DECREF(element); | |
| return NULL; | |
| } | |
| LOCAL(int) | |
| checkpath(PyObject* tag) | |
| { | |
| Py_ssize_t i; | |
| int check = 1; | |
| /* check if a tag contains an xpath character */ | |
| #define PATHCHAR(ch) \ | |
| (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.') | |
| #if defined(Py_USING_UNICODE) | |
| if (PyUnicode_Check(tag)) { | |
| Py_UNICODE *p = PyUnicode_AS_UNICODE(tag); | |
| for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) { | |
| if (p[i] == '{') | |
| check = 0; | |
| else if (p[i] == '}') | |
| check = 1; | |
| else if (check && PATHCHAR(p[i])) | |
| return 1; | |
| } | |
| return 0; | |
| } | |
| #endif | |
| if (PyString_Check(tag)) { | |
| char *p = PyString_AS_STRING(tag); | |
| for (i = 0; i < PyString_GET_SIZE(tag); i++) { | |
| if (p[i] == '{') | |
| check = 0; | |
| else if (p[i] == '}') | |
| check = 1; | |
| else if (check && PATHCHAR(p[i])) | |
| return 1; | |
| } | |
| return 0; | |
| } | |
| return 1; /* unknown type; might be path expression */ | |
| } | |
| static PyObject* | |
| element_extend(ElementObject* self, PyObject* args) | |
| { | |
| PyObject* seq; | |
| Py_ssize_t i, seqlen = 0; | |
| PyObject* seq_in; | |
| if (!PyArg_ParseTuple(args, "O:extend", &seq_in)) | |
| return NULL; | |
| seq = PySequence_Fast(seq_in, ""); | |
| if (!seq) { | |
| PyErr_Format( | |
| PyExc_TypeError, | |
| "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name | |
| ); | |
| return NULL; | |
| } | |
| seqlen = PySequence_Size(seq); | |
| for (i = 0; i < seqlen; i++) { | |
| PyObject* element = PySequence_Fast_GET_ITEM(seq, i); | |
| if (element_add_subelement(self, element) < 0) { | |
| Py_DECREF(seq); | |
| return NULL; | |
| } | |
| } | |
| Py_DECREF(seq); | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject* | |
| element_find(ElementObject* self, PyObject* args) | |
| { | |
| int i; | |
| PyObject* tag; | |
| PyObject* namespaces = Py_None; | |
| if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces)) | |
| return NULL; | |
| if (checkpath(tag) || namespaces != Py_None) | |
| return PyObject_CallMethod( | |
| elementpath_obj, "find", "OOO", self, tag, namespaces | |
| ); | |
| if (!self->extra) | |
| Py_RETURN_NONE; | |
| for (i = 0; i < self->extra->length; i++) { | |
| PyObject* item = self->extra->children[i]; | |
| if (Element_CheckExact(item) && | |
| PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { | |
| Py_INCREF(item); | |
| return item; | |
| } | |
| } | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject* | |
| element_findtext(ElementObject* self, PyObject* args) | |
| { | |
| int i; | |
| PyObject* tag; | |
| PyObject* default_value = Py_None; | |
| PyObject* namespaces = Py_None; | |
| if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces)) | |
| return NULL; | |
| if (checkpath(tag) || namespaces != Py_None) | |
| return PyObject_CallMethod( | |
| elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces | |
| ); | |
| if (!self->extra) { | |
| Py_INCREF(default_value); | |
| return default_value; | |
| } | |
| for (i = 0; i < self->extra->length; i++) { | |
| ElementObject* item = (ElementObject*) self->extra->children[i]; | |
| if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) { | |
| PyObject* text = element_get_text(item); | |
| if (text == Py_None) | |
| return PyString_FromString(""); | |
| Py_XINCREF(text); | |
| return text; | |
| } | |
| } | |
| Py_INCREF(default_value); | |
| return default_value; | |
| } | |
| static PyObject* | |
| element_findall(ElementObject* self, PyObject* args) | |
| { | |
| int i; | |
| PyObject* out; | |
| PyObject* tag; | |
| PyObject* namespaces = Py_None; | |
| if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces)) | |
| return NULL; | |
| if (checkpath(tag) || namespaces != Py_None) | |
| return PyObject_CallMethod( | |
| elementpath_obj, "findall", "OOO", self, tag, namespaces | |
| ); | |
| out = PyList_New(0); | |
| if (!out) | |
| return NULL; | |
| if (!self->extra) | |
| return out; | |
| for (i = 0; i < self->extra->length; i++) { | |
| PyObject* item = self->extra->children[i]; | |
| if (Element_CheckExact(item) && | |
| PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { | |
| if (PyList_Append(out, item) < 0) { | |
| Py_DECREF(out); | |
| return NULL; | |
| } | |
| } | |
| } | |
| return out; | |
| } | |
| static PyObject* | |
| element_iterfind(ElementObject* self, PyObject* args) | |
| { | |
| PyObject* tag; | |
| PyObject* namespaces = Py_None; | |
| if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces)) | |
| return NULL; | |
| return PyObject_CallMethod( | |
| elementpath_obj, "iterfind", "OOO", self, tag, namespaces | |
| ); | |
| } | |
| static PyObject* | |
| element_get(ElementObject* self, PyObject* args) | |
| { | |
| PyObject* value; | |
| PyObject* key; | |
| PyObject* default_value = Py_None; | |
| if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value)) | |
| return NULL; | |
| if (!self->extra || self->extra->attrib == Py_None) | |
| value = default_value; | |
| else { | |
| value = PyDict_GetItem(self->extra->attrib, key); | |
| if (!value) | |
| value = default_value; | |
| } | |
| Py_INCREF(value); | |
| return value; | |
| } | |
| static PyObject* | |
| element_getchildren(ElementObject* self, PyObject* args) | |
| { | |
| int i; | |
| PyObject* list; | |
| /* FIXME: report as deprecated? */ | |
| if (!PyArg_ParseTuple(args, ":getchildren")) | |
| return NULL; | |
| if (!self->extra) | |
| return PyList_New(0); | |
| list = PyList_New(self->extra->length); | |
| if (!list) | |
| return NULL; | |
| for (i = 0; i < self->extra->length; i++) { | |
| PyObject* item = self->extra->children[i]; | |
| Py_INCREF(item); | |
| PyList_SET_ITEM(list, i, item); | |
| } | |
| return list; | |
| } | |
| static PyObject* | |
| element_iter(ElementObject* self, PyObject* args) | |
| { | |
| PyObject* result; | |
| PyObject* tag = Py_None; | |
| if (!PyArg_ParseTuple(args, "|O:iter", &tag)) | |
| return NULL; | |
| if (!elementtree_iter_obj) { | |
| PyErr_SetString( | |
| PyExc_RuntimeError, | |
| "iter helper not found" | |
| ); | |
| return NULL; | |
| } | |
| args = PyTuple_New(2); | |
| if (!args) | |
| return NULL; | |
| Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); | |
| Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag); | |
| result = PyObject_CallObject(elementtree_iter_obj, args); | |
| Py_DECREF(args); | |
| return result; | |
| } | |
| static PyObject* | |
| element_itertext(ElementObject* self, PyObject* args) | |
| { | |
| PyObject* result; | |
| if (!PyArg_ParseTuple(args, ":itertext")) | |
| return NULL; | |
| if (!elementtree_itertext_obj) { | |
| PyErr_SetString( | |
| PyExc_RuntimeError, | |
| "itertext helper not found" | |
| ); | |
| return NULL; | |
| } | |
| args = PyTuple_New(1); | |
| if (!args) | |
| return NULL; | |
| Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); | |
| result = PyObject_CallObject(elementtree_itertext_obj, args); | |
| Py_DECREF(args); | |
| return result; | |
| } | |
| static PyObject* | |
| element_getitem(PyObject* self_, Py_ssize_t index) | |
| { | |
| ElementObject* self = (ElementObject*) self_; | |
| if (!self->extra || index < 0 || index >= self->extra->length) { | |
| PyErr_SetString( | |
| PyExc_IndexError, | |
| "child index out of range" | |
| ); | |
| return NULL; | |
| } | |
| Py_INCREF(self->extra->children[index]); | |
| return self->extra->children[index]; | |
| } | |
| static PyObject* | |
| element_insert(ElementObject* self, PyObject* args) | |
| { | |
| int i; | |
| int index; | |
| PyObject* element; | |
| if (!PyArg_ParseTuple(args, "iO!:insert", &index, | |
| &Element_Type, &element)) | |
| return NULL; | |
| if (!self->extra) | |
| element_new_extra(self, NULL); | |
| if (index < 0) { | |
| index += self->extra->length; | |
| if (index < 0) | |
| index = 0; | |
| } | |
| if (index > self->extra->length) | |
| index = self->extra->length; | |
| if (element_resize(self, 1) < 0) | |
| return NULL; | |
| for (i = self->extra->length; i > index; i--) | |
| self->extra->children[i] = self->extra->children[i-1]; | |
| Py_INCREF(element); | |
| self->extra->children[index] = element; | |
| self->extra->length++; | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject* | |
| element_items(ElementObject* self, PyObject* args) | |
| { | |
| if (!PyArg_ParseTuple(args, ":items")) | |
| return NULL; | |
| if (!self->extra || self->extra->attrib == Py_None) | |
| return PyList_New(0); | |
| return PyDict_Items(self->extra->attrib); | |
| } | |
| static PyObject* | |
| element_keys(ElementObject* self, PyObject* args) | |
| { | |
| if (!PyArg_ParseTuple(args, ":keys")) | |
| return NULL; | |
| if (!self->extra || self->extra->attrib == Py_None) | |
| return PyList_New(0); | |
| return PyDict_Keys(self->extra->attrib); | |
| } | |
| static Py_ssize_t | |
| element_length(ElementObject* self) | |
| { | |
| if (!self->extra) | |
| return 0; | |
| return self->extra->length; | |
| } | |
| static PyObject* | |
| element_makeelement(PyObject* self, PyObject* args, PyObject* kw) | |
| { | |
| PyObject* elem; | |
| PyObject* tag; | |
| PyObject* attrib; | |
| if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib)) | |
| return NULL; | |
| attrib = PyDict_Copy(attrib); | |
| if (!attrib) | |
| return NULL; | |
| elem = element_new(tag, attrib); | |
| Py_DECREF(attrib); | |
| return elem; | |
| } | |
| static PyObject* | |
| element_reduce(ElementObject* self, PyObject* args) | |
| { | |
| if (!PyArg_ParseTuple(args, ":__reduce__")) | |
| return NULL; | |
| /* Hack alert: This method is used to work around a __copy__ | |
| problem on certain 2.3 and 2.4 versions. To save time and | |
| simplify the code, we create the copy in here, and use a dummy | |
| copyelement helper to trick the copy module into doing the | |
| right thing. */ | |
| if (!elementtree_copyelement_obj) { | |
| PyErr_SetString( | |
| PyExc_RuntimeError, | |
| "copyelement helper not found" | |
| ); | |
| return NULL; | |
| } | |
| return Py_BuildValue( | |
| "O(N)", elementtree_copyelement_obj, element_copy(self, args) | |
| ); | |
| } | |
| static PyObject* | |
| element_remove(ElementObject* self, PyObject* args) | |
| { | |
| int i; | |
| PyObject* element; | |
| if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element)) | |
| return NULL; | |
| if (!self->extra) { | |
| /* element has no children, so raise exception */ | |
| PyErr_SetString( | |
| PyExc_ValueError, | |
| "list.remove(x): x not in list" | |
| ); | |
| return NULL; | |
| } | |
| for (i = 0; i < self->extra->length; i++) { | |
| if (self->extra->children[i] == element) | |
| break; | |
| if (PyObject_Compare(self->extra->children[i], element) == 0) | |
| break; | |
| } | |
| if (i == self->extra->length) { | |
| /* element is not in children, so raise exception */ | |
| PyErr_SetString( | |
| PyExc_ValueError, | |
| "list.remove(x): x not in list" | |
| ); | |
| return NULL; | |
| } | |
| Py_DECREF(self->extra->children[i]); | |
| self->extra->length--; | |
| for (; i < self->extra->length; i++) | |
| self->extra->children[i] = self->extra->children[i+1]; | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject* | |
| element_repr(ElementObject* self) | |
| { | |
| PyObject *repr, *tag; | |
| tag = PyObject_Repr(self->tag); | |
| if (!tag) | |
| return NULL; | |
| repr = PyString_FromFormat("<Element %s at %p>", | |
| PyString_AS_STRING(tag), self); | |
| Py_DECREF(tag); | |
| return repr; | |
| } | |
| static PyObject* | |
| element_set(ElementObject* self, PyObject* args) | |
| { | |
| PyObject* attrib; | |
| PyObject* key; | |
| PyObject* value; | |
| if (!PyArg_ParseTuple(args, "OO:set", &key, &value)) | |
| return NULL; | |
| if (!self->extra) | |
| element_new_extra(self, NULL); | |
| attrib = element_get_attrib(self); | |
| if (!attrib) | |
| return NULL; | |
| if (PyDict_SetItem(attrib, key, value) < 0) | |
| return NULL; | |
| Py_RETURN_NONE; | |
| } | |
| static int | |
| element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) | |
| { | |
| ElementObject* self = (ElementObject*) self_; | |
| int i; | |
| PyObject* old; | |
| if (!self->extra || index < 0 || index >= self->extra->length) { | |
| PyErr_SetString( | |
| PyExc_IndexError, | |
| "child assignment index out of range"); | |
| return -1; | |
| } | |
| old = self->extra->children[index]; | |
| if (item) { | |
| Py_INCREF(item); | |
| self->extra->children[index] = item; | |
| } else { | |
| self->extra->length--; | |
| for (i = index; i < self->extra->length; i++) | |
| self->extra->children[i] = self->extra->children[i+1]; | |
| } | |
| Py_DECREF(old); | |
| return 0; | |
| } | |
| static PyObject* | |
| element_subscr(PyObject* self_, PyObject* item) | |
| { | |
| ElementObject* self = (ElementObject*) self_; | |
| #if (PY_VERSION_HEX < 0x02050000) | |
| if (PyInt_Check(item) || PyLong_Check(item)) { | |
| long i = PyInt_AsLong(item); | |
| #else | |
| if (PyIndex_Check(item)) { | |
| Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); | |
| #endif | |
| if (i == -1 && PyErr_Occurred()) { | |
| return NULL; | |
| } | |
| if (i < 0 && self->extra) | |
| i += self->extra->length; | |
| return element_getitem(self_, i); | |
| } | |
| else if (PySlice_Check(item)) { | |
| Py_ssize_t start, stop, step, slicelen, cur, i; | |
| PyObject* list; | |
| if (!self->extra) | |
| return PyList_New(0); | |
| if (PySlice_GetIndicesEx((PySliceObject *)item, | |
| self->extra->length, | |
| &start, &stop, &step, &slicelen) < 0) { | |
| return NULL; | |
| } | |
| if (slicelen <= 0) | |
| return PyList_New(0); | |
| else { | |
| list = PyList_New(slicelen); | |
| if (!list) | |
| return NULL; | |
| for (cur = start, i = 0; i < slicelen; | |
| cur += step, i++) { | |
| PyObject* item = self->extra->children[cur]; | |
| Py_INCREF(item); | |
| PyList_SET_ITEM(list, i, item); | |
| } | |
| return list; | |
| } | |
| } | |
| else { | |
| PyErr_SetString(PyExc_TypeError, | |
| "element indices must be integers"); | |
| return NULL; | |
| } | |
| } | |
| static int | |
| element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) | |
| { | |
| ElementObject* self = (ElementObject*) self_; | |
| #if (PY_VERSION_HEX < 0x02050000) | |
| if (PyInt_Check(item) || PyLong_Check(item)) { | |
| long i = PyInt_AsLong(item); | |
| #else | |
| if (PyIndex_Check(item)) { | |
| Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); | |
| #endif | |
| if (i == -1 && PyErr_Occurred()) { | |
| return -1; | |
| } | |
| if (i < 0 && self->extra) | |
| i += self->extra->length; | |
| return element_setitem(self_, i, value); | |
| } | |
| else if (PySlice_Check(item)) { | |
| Py_ssize_t start, stop, step, slicelen, newlen, cur, i; | |
| PyObject* recycle = NULL; | |
| PyObject* seq = NULL; | |
| if (!self->extra) | |
| element_new_extra(self, NULL); | |
| if (PySlice_GetIndicesEx((PySliceObject *)item, | |
| self->extra->length, | |
| &start, &stop, &step, &slicelen) < 0) { | |
| return -1; | |
| } | |
| if (value == NULL) | |
| newlen = 0; | |
| else { | |
| seq = PySequence_Fast(value, ""); | |
| if (!seq) { | |
| PyErr_Format( | |
| PyExc_TypeError, | |
| "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name | |
| ); | |
| return -1; | |
| } | |
| newlen = PySequence_Size(seq); | |
| } | |
| if (step != 1 && newlen != slicelen) | |
| { | |
| PyErr_Format(PyExc_ValueError, | |
| #if (PY_VERSION_HEX < 0x02050000) | |
| "attempt to assign sequence of size %d " | |
| "to extended slice of size %d", | |
| #else | |
| "attempt to assign sequence of size %zd " | |
| "to extended slice of size %zd", | |
| #endif | |
| newlen, slicelen | |
| ); | |
| return -1; | |
| } | |
| /* Resize before creating the recycle bin, to prevent refleaks. */ | |
| if (newlen > slicelen) { | |
| if (element_resize(self, newlen - slicelen) < 0) { | |
| if (seq) { | |
| Py_DECREF(seq); | |
| } | |
| return -1; | |
| } | |
| } | |
| if (slicelen > 0) { | |
| /* to avoid recursive calls to this method (via decref), move | |
| old items to the recycle bin here, and get rid of them when | |
| we're done modifying the element */ | |
| recycle = PyList_New(slicelen); | |
| if (!recycle) { | |
| if (seq) { | |
| Py_DECREF(seq); | |
| } | |
| return -1; | |
| } | |
| for (cur = start, i = 0; i < slicelen; | |
| cur += step, i++) | |
| PyList_SET_ITEM(recycle, i, self->extra->children[cur]); | |
| } | |
| if (newlen < slicelen) { | |
| /* delete slice */ | |
| for (i = stop; i < self->extra->length; i++) | |
| self->extra->children[i + newlen - slicelen] = self->extra->children[i]; | |
| } else if (newlen > slicelen) { | |
| /* insert slice */ | |
| for (i = self->extra->length-1; i >= stop; i--) | |
| self->extra->children[i + newlen - slicelen] = self->extra->children[i]; | |
| } | |
| /* replace the slice */ | |
| for (cur = start, i = 0; i < newlen; | |
| cur += step, i++) { | |
| PyObject* element = PySequence_Fast_GET_ITEM(seq, i); | |
| Py_INCREF(element); | |
| self->extra->children[cur] = element; | |
| } | |
| self->extra->length += newlen - slicelen; | |
| if (seq) { | |
| Py_DECREF(seq); | |
| } | |
| /* discard the recycle bin, and everything in it */ | |
| Py_XDECREF(recycle); | |
| return 0; | |
| } | |
| else { | |
| PyErr_SetString(PyExc_TypeError, | |
| "element indices must be integers"); | |
| return -1; | |
| } | |
| } | |
| static PyMethodDef element_methods[] = { | |
| {"clear", (PyCFunction) element_clear, METH_VARARGS}, | |
| {"get", (PyCFunction) element_get, METH_VARARGS}, | |
| {"set", (PyCFunction) element_set, METH_VARARGS}, | |
| {"find", (PyCFunction) element_find, METH_VARARGS}, | |
| {"findtext", (PyCFunction) element_findtext, METH_VARARGS}, | |
| {"findall", (PyCFunction) element_findall, METH_VARARGS}, | |
| {"append", (PyCFunction) element_append, METH_VARARGS}, | |
| {"extend", (PyCFunction) element_extend, METH_VARARGS}, | |
| {"insert", (PyCFunction) element_insert, METH_VARARGS}, | |
| {"remove", (PyCFunction) element_remove, METH_VARARGS}, | |
| {"iter", (PyCFunction) element_iter, METH_VARARGS}, | |
| {"itertext", (PyCFunction) element_itertext, METH_VARARGS}, | |
| {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS}, | |
| {"getiterator", (PyCFunction) element_iter, METH_VARARGS}, | |
| {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS}, | |
| {"items", (PyCFunction) element_items, METH_VARARGS}, | |
| {"keys", (PyCFunction) element_keys, METH_VARARGS}, | |
| {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS}, | |
| {"__copy__", (PyCFunction) element_copy, METH_VARARGS}, | |
| {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS}, | |
| /* Some 2.3 and 2.4 versions do not handle the __copy__ method on | |
| C objects correctly, so we have to fake it using a __reduce__- | |
| based hack (see the element_reduce implementation above for | |
| details). */ | |
| /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're | |
| using a runtime test to figure out if we need to fake things | |
| or now (see the init code below). The following entry is | |
| enabled only if the hack is needed. */ | |
| {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS}, | |
| {NULL, NULL} | |
| }; | |
| static PyObject* | |
| element_getattr(ElementObject* self, char* name) | |
| { | |
| PyObject* res; | |
| /* handle common attributes first */ | |
| if (strcmp(name, "tag") == 0) { | |
| res = self->tag; | |
| Py_INCREF(res); | |
| return res; | |
| } else if (strcmp(name, "text") == 0) { | |
| res = element_get_text(self); | |
| Py_INCREF(res); | |
| return res; | |
| } | |
| /* methods */ | |
| res = Py_FindMethod(element_methods, (PyObject*) self, name); | |
| if (res) | |
| return res; | |
| PyErr_Clear(); | |
| /* less common attributes */ | |
| if (strcmp(name, "tail") == 0) { | |
| res = element_get_tail(self); | |
| } else if (strcmp(name, "attrib") == 0) { | |
| if (!self->extra) | |
| element_new_extra(self, NULL); | |
| res = element_get_attrib(self); | |
| } else { | |
| PyErr_SetString(PyExc_AttributeError, name); | |
| return NULL; | |
| } | |
| if (!res) | |
| return NULL; | |
| Py_INCREF(res); | |
| return res; | |
| } | |
| static int | |
| element_setattr(ElementObject* self, const char* name, PyObject* value) | |
| { | |
| if (value == NULL) { | |
| PyErr_SetString( | |
| PyExc_AttributeError, | |
| "can't delete element attributes" | |
| ); | |
| return -1; | |
| } | |
| if (strcmp(name, "tag") == 0) { | |
| Py_DECREF(self->tag); | |
| self->tag = value; | |
| Py_INCREF(self->tag); | |
| } else if (strcmp(name, "text") == 0) { | |
| Py_DECREF(JOIN_OBJ(self->text)); | |
| self->text = value; | |
| Py_INCREF(self->text); | |
| } else if (strcmp(name, "tail") == 0) { | |
| Py_DECREF(JOIN_OBJ(self->tail)); | |
| self->tail = value; | |
| Py_INCREF(self->tail); | |
| } else if (strcmp(name, "attrib") == 0) { | |
| if (!self->extra) | |
| element_new_extra(self, NULL); | |
| Py_DECREF(self->extra->attrib); | |
| self->extra->attrib = value; | |
| Py_INCREF(self->extra->attrib); | |
| } else { | |
| PyErr_SetString(PyExc_AttributeError, name); | |
| return -1; | |
| } | |
| return 0; | |
| } | |
| static PySequenceMethods element_as_sequence = { | |
| (lenfunc) element_length, | |
| 0, /* sq_concat */ | |
| 0, /* sq_repeat */ | |
| element_getitem, | |
| 0, | |
| element_setitem, | |
| 0, | |
| }; | |
| static PyMappingMethods element_as_mapping = { | |
| (lenfunc) element_length, | |
| (binaryfunc) element_subscr, | |
| (objobjargproc) element_ass_subscr, | |
| }; | |
| statichere PyTypeObject Element_Type = { | |
| PyObject_HEAD_INIT(NULL) | |
| 0, "Element", sizeof(ElementObject), 0, | |
| /* methods */ | |
| (destructor)element_dealloc, /* tp_dealloc */ | |
| 0, /* tp_print */ | |
| (getattrfunc)element_getattr, /* tp_getattr */ | |
| (setattrfunc)element_setattr, /* tp_setattr */ | |
| 0, /* tp_compare */ | |
| (reprfunc)element_repr, /* tp_repr */ | |
| 0, /* tp_as_number */ | |
| &element_as_sequence, /* tp_as_sequence */ | |
| &element_as_mapping, /* tp_as_mapping */ | |
| }; | |
| /* ==================================================================== */ | |
| /* the tree builder type */ | |
| typedef struct { | |
| PyObject_HEAD | |
| PyObject* root; /* root node (first created node) */ | |
| ElementObject* this; /* current node */ | |
| ElementObject* last; /* most recently created node */ | |
| PyObject* data; /* data collector (string or list), or NULL */ | |
| PyObject* stack; /* element stack */ | |
| Py_ssize_t index; /* current stack size (0=empty) */ | |
| /* element tracing */ | |
| PyObject* events; /* list of events, or NULL if not collecting */ | |
| PyObject* start_event_obj; /* event objects (NULL to ignore) */ | |
| PyObject* end_event_obj; | |
| PyObject* start_ns_event_obj; | |
| PyObject* end_ns_event_obj; | |
| } TreeBuilderObject; | |
| staticforward PyTypeObject TreeBuilder_Type; | |
| #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type) | |
| /* -------------------------------------------------------------------- */ | |
| /* constructor and destructor */ | |
| LOCAL(PyObject*) | |
| treebuilder_new(void) | |
| { | |
| TreeBuilderObject* self; | |
| self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type); | |
| if (self == NULL) | |
| return NULL; | |
| self->root = NULL; | |
| Py_INCREF(Py_None); | |
| self->this = (ElementObject*) Py_None; | |
| Py_INCREF(Py_None); | |
| self->last = (ElementObject*) Py_None; | |
| self->data = NULL; | |
| self->stack = PyList_New(20); | |
| self->index = 0; | |
| self->events = NULL; | |
| self->start_event_obj = self->end_event_obj = NULL; | |
| self->start_ns_event_obj = self->end_ns_event_obj = NULL; | |
| ALLOC(sizeof(TreeBuilderObject), "create treebuilder"); | |
| return (PyObject*) self; | |
| } | |
| static PyObject* | |
| treebuilder(PyObject* self_, PyObject* args) | |
| { | |
| if (!PyArg_ParseTuple(args, ":TreeBuilder")) | |
| return NULL; | |
| return treebuilder_new(); | |
| } | |
| static void | |
| treebuilder_dealloc(TreeBuilderObject* self) | |
| { | |
| Py_XDECREF(self->end_ns_event_obj); | |
| Py_XDECREF(self->start_ns_event_obj); | |
| Py_XDECREF(self->end_event_obj); | |
| Py_XDECREF(self->start_event_obj); | |
| Py_XDECREF(self->events); | |
| Py_DECREF(self->stack); | |
| Py_XDECREF(self->data); | |
| Py_DECREF(self->last); | |
| Py_DECREF(self->this); | |
| Py_XDECREF(self->root); | |
| RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder"); | |
| PyObject_Del(self); | |
| } | |
| /* -------------------------------------------------------------------- */ | |
| /* handlers */ | |
| LOCAL(PyObject*) | |
| treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding, | |
| PyObject* standalone) | |
| { | |
| Py_RETURN_NONE; | |
| } | |
| LOCAL(PyObject*) | |
| treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, | |
| PyObject* attrib) | |
| { | |
| PyObject* node; | |
| PyObject* this; | |
| if (self->data) { | |
| if (self->this == self->last) { | |
| Py_DECREF(JOIN_OBJ(self->last->text)); | |
| self->last->text = JOIN_SET( | |
| self->data, PyList_CheckExact(self->data) | |
| ); | |
| } else { | |
| Py_DECREF(JOIN_OBJ(self->last->tail)); | |
| self->last->tail = JOIN_SET( | |
| self->data, PyList_CheckExact(self->data) | |
| ); | |
| } | |
| self->data = NULL; | |
| } | |
| node = element_new(tag, attrib); | |
| if (!node) | |
| return NULL; | |
| this = (PyObject*) self->this; | |
| if (this != Py_None) { | |
| if (element_add_subelement((ElementObject*) this, node) < 0) | |
| goto error; | |
| } else { | |
| if (self->root) { | |
| PyErr_SetString( | |
| elementtree_parseerror_obj, | |
| "multiple elements on top level" | |
| ); | |
| goto error; | |
| } | |
| Py_INCREF(node); | |
| self->root = node; | |
| } | |
| if (self->index < PyList_GET_SIZE(self->stack)) { | |
| if (PyList_SetItem(self->stack, self->index, this) < 0) | |
| goto error; | |
| Py_INCREF(this); | |
| } else { | |
| if (PyList_Append(self->stack, this) < 0) | |
| goto error; | |
| } | |
| self->index++; | |
| Py_DECREF(this); | |
| Py_INCREF(node); | |
| self->this = (ElementObject*) node; | |
| Py_DECREF(self->last); | |
| Py_INCREF(node); | |
| self->last = (ElementObject*) node; | |
| if (self->start_event_obj) { | |
| PyObject* res; | |
| PyObject* action = self->start_event_obj; | |
| res = PyTuple_New(2); | |
| if (res) { | |
| Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); | |
| Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); | |
| PyList_Append(self->events, res); | |
| Py_DECREF(res); | |
| } else | |
| PyErr_Clear(); /* FIXME: propagate error */ | |
| } | |
| return node; | |
| error: | |
| Py_DECREF(node); | |
| return NULL; | |
| } | |
| LOCAL(PyObject*) | |
| treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) | |
| { | |
| if (!self->data) { | |
| if (self->last == (ElementObject*) Py_None) { | |
| /* ignore calls to data before the first call to start */ | |
| Py_RETURN_NONE; | |
| } | |
| /* store the first item as is */ | |
| Py_INCREF(data); self->data = data; | |
| } else { | |
| /* more than one item; use a list to collect items */ | |
| if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 && | |
| PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) { | |
| /* expat often generates single character data sections; handle | |
| the most common case by resizing the existing string... */ | |
| Py_ssize_t size = PyString_GET_SIZE(self->data); | |
| if (_PyString_Resize(&self->data, size + 1) < 0) | |
| return NULL; | |
| PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0]; | |
| } else if (PyList_CheckExact(self->data)) { | |
| if (PyList_Append(self->data, data) < 0) | |
| return NULL; | |
| } else { | |
| PyObject* list = PyList_New(2); | |
| if (!list) | |
| return NULL; | |
| PyList_SET_ITEM(list, 0, self->data); | |
| Py_INCREF(data); PyList_SET_ITEM(list, 1, data); | |
| self->data = list; | |
| } | |
| } | |
| Py_RETURN_NONE; | |
| } | |
| LOCAL(PyObject*) | |
| treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) | |
| { | |
| PyObject* item; | |
| if (self->data) { | |
| if (self->this == self->last) { | |
| Py_DECREF(JOIN_OBJ(self->last->text)); | |
| self->last->text = JOIN_SET( | |
| self->data, PyList_CheckExact(self->data) | |
| ); | |
| } else { | |
| Py_DECREF(JOIN_OBJ(self->last->tail)); | |
| self->last->tail = JOIN_SET( | |
| self->data, PyList_CheckExact(self->data) | |
| ); | |
| } | |
| self->data = NULL; | |
| } | |
| if (self->index == 0) { | |
| PyErr_SetString( | |
| PyExc_IndexError, | |
| "pop from empty stack" | |
| ); | |
| return NULL; | |
| } | |
| self->index--; | |
| item = PyList_GET_ITEM(self->stack, self->index); | |
| Py_INCREF(item); | |
| Py_DECREF(self->last); | |
| self->last = (ElementObject*) self->this; | |
| self->this = (ElementObject*) item; | |
| if (self->end_event_obj) { | |
| PyObject* res; | |
| PyObject* action = self->end_event_obj; | |
| PyObject* node = (PyObject*) self->last; | |
| res = PyTuple_New(2); | |
| if (res) { | |
| Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); | |
| Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); | |
| PyList_Append(self->events, res); | |
| Py_DECREF(res); | |
| } else | |
| PyErr_Clear(); /* FIXME: propagate error */ | |
| } | |
| Py_INCREF(self->last); | |
| return (PyObject*) self->last; | |
| } | |
| LOCAL(void) | |
| treebuilder_handle_namespace(TreeBuilderObject* self, int start, | |
| PyObject *prefix, PyObject *uri) | |
| { | |
| PyObject* res; | |
| PyObject* action; | |
| PyObject* parcel; | |
| if (!self->events) | |
| return; | |
| if (start) { | |
| if (!self->start_ns_event_obj) | |
| return; | |
| action = self->start_ns_event_obj; | |
| parcel = Py_BuildValue("OO", prefix, uri); | |
| if (!parcel) | |
| return; | |
| Py_INCREF(action); | |
| } else { | |
| if (!self->end_ns_event_obj) | |
| return; | |
| action = self->end_ns_event_obj; | |
| Py_INCREF(action); | |
| parcel = Py_None; | |
| Py_INCREF(parcel); | |
| } | |
| res = PyTuple_New(2); | |
| if (res) { | |
| PyTuple_SET_ITEM(res, 0, action); | |
| PyTuple_SET_ITEM(res, 1, parcel); | |
| PyList_Append(self->events, res); | |
| Py_DECREF(res); | |
| } else | |
| PyErr_Clear(); /* FIXME: propagate error */ | |
| } | |
| /* -------------------------------------------------------------------- */ | |
| /* methods (in alphabetical order) */ | |
| static PyObject* | |
| treebuilder_data(TreeBuilderObject* self, PyObject* args) | |
| { | |
| PyObject* data; | |
| if (!PyArg_ParseTuple(args, "O:data", &data)) | |
| return NULL; | |
| return treebuilder_handle_data(self, data); | |
| } | |
| static PyObject* | |
| treebuilder_end(TreeBuilderObject* self, PyObject* args) | |
| { | |
| PyObject* tag; | |
| if (!PyArg_ParseTuple(args, "O:end", &tag)) | |
| return NULL; | |
| return treebuilder_handle_end(self, tag); | |
| } | |
| LOCAL(PyObject*) | |
| treebuilder_done(TreeBuilderObject* self) | |
| { | |
| PyObject* res; | |
| /* FIXME: check stack size? */ | |
| if (self->root) | |
| res = self->root; | |
| else | |
| res = Py_None; | |
| Py_INCREF(res); | |
| return res; | |
| } | |
| static PyObject* | |
| treebuilder_close(TreeBuilderObject* self, PyObject* args) | |
| { | |
| if (!PyArg_ParseTuple(args, ":close")) | |
| return NULL; | |
| return treebuilder_done(self); | |
| } | |
| static PyObject* | |
| treebuilder_start(TreeBuilderObject* self, PyObject* args) | |
| { | |
| PyObject* tag; | |
| PyObject* attrib = Py_None; | |
| if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib)) | |
| return NULL; | |
| return treebuilder_handle_start(self, tag, attrib); | |
| } | |
| static PyObject* | |
| treebuilder_xml(TreeBuilderObject* self, PyObject* args) | |
| { | |
| PyObject* encoding; | |
| PyObject* standalone; | |
| if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone)) | |
| return NULL; | |
| return treebuilder_handle_xml(self, encoding, standalone); | |
| } | |
| static PyMethodDef treebuilder_methods[] = { | |
| {"data", (PyCFunction) treebuilder_data, METH_VARARGS}, | |
| {"start", (PyCFunction) treebuilder_start, METH_VARARGS}, | |
| {"end", (PyCFunction) treebuilder_end, METH_VARARGS}, | |
| {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS}, | |
| {"close", (PyCFunction) treebuilder_close, METH_VARARGS}, | |
| {NULL, NULL} | |
| }; | |
| static PyObject* | |
| treebuilder_getattr(TreeBuilderObject* self, char* name) | |
| { | |
| return Py_FindMethod(treebuilder_methods, (PyObject*) self, name); | |
| } | |
| statichere PyTypeObject TreeBuilder_Type = { | |
| PyObject_HEAD_INIT(NULL) | |
| 0, "TreeBuilder", sizeof(TreeBuilderObject), 0, | |
| /* methods */ | |
| (destructor)treebuilder_dealloc, /* tp_dealloc */ | |
| 0, /* tp_print */ | |
| (getattrfunc)treebuilder_getattr, /* tp_getattr */ | |
| }; | |
| /* ==================================================================== */ | |
| /* the expat interface */ | |
| #if defined(USE_EXPAT) | |
| #include "expat.h" | |
| #if defined(USE_PYEXPAT_CAPI) | |
| #include "pyexpat.h" | |
| static struct PyExpat_CAPI* expat_capi; | |
| #define EXPAT(func) (expat_capi->func) | |
| #else | |
| #define EXPAT(func) (XML_##func) | |
| #endif | |
| typedef struct { | |
| PyObject_HEAD | |
| XML_Parser parser; | |
| PyObject* target; | |
| PyObject* entity; | |
| PyObject* names; | |
| PyObject* handle_xml; | |
| PyObject* handle_start; | |
| PyObject* handle_data; | |
| PyObject* handle_end; | |
| PyObject* handle_comment; | |
| PyObject* handle_pi; | |
| PyObject* handle_close; | |
| } XMLParserObject; | |
| staticforward PyTypeObject XMLParser_Type; | |
| /* helpers */ | |
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first `size` bytes of `string` has its high
   bit set (i.e. the 8-bit string contains UTF-8 characters), else 0. */
LOCAL(int)
checkstring(const char* string, int size)
{
    const char* cursor = string;
    const char* end = string + size;

    while (cursor < end) {
        if (*cursor & 0x80)
            return 1;
        cursor++;
    }

    return 0;
}
#endif
| LOCAL(PyObject*) | |
| makestring(const char* string, int size) | |
| { | |
| /* convert a UTF-8 string to either a 7-bit ascii string or a | |
| Unicode string */ | |
| #if defined(Py_USING_UNICODE) | |
| if (checkstring(string, size)) | |
| return PyUnicode_DecodeUTF8(string, size, "strict"); | |
| #endif | |
| return PyString_FromStringAndSize(string, size); | |
| } | |
| LOCAL(PyObject*) | |
| makeuniversal(XMLParserObject* self, const char* string) | |
| { | |
| /* convert a UTF-8 tag/attribute name from the expat parser | |
| to a universal name string */ | |
| int size = strlen(string); | |
| PyObject* key; | |
| PyObject* value; | |
| /* look the 'raw' name up in the names dictionary */ | |
| key = PyString_FromStringAndSize(string, size); | |
| if (!key) | |
| return NULL; | |
| value = PyDict_GetItem(self->names, key); | |
| if (value) { | |
| Py_INCREF(value); | |
| } else { | |
| /* new name. convert to universal name, and decode as | |
| necessary */ | |
| PyObject* tag; | |
| char* p; | |
| int i; | |
| /* look for namespace separator */ | |
| for (i = 0; i < size; i++) | |
| if (string[i] == '}') | |
| break; | |
| if (i != size) { | |
| /* convert to universal name */ | |
| tag = PyString_FromStringAndSize(NULL, size+1); | |
| p = PyString_AS_STRING(tag); | |
| p[0] = '{'; | |
| memcpy(p+1, string, size); | |
| size++; | |
| } else { | |
| /* plain name; use key as tag */ | |
| Py_INCREF(key); | |
| tag = key; | |
| } | |
| /* decode universal name */ | |
| #if defined(Py_USING_UNICODE) | |
| /* inline makestring, to avoid duplicating the source string if | |
| it's not an utf-8 string */ | |
| p = PyString_AS_STRING(tag); | |
| if (checkstring(p, size)) { | |
| value = PyUnicode_DecodeUTF8(p, size, "strict"); | |
| Py_DECREF(tag); | |
| if (!value) { | |
| Py_DECREF(key); | |
| return NULL; | |
| } | |
| } else | |
| #endif | |
| value = tag; /* use tag as is */ | |
| /* add to names dictionary */ | |
| if (PyDict_SetItem(self->names, key, value) < 0) { | |
| Py_DECREF(key); | |
| Py_DECREF(value); | |
| return NULL; | |
| } | |
| } | |
| Py_DECREF(key); | |
| return value; | |
| } | |
| static void | |
| expat_set_error(const char* message, int line, int column) | |
| { | |
| PyObject *error; | |
| PyObject *position; | |
| char buffer[256]; | |
| sprintf(buffer, "%s: line %d, column %d", message, line, column); | |
| error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer); | |
| if (!error) | |
| return; | |
| /* add position attribute */ | |
| position = Py_BuildValue("(ii)", line, column); | |
| if (!position) { | |
| Py_DECREF(error); | |
| return; | |
| } | |
| if (PyObject_SetAttrString(error, "position", position) == -1) { | |
| Py_DECREF(error); | |
| Py_DECREF(position); | |
| return; | |
| } | |
| Py_DECREF(position); | |
| PyErr_SetObject(elementtree_parseerror_obj, error); | |
| Py_DECREF(error); | |
| } | |
| /* -------------------------------------------------------------------- */ | |
| /* handlers */ | |
| static void | |
| expat_default_handler(XMLParserObject* self, const XML_Char* data_in, | |
| int data_len) | |
| { | |
| PyObject* key; | |
| PyObject* value; | |
| PyObject* res; | |
| if (data_len < 2 || data_in[0] != '&') | |
| return; | |
| key = makestring(data_in + 1, data_len - 2); | |
| if (!key) | |
| return; | |
| value = PyDict_GetItem(self->entity, key); | |
| if (value) { | |
| if (TreeBuilder_CheckExact(self->target)) | |
| res = treebuilder_handle_data( | |
| (TreeBuilderObject*) self->target, value | |
| ); | |
| else if (self->handle_data) | |
| res = PyObject_CallFunction(self->handle_data, "O", value); | |
| else | |
| res = NULL; | |
| Py_XDECREF(res); | |
| } else if (!PyErr_Occurred()) { | |
| /* Report the first error, not the last */ | |
| char message[128]; | |
| sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key)); | |
| expat_set_error( | |
| message, | |
| EXPAT(GetErrorLineNumber)(self->parser), | |
| EXPAT(GetErrorColumnNumber)(self->parser) | |
| ); | |
| } | |
| Py_DECREF(key); | |
| } | |
| static void | |
| expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, | |
| const XML_Char **attrib_in) | |
| { | |
| PyObject* res; | |
| PyObject* tag; | |
| PyObject* attrib; | |
| int ok; | |
| /* tag name */ | |
| tag = makeuniversal(self, tag_in); | |
| if (!tag) | |
| return; /* parser will look for errors */ | |
| /* attributes */ | |
| if (attrib_in[0]) { | |
| attrib = PyDict_New(); | |
| if (!attrib) | |
| return; | |
| while (attrib_in[0] && attrib_in[1]) { | |
| PyObject* key = makeuniversal(self, attrib_in[0]); | |
| PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1])); | |
| if (!key || !value) { | |
| Py_XDECREF(value); | |
| Py_XDECREF(key); | |
| Py_DECREF(attrib); | |
| return; | |
| } | |
| ok = PyDict_SetItem(attrib, key, value); | |
| Py_DECREF(value); | |
| Py_DECREF(key); | |
| if (ok < 0) { | |
| Py_DECREF(attrib); | |
| return; | |
| } | |
| attrib_in += 2; | |
| } | |
| } else { | |
| Py_INCREF(Py_None); | |
| attrib = Py_None; | |
| } | |
| if (TreeBuilder_CheckExact(self->target)) | |
| /* shortcut */ | |
| res = treebuilder_handle_start((TreeBuilderObject*) self->target, | |
| tag, attrib); | |
| else if (self->handle_start) { | |
| if (attrib == Py_None) { | |
| Py_DECREF(attrib); | |
| attrib = PyDict_New(); | |
| if (!attrib) | |
| return; | |
| } | |
| res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib); | |
| } else | |
| res = NULL; | |
| Py_DECREF(tag); | |
| Py_DECREF(attrib); | |
| Py_XDECREF(res); | |
| } | |
| static void | |
| expat_data_handler(XMLParserObject* self, const XML_Char* data_in, | |
| int data_len) | |
| { | |
| PyObject* data; | |
| PyObject* res; | |
| data = makestring(data_in, data_len); | |
| if (!data) | |
| return; /* parser will look for errors */ | |
| if (TreeBuilder_CheckExact(self->target)) | |
| /* shortcut */ | |
| res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); | |
| else if (self->handle_data) | |
| res = PyObject_CallFunction(self->handle_data, "O", data); | |
| else | |
| res = NULL; | |
| Py_DECREF(data); | |
| Py_XDECREF(res); | |
| } | |
| static void | |
| expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) | |
| { | |
| PyObject* tag; | |
| PyObject* res = NULL; | |
| if (TreeBuilder_CheckExact(self->target)) | |
| /* shortcut */ | |
| /* the standard tree builder doesn't look at the end tag */ | |
| res = treebuilder_handle_end( | |
| (TreeBuilderObject*) self->target, Py_None | |
| ); | |
| else if (self->handle_end) { | |
| tag = makeuniversal(self, tag_in); | |
| if (tag) { | |
| res = PyObject_CallFunction(self->handle_end, "O", tag); | |
| Py_DECREF(tag); | |
| } | |
| } | |
| Py_XDECREF(res); | |
| } | |
| static void | |
| expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix, | |
| const XML_Char *uri) | |
| { | |
| PyObject* sprefix = NULL; | |
| PyObject* suri = NULL; | |
| suri = makestring(uri, strlen(uri)); | |
| if (!suri) | |
| return; | |
| if (prefix) | |
| sprefix = makestring(prefix, strlen(prefix)); | |
| else | |
| sprefix = PyString_FromStringAndSize("", 0); | |
| if (!sprefix) { | |
| Py_DECREF(suri); | |
| return; | |
| } | |
| treebuilder_handle_namespace( | |
| (TreeBuilderObject*) self->target, 1, sprefix, suri | |
| ); | |
| Py_DECREF(sprefix); | |
| Py_DECREF(suri); | |
| } | |
| static void | |
| expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) | |
| { | |
| treebuilder_handle_namespace( | |
| (TreeBuilderObject*) self->target, 0, NULL, NULL | |
| ); | |
| } | |
| static void | |
| expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) | |
| { | |
| PyObject* comment; | |
| PyObject* res; | |
| if (self->handle_comment) { | |
| comment = makestring(comment_in, strlen(comment_in)); | |
| if (comment) { | |
| res = PyObject_CallFunction(self->handle_comment, "O", comment); | |
| Py_XDECREF(res); | |
| Py_DECREF(comment); | |
| } | |
| } | |
| } | |
| static void | |
| expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, | |
| const XML_Char* data_in) | |
| { | |
| PyObject* target; | |
| PyObject* data; | |
| PyObject* res; | |
| if (self->handle_pi) { | |
| target = makestring(target_in, strlen(target_in)); | |
| data = makestring(data_in, strlen(data_in)); | |
| if (target && data) { | |
| res = PyObject_CallFunction(self->handle_pi, "OO", target, data); | |
| Py_XDECREF(res); | |
| Py_DECREF(data); | |
| Py_DECREF(target); | |
| } else { | |
| Py_XDECREF(data); | |
| Py_XDECREF(target); | |
| } | |
| } | |
| } | |
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding callback: build the byte -> code point map
   for encoding `name` by decoding all 256 byte values with Python's
   codec of that name.  Bytes that decode to the replacement character
   are mapped to -1.  Returns XML_STATUS_OK on success and
   XML_STATUS_ERROR if decoding fails or does not yield exactly 256
   characters. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int status = XML_STATUS_ERROR;
    int ch;

    memset(info, 0, sizeof(XML_Encoding));

    /* every possible byte value, in order */
    for (ch = 0; ch < 256; ch++)
        bytes[ch] = ch;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) == 256) {
        chars = PyUnicode_AS_UNICODE(decoded);
        for (ch = 0; ch < 256; ch++) {
            if (chars[ch] == Py_UNICODE_REPLACEMENT_CHARACTER)
                info->map[ch] = -1;
            else
                info->map[ch] = chars[ch];
        }
        status = XML_STATUS_OK;
    }

    Py_DECREF(decoded);

    return status;
}
#endif
| /* -------------------------------------------------------------------- */ | |
| /* constructor and destructor */ | |
| static PyObject* | |
| xmlparser(PyObject* self_, PyObject* args, PyObject* kw) | |
| { | |
| XMLParserObject* self; | |
| /* FIXME: does this need to be static? */ | |
| static XML_Memory_Handling_Suite memory_handler; | |
| PyObject* target = NULL; | |
| char* encoding = NULL; | |
| static char* kwlist[] = { "target", "encoding", NULL }; | |
| if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist, | |
| &target, &encoding)) | |
| return NULL; | |
| #if defined(USE_PYEXPAT_CAPI) | |
| if (!expat_capi) { | |
| PyErr_SetString( | |
| PyExc_RuntimeError, "cannot load dispatch table from pyexpat" | |
| ); | |
| return NULL; | |
| } | |
| #endif | |
| self = PyObject_New(XMLParserObject, &XMLParser_Type); | |
| if (self == NULL) | |
| return NULL; | |
| self->entity = PyDict_New(); | |
| if (!self->entity) { | |
| PyObject_Del(self); | |
| return NULL; | |
| } | |
| self->names = PyDict_New(); | |
| if (!self->names) { | |
| PyObject_Del(self->entity); | |
| PyObject_Del(self); | |
| return NULL; | |
| } | |
| memory_handler.malloc_fcn = PyObject_Malloc; | |
| memory_handler.realloc_fcn = PyObject_Realloc; | |
| memory_handler.free_fcn = PyObject_Free; | |
| self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}"); | |
| if (!self->parser) { | |
| PyObject_Del(self->names); | |
| PyObject_Del(self->entity); | |
| PyObject_Del(self); | |
| PyErr_NoMemory(); | |
| return NULL; | |
| } | |
| /* setup target handlers */ | |
| if (!target) { | |
| target = treebuilder_new(); | |
| if (!target) { | |
| EXPAT(ParserFree)(self->parser); | |
| PyObject_Del(self->names); | |
| PyObject_Del(self->entity); | |
| PyObject_Del(self); | |
| return NULL; | |
| } | |
| } else | |
| Py_INCREF(target); | |
| self->target = target; | |
| self->handle_xml = PyObject_GetAttrString(target, "xml"); | |
| self->handle_start = PyObject_GetAttrString(target, "start"); | |
| self->handle_data = PyObject_GetAttrString(target, "data"); | |
| self->handle_end = PyObject_GetAttrString(target, "end"); | |
| self->handle_comment = PyObject_GetAttrString(target, "comment"); | |
| self->handle_pi = PyObject_GetAttrString(target, "pi"); | |
| self->handle_close = PyObject_GetAttrString(target, "close"); | |
| PyErr_Clear(); | |
| /* configure parser */ | |
| EXPAT(SetUserData)(self->parser, self); | |
| EXPAT(SetElementHandler)( | |
| self->parser, | |
| (XML_StartElementHandler) expat_start_handler, | |
| (XML_EndElementHandler) expat_end_handler | |
| ); | |
| EXPAT(SetDefaultHandlerExpand)( | |
| self->parser, | |
| (XML_DefaultHandler) expat_default_handler | |
| ); | |
| EXPAT(SetCharacterDataHandler)( | |
| self->parser, | |
| (XML_CharacterDataHandler) expat_data_handler | |
| ); | |
| if (self->handle_comment) | |
| EXPAT(SetCommentHandler)( | |
| self->parser, | |
| (XML_CommentHandler) expat_comment_handler | |
| ); | |
| if (self->handle_pi) | |
| EXPAT(SetProcessingInstructionHandler)( | |
| self->parser, | |
| (XML_ProcessingInstructionHandler) expat_pi_handler | |
| ); | |
| #if defined(Py_USING_UNICODE) | |
| EXPAT(SetUnknownEncodingHandler)( | |
| self->parser, | |
| (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL | |
| ); | |
| #endif | |
| ALLOC(sizeof(XMLParserObject), "create expatparser"); | |
| return (PyObject*) self; | |
| } | |
| static void | |
| xmlparser_dealloc(XMLParserObject* self) | |
| { | |
| EXPAT(ParserFree)(self->parser); | |
| Py_XDECREF(self->handle_close); | |
| Py_XDECREF(self->handle_pi); | |
| Py_XDECREF(self->handle_comment); | |
| Py_XDECREF(self->handle_end); | |
| Py_XDECREF(self->handle_data); | |
| Py_XDECREF(self->handle_start); | |
| Py_XDECREF(self->handle_xml); | |
| Py_DECREF(self->target); | |
| Py_DECREF(self->entity); | |
| Py_DECREF(self->names); | |
| RELEASE(sizeof(XMLParserObject), "destroy expatparser"); | |
| PyObject_Del(self); | |
| } | |
| /* -------------------------------------------------------------------- */ | |
| /* methods (in alphabetical order) */ | |
| LOCAL(PyObject*) | |
| expat_parse(XMLParserObject* self, char* data, int data_len, int final) | |
| { | |
| int ok; | |
| ok = EXPAT(Parse)(self->parser, data, data_len, final); | |
| if (PyErr_Occurred()) | |
| return NULL; | |
| if (!ok) { | |
| expat_set_error( | |
| EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)), | |
| EXPAT(GetErrorLineNumber)(self->parser), | |
| EXPAT(GetErrorColumnNumber)(self->parser) | |
| ); | |
| return NULL; | |
| } | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject* | |
| xmlparser_close(XMLParserObject* self, PyObject* args) | |
| { | |
| /* end feeding data to parser */ | |
| PyObject* res; | |
| if (!PyArg_ParseTuple(args, ":close")) | |
| return NULL; | |
| res = expat_parse(self, "", 0, 1); | |
| if (!res) | |
| return NULL; | |
| if (TreeBuilder_CheckExact(self->target)) { | |
| Py_DECREF(res); | |
| return treebuilder_done((TreeBuilderObject*) self->target); | |
| } if (self->handle_close) { | |
| Py_DECREF(res); | |
| return PyObject_CallFunction(self->handle_close, ""); | |
| } else | |
| return res; | |
| } | |
| static PyObject* | |
| xmlparser_feed(XMLParserObject* self, PyObject* args) | |
| { | |
| /* feed data to parser */ | |
| char* data; | |
| int data_len; | |
| if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len)) | |
| return NULL; | |
| return expat_parse(self, data, data_len, 0); | |
| } | |
| static PyObject* | |
| xmlparser_parse(XMLParserObject* self, PyObject* args) | |
| { | |
| /* (internal) parse until end of input stream */ | |
| PyObject* reader; | |
| PyObject* buffer; | |
| PyObject* res; | |
| PyObject* fileobj; | |
| if (!PyArg_ParseTuple(args, "O:_parse", &fileobj)) | |
| return NULL; | |
| reader = PyObject_GetAttrString(fileobj, "read"); | |
| if (!reader) | |
| return NULL; | |
| /* read from open file object */ | |
| for (;;) { | |
| buffer = PyObject_CallFunction(reader, "i", 64*1024); | |
| if (!buffer) { | |
| /* read failed (e.g. due to KeyboardInterrupt) */ | |
| Py_DECREF(reader); | |
| return NULL; | |
| } | |
| if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) { | |
| Py_DECREF(buffer); | |
| break; | |
| } | |
| res = expat_parse( | |
| self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0 | |
| ); | |
| Py_DECREF(buffer); | |
| if (!res) { | |
| Py_DECREF(reader); | |
| return NULL; | |
| } | |
| Py_DECREF(res); | |
| } | |
| Py_DECREF(reader); | |
| res = expat_parse(self, "", 0, 1); | |
| if (res && TreeBuilder_CheckExact(self->target)) { | |
| Py_DECREF(res); | |
| return treebuilder_done((TreeBuilderObject*) self->target); | |
| } | |
| return res; | |
| } | |
| static PyObject* | |
| xmlparser_setevents(XMLParserObject* self, PyObject* args) | |
| { | |
| /* activate element event reporting */ | |
| Py_ssize_t i; | |
| TreeBuilderObject* target; | |
| PyObject* events; /* event collector */ | |
| PyObject* event_set = Py_None; | |
| if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events, | |
| &event_set)) | |
| return NULL; | |
| if (!TreeBuilder_CheckExact(self->target)) { | |
| PyErr_SetString( | |
| PyExc_TypeError, | |
| "event handling only supported for cElementTree.Treebuilder " | |
| "targets" | |
| ); | |
| return NULL; | |
| } | |
| target = (TreeBuilderObject*) self->target; | |
| Py_INCREF(events); | |
| Py_XDECREF(target->events); | |
| target->events = events; | |
| /* clear out existing events */ | |
| Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL; | |
| Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL; | |
| Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL; | |
| Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL; | |
| if (event_set == Py_None) { | |
| /* default is "end" only */ | |
| target->end_event_obj = PyString_FromString("end"); | |
| Py_RETURN_NONE; | |
| } | |
| if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */ | |
| goto error; | |
| for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) { | |
| PyObject* item = PyTuple_GET_ITEM(event_set, i); | |
| char* event; | |
| if (!PyString_Check(item)) | |
| goto error; | |
| event = PyString_AS_STRING(item); | |
| if (strcmp(event, "start") == 0) { | |
| Py_INCREF(item); | |
| target->start_event_obj = item; | |
| } else if (strcmp(event, "end") == 0) { | |
| Py_INCREF(item); | |
| Py_XDECREF(target->end_event_obj); | |
| target->end_event_obj = item; | |
| } else if (strcmp(event, "start-ns") == 0) { | |
| Py_INCREF(item); | |
| Py_XDECREF(target->start_ns_event_obj); | |
| target->start_ns_event_obj = item; | |
| EXPAT(SetNamespaceDeclHandler)( | |
| self->parser, | |
| (XML_StartNamespaceDeclHandler) expat_start_ns_handler, | |
| (XML_EndNamespaceDeclHandler) expat_end_ns_handler | |
| ); | |
| } else if (strcmp(event, "end-ns") == 0) { | |
| Py_INCREF(item); | |
| Py_XDECREF(target->end_ns_event_obj); | |
| target->end_ns_event_obj = item; | |
| EXPAT(SetNamespaceDeclHandler)( | |
| self->parser, | |
| (XML_StartNamespaceDeclHandler) expat_start_ns_handler, | |
| (XML_EndNamespaceDeclHandler) expat_end_ns_handler | |
| ); | |
| } else { | |
| PyErr_Format( | |
| PyExc_ValueError, | |
| "unknown event '%s'", event | |
| ); | |
| return NULL; | |
| } | |
| } | |
| Py_RETURN_NONE; | |
| error: | |
| PyErr_SetString( | |
| PyExc_TypeError, | |
| "invalid event tuple" | |
| ); | |
| return NULL; | |
| } | |
| static PyMethodDef xmlparser_methods[] = { | |
| {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS}, | |
| {"close", (PyCFunction) xmlparser_close, METH_VARARGS}, | |
| {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS}, | |
| {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS}, | |
| {NULL, NULL} | |
| }; | |
| static PyObject* | |
| xmlparser_getattr(XMLParserObject* self, char* name) | |
| { | |
| PyObject* res; | |
| res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name); | |
| if (res) | |
| return res; | |
| PyErr_Clear(); | |
| if (strcmp(name, "entity") == 0) | |
| res = self->entity; | |
| else if (strcmp(name, "target") == 0) | |
| res = self->target; | |
| else if (strcmp(name, "version") == 0) { | |
| char buffer[100]; | |
| sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION, | |
| XML_MINOR_VERSION, XML_MICRO_VERSION); | |
| return PyString_FromString(buffer); | |
| } else { | |
| PyErr_SetString(PyExc_AttributeError, name); | |
| return NULL; | |
| } | |
| Py_INCREF(res); | |
| return res; | |
| } | |
| statichere PyTypeObject XMLParser_Type = { | |
| PyObject_HEAD_INIT(NULL) | |
| 0, "XMLParser", sizeof(XMLParserObject), 0, | |
| /* methods */ | |
| (destructor)xmlparser_dealloc, /* tp_dealloc */ | |
| 0, /* tp_print */ | |
| (getattrfunc)xmlparser_getattr, /* tp_getattr */ | |
| }; | |
| #endif | |
| /* ==================================================================== */ | |
| /* python module interface */ | |
| static PyMethodDef _functions[] = { | |
| {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS}, | |
| {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS}, | |
| {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS}, | |
| #if defined(USE_EXPAT) | |
| {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, | |
| {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, | |
| #endif | |
| {NULL, NULL} | |
| }; | |
| DL_EXPORT(void) | |
| init_elementtree(void) | |
| { | |
| PyObject* m; | |
| PyObject* g; | |
| char* bootstrap; | |
| /* Patch object type */ | |
| Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type; | |
| #if defined(USE_EXPAT) | |
| Py_TYPE(&XMLParser_Type) = &PyType_Type; | |
| #endif | |
| m = Py_InitModule("_elementtree", _functions); | |
| if (!m) | |
| return; | |
| /* python glue code */ | |
| g = PyDict_New(); | |
| if (!g) | |
| return; | |
| PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins()); | |
| bootstrap = ( | |
| "from copy import copy, deepcopy\n" | |
| "try:\n" | |
| " from xml.etree import ElementTree\n" | |
| "except ImportError:\n" | |
| " import ElementTree\n" | |
| "ET = ElementTree\n" | |
| "del ElementTree\n" | |
| "import _elementtree as cElementTree\n" | |
| "try:\n" /* check if copy works as is */ | |
| " copy(cElementTree.Element('x'))\n" | |
| "except:\n" | |
| " def copyelement(elem):\n" | |
| " return elem\n" | |
| "class CommentProxy:\n" | |
| " def __call__(self, text=None):\n" | |
| " element = cElementTree.Element(ET.Comment)\n" | |
| " element.text = text\n" | |
| " return element\n" | |
| " def __cmp__(self, other):\n" | |
| " return cmp(ET.Comment, other)\n" | |
| "cElementTree.Comment = CommentProxy()\n" | |
| "class ElementTree(ET.ElementTree):\n" /* public */ | |
| " def parse(self, source, parser=None):\n" | |
| " if not hasattr(source, 'read'):\n" | |
| " source = open(source, 'rb')\n" | |
| " if parser is not None:\n" | |
| " while 1:\n" | |
| " data = source.read(65536)\n" | |
| " if not data:\n" | |
| " break\n" | |
| " parser.feed(data)\n" | |
| " self._root = parser.close()\n" | |
| " else:\n" | |
| " parser = cElementTree.XMLParser()\n" | |
| " self._root = parser._parse(source)\n" | |
| " return self._root\n" | |
| "cElementTree.ElementTree = ElementTree\n" | |
| "def iter(node, tag=None):\n" /* helper */ | |
| " if tag == '*':\n" | |
| " tag = None\n" | |
| " if tag is None or node.tag == tag:\n" | |
| " yield node\n" | |
| " for node in node:\n" | |
| " for node in iter(node, tag):\n" | |
| " yield node\n" | |
| "def itertext(node):\n" /* helper */ | |
| " if node.text:\n" | |
| " yield node.text\n" | |
| " for e in node:\n" | |
| " for s in e.itertext():\n" | |
| " yield s\n" | |
| " if e.tail:\n" | |
| " yield e.tail\n" | |
| "def parse(source, parser=None):\n" /* public */ | |
| " tree = ElementTree()\n" | |
| " tree.parse(source, parser)\n" | |
| " return tree\n" | |
| "cElementTree.parse = parse\n" | |
| "class iterparse(object):\n" | |
| " root = None\n" | |
| " def __init__(self, file, events=None):\n" | |
| " if not hasattr(file, 'read'):\n" | |
| " file = open(file, 'rb')\n" | |
| " self._file = file\n" | |
| " self._events = []\n" | |
| " self._index = 0\n" | |
| " self.root = self._root = None\n" | |
| " b = cElementTree.TreeBuilder()\n" | |
| " self._parser = cElementTree.XMLParser(b)\n" | |
| " self._parser._setevents(self._events, events)\n" | |
| " def next(self):\n" | |
| " while 1:\n" | |
| " try:\n" | |
| " item = self._events[self._index]\n" | |
| " except IndexError:\n" | |
| " if self._parser is None:\n" | |
| " self.root = self._root\n" | |
| " raise StopIteration\n" | |
| " # load event buffer\n" | |
| " del self._events[:]\n" | |
| " self._index = 0\n" | |
| " data = self._file.read(16384)\n" | |
| " if data:\n" | |
| " self._parser.feed(data)\n" | |
| " else:\n" | |
| " self._root = self._parser.close()\n" | |
| " self._parser = None\n" | |
| " else:\n" | |
| " self._index = self._index + 1\n" | |
| " return item\n" | |
| " def __iter__(self):\n" | |
| " return self\n" | |
| "cElementTree.iterparse = iterparse\n" | |
| "class PIProxy:\n" | |
| " def __call__(self, target, text=None):\n" | |
| " element = cElementTree.Element(ET.PI)\n" | |
| " element.text = target\n" | |
| " if text:\n" | |
| " element.text = element.text + ' ' + text\n" | |
| " return element\n" | |
| " def __cmp__(self, other):\n" | |
| " return cmp(ET.PI, other)\n" | |
| "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n" | |
| "def XML(text):\n" /* public */ | |
| " parser = cElementTree.XMLParser()\n" | |
| " parser.feed(text)\n" | |
| " return parser.close()\n" | |
| "cElementTree.XML = cElementTree.fromstring = XML\n" | |
| "def XMLID(text):\n" /* public */ | |
| " tree = XML(text)\n" | |
| " ids = {}\n" | |
| " for elem in tree.iter():\n" | |
| " id = elem.get('id')\n" | |
| " if id:\n" | |
| " ids[id] = elem\n" | |
| " return tree, ids\n" | |
| "cElementTree.XMLID = XMLID\n" | |
| "try:\n" | |
| " register_namespace = ET.register_namespace\n" | |
| "except AttributeError:\n" | |
| " def register_namespace(prefix, uri):\n" | |
| " ET._namespace_map[uri] = prefix\n" | |
| "cElementTree.register_namespace = register_namespace\n" | |
| "cElementTree.dump = ET.dump\n" | |
| "cElementTree.ElementPath = ElementPath = ET.ElementPath\n" | |
| "cElementTree.iselement = ET.iselement\n" | |
| "cElementTree.QName = ET.QName\n" | |
| "cElementTree.tostring = ET.tostring\n" | |
| "cElementTree.fromstringlist = ET.fromstringlist\n" | |
| "cElementTree.tostringlist = ET.tostringlist\n" | |
| "cElementTree.VERSION = '" VERSION "'\n" | |
| "cElementTree.__version__ = '" VERSION "'\n" | |
| ); | |
| if (!PyRun_String(bootstrap, Py_file_input, g, NULL)) | |
| return; | |
| elementpath_obj = PyDict_GetItemString(g, "ElementPath"); | |
| elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement"); | |
| if (elementtree_copyelement_obj) { | |
| /* reduce hack needed; enable reduce method */ | |
| PyMethodDef* mp; | |
| for (mp = element_methods; mp->ml_name; mp++) | |
| if (mp->ml_meth == (PyCFunction) element_reduce) { | |
| mp->ml_name = "__reduce__"; | |
| break; | |
| } | |
| } else | |
| PyErr_Clear(); | |
| elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy"); | |
| elementtree_iter_obj = PyDict_GetItemString(g, "iter"); | |
| elementtree_itertext_obj = PyDict_GetItemString(g, "itertext"); | |
| #if defined(USE_PYEXPAT_CAPI) | |
| /* link against pyexpat, if possible */ | |
| expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); | |
| if (expat_capi) { | |
| /* check that it's usable */ | |
| if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 || | |
| expat_capi->size < sizeof(struct PyExpat_CAPI) || | |
| expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION || | |
| expat_capi->MINOR_VERSION != XML_MINOR_VERSION || | |
| expat_capi->MICRO_VERSION != XML_MICRO_VERSION) | |
| expat_capi = NULL; | |
| } | |
| #endif | |
| elementtree_parseerror_obj = PyErr_NewException( | |
| "cElementTree.ParseError", PyExc_SyntaxError, NULL | |
| ); | |
| Py_INCREF(elementtree_parseerror_obj); | |
| PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj); | |
| } |