/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None
     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_7, utf_8, utf_16, utf_32 (the
   latter two also in _le and _be variants), unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   charmap, mbcs (on win32).

   Written by Marc-Andre Lemburg (mal@lemburg.com).

   Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
| #define PY_SSIZE_T_CLEAN | |
| #include "Python.h" | |
| /* --- Registry ----------------------------------------------------------- */ | |
| PyDoc_STRVAR(register__doc__, | |
| "register(search_function)\n\ | |
| \n\ | |
| Register a codec search function. Search functions are expected to take\n\ | |
| one argument, the encoding name in all lower case letters, and return\n\ | |
| a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\ | |
| (or a CodecInfo object)."); | |
| static | |
| PyObject *codec_register(PyObject *self, PyObject *search_function) | |
| { | |
| if (PyCodec_Register(search_function)) | |
| return NULL; | |
| Py_RETURN_NONE; | |
| } | |
| PyDoc_STRVAR(lookup__doc__, | |
| "lookup(encoding) -> CodecInfo\n\ | |
| \n\ | |
| Looks up a codec tuple in the Python codec registry and returns\n\ | |
| a CodecInfo object."); | |
| static | |
| PyObject *codec_lookup(PyObject *self, PyObject *args) | |
| { | |
| char *encoding; | |
| if (!PyArg_ParseTuple(args, "s:lookup", &encoding)) | |
| return NULL; | |
| return _PyCodec_Lookup(encoding); | |
| } | |
| PyDoc_STRVAR(encode__doc__, | |
| "encode(obj, [encoding[,errors]]) -> object\n\ | |
| \n\ | |
| Encodes obj using the codec registered for encoding. encoding defaults\n\ | |
| to the default encoding. errors may be given to set a different error\n\ | |
| handling scheme. Default is 'strict' meaning that encoding errors raise\n\ | |
| a ValueError. Other possible values are 'ignore', 'replace' and\n\ | |
| 'xmlcharrefreplace' as well as any other name registered with\n\ | |
| codecs.register_error that can handle ValueErrors."); | |
| static PyObject * | |
| codec_encode(PyObject *self, PyObject *args) | |
| { | |
| const char *encoding = NULL; | |
| const char *errors = NULL; | |
| PyObject *v; | |
| if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors)) | |
| return NULL; | |
| #ifdef Py_USING_UNICODE | |
| if (encoding == NULL) | |
| encoding = PyUnicode_GetDefaultEncoding(); | |
| #else | |
| if (encoding == NULL) { | |
| PyErr_SetString(PyExc_ValueError, "no encoding specified"); | |
| return NULL; | |
| } | |
| #endif | |
| /* Encode via the codec registry */ | |
| return PyCodec_Encode(v, encoding, errors); | |
| } | |
| PyDoc_STRVAR(decode__doc__, | |
| "decode(obj, [encoding[,errors]]) -> object\n\ | |
| \n\ | |
| Decodes obj using the codec registered for encoding. encoding defaults\n\ | |
| to the default encoding. errors may be given to set a different error\n\ | |
| handling scheme. Default is 'strict' meaning that encoding errors raise\n\ | |
| a ValueError. Other possible values are 'ignore' and 'replace'\n\ | |
| as well as any other name registered with codecs.register_error that is\n\ | |
| able to handle ValueErrors."); | |
| static PyObject * | |
| codec_decode(PyObject *self, PyObject *args) | |
| { | |
| const char *encoding = NULL; | |
| const char *errors = NULL; | |
| PyObject *v; | |
| if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors)) | |
| return NULL; | |
| #ifdef Py_USING_UNICODE | |
| if (encoding == NULL) | |
| encoding = PyUnicode_GetDefaultEncoding(); | |
| #else | |
| if (encoding == NULL) { | |
| PyErr_SetString(PyExc_ValueError, "no encoding specified"); | |
| return NULL; | |
| } | |
| #endif | |
| /* Decode via the codec registry */ | |
| return PyCodec_Decode(v, encoding, errors); | |
| } | |
| /* --- Helpers ------------------------------------------------------------ */ | |
| static | |
| PyObject *codec_tuple(PyObject *unicode, | |
| Py_ssize_t len) | |
| { | |
| PyObject *v; | |
| if (unicode == NULL) | |
| return NULL; | |
| v = Py_BuildValue("On", unicode, len); | |
| Py_DECREF(unicode); | |
| return v; | |
| } | |
| /* --- String codecs ------------------------------------------------------ */ | |
| static PyObject * | |
| escape_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| const char *errors = NULL; | |
| const char *data; | |
| Py_ssize_t size; | |
| if (!PyArg_ParseTuple(args, "s#|z:escape_decode", | |
| &data, &size, &errors)) | |
| return NULL; | |
| return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL), | |
| size); | |
| } | |
| static PyObject * | |
| escape_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str; | |
| const char *errors = NULL; | |
| char *buf; | |
| Py_ssize_t consumed, len; | |
| if (!PyArg_ParseTuple(args, "S|z:escape_encode", | |
| &str, &errors)) | |
| return NULL; | |
| consumed = PyString_GET_SIZE(str); | |
| str = PyString_Repr(str, 0); | |
| if (!str) | |
| return NULL; | |
| /* The string will be quoted. Unquote, similar to unicode-escape. */ | |
| buf = PyString_AS_STRING (str); | |
| len = PyString_GET_SIZE (str); | |
| memmove(buf, buf+1, len-2); | |
| if (_PyString_Resize(&str, len-2) < 0) | |
| return NULL; | |
| return codec_tuple(str, consumed); | |
| } | |
| #ifdef Py_USING_UNICODE | |
| /* --- Decoder ------------------------------------------------------------ */ | |
| static PyObject * | |
| unicode_internal_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *obj; | |
| const char *errors = NULL; | |
| const char *data; | |
| Py_ssize_t size; | |
| if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode", | |
| &obj, &errors)) | |
| return NULL; | |
| if (PyUnicode_Check(obj)) { | |
| Py_INCREF(obj); | |
| return codec_tuple(obj, PyUnicode_GET_SIZE(obj)); | |
| } | |
| else { | |
| if (PyObject_AsReadBuffer(obj, (const void **)&data, &size)) | |
| return NULL; | |
| return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors), | |
| size); | |
| } | |
| } | |
| static PyObject * | |
| utf_7_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| PyObject *decoded = NULL; | |
| if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode", | |
| &pbuf, &errors, &final)) | |
| return NULL; | |
| consumed = pbuf.len; | |
| decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors, | |
| final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (decoded == NULL) | |
| return NULL; | |
| return codec_tuple(decoded, consumed); | |
| } | |
| static PyObject * | |
| utf_8_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| PyObject *decoded = NULL; | |
| if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode", | |
| &pbuf, &errors, &final)) | |
| return NULL; | |
| consumed = pbuf.len; | |
| decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors, | |
| final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (decoded == NULL) | |
| return NULL; | |
| return codec_tuple(decoded, consumed); | |
| } | |
| static PyObject * | |
| utf_16_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int byteorder = 0; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| PyObject *decoded; | |
| if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode", | |
| &pbuf, &errors, &final)) | |
| return NULL; | |
| consumed = pbuf.len; /* This is overwritten unless final is true. */ | |
| decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, | |
| &byteorder, final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (decoded == NULL) | |
| return NULL; | |
| return codec_tuple(decoded, consumed); | |
| } | |
| static PyObject * | |
| utf_16_le_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int byteorder = -1; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| PyObject *decoded = NULL; | |
| if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode", | |
| &pbuf, &errors, &final)) | |
| return NULL; | |
| consumed = pbuf.len; /* This is overwritten unless final is true. */ | |
| decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, | |
| &byteorder, final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (decoded == NULL) | |
| return NULL; | |
| return codec_tuple(decoded, consumed); | |
| } | |
| static PyObject * | |
| utf_16_be_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int byteorder = 1; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| PyObject *decoded = NULL; | |
| if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode", | |
| &pbuf, &errors, &final)) | |
| return NULL; | |
| consumed = pbuf.len; /* This is overwritten unless final is true. */ | |
| decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, | |
| &byteorder, final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (decoded == NULL) | |
| return NULL; | |
| return codec_tuple(decoded, consumed); | |
| } | |
| /* This non-standard version also provides access to the byteorder | |
| parameter of the builtin UTF-16 codec. | |
| It returns a tuple (unicode, bytesread, byteorder) with byteorder | |
| being the value in effect at the end of data. | |
| */ | |
| static PyObject * | |
| utf_16_ex_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int byteorder = 0; | |
| PyObject *unicode, *tuple; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode", | |
| &pbuf, &errors, &byteorder, &final)) | |
| return NULL; | |
| consumed = pbuf.len; /* This is overwritten unless final is true. */ | |
| unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, | |
| &byteorder, final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (unicode == NULL) | |
| return NULL; | |
| tuple = Py_BuildValue("Oni", unicode, consumed, byteorder); | |
| Py_DECREF(unicode); | |
| return tuple; | |
| } | |
| static PyObject * | |
| utf_32_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int byteorder = 0; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| PyObject *decoded; | |
| if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode", | |
| &pbuf, &errors, &final)) | |
| return NULL; | |
| consumed = pbuf.len; /* This is overwritten unless final is true. */ | |
| decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, | |
| &byteorder, final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (decoded == NULL) | |
| return NULL; | |
| return codec_tuple(decoded, consumed); | |
| } | |
| static PyObject * | |
| utf_32_le_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int byteorder = -1; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| PyObject *decoded; | |
| if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode", | |
| &pbuf, &errors, &final)) | |
| return NULL; | |
| consumed = pbuf.len; /* This is overwritten unless final is true. */ | |
| decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, | |
| &byteorder, final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (decoded == NULL) | |
| return NULL; | |
| return codec_tuple(decoded, consumed); | |
| } | |
| static PyObject * | |
| utf_32_be_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int byteorder = 1; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| PyObject *decoded; | |
| if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode", | |
| &pbuf, &errors, &final)) | |
| return NULL; | |
| consumed = pbuf.len; /* This is overwritten unless final is true. */ | |
| decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, | |
| &byteorder, final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (decoded == NULL) | |
| return NULL; | |
| return codec_tuple(decoded, consumed); | |
| } | |
| /* This non-standard version also provides access to the byteorder | |
| parameter of the builtin UTF-32 codec. | |
| It returns a tuple (unicode, bytesread, byteorder) with byteorder | |
| being the value in effect at the end of data. | |
| */ | |
| static PyObject * | |
| utf_32_ex_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int byteorder = 0; | |
| PyObject *unicode, *tuple; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode", | |
| &pbuf, &errors, &byteorder, &final)) | |
| return NULL; | |
| consumed = pbuf.len; /* This is overwritten unless final is true. */ | |
| unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, | |
| &byteorder, final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (unicode == NULL) | |
| return NULL; | |
| tuple = Py_BuildValue("Oni", unicode, consumed, byteorder); | |
| Py_DECREF(unicode); | |
| return tuple; | |
| } | |
| static PyObject * | |
| unicode_escape_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| PyObject *unicode; | |
| if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode", | |
| &pbuf, &errors)) | |
| return NULL; | |
| unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors); | |
| PyBuffer_Release(&pbuf); | |
| return codec_tuple(unicode, pbuf.len); | |
| } | |
| static PyObject * | |
| raw_unicode_escape_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| PyObject *unicode; | |
| if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode", | |
| &pbuf, &errors)) | |
| return NULL; | |
| unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors); | |
| PyBuffer_Release(&pbuf); | |
| return codec_tuple(unicode, pbuf.len); | |
| } | |
| static PyObject * | |
| latin_1_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| PyObject *unicode; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode", | |
| &pbuf, &errors)) | |
| return NULL; | |
| unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors); | |
| PyBuffer_Release(&pbuf); | |
| return codec_tuple(unicode, pbuf.len); | |
| } | |
| static PyObject * | |
| ascii_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| PyObject *unicode; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "s*|z:ascii_decode", | |
| &pbuf, &errors)) | |
| return NULL; | |
| unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors); | |
| PyBuffer_Release(&pbuf); | |
| return codec_tuple(unicode, pbuf.len); | |
| } | |
| static PyObject * | |
| charmap_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| PyObject *unicode; | |
| const char *errors = NULL; | |
| PyObject *mapping = NULL; | |
| if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode", | |
| &pbuf, &errors, &mapping)) | |
| return NULL; | |
| if (mapping == Py_None) | |
| mapping = NULL; | |
| unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors); | |
| PyBuffer_Release(&pbuf); | |
| return codec_tuple(unicode, pbuf.len); | |
| } | |
| #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) | |
| static PyObject * | |
| mbcs_decode(PyObject *self, | |
| PyObject *args) | |
| { | |
| Py_buffer pbuf; | |
| const char *errors = NULL; | |
| int final = 0; | |
| Py_ssize_t consumed; | |
| PyObject *decoded = NULL; | |
| if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode", | |
| &pbuf, &errors, &final)) | |
| return NULL; | |
| consumed = pbuf.len; | |
| decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors, | |
| final ? NULL : &consumed); | |
| PyBuffer_Release(&pbuf); | |
| if (decoded == NULL) | |
| return NULL; | |
| return codec_tuple(decoded, consumed); | |
| } | |
| #endif /* MS_WINDOWS */ | |
| /* --- Encoder ------------------------------------------------------------ */ | |
| static PyObject * | |
| readbuffer_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| const char *data; | |
| Py_ssize_t size; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode", | |
| &data, &size, &errors)) | |
| return NULL; | |
| return codec_tuple(PyString_FromStringAndSize(data, size), | |
| size); | |
| } | |
| static PyObject * | |
| charbuffer_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| const char *data; | |
| Py_ssize_t size; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode", | |
| &data, &size, &errors)) | |
| return NULL; | |
| return codec_tuple(PyString_FromStringAndSize(data, size), | |
| size); | |
| } | |
| static PyObject * | |
| unicode_internal_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *obj; | |
| const char *errors = NULL; | |
| const char *data; | |
| Py_ssize_t size; | |
| if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode", | |
| &obj, &errors)) | |
| return NULL; | |
| if (PyUnicode_Check(obj)) { | |
| data = PyUnicode_AS_DATA(obj); | |
| size = PyUnicode_GET_DATA_SIZE(obj); | |
| return codec_tuple(PyString_FromStringAndSize(data, size), | |
| PyUnicode_GET_SIZE(obj)); | |
| } | |
| else { | |
| if (PyObject_AsReadBuffer(obj, (const void **)&data, &size)) | |
| return NULL; | |
| return codec_tuple(PyString_FromStringAndSize(data, size), | |
| size); | |
| } | |
| } | |
| static PyObject * | |
| utf_7_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:utf_7_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| 0, | |
| 0, | |
| errors), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| utf_8_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:utf_8_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| /* This version provides access to the byteorder parameter of the | |
| builtin UTF-16 codecs as optional third argument. It defaults to 0 | |
| which means: use the native byte order and prepend the data with a | |
| BOM mark. | |
| */ | |
| static PyObject * | |
| utf_16_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| int byteorder = 0; | |
| if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode", | |
| &str, &errors, &byteorder)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors, | |
| byteorder), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| utf_16_le_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors, | |
| -1), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| utf_16_be_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors, | |
| +1), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| /* This version provides access to the byteorder parameter of the | |
| builtin UTF-32 codecs as optional third argument. It defaults to 0 | |
| which means: use the native byte order and prepend the data with a | |
| BOM mark. | |
| */ | |
| static PyObject * | |
| utf_32_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| int byteorder = 0; | |
| if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode", | |
| &str, &errors, &byteorder)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors, | |
| byteorder), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| utf_32_le_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors, | |
| -1), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| utf_32_be_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors, | |
| +1), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| unicode_escape_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str)), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| raw_unicode_escape_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape( | |
| PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str)), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| latin_1_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:latin_1_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeLatin1( | |
| PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| ascii_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:ascii_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeASCII( | |
| PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject * | |
| charmap_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| PyObject *mapping = NULL; | |
| if (!PyArg_ParseTuple(args, "O|zO:charmap_encode", | |
| &str, &errors, &mapping)) | |
| return NULL; | |
| if (mapping == Py_None) | |
| mapping = NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeCharmap( | |
| PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| mapping, | |
| errors), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| static PyObject* | |
| charmap_build(PyObject *self, PyObject *args) | |
| { | |
| PyObject *map; | |
| if (!PyArg_ParseTuple(args, "U:charmap_build", &map)) | |
| return NULL; | |
| return PyUnicode_BuildEncodingMap(map); | |
| } | |
| #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) | |
| static PyObject * | |
| mbcs_encode(PyObject *self, | |
| PyObject *args) | |
| { | |
| PyObject *str, *v; | |
| const char *errors = NULL; | |
| if (!PyArg_ParseTuple(args, "O|z:mbcs_encode", | |
| &str, &errors)) | |
| return NULL; | |
| str = PyUnicode_FromObject(str); | |
| if (str == NULL) | |
| return NULL; | |
| v = codec_tuple(PyUnicode_EncodeMBCS( | |
| PyUnicode_AS_UNICODE(str), | |
| PyUnicode_GET_SIZE(str), | |
| errors), | |
| PyUnicode_GET_SIZE(str)); | |
| Py_DECREF(str); | |
| return v; | |
| } | |
| #endif /* MS_WINDOWS */ | |
| #endif /* Py_USING_UNICODE */ | |
| /* --- Error handler registry --------------------------------------------- */ | |
| PyDoc_STRVAR(register_error__doc__, | |
| "register_error(errors, handler)\n\ | |
| \n\ | |
| Register the specified error handler under the name\n\ | |
| errors. handler must be a callable object, that\n\ | |
| will be called with an exception instance containing\n\ | |
| information about the location of the encoding/decoding\n\ | |
| error and must return a (replacement, new position) tuple."); | |
| static PyObject *register_error(PyObject *self, PyObject *args) | |
| { | |
| const char *name; | |
| PyObject *handler; | |
| if (!PyArg_ParseTuple(args, "sO:register_error", | |
| &name, &handler)) | |
| return NULL; | |
| if (PyCodec_RegisterError(name, handler)) | |
| return NULL; | |
| Py_RETURN_NONE; | |
| } | |
| PyDoc_STRVAR(lookup_error__doc__, | |
| "lookup_error(errors) -> handler\n\ | |
| \n\ | |
| Return the error handler for the specified error handling name\n\ | |
| or raise a LookupError, if no handler exists under this name."); | |
| static PyObject *lookup_error(PyObject *self, PyObject *args) | |
| { | |
| const char *name; | |
| if (!PyArg_ParseTuple(args, "s:lookup_error", | |
| &name)) | |
| return NULL; | |
| return PyCodec_LookupError(name); | |
| } | |
| /* --- Module API --------------------------------------------------------- */ | |
| static PyMethodDef _codecs_functions[] = { | |
| {"register", codec_register, METH_O, | |
| register__doc__}, | |
| {"lookup", codec_lookup, METH_VARARGS, | |
| lookup__doc__}, | |
| {"encode", codec_encode, METH_VARARGS, | |
| encode__doc__}, | |
| {"decode", codec_decode, METH_VARARGS, | |
| decode__doc__}, | |
| {"escape_encode", escape_encode, METH_VARARGS}, | |
| {"escape_decode", escape_decode, METH_VARARGS}, | |
| #ifdef Py_USING_UNICODE | |
| {"utf_8_encode", utf_8_encode, METH_VARARGS}, | |
| {"utf_8_decode", utf_8_decode, METH_VARARGS}, | |
| {"utf_7_encode", utf_7_encode, METH_VARARGS}, | |
| {"utf_7_decode", utf_7_decode, METH_VARARGS}, | |
| {"utf_16_encode", utf_16_encode, METH_VARARGS}, | |
| {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS}, | |
| {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS}, | |
| {"utf_16_decode", utf_16_decode, METH_VARARGS}, | |
| {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS}, | |
| {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS}, | |
| {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS}, | |
| {"utf_32_encode", utf_32_encode, METH_VARARGS}, | |
| {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS}, | |
| {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS}, | |
| {"utf_32_decode", utf_32_decode, METH_VARARGS}, | |
| {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS}, | |
| {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS}, | |
| {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS}, | |
| {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS}, | |
| {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS}, | |
| {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS}, | |
| {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS}, | |
| {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS}, | |
| {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS}, | |
| {"latin_1_encode", latin_1_encode, METH_VARARGS}, | |
| {"latin_1_decode", latin_1_decode, METH_VARARGS}, | |
| {"ascii_encode", ascii_encode, METH_VARARGS}, | |
| {"ascii_decode", ascii_decode, METH_VARARGS}, | |
| {"charmap_encode", charmap_encode, METH_VARARGS}, | |
| {"charmap_decode", charmap_decode, METH_VARARGS}, | |
| {"charmap_build", charmap_build, METH_VARARGS}, | |
| {"readbuffer_encode", readbuffer_encode, METH_VARARGS}, | |
| {"charbuffer_encode", charbuffer_encode, METH_VARARGS}, | |
| #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) | |
| {"mbcs_encode", mbcs_encode, METH_VARARGS}, | |
| {"mbcs_decode", mbcs_decode, METH_VARARGS}, | |
| #endif | |
| #endif /* Py_USING_UNICODE */ | |
| {"register_error", register_error, METH_VARARGS, | |
| register_error__doc__}, | |
| {"lookup_error", lookup_error, METH_VARARGS, | |
| lookup_error__doc__}, | |
| {NULL, NULL} /* sentinel */ | |
| }; | |
| PyMODINIT_FUNC | |
| init_codecs(void) | |
| { | |
| Py_InitModule("_codecs", _codecs_functions); | |
| } |