Internal module _codecs -- Provides access to the codec registry and
the builtin codecs. Written by Marc-Andre Lemburg.

Internal module _codecs -- Provides access to the codec registry and
the builtin codecs. Written by Marc-Andre Lemburg.
e2d67f98 · Guido van Rossum · 5bfc2eb6 · e2d67f98
Commit e2d67f98 authored Mar 10, 2000 by Guido van Rossum
Show whitespace changes
Inline Side-by-side

Showing with 529 additions and 0 deletions

_codecsmodule.c Modules/_codecsmodule.c +529 -0

No files found.
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
+/* ------------------------------------------------------------------------
+
+   _codecs -- Provides access to the codec registry and the builtin
+              codecs.
+
+   This module should never be imported directly. The standard library
+   module "codecs" wraps this builtin module for use within Python.
+
+   The codec registry is accessible via:
+
+     register(search_function) -> None
+
+     lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
+
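+   The builtin Unicode codecs use the following interface:
+
+     <encoding>_encode(Unicode_object[,errors='strict']) ->
+        (string object, Unicode characters consumed)
+
+     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
+        (Unicode object, bytes consumed)
+
+   These <encoding>s are available: utf_8, utf_16 (with the utf_16_le
+   and utf_16_be variants and a decode-only utf_16_ex), unicode_escape,
+   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit), charmap
+
+   unicode_internal_encode is an alias of readbuffer_encode and reports
+   the byte length of its input buffer instead of a character count.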
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+   ------------------------------------------------------------------------ */
+
+#include "Python.h"
+
+/* --- Registry ----------------------------------------------------------- */
+
+/* register(search_function) -> None
+
+   Hands the single argument to PyCodec_Register().  Returns a new
+   reference to None when that call reports success (zero) and NULL
+   when argument parsing or the registration fails. */
+
+static PyObject *
+codecregister(PyObject *self, PyObject *args)
+{
+    PyObject *search_function;
+
+    if (!PyArg_ParseTuple(args, "O:register", &search_function))
+        return NULL;
+
+    /* Any non-zero result from the registry is treated as failure. */
+    if (PyCodec_Register(search_function) != 0)
+        return NULL;
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* lookup(encoding) -> result of _PyCodec_Lookup(encoding)
+
+   Parses one string argument and forwards it unchanged to the codec
+   registry.  Whatever _PyCodec_Lookup() returns (including NULL) is
+   passed straight back to the caller; NULL is also returned when the
+   argument cannot be parsed. */
+
+static PyObject *
+codeclookup(PyObject *self, PyObject *args)
+{
+    char *name;
+
+    if (PyArg_ParseTuple(args, "s:lookup", &name))
+        return _PyCodec_Lookup(name);
+
+    return NULL;
+}
+
+/* --- Helpers ------------------------------------------------------------ */
+
+/* Build the (object, length) result pair shared by all codec functions.
+
+   unicode -- codec result; may be NULL, in which case NULL is returned
+              immediately so callers can pass a failed codec call
+              straight through.
+   len     -- integer stored as the second tuple item.
+
+   The reference to `unicode` is consumed on every path: it ends up
+   inside the tuple, or it is released when building the pair fails. */
+
+static PyObject *
+codec_tuple(PyObject *unicode, int len)
+{
+    PyObject *pair;
+    PyObject *count;
+
+    if (unicode == NULL)
+        return NULL;
+
+    pair = PyTuple_New(2);
+    if (pair == NULL) {
+        /* No tuple to hold the object, so drop it here. */
+        Py_DECREF(unicode);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(pair, 0, unicode);
+
+    count = PyInt_FromLong(len);
+    if (count == NULL) {
+        /* Releasing the tuple also releases the object stored in it. */
+        Py_DECREF(pair);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(pair, 1, count);
+
+    return pair;
+}
+
+/* --- Decoder ------------------------------------------------------------ */
+
+/* unicode_internal_decode(data[, errors]) -> (unicode, size)
+
+   Treats the bytes of the input buffer as an array of Py_UNICODE and
+   builds a Unicode object from size / sizeof(Py_UNICODE) of them.  The
+   second tuple item is always the full byte length of the input.  The
+   errors argument is parsed but not used.
+
+   NOTE(review): the buffer is cast to Py_UNICODE * without an alignment
+   check, and trailing bytes that do not fill a whole Py_UNICODE are
+   dropped by the integer division while still being counted in the
+   returned size -- confirm this is intended. */
+
+static PyObject *
+unicode_internal_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "s#|z:unicode_internal_decode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    decoded = PyUnicode_FromUnicode((Py_UNICODE *)buf,
+                                    nbytes / sizeof(Py_UNICODE));
+    /* codec_tuple() turns a NULL result into the error return. */
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_8_decode(data[, errors]) -> (unicode, size)
+
+   Decodes the input buffer with PyUnicode_DecodeUTF8(), passing the
+   optional errors string through, and pairs the result with the byte
+   length of the input.  Returns NULL when parsing or decoding fails. */
+
+static PyObject *
+utf_8_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF8(buf, nbytes, errmode);
+    /* codec_tuple() turns a NULL result into the error return. */
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_16_decode(data[, errors]) -> (unicode, size)
+
+   Decodes the input buffer with PyUnicode_DecodeUTF16(), starting from
+   a byteorder value of 0.  The byte order reported back by the decoder
+   is discarded; only the decoded object and the input byte length are
+   returned.  Returns NULL when parsing or decoding fails. */
+
+static PyObject *
+utf_16_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    int order = 0;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF16(buf, nbytes, errmode, &order);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_16_le_decode(data[, errors]) -> (unicode, size)
+
+   Same as utf_16_decode but hands PyUnicode_DecodeUTF16() an initial
+   byteorder of -1 (the little-endian variant, going by the function
+   name).  Returns the decoded object paired with the input byte
+   length, or NULL when parsing or decoding fails. */
+
+static PyObject *
+utf_16_le_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    int order = -1;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF16(buf, nbytes, errmode, &order);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_16_be_decode(data[, errors]) -> (unicode, size)
+
+   Same as utf_16_decode but hands PyUnicode_DecodeUTF16() an initial
+   byteorder of 1 (the big-endian variant, going by the function name).
+   Returns the decoded object paired with the input byte length, or
+   NULL when parsing or decoding fails. */
+
+static PyObject *
+utf_16_be_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    int order = 1;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF16(buf, nbytes, errmode, &order);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_16_ex_decode(data[, errors[, byteorder]]) ->
+        (unicode, bytesread, byteorder)
+
+   Non-standard variant of the UTF-16 decoder that exposes the
+   byteorder parameter of PyUnicode_DecodeUTF16().  The optional third
+   argument (default 0) is the initial byte order; the byteorder item
+   of the result is the value the decoder left in that variable once it
+   was done with the data.  bytesread is always the full input length.
+
+   Returns NULL when parsing, decoding or building the tuple fails. */
+
+static PyObject *
+utf_16_ex_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    int order = 0;
+    PyObject *decoded;
+    PyObject *result;
+
+    if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
+                          &buf, &nbytes, &errmode, &order))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF16(buf, nbytes, errmode, &order);
+    if (decoded == NULL)
+        return NULL;
+
+    result = Py_BuildValue("Oii", decoded, nbytes, order);
+    /* Our own reference is released whether or not the tuple was
+       built; on failure result is NULL and is returned as such. */
+    Py_DECREF(decoded);
+    return result;
+}
+
+/* unicode_escape_decode(data[, errors]) -> (unicode, size)
+
+   Decodes the input buffer with PyUnicode_DecodeUnicodeEscape() and
+   pairs the result with the byte length of the input.  Returns NULL
+   when parsing or decoding fails. */
+
+static PyObject *
+unicode_escape_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUnicodeEscape(buf, nbytes, errmode);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* raw_unicode_escape_decode(data[, errors]) -> (unicode, size)
+
+   Decodes the input buffer with PyUnicode_DecodeRawUnicodeEscape() and
+   pairs the result with the byte length of the input.  Returns NULL
+   when parsing or decoding fails. */
+
+static PyObject *
+raw_unicode_escape_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    decoded = PyUnicode_DecodeRawUnicodeEscape(buf, nbytes, errmode);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* latin_1_decode(data[, errors]) -> (unicode, size)
+
+   Decodes the input buffer with PyUnicode_DecodeLatin1() and pairs the
+   result with the byte length of the input.  Returns NULL when parsing
+   or decoding fails. */
+
+static PyObject *
+latin_1_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    decoded = PyUnicode_DecodeLatin1(buf, nbytes, errmode);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* ascii_decode(data[, errors]) -> (unicode, size)
+
+   Decodes the input buffer with PyUnicode_DecodeASCII() and pairs the
+   result with the byte length of the input.  Returns NULL when parsing
+   or decoding fails. */
+
+static PyObject *
+ascii_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    decoded = PyUnicode_DecodeASCII(buf, nbytes, errmode);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* charmap_decode(data[, errors[, mapping]]) -> (unicode, size)
+
+   Decodes the input buffer with PyUnicode_DecodeCharmap().  An omitted
+   mapping and an explicit None are both forwarded to the decoder as a
+   NULL mapping.  The result is paired with the byte length of the
+   input.  Returns NULL when parsing or decoding fails. */
+
+static PyObject *
+charmap_decode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    PyObject *table = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
+                          &buf, &nbytes, &errmode, &table))
+        return NULL;
+
+    /* None means "no mapping", same as leaving the argument out. */
+    table = (table == Py_None) ? NULL : table;
+
+    decoded = PyUnicode_DecodeCharmap(buf, nbytes, table, errmode);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* --- Encoder ------------------------------------------------------------ */
+
+/* readbuffer_encode(data[, errors]) -> (string, size)
+
+   Copies the bytes obtained through the "s#" argument format into a
+   new string object and pairs it with the byte length.  The errors
+   argument is parsed but not used.  Returns NULL when parsing or the
+   string allocation fails. */
+
+static PyObject *
+readbuffer_encode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    PyObject *copy;
+
+    if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    copy = PyString_FromStringAndSize(buf, nbytes);
+    return codec_tuple(copy, nbytes);
+}
+
+/* charbuffer_encode(data[, errors]) -> (string, size)
+
+   Copies the bytes obtained through the "t#" argument format into a
+   new string object and pairs it with the byte length.  The errors
+   argument is parsed but not used.  Returns NULL when parsing or the
+   string allocation fails. */
+
+static PyObject *
+charbuffer_encode(PyObject *self, PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errmode = NULL;
+    PyObject *copy;
+
+    if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
+                          &buf, &nbytes, &errmode))
+        return NULL;
+
+    copy = PyString_FromStringAndSize(buf, nbytes);
+    return codec_tuple(copy, nbytes);
+}
+
+/* utf_8_encode(unicode[, errors]) -> (string, characters consumed)
+
+   Encodes a Unicode object with PyUnicode_EncodeUTF8(), passing the
+   optional errors string through.  The second tuple item is the length
+   of the Unicode input as reported by PyUnicode_GET_SIZE(), not the
+   length of the encoded output.  Returns NULL when parsing or encoding
+   fails. */
+
+static PyObject *
+utf_8_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errmode = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:utf_8_encode",
+                          &ustr, &errmode))
+        return NULL;
+
+    encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(ustr),
+                                   PyUnicode_GET_SIZE(ustr),
+                                   errmode);
+    /* codec_tuple() turns a NULL result into the error return. */
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(ustr));
+}
+
+/* utf_16_encode(unicode[, errors[, byteorder]]) ->
+        (string, characters consumed)
+
+   Exposes the byteorder parameter of the builtin UTF-16 encoder as an
+   optional third argument.  It defaults to 0, which means: use the
+   native byte order and prepend a byte order mark (BOM) to the data.
+
+   The second tuple item is the length of the Unicode input as reported
+   by PyUnicode_GET_SIZE().  Returns NULL when parsing or encoding
+   fails. */
+
+static PyObject *
+utf_16_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errmode = NULL;
+    int order = 0;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|zi:utf_16_encode",
+                          &ustr, &errmode, &order))
+        return NULL;
+
+    encoded = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(ustr),
+                                    PyUnicode_GET_SIZE(ustr),
+                                    errmode,
+                                    order);
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(ustr));
+}
+
+/* utf_16_le_encode(unicode[, errors]) -> (string, characters consumed)
+
+   Encodes a Unicode object with PyUnicode_EncodeUTF16() using a fixed
+   byteorder of -1 (the little-endian variant, going by the function
+   name).  The second tuple item is the length of the Unicode input as
+   reported by PyUnicode_GET_SIZE().  Returns NULL when parsing or
+   encoding fails.
+
+   The byte order is not configurable here, so the format string
+   accepts only the two arguments for which output pointers are
+   supplied.  Declaring an extra "i" without a matching pointer would
+   make PyArg_ParseTuple() store through a non-existent vararg whenever
+   a third argument is passed. */
+
+static PyObject *
+utf_16_le_encode(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+
+    if (!PyArg_ParseTuple(args, "U|z:utf_16_le_encode",
+                          &str, &errors))
+        return NULL;
+
+    return codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
+                                             PyUnicode_GET_SIZE(str),
+                                             errors,
+                                             -1),
+                       PyUnicode_GET_SIZE(str));
+}
+
+/* utf_16_be_encode(unicode[, errors]) -> (string, characters consumed)
+
+   Encodes a Unicode object with PyUnicode_EncodeUTF16() using a fixed
+   byteorder of +1 (the big-endian variant, going by the function
+   name).  The second tuple item is the length of the Unicode input as
+   reported by PyUnicode_GET_SIZE().  Returns NULL when parsing or
+   encoding fails.
+
+   The byte order is not configurable here, so the format string
+   accepts only the two arguments for which output pointers are
+   supplied.  Declaring an extra "i" without a matching pointer would
+   make PyArg_ParseTuple() store through a non-existent vararg whenever
+   a third argument is passed. */
+
+static PyObject *
+utf_16_be_encode(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+
+    if (!PyArg_ParseTuple(args, "U|z:utf_16_be_encode",
+                          &str, &errors))
+        return NULL;
+
+    return codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
+                                             PyUnicode_GET_SIZE(str),
+                                             errors,
+                                             +1),
+                       PyUnicode_GET_SIZE(str));
+}
+
+/* unicode_escape_encode(unicode[, errors]) ->
+        (string, characters consumed)
+
+   Encodes a Unicode object with PyUnicode_EncodeUnicodeEscape().  The
+   errors argument is parsed but not passed on, because the encoder is
+   called with only the data and its size.  The second tuple item is
+   the length of the Unicode input as reported by PyUnicode_GET_SIZE().
+   Returns NULL when parsing or encoding fails. */
+
+static PyObject *
+unicode_escape_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errmode = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:unicode_escape_encode",
+                          &ustr, &errmode))
+        return NULL;
+
+    encoded = PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(ustr),
+                                            PyUnicode_GET_SIZE(ustr));
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(ustr));
+}
+
+/* raw_unicode_escape_encode(unicode[, errors]) ->
+        (string, characters consumed)
+
+   Encodes a Unicode object with PyUnicode_EncodeRawUnicodeEscape().
+   The errors argument is parsed but not passed on, because the encoder
+   is called with only the data and its size.  The second tuple item is
+   the length of the Unicode input as reported by PyUnicode_GET_SIZE().
+   Returns NULL when parsing or encoding fails. */
+
+static PyObject *
+raw_unicode_escape_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errmode = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:raw_unicode_escape_encode",
+                          &ustr, &errmode))
+        return NULL;
+
+    encoded = PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(ustr),
+                                               PyUnicode_GET_SIZE(ustr));
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(ustr));
+}
+
+/* latin_1_encode(unicode[, errors]) -> (string, characters consumed)
+
+   Encodes a Unicode object with PyUnicode_EncodeLatin1(), passing the
+   optional errors string through.  The second tuple item is the length
+   of the Unicode input as reported by PyUnicode_GET_SIZE().  Returns
+   NULL when parsing or encoding fails. */
+
+static PyObject *
+latin_1_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errmode = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:latin_1_encode",
+                          &ustr, &errmode))
+        return NULL;
+
+    encoded = PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(ustr),
+                                     PyUnicode_GET_SIZE(ustr),
+                                     errmode);
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(ustr));
+}
+
+/* ascii_encode(unicode[, errors]) -> (string, characters consumed)
+
+   Encodes a Unicode object with PyUnicode_EncodeASCII(), passing the
+   optional errors string through.  The second tuple item is the length
+   of the Unicode input as reported by PyUnicode_GET_SIZE().  Returns
+   NULL when parsing or encoding fails. */
+
+static PyObject *
+ascii_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errmode = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:ascii_encode",
+                          &ustr, &errmode))
+        return NULL;
+
+    encoded = PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(ustr),
+                                    PyUnicode_GET_SIZE(ustr),
+                                    errmode);
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(ustr));
+}
+
+/* charmap_encode(unicode[, errors[, mapping]]) ->
+        (string, characters consumed)
+
+   Encodes a Unicode object with PyUnicode_EncodeCharmap().  An omitted
+   mapping and an explicit None are both forwarded to the encoder as a
+   NULL mapping.  The second tuple item is the length of the Unicode
+   input as reported by PyUnicode_GET_SIZE().  Returns NULL when
+   parsing or encoding fails. */
+
+static PyObject *
+charmap_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errmode = NULL;
+    PyObject *table = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|zO:charmap_encode",
+                          &ustr, &errmode, &table))
+        return NULL;
+
+    /* None means "no mapping", same as leaving the argument out. */
+    table = (table == Py_None) ? NULL : table;
+
+    encoded = PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(ustr),
+                                      PyUnicode_GET_SIZE(ustr),
+                                      table,
+                                      errmode);
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(ustr));
+}
+
+/* --- Module API --------------------------------------------------------- */
+
+/* Method table of the _codecs module: maps each Python-level name to
+   its C implementation.  Every entry uses the METH_VARARGS calling
+   convention (spelled as the named flag rather than the bare value 1),
+   so each handler receives its positional arguments as a tuple.
+
+   "unicode_internal_encode" has no implementation of its own and is
+   bound to readbuffer_encode. */
+
+static PyMethodDef _codecs_functions[] = {
+    {"register",                  codecregister,             METH_VARARGS},
+    {"lookup",                    codeclookup,               METH_VARARGS},
+    {"utf_8_encode",              utf_8_encode,              METH_VARARGS},
+    {"utf_8_decode",              utf_8_decode,              METH_VARARGS},
+    {"utf_16_encode",             utf_16_encode,             METH_VARARGS},
+    {"utf_16_le_encode",          utf_16_le_encode,          METH_VARARGS},
+    {"utf_16_be_encode",          utf_16_be_encode,          METH_VARARGS},
+    {"utf_16_decode",             utf_16_decode,             METH_VARARGS},
+    {"utf_16_le_decode",          utf_16_le_decode,          METH_VARARGS},
+    {"utf_16_be_decode",          utf_16_be_decode,          METH_VARARGS},
+    {"utf_16_ex_decode",          utf_16_ex_decode,          METH_VARARGS},
+    {"unicode_escape_encode",     unicode_escape_encode,     METH_VARARGS},
+    {"unicode_escape_decode",     unicode_escape_decode,     METH_VARARGS},
+    {"unicode_internal_encode",   readbuffer_encode,         METH_VARARGS},
+    {"unicode_internal_decode",   unicode_internal_decode,   METH_VARARGS},
+    {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
+    {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
+    {"latin_1_encode",            latin_1_encode,            METH_VARARGS},
+    {"latin_1_decode",            latin_1_decode,            METH_VARARGS},
+    {"ascii_encode",              ascii_encode,              METH_VARARGS},
+    {"ascii_decode",              ascii_decode,              METH_VARARGS},
+    {"charmap_encode",            charmap_encode,            METH_VARARGS},
+    {"charmap_decode",            charmap_decode,            METH_VARARGS},
+    {"readbuffer_encode",         readbuffer_encode,         METH_VARARGS},
+    {"charbuffer_encode",         charbuffer_encode,         METH_VARARGS},
+    {NULL, NULL}                  /* sentinel */
+};
+
+/* Module initialisation entry point: creates the "_codecs" module and
+   populates it from the _codecs_functions method table.  Declared with
+   an explicit (void) parameter list so the definition is a proper
+   prototype rather than an old-style unspecified-argument function. */
+
+DL_EXPORT(void)
+init_codecs(void)
+{
+    Py_InitModule("_codecs", _codecs_functions);
+}