/* ------------------------------------------------------------------------

   Python Codec Registry and support functions

Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */

#include "Python.h"
#include <ctype.h>

/* --- Codec Registry ----------------------------------------------------- */

/* Import the standard encodings package which will register the first
   codec search function.

   This is done in a lazy way so that the Unicode implementation does
   not slow down the startup of scripts that do not need it.

   ImportErrors are silently ignored by this function. Only one try is
   made.

*/

/* Forward declaration: the lazy registry initializer is defined at the end
   of this file but is called by the registry entry points that follow. */
static int _PyCodecRegistry_Init(void);

/* Register a new codec search function.

   search_function is appended to the current interpreter's codec search
   path.  The registry is initialized first if the search path does not
   exist yet.

   Returns the result of PyList_Append() on success.  Returns -1 if the
   registry initialization reports failure, if search_function is NULL
   (PyErr_BadArgument) or if it is not callable (TypeError). */
int PyCodec_Register(PyObject *search_function)
{
    PyInterpreterState *interp = PyThreadState_GET()->interp;
    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
        goto onError;
    if (search_function == NULL) {
        PyErr_BadArgument();
        goto onError;
    }
    if (!PyCallable_Check(search_function)) {
        PyErr_SetString(PyExc_TypeError, "argument must be callable");
        goto onError;
    }
    return PyList_Append(interp->codec_search_path, search_function);

 onError:
    return -1;
}

/* Convert a string to a normalized Python string: all characters are
   converted to lower case, spaces are replaced with hyphens. */

51
static
52
PyObject *normalizestring(const char *string)
53
{
54
    register size_t i;
55
    size_t len = strlen(string);
56 57
    char *p;
    PyObject *v;
58

59 60 61 62
    if (len > PY_SSIZE_T_MAX) {
	PyErr_SetString(PyExc_OverflowError, "string is too large");
	return NULL;
    }
63 64 65 66

    p = PyMem_Malloc(len + 1);
    if (p == NULL)
        return NULL;
67 68 69 70 71
    for (i = 0; i < len; i++) {
        register char ch = string[i];
        if (ch == ' ')
            ch = '-';
        else
72
            ch = tolower(Py_CHARMASK(ch));
73 74
	p[i] = ch;
    }
75 76 77 78 79
    p[i] = '\0';
    v = PyUnicode_FromString(p);
    if (v == NULL)
        return NULL;
    PyMem_Free(p);
80 81 82 83 84 85 86 87 88 89
    return v;
}

/* Lookup the given encoding and return a tuple providing the codec
   facilities.

   The encoding string is converted to all lower-case characters
   before it is looked up. This makes encodings looked up through
   this mechanism effectively case-insensitive.

   If no codec is found, a LookupError is set and NULL returned.

   As side effect, this tries to load the encodings package, if not
   yet done. This is part of the lazy load strategy for the encodings
   package.

*/

PyObject *_PyCodec_Lookup(const char *encoding)
{
100
    PyInterpreterState *interp;
Guido van Rossum's avatar
Guido van Rossum committed
101
    PyObject *result, *args = NULL, *v;
102
    Py_ssize_t i, len;
103

104 105 106 107
    if (encoding == NULL) {
	PyErr_BadArgument();
	goto onError;
    }
108

109
    interp = PyThreadState_GET()->interp;
110
    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
111
	goto onError;
112

113
    /* Convert the encoding to a normalized Python string: all
114
       characters are converted to lower case, spaces and hyphens are
115 116
       replaced with underscores. */
    v = normalizestring(encoding);
117 118
    if (v == NULL)
	goto onError;
119
    PyUnicode_InternInPlace(&v);
120 121

    /* First, try to lookup the name in the registry dictionary */
122
    result = PyDict_GetItem(interp->codec_search_cache, v);
123 124
    if (result != NULL) {
	Py_INCREF(result);
125
	Py_DECREF(v);
126 127
	return result;
    }
128

129 130 131 132 133
    /* Next, scan the search functions in order of registration */
    args = PyTuple_New(1);
    if (args == NULL)
	goto onError;
    PyTuple_SET_ITEM(args,0,v);
Guido van Rossum's avatar
Guido van Rossum committed
134

135
    len = PyList_Size(interp->codec_search_path);
Guido van Rossum's avatar
Guido van Rossum committed
136 137
    if (len < 0)
	goto onError;
138 139 140 141 142 143
    if (len == 0) {
	PyErr_SetString(PyExc_LookupError,
			"no codec search functions registered: "
			"can't find encoding");
	goto onError;
    }
144 145 146

    for (i = 0; i < len; i++) {
	PyObject *func;
147

148
	func = PyList_GetItem(interp->codec_search_path, i);
149 150
	if (func == NULL)
	    goto onError;
Guido van Rossum's avatar
Guido van Rossum committed
151
	result = PyEval_CallObject(func, args);
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
	if (result == NULL)
	    goto onError;
	if (result == Py_None) {
	    Py_DECREF(result);
	    continue;
	}
	if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
	    PyErr_SetString(PyExc_TypeError,
			    "codec search functions must return 4-tuples");
	    Py_DECREF(result);
	    goto onError;
	}
	break;
    }
    if (i == len) {
	/* XXX Perhaps we should cache misses too ? */
168 169
	PyErr_Format(PyExc_LookupError,
                     "unknown encoding: %s", encoding);
170 171 172 173
	goto onError;
    }

    /* Cache and return the result */
Neal Norwitz's avatar
Neal Norwitz committed
174 175 176 177
    if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
	Py_DECREF(result);
	goto onError;
    }
178 179 180 181 182 183 184 185
    Py_DECREF(args);
    return result;

 onError:
    Py_XDECREF(args);
    return NULL;
}

/* Codec registry encoding check API. */

int PyCodec_KnownEncoding(const char *encoding)
{
    PyObject *codecs;
    
    codecs = _PyCodec_Lookup(encoding);
    if (!codecs) {
	PyErr_Clear();
	return 0;
    }
    else {
	Py_DECREF(codecs);
	return 1;
    }
}

203 204 205 206 207
static
PyObject *args_tuple(PyObject *object,
		     const char *errors)
{
    PyObject *args;
208

209 210 211 212 213 214 215
    args = PyTuple_New(1 + (errors != NULL));
    if (args == NULL)
	return NULL;
    Py_INCREF(object);
    PyTuple_SET_ITEM(args,0,object);
    if (errors) {
	PyObject *v;
216

217
	v = PyUnicode_FromString(errors);
218 219 220 221 222 223 224 225 226
	if (v == NULL) {
	    Py_DECREF(args);
	    return NULL;
	}
	PyTuple_SET_ITEM(args, 1, v);
    }
    return args;
}

/* Helper function to get a codec item */

static
230
PyObject *codec_getitem(const char *encoding, int index)
231 232 233 234 235 236
{
    PyObject *codecs;
    PyObject *v;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
237 238
	return NULL;
    v = PyTuple_GET_ITEM(codecs, index);
239
    Py_DECREF(codecs);
240 241 242 243
    Py_INCREF(v);
    return v;
}

/* Helper function to create an incremental codec. */

static
PyObject *codec_getincrementalcodec(const char *encoding,
				    const char *errors,
				    const char *attrname)
250
{
251
    PyObject *codecs, *ret, *inccodec;
252 253 254

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
255 256
	return NULL;
    inccodec = PyObject_GetAttrString(codecs, attrname);
257
    Py_DECREF(codecs);
258 259 260 261 262 263 264 265
    if (inccodec == NULL)
	return NULL;
    if (errors)
	ret = PyObject_CallFunction(inccodec, "s", errors);
    else
	ret = PyObject_CallFunction(inccodec, NULL);
    Py_DECREF(inccodec);
    return ret;
266 267
}

/* Helper function to create a stream codec. */

/* Look up `encoding` and call item `index` of its codec tuple to create a
   stream codec object wrapping `stream`.

   The factory is called as factory(stream, errors) when errors is
   non-NULL and as factory(stream) otherwise.

   Returns the result of that call, or NULL if the lookup or the call
   failed. */
static
PyObject *codec_getstreamcodec(const char *encoding,
                               PyObject *stream,
                               const char *errors,
                               const int index)
{
    PyObject *codecs, *streamcodec, *codeccls;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;

    /* codeccls is borrowed from codecs, which is kept alive until the
       call has returned. */
    codeccls = PyTuple_GET_ITEM(codecs, index);
    if (errors != NULL)
        streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
    else
        streamcodec = PyObject_CallFunction(codeccls, "O", stream);
    Py_DECREF(codecs);
    return streamcodec;
}

/* Convenience APIs to query the Codec registry.

   All APIs return a codec object with incremented refcount.

 */

/* Return item 0 (the encoder) of the codec entry for `encoding` as a new
   reference, or NULL if the lookup failed. */
PyObject *PyCodec_Encoder(const char *encoding)
{
    return codec_getitem(encoding, 0);
}

302
PyObject *PyCodec_Decoder(const char *encoding)
303
{
304 305
    return codec_getitem(encoding, 1);
}
306

307 308 309 310 311
PyObject *PyCodec_IncrementalEncoder(const char *encoding,
				     const char *errors)
{
    return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
}
312

313 314 315 316
PyObject *PyCodec_IncrementalDecoder(const char *encoding,
				     const char *errors)
{
    return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
317 318
}

319 320 321 322
PyObject *PyCodec_StreamReader(const char *encoding,
			       PyObject *stream,
			       const char *errors)
{
323
    return codec_getstreamcodec(encoding, stream, errors, 2);
324 325 326 327 328 329
}

/* Create a stream writer for `stream` by calling item 3 of the codec
   entry for `encoding`; `errors` is passed along when non-NULL.
   Returns NULL on failure. */
PyObject *PyCodec_StreamWriter(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    return codec_getstreamcodec(encoding, stream, errors, 3);
}

/* Encode an object (e.g. a Unicode object) using the given encoding
   and return the resulting encoded object (usually a Python string).

   errors is passed to the encoder factory as argument if non-NULL. */

/* Calls the encoder registered for `encoding` with (object[, errors]) and
   returns the first item of the 2-tuple it yields, as a new reference.

   Returns NULL if the encoder cannot be found, the argument tuple cannot
   be built, the call fails, or the encoder does not return a 2-tuple
   (TypeError). */
PyObject *PyCodec_Encode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *encoder = NULL;
    PyObject *args = NULL, *result = NULL;
    PyObject *v = NULL;

    encoder = PyCodec_Encoder(encoding);
    if (encoder == NULL)
        goto onError;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyEval_CallObject(encoder, args);
    if (result == NULL)
        goto onError;

    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "encoder must return a tuple (object, integer)");
        goto onError;
    }
    /* Keep the encoded object alive beyond the result tuple. */
    v = PyTuple_GET_ITEM(result,0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(encoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(result);
    Py_XDECREF(args);
    Py_XDECREF(encoder);
    return NULL;
}

/* Decode an object (usually a Python string) using the given encoding
   and return an equivalent object (e.g. a Unicode object).

   errors is passed to the decoder factory as argument if non-NULL. */

/* Calls the decoder registered for `encoding` with (object[, errors]) and
   returns the first item of the 2-tuple it yields, as a new reference.

   Returns NULL if the decoder cannot be found, the argument tuple cannot
   be built, the call fails, or the decoder does not return a 2-tuple
   (TypeError). */
PyObject *PyCodec_Decode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *decoder = NULL;
    PyObject *args = NULL, *result = NULL;
    PyObject *v;

    decoder = PyCodec_Decoder(encoding);
    if (decoder == NULL)
        goto onError;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyEval_CallObject(decoder,args);
    if (result == NULL)
        goto onError;
    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder must return a tuple (object,integer)");
        goto onError;
    }
    /* Keep the decoded object alive beyond the result tuple. */
    v = PyTuple_GET_ITEM(result,0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(decoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(args);
    Py_XDECREF(decoder);
    Py_XDECREF(result);
    return NULL;
}

/* Register the error handling callback function error under the name
   name. This function will be called by the codec when it encounters
   unencodable characters/undecodable bytes and doesn't know the
   callback name, when name is specified as the error parameter
   in the call to the encode/decode function.
   Return 0 on success, -1 on error. */
/* Stores `error` under `name` in the current interpreter's error handler
   registry, initializing the codec registry first if the search path does
   not exist yet.  Returns -1 if that initialization reports failure or if
   `error` is not callable (TypeError); otherwise returns the result of
   PyDict_SetItemString(). */
int PyCodec_RegisterError(const char *name, PyObject *error)
{
    PyInterpreterState *interp = PyThreadState_GET()->interp;
    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
        return -1;
    if (!PyCallable_Check(error)) {
        PyErr_SetString(PyExc_TypeError, "handler must be callable");
        return -1;
    }
    return PyDict_SetItemString(interp->codec_error_registry,
                                (char *)name, error);
}

/* Lookup the error handling callback function registered under the
   name error. As a special case NULL can be passed, in which case
   the error handling callback for strict encoding will be returned. */
PyObject *PyCodec_LookupError(const char *name)
{
    PyObject *handler = NULL;

452
    PyInterpreterState *interp = PyThreadState_GET()->interp;
453 454 455
    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
	return NULL;

456 457
    if (name==NULL)
	name = "strict";
458
    handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
459 460 461 462 463 464 465 466 467 468 469
    if (!handler)
	PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
    else
	Py_INCREF(handler);
    return handler;
}

/* Set a TypeError saying that the error callback cannot handle `exc`,
   naming the __name__ of exc's __class__.

   If either attribute lookup fails, no TypeError is set here and the
   function simply returns (any exception raised by the failed lookup is
   left untouched). */
static void wrong_exception_type(PyObject *exc)
{
    PyObject *type = PyObject_GetAttrString(exc, "__class__");
    if (type != NULL) {
        PyObject *name = PyObject_GetAttrString(type, "__name__");
        Py_DECREF(type);
        if (name != NULL) {
            PyErr_Format(PyExc_TypeError,
                         "don't know how to handle %S in error callback", name);
            Py_DECREF(name);
        }
    }
}

/* "strict" error handler: sets `exc` itself as the current exception when
   it is an exception instance, and a TypeError otherwise.  Always returns
   NULL. */
PyObject *PyCodec_StrictErrors(PyObject *exc)
{
    if (PyExceptionInstance_Check(exc))
        PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
    else
        PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
    return NULL;
}


/* "ignore" error handler.

   For a UnicodeEncodeError, UnicodeDecodeError or UnicodeTranslateError
   it returns the tuple (empty replacement, end), where end is the
   exception's end position.  Returns NULL if the end position cannot be
   read, or after wrong_exception_type() for any other object. */
PyObject *PyCodec_IgnoreErrors(PyObject *exc)
{
    Py_ssize_t end;
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
    /* ouch: passing NULL, 0, pos gives None instead of u'' -- so any
       non-NULL pointer is passed together with a length of 0. */
    return Py_BuildValue("(u#n)", &end, 0, end);
}


/* "replace" error handler.  Returns a (replacement, end) tuple:

   - UnicodeEncodeError: end-start '?' characters;
   - UnicodeDecodeError: a single Py_UNICODE_REPLACEMENT_CHARACTER;
   - UnicodeTranslateError: end-start Py_UNICODE_REPLACEMENT_CHARACTERs.

   Returns NULL if start/end cannot be read or the replacement cannot be
   allocated, or after wrong_exception_type() for any other object. */
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
    PyObject *restuple;
    Py_ssize_t start;
    Py_ssize_t end;
    Py_ssize_t i;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *res;
        Py_UNICODE *p;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        res = PyUnicode_FromUnicode(NULL, end-start);
        if (res == NULL)
            return NULL;
        for (p = PyUnicode_AS_UNICODE(res), i = start;
            i<end; ++p, ++i)
            *p = '?';
        /* "O" adds its own reference, so ours is dropped afterwards. */
        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        return restuple;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        return Py_BuildValue("(u#n)", &res, 1, end);
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        PyObject *res;
        Py_UNICODE *p;
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        res = PyUnicode_FromUnicode(NULL, end-start);
        if (res == NULL)
            return NULL;
        for (p = PyUnicode_AS_UNICODE(res), i = start;
            i<end; ++p, ++i)
            *p = Py_UNICODE_REPLACEMENT_CHARACTER;
        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}

/* "xmlcharrefreplace" error handler.

   For a UnicodeEncodeError, every code unit in [start, end) of the
   exception's object is replaced by a decimal character reference
   "&#N;".  Returns the tuple (replacement, end).

   Returns NULL if start/end/object cannot be read or the replacement
   cannot be allocated, or after wrong_exception_type() for any other
   object. */
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        Py_UNICODE *p;
        Py_UNICODE *startp;
        Py_UNICODE *outp;
        /* Same width as start/end so that long ranges cannot overflow. */
        Py_ssize_t ressize;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        startp = PyUnicode_AS_UNICODE(object);
        /* First pass: size of the output, "&#" + digits + ";" per unit. */
        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
            if (*p<10)
                ressize += 2+1+1;
            else if (*p<100)
                ressize += 2+2+1;
            else if (*p<1000)
                ressize += 2+3+1;
            else if (*p<10000)
                ressize += 2+4+1;
#ifndef Py_UNICODE_WIDE
            else
                ressize += 2+5+1;
#else
            else if (*p<100000)
                ressize += 2+5+1;
            else if (*p<1000000)
                ressize += 2+6+1;
            else
                ressize += 2+7+1;
#endif
        }
        /* allocate replacement */
        res = PyUnicode_FromUnicode(NULL, ressize);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        /* generate replacement */
        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
            p < startp+end; ++p) {
            Py_UNICODE c = *p;
            int digits;
            int base;
            *outp++ = '&';
            *outp++ = '#';
            if (*p<10) {
                digits = 1;
                base = 1;
            }
            else if (*p<100) {
                digits = 2;
                base = 10;
            }
            else if (*p<1000) {
                digits = 3;
                base = 100;
            }
            else if (*p<10000) {
                digits = 4;
                base = 1000;
            }
#ifndef Py_UNICODE_WIDE
            else {
                digits = 5;
                base = 10000;
            }
#else
            else if (*p<100000) {
                digits = 5;
                base = 10000;
            }
            else if (*p<1000000) {
                digits = 6;
                base = 100000;
            }
            else {
                digits = 7;
                base = 1000000;
            }
#endif
            /* Emit the decimal digits, most significant first. */
            while (digits-->0) {
                *outp++ = '0' + c/base;
                c %= base;
                base /= 10;
            }
            *outp++ = ';';
        }
        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}

/* Lower-case hexadecimal digits indexed by nibble value (0..15).
   Read-only lookup table. */
static const Py_UNICODE hexdigits[] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};

/* "backslashreplace" error handler.

   For a UnicodeEncodeError, every code unit in [start, end) of the
   exception's object is replaced by a backslash escape: \xNN below
   0x100, \uNNNN from 0x100 up, and \UNNNNNNNN from 0x10000 up when
   Py_UNICODE_WIDE is defined.  Returns the tuple (replacement, end).

   Returns NULL if start/end/object cannot be read or the replacement
   cannot be allocated, or after wrong_exception_type() for any other
   object. */
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        Py_UNICODE *p;
        Py_UNICODE *startp;
        Py_UNICODE *outp;
        /* Same width as start/end so that long ranges cannot overflow. */
        Py_ssize_t ressize;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        startp = PyUnicode_AS_UNICODE(object);
        /* First pass: size of the output, backslash + letter + digits. */
        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
#ifdef Py_UNICODE_WIDE
            if (*p >= 0x00010000)
                ressize += 1+1+8;
            else
#endif
            if (*p >= 0x100) {
                ressize += 1+1+4;
            }
            else
                ressize += 1+1+2;
        }
        res = PyUnicode_FromUnicode(NULL, ressize);
        if (res==NULL) {
            /* object is released on this path too, as it is on success. */
            Py_DECREF(object);
            return NULL;
        }
        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
            p < startp+end; ++p) {
            Py_UNICODE c = *p;
            *outp++ = '\\';
#ifdef Py_UNICODE_WIDE
            if (c >= 0x00010000) {
                *outp++ = 'U';
                *outp++ = hexdigits[(c>>28)&0xf];
                *outp++ = hexdigits[(c>>24)&0xf];
                *outp++ = hexdigits[(c>>20)&0xf];
                *outp++ = hexdigits[(c>>16)&0xf];
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
            else
#endif
            if (c >= 0x100) {
                *outp++ = 'u';
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
            else
                *outp++ = 'x';
            /* The two low nibbles are common to all three forms. */
            *outp++ = hexdigits[(c>>4)&0xf];
            *outp++ = hexdigits[c&0xf];
        }

        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}

/* Adapter used in the method table of _PyCodecRegistry_Init (METH_O):
   forwards `exc` to PyCodec_StrictErrors.  `self` is not used. */
static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_StrictErrors(exc);

    return outcome;
}


/* Adapter used in the method table of _PyCodecRegistry_Init (METH_O):
   forwards `exc` to PyCodec_IgnoreErrors.  `self` is not used. */
static PyObject *ignore_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_IgnoreErrors(exc);

    return outcome;
}


/* Adapter used in the method table of _PyCodecRegistry_Init (METH_O):
   forwards `exc` to PyCodec_ReplaceErrors.  `self` is not used. */
static PyObject *replace_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_ReplaceErrors(exc);

    return outcome;
}


/* Adapter used in the method table of _PyCodecRegistry_Init (METH_O):
   forwards `exc` to PyCodec_XMLCharRefReplaceErrors.  `self` is not
   used. */
static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_XMLCharRefReplaceErrors(exc);

    return outcome;
}


/* Adapter used in the method table of _PyCodecRegistry_Init (METH_O):
   forwards `exc` to PyCodec_BackslashReplaceErrors.  `self` is not
   used. */
static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_BackslashReplaceErrors(exc);

    return outcome;
}

780
static int _PyCodecRegistry_Init(void)
781
{
782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827
    static struct {
	char *name;
	PyMethodDef def;
    } methods[] =
    {
	{
	    "strict",
	    {
		"strict_errors",
		strict_errors,
		METH_O
	    }
	},
	{
	    "ignore",
	    {
		"ignore_errors",
		ignore_errors,
		METH_O
	    }
	},
	{
	    "replace",
	    {
		"replace_errors",
		replace_errors,
		METH_O
	    }
	},
	{
	    "xmlcharrefreplace",
	    {
		"xmlcharrefreplace_errors",
		xmlcharrefreplace_errors,
		METH_O
	    }
	},
	{
	    "backslashreplace",
	    {
		"backslashreplace_errors",
		backslashreplace_errors,
		METH_O
	    }
	}
    };
828

829
    PyInterpreterState *interp = PyThreadState_GET()->interp;
830
    PyObject *mod;
831
    unsigned i;
832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849

    if (interp->codec_search_path != NULL)
	return 0;

    interp->codec_search_path = PyList_New(0);
    interp->codec_search_cache = PyDict_New();
    interp->codec_error_registry = PyDict_New();

    if (interp->codec_error_registry) {
	for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
	    PyObject *func = PyCFunction_New(&methods[i].def, NULL);
	    int res;
	    if (!func)
		Py_FatalError("can't initialize codec error registry");
	    res = PyCodec_RegisterError(methods[i].name, func);
	    Py_DECREF(func);
	    if (res)
		Py_FatalError("can't initialize codec error registry");
850 851
	}
    }
852 853 854 855

    if (interp->codec_search_path == NULL ||
	interp->codec_search_cache == NULL ||
	interp->codec_error_registry == NULL)
856
	Py_FatalError("can't initialize codec registry");
857

858
    mod = PyImport_ImportModuleNoBlock("encodings");
859 860 861 862 863 864 865 866 867 868 869 870 871
    if (mod == NULL) {
	if (PyErr_ExceptionMatches(PyExc_ImportError)) {
	    /* Ignore ImportErrors... this is done so that
	       distributions can disable the encodings package. Note
	       that other errors are not masked, e.g. SystemErrors
	       raised to inform the user of an error in the Python
	       configuration are still reported back to the user. */
	    PyErr_Clear();
	    return 0;
	}
	return -1;
    }
    Py_DECREF(mod);
872
    interp->codecs_initialized = 1;
873
    return 0;
874
}