Added new parser markers 'et' and 'et#' which do not recode string
objects but instead assume that they use the requested encoding. This is needed on Windows to enable opening files by passing in Unicode file names.

Added new parser markers 'et' and 'et#' which do not recode string
objects but instead assume that they use the requested encoding. This is needed on Windows to enable opening files by passing in Unicode file names.
6f15e579 · Marc-André Lemburg · b1f35bff · 6f15e579 · 6f15e579
Commit 6f15e579 authored May 02, 2001 by Marc-André Lemburg
Hide whitespace changes
Inline Side-by-side

Showing with 32 additions and 4 deletions

ext.tex Doc/ext/ext.tex +12 -0

getargs.c Python/getargs.c +20 -4

No files found.
--- a/Doc/ext/ext.tex
+++ b/Doc/ext/ext.tex
@@ -736,6 +736,12 @@ buffer and adjust \var{*buffer} to reference the newly allocated
 storage. The caller is responsible for calling
 \cfunction{PyMem_Free()} to free the allocated buffer after usage.
+\item[\samp{et} (string, Unicode object or character buffer compatible
+object) {[const char *encoding, char **buffer]}]
+Same as \samp{es} except that string objects are passed through without
+recoding them. Instead, the implementation assumes that the string
+object uses the encoding passed in as parameter.
 \item[\samp{es\#} (string, Unicode object or character buffer compatible
 object) {[const char *encoding, char **buffer, int *buffer_length]}]
 This variant on \samp{s\#} is used for encoding Unicode and objects
@@ -767,6 +773,12 @@ overflow is signalled with an exception.
 In both cases, \var{*buffer_length} is set to the length of the
 encoded data without the trailing 0-byte.
+\item[\samp{et\#} (string, Unicode object or character buffer compatible
+object) {[const char *encoding, char **buffer, int *buffer_length]}]
+Same as \samp{es\#} except that string objects are passed through without
+recoding them. Instead, the implementation assumes that the string
+object uses the encoding passed in as parameter.
 \item[\samp{b} (integer) {[char]}]
 Convert a Python integer to a tiny int, stored in a C \ctype{char}.

--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -687,25 +687,39 @@ convertsimple1(PyObject *arg, char **p_format, va_list *p_va)
 			char **buffer;
 			const char *encoding;
 			PyObject *u, *s;
-			int size;
+			int size, recode_strings;
 			/* Get 'e' parameter: the encoding name */
 			encoding = (const char *)va_arg(*p_va, const char *);
 			if (encoding == NULL)
 			    	encoding = PyUnicode_GetDefaultEncoding();
-			/* Get 's' parameter: the output buffer to use */
+			/* Get output buffer parameter:
+			     's' (recode all objects via Unicode) or
+			     't' (only recode non-string objects) 
+			*/
 			if (*format != 's')
+			    	recode_strings = 1;
+			else if (*format == 't')
+			    	recode_strings = 0;
+			else
 				return "(unknown parser marker combination)";
 			buffer = (char **)va_arg(*p_va, char **);
 			format++;
 			if (buffer == NULL)
 				return "(buffer is NULL)";
+			/* Encode object */
+			if (!recode_strings && PyString_Check(arg)) {
+			    	s = arg;
+				Py_INCREF(s);
+			}
+			else {
 			/* Convert object to Unicode */
 			u = PyUnicode_FromObject(arg);
 			if (u == NULL)
-				return "string or unicode or text buffer";
+					return \
+				     "string or unicode or text buffer";
 			/* Encode object; use default error handling */
 			s = PyUnicode_AsEncodedString(u,
@@ -716,7 +730,9 @@ convertsimple1(PyObject *arg, char **p_format, va_list *p_va)
 				return "(encoding failed)";
 			if (!PyString_Check(s)) {
 				Py_DECREF(s);
-				return "(encoder failed to return a string)";
+					return \
+				     "(encoder failed to return a string)";
+				}
 			}
 			size = PyString_GET_SIZE(s);