Modified parsing of format strings, so that we always return

a tuple (literal, field_name, format_spec, conversion). literal will always be a string, but might be of zero length. field_name will be None if there is no markup text. format_spec will be a (possibly zero length) string if field_name is non-None. conversion will be a one character string, or None. This makes the Formatter class, and especially its parse() method, easier to understand. Suggestion was by Jim Jewett, inspired by the "tail" of an elementtree node. Also, fixed a reference leak in fieldnameiter_next.

Modified parsing of format strings, so that we always return
a tuple (literal, field_name, format_spec, conversion). literal will always be a string, but might be of zero length. field_name will be None if there is no markup text. format_spec will be a (possibly zero length) string if field_name is non-None. conversion will be a one character string, or None. This makes the Formatter class, and especially its parse() method, easier to understand. Suggestion was by Jim Jewett, inspired by the "tail" of an elementtree node. Also, fixed a reference leak in fieldnameiter_next.
625cbf28 · Eric Smith · 9600f93d · 625cbf28 · 625cbf28
Commit 625cbf28 authored Aug 29, 2007 by Eric Smith
Hide whitespace changes
Inline Side-by-side

Showing with 170 additions and 159 deletions

string.py Lib/string.py +13 -4

string_format.h Objects/stringlib/string_format.h +157 -155

No files found.
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -212,7 +212,13 @@ class Formatter:
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):
-            if literal_text is None:
+            # output the literal text
+            if literal_text:
+                result.append(literal_text)
+            # if there's a field, output it
+            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting
@@ -224,9 +230,7 @@ class Formatter:
                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))
-            else:
-                # this is literal text, use it directly
-                result.append(literal_text)
        self.check_unused_args(used_args, args, kwargs)
        return ''.join(result)
@@ -263,6 +267,11 @@ class Formatter:
    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
+    # literal_text can be zero length
+    # field_name can be None, in which case there's no
+    #  object to format and output
+    # if field_name is not None, it is looked up, formatted
+    #  with format_spec and conversion and then used
    def parse(self, format_string):
        return format_string._formatter_parser()

--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
@@ -48,9 +48,24 @@ SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
        str->end = str->ptr + len;
 }
+/* return a new string.  if str->ptr is NULL, return None */
 Py_LOCAL_INLINE(PyObject *)
 SubString_new_object(SubString *str)
 {
+    if (str->ptr == NULL) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+    return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
+}
+/* return a new string.  if str->ptr is NULL, return an empty string */
+Py_LOCAL_INLINE(PyObject *)
+SubString_new_object_or_empty(SubString *str)
+{
+    if (str->ptr == NULL) {
+        return STRINGLIB_NEW(NULL, 0);
+    }
    return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
 }
@@ -481,7 +496,7 @@ format(PyObject *fieldobj, SubString *format_spec)
            return NULL;
    /* we need to create an object out of the pointers we have */
-    spec = SubString_new_object(format_spec);
+    spec = SubString_new_object_or_empty(format_spec);
    if (spec == NULL)
        goto done;
@@ -609,21 +624,19 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
 typedef struct {
    SubString str;
-    int in_markup;
 } MarkupIterator;
 static int
 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
 {
    SubString_init(&self->str, ptr, len);
-    self->in_markup = 0;
    return 1;
 }
 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
   string (or something to be expanded) */
 static int
-MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal,
+MarkupIterator_next(MarkupIterator *self, SubString *literal,
                    SubString *field_name, SubString *format_spec,
                    STRINGLIB_CHAR *conversion,
                    int *format_spec_needs_expanding)
@@ -633,101 +646,116 @@ MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal,
    STRINGLIB_CHAR *start;
    int count;
    Py_ssize_t len;
+    int markup_follows = 0;
+    /* initialize all of the output variables */
+    SubString_init(literal, NULL, 0);
+    SubString_init(field_name, NULL, 0);
+    SubString_init(format_spec, NULL, 0);
+    *conversion = '\0';
    *format_spec_needs_expanding = 0;
-    /* no more input, end of iterator */
+    /* No more input, end of iterator.  This is the normal exit
+       path. */
    if (self->str.ptr >= self->str.end)
        return 1;
-    *is_markup = self->in_markup;
    start = self->str.ptr;
-    if (self->in_markup) {
+    /* First read any literal text. Read until the end of string, an
+       escaped '{' or '}', or an unescaped '{'.  In order to never
-        /* prepare for next iteration */
+       allocate memory and so I can just pass pointers around, if
-        self->in_markup = 0;
+       there's an escaped '{' or '}' then we'll return the literal
+       including the brace, but no format object.  The next time
-        /* this is markup, find the end of the string by counting nested
+       through, we'll return the rest of the literal, skipping past
-           braces.  note that this prohibits escaped braces, so that
+       the second consecutive brace. */
-           format_specs cannot have braces in them. */
+    while (self->str.ptr < self->str.end) {
-        count = 1;
+        switch (c = *(self->str.ptr++)) {
+        case '{':
-        /* we know we can't have a zero length string, so don't worry
+        case '}':
-           about that case */
+            markup_follows = 1;
-        while (self->str.ptr < self->str.end) {
+            break;
-            switch (c = *(self->str.ptr++)) {
+        default:
-            case '{':
+            continue;
-                /* the format spec needs to be recursively expanded.
-                   this is an optimization, and not strictly needed */
-                *format_spec_needs_expanding = 1;
-                count++;
-                break;
-            case '}':
-                count--;
-                if (count <= 0) {
-                    /* we're done.  parse and get out */
-                    literal->ptr = start;
-                    literal->end = self->str.ptr-1;
-                    if (parse_field(literal, field_name, format_spec,
-                                    conversion) == 0)
-                        return 0;
-                    /* success */
-                    return 2;
-                }
-                break;
-            }
        }
-        /* end of string while searching for matching '}' */
+        break;
-        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
+    }
-        return 0;
+    at_end = self->str.ptr >= self->str.end;
+    len = self->str.ptr - start;
+    if ((c == '}') && (at_end || (c != *self->str.ptr))) {
+        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
+                        "in format string");
+        return 0;
    }
-    else {
+    if (at_end && c == '{') {
-        /* literal text, read until the end of string, an escaped { or },
+        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
-           or an unescaped { */
+                        "in format string");
-        while (self->str.ptr < self->str.end) {
+        return 0;
-            switch (c = *(self->str.ptr++)) {
+    }
-            case '{':
+    if (!at_end) {
-            case '}':
+        if (c == *self->str.ptr) {
-                self->in_markup = 1;
+            /* escaped } or {, skip it in the input.  there is no
-                break;
+               markup object following us, just this literal text */
-            default:
+            self->str.ptr++;
-                continue;
+            markup_follows = 0;
-            }
-            break;
        }
+        else
+            len--;
+    }
-        at_end = self->str.ptr >= self->str.end;
+    /* record the literal text */
-        len = self->str.ptr - start;
+    literal->ptr = start;
+    literal->end = start + len;
-        if ((c == '}') && (at_end || (c != *self->str.ptr))) {
+    if (!markup_follows)
-            PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
+        return 2;
-                            "in format string");
-            return 0;
+    /* this is markup, find the end of the string by counting nested
-        }
+       braces.  note that this prohibits escaped braces, so that
-        if (at_end && c == '{') {
+       format_specs cannot have braces in them. */
-            PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
+    count = 1;
-                            "in format string");
-            return 0;
+    start = self->str.ptr;
-        }
-        if (!at_end) {
+    /* we know we can't have a zero length string, so don't worry
-            if (c == *self->str.ptr) {
+       about that case */
-                /* escaped } or {, skip it in the input */
+    while (self->str.ptr < self->str.end) {
-                self->str.ptr++;
+        switch (c = *(self->str.ptr++)) {
-                self->in_markup = 0;
+        case '{':
+            /* the format spec needs to be recursively expanded.
+               this is an optimization, and not strictly needed */
+            *format_spec_needs_expanding = 1;
+            count++;
+            break;
+        case '}':
+            count--;
+            if (count <= 0) {
+                /* we're done.  parse and get out */
+                SubString s;
+                SubString_init(&s, start, self->str.ptr - 1 - start);
+                if (parse_field(&s, field_name, format_spec, conversion) == 0)
+                    return 0;
+                /* a zero length field_name is an error */
+                if (field_name->ptr == field_name->end) {
+                    PyErr_SetString(PyExc_ValueError, "zero length field name "
+                                    "in format");
+                    return 0;
+                }
+                /* success */
+                return 2;
            }
-            else
+            break;
-                len--;
        }
-        /* this is just plain text, return it */
-        literal->ptr = start;
-        literal->end = start + len;
-        return 2;
    }
+    /* end of string while searching for matching '}' */
+    PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
+    return 0;
 }
@@ -826,27 +854,24 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
          OutputString *output, int *recursion_level)
 {
    MarkupIterator iter;
-    int is_markup;
    int format_spec_needs_expanding;
    int result;
-    SubString str;
+    SubString literal;
    SubString field_name;
    SubString format_spec;
    STRINGLIB_CHAR conversion;
    MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
-    while ((result = MarkupIterator_next(&iter, &is_markup, &str, &field_name,
+    while ((result = MarkupIterator_next(&iter, &literal, &field_name,
                                         &format_spec, &conversion,
                                         &format_spec_needs_expanding)) == 2) {
-        if (is_markup) {
+        if (!output_data(output, literal.ptr, literal.end - literal.ptr))
+            return 0;
+        if (field_name.ptr != field_name.end)
            if (!output_markup(&field_name, &format_spec,
                               format_spec_needs_expanding, conversion, output,
                               args, kwargs, recursion_level))
                return 0;
-        }
-        else
-            if (!output_data(output, str.ptr, str.end-str.ptr))
-                return 0;
    }
    return result;
 }
@@ -947,17 +972,12 @@ formatteriter_dealloc(formatteriterobject *it)
 }
 /* returns a tuple:
-   (is_markup, literal, field_name, format_spec, conversion)
+   (literal, field_name, format_spec, conversion)
-   if is_markup == True:
-        literal is None
+   literal is any literal text to output.  might be zero length
-        field_name is the string before the ':'
+   field_name is the string before the ':'.  might be None
-        format_spec is the string after the ':'
+   format_spec is the string after the ':'.  might be None
-        conversion is either None, or the string after the '!'
+   conversion is either None, or the string after the '!'
-   if is_markup == False:
-        literal is the literal string
-        field_name is None
-        format_spec is None
-        conversion is None
 */
 static PyObject *
 formatteriter_next(formatteriterobject *it)
@@ -966,10 +986,9 @@ formatteriter_next(formatteriterobject *it)
    SubString field_name;
    SubString format_spec;
    Py_UNICODE conversion;
-    int is_markup;
    int format_spec_needs_expanding;
-    int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
+    int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
-                                     &field_name, &format_spec, &conversion,
+                                     &format_spec, &conversion,
                                     &format_spec_needs_expanding);
    /* all of the SubString objects point into it->str, so no
@@ -984,50 +1003,39 @@ formatteriter_next(formatteriterobject *it)
        PyObject *format_spec_str = NULL;
        PyObject *conversion_str = NULL;
        PyObject *tuple = NULL;
+        int has_field = field_name.ptr != field_name.end;
-        if (is_markup) {
+        literal_str = SubString_new_object(&literal);
-            /* field_name, format_spec, and conversion are returned */
+        if (literal_str == NULL)
-            literal_str = Py_None;
+            goto done;
-            Py_INCREF(literal_str);
+        field_name_str = SubString_new_object(&field_name);
-            field_name_str = SubString_new_object(&field_name);
+        if (field_name_str == NULL)
-            if (field_name_str == NULL)
+            goto done;
-                goto error;
+        /* if field_name is non-zero length, return a string for
-            format_spec_str = SubString_new_object(&format_spec);
+           format_spec (even if zero length), else return None */
-            if (format_spec_str == NULL)
+        format_spec_str = (has_field ?
-                goto error;
+                           SubString_new_object_or_empty :
+                           SubString_new_object)(&format_spec);
-            /* if the conversion is not specified, return a None,
+        if (format_spec_str == NULL)
-               otherwise create a one length string with the
+            goto done;
-               conversion characater */
-            if (conversion == '\0') {
-                conversion_str = Py_None;
-                Py_INCREF(conversion_str);
-            }
-            else
-                conversion_str = PyUnicode_FromUnicode(&conversion,
-                                                       1);
-            if (conversion_str == NULL)
-                goto error;
-        }
-        else {
-            /* only literal is returned */
-            literal_str = SubString_new_object(&literal);
-            if (literal_str == NULL)
-                goto error;
-            field_name_str = Py_None;
-            format_spec_str = Py_None;
-            conversion_str = Py_None;
-            Py_INCREF(field_name_str);
+        /* if the conversion is not specified, return a None,
-            Py_INCREF(format_spec_str);
+           otherwise create a one length string with the conversion
+           character */
+        if (conversion == '\0') {
+            conversion_str = Py_None;
            Py_INCREF(conversion_str);
        }
+        else
+            conversion_str = PyUnicode_FromUnicode(&conversion, 1);
+        if (conversion_str == NULL)
+            goto done;
        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
                             conversion_str);
-    error:
+    done:
        Py_XDECREF(literal_str);
        Py_XDECREF(field_name_str);
        Py_XDECREF(format_spec_str);
@@ -1149,7 +1157,7 @@ fieldnameiter_next(fieldnameiterobject *it)
        is_attr_obj = PyBool_FromLong(is_attr);
        if (is_attr_obj == NULL)
-            goto error;
+            goto done;
        /* either an integer or a string */
        if (idx != -1)
@@ -1157,22 +1165,16 @@ fieldnameiter_next(fieldnameiterobject *it)
        else
            obj = SubString_new_object(&name);
        if (obj == NULL)
-            goto error;
+            goto done;
        /* return a tuple of values */
        result = PyTuple_Pack(2, is_attr_obj, obj);
-        if (result == NULL)
-            goto error;
-        return result;
+    done:
-    error:
-        Py_XDECREF(result);
        Py_XDECREF(is_attr_obj);
        Py_XDECREF(obj);
-        return NULL;
+        return result;
    }
-    return NULL;
 }
 static PyMethodDef fieldnameiter_methods[] = {
@@ -1240,7 +1242,7 @@ formatter_field_name_split(PyUnicodeObject *self)
    if (!field_name_split(STRINGLIB_STR(self),
                          STRINGLIB_LEN(self),
                          &first, &first_idx, &it->it_field))
-        goto error;
+        goto done;
    /* first becomes an integer, if possible; else a string */
    if (first_idx != -1)
@@ -1249,12 +1251,12 @@ formatter_field_name_split(PyUnicodeObject *self)
        /* convert "first" into a string object */
        first_obj = SubString_new_object(&first);
    if (first_obj == NULL)
-        goto error;
+        goto done;
    /* return a tuple of values */
    result = PyTuple_Pack(2, first_obj, it);
-error:
+done:
    Py_XDECREF(it);
    Py_XDECREF(first_obj);
    return result;