Kaydet (Commit) 625cbf28 authored tarafından Eric Smith's avatar Eric Smith

Modified parsing of format strings, so that we always return

a tuple (literal, field_name, format_spec, conversion).

literal will always be a string, but might be of zero length.
field_name will be None if there is no markup text
format_spec will be a (possibly zero length) string if
  field_name is non-None
conversion will be a one character string, or None

This makes the Formatter class, and especially its parse()
method, easier to understand.

Suggestion was by Jim Jewett, inspired by the "tail" of an
elementtree node.

Also, fixed a reference leak in fieldnameiter_next.
üst 9600f93d
...@@ -212,7 +212,13 @@ class Formatter: ...@@ -212,7 +212,13 @@ class Formatter:
result = [] result = []
for literal_text, field_name, format_spec, conversion in \ for literal_text, field_name, format_spec, conversion in \
self.parse(format_string): self.parse(format_string):
if literal_text is None:
# output the literal text
if literal_text:
result.append(literal_text)
# if there's a field, output it
if field_name is not None:
# this is some markup, find the object and do # this is some markup, find the object and do
# the formatting # the formatting
...@@ -224,9 +230,7 @@ class Formatter: ...@@ -224,9 +230,7 @@ class Formatter:
# format the object and append to the result # format the object and append to the result
result.append(self.format_field(obj, format_spec)) result.append(self.format_field(obj, format_spec))
else:
# this is literal text, use it directly
result.append(literal_text)
self.check_unused_args(used_args, args, kwargs) self.check_unused_args(used_args, args, kwargs)
return ''.join(result) return ''.join(result)
...@@ -263,6 +267,11 @@ class Formatter: ...@@ -263,6 +267,11 @@ class Formatter:
# returns an iterable that contains tuples of the form: # returns an iterable that contains tuples of the form:
# (literal_text, field_name, format_spec, conversion) # (literal_text, field_name, format_spec, conversion)
# literal_text can be zero length
# field_name can be None, in which case there's no
# object to format and output
# if field_name is not None, it is looked up, formatted
# with format_spec and conversion and then used
def parse(self, format_string): def parse(self, format_string):
return format_string._formatter_parser() return format_string._formatter_parser()
......
...@@ -48,9 +48,24 @@ SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len) ...@@ -48,9 +48,24 @@ SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
str->end = str->ptr + len; str->end = str->ptr + len;
} }
/* return a new string. if str->ptr is NULL, return None */
Py_LOCAL_INLINE(PyObject *) Py_LOCAL_INLINE(PyObject *)
SubString_new_object(SubString *str) SubString_new_object(SubString *str)
{ {
if (str->ptr == NULL) {
Py_INCREF(Py_None);
return Py_None;
}
return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
}
/* return a new string. if str->ptr is NULL, return an empty string */
Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString *str)
{
if (str->ptr == NULL) {
return STRINGLIB_NEW(NULL, 0);
}
return STRINGLIB_NEW(str->ptr, str->end - str->ptr); return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
} }
...@@ -481,7 +496,7 @@ format(PyObject *fieldobj, SubString *format_spec) ...@@ -481,7 +496,7 @@ format(PyObject *fieldobj, SubString *format_spec)
return NULL; return NULL;
/* we need to create an object out of the pointers we have */ /* we need to create an object out of the pointers we have */
spec = SubString_new_object(format_spec); spec = SubString_new_object_or_empty(format_spec);
if (spec == NULL) if (spec == NULL)
goto done; goto done;
...@@ -609,21 +624,19 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec, ...@@ -609,21 +624,19 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
typedef struct { typedef struct {
SubString str; SubString str;
int in_markup;
} MarkupIterator; } MarkupIterator;
static int static int
MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len) MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
{ {
SubString_init(&self->str, ptr, len); SubString_init(&self->str, ptr, len);
self->in_markup = 0;
return 1; return 1;
} }
/* returns 0 on error, 1 on non-error termination, and 2 if it got a /* returns 0 on error, 1 on non-error termination, and 2 if it got a
string (or something to be expanded) */ string (or something to be expanded) */
static int static int
MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal, MarkupIterator_next(MarkupIterator *self, SubString *literal,
SubString *field_name, SubString *format_spec, SubString *field_name, SubString *format_spec,
STRINGLIB_CHAR *conversion, STRINGLIB_CHAR *conversion,
int *format_spec_needs_expanding) int *format_spec_needs_expanding)
...@@ -633,101 +646,116 @@ MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal, ...@@ -633,101 +646,116 @@ MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal,
STRINGLIB_CHAR *start; STRINGLIB_CHAR *start;
int count; int count;
Py_ssize_t len; Py_ssize_t len;
int markup_follows = 0;
/* initialize all of the output variables */
SubString_init(literal, NULL, 0);
SubString_init(field_name, NULL, 0);
SubString_init(format_spec, NULL, 0);
*conversion = '\0';
*format_spec_needs_expanding = 0; *format_spec_needs_expanding = 0;
/* no more input, end of iterator */ /* No more input, end of iterator. This is the normal exit
path. */
if (self->str.ptr >= self->str.end) if (self->str.ptr >= self->str.end)
return 1; return 1;
*is_markup = self->in_markup;
start = self->str.ptr; start = self->str.ptr;
if (self->in_markup) { /* First read any literal text. Read until the end of string, an
escaped '{' or '}', or an unescaped '{'. In order to never
/* prepare for next iteration */ allocate memory and so I can just pass pointers around, if
self->in_markup = 0; there's an escaped '{' or '}' then we'll return the literal
including the brace, but no format object. The next time
/* this is markup, find the end of the string by counting nested through, we'll return the rest of the literal, skipping past
braces. note that this prohibits escaped braces, so that the second consecutive brace. */
format_specs cannot have braces in them. */ while (self->str.ptr < self->str.end) {
count = 1; switch (c = *(self->str.ptr++)) {
case '{':
/* we know we can't have a zero length string, so don't worry case '}':
about that case */ markup_follows = 1;
while (self->str.ptr < self->str.end) { break;
switch (c = *(self->str.ptr++)) { default:
case '{': continue;
/* the format spec needs to be recursively expanded.
this is an optimization, and not strictly needed */
*format_spec_needs_expanding = 1;
count++;
break;
case '}':
count--;
if (count <= 0) {
/* we're done. parse and get out */
literal->ptr = start;
literal->end = self->str.ptr-1;
if (parse_field(literal, field_name, format_spec,
conversion) == 0)
return 0;
/* success */
return 2;
}
break;
}
} }
/* end of string while searching for matching '}' */ break;
PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); }
return 0;
at_end = self->str.ptr >= self->str.end;
len = self->str.ptr - start;
if ((c == '}') && (at_end || (c != *self->str.ptr))) {
PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
"in format string");
return 0;
} }
else { if (at_end && c == '{') {
/* literal text, read until the end of string, an escaped { or }, PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
or an unescaped { */ "in format string");
while (self->str.ptr < self->str.end) { return 0;
switch (c = *(self->str.ptr++)) { }
case '{': if (!at_end) {
case '}': if (c == *self->str.ptr) {
self->in_markup = 1; /* escaped } or {, skip it in the input. there is no
break; markup object following us, just this literal text */
default: self->str.ptr++;
continue; markup_follows = 0;
}
break;
} }
else
len--;
}
at_end = self->str.ptr >= self->str.end; /* record the literal text */
len = self->str.ptr - start; literal->ptr = start;
literal->end = start + len;
if ((c == '}') && (at_end || (c != *self->str.ptr))) { if (!markup_follows)
PyErr_SetString(PyExc_ValueError, "Single '}' encountered " return 2;
"in format string");
return 0; /* this is markup, find the end of the string by counting nested
} braces. note that this prohibits escaped braces, so that
if (at_end && c == '{') { format_specs cannot have braces in them. */
PyErr_SetString(PyExc_ValueError, "Single '{' encountered " count = 1;
"in format string");
return 0; start = self->str.ptr;
}
if (!at_end) { /* we know we can't have a zero length string, so don't worry
if (c == *self->str.ptr) { about that case */
/* escaped } or {, skip it in the input */ while (self->str.ptr < self->str.end) {
self->str.ptr++; switch (c = *(self->str.ptr++)) {
self->in_markup = 0; case '{':
/* the format spec needs to be recursively expanded.
this is an optimization, and not strictly needed */
*format_spec_needs_expanding = 1;
count++;
break;
case '}':
count--;
if (count <= 0) {
/* we're done. parse and get out */
SubString s;
SubString_init(&s, start, self->str.ptr - 1 - start);
if (parse_field(&s, field_name, format_spec, conversion) == 0)
return 0;
/* a zero length field_name is an error */
if (field_name->ptr == field_name->end) {
PyErr_SetString(PyExc_ValueError, "zero length field name "
"in format");
return 0;
}
/* success */
return 2;
} }
else break;
len--;
} }
/* this is just plain text, return it */
literal->ptr = start;
literal->end = start + len;
return 2;
} }
/* end of string while searching for matching '}' */
PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
return 0;
} }
...@@ -826,27 +854,24 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, ...@@ -826,27 +854,24 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
OutputString *output, int *recursion_level) OutputString *output, int *recursion_level)
{ {
MarkupIterator iter; MarkupIterator iter;
int is_markup;
int format_spec_needs_expanding; int format_spec_needs_expanding;
int result; int result;
SubString str; SubString literal;
SubString field_name; SubString field_name;
SubString format_spec; SubString format_spec;
STRINGLIB_CHAR conversion; STRINGLIB_CHAR conversion;
MarkupIterator_init(&iter, input->ptr, input->end - input->ptr); MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
while ((result = MarkupIterator_next(&iter, &is_markup, &str, &field_name, while ((result = MarkupIterator_next(&iter, &literal, &field_name,
&format_spec, &conversion, &format_spec, &conversion,
&format_spec_needs_expanding)) == 2) { &format_spec_needs_expanding)) == 2) {
if (is_markup) { if (!output_data(output, literal.ptr, literal.end - literal.ptr))
return 0;
if (field_name.ptr != field_name.end)
if (!output_markup(&field_name, &format_spec, if (!output_markup(&field_name, &format_spec,
format_spec_needs_expanding, conversion, output, format_spec_needs_expanding, conversion, output,
args, kwargs, recursion_level)) args, kwargs, recursion_level))
return 0; return 0;
}
else
if (!output_data(output, str.ptr, str.end-str.ptr))
return 0;
} }
return result; return result;
} }
...@@ -947,17 +972,12 @@ formatteriter_dealloc(formatteriterobject *it) ...@@ -947,17 +972,12 @@ formatteriter_dealloc(formatteriterobject *it)
} }
/* returns a tuple: /* returns a tuple:
(is_markup, literal, field_name, format_spec, conversion) (literal, field_name, format_spec, conversion)
if is_markup == True:
literal is None literal is any literal text to output. might be zero length
field_name is the string before the ':' field_name is the string before the ':'. might be None
format_spec is the string after the ':' format_spec is the string after the ':'. might be None
conversion is either None, or the string after the '!' conversion is either None, or the string after the '!'
if is_markup == False:
literal is the literal string
field_name is None
format_spec is None
conversion is None
*/ */
static PyObject * static PyObject *
formatteriter_next(formatteriterobject *it) formatteriter_next(formatteriterobject *it)
...@@ -966,10 +986,9 @@ formatteriter_next(formatteriterobject *it) ...@@ -966,10 +986,9 @@ formatteriter_next(formatteriterobject *it)
SubString field_name; SubString field_name;
SubString format_spec; SubString format_spec;
Py_UNICODE conversion; Py_UNICODE conversion;
int is_markup;
int format_spec_needs_expanding; int format_spec_needs_expanding;
int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal, int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
&field_name, &format_spec, &conversion, &format_spec, &conversion,
&format_spec_needs_expanding); &format_spec_needs_expanding);
/* all of the SubString objects point into it->str, so no /* all of the SubString objects point into it->str, so no
...@@ -984,50 +1003,39 @@ formatteriter_next(formatteriterobject *it) ...@@ -984,50 +1003,39 @@ formatteriter_next(formatteriterobject *it)
PyObject *format_spec_str = NULL; PyObject *format_spec_str = NULL;
PyObject *conversion_str = NULL; PyObject *conversion_str = NULL;
PyObject *tuple = NULL; PyObject *tuple = NULL;
int has_field = field_name.ptr != field_name.end;
if (is_markup) { literal_str = SubString_new_object(&literal);
/* field_name, format_spec, and conversion are returned */ if (literal_str == NULL)
literal_str = Py_None; goto done;
Py_INCREF(literal_str);
field_name_str = SubString_new_object(&field_name);
field_name_str = SubString_new_object(&field_name); if (field_name_str == NULL)
if (field_name_str == NULL) goto done;
goto error;
/* if field_name is non-zero length, return a string for
format_spec_str = SubString_new_object(&format_spec); format_spec (even if zero length), else return None */
if (format_spec_str == NULL) format_spec_str = (has_field ?
goto error; SubString_new_object_or_empty :
SubString_new_object)(&format_spec);
/* if the conversion is not specified, return a None, if (format_spec_str == NULL)
otherwise create a one length string with the goto done;
conversion characater */
if (conversion == '\0') {
conversion_str = Py_None;
Py_INCREF(conversion_str);
}
else
conversion_str = PyUnicode_FromUnicode(&conversion,
1);
if (conversion_str == NULL)
goto error;
}
else {
/* only literal is returned */
literal_str = SubString_new_object(&literal);
if (literal_str == NULL)
goto error;
field_name_str = Py_None;
format_spec_str = Py_None;
conversion_str = Py_None;
Py_INCREF(field_name_str); /* if the conversion is not specified, return a None,
Py_INCREF(format_spec_str); otherwise create a one length string with the conversion
character */
if (conversion == '\0') {
conversion_str = Py_None;
Py_INCREF(conversion_str); Py_INCREF(conversion_str);
} }
else
conversion_str = PyUnicode_FromUnicode(&conversion, 1);
if (conversion_str == NULL)
goto done;
tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
conversion_str); conversion_str);
error: done:
Py_XDECREF(literal_str); Py_XDECREF(literal_str);
Py_XDECREF(field_name_str); Py_XDECREF(field_name_str);
Py_XDECREF(format_spec_str); Py_XDECREF(format_spec_str);
...@@ -1149,7 +1157,7 @@ fieldnameiter_next(fieldnameiterobject *it) ...@@ -1149,7 +1157,7 @@ fieldnameiter_next(fieldnameiterobject *it)
is_attr_obj = PyBool_FromLong(is_attr); is_attr_obj = PyBool_FromLong(is_attr);
if (is_attr_obj == NULL) if (is_attr_obj == NULL)
goto error; goto done;
/* either an integer or a string */ /* either an integer or a string */
if (idx != -1) if (idx != -1)
...@@ -1157,22 +1165,16 @@ fieldnameiter_next(fieldnameiterobject *it) ...@@ -1157,22 +1165,16 @@ fieldnameiter_next(fieldnameiterobject *it)
else else
obj = SubString_new_object(&name); obj = SubString_new_object(&name);
if (obj == NULL) if (obj == NULL)
goto error; goto done;
/* return a tuple of values */ /* return a tuple of values */
result = PyTuple_Pack(2, is_attr_obj, obj); result = PyTuple_Pack(2, is_attr_obj, obj);
if (result == NULL)
goto error;
return result; done:
error:
Py_XDECREF(result);
Py_XDECREF(is_attr_obj); Py_XDECREF(is_attr_obj);
Py_XDECREF(obj); Py_XDECREF(obj);
return NULL; return result;
} }
return NULL;
} }
static PyMethodDef fieldnameiter_methods[] = { static PyMethodDef fieldnameiter_methods[] = {
...@@ -1240,7 +1242,7 @@ formatter_field_name_split(PyUnicodeObject *self) ...@@ -1240,7 +1242,7 @@ formatter_field_name_split(PyUnicodeObject *self)
if (!field_name_split(STRINGLIB_STR(self), if (!field_name_split(STRINGLIB_STR(self),
STRINGLIB_LEN(self), STRINGLIB_LEN(self),
&first, &first_idx, &it->it_field)) &first, &first_idx, &it->it_field))
goto error; goto done;
/* first becomes an integer, if possible; else a string */ /* first becomes an integer, if possible; else a string */
if (first_idx != -1) if (first_idx != -1)
...@@ -1249,12 +1251,12 @@ formatter_field_name_split(PyUnicodeObject *self) ...@@ -1249,12 +1251,12 @@ formatter_field_name_split(PyUnicodeObject *self)
/* convert "first" into a string object */ /* convert "first" into a string object */
first_obj = SubString_new_object(&first); first_obj = SubString_new_object(&first);
if (first_obj == NULL) if (first_obj == NULL)
goto error; goto done;
/* return a tuple of values */ /* return a tuple of values */
result = PyTuple_Pack(2, first_obj, it); result = PyTuple_Pack(2, first_obj, it);
error: done:
Py_XDECREF(it); Py_XDECREF(it);
Py_XDECREF(first_obj); Py_XDECREF(first_obj);
return result; return result;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment