"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""

import _string

# Some strings for ctype-style character classification
# ASCII character classes, exposed as plain string constants.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw literal: the backslash is itself one of the punctuation characters,
# not the start of an escape sequence.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace


# Functions which aren't available as string methods.

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If the optional second argument sep is absent or None,
    runs of whitespace characters are replaced by a single space
    and leading and trailing whitespace are removed, otherwise
    sep is used to split and join the words.

    """
    # A falsy sep (None) means "split on whitespace", in which case the
    # words are re-joined with a single space.
    return (sep or ' ').join(word.capitalize() for word in s.split(sep))


####################################################################
import re as _re
from collections import ChainMap

class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex for each Template class.

    When a class using this metaclass is created, its ``pattern`` attribute
    is replaced by a compiled regular expression.  If the class body defines
    its own ``pattern`` that one is compiled as-is; otherwise the default
    pattern below is instantiated from the class's ``delimiter`` and
    ``idpattern`` attributes.  In both cases the class's ``flags`` are
    combined with ``re.VERBOSE``.
    """

    # Default pattern.  %(delim)s and %(id)s are filled in per class.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)
        if 'pattern' in dct:
            # The class supplies its own pattern; use it unchanged.
            pattern = cls.pattern
        else:
            pattern = _TemplateMetaclass.pattern % {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
        cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)


def _substitution_mapping(args, kws):
    """Return the lookup mapping for a substitute()/safe_substitute() call.

    *args* holds at most one positional mapping and *kws* the keyword
    arguments.  When both are given, keywords take precedence.

    Raises TypeError if more than one positional argument was passed.
    """
    if len(args) > 1:
        raise TypeError('Too many positional arguments')
    if not args:
        return kws
    if kws:
        # ChainMap searches kws first, then the positional mapping.
        return ChainMap(kws, args[0])
    return args[0]


class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    The compiled ``pattern`` searches for $$, $identifier, ${identifier},
    and any bare $'s.  Subclasses may override ``delimiter``, ``idpattern``,
    ``flags`` or ``pattern`` to customize the placeholder syntax.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'
    flags = _re.IGNORECASE

    def __init__(self, template):
        self.template = template

    def _invalid(self, mo):
        """Raise ValueError locating the ill-formed placeholder matched by mo.

        The message reports the 1-based line and column computed from the
        start of the ``invalid`` group within the template text.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(keepends=True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column is the offset past everything on the preceding lines.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values are looked up in an optional positional mapping and/or in
        the keyword arguments (keywords win on conflict) and converted
        with str().

        Raises TypeError for more than one positional argument, KeyError
        (or whatever the mapping raises) for a missing name, and
        ValueError for an ill-formed placeholder.
        """
        mapping = _substitution_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolvable placeholders untouched.

        A placeholder whose lookup raises KeyError, and any ill-formed
        placeholder, is copied to the result unchanged instead of raising.

        Raises TypeError for more than one positional argument.
        """
        mapping = _substitution_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)



########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class

# The hard parts are reused from the C implementation.  They're exposed as
# functions of the private _string module.

# The overall parser is implemented in _string.formatter_parser.
# The field name parser is implemented in _string.formatter_field_name_split

class Formatter:
    """Customizable implementation of format-string processing.

    The work is split into small overridable methods (parse, get_field,
    get_value, convert_field, format_field, check_unused_args) so that
    subclasses can change individual steps.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string using the given positional/keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format format_string with args (a sequence) and kwargs (a mapping).

        After formatting, check_unused_args() is called with the set of
        argument keys that were actually referenced.
        """
        used_args = set()
        # The initial depth of 2 bounds how deeply replacement fields may
        # nest inside format specs before _vformat gives up.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Return format_string with every replacement field expanded.

        Each referenced argument key is added to used_args.  Raises
        ValueError once recursion_depth drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth - 1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Return args[key] for an integer key, else kwargs[key]."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Return value formatted with the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a conversion flag ('r', 's', 'a' or None) to value.

        None returns value unchanged.  Any other flag raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            # str.format() accepts !a as well, so keep the two in step.
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into (literal, field, spec, conversion) tuples."""
        return _string.formatter_parser(format_string)

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name and return (object, first_key).

        first_key is the leading argument index or name; the remaining
        ".attr" / "[item]" parts are applied to the value it selects.
        """
        first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first