"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""

# Public API of this module: the names bound by ``from string import *``.
__all__ = ["ascii_letters", "ascii_lowercase", "ascii_uppercase", "capwords",
           "digits", "hexdigits", "octdigits", "printable", "punctuation",
           "whitespace", "Formatter", "Template"]

import _string

# Some strings for ctype-style character classification.
# Each constant is a plain str; the composite ones are built from the
# simpler ones so the definitions cannot drift apart.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw triple-quoted literal so that the quote characters and the backslash
# can be written without escaping.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace


# Functions which aren't available as string methods.

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If the optional second argument sep is absent or None,
    runs of whitespace characters are replaced by a single space
    and leading and trailing whitespace are removed, otherwise
    sep is used to split and join the words.

    """
    # With sep=None, split() discards leading/trailing/repeated whitespace,
    # so the words are rejoined with a single space; otherwise the very
    # same separator is used for both splitting and joining.
    return (sep or ' ').join(x.capitalize() for x in s.split(sep))


####################################################################
import re as _re
from collections import ChainMap as _ChainMap

class _TemplateMetaclass(type):
    """Metaclass that installs a compiled placeholder regex on each class.

    When a class using this metaclass is created, its ``pattern`` attribute
    is replaced by a compiled regular expression.  If the class body defines
    ``pattern`` itself, that value is compiled as is; otherwise the default
    pattern below is filled in from the class's ``delimiter``, ``idpattern``
    and ``braceidpattern`` attributes.  In both cases the class's ``flags``
    are combined with ``re.VERBOSE``.
    """

    # Default pattern source (verbose-mode regex).  The %(...)s slots are
    # substituted in __init__; the named groups are the ones Template's
    # substitution callbacks look up.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(bid)s)}  |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        """Compile and store the placeholder pattern for the new class *cls*."""
        super().__init__(name, bases, dct)
        if 'pattern' in dct:
            # The class body supplied its own pattern: use it verbatim.
            pattern = cls.pattern
        else:
            pattern = _TemplateMetaclass.pattern % {
                # The delimiter is literal text, so it must be escaped.
                'delim' : _re.escape(cls.delimiter),
                'id'    : cls.idpattern,
                # A falsy braceidpattern means "same as idpattern".
                'bid'   : cls.braceidpattern or cls.idpattern,
                }
        cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)


class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions."""

    delimiter = '$'
    # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but
    # without the ASCII flag.  We can't add re.ASCII to flags because of
    # backward compatibility.  So we use the ?a local flag and [a-z] pattern.
    # See https://bugs.python.org/issue31672
    idpattern = r'(?a:[_a-z][_a-z0-9]*)'
    braceidpattern = None
    flags = _re.IGNORECASE

    def __init__(self, template):
        """Remember *template*, the string in which placeholders are replaced."""
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s
    # (this is what the class-level ``pattern`` used below matches).

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder.

        *mo* is a match object of ``self.pattern`` whose 'invalid' group
        participated in the match.
        """
        i = mo.start('invalid')
        # keepends=True keeps the newline characters, so the lengths of the
        # earlier lines add up to the offset at which the last line starts.
        lines = self.template[:i].splitlines(keepends=True)
        if not lines:
            # Nothing precedes the match position.
            colno = 1
            lineno = 1
        else:
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(*args, **kws):
        """Return the template text with every placeholder replaced.

        Call as ``substitute(mapping, **kws)``: at most one positional
        mapping may be given, plus any number of keyword arguments.  When
        both are supplied, names are looked up in the keywords first and in
        the mapping second.  Each value is converted with str().

        Raises TypeError for a bad call signature, ValueError for an
        ill-formed placeholder, and propagates whatever the mapping lookup
        raises (KeyError for a dict) when a name is missing.
        """
        if not args:
            raise TypeError("descriptor 'substitute' of 'Template' object "
                            "needs an argument")
        self, *args = args  # allow the "self" keyword be passed
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _ChainMap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                # A doubled delimiter stands for one literal delimiter.
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """Like substitute(), but leave unresolvable placeholders in place.

        Takes the same arguments as substitute().  A placeholder whose
        lookup raises KeyError, and any ill-formed placeholder, is copied to
        the result unchanged instead of raising.
        """
        if not args:
            raise TypeError("descriptor 'safe_substitute' of 'Template' object "
                            "needs an argument")
        self, *args = args  # allow the "self" keyword be passed
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _ChainMap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    # Unknown name: keep the original placeholder text.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                # Ill-formed placeholder: keep the matched text as is.
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)



########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class

# The hard parts are reused from the C implementation.  They're exposed by
# the private "_string" module.

# The overall parser is implemented in _string.formatter_parser.
# The field name parser is implemented in _string.formatter_field_name_split

class Formatter:
    """Format strings in the style of str.format(), with overridable steps.

    format() and vformat() are the entry points.  The remaining public
    methods (parse, get_field, get_value, convert_field, format_field and
    check_unused_args) are the individual steps and may be overridden by
    subclasses to customise the behaviour.
    """

    def format(*args, **kwargs):
        """Format ``format_string`` with the given arguments.

        Call as ``format(format_string, *args, **kwargs)``.  The first
        positional argument after self is the format string; the remaining
        positional and keyword arguments are passed on to vformat().
        Raises TypeError when self or the format string is missing.
        """
        if not args:
            raise TypeError("descriptor 'format' of 'Formatter' object "
                            "needs an argument")
        self, *args = args  # allow the "self" keyword be passed
        try:
            format_string, *args = args # allow the "format_string" keyword be passed
        except ValueError:
            raise TypeError("format() missing 1 required positional "
                            "argument: 'format_string'") from None
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format *format_string* from a sequence *args* and mapping *kwargs*.

        After formatting, check_unused_args() is called with the set of
        argument keys that were actually referenced.
        """
        used_args = set()
        # The initial depth of 2 bounds how deeply replacement fields may be
        # nested inside format specs before _vformat raises ValueError.
        result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
                 auto_arg_index=0):
        """Do the formatting work; return ``(text, auto_arg_index)``.

        *used_args* is updated in place with the key of every argument that
        is referenced.  *auto_arg_index* is the next index for automatic
        numbering, or False once manual numbering has been used; it is
        threaded through the recursive calls that expand format specs.
        Raises ValueError when *recursion_depth* drops below zero or when
        automatic and manual field numbering are mixed.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # Decide between automatic and manual numbering from the
                # leading component of the field name, so that "{.attr}" and
                # "{[key]}" are auto-numbered exactly like "{}".
                field_first, _ = _string.formatter_field_name_split(field_name)
                if field_first == '':
                    if auto_arg_index is False:
                        raise ValueError('cannot switch from manual field '
                                         'specification to automatic field '
                                         'numbering')
                    # Prefix the index, keeping any ".attr"/"[key]" suffix.
                    field_name = str(auto_arg_index) + field_name
                    auto_arg_index += 1
                elif isinstance(field_first, int):
                    if auto_arg_index:
                        raise ValueError('cannot switch from automatic field '
                                         'numbering to manual field '
                                         'specification')
                    # disable auto arg incrementing, if it gets
                    # used later on, then an exception will be raised
                    auto_arg_index = False

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec, auto_arg_index = self._vformat(
                    format_spec, args, kwargs,
                    used_args, recursion_depth-1,
                    auto_arg_index=auto_arg_index)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result), auto_arg_index

    def get_value(self, key, args, kwargs):
        """Return the argument for *key*: ``args[key]`` if *key* is an int,
        otherwise ``kwargs[key]``."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Return *value* formatted with the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a ``!s``, ``!r`` or ``!a`` conversion to *value*.

        *conversion* None returns *value* unchanged; any other unknown
        specifier raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split *format_string* into (literal_text, field_name, format_spec,
        conversion) tuples using _string.formatter_parser."""
        return _string.formatter_parser(format_string)

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve *field_name* to an object; return ``(obj, first)``.

        *first* is the leading component of the field name (the key passed
        to get_value()); the remaining components are applied to the value
        as attribute lookups or index operations, in order.
        """
        first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first