"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""

17 18
import _string

Guido van Rossum's avatar
Guido van Rossum committed
19
# Some strings for ctype-style character classification.
# The composite constants are built by concatenating the simpler ones, so
# the order of characters inside them follows the order of the operands.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the backslash in front of "]" is a literal character.  In a
# non-raw literal "\]" is an invalid escape sequence, which still yields the
# same text but triggers a warning on newer interpreters.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace
Guido van Rossum's avatar
Guido van Rossum committed
29

30 31 32 33
# Functions which aren't available as string methods.

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If the optional second argument sep is absent or None,
    runs of whitespace characters are replaced by a single space
    and leading and trailing whitespace are removed, otherwise
    sep is used to split and join the words.

    Example: capwords(" aBc  dEf ") returns "Abc Def".

    """
    # A falsy sep (None) means "split on whitespace runs"; the pieces are
    # then re-joined with a single space.
    return (sep or ' ').join(x.capitalize() for x in s.split(sep))
45 46


47
####################################################################
48
import re as _re
49
from collections import ChainMap
50

51 52
class _TemplateMetaclass(type):
    """Metaclass that compiles the substitution regex for each Template class.

    The class attribute ``pattern`` below is a %-format skeleton with the
    keys ``delim`` and ``id``.  It searches for an escaped delimiter
    ($$ by default), a named placeholder ($identifier), a braced placeholder
    (${identifier}) and any other, ill-formed, use of the delimiter.
    """

    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        """Compile ``cls.pattern`` when a class using this metaclass is created.

        A class that defines its own ``pattern`` in its body has that pattern
        compiled as is; otherwise the skeleton above is filled in with the
        class's (escaped) ``delimiter`` and its ``idpattern``.  In both cases
        the regex is compiled with ``cls.flags | re.VERBOSE``.
        """
        super().__init__(name, bases, dct)
        if 'pattern' in dct:
            pattern = cls.pattern
        else:
            pattern = _TemplateMetaclass.pattern % {
                'delim' : _re.escape(cls.delimiter),
                'id'    : cls.idpattern,
                }
        cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)


def _template_call_args(method_name, args, kws):
    """Split the raw ``*args, **kws`` of a Template method into (self, mapping).

    The public methods take ``*args`` so that a placeholder called "self"
    can be supplied as a keyword argument.  ``args`` must hold the instance
    followed by at most one mapping.  When both a mapping and keywords are
    given they are layered in a ChainMap with the keywords taking precedence.

    Raises TypeError if the instance is missing or more than one positional
    mapping was passed.
    """
    if not args:
        raise TypeError("descriptor %r of 'Template' object "
                        "needs an argument" % (method_name,))
    self, *args = args  # allow the "self" keyword be passed
    if len(args) > 1:
        raise TypeError('Too many positional arguments')
    if not args:
        mapping = kws
    elif kws:
        mapping = ChainMap(kws, args[0])
    else:
        mapping = args[0]
    return self, mapping


class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions."""

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'
    flags = _re.IGNORECASE

    def __init__(self, template):
        """Store the template string; it is only scanned when substituting."""
        self.template = template

    def _invalid(self, mo):
        """Raise ValueError reporting the line and column of a bad placeholder.

        ``mo`` is the match object whose 'invalid' group matched.
        """
        i = mo.start('invalid')
        # Keep the line endings so that the joined length of the preceding
        # lines equals the offset at which the current line starts.
        lines = self.template[:i].splitlines(keepends=True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(*args, **kws):
        """Return the template with every placeholder replaced.

        Values are looked up in the optional positional mapping and/or the
        keyword arguments (keywords win).  A missing name propagates the
        mapping's lookup error (KeyError for a dict); an ill-formed
        placeholder raises ValueError.
        """
        self, mapping = _template_call_args('substitute', args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """Like substitute(), but leave unresolvable placeholders untouched.

        A placeholder whose name raises KeyError on lookup, or that is
        ill-formed, is copied to the output unchanged instead of raising.
        """
        self, mapping = _template_call_args('safe_substitute', args, kws)

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
157 158 159 160 161 162 163



########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class

164
# The hard parts are reused from the C implementation.  They're exposed as
# functions of the private _string module.

# The overall parser is implemented in _string.formatter_parser.
# The field name parser is implemented in _string.formatter_field_name_split
169 170

class Formatter:
    """Python-level, overridable implementation of format-string expansion.

    Subclasses customise behaviour by overriding parse(), get_field(),
    get_value(), check_unused_args(), format_field() or convert_field().
    """

    def format(*args, **kwargs):
        """Format ``format_string`` with the remaining arguments.

        Called as ``formatter.format(format_string, *args, **kwargs)``.  The
        signature is ``*args`` so that fields named "self" or "format_string"
        can be passed as keywords.  Passing the format string itself as the
        ``format_string`` keyword still works but emits a DeprecationWarning.

        Raises TypeError when the instance or the format string is missing.
        """
        if not args:
            raise TypeError("descriptor 'format' of 'Formatter' object "
                            "needs an argument")
        self, *args = args  # allow the "self" keyword be passed
        try:
            format_string, *args = args # allow the "format_string" keyword be passed
        except ValueError:
            if 'format_string' in kwargs:
                format_string = kwargs.pop('format_string')
                import warnings
                warnings.warn("Passing 'format_string' as keyword argument is "
                              "deprecated", DeprecationWarning, stacklevel=2)
            else:
                raise TypeError("format() missing 1 required positional "
                                "argument: 'format_string'") from None
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format with explicit ``args`` (sequence) and ``kwargs`` (mapping).

        After formatting, check_unused_args() is called with the set of
        argument keys that were referenced.
        """
        used_args = set()
        # The second item is the final auto-numbering state; only the
        # recursive calls inside _vformat need it.
        result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
                 auto_arg_index=0):
        """Expand ``format_string`` and return ``(text, auto_arg_index)``.

        ``auto_arg_index`` is the index the next empty field ("{}") will
        use, or False once an explicitly numbered field has been seen.  It is
        returned so that fields consumed inside a nested format spec (e.g.
        the inner "{}" of "{:{}}") advance the numbering of the caller too.

        Raises ValueError when nesting exceeds ``recursion_depth`` or when
        automatic and manual field numbering are mixed.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # handle arg indexing when empty field_names are given.
                if field_name == '':
                    if auto_arg_index is False:
                        raise ValueError('cannot switch from manual field '
                                         'specification to automatic field '
                                         'numbering')
                    field_name = str(auto_arg_index)
                    auto_arg_index += 1
                elif field_name.isdigit():
                    # A non-zero counter means at least one "{}" field has
                    # already been numbered automatically.
                    if auto_arg_index:
                        raise ValueError('cannot switch from automatic field '
                                         'numbering to manual field '
                                         'specification')
                    # disable auto arg incrementing, if it gets
                    # used later on, then an exception will be raised
                    auto_arg_index = False

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed; take the numbering
                # state back so later fields continue after the nested ones
                format_spec, auto_arg_index = self._vformat(
                    format_spec, args, kwargs,
                    used_args, recursion_depth-1,
                    auto_arg_index=auto_arg_index)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result), auto_arg_index

    def get_value(self, key, args, kwargs):
        """Return the argument for ``key``: ``args[key]`` for an int key,
        ``kwargs[key]`` otherwise."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Format one value by calling the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a "!s", "!r" or "!a" conversion to ``value``.

        ``conversion`` of None returns the value unchanged; any other
        specifier raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split ``format_string`` into (literal_text, field_name,
        format_spec, conversion) tuples via _string.formatter_parser."""
        return _string.formatter_parser(format_string)

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve ``field_name`` and return ``(obj, first)``.

        ``first`` is the leading argument key of the field name (as split by
        _string.formatter_field_name_split); it is what vformat() records in
        its used-arguments set.
        """
        first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first