# module 'string' -- A collection of string operations

# Warning: most of the code you see here isn't normally used nowadays.  With
# Python 1.6, many of these functions are implemented as methods on the
# standard string object. They used to be implemented by a built-in module
# called strop, but strop is now obsolete itself.

"""Common string manipulations.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits

"""

# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'

# Case conversion helpers
# _idmap is the identity translation table: the character at index i is
# chr(i), for i in 0..255.  Built with a single join so that no loop
# variable is left behind in the module namespace.
_idmap = ''.join(map(chr, range(256)))

# Backward compatible names for exceptions
# (all four are plain aliases of the built-in ValueError)
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError

# convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return a copy of the string s converted to lowercase.
    Delegates to the s.lower() method.

    """
    return s.lower()

# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return a copy of the string s converted to uppercase.
    Delegates to the s.upper() method.

    """
    return s.upper()

# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s with upper case characters
    converted to lowercase and vice versa.
    Delegates to the s.swapcase() method.

    """
    return s.swapcase()

# Strip leading and trailing whitespace
def strip(s):
    """strip(s) -> string

    Return a copy of the string s with leading and trailing
    whitespace removed.
    Delegates to the s.strip() method.

    """
    return s.strip()

# Strip leading whitespace
def lstrip(s):
    """lstrip(s) -> string

    Return a copy of the string s with leading whitespace removed.
    Delegates to the s.lstrip() method.

    """
    return s.lstrip()

# Strip trailing whitespace
def rstrip(s):
    """rstrip(s) -> string

    Return a copy of the string s with trailing whitespace
    removed.
    Delegates to the s.rstrip() method.

    """
    return s.rstrip()


# Split a string into a list of separator-delimited words
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If maxsplit is nonzero, at most maxsplit
    splits are done.  If sep is not specified, any whitespace string
    is a separator.  Maxsplit defaults to 0, meaning "no limit".

    (split and splitfields are synonymous)

    """
    # The split() method treats a count of 0 literally ("do not split"),
    # whereas this function documents 0 as "no limit"; translate it to the
    # method's own "no limit" value.
    if not maxsplit:
        maxsplit = -1
    return s.split(sep, maxsplit)
splitfields = split

# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  The default separator is a
    single space.

    (joinfields and join are synonymous)

    """
    return sep.join(words)
joinfields = join

# for a little bit of speed
# The alias is kept for code that still calls _apply(); where the apply()
# builtin is unavailable, an equivalent plain-Python helper is used instead.
try:
    _apply = apply
except NameError:
    def _apply(func, args=(), kwargs=None):
        return func(*args, **(kwargs or {}))

# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.
    The extra arguments are passed unchanged to s.index().

    """
    return s.index(*args)

# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.
    The extra arguments are passed unchanged to s.rindex().

    """
    return s.rindex(*args)

# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.
    The extra arguments are passed unchanged to s.count().

    """
    return s.count(*args)

# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    return s.find(*args)

# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    return s.rfind(*args)

# for a bit of speed
_float = float
_int = int
try:
    _long = long
except NameError:
    # No separate long type is available; fall back to int.
    _long = int
_StringType = type('')

# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.

    Raises TypeError if s is not a string; float() raises ValueError
    if the string does not represent a number.

    """
    # isinstance (rather than an exact type comparison) also admits
    # subclasses of the string type.
    if not isinstance(s, _StringType):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return _float(s)

# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    Raises TypeError if called without arguments or if s is not a
    string; int() raises ValueError for a malformed number.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Don't catch type error resulting from too many arguments to int().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    return int(*args)

# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    Raises TypeError if called without arguments or if s is not a
    string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Don't catch type error resulting from too many arguments to long().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    # _long is the module-level alias for the long-integer constructor.
    return _long(*args)

# Left-justify a string
def ljust(s, width):
    """ljust(s, width) -> string

    Return a left-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    pad = width - len(s)
    if pad <= 0:
        # Already at least as wide as the field: return s unchanged.
        return s
    return s + ' ' * pad

# Right-justify a string
def rjust(s, width):
    """rjust(s, width) -> string

    Return a right-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    pad = width - len(s)
    if pad <= 0:
        # Already at least as wide as the field: return s unchanged.
        return s
    return ' ' * pad + s

# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    n = width - len(s)
    if n <= 0:
        return s
    # Floor division keeps the padding count an integer even where "/"
    # means true division.
    half = n // 2
    if n % 2 and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half + 1
    return ' ' * half + s + ' ' * (n - half)

# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.

    A non-string x is first converted with repr().  A leading '-' or
    '+' sign is kept in front of the inserted zeros.

    """
    if isinstance(x, str):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # Slice instead of indexing so that an empty string does not raise
    # IndexError; it is simply padded with zeros.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0' * (width - n) + s

# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).

    The column count restarts after every newline character.

    """
    # Collect the output pieces in a list and join once at the end, tracking
    # only the current column instead of re-measuring a growing line string.
    pieces = []
    column = 0
    for c in s:
        if c == '\t':
            # Spaces needed to reach the next multiple of tabsize.
            c = ' ' * (tabsize - column % tabsize)
        pieces.append(c)
        if c == '\n':
            column = 0
        else:
            column = column + len(c)
    return ''.join(pieces)

# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of the string s, where all characters occurring
    in the optional argument deletechars are removed, and the
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.

    """
    if not deletions:
        # Nothing to delete: use the one-argument form, which is also the
        # only form accepted by string types whose translate() method
        # takes just a table.
        return s.translate(table)
    return s.translate(table, deletions)

# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s with only its first character
    capitalized.
    Delegates to the s.capitalize() method.

    """
    return s.capitalize()

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join. Note that this replaces runs of whitespace characters by
    a single space.

    If sep is given (and non-empty) it is used both to split s and
    to join the capitalized words; otherwise words are joined with a
    single space.

    """
    words = [word.capitalize() for word in s.split(sep)]
    return (sep or ' ').join(words)

# Construct a translation string
# _idmapL caches the identity table as a list of 256 one-character strings;
# it is built on the first call to maketrans().
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length.

    Raises ValueError if the two arguments differ in length.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        _idmapL = [chr(i) for i in range(256)]
    # Work on a copy so the cached identity table is never modified.
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return "".join(table)

# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new. If the optional argument maxsplit is
    given and nonzero, only the first maxsplit occurrences are
    replaced.

    """
    # The replace() method treats a count of 0 literally ("replace
    # nothing"), whereas this function's default of 0 means "replace all";
    # translate it to the method's own "no limit" value.
    if not maxsplit:
        maxsplit = -1
    return s.replace(old, new, maxsplit)


# XXX: transitional
#
# If string objects do not have methods, then we need to use the old string.py
# library, which uses strop for many more things than just the few outlined
# below.
try:
    # Probe for string methods; only the attribute lookup matters here.
    ''.upper
except AttributeError:
    from stringold import *

# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
    # Recompute letters from the strop-provided lowercase/uppercase.
    letters = lowercase + uppercase
except ImportError:
    pass                                          # Use the original versions