# module 'string' -- A collection of string operations

# Warning: most of the code you see here isn't normally used nowadays.  With
# Python 1.6, many of these functions are implemented as methods on the
# standard string object. They used to be implemented by a built-in module
# called strop, but strop is now obsolete itself.

"""Common string manipulations.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits

"""

# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase          # lowercase first, then uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'

# Case conversion helpers
# Identity table: the character at index i is chr(i), for i in 0..255.
# Built with a single join so no temporary loop variable is left in the
# module namespace.
_idmap = ''.join(map(chr, range(256)))

# Backward compatible names for exceptions
# All four historical names are plain aliases of the built-in ValueError.
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError

# Convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return a copy of the string s converted to lowercase.

    Thin wrapper that delegates to the s.lower() method.

    """
    return s.lower()

# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return a copy of the string s converted to uppercase.

    Thin wrapper that delegates to the s.upper() method.

    """
    return s.upper()

# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s with upper case characters
    converted to lowercase and vice versa.

    Thin wrapper that delegates to the s.swapcase() method.

    """
    return s.swapcase()

# Strip leading and trailing whitespace
def strip(s):
    """strip(s) -> string

    Return a copy of the string s with leading and trailing
    whitespace removed.

    Thin wrapper that delegates to the s.strip() method.

    """
    return s.strip()

# Strip leading whitespace
def lstrip(s):
    """lstrip(s) -> string

    Return a copy of the string s with leading whitespace removed.

    Thin wrapper that delegates to the s.lstrip() method.

    """
    return s.lstrip()

# Strip trailing whitespace
def rstrip(s):
    """rstrip(s) -> string

    Return a copy of the string s with trailing whitespace
    removed.

    Thin wrapper that delegates to the s.rstrip() method.

    """
    return s.rstrip()


# Split a string into a list of space/tab-separated words
# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If maxsplit is nonzero, at most maxsplit
    splits are done.  If sep is not specified, any whitespace string
    is a separator.  Maxsplit defaults to 0, meaning no limit.

    (split and splitfields are synonymous)

    """
    # The s.split() method treats a count of 0 as "do not split at all",
    # so the documented "0 means unlimited" default must not be forwarded.
    if maxsplit == 0:
        return s.split(sep)
    return s.split(sep, maxsplit)
splitfields = split

# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  The default separator is a
    single space.

    (joinfields and join are synonymous)

    """
    return sep.join(words)
joinfields = join

# for a little bit of speed
# The apply() builtin is missing on newer interpreters; fall back to an
# equivalent helper so callers of _apply keep working either way.
try:
    _apply = apply
except NameError:
    def _apply(func, args=(), kwargs=None):
        """Call func with positional args and optional keyword args."""
        if kwargs is None:
            return func(*args)
        return func(*args, **kwargs)

# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    The extra arguments are passed unchanged to the s.index() method.

    """
    return s.index(*args)

# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    The extra arguments are passed unchanged to the s.rindex() method.

    """
    return s.rindex(*args)

# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    The extra arguments are passed unchanged to the s.count() method.

    """
    return s.count(*args)

# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    The extra arguments are passed unchanged to the s.find() method.

    """
    return s.find(*args)

# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    The extra arguments are passed unchanged to the s.rfind() method.

    """
    return s.rfind(*args)

# for a bit of speed
_float = float
_int = int
# Interpreters without a separate long type use int for long integers.
try:
    _long = long
except NameError:
    _long = int
_StringType = type('')

# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.

    Raises TypeError if s is not a string; any error raised by
    float() for a malformed string propagates unchanged.

    """
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return float(s)

# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  All arguments are
    forwarded to int(), so the handling of base (including base 0
    and 0x/0X prefixes) is whatever int() implements.

    Raises TypeError if called without arguments or if s is not a
    string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # Don't catch type error resulting from too many arguments to int().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return int(*args)

# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  All
    arguments are forwarded to long() (or to int() when no separate
    long type exists), so the handling of base, prefixes and a
    trailing L is whatever that converter implements.

    Raises TypeError if called without arguments or if s is not a
    string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # Don't catch type error resulting from too many arguments to long().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Interpreters without a separate long type use int for long integers.
    try:
        convert = long
    except NameError:
        convert = int
    return convert(*args)

# Left-justify a string
def ljust(s, width):
    """ljust(s, width) -> string

    Return a left-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    n = width - len(s)          # number of pad characters still needed
    if n <= 0: return s
    return s + ' '*n

# Right-justify a string
def rjust(s, width):
    """rjust(s, width) -> string

    Return a right-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    n = width - len(s)          # number of pad characters still needed
    if n <= 0: return s
    return ' '*n + s

# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    n = width - len(s)          # total number of pad characters needed
    if n <= 0: return s
    # Floor division: the pad count must stay an integer so it can be used
    # as a string repeat count.
    half = n // 2
    if n%2 and width%2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half+1
    return ' '*half +  s + ' '*(n-half)

# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.

    A leading '+' or '-' sign stays in front of the inserted zeros.
    If x is not a string, its repr() is padded instead.

    """
    if isinstance(x, str): s = x
    else: s = repr(x)
    n = len(s)
    if n >= width: return s
    sign = ''
    # Slice rather than index so that an empty string does not raise.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0'*(width-n) + s

# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).

    The column is reset after each newline.  A tabsize of zero or
    less removes the tab characters.

    """
    if tabsize <= 0:
        # No tab stops: every tab expands to nothing.  This also avoids a
        # modulo-by-zero when tabsize is 0.
        return s.replace('\t', '')
    pieces = []                 # output fragments, joined once at the end
    column = 0                  # width of the current line so far
    for c in s:
        if c == '\t':
            c = ' '*(tabsize - column % tabsize)
        pieces.append(c)
        if c == '\n':
            column = 0
        else:
            column = column + len(c)
    return ''.join(pieces)

# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of the string s, where all characters occurring
    in the optional argument deletechars are removed, and the
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.

    Raises ValueError if the fallback path is used and the table is
    not 256 characters long.

    """
    try:
        # String types whose translate() accepts the deletion set directly.
        return s.translate(table, deletions)
    except TypeError:
        # translate() only takes the table here; delete by hand below.
        pass
    if len(table) != 256:
        raise ValueError("translation table must be 256 characters long")
    kept = [c for c in s if c not in deletions]
    return ''.join(kept).translate(table)

# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s with only its first character
    capitalized.

    Thin wrapper that delegates to the s.capitalize() method.

    """
    return s.capitalize()

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  When sep is not given, runs of whitespace characters
    are replaced by a single space and leading and trailing
    whitespace is dropped; otherwise sep is used both to split and
    to join the words.

    """
    words = [word.capitalize() for word in s.split(sep)]
    return (sep or ' ').join(words)

# Construct a translation string
_idmapL = None                  # cached list form of the identity table
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length.

    Each character frm[i] is mapped to to[i]; all other characters
    map to themselves.  Raises ValueError if the lengths differ.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        # Build the identity table once and reuse it on later calls.
        _idmapL = [chr(code) for code in range(256)]
    table = _idmapL[:]          # work on a copy so the cache stays pristine
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return "".join(table)

# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new. If the optional argument maxsplit is
    given and nonzero, only the first maxsplit occurrences are
    replaced.

    """
    # The s.replace() method treats a count of 0 as "replace nothing", so
    # the default (meaning "no limit") must not be forwarded as a count.
    if maxsplit == 0:
        return s.replace(old, new)
    return s.replace(old, new, maxsplit)


# XXX: transitional
#
# If string objects do not have methods, then we need to use the old string.py
# library, which uses strop for many more things than just the few outlined
# below.
# NOTE(review): if this file is itself stringold.py, the import below would
# be a self-import -- confirm which module this code is meant to live in.
try:
    ''.upper                    # probe: do string objects have methods?
except AttributeError:
    from stringold import *

# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
    # Recompute letters so it agrees with the strop character classes.
    letters = lowercase + uppercase
except ImportError:
    pass                                          # Use the original versions