# module 'string' -- A collection of string operations

# Warning: most of the code you see here isn't normally used nowadays.
# At the end of this file most functions are replaced by built-in
# functions imported from built-in module "strop".

"""Common string manipulations.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits

"""

# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdefABCDEF'
octdigits = digits[:8]

# Case conversion helpers
# _idmap is the identity table: the character at index n has code n.
_idmap = ''
for i in range(256):
	_idmap = _idmap + chr(i)
del i
# Each table below is _idmap with one alphabet range overwritten.
_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:]

# Backward compatible names for exceptions
index_error = atoi_error = atof_error = atol_error = ValueError

# convert UPPER CASE letters to lower case
def lower(s):
	"""lower(s) -> string

	Return a copy of the string s converted to lowercase.

	Every character is replaced by the entry of the module-level
	_lower table found at that character's code.

	"""
	converted = ''
	for code in map(ord, s):
		converted = converted + _lower[code]
	return converted

# Convert lower case letters to UPPER CASE
def upper(s):
	"""upper(s) -> string

	Return a copy of the string s converted to uppercase.

	Every character is replaced by the entry of the module-level
	_upper table found at that character's code.

	"""
	converted = ''
	for code in map(ord, s):
		converted = converted + _upper[code]
	return converted

# Swap lower case letters and UPPER CASE
def swapcase(s):
	"""swapcase(s) -> string

	Return a copy of the string s with upper case characters
	converted to lowercase and vice versa.

	Every character is replaced by the entry of the module-level
	_swapcase table found at that character's code.

	"""
	converted = ''
	for code in map(ord, s):
		converted = converted + _swapcase[code]
	return converted

# Strip leading and trailing whitespace
def strip(s):
	"""strip(s) -> string

	Return a copy of the string s with leading and trailing
	whitespace removed.  A character counts as whitespace when it
	occurs in the module variable 'whitespace'.

	"""
	start = 0
	stop = len(s)
	# Advance past the leading whitespace ...
	while start < stop and s[start] in whitespace:
		start = start + 1
	# ... then back up over the trailing whitespace.
	while stop > start and s[stop-1] in whitespace:
		stop = stop - 1
	return s[start:stop]

# Strip leading whitespace
def lstrip(s):
	"""lstrip(s) -> string

	Return a copy of the string s with leading whitespace removed.
	A character counts as whitespace when it occurs in the module
	variable 'whitespace'.

	"""
	length = len(s)
	start = 0
	while start < length and s[start] in whitespace:
		start = start + 1
	return s[start:length]

# Strip trailing whitespace
def rstrip(s):
	"""rstrip(s) -> string

	Return a copy of the string s with trailing whitespace
	removed.  A character counts as whitespace when it occurs in
	the module variable 'whitespace'.

	"""
	stop = len(s)
	while stop > 0 and s[stop-1] in whitespace:
		stop = stop - 1
	return s[0:stop]

# Split a string into a list of space/tab-separated words
# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
	"""split(str [,sep [,maxsplit]]) -> list of strings

	Return a list of the words in the string s.  If sep is given
	(not None) the work is delegated to splitfields(s, sep,
	maxsplit).  Otherwise words are separated by runs of
	whitespace characters, and leading and trailing whitespace
	produces no empty words.

	If maxsplit is positive, at most maxsplit words are split off;
	whatever follows them (minus its leading whitespace) becomes
	one final element.  Maxsplit defaults to 0, meaning no limit.

	"""
	if sep is not None:
		return splitfields(s, sep, maxsplit)
	words = []
	length = len(s)
	if maxsplit <= 0:
		# There can never be more than len(s) words: no limit.
		maxsplit = length
	pos = 0
	nsplits = 0
	while pos < length:
		# Skip the whitespace in front of the next word.
		while pos < length and s[pos] in whitespace:
			pos = pos + 1
		if pos == length:
			break
		if nsplits >= maxsplit:
			# Limit reached: the remainder is the last element.
			words.append(s[pos:])
			break
		end = pos
		while end < length and s[end] not in whitespace:
			end = end + 1
		words.append(s[pos:end])
		nsplits = nsplits + 1
		pos = end
	return words

# Split a string into fields separated by a given string
# NB: splitfields(s, ' ') is NOT the same as split(s)!
# splitfields(s, '') returns [s] (in analogy with split() in nawk)
def splitfields(s, sep=None, maxsplit=0):
	"""splitfields(str [,sep [,maxsplit]]) -> list of strings

	Return a list of the fields of the string s, using sep as the
	delimiter string.  Adjacent delimiters produce empty fields.
	If sep is None the work is delegated to split(s, None,
	maxsplit); if sep is the empty string the result is [s].

	If maxsplit is positive, at most maxsplit splits are made and
	the remainder of s becomes the final element.  Maxsplit
	defaults to 0, meaning no limit.

	"""
	if sep is None:
		return split(s, None, maxsplit)
	seplen = len(sep)
	if seplen == 0:
		return [s]
	total = len(s)
	if maxsplit <= 0:
		# There can never be more than len(s) splits: no limit.
		maxsplit = total
	fields = []
	start = scan = 0
	nsplits = 0
	while scan + seplen <= total:
		if s[scan:scan+seplen] != sep:
			scan = scan + 1
			continue
		# Delimiter found: emit the field in front of it and
		# resume scanning right after the delimiter.
		fields.append(s[start:scan])
		start = scan = scan + seplen
		nsplits = nsplits + 1
		if nsplits >= maxsplit:
			break
	fields.append(s[start:])
	return fields

# Join words with spaces between them
def join(words, sep = ' '):
	"""join(list [,sep]) -> string

	Return a string composed of the words in list, with
	intervening occurrences of sep.  Sep defaults to a single
	space.

	This simply calls joinfields(words, sep); the two names are
	synonymous.

	"""
	joined = joinfields(words, sep)
	return joined

# Join fields with optional separator
def joinfields(words, sep = ' '):
	"""joinfields(list [,sep]) -> string

	Return a string composed of the words in list, with
	intervening occurrences of sep.  The default separator is a
	single space.  An empty list gives the empty string.

	(joinfields and join are synonymous)

	"""
	pieces = ''
	for word in words:
		pieces = pieces + sep + word
	# Every word was prefixed with sep; drop the very first one.
	return pieces[len(sep):]

# Find substring, raise exception if not found
def index(s, sub, i = 0, last=None):
	"""index(s, sub [,start [,end]]) -> int

	Return the lowest index in s where substring sub is found,
	such that sub is contained within s[start:end].  Optional
	arguments start and end are interpreted as in slice notation.

	Raise ValueError if not found.

	"""
	if last is None:
		last = len(s)
	pos = find(s, sub, i, last)
	if pos >= 0:
		return pos
	raise ValueError('substring not found in string.index')

# Find last substring, raise exception if not found
def rindex(s, sub, i = 0, last=None):
	"""rindex(s, sub [,start [,end]]) -> int

	Return the highest index in s where substring sub is found,
	such that sub is contained within s[start:end].  Optional
	arguments start and end are interpreted as in slice notation.

	Raise ValueError if not found.

	"""
	if last is None: last = len(s)
	res = rfind(s, sub, i, last)
	if res < 0:
		# Name this function, not index(), in the message.
		raise ValueError('substring not found in string.rindex')
	return res

# Count non-overlapping occurrences of substring
def count(s, sub, i = 0, last=None):
	"""count(s, sub[, start[,end]]) -> int

	Return the number of non-overlapping occurrences of substring
	sub in string s[start:end].  Optional arguments start and end
	are interpreted as in slice notation.

	An empty sub matches at every position from start through end
	inclusive; the result is never negative.

	"""
	Slen = len(s)  # cache this value, for speed
	if last is None:
		last = Slen
	elif last < 0:
		last = max(0, last + Slen)
	elif last > Slen:
		last = Slen
	if i < 0: i = max(0, i + Slen)
	n = len(sub)
	# m is one past the last index at which sub could still start.
	m = last + 1 - n
	if n == 0:
		# start may lie beyond end; that is zero matches, not a
		# negative number of them.
		return max(0, m-i)
	r = 0
	while i < m:
		if sub == s[i:i+n]:
			r = r+1
			# Skip the whole match so occurrences don't overlap.
			i = i+n
		else:
			i = i+1
	return r

# Find substring, return -1 if not found
def find(s, sub, i = 0, last=None):
	"""find(s, sub [,start [,end]]) -> int

	Return the lowest index in s where substring sub is found,
	such that sub is contained within s[start:end].  Optional
	arguments start and end are interpreted as in slice notation.

	Return -1 on failure.

	"""
	length = len(s)
	# Normalise end the way a slice would: None or too large means
	# the end of s, negative counts from the end.
	if last is None or last > length:
		last = length
	elif last < 0:
		last = max(0, last + length)
	if i < 0:
		i = max(0, i + length)
	width = len(sub)
	# One past the last index at which sub could still start.
	limit = last + 1 - width
	for pos in range(i, limit):
		if sub == s[pos:pos+width]:
			return pos
	return -1

# Find last substring, return -1 if not found
def rfind(s, sub, i = 0, last=None):
	"""rfind(s, sub [,start [,end]]) -> int

	Return the highest index in s where substring sub is found,
	such that sub is contained within s[start:end].  Optional
	arguments start and end are interpreted as in slice notation.

	Return -1 on failure.

	"""
	length = len(s)
	# Normalise end the way a slice would: None or too large means
	# the end of s, negative counts from the end.
	if last is None or last > length:
		last = length
	elif last < 0:
		last = max(0, last + length)
	if i < 0:
		i = max(0, i + length)
	width = len(sub)
	# One past the last index at which sub could still start.
	limit = last + 1 - width
	# Walk the candidate positions from the right; the first hit is
	# the highest matching index.
	for pos in range(limit - 1, i - 1, -1):
		if sub == s[pos:pos+width]:
			return pos
	return -1

# "Safe" environment for eval()
_safe_env = {"__builtins__": {}}

# Convert string to float
_re = None
def atof(str):
	"""atof(s) -> float

	Return the floating point number represented by the string s.

	Surrounding whitespace and a single leading sign are accepted.
	Raise ValueError if what remains is empty or is not a decimal
	floating point literal.

	"""
	global _re
	if _re is None:
		# Don't fail if re doesn't exist -- just skip the syntax check
		try:
			import re
		except ImportError:
			_re = 0
		else:
			_re = re
	sign = ''
	s = strip(str)
	if s and s[0] in '+-':
		sign = s[0]
		s = s[1:]
	if not s:
		raise ValueError('non-float argument to string.atof')
	# Drop redundant leading zeros (presumably so that eval() does
	# not read the text as an octal literal).
	while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:]
	# Require at least one mantissa digit: a bare exponent such as
	# 'e5' would otherwise reach eval() as a variable name.
	if _re and not _re.match(r'([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][-+]?[0-9]+)?$', s):
		raise ValueError('non-float argument to string.atof')
	# NOTE(review): eval() of caller-supplied text.  The pattern above
	# restricts it to numeric literals, but when the re module is
	# unavailable only the emptied builtins of _safe_env stand between
	# the input and the evaluator.
	try:
		return float(eval(sign + s, _safe_env))
	except (SyntaxError, NameError, TypeError):
		# Report anything that is not a plain number uniformly.
		raise ValueError('non-float argument to string.atof')

# Convert string to integer
def atoi(str, base=10):
	"""atoi(s [,base]) -> int

	Return the integer represented by the string s.  The string
	must consist of one or more decimal digits, possibly preceded
	by a sign; surrounding whitespace is ignored.

	This pure-Python version only supports base 10 (the default);
	any other base raises ValueError.  ValueError is also raised
	when s is not a valid integer.

	"""
	if base != 10:
		# We only get here if strop doesn't define atoi()
		raise ValueError("this string.atoi doesn't support base != 10")
	text = strip(str)
	sign = ''
	if text and text[0] in '+-':
		sign, text = text[0], text[1:]
	if not text:
		raise ValueError('non-integer argument to string.atoi')
	# Drop leading zeros, but keep a lone '0'.
	while text[0] == '0' and len(text) > 1:
		text = text[1:]
	for ch in text:
		if ch not in digits:
			raise ValueError('non-integer argument to string.atoi')
	# Only an optional sign and decimal digits reach eval() here.
	return eval(sign + text, _safe_env)

# Convert string to long integer
def atol(str, base=10):
	"""atol(s [,base]) -> long

	Return the long integer represented by the string s.  The
	string must consist of one or more decimal digits, possibly
	preceded by a sign; surrounding whitespace is ignored.  A
	trailing L or l is not accepted.

	This pure-Python version only supports base 10 (the default);
	any other base raises ValueError.  ValueError is also raised
	when s is not a valid integer.

	"""
	if base != 10:
		# We only get here if strop doesn't define atol()
		raise ValueError("this string.atol doesn't support base != 10")
	text = strip(str)
	sign = ''
	if text and text[0] in '+-':
		sign, text = text[0], text[1:]
	if not text:
		raise ValueError('non-integer argument to string.atol')
	# Drop leading zeros, but keep a lone '0'.
	while text[0] == '0' and len(text) > 1:
		text = text[1:]
	for ch in text:
		if ch not in digits:
			raise ValueError('non-integer argument to string.atol')
	# The appended 'L' makes eval() read a long integer literal.
	return eval(sign + text + 'L', _safe_env)

# Left-justify a string
def ljust(s, width):
	"""ljust(s, width) -> string

	Return a left-justified version of s, in a field of the
	specified width, padded with spaces as needed.  The string is
	never truncated.

	"""
	pad = width - len(s)
	if pad > 0:
		return s + ' '*pad
	return s

# Right-justify a string
def rjust(s, width):
	"""rjust(s, width) -> string

	Return a right-justified version of s, in a field of the
	specified width, padded with spaces as needed.  The string is
	never truncated.

	"""
	pad = width - len(s)
	if pad > 0:
		return ' '*pad + s
	return s

# Center a string
def center(s, width):
	"""center(s, width) -> string

	Return a centered version of s, in a field of the specified
	width, padded with spaces as needed.  The string is never
	truncated.

	When the padding cannot be split evenly, the extra space goes
	on the left if width is odd and on the right if width is even.

	"""
	n = width - len(s)
	if n <= 0: return s
	# divmod keeps 'half' an integer whatever the division mode; a
	# plain n/2 becomes a float under true division and then cannot
	# be used as a repeat count.
	half, odd = divmod(n, 2)
	if odd and width%2:
		# This ensures that center(center(s, i), j) = center(s, j)
		half = half+1
	return ' '*half +  s + ' '*(n-half)

# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
	"""zfill(x, width) -> string

	Pad a numeric string x with zeros on the left, to fill a field
	of the specified width.  The string x is never truncated.  A
	leading '+' or '-' sign stays in front of the inserted zeros.

	If x is not a string, its repr() is padded instead.

	"""
	if type(x) == type(''): s = x
	else: s = repr(x)
	n = len(s)
	if n >= width: return s
	sign = ''
	# Slice rather than index: s may be the empty string here.
	if s[:1] in ('-', '+'):
		sign, s = s[0], s[1:]
	return sign + '0'*(width-n) + s

# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
	"""expandtabs(s [,tabsize]) -> string

	Return a copy of the string s with all tab characters replaced
	by the appropriate number of spaces, depending on the current
	column, and the tabsize (default 8).  The column is reset
	after every newline.

	If tabsize is zero or negative, tab characters are removed.

	"""
	res = line = ''
	for c in s:
		if c == '\t':
			if tabsize > 0:
				# Pad up to the next multiple of tabsize.
				c = ' '*(tabsize - len(line)%tabsize)
			else:
				# No tab stops: drop the tab (and avoid a
				# modulo by zero when tabsize is 0).
				c = ''
		line = line + c
		if c == '\n':
			# A finished line is flushed so that the column
			# count starts again at zero.
			res = res + line
			line = ''
	return res + line

# Character translation through look-up table.
def translate(s, table, deletions=""):
	"""translate(s,table [,deletechars]) -> string

	Return a copy of the string s, where all characters occurring
	in the optional argument deletechars are removed, and the
	remaining characters have been mapped through the given
	translation table, which must be a string of length 256.

	Raise TypeError if table is not a string of length 256.

	"""
	if type(table) != type('') or len(table) != 256:
		raise TypeError("translation table must be 256 characters long")
	kept = ""
	for ch in s:
		if ch in deletions:
			continue
		kept = kept + table[ord(ch)]
	return kept

# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
	"""capitalize(s) -> string

	Return a copy of the string s with its first character
	converted by upper() and all remaining characters converted
	by lower().

	"""
	head = upper(s[:1])
	tail = lower(s[1:])
	return head + tail

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
	"""capwords(s, [sep]) -> string

	Split the argument into words using split, capitalize each
	word using capitalize, and join the capitalized words using
	join.  With the default sep of None this replaces runs of
	whitespace characters by a single space; otherwise the words
	are joined with sep again.

	"""
	words = split(s, sep)
	capitalized = map(capitalize, words)
	return join(capitalized, sep or ' ')

# Construct a translation string
# _idmapL caches the identity table as a list of characters; it is
# built the first time maketrans() is called.
_idmapL = None
def maketrans(fromstr, tostr):
	"""maketrans(frm, to) -> string

	Return a translation table (a string of 256 bytes long)
	suitable for use in string.translate.  The strings frm and to
	must be of the same length; each character of frm is mapped
	to the character at the same position in to.

	Raise ValueError if the two strings differ in length.

	"""
	global _idmapL
	if len(fromstr) != len(tostr):
		raise ValueError("maketrans arguments must have same length")
	if not _idmapL:
		_idmapL = list(_idmap)
	# Work on a copy so the cached identity table stays pristine.
	table = _idmapL[:]
	for pos in range(len(fromstr)):
		table[ord(fromstr[pos])] = tostr[pos]
	return joinfields(table, "")

# Substring replacement (global)
def replace(str, old, new, maxsplit=0):
	"""replace (str, old, new[, maxsplit]) -> string

	Return a copy of string str with all occurrences of substring
	old replaced by new. If the optional argument maxsplit is
	given and positive, only the first maxsplit occurrences are
	replaced.

	The string is cut at old with splitfields and glued together
	again with new using joinfields.

	"""
	pieces = splitfields(str, old, maxsplit)
	return joinfields(pieces, new)


# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
	from strop import *
except ImportError:
	pass # Use the original, slow versions
else:
	# lowercase and uppercase may just have been replaced by strop's
	# values, so letters has to be recomputed from them.
	letters = lowercase + uppercase