stropmodule.c 27.4 KB
Newer Older
1 2
/* strop module */

3 4 5 6
#include "Python.h"
#include <ctype.h>

PyDoc_STRVAR(strop_module__doc__,
7 8 9
"Common string manipulations, optimized for speed.\n"
"\n"
"Always use \"import string\" rather than referencing\n"
10
"this module directly.");
11

12 13
/* XXX This file assumes that the <ctype.h> is*() functions
   XXX are defined for all 8-bit characters! */
14

15 16 17 18
/* Issue the module-wide DeprecationWarning; when PyErr_Warn() returns
   non-zero, make the enclosing function return NULL.  The do/while(0)
   wrapper makes "WARN;" a single statement, so the macro's "if" can
   never pair up with an "else" that follows the macro invocation. */
#define WARN \
	do { \
		if (PyErr_Warn(PyExc_DeprecationWarning, \
		       "strop functions are obsolete; use string methods")) \
			return NULL; \
	} while (0)

19 20 21 22 23 24 25 26
/* The lstrip(), rstrip() and strip() functions are implemented
   in do_strip(), which uses an additional parameter to indicate what
   type of strip should occur. */

#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

27

Barry Warsaw's avatar
Barry Warsaw committed
28
static PyObject *
Peter Schneider-Kamp's avatar
Peter Schneider-Kamp committed
29
split_whitespace(char *s, int len, int maxsplit)
30
{
Barry Warsaw's avatar
Barry Warsaw committed
31 32 33 34
	int i = 0, j, err;
	int countsplit = 0;
	PyObject* item;
	PyObject *list = PyList_New(0);
35 36 37 38 39

	if (list == NULL)
		return NULL;

	while (i < len) {
Guido van Rossum's avatar
Guido van Rossum committed
40
		while (i < len && isspace(Py_CHARMASK(s[i]))) {
41 42 43
			i = i+1;
		}
		j = i;
44
		while (i < len && !isspace(Py_CHARMASK(s[i]))) {
45 46 47
			i = i+1;
		}
		if (j < i) {
Barry Warsaw's avatar
Barry Warsaw committed
48
			item = PyString_FromStringAndSize(s+j, (int)(i-j));
Barry Warsaw's avatar
Barry Warsaw committed
49 50 51
			if (item == NULL)
				goto finally;

Barry Warsaw's avatar
Barry Warsaw committed
52 53
			err = PyList_Append(list, item);
			Py_DECREF(item);
Barry Warsaw's avatar
Barry Warsaw committed
54 55
			if (err < 0)
				goto finally;
56 57

			countsplit++;
58 59 60 61
			while (i < len && isspace(Py_CHARMASK(s[i]))) {
				i = i+1;
			}
			if (maxsplit && (countsplit >= maxsplit) && i < len) {
Barry Warsaw's avatar
Barry Warsaw committed
62 63
				item = PyString_FromStringAndSize(
                                        s+i, (int)(len - i));
Barry Warsaw's avatar
Barry Warsaw committed
64 65 66
				if (item == NULL)
					goto finally;

Barry Warsaw's avatar
Barry Warsaw committed
67 68
				err = PyList_Append(list, item);
				Py_DECREF(item);
Barry Warsaw's avatar
Barry Warsaw committed
69 70 71
				if (err < 0)
					goto finally;

72 73
				i = len;
			}
74 75 76
		}
	}
	return list;
Barry Warsaw's avatar
Barry Warsaw committed
77 78 79
  finally:
	Py_DECREF(list);
	return NULL;
80 81 82
}


83
PyDoc_STRVAR(splitfields__doc__,
84 85 86 87 88 89 90 91
"split(s [,sep [,maxsplit]]) -> list of strings\n"
"splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
"\n"
"Return a list of the words in the string s, using sep as the\n"
"delimiter string.  If maxsplit is nonzero, splits into at most\n"
"maxsplit words.  If sep is not specified, any whitespace string\n"
"is a separator.  Maxsplit defaults to 0.\n"
"\n"
92
"(split and splitfields are synonymous)");
93

Barry Warsaw's avatar
Barry Warsaw committed
94
/* split(s [,sep [,maxsplit]]): split s on the separator string sep.
   Without sep (or with sep=None) the work is delegated to
   split_whitespace().  An empty separator raises ValueError.  A non-zero
   maxsplit stops the scan after that many separators; the unsplit tail
   is always appended as the final item. */
static PyObject *
strop_splitfields(PyObject *self, PyObject *args)
{
	char *s;
	char *sub = NULL;
	int len;
	int n = 0;		/* length of sub */
	int maxsplit = 0;
	int nsplits = 0;
	int pos = 0;		/* scan position */
	int field = 0;		/* start of the field being collected */
	int status;
	PyObject *fields;
	PyObject *piece;

	WARN;
	if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
		return NULL;
	if (sub == NULL)
		return split_whitespace(s, len, maxsplit);
	if (n == 0) {
		PyErr_SetString(PyExc_ValueError, "empty separator");
		return NULL;
	}

	fields = PyList_New(0);
	if (fields == NULL)
		return NULL;

	while (pos+n <= len) {
		/* Cheap first-byte test before the full comparison. */
		if (s[pos] != sub[0] ||
		    (n != 1 && memcmp(s+pos, sub, n) != 0)) {
			pos++;
			continue;
		}
		piece = PyString_FromStringAndSize(s+field, (int)(pos-field));
		if (piece == NULL)
			goto error;
		status = PyList_Append(fields, piece);
		Py_DECREF(piece);
		if (status < 0)
			goto error;
		pos += n;
		field = pos;
		nsplits++;
		if (maxsplit && (nsplits >= maxsplit))
			break;
	}

	/* Everything after the last separator found. */
	piece = PyString_FromStringAndSize(s+field, (int)(len-field));
	if (piece == NULL)
		goto error;
	status = PyList_Append(fields, piece);
	Py_DECREF(piece);
	if (status < 0)
		goto error;

	return fields;

 error:
	Py_DECREF(fields);
	return NULL;
}


154
PyDoc_STRVAR(joinfields__doc__,
155 156 157 158 159 160 161
"join(list [,sep]) -> string\n"
"joinfields(list [,sep]) -> string\n"
"\n"
"Return a string composed of the words in list, with\n"
"intervening occurrences of sep.  Sep defaults to a single\n"
"space.\n"
"\n"
162
"(join and joinfields are synonymous)");
163

Barry Warsaw's avatar
Barry Warsaw committed
164
/* Helper for strop_joinfields(): append the separator (unless this is the
   first item) and then the bytes of the string object item to the string
   buffer *pres, doubling the buffer as needed.  *psz is the allocated
   size and *preslen the number of bytes used so far; both are updated.
   Returns 0 on success.  On failure an exception is set, *pres has been
   released and set to NULL, and -1 is returned. */
static int
joinfields_append(PyObject **pres, int *psz, int *preslen,
		  const char *sep, int seplen, PyObject *item, int first)
{
	int slen = PyString_GET_SIZE(item);
	int reslen = *preslen;
	char *p;

	/* reslen + slen + seplen is computed below; make sure it fits. */
	if (slen > INT_MAX - reslen || seplen > INT_MAX - reslen - slen)
		goto overflow;

	while (reslen + slen + seplen >= *psz) {
		if (*psz > INT_MAX / 2)
			goto overflow;
		/* On failure _PyString_Resize() releases the string and
		   stores NULL in *pres, so there is nothing to clean up. */
		if (_PyString_Resize(pres, *psz * 2) < 0)
			return -1;
		*psz *= 2;
	}

	p = PyString_AsString(*pres) + reslen;
	if (!first) {
		memcpy(p, sep, seplen);
		p += seplen;
		reslen += seplen;
	}
	memcpy(p, PyString_AS_STRING(item), slen);
	*preslen = reslen + slen;
	return 0;

  overflow:
	PyErr_SetString(PyExc_OverflowError, "input too long");
	Py_DECREF(*pres);
	*pres = NULL;
	return -1;
}

/* join(list [,sep]): concatenate the strings of a sequence, inserting sep
   (default: one space) between consecutive items.  A one-item sequence
   returns that item itself.  Raises TypeError when an item is not a
   string or the argument offers no item access; an exception raised
   while fetching an item is passed through unchanged. */
static PyObject *
strop_joinfields(PyObject *self, PyObject *args)
{
	PyObject *seq;
	char *sep = NULL;
	int seqlen, seplen = 0;
	int i, reslen = 0, sz = 100;
	PyObject *res = NULL;
	intargfunc getitemfunc;

	WARN;
	if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
		return NULL;
	if (sep == NULL) {
		sep = " ";
		seplen = 1;
	}

	seqlen = PySequence_Size(seq);
	if (seqlen < 0 && PyErr_Occurred())
		return NULL;

	if (seqlen == 1) {
		/* Optimization if there's only one item */
		PyObject *item = PySequence_GetItem(seq, 0);
		if (item && !PyString_Check(item)) {
			PyErr_SetString(PyExc_TypeError,
				 "first argument must be sequence of strings");
			Py_DECREF(item);
			return NULL;
		}
		return item;
	}

	if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
		return NULL;

	/* optimize for lists, since it's the most common case.  all others
	 * (tuples and arbitrary sequences) just use the sequence abstract
	 * interface.
	 */
	if (PyList_Check(seq)) {
		for (i = 0; i < seqlen; i++) {
			/* Borrowed reference: no DECREF needed. */
			PyObject *item = PyList_GET_ITEM(seq, i);
			if (!PyString_Check(item)) {
				PyErr_SetString(PyExc_TypeError,
				"first argument must be sequence of strings");
				Py_DECREF(res);
				return NULL;
			}
			if (joinfields_append(&res, &sz, &reslen,
					      sep, seplen, item, i == 0) < 0)
				return NULL;
		}
		/* Trim to the used length; res becomes NULL on failure. */
		_PyString_Resize(&res, reslen);
		return res;
	}

	if (seq->ob_type->tp_as_sequence == NULL ||
		 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
	{
		PyErr_SetString(PyExc_TypeError,
				"first argument must be a sequence");
		Py_DECREF(res);		/* was leaked on this path */
		return NULL;
	}
	/* This is now type safe */
	for (i = 0; i < seqlen; i++) {
		PyObject *item = getitemfunc(seq, i);
		if (item == NULL) {
			/* Keep the exception raised by the item getter
			   instead of replacing it with a TypeError. */
			Py_DECREF(res);
			return NULL;
		}
		if (!PyString_Check(item)) {
			PyErr_SetString(PyExc_TypeError,
				 "first argument must be sequence of strings");
			Py_DECREF(res);
			Py_DECREF(item);
			return NULL;
		}
		if (joinfields_append(&res, &sz, &reslen,
				      sep, seplen, item, i == 0) < 0) {
			Py_DECREF(item);
			return NULL;
		}
		Py_DECREF(item);
	}
	/* Trim to the used length; res becomes NULL on failure. */
	_PyString_Resize(&res, reslen);
	return res;
}

276

277
PyDoc_STRVAR(find__doc__,
278 279 280 281 282 283
"find(s, sub [,start [,end]]) -> in\n"
"\n"
"Return the lowest index in s where substring sub is found,\n"
"such that sub is contained within s[start,end].  Optional\n"
"arguments start and end are interpreted as in slice notation.\n"
"\n"
284
"Return -1 on failure.");
285

Barry Warsaw's avatar
Barry Warsaw committed
286
/* find(s, sub [,start [,end]]): lowest index at which sub occurs inside
   s[start:end], or -1.  Negative start/end count from the end of s and
   are clamped to 0; end is also clamped to len(s).  An empty sub matches
   at start whenever start <= end. */
static PyObject *
strop_find(PyObject *self, PyObject *args)
{
	char *s, *sub;
	int len, n;
	int start = 0, end = INT_MAX;
	int pos, limit;

	WARN;
	if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &start, &end))
		return NULL;

	/* Normalise the slice bounds. */
	if (end > len)
		end = len;
	else if (end < 0) {
		end += len;
		if (end < 0)
			end = 0;
	}
	if (start < 0) {
		start += len;
		if (start < 0)
			start = 0;
	}

	if (n == 0 && start <= end)
		return PyInt_FromLong((long)start);

	/* A match must start no later than n bytes before the slice end. */
	limit = end - n;
	for (pos = start; pos <= limit; pos++) {
		if (s[pos] != sub[0])
			continue;
		if (n == 1 || memcmp(&s[pos+1], &sub[1], n-1) == 0)
			return PyInt_FromLong((long)pos);
	}

	return PyInt_FromLong(-1L);
}


320
PyDoc_STRVAR(rfind__doc__,
321 322 323 324 325 326
"rfind(s, sub [,start [,end]]) -> int\n"
"\n"
"Return the highest index in s where substring sub is found,\n"
"such that sub is contained within s[start,end].  Optional\n"
"arguments start and end are interpreted as in slice notation.\n"
"\n"
327
"Return -1 on failure.");
328

Barry Warsaw's avatar
Barry Warsaw committed
329
/* rfind(s, sub [,start [,end]]): highest index at which sub occurs inside
   s[start:end], or -1.  Bounds are normalised exactly as in find().  An
   empty sub matches at end whenever start <= end. */
static PyObject *
strop_rfind(PyObject *self, PyObject *args)
{
	char *s, *sub;
	int len, n;
	int start = 0, end = INT_MAX;
	int pos;

	WARN;
	if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &start, &end))
		return NULL;

	/* Normalise the slice bounds. */
	if (end > len)
		end = len;
	else if (end < 0) {
		end += len;
		if (end < 0)
			end = 0;
	}
	if (start < 0) {
		start += len;
		if (start < 0)
			start = 0;
	}

	if (n == 0 && start <= end)
		return PyInt_FromLong((long)end);

	/* Walk backwards from the last position where sub still fits. */
	for (pos = end-n; pos >= start; pos--) {
		if (s[pos] != sub[0])
			continue;
		if (n == 1 || memcmp(&s[pos+1], &sub[1], n-1) == 0)
			return PyInt_FromLong((long)pos);
	}

	return PyInt_FromLong(-1L);
}

362

Barry Warsaw's avatar
Barry Warsaw committed
363
/* Shared implementation of strip(), lstrip() and rstrip().  striptype is
   one of LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP and selects which end(s) of
   the string object args lose their whitespace.  When nothing has to be
   removed, a new reference to args itself is returned. */
static PyObject *
do_strip(PyObject *args, int striptype)
{
	char *s;
	int len;
	int head = 0;		/* index of the first byte kept */
	int tail;		/* index one past the last byte kept */

	if (PyString_AsStringAndSize(args, &s, &len))
		return NULL;

	if (striptype != RIGHTSTRIP) {
		for (; head < len && isspace(Py_CHARMASK(s[head])); head++)
			;
	}

	tail = len;
	if (striptype != LEFTSTRIP) {
		/* Never retreat past head, so an all-blank string gives "". */
		while (tail > head && isspace(Py_CHARMASK(s[tail-1])))
			tail--;
	}

	if (head != 0 || tail != len)
		return PyString_FromStringAndSize(s+head, tail-head);

	Py_INCREF(args);
	return args;
}


397
PyDoc_STRVAR(strip__doc__,
398 399 400
"strip(s) -> string\n"
"\n"
"Return a copy of the string s with leading and trailing\n"
401
"whitespace removed.");
402

Barry Warsaw's avatar
Barry Warsaw committed
403
/* strip(s): hand the argument to do_strip() asking for both ends to be
   stripped, after issuing the deprecation warning. */
static PyObject *
strop_strip(PyObject *self, PyObject *args)
{
	PyObject *stripped;

	WARN;
	stripped = do_strip(args, BOTHSTRIP);
	return stripped;
}

410

411
PyDoc_STRVAR(lstrip__doc__,
412 413
"lstrip(s) -> string\n"
"\n"
414
"Return a copy of the string s with leading whitespace removed.");
415

Barry Warsaw's avatar
Barry Warsaw committed
416
/* lstrip(s): hand the argument to do_strip() asking for the left end to
   be stripped, after issuing the deprecation warning. */
static PyObject *
strop_lstrip(PyObject *self, PyObject *args)
{
	PyObject *stripped;

	WARN;
	stripped = do_strip(args, LEFTSTRIP);
	return stripped;
}

423

424
PyDoc_STRVAR(rstrip__doc__,
425 426
"rstrip(s) -> string\n"
"\n"
427
"Return a copy of the string s with trailing whitespace removed.");
428

Barry Warsaw's avatar
Barry Warsaw committed
429
/* rstrip(s): hand the argument to do_strip() asking for the right end to
   be stripped, after issuing the deprecation warning. */
static PyObject *
strop_rstrip(PyObject *self, PyObject *args)
{
	PyObject *stripped;

	WARN;
	stripped = do_strip(args, RIGHTSTRIP);
	return stripped;
}


437
PyDoc_STRVAR(lower__doc__,
438 439
"lower(s) -> string\n"
"\n"
440
"Return a copy of the string s converted to lowercase.");
441

Barry Warsaw's avatar
Barry Warsaw committed
442
/* lower(s): copy s with every character for which isupper() is true
   passed through tolower().  If no character changed, the copy is
   discarded and a new reference to the argument is returned. */
static PyObject *
strop_lower(PyObject *self, PyObject *args)
{
	char *src, *dst;
	int n, k;
	int changed = 0;
	PyObject *result;

	WARN;
	if (PyString_AsStringAndSize(args, &src, &n))
		return NULL;
	result = PyString_FromStringAndSize(NULL, n);
	if (result == NULL)
		return NULL;
	dst = PyString_AsString(result);

	for (k = 0; k < n; k++) {
		int c = Py_CHARMASK(src[k]);
		if (isupper(c)) {
			c = tolower(c);
			changed = 1;
		}
		dst[k] = c;
	}

	if (changed)
		return result;
	Py_DECREF(result);
	Py_INCREF(args);
	return args;
}


476
PyDoc_STRVAR(upper__doc__,
477 478
"upper(s) -> string\n"
"\n"
479
"Return a copy of the string s converted to uppercase.");
480

Barry Warsaw's avatar
Barry Warsaw committed
481
/* upper(s): copy s with every character for which islower() is true
   passed through toupper().  If no character changed, the copy is
   discarded and a new reference to the argument is returned. */
static PyObject *
strop_upper(PyObject *self, PyObject *args)
{
	char *src, *dst;
	int n, k;
	int changed = 0;
	PyObject *result;

	WARN;
	if (PyString_AsStringAndSize(args, &src, &n))
		return NULL;
	result = PyString_FromStringAndSize(NULL, n);
	if (result == NULL)
		return NULL;
	dst = PyString_AsString(result);

	for (k = 0; k < n; k++) {
		int c = Py_CHARMASK(src[k]);
		if (islower(c)) {
			c = toupper(c);
			changed = 1;
		}
		dst[k] = c;
	}

	if (changed)
		return result;
	Py_DECREF(result);
	Py_INCREF(args);
	return args;
}


515
PyDoc_STRVAR(capitalize__doc__,
516 517 518
"capitalize(s) -> string\n"
"\n"
"Return a copy of the string s with only its first character\n"
519
"capitalized.");
520

Barry Warsaw's avatar
Barry Warsaw committed
521
/* capitalize(s): copy s with the first character upper-cased (when it is
   lower case) and every later upper-case character lower-cased.  If no
   character changed, the copy is discarded and a new reference to the
   argument is returned. */
static PyObject *
strop_capitalize(PyObject *self, PyObject *args)
{
	char *src, *dst;
	int n, k;
	int changed = 0;
	PyObject *result;

	WARN;
	if (PyString_AsStringAndSize(args, &src, &n))
		return NULL;
	result = PyString_FromStringAndSize(NULL, n);
	if (result == NULL)
		return NULL;
	dst = PyString_AsString(result);

	for (k = 0; k < n; k++) {
		int c = Py_CHARMASK(src[k]);
		if (k == 0) {
			/* Only the leading character is raised. */
			if (islower(c)) {
				c = toupper(c);
				changed = 1;
			}
		}
		else if (isupper(c)) {
			c = tolower(c);
			changed = 1;
		}
		dst[k] = c;
	}

	if (changed)
		return result;
	Py_DECREF(result);
	Py_INCREF(args);
	return args;
}


564
PyDoc_STRVAR(expandtabs__doc__,
565 566 567 568 569
"expandtabs(string, [tabsize]) -> string\n"
"\n"
"Expand tabs in a string, i.e. replace them by one or more spaces,\n"
"depending on the current column and the given tab size (default 8).\n"
"The column number is reset to zero after each newline occurring in the\n"
570
"string.  This doesn't understand other non-printing characters.");
571 572

/* expandtabs(string [,tabsize]): replace each tab by the number of spaces
   needed to reach the next multiple of tabsize (default 8) in the
   current line; the column restarts at zero after every newline.
   Raises ValueError for tabsize < 1 and OverflowError when the expanded
   length does not fit in an int. */
static PyObject *
strop_expandtabs(PyObject *self, PyObject *args)
{
	/* Original by Fredrik Lundh */
	char *e;
	char *p;
	char *q;
	int i, j;
	PyObject *out;
	char *string;
	int stringlen;
	int tabsize = 8;

	WARN;
	/* Get arguments */
	if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
		return NULL;
	if (tabsize < 1) {
		PyErr_SetString(PyExc_ValueError,
				"tabsize must be at least 1");
		return NULL;
	}

	/* First pass: determine size of output string.  Every addition is
	   checked first, because signed overflow is undefined and would
	   lead to an undersized buffer for the second pass. */
	i = j = 0; /* j: current column; i: total of previous lines */
	e = string + stringlen;
	for (p = string; p < e; p++) {
		if (*p == '\t') {
			int pad = tabsize - (j%tabsize);
			if (pad > INT_MAX - j)
				goto overflow;
			j += pad;
		}
		else {
			if (j == INT_MAX)
				goto overflow;
			j++;
			if (*p == '\n') {
				if (j > INT_MAX - i)
					goto overflow;
				i += j;
				j = 0;
			}
		}
	}
	if (j > INT_MAX - i)
		goto overflow;

	/* Second pass: create output string and fill it */
	out = PyString_FromStringAndSize(NULL, i+j);
	if (out == NULL)
		return NULL;

	i = 0;	/* from here on i is the column within the current line */
	q = PyString_AS_STRING(out);

	for (p = string; p < e; p++) {
		if (*p == '\t') {
			j = tabsize - (i%tabsize);
			i += j;
			while (j-- > 0)
				*q++ = ' ';
		} else {
			*q++ = *p;
			i++;
			if (*p == '\n')
				i = 0;
		}
	}

	return out;

  overflow:
	PyErr_SetString(PyExc_OverflowError, "new string is too long");
	return NULL;
}


636
PyDoc_STRVAR(count__doc__,
637 638 639 640
"count(s, sub[, start[, end]]) -> int\n"
"\n"
"Return the number of occurrences of substring sub in string\n"
"s[start:end].  Optional arguments start and end are\n"
641
"interpreted as in slice notation.");
642 643

/* count(s, sub [,start [,end]]): number of non-overlapping occurrences of
   sub in s[start:end].  Negative start/end count from the end of s and
   are clamped to 0; end is also clamped to len(s).  An empty sub is
   counted once per position of the slice, including its end, and zero
   times when the slice bounds are inverted. */
static PyObject *
strop_count(PyObject *self, PyObject *args)
{
	char *s, *sub;
	int len, n;
	int i = 0, last = INT_MAX;
	int m, r;

	WARN;
	if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
		return NULL;
	if (last > len)
		last = len;
	if (last < 0)
		last += len;
	if (last < 0)
		last = 0;
	if (i < 0)
		i += len;
	if (i < 0)
		i = 0;

	if (n == 0) {
		/* Never report a negative count when start lies beyond end;
		   the arithmetic is done in long so last+1 cannot wrap. */
		if (i > last)
			return PyInt_FromLong(0L);
		return PyInt_FromLong((long)last - (long)i + 1L);
	}

	/* m is one past the last index where sub still fits in the slice. */
	m = last - n + 1;
	r = 0;
	while (i < m) {
		if (!memcmp(s+i, sub, n)) {
			r++;
			i += n;		/* occurrences may not overlap */
		} else {
			i++;
		}
	}
	return PyInt_FromLong((long) r);
}


681
PyDoc_STRVAR(swapcase__doc__,
682 683 684
"swapcase(s) -> string\n"
"\n"
"Return a copy of the string s with upper case characters\n"
685
"converted to lowercase and vice versa.");
686

Barry Warsaw's avatar
Barry Warsaw committed
687
/* swapcase(s): copy s with lower-case characters upper-cased and
   upper-case characters lower-cased.  If no character changed, the copy
   is discarded and a new reference to the argument is returned. */
static PyObject *
strop_swapcase(PyObject *self, PyObject *args)
{
	char *src, *dst;
	int n, k;
	int changed = 0;
	PyObject *result;

	WARN;
	if (PyString_AsStringAndSize(args, &src, &n))
		return NULL;
	result = PyString_FromStringAndSize(NULL, n);
	if (result == NULL)
		return NULL;
	dst = PyString_AsString(result);

	for (k = 0; k < n; k++) {
		int c = Py_CHARMASK(src[k]);
		if (islower(c)) {
			c = toupper(c);
			changed = 1;
		}
		else if (isupper(c)) {
			c = tolower(c);
			changed = 1;
		}
		dst[k] = c;
	}

	if (changed)
		return result;
	Py_DECREF(result);
	Py_INCREF(args);
	return args;
}


726
PyDoc_STRVAR(atoi__doc__,
727 728 729 730 731 732 733
"atoi(s [,base]) -> int\n"
"\n"
"Return the integer represented by the string s in the given\n"
"base, which defaults to 10.  The string s must consist of one\n"
"or more digits, possibly preceded by a sign.  If base is 0, it\n"
"is chosen from the leading characters of s, 0 for octal, 0x or\n"
"0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n"
734
"accepted.");
735

Barry Warsaw's avatar
Barry Warsaw committed
736
/* atoi(s [,base]): convert the string s to a Python int.  base must be 0
   or lie in 2..36, otherwise ValueError is raised.  Leading and trailing
   whitespace is skipped.  ValueError is also raised when no digits were
   converted, when anything but whitespace follows the number, or when
   the conversion set errno. */
static PyObject *
strop_atoi(PyObject *self, PyObject *args)
{
	char *s, *end;
	int base = 10;
	long x;
	char buffer[256]; /* For errors */

	WARN;
	if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
		return NULL;

	if ((base != 0 && base < 2) || base > 36) {
		PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
		return NULL;
	}

	while (*s && isspace(Py_CHARMASK(*s)))
		s++;
	errno = 0;
	if (base == 0 && s[0] == '0')
		x = (long) PyOS_strtoul(s, &end, base);
	else
		x = PyOS_strtol(s, &end, base);
	/* Py_CHARMASK keeps the <ctype.h> argument non-negative even when
	   plain char is signed and the byte has its high bit set. */
	if (end == s || !isalnum(Py_CHARMASK(end[-1])))
		goto bad;
	while (*end && isspace(Py_CHARMASK(*end)))
		end++;
	if (*end != '\0')
		goto bad;
	if (errno != 0) {
		PyOS_snprintf(buffer, sizeof(buffer), 
			      "atoi() literal too large: %.200s", s);
		PyErr_SetString(PyExc_ValueError, buffer);
		return NULL;
	}
	return PyInt_FromLong(x);

  bad:
	PyOS_snprintf(buffer, sizeof(buffer),
		      "invalid literal for atoi(): %.200s", s);
	PyErr_SetString(PyExc_ValueError, buffer);
	return NULL;
}


781
PyDoc_STRVAR(atol__doc__,
782 783 784 785 786 787 788 789
"atol(s [,base]) -> long\n"
"\n"
"Return the long integer represented by the string s in the\n"
"given base, which defaults to 10.  The string s must consist\n"
"of one or more digits, possibly preceded by a sign.  If base\n"
"is 0, it is chosen from the leading characters of s, 0 for\n"
"octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n"
"0x or 0X is accepted.  A trailing L or l is not accepted,\n"
790
"unless base is 0.");
791

Barry Warsaw's avatar
Barry Warsaw committed
792
/* atol(s [,base]): convert the string s to a Python long through
   PyLong_FromString().  base must be 0 or lie in 2..36.  Leading and
   trailing whitespace is skipped, and a single trailing 'l' or 'L' is
   tolerated when base is 0.  ValueError is raised for a bad base, an
   empty string, or trailing text after the number. */
static PyObject *
strop_atol(PyObject *self, PyObject *args)
{
	char *s, *end;
	int base = 10;
	PyObject *value;
	char buffer[256]; /* For errors */

	WARN;
	if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
		return NULL;

	if ((base != 0 && base < 2) || base > 36) {
		PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
		return NULL;
	}

	for (; *s && isspace(Py_CHARMASK(*s)); s++)
		;
	if (s[0] == '\0') {
		PyErr_SetString(PyExc_ValueError, "empty string for atol()");
		return NULL;
	}

	value = PyLong_FromString(s, &end, base);
	if (value == NULL)
		return NULL;

	if (base == 0 && (*end == 'l' || *end == 'L'))
		end++;
	for (; *end && isspace(Py_CHARMASK(*end)); end++)
		;
	if (*end == '\0')
		return value;

	/* Something other than whitespace follows the number. */
	PyOS_snprintf(buffer, sizeof(buffer),
		      "invalid literal for atol(): %.200s", s);
	PyErr_SetString(PyExc_ValueError, buffer);
	Py_DECREF(value);
	return NULL;
}


833
PyDoc_STRVAR(atof__doc__,
834 835
"atof(s) -> float\n"
"\n"
836
"Return the floating point number represented by the string s.");
837

Barry Warsaw's avatar
Barry Warsaw committed
838
/* atof(s): convert the string s to a Python float with
   PyOS_ascii_strtod().  Leading and trailing whitespace is skipped.
   ValueError is raised for an empty string, for trailing text after the
   number, or when the conversion set errno. */
static PyObject *
strop_atof(PyObject *self, PyObject *args)
{
	char *s, *end;
	double x;
	char buffer[256]; /* For errors */

	WARN;
	if (!PyArg_ParseTuple(args, "s:atof", &s))
		return NULL;

	for (; *s && isspace(Py_CHARMASK(*s)); s++)
		;
	if (s[0] == '\0') {
		PyErr_SetString(PyExc_ValueError, "empty string for atof()");
		return NULL;
	}

	errno = 0;
	PyFPE_START_PROTECT("strop_atof", return 0)
	x = PyOS_ascii_strtod(s, &end);
	PyFPE_END_PROTECT(x)

	for (; *end && isspace(Py_CHARMASK(*end)); end++)
		;
	if (*end != '\0') {
		PyOS_snprintf(buffer, sizeof(buffer),
			      "invalid literal for atof(): %.200s", s);
		PyErr_SetString(PyExc_ValueError, buffer);
		return NULL;
	}
	if (errno != 0) {
		PyOS_snprintf(buffer, sizeof(buffer), 
			      "atof() literal too large: %.200s", s);
		PyErr_SetString(PyExc_ValueError, buffer);
		return NULL;
	}
	return PyFloat_FromDouble(x);
}


876
PyDoc_STRVAR(maketrans__doc__,
877 878 879 880
"maketrans(frm, to) -> string\n"
"\n"
"Return a translation table (a string of 256 bytes long)\n"
"suitable for use in string.translate.  The strings frm and to\n"
881
"must be of the same length.");
882

883
/* maketrans(frm, to): build a 256-byte table that maps every byte to
   itself except that frm[k] maps to to[k].  Raises ValueError when the
   two arguments differ in length. */
static PyObject *
strop_maketrans(PyObject *self, PyObject *args)
{
	unsigned char *from = NULL;
	unsigned char *to = NULL;
	unsigned char *table;
	int fromlen = 0;
	int tolen = 0;
	int k;
	PyObject *result;

	if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
		return NULL;

	if (fromlen != tolen) {
		PyErr_SetString(PyExc_ValueError,
				"maketrans arguments must have same length");
		return NULL;
	}

	result = PyString_FromStringAndSize((char *)NULL, 256);
	if (result == NULL)
		return NULL;
	table = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);

	/* Start from the identity mapping ... */
	for (k = 0; k < 256; k++)
		table[k] = (unsigned char)k;
	/* ... then apply the replacements; a later duplicate in frm wins. */
	for (k = 0; k < fromlen; k++)
		table[from[k]] = to[k];

	return result;
}


912
PyDoc_STRVAR(translate__doc__,
913 914 915 916 917
"translate(s,table [,deletechars]) -> string\n"
"\n"
"Return a copy of the string s, where all characters occurring\n"
"in the optional argument deletechars are removed, and the\n"
"remaining characters have been mapped through the given\n"
918
"translation table, which must be a string of length 256.");
919

Barry Warsaw's avatar
Barry Warsaw committed
920
/* translate(s, table [,deletechars]): map every byte of s through the
   256-byte table, dropping the bytes listed in deletechars.  Raises
   ValueError when the table is not 256 bytes long.  If the result would
   equal the input, a new reference to the input object is returned. */
static PyObject *
strop_translate(PyObject *self, PyObject *args)
{
	PyObject *input_obj;
	PyObject *result;
	char *table;
	char *del_table = NULL;
	char *src, *dst, *dst_start;
	int inlen, tablen, dellen = 0;
	int k, c;
	int changed = 0;
	int trans_table[256];

	WARN;
	if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
			      &table, &tablen, &del_table, &dellen))
		return NULL;
	if (tablen != 256) {
		PyErr_SetString(PyExc_ValueError,
			      "translation table must be 256 characters long");
		return NULL;
	}

	inlen = PyString_Size(input_obj);
	result = PyString_FromStringAndSize((char *)NULL, inlen);
	if (result == NULL)
		return NULL;
	dst_start = dst = PyString_AsString(result);
	src = PyString_AsString(input_obj);

	if (dellen == 0) {
		/* If no deletions are required, use faster code */
		for (k = 0; k < inlen; k++) {
			c = Py_CHARMASK(src[k]);
			dst[k] = table[c];
			if (Py_CHARMASK(dst[k]) != c)
				changed = 1;
		}
		if (!changed) {
			Py_DECREF(result);
			Py_INCREF(input_obj);
			return input_obj;
		}
		return result;
	}

	/* Combined table: the mapped byte value, or -1 for "delete". */
	for (k = 0; k < 256; k++)
		trans_table[k] = Py_CHARMASK(table[k]);
	for (k = 0; k < dellen; k++)
		trans_table[(int) Py_CHARMASK(del_table[k])] = -1;

	for (k = 0; k < inlen; k++) {
		int mapped;

		c = Py_CHARMASK(src[k]);
		mapped = trans_table[c];
		if (mapped == -1) {
			changed = 1;	/* byte dropped */
			continue;
		}
		*dst++ = (char)mapped;
		if (Py_CHARMASK((char)mapped) != c)
			changed = 1;
	}
	if (!changed) {
		Py_DECREF(result);
		Py_INCREF(input_obj);
		return input_obj;
	}
	/* Fix the size of the resulting string */
	if (inlen > 0)
		_PyString_Resize(&result, dst - dst_start);
	return result;
}


988 989 990 991 992 993 994
/* What follows is used for implementing replace().  Perry Stoll. */

/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its index into MEM, or -1 if there is none.
  A pattern longer than MEM is never found, so -1 is returned.
*/
static int 
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* the pattern cannot start in the last pat_len-1 bytes */
	int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* cheap first-byte test before comparing the rest */
		if (mem[pos] != pat[0])
			continue;
		if (pat_len == 1 ||
		    memcmp(&mem[pos+1], &pat[1], pat_len-1) == 0)
			return pos;
	}
	return -1;
}

/*
  mymemcnt

   Return the number of non-overlapping occurrences of PAT in MEM,
   found by repeated calls to mymemfind():
       mem=1111  and pat=11 gives 2;
       mem=11111 and pat=11 also gives 2.
 */
static int 
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int count;
	int hit;

	for (count = 0; len >= 0; count++) {
		hit = mymemfind(mem, len, pat, pat_len);
		if (hit == -1)
			break;
		/* resume the search just past this occurrence */
		mem += hit + pat_len;
		len -= hit + pat_len;
	}
	return count;
}

1042
/*
   mymemreplace

   Return a string in which the first COUNT occurrences of PAT in memory
   STR are replaced with SUB.  A negative COUNT means "replace all".

   If STR is empty, PAT is empty, the length of PAT is greater than the
   length of STR, or there are no occurrences of PAT in STR to replace,
   then the original string is returned.  Otherwise, a new string is
   allocated here with PyMem_MALLOC and returned; the caller must free it.
   The new string is NOT NUL-terminated (except for the 1-byte buffer
   handed back when the result is empty).

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would be
       too long to describe with an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len <= 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growing the string can overflow an int; signed overflow is
	   undefined and would lead to an undersized buffer below.  The
	   shrinking case is safe because nfound*pat_len <= len. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;

	new_len = len + nfound*(sub_len - pat_len);
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}


1128
PyDoc_STRVAR(replace__doc__,
1129 1130 1131 1132
"replace (str, old, new[, maxsplit]) -> string\n"
"\n"
"Return a copy of string str with all occurrences of substring\n"
"old replaced by new. If the optional argument maxsplit is\n"
1133
"given, only the first maxsplit occurrences are replaced.");
1134 1135

/* Implementation of strop.replace(str, old, new[, count]).
   Parses three read-only character buffers plus an optional int, rejects
   an empty pattern with ValueError, and delegates the actual work to
   mymemreplace().  Returns a new reference, or NULL with an exception set. */
static PyObject *
strop_replace(PyObject *self, PyObject *args)
{
	char *text, *old, *repl;
	char *buffer;
	int text_len, old_len, repl_len;
	int result_len;
	int maxcount = -1;
	PyObject *result;

	WARN;
	if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
			      &text, &text_len,
			      &old, &old_len,
			      &repl, &repl_len,
			      &maxcount))
		return NULL;

	if (old_len <= 0) {
		PyErr_SetString(PyExc_ValueError, "empty pattern string");
		return NULL;
	}

	/* CAUTION:  strop treats a replace count of 0 as infinity, unlike
	 * current (2.1) string.py and string methods.  Preserve this for
	 * ... well, hard to say for what <wink>.
	 */
	if (maxcount == 0)
		maxcount = -1;

	buffer = mymemreplace(text, text_len, old, old_len,
			      repl, repl_len, maxcount, &result_len);
	if (buffer == NULL)
		return PyErr_NoMemory();

	if (result_len != -1) {
		/* a fresh buffer was allocated: copy it into a string
		   object and release it */
		result = PyString_FromStringAndSize(buffer, result_len);
		PyMem_FREE(buffer);
		return result;
	}

	/* nothing was replaced: hand back another reference to the
	   input string */
	result = PyTuple_GetItem(args, 0);
	Py_XINCREF(result);
	return result;
}


1176 1177
/* List of functions defined in the module */

Barry Warsaw's avatar
Barry Warsaw committed
1178 1179
static PyMethodDef
strop_methods[] = {
1180 1181 1182
	{"atof",	strop_atof,	   METH_VARARGS, atof__doc__},
	{"atoi",	strop_atoi,	   METH_VARARGS, atoi__doc__},
	{"atol",	strop_atol,	   METH_VARARGS, atol__doc__},
1183
	{"capitalize",	strop_capitalize,  METH_O,       capitalize__doc__},
1184 1185 1186 1187 1188
	{"count",	strop_count,	   METH_VARARGS, count__doc__},
	{"expandtabs",	strop_expandtabs,  METH_VARARGS, expandtabs__doc__},
	{"find",	strop_find,	   METH_VARARGS, find__doc__},
	{"join",	strop_joinfields,  METH_VARARGS, joinfields__doc__},
	{"joinfields",	strop_joinfields,  METH_VARARGS, joinfields__doc__},
1189 1190
	{"lstrip",	strop_lstrip,	   METH_O,       lstrip__doc__},
	{"lower",	strop_lower,	   METH_O,       lower__doc__},
1191 1192 1193
	{"maketrans",	strop_maketrans,   METH_VARARGS, maketrans__doc__},
	{"replace",	strop_replace,	   METH_VARARGS, replace__doc__},
	{"rfind",	strop_rfind,	   METH_VARARGS, rfind__doc__},
1194
	{"rstrip",	strop_rstrip,	   METH_O,       rstrip__doc__},
1195 1196
	{"split",	strop_splitfields, METH_VARARGS, splitfields__doc__},
	{"splitfields",	strop_splitfields, METH_VARARGS, splitfields__doc__},
1197 1198
	{"strip",	strop_strip,	   METH_O,       strip__doc__},
	{"swapcase",	strop_swapcase,    METH_O,       swapcase__doc__},
1199
	{"translate",	strop_translate,   METH_VARARGS, translate__doc__},
1200
	{"upper",	strop_upper,	   METH_O,       upper__doc__},
1201 1202 1203 1204
	{NULL,		NULL}	/* sentinel */
};


1205
PyMODINIT_FUNC
1206
initstrop(void)
1207
{
1208
	PyObject *m, *s;
1209 1210
	char buf[256];
	int c, n;
1211 1212
	m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
			   (PyObject*)NULL, PYTHON_API_VERSION);
1213 1214

	/* Create 'whitespace' object */
1215
	n = 0;
Guido van Rossum's avatar
Guido van Rossum committed
1216
	for (c = 0; c < 256; c++) {
1217 1218 1219
		if (isspace(c))
			buf[n++] = c;
	}
Barry Warsaw's avatar
Barry Warsaw committed
1220
	s = PyString_FromStringAndSize(buf, n);
1221 1222 1223
	if (s)
		PyModule_AddObject(m, "whitespace", s);

1224 1225
	/* Create 'lowercase' object */
	n = 0;
Guido van Rossum's avatar
Guido van Rossum committed
1226
	for (c = 0; c < 256; c++) {
1227 1228 1229
		if (islower(c))
			buf[n++] = c;
	}
Barry Warsaw's avatar
Barry Warsaw committed
1230
	s = PyString_FromStringAndSize(buf, n);
1231 1232
	if (s)
		PyModule_AddObject(m, "lowercase", s);
1233 1234 1235

	/* Create 'uppercase' object */
	n = 0;
Guido van Rossum's avatar
Guido van Rossum committed
1236
	for (c = 0; c < 256; c++) {
1237 1238 1239
		if (isupper(c))
			buf[n++] = c;
	}
Barry Warsaw's avatar
Barry Warsaw committed
1240
	s = PyString_FromStringAndSize(buf, n);
1241 1242
	if (s)
		PyModule_AddObject(m, "uppercase", s);
1243
}