# bitvec.py
# (originally committed by Guido van Rossum)
#
# this is a rather strict implementation of a bit vector class
# it is accessed the same way as an array of python-ints, except
# the value must be 0 or 1
#

import sys; rprt = sys.stderr.write #for debugging

class error(Exception):
	"""Exception raised by this module for invalid arguments and values.

	This used to be the string exception 'bitvec.error'.  String
	exceptions can no longer be raised by Python 2.6 and later, so it is
	a real exception class now; it keeps its lower-case name so that
	existing "except bitvec.error" clauses and the "raise error, msg"
	statements in this module keep working.
	"""


def _check_value(value):
	"""Raise error unless value is the plain int 0 or the plain int 1."""
	if type(value) != type(0) or not 0 <= value < 2:
		raise error('bitvec() items must have int value 0 or 1')


import math

def _compute_len(param):
	"""Return the number of bits needed to represent param.

	This is the index of the highest set bit plus one, and 0 for 0.
	param must be a non-negative int or long; a negative value raises
	ValueError.
	"""
	if param < 0:
		raise ValueError('_compute_len() requires a non-negative value')
	# Work on the hex digits rather than on math.frexp(float(param)):
	# the float conversion overflows for values of 2**1024 and above,
	# while the digit count is exact for a value of any size.
	digits = '%x' % param
	length = 4 * (len(digits) - 1)
	# the most significant hex digit contributes between 0 and 4 bits
	top = int(digits[0], 16)
	while top:
		top, length = top >> 1, length + 1
	return length


def _check_key(len, key):
	"""Validate a subscript and return it as a non-negative index.

	len is the length of the sequence being indexed and key the
	subscript.  A negative key counts from the end, as for lists.

	Raises TypeError if key is not a plain int and IndexError if it
	falls outside the sequence.
	"""
	if type(key) != type(0):
		raise TypeError('sequence subscript not int')
	if key < 0:
		key = key + len
	if not 0 <= key < len:
		raise IndexError('list index out of range')
	return key

def _check_slice(len, i, j):
	"""Clamp slice bounds so that 0 <= i <= j <= len and return (i, j).

	len is the length of the sequence being sliced.
	"""
	# the type is ok, Python already checked that
	# j is clamped at 0 as well as at len: a bound that is still negative
	# here would otherwise be returned as is and be counted from the end
	# a second time when it is used in another slice
	j = max(0, min(len, j))
	i = min(max(i, 0), j)
	return i, j
	

class BitVec:

51
	def __init__(self, *params):
Guido van Rossum's avatar
Guido van Rossum committed
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
		self._data = 0L
		self._len = 0
		if not len(params):
			pass
		elif len(params) == 1:
			param, = params
			if type(param) == type([]):
				value = 0L
				bit_mask = 1L
				for item in param:
					# strict check
					#_check_value(item)
					if item:
						value = value | bit_mask
					bit_mask = bit_mask << 1
				self._data = value
				self._len = len(param)
			elif type(param) == type(0L):
				if param < 0:
					raise error, 'bitvec() can\'t handle negative longs'
				self._data = param
				self._len = _compute_len(param)
			else:
				raise error, 'bitvec() requires array or long parameter'
		elif len(params) == 2:
			param, length = params
			if type(param) == type(0L):
				if param < 0:
					raise error, \
					  'can\'t handle negative longs'
				self._data = param
				if type(length) != type(0):
					raise error, 'bitvec()\'s 2nd parameter must be int'
				computed_length = _compute_len(param)
				if computed_length > length:
87
					print 'warning: bitvec() value is longer than the length indicates, truncating value'
Guido van Rossum's avatar
Guido van Rossum committed
88 89 90 91 92 93 94 95 96 97 98 99 100
					self._data = self._data & \
						  ((1L << length) - 1)
				self._len = length
			else:
				raise error, 'bitvec() requires array or long parameter'
		else:
			raise error, 'bitvec() requires 0 -- 2 parameter(s)'

		
	def append(self, item):
		#_check_value(item)
		#self[self._len:self._len] = [item]
		self[self._len:self._len] = \
101
			  BitVec(long(not not item), 1)
Guido van Rossum's avatar
Guido van Rossum committed
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132

		
	def count(self, value):
		#_check_value(value)
		if value:
			data = self._data
		else:
			data = (~self)._data
		count = 0
		while data:
			data, count = data >> 1, count + (data & 1 != 0)
		return count


	def index(self, value):
		#_check_value(value):
		if value:
			data = self._data
		else:
			data = (~self)._data
		index = 0
		if not data:
			raise ValueError, 'list.index(x): x not in list'
		while not (data & 1):
			data, index = data >> 1, index + 1
		return index


	def insert(self, index, item):
		#_check_value(item)
		#self[index:index] = [item]
133
		self[index:index] = BitVec(long(not not item), 1)
Guido van Rossum's avatar
Guido van Rossum committed
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157


	def remove(self, value):
		del self[self.index(value)]


	def reverse(self):
		#ouch, this one is expensive!
		#for i in self._len>>1: self[i], self[l-i] = self[l-i], self[i]
		data, result = self._data, 0L
		for i in range(self._len):
			if not data:
				result = result << (self._len - i)
				break
			result, data = (result << 1) | (data & 1), data >> 1
		self._data = result

		
	def sort(self):
		c = self.count(1)
		self._data = ((1L << c) - 1) << (self._len - c)


	def copy(self):
158
		return BitVec(self._data, self._len)
Guido van Rossum's avatar
Guido van Rossum committed
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180


	def seq(self):
		result = []
		for i in self:
			result.append(i)
		return result
		

	def __repr__(self):
		##rprt('<bitvec class instance object>.' + '__repr__()\n')
		return 'bitvec' + `self._data, self._len`

	def __cmp__(self, other, *rest):
		#rprt(`self`+'.__cmp__'+`(other, ) + rest`+'\n')
		if type(other) != type(self):
			other = apply(bitvec, (other, ) + rest)
		#expensive solution... recursive binary, with slicing
		length = self._len
		if length == 0 or other._len == 0:
			return cmp(length, other._len)
		if length != other._len:
181
			min_length = min(length, other._len)
Guido van Rossum's avatar
Guido van Rossum committed
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
			return cmp(self[:min_length], other[:min_length]) or \
				  cmp(self[min_length:], other[min_length:])
		#the lengths are the same now...
		if self._data == other._data:
			return 0
		if length == 1:
			return cmp(self[0], other[0])
		else:
			length = length >> 1
			return cmp(self[:length], other[:length]) or \
				  cmp(self[length:], other[length:])
		

	def __len__(self):
		#rprt(`self`+'.__len__()\n')
		return self._len

	def __getitem__(self, key):
		#rprt(`self`+'.__getitem__('+`key`+')\n')
		key = _check_key(self._len, key)
		return self._data & (1L << key) != 0

	def __setitem__(self, key, value):
		#rprt(`self`+'.__setitem__'+`key, value`+'\n')
		key = _check_key(self._len, key)
		#_check_value(value)
		if value:
			self._data = self._data | (1L << key)
		else:
			self._data = self._data & ~(1L << key)

	def __delitem__(self, key):
		#rprt(`self`+'.__delitem__('+`key`+')\n')
		key = _check_key(self._len, key)
		#el cheapo solution...
		self._data = self[:key]._data | self[key+1:]._data >> key
		self._len = self._len - 1

	def __getslice__(self, i, j):
		#rprt(`self`+'.__getslice__'+`i, j`+'\n')
		i, j = _check_slice(self._len, i, j)
		if i >= j:
224
			return BitVec(0L, 0)
Guido van Rossum's avatar
Guido van Rossum committed
225 226 227 228 229 230 231 232 233
		if i:
			ndata = self._data >> i
		else:
			ndata = self._data
		nlength = j - i
		if j != self._len:
			#we'll have to invent faster variants here
			#e.g. mod_2exp
			ndata = ndata & ((1L << nlength) - 1)
234
		return BitVec(ndata, nlength)
Guido van Rossum's avatar
Guido van Rossum committed
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268

	def __setslice__(self, i, j, sequence, *rest):
		#rprt(`self`+'.__setslice__'+`(i, j, sequence) + rest`+'\n')
		i, j = _check_slice(self._len, i, j)
		if type(sequence) != type(self):
			sequence = apply(bitvec, (sequence, ) + rest)
		#sequence is now of our own type
		ls_part = self[:i]
		ms_part = self[j:]
		self._data = ls_part._data | \
			  ((sequence._data | \
			  (ms_part._data << sequence._len)) << ls_part._len)
		self._len = self._len - j + i + sequence._len

	def __delslice__(self, i, j):
		#rprt(`self`+'.__delslice__'+`i, j`+'\n')
		i, j = _check_slice(self._len, i, j)
		if i == 0 and j == self._len:
			self._data, self._len = 0L, 0
		elif i < j:
			self._data = self[:i]._data | (self[j:]._data >> i)
			self._len = self._len - j + i

	def __add__(self, other):
		#rprt(`self`+'.__add__('+`other`+')\n')
		retval = self.copy()
		retval[self._len:self._len] = other
		return retval

	def __mul__(self, multiplier):
		#rprt(`self`+'.__mul__('+`multiplier`+')\n')
		if type(multiplier) != type(0):
			raise TypeError, 'sequence subscript not int'
		if multiplier <= 0:
269
			return BitVec(0L, 0)
Guido van Rossum's avatar
Guido van Rossum committed
270 271 272 273
		elif multiplier == 1:
			return self.copy()
		#handle special cases all 0 or all 1...
		if self._data == 0L:
274
			return BitVec(0L, self._len * multiplier)
Guido van Rossum's avatar
Guido van Rossum committed
275
		elif (~self)._data == 0L:
276
			return ~BitVec(0L, self._len * multiplier)
Guido van Rossum's avatar
Guido van Rossum committed
277
		#otherwise el cheapo again...
278
		retval = BitVec(0L, 0)
Guido van Rossum's avatar
Guido van Rossum committed
279 280 281 282 283 284 285 286 287
		while multiplier:
			retval, multiplier = retval + self, multiplier - 1
		return retval

	def __and__(self, otherseq, *rest):
		#rprt(`self`+'.__and__'+`(otherseq, ) + rest`+'\n')
		if type(otherseq) != type(self):
			otherseq = apply(bitvec, (otherseq, ) + rest)
		#sequence is now of our own type
288
		return BitVec(self._data & otherseq._data, \
Guido van Rossum's avatar
Guido van Rossum committed
289 290 291 292 293 294 295 296
			  min(self._len, otherseq._len))


	def __xor__(self, otherseq, *rest):
		#rprt(`self`+'.__xor__'+`(otherseq, ) + rest`+'\n')
		if type(otherseq) != type(self):
			otherseq = apply(bitvec, (otherseq, ) + rest)
		#sequence is now of our own type
297
		return BitVec(self._data ^ otherseq._data, \
Guido van Rossum's avatar
Guido van Rossum committed
298 299 300 301 302 303 304 305
			  max(self._len, otherseq._len))


	def __or__(self, otherseq, *rest):
		#rprt(`self`+'.__or__'+`(otherseq, ) + rest`+'\n')
		if type(otherseq) != type(self):
			otherseq = apply(bitvec, (otherseq, ) + rest)
		#sequence is now of our own type
306
		return BitVec(self._data | otherseq._data, \
Guido van Rossum's avatar
Guido van Rossum committed
307 308 309 310 311
			  max(self._len, otherseq._len))


	def __invert__(self):
		#rprt(`self`+'.__invert__()\n')
312
		return BitVec(~self._data & ((1L << self._len) - 1), \
Guido van Rossum's avatar
Guido van Rossum committed
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331
			  self._len)

	def __coerce__(self, otherseq, *rest):
		#needed for *some* of the arithmetic operations
		#rprt(`self`+'.__coerce__'+`(otherseq, ) + rest`+'\n')
		if type(otherseq) != type(self):
			otherseq = apply(bitvec, (otherseq, ) + rest)
		return self, otherseq

	def __int__(self):
		return int(self._data)

	def __long__(self):
		return long(self._data)

	def __float__(self):
		return float(self._data)


# lower-case alias for the class: this is the name used in the error
# messages and by the methods of BitVec that convert their argument
bitvec = BitVec