| 1 | """Classes to represent arbitrary sets (including sets of sets).
|
|---|
| 2 |
|
|---|
| 3 | This module implements sets using dictionaries whose values are
|
|---|
| 4 | ignored. The usual operations (union, intersection, deletion, etc.)
|
|---|
| 5 | are provided as both methods and operators.
|
|---|
| 6 |
|
|---|
| 7 | Important: sets are not sequences! While they support 'x in s',
|
|---|
| 8 | 'len(s)', and 'for x in s', none of those operations are unique for
|
|---|
| 9 | sequences; for example, mappings support all three as well. The
|
|---|
| 10 | characteristic operation for sequences is subscripting with small
|
|---|
| 11 | integers: s[i], for i in range(len(s)). Sets don't support
|
|---|
| 12 | subscripting at all. Also, sequences allow multiple occurrences and
|
|---|
| 13 | their elements have a definite order; sets on the other hand don't
|
|---|
| 14 | record multiple occurrences and don't remember the order of element
|
|---|
| 15 | insertion (which is why they don't support s[i]).
|
|---|
| 16 |
|
|---|
| 17 | The following classes are provided:
|
|---|
| 18 |
|
|---|
| 19 | BaseSet -- All the operations common to both mutable and immutable
|
|---|
| 20 | sets. This is an abstract class, not meant to be directly
|
|---|
| 21 | instantiated.
|
|---|
| 22 |
|
|---|
| 23 | Set -- Mutable sets, subclass of BaseSet; not hashable.
|
|---|
| 24 |
|
|---|
| 25 | ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
|
|---|
| 26 | An iterable argument is mandatory to create an ImmutableSet.
|
|---|
| 27 |
|
|---|
| 28 | _TemporarilyImmutableSet -- A wrapper around a Set, hashable,
|
|---|
| 29 | giving the same hash value as the immutable set equivalent
|
|---|
| 30 | would have. Do not use this class directly.
|
|---|
| 31 |
|
|---|
| 32 | Only hashable objects can be added to a Set. In particular, you cannot
|
|---|
| 33 | really add a Set as an element to another Set; if you try, what is
|
|---|
| 34 | actually added is an ImmutableSet built from it (it compares equal to
|
|---|
| 35 | the one you tried adding).
|
|---|
| 36 |
|
|---|
| 37 | When you ask if `x in y' where x is a Set and y is a Set or
|
|---|
| 38 | ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
|
|---|
| 39 | what's tested is actually `z in y'.
|
|---|
| 40 |
|
|---|
| 41 | """
|
|---|
| 42 |
|
|---|
| 43 | # Code history:
|
|---|
| 44 | #
|
|---|
| 45 | # - Greg V. Wilson wrote the first version, using a different approach
|
|---|
| 46 | # to the mutable/immutable problem, and inheriting from dict.
|
|---|
| 47 | #
|
|---|
| 48 | # - Alex Martelli modified Greg's version to implement the current
|
|---|
| 49 | # Set/ImmutableSet approach, and make the data an attribute.
|
|---|
| 50 | #
|
|---|
| 51 | # - Guido van Rossum rewrote much of the code, made some API changes,
|
|---|
| 52 | # and cleaned up the docstrings.
|
|---|
| 53 | #
|
|---|
| 54 | # - Raymond Hettinger added a number of speedups and other
|
|---|
| 55 | # improvements.
|
|---|
| 56 |
|
|---|
| 57 | from __future__ import generators
|
|---|
try:
    from itertools import ifilter, ifilterfalse
except ImportError:
    # Code to make the module run under Py2.2: generator stand-ins for
    # the two itertools functions imported above.
    def ifilter(predicate, iterable):
        """Yield the items of iterable for which predicate(item) is true.

        With predicate None, yield the items that are themselves true.
        """
        if predicate is None:
            for item in iterable:
                if item:
                    yield item
        else:
            for item in iterable:
                if predicate(item):
                    yield item

    def ifilterfalse(predicate, iterable):
        """Yield the items of iterable for which predicate(item) is false.

        With predicate None, yield the items that are themselves false.
        """
        if predicate is None:
            for item in iterable:
                if not item:
                    yield item
        else:
            for item in iterable:
                if not predicate(item):
                    yield item
|
|---|
| 76 | try:
|
|---|
| 77 | True, False
|
|---|
| 78 | except NameError:
|
|---|
| 79 | True, False = (0==0, 0!=0)
|
|---|
| 80 |
|
|---|
| 81 | __all__ = ['BaseSet', 'Set', 'ImmutableSet']
|
|---|
| 82 |
|
|---|
| 83 | class BaseSet(object):
|
|---|
| 84 | """Common base class for mutable and immutable sets."""
|
|---|
| 85 |
|
|---|
| 86 | __slots__ = ['_data']
|
|---|
| 87 |
|
|---|
| 88 | # Constructor
|
|---|
| 89 |
|
|---|
| 90 | def __init__(self):
|
|---|
| 91 | """This is an abstract class."""
|
|---|
| 92 | # Don't call this from a concrete subclass!
|
|---|
| 93 | if self.__class__ is BaseSet:
|
|---|
| 94 | raise TypeError, ("BaseSet is an abstract class. "
|
|---|
| 95 | "Use Set or ImmutableSet.")
|
|---|
| 96 |
|
|---|
| 97 | # Standard protocols: __len__, __repr__, __str__, __iter__
|
|---|
| 98 |
|
|---|
| 99 | def __len__(self):
|
|---|
| 100 | """Return the number of elements of a set."""
|
|---|
| 101 | return len(self._data)
|
|---|
| 102 |
|
|---|
| 103 | def __repr__(self):
|
|---|
| 104 | """Return string representation of a set.
|
|---|
| 105 |
|
|---|
| 106 | This looks like 'Set([<list of elements>])'.
|
|---|
| 107 | """
|
|---|
| 108 | return self._repr()
|
|---|
| 109 |
|
|---|
| 110 | # __str__ is the same as __repr__
|
|---|
| 111 | __str__ = __repr__
|
|---|
| 112 |
|
|---|
| 113 | def _repr(self, sorted=False):
|
|---|
| 114 | elements = self._data.keys()
|
|---|
| 115 | if sorted:
|
|---|
| 116 | elements.sort()
|
|---|
| 117 | return '%s(%r)' % (self.__class__.__name__, elements)
|
|---|
| 118 |
|
|---|
| 119 | def __iter__(self):
|
|---|
| 120 | """Return an iterator over the elements or a set.
|
|---|
| 121 |
|
|---|
| 122 | This is the keys iterator for the underlying dict.
|
|---|
| 123 | """
|
|---|
| 124 | return self._data.iterkeys()
|
|---|
| 125 |
|
|---|
| 126 | # Three-way comparison is not supported. However, because __eq__ is
|
|---|
| 127 | # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
|
|---|
| 128 | # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
|
|---|
| 129 | # case).
|
|---|
| 130 |
|
|---|
| 131 | def __cmp__(self, other):
|
|---|
| 132 | raise TypeError, "can't compare sets using cmp()"
|
|---|
| 133 |
|
|---|
| 134 | # Equality comparisons using the underlying dicts. Mixed-type comparisons
|
|---|
| 135 | # are allowed here, where Set == z for non-Set z always returns False,
|
|---|
| 136 | # and Set != z always True. This allows expressions like "x in y" to
|
|---|
| 137 | # give the expected result when y is a sequence of mixed types, not
|
|---|
| 138 | # raising a pointless TypeError just because y contains a Set, or x is
|
|---|
| 139 | # a Set and y contains a non-set ("in" invokes only __eq__).
|
|---|
| 140 | # Subtle: it would be nicer if __eq__ and __ne__ could return
|
|---|
| 141 | # NotImplemented instead of True or False. Then the other comparand
|
|---|
| 142 | # would get a chance to determine the result, and if the other comparand
|
|---|
| 143 | # also returned NotImplemented then it would fall back to object address
|
|---|
| 144 | # comparison (which would always return False for __eq__ and always
|
|---|
| 145 | # True for __ne__). However, that doesn't work, because this type
|
|---|
| 146 | # *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented,
|
|---|
| 147 | # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.
|
|---|
| 148 |
|
|---|
| 149 | def __eq__(self, other):
|
|---|
| 150 | if isinstance(other, BaseSet):
|
|---|
| 151 | return self._data == other._data
|
|---|
| 152 | else:
|
|---|
| 153 | return False
|
|---|
| 154 |
|
|---|
| 155 | def __ne__(self, other):
|
|---|
| 156 | if isinstance(other, BaseSet):
|
|---|
| 157 | return self._data != other._data
|
|---|
| 158 | else:
|
|---|
| 159 | return True
|
|---|
| 160 |
|
|---|
| 161 | # Copying operations
|
|---|
| 162 |
|
|---|
| 163 | def copy(self):
|
|---|
| 164 | """Return a shallow copy of a set."""
|
|---|
| 165 | result = self.__class__()
|
|---|
| 166 | result._data.update(self._data)
|
|---|
| 167 | return result
|
|---|
| 168 |
|
|---|
| 169 | __copy__ = copy # For the copy module
|
|---|
| 170 |
|
|---|
| 171 | def __deepcopy__(self, memo):
|
|---|
| 172 | """Return a deep copy of a set; used by copy module."""
|
|---|
| 173 | # This pre-creates the result and inserts it in the memo
|
|---|
| 174 | # early, in case the deep copy recurses into another reference
|
|---|
| 175 | # to this same set. A set can't be an element of itself, but
|
|---|
| 176 | # it can certainly contain an object that has a reference to
|
|---|
| 177 | # itself.
|
|---|
| 178 | from copy import deepcopy
|
|---|
| 179 | result = self.__class__()
|
|---|
| 180 | memo[id(self)] = result
|
|---|
| 181 | data = result._data
|
|---|
| 182 | value = True
|
|---|
| 183 | for elt in self:
|
|---|
| 184 | data[deepcopy(elt, memo)] = value
|
|---|
| 185 | return result
|
|---|
| 186 |
|
|---|
| 187 | # Standard set operations: union, intersection, both differences.
|
|---|
| 188 | # Each has an operator version (e.g. __or__, invoked with |) and a
|
|---|
| 189 | # method version (e.g. union).
|
|---|
| 190 | # Subtle: Each pair requires distinct code so that the outcome is
|
|---|
| 191 | # correct when the type of other isn't suitable. For example, if
|
|---|
| 192 | # we did "union = __or__" instead, then Set().union(3) would return
|
|---|
| 193 | # NotImplemented instead of raising TypeError (albeit that *why* it
|
|---|
| 194 | # raises TypeError as-is is also a bit subtle).
|
|---|
| 195 |
|
|---|
| 196 | def __or__(self, other):
|
|---|
| 197 | """Return the union of two sets as a new set.
|
|---|
| 198 |
|
|---|
| 199 | (I.e. all elements that are in either set.)
|
|---|
| 200 | """
|
|---|
| 201 | if not isinstance(other, BaseSet):
|
|---|
| 202 | return NotImplemented
|
|---|
| 203 | return self.union(other)
|
|---|
| 204 |
|
|---|
| 205 | def union(self, other):
|
|---|
| 206 | """Return the union of two sets as a new set.
|
|---|
| 207 |
|
|---|
| 208 | (I.e. all elements that are in either set.)
|
|---|
| 209 | """
|
|---|
| 210 | result = self.__class__(self)
|
|---|
| 211 | result._update(other)
|
|---|
| 212 | return result
|
|---|
| 213 |
|
|---|
| 214 | def __and__(self, other):
|
|---|
| 215 | """Return the intersection of two sets as a new set.
|
|---|
| 216 |
|
|---|
| 217 | (I.e. all elements that are in both sets.)
|
|---|
| 218 | """
|
|---|
| 219 | if not isinstance(other, BaseSet):
|
|---|
| 220 | return NotImplemented
|
|---|
| 221 | return self.intersection(other)
|
|---|
| 222 |
|
|---|
def intersection(self, other):
    """Return the intersection of two sets as a new set.

    (I.e. all elements that are in both sets.)

    If other is not a BaseSet it is first passed to Set(other). The
    result is a new instance of this set's class.
    """
    if not isinstance(other, BaseSet):
        other = Set(other)
    # Walk the smaller operand and probe the larger one's dict, so the
    # number of lookups is bounded by the smaller size.
    if len(self) <= len(other):
        little, big = self, other
    else:
        little, big = other, self
    # __contains__ is the same membership test as the deprecated
    # dict.has_key().
    common = ifilter(big._data.__contains__, little)
    return self.__class__(common)
|
|---|
| 236 |
|
|---|
| 237 | def __xor__(self, other):
|
|---|
| 238 | """Return the symmetric difference of two sets as a new set.
|
|---|
| 239 |
|
|---|
| 240 | (I.e. all elements that are in exactly one of the sets.)
|
|---|
| 241 | """
|
|---|
| 242 | if not isinstance(other, BaseSet):
|
|---|
| 243 | return NotImplemented
|
|---|
| 244 | return self.symmetric_difference(other)
|
|---|
| 245 |
|
|---|
def symmetric_difference(self, other):
    """Return the symmetric difference of two sets as a new set.

    (I.e. all elements that are in exactly one of the sets.)

    If other has no _data attribute, Set(other) is built and its _data
    is used instead. The result is a new instance of this set's class.
    """
    result = self.__class__()
    data = result._data
    value = True
    selfdata = self._data
    try:
        otherdata = other._data
    except AttributeError:
        otherdata = Set(other)._data
    # __contains__ is the same membership test as the deprecated
    # dict.has_key(); each pass keeps the keys missing from the other dict.
    for elt in ifilterfalse(otherdata.__contains__, selfdata):
        data[elt] = value
    for elt in ifilterfalse(selfdata.__contains__, otherdata):
        data[elt] = value
    return result
|
|---|
| 264 |
|
|---|
| 265 | def __sub__(self, other):
|
|---|
| 266 | """Return the difference of two sets as a new Set.
|
|---|
| 267 |
|
|---|
|
|---|