how to tell a variable is iterable but not a string - python

I have a function that takes an argument which can be either a single item or a double item:
def iterable(arg)
if #arg is an iterable:
print "yes"
else:
print "no"
so that:
>>> iterable( ("f","f") )
yes
>>> iterable( ["f","f"] )
yes
>>> iterable("ff")
no
The problem is that string is technically iterable, so I can't just catch the ValueError when trying arg[1]. I don't want to use isinstance(), because that's not good practice (or so I'm told).

Use isinstance (I don't see why it's bad practice)
import types
if not isinstance(arg, types.StringTypes):
Note the use of StringTypes. It ensures that we don't forget about some obscure type of string.
On the upside, this also works for derived string classes.
class MyString(str):
    """Trivial ``str`` subclass used to show that isinstance() also accepts derived string classes."""
    pass
isinstance(MyString(" "), types.StringTypes) # true
Also, you might want to have a look at this previous question.
Cheers.
NB: behavior changed in Python 3 as StringTypes and basestring are no longer defined. Depending on your needs, you can replace them in isinstance by str, or a subset tuple of (str, bytes, unicode), e.g. for Cython users.
As @Theron Luhn mentioned, you can also use six.

As of 2017, here is a portable solution that works with all versions of Python:
#!/usr/bin/env python
try:
    from collections.abc import Iterable  # Python 3.3+
except ImportError:  # Python 2
    from collections import Iterable

# Same tuple that six.string_types provides, without the third-party dependency.
try:
    string_types = (basestring,)  # Python 2: covers both str and unicode
except NameError:
    string_types = (str,)  # Python 3: only str is a text string


def iterable(arg):
    """Return True if *arg* is iterable but is not a text string.

    Args:
        arg: Any object.

    Returns:
        bool: True for non-string iterables (tuples, lists, iterators, ...),
        False for text strings and for objects that are not iterable.
    """
    return isinstance(arg, Iterable) and not isinstance(arg, string_types)


# non-string iterables
assert iterable(("f", "f"))  # tuple
assert iterable(["f", "f"])  # list
assert iterable(iter("ff"))  # iterator
assert iterable(range(44))  # range object (a list on Python 2)
assert iterable(b"ff")  # bytes (a string on Python 2, where this assertion would not hold)
# strings or non-iterables
assert not iterable(u"ff")  # string
assert not iterable(44)  # integer
assert not iterable(iterable)  # function

Since Python 2.6, with the introduction of abstract base classes, isinstance (used on ABCs, not concrete classes) is now considered perfectly acceptable. Specifically:
from abc import ABCMeta, abstractmethod


class NonStringIterable:
    """Abstract base class matching any class that defines ``__iter__``.

    Written for Python 2.6+, where ``__metaclass__`` selects the metaclass.
    Membership is decided structurally by ``__subclasshook__``; nothing is
    registered explicitly.
    """

    __metaclass__ = ABCMeta  # Python 2 metaclass syntax

    @abstractmethod
    def __iter__(self):
        # An empty generator: gives the abstract method a valid iterator body.
        while False:
            yield None

    @classmethod
    def __subclasshook__(cls, C):
        """Treat *C* as a subclass when any class in its MRO defines ``__iter__``.

        Returns True on a match and NotImplemented otherwise, so the normal
        ABC machinery decides the remaining cases.
        """
        if cls is NonStringIterable:
            if any("__iter__" in B.__dict__ for B in C.__mro__):
                return True
        return NotImplemented
This is an exact copy (changing only the class name) of Iterable as defined in _abcoll.py (an implementation detail of collections.py)... the reason this works as you wish, while collections.Iterable doesn't, is that the latter goes the extra mile to ensure strings are considered iterable, by calling Iterable.register(str) explicitly just after this class statement.
Of course it's easy to augment __subclasshook__ by returning False before the any call for other classes you want to specifically exclude from your definition.
In any case, after you have imported this new module as myiter, isinstance('ciao', myiter.NonStringIterable) will be False, and isinstance([1,2,3], myiter.NonStringIterable) will be True, just as you request -- and in Python 2.6 and later this is considered the proper way to embody such checks... define an abstract base class and check isinstance on it.

By combining previous replies, I'm using:
import types
import collections
#[...]
if isinstance(var, types.StringTypes ) \
or not isinstance(var, collections.Iterable):
#[Do stuff...]
Not 100% foolproof, but if an object is not an iterable you can still let it pass and fall back to duck typing.
Edit: Python3
types.StringTypes == (str, unicode). The Python 3 equivalent is:
if isinstance(var, str ) \
or not isinstance(var, collections.Iterable):
Edit: Python3.3
types.StringTypes == (str, unicode). The Python 3 equivalent is:
if isinstance(var, str ) \
or not isinstance(var, collections.abc.Iterable):

I realise this is an old post but thought it was worth adding my approach for Internet posterity. The function below seems to work for me under most circumstances with both Python 2 and 3:
def is_collection(obj):
    """Return True for any iterable which is not a string or byte sequence.

    Args:
        obj: Any object.

    Returns:
        bool: False for ``unicode`` (where that name exists), ``bytes`` and
        text strings, and for objects ``iter()`` rejects; True otherwise.
    """
    try:
        # ``unicode`` is only defined on Python 2; elsewhere the lookup raises
        # NameError and text strings are caught by the hasattr() probe below.
        if isinstance(obj, unicode):
            return False
    except NameError:
        pass
    if isinstance(obj, bytes):
        return False
    try:
        iter(obj)
    except TypeError:
        return False
    try:
        # (Mis)use of hasattr(): it raises TypeError when its second argument
        # is not a string, which is exactly the non-string case we want.
        hasattr(None, obj)
    except TypeError:
        return True
    return False
This checks for a non-string iterable by (mis)using the built-in hasattr which will raise a TypeError when its second argument is not a string or unicode string.

2.x
I would have suggested:
hasattr(x, '__iter__')
or in view of David Charles' comment tweaking this for Python3, what about:
hasattr(x, '__iter__') and not isinstance(x, (str, bytes))
3.x
the builtin basestring abstract type was removed. Use str instead. The str and bytes types don’t have functionality enough in common to warrant a shared base class.

To explicitly expand on Alex Martelli's excellent hack of collections.py and address some of the questions around it: The current working solution in python 3.6+ is
import collections
import _collections_abc as cabc
import abc


class NonStringIterable(metaclass=abc.ABCMeta):
    """Abstract base class for iterables, with ``str`` (and subclasses) excluded."""

    __slots__ = ()

    @abc.abstractmethod
    def __iter__(self):
        # An empty generator: gives the abstract method a valid iterator body.
        while False:
            yield None

    @classmethod
    def __subclasshook__(cls, c):
        """Reject ``str`` subclasses; otherwise defer to the ``__iter__`` method check."""
        if cls is NonStringIterable:
            if issubclass(c, str):
                return False
            # NOTE: _check_methods is a private helper of _collections_abc.
            return cabc._check_methods(c, "__iter__")
        return NotImplemented
and demonstrated
>>> typs = ['string', iter(''), list(), dict(), tuple(), set()]
>>> [isinstance(o, NonStringIterable) for o in typs]
[False, True, True, True, True, True]
If you want to add iter('') into the exclusions, for example, modify the line
if issubclass(c, str):
return False
to be
# `str_iterator` is just a shortcut for `type(iter(''))`*
if issubclass(c, (str, cabc.str_iterator)):
return False
to get
[False, False, True, True, True, True]

If you would like to test whether a variable is an iterable object and not a "string-like" object (str, bytes, ...), you can use the fact that the __mod__() method exists on such "string-like" objects for formatting purposes. So you can do a check like this:
>>> def is_not_iterable(item):
... return hasattr(item, '__trunc__') or hasattr(item, '__mod__')
>>> is_not_iterable('')
True
>>> is_not_iterable(b'')
True
>>> is_not_iterable(())
False
>>> is_not_iterable([])
False
>>> is_not_iterable(1)
True
>>> is_not_iterable({})
False
>>> is_not_iterable(set())
False
>>> is_not_iterable(range(19)) #considers also Generators or Iterators
False

As you point out correctly, a single string is a character sequence.
So the thing you really want to do is to find out what kind of sequence arg is by using isinstance or type(a)==str.
If you want to realize a function that takes a variable amount of parameters, you should do it like this:
def function(*args):
    """Call ``do_something`` once for each positional argument."""
    # args is a tuple
    for arg in args:
        do_something(arg)
function("ff") and function("ff", "ff") will work.
I can't see a scenario where an isiterable() function like yours is needed. It isn't isinstance() that is bad style but situations where you need to use isinstance().

Adding another answer here that doesn't require extra imports and is maybe more "pythonic", relying on duck typing and the fact that str has had a unicode casefold method since Python 3.
def iterable_not_string(x):
    '''
    Check if input has an __iter__ method and then determine if it's a
    string by checking for a casefold method.

    Returns True only when ``x`` has ``__iter__`` and lacks ``casefold``.
    hasattr() is used rather than ``assert x.attr`` because assert
    statements are stripped under ``python -O``, which would make the
    attribute probes disappear.
    '''
    # could check the following instead for python 2.7 because
    # str and unicode types both had a splitlines method
    # hasattr(x, 'splitlines')
    return hasattr(x, '__iter__') and not hasattr(x, 'casefold')

Python 3.X
Notes:
You need implement "isListable" method.
In my case dict is not iterable because iter(obj_dict) returns an iterator of just the keys.
Sequences are iterables, but not all iterables are sequences (immutable, mutable).
set, dict are iterables but not sequence.
list is iterable and sequence.
str is an iterable and immutable sequence.
Sources:
https://docs.python.org/3/library/stdtypes.html
https://opensource.com/article/18/3/loop-better-deeper-look-iteration-python
See this example:
from typing import Iterable, Sequence, MutableSequence, Mapping, Text


class Custom():
    """Plain class with no sequence behaviour, used as a negative example."""
    pass


def isListable(obj):
    """Return True if *obj* is a mutable sequence, or a class that is one.

    Classes are checked with issubclass() rather than by instantiating them,
    so classes whose ``__new__`` needs arguments no longer raise.
    """
    if isinstance(obj, type):
        return issubclass(obj, MutableSequence)
    return isinstance(obj, MutableSequence)


# Listable
#o = [Custom()]
#o = ["a","b"]
#o = [{"a":"va"},{"b":"vb"}]
#o = list # class type
# Not listable
#o = {"a" : "Value"}
o = "Only string"
#o = 1
#o = False
#o = 2.4
#o = None
#o = Custom()
#o = {1, 2, 3} #type set
#o = (n**2 for n in {1, 2, 3})
#o = bytes("Only string", 'utf-8')
#o = Custom # class type
if isListable(o):
    print("Is Listable[%s]: %s" % (o.__class__, str(o)))
else:
    print("Not Listable[%s]: %s" % (o.__class__, str(o)))

Related

__iter__: int and str vs list and tuple

some_obj = "scalar"
list_like = "__iter__" in dir(some_obj) # Py2: False; Py3: True
I used it in python 2 to distinguish between "non-iterables" (str, int, bool, None) and iterables (list, dict, tuples).
This does not work with python3 anymore, since str has now the __iter__ attribute (Why do strings in python 2.7 not have the "__iter__" attribute, but strings in python 3.7 have the "__iter__" attribute).
Well, often it is desirable to regard str not as list-like. So is there a better py2+py3 way than "__iter__" in dir(some_obj) and not type(some_obj)==str or all the case checks in this question?
Am I missing other objects that are disputable like str?
I'm not sure if it's good to use __iter__ to check the type, there is a better choice for this one, the Iterable type.
It's your own opinion to divide the groups, so I think the easiest way is to set blacklists...
try:
    from collections.abc import Iterable  # py3
except ImportError:
    from collections import Iterable  # py2


def check(arg):
    """Return True if *arg* is an Iterable that is neither ``str`` nor ``bytes``."""
    # Blacklist approach: anything iterable passes unless it is a string type.
    return isinstance(arg, Iterable) and not isinstance(arg, (str, bytes))
Edited:
To not trigger confusion with my answer I am quoting the docs here.
class collections.abc.Iterable
ABC for classes that provide the __iter__() method.
Checking isinstance(obj, Iterable) detects classes that are registered
as Iterable or that have an __iter__() method, but it does not detect
classes that iterate with the __getitem__() method. The only reliable
way to determine whether an object is iterable is to call iter(obj).
This works in 2 and 3.
str is iterable, of course.
# Probe each object with iter(): a TypeError means it is not iterable.
n1 = 1
s1 = 'abc'
objs = [n1, s1]
for o in objs:
    try:
        iter(o)
    except TypeError:
        print(o, 'is not Iterable!')
    else:
        print(o, 'is Iterable!')
Output:
1 is not Iterable!
abc is Iterable!
The quote was taken from here

Custom IDE-compatible static-types in Python

For the sake of nicer design and OOP, I would like to create a custom IDE-compatible static type. For instance, consider the following idealized class:
class IntOrIntString(Union[int, str]):
    """Idealized type accepting ints and strings that parse as ints.

    NOTE: illustrative only. Subclassing ``Union[int, str]`` raises
    ``TypeError: Cannot subclass typing.Union``, so this class statement
    does not run as written.
    """

    @staticmethod
    def is_int_string(item):
        """Return True if ``int(item)`` succeeds, False otherwise."""
        try:
            int(item)
            return True
        except (TypeError, ValueError, OverflowError):
            return False

    def __instancecheck__(self, instance):
        # I know __instancecheck__ is declared in the metaclass. It's written here for the sake of the argument.
        return isinstance(instance, int) or (isinstance(instance, str) and self.is_int_string(instance))

    @staticmethod
    def as_integer(item):
        """Convert *item* to ``int``."""
        return int(item)
Now, this is a silly class, I know, but it serves as a simple example. Defining such class has the following advantages:
It allows for static type-checking in the IDE (e.g. def parse(s: IntOrIntString): ...).
It allows dynamic type-checking (e.g. isinstance(item, IntOrIntString)).
It can be used to better encapsulate type-related static functions (e.g. integer = IntOrIntString.as_integer(item)).
However, this code won't run because Union[int, str] can not be subclassed - I get:
TypeError: Cannot subclass typing.Union
So, I tried to work-around this by creating this "type" by referring to it as an instance of Union (which it actually is). Meaning:
IntOrIntString = Union[int, str]
IntOrIntString.as_integer = lambda item: int(item)
...
but that didn't work either as I get the error message
AttributeError: '_Union' object has no attribute 'as_integer'
Any thoughts on how that could be accomplished, or, perhaps, justifications for why it shouldn't be possible to accomplish?
I use python 3.6, but that's not set in stone because I could change the version if needed. The IDE I use is PyCharm.
Thanks
Edit: Two more possible examples for where this is useful:
The type AnyNumber that can accept any number that I wish. Maybe starting with float and int, but can be extended to support any number-like type I want such as int-strings, or single-item iterables. Such extension is immediately system-wide, which is a huge bonus. As an example, consider the function
def func(n: AnyNumber):
n = AnyNumber.get_as_float()
# The rest of the function is implemented just for float.
...
Working with pandas, you can usually perform similar operations on Series, DataFrame and Index, so suppose that there's a "type-class" like above called SeriesContainer that simplifies the usage - allows me to handle all the data-types uniformly by invoking SeriesContainer.as_series_collection(...), or SeriesContainer.as_data_frame(...) depending on the usage.
if I were you I would avoid creating such classes since they create unnecessary type ambiguity. Instead, to take your example, in order to achieve the objective of differentiating between a regular string and an int string, this is how I would go about it. First, make a (non static) intString class:
from typing import Union
class intString(object):
    """Wrapper for a string that is known to parse as an integer.

    The constructor validates the string; on failure it prints a message and
    terminates the program with exit status 1.
    """

    def __init__(self, item: str):
        try:
            int(item)
        except ValueError:
            print("error message")
            # Raise SystemExit directly: the ``exit`` helper is injected by the
            # ``site`` module and is not guaranteed to exist.
            raise SystemExit(1)
        self.val = item

    def __int__(self):
        """Allow ``int(instance)`` to return the wrapped value as an int."""
        return int(self.val)
(It might be better to inherit from str, but I'm not sure how to do it correctly and it's not material to the issue).
Lets say we have the following three variables:
regular_string = "3"
int_string = intString(regular_string)
int_literal = 3
Now we can use the built in python tools to achieve our three objectives:
static type checking:
def foo(f: Union[int, intString]):
pass
foo(regular_string) # Warning
foo(3) # No warnings
foo(int_string) # No warnings
You will notice that here we have stricter type checking than what you were proposing - even though the first string can be cast into an intString, the IDE will recognize that it isn't one before runtime and warn you.
Dynamic type checking:
print(isinstance(regular_string, (intString, int))) # <<False
print(isinstance(int_string, (intString, int))) # <<True
print(isinstance(int_literal, (intString, int))) # <<True
Notice that isinstance returns true if any of the items in the tuple match any of its parent classes or its own class.
I'm not sure that I understood how this relates to encapsulation honestly. But since we defined the int operator in the IntString class, we have polymorphism with ints as desired:
for i in [intString("4"), 5, intString("77"), "5"]:
print(int(i))
will print 4, 5, 77 and 5 (the plain string "5" is converted by int() as well).
I'm sorry if I got too hung up on this specific example, but I just found it hard to imagine a situation where merging different types like this would be useful, since I believe that the three advantages you brought up can be achieved in a more pythonic manner.
I suggest you take a look at https://docs.python.org/3/library/typing.html#newtype for more basic functionality relating to defining new types.
A couple thoughts. First, Union[int, str] includes all strings, even strings like "9.3" and "cat", which don't look like an int.
If you're okay with this, you could do something like the following:
intStr = Union[int, str]
isinstance(5, intStr.__args__) # True
isinstance(5.3, intStr.__args__) # False
isinstance("5.3", intStr.__args__) # True
isinstance("howdy", intStr.__args__) # True
Note that when using a Union type, or a type with an origin of Union, you have to use .__args__ for isinstance() to work, as isinstance() doesn't work with straight up Unions. It can't differentiate Unions from generic types.
I'm assuming, though, that intStr shouldn't include all strings, but only a subset of strings. In this case, why not separate the type-checking methods from the type hinting?
def intStr_check(x):
    """Check whether x is an intStr: an int, or a str that int() can parse.

    Returns:
        bool: True for ints and int-parsable strings, False for anything else.
    """
    if isinstance(x, int):
        return True
    if isinstance(x, str):
        try:
            int(x)
        except ValueError:  # the only error int() raises for a str argument
            return False
        return True
    return False
Then simply use that function in place of isinstance() when checking if the type is an intStr.
Note that your original method had an error, being that int(3.14) does not throw an error and would have passed your check.
Now that we've gotten isinstance() out of the way, if for parsing purposes you need to differentiate intStr objects from Union[int,str] objects, you could use the NewType from typing like so:
from typing import NewType, Union

# Distinct static type for values that are an int or a string of an int.
IntStr = NewType("IntStr", Union[int, str])


def some_func(a: IntStr):
    """Return ``int(a) + 1``.

    Raises:
        ValueError: if ``intStr_check(a)`` is false.
    """
    if intStr_check(a):
        return int(a) + 1
    else:
        raise ValueError("Argument must be an intStr (an int or string of an int)")


some_num = IntStr("9")
print(some_func(some_num))  # 10
There's no need to create an as_integer() function or method, as it's exactly the same as int(), which is more concise and readable.
My opinion on style: nothing should be done simply for the sake of OOP. Sure, sometimes you need to store state and update parameters, but in cases where that's unnecessary, I believe OOP tends to lead to more verbose code, and potentially more headaches maintaining mutable state and avoiding unintended side effects. Hence, I prefer to declare new classes only when necessary.
EDIT: Since you insist on reusing the function name isinstance, you can overwrite isinstance to add additional functionality like so:
from typing import NewType, Union, get_args, get_origin

# Keep a handle on the builtin before the name is shadowed below.
isinstance_original = isinstance


def intStr_check(x):
    """Check whether x is an intStr: an int, or a str that int() can parse."""
    if isinstance_original(x, int):
        return True
    if isinstance_original(x, str):
        try:
            int(x)
        except ValueError:
            return False
        return True
    return False


def isinstance(x, t):
    """Drop-in ``isinstance`` that also understands 'IntStr' and ``Union[...]``.

    Args:
        x: Object to test.
        t: The string 'IntStr', a ``typing.Union[...]``, or anything the
            builtin isinstance() accepts as its second argument.

    Returns:
        bool: result of the matching check.
    """
    if t == 'IntStr':  # run intStr_check
        return intStr_check(x)
    # get_origin() is the public way to recognise Union[...]; comparing
    # type(t) with the private typing._GenericAlias misses newer versions.
    if get_origin(t) is Union:  # check Union types
        try:
            return any(isinstance_original(x, member) for member in get_args(t))
        except TypeError:
            # A member the builtin cannot test (e.g. a subscripted generic).
            return isinstance_original(x, t)
    # regular isinstance
    return isinstance_original(x, t)


# Some tests
assert isinstance("4", 'IntStr') == True
assert isinstance("4.2", 'IntStr') == False
assert isinstance("4h", 'IntStr') == False
assert isinstance(4, 'IntStr') == True
assert isinstance(4.2, int) == False
assert isinstance(4, int) == True
assert isinstance("4", int) == False
assert isinstance("4", str) == True
assert isinstance(4, Union[str, int]) == True
assert isinstance(4, Union[str, float]) == False
Just be careful not to run isinstance_original = isinstance multiple times.
You could still use IntStr = NewType("IntStr", Union[int,str]) for static type checking, but since you're in love with OOP, you could also do something like the following:
class IntStr:
    """An integer or a string of an integer.

    Relies on an ``isinstance`` that accepts the string 'IntStr' as its second
    argument; the builtin isinstance() would raise TypeError here.
    """

    def __init__(self, value):
        self.value = value
        if not (isinstance(self.value, 'IntStr')):
            raise ValueError(f"could not convert {type(self.value)} to IntStr (an int or string of int): {self.value}")

    def check(self):
        """Re-validate the current value (it can be reassigned after construction)."""
        return isinstance(self.value, 'IntStr')

    def as_integer(self):
        """Return the wrapped value converted with int()."""
        return int(self.value)

    def __call__(self):
        """Return the wrapped value unchanged."""
        return self.value


# Some tests
try:
    a = IntStr("4.2")
except ValueError:
    print("it works")
a = IntStr("4")
print(f"a == {a()}")
assert a.as_integer() + 1 == 5
assert isinstance(a, IntStr) == True
assert isinstance(a(), str) == True
assert a.check() == True
a.value = 4.2
assert a.check() == False

Is it possible for `__contains__` to return non-boolean value?

The documentation says that __contains__ should return true if item is in self, false otherwise. However, if the method returns a non-boolean value x, it seems that python automatically converts it to bool(x).
Is there any way to avoid that, and return the actual value x? Or is this feature behavior implemented directly in the interpreter and there's no way to change this?
Note that it's not __contains__ that converts the value to a Boolean, but the in operator that calls __contains__. With
class Foo(list):
    """List whose ``__contains__`` returns the matched item instead of True."""

    def __contains__(self, v):
        # Returns v itself when present. A falsy member such as 0 therefore
        # makes the ``in`` operator report False, because ``in`` applies bool().
        if super().__contains__(v):
            return v
        else:
            return False
>>> f = Foo([1,2,3])
>>> f.__contains__(2)
2
>>> 2 in f
True
A foo in bar will be compiled to COMPARE_OP (in) for CPython3. The implementation uses PySequence_Contains() and then coerces the result to a bool. So while you could return something else, you always end up with a bool after the call.
__bool__ is indeed being called on the return value of __contains__.
Consider the following classes:
class BoolWithPrint:
    """Wrapper that announces when its truth value is evaluated."""

    def __init__(self, value):
        self.value = value

    def __bool__(self):
        print("Im being booled.")
        # Returned as-is: a non-bool value makes bool() raise TypeError.
        return self.value
class StrangeContains:
    """Container whose membership test returns a BoolWithPrint wrapping the probed item."""

    def __contains__(self, x):
        return BoolWithPrint(x)
... which behave like this:
>>> True in StrangeContains()
Im being booled.
True
>>> False in StrangeContains()
Im being booled.
False
>>> 'stuff' in StrangeContains()
Im being booled.
[...]
TypeError: __bool__ should return bool, returned str
So as far as I know, you are out of luck. You could sneakily override __bool__ on the value __contains__ returns, but that will only delay the TypeError because __bool__ must return True or False.
For additional context, see Can the Python bool() function raise an exception for an invalid argument?.
In Python documentation, section 6.10.2. Membership test operations says:
For user-defined classes which define the __contains__() method, x in y
returns True if y.__contains__(x) returns a true value, and False
otherwise.
So clearly, if you return a non-bool, the in operator will still return a boolean.
If you directly call __contains__, then of course you will get whatever result is returned from it.
For example:
class X:
    """Container whose ``__contains__`` always returns the non-boolean value 11."""

    def __contains__(self, other):
        return 11
x = X()
8 in x # True
x.__contains__(8) # 11

How to determine that a named tuple is a namedtuple object? [duplicate]

How do I check if an object is an instance of a Named tuple?
Calling the function collections.namedtuple gives you a new type that's a subclass of tuple (and no other classes) with a member named _fields that's a tuple whose items are all strings. So you could check for each and every one of these things:
def isnamedtupleinstance(x):
    """Heuristically decide whether *x* is an instance of a namedtuple class.

    True when type(x) has ``tuple`` as its only base and a ``_fields``
    attribute that is a tuple made up solely of ``str`` items.
    """
    t = type(x)
    b = t.__bases__
    # Classes are compared by identity, not equality.
    if len(b) != 1 or b[0] is not tuple:
        return False
    f = getattr(t, '_fields', None)
    if not isinstance(f, tuple):
        return False
    return all(type(n) is str for n in f)
it IS possible to get a false positive from this, but only if somebody's going out of their way to make a type that looks a lot like a named tuple but isn't one;-).
If you want to determine whether an object is an instance of a specific namedtuple, you can do this:
from collections import namedtuple
SomeThing = namedtuple('SomeThing', 'prop another_prop')
SomeOtherThing = namedtuple('SomeOtherThing', 'prop still_another_prop')
a = SomeThing(1, 2)
isinstance(a, SomeThing) # True
isinstance(a, SomeOtherThing) # False
3.7+
def isinstance_namedtuple(obj) -> bool:
    """Return True if *obj* is a tuple exposing both ``_asdict`` and ``_fields``."""
    return (
        isinstance(obj, tuple) and
        hasattr(obj, '_asdict') and
        hasattr(obj, '_fields')
    )
If you need to check before calling namedtuple specific functions on it, then just call them and catch the exception instead. That's the preferred way to do it in python.
Improving on what Lutz posted:
def isinstance_namedtuple(x):
    """Return True if *x* looks like a namedtuple instance.

    *x* must be a tuple with a non-None ``_fields`` attribute and expose its
    dictionary aspect, either as a mapping ``__dict__`` or as a callable
    ``_asdict``. The ``_asdict`` alternative is needed because current
    namedtuple instances have no ``__dict__``.
    """
    # Local import: collections.Mapping was removed in Python 3.10; the ABC
    # lives in collections.abc.
    import collections.abc

    if not isinstance(x, tuple) or getattr(x, '_fields', None) is None:
        return False
    has_mapping_dict = isinstance(getattr(x, '__dict__', None), collections.abc.Mapping)
    return has_mapping_dict or callable(getattr(x, '_asdict', None))
I use
isinstance(x, tuple) and isinstance(x.__dict__, collections.abc.Mapping)
which to me appears to best reflect the dictionary aspect of the nature of named tuples.
It appears robust against some conceivable future changes too and might also work with many third-party namedtuple-ish classes, if such things happen to exist.
IMO this might be the best solution for Python 3.6 and later.
You can set a custom __module__ when you instantiate your namedtuple, and check for it later
from collections import namedtuple
# module parameter added in python 3.6
namespace = namedtuple("namespace", "foo bar", module=__name__ + ".namespace")
then check for __module__
if getattr(x, "__module__", None) == "xxxx.namespace":

Detecting if an iterator will be consumed

Is there a uniform way of knowing if an iterable object will be consumed by the iteration?
Suppose you have a certain function crunch which asks for an iterable object for parameter, and uses it many times. Something like:
def crunch(vals):
    """Apply ``chomp`` to every item of *vals*, then ``yum`` to every item.

    *vals* is iterated twice, so a one-shot iterator is exhausted by the
    first loop and the second loop sees nothing.
    """
    for v in vals:
        chomp(v)
    for v in vals:
        yum(v)
(note: merging together the two for loops is not an option).
An issue arises if the function gets called with an iterable which is not a list. In the following call the yum function is never executed:
crunch(iter(range(4)))
We could in principle fix this by redefining the crunch function as follows:
def crunch(vals):
    """Apply ``chomp`` then ``yum`` to every item, after copying *vals* into a list."""
    # Materialize first so both passes see every item, even for iterators.
    vals = list(vals)
    for v in vals:
        chomp(v)
    for v in vals:
        yum(v)
But this would result in using twice the memory if the call to crunch is:
hugeList = list(longDataStream)
crunch(hugeList)
We could fix this by defining crunch like this:
def crunch(vals):
    """Apply ``chomp`` then ``yum`` to every item, copying *vals* only if it is not a list."""
    # isinstance() instead of an exact type comparison, so list subclasses
    # are not copied needlessly either.
    if not isinstance(vals, list):
        vals = list(vals)
    for v in vals:
        chomp(v)
    for v in vals:
        yum(v)
But there could still be the case in which the calling code stores data in something which
cannot be consumed
is not a list
For instance:
from collections import deque
hugeDeque = deque(longDataStream)
crunch(hugeDeque)
It would be nice to have a isconsumable predicate, so that we can define crunch like this:
def crunch(vals):
    """Apply ``chomp`` then ``yum`` to every item, copying *vals* only when ``isconsumable`` says so."""
    if isconsumable(vals):
        vals = list(vals)
    for v in vals:
        chomp(v)
    for v in vals:
        yum(v)
Is there a solution for this problem?
One possibility is to test whether the item is a Sequence, using isinstance(val, collections.Sequence). Non-consumability still isn't totally guaranteed but I think it's about the best you can get. A Python sequence has to have a length, which means that at least it can't be an open-ended iterator, and in general implies that the elements have to be known ahead of time, which in turn implies that they can be iterated over without consuming them. It's still possible to write pathological classes that fit the sequence protocol but aren't re-iterable, but you'll never be able to handle those.
Note that neither Iterable nor Iterator is the appropriate choice, because these types don't guarantee a length, and hence can't guarantee that the iteration will even be finite, let alone repeatable. You could, however, check for both Sized and Iterable.
The important thing is to document that your function will iterate over its argument twice, thus warning users that they must pass in an object that supports this.
Another, additional option could be to query if the iterable is its own iterator:
if iter(vals) is vals:
vals = list(vals)
because in this case, it is just an iterator.
This works with generators, iterators, files and many other objects which are designed for "one run", in other words, all iterables which are iterators by itself, because an iterator returns self from its __iter__().
But this might not be enough, because there are objects which empty themselves on iteration without being their own iterator.
Normally, a self-consuming object will be its own iterator, but there are cases where this might not be allowed.
Imagine a class which wraps a list and empties this list on iteration, such as
class ListPart(object):
    """Take a list apart piece by piece.

    Iterating pops items from the front of the wrapped list, so the caller's
    list is emptied as a side effect.
    """

    def __init__(self, data=None):
        if data is None:
            data = []
        self.data = data  # the caller's list itself, not a copy

    def next(self):
        """Pop and return the first remaining item; raise StopIteration when empty."""
        try:
            return self.data.pop(0)
        except IndexError:
            raise StopIteration

    # Python 3 looks up __next__; keep ``next`` for Python 2 callers.
    __next__ = next

    def __iter__(self):
        return self

    def __len__(self):  # doesn't work with __getattr__...
        return len(self.data)
which you call like
l = [1, 2, 3, 4]
lp = ListPart(l)
for i in lp: process(i)
# now l is empty.
If I add now additional data to that list and iterate over the same object again, I'll get the new data which is a breach of the protocol:
The intention of the protocol is that once an iterator’s next() method raises StopIteration, it will continue to do so on subsequent calls. Implementations that do not obey this property are deemed broken. (This constraint was added in Python 2.3; in Python 2.2, various iterators are broken according to this rule.)
So in this case, the object would have to return an iterator distinct from itself despite of being self-consuming. In this case, this could be done with
def __iter__(self):
    """Yield items popped from the front of ``self.data`` until it is empty.

    As a generator function it hands back a new generator on every call, so
    the object is not its own iterator.
    """
    while True:
        try:
            # Pop from the instance's own list, not from a global ``l``.
            yield self.data.pop(0)
        except IndexError:  # pop from empty list
            return
which returns a new generator on each iteration - something which would fall through the mesh in the case we are discussing.
def crunch(vals):
    """Apply ``chomp`` then ``yum`` to every item, using two ``itertools.tee`` copies of *vals*."""
    # Local import: the snippet uses itertools without importing it.
    import itertools

    vals1, vals2 = itertools.tee(vals, 2)
    for v in vals1:
        chomp(v)
    for v in vals2:
        yum(v)
In this case tee will end up storing the entirety of vals internally, since one iterator is completed before the other one is started.
Many answers come close to the point but miss it.
An Iterator is an object that is consumed by iterating over it. There is no way around it. Example of iterator objects are those returned by calls to iter(), or those returned by the functions in the itertools module.
The proper way to check whether an object is an iterator is to call isinstance(obj, Iterator). This basically checks whether the object implements the next() method (__next__() in Python 3) but you don't need to care about this.
So, remember, an iterator is always consumed. For example:
# suppose you have a list
my_list = [10, 20, 30]
# and build an iterator on the list
my_iterator = iter(my_list)
# iterate the first time over the object
for x in my_iterator:
    print(x)
# then again
for x in my_iterator:
    print(x)
This will print the content of the list just once.
Then there are Iterable objects. When you call iter() on an iterable it will return an iterator. Commenting in this page I made myself an error, so I will clarify here. Iterable objects are not required to return a new iterator on every call. Many iterators themselves are iterables (i.e. you can call iter() on them) and they will return the object itself.
A simple example of this is a list iterator: for it = iter(my_list), iter(it) returns it itself, and this is basically what @glglgl's answer is checking for.
The iterator protocol requires iterator objects to return themselves as their own iterator (and thus be iterable). This is not required for the iteration mechanics to work, but you wouldn't be able to loop over the iterator object.
All of this said, what you should do is check whether you're given an Iterator, and if that's the case, make a copy of the result of the iteration (with list()). Your isconsumable(obj) is (as someone already said) isinstance(obj, Iterator).
Note that this also works for xrange(). xrange(10) returns an xrange object. Every time you iter over the xrange objects it returns a new iterator starting from the start, so you're fine and don't need to make a copy.
Here is a summary of definitions.
container
An object with a __contains__ method
generator
A function which returns an iterator.
iterable
A object with an __iter__() or __getitem__() method.
Examples of iterables include all sequence types (such as list,
str, and tuple) and some non-sequence types like dict and file.
When an iterable object is passed as an argument to the builtin
function iter(), it returns an iterator for the object. This
iterator is good for one pass over the set of values.
iterator
An iterable which has a next() method.
Iterators are required to have an
__iter__() method that returns the iterator object itself.
An iterator is
good for one pass over the set of values.
sequence
An iterable which supports efficient element access using integer
indices
via the __getitem__() special method and defines a len() method that returns
the length of the sequence.
Some built-in sequence types are list, str,
tuple, and unicode.
Note that dict also supports __getitem__() and
__len__(), but is considered a mapping rather than a sequence because the
lookups use arbitrary immutable keys rather than integers.
Now there is a multitude of ways of testing if an object is an iterable, or iterator, or sequence of some sort. Here is a summary of these ways, and how they classify various kinds of objects:
Iterable Iterator iter_is_self Sequence MutableSeq
object
[] True False False True True
() True False False True False
set([]) True False False False False
{} True False False False False
deque([]) True False False False False
<listiterator> True True True False False
<generator> True True True False False
string True False False True False
unicode True False False True False
<open> True True True False False
xrange(1) True False False True False
Foo.__iter__ True False False False False
Sized has_len has_iter has_contains
object
[] True True True True
() True True True True
set([]) True True True True
{} True True True True
deque([]) True True True False
<listiterator> False False True False
<generator> False False True False
string True True False True
unicode True True False True
<open> False False True False
xrange(1) True True True False
Foo.__iter__ False False True False
Each column refers to a different way to classify iterables; each row refers to a different kind of object.
import pandas as pd
import collections
import os

try:
    import collections.abc as collections_abc  # Python 3.3+
except ImportError:  # Python 2: the ABCs live directly in ``collections``
    collections_abc = collections

try:
    xrange
except NameError:  # Python 3: ``range`` is the lazy equivalent
    xrange = range


def col_iterable(obj):
    return isinstance(obj, collections_abc.Iterable)


def col_iterator(obj):
    return isinstance(obj, collections_abc.Iterator)


def col_sequence(obj):
    return isinstance(obj, collections_abc.Sequence)


def col_mutable_sequence(obj):
    return isinstance(obj, collections_abc.MutableSequence)


def col_sized(obj):
    return isinstance(obj, collections_abc.Sized)


def has_len(obj):
    return hasattr(obj, '__len__')


def listtype(obj):
    # The builtin ``list`` replaces types.ListType; ``types`` was never imported.
    return isinstance(obj, list)


def tupletype(obj):
    return isinstance(obj, tuple)


def has_iter(obj):
    "Could this be a way to distinguish basestrings from other iterables?"
    return hasattr(obj, '__iter__')


def has_contains(obj):
    return hasattr(obj, '__contains__')


def iter_is_self(obj):
    "Seems identical to col_iterator"
    return iter(obj) is obj


def gen():
    yield


def short_str(obj):
    """Return str(obj), shortening '<...>' style reprs to their first word."""
    text = str(obj)
    if text.startswith('<'):
        text = text.split()[0] + '>'
    return text


def isiterable():
    """Classify a set of sample objects with every test above and print two tables.

    NOTE(review): the tables quoted alongside this script look like Python 2
    output (``xrange``, ``unicode``); results on Python 3 may differ.
    """
    class Foo(object):
        def __init__(self):
            self.data = [1, 2, 3]

        def __iter__(self):
            while True:
                try:
                    yield self.data.pop(0)
                except IndexError:  # pop from empty list
                    return

        def __repr__(self):
            return "Foo.__iter__"

    filename = 'mytestfile'
    tests = [
        (short_str, 'object'),
        (col_iterable, 'Iterable'),
        (col_iterator, 'Iterator'),
        (iter_is_self, 'iter_is_self'),
        (col_sequence, 'Sequence'),
        (col_mutable_sequence, 'MutableSeq'),
        (col_sized, 'Sized'),
        (has_len, 'has_len'),
        (has_iter, 'has_iter'),
        (has_contains, 'has_contains'),
    ]
    funcs, labels = zip(*tests)
    # The open file is one of the sample objects; ``with`` guarantees it is
    # closed even if one of the tests raises.
    with open(filename, 'w') as f:
        objs = [list(), tuple(), set(), dict(),
                collections.deque(), iter([]), gen(), 'string', u'unicode',
                f, xrange(1), Foo()]
        data = [[test(obj) for test in funcs] for obj in objs]
    os.unlink(filename)
    df = pd.DataFrame(data, columns=labels)
    df = df.set_index('object')
    # ``.loc`` replaces ``.ix``, which has been removed from pandas.
    print(df.loc[:, 'Iterable':'MutableSeq'])
    print("")
    print(df.loc[:, 'Sized':])


isiterable()

Categories

Resources