Skip first entry in for loop in python? - python

In python, How do I do something like:
for car in cars:
# Skip first and last, do work for rest

To skip the first element in Python you can simply write
for car in cars[1:]:
# Do What Ever you want
or to skip the last elem
for car in cars[:-1]:
# Do What Ever you want
You can use this concept for any sequence (not for any iterable though).

The other answers only work for a sequence.
For any iterable, to skip the first item:
itercars = iter(cars)
next(itercars)
for car in itercars:
# do work
If you want to skip the last, you could do:
itercars = iter(cars)
# add 'next(itercars)' here if you also want to skip the first
prev = next(itercars)
for car in itercars:
# do work on 'prev' not 'car'
# at end of loop:
prev = car
# now you can do whatever you want to do to the last one on 'prev'

The best way to skip the first item(s) is:
from itertools import islice
for car in islice(cars, 1, None):
pass
# do something
islice in this case is invoked with a start-point of 1, and an end point of None, signifying the end of the iterable.
To be able to skip items from the end of an iterable, you need to know its length (always possible for a list, but not necessarily for everything you can iterate on). for example, islice(cars, 1, len(cars)-1) will skip the first and last items in cars.

Here is a more general generator function that skips any number of items from the beginning and end of an iterable:
def skip(iterable, at_start=0, at_end=0):
it = iter(iterable)
for x in itertools.islice(it, at_start):
pass
queue = collections.deque(itertools.islice(it, at_end))
for x in it:
queue.append(x)
yield queue.popleft()
Example usage:
>>> list(skip(range(10), at_start=2, at_end=2))
[2, 3, 4, 5, 6, 7]

This code skips the first and the last element of the list:
for item in list_name[1:-1]:
#...do whatever

Here's my preferred choice. It doesn't require adding on much to the loop, and uses nothing but built in tools.
Go from:
for item in my_items:
do_something(item)
to:
for i, item in enumerate(my_items):
if i == 0:
continue
do_something(item)

Example:
mylist=['one','two','three','four','five']
for i in mylist[1:]:
print(i)
In python index start from 0, We can use slicing operator to make manipulations in iteration.
for i in range(1,-1):

Well, your syntax isn't really Python to begin with.
Iterations in Python are over he contents of containers (well, technically it's over iterators), with a syntax for item in container. In this case, the container is the cars list, but you want to skip the first and last elements, so that means cars[1:-1] (python lists are zero-based, negative numbers count from the end, and : is slicing syntax.
So you want
for c in cars[1:-1]:
do something with c

Based on #SvenMarnach 's Answer, but bit simpler and without using deque
>>> def skip(iterable, at_start=0, at_end=0):
it = iter(iterable)
it = itertools.islice(it, at_start, None)
it, it1 = itertools.tee(it)
it1 = itertools.islice(it1, at_end, None)
return (next(it) for _ in it1)
>>> list(skip(range(10), at_start=2, at_end=2))
[2, 3, 4, 5, 6, 7]
>>> list(skip(range(10), at_start=2, at_end=5))
[2, 3, 4]
Also Note, based on my timeit result, this is marginally faster than the deque solution
>>> iterable=xrange(1000)
>>> stmt1="""
def skip(iterable, at_start=0, at_end=0):
it = iter(iterable)
it = itertools.islice(it, at_start, None)
it, it1 = itertools.tee(it)
it1 = itertools.islice(it1, at_end, None)
return (next(it) for _ in it1)
list(skip(iterable,2,2))
"""
>>> stmt2="""
def skip(iterable, at_start=0, at_end=0):
it = iter(iterable)
for x in itertools.islice(it, at_start):
pass
queue = collections.deque(itertools.islice(it, at_end))
for x in it:
queue.append(x)
yield queue.popleft()
list(skip(iterable,2,2))
"""
>>> timeit.timeit(stmt = stmt1, setup='from __main__ import iterable, skip, itertools', number = 10000)
2.0313770640908047
>>> timeit.timeit(stmt = stmt2, setup='from __main__ import iterable, skip, itertools, collections', number = 10000)
2.9903135454296716

An alternative method:
for idx, car in enumerate(cars):
# Skip first line.
if not idx:
continue
# Skip last line.
if idx + 1 == len(cars):
continue
# Real code here.
print car

The more_itertools project extends itertools.islice to handle negative indices.
Example
import more_itertools as mit
iterable = 'ABCDEFGH'
list(mit.islice_extended(iterable, 1, -1))
# Out: ['B', 'C', 'D', 'E', 'F', 'G']
Therefore, you can elegantly apply it slice elements between the first and last items of an iterable:
for car in mit.islice_extended(cars, 1, -1):
# do something

Similar to #maninthecomputer 's answer, when you need to skip the first iteration of a loop based on an int (self._model.columnCount() in my case):
for col in range(self._model.columnCount()):
if col == 0:
continue
Put more simply:
test_int = 3
for col in range(test_int):
if col == 0:
continue
print(col)
Provides output:
1
2
3

Good solution for support of itertools.chain is to use itertools.islice in order to take a slice of an iterable:
your_input_list = ['list', 'of', 'things']
for i, variant in list(itertools.islice(enumerate(some_function_that_will_output_itertools_chain(your_input_list)), 1, None)):
"""
# No need for unnecessary conditions like this:
if i == 0:
continue
"""
variant = list(variant) # (optional) converting back to list
print(variant)

I do it like this, even though it looks like a hack it works every time:
ls_of_things = ['apple', 'car', 'truck', 'bike', 'banana']
first = 0
last = len(ls_of_things)
for items in ls_of_things:
if first == 0
first = first + 1
pass
elif first == last - 1:
break
else:
do_stuff
first = first + 1
pass

Related

Best way to skip the first loop of a python for loop? [duplicate]

In python, How do I do something like:
for car in cars:
# Skip first and last, do work for rest
To skip the first element in Python you can simply write
for car in cars[1:]:
# Do What Ever you want
or to skip the last elem
for car in cars[:-1]:
# Do What Ever you want
You can use this concept for any sequence (not for any iterable though).
The other answers only work for a sequence.
For any iterable, to skip the first item:
itercars = iter(cars)
next(itercars)
for car in itercars:
# do work
If you want to skip the last, you could do:
itercars = iter(cars)
# add 'next(itercars)' here if you also want to skip the first
prev = next(itercars)
for car in itercars:
# do work on 'prev' not 'car'
# at end of loop:
prev = car
# now you can do whatever you want to do to the last one on 'prev'
The best way to skip the first item(s) is:
from itertools import islice
for car in islice(cars, 1, None):
pass
# do something
islice in this case is invoked with a start-point of 1, and an end point of None, signifying the end of the iterable.
To be able to skip items from the end of an iterable, you need to know its length (always possible for a list, but not necessarily for everything you can iterate on). for example, islice(cars, 1, len(cars)-1) will skip the first and last items in cars.
Here is a more general generator function that skips any number of items from the beginning and end of an iterable:
def skip(iterable, at_start=0, at_end=0):
it = iter(iterable)
for x in itertools.islice(it, at_start):
pass
queue = collections.deque(itertools.islice(it, at_end))
for x in it:
queue.append(x)
yield queue.popleft()
Example usage:
>>> list(skip(range(10), at_start=2, at_end=2))
[2, 3, 4, 5, 6, 7]
This code skips the first and the last element of the list:
for item in list_name[1:-1]:
#...do whatever
Here's my preferred choice. It doesn't require adding on much to the loop, and uses nothing but built in tools.
Go from:
for item in my_items:
do_something(item)
to:
for i, item in enumerate(my_items):
if i == 0:
continue
do_something(item)
Example:
mylist=['one','two','three','four','five']
for i in mylist[1:]:
print(i)
In python index start from 0, We can use slicing operator to make manipulations in iteration.
for i in range(1,-1):
Well, your syntax isn't really Python to begin with.
Iterations in Python are over he contents of containers (well, technically it's over iterators), with a syntax for item in container. In this case, the container is the cars list, but you want to skip the first and last elements, so that means cars[1:-1] (python lists are zero-based, negative numbers count from the end, and : is slicing syntax.
So you want
for c in cars[1:-1]:
do something with c
Based on #SvenMarnach 's Answer, but bit simpler and without using deque
>>> def skip(iterable, at_start=0, at_end=0):
it = iter(iterable)
it = itertools.islice(it, at_start, None)
it, it1 = itertools.tee(it)
it1 = itertools.islice(it1, at_end, None)
return (next(it) for _ in it1)
>>> list(skip(range(10), at_start=2, at_end=2))
[2, 3, 4, 5, 6, 7]
>>> list(skip(range(10), at_start=2, at_end=5))
[2, 3, 4]
Also Note, based on my timeit result, this is marginally faster than the deque solution
>>> iterable=xrange(1000)
>>> stmt1="""
def skip(iterable, at_start=0, at_end=0):
it = iter(iterable)
it = itertools.islice(it, at_start, None)
it, it1 = itertools.tee(it)
it1 = itertools.islice(it1, at_end, None)
return (next(it) for _ in it1)
list(skip(iterable,2,2))
"""
>>> stmt2="""
def skip(iterable, at_start=0, at_end=0):
it = iter(iterable)
for x in itertools.islice(it, at_start):
pass
queue = collections.deque(itertools.islice(it, at_end))
for x in it:
queue.append(x)
yield queue.popleft()
list(skip(iterable,2,2))
"""
>>> timeit.timeit(stmt = stmt1, setup='from __main__ import iterable, skip, itertools', number = 10000)
2.0313770640908047
>>> timeit.timeit(stmt = stmt2, setup='from __main__ import iterable, skip, itertools, collections', number = 10000)
2.9903135454296716
An alternative method:
for idx, car in enumerate(cars):
# Skip first line.
if not idx:
continue
# Skip last line.
if idx + 1 == len(cars):
continue
# Real code here.
print car
The more_itertools project extends itertools.islice to handle negative indices.
Example
import more_itertools as mit
iterable = 'ABCDEFGH'
list(mit.islice_extended(iterable, 1, -1))
# Out: ['B', 'C', 'D', 'E', 'F', 'G']
Therefore, you can elegantly apply it slice elements between the first and last items of an iterable:
for car in mit.islice_extended(cars, 1, -1):
# do something
Similar to #maninthecomputer 's answer, when you need to skip the first iteration of a loop based on an int (self._model.columnCount() in my case):
for col in range(self._model.columnCount()):
if col == 0:
continue
Put more simply:
test_int = 3
for col in range(test_int):
if col == 0:
continue
print(col)
Provides output:
1
2
3
Good solution for support of itertools.chain is to use itertools.islice in order to take a slice of an iterable:
your_input_list = ['list', 'of', 'things']
for i, variant in list(itertools.islice(enumerate(some_function_that_will_output_itertools_chain(your_input_list)), 1, None)):
"""
# No need for unnecessary conditions like this:
if i == 0:
continue
"""
variant = list(variant) # (optional) converting back to list
print(variant)
I do it like this, even though it looks like a hack it works every time:
ls_of_things = ['apple', 'car', 'truck', 'bike', 'banana']
first = 0
last = len(ls_of_things)
for items in ls_of_things:
if first == 0
first = first + 1
pass
elif first == last - 1:
break
else:
do_stuff
first = first + 1
pass

Create two output list using if else in comprehensions [duplicate]

I have some code like:
good = [x for x in mylist if x in goodvals]
bad = [x for x in mylist if x not in goodvals]
The goal is to split up the contents of mylist into two other lists, based on whether or not they meet a condition.
How can I do this more elegantly? Can I avoid doing two separate iterations over mylist? Can I improve performance by doing so?
Iterate manually, using the condition to select a list to which each element will be appended:
good, bad = [], []
for x in mylist:
(bad, good)[x in goodvals].append(x)
good = [x for x in mylist if x in goodvals]
bad = [x for x in mylist if x not in goodvals]
How can I do this more elegantly?
That code is already perfectly elegant.
There might be slight performance improvements using sets, but the difference is trivial. set based approaches will also discard duplicates and will not preserve the order of elements. I find the list comprehension far easier to read, too.
In fact, we could even more simply just use a for loop:
good, bad = [], []
for x in mylist:
if x in goodvals:
good.append(f)
else:
bad.append(f)
This approach makes it easier to add additional logic. For example, the code is easily modified to discard None values:
good, bad = [], []
for x in mylist:
if x is None:
continue
if x in goodvals:
good.append(f)
else:
bad.append(f)
Here's the lazy iterator approach:
from itertools import tee
def split_on_condition(seq, condition):
l1, l2 = tee((condition(item), item) for item in seq)
return (i for p, i in l1 if p), (i for p, i in l2 if not p)
It evaluates the condition once per item and returns two generators, first yielding values from the sequence where the condition is true, the other where it's false.
Because it's lazy you can use it on any iterator, even an infinite one:
from itertools import count, islice
def is_prime(n):
return n > 1 and all(n % i for i in xrange(2, n))
primes, not_primes = split_on_condition(count(), is_prime)
print("First 10 primes", list(islice(primes, 10)))
print("First 10 non-primes", list(islice(not_primes, 10)))
Usually though the non-lazy list returning approach is better:
def split_on_condition(seq, condition):
a, b = [], []
for item in seq:
(a if condition(item) else b).append(item)
return a, b
Edit: For your more specific usecase of splitting items into different lists by some key, heres a generic function that does that:
DROP_VALUE = lambda _:_
def split_by_key(seq, resultmapping, keyfunc, default=DROP_VALUE):
"""Split a sequence into lists based on a key function.
seq - input sequence
resultmapping - a dictionary that maps from target lists to keys that go to that list
keyfunc - function to calculate the key of an input value
default - the target where items that don't have a corresponding key go, by default they are dropped
"""
result_lists = dict((key, []) for key in resultmapping)
appenders = dict((key, result_lists[target].append) for target, keys in resultmapping.items() for key in keys)
if default is not DROP_VALUE:
result_lists.setdefault(default, [])
default_action = result_lists[default].append
else:
default_action = DROP_VALUE
for item in seq:
appenders.get(keyfunc(item), default_action)(item)
return result_lists
Usage:
def file_extension(f):
return f[2].lower()
split_files = split_by_key(files, {'images': IMAGE_TYPES}, keyfunc=file_extension, default='anims')
print split_files['images']
print split_files['anims']
Problem with all proposed solutions is that it will scan and apply the filtering function twice. I'd make a simple small function like this:
def split_into_two_lists(lst, f):
a = []
b = []
for elem in lst:
if f(elem):
a.append(elem)
else:
b.append(elem)
return a, b
That way you are not processing anything twice and also are not repeating code.
My take on it. I propose a lazy, single-pass, partition function,
which preserves relative order in the output subsequences.
1. Requirements
I assume that the requirements are:
maintain elements' relative order (hence, no sets and
dictionaries)
evaluate condition only once for every element (hence not using
(i)filter or groupby)
allow for lazy consumption of either sequence (if we can afford to
precompute them, then the naïve implementation is likely to be
acceptable too)
2. split library
My partition function (introduced below) and other similar functions
have made it into a small library:
python-split
It's installable normally via PyPI:
pip install --user split
To split a list base on condition, use partition function:
>>> from split import partition
>>> files = [ ('file1.jpg', 33L, '.jpg'), ('file2.avi', 999L, '.avi') ]
>>> image_types = ('.jpg','.jpeg','.gif','.bmp','.png')
>>> images, other = partition(lambda f: f[-1] in image_types, files)
>>> list(images)
[('file1.jpg', 33L, '.jpg')]
>>> list(other)
[('file2.avi', 999L, '.avi')]
3. partition function explained
Internally we need to build two subsequences at once, so consuming
only one output sequence will force the other one to be computed
too. And we need to keep state between user requests (store processed
but not yet requested elements). To keep state, I use two double-ended
queues (deques):
from collections import deque
SplitSeq class takes care of the housekeeping:
class SplitSeq:
def __init__(self, condition, sequence):
self.cond = condition
self.goods = deque([])
self.bads = deque([])
self.seq = iter(sequence)
Magic happens in its .getNext() method. It is almost like .next()
of the iterators, but allows to specify which kind of element we want
this time. Behind the scene it doesn't discard the rejected elements,
but instead puts them in one of the two queues:
def getNext(self, getGood=True):
if getGood:
these, those, cond = self.goods, self.bads, self.cond
else:
these, those, cond = self.bads, self.goods, lambda x: not self.cond(x)
if these:
return these.popleft()
else:
while 1: # exit on StopIteration
n = self.seq.next()
if cond(n):
return n
else:
those.append(n)
The end user is supposed to use partition function. It takes a
condition function and a sequence (just like map or filter), and
returns two generators. The first generator builds a subsequence of
elements for which the condition holds, the second one builds the
complementary subsequence. Iterators and generators allow for lazy
splitting of even long or infinite sequences.
def partition(condition, sequence):
cond = condition if condition else bool # evaluate as bool if condition == None
ss = SplitSeq(cond, sequence)
def goods():
while 1:
yield ss.getNext(getGood=True)
def bads():
while 1:
yield ss.getNext(getGood=False)
return goods(), bads()
I chose the test function to be the first argument to facilitate
partial application in the future (similar to how map and filter
have the test function as the first argument).
I basically like Anders' approach as it is very general. Here's a version that puts the categorizer first (to match filter syntax) and uses a defaultdict (assumed imported).
def categorize(func, seq):
"""Return mapping from categories to lists
of categorized items.
"""
d = defaultdict(list)
for item in seq:
d[func(item)].append(item)
return d
First go (pre-OP-edit): Use sets:
mylist = [1,2,3,4,5,6,7]
goodvals = [1,3,7,8,9]
myset = set(mylist)
goodset = set(goodvals)
print list(myset.intersection(goodset)) # [1, 3, 7]
print list(myset.difference(goodset)) # [2, 4, 5, 6]
That's good for both readability (IMHO) and performance.
Second go (post-OP-edit):
Create your list of good extensions as a set:
IMAGE_TYPES = set(['.jpg','.jpeg','.gif','.bmp','.png'])
and that will increase performance. Otherwise, what you have looks fine to me.
itertools.groupby almost does what you want, except it requires the items to be sorted to ensure that you get a single contiguous range, so you need to sort by your key first (otherwise you'll get multiple interleaved groups for each type). eg.
def is_good(f):
return f[2].lower() in IMAGE_TYPES
files = [ ('file1.jpg', 33L, '.jpg'), ('file2.avi', 999L, '.avi'), ('file3.gif', 123L, '.gif')]
for key, group in itertools.groupby(sorted(files, key=is_good), key=is_good):
print key, list(group)
gives:
False [('file2.avi', 999L, '.avi')]
True [('file1.jpg', 33L, '.jpg'), ('file3.gif', 123L, '.gif')]
Similar to the other solutions, the key func can be defined to divide into any number of groups you want.
Elegant and Fast
Inspired by DanSalmo's comment, here is a solution that is concise, elegant, and at the same time is one of the fastest solutions.
good_set = set(goodvals)
good, bad = [], []
for item in my_list:
good.append(item) if item in good_set else bad.append(item)
Tip: Turning goodvals into a set gives us an easy speed boost.
Fastest
For maximum speed, we take the fastest answer and turbocharge it by turning good_list into a set. That alone gives us a 40%+ speed boost, and we end up with a solution that is more than 5.5x as fast as the slowest solution, even while it remains readable.
good_list_set = set(good_list) # 40%+ faster than a tuple.
good, bad = [], []
for item in my_origin_list:
if item in good_list_set:
good.append(item)
else:
bad.append(item)
A little shorter
This is a more concise version of the previous answer.
good_list_set = set(good_list) # 40%+ faster than a tuple.
good, bad = [], []
for item in my_origin_list:
out = good if item in good_list_set else bad
out.append(item)
Elegance can be somewhat subjective, but some of the Rube Goldberg style solutions that are cute and ingenious are quite concerning and should not be used in production code in any language, let alone python which is elegant at heart.
Benchmark results:
filter_BJHomer 80/s -- -3265% -5312% -5900% -6262% -7273% -7363% -8051% -8162% -8244%
zip_Funky 118/s 4848% -- -3040% -3913% -4450% -5951% -6085% -7106% -7271% -7393%
two_lst_tuple_JohnLaRoy 170/s 11332% 4367% -- -1254% -2026% -4182% -4375% -5842% -6079% -6254%
if_else_DBR 195/s 14392% 6428% 1434% -- -882% -3348% -3568% -5246% -5516% -5717%
two_lst_compr_Parand 213/s 16750% 8016% 2540% 967% -- -2705% -2946% -4786% -5083% -5303%
if_else_1_line_DanSalmo 292/s 26668% 14696% 7189% 5033% 3707% -- -331% -2853% -3260% -3562%
tuple_if_else 302/s 27923% 15542% 7778% 5548% 4177% 343% -- -2609% -3029% -3341%
set_1_line 409/s 41308% 24556% 14053% 11035% 9181% 3993% 3529% -- -569% -991%
set_shorter 434/s 44401% 26640% 15503% 12303% 10337% 4836% 4345% 603% -- -448%
set_if_else 454/s 46952% 28358% 16699% 13349% 11290% 5532% 5018% 1100% 469% --
The full benchmark code for Python 3.7 (modified from FunkySayu):
good_list = ['.jpg','.jpeg','.gif','.bmp','.png']
import random
import string
my_origin_list = []
for i in range(10000):
fname = ''.join(random.choice(string.ascii_lowercase) for i in range(random.randrange(10)))
if random.getrandbits(1):
fext = random.choice(list(good_list))
else:
fext = "." + ''.join(random.choice(string.ascii_lowercase) for i in range(3))
my_origin_list.append((fname + fext, random.randrange(1000), fext))
# Parand
def two_lst_compr_Parand(*_):
return [e for e in my_origin_list if e[2] in good_list], [e for e in my_origin_list if not e[2] in good_list]
# dbr
def if_else_DBR(*_):
a, b = list(), list()
for e in my_origin_list:
if e[2] in good_list:
a.append(e)
else:
b.append(e)
return a, b
# John La Rooy
def two_lst_tuple_JohnLaRoy(*_):
a, b = list(), list()
for e in my_origin_list:
(b, a)[e[2] in good_list].append(e)
return a, b
# # Ants Aasma
# def f4():
# l1, l2 = tee((e[2] in good_list, e) for e in my_origin_list)
# return [i for p, i in l1 if p], [i for p, i in l2 if not p]
# My personal way to do
def zip_Funky(*_):
a, b = zip(*[(e, None) if e[2] in good_list else (None, e) for e in my_origin_list])
return list(filter(None, a)), list(filter(None, b))
# BJ Homer
def filter_BJHomer(*_):
return list(filter(lambda e: e[2] in good_list, my_origin_list)), list(filter(lambda e: not e[2] in good_list, my_origin_list))
# ChaimG's answer; as a list.
def if_else_1_line_DanSalmo(*_):
good, bad = [], []
for e in my_origin_list:
_ = good.append(e) if e[2] in good_list else bad.append(e)
return good, bad
# ChaimG's answer; as a set.
def set_1_line(*_):
good_list_set = set(good_list)
good, bad = [], []
for e in my_origin_list:
_ = good.append(e) if e[2] in good_list_set else bad.append(e)
return good, bad
# ChaimG set and if else list.
def set_shorter(*_):
good_list_set = set(good_list)
good, bad = [], []
for e in my_origin_list:
out = good if e[2] in good_list_set else bad
out.append(e)
return good, bad
# ChaimG's best answer; if else as a set.
def set_if_else(*_):
good_list_set = set(good_list)
good, bad = [], []
for e in my_origin_list:
if e[2] in good_list_set:
good.append(e)
else:
bad.append(e)
return good, bad
# ChaimG's best answer; if else as a set.
def tuple_if_else(*_):
good_list_tuple = tuple(good_list)
good, bad = [], []
for e in my_origin_list:
if e[2] in good_list_tuple:
good.append(e)
else:
bad.append(e)
return good, bad
def cmpthese(n=0, functions=None):
results = {}
for func_name in functions:
args = ['%s(range(256))' % func_name, 'from __main__ import %s' % func_name]
t = Timer(*args)
results[func_name] = 1 / (t.timeit(number=n) / n) # passes/sec
functions_sorted = sorted(functions, key=results.__getitem__)
for f in functions_sorted:
diff = []
for func in functions_sorted:
if func == f:
diff.append("--")
else:
diff.append(f"{results[f]/results[func]*100 - 100:5.0%}")
diffs = " ".join(f'{x:>8s}' for x in diff)
print(f"{f:27s} \t{results[f]:,.0f}/s {diffs}")
if __name__=='__main__':
from timeit import Timer
cmpthese(1000, 'two_lst_compr_Parand if_else_DBR two_lst_tuple_JohnLaRoy zip_Funky filter_BJHomer if_else_1_line_DanSalmo set_1_line set_if_else tuple_if_else set_shorter'.split(" "))
good.append(x) if x in goodvals else bad.append(x)
This elegant and concise answer by #dansalmo showed up buried in the comments, so I'm just reposting it here as an answer so it can get the prominence it deserves, especially for new readers.
Complete example:
good, bad = [], []
for x in my_list:
good.append(x) if x in goodvals else bad.append(x)
bad = []
good = [x for x in mylist if x in goodvals or bad.append(x)]
append returns None, so it works.
Personally, I like the version you cited, assuming you already have a list of goodvals hanging around. If not, something like:
good = filter(lambda x: is_good(x), mylist)
bad = filter(lambda x: not is_good(x), mylist)
Of course, that's really very similar to using a list comprehension like you originally did, but with a function instead of a lookup:
good = [x for x in mylist if is_good(x)]
bad = [x for x in mylist if not is_good(x)]
In general, I find the aesthetics of list comprehensions to be very pleasing. Of course, if you don't actually need to preserve ordering and don't need duplicates, using the intersection and difference methods on sets would work well too.
If you want to make it in FP style:
good, bad = [ sum(x, []) for x in zip(*(([y], []) if y in goodvals else ([], [y])
for y in mylist)) ]
Not the most readable solution, but at least iterates through mylist only once.
Sometimes, it looks like list comprehension is not the best thing to use !
I made a little test based on the answer people gave to this topic, tested on a random generated list. Here is the generation of the list (there's probably a better way to do, but it's not the point) :
good_list = ('.jpg','.jpeg','.gif','.bmp','.png')
import random
import string
my_origin_list = []
for i in xrange(10000):
fname = ''.join(random.choice(string.lowercase) for i in range(random.randrange(10)))
if random.getrandbits(1):
fext = random.choice(good_list)
else:
fext = "." + ''.join(random.choice(string.lowercase) for i in range(3))
my_origin_list.append((fname + fext, random.randrange(1000), fext))
And here we go
# Parand
def f1():
return [e for e in my_origin_list if e[2] in good_list], [e for e in my_origin_list if not e[2] in good_list]
# dbr
def f2():
a, b = list(), list()
for e in my_origin_list:
if e[2] in good_list:
a.append(e)
else:
b.append(e)
return a, b
# John La Rooy
def f3():
a, b = list(), list()
for e in my_origin_list:
(b, a)[e[2] in good_list].append(e)
return a, b
# Ants Aasma
def f4():
l1, l2 = tee((e[2] in good_list, e) for e in my_origin_list)
return [i for p, i in l1 if p], [i for p, i in l2 if not p]
# My personal way to do
def f5():
a, b = zip(*[(e, None) if e[2] in good_list else (None, e) for e in my_origin_list])
return list(filter(None, a)), list(filter(None, b))
# BJ Homer
def f6():
return filter(lambda e: e[2] in good_list, my_origin_list), filter(lambda e: not e[2] in good_list, my_origin_list)
Using the cmpthese function, the best result is the dbr answer :
f1 204/s -- -5% -14% -15% -20% -26%
f6 215/s 6% -- -9% -11% -16% -22%
f3 237/s 16% 10% -- -2% -7% -14%
f4 240/s 18% 12% 2% -- -6% -13%
f5 255/s 25% 18% 8% 6% -- -8%
f2 277/s 36% 29% 17% 15% 9% --
def partition(pred, iterable):
'Use a predicate to partition entries into false entries and true entries'
# partition(is_odd, range(10)) --> 0 2 4 6 8 and 1 3 5 7 9
t1, t2 = tee(iterable)
return filterfalse(pred, t1), filter(pred, t2)
Check this
I think a generalization of splitting a an iterable based on N conditions is handy
from collections import OrderedDict
def partition(iterable,*conditions):
'''Returns a list with the elements that satisfy each of condition.
Conditions are assumed to be exclusive'''
d= OrderedDict((i,list())for i in range(len(conditions)))
for e in iterable:
for i,condition in enumerate(conditions):
if condition(e):
d[i].append(e)
break
return d.values()
For instance:
ints,floats,other = partition([2, 3.14, 1, 1.69, [], None],
lambda x: isinstance(x, int),
lambda x: isinstance(x, float),
lambda x: True)
print " ints: {}\n floats:{}\n other:{}".format(ints,floats,other)
ints: [2, 1]
floats:[3.14, 1.69]
other:[[], None]
If the element may satisfy multiple conditions, remove the break.
Yet another solution to this problem. I needed a solution that is as fast as possible. That means only one iteration over the list and preferably O(1) for adding data to one of the resulting lists. This is very similar to the solution provided by sastanin, except much shorter:
from collections import deque
def split(iterable, function):
dq_true = deque()
dq_false = deque()
# deque - the fastest way to consume an iterator and append items
deque((
(dq_true if function(item) else dq_false).append(item) for item in iterable
), maxlen=0)
return dq_true, dq_false
Then, you can use the function in the following way:
lower, higher = split([0,1,2,3,4,5,6,7,8,9], lambda x: x < 5)
selected, other = split([0,1,2,3,4,5,6,7,8,9], lambda x: x in {0,4,9})
If you're not fine with the resulting deque object, you can easily convert it to list, set, whatever you like (for example list(lower)). The conversion is much faster, that construction of the lists directly.
This methods keeps order of the items, as well as any duplicates.
If you don't mind using an external library there two I know that nativly implement this operation:
>>> files = [ ('file1.jpg', 33, '.jpg'), ('file2.avi', 999, '.avi')]
>>> IMAGE_TYPES = ('.jpg','.jpeg','.gif','.bmp','.png')
iteration_utilities.partition:
>>> from iteration_utilities import partition
>>> notimages, images = partition(files, lambda x: x[2].lower() in IMAGE_TYPES)
>>> notimages
[('file2.avi', 999, '.avi')]
>>> images
[('file1.jpg', 33, '.jpg')]
more_itertools.partition
>>> from more_itertools import partition
>>> notimages, images = partition(lambda x: x[2].lower() in IMAGE_TYPES, files)
>>> list(notimages) # returns a generator so you need to explicitly convert to list.
[('file2.avi', 999, '.avi')]
>>> list(images)
[('file1.jpg', 33, '.jpg')]
For example, splitting list by even and odd
arr = range(20)
even, odd = reduce(lambda res, next: res[next % 2].append(next) or res, arr, ([], []))
Or in general:
def split(predicate, iterable):
return reduce(lambda res, e: res[predicate(e)].append(e) or res, iterable, ([], []))
Advantages:
Shortest posible way
Predicate applies only once for each element
Disadvantages
Requires knowledge of functional programing paradigm
Inspired by #gnibbler's great (but terse!) answer, we can apply that approach to map to multiple partitions:
from collections import defaultdict
def splitter(l, mapper):
"""Split an iterable into multiple partitions generated by a callable mapper."""
results = defaultdict(list)
for x in l:
results[mapper(x)] += [x]
return results
Then splitter can then be used as follows:
>>> l = [1, 2, 3, 4, 2, 3, 4, 5, 6, 4, 3, 2, 3]
>>> split = splitter(l, lambda x: x % 2 == 0) # partition l into odds and evens
>>> split.items()
>>> [(False, [1, 3, 3, 5, 3, 3]), (True, [2, 4, 2, 4, 6, 4, 2])]
This works for more than two partitions with a more complicated mapping (and on iterators, too):
>>> import math
>>> l = xrange(1, 23)
>>> split = splitter(l, lambda x: int(math.log10(x) * 5))
>>> split.items()
[(0, [1]),
(1, [2]),
(2, [3]),
(3, [4, 5, 6]),
(4, [7, 8, 9]),
(5, [10, 11, 12, 13, 14, 15]),
(6, [16, 17, 18, 19, 20, 21, 22])]
Or using a dictionary to map:
>>> map = {'A': 1, 'X': 2, 'B': 3, 'Y': 1, 'C': 2, 'Z': 3}
>>> l = ['A', 'B', 'C', 'C', 'X', 'Y', 'Z', 'A', 'Z']
>>> split = splitter(l, map.get)
>>> split.items()
(1, ['A', 'Y', 'A']), (2, ['C', 'C', 'X']), (3, ['B', 'Z', 'Z'])]
solution
from itertools import tee
def unpack_args(fn):
return lambda t: fn(*t)
def separate(fn, lx):
return map(
unpack_args(
lambda i, ly: filter(
lambda el: bool(i) == fn(el),
ly)),
enumerate(tee(lx, 2)))
test
[even, odd] = separate(
lambda x: bool(x % 2),
[1, 2, 3, 4, 5])
print(list(even) == [2, 4])
print(list(odd) == [1, 3, 5])
If the list is made of groups and intermittent separators, you can use:
def split(items, p):
groups = [[]]
for i in items:
if p(i):
groups.append([])
groups[-1].append(i)
return groups
Usage:
split(range(1,11), lambda x: x % 3 == 0)
# gives [[1, 2], [3, 4, 5], [6, 7, 8], [9, 10]]
Use Boolean logic to assign data to two arrays
>>> images, anims = [[i for i in files if t ^ (i[2].lower() in IMAGE_TYPES) ] for t in (0, 1)]
>>> images
[('file1.jpg', 33, '.jpg')]
>>> anims
[('file2.avi', 999, '.avi')]
For perfomance, try itertools.
The itertools module standardizes a core set of fast, memory efficient tools that are useful by themselves or in combination. Together, they form an “iterator algebra” making it possible to construct specialized tools succinctly and efficiently in pure Python.
See itertools.ifilter or imap.
itertools.ifilter(predicate, iterable)
Make an iterator that filters elements from iterable returning only those for which the predicate is True
If you insist on clever, you could take Winden's solution and just a bit spurious cleverness:
def splay(l, f, d=None):
d = d or {}
for x in l: d.setdefault(f(x), []).append(x)
return d
Sometimes you won't need that other half of the list.
For example:
import sys
from itertools import ifilter
trustedPeople = sys.argv[1].split(',')
newName = sys.argv[2]
myFriends = ifilter(lambda x: x.startswith('Shi'), trustedPeople)
print '%s is %smy friend.' % (newName, newName not in myFriends 'not ' or '')
Already quite a few solutions here, but yet another way of doing that would be -
anims = []
images = [f for f in files if (lambda t: True if f[2].lower() in IMAGE_TYPES else anims.append(t) and False)(f)]
Iterates over the list only once, and looks a bit more pythonic and hence readable to me.
>>> files = [ ('file1.jpg', 33L, '.jpg'), ('file2.avi', 999L, '.avi'), ('file1.bmp', 33L, '.bmp')]
>>> IMAGE_TYPES = ('.jpg','.jpeg','.gif','.bmp','.png')
>>> anims = []
>>> images = [f for f in files if (lambda t: True if f[2].lower() in IMAGE_TYPES else anims.append(t) and False)(f)]
>>> print '\n'.join([str(anims), str(images)])
[('file2.avi', 999L, '.avi')]
[('file1.jpg', 33L, '.jpg'), ('file1.bmp', 33L, '.bmp')]
>>>
I'd take a 2-pass approach, separating evaluation of the predicate from filtering the list:
def partition(pred, iterable):
xs = list(zip(map(pred, iterable), iterable))
return [x[1] for x in xs if x[0]], [x[1] for x in xs if not x[0]]
What's nice about this, performance-wise (in addition to evaluating pred only once on each member of iterable), is that it moves a lot of logic out of the interpreter and into highly-optimized iteration and mapping code. This can speed up iteration over long iterables, as described in this answer.
Expressivity-wise, it takes advantage of expressive idioms like comprehensions and mapping.
Not sure if this is a good approach but it can be done in this way as well
IMAGE_TYPES = ('.jpg','.jpeg','.gif','.bmp','.png')
files = [ ('file1.jpg', 33L, '.jpg'), ('file2.avi', 999L, '.avi')]
images, anims = reduce(lambda (i, a), f: (i + [f], a) if f[2] in IMAGE_TYPES else (i, a + [f]), files, ([], []))
Yet another answer, short but "evil" (for list-comprehension side effects).
digits = list(range(10))
odd = [x.pop(i) for i, x in enumerate(digits) if x % 2]
>>> odd
[1, 3, 5, 7, 9]
>>> digits
[0, 2, 4, 6, 8]

Replace one item in a string with one item from a list

I have a string and a list:
seq = '01202112'
l = [(0,1,0),(1,1,0)]
I would like a pythonic way of replacing each '2' with the value at the corresponding index in the list l such that I obtain two new strings:
list_seq = [01001110, 01101110]
By using .replace(), I could iterate through l, but I wondered is there a more pythonic way to get list_seq?
I might do something like this:
out = [''.join(c if c != '2' else str(next(f, c)) for c in seq) for f in map(iter, l)]
The basic idea is that we call iter to turn the tuples in l into iterators. At that point every time we call next on them, we get the next element we need to use instead of the '2'.
If this is too compact, the logic might be easier to read as a function:
def replace(seq, to_replace, fill):
fill = iter(fill)
for element in seq:
if element != to_replace:
yield element
else:
yield next(fill, element)
giving
In [32]: list(replace([1,2,3,2,2,3,1,2,4,2], to_replace=2, fill="apple"))
Out[32]: [1, 'a', 3, 'p', 'p', 3, 1, 'l', 4, 'e']
Thanks to #DanD in the comments for noting that I had assumed I'd always have enough characters to fill from! We'll follow his suggestion to keep the original characters if we run out, but modifying this approach to behave differently is straightforward and left as an exercise for the reader. :-)
[''.join([str(next(digit, 0)) if x is '2' else x for x in seq])
for digit in map(iter, l)]
I don't know if this solution is 'more pythonic' but:
def my_replace(s, c=None, *other):
return s if c is None else my_replace(s.replace('2', str(c), 1), *other)
seq = '01202112'
l = [(0,1,0),(1,1,0)]
list_req = [my_replace(seq, *x) for x in l]
seq = '01202112'
li = [(0,1,0),(1,1,0)]
def grunch(s, tu):
it = map(str,tu)
return ''.join(next(it) if c=='2' else c for c in s)
list_seq = [grunch(seq,tu) for tu in li]

Removing some of the duplicates from a list in Python

I would like to remove a certain number of duplicates of a list without removing all of them. For example, I have a list [1,2,3,4,4,4,4,4] and I want to remove 3 of the 4's, so that I am left with [1,2,3,4,4]. A naive way to do it would probably be
def remove_n_duplicates(remove_from, what, how_many):
for j in range(how_many):
remove_from.remove(what)
Is there a way to do remove the three 4's in one pass through the list, but keep the other two.
If you just want to remove the first n occurrences of something from a list, this is pretty easy to do with a generator:
def remove_n_dupes(remove_from, what, how_many):
count = 0
for item in remove_from:
if item == what and count < how_many:
count += 1
else:
yield item
Usage looks like:
lst = [1,2,3,4,4,4,4,4]
print list(remove_n_dupes(lst, 4, 3)) # [1, 2, 3, 4, 4]
Keeping a specified number of duplicates of any item is similarly easy if we use a little extra auxiliary storage:
from collections import Counter
def keep_n_dupes(remove_from, how_many):
counts = Counter()
for item in remove_from:
counts[item] += 1
if counts[item] <= how_many:
yield item
Usage is similar:
lst = [1,1,1,1,2,3,4,4,4,4,4]
print list(keep_n_dupes(lst, 2)) # [1, 1, 2, 3, 4, 4]
Here the input is the list and the max number of items that you want to keep. The caveat is that the items need to be hashable...
You can use Python's set functionality with the & operator to create a list of lists and then flatten the list. The result list will be [1, 2, 3, 4, 4].
x = [1,2,3,4,4,4,4,4]
x2 = [val for sublist in [[item]*max(1, x.count(item)-3) for item in set(x) & set(x)] for val in sublist]
As a function you would have the following.
def remove_n_duplicates(remove_from, what, how_many):
return [val for sublist in [[item]*max(1, remove_from.count(item)-how_many) if item == what else [item]*remove_from.count(item) for item in set(remove_from) & set(remove_from)] for val in sublist]
If the list is sorted, there's the fast solution:
def remove_n_duplicates(remove_from, what, how_many):
index = 0
for i in range(len(remove_from)):
if remove_from[i] == what:
index = i
break
if index + how_many >= len(remove_from):
#There aren't enough things to remove.
return
for i in range(index, how_many):
if remove_from[i] != what:
#Again, there aren't enough things to remove
return
endIndex = index + how_many
return remove_from[:index+1] + remove_from[endIndex:]
Note that this returns the new array, so you want to do arr = removeCount(arr, 4, 3)
Here is another trick which might be useful sometimes. Not to be taken as the recommended recipe.
def remove_n_duplicates(remove_from, what, how_many):
exec('remove_from.remove(what);'*how_many)
I can solve it in different way using collections.
from collections import Counter
li = [1,2,3,4,4,4,4]
cntLi = Counter(li)
print cntLi.keys()

Delete item in a list using a for-loop

I have an array with subjects and every subject has connected time. I want to compare every subjects in the list. If there are two of the same subjects, I want to add the times of both subjects, and also want to delete the second subject information (subject-name and time).
But If I delete the item, the list become shorter, and I get an out-of-range-error. I tried to make the list shorter with using subjectlegth-1, but this also don't work.
...
subjectlegth = 8
for x in range(subjectlength):
for y in range(subjectlength):
if subject[x] == subject[y]:
if x != y:
#add
time[x] = time[x] + time[y]
#delete
del time[y]
del subject[y]
subjectlength = subjectlength - 1
Iterate backwards, if you can:
for x in range(subjectlength - 1, -1, -1):
and similarly for y.
If the elements of subject are hashable:
finalinfo = {}
for s, t in zip(subject, time):
finalinfo[s] = finalinfo.get(s, 0) + t
This will result in a dict with subject: time key-value pairs.
The best practice is to make a new list of the entries to delete, and to delete them after walking the list:
to_del = []
subjectlength = 8
for x in range(subjectlength):
for y in range(x):
if subject[x] == subject[y]:
#add
time[x] = time[x] + time[y]
to_del.append(y)
to_del.reverse()
for d in to_del:
del subject[d]
del time[d]
An alternate way would be to create the subject and time lists anew, using a dict to sum up the times of recurring subjects (I am assuming subjects are strings i.e. hashable).
subjects=['math','english','necromancy','philosophy','english','latin','physics','latin']
time=[1,2,3,4,5,6,7,8]
tuples=zip(subjects,time)
my_dict={}
for subject,t in tuples:
try:
my_dict[subject]+=t
except KeyError:
my_dict[subject]=t
subjects,time=my_dict.keys(), my_dict.values()
print subjects,time
Though a while loop is certainly a better choice for this, if you insist on using a for loop, one can replace the list elements-to-be-deleted with None, or any other distinguishable item, and redefine the list after the for loop. The following code removes even elements from a list of integers:
nums = [1, 1, 5, 2, 10, 4, 4, 9, 3, 9]
for i in range(len(nums)):
# select the item that satisfies the condition
if nums[i] % 2 == 0:
# do_something_with_the(item)
nums[i] = None # Not needed anymore, so set it to None
# redefine the list and exclude the None items
nums = [item for item in nums if item is not None]
# num = [1, 1, 5, 9, 3, 9]
In the case of the question in this post:
...
for i in range(subjectlength - 1):
for j in range(i+1, subjectlength):
if subject[i] == subject[j]:
#add
time[i] += time[j]
# set to None instead of delete
time[j] = None
subject[j] = None
time = [item for item in time if item is not None]
subject = [item for item in subject if item is not None]

Categories

Resources