Python: split list of integers based on step between them - python

I have the following problem. Having a list of integers, I want to split it, into a list of lists, whenever the step between two elements of the original input list is not 1.
For example: input = [0, 1, 3, 5, 6, 7], output = [[0, 1], [3], [5, 6, 7]]
I wrote the following function, but it's uggly as hell, and I was wondering if anyone of you guys would help me get a nicer solution. I tried to use itertools, but couldn't solve it.
Here's my solution:
def _get_parts(list_of_indices):
lv = list_of_indices
tuples = zip(lv[:-1], lv[1:])
split_values = []
for i in tuples:
if i[1] - i[0] != 1:
split_values.append(i[1])
string = '/'.join([str(i) for i in lv])
substrings = []
for i in split_values:
part = string.split(str(i))
substrings.append(part[0])
string = string.lstrip(part[0])
substrings.append(string)
result = []
for i in substrings:
i = i.rstrip('/')
result.append([int(n) for n in i.split('/')])
return result
Thanks a lot!

This works with any iterable
>>> from itertools import groupby, count
>>> inp = [0, 1, 3, 5, 6, 7]
>>> [list(g) for k, g in groupby(inp, key=lambda i,j=count(): i-next(j))]
[[0, 1], [3], [5, 6, 7]]

def _get_parts(i, step=1):
o = []
for x in i:
if o and o[-1] and x - step == o[-1][-1]:
o[-1].append(x)
else:
o.append([x])
return o
_get_parts([0, 1, 3, 5, 6, 7], step=1)
# [[0, 1], [3], [5, 6, 7]])

Here is a solution utilizing a for loop.
def splitbystep(alist):
newlist = [[alist[0]]]
for i in range(1,len(alist)):
if alist[i] - alist[i-1] == 1:
newlist[-1].append(alist[i])
else:
newlist.append([alist[i]])
return newlist

This is how I'd do it:
inp = [0, 1, 3, 5, 6, 7]
base = []
for item in inp:
if not base or item - base[-1][-1] != 1: # If base is empty (first item) or diff isn't 1
base.append([item]) # Append a new list containing just one item
else:
base[-1].append(item) # Otherwise, add current item to the last stored list in base
print base # => [[0, 1], [3], [5, 6, 7]]

This is the textbook use case for function split_when from module more_itertools:
import more_itertools
print(list(more_itertools.split_when([0, 1, 3, 5, 6, 7], lambda x,y: y-x != 1)))
# [[0, 1], [3], [5, 6, 7]]
Or, even more simple with more_itertools.consecutive_groups:
print([list(g) for g in more_itertools.consecutive_groups([0, 1, 3, 5, 6, 7])])
# [[0, 1], [3], [5, 6, 7]]

Related

Extract Lists from tuple with a condition

I was trying to extract lists from a tuple with a condition that every X numbers in ascending order should be extracted in same list, using a list comprehension.
Example:
Input: (4,2,2,3,5,6,0,0,2)
Desired output: [[4],[2,2,3,5,6],[0,0,2]]
I tried the following:
E=tuple([random.randint(0,10) for x in range(10)])
Res=[E[x:y] for x in range(0,len(E)) for y in range(x+1,len(E)) if (y>x)]
Here, simple logic to create new list already with the first element tuple. and then loop over tuple and if its current element is less then last element or not, if its smaller then append element as a list or append element into the last list.
Code:
import random
E=tuple([random.randint(0,10) for x in range(10)])
E
ls=[[E[0]]]
[ls.append([E[e]]) if E[e]<E[e-1] else ls[len(ls)-1].append(E[e]) for e in range(1, len(E)) ]
ls
OR Without pre-defining the first value
from itertools import pairwise
l=[[]]
[l[len(l)-1].append(y) if x-y < 0 else l.append([y]) for (x, y) in pairwise([e for e in (0,)+E])] ##Insert the dump 0 at first position
l
Input: (7, 10, 7, 2, 3, 7, 4, 3, 5, 10)
Output: [[7, 10], [7], [2, 3, 7], [4], [3, 5, 10]]
Input: (0, 3, 4, 3, 0, 7, 0, 0, 2, 3)
Output: [[0, 3, 4], [3], [0, 7], [0, 0, 2, 3]]
Here is one possible way to do this. You could use np.split together with random.sample to get the random numbers which is used as index to split.
import random
import numpy as np
lst = [4,2,2,3,5,6,0,0,2]
number_of_splits = 4
random.seed(42)
random_splits = sorted(random.sample(set(range(1,len(lst)-1)),number_of_splits-1))
print(random_splits)
result = [list(elem) for elem in np.split(lst, random_splits)]
print(result)
[1, 6, 7]
[[4], [2, 2, 3, 5, 6], [0], [0, 2]]
Probably easier to do this without a list comprehension - i.e., a straightforward loop like this:
from random import randint as RANDINT
t = 4, 2, 2, 3, 5, 6, 0, 0, 2
out = []
offset = 0
while offset < len(t):
e = RANDINT(1, len(t)+1-offset)
out.append(list(t[offset:offset+e]))
offset += e
print(out)
Output (example):
[[4], [2, 2], [3], [5, 6, 0, 0], [2]]
We can use slice to mark start and end of sub-lists
t=(4,2,2,3,5,6,0,0,2)
split_list=[0]+[x for x in range(len(t)) if t[x]<t[x-1]]
split_list_end=[x for x in range(len(t)) if t[x]<t[x-1]]
split_list_end.append(len(t))
[list(t[x]) for x in [slice(x[0],x[1]) for x in list(zip(split_list,split_list_end))]]

Find missing sequences in list that cycles

Given 2 lists:
target = [0, 1, 2, 3, 4, 5, 6, 7]
given = [1, 2, 5, 6]
The missing numbers is [[0], [3,4], [7]]. However, both list circulates, which means the end of the list is linked to the first, so they actually look like this:
target = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,......]
given = [1,2, 5,6, 1,2, 5,6,......] # I put a space to better tell where numbers missing
That way, the desired output of missing numbers should actually be [[7,0], [3,4]] since 7 and 0 is consecutive.
How do I make up the function that does the job?
First you can construct a list of missing patches using itertools.groupby and then glue the left-most and right-most sublists if necessary:
import itertools
def missing(target, given):
output = [list(g) for k, g in itertools.groupby(target, key=lambda x: x in given) if not k]
if output and output[0][0] == target[0] and output[-1][-1] == target[-1]:
output[-1] += output.pop(0)
return output
print(missing([0, 1, 2, 3, 4, 5, 6, 7], [1, 2, 5, 6])) # [[3, 4], [7, 0]]
print(missing([0, 1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 5, 6, 8])) # [[0], [3, 4], [7]]
Update: If you don't want to import itertools module, then replace the line output = [list(g) ...] with the following:
output, temp = [], [] # an empty "temporary bucket"
for x in target:
if x in given:
if temp: # if temp is nonempty
output.append(temp) # put the bucket into output
temp = [] # a new empty bucket
else: # if x is "missing"
temp.append(x) # append x into the bucket
else: # this is called when for loop is over (or target is empty)
if temp: # if the last temporary bucket is nonempty
output.append(temp)

How to reshape a list without numpy

How do I reshape a list into a n-dimensional list
Input:
list = [1, 2, 3, 4, 5, 6, 7, 8]
shape = [2, 2, 2]
output = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
This recursive approach should work.
lst = [1, 2, 3, 4, 5, 6, 7, 8]
shape = [2, 2, 2]
from functools import reduce
from operator import mul
def reshape(lst, shape):
if len(shape) == 1:
return lst
n = reduce(mul, shape[1:])
return [reshape(lst[i*n:(i+1)*n], shape[1:]) for i in range(len(lst)//n)]
reshape(lst, shape)
You probably want to wrap that with a check that your dimensions make sense... e.g.
assert reduce(mul, shape) == len(lst)
oooold post.. but since i'm currently looking for a more elegant way than mine, i just tell you my approach
# first, i create some data
l = [ i for i in range(256) ]
# now I reshape in to slices of 4 items
x = [ l[x:x+4] for x in range(0, len(l), 4) ]
Here is an approach using the grouper once on each dimension except the first:
import functools as ft
# example
L = list(range(2*3*4))
S = 2,3,4
# if tuples are acceptable
tuple(ft.reduce(lambda x, y: zip(*y*(x,)), (iter(L), *S[:0:-1])))
# (((0, 1, 2, 3), (4, 5, 6, 7), (8, 9, 10, 11)), ((12, 13, 14, 15), (16, 17, 18, 19), (20, 21, 22, 23)))
# if it must be lists
list(ft.reduce(lambda x, y: map(list, zip(*y*(x,))), (iter(L), *S[:0:-1])))
# [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]]
The code below should do the trick.
The solution given below very general. The input list can be a nested list of lists of an any old/undesired shape; it need not be a list of integers.
Also, there are separate re-usable tools. For example the all_for_one function is very handy.
EDIT:
I failed to note something important. If you put 1s inside of the shape parameter, then you can get superfluous list nestings (only one list inside of a list instead of five or six lists inside of a list)
For example, if shape is [1, 1, 2]
then the return value might be [[[0.1, 0.2]]] instead of [0.1, 0.2]
the length of shape is the number of valid subscripts in the output list.
For example,
shape = [1, 2] # length 2
lyst = [[0.1, 0.2]]
print(lyst[0][0]) # valid.... no KeyError raised
If you want a true column or row vector, then len(shape) must be 1.
For example, shape = [49] will give you a row/column vector of length 49.
shape = [2] # length 2
output = [0.1, 0.2]
print(lyst[0])
Here's the code:
from operator import mul
import itertools as itts
import copy
import functools
one_for_all = lambda one: itts.repeat(one, 1)
def all_for_one(lyst):
"""
EXAMPLE:
INPUT:
[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
OUTPUT:
iterator to [1, 2, 3, 4, 5, 6, 7, 8]
IN GENERAL:
Gets iterator to all nested elements
of a list of lists of ... of lists of lists.
"""
# make an iterator which **IMMEDIATELY**
# raises a `StopIteration` exception
its = itts.repeat("", 0)
for sublyst in lyst:
if hasattr(sublyst, "__iter__") and id(sublyst) != id(lyst):
# Be careful ....
#
# "string"[0] == "s"[0] == "s"[0][0][0][0][0][0]...
#
# do not drill down while `sublyst` has an "__iter__" method
# do not drill down while `sublyst` has a `__getitem__` method
#
it = all_for_one(sublyst)
else:
it = one_for_all(sublyst)
# concatenate results to what we had previously
its = itts.chain(its, it)
return its
merged = list(all_for_one([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]))
print("merged == ", merged)
def reshape(xread_lyst, xshape):
"""
similar to `numpy.reshape`
EXAMPLE:
lyst = [1, 2, 3, 4, 5, 6, 7, 8]
shape = [2, 2, 2]
result = reshape(lyst)
print(result)
result ==
[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
For this function, input parameter `xshape` can be
any iterable containing at least one element.
`xshape` is not required to be a tuple, but it can be.
The length of xshape should be equal to the number
of desired list nestings
If you want a list of integers: len(xshape) == 1
If you want a list of lists: len(xshape) == 2
If you want a list of lists of lists: len(xshape) == 3
If xshape = [1, 2],
outermost list has 1 element
that one element is a list of 2 elements.
result == [[1, 2]]
If xshape == [2]
outermost list has 2 elements
those 2 elements are non-lists:
result: [1, 2]
If xshape = [2, 2],
outermost list has 2 elements
each element is a list of 2 elements.
result == [[1, 2] [3, 4]]
"""
# BEGIN SANITIZING INPUTS
# unfortunately, iterators are not re-usable
# Also, they don't have `len` methods
iread_lyst = [x for x in ReshapeTools.unnest(xread_lyst)]
ishape = [x for x in self.unnest(xshape)]
number_of_elements = functools.reduce(mul, ishape, 1)
if(number_of_elements != len(iread_lyst)):
msg = [str(x) for x in [
"\nAn array having dimensions ", ishape,
"\nMust contain ", number_of_elements, " element(s).",
"\nHowever, we were only given ", len(iread_lyst), " element(s)."
]]
if len(iread_lyst) < 10:
msg.append('\nList before reshape: ')
msg.append(str([str(x)[:5] for x in iread_lyst]))
raise TypeError(''.join(msg))
ishape = iter(ishape)
iread_lyst = iter(iread_lyst)
# END SANITATIZATION OF INPUTS
write_parent = list()
parent_list_len = next(ishape)
try:
child_list_len = next(ishape)
for _ in range(0, parent_list_len):
write_child = []
write_parent.append(write_child)
i_reshape(write_child, iread_lyst, child_list_len, copy.copy(ishape))
except StopIteration:
for _ in range(0, parent_list_len):
write_child = next(iread_lyst)
write_parent.append(write_child)
return write_parent
def ilyst_reshape(write_parent, iread_lyst, parent_list_len, ishape):
"""
You really shouldn't call this function directly.
Try calling `reshape` instead
The `i` in the name of this function stands for "internal"
"""
try:
child_list_len = next(ishape)
for _ in range(0, parent_list_len):
write_child = []
write_parent.append(write_child)
ilyst_reshape(write_child, iread_lyst, child_list_len, copy.copy(ishape))
except StopIteration:
for _ in range(0, parent_list_len):
write_child = next(iread_lyst)
write_parent.append(write_child)
return None
three_dee_mat = reshape(merged, [2, 2, 2])
print("three_dee_mat == ", three_dee_mat)
Not particularly elegant:
from functools import reduce
from itertools import islice
l=[1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4]
s=[2,3,4]
if s and reduce(lambda x,y:x*y, s) == len(l):
# if number of elements matches product of dimensions,
# the first dimension is actually redundant
s=[1:]
else:
print("length of input list does not match shape")
return
while s:
size = s.pop() # how many elements for this dimension
#split the list based on the size of the dimension
it=iter(l)
l = list(iter(lambda:list(islice(it,size)),[]))
# [[[1, 2, 3, 4], [5, 6, 7, 8], [9, 0, 1, 2]],
# [[3, 4, 5, 6], [7, 8, 9, 0], [1, 2, 3, 4]]]

How to flatten a list of lists and join with a separator

How to join list of lists with separator?
For example:
[[1, 2], [3, 4, 5], [6, 7]]
with separator:
0
result:
[1, 2, 0, 3, 4, 5, 0, 6, 7]
For example your list is stored in x:
x=[[1,2], [3,4,5], [6,7]]
Simply use reduce with lambda function:
y=reduce(lambda a,b:a+[0]+b,x)
Now y is
[1, 2, 0, 3, 4, 5, 0, 6, 7]
Or you could define an generator function:
def chainwithseperator(lis,sep):
it=iter(lis)
for item in it.next():
yield item
for sublis in it:
yield sep
for item in sublis:
yield item
Now calling:
y=list(chainwithseperator(x,0))
Will bring you the same result
You can tee the list as an iterable and only yield the separator when there's a following item. Here we're defining a function called joinlist which contains a generator helper function to yield the appropriate elements, then returns a flattened list of all those elements using chain.from_iterable:
from itertools import tee, chain
def joinlist(iterable, sep):
def _yielder(iterable):
fst, snd = tee(iterable)
next(snd, [])
while True:
yield next(fst)
if next(snd, None):
yield [sep]
return list(chain.from_iterable(_yielder(iterable)))
It's important to note that the termination for the while True: occurs in yield next(fst) as that'll raise a StopIteration at some point and will cause the generator to exit.
Example:
x = [[1,2]]
y = [[1, 2], [3,4,5]]
z = [[1, 2], [3,4,5], [6, 7]]
for item in (x, y, z):
print item, '->', joinlist(item, 0)
# [[1, 2]] -> [1, 2]
# [[1, 2], [3, 4, 5]] -> [1, 2, 0, 3, 4, 5]
# [[1, 2], [3, 4, 5], [6, 7]] -> [1, 2, 0, 3, 4, 5, 0, 6, 7]
That's how I would do it:
l = [[1,2], [3,4,5], [6,7]]
result = [number for sublist in l for number in sublist+[0]][:-1]
The last [:-1] is to remove the last item which is a 0.
You can use it with Python list extend() method:
orig_list = [[1,2], [3,4,5], [6,7]]
out_list = []
for i in orig_list:
out_list.extend(i + [0])
# To remove the last element '0'.
print my_list[:-1]

How to merge multiple arrays in pairs

I have a problem with "pairing" arrays into one (by index). Here is an example:
INPUT:
inputArray = [[0, 1, 2, 3, 4], [2, 3, 5, 7, 8], [9, 6, 1]]
EXPECTED OUTPUT:
outputArray =
[[0,2,9],
[1,3,6],
[2,5,1],
[3,7,chooseRandom()],
[4,8,chooseRandom()]]
Questions:
How to avoid "out of range" "index error" problem
How to write chooseRandom() to choose N neighbour
Answers:
[SOLVED] Solutions provided by #jonrsharpe & #Christian & #Decency works as
expected
Clarification:
By N neighbour I mean:
I'm using python but feel free to share your thoughts in any language.
I think the following will do what you want:
from itertools import izip_longest # 'zip_longest' in Python 3.x
from random import choice
# Step 1
outputArray = list(map(list, izip_longest(*inputArray)))
# Step 2
for index, arr in enumerate(outputArray):
if any(item is None for item in arr):
valid = [item for item in arr if item is not None]
outputArray[index] = [choice(valid) if item is None else item
for item in arr]
This has two steps:
Combine all sub-lists of inputArray to the length of the longest sub-array, filling with None: [[0, 2, 9], [1, 3, 6], [2, 5, 1], [3, 7, None], [4, 8, None]]; and
Work through the outputArray, finding any sub-lists that contain None and replacing the None with a random choice from the other items in the sub-list that aren't None.
Example output:
[[0, 2, 9], [1, 3, 6], [2, 5, 1], [3, 7, 3], [4, 8, 8]]
Here's my approach to the problem, in Python 3.4. I don't really know what you mean by "choose N neighbour" but it should be pretty easy to write that however you'd like in the context below.
inputArray = [[0, 1, 2, 3, 4], [2, 3, 5, 7, 8], [9, 6, 1]]
import itertools
zipped = itertools.zip_longest(*inputArray, fillvalue=None)
outputArray = [list(item) for item in zipped]
# [[0, 2, 9], [1, 3, 6], [2, 5, 1], [3, 7, None], [4, 8, None]]
# Now replace the sentinel None in our sublists
for sublist in outputArray:
for i, element in enumerate(sublist):
if element is None:
sublist[i] = chooseRandom()
print(outputArray)
Not the most pythonic way, but you could try using this code snipped, read the comments in the code below:
import itertools, random
inputArray = [ [0, 1, 2, 3, 4], [2, 3, 5, 7, 8], [9, 6, 1] ]
outputArray = []
max_length = max(len(e) for e in inputArray) # maximum length of the sublists in <inputArray>
i = 0 # to keep the index of sublists of <outputArray>
for j in range(max_length):
outputArray.append([]) # add new sublist
for e in inputArray: # iterate through each element of <inputArray>
try:
outputArray[i].append(e[j]) # try to append the number, if an exception is raised
# then the code in the <except> clause will be executed
except IndexError as e:
outputArray[i].append(random.randint(0, 10)) # add the random number
i += 1 # increase the sublists index on each iteration
print outputArray
# [[0, 2, 9], [1, 3, 6], [2, 5, 1], [3, 7, 3], [4, 8, 7]]
Note:
You may want to change the part
random.randint(0, 10)
to get the "N neighbour".
Let me know whether you like this code:
import random
array = [[0, 1, 2, 3, 4], [2, 3, 5, 7, 8], [9, 6, 1]]
max_len = max([len(l) for l in array])
dictionary = {}
for l in array:
for i in range(0,len(l)):
if dictionary.has_key(i):
dictionary[i].append(l[i])
else:
dictionary[i] = [l[i]]
for i in range(len(l),max_len):
if dictionary.has_key(i):
dictionary[i].append(random.choice(l))
else:
dictionary[i] = [random.choice(l)]
print dictionary.values()

Categories

Resources