Is it possible to remove recursion from this function? - python

I have been playing with this a while, and just cannot see an obvious solution. I want to remove the recursion from the XinY_Go function.
def XinY_Go(x,y,index,slots):
if (y - index) == 1:
slots[index] = x
print slots
slots[index] = 0
return
for i in range(x+1):
slots[index] = x-i
XinY_Go(x-(x-i), y, index + 1, slots)
def XinY(x,y):
return XinY_Go(x,y,0,[0] * y)
The function is calculating the number of ways to put X marbles in Y slots. Here is some sample output:
>>> xy.XinY(1,2)
[1, 0]
[0, 1]
>>> xy.XinY(2,3)
[2, 0, 0]
[1, 1, 0]
[1, 0, 1]
[0, 2, 0]
[0, 1, 1]
[0, 0, 2]

Everything we think of as recursion can also be thought of as a stack-based problem, where the recursive function just uses the program's call stack rather than creating a separate stack. That means any recursive function can be re-written using a stack instead.
I don't know python well enough to give you an implementation, but that should point you in the right direction. But in a nutshell, push the initial arguments for the function onto the stack and add a loop that runs as long as the size of the stack is greater than zero. Pop once per loop iteration, push every time the function currently calls itself.

A naive implementation of #Joel Coehoorn's suggestion follows:
def XinY_Stack(x, y):
stack = [(x, 0, [0]*y)]
while stack:
x, index, slots = stack.pop()
if (y - index) == 1:
slots[index] = x
print slots
slots[index] = 0
else:
for i in range(x + 1):
slots[index] = x-i
stack.append((i, index + 1, slots[:]))
Example:
>>> XinY_Stack(2, 3)
[0, 0, 2]
[0, 1, 1]
[0, 2, 0]
[1, 0, 1]
[1, 1, 0]
[2, 0, 0]
Based on itertools.product
def XinY_Product(nmarbles, nslots):
return (slots
for slots in product(xrange(nmarbles + 1), repeat=nslots)
if sum(slots) == nmarbles)
Based on nested loops
def XinY_Iter(nmarbles, nslots):
assert 0 < nslots < 22 # 22 -> too many statically nested blocks
if nslots == 1: return iter([nmarbles])
# generate code for iter solution
TAB = " "
loopvars = []
stmt = ["def f(n):\n"]
for i in range(nslots - 1):
var = "m%d" % i
stmt += [TAB * (i + 1), "for %s in xrange(n - (%s)):\n"
% (var, '+'.join(loopvars) or 0)]
loopvars.append(var)
stmt += [TAB * (i + 2), "yield ", ','.join(loopvars),
', n - 1 - (', '+'.join(loopvars), ')\n']
print ''.join(stmt)
# exec the code within empty namespace
ns = {}
exec(''.join(stmt), ns, ns)
return ns['f'](nmarbles + 1)
Example:
>>> list(XinY_Product(2, 3))
[(0, 0, 2), (0, 1, 1), (0, 2, 0), (1, 0, 1), (1, 1, 0), (2, 0, 0)]
>>> list(XinY_Iter(2, 3))
def f(n):
for m0 in xrange(n - (0)):
for m1 in xrange(n - (m0)):
yield m0,m1, n - 1 - (m0+m1)
[(0, 0, 2), (0, 1, 1), (0, 2, 0), (1, 0, 1), (1, 1, 0), (2, 0, 0)]

Look at this code for creating all permutations, I guess I'd be relatively simple to implement something similar for your problem.
How to generate all permutations of a list in python?

Related

Simple method to find 0,0,1 in that order, within a given List

I am trying to write a simple function to find if 0,0,1 occurs in a list, in that order.
It should return True or False.
The list can contain any number of numbers.
For the function ZeroZeroOne examples would be as follows:
>> ZeroZeroOne( [0,0,1] )
>> True
>> ZeroZeroOne( [1,0,0] )
>> False
# there are 2s in between but the following does have 0,0,1 occurring and in correct order
>> ZeroZeroOne( [0,2,2,2,2,0,1] )
>> True
I have this function:
def ZeroZeroOne(nums):
FoundIt = False
#quick return if defo not possible
if (nums.count(0) < 2) and (nums.count(1) == 0):
return FoundIt
n = len(nums)
for x in range(n-2):
if nums[x] == 0:
for i,z in enumerate(nums[(x+1):]):
if z==0 and z!=1:
for j,q in enumerate(nums[(i+1):]):
if q==1 and q!=0:
FoundIt=True
return FoundIt
Why does the function return True for this list [0, 1, 0, 2, 1]?
Moreover....
This function seems overly-complex for a seemingly simple problem.
Is there a correct approach to this problem in Python - a canonical or Pythonic approach?
Or is ones approach simply opinion-based?
You can trivially modify the ordered subsequence test from this answer for an elegant solution:
def ZeroZeroOne(arr):
test = iter(a for a in arr if a in (0, 1))
return all(z in test for z in (0, 0, 1))
I realize now that you don't want to accept 0, 1 0, 1.
You can use itertools.tee to check for a match:
def ZeroZeroOne(arr):
e = itertools.tee((a for a in arr if a in (0, 1)), 3)
# move second iterator forward one
next(e[1])
# move third iterator forward two
next(e[2])
next(e[2])
return (0, 0, 1) in zip(*e)
The nice thing about using tee in this case is that it effectively maintains a rolling buffer of the last three elements for you. You don't need to make a new slice or loop over indices it anything like that.
Just for fun, here's a more general solution in pure python. It accepts any iterable for arr and template:
def contains_template(arr, template):
template = tuple(template)
unique = set(template)
filtered = (a for a in arr if a in unique)
e = itertools.tee(filtered, len(template))
for n, it in enumerate(e):
for _ in range(n):
next(it)
return template in zip(*e)
While itertools.tee is a nice way to maintain a rolling buffer, you can implement the same thing using a list (or more efficiently, collections.deque):
def contains_template(arr, template):
template = list(template)
unique = set(template)
filtered = (a for a in arr if a in unique)
buffer = [next(filtered) for _ in range(len(template) - 1)]
buffer.insert(0, None)
for e in filtered:
buffer.pop(0)
buffer.append(e)
if template == buffer:
return True
return False
Finally, here is the really simple solution, without a rolling buffer:
def contains_template(arr, template):
template = list(template)
n = len(template)
unique = set(template)
filtered = [a for a in arr if a in unique]
return any(filtered[i:i + n] == template for i in range(len(filtered) - n))
You can also do it with a recursive function :
def check(seq, liste, i=0, j=0):
if i >= len(seq):
return True
if j >= len(liste):
return False
if seq[i] == liste[j]:
return check(seq, liste, i + 1, j + 1)
elif liste[j] in seq:
# look for the last index you can restart from
for k in range(i - 1, -1, -1):
if seq[k] == liste[j]:
if seq[:k] == seq[i - k:i]:
ind = k
break
else:
ind = 0
return check(seq, liste, ind, j + (not i))
else:
return check(seq, liste, i, j + 1)
# seq = [0,0,1] for ZeroZeroOne
print(check([0, 0, 1], [0, 0, 0, 0, 1])) # True
print(check([0, 0, 1], [0, 200, 0, 0, 101, 1])) # True
print(check([0, 2, 2, 0, 1], [0, 2, 0, 4, 2, 5, 2, 0, 3, 1])) # True
print(check([0, 2, 2, 0, 1], [0, 2, 4, 2, 5, 2, 0, 3, 1])) # False
You can achieve this with a single loop - O(n) time complexity. Since it is for this specific case. Try the code below.
def ZeroZeroOne(nums):
found_pattern = []
for num in nums:
if num == 1:
found_pattern.append(1)
if len(found_pattern) == 3:
return True
else:
found_pattern = []
elif num == 0 and len(found_pattern) < 2:
found_pattern.append(0)
return False
print(ZeroZeroOne([0, 0, 1]))
print(ZeroZeroOne([0, 1, 0, 2, 1]))
print(ZeroZeroOne([0, 2, 0, 1]))
print(ZeroZeroOne([0, 0, 0, 1]))
print(ZeroZeroOne([0, 2, 2, 2, 2, 0, 1]))
But I think you can generalize this as well if required. Probably you need to look in to how grep works and modify it for your use case if you want a generic approach.
I think this does what you want :)
def ZeroZeroOne(arr):
dropped = [x for x in arr if x==0 or x==1]
slices = [dropped[i:i+3] for i in range(len(dropped)-2)]
if [0,0,1] in slices: return True
else: return False
def ZeroZeroOne(nums):
filtered_nums = [x for x in nums if x in [0,1]]
return '*'.join([str(x) for x in [0,0,1]) in '*'.join([str(x) for x in filtered_nums])

Python, permutation to permuation-index function

I have some permutations of a list:
>>> import itertools
>>> perms = list(itertools.permutations([0,1,2,3]))
>>> perms
[(0, 1, 2, 3), (0, 1, 3, 2), (0, 2, 1, 3), (0, 2, 3, 1), (0, 3, 1, 2), (0, 3, 2, 1), (1, 0, 2, 3), (1, 0, 3, 2), (1, 2, 0, 3), (1, 2, 3, 0), (1, 3, 0, 2), (1, 3, 2, 0), (2, 0, 1, 3), (2, 0, 3, 1), (2, 1, 0, 3), (2, 1, 3, 0), (2, 3, 0, 1), (2, 3, 1, 0), (3, 0, 1, 2), (3, 0, 2, 1), (3, 1, 0, 2), (3, 1, 2, 0), (3, 2, 0, 1), (3, 2, 1, 0)]
>>> len(perms)
24
What function can I use (without access to the list perm) to get the index of an arbitrary permutation, e.g. (0, 2, 3, 1) -> 3?
(You can assume that permuted elements are always an ascending list of integers, starting at zero.)
Hint: The factorial number system may be involved. https://en.wikipedia.org/wiki/Factorial_number_system
Off the top of my head I came up with the following, didn't test it thoroughly.
from math import factorial
elements = list(range(4))
permutation = (3, 2, 1, 0)
index = 0
nf = factorial(len(elements))
for n in permutation:
nf //= len(elements)
index += elements.index(n) * nf
elements.remove(n)
print(index)
EDIT: replaced nf /= len(elements) with nf //= len(elements)
I suppose this is a challenge, so here is my (recursive) answer:
import math
import itertools
def get_index(l):
# In a real function, there should be more tests to validate that the input is valid, e.g. len(l)>0
# Terminal case
if len(l)==1:
return 0
# Number of possible permutations starting with l[0]
span = math.factorial(len(l)-1)
# Slightly modifying l[1:] to use the function recursively
new_l = [ val if val < l[0] else val-1 for val in l[1:] ]
# Actual solution
return get_index(new_l) + span*l[0]
get_index((0,1,2,3))
# 0
get_index((0,2,3,1))
# 3
get_index((3,2,1,0))
# 23
get_index((4,2,0,1,5,3))
# 529
list(itertools.permutations((0,1,2,3,4,5))).index((4,2,0,1,5,3))
# 529
You need to write your own function. Something like this would work
import math
def perm_loc(P):
N = len(P)
assert set(P) == set(range(N))
def rec(perm):
nums = set(perm)
if not perm:
return 0
else:
sub_res = rec(perm[1:]) # Result for tail of permutation
sub_size = math.factorial(len(nums) - 1) # How many tail permutations exist
sub_index = sorted(nums).index(perm[0]) # Location of first element in permutaiotn
# in the sorted list of number
return sub_index * sub_size + sub_res
return rec(P)
The function that does all the work is rec, with perm_loc just serving as a wrapper around it. Note that this algorithm is based on the nature of the permutation algorithm that itertools.permutation happens to use.
The following code tests the above function. First on your sample, and then on all permutations of range(7):
print perm_loc([0,2,3,1]) # Print the result from the example
import itertools
def test(N):
correct = 0
perms = list(itertools.permutations(range(N)))
for (i, p) in enumerate(perms):
pl = perm_loc(p)
if i == pl:
correct += 1
else:
print ":: Incorrect", p, perms.index(p), perm_loc(N, p)
print ":: Found %d correct results" % correct
test(7) # Test on all permutations of range(7)
from math import factorial
def perm_to_permidx(perm):
# Extract info
n = len(perm)
elements = range(n)
# "Gone"s will be the elements of the given perm
gones = []
# According to each number in perm, we add the repsective offsets
offset = 0
for i, num in enumerate(perm[:-1], start=1):
idx = num - sum(num > gone for gone in gones)
offset += idx * factorial(n - i)
gones.append(num)
return offset
the_perm = (0, 2, 3, 1)
print(perm_to_permidx(the_perm))
# 3
Explanation: All permutations of a given range can be considered as a groups of permutations. So, for example, for the permutations of 0, 1, 2, 3 we first "fix" 0 and permute rest, then fix 1 and permute rest, and so on. Once we fix a number, the rest is again permutations; so we again fix a number at a time from the remaining numbers and permute the rest. This goes on till we are left with one number only. Every level of fixing has a corresponding (n-i)! permutations.
So this code finds the "offsets" for each level of permutation. The offset corresonds to where the given permutation starts when we fix numbers of perm in order. For the given example of (0, 2, 3, 1), we first look at the first number in the given perm which is 0, and figure the offset as 0. Then this goes to gones list (we will see its usage). Then, at the next level of permutation we see 2 as the fixing number. To calculate the offset for this, we need the "order" of this 2 among the remaining three numbers. This is where gones come into play; if an already-fixed and considered number (in this case 0) is less than the current fixer, we subtract 1 to find the new order. Then offset is calculated and accumulated. For the next number 3, the new order is 3 - (1 + 1) = 1 because both previous fixers 0 and 2 are at the "left" of 3.
This goes on till the last number of the given perm since there is no need to look at it; it will have been determined anyway.

harvard cs109 homework0 switch_guesses

def switch_guess(guesses, goatdoors):
result = np.zeros(guesses.size)
switch = {(0, 1): 2, (0, 2): 1, (1, 0): 2, (1, 2): 1, (2, 0): 1, (2, 1): 0}
for i in [0, 1, 2]:
for j in [0, 1, 2]:
mask = (guesses == i) & (goatdoors == j)
if not mask.any():
continue
result = np.where(mask, np.ones_like(result) * switch[(i, j)], result)
return result
don't quite understand how this works, can anybody helps explain it?
thanks!
hints:
Returns The new door after switching. Should be different from both
guesses and goatdoors
Examples
>>> print switch_guess(np.array([0, 1, 2]), np.array([1, 2, 1]))
>>> array([2, 0, 0])
"""
You might have some trouble understanding this example due to a mistake in the 4th entry of the switch dictionary. It should be as follows:
switch = {(0, 1): 2, (0, 2): 1, (1, 0): 2, (1, 2): 0, (2, 0): 1, (2, 1): 0}
I've added some comments throughout the source, which explain my understanding of the code. It does look a lot like the Monty Hall experiment, so I'm guessing the idea is that the function basically says:
Given that you guessed guesses[i] and there is a goat behind goatdoors[i], you should switch and pick door results[i].
It also seems a slightly convoluted way to go about this problem, if I understood it correctly.
def switch_guess(guesses, goatdoors):
# Initialise result to an array of zeros, the same size as the guesses array
result = np.zeros(guesses.size)
# Create a dictionary to be used later in determining the values of result.
switch = {(0, 1): 2, (0, 2): 1, (1, 0): 2, (1, 2): 1, (2, 0): 1, (2, 1): 0}
for i in [0, 1, 2]:
for j in [0, 1, 2]:
# Create a mask, which is True when the corresponding elements of
# guesses and goatdoors are both equal to i and j, respectively.
mask = (guesses == i) & (goatdoors == j)
# If no elements of mask are true, go to the next loop iteration (of
# the j loop).
if not mask.any():
continue
# For each element of result, if the corresponding element of mask
# is True, set the element of result to 1 * switch[(i, j)], otherwise
# leave it unchanged.
result = np.where(mask, np.ones_like(result) * switch[(i, j)], result)
return result

fill in list in multiple steps

Lets assume you have a list with y poisitions (0 for sake of this question). If y = 10:
[0,0,0,0,0,0,0,0,0,0]
You want to fill adjacent positions up to a given value x and append it to an empty list. If x = 4:
[[1,1,1,1,0,0,0,0,0,0], [0,1,1,1,1,0,0,0,0,0], [0,0,1,1,1,1,0,0,0,0], ... , [0,0,0,0,0,0,1,1,1,1]]
I made that occur through this function:
def permutations(number=4, limit=10):
perms = []
if type(number) == int:
a = -1
b = a + number
while b < limit:
a+=1
b = a + number
start = [0 for x in range(limit)]
for i in range(a, b):
start[i] = 1
perms.append(start)
This is fine, but if I want to do the same thing, but pass a tuple instead of an integer I'd like the output to be:
if number = (4,3):
[[1,1,1,1,0,1,1,1,0,0], [1,1,1,1,0,0,1,1,1,0], [1,1,1,1,0,0,0,1,1,1],
[0,1,1,1,1,0,1,1,1,0], [0,1,1,1,1,0,0,1,1,1],
[0,0,1,1,1,1,0,1,1,1]]
The 0 between the two groupings of 1's is necessary the first value of the tuple corresponds to the number of 1's in the first grouping, and the second value of the tuple corresponds to the number of 1's in the second grouping. Ideally this function would work with tuples that have more than 2 values.
This idea is a little challenging to get across so please let me know if you need any clarification.
Thank you for your help!
The simplest approach I can think of is to generate all possible combinations of 1 and 0, and filter out all of the ones that don't have the right grouping lengths.
import itertools
def permutations(tup, limit=10):
for candidate in itertools.product([0,1], repeat=limit):
segment_lengths = [len(list(b)) for a,b in itertools.groupby(candidate) if a == 1]
if tup == tuple(segment_lengths):
yield candidate
for seq in permutations((4, 3), 10):
print seq
Result:
(0, 0, 1, 1, 1, 1, 0, 1, 1, 1)
(0, 1, 1, 1, 1, 0, 0, 1, 1, 1)
(0, 1, 1, 1, 1, 0, 1, 1, 1, 0)
(1, 1, 1, 1, 0, 0, 0, 1, 1, 1)
(1, 1, 1, 1, 0, 0, 1, 1, 1, 0)
(1, 1, 1, 1, 0, 1, 1, 1, 0, 0)
Note that this is very slow for large values of limit - it has to evaluate 2^limit candidate sequences. Not bad for limit = 10; only 1024 candidates need to be evaluated. But it quickly grows into the millions and beyond for larger limits.
Edit: Inspired by user2097159's excellent comment, here's an approach with better run time.
import itertools
"""Finds all non-negative integer sequences whose sum equals `total`, and who have `size` elements."""
def possible_sums(total, size):
if total == 0:
yield [0]*size
return
if size == 1:
yield [total]
return
for i in range(total+1):
left = [i]
for right in possible_sums(total-i, size-1):
yield left + right
"""
combines two lists a and b in order like:
[a[0], b[0], a[1], b[1]...]
"""
def interleave(a,b):
result = []
for pair in itertools.izip_longest(a,b):
for item in pair:
if item is not None:
result.append(item)
return result
"""flattens a list of lists into a one dimensional list"""
def flatten(seq):
return [x for item in seq for x in item]
def permutations(tup, limit):
one_segments = [[1]*size for size in tup]
for i in range(len(tup)-1):
one_segments[i].append(0)
remaining_zeroes = limit - sum(tup) - len(tup) + 1
assert remaining_zeroes >= 0, "not enough room to separate ranges!"
for gap_sizes in possible_sums(remaining_zeroes, len(tup)+1):
zero_segments = [[0]*size for size in gap_sizes]
yield flatten(interleave(zero_segments, one_segments))
for seq in permutations((4, 3), 10):
print seq
You can generate all list recursively.
F(tup, limit) =
[1, 1, ...1, 0] combine with all solutions of F(tup[1:], limit - len(tup[1]) - 1)
[0, 1 ,1 , ... 1, 0] combine with all solutions of F(tup[1:], limit - len(tup[1]) - 2)
.
.
.
if tup is empty return a list of zero
if sum(tup) + len(tup) - 1 > limit, return an empty list since there is no solution.
e.g. permutations((4,3,2), 10) shall return []
Otherwise, enumerating how many prefix zero there will be:
Generate prefix list which is [0, 0, 0 .. 0, 1, 1, ... 1, 0] The number of 1s is the value of first item in the tuple. Append additional 0 if it's not the last item of the tuple.
Call the function recursively for the rest element in the tuple to solve the similar sub-problem
Combine the prefix list with each solution of the sub-problem
Here is the code:
def permutations(tup, limit=100):
if len(tup) <= 0:
return [[0] * limit]
minimum_len = sum(tup) + len(tup) - 1
if minimum_len > limit:
return []
perms = []
for prefix_zero in range(0, limit - minimum_len + 1):
prefix = [0] * prefix_zero + [1] * tup[0]
if len(tup) > 1:
prefix += [0]
suffix_list = permutations(tup[1:], limit - len(prefix))
perms += [prefix + suffix for suffix in suffix_list] #combine the solutions
return perms
This solution creates all permutations of blocks of ones (a list defined by each entry in the tuple) with blocks of zeros (lists of length one) for the extra padding.
import itertools as it
spec = (1,2,3)
nBlocks = len(spec)
nZeros = 5
totalSize = sum(spec) + nZeros+1-nBlocks
blocks = [[1,]*s + [0,] for s in spec]
zeros = [[0,],]*(nZeros+1-nBlocks)
a = list(it.permutations(blocks + zeros, nZeros+1))
b = [list(it.chain.from_iterable(l))[:-1] for l in a]
for l in b:
print l
Without using itertools.
My shot at this, should be fairly quick, but uses a recursive generator (python recursion depth limit, here I come...).
# simple test case
seqs = (1, 2, 3)
length = 10
# '0' spots count
zeros = length - (sum(seqs))
# partitions count
partitions = len(seqs) + 1
# first and last can partitions have 0 zeros
# so use a flag when we call the function or check if it's the last partition
def generate_gaps(zeros_left, partition, first=False):
"""
:param zeros_left: how many zeros we can still use
:param partition: what partition is this
:param first: is this the first gap
:return: all possible gaps
"""
for gap in range((0 if first or partition == 0 else 1), zeros_left + 1):
if partition == 0:
if (zeros_left - gap) == 0:
yield [gap]
else:
for rest in generate_gaps(zeros_left - gap, partition - 1):
yield [gap] + rest
for gaps in generate_gaps(zeros, partitions - 1, True):
print "using gaps: " + str(gaps)
# merge lists
# zip gaps (0's) and sequences (1's) - all but last gap (added to result)
gaps_seqs = zip(gaps, seqs)
# expand everything... magic (could be done explicitly trivially).
result = sum(map(lambda x: [0] * x[0] + [1] * x[1], gaps_seqs)
# last gap (truncated from zip)
result = result + [[0] * gaps[-1]], [])
A simple non-recursive generator solution without itertools:
def fill_sequence(sequence, size):
n_slots = size - len(sequence)
for start in xrange(n_slots + 1):
yield [0]*start + sequence + [0]*(n_slots - start)
def all_placements(inner_sizes, outer_size):
x, y = inner_sizes
for margin in xrange(1, outer_size - sum(block_sizes) + 1):
sequence = [1]*x + [0]*margin + [1]*y
for p in fill_sequence(sequence, outer_size):
yield p
So that:
>>> list(all_placements((4,3), 10))
[[1, 1, 1, 1, 0, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 0, 1, 1, 1, 0],
[0, 0, 1, 1, 1, 1, 0, 1, 1, 1],
[1, 1, 1, 1, 0, 0, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 0, 0, 1, 1, 1],
[1, 1, 1, 1, 0, 0, 0, 1, 1, 1]]
The idea is quite simple. Suppose you fix the number of zeros between your two blocks of ones, call it the margin. This gives you a 4 + margin + 3 sequence. You can easily place this sequence in the larger list of zeros using the approach you took in your post. Then simply iteratively increase the margin, yielding all possible placements.

Detect whether sequence is a multiple of a subsequence in Python

I have a tuple of zeros and ones, for instance:
(1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1)
It turns out:
(1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1) == (1, 0, 1, 1) * 3
I want a function f such that if s is a non-empty tuple of zeros and ones, f(s) is the shortest subtuple r such that s == r * n for some positive integer n.
So for instance,
f( (1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1) ) == (1, 0, 1, 1)
What is a slick way to write the function f in Python?
Edit:
The naive method I am currently using
def f(s):
for i in range(1,len(s)):
if len(s)%i == 0 and s == s[:i] * (len(s)/i):
return s[:i]
I believe I have an O(n) time solution (actually 2n+r, n is length of tuple, r is sub tuplle) which does not use suffix trees, but uses a string matching algorithm (like KMP, which you should find off-the shelf).
We use the following little known theorem:
If x,y are strings over some alphabet,
then xy = yx if and only if x = z^k and y = z^l for some string z and integers k,l.
I now claim that, for the purposes of our problem, this means that all we need to do is determine if the given tuple/list (or string) is a cyclic shift of itself!
To determine if a string is a cyclic shift of itself, we concatenate it with itself (it does not even have to be a real concat, just a virtual one will do) and check for a substring match (with itself).
For a proof of that, suppose the string is a cyclic shift of itself.
The we have that the given string y = uv = vu.
Since uv = vu, we must have that u = z^k and v= z^l and hence y = z^{k+l} from the above theorem. The other direction is easy to prove.
Here is the python code. The method is called powercheck.
def powercheck(lst):
count = 0
position = 0
for pos in KnuthMorrisPratt(double(lst), lst):
count += 1
position = pos
if count == 2:
break
return lst[:position]
def double(lst):
for i in range(1,3):
for elem in lst:
yield elem
def main():
print powercheck([1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1])
if __name__ == "__main__":
main()
And here is the KMP code which I used (due to David Eppstein).
# Knuth-Morris-Pratt string matching
# David Eppstein, UC Irvine, 1 Mar 2002
def KnuthMorrisPratt(text, pattern):
'''Yields all starting positions of copies of the pattern in the text.
Calling conventions are similar to string.find, but its arguments can be
lists or iterators, not just strings, it returns all matches, not just
the first one, and it does not need the whole text in memory at once.
Whenever it yields, it will have read the text exactly up to and including
the match that caused the yield.'''
# allow indexing into pattern and protect against change during yield
pattern = list(pattern)
# build table of shift amounts
shifts = [1] * (len(pattern) + 1)
shift = 1
for pos in range(len(pattern)):
while shift <= pos and pattern[pos] != pattern[pos-shift]:
shift += shifts[pos-shift]
shifts[pos+1] = shift
# do the actual search
startPos = 0
matchLen = 0
for c in text:
while matchLen == len(pattern) or \
matchLen >= 0 and pattern[matchLen] != c:
startPos += shifts[matchLen]
matchLen -= shifts[matchLen]
matchLen += 1
if matchLen == len(pattern):
yield startPos
For your sample this outputs
[1,0,1,1]
as expected.
I compared this against shx2's code(not the numpy one), by generating a random 50 bit string, then replication to make the total length as 1 million. This was the output (the decimal number is the output of time.time())
1362988461.75
(50, [1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1])
1362988465.96
50 [1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1]
1362988487.14
The above method took ~4 seconds, while shx2's method took ~21 seconds!
Here was the timing code. (shx2's method was called powercheck2).
def rand_bitstring(n):
rand = random.SystemRandom()
lst = []
for j in range(1, n+1):
r = rand.randint(1,2)
if r == 2:
lst.append(0)
else:
lst.append(1)
return lst
def main():
lst = rand_bitstring(50)*200000
print time.time()
print powercheck(lst)
print time.time()
powercheck2(lst)
print time.time()
The following solution is O(N^2), but has the advantage of not creating any copies (or slices) of your data, as it is based on iterators.
Depending on the size of your input, the fact you avoid making copies of the data can result in a significant speed-up, but of course, it would not scale as well for huge inputs as algorithms with lower complexity (e.g. O(N*logN)).
[This is the second revision of my solution, the first one is given below. This one is simpler to understand, and is more along the lines of OP's tuple-multiplication, only using iterators.]
from itertools import izip, chain, tee
def iter_eq(seq1, seq2):
""" assumes the sequences have the same len """
return all( v1 == v2 for v1, v2 in izip(seq1, seq2) )
def dup_seq(seq, n):
""" returns an iterator which is seq chained to itself n times """
return chain(*tee(seq, n))
def is_reps(arr, slice_size):
if len(arr) % slice_size != 0:
return False
num_slices = len(arr) / slice_size
return iter_eq(arr, dup_seq(arr[:slice_size], num_slices))
s = (1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1)
for i in range(1,len(s)):
if is_reps(s, i):
print i, s[:i]
break
[My original solution]
from itertools import islice
def is_reps(arr, num_slices):
if len(arr) % num_slices != 0:
return False
slice_size = len(arr) / num_slices
for i in xrange(slice_size):
if len(set( islice(arr, i, None, num_slices) )) > 1:
return False
return True
s = (1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1)
for i in range(1,len(s)):
if is_reps(s, i):
print i, s[:i]
break
You can avoid the call to set() by using something like:
def is_iter_unique(seq):
""" a faster version of testing len(set(seq)) <= 1 """
seen = set()
for x in seq:
seen.add(x)
if len(seen) > 1:
return False
return True
and replacing this line:
if len(set( islice(arr, i, None, num_slices) )) > 1:
with:
if not is_iter_unique(islice(arr, i, None, num_slices)):
Simplifying Knoothe's solution. His algorithm is right, but his implementation is too complex. This implementation is also O(n).
Since your array is only composed of ones and zeros, what I do is use existing str.find implementation (Bayer Moore) to implement Knoothe's idea. It's suprisingly simpler and amazingly faster at runtime.
def f(s):
s2 = ''.join(map(str, s))
return s[:(s2+s2).index(s2, 1)]
Here's another solution (competing with my earlier iterators-based solution), leveraging numpy.
It does make a (single) copy of your data, but taking advantage of the fact your values are 0s and 1s, it is super-fast, thanks to numpy's magics.
import numpy as np
def is_reps(arr, slice_size):
if len(arr) % slice_size != 0:
return False
arr = arr.reshape((-1, slice_size))
return (arr.all(axis=0) | (~arr).all(axis=0)).all()
s = (1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1) * 1000
a = np.array(s, dtype=bool)
for i in range(1,len(s)):
if is_reps(a, i):
print i, s[:i]
break
Just a different approach to the problem
I first determine all the factors of the length and then split the list and check if all the parts are same
>>> def f(s):
def factors(n):
#http://stackoverflow.com/a/6800214/977038
return set(reduce(list.__add__,
([i, n//i] for i in range(2, int(n**0.5) + 1) if n % i == 0)))
_len = len(s)
for fact in reversed(list(factors(_len))):
compare_set = set(izip(*[iter(s)]*fact))
if len(compare_set) == 1:
return compare_set
>>> f(t)
set([(1, 0, 1, 1)])
You can archive it in sublinear time by XOR'ing the rotated binary form for the input array:
get the binary representation of the array, input_binary
loop from i = 1 to len(input_array)/2, and for each loop, rotate the input_binary to the right by i bits, save it as rotated_bin, then compare the XOR of rotated_bin and input_binary.
The first i that yields 0, is the index to which is the desired substring.
Complete code:
def get_substring(arr):
binary = ''.join(map(str, arr)) # join the elements to get the binary form
for i in xrange(1, len(arr) / 2):
# do a i bit rotation shift, get bit string sub_bin
rotated_bin = binary[-i:] + binary[:-i]
if int(rotated_bin) ^ int(binary) == 0:
return arr[0:i]
return None
if __name__ == "__main__":
test = [1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
print get_substring(test) # [1,0,1,1]
This one is just a dumb recursive comparison in Haskell. It takes about one second for Knoothe's million long string (f a). Cool problem! I'll think about it some more.
a = concat $ replicate 20000
[1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,
0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,
1,1,1,0,0,1,1,1,0,0,0,0,0,1]
f s =
f' s [] where
f' [] result = []
f' (x:xs) result =
let y = result ++ [x]
in if concat (replicate (div (length s) (length y)) y) == s
then y
else f' xs y

Categories

Resources