Largest subarray with sum equal to 0 - python

This is a typical interview question. Given an array that contains both positive and negative elements without 0, find the largest subarray whose sum equals 0. I tried to solve this. This is what I came up with.
def sub_array_sum(array,k=0):
start_index = -1
hash_sum = {}
current_sum = 0
keys = set()
best_index_hash = {}
for i in array:
start_index += 1
current_sum += i
if current_sum in hash_sum:
hash_sum[current_sum].append(start_index)
keys.add(current_sum)
else:
if current_sum == 0:
best_index_hash[start_index] = [(0,start_index)]
else:
hash_sum[current_sum] = [start_index]
if keys:
for k_1 in keys:
best_start = hash_sum.get(k_1)[0]
best_end_list = hash_sum.get(k_1)[1:]
for best_end in best_end_list:
if abs(best_start-best_end) in best_index_hash:
best_index_hash[abs(best_start-best_end)].append((best_start+1,best_end))
else:
best_index_hash[abs(best_start-best_end)] = [(best_start+1,best_end)]
if best_index_hash:
(bs,be) = best_index_hash[max(best_index_hash.keys(),key=int)].pop()
return array[bs:be+1]
else:
print "No sub array with sum equal to 0"
def Main():
a = [6,-2,8,5,4,-9,8,-2,1,2]
b = [-8,8]
c = [-7,8,-1]
d = [2200,300,-6,6,5,-9]
e = [-9,9,-6,-3]
print sub_array_sum(a)
print sub_array_sum(b)
print sub_array_sum(c)
print sub_array_sum(d)
print sub_array_sum(e)
if __name__ == '__main__':
Main()
I am not sure if this will satisfy all the edge case. if someone can comment on that, it would be excellent Also i want to extend this to sum equalling to any K not just 0. How should i go about it. And any pointers to optimize this further is also helpful.

You have given a nice, linear-time solution (better than the two other answers at this time, which are quadratic-time), based on the idea that whenever sum(i .. j) = 0, it must be that sum(0 .. i-1) = sum(0 .. j) and vice versa. Essentially you compute the prefix sums sum(0 .. i) for all i, building up a hashtable hash_sum in which hash_sum[x] is a list of all positions i having sum(0 .. i) = x. Then you go through this hashtable, one sum at a time, looking for any sum that was made by more than one prefix. Among all such made-more-than-once sums, you choose the one that was made by a pair of prefixes that are furthest apart -- this is the longest.
Since you already noticed the key insight needed to make this algorithm linear-time, I'm a bit puzzled as to why you build up so much unnecessary stuff in best_index_hash in your second loop. For a given sum x, the furthest-apart pair of prefixes that make that sum will always be the smallest and largest entries in hash_sum[x], which will necessarily be the first and last entries (because that's the order they were appended), so there's no need to loop over the elements in between. In fact you don't even need a second loop at all: you can keep a running maximum during your first loop, by treating start_index as the rightmost endpoint.
To handle an arbitrary difference k: Instead of finding the leftmost occurrence of a prefix summing to current_sum, we need to find the leftmost occurrence of a prefix summing to current_sum - k. But that's just first_with_sum{current_sum - k}.
The following code isn't tested, but should work:
def sub_array_sum(array,k=0):
start_index = -1
first_with_sum = {}
first_with_sum{0} = -1
best_start = -1
best_len = 0
current_sum = 0
for i in array:
start_index += 1
current_sum += i
if current_sum - k in first_with_sum:
if start_index - first_with_sum{current_sum - k} > best_len:
best_start = first_with_sum{current_sum - k} + 1
best_len = start_index - first_with_sum{current_sum - k}
else:
first_with_sum{current_sum} = start_index
if best_len > 0:
return array[best_start:best_start+best_len-1]
else:
print "No subarray found"
Setting first_with_sum{0} = -1 at the start means that we don't have to treat a range beginning at index 0 as a special case. Note that this algorithm doesn't improve on the asymptotic time or space complexity of your original one, but it's simpler to implement and will use a small amount less space on any input that contains a zero-sum subarray.

Here's my own answer, just for fun.
The number of subsequences is quadratic, and the time to sum a subsequence is linear, so the most naive solution would be cubic.
This approach is just an exhaustive search over the subsequences, but a little trickery avoids the linear summing factor, so it's only quadratic.
from collections import namedtuple
from itertools import chain
class Element(namedtuple('Element', ('index', 'value'))):
"""
An element in the input sequence. ``index`` is the position
of the element, and ``value`` is the element itself.
"""
pass
class Node(namedtuple('Node', ('a', 'b', 'sum'))):
"""
A node in the search graph, which looks like this:
0 1 2 3
\ / \ / \ /
0-1 1-2 2-3
\ / \ /
0-2 1-3
\ /
0-3
``a`` is the start Element, ``b`` is the end Element, and
``sum`` is the sum of elements ``a`` through ``b``.
"""
#classmethod
def from_element(cls, e):
"""Construct a Node from a single Element."""
return Node(a=e, b=e, sum=e.value)
def __add__(self, other):
"""The combining operation depicted by the graph above."""
assert self.a.index == other.a.index - 1
assert self.b.index == other.b.index - 1
return Node(a=self.a, b=other.b, sum=(self.sum + other.b.value))
def __len__(self):
"""The number of elements represented by this node."""
return self.b.index - self.a.index + 1
def get_longest_k_sum_subsequence(ints, k):
"""The longest subsequence of ``ints`` that sums to ``k``."""
n = get_longest_node(n for n in generate_nodes(ints) if n.sum == k)
if n:
return ints[n.a.index:(n.b.index + 1)]
if k == 0:
return []
def get_longest_zero_sum_subsequence(ints):
"""The longest subsequence of ``ints`` that sums to zero."""
return get_longest_k_sum_subsequence(ints, k=0)
def generate_nodes(ints):
"""Generates all Nodes in the graph."""
nodes = [Node.from_element(Element(i, v)) for i, v in enumerate(ints)]
while len(nodes) > 0:
for n in nodes:
yield n
nodes = [x + y for x, y in zip(nodes, nodes[1:])]
def get_longest_node(nodes):
"""The longest Node in ``nodes``, or None if there are no Nodes."""
return max(chain([()], nodes), key=len) or None
if __name__ == '__main__':
def f(*ints):
return get_longest_zero_sum_subsequence(list(ints))
assert f() == []
assert f(1) == []
assert f(0) == [0]
assert f(0, 0) == [0, 0]
assert f(-1, 1) == [-1, 1]
assert f(-1, 2, 1) == []
assert f(1, -1, 1, -1) == [1, -1, 1, -1]
assert f(1, -1, 8) == [1, -1]
assert f(0, 1, -1, 8) == [0, 1, -1]
assert f(5, 6, -2, 1, 1, 7, -2, 2, 8) == [-2, 1, 1]
assert f(5, 6, -2, 2, 7, -2, 1, 1, 8) == [-2, 1, 1]

I agree with sundar nataraj when he says that this must be posted to the code review forum.
For fun though I looked at your code. Though I am able to understand your approach, I fail to understand the need to use Counter.
best_index_hash[start_index] = [(0,start_index)] - Here best_index_hash is of the type Counter. Why are you assigning a list to it?
for key_1, value_1 in best_index_hash.most_common(1) - You trying to get largest subsequence and for that you are using most_common as the answer. This is not intuitive semantically.
I am tempted to post a solution but I will wait for you to edit the code snippet and improve it.
Addendum
For fun, I had a go at this puzzle and I present my effort below. I make no guarantees of correctness/completeness.
from collections import defaultdict
def max_sub_array_sum(a, s):
if a:
span = defaultdict(lambda : (0,0))
current_total = 0
for i in xrange(len(a)):
current_total = a[i]
for j in xrange (i + 1, len(a)):
current_total += a[j]
x,y = span[current_total]
if j - i > y - x:
span[current_total] = i,j
if s in span:
i, j = span[s]
print "sum=%d,span_length=%d,indices=(%d,%d),sequence=%s" %\
(s, j-i + 1, i, j, str(a[i:j + 1]))
return
print "Could not find a subsequence of sum %d in sequence %s" % \
(s, str(a))
max_sub_array_sum(range(-6, -1), 0)
max_sub_array_sum(None, 0)
max_sub_array_sum([], 0)
max_sub_array_sum(range(6), 15)
max_sub_array_sum(range(6), 14)
max_sub_array_sum(range(6), 13)
max_sub_array_sum(range(6), 0)

Here's the solution taken from LeetCode :
def sub_array_sum(nums, k=0):
count, sum = 0, 0
map = dict()
map[0] = 1
for i in range(len(nums)):
sum += nums[i]
if map.__contains__(sum - k):
count += map[sum - k]
map[sum] = map.get(sum, 0) + 1
return count

Related

FibFrog Codility Problem - Optimising for Performance

I'm trying to solve the FibFrog Codility problem and I came up with the following approach:
If len(A) is 0 we know we can reach the other side in one jump.
If len(A) + 1 is a fibonacci number, we can also reach it in one jump.
Else, we loop through A, and for the positions we can reach, we check if we can either reach them directly from -1 using a fibonacci number (idx + 1 in fibonaccis) or if we can reach them by first jumping to another position (reachables) and then jumping to the current position. In either case, we also check if we can go from the current position to the end of the river - if we can, then we can return because we found the minimum number of steps required.
Finally, if unreachable is True once this loop completes, this means we can't reach any position using a Fibonacci number, so we return -1.
I'm getting 83% correctness and 0% performance with this approach.
I understand the solution is O(n^2), assuming the array consists of only 1, the nested loop for v in reachables: would run n times - however I'm not sure how else I can compute this, since for each of the positions I need to check whether we can reach it from the start of the array, or from any previous positions using a fibonacci number.
def solution(A):
if len(A) == 0: return 1
fibonaccis = fibonacci(len(A) + 3)
if len(A) + 1 in fibonaccis: return 1
leaves = [0] * len(A)
unreachable = True
reachables = []
for idx, val in enumerate(A):
if val == 1:
if idx + 1 in fibonaccis:
unreachable = False
leaves[idx] = 1
if len(A) - idx in fibonaccis:
return 2
reachables.append(idx)
elif len(reachables) > 0:
for v in reachables:
if idx - v in fibonaccis:
leaves[idx] = leaves[v] + 1
if len(A) - idx in fibonaccis:
return leaves[v] + 2
reachables.append(idx)
break
if unreachable: return -1
if len(A) - reachables[-1] in fibonaccis:
return leaves[reachables[-1]] + 1
def fibonacci(N):
arr = [0] * N
arr[1] = 1
for i in range(2, N):
arr[i] = arr[i-1] + arr[i-2]
return arr
Some suggestions for improving performance of your algorithm -
If len(A) = 100000, you are calculating 100003 fibonacci numbers, while we only need fibonacci numbers which are less than 100k, which would be <30 of them.
Your solution is O(n^4), since each X in reachables or Y in fibonaccis operation is O(N) where N is len(A). (and length of fibonaccis being N because of above issue)
Since you are doing a lot of item in list operations on fibonaccis and reachables, consider making it a set or a dictionary for faster(O(1) instead of O(n)) lookup.
Even with the above changes, the algorithm would be O(N^2) because of nested looping across A and reachables, so you need to come up with a better approach.
With your existing implementation, you need to traverse through all the paths and then in the end you will get the smallest number of jumps.
Instead of this approach, if you start at 0, and then keep a count of the number of jumps you have taken so far, and maintain how far(and to which numbers) you can reach after each jump then you can easily find the minimum jumps required to reach the end. (this will also save on redundant work you would have to do in case you have all 1s in A.
e.g. for
A = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
fibonacci = set(1, 2, 3, 5)
At first jump, we can reach following 1-based indexes -
reachable = [1, 2, 3, 5]
jumps = 1
After second jump
reachables = [2, 3, 4, 5, 6, 7, 8]
jumps = 2
After third jump
reachables = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
jumps = 3
so you have reached the end(10) after 3 jumps.
Please check out #nialloc's answer here - https://stackoverflow.com/a/64558623/8677071 which seems to be doing something similar.
Check out also my solution, which scores 100% on Codility tests and is easy to comprehend.
The idea is to track all possible positions of the frog after k jumps. If possible position == n, return k.
def fib_up_to(n):
numbers = [1]
i = 1
while True:
new_num = (numbers[-2] + numbers[-1]) if i > 1 else 2
if new_num > n:
break
numbers.append(new_num)
i += 1
return numbers
def solution(A):
n = len(A)
if n == 0:
return 1
numbers = fib_up_to(n+1)
possible_positions = set([-1])
for k in range(1, n+1):
positions_after_k = set()
for pos in possible_positions:
for jump in numbers:
if pos + jump == n:
return k
if pos + jump < n and A[pos + jump]:
positions_after_k.add(pos + jump)
possible_positions = positions_after_k
return -1

Python query in list without for loop

I want to find a sum with pair of numbers in python list.
List is sorted
Need to check consecutive combinations
Avoid using for loop
I used a for loop to get the job done and its working fine. I want to learn other optimized way to get the same result.
Can I get the same result with other ways without using a for loop?
How could I use binary search in this situation?
This is my code:
def query_sum(list, find_sum):
"""
This function will find sum of two pairs in list
and return True if sum exist in list
:param list:
:param find_sum:
:return:
"""
previous = 0
for number in list:
sum_value = previous + number
if sum_value == find_sum:
print("Yes sum exist with pair {} {}".format(previous, number))
return True
previous = number
x = [1, 2, 3, 4, 5]
y = [1, 2, 4, 8, 16]
query_sum(x, 7)
query_sum(y, 3)
this is the result.
Yes sum exist with pair 3 4
Yes sum exist with pair 1 2
You can indeed use binary search if your list is sorted (and you are only looking at sums of successive elements), since the sums will be monotonically increasing as well. In a list of N elements, there are N-1 successive pairs. You can copy and paste any properly implemented binary search algorithm you find online and replace the criteria with the sum of successive elements. For example:
def query_sum(seq, target):
def bsearch(l, r):
if r >= l:
mid = l + (r - l) // 2
s = sum(seq[mid:mid + 2])
if s == target:
return mid
elif s > target:
return bsearch(l, mid - 1)
else:
return bsearch(mid + 1, r)
else:
return -1
i = bsearch(0, len(seq) - 1)
if i < 0:
return False
print("Sum {} exists with pair {} {}".format(target, *seq[i:i + 2]))
return True
IDEOne Link
You could use the built-in bisect module, but then you would have to pre-compute the sums. This is a much cheaper method since you only have to compute log2(N) sums.
Also, this solution avoids looping using recursion, but you might be better off writing a loop like while r >= l: around the logic instead of using recursion:
def query_sum(seq, target):
def bsearch(l, r):
while r >= l:
mid = l + (r - l) // 2
s = sum(seq[mid:mid + 2])
if s == target:
return mid
elif s > target:
r = mid - 1
else:
l = mid + 1
return -1
i = bsearch(0, len(seq) - 1)
if i < 0:
return False
print("Yes sum exist with pair {} {}".format(*seq[i:i + 2]))
return True
IDEOne Link
# simpler one:
def query_sum(seq, target):
def search(seq, index, target):
if index < len(seq):
if sum(seq[index:index+2]) == target:
return index
else:
return search(seq, index+1, target)
else:
return -1
return search(seq, 0, target)

Faster Python technique to count triples from a list of numbers that are multiples of each other

Suppose we have a list of numbers, l. I need to COUNT all tuples of length 3 from l, (l_i,l_j,l_k) such that l_i evenly divides l_j, and l_j evenly divides l_k. With the stipulation that the indices i,j,k have the relationship i<j<k
I.e.;
If l=[1,2,3,4,5,6], then the tuples would be [1,2,6], [1,3,6],[1,2,4], so the COUNT would be 3.
If l=[1,1,1], then the only tuple would be [1,1,1], so the COUNT would be 1.
Here's what I've done so far, using list comprehensions:
def myCOUNT(l):
newlist=[[x,y,z] for x in l for y in l for z in l if (z%y==0 and y%x==0 and l.index(x)<l.index(y) and l.index(y)<l.index(z))]
return len(newlist)
>>>l=[1,2,3,4,5,6]
>>>myCOUNT(l)
3
This works, but as l gets longer (and it can be as large as 2000 elements long), the time it takes increases too much. Is there a faster/better way to do this?
We can count the number of triples with a given number in the middle by counting how many factors of that number are to its left, counting how many multiples of that number are to its right, and multiplying. Doing this for any given middle element is O(n) for a length-n list, and doing it for all n possible middle elements is O(n^2).
def num_triples(l):
total = 0
for mid_i, mid in enumerate(l):
num_left = sum(1 for x in l[:mid_i] if mid % x == 0)
num_right = sum(1 for x in l[mid_i+1:] if x % mid == 0)
total += num_left * num_right
return total
Incidentally, the code in your question doesn't actually work. It's fallen into the common newbie trap of calling index instead of using enumerate to get iteration indices. More than just being inefficient, this is actually wrong when the input has duplicate elements, causing your myCOUNT to return 0 instead of 1 on the [1, 1, 1] example input.
Finding all tuples in O(n2)
You algorithm iterates over all possible combinations, which makes it O(n3).
Instead, you should precompute the division-tree of your list of numbers and recover triples from the paths down the tree.
Division tree
A division tree is a graph which nodes are numbers and children are the multiples of each number.
By example, given the list [1, 2, 3, 4], the division tree looks like this.
1
/|\
2 | 3
\|
4
Computing the division tree requires to compare each number against all others, making its creation O(n2).
Here is a basic implementation of a division-tree that can be used for your problem.
class DivisionTree:
def __init__(self, values):
values = sorted(values)
# For a division-tree to be connected, we need 1 to be its head
# Thus we artificially add it and note whether it was actually in our numbers
if 1 in values:
self._has_one = True
values = values[1:]
else:
self._has_one = False
self._graph = {1: []}
for v in values:
self.insert(v)
def __iter__(self):
"""Iterate over all values of the division-tree"""
yield from self._graph
def insert(self, n):
"""Insert value in division tree, adding it as child of each divisor"""
self._graph[n] = []
for x in self:
if n != x and n % x == 0:
self._graph[x].append(n)
def paths(self, depth, _from=1):
"""Return a generator of all paths of *depth* down the division-tree"""
if _from == 1:
for x in self._graph[_from]:
yield from self.paths(depth , _from=x)
if depth == 1:
yield [_from]
return
if _from != 1 or self._has_one:
for x in self._graph[_from]:
for p in self.paths(depth - 1, _from=x):
yield [_from, *p]
Usage
Once we built a DivisionTree, it suffices to iterate over all paths down the graph and select only those which have length 3.
Example
l = [1,2,3,4,5,6]
dt = DivisionTree(l)
for p in dt.paths(3):
print(p)
Output
[1, 2, 4]
[1, 2, 6]
[1, 3, 6]
This solution assumes that the list of number is initially sorted, as in your example. Although, the output could be filtered with regard to the condition on indices i < j < k to provide a more general solution.
Time complexity
Generating the division-tree is O(n2).
In turn, there can be up to n! different paths, although stopping the iteration whenever we go deeper than 3 prevents traversing them all. This makes us iterate over the following paths:
the paths corresponding to three tuples, say there are m of them;
the paths corresponding to two tuples, there are O(n2) of them;
the paths corresponding to one tuples, there are O(n) of them.
Thus this overall yields an algorithm O(n2 + m).
I suppose this solution without list comprehension will be faster (you can see analogue with list comprehension further):
a = [1, 2, 3, 4, 5, 6]
def count(a):
result = 0
length = len(a)
for i in range(length):
for j in range(i + 1, length):
for k in range(j + 1, length):
if a[j] % a[i] == 0 and a[k] % a[j] == 0:
result += 1
return result
print(count(a))
Output:
3
In your solution index method is too expensive (requires O(n) operations). Also you don't need to iterate over full list for each x, y and z (x = a[i], y = a[j], z = a[k]). Notice how I use indexes in my loops for y and z because I know that a.index(x) < a.index(y) < a.index(z) is always satisfied.
You can write it as one liner too:
def count(a):
length = len(a)
return sum(1 for i in range(length)
for j in range(i + 1, length)
for k in range(j + 1, length)
if a[j] % a[i] == 0 and a[k] % a[j] == 0)
P.S.
Please, don't use l letter for variables names because it's very similar to 1:)
There is a way to do this with itertools combinations:
from itertools import combinations
l=[1,2,3,4,5,6]
>>> [(x,y,z) for x,y,z in combinations(l,3) if z%y==0 and y%x==0]
[(1, 2, 4), (1, 2, 6), (1, 3, 6)]
Since combinations generates the tuples in list order, you do not then need to check the index of z.
Then your myCOUNT function becomes:
def cnt(li):
return sum(1 for x,y,z in combinations(li,3) if z%y==0 and y%x==0)
>>> cnt([1,1,1])
1
>>> cnt([1,2,3,4,5,6])
3
This is a known problem.
Here are some timing for the solutions here:
from itertools import combinations
class DivisionTree:
def __init__(self, values):
# For a division-tree to be connected, we need 1 to be its head
# Thus we artificially add it and note whether it was actually in our numbers
if 1 in values:
self._has_one = True
values = values[1:]
else:
self._has_one = False
self._graph = {1: []}
for v in values:
self.insert(v)
def __iter__(self):
"""Iterate over all values of the division-tree"""
yield from self._graph
def insert(self, n):
"""Insert value in division tree, adding it as child of each divisor"""
self._graph[n] = []
for x in self:
if n != x and n % x == 0:
self._graph[x].append(n)
def paths(self, depth, _from=1):
"""Return a generator of all paths of *depth* down the division-tree"""
if _from == 1:
for x in self._graph[_from]:
yield from self.paths(depth , _from=x)
if depth == 1:
yield [_from]
return
if _from != 1 or self._has_one:
for x in self._graph[_from]:
for p in self.paths(depth - 1, _from=x):
yield [_from, *p]
def f1(li):
return sum(1 for x,y,z in combinations(li,3) if z%y==0 and y%x==0)
def f2(l):
newlist=[[x,y,z] for x in l for y in l for z in l if (z%y==0 and y%x==0 and l.index(x)<l.index(y) and l.index(y)<l.index(z))]
return len(newlist)
def f3(a):
result = 0
length = len(a)
for i in range(length):
for j in range(i + 1, length):
for k in range(j + 1, length):
if a[j] % a[i] == 0 and a[k] % a[j] == 0:
result += 1
return result
def f4(l):
dt = DivisionTree(l)
return sum(1 for _ in dt.paths(3))
def f5(l):
total = 0
for mid_i, mid in enumerate(l):
num_left = sum(1 for x in l[:mid_i] if mid % x == 0)
num_right = sum(1 for x in l[mid_i+1:] if x % mid == 0)
total += num_left * num_right
return total
if __name__=='__main__':
import timeit
tl=list(range(3,155))
funcs=(f1,f2,f3,f4,f5)
td={f.__name__:f(tl) for f in funcs}
print(td)
for case, x in (('small',50),('medium',500),('large',5000)):
li=list(range(2,x))
print('{}: {} elements'.format(case,x))
for f in funcs:
print(" {:^10s}{:.4f} secs".format(f.__name__, timeit.timeit("f(li)", setup="from __main__ import f, li", number=1)))
And the results:
{'f1': 463, 'f2': 463, 'f3': 463, 'f4': 463, 'f5': 463}
small: 50 elements
f1 0.0010 secs
f2 0.0056 secs
f3 0.0018 secs
f4 0.0003 secs
f5 0.0002 secs
medium: 500 elements
f1 1.1702 secs
f2 5.3396 secs
f3 1.8519 secs
f4 0.0156 secs
f5 0.0110 secs
large: 5000 elements
f1 1527.4956 secs
f2 6245.9930 secs
f3 2074.2257 secs
f4 1.3492 secs
f5 1.2993 secs
You can see that f1,f2,f3 are clearly O(n^3) or worse and f4,f5 are O(n^2). f2 took more than 90 minutes for what f4 and f5 did in 1.3 seconds.
Solution in O(M*log(M)) for a sorted list containing positive numbers
As user2357112 has answered, we can count the number of triplets in O(n^2) by calculating for every number the number of its factors and multiples. However, if instead of comparing every pair we go over its multiples smaller than the largest number and check whether they are in the list, we can change the efficiency to O(N+M*log(N)), when M is the largest number in the list.
Code:
def countTriples(myList):
counts = {} #Contains the number of appearances of every number.
factors = {} #Contains the number of factors of every number.
multiples = {} #Contains the number of multiples of every number.
for i in myList: #Initializing the dictionaries.
counts[i] = 0
factors[i] = 0
multiples[i] = 0
maxNum = max(myList) #The maximum number in the list.
#First, we count the number of appearances of every number.
for i in myList:
counts[i] += 1
#Then, for every number in the list, we check whether its multiples are in the list.
for i in counts:
for j in range(2*i, maxNum+1, i):
if(counts.has_key(j)):
factors[j] += counts[i]
multiples[i] += counts[j]
#Finally, we count the number of triples.
ans = 0
for i in counts:
ans += counts[i]*factors[i]*multiples[i] #Counting triplets with three numbers.
ans += counts[i]*(counts[i]-1)*factors[i]/2 #Counting triplets with two larger and one smaller number.
ans += counts[i]*(counts[i]-1)*multiples[i]/2 #Counting triplets with two smaller numbers and one larger number.
ans += counts[i]*(counts[i]-1)*(counts[i]-2)/6 #Counting triplets with three copies of the same number.
return ans
While this solution will work quickly for lists containing many small numbers, it will not work for lists containing large numbers:
countTriples(list(range(1,1000000)) #Took 80 seconds on my computer
countTriples([1,2,1000000000000]) #Will take a very long time
Fast solution with unknown efficiency for unsorted lists
Another method to count the number of multiples and factors of every number in the list would be to use a binary tree data structure, with leaves corresponding to numbers. The data structure supports three operations:
1) Add a number to every position which is a multiple of a number.
2) Add a number to every position which is specified in a set.
3) Get the value of a position.
We use lazy propagation, and propagate the updates from the root to lower nodes only during queries.
To find the number of factors of every item in the list, we iterate over the list, query the number of factors of the current item from the data structure, and add 1 to every position which is a multiple of the item.
To find the number of multiples of every item, we first find for every item in the list all its factors using the algorithm described in the previous solution.
We then iterate over the list in the reverse order. For every item, we query the number of its multiples from the data structure, and add 1 to its factors in the data structure.
Finally, for every item, we add the multiplication of its factors and multiples to the answer.
Code:
'''A tree that supports two operations:
addOrder(num) - If given a number, adds 1 to all the values which are multiples of the given number. If given a tuple, adds 1 to all the values in the tuple.
getValue(num) - returns the value of the number.
Uses lazy evaluation to speed up the algorithm.
'''
class fen:
'''Initiates the tree from either a list, or a segment of the list designated by s and e'''
def __init__(this, l, s = 0, e = -1):
if(e == -1): e = len(l)-1
this.x1 = l[s]
this.x2 = l[e]
this.val = 0
this.orders = {}
if(s != e):
this.s1 = fen(l, s, (s+e)/2)
this.s2 = fen(l, (s+e)/2+1, e)
else:
this.s1 = None
this.s2 = None
'''Testing if a multiple of the number appears in the range of this node.'''
def _numGood(this, num):
if(this.x2-this.x1+1 >= num): return True
m1 = this.x1%num
m2 = this.x2%num
return m1 == 0 or m1 > m2
'''Testing if a member of the group appears in the range of this node.'''
def _groupGood(this, group):
low = 0
high = len(group)
if(this.x1 <= group[0] <= this.x2): return True
while(low != high-1):
mid = (low+high)/2;
if(group[mid] < this.x1): low = mid
elif(group[mid] > this.x2): high = mid
else: return True
return False
def _isGood(this, val):
if(type(val) == tuple):
return this._groupGood(val)
return this._numGood(val)
'''Adds an order to this node.'''
def addOrder(this, num, count = 1):
if(not this._isGood(num)): return
if(this.x1 == this.x2): this.val += count
else :this.orders[num] = this.orders.get(num, 0)+count
'''Pushes the orders to lower nodes.'''
def _pushOrders(this):
if(this.x1 == this.x2): return
for i in this.orders:
this.s1.addOrder(i, this.orders[i])
this.s2.addOrder(i, this.orders[i])
this.orders = {}
def getValue(this, num):
this._pushOrders()
if(num < this.x1 or num > this.x2):
return 0
if(this.x1 == this.x2):
return this.val
return this.s1.getValue(num)+this.s2.getValue(num)
def countTriples2(myList):
factors = [0 for i in myList]
multiples = [0 for i in myList]
numSet = set((abs(i) for i in myList))
sortedList = sorted(list(numSet))
#Calculating factors.
tree = fen(sortedList)
for i in range(len(myList)):
factors[i] = tree.getValue(abs(myList[i]))
tree.addOrder(abs(myList[i]))
#Calculating the divisors of every number in the group.
mxNum = max(numSet)
divisors = {i:[] for i in numSet}
for i in sortedList:
for j in range(i, mxNum+1, i):
if(j in numSet):
divisors[j].append(i)
divisors = {i:tuple(divisors[i]) for i in divisors}
#Calculating the number of multiples to the right of every number.
tree = fen(sortedList)
for i in range(len(myList)-1, -1, -1):
multiples[i] = tree.getValue(abs(myList[i]))
tree.addOrder(divisors[abs(myList[i])])
ans = 0
for i in range(len(myList)):
ans += factors[i]*multiples[i]
return ans
This solution worked for a list containing the numbers 1..10000 in six seconds on my computer, and for a list containing the numbers 1..100000 in 87 seconds.

Python given an array A of N integers, returns the smallest positive integer (greater than 0) that does not occur in A in O(n) time complexity

For example:
input: A = [ 6 4 3 -5 0 2 -7 1 ]
output: 5
Since 5 is the smallest positive integer that does not occur in the array.
I have written two solutions to that problem. The first one is good but I don't want to use any external libraries + its O(n)*log(n) complexity. The second solution "In which I need your help to optimize it" gives an error when the input is chaotic sequences length=10005 (with minus).
Solution 1:
from itertools import count, filterfalse
def minpositive(a):
return(next(filterfalse(set(a).__contains__, count(1))))
Solution 2:
def minpositive(a):
count = 0
b = list(set([i for i in a if i>0]))
if min(b, default = 0) > 1 or min(b, default = 0) == 0 :
min_val = 1
else:
min_val = min([b[i-1]+1 for i, x in enumerate(b) if x - b[i - 1] >1], default=b[-1]+1)
return min_val
Note: This was a demo test in codility, solution 1 got 100% and
solution 2 got 77 %.
Error in "solution2" was due to:
Performance tests ->
medium chaotic sequences length=10005 (with minus) got 3 expected
10000
Performance tests -> large chaotic + many -1, 1, 2, 3 (with
minus) got 5 expected 10000
Testing for the presence of a number in a set is fast in Python so you could try something like this:
def minpositive(a):
A = set(a)
ans = 1
while ans in A:
ans += 1
return ans
Fast for large arrays.
def minpositive(arr):
if 1 not in arr: # protection from error if ( max(arr) < 0 )
return 1
else:
maxArr = max(arr) # find max element in 'arr'
c1 = set(range(2, maxArr+2)) # create array from 2 to max
c2 = c1 - set(arr) # find all positive elements outside the array
return min(c2)
I have an easy solution. No need to sort.
def solution(A):
s = set(A)
m = max(A) + 2
for N in range(1, m):
if N not in s:
return N
return 1
Note: It is 100% total score (Correctness & Performance)
def minpositive(A):
"""Given an list A of N integers,
returns the smallest positive integer (greater than 0)
that does not occur in A in O(n) time complexity
Args:
A: list of integers
Returns:
integer: smallest positive integer
e.g:
A = [1,2,3]
smallest_positive_int = 4
"""
len_nrs_list = len(A)
N = set(range(1, len_nrs_list+2))
return min(N-set(A)) #gets the min value using the N integers
This solution passes the performance test with a score of 100%
def solution(A):
n = sorted(i for i in set(A) if i > 0) # Remove duplicates and negative numbers
if not n:
return 1
ln = len(n)
for i in range(1, ln + 1):
if i != n[i - 1]:
return i
return ln + 1
def solution(A):
B = set(sorted(A))
m = 1
for x in B:
if x == m:
m+=1
return m
Continuing on from Niroj Shrestha and najeeb-jebreel, added an initial portion to avoid iteration in case of a complete set. Especially important if the array is very large.
def smallest_positive_int(A):
sorted_A = sorted(A)
last_in_sorted_A = sorted_A[-1]
#check if straight continuous list
if len(sorted_A) == last_in_sorted_A:
return last_in_sorted_A + 1
else:
#incomplete list, iterate to find the smallest missing number
sol=1
for x in sorted_A:
if x == sol:
sol += 1
else:
break
return sol
A = [1,2,7,4,5,6]
print(smallest_positive_int(A))
This question doesn't really need another answer, but there is a solution that has not been proposed yet, that I believe to be faster than what's been presented so far.
As others have pointed out, we know the answer lies in the range [1, len(A)+1], inclusively. We can turn that into a set and take the minimum element in the set difference with A. That's a good O(N) solution since set operations are O(1).
However, we don't need to use a Python set to store [1, len(A)+1], because we're starting with a dense set. We can use an array instead, which will replace set hashing by list indexing and give us another O(N) solution with a lower constant.
def minpositive(a):
# the "set" of possible answer - values_found[i-1] will tell us whether i is in a
values_found = [False] * (len(a)+1)
# note any values in a in the range [1, len(a)+1] as found
for i in a:
if i > 0 and i <= len(a)+1:
values_found[i-1] = True
# extract the smallest value not found
for i, found in enumerate(values_found):
if not found:
return i+1
We know the final for loop always finds a value that was not marked, because it has one more element than a, so at least one of its cells was not set to True.
def check_min(a):
x= max(a)
if x-1 in a:
return x+1
elif x <= 0:
return 1
else:
return x-1
Correct me if i'm wrong but this works for me.
def solution(A):
clone = 1
A.sort()
for itr in range(max(A) + 2):
if itr not in A and itr >= 1:
clone = itr
break
return clone
print(solution([2,1,4,7]))
#returns 3
def solution(A):
n = 1
for i in A:
if n in A:
n = n+1
else:
return n
return n
def not_in_A(a):
a=sorted(a)
if max(a)<1:
return(1)
for i in range(0,len(a)-1):
if a[i+1]-a[i]>1:
out=a[i]+1
if out==0 or out<1:
continue
return(out)
return(max(a)+1)
mark and then find the first one that didn't find
nums = [ 6, 4, 3, -5, 0, 2, -7, 1 ]
def check_min(nums):
marks = [-1] * len(nums)
for idx, num in enumerate(nums):
if num >= 0:
marks[num] = idx
for idx, mark in enumerate(marks):
if mark == -1:
return idx
return idx + 1
I just modified the answer by #najeeb-jebreel and now the function gives an optimal solution.
def solution(A):
sorted_set = set(sorted(A))
sol = 1
for x in sorted_set:
if x == sol:
sol += 1
else:
break
return sol
I reduced the length of set before comparing
a=[1,222,3,4,24,5,6,7,8,9,10,15,2,3,3,11,-1]
#a=[1,2,3,6,3]
def sol(a_array):
a_set=set()
b_set=set()
cnt=1
for i in a_array:
#In order to get the greater performance
#Checking if element is greater than length+1
#then it can't be output( our result in solution)
if i<=len(a) and i >=1:
a_set.add(i) # Adding array element in set
b_set.add(cnt) # Adding iterator in set
cnt=cnt+1
b_set=b_set.difference(a_set)
if((len(b_set)) > 1):
return(min(b_set))
else:
return max(a_set)+1
sol(a)
def solution(A):
nw_A = sorted(set(A))
if all(i < 0 for i in nw_A):
return 1
else:
ans = 1
while ans in nw_A:
ans += 1
if ans not in nw_A:
return ans
For better performance if there is a possibility to import numpy package.
def solution(A):
import numpy as np
nw_A = np.unique(np.array(A))
if np.all((nw_A < 0)):
return 1
else:
ans = 1
while ans in nw_A:
ans += 1
if ans not in nw_A:
return ans
def solution(A):
# write your code in Python 3.6
min_num = float("inf")
set_A = set(A)
# finding the smallest number
for num in set_A:
if num < min_num:
min_num = num
# print(min_num)
#if negative make positive
if min_num < 0 or min_num == 0:
min_num = 1
# print(min_num)
# if in set add 1 until not
while min_num in set_A:
min_num += 1
return min_num
Not sure why this is not 100% in correctness. It is 100% performance
def solution(A):
arr = set(A)
N = set(range(1, 100001))
while N in arr:
N += 1
return min(N - arr)
solution([1, 2, 6, 4])
#returns 3

Find the subset of a set of integers that has the maximum product

Let A be a non-empty set of integers. Write a function find that outputs a non-empty subset of A that has the maximum product. For example, find([-1, -2, -3, 0, 2]) = 12 = (-2)*(-3)*2
Here's what I think: divide the list into a list of positive integers and a list of negative integers:
If we have an even number of negative integers, multiply everything in both list and we have the answer.
If we have an odd number of negative integers, find the largest and remove it from the list. Then multiply everything in both lists.
If the list has only one element, return this element.
Here's my code in Python:
def find(xs):
neg_int = []
pos_int = []
if len(xs) == 1:
return str(xs[0])
for i in xs:
if i < 0:
neg_int.append(i)
elif i > 0:
pos_int.append(i)
if len(neg_int) == 1 and len(pos_int) == 0 and 0 in xs:
return str(0)
if len(neg_int) == len(pos_int) == 0:
return str(0)
max = 1
if len(pos_int) > 0:
for x in pos_int:
max=x*max
if len(neg_int) % 2 == 1:
max_neg = neg_int[0]
for j in neg_int:
if j > max_neg:
max_neg = j
neg_int.remove(max_neg)
for k in neg_int:
max = k*max
return str(max)
Am I missing anything? P.S. This is a problem from Google's foobar challenge, I am apparently missing one case but I don't know which.
Now here's actual problem:
from functools import reduce
from operator import mul
def find(array):
negative = []
positive = []
zero = None
removed = None
def string_product(iterable):
return str(reduce(mul, iterable, 1))
for number in array:
if number < 0:
negative.append(number)
elif number > 0:
positive.append(number)
else:
zero = str(number)
if negative:
if len(negative) % 2 == 0:
return string_product(negative + positive)
removed = max(negative)
negative.remove(removed)
if negative:
return string_product(negative + positive)
if positive:
return string_product(positive)
return zero or str(removed)
You can simplify this problem with reduce (in functools in Py3)
import functools as ft
from operator import mul
def find(ns):
if len(ns) == 1 or len(ns) == 2 and 0 in ns:
return str(max(ns))
pos = filter(lambda x: x > 0, ns)
negs = sorted(filter(lambda x: x < 0, ns))
return str(ft.reduce(mul, negs[:-1 if len(negs)%2 else None], 1) * ft.reduce(mul, pos, 1))
>>> find([-1, -2, -3, 0, 2])
'12'
>>> find([-3, 0])
'0'
>>> find([-1])
'-1'
>>> find([])
'1'
Here's a solution in one loop:
def max_product(A):
"""Calculate maximal product of elements of A"""
product = 1
greatest_negative = float("-inf") # greatest negative multiplicand so far
for x in A:
product = max(product, product*x, key=abs)
if x <= -1:
greatest_negative = max(x, greatest_negative)
return max(product, product // greatest_negative)
assert max_product([2,3]) == 6
assert max_product([-2,-3]) == 6
assert max_product([-1, -2, -3, 0, 2]) == 12
assert max_product([]) == 1
assert max_product([-5]) == 1
Extra credit: what if the integer constraint were relaxed? What extra information do you need to collect during the loop?
Here is another solution that doesn't require libraries :
def find(l):
if len(l) <= 2 and 0 in l: # This is the missing case, try [-3,0], it should return 0
return max(l)
l = [e for e in l if e != 0] # remove 0s
r = 1
for e in l: # multiply all
r *= e
if r < 0: # if the result is negative, remove biggest negative number and retry
l.remove(max([e for e in l if e < 0]))
r = find(l)
return r
print(find([-1, -2, -3, 0, 2])) # 12
print(find([-3, 0])) # 0
EDIT :
I think I've found the missing case which is when there are only two elements in the list, and the highest is 0.

Categories

Resources