Finding smallest contiguous integers in a Python set - python

What is the best way to get a list of the smallest N contiguous integers in a Python set?
>>> s=set([5,6,10,12,13,15,30,40,41,42,43,44,55,56,90,300,500])
>>> s
set([42, 43, 44, 5, 6, 90, 300, 30, 10, 12, 13, 55, 56, 15, 500, 40, 41])
>>> smallest_contiguous(s,5)
[40,41,42,43,44]
>>> smallest_contiguous(s,6)
[]
Edit: Thanks for the answers, everyone.

Sven has the right idea. You can avoid having to check supersets by just checking the number N - 1 ahead.
def smallest_contiguous(s, N):
lst = list(s)
lst.sort()
Nm = N-1
for i in xrange(len(lst) - Nm):
if lst[i] + Nm == lst[i + Nm]:
return range(lst[i], lst[i]+N)
return []
This will only always be correct for a set as input and knowing that the set only contains integers.

How about this?
def smallest_contiguous(s, N):
lst = sorted(s)
for i in lst:
t = range(i, i+N)
if s.issuperset(t):
return t
return []
It might not be the most efficient solution, but it is concise.
Edit: Justin's approach could also be made more concise:
def smallest_contiguous(s, N):
lst = sorted(s)
for a, b in zip(lst, lst[N - 1:]):
if b - a == N - 1:
return range(a, b + 1)
return []

That should do it ... look ahead length - 1 steps in the sorted list. Since it contains integers only and is sorted, the difference must be length - 1 as well.
def smallest_contiguous(myset, length):
if len(myset) < length:
return []
s = sorted(myset)
for idx in range(0, len(myset) - length + 1):
if s[idx+length-1] - s[idx] == length - 1:
return s[idx:idx+length]
return []
s=set([5,6,10,12,13,15,30,40,41,42,43,44,55,56,90,300,500])
print smallest_contiguous(s, 5)
print smallest_contiguous(s, 6)

Here's one I came up with:
def smallest_contiguous(s,N):
try:
result = []
while len(result) < N:
min_value = min(s)
s.remove(min_value)
if result == [] or min_value == result[-1] + 1:
result.append(min_value)
else:
result = [min_value]
return result
except ValueError:
return []
It modifies the input set as a side effect.

itertools to the rescue. groupby does all the grunt work here
The algorithm is O(n logn) because of the call to sorted()
>>> from itertools import groupby, count
>>> def smallest_contiguous(s, N):
... for i,j in groupby(sorted(s), key=lambda i,c=count().next: i-c()):
... res = list(j)
... if len(res) == N:
... return res
... return []
...
>>> smallest_contiguous(s,5)
[40, 41, 42, 43, 44]
>>> smallest_contiguous(s,6)
[]

def smallest_contiguous(s, n):
xs = sorted(s)
return next(x for i, x in enumerate(xs) if xs[i + n - 1] == x + n - 1)

Related

Generating sequences a recursively defined math string in Python

I have a sequence defined:
and my current code:
a = [1]
i=1
while a[-1] < 50:
if a[-1] + 5*i < 50:
a.append(a[-1] + 5*i)
i+=1
else:
break
Output:
[1, 6, 16, 31]
can it be done more elegantly and optimally?
One liners using accumulate
Solution 1: Creates n Terms
from itertools import accumulate
def seq(n):
" Creates n terms of sequence"
return list(accumulate(range(1, n+1), lambda acc, v: acc + 5*(v-1)))
print(seq(5)) # Generate first 5 terms
# Out: [1, 6, 16, 31, 51]
Solution 2--Creates up to Max Value
from itertools import accumulate, takewhile, count
def seq_gen(MAX):
" terms up to value MAX "
return list(takewhile(lambda v: v<=MAX, accumulate(count(start=1), lambda acc, v: acc + 5*(v-1))))
print(seq_gen(50)) # Generate to a maximum value of 50
# Out: [1, 6, 16, 31]
I'd write it with a generator:
import itertools
def gen_sequence(maximum):
n = 1
for i in itertools.count(0):
n = n + 5 * i
if n > maximum: break
yield n
if __name__ == "__main__":
print(list(gen_sequence(50))) # => [1, 6, 16, 31]
Or if you don't want to provide an upper limit, you can generate infinitely and let the caller dynamically decide when to quit. This enables the caller to step forward a few iterations and resume later or use itertools.takewhile to immediately pick up to a limit:
from itertools import count, takewhile
def gen_sequence():
n = 1
for i in count(0):
n = n + 5 * i
yield n
if __name__ == "__main__":
print(list(takewhile(lambda x: x < 50, gen_sequence()))) # => [1, 6, 16, 31]
Whichever way you do it, it's a good idea to put the logic in a function with parameters and check the termination condition once. Ideally come up with a better name for the sequence than gen_sequence, which is rather ambiguous.
This code is just like yours without useless if and break
a = [1]
i=1
while a[-1] + 5*i < 50:
a.append(a[-1] + 5*i)
i+=1
Here's the way that I would write it:
def seq(a, n, lst=[]):
a = a + 5 * (n - 1)
n += 1
if a < 50:
lst.append(a)
seq(a, n)
return lst
print(list(seq(1, 1)))
A really cute one liner:
def seq(n):
return([(1+sum(i for i in range(0,5*j,5))) for j in range(1,n+1)])
print(seq(5))
[1, 6, 16, 31, 51]
I didn't quite understand what exactly the function you are looking for returns,
but here's a general implementation in case you are looking to get the number An
def func(n):
if n==1:
return 1
return (func(n-1) + 5*(n-1))

How do I get smallest postitive integer not present in the array

I am trying to find out smallest positive number not present in the list a
def smallitem(a):
a = sorted(set(a))
lst = []
for item in a:
item + = 1
if item not in a:
lst.append(item)
continue
mst = []
for item in lst:
if item < 1:
item += 1
if item not in a:
mst.append(item)
continue
n = 0
if mst:
n = min(mst)
return n or min(lst)
I think I have got the solution but it doesnt look correct to me the way I have done it.
for example:
smallitem([-1, -3]) # 1
smallitem([1,3,6,4,1,2, 87]) # 5
You can convert the list to a set and then keep incrementing a positive integer from 1 until it is not found in the set:
def smallitem(a):
set_a = set(a)
i = 1
while i in set_a:
i += 1
return i
Perhaps there's a lighter way do this.
The time complexity is always O(n).
def small_item(a):
s = set(a)
for i in range(1, max(s)):
if i not in s:
return i
return max(max(s) + 1, 1)
print small_item([1,3,6,4,1,2, 87])
print small_item([-1, -3])
Here's anther way to do this:
def smallitem(l):
l = list(set(sorted(l)))
x = l[0]
for i in l:
if i != x and x >= 1:return x
x += 1
return 1
Testing:
>>> smallitem([-3, -1])
1
>>> smallitem([1, 3, 6, 4, 1, 2, 87])
5
>>>

Get the absolute difference between elements of a circular array

Let's say I have a array like l = [1, 3, 4, 5, 6, 8]
where the nth element represents the distance between the nth and n+1th object.
I want to find the distance between any two objects, and I used this code for this:
def dis(l_list, index1, index2, mylist):
m = mylist.index(index1)
n = mylist.index(index2)
i=0
j=0
if n > m:
while n >= m:
i = i + mylist[m]
m = m + 1
elif n < m:
while n <= m:
i = i + mylist[n]
n = n + 1
else:
return(0)
j = mylist[n] % l_mylist
print(abs(i - j))
l_mylist = input()
l_mylist = int(l_mylist)
mylist = []
mylist = list(map(int, input().split()))
i,j = input().split()
i, j=int(i), int(j)
dis(l_mylist, i, j, mylist)
but I am still getting the wrong output. Can anyone please point out where I am wrong?
If you want to sum around a potentially circular list. You can use a collections.deque() to rotate the list, e.g.:
from collections import deque
def dist(l, i1, i2):
d = deque(l)
d.rotate(-i1)
return sum(list(d)[:i2-i1]))
In []:
l = [1,2,3,4,5,6,7,8]
dist(l, 3-1, 6-1) # 3, 4, 5
Out[]:
12
In []:
dist(l, 6-1, 3-1) # 6, 7, 8, 1, 2
Out[]:
24
def distance(first_index, second_index, my_list):
temp_list = my_list + my_list
if (first_index > second_index):
first_index += len(my_list)
requested_sum = sum(my_list[second_index-1:first_index-1])
else:
requested_sum = sum(my_list[first_index-1:second_index-1])
return requested_sum
If I understood you correctly, then this should do the trick.
There are much more compact and efficient ways to do this, but this is the simplest and easiest to understand in my opinion.

Python Quickselect not printing/returning pivot

Here is the code for quickselect
def quickSelect(lst, k):
if len(lst) != 0:
pivot = lst[(len(lst)) // 2]
smallerList = []
for i in lst:
if i < pivot:
smallerList.append(i)
largerList = []
for i in lst:
if i > pivot:
largerList.append(i)
count = len(lst) - len(smallerList) - len(largerList)
m = len(smallerList)
if k >= m and k < m + count:
return pivot
print(pivot)
elif m > k:
return quickSelect(smallerList, k)
else:
return quickSelect(largerList, k-m-count)
the issue I am having with it is that it runs with no errors or anything, but when it completes itself I am expecting it to output something to the python shell (in this specific case a median of a list), but I get nothing back. Am I doing something wrong here?
As for what I am inputting for lst and k....
lst = [70, 120, 170, 200]
k = len(lst) // 2
I have tried it with a few different k values as well but to no avail
You can optimize this algorithm using list comprehension. Also, I don't think you need count...
def quickSelect(seq, k):
# this part is the same as quick sort
len_seq = len(seq)
if len_seq < 2: return seq
ipivot = len_seq // 2
pivot = seq[ipivot]
smallerList = [x for i,x in enumerate(seq) if x <= pivot and i != ipivot]
largerList = [x for i,x in enumerate(seq) if x > pivot and i != ipivot]
# here starts the different part
m = len(smallerList)
if k == m:
return pivot
elif k < m:
return quickSelect(smallerList, k)
else:
return quickSelect(largerList, k-m-1)
if __name__ == '__main__':
# Checking the Answer
seq = [10, 60, 100, 50, 60, 75, 31, 50, 30, 20, 120, 170, 200]
# we want the middle element
k = len(seq) // 2
# Note that this only work for odd arrays, since median in
# even arrays is the mean of the two middle elements
print(quickSelect(seq, k))
import numpy
print numpy.median(seq)
def quickSelect(lst, k):
if len(lst) != 0:
pivot = lst[(len(lst)) // 2]
smallerList = []
for i in lst:
if i < pivot:
smallerList.append(i)
largerList = []
for i in lst:
if i > pivot:
largerList.append(i)
count = len(lst) - len(smallerList) - len(largerList)
m = len(smallerList)
if k >= m and k < m + count:
return pivot
print(pivot)
elif m > k:
return quickSelect(smallerList, k)
else:
return quickSelect(largerList, k-m-count)
lst = [70, 120, 170, 200]
k = len(lst) // 2
print(quickSelect(lst, k))
produces
>>> 170
The only thing I corrected was the indenting.

Convert List of Numbers to String Ranges

I'd like to know if there is a simple (or already created) way of doing the opposite of this: Generate List of Numbers from Hyphenated.... This link could be used to do:
>> list(hyphen_range('1-9,12,15-20,23'))
[1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 15, 16, 17, 18, 19, 20, 23]:
I'm looking to do the opposite (note that 10 and 21 are included so it would be compatible with the range function, where range(1,10)=[1,2,3,4,5,6,7,8,9]):
>> list_to_ranges([1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 15, 16, 17, 18, 19, 20, 23])
'1-10,12,15-21,23'
Eventually, I would like to have the output also incorporate a step where the last number of the output indicates the step:
>> list_to_ranges([1, 3, 5, 7, 8, 9, 10, 11])
'1-13:2,8,10'
Essentially, this would end up being kind of like an "inverse" range function
>> tmp = list_to_ranges([1, 3, 5])
>> print tmp
'1-7:2'
>> range(1, 7, 2)
[1, 3, 5]
My guess is that there is no really easy/simple way to do this, but I thought I would ask on here before I go make some brute force, long method.
EDIT
Using the code from an answer to this post as an example, I came up with a simple way to do the first part. But I think that identifying the patterns to do steps would be a bit harder.
from itertools import groupby
from operator import itemgetter
data = [ 1, 4,5,6, 10, 15,16,17,18, 22, 25,26,27,28]
print data, '\n'
str_list = []
for k, g in groupby(enumerate(data), lambda (i,x):i-x):
ilist = map(itemgetter(1), g)
print ilist
if len(ilist) > 1:
str_list.append('%d-%d' % (ilist[0], ilist[-1]+1))
else:
str_list.append('%d' % ilist[0])
print '\n', ','.join(str_list)
EDIT 2
Here is my attempt at including the step size...it is pretty close, but the first numbers get repeated. I think that with a little bit of tweaking of this, it will be close to what I want - or at least good enough.
import numpy as np
from itertools import groupby
def list_to_ranges(data):
data = sorted(data)
diff_data = np.diff(data).tolist()
ranges = []
i = 0
for k, iterable in groupby(diff_data, None):
rng = list(iterable)
step = rng[0]
if len(rng) == 1:
ranges.append('%d' % data[i])
elif step == 1:
ranges.append('%d-%d' % (data[i], data[i+len(rng)]+step))
else:
ranges.append('%d-%d:%d' % (data[i], data[i+len(rng)]+step, step))
i += len(rng)
return ','.join(ranges)
data = [1, 3, 5, 6, 7, 11, 13, 15, 16, 17, 18, 19, 22, 25, 28]
print data
data_str = list_to_ranges(data)
print data_str
_list = []
for r in data_str.replace('-',':').split(','):
r = [int(a) for a in r.split(':')]
if len(r) == 1:
_list.extend(r)
elif len(r) == 2:
_list.extend(range(r[0], r[1]))
else:
_list.extend(range(r[0], r[1], r[2]))
print _list
print list(set(_list))
One approach could be "eating" piece by piece the input sequence and store the partial range results untill you've got them all:
def formatter(start, end, step):
return '{}-{}:{}'.format(start, end, step)
# return '{}-{}:{}'.format(start, end + step, step)
def helper(lst):
if len(lst) == 1:
return str(lst[0]), []
if len(lst) == 2:
return ','.join(map(str,lst)), []
step = lst[1] - lst[0]
for i,x,y in zip(itertools.count(1), lst[1:], lst[2:]):
if y-x != step:
if i > 1:
return formatter(lst[0], lst[i], step), lst[i+1:]
else:
return str(lst[0]), lst[1:]
return formatter(lst[0], lst[-1], step), []
def re_range(lst):
result = []
while lst:
partial,lst = helper(lst)
result.append(partial)
return ','.join(result)
I test it with a bunch of unit tests and it passed them all, it can handle negative numbers too, but they'll look kind of ugly (it's really anybody's fault).
Example:
>>> re_range([1, 4,5,6, 10, 15,16,17,18, 22, 25,26,27,28])
'1,4-6:1,10,15-18:1,22,25-28:1'
>>> re_range([1, 3, 5, 7, 8, 9, 10, 11, 13, 15, 17])
'1-7:2,8-11:1,13-17:2'
Note: I wrote the code for Python 3.
Performance
I didn't put any performance effort in the solution above. In particular, every time a list get re-builded with slicing, it might take some time if the input list has a particular shape. So, the first simple improvement would be using itertools.islice() where possible.
Anyway here's another implementation of the same algorithm, that scan through the input list with a scan index instead of slicing:
def re_range(lst):
n = len(lst)
result = []
scan = 0
while n - scan > 2:
step = lst[scan + 1] - lst[scan]
if lst[scan + 2] - lst[scan + 1] != step:
result.append(str(lst[scan]))
scan += 1
continue
for j in range(scan+2, n-1):
if lst[j+1] - lst[j] != step:
result.append(formatter(lst[scan], lst[j], step))
scan = j+1
break
else:
result.append(formatter(lst[scan], lst[-1], step))
return ','.join(result)
if n - scan == 1:
result.append(str(lst[scan]))
elif n - scan == 2:
result.append(','.join(map(str, lst[scan:])))
return ','.join(result)
I stopped working on it once it got ~65% faster than the previous top solution, it seemed enough :)
Anyway I'd say that there might still be room for improvement (expecially in the middle for-loop).
This is a comparison of the 3 methods. Change the amount of data and the density via the values below...no matter what values I use, the first solution seems to be the quickest for me. For very large sets of data, the third solution becomes very slow.
EDITED
Edited to include comments below and add in a new solution. The last solution seems to be the quickest now.
import numpy as np
import itertools
import random
import timeit
# --- My Solution --------------------------------------------------------------
def list_to_ranges1(data):
data = sorted(data)
diff_data = np.diff(data)
ranges = []
i = 0
skip_next = False
for k, iterable in itertools.groupby(diff_data, None):
rng = list(iterable)
step = rng[0]
if skip_next:
skip_next = False
rng.pop()
if len(rng) == 0:
continue
elif len(rng) == 1:
ranges.append('%d' % data[i])
elif step == 1:
ranges.append('%d-%d' % (data[i], data[i+len(rng)]+step))
i += 1
skip_next = True
else:
ranges.append('%d-%d:%d' % (data[i], data[i+len(rng)]+step, step))
i += 1
skip_next = True
i += len(rng)
if len(rng) == 0 or len(rng) == 1:
ranges.append('%d' % data[i])
return ','.join(ranges)
# --- Kaidence Solution --------------------------------------------------------
# With a minor edit for use in range function
def list_to_ranges2(data):
onediff = np.diff(data)
twodiff = np.diff(onediff)
increments, breakingindices = [], []
for i in range(len(twodiff)):
if twodiff[i] != 0:
breakingindices.append(i+2) # Correct index because of the two diffs
increments.append(onediff[i]) # Record the increment for this section
# Increments and breakingindices should be the same size
str_list = []
start = data[0]
for i in range(len(breakingindices)):
str_list.append("%d-%d:%d" % (start,
data[breakingindices[i]-1] + increments[i],
increments[i]))
start = data[breakingindices[i]]
str_list.append("%d-%d:%d" % (start,
data[len(data)-1] + onediff[len(onediff)-1],
onediff[len(onediff)-1]))
return ','.join(str_list)
# --- Rik Poggi Solution -------------------------------------------------------
# With a minor edit for use in range function
def helper(lst):
if len(lst) == 1:
return str(lst[0]), []
if len(lst) == 2:
return ','.join(map(str,lst)), []
step = lst[1] - lst[0]
#for i,x,y in itertools.izip(itertools.count(1), lst[1:], lst[2:]):
for i,x,y in itertools.izip(itertools.count(1),
itertools.islice(lst, 1, None, 1),
itertools.islice(lst, 2, None, 1)):
if y-x != step:
if i > 1:
return '{}-{}:{}'.format(lst[0], lst[i]+step, step), lst[i+1:]
else:
return str(lst[0]), lst[1:]
return '{}-{}:{}'.format(lst[0], lst[-1]+step, step), []
def list_to_ranges3(lst):
result = []
while lst:
partial,lst = helper(lst)
result.append(partial)
return ','.join(result)
# --- Rik Poggi Solution 2 -----------------------------------------------------
def formatter(start, end, step):
#return '{}-{}:{}'.format(start, end, step)
return '{}-{}:{}'.format(start, end + step, step)
def list_to_ranges4(lst):
n = len(lst)
result = []
scan = 0
while n - scan > 2:
step = lst[scan + 1] - lst[scan]
if lst[scan + 2] - lst[scan + 1] != step:
result.append(str(lst[scan]))
scan += 1
continue
for j in xrange(scan+2, n-1):
if lst[j+1] - lst[j] != step:
result.append(formatter(lst[scan], lst[j], step))
scan = j+1
break
else:
result.append(formatter(lst[scan], lst[-1], step))
return ','.join(result)
if n - scan == 1:
result.append(str(lst[scan]))
elif n - scan == 2:
result.append(','.join(itertools.imap(str, lst[scan:])))
return ','.join(result)
# --- Test Function ------------------------------------------------------------
def test_data(data, f_to_test):
data_str = f_to_test(data)
_list = []
for r in data_str.replace('-',':').split(','):
r = [int(a) for a in r.split(':')]
if len(r) == 1:
_list.extend(r)
elif len(r) == 2:
_list.extend(range(r[0], r[1]))
else:
_list.extend(range(r[0], r[1], r[2]))
return _list
# --- Timing Tests -------------------------------------------------------------
# Generate some sample data...
data_list = []
for i in range(5):
# Note: using the "4000" and "5000" values below, the relative density of
# the data can be changed. This has a huge effect on the results
# (particularly on the results for list_to_ranges3 which uses recursion).
data_list.append(sorted(list(set([random.randint(1,4000) for a in \
range(random.randint(5,5000))]))))
testfuncs = list_to_ranges1, list_to_ranges2, list_to_ranges3, list_to_ranges4
for f in testfuncs:
print '\n', f.__name__
for i, data in enumerate(data_list):
t = timeit.Timer('f(data)', 'from __main__ import data, f')
#print f(data)
print i, data==test_data(data, f), round(t.timeit(200), 3)
This is most likely what you are looking for.
Edit: I see you already found the post. My apologies.
To help with the second part, I've tinkered a bit myself. This is what I came up with:
from numpy import diff
data = [ 1, 3, 5, 7, 8, 9, 10, 11, 13, 15, 17 ]
onediff, twodiff = diff(data), diff(diff(data))
increments, breakingindices = [], []
for i in range(len(twodiff)):
if twodiff[i] != 0:
breakingindices.append(i+2) # Correct index because of the two diffs
increments.append(onediff[i]) # Record the increment for this section
# Increments and breakingindices should be the same size
str_list = []
start = data[0]
for i in range(len(breakingindices)):
str_list.append("%d-%d:%d" % (start, data[breakingindices[i]-1], increments[i]))
start = data[breakingindices[i]]
str_list.append("%d-%d:%d" % (start, data[len(data)-1], onediff[len(onediff)-1]))
print str_list
For the given input list, this gives: ['1-7:2', '8-11:1', '13-17:2']. The code could do with a bit of cleanup, but this sorts with your problem assuming the grouping can be done sequentially.
{caution: for [1,2,3,5,6,7] this gives ['1-3:1', '5-5:2', '6-7:1'] instead of ['1-3:1', '5-7:1']}
This is similar to versions that handle the step-size-of-one case enumerated here but also handles the singleton (elements with no more than 2 elements in a sequence or repeated elements) and non-unitary step sizes (including negative step sizes). It also does not drop duplicates for lists like [1, 2, 3, 3, 4, 5].
As for runtime: it's done before you blink.
def ranges(L):
"""return a list of singletons or ranges of integers, (first, last, step)
as they occur sequentially in the list of integers, L.
Examples
========
>>> list(ranges([1, 2, 4, 6, 7, 8, 10, 12, 13]))
[1, (2, 6, 2), 7, (8, 12, 2), 13]
>>> list(ranges([1,2,3,4,3,2,1,3,5,7,11,1,2,3]))
[(1, 4, 1), (3, 1, -1), (3, 7, 2), 11, (1, 3, 1)]
"""
if not L:
return []
r = []
for i in L:
if len(r) < 2:
r.append(i)
if len(r) == 2:
d = r[1] - r[0]
else:
if i - r[1] == d:
r[1] = i
else:
if r[1] - r[0] == d:
yield(r.pop(0))
r.append(i)
d = r[1] - r[0]
else:
yield(tuple(r+[d]))
r[:] = [i]
if len(r) == 1:
yield(r.pop())
elif r[1] - r[0] == d:
for i in r:
yield i
else:
yield(tuple(r+[d]))
The raw output can be modified as desired, e.g. actual range instances can be created.
def sranges(i):
"""return pretty string for output of ranges.
Examples
========
>>> sranges([1,2,4,6,7,8,10,12,13,15,16,17])
'1, range(2, 8, 2), 7, range(8, 14, 2), 13, range(15, 18)'
"""
out = []
for i in ranges(i):
if type(i) is int:
out.append(str(i))
elif i[-1] == 1:
if i[0] == 0:
out.append('range(%s)'%(i[1] + 1))
else:
out.append('range(%s, %s)'%(i[0], i[1] + 1))
else:
out.append('range(%s, %s, %s)'%(i[0], i[1] + i[2], i[2]))
return ', '.join(out)
This function should do what you need without requiring any imports.
def listToRanges(self, intList):
ret = []
for val in sorted(intList):
if not ret or ret[-1][-1]+1 != val:
ret.append([val])
else:
ret[-1].append(val)
return ",".join([str(x[0]) if len(x)==1 else str(x[0])+"-"+str(x[-1]) for x in ret])

Categories

Resources