Hey this is my first question so I hope I'm doing it right.
I'm trying to write a function that given a list of integers and N as the maximum occurrence, would then return a list with any integer above the maximum occurrence deleted. For example if I input:
[20,37,20,21] #list of integers and 1 #maximum occurrence.
Then as output I would get:
[20,37,21] because the number 20 appears twice and the maximum occurrence is 1, so it is deleted from the list. Here's another example:
Input: [1,1,3,3,7,2,2,2,2], 3
Output: [1,1,3,3,7,2,2,2]
Here's what I wrote so far, how would I be able to optimize it? I keep on getting a timeout error. Thank you very much in advance.
def delete_nth(order,n):
order = Counter(order)
for i in order:
if order[i] > n:
while order[i] > n:
order[i] - 1
return order
print(delete_nth([20,37,20,21], 1))
You can remove building the Counter at the beginning - and just have temporary dictionary as counter:
def delete_nth(order,n):
out, counter = [], {}
for v in order:
counter.setdefault(v, 0)
if counter[v] < n:
out.append(v)
counter[v] += 1
return out
print(delete_nth([20,37,20,21], 1))
Prints:
[20, 37, 21]
You wrote:
while order[i] > n:
order[i] - 1
That second line should presumably be order[i] -= 1, or any code that enters the loop will never leave it.
You could use a predicate with a default argument collections.defaultdict to retain state as your list of numbers is being filtered.
def delete_nth(numbers, n):
from collections import defaultdict
def predicate(number, seen=defaultdict(int)):
seen[number] += 1
return seen[number] <= n
return list(filter(predicate, numbers))
print(delete_nth([1, 1, 3, 3, 7, 2, 2, 2, 2], 3))
Output:
[1, 1, 3, 3, 7, 2, 2, 2]
>>>
I've renamed variables to something that had more meaning for me:
This version, though very short and fairly efficient, will output identical values adjacently:
from collections import Counter
def delete_nth(order, n):
counters = Counter(order)
output = []
for value in counters:
cnt = min(counters[value], n)
output.extend([value] * cnt)
return output
print(delete_nth([1,1,2,3,3,2,7,2,2,2,2], 3))
print(delete_nth([20,37,20,21], 1))
Prints:
[1, 1, 2, 2, 2, 3, 3, 7]
[20, 37, 21]
This version will maintain original order, but run a bit more slowly:
from collections import Counter
def delete_nth(order, n):
counters = Counter(order)
for value in counters:
counters[value] = min(counters[value], n)
output = []
for value in order:
if counters[value]:
output.append(value)
counters[value] -= 1
return output
print(delete_nth([1,1,2,3,3,2,7,2,2,2,2], 3))
print(delete_nth([20,37,20,21], 1))
Prints:
[1, 1, 2, 3, 3, 2, 7, 2]
[20, 37, 21]
Related
How can I fix my code to pass the test case for Delete occurrences of an element if it occurs more than n times?
My current code pass one test case and I'm sure that the problem is caused by order.remove(check_list[i]).
However, there is no way to delete the specific element with pop() because it is required to put an index number rather than the element in pop().
Test case
Test.assert_equals(delete_nth([20,37,20,21], 1), [20,37,21])
Test.assert_equals(delete_nth([1,1,3,3,7,2,2,2,2], 3), [1, 1, 3, 3, 7, 2, 2, 2])
Program
def delete_nth(order, max_e):
# code here
check_list = [x for x in dict.fromkeys(order) if order.count(x) > 1]
print(check_list)
print(order)
for i in range(len(check_list)):
while(order.count(check_list[i]) > max_e):
order.remove(check_list[i])
#order.pop(index)
return order
Your assertions fails, because the order is not preserved. Here is a simple example of how this could be done without doing redundant internal loops to count the occurrences for each number:
def delete_nth(order, max_e):
# Get a new list that we will return
result = []
# Get a dictionary to count the occurences
occurrences = {}
# Loop through all provided numbers
for n in order:
# Get the count of the current number, or assign it to 0
count = occurrences.setdefault(n, 0)
# If we reached the max occurence for that number, skip it
if count >= max_e:
continue
# Add the current number to the list
result.append(n)
# Increase the
occurrences[n] += 1
# We are done, return the list
return result
assert delete_nth([20,37,20,21], 1) == [20, 37, 21]
assert delete_nth([1, 1, 1, 1], 2) == [1, 1]
assert delete_nth([1, 1, 3, 3, 7, 2, 2, 2, 2], 3) == [1, 1, 3, 3, 7, 2, 2, 2]
assert delete_nth([1, 1, 2, 2], 1) == [1, 2]
A version which maintains the order:
from collections import defaultdict
def delete_nth(order, max_e):
count = defaultdict(int)
delet = []
for i, v in enumerate(order):
count[v] += 1
if count[v] > max_e:
delet.append(i)
for i in reversed(delet): # start deleting from the end
order.pop(i)
return order
print(delete_nth([1,1,2,2], 1))
print(delete_nth([20,37,20,21], 1))
print(delete_nth([1,1,3,3,7,2,2,2,2], 3))
This should do the trick:
from itertools import groupby
import numpy as np
def delete_nth(order, max_e):
if(len(order)<=max_e):
return order
elif(max_e<=0):
return []
return np.array(
sorted(
np.concatenate(
[list(v)[:max_e]
for k,v in groupby(
sorted(
zip(order, list(range(len(order)))),
key=lambda k: k[0]),
key=lambda k: k[0])
]
),
key=lambda k: k[1])
)[:,0].tolist()
Outputs:
print(delete_nth([2,3,4,5,3,2,3,2,1], 2))
[2, 3, 4, 5, 3, 2, 1]
print(delete_nth([2,3,4,5,5,3,2,3,2,1], 1))
[2, 3, 4, 5, 1]
print(delete_nth([2,3,4,5,3,2,3,2,1], 3))
[2, 3, 4, 5, 3, 2, 3, 2, 1]
print(delete_nth([2,2,1,1], 1))
[2, 1]
Originally my answer only worked for one test case, this is quick (not the prettiest) but works for both:
def delete_nth(x, e):
x = x[::-1]
for i in x:
while x.count(i) > e:
x.remove(i)
return x[::-1]
Given a list of data, I'm trying to create a new list in which the value at position i is the length of the longest run starting from position i in the original list. For instance, given
x_list = [1, 1, 2, 3, 3, 3]
Should return:
run_list = [2, 1, 1, 3, 2, 1]
My solution:
freq_list = []
current = x_list[0]
count = 0
for num in x_list:
if num == current:
count += 1
else:
freq_list.append((current,count))
current = num
count = 1
freq_list.append((current,count))
run_list = []
for i in freq_list:
z = i[1]
while z > 0:
run_list.append(z)
z -= 1
Firstly I create a list freq_list of tuples, where every tuple's first element is the element from x_list, and where the second element is the number of the total run.
In this case:
freq_list = [(1, 2), (2, 1), (3, 3)]
Having this, I create a new list and append appropriate values.
However, I was wondering if there is a shorter way/another way to do this?
Here's a simple solution that iterates over the list backwards and increments a counter each time a number is repeated:
last_num = None
result = []
for num in reversed(x_list):
if num != last_num:
# if the number changed, reset the counter to 1
counter = 1
last_num = num
else:
# if the number is the same, increment the counter
counter += 1
result.append(counter)
# reverse the result
result = list(reversed(result))
Result:
[2, 1, 1, 3, 2, 1]
This is possible using itertools:
from itertools import groupby, chain
x_list = [1, 1, 2, 3, 3, 3]
gen = (range(len(list(j)), 0, -1) for _, j in groupby(x_list))
res = list(chain.from_iterable(gen))
Result
[2, 1, 1, 3, 2, 1]
Explanation
First use itertools.groupby to group identical items in your list.
For each item in your groupby, create a range object which counts backwards from the length of the number of consecutive items to 1.
Turn this all into a generator to avoid building a list of lists.
Use itertools.chain to chain the ranges from the generator.
Performance note
Performance will be inferior to #Aran-Fey's solution. Although itertools.groupby is O(n), it makes heavy use of expensive __next__ calls. These do not scale as well as iteration in simple for loops. See itertools docs for groupby pseudo-code.
If performance is your main concern, stick with the for loop.
You are performing a reverse cumulative count on contiguous groups. We can create a Numpy cumulative count function with
import numpy as np
def cumcount(a):
a = np.asarray(a)
b = np.append(False, a[:-1] != a[1:])
c = b.cumsum()
r = np.arange(len(a))
return r - np.append(0, np.flatnonzero(b))[c] + 1
and then generate our result with
a = np.array(x_list)
cumcount(a[::-1])[::-1]
array([2, 1, 1, 3, 2, 1])
I would use a generator for this kind of task because it avoids building the resulting list incrementally and can be used lazily if one wanted:
def gen(iterable): # you have to think about a better name :-)
iterable = iter(iterable)
# Get the first element, in case that fails
# we can stop right now.
try:
last_seen = next(iterable)
except StopIteration:
return
count = 1
# Go through the remaining items
for item in iterable:
if item == last_seen:
count += 1
else:
# The consecutive run finished, return the
# desired values for the run and then reset
# counter and the new item for the next run.
yield from range(count, 0, -1)
count = 1
last_seen = item
# Return the result for the last run
yield from range(count, 0, -1)
This will also work if the input cannot be reversed (certain generators/iterators cannot be reversed):
>>> x_list = (i for i in range(10)) # it's a generator despite the variable name :-)
>>> ... arans solution ...
TypeError: 'generator' object is not reversible
>>> list(gen((i for i in range(10))))
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
And it works for your input:
>>> x_list = [1, 1, 2, 3, 3, 3]
>>> list(gen(x_list))
[2, 1, 1, 3, 2, 1]
This can actually be made simpler by using itertools.groupby:
import itertools
def gen(iterable):
for _, group in itertools.groupby(iterable):
length = sum(1 for _ in group) # or len(list(group))
yield from range(length, 0, -1)
>>> x_list = [1, 1, 2, 3, 3, 3]
>>> list(gen(x_list))
[2, 1, 1, 3, 2, 1]
I also did some benchmarks and according to these Aran-Feys solution is the fastest except for long lists where piRSquareds solution wins:
This was my benchmarking setup if you want to confirm the results:
from itertools import groupby, chain
import numpy as np
def gen1(iterable):
iterable = iter(iterable)
try:
last_seen = next(iterable)
except StopIteration:
return
count = 1
for item in iterable:
if item == last_seen:
count += 1
else:
yield from range(count, 0, -1)
count = 1
last_seen = item
yield from range(count, 0, -1)
def gen2(iterable):
for _, group in groupby(iterable):
length = sum(1 for _ in group)
yield from range(length, 0, -1)
def mseifert1(iterable):
return list(gen1(iterable))
def mseifert2(iterable):
return list(gen2(iterable))
def aran(x_list):
last_num = None
result = []
for num in reversed(x_list):
if num != last_num:
counter = 1
last_num = num
else:
counter += 1
result.append(counter)
return list(reversed(result))
def jpp(x_list):
gen = (range(len(list(j)), 0, -1) for _, j in groupby(x_list))
res = list(chain.from_iterable(gen))
return res
def cumcount(a):
a = np.asarray(a)
b = np.append(False, a[:-1] != a[1:])
c = b.cumsum()
r = np.arange(len(a))
return r - np.append(0, np.flatnonzero(b))[c] + 1
def pirsquared(x_list):
a = np.array(x_list)
return cumcount(a[::-1])[::-1]
from simple_benchmark import benchmark
import random
funcs = [mseifert1, mseifert2, aran, jpp, pirsquared]
args = {2**i: [random.randint(0, 5) for _ in range(2**i)] for i in range(1, 20)}
bench = benchmark(funcs, args, "list size")
%matplotlib notebook
bench.plot()
Python 3.6.5, NumPy 1.14
Here's a simple iterative approach to achieve it using collections.Counter:
from collections import Counter
x_list = [1, 1, 2, 3, 3, 3]
x_counter, run_list = Counter(x_list), []
for x in x_list:
run_list.append(x_counter[x])
x_counter[x] -= 1
which will return you run_list as:
[2, 1, 1, 3, 2, 1]
As an alternative, here's one-liner to achieve this using list comprehension with enumerate but it is not performance efficient due to iterative usage of list.index(..):
>>> [x_list[i:].count(x) for i, x in enumerate(x_list)]
[2, 1, 1, 3, 2, 1]
You can count the consecutive equal items and then add a countdown from count-of-items to 1 to the result:
def runs(p):
old = p[0]
n = 0
q = []
for x in p:
if x == old:
n += 1
else:
q.extend(range(n, 0, -1))
n = 1
old = x
q.extend(range(n, 0, -1))
return q
(A couple of minutes later) Oh, that's the same as MSeifert's code but without the iterable aspect. This version seems to be almost as fast as the method shown by Aran-Fey.
Okay so lets say I have number , 100, and I want to split it up into 12 groups randomly and have the total from all the groups equate to 100. How would I do this, I have written a piece of code but it has a flaw.
from random import randint
total = 100
while total < 101:
for i in range(0,9):
num = randint(1,total)
print(i,"|","*"*num)
total -= num
if total <= 0:
for j in range (i,10):
print(j,"|","*"*total)
when you run it , once the total is equal to zero or less, it crashes, so it wont post the full results and I do not know how to deal fix this. I'm new to Python so my knowledge is limited.
Splitting a number n into k groups randomly is mathematically equivalent to randomly placing k-1 dividers. Thus, I would do something like this:
from random import randint
def split_randomly(n,k):
dividers = set() # guarantee no duplicate dividers
while len(dividers) < k-1:
dividers.add(randint(1,n-1))
dividers = [0] + sorted(dividers) + [n]
return [dividers[i+1] - dividers[i] for i in range(len(dividers)-1)]
For a proper random split, you could use random.sample. THe following gathers a sample of k-1 random split points from [1..n] and 0 and n and returns a list of their pairwise differences:
from random import sample
def split(n, k):
splits = [0] + sorted(sample(range(1, n), k-1)) + [n]
return [end-start for start, end in zip(splits, splits[1:])]
>>> split(10, 7)
[1, 3, 1, 1, 1, 1, 2]
>>> split(10, 7)
[1, 1, 1, 1, 1, 4, 1]
>>> split(10, 7)
[1, 2, 2, 1, 2, 1, 1]
>>> split(100, 12)
[10, 10, 8, 5, 7, 15, 1, 5, 1, 8, 26, 4]
Here is what I am trying to do. Given a number and a set of numbers, I want to partition that number into the numbers given in the set (with repetitions).
For example :
take the number 9, and the set of numbers = {1, 4, 9}.
It will yield the following partitions :
{ (1, 1, 1, 1, 1, 1, 1, 1, 1), (1, 1, 1, 1, 1, 4), (1, 4, 4), (9,)}
No other possible partitions using the set {1, 4, 9} cannot be formed to sum the number 9.
I wrote a function in Python which do the task :
S = [ 1, 4, 9, 16 ]
def partition_nr_into_given_set_of_nrs(nr , S):
lst = set()
# Build the base case :
M = [1]*(nr%S[0]) + [S[0]] * (nr //S[0])
if set(M).difference(S) == 0 :
lst.add(M)
else :
for x in S :
for j in range(1, len(M)+1):
for k in range(1, nr//x +1 ) :
if k*x == sum(M[:j]) :
lst.add( tuple(sorted([x]*k + M[j:])) )
return lst
It works correctly but I want to see some opinions about it. I'm not satisfied about the fact that it uses 3 loops and I guess that it can be improved in a more elegant way. Maybe recursion is more suited in this case. Any suggestions or corrections would be appreciated. Thanks in advance.
I would solve this using a recursive function, starting with the largest number and recursively finding solutions for the remaining value, using smaller and smaller numbers.
def partition_nr_into_given_set_of_nrs(nr, S):
nrs = sorted(S, reverse=True)
def inner(n, i):
if n == 0:
yield []
for k in range(i, len(nrs)):
if nrs[k] <= n:
for rest in inner(n - nrs[k], k):
yield [nrs[k]] + rest
return list(inner(nr, 0))
S = [ 1, 4, 9, 16 ]
print(partition_nr_into_given_set_of_nrs(9, S))
# [[9], [4, 4, 1], [4, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1]]
Of course you could also do without the inner function by changing the parameters of the function and assuming that the list is already sorted in reverse order.
If you want to limit the number of parts for large numbers, you can add an aditional parameter indicating the remaining allowed number of elements and only yield result if that number is still greater than zero.
def partition_nr_into_given_set_of_nrs(nr, S, m=10):
nrs = sorted(S, reverse=True)
def inner(n, i, m):
if m > 0:
if n == 0:
yield []
for k in range(i, len(nrs)):
if nrs[k] <= n:
for rest in inner(n - nrs[k], k, m - 1):
yield [nrs[k]] + rest
return list(inner(nr, 0, m))
Here is a solution using itertools and has two for loops so time complexity is about O(n*n) (roughly)
A little memoization applied to reshape list by removing any element that is greater than max sum needed.
Assuming you are taking sum to be max of your set (9 in this case).
sourceCode
import itertools
x = [ 1, 4, 9, 16 ]
s = []
n = 9
#Remove elements >9
x = [ i for i in x if i <= n]
for i in xrange(1,n + 1):
for j in itertools.product(x,repeat = i):
if sum(j) == n:
s.append(list(j))
#Sort each combo
s =[sorted(i) for i in s]
#group by unique combo
print list(k for k,_ in itertools.groupby(s))
Result
>>>
>>>
[[9], [1, 4, 4], [1, 1, 1, 1, 1, 4], [1, 1, 1, 1, 1, 1, 1, 1, 1]]
EDIT
You can further optimize speed (if needed) by stopping finding combo's after sum of product is > 9
e.g.
if sum(j) > n + 2:
break
Q: A run is a sequence of adjacent repeated values. Given a list, write a function to
determine the length of the longest run. For example, for the sequence [1, 2, 5, 5, 3, 1, 2, 4, 3, 2, 2, 2, 2, 3, 6, 5, 5, 6, 3, 1], the longest run is 4.
I am having trouble with this, I've written a code that finds the longest run consist of the number '2' but have yet to get the length of the run which is 4.
Here is my code so far (i've commented out a part that i was working on but don't pay attention to it):
# longestrun.py
# A function to determine the length of the longest run
# A run is a sequence of adjacent repeated values.
def longestrun(myList):
result = None
prev = None
size = 0
max_size = 0
for i in myList:
if i == prev:
size += 1
if size > max_size:
result = i
max_size = size
else:
size = 0
prev = i
return result
def main():
print("This program finds the length of the longest run within a given list.")
print("A run is a sequence of adjacent repeated values.")
myString = input("Please enter a list of objects (numbers, words, etc.) separated by
commas: ")
myList = myString.split(',')
longest_run = longestrun(myList)
print(">>>", longest_run, "<<<")
main()
Help please!!! :(((
You can do this in one line using itertools.groupby:
import itertools
max(sum(1 for _ in l) for n, l in itertools.groupby(lst))
This should work if you do not want to use itertools and imports.
a=[1, 2, 5, 5, 3, 1, 2, 4, 3, 2, 2, 2, 2, 3, 6, 5, 5, 6, 3, 1]
def longestrun(myList):
result = None
prev = None
size = 0
max_size = 0
for i in myList:
if i == prev:
print (i)
size += 1
if size > max_size:
print ('******* '+ str(max_size))
max_size = size
else:
size = 0
prev = i
print (max_size+1)
return max_size+1
longestrun(a)
Just another way of doing it:
def longestrun(myList):
sett = set()
size = 1
for ind, elm in enumerate(myList):
if ind > 0:
if elm == myList[ind - 1]:
size += 1
else:
sett.update([size])
size = 1
sett.update([size])
return max(sett)
myList = [1, 2, 5, 5, 3, 1, 2, 4, 3, 2, 2, 2, 2, 3, 6, 5, 5, 6, 3, 1]
print longestrun(myList)
def getSublists(L,n):
outL=[]
for i in range(0,len(L)-n+1):
outL.append(L[i:i+n])
return outL
def longestRun(L):
for n in range(len(L), 0, -1):
temp=getSublists(L,n)
for subL in temp:
if subL==sorted(subL):
return len(subL)
def longestrun(myList):
size = 1
max_size = 0
for i in range(len(myList)-1):
if myList[i+1] = myList[i]:
size += 1
else:
size = 1
if max_size<size:
max_size = size
return size
Remove the .split() from myList in main() and you're good to go with this.
As an update to David Robinson's answer, it is now (Python 3.4) possible to return 0 on an empty sequence (instead of raising ValueError):
import itertools
max((sum(1 for _ in l) for n, l in itertools.groupby(lst)), default=0)