I'm doing a statistical problem set in Python on Hackerrank. When I input a list of values to calculate the mode. It shows me a runtime error.
# Enter your code here. Read input from STDIN. Print output to STDOUT
N = int(input())
X = list(map(int, input().split()))
X.sort()
# Find the mean
mean = sum(X) / N
print(mean)
# Find the median
if N % 2 == 0:
median = (X[N//2] + X[N//2 - 1]) / 2
else:
median = X[N//2]
print(median)
# Find the mode
occurrence = list([1 for _ in range(N)])
for i in range(N):
for j in range(i+1, N):
if X[i] == X[j]:
occurrence += 1
if max(occurrence) == 1:
mode = min(X)
else:
mode = X[occurrence[max(occurrence)]]
print(mode)
When I take a 2500 input for X, it just shows me a runtime error.
This is the link to the test case
enter link description here
I use this when looking for mean, median, and mode
import numpy as np
from scipy import stats
n = int(input())
arr = list(map(int, input().split()))
print(np.mean(arr))
print(np.median(arr))
print(stats.mode(arr)[0][0])
You are trying to add 1 to occurence which is of list type:
Also, I'm sure this may be a copying mistake but your loop is incorrect:
for i in range(N):
for j in range(i+1, N):
if X[i] == X[j]:
occurrence += 1
# It will be
for i in range(N):
for j in range(i+1, N):
if X[i] == X[j]:
occurrence += 1
Then you might wanna change your occurrence to something like:
occurrence[i] += 1
# from
occurrence += 1
Hope this helps
I have run your code, here is the compile problem:
for i in range(N):
for j in range(i+1, N):
if X[i] == X[j]:
occurrence += 1
I think your meaning is if inside two for, like:
for i in range(N):
for j in range(i + 1, N):
if X[i] == X[j]:
occurrence += 1
but occurrence is list here, can't plus by one, I think you means to count the occurrence of int, and output the max one? you can use defaultdict or Counter here, but defaultdict is only in one loops.
# import collections
# import operator
# Find the mode
occurrence = collections.Counter(X)
# occurrence = collections.defaultdict(int)
#
# for i in range(N):
# occurrence[X[i]] += 1
mode = max(occurrence.items(), key=operator.itemgetter(1))[0]
print(mode)
Here is a Mean, Median, and Mode class.
import statistics
from collections import Counter
def median(list):
n = len(list)
s = sorted(list)
return (sum(s[n//2-1:n//2+1])/2.0, s[n//2])[n % 2] if n else None
def mean(list):
if len(list) == 0:
return 0
list.sort()
total = 0
for number in list:
total += number
return total / len(list)
def mode(list):
counter = Counter(list)
if len(counter) > 1:
possible_mode, next_highest = counter.most_common(2)
if possible_mode[1] > next_highest[1]:
return possible_mode[0]
return "None"
Related
I am trying to find a method for comparing two lists in python in a more efficient way than what I think is the current O(m*n) runtime. Right now I have a brute force approach of iterating each item in m and comparing it to n but is anything else possible? I have tried maybe sorting the lists first for maybe something faster but I am kind of stuck on whether anything else could work here.
In my function i take each item in m and compare it to n and count the number of times the item in m is greater than the item in n.
n = [1,3,7]
m = [2,9]
def comparison(n,m):
counter = 0
for i in m:
for j in n:
if i >= j:
counter += 1
return counter
Here's how you could use a binary search approach after sorting the target list:
from bisect import bisect_right
n = [1,3,7,2]
m = [2,9]
n.sort()
counter = sum(bisect_right(n,value) for value in m)
print(counter) # 6
This should correspond to O((n+m) x log(n)) if n is not known to be sorted. If n is always provided in sorted order, then you don't need your function to sort it and you will get O(m x log(n)) time complexity.
I wrote a code for you to test which one runs faster using the built-in "timeit" library. You can test others' advice using the same structure. There is the code:
import timeit
import numpy as np
n = [1,3,7]
m = [9,2]
my_code = '''
def comparison(n,m):
counter = 0
for i in n:
for j in m:
if i >= j:
counter += 1
return counter
'''
mysetup = "import numpy as np"
my_code2 = '''
def comparison_with_numpy(n,m):
x = np.array(n)
y = np.array(m)
smaller = np.array([x[i] > y[:] for i in range(x.shape[0])]).astype('int')
return sum(smaller)[0]
'''
my_code3 = '''
def sort_first(n,m):
sorted(n)
sorted(m)
count = 0
if len(n) > len(m):
iteration = len(n)
else:
iteration = len(m)
for _ in range(iteration):
if n != []:
y = n.pop(0)
if m != []:
x = m.pop(0)
if y > x:
count += 1
return count
'''
def comparison(n,m):
counter = 0
for i in n:
for j in m:
if i >= j:
counter += 1
print(counter)
return counter
def comparison_with_numpy(n,m):
x = np.array(n)
y = np.array(m)
smaller = np.array([x[i] > y[:] for i in range(x.shape[0])]).astype('int')
return sum(smaller)[0]
def sort_first(n,m):
sorted(n)
sorted(m)
count = 0
if len(n) > len(m):
iteration = len(n)
else:
iteration = len(m)
for _ in range(iteration):
if n != []:
y = n.pop(0)
if m != []:
x = m.pop(0)
if y > x:
count += 1
return count
def main():
print('comparison /w sort\t\t',timeit.timeit(stmt = my_code3,number=10000))
print('comparison\t\t',timeit.timeit(stmt = my_code,number=10000))
print('comparison with numpy\t\t',timeit.timeit(setup = mysetup
,stmt = my_code2
,number=10000))
if __name__ == "__main__":
main()
I'm a new at programming, I like solving this euler questions and I know there are solutions for this problem but it's not about the problem at all actually.
So, i managed to create a working function for finding example: 33. triangular number. It works but i couldn't manage to properly desing my while loop. I wanted to make it like, it starts from first triangular checks it's divisors make list of it's divisors, checks the length of the divisors, because problem wants "What is the value of the first triangle number to have over five hundred divisors?" . But i never managed to work the while loop. Thank you for reading.
nums = [1]
triangles = [1]
divisors = []
def triangularcreator(x):
if x == 1:
return 1
n = 1
sum = 0
while n!=0:
n += 1
nums.append(n)
for i in range(len(nums)):
sum += nums[i]
triangles.append(sum)
sum = 0
if x == len(triangles):
n = 0
return triangles[-1]
counter = 1
while True:
for i in range(1, triangularcreator(counter) + 1):
if triangularcreator(counter) % i == 0:
divisors.append(i)
if len(divisors) == 500:
print(triangularcreator(counter))
break
counter +=1
divisors.clear()
You should try to change a few things, starting with calculating just once the value of triangularcreator(counter) and assigning this value to a variable that you can use in different points of your code.
Second, you create a loop which will be calculate always triangularcreator(1). At the end of each iteration you increase the value of counter+=1, but then at the beginign of the new iteration you assignt it again value 1, so it will not progress as you expect. Try this few things:
counter = 1
while True:
triangle = triangularcreator(counter)
for i in range(1, triangle + 1):
if triangle % i == 0:
divisors.append(i)
if len(divisors) == 500:
print(triangle )
break
counter +=1
Also these two arrays nums = [1], triangles = [1] should be declared and initialized inside the def triangularcreator. Otherwise you would be appending elements in each iteration
Edit: I believe it is better to give you my own answer to the problem, since you are doing some expensive operations which will make code to run for a long time. Try this solution:
import numpy as np
factor_num = 0
n = 0
def factors(n):
cnt = 0
# You dont need to iterate through all the numbers from 1 to n
# Just to the sqrt, and multiply by two.
for i in range(1,int(np.sqrt(n)+1)):
if n % i == 0:
cnt += 1
# If n is perfect square, it will exist a middle number
if (np.sqrt(n)).is_integer():
return (cnt*2)-1
else:
return (cnt*2)-1
while factor_num < 500:
# Here you generate the triangle, by summing all elements from 1 to n
triangle = sum(list(range(n)))
# Here you calculate the number of factors of the triangle
factor_num = factors(triangle)
n += 1
print(triangle)
Turns out that both of your while loop are infinite either in triangularcreatorin the other while loop:
nums = [1]
triangles = [1]
divisors = []
def triangularcreator(x):
if x == 1:
return 1
n = 1
sum = 0
while n:
n += 1
nums.append(n)
for i in range(len(nums)):
sum += nums[i]
triangles.append(sum)
sum = 0
if len(triangles) >= x:
return triangles[-1]
return triangles[-1]
counter = 1
while True:
check = triangularcreator(counter)
for i in range(1, check + 1):
if check % i == 0:
divisors.append(i)
if len(divisors) >= 500:
tr = triangularcreator(counter)
print(tr)
break
counter +=1
Solution
Disclaimer: This is not my solution but is #TamoghnaChowdhury, as it seems the most clean one in the web. I wanted to solve it my self but really run out of time today!
import math
def count_factors(num):
# One and itself are included now
count = 2
for i in range(2, int(math.sqrt(num)) + 1):
if num % i == 0:
count += 2
return count
def triangle_number(num):
return (num * (num + 1) // 2)
def divisors_of_triangle_number(num):
if num % 2 == 0:
return count_factors(num // 2) * count_factors(num + 1)
else:
return count_factors((num + 1) // 2) * count_factors(num)
def factors_greater_than_triangular_number(n):
x = n
while divisors_of_triangle_number(x) <= n:
x += 1
return triangle_number(x)
print('The answer is', factors_greater_than_triangular_number(500))
Given an array A with zero index and N integers find equal elements with different positions in the array. Pair of indexes (P,Q) such that 0 <= P < Q < N such that A[P] = A[Q].
My idea:
def function(arr, n) :
count = 0
arr.sort()
i = 0
while i < (n-1) :
if (arr[i] == arr[i + 1]) :
count += 1
i = i + 2
else :
i += 1
return count
Two questions:
How do I avoid counting elements whose first indices are not smaller than the second indices?
How do I build a function where the input is only the array? (So not (arr, n))
What you can do is similar to this:
This one is the naive approach:
def function(arr) :
count = 0
n = len(arr)
i = 0
for i in range(n):
for j in range(i+1,n):
if arr[i]==arr[j]:
count+=1
return count
This one is more optimized approach you can try:
def function(arr) :
mp = dict()
n = len(arr)
for i in range(n):
if arr[i] in mp.keys():
mp[arr[i]] += 1
else:
mp[arr[i]] = 1
ans = 0
for it in mp:
count = mp[it]
ans += (count * (count - 1)) // 2
return ans
You can use collections.Counter to count the number of occurrences of every integer,
then use math.comb with n=count and k=2 to get the number of such pairs for every integer, and simply sum them:
from collections import Counter
from math import comb
def function(arr):
return sum(comb(count, 2) for num,count in Counter(arr).items())
print(function([1,2,3,6,3,6,3,2]))
The reason math.comb(count,2) is exactly the number of pairs is that any 2 elements out of the count you choose, regardless of their order, are a single pair: the former one is P and the latter is Q.
EDIT: Added timeit benchmakrs:
Here's a full example you can test to compare the performance of both methods:
from timeit import timeit
from random import randint
from collections import Counter
from math import comb
def with_comb(arr):
return sum(comb(count, 2) for num,count in Counter(arr).items())
def with_loops(arr):
mp = dict()
n = len(arr)
for i in range(n):
if arr[i] in mp.keys():
mp[arr[i]] += 1
else:
mp[arr[i]] = 1
ans = 0
for it in mp:
count = mp[it]
ans += (count * (count - 1)) // 2
return ans
a = [randint(1,1000) for _ in range(10000)]
time1 = timeit('with_loops(a)', globals=globals(), number=1000)
time2 = timeit('with_comb(a)', globals=globals(), number=1000)
print(time1)
print(time2)
print(time1/time2)
Output (on my laptop):
2.9549962
0.8175686999999998
3.6143705110041524
I took following codility demo task
Write a function:
def solution(A)
that, given an array A of N integers, returns the smallest positive integer (greater than 0) that does not occur in A.
For example, given A = [1, 3, 6, 4, 1, 2], the function should return 5.
Given A = [1, 2, 3], the function should return 4.
Given A = [−1, −3], the function should return 1.
Write an efficient algorithm for the following assumptions:
N is an integer within the range [1..100,000];
each element of array A is an integer within the range [−1,000,000..1,000,000].
My Solution
def solution(A):
# write your code in Python 3.6
l = len(A)
B = []
result = 0
n = 0
for i in range(l):
if A[i] >=1:
B.append(A[i])
if B ==[]:
return(1)
else:
B.sort()
B = list(dict.fromkeys(B))
n = len(B)
for j in range(n-1):
if B[j+1]>B[j]+1:
result = (B[j]+1)
if result != 0:
return(result)
else:
return(B[n-1]+1)
Although I get correct output for all inputs I tried but my score was just 22%. Could somebody please highlight where I am going wrong.
Python solution with O(N) time complexity and O(N) space complexity:
def solution(A):
arr = [0] * 1000001
for a in A:
if a>0:
arr[a] = 1
for i in range(1, 1000000+1):
if arr[i] == 0:
return i
My main idea was to:
creat a zero-initialized "buckets" for all the positive possibilities.
Iterate over A. Whenever you meet a positive number, mark it's bucket as visited (1).
Iterate over the "buckets" and return the first zero "bucket".
def solution(A):
s = set(A)
for x in range(1,100002):
if x not in s:
return x
pass
And GOT 100%
# you can write to stdout for debugging purposes, e.g.
# print("this is a debug message")
def solution(A):
# write your code in Python 3.6
i = 1;
B = set(A);
while True:
if i not in B:
return i;
i+=1;
My Javascript solution. The solution is to sort the array and compare the adjacent elements of the array. Complexity is O(N)
function solution(A) {
// write your code in JavaScript (Node.js 8.9.4)
A.sort((a, b) => a - b);
if (A[0] > 1 || A[A.length - 1] < 0 || A.length <= 2) return 1;
for (let i = 1; i < A.length - 1; ++i) {
if (A[i] > 0 && (A[i + 1] - A[i]) > 1) {
return A[i] + 1;
}
}
return A[A.length - 1] + 1;
}
in Codility you must predict correctly others inputs, not only the sample ones and also get a nice performance. I've done this way:
from collections import Counter
def maior_menos_zero(A):
if A < 0:
return 1
else:
return 1 if A != 1 else 2
def solution(A):
if len(A) > 1:
copia = set(A.copy())
b = max(A)
c = Counter(A)
if len(c) == 1:
return maior_menos_zero(A[0])
elif 1 not in copia:
return 1
else:
for x in range(1,b+2):
if x not in copia:
return x
else:
return maior_menos_zero(A[0])
Got it 100%. If is an array A of len(A) == 1, function maior_menos_zero will be called. Moreover, if it's an len(A) > 1 but its elements are the same (Counter), then function maior_menos_zero will be called again. Finally, if 1 is not in the array, so 1 is the smallest positive integer in it, otherwise 1 is in it and we shall make a for X in range(1,max(A)+2) and check if its elements are in A, futhermore, to save time, the first ocurrence of X not in A is the smallest positive integer.
My solution (100% acceptance):
def solution(nums):
nums_set = set()
for el in nums:
if el > 0 and el not in nums_set:
nums_set.add(el)
sorted_set = sorted(nums_set)
if len(sorted_set) == 0:
return 1
if sorted_set[0] != 1:
return 1
for i in range(0, len(sorted_set) - 1, 1):
diff = sorted_set[i + 1] - sorted_set[i]
if diff >= 2:
return sorted_set[i] + 1
return sorted_set[-1] + 1
I tried the following, and got 100% score
def solution(A):
A_set = set(A)
for x in range(10**5 + 1, 1):
if x not in A_set:
return x
else:
return 10**5 + 1
This solution is an easy approach!
def solution(A):
... A.sort()
... maxval = A[-1]
... nextmaxval = A[-2]
... if maxval < 0:
... while maxval<= 0:
... maxval += 1
... return maxval
... else:
... if nextmaxval + 1 in A:
... return maxval +1
... else:
... return nextmaxval + 1
This is my solution
def solution(A):
# write your code in Python 3.8.10
new = set(A)
max_ = abs(max(A)) #use the absolute here for negative maximum value
for num in range(1,max_+2):
if num not in new:
return num
Try this, I am assuming the list is not sorted but if it is sorted you can remove the number_list = sorted(number_list) to make it a little bit faster.
def get_smallest_positive_integer(number_list):
if all(number < 0 for number in number_list) or 1 not in number_list:
#checks if numbers in list are all negative integers or if 1 is not in list
return 1
else:
try:
#get the smallest number in missing integers
number_list = sorted(number_list) # remove if list is already sorted by default
return min(x for x in range(number_list[0], number_list[-1] + 1) if x not in number_list and x != 0)
except:
#if there is no missing number in list get largest number + 1
return max(number_list) + 1
print(get_smallest_positive_integer(number_list))
input:
number_list = [1,2,3]
output:
>>4
input:
number_list = [-1,-2,-3]
output:
>>1
input:
number_list = [2]
output:
>>1
input:
number_list = [12,1,23,3,4,5,61,7,8,9,11]
output:
>>2
input:
number_list = [-1,3,2,1]
output:
>>4
I think this should be as easy as starting at 1 and checking which number first fails to appear.
def solution(A):
i = 1
while i in A:
i += 1
return i
You can also consider putting A's elements into a set (for better performance on the search), but I'm not sure that it's worth for this case.
Update:
I've been doing some tests with the numbers OP gave (numbers from negative million to positive million and 100000 elements).
100000 elements:
Linear Search: 0.003s
Set Search: 0.017s
1000000 elements (extra test):
Linear Search: 0.8s
Set Search: 2.58s
For example:
input: A = [ 6 4 3 -5 0 2 -7 1 ]
output: 5
Since 5 is the smallest positive integer that does not occur in the array.
I have written two solutions to that problem. The first one is good but I don't want to use any external libraries + its O(n)*log(n) complexity. The second solution "In which I need your help to optimize it" gives an error when the input is chaotic sequences length=10005 (with minus).
Solution 1:
from itertools import count, filterfalse
def minpositive(a):
return(next(filterfalse(set(a).__contains__, count(1))))
Solution 2:
def minpositive(a):
count = 0
b = list(set([i for i in a if i>0]))
if min(b, default = 0) > 1 or min(b, default = 0) == 0 :
min_val = 1
else:
min_val = min([b[i-1]+1 for i, x in enumerate(b) if x - b[i - 1] >1], default=b[-1]+1)
return min_val
Note: This was a demo test in codility, solution 1 got 100% and
solution 2 got 77 %.
Error in "solution2" was due to:
Performance tests ->
medium chaotic sequences length=10005 (with minus) got 3 expected
10000
Performance tests -> large chaotic + many -1, 1, 2, 3 (with
minus) got 5 expected 10000
Testing for the presence of a number in a set is fast in Python so you could try something like this:
def minpositive(a):
A = set(a)
ans = 1
while ans in A:
ans += 1
return ans
Fast for large arrays.
def minpositive(arr):
if 1 not in arr: # protection from error if ( max(arr) < 0 )
return 1
else:
maxArr = max(arr) # find max element in 'arr'
c1 = set(range(2, maxArr+2)) # create array from 2 to max
c2 = c1 - set(arr) # find all positive elements outside the array
return min(c2)
I have an easy solution. No need to sort.
def solution(A):
s = set(A)
m = max(A) + 2
for N in range(1, m):
if N not in s:
return N
return 1
Note: It is 100% total score (Correctness & Performance)
def minpositive(A):
"""Given an list A of N integers,
returns the smallest positive integer (greater than 0)
that does not occur in A in O(n) time complexity
Args:
A: list of integers
Returns:
integer: smallest positive integer
e.g:
A = [1,2,3]
smallest_positive_int = 4
"""
len_nrs_list = len(A)
N = set(range(1, len_nrs_list+2))
return min(N-set(A)) #gets the min value using the N integers
This solution passes the performance test with a score of 100%
def solution(A):
n = sorted(i for i in set(A) if i > 0) # Remove duplicates and negative numbers
if not n:
return 1
ln = len(n)
for i in range(1, ln + 1):
if i != n[i - 1]:
return i
return ln + 1
def solution(A):
B = set(sorted(A))
m = 1
for x in B:
if x == m:
m+=1
return m
Continuing on from Niroj Shrestha and najeeb-jebreel, added an initial portion to avoid iteration in case of a complete set. Especially important if the array is very large.
def smallest_positive_int(A):
sorted_A = sorted(A)
last_in_sorted_A = sorted_A[-1]
#check if straight continuous list
if len(sorted_A) == last_in_sorted_A:
return last_in_sorted_A + 1
else:
#incomplete list, iterate to find the smallest missing number
sol=1
for x in sorted_A:
if x == sol:
sol += 1
else:
break
return sol
A = [1,2,7,4,5,6]
print(smallest_positive_int(A))
This question doesn't really need another answer, but there is a solution that has not been proposed yet, that I believe to be faster than what's been presented so far.
As others have pointed out, we know the answer lies in the range [1, len(A)+1], inclusively. We can turn that into a set and take the minimum element in the set difference with A. That's a good O(N) solution since set operations are O(1).
However, we don't need to use a Python set to store [1, len(A)+1], because we're starting with a dense set. We can use an array instead, which will replace set hashing by list indexing and give us another O(N) solution with a lower constant.
def minpositive(a):
# the "set" of possible answer - values_found[i-1] will tell us whether i is in a
values_found = [False] * (len(a)+1)
# note any values in a in the range [1, len(a)+1] as found
for i in a:
if i > 0 and i <= len(a)+1:
values_found[i-1] = True
# extract the smallest value not found
for i, found in enumerate(values_found):
if not found:
return i+1
We know the final for loop always finds a value that was not marked, because it has one more element than a, so at least one of its cells was not set to True.
def check_min(a):
x= max(a)
if x-1 in a:
return x+1
elif x <= 0:
return 1
else:
return x-1
Correct me if i'm wrong but this works for me.
def solution(A):
clone = 1
A.sort()
for itr in range(max(A) + 2):
if itr not in A and itr >= 1:
clone = itr
break
return clone
print(solution([2,1,4,7]))
#returns 3
def solution(A):
n = 1
for i in A:
if n in A:
n = n+1
else:
return n
return n
def not_in_A(a):
a=sorted(a)
if max(a)<1:
return(1)
for i in range(0,len(a)-1):
if a[i+1]-a[i]>1:
out=a[i]+1
if out==0 or out<1:
continue
return(out)
return(max(a)+1)
mark and then find the first one that didn't find
nums = [ 6, 4, 3, -5, 0, 2, -7, 1 ]
def check_min(nums):
marks = [-1] * len(nums)
for idx, num in enumerate(nums):
if num >= 0:
marks[num] = idx
for idx, mark in enumerate(marks):
if mark == -1:
return idx
return idx + 1
I just modified the answer by #najeeb-jebreel and now the function gives an optimal solution.
def solution(A):
sorted_set = set(sorted(A))
sol = 1
for x in sorted_set:
if x == sol:
sol += 1
else:
break
return sol
I reduced the length of set before comparing
a=[1,222,3,4,24,5,6,7,8,9,10,15,2,3,3,11,-1]
#a=[1,2,3,6,3]
def sol(a_array):
a_set=set()
b_set=set()
cnt=1
for i in a_array:
#In order to get the greater performance
#Checking if element is greater than length+1
#then it can't be output( our result in solution)
if i<=len(a) and i >=1:
a_set.add(i) # Adding array element in set
b_set.add(cnt) # Adding iterator in set
cnt=cnt+1
b_set=b_set.difference(a_set)
if((len(b_set)) > 1):
return(min(b_set))
else:
return max(a_set)+1
sol(a)
def solution(A):
nw_A = sorted(set(A))
if all(i < 0 for i in nw_A):
return 1
else:
ans = 1
while ans in nw_A:
ans += 1
if ans not in nw_A:
return ans
For better performance if there is a possibility to import numpy package.
def solution(A):
import numpy as np
nw_A = np.unique(np.array(A))
if np.all((nw_A < 0)):
return 1
else:
ans = 1
while ans in nw_A:
ans += 1
if ans not in nw_A:
return ans
def solution(A):
# write your code in Python 3.6
min_num = float("inf")
set_A = set(A)
# finding the smallest number
for num in set_A:
if num < min_num:
min_num = num
# print(min_num)
#if negative make positive
if min_num < 0 or min_num == 0:
min_num = 1
# print(min_num)
# if in set add 1 until not
while min_num in set_A:
min_num += 1
return min_num
Not sure why this is not 100% in correctness. It is 100% performance
def solution(A):
arr = set(A)
N = set(range(1, 100001))
while N in arr:
N += 1
return min(N - arr)
solution([1, 2, 6, 4])
#returns 3