How can I compare count of performing? - python

import random
import time
def insertion_Sort(A):
if len(A) == 1 :
return A
else :
S = [A[0]]
for i in range(1,len(A)):
j = i-1
while j >= 0:
if A[i] > S[j]:
S.insert((j+1),A[i])
break
else :
j = j-1
if j==-1:
S.insert(0,A[i])
return S
def quick_Sort(A):
if not A:
return []
else:
pivot = random.randint(0, len(A) - 1)
pivot_index = A[pivot]
L = quick_Sort([l for i,l in enumerate(A)
if l <= pivot_index and i != pivot])
R = quick_Sort([r for r in A if r > pivot_index])
return L + [pivot_index] + R
RN = [random.randrange(0,10000) for k in range(100)]
This is the code about quick_sort and insertion_sort.
I want to compare two things, insertion_sort(RN)'s count of performing and quick_sort(RN)'s count of performing.
How can I compare these things?

There is a python module called timeit which is exactly what you are looking for. You can use it as follows:
from timeit import timeit
print(timeit('insertion_Sort(params)','from __main__ import insertion_Sort',number=100))
print(timeit('quick_Sort(params)','from __main__ import quick_Sort',number=100))
And you replace params with the value of your parameter A and number=100 with the number of times you want it to be tested.

Related

Error messages while implementing mergesort related to timeit and the index range

I need to implement mergesort in python and compare the execution time given a few lists of different lengths consisting of random numbers. More precisely, I have to find if the execution time is linear, quadratic or something else, by means of a plot. We know that mergesort has a running time of $O(NlogN).$ Unfortunately, I get error messages related to the execution time part but I also get the following:
File "/home/myname/file.py", line 58, in merge
S[k] = S2[j]
IndexError: list assignment index out of range
Here is the code. Thanks for any suggestions.
import numpy as np
import random
import timeit
import matplotlib.pyplot as plt
def mergesort(S):
n = S.size
if n == 1:
return S
else:
S1, S2 = split(S)
S1sorted = mergesort(S1)
S2sorted = mergesort(S2)
Ssorted = merge(S1sorted, S2sorted)
return Ssorted
def split(S):
l = len(S)//2
S1 = S[:l]
S2 = S[l:]
return S1, S2
def merge(S1, S2):
i = 0
j = 0
k = 0
S = []
while i < len(S1) and j < len(S2):
if S1[i] < S2[j]:
S[k] = S1[i]
i += 1
else:
S[k] = S2[j]
j += 1
k += 1
while i < len(S1):
S[k] = S1[i]
i += 1
k += 1
while j < len(S2):
S[k] = S2[j]
j += 1
k += 1
return S
if __name__ == '__main__':
random.seed(5)
d = [np.random.rand(10**i) for i in range(10)]
print("List of lists:\n",d)
time_list = []
length_list = []
for s in d:
execution_time = timeit.timeit(stmt = 'mergesort(s)', setup='from __main__ import mergesort,s')
time_list.append(execution_time)
length_list.append(len(s))
print("The list s:\n", s)
print("The execution time for the list",s,"is:\n", timeit.timeit(stmt = 'mergesort(s)', setup='from __main__ import mergesort,s'))
plt.scatter(length_list, time_list)
plt.xlabel("N")
plt.ylabel("Execution time for a list of length N")
plt.show()
In that specific line S[k] = S1[i] you are trying to assign to the k-ish element of your list a value. However this element does not exist. Try using S.append(S1[i]).
issue is in function merge you have define the size of list S to zero ie S = []
and in the below code you are setting the value for index k. means for empty array you are assigning values which shouldn't be.
All you need to do is make list S equal to len(S1) + len(S2)`.
so below is right code for merge function
def merge(S1, S2):
i = 0
j = 0
k = 0
S = [None for _ in range(len(S1)+len(S2))] # < -- change happen here
while i < len(S1) and j < len(S2):
if S1[i] < S2[j]:
S[k] = S1[i]
i += 1
else:
S[k] = S2[j]
j += 1
k += 1
while i < len(S1):
S[k] = S1[i]
i += 1
k += 1
while j < len(S2):
S[k] = S2[j]
j += 1
k += 1
return S
EDIT:
what does None and _ means below
S = [None for _ in range(len(S1)+len(S2))]
_ here it is just a throwaway variable, which is not assigned anywhere and not need, just used to make list comprehension
None used to Assign as default value to the final sorted list,when we are initalising/declaring the final list ie S
this is same as S = [n] * None

Is there a more efficient way to compare two lists in python than O(m*n)?

I am trying to find a method for comparing two lists in python in a more efficient way than what I think is the current O(m*n) runtime. Right now I have a brute force approach of iterating each item in m and comparing it to n but is anything else possible? I have tried maybe sorting the lists first for maybe something faster but I am kind of stuck on whether anything else could work here.
In my function i take each item in m and compare it to n and count the number of times the item in m is greater than the item in n.
n = [1,3,7]
m = [2,9]
def comparison(n,m):
counter = 0
for i in m:
for j in n:
if i >= j:
counter += 1
return counter
Here's how you could use a binary search approach after sorting the target list:
from bisect import bisect_right
n = [1,3,7,2]
m = [2,9]
n.sort()
counter = sum(bisect_right(n,value) for value in m)
print(counter) # 6
This should correspond to O((n+m) x log(n)) if n is not known to be sorted. If n is always provided in sorted order, then you don't need your function to sort it and you will get O(m x log(n)) time complexity.
I wrote a code for you to test which one runs faster using the built-in "timeit" library. You can test others' advice using the same structure. There is the code:
import timeit
import numpy as np
n = [1,3,7]
m = [9,2]
my_code = '''
def comparison(n,m):
counter = 0
for i in n:
for j in m:
if i >= j:
counter += 1
return counter
'''
mysetup = "import numpy as np"
my_code2 = '''
def comparison_with_numpy(n,m):
x = np.array(n)
y = np.array(m)
smaller = np.array([x[i] > y[:] for i in range(x.shape[0])]).astype('int')
return sum(smaller)[0]
'''
my_code3 = '''
def sort_first(n,m):
sorted(n)
sorted(m)
count = 0
if len(n) > len(m):
iteration = len(n)
else:
iteration = len(m)
for _ in range(iteration):
if n != []:
y = n.pop(0)
if m != []:
x = m.pop(0)
if y > x:
count += 1
return count
'''
def comparison(n,m):
counter = 0
for i in n:
for j in m:
if i >= j:
counter += 1
print(counter)
return counter
def comparison_with_numpy(n,m):
x = np.array(n)
y = np.array(m)
smaller = np.array([x[i] > y[:] for i in range(x.shape[0])]).astype('int')
return sum(smaller)[0]
def sort_first(n,m):
sorted(n)
sorted(m)
count = 0
if len(n) > len(m):
iteration = len(n)
else:
iteration = len(m)
for _ in range(iteration):
if n != []:
y = n.pop(0)
if m != []:
x = m.pop(0)
if y > x:
count += 1
return count
def main():
print('comparison /w sort\t\t',timeit.timeit(stmt = my_code3,number=10000))
print('comparison\t\t',timeit.timeit(stmt = my_code,number=10000))
print('comparison with numpy\t\t',timeit.timeit(setup = mysetup
,stmt = my_code2
,number=10000))
if __name__ == "__main__":
main()

Duplicate pairs in an array

Given an array A with zero index and N integers find equal elements with different positions in the array. Pair of indexes (P,Q) such that 0 <= P < Q < N such that A[P] = A[Q].
My idea:
def function(arr, n) :
count = 0
arr.sort()
i = 0
while i < (n-1) :
if (arr[i] == arr[i + 1]) :
count += 1
i = i + 2
else :
i += 1
return count
Two questions:
How do I avoid counting elements whose first indices are not smaller than the second indices?
How do I build a function where the input is only the array? (So not (arr, n))
What you can do is similar to this:
This one is the naive approach:
def function(arr) :
count = 0
n = len(arr)
i = 0
for i in range(n):
for j in range(i+1,n):
if arr[i]==arr[j]:
count+=1
return count
This one is more optimized approach you can try:
def function(arr) :
mp = dict()
n = len(arr)
for i in range(n):
if arr[i] in mp.keys():
mp[arr[i]] += 1
else:
mp[arr[i]] = 1
ans = 0
for it in mp:
count = mp[it]
ans += (count * (count - 1)) // 2
return ans
You can use collections.Counter to count the number of occurrences of every integer,
then use math.comb with n=count and k=2 to get the number of such pairs for every integer, and simply sum them:
from collections import Counter
from math import comb
def function(arr):
return sum(comb(count, 2) for num,count in Counter(arr).items())
print(function([1,2,3,6,3,6,3,2]))
The reason math.comb(count,2) is exactly the number of pairs is that any 2 elements out of the count you choose, regardless of their order, are a single pair: the former one is P and the latter is Q.
EDIT: Added timeit benchmakrs:
Here's a full example you can test to compare the performance of both methods:
from timeit import timeit
from random import randint
from collections import Counter
from math import comb
def with_comb(arr):
return sum(comb(count, 2) for num,count in Counter(arr).items())
def with_loops(arr):
mp = dict()
n = len(arr)
for i in range(n):
if arr[i] in mp.keys():
mp[arr[i]] += 1
else:
mp[arr[i]] = 1
ans = 0
for it in mp:
count = mp[it]
ans += (count * (count - 1)) // 2
return ans
a = [randint(1,1000) for _ in range(10000)]
time1 = timeit('with_loops(a)', globals=globals(), number=1000)
time2 = timeit('with_comb(a)', globals=globals(), number=1000)
print(time1)
print(time2)
print(time1/time2)
Output (on my laptop):
2.9549962
0.8175686999999998
3.6143705110041524

Mean, Median, and Mode in Python

I'm doing a statistical problem set in Python on Hackerrank. When I input a list of values to calculate the mode. It shows me a runtime error.
# Enter your code here. Read input from STDIN. Print output to STDOUT
N = int(input())
X = list(map(int, input().split()))
X.sort()
# Find the mean
mean = sum(X) / N
print(mean)
# Find the median
if N % 2 == 0:
median = (X[N//2] + X[N//2 - 1]) / 2
else:
median = X[N//2]
print(median)
# Find the mode
occurrence = list([1 for _ in range(N)])
for i in range(N):
for j in range(i+1, N):
if X[i] == X[j]:
occurrence += 1
if max(occurrence) == 1:
mode = min(X)
else:
mode = X[occurrence[max(occurrence)]]
print(mode)
When I take a 2500 input for X, it just shows me a runtime error.
This is the link to the test case
enter link description here
I use this when looking for mean, median, and mode
import numpy as np
from scipy import stats
n = int(input())
arr = list(map(int, input().split()))
print(np.mean(arr))
print(np.median(arr))
print(stats.mode(arr)[0][0])
You are trying to add 1 to occurence which is of list type:
Also, I'm sure this may be a copying mistake but your loop is incorrect:
for i in range(N):
for j in range(i+1, N):
if X[i] == X[j]:
occurrence += 1
# It will be
for i in range(N):
for j in range(i+1, N):
if X[i] == X[j]:
occurrence += 1
Then you might wanna change your occurrence to something like:
occurrence[i] += 1
# from
occurrence += 1
Hope this helps
I have run your code, here is the compile problem:
for i in range(N):
for j in range(i+1, N):
if X[i] == X[j]:
occurrence += 1
I think your meaning is if inside two for, like:
for i in range(N):
for j in range(i + 1, N):
if X[i] == X[j]:
occurrence += 1
but occurrence is list here, can't plus by one, I think you means to count the occurrence of int, and output the max one? you can use defaultdict or Counter here, but defaultdict is only in one loops.
# import collections
# import operator
# Find the mode
occurrence = collections.Counter(X)
# occurrence = collections.defaultdict(int)
#
# for i in range(N):
# occurrence[X[i]] += 1
mode = max(occurrence.items(), key=operator.itemgetter(1))[0]
print(mode)
Here is a Mean, Median, and Mode class.
import statistics
from collections import Counter
def median(list):
n = len(list)
s = sorted(list)
return (sum(s[n//2-1:n//2+1])/2.0, s[n//2])[n % 2] if n else None
def mean(list):
if len(list) == 0:
return 0
list.sort()
total = 0
for number in list:
total += number
return total / len(list)
def mode(list):
counter = Counter(list)
if len(counter) > 1:
possible_mode, next_highest = counter.most_common(2)
if possible_mode[1] > next_highest[1]:
return possible_mode[0]
return "None"

Recursive formula in python for recursive sigma how to?

I recently asked this question and got the first answer. I'm trying to put this into python code. This is what I have, but I keep getting 0 as the answer.
def f(n, k, s):
ans = 0
for j in range(1, min({k,s}) + 1):
print j
if (n == 1):
if (k >= s):
ans = ans + 1
elif (k < s):
ans = ans + 0
elif (s > n):
ans = ans + 0
elif (n*k < s):
ans = ans + 0
else:
ans = ans + f(n-1,j,s-j)
return ans
print f(10, 12, 70)
What is wrong with my code? What do I need to change? I don't know what's wrong. Please help. Thanks!
Your code is way too complex. You can write an almost one-to-one transcription of the answer you got on math exchange:
def f(n, k, s):
if n == 1:
return int(k >= s)
# or: 1 if k >=s else 0
return sum(f(n-1, j, s-j) for j in range(1, min(k, s)+1))
# to make it faster:
#return sum(f(n-1, j, s-j) for j in range(1, min(k, s)+1) if n*k >= s)
The problem in your code is that you put the base-case checking inside the loop, when it should be outside:
def f(n, k, s):
ans = 0
if n == 1:
return int(k >= s)
for j in range(1, min({k,s}) + 1):
print j
if n*k >= s:
ans += f(n-1,j,s-j)
return ans
With both implementations I get 12660 as result for f(10, 12, 70).
I don't know why yours doesn't work, but here's an implementation that does, which IMO is MUCH more readable:
from itertools import permutations
def f(n, k, s):
if k > s:
k = s-1
count = 0
sum_perms = []
number_list = []
for i in range(1,k):
for j in range(1,k,i):
number_list.append(i)
for perm in permutations(number_list, n):
if sum(perm) == s and perm not in sum_perms:
sum_perms.append(perm[:])
count += 1
return sum_perms, count
It's a lot slower than the recursion technique though :-(
itertools is amazing.

Categories

Resources