Implementing 3-way quicksort - python

I'm a new to algorithms and I'm confused as to where are the errors in my code that I'm writing as an assignment. I'm trying to implement a quicksort algorithm in Python 3 that deals with equal values in the array.
Here's a quicksort function (a stands for the array):
def randomized_quick_sort(a, l, r):
if l >= r:
return
k = random.randint(l, r)
a[l], a[k] = a[k], a[l]
m1, m2 = partition3(a, l, r)
randomized_quick_sort(a, l, m1 - 1);
randomized_quick_sort(a, m2 + 1, r);
And here's my partition function:
def partition3(a, l, r):
x, j, t = a[l], l, r
for i in range(l + 1, r + 1):
if a[i] < x:
j +=1
a[i], a[j] = a[j], a[i]
elif a[i] > x:
a[i], a[t] = a[t], a[i]
t -=1
else:
j +=1
a[l], a[j] = a[j], a[l]
return j, t

You should rectify your partition function:
Here is a working example :
def partition3(a, l, r):
x, j, t = a[l], l, r
i = j
while i <= t :
if a[i] < x:
a[j], a[i] = a[i], a[j]
j += 1
elif a[i] > x:
a[t], a[i] = a[i], a[t]
t -= 1
i -= 1 # remain in the same i in this case
i += 1
return j, t

Here is a dead simple quicksort implementation in python. While it is still nlogn there are a bunch of performance optimizations that can be made. For example the partitioning into less,equal,greater can be made in a single pass instead of 3 passes of the array.
def qsort(arr):
if len(arr) <= 1: return arr
pivot = arr[0]
less = [x for x in arr if x < pivot]
equal = [x for x in arr if x == pivot]
greater = [x for x in arr if x > pivot]
return qsort(less) + equal + qsort(greater)
To make that partition happen in one pass of the array, make a helper function like follows:
def partition(arr, pivot):
less, equal, greater = [], [], []
for val in arr:
if val < pivot: less.append(val)
if val == pivot: equal.append(val)
if val > pivot: greater.append(val)
return less, equal, greater
def qsort(arr):
if len(arr) <= 1: return arr
pivot = arr[0]
less, equal, greater = partition(arr, pivot)
return qsort(less) + equal + qsort(greater)

Another implementation with for loop
def partition3(a, l, r):
x = a[l]
m1 = l
m2 = l
i = m1
for i in range(l + 1, r + 1):
if a[i] < x:
a[i], a[m1] = a[m1], a[i]
a[i], a[m2+1] = a[m2+1], a[i]
m1 += 1
m2 += 1
elif a[i] == x:
a[i], a[m2+1] = a[m2+1], a[i]
m2 += 1
return m1, m2

Related

I am not getting the correct output for merge sort in Python

I am trying to writing the algorithm for merge sort in Python, but I am not getting the correct output. The error I am getting is "list assignment index out of range", so there is some logical error.
This is my code:
def mergeSort(a, l, r):
if l < r:
mid = (l+r)//2
mergeSort(a, l, mid)
mergeSort(a, mid+1, r)
merge(a, l, mid, r)
def merge(a, l, mid, r):
b = []
i = l
j = mid + 1
k = l
while i <= mid and j <= r:
if a[i] < a[j]:
b[k] = a[i]
i = i + 1
else:
b[k] = a[j]
j = j + 1
k = k + 1
if i > mid:
while j <= r:
b[k] = a[j]
k = k + 1
j = j + 1
else:
while i <= mid:
b[k] = a[i]
k = k + 1
j = j + 1
for k in range(l, r+1):
a[k] = b[k]
a = []
n = int(input("Enter the number of elements: "))
print("Enter the elements now")
for i in range(0, n):
element = int(input())
a.append(element)
print("Given array: ", a)
mergeSort(a, 0, len(a) - 1)
print("Sorted array is: ", a)
The first thing is that the array out of bounds error is coming because you are using b = []. When you do b[k] = a[i], it will throw the out of bounds exception. You have to initialize the b array.
Secondly, there are some issues in the logic. I corrected your code as follows:
def mergeSort(a):
if len(a) > 1:
mid = len(a) // 2
L = a[:mid]
R = a[mid:]
mergeSort(L)
mergeSort(R)
merge(a, mid, L, R)
def merge(a, mid, L, R):
i = 0
j = 0
k = 0
while i < len(L) and j < len(R):
if L[i] < R[j]:
a[k] = L[i]
i = i + 1
else:
a[k] = R[j]
j = j + 1
k = k + 1
while j < len(R):
a[k] = R[j]
k = k + 1
j = j + 1
while i < len(L):
a[k] = L[i]
k = k + 1
i = i + 1
a = []
n = int(input("Enter the number of elements: "))
print("Enter the elements now")
for i in range(0, n):
element = int(input())
a.append(element)
print("Given array: ", a)
mergeSort(a)
print("Sorted array is: ", a)

Why my python version hoare partition quick sort is slower than lomuto partition?

Normally when implementing quick sort, Hoare's scheme is more efficient than Lomuto's partition scheme because of fewer swaps on average. To verify this, I try to count the swaps in all possible permutations with repeated elements. I generate a list of all possible repeatable permutations of 8 elements to test, totally 8 to the power of 8 permutations. But the result on my machine is strange, Hoare's scheme is slower even it has fewer swaps. Here is the python codes:
import copy
import time
swap_lomuto = 0
swap_hoare = 0
def hoare_partition(a, p, r):
global swap_hoare
pivot = a[(p+r)//2]
i = p - 1
j = r + 1
while True:
i = i + 1
j = j - 1
while a[i] < pivot:
i = i + 1
while a[j] > pivot:
j = j - 1
if i < j:
a[i], a[j] = a[j], a[i]
swap_hoare += 1
else:
return j
def quick_sort_hoare(a, lo, hi):
if lo < hi:
p = hoare_partition(a, lo, hi)
quick_sort_hoare(a, lo, p)
quick_sort_hoare(a, p+1, hi)
def lomuto_partition(a, p, r):
global swap_lomuto
pivot = a[r]
i = j = p
while j < r:
if a[j] < pivot:
a[i], a[j] = a[j], a[i]
i = i + 1
swap_lomuto += 1
j = j + 1
a[i], a[r] = a[r], a[i]
swap_lomuto +=1
return i
def quick_sort_lomuto(a, lo, hi):
if lo < hi:
p = lomuto_partition(a, lo, hi)
quick_sort_lomuto(a, lo, p-1)
quick_sort_lomuto(a, p+1, hi)
def gen_cases(n):
a = list(range(1, n+1))
r = [[]]
def product(A, B):
result = []
for a in A:
for b in B:
result.append(a + [b])
return result
for i in range(n):
r = product(r, a)
return r
# generate 8**8 permutations with repeated elements
# e.g. [[1, 1], [1, 2], [2, 1], [2, 2]] when n == 2
n = 8
cases1 = gen_cases(n)
cases2 = copy.deepcopy(cases1)
start = time.time()
for c in cases1:
quick_sort_lomuto(c, 0, len(c)-1)
end = time.time()
print("lomuto time: ", end-start)
print("lomuto swaps: ", swap_lomuto)
start = time.time()
for c in cases2:
quick_sort_hoare(c, 0, len(c)-1)
end = time.time()
print("hoare time: ", end-start)
print("hoare swaps: ", swap_hoare)
# assert if two methods give the same result
for i in range(len(cases1)):
for j in range(n):
assert cases1[i][j] == cases2[i][j]
The result on my machine is
lomuto time: 46.35845708847046
lomuto swaps: 199594736
hoare time: 59.4106240272522
hoare swaps: 116165488
I can't figure out where is wrong.
See if this version does any better. This may be an issue where the interpreter overhead has a greater effect then the algorithm efficiency.
If sorting random or pseudo random numbers, I've found Lomuto to be slightly faster than Hoare.
def qsort(a, lo, hi):
if(lo >= hi):
return
p = a[(lo + hi) // 2] # pivot, any a[] except a[hi]
i = lo - 1
j = hi + 1
while(1):
while(1): # while(a[++i] < p)
i += 1
if(a[i] >= p):
break
while(1): # while(a[--j] < p)
j -= 1
if(a[j] <= p):
break
if(i >= j):
break
a[i],a[j] = a[j],a[i]
qsort(a, lo, j)
qsort(a, j+1, hi)

Why this Merge Sort does not work properly?

I need to implement Merge Sort using Python 3. I coded it. But It doesn't give proper output. Can anybody check it please?
Here my code is,
def mergeSort(A, p, r):
if p < r:
q = (p + r) // 2
mergeSort(A, p, q)
mergeSort(A, q+1, r)
Merge(A, p, q, r)
def Merge(A, p, q, r):
i = 1
j = q+1
k = 0
TEMP = [0] * (r+1)
while i <= q and j <= r:
if A[i] <= A[j]:
TEMP[k] = A[i]
k += 1
i += 1
else:
TEMP[k] = A[j]
k += 1
j += 1
if (j > r) :
for t in range(0, q-1):
A[r-t] = A[q-t]
for t in range(0, k-1):
A[p+t] = TEMP[t+1]
A = [15, 16, 13, 10, 19, 18]
mergeSort(A, 0, len(A)-1)
print(A)
Thank you
The way you perform merge looks weird (to me), but I will correct on what you have so far.
1- Initialization value of i is wrong, it should be:
i = p
because i is the first element you will look in array A.
2- Initialization value of size of TEMP array is wrong, it should be:
(r - p + 1)
3- There seems a mistake in filling in TEMP array and/or replacing A array elements, here is the fixed code. I wrote a comment about the part after first while loop to indicate what needs to be done at that point.
def mergeSort(A,p,r):
if p < r:
q = (p+r)//2
mergeSort(A,p,q)
mergeSort(A,q+1,r)
Merge(A,p,q,r)
def Merge(A,p,q,r):
i = p
j = q+1
k=0
TEMP = [0]*(r - p + 1)
while i <= q and j <= r:
if A[i] <= A[j]:
TEMP[k] = A[i]
k += 1
i += 1
else:
TEMP[k] = A[j]
k += 1
j += 1
"""
There are currently 2 cases
1- i > q, means we exhausted left but there are elements in the right
2- j > r, means we exhausted right but there are elements in the left
"""
if (j > r):
# copy elements at the left side to temp
while (i <= q):
TEMP[k] = A[i]
i += 1
k += 1
else:
# copy elements at the right side to temp
while (j <= r):
TEMP[k] = A[j]
j += 1
k += 1
# replace elements in A with elements in TEMP
for t in range(k):
A[p+t] = TEMP[t]
A = [15,16,13,10,19,18]
mergeSort(A,0,len(A)-1)
print(A)
The error lies in the Merge() function.
Initialisation of i=p and not i=1
After the while loop terminates, there's a chance that either i<q or j<r. We need to accommodate those cases as well.
Size of array TEMP was incorrect.
Corrected Merge Function:
def Merge(A,p,q,r):
i = p
j = q+1
k=0
TEMP = [0]*(r-p+1)
while i <= q and j <= r:
if A[i] <= A[j]:
TEMP[k] = A[i]
k += 1
i += 1
else:
TEMP[k] = A[j]
k += 1
j += 1
while i<=q:
TEMP[k] = A[i]
k+=1
i += 1
while j<=r:
TEMP[k] = A[j]
k+=1
j += 1
for t in range (p,r+1):
A[t] = TEMP[t-p]
Note: Please try using more meaningful variable names.

I have written a code for merge sort.but I couldn't return sorted value how can I return sorted list

I have written two functions for merge sort:
def mergesort1(c, l, r):
i, j, k = 0, 0, 0
while (i < len(l)) & (j < len(r)):
if l[i] < r[j]:
c[k] = l[i]
i += 1
else:
c[k] = r[j]
j += 1
k += 1
while i < len(l):
c[k] = l[i]
i += 1
k += 1
while j < len(r):
c[k] = r[j]
j += 1
k += 1
def mergesort(c):
if len(c) < 2:
return c
l = c[0 : (len(c) // 2)]
r = c[len(c) // 2 : len(c)]
mergesort(l)
mergesort(r)
mergesort1(c, l, r)
I am unable to return sorted value of my function
You are sorting the list in-place: the result of the sorting is in the original list. If you'd like to return a new list with the result without modifying the original list:
def mergesort1(l,r):
c = [0]*(len(l) + len(r))
i,j,k=0,0,0
while (i<len(l))&(j<len(r)):
if l[i]<r[j]:
c[k]=l[i]
i +=1
else:
c[k]=r[j]
j +=1
k +=1
while i<len(l):
c[k]=l[i]
i+=1
k+=1
while j<len(r):
c[k]=r[j]
j+=1
k+=1
return c
def mergesort(c):
if len(c)<2:
return list(c)
l = mergesort(c[0:(len(c)//2)])
r = mergesort(c[len(c)//2:len(c)])
return mergesort1(l,r)
Your mergesort function sorts the list in place. You can add a return statement to facilitate method chaining but the function will always return its original argument:
def mergesort1(c, l, r):
i, j, k = 0, 0, 0
while (i < len(l)) & (j < len(r)):
if l[i] < r[j]:
c[k] = l[i]
i += 1
else:
c[k] = r[j]
j += 1
k += 1
while i < len(l):
c[k] = l[i]
i += 1
k += 1
while j < len(r):
c[k] = r[j]
j += 1
k += 1
def mergesort(c):
if len(c) >= 2:
l = c[0 : (len(c) // 2)]
r = c[len(c) // 2 : len(c)]
mergesort(l)
mergesort(r)
mergesort1(c, l, r)
return c

Finding median of two sorted arrays. Can some inequality checks be eliminated?

Working on this problem and post code, my question is whether it is safe to change this line of code
j > 0 and i < m and B[j-1] > A[i]
to
i < m and B[j-1] > A[i]
and also it is safe to change this line of code
i > 0 and j < n and A[i-1] > B[j]
to
i > 0 and A[i-1] > B[j]
I think remove the condition check of j is safe since we already making sure size of A is no bigger than size of B.
Problem statement
There are two sorted arrays nums1 and nums2 of size m and n respectively. Find the median of the two sorted arrays. The overall run time complexity should be O(log (m+n)).
Implementation
def median(A, B):
m, n = len(A), len(B)
if m > n:
A, B, m, n = B, A, n, m
if n == 0:
raise ValueError
imin, imax, half_len = 0, m, (m + n + 1) / 2
while imin <= imax:
i = (imin + imax) / 2
j = half_len - i
if j > 0 and i < m and B[j-1] > A[i]:
# i is too small, must increase it
imin = i + 1
elif i > 0 and j < n and A[i-1] > B[j]:
# i is too big, must decrease it
imax = i - 1
else:
# i is perfect
if i == 0: max_of_left = B[j-1]
elif j == 0: max_of_left = A[i-1]
else: max_of_left = max(A[i-1], B[j-1])
if (m + n) % 2 == 1:
return max_of_left
if i == m: min_of_right = B[j]
elif j == n: min_of_right = A[i]
else: min_of_right = min(A[i], B[j])
return (max_of_left + min_of_right) / 2.0
Yes I think, you can remove the condition j > 0, because
j = half_len - i
and you already check that i<m and (m + n + 1) / 2 must be bigger than m since n>=m
same for the second condition j < n. You already make sure that i>0, which ensures that j can at most be (2n+1)/2 - 1 which is smaller than n and thus automatically satisfies your condition

Categories

Resources