Mergesort with Python

Mergesort with Python - python

I couldn't find any working Python 3.3 mergesort algorithm codes, so I made one myself. Is there any way to speed it up? It sorts 20,000 numbers in about 0.3-0.5 seconds
def msort(x):
result = []
if len(x) < 2:
return x
mid = int(len(x)/2)
y = msort(x[:mid])
z = msort(x[mid:])
while (len(y) > 0) or (len(z) > 0):
if len(y) > 0 and len(z) > 0:
if y[0] > z[0]:
result.append(z[0])
z.pop(0)
else:
result.append(y[0])
y.pop(0)
elif len(z) > 0:
for i in z:
result.append(i)
z.pop(0)
else:
for i in y:
result.append(i)
y.pop(0)
return result

The first improvement would be to simplify the three cases in the main loop: Rather than iterating while some of the sequence has elements, iterate while both sequences have elements. When leaving the loop, one of them will be empty, we don't know which, but we don't care: We append them at the end of the result.
def msort2(x):
if len(x) < 2:
return x
result = [] # moved!
mid = int(len(x) / 2)
y = msort2(x[:mid])
z = msort2(x[mid:])
while (len(y) > 0) and (len(z) > 0):
if y[0] > z[0]:
result.append(z[0])
z.pop(0)
else:
result.append(y[0])
y.pop(0)
result += y
result += z
return result
The second optimization is to avoid popping the elements. Rather, have two indices:
def msort3(x):
if len(x) < 2:
return x
result = []
mid = int(len(x) / 2)
y = msort3(x[:mid])
z = msort3(x[mid:])
i = 0
j = 0
while i < len(y) and j < len(z):
if y[i] > z[j]:
result.append(z[j])
j += 1
else:
result.append(y[i])
i += 1
result += y[i:]
result += z[j:]
return result
A final improvement consists in using a non recursive algorithm to sort short sequences. In this case I use the built-in sorted function and use it when the size of the input is less than 20:
def msort4(x):
if len(x) < 20:
return sorted(x)
result = []
mid = int(len(x) / 2)
y = msort4(x[:mid])
z = msort4(x[mid:])
i = 0
j = 0
while i < len(y) and j < len(z):
if y[i] > z[j]:
result.append(z[j])
j += 1
else:
result.append(y[i])
i += 1
result += y[i:]
result += z[j:]
return result
My measurements to sort a random list of 100000 integers are 2.46 seconds for the original version, 2.33 for msort2, 0.60 for msort3 and 0.40 for msort4. For reference, sorting all the list with sorted takes 0.03 seconds.

Code from MIT course. (with generic cooperator )
import operator
def merge(left, right, compare):
result = []
i, j = 0, 0
while i < len(left) and j < len(right):
if compare(left[i], right[j]):
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
while i < len(left):
result.append(left[i])
i += 1
while j < len(right):
result.append(right[j])
j += 1
return result
def mergeSort(L, compare=operator.lt):
if len(L) < 2:
return L[:]
else:
middle = int(len(L) / 2)
left = mergeSort(L[:middle], compare)
right = mergeSort(L[middle:], compare)
return merge(left, right, compare)

def merge_sort(x):
if len(x) < 2:return x
result,mid = [],int(len(x)/2)
y = merge_sort(x[:mid])
z = merge_sort(x[mid:])
while (len(y) > 0) and (len(z) > 0):
if y[0] > z[0]:result.append(z.pop(0))
else:result.append(y.pop(0))
result.extend(y+z)
return result

You can initialise the whole result list in the top level call to mergesort:
result = [0]*len(x) # replace 0 with a suitable default element if necessary.
# or just copy x (result = x[:])
Then for the recursive calls you can use a helper function to which you pass not sublists, but indices into x. And the bottom level calls read their values from x and write into result directly.
That way you can avoid all that poping and appending which should improve performance.

Take my implementation
def merge_sort(sequence):
"""
Sequence of numbers is taken as input, and is split into two halves, following which they are recursively sorted.
"""
if len(sequence) < 2:
return sequence
mid = len(sequence) // 2 # note: 7//2 = 3, whereas 7/2 = 3.5
left_sequence = merge_sort(sequence[:mid])
right_sequence = merge_sort(sequence[mid:])
return merge(left_sequence, right_sequence)
def merge(left, right):
"""
Traverse both sorted sub-arrays (left and right), and populate the result array
"""
result = []
i = j = 0
while i < len(left) and j < len(right):
if left[i] < right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
result += left[i:]
result += right[j:]
return result
# Print the sorted list.
print(merge_sort([5, 2, 6, 8, 5, 8, 1]))

As already said, l.pop(0) is a O(len(l)) operation and must be avoided, the above msort function is O(n**2). If efficiency matter, indexing is better but have cost too. The for x in l is faster but not easy to implement for mergesort : iter can be used instead here. Finally, checking i < len(l) is made twice because tested again when accessing the element : the exception mechanism (try except) is better and give a last improvement of 30% .
def msort(l):
if len(l)>1:
t=len(l)//2
it1=iter(msort(l[:t]));x1=next(it1)
it2=iter(msort(l[t:]));x2=next(it2)
l=[]
try:
while True:
if x1<=x2: l.append(x1);x1=next(it1)
else : l.append(x2);x2=next(it2)
except:
if x1<=x2: l.append(x2);l.extend(it2)
else: l.append(x1);l.extend(it1)
return l

Loops like this can probably be speeded up:
for i in z:
result.append(i)
z.pop(0)
Instead, simply do this:
result.extend(z)
Note that there is no need to clean the contents of z because you won't use it anyway.

A longer one that counts inversions and adheres to the sorted interface. It's trivial to modify this to make it a method of an object that sorts in place.
import operator
class MergeSorted:
def __init__(self):
self.inversions = 0
def __call__(self, l, key=None, reverse=False):
self.inversions = 0
if key is None:
self.key = lambda x: x
else:
self.key = key
if reverse:
self.compare = operator.gt
else:
self.compare = operator.lt
dest = list(l)
working = [0] * len(l)
self.inversions = self._merge_sort(dest, working, 0, len(dest))
return dest
def _merge_sort(self, dest, working, low, high):
if low < high - 1:
mid = (low + high) // 2
x = self._merge_sort(dest, working, low, mid)
y = self._merge_sort(dest, working, mid, high)
z = self._merge(dest, working, low, mid, high)
return (x + y + z)
else:
return 0
def _merge(self, dest, working, low, mid, high):
i = 0
j = 0
inversions = 0
while (low + i < mid) and (mid + j < high):
if self.compare(self.key(dest[low + i]), self.key(dest[mid + j])):
working[low + i + j] = dest[low + i]
i += 1
else:
working[low + i + j] = dest[mid + j]
inversions += (mid - (low + i))
j += 1
while low + i < mid:
working[low + i + j] = dest[low + i]
i += 1
while mid + j < high:
working[low + i + j] = dest[mid + j]
j += 1
for k in range(low, high):
dest[k] = working[k]
return inversions
msorted = MergeSorted()
Uses
>>> l = [5, 2, 3, 1, 4]
>>> s = msorted(l)
>>> s
[1, 2, 3, 4, 5]
>>> msorted.inversions
6
>>> l = ['e', 'b', 'c', 'a', 'd']
>>> d = {'a': 10,
... 'b': 4,
... 'c': 2,
... 'd': 5,
... 'e': 9}
>>> key = lambda x: d[x]
>>> s = msorted(l, key=key)
>>> s
['c', 'b', 'd', 'e', 'a']
>>> msorted.inversions
5
>>> l = [5, 2, 3, 1, 4]
>>> s = msorted(l, reverse=True)
>>> s
[5, 4, 3, 2, 1]
>>> msorted.inversions
4
>>> l = ['e', 'b', 'c', 'a', 'd']
>>> d = {'a': 10,
... 'b': 4,
... 'c': 2,
... 'd': 5,
... 'e': 9}
>>> key = lambda x: d[x]
>>> s = msorted(l, key=key, reverse=True)
>>> s
['a', 'e', 'd', 'b', 'c']
>>> msorted.inversions
5

Here is the CLRS Implementation:
def merge(arr, p, q, r):
n1 = q - p + 1
n2 = r - q
right, left = [], []
for i in range(n1):
left.append(arr[p + i])
for j in range(n2):
right.append(arr[q + j + 1])
left.append(float('inf'))
right.append(float('inf'))
i = j = 0
for k in range(p, r + 1):
if left[i] <= right[j]:
arr[k] = left[i]
i += 1
else:
arr[k] = right[j]
j += 1
def merge_sort(arr, p, r):
if p < r:
q = (p + r) // 2
merge_sort(arr, p, q)
merge_sort(arr, q + 1, r)
merge(arr, p, q, r)
if __name__ == '__main__':
test = [5, 2, 4, 7, 1, 3, 2, 6]
merge_sort(test, 0, len(test) - 1)
print test
Result:
[1, 2, 2, 3, 4, 5, 6, 7]

Many have answered this question correctly, this is just another solution (although my solution is very similar to Max Montana) but I have few differences for implementation:
let's review the general idea here before we get to the code:
Divide the list into two roughly equal halves.
Sort the left half.
Sort the right half.
Merge the two sorted halves into one sorted list.
here is the code (tested with python 3.7):
def merge(left,right):
result=[]
i,j=0,0
while i<len(left) and j<len(right):
if left[i] < right[j]:
result.append(left[i])
i+=1
else:
result.append(right[j])
j+=1
result.extend(left[i:]) # since we want to add each element and not the object list
result.extend(right[j:])
return result
def merge_sort(data):
if len(data)==1:
return data
middle=len(data)//2
left_data=merge_sort(data[:middle])
right_data=merge_sort(data[middle:])
return merge(left_data,right_data)
data=[100,5,200,3,100,4,8,9]
print(merge_sort(data))

here is another solution
class MergeSort(object):
def _merge(self,left, right):
nl = len(left)
nr = len(right)
result = [0]*(nl+nr)
i=0
j=0
for k in range(len(result)):
if nl>i and nr>j:
if left[i] <= right[j]:
result[k]=left[i]
i+=1
else:
result[k]=right[j]
j+=1
elif nl==i:
result[k] = right[j]
j+=1
else: #nr>j:
result[k] = left[i]
i+=1
return result
def sort(self,arr):
n = len(arr)
if n<=1:
return arr
left = self.sort(arr[:n/2])
right = self.sort(arr[n/2:] )
return self._merge(left, right)
def main():
import random
a= range(100000)
random.shuffle(a)
mr_clss = MergeSort()
result = mr_clss.sort(a)
#print result
if __name__ == '__main__':
main()
and here is run time for list with 100000 elements:
real 0m1.073s
user 0m1.053s
sys 0m0.017s

def merge(l1, l2, out=[]):
if l1==[]: return out+l2
if l2==[]: return out+l1
if l1[0]<l2[0]: return merge(l1[1:], l2, out+l1[0:1])
return merge(l1, l2[1:], out+l2[0:1])
def merge_sort(l): return (lambda h: l if h<1 else merge(merge_sort(l[:h]), merge_sort(l[h:])))(len(l)/2)
print(merge_sort([1,4,6,3,2,5,78,4,2,1,4,6,8]))

def merge(x):
if len(x) == 1:
return x
else:
mid = int(len(x) / 2)
l = merge(x[:mid])
r = merge(x[mid:])
i = j = 0
result = []
while i < len(l) and j < len(r):
if l[i] < r[j]:
result.append(l[i])
i += 1
else:
result.append(r[j])
j += 1
result += l[i:]
result += r[j:]
return result

A little late the the party, but I figured I'd throw my hat in the ring as my solution seems to run faster than OP's (on my machine, anyway):
# [Python 3]
def merge_sort(arr):
if len(arr) < 2:
return arr
half = len(arr) // 2
left = merge_sort(arr[:half])
right = merge_sort(arr[half:])
out = []
li = ri = 0 # index of next element from left, right halves
while True:
if li >= len(left): # left half is exhausted
out.extend(right[ri:])
break
if ri >= len(right): # right half is exhausted
out.extend(left[li:])
break
if left[li] < right[ri]:
out.append(left[li])
li += 1
else:
out.append(right[ri])
ri += 1
return out
This doesn't have any slow pop()s, and once one of the half-arrays is exhausted, it immediately extends the other one onto the output array rather than starting a new loop.
I know it's machine dependent, but for 100,000 random elements (above merge_sort() vs. Python built-in sorted()):
merge sort: 1.03605 seconds
Python sort: 0.045 seconds
Ratio merge / Python sort: 23.0229

def mergeSort(alist):
print("Splitting ",alist)
if len(alist)>1:
mid = len(alist)//2
lefthalf = alist[:mid]
righthalf = alist[mid:]
mergeSort(lefthalf)
mergeSort(righthalf)
i=0
j=0
k=0
while i < len(lefthalf) and j < len(righthalf):
if lefthalf[i] < righthalf[j]:
alist[k]=lefthalf[i]
i=i+1
else:
alist[k]=righthalf[j]
j=j+1
k=k+1
while i < len(lefthalf):
alist[k]=lefthalf[i]
i=i+1
k=k+1
while j < len(righthalf):
alist[k]=righthalf[j]
j=j+1
k=k+1
print("Merging ",alist)
alist = [54,26,93,17,77,31,44,55,20]
mergeSort(alist)
print(alist)

Glad there are tons of answers, I hope you find this one to be clear, concise, and fast.
Thank you
import math
def merge_array(ar1, ar2):
c, i, j= [], 0, 0
while i < len(ar1) and j < len(ar2):
if ar1[i] < ar2[j]:
c.append(ar1[i])
i+=1
else:
c.append(ar2[j])
j+=1
return c + ar1[i:] + ar2[j:]
def mergesort(array):
n = len(array)
if n == 1:
return array
half_n = math.floor(n/2)
ar1, ar2 = mergesort(array[:half_n]), mergesort(array[half_n:])
return merge_array(ar1, ar2)

After implementing different versions of solution,
I finally made a trade-off to achieve these goals based on CLRS version.
Goal
not using list.pop() to iterate values
not creating a new list for saving result, modifying the original one instead
not using float('inf') as sentinel values
def mergesort(A, p, r):
if(p < r):
q = (p+r)//2
mergesort(A, p, q)
mergesort(A, q+1, r)
merge(A, p, q, r)
def merge(A, p, q, r):
L = A[p:q+1]
R = A[q+1:r+1]
i = 0
j = 0
k = p
while i < len(L) and j < len(R):
if(L[i] < R[j]):
A[k] = L[i]
i += 1
else:
A[k] = R[j]
j += 1
k += 1
if i < len(L):
A[k:r+1] = L[i:]
if __name__ == "__main__":
items = [6, 2, 9, 1, 7, 3, 4, 5, 8]
mergesort(items, 0, len(items)-1)
print items
assert items == [1, 2, 3, 4, 5, 6, 7, 8, 9]
Reference
[1] Book: CLRS
[2] https://github.com/gzc/CLRS/blob/master/C02-Getting-Started/exercise_code/merge-sort.py

Try this recursive version
def mergeList(l1,l2):
l3=[]
Tlen=len(l1)+len(l2)
inf= float("inf")
for i in range(Tlen):
print "l1= ",l1[0]," l2= ",l2[0]
if l1[0]<=l2[0]:
l3.append(l1[0])
del l1[0]
l1.append(inf)
else:
l3.append(l2[0])
del l2[0]
l2.append(inf)
return l3
def main():
l1=[2,10,7,6,8]
print mergeSort(breaklist(l1))
def breaklist(rawlist):
newlist=[]
for atom in rawlist:
print atom
list_atom=[atom]
newlist.append(list_atom)
return newlist
def mergeSort(inputList):
listlen=len(inputList)
if listlen ==1:
return inputList
else:
newlist=[]
if listlen % 2==0:
for i in range(listlen/2):
newlist.append(mergeList(inputList[2*i],inputList[2*i+1]))
else:
for i in range((listlen+1)/2):
if 2*i+1<listlen:
newlist.append(mergeList(inputList[2*i],inputList[2*i+1]))
else:
newlist.append(inputList[2*i])
return mergeSort(newlist)
if __name__ == '__main__':
main()

def merge(a,low,mid,high):
l=a[low:mid+1]
r=a[mid+1:high+1]
#print(l,r)
k=0;i=0;j=0;
c=[0 for i in range(low,high+1)]
while(i<len(l) and j<len(r)):
if(l[i]<=r[j]):
c[k]=(l[i])
k+=1
i+=1
else:
c[k]=(r[j])
j+=1
k+=1
while(i<len(l)):
c[k]=(l[i])
k+=1
i+=1
while(j<len(r)):
c[k]=(r[j])
k+=1
j+=1
#print(c)
a[low:high+1]=c
def mergesort(a,low,high):
if(high>low):
mid=(low+high)//2
mergesort(a,low,mid)
mergesort(a,mid+1,high)
merge(a,low,mid,high)
a=[12,8,3,2,9,0]
mergesort(a,0,len(a)-1)
print(a)

If you change your code like that it'll be working.
def merge_sort(arr):
if len(arr) < 2:
return arr[:]
middle_of_arr = len(arr) / 2
left = arr[0:middle_of_arr]
right = arr[middle_of_arr:]
left_side = merge_sort(left)
right_side = merge_sort(right)
return merge(left_side, right_side)
def merge(left_side, right_side):
result = []
while len(left_side) > 0 or len(right_side) > 0:
if len(left_side) > 0 and len(right_side) > 0:
if left_side[0] <= right_side[0]:
result.append(left_side.pop(0))
else:
result.append(right_side.pop(0))
elif len(left_side) > 0:
result.append(left_side.pop(0))
elif len(right_side) > 0:
result.append(right_side.pop(0))
return result
arr = [6, 5, 4, 3, 2, 1]
# print merge_sort(arr)
# [1, 2, 3, 4, 5, 6]

The following code pops at the end (efficient enough) and sorts inplace despite returning as well.
def mergesort(lis):
if len(lis) > 1:
left, right = map(lambda l: list(reversed(mergesort(l))), (lis[::2], lis[1::2]))
lis.clear()
while left and right:
lis.append(left.pop() if left[-1] < right[-1] else right.pop())
lis.extend(left[::-1])
lis.extend(right[::-1])
return lis

This is very similar to the "MIT" solution and a couple others above, but answers the question in a little more "Pythonic" manner by passing references to the left and right partitions instead of positional indexes, and by using a range in the for loop with slice notation to fill in the sorted array:
def merge_sort(array):
n = len(array)
if n > 1:
mid = n//2
left = array[0:mid]
right = array[mid:n]
print(mid, left, right, array)
merge_sort(left)
merge_sort(right)
merge(left, right, array)
def merge(left, right, array):
array_length = len(array)
right_length = len(right)
left_length = len(left)
left_index = right_index = 0
for array_index in range(0, array_length):
if right_index == right_length:
array[array_index:array_length] = left[left_index:left_length]
break
elif left_index == left_length:
array[array_index:array_length] = right[right_index:right_length]
break
elif left[left_index] <= right[right_index]:
array[array_index] = left[left_index]
left_index += 1
else:
array[array_index] = right[right_index]
right_index += 1
array = [99,2,3,3,12,4,5]
arr_len = len(array)
merge_sort(array)
print(array)
assert len(array) == arr_len
This solution finds the left and right partitions using Python's handy // operator, and then passes the left, right, and array references to the merge function, which in turn rebuilds the original array in place.
The trick is in the cleanup: when you have reached the end of either the left or the right partition, the original array is filled in with whatever is left over in the other partition.

#here is my answer using two function one for merge and another for divide and
#conquer
l=int(input('enter range len'))
c=list(range(l,0,-1))
print('list before sorting is',c)
def mergesort1(c,l,r):
i,j,k=0,0,0
while (i<len(l))&(j<len(r)):
if l[i]<r[j]:
c[k]=l[i]
i +=1
else:
c[k]=r[j]
j +=1
k +=1
while i<len(l):
c[k]=l[i]
i+=1
k+=1
while j<len(r):
c[k]=r[j]
j+=1
k+=1
return c
def mergesort(c):
if len(c)<2:
return c
else:
l=c[0:(len(c)//2)]
r=c[len(c)//2:len(c)]
mergesort(l)
mergesort(r)
return mergesort1(c,l,r)

def merge(arr, p, q, r):
left = arr[p:q + 1]
right = arr[q + 1:r + 1]
left.append(float('inf'))
right.append(float('inf'))
i = j = 0
for k in range(p, r + 1):
if left[i] <= right[j]:
arr[k] = left[i]
i += 1
else:
arr[k] = right[j]
j += 1
def init_func(function):
def wrapper(*args):
a = []
if len(args) == 1:
a = args[0] + []
function(a, 0, len(a) - 1)
else:
function(*args)
return a
return wrapper
#init_func
def merge_sort(arr, p, r):
if p < r:
q = (p + r) // 2
merge_sort(arr, p, q)
merge_sort(arr, q + 1, r)
merge(arr, p, q, r)
if __name__ == "__main__":
test = [5, 4, 3, 2, 1]
print(merge_sort(test))
Result would be
[1, 2, 3, 4, 5]

from run_time import run_time
from random_arr import make_arr
def merge(arr1: list, arr2: list):
temp = []
x, y = 0, 0
while len(arr1) and len(arr2):
if arr1[0] < arr2[0]:
temp.append(arr1[0])
x += 1
arr1 = arr1[x:]
elif arr1[0] > arr2[0]:
temp.append(arr2[0])
y += 1
arr2 = arr2[y:]
else:
temp.append(arr1[0])
temp.append(arr2[0])
x += 1
y += 1
arr1 = arr1[x:]
arr2 = arr2[y:]
if len(arr1) > 0:
temp += arr1
if len(arr2) > 0:
temp += arr2
return temp
#run_time
def merge_sort(arr: list):
total = len(arr)
step = 2
while True:
for i in range(0, total, step):
arr[i:i + step] = merge(arr[i:i + step//2], arr[i + step//2:i + step])
step *= 2
if step > 2 * total:
return arr
arr = make_arr(20000)
merge_sort(arr)
# run_time is 0.10300588607788086

Here is my attempt at the recursive merge_sort function in python. Note, this is my first python class and my first encounter with this problem so please bear with me if my code is rough, but it works.
def merge_sort(S):
temp = []
if len(S) < 2:
return S
split = len(S) // 2
left = merge_sort(S[:split])
right = merge_sort(S[split:])
finale = temp + merge(left, right)
return finale
def merge(left, right):
holder = []
while len(left) > 0 and len(right) > 0:
if left[0] < right[0]:
holder.append(left[0])
del left[0]
elif left[0] > right[0]:
holder.append(right[0])
del right[0]
if len(left) > 0:
holder.extend(left)
elif len(right) > 0:
holder.extend(right)
return holder

def splitArray(s):
return s[:len(s)//2], s[len(s)//2:]
# the idea here is i+j should sum to n as you increment i and j,
# but once out of bound, the next item of a or b is infinity
# therefore, the comparison will always switch to the other array
def merge(a, b, n):
result = [0] * n
a = a + [float('inf')]
b = b + [float('inf')]
result = [0] * n
i, j = 0, 0
for k in range(0, n):
if a[i] < b[j]:
result[k] = a[i]
i+=1
else:
result[k] = b[j]
j+=1
return result
def mergeSort(items):
n = len(items)
baseCase = []
if n == 0:
return baseCase
if n == 1:
baseCase.append(items[0])
return baseCase
if n == 2:
if items[0] < items[1]:
baseCase.append(items[0])
baseCase.append(items[1])
return baseCase
else:
baseCase.append(items[1])
baseCase.append(items[0])
return baseCase
left, right = splitArray(items)
sortedLeft = mergeSort(left)
sortedRight = mergeSort(right)
return merge(sortedLeft,sortedRight,n)
# Driver code to test above
arr = [12, 11, 13, 5, 6, 7]
n = len(arr)
print ("Given array is")
for i in range(n):
print ("%d" %arr[i]),
arr = mergeSort(arr)
print ("\n\nSorted array is")
for i in range(n):
print ("%d" %arr[i]),

def merge_sort(l):
if len(l) == 1:
if len(n)> 0:
for i in range(len(n)):
if n[i] > l[0]:
break
else:
i = i+1
n.insert(i, l[0])
else:
n.append(l[0])
else:
p = len(l)//2
a = l[:p]
b = l[p:]
merge_sort(a)
merge_sort(b)
m = [3,5,2,4,1]
n = []
merge_sort(m)
print(n)

first divide the array until it's size grater than 1(which is base condition) and do it by recursive function.
compare the left & right sub array value & place those value in your array.
check any item remain in left & right array...
def merge_sort(my_array):
base condition for recursively dividing the array...
if len(my_array) > 1:
middle = len(my_array) // 2
left_array = my_array[:middle]
right_array = my_array[middle:]
#recursive function
merge_sort(left_array)
merge_sort(right_array)
i = 0 # index of left array...
j = 0 # index of right array...
k = 0 # index of new array...
# conquer the array and sorted like below condition
while i < len(left_array) and j < len(right_array):
if left_array[i] < right_array[j]:
my_array[k] = left_array[i]
i += 1
else:
my_array[k] = right_array[j]
j += 1
k += 1
# checking any item remain in left sub array....
while i < len(left_array):
my_array[k] = left_array[i]
i += 1
j += 1
# checking any item remain in right sub array....
while j < len(right_array):
my_array[k] = right_array[j]
j += 1
k += 1
my_array = [11, 31, 7, 41, 101, 56, 77, 2]
print("Input Array: ",my_array)
merge_sort(my_array)
print("Sorted Array: ",my_array)

I would suggest to leverage Python3's protocols instead of passing a comparator here, there and everywhere.
Also a simple set of tests based Knuth's shuffle would be a decent idea to verify implementation correctness:
from abc import abstractmethod
from collections import deque
from typing import Deque, Protocol, TypeVar, List
from random import randint
class Comparable(Protocol):
"""Protocol for annotating comparable types."""
#abstractmethod
def __lt__(self: 'T', x: 'T') -> bool:
pass
#abstractmethod
def __gt__(self: 'T', x: 'T') -> bool:
pass
T = TypeVar('T', bound=Comparable)
def _swap(items: List[T], i: int, j: int):
tmp = items[i]
items[i] = items[j]
items[j] = tmp
def knuths_shuffle(items: List[T]):
for i in range(len(items) - 1, 1, -1):
j = randint(0, i)
_swap(items, i, j)
return items
def merge(items: List[T], low: int, mid: int, high: int):
left_q = deque(items[low: mid])
right_q = deque(items[mid: high])
def put(q: Deque[T]):
nonlocal low
items[low] = q.popleft()
low += 1
while left_q and right_q:
put(left_q if left_q[0] < right_q[0] else right_q)
def put_all(q: Deque[T]):
while q:
put(q)
put_all(left_q)
put_all(right_q)
return items
def mergesort(items: List[T], low: int, high: int):
if high - low <= 1:
return
mid = (low + high) // 2
mergesort(items, low, mid)
mergesort(items, mid, high)
merge(items, low, mid, high)
def sort(items: List[T]) -> List[T]:
"""
>>> for i in range(100):
... rand = knuths_shuffle(list(range(100)))
... assert sorted(rand) == sort(rand)
"""
mergesort(items, 0, len(items))
return items

Related

Implementing quick sort in Python - list index/infinite loop bugs

I'm trying to implement quicksort in Python. I used the pseudocode found on Wikipedia here as a basis. That had do...while loops in it, so I used the
while True:
# code
if condition:
break
construct to simulate it.
I have tried a lot of things like incrementing/decrementing differently or adding checks for indexes out of bounds (which the pseudocode does not have because it is not supposed to be necessary using the Hoare algorithm, at least I think so...). I always get either an IndexError or an infinite loop.
Could someone show me where I have gone wrong in implementing the algo please?
class QuickSort:
def sort(self, data):
if data is None:
raise TypeError()
n = len(data)
if n < 2:
return data
self._sort(data, 0, len(data) - 1)
def _sort(self, A, lo, hi):
if lo >= 0 and hi >= 0:
if lo < hi:
p = self._partition(A, lo, hi)
self._sort(A, lo, p)
self._sort(A, p + 1, hi)
def _partition(self, A, lo, hi):
pivot = A[(hi + lo) // 2]
i = lo - 1
j = hi + 1
while True:
while True:
i += 1
if A[i] < pivot:
break
while True:
j -= 1
if A[j] > pivot:
break
if i >= j:
return j
A[i], A[j] = A[j], A[i]
EDIT: The reason was simple, when using the python version of do...while you have to invert the condition!

It's need to change < and > to >= and <= in the loops:
class QuickSort:
def sort(self, data):
if data is None:
raise TypeError()
n = len(data)
if n < 2:
return data
self._sort(data, 0, len(data) - 1)
def _sort(self, A, lo, hi):
if lo >= 0 and hi >= 0:
if lo < hi:
p = self._partition(A, lo, hi)
self._sort(A, lo, p)
self._sort(A, p + 1, hi)
def _partition(self, A, lo, hi):
pivot = A[(hi + lo) // 2]
i = lo - 1
j = hi + 1
while True:
while True:
i += 1
if A[i] >= pivot: # >= instead of <
break
while True:
j -= 1
if A[j] <= pivot: # <= instead of >
break
if i >= j:
return j
A[i], A[j] = A[j], A[i]
lst = [9, 8, 7, 6, 5, 4, 1, 2]
QuickSort().sort(lst)
print(lst) # [1, 2, 4, 5, 6, 7, 8, 9]

Why is this merge sort implementation not giving the correct answer?

I have 2 merge sort implementation in Python that looks the same but I have no idea why 1 of them isn't working.
def merge(left, right):
result = []
i,j = 0,0
while i < len(left) and j < len(right):
if left[i] < right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
while (i < len(left)):
result.append(left[i])
i += 1
while (j < len(right)):
result.append(right[j])
j += 1
return result
def merge_sort(L):
if len(L) < 2:
return L[:]
else:
middle = len(L)//2
left = merge_sort(L[:middle])
right = merge_sort(L[middle:])
return merge(left, right)
Running merge_sort([1,3,5,2,4,6]) gives [1, 2, 3, 4, 5, 6] which is the correct answer.
However
def merge1(X,Y):
result = []
n = len(X)
m = len(Y)
i = 0
j = 0
while i < n and j < m:
if X[i] < Y[j]:
result.append(X[i])
i += 1
else:
result.append(Y[j])
j += 1
while (i < n):
result.append(X[i])
i += 1
while (j < n):
result.append(Y[j])
j += 1
return result
def merge_sort1(L):
if len(L) <2 :
return L[:]
else:
middle = len(L) // 2
X = merge_sort1(L[:middle])
Y = merge_sort1(L[middle:])
return merge1(X,Y)
running merge_sort1([1,3,5,2,4,6]) gives a strange answer [1, 2, 3, 4] which is wrong.
But I have no idea why the 2nd attempt gives an incorrect answer when it looks the same with the 1st attempt.
What is the problem and why did this happen?

You wrote while (j < n): instead of while (j < m):

Merge sort: how to sort in a decreasing way?

I'm interested in merge sorting. I wrote the merge step to sort in ascending order and it works (see code below). But when I try to adapt my code so that it sorts in a decreasing way, I can't do it. I wanted to reverse the i and j but it doesn't seem to work. Does anyone have any ideas?
def fusion(A,low,mid,high):
aux = [2,7,8,3,6,9]
i = low
j = mid+1
for k in range(low,high,1):
if i>mid:
A[k] = aux[j]
j = j+1
elif j > high:
A[k] = aux[i]
i = i+1
elif aux[i] <= aux[j]:
A[k] = aux[i]
i = i+1
else:
A[k] = aux[j]
j = j+1
return A
fusion([2,7,8,3,6,9],0,2,5)
The output is 2,3,6,7,8,9 but I would like 9,8,7,6,3,2

Here is the "reversed" variant:
def fusion(A, low, mid, high):
aux = A[::-1] # reverse A
i = low
j = mid + 1
for k in range(low, high, 1):
if i > mid:
aux[-k - 1] = A[j]
j += 1
elif j > high:
aux[-k - 1] = A[i]
i += 1
elif A[i] <= A[j]:
aux[-k - 1] = A[i]
i += 1
else:
aux[-k - 1] = A[j]
j += 1
return aux
print(fusion([2, 7, 8, 3, 6, 9], low=0, mid=2, high=5))
Or just reverse range():
def fusion(A, low, mid, high):
aux = A[::-1] # reverse A
i = low
j = mid + 1
for k in range(high, low, -1):
if i > mid:
aux[k] = A[j]
j += 1
elif j > high:
aux[k] = A[i]
i += 1
elif A[i] <= A[j]:
aux[k] = A[i]
i += 1
else:
aux[k] = A[j]
j += 1
return aux
Or just ...
list(reversed(fusion([2, 7, 8, 3, 6, 9], 0, 2, 5)))

Python Quicksort implementation

I tried to implement the recursive quicksort in Python, but it doesn't work. I know that there is the problem that the array doesn't get sorted because the pivot is always higher than i, which results in the problem that i is always equals to m.
def partition(array):
pivot = array[-1]
m = 0
for i in range(len(array) - 1):
if array[i] < pivot:
array[i], array[m] = array[m], array[i]
m += 1
else:
continue
array[m], array[len(array)-1] = array[len(array)-1], array[m]
return m
def quicksort(array):
if len(array) > 1:
m = partition(array)
quicksort(array[:m])
quicksort(array[m+1:])
return array
def main():
testarray = [3,6,2,4,5,1,9,8,7,10,14]
print(quicksort(testarray))
if __name__ == '__main__':
main()

Two things. Firstly, you forgot to return array when it's of length 1, and secondly you aren't actually modifying array before returning. This will work.
def quicksort(array):
if len(array) > 1:
m = partition(array)
# return the concatenation of the two sorted arrays
return quicksort(array[:m]) + quicksort(array[m:])
else:
return array

For those looking for an iterative/non-recursive version of Quicksort, here's an implementation I came up with in Python:
from random import randint
def default_comparator_fn(a, b):
return -1 if a < b else (1 if a > b else 0)
def reverse_comparator_fn(a, b):
return default_comparator_fn(b, a)
def quick_sort(A, comparator_fn=default_comparator_fn):
n = len(A)
if n < 2:
# The list has only 1 element or does not have any.
return A
# There are at least 2 elements.
partitions = [[0, n - 1]] # [[start, end]]
while len(partitions):
partition = partitions.pop()
start = partition[0]
end = partition[1]
pivot_index = randint(start, end)
pivot = A[pivot_index]
A[pivot_index], A[start] = A[start], A[pivot_index]
breakpoint_index = start
k = start + 1
m = end
while k <= m:
res = comparator_fn(A[k], pivot)
if res < 0:
breakpoint_index = k
else:
while m > k:
res = comparator_fn(A[m], pivot)
if res < 0:
breakpoint_index = k
A[m], A[k] = A[k], A[m]
m -= 1
break
m -= 1
k += 1
A[start], A[breakpoint_index] = A[breakpoint_index], A[start]
if start < breakpoint_index - 1:
partitions.append([start, breakpoint_index - 1])
if breakpoint_index + 1 < end:
partitions.append([breakpoint_index + 1, end])
return A
# Example:
A = [4, 2, 5, 1, 3]
quick_sort(A) # Sort in ascending order ([1, 2, 3, 4, 5]).
quick_sort(A, reverse_comparator_fn) # Sort in descending order ([5, 4, 3, 2, 1]).
This implementation of Quicksort accepts an optional custom comparator function which defaults to a comparator which compares the elements of the list in ascending order.

Implementing 3-way quicksort

I'm a new to algorithms and I'm confused as to where are the errors in my code that I'm writing as an assignment. I'm trying to implement a quicksort algorithm in Python 3 that deals with equal values in the array.
Here's a quicksort function (a stands for the array):
def randomized_quick_sort(a, l, r):
if l >= r:
return
k = random.randint(l, r)
a[l], a[k] = a[k], a[l]
m1, m2 = partition3(a, l, r)
randomized_quick_sort(a, l, m1 - 1);
randomized_quick_sort(a, m2 + 1, r);
And here's my partition function:
def partition3(a, l, r):
x, j, t = a[l], l, r
for i in range(l + 1, r + 1):
if a[i] < x:
j +=1
a[i], a[j] = a[j], a[i]
elif a[i] > x:
a[i], a[t] = a[t], a[i]
t -=1
else:
j +=1
a[l], a[j] = a[j], a[l]
return j, t

You should rectify your partition function:
Here is a working example :
def partition3(a, l, r):
x, j, t = a[l], l, r
i = j
while i <= t :
if a[i] < x:
a[j], a[i] = a[i], a[j]
j += 1
elif a[i] > x:
a[t], a[i] = a[i], a[t]
t -= 1
i -= 1 # remain in the same i in this case
i += 1
return j, t

Here is a dead simple quicksort implementation in python. While it is still nlogn there are a bunch of performance optimizations that can be made. For example the partitioning into less,equal,greater can be made in a single pass instead of 3 passes of the array.
def qsort(arr):
if len(arr) <= 1: return arr
pivot = arr[0]
less = [x for x in arr if x < pivot]
equal = [x for x in arr if x == pivot]
greater = [x for x in arr if x > pivot]
return qsort(less) + equal + qsort(greater)
To make that partition happen in one pass of the array, make a helper function like follows:
def partition(arr, pivot):
less, equal, greater = [], [], []
for val in arr:
if val < pivot: less.append(val)
if val == pivot: equal.append(val)
if val > pivot: greater.append(val)
return less, equal, greater
def qsort(arr):
if len(arr) <= 1: return arr
pivot = arr[0]
less, equal, greater = partition(arr, pivot)
return qsort(less) + equal + qsort(greater)

Another implementation with for loop
def partition3(a, l, r):
x = a[l]
m1 = l
m2 = l
i = m1
for i in range(l + 1, r + 1):
if a[i] < x:
a[i], a[m1] = a[m1], a[i]
a[i], a[m2+1] = a[m2+1], a[i]
m1 += 1
m2 += 1
elif a[i] == x:
a[i], a[m2+1] = a[m2+1], a[i]
m2 += 1
return m1, m2

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Mergesort with Python - python

def merge_sort(x): if len(x) < 2:return x result,mid = [],int(len(x)/2) y = merge_sort(x[:mid]) z = merge_sort(x[mid:]) while (len(y) > 0) and (len(z) > 0): if y[0] > z[0]:result.append(z.pop(0)) else:result.append(y.pop(0)) result.extend(y+z) return result

Loops like this can probably be speeded up: for i in z: result.append(i) z.pop(0) Instead, simply do this: result.extend(z) Note that there is no need to clean the contents of z because you won't use it anyway.

def merge(x): if len(x) == 1: return x else: mid = int(len(x) / 2) l = merge(x[:mid]) r = merge(x[mid:]) i = j = 0 result = [] while i < len(l) and j < len(r): if l[i] < r[j]: result.append(l[i]) i += 1 else: result.append(r[j]) j += 1 result += l[i:] result += r[j:] return result

def merge_sort(l): if len(l) == 1: if len(n)> 0: for i in range(len(n)): if n[i] > l[0]: break else: i = i+1 n.insert(i, l[0]) else: n.append(l[0]) else: p = len(l)//2 a = l[:p] b = l[p:] merge_sort(a) merge_sort(b) m = [3,5,2,4,1] n = [] merge_sort(m) print(n)

Related

Implementing quick sort in Python - list index/infinite loop bugs

Why is this merge sort implementation not giving the correct answer?

Merge sort: how to sort in a decreasing way?

Python Quicksort implementation

Implementing 3-way quicksort

Categories

Resources