Buggy Merge Sort - python

Below is my Merge function which is suppose to resemble what is shown in CLRS on page 31. For now I have commented out the code which would handle any remaining list items.
If I pass A = [1, 2, 1, 12, 2, 5] as input. The output is [1, 2, 1, None, None, None].
Can anyone shred some light on what I'm doing wrong?
def Merge(left, right):
result = [None] * (len(left) + len(right))
i, j, k = 0, 0, 0
while i < len(left) and j < len(right):
if left[i] < right[j]:
result[k] = left[i]
#result.append(left[i])
i += 1
else:
result[k] = right[j]
#result.append(right[j])
j += 1
k += 1
## remaining items in remaining list
## while i < len(left):
## result[k] = left[i]
## i += 1; k+= 1;
##
## while j < len(right):
## result[k] = right[j]
## j += 1; k+= 1;
##
return result
## Ref.: CLRS page 34
def MergeSort(A):
if len(A) > 1:
mid = int(len(A)/2)
left = A[:mid]
right = A[mid:]
MergeSort(left)
MergeSort(right)
return Merge(left, right)
else:
return A
if __name__ == "__main__":
a = [1, 2, 1, 12, 2, 5]
print "a = %s" % a
print "sort a = %s" % MergeSort(a)

When calling MergeSort you are recursively returning new lists but, are never assigning them:
def Merge(left, right):
result = [None] * (len(left) + len(right))
i, j, k = 0, 0, 0
while i < len(left) and j < len(right):
if left[i] < right[j]:
result[k] = left[i]
#result.append(left[i])
i += 1
else:
result[k] = right[j]
#result.append(right[j])
j += 1
k += 1
## remaining items in remaining list
## while i < len(left):
## result[k] = left[i]
## i += 1; k+= 1;
##
## while j < len(right):
## result[k] = right[j]
## j += 1; k+= 1;
##
return result
## Ref.: CLRS page 34
def MergeSort(A):
if len(A) > 1:
mid = int(len(A)/2)
left = A[:mid]
right = A[mid:]
#MergeSort(left)
# here should be
left = MergeSort(left)
#MergeSort(right)
# here should be
right = MergeSort(right)
return Merge(left, right)
else:
return A
if __name__ == "__main__":
a = [1, 2, 1, 12, 2, 5]
print "a = %s" % a
print "sort a = %s" % MergeSort(a)

Related

sorting orders with mergesort incorrect output

I have to design an algorithm to sort a list of orders by selection time (t selection, finding the good in the warehouse and bringing it to the surface) plus shipping time (t shipping, constant). The customer orders can be retrieved (in the same order as placed) from a server database. You should expect between 100-10K elements.
The program takes as input a data-set of orders where the id, t selection, and t shipping are of type unsigned int, n is the number of orders and a space character.
id1, t selection1, t shipping1; ...; idn, t selectionn, t shippingn \n
The expected output is a space-separated list of the ids, sorted by t selection + t shipping and terminated by a new line \n.
Input: 1, 500, 100; 2, 700, 100; 3, 100, 100\n
Output: 3 1 2\n
I am trying to do it with merge sort, however my program returns
1 2 3/n instead of 3 1 2/n
I have provided my code below, could anyone help me out?
#!/usr/bin/env python3
import sys
class Order:
def __init__(self, id: int, selection_time: int, shipping_time: int):
self.id: int = id
self.selection_time: int = selection_time
self.shipping_time: int = shipping_time
def merge(left, right):
if not len(left) or not len(right):
return left or right
result = []
i, j = 0, 0
while len(result) < len(left) + len(right):
if left[i].shipping_time + left[i].selection_time < right[j].shipping_time + right[j].selection_time:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
if i == len(left) or j == len(right):
result.extend(left[i:] or right[j:])
break
return result
def sort(list):
if len(list) < 2:
return list
middle = int(len(list) / 2)
left = sort(list[:middle])
right = sort(list[middle:])
return merge(left, right)
if __name__ == '__main__':
'''
Retrieves and splits the input
'''
data = input()
data = data.split('; ')
order_list = []
for d in data:
id, selection_t, shipping_t = d.split(', ', 2)
order: Order = Order(int(id), int(selection_t), int(shipping_t))
order_list.append(order)
sort(order_list)
for order in order_list:
sys.stdout.write(str(order.id))
sys.stdout.write(" ")
The simplest (and probably least efficient) sorting algorithm is the Bubble sort. But the question says nothing about performance so it can be simplified like this:
class Order:
def __init__(self, ident, selection_time, shipping_time):
self._ident = ident
self._selection_time = selection_time
self._shipping_time = shipping_time
#property
def selection_time(self):
return self._selection_time
#property
def shipping_time(self):
return self._shipping_time
#property
def ident(self):
return self._ident
def merge(lst):
def comboval(order):
return order.selection_time + order.shipping_time
if len(lst) > 1:
mid = len(lst) // 2
left = lst[:mid]
right = lst[mid:]
merge(left)
merge(right)
i = j = k = 0
while i < len(left) and j < len(right):
if comboval(left[i]) < comboval(right[j]):
lst[k] = left[i]
i += 1
else:
lst[k] = right[j]
j += 1
k += 1
for _i in range(i, len(left)):
lst[k] = left[_i]
k += 1
for _j in range(j, len(right)):
lst[k] = right[_j]
k += 1
return lst
inval = '1, 500, 100; 2, 700, 100; 3, 100, 100'
orderlist = []
for order in inval.split(';'):
orderlist.append(Order(*map(int, order.split(','))))
print(*[order.ident for order in merge(orderlist)])
Output:
3 1 2
Note:
This is an in-place sort

Python decorator to time recursive functions properly

I am working in a piece of code for studying purposes, and i want to compare the time required to sort a list using different algorithms. I tried using a decorator but since the mergeSort function is recursive, it gives me the result for each recursion. I want to find a way to summarize the result, if possible. Since i'm very new to decorators i'm not sure what could be done in that case. Is there a way to achieve that goal using a decorator?
import random
import functools
import time
def timeIt(func):
#functools.wraps(func)
def newfunc(*args, **kwargs):
startTime = time.time()
func(*args, **kwargs)
elapsedTime = time.time() - startTime
print('function [{}] finished in {} ms'.format(
func.__name__, int(elapsedTime * 1000)))
return newfunc
#timeIt
def mergeSort(L):
if len(L) > 1:
mid = len(L) // 2
left = L[:mid]
right = L[mid:]
mergeSort(left)
mergeSort(right)
i = j = k = 0
while i < len(left) and j < len(right):
if left[i] < right[j]:
L[k] = left[i]
i += 1
else:
L[k] = right[j]
j += 1
k += 1
while i < len(left):
L[k] = left[i]
i += 1
k += 1
while j < len(right):
L[k] = right[j]
j += 1
k += 1
#timeIt
def selectionSort(L):
for fillslot in range(len(L) - 1, 0, -1):
maxpos = 0
for location in range(1, fillslot + 1):
if L[location] > L[maxpos]:
maxpos = location
temp = L[fillslot]
L[fillslot] = L[maxpos]
L[maxpos] = temp
randomList = random.sample(range(10000), 10000)
mergeSort(randomList.copy())
selectionSort(randomList.copy())
Output:
[...] truncated
function [mergeSort] finished in 7 ms
function [mergeSort] finished in 15 ms
function [mergeSort] finished in 33 ms
function [mergeSort] finished in 68 ms
function [selectionSort] finished in 2049 ms
You can set an attribute (_entered in the example) on the wrapper function as a flag so that it can tell that it is inside a recursive call if the attribute is set:
def timeIt(func):
#functools.wraps(func)
def newfunc(*args, **kwargs):
if not hasattr(newfunc, '_entered'): # enter only if _entered is not set
newfunc._entered = True # set _entered
startTime = time.time()
func(*args, **kwargs)
elapsedTime = time.time() - startTime
print('function [{}] finished in {} ms'.format(
func.__name__, int(elapsedTime * 1000)))
del newfunc._entered # remove _entered
return newfunc
You could just wrap it with another function...
import random
import functools
import time
def timeIt(func):
#functools.wraps(func)
def newfunc(*args, **kwargs):
startTime = time.time()
func(*args, **kwargs)
elapsedTime = time.time() - startTime
print('function [{}] finished in {} ms'.format(
func.__name__, int(elapsedTime * 1000)))
return newfunc
def mergeSort(L):
if len(L) > 1:
mid = len(L) // 2
left = L[:mid]
right = L[mid:]
mergeSort(left)
mergeSort(right)
i = j = k = 0
while i < len(left) and j < len(right):
if left[i] < right[j]:
L[k] = left[i]
i += 1
else:
L[k] = right[j]
j += 1
k += 1
while i < len(left):
L[k] = left[i]
i += 1
k += 1
while j < len(right):
L[k] = right[j]
j += 1
k += 1
def selectionSort(L):
for fillslot in range(len(L) - 1, 0, -1):
maxpos = 0
for location in range(1, fillslot + 1):
if L[location] > L[maxpos]:
maxpos = location
temp = L[fillslot]
L[fillslot] = L[maxpos]
L[maxpos] = temp
#timeIt
def timedSelectionSort(L):
selectionSort(L)
#timeIt
def timedMergeSort(L):
mergeSort(L)
randomList = random.sample(range(10000), 10000)
timedSelectionSort(randomList.copy())
timedMergeSort(randomList.copy())

Bad performance of sorting algorithms implemented

I have implemented some sorting algorithms including Insertion, Selection, Shell, two kinds of Merge. I found that the performance of my implements didn't accord with the description of Algorithms(4th).
For example, here are two kinds of Merge sorting. When sorting a list contains 100,000 elements, Merge1 takes about 0.6s, and Merge2 takes about 50+s. But Merge2 is almost the same as the one in Algorithms(4th) except I use python. I can't figure it out why Merge2 is so slow and how to improve it. Can somebody help me? Thanks!
class Merge1:
def merge(self, a, b):
i = 0; j = 0
res = []
while i < len(a) and j < len(b):
if a[i] < b[j]:
res.append(a[i])
i = i + 1
else:
res.append(b[j])
j = j + 1
res = res + a[i:] + b[j:]
return res
def sort(self, source):
if len(source) <= 1:
return source
half = len(source) // 2
left = self.sort(source[:half])
right = self.sort(source[half:])
retval = self.merge(left, right)
return retval
def is_sort(self, source):
length = len(source)
for i in range(0, length-1):
if source[i] > source[i+1]:
return False
return True
class Merge2:
def merge(self, source, lo, mid ,hi):
i = lo
j = mid + 1
aux = source[:]
k = lo
while k <= hi:
if i > mid:
source[k] = aux[j]
j = j + 1
elif j > hi:
source[k] = aux[i]
i = i + 1
elif aux[i] < aux[j]:
source[k] = aux[i]
i = i + 1
else:
source[k] = aux[j]
j = j + 1
k = k+1
def sort(self, source):
sz = 1
N = len(source)
while sz < N:
for lo in range(0, N-sz, sz+sz):
# pdb.set_trace()
self.merge(source, lo, lo+sz-1, min(lo+sz+sz-1, N-1))
sz = sz + sz
def is_sort(self, source):
length = len(source)
for i in range(0, length-1):
if source[i] > source[i+1]:
return False
return True
Here is the implement in Algorithms:
Here is the test code:
merge1 = Merge1()
source = np.random.randint(100000, size=100000).tolist()
start = time.time()
merge1.sort(source)
end = time.time()
print("Merge1 takes: {}s".format(end-start))
merge2 = Merge2()
source = np.random.randint(100000, size=100000).tolist()
start = time.time()
merge2.sort(source)
end = time.time()
print("Merge2 takes: {}s".format(end-start))
result:
E:>python sort.py
Merge1 takes: 0.6376256942749023s
Merge2 takes: 57.99568271636963s
Consider this modification. According to my quick tests, it improved the performance considerably (from nearly one minute down to less than 1 second). The main performance gain comes from avoiding to create that many copies of the whole list. The other alterations only increase performance marginally.
According to a simple comparison of the sum it should not mess up the list, but you should do some more tests if you like to use it.
class Merge4:
def merge(self, source, aux, lo, mid ,hi):
i = lo
j = mid + 1
a_j= aux[j]
a_i= aux[i]
k = lo
while k <= hi:
if i > mid:
source[k] = a_j
j += 1
a_j= aux[j]
elif j > hi:
source[k] = a_i
i += 1
a_i= aux[i]
elif a_i < a_j:
source[k] = a_i
i += 1
a_i= aux[i]
else:
source[k] = a_j
j += 1
a_j= aux[j]
k += 1
# update the aux array for the next call
aux[lo:hi+1]= source[lo:hi+1]
def sort(self, source):
sz = 1
N = len(source)
while sz < N:
sz_2= sz * 2
# create the aux array, that will be maintained continuously
# and add one extra None, so the "prefetching" works also
# during the last iteration (refering to a_i and a_j)
aux= source[:]
aux.append(None)
for lo in range(0, N-sz, sz_2):
# pdb.set_trace()
self.merge(source, aux, lo, lo+sz-1, min(lo+sz_2-1, N-1))
sz = sz_2
def is_sort(self, source):
length = len(source)
for i in range(0, length-1):
if source[i] > source[i+1]:
return False
return True

What is the error in this merge sort implementation?

def merge(l1,l2):
(lmerged,i,j) = ([],0,0)
while i+j < len(l1) + len(l2):
if i == len(l1):
lmerged.append(l2[j])
j = j+1
elif j == len(l2):
lmerged.append(l1[i])
i = i+1
elif l1[i] < l2[j]:
lmerged.append(l1[i])
i = i+1
elif l2[j] < l1[i]:
lmerged.append(l2[j])
j = j+1
else:
lmerged.append(l1[i])
i = i+1
j = j+1
return(lmerged)
def mergesort(l):
if len(l) < 2:
return(l)
else:
n = len(l)
leftsorted = mergesort(l[:n//2])
rightsorted = mergesort(l[n//2:])
return(merge(leftsorted,rightsorted))
What is the error in this code sample? On which list will this implementation fail? Is the logic correct or there is some flaw in my logic itself?
fail test: [1, 1] is sorted as [1]
fix: remove j = j + 1 in merge function in the last else block.

How to count swaps in a quicksort? (Python)

Hello I have this code and I don't know how to count how many exchanges it does :(
def quicksort(lista,izq,der):
i = izq
j = der
pivote = lista[(izq + der)//2]
while i <= j:
while lista[i] < pivote:
i += 1
while pivote < lista[j]:
j -= 1
if i <= j:
aux = lista[i]
lista[i] = lista[j]
lista[j] = aux
i += 1
j -= 1
if izq < j:
quicksort(lista, izq, j);
if i < der:
quicksort(lista, i, der);
So where can I put a counter that says me how many exchanges It does?
Edit: I need that the function returns me that number and how many comparisons It does.
def quicksort(lista,izq,der):
i = izq
j = der
pivote = lista[(izq + der)//2]
swap_count = 0
while i <= j:
while lista[i] < pivote:
i += 1
while pivote < lista[j]:
j -= 1
if i <= j:
aux = lista[i]
lista[i] = lista[j]
lista[j] = aux
swap_count += 1
i += 1
j -= 1
if izq < j:
swap_count += quicksort(lista, izq, j)
if i < der:
swap_count += quicksort(lista, i, der)
return swap_count
Here's what I would do, if I'm understanding you right.
def quicksort(lista,izq,der):
i = izq
j = der
swap_count = 0
compare_count = 0
pivote = lista[(izq + der)//2]
while i <= j:
while lista[i] < pivote:
i += 1
while pivote < lista[j]:
j -= 1
if i <= j:
aux = lista[i]
lista[i] = lista[j]
lista[j] = aux
swap_count += 1
i += 1
j -= 1
compare_count += 1
if izq < j:
other_swap, other_compare = quicksort(lista, izq, j)
swap_count += other_swap
compare_count += other_compare
if i < der:
other_swap, other_compare = quicksort(lista, i, der)
swap_count += other_swap
compare_count += other_compare
return (swap_count, compare_count)
This way you add in the swaps and the compares of the recursive calls as you make them.

Categories

Resources