Python - bubble sort takes forever to complete

Doing this for a school project. I need to perform execution-time measurements for the given algorithms:
a. Measurements should be made for random sets of n = 100, 1000, 10000, 100000, 300000.
b. For every n, the sort should be looped 100 times, and at the end the measurement results are averaged:
Draw a new set of n numbers to sort. The range of the random numbers is 0 - 10000.
Get time T1.
Sort the randomly drawn set.
Get time T2.
Compute T2 - T1 and save the result to the set of measurements.
Is there any good way to speed up processing of that code?
import time
from random import randint  # was missing; randint is used in timing()

# bubble_sort
def bubble_sort(array):
    for iter_num in range(len(array) - 1, 0, -1):
        for idx in range(iter_num):
            if array[idx] > array[idx + 1]:
                array[idx], array[idx + 1] = array[idx + 1], array[idx]

def timing(func, b):
    timingList = list()
    for x in range(100):
        array = [randint(0, 10000) for i in range(b)]
        time1 = time.time()
        func(array)
        time2 = time.time()
        timingList.append(time2 - time1)
    return (sum(timingList) / len(timingList)) * 1000

def imlazy(func):
    print(timing(func, 100))
    print(timing(func, 1000))
    print(timing(func, 10000))
    print(timing(func, 100000))
    print(timing(func, 300000))

if __name__ == '__main__':
    imlazy(bubble_sort)  # imlazy prints itself and returns None

Notwithstanding the general awfulness of the bubble sort, here's a slightly more efficient implementation:
def bubble_sort(L):
    e = len(L)
    while e > 1:
        es = 0
        for i in range(e-1):
            if L[i] > L[i+1]:
                L[i], L[i+1] = L[i+1], L[i]
                es = i+1
        e = es
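For scale, here is a quick sketch (my own addition, not from the answer) of how the two versions could be compared with timeit; the names bubble_sort_v1 and bubble_sort_v2 are hypothetical stand-ins for the question's version and the one above, and the list size is an arbitrary choice:
import timeit
from random import randint

# Hypothetical names: bubble_sort_v1 is the question's version,
# bubble_sort_v2 the early-exit version above.
data = [randint(0, 10000) for _ in range(5000)]

for fn in (bubble_sort_v1, bubble_sort_v2):
    # Sort a fresh copy each run so neither version sees pre-sorted input.
    print(fn.__name__, timeit.timeit(lambda: fn(data.copy()), number=3))
The early-exit version pays off most on inputs with a sorted tail, since e jumps past everything after the last swap in one step.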

Related

How to iterate faster?

I'm iterating 4 million times (for a project). This is taking forever to do. I was wondering how I can go faster.
numbers = [0,1]
evenNumbers = []
y = 0
l = 0
for x in range(1, 4000000):
    l = numbers[x-1] + numbers[x]
    numbers.append(l)
for k in numbers:
    if k % 2 == 0:
        evenNumbers.append(k)
for n in evenNumbers:
    y += n
print(y)
This is going to be very slow regardless, due to how big the numbers are getting, but you can speed it up significantly by just not storing all the intermediate values:
m, n = 0, 1
y = 0
for _ in range(1, 4000000):
    m, n = n, m + n
    if n % 2 == 0:
        y += n
print(y)
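A further refinement, not from the answer above: every third Fibonacci number is even (the parity pattern even, odd, odd repeats), and F(n+3) = 2·F(n+1) + F(n), F(n+4) = 3·F(n+1) + 2·F(n), so one can jump three terms at a time and drop the parity test entirely. A sketch that computes the same sum:
# F(3k) is always the even Fibonacci number; jump three steps at once.
a, b = 0, 1  # F(0), F(1)
y = 0
for _ in range(4000000 // 3 + 1):  # covers the even terms F(0), F(3), ..., F(3999999)
    y += a
    a, b = 2*b + a, 3*b + 2*a
print(y)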
You should just compare the time it takes for each function here to complete, as they are the three ways most people would approach iteration.
import time

def foreach(arr):
    for i in range(len(arr)):
        print(arr[i])

def forin(arr):
    for i in arr:
        print(i)

def whileloop(arr):
    i = 0
    while i < len(arr):
        print(arr[i])
        i += 1

def main():
    arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    start = time.time()
    foreach(arr)
    end = time.time()
    print("foreach: ", end - start)
    start = time.time()
    forin(arr)
    end = time.time()
    print("forin: ", end - start)
    start = time.time()
    whileloop(arr)
    end = time.time()
    print("whileloop: ", end - start)

if __name__ == "__main__":
    main()
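One caveat (my note, not the answer's): with print() in the loop body, console I/O dominates the measurement. A sketch of the same comparison with timeit and a quiet body; the summing bodies are my own stand-ins:
import timeit

arr = list(range(10000))

def foreach(arr):
    total = 0
    for i in range(len(arr)):
        total += arr[i]
    return total

def forin(arr):
    total = 0
    for x in arr:
        total += x
    return total

def whileloop(arr):
    total = 0
    i = 0
    while i < len(arr):
        total += arr[i]
        i += 1
    return total

for fn in (foreach, forin, whileloop):
    # number=1000 repetitions smooth out timer noise on a small body.
    print(fn.__name__, timeit.timeit(lambda: fn(arr), number=1000))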

Measuring the time elapsed gives incorrect values

I am trying to measure the time elapsed (in milliseconds) for sorting an array of variable size using the mergesort algorithm, but the code gives inconsistent values of time elapsed. For example, when the number of elements = 60 --> time = 16.407999999999998 ms,
and when the number of elements = 70 --> time = 0.988 ms.
import datetime  # was missing; used in the driver code
import numpy as np  # was missing; used in arrGen

def mergeSort(arr):
    if len(arr) > 1:
        # Finding the mid of the array
        mid = len(arr)//2
        # Dividing the array elements into 2 halves
        L = arr[:mid]
        R = arr[mid:]
        # Sorting the first half
        mergeSort(L)
        # Sorting the second half
        mergeSort(R)
        i = j = k = 0
        # Copy data to temp arrays L[] and R[]
        while i < len(L) and j < len(R):
            if L[i] < R[j]:
                arr[k] = L[i]
                i += 1
            else:
                arr[k] = R[j]
                j += 1
            k += 1
        # Checking if any element was left
        while i < len(L):
            arr[k] = L[i]
            i += 1
            k += 1
        while j < len(R):
            arr[k] = R[j]
            j += 1
            k += 1

# random number array generator
def arrGen(num):
    myArr = list(np.random.randint(0, 100, size=num))
    return myArr

def printList(arr):
    for i in range(len(arr)):
        print(arr[i], end=" ")
    print()

# Driver Code
if __name__ == '__main__':
    for i in range(10, 100, 10):
        arr = arrGen(i)
        print(f"Arr length is {len(arr)}\n")
        print("Given array is", end="\n")
        printList(arr)
        start_time = datetime.datetime.now()
        mergeSort(arr)
        end_time = datetime.datetime.now()
        time_diff = (end_time - start_time)
        execution_time = time_diff.total_seconds() * 1000.0
        print("Sorted array is: ", end="\n")
        printList(arr)
        print(f"\nTotal time is {execution_time}")
        print("\n\n")
Use a benchmarking library (e.g. timeit).
import timeit

elapsed_secs = timeit.timeit(
    'mergeSort(data.copy())',
    setup='data = arrGen(100)',
    globals=globals())
Note this makes a fresh copy of the unsorted data on each pass. Otherwise the input would be sorted after the first pass.
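To cut run time and noise further, one might pass number= and take the best of several repeats; a sketch (the number=100 and repeat=5 values are arbitrary choices of mine):
import timeit

times = timeit.repeat(
    'mergeSort(data.copy())',
    setup='data = arrGen(100)',
    globals=globals(),
    number=100,  # calls per measurement
    repeat=5)    # independent measurements
# The minimum is the least-disturbed measurement; report it per call, in ms.
print(min(times) / 100 * 1000, "ms")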

How to sum elements of the rows of a lattice periodically

Suppose I have a lattice
a = np.array([[1, 1, 1, 1],
              [2, 2, 2, 2],
              [3, 3, 3, 3],
              [4, 4, 4, 4]])
I'd like to make a function func(lattice, start, end) that takes in 3 inputs, where start and end are the index of rows for which the function would sum the elements. For example, for func(a,1,3) it'll sum all the elements of those rows such that func(a,1,3) = 2+2+2+2+3+3+3+3+4+4+4+4 = 36.
Now I know this can be done easily with slicing and np.sum() whatever. But crucially what I want func to do is to also have the ability to wrap around. Namely func(a,2,4) should return 3+3+3+3+4+4+4+4+1+1+1+1.
A couple more examples would be
func(a,3,4) = 4+4+4+4+1+1+1+1
func(a,3,5) = 4+4+4+4+1+1+1+1+2+2+2+2
func(a,0,1) = 1+1+1+1+2+2+2+2
In my situation I'm never gonna get to a point where it'll sum the whole thing again i.e.
func(a,3,6) = sum of all elements
Update:
For my algorithm
for i in range(MC_STEPS_NODE):
    sweep(lattice, prob, start_index_master, end_index_master,
          rows_for_master)
    # calculate the energy
    Ene = subhamiltonian(lattice, start_index_master, end_index_master)
    # calculate the magnetisation
    Mag = mag(lattice, start_index_master, end_index_master)
    E1 += Ene
    M1 += Mag
    E2 += Ene*Ene
    M2 += Mag*Mag
    if i % sites_for_master == 0:
        comm.Send([lattice[start_index_master:start_index_master+1], L, MPI.INT],
                  dest=(rank-1)%size, tag=4)
        comm.Recv([lattice[end_index_master:end_index_master+1], L, MPI.INT],
                  source=(rank+1)%size, tag=4)
        start_index_master = (start_index_master + 1)
        end_index_master = (end_index_master + 1)
        if start_index_master > 100:
            start_index_master = start_index_master % L
        if end_index_master > 100:
            end_index_master = end_index_master % L
The function I want is the mag() function, which calculates the magnetisation of a sublattice, which is just the sum of all its elements. Imagine an LxL lattice split up into two sublattices, one belonging to the master and the other to the worker. Each sweep sweeps the corresponding sublattice of lattice, with start_index_master and end_index_master determining the start and end row of the sublattice. Whenever i % sites_for_master == 0, the indices move down by adding 1, eventually being taken mod 100 to prevent memory overflow in mpi4py. So you can imagine that if the sublattice is at the centre of the main lattice, then start_index_master < end_index_master. Eventually the sublattice keeps moving down to the point where end_index_master > L and the indices wrap around: in that case, if start_index_master = 10 for a lattice with L = 10, the bottom row of the sublattice is the first row ([0]) of the main lattice.
Energy function:
def subhamiltonian(lattice: np.ndarray, col_len_start: int,
                   col_len_end: int) -> float:
    energy = 0
    for i in range(col_len_start, col_len_end+1):
        for j in range(len(lattice)):
            spin = lattice[i%L, j]
            nb_sum = lattice[(i%L+1) % L, j] + lattice[i%L, (j+1) % L] + \
                     lattice[(i%L-1) % L, j] + lattice[i%L, (j-1) % L]
            energy += -nb_sum*spin
    return energy/4.
This is my function for computing the energy of the sublattice.
You could use np.arange to create the indexes to be summed.
>>> def func(lattice, start, end):
...     rows = lattice.shape[0]
...     return lattice[np.arange(start, end+1) % rows].sum()
...
>>> func(a,3,4)
20
>>> func(a,3,5)
28
>>> func(a,0,1)
12
You can check if the stop index wraps around and, if it does, add the sum from the beginning of the array to the result. This is efficient because it relies on slice indexing and only does extra work if necessary.
def func(a, start, stop):
    stop += 1
    result = np.sum(a[start:stop])
    if stop > len(a):
        result += np.sum(a[:stop % len(a)])
    return result
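A quick check against the examples from the question (my own sanity check, in the same REPL style):
>>> func(a, 1, 3)   # rows 1..3, no wrap-around
36
>>> func(a, 2, 4)   # rows 2 and 3, then wraps to row 0
32
>>> func(a, 3, 5)   # rows 3, 0, 1
28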
The above version works for stop - start < len(a), i.e. no more than one full wrap-around. For an arbitrary number of wrap-arounds (i.e. arbitrary values for start and stop) the following version can be used:
def multi_wraps(a, start, stop):
    result = 0
    # Adjust both indices in case the start index wrapped around.
    stop -= (start // len(a)) * len(a)
    start %= len(a)
    stop += 1  # Include the element pointed to by the stop index.
    n_wraps = (stop - start) // len(a)
    if n_wraps > 0:
        result += n_wraps * a.sum()
        stop = start + (stop - start) % len(a)
    result += np.sum(a[start:stop])
    if stop > len(a):
        result += np.sum(a[:stop % len(a)])
    return result
In case n_wraps > 0, some parts of the array would be summed twice, which is unnecessarily inefficient, so we can instead compute the sums of the various array parts only as needed. The following version sums every array element at most once:
def multi_wraps_efficient(a, start, stop):
    # Adjust both indices in case the start index wrapped around.
    stop -= (start // len(a)) * len(a)
    start %= len(a)
    stop += 1  # Include the element pointed to by the stop index.
    n_wraps = (stop - start) // len(a)
    stop = start + (stop - start) % len(a)  # Eliminate the wraps since they will be accounted for separately.
    tail_sum = a[start:stop].sum()
    if stop > len(a):
        head_sum = a[:stop % len(a)].sum()
        if n_wraps > 0:
            remaining_sum = a[stop % len(a):start].sum()
    elif n_wraps > 0:
        head_sum = a[:start].sum()
        remaining_sum = a[stop:].sum()
    result = tail_sum
    if stop > len(a):
        result += head_sum
    if n_wraps > 0:
        result += n_wraps * (head_sum + tail_sum + remaining_sum)
    return result
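A randomized cross-check of the three versions against each other (my own addition; index_arrays refers to the np.arange version from the first answer):
import numpy as np

rng = np.random.default_rng(0)
a = rng.integers(0, 10, size=(50, 4))

for _ in range(1000):
    start = int(rng.integers(0, 500))         # may wrap several times
    stop = start + int(rng.integers(0, 500))
    expected = a[np.arange(start, stop + 1) % a.shape[0]].sum()
    assert multi_wraps(a, start, stop) == expected
    assert multi_wraps_efficient(a, start, stop) == expected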
The following plot shows a performance comparison between using index arrays and the two multi-wrap methods presented above. The tests are run on a (1_000, 1_000) lattice. One can observe that for the multi_wraps method there is an increase in runtime when going from 1 to 2 wrap-arounds, since it unnecessarily sums the array twice. The multi_wraps_efficient method has the same performance regardless of the number of wrap-arounds, since it sums every array element no more than once.
The performance plot was generated using the perfplot package:
perfplot.show(
    setup=lambda n: (np.ones(shape=(1_000, 1_000), dtype=int), 400, n*1_000 + 200),
    kernels=[
        lambda x: index_arrays(*x),
        lambda x: multi_wraps(*x),
        lambda x: multi_wraps_efficient(*x),
    ],
    labels=['index_arrays', 'multi_wraps', 'multi_wraps_efficient'],
    n_range=range(1, 11),
    xlabel="Number of wrap-around",
    equality_check=lambda x, y: x == y,
)

Remove the biggest item from the array and add half of it back to the same position

How do I remove the biggest integer from the array and add half of that number (rounded up) back into the array in the same position, repeated n times?
I solved this problem, but it is very slow; HackerRank does not accept it as a valid answer because it takes too long.
import math

n = 10
num = [1,1,1,1,1,1,4,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
for i in range(0, n):
    index = num.index(max(num))
    num[index] = math.ceil(num[index]/2)
The example above only works because it is a small array.
Edited:
So far the only improvement I made is described below. Managed to pass 5 out of 10 unit tests.
num.sort(reverse=True)
for i in range(0, n):
    num[0] = math.ceil(num[0] / 2)
    if len(num) > 1 and num[0] < num[1]:
        num.sort(reverse=True)
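One further refinement along the same lines, as a sketch (my own, not from the question): keep the list sorted ascending and use bisect.insort to place each halved value, instead of re-sorting:
import bisect
import math

num.sort()  # ascending, so the largest element is at the end
for _ in range(n):
    largest = num.pop()                       # O(1) removal of the max
    bisect.insort(num, math.ceil(largest/2))  # binary search + O(n) list insert
Note that insort's list insertion still shifts elements, so the heap approach in the answer below is asymptotically better.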
You could try using a heap (assuming that you are allowed to use extra memory). For small inputs your solution is faster, but for large inputs the heap appears to be faster. I have also timed one of the other answers here.
import heapq
import math
import time
import random

n = 500
num = random.sample(range(10000), k=1000)
num_copy = list(num)
num_copy2 = list(num)

start = time.time()
tuples = list(zip([-x for x in num], range(len(num))))
heapq.heapify(tuples)
largest = heapq.heappop(tuples)
for i in range(n):
    new_item = (-math.ceil(-largest[0]/2), largest[1])
    largest = heapq.heappushpop(tuples, new_item)
heapq.heappush(tuples, largest)
for _ in range(len(num)):
    val, index = heapq.heappop(tuples)
    num[index] = -val
print(time.time() - start)

start = time.time()
num = num_copy
for i in range(0, n):
    index = num.index(max(num))
    num[index] = math.ceil(num[index] / 2)
print(time.time() - start)

start = time.time()
num = num_copy2
for i in range(n):
    max_index = max(enumerate(num), key=lambda pair: pair[1])[0]
    num[max_index] = math.ceil(num[max_index]/2)
print(time.time() - start)
Output:
0.0014951229095458984
0.007564067840576172
0.03954339027404785
Caching the result of max(num) avoids scanning the list twice per step (just don't name the cached value max, or it will shadow the built-in and crash on the next iteration):
for i in range(0, n):
    biggest = max(num)
    index = num.index(biggest)
    num[index] = math.ceil(biggest/2)
Might be a little faster.
I think this would be faster (only using built-ins):
n = 10
num = [1,1,1,1,1,1,4,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
for i in range(n):
    max_index = max(enumerate(num), key=lambda pair: pair[1])[0]
    num[max_index] = math.ceil(num[max_index]/2)
C++ Solution
Insert all elements into a C++ max priority queue.
For the next n steps:
Read the top element (top() takes O(1) time), then pop it and insert its half back into the priority queue (each of these heap updates takes O(log n) time).

Why is this python code taking so long?

Alright, I have this Python code that compares merge sort and selection sort, but it is taking forever. When done for n = 90,000 (the size of the list), it only takes about 3 seconds to sort the list. By this logic, it would take about 10 * 3 * 9 seconds (number of run-throughs * duration * incremented run-throughs; we start with 10,000, then do 20,000, then 30,000, etc.). However, it takes far longer than that.
import time
import random

# Selection Sort Code #
def maxIndex(J):
    return J.index(max(J))

def swap(LCopy, i, j):
    temp = LCopy[i]
    LCopy[i] = LCopy[j]
    LCopy[j] = temp

# Implementation of selection sort
def selectionSort(L):
    for i in range(len(L)-1, 0, -1):
        j = maxIndex(L[0:i+1])
        swap(L, i, j)

# Merge Sort Code #
# Assumes that L[first:mid+1] is sorted and also
# that L[mid: last+1] is sorted. Returns L with L[first: last+1] sorted
def merge(L, first, mid, last):
    i = first    # index into the first half
    j = mid + 1  # index into the second half
    tempList = []
    # This loop goes on as long as BOTH i and j stay within their
    # respective sorted blocks
    while (i <= mid) and (j <= last):
        if L[i] <= L[j]:
            tempList.append(L[i])
            # print(L[i], "from the first block")
            i += 1
        else:
            tempList.append(L[j])
            # print(L[j], "from the second block")
            j += 1
    # If i goes beyond the first block, there may be some elements
    # in the second block that need to be copied into tempList.
    # Similarly, if j goes beyond the second block, there may be some
    # elements in the first block that need to be copied into tempList
    if i == mid + 1:
        tempList.extend(L[j:last+1])
        # print(L[j:last+1], "some elements in second block are left over")
    elif j == last+1:
        tempList.extend(L[i:mid+1])
        # print(L[i:mid+1], "some elements from first block are left over")
    L[first:last+1] = tempList
    # print(tempList)

# The merge sort function; sorts the sublist L[first:last+1]
def generalMergeSort(L, first, last):
    # Base case: if first == last then it is already sorted
    # Recursive case: L[first:last+1] has size 2 or more
    if first < last:
        # divide step (integer division; / gave a float index in Python 3)
        mid = (first + last) // 2
        # conquer step
        generalMergeSort(L, first, mid)
        generalMergeSort(L, mid+1, last)
        # combine step
        merge(L, first, mid, last)

# Wrapper function
def mergeSort(L):
    generalMergeSort(L, 0, len(L)-1)

m = 10
n = 100000
n_increments = 9
baseList = [random.randint(0, 100) for r in range(n)]
i = 0
while i < n_increments:
    j = 0
    sel_time = 0
    mer_time = 0
    while j < m:
        # Do a Selection Sort #
        x = time.perf_counter()
        selectionSort(baseList)
        y = time.perf_counter()
        sel_time += (y - x)
        random.shuffle(baseList)
        # Do a Merge Sort #
        x = time.perf_counter()
        mergeSort(baseList)
        y = time.perf_counter()
        mer_time += (y - x)
        random.shuffle(baseList)
        j += 1
    print("average select sort time for a list of", n, "size:", sel_time / m)
    print("average merge sort time for a list of", n, "size:", mer_time / m)
    j = 0
    i += 1
    n += 10000
Because you are using O(n^2) sorting algorithms. This means that if you double n, the algorithm takes about four times longer to run, so the total time does not scale linearly the way your estimate assumes. Note also that you are starting at 100,000, not 10,000.
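To make the scaling concrete, a back-of-the-envelope sketch (my own; it takes the quoted ~3 seconds for 90,000 elements and assumes pure O(n^2) behaviour for selection sort):
# Predicted selection-sort time, scaled quadratically from the one
# data point in the question: ~3 s for n = 90,000.
base_n, base_secs = 90_000, 3.0

total = 0.0
for n in range(100_000, 190_000, 10_000):  # the nine sizes the loop visits
    per_run = base_secs * (n / base_n) ** 2
    total += 10 * per_run  # m = 10 runs per size
print(f"predicted selection-sort total: ~{total/60:.0f} minutes")
That alone comes to over ten minutes of selection sorting, before counting the merge sorts and the shuffles.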
