What is this bucket sort implementation doing?

What is this bucket sort implementation doing? - python

This is my code for bucket sort in Python.
from random import randrange
def insertion_sort(aList):
for i in range(1, len(aList)):
for j in range(i, 0, -1):
if aList[j] < aList[j-1]:
aList[j], aList[j-1] = aList[j-1], aList[j]
return aList
def bucket_sort(aList):
buckets = [[]] * len(aList)
for index, value in enumerate(aList):
buckets_index = value * len(aList) // (max(aList) + 1)
buckets[buckets_index].append(value)
answer = []
for bucket in buckets:
answer.extend(insertion_sort(bucket))
# answer += insertion_sort(bucket)
print(buckets[0])
print("\n")
# return answer
aList = [randrange(10) for _ in range(100)]
print(aList)
print("\n")
answer = bucket_sort(aList)
#print(answer)
What is happening? When I run the code, I always find that the first list in buckets is already sorted and the other lists in buckets are all copies of it.
Do I need the insertion sort for each list?
What would I use the "answer" variable for?!
I'm mainly relying on this visualization.

One thing that i notice right off the bat is that you initialize your variable buckets as buckets = [[]] * len(aList). This makes a list of identical copies of the empty list. As such, any modification of this list is replicated in every element of buckets. Change this line to:
buckets = [[] for _ in xrange(len(aList))]
To check if the lists inside the list are separate object, you could check their id's:
print [id(x) for x in buckets]
This should print a list of unique numbers.

I think this bucket sort would be more efficient and is more pythonesque.
def bucket(k):
unique = list(set(k))
values = [k.count(uni) for uni in unique]
result = ([unique[uni] for i in range(values[uni])] for uni in range(len(unique)))
result = sum(result, [])
return result

Related

Why can't I sum these lists? It only returns the first answer, but if I print I can get it to print all answers instead? Thanks :)

This is the code that I have so far. I don't understand why it would work with print but not as a return function?
# Setup
import numpy as np
data_string = input("Enter elements of a list separated by space")
data = data_string.split()
# Function
def sumrescubed(data):
for i in range(len(data)):
data[i] = float(data[i])
data_sum = sum(data)
mean = sum(data) / len(data)
for i in range(1, len(data)):
answer_sum = sum([(data[i] - mean) ** 3])
return answer_sum
sumrescubed(data)

What you probably want to do is make answer_sum a list, and append each cube to it so that you can return the list of individual items (which are what you're seeing when you print(answer_sum) within the loop in your current code):
answer_sum = []
for i in data:
answer_sum.append((i - mean)**3)
return answer_sum
I'd suggest simplifying the whole thing by using comprehensions instead of iterating over the lists by index:
def sumrescubed(data):
nums = [float(i) for i in data]
mean = sum(nums) / len(nums)
return [(i - mean)**3 for i in nums]

Merging sublists with same initial element by substituting null values

I want to merge sublists with the same initial first element, but instead of adding the other values one after the other, I want to substitute values where they are None.
I have a matrix with sublists, each sublist contains 7 values: number of the element, score A, score B, score C, score D, score E, score F. So far for each sublist there is only one value (even if this is the same for different sublists), but I want to merge sublists that contain different scores for the same element.
I have
sub_lists = [(1,None,None,12,None,None,None),
(2,67,None,None,None,None,None),
(2,None,None,83,None,None,None),
...]
So for each sublist there is only 1 score indicated while the others are null. The result I am looking for is
sub_lists = [(1,None,None,12,None,None,None),
(2,67,None,83,None,None,None),
...]
What I have tried is
res = []
for sub in sub_lists:
if res and res[-1][0] == sub[0]:
res[-1].extend(sub[1:])
else:
res.append([ele for ele in sub])
res = list(map(tuple, res))
But this only adds the values one after the other, resulting in
sub_lists = [(1,None,None,12,None,None,None),
(2,67,None,None,None,None,None,None,None,83,None,None,None),
...]
Does someone know how to help me with this?

adam-smooch
he was right I think
sub_lists=[
(1,None,None,12,None,None,None),
(2,67,None,None,None,None,None),
(2,None,None,83,None,None,None)
]
def my_combine(l1, l2):
l1 = list(l1)
l2 = list(l2)
for i in range(len(l1)):
if l1[i] is None:
l1[i] = l2[i]
return l1
results = dict()
for sl in sub_lists:
if sl[0] not in results:
results[sl[0]] = sl[1:]
else:
results[sl[0]] = my_combine(results[sl[0]], sl[1:])
print(results)
# desired outcome.
# sub_lists=[
# (1,None,None,12,None,None,None),
# (2,67,None,83,None,None,None)
# ]
I think #Adam Smooch was right I am modified it a bit

since sub-lists' first numbers will be unique at the end, you could use a dictionary.
so do something like:
def my_combine(l1, l2):
l1 = list(l1)
for i in range(len(l1)):
if l1[i] is None:
l1[i] = l2[i]
return tuple(l1)
results = dict()
for sl in sub_lists:
if sl[0] not in results:
results[sl[0]] = sl[1:]
else:
results[sl[0]] = my_combine(results[sl[0]], sl[1:])

sub_lists=[(1,None,None,12,None,None,None),(2,67,None,None,None,None,None),(2,None,None,83,None,None,None)]
res = [] #resultant matrix
for sub in sub_lists:
if res and res[-1][0] == sub[0]:
valid = [] # let "valid" valid sublist that can be e added in resultant matrix.
valid.append(res[-1][0]) # we will add the the first element of last sublist of resultant matrix, and rest of the elements of sub list of "sub_lists"
valid.extend(sub[1:])
res[-1] = valid
print(sub)
else:
res.append([ele for ele in sub])
res = list(map(tuple, res))
print(res)
# desired outcome.
# sub_lists=[(1,None,None,12,None,None,None),(2,67,None,83,None,None,None)]
hope you got the desirable code,
and my comments make sense.☺️
feel free to ask anything further...!

Generate a dictionary of all possible Kakuro solutions

I'm just starting out with Python and had an idea to try to generate a dictionary of all the possible solutions for a Kakuro puzzle. There are a few posts out there about these puzzles, but none that show how to generate said dictionary. What I'm after is a dictionary that has keys from 3-45, with their values being tuples of the integers which sum to the key (so for example mydict[6] = ([1,5],[2,4],[1,2,3])). It is essentially a Subset Sum Problem - https://mathworld.wolfram.com/SubsetSumProblem.html
I've had a go at this myself and have it working for tuples up to three digits long. My method requires a loop for each additional integer in the tuple, so would require me to write some very repetitive code! Is there a better way to do this? I feel like i want to loop the creation of loops, if that is a thing?
def kakuro():
L = [i for i in range(1,10)]
mydict = {}
for i in L:
L1 = L[i:]
for j in L1:
if i+j in mydict:
mydict[i+j].append((i,j))
else:
mydict[i+j] = [(i,j)]
L2 = L[j:]
for k in L2:
if i+j+k in mydict:
mydict[i+j+k].append((i,j,k))
else:
mydict[i+j+k] = [(i,j,k)]
for i in sorted (mydict.keys()):
print(i,mydict[i])
return
my attempt round 2 - getting better!
def kakurodict():
from itertools import combinations as combs
L = [i for i in range(1,10)]
mydict={}
mydict2={}
for i in L[1:]:
mydict[i] = list(combs(L,i))
for j in combs(L,i):
val = sum(j)
if val in mydict2:
mydict2[val].append(j)
else:
mydict2[val] = [j]
return mydict2

So this is written with the following assumptions.
dict[n] cannot have a list with the value [n].
Each element in the subset has to be unique.
I hope there is a better solution offered by someone else, because when we generate all subsets for values 3-45, it takes quite some time. I believe the time complexity of the subset sum generation problem is 2^n so if n is 45, it's not ideal.
import itertools
def subsetsums(max):
if (max < 45):
numbers = [x for x in range(1, max)]
else:
numbers = [x for x in range(1, 45)]
result = [list(seq) for i in range(len(numbers), 0, -1) for seq in itertools.combinations(numbers, i) if sum(seq) == max]
return(result)
mydict = {}
for i in range(3, 46):
mydict[i] = subsetsums(i)
print(mydict)

Python - List Binary Search without bisect

I have a list and I want to binary_search a key(number).
My code is below but I don't have a clue what to do where the bold text on code is:
(What to do with this? Is an other function? int imid = midpoint(imin, imax))
List = []
x = 1
#Import 20 numbers to list
for i in range (0,20):
List.append (i)
print (List)
key = input("\nGive me a number for key: ")
def midpoint(imin, imax):
return point((imin+imax)/2)
def binary_search(List,key,imin,imax,point):
while (imax >= imin):
int imid = midpoint(imin, imax)
if(List[imid] == key):
return imid;
elif (List[imid] < key):
imin = imid + 1;
else:
imax = imid - 1;
return KEY_NOT_FOUND;
print (binary_search(key))
midpoint(imin, imax)
binary_search(List,key,imin,imax,point)

It doesn't seem to be doing anything for you; remove the call to midpoint, and point, and just have
def binary_search(List,key,imin,imax,point):
while (imax >= imin):
imid = (imin + imax) / 2
(However, there are some things wrong with your code, and it won't work with just that change;
You create a list called List then try to append to an uninitialized variable called myList
You 'import 20 random' numbers, but range() is not random, it's a simple sequence 1, 2, 3, 4...
range already returns a list, no need to count through it and copy it, just use it
You call binary_search with an empty List, a key, and three uninitialized variables
binary_search assumes the list is sorted, which it is, but if the comment about 'random numbers' was correct, it wouldn't be.
)

Fast way to remove a few items from a list/queue

This is a follow up to a similar question which asked the best way to write
for item in somelist:
if determine(item):
code_to_remove_item
and it seems the consensus was on something like
somelist[:] = [x for x in somelist if not determine(x)]
However, I think if you are only removing a few items, most of the items are being copied into the same object, and perhaps that is slow. In an answer to another related question, someone suggests:
for item in reversed(somelist):
if determine(item):
somelist.remove(item)
However, here the list.remove will search for the item, which is O(N) in the length of the list. May be we are limited in that the list is represented as an array, rather than a linked list, so removing items will need to move everything after it. However, it is suggested here that collections.dequeue is represented as a doubly linked list. It should then be possible to remove in O(1) while iterating. How would we actually accomplish this?
Update:
I did some time testing as well, with the following code:
import timeit
setup = """
import random
random.seed(1)
b = [(random.random(),random.random()) for i in xrange(1000)]
c = []
def tokeep(x):
return (x[1]>.45) and (x[1]<.5)
"""
listcomp = """
c[:] = [x for x in b if tokeep(x)]
"""
filt = """
c = filter(tokeep, b)
"""
print "list comp = ", timeit.timeit(listcomp,setup, number = 10000)
print "filtering = ", timeit.timeit(filt,setup, number = 10000)
and got:
list comp = 4.01255393028
filtering = 3.59962391853

The list comprehension is the asymptotically optimal solution:
somelist = [x for x in somelist if not determine(x)]
It only makes one pass over the list, so runs in O(n) time. Since you need to call determine() on each object, any algorithm will require at least O(n) operations. The list comprehension does have to do some copying, but it's only copying references to the objects not copying the objects themselves.
Removing items from a list in Python is O(n), so anything with a remove, pop, or del inside the loop will be O(n**2).
Also, in CPython list comprehensions are faster than for loops.

If you need to remove item in O(1) you can use HashMaps

Since list.remove is equivalent to del list[list.index(x)], you could do:
for idx, item in enumerate(somelist):
if determine(item):
del somelist[idx]
But: you should not modify the list while iterating over it. It will bite you, sooner or later. Use filter or list comprehension first, and optimise later.

A deque is optimized for head and tail removal, not for arbitrary removal in the middle. The removal itself is fast, but you still have to traverse the list to the removal point. If you're iterating through the entire length, then the only difference between filtering a deque and filtering a list (using filter or a comprehension) is the overhead of copying, which at worst is a constant multiple; it's still a O(n) operation. Also, note that the objects in the list aren't being copied -- just the references to them. So it's not that much overhead.
It's possible that you could avoid copying like so, but I have no particular reason to believe this is faster than a straightforward list comprehension -- it's probably not:
write_i = 0
for read_i in range(len(L)):
L[write_i] = L[read_i]
if L[read_i] not in ['a', 'c']:
write_i += 1
del L[write_i:]

I took a stab at this. My solution is slower, but requires less memory overhead (i.e. doesn't create a new array). It might even be faster in some circumstances!
This code has been edited since its first posting
I had problems with timeit, I might be doing this wrong.
import timeit
setup = """
import random
random.seed(1)
global b
setup_b = [(random.random(), random.random()) for i in xrange(1000)]
c = []
def tokeep(x):
return (x[1]>.45) and (x[1]<.5)
# define and call to turn into psyco bytecode (if using psyco)
b = setup_b[:]
def listcomp():
c[:] = [x for x in b if tokeep(x)]
listcomp()
b = setup_b[:]
def filt():
c = filter(tokeep, b)
filt()
b = setup_b[:]
def forfilt():
marked = (i for i, x in enumerate(b) if tokeep(x))
shift = 0
for n in marked:
del b[n - shift]
shift += 1
forfilt()
b = setup_b[:]
def forfiltCheating():
marked = (i for i, x in enumerate(b) if (x[1] > .45) and (x[1] < .5))
shift = 0
for n in marked:
del b[n - shift]
shift += 1
forfiltCheating()
"""
listcomp = """
b = setup_b[:]
listcomp()
"""
filt = """
b = setup_b[:]
filt()
"""
forfilt = """
b = setup_b[:]
forfilt()
"""
forfiltCheating = '''
b = setup_b[:]
forfiltCheating()
'''
psycosetup = '''
import psyco
psyco.full()
'''
print "list comp = ", timeit.timeit(listcomp, setup, number = 10000)
print "filtering = ", timeit.timeit(filt, setup, number = 10000)
print 'forfilter = ', timeit.timeit(forfilt, setup, number = 10000)
print 'forfiltCheating = ', timeit.timeit(forfiltCheating, setup, number = 10000)
print '\nnow with psyco \n'
print "list comp = ", timeit.timeit(listcomp, psycosetup + setup, number = 10000)
print "filtering = ", timeit.timeit(filt, psycosetup + setup, number = 10000)
print 'forfilter = ', timeit.timeit(forfilt, psycosetup + setup, number = 10000)
print 'forfiltCheating = ', timeit.timeit(forfiltCheating, psycosetup + setup, number = 10000)
And here are the results
list comp = 6.56407690048
filtering = 5.64738512039
forfilter = 7.31555104256
forfiltCheating = 4.8994679451
now with psyco
list comp = 8.0485959053
filtering = 7.79016900063
forfilter = 9.00477004051
forfiltCheating = 4.90830993652
I must be doing something wrong with psyco, because it is actually running slower.

elements are not copied by list comprehension
this took me a while to figure out. See the example code below, to experiment yourself with different approaches
code
You can specify how long a list element takes to copy and how long it takes to evaluate. The time to copy is irrelevant for list comprehension, as it turned out.
import time
import timeit
import numpy as np
def ObjectFactory(time_eval, time_copy):
"""
Creates a class
Parameters
----------
time_eval : float
time to evaluate (True or False, i.e. keep in list or not) an object
time_copy : float
time to (shallow-) copy an object. Used by list comprehension.
Returns
-------
New class with defined copy-evaluate performance
"""
class Object:
def __init__(self, id_, keep):
self.id_ = id_
self._keep = keep
def __repr__(self):
return f"Object({self.id_}, {self.keep})"
#property
def keep(self):
time.sleep(time_eval)
return self._keep
def __copy__(self): # list comprehension does not copy the object
time.sleep(time_copy)
return self.__class__(self.id_, self._keep)
return Object
def remove_items_from_list_list_comprehension(lst):
return [el for el in lst if el.keep]
def remove_items_from_list_new_list(lst):
new_list = []
for el in lst:
if el.keep:
new_list += [el]
return new_list
def remove_items_from_list_new_list_by_ind(lst):
new_list_inds = []
for ee in range(len(lst)):
if lst[ee].keep:
new_list_inds += [ee]
return [lst[ee] for ee in new_list_inds]
def remove_items_from_list_del_elements(lst):
"""WARNING: Modifies lst"""
new_list_inds = []
for ee in range(len(lst)):
if lst[ee].keep:
new_list_inds += [ee]
for ind in new_list_inds[::-1]:
if not lst[ind].keep:
del lst[ind]
if __name__ == "__main__":
ClassSlowCopy = ObjectFactory(time_eval=0, time_copy=0.1)
ClassSlowEval = ObjectFactory(time_eval=1e-8, time_copy=0)
keep_ratio = .8
n_runs_timeit = int(1e2)
n_elements_list = int(1e2)
lsts_to_tests = dict(
list_slow_copy_remove_many = [ClassSlowCopy(ii, np.random.rand() > keep_ratio) for ii in range(n_elements_list)],
list_slow_copy_keep_many = [ClassSlowCopy(ii, np.random.rand() > keep_ratio) for ii in range(n_elements_list)],
list_slow_eval_remove_many = [ClassSlowEval(ii, np.random.rand() > keep_ratio) for ii in range(n_elements_list)],
list_slow_eval_keep_many = [ClassSlowEval(ii, np.random.rand() > keep_ratio) for ii in range(n_elements_list)],
)
for lbl, lst in lsts_to_tests.items():
print()
for fct in [
remove_items_from_list_list_comprehension,
remove_items_from_list_new_list,
remove_items_from_list_new_list_by_ind,
remove_items_from_list_del_elements,
]:
lst_loc = lst.copy()
t = timeit.timeit(lambda: fct(lst_loc), number=n_runs_timeit)
print(f"{fct.__name__}, {lbl}: {t=}")
output
remove_items_from_list_list_comprehension, list_slow_copy_remove_many: t=0.0064229519994114526
remove_items_from_list_new_list, list_slow_copy_remove_many: t=0.006507338999654166
remove_items_from_list_new_list_by_ind, list_slow_copy_remove_many: t=0.006562008995388169
remove_items_from_list_del_elements, list_slow_copy_remove_many: t=0.0076057760015828535
remove_items_from_list_list_comprehension, list_slow_copy_keep_many: t=0.006243691001145635
remove_items_from_list_new_list, list_slow_copy_keep_many: t=0.007145451003452763
remove_items_from_list_new_list_by_ind, list_slow_copy_keep_many: t=0.007032064997474663
remove_items_from_list_del_elements, list_slow_copy_keep_many: t=0.007690364996960852
remove_items_from_list_list_comprehension, list_slow_eval_remove_many: t=1.2495998149970546
remove_items_from_list_new_list, list_slow_eval_remove_many: t=1.1657221479981672
remove_items_from_list_new_list_by_ind, list_slow_eval_remove_many: t=1.2621939050004585
remove_items_from_list_del_elements, list_slow_eval_remove_many: t=1.4632593330024974
remove_items_from_list_list_comprehension, list_slow_eval_keep_many: t=1.1344162709938246
remove_items_from_list_new_list, list_slow_eval_keep_many: t=1.1323430630000075
remove_items_from_list_new_list_by_ind, list_slow_eval_keep_many: t=1.1354237199993804
remove_items_from_list_del_elements, list_slow_eval_keep_many: t=1.3084568729973398

import collections
list1=collections.deque(list1)
for i in list2:
try:
list1.remove(i)
except:
pass
INSTEAD OF CHECKING IF ELEMENT IS THERE. USING TRY EXCEPT.
I GUESS THIS FASTER

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

What is this bucket sort implementation doing? - python

I think this bucket sort would be more efficient and is more pythonesque. def bucket(k): unique = list(set(k)) values = [k.count(uni) for uni in unique] result = ([unique[uni] for i in range(values[uni])] for uni in range(len(unique))) result = sum(result, []) return result

Related

Why can't I sum these lists? It only returns the first answer, but if I print I can get it to print all answers instead? Thanks :)

Merging sublists with same initial element by substituting null values

Generate a dictionary of all possible Kakuro solutions

Python - List Binary Search without bisect

Fast way to remove a few items from a list/queue

Categories

Resources