Class somehow links lists [duplicate] - python

This question already has answers here:
List of lists changes reflected across sublists unexpectedly
(17 answers)
Closed 4 years ago.
I have a Class called Heap. In it i have declared three separate lists (min_list, max_list and xor_list). When build_heap is called, it creates a heap list over some inital input list. I then set all three lists (min,max and xor) to the heap list. So they shouldn't be linked in any way. But when I do something like
self.min_list[2] = "6"
it changes all of the three lists. My code is below:
class Heap:
def __init__(self):
self.heap_list = [None]
self.currentSize = 0
self.alist = []
self.min_list = []
self.max_list = []
self.xor_list = []
self.i = 0
self.list_start = 0
self.SCA = 0
self.interval_to = 0 # interval in initial list, not in heap numbering
self.interval_from = 0
def build_heap(self, alist):
i = 0
self.alist = alist
while len(alist) > (2**i):
i += 1
self.i = i
self.list_start = (2**self.i)
for j in range(len(alist)):
self.heap_list.insert(self.list_start + j, alist[j])
for k in range((2**self.i)-1):
self.heap_list.insert(k, None)
# append None to list
# print(len(alist))
for l in range((2**self.i)-len(alist)):
# print("lol")
self.heap_list.append(None)
tup_list = self.heap_list
self.min_list = tup_list
self.max_list = tup_list
self.xor_list = tup_list
def smallest_common_ancestor(self):
bin_from = list(bin(int(self.interval_from) + int(self.list_start))[2:])
bin_to = list(bin(int(self.interval_to) + int(self.list_start))[2:])
smallest_common_ancestor = ""
p = 0
while len(bin_to) > p:
if bin_to[p] == bin_from[p]:
print(p)
smallest_common_ancestor = smallest_common_ancestor + str(bin_to[p])
p += 1
self.SCA = int(smallest_common_ancestor, 2)
def low_id(self):
print("Min_list before change: {}".format(self.min_list))
int_from = int(self.interval_from) + int(self.list_start)
int_to = int(self.interval_to) + int(self.list_start)
m = int_to // 2
while m + 1 != self.SCA:
if (m * 2) == int_to:
self.min_list[m] = self.min_list[(m * 2)]
elif ((m*2) + 1) == int_from:
self.min_list[m] = self.min_list[(m * 2) + 1]
elif self.min_list[m * 2] is None:
if self.min_list[(m * 2) + 1] is None:
self.min_list[m] = None
else:
self.min_list = self.min_list[(m * 2) + 1]
elif self.min_list[(m * 2) + 1] is None:
self.min_list[m] = self.min_list[m * 2]
elif int(self.min_list[m * 2]) >= int(self.min_list[(m * 2) + 1]):
self.min_list[m] = self.min_list[(m * 2) + 1]
else:
self.min_list[m] = self.min_list[(m * 2)]
m -= 1
return self.min_list[self.SCA]
At the beginning i do something along these lines, it shouldn't be the part of the problem
heap = Heap()
heap.build_heap(input_list)
heap.interval_to, heap.interval_from = f.readline.split()

def build_heap(self, alist):
# ...
tup_list = self.heap_list
self.min_list = tup_list
self.max_list = tup_list
self.xor_list = tup_list
This is where your class "links" the lists. Python is pass-by-reference, so min_list, max_list, xor_list, tup_list and heap_list will all point to the same object after you've called build_heap().
To create independent lists with the same values, you need to explicitly copy them:
new_list = list(old_list)

Related

Heap Dijkstra Implementation is slower than Naive Dijsktra Implementation

I have tried to implement the Naive and Heap Dijkstra as shown below but somehow my naive Dijkstra implementation is surprisingly faster. I debugged my code but couldn't understand where my problem in my implementations are.
Why is my heap-based implementation is slower than the naive implementation?
Raw data is stored here:
https://www.algorithmsilluminated.org/datasets/problem9.8.txt
Data Import and Manipulation:
import time
with open("DijkstraTest2.txt", 'r') as input:
lines = input.readlines()
lengths = {}
vertices = []
for line in lines:
contents = line.split("\t")
vertices.append(contents[0])
for content in contents:
content = content.replace('\n', '')
if ',' in content:
edge = contents[0] + '-' + content.split(',')[0]
lengths[edge] = int(content.split(',')[1])
Naive Dijkstra:
def NaiveDijkstra(vertices, start_point, lengths):
X = [start_point]
shortest_paths = {}
for vertex in vertices:
if vertex == start_point:
shortest_paths[vertex] = 0
else:
shortest_paths[vertex] = 999999999999
subset = [key for key in lengths.keys() if start_point == key.split('-')[0]
and key.split('-')[0] in X and key.split('-')[1] not in X]
while len(subset) > 0:
temp_min_dict = {}
for edge in subset:
temp_min = shortest_paths[edge.split('-')[0]] + lengths[edge]
temp_min_dict[edge] = temp_min
new_edge = min(temp_min_dict, key=temp_min_dict.get)
X.append(new_edge.split('-')[1])
shortest_paths[new_edge.split('-')[1]] = shortest_paths[new_edge.split('-')[0]] + lengths[new_edge]
subset = []
for key in lengths.keys():
if key.split('-')[0] in X and key.split('-')[1] not in X:
subset.append(key)
return shortest_paths
start_time = time.time()
print(NaiveDijkstra(vertices = vertices, start_point = '1', lengths = lengths)['197'])
print(time.time() - start_time, "seconds")
My Heap based Dijkstra code:
class Heap:
def __init__(self):
self.size = 0
self.lst = []
def swap(self, a):
if self.size == 1:
return self.lst
else:
if a == 1:
i = 1
else:
i = a // 2
while i > 0:
if i * 2 - 1 >= self.size:
break
elif self.lst[i - 1][1] > self.lst[i * 2 - 1][1]:
temp = self.lst[i - 1]
self.lst[i - 1] = self.lst[i * 2 - 1]
self.lst[i * 2 - 1] = temp
elif i * 2 >= self.size:
break
elif self.lst[i - 1][1] > self.lst[i * 2][1]:
temp = self.lst[i - 1]
self.lst[i - 1] = self.lst[i * 2]
self.lst[i * 2] = temp
i -= 1
# print(f"output: {self.lst}")
def insert(self, element):
# print(f"input: {self.lst}")
self.lst.append(element)
self.size += 1
self.swap(self.size)
def extractmin(self):
val = self.lst.pop(0)[0]
self.size -= 1
self.swap(self.size - 1)
return val
def delete(self, deleted):
ix = self.lst.index(deleted)
temp = self.lst[-1]
self.lst[ix] = temp
self.lst[-1] = deleted
self.lst.pop(-1)
self.size -= 1
#self.swap(self.size)
def FastDijkstra(vertices, start_point, lengths):
X = []
h = Heap()
width = {}
shortest_paths = {}
for vertex in vertices:
if vertex == start_point:
width[vertex] = 0
h.insert((vertex, width[vertex]))
else:
width[vertex] = 999999999999
h.insert((vertex, width[vertex]))
while h.size > 0:
w = h.extractmin()
X.append(w)
shortest_paths[w] = width[w]
Y = set(vertices).difference(X)
for x in X:
for y in Y:
key = f"{x}-{y}"
if lengths.get(key) is not None:
h.delete((y, width[y]))
if width[y] > shortest_paths[x] + lengths[key]:
width[y] = shortest_paths[x] + lengths[key]
h.insert((y, width[y]))
return shortest_paths
start_time = time.time()
print(FastDijkstra(vertices=vertices, start_point='1', lengths=lengths)['197'])
print(time.time() - start_time, "seconds")
The way you implemented the heap version is not efficient. Notably the following make it inefficient:
All nodes are put on the heap instead of only the direct neighbors of the visited nodes. This makes the heap large and slower than needed.
Y = set(vertices).difference(X) is a slow operation, and makes Y unnecessary large.
The nested loop that tries every pair in the Cartesian product of X and Y to see if it is an edge. This point together with the previous should be replaced with a collection of edges starting from X, and then discarding edges that lead to already visited nodes.
For every found edge to delete the target node from the heap, and re-insert it, even if the width didn't change! Deletion is a costly operation (see next point). Only if the Heap implementation supports a decrease-key operation, this is an option, but otherwise the heap should just get an extra entry for the same vertex, knowing that the one with the lowest cost will come out of the heap first.
The heap's delete method has a bad time complexity due to the .index() call.
The heap's extractmin method has a bad time complexity, due to the .pop(0) call. This has O(n) time complexity.
The heap's extractmin does not give correct results (again due to that pop(0)). Here is a little script that shows a mistake:
h = Heap()
for value in 4, 3, 5, 2, 1:
h.insert((value, value))
print(h.extractmin()) # 1 = OK
print(h.extractmin()) # 2 = OK
print(h.extractmin()) # 4 = NOK. 3 expected.
The data structure lengths does not allow to quickly find the edges from a particular vertex. But this is a point that is also making the naive implementation slow. I would however suggest to turn that in a dict.
If this is done right it should run faster. Certainly when you would make use of the native heapq module you'll get good running times.
Here is a (much) faster implementation. It doesn't bother about unreachable vertices, and doesn't bother about possibly having multiple entries on the heap for the same node (with different distances). But it does start with only the starting node on the heap, and uses heapq:
from heapq import heappush, heappop
from collections import defaultdict
def FastDijkstra(vertices, start_point, lengths):
# Create a dictionary for the edges, keyed by source node
edges = defaultdict(list)
for key, length in lengths.items():
x, y = key.split("-")
edges[x].append((length, y))
heap = [(0, start_point)]
shortest_paths = {}
while heap:
cost, x = heappop(heap)
if x in shortest_paths:
continue # this vertex had already been on the heap before
shortest_paths[x] = cost
for length, y in edges[x]:
if y not in shortest_paths:
heappush(heap, (cost + length, y))
return shortest_paths
In my tests this ran hundreds times faster.
Thanks to the above answer (wonderful analysis) I adjusted my implementation which is way faster than the previous version. It is shown below.
class Heap:
def __init__(self):
self.size = 0
self.lst = []
def swap(self, a):
if self.size == 1:
return self.lst
else:
if a == 1:
i = 1
else:
i = a // 2
while i > 0:
if i * 2 - 1 >= self.size:
break
elif self.lst[i - 1][1] > self.lst[i * 2 - 1][1]:
temp = self.lst[i - 1]
self.lst[i - 1] = self.lst[i * 2 - 1]
self.lst[i * 2 - 1] = temp
elif i * 2 >= self.size:
break
elif self.lst[i - 1][1] > self.lst[i * 2][1]:
temp = self.lst[i - 1]
self.lst[i - 1] = self.lst[i * 2]
self.lst[i * 2] = temp
elif self.lst[2*i - 1][1] > self.lst[i * 2][1]:
temp = self.lst[2*i - 1]
self.lst[2*i - 1] = self.lst[i * 2]
self.lst[i * 2] = temp
i -= 1
#print(f"output: {self.lst}")
def insert(self, element):
#print(f"input: {self.lst}")
self.lst.append(element)
self.size += 1
self.swap(self.size)
def extractmin(self):
val = self.lst[0][0]
del self.lst[0]
self.size -= 1
self.swap(self.size-1)
return val
def delete(self, deleted):
ix = self.lst.index(deleted)
temp = self.lst[-1]
self.lst[ix] = temp
self.lst[-1] = deleted
del self.lst[-1]
self.size -= 1
#self.swap(self.size)
def FastDijkstra(vertices, start_point, lengths):
X = []
h = Heap()
width = {}
shortest_paths = {}
for vertex in vertices:
if vertex == start_point:
width[vertex] = 0
h.insert((vertex, width[vertex]))
else:
width[vertex] = 999999999999
h.insert((vertex, width[vertex]))
while h.size > 0:
w = h.extractmin()
X.append(w)
shortest_paths[w] = width[w]
Y = set(vertices).difference(X)
for y in Y:
key = f"{w}-{y}"
if lengths.get(key) is not None:
h.delete((y, width[y]))
if width[y] > shortest_paths[w] + lengths[key]:
width[y] = shortest_paths[w] + lengths[key]
h.insert((y, width[y]))
return shortest_paths
start_time = time.time()
print(FastDijkstra(vertices=vertices, start_point='1', lengths=lengths)['197'])
print(time.time() - start_time, "seconds")

Getting Wrong ancestor for the right side of the tree

So I was trying to implement LCA using Euler tour and RMQ with Sparse Table in python. My code is working fine, if I insert number from left side of the tree. The code for building the tree is:
The node class has data, left_child and right_child:
class Node:
def __init__(self, value):
self.data = value
self.right_child = None
self.left_child = None
Then I have a tree class which I used to build the tree:
class Tree:
content = []
def __init__(self):
self.root = None
def add_child(self, arr, root, i, n):
if i < n:
temp = Node(arr[i])
root = temp
root.left_child = self.add_child(arr, root.left_child, 2 * i + 1, n)
root.right_child = self.add_child(arr, root.right_child, 2 * i + 2, n)
return root
def pre_order(self, root):
if root != None:
Tree.content.append(root.data)
self.pre_order(root.left_child)
self.pre_order(root.right_child)
return Tree.content
Finally, I got an LCA class which mainly contains the function for building the sparse table, then do a RMQ on the sparse table, then a function that does the part of the euler tour and finally the function for finding LCA.
class LCA:
def __init__(self, root, length):
self.pre_process_array = [[0] * (math.floor(math.log2(length) + 2)) for _ in range((length * 2) - 1)]
self.euler = [0] * (length * 2 - 1)
self.height = [0] * (length * 2 - 1)
self.index = [-1 for _ in range(length + 1)]
self.tour = 0
self.euler_tour(root, 0)
self.build_sparse_table(self.height)
print(self.pre_process_array)
def build_sparse_table(self, height_array):
for i in range(len(height_array)):
self.pre_process_array[i][0] = height_array[i]
j = 1
while (1 << j) <= len(height_array):
i = 0
while (i + (1 << j) - 1) < len(height_array):
if self.pre_process_array[i][j - 1] < self.pre_process_array[i + (1 << (j - 1))][j-1]:
self.pre_process_array[i][j] = self.pre_process_array[i][j - 1]
else:
self.pre_process_array[i][j] = self.pre_process_array[i + (1 << (j - 1))][j - 1]
i += 1
j += 1
def rmq(self, l, h):
j = int(math.log2(h - l + 1))
if self.pre_process_array[l][j] <= self.pre_process_array[h - (1 << j) + 1][j]:
return self.pre_process_array[l][j]
else:
return self.pre_process_array[h - (1 << j) + 1][j]
def euler_tour(self, root, level):
if root is not None:
self.euler[self.tour] = root.data
self.height[self.tour] = level
self.tour += 1
if self.index[root.data] == -1:
self.index[root.data] = self.tour - 1
if root.left_child is not None:
self.euler_tour(root.left_child, level + 1)
self.euler[self.tour] = root.data
self.height[self.tour] = level
self.tour += 1
if root.right_child is not None:
self.euler_tour(root.right_child, level + 1)
self.euler[self.tour] = root.data
self.height[self.tour] = level
self.tour += 1
def find_LCA(self, val1, val2):
if val1 >= len(self.index) or val2 >= len(self.index) or self.index[val1] == -1 or self.index[val2] == -1:
return -1
if self.index[val1] > self.index[val2]:
return self.euler[self.rmq(self.index[val2], self.index[val1])]
elif self.index[val2] > self.index[val1]:
return self.euler[self.rmq(self.index[val1], self.index[val2])]
And my driver code is as follow:
tree_data = [1,2,3,4,5,6,7,8,9]
length = len(tree_data)
tree = Tree()
root = tree.root
tree.root = tree.add_child(tree_data, root, 0, length)
l = LCA(tree.root, length)
print(l.find_LCA(6, 3))
so, my tree should something look like:
1
/ \
2 3
/ \ / \
4 5 6 7
/ \
8 9
Now, if I try to find the LCA(8, 9) or LCA(4, 3), I get the correct output but whenever I try to give LCA(6, 7) or LCA(3, 7), It always keeps returning 2 as the answer where it should return 3 and 3 respectively.
I don't have any idea where is the mistake as, from my side it looks pretty fine.

Bubble sort a dictionary

I am having problem to implement the bubble, insertion and selection sort. I have no idea how to make the whole thing works. Any kind soul pleaseee
Here below are the object added into the dictionary
items = {}
items[1] = Item(1,'Juicy',2,99,'New Zealand','Orange')
items[2] = Item(4,'Sweet',2,99,'Thailand','Mango')
items[3] = Item(6,'Tasty & Sweet',4,99,'Malaysia','Bananas')
items[4] = Item(2,'Juicy',5,99,'Australia','Watermelons')
Here below are the sort function but are unable to sort
def bubble(theSeq):
n = len(theSeq)
for i in range(1, n):
for j in range(n - i):
if theSeq[j] > theSeq[j + 1]:
tmp = theSeq[j]
theSeq[j] = theSeq[j + 1]
theSeq[j + 1] = tmp
def selection(theSeq):
n = len(theSeq)
for i in range(n - 1):
smallNdx = i # 0
for j in range(i + 1, n):
if theSeq[j] > theSeq[smallNdx]:
smallNdx = j
if smallNdx != i:
tmp = theSeq[i]
theSeq[i] = theSeq[smallNdx]
theSeq[smallNdx] = tmp
def insertion(theSeq):
n = len(theSeq)
for i in range(0, n):
value = theSeq[i]
pos = i
while pos > 0 and value < theSeq[pos - 1]:
theSeq[pos] = theSeq[pos - 1]
pos -= 1
theSeq[pos] = value
If "Item" is a class, firstly, you should make sure, that you enabled comparison of class instances, using method: __le__, __ge__ and so on, secondly, I think you have some problems with first index in your dictionary, which is equal to 1, but in sorting you start counting from 0, in your place I would write this code for insertion sort:
def insertion(theSeq):
n = len(theSeq)
for i in range(1, n + 1):
pos = i
while pos > 1 and theSeq[pos] < theSeq[pos - 1]:
theSeq[pos], theSeq[pos - 1] = theSeq[pos - 1], theSeq[pos]
pos -= 1
But I think, that the main problem is that you can't compare instances of "Item" class, I've run your code on list's with parameters and it went successfully:
items = {}
items[1] = [1,'Juicy',2,99,'New Zealand','Orange']
items[2] = [4,'Sweet',2,99,'Thailand','Mango']
items[3] = [6,'Tasty & Sweet',4,99,'Malaysia','Bananas']
items[4] = [2,'Juicy',5,99,'Australia','Watermelons']
insertion(items)
for x in items.items():
print(x)

Hash table problem for education perposes

The problem is this:
It is required to create and use the hash table structure in a problem
large number of keys.
Here are the steps:
Creating one million (1,000,000) visits to a department store
and Credit Card Payment.
From the very large number of different cards, a relatively small subset is created
as follows. Credit cards for visits will have
sixteen (16) specific fixed digits eg 1234567890123456,
but in four (4) of the sixteen (16) random positions they will also have
four characters: X, Y, Z, W in random order.
eg 12Y45W789012Z4X6
In the other places the prices are the initial ones.
I have written the codes. Is is supposed to run super fast but it runs super slowly and I don't know why... Currently, I am running my code for 10,000 cards. Could you help me? Please excuse my poor english...
The code is bellow:
import string
import random
import time
random.seed(1059442)
global max_load_factor
max_load_factor = 0.6
def printGreaterThan2(num):
while True:
if num % 2 == 1:
isPrime = True
for x in range(3,int(num**0.5),2):
if num % x == 0:
isPrime = False
break
if isPrime:
return num
num += 1
N = printGreaterThan2(1000)
arr = [ [] for _ in range(N)]
arr = [ None for _ in range(N)]
def CreatNewItem():
letters = "WXZY"
days = ["Mon", "Tue", "Wed" , "Thu" , "Fri", "Sat"]
s = ''
count = 0
num = ['1','2','3','4','5','6','7','8','9','0','1','2','3','4','5','6']
list_a = []
while(count!=4):
a = random.randint(0,15)
b = random.choice(letters)
if b not in num and a not in list_a:
num[a] = b
count = count + 1
list_a.append(a)
s = ''.join(num)
d = random.randint(0,5)
day = days[d]
money = random.randint(10,100)
a = [s,day,money]
return a
def hash(key, tablesize):
sum = 0
for pos in range(len(key)):
sum = sum + ord(key[pos])
hash = sum % tablesize
return hash
#--------------------------------------
def rehash(oldhash , tablesize):
rehash = ( oldhash + 1 ) % tablesize
return rehash
#--------------------------------------
def put2 (arr,a,N,lenght,collitions):
if float(lenght)/float(N) >= max_load_factor:
(arr,N,collitions) = Resize(arr,N,lenght,collitions)
key = a[0]
i = hash(key,N)
j =0
while (True):
if arr[i] is None:
arr[i] = a
lenght = lenght + 1
break
elif arr[i][0] == key:
arr[i][2] = arr[i][2] + a[2]
arr[i][1] = arr[i][1] + a[1]
break
else:
if j == 0 :
collitions = collitions +1
j = 1
i = rehash(i,N)
return (lenght,N,arr,collitions )
#----------------------------------------
def Resize(arr,N,lenght,collitions):
print("resize")
N = printGreaterThan2(2*N)
collitions = 0
arr2 = [ [] for _ in range(N)]
arr2 = [ None for _ in range(N)]
for p in arr:
if p is not None:
(lenght,N,arr2,collitions)=put2(arr2,p,N,lenght,collitions)
return (arr2,N,collitions)
#-----------------------------------------
l = 0
cards = []
collitions = 0
t0 = time.time()
i=0
while i!=10000:
b = CreatNewItem()
(l,N,arr,collitions) = put2(arr,b,N,l,collitions)
i=i+1
t1 = time.time() - t0
print('\ntime is {:0.20f}'.format(t1))

Minimum spanning tree using Kruskal in Python, recursion enters a infinite loop

The purpose of this code is to create the shortest path that connect each point. I've tried to implement the Kruskals algorithm in Python, but there is a problem and it occurs when the function findSet(x) is called. It seems like the program enters a infinite loop. I've tried to print x, x.dad and x.dad.dad and x in the crucial moment returns the initial x. I really don't know where is the problem.
class Point:
def __init__(self,x = None, y = None):
self.x = x
self.y = y
self.dad = None
self.rank = None
class Edge:
def __init__(self, firstPoint = None, secondPoint = None, value = None):
self.firstPoint = firstPoint
self.secondPoint = secondPoint
if self.firstPoint != None and self.secondPoint != None:
self.value = abs(firstPoint.x-secondPoint.x) + abs(firstPoint.y - secondPoint.y)
else:
self.value = float("inf")
def makeSet(node):
node.dad = node
node.rank = 0
def findSet(x):
print x, x.dad,x.dad.dad
if x == x.dad:
return x
x.dad = findSet(x.dad)
return x.dad
def unionSet(node1,node2):
root1 = findSet(node1)
root2 = findSet(node2)
if root1.rank < root2.rank:
root1.dad = root2
if root1.rank > root2.rank:
root2.dad = root1
else:
root2.dad = root1
root1.rank += 1
def merge(A, p, q, r):
L = A[p:q+1]
R = A[q+1:r+1]
infinite = Edge()
L.append(infinite)
R.append(infinite)
temp = []
i = 0
j = 0
for k in range(p,r+1):
if L[i].value <= R[j].value:
temp.append(L[i])
i += 1
else:
temp.append(R[j])
j += 1
A[p:r+1] = temp
def mergeSort(A,p,r):
A2 = A
if p < r:
q = int((p+r)/2)
mergeSort(A2, p, q)
mergeSort(A2, q+1, r)
merge(A2, p, q, r)
def readPoint(A):
temp = []
for i in range(0,len(A), 2):
tempPoint = Point(A[i],A[i+1])
makeSet(tempPoint)
temp.append(tempPoint)
return temp
def calculateEdges(A):
tempArray = []
for i in range(0,len(A)):
for j in range(i+1,len(A)):
edge = Edge(A[i], A[j])
tempArray.append(edge)
return tempArray
def Kruskal(N, E):
mergeSort(E,0,len(E)-1)
j = 0
i = 0
total = 0
while j < N:
if findSet(E[i].firstPoint) != findSet(E[i].secondPoint):
unionSet(E[i].firstPoint,E[i].secondPoint)
j += 1
total += E[i].value
if i < len(E) - 1:
i += 1
return total
Ar = readPoint([18, 2, 99, 68])
E = calculateEdges(Ar)
print Kruskal(len(Ar),E)

Categories

Resources