I've come across this question to do with double hashing after finishing a linear and quadratic hashing question.
Linear:
class BasicHashTable:
    """Fixed-size hash table of integer keys using linear probing.

    A collision is resolved by stepping to the next slot (wrapping around
    at the end of the table) until an empty slot is found.
    """

    def __init__(self, size=7):
        """Create a table with ``size`` empty slots."""
        self.size = size
        self.slots = [None] * self.size

    def hash_function(self, key):
        """Return the home slot of ``key``: ``key`` modulo the table length."""
        return key % len(self.slots)

    def rehash(self, old_pos):
        """Return the slot probed after ``old_pos``, wrapping at the end."""
        return (old_pos + 1) % self.size

    def put(self, key):
        """Insert ``key`` and return the index of the slot that received it.

        Raises:
            RuntimeError: if every slot is already occupied. Probing is
                bounded so a full table is reported instead of looping
                forever.
        """
        insert_pos = self.hash_function(key)
        # A linear probe visits every slot exactly once in len(slots) steps,
        # so that many attempts is enough to know the table is full.
        for _ in range(len(self.slots)):
            if self.slots[insert_pos] is None:
                self.slots[insert_pos] = key
                return insert_pos
            insert_pos = self.rehash(insert_pos)
        raise RuntimeError("hash table is full")
Quadratic:
class BasicHashTable:
    """Fixed-size hash table of integer keys using quadratic probing.

    The i-th probe for a key looks at ``(home + i**2) % len(slots)``.
    """

    def __init__(self, size=7):
        """Create a table with ``size`` empty slots."""
        self.size = size
        self.slots = [None] * self.size

    def hash_function(self, key):
        """Return the home slot of ``key``: ``key`` modulo the table length."""
        return key % len(self.slots)

    def rehash(self, old_pos):
        """Return the slot directly after ``old_pos`` (wrapping).

        ``put`` does not call this; it computes quadratic offsets itself.
        """
        return (old_pos + 1) % self.size

    def put(self, key):
        """Insert ``key`` and return the index of the slot that received it.

        Raises:
            RuntimeError: if none of the slots reachable by the quadratic
                probe sequence is free. Probing is bounded so this case is
                reported instead of looping forever.
        """
        table_len = len(self.slots)
        hash_value = self.hash_function(key)
        # i**2 % table_len repeats with period table_len, so probes
        # 0..table_len-1 cover every slot the sequence can ever reach.
        for probes in range(table_len):
            insert_pos = (hash_value + probes ** 2) % table_len
            if self.slots[insert_pos] is None:
                self.slots[insert_pos] = key
                return insert_pos
        raise RuntimeError("no free slot reachable by quadratic probing")
Now I'm asked to implement double hashing using: "The hash2 function for double hashing should be: hash2(key) = 5 - key % 5"
Can anyone explain to me what the hash2 function means? Do I just write up a function?
def hash2(key):
If not how would I answer this question?
The test:
# Insert three keys into a default-sized table and show the slot layout.
hash_t = BasicHashTable()
for key in (3, 20, 10):
    hash_t.put(key)
print(hash_t.slots)
Gives: [None, 10, None, 3, None, None, 20]
Thanks
Related
Update
Thanks to the comments of some community members, I realize that there are some similar problems, but they may be a bit different, so please allow me to explain further.
I actually hope to use the same method in a real problem, so briefly:
Reuse of edges in different paths is completely allowed.
A unique (or new) path from A to B is defined as a collection of vertices that differs from every other path in at least one vertex.
Let me use a quiz from Python data structure and algorithm analysis by Bradley N. Miller and David L. Ranum to explain my question.
Question:
Consider the task of converting the word FOOL to SAGE, also called word ladder problem. In solving
In the word ladder problem, exactly one letter may be replaced at a time, and the result of each step must be an existing word.
Input:
FOUL
FOOL
FOIL
FAIL
COOL
FALL
POOL
PALL
POLL
POLE
PALE
PAGE
SALE
POPE
POPE
SAGE
We can easily find the path from FOOL to SAGE, as Bradley showed:
enter image description here
and I used Breadth First Search (BFS) to solve the problem:
class Vertex:
    """Graph vertex with search bookkeeping and a list of predecessors.

    Attributes:
        id: the vertex key.
        connectedTo: maps neighbouring Vertex objects to edge weights.
        color, dist, disc, fin: search state; a new vertex starts
            'white' with ``dist`` set to ``sys.maxsize``.
        pred: a single predecessor, or a list of predecessors when
            ``setPred`` is used in list mode.
        predNum: number of predecessors appended in list mode.
        traverseIndex: cursor into ``pred`` advanced by ``getPredById``.
        value: optional payload given at construction.
    """

    def __init__(self, key, value=None):
        self.id = key
        self.connectedTo = {}
        self.color = 'white'
        self.dist = sys.maxsize
        self.pred = []
        self.disc = 0
        self.fin = 0
        # Store the payload itself; a trailing comma here would wrap it
        # in a 1-tuple.
        self.value = value
        self.traverseIndex = 0
        self.predNum = 0

    def addNeighbor(self, nbr, weight=0):
        """Record an edge from this vertex to ``nbr`` with ``weight``."""
        self.connectedTo[nbr] = weight

    def __str__(self):
        return '{} connectedTo: {}'.format(
            self.id, str([x.id for x in self.connectedTo]))

    def setColor(self, color):
        self.color = color

    def setDistance(self, d):
        self.dist = d

    def setPred(self, p, list=False):
        """Set the predecessor, or append one when ``list`` is true.

        The parameter name shadows the builtin but is kept so existing
        keyword callers keep working.
        """
        if not list:
            self.pred = p
        else:
            self.pred.append(p)
            self.predNum += 1

    def setDiscovery(self, dtime):
        self.disc = dtime

    def setFinish(self, ftime):
        self.fin = ftime

    def getFinish(self):
        return self.fin

    def getDiscovery(self):
        return self.disc

    def getPred(self):
        """Return the current predecessor, or None when there is none.

        In list mode this is the entry at ``traverseIndex`` while the
        cursor is in range, otherwise the last entry.
        """
        if isinstance(self.pred, list):
            if not self.pred:
                # An empty predecessor list means "no predecessor";
                # indexing it would raise IndexError.
                return None
            if self.traverseIndex < self.predNum:
                return self.pred[self.traverseIndex]
            return self.pred[-1]
        return self.pred

    def __hash__(self):
        return hash(self.id)

    def getPredById(self):
        """Return ``[pred, index]`` for the next unvisited predecessor.

        Advances ``traverseIndex`` and prints a progress line. Returns
        ``[None, None]`` once all list-mode predecessors were handed out
        (or when ``pred`` is not a list).
        """
        if self.traverseIndex < self.predNum and isinstance(self.pred, list):
            pred = self.pred[self.traverseIndex]
            self.traverseIndex += 1
            print("vertix {}: {} of {} preds".format(
                self.id, self.traverseIndex, self.predNum))
            return [pred, self.traverseIndex]
        return [None, None]

    def getCurrPredStaus(self):
        """Return how many list-mode predecessors are still unvisited."""
        return self.predNum - self.traverseIndex

    def getDistance(self):
        return self.dist

    def getColor(self):
        return self.color

    def getConnections(self):
        """Return a view of the neighbouring Vertex objects."""
        return self.connectedTo.keys()

    def getId(self):
        return self.id

    def getWeight(self, nbr):
        return self.connectedTo[nbr]

    def getValue(self):
        return self.value

    def findPath(self, dest):
        """Placeholder; not implemented."""
        pass
class Graph:
    """Directed graph keyed by vertex id.

    Attributes:
        vertList: maps vertex key to its Vertex object.
        numVertics: count of ``addVertex`` calls.
        verticsInSerach: set of vertices recorded via
            ``setSerachedVertixs``.
        GraphBulided: flag toggled through ``setGraphBulided``.
    """

    def __init__(self):
        self.vertList = {}
        self.numVertics = 0
        self.verticsInSerach = set()
        self.GraphBulided = False

    def addVertex(self, key, value=None):
        """Create a Vertex for ``key``, store it, and return it."""
        self.numVertics = self.numVertics + 1
        newVertex = Vertex(key, value=value)
        self.vertList[key] = newVertex
        return newVertex

    def getVertex(self, n):
        """Return the vertex stored under ``n``, or None if absent."""
        return self.vertList.get(n)

    def __contains__(self, n):
        return n in self.vertList

    def addEdge(self, f, t, cost=0, fvalue=None, tvalue=None):
        """Add an edge ``f -> t``, creating either endpoint if missing."""
        if f not in self.vertList:
            self.addVertex(f, fvalue)
        if t not in self.vertList:
            self.addVertex(t, tvalue)
        self.vertList[f].addNeighbor(self.vertList[t], cost)

    def getVertices(self):
        """Return a view of the vertex keys."""
        return self.vertList.keys()

    def setGraphBulided(self, tag=True):
        self.GraphBulided = tag

    def getGraphBulided(self):
        return self.GraphBulided

    def setSerachedVertixs(self, vertix):
        self.verticsInSerach.add(vertix)

    def getSerachedVertixs(self):
        return self.verticsInSerach

    def __iter__(self):
        return iter(self.vertList.values())

    def __hash__(self):
        # __hash__ must always return an int. Hashing the tuple of keys
        # works for an empty graph and for non-string keys alike.
        return hash(tuple(self.vertList))
Here are some additional functions for building graphs
def buildGraph(wordFile, DFSgraph=False):
    """Build a word-ladder graph from a file with one word per line.

    Words that differ in exactly one position share a bucket such as
    ``'P_PE'``; every pair of distinct words in a bucket is connected
    in both directions.

    Args:
        wordFile: path of the word list.
        DFSgraph: when true, build a ``DFSGraph`` instead of a ``Graph``.

    Returns:
        The populated graph object.
    """
    g = DFSGraph() if DFSgraph else Graph()
    d = {}
    # The context manager closes the file even if reading fails.
    with open(wordFile) as wfile:
        for line in wfile:
            # strip() rather than line[:-1]: the last line may have no
            # newline, and slicing would chop off its final letter.
            word = line.strip()
            if not word:
                continue
            for i in range(len(word)):
                bucket = word[:i] + '_' + word[i + 1:]
                d.setdefault(bucket, []).append(word)
    for words in d.values():
        for word1 in words:
            for word2 in words:
                if word1 != word2:
                    g.addEdge(word1, word2)
    return g
class Queue:
    """FIFO queue on a list: new items enter at index 0, leave at the end."""

    def __init__(self):
        self.items = []

    def isEmpty(self):
        """Return True when the queue holds no items."""
        return len(self.items) == 0

    def enqueue(self, item):
        """Add ``item`` at the back of the queue (front of the list)."""
        self.items[0:0] = [item]

    def dequeue(self):
        """Remove and return the oldest item (last element of the list)."""
        return self.items.pop(-1)

    def size(self):
        """Return the number of queued items."""
        return len(self.items)
def bfs(g, start, listpred=False):
    """Breadth-first search from ``start``, recording predecessors.

    Each vertex gets its distance when it is first discovered and keeps
    it. A vertex reached again while still gray gains the extra
    predecessor only in list mode, and is never re-enqueued.

    Args:
        g: graph object; every dequeued vertex that has neighbours is
            passed to ``g.setSerachedVertixs``.
        start: vertex to search from.
        listpred: when true, predecessors are appended to each vertex's
            list; otherwise only the first predecessor is stored.
    """
    # Local import keeps this function self-contained.
    from collections import deque

    start.setDistance(0)
    start.setPred(None)
    pending = deque([start])
    while pending:
        currentVert = pending.popleft()
        neighbours = currentVert.getConnections()
        if neighbours:
            g.setSerachedVertixs(currentVert)
        for nbr in neighbours:
            colour = nbr.getColor()
            if colour == 'white':
                # First discovery: this is the shortest distance, so it
                # is set here and never overwritten afterwards.
                nbr.setColor('gray')
                nbr.setDistance(currentVert.getDistance() + 1)
                nbr.setPred(currentVert, listpred)
                pending.append(nbr)
            elif colour == 'gray' and listpred:
                # Already queued: only remember the additional predecessor.
                known_ids = [p.getId() for p in nbr.pred]
                if currentVert.getId() not in known_ids:
                    nbr.setPred(currentVert, listpred)
        currentVert.setColor('black')
Therefore, we can easily find the shortest path we need (if we only store one predecessor per vertex).
# Build the graph from the word list, then run BFS from 'FOOL' with
# listpred=True so predecessors are recorded in list mode.
wordGraph = buildGraph('fourletterwords1.txt', DFSgraph=False)
bfs(wordGraph, wordGraph.getVertex('FOOL'), listpred=True)
def traverse(y):
    """Follow predecessors from ``y``, printing each one met.

    Stops at the first vertex whose ``getPred()`` is falsy and prints
    that vertex's id last.
    """
    node = y
    while True:
        parent = node.getPred()
        if not parent:
            break
        print(parent)
        node = parent
    print(node.getId())
# Walk the predecessor chain back from the 'SAGE' vertex, printing as it goes.
traverse(wordGraph.getVertex('SAGE'))
However, I still don't know how to trace all the paths correctly, can you give me some suggestions?
FIND path from src to dst ( Dijkstra algorithm )
ADD path to list of paths
LOOP P over list of paths
LOOP V over vertices in P
IF V == src OR V == dst
CONTINUE to next V
COPY graph to working graph
REMOVE V from working graph
FIND path from src to dst in working graph( Dijkstra algorithm )
IF path found
IF path not in list of paths
ADD path to list of paths
I have this code to implement queue and stack data structures. The QueueT by itself works as desired.
When I call st.push(1), it calls push(self, v) as expected, and this in turn calls the enqueue method.
The issue I am facing is that after the statement self._q1.enqueue(v) is executed, self._q1 does not retain the value v.
Code for class QueueT:
class QueueT:
    """Circular-buffer queue that doubles its storage when it fills up."""

    def __init__(self):
        self._CAPACITY = 6
        self._data = [None] * self._CAPACITY
        self._length = 0
        self._first = 0

    def enqueue(self, val):
        """Store ``val`` in the slot after the current last element."""
        if self.size() == self._CAPACITY:
            # Full: grow first, then fall through to the normal insert.
            self._resize(2 * self._CAPACITY)
        slot = (self._first + self.size()) % self._CAPACITY
        self._data[slot] = val
        self._length += 1

    def size(self):
        """Return the number of stored elements."""
        return self._length

    def _resize(self, new_capacity):
        """Move the elements, in queue order, into a new buffer from index 0."""
        previous = self._data
        wrap = len(previous)
        head = self._first
        self._data = [None] * new_capacity
        self._first = 0
        for offset in range(self._length):
            self._data[offset] = previous[(head + offset) % wrap]
        self._CAPACITY = new_capacity
Now code from StackFromQ:
class StackFromQ:
    """Stack wrapper that keeps pushed values in a ``QueueT``."""

    def __init__(self):
        self._top = -1
        self._q1 = QueueT()

    def push(self, v):
        """Enqueue ``v`` on the backing queue and advance the top index."""
        self._q1.enqueue(v)
        self._top = self._top + 1
Caller function:
def stack_of_q():
    """Push 1 and then 2 onto a fresh ``StackFromQ``."""
    st = StackFromQ()
    for item in (1, 2):
        st.push(item)
Finally invocation:
# Run the two-push demonstration.
stack_of_q()
By using this code I'm able to hash only 1 record without any errors or warnings. How can I hash a hundred thousand records taken as input from the CSV file?
import pandas as pd
import csv

# Map each order id (first CSV column) to its country (second column).
# The file is read once, straight into the dictionary.
proper = {}
with open("C:\\Users\\krupa\\Downloads\\proper.csv", "r", newline="") as f:
    for tokens in csv.reader(f):
        # Skip blank or short rows instead of failing with IndexError.
        if len(tokens) < 2:
            continue
        # csv.reader strips the line terminator, so a country in the last
        # column does not carry a trailing newline.
        order_id = tokens[0]
        country = tokens[1]
        proper[order_id] = country
def get_hash(key):
    """Map a base-10 integer string to a bucket index in ``range(100)``.

    Args:
        key: string holding a base-10 integer, e.g. ``'503618705'``.

    Returns:
        ``int(key) % 100`` for non-negative keys, and 0 for negative keys.

    Raises:
        ValueError: if ``key`` is not a valid base-10 integer string.
    """
    number = int(key, base=10)
    # Computed directly in O(1); counting up to the key one step at a
    # time would take hundreds of millions of iterations for large ids.
    return max(number, 0) % 100
# Example call; the returned bucket index is not used.
get_hash('503618705')
class HashTable:
    """Direct-address table with ``MAX`` slots keyed by integer strings.

    There is no collision handling: two keys that hash to the same slot
    share it, so the later write replaces the earlier value.
    """

    def __init__(self):
        self.MAX = 100
        self.arr = [None for i in range(self.MAX)]

    def get_hash(self, key):
        """Return the slot index for ``key`` (a base-10 integer string).

        Non-negative keys map to ``int(key) % MAX``; negative keys map
        to 0.
        """
        number = int(key, base=10)
        # O(1) arithmetic instead of counting up to the key.
        return max(number, 0) % self.MAX

    def __getitem__(self, index):
        """Return whatever is stored in the slot ``index`` hashes to."""
        h = self.get_hash(index)
        return self.arr[h]

    def __setitem__(self, key, val):
        """Store ``val`` in the slot ``key`` hashes to."""
        h = self.get_hash(key)
        self.arr[h] = val

    def __delitem__(self, key):
        """Reset the slot ``key`` hashes to back to None."""
        h = self.get_hash(key)
        self.arr[h] = None
# Store a single record and print the backing array.
t = HashTable()
t["503618705"] = "Tanzania"
print(t.arr)
The code is free of errors but I want to hash all the records in the CSV File
What you need to do is actually use your methods.
Right now you `__init__` a new object t, then you refer to one index in your list and set "Tanzania" as its value. So you don't actually use the methods of your object t, only the list. What you can do is this (I hope I understood your question correctly):
# Sample data: idx[n] is the key under which lst[n] is stored below.
lst = ['Sweden', 'Germany', 'Pakistan', 'Syria', 'Norway']
idx = ["1234", "30", "500", "2034", "443"]
def get_hash(key):
    """Return the bucket index in ``range(100)`` for an integer string.

    Args:
        key: string holding a base-10 integer, e.g. ``'50'``.

    Returns:
        ``int(key) % 100`` for non-negative keys, and 0 for negative keys.

    Raises:
        ValueError: if ``key`` is not a valid base-10 integer string.
    """
    number = int(key, base=10)
    # Direct arithmetic; no need to count up to the key one by one.
    return max(number, 0) % 100
# Example call; the returned bucket index is not used.
get_hash('50')
class HashTable:
    """Table of ``MAX`` slots addressed by hashing integer-string keys.

    Keys that hash to the same slot overwrite each other; no collision
    resolution is performed.
    """

    def __init__(self):
        self.MAX = 100
        self.arr = [None for i in range(self.MAX)]

    def get_hash(self, key):
        """Return the slot index for ``key`` (a base-10 integer string).

        Non-negative keys map to ``int(key) % MAX``; negative keys map
        to 0.
        """
        number = int(key, base=10)
        # Computed in O(1) rather than by counting up to the key.
        return max(number, 0) % self.MAX

    def __getitem__(self, index):
        """Return the value in the slot ``index`` hashes to."""
        h = self.get_hash(index)
        return self.arr[h]

    def __setitem__(self, key, val):
        """Store ``val`` in the slot ``key`` hashes to."""
        h = self.get_hash(key)
        self.arr[h] = val

    def __delitem__(self, key):
        """Clear the slot ``key`` hashes to."""
        h = self.get_hash(key)
        self.arr[h] = None
# Store every sample record, then print each stored value with its slot.
t = HashTable()
for key, country in zip(idx, lst):
    # Subscript syntax dispatches to __setitem__ / __getitem__.
    t[key] = country
for key in idx:
    print(t[key])
    print(t.get_hash(key))
Is there any HashSet implementation in Python? I know HashTable can be represented using dictionaries, but how do we represent HashSet implementation.
I am NOT looking for a data structure with the same methods as HashSet, but rather something with a CONSTANT lookup time, i.e. on the order of O(1).
Also, I want to know if the lookup time in a Python dictionary is constant, i.e. O(1).
I think the HashSet implementation you are looking for is set().
This answer may help you: What's the difference between HashSet and Set?
And yes, the average time complexity of a Python dictionary lookup is O(1). You may read on why we use the term "average time complexity":
Time complexity of accessing a Python dict
I guess this is what you want. You may define a hash function yourself, like what I did in HashSet or just use the built-in hash() function in Python.
class HashSet:
    """Set of integers stored in separately chained buckets.

    ``contents`` holds ``size`` buckets; each bucket is None until first
    used and then a list of the keys that hashed to it.
    """

    CONST = 2 ** 61 - 1

    def __init__(self, size=10_000):
        """Allocate ``2 * size`` empty buckets."""
        self.size = size * 2
        self.contents = [None] * self.size

    def hash(self, x):
        """Return ``x`` modulo the class constant ``CONST``."""
        # CONST is a class attribute, so it must be reached through self;
        # a bare ``CONST`` is not in scope inside a method.
        return x % self.CONST

    def put(self, key):
        """Add ``key`` to the set; adding an existing key is a no-op."""
        idx = self.hash(key) % self.size
        bucket = self.contents[idx]
        if bucket is None:
            self.contents[idx] = [key]
        elif key not in bucket:
            bucket.append(key)

    def get(self, key):
        """Return True if ``key`` is in the set, otherwise False."""
        bucket = self.contents[self.hash(key) % self.size]
        return bucket is not None and key in bucket
# Add three members, then test one present and one absent value.
myset = HashSet()
for member in (123, 145, 138):
    myset.put(member)
for probe in (145, 10):
    res = myset.get(probe)
    print(res)
class HashMap:
    """Key/value map with separate chaining on the built-in ``hash``."""

    def __init__(self, size=10_000):
        """Allocate ``2 * size`` buckets, each None until first used."""
        self.size = size * 2
        self.contents = [None] * self.size

    class __Pair:
        """One stored entry: a key together with its current value."""

        def __init__(self, key, value):
            self.key = key
            self.value = value

    def find(self, arr, key):
        """Return the first pair in ``arr`` whose key equals ``key``, or None."""
        return next((entry for entry in arr if entry.key == key), None)

    def put(self, key, value):
        """Insert ``key`` with ``value``, replacing any existing value."""
        slot = hash(key) % self.size
        bucket = self.contents[slot]
        if bucket is None:
            self.contents[slot] = [self.__Pair(key, value)]
            return None
        existing = self.find(bucket, key)
        if existing is None:
            bucket.append(self.__Pair(key, value))
        else:
            existing.value = value

    def get(self, key):
        """Return the value stored for ``key``.

        Raises:
            KeyError: if ``key`` has not been stored.
        """
        bucket = self.contents[hash(key) % self.size]
        existing = None if bucket is None else self.find(bucket, key)
        if existing is None:
            raise KeyError(f'{key} is not a valid key')
        return existing.value
# Store two entries, then look up both plus one key that was never stored.
mymap = HashMap()
for name, numbers in (('abc', [123,456]), ('def', [456,789])):
    mymap.put(name, numbers)
for name in ('abc', 'def', 'defx'):
    # 'defx' was never put, so this lookup raises KeyError.
    res = mymap.get(name)
    print(res)
#Ayush Gupta, I have implemented the HashSet. Please do have a look at it. Comment for any feedback.
class MyHashSet:
    """Set of integer keys with average O(1) add, remove and lookup."""

    def __init__(self):
        # Backed by a built-in set: a list would make every operation
        # scan all stored keys.
        self.l = set()

    def add(self, key: int) -> None:
        """Insert ``key``; inserting an existing key changes nothing."""
        self.l.add(key)

    def remove(self, key: int) -> None:
        """Delete ``key`` if present; a missing key is ignored."""
        self.l.discard(key)

    def contains(self, key: int) -> bool:
        """Return True if ``key`` is in the set."""
        return key in self.l
# Your MyHashSet object will be instantiated and called as such:
key = 1  # sample key so the calls below are runnable
obj = MyHashSet()
obj.add(key)
obj.remove(key)
param_3 = obj.contains(key)
This is the code I've got that uses open addressing:
import math
class HashTable:
    """Open-addressing map from integer keys to data, linear probing.

    ``slots`` holds the keys and ``data`` the matching values at the same
    index; an empty position is None in both lists.
    """

    def __init__(self):
        self.size = 11
        self.slots = [None] * self.size
        self.data = [None] * self.size

    def put(self, key, data):
        """Store ``data`` under ``key``, replacing any existing value.

        Raises:
            RuntimeError: if the table is full and ``key`` is not in it.
                Probing is bounded so this case is reported instead of
                looping forever.
        """
        table_len = len(self.slots)
        position = self.hashfunction(key, table_len)
        # Linear probing visits each slot once in table_len steps.
        for _ in range(table_len):
            occupant = self.slots[position]
            if occupant is None:
                self.slots[position] = key
                self.data[position] = data
                return
            if occupant == key:
                self.data[position] = data  # replace
                return
            position = self.rehash(position, table_len)
        raise RuntimeError("hash table is full")

    def hashfunction(self, key, size):
        """Return the home slot of ``key``: ``key`` modulo ``size``."""
        return key % size

    def rehash(self, oldhash, size):
        """Return the slot probed after ``oldhash``, wrapping at ``size``."""
        return (oldhash + 1) % size

    def get(self, key):
        """Return the data stored under ``key``, or None if absent."""
        table_len = len(self.slots)
        position = self.hashfunction(key, table_len)
        for _ in range(table_len):
            occupant = self.slots[position]
            if occupant is None:
                # An empty slot ends the probe chain: the key is absent.
                return None
            if occupant == key:
                return self.data[position]
            position = self.rehash(position, table_len)
        # Every slot was inspected without finding the key.
        return None

    def __getitem__(self, key):
        return self.get(key)

    def __setitem__(self, key, data):
        self.put(key, data)
def hash(astring, tablesize):
    """Return the sum of the character codes of ``astring`` modulo ``tablesize``."""
    total = 0
    for ch in astring:
        total += ord(ch)
    return total % tablesize
I'm open to using either dictionaries or linked lists for the chaining, because it's simple and basic.
I'm not sure if I need to make everything in the list a linkedlist or just the ones that need to be chained, also I'm not sure how to get the data from a chained location. Can anyone help me get some ideas?