Is there any HashSet implementation in Python? I know HashTable can be represented using dictionaries, but how do we represent HashSet implementation.
I am NOT looking for a data structure with the same methods as HashSets but rather someone with a CONSTANT lookup time, or the order of O(1);
Also, I want to know if the lookup time in a Python Dictionary is constant aka O(1)
I think the HashSet implementation you are looking is set().
This answer may help you: What's the difference between HashSet and Set?
And yes, the average time complexity for python dictionary O(1). You may read on why we use the term: "Average time complexity":
Time complexity of accessing a Python dict
I guess this is what you want. You may define a hash function yourself, like what I did in HashSet or just use the built-in hash() function in Python.
class HashSet:
CONST = 2 ** 61 - 1
def __init__(self, size = 10_000):
self.size = size * 2
self.contents = [None] * self.size
def hash(self, x):
return x % CONST
def put(self, key):
idx = self.hash(key) % self.size
arr = self.contents[idx]
if arr is None:
self.contents[idx] = [key]
elif key not in arr:
arr.append(key)
return None
def get(self, key):
idx = self.hash(key) % self.size
arr = self.contents[idx]
if arr is None or key not in arr:
return False
return True
myset = HashSet()
myset.put(123)
myset.put(145)
myset.put(138)
res = myset.get(145)
print(res)
res = myset.get(10)
print(res)
class HashMap:
def __init__(self, size = 10_000):
self.size = size * 2
self.contents = [None] * self.size
class __Pair:
def __init__(self, key, value):
self.key = key
self.value = value
def find(self, arr, key):
for pair in arr:
if pair.key == key:
return pair
return None
def put(self, key, value):
idx = hash(key) % self.size
pair = self.__Pair(key, value)
arr = self.contents[idx]
if arr is None:
self.contents[idx] = [pair,]
return None
t = self.find(arr, key)
if t != None:
t.value = value
else:
arr.append(pair)
def get(self, key):
idx = hash(key) % self.size
arr = self.contents[idx]
if arr == None:
raise KeyError(f'{key} is not a valid key')
t = self.find(arr, key)
if t == None:
raise KeyError(f'{key} is not a valid key')
return t.value
mymap = HashMap()
mymap.put('abc', [123,456])
mymap.put('def', [456,789])
res = mymap.get('abc')
print(res)
res = mymap.get('def')
print(res)
res = mymap.get('defx')
print(res)
#Ayush Gupta, I have implemented the HashSet. Please do have a look at it. Comment for any feedback.
class MyHashSet:
def __init__(self):
self.l = []
def add(self, key: int) -> None:
if key not in self.l:
self.l.append(key)
def remove(self, key: int) -> None:
if key in self.l:
self.l.remove(key)
def contains(self, key: int) -> bool:
return key in self.l
# Your MyHashSet object will be instantiated and called as such:
obj = MyHashSet()
obj.add(key)
obj.remove(key)
param_3 = obj.contains(key)
Related
I am trying to build a dictionary from scratch in Python. I have done most of the work but am stuck on a little problem. First, I will start by saying that I am using the inbuilt Python hash() to get the hash_result of the key (key can be int or str) then the index is formed by the hash_result % capacity of dictionary. If the key is a string of characters, everything works fine. As soon as the key is an integer, my custom dictionary breaks. Sometimes everything works, other times, the key gets hash value 0 (for instance) when adding to the dictionary, but the same key returns hash value 4 (for instance) when searching for the key in the dictionary which returns a KeyError since the key is mapped at index 0 and not 4. I believe that at first, the index is calculated by hash(key) % capacity(4 for instance), but as soon as the capacity gets increased x2, the index that is returned by the function hash(key) % capacity(now 8 because x2) is different which results in the problem. I saw this formula in Wikipedia (hash(key) % capacity). I am interested in learning if this is the problem I am facing or if not, what is actually causing this unwanted behavior and how to tackle it.
Here is my code below:
class MyDictionary:
__LOAD_FACTOR_LIMIT = 0.75
__DEFAULT_CAPACITY = 4
def __init__(self):
self.__capacity = self.__DEFAULT_CAPACITY
self.__keys = [[] for i in range(self.__capacity)]
self.__values = [[] for i in range(self.__capacity)]
#property
def keys(self):
return [item for current_list in self.__keys for item in current_list]
#property
def values(self):
return [value for value_list in self.__values for value in value_list]
def __setitem__(self, key, value):
while self.__compute_load_factor() >= self.__LOAD_FACTOR_LIMIT:
self.__extend_dict()
index_hash = self.__hash_function(key)
if self.__is_key_in_dict(index_hash, key):
self.__set_value_to_an_existing_key(index_hash, key, value)
return
self.__set_value_to_a_new_key(index_hash, key, value)
def __getitem__(self, key):
index_hash = self.__hash_function(key)
if self.__is_key_in_dict(index_hash, key):
index_bucket = self.__get_index_bucket(index_hash, key)
return self.__values[index_hash][index_bucket]
raise KeyError('Key is not in dictionary!')
def __str__(self):
key_values = zip(self.keys, self.values)
result = '{' + ", ".join([f"{key}: {value}"
if isinstance(key, int) else f"'{key}': {value}"
for key, value in key_values]) + '}'
return result
def __hash_function(self, key):
index_hash = hash(key) % self.__capacity
return index_hash
def __is_key_in_dict(self, index_hash, key):
if key in self.__keys[index_hash]:
return True
return False
def __get_index_bucket(self, index_hash, key):
index_bucket = self.__keys[index_hash].index(key)
return index_bucket
def __extend_dict(self):
self.__keys += [[] for i in range(self.__capacity)]
self.__values += [[] for i in range(self.__capacity)]
self.__capacity *= 2
def __set_value_to_an_existing_key(self, index_hash, key, value):
index_bucket = self.__get_index_bucket(index_hash, key)
self.__values[index_hash][index_bucket] = value
def __set_value_to_a_new_key(self, index_hash, key, value):
self.__keys[index_hash].append(key)
self.__values[index_hash].append(value)
def __compute_load_factor(self):
k = len(self.__keys)
n = len([bucket for bucket in self.__keys if bucket])
return n / k
def get(self, key, return_value=None):
try:
index_hash = self.__hash_function(key)
index_bucket = self.__get_index_bucket(index_hash, key)
if self.__is_key_in_dict(index_hash, key):
return self.__keys[index_hash][index_bucket]
raise KeyError('Key is not in dictionary!')
except KeyError:
return return_value
def add(self):
pass
def pop(self):
pass
def clear(self):
self.__capacity = self.__DEFAULT_CAPACITY
self.__keys = [[] for i in range(self.__capacity)]
self.__values = [[] for i in range(self.__capacity)]
def items(self):
zipped_key_value = zip(self.keys, self.values)
return [item for item in zipped_key_value]
dictionary = MyDictionary()
dictionary.add()
dictionary[4] = 'hey'
dictionary['2'] = 'cya'
dictionary['4'] = 'welcome'
dictionary['5'] = 'welcome'
dictionary['32'] = 'heya'
dictionary['31'] = 'heya'
dictionary['36'] = 'heya'
dictionary['34'] = 'heya'
print(dictionary[4])
This is because you increase the capacity (stored in the __capacity attribute) by calling the __extend_dict method when the load is over a threshold, which makes the indices of the buckets in which the existing values are stored no longer valid, since you always derive the indices by taking the modulo of the capacity.
You should therefore re-insert the existing keys and values at their new indices every time you increase the dict's capacity:
def __extend_dict(self):
self.__capacity *= 2
new_keys = [[] for _ in range(self.__capacity)]
new_values = [[] for _ in range(self.__capacity)]
for keys, values in zip(self.__keys, self.__values):
for key, value in zip(keys, values):
index_hash = self.__hash_function(key)
new_keys[index_hash].append(key)
new_values[index_hash].append(value)
self.__keys = new_keys
self.__values = new_values
Demo: https://replit.com/#blhsing/NewEnchantingPerimeter
What I am trying to do
I have this hashtable which I am trying to double hash the value however i am getting the error
if hashtable_list[hashKey] == None:
IndexError: list index out of range
I have been at this for hours and can't seem to find where I am going wrong with this double hashing algorithm. Please can someone help me . Any help will be much appreciated from the bottom of my heart
# The HashParent class is the main class and follows an ADT
# in which it holds the key and value
class HashParent:
def __init__(self, key, value):
self.key = key
self.value = value
self.isItemDeleted = False
class HashTable(object):
"""
a basic, minimal implementation of a hash map
"""
def __init__(self):
"""
constructs a new Map
"""
#Create a table size of 4 None values eg [None, None, None, None]
self.table = [None] * 4
self.hashTableSize = 0
#Uses Linear Probing to hash values into the table
def __get_hash_code(self, key, value):
return (hash(key) + value) % len(self.table)
# Uses Linear Probing to hash values into the table
def hashUsingQudratic(self, key, value):
return (hash(key) + value ** 2) % len(self.table)
def double_hashing(self, key, value):
hashtable_size = self.hashTableSize
hashtable_list = self.table
hashKey = hash(key)
if hashtable_list[hashKey] == None:
hashtable_list[hashKey] = key
else:
new_hashkey = hashKey
while hashtable_list[new_hashkey] is not None:
steps = value - (key % value)
new_hashkey = (new_hashkey + steps) % hashtable_size
hashtable_list[new_hashkey] = key
return hashtable_list
def getitem(self, key):
"""
gets the value associated with the key
"""
hashTableLength = len(self.table)
for i in range(hashTableLength):
index = self.__get_hash_code(key, i)
if self.table[index] != None:
if self.table[index].key == key:
if self.table[index].isItemDeleted:
raise KeyError('Key is not in the map')
else:
return self.table[index].value
elif self.table[index] is None:
raise KeyError('Key is not in the map')
raise KeyError('Hmm something has gone wrong here')
def whichMethod(self, whichType, key,i):
if whichType == 'linear':
index = self.__get_hash_code(key, i)
return index
if whichType == 'quadratic':
index = self.hashUsingQudratic(key, i)
return index
if whichType == 'double':
index = self.double_hashing(key, i)
return index
def putItem(self, key, item, whichType):
"""
stores the key value combo in the table
implements open addressing collision resolution
"""
parent = HashParent(key, item)
for i in range(len(self.table)):
index = self.whichMethod(whichType,key,i)
if self.table[index] is None or self.table[index].isItemDeleted:
self.table[index] = parent
self.hashTableSize += 1
break
def deleteValue(self, key):
"""
deletes a value from the hash table
"""
hashTableLength = len(self.table)
for i in range(hashTableLength):
index = self.__get_hash_code(key, i)
if self.table[index] != None:
if self.table[index].key == key:
if self.table[index].isItemDeleted:
raise KeyError('Key is not in the map')
else:
self.table[index].isItemDeleted = True
self.hashTableSize -= 1
break
m = HashTable()
linear = 'linear'
quadratic = 'quadratic'
doubleHash = 'double'
m.putItem('first', 1,doubleHash)
m.putItem('ninth',9 ,doubleHash)
m.putItem('third', 3,doubleHash)
m.putItem('Tenth', 10,doubleHash)
print("The value at key 'ninth' is:" ,m.getitem('ninth'))
m.deleteValue('Tenth')
#Size should now be 3
print('The Hashatble size is:',m.hashTableSize)
Ok, so you do m.putItem('first', 1, doubleHash), so key is "first".
You pass key from putItem to whichMethod, then from whichMethod to double_hashing.
Then, double_hashing does this:
hashtable_list = self.table
self.table starts out as self.table = [None] * 4. It just has four Nones. So hashtable_list will be [None, None, None, None].
Then, it does:
hashKey = hash(key)
if hashtable_list[hashKey] == None:
hash returns an integer, and key is "first". Let's just try that in the interpreter:
>>> hash("first")
-4954399314613441385
>>>
So, hashtable_list[hashKey] is like saying [None, None, None, None][hash("first")], which is like saying [None, None, None, None][-4954399314613441385]. There's your IndexError.
I have this assignment where I would like to create an empty string where sparse vector data would be stored at. I need to check whether the key index exists in the sparse vector (self.data). If not, I need to store, in the empty string, the sparse vector and zero values. The error I am getting is TypeError: str returned non-string (type NoneType). This is my code:
class SparceVector(object):
def __init__(self, n):
self.length = n
self.data = {}
def __str__(self):
outstr = None or ""
assert 0 <= key and key < self.length
try:
outstr = ((str(key), str(value)) for key,value in self.data.items())
return
except KeyError:
return 0
return "The sparce vector is {}".format(outstr)
def __len__(self):
return self.legth
def __getitem__(self, key):
assert 0 <= key and key < self.length
try:
return self.data[key]
except KeyError:
return 0
return self.data[key]
def __setitem__(self, key, value):
assert 0 <= key and key < self.length
if value != 0:
self.data[key] = value
def nonzeros(self):
return self.data
def __add__(self, other):
assert self.length == other.length
merged = SparceVector(self.length)
c = {key:value for key, value in self.data.items()}
for key, value in other.data.items():
try:
c[key] += value
except KeyError:
c[key] = value
merged.data = c
merged.nonzeros()
return merged
def __iter__(self):
return iter(self.data)
def key(self):
return self.data.key()
def items(self):
return self.data.items()
def values(self):
return self.data.values()
I am implementing a Hashmap in python. Right now, I am manually inserting the key and value. What I want is to automatically assign the key of the given value in ascending order. Suppose, we have a number n=8 , then it will automatically start assigning key starts from 1 to 8 , when it reach the key number 8 and we want to insert more values, then it will show a print message like, entry is full.
Instead of
hm.put("1", "sachin")
I want ,
hm.put("sachin")
and it should automatically assign key 1 for sachin.
class Node:
def __init__(self, key, value):
self.key = key
self.value = value
self.next = None
class HashMap:
def __init__(self):
self.store = [None for _ in range(16)]
def get(self, key):
index = hash(key) & 15
if self.store[index] is None:
return None
n = self.store[index]
while True:
if n.key == key:
return n.value
else:
if n.next:
n = n.next
else:
return None
def put(self, key, value):
nd = Node(key, value)
index = hash(key) & 15
n = self.store[index]
if n is None:
self.store[index] = nd
else:
if n.key == key:
n.value = value
else:
while n.next:
if n.key == key:
n.value = value
return
else:
n = n.next
n.next = nd
hm = HashMap()
hm.put("1", "sachin")
hm.put("2", "sehwag")
hm.put("3", "ganguly")
hm.put("4", "srinath")
hm.put("5", "kumble")
hm.put("6", "dhoni")
hm.put("7", "kohli")
hm.put("8", "pandya")
hm.put("9", "rohit")
hm.put("10", "dhawan")
hm.put("11", "shastri")
hm.put("12", "manjarekar")
hm.put("13", "gupta")
hm.put("14", "agarkar")
hm.put("15", "nehra")
hm.put("16", "gawaskar")
hm.put("17", "vengsarkar")
print(hm.get("1"))
print(hm.get("2"))
print(hm.get("3"))
print(hm.get("4"))
print(hm.get("5"))
print(hm.get("6"))
print(hm.get("7"))
print(hm.get("8"))
print(hm.get("9"))
print(hm.get("10"))
print(hm.get("11"))
print(hm.get("12"))
print(hm.get("13"))
print(hm.get("14"))
print(hm.get("15"))
print(hm.get("16"))
print(hm.get("17"))
I've come across this question to do with double hashing after finishing a linear and quadratic hashing question.
Linear:
class BasicHashTable:
def __init__(self,size=7):
self.size = size
self.slots = [None] * self.size
def hash_function(self, key):
return key%len(self.slots)
def rehash(self, old_pos):
return (old_pos + 1) % self.size
def put(self, key):
hash_value = key%len(self.slots)
probe_seq = []
insert_pos = hash_value
probe_seq += [insert_pos]
probes = 1
while(self.slots[insert_pos] != None):
probes += 1
insert_pos=(insert_pos+1)%len(self.slots)
probe_seq += [insert_pos]
self.slots[insert_pos] = key
return insert_pos
Quadratic:
class BasicHashTable:
def __init__(self,size=7):
self.size = size
self.slots = [None] * self.size
def hash_function(self, key):
return key%len(self.slots)
def rehash(self, old_pos):
return (old_pos + 1) % self.size
def put(self, key):
hash_value = key%len(self.slots)
probe_seq = []
insert_pos = hash_value
probe_seq += [insert_pos]
probes = 1
while(self.slots[insert_pos] != None):
insert_pos=(hash_value+probes**2)%len(self.slots)
probes+=1
probe_seq += [insert_pos]
self.slots[insert_pos] = key
return insert_pos
Now I'm asked to implement double hashing using: "The hash2 function for double hashing should be: hash2(key) = 5 - key % 5"
Can anyone explain to me what the hash2 function means? Do I just write up a function?
def hash2(key):
If not how would I answer this question?
The test:
hash_t = BasicHashTable()
hash_t.put(3)
hash_t.put(20)
hash_t.put(10)
print(hash_t.slots)
Gives: [None, 10, None, 3, None, None, 20]
Thanks