how can I Optimize cuckoo hashing - python

I have Python code that implements cuckoo hashing.
I'm working on a hash table where collisions are resolved using cuckoo hashing, but insertion takes a long time when there are many collisions.
I use the Murmur hash family (mmh3) for hashing.
Is there a way to optimize this code, or the cuckoo hashing technique itself?
If so, please elaborate.
from random import randint
import math
import mmh3
#parameters
from parameters import output_bits, number_of_hashes
# Bit mask that keeps the low output_bits bits of an integer.
mask_of_power_of_2 = 2 ** output_bits - 1
# Number of low bits set aside for a hash-function index when an item is
# stored as item_left || index (see left_and_index / extract_index).
log_no_hashes = int(math.log(number_of_hashes) / math.log(2)) + 1
#The hash family used for Cuckoo hashing relies on the Murmur hash family (mmh3)
def location(seed, item):
    '''
    Compute the bin of ``item`` under the Murmur hash selected by ``seed``.

    :param seed: a seed of a Murmur hash function
    :param item: an integer, read as item_left || item_right where
        item_right is the low output_bits bits
    :return: Murmur_hash(item_left) xor item_right
    '''
    low_bits = item & mask_of_power_of_2
    high_bits = item >> output_bits
    # Only the top output_bits bits of the unsigned 32-bit digest are used.
    digest = mmh3.hash(str(high_bits), seed, signed=False)
    return (digest >> (32 - output_bits)) ^ low_bits
def left_and_index(item, index):
    '''
    Pack the high part of ``item`` together with a hash-function index.

    :param item: an integer
    :param index: a log_no_hashes bits integer
    :return: an integer represented as item_left || index
    '''
    item_left = item >> output_bits
    return (item_left << log_no_hashes) + index
def extract_index(item_left_and_index):
    '''
    Recover the hash-function index stored in the low log_no_hashes bits.

    :param item_left_and_index: an integer represented as item_left || index
    :return: index extracted
    '''
    index_mask = (1 << log_no_hashes) - 1
    return item_left_and_index & index_mask
def reconstruct_item(item_left_and_index, current_location, seed):
    '''
    Rebuild the original item from its stored form and the bin it sits in.

    :param item_left_and_index: an integer represented as item_left || index
    :param current_location: the corresponding location, i.e. Murmur_hash(item_left) xor item_right
    :param seed: the seed of the Murmur hash function
    :return: the integer item
    '''
    high_bits = item_left_and_index >> log_no_hashes
    digest = mmh3.hash(str(high_bits), seed, signed=False)
    # location = hash(item_left) ^ item_right, so xor-ing the hash back out
    # of the location yields item_right.
    low_bits = (digest >> (32 - output_bits)) ^ current_location
    return (high_bits << output_bits) + low_bits
def rand_point(bound, i):
    '''
    Draw a uniform integer from [0, bound - 1] that is different from ``i``.

    The value is drawn once from the bound - 1 admissible candidates and
    shifted past ``i``; no rejection loop is needed, so the cost no longer
    depends on how often the excluded value is hit.

    :param bound: an integer
    :param i: an integer less than bound
    :return: a uniform integer from [0, bound - 1], distinct from i
    :raises ValueError: if no admissible value exists (bound == 1 and i == 0),
        a case in which the previous rejection loop never terminated
    '''
    if not 0 <= i < bound:
        # i cannot collide with any candidate: draw from the whole range.
        return randint(0, bound - 1)
    if bound < 2:
        raise ValueError(
            "rand_point: no value in [0, %d] differs from %d" % (bound - 1, i))
    value = randint(0, bound - 2)
    # Values at or above i are shifted up by one, which skips i exactly.
    return value + 1 if value >= i else value
class Cuckoo():
    '''
    Cuckoo hash table with 2 ** output_bits bins.

    Each occupied bin stores item_left || index, where index identifies the
    hash function (entry of hash_seed) that placed the item there.
    '''

    def __init__(self, hash_seed):
        '''
        :param hash_seed: Murmur seeds, indexed by hash-function index
        '''
        self.hash_seed = hash_seed
        self.number_of_bins = 2 ** output_bits
        self.data_structure = [None] * self.number_of_bins
        # Upper bound on the length of one eviction chain.
        self.recursion_depth = int(8 * math.log(self.number_of_bins) / math.log(2))
        self.insert_index = randint(0, number_of_hashes - 1)
        self.depth = 0
        self.FAIL = 0

    def insert(self, item):  # item is an integer
        '''
        Insert ``item``, evicting and re-inserting occupants as needed.

        When the eviction chain reaches recursion_depth, FAIL is set to 1 and
        the last evicted occupant is not placed back in the table.
        '''
        while True:
            slot = location(self.hash_seed[self.insert_index], item)
            evicted = self.data_structure[slot]
            self.data_structure[slot] = left_and_index(item, self.insert_index)

            if evicted is None:
                # Free bin: the chain ends here; reset state for the next call.
                self.insert_index = randint(0, number_of_hashes - 1)
                self.depth = 0
                return

            # The occupant must move to a bin given by a different hash.
            evicted_hash = extract_index(evicted)
            self.insert_index = rand_point(number_of_hashes, evicted_hash)
            if self.depth >= self.recursion_depth:
                self.FAIL = 1
                return

            self.depth += 1
            item = reconstruct_item(evicted, slot, self.hash_seed[evicted_hash])

Related

Setting Enumerate Value to a Variable changes the output of a function

I'm new to the enumeration function, I've only just started to use it. In this code block, I'm enumerating "height" multiple times:
class Solution(object):
    def maxArea(self, height):
        """
        Brute-force "Container With Most Water".

        For every wall, find the farthest wall that is at least as tall and
        keep the largest distance * height product.

        :type height: List[int]
        :rtype: int
        """
        if len(height) == 2:
            return min(height)
        best = -1
        for i, wall in enumerate(height):
            farthest = 0
            for j, other in enumerate(height):
                if other >= wall:
                    farthest = max(farthest, abs(j - i))
            area = farthest * wall
            best = max(area, best)
        return best
I thought a way to optimize this was to set enumerate(height) to a variable to decrease the amount of times the height was put into the enumerate function:
class Solution(object):
    def maxArea(self, height):
        """
        :type height: List[int]
        :rtype: int
        """
        if len(height) == 2:
            return (min(height))
        maxArea = -1
        # NOTE: enumerate() returns a one-shot iterator, not a reusable
        # sequence; once its items have been consumed it stays empty.
        enumeratedHeight = enumerate(height)
        for i, a in enumeratedHeight:
            maxIndex, maxDistance = i, 0
            # This inner loop draws from the SAME iterator as the outer loop,
            # so it consumes every remaining item. The outer loop therefore
            # only ever sees the first element, and the inner loop never sees
            # index 0.
            for j, s in enumeratedHeight:
                distance = abs(j-i)
                if distance>maxDistance and s>=a:
                    maxIndex, maxDistance = j, distance
            area = abs(maxIndex - i) * a
            maxArea = max(area,maxArea)
        return maxArea
I tested this new function on this case [1,8,6,2,5,4,8,3,7]. The before function correctly answered 49, but this one returns 8?
(this code is to solve Container With Most Water on Leetcode, I know it's inefficient, but I've been told to write inefficient code if I can't find an efficient solution)

Unknown source of error in while command in python

I am testing a code written and posted on the website
http://foreverlearning.altervista.org/genetic-programming-symbolic-regression-pt-3/
The portion of the code is located at the bottom part of the webpage. When running the test code mainpova.py, I am getting the syntax error.
The syntax error is,
python mainprova4.py
Traceback (most recent call last):
File "mainprova4.py", line 1, in <module>
import generation as gn
File "/home/adam/DocumentsNew2/MathCode/SymbolicRegression/WebpageCode /generation.py", line 46
while len(selectedMembers) < numMembers: indexSelected = 0 while rnd.randint(0, 100) > int(pickProb * 100) and indexSelected != len(self.membersWithErrors) - 1:
^
SyntaxError: invalid syntax
The indicator ' ^ ' is actually located below e of the while word located before rnd.randint. This is for the portion of the code,
def getMembersForReproduction(self, numMembers, pickProb):
""" Returns a certain number of distinct members from the generation.
The first member is selected with probability pickProb. If it's not chosen, the
second member is selected with probability pickProb, and so on. """
selectedMembers = []
# NOTE(review): the next physical line fuses three statements (the outer
# `while`, `indexSelected = 0`, and the inner `while`). A compound statement
# cannot follow a simple statement on the same line, which is what the
# reported SyntaxError points at.
while len(selectedMembers) < numMembers: indexSelected = 0 while rnd.randint(0, 100) > int(pickProb * 100) and indexSelected != len(self.membersWithErrors) - 1:
indexSelected += 1
memberWithErrorSelected = self.membersWithErrors[indexSelected]
if memberWithErrorSelected[0] not in selectedMembers:
selectedMembers.append(memberWithErrorSelected[0])
return selectedMembers
The source code has been corrected for lesser and greater typos. Other than that the code is fine except for the while loop. What is the source for the syntax error? The website shows what the output should be.
The formatting is still incorrect. It should look like this:
def getMembersForReproduction(self, numMembers, pickProb):
    """ Returns a certain number of distinct members from the generation.
    The first member is selected with probability pickProb. If it's not chosen, the
    second member is selected with probability pickProb, and so on. """
    selectedMembers = []
    # NOTE(review): this loop only ends once numMembers distinct members
    # have been collected; verify callers never ask for more than exist.
    while len(selectedMembers) < numMembers:
        indexSelected = 0
        # Walk down the list until a draw accepts the current position or
        # the last member is reached.
        while rnd.randint(0, 100) > int(pickProb * 100) and indexSelected != len(self.membersWithErrors) - 1:
            indexSelected += 1
        memberWithErrorSelected = self.membersWithErrors[indexSelected]
        # Entries are [member, error] pairs; only the member is returned.
        if memberWithErrorSelected[0] not in selectedMembers:
            selectedMembers.append(memberWithErrorSelected[0])
    return selectedMembers
I was not aware that indentation plays a role in Python. (I am a rookie in Python.) Yes, it fixed the problem, but another one popped up. The error is,
python mainprova4.py
Traceback (most recent call last):
File "mainprova4.py", line 1, in <module>
import generation as gn
File "/home/adam/DocumentsNew2/MathCode/SymbolicRegression/WebpageCode /generation.py", line 105, in <module>
for i in range(0, numCrossover):
NameError: name 'numCrossover' is not defined
The code for this is,
import random as rnd
import generator as gtr
import treeOperations as trop
class Generation(object):
    """ A population of trees, each stored as a mutable [member, error] pair """

    def __init__(self):
        self.membersWithErrors = []

    def addMember(self, member):
        """ Append a tree to the generation with an initial error of 0 """
        self.membersWithErrors.append([member, 0])

    def setMember(self, member, index):
        """ Replace the entry stored at the given position """
        self.membersWithErrors[index] = member

    def setError(self, index, error):
        """ Store the error of the member at the given position """
        entry = self.membersWithErrors[index]
        entry[1] = error

    def getMember(self, index):
        """ Return the member at the given position """
        entry = self.membersWithErrors[index]
        return entry[0]

    def getError(self, index):
        """ Return the error of the member at the given position """
        entry = self.membersWithErrors[index]
        return entry[1]

    def size(self):
        """ Return how many members the generation currently holds """
        return len(self.membersWithErrors)

    def clear(self):
        """ Remove every member from the generation (in place) """
        del self.membersWithErrors[:]

    def sort(self, descending):
        """ Sort the members in place by their error """
        self.membersWithErrors.sort(key=lambda entry: entry[1], reverse=descending)

    def getMembersForReproduction(self, numMembers, pickProb):
        """ Return numMembers distinct members of the generation.
        Positions are tried in list order: each one is accepted when a random
        draw permits it, otherwise the next position is tried; the last
        position is always accepted. """
        chosen = []
        while len(chosen) < numMembers:
            position = 0
            while True:
                if rnd.randint(0, 100) <= int(pickProb * 100):
                    break
                if position == len(self.membersWithErrors) - 1:
                    break
                position += 1
            candidate = self.membersWithErrors[position]
            if candidate[0] not in chosen:
                chosen.append(candidate[0])
        return chosen

    def next(self, crossoverPerc, mutationPerc, randomPerc, copyPerc, shouldPruneForMaxHeight, minHeight, maxHeight, minValue, maxValue, variables, operators):
        """ Replace the population with its successor, built by crossover,
        mutation, fresh random trees and copies (in that order).
        copyPerc and shouldPruneForMaxHeight are accepted but not read here:
        the number of copies is whatever remains of the population size. """
        populationSize = len(self.membersWithErrors)
        offspring = []
        numCrossover = int(populationSize * crossoverPerc)
        numMutation = int(populationSize * mutationPerc)
        numRandom = int(populationSize * randomPerc)
        numCopy = populationSize - numCrossover - numMutation - numRandom

        # Crossover
        for _ in range(numCrossover):
            parents = self.getMembersForReproduction(2, 0.3)
            offspring.append([trop.crossover(parents[0], parents[1]), 0])

        # Mutation
        for _ in range(numMutation):
            parent = self.getMembersForReproduction(1, 0.3)[0]
            mutated = trop.mutation(parent, minValue, maxValue, variables, operators)
            offspring.append([mutated, 0])

        # Random
        for _ in range(numRandom):
            fresh = gtr.getTree(minHeight, maxHeight, minValue, maxValue, variables, operators)
            offspring.append([fresh, 0])

        # Copy
        for survivor in self.getMembersForReproduction(numCopy, 0.3):
            offspring.append([survivor.clone(), 0])

        self.membersWithErrors = offspring
# No side effects
def pruneTreeForMaxHeight(tree, maxHeight, minValue, maxValue, variables):
    """ Build a pruned copy of the given tree: subtrees found at level
    maxHeight (the root is level 1) are replaced by a fresh leaf """
    def prune(node, level):
        # Height-1 nodes are cloned as they are.
        if node.height() == 1:
            return node.clone()
        if level == maxHeight:
            return gtr.getLeaf(minValue, maxValue, variables)
        left = prune(node.op1, level + 1)
        right = prune(node.op2, level + 1)
        return tr.BinaryOperatorInternalNode(node.operator, left, right)

    return prune(tree, 1)
# Crossover
# NOTE(review): per the traceback above, this loop runs at module level
# ("in <module>"), i.e. outside Generation.next(). numCrossover,
# newMembersWithError, self and the other names it uses are locals or
# parameters of next() and are not defined here, hence the NameError.
for i in range(0, numCrossover):
    members = self.getMembersForReproduction(2, 0.3)
    m1 = members[0]
    m2 = members[1]
    newMember = trop.crossover(m1, m2)
    if shouldPruneForMaxHeight and newMember.height() > maxHeight:
        newMember = trop.pruneTreeForMaxHeight(newMember, maxHeight, minValue, maxValue, variables)
    newMembersWithError.append([newMember, 0])
numCrossover is already defined. What am I missing here?

is there any data structure in python which is a substitute for bitset in c++? [duplicate]

Is there a Python class or module that implements a structure that is similar to the BitSet?
There's nothing in the standard library. Try:
http://pypi.python.org/pypi/bitarray
Have a look at this implementation in Python 3.
The implementation basically makes use of the built-in int type, which is arbitrary precision integer type in Python 3 (where long is the Python 2 equivalent).
#! /usr/bin/env python3
"""
bitset.py
Written by Geremy Condra
Licensed under GPLv3
Released 3 May 2009
This module provides a simple bitset implementation
for Python.
"""
try:
    from collections.abc import Sequence
except ImportError:  # very old interpreters without collections.abc
    from collections import Sequence
import math
class Bitset(Sequence):
    """A very simple bitset implementation for Python.
    Note that, like with normal numbers, the leftmost
    index is the MSB, and like normal sequences, that
    is 0.
    Usage:
    >>> b = Bitset(5)
    >>> b
    Bitset(101)
    >>> b[:]
    [True, False, True]
    >>> b[0] = False
    >>> b
    Bitset(001)
    >>> b << 1
    Bitset(010)
    >>> b >> 1
    Bitset(000)
    >>> b & 1
    Bitset(001)
    >>> b | 2
    Bitset(011)
    >>> b ^ 6
    Bitset(111)
    >>> ~b
    Bitset(110)
    """

    value = 0
    length = 0

    @classmethod
    def from_sequence(cls, seq):
        """Build a Bitset from a sequence of bit-like items.

        Each item is converted with bool(int(item)). The first item becomes
        the most significant bit and the last item the LSB. The result is as
        long as ``seq``, so leading zeros are preserved.
        """
        n = 0
        for index, item in enumerate(reversed(seq)):
            if bool(int(item)):
                n |= 1 << index
        return cls(n, len(seq))

    def __init__(self, value=0, length=0):
        """Creates a Bitset with the given integer value.

        When ``length`` is 0 the length is the number of bits needed to
        represent a positive ``value``; zero and negative values get length 0.
        """
        self.value = value
        if length:
            self.length = length
        elif isinstance(value, int) and value > 0:
            # Exact for any size, unlike floor(log(value, 2)) + 1, which goes
            # through floating point and is off for large integers.
            self.length = value.bit_length()
        else:
            self.length = 0

    def _combined(self, value):
        """Wrap ``value`` in a new Bitset at least as long as this one."""
        result = Bitset(value)
        result.length = max(self.length, result.length)
        return result

    def __and__(self, other):
        return self._combined(self.value & int(other))

    def __or__(self, other):
        return self._combined(self.value | int(other))

    def __invert__(self):
        # NOTE: the stored value becomes negative; the visible bits are read
        # from its two's-complement form, limited to the current length.
        return self._combined(~self.value)

    def __xor__(self, value):
        return self._combined(self.value ^ int(value))

    def __lshift__(self, value):
        return self._combined(self.value << int(value))

    def __rshift__(self, value):
        return self._combined(self.value >> int(value))

    def __eq__(self, other):
        """Compare by value against another Bitset or a plain integer."""
        try:
            return self.value == other.value
        except AttributeError:
            return self.value == other

    def __int__(self):
        return self.value

    def __str__(self):
        return "".join("1" if bit else "0" for bit in self[:])

    def __repr__(self):
        return "Bitset(%s)" % str(self)

    def __getitem__(self, s):
        """Gets the specified position.
        Like normal integers, 0 represents the MSB.
        A slice returns a list of booleans, an integer a single boolean.
        """
        if isinstance(s, slice):
            start, stop, step = s.indices(len(self))
            return [
                bool(self.value & (1 << (len(self) - position - 1)))
                for position in range(start, stop, step)
            ]
        pos = len(self) - s - 1
        return bool(self.value & (1 << pos))

    def __setitem__(self, s, value):
        """Sets the specified position/s to value.
        Like normal integers, 0 represents the MSB.
        """
        if isinstance(s, slice):
            start, stop, step = s.indices(len(self))
            for position in range(start, stop, step):
                self._assign_bit(len(self) - position - 1, value)
            self.length = max(start + 1, stop, len(self))
        else:
            pos = len(self) - s - 1
            self._assign_bit(pos, value)
            if len(self) < pos:
                self.length = pos
        return self

    def _assign_bit(self, pos, value):
        """Set (truthy ``value``) or clear the bit ``pos`` places above the LSB."""
        if value:
            self.value |= (1 << pos)
        else:
            self.value &= ~(1 << pos)

    def __iter__(self):
        """Iterates over the values in the bitset."""
        for bit in self[:]:
            yield bit

    def __len__(self):
        """Returns the length of the bitset."""
        return self.length
I wouldn't recommend that in production code but for competitive programming, interview preparation and fun, one should make themselves familiar with bit fiddling.
# `i` is the zero-based position of the bit being manipulated.
b = 0 # The empty bitset :)
b |= 1 << i # Set bit i
b & 1 << i # Test bit i: non-zero when set (`<<` binds tighter than `&`)
b &= ~(1 << i) # Reset (clear) bit i
b ^= 1 << i # Flip bit i
b = ~b # Flip all bits (the result is a negative int)
You might like to take a look at a module I wrote called bitstring (full documentation here), although for simple cases that need to be as fast as possible I'd still recommend bitarray.
Some similar questions:
What is the best way to do Bit Field manipulation in Python?
Does Python have a bitfield type?
Python Bitstream implementations
If the number of bits is finite, enum.IntFlag can be used as a bit set.
See https://docs.python.org/3/howto/enum.html#intflag

Encrypting with AES using sha256 password

I'm trying to implement Shamir's Secret Sharing Scheme in Python. I need to ask the user for a password and then obtain its hash with SHA-256, but when I try to use that as the key for AES it says that the key length must be 16, 24 or 32 bytes. Doesn't SHA-256 return a 32-byte string?
Here's my code:
import getpass
import hashlib
import random
import operator
from functools import reduce
import sys
import os
from Crypto.Cipher import AES
# Modulus shared by every Prime instance (Prime.__init__ reduces its argument
# modulo p). NOTE(review): presumably a prime larger than any SHA-256 value;
# primality is not checked anywhere in this file.
p= 208351617316091241234326746312124448251235562226470491514186331217050270460481
# Module-level list; makeKeys() appends every random evaluation point to it.
points= []
## Class Prime
#
# Class that represents integers modulo p, it is built from a non negative integer
# , comes with its own implementations of basic operations like sum, multiplication
# and division, the division is calculated with the extended Euclidean algorithm.
class Prime(object):
    """Integer modulo the module-level modulus ``p``."""

    ## The constructor
    def __init__(self, n):
        self.n = n % p

    def __add__(self, other): return Prime(self.n + other.n)
    def __sub__(self, other): return Prime(self.n - other.n)
    def __mul__(self, other): return Prime(self.n * other.n)

    def __pow__(self, x):
        """Raise to the integer power ``x`` using modular exponentiation."""
        if x < 0:
            # A negative power is the matching power of the inverse.
            return self.inverse() ** (-x)
        # Three-argument pow keeps every intermediate value below p instead
        # of building the full integer power before reducing it.
        return Prime(pow(self.n, x, p))

    def __truediv__(self, other): return self * other.inverse()
    def __div__(self, other): return self * other.inverse()
    def __neg__(self): return Prime(-self.n)
    def __eq__(self, other): return isinstance(other, Prime) and self.n == other.n
    def __ne__(self, other): return not self == other
    # Defined together with __eq__ so instances stay usable in sets and dicts.
    def __hash__(self): return hash(self.n)
    def __abs__(self): return abs(self.n)
    def __str__(self): return str(self.n)
    def __repr__(self): return "Prime(%d)" % self.n

    def __divmod__(self, divisor):
        q,r = divmod(self.n, divisor.n)
        return (Prime(q), Prime(r))

    def inverse(self):
        """Return the multiplicative inverse modulo p.

        :raises ZeroDivisionError: for the zero element, which has no inverse
            (previously Prime(0) was silently returned)
        """
        if self.n == 0:
            raise ZeroDivisionError("0 has no inverse modulo p")
        (x,y,d)= EuclideanAlgorithm(self.n, p)
        return Prime(x)


def EuclideanAlgorithm(a, b):
    """Extended Euclidean algorithm.

    :return: a tuple (x, y, d) such that a*x + b*y == d, where d is the
        last non-zero remainder of the division chain
    """
    if abs(b) > abs(a):
        # Solve the swapped problem and swap the coefficients back.
        (x,y,d) = EuclideanAlgorithm(b, a)
        return (y,x,d)
    if abs(b) == 0:
        return (1, 0, a)
    # (x2, y2) are the coefficients of the current a, (x1, y1) those of b.
    x1, x2, y1, y2 = 0, 1, 1, 0
    while abs(b) > 0:
        q, r = divmod(a,b)
        x = x2 - q*x1
        y = y2 - q*y1
        a, b, x2, x1, y2, y1 = b, r, x1, x, y1, y
    return (x2, y2, a)
## Class Polynomial
#
# Class that represents a polynomial, it is built from a list of coefficients
# comes with a call method to evaluate the polynomial in a value "x", and has
# a reduced lagrange method that gets the constant value of a polynomial from
# a set of points
class Polynomial(object):
    """Polynomial given by its coefficients, constant term first."""

    ## The constructor
    def __init__(self, coefficients):
        self.coeff= coefficients

    def __call__(self, x):
        """Evaluate the polynomial at ``x``.

        Horner's rule is used: one multiplication and one addition per
        coefficient, with no power computed per term.

        :param x: the evaluation point (a Prime, like the coefficients)
        :return: the value of the polynomial at x, as a Prime
        """
        result = Prime(0)
        # Highest-order coefficient first: result = (...(c_k*x + c_{k-1})*x ...) + c_0
        for coefficient in reversed(self.coeff):
            result = result * x + coefficient
        return result
# NOTE(review): this function cannot run as written; the names below are
# either not imported or not defined in the visible source.
def lagrange(points):
    # NOTE(review): only `reduce` is imported from functools above, so the
    # name `functools` is undefined here; `product` is also never used.
    product= functools.reduce(operator.mul, points, 1)
    # `sum` shadows the builtin of the same name inside this function.
    sum= 0
    for x in points:
        # This local `p` hides the module-level modulus p inside the function.
        p= 1
        for y in points:
            if x!= y:
                p= p*(x-y)
        # NOTE(review): `poly` is not defined in this scope -- presumably the
        # polynomial (or the stored evaluations) was meant to be passed in.
        sum+= poly(x)/(-x*p)
    return sum
## Ask the user for a password and gets its hash code with sha256
def password(passphrase=None):
    """Return the SHA-256 hash of a password as an integer.

    :param passphrase: the password as text or bytes; when omitted, the user
        is prompted for it with getpass
    :return: the 256-bit digest interpreted as a non-negative integer
    """
    if passphrase is None:
        passphrase = getpass.getpass()
    if not isinstance(passphrase, bytes):
        # hashlib only accepts bytes; hashing text directly raises TypeError
        # on Python 3.
        passphrase = passphrase.encode("utf-8")
    return int(hashlib.sha256(passphrase).hexdigest(), 16)
## Makes a polynomial with random coefficients and a fixed constant value,
# evaluates it at n random values and writes the results to a file.
# @param constant The constant value of the polynomial.
# @param evaluations The number of evaluations to be made.
# @param degree The degree of the polynomial.
# @param out_fileName The name of the file where the evaluations are going
# to be written.
# \pre The constant, evaluations and degree must be positive integers.
# \post If no error occurs, the file whose name was passed will contain
# n evaluations of the polynomial.
def makeKeys(constant, evaluations, degree, out_fileName):
    """Write random evaluations of a secret-carrying polynomial to a file.

    The polynomial has ``constant`` as its constant term and random non-zero
    coefficients for the remaining terms. Every evaluation point is also
    appended to the module-level ``points`` list.

    :param constant: the constant term of the polynomial
    :param evaluations: how many "(x, y)" lines to write
    :param degree: total number of coefficients, constant term included
        (NOTE(review): the highest power is therefore degree - 1 -- confirm
        this matches the intended threshold)
    :param out_fileName: path of the text file to (over)write
    """
    # The coefficients protect the secret, so they come from the operating
    # system's CSPRNG rather than the default Mersenne Twister generator.
    rng = random.SystemRandom()
    coeffs = [Prime(constant)]
    for _ in range(1, degree):
        coeffs.append(Prime(rng.randint(1, p - 1)))
    poly = Polynomial(coeffs)
    # `with` closes the file even when an evaluation or a write fails.
    with open(out_fileName, "w") as out_file:
        for _ in range(evaluations):
            randomi = rng.randint(1, p - 1)
            points.append(randomi)
            e = poly(Prime(randomi))
            out_file.write("(%d, %d)\n" % (randomi, e.n))
def encrypt(key, in_fileName, out_fileName, chunksize=64 * 1024):
    """Encrypt a file with AES in CBC mode.

    Output layout: 8-byte little-endian plaintext size, 16-byte IV, then the
    ciphertext. The last block is padded with spaces; decrypt() uses the
    stored size to cut the padding off again.

    :param key: the AES key as 16, 24 or 32 bytes
    :param in_fileName: path of the file to encrypt
    :param out_fileName: path of the encrypted file to write
    :param chunksize: bytes read per step; must be a multiple of 16
    :raises ValueError: if chunksize is not a positive multiple of 16
    """
    import struct

    if chunksize <= 0 or chunksize % AES.block_size != 0:
        raise ValueError("chunksize must be a positive multiple of %d" % AES.block_size)
    # The IV must be unpredictable bytes; ECB takes no IV at all, so CBC is
    # used since the code generates, stores and passes one.
    iv = os.urandom(AES.block_size)
    encryptor = AES.new(key, AES.MODE_CBC, iv)
    filesize = os.path.getsize(in_fileName)
    with open(in_fileName, 'rb') as infile:
        with open(out_fileName, 'wb') as outfile:
            outfile.write(struct.pack('<Q', filesize))
            outfile.write(iv)
            while True:
                chunk = infile.read(chunksize)
                if len(chunk) == 0:
                    break
                elif len(chunk) % 16 != 0:
                    # Only the final chunk can be short; pad it to a block.
                    chunk += b' ' * (16 - len(chunk) % 16)
                outfile.write(encryptor.encrypt(chunk))


def decrypt(ciphertext, key):
    """Decrypt data in the layout written by encrypt().

    :param ciphertext: size header + IV + AES-CBC ciphertext, as bytes
    :param key: the AES key used for encryption
    :return: the original plaintext bytes, padding removed
    :raises ValueError: if the data is too short to hold the header and IV
    """
    import struct

    header_size = struct.calcsize('<Q')
    body_start = header_size + AES.block_size
    if len(ciphertext) < body_start:
        raise ValueError("ciphertext is too short to contain the size header and IV")
    (filesize,) = struct.unpack('<Q', ciphertext[:header_size])
    iv = ciphertext[header_size:body_start]
    cipher = AES.new(key, AES.MODE_CBC, iv)
    plaintext = cipher.decrypt(ciphertext[body_start:])
    # The stored size is authoritative; stripping a padding character could
    # also remove genuine trailing bytes of the file.
    return plaintext[:filesize]


def decrypt_file(file_name, key):
    """Decrypt ``file_name`` into a file named without its last 4 characters."""
    with open(file_name, 'rb') as fo:
        ciphertext = fo.read()
    dec = decrypt(ciphertext, key)
    with open(file_name[:-4], 'wb') as fo:
        fo.write(dec)
I cannot see the link between the password and encrypt/decrypt functions, but I suppose password() returns the key. In that case the key is a Python int (the hex digest parsed as a number), not a byte string. AES needs a key of 128, 192 or 256 bits, supplied as 16, 24 or 32 raw bytes.
Try this:
def password(passphrase=None):
    """Return the SHA-256 digest of a password as 32 raw bytes.

    The result has the right type and length to be used as an AES-256 key.

    :param passphrase: the password as text or bytes; when omitted, the user
        is prompted for it with getpass
    :return: the 32-byte digest
    """
    if passphrase is None:
        passphrase = getpass.getpass()
    if not isinstance(passphrase, bytes):
        # hashlib only accepts bytes; hashing text directly raises TypeError
        # on Python 3.
        passphrase = passphrase.encode("utf-8")
    return hashlib.sha256(passphrase).digest()

Optimize finding diameter of binary tree in Python

I'm wondering how I can optimally find the diameter (or longest path between any two leaf nodes) of a binary tree. I have the basic solution below, but the second solution requires passing pointers. How can I do something like this in Python?
def find_tree_diameter(node):
    """Return the diameter of the tree rooted at ``node``, counted in nodes.

    The result is the largest of: the path through this node (left height +
    right height + 1) and the diameters of the two subtrees. The heights are
    recomputed with height() at every node.

    :param node: a node with ``left`` and ``right`` attributes, or None
    :return: 0 for an empty tree, otherwise the diameter
    """
    # Identity test: None is a singleton, and `==` could be intercepted by a
    # node class that defines __eq__.
    if node is None:
        return 0
    lheight = height(node.left)
    rheight = height(node.right)
    ldiameter = find_tree_diameter(node.left)
    rdiameter = find_tree_diameter(node.right)
    return max(lheight + rheight + 1, ldiameter, rdiameter)
# NOTE(review): this is a sketch carried over from C, not runnable Python:
# `&lheight` (address-of) is not Python syntax, and diameterOpt, root, lh and
# rh are not defined anywhere in this snippet. The `height` parameter stands
# for a C output pointer and is only referenced in the comments below.
def find_tree_diameter_optimized(node, height):
    lheight, rheight, ldiameter, rdiameter = 0, 0, 0, 0
    if node == None:
        # *height = 0;
        return 0
    ldiameter = diameterOpt(root.left, &lheight)
    rdiameter = diameterOpt(root.right, &rheight)
    # *height = max(lheight, rheight) + 1;
    return max(lh + rh + 1, max(ldiameter, rdiameter));
Python supports multiple return values, so you don't need pointer arguments like in C or C++. Here's a translation of the code:
def diameter_height(node):
    """Return (diameter, height) of the tree rooted at ``node``.

    Both values are counted in nodes; an empty tree gives (0, 0).
    """
    if node is None:
        return 0, 0
    left_diameter, left_height = diameter_height(node.left)
    right_diameter, right_height = diameter_height(node.right)
    # Longest path that passes through this node.
    through_node = left_height + right_height + 1
    diameter = max(through_node, left_diameter, right_diameter)
    height = max(left_height, right_height) + 1
    return diameter, height
def find_tree_diameter(node):
    """Return only the diameter computed by diameter_height()."""
    return diameter_height(node)[0]
The function diameter_height returns the diameter and the height of the tree, and find_tree_diameter uses it to just compute the diameter (by discarding the height).
The function is O(n), no matter the shape of the tree. The original function is O(n^2) in the worst case when the tree is very unbalanced because of the repeated height calculations.
Simple Python 3 solution
def findDepth(root):
    """Return the number of nodes on the longest root-to-leaf path (0 if empty)."""
    if root is None:
        return 0
    left_depth = findDepth(root.left)
    right_depth = findDepth(root.right)
    deeper = left_depth if left_depth >= right_depth else right_depth
    return deeper + 1
class Solution:
    def diameterOfBinaryTree(self, root: "TreeNode") -> int:
        """Return the diameter of a binary tree, counted in edges.

        Each node is visited once: the helper returns the depth of a subtree
        and, on the way back up, records the longest path through every node
        (left depth + right depth). This replaces recomputing the depth of
        every subtree at every level, which is quadratic on unbalanced trees.

        :param root: the root node (with ``left`` / ``right``), or None
        :return: the number of edges on the longest path between two nodes
        """
        longest = 0

        def depth(node):
            nonlocal longest
            if node is None:
                return 0
            left = depth(node.left)
            right = depth(node.right)
            longest = max(longest, left + right)
            return 1 + max(left, right)

        depth(root)
        return longest

Categories

Resources