Python: Is this an ok way of overriding __eq__ and __hash__?

Python: Is this an ok way of overriding __eq__ and __hash__? - python

I'm new to Python, and I wanted to make sure that I overrode __eq__ and __hash__ correctly, so as not to cause painful errors later:
(I'm using Google App Engine.)
class Course(db.Model):
dept_code = db.StringProperty()
number = db.IntegerProperty()
title = db.StringProperty()
raw_pre_reqs = db.StringProperty(multiline=True)
original_description = db.StringProperty()
def getPreReqs(self):
return pickle.loads(str(self.raw_pre_reqs))
def __repr__(self):
title_msg = self.title if self.title else "Untitled"
return "%s %s: %s" % (self.dept_code, self.number, title_msg)
def __attrs(self):
return (self.dept_code, self.number, self.title, self.raw_pre_reqs, self.original_description)
def __eq__(self, other):
return isinstance(other, Course) and self.__attrs() == other.__attrs()
def __hash__(self):
return hash(self.__attrs())
A slightly more complicated type:
class DependencyArcTail(db.Model):
''' A list of courses that is a pre-req for something else '''
courses = db.ListProperty(db.Key)
''' a list of heads that reference this one '''
forwardLinks = db.ListProperty(db.Key)
def __repr__(self):
return "DepArcTail %d: courses='%s' forwardLinks='%s'" % (id(self), getReprOfKeys(self.courses), getIdOfKeys(self.forwardLinks))
def __eq__(self, other):
if not isinstance(other, DependencyArcTail):
return False
for this_course in self.courses:
if not (this_course in other.courses):
return False
for other_course in other.courses:
if not (other_course in self.courses):
return False
return True
def __hash__(self):
return hash((tuple(self.courses), tuple(self.forwardLinks)))
Everything look good?
Updated to reflect #Alex's comments
class DependencyArcTail(db.Model):
''' A list of courses that is a pre-req for something else '''
courses = db.ListProperty(db.Key)
''' a list of heads that reference this one '''
forwardLinks = db.ListProperty(db.Key)
def __repr__(self):
return "DepArcTail %d: courses='%s' forwardLinks='%s'" % (id(self), getReprOfKeys(self.courses), getIdOfKeys(self.forwardLinks))
def __eq__(self, other):
return isinstance(other, DependencyArcTail) and set(self.courses) == set(other.courses) and set(self.forwardLinks) == set(other.forwardLinks)
def __hash__(self):
return hash((tuple(self.courses), tuple(self.forwardLinks)))

The first one is fine. The second one is problematic for two reasons:
there might be duplicates in .courses
two entities with identical .courses but different .forwardLinks would compare equal but have different hashes
I would fix the second one by making equality depend on both courses and forward links, but both changes to sets (hence no duplicates), and the same for hashing. I.e.:
def __eq__(self, other):
if not isinstance(other, DependencyArcTail):
return False
return (set(self.courses) == set(other.courses) and
set(self.forwardLinks) == set(other.forwardLinks))
def __hash__(self):
return hash((frozenset(self.courses), frozenset(self.forwardLinks)))
This of course is assuming that the forward links are crucial to an object's "real value", otherwise they should be omitted from both __eq__ and __hash__.
Edit: removed from __hash__ calls to tuple which were at best redundant (and possibly damaging, as suggested by a comment by #Mark [[tx!!!]]); changed set to frozenset in the hashing, as suggested by a comment by #Phillips [[tx!!!]].

Related

How can I make a dataclass hash the same as a string?

I want to replace string keys in dictionaries in my code with a dataclass so that I can provide meta data to the keys for debugging. However, I still want to be able to use a string to lookup dictionaries. I tried implementing a data-class with a replaced __hash__ function, however my code is not working as expected:
from dataclasses import dataclass
#dataclass(eq=True, frozen=True)
class Key:
name: str
def __hash__(self):
return hash(self.name)
k = "foo"
foo = Key(name=k)
d = {}
d[foo] = 1
print(d[k]) # Key Error
The two hash functions are the same:
print(hash(k) == hash(foo)) # True
So I don't understand why this doesn't work.

Two objects having different hashes guarantees that they're different, but two objects having the same hash doesn't in itself guarantee that they're the same (because hash collisions exist). If you want the Key to be considered equal to a corresponding str, implement that in __eq__:
def __eq__(self, other):
if isinstance(other, Key):
return self.name == other.name
if isinstance(other, str):
return self.name == other
return False
This fixes the KeyError you're encountering.

Adding my notes here from the comments on the answer above, as no one looks at those in any case, so those are likely to get swept under the rug at some point.
PyCharm also produces a helpful warning:
'eq' is ignored if the class already defines '__eq__' method.
I think this means to remove the eq=True usage as well, from the #dataclass(...) decorator.
technically, you could also remove the last if isinstance(..., str): as well as the last return statement. I'm not entirely sure what would be the implications of that, however.
Here then, is a slightly more optimized approach (timings with timeit module below):
class Key:
name: str
def __hash__(self):
return hash(self.name)
def __eq__(self, other):
return self.name == getattr(other, 'name', other)
Timings with timeit
from dataclasses import dataclass
from timeit import timeit
#dataclass(frozen=True)
class Key:
name: str
def __hash__(self):
return hash(self.name)
def __eq__(self, other):
if isinstance(other, Key):
return self.name == other.name
if isinstance(other, str):
return self.name == other
return False
class KeyTwo(Key):
def __eq__(self, other):
return self.name == getattr(other, 'name', other)
k = "foo"
foo = Key(name=k)
foo_two = KeyTwo(name=k)
print('__eq__() Timings --')
print('isinstance(): ', timeit("foo == k", globals=globals()))
print('getattr(): ', timeit("foo_two == k", globals=globals()))
assert foo == foo_two == k
Results on my M1 Mac:
__eq__() Timings --
isinstance(): 0.10553250007797033
getattr(): 0.08371329202782363

Why isn't the hash function deterministic?

I'm developing a program using Python 3.6
I have a problem: if I use the deterministic hash function (from standard library of the language) on the same object, the string that results in output (after a run), is different for some runs!
For example:
class Generic:
def __init__(self, id, name, property):
self.id = id
self.name = name
self.property = property
def main():
my_object = Generic(3,'ddkdjsdk','casualstring')
print(hash(my_object))
I would like the output to always be the same (deterministic), but unfortunately different strings appear on the console:
8765256330262, -9223363264515786864, -9223363262437648366 and others...
Why this happens? I would like to guarantee the determinism with this function throughout my application! How do I solve the problem?

In this case it's probably easiest to define your own __eq__ function and __hash__ function. This will return the same hash every time for you:
class Generic:
def __init__(self, id, name, property):
self.id=id
self.name = name
self.property = property
def __eq__(self, other):
assert self.__class__ == other.__class__, "Types do not match"
return self.id == other.id and self.name == other.name and self.property == other.property
def __hash__(self):
return hash ( (self.id, self.name, self.property) )
This will also make hashes of equivalent objects equal, as well:
>>>obj = Generic(1, 'blah', 'blah')
>>>obj2 = Generic(1, 'blah', 'blah')
>>>obj == obj2
True
>>>hash(obj) == hash(obj2)
True
hope that helps!

For those looking to get hashes of built-in types, Python's built in hashlib might be easier than subclassing to redefine __hash__. Here's an example with for string.
from hashlib import md5
def string_hash(string):
return md5(string.encode()).hexdigest()
This will return the same hash for different string objects so long as the content is the same. Not all objects will work, but it could you save you time depending on your use case.

How to properly implement/overload "repr "?

New to python and this might be a silly question, but how does one properly implement the repr method?
I wrote a quick little program to simulate a game of cards but I don't know what to write for the repr method. The repr method for the Card class was pretty straight forward, but I don't know what to do for the DeckOfCards class Here's my code:
import random
class Card:
'''Create a single card, by id number'''
# Class variables, created once for the class
suits = [ '\u2660', '\u2661', '\u2662', '\u2663' ]
ranks = [ 'A','2','3','4','5','6','7','8','9','10','J','Q','K' ]
def __init__(self, n=0):
# instance variables for _num, _rank, _suit, _value
if 0 <= n < 52:
self._num = n
self._rank = Card.ranks[n%13] # note referencing class vars
self._suit = Card.suits[n//13]
self._value = n%13 + 1
if self._rank == 'A':
self._value = 14
else: # invalid card indicators
self._rank = 'x'
self._suit = 'x'
self._value = -1
def __repr__(self):
return self._rank + self._suit
def __lt__(self,other):
return self._value < other._value
def __le__(self,other):
return self._value <= other._value
def __eq__(self,other):
return self._value == other._value
class DeckOfCards:
'''A Deck is a collection of cards'''
def __init__(self):
self._deck = [ Card(i) for i in range(52) ]
def __repr__(self):
return 'Deck : ', self._deck
def shuffle(self):
return random.shuffle(self._deck)
def deal_a_card(self, i=-1):
#that way player can choose where to draw from
return self._deck.pop(i)
def cards_left(self,count):
return len(self._deck)
new_deck = DeckOfCards()
Also, feel free to comment on anything you'd like, whether it be a design flaw or redundancy in code, literally anything. Thanks in advance!

You should return a string type, for example in Deck:
def __repr__(self):
...
return 'Deck : '+str(self._deck)

__repr__ ideally could return the representation of the object that you would use to create this instance.
From repr():
For many types, this function makes an attempt to return a string that would yield an object with the same value when passed to eval(), otherwise the representation is a string enclosed in angle brackets that contains the name of the type of the object together with additional information often including the name and address of the object.

First, It should be noted that you don't have to implement the __repr__ method. Python provides a somewhat reasonable default (it'll at least tell you the type).
If you want to implement __repr__, the "rule of thumb" is that where it makes sense, you should provide enough information about the object that a user could reconstruct it. In your case, there doesn't seem to be any real difference from one deck to another, so
def __repr__(self):
return 'Deck()'
might be a reasonable return value. This doesn't get the state right (after shuffling), but you don't provide an interface for constructing a deck in a particular state. If you did, it might look like:
def __repr__(self):
return 'Deck(%s)' % self._deck

How to check if an object exists inside a list of objects?

I am using Python to implement an Earley Parser that has Context Free rules defined as follows:
class Rule:
def __init__(self,string,i,j,dot):
self.i = 0
self.j = 0
self.dot = 0
string = string.split('->')
self.lhs = string[0].strip()
self.rhs1 = string[1].strip()
self.rhs = []
self.rhs1 = self.rhs1.split(' ')
for word in self.rhs1:
if word.strip()!= '':
self.rhs.append(word)
def __eq__(self, other):
if self.i == other.i:
if self.j == other.j:
if self.dot == other.dot:
if self.lhs == other.lhs:
if self.rhs == other.rhs:
return True
return False
To check whether an object of class Rule exists within a chart array or not, I have used the following:
def enqueue(self, entry, state):
if state in self.chart[entry]:
return None
else:
self.chart[entry].append(state)
where chart is an array that is supposed to contain lists of objects of class Rule:
def __init__(self, words):
self.chart = [[] for i in range(len(words))]
Further I check whether a rule exists as that in the chart[entry] as follows (and if it does not exist, then simply append):
def enqueue(self, entry, state):
if state in self.chart[entry]:
return None
else:
self.chart[entry].append(state)
However this gives me an error as
TypeError: 'in <string>' requires string as left operand, not classobj
To circumvent this, I even declared an __eq__ function in the class itself but it doesn't seem to work. Can anyone help me with the same?

Assuming that your object has only a title attribute which is relevant for equality, you have to implement the __eq__ method as follows:
class YourObject:
[...]
def __eq__(self, other):
return self.title == other.title
Of course if you have more attributes that are relevant for equality, you must include those as well. You might also consider implementing __ne__ and __cmp__ for consistent behaviour.

Unexpected behavior for python set.contains

Borrowing the documentation from the __contains__ documentation
print set.__contains__.__doc__
x.__contains__(y) <==> y in x.
This seems to work fine for primitive objects such as int, basestring, etc. But for user-defined objects that define the __ne__ and __eq__ methods, I get unexpected behavior. Here is a sample code:
class CA(object):
def __init__(self,name):
self.name = name
def __eq__(self,other):
if self.name == other.name:
return True
return False
def __ne__(self,other):
return not self.__eq__(other)
obj1 = CA('hello')
obj2 = CA('hello')
theList = [obj1,]
theSet = set(theList)
# Test 1: list
print (obj2 in theList) # return True
# Test 2: set weird
print (obj2 in theSet) # return False unexpected
# Test 3: iterating over the set
found = False
for x in theSet:
if x == obj2:
found = True
print found # return True
# Test 4: Typcasting the set to a list
print (obj2 in list(theSet)) # return True
So is this a bug or a feature?

For sets and dicts, you need to define __hash__. Any two objects that are equal should hash the same in order to get consistent / expected behavior in sets and dicts.
I would reccomend using a _key method, and then just referencing that anywhere you need the part of the item to compare, just as you call __eq__ from __ne__ instead of reimplementing it:
class CA(object):
def __init__(self,name):
self.name = name
def _key(self):
return type(self), self.name
def __hash__(self):
return hash(self._key())
def __eq__(self,other):
if self._key() == other._key():
return True
return False
def __ne__(self,other):
return not self.__eq__(other)

This is because CA doesn't implement __hash__
A sensible implementation would be:
def __hash__(self):
return hash(self.name)

A set hashes it's elements to allow a fast lookup. You have to overwrite the __hash__ method so that a element can be found:
class CA(object):
def __hash__(self):
return hash(self.name)
Lists don't use hashing, but compare each element like your for loop does.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python: Is this an ok way of overriding eq and hash? - python

Related

How can I make a dataclass hash the same as a string?

Why isn't the hash function deterministic?

How to properly implement/overload "repr "?

How to check if an object exists inside a list of objects?

Unexpected behavior for python set.contains

Categories

Resources

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python: Is this an ok way of overriding __eq__ and __hash__? - python

Related

How can I make a dataclass hash the same as a string?

Why isn't the hash function deterministic?

How to properly implement/overload "__repr__ "?

How to check if an object exists inside a list of objects?

Unexpected behavior for python set.__contains__

Categories

Resources

Python: Is this an ok way of overriding eq and hash? - python

How to properly implement/overload "repr "?

Unexpected behavior for python set.contains