I am trying to write a class that will look for certain column types in a sqlalchemy reflected table and then do some operations for a subset of columns based on the data type.
I can correctly reflect the table and grab a list of the 'date' type columns as shown in the date_types list. However, when it gets to table[name] the function fails with the error:
*** TypeError: 'DeclarativeMeta' object is not subscriptable
If I use dot subscripts instead of square brackets i.e. table.col_name I can access the table column attribute but I don't see how I would iterate over the attribute list using that syntax.
Here is my class:
from pdb import set_trace
class dateRangeProfiler():
    """Report the minimum and maximum value of each date-like column of a table.

    An instance is callable: ``profiler(table)`` returns a dict of the form
    ``{column_name: {'max': ..., 'min': ...}}`` for every column of ``table``
    whose type matches one of ``self.date_types``.
    """

    def __init__(self, session):
        """Remember the session used for querying and the types treated as dates."""
        self.date_ranges = {}
        self.date_types = [Date(), DateTime(), TIMESTAMP()]
        self.session = session
        print('date data types: ', str(self.date_types))

    def __call__(self, table):
        """Find the date columns of ``table`` and return their min/max profile."""
        date_columns = self.getDateColumns(table)
        print(date_columns)
        date_column_profile = self.profileColumns(table, date_columns)
        return date_column_profile

    def getDateColumns(self, table):
        """Return ``(name, type)`` for each column whose type string matches a date type."""
        # Build the set of accepted type names once instead of once per column.
        date_type_names = {str(dt) for dt in self.date_types}
        columns = [(c.name, c.type) for c in table.__table__.columns
                   if str(c.type) in date_type_names]
        return columns

    def profileColumns(self, table, date_cols):
        """Query the max and min of every column named in ``date_cols``.

        ``date_cols`` is an iterable of ``(name, type)`` pairs as produced by
        ``getDateColumns``. Returns ``{name: {'max': ..., 'min': ...}}``.
        """
        profile = {}
        for (name, _) in date_cols:
            print(name)
            # A mapped class is not subscriptable; look the column attribute
            # up by name with getattr() instead of table[name].
            column = getattr(table, name)
            qry = self.session.query(func.max(column).label("max_date"),
                                     func.min(column).label("min_date"))
            res = qry.one()
            # profile is a dict, so assign by key (dicts have no append()).
            profile[name] = {'max': res.max_date, 'min': res.min_date}
        return profile
Here is how I call the profiler:
date_range_profiler = dateRangeProfiler(sess)
date_range_profiler(my_table)
And the error:
*** TypeError: 'DeclarativeMeta' object is not subscriptable
The issue doesn't have anything to do with the sqlalchemy module. When accessing an attribute of an object whose name is held in a variable, use Python's built-in getattr() function.
qry = self.session.query(func.max(getattr(table,name)).label("max_date"),
func.min(getattr(table,name)).label("min_date"),)
Related
I have this code segment in Python2:
def super_cool_method():
    """Run the query on a new psycopg2 connection and return all fetched rows.

    The cursor and the connection are closed before returning, even if the
    query raises.
    """
    con = psycopg2.connect(**connection_stuff)
    try:
        cur = con.cursor(cursor_factory=DictCursor)
        try:
            cur.execute("Super duper SQL query")
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        con.close()
    for row in rows:
        pass  # do some data manipulation on row
    return rows
that I'd like to write some unittests for. I'm wondering how to use mock.patch in order to patch out the cursor and connection variables so that they return a fake set of data? I've tried the following segment of code for my unittests but to no avail:
#mock.patch("psycopg2.connect")
#mock.patch("psycopg2.extensions.cursor.fetchall")
def test_super_awesome_stuff(self, a, b):
testing = super_cool_method()
But I seem to get the following error:
TypeError: can't set attributes of built-in/extension type 'psycopg2.extensions.cursor'
You have a series of chained calls, each returning a new object. If you mock just the psycopg2.connect() call, you can follow that chain of calls (each producing mock objects) via .return_value attributes, which reference the returned mock for such calls:
@mock.patch("psycopg2.connect")
def test_super_awesome_stuff(self, mock_connect):
    """Check that super_cool_method() returns the rows produced by the mocked cursor."""
    expected = [['fake', 'row', 1], ['fake', 'row', 2]]
    mock_con = mock_connect.return_value  # result of psycopg2.connect(**connection_stuff)
    mock_cur = mock_con.cursor.return_value  # result of con.cursor(cursor_factory=DictCursor)
    mock_cur.fetchall.return_value = expected  # return this when calling cur.fetchall()
    result = super_cool_method()
    self.assertEqual(result, expected)
Because you hold onto references for the mock connect function, as well as the mock connection and cursor objects you can then also assert if they were called correctly:
# Verify each step of the connect -> cursor -> execute chain was called as expected.
mock_connect.assert_called_with(**connection_stuff)
mock_con.cursor.assert_called_with(cursor_factory=DictCursor)
mock_cur.execute.assert_called_with("Super duper SQL query")
If you don't need to test these, you could just chain up the return_value references to go straight to the result of cursor() call on the connection object:
@mock.patch("psycopg2.connect")
def test_super_awesome_stuff(self, mock_connect):
    """Check the returned rows, configuring the whole mock call chain in one statement."""
    expected = [['fake', 'row', 1], ['fake', 'row', 2]]
    # connect() -> connection, .cursor() -> cursor, .fetchall() -> expected
    mock_connect.return_value.cursor.return_value.fetchall.return_value = expected
    result = super_cool_method()
    self.assertEqual(result, expected)
Note that if you are using the connection as a context manager to automatically commit the transaction and you use as to bind the object returned by __enter__() to a new name (so with psycopg2.connect(...) as conn: # ...) then you'll need to inject an additional __enter__.return_value in the call chain:
mock_con_cm = mock_connect.return_value # result of psycopg2.connect(**connection_stuff)
mock_con = mock_con_cm.__enter__.return_value # object assigned to con in with ... as con
mock_cur = mock_con.cursor.return_value # result of con.cursor(cursor_factory=DictCursor)
mock_cur.fetchall.return_value = expected # return this when calling cur.fetchall()
The same applies to the result of with conn.cursor() as cursor:, the conn.cursor.return_value.__enter__.return_value object is assigned to the as target.
Since the cursor is the return value of con.cursor, you only need to mock the connection, then configure it properly. For example,
query_result = [("field1a", "field2a"), ("field1b", "field2b")]
with mock.patch('psycopg2.connect') as mock_connect:
    # mock_connect stands in for the connect function itself; the connection
    # object is what calling it returns, hence the first .return_value.
    mock_connect.return_value.cursor.return_value.fetchall.return_value = query_result
    super_cool_method()
The following answer is a variation of the answers above.
I was using django.db.connections cursor object.
So following code worked for me
@patch('django.db.connections')
def test_supercool_method(self, mock_connections):
    """Check that supercool_method() returns a list when the Django cursor is mocked."""
    query_result = [("field1a", "field2a"), ("field1b", "field2b")]
    # connections[alias] -> connection, .cursor() used as a context manager -> cursor
    mock_cursor = mock_connections.__getitem__.return_value.cursor.return_value.__enter__.return_value
    mock_cursor.fetchall.return_value = query_result
    result = supercool_method()
    self.assertIsInstance(result, list)
@patch("psycopg2.connect")
async def test_update_task_after_launch(fake_connection):
    """
    Replace psycopg2.connect() with a fake connection whose cursor returns canned values.
    """
    fake_update_count = 4
    fake_connection.return_value = Mock(
        cursor=lambda: Mock(
            execute=lambda x, y: "",
            # DB-API cursors expose fetchall(), not fetch_all()
            fetchall=lambda: ['some', 'fake', 'rows'],
            rowcount=fake_update_count,
            close=lambda: "",
        )
    )
I used the exec() function and the for loop to create various class objects and store them in variables.
I think the variables were created successfully as shown in the vs code log, but when I try to access it to change the value or print it, I get this error (name 'membro_1' is not defined) as if it's not defined
What did I do wrong here? How can I access those variables? Is there a better way to create all these objects?
see the variables defined at left
the code from image
class Membros:
    """A clan member identified by its tag, with a points counter."""

    def __init__(self, tag, pontos=0) -> None:
        """Store the member ``tag`` and its starting ``pontos`` (0 by default)."""
        self.tag = tag
        self.pontos = pontos

    def __repr__(self) -> str:
        """Show the tag and points so printed instances are readable."""
        return f"{type(self).__name__}(tag={self.tag!r}, pontos={self.pontos!r})"
# war_cla_member have a len() of 47
war_cla_members = ['#VYQPR', '#82PP2LL20', '#LP0RVCPLV', '#LUVQQ2G2', '#PRP20LUL', '#8GUY0V92R',
'#Y02UVP0UV', '#9U0J8GVJL', '#9P2GGVR9Y', '#20QRJLVU8', '#QRYPRQGP9', '#8GRVUG8', '#PCJYUP2L8', '#22VJPRQRL', '#RJQ8JQ8QR', '#2CVR9C2U9', '#PG2UGPJP', '#L0QG9CG2U', '#9R0PR9Q0U', '#2G8VGQ208', '#8GJ8PGY0C', '#9QLLPJQ90', '#C9PGG8JC', '#8YG8RJV90', '#9YLLQLJGU', '#2GQQ2PU92', '#2PYU080Q', '#22QCRQCPG', '#C9JRU9U2', '#9JQLPGLJJ', '#8RR8QVR09', '#9QY2CLVJR', '#U0V0G2YY', '#28PR280CJ', '#P2RC2G9CL', '#9QVVY2P8', '#CVUGYPCP', '#9PVYQP080', '#29P2V8GLJ', '#YUJ88YRU', '#2RU0UGCUU', '#Y08LY8GJY', '#9R00QQU20', '#P08UJ920', '#2C00L02RU', '#YYQP9JGVC', '#YLULUC8L']
# Keep the members in a dict keyed by tag instead of creating numbered
# variables with exec(): every object can then be reached by its tag.
membros = {tag: Membros(tag) for tag in war_cla_members}

# current_river_race is the return of the clashroyale api request. (https://developer.clashroyale.com/#/documentation)
for participant in current_river_race['clan']['participants']:
    membro = membros.get(participant['tag'])
    if membro is not None:
        # Update the member that matches this participant, not a fixed one.
        membro.pontos = participant['decksUsedToday']
I'm trying to turn a password into a hashed one while keeping the original as well.
It seems that the two versions I try to return are not the same (even with a deep copy, which I removed).
def publish_rando(pub_id):
    """Build two pubUser rows sharing one random user: plain and hashed password.

    Returns a dict with the hashed-password row under 'pub_hash' and the
    plain-password row under 'pub_pass'.
    """
    # rando() is just a random string generator function
    user_name = rando()
    plain_password = rando()
    hashed_password = _hash.bcrypt.hash(plain_password)

    shared_fields = {'pub_id': pub_id, 'rand_user': user_name}
    plain_row = _models.pubUser(**shared_fields, rand_pass=plain_password)
    hashed_row = _models.pubUser(**shared_fields, rand_pass=hashed_password)

    result = {}
    result['pub_hash'] = hashed_row
    result['pub_pass'] = plain_row
    return result
what i get:
{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x7f20e21486a0>, 'pub_id': '57', 'rand_user': '5uJm#<B>WYu?', 'rand_pass': '$2b$12$tHRolNUYQZ6XJm27UZaXTO7SKmJKAbHaqH97DbKQxvMrw79pAK9t.'}
{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x7f20e2148490>, 'pub_id': '57', 'rand_user': 'J*gne5Nm#B<2', 'rand_pass': 'sjM%OvAtHg>8'}
notice that the user is different, which tells me that the password is not the same either.
_models.pubUser is a sqlalchemy model:
class pubUser(Base):
    """Declarative model whose columns are reflected from the 'pub_user' table."""

    # autoload_with alone triggers reflection; the separate autoload=True
    # flag is deprecated in SQLAlchemy 1.4 and not accepted in 2.0.
    __table__ = Table('pub_user', metadata,
                      autoload_with=engine)
the call:
pub_rand_hashed = _crud.publish_rando(d['pub_id']).get('pub_hash') #this creates another sqlalchemy row object
pub_rand_unhashed = _crud.publish_rando(d['pub_id']).get('pub_pass') #this creates another sqlalchemy row object
print(pub_rand_hashed.__dict__, pub_rand_unhashed.__dict__)
ok this was a total rookie mistake, but maybe it'll help someone.
@Barmar pointed out that two different results are only possible if the function is called twice.
well, i changed this:
pub_rand_hashed = _crud.publish_rando(d['pub_id']).get('pub_hash') #this creates another sqlalchemy row object
pub_rand_unhashed = _crud.publish_rando(d['pub_id']).get('pub_pass') #this creates another sqlalchemy row object
to this:
pub_rand = _crud.publish_rando(d['pub_id']) #this creates another sqlalchemy row object
pub_rand_unhashed = pub_rand.get('pub_pass')
pub_rand_hashed = pub_rand.get('pub_hash')
I am using a Python package which reads some type of data. From the data, it creates attributes to easily access meta-information related to the data.
How can I create a short name for an attribute?
Basically let's assume the package name is read_data and it has an attribute named data_header_infomation_x_location
import read_data
my_data = read_data(file_path)
How can I instead create a short name to this attribute?
x = "data_header_infomation_x_location"
my_data[1].x gives an error no attribute
Here is a full example from my case
from obspy.io.segy.core import _read_segy

file_path = "some_file_in_my_pc"
sgy = _read_segy(file_path, unpack_trace_headers=True)
# The long nested attribute that the question wants a short name for
sgy[1].stats.segy.trace_header.x_coordinate_of_ensemble_position_of_this_trace
The last line gives a number. e.g., x location
what I want is to rename all this long nested attribute stats.segy.trace_header.x_coordinate_of_ensemble_position_of_this_trace with a short name.
trying for example
attribute = "stats.segy.trace_header.x_coordinate_of_ensemble_position_of_this_trace"
getattr(sgy[1], attribute )
does not work
how about:
from obspy.io.segy.core import _read_segy
attribute_tree_x = ['stats', 'segy', 'trace_header', 'x_coordinate_of_ensemble_position_of_this_trace']
def get_nested_attribute(obj, attribute_tree):
    """Follow a chain of attribute names starting at ``obj`` and return the last value.

    ``attribute_tree`` is either an iterable of attribute names
    (``['stats', 'segy']``) or a single dotted path (``'stats.segy'``).
    An empty tree returns ``obj`` itself. Raises ``AttributeError`` if any
    name along the chain is missing.
    """
    if isinstance(attribute_tree, str):
        # Split a dotted path into names; without this a plain string would
        # be walked character by character.
        attribute_tree = attribute_tree.split(".") if attribute_tree else []
    for attr in attribute_tree:
        obj = getattr(obj, attr)
    return obj
file_path = "some_file_in_my_pc"
sgy = _read_segy(file_path, unpack_trace_headers=True)
sgy[1].stats.segy.trace_header.x_coordinate_of_ensemble_position_of_this_trace
x = get_nested_attribute(sgy[1], attribute_tree_x) # should be the same as the line above
You cannot request the attribute of the attribute in one go, but this loops through the layers to obtain the final value you are looking for.
I am attempting to query all rows for a column called show_id. I would then like to compare each potential item to be added to the DB with the results. Now the simplest way I can think of doing that is by checking if each show is in the results. If so pass etc. However the results from the below snippet are returned as objects. So this check fails.
Is there a better way to create the query to achieve this?
shows_inDB = Show.query.filter(Show.show_id).all()
print(shows_inDB)
Results:
<app.models.user.Show object at 0x10c2c5fd0>,
<app.models.user.Show object at 0x10c2da080>,
<app.models.user.Show object at 0x10c2da0f0>
Code for the entire function:
def save_changes_show(show_details):
    """
    Save the changes to the database

    Adds a Show row for every entry of ``show_details`` whose 'id' is not
    already stored as a ``show_id``. Each new row is committed as it is added.
    On any error the session is rolled back and the traceback is printed.
    """
    # Payload keys copied onto the model as encoded strings.
    text_fields = (
        'seriesName', 'aliases', 'banner', 'seriesId', 'status', 'firstAired',
        'network', 'networkId', 'runtime', 'genre', 'overview', 'lastUpdated',
        'airsDayOfWeek', 'airsTime', 'rating', 'imdbId', 'zap2itId', 'added',
        'addedBy', 'siteRating', 'siteRatingCount', 'slug',
    )
    try:
        # Fetch only the show_id column and keep the ids in a set, so the
        # membership test compares ids with ids rather than with Show objects.
        shows_inDB = {row[0] for row in Show.query.with_entities(Show.show_id).all()}
        print(shows_inDB)
        for show in show_details:
            # Check the show isnt already in the DB
            if show['id'] in shows_inDB:
                print(str(show['id']) + ' Already Present')
                continue
            # Add show to DB
            encoded = {field: str(show[field]).encode() for field in text_fields}
            tv_show = Show(show_id=show['id'], **encoded)
            db.session.add(tv_show)
            db.session.commit()
            # Remember the id so a repeat within this batch is not inserted again.
            shows_inDB.add(show['id'])
    except Exception:
        # Leave the session usable after a failed add/commit.
        db.session.rollback()
        # print_exc() writes the traceback itself and returns None.
        traceback.print_exc()
I have decided to use the method above and extract the data I wanted into a list, comparing each show to the list.
show_compare = []
shows_inDB = Show.query.filter().all()
for item in shows_inDB:
show_compare.append(item.show_id)
for show in show_details:
#Check the show isnt already in the DB
if show['id'] in show_compare:
print(str(show['id']) + ' Already Present')
else:
#Add show to DB
For querying a specific column value, have a look at this question: Flask SQLAlchemy query, specify column names. This is the example code given in the top answer there:
result = SomeModel.query.with_entities(SomeModel.col1, SomeModel.col2)
The crux of your problem is that you want to create a new Show instance if that show doesn't already exist in the database.
Querying the database for all shows and looping through the result for each potential new show might become very inefficient if you end up with a lot of shows in the database, and finding an object by identity is what an RDBMS does best!
This function will check to see if an object exists, and create it if not. Inspired by this answer:
def add_if_not_exists(model, **kwargs):
    """Add a new ``model(**kwargs)`` to the session unless a matching row exists."""
    existing = model.query.filter_by(**kwargs).first()
    if existing:
        return
    db.session.add(model(**kwargs))
So your example would look like:
def add_if_not_exists(model, **kwargs):
    """Add a new ``model(**kwargs)`` to the session unless a matching row exists."""
    existing = model.query.filter_by(**kwargs).first()
    if existing:
        return
    db.session.add(model(**kwargs))
for show in show_details:
    # The model column is show_id; 'id' is only the key used in the payload.
    add_if_not_exists(Show, show_id=show['id'])
# add_if_not_exists() only adds to the session; commit to persist the new rows.
db.session.commit()
If you really want to query all shows upfront, instead of putting all of the id's into a list, you could use a set instead of a list which will speed up your inclusion test.
E.g:
show_compare = {item.show_id for item in Show.query.all()}
for show in show_details:
# ... same as your code