Is there a SQLAlchemy database-agnostic FROM_UNIXTIME() function?

Currently I have a query similar to the one below in Flask-SQLAlchemy:
from sqlalchemy.sql import func
models = (
    Model.query
    .join(ModelTwo)
    .filter(Model.finish_time >= func.from_unixtime(ModelTwo.start_date))
    .all()
)
This works fine with MySQL, which I am running in production. However, when I run tests against the method using an in-memory SQLite database, it fails because from_unixtime is not a SQLite function.
Setting aside the issue of testing against a database as close to production as possible, and the fact that I now have two different representations of dates in the database, is there a database-agnostic way in SQLAlchemy to convert between dates and Unix timestamps and vice versa?

For anyone else interested in this, I found a way to create custom functions in SQLAlchemy based on the SQL dialect being used. As such, the following achieves what I need:
from sqlalchemy.sql import expression
from sqlalchemy.ext.compiler import compiles


class convert_timestamp_to_date(expression.FunctionElement):
    name = 'convert_timestamp_to_date'


@compiles(convert_timestamp_to_date)
def mysql_convert_timestamp_to_date(element, compiler, **kwargs):
    return 'from_unixtime({})'.format(compiler.process(element.clauses))


@compiles(convert_timestamp_to_date, 'sqlite')
def sqlite_convert_timestamp_to_date(element, compiler, **kwargs):
    return "datetime({}, 'unixepoch')".format(compiler.process(element.clauses))
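Note that the first @compiles registration names no dialect, so it acts as the default compilation for every backend (MySQL included); the second registration, marked 'sqlite', overrides it only when the SQLite dialect is in use.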
The query above can now be rewritten as follows:
models = (
    Model.query
    .join(ModelTwo)
    .filter(Model.finish_time >= convert_timestamp_to_date(ModelTwo.start_date))
    .all()
)

Related

Join query in PostgreSQL using Python Flask sqlalchemy orm

I have two tables X and Y.
X has two columns id and name,
Y has three columns id, city, country
I need to make a join query and take city and country values corresponding to its name using id. id is a foreign key.
I tried directly in PostgreSQL with this query,
select * from x inner join y on x.name = 'xyz'
It gives the exact result. But when I try the same query using the SQLAlchemy ORM in my Python Flask app, it doesn't. I'm new to Python and the Flask framework, and I don't know how to achieve this.
I declared the table definitions and a serialize function in a separate Python file inside my models directory.
In my main.py file, I import those models and tried this kind of query, but it's not working.
from models import x, y
response = x.query.join(y).filter_by(name=name).all()
I'm getting an error message like this:
sqlalchemy.exc.InvalidRequestError
InvalidRequestError: Could not find a FROM clause to join from. Tried joining to <class 'models.y'>, but got: Can't find any foreign key relationships between 'x' and 'y'.
If you prefer to write out regular SQL statements, then why not use SQLAlchemy to do just that? It's not a requirement that you use chained methods like join, filter_by, etc. to query your database.
In order to answer this question, I need to make some assumptions about what's in your models file. I'll assume it's something like this:
from flask_sqlalchemy import SQLAlchemy
import datetime

db = SQLAlchemy()


class BaseModel(db.Model):
    """Base data model for all objects"""
    # more code here


class x(BaseModel, db.Model):
    # more table setup code here
    pass


class y(BaseModel, db.Model):
    # more table setup code here
    pass
If that's the case, then here's what you can do to execute plain old parameterized SQL statements:
from flask import Flask
from models import db
import json

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'your_database_connection_string'
db.init_app(app)

result = db.session.execute("select * from x inner join y on x.name = :name", {"name": "xyz"})

# If no rows were returned in the result, return an empty list
if not result.returns_rows:
    response = []
# Convert the response to a plain list of dicts
else:
    response = [dict(row.items()) for row in result]

# Output the query result as JSON
print(json.dumps(response))
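A side note for anyone reading this on a newer SQLAlchemy (1.4/2.0): raw SQL strings must be wrapped in text(), and rows expose a ._mapping accessor instead of .items(). A minimal sketch of the equivalent call:
from sqlalchemy import text

result = db.session.execute(
    text("select * from x inner join y on x.name = :name"), {"name": "xyz"})
response = [dict(row._mapping) for row in result]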
I find this method of running SQL queries in Flask with SQLAlchemy far easier to follow and understand than trying to use all of the different method chaining that you were attempting in your original post.
Although it is possible to avoid the ORM altogether as in Elliot's example, the ORM does have its advantages if the database is small. The most convenient way to solve your problem is by adding a relationship. You can then query the results by, for example:
tree = db.session.query(Tree).first()
for leaf in tree.leaves:
    print(leaf.id)
If you cannot create the relationship, the proper syntax for a join is as follows:
db.session.query(Leaf).join(Tree, Leaf.tree_id == Tree.id)
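For completeness, here is a minimal sketch of what such a relationship declaration might look like, using the hypothetical Tree/Leaf models from the example above (the table, column, and backref names are assumptions):
from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy()


class Tree(db.Model):
    __tablename__ = 'tree'
    id = db.Column(db.Integer, primary_key=True)
    # tree.leaves becomes the collection iterated over above
    leaves = db.relationship('Leaf', backref='tree')


class Leaf(db.Model):
    __tablename__ = 'leaf'
    id = db.Column(db.Integer, primary_key=True)
    # the foreign key is what lets SQLAlchemy find the join on its own
    tree_id = db.Column(db.Integer, db.ForeignKey('tree.id'))
With the ForeignKey in place, SQLAlchemy can also infer the join for the original x.query.join(y) call without an explicit join condition.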

How to add a SQL function to sqlalchemy

I'm working with Oracle SQL and I want to use some of Oracle's functions that don't exist in other types of relational databases.
Basically I want to add a function that returns the weekday for a given date.
From what I understand, SQLAlchemy gives me two ways to do that: one is to provide the SQL query as text, the other is to extend SQLAlchemy by implementing a new Python function that represents the SQL function. I'm leaning towards implementing the function because I expect to use it in a few queries.
Here is what I have implemented so far; I'm not really sure what my next step is, or if this is even correct.
from sqlalchemy.sql.expression import FunctionElement
from sqlalchemy.ext.compiler import compiles


class weekday(FunctionElement):
    name = 'weekday'


@compiles(weekday)
def compile(element, compiler, **kw):
    if len(element.clauses) == 1:
        return "TO_CHAR(%s, 'DY')" % compiler.process(element.clauses)
    elif len(element.clauses) == 0:
        raise TypeError("weekday needs a date as parameter")
    else:
        raise TypeError("weekday needs just one parameter")
When I tried to add this function to one of my objects, instead of calculating the result I got the function itself back. Here is an example of what I'm talking about:
from sqlalchemy import Column, Date
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.hybrid import hybrid_property

Base = declarative_base()


class SomeObject(Base):
    __tablename__ = 'table1'
    asof = Column(Date, primary_key=True)

    @hybrid_property
    def weekday(self):
        return weekday(self.asof)
In the shell I tried:
from datetime import datetime

my_object = SomeObject()
my_object.asof = datetime(2018, 1, 1)
session.add(my_object)
session.commit()

result = session.query(SomeObject).filter(SomeObject.asof == datetime(2018, 1, 1)).first()
result.weekday  # returns `<orm.weekday at 0x1724b7deeb8; weekday>`
NOTE: I insist on computing that weekday in the SQL query rather than in Python because I need it to filter out some records, and in my case that function will determine whether SQLAlchemy pulls back a couple of million records or just a couple.
After trying out a few things, I realized that a hybrid_property is not supposed to return a SQL expression at the instance level; it needs to return the actual value that the SQL expression would have returned.
That being said, my SQL function goes into the 'expression' part of the hybrid_property, which looks like this:
@weekday.expression
def weekday(cls):
    return weekday(cls.asof)
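Putting it all together, a minimal sketch of the full hybrid property might look like this; it reuses the weekday FunctionElement defined earlier, and the property name and the strftime('%a') approximation of Oracle's TO_CHAR(date, 'DY') are assumptions:
from sqlalchemy import Column, Date
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.hybrid import hybrid_property

Base = declarative_base()


class SomeObject(Base):
    __tablename__ = 'table1'
    asof = Column(Date, primary_key=True)

    @hybrid_property
    def weekday_name(self):
        # Instance level: return an actual value computed in Python
        return self.asof.strftime('%a').upper()

    @weekday_name.expression
    def weekday_name(cls):
        # Query level: return the SQL expression rendered by the
        # @compiles handler defined for the weekday FunctionElement
        return weekday(cls.asof)


# The filter now runs in the database, not in Python:
# session.query(SomeObject).filter(SomeObject.weekday_name == 'MON').all()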
Old question, but you can also use SQLAlchemy's SQL and Generic Functions:
https://docs.sqlalchemy.org/en/13/core/functions.html
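As a rough sketch of that approach (whether your database actually has a weekday() function, and the String return type, are assumptions), you can register the function by subclassing GenericFunction so it becomes available through func:
from sqlalchemy import String, func
from sqlalchemy.sql.functions import GenericFunction


class weekday(GenericFunction):
    # registers func.weekday(...) with a known return type
    name = 'weekday'
    type = String()
    inherit_cache = True


# Renders as weekday(table1.asof) unless a @compiles handler, like the
# one shown earlier, rewrites it for a specific dialect:
# session.query(func.weekday(SomeObject.asof))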

Unit testing a function that depends on database

I am running tests on some functions. I have a function that uses database queries. I have gone through the blogs and docs that say we have to make an in-memory or test database to use such functions. Below is my function:
def already_exists(story_data, c):
    # TODO(salmanhaseeb): Implement de-dupe functionality by checking if it
    # already exists in the DB.
    c.execute("""SELECT COUNT(*) from posts where post_id = ?""", (story_data.post_id,))
    (number_of_rows,) = c.fetchone()
    if number_of_rows > 0:
        return True
    return False
This function hits the production database. My question is: when testing, if I create an in-memory database and populate my values there, I will be querying that database (the test DB). But when I call my already_exists() function from a test, the production DB will still be hit. How do I make the function hit my test DB while testing?
There are two routes you can take to address this problem:
1. Make an integration test instead of a unit test and just use a copy of the real database.
2. Provide a fake to the method instead of an actual connection object.
Which one you should do depends on what you're trying to achieve.
If you want to test that the query itself works, then you should use an integration test. Full stop. The only way to make sure the query works as intended is to run it with test data already in a copy of the database. Running it against a different database technology (e.g., running against SQLite when your production database is PostgreSQL) will not ensure that it works in production. Needing a copy of the database means you will need some automated deployment process for it that can be easily invoked against a separate database. You should have such an automated process anyway, as it helps ensure that your deployments across environments are consistent, allows you to test them prior to release, and "documents" the process of upgrading the database. Standard solutions to this are migration tools written in your programming language like Alembic, or tools to execute raw SQL like yoyo or Flyway. You would need to invoke the deployment and fill it with test data prior to running the test, then run the test and assert the output you expect to be returned.
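As an illustration only, a pytest fixture for such an integration test might look roughly like this, assuming an Alembic setup; the database URL, the seed_test_data helper, and the Story class are placeholders for your own code:
import psycopg2
import pytest
from alembic import command
from alembic.config import Config

TEST_DATABASE_URL = "postgresql://localhost/myapp_test"  # placeholder


@pytest.fixture
def test_db():
    cfg = Config("alembic.ini")
    cfg.set_main_option("sqlalchemy.url", TEST_DATABASE_URL)
    command.upgrade(cfg, "head")       # deploy the schema to the test database
    conn = psycopg2.connect(TEST_DATABASE_URL)
    seed_test_data(conn)               # placeholder: insert known test rows
    yield conn.cursor()
    conn.close()
    command.downgrade(cfg, "base")     # tear the schema back down


def test_already_exists_finds_seeded_post(test_db):
    story = Story(post_id=10)          # assumes post_id 10 was part of the seed data
    assert already_exists(story, test_db)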
If you want to test the code around the query and not the query itself, then you can use a fake for the connection object. The most common solution to this is a mock. Mocks provide stand-ins that can be configured to accept the function calls and inputs and return some output in place of the real object. This would allow you to test that the logic of the method works correctly, assuming that the query returns the results you expect. For your method, such a test might look something like this:
from unittest.mock import Mock
...

def test_already_exists_returns_true_for_positive_count():
    mockConn = Mock(
        execute=Mock(),
        fetchone=Mock(return_value=(5,)),
    )
    story = Story(post_id=10)  # Making some assumptions about what your object might look like.

    result = already_exists(story, mockConn)

    assert result
    # Possibly assert calls on the mock. Value of these asserts is debatable.
    mockConn.execute.assert_called_with("""SELECT COUNT(*) from posts where post_id = ?""", (story.post_id,))
    mockConn.fetchone.assert_called()
The issue is ensuring that your code consistently uses the same database connection. Then you can set it once to whatever is appropriate for the current environment.
Rather than passing the database connection around from method to method, it might make more sense to make it a singleton.
def already_exists(story_data):
    # Here `connection` is a singleton which returns the database connection.
    connection.execute("""SELECT COUNT(*) from posts where post_id = ?""", (story_data.post_id,))
    (number_of_rows,) = connection.fetchone()
    if number_of_rows > 0:
        return True
    return False
Or make connection a method on each class and turn already_exists into a method. It should probably be a method regardless.
def already_exists(self):
    # Here the connection is associated with the object.
    self.connection.execute("""SELECT COUNT(*) from posts where post_id = ?""", (self.post_id,))
    (number_of_rows,) = self.connection.fetchone()
    if number_of_rows > 0:
        return True
    return False
But really you shouldn't be rolling this code yourself. Instead you should use an ORM such as SQLAlchemy which takes care of basic queries and connection management like this for you. It has a single connection, the "session".
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy_declarative import Address, Base, Person
engine = create_engine('sqlite:///sqlalchemy_example.db')
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()
Then you use that to make queries. For example, it has an exists method.
q = session.query(Post).filter(Post.post_id == story_data.post_id)  # the rows whose existence we want to check
session.query(q.exists()).scalar()
Using an ORM will greatly simplify your code. Here's a short tutorial for the basics, and a longer and more complete tutorial.

Dialect-specific SQLAlchemy declarative Column defaults

Short Version
In SQLAlchemy's ORM column declaration, how can I use server_default=sa.FetchedValue() on one dialect, and default=somePythonFunction on another, so that my real DBMS can populate things with triggers, and my test code can be written against sqlite?
Background
I'm using SQLAlchemy's declarative ORM to work with a Postgres database, but I'm trying to write unit tests against an sqlite:///:memory: database, and I'm running into a problem with columns that have computed defaults on their primary keys. For a minimal example:
CREATE TABLE test_table(
    id VARCHAR PRIMARY KEY NOT NULL
        DEFAULT (lower(hex(randomblob(16))))
)
SQLite itself is quite happy with this table definition (sqlfiddle) but SQLAlchemy seems unable to work out the ID of newly created rows.
class TestTable(Base):
    __tablename__ = 'test_table'
    id = sa.Column(
        sa.VARCHAR,
        primary_key=True,
        server_default=sa.FetchedValue())
Definitions like this work just fine in postgres, but die in sqlite (as you can see on Ideone) with a FlushError when I call Session.commit:
sqlalchemy.orm.exc.FlushError: Instance <TestTable at 0x7fc0e0254a10> has a NULL identity key. If this is an auto-generated value, check that the database table allows generation of new primary key values, and that the mapped Column object is configured to expect these generated values. Ensure also that this flush() is not occurring at an inappropriate time, such as within a load() event.
The documentation for FetchedValue warns us that this can happen on dialects that don't support the RETURNING clause on INSERT:
For special situations where triggers are used to generate primary key values, and the database in use does not support the RETURNING clause, it may be necessary to forego the usage of the trigger and instead apply the SQL expression or function as a "pre execute" expression:
t = Table('test', meta,
    Column('abc', MyType, default=func.generate_new_value(),
           primary_key=True)
)
func.generate_new_value is not defined anywhere else in SQLAlchemy, so it seems they intend I either generate defaults in Python, or else write a separate function to do a SQL query to generate a default value in the DBMS. I can do that, but the problem is, I only want to do that for SQLite, since FetchedValue does exactly what I want on postgres.
Dead Ends
Subclassing Column probably won't work. Nothing that I can find in the sources ever tells the Column what dialect is being used, and the behavior of the default and server_default fields is defined outside the class.
Writing a Python function that calls the triggers by hand on the real DBMS creates a race condition. Avoiding the race condition by changing the isolation level creates a deadlock.
My Current Workaround
It's bad because it breaks integration tests that connect to a real Postgres.
import sys

import sqlalchemy as sa


def trigger_column(*a, **kw):
    python_default = kw.pop('python_default')
    if 'unittest' in sys.modules:
        return sa.Column(*a, default=python_default, **kw)
    else:
        return sa.Column(*a, server_default=sa.FetchedValue(), **kw)
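For context, a column declared with this helper might look like the following (it reuses Base and sa from above; the uuid-based Python default is only an illustrative stand-in for the lower(hex(randomblob(16))) default in the DDL):
import uuid


class TestTable(Base):
    __tablename__ = 'test_table'
    id = trigger_column(
        sa.VARCHAR,
        primary_key=True,
        python_default=lambda: uuid.uuid4().hex)  # used only under unittest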
Not a direct answer to your question, but hopefully helpful to someone.
My problem was wanting to change the collation depending on the dialect; this was my solution:
from sqlalchemy import Unicode
from sqlalchemy.ext.compiler import compiles


@compiles(Unicode, 'sqlite')
def compile_unicode(element, compiler, **kw):
    element.collation = None
    return compiler.visit_unicode(element, **kw)
This changes the collation for all Unicode columns only for sqlite.
Here's some documentation: http://docs.sqlalchemy.org/en/latest/core/custom_types.html#overriding-type-compilation

Can I use Psycopg2's LoggingConnection with SQLAlchemy?

I am using SQLAlchemy 0.9.7 over Postgres with psycopg2 as the driver.
I have a stray transaction that isn't being closed properly, and in order to debug it, I would like to log all of the operations being sent to the database.
The psycopg2.extras.LoggingConnection looks like it provides the functionality I need, but I can't see how I might persuade SQLAlchemy to use this feature of the dialect.
Is this possible via SQLAlchemy?
You could pass a custom connection factory to the SQLAlchemy engine:
import psycopg2.extras
from sqlalchemy import create_engine

def _connection_factory(*args, **kwargs):
    connection = psycopg2.extras.LoggingConnection(*args, **kwargs)
    connection.initialize(open('sql.log', 'a'))
    return connection

db_engine = create_engine(conn_string,
                          connect_args={"connection_factory": _connection_factory})
Alternatively, you could implement a custom cursor class (see psycopg2.extras.LoggingCursor for example), and pass it in a similar way:
connect_args={ "cursor_factory": MyCursor }
It isn't a direct answer to my own question, but a workaround: similar functionality can be obtained by turning on query logging at the SQLAlchemy layer rather than the psycopg2 layer.
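For reference, a minimal sketch of that SQLAlchemy-level logging (the connection URL is a placeholder): either pass echo=True to create_engine, or configure the sqlalchemy.engine logger directly.
import logging

from sqlalchemy import create_engine

# Option 1: echo every statement the engine executes
engine = create_engine("postgresql://localhost/mydb", echo=True)  # placeholder URL

# Option 2: use the standard logging module for finer control
logging.basicConfig()
logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)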
