Counting rows by date in sql alchemy - python

I am executing some code in python, but need to access some data from my database. This is my first time I have used sql alchemy
I have a table in my database called reports.bigjoin it has columns with the following types
id (varchar)
id2 (varchar)
ts_min (int4)
ts_local_min (int4)
10_meter_cell (int8)
ds (date)
ds_log (date)
ds_local (date)
I need to know the number of rows for a given set of dates. For example, within python I want to execute
x= select count(*) from reports.bigjoin where (ds>='2016-01-01' and ds<='2016-01-04')
My attempt so far has been
from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy import Integer, String,Date
from sqlalchemy.orm import sessionmaker
engine = sqlalchemy.create_engine(url).connect()
Session = sqlalchemy.orm.sessionmaker(bind=engine)
session = Session()
metadata = sqlalchemy.MetaData(engine)
moz_bookmarks = Table('reports.bigjoin', metadata,
Column('id', String, primary_key=True),
Column('id2', String),
Column('ts_min', Integer),
Column('ts_local', Integer),
Column('10m_cell', Integer),
Column('ds', Date),
Column('ds_log', Date),
Column('ds_local', Date)
)
x = session.query(moz_bookmarks).filter(
(moz_bookmarks.ds >= '2016-01-01', moz_bookmarks.ds <= '2016-01-04')).count()
this has failed. Any help would be greatly appreciated.

cnt = (
session
.query(func.count('*').label('cnt'))
.filter(moz_bookmarks.c.ds >= '2016-01-01')
.filter(moz_bookmarks.c.ds <= '2016-01-04')
)
print(cnt)

After searching a bit and applying
How to use variables in SQL statement in Python?
I found that
connection = create_engine('url').connect()
result = connection.execute("select count(*) from reports.bigjoin where (ds>= %s and ds<= %s)", (x,y))
solved the problem

Related

How can I Insert the value of CURRENT TIMESTAMP using SQLAlchemy's connection.execute

I want to insert multiple rows using connection.execute, and one of the columns must be set to the result of the database's CURRENT_TIMESTAMP function.
For example, given this table:
import sqlalchemy as sa
metadata = sa.MetaData()
foo = sa.Table('foo', metadata,
sa.Column('id', sa.Integer, primary_key=True),
sa.Column('ts', sa.TIMESTAMP))
# I'm using Sqlite for this example, but this question
# is database-agnostic.
engine = sa.create_engine('sqlite://', echo=True)
metadata.create_all(engine)
I can insert a single row like this:
conn = engine.connect()
with conn.begin():
ins = foo.insert().values(ts=sa.func.current_timestamp())
conn.execute(ins)
However when I try to insert multiple rows:
with conn.begin():
ins = foo.insert()
conn.execute(ins, [{'ts': sa.func.current_timestamp()}])
a TypeError is raised:
sqlalchemy.exc.StatementError: (builtins.TypeError) SQLite DateTime type only accepts Python datetime and date objects as input.
[SQL: INSERT INTO foo (ts) VALUES (?)]
[parameters: [{'ts': <sqlalchemy.sql.functions.current_timestamp at 0x7f3607e21070; current_timestamp>}]
Replacing the function with the string "CURRENT_TIMESTAMP" results in a similar error.
Is there a way to get the database to set the column to CURRENT_TIMESTAMP using connection.execute?
I'm aware that I can work around this by querying for the value of CURRENT_TIMESTAMP within the same transaction and using that value in the INSERT values, or executing and UPDATE after the INSERT. I'm specifically asking whether this can be done in connection.execute's *multiparams argument.
It's a hack for sure, but this appears to work for SQLite at least:
from datetime import datetime
from pprint import pprint
import sqlalchemy as sa
engine = sa.create_engine("sqlite:///:memory:")
metadata = sa.MetaData()
foo = sa.Table(
"foo",
metadata,
sa.Column("id", sa.Integer, primary_key=True, autoincrement=True),
sa.Column("ts", sa.TIMESTAMP),
sa.Column("txt", sa.String(50)),
)
foo.create(engine)
with engine.begin() as conn:
ins_query = str(foo.insert().compile()).replace(
" :ts, ", " CURRENT_TIMESTAMP, "
)
print(ins_query)
# INSERT INTO foo (id, ts, txt) VALUES (:id, CURRENT_TIMESTAMP, :txt)
data = [{"id": None, "txt": "Alfa"}, {"id": None, "txt": "Bravo"}]
conn.execute(sa.text(ins_query), data)
print(datetime.now())
# 2021-03-06 17:41:35.743452
# (local time here is UTC-07:00)
results = conn.execute(sa.text("SELECT * FROM foo")).fetchall()
pprint(results, width=60)
"""
[(1, '2021-03-07 00:41:35', 'Alfa'),
(2, '2021-03-07 00:41:35', 'Bravo')]
"""

How do you create a sequence using Sqlalchemy and add that particular sequence to a table?

rollback.py :
def write_to_db(dataset_id):
try:
initialize_datamodels()
EpdUmpPushRollback = datamodels.relational_tables.EpdUmpPushRollback
with session_scope() as session:
epdumppushrollback_obj = EpdUmpPushRollback()
epdumppushrollback_obj.dataset_id = dataset_id
epdumppushrollback_obj.record_id = ''
epdumppushrollback_obj.operator_name = 'vis'
epdumppushrollback_obj.activation_flag = 'active'
epdumppushrollback_obj.record_creation_time = now()
epdumppushrollback_obj.start_time = now()
session.add(epdumppushrollback_obj)
session.flush()
except Exception as e:
#err = "Error in updating the table epd_ump_push_rollback "
#_log.exception(err)
_log.exception("Error in updating the table {}".format(e))
table.py :
"""epd_ump_push_rollback_table
Revision ID: 4e4d99a8e544
Revises: c010f4d4b319
Create Date: 2018-12-19 18:04:30.271380
"""
from alembic import op
from sqlalchemy import Column, String, INTEGER, VARCHAR, NVARCHAR, TIMESTAMP, \
Enum, ForeignKey, Sequence, MetaData
# revision identifiers, used by Alembic.
revision = '4e4d99a8e544'
down_revision = '2396e1b7de5c'
branch_labels = None
depends_on = None
meta = MetaData()
seq_obj = Sequence('record_id_seq', metadata=meta)
def upgrade():
activation_flag_state = Enum('active', 'inactive', name="activation_flag_state")
op.create_table('epd_ump_push_rollback',
Column('dataset_id', String, ForeignKey('epd_ip_dataset.dataset_id'),
primary_key=True),
Column('record_id', INTEGER, seq_obj, server_default=seq_obj.next_value(),
primary_key=True),
Column('operator_name', String, nullable=False),
Column('activation_flag', activation_flag_state, nullable=False),
Column('record_creation_time', TIMESTAMP),
Column('start_time', TIMESTAMP),
Column('end_time', TIMESTAMP))
def downgrade():
op.drop_table('epd_ump_push_rollback')
op.execute('DROP type activation_flag_state')
Explanation:
In the rollback.py file I am writing into db. I am setting a session with db(postgresql) using with session_scope() as session:. I am creating a object of table EpdUmpPushRollback and setting those values appropriately. The column record_id should be generated as sequence which I am defining in table.py and I am using alembic to upgrade my schema to new one which will have the table EpdUmpPushRollback.
I have two questions now.
For the column where we have defined sequence, is it mandatory to add using pdumppushrollback_obj.record_id = '' or it gets added automatically?
What should be the name of the sequence, whenever I try to add any entry in the table, it throws this error.
Error: sqlalchemy.exc.ProgrammingError: (psycopg2.ProgrammingError)
"record_id_seq" is not a sequence [SQL: 'INSERT INTO
epd_ump_push_rollback (dataset_id, operator_name, activation_flag,
record_creation_time, start_time, end_time) VALUES (%(dataset_id)s,
%(operator_name)s, %(activation_flag)s, now(), now(), %(end_time)s)
RETURNING epd_ump_push_rollback.record_id'] [parameters:
{'dataset_id': '20181221_1200_mno', 'operator_name': 'vis',
'activation_flag': 'active', 'end_time': None}]
The line
seq_obj = Sequence('record_id_seq', metadata=meta)
is not enough.
You need to add, in upgrade(), above the table creation:
op.execute(schema.CreateSequence(seq_obj))
Also, drop sequence is needed in downgrade() function
I already had this problem once and to solve it I created the sequence, so you can try this postgres statement:
CREATE SEQUENCE record_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;

Returning Number of affected rows from SQL Server with SQLAlchemy

I use sqlalchemy to make changes to a table in SQL Server database, and would like to get back number of affected rows.
I know there is .rowcount attribute to ResultProxy, but as, for example, this answer is demonstrating .rowcount is not necessarily the same as number of affected rows.
SQL Server uses ##ROWCOUNT to access number of affected rows from the previous statement execution.
Is there a way to modify an sqlalchemy expression that uses insert / update statement to end with SELECT ##ROWCOUNT?
For example, given:
from sqlalchemy import Table, Column, Integer, String, MetaData, create_engine
url = 'mssql+pyodbc://dsn'
engine = create_engine(url)
metadata = MetaData()
users = Table('users', metadata,
Column('id', Integer, primary_key=True),
Column('name', String),
Column('fullname', String),
)
ins = users.insert().values(name='jack', fullname='Jack Jones')
upd1 = users.update().values(fullname='Jack Doe').where(users.c.name == 'jack')
upd2 = users.update().values(fullname='Jack Doe').where(users.c.name == 'jack')
I could prepend SELECT ##ROWCOUNT to an update statement:
sel = select([text('##ROWCOUNT')])
sql1 = sel.suffix_with(upd2)
print(sql1.compile(engine, compile_kwargs={"literal_binds": True}))
Yielding "wrong" query:
SELECT ##ROWCOUNT UPDATE users SET fullname='Jack Doe' WHERE users.name = 'jack'
Trying to do the "right" thing:
sql2 = upd2.suffix_with(sel)
Raises AttributeError since 'Update' object has no attribute 'suffix_with'.
So is there a way to get desired sql query:
UPDATE users SET fullname='Jack Doe' WHERE users.name = 'jack';
SELECT ##ROWCOUNT
Using sql expression language without fully textual constructs.

creating a temporary table from a query using sqlalchemy orm

I can create a temporary table this way:
session.execute("CREATE TABLE temptable SELECT existingtable.id, "
"existingtable.column2 FROM existingtable WHERE existingtable.id<100000")
but the new table is unreadable because it says it has no primary key. existingtable.id is the primary key of exisitingtable, so I expected it to get the same treatment in the temp table.
However, I would rather find some ORM way of doing this anyway. Given:
temp_table = Table('temptable', metadata,
Column('id', Integer, primary_key=True),
Column('column2', Integer),
useexisting=True )
class TempTable(object):
pass
mapper(TempTable, temp_table)
temp_table.create(bind=session.bind, checkfirst=True)
if session.query(TempTable).delete(): #make sure it's empty
session.commit()
How can I populate temp_table with some selected contents of existingtable without doing 100000 session.query.add(TempTable(...)) commands? Or is there a way of creating the table from a query similar to the plain SQL version above?
It's not exactly ORM, but to create the table initially, I'd clone the table structure (see cloneTable in the example below). For copying the data, I then would use the InsertFromSelect example.
Edit: Since version 0.8.3, SqlAlchemy supports Insert.from_select() out of the box. Hence the InsertFromSelect class and the respective visitor in the example below can be directly replaced and are no longer needed. I leave the original example unchanged for historic reasons.
Here is a working example
from sqlalchemy import Table
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.expression import UpdateBase
class InsertFromSelect(UpdateBase):
def __init__(self, table, select):
self.table = table
self.select = select
#compiles(InsertFromSelect)
def visit_insert_from_select(element, compiler, **kw):
return "INSERT INTO %s %s" % (
compiler.process(element.table, asfrom=True),
compiler.process(element.select)
)
def cloneTable(name, table, metadata):
cols = [c.copy() for c in table.columns]
constraints = [c.copy() for c in table.constraints]
return Table(name, metadata, *(cols + constraints))
# test data
from sqlalchemy import MetaData, Column, Integer
from sqlalchemy.engine import create_engine
e = create_engine('sqlite://')
m = MetaData(e)
t = Table('t', m, Column('id', Integer, primary_key=True),
Column('number', Integer))
t.create()
e.execute(t.insert().values(id=1, number=3))
e.execute(t.insert().values(id=9, number=-3))
# create temp table
temp = cloneTable('temp', t, m)
temp.create()
# copy data
ins = InsertFromSelect(temp, t.select().where(t.c.id>5))
e.execute(ins)
# print result
for r in e.execute(temp.select()):
print(r)

Sqlalchemy Sqlite Autoincrement not behaving as expected

I have some trouble making my script incrementing my PK in a correct way. Following the sqlalchemy documentation some special configuration has to be done in order to make it work with sqlite. Here is my script:
def db_stuff():
engine = create_engine('sqlite:///test.db', echo=True)
metadata = MetaData()
db = Table('users', metadata,
Column('id', Integer, primary_key=True),
Column('name', String),
Column('fullname', String),
Column('password', String),
sqlite_autoincrement=True)
metadata.create_all(engine)
return engine.connect(),db
def add_to_db():
ret = db_stuff()
conn = ret[0]
db = ret[1]
try:
conn.execute("INSERT INTO users VALUES ('1','john','smith john','23')")
result = conn.execute(db.select())
for row in result:
print row
finally:
conn.close()
It would be cool if you could help me figuring out what I'm missing here, I start to be desperate...
The problem is that the "id" is not incremented each time and i get an error that it should be unique when I run the script twice.
TIA
try
conn.execute("INSERT INTO users(name, fullname, password) VALUES ('john','smith john','23')")
id is autoincrement hence we should not pass it, however we need to specify what other parameters represent in the table i.e. where should the values ('john', 'smith john', '23') should go.
It should work.
Do this:
conn.execute("INSERT INTO users VALUES ('john','smith john','23')")
You are setting the id to 1 - always. Just leave it and it will be filled due auto-increment.

Categories

Resources