SQLAlchemy: Mapping columns into different properties - python

I am trying to store some simulation measurements (times and values) using sqlalchemy. Here are the relevant table definitions. If there is a more sensible table definition, I'd love to see it.
from sqlalchemy import create_engine, schema, orm

engine = create_engine('sqlite:///:memory:', echo=True)
metadata = schema.MetaData(bind=engine)

container_table = schema.Table('containers', metadata,
    schema.Column('id', schema.types.Integer, primary_key=True))

measurement_table = schema.Table('measurements', metadata,
    schema.Column('id', schema.types.Integer, primary_key=True),
    schema.Column('container_id', schema.types.Integer,
                  schema.ForeignKey('containers.id')),
    schema.Column('time', schema.types.Float),
    schema.Column('value', schema.types.Float))

metadata.create_all()
The times will be unique for each container, and the properties below should be ordered by time.
I would like to be able to both read and assign these properties:
c = Container()
times = range(10)
values = [t**2 for t in times]
c.times = times
c.values = values
But I don't know how to do the mapping. I assume that if it's possible, it will look something like this:
class Container(object):
    times = some_sort_of_proxy()
    values = some_sort_of_proxy()

orm.mapper(Container, container_table, properties={
    # Magic
})
How do I go about doing this? Is this a reasonable mapping, or do I need to have a different underlying table structure?

One documented pattern is to map the column onto a private attribute and expose it through a plain Python property:

class EmailAddress(object):
    @property
    def email(self):
        return self._email

    @email.setter
    def email(self, email):
        self._email = email

mapper(EmailAddress, addresses_table, properties={
    '_email': addresses_table.c.email
})
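That pattern covers renaming a single column. For the list-of-scalars access the question asks for, one possibility is an association_proxy over a time-ordered relation. This is a sketch of my own, not from the thread; it assumes a Measurement class mapped to measurement_table, and the write side is only partial, since pairing assigned times with values would still need custom descriptor logic:

from sqlalchemy import orm
from sqlalchemy.ext.associationproxy import association_proxy

class Measurement(object):
    def __init__(self, time=None, value=None):
        self.time = time
        self.value = value

class Container(object):
    # Read c.times / c.values as plain lists, ordered by time.
    # Assigning c.times creates bare Measurement(time=t) rows via the
    # creator, so treat this as read-mostly.
    times = association_proxy('measurements', 'time',
                              creator=lambda t: Measurement(time=t))
    values = association_proxy('measurements', 'value',
                               creator=lambda v: Measurement(value=v))

orm.mapper(Measurement, measurement_table)
orm.mapper(Container, container_table, properties={
    'measurements': orm.relation(Measurement,
                                 order_by=measurement_table.c.time),
})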

Related

How to dynamically create multiple Table classes in SQLAlchemy?

I'm trying to create multiple classes in SQLAlchemy to generate specific tables.
I found different approaches here and more on https://docs.python.org/3/library/functions.html#import and https://python-course.eu/oop/dynamically-creating-classes-with-type.php
They seem clear enough, but their examples use global operations, and I don't understand exactly how I can use them in a sub-function in something like this:
class _Table(Base):
    __tablename__ = '_table'
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String)

table_list = ['Table1', 'Table2']

def table_class_generator(table_list):
    # here I need to create the tables

def tables_operators():
    # here I make operations on tables
So I need this one to convert globally to something like:
class Table1(Base):
    __tablename__ = 'table1'
    id = ...
    name = ...

class Table2(Base):
    __tablename__ = 'table2'
    id = ...
    name = ...

def table_operators():
    #
Thanks @Gord Thompson, who put me on another track.
I believe the solution is the following; I have only tested the database initialisation so far.
def tables_constructor(names: list) -> list:
    """
    Creates all tables in the database and returns the Table objects.
    """
    engine = get_engine()
    metadata_obj = MetaData()
    tables = []
    for name in names:
        name_obj = Table(
            name,
            metadata_obj,
            Column('id', Integer, primary_key=True, autoincrement=True),
            Column('timestamp', Integer)
        )
        tables.append(name_obj)
    metadata_obj.create_all(engine)
    return tables

def do_something(tables):
    # ...

def start_app():
    names = ["Table_1", "Table_2"]
    tables = tables_constructor(names)
    do_something(tables)
Now I think I can do things with the tables, as they are mapped to Table objects.
It would be nice to improve this by defining the table columns somewhere else; I tried, but it isn't working.
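One hedged way to do that (a sketch of my own, not from the thread; get_engine is the same assumed helper as in the answer above) is to pass a callable that builds fresh Column objects, since a Column instance can only be attached to one Table:

from sqlalchemy import MetaData, Table, Column, Integer

def default_columns():
    # Return a new list of Column objects on every call; Columns
    # cannot be reused across Tables.
    return [
        Column('id', Integer, primary_key=True, autoincrement=True),
        Column('timestamp', Integer),
    ]

def tables_constructor(names, column_factory=default_columns):
    metadata_obj = MetaData()
    tables = [Table(name, metadata_obj, *column_factory()) for name in names]
    metadata_obj.create_all(get_engine())
    return tables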

Postgresql partition and sqlalchemy

The SQLAlchemy docs explain how to create a partitioned table, but they do not explain how to create the partitions.
So if I have this:
# Skipping create_engine and metadata
Base = declarative_base()

class Measure(Base):
    __tablename__ = 'measures'
    __table_args__ = {
        'postgresql_partition_by': 'RANGE (log_date)'
    }
    city_id = Column(Integer, nullable=False)
    log_date = Column(Date, nullable=False)
    peaktemp = Column(Integer)
    unitsales = Column(Integer)

class Measure2020(Base):
    """How am I supposed to declare this?"""

I know that most of the time I'll be doing SELECT * FROM measures WHERE log_date BETWEEN xx AND yy, but partitioning still seems interesting.
You can use a MeasureMixin that both classes inherit from, and then use an event to attach the table partition:

from sqlalchemy import event, DDL

class MeasureMixin:
    city_id = Column(Integer, nullable=False)
    log_date = Column(Date, nullable=False)
    peaktemp = Column(Integer)
    unitsales = Column(Integer)

class Measure(MeasureMixin, Base):
    __tablename__ = 'measures'
    __table_args__ = {
        'postgresql_partition_by': 'RANGE (log_date)'
    }

class Measure2020(MeasureMixin, Base):
    __tablename__ = 'measures2020'

Measure2020.__table__.add_is_dependent_on(Measure.__table__)

event.listen(
    Measure2020.__table__,
    "after_create",
    DDL("""ALTER TABLE measures ATTACH PARTITION measures2020
           FOR VALUES FROM ('2020-01-01') TO ('2021-01-01');""")
)
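A minimal usage sketch (my own, assuming an engine is already configured): create_all emits the parent table first, then the partition, and the after_create hook fires the ATTACH PARTITION statement.

Base.metadata.create_all(engine)
# Emits roughly:
#   CREATE TABLE measures (...) PARTITION BY RANGE (log_date)
#   CREATE TABLE measures2020 (...)
#   ALTER TABLE measures ATTACH PARTITION measures2020
#       FOR VALUES FROM ('2020-01-01') TO ('2021-01-01')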
I had a similar problem. I found @moshevi's answer quite useful, and ended up generalising it a bit (as I had many tables to partition).
First, create a metaclass such as this:
from sqlalchemy.ext.declarative import DeclarativeMeta
from sqlalchemy.sql.ddl import DDL
from sqlalchemy import event

class PartitionByYearMeta(DeclarativeMeta):
    def __new__(cls, clsname, bases, attrs, *, partition_by):

        @classmethod
        def get_partition_name(cls_, key):
            # 'measures' -> 'measures_2020' (customise as needed)
            return f'{cls_.__tablename__}_{key}'

        @classmethod
        def create_partition(cls_, key):
            if key not in cls_.partitions:
                Partition = type(
                    f'{clsname}{key}',  # class name, only used internally
                    bases,
                    {'__tablename__': cls_.get_partition_name(key)}
                )
                Partition.__table__.add_is_dependent_on(cls_.__table__)
                event.listen(
                    Partition.__table__,
                    'after_create',
                    DDL(
                        # For non-year ranges, modify the FROM and TO below
                        f"""
                        ALTER TABLE {cls_.__tablename__}
                        ATTACH PARTITION {Partition.__tablename__}
                        FOR VALUES FROM ('{key}-01-01') TO ('{key+1}-01-01');
                        """
                    )
                )
                cls_.partitions[key] = Partition
            return cls_.partitions[key]

        attrs.update(
            {
                # For non-RANGE partitions, modify the `postgresql_partition_by` key below
                '__table_args__': attrs.get('__table_args__', ())
                + (dict(postgresql_partition_by=f'RANGE({partition_by})'),),
                'partitions': {},
                'partitioned_by': partition_by,
                'get_partition_name': get_partition_name,
                'create_partition': create_partition
            }
        )
        return super().__new__(cls, clsname, bases, attrs)
Next, for any table in your model that you want to partition:

class MeasureMixin:
    # The columns need to be pulled out into this mixin
    # Note: any foreign key columns will need to be wrapped like this:
    @declared_attr
    def city_id(self):
        return Column(ForeignKey('cities.id'), nullable=False)

    log_date = Column(Date, nullable=False)
    peaktemp = Column(Integer)
    unitsales = Column(Integer)

class Measure(MeasureMixin, Base, metaclass=PartitionByYearMeta, partition_by='log_date'):
    __tablename__ = 'measures'
This makes it easy to add more tables and partition by any number of values.
Creating a new partition on the fly works like this:
# Make sure you commit any session that is currently open, even for select queries:
session.commit()

Partition = Measure.create_partition(2020)
if not engine.dialect.has_table(engine.connect(), Partition.__table__.name):
    Partition.__table__.create(bind=engine)
Now the partition for key 2020 is created and values for that year can be inserted.
Maybe a bit late, but I would like to share what I built on top of @moshevi's and @Seb's answers:
In my IoT use case I required actual sub-partitioning (first level: year, second level: nodeid), and I also wanted to generalise it slightly.
This is what I came up with:
from sqlalchemy.ext.declarative import DeclarativeMeta
from sqlalchemy.sql.ddl import DDL
from sqlalchemy import event

class PartitionByMeta(DeclarativeMeta):
    def __new__(cls, clsname, bases, attrs, *, partition_by, partition_type):

        @classmethod
        def get_partition_name(cls_, suffix):
            return f'{cls_.__tablename__}_{suffix}'

        @classmethod
        def create_partition(cls_, suffix, partition_stmt, subpartition_by=None, subpartition_type=None):
            if suffix not in cls_.partitions:
                partition = PartitionByMeta(
                    f'{clsname}{suffix}',
                    bases,
                    {'__tablename__': cls_.get_partition_name(suffix)},
                    partition_type=subpartition_type,
                    partition_by=subpartition_by,
                )
                partition.__table__.add_is_dependent_on(cls_.__table__)
                event.listen(
                    partition.__table__,
                    'after_create',
                    DDL(
                        # For non-year ranges, modify the FROM and TO below
                        # LIST: IN ('first', 'second')
                        # RANGE: FROM ('{key}-01-01') TO ('{key+1}-01-01')
                        f"""
                        ALTER TABLE {cls_.__tablename__}
                        ATTACH PARTITION {partition.__tablename__}
                        {partition_stmt};
                        """
                    )
                )
                cls_.partitions[suffix] = partition
            return cls_.partitions[suffix]

        if partition_by is not None:
            attrs.update(
                {
                    '__table_args__': attrs.get('__table_args__', ())
                    + (dict(postgresql_partition_by=f'{partition_type.upper()}({partition_by})'),),
                    'partitions': {},
                    'partitioned_by': partition_by,
                    'get_partition_name': get_partition_name,
                    'create_partition': create_partition
                }
            )
        return super().__new__(cls, clsname, bases, attrs)
This is used as follows, assuming the respective VehicleDataMixin class has been created as introduced by @moshevi:

class VehicleData(VehicleDataMixin, Project, metaclass=PartitionByMeta, partition_by='timestamp', partition_type='RANGE'):
    __tablename__ = 'vehicle_data'
    __table_args__ = (
        Index('ts_ch_nod_idx', "timestamp", "nodeid", "channelid", postgresql_using='brin'),
        UniqueConstraint('timestamp', 'nodeid', 'channelid', name='ts_ch_nod_constr')
    )
These can then be subpartitioned iteratively like so (adapt as needed):

for y in range(2017, 2021):
    # Create a year partition, list-subpartitioned by nodeid
    tbl_vehid_y = VehicleData.create_partition(
        f"{y}", partition_stmt=f"""FOR VALUES FROM ('{y}-01-01') TO ('{y+1}-01-01')""",
        subpartition_by='nodeid', subpartition_type='LIST'
    )
    for i in {3, 4, 7, 9}:
        # Create the subpartitions for all known nodeids below each year
        tbl_vehid_y.create_partition(
            f"nid{i}", partition_stmt=f"""FOR VALUES IN ('{i}')"""
        )
    # Default (nodeid) partition per year partition
    tbl_vehid_y.create_partition("def", partition_stmt="DEFAULT")

# Default for any year other than those anticipated
VehicleData.create_partition("def", partition_stmt="DEFAULT")
partition_by='timestamp': the column to partition by
partition_type='RANGE': the (PostgreSQL-specific) partition type
partition_stmt=f"""FOR VALUES IN ('{i}')""": the (PostgreSQL-specific) partitioning statement
For database partitioning (PostgreSQL or MySQL), you can try the architect package. It works with a range of ORM libraries, including SQLAlchemy. Here is an example for PostgreSQL: https://architect.readthedocs.io/features/partition/postgresql.html. Several partitioning types are supported, so hopefully it meets the requirements of your case.
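For reference, a hedged sketch of what that looks like, adapted from the architect docs; the model and column names follow the question and are my own substitution:

import architect
from sqlalchemy import Column, Date, Integer
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

# architect creates and routes to child tables via a trigger,
# rather than declarative partition DDL.
@architect.install('partition', type='range', subtype='date',
                   constraint='month', column='log_date')
class Measure(Base):
    __tablename__ = 'measures'
    id = Column(Integer, primary_key=True)
    log_date = Column(Date, nullable=False)

# After the table exists, run the architect CLI once to install the trigger:
#   architect partition --module path.to.this.module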

Dynamically setting __tablename__ for sharding in SQLAlchemy?

In order to handle a growing database table, we are sharding on table name. So we could have database tables that are named like this:
table_md5one
table_md5two
table_md5three
All tables have the exact same schema.
How do we use SQLAlchemy and dynamically specify the table name for the class that corresponds to this? It looks like declarative_base() classes need __tablename__ specified up front.
There will eventually be too many tables to manually specify derived classes from a parent/base class. We want to be able to build a class whose table name can be set up dynamically (maybe passed as a parameter to a function).
OK, we went with the classical SQLAlchemy mapping rather than the declarative one.
So we create a dynamic table object like this:

from sqlalchemy import MetaData, Table, Column, DATE
from sqlalchemy.orm import mapper, clear_mappers

def get_table_object(self, md5hash):
    metadata = MetaData()
    table_name = 'table_' + md5hash
    table_object = Table(table_name, metadata,
        Column('Column1', DATE, nullable=False),
        Column('Column2', DATE, nullable=False)
    )
    clear_mappers()
    mapper(ActualTableObject, table_object)
    return ActualTableObject
Where ActualTableObject is the class mapping to the table.
In Augmenting the Base you find a way of using a custom Base class that can, for example, calculate the __tablename__ attribute dynamically:

from sqlalchemy.ext.declarative import declared_attr

class Base(object):
    @declared_attr
    def __tablename__(cls):
        return cls.__name__.lower()

The only problem here is that I don't know where your hash comes from, but this should give a good starting point.
If you require this algorithm not for all your tables but only for one, you can just use declared_attr on the table you are interested in sharding.
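A hedged sketch of that combined with a shard suffix (shard_suffix is a hypothetical attribute I am introducing here, not part of the answer):

from sqlalchemy import Column, Integer
from sqlalchemy.ext.declarative import declarative_base, declared_attr

class ShardedBase(object):
    # Hypothetical: each subclass carries its own shard suffix, e.g. an md5 hash
    shard_suffix = None

    @declared_attr
    def __tablename__(cls):
        return 'table_' + cls.shard_suffix

Base = declarative_base(cls=ShardedBase)

class TableMd5one(Base):
    shard_suffix = 'md5one'  # -> table name 'table_md5one'
    id = Column(Integer, primary_key=True)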
Because I insist on using declarative classes with their __tablename__ dynamically specified by a given parameter, after days of failing with other solutions and hours of studying SQLAlchemy internals, I came up with the following solution, which I believe is simple, elegant and free of race conditions.
def get_model(suffix):
    DynamicBase = declarative_base(class_registry=dict())

    class MyModel(DynamicBase):
        __tablename__ = 'table_{suffix}'.format(suffix=suffix)

        id = Column(Integer, primary_key=True)
        name = Column(String)
        ...

    return MyModel
Since they have their own class_registry, you will not get that warning saying:
This declarative base already contains a class with the same class name and module name as mypackage.models.MyModel, and will be replaced in the string-lookup table.
Hence, you will not be able to reference them from other models with string lookup. However, it works perfectly fine to use these on-the-fly declared models for foreign keys as well:
ParentModel1 = get_model(123)
ParentModel2 = get_model(456)

class MyChildModel(BaseModel):
    __tablename__ = 'table_child'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    parent_1_id = Column(Integer, ForeignKey(ParentModel1.id))
    parent_2_id = Column(Integer, ForeignKey(ParentModel2.id))
    parent_1 = relationship(ParentModel1)
    parent_2 = relationship(ParentModel2)
If you only use them to query/insert/update/delete, with no references left over (such as a foreign key reference from another table), then the classes, their base classes and their class_registry will be garbage-collected, leaving no trace.
You can write a function that takes the table name as a parameter and returns the class with the appropriate attributes set:

def get_class(table_name):
    class GenericTable(Base):
        __tablename__ = table_name

        ID = Column(types.Integer, primary_key=True)

        def some_function(self):
            ...

    return GenericTable

Then you can create the table with:

get_class("test").__table__.create(bind=engine)  # see sqlalchemy.engine
Try this:

import zlib
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, BigInteger, DateTime, String
from datetime import datetime

BASE = declarative_base()
ENTITY_CLASS_DICT = {}

class AbsShardingClass(BASE):
    __abstract__ = True

def get_class_name_and_table_name(hashid):
    return 'ShardingClass%s' % hashid, 'sharding_class_%s' % hashid

def get_sharding_entity_class(hashid):
    """
    @param hashid: hashid
    @type hashid: int
    @rtype AbsClientUserAuth
    """
    if hashid not in ENTITY_CLASS_DICT:
        class_name, table_name = get_class_name_and_table_name(hashid)
        cls = type(class_name, (AbsShardingClass,),
                   {'__tablename__': table_name})
        ENTITY_CLASS_DICT[hashid] = cls
    return ENTITY_CLASS_DICT[hashid]

cls = get_sharding_entity_class(1)
print session.query(cls).get(100)
Instead of imperatively creating a Table object, you can use the usual declarative_base and a closure to set the table name:

def make_class(Base, table_name):
    class User(Base):
        __tablename__ = table_name

        id = Column(Integer, primary_key=True)
        name = Column(String)

    return User

Base = declarative_base()
engine = make_engine()
custom_named_usertable = make_class(Base, 'custom_name')
Base.metadata.create_all(engine)

session = make_session(engine)
new_user = custom_named_usertable(name='Adam')
session.add(new_user)
session.commit()
session.close()
engine.dispose()
You just need to create a class object for Base:

from sqlalchemy.ext.declarative import declarative_base, declared_attr

class Base(object):
    @declared_attr
    def __tablename__(cls):
        return cls.__name__.lower()

Base = declarative_base(cls=Base)
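A quick usage sketch (my own) of the augmented Base above: the table name is derived from the class name, so the model needs no explicit __tablename__.

from sqlalchemy import Column, Integer

class Account(Base):
    id = Column(Integer, primary_key=True)

# Account.__table__.name == 'account'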

Insert relational data in SQL Alchemy with API queries in Python

Maybe my previous question was too long and endless to answer, sorry for that... I will try to be more specific by shortening it.
I can extract from an API query (json format as output) the following information:
GENE1
Experiment1
Experiment2
Experiment3
Experiment4
GENE2
Experiment5
Experiment2
Experiment3
Experiment8
Experiment9
[...]
So I obtain genes and the experiments in which they have been studied... One gene can have more than one experiment, and one experiment can have more than one gene (many-to-many).
I have this schema in SQL Alchemy:
from sqlalchemy import create_engine, Column, Integer, String, Date, ForeignKey, Table, Float
from sqlalchemy.orm import sessionmaker, relationship, backref
from sqlalchemy.ext.declarative import declarative_base
import requests

Base = declarative_base()

Genes2experiments = Table('genes2experiments', Base.metadata,
    Column('gene_id', String, ForeignKey('genes.id')),
    Column('experiment_id', String, ForeignKey('experiments.id'))
)

class Genes(Base):
    __tablename__ = 'genes'
    id = Column(String(45), primary_key=True)
    experiments = relationship("Experiments", secondary=Genes2experiments, backref="genes")

    def __init__(self, id=""):
        self.id = id

    def __repr__(self):
        return "<genes(id:'%s')>" % (self.id)

class Experiments(Base):
    __tablename__ = 'experiments'
    id = Column(String(45), primary_key=True)

    def __init__(self, id=""):
        self.id = id

    def __repr__(self):
        return "<experiments(id:'%s')>" % (self.id)

def setUp():
    global Session
    engine = create_engine('mysql://root:password@localhost/db_name?charset=utf8', pool_recycle=3600, echo=False)
    Session = sessionmaker(bind=engine)

def add_data():
    session = Session()
    for i in range(0, 1000, 200):
        request = requests.get('http://www.ebi.ac.uk/gxa/api/v1',
                               params={"updownInOrganism_part": "brain", "rows": 200, "start": i})
        result = request.json()
        for item in result['results']:
            gene_to_add = item['gene']['ensemblGeneId']
            session.add(Genes(gene_to_add))
    session.commit()
    session.close()

setUp()
add_data()
With this code I just add all the genes from the API query to the Genes table...
First question: how and when should I add the experiment information so as to keep the gene-experiment relationships?
Second question: should I add a second secondary relationship in the Experiments class, mirroring the one in the Genes class, or is it enough to declare just one?
Thank you!
(for more context/info: my previous question)
Whenever you record the results of an experiment, or even when you plan one, you can already add instances and their relationships to the database.
Having a backref effectively adds the other side of the relationship, so given an instance of Experiments you can get its Genes[] via my_experiment.genes.
Note: I would remove the plural from the names of your entities: class Gene and class Experiment instead of class Genes and class Experiments.
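To make that concrete, here is a hedged sketch of the add_data loop extended to link experiments. The 'experiments' key in the JSON is a guess based on the question's sample output, and a single relationship with backref is enough for both directions:

def get_or_create(session, model, id_):
    # Look the row up by primary key; create it if missing.
    obj = session.query(model).get(id_)
    if obj is None:
        obj = model(id_)
        session.add(obj)
    return obj

def add_data():
    session = Session()
    for i in range(0, 1000, 200):
        request = requests.get('http://www.ebi.ac.uk/gxa/api/v1',
                               params={"updownInOrganism_part": "brain", "rows": 200, "start": i})
        for item in request.json()['results']:
            gene = get_or_create(session, Genes, item['gene']['ensemblGeneId'])
            for exp_id in item['gene'].get('experiments', []):  # guessed key
                experiment = get_or_create(session, Experiments, exp_id)
                if experiment not in gene.experiments:
                    gene.experiments.append(experiment)
    session.commit()
    session.close()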

Setting up relations/mappings for a SQLAlchemy many-to-many database

I'm new to SQLAlchemy and relational databases, and I'm trying to set up a model for an annotated lexicon. I want to support an arbitrary number of key-value annotations for the words which can be added or removed at runtime. Since there will be a lot of repetition in the names of the keys, I don't want to use this solution directly, although the code is similar.
My design has word objects and property objects. The words and properties are stored in separate tables with a property_values table that links the two. Here's the code:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.ext.declarative import declarative_base

engine = create_engine('sqlite:///test.db', echo=True)
meta = MetaData(bind=engine)

property_values = Table('property_values', meta,
    Column('word_id', Integer, ForeignKey('words.id')),
    Column('property_id', Integer, ForeignKey('properties.id')),
    Column('value', String(20))
)

words = Table('words', meta,
    Column('id', Integer, primary_key=True),
    Column('name', String(20)),
    Column('freq', Integer)
)

properties = Table('properties', meta,
    Column('id', Integer, primary_key=True),
    Column('name', String(20), nullable=False, unique=True)
)

meta.create_all()

class Word(object):
    def __init__(self, name, freq=1):
        self.name = name
        self.freq = freq

class Property(object):
    def __init__(self, name):
        self.name = name

mapper(Property, properties)
Now I'd like to be able to do the following:
Session = sessionmaker(bind=engine)
s = Session()
word = Word('foo', 42)
word['bar'] = 'yes' # or word.bar = 'yes' ?
s.add(word)
s.commit()
Ideally this should add 1|foo|42 to the words table, 1|bar to the properties table, and 1|1|yes to the property_values table. However, I don't have the right mappings and relations in place to make this happen. From the documentation at http://www.sqlalchemy.org/docs/05/mappers.html#association-pattern I get the sense that I want an association proxy or something of that sort, but the syntax is unclear to me. I experimented with this:
mapper(Word, words, properties={
    'properties': relation(Property, secondary=property_values)
})
but this mapper only fills in the foreign key values, and I need to fill in the other value as well. Any assistance would be greatly appreciated.
Simply use the Dictionary-Based Collections mapping, an out-of-the-box solution to your question. Extract from the link:
from sqlalchemy.orm.collections import column_mapped_collection, attribute_mapped_collection, mapped_collection

mapper(Item, items_table, properties={
    # key by column
    'notes': relation(Note, collection_class=column_mapped_collection(notes_table.c.keyword)),
    # or named attribute
    'notes2': relation(Note, collection_class=attribute_mapped_collection('keyword')),
    # or any callable
    'notes3': relation(Note, collection_class=mapped_collection(lambda entity: entity.a + entity.b))
})

# ...
item = Item()
item.notes['color'] = Note('color', 'blue')
print item.notes['color']
Or try the solution for Inserting data in Many to Many relationship in SQLAlchemy. Obviously you have to replace the list logic with the dict one.
It would also be worth asking the question's author to post his final code with associationproxy, which he mentioned he used in the end.
There is a very similar question with a slight interface difference, but it's easy to fix by defining __getitem__, __setitem__ and __delitem__ methods.
Comment for Brent, above: you can use session.flush() instead of commit() to get an id on your model instances. flush() will execute the necessary SQL but will not commit, so you can roll back later if needed.
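A tiny sketch of that (my own, reusing the session s and the Property class from the question):

p = Property('bar')
s.add(p)
s.flush()   # emits the INSERT; p.id is now populated
print p.id  # usable before commit; s.rollback() would still undo it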
I ended up combining Denis's and van's posts to form the solution:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base

meta = MetaData()
Base = declarative_base(metadata=meta, name='Base')

class PropertyValue(Base):
    __tablename__ = 'property_values'

    WordID = Column(Integer, ForeignKey('words.id'), primary_key=True)
    PropID = Column(Integer, ForeignKey('properties.id'), primary_key=True)
    Value = Column(String(20))

def _property_for_name(prop_name):
    return s.query(Property).filter_by(name=prop_name).first()

def _create_propval(prop_name, prop_val):
    p = _property_for_name(prop_name)
    if not p:
        p = Property(prop_name)
        s.add(p)
        s.commit()
    return PropertyValue(PropID=p.id, Value=prop_val)

class Word(Base):
    __tablename__ = 'words'

    id = Column(Integer, primary_key=True)
    string = Column(String(20), nullable=False)
    freq = Column(Integer)
    _props = relation(PropertyValue, collection_class=attribute_mapped_collection('PropID'), cascade='all, delete-orphan')
    props = association_proxy('_props', 'Value', creator=_create_propval)

    def __init__(self, string, freq=1):
        self.string = string
        self.freq = freq

    def __getitem__(self, prop):
        p = _property_for_name(prop)
        if p:
            return self.props[p.id]
        else:
            return None

    def __setitem__(self, prop, val):
        self.props[prop] = val

    def __delitem__(self, prop):
        p = _property_for_name(prop)
        if p:
            del self.props[prop]

class Property(Base):
    __tablename__ = 'properties'

    id = Column(Integer, primary_key=True)
    name = Column(String(20), nullable=False, unique=True)

    def __init__(self, name):
        self.name = name

engine = create_engine('sqlite:///test.db', echo=False)
Session = sessionmaker(bind=engine)
s = Session()
meta.create_all(engine)
The test code is as follows:
word = Word('foo', 42)
word['bar'] = "yes"
word['baz'] = "certainly"
s.add(word)

word2 = Word('quux', 20)
word2['bar'] = "nope"
word2['groink'] = "nope"
s.add(word2)

word2['groink'] = "uh-uh"
del word2['bar']
s.commit()

word = s.query(Word).filter_by(string="foo").first()
print word.freq, word['baz']
# prints: 42 certainly
The contents of the database are:
$ sqlite3 test.db "select * from property_values"
1|2|certainly
1|1|yes
2|3|uh-uh
$ sqlite3 test.db "select * from words"
1|foo|42
2|quux|20
$ sqlite3 test.db "select * from properties"
1|bar
2|baz
3|groink
