How to get dict of lists from relationship in sqlalchemy? - python

I've found out that you can use collections in relationship in order to change the type of return value, specifically I was interested in dictionaries.
Documentation gives an example:
class Item(Base):
    """Parent row whose Note children are exposed as a dict keyed by keyword."""

    __tablename__ = 'item'

    id = Column(Integer, primary_key=True)
    # attribute_mapped_collection turns the collection into {keyword: Note}.
    notes = relationship(
        "Note",
        collection_class=attribute_mapped_collection('keyword'),
        cascade="all, delete-orphan",
    )
class Note(Base):
    """Child row; its 'keyword' column is the dict key on Item.notes."""

    __tablename__ = 'note'

    id = Column(Integer, primary_key=True)
    item_id = Column(Integer, ForeignKey('item.id'), nullable=False)
    keyword = Column(String)
    text = Column(String)
And it works. However, I was hoping it would build list values when more than one note shares the same keyword — but it only keeps the last value under each unique key.
Here is an example:
| Note table |
|---------------------|------------------|
| id | keyword |
|---------------------|------------------|
| 1 | foo |
|---------------------|------------------|
| 2 | foo |
|---------------------|------------------|
| 3 | bar |
|---------------------|------------------|
| 4 | bar |
|---------------------|------------------|
item.notes will return something like this:
{'foo': <project.models.note.Note at 0x7fc6840fadd2>,
'bar': <project.models.note.Note at 0x7fc6840fadd4>}
Where ids of foo and bar objects are 2 and 4 respectively.
What I'm looking for is to get something like this:
{'foo': [<project.models.note.Note at 0x7fc6840fadd1>,
<project.models.note.Note at 0x7fc6840fadd2>],
'bar': [<project.models.note.Note at 0x7fc6840fadd3>,
<project.models.note.Note at 0x7fc6840fadd4>]}
Is it possible to get dict of lists from relationship in sqlalchemy?

So, it turns out you can simply inherit MappedCollection and do whatever you like in setitem there.
from sqlalchemy.orm.collections import (MappedCollection,
_SerializableAttrGetter,
collection,
_instrument_class)
# MappedCollection only receives its instrumented __setitem__()/__delitem__()
# when first used by a relationship; calling _instrument_class() up front
# ensures those methods exist before we subclass and override them.
_instrument_class(MappedCollection)


class DictOfListsCollection(MappedCollection):
    """MappedCollection variant mapping each key to a *list* of members.

    Instead of overwriting the previous child when a duplicate key arrives,
    every child sharing a keyword is appended to a list stored under that key.
    """

    # BUGFIX: the scraped text read '#collection.internally_instrumented',
    # turning the required decorator into a comment ('@' mangled to '#').
    # Without it, SQLAlchemy re-instruments __setitem__ and the override
    # is bypassed.
    @collection.internally_instrumented
    def __setitem__(self, key, value, _sa_initiator=None):
        # Create the list bucket on first sight of this key, then append.
        if not super(DictOfListsCollection, self).get(key):
            super(DictOfListsCollection, self).__setitem__(key, [], _sa_initiator)
        super(DictOfListsCollection, self).__getitem__(key).append(value)

Related

SQLAlchemy ORM: Merge two rows based on one common value

How can I merge two rows with same value in one column. Lets say I have a model with ~40 columns like below:
class Model(Base):
__tablename__ = "table"
id = Column(Integer, primary_key=True)
value_a = Column(String)
value_b = Column(String)
value_c = Column(String)
...
And I need to process each time ~500k rows of new data. Also each process creates a new table.
Once inserting the data first time(using session.bulk_insert_mappings(Model, data)) there are duplicated value_c values(max 2), but each time either it has value_a with some string and value_b is empty or value_b with some string and value_a is empty.
After initial insert:
| id | value_a | value_b | value_c |
| -- | ------- | ------- | ------- |
| 1 | foo | None | xyz |
| 2 | None | bar | xyz |
Having all rows I need to merge the rows with common value_c values together to get rid of duplicates.
After update:
| id | value_a | value_b | value_c |
| -- | ------- | ------- | ------- |
| 3 | foo | bar | xyz |
What is the most efficient way to do that? I was using session.merge(row) for each row from the beginning, but it is too slow, so I decided to split the work into insert and update stages.
You should be able to insert from a select statement that joins the not-null a to the not-null b. Then, after inserting the combined rows, you can delete the old rows. This matches the case you outlined exactly; you might need to add more conditions to ignore other entries you do not want inserted or deleted (i.e. (a, b, c) == (None, None, 'value')).
I used aliased so that i can join the same table against itself.
import sys
from sqlalchemy import (
create_engine,
Integer,
String,
)
from sqlalchemy.schema import (
Column,
)
from sqlalchemy.orm import Session, declarative_base, aliased
from sqlalchemy.sql import select, or_, and_, delete, insert
# Connection credentials come from the command line: script.py USER PASSWORD DB
username, password, db = sys.argv[1:4]
Base = declarative_base()
# BUGFIX: '@' (not '#') separates the credentials from the (defaulted) host
# in a SQLAlchemy database URL; the '#' was a scrape artifact.
engine = create_engine(f"postgresql+psycopg2://{username}:{password}@/{db}", echo=True)
metadata = Base.metadata
class Model(Base):
    """Demo table for the merge example: three string payload columns."""

    __tablename__ = "table"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)
    # Before the merge pass, each 'xyz' row has exactly one of value_a /
    # value_b populated and the other NULL.
    value_a = Column(String)
    value_b = Column(String)
    # Common key used to pair the rows that should be merged.
    value_c = Column(String)
metadata.create_all(engine)
def print_models(session):
    """Dump every Model row (id plus the three value columns) to stdout."""
    rows = session.execute(select(Model)).all()
    for (row,) in rows:
        print(row.id, row.value_a, row.value_b, row.value_c)
# Seed data: two half-rows sharing value_c 'xyz' plus one complete row
# that the merge pass must leave untouched.
with Session(engine) as session, session.begin():
for (a, b, c) in [('foo', None, 'xyz'), (None, 'bar', 'xyz'), ('leave', 'it', 'asis')]:
session.add(Model(value_a=a, value_b=b, value_c=c))
session.flush()
print_models(session)
# Merge pass: combine the complementary half-rows, then purge the originals.
with Session(engine) as session, session.begin():
#
# Insert de-nulled entries.
#
left = aliased(Model)
right = aliased(Model)
nulls_joined_q = select(
left.value_a,
right.value_b,
left.value_c
).distinct().select_from(
left
).join(
right,
left.value_c == right.value_c
).where(
and_(
# Ignore entries with no C value.
left.value_c != None,
left.value_b == None,
right.value_a == None))
stmt = insert(
Model.__table__
).from_select([
"value_a",
"value_b",
"value_c"
], nulls_joined_q)
session.execute(stmt)
#
# Remove null entries: All rows where value_c is NOT NULL and either value_a is empty or value_b is empty.
#
# #NOTE: This deletes entries where value_a and value_b are BOTH null in the same row as well.
#
stmt = delete(Model.__table__).where(and_(
# Ignore these like we did in insert.
Model.value_c != None,
or_(
Model.value_a == None,
Model.value_b == None),
))
session.execute(stmt)
session.flush()
# Output
print_models(session)
Output
1 foo None xyz
2 None bar xyz
3 leave it asis
#... then
3 leave it asis
4 foo bar xyz
Docs
https://docs.sqlalchemy.org/en/14/core/dml.html#sqlalchemy.sql.expression.Insert.from_select
https://docs.sqlalchemy.org/en/14/orm/query.html#sqlalchemy.orm.aliased
https://docs.sqlalchemy.org/en/14/core/dml.html#sqlalchemy.sql.expression.delete
https://docs.sqlalchemy.org/en/14/core/dml.html#sqlalchemy.sql.expression.insert

How to create an index on an SQLAlchemy column_property?

Using SQLAlchemy with an SQLite engine, I've got a self-referential hierarchal table that describes a directory structure.
from sqlalchemy import Column, Integer, String, ForeignKey, Index
from sqlalchemy.orm import column_property, aliased, join
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Dr(Base):
    """A directory node; each row knows only its own name and its parent."""

    __tablename__ = 'directories'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    # Self-referential FK; NULL marks a root directory.
    parent_id = Column(Integer, ForeignKey('directories.id'))
Each Dr row only knows its own "name" and its "parent_id". I've added a recursive column_property called "path" that returns a string containing all of a Dr's ancestors from the root Dr.
# Anchor member of the recursive CTE: root directories (no parent) seed
# 'path' with just their own name.
root_anchor = (
    select([Dr.id, Dr.name, Dr.parent_id, Dr.name.label('path')])
    .where(Dr.parent_id == None).cte(recursive=True)
)
dir_alias = aliased(Dr)
cte_alias = aliased(root_anchor)
# Recursive member: join each directory to its parent's CTE row and extend
# the accumulated path with "/<name>".
# BUGFIX: the scraped snippet carried one extra trailing ')', which made
# this a SyntaxError; parentheses are rebalanced here.
path_table = root_anchor.union_all(
    select([
        dir_alias.id, dir_alias.name,
        dir_alias.parent_id, cte_alias.c.path + "/" + dir_alias.name
    ]).select_from(join(
        dir_alias, cte_alias, onclause=cte_alias.c.id == dir_alias.parent_id)
    )
)
# Correlated scalar subquery exposing the computed path on Dr instances.
Dr.path = column_property(
    select([path_table.c.path]).where(path_table.c.id == Dr.id)
)
Here's an example of the output:
"""
-----------------------------
| id | name | parent_id |
-----------------------------
| 1 | root | NULL |
-----------------------------
| 2 | kid | 1 |
-----------------------------
| 3 | grandkid | 2 |
-----------------------------
"""
sqllite_engine = create_engine('sqlite:///:memory:')
Session = sessionmaker(bind=sqllite_engine)
session = Session()
instance = session.query(Dr).filter(Dr.name=='grandkid').one()
print(instance.path)
# Outputs: "root/kid/grandkid"
I'd like to be able to add an index, or a least a unique constraint, on the "path" property so that unique paths cannot exist more than once in the table. I've tried:
Index('pathindex', Directory.path, unique=True)
...with no luck. No error is raised, but SQLAlchemy doesn't seem to register the index, it just silently ignores it. It still allows adding a duplicate path, e.g.:
session.add(Dr(name='grandkid', parent_id=2))
session.commit()
As further evidence that the Index() was ignored, inspecting the "indexes" property of the table results in an empty set:
print(Dr.__table__.indexes)
#Outputs: set([])
It's essential to me that duplicate paths cannot exist in the database. I'm not sure whether what I'm trying to do with column_property is possible in SQLAlchemy, and if not I'd love to hear some suggestions on how else I can go about this.
I think a unique index should suffice; in class Dr:
__table_args__ = (UniqueConstraint('parent_id', 'name'), )

SQLAlchemy relationships populating foreign key fields

I have the following tables with their respective sqlalchemy classes:
class Enrolled(Base):
    """Enrollment row linking one student to one subject."""

    __tablename__ = 'enrolled'

    id = Column(Integer, primary_key=True, nullable=False, autoincrement=True)
    student_fk = Column(Integer, ForeignKey('students.id'))
    # BUGFIX: foreign_keys must name the local FK column; the original
    # referenced an undefined 'device_fk', which raises a NameError.
    student = relationship('Students', foreign_keys=[student_fk], uselist=False, backref="enrolled", innerjoin=False, post_update=False)
    subject = Column(String(5, convert_unicode=True), nullable=False)
    # __init__ for id and subject is here.
class Students(Base):
    """A student; one student may own many Enrolled rows via the backref."""

    __tablename__ = 'students'

    id = Column(Integer, primary_key=True, nullable=False, autoincrement=True)
    name = Column(String(50, convert_unicode=True), nullable=False)
    # __init__ for name is here.
The relationship between students and enrolled is one-to-many, i.e. one student can enroll himself in more than one subject.
Now, I know to insert a couple of subjects into 'Enrolled' and names into 'Students' classes.
DBSession.add(Enrolled(subject="maths"))
In the end this is how my tables look
Enrolled:
+----+------------+---------+
| id | student_fk | subject |
+----+------------+---------+
| 1 | | Maths |
| 2 | | Physics |
| 3 | | Art |
+----+------------+---------+
Students:
+----+------+
| id | name |
+----+------+
| 1 | Jim |
| 2 | Bob |
| 3 | Cara |
+----+------+
Now, how do I get the students id get into Enrolled table as foreign keys?
I have this information : which student is enrolled into which subject as a .csv file..
mycsv: name,subject,name1,subject1,name2,subject2
Should I have a manual dictionary like dict {jim:maths,Bob:Art,Cara:Physics} and then map like
query=Enrolled(subject="maths")
for k, v in dict.items():
if subject in v:
list.append(k)
for i in list:
query.student=DBSession.query(Students).filter(name=i).first()
DBSession.add(query)
Please help.. How do I get the student_fk field populated properly?
Your 1-to-many enrollment table should have composite primary key on Student ID and subject. Assuming you want to keep subjects as ENUM (which works with small list of subjects, otherwise you should move it to a separate table), you tables should look something like:
# Closed set of subjects kept as an ENUM; move to its own table if it grows.
subjects = [
    'Maths',
    'Physics',
    'Art',
]


class Student(Base):
    """A student identified by an auto-incrementing surrogate key."""

    __tablename__ = 'Student'

    student_id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(50, convert_unicode=True), nullable=False)


class StudentEnrollment(Base):
    """Association of a student with a subject; (student_id, subject) is the PK."""

    __tablename__ = 'StudentEnrollment'

    student_id = Column(Integer, ForeignKey('Student.student_id', ondelete='CASCADE'), primary_key=True)
    subject = Column(Enum(*subjects), primary_key=True)
    student = relationship("Student", primaryjoin='StudentEnrollment.student_id==Student.student_id', uselist=True, backref="enrollments")
which will result in:
root@localhost [inDB]> show create table Student\G
*************************** 1. row ***************************
Table: Student
Create Table: CREATE TABLE `Student` (
`student_id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(50) NOT NULL,
PRIMARY KEY (`student_id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
1 row in set (0.00 sec)
root@localhost [inDB]> show create table StudentEnrollment\G
*************************** 1. row ***************************
Table: StudentEnrollment
Create Table: CREATE TABLE `StudentEnrollment` (
`student_id` int(11) NOT NULL,
`subject` enum('Maths','Physics','Art') NOT NULL,
PRIMARY KEY (`student_id`,`subject`),
CONSTRAINT `StudentEnrollment_ibfk_1` FOREIGN KEY (`student_id`) REFERENCES `Student` (`student_id`) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1
1 row in set (0.00 sec)
then to insert few enrollments for user Jim:
student = Student(name='Jim')
session.add(student)
# flush() assigns student.student_id so it can be used as the FK below.
session.flush()
for enr in ('Maths', 'Physics', 'Art'):
session.add(StudentEnrollment(student_id=student.student_id, subject=enr))
session.flush()
session.commit()
which will result in:
root@localhost [inDB]> select * from Student;
+------------+------+
| student_id | name |
+------------+------+
| 3 | Jim |
+------------+------+
1 row in set (0.00 sec)
root@localhost [inDB]> select * from StudentEnrollment;
+------------+---------+
| student_id | subject |
+------------+---------+
| 3 | Maths |
| 3 | Physics |
| 3 | Art |
+------------+---------+
3 rows in set (0.00 sec)
This is a very basic example with two tables. A better option would be to normalize Enrollments into separate table and use association proxy pattern, see http://docs.sqlalchemy.org/en/rel_0_9/orm/extensions/associationproxy.html

How to avoid adding duplicates in a many-to-many relationship table in SQLAlchemy - python?

I am dealing with a many-to-many relationship with sqlalchemy. My question is how to avoid adding duplicate pair values in a many-to-many relational table.
To make things clearer, I will use the example from the official SQLAlchemy documentation.
Base = declarative_base()

# Bare association table linking parents and children. It carries no
# primary key or unique constraint, which is why duplicate pairs can be
# inserted (the problem this question is about).
Parents2children = Table('parents2children', Base.metadata,
    Column('parents_id', Integer, ForeignKey('parents.id')),
    Column('children_id', Integer, ForeignKey('children.id'))
)
class Parent(Base):
    """Parent side of the many-to-many; children are reachable via child_rel."""

    __tablename__ = 'parents'

    id = Column(Integer, primary_key=True)
    parent_name = Column(String(45))
    child_rel = relationship("Child", secondary=Parents2children, backref="parents_backref")

    def __init__(self, parent_name=""):
        self.parent_name = parent_name

    def __repr__(self):
        return "<parents(id:'%i', parent_name:'%s')>" % (self.id, self.parent_name)
class Child(Base):
    """Child side; parents are reachable via the 'parents_backref' backref."""

    __tablename__ = 'children'

    id = Column(Integer, primary_key=True)
    child_name = Column(String(45))

    def __init__(self, child_name=""):
        self.child_name = child_name

    def __repr__(self):
        # NOTE(review): '<experiments(...' looks like a copy-paste leftover,
        # but it is a runtime string, so it is reproduced unchanged.
        return "<experiments(id:'%i', child_name:'%s')>" % (self.id, self.child_name)
###########################################
def setUp():
    """Create the engine and bind the module-global Session factory."""
    global Session
    # BUGFIX: '@' (not '#') separates credentials from host in the MySQL
    # URL; the '#' was a scrape artifact and made the DSN unparseable.
    engine = create_engine('mysql://root:root@localhost/db_name?charset=utf8', pool_recycle=3600, echo=False)
    Session = sessionmaker(bind=engine)
def add_data():
    """Insert two parents and two children, linking John to Richard twice.

    The link is appended once from each side of the backref, which is what
    produces the duplicate pair in the parents2children table.
    """
    session = Session()
    richard = Parent(parent_name="Richard")
    kate = Parent(parent_name="Kate")
    helen = Child(child_name="Helen")
    john = Child(child_name="John")
    session.add(richard)
    session.add(kate)
    richard.child_rel.append(john)
    helen.parents_backref.append(richard)
    john.parents_backref.append(richard)
    session.commit()
    session.close()
setUp()
add_data()
session.close()
With this code, the data inserted in the tables is the following:
Parents table:
+----+-------------+
| id | parent_name |
+----+-------------+
| 1 | Richard |
| 2 | Kate |
+----+-------------+
Children table:
+----+------------+
| id | child_name |
+----+------------+
| 1 | Helen |
| 2 | John |
+----+------------+
Parents2children table
+------------+-------------+
| parents_id | children_id |
+------------+-------------+
| 1 | 1 |
| 1 | 2 |
| 1 | 1 |
+------------+-------------+
As you can see, there's a duplicate in the last table... how could I prevent SQLAlchemy from adding these duplicates?
I've tried to put relationship("Child", secondary=..., collection_class=set) but this error is displayed:
AttributeError: 'InstrumentedSet' object has no attribute 'append'
Add a PrimaryKeyConstraint (or a UniqueConstraint) to your relationship table:
Parents2children = Table('parents2children', Base.metadata,
    Column('parents_id', Integer, ForeignKey('parents.id')),
    Column('children_id', Integer, ForeignKey('children.id')),
    # Composite PK makes each (parent, child) pair unique at the DB level.
    PrimaryKeyConstraint('parents_id', 'children_id'),
)
and your code will raise an error when you try to commit the relationship added from both sides. Doing this is highly recommended.
In order to not even generate an error, just check first:
# Only append the link if it is not already present on the backref side.
if name_father1 not in name_son1.parents_backref:
    name_son1.parents_backref.append(name_father1)

SQLAlchemy Column to Row Transformation and vice versa -- is it possible?

I'm looking for a SQLAlchemy only solution for converting a dict received from a form submission into a series of rows in the database, one for each field submitted. This is to handle preferences and settings that vary widely across applications. But, it's very likely applicable to creating pivot table like functionality. I've seen this type of thing in ETL tools but I was looking for a way to do it directly in the ORM. I couldn't find any documentation on it but maybe I missed something.
Example:
Submitted from form: {"UniqueId":1, "a":23, "b":"Hello", "c":"World"}
I would like it to be transformed (in the ORM) so that it is recorded in the database like this:
_______________________________________
|UniqueId| ItemName | ItemValue |
---------------------------------------
| 1 | a | 23 |
---------------------------------------
| 1 | b | Hello |
---------------------------------------
| 1 | c | World |
---------------------------------------
Upon a select the result would be transformed (in the ORM) back into a row of data from each of the individual values.
---------------------------------------------------
| UniqueId | a | b | c |
---------------------------------------------------
| 1 | 23 | Hello | World |
---------------------------------------------------
I would assume on an update that the best course of action would be to wrap a delete/create in a transaction so the current records would be removed and the new ones inserted.
The definitive list of ItemNames will be maintained in a separate table.
Totally open to more elegant solutions but would like to keep out of the database side if at all possible.
I'm using the declarative_base approach with SQLAlchemy.
Thanks in advance...
Cheers,
Paul
Here is a slightly modified example from documentation to work with such table structure mapped to dictionary in model:
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm import relation, sessionmaker
metadata = MetaData()
Base = declarative_base(metadata=metadata, name='Base')
class Item(Base):
    """One (name, value) pair belonging to an ItemSet row."""

    __tablename__ = 'Item'

    # Composite PK: the owning set plus the item name.
    UniqueId = Column(Integer, ForeignKey('ItemSet.UniqueId'), primary_key=True)
    ItemSet = relation('ItemSet')
    ItemName = Column(String(10), primary_key=True)
    ItemValue = Column(Text)  # Use PickleType?
def _create_item(ItemName, ItemValue):
    """Creator for the association proxy: wrap a (name, value) pair in an Item."""
    return Item(ItemName=ItemName, ItemValue=ItemValue)
class ItemSet(Base):
    """Row whose Items are exposed to callers as a plain {name: value} dict."""

    __tablename__ = 'ItemSet'

    UniqueId = Column(Integer, primary_key=True)
    # Items keyed by ItemName...
    _items = relation(Item, collection_class=attribute_mapped_collection('ItemName'))
    # ...and proxied so callers read/assign bare values instead of Item objects.
    items = association_proxy('_items', 'ItemValue', creator=_create_item)
# In-memory database for the demo; echo=True logs the emitted SQL.
engine = create_engine('sqlite://', echo=True)
metadata.create_all(engine)
session = sessionmaker(bind=engine)()
data = {"UniqueId": 1, "a": 23, "b": "Hello", "c": "World"}
# pop() strips the key column; the remaining pairs become Item rows
# via the association proxy's dict assignment below.
s = ItemSet(UniqueId=data.pop("UniqueId"))
s.items = data
session.add(s)
session.commit()

Categories

Resources