SQLAlchemy - How to correctly connect two sets of data? - python

I am hoping for some guidance about what I believe is going to be a common pattern in SQLAlchemy for Python. However, I have so far failed to find a simple explanation for someone new to SQLAlchemy.
I have the following objects:
Customers
Orders
Products
I am building a Python FastAPI application, and I want to be able to create customers and products individually. Subsequently, I want to be able to create an order for a customer that can contain one or more products. A customer will also be able to have multiple orders.
Here are my SQLAlchemy models:
order_products = Table('order_products', Base.metadata,
    Column('order_id', ForeignKey('orders.id'), primary_key=True),
    Column('product_id', ForeignKey('products.id'), primary_key=True)
)

class Customer(Base):
    __tablename__ = "customers"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, index=True)
    address = Column(String)
    phonenumber = Column(String)
    email = Column(String, unique=True, index=True)
    is_active = Column(Boolean, default=True)

    orders = relationship("Order", back_populates="customers")

class Order(Base):
    __tablename__ = "orders"

    id = Column(Integer, primary_key=True, index=True)
    ordernumber = Column(String, index=True)
    customer_id = Column(Integer, ForeignKey("customers.id"))

    customers = relationship("Customer", back_populates="orders")
    products = relationship("Product", secondary="order_products", back_populates="orders")

class Product(Base):
    __tablename__ = "products"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, index=True)
    size = Column(Integer)
    order_id = Column(Integer, ForeignKey("orders.id"))

    orders = relationship("Order", secondary="order_products", back_populates="products")
And here are my CRUD operations:
def create_customer(db: Session, customer: customer.CustomerCreate):
    db_customer = models.Customer(name=customer.name, address=customer.address, email=customer.email, phonenumber=customer.phonenumber)
    db.add(db_customer)
    db.commit()
    db.refresh(db_customer)
    return db_customer

def create_product(db: Session, product: product.ProductCreate):
    db_product = models.Product(name=product.name, size=product.size)
    db.add(db_product)
    db.commit()
    db.refresh(db_product)
    return db_product

def create_order(db: Session, order: order.OrderCreate, cust_id: int):
    db_order = models.Order(**order.dict(), customer_id=cust_id)
    db.add(db_order)
    db.commit()
    db.refresh(db_order)
    return db_order

def update_order_with_product(db: Session, order: order.Order):
    db_order = db.query(models.Order).filter(models.Order.id==1).first()
    if db_order is None:
        return None
    db_product = db.query(models.Order).filter(models.Product.id==1).first()
    if db_order is None:
        return None
    db_order.products.append(db_product)
    db.add(db_order)
    db.commit()
    db.refresh(db_order)
    return db_order
All of the CRUD operations work apart from update_order_with_product, which gives me this error:
child_impl = child_state.manager[key].impl
KeyError: 'orders'
I'm not sure if I am taking the correct approach to the pattern needed to define the relationships between my models. If not, can someone point me in the right direction of some good examples for a beginner?
If my pattern is valid, then there must be an issue with how my CRUD operation tries to create the relationship. Can anyone help with that?

This query could be a problem:
db_product = db.query(models.Order).filter(models.Product.id==1).first()
Should probably be:
db_product = db.query(models.Product).filter(models.Product.id==1).first()
because you want to get a Product instance, not Order.
When you update a record you should not add it to the session again (it was already registered with the session when you queried it).
def update_order_with_product(db: Session, order: order.Order):
    db_order = db.query(models.Order).filter(models.Order.id==1).first()
    if db_order is None:
        return None
    db_product = db.query(models.Product).filter(models.Product.id==1).first()
    if db_product is None:
        return None
    db_order.products.append(db_product)
    db.commit()
    db.refresh(db_order)
    return db_order
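As a follow-up sketch (my addition, not part of the original answer), the same function can be generalised so the ids are parameters instead of the hard-coded 1; the function name below is hypothetical. Appending to Order.products and committing is what writes the row into the order_products association table.

def add_product_to_order(db: Session, order_id: int, product_id: int):
    # hypothetical generalisation of update_order_with_product
    db_order = db.query(models.Order).filter(models.Order.id == order_id).first()
    if db_order is None:
        return None
    db_product = db.query(models.Product).filter(models.Product.id == product_id).first()
    if db_product is None:
        return None
    db_order.products.append(db_product)  # inserts a row into order_products on commit
    db.commit()
    db.refresh(db_order)
    return db_order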

Related

Best practice for unique constraint validation

Coming from Django and DRF, unique key validation is a piece of cake.
I'm trying to understand what the best practice is for this kind of validation.
I have some keys that need to be unique within the db table, but I can't figure out how to enforce that.
I looked for an answer in pydantic's repo, and they say it is bad practice to have a validator on the schema that queries the database and checks for duplicates.
So, what is the best practice for validating a unique key value?
# crud create
def create(self, db: Session, obj: CreateSchemaType) -> ModelType:
    obj_data = jsonable_encoder(obj)
    db_obj = self.model(**obj_data)
    db.add(db_obj)
    db.commit()
    db.refresh(db_obj)
    return db_obj

# model
class Company(Base):
    __tablename__ = "companies"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
    name = Column(String, nullable=False, index=True)
    business_number = Column(String, nullable=False, index=True)
    carrier_license_exp_date = Column(Date)
    professional_manager = Column(JSON, nullable=False)
    address = Column(JSON)
    contact = Column(JSON)
    email = Column(String)
    fax_number = Column(String)
    phone_number = Column(String)

# schema
class CompanyBase(BaseModel):
    name: str
    business_number: str
    professional_manager: ProfessionalManager
    carrier_license_exp_date: date | None
    contact: Contact | None = None
    address: Address | None = None
    phone_number: str | None = None
    fax_number: str | None = None
    email: EmailStr | None = None
# endpoint
@router.post(
    "/",
    status_code=status.HTTP_201_CREATED,
    response_model=schemas.Company,
    response_model_exclude_none=True,
)
async def create_company(
    company_obj: schemas.CompanyCreate,
    db: Session = Depends(deps.get_db),
) -> Any:
    company = crud.company.create(db=db, obj=company_obj)
    return company
example case:
The values of the keys name and business_number should be unique.
As Gord says, the best practice is to let the database handle those kinds of constraints. That way you can be sure your data is consistent, no matter how it ends up in the database (even with pydantic validation, someone might e.g. want to import a list of companies later and forget to use that specific validation).
Also, since you don't have the entire database in memory, you're going to have to run a database query either way; first checking whether the data is OK with a SELECT and then inserting would mean two database calls instead of one, plus more code to maintain.
See this part of the SQLAlchemy documentation for examples. In your example, it would be:
class Company(Base):
    __tablename__ = "companies"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
    name = Column(String, nullable=False, unique=True)
    business_number = Column(String, nullable=False, unique=True)
    ...
Or, if the combination of name and business_number should be unique:
from sqlalchemy import UniqueConstraint

class Company(Base):
    __tablename__ = "companies"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
    name = Column(String, nullable=False)
    business_number = Column(String, nullable=False)
    ...
    __table_args__ = (
        UniqueConstraint('name', 'business_number', name='_name_business_number_uc'),
    )
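As a follow-up (a sketch, my addition rather than part of the original answer): once the database enforces the constraint, the application only has to translate the resulting IntegrityError into an HTTP error. The create method below is the one from the question; returning a 409 is an assumption about how you want to surface the conflict.

from fastapi import HTTPException, status
from sqlalchemy.exc import IntegrityError

def create(self, db: Session, obj: CreateSchemaType) -> ModelType:
    obj_data = jsonable_encoder(obj)
    db_obj = self.model(**obj_data)
    db.add(db_obj)
    try:
        db.commit()
    except IntegrityError:
        db.rollback()
        # assumes the violated constraint is one of the unique constraints above
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="A company with this name / business_number already exists",
        )
    db.refresh(db_obj)
    return db_obj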

How to speed up Python and SQLAlchemy?

The data in my source code has the format below: an array inside a dict, inside an array inside a dict, inside an array inside a dict, and so on.
# data structure (roughly 100 users shaped like this)
user_list = [{'user_name': 'A',
              'email': 'aaa#aaa.com',
              'items': [{'name': 'a_itme1',
                         'properties': [{1....}, {2....}, ...]}]
             }] * 100
I'm trying to put the above data into a PostgreSQL database with SQLAlchemy.
There is a user table, an item table, and a properties table.
And there are tables that link users to items, and items to properties, respectively.
for u in user_list:
    new_user = User(user_name=u.get('user_name'), ....)
    session.add(new_user)
    session.flush()
    for item in u.get('items'):
        new_item = Item(name=item.get('name'), .....)
        session.add(new_item)
        session.flush()
        new_item_link = UserItemLink(user_id=new_user.id, item_id=new_item.id, ...)
        session.add(new_item_link)
        session.flush()
        for prop in item.get('properties'):
            new_properties = Properties(name=prop.get('name'), ...)
            session.add(new_properties)
            session.flush()
            new_prop_link = ItemPropLink(item_id=new_item.id, prop_id=new_properties.id, ...)
            session.add(new_prop_link)
            session.flush()
session.commit()
My models look like this:
class User(Base):
__tablename__ = 'user'
id = Column(Integer, Identity(always=True, start=1, increment=1, minvalue=1, maxvalue=2147483647, cycle=False, cache=1), primary_key=True)
name = Column(String(20))
email = Column(String(50))
user_item_link = relationship('UserItemLink', back_populates='user')
class Item(Base):
__tablename__ = 'item'
id = Column(Integer, Identity(always=True, start=1, increment=1, minvalue=1, maxvalue=2147483647, cycle=False, cache=1), primary_key=True)
name = Column(String(50))
note = Column(String(50))
user_item_link = relationship('UserItemLink', back_populates='item')
class Properties(Base):
__tablename__ = 'properties'
id = Column(Integer, Identity(always=True, start=1, increment=1, minvalue=1, maxvalue=2147483647, cycle=False, cache=1), primary_key=True)
name = Column(String(50))
value = Column(String(50))
item_prop_link = relationship('ItemPropLink', back_populates='properties')
class UserItemLink(Base):
__tablename__ = 'user_item_link'
id = Column(Integer, Identity(always=True, start=1, increment=1, minvalue=1, maxvalue=2147483647, cycle=False, cache=1), primary_key=True)
user_id = Column(ForeignKey('db.user.id'), nullable=False)
item_id = Column(ForeignKey('db.item.id'), nullable=False)
The above sources have been simplified for better understanding.
When session.add() is called sequentially for all of the above data, it takes a lot of time.
Inserting 100 users' worth of data takes 8 seconds or more.
Please advise on how to improve the Python and SQLAlchemy performance.
As you have relationships configured on the models, you can compose complex objects using these relationships instead of relying on ids:
with Session() as s, s.begin():
    for u in user_list:
        user_item_links = []
        for item in u.get('items'):
            item_prop_links = []
            for prop in item['properties']:
                item_prop_link = ItemPropLink()
                item_prop_link.properties = Properties(name=prop.get('name'), value=prop.get('value'))
                item_prop_links.append(item_prop_link)
            item = Item(name=item.get('name'), item_prop_link=item_prop_links)
            user_item_link = UserItemLink()
            user_item_link.item = item
            user_item_links.append(user_item_link)
        new_user = User(name=u.get('user_name'), email=u.get('email'), user_item_link=user_item_links)
        s.add(new_user)
SQLAlchemy will automatically set the foreign keys when the session is flushed at commit time, removing the need to manually flush.
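A small verification sketch (my addition, assuming UserItemLink also defines the user and item relationships that the back_populates arguments imply): after a single flush the ORM has already ordered the INSERTs by dependency and filled in the foreign keys, so the per-row session.flush() calls from the question are unnecessary.

with Session() as s:
    user = User(name="A", email="a@example.com",
                user_item_link=[UserItemLink(item=Item(name="a_item1"))])
    s.add(user)
    s.flush()  # one flush: parent rows are inserted first, then the link row
    link = user.user_item_link[0]
    print(link.user_id, link.item_id)  # both populated by the flush
    s.commit()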

How to obtain data from a table that has been joined

I have two tables, items and games.
@app.route('/collection/<username>/<int:page>/<platform>/<path:path>')
def collection(username, page=1, platform='DMG', path=None):
    # first I get the user by their username
    user = User.query.filter_by(username=username).first()
    # then I get all items from the user and the related games
    items = user.items.join(Game)
    # up to this point it works perfectly fine
    # now I would like to obtain all titles from the joined games table
    game_titles = items.filter(Game.title).all()
    # but unfortunately I get only an empty list
What is missing?
Here are my models:
class Game(db.Model):
    __tablename__ = 'games'

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(64), index=True)
    publisher = db.Column(db.String(32), index=True)
    region = db.Column(db.String(3), index=True)
    code_platform = db.Column(db.String(3), index=True)
    code_identifier = db.Column(db.String(4), index=True)
    code_region = db.Column(db.String(3), index=True)
    code_revision = db.Column(db.String(1))
    code = db.Column(db.String(16), index=True, unique=True)
    year = db.Column(db.Integer)
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    items = db.relationship('Item', backref='game', lazy='dynamic')

    def __repr__(self):
        return '<Game %r>' % (self.title)

class Item(db.Model):
    __tablename__ = 'items'

    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(8), index=True)
    cart = db.Column(db.Boolean)
    box = db.Column(db.Boolean)
    manual = db.Column(db.Boolean)
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    game_id = db.Column(db.Integer, db.ForeignKey('game.id'))

    def __repr__(self):
        return '<Collection %r>' % (self.user_id)
You have two options. Using SQLAlchemy ORM:
game_titles = [i.game.title for i in user.items]
To make this more efficient, you can apply the joinedload optimization:
game_titles = [i.game.title for i in user.items.options(joinedload(Item.game))]
Alternatively, you can use SQLAlchemy core if all you care about are the titles (and nothing else):
game_titles = user.items.join(Item.game).with_entities(Game.title).all()
You can even skip fetching the user altogether if you don't care about the user at all:
game_titles = User.query.join(User.items).join(Item.game).filter(User.username == username).with_entities(Game.title).all()
As an aside, .filter and .filter_by correspond to the selection operator in relational algebra, whereas .with_entities and db.session.query(...) correspond to the projection operator, contrary to what you had initially assumed.
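One practical note (my addition): with_entities returns Row objects rather than plain strings, so if you want a simple list of titles you can unpack them, e.g.:

rows = user.items.join(Item.game).with_entities(Game.title).all()
game_titles = [title for (title,) in rows]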
Try something like this:
items.join(Game).options(joinedload(Item.game, innerjoin=True))
Essentially, you're joining with Game and explicitly loading it, where the innerjoin forces it to do so only for the games listed in the table you're joining with (items).

SQLAlchemy Inserting Data in a Many-to-Many Relationship with Association Table

I've seen a few questions similar to this but none quite hit the nail on the head. Essentially I have three table models Center(), Business(), and CenterBusiness() in a Flask Application using SQLAlchemy. Currently I'm adding to said relationship in this manner:
biz = Business(typId=form.type.data, name=form.name.data,
               contact=form.contact.data, phone=form.phone.data)
db.session.add(biz)
db.session.commit()

assoc = CenterBusiness(bizId=biz.id, cenId=session['center'])
db.session.add(assoc)
db.session.commit()
As you can see, that's a bit ugly, and I know there is a way to do it in one hit with the relationships as they are defined. I see SQLAlchemy's docs have an explanation of working with such a table, but I can't seem to get it to work.
# Directly from SQLAlchemy Docs
p = Parent()
a = Association(extra_data="some data")
a.child = Child()
p.children.append(a)

# My version using my tables
center = Center.query.get(session['center'])
assoc = CenterBusiness()
assoc.business = Business(typId=form.type.data, name=form.name.data,
                          contact=form.contact.data, phone=form.phone.data)
center.businesses.append(assoc)
db.session.commit()
Unfortunately, that doesn't seem to be doing the trick... Any help would be greatly appreciated and below I've posted the models involved.
class Center(db.Model):
    id = db.Column(MEDIUMINT(8, unsigned=True), primary_key=True,
                   autoincrement=False)
    phone = db.Column(VARCHAR(10), nullable=False)
    location = db.Column(VARCHAR(255), nullable=False)
    businesses = db.relationship('CenterBusiness', lazy='dynamic')
    employees = db.relationship('CenterEmployee', lazy='dynamic')

class Business(db.Model):
    id = db.Column(MEDIUMINT(8, unsigned=True), primary_key=True,
                   autoincrement=True)
    typId = db.Column(TINYINT(2, unsigned=True),
                      db.ForeignKey('biz_type.id',
                                    onupdate='RESTRICT',
                                    ondelete='RESTRICT'),
                      nullable=False)
    type = db.relationship('BizType', backref='businesses',
                           lazy='subquery')
    name = db.Column(VARCHAR(255), nullable=False)
    contact = db.Column(VARCHAR(255), nullable=False)
    phone = db.Column(VARCHAR(10), nullable=False)
    documents = db.relationship('Document', backref='business',
                                lazy='dynamic')

class CenterBusiness(db.Model):
    cenId = db.Column(MEDIUMINT(8, unsigned=True),
                      db.ForeignKey('center.id',
                                    onupdate='RESTRICT',
                                    ondelete='RESTRICT'),
                      primary_key=True)
    bizId = db.Column(MEDIUMINT(8, unsigned=True),
                      db.ForeignKey('business.id',
                                    onupdate='RESTRICT',
                                    ondelete='RESTRICT'),
                      primary_key=True)
    info = db.relationship('Business', backref='centers',
                           lazy='joined')
    archived = db.Column(TINYINT(1, unsigned=True), nullable=False,
                         server_default='0')
I was able to get this working; my problem lay in the following bit of code (the corrected assignment is marked with a comment):
# My version using my tables
center = Center.query.get(session['center'])
assoc = CenterBusiness()
assoc.info = Business(typId=form.type.data, name=form.name.data,
                      contact=form.contact.data, phone=form.phone.data)  # the fix: assign to the relationship key "info", not "business"
center.businesses.append(assoc)
db.session.commit()
As explained in my comment on the question:

Alright, my issue was that I was not using the relationship key "info"
that I have in my CenterBusiness model to define the appended association.
I was writing assoc.business, thinking that the term business in that
case was arbitrary. However, I needed to actually reference the
relationship, and the appropriate key I had already set up in
CenterBusiness was info.
I will still accept any updates and/or better ways to handle this situation, though I think this is the best route at the time.
The example below may help you.
More details: http://docs.sqlalchemy.org/en/latest/orm/extensions/associationproxy.html
from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.orm import backref, declarative_base, relationship
from sqlalchemy.ext.associationproxy import association_proxy

Base = declarative_base()

class User(Base):
    __tablename__ = 'user'

    id = Column(Integer, primary_key=True)
    name = Column(String(64))

    # association proxy of "user_keywords" collection
    # to "keyword" attribute
    keywords = association_proxy('user_keywords', 'keyword')

    def __init__(self, name):
        self.name = name

class UserKeyword(Base):
    __tablename__ = 'user_keyword'

    user_id = Column(Integer, ForeignKey('user.id'), primary_key=True)
    keyword_id = Column(Integer, ForeignKey('keyword.id'), primary_key=True)
    special_key = Column(String(50))

    # bidirectional attribute/collection of "user"/"user_keywords"
    user = relationship(User,
                        backref=backref("user_keywords",
                                        cascade="all, delete-orphan"))

    # reference to the "Keyword" object
    keyword = relationship("Keyword")

    def __init__(self, keyword=None, user=None, special_key=None):
        self.user = user
        self.keyword = keyword
        self.special_key = special_key

class Keyword(Base):
    __tablename__ = 'keyword'

    id = Column(Integer, primary_key=True)
    keyword = Column('keyword', String(64))

    def __init__(self, keyword):
        self.keyword = keyword

    def __repr__(self):
        return 'Keyword(%s)' % repr(self.keyword)
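A short usage sketch (my addition, following the documentation pattern above): appending a Keyword through the proxy creates the UserKeyword association row implicitly.

user = User('jek')
user.keywords.append(Keyword('cheese inspector'))

print(user.keywords)       # [Keyword('cheese inspector')]
print(user.user_keywords)  # the underlying UserKeyword association objects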

join tables in sqlalchemy to get a many relationship in a query for a flask app

How do I query all of the "many" rows that sit two tables away from my group-by variable?
My data is structured like this (even though my database allows many-to-many everywhere); it is shaped like a bowtie:

syntax \                          / Whenz
syntax -- Clump -- Clumpdetail -- Whenz
syntax /                          \ Whenz
For each Syntax.filename I want all of the Whenz.freq values.
My models:
clump_syntaxs = db.Table('clump_syntaxs',
    db.Column('syntax_id', db.Integer, db.ForeignKey('syntax.id')),
    db.Column('clump_id', db.Integer, db.ForeignKey('clump.id')),
)

clump_deets = db.Table('clump_deets',
    db.Column('clumpdetail_id', db.Integer, db.ForeignKey('clumpdetail.id')),
    db.Column('clump_id', db.Integer, db.ForeignKey('clump.id')),
)

when_deets = db.Table('when_deets',
    db.Column('clumpdetail_id', db.Integer, db.ForeignKey('clumpdetail.id')),
    db.Column('whenz_id', db.Integer, db.ForeignKey('whenz.id')),
)

class Whenz(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(150))
    freq = db.Column(db.String(150))
    frequency = db.relationship('Clumpdetail', secondary=when_deets,
                                backref=db.backref('frequency', lazy='dynamic'))

    def __repr__(self):
        return str(self.title)

class Clumpdetail(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    whenhour = db.Column(db.Time())
    wherevar = db.Column(db.String(150))
    cat = db.Column(db.String(150))
    clumps = db.relationship('Clump', secondary=clump_deets,
                             backref=db.backref('deets', lazy='dynamic'))

    def __repr__(self):
        return str(self.cat)

class Clump(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    clumpname = db.Column(db.String(150), unique=True)
    ordervar = db.Column(db.Integer)
    syntaxs = db.relationship('Syntax', secondary=clump_syntaxs,
                              backref=db.backref('clumps', lazy='dynamic'),
                              order_by="Syntax.position",
                              collection_class=ordering_list('position'))

    def __repr__(self):
        return str(self.clumpname)

class Syntax(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    filename = db.Column(db.String(150), unique=True)
    jobs = db.relationship('Jobs', lazy='dynamic', backref='jobhistory')
    position = db.Column(db.Integer)

    def __repr__(self):
        return str(self.filename)
.... cut.
My attempt at getting the many side:
joblist = db.session.query(
        models.Syntax.filename.label('filename'),
        models.Clump.clumpname.label('clumpname'),
        models.Clumpdetail.whenhour.label('hour'),
        models.Clumpdetail.wherevar.label('where'),
        models.Clumpdetail.cat.label('cat'),
        models.Whenz.freq.label('freq')) \
    .outerjoin((models.Syntax.clumps, models.Clump)) \
    .outerjoin((models.Clump.deets, models.Clumpdetail)) \
    .outerjoin((models.Clumpdetail.frequency, models.Whenz)) \
    .group_by(models.Syntax.filename).all()
I would expect this line
(models.Whenz.freq).label('freq'))\
to give me all of the whenz, but it only gives me one. Why is that?
I am able to get what I want using
models.Syntax.query.filter_by(filename=models.Syntax.query.first().filename).first() \
    .clumps.first() \
    .deets.first() \
    .frequency.all()
but this goes one record at a time, and I want it grouped by Syntax.filename.
Thank you
This is because of the group_by. If you group by syntax.filename, you'll only have one row for each unique filename, so you won't get all of the freqs back.
If you want a SQLAlchemy Core-style solution, remove the group_by and group the results yourself in Python.
If you want a SQLAlchemy ORM solution, you can use subqueryload:
Syntax.query.options(subqueryload(Syntax.clumps).subqueryload(Clump.deets).subqueryload(Clumpdetail.frequency)).all()
Note: I believe you'll have to remove the lazy="dynamic" on Syntax.clumps, Clump.deets, and Clumpdetail.frequency.
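A hedged sketch of what the eager-loaded version might look like in use (my addition, assuming lazy='dynamic' has been removed from those three relationships as noted above):

from sqlalchemy.orm import subqueryload

syntaxes = (Syntax.query
            .options(subqueryload(Syntax.clumps)
                     .subqueryload(Clump.deets)
                     .subqueryload(Clumpdetail.frequency))
            .all())

# collect every Whenz.freq per Syntax.filename in plain Python
freqs_by_file = {
    s.filename: [w.freq for c in s.clumps for d in c.deets for w in d.frequency]
    for s in syntaxes
}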
