How to make an Inner Join in django without foreign key? - python

how to make statement of join on in django 1.11,
i want to create this statement :
select t1.name ,t2.str, t2.num
from table_1 as t1
join table_2 as t2 on t2.product_id = t1.id and t2.section_num = 2;
the models:
class t1(UTModelTS):
alt_keys = product_alt_keys
name = utCharField()
...
class t2(UTModelTS):
alt_keys= [('pr_id', 'section')]
str = utCharField()
num = models.IntegerField()
...
i tried
t1 = t1.objects.filter(**params).exclude(**exclude)
t1 = t1.select_related('t2')
`
but this make no sense since acoridng to django doc :
select_related
Returns a QuerySet that will “follow” foreign-key relationships...
from https://docs.djangoproject.com/en/2.2/ref/models/querysets/ .

No, there isn't an effective / elegant way unfortunately.
although you can use .raw()/RawSQL() method for this exact thing. Even if it could it probably would be a lot slower than raw SQL.
https://docs.djangoproject.com/en/2.2/topics/db/sql/

You should not add on statement. Django's ORM will performs inner join for you automatically.
Imagine this is our models. User Table and Post table.
class User(models.Model):
name = models.CharField(max_length=30)
surname = models.CharField(max_length=50)
class Post(models.Model):
title = models.CharField(max_length=50)
text = models.TextField()
user = models.ForeignKey(to='User', on_delete=models.CASCADE) # this is FK field to users
Usage
qs = Post.objects.select_related('user') # This will performs SQL INNER JOIN.
print(qs.query) # use query attribute to show what query is performed.
This will be generated SQL query
SELECT "myapp_post"."id", "myapp_post"."title", "myapp_post"."text", "myapp_post"."user_id", "myapp_user"."id", "myapp_user"."name", "myapp_user"."surname" FROM "myapp_post" INNER JOIN "myapp_user" ON ("myapp_post"."user_id" = "myapp_user"."id")
Look your models,,,
class t1(UTModelTS):
alt_keys = product_alt_keys
name = utCharField()
t2 = models.ForeignKey(to='t2', on_delete=models.CASCADE)
class t2(UTModelTS):
alt_keys= [('pr_id', 'section')]
str = utCharField()
num = models.IntegerField()
add t2 FK to your t1 Model.
Querying
qs = t1.objects.select_related('t2')
OR
qs = t1.objects.select_related('t2').filter(**lookup_kwargs).
select_related() returns QuerySet object, you can use QuerySet methods after select_related().

Related

Django perform a join on multiple tables

I have the following tables:
class A:
field_1 = models.CharField()
field_2 = models.IntegerField()
class B:
a = models.ForeignKey(A, related_name='table_b')
some_other_field = models.CharField()
class C:
b = models.ForeignKey(B, related_name="table_c")
other_field = models.CharField()
Let's assume ids are provided for objects on table A, I need to get all the C objects that are related to table A through table B. I have the following query, which gives me what I need but I am wondering if there is a better way to do this, I was reading into prefetch_related and select_related but can't wrap my head around on how to use them so far:
c_list = C.objects.filter(b__in=B.objects.filter(a__pk__in=table_a_ids))
Also, I would like to group them by other_field.
No need for .select_related(…) or .prefetch_related(…). You can filter with:
c_list = C.objects.filter(b__a_id__in=table_a_ids)

How to execute two update statements in one transaction so they won't run into unique constraint in Django ORM?

Given models
from django.db import models
class RelatedTo(models.Model):
pass
class Thing(models.Model):
n = models.IntegerField()
related_to = models.ForeignKey(RelatedTo, on_delete=models.CASCADE)
class Meta:
constraints = [
models.UniqueConstraint(
fields=['n', 'related_to'],
name='unique_n_per_related_to'
)
]
and
>>> r = RelatedTo.objects.create()
>>> thing_zero = Thing.objects.create(related_to=r, n=0)
>>> thing_one = Thing.objects.create(related_to=r, n=1)
I want to switch their numbers (n).
In update method of my serializer (drf) I was trying to
#transaction.atomic
def update(self, instance, validated_data):
old_n = instance.n
new_n = validated_data['n']
Thing.objects.filter(
related_to=instance.related_to,
n=new_n
).update(n=old_n)
return super().update(instance, validated_data)
but it still runs into constraint.
select_for_update doesn't help either.
Is it possible not to run into this DB constraint using Django ORM or do I have to run raw sql to achieve that?
Django==3.1.2
postgres:12.5
Error
duplicate key value violates unique constraint "unique_n_per_related_to"
DETAIL: Key (n, related_to)=(1, 1) already exists.
I wasn't able to resolve this issue neither with bulk_update nor with raw sql.
stmt = f"""
update {to_update._meta.db_table} as t
set n = i.n
from (values
('{to_update.id}'::uuid, {n}),
('{method.id}'::uuid, {n})
) as i(id, n)
where t.id = i.id
"""
with connection.cursor() as cur:
cur.execute(stmt)
The only solution for this problem is making the column nullable and write 3 times to the table which physically hurts.

How do you count the number of self-referential relationships (that fit certain criteria) in a combined query using SQLAlchemy?

I have models that look like this:
class User(db.Model):
id = db.Column(UUID_TYPE, primary_key=True)
class Post(db.Model):
id = db.Column(UUID_TYPE, primary_key=True)
private = db.Column(db.Boolean)
class PostSubscription(db.Model):
user_id = db.Column(UUID_TYPE, db.ForeignKey("users.id"))
user = db.relationship(
"User",
backref=db.backref("subscriptions", cascade="all, delete-orphan")
)
post_id = db.Column(UUID_TYPE, db.ForeignKey("posts.id"))
post = db.relationship(
"Post",
foreign_keys=[post_id],
lazy="joined",
backref=db.backref("subscriptions", cascade="all, delete-orphan"),
)
liked = db.Column(db.Boolean)
I have a query that fetches all of the private PostSubscriptions which a user has access to, e.g.
query = (db.session.query(PostSubscription)
.filter(User.id == user_id, Post.private)
.join(PostSubscription.post)
)
I also want to fetch the total number of likes for the Post in the same query (to avoid the N+1 problem). My issue is that these likes are themselves stored in PostSubscription, which is the main thing I'm fetching in the first place. So the whole mess becomes a bit self-referential / circular.
My initial attempt involved adding a property to the Post model, but as mentioned above, this solution suffers from the N+1 problem, e.g.
#property
def likes(self):
return db.session.query(PostSubscription).filter_by(
post_id=self.id,
liked=True
).count()
After a bit of research, I've realized that what I probably need is to use func.count combined with case, but I can't figure out how I do this in my particular use case. What I want to do is this:
query = (
db.session.query(
PostSubscription,
func.count(case([Post.subscriptions.liked, Post.id]))
)
.filter(User.id == user_id, Post.private)
.join(PostSubscription.post)
)
But that obviously doesn't work because I can't reference a relationship like that. The two tables are already joined, I just don't know how to only get the subscriptions for the Post I already have (that is linked to the PostSubscriptions I am fetching).
Any thoughts?
You could try the following SQLAlchemy query:
from sqlalchemy.orm import aliased, contains_eager
u = 1
PS1 = aliased(PostSubscription)
PS2 = aliased(PostSubscription)
query = db.session.query(PS1, func.count(PS2.liked))\
.join(Post, PS1.post_id == Post.id)\
.join(PS2, PS2.post_id == PS1.post_id)\
.options(contains_eager(PS1.post))\
.filter(PS1.user_id == u, Post.private)\
.group_by(PS1.id)
Which should give the following SQL:
SELECT post.id AS post_id,
post.private AS post_private,
postsubscription_1.id AS postsubscription_1_id,
postsubscription_1.user_id AS postsubscription_1_user_id,
postsubscription_1.post_id AS postsubscription_1_post_id,
postsubscription_1.liked AS postsubscription_1_liked,
Count(postsubscription_2.liked) AS count_1
FROM postsubscription AS postsubscription_1
JOIN post ON postsubscription_1.post_id = post.id
JOIN postsubscription AS postsubscription_2 ON postsubscription_2.post_id = postsubscription_1.post_id
WHERE postsubscription_1.user_id = 1
AND post.private = 1
GROUP BY postsubscription_1.id
Which you could then for example execute and print like this:
result = query.all()
for postsubscription, likes in result:
print(postsubscription, likes)

How would I do these multiple joins as a Django queryset?

I have this query that joins multiple tables together:
select
p.player_id
, d.player_data_1
, l.year
, l.league
, s.stat_1
, l.stat_1_league_average
from
stats s
inner join players p on p.player_id = s.player_id
left join player_data d on d.other_player_id = p.other_player_id
left join league_averages as l on l.year = s.year and l.league = s.year
where
p.player_id = 123
My models look like this:
class Stats(models.Model):
player_id = models.ForeignKey(Player)
stat_1 = models.IntegerField()
year = models.IntegerField()
league = models.IntegerField()
class Player(models.Model):
player_id = models.IntegerField(primary_key=True)
other_player_id = models.ForeignKey(PlayerData)
class PlayerData(models.Model):
other_player_id = models.IntegerField(primary_key=True)
player_data_1 = models.TextField()
class LeagueAverages(models.Model):
year = models.IntegerField()
league = models.IntegerField()
stat_1_league_average = models.DecimalField()
I can do something like this:
Stats.objects.filter(player_id=123).select_related('player')
to do the first join. For the second join, I tried:
Stats.objects.filter(player_id=123).select_related('player').select_related('player_data')
but I got this error:
django.core.exceptions.FieldError: Invalid field name(s) given in select_related: 'player_data'. Choices are: player
How would I do the third join considering that year and league aren't foreign keys in any of the tables? Thanks!
select_related(*fields) Returns a QuerySet that will “follow” foreign-key relationships, [...]
According to the django documentation select_related follows foreign-key relationships. player_data is neighter a foreign key, nor even an field of Stats. If you'd want to INNER join PlayerData and Player you could follow its foreign-keys. In your case use the
double-underscore to get to PlayerData:
Stats.objects.all()
.select_related('player_id')
.select_related('player_id__other_player_id')
As for joining LeagueAverages: There is not a way to join models without an appropriate foreign key, but to use raw sql. Have a look at a related question: Django JOIN query without foreign key. By using .raw(), your LEFT join (which by the way is also not that easy without using raw: Django Custom Left Outer Join) could also be taken care of.
Quick notes about your models:
Each model by default has an automatically incrementing primary key that can be accessed via .id or .pk. So there is no need to add for example player_id
A models.ForeignKey field references an object not it's id. Therefore it's more intuitive to rename for example player_id to player. If you name your field player django allows you automatically to access it's id via player_id

SQLAlchemy Relationship / Hybrid Property to specific instance of One-To-Many

I'm trying to create a hybrid property or a relationship (either works) to pick out a single model from the "Many" side of a One-To-Many relationship.
The accepted answer for How to set one to many and one to one relationship at same time in Flask-SQLAlchemy? doesn't work for me, as I need an expression-level construct to use in additional queries.
Relevant model details are as follows:
class ItemIdentifierType(db.Model):
id = db.Column(db.Integer, primary_key=True)
code = db.Column(db.String(12))
priority = db.Column(db.Integer)
class ItemIdentifier(db.Model):
id = db.Column(db.String(8), primary_key=True)
type_id = db.Column(db.ForeignKey('item_identifier_type.id')
type = relationship('ItemIdentifierType')
item_id = db.Column(db.ForeignKey('item.id'))
item = db.relationship('Item', back_populates='identifiers')
class Item(db.Model):
id = db.Column(db.String(8), primary_key=True)
name = db.Column(db.String(40))
identifiers = db.relationship('ItemIdentifier', back_populates='instrument', lazy='dynamic')
#hybrid_property
def primary_identifier(self):
return sorted(self.identifiers, key=lambda x: x.type.priority)[0]
#primary_identifier.expression:
def primary_identifier(cls):
primary_identifiers = select([
ItemIdentifier.item_id,
ItemIdentifierType.code,
ItemIdentifier.value
]).select_from(join(ItemIdentifier, ItemIdentifierType,
ItemIdentifier.type_id == ItemIdentifierType.id))\
.order_by(ItemIdentifier.item_id,
ItemIdentifierType.priority.asc())\
.distinct(ItemIdentifier.item_id)\
.alias()
# <<< psycopg2 throws the error shown below >>>
return select([ItemIdentifierType.code, ItemIdentifier.value])\
.select_from(primary_identifiers)\
.where(primary_identifiers.c.item_id == self.id)
Error this throws when attempting to use the sql expression:
(psycopg2.ProgrammingError) subquery in FROM must have an alias
LINE 2: FROM (SELECT item_identifier_type.code AS code, instru...
^
HINT: For example, FROM (SELECT ...) [AS] foo.
[SQL: 'SELECT code AS code, value AS value
FROM (SELECT item_identifier_type.code AS code, item_identifier.value AS value
FROM item_identifier_type, item_identifier, (SELECT DISTINCT item_identifier.item_id AS item_id, item_identifier.id AS id
FROM item_identifier JOIN item_identifier_type ON item_identifier.type_id = item_identifier_type.id ORDER BY item_identifier.item_id, item_identifier_type.priority ASC, item_identifier.id) AS primary_identifiers, item
WHERE primary_identifiers.item_id = item.id) ORDER BY item.name ASC']
The following query pulls out what I'm after, no problem:
SELECT
DISTINCT ON (item_identifier.item_id)
item_identifier.item_id,
item_identifier_type.code,
item_identifier.value
FROM item_identifier
JOIN item_identifier_type
ON item_identifier.type_id = item_identifier_type.id
ORDER BY
item_identifier.item_id,
item_identifier_type.priority ASC;

Categories

Resources