Django query api: complex subquery - python

I've wasted a lot of time trying to compose this query. Here are my models:
class User(Dealer):
pass
class Post(models.Model):
text = models.CharField(max_length=500, default='')
date = models.DateTimeField(default=timezone.now)
interactions = models.ManyToManyField(User, through='UserPostInteraction', related_name='post_interaction')
class UserPostInteraction(models.Model):
post = models.ForeignKey(Post, related_name='pppost')
user = models.ForeignKey(User, related_name='uuuuser')
status = models.SmallIntegerField()
DISCARD = -1
VIEWED = 0
LIKED = 1
DISLIKED = 2
And what i need:
Subquery is: (UserPostInteractions where status = LIKED) - (UserPostInteractions where status = DISLIKED) of Post(OuterRef('pk'))
The query is: select all posts, ordered by the value of the subquery.
I'm stuck at the error "Subquery returned multiple rows".
Help!

If I understand your needs correctly, you can get what you want with a queryset like this:
from django.db.models import Case, IntegerField, Sum, When

# Score every joined interaction row: +1 when status is 1 (LIKED), -1 when
# status is 2 (DISLIKED), 0 otherwise. Summing the scores per post gives the
# "likes minus dislikes" value to sort on.
interaction_score = Case(
    When(pppost__status=1, then=1),
    When(pppost__status=2, then=-1),
    default=0,
    output_field=IntegerField(),
)

posts = (
    Post.objects
    .values('id', 'text', 'date')
    .annotate(rate=Sum(interaction_score))
    .order_by('rate')
)
On MySQL this is translated into the following SQL query:
SELECT
`yourapp_post`.`id`,
`yourapp_post`.`text`,
`yourapp_post`.`date`,
SUM(
CASE
WHEN `yourapp_userpostinteraction`.`status` = 1
THEN 1
WHEN `yourapp_userpostinteraction`.`status` = 2
THEN -1
ELSE 0
END) AS `rate`
FROM `yourapp_post`
LEFT OUTER JOIN `yourapp_userpostinteraction` ON (`yourapp_post`.`id` = `yourapp_userpostinteraction`.`post_id`)
GROUP BY `yourapp_post`.`id`
ORDER BY `rate` ASC

Related

Using annotate and distinct(field) together in Django

I've got a bunch of reviews in my app. Users are able to "like" reviews.
I'm trying to get the most liked reviews. However, there are some popular users on the app, and all their reviews have the most likes. I want to only select one review (ideally the most liked one) per user.
Here are my objects,
class Review(models.Model):
user = models.ForeignKey(User, on_delete=models.CASCADE, related_name='review_user', db_index=True)
review_text = models.TextField(max_length=5000)
rating = models.SmallIntegerField(
validators=[
MaxValueValidator(10),
MinValueValidator(1),
],
)
date_added = models.DateTimeField(db_index=True)
review_id = models.AutoField(primary_key=True, db_index=True)
class LikeReview(models.Model):
user = models.ForeignKey(User, on_delete=models.CASCADE, related_name='likereview_user', db_index=True)
review = models.ForeignKey(Review, on_delete=models.CASCADE, related_name='likereview_review', db_index=True)
date_added = models.DateTimeField()
class Meta:
unique_together = [['user', 'review']]
And here's what I currently have to get the most liked reviews:
reviews = Review.objects.filter().annotate(
num_likes=Count('likereview_review')
).order_by('-num_likes').distinct()
As you can see, the reviews I get are sorted by the number of likes, but it's possible that the top-liked reviews are all by the same user. I want to add distinct('user') here, but I get the error "annotate() + distinct(fields) is not implemented".
How can I accomplish this?
This will be a bit hard to read because of your related names. I would suggest changing the related_name of Review.user to 'reviews'; it makes this much easier to understand, and I elaborate on that in the second part of the answer.
With your current setup, I managed to do it fully in the DB using subqueries:
from django.db.models import Subquery, OuterRef, Count

# Lazy queryset: the reviews of the "outer" user, most liked first.
best_reviews_per_user = (
    Review.objects.all()
    .annotate(num_likes=Count('likereview_review'))
    .order_by('-num_likes')
    .filter(user=OuterRef('id'))
)

# Lazy as well: only the primary key of the top review is kept.
review_sq = Subquery(best_reviews_per_user.values('review_id')[:1])

# One best-review id per user. This stays a queryset, so it can be handed
# straight to the __in lookup below.
best_review_ids = (
    User.objects.all()
    .annotate(best_review_id=review_sq)
    .values_list('best_review_id', flat=True)
)

# The selected reviews, most liked first, without the ones nobody liked.
best_reviews = (
    Review.objects.all()
    .annotate(num_likes=Count('likereview_review'))
    .order_by('-num_likes')
    .filter(review_id__in=best_review_ids)
    .exclude(num_likes=0)  # I assume this is the case
)

# Print it
for review in best_reviews:
    print(review, review.num_likes, review.user)

# Test it: one review per user, sorted by likes descending, no zero counts.
like_counts = [r.num_likes for r in best_reviews]
assert len({review.user for review in best_reviews}) == len(best_reviews)
assert sorted(like_counts, reverse=True) == like_counts
assert all(like_counts)
Let's try with this completely equivalent model structure:
from django.db import models
from django.utils import timezone


class TimestampedModel(models.Model):
    """Abstract base that gives every subclass a ``created`` timestamp."""

    created = models.DateTimeField(default=timezone.now)

    class Meta:
        abstract = True


class Review(TimestampedModel):
    """A review written by one user; other users like it through ReviewLike."""

    user = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
        related_name='reviews',
        db_index=True,
    )
    text = models.TextField(max_length=5000)
    rating = models.SmallIntegerField()
    likes = models.ManyToManyField(User, through='ReviewLike')


class ReviewLike(TimestampedModel):
    """Through model for Review.likes: one row per (user, review) like."""

    user = models.ForeignKey(User, on_delete=models.CASCADE, db_index=True)
    review = models.ForeignKey(Review, on_delete=models.CASCADE, db_index=True)
The likes are a clear many-to-many relationship between reviews and users with an extra timestamp column, which is a textbook use case for a through model. Docs here.
Now everything is imho much much easier to read.
from django.db.models import OuterRef, Count, Subquery

# Lazy queryset: every review with at least one like, most liked first.
best_reviews = (
    Review.objects
    .annotate(like_count=Count('likes'))
    .exclude(like_count=0)
    .order_by('-like_count')
)

# Lazy: for the "outer" user, the id of that user's most liked review.
sq = Subquery(best_reviews.filter(user=OuterRef('id')).values('id')[:1])

# Still lazy. Because this stays a queryset, the __in lookup below embeds it
# as a nested subquery instead of running it as a separate query first.
user_distinct_best_review_ids = (
    User.objects
    .annotate(best_review=sq)
    .values_list('best_review', flat=True)
)

# The database is hit only when this final queryset is evaluated.
best_reviews = best_reviews.filter(id__in=user_distinct_best_review_ids)
One way of doing it is as follows:
Get a list of tuples that represent the user.id and review.id, ordered by user and number of likes ASCENDING
Convert the list to a dict to remove duplicate user.ids. Later items replace earlier ones, which is why the ordering in step 1 is important
Create a list of review.ids from the values in the dict
Get a queryset using the list of review.ids, ordered by the number of likes DESCENDING
from django.db.models import Count

# (user id, review pk) pairs, ordered by user and then by likes ascending.
user_review_list = (
    Review.objects
    .annotate(num_likes=Count('likereview_review'))
    .order_by('user', 'num_likes')
    .values_list('user', 'pk')
)

# Later pairs overwrite earlier ones for the same user, so each user ends up
# mapped to the review that came last in the ordering above.
user_review_dict = {user_id: review_pk for user_id, review_pk in user_review_list}
review_pk_list = list(user_review_dict.values())

# Fetch the selected reviews, most liked first.
reviews = (
    Review.objects
    .annotate(num_likes=Count('likereview_review'))
    .filter(pk__in=review_pk_list)
    .order_by('-num_likes')
)

Django use LEFT JOIN instead of INNER JOIN

I have two models: Comments and CommentFlags
class Comments(models.Model):
content_type = models.ForeignKey(ContentType,
verbose_name=_('content type'),
related_name="content_type_set_for_%(class)s",
on_delete=models.CASCADE)
object_pk = models.CharField(_('object ID'), db_index=True, max_length=64)
content_object = GenericForeignKey(ct_field="content_type", fk_field="object_pk")
submit_date = models.DateTimeField(_('date/time submitted'), default=None, db_index=True)
...
...
class CommentFlags(models.Model):
user = models.ForeignKey(settings.AUTH_USER_MODEL, related_name="comment_flags",
on_delete=models.CASCADE)
comment = models.ForeignKey(Comment, related_name="flags", on_delete=models.CASCADE)
flag = models.CharField(max_length=30, db_index=True)
...
...
CommentFlags flag can have values: like, dislike etc.
Problem Statement: I want to get all Comments sorted by number of likes in DESC manner.
Raw Query for above problem statement:
SELECT
cmnts.*, coalesce(cmnt_flgs.num_like, 0) as num_like
FROM
comments cmnts
LEFT JOIN
(
SELECT
comment_id, Count(comment_id) AS num_like
FROM
comment_flags
WHERE
flag='like'
GROUP BY comment_id
) cmnt_flgs
ON
cmnt_flgs.comment_id = cmnts.id
ORDER BY
num_like DESC
I have not been able to convert the above query in Django ORM Queryset.
What I have tried so far...
>>> qs = (Comment.objects.filter(flags__flag='like').values('flags__comment_id')
.annotate(num_likes=Count('flags__comment_id')))
which generates different query.
>>> print(qs.query)
>>> SELECT "comment_flags"."comment_id",
COUNT("comment_flags"."comment_id") AS "num_likes"
FROM "comments"
INNER JOIN "comment_flags"
ON ("comments"."id" = "comment_flags"."comment_id")
WHERE "comment_flags"."flag" = 'like'
GROUP BY "comment_flags"."comment_id",
"comments"."submit_date"
ORDER BY "comments"."submit_date" ASC
LIMIT 21
The problem with the above ORM queryset is that it uses an INNER JOIN; I also don't understand why it adds submit_date to the GROUP BY clause.
Can you please suggest me a way to convert above mentioned Raw query to Django ORM queryset ?
You can try using filter argument in Count:
from django.db.models import Count, Q

# Count only the 'like' flags of each comment. The filter lives inside the
# aggregate rather than in .filter(), so comments without likes are kept.
# The result is then sorted most-liked first, as the raw query does.
qs = (
    Comment.objects.all()
    .annotate(num_likes=Count('flags__comment_id', filter=Q(flags__flag='like')))
    .order_by('-num_likes')
)
It may produce a slightly different query than you're expecting, depending on the database backend, but it should have equivalent behavior.

Is there a way to filter many-to-many filters in Django using fields from the intermediate table?

I have 3 models in Django.
Group, Membership and User.
class Group(models.Model):
name = models.CharField(max_length=32)
permissions = JSONField(max_length=4096, default=list)
class Membership(models.Model):
user = models.ForeignKey('User', on_delete=models.CASCADE, related_name='memberships')
group = models.ForeignKey(Group, on_delete=models.CASCADE, related_name='memberships')
expires_at = models.DateTimeField(null=True)
valid = models.BooleanField(default=True)
class User(models.Model):
groups = models.ManyToManyField(Group, through=Membership)
last_seen = models.DateTimeField(null=True)
created_at = models.DateTimeField(auto_now=True)
I was wondering how I could "filter" the many-to-many on user to only retrieve group objects from memberships where expires_at is either greater than now or null. Thank you!
I believe you are looking for expires at less than now, but I have written the query for expires at greater than now as per your request
Use Q for querying on "OR". So your filter query will be
from django.db.models import Q
Q(expires_at__gt=now) | Q(expires_at__isnull=True)
You want to filter memberships for a certain user. So your query boils down to
Membership.objects.filter(user=user).filter(Q(expires_at__gt=now) | Q(expires_at__isnull=True))
Get all group ids for your query
Membership.objects.filter(user=user).filter(Q(expires_at__gt=now) | Q(expires_at__isnull=True)).values_list('group_id', flat=True)
Get all relevant groups
queryset = Membership.objects.filter(user=user).filter(Q(expires_at__gt=now) | Q(expires_at__isnull=True)).values_list('group_id', flat=True)
result = Group.objects.filter(id__in=queryset)
Though I think there must be a better solution for step 4, but this is fine too.
If you are just looking for group names, then this is enough
result = Membership.objects.filter(user=user).filter(Q(expires_at__gt=now) | Q(expires_at__isnull=True)).values_list('group__name', flat=True)

rawsql equivalent django queryset

I would like to write django queryset which is equivalent of below query with one hit in db. Right now I am using manager.raw() to execute.
With annotate, I can generate the inner query. But I can't use that in the filter condition (when I checked queryset.query, it looks like ex1).
select *
from table1
where (company_id, year) in (select company_id, max(year) year
from table1
where company_id=3
and total_employees is not null
group by company_id);
Ex1:
SELECT `table1`.`company_id`, `table1`.`total_employees`
FROM `table1`
WHERE `table1`.`id` = (SELECT U0.`company_id` AS Col1, MAX(U0.`year`) AS `year`
FROM `table1` U0
WHERE NOT (U0.`total_employees` IS NULL)
GROUP BY U0.`company_id`
ORDER BY NULL)
Model:
class Table1(models.Model):
year = models.IntegerField(null=False, validators=[validate_not_null])
total_employees = models.FloatField(null=True, blank=True)
company = models.ForeignKey('Company', on_delete=models.CASCADE, related_name='dummy_relation')
last_modified = models.DateTimeField(auto_now=True)
updated_by = models.CharField(max_length=100, null=False, default="research")
class Meta:
unique_together = ('company', 'year',)
I appreciate your response.
You can use OuterRef and Subquery to achieve it. Try it like this:
from django.db.models import OuterRef, Subquery

# Rows of the "outer" company that have a headcount, newest year first.
# The lookup needs a double underscore: total_employees__isnull.
newest = Table1.objects.filter(
    company=OuterRef('pk'),
    total_employees__isnull=False,
).order_by('-year')

# Annotate each company with the values taken from its newest such row.
companies = Company.objects.annotate(
    total_employees=Subquery(newest.values('total_employees')[:1]),
).annotate(
    max_year=Subquery(newest.values('year')[:1]),
)
# Querysets are lazy: nothing is sent to the database until `companies` is
# evaluated, so the DB gets hit once.
Show values:
# all values
companies.values('id', 'total_employees', 'max_year')
# company three values
company_three_values = companies.filter(id=3).values('id', 'total_employees', 'max_year')
Filter on Max Year:
companies_max = companies.filter(max_year__gte=2018)
FYI: OuterRef and Subquery is available in Django from version 1.11
If your model is named Table1, try this:
# latest('year') returns the newest matching row itself (a model instance,
# not a queryset) and raises Table1.DoesNotExist when nothing matches, so a
# single query is enough and no extra get(pk=...) is needed.
Table1.objects.filter(company_id=3, total_employees__isnull=False).latest('year')
This should be a single hit on the database.
However, it fails if nothing matches the filter, so it is better to write it like this:
def latest_row_for_company(company_id=3):
    """Return the newest Table1 row with a headcount, or None if there is none.

    order_by('-year').first() yields the row itself (or None when nothing
    matches), so there is no need to re-fetch it with get(pk=...).
    """
    return (
        Table1.objects
        .filter(company_id=company_id, total_employees__isnull=False)
        .order_by('-year')
        .first()
    )

Erroneous group_by query generated in python django

I am using Django==1.8.7 and I have the following models
# a model in users.py
class User(models.Model):
id = models.AutoField(primary_key=True)
username = models.CharField(max_length=100, blank=True)
displayname = models.CharField(max_length=100, blank=True)
# other fields deleted
# a model in healthrepo.py
class Report(models.Model):
id = models.AutoField(primary_key=True)
uploaded_by = models.ForeignKey(User, related_name='uploads',
db_index=True)
owner = models.ForeignKey(User, related_name='reports', db_index=True)
# other fields like dateofreport, deleted
I use the following Django queryset:
Report.objects.filter(owner__id=1).values('uploaded_by__username',
'uploaded_by__displayname').annotate(
total=Count('uploaded_by__username')
)
I see that this generates the following query:
SELECT T3."username", T3."displayname", COUNT(T3."username") AS "total" FROM "healthrepo_report"
INNER JOIN "users_user" T3 ON ( "healthrepo_report"."uploaded_by_id" = T3."id" )
WHERE "healthrepo_report"."owner_id" = 1
GROUP BY T3."username", T3."displayname", "healthrepo_report"."dateofreport", "healthrepo_report"."owner_id", "healthrepo_report"."uploaded_by_id"
ORDER BY "healthrepo_report"."dateofreport" DESC, "healthrepo_report"."user_id" ASC, "healthrepo_report"."uploaded_by_id" ASC
However, what I really wanted was grouping based on "healthrepo_report"."owner_id" only, not on multiple fields. That is, what I wanted was:
SELECT T3."username", T3."displayname", COUNT(T3."username") AS "total" FROM "healthrepo_report"
INNER JOIN "users_user" T3 ON ( "healthrepo_report"."uploaded_by_id" = T3."id" )
WHERE "healthrepo_report"."owner_id" = 1
GROUP BY T3."username", T3."displayname" ORDER BY "healthrepo_report"."dateofreport" DESC, "healthrepo_report"."user_id" ASC, "healthrepo_report"."uploaded_by_id" ASC
I am wondering why this is happening and how do I get grouping based on single column.
I just saw this post:
Django annotate and values(): extra field in 'group by' causes unexpected results
Changing the query by adding empty order_by() fixes it
# The trailing order_by() with no arguments clears the ordering, which is
# the change that fixes the unwanted extra GROUP BY columns.
(
    Report.objects
    .filter(owner__id=1)
    .values('uploaded_by__username', 'uploaded_by__displayname')
    .annotate(total=Count('uploaded_by__username'))
    .order_by()
)

Categories

Resources