Django: annotate Count with filter - python

I have "post" objects and a "post like" object with how many likes a post has received by which user:
class Post(models.Model):
text = models.CharField(max_length=500, default ='')
user = models.ForeignKey(User)
class PostLike(models.Model):
user = models.ForeignKey(User)
post = models.ForeignKey(Post)
I can select how many likes a post has received like this:
Post.objects.all().annotate(likes=Count('postlike'))
This roughly translates to:
SELECT p.*,
Count(l.id) AS likes
FROM post p, postlike l
WHERE p.id = l.post_id
GROUP BY (p.id)
It works. Now, how I can filter the Count aggregation by the current user? I'd like to retrieve not all the likes of the post, but all the likes by the logged user. The resulting SQL should be like:
SELECT p.*,
(SELECT COUNT(*) FROM postlike WHERE postlike.user_id = 1 AND postlike.post_id = p.id) AS likes
FROM post p, postlike l
WHERE p.id = l.post_id
GROUP BY (p.id)

Do you know the Count has a filter argument?
Post.objects.annotate(
likes=Count('postlike', filter=Q(postlike__user=logged_in_user))
)

It's not exactly as clean, but you could use Case/When...
posts = Post.objects.all().annotate(likes=models.Count(
models.Case(
models.When(postlike__user_id=user.id, then=1),
default=0,
output_field=models.IntegerField(),
)
))
And of course, you can always drop down to .extra() or even raw SQL when there's something you can't express via the Django ORM.

Try to add filter first:
Post.objects.filter(postlike__user=request.user).annotate(likes=Count('postlike'))
From the docs:
The filter precedes the annotation, so the filter constrains the objects considered when calculating the annotation.

Related

Django Left join how

I fairly new to Django and stuck with creating a left join in Django. I tried so many, but none of them seems to be working:
The query I want to translate to Django is:
select ssc.id
,mgz.Title
,tli.id
,tli.Time
from Subscription ssc
join Person prs
on ssc.PersonID = prs.id
and prs.id = 3
join Magazine mgz
on mgz.id = ssc.MagazineID
and mgz.from <= date.today()
and mgz.until > date.today()
left join TimeLogedIn tli
on tli.SubscriptionID = ssc.id
and tli.DateOnline = date.today()
The model I'm using looks like this:
class Magazine(models.Model):
Title = models.CharField(max_length=100L)
from = models.Datefield()
until = models.Datefield()
Persons = models.ManyToManyField(Person, through='Subscription')
class Person(models.Model):
user = models.OneToOneField(User, on_delete=models.CASCADE)
Magazines = models.ManyToManyField(Magazine, through='Subscription')
class Subscription(models.Model):
MagazineID = models.ForeignKey(Magazine,on_delete=models.CASCADE)
PersonID = models.ForeignKey(Person,on_delete=models.CASCADE)
class TimeLogedIn(models.Model):
SubscriptionID = models.ForeignKey('Subscription', on_delete=models.CASCADE)
DateOnline = models.DateField()
Time = models.DecimalField(max_digits=5, decimal_places=2)
Like I said, tried so many but no succes and now I don't know how to do this in Django ORM , is it even possible? I created already a raw-query and this is working ok, but how to create this in Django ORM?
You can use field lookups lte and gt to filter your objects and then values() method.
You can also querying in the opposite direction and use Q objects for null values:
from django.db.models import Q
Subscription.objects.filter(
PersonID_id=3,
MagazineID__from__lte=date.today(),
MagazineID__until__gt=date.today()
).filter(
Q(TimeLogedIn__DateOnline=date.today()) | Q(TimeLogedIn__DateOnline__isnull=True)
).values("id", "MagazineID__Title", "TimeLogedIn__id", "TimeLogedIn__Time")
OR from TimeLogedIn:
TimeLogedIn.objects.filter(DateOnline=date.today()).filter(
SubscriptionID__MagazineID__from__lte=date.today(),
SubscriptionID__MagazineID__util__gt=date.today()
).values(
"SubscriptionID_id", "SubscriptionID__MagazineID__Title", "id", "Time"
)
Querysets also have the query attribute that contains the sql query to be executed, you can see it like following:
print(TimeLogedIn.objects.filter(...).values(...).query)
Note: Behind the scenes, Django appends "_id" to the field name to create its database column name. Therefore it should be
subscription, instead of SubscriptionID.
You can also use prefetch_related() and select_related() to prevent multiple database hits:
SubscriptionID.objects.filter(...).prefetch_related("TimeLogedIn_set")
SubscriptionID.objects.filter(...).select_related("PersonID")

django - prefetch only the newest record?

I am trying to prefetch only the latest record against the parent record.
my models are as such
class LinkTargets(models.Model):
device_circuit_subnet = models.ForeignKey(DeviceCircuitSubnets, verbose_name="Device", on_delete=models.PROTECT)
interface_index = models.CharField(max_length=100, verbose_name='Interface index (SNMP)', blank=True, null=True)
get_bgp = models.BooleanField(default=False, verbose_name="get BGP Data?")
dashboard = models.BooleanField(default=False, verbose_name="Display on monitoring dashboard?")
class LinkData(models.Model):
link_target = models.ForeignKey(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT)
interface_description = models.CharField(max_length=200, verbose_name='Interface Description', blank=True, null=True)
...
The below query fails with the error
AttributeError: 'LinkData' object has no attribute '_iterable_class'
Query:
link_data = LinkTargets.objects.filter(dashboard=True) \
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.all().order_by('-id')[0]
)
)
I thought about getting LinkData instead and doing a select related but ive no idea how to get only 1 record for each link_target_id
link_data = LinkData.objects.filter(link_target__dashboard=True) \
.select_related('link_target')..?
EDIT:
using rtindru's solution, the pre fetched seems to be empty. there is 6 records in there currently, atest 1 record for each of the 3 LinkTargets
>>> link_data[0]
<LinkTargets: LinkTargets object>
>>> link_data[0].linkdata_set.all()
<QuerySet []>
>>>
The reason is that Prefetch expects a Django Queryset as the queryset parameter and you are giving an instance of an object.
Change your query as follows:
link_data = LinkTargets.objects.filter(dashboard=True) \
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.filter(pk=LinkData.objects.latest('id').pk)
)
)
This does have the unfortunate effect of undoing the purpose of Prefetch to a large degree.
Update
This prefetches exactly one record globally; not the latest LinkData record per LinkTarget.
To prefetch the max LinkData for each LinkTarget you should start at LinkData: you can achieve this as follows:
LinkData.objects.filter(link_target__dashboard=True).values('link_target').annotate(max_id=Max('id'))
This will return a dictionary of {link_target: 12, max_id: 3223}
You can then use this to return the right set of objects; perhaps filter LinkData based on the values of max_id.
That will look something like this:
latest_link_data_pks = LinkData.objects.filter(link_target__dashboard=True).values('link_target').annotate(max_id=Max('id')).values_list('max_id', flat=True)
link_data = LinkTargets.objects.filter(dashboard=True) \
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.filter(pk__in=latest_link_data_pks)
)
)
The following works on PostgreSQL. I understand it won't help OP, but it might be useful to somebody else.
from django.db.models import Count, Prefetch
from .models import LinkTargets, LinkData
link_data_qs = LinkData.objects.order_by(
'link_target__id',
'-id',
).distinct(
'link_target__id',
)
qs = LinkTargets.objects.prefetch_related(
Prefetch(
'linkdata_set',
queryset=link_data_qs,
)
).all()
LinkData.objects.all().order_by('-id')[0] is not a queryset, it is an model object, hence your error.
You could try LinkData.objects.all().order_by('-id')[0:1] which is indeed a QuerySet, but it's not going to work. Given how prefetch_related works, the queryset argument must return a queryset that contains all the LinkData records you need (this is then further filtered, and the items in it joined up with the LinkTarget objects). This queryset only contains one item, so that's no good. (And Django will complain "Cannot filter a query once a slice has been taken" and raise an exception, as it should).
Let's back up. Essentially you are asking an aggregation/annotation question - for each LinkTarget, you want to know the most recent LinkData object, or the 'max' of an 'id' column. The easiest way is to just annotate with the id, and then do a separate query to get all the objects.
So, it would look like this (I've checked with a similar model in my project, so it should work, but the code below may have some typos):
linktargets = (LinkTargets.objects
.filter(dashboard=True)
.annotate(most_recent_linkdata_id=Max('linkdata_set__id'))
# Now, if we need them, lets collect and get the actual objects
linkdata_ids = [t.most_recent_linkdata_id for t in linktargets]
linkdata_objects = LinkData.objects.filter(id__in=linkdata_ids)
# And we can decorate the LinkTarget objects as well if we want:
linkdata_d = {l.id: l for l in linkdata_objects}
for t in linktargets:
if t.most_recent_linkdata_id is not None:
t.most_recent_linkdata = linkdata_d[t.most_recent_linkdata_id]
I have deliberately not made this into a prefetch that masks linkdata_set, because the result is that you have objects that lie to you - the linkdata_set attribute is now missing results. Do you really want to be bitten by that somewhere down the line? Best to make a new attribute that has just the thing you want.
Tricky, but it seems to work:
class ForeignKeyAsOneToOneField(models.OneToOneField):
def __init__(self, to, on_delete, to_field=None, **kwargs):
super().__init__(to, on_delete, to_field=to_field, **kwargs)
self._unique = False
class LinkData(models.Model):
# link_target = models.ForeignKey(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT)
link_target = ForeignKeyAsOneToOneField(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT, related_name='linkdata_helper')
interface_description = models.CharField(max_length=200, verbose_name='Interface Description', blank=True, null=True)
link_data = LinkTargets.objects.filter(dashboard=True) \
.prefetch_related(
Prefetch(
'linkdata_helper',
queryset=LinkData.objects.all().order_by('-id'),
'linkdata'
)
)
# Now you can access linkdata:
link_data[0].linkdata
Ofcourse with this approach you can't use linkdata_helper to get related objects.
This is not a direct answer to you question, but solves the same problem. It is possible annotate newest object with a subquery, which I think is more clear. You also don't have to do stuff like Max("id") to limit the prefetch query.
It makes use of django.db.models.functions.JSONObject (added in Django 3.2) to combine multiple fields:
MainModel.objects.annotate(
last_object=RelatedModel.objects.filter(mainmodel=OuterRef("pk"))
.order_by("-date_created")
.values(
data=JSONObject(
id="id", body="body", date_created="date_created"
)
)[:1]
)

Django ORM for given group by SQL query with aggregation method sum and count

I have below given Django model
class ABC(models.Model):
user = models.ForeignKey(DEF)
name = models.CharField()
phone_num = models.CharField()
date = models.DateTimeField(auto_now=True)
amount = models.IntegerField()
I want to perform below query using Django ORM.
select *, sum(amount), count(date) from ABC group by phone_num;
I tried code below, but it does not work.
ABC.objects.all().annotate(count = Count("phone_num")).order_by("phone_num")
Not sure whether it possible to grub data you mentioned above ( Select *, sum(amount), count( date ) by simple order by, probab;y that's JOIN query, at least you could try variants below and perform some intersection by phone_num on ABC.all():
ABC.objects.values("phone_num").order_by().annotate(count = Count("date"), amount= Sum("amount"))
Notes:
values('phone_num') - for GROUP BY 'phone_num' clause.
order_by() - for exclusion possible default ordering which ( you could remove that order_by().
p.s.
Also try to run query below:
ABC.objects.all().values("phone_num").annotate(count = Count("date"), amount= Sum("amount"))
Update
You could do next loop to grub desired data as Django ORM solution is absent:
data = (dict(o, data=ABC.objects.filter(phone_num=o['phone_num'])[:1][0]) for o in ABC.objects
.values("phone_num")
.order_by()
.annotate(count = Count("date"), amount= Sum("amount")).all())
// know you could access your data in next way:
for item in data:
phone_num = item['phone_num']
count = item['count']
amount = item['amount']
id = item['data'].id
name = item['data'].name
// Do other staff...
Note
data formed with generator expression(comprehension)

Django: order a queryset by the sum of annotated fields?

I'm building a Django site for discussions. Users can participate in discussions and can also cast votes of approval for discussions and messages in the discussions. A simplified data model is as follows:
class Discussion:
name = models.CharField(max_length=255)
class Message:
owner = models.ForeignKey(User, related_name='messages')
body = models.TextField()
discussion = models.ForeignKey(Discussion, related_name='messages')
class MessageApprovalVote:
owner = models.ForeignKey(User, related_name='message_approval_votes')
message = models.ForeignKey(Message, related_name='approval_votes')
class DiscussionApprovalVote:
owner = models.ForeignKey(User, related_name='discussion_approval_votes')
discussion = models.ForeignKey(Discussion, related_name='approval_votes')
I want to select the top 20 "most active" discussions, which means ordering by the sum of the number of messages, total number of message approval votes, and number of discussion approval votes for that discussion, or (in pseudocode):
# Doesn't work
Discussion.objects.
order_by(Count('messages') +
Count('approval_votes') +
Count('messages__approval_votes'))
Using annotations, I can calculate the totals of each of the three scoring factors:
scores = Discussion.objects.annotate(
total_messages=Count('messages', distinct=True),
total_discussion_approval_votes=Count('approval_votes', distinct=True),
total_message_approval_votes=Count('messages__approval_votes', distinct=True))
I then thought I was on to something when I found the extra method:
total_scores = scores.extra(
select={
'score_total': 'total_messages + total_discussion_approval_votes + total_message_approval_votes'
}
)
and would then be able to do:
final_answer = total_scores.order_by('-score_total')[:20]
but the extra call gives a DatabaseError:
DatabaseError: column "total_messages" does not exist
LINE 1: SELECT (total_votes + total_messages + total_persuasions) AS...
and thus I was foiled. Can the extra method not reference annotated fields? Is there another way to do what I'm trying to do, short of using a raw sql query? I'm using Postgres if that makes a difference.
Any insights would be greatly appreciated!
I don't think this is possible in a single top-level SQL query. The score_total value depends on the three aggregate results, but you're asking them all to be calculated at the same time.
In straight SQL, you can do this with a subquery, but I'm not sure how to inject it into Django. After setting up a simple Django app with your models, the following query seems to do the trick against a SQLite database:
SELECT id, name,
total_messages, total_discussion_approval_votes, total_message_approval_votes,
(total_messages +
total_discussion_approval_votes +
total_message_approval_votes) as score_total
FROM
(SELECT
discussion.id,
discussion.name,
COUNT(DISTINCT discussionapprovalvote.id) AS total_discussion_approval_votes,
COUNT(DISTINCT messageapprovalvote.id) AS total_message_approval_votes,
COUNT(DISTINCT message.id) AS total_messages
FROM discussion
LEFT OUTER JOIN discussionapprovalvote
ON (discussion.id = discussionapprovalvote.discussion_id)
LEFT OUTER JOIN message
ON (discussion.id = message.discussion_id)
LEFT OUTER JOIN messageapprovalvote
ON (message.id = messageapprovalvote.message_id)
GROUP BY discussion.id, discussion.name)
ORDER BY score_total DESC
LIMIT 20;
Actually there's a way by using an extra annotate with F expressions:
Discussion.objects.annotate(
total_messages=Count('messages', distinct=True),
total_discussion_approval_votes=Count('approval_votes', distinct=True),
total_message_approval_votes=Count('messages__approval_votes', distinct=True)),
total_score=F('total_messages') + F('total_discussion_approval_votes') + F('total_message_approval_votes')
).order_by('total_score')

Database error: Lookup type 'in' can't be negated

I am using Django framework, appengine database.
My code for model is:
class Group(models.Model):
name = models.CharField(max_length=200)
ispublic = models.BooleanField()
logo = models.CharField(max_length=200)
description = models.CharField(max_length=200)
groupwebsite = models.CharField(max_length=200)
owner = models.ForeignKey('profile')
class Group_members(models.Model):
profile = models.CharField(max_length=200)
group = models.ForeignKey('group')
I am querying on Group_members to remove group. My query is as follows:
groups = Group_members.objects.filter(Q(profile=profile.id),~Q(group__in=group_id)
INFO:
group_id = ['128','52']
group is a foreign key to group model
My problem is when I run this query, it throws Database error: Lookup type 'in' can't be negated.
I have also performed query using __in it worked fine but does not work for foreign key.
Thanks in advance
I think you trying to filter profile id and remove groups in group_id in single filter
groups = Group_members.objects.filter(Q(profile=profile.id),~Q(group__in=group_id)
instead try this:
1)first filter the profiles form group_member :
groups = Group_members.objects.filter(profile=profile.id)
2)remove the groups form Queryset by:
groupId = [x.group.id for x in groups if x.group.id not in group_id]
Hope this will give you perfect result
2 suggestions.
Use ~Q(group__ pk__in=group_id)
Instead of using filter and not in, use exclude and in

Categories

Resources