Django annotate() and count() conditional from 0 to null - python

I am new to django and django-rest-framework and I am wondering if it is possible to put conditional if else on annotate(total_sessions=Count()) where if Count() = 0 then total_sessions="null"? I have a project where I need to make that if the total sessions is equal to 0 the output must be null.
I thought of using SerializerMethodField() on total_sessions to get the count, but this causes multiple SQL queries and a slow API response, which is why it is out of the question.
Sample code below for serializers.py and views.py(this is only a sample code as my real codes have multiple querysets).
serializers.py
class ClassStatisticsSerializer(ModelBaseSerializer):
    """Serialize a ``Class`` row together with its ``total_sessions`` value."""

    # Declared explicitly because ``total_sessions`` is listed in ``fields``
    # below but is supplied as a queryset annotation rather than a model field.
    total_sessions = serializers.IntegerField()

    class Meta:
        model = Class
        fields = (
            'id',
            'batch',
            'type',  # the closing quote was missing here (syntax error)
            'total_sessions',
        )
views.py
from django.db.models import Count, Q
# Sample view: annotates every Class with the number of distinct related
# classactivity rows, optionally restricted by a date condition.
class ClassStatisticsList(APIView):
# Keyword filters applied inside Count(); an empty dict means "no extra filter".
condition = {}
# NOTE(review): `date` is not defined in this snippet - presumably it is taken
# from the request; confirm against the real view.
if date:
condition = {'classactivity__date':date}
queryset = Class.objects.all().annotate(
# Count related classactivity rows matching `condition`, counting each once.
total_sessions=Count(
'classactivity',
filter=Q(**condition),
distinct=True
)
)

You are looking for Conditional Expressions that are documented nicely on the Django site.
I haven't tried the following snippet (it's just your code augmented), it's just to give you a starting point:
# Two-step annotation: first count the sessions, then expose the count as
# ``total_sessions`` with a count of 0 mapped to NULL.
# The whole chain is wrapped in parentheses so the leading-dot lines parse.
queryset = (
    Class.objects.all()
    .annotate(
        # This name must match the one referenced in When()/default below.
        total_session_count=Count(
            'classactivity',
            filter=Q(**condition),
            distinct=True,
        )
    )
    .annotate(
        total_sessions=Case(
            When(
                total_session_count=0,
                then=Value(None, output_field=IntegerField()),
            ),
            # A plain string here is treated as a field reference (like F()).
            default='total_session_count',
        )
    )
)

Related

What are the options to get filter on union querysets behavior with Django?

Basically the problem I have: I need an option or alternative approach to filter on annotated fields on union queryset.
I have the following simplified models setup:
# Simplified models (on_delete and other options are omitted, as in the question).


class Course(Model):
    # A many-to-many field needs its target model; the through model below
    # links Course to Group. The through model is referenced by name because
    # it is defined later in the module.
    groups = ManyToManyField(Group, through='CourseAssignment')


class CourseAssignment(Model):
    course = ForeignKey(Course)
    group = ForeignKey(Group)
    teacher = ForeignKey(Teacher)


class Lesson(Model):
    course = ForeignKey(Course, related_name='lessons')


class AssignmentProgress(Model):
    # The target model was missing; 'progresses' is the reverse name used on
    # Lesson by the queries that follow.
    lesson = ForeignKey(Lesson, related_name='progresses')
    course_assignment = ForeignKey(CourseAssignment)
    student = ForeignKey(Student)
    group = ForeignKey(Group)
    status = CharField(choices=(
        ('on_check', 'On check'),
        ('complete', 'Complete'),
        ('assigned', 'Assigned'),
    ))
    deadline = DateTimeField()
    checked_date = DateTimeField()
I need to display statistics on assignment progresses, grouped by lessons and by the groups to which courses are assigned. Here is my initial queryset; note that lessons are repeated in the final result, the difference being in the annotated data:
def annotated_lessons_queryset():
    """Return the union of per-assignment annotated lesson querysets.

    For every ``CourseAssignment``, the lessons of its course are annotated
    with ``completed_progresses`` (a ``Count`` over ``progresses`` filtered by
    the assignment's group) and ``on_check`` (whether an
    ``AssignmentProgress`` with status ``'on_check'`` exists for that lesson
    and group). Returns ``None`` when there are no course assignments.
    """
    combined = None
    for assignment in CourseAssignment.objects.all():
        course_lessons = Lesson.objects.filter(course=assignment.course)
        progress_count = Count(
            'progresses',
            filter=Q(group=assignment.group),
            output_field=IntegerField(),
        )
        has_on_check = Exists(
            AssignmentProgress.objects.filter(
                lesson=OuterRef('id'), group=assignment.group, status='on_check'
            )
        )
        annotated = course_lessons.annotate(
            completed_progresses=progress_count,
            on_check=has_on_check,
        )
        # The first queryset seeds the result; later ones are unioned onto it.
        if combined is None:
            combined = annotated
        else:
            combined = combined.union(annotated)
    return combined
I cannot use the | (OR) operator here, because it returns only distinct lesson values.
So far this works until I try filter all the lessons with annotated status on_check:
qs = annotated_lessons_queryset().filter(on_check=True)
Which fails with the error:
raise NotSupportedError(
django.db.utils.NotSupportedError: Calling QuerySet.filter() after union() is not supported.
Please, suggest a workaround or another approach to make this queryset filtered.
I haven't pulled this in and tried it out yet, but as the error message states you have to use union() last. This is a bit complicated as "Lessons can be repeated" in this queryset. So I would suggest using a list comprehension to get what you need out.
# filter() is not supported after union(), so iterate the combined queryset
# and keep the rows whose on_check annotation is truthy, in Python.
qs = annotated_lessons_queryset()
filtered = [lesson for lesson in qs if lesson.on_check]

how to use a queryset expression in a then clause in conditional annotation in Django

I have two models, one for organizations and one with the membership and role of a user in the organization:
# An organization, identified by a unique name.
class Organization(models.Model):
name = models.CharField(blank=False,null=False,max_length=100, unique=True)
# Membership record linking users to an organization with a role and a status.
class Member(models.Model):
# Required user reference; the queries below match it for "request" memberships.
user_request = models.ForeignKey('accounts.User',on_delete=models.CASCADE,related_name="member_user_request")
# Optional second user; the queries below match it for "invitation" memberships.
user_second = models.ForeignKey('accounts.User',on_delete=models.CASCADE,blank=True,null=True, related_name="member_user_second")
role = models.ForeignKey(RoleOrganization,on_delete=models.CASCADE, verbose_name=_('Rol'))
status = models.ForeignKey(Status,on_delete=models.CASCADE, verbose_name=_('Status'))
organization = models.ForeignKey(Organization,on_delete=models.CASCADE, verbose_name=_('Organization'))
I'm trying to use annotate() with a Case clause to get the role of a user in the organization, with this expression:
# Accepted memberships of the current user, either as requester or as invitee.
my_organizations = Member.objects.filter(
Q(user_request_id=self.request.user.id, status__name="accepted", type_request__name="request") |
Q(user_second_id=self.request.user.id, status__name="accepted", type_request__name="invitation")
)
# Attempt to annotate each organization with the current user's role name.
Organization.objects.annotate(
rol=Case(
When(id__in=list(my_organizations.values_list('organization_id', flat=True)),
# NOTE(review): .get() runs in Python while this expression is being built,
# so F('id') here is not bound to the row of the outer Organization query.
then=Value(my_organizations.get(organization_id=F('id')).role.name)),
default=None, output_field=CharField()
)
)
The problem here is that the then expression doesn't get the id of the object in the main queryset. If I return just F('id') in the then clause, the expression gets the value of the id in the main queryset, but I can't use a filter or any other queryset expression with values of the main object.
Is there a way to accomplish this?
PS: im just putting part of the code here for cleanliness, but if you need to know more please let me know
I think you can do it like this using Subquery:
from django.db.models import OuterRef, Q, Subquery

# Accepted memberships of the current user, either as requester or as invitee.
members = Member.objects.filter(
    Q(user_request_id=self.request.user.id, status__name="accepted", type_request__name="request") |
    Q(user_second_id=self.request.user.id, status__name="accepted", type_request__name="invitation")
)
# Correlate the memberships with the organization row of the outer query.
member_subquery = members.filter(organization=OuterRef('pk'))
organizations = Organization.objects.annotate(
    # 'role__name' yields the role's name, which is what the question asks
    # for; plain 'role' would return the foreign key id instead.
    # [:1] keeps the subquery to a single value per organization.
    member_role=Subquery(member_subquery.values('role__name')[:1])
)
print(organizations.values('member_role'))

django - prefetch only the newest record?

I am trying to prefetch only the latest record against the parent record.
my models are as such
# A monitored link; `dashboard` is the flag the queries below filter on.
class LinkTargets(models.Model):
device_circuit_subnet = models.ForeignKey(DeviceCircuitSubnets, verbose_name="Device", on_delete=models.PROTECT)
interface_index = models.CharField(max_length=100, verbose_name='Interface index (SNMP)', blank=True, null=True)
get_bgp = models.BooleanField(default=False, verbose_name="get BGP Data?")
dashboard = models.BooleanField(default=False, verbose_name="Display on monitoring dashboard?")
# Data records attached to a LinkTargets row (many per target).
class LinkData(models.Model):
link_target = models.ForeignKey(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT)
interface_description = models.CharField(max_length=200, verbose_name='Interface Description', blank=True, null=True)
...
The below query fails with the error
AttributeError: 'LinkData' object has no attribute '_iterable_class'
Query:
# Failing attempt: prefetch only the newest LinkData for each dashboard target.
link_data = LinkTargets.objects.filter(dashboard=True) \
.prefetch_related(
Prefetch(
'linkdata_set',
# Indexing with [0] returns a single LinkData instance, not a QuerySet,
# which is what triggers the AttributeError quoted above.
queryset=LinkData.objects.all().order_by('-id')[0]
)
)
I thought about getting LinkData instead and doing a select related but ive no idea how to get only 1 record for each link_target_id
link_data = LinkData.objects.filter(link_target__dashboard=True) \
.select_related('link_target')..?
EDIT:
Using rtindru's solution, the prefetched set seems to be empty. There are 6 records in there currently, at least 1 record for each of the 3 LinkTargets.
>>> link_data[0]
<LinkTargets: LinkTargets object>
>>> link_data[0].linkdata_set.all()
<QuerySet []>
>>>
The reason is that Prefetch expects a Django Queryset as the queryset parameter and you are giving an instance of an object.
Change your query as follows:
# Passes a real QuerySet to Prefetch, restricted to one primary key.
link_data = LinkTargets.objects.filter(dashboard=True) \
.prefetch_related(
Prefetch(
'linkdata_set',
# latest('id') picks one LinkData row overall, so only the target owning
# that row gets a non-empty prefetch result.
queryset=LinkData.objects.filter(pk=LinkData.objects.latest('id').pk)
)
)
This does have the unfortunate effect of undoing the purpose of Prefetch to a large degree.
Update
This prefetches exactly one record globally; not the latest LinkData record per LinkTarget.
To prefetch the max LinkData for each LinkTarget you should start at LinkData: you can achieve this as follows:
LinkData.objects.filter(link_target__dashboard=True).values('link_target').annotate(max_id=Max('id'))
This will return a queryset of dictionaries, one per link target, each of the form {'link_target': 12, 'max_id': 3223}.
You can then use this to return the right set of objects; perhaps filter LinkData based on the values of max_id.
That will look something like this:
# Highest LinkData id per link target, flattened to a list of primary keys.
latest_link_data_pks = LinkData.objects.filter(link_target__dashboard=True).values('link_target').annotate(max_id=Max('id')).values_list('max_id', flat=True)
# Prefetch only those newest rows for each dashboard target.
link_data = LinkTargets.objects.filter(dashboard=True) \
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.filter(pk__in=latest_link_data_pks)
)
)
The following works on PostgreSQL. I understand it won't help OP, but it might be useful to somebody else.
from django.db.models import Count, Prefetch
from .models import LinkTargets, LinkData
# Order by target, newest id first, then keep one row per target using
# distinct() on a field (stated above to work on PostgreSQL).
link_data_qs = LinkData.objects.order_by(
'link_target__id',
'-id',
).distinct(
'link_target__id',
)
# Use that queryset as the source for the linkdata_set prefetch.
qs = LinkTargets.objects.prefetch_related(
Prefetch(
'linkdata_set',
queryset=link_data_qs,
)
).all()
LinkData.objects.all().order_by('-id')[0] is not a queryset; it is a model object, hence your error.
You could try LinkData.objects.all().order_by('-id')[0:1] which is indeed a QuerySet, but it's not going to work. Given how prefetch_related works, the queryset argument must return a queryset that contains all the LinkData records you need (this is then further filtered, and the items in it joined up with the LinkTarget objects). This queryset only contains one item, so that's no good. (And Django will complain "Cannot filter a query once a slice has been taken" and raise an exception, as it should).
Let's back up. Essentially you are asking an aggregation/annotation question - for each LinkTarget, you want to know the most recent LinkData object, or the 'max' of an 'id' column. The easiest way is to just annotate with the id, and then do a separate query to get all the objects.
So, it would look like this (I've checked with a similar model in my project, so it should work, but the code below may have some typos):
# Annotate each dashboard target with the id of its newest LinkData row.
linktargets = (
    LinkTargets.objects
    .filter(dashboard=True)
    # Reverse foreign-key lookups use the related query name ('linkdata'),
    # not the 'linkdata_set' accessor name.
    .annotate(most_recent_linkdata_id=Max('linkdata__id'))
)
# Now, if we need them, let's collect the ids and fetch the actual objects.
# Targets without any LinkData have a None annotation and are skipped here.
linkdata_ids = [
    t.most_recent_linkdata_id
    for t in linktargets
    if t.most_recent_linkdata_id is not None
]
linkdata_objects = LinkData.objects.filter(id__in=linkdata_ids)
# And we can decorate the LinkTarget objects as well if we want.
linkdata_by_id = {linkdata.id: linkdata for linkdata in linkdata_objects}
for t in linktargets:
    # .get() leaves the attribute as None for targets with no LinkData, so
    # the attribute exists on every target.
    t.most_recent_linkdata = linkdata_by_id.get(t.most_recent_linkdata_id)
I have deliberately not made this into a prefetch that masks linkdata_set, because the result is that you have objects that lie to you - the linkdata_set attribute is now missing results. Do you really want to be bitten by that somewhere down the line? Best to make a new attribute that has just the thing you want.
Tricky, but it seems to work:
# OneToOneField subclass whose `_unique` flag is reset after construction.
# NOTE(review): presumably this keeps the one-to-one style reverse accessor
# while dropping the uniqueness requirement - confirm against Django internals.
class ForeignKeyAsOneToOneField(models.OneToOneField):
def __init__(self, to, on_delete, to_field=None, **kwargs):
super().__init__(to, on_delete, to_field=to_field, **kwargs)
# Override the value set by the parent constructor.
self._unique = False
# LinkData variant whose link to LinkTargets uses the custom field above.
class LinkData(models.Model):
# link_target = models.ForeignKey(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT)
# The reverse side is exposed on LinkTargets as 'linkdata_helper'.
link_target = ForeignKeyAsOneToOneField(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT, related_name='linkdata_helper')
interface_description = models.CharField(max_length=200, verbose_name='Interface Description', blank=True, null=True)
# Prefetch through the helper relation and store the result on a separate
# attribute, ``linkdata``, of each LinkTargets instance.
link_data = LinkTargets.objects.filter(dashboard=True).prefetch_related(
    Prefetch(
        'linkdata_helper',
        queryset=LinkData.objects.all().order_by('-id'),
        # Must be a keyword argument: a positional argument after
        # ``queryset=`` is a SyntaxError.
        to_attr='linkdata',
    )
)
# Now you can access linkdata:
link_data[0].linkdata
Of course, with this approach you can't use linkdata_helper to get related objects.
This is not a direct answer to your question, but it solves the same problem. It is possible to annotate the newest object with a subquery, which I think is clearer. You also don't have to do things like Max("id") to limit the prefetch query.
It makes use of django.db.models.functions.JSONObject (added in Django 3.2) to combine multiple fields:
# Annotate each MainModel row with a JSON object describing its newest
# related row.
MainModel.objects.annotate(
# Correlated on the outer row's pk and ordered newest first.
last_object=RelatedModel.objects.filter(mainmodel=OuterRef("pk"))
.order_by("-date_created")
.values(
# Pack several columns into a single value.
data=JSONObject(
id="id", body="body", date_created="date_created"
)
# Limit to one row so the annotation is a single value.
)[:1]
)

Django ORM get jobs with top 3 scores for each model_used

Models.py:
# A scoring model, identified by its title.
class ScoringModel(models.Model):
title = models.CharField(max_length=64)
# One score for a (job, candidate) pair produced by a given ScoringModel.
class PredictedScore(models.Model):
job = models.ForeignKey('Job')
candidate = models.ForeignKey('Candidate')
model_used = models.ForeignKey('ScoringModel')
score = models.FloatField()
# Set once on creation / refreshed on every save, respectively.
created_at = models.DateField(auto_now_add=True)
modified_at = models.DateTimeField(auto_now=True)
serializers.py:
# Read-oriented serializer for PredictedScore rows.
class MatchingJobsSerializer(serializers.ModelSerializer):
# Title pulled from the related job; never written back.
job_title = serializers.CharField(source='job.title', read_only=True)
class Meta:
model = PredictedScore
fields = ('job', 'job_title', 'score', 'model_used', 'candidate')
To fetch the top 3 jobs, I tried the following code:
# Serialize all scores of the candidate, then pick the three highest in Python.
queryset = PredictedScore.objects.filter(candidate=candidate)
jobs_serializer = MatchingJobsSerializer(queryset, many=True)
jobs = jobs_serializer.data
# nlargest ranks over the whole list, so the result is not split by model_used.
top_3_jobs = heapq.nlargest(3, jobs, key=lambda item: item['score'])
It's giving me the top 3 jobs for the whole set, which contains all the models.
I want to fetch the jobs with top 3 scores for a given candidate for each model used.
So, it should return the top 3 matching jobs with each ML model for the given candidate.
I followed this answer https://stackoverflow.com/a/2076665/2256258 . Its giving the latest entry of cake for each bakery, but I need the top 3.
I read about annotations in django ORM but couldn't get much about this issue. I want to use DRF serializers for this operations. This is a read only operation.
I am using Postgres as database.
What should be the Django ORM query to perform this operation?
Make the database do the work. You don't need annotations either as you want the objects, not the values or manipulated values.
To get a set of all scores for a candidate (not split by model_used) you would do:
# Top three scores for the candidate across all scoring models (not split by
# model_used); ordering and slicing are done by the database.
queryset = PredictedScore.objects.filter(candidate=candidate).order_by('-score')[:3]
jobs_serializer = MatchingJobsSerializer(queryset, many=True)
jobs = jobs_serializer.data
What you're proposing isn't particularly well suited in the Django ORM, annoyingly - I think you may need to make separate queries for each model_used. A nicer solution (untested for this example) is to hook Q queries together, as per this answer.
The example there uses tags, but I think the same idea holds:
# Let's get a distinct list of the scoring models used for this candidate.
# order_by() on the same field keeps DISTINCT from picking up other columns.
candidate_scores = PredictedScore.objects.filter(candidate=candidate)
all_models_used = (
    candidate_scores
    .order_by('model_used')
    .values_list('model_used', flat=True)
    .distinct()
)
q_objects = Q()  # Create an empty Q object to start with
for model_id in all_models_used:
    # A Q object cannot be sliced, so take the top three primary keys for this
    # model with a small query and OR them in.
    top_pks = (
        candidate_scores
        .filter(model_used=model_id)
        .order_by('-score')
        .values_list('pk', flat=True)[:3]
    )
    q_objects |= Q(pk__in=list(top_pks))  # 'or' the Q objects together
# Filtering from candidate_scores keeps the result empty (rather than "all
# rows") when the candidate has no scores and q_objects is still empty.
queryset = candidate_scores.filter(q_objects)

Filter on annotated column in django

I have two models:
class Project(models.Model):
...
class StateChange(models.Model):
created_at = models.DateTimeField(default=now, db_index=True)
project = models.ForeignKey("project.Project")
state = models.IntegerField(choices=PROJECT_STATE_TYPES, db_index=True)
...
The models are linked and I need a list of projects which is filtered by the related StateChange if there's one.
I build my queryset like this:
# created_at of the joined StateChange when its state is CHECKED; no default is
# given, so other rows produce NULL.
state_checked = Case(
When(statechange__state=PROJECT_STATE_CHECKED, then=F('statechange__created_at'))
)
# Same, for the CONSTRUCTION_ORDERED state.
state_construction_ordered = Case(
When(statechange__state=PROJECT_STATE_CONSTRUCTION_ORDERED, then=F('statechange__created_at'))
)
# Annotate both dates, then drop rows where a date is NULL although the joined
# StateChange is in the corresponding state.
qs = Projekt.objects.visible_for_me(self.request.user) \
.annotate(date_reached_state_checked=state_checked) \
.annotate(date_reached_state_construction_ordered=state_construction_ordered)\
.exclude(Q(date_reached_state_checked__isnull=True) & Q(statechange__state=PROJECT_STATE_CHECKED) |
Q(date_reached_state_construction_ordered__isnull=True) & Q(statechange__state=PROJECT_STATE_CONSTRUCTION_ORDERED))
The Project may have no matching StateChange, or one or both.
I need the list to show one Project-line in all cases. My queryset only works for zero or one matching StateChange. It excludes the Projects where both StateChanges are present and I see why it does it when I look at the generated query.
If I do not exclude anything, it shows 1 line for each case.
Can anyone give me a hint about how to make django create the JOINs I need?
We did it by using .extra():
return Projekt.objects.all().extra(
select={
"date_reached_state_checked": "SELECT created_at FROM tracking_statechange WHERE tracking_statechange.projekt_id = projekt_projekt.id AND tracking_statechange.state = 20",
"date_reached_state_construction_ordered": "SELECT created_at FROM tracking_statechange WHERE tracking_statechange.projekt_id = projekt_projekt.id AND tracking_statewechsel.state = 50"
})

Categories

Resources