Problem with aggregation by annotated fields - python

I have models:
class Publisher(Model):
    """Publisher with a free-text name."""

    name = TextField()


class Author(Model):
    """Author with a free-text name."""

    name = TextField()


class Book(Model):
    """Book that references one publisher and one author."""

    publisher = ForeignKey("Publisher")
    author = ForeignKey("Author")


class Magazine(Model):
    """Magazine that references one publisher and one author (as `writer`)."""

    publisher = ForeignKey("Publisher")
    # Points at Author like Book.author does, but under a different field name.
    writer = ForeignKey("Author")
I want to know which authors wrote for publishers. My version is this:
from django.db.models import TextField, F, Subquery, OuterRef
from django.contrib.postgres.aggregates import StringAgg # I use postgres
# Expose both author fields under one common name, `author_name`.
books = Book.objects.annotate(author_name=F("author__name"))
magazines = Magazine.objects.annotate(author_name=F("writer__name"))
# Reduce both querysets to the same two columns so they can be combined.
books = books.values("publisher_id", "author_name")
magazines = magazines.values("publisher_id", "author_name")
product = books.union(magazines)
# !! This is the failing step: QuerySet has no .group_by() method.
# NOTE(review): `.annonate` below looks like a typo for `.annotate`.
product = product.group_by(
"publisher_id"
).annonate(
author_names=StringAgg("author_name", ";")
)
# Attach the aggregated names to each publisher through a correlated subquery.
publishers = Publisher.objects.all().annotate(
author_names=Subquery(
product.filter(publisher_id=OuterRef("id")).values("author_names")[:1],
output_field=TextField()
)
)
# I was expecting something like
# name | author_names
# ------------------------------------------
# Publisher1 | Author1;Author2;Author3
# Publisher2 | Author2
# Publisher3 | Author2;Author3
The problem is that QuerySet has no .group_by() method; instead, the .values() method is suggested (product.values("publisher_id").annotate(...)).
But this is complicated by the fact that I had previously called .values("publisher_id", "author_name") to bring two different models into the same view.
I also tried using .only("publisher_id", "author_name"), but (maybe it's a Django bug) this method can't work together with annotated and normal fields.
Is there any way to fix this problem or some other way to get a list of authors for a publisher?

Related

Optimize Django ORM query to get object if a specific related object does not exist

I have the following table structures:
class Library:
    """Library record that points at one bookcase."""

    id = models.CharField(...)
    bookcase = models.ForeignKey(
        Bookcase,
        related_name="libraries"
    )
    location = models.ChoiceField(...)
    # Other attributes...


class Bookcase:
    """Bookcase record; libraries and books both reference it."""

    # some attributes
    type = models.ChoiceField(...)


class Book:
    """Book stored in one bookcase."""

    bookcase = models.ForeignKey(
        Bookcase,
        related_name="books"
    )
    title = models.CharField(...)
    status = models.ChoiceField(...)  # borrowed | missing | available
Say I want to get all Library objects that do not have a book with title "Foo" that is NOT missing; how can I optimize this query? I have the following:
# Candidate libraries, with the related bookcase fetched via select_related.
libraries = Library.objects.select_related('bookcase').filter(location='NY', bookcase__type='wooden')
libraries_without_book = []
for library in libraries:
    # One additional query is issued here for every candidate library.
    has_non_missing_book = Book.objects.filter(
        bookcase=library.bookcase,
        title="Foo",
    ).exclude(status='missing').exists()
    if not has_non_missing_book:
        libraries_without_book.append(library.id)
Unfortunately, this performs an extra query for every Library object that matches the initial filtering condition. Is there a more optimized method I can use here that makes use of prefetch_related in some way?
# NOTE(review): `library` is only bound inside the question's loop, so this presumably still runs once per library - confirm.
Book.objects.filter(~Q(status='missing'),bookcase=library.bookcase,title='Foo')
This query should be sufficient

How to change join and group by SQL to ORM in Django

I'm new to Django. I want to join two models, company and client, and count the number of clients for each company. Here is the SQL:
-- Count clients per company name; the LEFT JOIN keeps companies that have no clients.
SELECT Company_company.name, count(Client_client.cid)
FROM Company_company
LEFT JOIN Client_client
ON Company_company.comid = Client_client.comid_id
GROUP BY Company_company.name;
But since Django uses an ORM, I'm a little confused, as I'm a beginner. I have already consulted a few SQL-to-ORM converter websites, such as Django ORM, and done some trial and error. But I don't know where the problem is, since I want the output from the ORM to be split into two different arrays. Here is my code:
labels = []
data = []
# Group clients by company and count them; .values() yields one dict per row.
queryClientCompany = client.objects.values('comid').annotate(c=Count('cid')).values('comid__name','c')
for comp in queryClientCompany:
    # NOTE(review): each `comp` is a dict, so the attribute access below is
    # what triggers the reported error; comp['comid__name'] / comp['c'] is the
    # dict form.
    labels.append(comp.comid__name)
    data.append(comp.c)
Here some of the relevant things in the client and company models:
class client (models.Model):
    """Client row that optionally references a company through `comid`."""

    #client info
    cid = models.AutoField(primary_key = True)
    comid = models.ForeignKey(company,related_name='companys',
        on_delete = models.DO_NOTHING,verbose_name="Company",null = True, blank = True)


class company(models.Model):
    """Company row with an auto-generated primary key and a name."""

    comid = models.AutoField(_('Company'),primary_key = True)
    #company info
    name = models.CharField(_('Company Name'),max_length = 50)
The error states that comid__name is not defined. So how do I actually append the result? I hope someone can help me. Thank you in advance.
You should query from the opposite side to perform the LEFT OUTER JOIN between company and client (and not client and company):
from django.db.models import Count

labels = []
data = []
# Query from the company side so the join is company LEFT OUTER JOIN client.
queryClientCompany = company.objects.annotate(
    c=Count('companys__cid')
)
for comp in queryClientCompany:
    # Each row is a company instance carrying the annotated count `c`.
    labels.append(comp.name)
    data.append(comp.c)
The companys part is due to the related_name='companys', but it does not make much sense to name this relation that way. The related_name=… parameter [Django-doc] specifies how to access the Clients for a given Company, so clients is a more appropriate value for the related_name:
class client (models.Model):
    """Client row; the owning company reaches its clients via `clients`."""

    cid = models.AutoField(primary_key=True)
    comid = models.ForeignKey(
        company,
        related_name='clients',
        on_delete = models.DO_NOTHING,
        verbose_name="Company",
        null = True,
        blank = True
    )
then the query is:
from django.db.models import Count

labels = []
data = []
# Count each company's clients through the `clients` reverse relation.
queryClientCompany = company.objects.annotate(
    c=Count('clients__cid')
)
for comp in queryClientCompany:
    # Each row is a company instance carrying the annotated count `c`.
    labels.append(comp.name)
    data.append(comp.c)

how to use a queryset expression in a then clause in conditional annotation in Django

I have two models: one for organizations and one for the membership and role of a user in the organization.
class Organization(models.Model):
    """Organization with a unique name."""

    name = models.CharField(blank=False,null=False,max_length=100, unique=True)


class Member(models.Model):
    """Membership linking users to an organization with a role and a status."""

    user_request = models.ForeignKey('accounts.User',on_delete=models.CASCADE,related_name="member_user_request")
    user_second = models.ForeignKey('accounts.User',on_delete=models.CASCADE,blank=True,null=True, related_name="member_user_second")
    role = models.ForeignKey(RoleOrganization,on_delete=models.CASCADE, verbose_name=_('Rol'))
    status = models.ForeignKey(Status,on_delete=models.CASCADE, verbose_name=_('Status'))
    organization = models.ForeignKey(Organization,on_delete=models.CASCADE, verbose_name=_('Organization'))
and I'm trying to use annotate with a Case clause, where I want to get the role of a user in the organization with this expression:
# Accepted memberships of the current user, as requester or as invitee.
my_organizations = Member.objects.filter(
Q(user_request_id=self.request.user.id, status__name="accepted", type_request__name="request") |
Q(user_second_id=self.request.user.id, status__name="accepted", type_request__name="invitation")
)
# NOTE(review): the .get(organization_id=F('id')) call below presumably runs
# while the expression is being built, not per Organization row - confirm.
Organization.objects.annotate(
rol=Case(
When(id__in=list(my_organizations.values_list('organization_id', flat=True)),
then=Value(my_organizations.get(organization_id=F('id')).role.name)),
default=None, output_field=CharField()
)
)
the problem here is that the then expression doesn't get the id of the object in the main queryset, if i return in the then just the F('id') the expression gets the value of the id in the main queryset, but i can use a filter or any queryset expression with some values of the main object.
Is there a way to accomplish this?
PS: im just putting part of the code here for cleanliness, but if you need to know more please let me know
I think you can do it like this using Subquery:
from django.db.models import OuterRef, Q, Subquery

# Accepted memberships of the current user, as requester or as invitee.
members = Member.objects.filter(
    Q(user_request_id=self.request.user.id, status__name="accepted", type_request__name="request") |
    Q(user_second_id=self.request.user.id, status__name="accepted", type_request__name="invitation")
)
# Correlate the memberships with the organization row of the outer query.
member_subquery = members.filter(organization=OuterRef('pk'))
organizations = Organization.objects.annotate(
    # role__name selects the role's name (what the question asks for) rather
    # than the role's primary key; [:1] keeps the subquery to a single row.
    member_role=Subquery(member_subquery.values('role__name')[:1])
)
print(organizations.values('member_role'))

Perform lookup and update within a single Django query

I have two models: MetaModel and RelatedModel. I want to include the result of a RelatedModel lookup within a MetaModel query, and I'd like to do this within a single DB call.
I've tried to define a 'subquery' QuerySet for use in the main query, but that hasn't worked - it's still making two queries to complete the operation.
Note: I can't use a traditional ForeignKey relationship because the profile_id field is not unique. Uniqueness is a combination of profile_id and channel. This is an aggregation table and profile_id is not guaranteed to be unique across multiple third-party channels.
Any suggestions?
Models:
class Channel(models.Model):
    """Named channel with an explicit auto-increment primary key."""

    id = models.AutoField(primary_key=True)
    name = models.CharField(
        max_length=25,
    )


class MetaModel(models.Model):
    """Metadata stored per (profile_id, channel) pair."""

    profile_id = fields.IntegerField()
    channel = fields.ForeignKey(Channel)
    metadata = fields.TextField()


class RelatedModel(models.Model):
    """Maps a related_id to a profile_id within a channel."""

    related_id = fields.IntegerField()
    profile_id = fields.IntegerField()
    channel = fields.ForeignKey(Channel)
Dummy data
# Pass the name by keyword: positional model arguments fill fields in
# declaration order, and Channel declares `id` first.
channel = Channel(name="Web site A")
channel.save()
sample_meta = MetaModel(profile_id=1234, channel=channel)
sample_related = RelatedModel(profile_id=1234, related_id=5678, channel=channel)
Query:
# Create a queryset to filter down to the single record we need the `profile_id` for
# I've limited to the only field we need via a `values` operation
related_qs = RelatedModel.objects.filter(
    related_id=5678,
    channel=channel
).values_list("profile_id", flat=True)
# I'm doing an update_or_create as there is other data to store, not included for brevity
obj, created = MetaModel.objects.update_or_create(
    profile_id=related_qs.first(),  # <<< This var is the dynamic part of the query
    channel=channel,
    defaults={"metadata": "Metadata is added to a new or existing record."}
)
Regarding your note on uniqueness, you can use unique_together option in Django as described here in the documentation.
class MetaModel(models.Model):
    """Metadata row whose (profile_id, channel) pair must be unique."""

    profile_id = fields.ForeignKey(RelatedModel)
    channel = fields.ForeignKey(Channel)
    metadata = fields.TextField()

    class Meta:
        # Uniqueness is enforced on the combination, not on either field alone.
        unique_together = ('profile_id', 'channel')
Then you can change your query accordingly and should solve your problem.

Django filtering based on count of related model

I have the following working code:
# Houses belonging to agency 90.
houses_of_agency = House.objects.filter(agency_id=90)
# `area` values of the AreaHouse rows that link to those houses.
area_list = AreaHouse.objects.filter(house__in=houses_of_agency).values('area')
# Flat list of the matching Area primary keys (area_id).
area_ids = Area.objects.filter(area_id__in=area_list).values_list('area_id', flat=True)
That returns a queryset with a list of area_ids. I want to filter further so that I only get area_ids where there are more than 100 houses belonging to the agency.
I tried the following adjustment:
# Houses belonging to agency 90.
houses_of_agency = House.objects.filter(agency_id=90)
# NOTE(review): annotating AreaHouse rows before .values('area') presumably
# counts per AreaHouse row rather than per area - confirm.
area_list = AreaHouse.objects.filter(house__in=houses_of_agency).annotate(num_houses=Count('house_id')).filter(num_houses__gte=100).values('area')
# Flat list of the matching Area primary keys (area_id).
area_ids = Area.objects.filter(area_id__in=area_list).values_list('area_id', flat=True)
But it returns an empty queryset.
My models (simplified) look like this:
class House(TimeStampedModel):
    """House that belongs to one agency."""

    house_pk = models.IntegerField()
    agency = models.ForeignKey(Agency, on_delete=models.CASCADE)


class AreaHouse(TimeStampedModel):
    """Link row pairing one area with one house."""

    area = models.ForeignKey(Area, on_delete=models.CASCADE)
    house = models.ForeignKey(House, on_delete=models.CASCADE)


class Area(TimeStampedModel):
    """Area with an integer primary key and an optional parent area."""

    area_id = models.IntegerField(primary_key=True)
    parent = models.ForeignKey('self', null=True)
    name = models.CharField(null=True, max_length=30)
Edit: I'm using MySQL for the database backend.
You are querying for agency_id with just one underscore. I corrected your queries below. Also, in django it's more common to use pk instead of id however the behaviour is the same. Further, there's no need for three separate queries as you can combine everything into one.
Also note that your fields area_id and house_pk are unnecessary; Django automatically creates primary key fields, which are accessible via modelname__pk.
# Note how the first query is inlined in the .filter() call.
# The chain is wrapped in parentheses because a comment cannot follow a
# backslash line continuation.
area_list = (
    AreaHouse.objects
    .filter(house__agency__pk=90)
    .annotate(num_houses=Count('house'))  # <- 'house'
    .filter(num_houses__gte=100)
    .values('area')
)
# Note the double underscore.
area_ids = (
    Area.objects
    .filter(area__in=area_list)
    .values_list('area__pk', flat=True)
)
you could simplify this even further if you don't need the intermediate results. here are both queries combined:
# Both steps combined: filter by agency, annotate the house count, keep
# counts of at least 100, and return the area primary keys as a flat list.
area_ids = (
    AreaHouse.objects
    .filter(house__agency__pk=90)
    .annotate(num_houses=Count('house'))
    .filter(num_houses__gte=100)
    .values_list('area__pk', flat=True)
)
Finally, you seem to be manually defining a many-to-many relation in your model (through AreaHouse). There are better ways of doing this, please read the django docs.

Categories

Resources