I'm building a Django site for discussions. Users can participate in discussions and can also cast votes of approval for discussions and messages in the discussions. A simplified data model is as follows:
class Discussion(models.Model):
    """A discussion that users can post messages to and vote approval for."""

    name = models.CharField(max_length=255)


class Message(models.Model):
    """A message written by a user inside a discussion."""

    # on_delete is a required argument from Django 2.0 onwards; CASCADE is the
    # behaviour older versions applied implicitly.
    owner = models.ForeignKey(User, related_name='messages', on_delete=models.CASCADE)
    body = models.TextField()
    discussion = models.ForeignKey(Discussion, related_name='messages', on_delete=models.CASCADE)


class MessageApprovalVote(models.Model):
    """A user's approval vote for a single message."""

    owner = models.ForeignKey(User, related_name='message_approval_votes', on_delete=models.CASCADE)
    message = models.ForeignKey(Message, related_name='approval_votes', on_delete=models.CASCADE)


class DiscussionApprovalVote(models.Model):
    """A user's approval vote for a whole discussion."""

    owner = models.ForeignKey(User, related_name='discussion_approval_votes', on_delete=models.CASCADE)
    discussion = models.ForeignKey(Discussion, related_name='approval_votes', on_delete=models.CASCADE)
I want to select the top 20 "most active" discussions, which means ordering by the sum of the number of messages, total number of message approval votes, and number of discussion approval votes for that discussion, or (in pseudocode):
# Doesn't work
Discussion.objects.
order_by(Count('messages') +
Count('approval_votes') +
Count('messages__approval_votes'))
Using annotations, I can calculate the totals of each of the three scoring factors:
# One aggregate per scoring factor. Each uses distinct=True because all three
# relations are joined in the same query.
message_count = Count('messages', distinct=True)
discussion_vote_count = Count('approval_votes', distinct=True)
message_vote_count = Count('messages__approval_votes', distinct=True)
scores = Discussion.objects.annotate(
    total_messages=message_count,
    total_discussion_approval_votes=discussion_vote_count,
    total_message_approval_votes=message_vote_count,
)
I then thought I was on to something when I found the extra method:
# Attempt to add the three annotations together in raw SQL through extra().
# The question reports that evaluating this raises
# DatabaseError: column "total_messages" does not exist.
total_scores = scores.extra(
    select={
        'score_total': 'total_messages + total_discussion_approval_votes + total_message_approval_votes'
    }
)
and would then be able to do:
# Highest combined score first, limited to the first 20 rows.
final_answer = total_scores.order_by('-score_total')[:20]
but the extra call gives a DatabaseError:
DatabaseError: column "total_messages" does not exist
LINE 1: SELECT (total_votes + total_messages + total_persuasions) AS...
and thus I was foiled. Can the extra method not reference annotated fields? Is there another way to do what I'm trying to do, short of using a raw sql query? I'm using Postgres if that makes a difference.
Any insights would be greatly appreciated!
I don't think this is possible in a single top-level SQL query. The score_total value depends on the three aggregate results, but you're asking them all to be calculated at the same time.
In straight SQL, you can do this with a subquery, but I'm not sure how to inject it into Django. After setting up a simple Django app with your models, the following query seems to do the trick against a SQLite database:
-- Top 20 discussions by combined activity score.
-- The inner query computes the three DISTINCT counts per discussion; the outer
-- query can then add them up because the aggregates already exist as columns.
SELECT id, name,
       total_messages, total_discussion_approval_votes, total_message_approval_votes,
       (total_messages +
        total_discussion_approval_votes +
        total_message_approval_votes) AS score_total
FROM
    (SELECT
         discussion.id,
         discussion.name,
         COUNT(DISTINCT discussionapprovalvote.id) AS total_discussion_approval_votes,
         COUNT(DISTINCT messageapprovalvote.id) AS total_message_approval_votes,
         COUNT(DISTINCT message.id) AS total_messages
     FROM discussion
     LEFT OUTER JOIN discussionapprovalvote
         ON (discussion.id = discussionapprovalvote.discussion_id)
     LEFT OUTER JOIN message
         ON (discussion.id = message.discussion_id)
     LEFT OUTER JOIN messageapprovalvote
         ON (message.id = messageapprovalvote.message_id)
     -- The alias is required: PostgreSQL before version 16 rejects an unnamed
     -- subquery in FROM, while SQLite accepts either form.
     GROUP BY discussion.id, discussion.name) AS discussion_scores
ORDER BY score_total DESC
LIMIT 20;
Actually there's a way by using an extra annotate with F expressions:
# First annotate the three per-discussion counts, then add them together in a
# second annotate() so the F() references resolve to existing annotations.
# Descending order plus the slice yields the 20 most active discussions.
Discussion.objects.annotate(
    total_messages=Count('messages', distinct=True),
    total_discussion_approval_votes=Count('approval_votes', distinct=True),
    total_message_approval_votes=Count('messages__approval_votes', distinct=True),
).annotate(
    total_score=(
        F('total_messages')
        + F('total_discussion_approval_votes')
        + F('total_message_approval_votes')
    ),
).order_by('-total_score')[:20]
Related
I'm fairly new to Django and am stuck creating a left join in the Django ORM. I have tried many approaches, but none of them seems to work:
The query I want to translate to Django is:
-- Target query: subscriptions of person 3 to magazines whose from/until range
-- covers today, together with today's TimeLogedIn row when there is one
-- (hence the LEFT JOIN with the date condition inside ON).
-- NOTE(review): date.today() is Python rather than SQL and "from" is a
-- reserved word; presumably this is pseudo-SQL and the raw query actually
-- executed differs — confirm.
select ssc.id
,mgz.Title
,tli.id
,tli.Time
from Subscription ssc
join Person prs
on ssc.PersonID = prs.id
and prs.id = 3
join Magazine mgz
on mgz.id = ssc.MagazineID
and mgz.from <= date.today()
and mgz.until > date.today()
left join TimeLogedIn tli
on tli.SubscriptionID = ssc.id
and tli.DateOnline = date.today()
The model I'm using looks like this:
class Magazine(models.Model):
    """A magazine that persons subscribe to through Subscription."""

    Title = models.CharField(max_length=100)  # was 100L, a Python 2 long literal
    until = models.DateField()  # was models.Datefield, which does not exist
    # Referenced by name because Person is declared further down the module.
    Persons = models.ManyToManyField('Person', through='Subscription')


# "from" is a reserved word, so `from = models.DateField()` inside the class
# body is a SyntaxError. Registering the field after class creation keeps the
# field name "from", which the MagazineID__from__lte lookups rely on. Read the
# value with getattr(magazine, 'from'). Renaming the field would be cleaner but
# changes every lookup that uses it.
Magazine.add_to_class('from', models.DateField())


class Person(models.Model):
    """A site user who can hold magazine subscriptions."""

    user = models.OneToOneField(User, on_delete=models.CASCADE)
    Magazines = models.ManyToManyField(Magazine, through='Subscription')


class Subscription(models.Model):
    """Link row between one Person and one Magazine."""

    MagazineID = models.ForeignKey(Magazine, on_delete=models.CASCADE)
    PersonID = models.ForeignKey(Person, on_delete=models.CASCADE)


class TimeLogedIn(models.Model):
    """Time logged for a subscription on a given date."""

    SubscriptionID = models.ForeignKey('Subscription', on_delete=models.CASCADE)
    DateOnline = models.DateField()
    Time = models.DecimalField(max_digits=5, decimal_places=2)
As I said, I have tried many variations without success, and I no longer know how to express this in the Django ORM. Is it even possible? I have already written a raw query that works, but how can I build the same query with the ORM?
You can use the field lookups lte and gt to filter your objects and then call the values() method.
You can also query in the opposite direction and use Q objects to allow for null values:
from django.db.models import Q

# Evaluate the date once so every condition compares against the same day.
today = date.today()

# Reverse relations are addressed by the lower-cased model name in lookups, so
# the TimeLogedIn rows of a Subscription are reached through "timelogedin".
# NOTE(review): the OR/isnull filter keeps subscriptions that logged in today or
# never; presumably a subscription with logins only on other days is dropped,
# unlike the LEFT JOIN in the question — confirm.
Subscription.objects.filter(
    PersonID_id=3,
    MagazineID__from__lte=today,
    MagazineID__until__gt=today,
).filter(
    Q(timelogedin__DateOnline=today) | Q(timelogedin__DateOnline__isnull=True)
).values("id", "MagazineID__Title", "timelogedin__id", "timelogedin__Time")
OR from TimeLogedIn:
# Same data, starting from TimeLogedIn: only rows logged today, for magazines
# whose from/until range covers today.
# NOTE(review): unlike the SQL in the question, this does not restrict the
# person (PersonID = 3) — add that condition if it is required.
today = date.today()
TimeLogedIn.objects.filter(DateOnline=today).filter(
    SubscriptionID__MagazineID__from__lte=today,
    SubscriptionID__MagazineID__until__gt=today,  # was "util", an unknown field
).values(
    "SubscriptionID_id", "SubscriptionID__MagazineID__Title", "id", "Time"
)
Querysets also have the query attribute that contains the sql query to be executed, you can see it like following:
# Printing a queryset's .query attribute shows the SQL that will be executed.
print(TimeLogedIn.objects.filter(...).values(...).query)
Note: Behind the scenes, Django appends "_id" to a foreign key's field name to create its database column name. Therefore the field should be named
subscription instead of SubscriptionID (its column then becomes subscription_id).
You can also use prefetch_related() and select_related() to prevent multiple database hits:
# SubscriptionID is a field on TimeLogedIn, not a model; these queries start
# from the Subscription model. The default reverse accessor for TimeLogedIn is
# the lower-cased model name plus "_set".
Subscription.objects.filter(...).prefetch_related("timelogedin_set")
Subscription.objects.filter(...).select_related("PersonID")
I have "post" objects and a "post like" object with how many likes a post has received by which user:
class Post(models.Model):
    """A short text post written by a user."""

    text = models.CharField(max_length=500, default='')
    # on_delete is a required argument from Django 2.0 onwards; CASCADE is the
    # behaviour older versions applied implicitly.
    user = models.ForeignKey(User, on_delete=models.CASCADE)


class PostLike(models.Model):
    """Records that a user liked a post."""

    user = models.ForeignKey(User, on_delete=models.CASCADE)
    post = models.ForeignKey(Post, on_delete=models.CASCADE)
I can select how many likes a post has received like this:
# Annotate every post with its number of PostLike rows.
Post.objects.annotate(likes=Count('postlike'))
This roughly translates to:
-- Approximate SQL for Post.objects.annotate(likes=Count('postlike')).
-- The ORM uses a LEFT OUTER JOIN, so posts without any likes are still
-- returned, with likes = 0; an implicit inner join would drop them.
SELECT p.*,
       COUNT(l.id) AS likes
FROM post p
LEFT OUTER JOIN postlike l
    ON p.id = l.post_id
GROUP BY p.id
It works. Now, how I can filter the Count aggregation by the current user? I'd like to retrieve not all the likes of the post, but all the likes by the logged user. The resulting SQL should be like:
-- Desired result: per post, the number of likes made by user 1, computed with
-- a correlated subquery.
-- NOTE(review): the join to postlike and the GROUP BY look unnecessary for the
-- subquery and presumably hide posts that have no likes at all — confirm the
-- intent before copying this shape.
SELECT p.*,
(SELECT COUNT(*) FROM postlike WHERE postlike.user_id = 1 AND postlike.post_id = p.id) AS likes
FROM post p, postlike l
WHERE p.id = l.post_id
GROUP BY (p.id)
Did you know that Count accepts a filter argument?
# Restrict the aggregate itself to likes made by the logged-in user.
liked_by_current_user = Q(postlike__user=logged_in_user)
Post.objects.annotate(likes=Count('postlike', filter=liked_by_current_user))
It's not exactly as clean, but you could use Case/When...
# Conditional count of the likes made by `user`.
# COUNT ignores NULL only, so the Case must yield NULL for non-matching rows:
# with default=0 every joined row is non-NULL and would be counted as a like.
posts = Post.objects.annotate(
    likes=models.Count(
        models.Case(
            models.When(postlike__user_id=user.id, then=1),
            output_field=models.IntegerField(),
        )
    )
)
And of course, you can always drop down to .extra() or even raw SQL when there's something you can't express via the Django ORM.
Try to add filter first:
# filter() placed before annotate() constrains the rows that are counted.
# NOTE(review): presumably this also removes posts the user has not liked at
# all, instead of reporting them with likes=0 — confirm that is acceptable.
Post.objects.filter(postlike__user=request.user).annotate(likes=Count('postlike'))
From the docs:
The filter precedes the annotation, so the filter constrains the objects considered when calculating the annotation.
I have below given Django model
class ABC(models.Model):
    """A dated amount recorded for a user under a name and phone number."""

    # on_delete is a required argument from Django 2.0 onwards; CASCADE is the
    # behaviour older versions applied implicitly.
    user = models.ForeignKey(DEF, on_delete=models.CASCADE)
    # NOTE(review): CharField normally needs max_length; the intended lengths
    # are not shown, so none is invented here.
    name = models.CharField()
    phone_num = models.CharField()
    date = models.DateTimeField(auto_now=True)
    amount = models.IntegerField()
I want to perform below query using Django ORM.
select *, sum(amount), count(date) from ABC group by phone_num;
I tried code below, but it does not work.
# Attempt from the question. There is no values("phone_num") call here, so the
# annotation is not grouped by phone number.
ABC.objects.all().annotate(count = Count("phone_num")).order_by("phone_num")
I am not sure it is possible to fetch the data you describe (SELECT *, SUM(amount), COUNT(date)) with a plain GROUP BY; that probably needs a JOIN query. At the least, you could try the variants below and then match the results against ABC.objects.all() by phone_num:
# values() selects the grouping column; the empty order_by() clears any
# ordering before the aggregates are added.
per_phone = ABC.objects.values("phone_num").order_by()
per_phone.annotate(count=Count("date"), amount=Sum("amount"))
Notes:
values('phone_num') - produces the GROUP BY phone_num clause.
order_by() - clears any default ordering, which would otherwise be added to the grouping (you can drop this call if the model has no default ordering).
p.s.
Also try to run query below:
# Group by phone number, then count dates and total the amounts per group.
ABC.objects.values("phone_num").annotate(
    count=Count("date"),
    amount=Sum("amount"),
)
Update
Since there is no pure Django ORM solution, you could use the following loop to fetch the desired data:
# One aggregate row per phone number, each paired with one ABC record for that
# number under the "data" key. This issues one extra query per phone number.
data = (
    dict(row, data=ABC.objects.filter(phone_num=row['phone_num']).first())
    for row in ABC.objects
    .values("phone_num")
    .order_by()
    .annotate(count=Count("date"), amount=Sum("amount"))
)

# Now you can access your data in the following way:
for item in data:
    phone_num = item['phone_num']
    count = item['count']
    amount = item['amount']
    record_id = item['data'].id  # not named `id`, which would shadow the builtin
    name = item['data'].name
    # Do other stuff...
Note
data is built with a generator expression, so it can be iterated only once; wrap it in list(...) if you need to go over it again.
I have the following models:
class AcademicRecord(models.Model):
    """An academic record holding subjects through AcademicRecordSubject."""

    record_id = models.PositiveIntegerField(unique=True, primary_key=True)
    # Referenced by name because Subject is declared at the end of the module.
    subjects = models.ManyToManyField('Subject', through='AcademicRecordSubject')
    ...


class AcademicRecordSubject(models.Model):
    """Link between a record and a subject, with the language group taken."""

    # on_delete is a required argument from Django 2.0 onwards; CASCADE is the
    # behaviour older versions applied implicitly.
    academic_record = models.ForeignKey('AcademicRecord', on_delete=models.CASCADE)
    subject = models.ForeignKey('Subject', on_delete=models.CASCADE)
    language_group = IntegerCharField(max_length=2)
    ...


class SubjectTime(models.Model):
    """A time slot of a subject for one language group."""

    time_id = models.CharField(max_length=128, unique=True, primary_key=True)
    subject = models.ForeignKey('Subject', on_delete=models.CASCADE)
    language_group = IntegerCharField(max_length=2)
    ...


class Subject(models.Model):
    """A subject, identified by subject_id."""

    subject_id = models.PositiveIntegerField(unique=True, primary_key=True)
    ...
The academic records have list of subjects each with a language code and the subject times have a subject and language code.
With a given AcademicRecord, how can I get the subject times that matches with the AcademicRecordSubjects that the AcademicRecord has?
This is my approach, but it makes more queries than needed:
# `record` is the AcademicRecord being inspected.
# Builds one SubjectTime queryset per AcademicRecordSubject row, matching both
# the subject and the language group of that row.
times = [
    SubjectTime.objects.filter(
        subject=link.subject,
        language_group=link.language_group,
    )
    for link in record.academicrecordsubject_set.all()
]
I want to make the query using django ORM not with raw SQL
The SubjectTime language group has to match the Subject's language group as well.
I got it, in part thanks to #Robert Jørgensgaard Eng
My problem was how to do the inner join on more than one field, for which the F object came in handy.
The correct query is:
# SubjectTime rows whose subject belongs to `record` and whose language_group
# equals the language_group of the linking AcademicRecordSubject row; F()
# refers to SubjectTime's own language_group column.
SubjectTime.objects.filter(subject__academicrecordsubject__academic_record=record,
                           subject__academicrecordsubject__language_group=F('language_group'))
Given an AcademicRecord instance academic_record, it is either
# NOTE(review): the "_set" suffix is the attribute-access form of a reverse
# relation; presumably it is not valid inside a filter lookup and this variant
# fails — verify before use.
SubjectTime.objects.filter(subject__academicrecordsubject_set__academic_record=academic_record)
or
# Matches on the subject only; this variant does not compare language_group.
SubjectTime.objects.filter(subject__academicrecordsubject__academic_record=academic_record)
The results reflect all the rows of the join that these ORM queries become in SQL. To avoid duplicates, just use distinct().
Now this would be much easier, if I had a django shell to test in :)
I've looked at doing a query using an extra and/or annotate but have not been able to get the result I want.
I want to get a list of Products that have active licenses, together with the total number of available licenses. An active license is defined as one that is not obsolete, is in date, and still has unassigned seats, i.e. the number of licenses minus the number of assigned licenses (as given by a count on the many-to-many field) is greater than zero.
The models I have defined are:
class Vendor(models.Model):
    """A software vendor."""

    name = models.CharField(max_length=200)
    url = models.URLField(blank=True)


class Product(models.Model):
    """A product sold by a vendor, for which licenses are held."""

    name = models.CharField(max_length=200)
    # on_delete is a required argument from Django 2.0 onwards; CASCADE is the
    # behaviour older versions applied implicitly.
    vendor = models.ForeignKey(Vendor, on_delete=models.CASCADE)
    product_url = models.URLField(blank=True)
    is_obsolete = models.BooleanField(default=False, help_text="Is this product obsolete?")


class License(models.Model):
    """A license for a product with a number of assignable seats."""

    product = models.ForeignKey(Product, on_delete=models.CASCADE)
    num_licenses = models.IntegerField(default=1, help_text="The number of assignable licenses.")
    licensee_name = models.CharField(max_length=200, blank=True)
    license_key = models.TextField(blank=True)
    # Pass the callable, not its result: date.today() would be evaluated once
    # when the module is imported, and every later row would get that old date.
    license_startdate = models.DateField(default=date.today)
    license_enddate = models.DateField(null=True, blank=True)
    is_obsolete = models.BooleanField(default=False, help_text="Is this licenses obsolete?")
    licensees = models.ManyToManyField(User, blank=True)
I have tried filtering by the License model. Which works, but I don't know how to then collate / GROUP BY / aggregate the returned data into a single queryset that is returned.
When trying to filter by product, I can't quite figure out the query I need. I can get bits and pieces, and have tried using an .extra() select= query to return the number of available licenses (which is all I really need at this point); note that multiple licenses can be associated with one product.
So, the ultimate answer I am after is, how can I retrieve a list of available products with the number of available licenses in Django. I'd rather not resort to using raw as much as possible.
An example queryset that gets all the License details I want, I just can't get the product:
# Raw SQL for the per-license remainder: seats granted minus the rows in the
# licensees many-to-many table for that license.
avail_sql = 'licenses_license.num_licenses - (SELECT count(*) FROM licenses_license_licensees WHERE licenses_license_licensees.license_id = licenses_license.id)'
today = date.today()
(
    License.objects
    .annotate(used_licenses=Count('licensees'))
    .extra(select={'avail_licenses': avail_sql})
    # Keep licenses that are not obsolete and still have unassigned seats.
    .filter(is_obsolete=False, num_licenses__gt=F('used_licenses'))
    # Drop licenses whose end date is today or earlier.
    .exclude(license_enddate__lte=today)
)
Thank you in advance.
EDIT (2014-02-11):
I think I've solved it in possibly an ugly way. I didn't want to make too many DB calls if I can, so I get all the information using a License query, then filter it in Python and return it all from inside a manager class. Maybe an overuse of Dict and list. Anyway, it works, and I can expand it with additional info later on without a huge amount of risk or custom SQL. And it also uses some of the models parameters that I have defined in the model class.
class LicenseManager(models.Manager):
    """Manager that derives product availability from License rows."""

    def get_available_products(self):
        """Return the products that still have available licenses.

        Each returned product carries an ``avail_licenses`` attribute holding
        the sum of ``avail_licenses`` over its matching licenses.
        """
        avail_sql = 'licenses_license.num_licenses - (SELECT count(*) FROM licenses_license_licensees WHERE licenses_license_licensees.license_id = licenses_license.id)'
        candidates = (
            self.get_queryset()
            .annotate(used_licenses=Count('licensees'))
            .extra(select={'avail_licenses': avail_sql})
            .filter(is_obsolete=False, num_licenses__gt=F('used_licenses'))
            .exclude(license_enddate__lte=date.today())
            .prefetch_related('product')
        )

        # Accumulate the per-license remainder on one instance per product.
        totals = {}
        for lic in candidates:
            product = lic.product
            known = totals.get(product)
            if known is None:
                product.avail_licenses = lic.avail_licenses
                totals[product] = product
            else:
                known.avail_licenses += lic.avail_licenses

        return [prod for prod in totals.values() if prod.avail_licenses > 0]
EDIT (2014-02-12):
Okay, this is the final solution I have decided to go with. Uses Python to filter the results. Reduces cache calls, and has a constant number of SQL queries.
The lesson here is that for something with many levels of filtering, it's best to get as much as needed, and filter in Python when returned.
class ProductManager(models.Manager):
    def get_all_available(self, curruser):
        """
        Gets all available Products that are available to the current user

        While iterating, each product gets two attributes set on it:
        ``avail_licenses`` (sum of ``available_licenses`` over the licenses
        visited) and ``user_assigned`` (True when ``curruser`` is among the
        licensees of a visited license).
        """
        q = self.get_queryset().select_related().prefetch_related('license', 'license__licensees').filter(
            is_obsolete=False,
            license__is_obsolete=False
        ).exclude(
            license__enddate__lte=date.today()
        ).distinct()
        # return a curated list. Need further information first
        products = []
        for x in q:
            x.avail_licenses = 0
            x.user_assigned = False
            # checks licenses. Does this on the model level as it's cached so as to save SQL queries
            for y in x.license.all():
                # NOTE(review): break stops at the first inactive license, so
                # licenses after it are not counted — confirm that `continue`
                # was not intended.
                if not y.is_active:
                    break
                x.avail_licenses += y.available_licenses
                if curruser in y.licensees.all():
                    x.user_assigned = True
            # NOTE(review): nesting reconstructed (the source lost its
            # indentation); assumed to run once per product — confirm.
            products.append(x)
        # NOTE(review): `products` is filled but `q` is what gets returned —
        # confirm which one callers are meant to receive.
        return q
One strategy would be to get all the product ids from your License queryset:
# Materialise the product ids of the matching licenses as a plain list.
productIDList = list(License.objects.filter(...).values_list(
    'product_id', flat=True))
and then query the products using that list of ids:
# Fetch the products whose primary key appears in that list.
Product.objects.filter(id__in=productIDList)