python django sort with lambda with if statement - python

I have two models, each with a dollar gross amount and an updated timestamp:
class FirstDate(models.Model):
gross = models.DecimalField(max_digits=12, decimal_places=2, default=0)
updated = models.DateTimeField(auto_now=True)
class SecondDate(models.Model):
gross = models.DecimalField(max_digits=12, decimal_places=2, default=0)
updated = models.DateTimeField(auto_now=True)
I want to sort the objects by gross and, if gross is the same, by the updated field.
For example,
qs1 = SoloDate.objects.all()[:2]
qs2 = GroupDate.objects.all()[:2]
result_list = sorted(
chain(qs1, qs2),
key=lambda x: x.gross # and if gross is the same, for the gross same objects, x.updated and then update was also the same, x.pk,
reverse=True
)
For example, suppose there are two objects each in qs1 and qs2.
# objects from qs1
qs1_obj1 = {
'pk': 1,
'gross': 5,
'updated': 2018-11-24 10:53:23.360707+00:00
}
qs1_obj2 = {
'pk': 2,
'gross': 5,
'updated': 2018-11-25 10:53:23.360707+00:00
}
# objects from qs2
qs2_obj1 = {
'pk': 3,
'gross': 5,
'updated': 2018-11-24 10:53:23.360707+00:00
}
qs2_obj2 = {
'pk': 4,
'gross': 1,
'updated': 2018-11-23 10:53:23.360707+00:00
}
The resulting result_list order should be qs1_obj1, qs2_obj1, qs1_obj2, qs2_obj2.
The reasons are:
qs1_obj1: first by gross, then by updated, then by pk,
qs2_obj1: ties on gross and updated, but its pk is larger,
qs1_obj2: ties on gross, but its updated is later,
qs2_obj2: its gross is smaller.
This may not be a good question, or it may be a bothersome one, but I need help.
The line in question is:
key=lambda x: x.gross # and if gross is the same, for the same gross objects, x.updated and then update was also the same, x.pk,
How can I do this?

Try sorting by multiple fields like so:
# Sort by gross (highest first); ties are broken by the oldest ``updated``
# first, and then by the smallest ``pk`` first.  Negating only ``gross`` lets a
# single ascending sort express the mixed directions, so ``reverse=True`` (which
# would also flip the ``updated`` and ``pk`` tie-breaks) is not used.
result_list = sorted(
    chain(qs1, qs2),
    key=lambda x: (-x.gross, x.updated, x.pk),
)

Related

Annotate every obj of QuerySet with count of every choice of related obj, grouped by choice value

Let's say I have two models:
class Mailing(models.Model):
...
class Message(models.Model):
date_created = models.DateTimeField(default=aware_utc_now)
class DeliveryStatusChoices(models.TextChoices): # Number of choices may vary.
PENDING = 'pending'
SUCCESS = 'success'
FAIL = 'fail'
status = models.CharField(
choices=DeliveryStatusChoices.choices,
default=DeliveryStatusChoices.PENDING,
max_length=50
)
mailing = models.ForeignKey(
'Mailing',
on_delete=models.CASCADE,
related_name='messages'
)
I'm looking for a way to annotate mailings with count of related messages grouped by choice value. To be able to do something like:
mailing.stats
and get:
{'pending': 15, 'success': 20, 'fail': 2}
Only messages related to particular mailing should be counted.
I found out how to get stats for particular mailing:
models.Message.objects.filter(mailing_id=1).values('status').annotate(count=Count('status'))
the output is:
<QuerySet [{'status': 'pending', 'count': 5}, {'status': 'success', 'count': 2}, {'status': 'fail', 'count': 3}]>
annotate cannot be called on particular object. So, for loop won't work.
Also output format is not as desired. But it's ok.
Another way I found:
result = []
for mailing in models.Mailing.objects.all():
statuses = mailing.messages.values('status').annotate(count=Count('status'))
mailing_result = {'id': mailing.id}
for status in statuses:
mailing_result[status['status']] = status['count']
result.append(mailing_result)
But this solution gives me just list of dicts.
Sometimes I have to prefetch related objects based on some criteria:
messages = Prefetch(
'messages',
models.Message.objects.filter(
date_created__gte=yesterday_midnight,
date_created__lt=today_midnight
)
)
mailings = models.Mailing.objects.prefetch_related(messages)
In this case I'd like to generate stats by counting only prefetched messages.
For example, if some mailing has 20 messages, but only 15 meet the criteria, then the mailing should be annotated with stats for those 15 messages.
UPD: I came up with
models.Mailing.objects.annotate(
stats=Value(
dict(Subquery(
models.Message.objects.filter(
mailing_id=OuterRef('id')
).values_list('status').annotate(
count=Count('status')
)
)),
output_field=JSONField()
)
)
but got
'Subquery' object is not iterable
UPD 2: Another idea. Very inefficient I think.
qs = models.Mailing.objects.none()
for mailing in models.Mailing.objects.all():
item = models.Mailing.objects.filter(id=mailing.id).annotate(
stats=Value(
dict(models.Message.objects.filter(
mailing_id=mailing.id
).values_list('status').annotate(
count=Count('status')
)),
output_field=JSONField()
)
)
qs |= item
However, the problem is that every mailing in qs has the same value of .stats as the first one.
For example, if the first mailing's stats is {'success': 2, 'fail': 5} all other mailings has the same, despite not sharing the same object (qs[0].stats == qs[1].stats -> True, qs[0].stats is qs[1].stats -> False)
The only solution I found is to replace QuerySet (qs = models.Mailing.objects.none()) with python list ([])
qs = []
for mailing in models.Mailing.objects.all():
item = models.Mailing.objects.filter(id=mailing.id).annotate(
stats=Value(
dict(models.Message.objects.filter(
mailing_id=mailing.id
).values_list('status').annotate(
count=Count('status')
)),
output_field=JSONField()
)
)
qs.append(item[0])
How about something like:
from django.db.models import Q, Count
# Annotate each mailing with one conditional count per message status.
mailings = Mailing.objects.annotate(
# Count only the related messages whose status is 'pending'.
pending_messages_count = Count(
'messages',
filter = Q(messages__status = 'pending')
),
# Placeholder: repeat the same Count(..., filter=Q(...)) pattern for 'success'
# and for each further status.
success_messages_count = ...
)
EDIT:
You can create a dictionary of annotations like so, and then unpack them in your queryset:
# Build one conditional Count per delivery status and unpack the mapping into
# annotate().  ``statuses`` is an iterable of status values, e.g.
# ``Message.DeliveryStatusChoices.values``.
annotations = {
    # Every annotation counts the related 'messages'; only the Q filter differs.
    # The lookup is passed as a keyword argument (a quoted keyword is a syntax
    # error), and the key suffix matches ``pending_messages_count``.
    f'{status}_messages_count': Count(
        'messages',
        filter=Q(messages__status=status),
    )
    for status in statuses
}
mailings = Mailing.objects.annotate(**annotations)

Django get total count and count by unique value in queryset

I have models Software and Domain described loosely as:
class Software(models.Model)
id = models.BigInteger(primary_key=True, db_index=True, null=False)
company = models.ForeignKey('Company')
domain = models.ForeignKey('Domain')
type = models.CharField(null=False)
vendor = models.CharField(null=False)
name = models.CharField(null=False)
class Domain(models.Model):
id = models.BigInteger(primary_key=True, db_index=True, null=False)
type = models.CharField()
importance = models.DecimalField(max_digits=11, decimal_places=10, null=False)
And I get a Software queryset with:
qs = Software.objects.filter(company=c).order_by('vendor')
The desired output should have an aggregated Domain importance with total count for each unique Software, i.e.
[
{
'type': 'type_1', \
'vendor': 'ajwr', | - unique together
'name': 'nginx', /
'domains': {
'total_count': 4,
'importance_counts': [0.1: 1, 0.5: 2, 0.9: 1] # sum of counts = total_count
},
},
{
...
},
]
I feel like the first step here should be to just group the type, vendor, name by Domain so each Software object has a list of Domains instead of just one but I'm not sure how to do that. Doing this in memory would make it a lot easier but it seems like it would be a lot slower than using querysets / SQL.
So I would do it like this:
from django.db.models import Sum
from django.db.models import Count

# ``domain`` is a ForeignKey, so there is no ``obj.domain.all()`` to iterate.
# Instead let the database group the rows by the unique software triple plus
# the domain importance and count each group.
qs = (
    Software.objects.filter(company=c)
    .values('type', 'vendor', 'name', 'domain__importance')
    .annotate(count=Count('id'))
    .order_by('vendor', 'type', 'name', 'domain__importance')
)

# Fold the (software, importance) rows into one entry per unique software.
output = []
entries_by_software = {}
for row in qs:
    key = (row['type'], row['vendor'], row['name'])
    entry = entries_by_software.get(key)
    if entry is None:
        entry = {
            'type': row['type'],
            'vendor': row['vendor'],
            'name': row['name'],
            'domains': {
                'total_count': 0,
                # Maps importance -> number of domains with that importance.
                'importance_counts': {},
            },
        }
        entries_by_software[key] = entry
        output.append(entry)
    domains = entry['domains']
    # The per-importance counts always add up to total_count.
    domains['total_count'] += row['count']
    domains['importance_counts'][row['domain__importance']] = row['count']
I believe it should be fast enough; I would only try to improve it if it turns out not to be. Remember: "Premature optimization is the root of all evil."

Django and python, how to get a annotate from two different model?

I have the following model framework:
class Subcategory(models.Model):
nome=models.CharField()
class Order(models.Model):
order=models.CharField()
class Quantity(models.Model):
order=models.ForeignKey(Order)
subcategory=models.ForeignKey(Subcategory)
quantity=models.DecimalField()
class Price(models.Model):
order=models.ForeignKey(Order)
subcategory=models.ForeignKey(Subcategory)
price=models.DecimalField()
Now I want to filter both the price and quantity querysets by subcategory and order, and obtain the product of price and quantity.
This is the code I have so far, but I don't know how to compute the price*quantity operation.
cod='1234'
price=dict()
defaults=list(0 for m in range(1))
filter_quantity = list(Quantity.objects.values_list('subcategory__id', flat=True).distinct()).filter(order__order=cod)
for subcategory__id, totals in(Price.objects.filter(
subcategoty__in=filter_quantity ).values_list('subcategory__id').annotate(totals=ExpressionWrapper(Sum(F('price')),
output_field=FloatField())).values_list('subcategory__id', 'totals'):
if subcategory__id not in price.keys():
price[subcategory__id ]=list(defaults)
index=0
price[subcategory__id][index]=totals
total_costs={'Costs': [sum(t) for t in zip(*price.values())]}
You can also make changes to this method according to your need.
def get_order_details(order_code):
    """Return quantity, unit price and total per subcategory for one order.

    Args:
        order_code: Value of ``Order.order`` identifying the order.

    Returns:
        A dict with ``order_code`` and ``details``.  ``details`` holds one dict
        per ``Quantity`` row that has a matching ``Price`` (same order and
        subcategory), with the keys ``subcategory_name``, ``quantity``,
        ``unit_price`` and ``total_price``.  Quantities without a price are
        skipped.
    """
    # Load all prices of the order in one query and keep the first one
    # (lowest pk) per subcategory, instead of one query per quantity row.
    first_price_by_subcategory = {}
    for price in Price.objects.filter(order__order=order_code).order_by('pk'):
        first_price_by_subcategory.setdefault(price.subcategory_id, price)

    # select_related avoids an extra query for each ``quantity.subcategory``.
    quantities = Quantity.objects.filter(
        order__order=order_code
    ).select_related('subcategory')

    order_details = []
    for quantity in quantities:
        price = first_price_by_subcategory.get(quantity.subcategory_id)
        if price is None:
            continue
        order_details.append({
            'subcategory_name': quantity.subcategory.nome,
            'quantity': quantity.quantity,
            'unit_price': price.price,
            'total_price': quantity.quantity * price.price,
        })
    return {
        'order_code': order_code,
        'details': order_details,
    }

Django: QuerySet with group of same entries

My goal is to show for a specific survey the Top 10 "Entities" per question ordered from high to low by salience.
A survey has several questions. And each question has several answers. Each answer can have several entities (sometimes the same name (CharField), sometimes different names). Entities are grouped by the name field per question.
I thought the following final result makes sense:
[
5: # question.pk
[
{
'name': 'Leonardo Di Caprio',
'count': 4, # E.g. answer__pk = 1, answer__pk = 1, answer__pk = 2, answer__pk = 3. Leonardo Di Caprio was mentioned twice in answer_pk 1 and therefore has entries.
'salience': 3.434 # Sum of all 4 entities
},
{
'name': 'titanic',
'count': 5,
'salience': 1.12
},
{
'name': 'music',
'count': 3,
'salience': 1.12
}
],
3: # question.pk
[
{
'name': 'Leonardo Di Caprio',
'count': 5,
'salience': 1.5
},
{
'name': 'titanic',
'count': 4,
'salience': 1.12
},
{
'name': 'music',
'count': 2,
'salience': 1.12
}
],
]
Now I am struggling to write the right QuerySet for my desired outcome. I came to the point that I probably have to use .values() and .annotate(). But my results are quite far away from what my goal is.
Here my models.py:
class Entity(TimeStampedModel):
name = models.CharField()
type = models.CharField()
salience = models.FloatField()
sentiment_magnitude = models.FloatField()
sentiment_score = models.FloatField()
language = models.CharField()
answer = models.ForeignKey(
Answer, on_delete=models.CASCADE, related_name="entities"
)
class Answer(TimeStampedModel):
question = models.ForeignKey(
"surveys.Question", on_delete=models.CASCADE, related_name="answers"
)
response = models.ForeignKey()
answer = models.TextField()
class Question(TimeStampedModel):
survey = models.ForeignKey(
"surveys.Survey", on_delete=models.CASCADE, related_name="questions"
)
title = models.CharField(max_length=100, verbose_name=_("Title"))
focus = models.CharField()
class Response(TimeStampedModel):
survey = models.ForeignKey(
"surveys.Survey", on_delete=models.CASCADE, related_name="responses"
)
order = models.ForeignKey()
attendee = models.ForeignKey()
total_time = models.PositiveIntegerField()
ip_address = models.GenericIPAddressField()
language = models.CharField()
class Survey(TimeStampedModel):
id = models.UUIDField(primary_key=True, editable=False, default=uuid.uuid4)
event = models.ForeignKey()
template = models.CharField()
Here is what I have tried so far, but it seems far from my goal:
questions = self.request.event.surveys.get_results(
settings.SURVEY_PRE_EVENT
)
for question in questions:
print("------")
print(question.pk)
answers = question.answers.all()
for answer in answers:
print(
answer.entities.values("name")
.annotate(count=Count("name"))
.annotate(salience=Sum("salience"))
)
Here the output:
------
33
<QuerySet [{'name': 'people', 'count': 1, 'salience': 1.0}]>
<QuerySet [{'name': 'income', 'count': 1, 'salience': 1.0}]>
<QuerySet [{'name': 'incomes', 'count': 2, 'salience': 1.26287645101547}]>
I'm not sure entirely if I understood your problem correctly, but you may be looking for something like
Question.objects.values("answers__entities__name").annotate(
salience=Sum("answers__entities__salience"),
count=Count("answers"),
)
Disclaimers:
I haven't tested this and I may be wrong, but this is what I'd start playing around with.
Also you might find this useful: https://simpleisbetterthancomplex.com/tutorial/2016/12/06/how-to-create-group-by-queries.html
You can loop through the questions in order to create a list for each question:
Entity.objects.filter(answer__question=question).values('name').annotate(count=Count('pk')).annotate(total_salience=Sum('salience'))
Or if you want to have all in one queryset, group first by question (pk):
Entity.objects.values('answer__question__pk', 'name').annotate(count=Count('pk')).annotate(total_salience=Sum('salience'))
This will produce a list, not a nested list by question, but you can later regroup this in python to nest the entities for each question.

Django group by hour

I have the following model in Django.
class StoreVideoEventSummary(models.Model):
Customer = models.ForeignKey(GlobalCustomerDirectory, null=True, db_column='CustomerID', blank=True, db_index=True)
Store = models.ForeignKey(Store, null=True, db_column='StoreID', blank=True, related_name="VideoEventSummary")
Timestamp = models.DateTimeField(null=True, blank=True, db_index=True)
PeopleCount = models.IntegerField(null=True, blank=True)
I would like to find out the number of people entering the store each hour.
To achieve this, I'm trying to group the rows by the hour on Timestamp and sum the PeopleCount column.
store_count_events = StoreVideoEventSummary.objects.filter(Timestamp__range=(start_time, end_time),
Customer__id=customer_id,
Store__StoreName=store)\
.order_by("Timestamp")\
.extra({
"hour": "date_part(\'hour\', \"Timestamp\")"
}).annotate(TotalPeople=Sum("PeopleCount"))
This doesn't seem to group the results by the hour, it merely adds a new column TotalPeople which has the same value as PeopleCount to each row in the query set.
just break it into two steps
import itertools
from datetime import datetime
# ...
def date_hour(timestamp):
    """Return a "date hour" grouping key such as ``"11/24/18 10"``.

    Args:
        timestamp: A ``datetime`` (what a ``DateTimeField`` yields) or a
            numeric POSIX timestamp.

    Returns:
        ``timestamp`` formatted with ``"%x %H"``.
    """
    # Model DateTimeField values are already datetime objects; only numeric
    # timestamps need converting (fromtimestamp() rejects a datetime).
    if not isinstance(timestamp, datetime):
        timestamp = datetime.fromtimestamp(timestamp)
    return timestamp.strftime("%x %H")
objs = StoreVideoEventSummary.objects.filter(
    Timestamp__range=(start_time, end_time),
    Customer__id=customer_id,
    Store__StoreName=store,
).order_by("Timestamp")
# groupby only merges adjacent rows, which is why the queryset is ordered by
# Timestamp before grouping.
groups = itertools.groupby(objs, lambda x: date_hour(x.Timestamp))
# groups is a lazy iterator; nothing is traversed until the loop below runs.
for group, matches in groups:
    # Add up the people counted in the group (a NULL PeopleCount counts as 0)
    # rather than counting how many rows fell into it.
    print(group, "TTL:", sum(match.PeopleCount or 0 for match in matches))
This allows you to group by several distinct criteria
If you just want the hour regardless of the date, just change date_hour:
def date_hour(timestamp):
    """Return the hour of day (``"00"``-``"23"``) as a grouping key.

    Args:
        timestamp: A ``datetime`` (what a ``DateTimeField`` yields) or a
            numeric POSIX timestamp.

    Returns:
        ``timestamp`` formatted with ``"%H"``.
    """
    # Model DateTimeField values are already datetime objects; only numeric
    # timestamps need converting (fromtimestamp() rejects a datetime).
    if not isinstance(timestamp, datetime):
        timestamp = datetime.fromtimestamp(timestamp)
    return timestamp.strftime("%H")
If you wanted to group by day of the week you just use
def date_day_of_week(timestamp):
    """Return a "weekday hour" grouping key such as ``"6 10"``.

    Args:
        timestamp: A ``datetime`` (what a ``DateTimeField`` yields) or a
            numeric POSIX timestamp.

    Returns:
        ``timestamp`` formatted with ``"%w %H"`` (``%w`` is the weekday
        number, Sunday being 0).
    """
    # Model DateTimeField values are already datetime objects; only numeric
    # timestamps need converting (fromtimestamp() rejects a datetime).
    if not isinstance(timestamp, datetime):
        timestamp = datetime.fromtimestamp(timestamp)
    return timestamp.strftime("%w %H")
And update itertools.groupby's lambda to use date_day_of_week.
Building off your original code, could you try:
# values("hour") followed by annotate() is what produces the GROUP BY; QuerySet
# has no group_by() method.  Ordering by the grouped column keeps any other
# ordering column out of the GROUP BY.
store_count_events = StoreVideoEventSummary.objects.filter(
    Timestamp__range=(start_time, end_time),
    Customer__id=customer_id,
    Store__StoreName=store,
)\
.extra({
    "hour": "date_part(\'hour\', \"Timestamp\")"
})\
.values("hour")\
.annotate(TotalPeople=Sum("PeopleCount"))\
.order_by("hour")
I know I'm late here, but taking cues from the doc, https://docs.djangoproject.com/en/1.11/ref/models/querysets/#django.db.models.query.QuerySet.extra
the below filter should work for you.
# Group the matching rows by hour and sum PeopleCount per hour.
store_count_events = StoreVideoEventSummary.objects.filter(
    Timestamp__range=(start_time, end_time),
    Customer__id=customer_id,
    Store__StoreName=store
).extra(
    select={
        # NOTE(review): hour(...) looks like MySQL syntax; the question's own
        # query uses date_part('hour', ...) -- confirm against your database.
        'hour': 'hour(Timestamp)'
    }
).values(
    'hour'
).annotate(
    TotalPeople=Sum('PeopleCount')
).order_by(
    # Order by the grouped column: ordering by 'Timestamp' would add it to the
    # GROUP BY and produce one row per timestamp instead of one per hour.
    'hour'
)

Categories

Resources