Django get total count and count by unique value in queryset

Django get total count and count by unique value in queryset - python

I have models Software and Domain described loosely as:
class Software(models.Model):
    """Software entry tied to one ``Company`` and one ``Domain``."""

    # The Django field class is ``BigIntegerField``; ``models.BigInteger``
    # does not exist.
    id = models.BigIntegerField(primary_key=True, db_index=True, null=False)
    # NOTE(review): Django 2.0+ requires an explicit ``on_delete`` on
    # ForeignKey -- confirm the target Django version.
    company = models.ForeignKey('Company')
    domain = models.ForeignKey('Domain')
    type = models.CharField(null=False)
    vendor = models.CharField(null=False)
    name = models.CharField(null=False)
class Domain(models.Model):
    """Domain with a type and a decimal ``importance`` value."""

    # The Django field class is ``BigIntegerField``; ``models.BigInteger``
    # does not exist.
    id = models.BigIntegerField(primary_key=True, db_index=True, null=False)
    type = models.CharField()
    importance = models.DecimalField(max_digits=11, decimal_places=10, null=False)
And I get a Software queryset with:
qs = Software.objects.filter(company=c).order_by('vendor')
The desired output should have an aggregated Domain importance with total count for each unique Software, i.e.
[
{
'type': 'type_1', \
'vendor': 'ajwr', | - unique together
'name': 'nginx', /
'domains': {
'total_count': 4,
'importance_counts': {0.1: 1, 0.5: 2, 0.9: 1} # sum of counts = total_count
},
},
{
...
},
]
I feel like the first step here should be to just group the type, vendor, name by Domain so each Software object has a list of Domains instead of just one but I'm not sure how to do that. Doing this in memory would make it a lot easier but it seems like it would be a lot slower than using querysets / SQL.

So I would do it like this:
from django.db.models import Count, Sum

# Let the database group the rows: one result row per
# (type, vendor, name, importance) with the number of Software rows in it.
# ``domain`` is a ForeignKey, so there is no related manager to prefetch or
# call ``.all()`` on; the importance is reached through the join instead.
qs = (
    Software.objects.filter(company=c)
    .values('type', 'vendor', 'name', 'domain__importance')
    .annotate(count=Count('id'))
    .order_by('vendor', 'type', 'name', 'domain__importance')
)

# Fold the flat rows into one entry per unique (type, vendor, name).
grouped = {}
for row in qs:
    key = (row['type'], row['vendor'], row['name'])
    entry = grouped.setdefault(key, {
        'type': row['type'],
        'vendor': row['vendor'],
        'name': row['name'],
        'domains': {'total_count': 0, 'importance_counts': {}},
    })
    domains = entry['domains']
    domains['total_count'] += row['count']
    # Keys are the Decimal importance values; their counts sum to total_count.
    domains['importance_counts'][row['domain__importance']] = row['count']

output = list(grouped.values())
And I believe it should be fast enough. I would only try to improve it if it turns out not to be. Remember: "Premature optimization is the root of all evil."

Related

Django and python, how to get a annotate from two different model?

I have the following model framework:
class Subcategory(models.Model):
nome=models.CharField()
class Order(models.Model):
order=models.CharField()
class Quantity(models.Model):
order=models.ForeignKey(Order)
subcategory=models.ForeignKey(Subcategory)
quantity=models.DecimalField()
class Price(models.Model):
order=models.ForeignKey(Order)
subcategory=models.ForeignKey(Subcategory)
price=models.DecimalField()
Now I want to obtain a new value: filter both the price and the quantity querysets by subcategory and order, and get the multiplication of the two (price * quantity).
This is the code I have so far, but I don't know how to obtain the price*quantity operation.
cod = '1234'
price = {}
defaults = [0]

# Filter on the queryset before materialising it: a Python list has no
# .filter() method.
filter_quantity = list(
    Quantity.objects.filter(order__order=cod)
    .values_list('subcategory__id', flat=True)
    .distinct()
)

# Sum of prices per subcategory, restricted to the subcategories that appear
# in the order's quantities.
totals_by_subcategory = (
    Price.objects.filter(subcategory__in=filter_quantity)
    .values_list('subcategory__id')
    .annotate(
        totals=ExpressionWrapper(Sum(F('price')), output_field=FloatField())
    )
    .values_list('subcategory__id', 'totals')
)

for subcategory__id, totals in totals_by_subcategory:
    if subcategory__id not in price:
        price[subcategory__id] = list(defaults)
    index = 0
    price[subcategory__id][index] = totals

# Column-wise sum across all subcategories.
total_costs = {'Costs': [sum(t) for t in zip(*price.values())]}

You can also make changes to this method according to your need.
def get_order_details(order_code):
    """Return quantity, unit price and total price per subcategory of an order.

    Args:
        order_code: Value matched against ``Order.order``.

    Returns:
        A dict with ``order_code`` and ``details``. ``details`` is a list of
        dicts with ``subcategory_name``, ``quantity``, ``unit_price`` and
        ``total_price``. Quantities without a matching price are skipped.
    """
    # Load every price of the order once instead of querying per quantity.
    # Ordering by pk and keeping the first row seen per subcategory selects
    # the same row that ``.first()`` would pick on an unordered queryset.
    price_by_subcategory = {}
    for price in Price.objects.filter(order__order=order_code).order_by('pk'):
        price_by_subcategory.setdefault(price.subcategory_id, price)

    # select_related avoids one extra query per row for ``subcategory.nome``.
    quantities = Quantity.objects.filter(
        order__order=order_code
    ).select_related('subcategory')

    order_details = []
    for quantity in quantities:
        price = price_by_subcategory.get(quantity.subcategory_id)
        if price is None:
            continue
        order_details.append({
            'subcategory_name': quantity.subcategory.nome,
            'quantity': quantity.quantity,
            'unit_price': price.price,
            'total_price': quantity.quantity * price.price,
        })

    return {
        'order_code': order_code,
        'details': order_details,
    }

Django: QuerySet with group of same entries

My goal is to show for a specific survey the Top 10 "Entities" per question ordered from high to low by salience.
A survey has several questions. And each question has several answers. Each answer can have several entities (sometimes the same name (CharField), sometimes different names). Entities are grouped by the name field per question.
I thought the following final result makes sense:
[
5: # question.pk
[
{
'name': 'Leonardo Di Caprio',
'count': 4, # E.g. answer__pk = 1, answer__pk = 1, answer__pk = 2, answer__pk = 3. Leonardo Di Caprio was mentioned twice in answer_pk 1 and therefore has entries.
'salience': 3.434 # Sum of all 4 entities
},
{
'name': 'titanic',
'count': 5,
'salience': 1.12
},
{
'name': 'music',
'count': 3,
'salience': 1.12
}
],
3: # question.pk
[
{
'name': 'Leonardo Di Caprio',
'count': 5,
'salience': 1.5
},
{
'name': 'titanic',
'count': 4,
'salience': 1.12
},
{
'name': 'music',
'count': 2,
'salience': 1.12
}
],
]
Now I am struggling to write the right QuerySet for my desired outcome. I came to the point that I probably have to use .values() and .annotate(). But my results are quite far away from what my goal is.
Here my models.py:
class Entity(TimeStampedModel):
name = models.CharField()
type = models.CharField()
salience = models.FloatField()
sentiment_magnitude = models.FloatField()
sentiment_score = models.FloatField()
language = models.CharField()
answer = models.ForeignKey(
Answer, on_delete=models.CASCADE, related_name="entities"
)
class Answer(TimeStampedModel):
question = models.ForeignKey(
"surveys.Question", on_delete=models.CASCADE, related_name="answers"
)
response = models.ForeignKey()
answer = models.TextField()
class Question(TimeStampedModel):
survey = models.ForeignKey(
"surveys.Survey", on_delete=models.CASCADE, related_name="questions"
)
title = models.CharField(max_length=100, verbose_name=_("Title"))
focus = models.CharField()
class Response(TimeStampedModel):
survey = models.ForeignKey(
"surveys.Survey", on_delete=models.CASCADE, related_name="responses"
)
order = models.ForeignKey()
attendee = models.ForeignKey()
total_time = models.PositiveIntegerField()
ip_address = models.GenericIPAddressField()
language = models.CharField()
class Survey(TimeStampedModel):
id = models.UUIDField(primary_key=True, editable=False, default=uuid.uuid4)
event = models.ForeignKey()
template = models.CharField()
Here is what I have tried so far, but it seems far from my goal:
questions = self.request.event.surveys.get_results(
settings.SURVEY_PRE_EVENT
)
for question in questions:
print("------")
print(question.pk)
answers = question.answers.all()
for answer in answers:
print(
answer.entities.values("name")
.annotate(count=Count("name"))
.annotate(salience=Sum("salience"))
)
Here the output:
------
33
<QuerySet [{'name': 'people', 'count': 1, 'salience': 1.0}]>
<QuerySet [{'name': 'income', 'count': 1, 'salience': 1.0}]>
<QuerySet [{'name': 'incomes', 'count': 2, 'salience': 1.26287645101547}]>

I'm not sure entirely if I understood your problem correctly, but you may be looking for something like
Question.objects.values("answers__entities__name").annotate(
salience=Sum("answers__entities__salience"),
count=Count("answers"),
)
Disclaimers:
I haven't tested this and I may be wrong, but this is what I'd start playing around with.
Also you might find this useful: https://simpleisbetterthancomplex.com/tutorial/2016/12/06/how-to-create-group-by-queries.html

You can loop through the questions in order to create a list for each question:
Entity.objects.filter(answer__question=question).values('name').annotate(count=Count('pk')).annotate(total_salience=Sum('salience'))
Or if you want to have all in one queryset, group first by question (pk):
Entity.objects.values('answer__question__pk', 'name').annotate(count=Count('pk')).annotate(total_salience=Sum('salience'))
This will produce a list, not a nested list by question, but you can later regroup this in python to nest the entities for each question.

QuerySet object has no attribute

I got a problem when I select the distinct value from DB.
Here is my model:
class Shift(models.Model):
shiftid = models.CharField(max_length=15)
shiftdesc = models.CharField(blank = False, null= False, max_length=20)
dayname = models.CharField(blank = False, null= False, max_length=20)
class Meta:
unique_together = ('shiftid','dayname')
This is the resulting data structure:
shiftid shiftdesc dayname
shift1 desc1 1
shift1 desc2 1
shift1 desc1 1
I want it to be like this:
shiftid shiftdesc dayname
shift1 desc1 1
shift1 desc2 1
I am trying to select the records like this:
@action(methods=['get'], detail=False)
def shiftsum(self, request):
    """Return the distinct (shiftid, dayname) pairs as a serialized list."""
    newest = (
        self.get_queryset()
        .order_by('shiftid', 'dayname')
        .values('shiftid', 'dayname')
        .distinct()
    )
    # A queryset holds many rows; without many=True the serializer treats it
    # as a single object and looks up fields such as 'shiftid' on the
    # QuerySet itself.
    serializer = self.get_serializer_class()(newest, many=True)
    return Response(serializer.data)
When I try like that I always get this error:
QuerySet object has no attribute 'shiftid'
Also, I would like to know how to select the distinct value? I am new in Django and appreciate every help.

Serializers don't handle lists of objects by default. You need to pass many=True to tell it to process each item and output a list (rest framework docs).
self.get_serializer_class()(newest, many=True)
This will give you a list of days, like you expect:
[
{ "shiftid": "shift1", "dayname": "1" }
]
Distinct
Your distinct is fine. An example distinct query would look just like yours:
User.objects.values('field').order_by('field').distinct()
Final Query
The issue with your final query is that you are only selecting 2 of the 3 fields that you want, excluding shiftdesc.
There isn't really a logical way to get that value, since by definition you start with N and end up with 1.
If you just want ANY value for it, say for debugging or display, you can use .annotate() like this:
query = (
Shift.objects.values('shiftid', 'dayname')
.annotate(shiftdesc=Max('shiftdesc'))
.annotate(ct=Count('*')) # get count of rows in group
.order_by('shiftid', 'dayname')
.distinct()
)
Look into annotations/aggregations, more advanced stuff can be done that may help out, and some database specific stuff that can be really useful.
Full Example
Here is a full example, using the default django User table. You have not provided enough information to further debug it.
import json

from django.contrib.auth.models import User
from django.db.models import Count, Max
from rest_framework.serializers import CharField, IntegerField, Serializer

# username is unique on the default User model, so each row needs its own.
User.objects.create(username='x1', email='x1#e', first_name='Angela', last_name='Smith')
User.objects.create(username='x2', email='x2#e', first_name='James', last_name='Smith')
User.objects.create(username='x3', email='x3#e', first_name='James', last_name='Joyce')

# Group by last_name; pick one first_name per group and count the rows.
query = (
    User.objects.values('last_name')
    .order_by('last_name')
    .annotate(first_name=Max('first_name'))
    .annotate(ct=Count('email'))
    .distinct()
)


class X(Serializer):
    """Serializer for the grouped rows produced by ``query``."""

    last_name = CharField()
    first_name = CharField()
    ct = IntegerField()


data = X(query, many=True).data
print(json.dumps(data, indent=4))
[
{
"last_name": "Joyce",
"first_name": "James",
"ct": 1
},
{
"last_name": "Smith",
"first_name": "James",
"ct": 2
}
]

How can I use parent_id in Django?

In my model:
# First-level homepage module.
class HomePageFirstModule(models.Model):
name = models.CharField(max_length=8, unique=True)
is_active = models.BooleanField(default=True) # whether the module is enabled
# Second-level homepage module; each one belongs to a first-level module.
class HomePageSecondModule(models.Model):
name = models.CharField(max_length=16, unique=True)
is_active = models.BooleanField(default=True) # whether the module is enabled
home_page_first_module = models.ForeignKey(to=HomePageFirstModule) # the first-level module this one belongs to
# Third-level homepage module; each one belongs to a second-level module.
# The "Thrid" spelling in the class name is left as-is because other code refers to it by this name.
class HomePageThridModule(models.Model):
name = models.CharField(max_length=16, unique=True)
url = models.CharField(max_length=128)
is_active = models.BooleanField(default=True) # whether the module is enabled
home_page_second_module = models.ForeignKey(to=HomePageSecondModule) # the second-level module this one belongs to
Then I use filter method to query out the data:
def get_homepage_module_list():
    """Return the usable third-level homepage modules under first-level module "1".

    A module is usable only when it, its second-level parent and its
    first-level grandparent are all active. The active checks run in the
    database query rather than by filtering and removing rows in Python.

    :return: list of dicts holding the keys selected by ``values()`` below.
    """
    data_query_list = models.HomePageThridModule.objects.filter(
        home_page_second_module__home_page_first_module="1",
        is_active=True,
        home_page_second_module__is_active=True,
        home_page_second_module__home_page_first_module__is_active=True,
    ).values(
        'id', 'name', 'is_active', 'home_page_second_module__name',
        'home_page_second_module__home_page_first_module__name',
        'home_page_second_module__home_page_first_module__is_active',
        'home_page_second_module__is_active',
    )
    return list(data_query_list)
========================
How can I convert this list data :
[
{
"home_page_second_module__name": "云主机",
"home_page_second_module__home_page_first_module__name": "产品",
"id": 1,
"name": "云主机子1"
},
{
"home_page_second_module__name": "云主机",
"home_page_second_module__home_page_first_module__name": "产品",
"id": 4,
"name": "云主机子4"
},
{
"home_page_second_module__name": "云硬盘",
"home_page_second_module__home_page_first_module__name": "产品",
"id": 2,
"name": "云硬盘子2"
},
{
"home_page_second_module__name": "云硬盘",
"home_page_second_module__home_page_first_module__name": "产品",
"id": 3,
"name": "云硬盘子3"
}
]
to this:
[
{"name":"产品",
"data":[
{"name":"云主机",
"data":[{"name":"云主机子1",
"data":{"id":1}},
{"name":"云主机子2",
"data":{"id":2}}]},
{"name":"云硬盘",
"data":[{"name":"云硬盘子1",
"data":{"id":3}},
{"name":"云硬盘子2",
"data":{"id":4}}]}
]
}
]
There should be an algorithmic way to do this, but I tried and could not work it out.
All I could come up with is the small snippet below:
home_page_second_module__name_list = []
home_page_second_module__home_page_first_module__name_list = []
id_list = []
name_list = []
for home_page_second_module__name,home_page_second_module__home_page_first_module__name,id,name in ori_list:
if not (home_page_second_module__name_list.__contains__(home_page_second_module__name)):
home_page_second_module__name_list.append(home_page_second_module__name)
if not (home_page_second_module__home_page_first_module__name_list.__contains__(home_page_second_module__home_page_first_module__name_list)):
home_page_second_module__home_page_first_module__name_list.append(home_page_second_module__home_page_first_module__name)
But now I think this is very difficult to do that, and I think mine is wrong way to do that.
Is there a convenient way to realize it?
EDIT
产品, 云主机 and 云硬盘 can perhaps be treated as parent ids.

You can use django-rest-framework, and use related serializers. relations serializers

Outdated code, as the original question just asked how to transform a list of dicts into another list of dicts with different outcome
I bet this can be optimized a lot, but... assuming your list of dicts is named old, I think this might do it:
def _nest_modules(rows):
    """Group flat third-level rows into first -> second -> third level nesting.

    Each row is a dict with ``home_page_second_module__home_page_first_module__name``,
    ``home_page_second_module__name``, ``name`` and ``id``. First- and
    second-level entries appear in the order they are first seen in ``rows``.
    """
    tree = {}
    for row in rows:
        first = row['home_page_second_module__home_page_first_module__name']
        second = row['home_page_second_module__name']
        tree.setdefault(first, {}).setdefault(second, []).append(
            {'name': row['name'], 'data': {'id': row['id']}}
        )
    return [
        {
            'name': first,
            'data': [
                {'name': second, 'data': thirds}
                for second, thirds in seconds.items()
            ],
        }
        for first, seconds in tree.items()
    ]


new = _nest_modules(old)

Django REST serialize output - group by foreign keys

I have models like below.
Restaurant Model
class Restaurant(models.Model):
name = models.CharField(max_length=40, verbose_name='Name')
Menu Model
class Menu(models.Model):
name = models.CharField(max_length=40, unique=True, verbose_name='menu name')
Item Model
class Item(models.Model):
restaurant = models.ForeignKey(Restaurant)
menu = models.ForeignKey(Menu)
name = models.CharField(max_length=500)
price = models.IntegerField(default=0)
I want to get the menus for the shop id.
How can I group my results by menu for the restaurant id ?
call GET /menus/restaurant_id
Sample.
{
name: menu name 1
items: [ {item1}, {item2}]
},
{
name: menu name 2
items: [ {item1}, {item2}]
}
Thanks..

The only thing I can find is the Postgres-specific aggregation function ArrayAgg.
You can use it like this:
from django.contrib.postgres.aggregates import ArrayAgg
Item.objects.filter(restaurant_id=1).values('menu__name').annotate(items=ArrayAgg('name'))
# example output:
# [
# {
# 'menu__name': 'menu1',
# 'items': ['item1', 'item2']
# },
# {
# 'menu__name': 'menu2',
# 'items': ['item3', 'item4']
# },
# ]
Such a queryset executes the following raw SQL query:
SELECT
"appname_menu"."name",
ARRAY_AGG("appname_item"."name") AS "items"
FROM "appname_item"
INNER JOIN "appname_menu" ON ("appname_item"."menu_id" = "appname_menu"."id")
WHERE "appname_item"."restaurant_id" = 1
GROUP BY "appname_menu"."name"
Probably it can help you.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Django get total count and count by unique value in queryset - python

Related

Django and python, how to get a annotate from two different model?

Django: QuerySet with group of same entries

QuerySet object has no attribute

How can I use parent_id in Django?

Django REST serialize output - group by foreign keys

Categories

Resources