Group by query in Django ORM - python

I an having a confusion on how to write a django query to get my data. I have 2 tables 'ticket' and 'ticket_details'. Below is the schema for them.
Ticket(id, name, type, user)
TicketDetails(ticket_id, message, created_time)
Note: Multiple message can be associated to one ticket id.
And ticket_id is a foreign key to the Ticket table.
I would like to fetch all the columns from both the table where only the latest message from the TicketDetails table should be picked for a particular ticket id.
Example:
Ticket
id, name, type, user
1,install, application, usr1
TicketDetails
ticket_id, message, creted_time
1, <message1>, 12:00 PM
1, <message2>, 04:00 PM
2, <message3>, 05:00 PM -->latest entry
Expected Output:
id, name, type, user, message, created_time
1, install, application, usr1, <message3>, 05:00PM
Thanks in advance

I made some assumptions about your models, you didn't provide any:
class Ticket(models.Model):
name = models.CharField(max_length=50)
type = models.CharField(max_length=50)
user = models.ForeignKey('auth.User', on_delete=models.CASCADE)
# Model names should NEVER end with "s"
class TicketDetail(models.Model):
ticket = models.ForeignKey(Ticket, on_delete=models.CASCADE)
message = models.CharField(max_length=50)
created_time = models.DateTimeField(auto_now_add=True)
You have 2 options:
you can write it in pure sql, you lose the ability to filter
sql = """
SELECT ticket.id, ticket.name, ticket.type, ticket.user_id, detail.message
FROM {ticket} ticket
LEFT JOIN (
SELECT detail.ticket_id, detail.message
FROM {detail} detail
INNER JOIN (
SELECT MAX(id) id, ticket_id
FROM {detail}
GROUP BY ticket_id
) detail_message ON detail.id = detail_message.id
) detail ON detail.ticket_id = ticket.id
""".format(ticket=Ticket._meta.db_table, detail=TicketDetail._meta.db_table)
tickets = Ticket.objects.raw(sql)
for ticket in tickets:
print(ticket.id, ticket.message)
Write it in the "django" way
latest_messages = TicketDetail.objects.values('ticket_id').annotate(id=models.Max('id')).values('id')
tickets = Ticket.objects.prefetch_related(models.Prefetch('ticketdetail_set', TicketDetail.objects.filter(id__in=latest_messages))).order_by('id')
for ticket in tickets:
print(ticket.id)
# this iteration will only ever yield 1 result.. or nothing.
for detail in ticket.ticketdetail_set.all():
print(detail.message)
Here are the tests:
# uses factoryboy and faker to fill in the data
class UserFactory(factory.django.DjangoModelFactory):
class Meta:
model = auth.models.User
django_get_or_create = ('username',)
first_name = fake.first_name()
last_name = fake.last_name()
email = factory.LazyAttribute(lambda obj: "{}.{}#gmail.com".format(obj.last_name, obj.first_name).lower())
username = factory.Sequence(lambda n: 'user' + str(n))
class SimpleTestCase(TestCase):
def setUp(self):
ticket1 = Ticket.objects.create(user=UserFactory(), type='A', name='Number 1')
TicketDetail.objects.create(ticket=ticket1, message='you wont see this')
TicketDetail.objects.create(ticket=ticket1, message='you wont see this either')
TicketDetail.objects.create(ticket=ticket1, message='YES!!')
ticket2 = Ticket.objects.create(user=UserFactory(), type='B', name='Number 2')
TicketDetail.objects.create(ticket=ticket2, message='you also wont see this')
TicketDetail.objects.create(ticket=ticket2, message='you also wont see this either')
TicketDetail.objects.create(ticket=ticket2, message='also YES!!')
def test_flatten_pure_sql(self):
sql = """
SELECT ticket.id, ticket.name, ticket.type, ticket.user_id, detail.message
FROM {ticket} ticket
LEFT JOIN (
SELECT detail.ticket_id, detail.message
FROM {detail} detail
INNER JOIN (
SELECT MAX(id) id, ticket_id
FROM {detail}
GROUP BY ticket_id
) detail_message ON detail.id = detail_message.id
) detail ON detail.ticket_id = ticket.id
""".format(ticket=Ticket._meta.db_table, detail=TicketDetail._meta.db_table)
self.assertEquals(['YES!!', 'also YES!!'], [x.message for x in Ticket.objects.raw(sql)])
def test_orm_way(self):
latest_messages = TicketDetail.objects.values('ticket_id').annotate(id=models.Max('id')).values('id')
tickets = Ticket.objects.prefetch_related(models.Prefetch('ticketdetail_set', TicketDetail.objects.filter(id__in=latest_messages))).order_by('id')
self.assertEquals(['Number 1', 'Number 2'], [x.name for x in tickets])
self.assertEquals(['YES!!'], [x.message for x in tickets[0].ticketdetail_set.all()])
self.assertEquals(['also YES!!'], [x.message for x in tickets[1].ticketdetail_set.all()])

Related

Get records from odoo relational table

I have two models in my odoo project. Employee and equipment, as shown bellow.
Equipment/model.py
from openerp import models, fields, api
import datetime
class equipment(models.Model):
_name = 'equipment.equipment'
name = fields.Char(string='Name', )
date_of_purchase = fields.Date(string='Date Of Purchase', default=fields.Date.today(), )
write_off_days = fields.Integer(string="Days To Write-off", required=True, )
write_off_date = fields.Date(string="Write-off Date", compute="_get_write_off_date", )
price = fields.Float(string="Price '$'", required=True, )
description = fields.Char(string="Description", required=False, )
employee_id = fields.Char(string="Owner", compute="_get_owner", )
#api.one
#api.depends('employee_id')
def _get_owner(self):
//result = self.env.['res.equipment_table'].
//get id from relation database <-------------
#api.one
#api.depends('write_off_days', 'date_of_purchase')
def _get_write_off_date(self):
date = datetime.datetime.strptime(self.date_of_purchase, "%Y-%m-%d")
self.write_off_date = date + datetime.timedelta(days=self.write_off_days)
employee/model.py
from openerp import models, fields, api
class employee(models.Model):
_name = 'employee.employee'
name = fields.Char(string='First Name')
last_name = fields.Char(string='Last Name')
birth_date = fields.Date(string='Birth Date', default=fields.Date.today(), )
equipment_ids = fields.Many2many(string="Equipment", comodel_name="equipment.equipment", relation="equipment_table", )
total_equipment_price = fields.Float(string="Total Equipment Price '$'", compute="_get_total_equipment_price", )
#api.one
#api.depends('equipment_ids')
def _get_total_equipment_price(self):
total = 0
for equipment in self.equipment_ids:
total += equipment.price
self.total_equipment_price = total
I have many2many field, which holds all equipment that employee owns. I need to update the equipment owner every time the field is changed. The reason for this is.. when a user adds new equipment to the employee, there should be shown only unowned equipment. That is why I need to check and update the owner.
I already made a domain to check if equipment already has an owner, which will be shown below. Just need to update that employee_id field somehow.
<notebook>
<page string="Equipment">
<group>
<field name="equipment_ids" domain="[('employee_id', '=', False)]"/>
</group>
</page>
</notebook>
Change the type of employee_id to Many2one:
employee_id = fields.Many2one('employee.employee', string="Owner")
Override create and write methods to set employee_id each time new equipment is added to the list:
class Employee(models.Model):
_name = 'employee.employee'
#api.model
def create(self, values):
res = super(Employee, self).create(values)
if res.equipment_ids:
res.equipment_ids.write({'employee_id': res.id})
return res
#api.multi
def write(self, values):
res = super(Employee, self).write(values)
for r in self:
equipment_ids = r.equipment_ids.filtered(lambda s: not s.employee_id)
if equipment_ids:
equipment_ids.write({'employee_id': r.id})
return res
Because an equipment can set to one owner, I suggest you to change the type of equipment_ids to One2many to be able to set a list of equipments by employee and the employee_id field value will be set automatically for you when you add equipment to the list.
If you change the value of employee_id the equipment will be automatically visible in the list of equipments of the new employee.
equipment_ids = fields.One2many("equipment.equipment", 'employee_id', string="Equipments")
Reset owner
#api.multi
def write(self, values):
removed_ids = set()
old_ids = {r.id: r.equipment_ids.ids for r in self}
res = super(Employee, self).write(values)
for r in self:
equipment_ids = r.equipment_ids.filtered(lambda s: not s.employee_id)
if equipment_ids:
equipment_ids.write({'employee_id': r.id})
removed_ids |= set(old_ids[r.id]) - set(r.equipment_ids.ids)
self.equipment_ids.browse(removed_ids).write({'employee_id': False})
return res

Django Left join how

I fairly new to Django and stuck with creating a left join in Django. I tried so many, but none of them seems to be working:
The query I want to translate to Django is:
select ssc.id
,mgz.Title
,tli.id
,tli.Time
from Subscription ssc
join Person prs
on ssc.PersonID = prs.id
and prs.id = 3
join Magazine mgz
on mgz.id = ssc.MagazineID
and mgz.from <= date.today()
and mgz.until > date.today()
left join TimeLogedIn tli
on tli.SubscriptionID = ssc.id
and tli.DateOnline = date.today()
The model I'm using looks like this:
class Magazine(models.Model):
Title = models.CharField(max_length=100L)
from = models.Datefield()
until = models.Datefield()
Persons = models.ManyToManyField(Person, through='Subscription')
class Person(models.Model):
user = models.OneToOneField(User, on_delete=models.CASCADE)
Magazines = models.ManyToManyField(Magazine, through='Subscription')
class Subscription(models.Model):
MagazineID = models.ForeignKey(Magazine,on_delete=models.CASCADE)
PersonID = models.ForeignKey(Person,on_delete=models.CASCADE)
class TimeLogedIn(models.Model):
SubscriptionID = models.ForeignKey('Subscription', on_delete=models.CASCADE)
DateOnline = models.DateField()
Time = models.DecimalField(max_digits=5, decimal_places=2)
Like I said, tried so many but no succes and now I don't know how to do this in Django ORM , is it even possible? I created already a raw-query and this is working ok, but how to create this in Django ORM?
You can use field lookups lte and gt to filter your objects and then values() method.
You can also querying in the opposite direction and use Q objects for null values:
from django.db.models import Q
Subscription.objects.filter(
PersonID_id=3,
MagazineID__from__lte=date.today(),
MagazineID__until__gt=date.today()
).filter(
Q(TimeLogedIn__DateOnline=date.today()) | Q(TimeLogedIn__DateOnline__isnull=True)
).values("id", "MagazineID__Title", "TimeLogedIn__id", "TimeLogedIn__Time")
OR from TimeLogedIn:
TimeLogedIn.objects.filter(DateOnline=date.today()).filter(
SubscriptionID__MagazineID__from__lte=date.today(),
SubscriptionID__MagazineID__util__gt=date.today()
).values(
"SubscriptionID_id", "SubscriptionID__MagazineID__Title", "id", "Time"
)
Querysets also have the query attribute that contains the sql query to be executed, you can see it like following:
print(TimeLogedIn.objects.filter(...).values(...).query)
Note: Behind the scenes, Django appends "_id" to the field name to create its database column name. Therefore it should be
subscription, instead of SubscriptionID.
You can also use prefetch_related() and select_related() to prevent multiple database hits:
SubscriptionID.objects.filter(...).prefetch_related("TimeLogedIn_set")
SubscriptionID.objects.filter(...).select_related("PersonID")

Django : Best way to Query a M2M Field , and count occurences

class Edge(BaseInfo):
source = models.ForeignKey('Node', on_delete=models.CASCADE,related_name="is_source")
target = models.ForeignKey('Node', on_delete=models.CASCADE,related_name="is_target")
def __str__(self):
return '%s' % (self.label)
class Meta:
unique_together = ('source','target','label','notes')
class Node(BaseInfo):
item_type_list = [('profile','Profile'),
('page','Page'),
('group','Group'),
('post','Post'),
('phone','Phone'),
('website','Website'),
('email','Email'),
('varia','Varia')
]
item_type = models.CharField(max_length=200,choices=item_type_list,blank = True,null=True)
firstname = models.CharField(max_length=200,blank = True, null=True)
lastname = models.CharField(max_length=200,blank = True,null=True)
identified = models.BooleanField(blank=True,null=True,default=False)
username = models.CharField(max_length=200, blank=True, null=True)
uid = models.CharField(max_length=200,blank=True,null=True)
url = models.CharField(max_length=2000,blank=True,null=True)
edges = models.ManyToManyField('self', through='Edge',blank = True)
I have a Model Node (in this case a soc media profile - item_type) that has relations with other nodes (in this case a post). A profile can be the author of a post. An other profile can like or comment that post.
Question : what is the most efficient way to get all the distinct profiles that liked or commented on anothes profile's post + the count of these likes /comments.
print(Edge.objects.filter(Q(label="Liked")|Q(label="Commented"),q).values("source").annotate(c=Count('source')))
Gets me somewhere but i have the values then (id) and i want to pass the objects to my template rather then .get() all the profiles again...
Result :
Thanks in advance
I ended up with iterating over the queryset and adding the objects that i wanted in a dictionary , if the object was already in dictionary , i would count +1 and add the relation in a nested list.
This doesnt feel right but works for now.
posts = Edge.objects.filter(source = self,target__item_type='post',label='Author')
if posts:
q = Q()
for post in posts:
q = q | Q(target=post.target)
contributors = Edge.objects.filter(Q(label="Liked")|Q(label="Commented"),q)
if contributors:
for i in contributors:
if i.source.uid in results:
if i.label in results[i.source.uid]['relation']:
pass
else:
results[i.source.uid]["relation"].append(i.label)
if 'post' in results[i.source.uid]:
results[i.source.uid]['post'].append(i.target)
else:
results[i.source.uid]['post']=[i.target]
else:
results[i.source.uid] = {'profile' : i.source , 'relation':[i.label],'post':[i.target]}

My django query is very slow in givig me data on terminal

I have a users table which has 3 types of users Student, Faculty and Club and I have a university table.
What I want is how many users are there in the specific university.
I am getting my desired output but the output is very slow.I have 90k users and the output it is generating it takes minutes to produce results.
My user model:-
from __future__ import unicode_literals
from django.db import models
from django.contrib.auth.models import User
from cms.models.masterUserTypes import MasterUserTypes
from cms.models.universities import Universities
from cms.models.departments import MasterDepartments
# WE ARE AT MODELS/APPUSERS
requestChoice = (
('male', 'male'),
('female', 'female'),
)
class Users(models.Model):
id = models.IntegerField(db_column="id", max_length=11, help_text="")
userTypeId = models.ForeignKey(MasterUserTypes, db_column="userTypeId")
universityId = models.ForeignKey(Universities, db_column="universityId")
departmentId = models.ForeignKey(MasterDepartments , db_column="departmentId",help_text="")
name = models.CharField(db_column="name",max_length=255,help_text="")
username = models.CharField(db_column="username",unique=True, max_length=255,help_text="")
email = models.CharField(db_column="email",unique=True, max_length=255,help_text="")
password = models.CharField(db_column="password",max_length=255,help_text="")
bio = models.TextField(db_column="bio",max_length=500,help_text="")
gender = models.CharField(db_column="gender",max_length=6, choices=requestChoice,help_text="")
mobileNo = models.CharField(db_column='mobileNo', max_length=16,help_text="")
dob = models.DateField(db_column="dob",help_text="")
major = models.CharField(db_column="major",max_length=255,help_text="")
graduationYear = models.IntegerField(db_column='graduationYear',max_length=11,help_text="")
canAddNews = models.BooleanField(db_column='canAddNews',default=False,help_text="")
receivePrivateMsgNotification = models.BooleanField(db_column='receivePrivateMsgNotification',default=True ,help_text="")
receivePrivateMsg = models.BooleanField(db_column='receivePrivateMsg',default=True ,help_text="")
receiveCommentNotification = models.BooleanField(db_column='receiveCommentNotification',default=True ,help_text="")
receiveLikeNotification = models.BooleanField(db_column='receiveLikeNotification',default=True ,help_text="")
receiveFavoriteFollowNotification = models.BooleanField(db_column='receiveFavoriteFollowNotification',default=True ,help_text="")
receiveNewPostNotification = models.BooleanField(db_column='receiveNewPostNotification',default=True ,help_text="")
allowInPopularList = models.BooleanField(db_column='allowInPopularList',default=True ,help_text="")
xmppResponse = models.TextField(db_column='xmppResponse',help_text="")
xmppDatetime = models.DateTimeField(db_column='xmppDatetime', help_text="")
status = models.BooleanField(db_column="status", default=False, help_text="")
deactivatedByAdmin = models.BooleanField(db_column="deactivatedByAdmin", default=False, help_text="")
createdAt = models.DateTimeField(db_column='createdAt', auto_now=True, help_text="")
modifiedAt = models.DateTimeField(db_column='modifiedAt', auto_now=True, help_text="")
updatedBy = models.ForeignKey(User,db_column="updatedBy",help_text="Logged in user updated by ......")
lastPasswordReset = models.DateTimeField(db_column='lastPasswordReset',help_text="")
authorities = models.CharField(db_column="departmentId",max_length=255,help_text="")
class Meta:
managed = False
db_table = 'users'
the query i am using which is producing the desired output but too sloq is:-
universities = Universities.objects.using('cms').all()
for item in universities:
studentcount = Users.objects.using('cms').filter(universityId=item.id,userTypeId=2).count()
facultyCount = Users.objects.using('cms').filter(universityId=item.id,userTypeId=1).count()
clubCount = Users.objects.using('cms').filter(universityId=item.id,userTypeId=3).count()
totalcount = Users.objects.using('cms').filter(universityId=item.id).count()
print studentcount,facultyCount,clubCount,totalcount
print item.name
You should use annotate to get the counts for each university and conditional expressions to get the counts based on conditions (docs)
Universities.objects.using('cms').annotate(
studentcount=Sum(Case(When(users_set__userTypeId=2, then=1), output_field=IntegerField())),
facultyCount =Sum(Case(When(users_set__userTypeId=1, then=1), output_field=IntegerField())),
clubCount=Sum(Case(When(users_set__userTypeId=3, then=1), output_field=IntegerField())),
totalcount=Count('users_set'),
)
First, an obvious optimization. In the loop, you're doing essentially the same query four times: thrice filtering for different userTypeId, and once without one. You can do this in a single COUNT(*) ... GROUP BY userTypeId query.
...
# Here, we're building a dict {userTypeId: count}
# by counting PKs over each userTypeId
qs = Users.objects.using('cms').filter(universityId=item.id)
counts = {
x["userTypeId"]: x["cnt"]
for x in qs.values('userTypeId').annotate(cnt=Count('pk'))
}
student_count = counts.get(2, 0)
faculty_count = counts.get(1, 0)
club_count = count.get(3, 0)
total_count = sum(count.values()) # Assuming there may be other userTypeIds
...
However, you're still doing 1+n queries, where n is number of universities you have in the database. This is fine if the number is low, but if it's high you need further aggregation, joining Universities and Users. A first draft I came with is something like this:
# Assuming University.name is unique, otherwise you'll need to use IDs
# to distinguish between different projects, instead of names.
qs = Users.objects.using('cms').values('userTypeId', 'university__name')\
.annotate(cnt=Count('pk').order_by('university__name')
for name, group in itertools.groupby(qs, lambda x: x["university__name"]):
print("University: %s" % name)
cnts = {g["userTypeId"]: g["cnt"] for g in group}
faculty, student, club = cnts.get(1, 0), cnts.get(2, 0), cnts.get(3, 0)
# NOTE: I'm assuming there are only few (if any) userTypeId values
# other than {1,2,3}.
total = sum(cnts.values())
print(" Student: %d, faculty: %d, club: %d, total: %d" % (
student, faculty, club, total))
I might've made a typo there, but hope it's correct. In terms of SQL, it should emit a query like
SELECT uni.name, usr.userTypeId, COUNT(usr.id)
FROM some_app_universities AS uni
LEFT JOUN some_app_users AS usr ON us.universityId = uni.id
GROUP BY uni.name, usr.userTypeId
ORDER BY uni.name
Consider reading documentation on aggregations and annotations. And be sure to check out raw SQL that Django ORM emits (e.g. use Django Debug Toolbar) and analyze how well it works on your database. For example, use EXPLAIN SELECT if you're using PostgreSQL. Depending on your dataset, you may benefit from some indexes there (e.g. on userTypeId column).
Oh, and on a side note... it's off-topic, but in Python it's a custom to have variables and attributes use lowercase_with_underscores. In Django, model class names are usually singular, e.g. User and University.

How to show the last message of each user to user conversations to keep a chat history?

I'm creating a private user to user chat, in order to chat with someone the connected user has to type the username of the user with whom he wants to talk to on his own url.
Now that this system is already built, I want to keep a chat history so that later on I can send notification of chat. To do that I need to get the last message of each conversations and I want to show it on the connected user's own chat profile.
Just as the image below :
Model userComment fields are : recipient, sender, comment, sent_at
views.py :
def inbox(request, username):
username = User.objects.get(username=username)
connected_user = request.user
if username == connected_user:
#I'm having the issue on this line
users = userComment.objects.filter(Q(client=request.user) | Q(worker=request.user)).order_by(?)
else:
users = userComment.objects.filter(Q(Q(client=request.user) & Q(worker=username)) | Q(Q(client=username) & Q(worker=request.user))).order_by('sent_at')
models.py
class userComment(models.Model):
client = models.ForeignKey(User, related_name="client")
worker = models.ForeignKey(User, blank=True, null=True, related_name="worker")
sent_at = models.DateTimeField(auto_now_add=True)
comment = models.TextField(max_length=255, null=True)
def __str__(self):
return str(self.client)
Question : How can I filter and order my view to do so ?
Firstly in your userComment model add a related query name for reverse relation
class UserComment(models.Model):
sender = models.ForeignKey(User, related_name='sender', related_query_name='s')
recipient = models.ForeignKey(User, related_name='recipient', related_query_name='r')
sent_at = models.DateTimeField(auto_now_add=True)
comment = models.TextField()
Now in your views.py use this query:
user = request.user
users = User.objects.filter(Q(r__sender=user) | Q(s__recipient=user)).distinct().extra(select={'last_message_time': 'select MAX(sent_at) from appname_usercomment where (recipient_id=auth_user.id and sender_id=%s) or (recipient_id=%s and sender_id=auth_user.id)'}, select_params=(user.id, user.id,)).extra(order_by=['-last_message_time']).extra(select={'message': 'select comment from appname_usercomment where (sent_at=(select MAX(sent_at) from appname_usercomment where (recipient_id=auth_user.id and sender_id=%s) or (recipient_id=%s and sender_id=auth_user.id)) and ((recipient_id=auth_user.id and sender_id=%s) or (recipient_id=%s and sender_id=auth_user.id)))',}, select_params=(user.id, user.id,user.id, user.id,))
Set the appname in extra according to name of the app in which the model is.
Now you can access it as follows:
for user in users:
print user.username
print user.last_message_time
print user.message
def inbox(request, username)
# first select all the comments related to user
user = User.objects.get(username=username)
related = userComment.objects.filter(q(client=user) | q(worker=user)).order_by('-sent_at')
# This selects the latest comments.
# Now loop over the related comments and group them.
chats = {}
for comment in related:
if comment.client == user:
previous_chat_history = chats.setdefault(comment.worker.username, [])
if not len(previous_chat_history) >= 3:
previous_chat_history.append(comment)
if comment.worker== user:
previous_chat_history = chats.setdefault(comment.client.username, [])
if not len(previous_chat_history) >= 3:
previous_chat_history.append(comment)
# Reverse each list to keep the latest message last
for k, v in chats.items():
chats[k] = v.reverse()
return render(request, 'template.html', context={chats: chats})
[Update]: I just realized that this solution will only work with Postgresql because it is using field names in distinct.
You can mix order_by and distinct to achieve the desired results:
filter the comments where the user is either client or a worker:
comments = userComment.objects.filter(Q(client=request.user) | Q(worker=request.user))
order the user comments with client, worker and sent_at fields. Make sure to have a descending order for sent_at field so the latest comments for each client-worker pair are at top:
comments = comments.order_by('client', 'worker', '-sent_at')
Now, get the distinct rows:
comments = comments.distinct('client', 'worker')
This will keep only the first row which is the latest comment for each for each client-worker pair and delete the rest of the rows from the queryset.
In one statement:
comments = userComment.objects \
.filter(Q(client=request.user) | Q(worker=request.user)) \
.order_by('client', 'worker', '-sent_at') \
.distinct('client', 'worker')
This will give you the latest comment for each conversation where the user is either a client or a worker.

Categories

Resources