I want to change this code to a faster query
response = []
for player in Player.objects.all():
total_earn = Credit.objects.filter(player=player).aggregate(Sum('amount')).get('amount__sum', 0)
total_earn += Purchase.objects.filter(player=player).aggregate(Sum('amount')).get('amount__sum', 0)
reponse.append([player.id, player.email, player.phone, total_earn])
I try this for a moment, but now it take a lot of time to execute and it causes timeout on the server.
I want something very fast, like that:
response = Player.objects.annotate(
id='id',
email='email',
phone='phone',
total_earn=(Credit.... + Purchase....)
)
My models:
class Player(AbstractUser):
email = models.EmailField(..)
phone = models.CharField(..)
class Credit(models.Model):
player = models.ForeignKey(Player, ..., CASCADE)
amount = modesl.DecimalField(decimal_places=2, ...)
class Purchase(models.Model):
player = models.ForeignKey(Player, ...)
amount = models.DecimalField(decimal_places=2, ...)
You can make use of subqueries:
from django.db.models import OuterRef, Subquery
credits = Credit.objects.filter(
player=OuterRef('pk')
).values('player').annotate(
total=Sum('amount')
).order_by('player').values('total')
purchases = Purchases.objects.filter(
player=OuterRef('pk')
).values('player').annotate(
total=Sum('amount')
).order_by('player').values('total')
Player.objects.annotate(
total_earn=Subquery(credits)[:1] - Subquery(purchases)[:1]
)
However it looks like there is some bad modeling. It might be better to make a single model for Credits and Purchases and thus use a negative amount for Purchases. If such model is named Earning for example, then one can do that with a simple Player.objects.annotate(total_earn=Sum('earning__amount')).
Related
I'm new in Django. So, I want to join two models which are company and client and count the number of clients for each of the company. Here the SQL
SELECT Company_company.name, count(Client_client.cid)
FROM Company_company
LEFT JOIN Client_client
ON Company_company.comid = Client_client.comid_id
GROUP BY Company_company.name;
But since in Django, we use ORM. So I'm a little bit confusing since I'm a beginner. I already refer few SQL to ORM converter website such as Django ORM and do some try and error. But, I didn't know where the problem since I want the output from the ORM to be classified into a different array. Here is my code:
labels = []
data = []
queryClientCompany = client.objects.values('comid').annotate(c=Count('cid')).values('comid__name','c')
for comp in queryClientCompany:
labels.append(comp.comid__name)
data.append(comp.c)
Here some of the relevant things in the client and company models:
class client (models.Model):
#client info
cid = models.AutoField(primary_key = True)
comid = models.ForeignKey(company,related_name='companys',
on_delete = models.DO_NOTHING,verbose_name="Company",null = True, blank = True)
class company(models.Model):
comid = models.AutoField(_('Company'),primary_key = True)
#company info
name = models.CharField(_('Company Name'),max_length = 50)
The error stated that the comid__name is not defined. So actually how to append the result? I hope someone can help me. Thank you for helping in advanced.
You should query from the opposite side to perform the LEFT OUTER JOIN between company and client (and not client and company):
from django.db.models import Count
labels = []
data = []
queryClientCompany = company.objects.annotate(
c=Count('companys__cid')
)
for comp in queryClientCompany:
labels.append(comp.name)
data.append(comp.c)
The companys part is due to the related_name='copanys', but it does not make much sense to name this relation that way. The related_name=… parameter [Django-doc] specifies how to access the Clients for a given Company, so clients is a more appropriate value for the related_name:
class client (models.Model):
cid = models.AutoField(primary_key=True)
comid = models.ForeignKey(
company,
related_name='clients',
on_delete = models.DO_NOTHING,
verbose_name="Company",
null = True,
blank = True
)
then the query is:
from django.db.models import Count
labels = []
data = []
queryClientCompany = company.objects.annotate(
c=Count('clients__cid')
)
for comp in queryClientCompany:
labels.append(comp.name)
data.append(comp.c)
I have 4 models:
class Run(models.Model):
start_time = models.DateTimeField(db_index=True)
end_time = models.DateTimeField()
chamber = models.ForeignKey(Chamber, on_delete=models.CASCADE)
recipe = models.ForeignKey(Recipe, default=None, blank=True, null=True, on_delete=models.CASCADE)
class RunProperty(models.Model):
run = models.ForeignKey(Run, on_delete=models.CASCADE)
property_name = models.CharField(max_length=50)
property_value = models.CharField(max_length=500)
class RunValue(models.Model):
run = models.ForeignKey(Run, on_delete=models.CASCADE)
run_parameter = models.ForeignKey(RunParameter, on_delete=models.CASCADE)
value = models.FloatField(default=0)
class RunParameter(models.Model):
parameter = models.ForeignKey(Parameter, on_delete=models.CASCADE)
chamber = models.ForeignKey(Chamber, on_delete=models.CASCADE)
param_name_user_defined = models.BooleanField(default=True)
A Run can have any number of RunProperty (usually user defined properties, can be custom), and a few predefined RunValue (such as Average Voltage, Minimum Voltage, Maximum Voltage) that are numeric values.
The RunParameter is basically just a container of parameter names (Voltage, Current, Frequency, Temperature, Impedance, Oscillation, Variability, etc, there's a ton of them.
When I build a front end table to show each Run along with all of its "File" RunProperty (where the Run came from) and all of its "Voltage" RunValue, I first query the DB for all Run objects, then do an additional 3 queries for the Min/Max/Avg, and then another query for the File, then I build a dict on the backend to pass to the front to build the table rows:
runs = Run.objects.filter(chamber__in=chambers)
min_v_run_values = RunValue.objects.filter(run__in=runs, run_parameter__parameter__parameter_name__icontains="Minimum Voltage")
max_v_run_values = RunValue.objects.filter(run__in=runs, run_parameter__parameter__parameter_name__icontains="Maximum Voltage")
avg_v_run_values = RunValue.objects.filter(run__in=runs, run_parameter__parameter__parameter_name__icontains="Average Voltage")
run_files = RunProperty.objects.filter(run__in=runs, property_name="File")
This is not such a big problem for customer with ~10 to 30 Run objects in their database, but we have one heavy usage customer who has 3500 Run instances. Needless to say, it's far, far too slow. I'm doing 5 queries to get all the needed instances, and then I have to loop and put them together into one dict. It takes upwards of 45 seconds to do this for that one customer (and about 8 or 10 for most other customers).
Is there a way that I can query my DB for all Run objects along with all of the Min/Max/Avg Voltage RunValue and the File RunProperty and return, say, a list of dicts, one for each Run along with the other objects?
I think Q queries can be used here, but I'm not quite sure HOW to use them, or if they are applicable for this scenario?
I tried this (but didn't get far):
runs = Run.objects.filter(chamber__in=chambers)
v_query = Q(run_parameter__parameter__parameter_name__icontains="Voltage")
run_values = RunValue.objects.filter(run__in=runs).filter(v_query)
run_files = RunProperty.objects.filter(run__in=runs, property_name="File")
That gets me all the RunValue related objects in 1 query, but it's still 3 queries per. I need to optimize this much more, if possible.
I am looking for something along the lines of:
runs = Run.objects.filter(chamber__in=chambers)
.annotate(Q(run__runvalue__run_parameter__parameter__parameter_name__icontains="Voltage")
& Q(run__runproperty__property_name__icontains="File"))
I think in very broad terms (not even pseudocode) I would need a query like:
"Get all Runs, and for each Run, get all the RunValue objects related to that Run that contain ["Average", "Maximum", "Minimum"] and also all the RunProperty objects for that Run that contain "File".
I don't know if it's possible (sounds like it should be), and I'm not sure whether I should use Q filtering, aggregates or annotation. In broad terms, I need to get all instances of one model, along with all foreign keys for each instance, in one query, if possible
Example:
I have table Run with 2 instances:
R1
R2
Each Run instance has an associated RunProperty instance "File" (just a string) for each:
R1_run.dat
R2_run.dat
EachRun instance has many RunValue instances (I am using Voltage as an example, but there's 26 of them):
R1_max_v
R1_min_v
R1_avg_v
R2_max_v
R2_min_v
R2_avg_v
I would need to query the DB such that it returns (list or dict, I can work around either):
[{R1, R1_run.dat, R1_max_v, R1_min_v, R1_avg_v},
{R2, R2_run.dat, R2_max_v, R2_min_v, R2_avg_v}]
Or a 2D array even:
[[R1, R1_run.dat, R1_max_v, R1_min_v, R1_avg_v],
[R2, R2_run.dat, R2_max_v, R2_min_v, R2_avg_v]]
Is this even possible?
From database perspective, you can get all the data you need using just a single query with a few joins:
-- This assumes that there is a primary key Run.id and
-- foreign keys RunValue.run_id and RunProperty.run_id.
-- IDs or names of min/max/avg run parameters, as well as
-- chamber ids are replaced with *_PARAMETER and CHAMBER_IDS
-- for brevity.
SELECT Run.*,
RVmin.value AS min_value,
RVmax.value AS max_value,
RVavg.value AS avg_value,
RP.value AS file_value
FROM Run
JOIN RunValue RVmin ON Run.id = RVmin.run_id
JOIN RunValue RVmax ON Run.id = RVmax.run_id
JOIN RunValue RVavg ON Run.id = RVavg.run_id
JOIN RunProperty RP ON Run.id = RP.run_id
WHERE
RVmin.run_parameter = MIN_PARAMETER AND
RVmax.run_parameter = MAX_PARAMETER AND
RVavg.run_parameter = AVG_PARAMETER AND
RP.property_name = 'File' AND
Run.chamber IN (CHAMBER_IDS);
Django way of building such joins must be something like Run.runvalue_set.filter(run_parameter__contains 'Maximum Voltage')
See "following relationships backward": https://docs.djangoproject.com/en/2.2/topics/db/queries/#following-relationships-backward
You can get this in query by using annotate, Min, Max, Avg.
For your problem. You can do this.
Add related name in ForeignKey fields.
class RunProperty(models.Model):
run = models.ForeignKey(Run, on_delete=models.CASCADE, related_name="run_prop_name")
class RunValue(models.Model):
run = models.ForeignKey(Run, on_delete=models.CASCADE, related_name="run_value_name")
run_parameter = models.ForeignKey(RunParameter, on_delete=models.CASCADE)
value = models.FloatField(default=0)
views.py
from django.db.models import Avg, Max, Min
filt = 'run_value_name__value'
query = Run.objects.annotate(run_avg = Avg(filt), run_max = Max(filt))
You can get all values:
for i in query:
print(i.run_avg, i.run_max, i.run_min )
-----------Edit------------
Please check I have added "related_name" in RunValue model.
let's assume you two values in Run model.
1) run_1
2) run_2
in model RunValue, 6 entries.
run = 1, run_parameter = "Avg_value", value = 50
run = 1, run_parameter = "Min_value", value = 25
run = 1, run_parameter = "Max_value", value = 75
run = 2, run_parameter = "Avg_value", value = 28
run = 2, run_parameter = "Max_value", value = 40
run = 2, run_parameter = "Min_value", value = 16
you want dictionary something like this:
{'run_1': {'Avg_value': 50, 'Min_value': 25, 'Max_value': 75}, 'run_2': {...}}
Do this remember to read select_related and prefetch_related for documentation.
rt = Rub.objects.all().prefetch_related('run_value_name')
s = {} # output dictionary
for i in rt:
s[i.run] = {} # run dictionary
for j in i.run_value_name.all():
s[i.run].update({j.run_parameter: j.value}) # update run dictionary
print(s)
----------Addition-----------
Check number of database hit by this code.
from django.db import connection, reset_queries
print(len(connection.queries))
reset_queries()
I have a Django model called Attendance that has the clock in and clock in times of an employee along with the status of that entry, to see whether it's authorized or not. I then, am making another model called Payroll. I want this to check inside the Attendance entries to see all the Authorized entries and then do some action on them. How do I check all the status fields for all the entries in Attendance?
EDIT: Updated to better elaborate my question.
To better elaborate my question, this is how I've setup my Attendance model:
class CWorkAttendance(models.Model):
AUTO_ATT = "AU"
MANUAL_ATT = "MA"
WORK_ENTRY_TYPES = (
(AUTO_ATT, "Auto-Attendance"),
(MANUAL_ATT, "Manual-Attendance"),
)
AUTHORIZED = "AU"
UNAUTHORIZED = "UA"
WORK_ENTRY_STATUSES = (
(AUTHORIZED, "Athorized"),
(UNAUTHORIZED, "Un-Authorized"),
)
#Thank you motatoes
def face_locations_in(self, instance):
now = datetime.datetime.now()
return "attendance/{}/{}/in".format(instance.work_employee, now.strftime("%Y/%m/%d"))
def face_locations_out(self, instance):
now = datetime.datetime.now()
return "attendance/{}/{}/out".format(instance.work_employee, now.strftime("%Y/%m/%d"))
work_employee = models.ForeignKey('CEmployees', on_delete=models.CASCADE,)
work_start_time = models.DateTimeField()
work_end_time = models.DateTimeField(null=True)
work_duration = models.IntegerField(null=True)
work_entry_type = models.CharField(max_length=2,choices=WORK_ENTRY_TYPES)
work_entry_status = models.CharField(max_length=2, choices=WORK_ENTRY_STATUSES, default=WORK_ENTRY_STATUSES[1][0])
employee_face_captured_in = models.ImageField(upload_to=face_locations_in,)#////////
employee_face_captured_out = models.ImageField(upload_to=face_locations_out,)
If you look closely at the work_entry_status, it's a choice CharField that will contain the status of the entry (UNAUTHORIZED by default).
I want to create a Payroll model that will check for all the rows in the CWorkAttendance model and check their work_entry_status fields to see if they are Authorized, which is what I want to learn how to do.
If those fields are authorized, I want the grab the row's work_employee, work_duration and also some details from the original CEmployees row for the employee.
This is what I want my Payslip/Payroll model to look like:
class Payslip(models.Model):
GENERATED = "GEN"
CONFIRMED = "CON"
PAYSLIP_STATUS = (
(GENERATED, "Generated-UNSAVED"),
(CONFIRMED, "Confirmed-SAVED"),
)
payslip_number = models.IntegerField()#MM/YY/AUTO_GENERATED_NUMBER(AUTO_INCREMENT)
payslip_employee = models.ForeignKey('CEmployees', on_delete=models.CASCADE,)#Choose the employee from the master table CEmployees
payslip_generation_date = models.DateTimeField(default=datetime.datetime.now())#Date of the payroll generation
payslip_total_hours = models.IntegerField()#Total hours that the employee worked
payslip_from_date = models.DateField()"""The date from when the payslip will be made. The payslip will be manual for now, so generate it after choosing a a date to generate from."""
payslip_total_basic_seconds = models.IntegerField()#Total seconds the employee worked
payslip_total_ot_seconds = models.IntegerField()#Total overtime seconds the employee worked
payslip_basic_hourly_rate = models.IntegerField()#The basic hourly rate of the employee mentioned here. Take from the master employees table.
payslip_basic_ot_rate = models.IntegerField()#Taking the basic overtime rate from the master table
payslip_total_amount = models.FloatField()#The total amount of the payslip
payslip_entry_status = models.CharField(max_length=3, default=GENERATED)#The status of the pay slip.
Thanks,
Not sure if I understand your requirements well, so let me know if I misunderstood.
# `employee` is the work_employee in question
# if you don't want to filter by employee, remove `work_employee=employee`
attendances = CWorkAttendance.objects.filter(work_entry_status=CWorkAttendance.AUTHORIZED, work_employee=employee)
for attendances in attendances:
# do things with this attendance record
attendance.work_duration
attendance.employee
# ....
Update
Since you would like to do it manually, I would suggest having a separate view to generate the Payslip. The important thing is to know the date_from and the date_to for this payslip. I imagine that it is the managers who would have access to this view, so you would need the proper access controls set for it. I also think you need to have a payslip_to_date even if you are going to generate it until the current date, which will be useful for record keeping. I assume you have that column in the code below.
views.py:
from django.views import View
class GeneratePayslip(View):
"""
make sure you have the right access controls set to this view
"""
def post(self, request, **kwargs):
employee_id = kwags.POST.get("employee_id")
date_from = kwargs.POST.get("from_date")
date_to = kwargs.POST.get("to_date")
# we fetch all the objects within range
attendances = CWorkAttendance.objects.filter( \
work_entry_status=CWorkAttendance.AUTHORIZED, \
work_employee_id=employee_id, \
work_start_time__gte=date_from, \
work_end_time__lte=date_to \
)
hours = 0
for attendance in attendances:
# perform calculations to compute total sum and rate
pass
# create the payslip object here ..
# redirect to a success page and return
If you wanted to do it automatically later on, you may want to generate payslips automatically, once a month. For that you could use something like Celery to have periodic tasks that run in the background, for each employee. If this is the case you could move the above code to a file such as utils.py. you can create a method which takes employee_id, from_date, to_date, and then generate the payslip object, returning the payslip_id to the calling method
I am currently struggling with a topic connected to transactions. I implemented a discount functionality. Whenever a sale is made with a discount code, the counter redeemed_quantity is increased by + 1.
Now I thought about the case. What if one or more users redeem a discount at the same time? Assuming redeemed_quantity is 10. User 1 buys the product and redeemed_quantity increases by +1 = 11. Now User 2 clicked on 'Pay' at the same time and again redeemed_quantity increases by +1 = 11. Even so, it should be 12. I learned about #transaction.atomic but I think the way I implemented them here will not help me with what I am actually trying to prevent. Can anyone help me with that?
view.py
class IndexView(TemplateView):
template_name = 'website/index.html'
initial_price_of_course = 100000 # TODO: Move to settings
def check_discount_and_get_price(self):
discount_code_get = self.request.GET.get('discount')
discount_code = Discount.objects.filter(code=discount_code_get).first()
if discount_code:
discount_available = discount_code.available()
if not discount_available:
messages.add_message(
self.request,
messages.WARNING,
'Discount not available anymore.'
)
if discount_code and discount_available:
return discount_code, self.initial_price_of_course - discount_code.value
else:
return discount_code, self.initial_price_of_course
def get_context_data(self, **kwargs):
context = super().get_context_data(**kwargs)
context['stripe_pub_key'] = settings.STRIPE_PUB_KEY
discount_object, course_price = self.check_discount_and_get_price()
context['course_price'] = course_price
return context
#transaction.atomic
def post(self, request, *args, **kwargs):
stripe.api_key = settings.STRIPE_SECRET_KEY
token = request.POST.get('stripeToken')
email = request.POST.get('stripeEmail')
discount_object, course_price = self.check_discount_and_get_price()
charge = stripe.Charge.create(
amount=course_price,
currency='EUR',
description='My Description',
source=token,
receipt_email=email,
)
if charge.paid:
if discount_object:
discount_object.redeemed_quantity += 1
discount_object.save()
order = Order(
total_gross=course_price,
discount=discount_object
)
order.save()
return redirect('website:index')
models.py
class Discount(TimeStampedModel):
code = models.CharField(max_length=20)
value = models.IntegerField() # Smallest currency unit, as amount charged
max_quantity = models.IntegerField()
redeemed_quantity = models.IntegerField(default=0)
def available(self):
available_quantity = self.max_quantity - self.redeemed_quantity
if available_quantity > 0:
return True
class Order(TimeStampedModel):
total_gross = models.IntegerField()
discount = models.ForeignKey(
Discount,
on_delete=models.PROTECT, # Can't delete discount if used.
related_name='orders',
null=True,
You can pass the handling of the incrementation to the database in order to avoid the race condition in your code by using django's F expression:
from django.db.models import F
# ...
discount_object.redeemed_quantity = F('redeemed_quantity') + 1
discount_object.save()
From the docs with a completely analogous example:
Although reporter.stories_filed = F('stories_filed') + 1 looks like a normal Python assignment of value to an instance attribute, in fact it’s an SQL construct describing an operation on the database.
When Django encounters an instance of F(), it overrides the standard Python operators to create an encapsulated SQL expression; in this case, one which instructs the database to increment the database field represented by reporter.stories_filed.
Django is a piece of a synchronous code. It means that every request you make to the server is processed individually. This problem could arise, when there are multiple server-workers (for example uwsgi workers), but again - it's practically impossible to do this. We run a webshop application with multiple workers and something like this never happend.
But back to the question - if you want to query the database to increase a value by one, see schwobaseggl's answer.
The last thing is that I think you misunderstand what transaction.atomic() does. Simply put it rolls back any queries made to the database in a function if function exits with an error to the state when function was called. See this answer and this piece of documentation. Maybe it will clear some things up.
I've looked at doing a query using an extra and/or annotate but have not been able to get the result I want.
I want to get a list of Products, which has active licenses and also the total number of available licenses. An active license is defined as being not obsolete, in date, and the number of licenses less the number of assigned licenses (as defined by a count on the manytomany field).
The models I have defined are:
class Vendor(models.Model):
name = models.CharField(max_length=200)
url = models.URLField(blank=True)
class Product(models.Model):
name = models.CharField(max_length=200)
vendor = models.ForeignKey(Vendor)
product_url = models.URLField(blank=True)
is_obsolete = models.BooleanField(default=False, help_text="Is this product obsolete?")
class License(models.Model):
product = models.ForeignKey(Product)
num_licenses = models.IntegerField(default=1, help_text="The number of assignable licenses.")
licensee_name = models.CharField(max_length=200, blank=True)
license_key = models.TextField(blank=True)
license_startdate = models.DateField(default=date.today())
license_enddate = models.DateField(null=True, blank=True)
is_obsolete = models.BooleanField(default=False, help_text="Is this licenses obsolete?")
licensees = models.ManyToManyField(User, blank=True)
I have tried filtering by the License model. Which works, but I don't know how to then collate / GROUP BY / aggregate the returned data into a single queryset that is returned.
When trying to filter by procuct, I can quite figure out the query I need to do. I can get bits and pieces, and have tried using a .extra() select= query to return the number of available licenses (which is all I really need at this point) of which there will be multiple licenses associated with a product.
So, the ultimate answer I am after is, how can I retrieve a list of available products with the number of available licenses in Django. I'd rather not resort to using raw as much as possible.
An example queryset that gets all the License details I want, I just can't get the product:
License.objects.annotate(
used_licenses=Count('licensees')
).extra(
select={
'avail_licenses': 'licenses_license.num_licenses - (SELECT count(*) FROM licenses_license_licensees WHERE licenses_license_licensees.license_id = licenses_license.id)'
}
).filter(
is_obsolete=False,
num_licenses__gt=F('used_licenses')
).exclude(
license_enddate__lte=date.today()
)
Thank you in advance.
EDIT (2014-02-11):
I think I've solved it in possibly an ugly way. I didn't want to make too many DB calls if I can, so I get all the information using a License query, then filter it in Python and return it all from inside a manager class. Maybe an overuse of Dict and list. Anyway, it works, and I can expand it with additional info later on without a huge amount of risk or custom SQL. And it also uses some of the models parameters that I have defined in the model class.
class LicenseManager(models.Manager):
def get_available_products(self):
licenses = self.get_queryset().annotate(
used_licenses=Count('licensees')
).extra(
select={
'avail_licenses': 'licenses_license.num_licenses - (SELECT count(*) FROM licenses_license_licensees WHERE licenses_license_licensees.license_id = licenses_license.id)'
}
).filter(
is_obsolete=False,
num_licenses__gt=F('used_licenses')
).exclude(
license_enddate__lte=date.today()
).prefetch_related('product')
products = {}
for lic in licenses:
if lic.product not in products:
products[lic.product] = lic.product
products[lic.product].avail_licenses = lic.avail_licenses
else:
products[lic.product].avail_licenses += lic.avail_licenses
avail_products = []
for prod in products.values():
if prod.avail_licenses > 0:
avail_products.append(prod)
return avail_products
EDIT (2014-02-12):
Okay, this is the final solution I have decided to go with. Uses Python to filter the results. Reduces cache calls, and has a constant number of SQL queries.
The lesson here is that for something with many levels of filtering, it's best to get as much as needed, and filter in Python when returned.
class ProductManager(models.Manager):
def get_all_available(self, curruser):
"""
Gets all available Products that are available to the current user
"""
q = self.get_queryset().select_related().prefetch_related('license', 'license__licensees').filter(
is_obsolete=False,
license__is_obsolete=False
).exclude(
license__enddate__lte=date.today()
).distinct()
# return a curated list. Need further information first
products = []
for x in q:
x.avail_licenses = 0
x.user_assigned = False
# checks licenses. Does this on the model level as it's cached so as to save SQL queries
for y in x.license.all():
if not y.is_active:
break
x.avail_licenses += y.available_licenses
if curruser in y.licensees.all():
x.user_assigned = True
products.append(x)
return q
One strategy would be to get all the product ids from your License queryset:
productIDList = list(License.objects.filter(...).values_list(
'product_id', flat=True))
and then query the products using that list of ids:
Product.objects.filter(id__in=productIDList)