Django filter query using Q - python

Can anyone help me? I'm trying to use a Django filter with Q.
This is my function
def get_first_time_customer_ids(year: int, month: int) -> QuerySet:
return Customer.objects.filter(
Q(bookings__status=Booking.STATUS.completed, bookings__pickup_time__year=year, bookings__pickup_time__month=month) &
~Q(bookings__status=Booking.STATUS.completed, bookings__pickup_time__lt=date(year, month, 1))
).distinct().values_list('id', flat=True)
What I'm trying to achieve is to return the ids of all customers whose first-ever booking falls in a given year and month.
But its failing on my test case.
My test case :
def test_get_first_time_customer_ids(self) -> None:
customer_1 = Customer.objects.create(name="Customer 1")
customer_2 = Customer.objects.create(name="Customer 2")
Booking.objects.bulk_create([
Booking(number="A", customer=customer_1, price=100_000, status=Booking.STATUS.completed,
pickup_time=dt(2023, 2, 4, 12), route_id=1, vehicle_category_id=1),
Booking(number="B", customer=customer_1, price=100_000, status=Booking.STATUS.completed,
pickup_time=dt(2023, 1, 5, 12), route_id=1, vehicle_category_id=1),
Booking(number="E", customer=customer_2, price=100_000, status=Booking.STATUS.completed,
pickup_time=dt(2023, 2, 10, 12), route_id=1, vehicle_category_id=1)
])
ids = get_first_time_customer_ids(2023, 2)
self.assertTrue(customer_2.id in ids)
self.assertFalse(customer_1.id in ids)
It fails on the last line: the id of customer_1 is included in the query result, but it shouldn't be. Any help is appreciated.

def get_first_time_customer_ids(year: int, month: int) -> QuerySet:
    """Return the ids of customers whose first completed booking is in the given month.

    A customer qualifies when they have a completed booking picked up in
    ``year``/``month`` and no completed booking picked up before the first
    day of that month.

    Args:
        year: Calendar year of the month to inspect.
        month: Calendar month (1-12) to inspect.

    Returns:
        A flat ``values_list`` queryset of customer ids.
    """
    # Lookups that span a relation need a double underscore
    # (``bookings__status``); ``bookings_status`` is not a Customer field
    # and makes filter() raise FieldError.
    booked_in_month = Customer.objects.filter(
        bookings__status=Booking.STATUS.completed,
        bookings__pickup_time__year=year,
        bookings__pickup_time__month=month,
    ).distinct().values("id")
    # Conditions passed to one filter() call must all hold for the same
    # related booking, so this matches customers with a completed booking
    # that is itself earlier than the month.
    booked_earlier = Customer.objects.filter(
        bookings__status=Booking.STATUS.completed,
        bookings__pickup_time__lt=date(year, month, 1),
    ).distinct().values("id")
    return booked_in_month.exclude(id__in=booked_earlier).values_list('id', flat=True)
Try this code.
I apply distinct() to each of the two querysets (qs1, qs2),
whereas the original code applies distinct() only once, at the very end.

Related

Cannot filter my Django model depence on created_date

I am trying to do a chart. My database has created_date. I am getting product data every day about 150 times and I want to see a daily increase and decrease of my data. I have no problem with my front end and Django-template (I try manual data and it works well) I just want to see the last 7 days chart.
When I use the Products.objects.filter(created_dates=days) filter method, I get an empty QuerySet.
I already tried created_dates__gte=startdate,created_dates__lte=enddate; it returns an empty QuerySet too.
I also tried created_dates__range, and that returns nothing either.
I only get data back from created_dates__gte=days, but that is not the data I want.
view.py
from datetime import date,timedelta
import datetime
def data_chart(request):
data = []
week_days = [datetime.datetime.now().date()-timedelta(days=i) for i in range (1,7)]
for days in week_days:
product_num = Products.objects.filter(created_dates=days)
date =days.strftime("%d.%m")
item = {"day": date,"value":len(product_num)}
data.append(item)
return render(request, 'chartpage.html', {'data': data})
In my database, I have thousands of data and my daily data about 150. My created_dates column format like this.
created_dates col:
2020-10-19 09:39:19.894184
So what is wrong with my code?. Could you please help?
You are trying to compare DateTimeField type (created_dates) with Date type (week_days is list of days) so maybe You should try __date lookup.
product_num = Products.objects.filter(created_dates__date=days)
https://docs.djangoproject.com/en/3.0/ref/models/querysets/#date
Furthermore maybe You should consider start using Count() database function with group by instead of iterating over days.
Here is great explanation:
https://stackoverflow.com/a/19102493/5160341
You should be able to do this with a single aggregation query:
import datetime
from django.db.models import Count
def data_chart(request):
cutoff = datetime.date.today() - datetime.timedelta(days=7)
raw_data = (
Products.objects.filter(created_dates__gte=cutoff)
.values_list("created_dates__date")
.annotate(count=Count("id"))
.values_list("created_dates__date", "count")
)
data = [{"day": str(date), "value": value} for (date, value) in raw_data]
return render(request, "chartpage.html", {"data": data})

For filtering data in Django, build dynamic query for multiple columns

I have to filter data from model based on the run time values. I am getting 5 values via query string. My querystring is like below:
http://127.0.0.1:8000/personal/search/?month=&year=&account=&deliveryManagedFrom=&marketmName=
So, I want to include all or none of the values in the filter so that it displays the desired result. Below is the filter query which I am writing:
sum_tt_count = NetworkRelatedInformation.objects.filter(month=month, year=year, account_id=account, account__deliveryManagedFrom=deliveryManagedFrom, account__marketName=market).aggregate(Sum('tt_count'))
totalttcount = sum_tt_count['tt_count__sum']
It is working well in case, all the values have been provided.
In case, if any value is blank, it should not consider that value and display output as per other filter criteria.
Pls suggest how to implement an OR filter with 5 data inputs. It is not necessary that all 5 data inputs have values . So the value can be None or the value in the querystring
Filter the request for non-empty values and then use dictionary expansion to do the query.
q = {k:v for k, v in request.GET.items() if v}
sum_tt_count = NetworkRelatedInformation.objects.filter(**q).aggregate(Sum('tt_count'))
You can do it using Q object
from django.db.models import Q
NetworkRelatedInformation.objects.filter(Q(month__isnull=True) | Q(month=month), Q(year__isnull=True) | Q(year=year)).aggregate(Sum('tt_count'))
For handling the None values i have to explicitly write the below code.
# A blank query-string parameter arrives as '' (and a missing one as None);
# collapse both to None. Use truthiness rather than ``is ''``: identity of
# string literals is an interpreter detail and a SyntaxWarning on Python 3.8+.
account = request.GET.get('account') or None
month = request.GET.get('month') or None
year = request.GET.get('year') or None
# OR the three criteria together and total alarm_count over the matches.
sum_alarm_count = NetworkRelatedInformation.objects.filter(
    Q(month=month) | Q(year=year) | Q(account_id=account)
).aggregate(Sum('alarm_count'))
totalalarmcount = sum_alarm_count['alarm_count__sum']

Django - Iterate over queryset and add static values

In the following queryset I am filtering planned hours per week (displayval is my week in this queryset) by employee. I would like to add an item for planned hours = 0 when the employee has no hours planned for a week I'm filtering by.
What's the easiest way to achieve this?
def DesignHubR(request):
emp3_list = Projectsummaryplannedhours.objects.values_list('displayval', 'employeename')
.filter(businessunit='a')
.filter(billinggroup__startswith='PLS - Project')
.filter(Q(displayval=sunday2)|Q(displayval=sunday))
.annotate(plannedhours__sum=Sum('plannedhours'))
emp3 = map(lambda x: {'date': x[0], 'employee_name': x[1], 'planned_hours': x[2]}, emp3_list)
context = {'sunday': sunday, 'sunday2': sunday2, 'emp3': emp3}
return render(request,'department_hub_ple.html', context)
I think that you can use the Coalesce(*expressions, **extra) function to solve your problem.
Accepts a list of at least two field names or expressions and returns the first non-null value (note that an empty string is not considered a null value).
So your query will be looking like:
from django.db.models import Sum, Value
from django.db.models.functions import Coalesce
emp3_list = \
Projectsummaryplannedhours.objects.\
filter(
Q(businessunit='a') &
Q(billinggroup__startswith='PLS - Project') &
(Q(displayval=sunday2) | Q(displayval=sunday))
).\
annotate(plannedhours__sum=Coalesce(
Sum('plannedhours'), Value(0)
)
).\
values_list('displayval', 'employeename')
See https://docs.djangoproject.com/en/1.9/ref/models/database-functions/#coalesce for more information.
This will help you to get plannedhours__sum = 0 if no entries to sum exists. If you also want to add additional parameter to each entry where plannedhours__sum = 0 you can use Django conditional expression.Read about Case expression for more information (https://docs.djangoproject.com/en/1.11/ref/models/conditional-expressions/#case).
Case() accepts any number of When() objects as individual arguments. Other options are provided using keyword arguments. If none of the conditions evaluate to TRUE, then the expression given with the default keyword argument is returned. If a default argument isn’t provided, None is used.
from django.db.models import Sum, Value, IntegerField
from django.db.models.functions import Coalesce
emp3_list = \
Projectsummaryplannedhours.objects.\
filter(
Q(businessunit='a') &
Q(billinggroup__startswith='PLS - Project') &
(Q(displayval=sunday2) | Q(displayval=sunday))
).\
annotate(plannedhours__sum=Coalesce(
Sum('plannedhours'), Value(0)
),
x=Case(When(plannedhours__sum=0, then=Value(0)),
output_field=IntegerField())
).\
values_list('displayval', 'employeename')
This will give you additional parameter x equals 0 if planned hours = 0 and None elsewhere. You can also filter emp3_list by annotated values.
As a result you can pass your queryset to a template context = {'sunday': sunday, 'sunday2': sunday2, 'emp3': emp3_list}, iterate over it there and get the attributes you need:
for q in emp3_list:
print(q[0], q[1], q[2])
Hope it will help you.

combine 2 tables and aggregate data

users
Could you please help with following?
I need to extract data from a mysql database and aggregate them.
There are two tables in the database, and each of them records data at a different timestep.
I now need to make one new table (txt), where all data of table 1 are combined with the table 2 data.
So I only need the table 2 data whose time corresponds most closely to the timesteps of table 1.
for better understanding, see an example of the tables here:
https://www.dropbox.com/s/mo2q0hj72ilx05n/data%20aggregation.xlsx?dl=0
I already have a python-code which extracts the hexadecimal data and makes table 2.
I also have a code which makes table 1.
I need to combine both now.
Thank you very much for your advices!
After copying your data tables into Python lists, I had to split up the values in table 2 back into independent series. Overall you may be able to skip the step where you consolidate these values into the single table Table2.
The key to solving this is to write a simple class that implements __getitem__, taking a single key argument and returning the corresponding value. For instance, in the case of a regular Python dict, then __getitem__ returns the dict entry that exactly matches the key, or a KeyError if there is no match. In your case, I implemented __getitem__ to just return the entry with the minimum difference of the entry's timestamp from the given timestamp, in this line:
closest = min(self.data, key=lambda x: abs(x[0]-keyts))
(Left as an exercise to the OP - how to handle the case where the key falls exactly between two entries.) If you need to adjust the lookup logic, just change the implementation of __getitem__ - everything else in the code will remain the same.
Here is my sample implementation:
# t1 and t2 are lists of tab-delimited strings copy-pasted
# from the OP's spreadsheet
TAB = '\t'
t1data = [t.split(TAB) for t in t1]
t2data = [t.split(TAB) for t in t2]
# split each parameter into individual (time, value) pairs;
# an empty cell in a table 2 row means "no reading" and is skipped
readings = {'A': [], 'B': [], 'C': []}
# A single pass over the rows is enough: every row already carries all
# three parameters, so an extra per-parameter outer loop would append each
# reading three times.
for t, a, b, c in t2data:
    t = int(t)
    for name, raw in zip("ABC", (a, b, c)):
        if raw:
            readings[name].append((t, int(raw)))
# define class for retrieving value with "closest" key if
# there is not an exact match
class LookupClosest(object):
    """Series of (key, value) pairs that is indexed by nearest key.

    ``series[k]`` returns the value of the pair whose key has the smallest
    absolute difference from ``k``; an exact match is just a difference of 0.
    """

    def __init__(self, pairs):
        # pairs: sequence of (key, value) tuples; stored by reference
        self.data = pairs

    def __getitem__(self, key):
        """Return the value whose key is closest to ``key``.

        Raises:
            ValueError: if the series is empty (propagated from ``min``).
        """
        # min() returns the first minimal element it encounters, so when key
        # lies exactly between two entries the one stored earlier wins.
        closest = min(self.data, key=lambda pair: abs(pair[0] - key))
        return closest[1]
# convert each data series to LookupClosest
for key in "ABC":
readings[key] = LookupClosest(readings[key])
# extract and display data: one record per table 1 row, pairing its
# timestamp and GPS value with the closest A/B/C readings
for vals in t1data:
    t = int(vals[0])
    gps = vals[1]
    a = readings['A'][t]
    b = readings['B'][t]
    c = readings['C'][t]
    rec = t, gps, a, b, c
    # print(rec) prints the tuple on both Python 2 and Python 3;
    # the bare ``print rec`` statement is a SyntaxError on Python 3
    print(rec)
prints: (I modified the Table1 data so that you can tell the difference from one record to the next):
( 1, 'x01', 1, 10, 44)
(10, 'x10', 2, 11, 47)
(21, 'x21', 4, 12, 45)
(30, 'x30', 3, 12, 44)
(41, 'x41', 4, 12, 47)
(52, 'x52', 2, 10, 48)

query for values based on date w/ Django ORM

I have a bunch of objects that have a value and a date field:
obj1 = Obj(date='2009-8-20', value=10)
obj2 = Obj(date='2009-8-21', value=15)
obj3 = Obj(date='2009-8-23', value=8)
I want this returned:
[10, 15, 0, 8]
or better yet, an aggregate of the total up to that point:
[10, 25, 25, 33]
It would be best to get this data directly from the database, but otherwise I can do the totaling pretty easily with a for loop.
I'm using Django's ORM and also Postgres
edit:
Just to note, that my example only covers a few days, but in practice, I have hundreds of objects covering a couple decades... What I'm trying to do is create a line graph showing how the sum of all my objects has grown over time (a very long time)
This one isn't tested, since it's a bit too much of a pain to set up a Django table to test with:
from datetime import date, timedelta
# http://www.ianlewis.org/en/python-date-range-iterator
def datetimeRange(from_date, to_date=None):
    """Yield consecutive days from ``from_date`` through ``to_date`` inclusive.

    Args:
        from_date: First day to yield.
        to_date: Last day to yield. When ``None`` the range is open-ended
            and the generator keeps producing days until the caller stops
            iterating.

    Yields:
        ``from_date``, then each following day, one day apart. Nothing is
        yielded when ``to_date`` is earlier than ``from_date``.
    """
    one_day = timedelta(days=1)
    current = from_date
    while to_date is None or current <= to_date:
        yield current
        current = current + one_day
start = date(2009, 8, 20)
end = date(2009, 8, 23)
objects = Obj.objects.filter(date__gte=start)
objects = objects.filter(date__lte=end)
results = {}
for o in objects:
results[o.date] = o.value
return [results.get(day, 0) for day in datetimeRange(start, end)]
This avoids running a separate query for every day.
result_list = []
for day in range(20,24):
result = Obj.objects.get(date=datetime(2009, 08, day))
if result:
result_list.append(result.value)
else:
result_list.append(0)
return result_list
If you have more than one Obj per date, you'll need to check len(obj) and iterate over them in case it's more than 1.
If you loop through a Obj.objects.get 100 times, you're doing 100 SQL queries. Obj.objects.filter will return the results in one SQL query, but you also select all model fields. The right way to do this is to use Obj.objects.values_list, which will do this with a single query, and only select the 'values' field.
start_date = date(2009, 8, 20)
end_date = date(2009, 8, 23)
objects = Obj.objects.filter(date__range=(start_date,end_date))
# values_list and 'value' aren't related. 'value' should be whatever field you're querying
val_list = objects.values_list('value',flat=True)
# val_list = [10, 15, 8]
To do a running aggregate of val_list, you can do this (not certain that this is the most pythonic way)
# A QuerySet does not support item assignment, so copy the values into a
# plain list before accumulating in place.
val_list = list(val_list)
# Start at 1: the first element is already its own running total.
# range() (rather than xrange) works on both Python 2 and Python 3.
for i in range(1, len(val_list)):
    val_list[i] = val_list[i] + val_list[i - 1]
# val_list = [10,25,33]
EDIT: If you need to account for missing days, @Glenn Maynard's answer is actually pretty good, although I prefer the dict() syntax:
objects = Obj.objects.filter(date__range=(start_date,end_date)).values('date','value')
val_dict = dict((obj['date'],obj['value']) for obj in objects)
# I'm stealing datetimeRange from #Glenn Maynard
val_list = [val_dict.get(day, 0) for day in datetimeRange(start_date, end_date)]
# val_list = [10,15,0,8]

Categories

Resources