Django group by hour - python

I have the following model in Django.
class StoreVideoEventSummary(models.Model):
Customer = models.ForeignKey(GlobalCustomerDirectory, null=True, db_column='CustomerID', blank=True, db_index=True)
Store = models.ForeignKey(Store, null=True, db_column='StoreID', blank=True, related_name="VideoEventSummary")
Timestamp = models.DateTimeField(null=True, blank=True, db_index=True)
PeopleCount = models.IntegerField(null=True, blank=True)
I would like to find out the number of people entering the store each hour.
To achieve this, I'm trying to group the rows by the hour on Timestamp and sum the PeopleCount column.
store_count_events = StoreVideoEventSummary.objects.filter(Timestamp__range=(start_time, end_time),
Customer__id=customer_id,
Store__StoreName=store)\
.order_by("Timestamp")\
.extra({
"hour": "date_part(\'hour\', \"Timestamp\")"
}).annotate(TotalPeople=Sum("PeopleCount"))
This doesn't seem to group the results by the hour, it merely adds a new column TotalPeople which has the same value as PeopleCount to each row in the query set.

just break it into two steps
import itertools
from datetime import datetime
# ...
def date_hour(timestamp):
    """Return a "date hour" grouping key for *timestamp*.

    *timestamp* may be a ``datetime`` (what a Django ``DateTimeField``
    yields) or a numeric POSIX timestamp.  The key is the locale's date
    representation (``%x``) followed by the zero-padded 24-hour value.
    """
    # DateTimeField values are already datetime objects; only raw numeric
    # timestamps need converting (fromtimestamp() rejects datetimes).
    if isinstance(timestamp, (int, float)):
        timestamp = datetime.fromtimestamp(timestamp)
    return timestamp.strftime("%x %H")
objs = StoreVideoEventSummary.objects.filter(
    Timestamp__range=(start_time, end_time),
    Customer__id=customer_id,
    Store__StoreName=store,
).order_by("Timestamp")
# groupby() only merges *adjacent* items with equal keys, which is why the
# queryset is ordered by Timestamp first.
groups = itertools.groupby(objs, lambda x: date_hour(x.Timestamp))
# groups is a lazy iterator; the queryset is only traversed in the loop below.
for group, matches in groups:
    # Sum the people counted in each bucket (not the number of rows);
    # PeopleCount is nullable, so treat NULL as 0.
    print(group, "TTL:", sum(match.PeopleCount or 0 for match in matches))
This allows you to group by several distinct criteria
If you just want the hour regardless of the date, just change date_hour:
def date_hour(timestamp):
    """Return the zero-padded 24-hour value of *timestamp* as a grouping key.

    *timestamp* may be a ``datetime`` (what a Django ``DateTimeField``
    yields) or a numeric POSIX timestamp.  The date is ignored, so the
    same hour on different days maps to the same key.
    """
    # DateTimeField values are already datetime objects; only raw numeric
    # timestamps need converting (fromtimestamp() rejects datetimes).
    if isinstance(timestamp, (int, float)):
        timestamp = datetime.fromtimestamp(timestamp)
    return timestamp.strftime("%H")
If you wanted to group by day of the week you just use
def date_day_of_week(timestamp):
    """Return a "weekday hour" grouping key for *timestamp*.

    *timestamp* may be a ``datetime`` (what a Django ``DateTimeField``
    yields) or a numeric POSIX timestamp.  The key is the weekday number
    (``%w``: 0 = Sunday ... 6 = Saturday) followed by the zero-padded
    24-hour value.
    """
    # DateTimeField values are already datetime objects; only raw numeric
    # timestamps need converting (fromtimestamp() rejects datetimes).
    if isinstance(timestamp, (int, float)):
        timestamp = datetime.fromtimestamp(timestamp)
    return timestamp.strftime("%w %H")
And update itertools.groupby's lambda to use date_day_of_week.

Building off your original code, could you try:
# QuerySet has no group_by(); Django derives GROUP BY from values() followed
# by annotate().  Ordering by the grouped column keeps "Timestamp" (or any
# default ordering) out of the GROUP BY clause.
store_count_events = StoreVideoEventSummary.objects.filter(
    Timestamp__range=(start_time, end_time),
    Customer__id=customer_id,
    Store__StoreName=store,
).extra(
    select={"hour": "date_part('hour', \"Timestamp\")"}
).values("hour").annotate(TotalPeople=Sum("PeopleCount")).order_by("hour")

I know I'm late here, but taking cues from the doc, https://docs.djangoproject.com/en/1.11/ref/models/querysets/#django.db.models.query.QuerySet.extra
the below filter should work for you.
# Do not order by 'Timestamp' here: fields named in order_by() are added to
# the GROUP BY, which would split every hour bucket back into single rows.
# NOTE(review): hour(...) is MySQL syntax; on PostgreSQL use
# date_part('hour', "Timestamp") instead.
store_count_events = StoreVideoEventSummary.objects.filter(
    Timestamp__range=(start_time, end_time),
    Customer__id=customer_id,
    Store__StoreName=store,
).extra(
    select={
        'hour': 'hour(Timestamp)',
    }
).values(
    'hour'
).annotate(
    TotalPeople=Sum('PeopleCount')
).order_by(
    'hour'
)

Related

Order DateTimeField by rounded time using only the ORM?

I came up with a solution, but it uses a model method (which, as far as I understand, cannot be used with X.objects.filter()) and then Python's built-in sorted. I've read that it's much faster to use the Django ORM than plain Python, so I'm looking for an ORM-only solution. Note that adding fields to my model is not possible, as the database is already well populated.
Basically I've an Articles model :
class Articles(models.Model):
title = models.CharField(max_length=200, null=False, blank=False)
image = models.URLField(null=False, blank=False)
summary = models.TextField()
link = models.URLField(null=False, blank=False)
pub_date = models.DateTimeField(default=timezone.now)
source = models.ForeignKey(
Sources, on_delete=models.CASCADE, related_name="souurce")
category = models.ManyToManyField(Categories)
What I want to do is order the results by approximate publication date (for example, an article published at 5:34 and another one published at 5:31 are both considered published at the same time); then I can apply further orderings, such as by category, by source, or even by title.
Here is my class method to do that (by closest 10 minutes ):
def get_approximate_date(self):
    """Return ``self.pub_date`` rounded to the closest 10 minutes.

    Seconds and microseconds are discarded before rounding, and a minute
    value ending in 5 rounds up.  Rounding is done with ``timedelta``
    arithmetic so that rounding up past the end of an hour or a day
    carries into the hour/date instead of wrapping around.
    """
    from datetime import timedelta

    def timeround10(dt):
        """Round *dt* to the closest 10-minute mark (halves round up)."""
        # Shift by half an interval, then floor to a multiple of 10 minutes.
        shifted = dt.replace(second=0, microsecond=0) + timedelta(minutes=5)
        return shifted - timedelta(minutes=shifted.minute % 10)

    return timeround10(self.pub_date)
Then in my view I can do the following ( I chose to order by approximate date then by reverse alphabetical order ) :
articles_ = Articles.objects.all()
articles_list = sorted(articles_, key=lambda i: (i.get_approximate_date(), i.summary), reverse=True)
The closest thing I came up with using only django ORM is :
Articles.objects.order_by("-pub_date__year","-pub_date__month","-pub_date__day","-summary")
Apart from being ugly it only round the pub date by hour, so 1:59 PM = 1:01PM.
I'm aware of Trunc https://docs.djangoproject.com/en/3.1/ref/models/database-functions/#trunc, but it only implements truncation to fixed units (hour, minute, etc.), not to 10-minute intervals; maybe I should extend it if that's the only option.
Thanks in advance !

Django - How to join two querysets with different key values (but from same model)

I am trying to join two querysets in Django with different key values, but from the same model, is there any way to do so?
Here is my code:
models.py
class CustomerInformation(models.Model):
status = (
('lead', 'Lead'),
('client', 'Client'),
)
customer_id = models.AutoField(primary_key=True)
customer_name = models.CharField(max_length=100)
status = models.CharField(max_length=100, choices=status, default='lead')
conversion_date = models.DateField(null=True, blank=True)
created_date = models.DateField(default=timezone.localdate)
def save(self, *args, **kwargs):
if self.customer_id:
if self.status != CustomerInformation.objects.get(customer_id=self.customer_id).status and self.status == 'client':
self.conversion_date = timezone.now()
elif self.status != CustomerInformation.objects.get(customer_id=self.customer_id).status and self.status == 'lead':
self.conversion_date = None
super(CustomerInformation, self).save(*args, **kwargs)
here is my filtering
start = date.today() + relativedelta(days=-30)
client_qs = CustomerInformation.objects.filter(conversion_date__gte=start).values(date=F('conversion_date')).annotate(client_count=Count('date'))
lead_qs = CustomerInformation.objects.filter(created_date__gte=start).values(date=F('created_date')).annotate(lead_count=Count('date'))
Basically what I am trying to achieve is to get the count of CustomerInformation instances created in the past 30 days (by annotating the count of the field 'created_date'). Also, I want to get the count of CustomerInformation instances that have converted to 'client' status within the past 30 days (by annotating the count of the field 'conversion_date'). Is there any way for me to do so and receive them in a single queryset, preferably with a single date field?
For example, my desired output would be
[ {'date': '170620', 'lead_count': 2, 'client_count': 1}, {'date': '180620', 'lead_count': 1, 'client_count': 0 }, ... ]
All help is appreciated, thanks all!
I think you can achieve the same by doing:
# Both keyword conditions passed to filter() must hold (they are ANDed).
combined_qs = CustomerInformation.objects.filter(
    created_date__gte=start,
    conversion_date__gte=start,
).annotate(
    lead_count=Count('created_date'),
    client_count=Count('conversion_date'),
)
Note that the above ANDs the filter conditions together. Roughly, that means it will only return customer information that has both created_date and conversion_date greater than or equal to the start date. (I might be wrong, but I think that's what you want in this case.)
Otherwise you can use the Q object for a more complex query.
from django.db.models import Q
# Q objects combined with | match rows satisfying either date condition.
combined_qs = CustomerInformation.objects.filter(
    Q(created_date__gte=start) | Q(conversion_date__gte=start)
).annotate(
    lead_count=Count('created_date'),
    client_count=Count('conversion_date'),
)
The official doc for using the Q objects is here
I will say try both and compare the results you get.
Hope that helps.

Python - Django - Filter and count all people in first year from start date

I need to count all the people who are in the first year of the contract.
I even made several attempts and failed. Can anyone help me? Thanks!
Model:
class Contracts(models.Model):
person = models.CharField(max_length=50, null=True, blank=True, verbose_name='Name')
start_date = models.DateField(null=True, blank=False, verbose_name='Start')
def __str__(self):
return '{}'.format(self.person)
So far...
View:
def people_in_first_year(request):
people = Contracts.objects.filter(Q(start_date__lte=timezone.now()) & Q(end_date__gte=timezone.now() + timedelta(days=365)))
total_people = people.count()
context = {
'total_people': total_people,
}
return render(request, 'people.html', context)
Since there is not an end_date, you can not filter much on that one. What you can do is simply set the start_date to be greater than or equal to now minus 365 days:
total_people = Contracts.objects.filter(
start_date__gte=timezone.now() - timedelta(days=365)
).count()
A year however is not per se 365 days. You can for example make use of the python-dateutil package [pypi], and work with a relativedelta [readthedocs.io]:
from dateutil.relativedelta import relativedelta
total_people = Contracts.objects.filter(
start_date__gte=timezone.now() - relativedelta(years=1)
).count()

Python Django get distinct queryset by month from a DateField

class MyModel(models.Model):
TRANSACTION_TYPE_CHOICES = (
('p', 'P'),
('c', 'C'),
)
status = models.CharField(max_length=50, choices=TRANSACTION_TYPE_CHOICES, default='c')
user = models.ForeignKey(User, db_index=True, on_delete=models.CASCADE,related_name='user_wallet')
date = models.DateField(auto_now=True)
amount = models.FloatField(null=True, blank=True)
def __unicode__(self):
return str(self.id)
I am a fresher in Python django and have a little knowledge in Django Rest Framework.
I have a model like above and I want to filter the date field by month and get distinct queryset by month.....Is there any default way to do this...
Thanks in advance
you can use TruncMonth with annotations
from django.db.models.functions import TruncMonth
MyModel.objects.annotate(
month=TruncMonth('date')
).filter(month=YOURVALUE).values('month').distinct()
or if you need only filter date by month with distinct you can use __month option
MyModel.objects.filter(date__month=YOURVALUE).distinct()
Older django
you can use extra, example for postgres
# Select the month number with raw SQL and keep only rows whose month equals
# the bound parameter, then return the distinct month values.
MyModel.objects.extra(
select={'month': "EXTRACT(month FROM date)"},
where=["EXTRACT(month FROM date)=%s"],
params=[5]
# Replace 5 with the month number you want to filter on.
).values('month').distinct()
This may help you
# distinct(*fields) is supported on PostgreSQL only, and the queryset must be
# ordered starting with the same fields that are passed to distinct().
MyModel.objects.values('col1', 'col2', ..., 'date').order_by('date').distinct('date')
OR try this:
from django.db.models import Count
from django.db.models.functions import TruncMonth

# The chain is wrapped in parentheses so it can span several lines.
(
    MyModel.objects
    .annotate(month=TruncMonth('date'))  # Truncate to month and add to select list
    .values('month')                     # Group By month
    .annotate(c=Count('id'))             # Select the count of the grouping
    .values('month', 'c')                # (might be redundant, haven't tested) select month and count
)

How to run a custom aggregation on a queryset?

I have a model called LeaveEntry:
class LeaveEntry(models.Model):
date = models.DateField(auto_now=False, auto_now_add=False)
user = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.PROTECT,
limit_choices_to={'is_active': True},
unique_for_date='date'
)
half_day = models.BooleanField(default=False)
I get a set of LeaveEntries with the filter:
LeaveEntry.objects.filter(
leave_request=self.unapproved_leave
).count()
I would like to get an aggregation called total days, so where a LeaveEntry has half_day=True then it is half a day so 0.5.
What I was thinking based on the django aggregations docs was annotating the days like this:
days = LeaveEntry.objects.annotate(days=<If this half_day is True: 0.5 else 1>)
You can use django's conditional expressions Case and When (only for django 1.8+):
Keeping the order of filter() and annotate() in mind, you can count the number of days left for unapproved leaves like so:
from django.db.models import Case, FloatField, Sum, When
# ...
# Sum (not Count) is required: Count only counts non-null values, so every
# entry would contribute 1 regardless of half_day.  aggregate() collapses the
# filtered entries into a single {'days': <total>} dict.
LeaveEntry.objects.filter(
    leave_request=self.unapproved_leave  # not sure what self relates to
).aggregate(
    days=Sum(
        Case(
            When(half_day=True, then=0.5),
            When(half_day=False, then=1),
            output_field=FloatField(),
        )
    )
)

Categories

Resources