How to compare dates from MongoDB object in python? - python

I am trying to work out the difference in years between a MongoDB object and Today's date. The MongoDB object looks like this:
Key: Value
club_joined: 2021-08-10T00:00:00.000+00:00
I have the python code (that works):
loyal_players = []
for player in players.find():
# get time players have been at club
current_date = datetime.today()
joined_club = player['club_joined']
time_at_club = relativedelta(current_date, joined_club)
time_at_club_years = time_at_club.years
# check if they are over 10 years
if time_at_club_years >= 10:
loyal_players.append(player['short_name'])
But what I want is a Mongo query that can add into the .find() line:
for player in players.find():
I know the logic behind it is:
for player in players.find(
where (
(todayDate - player['club_joined']) > 10
):
But how do I write this as a MongoDB query in python?
MongoDb Type:

The $subtract function will take 2 dates and return the diff in millis so this works:
from pymongo import MongoClient
import datetime
now = datetime.datetime.now()
TARG = 86400 * 1000 * 365 * 10 # secs in day X millis X 365 days X 10 yrs
cursor = db.foo.aggregate([
{"$match": {"$expr": {"$gt": [ {"$subtract": [now,"$club_joined"]}, TARG ] } }}
])
for doc in cursor:
print(doc)

Using relativedelta seems simpler:
import datetime
from dateutil.relativedelta import relativedelta
dt = datetime.datetime.now() - relativedelta(years=10)
cursor = db.foo.find({'club_joined': {'$lt': dt}})
for doc in cursor:
print(doc)

Related

Check if the number of slots is > 0 before picking a date and an hour?

I am building a vaccination appointment program that automatically assigns a slot to the user.
This builds the table and saves it into a CSV file:
import pandas
start_date = '1/1/2022'
end_date = '31/12/2022'
list_of_date = pandas.date_range(start=start_date, end=end_date)
df = pandas.DataFrame(list_of_date)
df.columns = ['Date/Time']
df['8:00'] = 100
df['9:00'] = 100
df['10:00'] = 100
df['11:00'] = 100
df['12:00'] = 100
df['13:00'] = 100
df['14:00'] = 100
df['15:00'] = 100
df['16:00'] = 100
df['17:00'] = 100
df.to_csv(r'C:\Users\Ric\PycharmProjects\pythonProject\new.csv')
And this code randomly pick a date and an hour from that date in the CSV table we just created:
import pandas
import random
from random import randrange
#randrange randomly picks an index for date and time for the user
random_date = randrange(365)
random_hour = randrange(10)
list = ["8:00", "9:00", "10:00", "11:00", "12:00", "13:00", "14:00", "15:00", "16:00", "17:00"]
hour = random.choice(list)
df = pandas.read_csv('new.csv')
date=df.iloc[random_date][0]
# 1 is substracted from that cell as 1 slot will be assigned to the user
df.loc[random_date, hour] -= 1
df.to_csv(r'C:\Users\Ric\PycharmProjects\pythonProject\new.csv',index=False)
print(date)
print(hour)
I need help with making the program check if the random hour it chose on that date has vacant slots. I can manage the while loops that are needed if the number of vacant slots is 0. And no, I have not tried much because I have no clue of how to do this.
P.S. If you're going to try running the code, please remember to change the save and read location.
Here is how I would do it. I've also cleaned it up a bit.
import random
import pandas as pd
start_date, end_date = '1/1/2022', '31/12/2022'
hours = [f'{hour}:00' for hour in range(8, 18)]
df = pd.DataFrame(
data=pd.date_range(start_date, end_date),
columns=['Date/Time']
)
for hour in hours:
df[hour] = 100
# 1000 simulations
for _ in range(1000):
random_date, random_hour = random.randrange(365), random.choice(hours)
# Check if slot has vacant slot
if df.at[random_date, random_hour] > 0:
df.at[random_date, random_hour] -= 1
else:
# Pass here, but you can add whatever logic you want
# for instance you could give it the next free slot in the same day
pass
print(df.describe())
import pandas
import random
from random import randrange
# randrange randomly picks an index for date and time for the user
random_date = randrange(365)
# random_hour = randrange(10) #consider removing this line since it's not used
lista = [# consider avoid using Python preserved names
"8:00",
"9:00",
"10:00",
"11:00",
"12:00",
"13:00",
"14:00",
"15:00",
"16:00",
"17:00",
]
hour = random.choice(lista)
df = pandas.read_csv("new.csv")
date = df.iloc[random_date][0]
# 1 is substracted from that cell as 1 slot will be assigned to the user
if df.loc[random_date, hour] > 0:#here is what you asked for
df.loc[random_date, hour] -= 1
else:
print(f"No Vacant Slots in {random_date}, {hour}")
df.to_csv(r"new.csv", index=False)
print(date)
print(hour)
Here's another alternative. I'm not sure you really need the very large and slow-to-load pandas module for this. This does it with plan Python structures. I tried to run the simulation until it got a failure, but with 365,000 open slots, and flushing the database to disk each time, it takes too long. I changed the 100 to 8, just to see it find a dup in reasonable time.
import csv
import datetime
import random
def create():
start = datetime.date( 2022, 1, 1 )
oneday = datetime.timedelta(days=1)
headers = ["date"] + [f"{i}:00" for i in range(8,18)]
data = []
for _ in range(365):
data.append( [start.strftime("%Y-%m-%d")] + [8]*10 ) # not 100
start += oneday
write( headers, data )
def write(headers, rows):
fcsv = csv.writer(open('data.csv','w',newline=''))
fcsv.writerow( headers )
fcsv.writerows( rows )
def read():
days = []
headers = []
for row in csv.reader(open('data.csv')):
if not headers:
headers = row
else:
days.append( [row[0]] + list(map(int,row[1:])))
return headers, days
def choose( headers, days ):
random_date = random.randrange(365)
random_hour = random.randrange(len(headers)-1)+1
choice = days[random_date][0] + " " + headers[random_hour]
print( "Chose", choice )
if days[random_date][random_hour]:
days[random_date][random_hour] -= 1
write(headers,days)
return choice
else:
print("Randomly chosen slot is full.")
return None
create()
data = read()
while choose( *data ):
pass

Python/Discord.py remove milliseconds from time.time()

I did this for my python Discord bot (basically it's a voice activity tracker), everything works fine but I want to remove the milliseconds from total_time. I would like to get something in this format '%H:%M:%S'
Is this possible ?
Here's a part of the code:
if(before.channel == None):
join_time = round(time.time())
userdata["join_time"] = join_time
elif(after.channel == None):
if(userdata["join_time"] == None): return
userdata = voice_data[guild_id][new_user]
leave_time = time.time()
passed_time = leave_time - userdata["join_time"]
userdata["total_time"] += passed_time
userdata["join_time"] = None
And here's the output:
{
"total_time": 7.4658853358879,
}
You can use a datetime.timedelta object, with some caveats.
>>> import datetime as dt
>>> data = {"total_time": 7.4658853358879}
>>> data["total_time"] = str(dt.timedelta(seconds=int(data["total_time"])))
>>> data
{'total_time': '0:00:07'}
If your time is greater than 1 day, or less than zero, the format starts including days
>>> str(dt.timedelta(days=1))
'1 day, 0:00:00'
>>> str(dt.timedelta(seconds=-1))
'-1 day, 23:59:59'
>>>

How can I speed up a python loop with a timestamp interval condition

I have this code that is rather done in a hurry but it works in general. The only thing it runs forever. The idea is to update 2 columns on a table that is holding 1495748 rows, so the number of the list of timestamp being queried in first place. For each update value there has to be done a comparison in which the timestamp has to be in an hourly interval that is formed by two timestamps coming from the api in two different dicts. Is there a way to speed up things a little or maybe multiprocess it?
Hint: db_mac = db_connection to a Postgres database.
the response looks like this:
{'meta': {'source': 'National Oceanic and Atmospheric Administration, Deutscher Wetterdienst'}, 'data': [{'time': '2019-11-26 23:00:00', 'time_local': '2019-11-27 00:00', 'temperature': 8.3, 'dewpoint': 5.9, 'humidity': 85, 'precipitation': 0, 'precipitation_3': None, 'precipitation_6': None, 'snowdepth': None, 'windspeed': 11, 'peakgust': 21, 'winddirection': 160, 'pressure': 1004.2, 'condition': 4}, {'time': '2019-11-27 00:00:00', ....
import requests
import db_mac
from collections import defaultdict
import datetime
import time
t = time.time()
station = [10382,"DE","Berlin / Tegel",52.5667,13.3167,37,"EDDT",10382,"TXL","Europe/Berlin"]
dates = [("2019-11-20","2019-11-22"), ("2019-11-27","2019-12-02") ]
insert_dict = defaultdict(tuple)
hist_weather_list = []
for d in dates:
end = d[1]
start = d[0]
print(start, end)
url = "https://api.meteostat.net/v1/history/hourly?station={station}&start={start}&end={end}&time_zone={timezone}&&time_format=Y-m-d%20H:i&key=<APIKEY>".format(station=station[0], start=start, end=end, timezone=station[-1])
response = requests.get(url)
weather = response.json()
print(weather)
for i in weather["data"]:
hist_weather_list.append(i)
sql = "select timestamp from dump order by timestamp asc"
result = db_mac.execute(sql)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step1 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
for row in result:
try:
ts_dump = datetime.datetime.timestamp(row[0])
for i, hour in enumerate(hist_weather_list):
ts1 = datetime.datetime.timestamp(datetime.datetime.strptime(hour["time"], '%Y-%m-%d %H:%M:%S'))
ts2 = datetime.datetime.timestamp(datetime.datetime.strptime(hist_weather_list[i + 1]["time"], '%Y-%m-%d %H:%M:%S'))
if ts1 <= ts_dump and ts_dump < ts2:
insert_dict[row[0]] = (hour["temperature"], hour["pressure"])
except Exception as e:
pass
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step2 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
for key, value in insert_dict.items():
sql2 = """UPDATE dump SET temperature = """ + str(value[0]) + """, pressure = """+ str(value[1]) + """ WHERE timestamp = '"""+ str(key) + """';"""
db_mac.execute(sql2)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step3 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
UPDATE the code for multiprocessing. I'll let it run the night and give an update of the running time.
import requests
import db_mac
from collections import defaultdict
import datetime
import time
import multiprocessing as mp
t = time.time()
station = [10382,"DE","Berlin / Tegel",52.5667,13.3167,37,"EDDT",10382,"TXL","Europe/Berlin"]
dates = [("2019-11-20","2019-11-22"), ("2019-11-27","2019-12-02") ]
insert_dict = defaultdict(tuple)
hist_weather_list = []
for d in dates:
end = d[1]
start = d[0]
print(start, end)
url = "https://api.meteostat.net/v1/history/hourly?station={station}&start={start}&end={end}&time_zone={timezone}&&time_format=Y-m-d%20H:i&key=wzwi2YR5".format(station=station[0], start=start, end=end, timezone=station[-1])
response = requests.get(url)
weather = response.json()
print(weather)
for i in weather["data"]:
hist_weather_list.append(i)
sql = "select timestamp from dump order by timestamp asc"
result = db_mac.execute(sql)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step1 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
def find_parameters(x):
for row in result[x[0]:x[1]]:
try:
ts_dump = datetime.datetime.timestamp(row[0])
for i, hour in enumerate(hist_weather_list):
ts1 = datetime.datetime.timestamp(datetime.datetime.strptime(hour["time"], '%Y-%m-%d %H:%M:%S'))
ts2 = datetime.datetime.timestamp(datetime.datetime.strptime(hist_weather_list[i + 1]["time"], '%Y-%m-%d %H:%M:%S'))
if ts1 <= ts_dump and ts_dump < ts2:
insert_dict[row[0]] = (hour["temperature"], hour["pressure"])
except Exception as e:
pass
step1 = int(len(result) /4)
step2 = 2 * step1
step3 = 3 * step1
step4 = len(result)
steps = [[0,step1],[step1,step2],[step2,step3], [step3,step4]]
pool = mp.Pool(mp.cpu_count())
pool.map(find_parameters, steps)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step2 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
for key, value in insert_dict.items():
sql2 = """UPDATE dump SET temperature = """ + str(value[0]) + """, pressure = """+ str(value[1]) + """ WHERE timestamp = '"""+ str(key) + """';"""
db_mac.execute(sql2)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step3 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
UPDATE 2
It finished and ran for 2:45 hours in 4 cores on a raspberry pi. Though is there a more efficient way to do such things?
So theres a few minor things I can think of to speed this up a little. I figure anything little bit helps especially if you have a lot of rows to process. For starters, print statements can slow down your code a lot. I'd get rid of those if they are unneeded.
Most importantly, you are calling the api in every iteration of the loop. Waiting for a response from the API is probably taking up the bulk of your time. I looked a bit at the api you are using, but don't know the exact case you're using it for or what your dates "start" and "end" look like, but if you could do it in less calls that would surely speed up this loop by a lot. Another way you can do this is, it looks like the api has a .csv version of the data you can download and use. Running this on local data would be way faster. If you choose to go this route i'd suggest using pandas. (Sorry if you already know pandas and i'm over explaining) You can use: df = pd.read_csv("filename.csv") and edit the table from there easily. You can also do df.to_sql(params) to write to your data base. Let me know if you want help forming a pandas version of this code.
Also, not sure from your code if this would cause an error, but I would try, instead of your for loop (for i in weather["data"]).
hist_weather_list += weather["data"]
or possibly
hist_weather_list += [weather["data"]
Let me know how it goes!

Displaying a timedelta object in a django template

I'm having trouble getting my django template to display a timedelta object consistently. I tried using the time filter in my template, but nothing is displayed when I do this. The timedelta object is shown as follows on the errors page if I use Assert False:
time datetime.timedelta(0, 38, 132827)
This displays the time difference as:
0:00:38.132827
I would like to only show the hours, minutes, and seconds for each timedelta object. Does anyone have any suggestions on how I can do this?
I followed Peter's advice and wrote a custom template filter.
Here's the steps I took.
First I followed this guide to create a custom template filter.
Be sure to read this section on code layout.
Here's my filter code
from django import template
register = template.Library()
#register.filter()
def smooth_timedelta(timedeltaobj):
"""Convert a datetime.timedelta object into Days, Hours, Minutes, Seconds."""
secs = timedeltaobj.total_seconds()
timetot = ""
if secs > 86400: # 60sec * 60min * 24hrs
days = secs // 86400
timetot += "{} days".format(int(days))
secs = secs - days*86400
if secs > 3600:
hrs = secs // 3600
timetot += " {} hours".format(int(hrs))
secs = secs - hrs*3600
if secs > 60:
mins = secs // 60
timetot += " {} minutes".format(int(mins))
secs = secs - mins*60
if secs > 0:
timetot += " {} seconds".format(int(secs))
return timetot
Then in my template I did
{% load smooth_timedelta %}
{% timedeltaobject|smooth_timedelta %}
Example output
You can try remove the microseconds from the timedelta object, before sending it to the template:
time = time - datetime.timedelta(microseconds=time.microseconds)
I don't think there's anything built in, and timedeltas don't directly expose their hour and minute values. but this package includes a timedelta custom filter tag that might help:
http://pydoc.net/django-timedeltafield/0.7.10/
As far as I know you have to write you're own template tag for this. Below is the one I've concocted based on the Django core timesince/timeuntil code that should output what you're after:
#register.simple_tag
def duration( duration ):
"""
Usage: {% duration timedelta %}
Returns seconds duration as weeks, days, hours, minutes, seconds
Based on core timesince/timeuntil
"""
def seconds_in_units(seconds):
"""
Returns a tuple containing the most appropriate unit for the
number of seconds supplied and the value in that units form.
>>> seconds_in_units(7700)
(2, 'hour')
"""
unit_totals = OrderedDict()
unit_limits = [
("week", 7 * 24 * 3600),
("day", 24 * 3600),
("hour", 3600),
("minute", 60),
("second", 1)
]
for unit_name, limit in unit_limits:
if seconds >= limit:
amount = int(float(seconds) / limit)
if amount != 1:
unit_name += 's' # dodgy pluralisation
unit_totals[unit_name] = amount
seconds = seconds - ( amount * limit )
return unit_totals;
if duration:
if isinstance( duration, datetime.timedelta ):
if duration.total_seconds > 0:
unit_totals = seconds_in_units( duration.total_seconds() )
return ', '.join([str(v)+" "+str(k) for (k,v) in unit_totals.iteritems()])
return 'None'
from datetime import datetime
start = datetime.now()
taken = datetime.now() - start
str(taken)
'0:03:08.243773'
str(taken).split('.')[0]
'0:03:08'
The advice to write your own custom template tag is 100% the right way to go. You'll have complete control and can format it anyway you like. BUT -- if you're lazy and want a quick solution using builtin django facilities, you can use a hackey technique using the built-in timesince tag.
Basically, subtract your timedelta from the current time and drop it into your template. For example
import datetime
import django.template
tdelta = datetime.timedelta(hours=5, minutes=10)
tm = datetime.datetime.utcnow() - tdelta
django_engine = django.template.engines['django']
template = django_engine.from_string("My delta {{ tm|timesince }}")
print(template.render({'tm': tm})
Execute the above code in ./manage.py shell and the output is:
My delta 5 hours, 10 minutes

returning different time frames from datetime

I am parsing a file this way :
for d in csvReader:
print datetime.datetime.strptime(d["Date"]+"-"+d["Time"], "%d-%b-%Y-%H:%M:%S.%f").date()
date() returns : 2000-01-08, which is correct
time() returns : 06:20:00, which is also correct
How would I go about returning informations like "date+time" or "date+hours+minutes"
EDIT
Sorry I should have been more precise, here is what I am trying to achieve :
lmb = lambda d: datetime.datetime.strptime(d["Date"]+"-"+d["Time"], "%d-%b-%Y-%H:%M:%S.%f").date()
daily_quotes = {}
for k, g in itertools.groupby(csvReader, key = lmb):
lowBids = []
highBids = []
openBids = []
closeBids = []
for i in g:
lowBids.append(float(i["Low Bid"]))
highBids.append(float(i["High Bid"]))
openBids.append(float(i["Open Bid"]))
closeBids.append(float(i["Close Bid"]))
dayMin = min(lowBids)
dayMax = max(highBids)
open = openBids[0]
close = closeBids[-1]
daily_quotes[k.strftime("%Y-%m-%d")] = [dayMin,dayMax,open,close]
As you can see, right now I'm grouping values by day, I would like to group them by hour ( for which I would need date + hour ) or minutes ( date + hour + minute )
thanks in advance !
Don't use the date method of the datetime object you're getting from strptime. Instead, apply strftime directly to the return from strptime, which gets you access to all the member fields, including year, month, day, hour, minute, seconds, etc...
d = {"Date": "01-Jan-2000", "Time": "01:02:03.456"}
dt = datetime.datetime.strptime(d["Date"]+"-"+d["Time"], "%d-%b-%Y-%H:%M:%S.%f")
print dt.strftime("%Y-%m-%d-%H-%M-%S")

Categories

Resources