Related
I am trying to work out the difference in years between a MongoDB object and Today's date. The MongoDB object looks like this:
Key: Value
club_joined: 2021-08-10T00:00:00.000+00:00
I have the python code (that works):
# Short names of players who have completed at least ten years at their club.
loyal_players = []
# "Today" does not change between players, so read it once up front.
current_date = datetime.today()
for player in players.find():
    # relativedelta(a, b) splits the span between two datetimes into
    # years/months/days; only the completed years matter here.
    joined_club = player['club_joined']
    time_at_club_years = relativedelta(current_date, joined_club).years
    # check if they are over 10 years
    if time_at_club_years >= 10:
        loyal_players.append(player['short_name'])
But what I want is a Mongo query that can add into the .find() line:
for player in players.find():
I know the logic behind it is:
for player in players.find(
where (
(todayDate - player['club_joined']) > 10
):
But how do I write this as a MongoDB query in python?
MongoDb Type:
The $subtract function will take 2 dates and return the diff in millis so this works:
from pymongo import MongoClient
import datetime
# PyMongo stores datetimes as UTC and treats naive datetimes as already being
# UTC. A naive local datetime.now() would therefore skew the difference by the
# local UTC offset; an aware UTC value is unambiguous.
now = datetime.datetime.now(tz=datetime.timezone.utc)
# Ten years in milliseconds, approximated with 365-day years (leap days are
# ignored): secs in day X millis X 365 days X 10 yrs
TARG = 86400 * 1000 * 365 * 10
# $subtract on two dates yields their difference in milliseconds; $expr lets
# $match compare that computed value against the threshold.
cursor = db.foo.aggregate([
    {"$match": {"$expr": {"$gt": [{"$subtract": [now, "$club_joined"]}, TARG]}}}
])
for doc in cursor:
    print(doc)
Using relativedelta seems simpler:
import datetime
from dateutil.relativedelta import relativedelta
# Cutoff = ten calendar years before now (relativedelta accounts for leap
# years). An aware UTC "now" is used because PyMongo treats naive datetimes as
# UTC, so a naive local datetime.now() would shift the cutoff by the local
# UTC offset.
dt = datetime.datetime.now(tz=datetime.timezone.utc) - relativedelta(years=10)
# Documents whose club_joined is earlier than the cutoff.
cursor = db.foo.find({'club_joined': {'$lt': dt}})
for doc in cursor:
    print(doc)
I have this code that was written in a hurry, but it works in general. The only problem is that it runs forever. The idea is to update 2 columns of a table holding 1495748 rows, which is therefore also the number of timestamps queried in the first place. For each value to update, a comparison is needed: the timestamp has to fall within an hourly interval formed by two timestamps coming from two different dicts in the API response. Is there a way to speed things up a little, or maybe multiprocess it?
Hint: db_mac = db_connection to a Postgres database.
the response looks like this:
{'meta': {'source': 'National Oceanic and Atmospheric Administration, Deutscher Wetterdienst'}, 'data': [{'time': '2019-11-26 23:00:00', 'time_local': '2019-11-27 00:00', 'temperature': 8.3, 'dewpoint': 5.9, 'humidity': 85, 'precipitation': 0, 'precipitation_3': None, 'precipitation_6': None, 'snowdepth': None, 'windspeed': 11, 'peakgust': 21, 'winddirection': 160, 'pressure': 1004.2, 'condition': 4}, {'time': '2019-11-27 00:00:00', ....
import requests
import db_mac
from collections import defaultdict
import datetime
import time
# Wall-clock start, used by the "stepN" elapsed-time printouts.
t = time.time()
# Station record; only the first item (station id) and the last item (time
# zone name) are used when building the request URL.
station = [10382,"DE","Berlin / Tegel",52.5667,13.3167,37,"EDDT",10382,"TXL","Europe/Berlin"]
# (start, end) date pairs to download.
dates = [("2019-11-20","2019-11-22"), ("2019-11-27","2019-12-02") ]
insert_dict = defaultdict(tuple)
hist_weather_list = []
for d in dates:
    end = d[1]
    start = d[0]
    print(start, end)
    url = "https://api.meteostat.net/v1/history/hourly?station={station}&start={start}&end={end}&time_zone={timezone}&&time_format=Y-m-d%20H:i&key=<APIKEY>".format(station=station[0], start=start, end=end, timezone=station[-1])
    # Without a timeout a stalled connection blocks forever; raise_for_status
    # surfaces HTTP errors here instead of as a confusing failure further down.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    weather = response.json()
    print(weather)
    # Append all hourly records of this date range in one call.
    hist_weather_list.extend(weather["data"])
# All timestamps that need weather values, oldest first.
sql = "select timestamp from dump order by timestamp asc"
result = db_mac.execute(sql)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step1 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
import bisect

# Parse each hourly sample time exactly once. The original re-parsed every
# sample for every database row, which dominated the running time.
# Assumes hist_weather_list is in ascending time order (the date ranges above
# are; the API is presumed to return hours in order -- TODO confirm).
hour_starts = [
    datetime.datetime.timestamp(
        datetime.datetime.strptime(hour["time"], '%Y-%m-%d %H:%M:%S'))
    for hour in hist_weather_list
]
# The last sample has no successor, so it cannot open an interval.
last_interval = len(hour_starts) - 2
for row in result:
    if row[0] is None:
        # Rows without a timestamp cannot be matched; skip them.
        continue
    ts_dump = datetime.datetime.timestamp(row[0])
    # Index of the interval with hour_starts[idx] <= ts_dump < hour_starts[idx + 1],
    # found by binary search instead of a linear scan.
    idx = bisect.bisect_right(hour_starts, ts_dump) - 1
    if 0 <= idx <= last_interval:
        hour = hist_weather_list[idx]
        insert_dict[row[0]] = (hour["temperature"], hour["pressure"])
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step2 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
# Write the matched (temperature, pressure) pair back to each timestamp's row.
for key, value in insert_dict.items():
    # API fields can be None (see the sample response); str(None) would put
    # the invalid token "None" into the statement, so emit SQL NULL instead.
    temperature = "NULL" if value[0] is None else str(value[0])
    pressure = "NULL" if value[1] is None else str(value[1])
    # NOTE(review): the statement is built by concatenation because the
    # parameter support of db_mac.execute is not visible here; prefer a
    # parameterized query if the helper offers one.
    sql2 = """UPDATE dump SET temperature = """ + temperature + """, pressure = """ + pressure + """ WHERE timestamp = '""" + str(key) + """';"""
    db_mac.execute(sql2)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step3 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
UPDATE the code for multiprocessing. I'll let it run the night and give an update of the running time.
import requests
import db_mac
from collections import defaultdict
import datetime
import time
import multiprocessing as mp
# Wall-clock start, used by the "stepN" elapsed-time printouts.
t = time.time()
# Station record; only the first item (station id) and the last item (time
# zone name) are used when building the request URL.
station = [10382,"DE","Berlin / Tegel",52.5667,13.3167,37,"EDDT",10382,"TXL","Europe/Berlin"]
# (start, end) date pairs to download.
dates = [("2019-11-20","2019-11-22"), ("2019-11-27","2019-12-02") ]
insert_dict = defaultdict(tuple)
hist_weather_list = []
for d in dates:
    end = d[1]
    start = d[0]
    print(start, end)
    # The API key is a placeholder: real credentials must not be committed to
    # source. Substitute your own key for <APIKEY>.
    url = "https://api.meteostat.net/v1/history/hourly?station={station}&start={start}&end={end}&time_zone={timezone}&&time_format=Y-m-d%20H:i&key=<APIKEY>".format(station=station[0], start=start, end=end, timezone=station[-1])
    # Without a timeout a stalled connection blocks forever; raise_for_status
    # surfaces HTTP errors here instead of as a confusing failure further down.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    weather = response.json()
    print(weather)
    # Append all hourly records of this date range in one call.
    hist_weather_list.extend(weather["data"])
# All timestamps that need weather values, oldest first.
sql = "select timestamp from dump order by timestamp asc"
result = db_mac.execute(sql)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step1 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
def find_parameters(x):
    """Match one slice of the queried timestamps to hourly weather samples.

    Parameters
    ----------
    x : two-item sequence
        ``[start, stop]`` indices selecting ``result[start:stop]``.

    Returns
    -------
    dict
        Maps each matched row timestamp to ``(temperature, pressure)`` of the
        hourly sample whose interval contains it. The mapping is returned
        rather than written to the module-level ``insert_dict`` because a
        worker process only modifies its own copy of that dict.
    """
    import bisect

    # Parse each hourly sample time once per chunk instead of once per row.
    # Assumes hist_weather_list is in ascending time order -- TODO confirm.
    hour_starts = [
        datetime.datetime.timestamp(
            datetime.datetime.strptime(hour["time"], '%Y-%m-%d %H:%M:%S'))
        for hour in hist_weather_list
    ]
    # The last sample has no successor, so it cannot open an interval.
    last_interval = len(hour_starts) - 2
    matches = {}
    for row in result[x[0]:x[1]]:
        if row[0] is None:
            # Rows without a timestamp cannot be matched; skip them.
            continue
        ts_dump = datetime.datetime.timestamp(row[0])
        # Interval with hour_starts[idx] <= ts_dump < hour_starts[idx + 1].
        idx = bisect.bisect_right(hour_starts, ts_dump) - 1
        if 0 <= idx <= last_interval:
            hour = hist_weather_list[idx]
            matches[row[0]] = (hour["temperature"], hour["pressure"])
    return matches


# Split the rows into four contiguous index ranges, one task each.
step1 = int(len(result) /4)
step2 = 2 * step1
step3 = 3 * step1
step4 = len(result)
steps = [[0,step1],[step1,step2],[step2,step3], [step3,step4]]
# NOTE(review): on platforms that spawn (rather than fork) worker processes,
# this module-level code needs an `if __name__ == "__main__":` guard.
with mp.Pool(mp.cpu_count()) as pool:
    # Merge each worker's matches here in the parent process; assignments made
    # inside a worker never reach the parent's insert_dict.
    for chunk_matches in pool.map(find_parameters, steps):
        insert_dict.update(chunk_matches)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step2 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
# Write the matched (temperature, pressure) pair back to each timestamp's row.
for key, value in insert_dict.items():
    # API fields can be None (see the sample response); str(None) would put
    # the invalid token "None" into the statement, so emit SQL NULL instead.
    temperature = "NULL" if value[0] is None else str(value[0])
    pressure = "NULL" if value[1] is None else str(value[1])
    # NOTE(review): the statement is built by concatenation because the
    # parameter support of db_mac.execute is not visible here; prefer a
    # parameterized query if the helper offers one.
    sql2 = """UPDATE dump SET temperature = """ + temperature + """, pressure = """ + pressure + """ WHERE timestamp = '""" + str(key) + """';"""
    db_mac.execute(sql2)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step3 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
UPDATE 2
It finished and ran for 2:45 hours in 4 cores on a raspberry pi. Though is there a more efficient way to do such things?
So there are a few minor things I can think of to speed this up a little. I figure every little bit helps, especially if you have a lot of rows to process. For starters, print statements can slow down your code a lot. I'd get rid of those if they are unneeded.
Most importantly, you are calling the api in every iteration of the loop. Waiting for a response from the API is probably taking up the bulk of your time. I looked a bit at the api you are using, but don't know the exact case you're using it for or what your dates "start" and "end" look like, but if you could do it in less calls that would surely speed up this loop by a lot. Another way you can do this is, it looks like the api has a .csv version of the data you can download and use. Running this on local data would be way faster. If you choose to go this route i'd suggest using pandas. (Sorry if you already know pandas and i'm over explaining) You can use: df = pd.read_csv("filename.csv") and edit the table from there easily. You can also do df.to_sql(params) to write to your data base. Let me know if you want help forming a pandas version of this code.
Also, not sure from your code if this would cause an error, but I would try, instead of your for loop (for i in weather["data"]).
hist_weather_list += weather["data"]
or possibly
# NOTE: wrapping in a list appends the whole "data" list as ONE nested element;
# the unwrapped += form is the one equivalent to the element-by-element loop.
hist_weather_list += [weather["data"]]
Let me know how it goes!
I am parsing a file this way :
# Join each row's "Date" and "Time" columns, parse them as one datetime and
# print only the calendar-date part.
for d in csvReader:
    print(datetime.datetime.strptime(d["Date"]+"-"+d["Time"], "%d-%b-%Y-%H:%M:%S.%f").date())
date() returns : 2000-01-08, which is correct
time() returns : 06:20:00, which is also correct
How would I go about returning informations like "date+time" or "date+hours+minutes"
EDIT
Sorry I should have been more precise, here is what I am trying to achieve :
def lmb(d):
    """Return the calendar date of a CSV row; used as the grouping key."""
    return datetime.datetime.strptime(d["Date"]+"-"+d["Time"], "%d-%b-%Y-%H:%M:%S.%f").date()


# Maps "YYYY-MM-DD" to [lowest low bid, highest high bid, first open, last close].
daily_quotes = {}
# groupby only merges *adjacent* rows with equal keys, so the rows are
# expected to arrive in chronological order.
for k, g in itertools.groupby(csvReader, key = lmb):
    lowBids = []
    highBids = []
    openBids = []
    closeBids = []
    for i in g:
        lowBids.append(float(i["Low Bid"]))
        highBids.append(float(i["High Bid"]))
        openBids.append(float(i["Open Bid"]))
        closeBids.append(float(i["Close Bid"]))
    dayMin = min(lowBids)
    dayMax = max(highBids)
    # Named open_bid/close_bid so the builtin open() is not shadowed.
    open_bid = openBids[0]
    close_bid = closeBids[-1]
    daily_quotes[k.strftime("%Y-%m-%d")] = [dayMin,dayMax,open_bid,close_bid]
As you can see, right now I'm grouping values by day, I would like to group them by hour ( for which I would need date + hour ) or minutes ( date + hour + minute )
thanks in advance !
Don't use the date method of the datetime object you're getting from strptime. Instead, apply strftime directly to the return from strptime, which gets you access to all the member fields, including year, month, day, hour, minute, seconds, etc...
# Sample row with the same "Date"/"Time" layout as the CSV.
d = {"Date": "01-Jan-2000", "Time": "01:02:03.456"}
# Keep the full datetime (do not call .date()) so hour/minute/second remain
# available to strftime.
dt = datetime.datetime.strptime(d["Date"]+"-"+d["Time"], "%d-%b-%Y-%H:%M:%S.%f")
print(dt.strftime("%Y-%m-%d-%H-%M-%S"))
I'm currently writing some reporting code that allows users to optionally specify a date range. The way it works (simplified), is:
A user (optionally) specifies a year.
A user (optionally) specifies a month.
A user (optionally) specifies a day.
Here's a code snippet, along with comments describing what I'd like to do:
from datetime import datetime, timedelta
# ...
now = datetime.now()
start_time = now.replace(hour=0, minute=0, second=0, microsecond=0)
stop_time = now

# If the user enters no year, month, or day--then we'll simply run a
# report that only spans the current day (from the start of today to now).

if options['year']:
    # Months and days are 1-based: the start of a year is month=1, day=1
    # (replace(month=0, day=0) raises ValueError).
    start_time = start_time.replace(year=options['year'], month=1, day=1)
    # Stop at the last microsecond of the specified year: step to the start
    # of the following year, then back by one microsecond.
    stop_time = start_time.replace(year=start_time.year + 1) - timedelta(microseconds=1)

if options['month']:
    start_time = start_time.replace(month=options['month'], day=1)
    # First instant of the following month; December rolls over into January
    # of the next year.
    if start_time.month == 12:
        next_month = start_time.replace(year=start_time.year + 1, month=1)
    else:
        next_month = start_time.replace(month=start_time.month + 1)
    # Stop at the last microsecond of the specified month.
    stop_time = next_month - timedelta(microseconds=1)

if options['day']:
    start_time = start_time.replace(day=options['day'])
    # Stop at the last microsecond of the specified day.
    stop_time = start_time + timedelta(days=1, microseconds=-1)
I'm trying to find the most elegant way to write the code above--I've been trying to find a way to do it with timedelta, but can't seem to figure it out. Any advice would be appreciated.
To set the stop_time, advance start_time one year, month or day as appropriate, then subtract one timedelta(microseconds=1)
# Each narrower option refines the range set by the broader one, so the checks
# cascade (plain `if`s) instead of being mutually exclusive. stop_time is
# always derived from start_time, which sits at midnight; deriving it from
# "now" would carry the current time of day into the result.
if options['year']:
    start_time = start_time.replace(year=options['year'], month=1, day=1)
    # Start of the following year minus one microsecond = end of this year.
    stop_time = start_time.replace(year=start_time.year + 1) - timedelta(microseconds=1)
if options['month']:
    start_time = start_time.replace(month=options['month'], day=1)
    # December must roll over into January of the *next* year.
    if start_time.month == 12:
        next_month = start_time.replace(year=start_time.year + 1, month=1)
    else:
        next_month = start_time.replace(month=start_time.month + 1)
    stop_time = next_month - timedelta(microseconds=1)
if options['day']:
    start_time = start_time.replace(day=options['day'])
    stop_time = start_time + timedelta(days=1, microseconds=-1)
Using dict.get can simplify your code. It is a bit cleaner than using datetime.replace and timedelta objects.
Here's something to get you started:
from datetime import datetime
# Example input: only month and day supplied, year omitted.
options = dict(month=5, day=20)
now = datetime.now()
# Start of the range: missing year falls back to the current year, missing
# month/day fall back to 1; time of day is midnight.
start_time = datetime(year=options.get('year', now.year),
                      month=options.get('month', 1),
                      day=options.get('day', 1),
                      hour=0,
                      minute=0,
                      second=0)
# End of the range: missing parts fall back to the current date; the time of
# day is the current hour/minute/second.
stop_time = datetime(year=options.get('year', now.year),
                     month=options.get('month', now.month),
                     day=options.get('day', now.day),
                     hour=now.hour,
                     minute=now.minute,
                     second=now.second)
# Textual bounds of the current day: first second and last second.
today = datetime.date.today()
begintime, endtime = (
    today.strftime(pattern)
    for pattern in ("%Y-%m-%d 00:00:00", "%Y-%m-%d 23:59:59")
)
from datetime import datetime, date, timedelta
def get_current_timestamp():
    """Return the current time as a Unix timestamp truncated to whole seconds."""
    return int(datetime.now().timestamp())
def get_end_today_timestamp():
    """Return the Unix timestamp (int) of 23:59:59 on the current local day."""
    # Midnight at the start of tomorrow ...
    result = datetime.combine(date.today() + timedelta(days=1), datetime.min.time())
    # ... minus one second gives 23:59:59 today.
    return int(result.timestamp()) - 1
def get_datetime_from_timestamp(timestamp):
    """Convert a Unix timestamp to a naive local-time ``datetime``."""
    return datetime.fromtimestamp(timestamp)
# Datetime form of the last second of the current day.
end_today = get_datetime_from_timestamp(
    get_end_today_timestamp()
)
# strptime (not strftime) parses text into a datetime, and it needs the format
# string that matches the input text.
date = datetime.strptime('<input date str>', '<input date format>')
# replace() returns a new object, so the result has to be assigned back.
date = date.replace(hour=0, minute=0, second=0, microsecond=0)  # now we get begin of the day
date += timedelta(days=1, microseconds=-1)  # now end of the day
After looking at some of the answers here, and not really finding anything extremely elegant, I did some poking around and found my current solution (which I like quite well): dateutil (a third-party package, not part of the standard library).
Here's how I implemented it:
from datetime import date
from dateutil.relativedelta import relativedelta
now = date.today()

# NOTE: I'm not doing dict.get() since in my implementation, these dict
# keys are guaranteed to exist.
year = options['year'] or now.year
month = options['month'] or now.month

# The most specific option supplied decides the span. start_time is built only
# from the parts that span needs, so today's day-of-month is never forced into
# a different month (e.g. a month-only request made on the 31st).
if options['day']:
    # A single day.
    start_time = date(year=year, month=month, day=options['day'])
    stop_time = start_time + relativedelta(days=1)
elif options['month']:
    # A whole month, starting on its first day.
    start_time = date(year=year, month=month, day=1)
    stop_time = start_time + relativedelta(months=1)
elif options['year']:
    # A whole year, starting on January 1st.
    start_time = date(year=year, month=1, day=1)
    stop_time = start_time + relativedelta(years=1)
else:
    # Nothing specified: today only.
    start_time = now
    stop_time = now + relativedelta(days=1)

# ... do stuff with start_time and stop_time here ...
What I like about this implementation is that python's dateutil.relativedelta.relativedelta works really well on edge cases. It gets the days/months/years correct. If I have month=12, and do relativedelta(months=1), it'll increment the year and set the month to 1 (works nicely).
Also: in the above implementation, if the user specifies none of the optional dates (year, month, or day)--we'll fallback to a nice default (start_time = this morning, stop_time = tonight), that way we'll default to doing stuff for the current day only.
Thanks to everyone for their answers--they were helpful in my research.
How to increment the day of a datetime?
# The day number is passed straight to the constructor, so this raises
# ValueError once i exceeds the last day of August (31).
for i in range(1, 35):
    date = datetime.datetime(2003, 8, i)
    print(date)
But I need pass through months and years correctly? Any ideas?
# Start from a fixed moment and step forward one calendar day at a time;
# timedelta arithmetic rolls over month and year boundaries automatically.
date = datetime.datetime(2003,8,1,12,4,5)
for i in range(5):
    date += datetime.timedelta(days=1)
    print(date)
Incrementing dates can be accomplished using timedelta objects:
import datetime
# Current local time advanced by one day via timedelta arithmetic.
datetime.datetime.now() + datetime.timedelta(days=1)
Look up timedelta objects in the Python docs: http://docs.python.org/library/datetime.html
All of the current answers are wrong in some cases as they do not consider that timezones change their offset relative to UTC. So in some cases adding 24h is different from adding a calendar day.
Proposed solution
The following solution works for Samoa and keeps the local time constant.
def add_day(today):
    """
    Add a day to the current day.

    This takes care of historic offset changes and DST.

    Parameters
    ----------
    today : timezone-aware datetime object

    Returns
    -------
    tomorrow : timezone-aware datetime object
    """
    # Do the arithmetic in UTC, where a day is always 24 hours.
    today_utc = today.astimezone(datetime.timezone.utc)
    tz = today.tzinfo
    tomorrow_utc = today_utc + datetime.timedelta(days=1)
    # Back in the original zone the offset valid on the new date applies.
    tomorrow_utc_tz = tomorrow_utc.astimezone(tz)
    # Restore the original wall-clock time, which an offset change between
    # the two days may have shifted.
    tomorrow_utc_tz = tomorrow_utc_tz.replace(hour=today.hour,
                                              minute=today.minute,
                                              second=today.second)
    return tomorrow_utc_tz
Tested Code
# core modules
import datetime
# 3rd party modules
import pytz
# add_day methods
def add_day(today):
    """
    Add a day to the current day.

    This takes care of historic offset changes and DST.

    Parameters
    ----------
    today : timezone-aware datetime object

    Returns
    -------
    tomorrow : timezone-aware datetime object
    """
    # Do the arithmetic in UTC, where a day is always 24 hours.
    today_utc = today.astimezone(datetime.timezone.utc)
    tz = today.tzinfo
    tomorrow_utc = today_utc + datetime.timedelta(days=1)
    # Back in the original zone the offset valid on the new date applies.
    tomorrow_utc_tz = tomorrow_utc.astimezone(tz)
    # Restore the original wall-clock time, which an offset change between
    # the two days may have shifted.
    tomorrow_utc_tz = tomorrow_utc_tz.replace(hour=today.hour,
                                              minute=today.minute,
                                              second=today.second)
    return tomorrow_utc_tz
def add_day_datetime_timedelta_conversion(today):
    """Add 24 hours in UTC and convert the result back to the original zone.

    Unlike ``add_day`` the wall-clock time is not restored afterwards.
    """
    # Correct for Samoa, but dst shift
    today_utc = today.astimezone(datetime.timezone.utc)
    tz = today.tzinfo
    tomorrow_utc = today_utc + datetime.timedelta(days=1)
    tomorrow_utc_tz = tomorrow_utc.astimezone(tz)
    return tomorrow_utc_tz
def add_day_dateutil_relativedelta(today):
    """Add one day with dateutil's relativedelta (fails both test cases)."""
    # WRONG!
    from dateutil.relativedelta import relativedelta
    return today + relativedelta(days=1)
def add_day_datetime_timedelta(today):
    """Add one day with a plain timedelta (fails both test cases)."""
    # WRONG!
    return today + datetime.timedelta(days=1)
# Test cases
def test_samoa(add_day):
    """
    Test if add_day properly increases the calendar day for Samoa.

    Due to economic considerations, Samoa went from 2011-12-30 10:00-11:00
    to 2011-12-30 10:00+13:00. Hence the country skipped 2011-12-30 in its
    local time.

    See https://stackoverflow.com/q/52084423/562769

    A common wrong result here is 2011-12-30T23:59:00-10:00. This date never
    happened in Samoa.

    Returns True when the implementation under test yields the expected
    ISO string, False otherwise.
    """
    tz = pytz.timezone('Pacific/Apia')
    today_utc = datetime.datetime(2011, 12, 30, 9, 59,
                                  tzinfo=datetime.timezone.utc)
    today_tz = today_utc.astimezone(tz)  # 2011-12-29T23:59:00-10:00
    tomorrow = add_day(today_tz)
    return tomorrow.isoformat() == '2011-12-31T23:59:00+14:00'
def test_dst(add_day):
    """Test if add_day properly increases the calendar day if DST happens.

    Returns True when the implementation under test yields the expected
    ISO string, False otherwise.
    """
    tz = pytz.timezone('Europe/Berlin')
    today_utc = datetime.datetime(2018, 3, 25, 0, 59,
                                  tzinfo=datetime.timezone.utc)
    today_tz = today_utc.astimezone(tz)  # 2018-03-25T01:59:00+01:00
    tomorrow = add_day(today_tz)
    return tomorrow.isoformat() == '2018-03-26T01:59:00+02:00'
# Each candidate implementation paired with the label shown in the results.
to_test = [(add_day_dateutil_relativedelta, 'relativedelta'),
           (add_day_datetime_timedelta, 'timedelta'),
           (add_day_datetime_timedelta_conversion, 'timedelta+conversion'),
           (add_day, 'timedelta+conversion+dst')]
print('{:<25}: {:>5} {:>5}'.format('Method', 'Samoa', 'DST'))
for method, name in to_test:
    # The boolean test results go through an alignment format spec, which
    # renders them as 1 / 0.
    print('{:<25}: {:>5} {:>5}'
          .format(name,
                  test_samoa(method),
                  test_dst(method)))
Test results
Method : Samoa DST
relativedelta : 0 0
timedelta : 0 0
timedelta+conversion : 1 0
timedelta+conversion+dst : 1 1
Here is another method to add days to a date using dateutil's relativedelta.
from datetime import datetime
from dateutil.relativedelta import relativedelta
print('Today: ', datetime.now().strftime('%d/%m/%Y %H:%M:%S'))
# The plural keyword `days=1` ADDS one day. The singular `day=1` is an absolute
# field and would instead set the day of the month to 1.
date_after_month = datetime.now() + relativedelta(days=1)
print('After a Days:', date_after_month.strftime('%d/%m/%Y %H:%M:%S'))
Output:
Today: 25/06/2015 20:41:44
After a Days: 01/06/2015 20:41:44
Simplest solution
from datetime import timedelta, datetime
# Step a fixed moment forward one day at a time; timedelta arithmetic handles
# month and year boundaries.
date = datetime(2003,8,1,12,4,5)
for i in range(5):
    date += timedelta(days=1)
    print(date)
This was a straightforward solution for me:
from datetime import timedelta, datetime
# Today's date as "YYYY-MM-DD" text, and the moment exactly one day from now
# (kept as a datetime).
today = datetime.today().strftime("%Y-%m-%d")
tomorrow = datetime.today() + timedelta(days=1)
You can also import timedelta so the code is cleaner.
from datetime import datetime, timedelta
# delta_value: the number of seconds to add (supply your own number; timedelta
# needs a numeric value, not a list).
date = datetime.now() + timedelta(seconds=delta_value)
Then convert the date to a string
date = date.strftime('%Y-%m-%d %H:%M:%S')
Python one liner is
# delta_value: the number of seconds to add (supply your own number; timedelta
# needs a numeric value, not a list).
date = (datetime.now() + timedelta(seconds=delta_value)).strftime('%Y-%m-%d %H:%M:%S')
A short solution without libraries at all. :)
d = "8/16/18"
# Split on the separators instead of searching for the literal "/18", which
# matched the wrong slash whenever the day had the same digits as the year
# (e.g. "8/18/18").
month_value, day_value, year_value = d.split('/')
# NOTE: this only bumps the day number; it does not roll over into the next
# month or year.
tomorrow = f"{month_value}/{int(day_value)+1}/{year_value}"
print(tomorrow)
# 8/17/18
Make sure that "string d" is actually in the form of %m/%d/%Y so that you won't have problems transitioning from one month to the next.