I am trying to work out the difference in years between a MongoDB object and Today's date. The MongoDB object looks like this:
Key: Value
club_joined: 2021-08-10T00:00:00.000+00:00
I have the python code (that works):
# Collect the short names of players who have been at their club for 10+ years.
# "Today" is read once so every player is measured against the same instant
# instead of re-reading the clock on each iteration.
current_date = datetime.today()
loyal_players = []
for player in players.find():
    # whole years elapsed between joining the club and today
    time_at_club_years = relativedelta(current_date, player['club_joined']).years
    # check if they are over 10 years
    if time_at_club_years >= 10:
        loyal_players.append(player['short_name'])
But what I want is a Mongo query that I can add to the .find() line:
for player in players.find():
I know the logic behind it is:
for player in players.find(
where (
(todayDate - player['club_joined']) > 10
):
But how do I write this as a MongoDB query in python?
MongoDb Type:
The $subtract function will take 2 dates and return the diff in millis so this works:
from pymongo import MongoClient
import datetime
# Use a timezone-aware UTC "now": MongoDB stores dates in UTC and PyMongo
# converts aware datetimes to UTC before sending them, whereas a naive local
# now() would skew the difference by the machine's UTC offset.
now = datetime.datetime.now(datetime.timezone.utc)
# Ten years expressed in milliseconds, because $subtract on two dates yields
# milliseconds. NOTE: 365-day years are used, so leap days are not counted.
TARG = 86400 * 1000 * 365 * 10  # secs in day X millis X 365 days X 10 yrs
# Keep only documents whose club_joined lies more than TARG ms in the past.
cursor = db.foo.aggregate([
    {"$match": {"$expr": {"$gt": [{"$subtract": [now, "$club_joined"]}, TARG]}}}
])
for doc in cursor:
    print(doc)
Using relativedelta seems simpler:
import datetime
from dateutil.relativedelta import relativedelta
# Cut-off instant: ten calendar years before now. The timestamp is taken as an
# aware UTC value because MongoDB stores dates in UTC; a naive local now()
# would shift the cut-off by the machine's UTC offset.
dt = datetime.datetime.now(datetime.timezone.utc) - relativedelta(years=10)
# Documents whose club_joined is earlier than the cut-off joined 10+ years ago.
cursor = db.foo.find({'club_joined': {'$lt': dt}})
for doc in cursor:
    print(doc)
I'm currently getting this error:
Traceback (most recent call last):
File "/Users/user/Documents/test.py", line 44, in <module>
get_slots(hours, appointments)
File "/Users/user/Documents/test.py", line 36, in get_slots
while start + duration <= end:
TypeError: coercing to Unicode: need string or buffer, datetime.timedelta found
My code:
from datetime import timedelta
import datetime
#notice the additional brackets to keep the 2 slots as two separate lists. So, 930-1230 is one slot, 1330-1400 is an another.
# HOURS AND APPOINTMENTS ARE GENERATED BY GATHERING DATA FROM DATABASE
# Sample inputs: one opening-hours range and the booked appointments, all given
# as u'HH:MM' strings.
hours = [[u'08:00', u'17:00']]
appointments = [(u'12:00', u'12:30'), (u'10:30', u'11:00')]
# Intended to list the free slots of length `duration` between the opening
# hours and the appointments. This is the version from the question: it raises
# the TypeError shown in the traceback above.
def get_slots(hours, appointments, duration=timedelta(hours=1)):
# Sort the appointments together with zero-length markers for the opening and
# closing time taken from hours[0].
slots = sorted([(hours[0][0], hours[0][0])] + appointments + [(hours[0][1], hours[0][1])])
# Walk consecutive entries: the end of one is `start`, the beginning of the
# next is `end`.
for start, end in ((slots[i][1], slots[i+1][0]) for i in range(len(slots)-1)):
assert start <= end, "Cannot attend all appointments"
# NOTE: start and end are still the u'HH:MM' strings here, so
# `start + duration` adds a timedelta to a string -- this is the line named in
# the traceback.
while start + duration <= end:
# NOTE(review): the list is created anew on every pass of the loop, so
# previously appended slots are discarded -- confirm whether it should be
# created once before the loops.
json = []
json.append("{:%H:%M} - {:%H:%M}".format(start, start + duration))
start += duration
return json
if __name__ == "__main__":
get_slots(hours, appointments)
The code should output something like:
09:00 - 10:00
10:30 - 11:30
13:00 - 14:00
14:00 - 15:00
I found this code from Python - finding time slots
You have to convert both the start and end strings to datetime objects. See the example below:
from datetime import timedelta
import datetime
#notice the additional brackets to keep the 2 slots as two separate lists. So, 930-1230 is one slot, 1330-1400 is an another.
# HOURS AND APPOINTMENTS ARE GENERATED BY GATHERING DATA FROM DATABASE
hours = [[u'08:00', u'17:00']]
appointments = [(u'12:00', u'12:30'), (u'10:30', u'11:00')]


def get_slots(hours, appointments, duration=timedelta(hours=1)):
    """Return every free slot of length *duration* between the appointments.

    Args:
        hours: list whose first element is ``[opening, closing]`` as
            ``'HH:MM'`` strings; only ``hours[0]`` is used.
        appointments: iterable of ``(start, end)`` pairs as ``'HH:MM'``
            strings, in any order.
        duration: length of each slot; must be positive.

    Returns:
        A list of ``'HH:MM - HH:MM'`` strings in chronological order, or an
        empty list when no slot fits.

    Raises:
        AssertionError: if two entries overlap, i.e. one ends after the next
            one starts.
        ValueError: if *duration* is not positive (the loop would never end).
    """
    if duration <= timedelta(0):
        raise ValueError("duration must be positive")

    opening, closing = hours[0][0], hours[0][1]
    # Zero-length markers at opening and closing time let the gaps before the
    # first and after the last appointment be handled like any other gap.
    # Zero-padded 'HH:MM' strings sort chronologically.
    slots = sorted([(opening, opening)] + list(appointments) + [(closing, closing)])

    free_slots = []
    for previous, following in zip(slots, slots[1:]):
        # The strings must become datetimes before a timedelta can be added.
        start = datetime.datetime.strptime(previous[1], "%H:%M")
        end = datetime.datetime.strptime(following[0], "%H:%M")
        if start > end:
            # Raised explicitly (same exception type as the former assert) so
            # the check is not stripped when Python runs with -O.
            raise AssertionError("Cannot attend all appointments")
        while start + duration <= end:
            free_slots.append("{:%H:%M} - {:%H:%M}".format(start, start + duration))
            start += duration
    return free_slots


if __name__ == "__main__":
    x = get_slots(hours, appointments)
I have this code that was written in a hurry, but it works in general. The only problem is that it runs forever. The idea is to update 2 columns of a table holding 1495748 rows, which is therefore also the length of the list of timestamps queried in the first place. For each value to update, a comparison has to be made: the timestamp has to fall within the hourly interval formed by two timestamps that come from the API in two different dicts. Is there a way to speed things up a little, or maybe to multiprocess it?
Hint: db_mac = db_connection to a Postgres database.
the response looks like this:
{'meta': {'source': 'National Oceanic and Atmospheric Administration, Deutscher Wetterdienst'}, 'data': [{'time': '2019-11-26 23:00:00', 'time_local': '2019-11-27 00:00', 'temperature': 8.3, 'dewpoint': 5.9, 'humidity': 85, 'precipitation': 0, 'precipitation_3': None, 'precipitation_6': None, 'snowdepth': None, 'windspeed': 11, 'peakgust': 21, 'winddirection': 160, 'pressure': 1004.2, 'condition': 4}, {'time': '2019-11-27 00:00:00', ....
import requests
import db_mac
from collections import defaultdict
import datetime
import time
# Step 1: download the hourly weather history for each date range and read all
# timestamps from the dump table.
t = time.time()
station = [10382,"DE","Berlin / Tegel",52.5667,13.3167,37,"EDDT",10382,"TXL","Europe/Berlin"]
dates = [("2019-11-20","2019-11-22"), ("2019-11-27","2019-12-02") ]
insert_dict = defaultdict(tuple)
hist_weather_list = []
for start, end in dates:
    print(start, end)
    # station[0] is the station id, station[-1] the time zone name
    url = "https://api.meteostat.net/v1/history/hourly?station={station}&start={start}&end={end}&time_zone={timezone}&&time_format=Y-m-d%20H:i&key=<APIKEY>".format(station=station[0], start=start, end=end, timezone=station[-1])
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=60)
    # Stop on an HTTP error instead of trying to read "data" from an error body.
    response.raise_for_status()
    weather = response.json()
    print(weather)
    hist_weather_list.extend(weather["data"])
sql = "select timestamp from dump order by timestamp asc"
result = db_mac.execute(sql)
# elapsed time since the start, printed as HH:MM:SS.ss
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step1 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
# Step 2: for every dump timestamp find the weather record whose hour contains
# it and remember (temperature, pressure) for that timestamp.
#
# The weather times are parsed once here; parsing them again for every row was
# the dominant cost of this step.
weather_ts = [
    datetime.datetime.timestamp(datetime.datetime.strptime(hour["time"], '%Y-%m-%d %H:%M:%S'))
    for hour in hist_weather_list
]
for row in result:
    try:
        ts_dump = datetime.datetime.timestamp(row[0])
    except (TypeError, ValueError, OverflowError, OSError):
        # Rows whose timestamp cannot be converted are skipped, as before.
        continue
    # Each record is paired with its successor, which supplies the upper bound
    # of the interval; the last record has no successor and is never matched.
    for i in range(len(weather_ts) - 1):
        if weather_ts[i] <= ts_dump < weather_ts[i + 1]:
            hour = hist_weather_list[i]
            insert_dict[row[0]] = (hour["temperature"], hour["pressure"])
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step2 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
# Step 3: write the matched temperature and pressure back to the dump table,
# one UPDATE per timestamp.
# NOTE(review): the statement is assembled by string concatenation; if
# db_mac.execute accepts bound parameters, prefer those.
for key, (temperature, pressure) in insert_dict.items():
    # The API can report None for a reading; str(None) would put the bare word
    # None into the statement, so it is written as SQL NULL instead.
    temperature_sql = "NULL" if temperature is None else str(temperature)
    pressure_sql = "NULL" if pressure is None else str(pressure)
    sql2 = """UPDATE dump SET temperature = """ + temperature_sql + """, pressure = """ + pressure_sql + """ WHERE timestamp = '""" + str(key) + """';"""
    db_mac.execute(sql2)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step3 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
UPDATE: the code adapted for multiprocessing. I'll let it run overnight and give an update on the running time.
import requests
import db_mac
from collections import defaultdict
import datetime
import time
import multiprocessing as mp
# Step 1: download the hourly weather history for each date range and read all
# timestamps from the dump table.
import os

t = time.time()
station = [10382,"DE","Berlin / Tegel",52.5667,13.3167,37,"EDDT",10382,"TXL","Europe/Berlin"]
dates = [("2019-11-20","2019-11-22"), ("2019-11-27","2019-12-02") ]
insert_dict = defaultdict(tuple)
hist_weather_list = []
# The API key is a credential and must not be committed with the source; it is
# read from the environment (KeyError if METEOSTAT_API_KEY is not set).
api_key = os.environ["METEOSTAT_API_KEY"]
for start, end in dates:
    print(start, end)
    # station[0] is the station id, station[-1] the time zone name
    url = "https://api.meteostat.net/v1/history/hourly?station={station}&start={start}&end={end}&time_zone={timezone}&&time_format=Y-m-d%20H:i&key={key}".format(station=station[0], start=start, end=end, timezone=station[-1], key=api_key)
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=60)
    # Stop on an HTTP error instead of trying to read "data" from an error body.
    response.raise_for_status()
    weather = response.json()
    print(weather)
    hist_weather_list.extend(weather["data"])
sql = "select timestamp from dump order by timestamp asc"
result = db_mac.execute(sql)
# elapsed time since the start, printed as HH:MM:SS.ss
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step1 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
# The weather times are parsed once, before the pool is created, so forked
# workers inherit the parsed list instead of re-parsing it for every row.
weather_ts = [
    datetime.datetime.timestamp(datetime.datetime.strptime(hour["time"], '%Y-%m-%d %H:%M:%S'))
    for hour in hist_weather_list
]


def find_parameters(x):
    """Match the rows ``result[x[0]:x[1]]`` to the weather hour containing them.

    Args:
        x: ``[start, stop]`` slice bounds into ``result``.

    Returns:
        dict mapping each matched row timestamp to ``(temperature, pressure)``.
        The matches are also stored in this process's ``insert_dict``. When
        the function runs in a pool worker that is only the worker's copy, so
        the caller must merge the returned dict itself.
    """
    found = {}
    for row in result[x[0]:x[1]]:
        try:
            ts_dump = datetime.datetime.timestamp(row[0])
        except (TypeError, ValueError, OverflowError, OSError):
            # Rows whose timestamp cannot be converted are skipped, as before.
            continue
        # Each record is paired with its successor, which supplies the upper
        # bound of the interval; the last record is never matched.
        for i in range(len(weather_ts) - 1):
            if weather_ts[i] <= ts_dump < weather_ts[i + 1]:
                hour = hist_weather_list[i]
                found[row[0]] = (hour["temperature"], hour["pressure"])
    insert_dict.update(found)
    return found


# Split the rows into four contiguous chunks.
step1 = int(len(result) /4)
step2 = 2 * step1
step3 = 3 * step1
step4 = len(result)
steps = [[0,step1],[step1,step2],[step2,step3], [step3,step4]]
# NOTE: creating the pool at module level relies on the "fork" start method;
# with "spawn" this code would have to sit under `if __name__ == "__main__":`.
with mp.Pool(mp.cpu_count()) as pool:
    # Workers cannot modify the parent's insert_dict, so their results are
    # returned and merged here; otherwise step 3 would have nothing to write.
    for partial in pool.map(find_parameters, steps):
        insert_dict.update(partial)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step2 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
# Step 3: write the matched temperature and pressure back to the dump table,
# one UPDATE per timestamp.
# NOTE(review): the statement is assembled by string concatenation; if
# db_mac.execute accepts bound parameters, prefer those.
for key, (temperature, pressure) in insert_dict.items():
    # The API can report None for a reading; str(None) would put the bare word
    # None into the statement, so it is written as SQL NULL instead.
    temperature_sql = "NULL" if temperature is None else str(temperature)
    pressure_sql = "NULL" if pressure is None else str(pressure)
    sql2 = """UPDATE dump SET temperature = """ + temperature_sql + """, pressure = """ + pressure_sql + """ WHERE timestamp = '""" + str(key) + """';"""
    db_mac.execute(sql2)
hours, rem = divmod(time.time() - t, 3600)
minutes, seconds = divmod(rem, 60)
print("step3 {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
UPDATE 2
It finished after running for 2:45 hours on 4 cores of a Raspberry Pi. Still, is there a more efficient way to do such things?
So there are a few minor things I can think of to speed this up a little. I figure every little bit helps, especially if you have a lot of rows to process. For starters, print statements can slow down your code a lot. I'd get rid of those if they are not needed.
Most importantly, you are calling the API in every iteration of the loop. Waiting for a response from the API is probably taking up the bulk of your time. I looked a bit at the API you are using, but I don't know the exact case you're using it for or what your dates "start" and "end" look like; if you could do it in fewer calls, that would surely speed up this loop by a lot. Another option: it looks like the API has a .csv version of the data that you can download and use. Running this on local data would be way faster. If you choose to go this route, I'd suggest using pandas. (Sorry if you already know pandas and I'm over-explaining.) You can use df = pd.read_csv("filename.csv") and edit the table from there easily. You can also use df.to_sql(params) to write to your database. Let me know if you want help forming a pandas version of this code.
Also, I'm not sure from your code whether this would cause an error, but instead of your for loop (for i in weather["data"]) I would try:
hist_weather_list += weather["data"]
or possibly
# closing bracket added; note this form adds the whole "data" list as ONE element
hist_weather_list += [weather["data"]]
Let me know how it goes!
I've been working on a Python script and am having issues with some verifications I set up. I have this procedure file with a function that uses an order number and a customer number to check some past history about the customer's orders. I've been testing live on our server and I keep failing the last if statement. The order number and customer number I'm using do have more than one order, and some are over 60 days old, so it should pass the test, but it doesn't. I've been looking over my code and I just can't see what could be causing this.
edit: here are the print results of current and retrieved timestamps:
current_timestamp = 1531849617.921927
retrieved_timestamp = 1489622400
two_month_seconds = 5184000
one_month_seconds = 2592000
Python3
from classes import helper
from classes import api
from classes import order
from procedures import orderReleaseProcedure
import time
import datetime
import re
# Look at a customer's order history and return a status dict with the keys
# 'success' (0 or 1) and 'message'.
# NOTE(review): the indentation of this listing was lost, so the nesting of the
# statements below cannot be confirmed from the text alone.
def verifyCustomer(customer_id, order_id):
self_helper = helper.Helper()
# configuration values used below: status-name fragments that mark a customer
# as blocked, and a mapping that is indexed with 'order_hold_manager_review'
customer_blocked_reasons = self_helper.getConfig('customer_blocked_reasons')
order_statuses = self_helper.getConfig('order_statuses')
customer_is_blocked = False
self_api = api.Api()
self_order =order.Order(order_id)
# default result, returned unchanged when no branch below modifies it
status = {
'success' : 0,
'message' :'verify_payment_method'
}
# all orders of this customer, as returned by the API wrapper
results = self_api.which_api('orders?customer_id={}'.format(customer_id))
order_count = results['total_count']
# the history is only examined when the customer has more than one order
if order_count > 1:
for result in results['orders']:
order_status_info= self_api.which_api('order_statuses/%d' % result['order_status_id'])
# the customer counts as blocked once any configured reason occurs in the
# order's status name
for customer_blocked_reason in customer_blocked_reasons:
if customer_blocked_reason in order_status_info['name']:
customer_is_blocked = True
# NOTE(review): order_id is overwritten here but not read again in this function
order_id = 0
order_date = result['ordered_at']
# 60 and 30 days expressed in seconds
two_month_seconds = (3600 * 24) * 60
one_month_seconds = (3600 * 24) * 30
# keep only the date part in front of the "T"
stripped_date = order_date[:order_date.find("T")]
current_timestamp = time.time()
# NOTE(review): "%s" is not one of the strftime directives documented by
# Python; whether it yields epoch seconds depends on the platform -- confirm.
retrieved_timestamp = int(datetime.datetime.strptime(stripped_date, '%Y-%m-%d').strftime("%s"))
# NOTE(review): this condition holds when the order is NEWER than 30 days,
# while the message below speaks of orders OLDER than 30 days -- confirm which
# direction is intended.
if retrieved_timestamp > (current_timestamp - one_month_seconds) and not customer_is_blocked:
status['success'] = 1
status['message'] = "Customer Verified with orders older than 30 days and no blocking reasons"
print(' 30 day check was triggered ')
print(status)
break
elif customer_is_blocked:
# a blocked customer's order is moved to the manager-review hold status
status_change_result = self_order.update_status(order_statuses['order_hold_manager_review'])
status['success'] = 1
status['message'] = "Changed order status to Order Hold - Manager Review"
print(' Customer block was triggered ')
print(status_change_result)
break
# This condition holds when the order is NOT older than 60 days. For an
# unblocked customer's order older than 60 days (such as the timestamps quoted
# in the question) none of the three branches is taken, so status is left
# unchanged for that order.
elif not retrieved_timestamp < (current_timestamp - two_month_seconds):
status['success'] = 0
status['message'] = "There is more than 1 order, and none are greater than 60 days, we need to check manually"
print(' 60 day check was triggered ')
print(status)
break
return status
I use naturaltime in my Django application.
How can I display only time in minutes, then hours, then days, then weeks?
Here is my code:
{{ obj.pub_date|naturaltime }}
I use two custom filters to solve a very similar problem using babel and pytz. I write "Today" or "Yesterday" plus the time. You're welcome to use my code any way you like.
My template code uses the filters in two places. The first writes "Today", "Yesterday", or the date if it was even earlier.
{{ scored_document.fields.10.value|format_date_human(locale='en') }}
Then this tag writes the actual time of day
{{ scored_document.fields.10.value|datetimeformat_list(hour=scored_document.fields.17.value|int ,minute =scored_document.fields.18.value|int, timezoneinfo=timezoneinfo, locale=locale) }}
The two corresponding functions are
# Abbreviated month names, indexed by month number - 1.
MONTHS = ('Jan.', 'Feb.', 'Mar.', 'April.', 'May.', 'June.',
          'July.', 'Aug.', 'Sep.', 'Oct.', 'Nov.', 'Dec.')
FORMAT = '%H:%M / %d-%m-%Y'


def format_date_human(to_format, locale='en', timezoneinfo='Asia/Calcutta'):
    """Return 'Today', 'Yesterday' or '<day> <month>' for the datetime *to_format*.

    Only the date part is produced; the time of day is rendered separately by
    ``datetimeformat_list``.

    Args:
        to_format: naive ``datetime`` to describe, compared with the local
            ``datetime.now()``.
        locale: kept for call compatibility; not used for the date text.
        timezoneinfo: kept for call compatibility; not used for the date text.

    Returns:
        The translated word for today or yesterday, otherwise the day of the
        month followed by the translated month abbreviation.
    """
    # Compare calendar dates, not elapsed time: 23:00 yesterday is only a few
    # hours before 01:00 today, yet it is still "yesterday".
    days_ago = (datetime.now().date() - to_format.date()).days
    if days_ago == 0:
        date_str = _('Today')
    elif days_ago == 1:
        date_str = _('Yesterday')
    else:
        month = MONTHS[to_format.month - 1]
        date_str = '{0} {1}'.format(to_format.day, _(month))
    return "{0}".format(date_str)
def datetimeformat_list(date, hour, minute, locale='en', timezoneinfo='Asia/Calcutta'):
    """Render the time of day (pattern 'H:mm') for *date* at *hour*:*minute*.

    The calendar date taken from *date* and the given clock time are treated
    as UTC and converted to the zone named by *timezoneinfo* before being
    formatted. *locale* is accepted but not forwarded to ``format_time``.
    """
    import datetime as DT
    import pytz

    # naive datetime assembled from the date's fields and the separate clock values
    naive_moment = DT.datetime(
        int(date.year), int(date.month), int(date.day), int(hour), int(minute)
    )
    # mark it as UTC, then shift it into the requested zone
    utc_moment = pytz.utc.localize(naive_moment)
    zoned_moment = utc_moment.astimezone(pytz.timezone(timezoneinfo))
    return "{0}".format(format_time(zoned_moment, 'H:mm'))