Related
I have a numpy array with datetime stored in array A of size 100 as:
>>>A[0]
datetime.datetime(2011, 1, 1, 0, 0)
The other 99 elements are datetime.datetime objects also but few of them repeat e.g.
A[55]
datetime.datetime(2011, 11, 2, 0, 0)
A[56]
datetime.datetime(2011, 11, 2, 0, 0)
I have another array of Temperatures of same size as A with values corresponding to rows of A as:
Temperature[0] = 55
Temperature[55] = 40
Temperature[56] = 50
I am trying to obtain a new array A2 which contains only the unique datetimes from A, together with the average of the temperatures that correspond to each repeated datetime.
So in this case I will have A2 with only 1 datetime.datetime(2011, 11, 2, 0, 0) and temperature will be 0.5*(40+50) = 45
I am trying to use pandas pivot table as:
DayLightSavCure = pd.pivot_table(pd.DataFrame({'DateByHour': A, 'Temp': Temperature}), index=['DateByHour'], values=['Temp'], aggfunc=[np.mean])
But the error is:
ValueError: If using all scalar values, you must pass an index
I do actually concur with the other answerer: this can be achieved without digging into pandas. itertools is really nice for this. Written for Python 3.4+ (because of the statistics module):
from itertools import groupby
from operator import itemgetter
from random import randint
import datetime
from statistics import mean
# Build sample input: 100 timestamps cycling through the 12 months of 2005,
# each paired with a random integer temperature between 0 and 100.
dates = []
for i in range(100):
    dates.append(datetime.datetime(2005, i % 12 + 1, 5, 5, 5, 5))
temperatures = []
for _ in range(100):
    temperatures.append(randint(0, 100))

# Average the temperatures per distinct timestamp.
# `groupby` only merges *adjacent* items with equal keys, so the
# (date, temperature) pairs are sorted before grouping.
ordered_pairs = sorted(zip(dates, temperatures))
grouped = groupby(ordered_pairs, key=itemgetter(0))
averaged = []
for key, readings in grouped:
    averaged.append((key, mean(reading[1] for reading in readings)))

print(averaged)  # list of (datetime, mean temperature) tuples
# [(datetime.datetime(2005, 1, 5, 5, 5, 5), 65.22222222222223), (datetime.datetime(2005, 2, 5, 5, 5, 5), 60.0),.......
print(dict(averaged))  # same data as a dict keyed by datetime
{datetime.datetime(2005, 3, 5, 5, 5, 5): 48.111111111111114, datetime.datetime(2005, 12, 5, 5, 5, 5): 43.75, ..........
If you have to have two separate lists/iterators at the end of the calculation just apply zip to averaged.
Given a Pandas series and a datetime slice or range of dates, how can I get all dates from the datetime slice or range that are in the series ?
Ex:
# Date window of interest, expressed both as a slice and as a daily range
st = datetime.datetime(year=2014, month=8, day=31, hour=0, minute=0)
en = datetime.datetime(year=2014, month=9, day=7, hour=0, minute=0)
date_slice = slice(st, en)
rng = pd.date_range(start='08-31-2014', periods=8, freq='D')

# Series keyed by date strings; some days of the window are absent and
# 09-03-2014 holds NaN
s = pd.Series(
    [1, 1, np.nan, 1, 1],
    index=['09-01-2014', '09-02-2014', '09-03-2014', '09-04-2014', '09-06-2014'],
)
In this example I would like to return an array of dates in str or datetime format
['08-31-2014', '09-05-2014' , '09-07-2014']
DatetimeIndex.difference returns a sorted set difference:
In [573]: (rng.difference(pd.to_datetime(s.index))).format()
Out[573]: ['2014-08-31', '2014-09-05', '2014-09-07']
In [598]: (rng.difference(pd.to_datetime(s.index))).format(formatter=lambda x: x.strftime('%m-%d-%Y'))
Out[598]: ['08-31-2014', '09-05-2014', '09-07-2014']
I'm looking for an elegant and pythonic way to get the date of the end of the previous quarter.
Something like this:
def previous_quarter(reference_date):
...
>>> previous_quarter(datetime.date(2013, 5, 31))
datetime.date(2013, 3, 31)
>>> previous_quarter(datetime.date(2013, 2, 1))
datetime.date(2012, 12, 31)
>>> previous_quarter(datetime.date(2013, 3, 31))
datetime.date(2012, 12, 31)
>>> previous_quarter(datetime.date(2013, 11, 1))
datetime.date(2013, 9, 30)
Edit: Have I tried anything?
Yes, this seems to work:
def previous_quarter(ref_date):
    """Return the last day of the quarter preceding the one containing ``ref_date``.

    The result is computed directly: jump to the first day of the current
    quarter and step back one day. The earlier day-by-day walk stopped at
    *any* day whose month is divisible by 3, so a reference date such as
    31 March (or any other day in March, June, September or December other
    than the 1st) produced a date inside the same quarter.

    ``ref_date`` may be a ``date`` or a ``datetime``; the returned object has
    the same type (a ``datetime`` keeps its time-of-day fields).
    """
    from datetime import timedelta

    # Quarters start in months 1, 4, 7 and 10.
    first_month_of_quarter = ((ref_date.month - 1) // 3) * 3 + 1
    quarter_start = ref_date.replace(month=first_month_of_quarter, day=1)
    return quarter_start - timedelta(days=1)
But it seems unnecessarily iterative.
You can do it the "hard way" by just looking at the month you receive:
def previous_quarter(ref):
    """Return the last day of the quarter before the one containing ``ref``.

    ``ref`` only needs ``year`` and ``month`` attributes; the result is
    always a ``datetime.date``.
    """
    year, month = ref.year, ref.month
    if month >= 10:
        # Q4 -> end of Q3
        return datetime.date(year, 9, 30)
    if month >= 7:
        # Q3 -> end of Q2
        return datetime.date(year, 6, 30)
    if month >= 4:
        # Q2 -> end of Q1
        return datetime.date(year, 3, 31)
    # Q1 -> end of Q4 of the previous year
    return datetime.date(year - 1, 12, 31)
Using dateutil:
import datetime as DT
import dateutil.rrule as rrule
def previous_quarter(date):
    """Return the most recent quarter-end date strictly before ``date``.

    A daily recurrence rule restricted to the last day of March, June,
    September and December is generated, and the latest occurrence earlier
    than ``date`` is returned as a ``datetime.date``.
    """
    # Work with a midnight datetime so only the calendar day of the input matters.
    as_midnight = DT.datetime(date.year, date.month, date.day)
    # Start the rule 100 days back: consecutive quarter ends are at most
    # 92 days apart, so the window always contains one.
    search_start = as_midnight - DT.timedelta(days=100)
    quarter_ends = rrule.rrule(
        rrule.DAILY,
        dtstart=search_start,
        bymonth=(3, 6, 9, 12),  # only quarter-closing months
        bymonthday=-1,  # only the last day of those months
    )
    # inc=False excludes ``date`` itself, so a quarter-end input maps to the
    # previous quarter end.
    return quarter_ends.before(as_midnight, inc=False).date()
# Demo run; prints, one per line:
#   2013-03-31
#   2012-12-31
#   2012-12-31
#   2013-09-30
for sample in (
    DT.date(2013, 5, 31),
    DT.date(2013, 2, 1),
    DT.date(2013, 3, 31),
    DT.date(2013, 11, 1),
):
    print(previous_quarter(sample))
Exploit the data pattern involved and turn the problem into a table lookup - your classic space-time tradeoff:
from datetime import date
# Lookup table indexed by (month - 1). Each entry is
# (month, day, year_offset) of the previous quarter's last day.
PQTBL = tuple(
    entry
    for entry in ((12, 31, -1), (3, 31, 0), (6, 30, 0), (9, 30, 0))
    for _ in range(3)  # three consecutive months share one entry
)


def previous_quarter(ref):
    """Return the last day of the quarter before ``ref``'s quarter via table lookup."""
    end_month, end_day, year_offset = PQTBL[ref.month - 1]
    return date(ref.year + year_offset, end_month, end_day)
Find the first day and month of the quarter, then use relativedelta to subtract a day.
from dateutil.relativedelta import relativedelta
def previous_quarter(ref):
    """Return the day before the first day of ``ref``'s quarter.

    ``ref`` must support ``replace(month=..., day=...)``; the result has the
    same type as ``ref``.
    """
    quarter_index = (ref.month - 1) // 3  # 0 for Jan-Mar ... 3 for Oct-Dec
    quarter_start = ref.replace(month=3 * quarter_index + 1, day=1)
    return quarter_start - relativedelta(days=1)
It's almost certain you would be happier using pandas (a python library), it has many functions for "business time" data.
http://pandas.pydata.org/pandas-docs/dev/timeseries.html
Reworked Justin Ethier's code for a "next quarter" version. Also added timezone via pytz and strftime formatting. #justin-ethier
import pytz
from datetime import datetime, timedelta
import datetime as dt
def nextQuarter(ref=None):
    """Return the end of the quarter containing ``ref`` as a formatted string.

    Args:
        ref: Object with ``year`` and ``month`` attributes (a ``date`` or
            ``datetime``). When omitted, the current time in the
            'America/New_York' timezone is used, as before.

    Returns:
        The last second (23:59:59) of the last day of ``ref``'s quarter,
        formatted as ``'%m-%d-%Y %H:%M:%S'``.

    For October-December the quarter ends on 31 December of the *same* year;
    the previous implementation added one to the year in that branch.
    """
    if ref is None:
        # Imported here so callers that pass ``ref`` do not need pytz.
        import pytz
        ref = dt.datetime.now(pytz.timezone('America/New_York'))

    # Last month of the quarter: 3, 6, 9 or 12.
    last_month = ((ref.month - 1) // 3 + 1) * 3
    # March and December have 31 days; June and September have 30.
    last_day = 31 if last_month in (3, 12) else 30
    quarter_end = dt.datetime(ref.year, last_month, last_day, 23, 59, 59)
    return quarter_end.strftime('%m-%d-%Y %H:%M:%S')
next = nextQuarter()
import datetime
def previous_quarter(ref):
    """Return the last day of the quarter preceding the one containing ``ref``.

    ``ref`` only needs ``year`` and ``month`` attributes. The result is a
    ``datetime.datetime`` at midnight of that day.

    The earlier version returned the *first* day of the previous quarter
    (e.g. 2013-01-01 for a May 2013 input), whereas the requested value is
    the end of the previous quarter (2013-03-31).
    """
    # First month of ref's own quarter: 1, 4, 7 or 10.
    first_month_of_quarter = ((ref.month - 1) // 3) * 3 + 1
    quarter_start = datetime.datetime(ref.year, first_month_of_quarter, 1)
    # The day before the current quarter starts is the previous quarter's end;
    # the subtraction also handles the roll-over into the previous year.
    return quarter_start - datetime.timedelta(days=1)
Solution using only python's datetime library -
import datetime
def get_quarter_end(dt):
    """Return the last day of the quarter containing ``dt``.

    ``dt`` needs ``year`` and ``month`` attributes. The result is a
    ``datetime.datetime`` at midnight, obtained by stepping one day back
    from the first day of the following quarter.
    """
    quarter = (dt.month - 1) // 3 + 1  # 1..4

    if quarter == 4:
        # The quarter after Q4 starts in January of the next year.
        next_year, next_month = dt.year + 1, 1
    else:
        next_year, next_month = dt.year, quarter * 3 + 1

    start_of_next_quarter = datetime.datetime(next_year, next_month, 1)
    return start_of_next_quarter - datetime.timedelta(days=1)
if __name__ == '__main__':
    # Example: parse 15 July 2016 and look up the end of its quarter.
    dt = datetime.datetime.strptime('2016-07-15', '%Y-%m-%d')
    target_dt = get_quarter_end(dt)
And if you want to retrieve the last four quarters, you can do this:
def last_four_quarters(ref, fmt=None):
    """Return the end dates of the four most recently completed quarters.

    Args:
        ref: Object with ``year`` and ``month`` attributes (``date`` or
            ``datetime``).
        fmt: Optional ``strftime`` format (e.g. ``'%Y%m%d'``). When given,
            the dates are returned as strings in that format.

    Returns:
        A list of four ``datetime.date`` objects (or strings when ``fmt`` is
        given), newest first.

    Every quarter of the year now yields the same kind of result; the
    hand-written branches previously returned strings for January-March but
    dates otherwise, and listed 30 March instead of 31 March for
    October-December.
    """
    quarter_ends = ((3, 31), (6, 30), (9, 30), (12, 31))

    year = ref.year
    # Index into quarter_ends of the latest *completed* quarter;
    # -1 means it is Q4 of the previous year.
    index = (ref.month - 1) // 3 - 1

    results = []
    for _ in range(4):
        if index < 0:
            # Wrap around to Q4 of the preceding year.
            index = 3
            year -= 1
        month, day = quarter_ends[index]
        results.append(datetime.date(year, month, day))
        index -= 1

    if fmt is not None:
        return [quarter_end.strftime(fmt) for quarter_end in results]
    return results
I've got a sorted list of datetimes: (with day gaps)
list_of_dts = [
datetime.datetime(2012,1,1,0,0,0),
datetime.datetime(2012,1,1,1,0,0),
datetime.datetime(2012,1,2,0,0,0),
datetime.datetime(2012,1,3,0,0,0),
datetime.datetime(2012,1,5,0,0,0),
]
And I'd like to split them in to a list for each day:
result = [
[datetime.datetime(2012,1,1,0,0,0), datetime.datetime(2012,1,1,1,0,0)],
[datetime.datetime(2012,1,2,0,0,0)],
[datetime.datetime(2012,1,3,0,0,0)],
[], # Empty list for no datetimes on day
[datetime.datetime(2012,1,5,0,0,0)]
]
Algorithmically, it should be possible to achieve at least O(n).
Perhaps something like the following:
(This obviously doesn't handle missed days, and drops the last dt, but it's a start)
def dt_to_d(list_of_dts):
    """Split a sorted list of datetimes into one sub-list per calendar day.

    Args:
        list_of_dts: ``datetime`` objects in ascending order.

    Returns:
        A list of lists covering every calendar day from the first to the
        last datetime, in order. Days with no datetimes are represented by
        an empty list. An empty input yields an empty list.

    Runs in a single pass over the input plus one step per gap day.
    """
    if not list_of_dts:
        return []

    result = []
    day = []
    # Proleptic ordinal of the day currently being filled; comparing
    # ordinals handles month and year boundaries uniformly.
    current_ordinal = list_of_dts[0].toordinal()

    for dt in list_of_dts:
        ordinal = dt.toordinal()
        # Close the current day, and emit an empty list for each skipped
        # day, until we reach the day ``dt`` belongs to.
        while current_ordinal < ordinal:
            result.append(day)
            day = []
            current_ordinal += 1
        day.append(dt)

    # The final day is never closed inside the loop.
    result.append(day)
    return result
Thoughts?
The easiest way to go is to use dict.setdefault to group entries falling on the same day and then loop over the lowest day to the highest:
>>> import datetime
>>> list_of_dts = [
datetime.datetime(2012,1,1,0,0,0),
datetime.datetime(2012,1,1,1,0,0),
datetime.datetime(2012,1,2,0,0,0),
datetime.datetime(2012,1,3,0,0,0),
datetime.datetime(2012,1,5,0,0,0),
]
>>> days = {}
>>> for dt in list_of_dts:
days.setdefault(dt.toordinal(), []).append(dt)
>>> [days.get(day, []) for day in range(min(days), max(days)+1)]
[[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 1, 0)],
[datetime.datetime(2012, 1, 2, 0, 0)],
[datetime.datetime(2012, 1, 3, 0, 0)],
[],
[datetime.datetime(2012, 1, 5, 0, 0)]]
Another approach for making such groupings is itertools.groupby. It is designed for this kind of work, but it doesn't provide a way to fill-in an empty list for missing days:
>>> import itertools
>>> [list(group) for k, group in itertools.groupby(list_of_dts,
key=datetime.datetime.toordinal)]
[[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 1, 0)],
[datetime.datetime(2012, 1, 2, 0, 0)],
[datetime.datetime(2012, 1, 3, 0, 0)],
[datetime.datetime(2012, 1, 5, 0, 0)]]
You can use itertools.groupby to easily handle this kind of problems:
import datetime
import itertools
list_of_dts = [
    datetime.datetime(2012, 1, 1, 0, 0, 0),
    datetime.datetime(2012, 1, 1, 1, 0, 0),
    datetime.datetime(2012, 1, 2, 0, 0, 0),
    datetime.datetime(2012, 1, 3, 0, 0, 0),
    datetime.datetime(2012, 1, 5, 0, 0, 0),
]
# groupby merges only adjacent items with the same key, which is enough
# here because the list is already sorted. Days without entries produce
# no group at all.
grouped_by_day = [
    list(g) for k, g in itertools.groupby(list_of_dts, key=lambda d: d.date())
]
# print() call form: the Python 2 print statement is a SyntaxError on Python 3.
print(grouped_by_day)
Filling the gaps:
# Group the datetimes by calendar day.
# NOTE: iterates `list_of_dts` (the name the sample data is defined under);
# the snippet previously referred to an undefined `list_of_dates`.
date_dict = {}
for date_value in list_of_dts:
    # setdefault replaces the Python 2-only dict.has_key() check.
    date_dict.setdefault(date_value.date(), []).append(date_value)

sorted_dates = sorted(date_dict)
if sorted_dates:  # nothing to print for empty input
    one_day = datetime.timedelta(days=1)
    current_day = sorted_dates[0]
    # Walk every calendar day in the covered range so that days without
    # entries print an empty list.
    while current_day <= sorted_dates[-1]:
        print(date_dict.get(current_day, []))
        current_day += one_day
Results:
[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 1, 0)]
[datetime.datetime(2012, 1, 2, 0, 0)]
[datetime.datetime(2012, 1, 3, 0, 0)]
[]
[datetime.datetime(2012, 1, 5, 0, 0)]
This solution does not require the original datetime list to be sorted.
list_of_dts = [
    datetime.datetime(2012, 1, 1, 0, 0, 0),
    datetime.datetime(2012, 1, 1, 1, 0, 0),
    datetime.datetime(2012, 1, 2, 0, 0, 0),
    datetime.datetime(2012, 1, 3, 0, 0, 0),
    datetime.datetime(2012, 1, 5, 0, 0, 0),
]

# Map each calendar day to the list of datetimes that fall on it,
# in the order they appear in the input.
groupedByDay = {}
for stamp in list_of_dts:
    groupedByDay.setdefault(stamp.date(), []).append(stamp)
Now you have a dictionary, where the date is the key and the value is a list of similar dates.
and if you are set on having a list instead
# sorted() works on any iterable; on Python 3 dict.values() is a view
# without a .sort() method. The per-day lists compare element-wise, so
# this orders them chronologically.
result = sorted(groupedByDay.values())
Now result is a list of lists, where all the datetimes that fall on the same day are grouped together.
I have a list of birthdays stored in datetime objects. How would one go about sorting these in Python using only the month and day arguments?
For example,
[
datetime.datetime(1983, 1, 1, 0, 0)
datetime.datetime(1996, 1, 13, 0 ,0)
datetime.datetime(1976, 2, 6, 0, 0)
...
]
Thanks! :)
You can use month and day to create a value that can be used for sorting:
birthdays.sort(key = lambda d: (d.month, d.day))
l.sort(key = lambda x: x.timetuple()[1:3])
If the dates are stored as strings—you say they aren't, although it looks like they are—you might use dateutil's parser:
>>> from dateutil.parser import parse
>>> from pprint import pprint
>>> bd = ['February 6, 1976','January 13, 1996','January 1, 1983']
>>> bd = [parse(i) for i in bd]
>>> pprint(bd)
[datetime.datetime(1976, 2, 6, 0, 0),
datetime.datetime(1996, 1, 13, 0, 0),
datetime.datetime(1983, 1, 1, 0, 0)]
>>> bd.sort(key = lambda d: (d.month, d.day)) # from sth's answer
>>> pprint(bd)
[datetime.datetime(1983, 1, 1, 0, 0),
datetime.datetime(1996, 1, 13, 0, 0),
datetime.datetime(1976, 2, 6, 0, 0)]
If your dates are in different formats, you might give fuzzy parsing a shot:
>>> bd = [parse(i,fuzzy=True) for i in bd] # replace line 4 above with this line