Matplotlib : fill subarea of a Graph - python

I am using series and matplotlib to plot some graph like that :
basically, I build them using
ax.plot(df)
where df is a series with index : 'Sep', 'Oct', 'Nov', ....
and value some number.
What I wanted to have is fill the background, but only after the 'Mar' index (for example)
Currently the only thing I have been able to do is create a series with only 'Mar' to 'Jul' as its index and a constant value to identify the "area". Ideally, though, I would fill the background with some color to clearly distinguish both parts of the graph (before March the data is realised; after it, the data is a prediction from my algorithm).
For example, if I use :
ax.set_facecolor('silver')
I have this result :
but the color should appear only after 'Mar', and I can't find any way to restrict the background fill to that part of the graph.

As mentioned, you can use axvspan() to add a suitable background colour.
For example:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import random
from datetime import datetime, timedelta
# Build 100 daily samples of a randomly increasing value
one_day = timedelta(days=1)
current_date = datetime(2021, 1, 1)
running_total = 0
data = []

for _ in range(100):
    current_date = current_date + one_day
    running_total = running_total + random.random()
    data.append([current_date, running_total])

df = pd.DataFrame(data, columns=['Date', 'Value'])
ax = df.plot(x='Date', y='Value')

# Shade everything from 1 March up to the last sample
march = date2num(datetime(2021, 3, 1))
latest = date2num(current_date)
ax.axvspan(xmin=march, xmax=latest, facecolor='silver')
plt.show()
Giving you:

Related

How to create a min-max lineplot by month

I have retail beef ad counts time series data, and I intend to make stacked line chart aim to show On a three-week average basis, quantity of average ads that grocers posted per store last week. To do so, I managed to aggregate data for plotting and tried to make line chart that I want. The main motivation is based on context of the problem and desired plot. In my attempt, I couldn't get very nice line chart because it is not informative to understand. I am wondering how can I achieve this goal in matplotlib. Can anyone suggest me what should I do from my current attempt? Any thoughts?
reproducible data and current attempt
Here is minimal reproducible data that I used in my current attempt:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from datetime import timedelta, datetime
url = 'https://gist.githubusercontent.com/adamFlyn/96e68902d8f71ad62a4d3cda135507ad/raw/4761264cbd55c81cf003a4219fea6a24740d7ce9/df.csv'
df = pd.read_csv(url, parse_dates=['date'])
df.drop(columns=['Unnamed: 0'], inplace=True)

# Total number of ads per (date, retail_item)
df_grp = df.groupby(['date', 'retail_item']).agg({'number_of_ads': 'sum'})
# Share of each item in the day's total. transform() keeps the original index,
# so the result aligns with df_grp on assignment.
daily_total = df_grp.groupby(level=0)['number_of_ads'].transform('sum')
df_grp["percentage"] = 100 * df_grp['number_of_ads'] / daily_total
df_grp = df_grp.reset_index(level=[0, 1])

# Select the style before any figure is created so it applies to every plot
plt.style.use('ggplot')

for item in df_grp['retail_item'].unique():
    dd = (df_grp[df_grp['retail_item'] == item]
          .groupby(['date', 'percentage'])[['number_of_ads']].sum()
          .reset_index(level=[0, 1]))
    dd['weakly_change'] = dd['percentage'].rolling(7).mean()
    fig, ax = plt.subplots(figsize=(8, 6), dpi=144)
    # Plot against the real 'date' column (not the RangeIndex left behind by
    # reset_index) so the date formatter below receives actual dates.
    sns.lineplot(x='date', y='weakly_change', data=dd, ax=ax)
    ax.set_xlim(dd['date'].min(), dd['date'].max())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    fig.autofmt_xdate()
    plt.xticks(rotation=90)
    plt.show()
Current Result
but I couldn't get correct line chart that I expected, I want to reproduce the plot from this site. Is that doable to achieve this? Any idea?
desired plot
here is the example desired plot that I want to make from this minimal reproducible data:
I don't know how should make changes for my current attempt to get my desired plot above. Can anyone know any possible way of doing this in matplotlib? what else should I do? Any possible help would be appreciated. Thanks
Also see How to create a min-max plot by month with fill_between?
See in-line comments for details
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import calendar
#################################################################
# setup from question
url = 'https://gist.githubusercontent.com/adamFlyn/96e68902d8f71ad62a4d3cda135507ad/raw/4761264cbd55c81cf003a4219fea6a24740d7ce9/df.csv'
df = pd.read_csv(url, parse_dates=['date'])
df.drop(columns=['Unnamed: 0'], inplace=True)
df_grp = df.groupby(['date', 'retail_item']).agg({'number_of_ads': 'sum'})
# percentage of the day's ads per item; transform() preserves the index so the
# assignment aligns row by row
daily_total = df_grp.groupby(level=0)['number_of_ads'].transform('sum')
df_grp["percentage"] = 100 * df_grp['number_of_ads'] / daily_total
df_grp = df_grp.reset_index(level=[0, 1])
#################################################################
# create a month map from long to abbreviated calendar names
month_map = dict(zip(calendar.month_name[1:], calendar.month_abbr[1:]))
# update the month column name
df_grp['month'] = df_grp.date.dt.month_name().map(month_map)
# set month as categorical so they are plotted in the correct order
df_grp['month'] = pd.Categorical(df_grp['month'], categories=list(month_map.values()), ordered=True)
# use groupby to aggregate min mean and max; the aggregations are named by
# string and `observed` is spelled out so the grouping over the categorical
# month column does not depend on a pandas default
dfmm = (
    df_grp.groupby(['retail_item', 'month'], observed=False)['percentage']
    .agg(['max', 'min', 'mean'])
    .stack()
    .reset_index(level=[2])
    .rename(columns={'level_2': 'mm', 0: 'vals'})
    .reset_index()
)
# create a palette map for line colors
cmap = {'min': 'k', 'max': 'k', 'mean': 'b'}
# iterate through each retail item and plot the corresponding data
for g, d in dfmm.groupby('retail_item'):
    plt.figure(figsize=(7, 4))
    sns.lineplot(x='month', y='vals', hue='mm', data=d, palette=cmap)
    # select only min or max data for fill_between
    y1 = d[d.mm == 'max']
    y2 = d[d.mm == 'min']
    plt.fill_between(x=y1.month, y1=y1.vals, y2=y2.vals, color='gainsboro')
    # add lines for specific years
    for year in [2016, 2018, 2020]:
        data = df_grp[(df_grp.date.dt.year == year) & (df_grp.retail_item == g)]
        sns.lineplot(x='month', y='percentage', ci=None, data=data, label=year)
    plt.ylim(0, 100)
    plt.margins(0, 0)
    plt.legend(bbox_to_anchor=(1., 1), loc='upper left')
    plt.ylabel('Percentage of Ads')
    plt.title(g)
    plt.show()

half yearly colorbar in matplotlib and pandas

I have a pandas dataframe. I am making a scatter plot and tried to categorize the data using the colorbar. I did it for monthly classification and quarterly classification, as shown in the example code below.
# One random (a, b) point for each of 366 consecutive days starting 2000-01-01
n_days = 366
a = np.random.rand(n_days)
b = np.random.rand(n_days) * 0.4
index = pd.date_range(pd.to_datetime('01-01-2000'), periods=n_days)
df = pd.DataFrame({'a': a, 'b': b}, index=index)

# Colour every point by its calendar month
month_of_point = df.index.month
plt.scatter(df['a'], df['b'], c=month_of_point)
plt.colorbar()
And also for quarterly:
# Colour every point by the quarter of the year it falls in
quarter_of_point = df.index.quarter
plt.scatter(df['a'], df['b'], c=quarter_of_point)
plt.colorbar()
My question: is there any way to categorize half-yearly, for example by months 1-6 and 7-12, and also by other splits such as months 10-3 and 4-9?
Thank you and your help/suggestion will be highly appreciated.
Make a custom function to put in scatter function to color argument. I made an example for half yearly division. You can use it as template for your own split function:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
def halfyear(m, start=1):
    """Return 0 if month *m* lies in the first half-year, otherwise 1.

    The first half-year is the six consecutive months beginning at ``start``
    (wrapping around December). With the default ``start=1`` months 1-6 map to
    0 and months 7-12 map to 1; ``start=10`` gives the 10-3 / 4-9 split.

    Args:
        m: Month number, 1-12.
        start: Month number (1-12) at which the first half-year begins.

    Returns:
        0 for the first half-year, 1 for the second.
    """
    return 0 if (m - start) % 12 < 6 else 1
# wrap the scalar function so it can be called on a whole array of months
hy = np.vectorize(halfyear)

n_days = 366
a = np.random.rand(n_days)
b = np.random.rand(n_days) * 0.4
index = pd.date_range(pd.to_datetime('01-01-2000'), periods=n_days)
df = pd.DataFrame({'a': a, 'b': b}, index=index)

# the half-year code (0 or 1) of every row drives the colour argument 'c'
half_codes = hy(df.index.month)
plt.scatter(df['a'], df['b'], c=half_codes)
plt.colorbar()
plt.show()
Another way is to use a lambda function, like:
# 0 for timestamps in months 1-6, 1 otherwise
half_codes = df.index.map(lambda m: 0 if 0 < m.month < 7 else 1)
plt.scatter(df['a'], df['b'], c=half_codes)
I would opt for a solution which does not completely truncate the monthly information. Using colors which are similar but distinguishable for the months allows to visually classify by half-year as well as month.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors
n_days = 366
a = np.random.rand(n_days)
b = np.random.rand(n_days) * 0.4
index = pd.date_range(pd.to_datetime('01-01-2000'), periods=n_days)
df = pd.DataFrame({'a': a, 'b': b}, index=index)

# Two colour ramps that meet at the middle of the scale: the first ramp is
# used for the lower half of the 12 bins, the second for the upper half.
colors = ["crimson", "orange", "darkblue", "skyblue"]
stops = [0, .499, .5, 1]
cdic = list(zip(stops, colors))
cmap = matplotlib.colors.LinearSegmentedColormap.from_list("name", cdic, 12)

# Bin edges at 0.5, 1.5, ..., 12.5 so each integer month gets its own bin
bin_edges = np.arange(13) + .5
norm = matplotlib.colors.BoundaryNorm(bin_edges, 12)

plt.scatter(df['a'], df['b'], c=df.index.month, cmap=cmap, norm=norm)
plt.colorbar(ticks=np.arange(1, 13))
plt.show()

How to set correct time values to seaborn (tsplot) x-axis [duplicate]

Below I have the following script which creates a simple time series plot:
%matplotlib inline
import datetime
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax = plt.subplots()

# Ten consecutive days, two units per day; dates are stored as 'YYYYMMDD' strings
start_date = datetime.datetime(2015, 7, 1)
records = [
    {
        'Date': (start_date + datetime.timedelta(days=i)).strftime('%Y%m%d'),
        'Value': i * j,
        'Unit': 'Ones' if j == 1 else 'Twos',
    }
    for i in range(10)
    for j in (1, 2)
]
df = pd.DataFrame(records)

sns.tsplot(df, time='Date', value='Value', unit='Unit', ax=ax)
fig.autofmt_xdate()
And the result of this is the following:
As you can see the x-axis has strange numbers for the datetimes, and not the usual "nice" representations that come with matplotlib and other plotting utilities. I've tried many things, re-formatting the data but it never comes out clean. Anyone know a way around?
Matplotlib represents dates as floating point numbers (in days), thus unless you (or pandas or seaborn), tell it that your values are representing dates, it will not format the ticks as dates. I'm not a seaborn expert, but it looks like it (or pandas) does convert the datetime objects to matplotlib dates, but then does not assign proper locators and formatters to the axes. This is why you get these strange numbers, which are in fact just the days since 0001.01.01. So you'll have to take care of the ticks manually (which, in most cases, is better anyways as it gives you more control).
So you'll have to assign a date locator, which decides where to put ticks, and a date formatter, which will then format the strings for the tick labels.
import datetime
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Assemble the records. Dates are stored directly as matplotlib date numbers
# (floats) instead of strings, so nothing has to be parsed again later.
start_date = datetime.datetime(2015, 7, 1)
rows = []
for day_offset in range(10):
    stamp = mdates.date2num(start_date + datetime.timedelta(days=day_offset))
    for multiplier, unit_name in ((1, 'Ones'), (2, 'Twos')):
        rows.append({
            'Date': stamp,
            'Value': day_offset * multiplier,
            'Unit': unit_name,
        })
df = pd.DataFrame(rows)

# Create the figure and draw the series
fig, ax = plt.subplots()
# NOTE(review): sns.tsplot may be unavailable in recent seaborn releases -
# confirm against the installed version.
sns.tsplot(df, time='Date', value='Value', unit='Unit', ax=ax)

# The locator decides where ticks go; the formatter turns them into labels
x_axis = ax.xaxis
x_axis.set_major_locator(mdates.AutoDateLocator())
x_axis.set_major_formatter(mdates.DateFormatter('%Y.%m.%d'))

# Slant the labels since they tend to be too long
fig.autofmt_xdate()
plt.show()
Result:
For me, @hitzg's answer results in "OverflowError: signed integer is greater than maximum" in the depths of DateFormatter.
Looking at my dataframe, my indices are datetime64, not datetime. Pandas converts these nicely though. The following works great for me:
import matplotlib as mpl
def myFormatter(x, pos):
    """Format tick value *x* as a date/time string.

    Args:
        x: Tick value; converted with ``pd.to_datetime``.
        pos: Tick position supplied by the formatter machinery (unused).

    Returns:
        str: Text form of the converted timestamp. A tick formatter is
        expected to hand back label text, so the timestamp is converted
        explicitly rather than returned as an object.
    """
    return str(pd.to_datetime(x))
[ . . . ]
# Route every major x-axis tick label through myFormatter.
ax.xaxis.set_major_formatter(mpl.ticker.FuncFormatter(myFormatter))
Here is a potentially inelegant solution, but it's the only one I have ... Hope it helps!
g = sns.pointplot(x, y, data=df, ci=False)

# Label every fifth distinct date, in chronological order
unique_dates = sorted(df['Date'].drop_duplicates())
date_ticks = range(0, len(unique_dates), 5)
tick_labels = []
for position in date_ticks:
    tick_labels.append(unique_dates[position].strftime('%d %b'))

g.set_xticks(date_ticks)
g.set_xticklabels(tick_labels, rotation='vertical')
g.set_xlabel('Date');
Let me know if you see any issues!
def myFormatter(x, pos):
    """Return tick value *x* rendered as a compact ``YYYYMMDD`` string.

    *pos* is accepted because tick formatters are called with it; it is unused.
    """
    timestamp = pd.to_datetime(x)
    return timestamp.strftime('%Y%m%d')
# Route every major x-axis tick label through myFormatter.
ax.xaxis.set_major_formatter(mpl.ticker.FuncFormatter(myFormatter))

Sorting and conditional color formatting in matplotlib

to skip the context and get straight to the question, go down to "desired changes"
I wrote the helper function below to
Fetch data
Calculate the YTD return
Plot the results in a bar plot
Here is the function:
def ytd_perf(symb, col_names, source = 'yahoo'):
    """Fetch prices, compute the year-to-date return and draw it as a bar plot.

    Args:
        symb: List of ticker symbols passed to the data reader; also fixes
            the column order.
        col_names: Display names assigned to the columns, in the same order
            as ``symb``.
        source: Data source name handed to ``DataReader``.

    Notes:
        The IPython magic that used to sit inside this function was removed:
        magics are not valid Python inside a function body. In a notebook,
        enable inline plotting in the cell itself (e.g. ``%matplotlib inline``).
    """
    import datetime

    import pandas_datareader.data as web
    from matplotlib.ticker import FuncFormatter

    # establish start and end dates (a single today() call keeps them consistent)
    end = datetime.date.today()
    start = datetime.date(end.year, 1, 1)
    # fetch data
    df = web.DataReader(symb, source, start=start, end=end)['Adj Close']
    # make sure column orders don't change
    df = df.reindex(columns=symb)
    # rename the columns
    df.columns = col_names
    # calc returns from the first element
    df = (df / df.iloc[0]) - 1
    # Plot the most recent line of data -- this represents the YTD return
    ax = df.iloc[-1].plot(kind='bar', title=('YTD Performance as of ' + str(end)), figsize=(12, 9))
    # Format ticks through a formatter so labels stay correct if the ticks move
    ax.yaxis.set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:3.1f}%'.format(value * 100)))
So, when I run:
# Ticker symbols and the display names used for them, in matching order
tickers, names = ['SPY', 'TLT'], ['Stocks', 'Bonds']
ytd_perf(symb=tickers, col_names=names)
I get the following output:
2 desired changes that I can't quite get to work:
I would like to change the color of the bar such that if the value < 0, it is red.
Sort the bars from highest to lowest (which is the case in this chart because there are only two series, but doesnt work with many series).

Matplotlib and Numpy - Create a calendar heatmap

Is it possible to create a calendar heatmap without using pandas?
If so, can someone post a simple example?
I have dates like Aug-16 and a count value like 16 and I thought this would be a quick and easy way to show intensity of counts between days for a long period of time.
Thank you
It's certainly possible, but you'll need to jump through a few hoops.
First off, I'm going to assume you mean a calendar display that looks like a calendar, as opposed to a more linear format (a linear formatted "heatmap" is much easier than this).
The key is reshaping your arbitrary-length 1D series into an Nx7 2D array where each row is a week and columns are days. That's easy enough, but you also need to properly label months and days, which can get a touch verbose.
Here's an example. It doesn't even remotely try to handle crossing across year boundaries (e.g. Dec 2014 to Jan 2015, etc). However, hopefully it gets you started:
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
def main():
    """Generate sample data, draw it as a calendar heatmap and show the figure."""
    sample_dates, sample_values = generate_data()
    figure, axes = plt.subplots(figsize=(6, 10))
    calendar_heatmap(axes, sample_dates, sample_values)
    plt.show()
def generate_data():
    """Return 100 consecutive days starting 2015-03-13 with random counts.

    Returns:
        (dates, data): a list of ``datetime`` objects and an integer array of
        the same length with values drawn from ``[0, 20)``.
    """
    count = 100
    values = np.random.randint(0, 20, count)
    first_day = dt.datetime(2015, 3, 13)
    days = []
    for offset in range(count):
        days.append(first_day + dt.timedelta(days=offset))
    return days, values
def calendar_array(dates, data):
    """Arrange a 1D series of daily values into a weeks-by-7 calendar grid.

    Args:
        dates: Sequence of ``date``/``datetime`` objects.
        data: Values, one per entry of ``dates``.

    Returns:
        (i, j, calendar): ``i`` is the row (week) index and ``j`` the column
        (weekday, Monday=0) of every date; ``calendar`` is a float array of
        shape ``(n_weeks, 7)`` holding the data, NaN where no date falls.

    Rows are derived from the ordinal of each date's Monday rather than from
    the ISO week number, so ranges that cross a year boundary
    (e.g. Dec 2014 to Jan 2015) stay contiguous.
    """
    ordinals = np.array([d.toordinal() for d in dates])
    j = np.array([d.weekday() for d in dates])
    # Ordinal of the Monday that starts each date's week
    week_starts = ordinals - j
    i = (week_starts - week_starts.min()) // 7
    calendar = np.full((i.max() + 1, 7), np.nan)
    calendar[i, j] = data
    return i, j, calendar
def calendar_heatmap(ax, dates, data):
    """Draw *data* as a calendar-shaped heatmap on *ax*, with labels and colorbar."""
    rows, cols, grid = calendar_array(dates, data)
    image = ax.imshow(grid, interpolation='none', cmap='summer')
    # Day numbers first, then month names along the y axis
    for annotate in (label_days, label_months):
        annotate(ax, dates, rows, cols, grid)
    ax.figure.colorbar(image)
def label_days(ax, dates, i, j, calendar):
    """Write the day of the month into each occupied cell and label the weekdays.

    ``i`` and ``j`` are the row/column indices of every date in ``dates``;
    ``calendar`` is only used for its number of rows.
    """
    n_weeks = calendar.shape[0]
    day_of_month = np.full((n_weeks, 7), np.nan)
    day_of_month[i, j] = [d.day for d in dates]

    for (row, col), day in np.ndenumerate(day_of_month):
        if not np.isfinite(day):
            continue  # no date falls in this cell
        ax.text(col, row, int(day), ha='center', va='center')

    weekday_letters = ['M', 'T', 'W', 'R', 'F', 'S', 'S']
    ax.set(xticks=np.arange(7), xticklabels=weekday_letters)
    ax.xaxis.tick_top()
def label_months(ax, dates, i, j, calendar):
    """Put one month label on the y axis at the mean row of that month's dates."""
    month_labels = np.array(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
                             'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
    months = np.array([d.month for d in dates])

    yticks = []
    labels = []
    for m in sorted(set(months)):
        yticks.append(i[months == m].mean())
        labels.append(month_labels[m - 1])

    ax.set(yticks=yticks)
    ax.set_yticklabels(labels, rotation=90)
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    main()
Edit: I now see the question asks for a plot without pandas. Even so, this question is a first page Google result for "python calendar heatmap", so I will leave this here. I recommend using pandas anyway. You probably already have it as a dependency of another package, and pandas has by far the best APIs for working with datetime data (pandas.Timestamp and pandas.DatetimeIndex).
The only Python package that I can find for these plots is calmap which is unmaintained and incompatible with recent matplotlib. So I decided to write my own. It produces plots like the following:
Here is the code. The input is a series with a datetime index giving the values for the heatmap:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
# Tick labels for the heatmap rows, Sunday first
DAYS = [
    'Sun.', 'Mon.', 'Tues.', 'Wed.', 'Thurs.', 'Fri.', 'Sat.',
]
# Tick labels for the months, indexed by (month number - 1)
MONTHS = [
    'Jan.', 'Feb.', 'Mar.', 'Apr.', 'May', 'June',
    'July', 'Aug.', 'Sept.', 'Oct.', 'Nov.', 'Dec.',
]
def date_heatmap(series, start=None, end=None, mean=False, ax=None, **kwargs):
    '''Plot a calendar heatmap given a datetime series.

    Arguments:
        series (pd.Series):
            A series of numeric values with a datetime index. Values occurring
            on the same day are combined by sum.
        start (Any):
            The first day to be considered in the plot. The value can be
            anything accepted by :func:`pandas.to_datetime`; any time-of-day
            part is dropped. The default is the earliest date in the data.
        end (Any):
            The last day to be considered in the plot. The value can be
            anything accepted by :func:`pandas.to_datetime`; any time-of-day
            part is dropped. The default is the latest date in the data.
        mean (bool):
            Combine values occurring on the same day by mean instead of sum.
        ax (matplotlib.axes.Axes or None):
            The axes on which to draw the heatmap. The default is the current
            axes in the :mod:`matplotlib.pyplot` API.
        **kwargs:
            Forwarded to :meth:`~matplotlib.axes.Axes.pcolormesh` for drawing
            the heatmap.

    Returns:
        matplotlib.axes.Axes:
            The axes on which the heatmap was drawn. This is set as the current
            axes in the `~matplotlib.pyplot` API.
    '''
    # Combine values occurring on the same day.
    dates = series.index.floor('D')
    group = series.groupby(dates)
    series = group.mean() if mean else group.sum()

    # Parse start/end, defaulting to the min/max of the index. Both are floored
    # to midnight: the series index is floored to whole days above, so a bound
    # carrying a time-of-day would otherwise never match any index entry.
    start = pd.to_datetime(start or series.index.min()).floor('D')
    end = pd.to_datetime(end or series.index.max()).floor('D')

    # We use [start, end) as a half-open interval below.
    end += np.timedelta64(1, 'D')

    # Get the previous/following Sunday to start/end.
    # Pandas and numpy day-of-week conventions are Monday=0 and Sunday=6.
    start_sun = start - np.timedelta64((start.dayofweek + 1) % 7, 'D')
    end_sun = end + np.timedelta64(7 - end.dayofweek - 1, 'D')

    # Create the heatmap and track ticks.
    num_weeks = (end_sun - start_sun).days // 7
    heatmap = np.zeros((7, num_weeks))
    ticks = {}  # week number -> month name
    for week in range(num_weeks):
        for day in range(7):
            date = start_sun + np.timedelta64(7 * week + day, 'D')
            if date.day == 1:
                ticks[week] = MONTHS[date.month - 1]
            if date.dayofyear == 1:
                # Jan 1 also has day == 1, so the entry already exists here.
                ticks[week] += f'\n{date.year}'
            if start <= date < end:
                heatmap[day, week] = series.get(date, 0)

    # Get the coordinates, offset by 0.5 to align the ticks.
    y = np.arange(8) - 0.5
    x = np.arange(num_weeks + 1) - 0.5

    # Plot the heatmap. Prefer pcolormesh over imshow so that the figure can be
    # vectorized when saved to a compatible format. We must invert the axis for
    # pcolormesh, but not for imshow, so that it reads top-bottom, left-right.
    if ax is None:
        ax = plt.gca()
    mesh = ax.pcolormesh(x, y, heatmap, **kwargs)
    ax.invert_yaxis()

    # Set the ticks.
    ax.set_xticks(list(ticks.keys()))
    ax.set_xticklabels(list(ticks.values()))
    ax.set_yticks(np.arange(7))
    ax.set_yticklabels(DAYS)

    # Set the current image and axes in the pyplot API.
    plt.sca(ax)
    plt.sci(mesh)

    return ax
def date_heatmap_demo():
    '''An example for `date_heatmap`.

    Most of the sizes here are chosen arbitrarily to look nice with 1yr of
    data. You may need to fiddle with the numbers to look right on other data.
    The result is written to ``heatmap.pdf``.
    '''
    # Get some data, a series of values with datetime index.
    data = np.random.randint(5, size=365)
    data = pd.Series(data)
    data.index = pd.date_range(start='2017-01-01', end='2017-12-31', freq='1D')

    # Create the figure. For the aspect ratio, one year is 7 days by 53 weeks.
    # We widen it further to account for the tick labels and color bar.
    figsize = plt.figaspect(7 / 56)
    fig = plt.figure(figsize=figsize)

    # Plot the heatmap with a color bar.
    ax = date_heatmap(data, edgecolor='black')
    plt.colorbar(ticks=range(5), pad=0.02)

    # Use a discrete color map with 5 colors (the data ranges from 0 to 4).
    # Extending the color limits by 0.5 aligns the ticks in the color bar.
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap is not available
    # in recent matplotlib releases.
    cmap = plt.get_cmap('Blues', 5)
    plt.set_cmap(cmap)
    plt.clim(-0.5, 4.5)

    # Force the cells to be square. If this is set, the size of the color bar
    # may look weird compared to the size of the heatmap. That can be corrected
    # by the aspect ratio of the figure or scale of the color bar.
    ax.set_aspect('equal')

    # Save to a file. For embedding in a LaTeX doc, consider the PDF backend.
    # http://sbillaudelle.de/2015/02/23/seamlessly-embedding-matplotlib-output-into-latex.html
    fig.savefig('heatmap.pdf', bbox_inches='tight')

    # The figure must be explicitly closed if it was not shown.
    plt.close(fig)
Disclaimer: This is a plug for my own package. Though I am a couple of years late to help OP, I hope that someone else will find it useful.
I did some digging around on a related issue. I ended up writing a new package exactly for this purpose when I couldn't find any other package that met all my requirements.
The package is still unpolished and it still has a sparse documentation, but I published it on PyPi anyway to make it available for others. Any feedback is appreciated, either here or on my GitHub.
july
The package is called july and can be installed with pip:
$ pip install july
Here are some use cases straight from the README:
Import packages and generate data
import numpy as np
import july
from july.utils import date_range
# One random integer in [0, 14) for every date in the range
first_day, last_day = "2020-01-01", "2020-12-31"
dates = date_range(first_day, last_day)
data = np.random.randint(low=0, high=14, size=len(dates))
GitHub Activity like plot:
july.heatmap(
    dates=dates,
    data=data,
    cmap="github",
    title='Github Activity',
)
Daily heatmap for continuous data (with colourbar):
# Here, osl_df is a pandas data frame.
july.heatmap(
    dates=osl_df.date,
    data=osl_df.temp,
    title="Average temperatures: Oslo , Norway",
    colorbar=True,
    cmap="golden",
)
Outline each month with month_grid=True
# Collect the display options first, then pass them in one call
heatmap_options = {
    "cmap": "Pastel1",
    "month_grid": True,
    "horizontal": True,
    "value_label": False,
    "date_label": False,
    "weekday_label": True,
    "month_label": True,
    "year_label": True,
    "colorbar": False,
    "fontfamily": "monospace",
    "fontsize": 12,
    "title": None,
    "titlesize": "large",
    "dpi": 100,
}
july.heatmap(dates=dates, data=data, **heatmap_options)
Finally, you can also create month or calendar plots:
# july.month_plot(dates, data, month=5) # This will plot only May.
# Create a calendar plot from the dates and data arrays.
july.calendar_plot(dates, data)
Similar packages:
calplot by Tom Kwok.
GitHub: Link
Install: pip install calplot
Actively maintained and better documentation than july.
Pandas centric, takes in a pandas series with dates and values.
Very good option if you are only looking for the heatmap functionality and don't need month_plot or calendar_plot.
calmap by Martijn Vermaat.
GitHub: Link
Install: pip install calmap
The package that calplot sprung out from.
Seems to be no longer actively maintained.
I was looking to create a calendar heatmap where each month is displayed separately. I also needed to annotate each day with the day number (day_of_month) and its value label.
I've been inspired by the answers posted here and also the following sites:
Here, although in R
Heatmap using pcolormesh
However I didn't seem to find something exactly as I was looking for, so I've decided to post my solution here to perhaps save others wanting the same kind of plot some time.
My example uses a bit of Pandas simply to generate some dummy data, so you can easily plug your own data source instead. Other than that it's just matplotlib.
Output from the code is given below. For my needs I also wanted to highlight days where the data was 0 (see 1st January).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
# Settings
# Years to render; e.g. [2018, 2019, 2020]
years = [2018]
# Row numbers of a month grid (a month spans at most six calendar weeks)
weeks = list(range(1, 7))
# Column headers, Monday first
days = ['M', 'T', 'W', 'T', 'F', 'S', 'S']
month_names = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]
def generate_data():
    """Return a dummy daily series: 365 days from 2018-01-01 valued 0..364."""
    day_index = pd.date_range('2018-01-01', periods=365, freq='D')
    values = range(len(day_index))
    return pd.Series(values, index=day_index)
def split_months(df, year):
    """
    Take a series, slice by year, and lay each month out as a calendar grid.

    Each month becomes a 6x7 array (rows are weeks, columns are weekdays with
    Monday=0); cells without a date hold NaN.

    :param df: series with a datetime index (for a DataFrame, iterate with
        iterrows instead of items)
    :param year: calendar year to select from the index
    :return: (day_nums, day_vals) - dicts keyed by month number 1-12 holding
        the day-of-month matrix and the matrix of daily values
    """
    df = df[df.index.year == year]

    # Empty matrices
    blank = np.full((6, 7), np.nan)
    day_nums = {m: blank.copy() for m in range(1, 13)}  # matrix for day numbers
    day_vals = {m: blank.copy() for m in range(1, 13)}  # matrix for day values

    # Logic to shape datetimes to matrices in calendar layout
    for stamp, value in df.items():
        day = stamp.day
        col = stamp.dayofweek
        # Weekday of the 1st of this month, derived from the current date, so
        # the row is correct even when the data starts mid-month or has gaps.
        first_col = (col - (day - 1)) % 7
        row = (day - 1 + first_col) // 7
        day_nums[stamp.month][row, col] = day  # day number (1-31)
        day_vals[stamp.month][row, col] = value  # day value (the heatmap data)

    return day_nums, day_vals
def create_year_calendar(day_nums, day_vals):
    """Draw a 3x4 grid of monthly calendar heatmaps and save it as a PDF.

    :param day_nums: dict month (1-12) -> 6x7 matrix of day-of-month numbers,
        NaN where the cell holds no calendar day
    :param day_vals: dict month (1-12) -> 6x7 matrix of the values to colour by

    The figure is written to 'calendar_example.pdf'.
    """
    fig, ax = plt.subplots(3, 4, figsize=(14.85, 10.5))

    for i, axs in enumerate(ax.flat):
        month_vals = day_vals[i + 1]
        month_nums = day_nums[i + 1]

        axs.imshow(month_vals, cmap='viridis', vmin=1, vmax=365)  # heatmap
        axs.set_title(month_names[i])

        # Labels
        axs.set_xticks(np.arange(len(days)))
        axs.set_xticklabels(days, fontsize=10, fontweight='bold', color='#555555')
        axs.set_yticklabels([])

        # Tick marks
        axs.tick_params(axis=u'both', which=u'both', length=0)  # remove tick marks
        axs.xaxis.tick_top()

        # Modify tick locations for proper grid placement
        axs.set_xticks(np.arange(-.5, 6, 1), minor=True)
        axs.set_yticks(np.arange(-.5, 5, 1), minor=True)
        axs.grid(which='minor', color='w', linestyle='-', linewidth=2.1)

        # Despine
        for edge in ['left', 'right', 'bottom', 'top']:
            axs.spines[edge].set_color('#FFFFFF')

        # Annotate
        for w in range(len(weeks)):
            for d in range(len(days)):
                day_val = month_vals[w, d]
                day_num = month_nums[w, d]

                # Padding cells hold no calendar day: skip them entirely so
                # no "nan" value label is written into the figure.
                if np.isnan(day_num):
                    continue

                # Value label
                axs.text(d, w+0.3, f"{day_val:0.0f}",
                         ha="center", va="center",
                         fontsize=7, color="w", alpha=0.8)

                # If value is 0, draw a grey patch
                if day_val == 0:
                    patch_coords = ((d - 0.5, w - 0.5),
                                    (d - 0.5, w + 0.5),
                                    (d + 0.5, w + 0.5),
                                    (d + 0.5, w - 0.5))
                    square = Polygon(patch_coords, fc='#DDDDDD')
                    axs.add_artist(square)

                # Day-number annotation
                axs.text(d+0.45, w-0.31, f"{day_num:0.0f}",
                         ha="right", va="center",
                         fontsize=6, color="#003333", alpha=0.8)  # day

                # Aesthetic background for calendar day number
                patch_coords = ((d-0.1, w-0.5),
                                (d+0.5, w-0.5),
                                (d+0.5, w+0.1))
                triangle = Polygon(patch_coords, fc='w', alpha=0.7)
                axs.add_artist(triangle)

    # Final adjustments
    fig.suptitle('Calendar', fontsize=16)
    fig.subplots_adjust(left=0.04, right=0.96, top=0.88, bottom=0.04)

    # Save to file (through the figure itself, not whichever is "current")
    fig.savefig('calendar_example.pdf')
# Render one calendar figure per configured year
for year in years:
    df = generate_data()
    day_nums, day_vals = split_months(df, year)
    create_year_calendar(day_nums=day_nums, day_vals=day_vals)
There is probably a lot of room for optimisation, but this gets what I need done.
Below is a code that can be used to generate a calendar map for daily profiles of a value.
"""
Created on Tue Sep 4 11:17:25 2018
#author: woldekidank
"""
import numpy as np
from datetime import date
import datetime
import matplotlib.pyplot as plt
import random
# Month to draw: every day of D's month gets one cell with a 24-hour profile.
D = date(2016, 1, 1)
Dord = date.toordinal(D)
Dweekday = date.weekday(D)  # Monday=0 ... Sunday=6
# Ordinal of the Sunday on or before D. (Dord - Dweekday is that week's
# Monday, so the former "+ 1" landed on Tuesday rather than Sunday.)
Dsnday = Dord - (Dweekday + 1) % 7
square = np.array([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]])  # x and y to draw a square
# Hour positions inside a unit cell; identical for every day
hourly = np.arange(1, 24 + 1).reshape(24, 1) / 24

plt.title('Temperature Profile')

row = 1
while row != 0:
    for column in range(1, 7 + 1):  # one week per row
        current_day = date.fromordinal(Dsnday)
        # Lower-left corner of this cell: go right each column, down each row
        x0 = column - 1
        y0 = 1 - row
        plt.plot(square[:, 0] + x0, square[:, 1] + y0, color='r')
        if current_day.month == D.month:
            # Random profile with one value per hour
            prof = np.array([[random.uniform(0, 1)] for _ in range(24)])
            # Always offset the profile into its own cell; drawing the first
            # profile without an offset put it in the origin cell regardless
            # of the weekday column it belongs to.
            plt.plot(hourly + x0, prof + y0)
            plt.text(column - 0.5, 1.8 - row, current_day.strftime('%a'))
            plt.text(column - 0.5, 1.5 - row, current_day.day)
        Dsnday = Dsnday + 1
    if date.fromordinal(Dsnday).month == D.month:
        row = row + 1  # new row
    else:
        row = 0  # stop the while loop
Below is the output from this code

Categories

Resources