how to plot a date into matplotlib - python

Hi I am trying to change the x-axis into a month axis but cant seem to do it.
I can't find a way to insert the datetime axis, and when I do I run into errors.
This is my datetime variables, basically its simple, just want to create a weekly counter that starts on 2020 for 1 year:
date = pd.date_range(start='2020-01-01', periods=52, freq='w')
This is my plot code:
cumm19=fcst2019.cumsum()
cumm20=fcst2020.cumsum()
cumm20w=fcst2020w.cumsum()
plt.axhline(y=cumm19['no._of_cases'][51], color='r', label= '2019 numbers (15,910)')
plt.plot(cumm20[:21], label ='2020 current')
plt.plot(cumm20[20:], label ='2020 predictions')
plt.plot(cumm20w[20:], label ='2020 worse case predictions')
plt.xlabel('Months')
plt.ylabel('Cumulative no. of cases')
plt.legend()
plt.show()
How do i combine them?

If you set the datetime variable as the index of the dataframes, the x-axis ticks automatically get date labels with an appropriate format:
import numpy as np # v 1.19.2
import pandas as pd # v 1.2.3
import matplotlib.pyplot as plt # v 3.3.4
import matplotlib.dates as mdates
# Create sample datasets
date = pd.date_range(start='2020-01-01', periods=52, freq='w')
fcst2020 = pd.DataFrame(dict(fcst=np.repeat([500, 750], [21, 31])), index=date)
fcst2020w = pd.DataFrame(dict(fcst=np.repeat([500, 600], [21, 31])), index=date)
cumm20 = fcst2020.cumsum()
cumm20w = fcst2020w.cumsum()
# Plot data
plt.plot(cumm20[:21], label ='2020 current')
plt.plot(cumm20[20:], label ='2020 predictions')
plt.plot(cumm20w[20:], label ='2020 worse case predictions')
plt.axhline(y=15910, color='r', label= '2019 numbers (15,910)', zorder=0)
plt.xlabel('Months')
plt.ylabel('Cumulative no. of cases')
plt.legend()
plt.show()
You can customize the format of the tick labels by using the matplotlib dates DateFormatter:
# Edit x-axis tick labels
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m'))
plt.show()

Related

How to add multiple custom ticks to seaborn boxplot

I generated a boxplot using seaborn. On the x axis, I would like to have, both the number of days (20, 25, 32) and the actual dates they refer to (2022-05-08, 2022-05-13, 2022-05-20).
I found a potential solution at the following link add custom tick with matplotlib. I'm trying to adapt it to my problem but I could only get the number of days or the dates, not both.
I really would appreciate any help. Thank you in advance for your time.
Please, find below my code and the desired output.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = [tick_label.get(t, ticks[i]) for i,t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()
Here is the desired output.
You can do that by adding these lines in place of ax.set_xticklabels(labels)
new_labels=["{}\n{}".format(a_, b_) for a_, b_ in zip(ticks, labels)]
ax.set_xticklabels(new_labels)
Output
Try this:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = ["{}\n".format(t)+tick_label.get(t, ticks[i]) for i, t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()

date and graph alignment - Economic analysis

I'm am running a fundamental economic analysis and when I get to visualising and charting I am not able to align the dates with the graph.
I wanted the most recent date entry to show on the right and the rest of the dates to show every two years.
I have tried literally everything and cant find the solution.
Here is my code:
%matplotlib inline
import pandas as pd
from matplotlib import pyplot
import matplotlib.dates as mdates
df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csvbgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=off&txtcolor=%23444444&ts=12&tts=12&width=1168&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=NAEXKP01EZQ657S&scale=left&cosd=1995-04-01&coed=2020-04-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Quarterly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2020-09-21&revision_date=2020-09-21&nd=1995-04-01')
df = df.set_index('DATE')
df['12MonthAvg'] = df.rolling(window=12).mean().dropna(how='all')
df['9MonthAvg'] = df['12MonthAvg'].rolling(window=12).mean().dropna(how='all')
df['Spread'] = df['12MonthAvg'] - df['9MonthAvg']
pyplot.style.use("seaborn")
pyplot.subplots(figsize=(10, 5), dpi=85)
df['Spread'].plot().set_title('EUROPE: GDP Q Growth Rate (12M/12M Avg Spread)', fontsize=16)
df['Spread'].plot().axhline(0, linestyle='-', color='r',alpha=1, linewidth=2, marker='')
df['Spread'].plot().spines['left'].set_position(('outward', 10))
df['Spread'].plot().spines['bottom'].set_position(('outward', 10))
df['Spread'].plot().spines['right'].set_visible(False)
df['Spread'].plot().spines['top'].set_visible(False)
df['Spread'].plot().yaxis.set_ticks_position('left')
df['Spread'].plot().xaxis.set_ticks_position('bottom')
df['Spread'].plot().text(0.50, 0.02, "Crossing red line downwards / Crossing red line Upwards",
transform=pyplot.gca().transAxes, fontsize=14, ha='center', color='blue')
df['Spread'].plot().fmt_xdata = mdates.DateFormatter('%Y-%m-%d')
print(df['Spread'].tail(3))
pyplot.autoscale()
pyplot.show()
And the output:
This is the raw data:
There is a couple of corrections to your code.
In your URL insert "?" after fredgraph.csv. It starts so called query string,
where bgcolor is the first parameter.
Read your DataFrame with additional parameters:
df = pd.read_csv('...', parse_dates=[0], index_col=[0])
The aim is to:
read Date column as datetime,
set it as the index.
Create additional columns as:
df['12MonthAvg'] = df.NAEXKP01EZQ657S.rolling(window=12).mean()
df['9MonthAvg'] = df.NAEXKP01EZQ657S.rolling(window=9).mean()
df['Spread'] = df['12MonthAvg'] - df['9MonthAvg']
Corrections:
9MonthAvg (as I think) should be computed from the source column,
not from 12MonthAvg,
dropna here is not needed, as you create whole column anyway.
Now is the place to use dropna() on Spread column and save it in
a dedicated variable:
spread = df['Spread'].dropna()
Draw your figure the following way:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
plt.style.use("seaborn")
fig, ax = plt.subplots(figsize=(10, 5), dpi=85)
plt.plot_date(spread.index, spread, fmt='-')
ax.set_title('EUROPE: GDP Q Growth Rate (12M/12M Avg Spread)', fontsize=16)
ax.axhline(0, linestyle='-', color='r',alpha=1, linewidth=2, marker='')
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.text(0.50, 0.02, "Crossing red line downwards / Crossing red line Upwards",
transform=ax.transAxes, fontsize=14, ha='center', color='blue')
ax.xaxis.set_major_formatter(mdates.DateFormatter(fmt='%Y-%m-%d'))
plt.show()
Corrections:
plt.subplots returns fig and ax, so I saved them (actually, only ax
is needed).
When one axis contains dates, it is better to use plot_date.
I changed the way DateFormatter is set.
Using the above code I got the following picture:

Matplotlib Plot time series with different periodicity

I have 2 dfs. One of them has data for a month. Another one, averages for the past quarters. I wanna plot the averages in front of the monthly data. How can I do it? Please note that I am trying to plot averages as dots and monthly as line chart.
So far my best result was achieved by ax1=ax.twiny(), but still not ideal result as data point appear in throughout the chart, rather than just in front.
import pandas as pd
import numpy as np
import matplotlib.dates as mdates
from matplotlib.ticker import ScalarFormatter, FormatStrFormatter, FuncFormatter
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
date_base = pd.date_range(start='1/1/2018', end='1/30/2018')
df_base = pd.DataFrame(np.random.randn(30,4), columns=list("ABCD"), index=date_base)
date_ext = pd.date_range(start='1/1/2017', end='1/1/2018', freq="Q")
df_ext = pd.DataFrame(np.random.randn(4,4), columns=list("ABCD"), index=date_ext)
def drawChartsPlt(df_base, df_ext):
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
number_of_plots = len(df_base.columns)
LINE_STYLES = ['-', '--', '-.', 'dotted']
colormap = plt.cm.nipy_spectral
ax.set_prop_cycle("color", [colormap(i) for i in np.linspace(0,1,number_of_plots)])
date_base = df_base.index
date_base = [i.strftime("%Y-%m-%d") for i in date_base]
q_ends = df_ext.index
q_ends = [i.strftime("%Y-%m-%d") for i in q_ends]
date_base.insert(0, "") #to shift xticks so they match chart
date_base += q_ends
for i in range(number_of_plots):
df_base.ix[:-3, df_base.columns[i]].plot(kind="line", linestyle=LINE_STYLES[i%2], subplots=False, ax=ax)
#ax.set_xticks(date_base)
#ax.set_xticklabels(date_base)
# ax.xaxis.set_major_locator(ticker.MultipleLocator(20))
ax.xaxis.set_major_locator(ticker.LinearLocator(len(date_base)))
ax.xaxis.set_major_formatter(plt.FixedFormatter(date_base))
fig.autofmt_xdate()
# ax1=ax.twinx()
ax1=ax.twiny()
ax1.set_prop_cycle("color", [colormap(i) for i in np.linspace(0,1,number_of_plots)])
for i in range(len(df_ext.columns)):
ax1.scatter(x=df_ext.index, y=df_ext[df_ext.columns[i]])
ax.set_title("Test")
#plt.minorticks_off())
ax.minorticks_off()
#ax1.minorticks_off()
#ax1.set_xticklabels(date_base)
#ax1.set_xticklabels(q_ends)
ax.legend(loc="center left", bbox_to_anchor=(1,0.5))
ax.xaxis.label.set_size(12)
plt.xlabel("TEST X Label")
plt.ylabel("TEST Y Label")
ax1.set_xlabel("Quarters")
plt.show()
drawChartsPlt(df_base, df_ext)
The way I ended up coding it is by saving quarterly index of df_ext to a temp variable, overwriting it with dates that are close to df_base.index using pd.date_range(start=df_base.index[-1], periods=len(df_ext), freq='D'), and the finally setting the dates that I need with ax.set_xticklabels(list(date_base)+list(date_ext)).
It looks like it could be achieved using broken axes as indicated Break // in x axis of matplotlib and Python/Matplotlib - Is there a way to make a discontinuous axis?, but I haven't tried that solution.

How do I index or plot datetimes after resampling so they display on a bar plot axis correctly?

I want to display my third plot x-axis data in the datetime like my other two plots (see linked figure). I have used similar approaches to each graph, but resampled the third dataset to plot precipitation in a bar graph for every hour in my time period. When I originally attempted to format the date for the third plot as I did in the previous two, the x-axis labels either disappeared or the data doesn't plot correctly. In the link below, the data is displayed the way I intended.
Three subplots of rainfall
My timeseries data appears like this, where I'm only concerned about 'Reading' and 'Value':
Reading,Receive,Value,Unit,Quality
2018-04-07 13:09:28,2018-04-07 13:09:35,0.00,in,A
2018-04-07 06:01:25,2018-04-07 06:01:35,0.04,in,A
2018-04-07 04:38:15,2018-04-07 04:38:35,0.04,in,A
Here is how I achieved the correct scheme in the second plot:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.patches as patches
import matplotlib.dates as mdates
import datetime as dt
#read data from csv
data2 = pd.read_csv('Arroyo_Corte_Madera_del_Presidio_38021_Precipitation_Accumulation_0.txt', usecols=['Reading','Value'], parse_dates=['Reading'])
#set date as index
data2.set_index('Reading',inplace=True)
#plot data
ax2 = plt.subplot(3, 1, 2)
data2.plot(ax=ax2)
#set ticks every 12 hours
ax2.xaxis.set_major_locator(mdates.HourLocator(byhour=range(0,24,12)))
plt.xticks(rotation=0, ha='center')
#format date
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%b %d\n%H:%M:%S'))
ax2.legend().set_visible(False)
ax2.set_title('Accumulated Rainfall\nApril 5-7, 2018')
ax2.set_xlabel('')
ax2.set_ylabel('Inches Since Oct 1 2017')
ax2.set_ylim(17.5, 22)
arrow_date2 = mdates.datestr2num('04/07/2018 04:30:00')
start_date2 = mdates.datestr2num('04/07/2018 03:00:00')
end_date2 = mdates.datestr2num('04/07/2018 06:00:00')
text_date2 = mdates.datestr2num('04/07/2018 03:00:00')
ax2.axvspan(start_date2, end_date2, 0.86, 0.97, color='green', alpha=0.35)
ax2.annotate("Approximate time of\nSlope Failure", xy=(arrow_date2, 21.5), xycoords='data', xytext=(text_date2, 19), textcoords='data', arrowprops=dict(arrowstyle="->", connectionstyle="arc3"))
My code so far for the third subplot:
#read data from csv
data =pd.read_csv('Arroyo_Corte_Madera_del_Presidio_38021_Precipitation_Increment_0.txt', usecols=['Reading','Value'], parse_dates=['Reading'])
#set date as index
data.set_index('Reading',inplace=True)
resamp = data.resample('1H').sum().reset_index()
#plot data
ax3 = plt.subplot(3, 1, 3)
resamp.plot(kind='bar',ax=ax3, x='Reading', y='Value', width=0.9)
#set ticks every other hour
plt.xticks(ha='center')
for label in ax3.xaxis.get_ticklabels()[::2]:
label.set_visible(False)
ax3.legend().set_visible(False)
ax3.set_title('Rainfall in Hours\nApril 6-7, 2018')
ax3.set_xlabel('')
ax3.set_ylabel('Precipitation Increment (in)')
plt.show()
How do I fix my code to make the axis labels plot in the way I want them to plot?
My code was wrong, obviously. When I resampled the data, I reset the index. This created a new index column that was messing with my desired x values ('Reading'). Additionally, I shouldn't have been plotting 'x' in resamp.plot. This solution helped: Plotting with Pandas. Here is the corrected code:
#read data from csv
data = pd.read_csv('Arroyo_Corte_Madera_del_Presidio_38021_Precipitation_Increment_0.txt', usecols=['Reading','Value'], parse_dates=['Reading'])
#set date as index
data.set_index('Reading',inplace=True)
resamp = data.resample('1H').sum() # changed here
#plot data
ax3 = plt.subplot(3, 1, 3)
resamp.plot(ax=ax3, y='Value', kind='bar', width=0.9) # changed here
ax3.set_xticklabels([dt.strftime('%b %d\n%H:%M:%S') for dt in resamp.index])
plt.xticks(rotation=0, ha='center')
for i, tick in enumerate(ax3.xaxis.get_major_ticks()):
if (i % (4) != 0): # 4 hours
tick.set_visible(False)
ax3.legend().set_visible(False)
ax3.set_title('Rainfall in Hours\nApril 6-7, 2018')
ax3.set_xlabel('')
ax3.set_ylabel('Precipitation Increment (in)')
ax3.set_ylim(0.00, 0.40)
plt.show()

Matplotlib AutoDateLocator not working with DatetimeIndex

I have a dataframe with a DatetimeIndex like so:
In [3]: index = pd.date_range('September 1 2014', 'September 1 2015', freq='M')
...: index
Out[3]:
DatetimeIndex(['2014-09-30', '2014-10-31', '2014-11-30', '2014-12-31',
'2015-01-31', '2015-02-28', '2015-03-31', '2015-04-30',
'2015-05-31', '2015-06-30', '2015-07-31', '2015-08-31'],
dtype='datetime64[ns]', freq='M'
Plotting without changing the x tick labels or explicit date formatting yields an x-axis from 0-12.
My figure contains 13 subplots in one column. I'm trying to set the x-axis on the last plot using AutoDateLocator() at the end of the code after all the subplots are plotted:
fig.axes[-1].xaxis.set_major_locator(mdates.AutoDateLocator())
Which returns the following error:
ValueError: ordinal must be >= 1
I tried converting the dataframe index with dates2num as suggested here but it yielded the same result:
df.index = mdates.date2num(df.index.to_pydatetime())
I tried consulting the documentation but I couldn't dig up any other way to make matplotlib recognize the x-axis as dates.
Here is the complete code:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
index = pd.date_range('September 1 2014', 'September 1 2015', freq='M')
data = np.random.random([12, 13])
df = DataFrame(data=data, index=index)
# Draw figure
fig = plt.figure(figsize=(19,20), dpi=72)
fig.suptitle('Chart', fontsize=40, color='#333333')
# Draw charts
for i in range(0, len(df)):
ax = plt.subplot(len(df),1, i+1)
# Set ticks and labels
ax.tick_params(direction='in', length=45, pad=60,colors='0.75')
ax.yaxis.set_tick_params(size=0)
ax.set_yticklabels([])
ax.set_ylim([0, 2])
plt.ylabel(df.columns[i], rotation=0, labelpad=60, fontsize=20, color='#404040')
# Remove spines
sns.despine(ax=ax, bottom=True)
# Draw baseline, data, and threshold lines
# Threshold
ax.axhline(1.6, color='#a0db8e', linestyle='--', label='Threshold')
# Draw baseline
ax.axhline(1, color='0.55',label='Enterprise')
# Plot data
ax.plot(df.iloc[:, i], color='#14509f',label='Data')
# Subplot spacing
fig.subplots_adjust(hspace=1)
# Hide tick labels and first/last tick lines
plt.setp([a.get_xticklabels() for a in fig.axes[:-1]], visible=False)
plt.setp([a.get_xticklines()[-2:] for a in fig.axes],visible=False)
# Date in x-axis
fig.axes[-1].xaxis.set_major_locator(mdates.AutoDateLocator())
fig.axes[-1].xaxis.set_major_formatter(mdates.DateFormatter('%Y.%m.%d'))

Categories

Resources