Custom X-Axis Date Range Using Matplotlib - python

This is my first time asking a Python question online. I have always been able to find answers to my questions on this site..until now. I am trying to plot data that were developed using the Index Sequential Method, which is a technique for projecting historical data into the future. I have 105 charts that each cover 47 years of data. The first chart x-axis ranges from 1906-1952, the second 1907-1953, thir 1908-1954, etc. My problem is when I get to 1963, which is when the 47th year reverts back to the begining (1906). So the 1963 chart xaxis would look like this: 1963, 1964, 1965,...2008,2009,2010,1906. The 1964 chart xaxis would look like this: 1964, 1965, 1967,...2009, 2010, 1906, 1907.
I can get the data to plot fine, I just need help figuring out how to format the xaxis to accept the unique wrap-around situation when it occurs.
There are three charts per page (ax1, ax2, and ax3). yearList and chartList are the x and y data, respectively. The code below is part of a for loop that creates the yearList and chartList data sets, and it creates the charts with the wrong xaxis labels.
import matplotlib, pyPdf
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as tkr
from matplotlib.ticker import MultipleLocator
import matplotlib.figure as figure
plt.rcParams['font.family'] = 'Times New Roman'
locator = mdates.YearLocator(2)
minorLocator = MultipleLocator(1)
dateFmt = mdates.DateFormatter('%Y')
datemin = min(yearList)
datemax = max(yearList)
fig, (ax1, ax2, ax3) = plt.subplots(3,1,sharex=False)
#3X3 Top to bottom
ax1.bar(yearList1, chartList1, width=200, align='center')
ax2.bar(yearList2, chartList2, width=200, align='center')
ax3.bar(yearList3, chartList3, width=200, align='center')
axList = [ax1, ax2, ax3]
for ax in axList:
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(dateFmt)
ax.xaxis.set_minor_locator(minorLocator)
ax.set_xlim(datemin - timedelta(365), datemax + timedelta(365))
ax.grid(1)
ax.set_ylim(0,30)
ax.set_yticks(np.arange(0, 31, 5))
ax.yaxis.set_minor_locator(minorLocator)
#Rotate tick labels 90 degrees
xlabels = ax.get_xticklabels()
for label in xlabels:
label.set_rotation(90)
fig.tight_layout()
plt.subplots_adjust(right=0.925)
plt.savefig('%s\\run.pdf' % outDir)

You are making a bar graph, which means the x-posistion has little to no meaning aside from the labels, so don't try to plot the bars vs their date, plot them against the integers, and then label them as you wish:
from itertools import izip
fig, axeses = plt.subplots(3,1,sharex=False)
#3X3 Top to bottom
for yl, cl, ax in izip([yearList1, yearList2, yearList3],
[chartList1, chartList2, chartist3],
axeses):
ax.bar(range(len(cl)), cl, align='center')
ax.set_ylim(0,30)
ax.set_yticks(np.arange(0, 31, 5))
ax.yaxis.set_minor_locator(minorLocator)
xlabels = [dateFmt(xl) for xl in yl] # make a list of formatted labels
ax.set_xticks(range(len(cl))) # put the tick markers under your bars
ax.set_xticklabels(xlabels) # set the labels to be your formatted years
#Rotate tick labels 90 degrees
for label in ax.get_xticklabels():
label.set_rotation(90)
# you only need to do this once
fig.tight_layout()
fig.subplots_adjust(right=0.925)
fig.savefig('%s\\run.pdf' % outDir)
Also see the demo and the docs set_xticks and set_xticklabels

You can use the ax.set_ticklabels() function to set the labels.
Example:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot([1, 2, 3, 4], [10, 20, 25, 30])
ax.xaxis.set_ticklabels(["foo" , "bar", "ouch"])
plt.show()
So, just add the transformation that you need, and create the labels list.
maybe something like this:
range = 47
yearList = [1967, 1968,..., last year]
range_of_years = map(lambda x: range(year,year + range), yearList)
for i in range(len(axis_list)):
axis_list[i].xaxis.set_ticklabels(years_list[i])

Related

How to force grid to fit data range [duplicate]

So currently learning how to import data and work with it in matplotlib and I am having trouble even tho I have the exact code from the book.
This is what the plot looks like, but my question is how can I get it where there is no white space between the start and the end of the x-axis.
Here is the code:
import csv
from matplotlib import pyplot as plt
from datetime import datetime
# Get dates and high temperatures from file.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
reader = csv.reader(f)
header_row = next(reader)
#for index, column_header in enumerate(header_row):
#print(index, column_header)
dates, highs = [], []
for row in reader:
current_date = datetime.strptime(row[0], "%Y-%m-%d")
dates.append(current_date)
high = int(row[1])
highs.append(high)
# Plot data.
fig = plt.figure(dpi=128, figsize=(10,6))
plt.plot(dates, highs, c='red')
# Format plot.
plt.title("Daily high temperatures, July 2014", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.show()
There is an automatic margin set at the edges, which ensures the data to be nicely fitting within the axis spines. In this case such a margin is probably desired on the y axis. By default it is set to 0.05 in units of axis span.
To set the margin to 0 on the x axis, use
plt.margins(x=0)
or
ax.margins(x=0)
depending on the context. Also see the documentation.
In case you want to get rid of the margin in the whole script, you can use
plt.rcParams['axes.xmargin'] = 0
at the beginning of your script (same for y of course). If you want to get rid of the margin entirely and forever, you might want to change the according line in the matplotlib rc file:
axes.xmargin : 0
axes.ymargin : 0
Example
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
tips.plot(ax=ax1, title='Default Margin')
tips.plot(ax=ax2, title='Margins: x=0')
ax2.margins(x=0)
Alternatively, use plt.xlim(..) or ax.set_xlim(..) to manually set the limits of the axes such that there is no white space left.
If you only want to remove the margin on one side but not the other, e.g. remove the margin from the right but not from the left, you can use set_xlim() on a matplotlib axes object.
import seaborn as sns
import matplotlib.pyplot as plt
import math
max_x_value = 100
x_values = [i for i in range (1, max_x_value + 1)]
y_values = [math.log(i) for i in x_values]
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
sn.lineplot(ax=ax1, x=x_values, y=y_values)
sn.lineplot(ax=ax2, x=x_values, y=y_values)
ax2.set_xlim(-5, max_x_value) # tune the -5 to your needs

I am trying to plot the individual data points along with box plots from two different data frames based on a condition using seaborn

I am using seaborn in Jupyterlab to plot my data. Here is the code snippet for plotting the graph where I have separated data based on the presence/absence of PMMA shown by PMMA=1, PMMA=0 respectively. However, the strip plot on PMMA=1 for 17 and 20 on the x-axis is plotting the individual data points from PMMA=0 and the strip plot for PMMA=0 is not showing for the rest of the data. How can I fix this issue? Also, the legend is not showing the tag as "Day#"
both = pd.concat((df1, df2))
grped_bplot = sns.catplot(x='Passage#',
y='Dendrite Length (um)',
hue="Day#",
col="PMMA",
kind="box",
legend=False,
height=6,
aspect=1.3,
palette="Set2",
data=both);
grped_bplot = sns.stripplot(x='Passage#',
y='Dendrite Length (um)',
hue='Day#',
jitter=True,
dodge=True,
marker='o',
palette="Set2",
alpha=0.5,
data=both)
handles, labels = grped_bplot.get_legend_handles_labels()
l = plt.legend(handles[0:3], labels[0:3])
Boxplot with overlapping strip plot
sns.catplot returns a FacetGrid. You can call .map_dataframe(sns.stripplot) to create strip plots for the same data.
Here is some example code starting from generated test data:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# first, create some test data
both = pd.DataFrame({'Passage#': np.random.choice([6, 17, 18, 19, 20], 500),
'Dendrite Length (um)': np.random.uniform(1, 17, 500) ** 3,
'Day#': np.random.choice([3, 4, 7], 500),
'PMMA': np.random.randint(0, 2, 500)})
g = sns.catplot(x='Passage#',
y='Dendrite Length (um)',
hue="Day#",
col="PMMA",
kind="box",
legend=False,
height=6,
aspect=1.3,
palette="Set2",
boxprops={'alpha': 0.4},
data=both)
g.map_dataframe(sns.stripplot,
x='Passage#',
y='Dendrite Length (um)',
hue='Day#',
jitter=True,
dodge=True,
marker='o',
palette="Set2",
alpha=0.5)
g.add_legend(title='Day#')
plt.show()
PS: To have the boxes in the legend instead of the dots, you can call g.add_legend() after the catplot but before calling g.map_dataframe.

How to change seaborn/python default x label position, 0 and 0 in exact bottom corner [duplicate]

So currently learning how to import data and work with it in matplotlib and I am having trouble even tho I have the exact code from the book.
This is what the plot looks like, but my question is how can I get it where there is no white space between the start and the end of the x-axis.
Here is the code:
import csv
from matplotlib import pyplot as plt
from datetime import datetime
# Get dates and high temperatures from file.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
reader = csv.reader(f)
header_row = next(reader)
#for index, column_header in enumerate(header_row):
#print(index, column_header)
dates, highs = [], []
for row in reader:
current_date = datetime.strptime(row[0], "%Y-%m-%d")
dates.append(current_date)
high = int(row[1])
highs.append(high)
# Plot data.
fig = plt.figure(dpi=128, figsize=(10,6))
plt.plot(dates, highs, c='red')
# Format plot.
plt.title("Daily high temperatures, July 2014", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.show()
There is an automatic margin set at the edges, which ensures the data to be nicely fitting within the axis spines. In this case such a margin is probably desired on the y axis. By default it is set to 0.05 in units of axis span.
To set the margin to 0 on the x axis, use
plt.margins(x=0)
or
ax.margins(x=0)
depending on the context. Also see the documentation.
In case you want to get rid of the margin in the whole script, you can use
plt.rcParams['axes.xmargin'] = 0
at the beginning of your script (same for y of course). If you want to get rid of the margin entirely and forever, you might want to change the according line in the matplotlib rc file:
axes.xmargin : 0
axes.ymargin : 0
Example
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
tips.plot(ax=ax1, title='Default Margin')
tips.plot(ax=ax2, title='Margins: x=0')
ax2.margins(x=0)
Alternatively, use plt.xlim(..) or ax.set_xlim(..) to manually set the limits of the axes such that there is no white space left.
If you only want to remove the margin on one side but not the other, e.g. remove the margin from the right but not from the left, you can use set_xlim() on a matplotlib axes object.
import seaborn as sns
import matplotlib.pyplot as plt
import math
max_x_value = 100
x_values = [i for i in range (1, max_x_value + 1)]
y_values = [math.log(i) for i in x_values]
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
sn.lineplot(ax=ax1, x=x_values, y=y_values)
sn.lineplot(ax=ax2, x=x_values, y=y_values)
ax2.set_xlim(-5, max_x_value) # tune the -5 to your needs

Plotting a vertical line for df.plot.bar works, but it doesn't for a lineplot

I followed all step following my question here : Pandas Dataframe : How to add a vertical line with label to a bar plot when your data is time-series?
it was supposed to solve my problem but when I change the The kind of plot to line , the vertical line did not appear . I copy the same code and change plot type to line instead of bar :
as you can see with bar , the vertical line (in red ) appears .
# function to plot a bar
def dessine_line3(madataframe,debut_date , mes_colonnes):
madataframe.index = pd.to_datetime(madataframe.index,format='%m/%d/%y')
df = madataframe.loc[debut_date:,mes_colonnes].copy()
filt = (df[df.index == '4/20/20']).index
df.index.searchsorted(value=filt)
fig,ax = plt.subplots()
df.plot.bar(figsize=(17,8),grid=True,ax=ax)
ax.axvline(df.index.searchsorted(filt), color="red", linestyle="--", lw=2, label="lancement")
plt.tight_layout()
out :
but whan I just change code by changing the type of plot to line : there is no vertical line and also the x axis (date ) changed .
so I wrote another code juste to draw line with vertical line
ax = madagascar_maurice_case_df[["Madagascar Covid-19 Ratio","Maurice Covid-19 Ratio"]].loc['3/17/20':].plot.line(figsize=(17,7),grid=True)
filt = (df[df.index=='4/20/20']).index
ax.axvline(df.index.searchsorted(filt),color="red",linestyle="--",lw=2 ,label="lancement")
plt.show()
but the result is the same
following the comment below , here is my final code :
def dessine_line5(madataframe,debut_date , mes_colonnes):
plt.figure(figsize=(17,8))
plt.grid(b=True,which='major',axis='y')
df = madataframe.loc[debut_date:,mes_colonnes]
sns.lineplot(data=df)
lt = datetime.toordinal(pd.to_datetime('4/20/20'))
plt.axvline(lt,color="red",linestyle="--",lw=2,label="lancement")
plt.show()
and the result is :
Plot tick locs
The issue is the plot tick locations are a different style depending on plot kind and api
df.plot vs. plt.plot vs. sns.lineplot
Place ticks, labels = plt.xticks() after df.plot.bar(figsize=(17,8),grid=True,ax=ax) and printing ticks will give array([0, 1, 2,..., len(df.index)]), which is why df.index.searchsorted(filt) works, it produces an integer location.
df.plot() has tick locs like array([13136, 13152, 13174, 13175], dtype=int64), for my sample date range. I don't actually know how those numbers are derived, so I don't know how to convert the date to that format.
sns.lineplot and plt.plot have tick locs that are the ordinal representation of the datetime, array([737553., 737560., 737567., 737577., 737584., 737591., 737598.,
737607.]
For a lineplot with your example:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
sns.lineplot(data=df)
lt = datetime.toordinal(pd.to_datetime('2020/04/20'))
plt.axvline(lt, color="red", linestyle="--", lw=2, label="lancement")
plt.show()
For my example data:
import numpy as np
data = {'a': [np.random.randint(10) for _ in range(40)],
'b': [np.random.randint(10) for _ in range(40)],
'date': pd.bdate_range(datetime.today(), periods=40).tolist()}
df = pd.DataFrame(data)
df.set_index('date', inplace=True)
sns.lineplot(data=df)
ticks, labels = plt.xticks()
lt = datetime.toordinal(pd.to_datetime('2020-05-19'))
plt.axvline(lt, color="red", linestyle="--", lw=2, label="lancement")
plt.show()

Matplotlib Different Scaled Y-Axes

I have a dataframe with the data below.
ex_dict = {'revenue': [613663, 1693667, 2145183, 2045065, 2036406,
1708862, 1068232, 1196899, 2185852, 2165778, 2144738, 2030337,
1784067],
'abs_percent_diff': [0.22279211315310588, 0.13248909660765254,
0.12044821447874667, 0.09438674840975962, 0.1193588387687364,
0.062100921139322744, 0.05875297161175445, 0.06240362963749895,
0.05085338590212515, 0.034877614941165744, 0.012263947005671703,
0.029227374323993634, 0.023411816504907524],
'ds': [dt.date(2017,1,1), dt.date(2017,1,2), dt.date(2017,1,3),
dt.date(2017,1,4), dt.date(2017,1,5), dt.date(2017,1,6),
dt.date(2017,1,7), dt.date(2017,1,8), dt.date(2017,1,9),
dt.date(2017,1,10), dt.date(2017,1,11), dt.date(2017,1,12),
dt.date(2017,1,13)],
'yhat_normal': [501853.9074623253, 1952329.3521464923, 1914575.7673396615,
1868685.8215084015, 1819261.1068672044, 1608945.031482406,
1008953.0123101478, 1126595.36037955, 2302965.598289115,
2244044.9351591542, 2171367.536396199, 2091465.0313570146,
1826836.562382966]}
df_vis=pd.DataFrame.from_dict(ex_dict)
I want to graph yhat_normal and revenue on the same y-axis and abs_percent_diff on a y-axis with a different scale.
df_vis = df_vis.set_index('ds')
df_vis[['rev', 'yhat_normal']].plot(figsize=(20, 12))
I can easily graph rev and yhat_normal with the code above, but I am struggling to get abs_percent_diff on a different y-axis scale. I tried converting my columns to numpy arrays and doing this, but it looks terrible.
npdate = df_vis.as_matrix(columns= ['ds'])
nppredictions = df_vis.as_matrix(columns= ['yhat_normal'])
npactuals = df_vis.as_matrix(columns= ['rev'])
npmape = df_vis.as_matrix(columns=['abs_percent_diff'])
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
fig.set_size_inches(20,10)
ax1.plot_date(npdate, nppredictions, ls= '-', color= 'b')
ax1.plot_date(npdate, npactuals, ls='-', color='g')
ax2.plot_date(npdate, npmape, 'r-')
ax1.set_xlabel('X data')
ax1.set_ylabel('Y1 data', color='g')
ax2.set_ylabel('Y2 data', color='b')
plt.show()
This is what I want. Where the red line is the abs_percent_diff. Obviously, I drew the line by hand so it is not accurate.
I'm not sure if I got the problem correclty, but it seems you simply want to draw one of the dataframe columns at the bottom of the plot area.
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
ex_dict = {'revenue': [613663, 1693667, 2145183, 2045065, 2036406,
1708862, 1068232, 1196899, 2185852, 2165778, 2144738, 2030337,
1784067],
'abs_percent_diff': [0.22279211315310588, 0.13248909660765254,
0.12044821447874667, 0.09438674840975962, 0.1193588387687364,
0.062100921139322744, 0.05875297161175445, 0.06240362963749895,
0.05085338590212515, 0.034877614941165744, 0.012263947005671703,
0.029227374323993634, 0.023411816504907524],
'ds': [dt.date(2017,1,1), dt.date(2017,1,2), dt.date(2017,1,3),
dt.date(2017,1,4), dt.date(2017,1,5), dt.date(2017,1,6),
dt.date(2017,1,7), dt.date(2017,1,8), dt.date(2017,1,9),
dt.date(2017,1,10), dt.date(2017,1,11), dt.date(2017,1,12),
dt.date(2017,1,13)],
'yhat_normal': [501853.9074623253, 1952329.3521464923, 1914575.7673396615,
1868685.8215084015, 1819261.1068672044, 1608945.031482406,
1008953.0123101478, 1126595.36037955, 2302965.598289115,
2244044.9351591542, 2171367.536396199, 2091465.0313570146,
1826836.562382966]}
df_vis=pd.DataFrame.from_dict(ex_dict)
df_vis = df_vis.set_index('ds')
ax = df_vis[['revenue','yhat_normal']].plot(figsize=(13, 8))
ax2 = df_vis['abs_percent_diff'].plot(secondary_y=True, ax=ax)
ax2.set_ylim(0,1)
plt.show()

Categories

Resources