The following code plots a stacked bar chart split into 4 subplots, one for each value in Area. The plotted values come from Result, a column that contains only 0's and 1's. I want to plot the total count of these values for each distinct value of Group.
This works fine, but I'm hoping to use the secondary axis to show the normalised values as a line plot. Specifically, the percentage of 1's compared to 0's. At the moment, I just have the total count of 0's and 1's as a bar chart. I'm hoping to plot the percentage of 1's using the secondary y-axis.
import pandas as pd
import matplotlib.pyplot as plt
# Sample data: one row per observation, with a binary Result, an integer
# Group, and the Area that selects the subplot.
df = pd.DataFrame(dict(
    Result=[0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1],
    Group=[-2, -1, 1, 0, 0, -1, -1, 0, 1, -1, 0, 1, -1, 1, 0, 1],
    # The four areas simply repeat in the same order four times.
    Area=['North', 'East', 'South', 'West'] * 4,
))

# Number of 1's across the whole frame (Result only holds 0's and 1's).
total = df.Result.sum()
def custom_stacked_barplot(t, sub_df, ax):
    """Draw the stacked Result counts per Group for one area on *ax*.

    Parameters:
        t: key of the area being plotted; accepted but not used here.
        sub_df: rows of the data frame belonging to that area.
        ax: axes that receives the stacked bar chart.

    Returns:
        Whatever ``DataFrame.plot`` returns for the bar chart.

    A twin y-axis is created for the normalised line, but nothing is drawn
    on it yet -- that is the part this question asks about.
    """
    # Two aggregations per (Group, Result) cell: the raw count, and the sum
    # of Result scaled by the module-level ``total`` as a percentage.
    aggregations = ['count', (lambda x: sum(x) / total * 100)]
    table = pd.crosstab(
        index=sub_df['Group'],
        columns=sub_df['Result'],
        values=sub_df['Result'],
        aggfunc=aggregations,
    )

    bar_options = dict(kind="bar", stacked=True, rot=0, width=0.6, legend=False)
    bars = table.plot(y='count', ax=ax, **bar_options)

    # Secondary axis reserved for the normalised line (still to be plotted;
    # an earlier attempt plotted the '<lambda>' columns here in black).
    ax.twinx()

    return bars
# Split the frame by area and give each area its own row in the figure.
g_dfs = df.groupby(['Area'])
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8, 12))

# Pair each subplot with one (key, rows) group, taken in sorted key order.
for ax, group_item in zip(axes.ravel(), sorted(g_dfs)):
    area_key, area_rows = group_item
    custom_stacked_barplot(area_key, area_rows, ax)

plt.legend(bbox_to_anchor=(1.129, 2.56))
plt.show()
intended df output to plot:
count perc
Result 0 1 0
Group
-1 1.0 2.0 0.66
1 0.0 1.0 1.0
count perc
Result 0 1 0
Group
-2 1.0 0.0 0.0
-1 0.0 1.0 1.0
0 1.0 0.0 0.0
1 0.0 1.0 1.0
count perc
Result 0 1 0
Group
-1 0.0 1.0 1.0
0 1.0 1.0 0.5
1 0.0 1.0 1.0
count perc
Result 0 1 0
Group
0 1.0 1.0 0.5
1 0.0 2.0 1.0
try using twinx()
import matplotlib.pyplot as plt
# Same sample data as in the question: binary Result, integer Group, and the
# Area used to split the figure into subplots.
df = pd.DataFrame(
    {
        'Result': [0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1],
        'Group': [-2, -1, 1, 0, 0, -1, -1, 0, 1, -1, 0, 1, -1, 1, 0, 1],
        # Four areas, cycling in the same order four times.
        'Area': ['North', 'East', 'South', 'West'] * 4,
    }
)

# Overall number of 1's in Result.
total = df.loc[:, 'Result'].sum()
def custom_stacked_barplot(t, sub_df, ax):
    """Plot stacked 0/1 counts per Group on *ax* and the share of 1's on a twin axis.

    Parameters:
        t: key of the area being plotted; accepted for call compatibility
            but not used.
        sub_df: rows for one area, with a ``Group`` column and a ``Result``
            column holding 0's and 1's.
        ax: axes that receives the stacked bar chart.

    Returns:
        A ``(p, r)`` pair: ``p`` is the return value of the bar plot call and
        ``r`` is the twin axes carrying the percentage line.

    The percentage is computed per Group as ``count(1) / (count(0) + count(1))``.
    It is drawn as a line rather than a second stacked bar chart, because a
    second bar chart on the twin axis occupies the same x positions and width
    as the count bars and covers them.
    """
    counts = pd.crosstab(index=sub_df['Group'], columns=sub_df['Result'])
    # Make sure both outcomes have a column, so the ratio below never hits a
    # missing label and the bar colours mean the same thing in every subplot.
    counts = counts.reindex(columns=[0, 1], fill_value=0)

    # Every Group in the index has at least one row, so the row total is > 0.
    perc = counts[1] / counts.sum(axis=1) * 100
    print(counts.assign(perc=perc))

    p = counts.plot(kind="bar", stacked=True, ax=ax, rot=0, width=0.6, legend=False)

    ax2 = ax.twinx()
    # The bar plot places its bars at 0, 1, 2, ... (the Group values are only
    # tick labels), so the line must use those positions, not the Group values.
    positions = list(range(len(perc)))
    ax2.plot(positions, perc.to_numpy(), color='black', marker='o',
             zorder=2, label="% of 1's")
    # Fixed percentage scale (with a little headroom) so subplots are comparable.
    ax2.set_ylim(0, 105)
    r = ax2
    return p, r
# Split the frame by area and draw each area in its own subplot row.
g_dfs = df.groupby(['Area'])
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8, 12))
for ax, (i, g) in zip(axes.ravel(), sorted(g_dfs)):
    custom_stacked_barplot(i, g, ax)
plt.legend(bbox_to_anchor=(1.129, 2.56))

# Save the plot as a file *before* showing it: plt.show() normally blocks
# until the window is closed, so saving afterwards delays the write, and a
# second plt.show() after that has nothing left to display.
fig.savefig('two_different_y_axis_for_single_python_plot_with_twinx.jpg',
            format='jpeg',
            dpi=100,
            bbox_inches='tight')
plt.show()
The output looks something like :
Ok, so I gave this a try, too:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Sample data from the question: binary Result, integer Group, and the Area
# that decides which subplot a row belongs to.
df = pd.DataFrame.from_dict({
    'Result': [0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1],
    'Group': [-2, -1, 1, 0, 0, -1, -1, 0, 1, -1, 0, 1, -1, 1, 0, 1],
    # The four areas repeat in the same order four times.
    'Area': ['North', 'East', 'South', 'West'] * 4,
})
## iterate over unique areas
unique_areas = df['Area'].unique()
fig, axes = plt.subplots(nrows=len(unique_areas), ncols=1, figsize=(8, 12))
## with a single row plt.subplots hands back one Axes instead of an array;
## normalise to a 1-D array so axes[i] also works when there is only one area
axes = np.atleast_1d(axes)
twin_axes = []
for i, key in enumerate(unique_areas):
    # print(f"== {key} ==")  #<- uncomment this line to debug
    ## first, filter the df by 'Area'
    area_df = df[df['Area'] == key]
    ## and do the crosstab:
    ct_df = pd.crosstab(index=area_df['Group'], columns=area_df['Result'])
    ## make sure both outcomes have a column, otherwise an area without any
    ## Result == 1 rows would raise a KeyError when the share is computed
    ct_df = ct_df.reindex(columns=[0, 1], fill_value=0)
    ## to add the 'count' label you wanted to the dataframe multiindex:
    ct_df = pd.concat({'count': ct_df}, names=['type'], axis=1)
    ## contribution of Result == 1 to the total of each group, computed for
    ## all groups at once (every group has at least one row, so no 0-division)
    perc = ct_df[('count', 1)] / ct_df['count'].sum(axis=1)
    ct_df['perc'] = perc
    # print(ct_df)  #<- uncomment this line to debug
    ## add your stacked bar plot
    bar = ct_df.plot(kind="bar", y='count', stacked=True, ax=axes[i],
                     rot=0, width=0.6, legend=False)
    ## keep the twin_axes in a separate list
    twin_ax = axes[i].twinx()
    twin_axes.append(twin_ax)
    ## generate the "correct" x values that match the bar plot locations
    # (i.e. use [0,1,2,3] instead of [-2,-1,0,1] )
    xs = np.arange(0, len(ct_df), 1)
    ## and plot the percentages as a function of this new x range as a black line:
    twin_ax.plot(xs, perc.to_numpy(), zorder=2, color='black')
    ## optional:
    # using these 'xs' you could also e.g. add some labels for the contained groups:
    for x, group in zip(xs, ct_df.index):
        twin_ax.text(x, 1.15, group, color="b")
    # make some nice changes to the formatting of the plots:
    # same 0..1 scale (plus a little headroom) on every secondary axis
    # twin_ax.set_xlim(-1, 4)
    twin_ax.set_ylim(0, 1.1)
plt.show()
Mainly, instead of trying to make pd.crosstab do everything, I'd suggest writing a quick and easy for loop over the unique areas to get the df structure you want.
Each per-area dataframe now looks like what you wanted:
type count perc
Result 0 1
Group
-2 1 0 0.0
-1 0 1 1.0
0 1 0 0.0
1 0 1 1.0
type count perc
Result 0 1
Group
-1 1 2 0.666667
1 0 1 1.000000
type count perc
Result 0 1
Group
-1 0 1 1.0
0 1 1 0.5
1 0 1 1.0
type count perc
Result 0 1
Group
0 1 1 0.5
1 0 2 1.0
And the plot now looks like this:
Edit:
def create_plot(ax, x, y1, y2, y3):
    """Draw two stacked bar series on *ax* and a line on a twin y-axis.

    Parameters:
        ax: axes that receives the bars.
        x: x positions shared by the bars and the line.
        y1: heights of the lower bar series.
        y2: heights of the upper bar series, stacked on top of ``y1``.
        y3: values of the line drawn on the secondary y-axis.
    """
    line_ax = ax.twinx()

    # Stacked bars: the second series starts where the first one ends.
    ax.bar(x, y1)
    ax.bar(x, y2, bottom=y1)

    # Line on the secondary axis, in the colour-cycle entry "C3".
    line_ax.plot(x, y3, c="C3")
# Four stacked subplots, each filled with the same demo data.
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8, 12))
for ax in axes:
    create_plot(
        ax,
        (1, 2, 3, 4),   # x positions
        (1, 2, 3, 4),   # lower bars
        (7, 5, 3, 1),   # upper bars
        (1, 4, 2, 3),   # line on the secondary axis
    )
plt.show()
(Old post below)
Does something like
def create_plot(x, y1, y2, y3):
    """Build a new figure with two stacked bar series and a twin-axis line.

    Parameters:
        x: x positions shared by the bars and the line.
        y1: heights of the lower bar series.
        y2: heights of the upper bar series, stacked on top of ``y1``.
        y3: values of the line drawn on the secondary y-axis.

    Returns:
        The newly created figure.
    """
    fig = plt.figure()
    bar_ax = fig.gca()
    line_ax = bar_ax.twinx()

    # Stacked bars: the second series starts where the first one ends.
    bar_ax.bar(x, y1)
    bar_ax.bar(x, y2, bottom=y1)

    # Line on the secondary axis, in the colour-cycle entry "C3".
    line_ax.plot(x, y3, c="C3")
    return fig
# Demo call: x positions, lower bars, upper bars, then the line values.
fig = create_plot(
    (1, 2, 3, 4),
    (1, 2, 3, 4),
    (7, 5, 3, 1),
    (1, 4, 2, 3),
)
plt.show()
meet what you need? This gives me:
I have two DataFrames
df1=
x y1
0 0 0
1 1 1
2 2 2
3 4 3
df2=
x y2
0 0.0 0
1 0.5 1
2 1.5 2
3 3.0 3
4 4.0 4
I need to calculate y2-y1 (for the same x value)
(in order to see the difference between 2 graphs)
As you can see, some values are in common between them... some are not
I think I will need to resample my data, but I don't know how!
I need to align the data so that both dataframes share the same 'x' column.
Between two points, linear interpolation should be used to get the y value at a given x.
In this case, resampling the data with x_step=0.5 would work well.
I did this...
import pandas as pd
import matplotlib.pylab as plt
# Two series sampled at different x positions.
df1 = pd.DataFrame({'x': [0.0, 1.0, 2.0, 4.0],
                    'y1': [0.0, 1.0, 2.0, 3.0]})
df2 = pd.DataFrame({'x': [0.0, 0.5, 1.5, 3.0, 4.0],
                    'y2': [0.0, 1.9, 2.0, 3.0, 4.0]})

# Show the first frame and each of its columns, each followed by a rule.
for shown in (df1, df1['x'], df1['y1']):
    print(shown)
    print("=" * 10)
# One figure with a single subplot holding both curves.
fig = plt.figure()
fig.subplots_adjust(bottom=0.1)
ax = fig.add_subplot(111)

plt.title("{y} = f({x})".format(x='x', y='y'))

# plt.plot returns a list of lines; keep the single line of each call so it
# can be handed to the legend.
p1 = plt.plot(df1['x'], df1['y1'], color='b', marker='.')[0]
p2 = plt.plot(df2['x'], df2['y2'], color='r', marker='.')[0]

plt.legend([p1, p2], ["y1", "y2"])
plt.show()
import pandas as pd
import pylab as pl
df1 = pd.DataFrame([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [4.0, 3.0]], columns=['x', 'y1'])
df2 = pd.DataFrame([[0.0, 0.0], [0.5, 1.9], [1.5, 2.0], [3.0, 3.0], [4.0, 4.0]], columns=['x', 'y2'])

# Common grid: every x that occurs in either frame, sorted and de-duplicated.
x = np.union1d(df1['x'], df2['x'])

# Linearly interpolate each series onto the common grid. np.interp expects
# the sample x values in increasing order, which holds for both frames here.
y1 = np.interp(x, df1['x'], df1['y1'])
y2 = np.interp(x, df2['x'], df2['y2'])

# With both series on the same x values, the requested difference is a plain
# element-wise subtraction.
y_diff = y2 - y1
# Draw both resampled series against the shared x grid, with point markers.
for series in (y1, y2):
    pl.plot(x, series, "-o")