I have a code from a dataframe
Y = df['label']
for col in categorical_cols:
tab = pd.crosstab(df[col],Y)
annot = x.div(x.sum(axis=1).astype('float64'),axis=0)
annot.plot(kind='bar',stacked=True)
plt.title('Distribution of %s'%col)
plt.xlabel('%s'%col,size='x-large')
plt.xticks(rotation=45)
plt.legend()
How can I plot these using different subplots in a single figure because this loops prints the last column's figure. So all figures are same.
Also: How can I produce the same using matplotlib/seaborn using matplotlib which shows me the % or absolute values.
You need to create the different subplots and then pass one axes object to each call of annot.plot via the ax keyword, something like this:
import math
import matplotlib.pyplot as plt
n = len(categorical_cols)
nrows = math.ceil(float(n) / 3.0)
fig, ax = plt.subplots(ncols=3, nrows=nrows, figsize=(9, nrows*3))
ax = ax.flatten()
Y = df['label']
for idx, col in enumerate(categorical_cols):
tab = pd.crosstab(df[col],Y)
annot = x.div(x.sum(axis=1).astype('float64'),axis=0)
annot.plot(kind='bar',stacked=True, ax=ax[idx])
ax[idx].title('Distribution of %s'%col)
ax[idx].set_xlabel('%s'%col,size='x-large')
ax.tick_params('x', labelrotation=45)
plt.legend()
Related
I'm currently trying to change the secondary y-axis values in a matplot graph to ymin = -1 and ymax = 2. I can't find anything on how to change the values though. I am using the secondary_y = True argument in .plot(), so I am not sure if changing the secondary y-axis values is possible for this. I've included my current code for creating the plot.
df.plot()
df.plot(secondary_y = "Market")
From your example code, it seems you're using Pandas built in ploting capabilities. One option to add a second layer is by using matplotlib directly like in the example "two_scales.py".
It uses
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
ax1.plot(df["..."])
# ...
ax2 = ax1.twinx()
ax2.plot(df["Market"])
ax2.set_ylim([0, 5])
where you can change the y-limits.
Setting ylim on plot does not appear to work in the case of secondary_y, but I was able to workaround with this:
import pandas as pd
df = pd.DataFrame({'one': range(10), 'two': range(10, 20)})
ax = df['one'].plot()
ax2 = df['two'].plot(secondary_y=True)
ax2.set_ylim(-20, 50)
fig = ax.get_figure()
fig.savefig('test.png')
This is a solution for showing as much y-axes as data columns the dataframe has
colors = ['tab:blue',
'tab:orange',
'tab:green',
'tab:red',
'tab:purple',
'tab:brown',
'tab:pink',
'tab:gray',
'tab:olive',
'tab:cyan']
#X axe and first Y axe
fig, ax1 = plt.subplots()
x_label = str( dataFrame.columns[0] )
index = dataFrame[x_label]
ax1.set_xlabel(x_label)
ax1.set_xticklabels(dataFrame[x_label], rotation=45, ha="right")
firstYLabel = str( dataFrame.columns[1] )
ax1.set_ylabel(firstYLabel, color = colors[0])
ax1.plot(index, dataFrame[firstYLabel], color = colors[0])
ax1.tick_params(axis='y', labelcolor = colors[0])
#Creates subplots with independet y-Axes
axS =[]
def newTwix(label, ax1, index, dataFrame):
print(label)
actualPos = len(axS)
axS.append(ax1.twinx())
axS[actualPos].set_ylabel(label, color = colors[actualPos%10 + 1])
axS[actualPos].plot(index, dataFrame[label], color=colors[actualPos%10 + 1])
axS[actualPos].tick_params(axis='y', labelcolor=colors[actualPos%10 + 1])
identation = 0.075 #would improve with a dynamic solution
p = 1 + identation
for i in range(2,len(dataFrame.columns)):
newTwix(str(dataFrame.columns[i]), ax1, index, dataFrame)
if (len(axS) == 1):
axS[len(axS)-1].spines.right.set_position(("axes", p))
else:
p = int((p + identation)*1000)/1000
axS[len(axS)-1].spines.right.set_position(("axes", p))
fig.tight_layout() # otherwise the right y-label is slightly clipped
plt.subplots_adjust(left=0.04, right=0.674, bottom=0.1)
mng = plt.get_current_fig_manager()
mng.full_screen_toggle()
plt.show()
multiple y-axes with independent scales
Here is the code for creating subplots and the graphs look like what's shown in the attached image. Is there a way to adjust the graphs so they look good and identical in size?
import matplotlib.pyplot as plt
df = pd.read_csv(path)
merged = df[df["test"].isin(['viral_load'])]
g = merged.groupby('id')
fig, axes = plt.subplots(g.ngroups, sharex=True, figsize=(8, 6))
for i, (id, d) in enumerate(g):
ax = d.plot.line(x='months', y='result', ax=axes[i], title=id)
ax.legend().remove()
fig.tight_layout()
Here you can see the result of the above code.
I have a pandas data frame, region, containing the prices of flats (Flat) and detached properties (Detached) in areas of the UK over time (the column Month). I'm trying to obtain plots of the change in price over time of both flats and detached properties, so that the plots have two different y axes - both of the average price but in different scales.
I've achieved this by using twinx(), however using the code below I get obviously get two figures. The first of these figures is exactly what I want, but I then get a second figure of blank plots. I have attached a screenshot of the kind of plot I want below.
When removing the second fig line fig, ax2 = ..., I get the error NameError: name 'ax2' is not defined. Also bringing the line ax2 = ax.twinx() outside of the loop gives the error AttributeError: 'numpy.ndarray' object has no attribute 'twinx'. I can't seem to figure out how to get this plot to work without having the duplicate blank figure, any help is much appreciated.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
region_list = sorted(region['Area'].unique().tolist())
fig, ax = plt.subplots(nrows=len(region_list), figsize=(13.8,len(region_list)*7))
fig, ax2 = plt.subplots(nrows=len(region_list), figsize=(13.8,len(region_list)*7))
for i in region_list:
ind = region_list.index(i)
filt = region['Area'] == i
ax2[ind] = ax[ind].twinx()
ax[ind].plot(region.loc[filt]['Month'],region.loc[filt]['Flat'], color='red', marker='o')
ax[ind].set_ylabel('Average price of flats', color='red', fontsize=14)
ax2[ind].plot(region.loc[filt]['Month'],region.loc[filt]['Detached'],color='blue',marker='o')
ax2[ind].set_ylabel('Average price of detached properties',color='blue',fontsize=14)
ax[ind].set_title(i, size=14)
ax[ind].xaxis.set_tick_params(labelsize=10)
ax[ind].yaxis.set_tick_params(labelsize=10)
plt.tight_layout()
When creating a secondary axis for a subplot, the result is a new object, and can't be referenced using array indices like the subplot axes (unless you specifically add the new twin axes to an array).
You've probably seen the following:
# with one axis
fig, ax = plt.subplots()
ax2 = ax.twinx()
ax2.plot(...)
But with multiple subplots, the same logic applies:
# with one axis
fig, axes = plt.subplots(1, 2)
ax2 = axes[0].twinx()
ax2.plot(...) # secondary axis on subplot 0
ax2 = axes[1].twinx()
ax2.plot(...) # secondary axis on subplot 1
In your case:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
region_list = sorted(region['Area'].unique().tolist())
fig, ax = plt.subplots(nrows=len(region_list), figsize=(13.8,len(region_list)*7))
# don't add a second plot - this would be blank
# fig, ax2 = plt.subplots(nrows=len(region_list), figsize=(13.8,len(region_list)*7))
for i in region_list:
ind = region_list.index(i)
filt = region['Area'] == i
# don't index into ax2
# ax2[ind] = ax[ind].twinx()
# instead, create a local variable ax2 which is the secondary axis
# on the subplot ax[ind]
ax2 = ax[ind].twinx()
ax[ind].plot(region.loc[filt]['Month'],region.loc[filt]['Flat'], color='red', marker='o')
ax[ind].set_ylabel('Average price of flats', color='red', fontsize=14)
ax2.plot(region.loc[filt]['Month'],region.loc[filt]['Detached'],color='blue',marker='o')
ax2.set_ylabel('Average price of detached properties',color='blue',fontsize=14)
ax[ind].set_title(i, size=14)
ax[ind].xaxis.set_tick_params(labelsize=10)
ax[ind].yaxis.set_tick_params(labelsize=10)
plt.tight_layout()
I'm plotting a CSV file from my simulation results. The plot has three graphs in the same figure fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(24, 6)).
However, for comparison purposes I want the y-axis in all graphs starting at zero and the ending at a specific value. I tried the solution mentioned here from the Seaborn author. I don't get any errors, but the solution also does not work for me.
Here's my script:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
fname = 'results/filename.csv'
def plot_file():
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(24, 6))
df = pd.read_csv(fname, sep='\t')
profits = \
df.groupby(['providerId', 'periods'], as_index=False)['profits'].sum()
# y-axis needs to start at zero and end at 10
g = sns.lineplot(x='periods',
y='profits',
data=profits,
hue='providerId',
legend='full',
ax=axes[0])
# y-axis need to start at zero and end at one
g = sns.scatterplot(x='periods',
y='price',
hue='providerId',
style='providerId',
data=df,
legend=False,
ax=axes[1])
# y-axis need to start at zero and end at one
g = sns.scatterplot(x='periods',
y='quality',
hue='providerId',
style='providerId',
data=df,
legend=False,
ax=axes[2])
g.set(ylim=(0, None))
plt.show()
print(g) # -> AxesSubplot(0.672059,0.11;0.227941x0.77)
The resulting figure is as follows:
How can I adjust each individual plot?
Based on the way you've written your code, you can refer to each subplot axis with g.axis and use g.axis.set_ylim(low,high). (A difference compared to the linked answer is that your graphs are not being plotted on a seaborn FacetGrid.)
An example using dummy data and different axis ranges to illustrate:
df = pd.DataFrame(np.random.uniform(0,10,(100,2)), columns=['a','b'])
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(8,4))
g = sns.lineplot(x='a',
y='b',
data=df.sample(10),
ax=axes[0])
g.axes.set_ylim(0,25)
g = sns.scatterplot(x='a',
y='b',
data=df.sample(10),
ax=axes[1])
g.axes.set_ylim(0,3.5)
g = sns.scatterplot(x='a',
y='b',
data=df.sample(10),
ax=axes[2])
g.axes.set_ylim(0,0.3)
plt.tight_layout()
plt.show()
I'm currently trying to change the secondary y-axis values in a matplot graph to ymin = -1 and ymax = 2. I can't find anything on how to change the values though. I am using the secondary_y = True argument in .plot(), so I am not sure if changing the secondary y-axis values is possible for this. I've included my current code for creating the plot.
df.plot()
df.plot(secondary_y = "Market")
From your example code, it seems you're using Pandas built in ploting capabilities. One option to add a second layer is by using matplotlib directly like in the example "two_scales.py".
It uses
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
ax1.plot(df["..."])
# ...
ax2 = ax1.twinx()
ax2.plot(df["Market"])
ax2.set_ylim([0, 5])
where you can change the y-limits.
Setting ylim on plot does not appear to work in the case of secondary_y, but I was able to workaround with this:
import pandas as pd
df = pd.DataFrame({'one': range(10), 'two': range(10, 20)})
ax = df['one'].plot()
ax2 = df['two'].plot(secondary_y=True)
ax2.set_ylim(-20, 50)
fig = ax.get_figure()
fig.savefig('test.png')
This is a solution for showing as much y-axes as data columns the dataframe has
colors = ['tab:blue',
'tab:orange',
'tab:green',
'tab:red',
'tab:purple',
'tab:brown',
'tab:pink',
'tab:gray',
'tab:olive',
'tab:cyan']
#X axe and first Y axe
fig, ax1 = plt.subplots()
x_label = str( dataFrame.columns[0] )
index = dataFrame[x_label]
ax1.set_xlabel(x_label)
ax1.set_xticklabels(dataFrame[x_label], rotation=45, ha="right")
firstYLabel = str( dataFrame.columns[1] )
ax1.set_ylabel(firstYLabel, color = colors[0])
ax1.plot(index, dataFrame[firstYLabel], color = colors[0])
ax1.tick_params(axis='y', labelcolor = colors[0])
#Creates subplots with independet y-Axes
axS =[]
def newTwix(label, ax1, index, dataFrame):
print(label)
actualPos = len(axS)
axS.append(ax1.twinx())
axS[actualPos].set_ylabel(label, color = colors[actualPos%10 + 1])
axS[actualPos].plot(index, dataFrame[label], color=colors[actualPos%10 + 1])
axS[actualPos].tick_params(axis='y', labelcolor=colors[actualPos%10 + 1])
identation = 0.075 #would improve with a dynamic solution
p = 1 + identation
for i in range(2,len(dataFrame.columns)):
newTwix(str(dataFrame.columns[i]), ax1, index, dataFrame)
if (len(axS) == 1):
axS[len(axS)-1].spines.right.set_position(("axes", p))
else:
p = int((p + identation)*1000)/1000
axS[len(axS)-1].spines.right.set_position(("axes", p))
fig.tight_layout() # otherwise the right y-label is slightly clipped
plt.subplots_adjust(left=0.04, right=0.674, bottom=0.1)
mng = plt.get_current_fig_manager()
mng.full_screen_toggle()
plt.show()
multiple y-axes with independent scales