Related to multiple swarmplots inside a figure Pandas - python

This question is related to group multiple plot in one figure python, "individual 28 plots".
This is my code:
for column in df.columns[1:]:
sns.set()
fig, ax = plt.subplots(nrows=3, ncols=3) # tried 9 plots in one figure
sns.set(style="whitegrid")
sns.swarmplot(x='GF', y=column, data=df,order=["WT", 'Eulomin']) # Choose column
sns.despine(offset=10, trim=True) #?
plt.savefig('{}.png'.format(column), bbox_inches='tight') # filename
plt.show()
I have more than 100 columns, and it saves every file individually and just prints empty plots beside the normal one. How do I save 9 plots in one figure, until it reaches the point where only 5 are left (which will also have to go in one figure)?

Instead of iterating through columns, iterate through multiples of 9 with range to index the data frame by column number while placing each swarmplot into the ax array you define:
from itertools import product
...
sns.set(style="whitegrid")

# Draw the value columns nine at a time, one 3 x 3 figure per batch.
# Iteration starts at index 1, skipping index 0 as the question's
# `df.columns[1:]` loop does (presumably the 'GF' grouping column - confirm).
n_columns = len(df.columns)  # derived from the data instead of a hard-coded 100
for i in range(1, n_columns, 9):  # ITERATE WITH STEPS
    last = min(i + 9, n_columns) - 1  # index of the last column in this figure
    fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(12, 6))

    # TRAVERSE 3 X 3 MATRIX
    for col, (r, c) in enumerate(product(range(3), range(3)), start=i):
        if col >= n_columns:  # CHECK IF COLUMN EXISTS
            # No column left for this cell (final, partially filled figure).
            ax[r, c].axis("off")
            continue

        # USE ax ARGUMENT WITH MATRIX INDEX
        sns.swarmplot(x='GF', y=df[df.columns[col]], data=df, ax=ax[r, c],
                      order=["WT", 'Eulomin'])
        # Restrict despine to the axes that was just drawn.
        sns.despine(ax=ax[r, c], offset=10, trim=True)

    fig.tight_layout()
    # Name the file after the columns that are actually in it.
    fig.savefig('SwarmPlots_{0}-{1}.png'.format(i, last), bbox_inches='tight')
    # pyplot keeps every figure open until it is closed explicitly.
    plt.close(fig)
To demonstrate with random, seeded data of 100 columns by 500 rows for reproducibility:
Data
import numpy as np
import pandas as pd
# Seeded so the demo data is reproducible.
np.random.seed(362020)

cols = ['Col' + str(i) for i in range(1, 100)]

# 500 rows x 99 random value columns, plus a categorical 'GF' column that is
# moved to the front. `set_axis` already returns a new frame, so no `inplace`
# argument is passed (the keyword was removed in pandas 2.0).
df = (
    pd.DataFrame([np.random.randn(99) for n in range(500)])
    .assign(GF=np.random.choice(['r', 'python', 'julia'], 500))
    .set_axis(cols + ['GF'], axis='columns')
    .reindex(['GF'] + cols, axis='columns')
)

df.shape
# (500, 100)
Plot
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import product
sns.set(style="whitegrid")

# One 3 x 3 figure per batch of nine columns; index 0 is skipped because the
# loop starts at 1 (in the demo data above, column 0 is 'GF').
n_columns = len(df.columns)  # derived from the data instead of a hard-coded 100
for i in range(1, n_columns, 9):
    last = min(i + 9, n_columns) - 1  # index of the last column in this figure
    fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(12, 6))

    for col, (r, c) in enumerate(product(range(3), range(3)), start=i):
        if col < n_columns:
            sns.swarmplot(x='GF', y=df[df.columns[col]], data=df, ax=ax[r, c])
        else:
            # Nothing left to draw here (final, partially filled figure).
            ax[r, c].axis("off")

    plt.tight_layout()
    # Name the file after the columns that are actually in it.
    plt.savefig('SwarmPlots_{0}-{1}.png'.format(i, last), bbox_inches='tight')
    plt.show()
    # Close this figure before the next batch; pyplot keeps figures open otherwise.
    plt.close(fig)
Output (first plot)

Related

Is it possible to reduce the width of a single subplot in gridspec/Matplotlib?

I have a grid of subplots created using gridspec. I know how to create subplots that span rows and columns in gridspec. Is it possible to reduce the width of a single sub-plot just by a small amount? For example, can we set the width ratio for a single subplot? The way I want it is marked in red in the image.
My code looks like this:
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
plt.figure(figsize=(6, 4))
gs = gridspec.GridSpec(3, 5, height_ratios=[0.5,1,1])
for i in range(1, 3):
for j in range(5):
ax = plt.subplot(gs[i, j])
ax1 = plt.subplot(gs[0,1:2])
ax2 = plt.subplot(gs[0,2:])
for ax in [ax1, ax2]:
ax.tick_params(size=0)
ax.set_xticklabels([])
ax.set_yticklabels([])
What I tried:
I tried setting the width ratio as width_ratios = [1,1,1,1,0.5], but that reduces the width of the whole column (last column).
You can use GridSpec for multicolumn layouts. Here create a grid with 3 rows, 7 columns with width ratios [1,1,0.5,0.5,1,0.5,0.5], and plot axes in the second and third rows in the combined 0.5 columns.
gs = GridSpec(3, 7, figure=fig, width_ratios=[1,1,0.5,0.5,1,0.5,0.5])
ax_merged_top = fig.add_subplot(gs[0, 3:6])
ax_row1_pseudocol3 = fig.add_subplot(gs[1, 2:4])
Here is a full example:
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
# Keep a handle on the figure: GridSpec(figure=fig) below needs it.
fig = plt.figure(figsize=(6, 4))

# 3 rows x 7 grid columns. Grid columns (2, 3) and (5, 6) are half-width
# pairs that are merged into one axes in the lower rows.
gs = gridspec.GridSpec(3, 7, figure=fig,
                       height_ratios=[0.5, 1, 1],
                       width_ratios=[1, 1, 0.5, 0.5, 1, 0.5, 0.5])

# Top row: one single-column axes and one axes spanning grid columns 3..5.
ax1 = fig.add_subplot(gs[0, 1])
ax_merged_top = fig.add_subplot(gs[0, 3:6])

# Rows 1 and 2: five visible axes each, laid out over the seven grid columns.
for row in [1, 2]:
    extra = 0  # grid columns already consumed by merged (two-column) axes
    for col in range(5):
        if col in (2, 4):
            # This visible column spans two half-width grid columns.
            ax = fig.add_subplot(gs[row, col + extra:col + extra + 2])
            extra += 1
        else:
            ax = fig.add_subplot(gs[row, col + extra])
And now you can change width_ratio to anything provided the numbers initially set as [0.5,0.5] add up to 1, example below with width_ratios=[1,1,0.3,0.7,1,0.5,0.5]
Thank you @JodyKlymak for mentioning the ax.set_position method. @mozway provided a working solution, but adding these few lines to my code gave me the desired output:
# Move both horizontal edges of ax2's bounding box, then apply it once.
bb = ax2.get_position()
bb.x0 = 0.50  # new left edge
bb.x1 = 0.84  # new right edge
ax2.set_position(bb)

Subplot with multiple subplots

I am drawing 4 sets of 12 plots (i.e. 48 plots in total). I want to combine the 12 plots within each of the 4 sets into one figure. However, I do not know how to combine the plots. At the moment, I am only drawing the 48 plots.
The dictionary I am referring to in the following contains 4 dictionaries, in turn containing 12 datasets each:
import matplotlib.pyplot as plt
import seaborn as sns
for j in dic:
for i in dic[j]:
df = pd.concat(dic[j][i].values(), ignore_index=True)
var = np.random.normal(loc=0, scale=1, size=10000)
fig, ax = plt.subplots()
sns.histplot(df['Z'], stat='density', ax=ax)
sns.kdeplot(var, color='r', ax=ax)
Thanks to @GenG, I found the solution:
# One 3 x 4 figure per outer key of `dic`; each inner dataset gets one cell.
for group_key in dic:
    fig, ax = plt.subplots(3, 4)
    # ax.T.flat walks the grid column by column: (0,0), (1,0), (2,0), (0,1), ...
    for dataset_key, cell in zip(dic[group_key], ax.T.flat):
        df = pd.concat(dic[group_key][dataset_key].values(), ignore_index=True)
        # Standard-normal sample, drawn as a red KDE over the histogram.
        var = np.random.normal(loc=0, scale=1, size=10000)
        sns.histplot(df['Z'], stat='density', ax=cell)
        sns.kdeplot(var, color='r', ax=cell)

Trying to make scatter plots in subplots using for-loops

I am trying to make subplots using for loop to go through my x variables in the dataframe. All plots would be a scatter plot.
X-variable: 'Protein', 'Fat', 'Sodium', 'Fiber', 'Carbo', 'Sugars'
y-variable: 'Cal'
This is where I am stuck
plt.subplot(2, 3, 2)
for i in range(3):
plt.scatter(i,sub['Cal'])
With this code:
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('data.csv')

# Every column except 'Cal' gets its own panel.
columns = [*df.columns]
columns.remove('Cal')

# One row of panels, one per remaining column.
fig, ax = plt.subplots(1, len(columns), figsize=(20, 5))
for panel, col in zip(ax, columns):
    # Marker-only plot ('o'): 'Cal' on the x axis, the current column on the y axis.
    panel.plot(df['Cal'], df[col], 'o')
    panel.set_xlabel('Cal')
    panel.set_title(col)

plt.show()
I get this subplot of scatter plots:
However, maybe it is a better choice to use a single scatterplot and use marker color in order to distinguish data type. See this code:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')

df = pd.read_csv('data.csv')
# df.drop(columns = ['Sodium'], inplace = True) # <--- removes 'Sodium' column

# Reshape wide -> long: 'Cal' stays as the identifier column, the former
# column names go into 'Type' and their values into 'value'.
table = df.melt(id_vars='Cal', var_name='Type')

fig, ax = plt.subplots(1, 1, figsize=(10, 10))

# Large, semi-transparent markers, coloured by 'Type'.
marker_style = {'s': 200, 'alpha': 0.5}
sns.scatterplot(data=table, x='Cal', y='value', hue='Type', **marker_style)

plt.show()
which gives this plot, where all the data are shown together:
The 'Sodium' values are different from others by far, so, if you remove this column with this line:
df.drop(columns = ['Sodium'], inplace = True)
you get a more readable plot:

plotting sub_plots from pivot_table using matplotlib/seaborn

I have a code from a dataframe
Y = df['label']
for col in categorical_cols:
tab = pd.crosstab(df[col],Y)
annot = x.div(x.sum(axis=1).astype('float64'),axis=0)
annot.plot(kind='bar',stacked=True)
plt.title('Distribution of %s'%col)
plt.xlabel('%s'%col,size='x-large')
plt.xticks(rotation=45)
plt.legend()
How can I plot these as different subplots in a single figure? As written, this loop prints the last column's figure, so all figures are the same.
Also: how can I produce the same plot with matplotlib/seaborn so that it shows the % or absolute values?
You need to create the different subplots and then pass one axes object to each call of annot.plot via the ax keyword, something like this:
import math
import matplotlib.pyplot as plt
# Grid with three columns and as many rows as the categorical columns need.
n = len(categorical_cols)
nrows = math.ceil(float(n) / 3.0)
fig, ax = plt.subplots(ncols=3, nrows=nrows, figsize=(9, nrows*3))
ax = ax.flatten()  # 1-D array so a panel can be addressed by a single index

Y = df['label']
for idx, col in enumerate(categorical_cols):
    tab = pd.crosstab(df[col], Y)
    # Row-normalise the counts so each bar shows per-category fractions.
    annot = tab.div(tab.sum(axis=1).astype('float64'), axis=0)
    annot.plot(kind='bar', stacked=True, ax=ax[idx])
    # Axes-level setters: `title` is a Text attribute, `set_title` is the method.
    ax[idx].set_title('Distribution of %s' % col)
    ax[idx].set_xlabel('%s' % col, size='x-large')
    ax[idx].tick_params('x', labelrotation=45)
    ax[idx].legend()

# Hide the panels that are left over when n is not a multiple of 3.
for unused in ax[n:]:
    unused.axis('off')

fig.tight_layout()

Share axis and remove unused in matplotlib subplots

I want to plot a series of seaborn heatmaps in a grid. I know the number of subplots (which can be odd or even).
The heatmaps will show the mean "occupation ratio" by "day of week" (y axis) and "hour of day" (x axis), e.g. they all share the same x / y domains.
Here's my current code:
df2 = df[['name','openLots','occupationRatio','DoW','Hour']]
fig, axs = plt.subplots(figsize=(24,24), nrows=7, ncols=6)
axs = axs.flatten()
locations = df2['name'].sort_values().unique()
def occupation_heatmap (name, ax):
dfn = df2[df2['name'] == name]
dfn = dfn.groupby(['DoW', 'Hour']).mean()['occupationRatio'].unstack()
dfn = dfn.reindex(['Mon', 'Tue', 'Wed','Thu','Fri','Sat','Sun'])
sns.heatmap(data=dfn, cmap="coolwarm", vmin=0, vmax=1.0, ax= ax)
ax.set_title(name)
i = 0
for n in locations:
occupation_heatmap (n, axs[i])
i = i+1
plt.tight_layout()
It looks almost like what I want (last few rows):
However, what I want is:
Have the y axis labels (DoW) only once per row (leftmost plot)
Have the colormap legend only on the rightmost plot in each row (or leave it out completely, the colors are pretty self-explanatory)
remove the "empty plots" in the last row because of an odd total number
Many thanks for any hints
Have the y axis labels (DoW) only once per row (leftmost plot)
This can be done using sharey = True as argument to plt.subplots.
Have the colormap legend only on the rightmost plot in each row (or leave it out completely, the colors are pretty self-explanatory)
Use the cbar = False argument to seaborn.heatmap in order not to show a colorbar. This can be given as an input to the plotting function in dependence of the actual number of subplots.
remove the "empty plots" in the last row because of an odd total number
After the loop for creating the plots you may add another loop removing the unused axes.
for j in range(len(locations), ncols*nrows):
axs[j].axis("off")
Here is a complete example (where I borrowed the code to generate a dataframe from @Robbie):
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
# Day labels used both to generate the data and to order the heatmap rows.
# They must match exactly, otherwise reindex() yields an all-NaN row.
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
names = ["Parkhaus {:02}".format(i+1) for i in range(22)]
nItems = 1000

# Random demo data: one row per observation.
df = pd.DataFrame()
df['name'] = [names[i] for i in np.random.randint(0, len(names), nItems)]
df['openLots'] = np.random.randint(0, 100, nItems)
df['occupationRatio'] = np.random.rand(nItems)
df['DoW'] = [days[i] for i in np.random.randint(0, 7, nItems)]
df['Hour'] = np.random.randint(0, 12, nItems)

df2 = df[['name', 'openLots', 'occupationRatio', 'DoW', 'Hour']]

nrows = 4; ncols = 6
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 9), sharey=True)
axs = axs.flatten()
locations = df2['name'].sort_values().unique()


def occupation_heatmap(name, ax, cbar=False, ylabel=False):
    """Draw the mean occupation ratio per (day, hour) for one location.

    name   -- value of the 'name' column selecting the rows to plot
    ax     -- axes to draw the heatmap on
    cbar   -- forwarded to seaborn.heatmap; draw a colorbar if true
    ylabel -- keep the y-axis label if true, otherwise blank it
    """
    dfn = df2[df2['name'] == name]
    # Select the numeric column before averaging so mean() is never applied
    # to the string column 'name'.
    dfn = dfn.groupby(['DoW', 'Hour'])['occupationRatio'].mean().unstack()
    dfn = dfn.reindex(days)  # rows in weekday order
    sns.heatmap(data=dfn, cmap="coolwarm", vmin=0, vmax=1.0, ax=ax, cbar=cbar)
    ax.set_title(name)
    plt.setp(ax.get_yticklabels(), rotation=0)
    if not ylabel:
        ax.set_ylabel("")


# Colorbar only in the last column of each row, y label only in the first.
for i, n in enumerate(locations):
    occupation_heatmap(n, axs[i], cbar=i % ncols == ncols - 1, ylabel=i % ncols == 0)

# Switch off the grid cells that have no location to show.
for j in range(len(locations), ncols*nrows):
    axs[j].axis("off")

plt.tight_layout()
plt.show()
You can be more flexible and just create an axis for each name present, something like this:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import string
days = ['Mon', 'Tue', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
# First 22 lowercase letters (`string.lowercase` only exists in Python 2).
names = list(string.ascii_lowercase[:22])
nItems = 1000

# Random demo data: one row per observation.
df = pd.DataFrame()
df['name'] = [names[i] for i in np.random.randint(0, len(names), nItems)]
df['openLots'] = np.random.randint(0, 100, nItems)
df['occupationRatio'] = np.random.randint(0, 100, nItems)
df['DoW'] = [days[i] for i in np.random.randint(0, 7, nItems)]
df['Hour'] = np.random.randint(0, 12, nItems)

fig = plt.figure(figsize=(12, 12))
for index, name in enumerate(names):
    ax = fig.add_subplot(4, 6, index+1)
    dfn = df.loc[df.name == name]
    # Select the numeric column before averaging so mean() is never applied
    # to the string column 'name'.
    dfn = dfn.groupby(['DoW', 'Hour'])['occupationRatio'].mean().unstack()
    dfn = dfn.reindex(days)  # rows in weekday order

    # Now we can operate on each plot axis individually
    if index % 6 != 5:
        # Not the last column of the grid: don't draw a colorbar
        sns.heatmap(data=dfn, cmap='coolwarm', ax=ax, cbar=False)
    else:
        sns.heatmap(data=dfn, cmap='coolwarm', ax=ax)

    if index % 6 != 0:
        # Not the first column of the grid: remove the y-axis label and ticks
        ax.set_ylabel('')
        ax.set_yticks(())

    ax.set_title(name)

fig.tight_layout()
# pyplot's show() runs the GUI event loop; Figure.show() does not block in a script.
plt.show()
Results in:
You could also play around with the x-axes (for example remove labels and ticks except for the bottom row).

Categories

Resources