How to set column titles as subtitles in Python - python

I want to plot several graphs from Excel data in Python with a simple command. I am currently using the following code:
import matplotlib.pyplot as plt
# Create a 4x4 grid of subplots and draw one boxplot per selected column.
fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(12,12))
data_cols = df.columns[8:16]
# zip stops at the shorter iterable, so only as many axes as there are
# selected columns receive a boxplot.
for data_col, ax in zip(data_cols, axes.ravel()):
    ax.boxplot(df[data_col])
I want to use the names of the columns selected in data_cols as the titles of the subplots. Do you have any idea how I can do that?
Thanks in advance.

Of course, you need to iterate over both the columns you want to plot and the axes you want to plot them to. So if you want to plot the last 8 columns of the dataframe, you also need to iterate over the last 8 axes.
This then allows you to use the column name as the title, via ax.set_title(data_col).
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np; np.random.seed(1)
# Demo frame: 5 random rows across 16 columns labelled A..P.
a = np.random.rand(5, 16)
df = pd.DataFrame(a, columns=list("ABCDEFGHIJKLMNOP"))
fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(6,6), sharex=True, sharey=True)
data_cols = df.columns[8:16]
# Pair the last 8 columns with the last 8 axes of the flattened 4x4 grid.
for data_col, ax in zip(data_cols, axes.ravel()[8:16]):
    ax.boxplot(df[data_col])
    ax.set_title(data_col)  # the column name becomes the subplot title
plt.tight_layout()
plt.show()

Related

Plotting Errorbars from different DataFrame into SubPlots with matplotlib

I just stumbled upon a problem I simply cannot solve. I have a dataset with raw data, which I have uploaded here: https://file.io/oJqkZjAGyqV1
It's an Excel file with the data inside.
I then wrote some code to open it, read it, and compute the mean and SEM of my data, as shown below.
# Import required packages
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from pylab import cm
df = pd.read_excel("Chlorophyll_data_mod.xlsx")
#----Calculation of meanvalues and sem from raw_data---------
meandf2 = df.set_index(["Group"])
# One sub-frame per measurement family: the "Group" column plus every
# column whose name contains "Chl_<x>_".
sets = []
for x in ["A","B","AB","xc"]:
    meandf3 = meandf2.filter(like=f"Chl_{x}_").reset_index()
    sets.append(meandf3)
#---------Grouping DataFrame----------#
# Per-group mean and standard error of the mean for each sub-frame, kept in
# two parallel lists that share the ordering of `sets`.
means = []
ster = []
for subset in sets:
    meandf = subset.groupby(["Group"]).mean()
    meandf = meandf.reset_index()
    means.append(meandf)
    sems = subset.groupby("Group").sem()
    sems = sems.reset_index()
    ster.append(sems)
#----Selecting Dataframe from List-----#
# Take the first entry of the means / standard-error lists.
plotdf = means[0]
ploter = ster[0]
# First column and second column of the means frame, each kept as a
# one-column DataFrame and then converted to a NumPy array.
plotgroup = plotdf.iloc[:,[0,]]
plotdata = plotdf.iloc[:,[1,]]
grouparray = plotgroup.to_numpy()
dataarray = plotdata.to_numpy()
#-----CreatePlot------#
# Three stacked subplots sharing the x axis; one bar plot per measurement.
fig, ax = plt.subplots(nrows=3, ncols=1, sharex="all", figsize=(10,8))
plotdf.plot(ax=ax[0,],x="Group",y="Chl_A_0D", kind="bar", legend=False, color="black")
# NOTE(review): plt.errorbar is not given a target axes, so it draws on
# pyplot's current axes rather than on ax[0]; the accompanying text reports
# that the error bars only show up in the last subplot.
plt.errorbar(x=plotdf["Group"], y=plotdf["Chl_A_0D"],yerr=ploter["Chl_A_0D"])
plotdf.plot(ax=ax[1,],x="Group",y="Chl_A_10DaT", kind="bar", legend=False, color="blue")
plt.errorbar(x=plotdf["Group"], y=plotdf["Chl_A_10DaT"],yerr=ploter["Chl_A_10DaT"])
plotdf.plot(ax=ax[2,],x="Group",y="Chl_A_7DaR", kind="bar", legend=False, color="magenta")
plt.errorbar(x=plotdf["Group"], y=plotdf["Chl_A_7DaR"],yerr=ploter["Chl_A_7DaR"])
#----Legend of the Plot-----#
fig.legend(loc="lower center", bbox_to_anchor=(0.5,0), fancybox=True, ncol=6)
#----Layout------#
# The rect leaves a small strip at the bottom of the figure outside the layout.
plt.tight_layout(rect=[0, 0.02, 1,1])
plt.show()
And I managed to create a figure with subplots showing the 3 measurements I am interested in. However, I struggle with the error bars.
My approach was to calculate the SEM and store it in a new dataframe, and then just read it from there for yerr. However, this doesn't work.
plotdf.plot(ax=ax[2,],x="Group",y="Chl_A_7DaR", kind="bar", legend=False, color="magenta", yerr=ploter["Chl_A_7DaR"])
This results in an array error because of the structure.
And my current approach, as in the main code above, only draws the error bars in the last subplot, not in each individual plot.
Maybe someone here could help me understand this function?
Best regards

How to plot multiple dataframes in a single catplot figure

I have multiple data frames, each consisting of three main columns: one with the categories (c1, c2, c3), one with the data values, and one with the different time periods (AA, BB, CC, DD).
What I am trying to do is generate boxplots of the data for all data frames at once, in a single figure!
I tried different enumerate options and the "ax" argument, but it still generates the boxplots separately; I couldn't figure it out.
allCN=[df1, df2, df3]
fig, axs = plt.subplots(nrows = 3, ncols=4, figsize = (30,54))
# Flattened view of the grid; the loop below still indexes the 2-D `axs`.
axes = axs.flatten()
for i, x in enumerate(allCN):
    sns.set(style="ticks", palette='Set2')
    sns.set_context("paper", font_scale=1.1, rc={"lines.linewidth": 1.1})
    # NOTE(review): catplot appears to be a figure-level function that builds
    # its own grid, which would explain why the plots come out separately
    # despite the ax argument - confirm against the seaborn documentation.
    g=sns.catplot(x="Cat", y="Data", ax=axs[i,0],
                  col="Period", data=x, kind="box", height=4, aspect=10/18,
                  width=0.6,fliersize=2.5,showfliers=False, linewidth=1.1,
                  notch=False,orient="v")
    g.set_ylabels("test", size=12)
    g.set_xlabels("")
One way is to stack your data frames and use the row= argument inside catplot. First to create something like your data:
import pandas as pd
import numpy as np
import seaborn as sns
# Three independent 50-row sample frames with the same columns: a random
# category, a uniform draw between 0 and 1, and a random period label.
# The generator is consumed in order, so the frames are built one after the
# other exactly as three separate statements would build them.
df1, df2, df3 = (
    pd.DataFrame({
        'Cat': np.random.choice(['C1', 'C2', 'C3'], 50),
        'Data': np.random.uniform(0, 1, 50),
        "Period": np.random.choice(['AA', 'CC', 'DD'], 50),
    })
    for _ in range(3)
)
Then concatenate the dataframes and add another column (I used source below) to record which dataframe each row came from:
# Stack the three frames and tag every row with the frame it came from.
allCN = pd.concat([df1, df2, df3])
row_counts = [len(df1), len(df2), len(df3)]
allCN['source'] = np.repeat(['df1', 'df2', 'df3'], row_counts)
# Grid columns follow "Period"; grid rows follow the originating frame.
sns.catplot(
    data=allCN, kind="box",
    x="Cat", y="Data",
    col="Period", row="source",
    height=2, aspect=1.6,
)
What about the hue parameter in sns.boxplot? Would that give you the result you want?
import seaborn as sns
import matplotlib.pyplot as plt
# Load seaborn's "tips" example dataset and draw total_bill per day, with
# the boxes further grouped by the "smoker" column through hue.
tips = sns.load_dataset("tips")
box_plot = sns.boxplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="smoker",
)
plt.show()

How can I plot slice of certain DataFrame for each row with different color?

I would like to plot certain slices of my pandas DataFrame for each row (based on the row index), with a different color per row.
My data look like the following:
I already tried with the help of this tutorial to find a way but I couldn't - probably due to a lack of skills.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Raw string so the backslash in the Windows path is not read as an escape.
df = pd.read_csv(r"D:\SOF10.csv", header=None)
df.head()
#Slice interested data
# Every third column, starting with the column at position 2.
C = df.iloc[:, 2::3]
#Plot Temp base on row index colorfully
# One scatter call per sliced column; every point is drawn in green.
C.apply(lambda x: plt.scatter(x.index, x, c='g'))
plt.show()
Following is my expected plot:
I was also wondering if I could display the mean of each row of the sliced data (each row contains 480 values) somewhere in the plot or in the legend beside the plot. Is it feasible (as in the following picture) to calculate the mean and display it in the legend or, using a small font size, next to its own data in the graph?
Data sample: data
This gives the plot without legend
# Long format: one record per (row, column) cell of the sliced frame.
stacked = df.iloc[:, 2::3].stack()
C = stacked.reset_index()
C.columns = ['level_0', 'level_1', 'Temperature']
fig, ax = plt.subplots(1, 1)
# Colour each point by its original row index using the tab20 colormap.
C.plot(
    x='level_0', y='Temperature',
    kind='scatter', ax=ax,
    c='level_0', colormap='tab20',
    colorbar=False, legend=True,
)
ax.set_xlabel('Cycles')
plt.show()
Edit to reflect modified question:
stack() transforms your (sliced) dataframe into a series with a two-level index (row, col).
reset_index() turns that two-level index into the columns level_0 (row) and level_1 (col).
set_xlabel sets the label of the x-axis to what you want.
Edit 2: The following produces scatter with legend:
CC = df.iloc[:,2::3]
fig, ax = plt.subplots(1,1, figsize=(16,9))
# The mean of each row doubles as that row's legend label.
labels = CC.mean(axis=1)
for i in CC.index:
    # One scatter call per row: x repeats the row index, y holds the row's
    # values from the second sliced column onwards.
    ax.scatter([i]*len(CC.columns[1:]), CC.iloc[i,1:], label=labels[i])
ax.legend()
ax.set_xlabel('Cycles')
ax.set_ylabel('Temperature')
plt.show()
This may be an approximate answer. scatter(c=..., cmap=...) can be used to get the desired coloring.
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import itertools
df = pd.DataFrame({'a':[34,22,1,34]})
fig, subplot_axes = plt.subplots(1, 1, figsize=(20, 10)) # width, height
# Discrete colormap built from four named colours.
colors = ['red','green','blue','purple']
cmap=matplotlib.colors.ListedColormap(colors)
for col in df.columns:
    # The row index is used both as the x position and as the value that is
    # mapped to a colour.
    subplot_axes.scatter(df.index, df[col].values, c=df.index, cmap=cmap, alpha=.9)

Arranging multiple for loop categorical plots with Seaborn

I am creating multiple categorical plots for data frame df with a for loop:
# Boolean mask over the columns: True where the dtype is object.
object_bol = df.dtypes == 'object'
for catplot in df.dtypes[object_bol].index:
    # One count plot per object column, each shown on its own.
    sns.countplot(y=catplot,data=df)
    plt.show()
The output is all the plots shown one after the other. How do I arrange them in a grid with n columns and m rows (n and m vary depending on the number of object columns in the data frame)?
You would want to extend the example from How do I plot two countplot graphs side by side in seaborn? to more subplots.
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Demo frame: 100 rows x 20 columns of letters drawn with unequal weights.
df=pd.DataFrame(np.random.choice(list("abcd"), size=(100,20), p=[.4,.3,.2,.1]))
fig, axes =plt.subplots(5,4, figsize=(10,10), sharex=True)
axes = axes.flatten()
object_bol = df.dtypes == 'object'
# Pair each object column with one axes of the flattened 5x4 grid.
for ax, catplot in zip(axes, df.dtypes[object_bol].index):
    # A shared category order keeps the bars aligned across subplots.
    sns.countplot(y=catplot, data=df, ax=ax, order=np.unique(df.values))
plt.tight_layout()
plt.show()
You would get something similar without seaborn directly from pandas:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Demo frame: 100 rows x 20 columns of letters drawn with unequal weights.
df=pd.DataFrame(np.random.choice(list("abcd"), size=(100,20), p=[.4,.3,.2,.1]))
# Count the values in each column, then draw one horizontal bar chart per
# column. Series.value_counts is used because the top-level pd.value_counts
# helper is deprecated in recent pandas releases.
df.apply(pd.Series.value_counts).plot(kind="barh", subplots=True, layout=(4,5), legend=False)
plt.tight_layout()
plt.show()

Scatter plot from multiple columns of a pandas dataframe

I have a pandas dataframe that looks as below:
Filename GalCer(18:1/12:0)_IS GalCer(d18:1/16:0) GalCer(d18:1/18:0)
0 A-1-1 15.0 1.299366 40.662458 0.242658 6.891069 0.180315
1 A-1-2 15.0 1.341638 50.237734 0.270351 8.367316 0.233468
2 A-1-3 15.0 1.583500 47.039423 0.241681 7.902761 0.201153
3 A-1-4 15.0 1.635365 53.139610 0.322680 9.578195 0.345681
4 B-1-10 15.0 2.370330 80.209846 0.463770 13.729810 0.395355
I am trying to plot scatter subplots with a shared x-axis, with the first column "Filename" on the x-axis. While I am able to generate bar plots, the following code gives me a KeyError for a scatter plot:
import matplotlib.pyplot as plt
colnames = list (qqq.columns)
# NOTE(review): x receives the Series qqq.Filename (its values) rather than
# a column label; this is the call reported to raise the KeyError.
qqq.plot.scatter(x=qqq.Filename, y=colnames[1:], legend=False, subplots = True, sharex = True, figsize = (10,50))
KeyError: "['A-1-1' 'A-1-2' 'A-1-3' 'A-1-4' 'B-1-10' ] not in index"
The following code for barplots works fine. Do I need to specify something differently for the scatterplots?
import matplotlib.pyplot as plt
colnames = list (qqq.columns)
# Bar-plot variant of the same call (reported to work): every column after
# the first is passed as y, with subplots=True and a shared x axis.
qqq.plot(x=qqq.Filename, y=colnames[1:], kind = 'bar', legend=False, subplots = True, sharex = True, figsize = (10,30))
A scatter plot will require numeric values for both axes. In this case you can use the index as x values,
df.reset_index().plot(x="index", y="other column")
The problem now is that you cannot plot several columns at once using the scatter plot wrapper in pandas. Depending on your reason for using a scatter plot, you may decide to use a line plot instead, just without lines; i.e. you may pass linestyle="none" and marker="o" to plot, so that only the points appear.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# File-name-like labels "A_0" ... "D_3": four letters times four numbers.
fn = ["{}_{}".format(letter, number) for letter in "ABCD" for number in range(4)]
# Four random value columns, one row per label.
values = np.random.rand(len(fn), 4)
df = pd.DataFrame(values, columns=list("ZXYQ"))
# Put the labels in front as the "Filename" column.
df.insert(0, "Filename", pd.Series(fn))
colnames = df.columns.tolist()
# Expose the integer index as a numeric "index" column for the x axis.
indexed = df.reset_index()
# Line plot with the connecting line switched off, so only markers are drawn;
# one subplot per value column.
indexed.plot(
    x="index", y=colnames[1:], kind='line',
    ls="none", marker="o",
    subplots=True, sharex=True,
    legend=False, figsize=(5.5,4),
)
plt.show()
In case you absolutely need a scatter plot, you may create a subplots grid first and then iterate over the columns and axes to plot one scatter plot at a time to the respective axes.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# File-name-like labels "A_0" ... "D_3": four letters times four numbers.
fn = ["{}_{}".format(letter, number) for letter in "ABCD" for number in range(4)]
# Four random value columns, one row per label.
values = np.random.rand(len(fn), 4)
df = pd.DataFrame(values, columns=list("ZXYQ"))
# Put the labels in front as the "Filename" column.
df.insert(0, "Filename", pd.Series(fn))
colnames = df.columns.tolist()
# One row of axes per value column (every column except "Filename").
fig, axes = plt.subplots(nrows=len(colnames)-1, sharex = True,figsize = (5.5,4),)
# Build the frame with the numeric "index" column once, outside the loop.
indexed = df.reset_index()
for ax, colname in zip(axes, colnames[1:]):
    # Scatter one column per axes, coloured by its own values.
    indexed.plot(x="index", y=colname, kind = 'scatter', legend=False,
                 ax=ax, c=colname, cmap="inferno")
plt.show()

Categories

Resources