Seaborn BoxPlot and log axis - python

I have been struggling with a tick problem for hours now.
I would like to plot a boxplot with log scaling, but there seem to be two issues:
Data has no positive values, and therefore cannot be log-scaled.
And
Attempted to set non-positive xlimits for log-scale axis; invalid limits will be ignored.
Here is my code:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
# Undersampling the DataFrame through columns because too big
sampling_factor = 50
all_frame_inverted_undersampled = all_frame_inverted.take([x for x in range(0,all_frame_inverted.shape[1], sampling_factor)], axis=1)
# Settings the canvas
fig, ax= plt.subplots(figsize=(18, 6))
ax.set_xlabel('Cycles')
ax.set_ylabel('Measures')
# setting log axis
ax.set_xscale('log')
# Plotting
g = sns.boxplot(data=all_frame_inverted_undersampled , palette='rainbow', orient="v", ax=ax)
# Showing
plt.tight_layout()
Here are the first 3 rows of the DataFrame:
And here, what I get :
I've tried many things like setting :
ax.xaxis.set_major_formatter(xmajor_formatter) with LogFormatter
I've tried symlog: the error message disappears, but the result is much the same.
I tried also : ax.set_xscale('symlog', linthreshy=1e3)
EDIT
I'm trying to get, on the x axis, ticks formatted as in the plot below :
EDIT 2: Here is a minimal example of my code:
# Preprocessing and Cleaning data
import numpy as np
import pandas as pd
# Data Visualization
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline
# Data Analysis & Visualization
import seaborn as sns
# Data
data= [[181.08, 180.23, 181.60, 178.05, 172.33, 175.50, 169.81, 167.10, 166.92, 166.10, 159.69],
[144.94,140.30,140.15,146.07,143.90,143.06,139.74,139.97,144.06,145.13,146.34],
[176.25,195.43,217.50,183.75,174.74,169.11,166.81,161.82,164.08,162.25,166.72],
[198.31,221.16,214.19,209.06,202.08,180.08,185.79,181.73,178.95,179.53,189.08],
[167.81,166.28,144.18,138.22,139.48,144.66,141.34,141.60,146.53,145.84,155.20]]
# Data to Dataframe
df = pd.DataFrame(data=data, index=['M01', 'M02', 'M03', 'M04', 'M05'],
columns=[8796, 60501505, 142252576, 224057457, 305801670, 387546170, 487628661, 609874323, 732114489, 854385341, 1190477590])
# Boxplot calculated for each cycle through all structures Mxx
fig, ax= plt.subplots(figsize=(18, 6))
ax.set_xlabel('Cycles')
ax.set_ylabel('Measures')
ax.set_xscale('symlog')
plt.xticks(rotation=45)
sns.boxplot(data=df, palette='rainbow', orient="v", ax=ax)
# Showing plot
plt.tight_layout()
Here is what I can do with Plotly; I would like to get the same result using boxplot, please:
# Interactive Data Visualization needs : pip install plotly & pip install cufflinks
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
df.iplot(kind='box', xTitle='Cycles', yTitle='Measures', logx=True)
Thank you for your help

Related

Seaborn lineplot legend not showing correct line colour - plotting two pandas series on one graph

I'm trying to plot two data sets with Seaborn, this is my code.
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
sns.axes_style("ticks")
ss_data = pd.read_csv('A.csv')
ks_data = pd.read_csv('B.csv')
g = sns.lineplot(data=ks_data, x="K", y="pd", dashes=False)
sns.lineplot(data=ss_data, x="K", y="pd", dashes=False)
g.set_xticks(range(0,22,4))
plt.legend(labels=["A", "B"])
plt.savefig("test.png", dpi=500)
But this is the graph I am getting, as you can see, the legend doesn't correctly show the line colour for B.
I think it's probably due to the way that I am adding the second lineplot to the graph, but I couldn't make it work any other way.
Use the label parameter (passed to matplotlib.axes.Axes.plot()), and no need for plt.legend().
# label= supplies each line's legend entry directly, so the legend colours
# always match the plotted lines; errorbar=None disables the confidence band.
sns.lineplot(
    data=ks_data, x="K", y="pd",
    label='A', errorbar=None)
sns.lineplot(
    data=ss_data, x="K", y="pd",
    label='B', errorbar=None)
Importantly, pass errorbar=None (or for seaborn versions prior to 0.12.0, ci=None), to turn off plotting of the confidence interval.
Maybe a matplotlib / seaborn version issue?
I'm not able to reproduce your graph. With some dummy data I get the expected results:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
sns.axes_style("ticks")
data1 = {"K":[1,5,10,15,20], "pd":[2,10,20,30,40]}
data2 = {"K":[1,5,10,15,20], "pd":[1.5,9,18,16,35]}
ss_data = pd.DataFrame(data=data1)
ks_data = pd.DataFrame(data=data2)
g = sns.lineplot(data=ks_data, x="K", y="pd", dashes=False)
sns.lineplot(data=ss_data, x="K", y="pd", dashes=False)
g.set_xticks(range(0,22,4))
plt.legend(labels=["A", "B"])
I have seaborn == 0.11.2 and matplotlib==3.5.0

seaborn jointplot with same size plots

I'm doing a jointplot with a basemap, the problem is that when I add the basemap the main plot doesn't have the same size of the marginal plots. I've tried with different parameters without luck. Does anyone have an idea?
import seaborn as sns
import matplotlib.pyplot as plt
import contextily as ctx
import pandas as pd
## example of the data
coords={'longitud':[-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464,
-62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452,
-62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622,
-63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785,
-63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898,
-63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301,
-63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115,
-63.211982035, -63.1689040153],
'latitud':[8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179,
8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804,
8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531,
8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656,
8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527,
8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299,
8.23477115627, 8.23552604027, 8.24327920905]}
df = pd.DataFrame(coords)
OSM_C = 'http://c.tile.openstreetmap.org/{z}/{x}/{y}.png'
joint_axes = sns.jointplot(
    x='longitud', y='latitud', data=df, ec="r", s=5)
ctx.add_basemap(joint_axes.ax_joint,crs=4326,attribution=False,url=OSM_C)
# Zero the spacing between the subplots of the current figure.
# (A bare `adjust(...)` is not defined anywhere and raises NameError.)
plt.subplots_adjust(hspace=0, wspace=0)
#plt.tight_layout()
plt.show()
Here is an approach that:
removes the axes sharing in the y-direction to be able to change the aspect to 'datalim'
sets the aspect to 'equal', 'datalim'
sets the y data limits of the marginal plot to be the same as the joint plot; this seems to need a redraw
The following code shows the idea (using imshow, as I don't have contextily installed):
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
coords = {'longitud' : [-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464, -62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452, -62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622, -63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785, -63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898, -63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301, -63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115, -63.211982035, -63.1689040153],
'latitud' : [8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179, 8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804, 8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531, 8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656, 8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527, 8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299, 8.23477115627, 8.23552604027, 8.24327920905]}
df = pd.DataFrame(coords)
g = sns.jointplot(data=df, x='longitud', y='latitud')
ctx.add_basemap(g.ax_joint,crs=4326,attribution=False,url=OSM_C)
# g.ax_joint.imshow(np.random.rand(20, 10), cmap='spring', interpolation='bicubic',
# extent=[df['longitud'].min(), df['longitud'].max(), df['latitud'].min(), df['latitud'].max()])
for axes in g.ax_joint.get_shared_y_axes():
for ax in axes:
g.ax_joint.get_shared_y_axes().remove(ax)
g.ax_joint.set_aspect('equal', 'datalim')
g.fig.canvas.draw()
g.ax_marg_y.set_ylim(g.ax_joint.get_ylim())
plt.show()
You can still combine this approach with changing the figure's width or height, or adding more whitespace on top or below.

Not able to plot box plot separately

I have a lot of features in my data and I want to make a box plot for each feature. So for that:
import pandas as pd
import seaborn as sns
plt.figure(figsize=(25,20))
for data in train_df.columns:
plt.subplot(7,4,i+1)
plt.subplots_adjust(hspace = 0.5, wspace = 0.5)
ax =sns.boxplot(train_df[data])
I did this
and the output is
All the plots end up on one image; I want something like
(not with skew graphs, but with box plots)
What changes do I need to make?
In your code, I cannot see where the i is coming from and also it's not clear how ax was assigned.
Maybe try something like this, first an example data frame:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
train_df = pd.concat([pd.Series(np.random.normal(i,1,100)) for i in range(12)],axis=1)
Set up fig and a flattened ax for each subplot:
fig,ax = plt.subplots(4,3,figsize=(10,10))
ax = ax.flatten()
The most basic would be to call sns.boxplot assigning ax inside the function:
for i,data in enumerate(train_df.columns):
sns.boxplot(train_df[data],ax=ax[i])

Customize Seaborn Hue Legend for Boxplot

When I plotted this boxplot figure, the legend for the age group was shown as below.
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
plt.figure(figsize=(14,7))
sns.set(style="white", palette="Blues", color_codes=True)
f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df)
plt.savefig("out.png",pad_inches=0.5)
plt.show()
But when I tried to customize the legend my code is
plt.figure(figsize=(14,7))
sns.set(style="white", palette="Blues", color_codes=True)
f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df)
f.set_xlabel("Sex")
f.set_ylabel("Systolic Blood Pressure")
legend_label = ["(18, 30)", "(30, 40)", "(40, 50)", "(50, 60)", "(60, 70)", "(70, 80)"]
f.legend(title="Age Group", labels=legend_label)
plt.savefig("out.png",pad_inches=0.5)
plt.show()
This f.legend(title="Age Group", labels=legend_label) line was able to customize the title and labels, but it broke the legend markers. I need the markers to use the color palette, as in the previous figure.
As of seaborn 0.10.1, the legend label is stored in ax.legend_.texts[0], where ax is the matplotlib Axes returned by sns.boxplot(). This means that you can edit the legend label without changing anything else about the legend as follows.
g = sns.boxplot(...)
new_legend_label = 'Age Group'
g.legend_.texts[0].set_text(new_legend_label)
Depending on what version of seaborn you're using, the method might be different. See these answers from 2017 and 2019 for slightly different syntax with older versions.
Thank you, Emerson Harkin. Your solution was useful. I just iterate over the list of labels to update them all. Here is my updated code and figure:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
plt.figure(figsize=(14,7))
sns.set(style="white", palette="Blues", color_codes=True)
f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df)
f.set_xlabel("Sex")
f.set_ylabel("Systolic Blood Pressure")
legend_label = ["(18, 30)", "(30, 40)", "(40, 50)", "(50, 60)", "(60, 70)", "(70, 80)"]
f.legend(title="Age Group")
# Overwrite the text of each existing legend entry, in order; only the
# Text objects are touched, so the legend handles keep their colours.
for n, label in enumerate(legend_label):
    f.legend_.texts[n].set_text(label)
plt.show()
Updated Figure

Matplotlib inline in Jupyter - how to control when the plot is shown?

I have a function that creates a figure and for some reason it is shown in Jupyter notebook twice, even though I didn't run show at all. I pass the fig and ax as an output of this function, and plan to show it only later.
I get confused between the plt, fig and ax functionalities and guess that the answer is hidden somewhere there.
Here is an anonymised version of my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
def plot_curve(dummydata):
    """Build a chart from ``dummydata`` and return its figure and axes.

    Plots two curves taken from the ``x1``/``y1`` and ``x2``/``y2``
    attributes of ``dummydata``, then adds guide lines, an arrow, a shaded
    rectangle, a legend and a title. The function itself never calls
    ``plt.show()``; the caller receives the objects and decides what to do
    with them.

    Args:
        dummydata: object exposing ``x1``, ``y1``, ``x2`` and ``y2``
            (the caller in this example passes a pandas DataFrame).

    Returns:
        tuple: ``(fig, ax)`` as created by ``plt.subplots``.
    """
    # Draw through the explicit Axes object rather than the implicit pyplot
    # "current axes", so it is unambiguous which axes every call targets.
    fig, ax = plt.subplots(1, figsize=(12, 7))  # width 12, height 7

    ax.plot(dummydata.x1, dummydata.y1, label='l1')  # curve 1
    ax.plot(dummydata.x2, dummydata.y2, label='l2')  # curve 2

    # labels
    ax.set_xlabel('xlabel')
    ax.set_ylabel('xlabel')

    # scale and bounds
    ax.set_yscale('linear')
    ax.set_ylim(0, 100)

    # guidelines: one horizontal, two vertical
    ax.axhline(1, color='k', linestyle=':', label='lab1')
    ax.axvline(2, color='r', linestyle='--', label='lab2')
    ax.axvline(3, color='g', linestyle='--', label='lab3')

    # arrow
    ax.arrow(1, 2, 3, 0, head_width=0.1, head_length=0.01, fc='k', ec='k')

    # yellow area patch
    rect = mpl.patches.Rectangle((1, 2), 2, 3, alpha=0.1, facecolor='yellow',
                                 linewidth=0, label='lab4')
    ax.add_patch(rect)

    ax.legend()
    ax.set_title('title')
    return fig, ax
and then call it with:
for i in range(3):
dummydata = pd.DataFrame({
'x1':np.arange(1+i,100,0.1),
'y1':np.arange(11+i,110,0.1),
'x2':np.arange(1+i,100,0.1),
'y2':np.arange(21+i,120,0.1)
})
fig,ax = plot_curve(dummydata) #get the chart
What should I change to not show the figure by default, and show it only by my command?
Thanks
Try disabling matplotlib interactive mode using plt.ioff(). With interactive mode disabled the plots will only be shown with an explicit plt.show().
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
# Deactivate interactive mode
plt.ioff()
def plot_curve(dummydata):
# the same code as before
Then in another cell
for i in range(3):
dummydata = pd.DataFrame({
'x1':np.arange(1+i,100,0.1),
'y1':np.arange(11+i,110,0.1),
'x2':np.arange(1+i,100,0.1),
'y2':np.arange(21+i,120,0.1)
})
# I am assuming this should not be in the for loop
# The plot will NOT be shown because we are not in interactive mode
fig, ax = plot_curve(dummydata) #get the chart
No plot will be shown yet.
Now in another cell
# Now ANY plot (figure) which was created and not shown yet will be finally shown
plt.show()
The plot is finally shown. Note that if you have created several plots all of them will be shown now.
Try this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib
With these imports you should not see the figure after plotting.
But you can see the figure by writing fig to IPython cell:
dummydata = pd.DataFrame({
'x1':np.arange(1,100,0.1),
'y1':np.arange(11,110,0.1),
'x2':np.arange(1,100,0.1),
'y2':np.arange(21,120,0.1)
})
fig,ax = plot_curve(dummydata) #get the chart
fig # Will now plot the figure.
Is this the desired output?

Categories

Resources