I'm doing a jointplot with a basemap, the problem is that when I add the basemap the main plot doesn't have the same size of the marginal plots. I've tried with different parameters without luck. Does anyone have an idea?
import seaborn as sns
import matplotlib.pyplot as plt
import contextily as ctx
import pandas as pd
##exaplme of the data
coords={'longitud':[-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464,
-62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452,
-62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622,
-63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785,
-63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898,
-63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301,
-63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115,
-63.211982035, -63.1689040153],
'latitud':[8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179,
8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804,
8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531,
8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656,
8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527,
8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299,
8.23477115627, 8.23552604027, 8.24327920905]}
df = pd.DataFrame(coords)
OSM_C = 'http://c.tile.openstreetmap.org/{z}/{x}/{y}.png'
joint_axes = sns.jointplot(
x='longitud', y='latitud', data=df, ec="r", s=5)
ctx.add_basemap(joint_axes.ax_joint,crs=4326,attribution=False,url=OSM_C)
adjust(hspace=0, wspace=0)
#plt.tight_layout()
plt.show()
Here is an approach that:
removes the axes sharing in the y-direction to be able to change the aspect to 'datalim'
sets the aspect to 'equal', 'datalim'
sets the y data limits of the marginal plot to be the same as the joint plot; this seems to need a redraw
The following code shows the idea (using imshow, as I don't have contextily installed):
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
coords = {'longitud' : [-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464, -62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452, -62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622, -63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785, -63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898, -63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301, -63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115, -63.211982035, -63.1689040153],
'latitud' : [8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179, 8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804, 8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531, 8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656, 8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527, 8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299, 8.23477115627, 8.23552604027, 8.24327920905]}
df = pd.DataFrame(coords)
g = sns.jointplot(data=df, x='longitud', y='latitud')
ctx.add_basemap(g.ax_joint,crs=4326,attribution=False,url=OSM_C)
# g.ax_joint.imshow(np.random.rand(20, 10), cmap='spring', interpolation='bicubic',
# extent=[df['longitud'].min(), df['longitud'].max(), df['latitud'].min(), df['latitud'].max()])
for axes in g.ax_joint.get_shared_y_axes():
for ax in axes:
g.ax_joint.get_shared_y_axes().remove(ax)
g.ax_joint.set_aspect('equal', 'datalim')
g.fig.canvas.draw()
g.ax_marg_y.set_ylim(g.ax_joint.get_ylim())
plt.show()
You can still combine this approach with changing the figure's width or height, or adding more whitespace on top or below.
I have lot of feature in data and i want to make box plot for each feature. So for that
import pandas as pd
import seaborn as sns
plt.figure(figsize=(25,20))
for data in train_df.columns:
plt.subplot(7,4,i+1)
plt.subplots_adjust(hspace = 0.5, wspace = 0.5)
ax =sns.boxplot(train_df[data])
I did this
and the output is
All the plot are on one image i want something like
( not with skew graphs but with box plot )
What changes i need to do ?
In your code, I cannot see where the i is coming from and also it's not clear how ax was assigned.
Maybe try something like this, first an example data frame:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
train_df = pd.concat([pd.Series(np.random.normal(i,1,100)) for i in range(12)],axis=1)
Set up fig and a flattened ax for each subplot:
fig,ax = plt.subplots(4,3,figsize=(10,10))
ax = ax.flatten()
The most basic would be to call sns.boxplot assigning ax inside the function:
for i,data in enumerate(train_df.columns):
sns.boxplot(train_df[data],ax=ax[i])
When I tried to plot this boxplot figure , legend of age group was shown as below.
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
plt.figure(figsize=(14,7))
sns.set(style="white", palette="Blues", color_codes=True)
f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df)
plt.savefig("out.png",pad_inches=0.5)
plt.show()
But when I tried to customize the legend my code is
plt.figure(figsize=(14,7))
sns.set(style="white", palette="Blues", color_codes=True)
f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df)
f.set_xlabel("Sex")
f.set_ylabel("Systolic Blood Pressure")
legend_label = ["(18, 30)", "(30, 40)", "(40, 50)", "(50, 60)", "(60, 70)", "(70, 80)"]
f.legend(title="Age Group", labels=legend_label)
plt.savefig("out.png",pad_inches=0.5)
plt.show()
This f.legend(title="Age Group", labels=legend_label) line was able to customize the title and labels but it caused errors in the markers. I need to set the markers to the color pallet as it was in the previous figure.
As of seaborn 0.10.1, the legend label is stored in ax.legend_.texts[0], where ax is the matplotlib Axes returned by sns.boxplot(). This means that you can edit the legend label without changing anything else about the legend as follows.
g = sns.boxplot(...)
new_legend_label = 'Age Group'
g.legend_.texts[0].set_text(new_legend_label)
Depending on what version of seaborn you're using, the method might be different. See these answers from 2017 and 2019 for slightly different syntax with older versions.
Thank You Emerson Harkin. Your solution was useful. I just iterate over list of labels to update all. Here is my updated code and figure:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
plt.figure(figsize=(14,7))
sns.set(style="white", palette="Blues", color_codes=True)
f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df)
f.set_xlabel("Sex")
f.set_ylabel("Systolic Blood Pressure")
legend_label = ["(18, 30)", "(30, 40)", "(40, 50)", "(50, 60)", "(60, 70)", "(70, 80)"]
f.legend(title="Age Group")
n = 0
for i in legend_label:
f.legend_.texts[n].set_text(i)
n += 1
plt.show()
Updated Figure
I can plot multiple histograms in a single plot using pandas but there are few things missing:
How to give the label.
I can only plot one figure, how to change it to layout=(3,1) or something else.
Also, in figure 1, all the bins are filled with solid colors, and its kind of difficult to know which is which, how to fill then with different markers (eg. crosses,slashes,etc)?
Here is the MWE:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7,label='species')
plt.legend()
Output:
To change layout I can use by keyword, but can't give them colors
HOW TO GIVE DIFFERENT COLORS?
df.hist('sepal_length',by='species',layout=(3,1))
plt.tight_layout()
Gives:
You can resolve to groupby:
fig,ax = plt.subplots()
hatches = ('\\', '//', '..') # fill pattern
for (i, d),hatch in zip(df.groupby('species'), hatches):
d['sepal_length'].hist(alpha=0.7, ax=ax, label=i, hatch=hatch)
ax.legend()
Output:
In pandas version 1.1.0 you can simply set the legend keyword to true.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7, legend = True)
output image
It's more code, but using pure matplotlib will always give you more control over the plots. For your second case:
import matplotlib.pyplot as plt
import numpy as np
from itertools import zip_longest
# Dictionary of color for each species
color_d = dict(zip_longest(df.species.unique(),
plt.rcParams['axes.prop_cycle'].by_key()['color']))
# Use the same bins for each
xmin = df.sepal_length.min()
xmax = df.sepal_length.max()
bins = np.linspace(xmin, xmax, 20)
# Set up correct number of subplots, space them out.
fig, ax = plt.subplots(nrows=df.species.nunique(), figsize=(4,8))
plt.subplots_adjust(hspace=0.4)
for i, (lab, gp) in enumerate(df.groupby('species')):
ax[i].hist(gp.sepal_length, ec='k', bins=bins, color=color_d[lab])
ax[i].set_title(lab)
# same xlim for each so we can see differences
ax[i].set_xlim(xmin, xmax)
I am plotting a series of boxplots on the same axes and want to adda legend to identify them.
Very simplified, my script looks like this:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
df={}
bp={}
positions = [1,2,3,4]
df[0]= pd.DataFrame (np.random.rand(4,4),columns =['A','B','C','D'])
df[1]= pd.DataFrame (np.random.rand(4,4),columns =['A','B','C','D'])
colour=['red','blue']
fig, ax = plt.subplots()
for i in [0,1]:
bp[i] = df[i].plot.box(ax=ax,
positions = positions,
color={'whiskers': colour[i],
'caps': colour[i],
'medians': colour[i],
'boxes': colour[i]}
)
plt.legend([bp[i] for i in [0,1]], ['first plot', 'second plot'])
fig.show()
The plot is fine, but the legend is not drawn and I get this warning
UserWarning: Legend does not support <matplotlib.axes._subplots.AxesSubplot object at 0x000000000A7830F0> instances.
A proxy artist may be used instead.
(I have had this warning before when adding a legend to a scatter plot, but the legend was still drawn, so i could ignore it. )
Here is a link to a description of proxy artists, but it is not clear how to apply this to my script. Any suggestions?
'pandas' plots return AxesSubplot objects which can not be used for generating legends. You must generate you own legend using proxy artist instead. I have modified your code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
df={}
bp={}
positions = [1,2,3,4]
df[0]= pd.DataFrame (np.random.rand(4,4),columns =['A','B','C','D'])
df[1]= pd.DataFrame (np.random.rand(4,4),columns =['A','B','C','D'])
colour=['red','blue']
fig, ax = plt.subplots()
for i in [0,1]:
bp[i] = df[i].plot.box(ax=ax,
positions = positions,
color={'whiskers': colour[i],
'caps': colour[i],
'medians': colour[i],
'boxes': colour[i]}
)
red_patch = mpatches.Patch(color='red', label='The red data')
blue_patch = mpatches.Patch(color='blue', label='The blue data')
plt.legend(handles=[red_patch, blue_patch])
plt.show()
The results are shown below: