I'm making a jointplot with a basemap. The problem is that when I add the basemap, the main plot no longer has the same size as the marginal plots. I've tried different parameters without luck. Does anyone have an idea?
import seaborn as sns
import matplotlib.pyplot as plt
import contextily as ctx
import pandas as pd
## example of the data
coords={'longitud':[-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464,
-62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452,
-62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622,
-63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785,
-63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898,
-63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301,
-63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115,
-63.211982035, -63.1689040153],
'latitud':[8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179,
8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804,
8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531,
8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656,
8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527,
8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299,
8.23477115627, 8.23552604027, 8.24327920905]}
# Build the frame that seaborn plots: 'longitud' on x, 'latitud' on y.
df = pd.DataFrame(coords)
# Tile URL template handed to contextily for the background map.
OSM_C = 'http://c.tile.openstreetmap.org/{z}/{x}/{y}.png'
joint_axes = sns.jointplot(
    x='longitud', y='latitud', data=df, ec="r", s=5)
# NOTE(review): recent contextily releases appear to take the tile URL via
# `source=` rather than `url=` -- confirm against the installed version.
ctx.add_basemap(joint_axes.ax_joint, crs=4326, attribution=False, url=OSM_C)
# A bare `adjust(...)` is not defined anywhere in this script and raises
# NameError; the spacing is adjusted on the jointplot's own figure instead.
joint_axes.fig.subplots_adjust(hspace=0, wspace=0)
#plt.tight_layout()
plt.show()
Here is an approach that:
- removes the axes sharing in the y-direction, to be able to change the aspect to 'datalim'
- sets the aspect to 'equal', 'datalim'
- sets the y data limits of the marginal plot to be the same as those of the joint plot; this seems to need a redraw
The following code shows the idea (using imshow, as I don't have contextily installed):
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Sample coordinates: 'longitud' is plotted on x and 'latitud' on y by the jointplot call below.
coords = {'longitud' : [-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464, -62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452, -62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622, -63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785, -63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898, -63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301, -63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115, -63.211982035, -63.1689040153],
'latitud' : [8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179, 8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804, 8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531, 8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656, 8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527, 8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299, 8.23477115627, 8.23552604027, 8.24327920905]}
df = pd.DataFrame(coords)
g = sns.jointplot(data=df, x='longitud', y='latitud')

# Background image. This snippet neither imports contextily nor defines
# OSM_C, so a random image spanning the data extent stands in for the
# basemap. With contextily available, use this call instead:
#   ctx.add_basemap(g.ax_joint, crs=4326, attribution=False, url=OSM_C)
g.ax_joint.imshow(np.random.rand(20, 10), cmap='spring', interpolation='bicubic',
                  extent=[df['longitud'].min(), df['longitud'].max(),
                          df['latitud'].min(), df['latitud'].max()])

# Remove every axes from the joint plot's shared-y group so that the joint
# axes can use the 'datalim' adjustable.
# NOTE(review): modifying the grouper returned by get_shared_y_axes() is
# deprecated in newer Matplotlib releases -- confirm for the version in use.
for axes in g.ax_joint.get_shared_y_axes():
    for ax in axes:
        g.ax_joint.get_shared_y_axes().remove(ax)
g.ax_joint.set_aspect('equal', 'datalim')
# Redraw before reading the limits back, so the y-limits copied to the
# marginal plot are the ones produced by the new aspect setting.
g.fig.canvas.draw()
g.ax_marg_y.set_ylim(g.ax_joint.get_ylim())
plt.show()
You can still combine this approach with changing the figure's width or height, or adding more whitespace on top or below.
I can plot multiple histograms in a single plot using pandas, but there are a few things missing:
How to give each histogram a label.
I can only plot one figure; how can I change it to layout=(3,1) or something else?
Also, in figure 1, all the bins are filled with solid colors, and it's kind of difficult to know which is which. How can I fill them with different markers (e.g. crosses, slashes, etc.)?
Here is the MWE:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris sample data and overlay one sepal-length histogram per species.
df = sns.load_dataset('iris')
sepal_length_by_species = df.groupby('species')['sepal_length']
# Every group's histogram receives the same literal label string 'species'.
sepal_length_by_species.hist(alpha=0.7, label='species')
plt.legend()
Output:
To change layout I can use by keyword, but can't give them colors
HOW TO GIVE DIFFERENT COLORS?
# One sepal-length histogram per species, stacked in a 3-row, 1-column layout.
df.hist(column='sepal_length', by='species', layout=(3, 1))
plt.tight_layout()
Gives:
You can resort to groupby:
# Draw all species on one axes, giving each its own label and hatch pattern.
fig, ax = plt.subplots()
hatches = ('\\', '//', '..')  # fill pattern, one per species group
# zip pairs each (species name, sub-frame) from the groupby with one hatch.
for (species, group), hatch in zip(df.groupby('species'), hatches):
    group['sepal_length'].hist(alpha=0.7, ax=ax, label=species, hatch=hatch)
ax.legend()
Output:
In pandas version 1.1.0 you can simply set the legend keyword to true.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Overlay one sepal-length histogram per species and let pandas add the legend.
df = sns.load_dataset('iris')
sepal_length_by_species = df.groupby('species')['sepal_length']
sepal_length_by_species.hist(alpha=0.7, legend=True)
output image
It's more code, but using pure matplotlib will always give you more control over the plots. For your second case:
import matplotlib.pyplot as plt
import numpy as np
from itertools import zip_longest
# Dictionary of color for each species, taken from the active color cycle.
# zip_longest keeps every species as a key even when there are more species
# than cycle colors; such species map to None.
color_d = dict(zip_longest(df.species.unique(),
                           plt.rcParams['axes.prop_cycle'].by_key()['color']))

# Use the same bins for each
xmin = df.sepal_length.min()
xmax = df.sepal_length.max()
bins = np.linspace(xmin, xmax, 20)

# Set up correct number of subplots, space them out.
# squeeze=False always returns a 2-D array of axes, so indexing below also
# works when there is only a single species.
fig, ax = plt.subplots(nrows=df.species.nunique(), figsize=(4, 8),
                       squeeze=False)
ax = ax.ravel()
plt.subplots_adjust(hspace=0.4)

for i, (lab, gp) in enumerate(df.groupby('species')):
    ax[i].hist(gp.sepal_length, ec='k', bins=bins, color=color_d[lab])
    ax[i].set_title(lab)

    # same xlim for each so we can see differences
    ax[i].set_xlim(xmin, xmax)
I am running the following code to draw histograms in a 3-by-3 grid for 9 variables. However, it plots only one variable.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def draw_histograms(df, variables, n_rows, n_cols):
    """Plot a 10-bin histogram of each listed column on an n_rows x n_cols grid.

    Args:
        df: Frame holding the data; indexed by each entry of ``variables``.
        variables: Iterable of column labels to plot, one subplot each.
        n_rows: Number of subplot rows in the grid.
        n_cols: Number of subplot columns in the grid.
    """
    figure = plt.figure()
    # Subplot positions are 1-based, so start the counter at 1.
    for slot, column in enumerate(variables, start=1):
        axis = figure.add_subplot(n_rows, n_cols, slot)
        df[column].hist(ax=axis, bins=10)
        plt.title(column + "Distribution")
        # Shown inside the loop, i.e. once per subplot added so far.
        plt.show()
You're adding subplots correctly but you call plt.show for each added subplot which causes what has been drawn so far to be shown, i.e. one plot. If you're for instance plotting inline in IPython you will only see the last plot drawn.
Matplotlib provides some nice examples of how to use subplots.
Your problem is fixed like:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def draw_histograms(df, variables, n_rows, n_cols, bins=10):
    """Draw one histogram per column on a single figure and show it once.

    Args:
        df: Frame holding the data; indexed by each entry of ``variables``.
        variables: Iterable of column labels to plot, one subplot each.
        n_rows: Number of subplot rows in the grid.
        n_cols: Number of subplot columns in the grid.
        bins: Bin specification forwarded to each histogram (default 10).
    """
    fig = plt.figure()
    for i, var_name in enumerate(variables):
        # Subplot positions are 1-based.
        ax = fig.add_subplot(n_rows, n_cols, i + 1)
        df[var_name].hist(bins=bins, ax=ax)
        # str() lets non-string column labels (e.g. integers) be titled
        # without a TypeError from string concatenation.
        ax.set_title(str(var_name) + " Distribution")
    fig.tight_layout()  # Improves appearance a bit.
    # Show once, after every subplot has been added.
    plt.show()
# Demo data: 30 rows of standard-normal samples in nine columns named '0'..'8'.
test = pd.DataFrame(np.random.randn(30, 9),
                    columns=[str(i) for i in range(9)])
# Plot every column on a 3 x 3 grid.
draw_histograms(test, test.columns, 3, 3)
Which gives a plot like:
In case you don't really worry about titles, here's a one-liner
# 100 rows by 9 columns of random integers drawn from [0, 10).
df = pd.DataFrame(data=np.random.randint(0, 10, size=(100, 9)))
# DataFrame.hist draws a histogram for each column in one call.
df.hist(bins=10, alpha=0.5, color='k')