Normalise with countplot() - python

The code below shows a graph with the numbers of values in my list:
import seaborn as sns
sns.countplot([0,1,2,3,1,2,1,3,2,1,2,1,3])
plt.show()
I would like the same plot with percentages instead. Is there an easy option with seaborn or matplotlib?

As shown here a countplot which shows normalized values can be easily achieved using a seaborn barplot.
import matplotlib.pyplot as plt
import seaborn as sns
x = [0,1,2,3,1,2,1,3,2,1,2,1,3]
percentage = lambda i: len(i) / float(len(x)) * 100
ax = sns.barplot(x=x, y=x, estimator=percentage)
ax.set(ylabel="Percent")
plt.show()
Or, using pandas,
import matplotlib.pyplot as plt
import pandas as pd
x = [0,1,2,3,1,2,1,3,2,1,2,1,3]
ax = (pd.Series(x).value_counts(normalize=True, sort=False)*100).plot.bar()
ax.set(ylabel="Percent")
plt.show()

Related

seaborn jointplot with same size plots

I'm doing a jointplot with a basemap, the problem is that when I add the basemap the main plot doesn't have the same size of the marginal plots. I've tried with different parameters without luck. Does anyone have an idea?
import seaborn as sns
import matplotlib.pyplot as plt
import contextily as ctx
import pandas as pd
##exaplme of the data
coords={'longitud':[-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464,
-62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452,
-62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622,
-63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785,
-63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898,
-63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301,
-63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115,
-63.211982035, -63.1689040153],
'latitud':[8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179,
8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804,
8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531,
8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656,
8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527,
8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299,
8.23477115627, 8.23552604027, 8.24327920905]}
df = pd.DataFrame(coords)
OSM_C = 'http://c.tile.openstreetmap.org/{z}/{x}/{y}.png'
joint_axes = sns.jointplot(
x='longitud', y='latitud', data=df, ec="r", s=5)
ctx.add_basemap(joint_axes.ax_joint,crs=4326,attribution=False,url=OSM_C)
adjust(hspace=0, wspace=0)
#plt.tight_layout()
plt.show()
Here is an approach that:
removes the axes sharing in the y-direction to be able to change the aspect to 'datalim'
sets the aspect to 'equal', 'datalim'
sets the y data limits of the marginal plot to be the same as the joint plot; this seems to need a redraw
The following code shows the idea (using imshow, as I don't have contextily installed):
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
coords = {'longitud' : [-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464, -62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452, -62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622, -63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785, -63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898, -63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301, -63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115, -63.211982035, -63.1689040153],
'latitud' : [8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179, 8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804, 8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531, 8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656, 8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527, 8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299, 8.23477115627, 8.23552604027, 8.24327920905]}
df = pd.DataFrame(coords)
g = sns.jointplot(data=df, x='longitud', y='latitud')
ctx.add_basemap(g.ax_joint,crs=4326,attribution=False,url=OSM_C)
# g.ax_joint.imshow(np.random.rand(20, 10), cmap='spring', interpolation='bicubic',
# extent=[df['longitud'].min(), df['longitud'].max(), df['latitud'].min(), df['latitud'].max()])
for axes in g.ax_joint.get_shared_y_axes():
for ax in axes:
g.ax_joint.get_shared_y_axes().remove(ax)
g.ax_joint.set_aspect('equal', 'datalim')
g.fig.canvas.draw()
g.ax_marg_y.set_ylim(g.ax_joint.get_ylim())
plt.show()
You can still combine this approach with changing the figure's width or height, or adding more whitespace on top or below.

Percentage in axis y histogram Matplotlib

I have the following Plot
I need to add percentage inside the bars, it should be like this:
My code is the following:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
pkmn = pd.read_csv('data/Pokemon.csv')
pkmn.head()
order = pkmn['Generation'].value_counts().index
order
pkmngen = pkmn['Generation'].value_counts()
plt.figure(figsize=(6,4))
sb.countplot(data=pkmn, y='Generation', color = sb.color_palette()[4], order=order, )
plt.xticks(rotation=90)
plt.show()

Not able to plot box plot separately

I have lot of feature in data and i want to make box plot for each feature. So for that
import pandas as pd
import seaborn as sns
plt.figure(figsize=(25,20))
for data in train_df.columns:
plt.subplot(7,4,i+1)
plt.subplots_adjust(hspace = 0.5, wspace = 0.5)
ax =sns.boxplot(train_df[data])
I did this
and the output is
All the plot are on one image i want something like
( not with skew graphs but with box plot )
What changes i need to do ?
In your code, I cannot see where the i is coming from and also it's not clear how ax was assigned.
Maybe try something like this, first an example data frame:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
train_df = pd.concat([pd.Series(np.random.normal(i,1,100)) for i in range(12)],axis=1)
Set up fig and a flattened ax for each subplot:
fig,ax = plt.subplots(4,3,figsize=(10,10))
ax = ax.flatten()
The most basic would be to call sns.boxplot assigning ax inside the function:
for i,data in enumerate(train_df.columns):
sns.boxplot(train_df[data],ax=ax[i])

How to show label names in pandas groupby histogram plot

I can plot multiple histograms in a single plot using pandas but there are few things missing:
How to give the label.
I can only plot one figure, how to change it to layout=(3,1) or something else.
Also, in figure 1, all the bins are filled with solid colors, and its kind of difficult to know which is which, how to fill then with different markers (eg. crosses,slashes,etc)?
Here is the MWE:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7,label='species')
plt.legend()
Output:
To change layout I can use by keyword, but can't give them colors
HOW TO GIVE DIFFERENT COLORS?
df.hist('sepal_length',by='species',layout=(3,1))
plt.tight_layout()
Gives:
You can resolve to groupby:
fig,ax = plt.subplots()
hatches = ('\\', '//', '..') # fill pattern
for (i, d),hatch in zip(df.groupby('species'), hatches):
d['sepal_length'].hist(alpha=0.7, ax=ax, label=i, hatch=hatch)
ax.legend()
Output:
In pandas version 1.1.0 you can simply set the legend keyword to true.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7, legend = True)
output image
It's more code, but using pure matplotlib will always give you more control over the plots. For your second case:
import matplotlib.pyplot as plt
import numpy as np
from itertools import zip_longest
# Dictionary of color for each species
color_d = dict(zip_longest(df.species.unique(),
plt.rcParams['axes.prop_cycle'].by_key()['color']))
# Use the same bins for each
xmin = df.sepal_length.min()
xmax = df.sepal_length.max()
bins = np.linspace(xmin, xmax, 20)
# Set up correct number of subplots, space them out.
fig, ax = plt.subplots(nrows=df.species.nunique(), figsize=(4,8))
plt.subplots_adjust(hspace=0.4)
for i, (lab, gp) in enumerate(df.groupby('species')):
ax[i].hist(gp.sepal_length, ec='k', bins=bins, color=color_d[lab])
ax[i].set_title(lab)
# same xlim for each so we can see differences
ax[i].set_xlim(xmin, xmax)

setting legend only for one of the marginal plots in seaborn

I am creating a JointGrid plot using seaborn.
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
mydataset=pd.DataFrame(data=np.random.rand(50,2),columns=['a','b'])
g = sns.JointGrid(x=mydataset['a'], y=mydataset['b'])
g=g.plot_marginals(sns.distplot,color='black',kde=True,hist=False,rug=True,bins=20,label='X')
g=g.plot_joint(plt.scatter,label='X')
legend_properties = {'weight':'bold','size':8}
legendMain=g.ax_joint.legend(prop=legend_properties,loc='upper right')
legendSide=g.ax_marg_x.legend(prop=legend_properties,loc='upper right')
I get this:
I would like to get rid of the legend within the vertical marginal plot (the one on the right side) but keep the one for the horizontal margin.
how to achieve that?
EDIT: The solution from #ImportanceOfBeingErnest works fine for one plot. However, if I repeat it in a for loops something unexpected happens.
I still get a legend in the upper plot and that is unexpected.
How to get rid of it?
The following code:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
mydataset=pd.DataFrame(data=np.random.rand(50,2),columns=['a','b'])
g = sns.JointGrid(x=mydataset['a'], y=mydataset['b'])
LABEL_LIST=['x','Y','Z']
for n in range(0,3):
g=g.plot_marginals(sns.distplot,color='black',kde=True,hist=False,rug=True,bins=20,label=LABEL_LIST[n])
g=g.plot_joint(plt.scatter,label=LABEL_LIST[n])
legend_properties = {'weight':'bold','size':8}
legendMain=g.ax_joint.legend(prop=legend_properties,loc='upper right')
legendSide=g.ax_marg_y.legend(labels=[LABEL_LIST[n]],prop=legend_properties,loc='upper right')
gives:
which is almost perfect, byt I need to get rid of the last legend entry in the plo on the right.
You may decide not to give any label to the marginals, but instead add the label when creating the legend inside the top marginal axes.
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
mydataset=pd.DataFrame(data=np.random.rand(50,2),columns=['a','b'])
g = sns.JointGrid(x=mydataset['a'], y=mydataset['b'])
g=g.plot_marginals(sns.distplot,color='black',
kde=True,hist=False,rug=True,bins=20)
g=g.plot_joint(plt.scatter,label='X')
legend_properties = {'weight':'bold','size':8}
legendMain=g.ax_joint.legend(prop=legend_properties,loc='upper right')
legendSide=g.ax_marg_x.legend(labels=["x"],
prop=legend_properties,loc='upper right')
plt.show()
The solution is the same for a plot in a loop.
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
mydataset=pd.DataFrame(data=np.random.rand(50,2),columns=['a','b'])
g = sns.JointGrid(x=mydataset['a'], y=mydataset['b'])
LABEL_LIST=['x','Y','Z']
for n in range(0,3):
g=g.plot_marginals(sns.distplot,color='black',kde=True,hist=False,rug=True,bins=20)
g=g.plot_joint(plt.scatter,label=LABEL_LIST[n])
legend_properties = {'weight':'bold','size':8}
legendMain=g.ax_joint.legend(prop=legend_properties,loc='upper right')
legendSide=g.ax_marg_x.legend(labels=LABEL_LIST,prop=legend_properties,loc='upper right')
plt.show()

Categories

Resources