Seaborn barplot legend labels lose color - python

I have a seaborn barplot. When I use plt.legend([...]) with a list of strings to rename the legend labels, the legend entries lose their colors. I need to change the labels while keeping the color coding, but I could not find out how to do this after searching for an answer.
The Hues legend 1-4 corresponds from 1 = Very interested in politics to 4 = not at all interested. I want to change the legend hue labels from 1-4 to how interested they are in politics.
My code is:
Packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
I didn't know how to create the dataframe in a simpler way, so I did this:
def _repeat_row(row, times):
    """Return a DataFrame holding ``times`` copies of the one-row record ``row``."""
    single = pd.DataFrame(data=row, index=range(1))
    return pd.concat([single] * times)


# One record per (reads newspapers, interest level) combination.
a1 = {'Reads Newspapers': 0, 'Interest in Politics': 1}
a2 = {'Reads Newspapers': 0, 'Interest in Politics': 2}
a3 = {'Reads Newspapers': 0, 'Interest in Politics': 3}
a4 = {'Reads Newspapers': 0, 'Interest in Politics': 4}
b1 = {'Reads Newspapers': 1, 'Interest in Politics': 1}
b2 = {'Reads Newspapers': 1, 'Interest in Politics': 2}
b3 = {'Reads Newspapers': 1, 'Interest in Politics': 3}
b4 = {'Reads Newspapers': 1, 'Interest in Politics': 4}

# Non-readers: repeat each record once per respondent in that cell.
df1 = _repeat_row(a1, 23)
df2 = _repeat_row(a2, 98)
df3 = _repeat_row(a3, 99)
df4 = _repeat_row(a4, 18)

# Readers: each b* name is rebound from its record dict to the repeated frame.
b1 = _repeat_row(b1, 468)
b2 = _repeat_row(b2, 899)
b3 = _repeat_row(b3, 413)
b4 = _repeat_row(b4, 46)

# Stack all eight cells into the single frame used for plotting.
data = pd.concat([df1, df2, df3, df4, b1, b2, b3, b4])
Actual plotting that produces error
# Draw the grouped bar chart on a 10x8 figure.
plt.figure(figsize=(10,8))
# estimator=len is applied to the values in each bar's group, so bar height is a row count;
# hue uses the numeric interest codes, which is why the legend shows 1-4.
g = sns.barplot(data=data, x='Reads Newspapers', estimator=len,y='Interest in Politics', hue='Interest in Politics' )
plt.ylabel("Sample Size")
# NOTE(review): `g` presumably already refers to the Axes seaborn drew on -- confirm;
# if so, requesting an axes again via plt.subplot() is unnecessary.
ax = plt.subplot()
# NOTE: this rebinds `ax` to the return value of set_xticklabels, so the name no
# longer refers to the axes object after this line.
ax = ax.set_xticklabels(["No","Yes"])
# Disabled attempt at renaming the legend entries (the call the question is about):
#plt.legend(["very interested","somewhat interested", "only a little interested", "not at all interested "],)
#plt.savefig('Newspaper policy')
I tried using plt.legend, but the legend labels lose their color when I do this: they become plain strings with no color association, which is even worse than before.
I have now edited in the entirety of my script.
https://github.com/HenrikMorpheus/Newspaper-reading-survey/blob/master/politicalinterest.ipynb
It loads with an error on GitHub for some reason I don't know, but you should be able to open the notebook in Jupyter.

Use dedicated dataframe column
An option is to create a new column in the dataframe with the respective labels in, and use this column as input for the hue, such that the desired labels are automatically created.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Toy survey data: 250 readers, 150 non-readers, interest coded 1-4.
reads = ["Yes"] * 250 + ["No"] * 150
interest = [4, 2, 2, 2, 2, 3, 3, 1, 1, 1] * 40
df = pd.DataFrame({"reads": reads, "interest": interest})

labels = [
    "very interested",
    "somewhat interested",
    "only a little interested",
    "not at all interested",
]

# Translate each numeric code (1..4) into its description and store the
# result in a new column, so seaborn builds the legend from the text.
code_to_label = dict(enumerate(labels, start=1))
df["Interested in politics"] = df["interest"].map(code_to_label)

plt.figure(figsize=(10, 8))
# hue points at the text column; hue_order keeps the bars in code order.
ax = sns.barplot(
    data=df,
    x='reads',
    y='interest',
    estimator=len,
    hue='Interested in politics',
    hue_order=labels,
)
ax.set_ylabel("Sample Size")
plt.show()
Setting the labels manually.
You may obtain the handles and labels for the legend via ax.get_legend_handles_labels() and use them to create a new legend with the labels from the list.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Toy survey data: 250 readers, 150 non-readers, interest coded 1-4.
reads = ["Yes"] * 250 + ["No"] * 150
interest = [4, 2, 2, 2, 2, 3, 3, 1, 1, 1] * 40
df = pd.DataFrame({"reads": reads, "interest": interest})

labels = [
    "very interested",
    "somewhat interested",
    "only a little interested",
    "not at all interested",
]

plt.figure(figsize=(10, 8))
ax = sns.barplot(data=df, x='reads', y='interest', estimator=len, hue='interest')
ax.set_ylabel("Sample Size")

# Keep the colored handles seaborn created and replace only the label text.
handles, _numeric_labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, title="Interested in politics")
plt.show()

Related

Skip bars in Seaborn bar plot for which no data exists [duplicate]

I have a grouped barplot. It's working very well, but I try to remove the empty barplots. They take too much space.
I have already tried :
%matplotlib inline
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import sys
import os
import glob
import seaborn as sns
import pandas as pd
import ggplot
from ggplot import aes
sns.set(style= "whitegrid", palette="pastel", color_codes=True )
tab_folder = 'myData'
out_folder ='myData/plots'
tab = glob.glob('%s/R*.tab'%(tab_folder))
#is reading all my data
# For every input table: count (name, ab) pairs, pivot them into a
# name x ab grid, draw a horizontal grouped bar chart and save it as PNG.
# NOTE(review): `ax` is not created anywhere in this snippet -- presumably it
# comes from code that was left out of the post; confirm before running.
for i, tab_file in enumerate(tab):
    folder, file_name = os.path.split(tab_file)
    # Sample id = file name without its 4-character extension and without 'DD'.
    s_id = file_name[:-4].replace('DD', '')
    # DataFrame.from_csv was removed from pandas; read_csv with index_col=0
    # and parse_dates=True reproduces its defaults.
    df = pd.read_csv(tab_file, sep='\t', index_col=0, parse_dates=True)
    df_2 = df.groupby(['name', 'ab']).size().reset_index(name='count')
    df_2 = df_2[df_2['count'] != 0]
    table = pd.pivot_table(df_2, index='name', columns='ab', values='count')
    table.plot(kind='barh', width=0.9, color=['b', 'g', 'r'], ax=ax)
    # Shrink every tick label on both axes.
    for label in (ax.get_xticklabels() + ax.get_yticklabels()):
        label.set_fontsize(4)
    ax.set_title(s_id).update({'color': 'black', 'size': 5, 'family': 'monospace'})
    ax.set_xlabel('')
    ax.set_ylabel('')
    # Reverse the legend entries and place the legend via bbox_to_anchor.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], bbox_to_anchor=(1, 1.05), prop={'size': 4})
    png_t = '%s/%s.b.png' % (out_folder, s_id)
    plt.savefig(png_t, dpi=500)
But it's not working. The bars are still the same.
Is there any other method to remove empty bars?
Your question is not complete. I don't know exactly what you're trying to accomplish, but from what you've said I'd guess that you are trying not to display empty pivot pairs.
This is not possible with the standard plotting tools in pandas. A plot of groups needs to display all of them, including the NaNs, which are plotted as "empty bars".
Furthermore, after groupby every group has a size of at least one, so the condition in df_2[df_2['count'] != 0] is always true.
For example
# Four (names, ab) observations; most name/ab combinations never occur.
df = pd.DataFrame({
    'names': ['nameA', 'nameB', 'nameA', 'nameD'],
    'ab': ['abA', 'abA', 'abB', 'abD'],
})
# Count how often each observed (names, ab) pair appears.
pair_sizes = df.groupby(['names', 'ab']).size()
df_2 = pair_sizes.reset_index(name='count')
df_2 = df_2[df_2['count'] != 0] # this line has no effect
# Pivot to a names x ab grid; combinations that never occurred become NaN.
table = df_2.pivot_table(index='names', columns='ab', values='count')
table
gives
ab abA abB abD
names
nameA 1.00 1.00 NaN
nameB 1.00 NaN NaN
nameD NaN NaN 1.00
and
table.plot(kind='barh', width = 0.9, color = ['b', 'g', 'r'])
shows
And that's the way it is. The plot needs to show all groups after the pivot.
EDIT
You can also use stacked plot, to get rid of spaces
table.plot(kind='barh', width = 0.9, color = ['b', 'g', 'r'], stacked=True)

for-Loop to create LinePlots with seaborn in DataFrame

I am a beginner at coding with Python and I have a question:
This code works fantastically to create a chart for a single column:
The Main DF is:
enter image description here
1- Removing Outliers:
def remove_outliers(df_in, col):
    """Return the rows of ``df_in`` whose ``col`` value is not an IQR outlier.

    A value is kept when it lies strictly between ``q1 - 1.5 * iqr`` and
    ``q3 + 1.5 * iqr``, where ``q1``/``q3`` are the 25% and 75% quantiles of
    the column and ``iqr = q3 - q1``. Values equal to a bound are dropped.

    Parameters
    ----------
    df_in : DataFrame to filter; it is not modified.
    col : label of the column the bounds are computed from.

    Returns
    -------
    The filtered selection of ``df_in`` with all original columns.
    """
    q1 = df_in[col].quantile(0.25)
    q3 = df_in[col].quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    # Strict comparisons on both sides: the bounds themselves are excluded.
    in_range = (df_in[col] > lower_bound) & (df_in[col] < upper_bound)
    df_out = df_in.loc[in_range]
    return df_out
2- Define the Format of the Lineplot
rc={'axes.labelsize': 20, 'font.size': 20, 'legend.fontsize':20,'axes.titlesize':20,'xtick.labelsize': 14,'ytick.labelsize': 14, 'lines.linewidth':1, 'lines.markersize':7, 'xtick.major.pad':10}
sns.set(rc=rc)
3- Create a Lineplot with seaborn:
# Drop outliers, then keep only the measurement column and the date column.
# NOTE(review): the outlier filter is keyed on 'DH001' while every other line
# uses 'DH 001' (with a space) -- confirm that both columns exist in main_df.
df1_DH001= remove_outliers(main_df, 'DH001')[['DH 001','Datum']]
# Scatter points first, then a line over the same data; the second assignment
# rebinds the name to the lineplot's return value.
df1_DH001_chart= sns.scatterplot(x='Datum', y='DH 001', data=df1_DH001)
df1_DH001_chart= sns.lineplot(x='Datum', y='DH 001', data=df1_DH001, lw=3, color="b")
# Fix axis limits, title and axis labels for the chart.
df1_DH001_chart.set(xlim=('1995','2019'), ylim=(0, 220) ,title='DH 001', ylabel='Nitrat mg/L', xlabel="Jahr")
# NOTE(review): `mdates` is not imported in the visible code -- presumably
# matplotlib.dates; confirm.
# One major x tick per year, formatted as a four-digit year.
df1_DH001_chart.xaxis.set_major_locator(mdates.YearLocator(1))
df1_DH001_chart.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
df1_DH001_chart
So I got this:
enter image description here
Now I would like to create a for-loop that produces the same plot with the same x-axis (Datum) for each of the other columns (there are 22 columns).
Could someone help me?
Import the following:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
Create a sample DF:
data = {'day': ['Mon','Tue','Wed','Thu'],
'col1': [22000,25000,27000,35000],
'col2': [2200,2500,2700,3500],
}
df = pd.DataFrame(data)
Select only numeric columns from your DF or alternatively select the columns that you want to consider in the loop:
# Keep only the numeric columns. The np.int / np.float aliases used here
# before were removed from NumPy, so the 'number' selector is used instead.
df1 = df.select_dtypes(include='number')
Iterate through the columns and print a line plot with seaborn:
# One figure per selected column, each plotted against the shared 'day' axis.
for i, col in enumerate(df1.columns):
    plt.figure(i)  # open figure number i so successive plots do not overlap
    sns.lineplot(x='day', y=col, data=df)
Then the following pictures will be shown:

Seaborn swarmplot: Get point coordinates [duplicate]

I have the following data:
import pandas as pd
import numpy as np
# Generate dummy data.
a = np.random.random(75)
b = np.random.random(75) - 0.6
c = np.random.random(75) + 0.75
# Collate into a DataFrame
df = pd.DataFrame({'a': a, 'b': b, 'c': c})
df.columns = [list(['WT', 'MUT', 'WTxMUT']), list(['Parent', 'Parent', 'Offspring'])]
df.columns.names = ['Genotype', 'Status']
df_melt = pd.melt(df)
and I plot it in seaborn using this code:
import seaborn as sb
sb.swarmplot(data = df_melt, x = "Status", y = "value", hue = "Genotype")
How do I get the x-span of each group? What is the range of the horizontal span of the swarmplot for the Parent group, for instance?
You can get the information from the collections which are created by swarmplot.
swarmplot actually returns the matplotlib Axes instance, and from there we can find the PathCollections that it creates. To get the positions, we can use .get_offsets().
Here is your example, modified to find and print the swarm limits, and then use them to plot a box around the swarms.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from matplotlib.patches import Rectangle
# Generate dummy data.
a = np.random.random(75)
b = np.random.random(75) - 0.6
c = np.random.random(75) + 0.75
# Collate into a DataFrame
df = pd.DataFrame({'a': a, 'b': b, 'c': c})
df.columns = [list(['WT', 'MUT', 'WTxMUT']), list(['Parent', 'Parent', 'Offspring'])]
df.columns.names = ['Genotype', 'Status']
df_melt = pd.melt(df)
ax = sb.swarmplot(data = df_melt, x = "Status", y = "value", hue = "Genotype")
def getdatalim(coll):
    """Print the x/y extent of the points in ``coll`` and box them on ``ax``.

    Parameters
    ----------
    coll : object exposing ``get_offsets()``; it is called with entries of
        ``ax.collections``.

    The limits are printed as ``xmin=..., xmax=..., ymin=..., ymax=...`` and a
    black, unfilled rectangle spanning them is added to the module-level
    ``ax``. A ``ValueError`` from the min/max computation (e.g. a collection
    with no points) is ignored, so nothing is printed or drawn in that case.
    """
    x, y = np.array(coll.get_offsets()).T
    try:
        print('xmin={}, xmax={}, ymin={}, ymax={}'.format(
            x.min(), x.max(), y.min(), y.max()))
        # np.ptp (max - min) replaces the ndarray.ptp method removed in NumPy 2.0.
        rect = Rectangle((x.min(), y.min()), np.ptp(x), np.ptp(y),
                         edgecolor='k', facecolor='None', lw=3)
        ax.add_patch(rect)
    except ValueError:
        pass
getdatalim(ax.collections[0]) # "Parent"
getdatalim(ax.collections[1]) # "Offspring"
plt.show()
which prints:
xmin=-0.107313729132, xmax=0.10661092707, ymin=-0.598534246847, ymax=0.980441247759
xmin=0.942829146473, xmax=1.06105941656, ymin=0.761277608688, ymax=1.74729717464
And here's the figure:

Remove empty bars from grouped barplot

I have a grouped barplot. It's working very well, but I try to remove the empty barplots. They take too much space.
I have already tried :
%matplotlib inline
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import sys
import os
import glob
import seaborn as sns
import pandas as pd
import ggplot
from ggplot import aes
sns.set(style= "whitegrid", palette="pastel", color_codes=True )
tab_folder = 'myData'
out_folder ='myData/plots'
tab = glob.glob('%s/R*.tab'%(tab_folder))
#is reading all my data
# For every input table: count (name, ab) pairs, pivot them into a
# name x ab grid, draw a horizontal grouped bar chart and save it as PNG.
# NOTE(review): `ax` is not created anywhere in this snippet -- presumably it
# comes from code that was left out of the post; confirm before running.
for i, tab_file in enumerate(tab):
    folder, file_name = os.path.split(tab_file)
    # Sample id = file name without its 4-character extension and without 'DD'.
    s_id = file_name[:-4].replace('DD', '')
    # DataFrame.from_csv was removed from pandas; read_csv with index_col=0
    # and parse_dates=True reproduces its defaults.
    df = pd.read_csv(tab_file, sep='\t', index_col=0, parse_dates=True)
    df_2 = df.groupby(['name', 'ab']).size().reset_index(name='count')
    df_2 = df_2[df_2['count'] != 0]
    table = pd.pivot_table(df_2, index='name', columns='ab', values='count')
    table.plot(kind='barh', width=0.9, color=['b', 'g', 'r'], ax=ax)
    # Shrink every tick label on both axes.
    for label in (ax.get_xticklabels() + ax.get_yticklabels()):
        label.set_fontsize(4)
    ax.set_title(s_id).update({'color': 'black', 'size': 5, 'family': 'monospace'})
    ax.set_xlabel('')
    ax.set_ylabel('')
    # Reverse the legend entries and place the legend via bbox_to_anchor.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], bbox_to_anchor=(1, 1.05), prop={'size': 4})
    png_t = '%s/%s.b.png' % (out_folder, s_id)
    plt.savefig(png_t, dpi=500)
But it's not working. The bars are still the same.
Is there any other method to remove empty bars?
Your question is not complete. I don't know exactly what you're trying to accomplish, but from what you've said I'd guess that you are trying not to display empty pivot pairs.
This is not possible with the standard plotting tools in pandas. A plot of groups needs to display all of them, including the NaNs, which are plotted as "empty bars".
Furthermore, after groupby every group has a size of at least one, so the condition in df_2[df_2['count'] != 0] is always true.
For example
# Four (names, ab) observations; most name/ab combinations never occur.
df = pd.DataFrame({
    'names': ['nameA', 'nameB', 'nameA', 'nameD'],
    'ab': ['abA', 'abA', 'abB', 'abD'],
})
# Count how often each observed (names, ab) pair appears.
pair_sizes = df.groupby(['names', 'ab']).size()
df_2 = pair_sizes.reset_index(name='count')
df_2 = df_2[df_2['count'] != 0] # this line has no effect
# Pivot to a names x ab grid; combinations that never occurred become NaN.
table = df_2.pivot_table(index='names', columns='ab', values='count')
table
gives
ab abA abB abD
names
nameA 1.00 1.00 NaN
nameB 1.00 NaN NaN
nameD NaN NaN 1.00
and
table.plot(kind='barh', width = 0.9, color = ['b', 'g', 'r'])
shows
And that's the way it is. The plot needs to show all groups after the pivot.
EDIT
You can also use stacked plot, to get rid of spaces
table.plot(kind='barh', width = 0.9, color = ['b', 'g', 'r'], stacked=True)

Obtaining span of plotted points from seaborn swarmplot

I have the following data:
import pandas as pd
import numpy as np
# Generate dummy data.
a = np.random.random(75)
b = np.random.random(75) - 0.6
c = np.random.random(75) + 0.75
# Collate into a DataFrame
df = pd.DataFrame({'a': a, 'b': b, 'c': c})
df.columns = [list(['WT', 'MUT', 'WTxMUT']), list(['Parent', 'Parent', 'Offspring'])]
df.columns.names = ['Genotype', 'Status']
df_melt = pd.melt(df)
and I plot it in seaborn using this code:
import seaborn as sb
sb.swarmplot(data = df_melt, x = "Status", y = "value", hue = "Genotype")
How do I get the x-span of each group? What is the range of the horizontal span of the swarmplot for the Parent group, for instance?
You can get the information from the collections which are created by swarmplot.
swarmplot actually returns the matplotlib Axes instance, and from there we can find the PathCollections that it creates. To get the positions, we can use .get_offsets().
Here is your example, modified to find and print the swarm limits, and then use them to plot a box around the swarms.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from matplotlib.patches import Rectangle
# Generate dummy data.
a = np.random.random(75)
b = np.random.random(75) - 0.6
c = np.random.random(75) + 0.75
# Collate into a DataFrame
df = pd.DataFrame({'a': a, 'b': b, 'c': c})
df.columns = [list(['WT', 'MUT', 'WTxMUT']), list(['Parent', 'Parent', 'Offspring'])]
df.columns.names = ['Genotype', 'Status']
df_melt = pd.melt(df)
ax = sb.swarmplot(data = df_melt, x = "Status", y = "value", hue = "Genotype")
def getdatalim(coll):
    """Print the x/y extent of the points in ``coll`` and box them on ``ax``.

    Parameters
    ----------
    coll : object exposing ``get_offsets()``; it is called with entries of
        ``ax.collections``.

    The limits are printed as ``xmin=..., xmax=..., ymin=..., ymax=...`` and a
    black, unfilled rectangle spanning them is added to the module-level
    ``ax``. A ``ValueError`` from the min/max computation (e.g. a collection
    with no points) is ignored, so nothing is printed or drawn in that case.
    """
    x, y = np.array(coll.get_offsets()).T
    try:
        print('xmin={}, xmax={}, ymin={}, ymax={}'.format(
            x.min(), x.max(), y.min(), y.max()))
        # np.ptp (max - min) replaces the ndarray.ptp method removed in NumPy 2.0.
        rect = Rectangle((x.min(), y.min()), np.ptp(x), np.ptp(y),
                         edgecolor='k', facecolor='None', lw=3)
        ax.add_patch(rect)
    except ValueError:
        pass
getdatalim(ax.collections[0]) # "Parent"
getdatalim(ax.collections[1]) # "Offspring"
plt.show()
which prints:
xmin=-0.107313729132, xmax=0.10661092707, ymin=-0.598534246847, ymax=0.980441247759
xmin=0.942829146473, xmax=1.06105941656, ymin=0.761277608688, ymax=1.74729717464
And here's the figure:

Categories

Resources