Different colors in matpotlib bar plot - python

I calculated NaN value percentage of a dataframe and then plotted it. I want each variable to have a unique color. The code I used works well but every 9th variable color is same as 1st variable color, and the cycle repeats. See the pic:
The code:
per = df.isna().mean().round(4) * 100
f, ax = plt.subplots(figsize=(25, 12), dpi = 200)
i = 0
for key, value in zip(per.keys(), per.values):
if (value > 0):
ax.bar(key, value, label=key)
ax.text(i, value + 0.5, str(np.round(value, 2)), ha='center')
i = i + 1
ax.set_xticklabels([])
ax.set_xticks([])
plt.title('NaN Value percentage in the dataset')
plt.ylim(0,115)
plt.ylabel('Percentage')
plt.xlabel('Columns')
plt.legend(loc='upper left')
plt.show()
I tried the following line of code, but it picked only first color:
my_colors = list(islice(cycle(['b', 'r', 'g', 'y', 'c', 'm',
'tan', 'grey', 'pink', 'chocolate', 'gold']), None, len(df)))
f, ax = plt.subplots(figsize=(25, 12), dpi = 200)
i = 0
for key, value in zip(per.keys(), per.values):
if (value > 0):
ax.bar(key, value, label=key, color = my_colors)
ax.text(i, value + 0.5, str(np.round(value, 2)), ha='center')
i = i + 1
ax.set_xticklabels([])
ax.set_xticks([])
plt.title('NaN Value percentage in the dataset')
plt.ylim(0,115)
plt.ylabel('Percentage')
plt.xlabel('Columns')
plt.legend(loc='upper left')
plt.show()
The result:
Any help is appreciated.
See the data here.

I think there are two problems with your second code:
my_colors = list(islice(cycle(['b', 'r', 'g', 'y', 'c', 'm',
'tan', 'grey', 'pink', 'chocolate', 'gold']), None, len(df)))
Here len(df) gets you the number of rows, but you actually want a list that is equal to the number of per.keys(). So: len(per.keys()). Next, you need to use your variable i to iterate over your list of colors.
ax.bar(key, value, label=key, color = my_colors)
Here, I think you need to use my_colors[i].
Incidentally, using matplotlib.cm.get_cmap on matplotlib's Colormaps is great to get you a list of unique colors from a palette quickly. Try something like this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import random
import string
# build df with some random NaNs
data = np.random.uniform(low=0, high=10, size=(5,20))
mask = np.random.choice([1, 0], data.shape, p=[.4, .6]).astype(bool)
data[mask] = np.nan
df = pd.DataFrame(data, columns=list(string.ascii_lowercase)[:20])
per = df.isna().mean().round(4) * 100
length = len(per.keys())
cmap = cm.get_cmap('plasma', length)
lst = [*range(length)]
random.shuffle(lst)
f, ax = plt.subplots(figsize=(25, 12), dpi = 200)
i = 0
for key, value in zip(per.keys(), per.values):
if (value > 0):
ax.bar(key, value, label=key, color = cmap(lst[i])[:3])
ax.text(i, value + 0.5, str(np.round(value, 2)), ha='center')
i = i + 1
ax.set_xticklabels([])
ax.set_xticks([])
plt.title('NaN Value percentage in the dataset')
plt.ylim(0,115)
plt.ylabel('Percentage')
plt.xlabel('Columns')
plt.legend(loc='upper left')
plt.show()
Output:
Or non-random (comment out random.shuffle(lst)):

Related

How to modify xtick label of plt in Matplotlib

The objective is to modify the xticklabel upon plotting pcolormesh and scatter.
However, I am having difficulties accessing the existing xtick labels.
Simply
ax = plt.axes()
labels_x = [item.get_text() for item in ax.get_xticklabels()]
which produced:
['', '', '', '', '', '']
or
fig.canvas.draw()
xticks = ax.get_xticklabels()
which produced:
['', '', '', '', '', '']
does not return the corresponding label.
May I know how to properly access axis tick labels for a plt cases.
For readability, I split the code into two section.
The first section to generate the data used for plotting
Second section deal the plotting
Section 1: Generate data used for plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
np.random.seed(0)
increment=120
max_val=172800
aran=np.arange(0,max_val,increment).astype(int)
arr=np.concatenate((aran.reshape(-1,1), np.random.random((aran.shape[0],4))), axis=1)
df=pd.DataFrame(arr,columns=[('lapse',''),('a','i'),('a','j'),('b','k'),('c','')])
ridx=df.index[df[('lapse','')] == 3600].tolist()[0]+1 # minus 1 so to allow 3600 start at new row
df[('event','')]=0
df.loc[[1,2,3,10,20,30],[('event','')]]=1
arr=df[[('a','i'),('event','')]].to_numpy()
col_len=ridx
v=arr[:,0].view()
nrow_size=math.ceil(v.shape[0]/col_len)
X=np.pad(arr[:,0].astype(float), (0, nrow_size*col_len - arr[:,0].size),
mode='constant', constant_values=np.nan).reshape(nrow_size,col_len)
mask_append_val=0 # This value must equal to 1 for masking
arrshape=np.pad(arr[:,1].astype(float), (0, nrow_size*col_len - arr[:,1].size),
mode='constant', constant_values=mask_append_val).reshape(nrow_size,col_len)
Section 2 Plotting
fig = plt.figure(figsize=(8,6))
plt.pcolormesh(X,cmap="plasma")
x,y = X.shape
xs,ys = np.ogrid[:x,:y]
# the non-zero coordinates
u = np.argwhere(arrshape)
plt.scatter(ys[:,u[:,1]].ravel()+.5,xs[u[:,0]].ravel()+0.5,marker='*', color='r', s=55)
plt.gca().invert_yaxis()
xlabels_to_use_this=df.loc[:30,[('lapse','')]].values.tolist()
# ax = plt.axes()
# labels_x = [item.get_text() for item in ax.get_xticklabels()]
# labels_y = [item.get_text() for item in ax.get_yticklabels()]
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title("Plot 2D array")
plt.colorbar()
plt.tight_layout()
plt.show()
Expected output
This is how the plot could be generated using matplotlib's pcolormesh and scatter:
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import pandas as pd
import numpy as np
np.random.seed(0)
increment = 120
max_val = 172800
aran = np.arange(0, max_val, increment).astype(int)
arr_df = np.concatenate((aran.reshape(-1, 1), np.random.random((aran.shape[0], 4))), axis=1)
df = pd.DataFrame(arr_df, columns=[('lapse', ''), ('a', 'i'), ('a', 'j'), ('b', 'k'), ('c', '')])
df[('event', '')] = 0
df.loc[[1, 2, 3, 10, 20, 30], [('event', '')]] = 1
col_len_lapse = 3600
col_len = df[df[('lapse', '')] == col_len_lapse].index[0]
nrow_size = int(np.ceil(v.shape[0] / col_len))
a_i_values = df[('a', 'i')].values
a_i_values_meshed = np.pad(a_i_values.astype(float), (0, nrow_size * col_len - len(a_i_values)),
mode='constant', constant_values=np.nan).reshape(nrow_size, col_len)
fig, ax = plt.subplots(figsize=(8, 6))
# the x_values indicate the mesh borders, subtract one half so the ticks can be at the centers
x_values = df[('lapse', '')][:col_len + 1].values - increment / 2
# divide lapses for y by col_len_lapse to get hours
y_values = df[('lapse', '')][::col_len].values / col_len_lapse - 0.5
y_values = np.append(y_values, 2 * y_values[-1] - y_values[-2]) # add the bottommost border (linear extension)
mesh = ax.pcolormesh(x_values, y_values, a_i_values_meshed, cmap="plasma")
event_lapses = df[('lapse', '')][df[('event', '')] == 1]
ax.scatter(event_lapses % col_len_lapse,
np.floor(event_lapses / col_len_lapse),
marker='*', color='red', edgecolor='white', s=55)
ax.xaxis.set_major_locator(MultipleLocator(increment * 5))
ax.yaxis.set_major_locator(MultipleLocator(5))
ax.invert_yaxis()
ax.set_xlabel('X-axis (s)')
ax.set_ylabel('Y-axis (hours)')
ax.set_title("Plot 2D array")
plt.colorbar(mesh)
plt.tight_layout() # fit the labels nicely into the plot
plt.show()
With Seaborn things can be simplified, adding new columns for hours and seconds, and using pandas' pivot (which automatically fills unavailable data with NaNs). Adding xtick_labels=5 sets the labels every 5 positions. (The star for lapse=3600 is at 1 hour, 0 seconds).
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# df created as before
df['hours'] = (df[('lapse', '')].astype(int) // 3600)
df['seconds'] = (df[('lapse', '')].astype(int) % 3600)
df_heatmap = df.pivot(index='hours', columns='seconds', values=('a', 'i'))
df_heatmap_markers = df.pivot(index='hours', columns='seconds', values=('event', '')).replace(
{0: '', 1: '★', np.nan: ''})
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(df_heatmap, xticklabels=5, yticklabels=5,
annot=df_heatmap_markers, fmt='s', annot_kws={'color': 'lime'}, ax=ax)
ax.tick_params(rotation=0)
plt.tight_layout()
plt.show()
Instead of a 'seconds' column, a 'minutes' column also might be interesting.
Here is an attempt to add time information as suggested in the comments:
from matplotlib import patheffects # to add some outline effect
# df prepared as the other seaborn example
fig, ax = plt.subplots(figsize=(8, 6))
path_effect = patheffects.withStroke(linewidth=2, foreground='yellow')
sns.heatmap(df_heatmap, xticklabels=5, yticklabels=5,
annot=df_heatmap_markers, fmt='s',
annot_kws={'color': 'red', 'path_effects': [path_effect]},
cbar=True, cbar_kws={'pad': 0.16}, ax=ax)
ax.tick_params(rotation=0)
ax2 = ax.twinx()
ax2.set_ylim(ax.get_ylim())
yticks = ax.get_yticks()
ax2.set_yticks(yticks)
ax2.set_yticklabels([str(pd.to_datetime('2019-01-15 7:00:00') + pd.to_timedelta(h, unit='h')).replace(' ', '\n')
for h in yticks])
I end up using Seaborn to address this issue.
Specifically, the following lines able to easily tweak the xticklabel
fig.canvas.draw()
new_ticks = [i.get_text() for i in g.get_xticklabels()]
i=[int(idx) for idx in new_ticks]
newlabel=xlabels_to_use_this[i]
newlabel=[np.array2string(x, precision=0) for x in newlabel]
The full code for plotting is as below
import seaborn as sns
fig, ax = plt.subplots()
sns.heatmap(X,ax=ax)
x,y = X.shape
xs,ys = np.ogrid[:x,:y]
# the non-zero coordinates
u = np.argwhere(arrshape)
g=sns.scatterplot(ys[:,u[:,1]].ravel()+.5,xs[u[:,0]].ravel()+0.5,marker='*', color='r', s=55)
fig.canvas.draw()
new_ticks = [i.get_text() for i in g.get_xticklabels()]
i=[int(idx) for idx in new_ticks]
newlabel=xlabels_to_use_this[i]
newlabel=[np.array2string(x, precision=0) for x in newlabel]
ax.set_xticklabels(newlabel)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)
for ind, label in enumerate(g.get_xticklabels()):
if ind % 2 == 0: # every 10th label is kept
label.set_visible(True)
else:
label.set_visible(False)
for ind, label in enumerate(g.get_yticklabels()):
if ind % 4 == 0: # every 10th label is kept
label.set_visible(True)
else:
label.set_visible(False)
plt.xlabel('Elapsed (s)')
plt.ylabel('Hour (h)')
plt.title("Rastar Plot")
plt.tight_layout()
plt.show()

How to change the default group color for grouped bars

Code which I found here is helpful, but getting error while adding color information to avoid auto colors of bars in the grouped sub-bar. i.e getting repeated colors, if I add specific "color" inside the plt.bar(....,color). color=['black', 'red', 'green', 'blue', 'cyan','brown','grey','goldenrod','lime','violet','indigo','coral','olive'].
Where to add color details so that default colors of the grouped sub-bars and their legend will be unique as needed.
def grouped_barplot(df, cat,subcat, val , err):
u = df[cat].unique()
x = np.arange(len(u))
subx = df[subcat].unique()
offsets = (np.arange(len(subx))-np.arange(len(subx)).mean())/(len(subx)+1.)
width= np.diff(offsets).mean()
for i,gr in enumerate(subx):
dfg = df[df[subcat] == gr]
plt.bar(x+offsets[i], dfg[val].values, width=width,
label="{} {}".format(subcat, gr), yerr=dfg[err].values, capsize=5)
plt.xlabel("Test", fontsize=14)
plt.ylabel("value ",fontsize=14)
plt.xticks(x, u, fontsize=14)
plt.yticks((0,10,20,30,40,50,60,70,80,90,100), fontsize=14)
plt.legend(title = "ML",loc="upper center", bbox_to_anchor=(.5, 1.25), ncol=6, fontsize=12)
plt.show()
plt.title('comparision')
The code from the cited answer requires 20 lines of code to do what can be done with 3 lines of code, as explained in this answer.
Tested in python 3.8.12, pandas 1.3.4, matplotlib 3.4.3
Imports and DataFrame
import pandas as pd
import matplotlib as mpl
from matplotlib.patches import Patch
import matplotlib.pyplot as plt
import numpy as np
data = {'Candidate': ['X', 'Y', 'Z', 'X', 'Y', 'Z', 'X', 'Y', 'Z'],
'Sample_Set': [1, 1, 1, 2, 2, 2, 3, 3, 3],
'Values': [20, 10, 10, 200, 101, 99, 1999, 998, 1003],
'Error': [5, 2, 3, 30, 30, 30, 10, 10, 10]}
colors = ['black', 'red', 'green', 'blue', 'cyan','brown','grey','goldenrod','lime','violet','indigo','coral','olive']
df = pd.DataFrame(data)
df = df.sort_values(['Candidate', 'Sample_Set'])
# display(df)
Candidate Sample_Set Values Error
0 X 1 20 5
3 X 2 200 30
6 X 3 1999 10
1 Y 1 10 2
4 Y 2 101 30
7 Y 3 998 10
2 Z 1 10 3
5 Z 2 99 30
8 Z 3 1003 10
# reshape the dataframe into a wide format for Values
vals = df.pivot(index='Candidate', columns='Sample_Set', values='Values')
# display(vals)
Sample_Set 1 2 3
Candidate
X 20 200 1999
Y 10 101 998
Z 10 99 1003
# reshape the dataframe into a wide format for Errors
yerr = df.pivot(index='Candidate', columns='Sample_Set', values='Error')
# display(yerr)
Sample_Set 1 2 3
Candidate
X 5 30 10
Y 2 30 10
Z 3 30 10
Updated Answer
Based on a comment from the OP, it seems the goal is to replace the default colors, but colors across groups should be the same.
With the code from the new answer, this is easily done by passing the colors list to the color parameter.
# plot vals with yerr
ax = vals.plot(kind='bar', yerr=yerr, logy=True, rot=0, figsize=(6, 5), ylabel='Value', title='Comparison', color=colors)
ax.legend(title='Sample Set', bbox_to_anchor=(1, 1.02), loc='upper left')
plt.show()
Using the answer cited in the OP
for i, (gr, color) in enumerate(zip(subx, colors)): allows for assigning a color from the list to each group.
def grouped_barplot(df, cat,subcat, val , err):
u = df[cat].unique()
x = np.arange(len(u))
subx = df[subcat].unique()
offsets = (np.arange(len(subx))-np.arange(len(subx)).mean())/(len(subx)+1.)
width= np.diff(offsets).mean()
colors = ['black', 'red', 'green', 'blue', 'cyan','brown','grey','goldenrod','lime','violet','indigo','coral','olive']
# add the colors to the loop
for i, (gr, color) in enumerate(zip(subx, colors)):
dfg = df[df[subcat].eq(gr)]
plt.bar(x+offsets[i], dfg[val], width=width, yerr=dfg[err], color=color, label=gr)
plt.legend(title='Sample_Set', bbox_to_anchor=(1, 1.02), loc='upper left')
plt.yscale('log')
plt.xlabel(cat)
plt.ylabel(val)
plt.xticks(x, u)
plt.show()
cat = "Candidate"
subcat = "Sample_Set"
val = "Values"
err = "Error"
grouped_barplot(df, cat, subcat, val, err )
Original Answer
The request can be implemented, but should not be:
This is not a good visualization practice. The bars within a given group should all have the same color. The point of a visualization is to convey information. All this will do is make the plot confusing to read, thereby defeating the purpose.
Because the the bars are plotted in groups: 'X', 'Y', 'Z' of 'Sample_Set 1', 'X', 'Y', 'Z' of 'Sample_Set 2' and 'X', 'Y', 'Z' of 'Sample_Set 3', only 3 labels will be created in the legend, which means a custom patch legend with appropriate handles and labels will need to be created.
Plot and Customize
The order of rects is not the same as df, so df is sorted differently in order to zip the correct color to the correct rect
# add a colors column to the dataframe
df['color'] = colors[:len(df)]
# plot vals with yerr
ax = vals.plot(kind='bar', yerr=yerr, logy=True, rot=0, figsize=(6, 5), legend=False, ylabel='Value', title='Comparison')
# extract the Rectangle bar objects
rects = [c for c in ax.get_children() if isinstance(c, mpl.patches.Rectangle)]
# change the face color of the bar
for rect, color in zip(rects, df.sort_values(['Sample_Set', 'Candidate'])['color']):
rect.set_fc(color)
# create a custom handle for the legend
handles = list()
for idx, v in df.iterrows():
patch = Patch(color=v.color, label=f'{v.Candidate} {v.Sample_Set}')
handles.append(patch)
# add the legend
ax.legend(title='Candidate Sample_Set', handles=handles, bbox_to_anchor=(1, 1.02), loc='upper left')
plt.show()
Using the Cited Answer
Use df from above.
def grouped_barplot(df, cat,subcat, val , err):
u = df[cat].unique()
x = np.arange(len(u))
subx = df[subcat].unique()
offsets = (np.arange(len(subx))-np.arange(len(subx)).mean())/(len(subx)+1.)
width= np.diff(offsets).mean()
# group the colors: range(3) because there are 3 groups
colors = [df.color[n::3] for n in range(3)]
for i, (gr, color) in enumerate(zip(subx, colors)):
dfg = df[df[subcat].eq(gr)]
plt.bar(x+offsets[i], dfg[val], width=width, yerr=dfg[err], color=color)
handles = list()
for idx, v in df.iterrows():
patch = Patch(color=v.color, label=f'{v.Candidate} {v.Sample_Set}')
handles.append(patch)
plt.legend(title='Candidate Sample_Set', handles=handles, bbox_to_anchor=(1, 1.02), loc='upper left')
plt.yscale('log')
plt.xlabel(cat)
plt.ylabel(val)
plt.xticks(x, u)
plt.show()
cat = "Candidate"
subcat = "Sample_Set"
val = "Values"
err = "Error"
# add a colors column to the dataframe
df['color'] = colors[:len(df)]
grouped_barplot(df, cat, subcat, val, err )

How to resize the x-axis and make it different from the y-axis Matplotlib

I have the following development that I am working on with ElementTree, Pandas and Matplotlib modules in Python:
def extract_name_value(signals_df):
#print(signals_df)
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
# Creation Graphic
fig = plt.figure(figsize=(18, 20))
plt.suptitle(f'File PXML: {rootXML}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))
fig.tight_layout()
i = 1
for name in names_list:
# get data
data = signals_df[signals_df["Name"] == name]["Value"]
datax = signals_df["Name"]
# x = [n for n in range(len(data))]
x = [n for n in range(len(datax))]
print(x)
# get color
j = random.randint(0, len(colors) - 1)
# add subplots
ax = plt.subplot(num_names_list, 1, i)
ax.plot(x, data, drawstyle='steps', marker='o', color=colors[j], linewidth=3)
# plt.xticks(None)
ax.set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
ax.grid(alpha=0.4)
i += 1
plt.show()
I am getting the following error:
I have been looking for the error and I totally understand that the dimensions of x and y must be equal, but there is the possibility of making a graph where the x-axis is greater than the y-axis? and also the x-axis comes from a variable not related to the y-axis? how would this be?
The x-axis is the count of all the values it has in the Signal element of the xml file: I put it here because of how extensive it is and this value is larger than the y-axis, but how to contemplate the 3 values that I bring from the xml that are Singal Name, Signal Value as y-axis and Count of Signals as x-axis. I really appreciate your comments and help.
IIUC, you are trying to plot several stepped values agains their order of appearance (X-index) in XML file. Then you should plot against original dataframe's X values. I haven't changed your code much for style or such, just fixed a little.
import xml.etree.ElementTree as ET
import pandas as pd
from matplotlib import pyplot as plt
import random
file_xml = 'example_un_child4.xml'
def transfor_data_atri(rootXML):
file_xml = ET.parse(rootXML)
data_XML = [
{"Name": signal.attrib["Name"],
"Value": int(signal.attrib["Value"].split(' ')[0])
} for signal in file_xml.findall(".//Signal")
]
signals_df = pd.DataFrame(data_XML)
extract_name_value(signals_df)
def extract_name_value(signals_df):
#print(signals_df)
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
# Creation Graphic
#fig = plt.figure(figsize=(18, 20), sharex=True)
fig, ax = plt.subplots(nrows=num_names_list, figsize=(10, 15), sharex=True)
plt.suptitle(f'File PXML: {file_xml}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))
#fig.tight_layout()
i = 1
for pos, name in enumerate(names_list):
# get data
data = signals_df[signals_df["Name"] == name]["Value"]
datax = signals_df["Name"]
# x = [n for n in range(len(data))]
#x = [n for n in range(len(datax))]
#print(x)
# get color
j = random.randint(0, len(colors) - 1)
# add subplots
#ax[pos] = plt.subplot(num_names_list, 1, i)
ax[pos].plot(data.index, data, drawstyle='steps', marker='o', color=colors[j], linewidth=3)
# plt.xticks(None)
ax[pos].set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
ax[pos].grid(alpha=0.4)
i += 1
fig.tight_layout()
plt.show()
transfor_data_atri(file_xml)

Adding Percentages to sns.countplot - how do I show percentages for two values within the categories?

Hi I'm trying to add percentages to my countplot with 5 categories and 2 values (old and younger). I've tried adding the def and loop from
How to add percentages on top of bars in seaborn?
My code:
plt.figure(figsize =(7,5))
ax = sb.countplot(data = df_x_1, x = 'concern_virus', hue = 'age')
plt.xticks(size =12)
plt.xlabel('Level of Concern', size = 14)
plt.yticks(size = 12)
plt.ylabel('Number of People', size = 12)
plt.title("Older and Younger People's Concern over the Virus", size = 16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right");
for p in ax.patches:
percentage = '{:.1f}%'.format(100 * p.get_height()/total)
x = p.get_x() + p.get_width()
y = p.get_height()
ax.annotate(percentage, (x, y),ha='center')
plt.show()
As you can see, the percentages don't make sense.
The problem seems to be with the variable that is undefined in the above code: total. total should be the number you want to call 100%, for example the total number of rows in the dataframe. That way all the displayed percentages sum up to 100.
Here is some sample code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
N = 250
df_x_1 = pd.DataFrame({'concern_virus': np.random.choice(['a', 'b', 'c', 'd', 'e'], N),
'age': np.random.choice(['younger', 'older'], N)})
plt.figure(figsize=(7, 5))
ax = sns.countplot(data=df_x_1, x='concern_virus', order=['a', 'b', 'c', 'd', 'e'],
hue='age', hue_order=['younger', 'older'],
palette=['chartreuse', 'darkviolet'])
plt.xticks(size=12)
plt.xlabel('Level of Concern', size=14)
plt.yticks(size=12)
plt.ylabel('Number of People', size=12)
plt.title("Older and Younger People's Concern over the Virus", size=16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right")
total = len(df_x_1)
for p in ax.patches:
percentage = f'{100 * p.get_height() / total:.1f}%\n'
x = p.get_x() + p.get_width() / 2
y = p.get_height()
ax.annotate(percentage, (x, y), ha='center', va='center')
plt.tight_layout()
plt.show()
To have the text in the center of the bar, it helps to choose ha='center' and add half the width to the x-position. Appending a newline to the text can help to position the text nicely on top of the bar. plt.tight_layout() can help to fit all the labels into the plot.
Seaborn lets you fix the order of the x-axis via order=.... The order of the legend elements and the corresponding colors can be set via hue_order=... and palette=....
PS: For the new question, with totals per age group, instead of directly looping through all the bars, a first loop can visit the groups:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
label_younger = 'younger'
label_older = 'older'
df_younger = pd.DataFrame({'concern_virus': np.random.choice(['a', 'b', 'c', 'd', 'e'], 230)})
df_older = pd.DataFrame({'concern_virus': np.random.choice(['a', 'b', 'c', 'd', 'e'], 120)})
df_younger['age'] = label_younger
df_older['age'] = label_older
df_x_1 = pd.concat([df_younger, df_older], ignore_index=True)
plt.figure(figsize=(7, 5))
ax = sns.countplot(data=df_x_1, x='concern_virus', order=['a', 'b', 'c', 'd', 'e'],
hue='age', hue_order=[label_younger, label_older],
palette=['orangered', 'skyblue'])
plt.xticks(size=12)
plt.xlabel('Level of Concern', size=14)
plt.yticks(size=12)
plt.ylabel('Number of People', size=12)
plt.title("Older and Younger People's Concern over the Virus", size=16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right")
for bars in ax.containers:
if bars.get_label() == label_younger:
group_total = len(df_younger)
else:
group_total = len(df_older)
for p in bars.patches:
# print(p.get_facecolor(), p.get_label())
percentage = f'{100 * p.get_height() / group_total:.1f}%\n'
x = p.get_x() + p.get_width() / 2
y = p.get_height()
ax.annotate(percentage, (x, y), ha='center', va='center')
plt.tight_layout()
plt.show()

Matplotlib: Gridspec not displaying bar subplot

I have a 4x3 grid. I have 1 broken horizontal bar plot in the first row followed by 9 scatter plots. The height of the bar plot needs to be 2x height of the scatter plots. I am using gridspec to achieve this. However, it doesn't plot the bar plot completely. See picture below:
The complete bar plot looks like this
I am not sure why is this happening. Any suggestions?
Here's my code:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import gridspec
#####Importing Data from csv file#####
dataset1 = np.genfromtxt('dataSet1.csv', dtype = float, delimiter = ',', skip_header = 1, names = ['a', 'b', 'c', 'x0'])
dataset2 = np.genfromtxt('dataSet2.csv', dtype = float, delimiter = ',', skip_header = 1, names = ['a', 'b', 'c', 'x0'])
dataset3 = np.genfromtxt('dataSet3.csv', dtype = float, delimiter = ',', skip_header = 1, names = ['a', 'b', 'c', 'x0'])
corr1 = np.corrcoef(dataset1['a'],dataset1['x0'])
corr2 = np.corrcoef(dataset1['b'],dataset1['x0'])
corr3 = np.corrcoef(dataset1['c'],dataset1['x0'])
corr4 = np.corrcoef(dataset2['a'],dataset2['x0'])
corr5 = np.corrcoef(dataset2['b'],dataset2['x0'])
corr6 = np.corrcoef(dataset2['c'],dataset2['x0'])
corr7 = np.corrcoef(dataset3['a'],dataset3['x0'])
corr8 = np.corrcoef(dataset3['b'],dataset3['x0'])
corr9 = np.corrcoef(dataset3['c'],dataset3['x0'])
fig = plt.figure(figsize = (8,8))
gs = gridspec.GridSpec(4, 3, height_ratios=[2,1,1,1])
def tornado1():
np.set_printoptions(precision=4)
variables = ['a1','b1','c1','a2','b2','c2','a3','b3','c3']
base = 0
values = np.array([corr1[0,1],corr2[0,1],corr3[0,1],
corr4[0,1],corr5[0,1],corr6[0,1],
corr7[0,1],corr8[0,1],corr9[0,1]])
variables=zip(*sorted(zip(variables, values),reverse = True, key=lambda x: abs(x[1])))[0]
values = sorted(values,key=abs, reverse=True)
# The y position for each variable
ys = range(len(values))[::-1] # top to bottom
# Plot the bars, one by one
for y, value in zip(ys, values):
high_width = base + value
# Each bar is a "broken" horizontal bar chart
ax1= plt.subplot(gs[1]).broken_barh(
[(base, high_width)],
(y - 0.4, 0.8),
facecolors=['red', 'red'], # Try different colors if you like
edgecolors=['black', 'black'],
linewidth=1,
)
# Draw a vertical line down the middle
plt.axvline(base, color='black')
# Position the x-axis on the top/bottom, hide all the other spines (=axis lines)
axes = plt.gca() # (gca = get current axes)
axes.spines['left'].set_visible(False)
axes.spines['right'].set_visible(False)
axes.spines['top'].set_visible(False)
axes.xaxis.set_ticks_position('bottom')
# Make the y-axis display the variables
plt.yticks(ys, variables)
plt.ylim(-2, len(variables))
plt.draw()
return
def correlation1():
corr1 = np.corrcoef(dataset1['a'],dataset1['x0'])
print corr1[0,1]
corr2 = np.corrcoef(dataset1['b'],dataset1['x0'])
print corr2[0,1]
corr3 = np.corrcoef(dataset1['c'],dataset1['x0'])
print corr3[0,1]
ax2=plt.subplot(gs[3])
ax2.scatter(dataset1['a'],dataset1['x0'],marker = '.')
ax2.set_xlabel('a1')
ax2.set_ylabel('x01')
ax3=plt.subplot(gs[4])
ax3.scatter(dataset1['b'],dataset1['x0'],marker = '.')
ax3.set_xlabel('b1')
#ax3.set_ylabel('x01')
ax4=plt.subplot(gs[5])
ax4.scatter(dataset1['c'],dataset1['x0'],marker = '.')
ax4.set_xlabel('c1')
#ax4.set_ylabel('x01')
ax5=fig.add_subplot(gs[6])
ax5.scatter(dataset2['a'],dataset2['x0'],marker = '.')
ax5.set_xlabel('a2')
ax5.set_ylabel('x02')
ax6=fig.add_subplot(gs[7])
ax6.scatter(dataset2['b'],dataset2['x0'],marker = '.')
ax6.set_xlabel('b2')
#ax6.set_ylabel('x02')
ax7=fig.add_subplot(gs[8])
ax7.scatter(dataset2['c'],dataset2['x0'],marker = '.')
ax7.set_xlabel('c2')
#ax7.set_ylabel('x02')
ax8=plt.subplot(gs[9])
ax8.scatter(dataset3['a'],dataset3['x0'],marker = '.')
ax8.set_xlabel('a3')
ax8.set_ylabel('x03')
ax9=plt.subplot(gs[10])
ax9.scatter(dataset3['b'],dataset3['x0'],marker = '.')
ax9.set_xlabel('b3')
#ax9.set_ylabel('x03')
ax10=plt.subplot(gs[11])
ax10.scatter(dataset3['c'],dataset3['x0'],marker = '.')
ax10.set_xlabel('c3')
#ax10.set_ylabel('x03')
plt.show()
return
tornado1()
correlation1()
plt.tight_layout()
plt.show()
Any help would be highly appreciated :-)
In the block of code:
# Plot the bars, one by one
for y, value in zip(ys, values):
high_width = base + value
# Each bar is a "broken" horizontal bar chart
ax1= plt.subplot(gs[1]).broken_barh(
[(base, high_width)],
(y - 0.4, 0.8),
facecolors=['red', 'red'], # Try different colors if you like
edgecolors=['black', 'black'],
linewidth=1,
)
You're reinitializing gs[1] on each loop so in the end, your plot only contains the last bar. You should try something like this instead:
# Plot the bars, one by one
ax1 = plt.subplot(gs[1])
for y, value in zip(ys, values):
high_width = base + value
# Each bar is a "broken" horizontal bar chart
ax1.broken_barh(
[(base, high_width)],
(y - 0.4, 0.8),
facecolors=['red', 'red'], # Try different colors if you like
edgecolors=['black', 'black'],
linewidth=1,
)
Hope that helps.

Categories

Resources