I am plotting a horizontal bar chart with percentages on, but would like the values > 0 to be colored in green, with the negatives in red.
plt.figure(figsize=(10,6))
# Bars whose 'BASE VOLUME %' is above 10 are blue, all others gray.
clrs = ['b' if (x > 10) else 'gray' for x in data2['BASE VOLUME %']]
ax = sns.barplot(x,y, data=data2, palette=clrs)
ax.set_xlabel('Base Volume',fontsize=15)
ax.set_ylabel('Color Group',fontsize=15)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

# Values shared by both label columns; computed once instead of per label.
max_volume = max(data['BASE VOLUME'])
y_shift = len(data2)/50

# First column of labels: the percentage of each bar, just right of x = 0.
for i, v in enumerate(data2['BASE VOLUME %']):
    ax.text(0 + 0.01*max_volume, i + y_shift, "{0:.1f}%".format(v),
            color='black', fontweight='bold', fontsize=14)

# Second column of labels: month-over-month change, placed past the longest bar.
# The call was cut off in the listing; it is completed here with fontsize=14.
for i, v in enumerate(diff['% Change from last month']):
    ax.text(1.06*max_volume, i + y_shift, "{0:.1f}%".format(v),
            color='red', fontweight='bold', fontsize=14)
See graph below. The values 21.5%, 10.0% etc need to be in green.
When I add the line colors = ['r' if (y < 0) else 'g' for y in diff['% Change from last month'].values] and change color='red' to color=colors,
I get the error:
ValueError: Invalid RGBA argument: ('g', 'r', 'g', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r')
In the line,
for i, v in enumerate(diff['% Change from last month']):
ax.text(1.06*max(data['BASE VOLUME']), i + len(data2)/50, str("{0:.1f}%".format(v)), color='red', fontweight='bold', fontsize=14)
you could try an if-else statement like
# ax.text takes one colour per call, so the colour is chosen per value:
# green for a positive change, red for zero or a negative change.
x_pos = 1.06*max(data['BASE VOLUME'])  # same x position for every label
y_shift = len(data2)/50
for i, v in enumerate(diff['% Change from last month']):
    ax.text(x_pos, i + y_shift, "{0:.1f}%".format(v),
            color='g' if v > 0 else 'r', fontweight='bold', fontsize=14)
Related
# Mean of 'Survey' per 'depart' group, turned back into a regular frame
# with the mean stored in a column named 'Mean'.
results = df.groupby('depart')['Survey'].mean().to_frame(name = 'Mean').reset_index()
# NOTE(review): the frame is grouped by 'depart' but plotted against 'Unit';
# presumably one of the two column names was changed when posting -- confirm.
results.plot(x = 'Unit', y = 'Mean', marker = 'o', figsize=(8,5))
plt.grid(True)
# Fix the visible y-range to 3.60 .. 5.00.
plt.ylim(3.60, 5.00)
plt.show()
how do I add the value labels on top of the markers?
thanks!
One way is using annotate
results.plot(x='Unit', y='Mean', marker='o', figsize=(8, 5))
plt.grid(True)
plt.ylim(3.60, 5.00)
ax = plt.gca()
# Write each value just above its marker.
# NOTE(review): using the running index i as the x coordinate assumes the
# points are drawn at x = 0, 1, 2, ... (non-numeric 'Unit'); if 'Unit' is
# numeric, use the 'Unit' value as x instead -- confirm against the data.
for i, val in enumerate(results['Mean']):
    ax.annotate(f'{val:.2f}', (i, val),
                xytext=(0, 6), textcoords='offset points',  # 6 pt above the point
                ha='center', va='bottom')
plt.show()
I calculated NaN value percentage of a dataframe and then plotted it. I want each variable to have a unique color. The code I used works well but every 9th variable color is same as 1st variable color, and the cycle repeats. See the pic:
The code:
# Share of missing values per column: the fraction is rounded to 4 places
# and then scaled by 100.
per = df.isna().mean().round(4) * 100
f, ax = plt.subplots(figsize=(25, 12), dpi = 200)
# i is the x position handed to ax.text for the value label.
i = 0
for key, value in zip(per.keys(), per.values):
# NOTE(review): indentation was lost in this listing; presumably the bar,
# text and increment lines below all belong to this `if` -- confirm.
if (value > 0):
# No color argument is given here, so Matplotlib picks the bar color.
ax.bar(key, value, label=key)
# Rounded percentage, written 0.5 above the bar height.
ax.text(i, value + 0.5, str(np.round(value, 2)), ha='center')
i = i + 1
# Remove tick labels and ticks from the x axis; each bar carries label=key
# for the legend created below.
ax.set_xticklabels([])
ax.set_xticks([])
plt.title('NaN Value percentage in the dataset')
plt.ylim(0,115)
plt.ylabel('Percentage')
plt.xlabel('Columns')
plt.legend(loc='upper left')
plt.show()
I tried the following line of code, but it picked only first color:
# Repeat the 11 named colors until the list has len(df) entries.
# NOTE(review): islice and cycle are not imported in this snippet;
# presumably `from itertools import cycle, islice` -- confirm.
my_colors = list(islice(cycle(['b', 'r', 'g', 'y', 'c', 'm',
'tan', 'grey', 'pink', 'chocolate', 'gold']), None, len(df)))
f, ax = plt.subplots(figsize=(25, 12), dpi = 200)
# i is the x position handed to ax.text for the value label.
i = 0
for key, value in zip(per.keys(), per.values):
# NOTE(review): indentation was lost in this listing; presumably the bar,
# text and increment lines below all belong to this `if` -- confirm.
if (value > 0):
# The whole my_colors list is passed for every single bar; it is not
# indexed with i.
ax.bar(key, value, label=key, color = my_colors)
ax.text(i, value + 0.5, str(np.round(value, 2)), ha='center')
i = i + 1
# Remove tick labels and ticks from the x axis; each bar carries label=key
# for the legend created below.
ax.set_xticklabels([])
ax.set_xticks([])
plt.title('NaN Value percentage in the dataset')
plt.ylim(0,115)
plt.ylabel('Percentage')
plt.xlabel('Columns')
plt.legend(loc='upper left')
plt.show()
The result:
Any help is appreciated.
See the data here.
I think there are two problems with your second code:
my_colors = list(islice(cycle(['b', 'r', 'g', 'y', 'c', 'm',
'tan', 'grey', 'pink', 'chocolate', 'gold']), None, len(df)))
Here len(df) gets you the number of rows, but you actually want a list whose length equals the number of entries in per, i.e. len(per.keys()). Next, you need to use your variable i to index into your list of colors.
ax.bar(key, value, label=key, color = my_colors)
Here, I think you need to use my_colors[i].
Incidentally, matplotlib.cm.get_cmap is a quick way to get a list of unique colors from one of Matplotlib's colormaps (on Matplotlib 3.9 and later, where cm.get_cmap was removed, use matplotlib.pyplot.get_cmap instead). Try something like this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import random
import string
# Build a 5 x 20 frame of random numbers and blank out roughly 40 % of the
# cells so that every run has some NaNs to plot.
data = np.random.uniform(low=0, high=10, size=(5, 20))
mask = np.random.choice([1, 0], data.shape, p=[.4, .6]).astype(bool)
data[mask] = np.nan
df = pd.DataFrame(data, columns=list(string.ascii_lowercase)[:20])

# Percentage of missing values per column.
per = df.isna().mean().round(4) * 100
length = len(per.keys())

# Resample 'plasma' to one distinct color per column. plt.get_cmap is used
# because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
cmap = plt.get_cmap('plasma', length)

# Shuffled color indices, so neighbouring bars do not get neighbouring shades.
lst = list(range(length))
random.shuffle(lst)

f, ax = plt.subplots(figsize=(25, 12), dpi=200)
i = 0
for key, value in per.items():
    if value > 0:
        # cmap(...) returns RGBA; [:3] keeps only the RGB part.
        ax.bar(key, value, label=key, color=cmap(lst[i])[:3])
        ax.text(i, value + 0.5, str(np.round(value, 2)), ha='center')
        # Bars with string x values are placed at 0, 1, 2, ... in the order
        # they are added, so i only advances when a bar was actually drawn.
        i = i + 1
ax.set_xticklabels([])
ax.set_xticks([])
plt.title('NaN Value percentage in the dataset')
plt.ylim(0, 115)
plt.ylabel('Percentage')
plt.xlabel('Columns')
plt.legend(loc='upper left')
plt.show()
Output:
Or non-random (comment out random.shuffle(lst)):
Based on another thread, I got this code:
import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt

# Sample data: 2000 draws from a standard normal distribution in a
# single-column frame. This has to come after the imports because it uses
# np and pd.
data = np.random.normal(loc=0.0, scale=1.0, size=2000)
df_data = pd.DataFrame(data)
def _plot(df):
    """Plot a density histogram with a fitted normal curve for each column.

    For every column of ``df`` a new figure is created showing the
    histogram, the normal pdf built from the column's mean and standard
    deviation, blue dash-dotted lines at the mean and at 1, 2 and 3
    standard deviations on either side, and red dash-dotted lines at the
    column minimum and maximum. The mean and standard deviation are printed.

    All positions are taken from the column being plotted rather than from
    the global ``df_data``, so the function also works for frames other
    than ``df_data`` and for frames with several columns.
    """
    n_bins = 50
    for col in df.columns:
        series = df[col]
        fig, axes = plt.subplots(figsize=(12, 6))
        n, bins, patches = axes.hist(series, n_bins, density=True, alpha=.1, edgecolor='black')
        mu = series.mean()
        sigma = series.std()
        # probability density function evaluated at the bin edges
        pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
        axes.plot(bins, pdf, color='green', alpha=.6)

        # dashed lines: mean and mean +/- k*sigma in blue, extremes in red
        axes.axvline(mu, color='b', linestyle='-.')
        for k in range(1, 4):
            axes.axvline(mu - k*sigma, color='b', linestyle='-.')
            axes.axvline(mu + k*sigma, color='b', linestyle='-.')
        axes.axvline(series.min(), color='r', linestyle='-.')
        axes.axvline(series.max(), color='r', linestyle='-.')

        axes.set_ylabel('Probability Density')
        axes.set_xlabel('Values')
        print(mu)
        print(sigma)
_plot(df_data)
Which returns me this nice plot:
As you can see the blue vertical lines indicate borders set by multiples of standard deviations. I would like to add the following information and color coding, which I now quickly placed in powerpoint:
I tried to use the plt.fill_between function but didn't really get anything useful. I also do not know how to write something like mu+l*sigma above the plot. How can I achieve the second picture based on what I have?
EDIT:
solved by #Trenton McKinney
Putting new boxes inside the colored boxes:
# Shade every region between two neighbouring vertical lines and put two
# rotated labels in it: the stage name and, in a white box, its sigma range.
for i, (x, c) in enumerate(locs[:-1]):
    x_next = locs[i + 1][0]
    axes.axvspan(x, x_next, alpha=0.2, color=c)
    tx = (x + x_next) / 2
    axes.text(tx, y1/2, f'Zustand {i + 1}', {'ha': 'center', 'va': 'center'}, rotation=90)
    # The first four regions get a "mu - k*sigma" label counting down from 4,
    # the remaining ones a "mu + k*sigma" label counting up from 1.
    # Raw strings keep the TeX backslashes (\mu, \cdot, \sigma) literal.
    if i < 4:
        sigma_label = rf"$\mu$-{4 - i}$\cdot$$\sigma$"
    else:
        sigma_label = rf"$\mu$+{i - 4 + 1}$\cdot$$\sigma$"
    axes.text(tx, y1/1.25, sigma_label, {'ha': 'center', 'va': 'center'}, rotation=90,
              bbox=dict(facecolor='white', alpha=0.8, edgecolor='black'))
It will be easier to create a container with all of the values for the vertical lines because those values will be reused for placing the lines, and determining the axvspan and text placement. In this case, a dictionary is used.
See inline notation for explanations
Use .Axes.axvspan to fill between vertical positions
How to highlight specific x-value ranges
See How do I merge two dictionaries in a single expression (take union of dictionaries)?
Add text to the plot with .Axes.text
Tested in python 3.10, matplotlib 3.5.1
# extra imports
from collections import OrderedDict
from itertools import zip_longest
np.random.seed(2022)
data = np.random.normal(loc=0.0, scale=1.0, size=2000)
df_data = pd.DataFrame(data)
def _plot(df):
    """Plot a histogram, normal pdf and shaded sigma stages for each column.

    For every column of ``df`` a new figure is drawn with the density
    histogram, the normal pdf built from the column's mean and standard
    deviation, dash-dotted vertical lines (blue at the mean and at 1, 2 and
    3 standard deviations on either side, red at the minimum and maximum),
    and an alternating blue/brown span labelled "Stage k" between each pair
    of neighbouring lines. The mean and standard deviation are printed.

    All positions come from the column being plotted rather than from the
    global ``df_data``, so the function works for any numeric frame.
    """
    n_bins = 50
    for col in df.columns:
        series = df[col]
        fig, axes = plt.subplots(figsize=(12, 6))
        n, bins, patches = axes.hist(series, n_bins, density=True, alpha=.1, edgecolor='black')
        mu = series.mean()
        sigma = series.std()
        # probability density function evaluated at the bin edges
        pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
        axes.plot(bins, pdf, color='green', alpha=.6)

        # get ylim to position the text
        y0, y1 = axes.get_ylim()

        # one dict per group of vertical lines: x value -> line color
        muu = {mu: 'b'}
        mm = {series.min(): 'r', series.max(): 'r'}
        mun = {mu - v*sigma: 'b' for v in range(1, 4)}
        mup = {mu + v*sigma: 'b' for v in range(1, 4)}

        # combine the dicts: | requires python 3.9+. See the linked SO answer
        # for additional options to combine the dicts
        vals = muu | mm | mun | mup

        # order the keys (x values) from smallest to largest
        vals = OrderedDict(sorted(vals.items()))

        # plot the dashed lines
        for x, c in vals.items():
            axes.axvline(x, color=c, linestyle='-.')

        # pair the x values with the stage colors; there is one more x value
        # than colors, so zip_longest pads the last pair with None
        locs = list(zip_longest(vals.keys(), ['blue', 'brown']*4))

        # iterate through all but the last value, and add the vspan and the text
        for i, (x, c) in enumerate(locs[:-1]):
            x_next = locs[i + 1][0]
            axes.axvspan(x, x_next, alpha=0.2, color=c)
            tx = (x + x_next) / 2
            axes.text(tx, y1/2, f'Stage {i + 1}', {'ha': 'center', 'va': 'center'}, rotation=90)

        axes.set_ylabel('Probability Density')
        axes.set_xlabel('Values')
        print(mu)
        print(sigma)
_plot(df_data)
Update for additional annotations
# Captions for the regions, left to right: four below the mean counting
# down from 4 sigma, then four above it counting up from 1 sigma.
sign = [f'µ - {v}σ' for v in (4, 3, 2, 1)]
sigp = [f'µ + {v}σ' for v in (1, 2, 3, 4)]
anno = [*sign, *sigp]

# Walk over neighbouring (x, color) pairs: shade the span between the two
# x values and write the stage number with its caption in the middle.
for i, ((x, c), (x_next, _)) in enumerate(zip(locs, locs[1:])):
    axes.axvspan(x, x_next, alpha=0.2, color=c)
    tx = (x + x_next) / 2
    axes.text(tx, y1/2, f'Stage {i + 1}: {anno[i]}', {'ha': 'center', 'va': 'center'}, rotation=90)
I have the following development that I am working on with ElementTree, Pandas and Matplotlib modules in Python:
# Draw one stacked subplot per distinct signal name found in signals_df.
# Reads the 'Name' and 'Value' columns of signals_df and the outer name
# rootXML (used in the figure title).
def extract_name_value(signals_df):
#print(signals_df)
# Distinct signal names, one subplot each.
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
# Creation Graphic
fig = plt.figure(figsize=(18, 20))
plt.suptitle(f'File PXML: {rootXML}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))
fig.tight_layout()
# i is the 1-based subplot position.
i = 1
for name in names_list:
# get data
# data holds only the rows of the current name ...
data = signals_df[signals_df["Name"] == name]["Value"]
# ... while datax is the 'Name' column of every row.
datax = signals_df["Name"]
# x = [n for n in range(len(data))]
# x therefore has one entry per row of the whole frame, which is longer
# than data whenever more than one name exists; plot(x, data) is then
# called with sequences of different lengths.
x = [n for n in range(len(datax))]
print(x)
# get color
j = random.randint(0, len(colors) - 1)
# add subplots
ax = plt.subplot(num_names_list, 1, i)
ax.plot(x, data, drawstyle='steps', marker='o', color=colors[j], linewidth=3)
# plt.xticks(None)
ax.set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
ax.grid(alpha=0.4)
i += 1
plt.show()
I am getting the following error:
I have been looking into the error and I understand that the dimensions of x and y must be equal, but is it possible to make a graph where the x-axis has more points than the y-axis, and where the x-axis comes from a variable not related to the y-axis? How would this be done?
The x-axis is the count of all the values in the Signal elements of the XML file (I put it here because of how extensive it is), and this count is larger than the number of y values. How can I take into account the 3 values that I read from the XML, namely Signal Name, Signal Value as the y-axis, and Count of Signals as the x-axis? I really appreciate your comments and help.
IIUC, you are trying to plot several stepped values against their order of appearance (X-index) in the XML file. In that case you should plot against the original dataframe's index values. I haven't changed your code much for style or such, just fixed a little.
import xml.etree.ElementTree as ET
import pandas as pd
from matplotlib import pyplot as plt
import random
file_xml = 'example_un_child4.xml'
def transfor_data_atri(rootXML):
    """Parse the XML at ``rootXML`` and plot its Signal elements.

    Every ``Signal`` element found anywhere in the document contributes one
    row with its ``Name`` attribute and the integer in front of the first
    space of its ``Value`` attribute. The rows are collected into a
    DataFrame that is handed to ``extract_name_value``.
    """
    tree = ET.parse(rootXML)
    rows = []
    for signal in tree.findall(".//Signal"):
        attrs = signal.attrib
        name = attrs["Name"]
        # Only the leading token of "Value" is numeric; the rest is dropped.
        leading_token = attrs["Value"].split(' ')[0]
        rows.append({"Name": name, "Value": int(leading_token)})
    signals_df = pd.DataFrame(rows)
    extract_name_value(signals_df)
def extract_name_value(signals_df):
    """Draw one stepped subplot per distinct signal name, sharing the x axis.

    Each signal's values are plotted against their index in ``signals_df``,
    so every point sits at the position its row has in the frame. The
    figure title uses the module-level ``file_xml``.

    Parameters:
        signals_df: frame with a 'Name' column and a 'Value' column.
    """
    names_list = list(signals_df['Name'].unique())
    num_names_list = len(names_list)
    if num_names_list == 0:
        # Nothing to draw; plt.subplots cannot build a grid with zero rows.
        return
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

    # Creation Graphic
    # squeeze=False always returns a 2-D array of Axes, so indexing also
    # works when there is only a single signal name.
    fig, axes = plt.subplots(nrows=num_names_list, figsize=(10, 15),
                             sharex=True, squeeze=False)
    plt.suptitle(f'File PXML: {file_xml}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))

    for ax, name in zip(axes[:, 0], names_list):
        # rows of this signal only; the index keeps their original positions
        data = signals_df.loc[signals_df["Name"] == name, "Value"]
        ax.plot(data.index, data, drawstyle='steps', marker='o',
                color=random.choice(colors), linewidth=3)
        ax.set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
        ax.grid(alpha=0.4)

    fig.tight_layout()
    plt.show()
transfor_data_atri(file_xml)
I am using this code to generate a bar chart from a dynamic source. The problem is that when I have fewer than 10 bars, the width of the bars changes and the layout is no longer the same. Here is the code:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from Processesing import dataProcess
def chartmak(dic):
    """Draw a horizontal bar chart of ``dic`` and save it as test.png.

    Each key becomes a title-cased caption drawn over its bar and each
    value the bar length; the y tick label shows the value followed by
    "GB". Values must be integers because of the ``{:3d}`` format.
    """
    # Materialise the values once; a list is passed to barh instead of a
    # dict view so the plot does not depend on how a view is coerced.
    values = list(dic.values())
    n = len(values)

    fig, ax = plt.subplots()
    ax.barh(range(n), values, align='center', fc='#80d0f1', ec='w')
    ax.set_yticks(range(n))
    #this need more work to put the GB or the TB
    ax.set_yticklabels(['{:3d} GB'.format(e) for e in values], color='gray')
    ax.tick_params(pad=10)

    # caption each bar with its key
    for i, label in enumerate(dic):
        ax.annotate(label.title(), xy=(10, i), fontsize=12, va='center')

    # no frame, no x ticks, no y tick marks
    for spine in ('top', 'right', 'bottom', 'left'):
        ax.spines[spine].set_visible(False)
    ax.xaxis.set_ticks([])
    ax.yaxis.set_tick_params(length=0)

    # first entry at the top
    ax.invert_yaxis()
    plt.savefig("test.png")
    plt.show()
You could just pad your dictionary entries if there are less than 10 as follows:
import matplotlib.pyplot as plt
def chartmak(dic, min_bars=10):
    """Draw a horizontal bar chart of ``dic`` with a stable bar width.

    When ``dic`` has fewer than ``min_bars`` entries, empty zero-length
    bars are appended so the chart always has at least ``min_bars`` rows
    and the bars keep the same thickness.

    Parameters:
        dic: mapping of caption (str) to integer value.
        min_bars: minimum number of rows in the chart (default 10).
    """
    entries = list(dic.items())
    n_real = len(entries)
    # pad with blank rows up to min_bars
    entries.extend([('', 0)] * max(0, min_bars - n_real))
    values = [v for _, v in entries]

    fig, ax = plt.subplots()
    n = len(entries)
    ax.barh(range(n), values, align='center', fc='#80d0f1', ec='w')
    ax.set_yticks(range(n))

    #this need more work to put the GB or the TB
    # Padding rows are recognised by position, not by value, so a real
    # entry whose value is 0 still gets its "0 GB" label.
    ax.set_yticklabels(
        ['{:3d} GB'.format(v) if i < n_real else '' for i, v in enumerate(values)],
        color='gray')
    ax.tick_params(pad=10)

    # caption only the real bars
    for i, (label, _) in enumerate(entries[:n_real]):
        ax.annotate(label.title(), xy=(10, i), fontsize=12, va='center')

    # no frame, no x ticks, no y tick marks
    for spine in ('top', 'right', 'bottom', 'left'):
        ax.spines[spine].set_visible(False)
    ax.xaxis.set_ticks([])
    ax.yaxis.set_tick_params(length=0)

    # first entry at the top
    ax.invert_yaxis()
    plt.show()
chartmak({'1': 10, '2':20, '3':30})
chartmak({'1': 10, '2':20, '3':30, '4':40, '5':80, '6':120, '7':100, '8':50, '9':30, '10':40})
This would show the following output: