Related
I calculated the percentage of NaN values in each column of a dataframe and then plotted it. I want each variable to have a unique color. The code I used works well, but every 9th variable gets the same color as the 1st variable, and the cycle repeats. See the pic:
The code:
# Share of missing values per column, expressed as a percentage.
per = df.isna().mean().round(4) * 100
f, ax = plt.subplots(figsize=(25, 12), dpi=200)

# `i` counts the bars drawn so far; it is used as the x position of the
# text label placed above each bar.
i = 0
for column, pct in zip(per.keys(), per.values):
    if not pct > 0:
        # Columns without missing values get no bar.
        continue
    ax.bar(column, pct, label=column)
    ax.text(i, pct + 0.5, str(np.round(pct, 2)), ha='center')
    i += 1

# Hide the x tick labels and ticks; the legend lists the column names.
ax.set_xticklabels([])
ax.set_xticks([])

plt.title('NaN Value percentage in the dataset')
plt.ylim(0, 115)
plt.ylabel('Percentage')
plt.xlabel('Columns')
plt.legend(loc='upper left')
plt.show()
I tried the following line of code, but it picked only first color:
# Repeat the base palette until there is one entry per row of `df`.
# NOTE(review): len(df) is the row count, not the number of plotted columns.
palette = ['b', 'r', 'g', 'y', 'c', 'm',
           'tan', 'grey', 'pink', 'chocolate', 'gold']
my_colors = list(islice(cycle(palette), len(df)))

f, ax = plt.subplots(figsize=(25, 12), dpi=200)

# `i` counts the bars drawn so far; it is used as the x position of the
# text label placed above each bar.
i = 0
for column, pct in zip(per.keys(), per.values):
    if not pct > 0:
        # Columns without missing values get no bar.
        continue
    # NOTE(review): the whole colour list is passed to every single bar.
    ax.bar(column, pct, label=column, color=my_colors)
    ax.text(i, pct + 0.5, str(np.round(pct, 2)), ha='center')
    i += 1

# Hide the x tick labels and ticks; the legend lists the column names.
ax.set_xticklabels([])
ax.set_xticks([])

plt.title('NaN Value percentage in the dataset')
plt.ylim(0, 115)
plt.ylabel('Percentage')
plt.xlabel('Columns')
plt.legend(loc='upper left')
plt.show()
The result:
Any help is appreciated.
See the data here.
I think there are two problems with your second code:
my_colors = list(islice(cycle(['b', 'r', 'g', 'y', 'c', 'm',
'tan', 'grey', 'pink', 'chocolate', 'gold']), None, len(df)))
Here len(df) gives you the number of rows, but what you actually want is a list whose length equals the number of entries in per, i.e. len(per.keys()). Next, you need to use your variable i to index into your list of colors.
ax.bar(key, value, label=key, color = my_colors)
Here, I think you need to use my_colors[i].
Incidentally, using matplotlib.cm.get_cmap on matplotlib's Colormaps is great to get you a list of unique colors from a palette quickly. Try something like this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import random
import string
# build df with some random NaNs
data = np.random.uniform(low=0, high=10, size=(5, 20))
mask = np.random.choice([1, 0], data.shape, p=[.4, .6]).astype(bool)
data[mask] = np.nan
df = pd.DataFrame(data, columns=list(string.ascii_lowercase)[:20])

# percentage of NaN values per column
per = df.isna().mean().round(4) * 100
length = len(per.keys())

# Resample the 'plasma' palette to `length` discrete colours.
# plt.get_cmap is used here because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9; plt.get_cmap accepts the same
# (name, lut) arguments.
cmap = plt.get_cmap('plasma', length)

# Shuffled colour indices, so that neighbouring bars do not get
# neighbouring colours from the palette.
lst = [*range(length)]
random.shuffle(lst)

f, ax = plt.subplots(figsize=(25, 12), dpi=200)

# `i` counts the bars drawn so far: it is both the x position of the next
# text label and the position in `lst` of the next colour index.
i = 0
for key, value in zip(per.keys(), per.values):
    if value > 0:
        # cmap(...) returns an RGBA tuple; [:3] keeps only the RGB part.
        ax.bar(key, value, label=key, color=cmap(lst[i])[:3])
        ax.text(i, value + 0.5, str(np.round(value, 2)), ha='center')
        i = i + 1

# Hide the x tick labels and ticks; the legend lists the column names.
ax.set_xticklabels([])
ax.set_xticks([])

plt.title('NaN Value percentage in the dataset')
plt.ylim(0, 115)
plt.ylabel('Percentage')
plt.xlabel('Columns')
plt.legend(loc='upper left')
plt.show()
Output:
Or non-random (comment out random.shuffle(lst)):
I have the following development that I am working on with ElementTree, Pandas and Matplotlib modules in Python:
def extract_name_value(signals_df):
    """Draw one stepped subplot per distinct value of ``signals_df['Name']``.

    For each name, the matching rows of the ``Value`` column are plotted
    against ``x``, a list of integers as long as the whole ``Name`` column.
    The two therefore differ in length whenever the frame holds more than
    one distinct name.
    """
    unique_names = list(signals_df['Name'].unique())
    row_count = len(unique_names)
    palette = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

    # Figure and overall title
    fig = plt.figure(figsize=(18, 20))
    plt.suptitle(f'File PXML: {rootXML}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))
    fig.tight_layout()

    for row, name in enumerate(unique_names, start=1):
        # y values: only the rows that belong to this signal name
        data = signals_df[signals_df["Name"] == name]["Value"]
        # x values: one integer per row of the full frame
        x = list(range(len(signals_df["Name"])))
        print(x)

        # pick a random colour from the palette
        j = random.randint(0, len(palette) - 1)

        # one subplot per name, stacked vertically
        ax = plt.subplot(row_count, 1, row)
        ax.plot(x, data, drawstyle='steps', marker='o', color=palette[j], linewidth=3)
        ax.set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
        ax.grid(alpha=0.4)

    plt.show()
I am getting the following error:
I have been looking into the error and I understand that the dimensions of x and y must be equal. But is it possible to make a graph where the x-axis has more points than the y-axis, and where the x-axis comes from a variable not related to the y-axis? How would this be done?
The x-axis is the count of all the values in the Signal elements of the XML file (I put it here because of how extensive it is), and this count is larger than the y-axis. How can I plot the 3 values that I bring from the XML: Signal Name, Signal Value as the y-axis, and the count of Signals as the x-axis? I really appreciate your comments and help.
IIUC, you are trying to plot several stepped values against their order of appearance (X-index) in the XML file. In that case you should plot against the original dataframe's index. I haven't changed your code much for style; I just fixed a few things.
import xml.etree.ElementTree as ET
import pandas as pd
from matplotlib import pyplot as plt
import random
file_xml = 'example_un_child4.xml'
def transfor_data_atri(rootXML):
    """Parse the XML file at ``rootXML`` and plot its ``Signal`` elements.

    Every ``Signal`` element found anywhere in the tree contributes one row
    with its ``Name`` attribute and the integer taken from the first
    space-separated token of its ``Value`` attribute. The resulting
    DataFrame is handed to ``extract_name_value``.
    """
    tree = ET.parse(rootXML)

    rows = []
    for signal in tree.findall(".//Signal"):
        name = signal.attrib["Name"]
        # Only the first space-separated token of "Value" is converted.
        value = int(signal.attrib["Value"].split(' ')[0])
        rows.append({"Name": name, "Value": value})

    signals_df = pd.DataFrame(rows)
    extract_name_value(signals_df)
def extract_name_value(signals_df):
    """Draw one stepped subplot per distinct value of ``signals_df['Name']``.

    Each signal's ``Value`` entries are plotted against their row index in
    ``signals_df``, and all subplots share one x-axis.

    Args:
        signals_df: DataFrame with a ``Name`` column and a ``Value`` column.
    """
    names_list = list(signals_df['Name'].unique())
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

    # squeeze=False always returns a 2-D array of Axes. Without it,
    # plt.subplots returns a bare Axes object when there is only one name,
    # and indexing it would fail.
    fig, axes = plt.subplots(nrows=len(names_list), figsize=(10, 15),
                             sharex=True, squeeze=False)
    # `file_xml` is the module-level path of the XML file being plotted.
    plt.suptitle(f'File PXML: {file_xml}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))

    for ax, name in zip(axes[:, 0], names_list):
        # Rows of this signal only; the Series keeps the original row index,
        # so x and y always have the same length.
        data = signals_df[signals_df["Name"] == name]["Value"]
        ax.plot(data.index, data, drawstyle='steps', marker='o',
                color=random.choice(colors), linewidth=3)
        ax.set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
        ax.grid(alpha=0.4)

    fig.tight_layout()
    plt.show()
transfor_data_atri(file_xml)
I am plotting a horizontal bar chart with percentages on, but would like the values > 0 to be colored in green, with the negatives in red.
plt.figure(figsize=(10,6))
# Bar colours: blue where the percentage exceeds 10, grey otherwise.
clrs = ['b' if (x > 10) else 'gray' for x in data2['BASE VOLUME %']]
ax = sns.barplot(x,y, data=data2, palette=clrs)
ax.set_xlabel('Base Volume',fontsize=15)
ax.set_ylabel('Color Group',fontsize=15)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Percentage label near the left edge of each bar.
for i, v in enumerate(data2['BASE VOLUME %']):
    ax.text(0 + 0.01*max(data['BASE VOLUME']), i + len(data2)/50, str("{0:.1f}%".format(v)), color='black', fontweight='bold', fontsize=14)
# Month-over-month change, written to the right of the longest bar.
# Every label is drawn in red here, whatever its sign.
for i, v in enumerate(diff['% Change from last month']):
    ax.text(1.06*max(data['BASE VOLUME']), i + len(data2)/50, str("{0:.1f}%".format(v)), color='red', fontweight='bold', fontsize=14)
See graph below. The values 21.5%, 10.0% etc need to be in green.
When I try the line colors = ['r' if (y < 0) else 'g' for y in diff['% Change from last month'].values], and change color='red' to color=colors,
I get the error:
ValueError: Invalid RGBA argument: ('g', 'r', 'g', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r')
In the line,
for i, v in enumerate(diff['% Change from last month']):
ax.text(1.06*max(data['BASE VOLUME']), i + len(data2)/50, str("{0:.1f}%".format(v)), color='red', fontweight='bold', fontsize=14)
you could try an if-else statement like
# x position shared by every label: just to the right of the longest bar.
label_x = 1.06*max(data['BASE VOLUME'])
for i, v in enumerate(diff['% Change from last month']):
    # Positive changes are drawn in green, everything else in red.
    if v > 0:
        text_color = 'g'
    else:
        text_color = 'r'
    ax.text(label_x, i + len(data2)/50, "{0:.1f}%".format(v), color=text_color, fontweight='bold', fontsize=14)
Is there a way to set parameters for multiple graphs at once - basically defining a default?
What I have right now:
# Two scatter subplots that share an x-axis.
# NOTE(review): the unqualified calls (xticks, ylabel, ylim, setp) presumably
# come from a pylab-style star import that is not shown here.
fig = plt.figure()
# First subplot: slot 3 of a 3-row, 1-column grid.
ax1 = fig.add_subplot(313)
ax1.scatter(entries,gvalues[0], color='b', marker = '1')
ax1.scatter(entries,gvalues[1], color = 'g', marker = 'v')
# One tick per integer in range(N), labelled with the entries of namelist.
xticks([z for z in range(N)], namelist)
ylabel('Label1', ha='center', labelpad=28)
# Only the lower y-limit is passed.
ylim(0,)
# Second subplot: slot 2 of the same grid, sharing ax1's x-axis.
ax2 = fig.add_subplot(312, sharex=ax1)
ax2.scatter(entries,gvalues[2], color = 'b', marker = '1')
ax2.scatter(entries,gvalues[3], color= 'g', marker = 'v')
ylabel('Label2', ha='center', labelpad=28)
# Hide this subplot's x tick labels.
setp(ax2.get_xticklabels(),visible=False)
ylim(0,)
I am going to have a handful of these graphs, and it would be great if I didn't have to set ha = 'center', labelpad = 28 and ylim(0,) everytime.
Cheers
You can use a dictionary to store your options as below:
# Keyword arguments shared by every y-axis label, defined once.
font_options = dict(ha='center', labelpad=28)
# ** unpacks the dict into keyword arguments of set_ylabel.
ax1.set_ylabel('Label1', **font_options)
...
ax2.set_ylabel('Label2', **font_options)
The **font_options argument unpacks each of the key-value pairs in the dictionary and passes them as keyword arguments to the set_ylabel call.
Similarly for the ylim option you can store your limits in a variable such as:
# Lower y-limit kept in one variable so it can be reused for every subplot.
ylimit = 0
# Only the lower bound is passed.
ylim(ylimit,)
I am trying to have two sub-plots in the figure in the Python Script. But I am not able to set the axis separately according to my inputs. Can anybody help me to set the x-axis, y-axis for each of the sub-plot separately?
I am including the piece of code that I have done, which was not giving me the result.
# Figure with two side-by-side subplots of gain against RF input power.
fig = plt.figure()
# Left subplot (slot 1 of a 1x2 grid).
ax = plt.subplot(121) # To show the ascending order
plt.xlabel ('RF Input Power (dBm)', fontsize = 'small')
plt.ylabel ('Gain (dB)', fontsize = 'small')
tx = plt.title('Gain Vs Ascending RFInputAmpl for ' + str(measuredFrequencyUnderTest) + 'MHz', fontsize = 'small')
# Axis limits given as [xmin, xmax, ymin, ymax].
axPlotAxis = plt.axis([rfInputMin, rfInputMax, -20.0, 30.0])
# Now, Plot all the gain stages across the RFInput
ax.plot(rfInput_Plot, lna_Pre_Plot, color = 'r', marker = '+', label = 'lna_Pre')
ax.plot(rfInput_Plot, lna_Post_Plot, color = 'm', marker = 'x', label = 'lna_Post')
ax.plot(rfInput_Plot, sampler1_Plot, color = 'k', marker = '*', label = 'Sampler1')
ax.plot(rfInput_Plot, sampler2_Plot, color = 'c', marker = 's', label = 'Sampler2')
ax.plot(rfInput_Plot, vga_Plot, color = 'b', marker = 'p', label = 'VGA')
ax.plot(rfInput_Plot, dagc1_Plot, color = 'g', marker = 'H', label = 'DAGC1')
ax.plot(rfInput_Plot, dagc2_Plot, color = 'y', marker = 'v', label = 'DAGC2')
# Put the Legend
# bbox_to_anchor positions the legend relative to the left subplot's axes.
ax.legend(loc='upper center', bbox_to_anchor = (1.3, -0.05), shadow=True,numpoints = 1, prop = legend_font_props, ncol = 3)
# Now, come to the second plot
# Right subplot (slot 2 of the 1x2 grid); nothing is plotted on it in this snippet.
ay = plt.subplot(122) # To show the descending order
plt.xlabel ('RF Input Power (dBm)', fontsize = 'small')
plt.ylabel ('Gain (dB)', fontsize = 'small', horizontalalignment = 'left')
ty = plt.title('Gain Vs Descending RF Input for '+ str(measuredFrequencyUnderTest)+ 'MHz', fontsize = 'small')
# Now, fix the x axis here in descending order
# rfInputMax is passed as xmin and rfInputMin as xmax.
plt.axis([rfInputMax, rfInputMin, y_Min_Value, y_Max_Value])
plt.minorticks_on()
Is there something wrong that I am performing? Pls help me to correct it.
You can set independent scales very easily (from an ipython -pylab session):
In [8]: ax = plt.subplot(121)
In [9]: ay = plt.subplot(122)
In [10]: ax.set_xlim((-1,2))
In [11]: ax.set_ylim((10,20))
In [12]: ay.set_xlim((-10,4))
In [13]: ay.set_ylim((3,5))