I'm trying to plot data containing confidence intervals in radar chart.
I followed matplotlib documentation here and made changes to add the error area but it is showing an empty area between Cat.1 and Cat.9 as shown in the plot.
When I set endpoint to true in the theta calculation, I lose a data point.
How can I fix it without losing a data point.
Link to plot
here is the modified code:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle, RegularPolygon
from matplotlib.path import Path
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
from matplotlib.spines import Spine
from matplotlib.transforms import Affine2D
def radar_factory(num_vars, frame='circle'):
"""
Create a radar chart with `num_vars` axes.
This function creates a RadarAxes projection and registers it.
Parameters
----------
num_vars : int
Number of variables for radar chart.
frame : {'circle', 'polygon'}
Shape of frame surrounding axes.
"""
# calculate evenly-spaced axis angles
theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)
class RadarAxes(PolarAxes):
name = 'radar'
# use 1 line segment to connect specified points
RESOLUTION = 1
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# rotate plot such that the first axis is at the top
self.set_theta_zero_location('N')
def fill(self, *args, closed=True, **kwargs):
"""Override fill so that line is closed by default"""
return super().fill(closed=closed, *args, **kwargs)
def plot(self, *args, **kwargs):
"""Override plot so that line is closed by default"""
lines = super().plot(*args, **kwargs)
for line in lines:
self._close_line(line)
def _close_line(self, line):
x, y = line.get_data()
# FIXME: markers at x[0], y[0] get doubled-up
if x[0] != x[-1]:
x = np.append(x, x[0])
y = np.append(y, y[0])
line.set_data(x, y)
def set_varlabels(self, labels):
self.set_thetagrids(np.degrees(theta), labels)
def _gen_axes_patch(self):
# The Axes patch must be centered at (0.5, 0.5) and of radius 0.5
# in axes coordinates.
if frame == 'circle':
return Circle((0.5, 0.5), 0.5)
elif frame == 'polygon':
return RegularPolygon((0.5, 0.5), num_vars,
radius=.5, edgecolor="k")
else:
raise ValueError("Unknown value for 'frame': %s" % frame)
def _gen_axes_spines(self):
if frame == 'circle':
return super()._gen_axes_spines()
elif frame == 'polygon':
# spine_type must be 'left'/'right'/'top'/'bottom'/'circle'.
spine = Spine(axes=self,
spine_type='circle',
path=Path.unit_regular_polygon(num_vars))
# unit_regular_polygon gives a polygon of radius 1 centered at
# (0, 0) but we want a polygon of radius 0.5 centered at (0.5,
# 0.5) in axes coordinates.
spine.set_transform(Affine2D().scale(.5).translate(.5, .5)
+ self.transAxes)
return {'polar': spine}
else:
raise ValueError("Unknown value for 'frame': %s" % frame)
register_projection(RadarAxes)
return theta
def example_data():
data = [
['Cat. 1', 'Cat. 2', 'Cat. 3', 'Cat. 4', 'Cat. 5', 'Cat. 6', 'Cat. 7', 'Cat. 8', 'Cat. 9'],
('mean1', [
[5, 5, 5, 5, 5, 5, 5, 5, 5]]),
('upper1', [
[6, 6, 6, 6, 6, 6, 6, 6, 6]]),
('lower1', [
[4, 4, 4, 4, 4, 4, 4, 4, 4]]),
('mean2', [
[2, 2, 2, 2, 2, 2, 2, 2, 2]]),
('upper2', [
[3, 3, 3, 3, 3, 3, 3, 3, 3]]),
('lower2', [
[1, 1, 1, 1, 1, 1, 1, 1, 1]])
]
return data
if __name__ == '__main__':
N = 9
theta = radar_factory(N, frame='circle')
data = example_data()
labels = data.pop(0)
mean_data=[data[0][1], data[3][1]]
upper_data=[data[1][1], data[4][1]]
lower_data=[data[2][1], data[5][1]]
fig, axs = plt.subplots(figsize=(9, 9), nrows=1, ncols=1,
subplot_kw=dict(projection='radar'))
fig.subplots_adjust(wspace=0.25, hspace=0.20, top=0.85, bottom=0.05)
colors = ['b', 'r']
# Plot the two cases from the example data on separate axes
for d, u, l, color in zip(mean_data, upper_data, lower_data, colors):
axs.plot(theta, d[0], color=color)
axs.fill_between(theta, u[0], l[0], facecolor=color, alpha=0.2)
axs.set_varlabels(labels)
# add legend relative to top-left plot
labels = ('Factor 1', 'Factor 2')
legend = axs.legend(labels, loc=(0.9, .95),
labelspacing=0.1, fontsize='small')
plt.show()
I modified the code mentioned here to add the confidence intervals and it works see the plot generated.
categories =['Cat. 1', 'Cat. 2', 'Cat. 3', 'Cat. 4', 'Cat. 5', 'Cat. 6', 'Cat. 7', 'Cat. 8', 'Cat. 9']
categories = [*categories, categories[0]]
mean = [5, 5, 5, 5, 5, 5, 5, 5, 5]
upper = [6, 6, 6, 6, 6, 6, 6, 6, 6]
lower = [4, 4, 4, 4, 4, 4, 4, 4, 4]
mean = [*mean, mean[0]]
upper = [*upper, upper[0]]
lower = [*lower, lower[0]]
mean1= [2, 2, 2, 2, 2, 2, 2, 2, 2]
upper1=[3, 3, 3, 3, 3, 3, 3, 3, 3]
lower1=[1, 1, 1, 1, 1, 1, 1, 1, 1]
mean1 = [*mean1, mean1[0]]
upper1 = [*upper1, upper1[0]]
lower1 = [*lower1, lower1[0]]
label_loc = np.linspace(start=0, stop=2 * np.pi, num=len(mean))
plt.figure(figsize=(8, 8))
plt.subplot(polar=True)
plt.plot(label_loc, mean, label='Factor 1',color='b')
plt.fill_between(label_loc, upper, lower, facecolor='b', alpha=0.2)
plt.subplot(polar=True)
plt.plot(label_loc, mean1, label='Factor 2',color='r')
plt.fill_between(label_loc, upper1, lower1, facecolor='r', alpha=0.2)
lines, labels = plt.thetagrids(np.degrees(label_loc), labels=categories)
plt.legend()
plt.show()
Related
I have prepared the following plot:
x = [1, 2, 3, 4, 5, 6]
y = [-0.015, 0.386, -0.273, -0.091, 0.955, 1.727]
errors = [0.744, 0.954, 0.737, 0.969, 0.848, 0.460]
plt.figure(figsize=(8, 4))
plt.bar(x, y, yerr=errors, align='center', alpha=0.5, color='grey')
plt.xticks((0, 1, 2, 3, 4, 5, 6, 7), ('', 'I1', 'I2', 'I3', 'I4', 'I5', 'I6', ''))
plt.ylim((-3, 3))
plt.show()
I have a couple of questions:
How do i add the bottom and top dashes in the error segment?
I would like to color the background green if y > 0.8, yellow if -0.8 <= y <= 0.8, red if y < -0.8. How can I do it?
To set the horizontal lines, you need to use capsize=n. Larger the n, wider the horizontal lines. You can set the background color using axvspan() and select color based on the value of y. Below is the updated code. I changed y value of one to show the red color...
x = [1, 2, 3, 4, 5, 6]
y = [-0.015, 0.386, -0.273, -0.091, -0.955, 1.727]
errors = [0.744, 0.954, 0.737, 0.969, 0.848, 0.460]
plt.figure(figsize=(8, 4))
ax=plt.bar(x, y, yerr=errors, align='center', alpha=0.5, color='grey', capsize=5)
plt.xticks((0, 1, 2, 3, 4, 5, 6, 7), ('', 'I1', 'I2', 'I3', 'I4', 'I5', 'I6', ''))
plt.ylim((-3, 3))
for i in range(1,7):
if y[i-1] > 0.8:
color = 'green'
elif y[i-1] > -0.8:
color = 'yellow'
else:
color = 'red'
plt.axvspan(i-0.5, i+.5, facecolor=color, alpha=0.3)
plt.show()
EDIT:
As requested, you are looking for horizontal ranges. So, you would need to use axhspan(). To bring the gray to solid, change alpha to 1 and you will need to bring the bars to the front using zorder(). I also added edgecolor='gray' to keep the look of a fully solid bar. Code is below...
plt.figure(figsize=(8, 4))
ax=plt.bar(x, y, yerr=errors, align='center', alpha=1, color='grey', edgecolor='gray',capsize=10, zorder=2)
plt.xticks((0, 1, 2, 3, 4, 5, 6, 7), ('', 'I1', 'I2', 'I3', 'I4', 'I5', 'I6', ''))
plt.ylim((-3, 3))
plt.axhspan(0.8, plt.gca().get_ylim()[1], facecolor='green', alpha=0.3, zorder=1)
plt.axhspan(-0.8, 0.8, facecolor='yellow', alpha=0.3, zorder=1)
plt.axhspan(plt.gca().get_ylim()[0], -0.8, facecolor='red', alpha=0.3, zorder=1)
plt.show()
I have been trying to create a matplotlib subplot (1 x 3) with horizontal bar plots on either side of a lineplot.
It looks like this:
The code for generating the above plot -
u_list = [2, 0, 0, 0, 1, 5, 0, 4, 0, 0]
n_list = [0, 0, 1, 0, 4, 3, 1, 1, 0, 6]
arr_ = list(np.arange(10, 11, 0.1))
data_ = pd.DataFrame({
'points': list(np.arange(0, 10, 1)),
'value': [10.4, 10.5, 10.3, 10.7, 10.9, 10.5, 10.6, 10.3, 10.2, 10.4][::-1]
})
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 8))
ax1 = plt.subplot(1, 3, 1)
sns.barplot(u_list, arr_, orient="h", ax=ax1)
ax2 = plt.subplot(1, 3, 2)
x = data_['points'].tolist()
y = data_['value'].tolist()
ax2.plot(x, y)
ax2.set_yticks(arr_)
plt.gca().invert_yaxis()
ax3 = plt.subplot(1, 3, 3, sharey=ax1, sharex=ax1)
sns.barplot(n_list, arr_, orient="h", ax=ax3)
fig.tight_layout()
plt.show()
Edit
How do I share the y-axis of the central line plot with the other horizontal bar plots?
I would set the limits of all y-axes to the same range, set the ticks in all axes and than set the ticks/tick-labels of all but the most left axis to be empty. Here is what I mean:
from matplotlib import pyplot as plt
import numpy as np
u_list = [2, 0, 0, 0, 1, 5, 0, 4, 0, 0]
n_list = [0, 0, 1, 0, 4, 3, 1, 1, 0, 6]
arr_ = list(np.arange(10, 11, 0.1))
x = list(np.arange(0, 10, 1))
y = [10.4, 10.5, 10.3, 10.7, 10.9, 10.5, 10.6, 10.3, 10.2, 10.4]
fig, axs = plt.subplots(1, 3, figsize=(20, 8))
axs[0].barh(arr_,u_list,height=0.1)
axs[0].invert_yaxis()
axs[1].plot(x, y)
axs[1].invert_yaxis()
axs[2].barh(arr_,n_list,height=0.1)
axs[2].invert_yaxis()
for i in range(1,len(axs)):
axs[i].set_ylim( axs[0].get_ylim() ) # align axes
axs[i].set_yticks([]) # set ticks to be empty (no ticks, no tick-labels)
fig.tight_layout()
plt.show()
This is a minimal example and for the sake of conciseness, I refrained from mixing matplotlib and searborn. Since seaborn uses matplotlib under the hood, you can reproduce the same output there (but with nicer bars).
I'm trying to annotate the values for a stacked horizontal bar graph created using pandas. Current code is below
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
d = {'group 1': [1, 2, 5, 7, 4, 5, 10],
'group 2': [5, 6, 1, 8, 2, 6, 2],
'group 3': [12, 2, 2, 4, 4, 8, 4]}
df = pd.DataFrame(d)
ax = df.plot.barh(stacked=True, figsize=(10,12))
for p in ax.patches:
ax.annotate(str(p.get_x()), xy=(p.get_x(), p.get_y()+0.2))
plt.legend(bbox_to_anchor=(0, -0.15), loc=3, prop={'size': 14}, frameon=False)
The problem is the annotation method I used gives the x starting points and not the values of each segment. I'd like to be able to annotate values of each segment in the center of each segment for each of the bars.
edit: for clarity, what I would like to achieve is something like this where the values are centered horizontally (and vertically) for each segment:
You can use the patches bbox to get the information you want.
ax = df.plot.barh(stacked=True, figsize=(10, 12))
for p in ax.patches:
left, bottom, width, height = p.get_bbox().bounds
ax.annotate(str(width), xy=(left+width/2, bottom+height/2),
ha='center', va='center')
Another possible solution is to get your df.values to a flatten array via values = df.values.flatten("F")
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
d = {'group 1': [1, 2, 5, 7, 4, 5, 10],
'group 2': [5, 6, 1, 8, 2, 6, 2],
'group 3': [12, 2, 2, 4, 4, 8, 4]}
df = pd.DataFrame(d)
ax = df.plot.barh(stacked=True, figsize=(10,12))
values = df.values.flatten("F")
for i, p in enumerate(ax.patches):
ax.annotate(str(values[i]), xy=(p.get_x()+ values[i]/2, p.get_y()+0.2))
plt.legend(bbox_to_anchor=(0, -0.15), loc=3, prop={'size': 14}, frameon=False);
From matplotlib 3.4.0 use matplotlib.pyplot.bar_label
The labels parameter can be used to customize annotations, but it's not required.
See this answer for additional details and examples.
Each group of containers must be iterated through to add labels.
Tested in python 3.10, pandas 1.4.2, matplotlib 3.5.1
Horizontal Stacked
d = {'group 1': [1, 2, 5, 7, 4, 5, 10],
'group 2': [5, 6, 1, 8, 2, 6, 2],
'group 3': [12, 2, 2, 4, 4, 8, 4]}
df = pd.DataFrame(d)
# add tot to sort the bars
df['tot'] = df.sum(axis=1)
# sort
df = df.sort_values('tot')
# plot all columns except tot
ax = df.iloc[:, :-1].plot.barh(stacked=True, figsize=(10, 12))
# iterate through each group of bars
for c in ax.containers:
# format the number of decimal places (if needed) and replace 0 with an empty string
labels = [f'{w:.0f}' if (w := v.get_width()) > 0 else '' for v in c ]
ax.bar_label(c, labels=labels, label_type='center')
Horizontal Grouped
Not stacked is a better presentation of the data, because it is easier to compare bar lengths visually.
# plot all columns except tot
ax = df.iloc[:, :-1].plot.barh(stacked=False, figsize=(8, 9))
# iterate through each group of bars
for c in ax.containers:
# format the number of decimal places (if needed) and replace 0 with an empty string
labels = [f'{w:.0f}' if (w := v.get_width()) > 0 else '' for v in c ]
ax.bar_label(c, labels=labels, label_type='center')
df view
group 1 group 2 group 3 tot
2 5 1 2 8
1 2 6 2 10
4 4 2 4 10
6 10 2 4 16
0 1 5 12 18
3 7 8 4 19
5 5 6 8 19
I guess I just didn't use the right keywords, because this probably has been asked before, but I didn't find a solution. Anyway, I have a problem where the the bars of a histogram do not line up with the xticks. I want the bars to be centred over the xticks they correspond to, but they get placed between ticks to fill the space in-between evenly.
import matplotlib.pyplot as plt
data = [1, 1, 1, 1.5, 2, 4, 4, 4, 4, 4.5, 5, 6, 6.5, 7, 9,9, 9.5]
bins = [x+n for n in range(1, 10) for x in [0.0, 0.5]]+[10.0]
plt.hist(data, bins, rwidth = .3)
plt.xticks(bins)
plt.show()
Note that what you are plotting here is not a histogram. A histogram would be
import matplotlib.pyplot as plt
data = [1, 1, 1, 1.5, 2, 4, 4, 4, 4, 4.5, 5, 6, 6.5, 7, 9,9, 9.5]
bins = [x+n for n in range(1, 10) for x in [0.0, 0.5]]+[10.0]
plt.hist(data, bins, edgecolor="k", alpha=1)
plt.xticks(bins)
plt.show()
Here, the bars range between the bins as expected. E.g. you have 3 values in the interval 1 <= x < 1.5.
Conceptually what you want to do here is get a bar plot of the counts of data values. This would not require any bins at all and could be done as follows:
import numpy as np
import matplotlib.pyplot as plt
data = [1, 1, 1, 1.5, 2, 4, 4, 4, 4, 4.5, 5, 6, 6.5, 7, 9,9, 9.5]
u, inv = np.unique(data, return_inverse=True)
counts = np.bincount(inv)
plt.bar(u, counts, width=0.3)
plt.xticks(np.arange(1,10,0.5))
plt.show()
Of course you can "misuse" a histogram plot to get a similar result. This would require to move the center of the bar to the left bin edge, plt.hist(.., align="left").
import matplotlib.pyplot as plt
data = [1, 1, 1, 1.5, 2, 4, 4, 4, 4, 4.5, 5, 6, 6.5, 7, 9,9, 9.5]
bins = [x+n for n in range(1, 10) for x in [0.0, 0.5]]+[10.0]
plt.hist(data, bins, align="left", rwidth = .6)
plt.xticks(bins)
plt.show()
This results in the same plot as above.
I am trying to plot a scatter plot using matplotlib, i am getting " IndexError: pop from empty list" error and I am not sure how to fix it.
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import time
import itertools
d = {'5000cca229d10d09': {374851: 1}, '5000cca229cf3f8f': {372496:3},'5000cca229d106f9': {372496: 3, 372455: 2}, '5000cca229d0b3e4': {380904: 2, 380905: 1, 380906: 1, 386569: 1}, '5000cca229d098f8': {379296: 2, 379297: 2, 379299: 2, 379303: 1, 379306: 1, 379469: 1, 379471: 1, 379459: 1, 379476: 1, 379456: 4, 379609: 4}, '5000cca229d03957': {380160: 3, 380736: 3, 380162: 1, 380174: 1, 381072: 2, 379608: 2, 380568: 3, 380569: 1, 380570: 1, 379296: 3, 379300: 1, 380328: 3, 379306: 1, 380331: 1, 379824: 2, 379825: 1, 379827: 1, 380344: 1, 379836: 1, 379456: 3, 380737: 1, 380739: 1, 379462: 1, 379476: 1, 379992: 3, 379609: 1, 379994: 1, 379611: 1, 379621: 1, 380006: 1, 380904: 3, 380905: 1, 380907: 1, 380535: 3, 380536: 1, 380538: 1}, '5000cca229cf6d0b': {372768: 10, 372550: 15, 372616: 14, 372617: 20, 372653: 3, 372505: 2}, '5000cca229cec4f1': {372510: 132}}
colors = list("rgbcmyk")
for data_dict in d.values():
x = data_dict.keys()
#print x
#X= time.asctime(time.localtime(x))
y = data_dict.values()
#plt.scatter(x,y,color=colors.pop(),s = 60)
plt.scatter(x,y,color=colors.pop(),s = 90, marker='^')
plt.ylabel("Errors" , fontsize=18, color="Green")
plt.xlabel("Occured on",fontsize=18, color="Green")
plt.title("DDN23b", fontsize=25, color="Blue")
plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)
plt.xticks(rotation='vertical')
#plt.ylim(min(y),max(y))
#plt.grid()
#for x, y in dict(itertools.chain(*[item.items() for item in d.values()])).items():
# plt.text(x, y, time.strftime("%m/%d/%y, %H:%M:%S", time.localtime(x*3600)), ha='center', va='top', rotation='vertical', fontsize = '11', fontstyle = 'italic', color = '#844d4d')
plt.xticks(plt.xticks()[0], [time.strftime("%m/%d/%y, %H:%M:%S", time.localtime(item)) for item in plt.xticks()[0]*3600])
plt.legend(d.keys())
mng = plt.get_current_fig_manager()
mng.resize(*mng.window.maxsize())
plt.subplots_adjust(bottom=.24,right=.98,left=0.03,top=.89)
plt.grid()
plt.show()
I have several data sets for d, and d is a dictionary. when the data set is smaller, it works without any errors. When the data set is large, it runs out of collars. How do I add more colors to the list so every key in "d" gets its own color.
Feel free to edit my code and make suggestions.
Colormaps are callable. When passed a float between 0 and 1, it returns an RGBA color:
In [73]: jet = plt.cm.jet
In [74]: jet(0.5)
Out[74]: (0.49019607843137247, 1.0, 0.47754585705249841, 1.0)
So, you could generate len(d) number of colors by passing the NumPy array np.linspace(0, 1, len(d)) to the colormap:
jet = plt.cm.jet
colors = jet(np.linspace(0, 1, len(d)))
The colors selected will then be equally spaced along the colormap gradient.
import matplotlib.pyplot as plt
import numpy as np
import time
d = {'5000cca229d10d09': {374851: 1}, '5000cca229cf3f8f': {372496:3},'5000cca229d106f9': {372496: 3, 372455: 2}, '5000cca229d0b3e4': {380904: 2, 380905: 1, 380906: 1, 386569: 1}, '5000cca229d098f8': {379296: 2, 379297: 2, 379299: 2, 379303: 1, 379306: 1, 379469: 1, 379471: 1, 379459: 1, 379476: 1, 379456: 4, 379609: 4}, '5000cca229d03957': {380160: 3, 380736: 3, 380162: 1, 380174: 1, 381072: 2, 379608: 2, 380568: 3, 380569: 1, 380570: 1, 379296: 3, 379300: 1, 380328: 3, 379306: 1, 380331: 1, 379824: 2, 379825: 1, 379827: 1, 380344: 1, 379836: 1, 379456: 3, 380737: 1, 380739: 1, 379462: 1, 379476: 1, 379992: 3, 379609: 1, 379994: 1, 379611: 1, 379621: 1, 380006: 1, 380904: 3, 380905: 1, 380907: 1, 380535: 3, 380536: 1, 380538: 1}, '5000cca229cf6d0b': {372768: 10, 372550: 15, 372616: 14, 372617: 20, 372653: 3, 372505: 2}, '5000cca229cec4f1': {372510: 132}}
jet = plt.cm.jet
colors = jet(np.linspace(0, 1, len(d)))
fig, ax = plt.subplots()
for color, data_dict in zip(colors, d.values()):
x = data_dict.keys()
y = data_dict.values()
ax.scatter(x,y,color=color, s = 90, marker='^')
plt.ylabel("Errors" , fontsize=18, color="Green")
plt.xlabel("Occured on",fontsize=18, color="Green")
plt.title("DDN23b", fontsize=25, color="Blue")
ax.get_xaxis().get_major_formatter().set_useOffset(False)
plt.xticks(rotation='vertical')
plt.xticks(plt.xticks()[0],
[time.strftime("%m/%d/%y, %H:%M:%S", time.localtime(item))
for item in plt.xticks()[0]*3600])
plt.legend(d.keys())
plt.subplots_adjust(bottom=.24,right=.98,left=0.03,top=.89)
plt.grid()
plt.show()