Consider the following data frame,
d = {'Score': [0.25, 0.52, 0.26, 0.22, 0.31, 2.45, 3.68, 41.3, 87, 91],
'Thr1': 16.5,
'Thr2': 45.5,
'Anomaly':[0, 0, 0, 0, 0, 0, 0, 1, 1, 1]}
df = pd.DataFrame(data = d)
What I m trying to do is to plot a polar chart, with a dotted line for threshold or multiple dotted lines for multiple thresholds and different color for the anomalies. What I ve got so far is,
r = df['Score']
theta = df.index.values
fig = plt.figure()
ax = fig.add_subplot(111, projection = 'polar')
c = ax.scatter(theta, r)
I cannot get the threshold though and change the color of the anomalous points. Any ideas?
You need to draw a dashed line at the threshold level, to indicate where the threshold is. (a line will appear as a circle on a polar plot).
Then you need to segregate the values to plot on the scatter plot, based whether or not they are below, between, or above the thresholds, and color the points accordingly.
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
dataset = {'Score': [0.25, 0.52, 0.26, 0.22, 0.31, 2.45, 3.68, 41.3, 87, 91],
'Thr1': 16.5,
'Thr2': 45.5,
'Anomaly':[0, 0, 0, 0, 0, 0, 0, 1, 1, 1]}
df = pd.DataFrame(data=dataset)
scores = df['Score']
theta, thr_1, thr_2 = df.index.values, dataset['Thr1'], dataset['Thr2']
fig = plt.figure()
ax = fig.add_subplot(111, projection='polar')
# assigns a color to each point based on their relative value to the thresholds
colors = ['b' if val < thr_1 else 'y' if val < thr_2 else 'r' for val in scores]
point_cloud = ax.scatter(theta, scores, color=colors, marker='o')
# Drawing the threshold dash lines (with alpha value 1/2)
theta_xs, thr_y1, thr_y2 = np.linspace(0, 2*np.pi, 20), [thr_1] * 20, [thr_2] * 20
thr_line_1 = ax.plot(theta_xs, thr_y1, color='blue', linestyle='--', alpha=0.5)
thr_line_2 = ax.plot(theta_xs, thr_y2, color='green', linestyle='--', alpha=0.5)
plt.show()
Well, i'm not exactly sure that it is what you want, because i never used Anomaly part of your dataset, and just take color info from Score array
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as c
d = {'Score': [0.25, 0.52, 0.26, 0.22, 0.31, 2.45, 3.68, 41.3, 87, 91],
'Thr1': 16.5,
'Thr2': 45.5,
'Anomaly': [0, 0, 0, 0, 0, 0, 0, 1, 1, 1]}
df = pd.DataFrame(data = d)
r = df['Score']
theta = df.index.values
fig = plt.figure()
ax = fig.add_subplot(111, projection = 'polar')
#Add thresholds
ax.plot(np.linspace(0, 2*np.pi, 100), np.ones(100)*d['Thr1'], c='g', ls='--')
ax.plot(np.linspace(0, 2*np.pi, 100), np.ones(100)*d['Thr2'], c='r', ls='--')
#Add colors
colors = ['g' if v < d['Thr1'] else 'y' if v < d['Thr2'] else "r" for v in r]
sc = ax.scatter(theta, r, c=colors)
plt.show()
Related
I am working on a code of a stacked histogram and I need help arranging the bins in the order if this is possible.
0.01 - 0.1, 0.1 - 0.5, 0.5 - 1.0, 1.0 - 2.5, > 2.5
Right now, my histogram looks like this:
with the order of bins being:
0.01 - 0.1, 1.0 - 2.5, > 2.5, 0.1 - 0.5, 0.5 - 1.0
Code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
data = [['0.01 - 0.1','A'],['0.1 - 0.5','B'],['0.5 - 1.0','B'],['0.01 - 0.1','C'],['> 2.5','A'],['1.0 - 2.5','A'],['> 2.5','A']]
df = pd.DataFrame(data, columns = ['Size','Index'])
### HISTOGRAM OF SIZE
df_new = df.sort_values(['Size'])
x_var = 'Size'
groupby_var = 'Index'
df_new_agg = df_new.loc[:, [x_var, groupby_var]].groupby(groupby_var)
vals = [df_new[x_var].values.tolist() for i, df_new in df_new_agg]
list_of_colors_element = ['lightcoral','palegreen','forestgreen']
# Draw
plt.figure(figsize=(16,10), dpi= 80)
colors = [plt.cm.Spectral(i/float(len(vals)-1)) for i in range(len(vals))]
n, bins, patches = plt.hist(vals, df_new[x_var].unique().__len__(), stacked=True, density=False, color=list_of_colors_element)
# Decorations
plt.legend({group:col for group, col in zip(np.unique(df_new[groupby_var]).tolist(), list_of_colors_element)}, prop={'size': 16})
plt.title("Stacked Histogram of Size colored by element of highest share", fontsize=22)
plt.xlabel(x_var, fontsize=22)
plt.ylabel("Frequency", fontsize=22)
plt.grid(color='black', linestyle='--', linewidth=0.4)
plt.xticks(range(5),fontsize=15)
plt.yticks(fontsize=15)
plt.show()
Any help is appreciated!
You can use:
piv = df_new.assign(dummy=1) \
.pivot_table('dummy', 'Size', 'Index', aggfunc='count', fill_value=0) \
.rename_axis(columns=None)
ax = piv.plot.bar(stacked=True, color=list_of_colors_element, rot=0, width=1)
plt.show()
I think I'd take a different route and represent the input data differently altogether to make the code easier to read.
import matplotlib.pyplot as plt
labels = ['0.01 - 0.1', '0.1 - 0.5', '0.5 - 1', '1.0 - 2.5', '> 2.5']
A = [1, 0, 0, 1, 2]
B = [0, 1, 1, 0, 0]
C = [1, 0, 0, 0, 0]
width = 1
fig, ax = plt.subplots()
ax.bar(labels, A, width, label='A', color='lightcoral')
ax.bar(labels, B, width, bottom=A, label='B', color='palegreen')
ax.bar(labels, C, width, bottom=A, label='C', color='forestgreen')
ax.set_ylabel('Frequency')
ax.set_xlabel('Size')
ax.set_title("Stacked Histogram of Size colored by element of highest share")
plt.show()
Is there a simple way to set two xticks at even distances from the xmin and xmax, for two plots with different ranges on the x-axis?
# Example:
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
fig = plt.figure(figsize=(6,4), constrained_layout=True)
gs = gridspec.GridSpec(ncols=2, nrows=1, figure=fig)
x1 = [1, 0.6, 0.4, 0.3, 0.25, 0.24, 0.23]
x2 = [0.1, 0.14, 0.15, 0.16, 0.166, 0.1666, 0.1666 ]
y = [1, 2, 3, 4, 5, 6, 7]
# xticks
number_of_xticks = 2
# Plot 1:
ax0 = fig.add_subplot(gs[0, 0])
ax0.plot(x1, y)
ax0.xaxis.set_major_locator(plt.MaxNLocator(number_of_xticks))
# Plot 2:
ax1 = fig.add_subplot(gs[0, 1])
ax1.plot(x2, y)
ax1.xaxis.set_major_locator(plt.MaxNLocator(number_of_xticks))
plt.show()
Example code does not work because xticks are at different distances from the xmin and xmax in the two plots:
You could try specifying the relative distance along the x-range:
# xticks
tick_fractions = [1/4, 3/4]
And then calculate the tick positions based on each x-range:
mini = min(x)
maxi = max(x)
dist = maxi - mini
ax.set_xticks([mini + f * dist for f in tick_fractions])
So full script would look like:
# Example:
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
fig = plt.figure(figsize=(6,4), constrained_layout=True)
gs = gridspec.GridSpec(ncols=2, nrows=1, figure=fig)
x1 = [1, 0.6, 0.4, 0.3, 0.25, 0.24, 0.23]
x2 = [0.1, 0.14, 0.15, 0.16, 0.166, 0.1666, 0.1666 ]
y = [1, 2, 3, 4, 5, 6, 7]
# xticks
tick_fractions = [1/4, 3/4]
# Plot 1:
ax0 = fig.add_subplot(gs[0, 0])
ax0.plot(x1, y)
mini = min(x1)
maxi = max(x1)
dist = maxi - mini
ax0.set_xticks([mini + f * dist for f in tick_fractions])
# Plot 2:
ax1 = fig.add_subplot(gs[0, 1])
ax1.plot(x2, y)
mini = min(x2)
maxi = max(x2)
dist = maxi - mini
ax1.set_xticks([mini + f * dist for f in tick_fractions])
plt.show()
You could add a call to round somewhere if you want to limit the decimals.
I am looking to develop an animated/growing bar plot.The plot basically contains of 6 rectangular bars and each bar has a particular value.
The problem I'm facing is that the plot is growing up to the maximum value on Y-axis instead it should stop at the bar's corresponding value.
The code I have tried makes the bars animate up to the maximum value on Y-Axis.I have found some information from
Growing matplotlib bar charts
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
%matplotlib notebook
fig = plt.figure()
position = np.arange(6) + .5
plt.tick_params(axis = 'x', colors = '#072b57')
plt.tick_params(axis = 'y', colors = '#072b57')
speeds = [.01, .02, .03, .04, .01, .02]
heights = [0, 0, 0, 0, 0, 0]
# Bar plot should animate up to these values
# Adding this portion is making the plot static
#heights = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
rects = plt.bar(position, heights, align = 'center', color=['red', 'orange', 'blue', 'pink', 'green','purple'])
plt.xticks(position, ('Anger', 'Sadness', 'Disgust', 'Fear', 'Happy', 'Surprise'))
plt.xlabel('Emotion', color = '#072b57')
plt.ylabel('Probabilities', color = '#072b57')
plt.title('Emotion - Ally', color = '#072b57')
plt.ylim((0,1))
plt.xlim((0,6))
plt.grid(True)
rs = [r for r in rects]
def init():
return rs
def animate(i):
global rs, heights
if all(map(lambda x: x==1, heights)):
heights = [0, 0, 0, 0, 0, 0]
else:
heights = [min(h+s,1) for h,s in zip(heights,speeds)]
# Bar plot should animate up to these values
# Adding this portion is making the plot static
#heights = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
for h,r in zip(heights,rs):
r.set_height(h)
return rs
anim = animation.FuncAnimation(fig, animate, init_func=init,frames=200, interval=20, blit=True)
plt.show()
The animated bar plot should stop at designated y-values instead of hitting the maximum value on y-axis.
Right now, your bar height is derived from min(h+s,1), which means it will grow with a certain speed (or rather step size) until it hits the value of 1.
If you want to limit the height, you should create a respective array, like max_heights = [0.5, .6, 1.0, 0.6, 0.1, 1.0] and change the heights calculation within the else case to heights = [min(h+s,mh) for h,s,mh in zip(heights,speeds,max_heights)].
In summary:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
# %matplotlib notebook
fig = plt.figure()
position = np.arange(6) + .5
plt.tick_params(axis = 'x', colors = '#072b57')
plt.tick_params(axis = 'y', colors = '#072b57')
speeds = [.01, .02, .03, .04, .01, .02]
heights = [0, 0, 0, 0, 0, 0]
# Bar plot should animate up to these values
# Adding this portion is making the plot static
max_heights = [0.5, .6, 1.0, 1.6, 0.1, 1.0]
rects = plt.bar(position, heights, align = 'center', color=['red', 'orange', 'blue', 'pink', 'green','purple'])
plt.xticks(position, ('Anger', 'Sadness', 'Disgust', 'Fear', 'Happy', 'Surprise'))
plt.xlabel('Emotion', color = '#072b57')
plt.ylabel('Probabilities', color = '#072b57')
plt.title('Emotion - Ally', color = '#072b57')
plt.ylim((0,1))
plt.xlim((0,6))
plt.grid(True)
rs = [r for r in rects]
def init():
return rs
def animate(i):
global rs, heights
if all(map(lambda x: x==1, heights)):
heights = [0, 0, 0, 0, 0, 0]
else:
heights = [min(h+s,mh) for h,s,mh in zip(heights,speeds,max_heights)]
# Bar plot should animate up to these values
# Adding this portion is making the plot static
#heights = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
print heights
for h,r in zip(heights,rs):
r.set_height(h)
return rs
anim = animation.FuncAnimation(fig, animate, init_func=init,frames=200, interval=20, blit=True)
plt.show()
You needs a little bit more calculation, e.g.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
fig = plt.figure()
position = np.arange(6) + .5
plt.tick_params(axis = 'x', colors = '#072b57')
plt.tick_params(axis = 'y', colors = '#072b57')
speeds = [.01, .02, .03, .04, .01, .02]
heights = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
rects = plt.bar(position, np.zeros_like(heights), align = 'center',
color=['red', 'orange', 'blue', 'pink', 'green','purple'])
plt.xticks(position, ('Anger', 'Sadness', 'Disgust', 'Fear', 'Happy', 'Surprise'))
plt.xlabel('Emotion', color = '#072b57')
plt.ylabel('Probabilities', color = '#072b57')
plt.title('Emotion - Ally', color = '#072b57')
plt.ylim((0,1))
plt.xlim((0,6))
plt.grid(True)
frames = 200
min_speed = np.min(speeds)
def init():
return rects
def animate(i):
for h,r,s in zip(heights,rects, speeds):
new_height = i / (frames-1) * h * s / min_speed
new_height= min(new_height, h)
r.set_height(new_height)
return rects
anim = animation.FuncAnimation(fig, animate, init_func=init,frames=frames, interval=20, blit=True, repeat=False)
plt.show()
I have a pandas DataFrame with non-uniformly spaced data points given by an x, y and z column, where x and y are pairs of variables and z is the dependent variable. For example:
import matplotlib.pyplot as plt
from matploblib.mlab import griddata
import numpy as np
import pandas as pd
df = pd.DataFrame({'x':[0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
'y':[0, 1, 0, 1, 0.2, 0.7, 1.4, 0.2, 1.4, 2],
'z':[50, 40, 40, 30, 30, 30, 20, 20, 20, 10]})
x = df['x']
y = df['y']
z = df['z']
I want to do a contour plot of the dependent variable z over x and y. For this, I create a new grid to interpolate the data on using matplotlib.mlab's griddata function.
xi = np.linspace(x.min(), x.max(), 100)
yi = np.linspace(y.min(), y.max(), 100)
z_grid = griddata(x, y, z, xi, yi, interp='linear')
plt.contourf(xi, yi, z_grid, 15)
plt.scatter(x, y, color='k') # The original data points
plt.show()
While this works, the output is not what I want. I do not want griddata to interpolate outside of the boundaries given by the min and max values of the x and y data. The following plots are what shows up after calling plt.show(), and then highlighted in purple what area of the data I want to have interpolated and contoured. The contour outside the purple line is supposed to be blank. How could I go about masking the outlying data?
The linked question does unfortunately not answer my question, as I don't have a clear mathematical way to define the conditions on which to do a triangulation. Is it possible to define a condition to mask the data based on the data alone, taking the above Dataframe as an example?
As seen in the answer to this question one may introduce a condition to mask the values.
The sentence from the question
"I do not want griddata to interpolate outside of the boundaries given by the min and max values of the x and y data." implies that there is some min/max condition present, which can be used.
Should that not be the case, one may clip the contour using a path. The points of this path need to be specified as there is no generic way of knowing which points should be the edges. The code below does this for three different possible paths.
import matplotlib.pyplot as plt
from matplotlib.path import Path
from matplotlib.patches import PathPatch
from matplotlib.mlab import griddata
import numpy as np
import pandas as pd
df = pd.DataFrame({'x':[0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
'y':[0, 1, 0, 1, 0.2, 0.7, 1.4, 0.2, 1.4, 2],
'z':[50, 40, 40, 30, 30, 30, 20, 20, 20, 10]})
x = df['x']
y = df['y']
z = df['z']
xi = np.linspace(x.min(), x.max(), 100)
yi = np.linspace(y.min(), y.max(), 100)
z_grid = griddata(x, y, z, xi, yi, interp='linear')
clipindex = [ [0,2,4,7,8,9,6,3,1,0],
[0,2,4,7,5,8,9,6,3,1,0],
[0,2,4,7,8,9,6,5,3,1,0]]
fig, axes = plt.subplots(ncols=3, sharey=True)
for i, ax in enumerate(axes):
cont = ax.contourf(xi, yi, z_grid, 15)
ax.scatter(x, y, color='k') # The original data points
ax.plot(x[clipindex[i]], y[clipindex[i]], color="crimson")
clippath = Path(np.c_[x[clipindex[i]], y[clipindex[i]]])
patch = PathPatch(clippath, facecolor='none')
ax.add_patch(patch)
for c in cont.collections:
c.set_clip_path(patch)
plt.show()
Ernest's answer is a great solution, but very slow for lots of contours. Instead of clipping every one of them, I built a mask by constructing the complement polygon of the desired clipping mask.
Here is the code based on Ernest's accepted answer:
import numpy as np
import pandas as pd
import matplotlib.tri as tri
import matplotlib.pyplot as plt
from descartes import PolygonPatch
from shapely.geometry import Polygon
df = pd.DataFrame({'x':[0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
'y':[0, 1, 0, 1, 0.2, 0.7, 1.4, 0.2, 1.4, 2],
'z':[50, 40, 40, 30, 30, 30, 20, 20, 20, 10]})
points = df[['x', 'y']]
values = df[['z']]
xi = np.linspace(points.x.min(), points.x.max(), 100)
yi = np.linspace(points.y.min(), points.y.max(), 100)
triang = tri.Triangulation(points.x, points.y)
interpolator = tri.LinearTriInterpolator(triang, values.z)
Xi, Yi = np.meshgrid(xi, yi)
zi = interpolator(Xi, Yi)
clipindex = [ [0,2,4,7,8,9,6,3,1,0],
[0,2,4,7,5,8,9,6,3,1,0],
[0,2,4,7,8,9,6,5,3,1,0]]
fig, axes = plt.subplots(ncols=3, sharey=True, figsize=(10,4))
for i, ax in enumerate(axes):
ax.set_xlim(-0.5, 4.5)
ax.set_ylim(-0.2, 2.2)
xlim = ax.get_xlim()
ylim = ax.get_ylim()
cont = ax.contourf(Xi, Yi, zi, 15)
ax.scatter(points.x, points.y, color='k', zorder=2) # The original data points
ax.plot(points.x[clipindex[i]], points.y[clipindex[i]], color="crimson", zorder=1)
#### 'Universe polygon':
ext_bound = Polygon([(xlim[0], ylim[0]), (xlim[0], ylim[1]), (xlim[1], ylim[1]), (xlim[1], ylim[0]), (xlim[0], ylim[0])])
#### Clipping mask as polygon:
inner_bound = Polygon([ (row.x, row.y) for idx, row in points.iloc[clipindex[i]].iterrows() ])
#### Mask as the symmetric difference of both polygons:
mask = ext_bound.symmetric_difference(inner_bound)
ax.add_patch(PolygonPatch(mask, facecolor='white', zorder=1, edgecolor='white'))
plt.show()
I'm looking to make a colorbar like plot, like so:
but with a controllable color, for example I have the following x and y arrays:
x = [0,1,2,4,7,8]
y = [1,2,1,3,4,5]
Then I would have a colorbar like the above picture, but when y=1, it would color red, y=2: green, y=3: blue, y=4:black, etc.
Here is the python code that I modified from matplotlib's gallery:
from matplotlib import pyplot
import matplotlib as mpl
fig = pyplot.figure(figsize=(8,1))
ax2 = fig.add_axes([0.05, 0.25, 0.9, 0.5])
cmap = mpl.cm.Accent
norm = mpl.colors.Normalize(vmin=5, vmax=10)
bounds = [1, 2, 4, 7, 8]
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
cb2 = mpl.colorbar.ColorbarBase(ax2, cmap=cmap,
norm=norm,
boundaries=[0]+bounds+[13],
ticks=bounds, # optional
spacing='proportional',
orientation='horizontal')
After adapting your code I managed to obtain something like you described.
In this case the colormap is generated using ListedColormap and I added the yellow color for y=5.
It is important to notice that while calculating the BoundaryNorm I am using the intervals that contain the values you described for y.
from matplotlib import pyplot,colors
import matplotlib as mpl
fig = pyplot.figure(figsize=(8,1))
ax2 = fig.add_axes([0.05, 0.25, 0.9, 0.5])
cmap = colors.ListedColormap(['r', 'g', 'b', 'k','y'])
bounds = [0, 1, 2, 4, 7, 8, 13]
yVals = [ 1, 2, 1, 3, 4, 5]
cBounds = [i+0.5 for i in range(6)]
norm = mpl.colors.BoundaryNorm(cBounds, cmap.N)
cb2 = mpl.colorbar.ColorbarBase(ax2, cmap=cmap,
norm=norm,
values=yVals,
boundaries=bounds,
ticks=bounds[1:-1], # optional
spacing='proportional',
orientation='horizontal')
-- Edited 14 of Jan (mrcl) --
Alternatively, you can use pcolormesh to plot your colormap and have a colorbar as your legend, such as in the example below.
from pylab import *
from matplotlib import pyplot,colors
import matplotlib as mpl
fig = pyplot.figure(figsize=(8,1.5))
ax1 = fig.add_axes([0.05, 0.25, 0.82, 0.5])
cmap = colors.ListedColormap(['r', 'g', 'b', 'k','y'])
xBounds = array([0, 1, 2, 4, 7, 8, 13])
yBounds = array([0, 1])
Vals = array([[ 1, 2, 1, 3, 4, 5]])
cBounds = [i+0.5 for i in arange(amax(Vals)+1)]
norm = mpl.colors.BoundaryNorm(cBounds, cmap.N)
c = ax1.pcolormesh(xBounds,yBounds,Vals,cmap=cmap,norm=norm)
ax1.set_xticks(xBounds[1:-1])
ax1.set_yticks([])
ax1.set_xlim(xBounds[0],xBounds[-1])
ax1.set_ylim(yBounds[0],yBounds[-1])
ax2 = fig.add_axes([0.9, 0.25, 0.05, 0.5])
colorbar(c,cax=ax2,ticks=arange(amax(Vals))+1)
Hope it helps.
Cheers
Well, I sort of tinkering with other ways:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cmx
import matplotlib.colors as colors
close('all')
def ColorPlot(x,y):
figure()
jet = plt.get_cmap('jet')
cNorm = colors.Normalize(vmin=min(y), vmax=max(y))
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
if len(x) == len(y):
x.insert(0,0)
for kk in range(len(x)-1):
colorVal = scalarMap.to_rgba(y[kk])
plt.axvspan(x[kk], x[kk+1], facecolor=colorVal,
alpha=0.5,label=colorVal)
plt.yticks([])
plt.xticks(x)
xlim([x[0],x[-1]])
plt.show()
x = [1,3,5,6,10,12]
y = [1,3,4,1,4,3]
ColorPlot(x,y)