How to arrange bins in stacked histogram, Python - python

I am working on a code of a stacked histogram and I need help arranging the bins in the order if this is possible.
0.01 - 0.1, 0.1 - 0.5, 0.5 - 1.0, 1.0 - 2.5, > 2.5
Right now, my histogram looks like this:
with the order of bins being:
0.01 - 0.1, 1.0 - 2.5, > 2.5, 0.1 - 0.5, 0.5 - 1.0
Code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
data = [['0.01 - 0.1','A'],['0.1 - 0.5','B'],['0.5 - 1.0','B'],['0.01 - 0.1','C'],['> 2.5','A'],['1.0 - 2.5','A'],['> 2.5','A']]
df = pd.DataFrame(data, columns = ['Size','Index'])
### HISTOGRAM OF SIZE
df_new = df.sort_values(['Size'])
x_var = 'Size'
groupby_var = 'Index'
df_new_agg = df_new.loc[:, [x_var, groupby_var]].groupby(groupby_var)
vals = [df_new[x_var].values.tolist() for i, df_new in df_new_agg]
list_of_colors_element = ['lightcoral','palegreen','forestgreen']
# Draw
plt.figure(figsize=(16,10), dpi= 80)
colors = [plt.cm.Spectral(i/float(len(vals)-1)) for i in range(len(vals))]
n, bins, patches = plt.hist(vals, df_new[x_var].unique().__len__(), stacked=True, density=False, color=list_of_colors_element)
# Decorations
plt.legend({group:col for group, col in zip(np.unique(df_new[groupby_var]).tolist(), list_of_colors_element)}, prop={'size': 16})
plt.title("Stacked Histogram of Size colored by element of highest share", fontsize=22)
plt.xlabel(x_var, fontsize=22)
plt.ylabel("Frequency", fontsize=22)
plt.grid(color='black', linestyle='--', linewidth=0.4)
plt.xticks(range(5),fontsize=15)
plt.yticks(fontsize=15)
plt.show()
Any help is appreciated!

You can use:
piv = df_new.assign(dummy=1) \
.pivot_table('dummy', 'Size', 'Index', aggfunc='count', fill_value=0) \
.rename_axis(columns=None)
ax = piv.plot.bar(stacked=True, color=list_of_colors_element, rot=0, width=1)
plt.show()

I think I'd take a different route and represent the input data differently altogether to make the code easier to read.
import matplotlib.pyplot as plt
labels = ['0.01 - 0.1', '0.1 - 0.5', '0.5 - 1', '1.0 - 2.5', '> 2.5']
A = [1, 0, 0, 1, 2]
B = [0, 1, 1, 0, 0]
C = [1, 0, 0, 0, 0]
width = 1
fig, ax = plt.subplots()
ax.bar(labels, A, width, label='A', color='lightcoral')
ax.bar(labels, B, width, bottom=A, label='B', color='palegreen')
ax.bar(labels, C, width, bottom=A, label='C', color='forestgreen')
ax.set_ylabel('Frequency')
ax.set_xlabel('Size')
ax.set_title("Stacked Histogram of Size colored by element of highest share")
plt.show()

Related

Plot list of lists pixels in a vertical strip using python

This plots a single strip
values = random.sample(range(60, 100), 40)
width = 10
plt.imshow(np.repeat(values, width).reshape(-1, width), cmap='gray')
How can I modify it to plot all lists of lists next to each other, instead of a single list such as;
values_list = []
for x in range(10):
values_list.append(random.sample(range(60, 100), 40))
Here is what I tried, but it only plots the last in the list
for i in range(len(values_list)):
plt.imshow(np.repeat(values_list[i], width).reshape(-1, width), cmap='gray')
plt.show()
How to make lists vertically stripped next to each other
There is no need to use np.repeat. Just reshape() to change it to a 2D array. The position can be set using imshow's extent= parameter. To get the x and y limits correct, they need to be set explicitly. Setting autoscale_on=False prevents imshow to take over the limits. origin='lower' sets the values at value_list[i][0] at the bottom so the y-axis has its usual direction.
The code below uses the approach from this example.
import matplotlib.pyplot as plt
import numpy as np
import random
values_list = [random.sample(range(60, 100), 40) for x in range(10)]
fig, ax = plt.subplots()
width = 10
xlim = 0, width*len(values_list)
ylim = 0, max([len(v) for v in values_list]) + 2
ax.set(xlim=xlim, ylim=ylim, autoscale_on=False)
for i in range(len(values_list)):
plt.imshow(np.array(values_list[i]).reshape(-1, 1), extent=[i * width, (i + 1) * width, 0, len(values_list[i])],
origin='lower', cmap='inferno')
ax.set_aspect('auto')
plt.show()
PS: To have the x-axis numbering the 'columns', set the width to 1 and add 0.5 to the x-positions. Optionally the distance between the 'columns' could be set larger than their width to get an effect of a bar plot.
import matplotlib.pyplot as plt
import numpy as np
import random
values_list = [random.sample(range(60, 100), random.randint(10,15)) for x in range(10)]
fig, ax = plt.subplots()
for i in range(len(values_list)):
plt.imshow(np.array(values_list[i]).reshape(-1, 1), origin='lower',
extent=[i + 0.6, i + 1.4, 0, len(values_list[i])], cmap='inferno')
ax.set_xticks(range(1, len(values_list) + 1))
xlim = 0.3, len(values_list) + 0.7
ylim = 0, max([len(v) for v in values_list]) + 2
ax.set(xlim=xlim, ylim=ylim)
ax.set_aspect('auto')
plt.show()
PS: To have horizontal bars, just interchange all x-related values with y-related. reshape(1, -1) will be needed to have the pixels progress left to right.
for i in range(len(values_list)):
plt.imshow(np.array(values_list[i]).reshape(1, -1), origin='lower',
extent=[0, len(values_list[i]), i + 0.6, i + 1.4], cmap='RdYlBu')
ax.set_yticks(range(1, len(values_list) + 1))
ylim = 0.3, len(values_list) + 0.7
xlim = 0, max([len(v) for v in values_list]) + 2
ax.set(xlim=xlim, ylim=ylim)

Adding colorbars to clustered heatmaps

I am trying to replicate this type of plot (heatmap with colorbars as leaves)
This is what I've done so far
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as ssd
#read data
fid_df = pd.read_csv(fid_file, index_col=[0])
# scale data
def scale(x):
return np.math.log2(x+1)
fid_df = fid_df.applymap(scale)
# clustering colums
data_1D_X = ssd.pdist(fid_df.T, 'euclidean')
X = sch.linkage(data_1D_X, method='ward')
# clustering rows
data_1D_Y = ssd.pdist(fid_df, 'cityblock')
Y = linkage(data_1D_Y, method='ward')
#plot first dendrogram
fig = plt.figure(figsize=(8, 8))
ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
Z1 = sch.dendrogram(Y, orientation='left')
ax1.set_xticks([])
ax1.set_yticks([])
# second dendrogram.
ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
Z2 = sch.dendrogram(X)
ax2.set_xticks([])
ax2.set_yticks([])
# plot matrix
axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
# sorts based of clustering
idx1 = Z1['leaves']
idx2 = Z2['leaves']
D = fid_df.values[idx1, :]
D = D[:, idx2]
im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=plt.cm.YlGnBu)
axmatrix.set_xticks([])
axmatrix.set_yticks([])
Example:
However, I need to add colorbars that would show the initial groups of rows and columns. Any idea how to do this?
Something like this?
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
ax1 = fig.add_axes((0, 0, 1, 0.9))
ax2 = fig.add_axes((0, 0.9, 1, 0.1))
gridY, gridX = np.mgrid[0:10:11 * 1j, 0:10:11 * 1j]
ax1.pcolormesh(gridX, gridY, np.sqrt(gridX ** 2 + gridY ** 2))
randCol = ['red', 'blue']
for value in np.linspace(0, 10, 1001):
ax2.axvline(value, color=randCol[np.random.default_rng().integers(2)])
ax2.set_xlim((0, 10))
ax2.tick_params(labelbottom=False, bottom=False, labelleft=False, left=False)
fig.savefig('so.png', bbox_inches='tight')

Polar chart with limit and anomalous points

Consider the following data frame,
d = {'Score': [0.25, 0.52, 0.26, 0.22, 0.31, 2.45, 3.68, 41.3, 87, 91],
'Thr1': 16.5,
'Thr2': 45.5,
'Anomaly':[0, 0, 0, 0, 0, 0, 0, 1, 1, 1]}
df = pd.DataFrame(data = d)
What I m trying to do is to plot a polar chart, with a dotted line for threshold or multiple dotted lines for multiple thresholds and different color for the anomalies. What I ve got so far is,
r = df['Score']
theta = df.index.values
fig = plt.figure()
ax = fig.add_subplot(111, projection = 'polar')
c = ax.scatter(theta, r)
I cannot get the threshold though and change the color of the anomalous points. Any ideas?
You need to draw a dashed line at the threshold level, to indicate where the threshold is. (a line will appear as a circle on a polar plot).
Then you need to segregate the values to plot on the scatter plot, based whether or not they are below, between, or above the thresholds, and color the points accordingly.
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
dataset = {'Score': [0.25, 0.52, 0.26, 0.22, 0.31, 2.45, 3.68, 41.3, 87, 91],
'Thr1': 16.5,
'Thr2': 45.5,
'Anomaly':[0, 0, 0, 0, 0, 0, 0, 1, 1, 1]}
df = pd.DataFrame(data=dataset)
scores = df['Score']
theta, thr_1, thr_2 = df.index.values, dataset['Thr1'], dataset['Thr2']
fig = plt.figure()
ax = fig.add_subplot(111, projection='polar')
# assigns a color to each point based on their relative value to the thresholds
colors = ['b' if val < thr_1 else 'y' if val < thr_2 else 'r' for val in scores]
point_cloud = ax.scatter(theta, scores, color=colors, marker='o')
# Drawing the threshold dash lines (with alpha value 1/2)
theta_xs, thr_y1, thr_y2 = np.linspace(0, 2*np.pi, 20), [thr_1] * 20, [thr_2] * 20
thr_line_1 = ax.plot(theta_xs, thr_y1, color='blue', linestyle='--', alpha=0.5)
thr_line_2 = ax.plot(theta_xs, thr_y2, color='green', linestyle='--', alpha=0.5)
plt.show()
Well, i'm not exactly sure that it is what you want, because i never used Anomaly part of your dataset, and just take color info from Score array
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as c
d = {'Score': [0.25, 0.52, 0.26, 0.22, 0.31, 2.45, 3.68, 41.3, 87, 91],
'Thr1': 16.5,
'Thr2': 45.5,
'Anomaly': [0, 0, 0, 0, 0, 0, 0, 1, 1, 1]}
df = pd.DataFrame(data = d)
r = df['Score']
theta = df.index.values
fig = plt.figure()
ax = fig.add_subplot(111, projection = 'polar')
#Add thresholds
ax.plot(np.linspace(0, 2*np.pi, 100), np.ones(100)*d['Thr1'], c='g', ls='--')
ax.plot(np.linspace(0, 2*np.pi, 100), np.ones(100)*d['Thr2'], c='r', ls='--')
#Add colors
colors = ['g' if v < d['Thr1'] else 'y' if v < d['Thr2'] else "r" for v in r]
sc = ax.scatter(theta, r, c=colors)
plt.show()

Python Matplotlib - fill_between remove white spaces between lines at top of yaxis

When I use fill_between The colored patches are slightly angled vertically so there is white space at the top of the y axis, whereas the colors are nicely merged at the bottom of the yaxis. Anyone know how to prevent this/understand what is causing this?
The plot is showing a 'weather window': when weather parameters are below a certain threshold the time period is 'operational' and at other times it is 'non operational'. The code to generate this plot is:
figure = plt.figure(figsize=(8, 3 * 3))
gs = gridspec.GridSpec(3, 1)
gs.update(hspace=0.3)
ax0 = plt.subplot(gs[0])
df1.plot() # pandas DataSeries
ax0.set_xlabel('')
ax1 = plt.subplot(gs[1])
df2.plot() # pandas DataSeries
ax1.set_xlabel('')
ax2 = plt.subplot(gs[2])
trans = mtransforms.blended_transform_factory(ax2.transData, ax2.transAxes)
ax2.plot(xtime, y, color = 'green', alpha = 0.5, lw = 0.01)
ax2.set_xlim(xtime[0], xtime[-1])
ax2.fill_between(xtime2, 0, 1, where = yop > 0, facecolor = 'green', alpha = 0.5, interpolate = True, transform = trans)
# yop is numpy array of 0's and 1's
ax2.fill_between(xtime2, 0, 1, where = ynonop > 0, facecolor = 'red', alpha = 0.5, interpolate = True, transform = trans)
# ynonop has 0's and 1's opposite to yop
The interpolate = True plays some role is removing the white spaces between points.
Here is simpler code to test the issue:
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
x = np.arange(0.0, 365, 1)
yop = np.random.randint(2, size=len(x))
ynonop = np.copy(yop)
# make 0's and 1's opposite to yop
ynonop[ynonop == 1] = 2
ynonop[ynonop == 0] = 1
ynonop[ynonop == 2] = 0
import matplotlib.transforms as mtransforms
trans = mtransforms.blended_transform_factory(ax.transData, ax.transAxes)
ax.set_xlim(x[0], x[-1])
ax.fill_between(x, 0, 1, where=yop > 0, facecolor='green', alpha=0.5, interpolate = True, transform=trans)
ax.fill_between(x, 0, 1, where=ynonop > theta, facecolor='red', alpha=0.5, interpolate = True, transform=trans)
plt.show()
# plt.savefig('test.png', bbox_inches = 0)
To understand what is causing the white stripes, you may zoom into the plot.
Because fill_between fills between points that fulfil a certain condition, you get a sawtooth-like shape.
A possible solution might be to use a broken_barh plot. To this end one would need to rearange the data into a 2columns format of (position, width).
import matplotlib.pyplot as plt
import numpy as np
fig, (ax,ax2) = plt.subplots(nrows=2, sharex=True, sharey=True)
x = np.arange(0.0, 365, 1)
yop = np.random.randint(2, size=len(x))
ynonop = np.copy(yop)
# make 0's and 1's opposite to yop
ynonop[ynonop == 1] = 2
ynonop[ynonop == 0] = 1
ynonop[ynonop == 2] = 0
trans = ax.get_xaxis_transform()
ax.set_xlim(x[0], x[-1])
ax.fill_between(x, 0, 1, where=yop > 0, facecolor='green',
alpha=0.5, interpolate = True, transform=trans)
ax.fill_between(x, 0, 1, where=ynonop > 0, facecolor='red',
alpha=0.5, interpolate = True, transform=trans)
trans2 = ax2.get_xaxis_transform()
xra = np.c_[x[:-1],np.diff(x)]
ax2.broken_barh(xra[yop[:-1] > 0,:], (0,1),
facecolors='green', alpha=0.5, transform=trans2)
ax2.broken_barh(xra[ynonop[:-1] > 0,:], (0,1),
facecolors='red', alpha=0.5, transform=trans2)
ax.set_title("fill_between")
ax2.set_title("broken_barh")
plt.show()
You can also do this using imshow
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as mcolors
import matplotlib.transforms as mtransforms
fig, ax = plt.subplots()
x = np.arange(0.0, 365, 1)
yop = np.random.randint(2, size=len(x))
trans = mtransforms.blended_transform_factory(ax.transData, ax.transAxes)
ax.set_xlim(x[0], x[-1])
lc = mcolors.ListedColormap(['r', 'g'], name='RWG')
ax.imshow(yop.reshape(1, -1),
extent=[0, len(yop), 0, 1],
transform=trans,
cmap=lc,
norm=mcolors.NoNorm(), alpha=.5)
ax.set_aspect('auto')
# debugging plotting
ax.step(x, yop, '.', where='post', linestyle='none')
ax.set_ylim([-.1, 1.1])
plt.show()
By tweaking x values in extent you can control exactly where the pixels fall in dataspace.

pyplot/matplotlib Bar chart with fill color depending on value

I want to produce in python with matplotlib/pyplot
a bar chart with a fill depending on the value.
legend color bar
while keeping module dependencies at a minimum.
Is there something simpler than:
import matplotlib.pyplot as plt
def color_gradient ( val, beg_rgb, end_rgb, val_min = 0, val_max = 1):
val_scale = (1.0 * val - val_min) / (val_max - val_min)
return ( beg_rgb[0] + val_scale * (end_rgb[0] - beg_rgb[0]),
beg_rgb[1] + val_scale * (end_rgb[1] - beg_rgb[1]),
beg_rgb[2] + val_scale * (end_rgb[2] - beg_rgb[2]))
# -----------------------------------------------
x_lbls = [ "09:00", "09:15", "10:10"]
y_vals = [ 7, 9, 5]
plt_idx = np.arange( len( x_lbls))
bar_wd = 0.35
grad_beg, grad_end = ( 0.5, 0.5, 0.5), (1, 1, 0)
col_list = [ color_gradient( val,
grad_beg,
grad_end,
min( y_vals),
max( y_vals)) for val in y_vals]
plt.bar( plt_idx, y_vals, color = col_list)
plt.xticks( plt_idx + bar_wd, x_lbls)
plt.show()
this is still missing the legend color bar
my solution in R with ggplot would be:
library(ggplot2)
df = data.frame( time = 1:10, vals = abs(rnorm( n = 10)))
ggplot( df, aes( x = time, y = vals, fill = vals)) +
geom_bar(stat = "identity") +
scale_fill_gradient(low="#888888",high="#FFFF00")
and produces the desired output:
I couldn't figure out how to get the colorbar to work without plotting something else and then clearing it, so it's not the most elegant solution.
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
y = np.array([1, 4, 3, 2, 7, 11])
colors = cm.hsv(y / float(max(y)))
plot = plt.scatter(y, y, c = y, cmap = 'hsv')
plt.clf()
plt.colorbar(plot)
plt.bar(range(len(y)), y, color = colors)
plt.show()
You can use Normalize and ScalarMappable without plotting a scatter. For example:
import matplotlib mpl
import matplotlib.pyplot as plt
from matplotlib import cm
f,(ax1,ax2) = plt.subplots(2)
#ax1 --> plot here your bar chart
norm = mpl.colors.Normalize(vmin=0, vmax=1)
mpl.colorbar.ColorbarBase(ax2, cmap=cm.RdBu,
norm=norm,
orientation='horizontal')
Finally, add the desired format to the colorbar.

Categories

Resources