Bokeh: How to Add Vertical Lines that Connect Dots - python

I have 2 columns, sharing the same x-axis values, that I want to connect using vertical lines. This is the desired effect:
I was able to implement it in matplotlib:
for i, row in df.iterrows():
ax.plot([row['x']]*2, row[['y1', 'y2']], color='grey', lw=1, zorder=0, alpha=0.5)
How can I achieve this in Bokeh?
df = pd.DataFrame(np.random.normal(0, 5, (10, 2)), columns=['x','y'])
df_2 = df.copy()
df_2['y'] = df_2['y'] - 5
source = ColumnDataSource(df)
source_2 = ColumnDataSource(df_2)
myplot = figure(plot_width=600, plot_height=400, tools='hover,box_zoom,box_select,crosshair,reset')
myplot.circle('x', 'y', size=7, fill_alpha=0.5, source=source)
myplot.circle('x', 'y', size=7, fill_alpha=0.5, color='orange', source=source_2)
show(myplot, notebook_handle=True);
Bokeh code result:
Underlying data example: Y2 will always be larger than Y1.

You should use the segment glyph method:
from bokeh.plotting import figure, show
x = [1, 2, 3, 4, 5]
y1 = [6, 7, 2, 4, 5]
y2 = [10, 12, 11, 14, 13]
p = figure(plot_height=350)
p.segment(x, y1, x, y2, color="lightgrey", line_width=3)
p.circle(x, y1, color="blue", size=20)
p.circle(x, y2, color="red", size=20)
show(p)
This code passes the data directly to the glyph methods, but it would also be sensible to put everything in one ColumnDataSource that gets shared for all the glyphs.

Related

How to convert grouped bar chart from vertical to horizontal

How can this vertical grouped bar chart be changed to a horizontal bar chart (grouped, and stacked)? I need help to alter the code such that the bars are displayed horizontally instead of vertically.
import matplotlib.pyplot as plt
import numpy as np
N = 9
labels = ['L', 'S', 'S', 'M', 'W', 'W', 'S', 'R', 'C']
M_means = [1, 45, 28, 11, 4, 7, 1, 0.02, 0.3]
PO_means = [3, 58, 17, 8, 3, 8, 1, 0.06, 1]
K_means = [1, 44, 30, 11, 3, 7, 1, 0.01, 0.5]
x = np.arange(len(labels)) # the label locations
width = 0.30 # the width of the bars
fig, ax = plt.subplots(figsize=(15, 9))
rects1 = ax.bar(x - width, M_means, width, label='M S and K', color=('#b02a2a'))
rects2 = ax.bar(x, PO_means, width, label='P O S and K', color=('#055cad'))
rects3 = ax.bar(x + width, K_means, width, label='M K', color=('#0b7d53'))
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('% of workday', fontsize=32)
#ax.set_title('Scores by group and gender')
ax.set_xticks(x)
ax.set_xticklabels(labels, fontsize=32, rotation=15)
ax.legend(loc='upper right', frameon=False, fontsize=32, markerscale=2)
ax.bar_label(rects1, size = 32, padding=20, rotation=90)
ax.bar_label(rects2, size = 32, padding=20, rotation=90)
ax.bar_label(rects3, size = 32, padding=20, rotation=90)
plt.xticks(ha='center')
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(32)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(32)
plt.ylim(0, 100)
plt.gca().spines['right'].set_color('none')
plt.gca().spines['top'].set_color('none')
#fig.tight_layout()
plt.show()
Functionally, only two changes are needed:
Change ax.bar to ax.barh
Swap set_x* methods with set_y* methods, e.g. set_xticks() -> set_yticks() and so on
Semantically, the variables x and width should also be renamed to y and height.
import matplotlib.pyplot as plt
import numpy as np

# Same data as in the question: one list per group, one value per label.
labels = list('LSSMWWSRC')
M_means = [1, 45, 28, 11, 4, 7, 1, 0.02, 0.3]
PO_means = [3, 58, 17, 8, 3, 8, 1, 0.06, 1]
K_means = [1, 44, 30, 11, 3, 7, 1, 0.01, 0.5]

# rename x/width to y/height
y = np.arange(len(labels))
height = 0.30

fig, ax = plt.subplots()

# use ax.barh instead of ax.bar; the three groups are offset by one bar
# height so they sit side by side around each tick position
rects1 = ax.barh(y - height, M_means, height, label='M S and K', color='#b02a2a')
rects2 = ax.barh(y, PO_means, height, label='P O S and K', color='#055cad')
rects3 = ax.barh(y + height, K_means, height, label='M K', color='#0b7d53')

# swap set_x* methods with set_y* methods
ax.set_xlabel('% of workday')
ax.set_yticks(y)
ax.set_yticklabels(labels)
ax.legend(loc='upper right', frameon=False, markerscale=2)
ax.bar_label(rects1, padding=10)
ax.bar_label(rects2, padding=10)
ax.bar_label(rects3, padding=10)
# ...
The easiest solution is to load the data into a pandas.DataFrame, and then use pandas.DataFrame.plot with kind='barh'. This is easier because pandas uses matplotlib as the default plotting backend, and the API groups the bars automatically.
This reduces the code to 14 lines (not including imports).
When using 'barh', xlabel= applies to the y-axis. Therefore, xlabel='' removes the y-axis label.
Adjust figsize=(12, 10) if planning to use smaller / larger font sizes.
See Adding value labels on a matplotlib bar chart for additional details about using .bar_label.
Tested in python 3.10, pandas 1.4.2, matplotlib 3.5.1
import pandas as pd
import matplotlib.pyplot as plt

# data
labels = ['L', 'S', 'S', 'M', 'W', 'W', 'S', 'R', 'C']
M_means = [1, 45, 28, 11, 4, 7, 1, 0.02, 0.3]
PO_means = [3, 58, 17, 8, 3, 8, 1, 0.06, 1]
K_means = [1, 44, 30, 11, 3, 7, 1, 0.01, 0.5]

# create a dict with the keys as the desired legend labels
data = {'labels': labels, 'M S and K': M_means, 'P O S and K': PO_means, 'M K': K_means}

# create dataframe
df = pd.DataFrame(data)

# plot: specify y=[...] if only certain columns are desired
# xlabel='' blanks the label that 'barh' would otherwise put on the y-axis
ax = df.plot(kind='barh', x='labels', width=.85, figsize=(12, 10), xlabel='',
             color=['#b02a2a', '#055cad', '#0b7d53'])
ax.set_xlabel('% of workday', fontsize=15)
ax.set_xlim(0, 100)
ax.legend(loc='upper right', frameon=False, fontsize=15, markerscale=2)

# one container per plotted column; label every bar at its outer edge
for c in ax.containers:
    ax.bar_label(c, label_type='edge', padding=1, size=15)

ax.tick_params(axis='both', which='both', labelsize=15)
ax.spines[['top', 'right']].set_visible(False)
Stacked
To manually create the stacked bar without pandas, see Horizontal stacked bar chart in Matplotlib
Use the parameter stacked=True
Some bar patches are too small for the label, so custom labels have been passed to the labels= parameter in .bar_label
Using := requires at least python 3.8. Otherwise use labels = [f'{v.get_width():.0f}' if v.get_width() > 1 else '' for v in c]
# Stacked variant: expects the DataFrame `df` (with a 'labels' column) to
# already exist.
ax = df.plot(kind='barh', x='labels', width=.85, figsize=(12, 10), xlabel='',
             color=['#b02a2a', '#055cad', '#0b7d53'], stacked=True)
ax.set_xlabel('% of workday', fontsize=15)
# No fixed x-limit here: stacked totals can exceed 100 (the second row sums
# to 45 + 58 + 44 = 147), so a 0-100 limit would clip bars and labels.
ax.legend(loc='upper right', frameon=False, fontsize=15, markerscale=2)

for c in ax.containers:
    # custom labels only show label size for values greater than 1
    seg_labels = [f'{w:.0f}' if (w := v.get_width()) > 1 else '' for v in c]
    ax.bar_label(c, labels=seg_labels, label_type='center', padding=1, size=15)

ax.tick_params(axis='both', which='both', labelsize=15)
ax.spines[['top', 'right']].set_visible(False)

How to have 2 different scales on same Y axis in Python using Matplotlib

I need to draw 4 X vs Y plots, where X is constant but different Y Values. I used below code to get the plots but need to show the Y scale on either side of the Secondary Y axes (Y Axis 2 in the image), the way Primary Y Axis has (both inward and outward). Right now, it comes on same side of Secondary Y Axis. How to modify the below code to get this done.
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
fig.subplots_adjust(left=0.1,right=0.9)
twin2 = ax.twinx()
twin3 = ax.twinx()
twin4 = ax.twinx()
twin3.spines['right'].set_position(("axes", 0.0))
twin4.spines['right'].set_position(('axes', 1.0))
p1, = ax.plot([0, 1, 2], [0, 1, 2], "b-", label="plot1")
p2, = twin2.plot([0, 1, 2], [0, 3, 2], "r-", label="plot2")
p3, = twin3.plot([0, 1, 2], [50, 30, 15], "g-", label="plot3")
p4, = twin4.plot([0, 1, 2], [5, 3, 1], "y-", label="plot4")
ax.set_xlim(0, 2)
ax.set_ylim(0, 2)
ax.set_xlabel("X Axis")
ax.set_ylabel("Y Axis")
twin2.set_ylabel("Y Axis 2")
plt.show()
On your 4th axes, set tick_left and move the left spine to the right-hand side:
twin4.yaxis.tick_left()
twin4.spines['left'].set_position(('axes', 1.0))

Create stacked bar with matplotlib

I have data displayed in the following format:
# One row per stacked series, one column per bar.
values = np.array([[10, 12, 13, 5, 20], [30, 7, 10, 25, 2], [10, 12, 13, 5, 20]])
And I want to create a straight-up stacked bar chart like the following figure. Each element in the array belongs to a stacked bar.
I have searched to see how can I do this with matplotlib, but unfortunately, I still haven't found a way to do it. How can I do this?
AFAIK, there is no straightforward way to do it. You need to normalize the values and calculate the exact position of each bar yourself.
import numpy as np
import matplotlib.pyplot as plt

# One row per stacked series, one column per bar.
values = np.array([[10, 12, 13, 5, 20], [30, 7, 10, 25, 2], [10, 12, 13, 5, 20]])

# Scale every column to sum to 1 so all bars have the same total height.
values_normalized = values / np.sum(values, axis=0)

# Cumulative sums are the tops of the segments; prepending a row of zeros
# makes row i the bottom of segment i.
bottom_values = np.cumsum(values_normalized, axis=0)
bottom_values = np.vstack([np.zeros(values_normalized[0].size), bottom_values])

# Vertical midpoint of every segment, used to centre its text.
text_positions = (bottom_values[1:] + bottom_values[:-1]) / 2

r = list(range(values.shape[1]))  # position of the bars on the x-axis
names = ['A', 'B', 'C', 'D', 'E']  # names of groups
colors = ['lightblue', 'orange', 'lightgreen']

for i, color in enumerate(colors):
    plt.bar(r, values_normalized[i], bottom=bottom_values[i], color=color,
            edgecolor='white', width=1)
    # Annotate each segment with its raw (un-normalized) value.
    for xpos, ypos, yval in zip(r, text_positions[i], values[i]):
        plt.text(xpos, ypos, "N=%d" % yval, ha="center", va="center")

# Custom X axis (sets the tick labels, so bar() needs no tick_label)
plt.xticks(r, names, fontweight='bold')
plt.xlabel("group")
plt.show()
There is a source that tells how to add text on top of bars. I'm a bit in a hurry right now so I hope this is useful and I'll update my answer next day if needed.
I've updated my answer. Adding text on top of the bars is tricky, it requires some calculations of their vertical positions.
By the way, I have refactored most of the code from the link I shared.
Python 3.8
matplotlib 3.3.1
numpy 1.19.1
Chat Result
import matplotlib.pyplot as plt
import numpy as np

# One row per series, one column per bar.
values = np.array([[10, 12, 13, 5, 20], [30, 7, 10, 25, 2], [10, 12, 13, 5, 20]])
row, column = values.shape  # (3, 5)
x_type = [x + 1 for x in range(column)]  # tick labels 1..column
ind = list(range(column))  # bar positions on the x-axis

# Scale every column to sum to 1 so all bars have the same total height.
values_normalized = values / np.sum(values, axis=0)

# Row i of bottom_values is where segment i starts (row 0 is all zeros);
# text_positions holds the vertical midpoint of every segment.
bottom_values = np.cumsum(values_normalized, axis=0)
bottom_values = np.vstack([np.zeros(values_normalized[0].size), bottom_values])
text_positions = (bottom_values[1:] + bottom_values[:-1]) / 2

series_labels = ['Series1', 'Series2', 'Series3']
series_colors = ['#5B9BD5', '#C00000', '#70AD47']

# Create figure
plt.figure(figsize=(8, 6))
for i, (label, color) in enumerate(zip(series_labels, series_colors)):
    plt.bar(ind, values_normalized[i], width=0.8, label=label, color=color,
            bottom=bottom_values[i])
    # Show text: the raw value, centred inside its segment
    for xpos, ypos, yval in zip(ind, text_positions[i], values[i]):
        plt.text(xpos, ypos, yval, horizontalalignment='center',
                 verticalalignment='center', color='white')

plt.xticks(ind, x_type)
plt.legend(loc='center', bbox_to_anchor=(0, 1.02, 1, 0.1), handlelength=1, handleheight=1, ncol=row)
plt.title('CHART TITLE', fontdict={'fontsize': 16, 'fontweight': 'bold', 'family': 'serif'}, y=1.1)

# Hide y-axis
plt.gca().axes.yaxis.set_visible(False)
plt.show()

Can't change width when annotating bar

I'm creating a stacked horizontal bar graph with 3 segments using the code below:
import matplotlib.pyplot as plt
import numpy as np
def create_stacked_hbar(data):
fig, ax = plt.subplots(figsize=(10, 10))
ylabels = list(data.keys())
labels = ['a', 'b', 'c', 'd', 'e', 'f']
c = []
v = []
for key, val in data.items():
c.append(key)
v.append(val)
v = np.array(v)
print(v)
plt.barh(range(len(c)), v[:,0], width=1, color='red',
edgecolor='w',linewidth=2, tick_label=ylabels, label=labels[0])
plt.barh(range(len(c)), v[:,1], width=1, left=v[:,0], color='orange',
edgecolor='w', linewidth=2, label=labels[1])
plt.barh(range(len(c)), v[:,2], width=1, left=(v[:,0]+v[:,1]), color='yellow',
edgecolor='w', linewidth=2, label=labels[2])
for p in ax.patches:
left, bottom, width, height = p.get_bbox().bounds
if width != 0.0:
ax.annotate(str(int(width)), xy=(left+width/2, bottom+height/2),
ha='center', va='center', size = 12)
plt.legend(bbox_to_anchor=(0, -0.15), loc=3, prop={'size': 14}, frameon=False)
plt.yticks(np.arange(len(ylabels)), ylabels)
plt.show()
data = {'A': [8, 7, 2], 'B': [0, 2, 0],
'C': [3, 2, 4], 'D': [0, 4, 0],
'E': [0, 1, 1], 'F': [0, 1, 0],
'G': [0, 0, 0]}
create_stacked_hbar(data)
The issue is that attempting to set width=1 on the bars throws a TypeError:
TypeError: <lambda>() got multiple values for argument 'width'
Removing width allows the code to work, but I do need to increase the width of the bars in the chart. I suspect this has to do with the annotation code I use in this case. Does anyone have any suggestions on getting around this?
Also note I am unable to use the "dataframe.plot.barh(data, stacked=True)" method via pandas to generate this chart.
You are making a horizontal bar plot, where the width parameter corresponds to the data, so in your example you are passing both v[:,0] and 1 as width. If you are trying to specify the height because you do not want whitespace between the bars, you need to set height=1. Consider this example:
import numpy as np
import matplotlib.pyplot as plt
# Seeded for reproducing
np.random.seed(1)
v1 = abs(np.random.randn(10))
v2 = abs(np.random.randn(10))
v3 = abs(np.random.randn(10))
c = range(10)
plt.title("Sample bar plot")
plt.barh(c, v1, height=1, alpha=0.8, color='r')
plt.barh(c, v2, height=1, left=v1, alpha=0.8, color='b')
plt.barh(c, v3, height=1, left=v1+v2, alpha=0.8, color='g')
plt.show()
This will give you
    
Where removing the height=1 specification would give you
    

Plotting three sub plot bar chart with correct labelling

I cannot seem to make the plots work with labels correctly. The code does generate three bar-chart subplots, but I want each of the three plots labelled with cr_lst. How do I ensure that every bar in every subplot is labelled with the corresponding entry of cr_lst?
plt.figure(0)
width = 0.35 # the width of the bars
cr_lst = ['A', 'B', 'C', 'D']
A_lst = [1, 2, 3, 4]
B_lst = [2, 2, 6, 7]
A_lst = [8, 8, 6, 7]
ind = np.arange(len(A_lst)) # the x locations for the groups
f, axarr = plt.subplots(3, sharex=True)
axarr[0].set_title('Three plots\n')
axarr[0].set_ylabel('A')
axarr[1].set_ylabel('B')
axarr[2].set_ylabel('C')
axarr[0].set_ylim(ymin=0.001,ymax=max(A_lst)*1.10)
axarr[1].set_ylim(ymin=0.001,ymax=max(B_lst)*1.10)
axarr[2].set_ylim(ymin=0.001,ymax=max(B_lst)*1.10)
axarr[0].grid()
axarr[1].grid()
axarr[2].grid()
rects1 = axarr[0].bar(ind, A_lst, width, color='r', linewidth=1,alpha=0.8, label=cr_lst)
rects2 = axarr[1].bar(ind, B_lst, width, color='y', linewidth=1,alpha=0.8, label=cr_lst)
rects3 = axarr[2].bar(ind, C_lst, width, color='blue', linewidth=1, alpha=0.8, label=cr_lst)
plt.savefig("ByC.png")
I'd like to have the labels shown on the x-axis.
This will get you the labels under each bar on every axes:
width = 0.35  # the width of the bars
cr_lst = ['A', 'B', 'C', 'D']
x = range(len(cr_lst))  # the x locations for the groups
A_lst = [1, 2, 3, 4]
B_lst = [2, 2, 6, 7]
C_lst = [8, 8, 6, 7]

# sharex=False so that every subplot draws its own x tick labels
f, axarr = plt.subplots(3, sharex=False)
axarr[0].set_title('Three plots\n')
axarr[0].set_ylabel('A')
axarr[1].set_ylabel('B')
axarr[2].set_ylabel('C')

# each axis gets 10% headroom above the tallest bar it actually shows
axarr[0].set_ylim(ymin=0.001, ymax=max(A_lst)*1.10)
axarr[1].set_ylim(ymin=0.001, ymax=max(B_lst)*1.10)
axarr[2].set_ylim(ymin=0.001, ymax=max(C_lst)*1.10)
axarr[0].grid()
axarr[1].grid()
axarr[2].grid()

rects1 = axarr[0].bar(x, A_lst, width, color='r', align='center', linewidth=1, alpha=0.8)
rects2 = axarr[1].bar(x, B_lst, width, color='y', align='center', linewidth=1, alpha=0.8)
rects3 = axarr[2].bar(x, C_lst, width, color='blue', align='center', linewidth=1, alpha=0.8)

# put the cr_lst labels under the bars on every axes
for ax in axarr:
    ax.set_xticks(x)
    ax.set_xticklabels(cr_lst)

plt.savefig("ByC.png")
Note that sharex=False in plt.subplots. If you set it to True, the x tick labels are hidden on every axes except the bottom one.
Also note the use of align='center' in .bar().
This yields:

Categories

Resources