Related
The annotation text at the last bar has been cut off somehow.
What's the proper way to fix it?
#!/usr/bin/env python3
import pandas as pd
import re
import datetime
import os
import plotly.graph_objects as go
import numpy as np
import math
import datetime
def save_fig(fig, pngname):
    """Write *fig* to *pngname* as a PNG and echo the file name.

    The image is exported with width=800, height=400 and scale=1; afterwards
    the file name is printed wrapped in double square brackets.
    """
    fig.write_image(pngname, format="png", width=800, height=400, scale=1)
    print("[[{}]]".format(pngname))
def plot_bars(df, pngname):
    """Draw a grouped bar chart from *df* and save it to *pngname*.

    The first column of *df* supplies the x categories. Every remaining
    column becomes one bar series whose values are written outside the bars
    at a -25 degree angle. No explicit y-axis range is set.
    """
    palette = ['#5891ad', '#004561', '#ff6f31', '#1c7685', '#0f45a8', '#4cdc8b', '#0097a7']
    category_col = df.columns[0]
    hover_lines = ['id:%{meta}', 'ts: %{x|%H:%M:%S}', 'val: %{y:.1f}']

    # One trace per value column; the palette is indexed by series position.
    bars = [
        go.Bar(
            name=series_col,
            x=df[category_col],
            y=df[series_col],
            meta=df.index,
            text=df[series_col],
            textposition="outside",
            textangle=-25,
            textfont_color="black",
            marker_color=palette[slot],
            hovertemplate='<br>'.join(hover_lines),
        )
        for slot, series_col in enumerate(df.columns[1:])
    ]

    fig = go.Figure()
    fig.add_traces(bars)

    # Both axes share the same thin black axis line.
    axis_line = dict(showline=True, linecolor='black', color='black', linewidth=.5)
    x_axis = dict(axis_line, tickangle=-25, ticks='outside')
    y_axis = dict(
        axis_line,
        dtick=10,
        showgrid=True,
        gridcolor='#ececec',
        gridwidth=.5,
        griddash='solid',
        zeroline=True,
        zerolinecolor='grey',
        zerolinewidth=.5,
        showticklabels=True,
    )

    fig.update_layout(
        margin=dict(l=10, t=40, r=10, b=40),
        plot_bgcolor='#ffffff',
        paper_bgcolor='#ffffff',
        title="Boot progress",
        xaxis_title="Keypoints",
        yaxis_title="Timestamp(secs)",
        title_x=0.5,
        barmode='group',
        bargap=0.05,
        bargroupgap=0.0,
        legend=dict(x=.02, y=1),
        xaxis=x_axis,
        yaxis=y_axis,
    )
    save_fig(fig, pngname)
def main():
    """Build the five-row demo frame and render it to ``demo.png``."""
    columns = {
        "Kepoint": ["A", "B", "C", "D", "E"],
        "g1": [10, 12, 14, 16, 18],
        "g2": [12, 11, 13, 15, 19],
    }
    plot_bars(pd.DataFrame(columns), "demo.png")


main()
output png:
Although you could hardcode the range of the y-axes to have a larger maximum value, it's better to use a more flexible solution. Plotly's default behavior is to set the y-range by calculating [y_min-padding, y_max+padding] where the padding = (y_max-y_min)/16.
So we can include a padding multiplier to make the padding a bit larger, and specify the new range by passing the argument range=[y_min-y_padding, y_max+y_padding] to yaxis:
def plot_bars(df, pngname, padding_multiplier=1.25):
    """Draw a grouped bar chart from *df* and save it to *pngname*.

    The first column of *df* holds the x categories; every other column is
    plotted as one bar series with its values written outside the bars. The
    y-axis range is widened beyond the data span so the rotated value labels
    are not cut off at the edge of the plot area.

    Args:
        df: frame whose first column is the x axis and whose remaining
            columns are the numeric series to plot.
        pngname: output file name, forwarded to ``save_fig``.
        padding_multiplier: factor applied to the baseline padding of
            ``(y_max - y_min) / 16``; larger values leave more headroom.
    """
    colors = ['#5891ad', '#004561', '#ff6f31', '#1c7685', '#0f45a8', '#4cdc8b', '#0097a7']
    xname = df.columns[0]
    ynames = list(df.columns[1:])
    hovertemplate = '<br>'.join([
        'id:%{meta}',
        'ts: %{x|%H:%M:%S}',
        'val: %{y:.1f}',
    ])

    traces = [
        go.Bar(
            name=yname,
            x=df[xname],
            y=df[yname],
            meta=df.index,
            text=df[yname],
            textposition="outside",
            textangle=-25,
            textfont_color="black",
            # Cycle the palette so more than len(colors) series do not raise
            # IndexError.
            marker_color=colors[i % len(colors)],
            hovertemplate=hovertemplate,
        )
        for i, yname in enumerate(ynames)
    ]
    fig = go.Figure()
    fig.add_traces(traces)

    yaxis = dict(
        dtick=10,
        showline=True,
        linecolor='black',
        color='black',
        linewidth=.5,
        showgrid=True,
        gridcolor='#ececec',
        gridwidth=.5,
        griddash='solid',
        zeroline=True,
        zerolinecolor='grey',
        zerolinewidth=.5,
        showticklabels=True,
    )
    if ynames:
        # Use every plotted column rather than hard-coded column names, so the
        # range matches whatever frame the caller passes in.
        y_min = df[ynames].min().min()
        y_max = df[ynames].max().max()
        # Only pin the range when the data span is non-empty; otherwise (no
        # rows, all-NaN, a single distinct value) the axis keeps its default
        # automatic range.
        if y_max > y_min:
            y_padding = padding_multiplier * (y_max - y_min) / 16
            yaxis["range"] = [y_min - y_padding, y_max + y_padding]

    fig.update_layout(
        margin=dict(l=10, t=40, r=10, b=40),
        plot_bgcolor='#ffffff',
        paper_bgcolor='#ffffff',
        title="Boot progress",
        xaxis_title="Keypoints",
        yaxis_title="Timestamp(secs)",
        title_x=0.5,
        barmode='group',
        bargap=0.05,
        bargroupgap=0.0,
        legend=dict(x=.02, y=1),
        xaxis=dict(
            tickangle=-25,
            showline=True,
            linecolor='black',
            color='black',
            linewidth=.5,
            ticks='outside',
        ),
        yaxis=yaxis,
    )
    save_fig(fig, pngname)
Hi, could someone point out what I am doing wrong? Sometimes Bokeh does not display a color. If I run the script there are only red points, but when I change 'green' to 'lime' there are points of two colors. Why? What am I doing wrong?
import numpy as np
from scipy.signal import find_peaks
# Find peaks
max_peaks, max_other = find_peaks(price, height=0.1)
print(len(max_peaks))
min_peaks, min_other = find_peaks(-price)
print(len(min_peaks))
# dtype=object keeps every colour name at full length. Without it np.full
# infers a fixed-width 4-character string dtype from 'None', so assigning
# 'green' silently stores 'gree' (not a colour name), while 'red' and 'lime'
# happen to fit.
colors = np.full(len(price), 'None', dtype=object)
colors[max_peaks] = 'red'
colors[min_peaks] = 'green'
from bokeh.models import CrosshairTool

crosshair = CrosshairTool(dimensions='both')
width = 1000
height = 600

# Column data: raw dates converted both to datetimes (x axis) and to strings
# (hover text), the closing price, and the per-point marker colour.
price_dates = df['date'].to_numpy()
price_dates_dt = np.apply_over_axes(convert_int_to_datetime, price_dates, axes=0)
price_dates_str = np.apply_over_axes(convert_int_to_datetime_to_str, price_dates, axes=0)
price_source = ColumnDataSource(data=dict(
    date=price_dates_dt,
    date_str=price_dates_str,
    price=df['close'].to_numpy(),
    color=colors,
))

f1 = figure(
    title='price',
    x_axis_type='datetime',
    tools='pan,wheel_zoom,box_zoom,reset,save,box_select,zoom_in,zoom_out, hover',
    sizing_mode='stretch_width',
    plot_width=width,
    plot_height=height,
)
f1.toolbar.logo = None
f1.add_tools(crosshair)

price_l = f1.line(source=price_source, x='date', y='price', legend_label='price',
                  line_color='gray', line_dash='solid', line_width=0.8)
# 'color' names the data-source column, so each marker takes its own colour.
price_s = f1.scatter(source=price_source, x='date', y='price', legend_label='price',
                     fill_color='color', line_color='color', size=2)

# Hover fields: '@name' reads a data-source column and '$name' a special
# variable. A formatter is only applied when its key is spelled exactly like
# the field in the tooltip, so the keys below mirror the tooltip specs.
f1.add_tools(HoverTool(
    renderers=[price_s, price_l],
    tooltips=[
        ('index', '$index'),
        ('(x,y)', '($x{%F}, $y{0.00})'),
        # ('date', '@date{%F}'),
        ('date_str', '@date_str'),
        ('price', '@price{%0.2f}'),
    ],
    formatters={
        '$x': 'datetime',      # lets the %F spec on $x render as a date
        '@date': 'datetime',   # used by the commented-out 'date' tooltip
        '@price': 'printf',    # printf formatter needs a %-style spec
    },
))

f1.legend.location = 'top_left'
f1.legend.click_policy = 'hide'
f1.xaxis.axis_label = 'Time'
f1.yaxis.axis_label = 'price'
f1.background_fill_color = '#dfe9f0'
# f1.background_fill_alpha = 0.5
f1.xgrid.grid_line_color = 'white'
f1.ygrid.grid_line_color = 'white'
show(f1)
I have training and testing time series datasets that I would like to combine together, to show how well the forecast did at predicting the testing dataset.
Here is the toy code to reproduce the data:
import pandas as pd
import seaborn as sns
# Month-start timestamps as ISO strings with nanosecond precision:
# 48 training months (2017-2020) followed by 12 test months (2021).
_STAMP = '{}-{:02d}-01T00:00:00.000000000'
train_date = [_STAMP.format(year, month) for year in range(2017, 2021) for month in range(1, 13)]
test_date = [_STAMP.format(2021, month) for month in range(1, 13)]

train_eaches = [
    1915.0, 1597.0, 1533.0, 1601.0, 1585.0, 1675.0, 1760.0, 1910.0, 1886.0, 1496.0, 1545.0, 1538.0,
    1565.0, 1350.0, 1686.0, 1535.0, 1629.0, 1589.0, 1605.0, 1560.0, 1353.0, 1366.0, 1246.0, 1423.0,
    1579.0, 1368.0, 1727.0, 1687.0, 1872.0, 1824.0, 2161.0, 1065.0, 727.0, 1567.0, 1509.0, 1687.0,
    1647.0, 1476.0, 1231.0, 1165.0, 1341.0, 1425.0, 1502.0, 1450.0, 1497.0, 1259.0, 1207.0, 1132.0,
]
test_eaches = [
    1252.0, 1038.0, 1184.0, 1200.0, 1219.0, 1339.0,
    1504.0, 2652.0, 1724.0, 1029.0, 711.0, 1530.0,
]
test_predictions = [
    1914.7225, 1490.4715, 1317.4765, 1341.263375, 1459.5875, 1534.2375,
    1467.208875, 1306.2145, 1171.652625, 1120.641, 1138.912, 1171.914125,
]
test_credibility_down = [
    1805.0, 1303.0, 1017.0, 915.975, 870.975, 797.0,
    657.0, 507.0, 392.0, 320.0, 272.0, 235.0,
]
test_credibility_up = [
    2029.025, 1702.0, 1681.025, 1908.0, 2329.05, 2695.025,
    2867.075, 2835.0, 2815.075, 2949.0, 3278.025, 3679.0,
]

train_df = pd.DataFrame({'date': train_date, 'eaches': train_eaches})
# 'predictions' must be part of the frame: the plotting code reads
# test_df['predictions'], which raised KeyError while test_predictions was
# defined but never attached.
test_df = pd.DataFrame({
    'date': test_date,
    'eaches': test_eaches,
    'predictions': test_predictions,
    '2.5% Credibilty': test_credibility_down,
    '97.5% Credibility': test_credibility_up,
})
Here are the two plots (train and test) and code that produces those plots:
# Figure 1: the training observations on their own.
fig = plt.figure(figsize=(15, 4))
sns.scatterplot(x=train_df['date'], y=train_df['eaches'], label='Train Eaches', color='black')

# Figure 2: forecast, credibility band and held-out observations.
fig = plt.figure(figsize=(15, 4))
sns.lineplot(x=test_df['date'], y=test_df['predictions'], label='Posterior Prediction', color='red')
for bound_col in ('2.5% Credibilty', '97.5% Credibility'):
    band_axes = sns.lineplot(x=test_df['date'], y=test_df[bound_col], label='Credibilty Interval',
                             color='skyblue', alpha=.3)

# Lines were drawn in the order prediction, lower bound, upper bound; shade
# the area between the two bounds.
drawn = band_axes.get_lines()
plt.fill_between(drawn[0].get_xdata(), drawn[1].get_ydata(), drawn[2].get_ydata(),
                 color='skyblue', alpha=.3)
sns.scatterplot(x=test_df['date'], y=test_df['eaches'], label='True Value', color='black')
plt.legend()
Basically, I would like to join the two x-axes into one continuous axis, and maybe add a vertical line at the start of the test period.
Put them on the same axes and use axvline to mark the prediction start. Also, you can fix the overlapping dates on the x-axis by casting the date columns as proper datetimes (train_df["date"] = pd.to_datetime(train_df.date)).
import matplotlib.pyplot as plt
import seaborn as sns
# One shared Axes for the training points, the forecast and the test points.
fig, ax = plt.subplots(1, 1, figsize=(15, 4))
sns.scatterplot(x=train_df['date'], y=train_df['eaches'], label='Train Eaches',
                color='black', ax=ax)
sns.lineplot(x=test_df['date'], y=test_df['predictions'], label='Posterior Prediction',
             color='red', ax=ax)
# Every call targets `ax` explicitly instead of relying on the implicit
# current Axes.
for bound_col in ('2.5% Credibilty', '97.5% Credibility'):
    sns.lineplot(x=test_df['date'], y=test_df[bound_col], label='Credibilty Interval',
                 color='skyblue', alpha=.3, ax=ax)
# Shade the band straight from the data columns rather than picking lines
# out of ax.get_lines() by drawing order.
ax.fill_between(test_df['date'], test_df['2.5% Credibilty'], test_df['97.5% Credibility'],
                color='skyblue', alpha=.3)
sns.scatterplot(x=test_df['date'], y=test_df['eaches'], label='True Value', color='black', ax=ax)
ax.legend()
# Mark the start of the test period; .iloc[0] is positional, so it works
# whatever index test_df carries.
ax.axvline(test_df['date'].iloc[0])
I have a table generated by matplotlib, and I want to insert a title for it. Does anyone know how to do it?
# NOTE(review): `fig` is used on the next line before it is assigned below;
# presumably an earlier figure exists outside this snippet — confirm.
ax = fig.add_subplot(111)
fig = plt.figure(constrained_layout=True)
spec2 = gridspec.GridSpec(ncols=2, nrows=3, figure=fig)
ax1, ax2 = (fig.add_subplot(spec2[0, col]) for col in range(2))
for panel in (ax1, ax2):
    panel.axis('off')

ni_tot = df['NN_Ni_Tot']
ni_tot_numeric = ni_tot.apply(pd.to_numeric)

# Left half of each row: a summary statistic and its value.
summary_cells = [
    ("Ni_Tot ", "NN"),
    ("N. of Samples", count),
    ("Minimum", round(min(ni_tot_numeric), 3)),
    ("Maximum", round(max(ni_tot_numeric), 3)),
    ("Average", round(statistics.mean(ni_tot_numeric), 3)),
    ("Variance", round(ni_tot.var(), 2)),
    ("Std Deviation", round(ni_tot.std(), 2)),
]
# Right half of each row: a quantile label and the probability it stands for.
quantile_cells = [
    ("2.5%", 0.025),
    ("5%", 0.05),
    ("25%", 0.25),
    ("Median", 0.5),
    ("75%", 0.75),
    ("95%", 0.95),
    ("97.5%", 0.975),
]
table_data = [
    [stat_name, stat_value, q_name, round(ni_tot.quantile(q), 3)]
    for (stat_name, stat_value), (q_name, q) in zip(summary_cells, quantile_cells)
]

table = ax1.table(cellText=table_data, loc='center', cellLoc='center')
table.set_fontsize(14)
table.scale(1.5, 1.4)
https://i.stack.imgur.com/P27Vu.png
# Sets the title on the Axes object `ax`.
# NOTE(review): the table in the question is drawn on `ax1`, not `ax` — confirm which Axes is meant.
ax.set_title("Your title")
Relevant documentation
# Title the Axes that holds the table (the table is created with ax1.table).
ax1.set_title("Your title")
The line of code above worked, but the title was positioned in the middle of the table, overlapping the data, so I had to adjust it manually:
# y and pad were tuned by hand because the default title placement overlapped the table data.
ax1.set_title("NN_NI", fontsize=8, y=1.8, pad=-14)
I have a code which will go through three dictionaries, and make some plots if the keys all match. I've been running into an odd issue due to the use of the matplotlib table.
When I first got this code to run, I had no issues finishing the whole loop. Now I am encountering a time out error by the second iteration
I tried moving the table out of the for loop.
I added plt.close('all')
I also tried importing matplotlib again at the end of the loop in hopes of resetting something in the backend.
# Text rendering mode does not change between iterations, so set it once.
# NOTE(review): the lock-file TimeoutError reported for this script involves
# Matplotlib's tex.cache and disappeared with usetex=False — confirm that TeX
# rendering is really needed here.
rc('text', usetex=True)

for k, v in oct_dict.items():
    # A report is produced only for keys present in all three dictionaries.
    # Direct lookups replace the scan over every (k, k2, k3) combination.
    if k not in stu_dict or k not in oct2_dict:
        continue
    v2 = stu_dict[k]
    v3 = oct2_dict[k]

    with PdfPages('{}.pdf'.format(k)) as pdf:
        # ---- Page 1: summary charts on a 2 x 2 grid ----
        fig = plt.figure(figsize=(8, 10.5))
        gs = GridSpec(2, 2)

        # Top-left: this key's a_1920 value next to the overall mean, both
        # scaled by 100.
        ax0 = fig.add_subplot(gs[0, 0])
        ax0.bar(x=np.arange(2), height=[float(v['a_1920']) * 100, mean_a_1920 * 100], color=nice)
        plt.xticks(np.arange(2), ['{}'.format(k), 'D75'])
        for p in ax0.patches:
            bar_height = p.get_height()
            ax0.annotate('{:0.2f}'.format(float(bar_height)),
                         (p.get_x() + .1, bar_height * .75), weight='bold')

        # Top-right: text-only panel with the headline counts.
        ax1 = fig.add_subplot(gs[0, 1])
        n_students = str(len(v2['student_id']))
        n_incomplete = int(v['c_1920'])
        n_age = int(v['b_1920'])
        n_invalid = int(v['d_1920'])
        props = dict(boxstyle='round', facecolor='white', alpha=0.0)
        ax1.text(0.0, 0.95, 'Number of Age : {}'.format(n_age), transform=ax1.transAxes,
                 fontsize=12, verticalalignment='top')
        ax1.text(0.0, 0.85, 'Number of Incomplete : {}'.format(n_incomplete), transform=ax1.transAxes,
                 fontsize=12, verticalalignment='top')
        ax1.text(0.0, 0.75, 'Number of Invalid : {}'.format(n_invalid), transform=ax1.transAxes,
                 fontsize=12, verticalalignment='top')
        ax1.text(0.0, 0.65, 'Number of who will reach Age:\n{}'.format(n_students),
                 transform=ax1.transAxes, fontsize=12, verticalalignment='top')
        ax1.axis('off')
        ax1.axis('tight')

        fig.text(0.3, 1, 'Monthly Summary ' + dt.date.today().strftime("%b %d, %Y"),
                 fontsize=12, verticalalignment='top', bbox=props)

        # Bottom-left: stacked bar of complete vs. incomplete.
        ax2 = fig.add_subplot(gs[1, 0])
        plt.sca(ax2)
        n_complete = int(v3['com'])
        plt.bar(np.arange(1), n_complete, width=.25, color='b', label='Complete')
        plt.bar(np.arange(1), int(v3['inc']), width=.25, bottom=n_complete, color='r',
                label='Incomplete')
        plt.legend()
        for p in ax2.patches:
            ax2.annotate((p.get_height()), (p.get_x() + .1, p.get_height() * .75), weight='bold')
        ax2.set_xticks([])
        ax2.set_title('Students who will turn 15 later in the school year')

        # Bottom-right: intentionally blank.
        ax3 = fig.add_subplot(gs[1, 1])
        ax3.axis('off')

        plt.tight_layout()
        pdf.savefig()
        plt.close('all')

        # ---- Page 2: per-student table ----
        fig = plt.figure(figsize=(8, 11.5))
        gs = GridSpec(1, 1)
        axs = fig.add_subplot(gs[0])

        v2 = v2.drop(['Grand Total', 'birth_dte', 'loc'], axis=1)
        binarymap = {0: 'No', 1: 'Yes'}
        v2['Plan Not Complete'] = v2['Plan Not Complete'].map(binarymap)
        v2['Plan Already Complete'] = v2['Plan Already Complete'].map(binarymap)

        # NOTE(review): the original post elided the six custom column titles
        # ("six column titles here"); the frame's own column names stand in so
        # the label count always matches the table width — replace as needed.
        labels = list(v2.columns)
        # One list of cell values per row, with no exception swallowing.
        cell_text = v2.values.tolist()

        table = axs.table(cellLoc='center', cellText=cell_text, colLabels=labels,
                          rowLoc='center', colLoc='center', loc='upper center', fontsize=32)
        table.set_fontsize(32)
        table.scale(1, 1.5)
        axs.axis('off')
        pdf.savefig()
        plt.show()
        plt.close('all')
TimeoutError: Lock error: Matplotlib failed to acquire the following lock file:
C:\Users\myusername.matplotlib\tex.cache\23c95fa5c37310802233a994d78d178d.tex.matplotlib-lock
NOTE: If some of the key names don't match in this code, that is on purpose; I had to change them for this post since it is public. The error is thrown on the second iteration, once the code reaches the axs.table line.
I got everything to run properly after using the conda command prompt to clean the environments
conda clean --all
Something that works, but that I would have liked to avoid, is simply not using TeX for this script: with the rc parameter text.usetex set to False, the code also finished running pretty quickly.