Can't plot heatmap in Bokeh with datetime x axis

Can't plot heatmap in Bokeh with datetime x axis - python

I'm trying to plot the following simple heatmap:
data = {
'value': [1, 2, 3, 4, 5, 6],
'x': [datetime(2016, 10, 25, 0, 0),
datetime(2016, 10, 25, 8, 0),
datetime(2016, 10, 25, 16, 0),
datetime(2016, 10, 25, 0, 0),
datetime(2016, 10, 25, 8, 0),
datetime(2016, 10, 25, 16, 0)],
'y': ['param1', 'param1', 'param1', 'param2', 'param2', 'param2']
}
hm = HeatMap(data, x='x', y='y', values='value', stat=None)
output_file('heatmap.html')
show(hm)
Unfortunately it doesn't render properly:
I've tried setting x_range but nothing seems to work.
I've managed to get something working with the following code:
d1 = data['x'][0]
d2 = data['x'][-1]
p = figure(
x_axis_type="datetime", x_range=(d1, d2), y_range=data['y'],
tools='xpan, xwheel_zoom, reset, save, resize,'
)
p.rect(
source=ColumnDataSource(data), x='x', y='y', width=12000000, height=1,
)
However as soon as I try to use the zoom tool, I get the following errors in console:
Uncaught Error: Number property 'start' given invalid value:
Uncaught TypeError: Cannot read property 'indexOf' of null
I'm using Bokeh 0.12.3.

The bokeh.charts API, including HeatMap, was deprecated and removed in 2017. You should use the stable and supported bokeh.plotting API instead. With your data above, a complete example:
from datetime import datetime
from bokeh.plotting import figure, show
from bokeh.transform import linear_cmap
data = {
'value': [1, 2, 3, 4, 5, 6],
'x': [datetime(2016, 10, 25, 0, 0),
datetime(2016, 10, 25, 8, 0),
datetime(2016, 10, 25, 16, 0),
datetime(2016, 10, 25, 0, 0),
datetime(2016, 10, 25, 8, 0),
datetime(2016, 10, 25, 16, 0)],
'y': ['param1', 'param1', 'param1', 'param2', 'param2', 'param2']
}
p = figure(x_axis_type='datetime', y_range=('param1', 'param2'))
EIGHT_HOURS = 8*60*60*1000
p.rect(x='x', y='y', width=EIGHT_HOURS, height=1, line_color="white",
fill_color=linear_cmap('value', 'Spectral6', 1, 6), source=data)
show(p)

Related

Range slider Plotly : How not to add a line

I have the code below, which produces a subplot with a single range slider. However, I want the range slider to be attached to row 2 rather than creating a replicated row 2. Is there a way to do it?
import pandas as pd
import plotly.express as px
import plotly.subplots as sp
from datetime import datetime
data1 = [
[1, datetime(2022, 11, 26)],
[7, datetime(2022, 11, 29)],
[4, datetime(2022, 11, 30)],
]
df1 = pd.DataFrame(data1, columns=["value", "date"])
data2 = [
["unique_row","a", datetime(2022, 11, 26),datetime(2022, 11, 27)],
["unique_row","b", datetime(2022, 11, 27),datetime(2022, 11, 30)],
["unique_row","c", datetime(2022, 11, 30),datetime(2022, 12, 2)],
]
df2 = pd.DataFrame(data2, columns=["unique_row","value", "dates_begin","date_end"])
fig1= px.line(df1, x="date", y="value")
fig2 = px.timeline(df2, x_start="dates_begin", x_end="date_end", y="unique_row", color="value")
fig_sub = sp.make_subplots(rows=2,
shared_xaxes=True)
fig_sub.append_trace(fig1['data'][0], row=1, col=1)
fig_sub.append_trace(fig2['data'][0], row=2, col=1)
fig_sub.update_layout(xaxis2_rangeslider_visible=True)

dateutil rruleset: How to combine EXRULE and RDATE correctly?

I have a rruleset with a daily recurrence rule and now I am trying to combine an RDATE with an EXRULE.
from dateutil.rrule import rruleset, rrule, DAILY, FR
rules = rruleset()
daily = rrule(freq=DAILY, dtstart=datetime(2022, 10, 12))
rules.rrule(daily)
not_on_friday = rrule(freq=DAILY, byweekday=FR, dtstart=datetime(2022, 10, 12))
but_on_friday_21th = datetime(2022, 10, 21)
rules.exrule(not_on_friday)
rules.rdate(but_on_friday_21th)
rules.between(datetime(2022,10,12), datetime(2022,10,24))
>>
[datetime.datetime(2022, 10, 13, 0, 0),
datetime.datetime(2022, 10, 15, 0, 0), # the 14th is excluded as expected
datetime.datetime(2022, 10, 16, 0, 0),
datetime.datetime(2022, 10, 17, 0, 0),
datetime.datetime(2022, 10, 18, 0, 0),
datetime.datetime(2022, 10, 19, 0, 0),
datetime.datetime(2022, 10, 20, 0, 0),
datetime.datetime(2022, 10, 22, 0, 0), # but the 21th is also excluded
datetime.datetime(2022, 10, 23, 0, 0)]
Now, confusingly, when I combine my EXRULE with an EXDATE it works:
rules = rruleset()
daily = rrule(freq=DAILY, dtstart=datetime(2022, 10, 12))
rules.rrule(daily)
not_on_friday = rrule(freq=DAILY, byweekday=FR, dtstart=datetime(2022, 10, 12))
but_also_not_on_the_22th_a_saturday = datetime(2022, 10, 22)
rules.exrule(not_on_friday)
rules.exdate(but_also_not_on_the_22th_a_saturday)
rules.between(datetime(2022,10,12), datetime(2022,10,24))
>>
[datetime.datetime(2022, 10, 13, 0, 0),
datetime.datetime(2022, 10, 15, 0, 0), # the 14th still excluded
datetime.datetime(2022, 10, 16, 0, 0),
datetime.datetime(2022, 10, 17, 0, 0),
datetime.datetime(2022, 10, 18, 0, 0),
datetime.datetime(2022, 10, 19, 0, 0),
datetime.datetime(2022, 10, 20, 0, 0), # the 22th also excluded as expected
datetime.datetime(2022, 10, 23, 0, 0)]
So, if possible at all, how to combine RDATE and EXRULE in my rruleset?

In your answer you note that exrule is applied last, after all other inclusive rules which actually does appear to be in the RFC. However, at least in dateutil, you can use an rruleset as the argument to exrule, so to accomplish what you want, you can try filtering out the date that you want included from the rule that gets passed to exrule, like so:
from datetime import datetime
from dateutil.rrule import rruleset, rrule, DAILY, WEEKLY, FR
# Create an rruleset that defaults to every day
rules = rruleset()
daily = rrule(freq=DAILY, dtstart=datetime(2022, 10, 12))
rules.rrule(daily)
# Create an rruleset corresponding to the days we want to *exclude*: every
# Friday, except 2022-10-21
ex_set = rruleset()
ex_set.rrule(rrule(freq=WEEKLY, byweekday=FR, dtstart=datetime(2022, 10, 14)))
ex_set.exdate(datetime(2022, 10, 21))
# Use our second rule set as an exrule
rules.exrule(ex_set)
rules.between(datetime(2022,10,12), datetime(2022,10,24))
Since the date you want to include never appears in the exrule, it is not filtered out:
>>> print("\n".join(map(str,
... map(datetime.date,
... rules.between(datetime(2022, 10, 12),
... datetime(2022, 10, 24))))))
2022-10-13
2022-10-15
2022-10-16
2022-10-17
2022-10-18
2022-10-19
2022-10-20
2022-10-21
2022-10-22
2022-10-23

So apparently there is no such thing as an EXRULE in the iCalendar specs. It's just RRULEs. And dateutil's exrule function states in its docstring:
def exrule(self, exrule):
""" Include the given rrule instance in the recurrence set exclusion
list. Dates which are part of the given recurrence rules will not
be generated, even if some inclusive rrule or rdate matches them.
"""
So, even if I add an RDATE, if it is excluded by a rule added by exrule it will not show up in my occurrences. The same goes for the exdate function, hence my working second example.

What is plotted when string data is passed to the matplotlib API?

# first, some imports:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Let's say I want to make a scatter plot, using this data:
np.random.seed(42)
x=np.arange(0,50)
y=np.random.normal(loc=3000,scale=1,size=50)
Plot via:
plt.scatter(x,y)
I get this answer:
Ok, let's create a dataframe first:
df=pd.DataFrame.from_dict({'x':x,'y':y.astype(str)})
(I am aware that I am storing y as str - this is a reproducible example, and I do this to reflect the real use case.)
Then, if I do:
plt.scatter(df.x,df.y)
I get:
What am I seeing in this second plot? I thought that the second plot must be showing the x column plotted against the y column, which are converted to float. This is clearly not the case.

Matplotlib doesn't automatically convert str values to numerical, so your y values are treated as categorical. As far as Matplotlib is concerned, the distance from '1.0' to '0.9' is the same as the distance from '1.0' to '100.0'.
So, the y-axis on the plot will be the same as range(len(y)) (since the difference between all categorical values is the same) with labels assigned from the categorical values.
Since your x is a range equal to range(50), and now your y is a range too (also equal to range(50)), it plots x = y, with y-labels set to respective str value.

As per the excellent answer by dm2, when you pass y as a string, y is simply being treated as arbitrary string labels, and being plotted one after the other in the order in which they appear. To demonstrate, here's an even simpler example.
from matplotlib import pyplot as plt
x = [1, 2, 3, 4]
y = [5, 25, 10, 1] # these are ints
plt.scatter(x, y)
So far so good. Now, different string y values.
y = list("abcd")
plt.scatter(x, y)
You can see how it just takes the y labels and just drops them on the axis one after another.
Finally,
y = ["5", "25", "10", "1"]
plt.scatter(x, y)
Compare this with the previous results and now it should become obvious what's going on.

It's more obvious if the labels and locations are extracted, that the API plots the strings as labels, and the axis locations are 0-indexed numbers based on how many (len) categories exist.
.get_xticks() and .get_yticks() extract a list of the numeric locations.
.get_xticklabels() and .get_yticklabels() extract a list of matplotlib.text.Text, Text(x, y, text).
There are fewer numbers in the list for the y axis because there were duplicate values as a result of rounding.
This applies to any APIs, like seaborn or pandas that use matplotlib as the backend.
sns.scatterplot(data=df, x='x_num', y='y', ax=ax1)
ax1.scatter(data=df, x='x_num', y='y')
ax1.plot('x_num', 'y', 'o', data=df)
Labels, Locs, and Text
print(x_nums_loc)
print(y_nums_loc)
print(x_lets_loc)
print(y_lets_loc)
print(x_lets_labels)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
[Text(0, 0, 'A'), Text(1, 0, 'B'), Text(2, 0, 'C'), Text(3, 0, 'D'), Text(4, 0, 'E'),
Text(5, 0, 'F'), Text(6, 0, 'G'), Text(7, 0, 'H'), Text(8, 0, 'I'), Text(9, 0, 'J'),
Text(10, 0, 'K'), Text(11, 0, 'L'), Text(12, 0, 'M'), Text(13, 0, 'N'), Text(14, 0, 'O'),
Text(15, 0, 'P'), Text(16, 0, 'Q'), Text(17, 0, 'R'), Text(18, 0, 'S'), Text(19, 0, 'T'),
Text(20, 0, 'U'), Text(21, 0, 'V'), Text(22, 0, 'W'), Text(23, 0, 'X'), Text(24, 0, 'Y'),
Text(25, 0, 'Z')]
Imports, Data, and Plotting
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
import string
# sample data
np.random.seed(45)
x_numbers = np.arange(100, 126)
x_letters = list(string.ascii_uppercase)
y= np.random.normal(loc=3000, scale=1, size=26).round(2)
df = pd.DataFrame.from_dict({'x_num': x_numbers, 'x_let': x_letters, 'y': y}).astype(str)
# plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3.5))
df.plot(kind='scatter', x='x_num', y='y', ax=ax1, title='X Numbers', rot=90)
df.plot(kind='scatter', x='x_let', y='y', ax=ax2, title='X Letters')
x_nums_loc = ax1.get_xticks()
y_nums_loc = ax1.get_yticks()
x_lets_loc = ax2.get_xticks()
y_lets_loc = ax2.get_yticks()
x_lets_labels = ax2.get_xticklabels()
fig.tight_layout()
plt.show()

DateLocator in matplotlib to show the first days of both the week and the month

I would like to create a DateLocator in matplotlib that selects all Mondays and the first days of the month. As matplotlib uses the dateutil library I read the docs of how to use RRuleLocator with rrule objects. With the rruleset object from dateutil I can achieve the required functionality:
>>> rrset = rruleset()
>>> rrset.rrule(rrule(DAILY, byweekday=MO, count=5))
>>> rrset.rrule(rrule(DAILY, bymonthday=1, count=5))
>>> list(rrset)
[datetime.datetime(2020, 11, 30, 16, 10, 2),
datetime.datetime(2020, 12, 1, 16, 10, 2),
datetime.datetime(2020, 12, 7, 16, 10, 2),
datetime.datetime(2020, 12, 14, 16, 10, 2),
datetime.datetime(2020, 12, 21, 16, 10, 2),
datetime.datetime(2020, 12, 28, 16, 10, 2),
datetime.datetime(2021, 1, 1, 16, 10, 2),
datetime.datetime(2021, 2, 1, 16, 10, 2),
datetime.datetime(2021, 3, 1, 16, 10, 2),
datetime.datetime(2021, 4, 1, 16, 10, 2)]
But unfortunately I did not manage to find out how to use rruleset with matplotlib. RRuleLocator expects a rrulewrapper object (defined in matplotlib) that hides away the rrule instance and I can not use it with rruleset. Any other way to do this?

If I understood you correctly, calling .set_xticks(list(rrset)) might be enough. For example:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import dateutil
from dateutil.rrule import *
import datetime
import numpy as np
rrset = rruleset()
rrset.rrule(rrule(DAILY, byweekday=MO, count=5))
rrset.rrule(rrule(DAILY, bymonthday=1, count=5))
print(list(rrset))
## generate dates 90 days into the future
base = datetime.datetime.today()
dates = [base + datetime.timedelta(days=3*x) for x in range(30)]
fig = plt.figure(figsize=(10,5))
ax = plt.subplot(111)
ax.set_autoscale_on(True)
## simply plot dates over dates
ax.plot(dates,dates,marker='s')
ax.set_xticks(list(rrset))
formatter = mdates.DateFormatter('%m/%d/%y')
ax.xaxis.set_major_formatter(formatter)
ax.xaxis.set_tick_params(rotation=30, labelsize=10)
ax.autoscale_view()
ax.grid()
plt.show()
yields (today on 11/26/20 where 11/30/2020 is the next Monday, hence the tick label overlapping with the first of the month):

Timeline bar using matplotlib & PolyCollection - Python

I have been trying to replicate @ImportanceOfBeingErnest's answer to "Timeline bar graph using python and matplotlib"
and can't seem to get the correct output graph.
Here is my current output
Here is my desired output (but with using my data etc.)
I'm struggling to identify the issue.
Any help will be greatly appreciated!
Thank you.
Here's the code:
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.collections import PolyCollection
data = [(dt.datetime(1900, 1, 1, 14, 19, 26), dt.datetime(1900, 1, 1, 14, 19, 29), 'index'),
(dt.datetime(1900, 1, 1, 14, 19, 29), dt.datetime(1900, 1, 1, 14, 19, 31), 'links'),
(dt.datetime(1900, 1, 1, 14, 19, 31), dt.datetime(1900, 1, 1, 14, 19, 33), 'guides'),
(dt.datetime(1900, 1, 1, 14, 19, 33), dt.datetime(1900, 1, 1, 14, 19, 35), 'prices'),
(dt.datetime(1900, 1, 1, 14, 19, 35), dt.datetime(1900, 1, 1, 16, 39, 47), 'index'),
(dt.datetime(1900, 1, 1, 16, 39, 47), dt.datetime(1900, 1, 1, 16, 39, 48), 'prices')]
cats = {'index': 1, 'links': 2, 'guides': 3, 'prices': 4}
colormapping = {'index': 'C0', 'links': 'C1', 'guides': 'C2', 'prices': 'C3'}
verts = []
colors = []
for d in data:
v = [(mdates.date2num(d[0]), cats[d[2]]-.4),
(mdates.date2num(d[0]), cats[d[2]]+.4),
(mdates.date2num(d[1]), cats[d[2]]+.4),
(mdates.date2num(d[1]), cats[d[2]]-.4),
(mdates.date2num(d[0]), cats[d[2]]-.4)]
verts.append(v)
colors.append(colormapping[d[2]])
bars = PolyCollection(verts, facecolors=colors)
fig, ax = plt.subplots()
ax.add_collection(bars)
ax.autoscale()
loc = mdates.MinuteLocator(byminute=[0,30])
ax.xaxis.set_major_locator(loc)
ax.xaxis.set_major_formatter(mdates.AutoDateFormatter(loc))
ax.set_yticks([1,2,3,4])
ax.set_yticklabels(['index', 'links', 'guides', 'prices'])
plt.show()

Your time differences are extremely short. They are a few seconds, while the x-range is a few hours. So, these bars basically become invisible.
Note that in matplotlib areas are usually drawn without antialiasing, which is useful when putting together multiple semitransparent areas. Lines, however, are drawn with some thickness (in screenspace) and antialiased. Therefore, setting an explicit edgecolor helps to visualize these "bars".
bars = PolyCollection(verts, facecolors=colors, edgecolors=colors)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Can't plot heatmap in Bokeh with datetime x axis - python

Related

Range slider Plotly : How not to add a line

dateutil rruleset: How to combine EXRULE and RDATE correctly?

What is plotted when string data is passed to the matplotlib API?

DateLocator in matplotlib to show the first days of both the week and the month

Timeline bar using matplotlib & PolyCollection - Python

Categories

Resources