I'm plotting scatter3d projections of the 4d iris data set using plotly. To display all 4 possible projections in the same figure I am using sliders. However when "sliding" from one projection to the next the axis titles do not change. Normally I would use fig.update_layout() but that isn't working. How can I get these to change with the slider?
Projection 1
Projection 2
Here's the code for reference:
import numpy as np
import plotly.graph_objects as go
from matplotlib import cm
from itertools import combinations
def nd2scatter3d(X, labels = None, features = None, plot_axes = None, hovertext = None):
    """
    Plot 3d scatter projections of the columns of X, one trace per projection,
    with a slider whose steps toggle which trace is visible.

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_features).
    labels : 1d int array, shape = (n_samples), optional, default None.
        Target or clustering labels for each sample.
        Defaults to np.ones(n_samples).
    features : list, len = n_features, optional, default None.
        List of feature names.
        Defaults to numeric labeling.
    plot_axes : list of 3-tuples, optional, default None.
        List of axes to include in 3d projections. i.e. [(0,1,2), (0,1,3)] displays
        projections along the 4th axis and 3rd axis in that order.
        Defaults to all possible axes combinations.
    hovertext : list, len = n_samples, optional, default None.
        List of text to display on mouse hover.
        Defaults to no text on hover.
    """
    if labels is None:
        labels = np.ones(X.shape[0]).astype(int)
    if features is None:
        features = np.arange(X.shape[1]).astype(str)
    if plot_axes is None:
        plot_axes = list(combinations(np.arange(X.shape[1]), 3))
    if hovertext is None:
        hoverinfo = 'none'
    else:
        hoverinfo = 'text'
    fig = go.Figure()
    # One Scatter3d trace per projection; every trace starts hidden.
    for i in range(len(plot_axes)):
        fig.add_trace(
            go.Scatter3d(
                visible=False,
                x=X[:, plot_axes[i][0]],
                y=X[:, plot_axes[i][1]],
                z=X[:, plot_axes[i][2]],
                mode='markers',
                marker=dict(
                    size=3,
                    # Each label value is used directly as an index into the tab10 palette.
                    color = [list(cm.tab10.colors[c]) for c in labels],
                    opacity=1
                ),
                hovertemplate=None,
                hoverinfo= hoverinfo,
                hovertext = hovertext,
            ),)
    # Show the first projection initially.
    fig.data[0].visible = True
    steps = []
    # One slider step per trace. The layout part of each step sets only the
    # figure title; the scene axis titles are not part of the update.
    for i in range(len(fig.data)):
        step = dict(
            method="update",
            args=[{"visible": [False] * len(fig.data)},
                  {"title": features[plot_axes[i][0]] + ' vs. ' + features[plot_axes[i][1]] + ' vs. ' + features[plot_axes[i][2]]}, # layout attribute
                  ],
            label = str(plot_axes[i]),
        )
        step["args"][0]["visible"][i] = True # Toggle i'th trace to "visible"
        steps.append(step)
    sliders = [dict(
        # NOTE(review): there is one step per projection, so 10 may be past
        # the last step - confirm the intended initial step.
        active=10,
        currentvalue={"prefix": "Projection: "},
        pad={"t": 10},
        steps=steps,
    )]
    fig.update_layout(
        sliders=sliders
    )
    fig.update_layout(width=900, height = 500, margin=dict(r=45, l=45, b=10, t=50),
                      showlegend=False)
    # NOTE(review): the traces above are added without a `scene` argument, so
    # they presumably all render in `scene`; the scene2-scene4 settings look
    # unused - confirm. Indexing plot_axes[1] .. plot_axes[3] also requires at
    # least four projections.
    fig.update_layout(scene_aspectmode='cube',
                      scene2_aspectmode='cube',
                      scene3_aspectmode='cube',
                      scene4_aspectmode='cube',
                      scene = dict(
                          xaxis_title = features[plot_axes[0][0]],
                          yaxis_title = features[plot_axes[0][1]],
                          zaxis_title = features[plot_axes[0][2]],),
                      scene2 = dict(
                          xaxis_title = features[plot_axes[1][0]],
                          yaxis_title = features[plot_axes[1][1]],
                          zaxis_title = features[plot_axes[1][2]],),
                      scene3 = dict(
                          xaxis_title = features[plot_axes[2][0]],
                          yaxis_title = features[plot_axes[2][1]],
                          zaxis_title = features[plot_axes[2][2]],),
                      scene4 = dict(
                          xaxis_title = features[plot_axes[3][0]],
                          yaxis_title = features[plot_axes[3][1]],
                          zaxis_title = features[plot_axes[3][2]],)
                      )
    fig.show()
Solution thanks to jayveesea, as well as some minor changes:
def nd2scatter3d(X, labels = None, features = None, plot_axes = None, hovertext = None, size = 3):
    """
    Plot 3d scatter projections of n-dimensional data behind a slider.

    One Scatter3d trace is created per projection. Each slider step shows
    exactly one trace and sets the figure title and the x/y/z axis titles to
    the features of that projection. The figure is displayed with fig.show().

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_features).
    labels : 1d int array, shape = (n_samples), optional, default None.
        Target or clustering labels for each sample. Negative labels are
        drawn in black.
        Defaults to np.zeros(n_samples).
    features : list, len = n_features, optional, default None.
        List of feature names.
        Defaults to numeric labeling.
    plot_axes : list of 3-tuples, optional, default None.
        List of axes to include in 3d projections. i.e. [(0,1,2), (0,1,3)] displays
        projections along the 4th axis and 3rd axis in that order.
        Defaults to all possible axes combinations.
    hovertext : list, len = n_samples, optional, default None.
        List of text to display on mouse hover.
        Defaults to no text on hover.
    size : int, default 3.
        Sets marker size.

    Raises
    ------
    ValueError
        If there is no projection to plot (e.g. X has fewer than 3 features).
    """
    # Accept any array-like (lists, DataFrames): column slicing below needs an ndarray.
    X = np.asarray(X)
    if labels is None:
        # Label all datapoints zero.
        labels = np.zeros(X.shape[0]).astype(int)
    else:
        # Needed for the element-wise division in the > 20 labels branch.
        labels = np.asarray(labels)
    if features is None:
        # numerical features if no names are passed.
        features = np.arange(X.shape[1]).astype(str)
    if plot_axes is None:
        # plot all possible axes if none are passed.
        plot_axes = list(combinations(np.arange(X.shape[1]), 3))
    if len(plot_axes) == 0:
        raise ValueError("nd2scatter3d needs at least one 3-tuple of axes to plot.")
    hoverinfo = 'none' if hovertext is None else 'text'

    # Determine colormap from number of labels.
    n_labels = len(np.unique(labels))
    if n_labels <= 10:
        color = [list(cm.tab10.colors[c]) if c >= 0 else [0,0,0,1] for c in labels]
    elif n_labels <= 20:
        color = [list(cm.tab20.colors[c]) if c >= 0 else [0,0,0,1] for c in labels]
    else:
        norm_labels = labels/max(labels)
        color = [cm.viridis(c) if c >= 0 else [0,0,0,1] for c in norm_labels]

    def axis_names(axes):
        # Feature names shown on the x, y and z axis of one projection.
        return features[axes[0]], features[axes[1]], features[axes[2]]

    def projection_title(axes):
        x_name, y_name, z_name = axis_names(axes)
        return x_name + ' vs. ' + y_name + ' vs. ' + z_name

    # Generate 3d scatter plot slider.
    fig = go.Figure()
    for i, axes in enumerate(plot_axes):
        fig.add_trace(
            # Scatter plot params.
            go.Scatter3d(
                # Only the first projection is visible before the slider is used.
                visible=(i == 0),
                x=X[:, axes[0]],
                y=X[:, axes[1]],
                z=X[:, axes[2]],
                mode='markers',
                marker=dict(
                    size=size,
                    color = color,
                    opacity=1
                ),
                hovertemplate=None,
                hoverinfo= hoverinfo,
                hovertext = hovertext,
            ),)

    # Slider update params: one step per trace.
    steps = []
    for i, axes in enumerate(plot_axes):
        x_name, y_name, z_name = axis_names(axes)
        visible = [False] * len(fig.data)
        visible[i] = True  # Toggle i'th trace to "visible".
        steps.append(dict(
            method="update",
            args=[{"visible": visible},
                  {"title": projection_title(axes),
                   "scene.xaxis.title": x_name,
                   "scene.yaxis.title": y_name,
                   "scene.zaxis.title": z_name,
                   },
                  ],
            label = str(axes),
        ))
    sliders = [dict(
        # The initially active step must match the initially visible trace (0).
        active=0,
        currentvalue={"prefix": "Projection: (x, y, z) = "},
        pad={"t": 10},
        steps=steps,
    )]

    # The slider only rewrites titles when it is moved, so the layout must
    # start out with the titles of the first projection.
    first_x, first_y, first_z = axis_names(plot_axes[0])
    fig.update_layout(
        sliders=sliders,
        title=projection_title(plot_axes[0]),
        width=900, height = 500, margin=dict(r=45, l=45, b=10, t=50),
        scene=dict(
            aspectmode='cube',
            xaxis_title = first_x,
            yaxis_title = first_y,
            zaxis_title = first_z,
        ),
    )
    fig.show()
To update the axis titles you need to include the axis names with your slider entry. It may help to reference plotly's js document on update.
So instead of this chunk:
for i in range(len(fig.data)):
step = dict(
method="update",
args=[{"visible": [False] * len(fig.data)},
{"title": features[plot_axes[i][0]] + ' vs. '
+ features[plot_axes[i][1]] + ' vs. ' + features[plot_axes[i][2]]},
],
label = str(plot_axes[i]),
)
Use something like:
for i in range(len(fig.data)):
step = dict(
method="update",
args=[{"visible": [False] * len(fig.data)},
{"title": features[plot_axes[i][0]] + ' vs. '
+ features[plot_axes[i][1]] + ' vs. ' + features[plot_axes[i][2]],
"scene.xaxis.title": features[plot_axes[i][0]],
"scene.yaxis.title": features[plot_axes[i][1]],
"scene.zaxis.title": features[plot_axes[i][2]],
},
],
label = str(plot_axes[i]),
)
This creates an entry that will update the data and title and the axes titles when the slider changes.
Related
I am currently trying to rebuild the plot from the first figure (from Pang et al. 2021) in plotly. However, I cannot find a setting that prevents the x-y grid from also climbing up the z axis (second figure). My code is the following.
# Hide the background panes of all three scene axes; the x and y axes
# additionally get black grid lines.
hidden_background = dict(showbackground = False)
black_grid = dict(hidden_background, gridcolor = "black")
f.update_layout(
    scene = dict(
        xaxis = black_grid,
        yaxis = black_grid,
        zaxis = hidden_background
    ))
I have the following piece of code
import plotly.express as px
import pandas as pd
import numpy as np
x = [1,2,3,4,5,6]
base = np.array(x)
# Three series over the same x values (linear, squared, square root),
# tagged 0, 1 and 2 in the 'color' column.
df = pd.DataFrame(
    {
        'x': x*3,
        'y': [*base, *base**2, *base**.5],
        'color': [*(base*0), *(base*0+1), *(base*0+2)],
    }
)
# Draw the same frame once as a scatter plot and once as a line plot.
for plotting_function in (px.scatter, px.line):
    fig = plotting_function(
        df,
        x = 'x',
        y = 'y',
        color = 'color',
        title = f'Using {plotting_function.__name__}',
    )
    fig.show()
which produces the following two plots:
For some reason px.line is not producing the continuous color scale that I want, and in the documentation for px.scatter I cannot find how to join the points with lines. How can I produce a plot with a continuous color scale and lines joining the points for each trace?
This is the plot I want to produce:
I am not sure this is possible using only plotly.express. If you use px.line, then you can pass the argument markers=True as described in this answer, but from the px.line documentation it doesn't look like continuous color scales are supported.
UPDATED ANSWER: in order to have a legend that groups the lines and markers together, it's probably simplest to use go.Scatter with the argument mode='lines+markers'. You'll need to add the traces one at a time (by plotting each unique color portion of the data one at a time) in order to be able to control each line+marker group from the legend.
When plotting these traces, you will need some functions to retrieve the colors of the lines from the continuous color scale because go.Scatter won't know what color your lines are supposed to be unless you specify them - thankfully that has been answered here.
Also you won't be able to generate a colorbar adding the markers one color at a time, so to add a colorbar, you can plot all of the markers at once using go.Scatter, but use the argument marker=dict(size=0, color="rgba(0,0,0,0)", colorscale='Plasma', colorbar=dict(thickness=20)) to display a colorbar, but ensure that these duplicate markers are not visible.
Putting all of this together:
# import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
x = [1,2,3,4,5,6]
df = pd.DataFrame(
{
'x': x*3,
'y': list(np.array(x)) + list(np.array(x)**2) + list(np.array(x)**.5),
'color': list(np.array(x)*0) + list(np.array(x)*0+1) + list(np.array(x)*0+2),
}
)
# This function allows you to retrieve colors from a continuous color scale
# by providing the name of the color scale, and the normalized location between 0 and 1
# Reference: https://stackoverflow.com/questions/62710057/access-color-from-plotly-color-scale
def get_color(colorscale_name, loc):
    """
    Look up colors on a named plotly color scale.

    :param colorscale_name: name of the color scale, e.g. 'Plasma'
    :param loc: a normalized location between 0 and 1, or an iterable of them
    :return: one color for a single location, a list of colors for an iterable
    """
    from _plotly_utils.basevalidators import ColorscaleValidator

    # The validator arguments are the property name and a parent name; the
    # parent name does not matter for this use.
    validator = ColorscaleValidator("colorscale", "")
    # Coerces the name into a list of lists: [[loc1, "rgb1"], [loc2, "rgb2"], ...]
    scale = validator.validate_coerce(colorscale_name)

    if not hasattr(loc, "__iter__"):
        return get_continuous_color(scale, loc)
    return [get_continuous_color(scale, position) for position in loc]
# Identical to Adam's answer
import plotly.colors
from PIL import ImageColor
def get_continuous_color(colorscale, intermed):
    """
    Return the color found at position `intermed` on a continuous colorscale.

    Plotly continuous colorscales assign colors to the range [0, 1]; this
    function interpolates between the two scale entries surrounding `intermed`.

    Plotly doesn't make the colorscales directly accessible in a common format.
    Some are ready to use:

        colorscale = plotly.colors.PLOTLY_SCALES["Greens"]

    Others are just swatches that need to be constructed into a colorscale:

        viridis_colors, scale = plotly.colors.convert_colors_to_same_type(plotly.colors.sequential.Viridis)
        colorscale = plotly.colors.make_colorscale(viridis_colors, scale=scale)

    :param colorscale: A plotly continuous colorscale defined with RGB string colors.
    :param intermed: value in the range [0, 1]
    :return: color in rgb string format
    :rtype: str
    """
    if not len(colorscale) >= 1:
        raise ValueError("colorscale must have at least one color")

    def hex_to_rgb(shade):
        return "rgb" + str(ImageColor.getcolor(shade, "RGB"))

    def is_hex(shade):
        return shade[0] == "#"

    # Positions at or beyond either end of the scale map to the end colors.
    if intermed <= 0 or len(colorscale) == 1:
        first = colorscale[0][1]
        return hex_to_rgb(first) if is_hex(first) else first
    if intermed >= 1:
        last = colorscale[-1][1]
        return hex_to_rgb(last) if is_hex(last) else last

    # Walk the scale until the first entry at or above `intermed`; the entry
    # seen just before it is the lower neighbour.
    for position, shade in colorscale:
        if not intermed > position:
            high_cutoff, high_color = position, shade
            break
        low_cutoff, low_color = position, shade

    if is_hex(low_color) or is_hex(high_color):
        # some color scale names (such as cividis) returns:
        # [[loc1, "hex1"], [loc2, "hex2"], ...]
        low_color, high_color = hex_to_rgb(low_color), hex_to_rgb(high_color)

    fraction = (intermed - low_cutoff) / (high_cutoff - low_cutoff)
    return plotly.colors.find_intermediate_color(
        lowcolor=low_color,
        highcolor=high_color,
        intermed=fraction,
        colortype="rgb",
    )
fig = go.Figure()

# The range of the color column does not change inside the loop: compute it once.
color_min = min(df.color)
color_max = max(df.color)
color_span = color_max - color_min

## add the lines+markers
for color_val in df.color.unique():
    # With a single distinct color value the span is 0; place that value at
    # the start of the scale instead of dividing by zero.
    color_val_normalized = (color_val - color_min) / color_span if color_span else 0.0
    df_subset = df[df['color'] == color_val]
    fig.add_trace(go.Scatter(
        x=df_subset['x'],
        y=df_subset['y'],
        mode='lines+markers',
        marker=dict(color=get_color('Plasma', color_val_normalized)),
        name=f"line+marker {color_val}",
        legendgroup=f"line+marker {color_val}"
    ))

## add invisible markers to display the colorbar without displaying the markers
fig.add_trace(go.Scatter(
    x=df['x'],
    y=df['y'],
    mode='markers',
    marker=dict(
        size=0,
        color="rgba(0,0,0,0)",
        colorscale='Plasma',
        cmin=color_min,
        cmax=color_max,
        colorbar=dict(thickness=40)
    ),
    showlegend=False
))

fig.update_layout(
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01),
    yaxis_range=[min(df.y)-2,max(df.y)+2]
)
fig.show()
You can achieve this using only 2 more parameters in px.line:
markers=True
color_discrete_sequence=my_plotly_continuous_sequence
The complete code would look something like this (Note the list slicing [::4] so that the colors are well spaced):
import plotly.express as px
import pandas as pd
import numpy as np
x = [1, 2, 3, 4, 5, 6]
base = np.array(x)
# Same three series as in the question: linear, squared and square root,
# tagged 0, 1 and 2 in the 'color' column.
df = pd.DataFrame(
    {
        'x': x * 3,
        'y': np.concatenate([base, base ** 2, base ** .5]),
        'color': np.concatenate([base * 0, base * 0 + 1, base * 0 + 2]),
    }
)

# Every 4th Plasma swatch, so that consecutive lines get well separated colors.
spaced_plasma = px.colors.sequential.Plasma[::4]
fig = px.line(
    df,
    x='x',
    y='y',
    color='color',
    color_discrete_sequence=spaced_plasma,
    markers=True,
    template='plotly'
)
fig.show()
This produces the following output.
In case you have more lines than the colors present in the colormap, you can construct a custom colorscale so that you get one complete sequence instead of a cycling sequence:
rgb = px.colors.convert_colors_to_same_type(px.colors.sequential.RdBu)[0]
colorscale = []
n_steps = 4  # Number of colors generated for each pair of neighbouring base colors
for low, high in zip(rgb[:-1], rgb[1:]):
    # endpoint=False: the last color of a segment is the first color of the
    # next one, so including it would put every interior base color into the
    # sequence twice and give two adjacent lines the same color.
    for step in np.linspace(0, 1, n_steps, endpoint=False):
        colorscale.append(px.colors.find_intermediate_color(low, high, step, colortype='rgb'))
# Close the sequence with the final base color, which no segment has emitted yet.
colorscale.append(rgb[-1])
fig = px.line(df_e, x='temperature', y='probability', color='year', color_discrete_sequence=colorscale, height=900)
fig.show()
I am playing with the third example of "Scatter plots with a legend" in the matplotlib manual.
I have tweaked the marker sizes to:
s = (50 / price) ** 2
And as an input to legend_elements I am using:
func=lambda s: 50 / np.sqrt(s)
I get the output below. The marker sizes of the legend are wrong. Why is that?
Here is the code:
import numpy as np
import matplotlib.pyplot as plt
# Random demo data, 40 points.
volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)

fig, ax = plt.subplots()

# Marker area shrinks as the price grows.
s = (50 / price) ** 2
scatter = ax.scatter(volume, amount, c=ranking, s=s,
                     vmin=-3, vmax=3, cmap="Spectral", label=price)

# Color legend.
ranking_handles, ranking_labels = scatter.legend_elements(num=5)
legend1 = ax.legend(ranking_handles, ranking_labels,
                    loc="upper left", title="Ranking")
ax.add_artist(legend1)

# Size legend; func maps a marker size back to the price it was computed from.
kw = {
    "prop": "sizes",
    "num": 5,
    "color": scatter.cmap(0.7),
    "fmt": "$ {x:.2f}",
    "func": lambda s: 50 / np.sqrt(s),
}
price_handles, price_labels = scatter.legend_elements(**kw)
legend2 = ax.legend(price_handles, price_labels,
                    loc="lower right", title="Price")

# Write the rounded price next to every point.
for p, position in zip(price, zip(volume, amount)):
    ax.annotate(round(p, 0), position)
plt.show()
The issue appears to be related to the inverse relationship between price and marker size. The way the data is calculated in legend_elements doesn't account for this, and the calculation doesn't quite work. I've submitted a pull request.
The problem is in np.interp that expects increasing input for the second argument. Here is a work around for now that sorts the input first:
legend2 = ax.legend(*legend_elements(scatter, **kw),
loc="lower right", title="Price")
Run this after defining legend_elements as:
def legend_elements(self, prop="colors", num="auto",
                    fmt=None, func=lambda x: x, **kwargs):
    """
    Creates legend handles and labels for a PathCollection. This is useful
    for obtaining a legend for a :meth:`~.Axes.scatter` plot. E.g.::

        scatter = plt.scatter([1, 2, 3], [4, 5, 6], c=[7, 2, 3])
        plt.legend(*scatter.legend_elements())

    Also see the :ref:`automatedlegendcreation` example.

    This standalone version takes the collection as its first argument
    (``legend_elements(scatter, ...)``) and sorts the interpolation table
    before calling `numpy.interp`, so that a decreasing *func* (e.g. an
    inverse relationship between value and marker size) is handled.

    Parameters
    ----------
    self : the PathCollection returned by :meth:`~.Axes.scatter`
    prop : string, optional, default *"colors"*
        Can be *"colors"* or *"sizes"*. In case of *"colors"*, the legend
        handles will show the different colors of the collection. In case
        of "sizes", the legend will show the different sizes.
    num : int, None, "auto" (default), array-like, or `~.ticker.Locator`,
        optional
        Target number of elements to create.
        If None, use all unique elements of the mappable array. If an
        integer, target to use *num* elements in the normed range.
        If *"auto"*, try to determine which option better suits the nature
        of the data.
        The number of created elements may slightly deviate from *num* due
        to a `~.ticker.Locator` being used to find useful locations.
        If a list or array, use exactly those elements for the legend.
        Finally, a `~.ticker.Locator` can be provided.
    fmt : str, `~matplotlib.ticker.Formatter`, or None (default)
        The format or formatter to use for the labels. If a string must be
        a valid input for a `~.StrMethodFormatter`. If None (the default),
        use a `~.ScalarFormatter`.
    func : function, default *lambda x: x*
        Function to calculate the labels. Often the size (or color)
        argument to :meth:`~.Axes.scatter` will have been pre-processed
        by the user using a function *s = f(x)* to make the markers
        visible; e.g. *size = np.log10(x)*. Providing the inverse of this
        function here allows that pre-processing to be inverted, so that
        the legend labels have the correct values;
        e.g. *func = lambda x: 10**x*.
    kwargs : further parameters
        Allowed keyword arguments are *color* and *size*. E.g. it may be
        useful to set the color of the markers if *prop="sizes"* is used;
        similarly to set the size of the markers if *prop="colors"* is
        used. Any further parameters are passed onto the `.Line2D`
        instance. This may be useful to e.g. specify a different
        *markeredgecolor* or *alpha* for the legend handles.

    Returns
    -------
    tuple (handles, labels)
        with *handles* being a list of `.Line2D` objects
        and *labels* a matching list of strings.

    Raises
    ------
    ValueError
        If *prop* is neither "colors" nor "sizes".
    """
    # Imported here so the function can be pasted into a script as-is; the
    # surrounding snippet only imports numpy and pyplot.
    import warnings
    import matplotlib as mpl
    import matplotlib.lines as mlines
    import matplotlib.ticker  # makes mpl.ticker available

    handles = []
    labels = []
    hasarray = self.get_array() is not None
    if fmt is None:
        fmt = mpl.ticker.ScalarFormatter(useOffset=False, useMathText=True)
    elif isinstance(fmt, str):
        fmt = mpl.ticker.StrMethodFormatter(fmt)
    fmt.create_dummy_axis()

    if prop == "colors":
        if not hasarray:
            warnings.warn("Collection without array used. Make sure to "
                          "specify the values to be colormapped via the "
                          "`c` argument.")
            return handles, labels
        u = np.unique(self.get_array())
        size = kwargs.pop("size", mpl.rcParams["lines.markersize"])
    elif prop == "sizes":
        u = np.unique(self.get_sizes())
        color = kwargs.pop("color", "k")
    else:
        raise ValueError("Valid values for `prop` are 'colors' or "
                         f"'sizes'. You supplied '{prop}' instead.")

    fmt.set_bounds(func(u).min(), func(u).max())
    # The isinstance check keeps an array-like `num` from being compared
    # element-wise against the string.
    if isinstance(num, str) and num == "auto":
        num = 9
        if len(u) <= num:
            num = None
    if num is None:
        values = u
        label_values = func(values)
    else:
        if prop == "colors":
            arr = self.get_array()
        elif prop == "sizes":
            arr = self.get_sizes()
        if isinstance(num, mpl.ticker.Locator):
            loc = num
        elif np.iterable(num):
            loc = mpl.ticker.FixedLocator(num)
        else:
            num = int(num)
            loc = mpl.ticker.MaxNLocator(nbins=num, min_n_ticks=num-1,
                                         steps=[1, 2, 2.5, 3, 5, 6, 8, 10])
        label_values = loc.tick_values(func(arr).min(), func(arr).max())
        cond = ((label_values >= func(arr).min()) &
                (label_values <= func(arr).max()))
        label_values = label_values[cond]
        yarr = np.linspace(arr.min(), arr.max(), 256)
        xarr = func(yarr)
        # np.interp expects increasing x coordinates; sort so that a
        # decreasing func still maps labels back to the right values.
        ix = np.argsort(xarr)
        values = np.interp(label_values, xarr[ix], yarr[ix])

    kw = dict(markeredgewidth=self.get_linewidths()[0],
              alpha=self.get_alpha())
    kw.update(kwargs)

    for val, lab in zip(values, label_values):
        if prop == "colors":
            color = self.cmap(self.norm(val))
        elif prop == "sizes":
            # Collection sizes are areas; Line2D marker sizes are lengths.
            size = np.sqrt(val)
            if np.isclose(size, 0.0):
                continue
        h = mlines.Line2D([0], [0], ls="", color=color, ms=size,
                          marker=self.get_paths()[0], **kw)
        handles.append(h)
        if hasattr(fmt, "set_locs"):
            fmt.set_locs(label_values)
        l = fmt(lab)
        labels.append(l)

    return handles, labels
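You can also manually create your own legend. The trick here is that you have to apply np.sqrt to the sizes in the legend, as @busybear also does in her snippet: the s argument of scatter is a marker area (in points squared), whereas the markersize of a Line2D legend handle is a length (in points).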
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D  # used for the hand-built legend handles below

volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)

fig, ax = plt.subplots()

s = (50 / price) ** 2
scatter = ax.scatter(volume, amount, c=ranking, s=s,
                     vmin=-3, vmax=3, cmap="Spectral", label=price)

legend1 = ax.legend(*scatter.legend_elements(num=5),
                    loc="upper left", title="Ranking")
ax.add_artist(legend1)

# # easy legend
# kw = dict(prop="sizes", num=5, color=scatter.cmap(0.7), fmt="$ {x:.2f}",
#           func=lambda s: 50 / np.sqrt(s),
#           )
# legend2 = ax.legend(*scatter.legend_elements(**kw),
#                     loc="lower right", title="Price")
# ax.add_artist(legend2)

# manual legend
legend_values = np.array([2,4,6,8])
# Same price -> size mapping as used for the scatter markers above.
legend_sizes = (50 / legend_values) ** 2
# IMPORTANT: scatter sizes are areas (points**2) while Line2D markersize is a
# length (points), so the square root is applied to the sizes in the legend.
legend_sizes_sqrt = np.sqrt(legend_sizes)
elements3 = [Line2D([0], [0], color=scatter.cmap(0.7), lw=0, marker="o", linestyle=None, markersize=marker_size)
             for marker_size in legend_sizes_sqrt]
legend3 = ax.legend(elements3, [f"$ {p:.2f}" for p in legend_values], loc='lower right', title="Price")
ax.add_artist(legend3)

for p, v, a in zip(price, volume, amount):
    ax.annotate(round(p, 0), (v, a))
plt.show()
I am following the example found here: https://plot.ly/python/aggregations/#histogram-binning
The code they have works as expected, but I am trying to expand it to autobin by week as well as day, month, etc. I know this data set doesn't have time, but I would also like to bin my own set of data that has time using hour as well. It seems like this would be straightforward, but this code does not produce the correct results:
import plotly.io as pio
import pandas as pd
df = pd.read_csv("https://plot.ly/~public.health/17.csv")

background = 'rgb(240, 240, 240)'
# (button label, value assigned to xbins.size), in display order.
bin_choices = [
    ('Day', 'D1'),
    ('Week', 'D7'),
    ('Month', 'M1'),
    ('Quater', 'M3'),
    ('Half Year', 'M6'),
    ('Year', 'M12'),
]

# Single histogram trace over the date column, starting with monthly bins.
data = [{
    'x': df['date'],
    'autobinx': False,
    'autobiny': True,
    'marker': {'color': 'rgb(68, 68, 68)'},
    'name': 'date',
    'type': 'histogram',
    'xbins': {
        'end': '2016-12-31 12:00',
        'size': 'M1',
        'start': '1983-12-31 12:00',
    },
}]

# Dropdown with one restyle button per bin size.
bin_size_menu = {
    'x': 0.1,
    'y': 1.15,
    'xref': 'paper',
    'yref': 'paper',
    'yanchor': 'top',
    'active': 1,
    'showactive': True,
    'buttons': [
        {'args': ['xbins.size', bin_size], 'label': label, 'method': 'restyle'}
        for label, bin_size in bin_choices
    ],
}

layout = {
    'paper_bgcolor': background,
    'plot_bgcolor': background,
    'title': '<b>Shooting Incidents</b>',
    'xaxis': {'title': '', 'type': 'date'},
    'yaxis': {'title': 'Shootings Incidents', 'type': 'linear'},
    'updatemenus': [bin_size_menu],
}

fig_dict = {'data': data, 'layout': layout}
pio.show(fig_dict, validate=False)
Does anyone know how to get bins by week (as well as hypothetical bins by hour) to work? Thanks!
I figured out how to do what I was trying to do. The answer was buried in the Plot.ly docs found here: https://plot.ly/python/reference/
Specifically under xbins.size, they refer to following the same scheme in axis.dtick. Here is the axis.dtick documentation that had the answer:
dtick
Parent: data[type=histogram].marker.colorbar
Type: number or categorical coordinate string
Sets the step in-between ticks on this axis. Use with tick0. Must be a positive number, or special strings available to "log" and "date" axes. If the axis type is "log", then ticks are set every 10^(n*dtick) where n is the tick number. For example, to set a tick mark at 1, 10, 100, 1000, ... set dtick to 1. To set tick marks at 1, 100, 10000, ... set dtick to 2. To set tick marks at 1, 5, 25, 125, 625, 3125, ... set dtick to log_10(5), or 0.69897000433. "log" has several special values; "L<f>", where f is a positive number, gives ticks linearly spaced in value (but not position). For example tick0 = 0.1, dtick = "L0.5" will put ticks at 0.1, 0.6, 1.1, 1.6 etc. To show powers of 10 plus small digits between, use "D1" (all digits) or "D2" (only 2 and 5). tick0 is ignored for "D1" and "D2". If the axis type is "date", then you must convert the time to milliseconds. For example, to set the interval between ticks to one day, set dtick to 86400000.0. "date" also has special values: "M<n>" gives ticks spaced by a number of months. n must be a positive integer. To set ticks on the 15th of every third month, set tick0 to "2000-01-15" and dtick to "M3". To set ticks every 4 years, set dtick to "M48".
As a result, the new snippet of code for the bin sizing is:
# Bin sizes on a date axis are given in milliseconds (as numbers) or as
# "M<n>" strings for whole months.
buttons = [
    dict(
        args = ['xbins.size', 3600000.0],    # 1 hour = 60 * 60 * 1000 ms
        label = 'Hour',
        method = 'restyle',
    ), dict(
        args = ['xbins.size', 86400000.0],   # 1 day = 24 hours
        label = 'Day',
        method = 'restyle',
    ), dict(
        args = ['xbins.size', 604800000.0],  # 1 week = 7 days
        label = 'Week',
        method = 'restyle',
    ), dict(
        args = ['xbins.size', 'M1'],         # 1 calendar month
        label = 'Month',
        method = 'restyle',
    )]
But with this in mind, I would have suspected that using "D1" wouldn't have worked either. If anyone who works at Plot.ly sees this, could you make a note to update the example to point out this specific nuance?
I am using matplotlib and a stackedbarchart program for it that someone wrote to graph a stacked bar chart.
My graph:
x-axis has 8 income distributions, one for each bar
y-axis is the % of people in each income distribution. Person type-a is the first stack, person type-b is the second stack, person type-c is the third stack.
My barchart is center aligned, and I am trying to figure out how to space out the bars so the graph looks better and so the labels are easier to read. Any suggestions, or clarifications?
The program is stackedBarGraph.py and the code looks like this, where widths is an array of 8 values, each corresponding to the width of a bar chart.
Let me know if you need any more information (I tried to keep everything relevant). Thanks!
Full code (I hope it's not too difficult to read):
from __future__ import division
from pylab import *
import seaborn as sns
import pandas as pd
import numpy as np
from stackedbars import StackedBarGrapher
data = csv2rec('coa.csv', delimiter=',')
x = data['totalgrantaid']
y = data['studenteffort']
z = data['parentcontim']
g = data['parentincomeim']
df = pd.DataFrame(dict(grant = x, stud = y, par = z, income = g))

#organize the data to graph
income_brackets = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000), (100000, 150000), (150000,200000), (200000,250000), (250000,300000)]
levels = ('grant', 'stud', 'par')
bracket_filter = 'income > {} and income < {}'

# source: median of each column per income bracket (the bar segments)
# source2: number of rows per income bracket (used for the bar widths)
source = {'grant' : [], 'stud': [], 'par': []}
source2 = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
    in_bracket = df.query(bracket_filter.format(lower, upper))
    bracket_counts = pd.DataFrame(in_bracket).count()
    for key in levels:
        source[key].append(median(in_bracket[key]))
        source2[key].append(bracket_counts[key])

#set the widths
total = pd.DataFrame(df.query(bracket_filter.format(0, 300000))['grant']).count()
total = total/10

#graph specifications
d_widths = [(bracket_size/total)[0] for bracket_size in source2['grant'][:8]]
d_colors = ['r','g','b']
d_labels = ('<25000', '25000-\n50000', '50000-\n75000', '75000-\n100000', '100000-\n150000', '150000-\n200000', '200000-\n250000', '250000-\n300000')
# one row per income bracket, one column per level
d = np.array([source[key][:8] for key in levels]).transpose()

#the graph
fig = plt.figure()
ax1 = fig.add_subplot(111)
mygraph = StackedBarGrapher()
mygraph.stackedBarPlot(ax1,d,d_colors, edgeCols=['#000000']*3,widths = d_widths, showFirst = 8, xLabels=d_labels,scale=True)
Stackedbarchart program:
def stackedBarPlot(self,
                   ax,                                 # axes to plot onto
                   data,                               # data to plot
                   cols,                               # colors for each level
                   xLabels = None,                     # bar specific labels
                   yTicks = 6.,                        # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                   edgeCols=None,                      # colors for edges
                   showFirst=-1,                       # only plot the first <showFirst> bars
                   scale=False,                        # scale bars to same height
                   widths=None,                        # set widths for each bar
                   heights=None,                       # set heights for each bar
                   ylabel='',                          # label for y axis
                   xlabel=''                           # label for x axis
                   ):
    """
    Draw a stacked bar chart of `data` onto `ax`.

    `data` holds one row per bar and one column per stacked level. Bars are
    centered on their x positions; with `widths` given, neighbouring bars
    touch, otherwise bars of width 1 are placed at 0, 1, 2, ...

    With `scale=True` every bar is normalized to a total height of 1 and
    `heights` is ignored (a warning is printed). With only `heights` given,
    bar i is normalized and then scaled to heights[i].

    `yTicks` is either the string "none" (no y ticks), a number of evenly
    spaced ticks to generate, or a pair [tick_positions, tick_labels].
    """
    # Decide this once, before yTicks is replaced by generated ticks below.
    # Compared by value: identity comparison against a string literal is unreliable.
    no_y_ticks = isinstance(yTicks, str) and yTicks == "none"

    #------------------------------------------------------------------------------
    # data fixeratering

    # make sure this makes sense
    if showFirst != -1:
        showFirst = np.min([showFirst, np.shape(data)[0]])
        data_copy = np.copy(data[:showFirst]).transpose().astype('float')
        data_shape = np.shape(data_copy)
        if heights is not None:
            heights = heights[:showFirst]
        if widths is not None:
            widths = widths[:showFirst]
        showFirst = -1
    else:
        # Cast to float here too: the in-place divisions below cannot be
        # stored into an integer array.
        data_copy = np.copy(data).transpose().astype('float')
        data_shape = np.shape(data_copy)

    # determine the number of bars and corresponding levels from the shape of the data
    num_bars = data_shape[1]
    levels = data_shape[0]

    if widths is None:
        widths = np.array([1] * num_bars)
        x = np.arange(num_bars)
    else:
        # Place each bar so that it touches its left neighbour.
        x = [0]
        for i in range(1, len(widths)):
            x.append(x[i-1] + (widths[i-1] + widths[i])/2)

    # stack the data --
    # replace the value in each level by the cumulative sum of all preceding levels
    data_stack = np.reshape([float(i) for i in np.ravel(np.cumsum(data_copy, axis=0))], data_shape)

    # scale the data is needed
    if scale:
        data_copy /= data_stack[levels-1]
        data_stack /= data_stack[levels-1]
        if heights is not None:
            # Parenthesized single argument: valid as statement and as function call.
            print("WARNING: setting scale and heights does not make sense.")
            heights = None
    elif heights is not None:
        data_copy /= data_stack[levels-1]
        data_stack /= data_stack[levels-1]
        for i in np.arange(num_bars):
            data_copy[:,i] *= heights[i]
            data_stack[:,i] *= heights[i]

    #------------------------------------------------------------------------------
    # ticks

    if not no_y_ticks:
        # it is either a set of ticks or the number of auto ticks to make
        real_ticks = True
        try:
            len(yTicks[1])
        except (TypeError, IndexError):
            # a plain number (or something too short to be a pair of lists)
            real_ticks = False

        if not real_ticks:
            yTicks = float(yTicks)
            if scale:
                # make the ticks line up to 100 %
                y_ticks_at = np.arange(yTicks)/(yTicks-1)
                y_tick_labels = np.array(["%0.0f"%(i * 100) for i in y_ticks_at])
            else:
                # space the ticks along the y axis
                y_ticks_at = np.arange(yTicks)/(yTicks-1)*np.max(data_stack)
                y_tick_labels = np.array([str(i) for i in y_ticks_at])
            yTicks=(y_ticks_at, y_tick_labels)

    #------------------------------------------------------------------------------
    # plot

    if edgeCols is None:
        edgeCols = ["none"]*len(cols)

    # bars
    ax.bar(x,
           data_stack[0],
           color=cols[0],alpha=0.7,
           edgecolor=edgeCols[0],
           width=widths,
           linewidth=0.5,
           align='center'
           )

    for i in np.arange(1,levels):
        ax.bar(x,
               data_copy[i],
               bottom=data_stack[i-1],
               color=cols[i],alpha=0.7,
               edgecolor=edgeCols[i],
               width=widths,
               linewidth=0.5,
               align='center'
               )

    # borders
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)

    # make ticks if necessary
    if not no_y_ticks:
        ax.tick_params(axis='y', which='both', labelsize=8, direction="out")
        ax.yaxis.tick_left()
        plt.yticks(yTicks[0], yTicks[1])
    else:
        plt.yticks([], [])

    if xLabels is not None:
        ax.tick_params(axis='x', which='both', labelsize=8, direction="out")
        ax.xaxis.tick_bottom()
        plt.xticks(x, xLabels, rotation='horizontal')
    else:
        plt.xticks([], [])

    # limits
    # The first bar is centered on 0 and bars touch, so the last bar ends at
    # sum(widths) - widths[0]/2 (which is sum(widths) - 0.5 for unit widths).
    ax.set_xlim(-1.*widths[0]/2., np.sum(widths) - widths[0]/2.)
    ax.set_ylim(0, np.max(data_stack))

    # labels
    # Axes objects expose set_xlabel / set_ylabel; there is no ax.xlabel.
    if xlabel != '':
        ax.set_xlabel(xlabel)
    if ylabel != '':
        ax.set_ylabel(ylabel)
Alright thanks everyone for the input (and Bill for showing me how to use list comprehensions effectively).
I was able to alter the program to achieve what I wanted (I think). I added a new variable, axspacing to the below parts of the program:
def stackedBarPlot(self,
ax, # axes to plot onto
data, # data to plot
cols, # colors for each level
xLabels = None, # bar specific labels
yTicks = 6., # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
edgeCols=None, # colors for edges
showFirst=-1, # only plot the first <showFirst> bars
scale=False, # scale bars to same height
widths=None, # set widths for each bar
heights=None, # set heights for each bar
ylabel='', # label for x axis
xlabel='', # label for y axis
xaxlim=None,
axspacing=0,
):
.
if widths is None:
widths = np.array([1] * num_bars)
x = np.arange(num_bars)
else:
x = [0]
for i in range(1, len(widths)):
x.append(x[i-1] + (widths[i-1] + widths[i])/2 + axspacing)
.
# limits
#ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
ax.set_ylim(0, np.max(data_stack))
if xaxlim is None:
ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5 + num_bars * axspacing)
else:
ax.set_xlim(xaxlim)