How do I have multiple dataframes in an animated plotly scatter graph? - python

I am trying to display 3 sets of X/Y coordinates on an animated Plotly scatter graph in which the animation key is time. My current workaround is to add all the coordinate sets to the same dataframe; however, I believe this will cause problems, because I need to change marker properties so that the sets of points are easy to tell apart.
This is what my workaround looks like:
This is how I am generating the graph:
# Workaround: fold the second and third coordinate sets into the first so that
# one x/y column pair holds every point.
# NOTE(review): assumes the *_trim objects are Python lists, so += appends — TODO confirm
# NOTE(review): time_trim is not extended here; presumably it already matches the combined length — verify
x1_trim += x2_trim
x1_trim += x3_trim
y1_trim += y2_trim
y1_trim += y3_trim
# Column data for the combined frame; "time" is the animation key used below.
d = {
"x1": x1_trim,
"y1": y1_trim,
"time": time_trim
}
df = pd.DataFrame(d)
#Default x and y axis
x_range = [-1,1]
y_range = [-1,1]
# Animated scatter keyed on the "time" column, with fixed axis ranges.
fig = px.scatter(df, x="x1", y="y1", animation_frame="time", range_x=x_range, range_y=y_range)
# Green rectangle outline from (-0.5, -0.5) to (0.5, 0.5).
fig.add_shape(type="rect",x0=-0.5, y0=-0.5, x1=0.5, y1=0.5, line=dict(color="Green",width=2))
As you can see, I'm appending my x2/y2 and x3/y3 data to the end of my x1/y1 lists. How can I keep these separate whilst still having all the information on my animated plot? I tried to display multiple scatter graphs on the same plot but never managed to get it working.
My solution attempt:
#Building the dataframe and drawing graph
# One dict of x/y/time values per coordinate set; all three reuse time_trim.
d1 = {
"x": x1_trim,
"y": y1_trim,
"time": time_trim
}
d2 = {
"x": x2_trim,
"y": y2_trim,
"time":time_trim
}
d3 = {
"x": x3_trim,
"y": y3_trim,
"time": time_trim
}
# Map a trace name to its coordinate dict.
dfs = {"d1": d1, "d2": d2, "d3": d3}
fig = go.Figure()
# Attempt to add one scatter trace per coordinate set, named after its key.
# NOTE(review): dfs is keyed by "d1"/"d2"/"d3", so dfs[0] raises KeyError.
# NOTE(review): animation_frame is a Plotly Express argument; go.Scatter presumably does not accept it — confirm against the plotly reference.
for i in dfs:
fig = fig.add_trace(go.Scatter(x = dfs[i]["x"], y = dfs[i]["y"], name = i, animation_frame=dfs[0]["time"] ))

This can be done very simply in a single data frame:
x and y are float values
time is a string representation of the time, used for the animation frame
trace is a string identifying the x/y pair, corresponding to x1/y1, x2/y2, etc.
Using Plotly Express is then very simple.
I have included sample data to show how simple this structure is.
A comment requested that a different symbol be used for each of the traces. See https://plotly.com/python/marker-style/#custom-marker-symbols and "Plotly: How to set marker symbol shapes for multiple traces using plotly express?"
import numpy as np
import itertools
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Construct a long-format data frame with four columns:
#   x, y   - random floats in [1, 5)
#   time   - string form of the time of day, used as the animation frame
#   trace  - "0", "1" or "2", identifying which x/y series a row belongs to
# 96 hourly timestamps x 3 traces = 288 rows, matching the 288 random x/y rows.
df = pd.DataFrame(np.random.uniform(1, 5, [288, 2]), columns=["x", "y"]).join(
    pd.DataFrame(
        itertools.product(
            pd.Series(
                # Lowercase "h": the uppercase "H" frequency alias is
                # deprecated in recent pandas releases.
                pd.date_range("1-jan-2021", freq="1h", periods=24 * 4)
            ).dt.time.astype(str),
            range(3),
        ),
        columns=["time", "trace"],
    ).astype(str)
)
# A constant "size" column gives every marker the same size; colour and symbol
# both follow the trace column so each series is visually distinct.
fig = px.scatter(
    df.assign(size=8),
    x="x",
    y="y",
    color="trace",
    animation_frame="time",
    symbol="trace",
    size="size",
    size_max=10,
)
fig
data frame structure
x float64
y float64
time object
trace object
dtype: object
raw data
Per the comments: the construction of the data frame above uses a number of techniques that are not core to the solution.
The following shows sample data for just two times being loaded into a data frame.
import io
# Load a small inline CSV sample (columns x, y, time, trace) covering two
# distinct time values, so the frame layout is visible without the generator
# code used earlier.
df = pd.read_csv(io.StringIO("""x,y,time,trace
4.47189433943762,2.2663279945423125,00:00:00,0
3.263751615344729,2.7707896475420433,00:00:00,1
3.5073083888118197,3.937926244743114,00:00:00,2
3.254552306893224,1.7740014652497695,01:00:00,0
1.6111813732639115,1.5324478432794377,01:00:00,1
3.411314175447148,4.495634466903654,01:00:00,2
1.7036170024927264,4.284719804413246,00:00:00,0
1.9797441059531726,3.9012400136550798,00:00:00,1
1.5178030860172549,3.7904674709011084,00:00:00,2
2.03612601506845,2.5378053661978592,01:00:00,0
2.230688800088902,2.946463794148376,01:00:00,1
1.3626620551885207,2.442489690168825,01:00:00,2
4.733618949813925,3.9103378744051014,00:00:00,0
4.4816142771548435,1.1245335267028125,00:00:00,1
4.9550805577829315,3.2454665809417227,00:00:00,2
2.9007566994079816,1.1620429771047482,01:00:00,0
2.11807366926913,3.9811777626521083,01:00:00,1
2.0753910017252144,4.416934286540347,01:00:00,2
2.3867481776916804,1.6378885254464284,00:00:00,0
1.4021710772900526,2.1565431787254536,00:00:00,1
3.5150580308648562,2.2722969079838387,00:00:00,2
4.987010605760303,1.943335174662026,01:00:00,0
3.0504403251471484,4.398673922531113,01:00:00,1
4.021398175417694,4.422199058284852,01:00:00,2"""))
# Convert trace to strings before plotting.
# NOTE(review): presumably so Plotly Express treats it as a discrete category rather than a continuous colour scale — confirm
df["trace"] = df["trace"].astype(str)
# Animated scatter: one colour per trace, one frame per time value.
px.scatter(df, x="x", y="y", color="trace", animation_frame="time")

Related

Adding traces in the same subplot messes with bar size [duplicate]

This question already has answers here:
How to plot a colored gantt chart with plotly keeping the correct bar height
(2 answers)
Closed 4 months ago.
I've made a function to generate a Gantt chart in plotly.
def create_plot(self, date_start, date_end):
    """Build one Gantt subplot per customer and store the figure on ``self.plot``.

    Reads ``self.filtered_df`` and uses its ``Customer``, ``StartDate``,
    ``DueDate``, ``ProdName``, ``DocNum`` and ``Estat`` columns.

    Args:
        date_start: Lower bound applied to the range of every x axis.
        date_end: Upper bound applied to the range of every x axis.
    """
    df = self.filtered_df
    # Create a dict for every customer
    customers = df['Customer'].unique()
    # Pixels per horizontal bar
    pixel = 35
    # Create a gantt chart for every customer
    customer_plots = dict()
    heights = []
    for customer in customers:
        # filter tasks for customer
        customer_plot = df.loc[df['Customer'] == customer]
        # create timeline
        fig = timeline(customer_plot, x_start='StartDate', y='ProdName', x_end='DueDate', text='DocNum', color='Estat')
        fig.update_traces(textposition="inside")
        customer_plots[customer] = fig
        # Row height grows with the number of tasks for this customer.
        heights.append(5 + len(customer_plot) * pixel)
    # Plot them in a shared x_axis
    fig_sub = make_subplots(rows=len(customer_plots), row_heights=heights, subplot_titles=list(customer_plots.keys()))
    fig_sub.update_xaxes(type='date')
    # Copy every trace of each per-customer figure into that customer's row.
    for i, (customer, plot) in enumerate(customer_plots.items()):
        for trace in plot['data']:
            fig_sub.add_trace(trace, row=i + 1, col=1)
    fig_sub.update_layout(
        autosize=False,
        height=sum(heights),
        width=1500
    )
    # Apply the requested date window and a 7 * 86400000.0 tick step to every
    # layout entry whose name starts with "xaxis".
    for ax in fig_sub['layout']:
        if ax[:5] == 'xaxis':
            fig_sub['layout'][ax]['range'] = [date_start, date_end]
            fig_sub['layout'][ax]['dtick'] = 86400000.0 * 7
    self.plot = fig_sub
The function px.timeline returns a tuple of traces, one for every different color. If there is a single color in the subplot, the bars look fine (with the height I need in order to display the inside text).
But when two colors are present, I have to add another trace to the same subplot containing the bars of the other color.
# Copy every trace of each per-customer figure into that customer's subplot row.
for i, (customer, plot) in enumerate(customer_plots.items()):
    for trace in plot['data']:
        fig_sub.add_trace(trace, row=i + 1, col=1)
When I do so, it messes with the heights of the bars and I can't actually see the text inside them:
I'd like them to look the same as in the first picture, without extra spacing between them. How can this be accomplished?
As I see it, this reduces to the same question and answer as "color coding gantt on column".
You have not provided sample data, so I have simulated some. The question you are asking can also be reduced to a much simpler setup:
import pandas as pd
import plotly.express as px
import numpy as np
# Seeded generator so the simulated Estat values are reproducible.
r = np.random.RandomState(22)
# 100 simulated tasks: 5 customers x 20 tasks, weekly start and due dates.
sample = {
    "Customer": np.repeat(list("ABCDE"), 20),
    "StartDate": pd.date_range("1-jan-2020", freq="W-MON", periods=100),
    "DueDate": pd.date_range("10-jan-2020", freq="W-FRI", periods=100),
    "ProdName": np.tile(["quick", "brown", "fox", "jumped", "over"], 20),
    "DocNum": np.tile(range(8, 13), 20),
    "Estat": r.choice(list("QRSTUV"), 100),
}
df = pd.DataFrame(sample)

# No color= argument here: Estat travels in customdata (via hover_data)
# and is turned into per-bar colours afterwards.
fig = px.timeline(
    df,
    x_start="StartDate",
    x_end="DueDate",
    y="ProdName",
    text="DocNum",
    hover_data=["Estat"],
    facet_row="Customer",
)

# generate a color map for values to color
cmap = dict(zip(df["Estat"].unique(), np.tile(px.colors.qualitative.Plotly, 20)))


def _colour_from_estat(trace):
    """Set the trace's marker colours from the first customdata column."""
    colours = [cmap[value] for value in trace["customdata"][:, 0]]
    trace.update({"marker": {"color": colours}})


# update marker colors using column
fig.for_each_trace(_colour_from_estat)
Don't use the Plotly Express color parameter, because it generates one trace per value. Instead, save the required information into customdata using the hover_data parameter.
Then update all traces, setting marker_color to an array that maps each value to a color.
Integrated
Your sample code looks like a method in a class, so I have synthesized a class and integrated the solution:
import pandas as pd
import numpy as np
from plotly.express import timeline
import plotly.express as px
from plotly.subplots import make_subplots
class dummy:
    """Stand-in class holding simulated task data and the Gantt figure built from it."""

    def __init__(self):
        """Populate ``self.filtered_df`` with 100 simulated tasks (seeded RNG)."""
        r = np.random.RandomState(22)
        self.filtered_df = pd.DataFrame(
            {
                "Customer": np.repeat(list("ABCDE"), 20),
                "StartDate": pd.date_range("1-jan-2020", freq="W-MON", periods=100),
                "DueDate": pd.date_range("10-jan-2020", freq="W-FRI", periods=100),
                "ProdName": np.tile(["quick", "brown", "fox", "jumped", "over"], 20),
                "DocNum": np.tile(range(8, 13), 20),
                "Estat": r.choice(list("QRSTUV"), 100),
            }
        )

    def create_plot(self, date_start, date_end):
        """Build one Gantt subplot per customer and store the figure on ``self.plot``.

        Each customer's timeline is created without ``color=`` so it
        contributes its bars through ``hover_data`` customdata; bar colours
        are then assigned from the ``Estat`` value carried there.

        Args:
            date_start: Lower bound applied to the range of every x axis.
            date_end: Upper bound applied to the range of every x axis.
        """
        df = self.filtered_df
        # Create a dict for every customer
        customers = df["Customer"].unique()
        # Pixels per horizontal bar
        pixel = 35
        # Create a gantt chart for every customer
        customer_plots = dict()
        heights = []
        for customer in customers:
            # filter tasks for customer
            customer_plot = df.loc[df["Customer"] == customer]
            # create timeline
            fig = timeline(
                customer_plot,
                x_start="StartDate",
                y="ProdName",
                x_end="DueDate",
                text="DocNum",
                hover_data=["Estat"],
            )
            fig.update_traces(textposition="inside")
            customer_plots[customer] = fig
            # Row height grows with the number of tasks for this customer.
            heights.append(5 + len(customer_plot) * pixel)
        # Plot them in a shared x_axis
        fig_sub = make_subplots(
            rows=len(customer_plots),
            row_heights=heights,
            subplot_titles=list(customer_plots.keys()),
        )
        fig_sub.update_xaxes(type="date")
        # Copy every trace of each per-customer figure into that customer's row.
        for i, (customer, plot) in enumerate(customer_plots.items()):
            for trace in plot["data"]:
                fig_sub.add_trace(trace, row=i + 1, col=1)
        fig_sub.update_layout(autosize=False, height=sum(heights), width=1500)
        # Apply the requested date window and tick step to every x axis entry.
        for ax in fig_sub["layout"]:
            if ax[:5] == "xaxis":
                fig_sub["layout"][ax]["range"] = [date_start, date_end]
                fig_sub["layout"][ax]["dtick"] = 86400000.0 * 7
        # Map each distinct Estat value to a colour from the qualitative palette
        # (the palette is tiled so it cannot run out of entries).
        cmap = {
            v: c
            for v, c in zip(
                df["Estat"].unique(), np.tile(px.colors.qualitative.Plotly, 20)
            )
        }
        # Colour each bar from the Estat value in its first customdata column.
        fig_sub.for_each_trace(
            lambda t: t.update(
                {"marker": {"color": [cmap[a] for a in t["customdata"][:, 0]]}}
            )
        )
        self.plot = fig_sub
# Build the sample object, then plot across the full span of the data:
# earliest StartDate to latest DueDate.
d = dummy()
d.create_plot(d.filtered_df["StartDate"].min(),d.filtered_df["DueDate"].max())
# Bare expression: references the figure stored by create_plot.
d.plot

Set different colors based on categories when box plotting in Plotly

Assume having the data:
import numpy as np
import pandas as pd

# Sample data: 100 normally distributed values, each randomly assigned to
# category "a" or "b".
df = pd.DataFrame(
    {
        "val": np.random.normal(0, 1, size=100),
        "cat": np.random.choice(["a", "b"], size=100),
    }
)
Next, visualize a box plot:
from plotly import graph_objects as go
# One box trace over the whole frame, grouped on the x axis by category;
# boxmean="sd" is the option the question relies on to show the standard deviation.
fig = go.Figure()
box_trace = go.Box(
    x=df["cat"],
    y=df["val"],
    boxmean="sd",
)
fig.add_trace(box_trace)
I'm using go.Box since I want to visualize the standard deviation. This yields:
How can I set different colors for the left and right box plots depending on the category?
You can change the color by adding a separate trace for each category in a loop.
# Add one box trace per category; each trace is a separate, named series.
for c in df['cat'].unique():
    dff = df[df['cat'] == c]
    fig.add_trace(go.Box(y=dff["val"], x=dff["cat"], boxmean="sd", name=c))

Add multiple filter dimensions in Plotly figure

I construct datasets for games, showing the course of a value produced by actors over time. I want to plot this data with Plotly and I'm trying to figure out how to customize the filter dimensions to be able to plot the data of multiple games in one plot.
How my data looks
I have four columns, each describing a piece of information in my dataset. For each game (column game) I have an amount (column value) for a specific round (column round) for each actor (column actor).
To plot the "value" development over time (rounds) for each actor, I use Plotly to create a line plot with a slider for the different games. It looks like this with random data.
import numpy as np
import pandas as pd
import plotly.express as px
n_rounds = 30
# Parallel column lists, filled row by row and combined into a frame below.
actor_column = []
round_column = []
value_column = []
game_column = []
actor_names = ["A", "B", "C"]
X = range(n_rounds)
for game_id in range(1, 5):
    # One random value series per actor for this game.
    Ys = []
    for actor_name in actor_names:
        Y = np.random.randint(0, 50000, n_rounds - 1)
        Ys.append(Y)
    # Flatten to one row per (game, actor, round).  zip stops at the shorter
    # input, so each actor contributes n_rounds - 1 rows.
    for idx, Y in enumerate(Ys):
        for x, y in zip(X, Y):
            actor_column.append(actor_names[idx])
            round_column.append(x)
            value_column.append(y)
            game_column.append(game_id)
data = {
    "game": game_column,
    "actor": actor_column,
    "round": round_column,
    "value": value_column,
}
df = pd.DataFrame(data)
# One line per actor; the animation slider switches between games.
fig = px.line(
    df,
    x="round",
    y="value",
    color="actor",
    animation_frame="game",
)
fig.show()
With the help of the slider I can choose the game to display. By using the actor filters on the right, I can choose which actor(s) to show. This comes pretty close to what I want.
Question
How can I have an additional filter dimension for games instead of using the slider? Similar to the existing actor filters, I also want game filters for my plot. For example, I want to be able to show the plot for one actor across all games, using Plotly.
I'm thinking of something like this (not necessarily using different dash types):
Note:
There has to be a better solution than using different line dashes for the games and having all combinations of games and actors as filter values, like this fig = px.line(df, x="round", y="value", color="actor", line_dash="game").

Create a stacked graph or bar graph using plotly in python

I have data like this :
[ ('2018-04-09', '10:18:11',['s1',10],['s2',15],['s3',5])
('2018-04-09', '10:20:11',['s4',8],['s2',20],['s1',10])
('2018-04-10', '10:30:11',['s4',10],['s5',6],['s6',3]) ]
I want to plot a stacked graph preferably of this data.
X-axis will be time,
it should be like this
I created this image in paint just to show.
X axis will show time like normal graph does( 10:00 ,April 3,2018).
I am stuck because the string values (like 's1' or 's2') change from one bar to the next.
Just to hard-code some values and verify, I tried this:
import plotly
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import matplotlib
# Enable plotly's offline notebook mode before defining the page builder.
plotly.offline.init_notebook_mode()
# Hard-coded attempt: build a single bar trace and write it to an HTML file.
def createPage():
graph_data = []
# NOTE(review): in each list below there is no comma between the second and
# third tuple, so Python evaluates ('com.p2',2)('com.p3',3) as a call on a
# tuple — this matches the reported "tuple object is not callable" error.
l1=[('com.p1',1),('com.p2',2)('com.p3',3)]
l2=[('com.p1',1),('com.p4',2)('com.p5',3)]
l3=[('com.p2',8),('com.p3',2)('com.p6',30)]
# Only l1[0] is used; l2 and l3 are never referenced in this function.
trace_temp = go.Bar(
x='2018-04-09 10:18:11',
y=l1[0],
name = 'top',
)
graph_data.append(trace_temp)
plotly.offline.plot(graph_data, filename='basic-scatter3.html')
createPage()
Error I am getting is Tuple Object is not callable.
So can someone please suggest some code for how I can plot such data.
If needed,I may store data in some other form which may be helpful in plotting.
Edit :
I used the approach suggested in accepted answer and succeed in plotting using plotly like this
# Build a stacked bar figure from the frame, then write it to an HTML file.
fig = df.iplot(kind='bar', barmode='stack', asFigure=True)
plotly.offline.plot(fig, filename="stack1.html")
However, I ran into one problem:
1. When time intervals are very close together, the data overlaps on the graph.
Is there a way to overcome this?
You could use a pandas stacked bar plot. The advantage is that pandas makes it easy to create the table of column/value pairs, which you have to generate anyhow.
from matplotlib import pyplot as plt
import pandas as pd
# Each record is (date, time, [category, value], [category, value], [category, value]).
all_data = [
    ("2018-04-09", "10:18:11", ["s1", 10], ["s2", 15], ["s3", 5]),
    ("2018-04-09", "10:20:11", ["s4", 8], ["s2", 20], ["s1", 10]),
    ("2018-04-10", "10:30:11", ["s4", 10], ["s5", 6], ["s6", 3]),
]

# Load the records; columns A/B hold date and time, C/D/E hold the pairs.
df = pd.DataFrame(all_data, columns=["A", "B", "C", "D", "E"])

# Merge date and time into one two-line label used on the x axis.
df["day/time"] = df["A"] + "\n" + df["B"]

# Long format: one row per [category, value] pair, keyed by its label.
df = df.melt(id_vars=["day/time"], value_vars=["C", "D", "E"])

# Unpack each pair into separate category and val columns.
pairs = df["value"].tolist()
df[["category", "val"]] = pd.DataFrame(pairs, index=df.index)

# Wide format: one column per category; combinations that never occur become NaN.
df = df.pivot(index="day/time", columns="category", values="val")

# Stacked bars, with horizontal tick labels.
df.plot(kind="bar", stacked=True)
plt.xticks(rotation=0)
plt.show()
Output:

Python: How to create a step plot with offline plotly for a pandas DataFrame?

Lets say we have following DataFrame and corresponding graph generated:
import pandas as pd
import plotly
from plotly.graph_objs import Scatter
# Five irregularly spaced timestamps used as the frame's index.
timestamps = pd.to_datetime(
    [
        "2015-01-01 00:00",
        "2015-01-01 10:00",
        "2015-01-01 20:00",
        "2015-01-02 22:00",
        "2015-01-02 23:00",
    ]
)
df = pd.DataFrame({"value": [10, 7, 0, 3, 8]}, index=timestamps)

# Default Scatter trace, rendered with offline plotly.
value_trace = Scatter(x=df.index, y=df["value"])
plotly.offline.plot({"data": [value_trace]})
Expected results
If I use below code:
import matplotlib.pyplot as plt
# Reference rendering with matplotlib: a step plot of the value column against
# the index, using where="post".
plt.step(df.index, df["value"],where="post")
plt.show()
I get a step graph as below:
Question
How can I get same results as step function but using offline plotly instead?
We can use the line parameter shape option as hv using below code:
# Scatter trace described as a plain dict; the "hv" line shape draws the
# step-style line that the question asks for.
trace1 = {
    "x": df.index,
    "y": df["value"],
    "line": {"shape": "hv"},
    "mode": "lines",
    "name": "value",
    "type": "scatter",
}
data = [trace1]
plotly.offline.plot({"data": data})
Which generates below graph:
As your data is a pandas dataframe, alternatively to offline plotly, you could use plotly express:
import plotly.express as plx
# Same "hv" step shape, drawn directly from the data frame with Plotly Express.
plx.line(df,line_shape='hv')
Other line_shape options, or interpolation methods between given points:
'hv' step ends, equivalent to pyplot's post option;
'vh' step starts;
'hvh' step middles, x axis;
'vhv' step middles, y axis;
'spline' smooth curve between points;
'linear' line segments between points, default value for line_shape.
Just try them...
# One figure per line_shape option listed above, so they can be compared.
hv = plx.line(df,line_shape='hv')
vh = plx.line(df,line_shape='vh')
vhv = plx.line(df,line_shape='vhv')
hvh = plx.line(df,line_shape='hvh')
spline = plx.line(df,line_shape='spline')
# 'linear' is described above as the default line_shape.
line = plx.line(df,line_shape='linear')
Which one you select should depend on the nature of your data...

Categories

Resources