How to show every value in y-axis using Plotly Express? - python

When I do:
fig = px.line(df, x="day", y="avg_spending")
fig.show()
It doesn't put values in y axis by 2 (0, 2, 4,...). I want it to be 1 by 1 (0,1,2,3,..). My maximum value of "avg_spending" in df is 17, so I would like there to be 1,2,3,...,17 on y axis. How to do that?

set the dtick parameter on the axis
import numpy as np
import pandas as pd
import plotly.express as px
df = pd.DataFrame({"day":range(50),"avg_spending":np.random.randint(1,17,50)})
fig = px.line(df, x="day", y="avg_spending")
fig.update_layout(yaxis={"dtick":1},margin={"t":0,"b":0},height=500)

Related

Seaborn xaxis with large timeline

I have around 4475 rows of csv data like below:
,Time,Values,Size
0,1900-01-01 23:11:30.368,2,
1,1900-01-01 23:11:30.372,2,
2,1900-01-01 23:11:30.372,2,
3,1900-01-01 23:11:30.372,2,
4,1900-01-01 23:11:30.376,2,
5,1900-01-01 23:11:30.380,,
6,1900-01-01 23:11:30.380,,
7,1900-01-01 23:11:30.380,,
8,1900-01-01 23:11:30.380,,321
9,1900-01-01 23:11:30.380,,111
.
.
4474,1900-01-01 23:11:32.588,,
When I try to create simple seaborn lineplot with below code. It creates line chart but its continuous chart while my data i.e. 'Values' has many empty/nan values which should show as gap on chart. How can I do that?
[from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("Data.csv")
sns.set(rc={'figure.figsize':(13,4)})
ax =sns.lineplot(x="Time", y="Values", data=df)
ax.set(xlabel='Time', ylabel='Values')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()]
As reported in this answer:
I've looked at the source code and it looks like lineplot drops nans from the DataFrame before plotting. So unfortunately it's not possible to do it properly.
So, the easiest way to do it is to use matplotlib in place of seaborn.
In the code below I generate a dataframe like your with 20% of missing values in 'Values' column and I use matplotlib to draw a plot:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame({'Time': pd.date_range(start = '1900-01-01 23:11:30', end = '1900-01-01 23:11:30.1', freq = 'L')})
df['Values'] = np.random.randint(low = 2, high = 10, size = len(df))
df['Values'] = df['Values'].mask(np.random.random(df['Values'].shape) < 0.2)
fig, ax = plt.subplots(figsize = (13, 4))
ax.plot(df['Time'], df['Values'])
ax.set(xlabel = 'Time', ylabel = 'Values')
plt.xticks(rotation = 90)
plt.tight_layout()
plt.show()

Plotly: How to plot histogram in Root style showing only the contours of the histogram?

I want to make a histogram with this style:
But using plotly in Python. I.e. I want to merge the bars and plot only the contour. I am using this code:
import plotly.graph_objects as go
import numpy as np
x = np.random.randn(500)
fig = go.Figure(data=[go.Histogram(x=x)])
fig.show()
I have been looking for examples on how to do this but could not find any.
Your best option is to handle the histogram with numpy like count, index = np.histogram(df['data'], bins=25) , and then use go.Scatter() and set the linetype to horizontal, vertical, horizontal with line=dict(width = 1, shape='hvh'). Take a look at the very last section why go.Histogram() will not be your best option. With a few other specifications for the layout of go.Scatter(), the snippet below will produce the following plot:
Complete code
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.express as px
pio.templates.default = "plotly_white"
# random numbers to a df
np.random.seed(12)
df = pd.DataFrame({'data': np.random.randn(500)})
# produce histogram data wiht numpy
count, index = np.histogram(df['data'], bins=25)
# plotly, go.Scatter with line shape set to 'hvh'
fig = go.Figure()
fig.add_traces(go.Scatter(x=index, y = count,
line=dict(width = 1, shape='hvh')))
# y-axis cosmetics
fig.update_yaxes(
showgrid=False,
ticks="inside",
tickson="boundaries",
ticklen=10,
showline=True,
linewidth=1,
linecolor='black',
mirror=True,
zeroline=False)
# x-axis cosmetics
fig.update_xaxes(
showgrid=False,
ticks="inside",
tickson="boundaries",
ticklen=10,
showline=True,
linewidth=1,
linecolor='black',
mirror=True,
zeroline=False)
fig.show()
Why go.Scatter() and not go.Histogram()?
The closest you'll get to your desired plot using your approach with fig = go.Figure(data=[go.Histogram(x=x)]) is this:
And that's pretty close, but you specifically wanted to exclude the vertical lines for each "bar". And I have yet not found a way to exclude or hide them with the go.Histogram setup.
Code for go.Histogram()
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.express as px
pio.templates.default = "plotly_white"
import numpy as np
x = np.random.randn(500)
fig = go.Figure(data=[go.Histogram(x=x)])
fig.update_traces(marker=dict(color='rgba(0,0,0,0)', line=dict(width=1, color='blue')))
fig.show()
for a variation plotly.go.Histogram(): Show only horizontal lines of distribution. Plot just the lines
using pandas instead of numpy to build data for histogram then plotting as a line scatter
import plotly.graph_objects as go
import numpy as np
import pandas as pd
x = np.random.randn(100)
# build data frame that is histogram
df = pd.cut(x, bins=10).value_counts().to_frame().assign(
l=lambda d: pd.IntervalIndex(d.index).left,
r=lambda d: pd.IntervalIndex(d.index).right,
).sort_values(["l","r"]).rename(columns={0:"y"}).astype(float)
# lines in plotly are delimited by none
def line_array(df, cols):
return np.pad(
df.loc[:, cols].values, [(0, 0), (0, 1)], constant_values=None
).reshape(1, (len(df) * 3))[0]
# plot just lines
go.Figure(go.Scatter(x=line_array(df, ["l","r"]), y=line_array(df, ["y","y"]), marker={"color":"black"}))

Plotly: How to plot on secondary y-Axis with plotly express

How do I utilize plotly.express to plot multiple lines on two yaxis out of one Pandas dataframe?
I find this very useful to plot all columns containing a specific substring:
fig = px.line(df, y=df.filter(regex="Linear").columns, render_mode="webgl")
as I don't want to loop over all my filtered columns and use something like:
fig.add_trace(go.Scattergl(x=df["Time"], y=df["Linear-"]))
in each iteration.
It took me some time to fiddle this out, but I feel this could be useful to some people.
# import some stuff
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
# create some data
df = pd.DataFrame()
n = 50
df["Time"] = np.arange(n)
df["Linear-"] = np.arange(n)+np.random.rand(n)
df["Linear+"] = np.arange(n)+np.random.rand(n)
df["Log-"] = np.arange(n)+np.random.rand(n)
df["Log+"] = np.arange(n)+np.random.rand(n)
df.set_index("Time", inplace=True)
subfig = make_subplots(specs=[[{"secondary_y": True}]])
# create two independent figures with px.line each containing data from multiple columns
fig = px.line(df, y=df.filter(regex="Linear").columns, render_mode="webgl",)
fig2 = px.line(df, y=df.filter(regex="Log").columns, render_mode="webgl",)
fig2.update_traces(yaxis="y2")
subfig.add_traces(fig.data + fig2.data)
subfig.layout.xaxis.title="Time"
subfig.layout.yaxis.title="Linear Y"
subfig.layout.yaxis2.type="log"
subfig.layout.yaxis2.title="Log Y"
# recoloring is necessary otherwise lines from fig und fig2 would share each color
# e.g. Linear-, Log- = blue; Linear+, Log+ = red... we don't want this
subfig.for_each_trace(lambda t: t.update(line=dict(color=t.marker.color)))
subfig.show()
The trick with
subfig.for_each_trace(lambda t: t.update(line=dict(color=t.marker.color)))
I got from nicolaskruchten here: https://stackoverflow.com/a/60031260
Thank you derflo and vestland! I really wanted to use Plotly Express as opposed to Graph Objects with dual axis to more easily handle DataFrames with lots of columns. I dropped this into a function. Data1/2 works well as a DataFrame or Series.
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
def plotly_dual_axis(data1,data2, title="", y1="", y2=""):
# Create subplot with secondary axis
subplot_fig = make_subplots(specs=[[{"secondary_y": True}]])
#Put Dataframe in fig1 and fig2
fig1 = px.line(data1)
fig2 = px.line(data2)
#Change the axis for fig2
fig2.update_traces(yaxis="y2")
#Add the figs to the subplot figure
subplot_fig.add_traces(fig1.data + fig2.data)
#FORMAT subplot figure
subplot_fig.update_layout(title=title, yaxis=dict(title=y1), yaxis2=dict(title=y2))
#RECOLOR so as not to have overlapping colors
subplot_fig.for_each_trace(lambda t: t.update(line=dict(color=t.marker.color)))
return subplot_fig

Dataframe Bar plot with Seaborn

I'm trying to create a bar plot from a DataFrame with Datetime Index.
This is an example working code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set()
index = pd.date_range('2012-01-01', periods=48, freq='M')
data = np.random.randint(100, size = (len(index),1))
df = pd.DataFrame(index=index, data=data, columns=['numbers'])
fig, ax = plt.subplots()
ax.bar(df.index, df['numbers'])
The result is:
As you can see the white bars cannot be distinguished well with respect of the background (why?).
I tried using instead:
df['numbers'].plot(kind='bar')
import matplotlib.ticker as ticker
ticklabels = df.index.strftime('%Y-%m')
ax.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))
with this result:
But in this way I lose the automatic xticks labels (and grid) 6-months spacing.
Any idea?
You can just change the style:
import matplotlib.pyplot as plt
index = pd.date_range('2012-01-01', periods=48, freq='M')
data = np.random.randint(100, size = (len(index),1))
df = pd.DataFrame(index=index, data=data, columns=['numbers'])
plt.figure(figsize=(12, 5))
plt.style.use('default')
plt.bar(df.index,df['numbers'],color="red")
You do not actually use seaborn. Replace ax.bar(df.index, df['numbers'])
with
sns.barplot(df.index, df['numbers'], ax=ax)

Matplotlib: cbar.set_xticklabels has no effects

I've assigned the 365 days of a year to several clusters and I'm now trying to plot them on a heatmap.
My code works fine except that cbar.set_ticks(some_range) has no effects: the tick labels on my colorbar have the right text but the wrong position
Here is a MCVE
from datetime import date
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import matplotlib
import seaborn as sns
#create some random data
n_cluster = 4
index = pd.date_range('01/01/2016', end='31/12/2016', freq='1D')
df = pd.DataFrame(np.random.randint(0, n_cluster, len(index)),
index=index, columns=['cluster'])
pivot = df.pivot_table('cluster',
columns=[lambda x: x.weekofyear],
index= [lambda x: x.dayofweek])
#yticklabels of the heatmap
days = [date(2018, 1, d).strftime('%a')[:3] for d in range(1, 8)]
#get a discrete cmap
cmap = plt.cm.get_cmap('RdBu', n_cluster)
fig = plt.figure(figsize=(10,3))
gs = matplotlib.gridspec.GridSpec(1, 2, width_ratios=[50,1])
ax = plt.subplot(gs[0])
cbar = plt.subplot(gs[1])
sns.heatmap(pivot, square=True, cmap=cmap,
yticklabels=days, ax=ax, cbar_ax=cbar)
#There is something wrong here
cbar.set_yticks([i + 1/(2.0*n_cluster) for i in np.arange(0, 1, 1.0/n_cluster)])
#This one is ok
cbar.set_yticklabels(range(0, n_cluster))
Thanks for your help
As a workaround, the following adds the correct labels in the correct place,
cbar.yaxis.set_ticks([0.125, 0.375, 0.625, 0.875])
which looks like,
EDIT:
Or the more general suggestion of mfitzp,
cbar.yaxis.set_ticks([i + 1/(2.0*n_cluster)
for i in np.arange(0, 1, 1.0/n_cluster)])

Categories

Resources