Plot a best fit over a scatter plot with holoviews - python

I am trying to make a best fit line for all of my graphs in holoviews, right now it just makes a line based on all the data instead of each graph individually.
vdims = [('year avg', 'Yearly Average Temperature')]
ds = hv.Dataset(temp, ['Year','State Name'], vdims)
ds = ds.aggregate(function=np.mean)
scat = hv.Scatter(ds,'Year','year avg')
layout = ds.to(hv.Scatter,'Year','year avg') * hv.Slope.from_scatter(scat)
layout.opts(opts.Curve(width=800, height=400, framewise=True))
Which gives this
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import holoviews as hv
hv.extension('bokeh')
from holoviews import opts
from holoviews.plotting.links import DataLink
import hvplot.pandas
df = pd.DataFrame(np.random.randint(0,100,size=(100, 4)), columns=list('ABCD'))
df['Even'] = df['D']%2 == 0
vdims = [('A', 'A')]
ds = hv.Dataset(df, ['B','Even'], vdims)
#ds = ds.aggregate(function=np.mean) this does nothing here, in my code it takes the mean of 100 or so data points in each group for each year.
scat = hv.Scatter(ds,'B','A')
layout = ds.to(hv.Scatter,'B','A') * hv.Slope.from_scatter(scat)
layout.opts(opts.Curve(width=800, height=400, framewise=True))
The idea is to make a scatter plot grouped by a variable in the dataframe, in this case if D is even or odd, and also plot a best fit line of that same grouped scatter plot.
What I have is a set of grouped scatter plots but with a best fit line based on all of the data combined, not grouped by even and odd.

Related

Plotly: How to plot histogram in Root style showing only the contours of the histogram?

I want to make a histogram with this style:
But using plotly in Python. I.e. I want to merge the bars and plot only the contour. I am using this code:
import plotly.graph_objects as go
import numpy as np
x = np.random.randn(500)
fig = go.Figure(data=[go.Histogram(x=x)])
fig.show()
I have been looking for examples on how to do this but could not find any.
Your best option is to handle the histogram with numpy like count, index = np.histogram(df['data'], bins=25) , and then use go.Scatter() and set the linetype to horizontal, vertical, horizontal with line=dict(width = 1, shape='hvh'). Take a look at the very last section why go.Histogram() will not be your best option. With a few other specifications for the layout of go.Scatter(), the snippet below will produce the following plot:
Complete code
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.express as px
pio.templates.default = "plotly_white"
# random numbers to a df
np.random.seed(12)
df = pd.DataFrame({'data': np.random.randn(500)})
# produce histogram data wiht numpy
count, index = np.histogram(df['data'], bins=25)
# plotly, go.Scatter with line shape set to 'hvh'
fig = go.Figure()
fig.add_traces(go.Scatter(x=index, y = count,
line=dict(width = 1, shape='hvh')))
# y-axis cosmetics
fig.update_yaxes(
showgrid=False,
ticks="inside",
tickson="boundaries",
ticklen=10,
showline=True,
linewidth=1,
linecolor='black',
mirror=True,
zeroline=False)
# x-axis cosmetics
fig.update_xaxes(
showgrid=False,
ticks="inside",
tickson="boundaries",
ticklen=10,
showline=True,
linewidth=1,
linecolor='black',
mirror=True,
zeroline=False)
fig.show()
Why go.Scatter() and not go.Histogram()?
The closest you'll get to your desired plot using your approach with fig = go.Figure(data=[go.Histogram(x=x)]) is this:
And that's pretty close, but you specifically wanted to exclude the vertical lines for each "bar". And I have yet not found a way to exclude or hide them with the go.Histogram setup.
Code for go.Histogram()
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.express as px
pio.templates.default = "plotly_white"
import numpy as np
x = np.random.randn(500)
fig = go.Figure(data=[go.Histogram(x=x)])
fig.update_traces(marker=dict(color='rgba(0,0,0,0)', line=dict(width=1, color='blue')))
fig.show()
for a variation plotly.go.Histogram(): Show only horizontal lines of distribution. Plot just the lines
using pandas instead of numpy to build data for histogram then plotting as a line scatter
import plotly.graph_objects as go
import numpy as np
import pandas as pd
x = np.random.randn(100)
# build data frame that is histogram
df = pd.cut(x, bins=10).value_counts().to_frame().assign(
l=lambda d: pd.IntervalIndex(d.index).left,
r=lambda d: pd.IntervalIndex(d.index).right,
).sort_values(["l","r"]).rename(columns={0:"y"}).astype(float)
# lines in plotly are delimited by none
def line_array(df, cols):
return np.pad(
df.loc[:, cols].values, [(0, 0), (0, 1)], constant_values=None
).reshape(1, (len(df) * 3))[0]
# plot just lines
go.Figure(go.Scatter(x=line_array(df, ["l","r"]), y=line_array(df, ["y","y"]), marker={"color":"black"}))

How to make animated 3D scatter plot in plotly

My goal is to create an animation with my 3D data in plotly.
I have 3 variables x,y,z for simplicity and I plot the 4th value depending on these x,y,z.
I create a 3D scatter plot where the 4th dim sort to speak is the color like this:
from numpy import genfromtxt
import numpy as np
import plotly.io as pio
import plotly.express as px
pio.renderers.default = 'notebook'
import plotly.graph_objects as go
import math
import pandas as pd
data = pd.read_csv("paramtp_1e-05_big.txt")
data.head()
data = data.iloc[::10, :]
color_data = data['gopt'].astype(float).round(decimals=2)
color_data[color_data>= 10] = 10
color_data_nopt = data['nopt'].astype(float).round(decimals=3)
color_data_mc = data['mc'].astype(float).round(decimals=3)
color_data_P= data['P']
color_data_P[color_data_P >= 1] = 1
data= data.replace(np.nan, '', regex=True)
data.tail()
fig = px.scatter_3d(data, x='NpN0', y='s', z='mu',log_x=True, log_z=True,
opacity = 0.5,
color=color_data,color_continuous_scale=px.colors.sequential.Viridis)
fig.add_trace(
go.Scatter(
mode='markers',
marker=dict(
size=1,
opacity=0.5,
),
)
)
fig.show()
Similarly to this wonderful animation: https://plotly.com/python/visualizing-mri-volume-slices/
I would like to slice up my data to isosurfaces with respect to any x,y,z coordinates.
As in the example they use images, I could not wrap my head around to create the same with my raw data.
Thank you in advance.

stating the co-ordinates of a specific data point in scatter plot

I am a beginner in python. I am trying to plot a CSV file in the form of a facet grid using the seaborne library.
import matplotlib.pyplot as plt
import seaborn as sns
g = sns.FacetGrid(df, col="Gamma1",col_wrap=6,sharex=False)
g = (g.map(plt.scatter, "ARMSE", "Frobenius_norm_correlation").add_legend())
plt.subplots_adjust(top=0.9)
g.fig.suptitle('Friedman_chain')
For each of the scatterplots in the facet grid, I want to state the co-ordinates of the data point with the minimum value of ARMSE and mark this point with a different color from the other data points in the given scatter plot.can you suggest to me how to do it?
The dataframe df contains the columns ARMSE,Gamma1,Frobenius_norm_correlation.
I am attaching the image of the current plot below :
You can create a column identifying the minimum data point as part of pre-processing and pass this column's name to seaborn.
For example, taking a sample dataset:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
df = pd.DataFrame(data={
"group": list("ABCDEFGHIJ") * 10,
"y": np.random.normal(loc=1, scale=1, size=100),
"x": np.array([[x] * 10 for x in range(10)]).flatten()
})
# new column identifying the minimum value
df["min"] = df["y"] == df.groupby("group")["y"].transform(min)
g = sns.FacetGrid(df, col="group", hue="min", col_wrap=5, sharex=True)
g = (g.map(plt.scatter, "x", "y").add_legend())
plt.subplots_adjust(top=0.9)
g.fig.suptitle('Min value detection')

Plotly: How to make a 3D stacked histogram?

I have several histograms that I succeded to plot using plotly like this:
fig.add_trace(go.Histogram(x=np.array(data[key]), name=self.labels[i]))
I would like to create something like this 3D stacked histogram but with the difference that each 2D histogram inside is a true histogram and not just a hardcoded line (my data is of the form [0.5 0.4 0.5 0.7 0.4] so using Histogram directly is very convenient)
Note that what I am asking is not similar to this and therefore also not the same as this. In the matplotlib example, the data is presented directly in a 2D array so the histogram is the 3rd dimension. In my case, I wanted to feed a function with many already computed histograms.
The snippet below takes care of both binning and formatting of the figure so that it appears as a stacked 3D chart using multiple traces of go.Scatter3D and np.Histogram.
The input is a dataframe with random numbers using np.random.normal(50, 5, size=(300, 4))
We can talk more about the other details if this is something you can use:
Plot 1: Angle 1
Plot 2: Angle 2
Complete code:
# imports
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = 'browser'
# data
np.random.seed(123)
df = pd.DataFrame(np.random.normal(50, 5, size=(300, 4)), columns=list('ABCD'))
# plotly setup
fig=go.Figure()
# data binning and traces
for i, col in enumerate(df.columns):
a0=np.histogram(df[col], bins=10, density=False)[0].tolist()
a0=np.repeat(a0,2).tolist()
a0.insert(0,0)
a0.pop()
a1=np.histogram(df[col], bins=10, density=False)[1].tolist()
a1=np.repeat(a1,2)
fig.add_traces(go.Scatter3d(x=[i]*len(a0), y=a1, z=a0,
mode='lines',
name=col
)
)
fig.show()
Unfortunately you can't use go.Histogram in a 3D space so you should use an alternative way. I used go.Scatter3d and I wanted to use the option to fill line doc but there is an evident bug see
import numpy as np
import plotly.graph_objs as go
# random mat
m = 6
n = 5
mat = np.random.uniform(size=(m,n)).round(1)
# we want to have the number repeated
mat = mat.repeat(2).reshape(m, n*2)
# and finally plot
x = np.arange(2*n)
y = np.ones(2*n)
fig = go.Figure()
for i in range(m):
fig.add_trace(go.Scatter3d(x=x,
y=y*i,
z=mat[i,:],
mode="lines",
# surfaceaxis=1 # bug
)
)
fig.show()

Python plotting by different dataframe columns (using Seaborn?)

I'm trying to create a scatterplot of a dataset with point coloring based on different categorical columns. Seaborn works well here for one plot:
fg = sns.FacetGrid(data=plot_data, hue='col_1')
fg.map(plt.scatter, 'x_data', 'y_data', **kws).add_legend()
plt.show()
I then want to display the same data, but with hue='col_2' and hue='col_3'. It works fine if I just make 3 plots, but I'm really hoping to find a way to have them all appear as subplots in one figure. Unfortunately, I haven't found any way to change the hue from one plot to the next. I know there are plotting APIs that allow for an axis keyword, thereby letting you pop it into a matplotlib figure, but I haven't found one that simultaneously allows you to set 'ax=' and 'hue='. Any ideas?
Thanks in advance!
Edit:
Here's some sample code to illustrate the idea
xx = np.random.rand(10,2)
cat1 = np.array(['cat','dog','dog','dog','cat','hamster','cat','cat','hamster','dog'])
cat2 = np.array(['blond','brown','brown','black','black','blond','blond','blond','brown','blond'])
d = {'x':xx[:,0], 'y':xx[:,1], 'pet':cat1, 'hair':cat2}
df = pd.DataFrame(data=d)
sns.set(style='ticks')
fg = sns.FacetGrid(data=df, hue='pet', size=5)
fg.map(plt.scatter, 'x', 'y').add_legend()
fg = sns.FacetGrid(data=df, hue='hair', size=5)
fg.map(plt.scatter, 'x', 'y').add_legend()
plt.show()
This plots what I want, but in two windows. The color scheme is set in the first plot by grouping by 'pet', and in the second plot by 'hair'. Is there any way to do this on one plot?
In order to plot 3 scatterplots with different colors for each, you may create 3 axes in matplotlib and plot a scatter to each axes.
import pandas as pd
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.rand(10,5),
columns=["x", "y", "col1", "col2", "col3"])
fig, axes = plt.subplots(nrows=3)
for ax, col in zip(axes, df.columns[2:]):
ax.scatter(df.x, df.y, c=df[col])
plt.show()
For categorical data it is often easier to plot several scatter plots, one per category.
import pandas as pd
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
import seaborn as sns
xx = np.random.rand(10,2)
cat1 = np.array(['cat','dog','dog','dog','cat','hamster','cat','cat','hamster','dog'])
cat2 = np.array(['blond','brown','brown','black','black','blond','blond','blond','brown','blond'])
d = {'x':xx[:,0], 'y':xx[:,1], 'pet':cat1, 'hair':cat2}
df = pd.DataFrame(data=d)
cols = ['pet',"hair"]
fig, axes = plt.subplots(nrows=len(cols ))
for ax,col in zip(axes,cols):
for n, group in df.groupby(col):
ax.scatter(group.x,group.y, label=n)
ax.legend()
plt.show()
You may surely use a FacetGrid, if you really want, but that requires a different data format of the DataFrame.
import pandas as pd
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
import seaborn as sns
xx = np.random.rand(10,2)
cat1 = np.array(['cat','dog','dog','dog','cat','hamster','cat','cat','hamster','dog'])
cat2 = np.array(['blond','brown','brown','black','black','blond','blond','blond','brown','blond'])
d = {'x':xx[:,0], 'y':xx[:,1], 'pet':cat1, 'hair':cat2}
df = pd.DataFrame(data=d)
df2 = pd.melt(df, id_vars=['x','y'], value_name='category', var_name="kind")
fg = sns.FacetGrid(data=df2, row="kind",hue='category', size=3)
fg.map(plt.scatter, 'x', 'y').add_legend()

Categories

Resources