Rather than plotting the raw values directly, I need to plot a smooth line chart in Python - python

I have 3 DataFrames for 3 machines (Machine1/Machine2/Machine3). Each df has 3 columns: Day-Shift, Production and Quality.
sample df:
Day-Shift Production Quality
Day 11-01 20 A
Night 11-01 45 A
Day 11-02 65 A
Night 11-02 12 B
Day 11-03 97 B
my code:
import numpy as np
import pandas as pd
from plotly.offline import iplot
import plotly.graph_objects as go
# Per-machine production values: the 'Production' value is kept where the
# row's 'Quality' equals 'A' and replaced by None everywhere else.
# Machine1.
b1 = np.where(df1['Quality'] == 'A', df1['Production'], None)
# Machine2: Same as above.
b2 = np.where(df2['Quality'] == 'A', df2['Production'], None)
# Machine3: Same as above.
b3 = np.where(df3['Quality'] == 'A', df3['Production'], None)
# Setup.
line = ['solid']
Quality = ['A']
# One trace per machine. Each trace carries its own legend name so the three
# lines can be told apart in the chart.
t = [
    {'x': frame['Day-Shift'],
     'y': values,
     'name': label,
     'line': {'color': colour,
              'dash': line[0]}}
    for label, frame, values, colour in [
        ('Machine1', df1, b1, 'red'),
        ('Machine2', df2, b2, 'blue'),
        ('Machine3', df3, b3, 'yellow'),
    ]
]
# Plot the graph.
layout = go.Layout(
    title='Production meterage of Machine1/Machine2/Machine3 for Quality A',
    template='plotly_dark',
    xaxis=dict(
        autorange=True
    ),
    yaxis=dict(
        autorange=True
    )
)
fig = go.Figure(data=t, layout=layout)
iplot(fig)
Chart I got:
I created one line chart for all three machines, but the line chart looks messy and needs smoothing. I tried gaussian_filter1d, but it does not work for me.

I think the best way of representing your data is with a histogram. I don't know much about the plotly offline module, but you can do it (easily) with matplotlib.
Here is some documentation from matplotlib
https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.hist.html
and an example:
https://matplotlib.org/3.1.1/gallery/statistics/hist.html
and an example with multiple datasets in one chart
https://matplotlib.org/3.1.1/gallery/statistics/histogram_multihist.html

Related

Sort grouped barchart with plotly

I am trying to create a grouped bar chart, which is working with my code so far. However, I can't find a way to sort the groupings among each other, how is that possible with plotly?
Example data of bar_df:
4061 4144 4181 4331
lr 45.9089 65.0693 37.0036 47.3485
knn 64.8903 87.25 48.278 81.9212
bay_r 51.9641 63.5313 39.7762 46.4237
svr 52.7827 63.4806 37.032 46.1108
Current Code for plot:
# The column labels of bar_df become the x-axis categories.
partners = bar_df.columns
fig = go.Figure()
# One bar trace per algorithm; each trace takes its y-values from the
# bar_df row whose index label equals the algorithm name.
algorithms = ("lr", "knn", "bay_r", "svr")
for algo in algorithms:
    row_values = bar_df[bar_df.index == algo].values[0]
    trace = go.Bar(x=partners, y=row_values, name=algo, opacity=0.75)
    fig.add_trace(trace)
# Figure size, grouped bar mode, title placement and axis labelling.
title_settings = {'text': f'Performance Modell-Vergleich', 'y': 0.9, 'x': 0.5}
fig.update_layout(
    barmode='group',
    width=1550,
    height=450,
    title=title_settings,
    xaxis_tickangle=-45,
    yaxis_title="MAE",
)
fig.show()
Image of the result of the current code:
You have not defined your order. An approach is to use https://pandas.pydata.org/docs/reference/api/pandas.CategoricalIndex.html to be able to define the order of the categories.
import pandas as pd
import plotly.express as px
import io
# Parse the whitespace-separated sample table. The header row has one field
# fewer than the data rows, so pandas takes the first column (the algorithm
# names) as the index. The separator is a raw string so that "\s" reaches the
# regex engine instead of being treated as an (invalid) string escape.
df = pd.read_csv(io.StringIO(""" 4061 4144 4181 4331
lr 45.9089 65.0693 37.0036 47.3485
knn 64.8903 87.25 48.278 81.9212
bay_r 51.9641 63.5313 39.7762 46.4237
svr 52.7827 63.4806 37.032 46.1108"""), sep=r"\s+")
# use pandas categorical to sort categories: the order given in `categories`
# is the order the rows end up in after sort_index()
df = df.set_index(pd.CategoricalIndex(df.index, ordered=True, categories=['svr', 'bay_r', 'knn', 'lr'])).sort_index()
# create figure with px, it's simpler: the frame is reshaped to long form
# (columns "index", "variable", "value") and px.bar colours the bars by "index"
px.bar(df.reset_index().melt(id_vars="index"), color="index", x="variable", y="value").update_layout(
# width=1550,
height=450,
# 'group' places the bars of each x category beside each other
barmode='group',
title={
'text': f'Performance Modell-Vergleich',
'y': 0.9,
'x': 0.5,
},
yaxis_title="MAE",
xaxis_tickangle=-45
)

Bokeh: Legend outside plot in multi line chart

I have a multi line plot in Bokeh:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.palettes import Category20c_7
from bokeh.io import output_file
from bokeh.models import SingleIntervalTicker, LinearAxis, ColumnDataSource
output_file("conso_daily.html")
treatcriteria_daily_data = pd.read_csv("treatcriteria_evolution.csv", sep=';')
final_daily_data = treatcriteria_daily_data.groupby(['startdate_weekyear','startdate_dayweek'],as_index = False).sum().pivot('startdate_weekyear','startdate_dayweek').fillna(0)
# keep only integer values in x axis
def interval_integer(plot):
    """Add an x-axis below *plot* whose ticker uses an interval of 1."""
    integer_axis = LinearAxis(
        ticker=SingleIntervalTicker(interval=1, num_minor_ticks=1)
    )
    plot.add_layout(integer_axis, 'below')
# One palette entry per column of the pivoted data.
numlines = len(final_daily_data.columns)
palette = Category20c_7[0:numlines]
# remove the last week if there is not all the data
# (only rows in which every value is non-zero are kept)
complete_rows = (final_daily_data != 0).all(1)
data_without_last_week = final_daily_data[complete_rows]
# One list of values per column of the filtered data.
cpu_values_daily = data_without_last_week.values.T.tolist()
# Every line uses the same x values, so the index is repeated once per column.
weeks = [data_without_last_week.index
         for _ in range(len(data_without_last_week.columns))]
df = dict(
    week=weeks,
    day=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    color=['red', 'orange', 'yellow', 'green', 'grey', 'pink', 'purple'],
    HCPU=cpu_values_daily,
)
source = ColumnDataSource(df)
p = figure(width=800, height=500)
p.multi_line(
    xs='week', ys='HCPU', legend='day', color='color',
    line_width=5, line_alpha=0.6, hover_line_alpha=1.0,
    source=source,
)
# Hide the existing x-axis, then add the one created by interval_integer().
p.xaxis.visible = False
p.left[0].formatter.use_scientific = False
interval_integer(p)
show(p)
I want to show legend outside the plot area because the top curve (Sunday) is hidden.
I try to follow this thread, but it's for single lines and not for multiline: Create a two line legend in a bokeh plot
Using this code, I tried to show the legend on the right, outside the plot area, but it doesn't work:
# NOTE: `p` is the figure itself, so indexing it (p[0], p[1], ...) is what
# raises "TypeError: 'Figure' object is not subscriptable".
legend = Legend(items=[
('Monday', [p[0]]),
('Tuesday', [p[1]]),
('Wednesday', [p[2]]),
('Thursday', [p[3]]),
('Friday', [p[4]]),
('Saturday', [p[5]]),
('Sunday', [p[6]]),
], location=(0, -30))
p.add_layout(legend, 'right')
TypeError: 'Figure' object is not subscriptable
Thank you.
Edit: Here is my data 'final_daily_data' if it's useful:
mc_cpu_hours \
startdate_dayweek 1 2 3
startdate_weekyear
27 527644.000731 468053.338183 517548.838022
28 349896.850976 481313.693908 372385.568095
29 168595.113447 388117.184580 373894.548600
30 176007.786269 364379.872622 366155.953075
31 177517.591864 0.000000 0.000000
startdate_dayweek 4 5 6 7
startdate_weekyear
27 573669.325129 515710.534260 511711.421986 841073.028107
28 378069.713821 385937.231788 385856.666340 842468.209151
29 343235.942227 376405.876236 400007.946715 662019.708660
30 375948.240935 366151.336263 395790.387672 700936.336812
31 0.000000 0.000000 0.000000 686023.780120
Your problem is in legend = Legend(items=[('Monday', [p[0]]), ...]), or more precisely in p[0], ..., p[6]. The figure object is not subscriptable because it is not a list or dictionary, and this raises the error. I think in your case it is enough to create a blank Legend(), without any further information.
Small Example
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Legend
output_notebook()
# Each row describes one line: its x values, y values, legend label and colour.
source = pd.DataFrame(
    [
        ([1, 2, 3, 4], [1, 2, 3, 4], 'a', 'blue'),
        ([1, 2, 3, 4], [4, 3, 2, 1], 'b', 'green'),
    ],
    columns=['xs', 'ys', 'label', 'color'],
)
p = figure(width=400, height=300)
# An empty Legend is placed to the right of the plot before the lines are added.
outside_legend = Legend()
p.add_layout(outside_legend, 'right')
p.multi_line(
    xs='xs',
    ys='ys',
    legend_field='label',
    color='color',
    source=source,
)
show(p)
Output
Look at this answer, in particular the comment from @Sam De Meyer. In short, you create the figure, and then you do:
p.add_layout(p.legend[0], 'right')
show(p)

Is there any way to implement Stacked or Grouped Bar charts in plotly express

I am trying to implement a grouped-bar-chart (or) stacked-bar-chart in plotly express
I have implemented it using plotly (which is pretty straight forward) and below is code for it. There are altogether six columns in dataframe ['Rank', 'NOC', 'Gold', 'Silver', 'Bronze', 'Total']
# One bar trace per medal type, all sharing the country codes on the x-axis.
data = [
    go.Bar(
        x=olympics_data['NOC'],
        y=olympics_data[medal],
        marker=dict(color=colour, opacity=0.5),
        name=medal,
    )
    for medal, colour in [('Gold', 'green'), ('Silver', 'red'), ('Bronze', 'blue')]
]
trace1, trace2, trace3 = data
# barmode="stack" stacks the three medal traces within each country's bar.
layout = go.Layout(
    title="number of medals in each category for various countries",
    xaxis=dict(title="countries"),
    yaxis=dict(title="number of medals"),
    barmode="stack",
)
fig = go.Figure(data, layout)
fig.show()
Output:
I am expecting a similar output using plotly-express.
You can arrange your data to use px.bar() as in this link.
Or you can consider using relative in the barmode().
barmode (str (default 'relative')) – One of 'group', 'overlay' or
'relative' In 'relative' mode, bars are stacked above zero for
positive values and below zero for negative values. In 'overlay' mode,
bars are drawn on top of one another. In 'group' mode, bars are placed
beside each other.
Using overlay:
import plotly.express as px
# Load the iris sample data from Plotly Express and show it in the notebook.
iris = px.data.iris()
display(iris)
# Histogram of sepal_length with one colour per species.
fig = px.histogram(iris, x='sepal_length', color='species',
nbins=19, range_x=[4,8], width=600, height=350,
opacity=0.4, marginal='box')
# In 'overlay' mode, bars are drawn on top of one another.
fig.update_layout(barmode='overlay')
# Fix the y-range of the subplot at row 1, column 1.
fig.update_yaxes(range=[0,20],row=1, col=1)
fig.show()
Using relative:
fig.update_layout(barmode='relative')
fig.update_yaxes(range=[0,20],row=1, col=1)
fig.show()
Using group:
fig.update_layout(barmode='group')
fig.show()
Yes, Plotly Express supports both stacked and grouped bars with px.bar(). Full documentation with examples is here: https://plot.ly/python/bar-charts/
Here is a reusable function to do this.
def px_stacked_bar(df, color_name='category', y_name='y', **pxargs):
    '''Stacked bar chart of a wide DataFrame using Plotly Express.

    The frame is melted to long form so that every index value becomes one
    bar on the x-axis and every column becomes one coloured segment of that
    bar (comparable to `df.plot(kind='bar', stacked=True)`).

    Parameters:
        df: DataFrame with a single-level index. An unnamed index is
            labelled 'index'.
        color_name: name of the melted column holding the original column
            labels; used as the `color` argument of `px.bar`.
        y_name: name of the melted column holding the values; used as `y`.
        **pxargs: passed through unchanged to `px.bar`.

    Returns:
        The figure returned by `px.bar`.
    '''
    # An unnamed index would give id_vars=None below, which makes melt treat
    # the index column as data and leaves px.bar without an x column.
    idx_col = df.index.name if df.index.name is not None else 'index'
    m = pd.melt(df.rename_axis(idx_col).reset_index(), id_vars=idx_col,
                var_name=color_name, value_name=y_name)
    return px.bar(m, x=idx_col, y=y_name, color=color_name, **pxargs)
Example use
df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
'B': {0: 1, 1: 3, 2: 5},
'C': {0: 2, 1: 4, 2: 6}})
px_stacked_bar(df.set_index('A'))

Plotly: How to retrieve values for major ticks and gridlines?

I'd like to retrieve the x-values highlighted in this plot:
The plot is generated in a Jupyter Notebook using this snippet:
import plotly
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pandas as pd
import numpy as np
# setup
init_notebook_mode(connected=True)
# Seed NumPy's random generator (presumably so the generated data is reproducible).
np.random.seed(123)
cf.set_config_file(theme='pearl')
# Random data using cufflinks
df = cf.datagen.lines()
# plot: asFigure=True keeps the result in `fig`, which is rendered separately
# by iplot(fig) below
fig = df.iplot(asFigure=True, kind='scatter',xTitle='Dates',yTitle='Returns',title='Returns')
iplot(fig)
This is what I've tried:
With this setup, you can retrieve every x and y value for every series by simply running fig in the notebook. But the tick values are nowhere to be found.
I've also tried digging deeper in dir(fig), and thought maybe the output from fig.axis would do the trick, but no:
{'x1': {'gridcolor': '#E1E5ED',
'showgrid': True,
'tickfont': {'color': '#4D5663'},
'title': {'text': 'Dates', 'font': {'color': '#4D5663'}},
'zerolinecolor': '#E1E5ED'},
'y1': {'gridcolor': '#E1E5ED',
'showgrid': True,
'tickfont': {'color': '#4D5663'},
'title': {'text': 'Returns', 'font': {'color': '#4D5663'}},
'zerolinecolor': '#E1E5ED'}}
There are however other options for the grid and ticks there like 'showgrid': True, and 'tickfont', but the values seem to be "hidden" somewhere else.
Any suggestions?
This data is only available in the Javascript context, and so unfortunately is inaccessible from Python.

How do I plot two pandas DataFrames in one graph with the same colors but different line styles?

Suppose I have the following two dataframes:
df1 = pd.DataFrame(np.random.randn(100, 3),columns=['A','B','C']).cumsum()
df2 = pd.DataFrame(np.random.randn(100, 3),columns=['A','B','C']).cumsum()
My question is that, how can I plot them in one graph such that:
The three series of df1 and df2 are still in the same blue, orange
and green lines as above.
The three series of df1 are in solid lines
The three series of df2 are in dashed lines
Currently the closest thing I can get is the following:
ax = df1.plot(style=['b','y','g'])
df2.plot(ax=ax, style=['b','y','g'], linestyle='--')
Is there any way to get the color codes used by default by DataFrame.plot()? Or is there any other better approach to achieve what I want? Ideally I don't want to specify any color codes with the style parameter but always use the default colors.
Without messing with the colors themselves or transferring them from one plot to the other you may easily just reset the colorcycle in between your plot commands
ax = df1.plot()
ax.set_prop_cycle(None)
df2.plot(ax=ax, linestyle="--")
You could use get_color from the lines:
df1 = pd.DataFrame(np.random.randn(100, 3), columns=['A', 'B', 'C']).cumsum()
df2 = pd.DataFrame(np.random.randn(100, 3), columns=['A', 'B', 'C']).cumsum()
ax = df1.plot()
# Read the colours of the lines just drawn for df1 and reuse them for df2.
# They are collected into a list (not a generator) so that `color` receives a
# sized, re-iterable sequence.
line_colors = [plotted_line.get_color() for plotted_line in ax.get_lines()]
df2.plot(ax=ax, linestyle='--', color=line_colors)
Output:
You can get the default color parameters that are currently being used from matplotlib.
import matplotlib.pyplot as plt
colors = list(plt.rcParams.get('axes.prop_cycle'))
[{'color': '#1f77b4'},
{'color': '#ff7f0e'},
{'color': '#2ca02c'},
{'color': '#d62728'},
{'color': '#9467bd'},
{'color': '#8c564b'},
{'color': '#e377c2'},
{'color': '#7f7f7f'},
{'color': '#bcbd22'},
{'color': '#17becf'}]
so just pass style=['#1f77b4', '#ff7f0e', '#2ca02c'] and the colors should work.
If you want to set another color cycler, say the older version, then:
plt.rcParams['axes.prop_cycle'] = ("cycler('color', 'bgrcmyk')")
list(plt.rcParams['axes.prop_cycle'])
#[{'color': 'b'},
# {'color': 'g'},
# {'color': 'r'},
# {'color': 'c'},
# {'color': 'm'},
# {'color': 'y'},
# {'color': 'k'}]

Categories

Resources