I'm working on a graph that illustrates computer usage each day. I want a button that groups the dates monthly for the last year, sets y to the average (mean), and draws an average line.
My code:
import datetime
import numpy as np
import pandas as pd
import plotly.graph_objects as go
example_data = {"date": ["29/07/2022", "30/07/2022", "31/07/2022", "01/08/2022", "02/08/2022"],
"time_spent" : [15840, 21720, 40020, 1200, 4200]}
df = pd.DataFrame(example_data)
df["date"] = pd.to_datetime(df["date"], dayfirst=True)
df['Time spent'] = df['time_spent'].apply(lambda x:str(datetime.timedelta(seconds=x)))
df['Time spent'] = pd.to_datetime(df['Time spent'])
df = df.drop("time_spent", axis=1)
dfall = df.resample("M", on="date").mean().reset_index()  # reset_index keeps 'date' as a column for plotting
dfyearly = dfall.tail(12).copy()
dfweekly = df.tail(7).copy()
dfmonthly = df.tail(30).copy()
del df
dfs = {'Week':dfweekly, 'Month': dfmonthly, 'Year' : dfyearly, "All" : dfall}
for dframe in list(dfs.values()):
    dframe['StfTime'] = dframe['Time spent'].apply(lambda x: x.strftime("%H:%M"))
frames = len(dfs) # number of dataframes organized in dict
columns = len(dfs['Week'].columns) - 1 # number of columns in df, minus 1 for Date
scenarios = [list(s) for s in [e==1 for e in np.eye(frames)]]
visibility = [list(np.repeat(e, columns)) for e in scenarios]
lowest_value = datetime.datetime.combine(datetime.date.today(), datetime.datetime.min.time())
highest_value = dfweekly["Time spent"].max().ceil("H")
buttons = []
fig = go.Figure()
for i, (period, df) in enumerate(dfs.items()):
    for column in df.columns[1:]:
        fig.add_bar(
            name=column,
            x=df['date'],
            y=df[column],
            customdata=df[['StfTime']],
            text=df['StfTime'],
            visible=True if period == 'Week' else False  # 'Week' values are shown from the start
        )
    # Change displayed data to a friendlier format
    fig.update_traces(textfont=dict(size=20), hovertemplate='<b>Time ON</b>: %{customdata[0]}<br>')
    # Change range for better scaling
    this_value = df["Time spent"].max().ceil("H")
    if highest_value <= this_value:
        highest_value = this_value
    fig.update_yaxes(range=[lowest_value, highest_value])
    # Add average value indicator
    average_value = df["Time spent"].mean()
    fig.add_hline(y=average_value, line_width=3, line_dash="dash",
                  line_color="green")
    # one button per dataframe to trigger the visibility
    # of all columns / traces for each dataframe
    button = dict(label=period,
                  method='restyle',
                  args=['visible', visibility[i]])
    buttons.append(button)
fig.update_yaxes(dtick=60*60*1000, tickformat='%H:%M')
fig.update_xaxes(type='date', dtick='D1')
fig.update_layout(updatemenus=[dict(type="dropdown",
direction="down",
buttons = buttons)])
fig.show()
EDIT 1.
Thanks to vestland I managed to get a semi-working dropdown.
The problem is that the line added with add_hline affects all the bar charts; I want it to display only on the chart it was added for. Also, after passing in custom data for a nicer display, the space between bars is doubled. Any way to fix these issues?
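A note on the add_hline issue (a sketch, not from the original thread): add_hline draws a layout shape rather than a trace, so a restyle button cannot toggle it, and every period's line stays visible. Adding each average as a regular Scatter trace instead makes it respond to the same visibility machinery:
# Sketch: one dashed average line per period, added as a trace so the
# dropdown's 'visible' restyle can toggle it. Assumes the dfs dict from
# above; each dataframe then contributes one extra trace, so the buttons'
# visibility lists must be rebuilt to cover it (restyle applies the list
# in trace order).
for period, df in dfs.items():
    avg = df["Time spent"].mean()
    fig.add_trace(go.Scatter(
        name=f"{period} average",
        x=[df['date'].min(), df['date'].max()],
        y=[avg, avg],
        mode="lines",
        line=dict(width=3, dash="dash", color="green"),
        visible=(period == 'Week'),
    ))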
Related
I've been working on creating a choropleth map with a date slider. The map and the initial year of data render correctly, but when the slider is moved past the initial year, the map goes white and the 'range' indicator on the right side showing the number of 'Total Deaths' disappears.
I receive no error and am not sure what could be happening; any help would be great. Thanks!
The Kaggle dataset link
The code
import pandas as pd
import plotly

df_total = pd.read_csv('../input/air-pollution/death-rates-total-air-pollution.csv')
scl = [[0.0, '#ffffff'], [0.2, '#ff9999'], [0.4, '#ff4d4d'],
       [0.6, '#ff1a1a'], [0.8, '#cc0000'], [1.0, '#4d0000']]
data_slider = []
for year in df_total.Year.unique():
    # I select the year (and remove DC for now)
    dff = df_total[(df_total['Year'] == year)]
    dff = df_total = dff.rename(columns={"Deaths - Air pollution - Sex: Both - Age: Age-standardized (Rate)": "Total Deaths"})
    for col in dff.columns:  # I transform the columns into string type so I can:
        dff[col] = dff[col].astype(str)
    ### I create the text for mouse-hover for each state, for the current year
    '''dff['Total Deaths'] = dff['Entity'] '''
    ### create the dictionary with the data for the current year
    data_one_year = dict(
        type='choropleth',
        locations=dff['Entity'],
        z=dff['Total Deaths'].astype(float),
        locationmode='country names',
        colorscale=scl,
        text=dff['Entity'],
    )
    data_slider.append(data_one_year)  # I add the dictionary to the list of dictionaries for the slider

steps = []
for i in range(len(data_slider)):
    step = dict(method='restyle',
                args=['visible', [False] * len(data_slider)],
                label='Year {}'.format(i + 1990))  # label to be displayed for each step (year)
    step['args'][1][i] = True
    steps.append(step)

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]
layout = dict(geo=dict(scope='world',
                       projection={'type': 'equirectangular'}),
              sliders=sliders)
# create the figure object:
fig = dict(data=data_slider, layout=layout)
# plot in the notebook
plotly.offline.iplot(fig)
This line inside the for loop is problematic:
dff = df_total = dff.rename(
columns={"Deaths - Air pollution - Sex: Both - Age: Age-standardized (Rate)": "Total Deaths"})
Because it also rebinds df_total to the current year's slice, every later iteration filters a frame that contains only the first year's rows, so all subsequent slider steps receive empty data and the map goes white. Replace it with:
dff = dff.rename(
columns={"Deaths - Air pollution - Sex: Both - Age: Age-standardized (Rate)": "Total Deaths"})
I'm plotting the counts of a variable grouped by time as a heatmap. However, when including both hour and minute, the counts are quite low, so the resulting heatmap doesn't provide much insight. Is it possible to group the counts into a bigger block of time? I'm hoping to test some different periods (5, 10 mins).
I'm also hoping to plot time on the x-axis. Similar to the output attached.
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
start = datetime(1900,1,1,10,0,0)
end = datetime(1900,1,1,13,0,0)
seconds = (end - start).total_seconds()
step = timedelta(minutes = 1)
array = []
for i in range(0, int(seconds), int(step.total_seconds())):
    array.append(start + timedelta(seconds=i))
array = [i.strftime('%Y-%m-%d %H:%M:%S') for i in array]
df2 = pd.DataFrame(array).rename(columns = {0:'Time'})
df2['Count'] = np.random.uniform(0.0, 0.5, size = len(df2))
df2['Count'] = df2['Count'].round(1)
df2['Time'] = pd.to_datetime(df2['Time'])
df2['Hour'] = df2['Time'].dt.hour
df2['Min'] = df2['Time'].dt.minute
g = df2.groupby(['Hour','Min','Count'])
count_df = g['Count'].nunique().unstack()
count_df.fillna(0, inplace = True)
sns.heatmap(count_df)
For cases like this, I think downsampling the data is the easiest approach, and the bin size is easy to change. The axis labels in the output graph will need some adjusting, but I'd recommend this method.
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
start = datetime(1900,1,1,10,0,0)
end = datetime(1900,1,1,13,0,0)
seconds = (end - start).total_seconds()
step = timedelta(minutes = 1)
array = []
for i in range(0, int(seconds), int(step.total_seconds())):
    array.append(start + timedelta(seconds=i))
array = [i.strftime('%Y-%m-%d %H:%M:%S') for i in array]
df2 = pd.DataFrame(array).rename(columns = {0:'Time'})
df2['Count'] = np.random.uniform(0.0, 0.5, size = len(df2))
df2['Count'] = df2['Count'].round(1)
df2['Time'] = pd.to_datetime(df2['Time'])
df2['Hour'] = df2['Time'].dt.hour
df2['Min'] = df2['Time'].dt.minute
df2.set_index('Time', inplace=True)
count_df = df2.resample('10min')['Count'].value_counts().unstack()
count_df.fillna(0, inplace = True)
sns.heatmap(count_df.T)
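As for the axis labels, one way to tidy them (a sketch, reusing count_df from above) is to format the resampled bin edges as HH:MM:
import matplotlib.pyplot as plt

# The transposed frame puts the 10-minute bins on the x axis
ax = sns.heatmap(count_df.T)
ax.set_xticklabels([t.strftime('%H:%M') for t in count_df.index], rotation=45)
plt.show()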
You could achieve this by creating a column of group ids in which each id repeats once per minute of the block.
For example:
minutes = 3
x = [0,1,2]
np.repeat(x, repeats=minutes, axis=0)
>>>> [0,0,0,1,1,1,2,2,2]
and then group your data using this column.
So your code would look like:
...
minutes = 5
x = [i for i in range(int(df2.shape[0] / minutes))]
df2['group'] = np.repeat(x, repeats=minutes, axis=0)
g = df2.groupby(['group', 'Count'])
count_df = g['Count'].nunique().unstack()
count_df.fillna(0, inplace = True)
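One caveat (my assumption, not stated in the answer): np.repeat only lines up when the row count is an exact multiple of minutes; truncating the repeated ids handles a ragged tail:
import numpy as np

minutes = 5
n_groups = -(-df2.shape[0] // minutes)  # ceiling division
df2['group'] = np.repeat(np.arange(n_groups), minutes)[:df2.shape[0]]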
I have a plotly chart that tracks the live sentiment for various keywords.
I want the graph to show green when the sentiment is positive and red when it is negative.
Code:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, Event
import pandas as pd
import plotly
import plotly.graph_objs as go

app = dash.Dash(__name__)

# app_colors, data_dict, conn, and df_resample_sizes are defined elsewhere in the script
app.layout = html.Div(
    [html.Div(className='container-fluid',
              children=[html.H2('Live Market Sentiment',
                                style={'color': "#0C0F0A", 'text-align': 'center'}),
                        html.H5('Search Ticker/Stock:', style={'color': app_colors['text']}),
                        dcc.Dropdown(id='sentiment_term',
                                     options=[{'label': s, 'value': s} for s in data_dict.keys()],
                                     value=['Google-GOOGL'], multi=False),
                        ])],
    style={'width': '98%', 'margin-left': 10, 'margin-right': 10, 'max-width': 50000})


@app.callback(Output('live-graph', 'figure'),
              [Input(component_id='sentiment_term', component_property='value')],
              events=[Event('graph-update', 'interval')])
def update_graph_scatter(sentiment_term):
    var1 = str(data_dict[sentiment_term][0])
    var2 = str(data_dict[sentiment_term][1])
    try:
        if sentiment_term:
            df1 = pd.read_sql("SELECT sentiment.* FROM sentiment_fts fts LEFT JOIN sentiment ON fts.rowid = sentiment.id WHERE fts.sentiment_fts MATCH ? ORDER BY fts.rowid DESC LIMIT 1000", conn, params=(var1+'*',))
            df2 = pd.read_sql("SELECT sentiment.* FROM sentiment_fts fts LEFT JOIN sentiment ON fts.rowid = sentiment.id WHERE fts.sentiment_fts MATCH ? ORDER BY fts.rowid DESC LIMIT 1000", conn, params=(var2+'*',))
            df = df1.append(df2)
        else:
            df = pd.read_sql("SELECT * FROM sentiment ORDER BY id DESC, unix DESC LIMIT 1000", conn)
        df.sort_values('unix', inplace=True)
        df['date'] = pd.to_datetime(df['unix'], unit='ms')
        df.set_index('date', inplace=True)
        init_length = len(df)
        df['sentiment_smoothed'] = df['sentiment'].rolling(int(len(df)/5)).mean()
        df = df_resample_sizes(df)
        X = df.index
        Y = df.sentiment_smoothed.values
        Y2 = df.volume.values
        #df_count = pd.read_sql("SELECT * FROM sentiment ORDER BY id DESC LIMIT 1", conn)
        #analyzer_count = df_count.id.max()
        #print(analyzer_count)
        data = plotly.graph_objs.Scatter(
            x=X,
            y=Y,
            name='Sentiment',
            mode='lines',
            yaxis='y2',
            fill="tozeroy",
            fillcolor="#8bcbfc"
        )
        return {'data': [data],
                'layout': go.Layout(xaxis=dict(range=[min(X), max(X)]),
                                    yaxis2=dict(range=[min(Y), max(Y)], side='left',
                                                overlaying='y', title='sentiment'),
                                    title='Live sentiment for: "{}"'.format(sentiment_term),
                                    font={'color': app_colors['text']},
                                    plot_bgcolor=app_colors['background'],
                                    paper_bgcolor=app_colors['background'],
                                    showlegend=False)}
    except Exception as e:
        with open('errors.txt', 'a') as f:
            f.write(str(e))
            f.write('\n')
I tried adding if conditions to the charts, but it doesn't seem to help. Please help!
Thanks
I know this doesn't quite exactly answer your question, but here is some starter code I wrote to get you a related plot. It's regular Plotly code, but you should be able to integrate it with your Dash code fairly easily. I did a lot of digging online, and it appears that Plotly doesn't support multiple colors for a trace's fill option. There are some workarounds, but they only work well if the y values don't flip between positive and negative often.
Here is some starter code for when the values do not change sign frequently:
# Import packages
import numpy as np
import pandas as pd
import plotly.graph_objects as go
# Generate some random data
numPts = 100
xData = pd.date_range(start='1/01/2020', end='12/31/2020', periods=numPts)
yDataPos = np.random.random(numPts//2)*4 # Random data [0, 4)
yDataNeg = (np.random.random(numPts//2) - 1)*2 # Random data [-2, 0)
# Create Plotly Plot
posData = go.Scatter(x=xData[0:numPts//2], y=yDataPos, fill='tonexty', line_color='green',
name='Trace 1', showlegend=True, legendgroup="mytrace")
negData = go.Scatter(x=xData[numPts//2:], y=yDataNeg, fill='tozeroy', line_color='red',
showlegend=False, legendgroup="mytrace")
fig = go.Figure(data=[posData,negData])
fig.update_layout(title='Live Sentiment')
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Value")
fig.show()
You would just need to add some filtering to find where your data is positive and negative and then plug in those values into the appropriate traces.
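For instance, the filtering could look like this (a sketch, assuming a single y array aligned with xData):
# Boolean masks split the series into positive and negative points.
# Points within each trace are connected, so this works best when
# sign changes are infrequent.
yData = np.concatenate([yDataPos, yDataNeg])
posMask = yData >= 0
posData = go.Scatter(x=xData[posMask], y=yData[posMask], fill='tozeroy',
                     line_color='green', name='Positive')
negData = go.Scatter(x=xData[~posMask], y=yData[~posMask], fill='tozeroy',
                     line_color='red', name='Negative')
fig = go.Figure(data=[posData, negData])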
If the values do change sign frequently, I'd recommend just using a bar chart:
allYData = np.concatenate([yDataPos, yDataNeg])
colors = ['red' if val < 0 else 'green' for val in allYData]
dataTrace = go.Bar(x=xData, y=allYData, marker=dict(color=colors), name='Data')
fig = go.Figure(data=dataTrace)
fig.update_layout(title='Live Sentiment')
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Value")
fig.show()
First of all, I will share the objectives of the Python code:
1. Getting daily high and low prices for a stock from Yahoo.
2. Converting the daily highs and lows to weekly highs/lows, monthly highs/lows, and yearly highs/lows.
3. Getting the exact dates of the weekly or monthly highs/lows from the daily dataframe.
4. Finally, after fetching the dates of the weekly (or monthly) highs and lows, arranging the data by whichever occurred first during the period. For example, for the week ending 12th December 2020, say the high of the week is 100 and the low is 97 (from step 2), with their dates taken from the daily dataframe (step 3). If the high happened on 9th December and the low on 12th December, the prices are arranged as 100 in row 1 and 97 in row 2, and this process repeats for the entire dataframe.
What I have been able to achieve: I have completed steps 1 and 2, and I'm struggling with step 3 as of now.
I accomplished step 1 with:
import pandas as pd
import yfinance as yf
Ticker = '^NSEI'
f = yf.download(Ticker,period="max")
f = f.drop(['Adj Close'], axis=1)
f = f.drop(['Open'], axis=1)
f = f.drop(['Close'], axis=1)
f = f.drop(['Volume'], axis=1)
f.reset_index(inplace=True)
f.insert(0,'Ticker',Ticker)
And step 2 with:
fw = f.groupby(['Ticker', pd.Grouper(key='Date', freq='W')])\
.agg(High=pd.NamedAgg(column='High', aggfunc='max'),
Low=pd.NamedAgg(column='Low', aggfunc='min'))\
.reset_index()
fm = f.groupby(['Ticker', pd.Grouper(key='Date', freq='M')])\
.agg(High=pd.NamedAgg(column='High', aggfunc='max'),
Low=pd.NamedAgg(column='Low', aggfunc='min'))\
.reset_index()
fq = f.groupby(['Ticker', pd.Grouper(key='Date', freq='Q')])\
.agg(High=pd.NamedAgg(column='High', aggfunc='max'),
Low=pd.NamedAgg(column='Low', aggfunc='min'))\
.reset_index()
fy = f.groupby(['Ticker', pd.Grouper(key='Date', freq='Y')])\
.agg(High=pd.NamedAgg(column='High', aggfunc='max'),
Low=pd.NamedAgg(column='Low', aggfunc='min'))\
.reset_index()
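The four aggregations differ only in the resample frequency, so a small helper could replace them (a sketch using the same f as above):
def high_low(f, freq):
    # Period high/low per ticker for the given pandas frequency string
    return (f.groupby(['Ticker', pd.Grouper(key='Date', freq=freq)])
             .agg(High=pd.NamedAgg(column='High', aggfunc='max'),
                  Low=pd.NamedAgg(column='Low', aggfunc='min'))
             .reset_index())

fw, fm, fq, fy = (high_low(f, freq) for freq in ('W', 'M', 'Q', 'Y'))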
I'm struggling with step 3: I used pd.merge, pd.join, and pd.concat, but was unable to combine the weekly dataframe with the highs-and-lows dataframe. The number of weekly records increases after the merge, and drop_duplicates didn't work properly with keep='last' either.
So if you all can help me with steps 3 and 4, I would be grateful. Thanks!
Solved the query which I posted above. Hope this helps others. Thanks!
import pandas as pd
import yfinance as yf
import datetime as dt
import numpy as np
Ticker = '^NSEI'
df = yf.download(Ticker, period='max')
df = df.drop(['Open', 'Close', 'Adj Close', 'Volume'], axis=1).reset_index()
# Daily data: 3238 rows, for reference
# Adding period-end columns: weekly, monthly, 6-month, yearly
df['WkEnd'] = df.Date.dt.to_period('W').apply(lambda r: r.start_time) + dt.timedelta(days=6)
df['MEnd'] = (df.Date.dt.to_period('M').apply(lambda r: r.end_time)).dt.date
df['6Mend'] = np.where(df.Date.dt.month <= 6,(df.Date.dt.year).astype(str)+'-1H',(df['Date'].dt.year).astype(str)+'-2H')
df['YEnd'] = (df.Date.dt.to_period('Y').apply(lambda r: r.end_time)).dt.date
# key variable for melting
d = {'Date':['Hidate', 'Lodate'], 'Price':['High','Low']}
#creating weekly neoformat
dw = df.groupby(['WkEnd']).agg({'High' : 'max','Low' : 'min' }).reset_index()
dw['Hidate'] = dw[['WkEnd','High']].merge(df,how = 'left').Date
dw['Lodate'] = dw[['WkEnd','Low']].merge(df,how = 'left').Date
dw = pd.lreshape(dw,d)
dw = dw.sort_values(by = ['Date']).reset_index()
dw = dw.drop(['index'], axis = 1)
#creating Monthly neoformat
dm = df.groupby(['MEnd']).agg({'High' : 'max','Low' : 'min' }).reset_index()
dm['Hidate'] = dm[['MEnd','High']].merge(df,how = 'left').Date
dm['Lodate'] = dm[['MEnd','Low']].merge(df,how = 'left').Date
dm = pd.lreshape(dm,d)
dm = dm.sort_values(by = ['Date']).reset_index()
dm = dm.drop(['index'], axis = 1)
#creating 6mth neoformat
d6m = df.groupby(['6Mend']).agg({'High' : 'max','Low' : 'min' }).reset_index()
d6m['Hidate'] = d6m[['6Mend','High']].merge(df,how = 'left').Date
d6m['Lodate'] = d6m[['6Mend','Low']].merge(df,how = 'left').Date
d6m = pd.lreshape(d6m,d)
d6m = d6m.sort_values(by = ['Date']).reset_index()
d6m = d6m.drop(['index'], axis = 1)
#creating Yearly neoformat
dy = df.groupby(['YEnd']).agg({'High' : 'max','Low' : 'min' }).reset_index()
dy['Hidate'] = dy[['YEnd','High']].merge(df,how = 'left').Date
dy['Lodate'] = dy[['YEnd','Low']].merge(df,how = 'left').Date
dy = pd.lreshape(dy,d)
dy = dy.sort_values(by = ['Date']).reset_index()
dy = dy.drop(['index'], axis = 1)
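Since the four blocks differ only in the grouping column, a helper function could reduce the repetition (a sketch, reusing the df and d defined above):
def neoformat(df, key, d):
    # Period highs/lows with their dates, interleaved in order of occurrence
    out = df.groupby([key]).agg({'High': 'max', 'Low': 'min'}).reset_index()
    out['Hidate'] = out[[key, 'High']].merge(df, how='left').Date
    out['Lodate'] = out[[key, 'Low']].merge(df, how='left').Date
    out = pd.lreshape(out, d)
    return out.sort_values(by=['Date']).reset_index(drop=True)

dw, dm, d6m, dy = (neoformat(df, k, d) for k in ('WkEnd', 'MEnd', '6Mend', 'YEnd'))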
I have a daily output data frame for a cropping system with multiple outputs from a crop model. I need to take the cumulative sum of a column (dThrTime) and add the cumulative values to the data frame as a new column. I was able to do that, but when I try to plot the newly added cumulative column against the daily time series, I get an error: ValueError: Could not interpret value `GDU` for parameter `y`.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime

def read_data(file_name, run):
    data_df = pd.read_csv(file_name)
    data_df['Date'] = data_df['Date'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d'))
    data_df['Year'] = data_df['Date'].apply(lambda x: datetime.strftime(x, '%Y'))
    data_df['run_name'] = run
    return data_df
filenc = "up_goldsboro.csv"
all_df = read_data(filenc, run = "restricted")
#separating data for each crop year
harvest_2011 = datetime.strptime("2012-05-17",'%Y-%m-%d')
planting_2011 = datetime.strptime("2011-10-27",'%Y-%m-%d')
data_2011 = all_df.loc[all_df["Date"]<=harvest_2011]
data_2011 = data_2011.loc[data_2011["Date"]>=planting_2011]
#Taking cumulative of a column named dThrTime
all_df['GDU'] = all_df['dThrTime'].cumsum(axis = 0, skipna = True)
#Plotting error
sns.lineplot(x = "Date", y = "GDU", data=data_2011)
plt.ylabel("Clover Root Biomass in kg/ha")
plt.title("Location:Goldsboro,NC\n Termination Year: 2012\n Clover GDU")
plt.show()
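A likely cause (my reading, not confirmed in the post): data_2011 is sliced from all_df before the GDU column is added, so the slice never receives it. Computing the cumulative sum on the slice itself avoids the error:
# Compute GDU on the 2011 slice so the column exists where seaborn looks
data_2011 = data_2011.copy()
data_2011['GDU'] = data_2011['dThrTime'].cumsum(skipna=True)
sns.lineplot(x="Date", y="GDU", data=data_2011)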