Altair/HoloViz Panel - Multiple Select not working - python

I am using Altair to generate my plots (as I need the linked bar-chart selection) and Panel to create my dashboard. I have two dropdowns, where the values in the second are conditional on the value selected in the first.
When I use a single-select dropdown the dashboard works as expected. However, when I try to use any multiple-select widget, no data is rendered on my chart.
import panel as pn
import altair as alt
import pandas as pd
from vega_datasets import data
import datetime as dt
from altair import datum
alt.renderers.enable('default')
pn.extension('vega')
data = pd.read_excel('randomtestdata.xlsx')
df = pd.DataFrame(data, columns=['Parent Location','Location','Alert Definition','Alert Type','Initiated Date'])
df = df[(df['Parent Location'] == 'Zone 1') | (df['Parent Location'] == 'Zone 2' )| (df['Parent Location'] == 'Zone 3' )]
df.rename(columns={'Parent Location': 'ParentLocation'},
          inplace=True, errors='raise')
source = df
title = '##Dashboard'
subtitle = 'This is a test dashboard. Use widgets below to show desired chart.'
_locations = {
    'Zone 1': source.loc[source['ParentLocation'] == 'Zone 1']['Location'].unique().tolist(),
    'Zone 2': source.loc[source['ParentLocation'] == 'Zone 2']['Location'].unique().tolist(),
    'Zone 3': source.loc[source['ParentLocation'] == 'Zone 3']['Location'].unique().tolist()
}
zone = pn.widgets.Select(
    name='Select a Zone',
    value='Zone 1',
    options=['Zone 1', 'Zone 2', 'Zone 3']
)
# The following does not work:
location = pn.widgets.MultiSelect(
    name='Select a Location',
    value=[True],
    options=_locations[zone.value]
)
# The following works:
# location = pn.widgets.Select(
#     name='Select a Location',
#     value=_locations[zone.value][0],
#     options=_locations[zone.value]
# )
date_range_slider = pn.widgets.DateRangeSlider(
    name='Date range to consider',
    start=dt.datetime(2021, 1, 1), end=dt.datetime(2022, 1, 1),
    value=(dt.datetime(2021, 1, 1), dt.datetime(2022, 1, 1))
)
@pn.depends(zone.param.value, location.param.value, date_range_slider.param.value, watch=True)
def get_plot(zone, location, date_range):  # start function
    df = source
    df['Initiated Date'] = pd.to_datetime(df['Initiated Date'])  # format date as datetime
    start_date = date_range_slider.value[0]
    end_date = date_range_slider.value[1]
    mask = (df['Initiated Date'] > start_date) & (df['Initiated Date'] <= end_date)
    df = df.loc[mask]
    selection2 = alt.selection_single(fields=['Alert Type'])
    chart = alt.Chart(df).mark_bar(
        color="#0c1944",
        opacity=0.8).encode(
        x=alt.X('Alert Type:O', scale=alt.Scale(domain=source['Alert Type'].unique())),
        y='count(Alert Type)').transform_filter(
        (datum.Location == location)
    ).add_selection(selection2)
    chart2 = alt.Chart(df).mark_bar(
        color="#0c1944",
        opacity=0.8).encode(
        x='Alert Definition',
        y='count(Alert Definition)').transform_filter(
        (datum.Location == location)
    ).transform_filter(selection2)
    return (chart | chart2)

@pn.depends(zone.param.value, watch=True)
def _update_locations(zone):
    locations = _locations[zone]
    location.options = locations
    location.value = locations[0]
    return

pn.Row(
    pn.Column(title, subtitle, zone, location, date_range_slider,
              get_plot)
)
Random test data:
https://github.com/KWSpittles/testdata

The reason this is not working is that you are filtering your Altair charts using
.transform_filter(
    datum.Location == location
)
which only filters on a single value. When you pass a list of multiple values you instead need to use indexof, like this:
.transform_filter(
    f'indexof({location}, datum.Location) != -1'
)
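For completeness, here is a hedged sketch of how that multi-value filter could look inside get_plot (my illustration, not part of the answer above). Altair's FieldOneOfPredicate builds the equivalent Vega predicate without formatting the expression string by hand; location is assumed to be the list coming from the MultiSelect widget.
chart = alt.Chart(df).mark_bar(color="#0c1944", opacity=0.8).encode(
    x=alt.X('Alert Type:O', scale=alt.Scale(domain=source['Alert Type'].unique())),
    y='count(Alert Type)'
).transform_filter(
    alt.FieldOneOfPredicate(field='Location', oneOf=location)  # keep rows whose Location is in the selected list
).add_selection(selection2)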

Related

How to add images to each row in a Plotly Table?

I would like to add images to the rows in a Plotly Table, but could not find a good solution.
As an alternative, I am using the method add_layout_image() to place the images approximately at each row's position - see the example below of what I am trying to achieve.
Does anyone have a better solution?
import plotly.graph_objects as go
import pandas as pd
import base64
data = {'team': {1: 'Sales team 1', 2: 'Sales team 2', 3: 'Sales team 3'},
        'award': {1: '', 2: '', 3: ''},
        'performance': {1: '67.00%', 2: '45.00%', 3: '35.00%'}}
df = pd.DataFrame(data)
fig = go.Figure(data=[go.Table(
    columnwidth=[40, 40, 40],
    header=dict(
        values=list(df.columns),
        height=35),
    cells=dict(
        values=[df.team,
                df.award,
                df.performance],
        align=['center', 'center', 'center'],
        font=dict(color='black', size=18),
        height=45)
)])
heightRow = fig.data[0].cells.height
numberRow = fig.data[0].cells.values[0].__len__()
image_1st = 'medal1st.png'
image_1st_base64 = base64.b64encode(open(image_1st, 'rb').read())
step_y = 1 / numberRow * .2
coordinate_y = 0
for index, eachRow in enumerate(df.iterrows()):
    fig.add_layout_image(
        source='data:image/png;base64,{}'.format(image_1st_base64.decode()),
        x=0.5,
        y=.9 - coordinate_y,
        xref="x domain",
        yref="y domain",
        xanchor="center",
        yanchor="bottom",
        sizex=.055,
        sizey=.055,
    )
    coordinate_y = coordinate_y + step_y
fig.show()
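There is no accepted answer here, but one possible refinement of the same add_layout_image approach (an assumption on my part, not a confirmed solution) is to derive each image's y position from the header and row heights instead of stepping down from a hard-coded 0.9, so the icons track the rows more closely. This assumes the table fills the full figure height.
header_height = fig.data[0].header.height  # 35, as set above
row_height = fig.data[0].cells.height      # 45, as set above
n_rows = len(fig.data[0].cells.values[0])
total_height = header_height + n_rows * row_height
for i in range(n_rows):
    y_top = 1 - (header_height + i * row_height) / total_height  # top edge of row i in paper coordinates
    fig.add_layout_image(
        source='data:image/png;base64,{}'.format(image_1st_base64.decode()),
        x=0.5, y=y_top,
        xref="paper", yref="paper",
        xanchor="center", yanchor="top",
        sizex=0.055, sizey=row_height / total_height,
    )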

Date picker component controlling the date of SQL query

I am implementing a date picker in my Dash web app. When a date is picked I want it to be passed to the SQL code so it returns values from the selected date. Although it sounds pretty simple, I've been stuck on this for two days and have no idea how to make it work. In the code below I am trying to return date_value from the callback, assign it to the variable datev, and then insert it into the SQL query inside the class Values. The result is the error pasted just below.
ERROR
  File "C:\Users\patryk.suwala\Documents\pythonProject\pythonProject6\data.py", line 28, in <module>
    dates, datev = update_output()
  File "C:\Users\patryk.suwala\Documents\pythonProject\pythonProject6\venv\lib\site-packages\dash\_callback.py", line 143, in add_context
    output_spec = kwargs.pop("outputs_list")
KeyError: 'outputs_list'
app.layout = html.Div([
    dcc.DatePickerSingle(id='date-picker-single', date=date(1997, 5, 10)),
    html.Div(id='output-container-date-picker-single')
])

@app.callback(
    Output('output-container-date-picker-single', 'children'),
    Input('my-date-picker-single', 'date'))
def update_output(date_value):
    if date_value is not None:
        date_object = date.fromisoformat(date_value)
        date_string = date_object.strftime('%B %d, %Y')
        return date_string, date_value  # Include date_value in function return

dates, datev = update_output()  # Assign date_value to the variable

class Values:
    SLGordersAc = f"""SELECT COUNT(slg.id) AS slg_orders_accepted
                      FROM slg_failure slg
                      WHERE slg.slg_declined_reason_id = 0
                      AND slg.created_date = {datev}; """  # Insert variable into SQL query
    cursor.execute(SLGordersAc)
    resultSLGoA = cursor.fetchall()
    [resultSLGoA] = resultSLGoA[0]
    SLGo = dftab3.at[3, 'Value']
    SLGo_PR = SLGo / OO
    dftab3.loc[4, :] = 'SLG Orders %', round(SLGo_PR, 2)
    dftab3.loc[5, :] = 'SLG Orders Accepted', resultSLGoA
    SLGoA = dftab3.at[5, 'Value']
    SLGoA_PR = SLGoA / OO
    dftab3.loc[6, :] = 'SLG Orders Accepted %', round(SLGoA_PR, 2)
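For what it's worth, here is a minimal sketch of the usual Dash pattern (my illustration, not code from the question): run the query inside the callback instead of calling update_output() at import time, and bind the picked date as a query parameter. The cursor object and the slg_failure table come from the question; the placeholder style (%s) depends on your DB driver (sqlite3 uses ?), and the output target is just the existing html.Div.
from datetime import date
from dash import Dash, dcc, html, Input, Output

app = Dash(__name__)
app.layout = html.Div([
    dcc.DatePickerSingle(id='date-picker-single', date=date(1997, 5, 10)),
    html.Div(id='output-container-date-picker-single'),
])

@app.callback(
    Output('output-container-date-picker-single', 'children'),
    Input('date-picker-single', 'date'))
def update_output(date_value):
    if date_value is None:
        return 'Pick a date'
    # Parameterised query: the picked ISO date is bound by the driver, not interpolated into the string.
    sql = """SELECT COUNT(slg.id) AS slg_orders_accepted
             FROM slg_failure slg
             WHERE slg.slg_declined_reason_id = 0
               AND slg.created_date = %s;"""
    cursor.execute(sql, (date_value,))  # `cursor` is assumed to exist, as in the question
    (slg_orders_accepted,) = cursor.fetchone()
    return f'SLG orders accepted on {date_value}: {slg_orders_accepted}'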

How to make sure that the data in this PyTrends function comes out in YYYY-MM-DD format and not YYYY-MM-DD 00:00:00

I have the following function:
# Imports assumed by this snippet (not shown in the original excerpt):
import time
import requests
import pandas as pd
from pytrends.request import TrendReq

pytrend = TrendReq()  # the pytrends session is assumed to be created elsewhere like this

def my_funct(Keyword, Dates, Country, Col_name):
    KEYWORDS = [Keyword]
    KEYWORDS_CODES = [pytrend.suggestions(keyword=i)[0] for i in KEYWORDS]
    df_CODES = pd.DataFrame(KEYWORDS_CODES)
    EXACT_KEYWORDS = df_CODES['mid'].to_list()
    DATE_INTERVAL = Dates
    COUNTRY = [Country]  # Use this link for iso country code
    CATEGORY = 0  # Use this link to select categories
    SEARCH_TYPE = ''  # default is 'web searches', others include 'images', 'news', 'youtube', 'froogle' (google shopping)
    Individual_EXACT_KEYWORD = list(zip(*[iter(EXACT_KEYWORDS)]*1))
    Individual_EXACT_KEYWORD = [list(x) for x in Individual_EXACT_KEYWORD]
    dicti = {}
    i = 1
    for Country in COUNTRY:
        for keyword in Individual_EXACT_KEYWORD:
            try:
                pytrend.build_payload(kw_list=keyword,
                                      timeframe=DATE_INTERVAL,
                                      geo=Country,
                                      cat=CATEGORY,
                                      gprop=SEARCH_TYPE)
                dicti[i] = pytrend.interest_over_time()
                i += 1
                time.sleep(6)
            except requests.exceptions.Timeout:
                print("Timeout occurred")
    df_trends = pd.concat(dicti, axis=1)
    df_trends.columns = df_trends.columns.droplevel(0)  # drop outside header
    df_trends = df_trends.drop('isPartial', axis=1)  # drop "isPartial"
    df_trends.reset_index(level=0, inplace=True)  # reset index
    df_trends.columns = ['date', Col_name]  # change column names
    return df_trends
Then I call the function using:
x1 = my_funct('Unemployment', '2004-01-04 2009-01-04', 'DK', 'Unemployment (Denmark)')
Then I put that into a df:
df1 = pd.DataFrame(x1)
Once I export that df to Excel, how do I ensure the date is in YYYY-MM-DD format without the trailing 00:00:00? Every time I convert, it comes out with hours and seconds.
I tried df1 = pd.DataFrame(x1).dt.strftime('%Y-%m-%d') but it says that this cannot be used.
Please help.
Thanks
You are trying to call dt.strftime on the entire DataFrame, but you need to call it on the date column:
df1['date'] = df1['date'].dt.strftime('%Y-%m-%d')
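If you would rather keep real dates in the Excel file instead of strings (an alternative I am adding here, not part of the answer above), converting the column to datetime.date objects before writing also drops the 00:00:00; 'trends.xlsx' is just a placeholder filename.
df1['date'] = pd.to_datetime(df1['date']).dt.date  # plain date objects, no time component
df1.to_excel('trends.xlsx', index=False)  # Excel shows these as dates without 00:00:00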

drop a DataFrame column in python

I desperately need help here. I am trying to get the dimensions of a dataframe, but I always get 31 columns instead of 30: Value should be 30, found 31. I tried reset_index(drop=True) but I still get the same error. Any help is appreciated. Stay safe.
def read_data(dataset_id):
    data = None
    # Begin CODE
    if dataset_id == 'breast_cancer':
        disease = 'breast_cancer'
        datafile = 'wdbc.data'
        bc_columns = ['ptid', 'diagnosis', 'mean_radius', 'mean_texture',
                      'mean_perimeter', 'mean_area',
                      'mean_smoothness', 'mean_compactness', 'mean_concavity',
                      'mean_concave_pts', 'mean_symmetry ',
                      'mean_fractal_dim', 'std_err_radius', 'std_err_texture',
                      'std_err_perimeter', 'std_err_area',
                      'std_err_smoothness', 'std_err_compactness',
                      'std_err_concavity', 'std_err_concave_pts',
                      'std_err_symmetry ', 'std_err_fractal_dim', 'worst_radius',
                      'worst_texture', 'worst_perimeter',
                      'worst_area', 'worst_smoothness', 'worst_compactness',
                      'worst_concavity', 'worst_concave_pts',
                      'worst_symmetry ', 'worst_fractal_dim']
        data = pd.read_csv(datafile, skipinitialspace=True, names=bc_columns)
        data.drop(labels=['ptid'], axis=1, inplace=True)
        bc_diag_class = get_class_list_dict(data['diagnosis'])
    elif dataset_id == 'hyperthyroidism':
        disease = 'hyperthyroidism'
        datafile1 = 'allhyper.data'  # tab delimited, no header
        datafile2 = 'allhyper.test'  # comma delimited, no header
        ht_columns = ['age', 'Gender', 'on thyroxine', 'query on thyroxine',
                      'on antithyroid medication', 'sick',
                      'pregnant', 'thyroid surgery', 'I131 treatment',
                      'query hypothyroid', 'query hyperthyroid',
                      'lithium', 'goitre', 'tumor', 'hypopituitary', 'psych',
                      'TSH measured', 'TSH', 'T3 measured',
                      'T3', 'TT4 measured', 'TT4', 'T4U measured', 'T4U',
                      'FTI measured', 'FTI', 'TBG measured', 'TBG',
                      'referral source', 'diag_class']
        data1 = pd.read_csv(datafile1, sep='\t', skipinitialspace=True,
                            names=ht_columns)
        data2 = pd.read_csv(datafile2, skipinitialspace=True, names=ht_columns)
        data = data1.append(data2, ignore_index=True)
        data = data.replace(to_replace='?', value=float('nan'))
        data[['diag_class', 'ptid']] = data['diag_class'].str.split(pat='.\|',
                                                                    expand=True)
        diag_class = data['diag_class']
        data.drop(labels=['diag_class', 'ptid'], axis=1, inplace=True)
        data.insert(0, 'diag_class', diag_class)
        data[['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI', 'TBG']] \
            = data[['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI',
                    'TBG']].apply(pd.to_numeric)
    elif dataset_id == 'cervical_cancer':
        disease = 'cervical_cancer'
        datafile = 'risk_factors_cervical_cancer.csv'
        cc_columns = ('Age', 'Num_sex_partners', 'First_sex_intercourse',
                      'Num_pregnancies',
                      'Smokes', 'Smokes_years', 'Smokes_packs_year',
                      'Hormonal_Contraceps',
                      'Hormonal_Contraceps_years', 'IUD', 'IUD_years', 'STD',
                      'STD_number',
                      'STD_condylomatosis', 'STDscervical_condylomatosis',
                      'STD_vaginal_condylomatosis',
                      'STD_vulvo_perin_condylomatosis', 'STD_syphilis',
                      'STD_pelvic_inflam_disease',
                      'STD_genital_herpes', 'STD_molluscum_contagiosum',
                      'STD_AIDS', 'STD_HIV', 'STD_HepB',
                      'STD_HPV', 'STD_Num_diagnosis',
                      'STD_Time_since_first_diag', 'STDs_Time_since_last_diag',
                      'Dx_Cancer', 'Dx_CIN', 'Dx_HPV', 'Dx', 'Hinselmann', 'Schiller',
                      'Citology', 'Biopsy')
        data = pd.read_csv(datafile, skipinitialspace=True)
        data.columns = cc_columns
        data = data.replace(to_replace='?', value=float('nan'))
        biopsy_class = data['Biopsy']
        data.drop(labels=['Dx_Cancer', 'Dx_CIN', 'Dx_HPV', 'Dx', 'Hinselmann',
                          'Schiller', 'Citology', 'Biopsy'],
                  axis=1, inplace=True)
        data.insert(0, 'Biopsy', biopsy_class)
        data[['Num_sex_partners', 'First_sex_intercourse', 'Num_pregnancies',
              'Smokes_years', 'Smokes_packs_year',
              'Hormonal_Contraceps_years', 'IUD_years',
              'STD_number', 'STD_Time_since_first_diag',
              'STDs_Time_since_last_diag']] \
            = data[['Num_sex_partners', 'First_sex_intercourse',
                    'Num_pregnancies', 'Smokes_years', 'Smokes_packs_year',
                    'Hormonal_Contraceps_years', 'IUD_years',
                    'STD_number', 'STD_Time_since_first_diag',
                    'STDs_Time_since_last_diag']].apply(pd.to_numeric)
    elif dataset_id == 'liver_cancer':
        disease = 'liver_cancer'
        datafile = 'Indian Liver Patient Dataset (ILPD).csv'  # comma delimited, no header
        ld_columns = ['Age', 'Gender', 'TB', 'DB', 'Alkphos', 'Sgpt', 'Sgot',
                      'TP', 'ALB', 'A/G Ratio', 'Selector']
        data = pd.read_csv(datafile, skipinitialspace=True, names=ld_columns)
        data.loc[data['Gender'] == 'Male', 'Gender'] = 'M'
        data.loc[data['Gender'] == 'Female', 'Gender'] = 'F'
        selector_class = data['Selector']
        data.drop(labels=['Selector'], axis=1, inplace=True)
        data.insert(0, 'Selector', selector_class)
        data.reset_index(drop=True, inplace=True)
    # End CODE
    print(data.head(20))
    return data


def dimensions(dataset_id, dataset):
    dim = None
    # dim = dataset.shape
    num_inst = len(dataset)
    num_feat = len(dataset.iloc[0].reset_index())
    dim = (num_inst, num_feat)
    return dim
If you want to drop a column from a DataFrame, you can do it like this.
To drop a single column:
df.drop(['column_name'], axis=1)
To drop multiple columns:
df.drop(['Column1', 'Column2'], axis=1)
If you want to drop based on some other condition instead of the column name, comment below and I'll update the answer accordingly. Hope it helps!
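One addition worth noting (mine, not part of the answer above): drop returns a new DataFrame unless you assign the result back or pass inplace=True, and shape gives the dimensions directly without counting the index.
df = df.drop(['column_name'], axis=1)  # or: df.drop(['column_name'], axis=1, inplace=True)
num_inst, num_feat = df.shape  # (rows, columns); the index is not counted as a column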

check 1st row of a column with all rows of another in python (pandas)

I have two text files. The first file contains these columns:
['Start time', 'End Time', 'Activity']
and the second file contains:
['Start time', 'End Time', 'Location', 'Type', 'Place'].
I want to check, for example,
Start_time1[0] against all rows of Start_time2.
The code is
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 09 15:04:08 2017
#author: Owner
"""
import pandas as pd
#import matplotlib.pyplot as plt
import time
import datetime
import numpy as np
df = pd.read_csv("OrdonezA_ADLs.txt", header = None, delimiter=' *\t+ *', engine='python')
df.columns=['Start time', 'End Time', 'Activity']
df2 = pd.read_csv("OrdonezA_Sensors.txt", header = None, delimiter=' *\t+ *', engine='python')
df2.columns=['Start time', 'End Time', 'Location', 'Type', 'Place']
#print df2.head()
print df['Start time'][0]
print df2['Location'][0]
df['Start time'] = df.apply(lambda row: time.mktime(datetime.datetime.strptime(row['Start time'], "%Y-%m-%d %H:%M:%S").timetuple()), axis=1)
df['End Time'] = df.apply(lambda row: time.mktime(datetime.datetime.strptime(row['End Time'], "%Y-%m-%d %H:%M:%S").timetuple()), axis=1)
#print df
df2['Start time'] = df2.apply(lambda row: time.mktime(datetime.datetime.strptime(row['Start time'], "%Y-%m-%d %H:%M:%S").timetuple()), axis=1)
df2['End Time'] = df2.apply(lambda row: time.mktime(datetime.datetime.strptime(row['End Time'], "%Y-%m-%d %H:%M:%S").timetuple()), axis=1)
#print df2
p = np.zeros(shape=(len(df),12))
print p
for x in xrange(len(df)):
    for y in xrange(len(df2)):
        if (df['Start time'][x] == df2['Start time'][y] and df['End Time'][x] <= df2['End Time'][y]):
            if df2['Location'][x] == 'Shower':
                print "Shower on"
                # p[x,0]=1  here I want to add 1 in first col of p, in x row of p
            elif df2['Location'][x] == 'Basin':
                print "Basin on"
                # p[x,1]=1
            elif df2['Location'][x] == 'Door Kitchen':
                print "Door Kitchen on"
                # p[x,2]=1
            elif df2['Location'][x] == 'Door Bathroom':
                print "Door Bathroom on"
                # p[x,3]=1
            elif df2['Location'][x] == 'Maindoor':
                print "Maindoor on"
                # p[x,4]=1
            elif df2['Location'][x] == 'Fridge':
                print "Fridge on"
                # p[x,5]=1
            elif df2['Location'][x] == 'Cupboard':
                print "Cupboard on"
                # p[x,6]=1
            elif df2['Location'][x] == 'Toilet':
                print "Toilet on"
                # p[x,7]=1
            elif df2['Location'][x] == 'Seat':
                print "Seat on"
                # p[x,8]=1
            elif df2['Location'][x] == 'Bed':
                print "Bed on"
                # p[x,9]=1
            elif df2['Location'][x] == 'Microwave':
                print "Microwave on"
                # p[x,10]=1
            elif df2['Location'][x] == 'Door Bedroom':
                print "Door Bedroom on"
                # p[x,11]=1
            else:
                print ("not")
Can anyone help me please? Thanks a lot.
I see that you transform the date string to datetime and then to a timestamp.
Use merge with the inner option to extract only the rows where both columns match.
The code will be something like:
import pandas as pd
df1 = pd.DataFrame({'Start_time': [1, 2, 3], 'End_time': [1, 2, 3], 'Activity': [4, 5, 9]})
df2 = pd.DataFrame({'Start_time': [1, 2, 3], 'End_time': [3, 2, 1], 'Location': ['x', 'y', 'z'], 'Type': [7, 8, 9], 'Place': ['a', 'b', 'c']})
df = pd.merge(df1, df2, how='inner', left_on=['Start_time', 'End_time'], right_on=['Start_time', 'End_time'], left_index=False, right_index=False, sort=False)
for i in df['Location']:
    if i == 'y':
        print 'Ok'
    else:
        print 'Error'
Here only one row has 'Start_time' and 'End_time' in common.
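Applied to the question's own column names, a short sketch (my illustration, not part of the original answer) that replaces the nested loops: merge on the two time columns and one-hot encode Location to build the indicator matrix p. Note that the question's loop used <= on 'End Time', so an exact merge on both columns is slightly stricter than the original condition.
merged = pd.merge(df, df2, how='inner', on=['Start time', 'End Time'])  # rows whose start/end match in both files
p = pd.get_dummies(merged['Location'])  # one 0/1 indicator column per location (Shower, Basin, Fridge, ...)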
