How to add images to each row in a Plotly Table? - python

I would like to add images to the rows of a Plotly Table, but I could not find a good solution.
As a workaround, I am using the method add_layout_image() to place the images approximately where each row sits - see the example of what I am trying to achieve.
Does anyone have a better solution?
import plotly.graph_objects as go
import pandas as pd
import base64
# Sample data: one row per sales team. The 'award' cells are left empty
# because the medal images are overlaid on the figure further down.
data = {'team': {1: 'Sales team 1', 2: 'Sales team 2', 3: 'Sales team 3'},
        'award': {1: '', 2: '', 3: ''},
        'performance': {1: '67.00%', 2: '45.00%', 3: '35.00%'}}
df = pd.DataFrame(data)

fig = go.Figure(data=[go.Table(
    columnwidth=[40, 40, 40],
    header=dict(
        values=list(df.columns),
        height=35),
    cells=dict(
        values=[df.team,
                df.award,
                df.performance],
        align=['center', 'center', 'center'],
        font=dict(color='black', size=18),
        height=45)
)])

heightRow = fig.data[0].cells.height
numberRow = len(fig.data[0].cells.values[0])

image_1st = 'medal1st.png'
# Read the image inside a context manager so the file handle is closed
# as soon as the bytes have been encoded.
with open(image_1st, 'rb') as image_file:
    image_1st_base64 = base64.b64encode(image_file.read())
# The data URI is identical for every row, so build it once.
image_source = 'data:image/png;base64,{}'.format(image_1st_base64.decode())

# Vertical distance between two consecutive images.
step_y = 1 / numberRow * .2
coordinate_y = 0
# One image per table row; each one is placed step_y lower than the previous.
for _ in df.index:
    fig.add_layout_image(
        source=image_source,
        x=0.5,
        y=.9 - coordinate_y,
        xref="x domain",
        yref="y domain",
        xanchor="center",
        yanchor="bottom",
        sizex=.055,
        sizey=.055,
    )
    coordinate_y = coordinate_y + step_y
fig.show()

Related

Add limit orders on side of the chart Plotly

I am trying to add limit orders on the right side of the chart, showing where each limit order was placed and the volume of that order. A sell order should be red and a buy order green, and the bigger the volume of a limit order, the bigger its line should be. Here is a quick example of what I'm looking for.
Here is full code that I have with chart data: https://textbin.net/noz678jlue
chartData = {'Price': [0.965879, 0.964773, 0.96447, 0.961223, 0.958788, 0.956747, 0.958788, 0.959872, 0.959868, 0.960104, 0.961375, 0.962256, 0.963297, 0.963315, 0.964611, 0.964513, 0.963761, 0.963763, 0.963922, 0.963907, 0.963857, 0.963265, 0.963293, 0.963171, 0.96318, 0.963707, 0.964389, 0.964352, 0.963077, 0.961785, 0.959572, 0.958703, 0.959223, 0.95819, 0.952994, 0.95124, 0.950693, 0.950906, 0.95184, 0.951838, 1.053997, 1.060501, 1.060672, 1.060486, 1.060171, 1.060241, 1.059292, 1.059263, 1.059205, 0.95906, 0.954787, 0.954901, 0.954993, 0.955447, 0.955465, 0.955626, 0.953638, 0.952751, 0.951972, 0.950729, 0.950532, 0.952849, 0.952773, 0.952682, 0.952351, 0.948383, 0.94847, 0.948451, 0.95198, 0.952234, 0.951982, 0.952163, 0.952301, 0.952407, 0.955843, 0.956628, 0.957734, 0.957548, 0.95771, 0.956813, 0.958674, 0.958295, 0.954697, 0.953861, 0.955926, 0.953264, 0.951443, 0.950245, 0.949453, 0.949492, 0.948764, 0.946932, 0.949487, 0.950302, 0.950381, 0.949979, 0.948601, 0.949252, 0.949217, 0.949271, 0.947859, 0.947683, 0.947763, 0.947593, 0.948247, 0.9483, 0.948568, 0.947236, 0.946515, 0.946128, 0.946793, 0.946244, 0.951683, 0.951324, 0.950662, 0.949001, 0.947648, 0.946191, 0.946928, 0.933038, 0.92239, 0.923197, 0.925719, 0.937193, 0.93354, 0.932933, 0.932073, 0.931954, 0.932393, 0.931602, 0.932908, 0.932966, 0.933866, 0.931223, 0.929834, 0.933195, 0.936534, 0.935959, 0.932762, 0.931187, 0.937434, 0.937664, 0.936378, 0.934742, 0.934742], 'Date': [1652117700000, 1652118000000, 1652118300000, 1652118600000, 1652118900000, 1652119200000, 1652119500000, 1652119800000, 1652120100000, 1652120400000, 1652120700000, 1652121000000, 1652121300000, 1652121600000, 1652121900000, 1652122200000, 1652122500000, 1652122800000, 1652123100000, 1652123400000, 1652123700000, 1652124000000, 1652124300000, 1652124600000, 1652124900000, 1652125200000, 1652125500000, 1652125800000, 1652126100000, 1652126400000, 1652126700000, 1652127000000, 1652127300000, 1652127600000, 1652127900000, 
1652128200000, 1652128500000, 1652128800000, 1652129100000, 1652129400000, 1652129700000, 1652130000000, 1652130300000, 1652130600000, 1652130900000, 1652131200000, 1652131500000, 1652131800000, 1652132100000, 1652132400000, 1652132700000, 1652133000000, 1652133300000, 1652133600000, 1652133900000, 1652134200000, 1652134500000, 1652134800000, 1652135100000, 1652135400000, 1652135700000, 1652136000000, 1652136300000, 1652136600000, 1652136900000, 1652137200000, 1652137500000, 1652137800000, 1652138100000, 1652138400000, 1652138700000, 1652139000000, 1652139300000, 1652139600000, 1652139900000, 1652140200000, 1652140500000, 1652140800000, 1652141100000, 1652141400000, 1652141700000, 1652142000000, 1652142300000, 1652142600000, 1652142900000, 1652143200000, 1652143500000, 1652143800000, 1652144100000, 1652144400000, 1652144700000, 1652145000000, 1652145300000, 1652145600000, 1652145900000, 1652146200000, 1652146500000, 1652146800000, 1652147100000, 1652147400000, 1652147700000, 1652148000000, 1652148300000, 1652148600000, 1652148900000, 1652149200000, 1652149500000, 1652149800000, 1652150100000, 1652150400000, 1652150700000, 1652151000000, 1652151300000, 1652151600000, 1652151900000, 1652152200000, 1652152500000, 1652152800000, 1652153100000, 1652153400000, 1652153700000, 1652154000000, 1652154300000, 1652154600000, 1652154900000, 1652155200000, 1652155500000, 1652155800000, 1652156100000, 1652156400000, 1652156700000, 1652157000000, 1652157300000, 1652157600000, 1652157900000, 1652158200000, 1652158500000, 1652158800000, 1652159100000, 1652159400000, 1652159700000, 1652160000000, 1652160300000, 1652160600000, 1652160636000]}
limitOrders = {"BUY":{"0.98": 50000, "0.93": 5555, "0.67": 300000, "0.85": 5555, "0.47": 300000, '0.57': 300000, "0.95": 5555}, "SELL":{"1.00": 50000, "0.83": 5555, "0.67": 300000, "0.75": 5555, "0.57": 300000, '0.67': 300000, "0.85": 5555}}
# This snippet uses numpy and pytz, which nothing above it imports.
import numpy as np
import pytz

eastern = pytz.timezone('US/Eastern')
# Build the frame from chartData (the dict defined above) and back-fill gaps.
df = pd.DataFrame.from_dict(chartData).bfill()
# 'Date' holds epoch milliseconds; convert to tz-aware US/Eastern timestamps.
df['Date'] = pd.to_datetime(df['Date'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(eastern)
x = df['Date']
y = df['Price']

layout = go.Layout(
    autosize=True,
    width=1980,
    height=1080,
    margin=dict(l=10, r=10, t=80, b=10),
    title="<b>TEST</b>",
    paper_bgcolor='rgb(0.03,0.00,0.07)',
    plot_bgcolor='rgb(0.03,0.00,0.07)',
    yaxis_tickformat=".3f",
    title_x=0.5,
    font=dict(
        family="Amarante,cursive",
        size=25,
        color="White")
)

fig = go.Figure([
    # Flat, zero-width line at 1.01 that the price trace fills up to ('tonexty').
    go.Scatter(x=x, y=1.01 * np.ones_like(y), opacity=0.5, line_width=0, showlegend=False),
    go.Scatter(x=x, y=y, fill='tonexty', fillcolor="#240050", line=dict(color="#940099"), line_shape='spline',
               opacity=0, showlegend=False)
], layout=layout)
fig.show()
You can use plotly shapes to place down the line segments representing limit orders, and annotations to place down the text with the corresponding volume amount. However, you will need to increase the right margin so the annotations are visible.
import pytz
import numpy as np
import pandas as pd
import plotly.graph_objects as go
chartData = {'Price': [0.965879, 0.964773, 0.96447, 0.961223, 0.958788, 0.956747, 0.958788, 0.959872, 0.959868, 0.960104, 0.961375, 0.962256, 0.963297, 0.963315, 0.964611, 0.964513, 0.963761, 0.963763, 0.963922, 0.963907, 0.963857, 0.963265, 0.963293, 0.963171, 0.96318, 0.963707, 0.964389, 0.964352, 0.963077, 0.961785, 0.959572, 0.958703, 0.959223, 0.95819, 0.952994, 0.95124, 0.950693, 0.950906, 0.95184, 0.951838, 1.053997, 1.060501, 1.060672, 1.060486, 1.060171, 1.060241, 1.059292, 1.059263, 1.059205, 0.95906, 0.954787, 0.954901, 0.954993, 0.955447, 0.955465, 0.955626, 0.953638, 0.952751, 0.951972, 0.950729, 0.950532, 0.952849, 0.952773, 0.952682, 0.952351, 0.948383, 0.94847, 0.948451, 0.95198, 0.952234, 0.951982, 0.952163, 0.952301, 0.952407, 0.955843, 0.956628, 0.957734, 0.957548, 0.95771, 0.956813, 0.958674, 0.958295, 0.954697, 0.953861, 0.955926, 0.953264, 0.951443, 0.950245, 0.949453, 0.949492, 0.948764, 0.946932, 0.949487, 0.950302, 0.950381, 0.949979, 0.948601, 0.949252, 0.949217, 0.949271, 0.947859, 0.947683, 0.947763, 0.947593, 0.948247, 0.9483, 0.948568, 0.947236, 0.946515, 0.946128, 0.946793, 0.946244, 0.951683, 0.951324, 0.950662, 0.949001, 0.947648, 0.946191, 0.946928, 0.933038, 0.92239, 0.923197, 0.925719, 0.937193, 0.93354, 0.932933, 0.932073, 0.931954, 0.932393, 0.931602, 0.932908, 0.932966, 0.933866, 0.931223, 0.929834, 0.933195, 0.936534, 0.935959, 0.932762, 0.931187, 0.937434, 0.937664, 0.936378, 0.934742, 0.934742], 'Date': [1652117700000, 1652118000000, 1652118300000, 1652118600000, 1652118900000, 1652119200000, 1652119500000, 1652119800000, 1652120100000, 1652120400000, 1652120700000, 1652121000000, 1652121300000, 1652121600000, 1652121900000, 1652122200000, 1652122500000, 1652122800000, 1652123100000, 1652123400000, 1652123700000, 1652124000000, 1652124300000, 1652124600000, 1652124900000, 1652125200000, 1652125500000, 1652125800000, 1652126100000, 1652126400000, 1652126700000, 1652127000000, 1652127300000, 1652127600000, 1652127900000, 
1652128200000, 1652128500000, 1652128800000, 1652129100000, 1652129400000, 1652129700000, 1652130000000, 1652130300000, 1652130600000, 1652130900000, 1652131200000, 1652131500000, 1652131800000, 1652132100000, 1652132400000, 1652132700000, 1652133000000, 1652133300000, 1652133600000, 1652133900000, 1652134200000, 1652134500000, 1652134800000, 1652135100000, 1652135400000, 1652135700000, 1652136000000, 1652136300000, 1652136600000, 1652136900000, 1652137200000, 1652137500000, 1652137800000, 1652138100000, 1652138400000, 1652138700000, 1652139000000, 1652139300000, 1652139600000, 1652139900000, 1652140200000, 1652140500000, 1652140800000, 1652141100000, 1652141400000, 1652141700000, 1652142000000, 1652142300000, 1652142600000, 1652142900000, 1652143200000, 1652143500000, 1652143800000, 1652144100000, 1652144400000, 1652144700000, 1652145000000, 1652145300000, 1652145600000, 1652145900000, 1652146200000, 1652146500000, 1652146800000, 1652147100000, 1652147400000, 1652147700000, 1652148000000, 1652148300000, 1652148600000, 1652148900000, 1652149200000, 1652149500000, 1652149800000, 1652150100000, 1652150400000, 1652150700000, 1652151000000, 1652151300000, 1652151600000, 1652151900000, 1652152200000, 1652152500000, 1652152800000, 1652153100000, 1652153400000, 1652153700000, 1652154000000, 1652154300000, 1652154600000, 1652154900000, 1652155200000, 1652155500000, 1652155800000, 1652156100000, 1652156400000, 1652156700000, 1652157000000, 1652157300000, 1652157600000, 1652157900000, 1652158200000, 1652158500000, 1652158800000, 1652159100000, 1652159400000, 1652159700000, 1652160000000, 1652160300000, 1652160600000, 1652160636000]}
limitOrders = {"BUY":{"0.98": 50000, "0.93": 5555, "0.67": 300000, "0.85": 5555, "0.47": 300000, '0.57': 300000, "0.95": 5555}, "SELL":{"1.00": 50000, "0.83": 5555, "0.67": 300000, "0.75": 5555, "0.57": 300000, '0.67': 300000, "0.85": 5555}}
eastern = pytz.timezone('US/Eastern')
# bfill() replaces the deprecated fillna(method="backfill") spelling.
df = pd.DataFrame.from_dict(chartData).bfill()
# 'Date' holds epoch milliseconds; convert to tz-aware US/Eastern timestamps.
df['Date'] = pd.to_datetime(df['Date'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(eastern)
x = df['Date']
y = df['Price']

layout = dict(
    autosize=True,
    width=1980,
    height=1080,
    # The right margin is widened to 200 so the volume labels drawn outside
    # the plot area stay visible.
    margin=dict(l=10, r=200, t=80, b=10),
    title="<b>TEST</b>",
    paper_bgcolor='rgb(0.03,0.00,0.07)',
    plot_bgcolor='rgb(0.03,0.00,0.07)',
    yaxis_tickformat=".3f",
    title_x=0.5,
    font=dict(
        family="Amarante,cursive",
        size=25,
        color="White")
)

fig = go.Figure([
    # Flat, zero-width line at 1.01 that the price trace fills up to ('tonexty').
    go.Scatter(x=x, y=1.01 * np.ones_like(y), opacity=0.5, line_width=0, showlegend=False),
    go.Scatter(x=x, y=y, fill='tonexty', fillcolor="#240050", line=dict(color="#940099"), line_shape='spline',
               opacity=0, showlegend=False)
], layout=layout)
## add limit orders using shapes (line segments) and annotations (volume labels)
## use paper coordinates to determine length in the x direction
max_limit_volume = 500000
# Length, as a fraction of the plot width, drawn for an order whose volume
# equals max_limit_volume. 0.1 is the factor the line shapes were using.
max_limit_volume_length = 0.1
# Order side -> line colour; sides not listed here are not drawn.
limit_order_colors = {"BUY": "green", "SELL": "red"}

for limit_order_name, limit_order_info in limitOrders.items():
    line_color = limit_order_colors.get(limit_order_name)
    if line_color is None:
        continue
    for y_value, volume in limit_order_info.items():
        # Prices are stored as string keys; the y axis needs numbers.
        y_value = float(y_value)
        # Segment starts at the right edge (x=1) and extends left in
        # proportion to the order's volume.
        fig.add_shape(
            type="line",
            x0=1, y0=y_value,
            x1=1 - max_limit_volume_length * volume / max_limit_volume, y1=y_value,
            line=dict(color=line_color, width=3),
        )
        fig.add_annotation(
            x=1.05, y=y_value,
            yshift=-30, xref="paper",
            text=f"${volume}", font=dict(color="white"),
        )
# x positions of the shapes are in paper coordinates, y positions in data units.
fig.update_shapes(dict(xref='paper', yref='y'))
fig.show()

Dynamically changing bar chart according to inputs in Plotly Dash

For quite some time I have been trying to design my first Dash app, but I have not been successful, so I would like to ask for help.
What I am trying to achieve is an app where I first input some numbers, then use them to do some calculations over a given dataframe, and finally draw a bar chart. This shouldn't be hard.
What I have got so far:
app = Dash(__name__, external_stylesheets=external_stylesheets)

# Page layout: labelled numeric inputs, a year slider, the bar chart and a
# dcc.Store that carries the intermediate data between the two callbacks.
app.layout = html.Div(
    [
        # Style keys are camelCased, matching the dcc.Input styles below.
        html.I("Get me the first input needed", style={"marginRight": "115px"}),
        html.I("Get me an additional input to the first one too!"),
        html.Br(),
        dcc.Input(id="input1", type="number", placeholder="", style={'marginRight': '10px'}),
        dcc.Input(id="input11", type="number", placeholder="", style={'marginRight': '10px'}),
        # ... another 3 very similar blocks for inputs,
        dcc.Slider(
            min=0, max=10, step=1,
            value=5,
            id='my_slider',
            marks={
                1: {'label': '1 year', 'style': {'color': '#77b0b1'}},
                2: {'label': '2 years'},
                3: {'label': '3 years'},
                4: {'label': '4 years'},
                5: {'label': '5 years'},
                6: {'label': '6 years'},
                7: {'label': '7 years'},
                8: {'label': '8 years'},
                9: {'label': '9 years'},
                10: {'label': '10 years'}
            }),
        dcc.Graph(id='bar-chart'),
        dcc.Store(id='intermediate-value')
    ]
)
The slider is not really used yet because I am stuck, but I am showing everything for completeness.
After this comes the callback part, where I would like to use those inputs to manipulate the given dataframe. My inspiration for all of this is here: https://dash.plotly.com/sharing-data-between-callbacks
@app.callback(
    [Output('intermediate-value', 'data')],
    # [Output(component_id="bar-chart", component_property="figure")],
    [Input("input1", "value"),
     Input("input11", "value"),
     Input("input2", "value"),
     Input("input22", "value"),
     Input("input3", "value"),
     Input("input33", "value"),
     Input('my_slider', 'value')]
)
def get_output(input1, input11, input2, input22, input3, input33, my_slider):
    """Compute the per-year cash figures from the inputs and store them.

    input11, input22 and input33 are fractions of the rows of the
    module-level ``df``; they delimit three consecutive row slices.
    input1, input2 and input3 multiply the summed 'count' of the matching
    slice (and are scaled by 12). ``my_slider`` is received but not used.

    Returns the resulting frame as a dict for the 'intermediate-value' store.

    NOTE: the Output above is declared as a one-element list while a bare
    dict is returned; this is the combination that raises the
    SchemaTypeValidationError quoted further down in the question.
    """
    global dff
    user_adapt_perc = [0.025, 0.135, 0.34, 0.34, 0.16]

    first_positions = df.shape[0] * input11
    first_money = df.iloc[:int(first_positions)]['count'].sum() * input1 * 12

    second_positions = df.shape[0] * input22
    second_money = df.iloc[int(first_positions):int(second_positions)]['count'].sum() * input2 * 12

    third_positions = df.shape[0] * input33
    third_money = df.iloc[int(second_positions):int(third_positions)]['count'].sum() * input3 * 12

    total_money = first_money + second_money + third_money

    # Subtract on the numpy array, then convert to a list: a plain list
    # does not support "list - number".
    cash = list(np.cumsum(user_adapt_perc) * all_money - total_money)
    dff = pd.DataFrame({"year": [1, 2, 3, 4, 5], "cash": cash})
    return dff.to_dict()
The variable all_money is pre-calculated before the whole Dash app, let's say 10 000 000. And df is just a dataframe of some input numbers used to calculate an important part. I have to return something other than a DataFrame, as returning one is not supported.
After this part, where all those calculations are correct, I am trying to plot a bar chart based on these calculations:
@app.callback(Output(component_id="bar-chart", component_property="figure"), Input('intermediate-value', 'data'))
def get_figure(dictionary_data):
    """Rebuild the stored frame and draw it as a bar chart.

    ``dictionary_data`` is the content of the 'intermediate-value' store.
    The frame written to that store has the columns "year" and "cash",
    so those are the names plotted here.
    """
    dff2 = pd.DataFrame(dictionary_data)
    barchart = px.bar(
        data_frame=dff2,
        x="year",
        y="cash",
        opacity=0.9,
        barmode='group')
    return barchart
When I input everything, I get an error:
dash._grouping.SchemaTypeValidationError: Schema: [<Output `intermediate-value.data`>]
Path: ()
Expected type: (<class 'tuple'>, <class 'list'>)
Received value of type <class 'dict'>:
which I guess is caused by something even before the bar chart.
I also tried adding the bar-chart figure to the outputs, but I never got that far.
Any help is much appreciated.
Could you give me a hint or help me, please?
Try changing what you have:
return dff.to_dict()
to this:
return dff.to_dict(orient='records')

Altair/HoloVis Panel - Multiple Select not working

I am using Altair to generate my plots (as I need the linked bar-chart selection) and Panel to create my dashboard. I have two dropdowns, where the values in the second are conditional on the value in the first.
When I use a single-select dropdown the dashboard works as expected. However, when I try to use any multiple-select widget, no data is rendered on my chart.
import panel as pn
import altair as alt
import pandas as pd
from vega_datasets import data
import datetime as dt
from altair import datum
alt.renderers.enable('default')
pn.extension('vega')

# Load the spreadsheet and keep only the columns the dashboard works with.
data = pd.read_excel('randomtestdata.xlsx')
df = pd.DataFrame(data, columns=['Parent Location', 'Location', 'Alert Definition', 'Alert Type', 'Initiated Date'])

# Restrict the rows to the three zones offered in the first dropdown.
_zone_names = ['Zone 1', 'Zone 2', 'Zone 3']
df = df[df['Parent Location'].isin(_zone_names)]
df.rename(columns={'Parent Location': 'ParentLocation'}, inplace=True, errors='raise')
source = df

title = '##Dashboard'
subtitle = 'This is a test dashboard. Use widgets below to show desired chart.'

# Zone name -> list of the distinct locations found in that zone.
_locations = {
    zone_name: source.loc[source['ParentLocation'] == zone_name]['Location'].unique().tolist()
    for zone_name in _zone_names
}

zone = pn.widgets.Select(
    name='Select a Zone',
    value='Zone 1',
    options=list(_zone_names),
)

#The following does not work
location = pn.widgets.MultiSelect(
    name='Select a Location',
    value=[True],
    options=_locations[zone.value],
)

# The following does works:
# location = pn.widgets.Select(
# name = 'Select a Location',
# value = _locations[zone.value][0],
# options =_locations[zone.value]
# )

# The slider spans 2021-01-01 to 2022-01-01 and starts with the full span selected.
_range_start = dt.datetime(2021, 1, 1)
_range_end = dt.datetime(2022, 1, 1)
date_range_slider = pn.widgets.DateRangeSlider(
    name='Date range to consider',
    start=_range_start, end=_range_end,
    value=(_range_start, _range_end),
)
@pn.depends(zone.param.value, location.param.value, date_range_slider.param.value, watch=True)
def get_plot(zone, location, date_range): # start function
    """Build the two linked bar charts for the current widget values.

    ``location`` is the value of the location widget and ``date_range`` the
    (start, end) pair of the date slider; ``zone`` is received but not used
    in the body. Returns the two charts concatenated side by side.

    NOTE: both charts filter with ``datum.Location == location``, which
    compares against a single value; this is the behaviour the question
    is about.
    """
    df = source
    df['Initiated Date'] = pd.to_datetime(df['Initiated Date']) # format date as datetime
    # Use the range passed in by the dependency rather than re-reading the widget.
    start_date, end_date = date_range
    mask = (df['Initiated Date'] > start_date) & (df['Initiated Date'] <= end_date)
    df = df.loc[mask]

    # Selecting a bar in the first chart filters the second chart.
    selection2 = alt.selection_single(fields=['Alert Type'])

    chart = alt.Chart(df).mark_bar(
        color="#0c1944",
        opacity=0.8).encode(
        x=alt.X('Alert Type:O', scale=alt.Scale(domain=source['Alert Type'].unique())),
        y='count(Alert Type)').transform_filter(
        (datum.Location == location)
    ).add_selection(selection2)

    chart2 = alt.Chart(df).mark_bar(
        color="#0c1944",
        opacity=0.8).encode(
        x='Alert Definition',
        y='count(Alert Definition)').transform_filter(
        (datum.Location == location)
    ).transform_filter(selection2)

    return (chart|chart2)
@pn.depends(zone.param.value, watch=True)
def _update_locations(zone):
    """Refresh the location widget whenever the selected zone changes.

    Replaces the widget's options with the locations of ``zone`` and sets
    its value to the first of them.
    """
    locations = _locations[zone]
    location.options = locations
    location.value = locations[0]
# Assemble the dashboard: a single column holding the headings, the three
# widgets and the get_plot function, wrapped in a row.
pn.Row(
pn.Column(title, subtitle, zone, location, date_range_slider,
get_plot )
)
Random test data:
https://github.com/KWSpittles/testdata
The reason this is not working is that you are filtering your Altair charts using
.transform_filter(
datum.Location == location
)
which only allows filtering for a single value. When you pass a list of multiple values, you need to use indexof instead, like this:
.transform_filter(
f'indexof({location}, datum.Location) != -1'
)

drop a DataFrame column in python

I desperately need help here. I am trying to get the dimensions of a dataframe. I always get 31 columns instead of 30: "Value should be 30, found 31". I tried reset_index(drop=True), but I still get the same error. Any help is appreciated. Stay safe.
def read_data(dataset_id):
    """Load one of the supported disease datasets into a DataFrame.

    Supported ``dataset_id`` values are 'breast_cancer', 'hyperthyroidism',
    'cervical_cancer' and 'liver_cancer'. Each branch reads its data file(s)
    from the current working directory, assigns column names, and leaves the
    class column ('diagnosis', 'diag_class', 'Biopsy' or 'Selector') as the
    first column of the result.

    The first 20 rows are printed before the frame is returned.

    Raises:
        ValueError: if ``dataset_id`` is not one of the supported values.
    """
    # Begin CODE
    if dataset_id == 'breast_cancer':
        datafile = 'wdbc.data'
        # NOTE(review): three of these names end in a space
        # ('mean_symmetry ', 'std_err_symmetry ', 'worst_symmetry ');
        # kept as-is in case callers rely on them - confirm.
        bc_columns = ['ptid', 'diagnosis', 'mean_radius', 'mean_texture',
                      'mean_perimeter', 'mean_area',
                      'mean_smoothness', 'mean_compactness', 'mean_concavity',
                      'mean_concave_pts', 'mean_symmetry ',
                      'mean_fractal_dim', 'std_err_radius', 'std_err_texture',
                      'std_err_perimeter', 'std_err_area',
                      'std_err_smoothness', 'std_err_compactness',
                      'std_err_concavity', 'std_err_concave_pts',
                      'std_err_symmetry ', 'std_err_fractal_dim', 'worst_radius',
                      'worst_texture', 'worst_perimeter',
                      'worst_area', 'worst_smoothness', 'worst_compactness',
                      'worst_concavity', 'worst_concave_pts',
                      'worst_symmetry ', 'worst_fractal_dim']
        data = pd.read_csv(datafile, skipinitialspace=True, names=bc_columns)
        data.drop(labels=['ptid'], axis=1, inplace=True)
        # Result is not used below; the call is kept because
        # get_class_list_dict is defined elsewhere and may have effects.
        bc_diag_class = get_class_list_dict(data['diagnosis'])
    elif dataset_id == 'hyperthyroidism':
        datafile1 = 'allhyper.data'  # tab delimited, no header
        datafile2 = 'allhyper.test'  # comma delimited, no header
        ht_columns = ['age', 'Gender', 'on thyroxine', 'query on thyroxine',
                      'on antithyroid medication', 'sick',
                      'pregnant', 'thyroid surgery', 'I131 treatment',
                      'query hypothyroid', 'query hyperthyroid',
                      'lithium', 'goitre', 'tumor', 'hypopituitary', 'psych',
                      'TSH measured', 'TSH', 'T3 measured',
                      'T3', 'TT4 measured', 'TT4', 'T4U measured', 'T4U',
                      'FTI measured', 'FTI', 'TBG measured', 'TBG',
                      'referral source', 'diag_class']
        data1 = pd.read_csv(datafile1, sep='\t', skipinitialspace=True,
                            names=ht_columns)
        data2 = pd.read_csv(datafile2, skipinitialspace=True, names=ht_columns)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        data = pd.concat([data1, data2], ignore_index=True)
        data = data.replace(to_replace='?', value=float('nan'))
        # Split the last field on the literal ".|" separator into the class
        # label and a trailing id, which is then discarded.
        data[['diag_class', 'ptid']] = data['diag_class'].str.split(pat=r'\.\|',
                                                                    expand=True)
        diag_class = data['diag_class']
        data.drop(labels=['diag_class', 'ptid'], axis=1, inplace=True)
        data.insert(0, 'diag_class', diag_class)
        ht_numeric = ['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI', 'TBG']
        data[ht_numeric] = data[ht_numeric].apply(pd.to_numeric)
    elif dataset_id == 'cervical_cancer':
        datafile = 'risk_factors_cervical_cancer.csv'
        cc_columns = ('Age', 'Num_sex_partners', 'First_sex_intercourse',
                      'Num_pregnancies',
                      'Smokes', 'Smokes_years', 'Smokes_packs_year',
                      'Hormonal_Contraceps',
                      'Hormonal_Contraceps_years', 'IUD', 'IUD_years', 'STD',
                      'STD_number',
                      'STD_condylomatosis', 'STDscervical_condylomatosis',
                      'STD_vaginal_condylomatosis',
                      'STD_vulvo_perin_condylomatosis', 'STD_syphilis',
                      'STD_pelvic_inflam_disease',
                      'STD_genital_herpes', 'STD_molluscum_contagiosum',
                      'STD_AIDS', 'STD_HIV', 'STD_HepB',
                      'STD_HPV', 'STD_Num_diagnosis',
                      'STD_Time_since_first_diag', 'STDs_Time_since_last_diag',
                      'Dx_Cancer', 'Dx_CIN', 'Dx_HPV', 'Dx', 'Hinselmann', 'Schiller',
                      'Citology', 'Biopsy')
        # This file is read with its own header row, which is then replaced.
        data = pd.read_csv(datafile, skipinitialspace=True)
        data.columns = cc_columns
        data = data.replace(to_replace='?', value=float('nan'))
        biopsy_class = data['Biopsy']
        data.drop(labels=['Dx_Cancer', 'Dx_CIN', 'Dx_HPV', 'Dx', 'Hinselmann',
                          'Schiller', 'Citology', 'Biopsy'],
                  axis=1, inplace=True)
        data.insert(0, 'Biopsy', biopsy_class)
        cc_numeric = ['Num_sex_partners', 'First_sex_intercourse',
                      'Num_pregnancies', 'Smokes_years', 'Smokes_packs_year',
                      'Hormonal_Contraceps_years', 'IUD_years',
                      'STD_number', 'STD_Time_since_first_diag',
                      'STDs_Time_since_last_diag']
        data[cc_numeric] = data[cc_numeric].apply(pd.to_numeric)
    elif dataset_id == 'liver_cancer':
        datafile = 'Indian Liver Patient Dataset (ILPD).csv'  # comma delimited, no header
        ld_columns = ['Age', 'Gender', 'TB', 'DB', 'Alkphos', 'Sgpt', 'Sgot',
                      'TP', 'ALB', 'A/G Ratio', 'Selector']
        data = pd.read_csv(datafile, skipinitialspace=True, names=ld_columns)
        data.loc[data['Gender'] == 'Male', 'Gender'] = 'M'
        data.loc[data['Gender'] == 'Female', 'Gender'] = 'F'
        selector_class = data['Selector']
        data.drop(labels=['Selector'], axis=1, inplace=True)
        data.insert(0, 'Selector', selector_class)
    else:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"unknown dataset_id: {dataset_id!r}")
    data.reset_index(drop=True, inplace=True)
    # End CODE
    print(data.head(20))
    return data
def dimensions(dataset_id, dataset):
    """Return ``(number of rows, number of columns)`` of ``dataset``.

    ``dataset_id`` is accepted for interface compatibility and is not used.
    Every column of the frame is counted, whatever it holds. An empty frame
    yields its shape instead of raising.
    """
    num_inst, num_feat = dataset.shape
    return (num_inst, num_feat)
If you want to drop a column from a DataFrame, you can do it like this.
To drop a single column:
df.drop(['column_name'], axis = 1)
To drop multiple columns:
df.drop(['Column1', 'Column2'], axis = 1)
If you want to drop columns based on some other condition instead of the column name, leave a comment below and I'll update the answer accordingly. Hope it helps!

reportlab dynamic data-driven header outputs wrong subtitle

I have created some fictitious, though representative, clinical trial type data using Pandas, and now come to some test reporting in ReportLab.
The data has a block (~50 rows) where the treatment column is 'Placebo' and the same amount where the treatment is 'Active'. I simply want to list the data using a sub-heading of 'Treatment Group: Placebo' for the first set and 'Treatment Group: Active' for the second.
There are some hits on a similar topic, and indeed I've used one of the suggested techniques, namely extending the arguments of the header function using partial from functools.
title1 = "ACME Corp CONFIDENTIAL"
title2 = "XYZ123 / Anti-Hypertensive Draft"
title3 = "Protocol XYZ123"
title4 = "Study XYZ123"
title5 = "Listing of Demographic Data by Treatment Arm"
title6 = "All subjects"
def title(canvas, doc, bytext):
    """Draw the page titles and the treatment-group sub-heading.

    Called with the page's ``canvas`` and ``doc``; ``bytext`` is the text
    appended to "Treatment Group:". The canvas state is saved before
    drawing and restored afterwards.
    """
    canvas.saveState()
    canvas.setFont(styleN.fontName, styleN.fontSize)
    # Three left-aligned lines at the document margin.
    for height_fraction, text in ((.975, title1), (.950, title2), (.925, title3)):
        canvas.drawString(DOCMARGIN, PAGE_HEIGHT*height_fraction, text)
    # Three lines centred on the page width.
    for height_fraction, text in ((.900, title4), (.875, title5), (.850, title6)):
        canvas.drawCentredString(PAGE_WIDTH/2.0, PAGE_HEIGHT*height_fraction, text)
    canvas.drawString(DOCMARGIN, PAGE_HEIGHT*.825, "Treatment Group:" + bytext)
    canvas.restoreState()
This is then called as follows. n_groups has the value 2 (from a summary query); 0 maps to 'Placebo' and 1 maps to 'Active'.
def build_pdf(doc):
    """Build the listing with one page template and one table per group.

    A PageTemplate 'PT<n>' is registered for each group, with the header
    function bound to that group's text from ``t_dict``. For each group the
    story then gets a NextPageTemplate, the group's table and a PageBreak.

    NOTE: this is the ordering the question reports as printing the previous
    group's sub-title on the first page of the second group.
    """
    ptemplates = []
    for armcd in range(n_groups):
        ptemplates.append(PageTemplate(id='PT' + str(armcd), frames=[dataFrame],
                                       onPage=partial(title, bytext=t_dict[armcd]),
                                       onPageEnd=foot))
    doc.addPageTemplates(ptemplates)

    elements = []
    for armcd in range(n_groups):
        elements.append(NextPageTemplate('PT' + str(armcd)))
        # Rows whose first field equals the 1-based group code, headed by
        # the column titles.
        sublist = [colheads] + [t for t in lista if t[0] == (armcd+1)]
        data_table = Table(sublist, 6*[40*mm], len(sublist)*[DATA_CELL_HEIGHT], repeatRows=1)
        data_table.setStyle(styleC)
        elements.append(data_table)
        elements.append(PageBreak())
    doc.build(elements)
The report produces 6 pages. The first 3 pages of placebo data are correct, and pages 5 & 6 of active data are correct, but page 4 - which should be the first page of the second ('active') group - has the sub-title 'Treatment Group: Placebo'.
I have re-organized the order of the statements multiple times, but can't get Page 4 to sub-title correctly. Any help, suggestions or magic would be much appreciated.
[Edit 1: sample data structure]
The 'top' of the data starts as:
[
[1, 'Placebo', '000001-000015', '1976-09-20', 33, 'F', 'Black'],
[1, 'Placebo', '000001-000030', '1959-04-26', 50, 'M', 'Asian'],
[1, 'Placebo', '000001-000031', '1946-02-07', 64, 'F', 'Asian'],
[1, 'Placebo', '000001-000046', '1947-11-08', 62, 'M', 'Asian'],
etc for 50 rows, then continues with
[2, 'Active', '000001-000002', '1962-02-28', 48, 'F', 'Black'],
[2, 'Active', '000001-000008', '1975-10-20', 34, 'M', 'Black'],
[2, 'Active', '000001-000013', '1959-01-19', 51, 'M', 'White'],
[2, 'Active', '000001-000022', '1962-01-12', 48, 'F', 'Black'],
[2, 'Active', '000001-000036', '1976-10-17', 33, 'F', 'Asian'],
[2, 'Active', '000001-000045', '1980-12-31', 29, 'F', 'White'],
for another 50.
The column header inserted is:
['Treatment Arm Code',
'Treatment Arm',
'Site ID - Subject ID',
'Date of Birth',
'Age (Years)',
'Gender',
'Ethnicity'],
[Edit 2: A solution - move the PageBreak() and make it conditional:]
def build_pdf(doc):
    """Build the listing with one page template and one table per group.

    A PageTemplate 'PT<n>' is registered for each group, with the header
    function bound to that group's text from ``t_dict``. For each group the
    story gets a NextPageTemplate, then (for every group after the first) a
    PageBreak, then the group's table.
    """
    ptemplates = []
    for armcd in range(n_groups):
        ptemplates.append(PageTemplate(id='PT' + str(armcd), frames=[dataFrame],
                                       onPage=partial(title, bytext=t_dict[armcd]),
                                       onPageEnd=foot))
    doc.addPageTemplates(ptemplates)

    elements = []
    for armcd in range(n_groups):
        elements.append(NextPageTemplate('PT' + str(armcd)))
        # The break comes after the template switch and only between groups.
        if armcd > 0:
            elements.append(PageBreak())
        # Rows whose first field equals the 1-based group code, headed by
        # the column titles.
        sublist = [colheads] + [t for t in lista if t[0] == (armcd+1)]
        data_table = Table(sublist, 6*[40*mm], len(sublist)*[DATA_CELL_HEIGHT], repeatRows=1)
        data_table.setStyle(styleC)
        elements.append(data_table)
    doc.build(elements)

Categories

Resources