I have created some fictitious, though representative, clinical trial type data using Pandas, and now come to some test reporting in ReportLab.
The data has a block (~50 rows) where the treatment column is 'Placebo' and the same amount where the treatment is 'Active'. I simply want to list the data using a sub-heading of 'Treatment Group: Placebo' for the first set and 'Treatment Group: Active' for the second.
There are some hits on a similar topic and, indeed, I've used one of the suggested techniques, namely extending the arguments of a header function using partial from functools.
title2 = "XYZ123 / Anti-Hypertensive Draft"
title3 = "Protocol XYZ123"
title4 = "Study XYZ123"
title5 = "Listing of Demographic Data by Treatment Arm"
title6 = "All subjects"
def title(canvas, doc, bytext):
    """Draw the fixed page titles and the treatment-group sub-heading.

    Used as a page-template ``onPage`` callback, with ``bytext`` bound in
    advance through ``functools.partial``.

    Args:
        canvas: Canvas the header text is drawn on.
        doc: Document being built (not used here).
        bytext: Label appended directly after "Treatment Group:".
    """
    canvas.setFont(styleN.fontName, styleN.fontSize)

    # Left-aligned identification lines; vertical positions are fractions
    # of the page height, counted from the bottom of the page.
    canvas.drawString(DOCMARGIN, PAGE_HEIGHT*.975, title1)
    canvas.drawString(DOCMARGIN, PAGE_HEIGHT*.950, title2)
    canvas.drawString(DOCMARGIN, PAGE_HEIGHT*.925, title3)

    # Centred listing titles.
    canvas.drawCentredString(PAGE_WIDTH/2.0, PAGE_HEIGHT*.900, title4)
    canvas.drawCentredString(PAGE_WIDTH/2.0, PAGE_HEIGHT*.875, title5)
    canvas.drawCentredString(PAGE_WIDTH/2.0, PAGE_HEIGHT*.850, title6)

    # Per-group sub-heading: the only line that differs between templates.
    canvas.drawString(DOCMARGIN, PAGE_HEIGHT*.825, "Treatment Group:" + bytext)
This is then called as follows. n_groups has the value 2 (taken from a summary query); 0 maps to 'Placebo' and 1 maps to 'Active'.
def build_pdf(doc):
    """Set up one page template and one data table per treatment arm.

    NOTE(review): this excerpt stops after the table is constructed; the
    statements that add the table to ``elements`` and build ``doc`` are not
    shown here.
    """
    # One template per arm so that each arm gets its own sub-heading text.
    ptemplates = []
    for armcd in range(n_groups):
        ptemplates.append(PageTemplate(id = 'PT' + str(armcd), frames = [dataFrame,],
                                       onPage = partial(title, bytext=t_dict[armcd]),
                                       onPageEnd = foot))
    elements = []
    for armcd in range(n_groups):
        elements.append(NextPageTemplate('PT' + str(armcd)))
        # Arm codes in the data are 1-based, while armcd is 0-based.
        sublist = [t for t in lista if t[0] == (armcd+1)]
        data_table = Table(sublist, 6*[40*mm], len(sublist)*[DATA_CELL_HEIGHT], repeatRows=1)
The report produces 6 pages. The first 3 pages of placebo data are correct, pages 5 & 6 of active data are correct, but page 4 - which should be the first page of the second 'active' group has the sub-title 'Treatment Group: Placebo'.
I have re-organized the order of the statements multiple times, but can't get Page 4 to sub-title correctly. Any help, suggestions or magic would be much appreciated.
[Edit 1: sample data structure]
The 'top' of the data starts as:
[1, 'Placebo', '000001-000015', '1976-09-20', 33, 'F', 'Black'],
[1, 'Placebo', '000001-000030', '1959-04-26', 50, 'M', 'Asian'],
[1, 'Placebo', '000001-000031', '1946-02-07', 64, 'F', 'Asian'],
[1, 'Placebo', '000001-000046', '1947-11-08', 62, 'M', 'Asian'],
etc for 50 rows, then continues with
[2, 'Active', '000001-000002', '1962-02-28', 48, 'F', 'Black'],
[2, 'Active', '000001-000008', '1975-10-20', 34, 'M', 'Black'],
[2, 'Active', '000001-000013', '1959-01-19', 51, 'M', 'White'],
[2, 'Active', '000001-000022', '1962-01-12', 48, 'F', 'Black'],
[2, 'Active', '000001-000036', '1976-10-17', 33, 'F', 'Asian'],
[2, 'Active', '000001-000045', '1980-12-31', 29, 'F', 'White'],
for another 50.
The column header inserted is:
['Treatment Arm Code',
'Treatment Arm',
'Site ID - Subject ID',
'Date of Birth',
'Age (Years)',
[Edit 2: A solution - move the PageBreak() and make it conditional:]
def build_pdf(doc):
    """Queue one table per treatment arm, each under its own page template.

    The template switch is queued *before* the page break that starts a new
    arm, so the first page of that arm already uses the new template.

    NOTE(review): registering ``ptemplates`` on ``doc`` and the final build
    call are not shown in this excerpt and are not reconstructed here.
    """
    # One template per arm so that each arm gets its own sub-heading text.
    ptemplates = []
    for armcd in range(n_groups):
        ptemplates.append(PageTemplate(id = 'PT' + str(armcd), frames = [dataFrame,],
                                       onPage = partial(title, bytext=t_dict[armcd]),
                                       onPageEnd = foot))
    elements = []
    for armcd in range(n_groups):
        elements.append(NextPageTemplate('PT' + str(armcd)))
        if armcd > 0:
            # Break only between arms, and only after the next template has
            # been selected; the first arm starts on the opening page.
            elements.append(PageBreak())
        # Arm codes in the data are 1-based, while armcd is 0-based.
        sublist = [t for t in lista if t[0] == (armcd+1)]
        data_table = Table(sublist, 6*[40*mm], len(sublist)*[DATA_CELL_HEIGHT], repeatRows=1)
        elements.append(data_table)
I'm retrieving a large list of people from a database, then looping through each person to get their note/s and phone number/s. then displaying one person at a time with PySimpleGUI.
My issue is that I'm doing multiple SQL queries rather than just one (I don't know if this is a problem), and I have to repeatedly Finalize() and close() the window, which creates a new window each time rather than just refreshing the current one.
Am I able to refactor this so I don't have to close and create a new window each time?
Thank you.
# get all people
people = conn.execute('''SELECT * FROM people''')
people_tuple = people.fetchall()
# loop through all people
for index, person in enumerate(people_tuple):
    # get persons notes
    notes = conn.execute('''SELECT * FROM notes WHERE person_id = ?''', (person[0],))
    # Read from the cursor created just above; no `events` cursor exists.
    notes_list = notes.fetchall()
    # get persons phone number/s
    phone_numbers = conn.execute('''SELECT * FROM phone_numbers WHERE person_id = ?''', (person[0],))
    # Materialise the rows so the layout receives a list, as it does for notes.
    phone_num_list = phone_numbers.fetchall()
    # redacted #
    window = main_window_layout(person, phone_num_list, notes_list).Finalize()
    while True:
        event, values = window.read()
        # many if statements checking user input #
def main_window_layout(person, phone_num_list, notes_list):
top_left_frame_1 = [[sg.Text("ID: " + str(person[0]), key='id', pad=(2,4))],
[sg.Text("Name: " + person[1] + " " + person[2], key='name', pad=(2,4))]]
frame_2 = [
[sg.Text(note[4], key='note_date', pad=(3, (12, 3))), sg.Text(note[6], key='note_info', pad=(3, (12, 3)))]
for note in reversed(notes_list)
Create the window layout only once, give every element that displays record data a key, and update the content of those elements later instead of building a new window.
Demo code
import PySimpleGUI as sg
person = [
[1, 'Ronald', 'Reagan'],
[2, 'Abraham', 'Lincoln'],
[3, 'George', 'Washington'],
[4, 'Andrew', 'Jackson'],
[5, 'Thomas', 'Jefferson'],
[6, 'Harry', 'Truman'],
size, index = 10, 0
total = len(person)
keys = ['ID', 'First Name', 'Last Name']
layout = [
sg.Text(str(person[index][i]), size=size, background_color='blue', key=key)]
for i, key in enumerate(keys)] + [
#[sg.Button('< Prev'), sg.Push(), sg.Button('Next >')],
window = sg.Window('Title', layout)
while True:
event, values = window.read(timeout=500)
if event == sg.WIN_CLOSED:
elif event == sg.TIMEOUT_EVENT:
index = (index + 1) % total
for i, key in enumerate(keys):
""" button events to show previous/next record
elif event in ('< Prev', 'Next >'):
delta = -1 if event == '< Prev' else 1
index = (index + delta) % total
for i, key in enumerate(keys):
This is the sample data in a file. I want to split each line in the file and add it to a dataframe. Some parents have more than one child; whenever there is more than one child, a new set of columns has to be added (child2 Name and DOB, and so on).
(P322) Rashmika Chadda 15/05/1995 – Rashmi C 12/02/2024
(P324) Shiva Bhupati 01/01/1994 – Vinitha B 04/08/2024
(P356) Karthikeyan chandrashekar 22/02/1991 – Kanishka P 10/03/2014
(P366) Kalyani Manoj 23/01/1975 - Vandana M 15/05/1995 - Chandana M 18/11/1998
This is the code I have tried but this splits only by taking "-" into consideration
with open("text.txt") as read_file:
    file_contents = read_file.readlines()
content_list = []
temp = []
for each_line in file_contents:
    # Only the en dash is turned into whitespace here; a plain "-" stays in
    # the line and comes out as its own token.
    temp = each_line.replace("–", " ").split()
    # Keep the tokens of every line so the result is a list of lists.
    content_list.append(temp)
Current output:
[['(P322)', 'Rashmika', 'Chadda', '15/05/1995', 'Rashmi', 'Chadda', 'Teega', '12/02/2024'], ['(P324)', 'Shiva', 'Bhupati', '01/01/1994', 'Vinitha', 'B', 'Sahu', '04/08/2024'], ['(P356)', 'Karthikeyan', 'chandrashekar', '22/02/1991', 'Kanishka', 'P', '10/03/2014'], ['(P366)', 'Kalyani', 'Manoj', '23/01/1975', '-', 'Vandana', 'M', '15/05/1995', '-', 'Chandana', 'M', '18/11/1998']]
Final output should be like below
Rashmika Chadda
Rashmi C
Shiva Bhupati
Vinitha B
Karthikeyan chandrashekar
Kanishka P
Kalyani Manoj
Vandana M
Chandana M
I'm not sure if you want it as a list or something else.
To get lists:
result = []
for t in text[:]:
    # remove the \n at the end of each line
    t = t.strip()
    if not t:
        # blank lines carry no record
        continue
    # remove the parentheses you don't want
    t = t.replace("(", "")
    t = t.replace(")", "")
    # split on the separator; the sample data mixes " - " and " – ",
    # so normalise to the en dash first
    t = t.replace(" - ", " – ").split(" – ")
    # reconstruct
    for i, person in enumerate(t):
        person = person.split(" ")
        # remove code
        if i == 0:
            res = [person.pop(0)]
        # name is the first two words, the date of birth is the third
        res.extend([" ".join(person[:2]), person[2]])
    result.append(res)
Which would give the below output:
[['P322', 'Rashmika Chadda', '15/05/1995', 'Rashmi C', '12/02/2024'], ['P324', 'Shiva Bhupati', '01/01/1994', 'Vinitha B', '04/08/2024'], ['P356', 'Karthikeyan chandrashekar', '22/02/1991', 'Kanishka P', '10/03/2014'], ['P366', 'Kalyani Manoj', '23/01/1975', 'Vandana M', '15/05/1995', 'Chandana M', '18/11/1998']]
You can organise the data a bit more using a dictionary:
result = {}
for t in text[:]:
    # remove the \n at the end of each line
    t = t.strip()
    if not t:
        # blank lines carry no record
        continue
    # remove the parentheses you don't want
    t = t.replace("(", "")
    t = t.replace(")", "")
    # split on the separator; the sample data mixes " - " and " – ",
    # so normalise to the en dash first
    t = t.replace(" - ", " – ").split(" – ")
    for i, person in enumerate(t):
        # split name
        person = person.split(" ")
        if i == 0:
            # the first entry is the parent and starts with the record code
            code = person.pop(0)
            result[code] = {"parent_name": " ".join(person[:2]), "parent_DOB": person[2], "children": [] }
        else:
            # every following entry is a child of that parent
            result[code]['children'].append({f"child{i}_name": " ".join(person[:2]), f"child{i}_DOB": person[2]})
Which would give this output:
{'P322': {'children': [{'child1_DOB': '12/02/2024',
'child1_name': 'Rashmi C'}],
'parent_DOB': '15/05/1995',
'parent_name': 'Rashmika Chadda'},
'P324': {'children': [{'child1_DOB': '04/08/2024',
'child1_name': 'Vinitha B'}],
'parent_DOB': '01/01/1994',
'parent_name': 'Shiva Bhupati'},
'P356': {'children': [{'child1_DOB': '10/03/2014',
'child1_name': 'Kanishka P'}],
'parent_DOB': '22/02/1991',
'parent_name': 'Karthikeyan chandrashekar'},
'P366': {'children': [{'child1_DOB': '15/05/1995',
'child1_name': 'Vandana M'},
{'child2_DOB': '18/11/1998', 'child2_name': 'Chandana M'}],
'parent_DOB': '23/01/1975',
'parent_name': 'Kalyani Manoj'}}
In the end, to have an actual table, you would need to use pandas, but that requires fixing the maximum number of children in advance so that you can pad the empty cells.
I'm preparing a script that reconstitutes multi-token strings from a tokenized text for tokens that have specific labels. My tokens are associated with their start and end indices in the original text.
This is an example piece of text:
t = "Breakfast at Tiffany's is a novella by Truman Capote."
The tokens data structure containing the original text indices and labels:
[(['Breakfast', 0, 9], 'BOOK'),
(['at', 10, 12], 'BOOK'),
(['Tiffany', 13, 20], 'BOOK'),
(["'", 20, 21], 'BOOK'),
(['s', 21, 22], 'BOOK'),
(['is', 23, 25], 'O'),
(['a', 26, 27], 'O'),
(['novella', 28, 35], 'O'),
(['by', 36, 38], 'O'),
(['Truman', 39, 45], 'PER'),
(['Capote', 46, 52], 'PER'),
(['.', 52, 53], 'O')]
This data structure was generated from t as follows
import re
# Each token is stored as [text, start offset, end offset] within ``t``:
# a token is either a run of word characters or one punctuation character.
tokens = [
    [match.group(0), *match.span()]
    for match in re.finditer(r"\w+|[^\w\s]", t, re.UNICODE)
]
# One label per token, in token order.
tags = ['BOOK'] * 5 + ['O'] * 4 + ['PER'] * 2 + ['O']
# Pair every token with its label.
token_tuples = list(zip(tokens, tags))
What I would like my script to do is to iterate through token_tuples and if it encounters a non-O token, it breaks off from the main iteration and reconstitutes the tagged multi-token span until it hits the nearest token with O.
This is the current script:
for i in range(len(token_tuples)):
if token_tuples[i][1] != 'O':
tag = token_tuples[i][1]
start_ix = token_tuples[i][0][1]
slider = i+1
while slider < len(token_tuples):
if tag != token_tuples[slider][1]:
end_ix = token_tuples[slider][0][2]
print((t[start_ix:end_ix], tag))
This prints:
("Breakfast at Tiffany's is", 'BOOK')
("at Tiffany's is", 'BOOK')
("Tiffany's is", 'BOOK')
("'s is", 'BOOK')
('s is', 'BOOK')
('Truman Capote.', 'PER')
('Capote.', 'PER')
What needs to be modified so that the output for this example is:
> ("Breakfast at Tiffany's", "BOOK")
> ("Truman Capote", "PER")
Here's one solution. If you can come up with something less long-winded, I'd be happy to choose your answer instead!
def extract_entities(t, token_tuples):
    """Return the labelled multi-token spans of *t*.

    Args:
        t: Text that the token offsets refer to.
        token_tuples: Sequence of ``([token, start, end], tag)`` pairs in
            text order.

    Returns:
        A list of ``(span_text, tag)`` tuples, one for every maximal run of
        consecutive tokens that share the same tag other than ``'O'``. A run
        that ends on the last token is included as well.
    """
    from itertools import groupby

    entities = []
    # groupby yields consecutive tokens with an equal tag as one run, which
    # is exactly the span that has to be stitched back together.
    for tag, run in groupby(token_tuples, key=lambda pair: pair[1]):
        if tag == 'O':
            continue
        run = list(run)
        start_ix = run[0][0][1]
        end_ix = run[-1][0][2]
        entities.append((t[start_ix:end_ix], tag))
    return entities
I have a bunch of school data that I maintain on a master list of monthly test scores. Every time a child takes a test and there is an update to 'Age', 'Score' or 'School', I want to insert a new row with the updated data, so that I keep track of all the changes. I am trying to figure out a Python script to do this, but since I am a newbie, I keep running into issues.
I tried writing a loop but keep getting errors, including "False", "The truth value of a Series is ambiguous" and "tuple indices must be integers, not str".
# Master list as it stands after the 3/1/2019 test.
master_df = pd.DataFrame({
    'ID': list('ABCD'),
    'School': ['AB', 'CD', 'EF', 'GH'],
    'Score': [80, 75, 62, 100],
    'Date': ['3/1/2019'] * 4,
})
# Results of the 4/1/2019 test; only student B has changed values.
updates_df = pd.DataFrame({
    'ID': list('ABCD'),
    'School': ['AB', 'ZX', 'EF', 'GH'],
    'Score': [80, 90, 62, 100],
    'Date': ['4/1/2019'] * 4,
})
# What I am trying to get is:
updated_master = pd.DataFrame({
    'ID': ['A', 'A', 'B', 'B', 'C', 'D'],
    'School': ['AB', 'AB', 'CD', 'ZX', 'EF', 'GH'],
    'Score': [80, 80, 75, 90, 62, 100],
    'Date': ['3/1/2019', '4/1/2019', '3/1/2019', '4/1/2019', '3/1/2019', '3/1/2019'],
})
# Collect, per ID, the update rows whose tracked values differ from the
# master list.
temp_delta_list = []
# Compare only the tracked columns that exist in both frames; the sample
# frames have no 'Age' column, and looking it up raises KeyError.
tracked_cols = [col for col in ('Age', 'Score', 'School')
                if col in master_df.columns and col in updates_df.columns]
for i in updates_df['ID'].unique():
    updated_rows = updates_df[updates_df['ID'] == i]
    master_rows = master_df[master_df['ID'] == i]
    if master_rows.empty:
        # An ID that is not in the master list yet is a change by definition.
        temp_deltas = updated_rows
    else:
        # NOTE(review): assumes the master rows of an ID are stored oldest
        # first, so the last one holds the current values - confirm.
        latest = master_rows.iloc[-1]
        # A row is a delta when ANY tracked value differs from the master.
        differs = updated_rows[tracked_cols].ne(latest[tracked_cols], axis='columns').any(axis=1)
        temp_deltas = updated_rows[differs]
    if not temp_deltas.empty:
        temp_delta_list.append(temp_deltas)
I ultimately want to have the loop compare each row values for each ID and return rows that have any difference and then append the master_df
Assuming there is a list with sublists like this
[[2013, 'Patric', 'M', 1356], [2013, 'Helena', 'F', 202], [2013, 'Patric', 'F', 6],[1993, 'Patric', 'F', 7]......]
which is an output of def list_of_names() where 2013 is year, M is gender and 1356 is number of M births etc.
And I want to create a dictionary which outputs the name as a key and values as tuples (year, number_of_males,number_of_females) . So for example:
{ .. ’Patric’:[... , (1993, 0, 7), (2013, 1356, 6), ... ], ... }.
Technically 1993 is year, 0 is number of males and 7 is number of females and the tuples should be arranged in order of the years.
and I'm stuck on how to add this info into a dictionary
def name_Index(names):
d = dict()
L = readNames() #the list with from previous def which outputs different names and info as above
newlist = []
for sublist in L:
from collections import defaultdict
def list_of_names():
    """Return sample birth records as ``[year, name, sex, count]`` lists."""
    return [[2013, 'Patric', 'M', 1356],
            [2013, 'Helena', 'F', 202],
            [2013, 'Patric', 'F', 6],
            [1993, 'Patric', 'F', 7]]
def name_Index(records=None):
    """Index birth records by name.

    Args:
        records: Iterable of ``[year, name, sex, count]`` records. When
            omitted, the records returned by ``list_of_names()`` are used.

    Returns:
        A dict mapping each name to a list of ``(year, males, females)``
        tuples in ascending year order. Any sex other than ``'M'`` is
        counted as female.
    """
    if records is None:
        records = list_of_names()
    # name -> year -> [males, females]
    tmp = defaultdict(lambda: defaultdict(lambda: [0, 0]))
    for year, name, sex, N in records:
        i = 0 if sex == 'M' else 1
        tmp[name][year][i] += N
    d = {}
    for name, entries in tmp.items():
        # Dict order is not year order, so sort the years explicitly.
        d[name] = [(year, entries[year][0], entries[year][1])
                   for year in sorted(entries)]
    return d
# Call form of print: valid on Python 3 and prints the same on Python 2.
print(name_Index())
This was my attempt at the problem:
from collections import defaultdict, namedtuple
from itertools import groupby
data = [[2013, 'Patric', 'M', 1356],
        [2013, 'Helena', 'F', 202],
        [2013, 'Patric', 'F', 6],
        [1993, 'Patric', 'F', 7]]

# Collect the records of every name; a defaultdict does the grouping, so the
# input does not have to be sorted by name.
names = defaultdict(list)
datum = namedtuple('datum', 'year gender number')
for year, name, gender, number in data:
    names[name].append(datum(year, gender, number))

final_dict = defaultdict(list)
for n in names:
    # groupby only merges ADJACENT items, so sort by year first; this also
    # puts the resulting tuples in year order.
    by_year = sorted(names[n], key=lambda x: x.year)
    for k, g in groupby(by_year, key=lambda x: x.year):
        males = 0
        females = 0
        for entry in g:
            if entry.gender == 'M':
                males += entry.number
            elif entry.gender == 'F':
                females += entry.number
        final_dict[n].append((k, males, females))
The most convenient approach is to use collections.defaultdict. It returns a dictionary-like object that supplies a default value when it doesn't find a key. In your case, you use a list as the default value, and in your loop you append tuples to it:
from collections import defaultdict
# Sample records: [year, name, sex, number of births].
names = [
    [2013, 'Patric', 'M', 1356],
    [2013, 'Helena', 'F', 202],
    [2013, 'Patric', 'F', 6],
    [1993, 'Patric', 'F', 7],
]
def name_Index(data):
    """Index birth records by name.

    Args:
        data: Iterable of ``[year, name, sex, births]`` records, where
            ``sex`` is ``'M'`` or ``'F'``.

    Returns:
        A dict mapping each name to a list of ``(year, males, females)``
        tuples in ascending year order.
    """
    # name => year => sex
    d = defaultdict(lambda: defaultdict(lambda: {'F': 0, 'M': 0}))
    for year, name, sex, births in data:
        d[name][year][sex] += births

    # if you are fine with defaultdict result: return d
    # else collect results into tuples:
    result = {}
    for name, by_year in d.items():
        # Dict order is not year order, so sort the years explicitly.
        result[name] = [(year, by_year[year]['M'], by_year[year]['F'])
                        for year in sorted(by_year)]
    return result
# Call form of print: valid on Python 3 and prints the same on Python 2.
print(name_Index(names))
# {'Helena': [(2013, 0, 202)], 'Patric': [(1993, 0, 7), (2013, 1356, 6)]}
I didn't understand why you take names as an argument of the name_Index function and then call readNames; presumably your work requires it. Hence, I just added a dummy readNames function and passed None as the argument to name_Index. Using classes is a good technique for handling complicated data structures. By the way, nicely written question, I must admit.
def readNames ():
    """Return dummy birth records as ``[year, name, gender, count]`` lists."""
    return [[2013, 'Patric', 'M', 1356], [2013, 'Helena', 'F', 202], [2013, 'Patric', 'F', 6],[1993, 'Patric', 'F', 7]]
class YearOb(object):
    """Male and female birth totals for one year."""

    def __init__(self):
        self.male = 0
        self.female = 0

    def add_birth_data(self, gender, birth_count):
        """Add *birth_count* to the male total for "M", else to the female total."""
        if gender == "M":
            self.male += birth_count
        else:
            # Without this branch, male births were also added to the
            # female total.
            self.female += birth_count
class NameOb(object):
    """Per-year birth totals for one name."""

    def __init__(self):
        # year -> YearOb holding that year's totals
        self.yearobs = dict()

    def add_record(self, year, gender, birth_count):
        """Add one record to the totals of *year*, creating the year on first use."""
        if year not in self.yearobs:
            self.yearobs[year] = YearOb()
        self.yearobs[year].add_birth_data(gender, birth_count)

    def get_as_list(self):
        """Return ``(year, males, females)`` tuples in ascending year order."""
        list_data = []
        # Sort on the year only; the stored objects are not comparable.
        for year, yearob in sorted(self.yearobs.items(), key=lambda item: item[0]):
            list_data.append((year, yearob.male, yearob.female))
        return list_data
def name_Index(names):
    """Index the records returned by ``readNames()`` by name.

    Args:
        names: Not used; kept so that existing calls keep working.

    Returns:
        A dict mapping each name to a list of ``(year, males, females)``
        tuples in ascending year order.
    """
    L = readNames() #the list with from previous def which outputs different names and info as above
    by_name = dict()
    for sublist in L:
        name = sublist[1]
        if name not in by_name:
            # The first record of a name needs its accumulator created.
            by_name[name] = NameOb()
        by_name[name].add_record(sublist[0], sublist[2], sublist[3])
    # Build a fresh dict instead of overwriting values while iterating, and
    # sort so the tuples are ordered by year.
    return {name: sorted(nameob.get_as_list()) for name, nameob in by_name.items()}