python-docx how to merge row cells - python

I am creating a Word document from data using python-docx. I can create all the rows and cells with no problem but, in some cases, when the current record from the database has some content in field comment, I need to add a new line to display a long content.
I tried appending a paragraph, but the result is that the comment is appended after the table, and I need it to be added below the current table row.
I think the solution is to append a table row with all cells merged, but I can't find documentation to do so.
This is the code where I generate the docx file:
class OperationDOCXView(viewsets.ViewSet):
    """Serve the operations report as JSON or as a generated Word document."""

    exclude_from_schema = True

    def list(self, request):
        """Return the report context as JSON, or the URL of a generated .docx.

        Reads ``profile_id``, ``operation_date``, ``operation_type`` and
        ``doc_format`` from the query string and stores the first three (plus
        the format) on the view instance, because the helper methods read them
        from ``self``. A missing parameter raises ``KeyError``.
        """
        from ReportsManagerApp.controllers import Operations2Controller

        params = request.query_params
        self.profile_id = params['profile_id']
        self.operation_date = params['operation_date']
        self.operation_type = params['operation_type']
        self.format = params['doc_format']

        controller = Operations2Controller(self.profile_id, self.operation_date, self.operation_type)
        context = controller.get_context()
        if self.format == 'json':
            return Response(context)

        word_doc = self.get_operation_word_file(request, context)
        return Response("{}{}{}".format(request.get_host(), settings.MEDIA_URL, word_doc))

    def get_operation_word_file(self, request, context):
        """Build the report document, save it under ``settings.MEDIA_DIR``.

        ``context`` is iterated as ``{provider_unit: [transfer, ...]}``; each
        provider unit gets a paragraph followed by an 11-column table with one
        row per transfer and a final row holding the "adults.minors" total.

        Returns the file name (not the full path) of the saved document.
        """
        import os

        from docx import Document
        from docx.enum.text import WD_ALIGN_PARAGRAPH
        from docx.shared import Inches, Pt

        operation_type = {
            'arrival': 'Llegadas',
            'departure': 'Salidas',
            'hotel': 'Hotel-Hotel',
            'tour': 'Tours',
        }
        # Indexed by date.weekday(): 0 is Monday.
        weekdays = ('LUNES', 'MARTES', 'MIÉRCOLES', 'JUEVES', 'VIERNES', 'SÁBADO', 'DOMINGO')
        titles = ['Booking', 'Nombre', '#', 'Vuelo', 'Hr', 'P Up', 'Traslado', 'Circuito', 'Priv?', 'Agencia', '']
        widths = [Inches(1), Inches(2), Inches(0.5), Inches(1), Inches(1), Inches(1), Inches(2), Inches(3), Inches(0.5), Inches(3), Inches(0.5)]

        document = Document()
        section = document.sections[-1]
        section.top_margin = Inches(0.5)
        section.bottom_margin = Inches(0.5)
        section.left_margin = Inches(0.3)
        section.right_margin = Inches(0.2)

        font = document.styles['Normal'].font
        font.name = 'Arial'
        font.size = Pt(10)

        company_paragraph = document.add_heading("XXXX TTOO INC")
        company_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        description_paragraph = document.add_paragraph(
            "Operación de {} del día {}".format(operation_type[self.operation_type], self.operation_date)
        )
        description_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        operation_week_day = self.get_operation_date().date().weekday()
        day_paragraph = document.add_paragraph(weekdays[operation_week_day])
        day_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

        for provider_unit, transfers in context.items():
            provider_unit_paragraph = document.add_paragraph(provider_unit)
            provider_unit_paragraph.style.font.size = Pt(10)
            provider_unit_paragraph.style.font.bold = False

            table = document.add_table(rows=1, cols=len(titles))
            for cell, title in zip(table.rows[0].cells, titles):
                self.get_hdr_cells_run(cell, title)

            adults = 0
            minors = 0
            for transfer in transfers:
                row_cells = table.add_row().cells
                row_cells[0].text = transfer['booking']
                row_cells[1].text = transfer['people']
                row_cells[2].text = transfer['pax']
                flight = transfer.get("flight") or ""
                row_cells[3].text = flight
                row_cells[4].text = self.get_flight_time(flight) if flight != '' else ''
                pickup_time = transfer['pickup_time']
                row_cells[5].text = pickup_time.strftime('%H:%M') if pickup_time is not None else ''
                row_cells[6].text = transfer['place']
                row_cells[7].text = transfer['roundtrip']
                row_cells[8].text = transfer['is_private']
                row_cells[9].text = transfer['agency']

                # 'pax' is split on '.' into adults and minors; a value
                # without a minors part counts as zero minors.
                people = transfer['pax'].split('.')
                adults += int(people[0])
                if len(people) > 1:
                    minors += int(people[1])

                if transfer['comment'] is not None:
                    # NOTE: add_paragraph() is called on the document, not on
                    # the table, so the comment is added to the document body
                    # rather than to the current table row.
                    document.add_paragraph("Comentarios: {}".format(transfer['comment']))

            # One pass over every row (header included) applies the column
            # widths and the 8pt run font; the totals row added below is not
            # part of this pass.
            for row in table.rows:
                for idx, width in enumerate(widths):
                    row.cells[idx].width = width
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            run.font.size = Pt(8)

            row_cells = table.add_row().cells
            row_cells[10].text = "{}.{}".format(adults, minors)

        file_name = "Operaciones {} {}.docx".format(self.operation_type, self.operation_date)
        # os.path.join copes with MEDIA_DIR with or without a trailing separator.
        document.save(os.path.join(settings.MEDIA_DIR, file_name))
        return file_name

    def get_flight_time(self, flight):
        """Return the 'HH:MM' time of flight number ``flight`` on the operation date.

        Returns '' when the current operation type has no flight type, when no
        single matching ``Flight`` exists, or when the flight has no time for
        that weekday. An unparsable ``self.operation_date`` raises ``ValueError``.
        """
        from OperationsManagerApp.models import Flight

        operation_types = {
            'arrival': 'ARRIVAL',
            'departure': 'DEPARTURE'
        }
        operation_date = datetime.strptime(self.operation_date, '%Y-%m-%d')
        try:
            flight = Flight.objects.get(flight_type=operation_types[self.operation_type], number=flight)
        except (KeyError, Flight.DoesNotExist, Flight.MultipleObjectsReturned):
            # KeyError: operation type without a flight type ('hotel', 'tour').
            return ''

        # Indexed by datetime.weekday(): 0 is Monday.
        weekday_times = (
            flight.time_monday,
            flight.time_tuesday,
            flight.time_wednesday,
            flight.time_thursday,
            flight.time_friday,
            flight.time_saturday,
            flight.time_sunday,
        )
        weekday_time = weekday_times[operation_date.weekday()]
        return weekday_time.strftime('%H:%M') if weekday_time is not None else ''

    def get_hdr_cells_run(self, hdr_cells, title):
        """Add ``title`` as a bold 8pt run to the first paragraph of a header cell.

        Despite its name, ``hdr_cells`` is a single cell. Returns the new run.
        """
        from docx.shared import Pt

        new_run = hdr_cells.paragraphs[0].add_run(title)
        new_run.bold = True
        new_run.font.size = Pt(8)
        return new_run

    def get_operation_date(self):
        """Parse ``self.operation_date`` ('YYYY-MM-DD') into a ``datetime``."""
        date_array = self.operation_date.split('-')
        return datetime(int(date_array[0]), int(date_array[1]), int(date_array[2]))

One approach is to add a paragraph to one of the cells:
cell.add_paragraph(transfer['comment'])
This will place it in the right position with respect to the row it belongs to, rather than after the table.
If that would take too much room for a single cell that already has another data item in it and you want to add a row, you'll need to account for that when you allocate the table. But assuming you get that worked out, merging the cells is easy:
row.cells[0].merge(row.cells[-1])

Related

Inserting/overwriting a table after a paragraph (specific location) in a word file with docx python?

Does anybody know how to create/update a table with the docx library in a word document at a specific location.
So e.g. after a paragraph with text 'test1'?
The idea is to check whether the paragraph exists in the document: if it does, overwrite the existing table underneath it; if not, create a new paragraph, and a table underneath it, at a certain location (at a certain heading level).
I managed to add a paragraph after a specific paragraph but it does not seem to work with tables in the same way.
I can not seem to find a link between the paragraph objects and creating a table object underneath that paragraph object or identifying the existing table object based on the paragraph.
A bit of context on the code, the code is first reading xaml files and writing this data in a word document. The first time the code is run it will create all subheadings and text/tables. With a second run the code will be updating the text/table values as the subheadings already exist.
e.g.
template:
02_BusinessProcess
00_Dispatcher
01_Initialization
after first run:
02_BusinessProcess
1.1 Xamlfilename
Text
Table
00_Dispatcher
2.1 Xamlfilenam
Text
01_Initialization
after second run:
02_BusinessProcess
1.1 Xamlfilename
Updated Text
Updated Table
00_Dispatcher
2.1 Xamlfilenam
Updated Text
01_Initialization
I want to add in the tables between these lines(newly created paragraphs).
paragraph1 = insert_paragraph_after(paragraph, xaml_obt.xamlfilename, style=document.styles['Heading 3'])
paragraph_annseq = insert_paragraph_after(paragraph1, xaml_obt.ann_seq, style=document.styles['No Spacing'])
paragraph_var = insert_paragraph_after(paragraph_annseq, "Variables - " + xaml_obt.xamlfilename,style=document.styles['Heading 4'])
paragraph_in_arg = insert_paragraph_after(paragraph_var, "In_Agruments - " + xaml_obt.xamlfilename, style=document.styles['Heading 4'])
paragraph_io_arg = insert_paragraph_after(paragraph_in_arg, "In_Out_Agruments - " + xaml_obt.xamlfilename, style=document.styles['Heading 4'])
insert_paragraph_after(paragraph_io_arg, "Out_Agruments - " + xaml_obt.xamlfilename, style=document.styles['Heading 4'])
Or update the table in this spot if the paragraph exists in the document:
if paragraph.style.name.startswith('Heading'):
if paragraph.text == xaml_obt.xamlfilename:
new_para = document.paragraphs[i + 1]
new_para.text = xaml_obt.ann_seq + "\n\n"
style = document.styles['No Spacing']
new_para.style = style
new_para.alignment = WD_ALIGN_PARAGRAPH.LEFT
Here is the complete code:
from tkinter import Tk, filedialog
import os
import json
from xml.etree import ElementTree as ET
import docx
from docx.oxml.xmlchemy import OxmlElement
from docx.text.paragraph import Paragraph
from docx.enum.text import WD_ALIGN_PARAGRAPH
import time
import win32com.client
import pandas as pd
class xamlinfo(object):
    """Data collected from one workflow XAML file."""

    def __init__(self, name: object) -> None:
        """Create an empty record for the XAML file called ``name``.

        Text attributes start as empty strings (not the ``str`` type object),
        so they can be concatenated or written to a document before they are
        filled in.
        """
        self.aut_block = ""
        self.xamlfilepath = ""
        self.xamlfilename = str(name) if name is not None else ""
        self.xaml_read = None
        self.toplevelnaming = ""
        self.ann_seq = ""
        self.in_arguments = pd.DataFrame(columns=['Name', 'Type', 'Annotation'])
        self.out_arguments = pd.DataFrame(columns=['Name', 'Type', 'Annotation'])
        self.io_agruments = pd.DataFrame(columns=['Name', 'Type', 'Annotation'])
        self.variables = pd.DataFrame(columns=['Name', 'Annotation'])
def selectfolder():
    """Ask the user for a directory and return its normalised path."""
    # A hidden, always-on-top Tk root so only the dialog is shown.
    dialog_root = Tk()
    dialog_root.withdraw()
    dialog_root.attributes('-topmost', True)
    chosen = os.path.normpath(filedialog.askdirectory())
    print("Following filepath selected: ", chosen)
    return chosen
# Checked in this order: "OutArgument" is a substring of "InOutArgument", so
# the in/out kind must be recognised first.
_ARGUMENT_KINDS = (
    ("InOutArgument", "io_agruments"),
    ("InArgument", "in_arguments"),
    ("OutArgument", "out_arguments"),
)


def _xaml_annotation(element):
    """Return the first attribute value whose key contains 'annotationtext', else ''."""
    for key, value in element.attrib.items():
        if "annotationtext" in key.lower():
            return value
    return ""


def _append_record(frame, values):
    """Append ``values`` as a new last row of ``frame``, in column order."""
    frame.loc[len(frame)] = values


def assignxamlobjects(listxamls, path):
    """Parse every .xaml file under ``path`` and append one ``xamlinfo`` per file.

    For each file the top-level Sequence's display name and annotation, the
    declared arguments (in / in-out / out) and the variables are recorded.
    Files without a ``.xaml`` extension are skipped. If ``path`` does not
    exist a message is printed and nothing is added.

    Returns ``listxamls`` (the same list, extended in place).
    """
    if not os.path.exists(path):
        print("The following path does not exists, please amend your project structure: "+path)
        return listxamls

    for root, dirs, files in os.walk(path):
        for file in files:
            if not file.lower().endswith(".xaml"):
                continue
            xaml_obt = xamlinfo(os.path.basename(file))
            xaml_obt.aut_block = os.path.basename(path)
            xaml_obt.xamlfilename = file
            xaml_obt.xamlfilepath = os.path.join(root, file)
            treeroot = ET.parse(xaml_obt.xamlfilepath).getroot()
            xaml_obt.xaml_read = treeroot

            top_sequence = treeroot.find(".//{*}Sequence")
            if top_sequence is None:
                # A workflow without any Sequence still gets a record.
                xaml_obt.toplevelnaming = ""
                xaml_obt.ann_seq = ""
            else:
                xaml_obt.toplevelnaming = top_sequence.attrib.get("DisplayName", "")
                xaml_obt.ann_seq = _xaml_annotation(top_sequence)

            for element in treeroot.findall(".//{*}Property"):
                type_text = str(element.attrib.get("Type", ""))
                for marker, attr_name in _ARGUMENT_KINDS:
                    if marker in type_text:
                        _append_record(
                            getattr(xaml_obt, attr_name),
                            [str(element.attrib["Name"]), type_text.replace(marker, ""), _xaml_annotation(element)],
                        )
                        break

            for element in treeroot.findall(".//{*}Variable"):
                _append_record(xaml_obt.variables, [str(element.attrib["Name"]), _xaml_annotation(element)])

            listxamls.append(xaml_obt)
    return listxamls
def getworkflowinfo(openfile):
    """Collect the XAML records and project.json content of a project folder.

    Scans, in order, ``process/02_BusinessProcess``, ``process/00_Dispatcher``,
    ``process/01_Initialization`` and ``Process`` below ``openfile``.

    Returns ``(parsed project.json, list of xamlinfo)``.
    """
    scan_order = (
        os.path.join(openfile, "process", "02_BusinessProcess"),
        os.path.join(openfile, "process", "00_Dispatcher"),
        os.path.join(openfile, "process", "01_Initialization"),
        os.path.join(openfile, "Process"),
    )
    listxamls = []
    for folder in scan_order:
        listxamls = assignxamlobjects(listxamls, path=folder)

    with open(os.path.join(openfile, "project.json")) as handle:
        uipathjson = json.load(handle)
    return uipathjson, listxamls
def insert_paragraph_after(paragraph, text, style):
    """Insert a new paragraph directly after ``paragraph`` and return it.

    A run with ``text`` is added only when ``text`` is truthy, and ``style``
    is applied only when it is not ``None``.
    """
    element = OxmlElement('w:p')
    paragraph._p.addnext(element)
    inserted = Paragraph(element, paragraph._parent)
    if text:
        inserted.add_run(text)
    if style is not None:
        inserted.style = style
    return inserted
def fillxamldata(document, listofxamls):
    """Write each workflow's information into ``document``.

    If a paragraph with the workflow's file name already exists, the paragraph
    following each such heading is overwritten with the sequence annotation.
    Otherwise a new set of headings is inserted after every heading whose text
    equals the workflow's ``aut_block``.
    """
    print("Starting to update workflow information.")
    for xaml_obt in listofxamls:
        paragraphs = document.paragraphs
        if any(paragraph.text == xaml_obt.xamlfilename for paragraph in paragraphs):
            for i, paragraph in enumerate(paragraphs):
                if not paragraph.style.name.startswith('Heading'):
                    continue
                if paragraph.text != xaml_obt.xamlfilename:
                    continue
                if i + 1 < len(paragraphs):
                    new_para = paragraphs[i + 1]
                else:
                    # The heading is the last paragraph: add one to write
                    # into instead of raising IndexError.
                    new_para = insert_paragraph_after(paragraph, "", style=None)
                new_para.text = xaml_obt.ann_seq + "\n\n"
                new_para.style = document.styles['No Spacing']
                new_para.alignment = WD_ALIGN_PARAGRAPH.LEFT
        else:
            for paragraph in paragraphs:
                if not paragraph.style.name.startswith('Heading'):
                    continue
                if paragraph.text != xaml_obt.aut_block:
                    continue
                heading_4 = document.styles['Heading 4']
                # Each paragraph is inserted after the previous one, so they
                # appear in the document in this order.
                paragraph1 = insert_paragraph_after(paragraph, xaml_obt.xamlfilename, style=document.styles['Heading 3'])
                paragraph_annseq = insert_paragraph_after(paragraph1, xaml_obt.ann_seq, style=document.styles['No Spacing'])
                paragraph_var = insert_paragraph_after(paragraph_annseq, "Variables - " + xaml_obt.xamlfilename, style=heading_4)
                paragraph_in_arg = insert_paragraph_after(paragraph_var, "In_Agruments - " + xaml_obt.xamlfilename, style=heading_4)
                paragraph_io_arg = insert_paragraph_after(paragraph_in_arg, "In_Out_Agruments - " + xaml_obt.xamlfilename, style=heading_4)
                insert_paragraph_after(paragraph_io_arg, "Out_Agruments - " + xaml_obt.xamlfilename, style=heading_4)
    print("Workflow information updated successfully.\n")
    return
def filldependencies(document, jsonUI):
    """Write the project's dependencies below every 'Dependencies' heading.

    ``jsonUI['dependencies']`` is rendered as one "name: version" line per
    entry and replaces the text of the paragraph following the heading.
    """
    print("Starting to fill dependencies.")
    dict_depend = jsonUI['dependencies']
    text = "".join(f"{name}: {version}\n" for name, version in dict_depend.items())
    paragraphs = document.paragraphs
    for i, paragraph in enumerate(paragraphs):
        if not paragraph.style.name.startswith('Heading'):
            continue
        if paragraph.text.lower() != 'dependencies':
            continue
        if i + 1 < len(paragraphs):
            new_para = paragraphs[i + 1]
        else:
            # The heading is the last paragraph: add one to write into
            # instead of raising IndexError.
            new_para = insert_paragraph_after(paragraph, "", style=None)
        new_para.text = text
        new_para.style = document.styles['No Spacing']
        new_para.alignment = WD_ALIGN_PARAGRAPH.LEFT
    print("Dependencies updated successfully.\n")
    return
def fillgeneralinfo(document, jsonUI):
    """Write the process name, description and versions below every 'General info' heading.

    The values come from the ``name``, ``description``, ``studioVersion`` and
    ``projectVersion`` keys of ``jsonUI`` and replace the text of the
    paragraph following the heading.
    """
    print("Starting to fill process info.")
    text = ("Process name: \t\t\t" + jsonUI['name'] + "\n" +
            "Process description:\t\t" + jsonUI['description'] + "\n" +
            "UIpath Studio version:\t\t" + jsonUI['studioVersion'] + "\n" +
            "Project version:\t\t\t" + jsonUI['projectVersion'] + "\n")
    paragraphs = document.paragraphs
    for i, paragraph in enumerate(paragraphs):
        if not paragraph.style.name.startswith('Heading'):
            continue
        if paragraph.text.lower() != 'general info':
            continue
        if i + 1 < len(paragraphs):
            new_para = paragraphs[i + 1]
        else:
            # The heading is the last paragraph: add one to write into
            # instead of raising IndexError.
            new_para = insert_paragraph_after(paragraph, "", style=None)
        new_para.text = text
        new_para.style = document.styles['No Spacing']
        new_para.alignment = WD_ALIGN_PARAGRAPH.LEFT
    print("Process info successfully updated.\n")
    return
def fillworddata(path, listofxamls):
    """Load the SDD document at ``path``, fill it in and save it back in place.

    The dependency and general-info sections are filled from the module-level
    ``jsonUI`` value, which must be set before this function is called.
    """
    print("You seleceted the following SDD file: "+path+"\n")
    document = docx.Document(path)
    # The file is deliberately not opened for writing here: open(path, "w")
    # would truncate it immediately, so an error in any fill step would leave
    # an empty file behind. document.save() writes the file itself.
    fillxamldata(document, listofxamls)
    filldependencies(document, jsonUI)
    fillgeneralinfo(document, jsonUI)
    document.save(path)
    return
def startmessage():
    """Print the SDD_AUT start banner."""
    banner_lines = (
        "###############################################################\n",
        " SDD_AUT \n",
        "###############################################################\n",
    )
    print("".join(banner_lines))
# Script entry: read the project, ask for confirmation, then update the
# selected SDD document and report how long the run took.
starttimer = time.time()
startmessage()
openfile = selectfolder()
jsonUI, listxamls = getworkflowinfo(openfile)
confirmation_prompt = (
    "The information for process | " + jsonUI['name'] + " | has been read.\n"
    "Do you want to continue? (y/n)\n"
)
correct_proc = input(confirmation_prompt)
if correct_proc.lower() != 'y':
    print("The process has been terminated as the incorrect project was selected.")
else:
    sdd_doc = filedialog.askopenfilename(title='Select a file')
    fillworddata(path=sdd_doc, listofxamls=listxamls)
    print("Process has been executed successfully!")
endtimer = time.time()
duration = endtimer - starttimer
print("Process took: " + str(duration))

Python Docx Minimum Table Height

I'm trying to fit 10 rows (and three columns) of a table on one page; however, I'm running into a limitation where I can't get more than 8 rows to fit. I've tried the following code:
table = document.add_table(rows=0, cols=3)
# NOTE(review): the table is created with rows=0, so at this point the loop
# below has no rows to visit.
for row in table.rows:
row.height = Cm(1)
However, below a certain size, reducing the height further makes no difference in the output. Is it possible to fit 10 rows on one page?
An adapted version of my code, which is iterating through a dataframe and writing columns of my dataframe to cells of a table.
# Question script: fills a 3-column table with one cell per line item and
# starts a new table after a page break whenever full_count reaches a multiple
# of 30. `line_items` and `x` are defined outside this excerpt.
document = Document()
sections = document.sections
for section in sections:
section.top_margin = Inches(0.00)
section.bottom_margin = Inches(0.00)
section.left_margin = Inches(0.00)
section.right_margin = Inches(0.00)
style = document.styles['Normal']
font = style.font
font.size = Pt(8)
table = document.add_table(rows=0, cols=3)
index = 0
full_count = 1
for item_one, item_two,description,max_portion,quantity_adjusted, mods in zip(line_items['title'].tolist(), line_items['quantity'],line_items['description'], line_items['max_portion'],line_items['quantity_adjusted'], line_items['modifications']):
count = 0
# A new row is started for every third item.
if index % 3 == 0:
cell_row = table.add_row()
cell_row.height = Cm(0.1)
row_cells = cell_row.cells
part_one_cell = row_cells[index % 3]
# NOTE(review): height is set on the row above; assigning it on a cell
# presumably has no effect in python-docx - confirm.
part_one_cell.height = Cm(0.1)
#para = doc.add_paragraph().add_run('GeeksforGeeks is a Computer Science portal for geeks.')
#para.font.size = Pt(12)
# NOTE(review): add_paragraph() is called three times per cell (here and
# twice below), so every cell holds several paragraphs; that presumably puts
# a floor on the row height regardless of row.height - confirm.
p = part_one_cell.add_paragraph()
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
#p1 = part_one_cell.paragraphs[0].add_run(item_one.upper()+ ' ' + description.upper())
#p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
# The title font shrinks as the combined title/description gets longer.
if len(item_one + description) < 40:
p.add_run(item_one.upper()+ ' ' + description.upper()).font.size = Pt(12)
elif len(item_one + description) < 60:
p.add_run(item_one.upper()+ ' ' + description.upper()).font.size = Pt(10)
else:
p.add_run(item_one.upper()+ ' ' + description.upper()).font.size = Pt(8)
# row1 and row (below) are the same cell as part_one_cell.
row1 = row_cells[index % 3]
row2= row1.add_paragraph(mods)
row2.alignment = WD_ALIGN_PARAGRAPH.CENTER
row = row_cells[index % 3]
p1 = row.add_paragraph(f'{x[str(quantity_adjusted)]}')
p1.alignment=WD_ALIGN_PARAGRAPH.RIGHT
#part_one_cell.paragraphs[0].add_run(f'{x[str(item_two)]}')
#part_one_cell.paragraphs[0].add_run(f' {str(x)}').bold= True
index = index + 1
full_count = full_count + 1
if full_count % 30 == 0:
document.add_page_break()
table = document.add_table(rows=0, cols=3)
I have no problem getting 10 1cm rows in a single page. I declare the number of rows when adding the table:
from docx import Document
from docx.shared import Cm

# Declare all ten rows when the table is created, then give each a 1 cm height.
document = Document()
table = document.add_table(rows=10, cols=3)
table.style = 'Table Grid'
for table_row in table.rows:
    table_row.height = Cm(1)
document.save('demo.docx')
To add rows in a for loop:
# Start with an empty table and set the height of each row as it is added.
table = document.add_table(rows=0, cols=3)
table.style = 'Table Grid'
for _ in range(10):
    new_row = table.add_row()
    new_row.height = Cm(1)
document.save('demo.docx')

Why is my flask route saving xlsx file to the root directory of the project instead of instance files?

I am trying to create a flask route that sends data to a function, that function creates an openpyxl excel file and returns the excel file to the route, and the route then returns the downloadable file to a React frontend. I'm not sure if this is the exact problem, but I am getting errors that the file is not found in my instance/files folder. Instead, the file is saving to my project's root directory. The same path is working for other routes, so I'm not sure what I'm doing wrong here/why it is saving elsewhere. I'm assuming this is why I can't return the excel file to the frontend, but it could be other issues with my function/route. Please help!
This is my openpyxl function:
def generate_prev_sim_csv(data, output_dir=None):
    """Write the simulation summary workbook and return the path it was saved to.

    ``data`` must provide ``claims``, ``setups`` and
    ``setupSummaryMetricHeaders``. Each setup is indexed positionally:
    [0] claim -> state, [1] summary metrics, [2] claim -> per-claim metrics,
    [3] claims switched on, [4] number of respondents, [5] subgroup.

    Sheet layout: column B holds the claim names and the metric names, and
    each setup gets one column from C onwards, headed "Setup 1", "Setup 2", ...

    ``output_dir`` is optional. When omitted, the workbook is saved as
    "Simulation_Summary.xlsx" relative to the current working directory;
    otherwise it is saved inside ``output_dir``, which is created if needed.
    """
    import os

    claims = data['claims']
    setup_dict = data['setups']
    summary_metric_headers = data['setupSummaryMetricHeaders']
    filename = "Simulation_Summary.xlsx"
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
        filename = os.path.join(output_dir, filename)

    wb = Workbook()
    sheet = wb.active

    # styles
    heading = NamedStyle(name='Heading', font=Font(size=11, bold=True))
    wb.add_named_style(heading)
    percent_value = NamedStyle(name='Percentage', number_format='0.00%')
    wb.add_named_style(percent_value)

    label_col = 2        # column B: claim names and metric names
    first_setup_col = 3  # column C: first setup

    def put(row, column, value, style=None):
        """Write ``value`` to one cell and optionally apply a named style."""
        cell = sheet.cell(row=row, column=column, value=value)
        if style is not None:
            cell.style = style

    # Header row: "Claim" plus one "Setup N" heading above each setup column.
    put(1, label_col, 'Claim', heading)
    for setup_index in range(len(setup_dict)):
        put(1, first_setup_col + setup_index, "Setup " + str(setup_index + 1), heading)

    # Claims table: reach for claims that are on, otherwise the claim's state.
    state_labels = {
        "Offered": "Already Offered",
        "Considered": "Considered",
        "Excluded": "Excluded",
    }
    for claim_index, claim in enumerate(claims):
        current_row = 2 + claim_index
        put(current_row, label_col, claim, heading)
        for setup_index, setup in enumerate(setup_dict):
            current_col = first_setup_col + setup_index
            if claim in setup[3]:
                put(current_row, current_col, setup[2][claim]['Summary_Metrics']['Reach'], percent_value)
            else:
                put(current_row, current_col, state_labels.get(setup[0][claim], ""))

    # The summary block starts at row 16, or lower when the claims table
    # would otherwise run into it.
    metric_header_row = max(16, len(claims) + 2)
    if summary_metric_headers:
        put(metric_header_row, label_col, "Summary Metrics", heading)

    for metric_index, header in enumerate(summary_metric_headers):
        current_row = metric_header_row + 1 + metric_index
        put(current_row, label_col, header, heading)
        for setup_index, setup in enumerate(setup_dict):
            current_col = first_setup_col + setup_index
            if header == "Subgroup":
                put(current_row, current_col, setup[5])
            elif header == "Number of Respondents":
                put(current_row, current_col, setup[4])
            elif header == "Average Liked":
                put(current_row, current_col, round(setup[1]["Average_Number_of_Items_Liked"], 2))
            elif header == "Average Reach":
                put(current_row, current_col, setup[1]["Reach"], percent_value)
            elif header == "Average Favorite":
                put(current_row, current_col, setup[1]["Favorite_Percentage"], percent_value)
            else:
                put(current_row, current_col, "")

    wb.save(filename=filename)
    return filename
This is my route. I'm not sure what to do with the return from the function?:
#bp.route("/api/export_prev_sim_to_csv", methods=["GET", "POST"])
def export_simulations_to_csv():
    """Generate the simulation summary workbook and send it as a download.

    Ensures ``<instance_path>/files`` exists, calls ``generate_prev_sim_csv``
    with the request's JSON body and sends
    ``<instance_path>/files/Simulation_Summary.xlsx``.
    """
    data = request.get_json() or {}
    print(data)

    instance_dir = current_app.instance_path
    if not os.path.exists(instance_dir):
        os.mkdir(instance_dir)
    files_dir = os.path.join(instance_dir, "files")
    if not os.path.exists(files_dir):
        os.mkdir(files_dir)
    cs_fn = os.path.join(files_dir, "Simulation_Summary.xlsx")

    # NOTE: the path returned by the generator is not used; the file sent
    # below is always cs_fn.
    generate_prev_sim_csv(data)

    xlsx_mimetype = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    return send_file(
        cs_fn,
        mimetype=xlsx_mimetype,
        as_attachment=True,
        cache_timeout=0,
    )
You are saving the file in the root directory in the generate_prev_sim_csv function:
filename = "Simulation_Summary.xlsx"
[...]
wb.save(filename=filename)
wb.save creates the file if it doesn't exist, so you don't need to create the file in your route.
Just change the filename to this in your openpyxl function
filename = 'instance/files/Simulation_Summary.xlsx'

KeyError: "['xyz.pdf'] not found in axis"

Hi I am making a code to drop a certain row in pandas if a dictionary is empty. That dictionary is empty when it doesn't find the words it was searching in that pdf.
Here is my code -
# Question script: loads the announcements CSV, fetches the text behind each
# 'Source' link and scans every sentence for capex, type, value and date terms.
# `urltotext`, `nlp`, `findmatch` and `getext` are defined outside this excerpt.
# NOTE(review): the Windows path is a regular (non-raw) string containing
# backslashes; a raw string (r"...") would avoid any escape-sequence handling.
allcapex = pd.read_csv("C:\Shodh by Arthavruksha\CorporateAnnouncements\Test 2021 - CA.csv",index_col = 0)
allcapex.drop(['Type', 'Topic','Date','Time','DateTime','Sector'], axis = 1,inplace = True)
allcapex = allcapex[allcapex['Source'].str.contains("-") == False]
allcapex['Content'] = allcapex['Source'].apply(lambda link: urltotext(link))
allcapex = allcapex[allcapex['Content'].str.contains("none") == False]
allcapex['Sentence'] = ''
allcapex['Date'] = ''
allcapex['Type'] = ''
allcapex['Value'] = ''
print(allcapex)
# i is used as the positional (iloc) index of the row being processed.
i = 0
for text in allcapex['Content']:
doc = nlp(text)
for sent in doc.sents:
capexmatches = findmatch(sent,['capex', 'capacity expansion', 'Capacity expansion', 'CAPEX', 'Capacity Expansion', 'Capex'],'CAPEX','CAPEX')
typematches = findmatch(sent,['Greenfield','greenfield', 'brownfield','Brownfield', 'de-bottlenecking', 'De-bottlenecking'],'Type','Type')
valuematches = findmatch(sent,['Crore', 'Cr','crore', 'cr'],'Value','Value')
datematches = findmatch(sent,['2020', '2021','2022', '2023','2024', '2025', 'FY21', 'FY22', 'FY23', 'FY24', 'FY25','FY26','Q1FY22','Q2FY22','Q3FY22','Q4FY22','Q1FY23','Q2FY23','Q3FY23','Q4FY23','Q1FY24','Q2FY24','Q3FY24','Q4FY24','Q1FY25','Q2FY25','Q3FY25','Q4FY25'],'Date','Date')
if not capexmatches:
link = allcapex['Source'].iloc[i]
# NOTE(review): `link` is a value taken from the 'Source' column, but
# drop(..., axis=1) looks it up among the column labels; this is the line the
# reported KeyError comes from.
allcapex = allcapex.drop(link,axis=1)
else:
company = allcapex['Company Name'].iloc[i]
link = allcapex['Source'].iloc[i]
capextype = getext(typematches)
capexvalue = getext(valuematches)
capexdate = getext(datematches)
# NOTE(review): this assigns a five-item row while the frame is being iterated
# above; the list length has to match the frame's column count - confirm.
allcapex.loc[len(allcapex.index)] = [link,company,capexdate,capextype,capexvalue]
i += 1
print(allcapex)
Error is in this line - allcapex = allcapex.drop(link,axis=1) . And error is this - KeyError: "['https://archives.nseindia.com/corporate/PIONDIST_31122021225817_intimation_NimishShah_appointment_signed.pdf'] not found in axis"
My Dataframe is this -
Source,Company Name
1. https://archives.nseindia.com/corporate/PIONDIST_31122021225817_intimation_NimishShah_appointment_signed.pdf,Pioneer Distilleries Limited
2. https://archives.nseindia.com/corporate/EQUITAS_31122021225634_311221EHLPostalballotresultsSIGNED.pdf,Equitas Holdings Limited

Q: What is the workaround for nested for loop error - find() takes no keyword arguments

I'm getting the error 'find() takes no keyword arguments' on this line of code: place = racers.find('td', class_='horse_number').get_text()
I presume this is due to the nested for loop - is calling find on the result of another find the problem?
My goal is to get detail of the race in first loop, second loop reiterate over each runner within the race, third for loop to get the times that meet each nested if statement.
# Question loop: for each race block read the race details, then the runner
# fields from the field table, then the labelled values from the times table.
# `results` is built outside this excerpt.
for race in results:
race_number = race.find('td', class_='raceNumber').get_text()
race_name1 = race.find('td', class_='raceTitle').get_text()
race_title1 = race.find('td', class_='raceInformation').get_text()
race_title1 = ' '.join(race_title1.split())
race_distance1 = race.find('td', class_='distance').get_text()
tableofdata = race.find('table', class_='raceFieldTable')
# NOTE(review): this iterates the direct children of the table tag; with
# BeautifulSoup those presumably include text nodes, on which .find would be
# the plain string method - that would match the reported "find() takes no
# keyword arguments" error. Confirm, e.g. by iterating tableofdata.find_all('tr').
for racers in tableofdata:
place = racers.find('td', class_='horse_number').get_text()
horsename = racers.find('a', class_='horse_name_link')
horsename = horsename.text.replace('HorseName: ', '') if horsename else ''
prizemoney = racers.find('td', class_='prizemoney')
prizemoney = prizemoney.text.replace('Prizemoney: ', '') if prizemoney else ''
barrier = racers.find('td', class_='barrier')
barrier = barrier.text.replace('Row: ', '') if barrier else ''
#tabnumber = race.find('td', class_='horse_number')
#tabnumber = tabnumber.text.replace('HorseNumber: ', '') if tabnumber else ''
#print(tabnumber, tr2)
# NOTE(review): the fields above use find(); the ones below use find_all() and
# then read .text from its result, which is a collection of matches rather
# than a single tag - presumably find() was intended. Confirm.
trainer = racers.find_all('td', class_='trainer-short')
trainer = trainer.text.replace('Trainer: ', '') if trainer else ''
driver = racers.find_all('td', class_='driver-short')
driver = driver.text.replace('Driver: ', '') if driver else ''
margin = racers.find_all('td', class_='margin')
margin = margin.text.replace('Margin: ', '') if margin else ''
startingprice = racers.find_all('td', class_='starting_price')
# Unlike the other fields, .text is read here without an emptiness check.
startingprice = startingprice.text.replace('StartingOdds: ', '')
startingprice = startingprice.replace('Â', ' ')if startingprice else ''
stewardscomments = racers.find_all('span', class_='stewardsTooltip')
# NOTE(review): the condition tests `horsename`, not `stewardscomments`.
stewardscomments = stewardscomments.text.replace('StewardsComments: ', '') if horsename else ''
scratchingnumber = racers.find_all('td', class_='number')
scratchingnumber = scratchingnumber.text.replace('Scratching: ', '') if scratchingnumber else ''
tableoftimes = race.find('table', class_='raceTimes')
for row in tableoftimes.select('td>strong:contains(":")'):
for t in row:
# NOTE(review): every else branch below resets its variable to '', so a value
# found for one label is overwritten as soon as a different label is visited.
if "Track Rating:" in t:
trackrating = t.next_element.strip()
else:
trackrating = ''
if "Gross Time:" in t:
grosstime = t.next_element.strip()
else:
grosstime = ''
if "Mile Rate:" in t:
milerate = t.next_element.strip()
else:
milerate = ''
if "Lead Time:" in t:
leadtime = t.next_element.strip()
else:
leadtime = ''
if "First Quarter:" in t:
firstquarter = t.next_element.strip()
else:
firstquarter = ''
if "Second Quarter:" in t:
secondquarter = t.next_element.strip()
else:
secondquarter = ''
if "Third Quarter:" in t:
thirdquarter = t.next_element.strip()
else:
thirdquarter = ''
if "Fourth Quarter:" in t:
fourthquarter = t.next_element.strip()
else:
fourthquarter = ''
My last question is about this replace call, which doesn't work - it still prints $2.40Â to the CSV file:
file = open('harnessresults.csv', 'w', newline='', encoding='utf8')
writer = csv.writer(file)
....
startingprice = startingprice.replace('Â', ' ')if startingprice else ''
....
writer.writerow([tr2, race_number, race_name1, race_title1, race_distance1, place, horsename, prizemoney, barrier, trainer, driver, margin, startingprice, stewardscomments, scratchingnumber, trackrating, grosstime, milerate, leadtime, firstquarter, secondquarter, thirdquarter, fourthquarter])
UPDATED
Start of HTML with scraping looks like below
from datetime import datetime, date, timedelta
import requests
import re
import csv
import os
import numpy
import pandas as pd
from bs4 import BeautifulSoup as bs
from simplified_scrapy import SimplifiedDoc,req,utils
# Start of the scraping script: opens the output CSV, writes the header row,
# then walks the results pages one day at a time and follows each meeting link.
# NOTE(review): the file is opened without a `with` block and no close() is
# visible in this excerpt.
file = open('harnessresults.csv', 'w', newline='', encoding='utf8')
writer = csv.writer(file)
base_url = "http://www.harness.org.au/racing/results/?firstDate="
base1_url = "http://www.harness.org.au"
webpage_response = requests.get('http://www.harness.org.au/racing/results/?firstDate=')
soup = bs(webpage_response.content, "html.parser")
# NOTE(review): `format` shadows the builtin of the same name; the strftime
# call below repeats the pattern literally instead of using it, and `delta`
# is likewise not used in this excerpt.
format = "%d-%m-%y"
delta = timedelta(days=1)
yesterday = datetime.today() - timedelta(days=1)
enddate = datetime(2020, 4, 20)
#prints header in csv
writer.writerow(['Venue', 'RaceNumber', 'RaceName', 'RaceTitle', 'RaceDistance', 'Place', 'HorseName', 'Prizemoney', 'Row', 'Trainer', 'Driver', 'Margin', 'StartingOdds', 'StewardsComments', 'Scratching', 'TrackRating', 'Gross_Time', 'Mile_Rate', 'Lead_Time', 'First_Quarter', 'Second_Quarter', 'Third_Quarter', 'Fourth_Quarter'])
# enddate is advanced before it is used, so the first page requested is for
# the day after the initial enddate.
while enddate <= yesterday:
enddate += timedelta(days=1)
enddate1 = enddate.strftime("%d-%m-%y")
new_url = base_url + str(enddate1)
soup12 = requests.get(new_url)
soup1 = bs(soup12.content, "html.parser")
table1 = soup1.find('table', class_='meetingListFull')
tr = table1.find_all('tr', {'class':['odd', 'even']})
for tr1 in tr:
# tr2 is the link text (written as the venue column) and tr3 its relative URL.
tr2 = tr1.find('a').get_text()
tr3 = tr1.find('a')['href']
newurl = base1_url + tr3
with requests.Session() as s:
webpage_response = s.get(newurl)
soup = bs(webpage_response.content, "html.parser")
#soup1 = soup.select('.content')
results = soup.find_all('div', {'class':'forPrint'})
#resultsv2 = soup.find_all('table', {'class':'raceFieldTable'})
Expect the CSV to look like

Categories

Resources