Python Docx Minimum Table Height - python

I'm trying to fit 10 rows (and three columns) of a table on one page; however, I'm running into a limitation where I can't get more than 8 rows to fit. I've tried the following code:
table = document.add_table(rows=0, cols=3)
for row in table.rows:
row.height = Cm(1)
However, at some point when reducing the size, there is no longer any difference in the output. Is it possible to fit 10 rows on one page?
Here is an adapted version of my code, which iterates through a dataframe and writes columns of the dataframe to cells of a table:
document = Document()
sections = document.sections
for section in sections:
section.top_margin = Inches(0.00)
section.bottom_margin = Inches(0.00)
section.left_margin = Inches(0.00)
section.right_margin = Inches(0.00)
style = document.styles['Normal']
font = style.font
font.size = Pt(8)
table = document.add_table(rows=0, cols=3)
index = 0
full_count = 1
for item_one, item_two,description,max_portion,quantity_adjusted, mods in zip(line_items['title'].tolist(), line_items['quantity'],line_items['description'], line_items['max_portion'],line_items['quantity_adjusted'], line_items['modifications']):
count = 0
if index % 3 == 0:
cell_row = table.add_row()
cell_row.height = Cm(0.1)
row_cells = cell_row.cells
part_one_cell = row_cells[index % 3]
part_one_cell.height = Cm(0.1)
#para = doc.add_paragraph().add_run('GeeksforGeeks is a Computer Science portal for geeks.')
#para.font.size = Pt(12)
p = part_one_cell.add_paragraph()
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
#p1 = part_one_cell.paragraphs[0].add_run(item_one.upper()+ ' ' + description.upper())
#p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
if len(item_one + description) < 40:
p.add_run(item_one.upper()+ ' ' + description.upper()).font.size = Pt(12)
elif len(item_one + description) < 60:
p.add_run(item_one.upper()+ ' ' + description.upper()).font.size = Pt(10)
else:
p.add_run(item_one.upper()+ ' ' + description.upper()).font.size = Pt(8)
row1 = row_cells[index % 3]
row2= row1.add_paragraph(mods)
row2.alignment = WD_ALIGN_PARAGRAPH.CENTER
row = row_cells[index % 3]
p1 = row.add_paragraph(f'{x[str(quantity_adjusted)]}')
p1.alignment=WD_ALIGN_PARAGRAPH.RIGHT
#part_one_cell.paragraphs[0].add_run(f'{x[str(item_two)]}')
#part_one_cell.paragraphs[0].add_run(f' {str(x)}').bold= True
index = index + 1
full_count = full_count + 1
if full_count % 30 == 0:
document.add_page_break()
table = document.add_table(rows=0, cols=3)

I have no problem getting 10 1cm rows in a single page. I declare the number of rows when adding the table:
from docx import Document
from docx.shared import Cm

# Create all 10 rows up front, then give every row the same 1 cm height.
document = Document()
table = document.add_table(rows=10, cols=3)
table.style = 'Table Grid'
for row in table.rows:
    row.height = Cm(1)
document.save('demo.docx')
To add rows in a for loop:
# Alternative: start with an empty table and set the height of each row
# right after it is added (reuses `document` and `Cm` from the snippet above).
table = document.add_table(rows=0, cols=3)
table.style = 'Table Grid'
for _ in range(10):
    row = table.add_row()
    row.height = Cm(1)
document.save('demo.docx')

Related

Python (docx) excel and word

how to remove internal borders of tables that are written to a Word file?
I create tables in a Word file and write information from an Excel file into them. I want the tables in the Word document to have no internal borders, but I don't know how to do it.
I have looked through everything I could find but still have not found a solution. Many posts say that table.style = 'Table Grid' removes internal borders, but this does not work.
import tkinter as tk
from tkinter import filedialog
import openpyxl
import docx
from docx import Document
from docx.shared import Cm
# Create the main window
root = tk.Tk()
root.withdraw()
# Open a file dialog to select the Excel file
file_path = filedialog.askopenfilename(title="Select Excel file", filetypes=(("Excel files", "*.xlsx"), ("All files", "*.*")))
# Open the Excel file and select the worksheet
wb = openpyxl.load_workbook(file_path)
sheet = wb['Sheet1']
doc = Document()
section = doc.sections[0]
margin = docx.shared.Inches(0.1)
margin2 = docx.shared.Inches(0.5)
margin3 = docx.shared.Inches(0.3)
section.left_margin = margin3
section.right_margin = margin
section.top_margin = margin
section.bottom_margin = margin2
table_count = 0
row = None
for i in range(2, sheet.max_row + 1):
if sheet[f'B{i}'].value:
for j in range(int(sheet[f'F{i}'].value)):
# Check if we need to start a new row
if table_count % 2 == 0:
row = doc.add_table(rows=1, cols=2).rows[0]
row.height = Cm(3)
row.cells[0].width = Cm(25)
row.cells[1].width = Cm(25)
# Add a table to the current row
table = row.cells[table_count % 2].add_table(rows=7, cols=2)
table.style = 'Table Grid'
table.autofit = False
# Remove internal borders from the table
# Merge the first row
table.cell(0, 0).merge(table.cell(0, 1))
table.cell(6, 0).merge(table.cell(6, 1))
# Modify the width of the first column
for cell in table.columns[0].cells:
cell.width = Cm(2.5)
# Modify the width of the second column
for cell in table.columns[1].cells:
cell.width = Cm(5.5)
nos = sheet[f"B{i}"].value if sheet[f"B{i}"].value else "N/A"
nom_nr = sheet[f"J{i}"].value if sheet[f"J{i}"].value else "N/A"
kods = sheet[f"D{i}"].value if sheet[f"D{i}"].value else "N/A"
pas_nr = sheet[f"K{i}"].value if sheet[f"K{i}"].value else "N/A"
table.cell(0, 0).text = "Nos.: "
table.cell(0, 0).paragraphs[0].add_run(f"{nos}").bold = True
table.cell(1, 0).text = "Nom. Nr.: "
table.cell(1, 1).text = str(nom_nr)
table.cell(1, 1).paragraphs[0].runs[0].bold = True # make the variable bold
table.cell(2, 0).text = "Kods: "
table.cell(2, 1).text = str(kods)
table.cell(2, 1).paragraphs[0].runs[0].bold = True # make the variable bold
table.cell(3, 0).text = "Pas. Nr.: "
table.cell(3, 1).text = str(pas_nr)
table.cell(3, 1).paragraphs[0].runs[0].bold = True # make the variable bold
table.cell(4, 0).text = "Daudzums: "
table.cell(4, 1).text = "1000"
table.cell(4, 1).paragraphs[0].runs[0].bold = True # make the variable bold
table.cell(5, 0).text = " "
table.cell(5, 1).text = " "
table.cell(6, 0).text = "Izpildītājs: SIA “STS Group”\nCeļāres 2B, Spilve, Babītes pag.\nTel. 29211780"
# Increment the table count
table_count += 1
# Save the Word document
doc.save('data.docx')

Can I scrape table from html file in Python?

I want to scrape a table from this text file (text_file); the table I want is SUMMARY CONSOLIDATED FINANCIAL AND OTHER DATA. The content I get back through BeautifulSoup looks like this: The Origin Code. My code is below; can someone tell me where it went wrong?
url = r'https://www.sec.gov/Archives/edgar/data/1181232/000104746903038553/a2123752z424b4.htm'
filing_url = requests.get(url)
content = filing_url.text
soup = BeautifulSoup(content, 'lxml')
tables = soup.find_all(text=re.compile('SUMMARY CONSOLIDATED FINANCIAL AND OTHER DATA'))
n_columns = 0
n_rows = 0
column_names = []
for table in tables:
for row in table.find_next('table').find_all('tr'):
# Determine the number of rows in the table
td_tags = row.find_all('td')
if len(td_tags) > 0:
n_rows += 1
if n_columns == 0:
# Set the number of columns for the table
n_columns = len(td_tags)
# Handle column names if find them
th_tags = row.find_all('th')
if len(th_tags) > 0 and len(column_names) == 0:
for th in th_tags:
column_names.append(th.get_text())
# Safeguard on Column Titles
if len(column_names) > 0 and len(column_names) != n_columns:
raise Exception("Column titles do not match the number of columns")
columns = column_names if len(column_names) > 0 else range(0, n_columns)
df = pd.DataFrame(columns=columns,
index=range(0, n_rows))
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
df.iat[row_marker, column_marker] = column.get_text()
column_marker += 1
if len(columns) > 0:
row_marker += 1
print(df)
In this particular case, you could simplify this significantly, using pandas:
import pandas as pd

url = 'https://www.sec.gov/Archives/edgar/data/1181232/000104746903038553/a2123752z424b4.htm'
# read_html parses the page's tables into a list of DataFrames.
tables = pd.read_html(url)
# there are more than 100 tables on that page, so you have to narrow it down:
# keep only the tables whose column labels mention 'Unaudited'.
targets = [t for t in tables if 'Unaudited' in str(t.columns)]
targets[0]  # only two meet that requirement, and the first is your target
Output is your target table.

Iterating through a table importing images

I'm importing images using python-docx. I found some help in an old post, but I'm unable to convert it to a table with 3 rows and 2 columns.
from docx import Document
document = Document()
tables = document.tables
table = document.add_table(rows=1, cols=2)
row_cells = table.add_row().cells
**for i, image in enumerate(['image1.jpg', 'image2.jpg']):
paragraph = row_cells[i].paragraphs[0]**
run = paragraph.add_run()
run.add_picture(image)
document.save('doc.docx')
I've adapted it to...
document = Document()
tables = document.tables
table = document.add_table(rows=3, cols=2)
table.style = 'Table Grid'
row_cells = table.add_row().cells
Inc1 = ['1.jpg', '2.jpg','1.jpg', '2.jpg','1.jpg', '2.jpg']
length = len (Inc1)
for i in range(length):
for j in table.rows:
for k in table.columns:
paragraph = table.add_row().cells[i].paragraphs[0]
run = table.add_row().cells[i].paragraphs[0].paragraph.add_run()
run.add_picture('1.jpg', width = Inches(1))
document.save('test.docx')
You're adding too many rows. You should have all the rows you need after the table is created. Access a row using table.rows[i] where i is in (0, 1, 2).
So something like:
from docx import Document
from docx.shared import Inches

# Table dimensions; the image list below must hold ROWS * COLS file names.
ROWS, COLS = 3, 2

document = Document()
table = document.add_table(rows=ROWS, cols=COLS)
Inc1 = ['1.jpg', '2.jpg', '1.jpg', '2.jpg', '1.jpg', '2.jpg']
for irow in range(ROWS):
    for icol in range(COLS):
        # Reuse the rows created by add_table() instead of adding new ones.
        paragraph = table.rows[irow].cells[icol].paragraphs[0]
        run = paragraph.add_run()
        # Row-major index into the flat image list.
        run.add_picture(Inc1[irow * COLS + icol], width=Inches(1))
document.save('test.docx')

Handling exception with list out of range

I'm trying to extract batting stats for all WC 2019 players, but my query fails with a "list index out of range" error at the player: http://www.espncricinfo.com/india/content/player/398438.html
How can I handle the exception (or skip that player with pass) so that I still get the complete team's player stats?
url2 = 'http://stats.espncricinfo.com/ci/engine/player/' + \
str(player_id) + \
'.htmlclass=2;template=results;type=batting;view=innings'
html = urllib.request.urlopen(url2, context=ctx).read()
temp_data = OrderedDict()
list_of_dict = []
bs = BeautifulSoup(html, 'lxml')
table_body = bs.find_all('tbody')
rows = table_body[1].find_all('tr')
for row in rows:
cols = row.find_all('td')
cols = [x.text.strip() for x in cols]
temp_data = OrderedDict()
for i in range(len(cols)):
temp_data["Runs"] = cols[0]
temp_data["Mins"] = cols[1]
temp_data["BF"] = cols[2]
temp_data["fours"] = cols[3]
temp_data["sixs"] = cols[4]
temp_data["SR"] = cols[5]
temp_data["POS"] = cols[6]
temp_data["Dismissal"] = cols[7]
temp_data["Inns"] = cols[8]
temp_data["Opposition"] = cols[10]
temp_data["Ground"] = cols[11]
temp_data["Date"] = cols[12]
temp_data["player"] = player
temp_data["playerid"] = player_id
list_of_dict.append(temp_data)
df = pd.DataFrame(list_of_dict)
df
df.to_sql("dummy", con, if_exists="append")
I'd like to extract all WC squad wise player stats.

python-docx how to merge row cells

I am creating a Word document from data using python-docx. I can create all the rows and cells with no problem but, in some cases, when the current record from the database has some content in field comment, I need to add a new line to display a long content.
I tried appending a paragraph, but the result is that the comment is appended after the table, and I need it to be added below the current table row.
I think the solution is to append a table row with all cells merged, but I can't find documentation to do so.
This is the code where I generate the docx file:
class OperationDOCXView(viewsets.ViewSet):
exclude_from_schema = True
def list(self, request):
    """Return the operations report as JSON or as a link to a generated .docx.

    Reads ``profile_id``, ``operation_date``, ``operation_type`` and
    ``doc_format`` from the query string and stores them on the view.
    When ``doc_format`` is ``'json'`` the controller context is returned
    directly; otherwise a Word file is generated and the response body is
    the host, ``settings.MEDIA_URL`` and the file name joined together.
    """
    from ReportsManagerApp.controllers import Operations2Controller

    params = request.query_params
    self.profile_id = params['profile_id']
    self.operation_date = params['operation_date']
    self.operation_type = params['operation_type']
    self.format = params['doc_format']

    controller = Operations2Controller(self.profile_id, self.operation_date, self.operation_type)
    context = controller.get_context()

    # JSON callers get the raw context; nothing is written to disk.
    if self.format == 'json':
        return Response(context)

    word_doc = self.get_operation_word_file(request, context)
    file_url = "{}{}{}".format(request.get_host(), settings.MEDIA_URL, word_doc)
    return Response(file_url)
def get_operation_word_file(self, request, context):
# Build the operations report as a .docx document, save it under
# settings.MEDIA_DIR and return the file name (not the full path).
# `context` is iterated as a mapping of provider unit -> list of transfer dicts.
# NOTE(review): `request` is not referenced in this body.
import unicodedata
from django.core.files import File
from django.urls import reverse
from docx import Document
from docx.shared import Inches, Pt
# NOTE(review): unicodedata, File and reverse are imported above but never used below.
# Display label for each operation type, used in the report subtitle.
operation_type = {
'arrival': 'Llegadas',
'departure': 'Salidas',
'hotel': 'Hotel-Hotel',
'tour': 'Tours',
}
# Day names keyed by str(date.weekday()), i.e. '0' is Monday.
weekdays = {
'0': 'LUNES',
'1': 'MARTES',
'2': 'MIÉRCOLES',
'3': 'JUEVES',
'4': 'VIERNES',
'5': 'SÁBADO',
'6': 'DOMINGO',
}
# Header titles and column widths: 11 entries each, one per table column.
titles = ['Booking', 'Nombre', '#', 'Vuelo', 'Hr', 'P Up', 'Traslado', 'Circuito', 'Priv?', 'Agencia', '']
widths = [Inches(1), Inches(2), Inches(0.5), Inches(1), Inches(1), Inches(1), Inches(2), Inches(3), Inches(0.5), Inches(3), Inches(0.5)]
document = Document()
# Page margins are set on the last section of the new document.
section = document.sections[-1]
section.top_margin = Inches(0.5)
section.bottom_margin = Inches(0.5)
section.left_margin = Inches(0.3)
section.right_margin = Inches(0.2)
# Default body font: Arial 10 pt via the 'Normal' style.
style = document.styles['Normal']
font = style.font
font.name ='Arial'
font.size = Pt(10)
# Centered title block: company heading, description line, weekday name.
# NOTE(review): WD_ALIGN_PARAGRAPH and settings are not imported in this method —
# presumably imported at file level; confirm.
company_paragraph = document.add_heading("XXXX TTOO INC")
company_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
description_paragraph = document.add_paragraph("Operación de {} del día {}".format(operation_type[self.operation_type], self.operation_date))
description_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
operation_date = self.get_operation_date().date()
operation_week_day = operation_date.weekday()
day_paragraph = document.add_paragraph(weekdays[str(operation_week_day)])
day_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
# One labelled table per provider unit.
for provider_unit, transfers in context.items():
provider_unit_paragraph = document.add_paragraph(provider_unit)
# NOTE(review): these two lines edit the paragraph's style object rather than
# the paragraph's own runs — likely affects every paragraph sharing that style; confirm.
provider_unit_paragraph.style.font.size = Pt(10)
provider_unit_paragraph.style.font.bold = False
table = document.add_table(rows=1, cols=11)
hdr_cells = table.rows[0].cells
# Fill the header row; the collected runs are not read again in this method.
runs = []
for i in range(len(hdr_cells)):
runs.append(self.get_hdr_cells_run(hdr_cells[i], titles[i]))
# Apply the column widths to the rows that exist so far (the header row).
for row in table.rows:
for idx, width in enumerate(widths):
row.cells[idx].width = width
# Running passenger totals for this provider unit.
adults = 0
minors = 0
for transfer in transfers:
# table = document.add_table(rows=1, cols=11)
# One table row per transfer; columns 0-9 are filled, column 10 is left empty.
row_cells = table.add_row().cells
row_cells[0].text = transfer['booking']
row_cells[1].text = transfer['people']
row_cells[2].text = transfer['pax']
# A missing or None flight is normalised to an empty string.
flight = transfer.get("flight","") if transfer.get("flight","") is not None else ""
row_cells[3].text = flight
flight_time = self.get_flight_time(flight) if flight != '' else ''
row_cells[4].text = flight_time
row_cells[5].text = transfer['pickup_time'].strftime('%H:%M') if transfer['pickup_time'] is not None else ''
row_cells[6].text = transfer['place']
row_cells[7].text = transfer['roundtrip']
row_cells[8].text = transfer['is_private']
row_cells[9].text = transfer['agency']
# assumes pax is formatted '<adults>.<minors>' — TODO confirm against the controller
people = transfer['pax'].split('.')
adults = adults + int(people[0])
minors = minors + int(people[1])
# The comment is added as a paragraph on the document body, not inside the
# table, which is why it shows up after the table instead of under its row.
if transfer['comment'] is not None:
document.add_paragraph("Comentarios: {}".format(transfer['comment']))
# Re-apply the column widths and force 8 pt on every run in every cell.
for row in table.rows:
for idx, width in enumerate(widths):
row.cells[idx].width = width
for cell in row.cells:
paragraphs = cell.paragraphs
for paragraph in paragraphs:
for run in paragraph.runs:
font = run.font
font.size = Pt(8)
# Totals row: only the last column is filled, as '<adults>.<minors>'.
row_cells = table.add_row().cells
row_cells[10].text = "{}.{}".format(adults, minors)
# The path is built by plain concatenation, so MEDIA_DIR presumably ends
# with a path separator — verify in settings.
current_directory = settings.MEDIA_DIR
file_name = "Operaciones {} {}.docx".format(self.operation_type, self.operation_date)
document.save("{}{}".format(current_directory, file_name))
return file_name
def get_flight_time(self, flight):
    """Return the scheduled time ('HH:MM') of a flight on the operation weekday.

    Looks up the ``Flight`` whose ``number`` equals *flight* and whose
    ``flight_type`` corresponds to ``self.operation_type``, then reads the
    ``time_<weekday>`` field for the weekday of ``self.operation_date``.

    Returns an empty string when the operation type has no flight type
    ('arrival' and 'departure' are the only mapped ones), when no single
    matching flight can be fetched, or when the flight has no time stored
    for that weekday.

    Raises ``ValueError`` if ``self.operation_date`` is not in
    ``'%Y-%m-%d'`` format.
    """
    from OperationsManagerApp.models import Flight

    operation_types = {
        'arrival': 'ARRIVAL',
        'departure': 'DEPARTURE',
    }
    # Model field holding the scheduled time, indexed by date.weekday()
    # (Monday == 0).
    weekday_fields = (
        'time_monday',
        'time_tuesday',
        'time_wednesday',
        'time_thursday',
        'time_friday',
        'time_saturday',
        'time_sunday',
    )

    operation_date = datetime.strptime(self.operation_date, '%Y-%m-%d')

    # Operation types without a flight (e.g. hotel or tour) have no time.
    flight_type = operation_types.get(self.operation_type)
    if flight_type is None:
        return ''

    try:
        flight_record = Flight.objects.get(flight_type=flight_type, number=flight)
    except (Flight.DoesNotExist, Flight.MultipleObjectsReturned, ValueError, TypeError):
        # Unknown, ambiguous or unparsable flight number: keep the
        # best-effort "no time" result. Database failures and interpreter
        # exits are no longer swallowed as they were by a bare ``except``.
        return ''

    weekday_time = getattr(flight_record, weekday_fields[operation_date.weekday()])
    return weekday_time.strftime('%H:%M') if weekday_time is not None else ''
def get_hdr_cells_run(self, hdr_cells, title):
    """Add *title* as a bold 8 pt run to a header cell and return the run.

    Despite the plural name, ``hdr_cells`` is used as a single cell: the
    run is appended to its first paragraph.
    """
    from docx.shared import Pt

    first_paragraph = hdr_cells.paragraphs[0]
    header_run = first_paragraph.add_run(title)
    header_run.font.size = Pt(8)
    header_run.bold = True
    return header_run
def get_operation_date(self):
    """Parse ``self.operation_date`` ('YYYY-MM-DD') into a ``datetime``.

    Uses the same ``'%Y-%m-%d'`` format that ``get_flight_time`` parses the
    date with, so both methods accept and reject the same strings. The
    returned value has its time part set to midnight.

    Raises ``ValueError`` if the string does not match the format; this
    includes extra trailing segments, which the previous split-based
    parsing silently ignored.
    """
    return datetime.strptime(self.operation_date, '%Y-%m-%d')
One approach is to add a paragraph to one of the cells:
cell.add_paragraph(transfer['comment'])
This will place it in the right position with respect to the row it belongs to, rather than after the table.
If that would take too much room for a single cell that already has another data item in it and you want to add a row, you'll need to account for that when you allocate the table. But assuming you get that worked out, merging the cells is easy:
row.cells[0].merge(row.cells[-1])

Categories

Resources