Changing Paragraph formatting in python-docx

Changing Paragraph formatting in python-docx - python

I am trying to change the formatting for multiple paragraphs using Python's python-docx module.
from docx import Document
from docx.shared import Pt
from docx.shared import Inches
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.section import WD_ORIENTATION
from content import report_content, provinces, report_date, introduction, intro_content
alignment_dict = {'justify': WD_PARAGRAPH_ALIGNMENT.JUSTIFY,
'center': WD_PARAGRAPH_ALIGNMENT.CENTER,
'centre': WD_PARAGRAPH_ALIGNMENT.CENTER,
'right': WD_PARAGRAPH_ALIGNMENT.RIGHT,
'left': WD_PARAGRAPH_ALIGNMENT.LEFT}
orientation_dict = {'portrait': WD_ORIENTATION.PORTRAIT,
'landscape': WD_ORIENTATION.LANDSCAPE}
document = Document()
def change_orientation(orientation='portrait', set_left_margin=1.0, set_right_margin=1.0):
section = document.sections[-1]
new_width, new_height = section.page_height, section.page_width
section.orientation = orientation_dict[orientation]
section.page_width = new_width
section.page_height = new_height
section.left_margin = Inches(set_left_margin)
section.right_margin = Inches(set_right_margin)
def add_logo(path, align):
document.add_picture(path, width=Inches(4.5), height=Inches(1.5))
last_paragraph = document.paragraphs[-1]
last_paragraph.alignment = alignment_dict[align]
def add_content(content, space_after, font_name='Arial', font_size=11, line_spacing=0, space_before=0,
align='justify', keep_together=True, keep_with_next=False, page_break_before=False,
widow_control=False, set_bold=False, set_italic=False, set_underline=False, set_all_caps=False):
paragraph = document.add_paragraph(content)
style = document.styles['Normal']
font = style.font
font.name = font_name
font.size = Pt(font_size)
font.bold = set_bold
font.italic = set_italic
font.all_caps = set_all_caps
font.underline = set_underline
paragraph_format = paragraph.paragraph_format
paragraph_format.alignment = alignment_dict.get(align.lower())
paragraph_format.space_before = Pt(space_before)
paragraph_format.space_after = Pt(space_after)
paragraph_format.line_spacing = line_spacing
paragraph_format.keep_together = keep_together
paragraph_format.keep_with_next = keep_with_next
paragraph_format.page_break_before = page_break_before
paragraph_format.widow_control = widow_control
def create_numbered_list():
pass
def add_subheading(subheading, level):
document.add_heading(subheading, level)
change_orientation(orientation='landscape', set_left_margin=0.5, set_right_margin=0.5)
add_logo('logo.png', 'center')
add_content(report_content, align='Center', space_before=40, space_after=20, line_spacing=1, font_name='Arial',
set_bold=True, set_all_caps=True)
add_content(provinces, align='Center', space_before=20, space_after=20, line_spacing=1, font_name='Arial',
set_bold=True, set_all_caps=True)
add_content(report_date, align='Center', space_before=20, space_after=20, line_spacing=1, font_name='Arial',
set_bold=True, set_all_caps=True)
document.add_page_break()
add_subheading(introduction, level=1)
add_content(intro_content, space_after=20, space_before=20)
document.save('demo.docx')
The problem is every time I add formatting to a new paragraph block via the add_content method the formatting for the older blocks gets changed as the formatting for the current block.
Why am I not being able to retain the formatting, why does it get reset to the formatting of the latest block?

Try this code. use add_style to add new style. document.styles['Normal'] is a system style
I test it ok
from docx.enum.style import WD_STYLE_TYPE
#.........................
def add_content(content, space_after, font_name='Arial', font_size=16, line_spacing=0, space_before=0,
align='justify', keep_together=True, keep_with_next=False, page_break_before=False,
widow_control=False, set_bold=False, set_italic=False, set_underline=False, set_all_caps=False,style_name=""):
paragraph = document.add_paragraph(content)
paragraph.style = document.styles.add_style(style_name, WD_STYLE_TYPE.PARAGRAPH)
font = paragraph.style.font
font.name = font_name
font.size = Pt(font_size)
font.bold = set_bold
font.italic = set_italic
font.all_caps = set_all_caps
font.underline = set_underline
paragraph_format = paragraph.paragraph_format
paragraph_format.alignment = alignment_dict.get(align.lower())
paragraph_format.space_before = Pt(space_before)
paragraph_format.space_after = Pt(space_after)
paragraph_format.line_spacing = line_spacing
paragraph_format.keep_together = keep_together
paragraph_format.keep_with_next = keep_with_next
paragraph_format.page_break_before = page_break_before
paragraph_format.widow_control = widow_control
add_content("1234", align='Center', space_before=40, space_after=20, line_spacing=1, font_name='Arial', font_size=16,
set_bold=True, set_all_caps=True,style_name ="Normal1")
add_content("12345", align='Center', space_before=20, space_after=20, line_spacing=1, font_name='Arial',font_size=14,
set_bold=True, set_all_caps=True,style_name ="Normal2")

Your problem is here:
style = document.styles['Normal']
font = style.font
A style applies to all paragraphs that have that style. It's a document global thing. So any change you make the style.font affect all paragraphs with that style (which is all your paragraphs in this case).
Make sure you read this page in the documentation and the one that follows it:
http://python-docx.readthedocs.io/en/latest/user/styles-understanding.html
Character formatting (e.g. bold, size, typeface) happens at the run level (below paragraph, a paragraph is composed of runs). So if you want to apply character formatting directly, as opposed to by using a style, you need to do it to each run.
Adding content to a paragraph by calling document.add_paragraph(content) places all that content in a single run. So a quick fix might be:
font = paragraph.runs[0].font
Might be worth a try. But probably also a good idea to spend a little time with the concepts parts of the documentation to understand the Word object model a bit better. It's pretty complex for what seems on the surface to be a simple thing. This page is a good place to start:
http://python-docx.readthedocs.io/en/latest/user/text.html

Related

Reportlab balanced cols control split of flowables

I am using BalancedColumns to generate multiple column layout.
I am not sure, how to resolve an issue of the split happening of the Flowables across the column frames.
I have a heading and it's content. I don't want BalancedColumns to split the flowables in such a way that heading is part of one column and its content is part of another.
The content of the paragraph can split.
The basic python code:
from reportlab.platypus.flowables import BalancedColumns, CondPageBreak
from reportlab.platypus import BaseDocTemplate, Paragraph, Spacer, Frame, PageTemplate, PageBreak
from reportlab.lib.units import mm
from reportlab.lib.pagesizes import A4, LETTER
import os
from reportlab.lib.colors import HexColor
framewidth = A4[0] - 10*mm
frameheight = A4[1] - 20*mm
portrait_frame = Frame(5*mm, 10*mm, framewidth, frameheight,
leftPadding=0,
bottomPadding=0,
rightPadding=0,
topPadding=0,
id=0,
showBoundary=False )
pTemplate = PageTemplate(id=0,frames=[portrait_frame])
templates = [pTemplate]
pdfPath = os.path.abspath(os.path.join(os.path.dirname(__file__), 'balancedColTest.pdf'))
doc = BaseDocTemplate(pdfPath, pagesize=A4, rightMargin=2*mm, leftMargin=2*mm,topMargin=5*mm,bottomMargin=5*mm,showBoundary=0)
doc.addPageTemplates(templates)
# generate stories for balanced columns
story = []
minPadding = 2
for i in range(3):
fs = []
numOfFlowables = random.choice([2, 5, 6, 1, 3])
padding = minPadding + 5*i
for ii in range(numOfFlowables):
text = '<b> <font color="#77D179"> Heading </font></b> <br/>'
heading = Paragraph(text)
heading.keepWithNext = True
fs.append(heading)
fs.append(Paragraph("This is another text in new Para flowable."))
# numCols = 2 if i%2 == 0 else 3
numCols = 2
bCols = BalancedColumns(fs, nCols=numCols, spaceAfter=5*mm, vLinesStrokeColor=HexColor('#77D179'), vLinesStrokeWidth=0.5)
story.append(bCols)
doc.build(story)
Even if I try to use keepWithNext=True the flowables are not part of same frame.
If I use KeepTogether, the BalanceColumns takes the entire space of the page.
heading = Paragraph(text)
content = Paragraph("This is another text in the paragraph")
f= KeepTogether([heading, content])
fs.append(f)
Can anyone suggest a solution so that the heading and its underlying paragraph can remain in the same frame?

How to use MagickImage/Wand to create an image comprised of equally spaced bordered boxes of text in Python

I need to make an image that looks like the following:
To do so, I've implemented the use of MagickImage/Wand. Here is my current implementation
import re
from unicodedata import normalize
from docx import Document
from wand.image import Image
from wand.drawing import Drawing
from wand.font import Font
doc = Document("P.docx")
docText = []
for para in doc.paragraphs:
docText.append(para.text)
fullText = "\n".join(docText)
ct = 242
def get(source, begin, end):
try:
start = source.index(len(begin)) + len(begin)
finish = source.index(len(end), len(start))
return source[start:finish]
except ValueError:
return ""
def capitalize(string):
cap = ("".join(j[0].upper() + j[1:]) for j in string)
return cap
def find_matches(text):
return capitalize(
[
m
for m in re.findall(
r"^[^0-9]\s+([^.;]+\s*)+[.;]+", normalize("NFKD", text), re.MULTILINE
)
]
)
with Image(width=300, height=300, psuedo='xc:black') as canvas:
left, top, width, height = 50, 10, 100, 150
for match in find_matches(text=fullText):
ct += 1
match_words = match.split(" ")
match = " ".join(match_words[:-1])
with Drawing() as context:
context.fill_color = 'white'
context.rectangle(left=left, top=top, width=width, height=height)
canvas.font = Font('/System/Library/Fonts/arial.ttf')
context(canvas)
canvas.caption(match + '\r' + 'ct', left=left, top=top, width=width, height=height, gravity='center')
canvas.save(filename='patdrawTest.png')
I'm not quite certain on how to create borders or how to properly space things with this tool, and as such, this is my current output:
I understand I need to have a base image that is iterated over. I also understand that I will need flags in order to keep track of the height/width/etc. of the previous blocks of text (unless there is an easier way of doing so with this tool). However, the way my code currently works is that it takes in words from a word document, parses it to get specific matches, and then is supposed to put it into an image like the first image I showed above. Yet, I am at a loss. Any help would be greatly appreciated.

Here's the code I've come up with in order to make equally-spaced boxes of text.
import re
from unicodedata import normalize
from docx import Document
from wand.image import Image
from wand.drawing import Drawing
from wand.font import Font
doc = Document("P.docx")
docText = []
for para in doc.paragraphs:
docText.append(para.text)
fullText = "\n".join(docText)
ct = 242
def get(source, begin, end):
try:
start = source.index(len(begin)) + len(begin)
finish = source.index(len(end), len(start))
return source[start:finish]
except ValueError:
return ""
def capitalize(string):
cap = ("".join(j[0].upper() + j[1:]) for j in string)
return cap
def find_matches(text):
return capitalize(
[
m
for m in re.findall(
r"^[^0-9]\s+([^.;]+\s*)+[.;]+", normalize("NFKD", text), re.MULTILINE
)
]
)
with Image(width=400, height=1000, pseudo='xc:white') as canvas:
left, top, width, height = 2, 2, 395, 131
for match in find_matches(text=fullText):
ct += 1
match_words = match.split(" ")
match = " ".join(match_words[:-1])
with Drawing() as context:
context.fill_color = 'black'
context.rectangle(left=left, top=top, width=width, height=height)
context.fill_color = 'white'
context.rectangle(left=(left+2), top=(top+2), width=(width-4), height=(height-4))
canvas.font = Font('/System/Library/Fonts/timesnewroman.ttf')
context(canvas)
canvas.caption(match + '\n' + str(ct), left=(left+5), top=top, width=(width-10), height=height,
gravity='center')
top += 135
canvas.crop(bottom=top)
canvas.save(filename='patdrawTest.png')
Here is the output with this code:
I do, however, still have something I'd like to address. While the boxes of text are all equally-spaced and look rather nice, I'd still prefer that all of the text looks the same; that is the same font-size, and the only way to do that is to have the borders and such be automatically re-sized such that it can work that way. I have no clue on how to do this, but for now here is this, should anyone else run into something like this.

How do I apply both bold and center in python-docx?

I'm using python-docx to put a text into MS Word. I can make it bold or center,but how to do both.
Here's the bold:
p=document.add_paragraph().add_run('test word')
p.font.size = Pt(16)
p.bold = True
Here's the center:
p=document.add_paragraph('test word')
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
how to do both bold and center?

Separate between paragraph and run and define each:
p=document.add_paragraph()
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
r=p.add_run('test word')
r.font.size = Pt(16)
r.bold = True

You can set p as the reference of this paragraph, and then use add_run() to add your text. Just like this:
p = document.add_paragraph()
p.add_run('test word').bold = True
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
Sorry, I can't upload my picture, but I have tried it.

I have in a project created a helper-class that I call Text. We also add a function that adds the text to the document.
Now we can do this:
add_Text([Text("NORMAL TEXT "),Text("BOLD TEXT",bold=True)]
The function will make sure the runs are in the same paragraph. As the center paragraph is a property of the paragraph I added it outside:
add_Text([Text("NORMAL TEXT "),Text("BOLD TEXT",bold=True), align='center']
Here is a full example of what I mean:
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
class Text:
def __init__(self, text, bold=False, italic=False):
self.text = text
self.bold = bold
self.italic = italic
def add_text(textitems, align=False):
p = document.add_paragraph('')
if align == 'center':
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
for t in textitems:
r = p.add_run(t.text)
if t.bold:
r.bold = True
if t.italic:
r.italic = True
document = Document()
document.add_heading('Document Title', 0)
add_text([
Text('Text ', bold=True, italic=True),
Text('Text2', bold=False, italic=True)
], align='center')
add_text([
Text('Text ', bold=True, italic=True),
Text('Text2', bold=False, italic=True)
])
document.save('demo.docx')

Generating a PDF Index with ReportLab

I'm generating PDF files through ReportLab, but i can't find any documentation on how to generate an index linking to the file pages. Does ReportLab support this type of feature, or is there any other solution ?

After a lot of time searching i came up with an anchor solution. Although i feel it was not the perfect solution for me, i hope it helps someone in need.
from reportlab.lib.styles import ParagraphStyle
from reportlab.pdfbase.pdfmetrics import registerFont
from reportlab.platypus import Paragraph, PageBreak, SimpleDocTemplate, Spacer
registerFont(TTFont('Calibri', 'Calibri.ttf')) # Just some font imports
registerFont(TTFont('Calibri-Bold', 'calibrib.ttf'))
pH = ParagraphStyle(name = 'Header', fontName = 'Calibri-Bold', fontSize = 13, leftIndent = 20, firstLineIndent = -20, spaceBefore = 10, leading = 16)
sH = ParagraphStyle(name = 'SubHeader', fontName = 'Calibri', fontSize = 12, leftIndent = 40, firstLineIndent = -20, spaceBefore = 5, leading = 16)
doc = SimpleDocTemplate('Reports\\PDFname.pdf')
story = [Spacer(1, 2 * inch)]
story.append(Paragraph('<a href = page3.html#0>1. First Title</a>', pH)) # Linking the anchor to reference 0
story.append(Paragraph('<a href = page3.html#1>1.1. First Subtitle</a>', sH)) # Linking the anchor to reference 1
story.append(PageBreak())
story.append(Paragraph('<a name = page3.html#0></a> 1. First Title', pH)) # Creating anchor with reference 0
story.append(Paragraph('<a name = page3.html#1></a><br/> 1.1. First Subtitle', style)) # Creating anchor with reference 1
doc.build(story)

Python Svgwrite and font styles/ sizes

I'm trying to make a SVG file connected to a web scraper.
How do I change font and text size with svgwrite? I understand that I have to define a CSS style and somehow connect that to the text object. But how is this made?
Here's the code I have so far
import svgwrite
svg_document = svgwrite.Drawing(filename = "test-svgwrite3.svg",
size = ("1200px", "800px"))
#This is the line I'm stuck at
#svg_document.add(svg_document.style('style="font-family: Arial; font-size : 34;'))
svg_document.add(svg_document.rect(insert = (900, 800),
size = ("200px", "100px"),
stroke_width = "1",
stroke = "black",
fill = "rgb(255,255,0)"))
svg_document.add(svg_document.text("Reported Crimes in Sweden",
insert = (410, 50),
fill = "rgb(255,255,0)",
#This is the connection to the first line that I'm stuck at
#style = 'style="font-family: Arial; font-size : 104;'))
print(svg_document.tostring())
svg_document.save()

Manfred Moitzi the maker of SvgWrite mailed me an more eleborated answer;
This has to be done by CSS, use the 'class_' or 'style' keyword args to set text properties:
dwg = svgwrite.Drawing()
with 'style' keyword arg:
g = dwg.g(style="font-size:30;font-family:Comic Sans MS, Arial;font-weight:bold;font-
style:oblique;stroke:black;stroke-width:1;fill:none")
g.add(dwg.text("your text", insert=(10,30))) # settings are valid for all text added to 'g'
dwg.add(g)
with 'class_' keyword arg:
Create a CSS file with content:
.myclass {
font-size:30;
font-family:Comic Sans MS, Arial;
font-weight:bold;
font-style:oblique;
stroke:black;
stroke-width:1;
fill:none;
}
see CSS reference: http://www.w3schools.com/cssref/default.asp
dwg.add_stylesheet(filename, title="sometext") # same rules as for html files
g = dwg.g(class_="myclass")
g.add(dwg.text("your text", insert=(10,30))) # settings are valid for all text added to 'g'
dwg.add(g)
With 'class_' and 'style' keyword args you can style every graphic object and they can be used at container objects.

The answer was quite simple;
svg_document.add(svg_document.text("Reported Crimes in Sweden",
insert = (410, 50),
fill = "rgb(255,255,0)",
style = "font-size:10px; font-family:Arial"))

Set font-size like this:
dwg = svgwrite.Drawing('test.svg', profile='tiny')
dwg.add(dwg.text('Test',insert = (30, 55),font_size="10px",fill='black'))

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Changing Paragraph formatting in python-docx - python

Related

Reportlab balanced cols control split of flowables

How to use MagickImage/Wand to create an image comprised of equally spaced bordered boxes of text in Python

How do I apply both bold and center in python-docx?

Generating a PDF Index with ReportLab

Python Svgwrite and font styles/ sizes

Categories

Resources