How do I apply both bold and center in python-docx? - python

I'm using python-docx to put text into an MS Word document. I can make it bold or centered, but how do I do both?
Here's the bold:
p=document.add_paragraph().add_run('test word')
p.font.size = Pt(16)
p.bold = True
Here's the center:
p=document.add_paragraph('test word')
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
How do I make the text both bold and centered?

Treat the paragraph and the run separately and set the properties of each:
p=document.add_paragraph()
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
r=p.add_run('test word')
r.font.size = Pt(16)
r.bold = True

Keep a reference to the paragraph in p, and then call add_run() on it to add your text, like this:
p = document.add_paragraph()
p.add_run('test word').bold = True
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
Sorry, I can't upload my picture, but I have tried it.

In one of my projects I created a helper class that I call Text, together with a function that adds the text to the document.
Now we can do this:
# Both runs are added to the same paragraph.
add_text([Text("NORMAL TEXT "), Text("BOLD TEXT", bold=True)])
The function makes sure the runs are in the same paragraph. Because centering is a property of the paragraph, I pass it as a separate argument:
# The alignment is a keyword argument of the call, not an element of the list.
add_text([Text("NORMAL TEXT "), Text("BOLD TEXT", bold=True)], align='center')
Here is a full example of what I mean:
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
class Text:
    """A piece of text together with the character formatting requested for it.

    Instances are plain value holders: ``text`` is the string to write, and
    ``bold`` / ``italic`` are flags saying whether that formatting is wanted.
    """

    def __init__(self, text, bold=False, italic=False):
        self.text, self.bold, self.italic = text, bold, italic
def add_text(textitems, align=False):
    """Append one paragraph to the module-level ``document``, one run per item.

    textitems: iterable of objects exposing ``text``, ``bold`` and ``italic``.
    align: only the value 'center' is recognised; any other value leaves the
        paragraph alignment untouched.
    """
    paragraph = document.add_paragraph('')
    for item in textitems:
        run = paragraph.add_run(item.text)
        # Only switch a style on; a falsy flag leaves the run's setting as it is.
        for style_flag in ('bold', 'italic'):
            if getattr(item, style_flag):
                setattr(run, style_flag, True)
    if align == 'center':
        paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
document = Document()
document.add_heading('Document Title', 0)
add_text([
Text('Text ', bold=True, italic=True),
Text('Text2', bold=False, italic=True)
], align='center')
add_text([
Text('Text ', bold=True, italic=True),
Text('Text2', bold=False, italic=True)
])
document.save('demo.docx')

Related

Reportlab balanced cols control split of flowables

I am using BalancedColumns to generate multiple column layout.
I am not sure how to control where the flowables are split across the column frames.
I have a heading and its content. I don't want BalancedColumns to split the flowables in such a way that the heading ends up in one column and its content in another.
The content of the paragraph can split.
The basic python code:
from reportlab.platypus.flowables import BalancedColumns, CondPageBreak
from reportlab.platypus import BaseDocTemplate, Paragraph, Spacer, Frame, PageTemplate, PageBreak
from reportlab.lib.units import mm
from reportlab.lib.pagesizes import A4, LETTER
import os
from reportlab.lib.colors import HexColor
framewidth = A4[0] - 10*mm
frameheight = A4[1] - 20*mm
portrait_frame = Frame(5*mm, 10*mm, framewidth, frameheight,
leftPadding=0,
bottomPadding=0,
rightPadding=0,
topPadding=0,
id=0,
showBoundary=False )
pTemplate = PageTemplate(id=0,frames=[portrait_frame])
templates = [pTemplate]
pdfPath = os.path.abspath(os.path.join(os.path.dirname(__file__), 'balancedColTest.pdf'))
doc = BaseDocTemplate(pdfPath, pagesize=A4, rightMargin=2*mm, leftMargin=2*mm,topMargin=5*mm,bottomMargin=5*mm,showBoundary=0)
doc.addPageTemplates(templates)
# generate stories for balanced columns
# random.choice is used below, but the import list of this script does not
# include the random module, so it is imported here to avoid a NameError.
import random

story = []
minPadding = 2
for i in range(3):
    # One BalancedColumns flowable per iteration, filled with heading/body pairs.
    fs = []
    numOfFlowables = random.choice([2, 5, 6, 1, 3])
    padding = minPadding + 5*i  # computed but not read anywhere in this script
    for ii in range(numOfFlowables):
        text = '<b> <font color="#77D179"> Heading </font></b> <br/>'
        heading = Paragraph(text)
        # Ask the layout to keep the heading with the flowable that follows it.
        heading.keepWithNext = True
        fs.append(heading)
        fs.append(Paragraph("This is another text in new Para flowable."))
    # numCols = 2 if i%2 == 0 else 3
    numCols = 2
    bCols = BalancedColumns(fs, nCols=numCols, spaceAfter=5*mm, vLinesStrokeColor=HexColor('#77D179'), vLinesStrokeWidth=0.5)
    story.append(bCols)
doc.build(story)
Even if I set keepWithNext=True, the heading and its content do not stay in the same frame.
If I use KeepTogether instead, the BalancedColumns flowable takes up the entire space of the page:
heading = Paragraph(text)
content = Paragraph("This is another text in the paragraph")
f= KeepTogether([heading, content])
fs.append(f)
Can anyone suggest a solution so that the heading and its underlying paragraph can remain in the same frame?

How to use MagickImage/Wand to create an image comprised of equally spaced bordered boxes of text in Python

I need to make an image that looks like the following:
To do so, I'm using ImageMagick through Wand. Here is my current implementation:
import re
from unicodedata import normalize
from docx import Document
from wand.image import Image
from wand.drawing import Drawing
from wand.font import Font
doc = Document("P.docx")
docText = []
for para in doc.paragraphs:
docText.append(para.text)
fullText = "\n".join(docText)
ct = 242
def get(source, begin, end):
    """Return the part of ``source`` between the markers ``begin`` and ``end``.

    The search for ``end`` starts right after the first occurrence of
    ``begin``. An empty string is returned when either marker is missing.
    """
    try:
        # str.index takes the substring itself (not its length) and raises
        # ValueError when the substring is not found.
        start = source.index(begin) + len(begin)
        finish = source.index(end, start)
    except ValueError:
        return ""
    return source[start:finish]
def capitalize(string):
cap = ("".join(j[0].upper() + j[1:]) for j in string)
return cap
def find_matches(text):
return capitalize(
[
m
for m in re.findall(
r"^[^0-9]\s+([^.;]+\s*)+[.;]+", normalize("NFKD", text), re.MULTILINE
)
]
)
with Image(width=300, height=300, psuedo='xc:black') as canvas:
left, top, width, height = 50, 10, 100, 150
for match in find_matches(text=fullText):
ct += 1
match_words = match.split(" ")
match = " ".join(match_words[:-1])
with Drawing() as context:
context.fill_color = 'white'
context.rectangle(left=left, top=top, width=width, height=height)
canvas.font = Font('/System/Library/Fonts/arial.ttf')
context(canvas)
canvas.caption(match + '\r' + 'ct', left=left, top=top, width=width, height=height, gravity='center')
canvas.save(filename='patdrawTest.png')
I'm not quite certain on how to create borders or how to properly space things with this tool, and as such, this is my current output:
I understand I need to have a base image that is iterated over. I also understand that I will need flags in order to keep track of the height/width/etc. of the previous blocks of text (unless there is an easier way of doing so with this tool). However, the way my code currently works is that it takes in words from a word document, parses it to get specific matches, and then is supposed to put it into an image like the first image I showed above. Yet, I am at a loss. Any help would be greatly appreciated.
Here's the code I've come up with in order to make equally-spaced boxes of text.
import re
from unicodedata import normalize
from docx import Document
from wand.image import Image
from wand.drawing import Drawing
from wand.font import Font
doc = Document("P.docx")
docText = []
for para in doc.paragraphs:
docText.append(para.text)
fullText = "\n".join(docText)
ct = 242
def get(source, begin, end):
    """Return the part of ``source`` between the markers ``begin`` and ``end``.

    The search for ``end`` starts right after the first occurrence of
    ``begin``. An empty string is returned when either marker is missing.
    """
    try:
        # str.index takes the substring itself (not its length) and raises
        # ValueError when the substring is not found.
        start = source.index(begin) + len(begin)
        finish = source.index(end, start)
    except ValueError:
        return ""
    return source[start:finish]
def capitalize(string):
    """Lazily upper-case the first character of every item of ``string``.

    Despite its name, ``string`` is iterated item by item, so it is meant to
    be an iterable of strings. A generator is returned; an empty item raises
    IndexError when the generator reaches it.
    """
    return (item[0].upper() + item[1:] for item in string)
def find_matches(text):
    """Return the capitalised regex captures found in ``text``.

    ``text`` is NFKD-normalised first, then searched in MULTILINE mode. The
    pattern has one capturing group, so ``re.findall`` yields that group's
    text for each match; the results are passed through ``capitalize``.
    """
    normalized = normalize("NFKD", text)
    captured = re.findall(
        r"^[^0-9]\s+([^.;]+\s*)+[.;]+", normalized, re.MULTILINE
    )
    return capitalize(captured)
with Image(width=400, height=1000, pseudo='xc:white') as canvas:
left, top, width, height = 2, 2, 395, 131
for match in find_matches(text=fullText):
ct += 1
match_words = match.split(" ")
match = " ".join(match_words[:-1])
with Drawing() as context:
context.fill_color = 'black'
context.rectangle(left=left, top=top, width=width, height=height)
context.fill_color = 'white'
context.rectangle(left=(left+2), top=(top+2), width=(width-4), height=(height-4))
canvas.font = Font('/System/Library/Fonts/timesnewroman.ttf')
context(canvas)
canvas.caption(match + '\n' + str(ct), left=(left+5), top=top, width=(width-10), height=height,
gravity='center')
top += 135
canvas.crop(bottom=top)
canvas.save(filename='patdrawTest.png')
Here is the output with this code:
I do, however, still have something I'd like to address. While the boxes of text are all equally-spaced and look rather nice, I'd still prefer that all of the text looks the same; that is the same font-size, and the only way to do that is to have the borders and such be automatically re-sized such that it can work that way. I have no clue on how to do this, but for now here is this, should anyone else run into something like this.

Printing arabic - Lines are in reverse order when a paragraph is rendered in a block with multiple lines after using bid

Okay first, My data's source is google sheets, a simple table with rows (ID, Confession and Age)
I use google sheets api to connect the data to my python script using gspread and google oauth2client in order to fetch the Confession column ( which is mostly paragraphs, around 100 to 200 arabic words)
Once the confession text is stored in the confessions variable in my code, I run it through the Arabic reshaper and get_display (the bidi algorithm). The output is then processed with Pillow in order to draw the paragraphs onto an image. The Arabic letters render perfectly, but the lines start from the bottom instead of from the top right.
the text must be written from right to left
example:
however, the actual paragraph is
كيما يكون التقريران متسقين تماما، فإن هناك تغييرا طفيفا في الفقرة 2 من
تقرير الفريق العامل الثاني، التي يرد فيها ذكر الرئيس والموظفين
المعنيين الآخرين.
As you can see, "كيما يكون التقريران" is at the end of the text in the image, yet it is at the beginning (top right) of the original paragraph quoted above. So basically the lines are reversed, and I can't find a workaround other than splitting the paragraphs into short lines and printing them line by line, which is practically impossible because the average paragraph is going to be around 200 words.
get_arabic_text() and arabicfix() are both functions built for reshaping Arabic; the difference is that get_arabic_text() splits the input and reshapes it word by word, whereas arabicfix() pushes all of the text into the Arabic reshaper at once. In the code I'm only calling arabicfix().
I tend to believe that the issue is within the bidi algorithm but I can't figure out a solution.
# -*- coding: utf-8 -*-
from PIL import Image, ImageDraw, ImageFont
from bidi.algorithm import get_display
import textwrap, os, re, arabic_reshaper
from rtl import reshaper
def f(p): return os.path.join(directory, p)
directory = os.path.normcase(os.path.dirname(__file__))
def get_arabic_text(text):
    """Reshape ``text`` word by word and return the words in reversed order.

    Text in which ``reshaper.has_arabic_letters`` finds nothing is returned
    unchanged. Otherwise the text is split on whitespace, each word that
    contains Arabic letters is reshaped and passed through ``get_display``,
    other words are kept as they are, and the resulting words are joined
    with single spaces in reverse order.
    """
    if not reshaper.has_arabic_letters(text):
        return text

    converted = [
        # reshape joins the letters of the word; get_display handles right to left
        get_display(reshaper.reshape(token))
        if reshaper.has_arabic_letters(token)
        else token
        for token in text.split()
    ]
    return ' '.join(reversed(converted))
def draw_multiple_line_text(image, text, font, text_color, text_start_height):
draw = ImageDraw.Draw(image)
print(text)
image_width, image_height = image.size
y_text = text_start_height
lines = textwrap.wrap(text, width=65)
print(lines)
for line in lines:
line_width, line_height = font.getsize(line)
draw.text(((image_width - line_width)/2, y_text),line, font=font, fill=text_color)
y_text += line_height
def arabicfix(text):
    """Reshape ``text`` with a font-specific reshaper and return the bidi result.

    The reshaper configuration is built from the arial.ttf file located
    through ``f``; the reshaped text is then passed to ``get_display`` with
    ``base_dir='R'``.
    """
    font_config = arabic_reshaper.config_for_true_type_font(f('arial.ttf'))
    # Named "shaper" so the local does not hide the imported ``reshaper`` module.
    shaper = arabic_reshaper.ArabicReshaper(font_config)
    return get_display(shaper.reshape(text), base_dir='R')
def main():
image = Image.open('twitter2.jpg')
arabictext=u"كيما يكون التقريران متسقين تماما، فإن هناك تغييرا طفيفا في الفقرة 2 من تقرير الفريق العامل الثاني، التي يرد فيها ذكر الرئيس والموظفين المعنيين الآخرين."
text=arabicfix(arabictext)
print("text:", text)
fontsize = 25
textcolor= (0,0,0)
text_height =50
font = ImageFont.truetype('/Users/Hady/PycharmProjects/untitled/arial.ttf', fontsize)
draw_multiple_line_text(image, text, font,textcolor,text_height)
image.save(f('tofff.jpg'))
main()
I solved a similar issue by not fixing the whole arabic text at once, instead I fixed each line before printing like this:
def draw_multiple_line_text(image, text, font, text_color, text_start_height):
    """Wrap ``text`` and draw it on ``image`` line by line, centred horizontally.

    The text is wrapped to 65 characters per line before any reshaping, and
    ``arabicfix`` is applied to each wrapped line just before it is drawn, so
    the lines keep their top-to-bottom order. Each line is measured with
    ``font.getsize`` on the un-reshaped line, and the vertical position
    advances by that line's height, starting from ``text_start_height``.
    """
    painter = ImageDraw.Draw(image)
    print(text)
    image_width, image_height = image.size
    wrapped_lines = textwrap.wrap(text, width=65)
    print(wrapped_lines)
    y_position = text_start_height
    for raw_line in wrapped_lines:
        line_width, line_height = font.getsize(raw_line)
        x_position = (image_width - line_width)/2
        painter.text((x_position, y_position), text=arabicfix(raw_line), font=font, fill=text_color)
        y_position += line_height
def main():
    """Draw the sample Arabic paragraph onto twitter2.jpg and save it as tofff.jpg.

    The raw paragraph is handed to ``draw_multiple_line_text`` as it is; that
    function wraps it and applies ``arabicfix`` to every line before drawing.
    """
    image = Image.open('twitter2.jpg')
    arabictext = u"كيما يكون التقريران متسقين تماما، فإن هناك تغييرا طفيفا في الفقرة 2 من تقرير الفريق العامل الثاني، التي يرد فيها ذكر الرئيس والموظفين المعنيين الآخرين."
    # No variable named ``text`` exists in this function, so the un-reshaped
    # paragraph is printed and passed on instead of raising a NameError.
    print("text:", arabictext)
    fontsize = 25
    textcolor = (0, 0, 0)
    text_height = 50
    font = ImageFont.truetype('/Users/Hady/PycharmProjects/untitled/arial.ttf', fontsize)
    draw_multiple_line_text(image, arabictext, font, textcolor, text_height)
    image.save(f('tofff.jpg'))

Changing Paragraph formatting in python-docx

I am trying to change the formatting for multiple paragraphs using Python's python-docx module.
from docx import Document
from docx.shared import Pt
from docx.shared import Inches
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.section import WD_ORIENTATION
from content import report_content, provinces, report_date, introduction, intro_content
alignment_dict = {'justify': WD_PARAGRAPH_ALIGNMENT.JUSTIFY,
'center': WD_PARAGRAPH_ALIGNMENT.CENTER,
'centre': WD_PARAGRAPH_ALIGNMENT.CENTER,
'right': WD_PARAGRAPH_ALIGNMENT.RIGHT,
'left': WD_PARAGRAPH_ALIGNMENT.LEFT}
orientation_dict = {'portrait': WD_ORIENTATION.PORTRAIT,
'landscape': WD_ORIENTATION.LANDSCAPE}
document = Document()
def change_orientation(orientation='portrait', set_left_margin=1.0, set_right_margin=1.0):
    """Set the orientation and side margins of the document's last section.

    orientation: a key of ``orientation_dict`` ('portrait' or 'landscape');
        any other value raises KeyError.
    set_left_margin / set_right_margin: margin widths in inches.
    """
    section = document.sections[-1]
    target = orientation_dict[orientation]
    if section.orientation != target:
        # Swap width and height only when the orientation really changes;
        # swapping unconditionally would turn a portrait page into
        # landscape-sized dimensions on a call that asks for portrait.
        section.page_width, section.page_height = section.page_height, section.page_width
        section.orientation = target
    section.left_margin = Inches(set_left_margin)
    section.right_margin = Inches(set_right_margin)
def add_logo(path, align):
document.add_picture(path, width=Inches(4.5), height=Inches(1.5))
last_paragraph = document.paragraphs[-1]
last_paragraph.alignment = alignment_dict[align]
def add_content(content, space_after, font_name='Arial', font_size=11, line_spacing=0, space_before=0,
align='justify', keep_together=True, keep_with_next=False, page_break_before=False,
widow_control=False, set_bold=False, set_italic=False, set_underline=False, set_all_caps=False):
paragraph = document.add_paragraph(content)
style = document.styles['Normal']
font = style.font
font.name = font_name
font.size = Pt(font_size)
font.bold = set_bold
font.italic = set_italic
font.all_caps = set_all_caps
font.underline = set_underline
paragraph_format = paragraph.paragraph_format
paragraph_format.alignment = alignment_dict.get(align.lower())
paragraph_format.space_before = Pt(space_before)
paragraph_format.space_after = Pt(space_after)
paragraph_format.line_spacing = line_spacing
paragraph_format.keep_together = keep_together
paragraph_format.keep_with_next = keep_with_next
paragraph_format.page_break_before = page_break_before
paragraph_format.widow_control = widow_control
def create_numbered_list():
pass
def add_subheading(subheading, level):
document.add_heading(subheading, level)
change_orientation(orientation='landscape', set_left_margin=0.5, set_right_margin=0.5)
add_logo('logo.png', 'center')
add_content(report_content, align='Center', space_before=40, space_after=20, line_spacing=1, font_name='Arial',
set_bold=True, set_all_caps=True)
add_content(provinces, align='Center', space_before=20, space_after=20, line_spacing=1, font_name='Arial',
set_bold=True, set_all_caps=True)
add_content(report_date, align='Center', space_before=20, space_after=20, line_spacing=1, font_name='Arial',
set_bold=True, set_all_caps=True)
document.add_page_break()
add_subheading(introduction, level=1)
add_content(intro_content, space_after=20, space_before=20)
document.save('demo.docx')
The problem is that every time I add formatting to a new paragraph block via the add_content function, the formatting of the older blocks changes to match the formatting of the current block.
Why am I not able to retain the formatting? Why does it get reset to the formatting of the latest block?
Try this code. Use add_style to add a new style for each block; document.styles['Normal'] is a built-in style.
I tested it and it works.
from docx.enum.style import WD_STYLE_TYPE
#.........................
def add_content(content, space_after, font_name='Arial', font_size=16, line_spacing=0, space_before=0,
                align='justify', keep_together=True, keep_with_next=False, page_break_before=False,
                widow_control=False, set_bold=False, set_italic=False, set_underline=False, set_all_caps=False,
                style_name=""):
    """Add a paragraph to ``document`` and format it through a named style.

    Font settings (name, size, bold, italic, all caps, underline) are written
    to the paragraph style called ``style_name``; spacing, alignment and
    pagination settings are written to the paragraph itself.

    If the document already contains a style with that name it is reused
    rather than created again, because ``add_style`` raises ValueError for a
    name that already exists. Reusing a style means its font settings are
    overwritten, which affects every paragraph that uses that style.

    align: looked up case-insensitively in ``alignment_dict``; an unknown
        value results in an alignment of None.
    """
    paragraph = document.add_paragraph(content)
    styles = document.styles
    if style_name in styles:
        paragraph.style = styles[style_name]
    else:
        paragraph.style = styles.add_style(style_name, WD_STYLE_TYPE.PARAGRAPH)
    font = paragraph.style.font
    font.name = font_name
    font.size = Pt(font_size)
    font.bold = set_bold
    font.italic = set_italic
    font.all_caps = set_all_caps
    font.underline = set_underline
    paragraph_format = paragraph.paragraph_format
    paragraph_format.alignment = alignment_dict.get(align.lower())
    paragraph_format.space_before = Pt(space_before)
    paragraph_format.space_after = Pt(space_after)
    paragraph_format.line_spacing = line_spacing
    paragraph_format.keep_together = keep_together
    paragraph_format.keep_with_next = keep_with_next
    paragraph_format.page_break_before = page_break_before
    paragraph_format.widow_control = widow_control
add_content("1234", align='Center', space_before=40, space_after=20, line_spacing=1, font_name='Arial', font_size=16,
set_bold=True, set_all_caps=True,style_name ="Normal1")
add_content("12345", align='Center', space_before=20, space_after=20, line_spacing=1, font_name='Arial',font_size=14,
set_bold=True, set_all_caps=True,style_name ="Normal2")
Your problem is here:
style = document.styles['Normal']
font = style.font
A style applies to all paragraphs that have that style. It's a document-global thing. So any change you make to style.font affects all paragraphs with that style (which is all your paragraphs in this case).
Make sure you read this page in the documentation and the one that follows it:
http://python-docx.readthedocs.io/en/latest/user/styles-understanding.html
Character formatting (e.g. bold, size, typeface) happens at the run level (below paragraph, a paragraph is composed of runs). So if you want to apply character formatting directly, as opposed to by using a style, you need to do it to each run.
Adding content to a paragraph by calling document.add_paragraph(content) places all that content in a single run. So a quick fix might be:
font = paragraph.runs[0].font
Might be worth a try. But probably also a good idea to spend a little time with the concepts parts of the documentation to understand the Word object model a bit better. It's pretty complex for what seems on the surface to be a simple thing. This page is a good place to start:
http://python-docx.readthedocs.io/en/latest/user/text.html

Separating OCR text into lines with Python

What I'm trying to do is create a list of lines from a paragraph. The width of each line cannot exceed a set limit.
Here is a class that is supposed to solve this:
from font import Font
class Text:
def __init__(self, text, limit, size):
self.text = text
self.limit = limit
self.size = size
self.setText()
def setText(self):
textList = self.text.split(' ')
self.newList = tempo = []
spaceWidth = Font(self.size, ' ').width
count = 0
for x in textList:
word = Font(self.size, x)
count = count + word.width + spaceWidth
if count >= self.limit:
self.newList.append(' '.join(tempo))
tempo = []; tempo = [x]
count = word.width
else:
tempo.append(x)
self.newList.append(' '.join(tempo))
as you can see I'm using another class called Font, here it is:
from PIL import Image,ImageFont
class Font:
def __init__(self, fontSize, text):
self.font = ImageFont.truetype('tomnr.ttf', fontSize)
self.width, self.height = self.font.getsize(text)
There are no execution errors in the code but the result is not correct: for example,
from text import Text
text = Text("Art inspired apparel for Creative Individuals. Do you SurVibe?", 452, 25)
print text.newList
What this code is supposed to do is to create lines that are max. width 452 pixels. It should print
['Art inspired apparel for Creative', 'Individuals. Do you SurVibe?']
but instead it prints:
['Art', 'inspired', 'apparel', 'for', 'Creative', 'Art inspired apparel for Creative', 'Individuals. Do you SurVibe?']
And I can't find out what's going on. I think my loop is fine and everything run smoothly! I'm pretty sure it's a silly mistake but couldn't figure it out on my own. Thanks in advance.
Error is here:
self.newList = tempo = []
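Both names point to the same list, so every word appended to tempo before the first line break is also appended to self.newList. Create two separate lists instead: self.newList = [] and tempo = [].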

Categories

Resources