How I can change images in docx file with python

How I can change images in docx file with python - python

I need change all images in docx file. How can i do it?
Now i download images with changing but i want change in docx
import zipfile
from PIL import Image, ImageDraw #Подключим необходимые библиотеки.
def change_img(path):
image = Image.open(path) # Открываем изображение.
draw = ImageDraw.Draw(image) # Создаем инструмент для рисования.
pix = image.load() # Выгружаем значения пикселей.
for i in range(image.size[0]):
for j in range(image.size[1]):
a = []
for k in range(3):
a.append(pix[i, j][k])
a = sum(a) // 3
draw.point((i, j), (a, a, a))
return image
archive = zipfile.ZipFile('document.docx')
for file in archive.filelist:
if file.filename.startswith('word/media/'):
change_img(str(archive.extract(file)))

Related

I'm having trouble with pasting a gif on an image using PIL

from PIL import Image, ImageSequence
import PIL
GIF_PATH = Image.open(r"C:\Users\me_\My\Filw\Path.gif")
IMAGE_PATH = Image.open(r"base.png")
frames = []
for frame in ImageSequence.Iterator(GIF_PATH):
output = IMAGE_PATH.copy()
frame_px = frame.load()
output_px = output.load()
transparent_foreground = frame.convert('RGBA')
transparent_foreground_px = transparent_foreground.load()
for x in range(frame.width):
for y in range(frame.height):
if frame_px[x, y] in (frame.info["background"], frame.info["transparency"]):
continue
output_px[x, y] = transparent_foreground_px[x, y]
output =output.resize([436,249], PIL.Image.NEAREST)
frames.append(output)
frames[0].save('output.gif',save_all = True, append_images = frames[1:], optimize = False, duration = 40, loop=0)
how would I paste the image to a specific location?
I'm pretty new so trying to get a grasp
I tried using imagechops offset but the image just wrapped around instead of moving

OSError: image file is truncated

When I am processing a bunch of images, on one of them I get this error
File "/home/tensorflowpython/firstmodel/yololoss.py", line 153, in data_generator
image, box = get_random_data(annotation_lines[i], input_shape, random=True)
File "/home/tensorflowpython/firstmodel/yololoss.py", line 226, in get_random_data
image = image.resize((nw,nh), Image.BICUBIC)
File "/home/tensorflowpython/kenv/lib/python3.6/site-packages/PIL/Image.py", line 1858, in resize
self.load()
File "/home/tensorflowpython/kenv/lib/python3.6/site-packages/PIL/ImageFile.py", line 247, in load
"(%d bytes not processed)" % len(b)
OSError: image file is truncated (25 bytes not processed)
I have already tried the solution suggested here but it doesn't work
my code looks like this
from PIL import Image
def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, proc_img=True):
Image.LOAD_TRUNCATED_IMAGES = True
line = annotation_line.split()
image = Image.open(line[0])
iw, ih = image.size
h, w = input_shape
box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
try:
image.load()
except IOError:
pass # You can always log it to logger
if not random:
# resize image
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
dx = (w-nw)//2
dy = (h-nh)//2
image_data=0
if proc_img:
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image)/255.
# correct boxes
box_data = np.zeros((max_boxes,5))
if len(box)>0:
np.random.shuffle(box)
if len(box)>max_boxes: box = box[:max_boxes]
box[:, [0,2]] = box[:, [0,2]]*scale + dx
box[:, [1,3]] = box[:, [1,3]]*scale + dy
box_data[:len(box)] = box
return image_data, box_data
# resize image
new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
scale = rand(.25, 2)
if new_ar < 1:
nh = int(scale*h)
nw = int(nh*new_ar)
else:
nw = int(scale*w)
nh = int(nw/new_ar)
image = image.resize((nw,nh), Image.BICUBIC) #error occurs here
The difference between my error and the previous solution is, mine says OS error and the solution is for IO error
EDIT: I have figured out the image that is causing this error, it can be downloaded from this link

I tried the solution that you linked with the truncated image and it worked. You made a slight mistake when trying to apply this solution: you have to set ImageFile.LOAD_TRUNCATED_IMAGES=True, not Image.LOAD_TRUNCATED_IMAGES.
LOAD_TRUNCATED_IMAGES does not originally exist in Image module, so when you do Image.LOAD_TRUNCATED_IMAGES=True you set a new variable which is not used by the library.
So I think you juste have to do that:
from PIL import ImageFile, Image
ImageFile.LOAD_TRUNCATED_IMAGES = True
image = Image.open("00090.jpg")
# resize now doesn't fail
image.resize((h, w), Image.BICUBIC)

Set Author, Title, and Subject for PDF using Reportlab

How can you correctly set the Author, Title and Subject attributes for a PDF File using Reportlab?
I have found the methods in the Reportlab User Guide on page 56, but I am not sure how to implement them correctly.
Below in my PDF cropping and scaling script, I have added the annotations method, but I don't know where to call them from, or if a whole new Canvas object is needed. Please excuse the lengthy code, but only after line 113 is the doc being created, above are mostly auxiliary methods, including the annotations method on line 30.
# All the necessary parameters are accessible after line 92,
# but can of course be changed manually in the Code
# imports for the crop, rename to avoid conflict with reportlab Image import
from PIL import Image as imgPIL
from PIL import ImageChops, ImageOps, ImageFilter
import os.path, sys
# import for the PDF creation
import glob
from reportlab.lib.pagesizes import A4
from reportlab.lib import utils
from reportlab.platypus import Image, SimpleDocTemplate, Spacer
from reportlab.pdfgen import canvas
# get os path for Cropping
path = (os.path.dirname(os.path.abspath("cropPDF.py")))
dirs = os.listdir(path)
def trim(im, border="white"):
bg = imgPIL.new(im.mode, im.size, border)
diff = ImageChops.difference(im, bg)
bbox = diff.getbbox()
if bbox:
return im.crop(bbox)
def annotations(canvas):
canvas.setAuthor("the ReportLab Team")
canvas.setTitle("ReportLab PDF Generation User Guide")
canvas.setSubject("How to Generate PDF files using the ReportLab modules")
def findMaxWidth():
maxWidth = 0
for item in dirs:
try:
fullpath = os.path.join(path, item)
if os.path.isfile(fullpath):
im = imgPIL.open(fullpath)
maxWidth = max(maxWidth, im.size[0])
except:
pass
return maxWidth
def padImages(docHeight):
maxWidth = findMaxWidth()
for item in dirs:
try:
fullpath = os.path.join(path, item)
if os.path.isfile(fullpath):
im = imgPIL.open(fullpath)
f, e = os.path.splitext(fullpath)
width, height = im.size # get the image dimensions, the height is needed for the blank image
if not docHeight <= height: # to prevent oversized images from bein padded, such that they remain centered
image = imgPIL.new('RGB', (maxWidth, height),
(255, 255, 255)) # create a white image with the max width
image.paste(im, (0, 0)) # paste the original image overtop the blank one, flush on the left side
image.save(f + ".png", "PNG", quality=100)
except:
pass
def crop():
for item in dirs:
try:
fullpath = os.path.join(path, item)
if os.path.isfile(fullpath):
im = imgPIL.open(fullpath)
f, e = os.path.splitext(fullpath)
imCrop = trim(im, "white")
imCrop.save(f + ".png", "PNG", quality=100)
except:
pass
def add_page_number(canvas, doc):
canvas.saveState()
canvas.setFont('Times-Roman', numberFontSize)
page_number_text = "%d" % (doc.page)
canvas.drawCentredString(
pageNumberSpacing * mm,
pageNumberSpacing * mm,
page_number_text
)
canvas.restoreState()
#############################
executeCrop = True
executePad = True
outputName = "output.pdf" #The name of the file that will be created
fileAuthor = "Roman Stadler" #these 3 attributes are visible in the file info menu
fileTitle = ""
fileSubject = ""
margin = 0.5
imageWidthDefault = 550
spacerHeight = 7
scalingIfImageTooTall = 0.95 # larger than 95 can result in an empty page after the image
includePagenumbers = True
numberFontSize = 10
pageNumberSpacing = 5
############################
doc = SimpleDocTemplate(
outputName,
topMargin=margin * mm,
leftMargin=margin * mm,
rightMargin=margin * mm,
bottomMargin=margin * mm,
pagesize=A4
)
if executeCrop:
crop()
if executePad:
padImages(doc.height)
filelist = glob.glob("*.png") # Get a list of files in the current directory
filelist.sort()
story = [] # create the list of images for the PDF
for fn in filelist:
img = utils.ImageReader(fn)
img_width, img_height = img.getSize() # necessary for the aspect ratio
aspect = img_height / float(img_width)
documentHeight = doc.height
imageWidth = imageWidthDefault
imageHeight = imageWidth * aspect
if imageHeight > documentHeight:
imageHeight = documentHeight * scalingIfImageTooTall
imageWidth = imageHeight / aspect
img = Image(
fn,
width=imageWidth,
height=imageHeight
)
story.append(img)
space = Spacer(width=0, height=spacerHeight)
story.append(space)
if includePagenumbers and not len(filelist) == 0: # if pagenumbers are desired, or not
doc.build(
story,
onFirstPage=add_page_number,
onLaterPages=add_page_number,
)
elif not len(filelist) == 0:
doc.build(story)
else: # to prevent an empty PDF that can't be opened
print("no files found")

In the meantime, I have found another way, that does not use reportlab, but instead relies on PyPDF2:
The following import is needed:
# PyPDF2 for the metadata modification
from PyPDF2 import PdfFileReader, PdfFileWriter
Then the metadata can be edited like this:
author = "Roman Stadler"
title = "CropPDF"
subject = "Stackoverflow"
#rest of the script
#attemp the metadate edit
try:
file = open('output.pdf', 'rb+')
reader = PdfFileReader(file)
writer = PdfFileWriter()
writer.appendPagesFromReader(reader)
metadata = reader.getDocumentInfo()
writer.addMetadata(metadata)
writer.addMetadata({
'/Author': author,
'/Title': title,
'/Subject' : subject,
'/Producer' : "CropPDF",
'/Creator' : "CropPDF",
})
writer.write(file)
file.close()
except:
print("Error while editing metadata")

You can define attributes like the author when defining the doc as a SimpleDocTemplate
doc = SimpleDocTemplate(
outputName,
topMargin=margin * mm,
leftMargin=margin * mm,
rightMargin=margin * mm,
bottomMargin=margin * mm,
pagesize=A4,
title="This is the title of the document", #exchange with your title
author="John Smith", #exchange with your authors name
subject"Adding metadata to pdf via reportlab" #exchange with your subject
)

Python 3.5.1 - Assign variables to all Files (Filetype: .jpg) inside target folder

I am fairly new to Python and have a difficult Problem to solve:
Here is what I am trying to do -->
I have a Folder (path known) with 1 - x picture files (.jpg) in it, whereas x can be as high as 1000
These picture files should be stiched together (one on top of the other)
To do so I want Python to assign a variable to each Picture file in the known folder and then create a loop which stiches these variable-stored pictures together and outputs it as 1 picture (.jpg)
Here is what I have coded so far:
from PIL import Image
import glob
#The following GLOB part doesn't work, I tried to make a List with all the
#files (.jpg) inside the main directory
#image_list = []
#for filename in glob.glob('*.jpg'):
# test = Image.open(filename)
# image_list.append(test)
img1 = Image.open("img1.jpg")
img2 = Image.open("img2.jpg")
def merge_images(img1, img2):
(width1, height1) = img1.size
(width2, height2) = img2.size
result_width = max(width1, width2)
result_height = height1 + height2
result = Image.new('RGB', (result_width, result_height))
result.paste(im=img2, box=(0,0))
result.paste(im=img1, box=(0,height1-2890))
return result
merged = merge_images(img1, img2)
merged.save("test.jpg")
What this does is to assign img1.jpg and img2.jpg to a variable and then stack them on top of oneanother and save this stacked picture as "test.jpg". Where do I go from here if I want to assign many pictures (.jpg) to variables and stack them on top of each other without typing a line of code for each new picture (see description further up?)
Thanks a lot for your help!
Chris

If you start with a 0x0 image, you can stack further images onto it like this:
stacked_img = Image.new('RGB', (0, 0))
for filename in glob.glob('*.jpg'):
stacked_img = merge_images(stacked_img, Image.open(filename))
stacked_img.save('stacked.jpg')
You might also like to change the height1-2890 to height2 in the merge_images() function if you are trying to stack the second image below the first image, i.e.
result.paste(im=img1, box=(0,height2))

Why not use a container such as a list?
images = [Image.open(img_name) for img_name in os.listdir(my_folder) if img_name.endswith('.jpg')
def merge_images(list_images):
result_width = max(img.size[0] for img in images)
result_height = sum(img.size[1] for img in images)
result = Image.new('RGB', (result_width, result_height))
for idx, img in enumerate(list_images):
result.paste(im=img, box=(0,list_images[0].size[1]-idx*2890))
See Data Structures for more information.
If performance is important, consider using map instead of a loop.

this is my code in which I implemented your approach. As a result, I am getting an image where 2 of the 4 images are displayed and the rest of the image is black. In the source directory I have 4 images (img1.jpg - img4.jpg)
from PIL import Image
import glob
stacked_img = Image.new('RGB', (0,0))
def merge_images(img1, img2):
(width1, height1) = img1.size
(width2, height2) = img2.size
result_width = max(width1, width2)
result_height = height1 + height2
result = Image.new('RGB', (result_width, result_height))
result.paste(im=img2, box=(0,0))
result.paste(im=img1, box=(0,height1))
return result
for filename in glob.glob('*.jpg'):
stacked_img = merge_images(stacked_img, Image.open(filename))
stacked_img.save('stacked.jpg')

Corrupted image being saved by cv.SaveImage() in opencv

import sys, Image, scipy, cv2, numpy
from scipy.misc import imread
from cv2 import cv
from SRM import SRM
def ndarrayToIplImage (source):
"""Conversion of ndarray to iplimage"""
image = cv.CreateImageHeader((source.shape[1], source.shape[0]), cv.IPL_DEPTH_8U, 3)
cv.SetData(image, source.tostring(), source.dtype.itemsize * 3 * source.shape[1])
return image
"""Main Program"""
filename = "snap.jpeg"
Q = 64
im = imread(filename)
name = filename[:-4]
img = Image.fromarray(im)
if img.size[0] > 200 or img.size[1] > 200:
ratio = img.size[0]/img.size[1]
size = int(ratio*200), 200
img = numpy.array(img.resize(size, Image.ANTIALIAS))
srm = SRM(img, Q)
srm.initialization()
srm.segmentation()
classes, map = srm.map()
"""Converting ndarray to PIL Image to iplimage"""
pil_img = Image.fromarray(map)
cv_img = cv.CreateImageHeader(pil_img.size, cv.IPL_DEPTH_8U, 3)
cv.SetData(cv_img, pil_img.tostring(), pil_img.size[0]*3)
print type(cv_img) ##prints <type 'cv2.cv.iplimage'>
"""Using ndarrayToIplImage function also gives the same error!"""
"""
cv_img if of type iplimage but still gives error while using cv.ShowImage()
or cv.SaveImage().
There is no error displayed. Just the console hangs...
"""
I am using the SRM (Statistical Region Merging) Package available at this page.
I have just changed the example program given in the package. I had to convert the type returned by the SRM package functions to iplimage. There is no error in using the package but somewhere in using opencv functions.
This is the image that is saved after the console closes after hanging.
It used cv.SaveImage().
I tried cv2.imwrite() and I got this as the result:
This is the image that should have been saved. I used scipy.misc.imsave('image.jpg', map) to save this.

Why do you use IplImage and PIL? SRM library read numpy array and you get a numpy array from cv2.imread(image), then if you need to resize yuor image you can use opencv function cv2.resize(...). Finally you can save an image with opencv with cv2.imwrite(...) your code should appear like this:
import sys, cv2, numpy
from SRM import SRM
"""Main Program"""
filename = "snap.jpeg"
Q = 64
img = cv2.imread(filename)
name = filename[:-4]
if img.shape[0] > 200 or img.shape[1] > 200:
ratio = img.shape[0] * 1. / img.shape[1]
size = (int(ratio * 200), 200)
img = cv2.resize(img, size, interpolation=cv2.INTER_LANCZOS4)
srm = SRM(img, Q)
srm.initialization()
srm.segmentation()
classes, srmMap = srm.map() # Map is a python function, use different variable name
srmMap = srmMap.astype('uint8') # or you can try other opencv supported type
# I suppose that srmMap is your image returned as numpy array
cv2.imwrite('name.jpeg', srmMap)
# or
cv2.imshow('image', srmMap)
cv2.waitKey(0)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How I can change images in docx file with python - python

Related

I'm having trouble with pasting a gif on an image using PIL

OSError: image file is truncated

Set Author, Title, and Subject for PDF using Reportlab

Python 3.5.1 - Assign variables to all Files (Filetype: .jpg) inside target folder

Corrupted image being saved by cv.SaveImage() in opencv

Categories

Resources