I have populated some mail merge fields in a .docx file and now I want my script to convert the saved .docx file to a .dotx file. I am using Python 3.6.
from __future__ import print_function
from mailmerge import MailMerge
from datetime import date
from docx import Document
from docx.opc.constants import CONTENT_TYPE as CT
import csv
import sys
import os
import numpy as np
import pandas as pd
# . . .
# Merge every template listed in the input columns and write one output
# document per template. numTemplates and the *Col sequences come from the
# elided part of the script above.
for i in range(numTemplates):
    theTemplateName = templateNameCol[i]
    theTemplateFileLocation = templateFileLocationCol[i]
    template = theTemplateFileLocation
    # MailMerge supports the context-manager protocol; leaving the block
    # closes the template file, so the written output can be reopened safely
    # by the conversion step below.
    with MailMerge(template) as document:
        print(document.get_merge_fields())
        theOffice = officeCol[i]
        theAddress = addressCol[i]
        theSuite = suiteCol[i]
        theCity = cityCol[i]
        theState = stateCol[i]
        theZip = zipCol[i]
        thePhoneNum = phoneNumCol[i]
        theFaxNum = faxNumCol[i]
        # Only the Address merge field is populated here.
        document.merge(
            Address=theAddress
        )
        document.write(r'\Users\me\mailmergeproject\test-output' + str(i) + r'.docx')
    #do conversion here
The bottom of the loop is where I want to do the conversion. As you can see, I've written a file, and it's just sitting in a folder right now.
Here is a code snippet for converting the .docx file to a .dotx file.
You have to change the content type of the main document part before saving the document.
pip install python-docx
import docx

# Open the ordinary Word document that should become a template.
document = docx.Document('foo.docx')
document_part = document.part
# Switch the main part to the WordprocessingML *template* content type so the
# saved package is a .dotx rather than a .docx.
# NOTE(review): _content_type is a private attribute of the part and may
# change between python-docx releases.
document_part._content_type = (
    'application/vnd.openxmlformats-officedocument'
    '.wordprocessingml.template.main+xml'
)
document.save('bar.dotx')
Related
Currently I am using following code to read xml files and extract data.
import pandas as pd
import numpy as np
import xml.etree.cElementTree as et
import datetime
# Parse one XML dump and pull out the metric names, the last-update timestamp
# and the metric values, then print the first two metrics.
tree = et.parse(r'/data/dump_xml/myfile1.xml')
root = tree.getroot()

# Text of every <name> and <lastupdate> element, in document order.
NAME = [node.text for node in root.iter('name')]
UPDATE = [node.text for node in root.iter('lastupdate')]

# The first <lastupdate> is parsed as integer epoch seconds and formatted via
# datetime.fromtimestamp (local time, since no tz argument is passed).
updated = datetime.datetime.fromtimestamp(int(UPDATE[0]))
lastupdate = updated.strftime('%Y-%m-%d %H:%M:%S')

# Text of every <value> element, in document order.
ParaValue = [node.text for node in root.iter('value')]

for idx in (0, 1):
    print(ParaValue[idx])
for idx in (0, 1):
    print(lastupdate, NAME[idx], ParaValue[idx])
From one file I could get following output as a result
2022-05-23 11:25:01 traffic_in 1.5012356187e+05
2022-05-23 11:25:01 traffic_out 1.7723777592e+05
But I have a set of XML files in /data/dump_xml/, and I need to get the results for all of them in the same form, together with the file name. I need to export all of those results as a dataframe. Can someone help me do this for the whole directory?
I am a beginner to python and I am stuck in a critical part of my script. The goal is to get fillable pdf data into one single csv file. I have built a script to do so through pdfminer but I am now stuck in the part where I should get my script to write all of the output data into my csv. Instead, I am only getting one data line written in my csv. Any clue please?
Here is my code:
import sys, os
import six
import csv
import numpy as np
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
def _as_text(obj):
    """Return *obj* as text suitable for a CSV cell."""
    if obj is None:
        return ""
    if isinstance(obj, bytes):
        return obj.decode("utf-8", errors="replace")
    return str(obj)


# Collect one (name, value) row per AcroForm field. The rows are gathered
# inside the loop so that every field is kept, not just the last one.
rows = []
with open("C:\\Users\\Sufyan\\Desktop\\test\\ccc.pdf", "rb") as fp:
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    fields = resolve1(doc.catalog["AcroForm"])["Fields"]
    for i in fields:
        field = resolve1(i)
        name, value = field.get("T"), field.get("V")
        names = resolve1(name)
        values = resolve1(value)
        print(values)
        rows.append([_as_text(names), _as_text(values)])

# Open the CSV once, after all fields were read, and write every row.
# NOTE(review): assumes Python 3, where the csv module wants a text-mode file
# opened with newline=''.
with open('test.csv', 'w', newline='', encoding='utf-8') as f:
    csv.writer(f).writerows(rows)
Clues:
The loop variable i is never used inside this loop:
# Excerpt of the write loop under discussion: the loop variable `i` is not
# referenced in the body, so each iteration writes the same `values` object.
with open('test.csv','wb') as f:
for i in names:
f.write(values)
In that same for loop, what does names actually contain? Many values? One? Two? Why?
I want to copy the runs from one paragraph into another using python-docx, so I wrote code like this:
But it does not work.
How should I change this code?
from docx import Document from docxcompose.composer import Composer
import re
import time
import os
# Open the destination document and add the paragraph meant to receive runs.
master = Document("out.docx")
po = master.add_paragraph('It is: ')
# Source document whose paragraphs are scanned.
doc = Document("in4.docx")
for p in doc.paragraphs:
# Only paragraphs whose text matches 'yyy' are considered.
if re.search('yyy', p.text):
for r in p.runs:
# NOTE(review): this appends to the list object returned by `.runs`;
# python-docx presumably builds that list on each access, so the document
# itself is likely left unchanged -- confirm against the python-docx source.
master.paragraphs[-1].runs.append(r)
# NOTE(review): the '.doc1' extension looks like a typo for '.docx' -- confirm.
master.save('out1.doc1')
Thanks!
For each matching run, you can create a new run with text and style from the source into the destination Document.
from docx import Document
import re
import time
import os
# Build a new document with one paragraph, then re-create in it every run of
# each source paragraph whose text matches 'yyy' (text and style are copied
# from the source run).
master = Document()
po = master.add_paragraph('It is: ')

doc = Document("demo1.docx")
for p in doc.paragraphs:
    if not re.search('yyy', p.text):
        continue
    for r in p.runs:
        po.add_run(text=r.text, style=r.style)

master.save('out1.docx')
If you're not searching for a regular expression then instead do a text search:
if 'yyy' in p.text: ...
My current python script:
import ftplib
import hashlib
import httplib
import pytz
from datetime import datetime
import urllib
from pytz import timezone
import os.path, time
import glob
def ftphttp(cam_name):
    """Register each PNG in /tmp/image over HTTP, then upload it via FTP.

    For every image: its modification time is rendered as an Asia/Singapore
    ISO timestamp, the MD5 hex digest of ``cam_name(cam_name) + timestamp``
    becomes the remote file name, a GET to /camera/store is issued with that
    name, and the file is stored on the FTP server as ``<digest>.png``.

    NOTE(review): the nesting was reconstructed from a flattened paste; all
    steps are assumed to run once per image -- confirm.
    """
    for image in glob.glob(os.path.join('/tmp/image/*.png')):
        mtime = os.path.getmtime(image)
        utc_stamp = datetime.fromtimestamp(mtime, pytz.utc)
        localtime = utc_stamp.astimezone(timezone('Asia/Singapore')).isoformat()
        tscam = cam_name(cam_name) + localtime

        # A fresh FTP session is opened (and later quit) for every image.
        ftp = ftplib.FTP('10.217.137.121','kevin403','S$ip1234')
        ftp.cwd('/var/www/html/image')

        dd = hashlib.md5(tscam).hexdigest()

        # Announce the file over HTTP before uploading it.
        conn = httplib.HTTPConnection('10.217.137.121', 8086)
        conn.connect()
        query = urllib.urlencode({'ts' : localtime})
        conn.request('GET','/camera/store?fn='+dd+'&'+query+'&cam='+cam_name(cam_name))
        conn.getresponse().read()
        conn.close()

        with open(image, 'rb') as payload:
            ftp.storbinary('STOR '+dd+ '.png', payload)
        ftp.quit()
Right now I'm able to send multiple files to another folder, but the data stored in the database is duplicated. For example, when I store 3 files in the folder, my database ends up with 6 records stored via httplib. Does anybody have any idea why the data is duplicated? Help needed!
How can I import data, for example from cell A1?
When I use etree.parse() I get an error, because I don't have an XML file.
It's a zip file:
import zipfile
from lxml import etree
# The .ods file is opened as a zip archive and content.xml is read out of it.
# The context manager closes the archive as soon as the bytes are in memory.
with zipfile.ZipFile('mydocument.ods') as z:
    data = z.read('content.xml')
# Parse the raw XML bytes into an lxml element tree and print it.
data = etree.XML(data)
etree.dump(data)