I'm using Python to update data within the XML below. What I'm trying to do overall is update text within a PowerPoint dynamically using an incoming dataframe. To do so, I pull the XML out of the pptx file, but I can't figure out how to change the text within the XML.
Dataframe:
Old New
0 A.1 Valuation
1 A.2 12000
2 A.3 5.23
3 A.4 Test,Complete
XMLFile: Github Link
XML Snippet:
<a:tc>
<a:txBody>
<a:bodyPr/>
<a:lstStyle/>
<a:p>
<a:pPr algn="l" fontAlgn="auto"/>
<a:r>
<a:rPr lang="en-US" sz="1800" dirty="0">
<a:effectLst/>
</a:rPr>
<a:t>A.1</a:t>
</a:r>
<a:endParaRPr lang="en-US" sz="1800" b="0" i="0" dirty="0">
<a:solidFill>
<a:srgbClr val="000000"/>
</a:solidFill>
<a:effectLst/>
<a:latin typeface="Calibri" panose="020F0502020204030204" pitchFamily="34" charset="0"/>
</a:endParaRPr>
</a:p>
</a:txBody>
<a:tcPr/>
</a:tc>
Python Code:
import shutil
from pathlib import Path
import lxml.etree as ET
import pandas as pd
# Script: load an Old/New mapping from an Excel sheet and attempt to replace
# matching text inside a slide's XML, then write the XML back to the same path.
xml_file = r'\Desktop\PowerPoint XML\Test\ppt\slides\slide1.xml'
Dataframe = r'\Desktop\PowerPoint XML\Dataframe.xlsx'
df = pd.read_excel(Dataframe)
# Both columns are cast to str so numeric replacement values become text.
df['Old'] = df['Old'].astype(str)
df['New'] = df['New'].astype(str)
# open xml file that contains slide data
tree = ET.parse(xml_file)
treeRoot = tree.getroot()
# Serialized copy of the tree, taken BEFORE any replacement is attempted.
rootString = ET.tostring(treeRoot)
decodedRootString = bytes.decode(rootString)
old = df.Old; new = df.New
# search for old value then replace with new
# NOTE(review): elem.text is None for elements that carry no text (e.g.
# <a:bodyPr/>), which is what raises the AttributeError in the error log.
# In addition, str.replace returns a new string whose result is discarded
# here, and old[elem] / new[elem] index the Series with an Element object
# rather than with a row label.
for elem in treeRoot.iter():
elem.text.replace(old[elem],new[elem])
# NOTE(review): decodedRootString is never modified after it was created, so
# the content written below is the same XML that was read.
FinalString=str.encode(decodedRootString)
#save the XML File
root = ET.fromstring(FinalString)
my_tree = ET.ElementTree(root)
with open(xml_file, 'wb') as f:
f.write(ET.tostring(my_tree))
Error Log:
File "\tempCodeRunnerFile.py", line 36, in <module>
elem.text.replace(old[elem],new[elem])
AttributeError: 'NoneType' object has no attribute 'replace'
I have the XML file shown below, which uses namespaces, and I'm trying to extract the values of Node24 from it.
My current code is below, but it does not print anything:
import xml.etree.ElementTree as ET
# Parse ifile.xml and try to print every Node24 element.
filename = 'ifile.xml'
tree = ET.parse(filename)
root = tree.getroot()
# NOTE(review): the document declares a default namespace on its root
# element, so its elements have namespace-qualified tags; the bare name
# 'Node24' therefore matches nothing and the loop body never runs.
for neighbor in root.iter('Node24'):
print(neighbor)
My expected output would be:
03-c34ko
04-c64ko
07-c54ko
This is the ifile.xml:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<data-main-43:DATAMAINXZ123 xmlns="https://example.com/DATA-MAIN-XZ123" xmlns:data-gen="https://example.com/DATA-GEN" xmlns:data-main-43="https://example.com/DATA-MAIN-XZ123" xmlns:xsi="http://www.w3.org/2011/XMLSchema-instance" xsi:schemaLocation="https://example.com/DATA-MAIN-XZ123 data-main-ir21-12.1.xsd">
<MAINXZ123FileHeader>
<DATAGenSchemaVersion>2.4</DATAGenSchemaVersion>
<DATAMAINXZ123SchemaVersion>12.1</DATAMAINXZ123SchemaVersion>
</MAINXZ123FileHeader>
<Node1>
<Node2>WTRT DDK</Node2>
<Node3>XYZW</Node3>
<Node4>
<Node5>
<Node6>XYZW882</Node6>
<Node5Type>Ter</Node5Type>
<Node5Data>
<Node9>
<Node10>
<Node11>2019-02-18</Node11>
<Node12>
<Node13>
<Node14>
<Node15>Ermso</Node15>
<Node16>
<PrimaryNode16>
<Node18>19.32</Node18>
<Node18>12.11</Node18>
</PrimaryNode16>
<SecondaryNode16>
<Node18>82.97</Node18>
<Node18>12.41</Node18>
</SecondaryNode16>
</Node16>
<Node20>Muuatippw</Node20>
</Node14>
</Node13>
</Node12>
<Node21>
<Node22>
<Node23>
<Node24>03-c34ko</Node24>
<Node24>04-c64ko</Node24>
<Node24>07-c54ko</Node24>
</Node23>
<Node26Node22EdgeAgent>
<Node26>jjkksonem</Node26>
<PrimaryNode18DEANode26>
<Node18>2.40</Node18>
</PrimaryNode18DEANode26>
</Node26Node22EdgeAgent>
</Node22>
</Node21>
<Node28>
<Node29>
<Node30>false</Node30>
<Node31>true</Node31>
</Node29>
</Node28>
</Node10>
</Node9>
</Node5Data>
</Node5>
</Node4>
</Node1>
</data-main-43:DATAMAINXZ123>
How can I do this? Thanks in advance.
Like the duplicate mzjn referenced, just add the namespace uri to the element name...
import xml.etree.ElementTree as ET
# Print the text of every Node24 element found anywhere in ifile.xml.
filename = 'ifile.xml'
tree = ET.parse(filename)
root = tree.getroot()
# Tag written as "{namespace-uri}local-name", the form ElementTree matches on.
node24_tag = '{https://example.com/DATA-MAIN-XZ123}Node24'
for node in root.iter(node24_tag):
    print(node.text)
Note: I also added .text to neighbor so you'd get the requested result.
I'm using a regular expression, so this is an alternative answer.
I converted the XML into a string and then searched for all the text between the Node24 tags.
import xml.etree.ElementTree as ET
import re
# Extract the Node24 values by serializing the parsed tree and running a
# regular expression over the resulting text.
filename = 'ifile.xml'
tree = ET.parse(filename)
root = tree.getroot()
# Ask for a real str. Calling str() on the bytes returned by the default
# tostring() searches the "b'...'" repr instead, in which backslashes and
# quotes are escaped and line breaks show up as a literal "\n".
xml_str = ET.tostring(root, encoding='unicode')
# The pattern relies on the serializer emitting these elements with the
# generated "ns0:" prefix. DOTALL keeps values that span several lines
# matchable, as they were when line breaks were escaped in the repr.
for s in re.findall(r'ns0:Node24>(.*?)</ns0:Node24', xml_str, flags=re.DOTALL):
    print(s)
Result:
03-c34ko
04-c64ko
07-c54ko
EDIT****Other aim:
I would also like to iterate through every Excel row and save each row as a separate .xml file (filename = invoice.text).
Any help is appreciated.
->>> The problem is that the second .xml file that gets created also contains the data from the first row. Can anybody help me? It would be highly appreciated.
Help is appreciated: I want to give the root element its name and attributes, give the XML a version "", and save each Excel row as a separate .xml file.
I have already set up the Excel file with openpyxl.
EDIT
Code edited
from lxml import etree
import openpyxl
# Script: build a DATEV LedgerImport document from spreadsheet rows 2-5 and
# write one XML file per row, named after the row's invoice id (column 13).
# Create root element with namespace information
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"
# NOTE(review): the root element is created exactly once, before the row loop.
xmlRoot = etree.Element(
"{" + xmlns + "}LedgerImport",
version=version,
attrib={"{" + xsi + "}schemaLocation": schemaLocation},
generator_info=generator_info,
generating_system=generating_system,
nsmap={'xsi': xsi, None: xmlns}
)
#### open the Excel spreadsheet
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']
# build the xml tree
# NOTE(review): every iteration appends another <consolidate> to the one
# shared xmlRoot, and the file written for row i serializes that whole root.
# That is why the second file also contains the first row's data; creating
# the root inside the loop would give one row per file.
for i in range(2,6):
#consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(sheet.cell(row=i,column=16).value, 'consolidatedDate': str(sheet.cell(row=i,column=2).value, 'consolidatedInvoiceId': str(sheet.cell(row=i,column=13).value, 'consolidatedCurrencyCode': str(sheet.cell(row=i,column=12).value )})
consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(sheet.cell(row=i,column=16).value),'consolidatedDate': str(sheet.cell(row=i,column=2).value), 'consolidatedInvoiceId': str(sheet.cell(row=i,column=13).value), 'consolidatedCurrencyCode': str(sheet.cell(row=i,column=12).value) })
accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
# NOTE(review): unlike the attributes above, the cell values below are
# assigned to .text without str(); presumably numeric or date cells need the
# same conversion — confirm against the sheet contents.
account = etree.SubElement(accountsPayableLedger, 'bookingText')
account.text = sheet.cell(row=i,column=21).value
invoice = etree.SubElement(accountsPayableLedger, 'invoiceId')
invoice.text = sheet.cell(row=i,column=13).value
date = etree.SubElement(accountsPayableLedger, 'date')
date.text = sheet.cell(row=i,column=2).value
amount = etree.SubElement(accountsPayableLedger, 'amount')
amount.text = sheet.cell(row=i,column=16).value
account_no = etree.SubElement(accountsPayableLedger, 'accountNo')
account_no.text = sheet.cell(row=i,column=19).value
cost1 = etree.SubElement(accountsPayableLedger, 'costCategoryId')
cost1.text = sheet.cell(row=i,column=15).value
currency_code = etree.SubElement(accountsPayableLedger, 'currencyCode')
currency_code.text = sheet.cell(row=i,column=12).value
# partyId and bpAccountNo are both filled from column 20.
party_id = etree.SubElement(accountsPayableLedger, 'partyId')
party_id.text = sheet.cell(row=i,column=20).value
bpaccount = etree.SubElement(accountsPayableLedger, 'bpAccountNo')
bpaccount.text = sheet.cell(row=i,column=20).value
#doc = etree.ElementTree(xmlRoot)
#doc.write( str(sheet.cell(row=i,column=13).value)+".xml", xml_declaration=True, encoding='utf-8', pretty_print=True)
# Serialize the whole root to UTF-8 bytes, decode them, and write the text to
# "<column 13 value>.xml".
doc = etree.ElementTree(xmlRoot)
with open(str(sheet.cell(row=i,column=13).value)+".xml", 'w') as f:
f.write(etree.tostring(doc, pretty_print=True, xml_declaration=True, encoding='utf-8').decode('utf-8'))
# doc = etree.ElementTree(xmlRoot)
# with open("test1337.xml", 'w') as f:
# f.write(etree.tostring(doc, pretty_print=True, xml_declaration=True, encoding='utf-8').decode('utf-8'))
# convert into elementtree and write it directly into a file
#doc = etree.ElementTree(xmlRoot)
#outFile = open("test1337.xml", 'w')
#doc.write("test1337.xml", xml_declaration=True, encoding='utf-8', pretty_print=True)
#doc.close()
Please help, I have been sitting on this for hours.
Thanks so much
I recommend using etree elements and subelements and converting them into an element tree later on. This provides more flexibility while creating the XML, especially when you want to iterate over an existing data structure:
from lxml import etree
# Example: build a minimal DATEV LedgerImport document with lxml and save it
# as test1337.xml.
# Create root element with namespace information
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"
# The tag and the schemaLocation attribute use the "{namespace-uri}name"
# form; nsmap maps the default namespace (None) and the "xsi" prefix.
xmlRoot = etree.Element(
    "{" + xmlns + "}LedgerImport",
    version=version,
    attrib={"{" + xsi + "}schemaLocation": schemaLocation},
    generator_info=generator_info,
    generating_system=generating_system,
    nsmap={'xsi': xsi, None: xmlns}
)
# build the xml tree
consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': "1337.01"})
accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
account = etree.SubElement(accountsPayableLedger, 'bookingText')
account.text = 'amazon'
invoice = etree.SubElement(accountsPayableLedger, 'invoiceId')
invoice.text = "1"
# convert into elementtree and write it directly into a file
doc = etree.ElementTree(xmlRoot)
# tostring() already produces UTF-8 encoded bytes that match the XML
# declaration, so write them unchanged in binary mode. Decoding them and
# writing through a text-mode file would re-encode with the platform's
# default encoding, which need not be UTF-8.
with open("test1337.xml", 'wb') as f:
    f.write(etree.tostring(doc, pretty_print=True, xml_declaration=True, encoding='utf-8'))
The generated file looks like this:
<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://xml.datev.de/bedi/tps/ledger/v040" generating_system="DATEV manuell" generator_info="DATEV Musterdaten" version="4.0" xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
<consolidate consolidatedAmount="1337.01">
<accountsPayableLedger>
<bookingText>amazon</bookingText>
<invoiceId>1</invoiceId>
</accountsPayableLedger>
</consolidate>
</LedgerImport>
I need to parse a directory of xml files into one large csv file, I need certain attributes under the element 'Param' (attributes are 'Name' and 'PNum'). There is another XML file in the directory called Content.xml which I can get all the names of the other XML files and set them as the FileName. The issue is that I cannot figure out how to get these attributes in each XML file as each XML file has a different organisation and some don't seem to have these attributes in the first place.
I have written code that works for one of the XML files in the directory that outputs a CSV file with all the relevant information.
import xml.etree.ElementTree as ET
import csv
import os
# Export the 'Name' and 'PNum' attributes of every child of the fifth
# top-level element of one XML file to the CSV file 'CsvOut'.
FileName = '------.xml'
tree = ET.parse(FileName)
root = tree.getroot()[4]
# The file name without its ".xml" suffix labels every row of this file.
gen = FileName[:-4]
# newline='' is what the csv module asks for when opening the output file;
# the with-block closes the file even if writing a row fails.
with open('CsvOut', 'w', newline='') as csv_out:
    csvwriter = csv.writer(csv_out)
    # The header is written once, up front, even if there are no rows.
    csvwriter.writerow(['Generation', 'Parameter Name', 'Parameter Number'])
    for child in root:
        # Write the generation as a plain string; wrapping it in a list put
        # the list's repr ("['name']") into the first column.
        csvwriter.writerow([gen, child.get('Name'), child.get('PNum')])
It is rather simple, and you can do it in two steps:
First, enumerate all XML files in the directory.
Then, run your code over each of these files.
import xml.etree.ElementTree as ET
import csv
import os
from glob import glob
# Export the 'Name' and 'PNum' attributes found in every *.xml file of the
# current directory into the single CSV file 'CsvOut'.
# create csv writer; newline='' is what the csv module asks for, and the
# with-block closes the file even if one of the XML files fails to parse
with open('CsvOut', 'w', newline='') as csv_out:
    csvwriter = csv.writer(csv_out)
    # write the header
    csvwriter.writerow(['Generation', 'Parameter Name', 'Parameter Number'])
    # iterate over the xml files in the current directory; sorted so the row
    # order does not depend on the order the file system lists them in
    for FileName in sorted(glob("*.xml")):
        tree = ET.parse(FileName)
        # NOTE(review): this assumes every file keeps the wanted elements
        # under its fifth top-level child; a file with fewer children raises
        # IndexError here — confirm against the real files.
        root = tree.getroot()[4]
        # The file name without its ".xml" suffix labels this file's rows.
        gen = FileName[:-4]
        for child in root:
            # Write the generation as a plain string; wrapping it in a list
            # put the list's repr ("['name']") into the first column.
            csvwriter.writerow([gen, child.get('Name'), child.get('PNum')])
I'm trying to find the simplest way of copying one node to another XML file. Both files will contain the same node - just the contents of that node will be different.
In the past I've done some crazy copying of each element and subelement - but there has to be a better way..
# Master XML: parsed with strip_cdata=False.
parser = etree.XMLParser(strip_cdata=False)
tree = etree.parse('file1.xml', parser)
# Take the first element matched by //input - it has a lot of subelements.
inputMaster= tree.xpath('//input')[0]
# Destination XML: parsed the same way, with its own parser instance.
parser2 = etree.XMLParser(strip_cdata=False)
tree2 = etree.parse('file2.xml', parser2)
# This won't work (assigning to the result of a call is not valid Python),
# but it shows the kind of one-line copy that would be nice to have.
etree.SubElement(tree2,'input') = inputMaster
Here's one way. It's not brilliant, as it loses the node's position (i.e. it puts the node at the end), but hey...
def getMaster(somefile):
    """Return the ``input`` child of the root element of *somefile*.

    The file is parsed with ``strip_cdata=False``. The return value is
    whatever ``find('input')`` yields on the document's root element.
    """
    cdata_parser = etree.XMLParser(strip_cdata=False)
    root = etree.parse(somefile, cdata_parser).getroot()
    return root.find('input')
# Replace the <input> child of the destination file's root with the one taken
# from the master file, then save the destination file in place.
inputXML = getMaster('master_file.xml')
parser = etree.XMLParser(strip_cdata=False)
tree = etree.parse('file_to_copy_node_to.xml', parser)
doc = tree.getroot()
# Drop the existing node, then add the master's node as the last child.
doc.remove(doc.find('input'))
doc.append(inputXML)
# Now write it
newxml = etree.tostring(tree, pretty_print=True)
# tostring() returns bytes here, so the file has to be opened in binary mode
# (writing bytes to a text-mode file raises TypeError on Python 3); the
# with-block also closes the file if the write fails.
with open('file_to_copy_node_to.xml', 'wb') as f:
    f.write(newxml)