Python - multiple logs/structures in the same xml file using ElementTree - python

I wrote an xml file with ElementTree. The problem i'm facing is that I want to write multiple logs in the same xml file.
The code:
# xml.etree.cElementTree was deprecated in Python 3.3 and removed in 3.9;
# the plain ElementTree module exposes the same API.
import xml.etree.ElementTree as ET

# XML: build a single <LOG> record whose fields are child elements.
root = ET.Element('LOG')
DATE = ET.SubElement(root, 'DATE')
DATE.text = "child_1"
TIME = ET.SubElement(root, 'TIME')
TIME.text = "child_2"
CC = ET.SubElement(root, 'CC')
CC.text = "child_3"
AMOUNT = ET.SubElement(root, 'AMOUNT')
AMOUNT.text = "child_4"
tree = ET.ElementTree(root)

# Generating XML
tree.write("file_name.xlm")

# Print (the with-block closes the file handle once it has been read)
with open("file_name.xlm") as xml_file:
    print(xml_file.read())
Current output example:
<LOG><DATE>child_1</DATE><TIME>child_2</TIME><CC>child_3</CC><AMOUNT>child_4</AMOUNT></LOG>
Output that I want:
<LOG><DATE>child_1</DATE><TIME>child_2</TIME><CC>child_3</CC><AMOUNT>child_4</AMOUNT></LOG><LOG><DATE>child_1</DATE><TIME>child_2</TIME><CC>child_3</CC><AMOUNT>child_4</AMOUNT></LOG><LOG><DATE>child_1</DATE><TIME>child_2</TIME><CC>child_3</CC><AMOUNT>child_4</AMOUNT></LOG>...

What you want is just not how XML works: an XML document has exactly one root node. The closest you can get is to wrap your <LOG> entries in a container element:
<ALL_LOGS>
<LOG>....</LOG>
<LOG>....</LOG>
</ALL_LOGS>

Related

Unable to edit powerpoint XML data points

I'm using Python to update data within the XML below. What I'm trying to do overall is update text within a PowerPoint presentation dynamically, using an incoming dataframe. To do so, I pull the XML out of the .pptx file, but I can't figure out how to change the text within the XML.
Dataframe:
Old New
0 A.1 Valuation
1 A.2 12000
2 A.3 5.23
3 A.4 Test,Complete
XMLFile: Github Link
XML Snippet:
<a:tc>
<a:txBody>
<a:bodyPr/>
<a:lstStyle/>
<a:p>
<a:pPr algn="l" fontAlgn="auto"/>
<a:r>
<a:rPr lang="en-US" sz="1800" dirty="0">
<a:effectLst/>
</a:rPr>
<a:t>A.1​</a:t>
</a:r>
<a:endParaRPr lang="en-US" sz="1800" b="0" i="0" dirty="0">
<a:solidFill>
<a:srgbClr val="000000"/>
</a:solidFill>
<a:effectLst/>
<a:latin typeface="Calibri" panose="020F0502020204030204" pitchFamily="34" charset="0"/>
</a:endParaRPr>
</a:p>
</a:txBody>
<a:tcPr/>
</a:tc>
Python Code:
import shutil
from pathlib import Path
import lxml.etree as ET
import pandas as pd
xml_file = r'\Desktop\PowerPoint XML\Test\ppt\slides\slide1.xml'
Dataframe = r'\Desktop\PowerPoint XML\Dataframe.xlsx'
df = pd.read_excel(Dataframe)
df['Old'] = df['Old'].astype(str)
df['New'] = df['New'].astype(str)
# open xml file that contains slide data
tree = ET.parse(xml_file)
treeRoot = tree.getroot()
rootString = ET.tostring(treeRoot)
decodedRootString = bytes.decode(rootString)
old = df.Old; new = df.New
# search for old value then replace with new
for elem in treeRoot.iter():
elem.text.replace(old[elem],new[elem])
FinalString=str.encode(decodedRootString)
#save the XML File
root = ET.fromstring(FinalString)
my_tree = ET.ElementTree(root)
with open(xml_file, 'wb') as f:
f.write(ET.tostring(my_tree))
Error Log:
File "\tempCodeRunnerFile.py", line 36, in <module>
elem.text.replace(old[elem],new[elem])
AttributeError: 'NoneType' object has no attribute 'replace'

How to extract values from xml file with namespaces?

I have the xml file shown below, that has namespaces, for which I'm trying to extract the values of Node24
My current code is below, that is not printing anything:
import xml.etree.ElementTree as ET
filename = 'ifile.xml'
tree = ET.parse(filename)
root = tree.getroot()
for neighbor in root.iter('Node24'):
print(neighbor)
My expected output would be:
03-c34ko
04-c64ko
07-c54ko
This is the ifile.xml:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<data-main-43:DATAMAINXZ123 xmlns="https://example.com/DATA-MAIN-XZ123" xmlns:data-gen="https://example.com/DATA-GEN" xmlns:data-main-43="https://example.com/DATA-MAIN-XZ123" xmlns:xsi="http://www.w3.org/2011/XMLSchema-instance" xsi:schemaLocation="https://example.com/DATA-MAIN-XZ123 data-main-ir21-12.1.xsd">
<MAINXZ123FileHeader>
<DATAGenSchemaVersion>2.4</DATAGenSchemaVersion>
<DATAMAINXZ123SchemaVersion>12.1</DATAMAINXZ123SchemaVersion>
</MAINXZ123FileHeader>
<Node1>
<Node2>WTRT DDK</Node2>
<Node3>XYZW</Node3>
<Node4>
<Node5>
<Node6>XYZW882</Node6>
<Node5Type>Ter</Node5Type>
<Node5Data>
<Node9>
<Node10>
<Node11>2019-02-18</Node11>
<Node12>
<Node13>
<Node14>
<Node15>Ermso</Node15>
<Node16>
<PrimaryNode16>
<Node18>19.32</Node18>
<Node18>12.11</Node18>
</PrimaryNode16>
<SecondaryNode16>
<Node18>82.97</Node18>
<Node18>12.41</Node18>
</SecondaryNode16>
</Node16>
<Node20>Muuatippw</Node20>
</Node14>
</Node13>
</Node12>
<Node21>
<Node22>
<Node23>
<Node24>03-c34ko</Node24>
<Node24>04-c64ko</Node24>
<Node24>07-c54ko</Node24>
</Node23>
<Node26Node22EdgeAgent>
<Node26>jjkksonem</Node26>
<PrimaryNode18DEANode26>
<Node18>2.40</Node18>
</PrimaryNode18DEANode26>
</Node26Node22EdgeAgent>
</Node22>
</Node21>
<Node28>
<Node29>
<Node30>false</Node30>
<Node31>true</Node31>
</Node29>
</Node28>
</Node10>
</Node9>
</Node5Data>
</Node5>
</Node4>
</Node1>
</data-main-43:DATAMAINXZ123>
How can I do this? Thanks in advance.
Like the duplicate mzjn referenced, just add the namespace uri to the element name...
import xml.etree.ElementTree as ET

filename = 'ifile.xml'
tree = ET.parse(filename)
root = tree.getroot()

# ElementTree matches on the fully qualified tag, so the name has to be given
# in "{namespace-uri}localname" form. The unprefixed <Node24> elements live in
# the document's default namespace (the xmlns="..." declared on the root).
for neighbor in root.iter('{https://example.com/DATA-MAIN-XZ123}Node24'):
    # .text is the element's character data, e.g. "03-c34ko"
    print(neighbor.text)
Note: I also added .text to neighbor so you'd get the requested result.
This is an alternative answer that uses a regular expression.
I converted the XML into a string and then searched for all the text between the Node24 tags.
import xml.etree.ElementTree as ET
import re

filename = 'ifile.xml'
tree = ET.parse(filename)
root = tree.getroot()

# Serialise straight to str. Calling str() on the bytes that ET.tostring()
# returns by default would search the bytes *repr* (b'...'), in which
# newlines, quotes and backslashes show up as escape sequences.
xml_str = ET.tostring(root, encoding='unicode')

# NOTE: the pattern relies on the "ns0" prefix that ElementTree generates when
# it re-serialises the namespaced elements. DOTALL lets a value that spans
# several lines still match.
for s in re.findall(r'ns0:Node24>(.*?)</ns0:Node24', xml_str, flags=re.DOTALL):
    print(s)
Result:
03-c34ko
04-c64ko
07-c54ko

lxml give ROOT name attribute and xml file a version

EDIT****Other aim:
And I would love to iterate through every excel row and save each row as separate .xml file (filename = invoice.text)
any help appreciated
->>> the problem is, that the 2nd created .xml file has also data from the first row inside. Can anybody help me? Highly appreciated
help is appreciated, I want to give ROOT name attributes and the xml a version "" and save each excel row as a separate .xml file
I already setup the excel with openpyxl.
EDIT
Code edited
from lxml import etree
import openpyxl
# Create root element with namespace information
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"
xmlRoot = etree.Element(
"{" + xmlns + "}LedgerImport",
version=version,
attrib={"{" + xsi + "}schemaLocation": schemaLocation},
generator_info=generator_info,
generating_system=generating_system,
nsmap={'xsi': xsi, None: xmlns}
)
####open excel file speadsheet
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']
# build the xml tree
for i in range(2,6):
#consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(sheet.cell(row=i,column=16).value, 'consolidatedDate': str(sheet.cell(row=i,column=2).value, 'consolidatedInvoiceId': str(sheet.cell(row=i,column=13).value, 'consolidatedCurrencyCode': str(sheet.cell(row=i,column=12).value )})
consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(sheet.cell(row=i,column=16).value),'consolidatedDate': str(sheet.cell(row=i,column=2).value), 'consolidatedInvoiceId': str(sheet.cell(row=i,column=13).value), 'consolidatedCurrencyCode': str(sheet.cell(row=i,column=12).value) })
accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
account = etree.SubElement(accountsPayableLedger, 'bookingText')
account.text = sheet.cell(row=i,column=21).value
invoice = etree.SubElement(accountsPayableLedger, 'invoiceId')
invoice.text = sheet.cell(row=i,column=13).value
date = etree.SubElement(accountsPayableLedger, 'date')
date.text = sheet.cell(row=i,column=2).value
amount = etree.SubElement(accountsPayableLedger, 'amount')
amount.text = sheet.cell(row=i,column=16).value
account_no = etree.SubElement(accountsPayableLedger, 'accountNo')
account_no.text = sheet.cell(row=i,column=19).value
cost1 = etree.SubElement(accountsPayableLedger, 'costCategoryId')
cost1.text = sheet.cell(row=i,column=15).value
currency_code = etree.SubElement(accountsPayableLedger, 'currencyCode')
currency_code.text = sheet.cell(row=i,column=12).value
party_id = etree.SubElement(accountsPayableLedger, 'partyId')
party_id.text = sheet.cell(row=i,column=20).value
bpaccount = etree.SubElement(accountsPayableLedger, 'bpAccountNo')
bpaccount.text = sheet.cell(row=i,column=20).value
#doc = etree.ElementTree(xmlRoot)
#doc.write( str(sheet.cell(row=i,column=13).value)+".xml", xml_declaration=True, encoding='utf-8', pretty_print=True)
doc = etree.ElementTree(xmlRoot)
with open(str(sheet.cell(row=i,column=13).value)+".xml", 'w') as f:
f.write(etree.tostring(doc, pretty_print=True, xml_declaration=True, encoding='utf-8').decode('utf-8'))
# doc = etree.ElementTree(xmlRoot)
# with open("test1337.xml", 'w') as f:
# f.write(etree.tostring(doc, pretty_print=True, xml_declaration=True, encoding='utf-8').decode('utf-8'))
# convert into elementtree and write it directly into a file
#doc = etree.ElementTree(xmlRoot)
#outFile = open("test1337.xml", 'w')
#doc.write("test1337.xml", xml_declaration=True, encoding='utf-8', pretty_print=True)
#doc.close()
Please help sitting like hours on that.
Thanks so much
I recommend using etree elements and subelements and converting them into an element tree later on. This provides more flexibility while creating the XML, especially when you want to iterate over an existing data structure:
from lxml import etree

# Create root element with namespace information
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"

# The tag and the schemaLocation attribute use "{namespace-uri}name" notation;
# nsmap binds the "xsi" prefix and makes xmlns the default (None) namespace.
xmlRoot = etree.Element(
    "{" + xmlns + "}LedgerImport",
    version=version,
    attrib={"{" + xsi + "}schemaLocation": schemaLocation},
    generator_info=generator_info,
    generating_system=generating_system,
    nsmap={'xsi': xsi, None: xmlns}
)

# build the xml tree
consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': "1337.01"})
accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
account = etree.SubElement(accountsPayableLedger, 'bookingText')
account.text = 'amazon'
invoice = etree.SubElement(accountsPayableLedger, 'invoiceId')
invoice.text = "1"

# convert into elementtree and write it directly into a file
doc = etree.ElementTree(xmlRoot)
# tostring() already returns UTF-8 encoded bytes, so write them unchanged in
# binary mode. Decoding and writing through a text-mode file would re-encode
# with the platform default encoding, which may not match the declaration.
with open("test1337.xml", 'wb') as f:
    f.write(etree.tostring(doc, pretty_print=True, xml_declaration=True, encoding='utf-8'))
The generated file looks like this:
<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://xml.datev.de/bedi/tps/ledger/v040" generating_system="DATEV manuell" generator_info="DATEV Musterdaten" version="4.0" xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
<consolidate consolidatedAmount="1337.01">
<accountsPayableLedger>
<bookingText>amazon</bookingText>
<invoiceId>1</invoiceId>
</accountsPayableLedger>
</consolidate>
</LedgerImport>

How to parse multiple xml files and pass through attributes into csv?

I need to parse a directory of xml files into one large csv file, I need certain attributes under the element 'Param' (attributes are 'Name' and 'PNum'). There is another XML file in the directory called Content.xml which I can get all the names of the other XML files and set them as the FileName. The issue is that I cannot figure out how to get these attributes in each XML file as each XML file has a different organisation and some don't seem to have these attributes in the first place.
I have written code that works for one of the XML files in the directory that outputs a CSV file with all the relevant information.
import xml.etree.ElementTree as ET
import csv
import os
FileName = '------.xml'
tree = ET.parse(FileName)
root = tree.getroot()[4]
csv_out = open('CsvOut', 'w')
csvwriter = csv.writer(csv_out)
count = 0
for child in root:
generation = []
parameters = []
if count == 0:
csv_head = ['Generation', 'Parameter Name', 'Parameter Number']
csvwriter.writerow(csv_head)
count = count + 1
gen = FileName[:-4]
generation.append(gen)
parameters.append(generation)
name = child.get('Name')
parameters.append(name)
num = child.get('PNum')
parameters.append(num)
csvwriter.writerow(parameters)
csv_out.close()
It's rather simple and you can do it in two steps:
First, enumerate all xml files in the directory
Perform your code over these files
import xml.etree.ElementTree as ET
import csv
import os
from glob import glob

# create csv writer
# newline='' lets the csv module control the line endings (without it every
# record is followed by a blank row on Windows); the with-block closes the
# file even if one of the XML files fails to parse.
with open('CsvOut', 'w', newline='') as csv_out:
    csvwriter = csv.writer(csv_out)

    # write the header
    csv_head = ['Generation', 'Parameter Name', 'Parameter Number']
    csvwriter.writerow(csv_head)

    # iterate over the xml files in the current directory
    for FileName in glob("*.xml"):
        tree = ET.parse(FileName)
        # NOTE: assumes the elements of interest are the children of the
        # root's fifth child; a file with fewer children raises IndexError.
        root = tree.getroot()[4]
        # file name without the ".xml" extension
        gen = FileName[:-4]
        for child in root:
            # One row per child element. The generation is written as a plain
            # string; wrapping it in a list would put "['name']" in the cell.
            csvwriter.writerow([gen, child.get('Name'), child.get('PNum')])

Copy a node from one xml file to another using lxml

I'm trying to find the simplest way of copying one node to another XML file. Both files will contain the same node - just the contents of that node will be different.
In the past I've done some crazy copying of each element and subelement - but there has to be a better way..
#Master XML
parser = etree.XMLParser(strip_cdata=False)
tree = etree.parse('file1.xml', parser)
# Find the //input node - which has a lot of subelems
inputMaster= tree.xpath('//input')[0]
#Dest XML -
parser2 = etree.XMLParser(strip_cdata=False)
tree2 = etree.parse('file2.xml', parser2)
# this won't work but.. it would be nice
etree.SubElement(tree2,'input') = inputMaster
Here's one way - it's not brilliant, as it loses the position (i.e. it places the node at the end), but hey..
from lxml import etree


def getMaster(somefile):
    """Return the first <input> child of the root element of *somefile*.

    The file is parsed with strip_cdata=False so CDATA sections are kept.
    Returns None when the root element has no direct <input> child.
    """
    parser = etree.XMLParser(strip_cdata=False)
    tree = etree.parse(somefile, parser)
    doc = tree.getroot()
    inputMaster = doc.find('input')
    return inputMaster


inputXML = getMaster('master_file.xml')

parser = etree.XMLParser(strip_cdata=False)
tree = etree.parse('file_to_copy_node_to.xml', parser)
doc = tree.getroot()

# Drop the destination's own <input> and append the master's node instead;
# the new node therefore ends up as the last child of the root.
doc.remove(doc.find('input'))
doc.append(inputXML)

# Now write it
# etree.tostring() returns bytes, so the file must be opened in binary mode
# (writing bytes to a text-mode file raises TypeError on Python 3).
newxml = etree.tostring(tree, pretty_print=True)
with open('file_to_copy_node_to.xml', 'wb') as f:
    f.write(newxml)

Categories

Resources