adding elements in xml using python - python

How do I add an element that has attributes?
I know how to add an element with text:
ET.subElement(root[2],'resource')
for x in root.iter('resource'):
but what i'm looking for is:
<resources>
<resource identifier="presentation_3" type="webcontent" href="questions.html" adlcp:scormtype="sco">
<file href="questions.html"/>
</resource>
</resources>
below is my code:
import xml.etree.ElementTree as ET
xmlfile = "imsmanifest.xml"
tree = ET.parse(xmlfile)
root = tree.getroot()
ET.SubElement(root[2],'file').set('href','index.html')
tree.write('new.xml')

Related

saxonc's transform_to_file(), executed in a loop, doesn't transform but gives non-sensical errors or partial output

My transformation stylesheet file contains:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Emits a <cities> wrapper containing one <city> per cities/country element of the input. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:output method="xml" encoding="UTF-8" indent="yes"/>
  <xsl:template match="/">
    <cities>
      <xsl:for-each select="cities/country">
        <!-- {@capital} is an attribute value template: it copies the country's capital attribute into name. -->
        <city name="{@capital}" isCapital="true"/>
      </xsl:for-each>
    </cities>
  </xsl:template>
</xsl:stylesheet>
My python code:
import os
import xml.etree.ElementTree as ET
from saxonpy import PySaxonProcessor
def main():
    """Write each <country> of the sample document to tmp.xml and transform it.

    For every child of the sample <data> element, the child is wrapped in a
    new <cities> root, written to ``tmp.xml`` and transformed with
    ``transformer.xsl`` into ``output_<n>.xml`` (n counts from 1).

    A new PySaxonProcessor is created for every child; any exception raised
    while transforming is printed and the loop continues with the next child.
    """
    print('starting code...')
    source_XML = '''
<data>
<country name="Denmark" capital="Copenhagen"/>
<country name="Germany" capital="Berlin"/>
<country name="France" capital="Paris"/>
</data>
'''
    parentroot = ET.fromstring(source_XML)
    children = list(parentroot)
    # create individual raw xmls
    for cnt, child in enumerate(children, start=1):
        childroot = ET.Element("cities")
        childroot.append(child)
        tempfile_tree = ET.ElementTree(childroot)
        # Alternative temp-file locations that were tried:
        # tempfile = "C:\\pythonProject\\stackoverflow\\tmp.xml"
        # tempfile = "C:\\gaga\\tmp.xml"
        # tempfile = os.path.abspath("tmp.xml")
        tempfile = "tmp.xml"
        transformedfile = f"output_{cnt}.xml"
        with open(tempfile, 'wb') as f:
            tempfile_tree.write(f, encoding='utf-8', xml_declaration=True)
        try:
            # The processor is created (and released) once per child here.
            with PySaxonProcessor(license=False) as proc:
                proc.set_cwd(os.getcwd())
                xsltproc = proc.new_xslt30_processor()
                xsltproc.transform_to_file(source_file=tempfile,
                                           stylesheet_file="transformer.xsl",
                                           output_file=transformedfile)
                print(f"{transformedfile} has been created.")
        except Exception as e:
            print(e)


if __name__ == "__main__":
    main()
my problem
I have saxonpy imported.
I'm trying to run saxonc's transform_to_file() inside a loop, but I'm unable to get the transformed output files.
Depending on what I set for tempfile, I get
# I/O error reported by XML parser processing C:\pythonProject\stackoverflow\░╚╒E ⌂:
# unknown protocol: c. Caused by java.net.MalformedURLException: unknown protocol: c
or
Content is not allowed in prolog
(this is definitely not the case, I checked the tempfile with a hexeditor)
or no error but output file containing only:
# <?xml version="1.0" encoding="UTF-8"?>
# <cities/>
Occasionally I also got correct output, but I can't reproduce that anymore.
Note: I disabled MAX_PATH when installing python 3.10.
Note: using Pycharm with poetry venv
Using SaxonC 11.3 I managed to run the python script above with the minor change:
from saxonpy import PySaxonProcessor
Replaced with:
from saxonc import *
I got the output:
starting code...
output_1.xml has been created.
output_2.xml has been created.
output_3.xml has been created.
These files all have the following content:
<?xml version="1.0" encoding="UTF-8"?>
<cities/>
As a workaround I replaced transform_to_file with transform_to_string:
valueStr = xsltproc.transform_to_string(source_file=tempfile,
stylesheet_file="transformer.xsl")
#output_file=transformedfile)
print(valueStr)
This does produce the correct output:
starting code...
source in transformFiletoString=tmp.xml stylsheet=transformer.xsl
<?xml version="1.0" encoding="UTF-8"?>
<cities>
<city name="Copenhagen" isCapital="true"/>
</cities>
output_1.xml has been created.
<?xml version="1.0" encoding="UTF-8"?>
<cities>
<city name="Berlin" isCapital="true"/>
</cities>
output_2.xml has been created.
<?xml version="1.0" encoding="UTF-8"?>
<cities>
<city name="Paris" isCapital="true"/>
</cities>
output_3.xml has been created.
I have now installed SaxonC 1.2.1
I got the correct output with the following python script:
import os
import xml.etree.ElementTree as ET
from saxonpy import *
def main():
    """Transform each <country> of the sample document with one shared processor.

    A single PySaxonProcessor and XSLT 3.0 processor are created up front and
    reused for every child of the sample <data> element. Each child is wrapped
    in a new <cities> root, written to ``tmp.xml`` and transformed with
    ``transformer.xsl`` into ``output_<n>.xml`` (n counts from 1).

    Any exception is printed; because the ``try`` encloses the whole loop, the
    first failure ends the run.
    """
    print('starting code...')
    source_XML = '''
<data>
<country name="Denmark" capital="Copenhagen"/>
<country name="Germany" capital="Berlin"/>
<country name="France" capital="Paris"/>
</data>
'''
    parentroot = ET.fromstring(source_XML)
    children = list(parentroot)
    # create individual raw xmls
    try:
        with PySaxonProcessor(license=False) as proc:
            proc.set_cwd(os.getcwd())
            xsltproc = proc.new_xslt30_processor()
            for cnt, child in enumerate(children, start=1):
                childroot = ET.Element("cities")
                childroot.append(child)
                tempfile_tree = ET.ElementTree(childroot)
                tempfile = "tmp.xml"
                transformedfile = f"output_{cnt}.xml"
                with open(tempfile, 'wb') as f:
                    tempfile_tree.write(f, encoding='utf-8', xml_declaration=True)
                # NOTE(review): presumably "s" is the source-file property of
                # the XSLT processor - confirm against the SaxonC API docs.
                xsltproc.set_property("s", tempfile)
                xsltproc.transform_to_file(source_file=tempfile,
                                           stylesheet_file="transformer.xsl",
                                           output_file=transformedfile)
                print(f"{transformedfile} has been created.")
    except Exception as e:
        print(e)


if __name__ == "__main__":
    main()
With apply_templates_returning_file() it works:
import os
import xml.etree.ElementTree as ET
from saxonpy import PySaxonProcessor
def main():
    """Transform each <country> of the sample document with a precompiled stylesheet.

    ``transformer.xsl`` is compiled once; the resulting transformer is then
    applied to every child of the sample <data> element. Each child is wrapped
    in a new <cities> root, written to a temporary file and transformed into
    ``output_<n>.xml`` (n counts from 1) via apply_templates_returning_file().

    Any exception is printed; because the ``try`` encloses the whole loop, the
    first failure ends the run.
    """
    print('starting code...')
    source_XML = '''
<data>
<country name="Denmark" capital="Copenhagen"/>
<country name="Germany" capital="Berlin"/>
<country name="France" capital="Paris"/>
</data>
'''
    parentroot = ET.fromstring(source_XML)
    children = list(parentroot)
    try:
        with PySaxonProcessor(license=False) as proc:
            proc.set_cwd(os.getcwd())
            xsltproc = proc.new_xslt30_processor()
            # Compile the stylesheet once, outside the loop.
            xslt30_transformer = xsltproc.compile_stylesheet(stylesheet_file="transformer.xsl")
            for cnt, child in enumerate(children, start=1):
                childroot = ET.Element("cities")
                childroot.append(child)
                tempfile_tree = ET.ElementTree(childroot)
                tempfile = "tmp_1234567890ABCDEFGHIJKLMNOP.xml"
                transformedfile = f"output_{cnt}.xml"
                with open(tempfile, 'wb') as f:
                    tempfile_tree.write(f, xml_declaration=True)
                xslt30_transformer.apply_templates_returning_file(source_file=tempfile,
                                                                  output_file=transformedfile)
                print(f"{transformedfile} has been created.")
    except Exception as e:
        print(e)


if __name__ == "__main__":
    main()

How to extract values from xml file with namespaces?

I have the xml file shown below, that has namespaces, for which I'm trying to extract the values of Node24
My current code is below, that is not printing anything:
import xml.etree.ElementTree as ET
filename = 'ifile.xml'
tree = ET.parse(filename)
root = tree.getroot()
for neighbor in root.iter('Node24'):
print(neighbor)
My expected output would be:
03-c34ko
04-c64ko
07-c54ko
This is the ifile.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<data-main-43:DATAMAINXZ123 xmlns="https://example.com/DATA-MAIN-XZ123" xmlns:data-gen="https://example.com/DATA-GEN" xmlns:data-main-43="https://example.com/DATA-MAIN-XZ123" xmlns:xsi="http://www.w3.org/2011/XMLSchema-instance" xsi:schemaLocation="https://example.com/DATA-MAIN-XZ123 data-main-ir21-12.1.xsd">
<MAINXZ123FileHeader>
<DATAGenSchemaVersion>2.4</DATAGenSchemaVersion>
<DATAMAINXZ123SchemaVersion>12.1</DATAMAINXZ123SchemaVersion>
</MAINXZ123FileHeader>
<Node1>
<Node2>WTRT DDK</Node2>
<Node3>XYZW</Node3>
<Node4>
<Node5>
<Node6>XYZW882</Node6>
<Node5Type>Ter</Node5Type>
<Node5Data>
<Node9>
<Node10>
<Node11>2019-02-18</Node11>
<Node12>
<Node13>
<Node14>
<Node15>Ermso</Node15>
<Node16>
<PrimaryNode16>
<Node18>19.32</Node18>
<Node18>12.11</Node18>
</PrimaryNode16>
<SecondaryNode16>
<Node18>82.97</Node18>
<Node18>12.41</Node18>
</SecondaryNode16>
</Node16>
<Node20>Muuatippw</Node20>
</Node14>
</Node13>
</Node12>
<Node21>
<Node22>
<Node23>
<Node24>03-c34ko</Node24>
<Node24>04-c64ko</Node24>
<Node24>07-c54ko</Node24>
</Node23>
<Node26Node22EdgeAgent>
<Node26>jjkksonem</Node26>
<PrimaryNode18DEANode26>
<Node18>2.40</Node18>
</PrimaryNode18DEANode26>
</Node26Node22EdgeAgent>
</Node22>
</Node21>
<Node28>
<Node29>
<Node30>false</Node30>
<Node31>true</Node31>
</Node29>
</Node28>
</Node10>
</Node9>
</Node5Data>
</Node5>
</Node4>
</Node1>
</data-main-43:DATAMAINXZ123>
How can I do this? Thanks in advance.
Like the duplicate mzjn referenced, just add the namespace uri to the element name...
import xml.etree.ElementTree as ET
filename = 'ifile.xml'
tree = ET.parse(filename)
root = tree.getroot()
# Elements in a namespace are matched by their qualified name,
# written as "{namespace-uri}localname".
for neighbor in root.iter('{https://example.com/DATA-MAIN-XZ123}Node24'):
    print(neighbor.text)
Note: I also added .text to neighbor so you'd get the requested result.
This is an alternative answer that uses a regular expression.
I converted the XML into a string and then searched for all text between the Node24 tags.
import xml.etree.ElementTree as ET
import re
filename = 'ifile.xml'
tree = ET.parse(filename)
root = tree.getroot()
# ET.tostring() returns ASCII-encoded bytes by default; decode them to real
# text instead of searching the escaped "b'...'" repr that str() would give.
xml_str = ET.tostring(root).decode('ascii')
# The pattern matches the "ns0:" prefix used for the namespace in the serialised string.
for s in re.findall(r'ns0:Node24>(.*?)</ns0:Node24', xml_str):
    print(s)
Result:
03-c34ko
04-c64ko
07-c54ko

Python: specify XMLNS on xml.etree elements

in my Python code I'm currently using the xml.etree library to create a tree and then dump it to an XML string. Unfortunately I can't use modules other than the ones in the Python Standard Libraries to do that.
Here is my code:
import xml.etree.ElementTree as ET
def dump_to_XML():
    """Build the sample tree and return it serialised as XML.

    The tree is <root> with two children: <child1> (text "foo") and <child2>,
    which holds <grandchild1> (text "bar").

    Returns:
        The result of ET.tostring() with encoding 'utf8' and method 'xml'.
    """
    root_node = ET.Element("root")
    c1_node = ET.SubElement(root_node, "child1")
    c1_node.text = "foo"
    c2_node = ET.SubElement(root_node, "child2")
    gc1_node = ET.SubElement(c2_node, "grandchild1")
    gc1_node.text = "bar"
    return ET.tostring(root_node, encoding='utf8', method='xml')
which gives the string:
<?xml version='1.0' encoding='utf8'?>
<root>
<child1>foo</child1>
<child2>
<grandchild1>bar</grandchild1>
</child2>
</root>
Now, I have two schema files located - say - http://myhost.com/p.xsd and http://myhost.com/q.xsd, I want the output string to be turned into:
<?xml version='1.0' encoding='UTF-8'?>
<root xmlns:p="http://myhost.com/p.xsd" xmlns:q="http://myhost.com/q.xsd">
<p:child1>foo</p:child1>
<p:child2>
<q:grandchild1>bar</q:grandchild1>
</p:child2>
</root>
How can I leverage the etree library in order to achieve that?
Thanks in advance
Here we go:
import xml.etree.ElementTree as ET
xmlns_uris = {'p': 'http://myhost.com/p.xsd',
'q': 'http://myhost.com/q.xsd'}
def dump_to_XML():
    """Build the sample tree, add namespace prefixes and return it as XML.

    The tree is <root> with <child1> (text "foo") and <child2>, which holds
    <grandchild1> (text "bar"). <grandchild1> is prefixed with 'q' first;
    every remaining unprefixed element below the root is then prefixed with
    'p'. Finally the xmlns:* declarations from ``xmlns_uris`` are set on the
    root.

    Returns:
        The result of ET.tostring() with encoding 'UTF-8' and method 'xml'.
    """
    root_node = ET.Element("root")
    c1_node = ET.SubElement(root_node, "child1")
    c1_node.text = "foo"
    c2_node = ET.SubElement(root_node, "child2")
    gc1_node = ET.SubElement(c2_node, "grandchild1")
    gc1_node.text = "bar"
    # Order matters: tag 'q' first so the later 'p' pass skips <grandchild1>,
    # whose tag then already contains a ':'.
    annotate_with_XMLNS_prefixes(gc1_node, 'q', False)
    annotate_with_XMLNS_prefixes(root_node, 'p')
    add_XMLNS_attributes(root_node, xmlns_uris)
    return ET.tostring(root_node, encoding='UTF-8', method='xml')
def annotate_with_XMLNS_prefixes(tree, xmlns_prefix, skip_root_node=True):
    """Prepend ``xmlns_prefix + ':'`` to every tag that does not contain a ':'.

    Args:
        tree: An Element, or an object with getroot() (e.g. an ElementTree),
            whose elements are renamed in place.
        xmlns_prefix: Prefix to put in front of each unprefixed tag.
        skip_root_node: When true (the default), the root element itself is
            left untouched and only its descendants are renamed.
    """
    if not ET.iselement(tree):
        tree = tree.getroot()
    iterator = tree.iter()
    if skip_root_node:  # Add XMLNS prefix also to the root node?
        # iter() yields the root element first; consume it so it is skipped.
        # The builtin next() works on Python 2.6+ and Python 3, unlike the
        # Python-2-only iterator.next() method.
        next(iterator)
    for e in iterator:
        if ':' not in e.tag:
            e.tag = xmlns_prefix + ":" + e.tag
def add_XMLNS_attributes(tree, xmlns_uris_dict):
    """Set an ``xmlns:<prefix>`` attribute on the root for every dict entry.

    Args:
        tree: An Element, or an object with getroot() (e.g. an ElementTree);
            the attributes are set on that element / root in place.
        xmlns_uris_dict: Mapping of namespace prefix to namespace URI.
    """
    if not ET.iselement(tree):
        tree = tree.getroot()
    for prefix, uri in xmlns_uris_dict.items():
        tree.attrib['xmlns:' + prefix] = uri
Executing: print dump_to_XML() gives:
<?xml version='1.0' encoding='UTF-8'?>
<root xmlns:p="http://myhost.com/p.xsd" xmlns:q="http://myhost.com/q.xsd">
<p:child1>foo</p:child1>
<p:child2>
<q:grandchild1>bar</q:grandchild1>
</p:child2>
</root>
from lxml import etree
# Prefix -> namespace URI map; passed as nsmap when creating <root>.
xmlns_uris = {'p': 'http://myhost.com/p.xsd', 'q': 'http://myhost.com/q.xsd'}
root = etree.Element('root', nsmap=xmlns_uris)
# Namespaced tags are written as "{namespace-uri}localname".
child1 = etree.SubElement(root, '{%s}child1' % xmlns_uris['p'])
child1.text = 'foo'
child2 = etree.SubElement(root, '{%s}child2' % xmlns_uris['p'])
grandchild1 = etree.SubElement(child2, '{%s}grandchild1' % xmlns_uris['q'])
grandchild1.text = 'bar'
# tostring() is asked for UTF-8 bytes, so decode with the same codec;
# decoding as cp1251 would corrupt any non-ASCII characters.
print(etree.tostring(root, pretty_print=True, encoding='UTF-8', xml_declaration=True).decode('utf-8'))

python xml pretty print not working

I am changing some XML by adding nodes and values from a list. I can successfully create all the new tags and values between the contributors tags, but when I save the XML out to a new file, the tags I create are all on one line. Here is a sample of my code:
templateXml = """<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<package>
<delivery_type>new</delivery_type>
<feature>
<feature_type>Movie</feature_type>
<contributors>
</contributors>
</package>"""
from lxml import etree
tree = etree.fromstring(templateXml)
node_video = tree.xpath('//feature/contributors')[0]
for cast in castList:
pageElement = etree.SubElement(node_video, 'contributor')
node_video1 = tree.xpath('//feature/contributors/contributor')[0]
pageElement.attrib['type'] = 'cast'
pageElement1 = etree.SubElement(pageElement, 'name')
pageElement1.text = cast.text
pageElement2 = etree.SubElement(pageElement, 'role')
pageElement2.text = "actor"
xmlFileOut = '/Users/User1/Desktop/Python/Done.xml'
with open(xmlFileOut, "w") as f:
f.write(etree.tostring(tree, pretty_print = True, xml_declaration = True, encoding='UTF-8', standalone="yes"))
Here is saved xml file:
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<package>
<delivery_type>new</delivery_type>
<feature>
<feature_type>Movie</feature_type>
<contributors>
<contributor type="cast"><name>John Doe</name><role>actor</role></contributor><contributor type="cast"><name>Another Actors name</name><role>actor</role></contributor><contributor type="cast"><name>Jane Doe</name><role>actor</role></contributor><contributor type="cast"><name>John Smith</name><role>actor</role></contributor></contributors>
</package>
I have solved this issue when opening an xml file to work on using the below code:
from lxml import etree
parser = etree.XMLParser(remove_blank_text=True) # makes pretty print work
path3 = 'path_to_xml_file'
open(path3)
tree = etree.parse(path3, parser)
root = tree.getroot()
tree.write(xmlFileOut, pretty_print = True, xml_declaration = True, encoding = 'UTF-8')
This works, but how do I get it to work with a string xml?
Taken from http://ruslanspivak.com/2014/05/12/how-to-pretty-print-xml-with-lxml/
import StringIO
import lxml.etree as etree
def prettify(xml_text):
    """Pretty prints xml.

    Parses ``xml_text`` with a parser that drops whitespace-only text nodes
    (remove_blank_text=True) and returns the result of
    etree.tostring(..., pretty_print=True).
    """
    parser = etree.XMLParser(remove_blank_text=True)
    # NOTE(review): the ``StringIO`` module exists only on Python 2; on
    # Python 3 the equivalent classes live in ``io`` - confirm the target version.
    file_obj = StringIO.StringIO(xml_text)
    tree = etree.parse(file_obj, parser)
    return etree.tostring(tree, pretty_print=True)
A simple solution might be to use StringIO:
from StringIO import StringIO
from lxml import etree
parser = etree.XMLParser(remove_blank_text=True)
tree = etree.parse(StringIO(templateXml), parser)

Changing a specific xml element using Python 3 ElementTree

I have a set of metadata files in xml which are updated regularly and I'm trying to automate.
I've worked out how to iteratively find and then replace text in the desired element of the XML, but thought there must be a direct way to access and change the element. I just can't work it out.
The metadata xml is formatted:
<?xml version="1.0" ?>
<metadata xml:lang="en">
<Esri>
<CreaDate>20120405</CreaDate>
<CreaTime>13113000</CreaTime>
<ArcGISFormat>1.0</ArcGISFormat>
<SyncOnce>TRUE</SyncOnce>
<ModDate>20121129</ModDate>
<ModTime>11433300</ModTime>
<ArcGISProfile>ItemDescription</ArcGISProfile>
</Esri>
<dataIdInfo>
<idPurp>Updated :: 121129_114038</idPurp>
</dataIdInfo>
</metadata>
My iterative approach was:
for child in root:
for xel in child.iter('idPurp'):
download_new_datetime = strftime('%y%m%d_%H%M%S')
download_new_text = 'Downloaded :: '
xel.text = download_new_text + download_new_datetime
tree.write(xmlfile)
Ideas appreciated on a better way.
I would write to the file only once I'm done with the loop:
import xml.etree.ElementTree as ET
from time import strftime
xmlfile = '/tmp/file'
tree = ET.parse(xmlfile)
root = tree.getroot()
for child in root:
    for xel in child.iter('idPurp'):
        download_new_datetime = strftime('%y%m%d_%H%M%S')
        download_new_text = 'Downloaded :: '
        xel.text = download_new_text + download_new_datetime
# Write the file once, after all idPurp elements have been updated.
tree.write(xmlfile)
I would even simplify that loop further to:
for child in root:
    for xel in child.iter('idPurp'):
        # strftime is imported directly (from time import strftime), so it is
        # called without the "time." module prefix.
        xel.text = 'Downloaded :: ' + strftime('%y%m%d_%H%M%S')
Two simpler ways, both work, tested.
First:
import xml.etree.ElementTree as ET
from time import strftime
xmlfile = 'metadata.xml'
tree = ET.parse(xmlfile)
root = tree.getroot()
xel = root.find('./dataIdInfo/idPurp')
xel.text = 'Downloaded :: ' + strftime('%y%m%d_%H%M%S')
tree.write(xmlfile)
Second:
import xml.etree.ElementTree as ET
from time import strftime
xmlfile = 'metadata.xml'
tree = ET.parse(xmlfile)
root = tree.getroot()
xel = root[1][0]
xel.text = 'Downloaded :: ' + strftime('%y%m%d_%H%M%S')
tree.write(xmlfile)
I prefer the first one, it's more readable in my opinion.

Categories

Resources