how to extract an #value from XML file in Python?

how to extract an #value from XML file in Python? - python

I have the following structure in an XML file:
<current>
<city id="2510170" name="Triana">
<coord lon="-6.02" lat="37.38"/>
<country>ES</country>
<sun rise="2016-04-04T06:04:05" set="2016-04-04T18:50:07"/>
</city>
<temperature value="290.92" min="288.15" max="296.15" unit="kelvin"/>
<humidity value="93" unit="%"/>
<pressure value="1009" unit="hPa"/>
<wind>
<speed value="8.2" name="Fresh Breeze"/>
<gusts/>
<direction value="230" code="SW" name="Southwest"/>
</wind>
<clouds value="90" name="overcast clouds"/>
<visibility/>
<precipitation mode="no"/>
<weather number="501" value="moderate rain" icon="10d"/>
<lastupdate value="2016-04-04T10:05:00"/>
</current>
The question is how to extract the temperature (the @value attribute) using XPath in Python. That is, how to extract "290.92" from the following line:
<temperature value="290.92" min="288.15" max="296.15" unit="kelvin"/>

Assuming that root refers to the <current> node:
# Read the ``value`` attribute of <temperature> with an XPath query.
from lxml import etree

xml_file = 'test.xml'
# Read bytes rather than text: lxml refuses str input that carries an
# encoding declaration, and bytes let the parser honour that declaration.
with open(xml_file, 'rb') as xml:
    root = etree.XML(xml.read())
# XPath addresses attributes with ``@``; ``#value`` is not valid XPath.
# xpath() returns a list of matches, so take the first one.
temperature_value = root.xpath('./temperature/@value')[0]

I would simply do
import xml.etree.ElementTree as ET
# ET.parse returns an ElementTree wrapping the whole document (not the root
# Element), despite the variable name; find() is still available on it.
root = ET.parse('path_to_your_xml_file')
# './/temperature' returns the first <temperature> element at any depth.
temperature = root.find('.//temperature')
Now temperature.attrib is a dictionary with all of the info
# print is a function in Python 3; the trailing comments show the values
# taken from the <temperature> element of the sample document.
print(temperature.attrib['value'])  # 290.92
print(temperature.attrib['min'])  # 288.15
print(temperature.attrib['max'])  # 296.15
print(temperature.attrib['unit'])  # kelvin

# xml.etree.cElementTree was removed in Python 3.9; the plain ElementTree
# module picks up the C accelerator automatically when it is available.
from xml.etree import ElementTree as ET

tree = ET.parse("test.xml")
root = tree.getroot()
# findall('temperature') only looks at direct children of the root element.
for temp in root.findall('temperature'):
    print(temp.get("value"))

Related

"Invalid tag name" error when creating element with lxml in python

I am using lxml to make an xml file and my sample program is :
from lxml import etree
MESSAGETYPEINDIC = 'CRS701'
# NOTE(review): datetime is used here but this snippet contains no import for it.
REPPERIOD = datetime.now().strftime("%Y-%m-%d")
# The tag below is written as "prefix:name"; this is the tag named in the
# reported "ValueError: Invalid tag name 'crsdac2:CRS-DAC2-LT'".
root = etree.Element("crsdac2:CRS-DAC2-LT", attrib={'xmlns:crsdac2': 'urn:sti:ties:crsdac2:v1', 'xmlns:crs': 'urn:sti:ties:sask:v1','xmlns:xsi':'http://www.w3.org/2001/XMLSchema-instance', 'version':'3.141590118408203125', 'xsi:schemaLocation': 'urn:sti:ties:crsdac2:v1 file:///G:/Tax/Tax%20Technology/CRS%20(DAC2)/XML%20Specifikacija%20(versija%20nuo%202020-12)/CRS-DAC2-LT_v0.4.xsd' })
crsDAC2_messageSpec = etree.SubElement(root, "crsdac2:MessageSpec")
# Chained assignment: the left-most name is bound to the text string, not to
# the newly created sub-element.
crsDAC2_messageSpec_messagetypeindic = etree.SubElement(crsDAC2_messageSpec, "crs:MessageTypeIndic").text = MESSAGETYPEINDIC
crsDAC2_messageSpec_repperiod = etree.SubElement(crsDAC2_messageSpec, "crs:ReportingPeriod").text = REPPERIOD
crsDAC2_messageBody = etree.SubElement(root, "crsdac2:MessageBody")
tree = etree.ElementTree(root)
print(tree)
# Serialize with an XML declaration and standalone="yes".
tree_string = etree.tostring(tree, pretty_print=True, xml_declaration=True, encoding='UTF-8', standalone="yes")
print(tree_string)
I am getting the below error when I tried running the code above. Can you please help me with resolving this.
ValueError: Invalid tag name 'crsdac2:CRS-DAC2-LT'
I need the output as per below:
<?xml version="1.0" encoding="UTF-8"?>
<crsdac2:CRS-DAC2-LT xmlns:crsdac2="urn:sti:ties:crsdac2:v1" xmlns:crs="urn:sti:ties:crstypessti:v1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="3.141590118408203125" xsi:schemaLocation="urn:sti:ties:crsdac2:v1 file:///G:/Tax/Tax%20Technology/CRS%20(DAC2)/XML%20Specifikacija%20(versija%20nuo%202020-12)/CRS-DAC2-LT_v0.4.xsd">
<crsdac2:MessageSpec>
<crs:MessageTypeIndic>CRS701</crs:MessageTypeIndic>
<crs:ReportingPeriod>2021-12-31</crs:ReportingPeriod>
</crsdac2:MessageSpec>
<crsdac2:MessageBody>
</crsdac2:MessageBody>
</crsdac2:CRS-DAC2-LT>

When creating an element or attribute bound to a namespace, you need to use the namespace URI (not the prefix). I suggest using the QName helper class to do this.
# Build the CRS-DAC2-LT document using namespace URIs (via QName) instead of
# "prefix:name" strings.
from lxml.etree import Element, SubElement, QName, tostring
from datetime import datetime
# Namespace URIs; ns1 and ns2 get their prefixes from the nsmap passed to the
# root element below.
ns1 = "urn:sti:ties:crsdac2:v1"
ns2 = "urn:sti:ties:crstypessti:v1"
# ns3 is not in nsmap; the Output shown below serializes it with the xsi prefix.
ns3 = 'http://www.w3.org/2001/XMLSchema-instance'
xsd = "file:///G:/Tax/Tax%20Technology/CRS%20(DAC2)/XML%20Specifikacija%20(versija%20nuo%202020-12)/CRS-DAC2-LT_v0.4.xsd"
MESSAGETYPEINDIC = 'CRS701'
# Reporting period is today's date formatted as YYYY-MM-DD.
REPPERIOD = datetime.now().strftime("%Y-%m-%d")
root = Element(QName(ns1, "CRS-DAC2-LT"), nsmap={"crsdac2": ns1, "crs": ns2})
# Namespaced attribute: schemaLocation in the ns3 namespace.
# NOTE(review): only the file location is set here; the output requested in
# the question pairs it with a namespace URI — confirm which is wanted.
root.set(QName(ns3, "schemaLocation"), xsd)
root.set("version", "3.141590118408203125")
messageSpec = SubElement(root, QName(ns1, "MessageSpec"))
# Keep a reference to each sub-element and set its text in a separate statement.
messageTypeIndic = SubElement(messageSpec, QName(ns2, "MessageTypeIndic"))
messageTypeIndic.text = MESSAGETYPEINDIC
messageSpec_repperiod = SubElement(messageSpec, QName(ns2, "ReportingPeriod"))
messageSpec_repperiod.text = REPPERIOD
messageBody = SubElement(root, QName(ns1, "MessageBody"))
# The serialized result is decoded before printing (see the print call below).
tree_string = tostring(root, pretty_print=True, xml_declaration=True,
encoding='UTF-8', standalone="yes")
print(tree_string.decode())
Output:
<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
<crsdac2:CRS-DAC2-LT xmlns:crs="urn:sti:ties:crstypessti:v1" xmlns:crsdac2="urn:sti:ties:crsdac2:v1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="file:///G:/Tax/Tax%20Technology/CRS%20(DAC2)/XML%20Specifikacija%20(versija%20nuo%202020-12)/CRS-DAC2-LT_v0.4.xsd" version="3.141590118408203125">
<crsdac2:MessageSpec>
<crs:MessageTypeIndic>CRS701</crs:MessageTypeIndic>
<crs:ReportingPeriod>2022-12-20</crs:ReportingPeriod>
</crsdac2:MessageSpec>
<crsdac2:MessageBody/>
</crsdac2:CRS-DAC2-LT>

Remove whole tag from XML using ElementTree

I am creating a script to sanitize tags and attributes from an XML file I need to load.
Structure example:
<Cli TipCli="1" NmCli="client_name">
<EndEtnCli EmaiCli="client_email"/>
</Cli>
In case the attribute EmaiCli is empty, I want to remove the whole tag <EndEtnCli/>.
I've followed some tutorials here and got into this code below, it's not showing any error, but it does not work at all.
What am I missing?
import xml.etree.ElementTree as ET
from datetime import date
# Load the document and take its root element.
tree = ET.parse('src/021/sample.xml')
root = tree.getroot()
# Iterating an Element yields only its direct children.
for client in root:
# sanitize email from client
# NOTE(review): find() returns None when there is no direct <EndEtnCli> child,
# in which case el.attrib below would raise — confirm the input shape.
el = client.find('EndEtnCli')
# The child is removed only when the attribute is present and exactly ''.
if el.attrib.get('EmaiCli') == '': client.remove(el)
# The output file name carries today's date formatted as YYYYMMDD.
currentDate = date.today().strftime('%Y%m%d')
tree.write(f'src/021/test_{ currentDate }', encoding="utf-8")

Below
# Remove the <EndEtnCli> child of every <Cli> whose EmaiCli attribute is
# missing or empty, then dump the resulting tree to stdout.
import xml.etree.ElementTree as ET

xml = '''<r>
<Cli TipCli="1" NmCli="client_name1">
<EndEtnCli EmaiCli=""/>
</Cli>
<Cli TipCli="2" NmCli="client_name2">
<EndEtnCli/>
</Cli>
<Cli TipCli="3" NmCli="client_name3">
<EndEtnCli EmaiCli="client_email"/>
</Cli>
</r>'''
root = ET.fromstring(xml)
for cli in root.findall('.//Cli'):
    child = cli.find('./EndEtnCli')
    if child is None:
        # This <Cli> has no <EndEtnCli> at all: nothing to remove, and
        # touching child.attrib would raise AttributeError.
        continue
    # attrib.get() returns None for a missing attribute; None and "" are both
    # falsy, so a single truthiness test covers "missing" and "empty".
    if not child.attrib.get('EmaiCli'):
        cli.remove(child)
ET.dump(root)
output
<r>
<Cli NmCli="client_name1" TipCli="1">
</Cli>
<Cli NmCli="client_name2" TipCli="2">
</Cli>
<Cli NmCli="client_name3" TipCli="3">
<EndEtnCli EmaiCli="client_email" />
</Cli>
</r>

Inserting an existing root into an existing Python ElementTree

I'm trying to link two existing Python ElementTree objects together.
import xml.etree.ElementTree as ET

# Two independent trees: an empty <Hello> and a <World> that holds one
# <country> child with text.
root, root2 = ET.Element('Hello'), ET.Element('World')
node = ET.SubElement(root2, 'country')
node.text = 'Belgium'
When printed
print(ET.tostring(root))
print(ET.tostring(root2))
I get
b'<Hello />'
b'<World><country>Belgium</country></World>'
How do I add root2 to root, to get the result? `
print(ET.tostring(root))
b'<Hello><World><country>Belgium</country></World></Hello>'

How about
import xml.etree.ElementTree as ET

# Build the inner <World> element completely, then attach it to <Hello>.
world = ET.Element('World')
country = ET.SubElement(world, 'Country')
country.text = 'Belgium'

hello = ET.Element('Hello')
hello.insert(0, world)  # index 0: <World> becomes the first child
print(ET.tostring(hello))
Output
b'<Hello><World><Country>Belgium</Country></World></Hello>'

It seems that I can use the same syntax as for lists:
# Element.append adds root2 as the last child of root, nesting the whole
# <World> subtree inside <Hello>.
root.append(root2)
print(ET.tostring(root))
b'<Hello><World><country>Belgium</country></World></Hello>'

How to insert children of one xml node in another xml node with python

I have the following XML file:
<root>
<nodeA>
<childrens_A>
</nodeA>
<nodeB>
<childrens_B>
</nodeB>
<nodeA>
<childrens_A>
</nodeA>
<nodeB>
<childrens_B>
</nodeB>
</root>
I want to get something like:
<root>
<nodeA>
<childrens_A>
<childrens_B>
</nodeA>
<nodeA>
<childrens_A>
<childrens_B>
</nodeA>
</root>
The numbers of nodeA and nodeB elements are equal.
I can only import from the Python standard library; I cannot import lxml because of access restrictions. So I am limited to: from xml.etree import ElementTree as et
My code is:
# Skeleton from the question: walk the root's children and mark where the
# children of nodeB should be moved into the matching nodeA.
from xml.etree import ElementTree as et

tree = et.parse(path_in)
root = tree.getroot()
# Iterating an Element yields its direct children.
for child in root:
    if child.tag == "nodeA":
        # insert children of nodeB in nodeA
        pass
# Write through the same object that was parsed above.
tree.write(path_out)
Thanks in advance!

Looks like I found a solution:
# Move the children of every <nodeB> into the sibling that precedes it and
# drop the emptied <nodeB>, so the result contains only merged <nodeA> nodes.
from xml.etree import ElementTree as et

tr = et.parse(path_in)
root = tr.getroot()
# Element.getchildren() was removed in Python 3.9; list(element) is the
# supported way to snapshot the children. A snapshot is required here because
# the loop removes nodes from root while walking it.
previous = None
for child in list(root):
    if child.tag == 'nodeB' and previous is not None:
        previous.extend(list(child))
        # Without this removal <nodeB> would stay in the output alongside the
        # merged <nodeA>.
        root.remove(child)
    else:
        # Also covers a leading <nodeB> with no predecessor; the old
        # ``i - 1`` lookup would have wrapped around to the last element.
        previous = child
tr.write(path_out)
I hope this answer helps somebody.

Parsing XML with ElementTree in Python

I have XML like this:
<parameter>
<name>ec_num</name>
<value>none</value>
<units/>
<url/>
<id>2455</id>
<m_date>2008-11-29 13:15:14</m_date>
<user_id>24</user_id>
<user_name>registry</user_name>
</parameter>
<parameter>
<name>swisspro</name>
<value>Q8H6N2</value>
<units/>
I want to parse the XML and extract the <value> entry which is just below the <name> entry marked 'swisspro'. I.e. I want to parse and extract the 'Q8H6N2' value.
How would I do this using ElementTree?

It would be much easier to do via lxml, but here's a solution using the ElementTree library:
# Print the <value> of the <parameter> whose <name> is 'swisspro'.
import xml.etree.ElementTree as ET

data = """<parameters>
<parameter>
<name>ec_num</name>
<value>none</value>
<units/>
<url/>
<id>2455</id>
<m_date>2008-11-29 13:15:14</m_date>
<user_id>24</user_id>
<user_name>registry</user_name>
</parameter>
<parameter>
<name>swisspro</name>
<value>Q8H6N2</value>
<units/>
</parameter>
</parameters>"""
tree = ET.fromstring(data)
for parameter in tree.iter(tag='parameter'):
    name = parameter.find('name')
    # find() returns None when a <parameter> has no <name> child.
    if name is not None and name.text == 'swisspro':
        print(parameter.find('value').text)
        # Stop at the first match.
        break
prints:
Q8H6N2
The idea is pretty simple: iterate over all parameter tags, check the value of the name tag and if it is equal to swisspro, get the value element.
Hope that helps.

Here is an example:
xml file
<?xml version="1.0" encoding="utf-8"?>
<root>
<person age="18">
<name>hzj</name>
<sex>man</sex>
</person>
<person age="19" des="hello">
<name>kiki</name>
<sex>female</sex>
</person>
</root>
parse method
from xml.etree import ElementTree
def print_node(node):
    '''Print an element's attributes, tag and text to stdout.

    The ``age`` attribute gets its own line when the element has one.
    '''
    print("==============================================")
    # Wrap in a 1-tuple so the attrib dict is formatted as a single value.
    print("node.attrib:%s" % (node.attrib,))
    # dict.has_key() does not exist in Python 3; ``in`` is the membership test.
    if "age" in node.attrib:
        print("node.attrib['age']:%s" % node.attrib['age'])
    print("node.tag:%s" % node.tag)
    print("node.text:%s" % node.text)
def read_xml(text):
    '''Parse XML from a string and print nodes found by four lookup styles.

    ``text`` is the XML document as a string. Each selected element is
    reported through ``print_node``.
    '''
    # Alternative (first method): ElementTree.parse(r"D:/test.xml") reads from
    # a file; here the document is parsed from the string (second method).
    root = ElementTree.fromstring(text)
    # 1. iter() replaces getiterator(), which was removed in Python 3.9.
    #    Materialise the iterator because the list is indexed below.
    lst_node = list(root.iter("person"))
    for node in lst_node:
        print_node(node)
    # 2. Indexing an element replaces getchildren() (removed in Python 3.9):
    #    the first child of the first <person>.
    lst_node_child = lst_node[0][0]
    print_node(lst_node_child)
    # 3. find() returns the first matching direct child.
    node_find = root.find('person')
    print_node(node_find)
    # 4. findall() returns all matches of the path; take the second <name>.
    node_findall = root.findall("person/name")[1]
    print_node(node_findall)
if __name__ == '__main__':
    # A context manager closes the file handle deterministically; the sample
    # document declares utf-8, so decode it as such on every platform.
    with open("test.xml", encoding="utf-8") as xml_file:
        read_xml(xml_file.read())

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

how to extract an #value from XML file in Python? - python

Assuming that root reffers to <current> node from lxml import etree xml_file = 'test.xml' with open(xml_file) as xml: root = etree.XML(xml.read()) temperature_value = root.xpath('./temperature/#value')[0]

from xml.etree import cElementTree as ET tree = ET.parse("test.xml") root = tree.getroot() for temp in root.findall('temperature'): print(temp.get("value"))

Related

"Invalid tag name" error when creating element with lxml in python

Remove whole tag from XML using ElementTree

Inserting an existing root into an existing Python ElementTree

How to insert children of one xml node in another xml node with python

Parsing XML with ElementTree in Python

Categories

Resources