How to extract an @value from an XML file in Python? - python

I have the following structure in an XML file:
<current>
<city id="2510170" name="Triana">
<coord lon="-6.02" lat="37.38"/>
<country>ES</country>
<sun rise="2016-04-04T06:04:05" set="2016-04-04T18:50:07"/>
</city>
<temperature value="290.92" min="288.15" max="296.15" unit="kelvin"/>
<humidity value="93" unit="%"/>
<pressure value="1009" unit="hPa"/>
<wind>
<speed value="8.2" name="Fresh Breeze"/>
<gusts/>
<direction value="230" code="SW" name="Southwest"/>
</wind>
<clouds value="90" name="overcast clouds"/>
<visibility/>
<precipitation mode="no"/>
<weather number="501" value="moderate rain" icon="10d"/>
<lastupdate value="2016-04-04T10:05:00"/>
</current>
The question is how to extract the temperature (the @value attribute) using XPath in Python. That is, how to extract "290.92" from the following line:
<temperature value="290.92" min="288.15" max="296.15" unit="kelvin"/>

Assuming that root refers to the <current> node:
from lxml import etree

xml_file = 'test.xml'
# Read bytes, not text: lxml rejects str input that carries an XML encoding declaration.
with open(xml_file, 'rb') as xml:
    root = etree.XML(xml.read())
# In XPath an attribute is selected with "@name"; xpath() returns a list of matches,
# so [0] picks the value of the first <temperature> child of the root.
temperature_value = root.xpath('./temperature/@value')[0]

I would simply do
import xml.etree.ElementTree as ET

# ET.parse() returns an ElementTree wrapping the whole document.
tree = ET.parse('path_to_your_xml_file')
# './/temperature' searches all levels and yields the first <temperature> found.
temperature = tree.find('.//temperature')
Now temperature.attrib is a dictionary with all of the info
# print() is a function on Python 3; the statement form is Python 2 only.
print(temperature.attrib['value'])  # 290.92
print(temperature.attrib['min'])  # 288.15
print(temperature.attrib['max'])  # 296.15
print(temperature.attrib['unit'])  # kelvin

# cElementTree was removed in Python 3.9; xml.etree.ElementTree picks up the
# C accelerator on its own, so importing it directly is the portable spelling.
import xml.etree.ElementTree as ET

tree = ET.parse("test.xml")
root = tree.getroot()
# findall() with a bare tag name only matches direct children of the root.
for temp in root.findall('temperature'):
    print(temp.get("value"))

Related

"Invalid tag name" error when creating element with lxml in python

I am using lxml to make an xml file and my sample program is :
# Question code: this is the snippet that produces the "Invalid tag name" ValueError.
from lxml import etree
MESSAGETYPEINDIC = 'CRS701'
# NOTE(review): `datetime` is used here but no import for it appears in this snippet.
REPPERIOD = datetime.now().strftime("%Y-%m-%d")
# The tag is spelled "prefix:name" and the namespaces are passed as ordinary
# xmlns:* attributes; the reported error names exactly this tag string.
root = etree.Element("crsdac2:CRS-DAC2-LT", attrib={'xmlns:crsdac2': 'urn:sti:ties:crsdac2:v1', 'xmlns:crs': 'urn:sti:ties:sask:v1','xmlns:xsi':'http://www.w3.org/2001/XMLSchema-instance', 'version':'3.141590118408203125', 'xsi:schemaLocation': 'urn:sti:ties:crsdac2:v1 file:///G:/Tax/Tax%20Technology/CRS%20(DAC2)/XML%20Specifikacija%20(versija%20nuo%202020-12)/CRS-DAC2-LT_v0.4.xsd' })
crsDAC2_messageSpec = etree.SubElement(root, "crsdac2:MessageSpec")
# Chained assignment: each variable below ends up bound to the string on the
# right-hand side, not to the newly created element.
crsDAC2_messageSpec_messagetypeindic = etree.SubElement(crsDAC2_messageSpec, "crs:MessageTypeIndic").text = MESSAGETYPEINDIC
crsDAC2_messageSpec_repperiod = etree.SubElement(crsDAC2_messageSpec, "crs:ReportingPeriod").text = REPPERIOD
crsDAC2_messageBody = etree.SubElement(root, "crsdac2:MessageBody")
# Wrap the root in an ElementTree and serialize it with an XML declaration.
tree = etree.ElementTree(root)
print(tree)
tree_string = etree.tostring(tree, pretty_print=True, xml_declaration=True, encoding='UTF-8', standalone="yes")
print(tree_string)
I am getting the below error when I tried running the code above. Can you please help me with resolving this.
ValueError: Invalid tag name 'crsdac2:CRS-DAC2-LT'
I need the output as per below:
<?xml version="1.0" encoding="UTF-8"?>
<crsdac2:CRS-DAC2-LT xmlns:crsdac2="urn:sti:ties:crsdac2:v1" xmlns:crs="urn:sti:ties:crstypessti:v1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="3.141590118408203125" xsi:schemaLocation="urn:sti:ties:crsdac2:v1 file:///G:/Tax/Tax%20Technology/CRS%20(DAC2)/XML%20Specifikacija%20(versija%20nuo%202020-12)/CRS-DAC2-LT_v0.4.xsd">
<crsdac2:MessageSpec>
<crs:MessageTypeIndic>CRS701</crs:MessageTypeIndic>
<crs:ReportingPeriod>2021-12-31</crs:ReportingPeriod>
</crsdac2:MessageSpec>
<crsdac2:MessageBody>
</crsdac2:MessageBody>
</crsdac2:CRS-DAC2-LT>
When creating an element or attribute bound to a namespace, you need to use the namespace URI (not the prefix). I suggest using the QName helper class to do this.
from datetime import datetime

from lxml.etree import Element, QName, SubElement, tostring

# Namespace URIs. Elements are bound to the URI; the prefixes used when
# serializing come from the nsmap passed to the root element.
NS_CRSDAC2 = "urn:sti:ties:crsdac2:v1"
NS_CRS = "urn:sti:ties:crstypessti:v1"
NS_XSI = 'http://www.w3.org/2001/XMLSchema-instance'
SCHEMA_FILE = "file:///G:/Tax/Tax%20Technology/CRS%20(DAC2)/XML%20Specifikacija%20(versija%20nuo%202020-12)/CRS-DAC2-LT_v0.4.xsd"
MESSAGETYPEINDIC = 'CRS701'
REPPERIOD = datetime.now().strftime("%Y-%m-%d")


def add_child(parent, namespace, tag, text=None):
    """Append a namespace-qualified child to *parent* and return it.

    The child's text is set only when *text* is given.
    """
    child = SubElement(parent, QName(namespace, tag))
    if text is not None:
        child.text = text
    return child


root = Element(QName(NS_CRSDAC2, "CRS-DAC2-LT"),
               nsmap={"crsdac2": NS_CRSDAC2, "crs": NS_CRS})
# Attribute order matters for the serialized output: schemaLocation first, then version.
root.set(QName(NS_XSI, "schemaLocation"), SCHEMA_FILE)
root.set("version", "3.141590118408203125")

message_spec = add_child(root, NS_CRSDAC2, "MessageSpec")
add_child(message_spec, NS_CRS, "MessageTypeIndic", MESSAGETYPEINDIC)
add_child(message_spec, NS_CRS, "ReportingPeriod", REPPERIOD)
add_child(root, NS_CRSDAC2, "MessageBody")

# tostring() returns bytes when an encoding is given, hence the decode() for printing.
serialized = tostring(root, pretty_print=True, xml_declaration=True,
                      encoding='UTF-8', standalone="yes")
print(serialized.decode())
Output:
<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
<crsdac2:CRS-DAC2-LT xmlns:crs="urn:sti:ties:crstypessti:v1" xmlns:crsdac2="urn:sti:ties:crsdac2:v1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="file:///G:/Tax/Tax%20Technology/CRS%20(DAC2)/XML%20Specifikacija%20(versija%20nuo%202020-12)/CRS-DAC2-LT_v0.4.xsd" version="3.141590118408203125">
<crsdac2:MessageSpec>
<crs:MessageTypeIndic>CRS701</crs:MessageTypeIndic>
<crs:ReportingPeriod>2022-12-20</crs:ReportingPeriod>
</crsdac2:MessageSpec>
<crsdac2:MessageBody/>
</crsdac2:CRS-DAC2-LT>

Remove whole tag from XML using ElementTree

I am creating a script to sanitize tags and attributes from an XML file I need to load.
Structure example:
<Cli TipCli="1" NmCli="client_name">
<EndEtnCli EmaiCli="client_email"/>
</Cli>
If the attribute EmaiCli is empty, I want to remove the whole <EndEtnCli/> tag.
I've followed some tutorials here and got into this code below, it's not showing any error, but it does not work at all.
What am I missing?
# Question code: removes <EndEtnCli> from a client when its EmaiCli attribute is empty.
import xml.etree.ElementTree as ET
from datetime import date
tree = ET.parse('src/021/sample.xml')
root = tree.getroot()
# Iterates the root's direct children (presumably the <Cli> elements; verify against the real file).
for client in root:
# sanitize email from client
el = client.find('EndEtnCli')
# NOTE(review): find() returns None when a client has no <EndEtnCli>, so .attrib would fail — confirm the input always has one.
# Only an attribute that is present and exactly '' triggers the removal; a missing attribute yields None here.
if el.attrib.get('EmaiCli') == '': client.remove(el)
currentDate = date.today().strftime('%Y%m%d')
# The output file name carries today's date formatted as YYYYMMDD.
tree.write(f'src/021/test_{ currentDate }', encoding="utf-8")
Below
import xml.etree.ElementTree as ET

xml = '''<r>
<Cli TipCli="1" NmCli="client_name1">
<EndEtnCli EmaiCli=""/>
</Cli>
<Cli TipCli="2" NmCli="client_name2">
<EndEtnCli/>
</Cli>
<Cli TipCli="3" NmCli="client_name3">
<EndEtnCli EmaiCli="client_email"/>
</Cli>
</r>'''

root = ET.fromstring(xml)
cli_lst = root.findall('.//Cli')
for cli in cli_lst:
    child = cli.find('./EndEtnCli')
    if child is None:
        # A client without an <EndEtnCli> element has nothing to sanitize.
        continue
    # get() yields None for a missing attribute and '' for an empty one;
    # both are falsy, so one truthiness test covers the two cases.
    if not child.get('EmaiCli'):
        cli.remove(child)
ET.dump(root)
output
<r>
<Cli NmCli="client_name1" TipCli="1">
</Cli>
<Cli NmCli="client_name2" TipCli="2">
</Cli>
<Cli NmCli="client_name3" TipCli="3">
<EndEtnCli EmaiCli="client_email" />
</Cli>
</r>

Inserting an existing root into an existing Python ElementTree

I'm trying to link two existing Python ElementTree objects together.
import xml.etree.ElementTree as ET

# Second tree: <World> holding a single <country> child with text.
root2 = ET.Element('World')
node = ET.SubElement(root2, 'country')
node.text = 'Belgium'
# First tree: an empty <Hello> element, not yet connected to root2.
root = ET.Element('Hello')
When printed
# tostring() serializes each element to bytes, which is why the output is shown as b'...'.
print(ET.tostring(root))
print(ET.tostring(root2))
I get
b'<Hello />'
b'<World><country>Belgium</country></World>'
How do I add root2 to root to get the following result?
print(ET.tostring(root))
b'<Hello><World><country>Belgium</country></World></Hello>'
How about
import xml.etree.ElementTree as ET

# Build the inner <World><Country>Belgium</Country></World> subtree first.
world = ET.Element('World')
country = ET.SubElement(world, 'Country')
country.text = 'Belgium'

# Then attach it as the only child of a fresh <Hello> element.
hello = ET.Element('Hello')
hello.append(world)
print(ET.tostring(hello))
Output
b'<Hello><World><Country>Belgium</Country></World></Hello>'
It seems that I can use the same syntax as for lists:
# append() adds root2 as the last child of root, mirroring list.append().
root.append(root2)
print(ET.tostring(root))
b'<Hello><World><country>Belgium</country></World></Hello>'

How to insert children of one xml node in another xml node with python

I have the following XML file:
<root>
<nodeA>
<childrens_A>
</nodeA>
<nodeB>
<childrens_B>
</nodeB>
<nodeA>
<childrens_A>
</nodeA>
<nodeB>
<childrens_B>
</nodeB>
</root>
I want to get something like:
<root>
<nodeA>
<childrens_A>
<childrens_B>
</nodeA>
<nodeA>
<childrens_A>
<childrens_B>
</nodeA>
</root>
The numbers of nodeA and nodeB elements are equal.
I can only import from the Python standard library; I cannot use lxml because of access restrictions. So I want to limit myself to: from xml.etree import ElementTree as et
My code is:
# Question code: an unfinished sketch, not runnable as written.
from xml.etree import ElementTree as et
tree = et.parse(path_in)
root = tree.getroot()
# NOTE(review): `gethcildren` looks like a typo for `getchildren`, and the `for`/`if` lines lack their trailing colons.
for child in root.gethcildren()
if child.tag == "nodeA"
#insert children of nodeB in nodeA
# NOTE(review): `tr` is not defined in this snippet; the parsed tree is bound to `tree` above.
tr.write(path_out)
Thanks in advance!
Looks like I found a solution:
from xml.etree import ElementTree as et

tr = et.parse(path_in)
root = tr.getroot()

# Move the children of every <nodeB> into the closest preceding <nodeA>,
# then drop the emptied-out <nodeB> so only <nodeA> elements remain.
last_node_a = None
# Iterate over a snapshot: root is modified inside the loop.
# (Element.getchildren() was removed in Python 3.9; list(element) replaces it.)
for child in list(root):
    if child.tag == 'nodeA':
        last_node_a = child
    elif child.tag == 'nodeB' and last_node_a is not None:
        # A <nodeB> with no <nodeA> before it is left untouched instead of
        # being merged into an unrelated element.
        last_node_a.extend(list(child))
        root.remove(child)

tr.write(path_out)
I hope this answer helps somebody someday.

Parsing XML with ElementTree in Python

I have XML like this:
<parameter>
<name>ec_num</name>
<value>none</value>
<units/>
<url/>
<id>2455</id>
<m_date>2008-11-29 13:15:14</m_date>
<user_id>24</user_id>
<user_name>registry</user_name>
</parameter>
<parameter>
<name>swisspro</name>
<value>Q8H6N2</value>
<units/>
I want to parse the XML and extract the <value> entry which is just below the <name> entry marked 'swisspro'. I.e. I want to parse and extract the 'Q8H6N2' value.
How would I do this using ElementTree?
It would be much easier to do via lxml, but here's a solution using the ElementTree library:
import xml.etree.ElementTree as ET

data = """<parameters>
<parameter>
<name>ec_num</name>
<value>none</value>
<units/>
<url/>
<id>2455</id>
<m_date>2008-11-29 13:15:14</m_date>
<user_id>24</user_id>
<user_name>registry</user_name>
</parameter>
<parameter>
<name>swisspro</name>
<value>Q8H6N2</value>
<units/>
</parameter>
</parameters>"""


def find_parameter_value(root, wanted_name):
    """Return the <value> text of the first <parameter> named *wanted_name*.

    Args:
        root: element whose descendants are searched for <parameter> tags.
        wanted_name: text the parameter's <name> child must equal.

    Returns:
        The text of the matching parameter's <value> child, or None when no
        parameter has that name or the match has no <value> child.
    """
    for parameter in root.iter(tag='parameter'):
        # findtext() returns None for a missing child instead of raising.
        if parameter.findtext('name') == wanted_name:
            return parameter.findtext('value')
    return None


tree = ET.fromstring(data)
value = find_parameter_value(tree, 'swisspro')
if value is not None:
    print(value)
prints:
Q8H6N2
The idea is pretty simple: iterate over all parameter tags, check the value of the name tag and if it is equal to swisspro, get the value element.
Hope that helps.
Here is an example:
xml file
<?xml version="1.0" encoding="utf-8"?>
<root>
<person age="18">
<name>hzj</name>
<sex>man</sex>
</person>
<person age="19" des="hello">
<name>kiki</name>
<sex>female</sex>
</person>
</root>
parse method
from xml.etree import ElementTree
def print_node(node):
    '''Print the attributes, tag and text of an element.

    Args:
        node: an element exposing ``attrib``, ``tag`` and ``text``.
    '''
    print("==============================================")
    print("node.attrib:%s" % node.attrib)
    # dict.has_key() does not exist on Python 3; use the `in` operator.
    if "age" in node.attrib:
        print("node.attrib['age']:%s" % node.attrib['age'])
    print("node.tag:%s" % node.tag)
    print("node.text:%s" % node.text)
def read_xml(text):
    '''Parse *text* as XML and print nodes found via four lookup styles.

    Args:
        text: the XML document as a string. The indexing below assumes the
            first <person> has at least one child and that "person/name"
            matches at least twice; otherwise IndexError is raised.
    '''
    # Alternative: ElementTree.parse(r"D:/test.xml") reads from a file instead.
    root = ElementTree.fromstring(text)

    # 1. All <person> elements. getiterator() was removed in Python 3.9, and
    #    iter() is lazy, so materialize it for the indexing in step 2.
    lst_node = list(root.iter("person"))
    for node in lst_node:
        print_node(node)

    # 2. First child of the first <person>. An element is a sequence of its
    #    children, which replaces the removed getchildren().
    lst_node_child = lst_node[0][0]
    print_node(lst_node_child)

    # 3. find() returns the first direct child matching the path.
    node_find = root.find('person')
    print_node(node_find)

    # 4. findall() returns every match; take the second <name>.
    node_findall = root.findall("person/name")[1]
    print_node(node_findall)
if __name__ == '__main__':
    # A context manager closes the file handle as soon as the text is read.
    # utf-8 matches the encoding declared in the sample XML document.
    with open("test.xml", encoding="utf-8") as xml_file:
        read_xml(xml_file.read())

Categories

Resources