XML parse does not show nodes - python

from xml.etree import ElementTree
t = """<collection xmlns:y="http://tail-f.com/ns/rest">
<appliance xmlns="http://networks.com/vnms/nms">
<uuid>088fbb70-40d1-4aaf-8ea3-590fd8238828</uuid>
<name>SRVDHCPE1</name>
<num-cpus>0</num-cpus>
<memory-size>0</memory-size>
<num-nics>4</num-nics>
</appliance>
<appliance xmlns="http://networks.com/vnms/nms">
<uuid>088fbb70-40d1-4aaf-8ea3-590fd8238828</uuid>
<name>SRVDHCPE2</name>
<num-cpus>0</num-cpus>
<memory-size>0</memory-size>
<num-nics>4</num-nics>
</appliance>
</collection>"""
dom = ElementTree.fromstring(t)
for n in dom.findall("collection/appliance/name"):
print(n.text)
I am looking for all the names, but nothing is printed. What am I doing wrong here?

Your case is definitely related to parsing XML with namespaces:
from xml.etree import ElementTree as ET

dom = ET.fromstring(t)
# Prefix -> namespace URI map. The prefixes are arbitrary local aliases; the
# URIs must match the xmlns declarations of the parsed document character for
# character, otherwise findall() simply matches nothing.
ns = {'rest': 'http://tail-f.com/ns/rest','nms': 'http://versa-networks.com/vnms/nms'}
# The path is evaluated relative to the root <collection> element, so it
# starts at <appliance> rather than at "collection/...".
for n in dom.findall("nms:appliance/nms:name", ns):
    print(n.text)
The output:
SRVDHCPE1
SRVDHCPE2

You need to namespace your selectors:
# Print the <name> of every <appliance> in a namespaced XML document.
from xml.etree import ElementTree
from xml.etree.ElementTree import Element

t = """<collection xmlns:y="http://tail-f.com/ns/rest">
<appliance xmlns="http://versa-networks.com/vnms/nms">
<uuid>088fbb70-40d1-4aaf-8ea3-590fd8238828</uuid>
<name>SRVDHCPE1</name>
<num-cpus>0</num-cpus>
<memory-size>0</memory-size>
<num-nics>4</num-nics>
</appliance>
<appliance xmlns="http://versa-networks.com/vnms/nms">
<uuid>088fbb70-40d1-4aaf-8ea3-590fd8238828</uuid>
<name>SRVDHCPE2</name>
<num-cpus>0</num-cpus>
<memory-size>0</memory-size>
<num-nics>4</num-nics>
</appliance>
</collection>"""

# Local prefix -> namespace URI. The prefix "n" is an arbitrary alias; the URI
# has to equal the default xmlns declared on each <appliance> element.
NAMESPACES = {'n': 'http://versa-networks.com/vnms/nms'}


def main():
    """Parse ``t`` and print the text of each appliance's <name> element."""
    dom: Element = ElementTree.fromstring(t)
    # Both steps of the path need the prefix: children of <appliance> inherit
    # its default namespace.
    for name in dom.findall("./n:appliance/n:name", namespaces=NAMESPACES):
        print(name.text)


if __name__ == '__main__':
    main()
which prints:
SRVDHCPE1
SRVDHCPE2
For reference:
https://docs.python.org/3.7/library/xml.etree.elementtree.html#parsing-xml-with-namespaces

Related

XML data extraction in python

I have an XML file like the following:
<AreaModel>
...
<RecipePhase>
<UniqueName>PHASE1</UniqueName>
...
<NumberOfParameterTags>7</NumberOfParameterTags>
...
<DefaultRecipeParameter>
<Name>PARAM1</Name>
----
</DefaultRecipeParameter>
<DefaultRecipeParameter>
<Name>PARAM2</Name>
----
</DefaultRecipeParameter>
<DefaultRecipeParameter>
<Name>PARAM3</Name>
----
</DefaultRecipeParameter>
</RecipePhase>
<RecipePhase>
....
</RecipePhase>
</AreaModel>
I would like to read this file in sequential order and generate two lists: one with the texts of the UniqueName tags, and a list of lists in which each inner list holds the texts of the Name tags found under one RecipePhase element.
For example, I might have 10 RecipePhase elements, each one with TAG UniqueName and each one containing a different set of children with tag DefaultRecipeParameter.
How can I take into account when I enter into RecipePhase and when I go out of the element during parsing?
I am trying ElementTree but I am not able to find a solution.
cheers,
m
You can use Python's xml module:
See my example:
from urllib.request import urlopen
from xml.dom import minidom as dom


def fetchPage(url):
    """Download *url* and return the raw response body as bytes.

    The body is deliberately left undecoded so that the XML parser can apply
    the encoding declared inside the document.
    """
    # The context manager closes the connection even if read() fails.
    with urlopen(url) as response:
        return response.read()


def extract(page):
    """Print ``<currency>-<value>`` for every non-empty <Rate> element.

    Args:
        page: XML document as ``str`` or ``bytes``.
    """
    document = dom.parseString(page)
    for rate in document.getElementsByTagName('Rate'):
        # Skip empty <Rate/> elements: they have no text node to read.
        if rate.hasChildNodes():
            print(rate.getAttribute('currency') + "-" + rate.firstChild.nodeValue)


if __name__ == '__main__':
    page = fetchPage("http://www.bnro.ro/nbrfxrates.xml")
    extract(page)
I solved partially my problem with the following code:
import xml.etree.ElementTree as ET

tree = ET.parse('control_strategies.axml')
root = tree.getroot()

# Walk the <RecipePhase> elements once so that phases[k] and param[k] always
# describe the same element. Collecting the names in a separate pass would
# shift the indices as soon as one phase has no <UniqueName>.
phases = []
param = []
for recipephase in root.findall('./RecipePhase'):
    unique_name = recipephase.find('./UniqueName')
    # Keep a placeholder for a phase without a name to preserve alignment.
    phases.append(unique_name.text if unique_name is not None else None)
    # Texts of every <Name> that sits under a <DefaultRecipeParameter> of
    # this phase, in document order.
    param.append([
        paramname.text
        for paramname in recipephase.findall('./DefaultRecipeParameter/Name')
    ])
n_elem = len(phases)

Unable to find xml element

I'm trying to parse and read an xml with xml.etree.ElementTree (I can't move to lxml) but I've been unable to.
XML: https://pastebin.com/yJqAW0L0
<GetResponse xmlns="http://mywebsite.com/myservice/">
<AutoScalingGroup>
<AutoScalingGroupName>foo</AutoScalingGroupName>
<AttributeValuePair>
<Attribute>owner</Attribute>
<Value>bob</Value>
</AttributeValuePair>
</AutoScalingGroup>
</GetResponse>
I've tried doing
import xml.etree.ElementTree as ET
ET.register_namespace('', "http://mywebsite.com/myservice/")
NSMAP = {'service':'http://mywebsite.com/myservice/'}
tree = ET.fromstring(page) # This is where i grab the xml from
autoscalingGroups = tree.findall('.//service:AutoScalingGroup', namespaces = NSMAP)
for asg in autoscalingGroups:
name = asg.findtext('.//service:AutoScalingGroupName', namespaces = NSMAP, default = "Default asg name")
print "asg name: " + str(name)
This doesn't return anything and I'm struggling to find why.
1) how do i get 'foo'?
2) how do i get 'bob'?
Am i using the wrong XML xpath?

How to insert children of one xml node in another xml node with python

I have the following XML file:
<root>
<nodeA>
<childrens_A>
</nodeA>
<nodeB>
<childrens_B>
</nodeB>
<nodeA>
<childrens_A>
</nodeA>
<nodeB>
<childrens_B>
</nodeB>
</root>
I want to get something like this:
<root>
<nodeA>
<childrens_A>
<childrens_B>
</nodeA>
<nodeA>
<childrens_A>
<childrens_B>
</nodeA>
</root>
The numbers of nodeA and nodeB elements are equal.
I can only import from the Python standard library; I cannot use lxml because of access restrictions, so I am limited to `from xml.etree import ElementTree as et`.
My code is:
from xml.etree import ElementTree as et
tree = et.parse(path_in)
root = tree.getroot()
for child in root.gethcildren()
if child.tag == "nodeA"
#insert children of nodeB in nodeA
tr.write(path_out)
Thanks in advance!
It looks like I found a solution:
from xml.etree import ElementTree as et

tr = et.parse(path_in)
root = tr.getroot()

# Element.getchildren() was removed in Python 3.9; iterating an element (or
# calling list() on it) yields its direct children instead.
last_node_a = None
for child in list(root):  # iterate over a copy: root is modified in the loop
    if child.tag == 'nodeA':
        last_node_a = child
    elif child.tag == 'nodeB' and last_node_a is not None:
        # Append nodeB's children to the nearest preceding nodeA ...
        last_node_a.extend(list(child))
        # ... and drop the now redundant nodeB so only nodeA elements remain.
        root.remove(child)
tr.write(path_out)
I hope this answer helps somebody someday.

Reg adding data to an existing XML in Python

I have to parse an xml file & modify the data in a particular tag using Python. I'm using Element Tree to do this. I'm able to parse & reach the required tag. But I'm not able to modify the value. I'm not sure if Element Tree is okay or if I should use TreeBuilder for this.
As you can see below I just want to replace the Not Executed under Verdict with a string value.
<Procedure>
<PreCondition>PRECONDITION: - ECU in extended diagnostic session (zz = 0x03) </PreCondition>
<PostCondition/>
<ProcedureID>428495</ProcedureID>
<SequenceNumber>2</SequenceNumber>
<CID>-1</CID>
<Verdict Writable="true">NotExecuted</Verdict>
</Procedure>
import xml.etree.ElementTree as etree
X_tree = etree.parse('DIAGNOSTIC SERVER.xml')
X_root = X_tree.getroot()
ATC_Name = X_root.iterfind('TestOrder//TestOrder//TestSuite//')
try:
while(1):
temp = ATC_Name.next()
if temp.tag == 'ProcedureID' and temp.text == str(TestCase_Id[j].text).split('-')[1]:
ATC_Name.next()
ATC_Name.next()
ATC_Name.next().text = 'Pass' <--This is what I want to do
ATC_Name.close()
break
except:
print sys.exc_info()
I believe my approach is wrong. Kindly guide me with right pointers.
Thanks.
You'd better switch to lxml so that you can use the "unlimited" power of xpath.
The idea is to use the following xpath expression:
//Procedure[ProcedureID/text()="%d"]/Verdict
where %d placeholder is substituted with the appropriate procedure id via string formatting operation.
The xpath expression finds the appropriate Verdict tag which you can set text on:
from lxml import etree

data = """<Procedure>
<PreCondition>PRECONDITION: - ECU in extended diagnostic session (zz = 0x03) </PreCondition>
<PostCondition/>
<ProcedureID>428495</ProcedureID>
<SequenceNumber>2</SequenceNumber>
<CID>-1</CID>
<Verdict Writable="true">NotExecuted</Verdict>
</Procedure>"""
ID = 428495
tree = etree.fromstring(data)
# Select the <Verdict> of the <Procedure> whose <ProcedureID> text equals ID.
verdict = tree.xpath('//Procedure[ProcedureID/text()="%d"]/Verdict' % ID)[0]
verdict.text = 'test'
# tostring() returns bytes by default; encoding="unicode" asks for a str so
# the document prints as plain XML instead of a b'...' literal.
print(etree.tostring(tree, encoding="unicode"))
prints:
<Procedure>
<PreCondition>PRECONDITION: - ECU in extended diagnostic session (zz = 0x03) </PreCondition>
<PostCondition/>
<ProcedureID>428495</ProcedureID>
<SequenceNumber>2</SequenceNumber>
<CID>-1</CID>
<Verdict Writable="true">test</Verdict>
</Procedure>
Here is a solution using ElementTree. See Modifying an XML File
import xml.etree.ElementTree as et

tree = et.parse('prison.xml')
root = tree.getroot()
# find('Verdict') only looks at direct children of the root element; use
# './/Verdict' instead when the element is nested deeper in the document.
verdict = root.find('Verdict')
print(verdict.text)  # before update
verdict.text = 'Executed'
# Write the modified tree back over the input file.
tree.write('prison.xml')
try this
import xml.etree.ElementTree as et

# Parse the source, then overwrite the text of the <Verdict> element that is
# a direct child of the document root.
document = et.parse(xmldata)
root = document.getroot()
s = root.find('Verdict')
s.text = 'Your string'

Accessing XMLNS attribute with Python Elementree?

How can one access NS attributes through using ElementTree?
With the following:
<data xmlns="http://www.foo.net/a" xmlns:a="http://www.foo.net/a" book="1" category="ABS" date="2009-12-22">
When I try to root.get('xmlns') I get back None, Category and Date are fine, Any help appreciated..
I think element.tag is what you're looking for. Note that your example is missing a trailing slash, so it's unbalanced and won't parse. I've added one in my example.
>>> from xml.etree import ElementTree as ET
>>> data = '''<data xmlns="http://www.foo.net/a"
... xmlns:a="http://www.foo.net/a"
... book="1" category="ABS" date="2009-12-22"/>'''
>>> element = ET.fromstring(data)
>>> element
<Element {http://www.foo.net/a}data at 1013b74d0>
>>> element.tag
'{http://www.foo.net/a}data'
>>> element.attrib
{'category': 'ABS', 'date': '2009-12-22', 'book': '1'}
If you just want to know the xmlns URI, you can split it out with a function like:
def tag_uri_and_name(elem):
    """Split an element's qualified tag into ``(namespace_uri, local_name)``.

    ElementTree stores a namespaced tag as ``"{uri}local"``.

    Args:
        elem: Object exposing a ``tag`` attribute, e.g. an ``Element``.

    Returns:
        A ``(uri, name)`` tuple. ``uri`` is ``None`` when the tag carries no
        ``{uri}`` prefix; in that case ``name`` is the tag unchanged.
    """
    tag = elem.tag
    # Guard the two cases where indexing tag[0] would fail: an empty tag, and
    # a non-string tag (comment and processing-instruction elements use a
    # factory function as their tag).
    if not isinstance(tag, str) or not tag.startswith("{"):
        return None, tag
    uri, _, name = tag[1:].partition("}")
    return uri, name
For much more on namespaces and qualified names in ElementTree, see effbot's examples.
Look at the effbot namespaces documentation/examples; specifically the parse_map function. It shows you how to add an *ns_map* attribute to each element which contains the prefix/URI mapping that applies to that specific element.
However, that adds the ns_map attribute to all the elements. For my needs, I found I wanted a global map of all the namespaces used to make element look up easier and not hardcoded.
Here's what I came up with:
import xml.etree.ElementTree as ET


def parse_and_get_ns(file):
    """Parse *file* and collect every namespace prefix declared in it.

    Args:
        file: Filename or file object, passed straight to ``ET.iterparse``.

    Returns:
        A ``(tree, ns)`` tuple. ``tree`` is an ``ElementTree`` wrapping the
        document root. ``ns`` maps each declared prefix to its URI already
        wrapped in braces (``"{uri}"``), ready to be prepended to a local
        tag name.

    Raises:
        KeyError: If the same prefix is bound to two different URIs.
    """
    events = "start", "start-ns"
    root = None
    ns = {}
    for event, elem in ET.iterparse(file, events):
        if event == "start-ns":
            # For "start-ns" events the payload is a (prefix, uri) tuple.
            prefix, uri = elem
            qualified = "{%s}" % uri
            # Compare against the braced form that is actually stored, so
            # that re-declaring a prefix with the SAME URI is accepted.
            if prefix in ns and ns[prefix] != qualified:
                # NOTE: It is perfectly valid to have the same prefix refer
                # to different URI namespaces in different parts of the
                # document. This exception serves as a reminder that this
                # solution is not robust. Use at your own peril.
                raise KeyError("Duplicate prefix with different URI found.")
            ns[prefix] = qualified
        elif event == "start" and root is None:
            # The first "start" event belongs to the document root.
            root = elem
    # Return only after the loop: the tree is complete once every event has
    # been consumed.
    return ET.ElementTree(root), ns
With this you can parse an xml file and obtain a dict with the namespace mappings. So, if you have an xml file like the following ("my.xml"):
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
>
<feed>
<item>
<title>Foo</title>
<dc:creator>Joe McGroin</dc:creator>
<description>etc...</description>
</item>
</feed>
</rss>
You will be able to use the XML namespaces and get info for elements like dc:creator:
>>> tree, ns = parse_and_get_ns("my.xml")
>>> ns
{u'content': '{http://purl.org/rss/1.0/modules/content/}',
u'dc': '{http://purl.org/dc/elements/1.1/}'}
>>> item = tree.find("/feed/item")
>>> item.findtext(ns['dc']+"creator")
'Joe McGroin'
Try this:
import xml.etree.ElementTree as ET
import re
import sys

# Let the parser open the file itself: it reads bytes and honours the
# encoding declared in the XML, instead of the platform's default text
# encoding that a plain open() would apply.
root = ET.parse(sys.argv[1]).getroot()

# A namespaced tag looks like "{uri}local"; keep the leading "{uri}" part so
# it can be prepended to the names of the elements we look up.
xmlns = ''
m = re.match(r'\{.*\}', root.tag)
if m:
    xmlns = m.group(0)
print(root.find(xmlns + 'the_tag_you_want').text)

Categories

Resources