Print lxml.objectify.ObjectifiedElement?

Print lxml.objectify.ObjectifiedElement? - python

Printing an lxml.objectify.ObjectifiedElement just prints a blank line, so I have to access it via its tags, and when I don't know the tags of the response, I'm just guessing.
How do I print the entire object, showing children names and values?
As requested, here is the code I have. Not sure what purpose this holds, but:
from amazonproduct import API
api = API('xxxxx', 'xxxxx', 'us', 'xxxx')
result = api.item_lookup('B00H8U93JO', ResponseGroup='OfferSummary')
print result

Using lxml.etree.tostring() seems to work, although the output is not prettified:
>>> from lxml import etree
>>> from lxml import objectify
>>> raw = '''<root>
... <foo>foo</foo>
... <bar>bar</bar>
... </root>'''
...
>>> root = objectify.fromstring(raw)
>>> print type(root)
<type 'lxml.objectify.ObjectifiedElement'>
>>> print etree.tostring(root)
<root><foo>foo</foo><bar>bar</bar></root>

In response to har07: you can use minidom to prettify the output:
from lxml import objectify, etree
from xml.dom import minidom
def pretty_print( elem ):
xml = etree.tostring( elem )
pretty = minidom.parseString( xml ).toprettyxml( indent=' ' )
print( pretty )

Related

XML parse does not show nodes

from xml.etree import ElementTree
t = """<collection xmlns:y="http://tail-f.com/ns/rest">
<appliance xmlns="http://networks.com/vnms/nms">
<uuid>088fbb70-40d1-4aaf-8ea3-590fd8238828</uuid>
<name>SRVDHCPE1</name>
<num-cpus>0</num-cpus>
<memory-size>0</memory-size>
<num-nics>4</num-nics>
</appliance>
<appliance xmlns="http://networks.com/vnms/nms">
<uuid>088fbb70-40d1-4aaf-8ea3-590fd8238828</uuid>
<name>SRVDHCPE2</name>
<num-cpus>0</num-cpus>
<memory-size>0</memory-size>
<num-nics>4</num-nics>
</appliance>
</collection>"""
dom = ElementTree.fromstring(t)
for n in dom.findall("collection/appliance/name"):
print(n.text)
I'm looking for all the names, but nothing is printed. What am I doing wrong here?

Your case is definitely related to Parsing XML with Namespaces:
dom = ET.fromstring(t)
ns = {'rest': 'http://tail-f.com/ns/rest','nms': 'http://versa-networks.com/vnms/nms'}
for n in dom.findall("nms:appliance/nms:name", ns):
print(n.text)
The output:
SRVDHCPE1
SRVDHCPE2

You need to namespace your selectors:
from xml.etree import ElementTree
from xml.etree.ElementTree import Element
t = """<collection xmlns:y="http://tail-f.com/ns/rest">
<appliance xmlns="http://versa-networks.com/vnms/nms">
<uuid>088fbb70-40d1-4aaf-8ea3-590fd8238828</uuid>
<name>SRVDHCPE1</name>
<num-cpus>0</num-cpus>
<memory-size>0</memory-size>
<num-nics>4</num-nics>
</appliance>
<appliance xmlns="http://versa-networks.com/vnms/nms">
<uuid>088fbb70-40d1-4aaf-8ea3-590fd8238828</uuid>
<name>SRVDHCPE2</name>
<num-cpus>0</num-cpus>
<memory-size>0</memory-size>
<num-nics>4</num-nics>
</appliance>
</collection>"""
if __name__ == '__main__':
dom: Element = ElementTree.fromstring(t)
namespaces = {'n': 'http://versa-networks.com/vnms/nms'}
for name in dom.findall("./n:appliance/n:name", namespaces=namespaces):
print(name.text)
which prints:
SRVDHCPE1
SRVDHCPE2
For reference:
https://docs.python.org/3.7/library/xml.etree.elementtree.html#parsing-xml-with-namespaces

Test the existence of an Element with lxml.objectify

What is the standard way to test whether an element exists with lxml.objectify?
Sample XML :
<?xml version="1.0" encoding="utf-8"?>
<Test>
<MyElement1>sdfsdfdsfd</MyElement1>
</Test>
Code
from lxml import etree, objectify
with open('config.xml') as f:
xml = f.read()
root = objectify.fromstring(xml)
print root.MyElement1
print root.MyElement17 # AttributeError: no such child: MyElement17
Also, what is the simplest way to write a value at a specific path?
root.MyElement1.Blah = 'New' # this works because MyElement1 already exists
root.MyElement17.Blah = 'New' # this doesn't work because MyElement17 doesn't exist
root.MyElement1.Foo.Bar = 'Hello' # this doesn't as well... How to do this shortly ?

The find method returns None if the element does not exist.
>>> xml = '''<?xml version="1.0" encoding="utf-8"?>
... <Test>
... <MyElement1>sdfsdfdsfd</MyElement1>
... </Test>'''
>>>
>>> from lxml import objectify
>>> root = objectify.fromstring(xml)
>>> root.find('.//MyElement1')
'sdfsdfdsfd'
>>> root.find('.//MyElement17')
>>> root.find('.//MyElement17') is None
True
UPDATE according to the question edit:
>>> from lxml import objectify
>>>
>>> def add_string(parent, attr, s):
... if len(attr) == 1:
... setattr(parent, attr[0], s)
... else:
... child = getattr(parent, attr[0], None)
... if child is None:
... child = objectify.SubElement(parent, attr[0])
... add_string(child, attr[1:], s)
...
>>> root = objectify.fromstring(xml)
>>> add_string(root, ['MyElement1', 'Blah'], 'New')
>>> add_string(root, ['MyElement17', 'Blah'], 'New')
>>> add_string(root, ['MyElement1', 'Foo', 'Bar'], 'Hello')
>>>
>>> root.MyElement1.Blah
'New'
>>> root.MyElement17.Blah
'New'
>>> root.MyElement1.Foo.Bar
'Hello'

You can use getattr:
if getattr(root, 'MyElement17', None):
# do something

minidom.parse reading short XML?

Here are 2 similar XML files :
Long XML
<mynode>
<text>Blah</text>
<position>322,13</position>
</mynode>
Short XML
<mynode text="Blah" position="322,13" />
It seems that Python's minidom.parse doesn't like the short XML.
Is this short XML style supported by minidom?
Is it possible to write a single piece of code that reads both the short and the long XML?

from xml.dom import minidom
def getChild(n,v):
for child in n.childNodes:
if child.localName==v:
yield child
def getValue(n, val):
res = None
for n in mynode:
rv = getChild(n,val)
for v in rv:
var = v.childNodes[0].nodeValue
res = var
if not res:
for n in mynode:
attr = n.getAttributeNode(val)
if attr:
res = attr.nodeValue.strip()
return res
xmldoc = minidom.parse('file.xml')
mynode = xmldoc.getElementsByTagName('mynode')
print getValue(mynode,'text')
print getValue(mynode,'position')
output:
Blah
322,13

You need a root node
>>> from xml.dom.minidom import parseString
>>> doc = parseString('<root><mynode text="Blah" position="322,13" /></root>')
>>> print d.firstChild.firstChild.getAttribute('text')
Blah
>>> print d.firstChild.firstChild.getAttribute('position')
322,13

How to detect starting tag of xml and then parse and objectify

I'm using lxml to parse and objectify XML files in a directory. I have a lot of models and XSDs, and each object model maps to certain defined classes: for example, if the XML starts with a model tag it is a dataModel, and if it starts with a page tag it is a viewModel.
My question is: how can I efficiently detect which tag an XML file starts with, then parse it with the appropriate XSD file, and then objectify it?
files = glob(os.path.join('resources/xml', '*.xml'))
for f in files:
xmlinput = open(f)
xmlContent = xmlinput.read()
if xsdPath:
xsdFile = open(xsdPath)
# xsdFile should retrieve according to xml content
schema = etree.XMLSchema(file=xsdFile)
xmlinput.seek(0)
myxml = etree.parse(xmlinput)
try:
schema.assertValid(myxml)
except etree.DocumentInvalid as x:
print "In file %s error %s has occurred." % (xmlPath, x.message)
finally:
xsdFile.close()
xmlinput.close()

I am deliberately leaving aside the file reading and processing to concentrate on your problem:
>>> from lxml.etree import fromstring
>>> # We have XMLs with different root tag
>>> tree1 = fromstring("<model><foo/><bar/></model>")
>>> tree2 = fromstring("<page><baz/><blah/></page>")
>>>
>>> # We have different treatments
>>> def modelTreatement(etree):
... return etree.xpath('//bar')
...
>>> def pageTreatment(etree):
... return etree.xpath('//blah')
...
>>> # Here is a recipe to read the root tag
>>> tree1.getroottree().getroot().tag
'model'
>>> tree2.getroottree().getroot().tag
'page'
>>>
>>> # So, by building an appropriated dict :
>>> tag_to_treatment_map = {'model': modelTreatement, 'page': pageTreatment}
>>> # You can run the right method on the right tree
>>> for tree in [tree1, tree2]:
... tag_to_treatment_map[tree.getroottree().getroot().tag](tree)
...
[<Element bar at 0x24979b0>]
[<Element blah at 0x2497a00>]
Hope this will be useful to someone, even if I had not seen this earlier.

lxml iterparse in python can't handle namespaces

from lxml import etree
import StringIO
data= StringIO.StringIO('<root xmlns="http://some.random.schema"><a>One</a><a>Two</a><a>Three</a></root>')
docs = etree.iterparse(data,tag='a')
a,b = docs.next()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "iterparse.pxi", line 478, in lxml.etree.iterparse.__next__ (src/lxml/lxml.etree.c:95348)
File "iterparse.pxi", line 534, in lxml.etree.iterparse._read_more_events (src/lxml/lxml.etree.c:95938)
StopIteration
Works fine until I add the namespace to the root node. Any ideas as to what I can do as a workaround, or what the correct way of doing this is?
I need this to be event-driven because the files are very large.

When there is a namespace attached, the tag isn't a, it's {http://some.random.schema}a. Try this (Python 3):
from lxml import etree
from io import BytesIO
xml = '''\
<root xmlns="http://some.random.schema">
<a>One</a>
<a>Two</a>
<a>Three</a>
</root>'''
data = BytesIO(xml.encode())
docs = etree.iterparse(data, tag='{http://some.random.schema}a')
for event, elem in docs:
print(f'{event}: {elem}')
or, in Python 2:
from lxml import etree
from StringIO import StringIO
xml = '''\
<root xmlns="http://some.random.schema">
<a>One</a>
<a>Two</a>
<a>Three</a>
</root>'''
data = StringIO(xml)
docs = etree.iterparse(data, tag='{http://some.random.schema}a')
for event, elem in docs:
print event, elem
This prints something like:
end: <Element {http://some.random.schema}a at 0x10941e730>
end: <Element {http://some.random.schema}a at 0x10941e8c0>
end: <Element {http://some.random.schema}a at 0x10941e960>
As @mihail-shcheglov pointed out, a namespace wildcard {*} can also be used, which matches any namespace or no namespace:
from lxml import etree
from io import BytesIO
xml = '''\
<root xmlns="http://some.random.schema">
<a>One</a>
<a>Two</a>
<a>Three</a>
</root>'''
data = BytesIO(xml.encode())
docs = etree.iterparse(data, tag='{*}a')
for event, elem in docs:
print(f'{event}: {elem}')
See lxml.etree docs for more.

Why not use a regular expression?
1)
Using lxml is slower than using a regex.
from time import clock
import StringIO
from lxml import etree
times1 = []
for i in xrange(1000):
data= StringIO.StringIO('<root ><a>One</a><a>Two</a><a>Three\nlittle pigs</a><b>Four</b><a>another</a></root>')
te = clock()
docs = etree.iterparse(data,tag='a')
tf = clock()
times1.append(tf-te)
print min(times1)
print [etree.tostring(y) for x,y in docs]
import re
regx = re.compile('<a>[\s\S]*?</a>')
times2 = []
for i in xrange(1000):
data= StringIO.StringIO('<root ><a>One</a><a>Two</a><a>Three\nlittle pigs</a><b>Four</b><a>another</a></root>')
te = clock()
li = regx.findall(data.read())
tf = clock()
times2.append(tf-te)
print min(times2)
print li
result
0.000150298431784
['<a>One</a>', '<a>Two</a>', '<a>Three\nlittle pigs</a>', '<a>another</a>']
2.40253998762e-05
['<a>One</a>', '<a>Two</a>', '<a>Three\nlittle pigs</a>', '<a>another</a>']
0.000150298431784 / 2.40253998762e-05 is 6.25
lxml is 6.25 times slower than a regex
.
2)
No problem when there is a namespace:
import StringIO
import re
regx = re.compile('<a>[\s\S]*?</a>')
data= StringIO.StringIO('<root xmlns="http://some.random.schema"><a>One</a><a>Two</a><a>Three\nlittle pigs</a><b>Four</b><a>another</a></root>')
print regx.findall(data.read())
result
['<a>One</a>', '<a>Two</a>', '<a>Three\nlittle pigs</a>', '<a>another</a>']

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Print lxml.objectify.ObjectifiedElement? - python

In response to har07, You can use minidom to prettify from lxml import objectify, etree from xml.dom import minidom def pretty_print( elem ): xml = etree.tostring( elem ) pretty = minidom.parseString( xml ).toprettyxml( indent=' ' ) print( pretty )

Related

XML parse does not show nodes

Test the existence of an Element with lxml.objectify

minidom.parse reading short XML?

How to detect starting tag of xml and then parse and objectify

lxml iterparse in python can't handle namespaces

Categories

Resources