How to add xml nodes in python using ElementTree - python

I have an XML file like:
<data>
<person>
<Name>xyz</Name>
<add>abc</add>
</person>
</data>
I want to add another person node, like:
<data>
<person>
<Name>xyz</Name>
<add>abc</add>
</person>
<person>
<Name>def</Name>
</person>
</data>
my current python code is
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element
from xml.etree.ElementTree import ElementTree
root = ET.parse("Lexicon.xml").getroot()
creRoot = Element("person")
creDictionary = Element("Name")
creDictionary.text = "def"
creRoot.append(creDictionary)
print(ET.tostring(creRoot))
creTree= ElementTree(creRoot)
creTree.write("Lexicon.xml")
When I run this code, it creates a new XML file rather than adding to the existing one, and the result is:
<person>
<Name>def</Name>
</person>
and it removes all the previous data.
Could anyone help me solve this? Thanks in advance.

Use SubElement to add child nodes to an existing node:
import xml.etree.ElementTree as etree
# Sample document from the question. Replace this with the text of your own
# document; the name avoids shadowing the builtin ``input`` function, which
# cannot be parsed as XML.
xml_source = """<data>
<person>
<Name>xyz</Name>
<add>abc</add>
</person>
</data>"""

# Parse the existing document so the new nodes are attached to it instead of
# to a freshly created, empty tree.
data = etree.XML(xml_source)
# SubElement creates the child element and appends it to the given parent.
person = etree.SubElement(data, 'person')
name = etree.SubElement(person, 'Name')
name.text = 'def'
print(etree.tostring(data))

We need to append the newly created element to the respective parent element.
Demo:
>>> import xml.etree.ElementTree as ET
>>> input_data = """<data>
... <person>
... <Name>xyz</Name>
... <add>abc</add>
... </person>
... </data>"""
#- Create new Element.
>>> person_tag = ET.Element("person")
>>> name_tag = ET.Element("Name")
#- Add text to Element.
>>> name_tag.text = "def"
#- Append Element to Parent Element.
>>> person_tag.append(name_tag)
>>>
#- Just print Parent Element
>>> ET.tostring(person_tag)
'<person><Name>def</Name></person>'
>>>
>>>
#- Create the root Element with fromstring.
>>> root = ET.fromstring(input_data)
>>>
#- Append above element to root element
>>> root.append(person_tag)
#- Print root Element.
>>> print ET.tostring(root)
<data>
<person>
<Name>xyz</Name>
<add>abc</add>
</person>
<person><Name>def</Name></person></data>
>>> print ET.tostring(root, method="xml")
<data>
<person>
<Name>xyz</Name>
<add>abc</add>
</person>
<person><Name>def</Name></person></data>
>>>
Note: Best to use lxml b

Related

Python XML remove elements if a child is not in it

I have the following xml - "file.xml"
<?xml version="1.0"?>
-<data>
-<dataset>
<ID>001</ID>
<A>5</A>
<B>2</B>
<C>1</C>
</dataset>
-<dataset>
<ID>002</ID>
<A>6</A>
<B>4</B>
<C>2</C>
</dataset>
-<dataset>
<ID>003</ID>
<A>3</A>
</dataset>
-<dataset>
<ID>004</ID>
<A>2</A>
<C>5</C>
</dataset>
</data>
I want to keep all elements with children A and B. Child C doesn't matter at all. My approach is to delete those elements without child A or B. That is, a missing A or B should trigger the deletion of that element.
Here is my code:
import xml.etree.ElementTree as ET
tree = ET.parse("file.xml")
root = tree.getroot()
for element in root.findall('.//dataset'):
if element.tag != 'A' and element.tag != 'B':
root.remove(element)
This doesn't seem to be working.
Desired output:
<?xml version="1.0"?>
-<data>
-<dataset>
<ID>001</ID>
<A>5</A>
<B>2</B>
<C>1</C>
</dataset>
-<dataset>
<ID>002</ID>
<A>6</A>
<B>4</B>
<C>2</C>
</dataset>
</data>
Thank you!
I got it.
import xml.etree.ElementTree as ET
tree = ET.parse("file.xml")
root = tree.getroot()

# Keep only the direct 'dataset' children of the root that contain both an
# 'A' and a 'B' child; every other child of the root is removed.
# Iterate over a snapshot (list(root)): removing children from an element
# while iterating over that same element skips the sibling that follows
# each removed child, so some unwanted elements would survive.
for child in list(root):
    keep = (
        child.tag == 'dataset'
        and child.find('A') is not None
        and child.find('B') is not None
    )
    if not keep:
        root.remove(child)

Sorting XML tags by child elements Python

I have a number of 'root' tags with children 'name'. I want to sort the 'root' blocks, ordered alphabetically by the 'name' element. Have tried lxml / etree / minidom but can't get it working...
I can't get it to parse the value inside the tags, and then sort the parent root tags.
<?xml version='1.0' encoding='UTF-8'?>
<roots>
<root>
<path>//1.1.1.100/Alex</path>
<name>Alex Space</name>
</root>
<root>
<path>//1.1.1.101/Steve</path>
<name>Steve Space</name>
</root>
<root>
<path>//1.1.1.150/Bethany</path>
<name>Bethanys</name>
</root>
</roots>
Here is what I have tried:
import xml.etree.ElementTree as ET
def sortchildrenby(parent, child):
parent[:] = sorted(parent, key=lambda child: child)
tree = ET.parse('data.xml')
root = tree.getroot()
sortchildrenby(root, 'name')
for child in root:
sortchildrenby(child, 'name')
tree.write('output.xml')
If you want to put the name nodes first:
x = """
<roots>
<root>
<path>//1.1.1.100/Alex</path>
<name>Alex Space</name>
</root>
<root>
<path>//1.1.1.101/Steve</path>
<name>Bethanys</name>
</root>
<root>
<path>//1.1.1.150/Bethany</path>
<name>Steve Space</name>
</root>
</roots>"""
import lxml.etree as et
tree = et.fromstring(x)
for r in tree.iter("root"):
r[:] = sorted(r, key=lambda ch: -(ch.tag == "name"))
print(et.tostring(tree).decode("utf-8"))
Which would give you:
<roots>
<root>
<name>Alex Space</name>
<path>//1.1.1.100/Alex</path>
</root>
<root>
<name>Bethanys</name>
<path>//1.1.1.101/Steve</path>
</root>
<root>
<name>Steve Space</name>
<path>//1.1.1.150/Bethany</path>
</root>
</roots>
But there is no need to sort if you just want to add them first, you can just remove and reinsert the name into index 0:
import lxml.etree as et
tree = et.fromstring(x)
for r in tree.iter("root"):
ch = r.find("name")
r.remove(ch)
r.insert(0, ch)
print(et.tostring(tree).decode("utf-8"))
If the nodes are actually not in sorted order and you want to rearrange the root nodes alphabetically by name:
x = """
<roots>
<root>
<path>//1.1.1.100/Alex</path>
<name>Alex Space</name>
</root>
<root>
<path>//1.1.1.101/Steve</path>
<name>Steve Space</name>
</root>
<root>
<path>//1.1.1.150/Bethany</path>
<name>Bethanys</name>
</root>
</roots>"""
import lxml.etree as et
tree = et.fromstring(x)
tree[:] = sorted(tree, key=lambda ch: ch.xpath("name/text()"))
print(et.tostring(tree).decode("utf-8"))
Which would give you:
<roots>
<root>
<path>//1.1.1.100/Alex</path>
<name>Alex Space</name>
</root>
<root>
<path>//1.1.1.150/Bethany</path>
<name>Bethanys</name>
</root>
<root>
<path>//1.1.1.101/Steve</path>
<name>Steve Space</name>
</root>
</roots>
You can also combine this with either of the first two approaches to also rearrange each root node so that name comes first.
Try this:
import xml.etree.ElementTree as ET
xml="<?xml version='1.0' encoding='UTF-8'?><roots><root><path>//1.1.1.100/Alex</path><name>Alex Space</name></root><root><path>//1.1.1.101/Steve</path><name>Steve Space</name></root><root><path>//1.1.1.150/Bethany</path><name>Bethanys</name></root></roots>"
oldxml = ET.fromstring(xml)

# Build a new <roots> element whose <root> children are ordered by the text
# of their <name> child. A single stable sort keeps each <root> exactly once
# (even when two share the same name) and preserves document order among
# equal names. A <root> without a <name> sorts first instead of raising.
newxml = ET.Element('roots')
newxml.extend(
    sorted(
        oldxml.findall('root'),
        key=lambda rootobj: rootobj.findtext('name', default='') or '',
    )
)

# Show the original (unsorted) and the new (sorted) documents.
ET.dump(oldxml)
ET.dump(newxml)
I'm reading from a variable and dumping it to the screen.
You can change it to read from a file and write to a file as needed.

How to create a subset of document using lxml?

Suppose you have an lxml.etree element with contents like:
<root>
<element1>
<subelement1>blabla</subelement1>
</element1>
<element2>
<subelement2>blibli</subelement2>
</element2>
</root>
I can use the find or xpath methods to get an element that renders as something like:
<element1>
<subelement1>blabla</subelement1>
</element1>
Is there a simple way to get:
<root>
<element1>
<subelement1>blabla</subelement1>
</element1>
</root>
i.e. the element of interest plus all its ancestors up to the document root?
I am not sure there is something built-in for it, but here is a terrible, "don't ever use it in real life" type of a workaround using the iterancestors() parent iterator:
from lxml import etree as ET
data = """<root>
<element1>
<subelement1>blabla</subelement1>
</element1>
<element2>
<subelement2>blibli</subelement2>
</element2>
</root>"""
root = ET.fromstring(data)
element = root.find(".//subelement1")
result = ET.tostring(element)
for node in element.iterancestors():
result = "<{name}>{text}</{name}>".format(name=node.tag, text=result)
print(ET.tostring(ET.fromstring(result), pretty_print=True))
Prints:
<root>
<element1>
<subelement1>blabla</subelement1>
</element1>
</root>
The following code removes elements that don't have any subelement1 descendants and are not named subelement1.
from lxml import etree
tree = etree.parse("input.xml") # First XML document in question
for elem in tree.iter():
if elem.xpath("not(.//subelement1)") and not(elem.tag == "subelement1"):
if elem.getparent() is not None:
elem.getparent().remove(elem)
print etree.tostring(tree)
Output:
<root>
<element1>
<subelement1>blabla</subelement1>
</element1>
</root>

Store XML values as Python list

I have XML stored as a string "vincontents", formatted as such:
<response>
<data>
<vin>1FT7X2B69CEC76666</vin>
</data>
<data>
<vin>1GNDT13S452225555</vin>
</data>
</response>
I'm trying to use Python's elementtree library to parse out the VIN values into an array or Python list. I'm only interested in the values, not the tags.
def parseVins():
content = etree.fromstring(vincontents)
vins = content.findall("data/vin")
print vins
Outputs all of the tag information:
[<Element 'vin' at 0x2d2eef0>, <Element 'vin' at 0x2d2efd0> ....
Any help would be appreciated. Thank you!
Use .text property:
>>> import xml.etree.ElementTree as etree
>>> data = """<response>
... <data>
... <vin>1FT7X2B69CEC76666</vin>
... </data>
... <data>
... <vin>1GNDT13S452225555</vin>
... </data>
... </response>"""
>>> tree = etree.fromstring(data)
>>> [el.text for el in tree.findall('.//data/vin')]
['1FT7X2B69CEC76666', '1GNDT13S452225555']

Python XML check next item

Here is a little xml example:
<?xml version="1.0" encoding="UTF-8"?>
<list>
<person id="1">
<name>Smith</name>
<city>New York</city>
</person>
<person id="2">
<name>Pitt</name>
</person>
...
...
</list>
Now I need all Persons with a name and city.
I tried:
#!/usr/bin/python
# coding: utf8
import xml.dom.minidom as dom
tree = dom.parse("test.xml")
for listItems in tree.firstChild.childNodes:
for personItems in listItems.childNodes:
if personItems.nodeName == "name" and personItems.nextSibling == "city":
print personItems.firstChild.data.strip()
But the output is empty. Without the "and" condition I get all names. How can I check that the next tag after "name" is "city"?
You can do this in minidom:
import xml.dom.minidom as minidom
def getChild(n,v):
for child in n.childNodes:
if child.localName==v:
yield child
xmldoc = minidom.parse('test.xml')
person = getChild(xmldoc, 'list')
for p in person:
for v in getChild(p,'person'):
attr = v.getAttributeNode('id')
if attr:
print attr.nodeValue.strip()
This prints id of person nodes:
1
2
Use ElementTree; see this example:
import xml.etree.ElementTree as ET
tree = ET.parse('a.xml')
root = tree.getroot()

# Print the name and city of every <person> that has both children.
for person in root.findall('person'):
    # findtext returns None when the child element is absent, so missing
    # children are detected explicitly instead of by catching every
    # exception with a bare ``except``.
    name = person.findtext('name')
    city = person.findtext('city')
    if name is None or city is None:
        continue
    # %-formatting prints the same text under Python 2 and Python 3.
    print("%s %s" % (name, city))
To get the id, use id = person.get('id')
output:Smith New York
Using lxml, you can use xpath to get in one step what you need:
from lxml import etree
xmlstr = """
<list>
<person id="1">
<name>Smith</name>
<city>New York</city>
</person>
<person id="2">
<name>Pitt</name>
</person>
</list>
"""
xml = etree.fromstring(xmlstr)
xp = "//person[city]"
for person in xml.xpath(xp):
print etree.tostring(person)
lxml is an external Python package, but it is so useful that, to me, it is always worth installing.
The XPath expression searches for any (//) person element that has (as declared by the content of []) a city subelement.

Categories

Resources