Python - Read an XML using minidom - python

I'm new to Python and I have a question.
I'm trying to parse this XML (it contains several records; this is the first one I need to read):
<![CDATA[<?xml version="1.0" encoding="UTF-8"?><UDSObjectList>
<UDSObject>
<Handle>cr:908715</Handle>
<Attributes>
<Attribute DataType="2002">
<AttrName>ref_num</AttrName>
<AttrValue>497131</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>support_lev.sym</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2004">
<AttrName>open_date</AttrName>
<AttrValue>1516290907</AttrValue>
</Attribute>
<Attribute DataType="58814636">
<AttrName>agt.id</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2005">
<AttrName>priority</AttrName>
<AttrValue>3</AttrValue>
</Attribute>
<Attribute DataType="2009">
<AttrName>tenant.id</AttrName>
<AttrValue>F3CA8B5A2A456742B21EF8F3B5538623</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>tenant.name</AttrName>
<AttrValue>Ripley</AttrValue>
</Attribute>
<Attribute DataType="2005">
<AttrName>log_agent</AttrName>
<AttrValue>088966043F4D2944AA90067C52DA454F</AttrValue>
</Attribute>
<Attribute DataType="58826268">
<AttrName>request_by.first_name</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="58826268">
<AttrName>request_by.first_name</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2002">
<AttrName>customer.first_name</AttrName>
<AttrValue>Juan Guillermo</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>customer.last_name</AttrName>
<AttrValue>Mendoza Montero</AttrValue>
</Attribute>
<Attribute DataType="2009">
<AttrName>customer.id</AttrName>
<AttrValue>8C020EBAD32035419D7654CDE510D312</AttrValue>
</Attribute>
<Attribute DataType="2001">
<AttrName>category.id</AttrName>
<AttrValue>1121021012</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>category.sym</AttrName>
<AttrValue>Ripley.Sistemas Financieros.Terminal Financiero.Mensaje de
Error</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>status.sym</AttrName>
<AttrValue>Suspended</AttrValue>
</Attribute>
<Attribute DataType="2009">
<AttrName>group.id</AttrName>
<AttrValue>099621F7BD77C545B65FB65BFE466550</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>group.last_name</AttrName>
<AttrValue>EUS_Zona V Region</AttrValue>
</Attribute>
<Attribute DataType="2001">
<AttrName>zreporting_met.id</AttrName>
<AttrValue>7300</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>zreporting_met.sym</AttrName>
<AttrValue>E-Mail</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>assignee.combo_name</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2004">
<AttrName>open_date</AttrName>
<AttrValue>1516290907</AttrValue>
</Attribute>
<Attribute DataType="2004">
<AttrName>close_date</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2002">
<AttrName>description</AttrName>
<AttrValue>Asunto :Valaparaiso / Terminal Financiero Error
Nombre Completo :JUAN MENDOZA MONTERO
Ubicación :CCSS VALPARAISO Plaza victoria 1646, VALPARAISO
País :Chile
Telefono :ANEXO 2541
Correo :jmendozam#ripley.cl
Descripción :Error Terminal Financiero
Descartes :N/A</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>summary</AttrName>
<AttrValue>Santiago / Modificación </AttrValue>
</Attribute>
</Attributes>
</UDSObject>
but when I read the file with this method:
from zeep import Client
import xml.dom.minidom
from xml.dom.minidom import Node
def select():
    """Query the service for ``cr`` objects and print each ``AttrValue`` text.

    Logs in through the module-level ``_client`` (presumably a zeep ``Client``
    created elsewhere -- it is not shown in this snippet), runs ``doSelect``
    with a fixed where clause and attribute list, parses the returned XML
    with minidom, appends the text of every ``<AttrValue>`` element to
    ``resultado`` and prints it, then logs out.
    """
    resultado = []
    sid = _client.service.login("User","password")
    objectType = 'cr'
    # The clause is one logical string; it is split here only for line length.
    whereClause = ("group.last_name LIKE 'EUS_ZONA%' AND open_date > 1517454000 "
                   "AND open_date < 1519786800")
    maxRows = -1
    attributes = [
        "ref_num",
        "agt.id",
        "priority",
        "pcat.id",
        "tenant.id",
        "tenant.name",
        "log_agent",
        "request_by.first_name",
        "request_by.last_name",
        "customer.first_name",
        "customer.last_name",
        "customer.id",
        "category.id",
        "category.sym",
        "status.sym",
        "group.id",
        "group.last_name",
        "zreporting_met.id",
        "zreporting_met.sym",
        "assignee.combo_name",
        "open_date",
        "close_date",
        "description",
        "summary",
    ]
    minim = _client.service.doSelect(sid=sid, objectType=objectType,
                                     whereClause=whereClause, maxRows=maxRows,
                                     attributes=attributes)
    dom = xml.dom.minidom.parseString(minim)
    nodeList = dom.getElementsByTagName('AttrValue')
    for j in range(len(nodeList)):
        # NOTE: firstChild is None for an empty element such as <AttrValue/>,
        # so this line raises AttributeError at the first empty value. That is
        # the behaviour the question below is asking about.
        resultado.append(nodeList[j].firstChild.wholeText)
        print(resultado[j])
    logout = _client.service.logout(sid)
This only prints the first AttrValue (the ref_num value). What I need is to add every field of the XML file to the resultado array and print each of them. Can someone help me with that?

Please read and follow How to create a Minimal, Complete, and Verifiable example.
You should remove all the server stuff and reduce the size of your sample data.
This snippet follows your code and gets all attribute elements and then iterates those:
import xml.dom.minidom
from xml.dom.minidom import Node
# Reduced sample of the service response, used in place of the real call.
minim = """<?xml version="1.0" encoding="UTF-8"?>
<udsobjectlist>
<udsobject>
<handle>cr:908715</handle>
<attributes>
<attribute datatype="2002">
<attrname>ref_num</attrname>
<attrvalue>497131</attrvalue>
</attribute>
<attribute datatype="2002">
<attrname>support_lev.sym</attrname>
<attrvalue/>
</attribute>
<attribute datatype="2004">
<attrname>open_date</attrname>
<attrvalue>1516290907</attrvalue>
</attribute>
</attributes>
</udsobject>
</udsobjectlist>
"""
dom = xml.dom.minidom.parseString(minim)
nodeList = dom.getElementsByTagName('attribute')
resultado = []
attributes = ["attrname", "attrvalue"]
for node in nodeList:
    # Build one [name, value] pair per <attribute> element.
    a = []
    for attribute in attributes:
        try:
            a.append(node.getElementsByTagName(attribute)[0].firstChild.wholeText)
        except AttributeError:
            # An empty element such as <attrvalue/> has no child node, so
            # firstChild is None; record an empty string instead.
            a.append("")
    resultado.append(a)
print(resultado)
prints
[['ref_num', '497131'], ['support_lev.sym', ''], ['open_date', '1516290907']]
Even closer to your code:
nodeList = dom.getElementsByTagName('attrvalue')
for node in nodeList:
    try:
        v = node.firstChild.wholeText
    except AttributeError:
        # firstChild is None for empty elements such as <attrvalue/>;
        # those are skipped rather than recorded.
        continue
    resultado.append(v)
    print(v)
print(resultado)
prints
497131
1516290907
['497131', '1516290907']
As suggested in the comments, with ET (although you probably should not access elements by index, but this might get you started):
import xml.etree.ElementTree as ET
root = ET.fromstring(minim)
# root[0] is the first <udsobject>; its second child, root[0][1], is <attributes>.
for child in root[0][1]:
    try:
        print(child[0].text)  # <attrname>
        print(child[1].text)  # <attrvalue>; .text is None when the element is empty
    except IndexError:
        # An <attribute> with fewer than two children has no name/value pair.
        pass
prints
ref_num
497131
support_lev.sym
None
open_date
1516290907

Related

Deleting xml node from tree based on element criteria

I'm working with XML files and I'm trying to delete a <node> based on an element's criteria. The idea is to delete the 2nd ITEM. I tried to build a list of ITEMs and then, based on that list, remove them from the root level, but I got
ValueError: list.remove(x): x not in list
and I have no idea how to get to the higher level.
Code:
import xml.etree.ElementTree as ET
tree = ET.parse("PW.xml")
root = tree.getroot()
# Every ITEM, at any depth, that has at least one PROPERTY child.
items = root.findall('.//ITEM[PROPERTY]')
m = -1
delate = []
for i in items:
    # ElementTree attribute predicates are written [@attr="value"].
    pl = i.findall('PROPERTY[@NAME="ID detalu"]')
    #print([(pv.attrib['NAME'], pv.text) for pv in pl])
    m = m + 1
    for p in pl:
        if (p.text.startswith('SHP')):
            # Only the index of the matching ITEM is recorded; nothing is
            # removed from the tree in this snippet.
            delate.append(m)
tree.write('PW.xml')
XML:
<DATA>
<OBJECT TYPE="Dane">
<ITEM>
<PROPERTY NAME="[1]" />
<PROPERTY NAME="child2">0\557</PROPERTY>
<PROPERTY NAME="child3">MNO001366</PROPERTY>
<PROPERTY NAME="child4">1507</PROPERTY>
<PROPERTY NAME="child5" />
<PROPERTY NAME="child6" />
<PROPERTY NAME="ID detalu">PL.10.000033</PROPERTY>
</ITEM>
<ITEM>
<PROPERTY NAME="[1]" />
<PROPERTY NAME="child2">0\557</PROPERTY>
<PROPERTY NAME="child3">MNO001485</PROPERTY>
<PROPERTY NAME="child4">1626</PROPERTY>
<PROPERTY NAME="child5" />
<PROPERTY NAME="child6">Pintos</PROPERTY>
<PROPERTY NAME="ID detalu">SHP001432</PROPERTY>
</ITEM>
</OBJECT>
</DATA>
I cannot reproduce the error message. Anyway, to remove an element, you need to have a reference to its parent, so that you can do parent.remove(child). The parent of the ITEM elements is OBJECT.
Here is a demo:
import xml.etree.ElementTree as ET
tree = ET.parse("PW.xml")
root = tree.getroot()
to_delete = []
# The ITEM elements are children of OBJECT, so removal must go through it.
parent = root.find("OBJECT")
# Only look at direct children of ``parent``: Element.remove() raises
# ValueError for an element that is not a direct child.
items = parent.findall("ITEM")
# Find the items to delete and add them to the to_delete list
for item in items:
    prop = item.find("PROPERTY[@NAME='ID detalu']")
    # prop.text is None for an empty <PROPERTY .../> element.
    if prop is not None and (prop.text or "").startswith("SHP"):
        to_delete.append(item)
# Do the actual deletion of items
for item in to_delete:
    parent.remove(item)
print(ET.tostring(root).decode())
Output:
<DATA>
<OBJECT TYPE="Dane">
<ITEM>
<PROPERTY NAME="[1]" />
<PROPERTY NAME="child2">0\557</PROPERTY>
<PROPERTY NAME="child3">MNO001366</PROPERTY>
<PROPERTY NAME="child4">1507</PROPERTY>
<PROPERTY NAME="child5" />
<PROPERTY NAME="child6" />
<PROPERTY NAME="ID detalu">PL.10.000033</PROPERTY>
</ITEM>
</OBJECT>
</DATA>

Create new xml attributes from other attribute

I have the following XML
<icim source="source">
<object class="class_name" name="class_name">
<attribute name="Type">
<string>Type_Name</string>
</attribute>
<attribute name="DisplayName">
<string>DisplayName</string>
</attribute>
<attribute name="Vendor">
<string>Vendor_Name</string>
</attribute>
<attribute name="Model">
<string>Model_Name</string>
</attribute>
<attribute name="Description">
<string>Description_part1, Description_part2, Description_part3, Description_part4, Description_part5</string>
</attribute>
</object>
<object class="class_name" name="class_name">
<attribute name="Type">
<string>Type_Name</string>
</attribute>
<attribute name="DisplayName">
<DisplayName</string>
</attribute>
<attribute name="Vendor">
<string>Vendor_Name</string>
</attribute>
<attribute name="Model">
<string>Model_Name</string>
</attribute>
<attribute name="Description">
<string>Description_part1, Description_part2, Description_part3, Description_part4, Description_part5</string>
</attribute>
</object>
.
.
.
</icim>
and I want to transform it using Python's Element Tree to this:
<icim source="source">
<object class="class_name" name="class_name">
<attribute name="Type">
<string>Type_Name</string>
</attribute>
<attribute name="DisplayName">
<string>DisplayName</string>
</attribute>
<attribute name="Vendor">
<string>Vendor_Name</string>
</attribute>
<attribute name="Model">
<string>Model_Name</string>
</attribute>
<attribute name="String1">
<string>Description_part1</string>
</attribute>
</attribute>
<attribute name="String2">
<string>Description_part2</string>
</attribute>
</attribute>
<attribute name="String3">
<string>Description_part3</string>
</attribute>
<attribute name="Description">
<string>Description_part1, Description_part2, Description_part3, Description_part4, Description_part5</string>
</attribute>
</object>
<object class="class_name" name="class_name">
<attribute name="Type">
<string>Type_Name</string>
</attribute>
<attribute name="DisplayName">
<DisplayName</string>
</attribute>
<attribute name="Vendor">
<string>Vendor_Name</string>
</attribute>
<attribute name="Model">
<string>Model_Name</string>
</attribute>
</attribute>
<attribute name="String1">
<string>Description_part1</string>
</attribute>
</attribute>
<attribute name="String2">
<string>Description_part2</string>
</attribute>
</attribute>
<attribute name="String3">
<string>Description_part3</string>
</attribute>
<attribute name="Description">
<string>Description_part1, Description_part2, Description_part3, Description_part4, Description_part5</string>
</attribute>
</object>
.
.
.
</icim>
That is I want to extract the first three string parts from each Description element (the Description always has commas, so you can split the parts based on those) and create a new attribute for each of the first 3 Description parts. Thoughts?
Your xml and expected xml aren't well formed (<DisplayName</string> should be <string>DisplayName</string>) but assuming that's fixed, and if I understand you correctly, the following should get you at least most of the way there:
from lxml import etree
display = """[your xml above, corrected]"""
doc = etree.XML(display)
objs = doc.xpath("//object")
for obj in objs:
    # First three comma-separated parts of this object's Description text.
    news = obj.xpath('.//attribute[@name="Description"]/string/text()')[0].split(',')[:3]
    # Every new element is inserted at the same index, so walk the parts in
    # reverse to leave them in ascending order. The number comes from the
    # part's own position, so fewer than three parts still yields String1, String2.
    for counter, new in reversed(list(enumerate(news, start=1))):
        ins = etree.fromstring(f'<attribute name="String{counter}">\n <string>{new.strip()}</string>\n</attribute>\n')
        obj.insert(4,ins)
print(etree.tostring(doc).decode())
Output should be your expected output.

How to remove only the parent element and not its child elements in Python?

A similar question to the one in JavaScript
I have an XML document and want to comment out just the parent tag, without its children,
as in the example below:
<object id="12">
<process name="Developer">
<appdef>
<attributes>
<attribute name="X">
<ProcessValue datatype="number" value="15" />
</attribute>
<attribute name="Y">
<ProcessValue datatype="number" value="59" />
</attribute>
</attributes>
</appdef>
</process>
</object>
and comment out just the <object> tags:
<!--<object id="12">-->
<process name="Developer">
<appdef>
<attributes>
<attribute name="X">
<ProcessValue datatype="number" value="15" />
</attribute>
<attribute name="Y">
<ProcessValue datatype="number" value="59" />
</attribute>
</attributes>
</appdef>
</process>
<!--</object>-->
I have code that comments out the tag, but it also comments out all of its children.
Thank you very much; I appreciate any help.
Due to the confusion, I am attaching the whole code:
from xml.dom import minidom
xml = """\
<bpr:release xmlns:bpr="http://www.blueprism.co.uk/product/release">
<object id="0e694daf-836e-44a9-816a-9b8127abb7b2" name="Developer 2
ex" xmlns="http://www.blueprism.co.uk/product/process">
<process name="Developer 2 ex" version="1.0" bpversion="5.0.33.0"
narrative="BO for automation the HTML page
" type="object"
runmode="Exclusive">
<appdef>
<attributes>
<attribute name="X">
<ProcessValue datatype="number" value="15" />
</attribute>
<attribute name="Y">
<ProcessValue datatype="number" value="59" />
</attribute>
</attributes>
</appdef>
</process>
</object>
</bpr:release>
"""
def comment_node(node):
    """Replace *node* in its parent with a comment holding the node's XML.

    The comment text is ``node.toxml()``, i.e. the serialised element
    including all of its descendants, so the children end up inside the
    comment as well. The new comment is printed and then returned.
    """
    comment = node.ownerDocument.createComment(node.toxml())
    print(comment)
    node.parentNode.replaceChild(comment, node)
    return comment
# Parse the sample string and keep a reference to its root element.
doc = minidom.parseString(xml).documentElement
# Comment out the last <object> element found below the root.
comment_node(doc.getElementsByTagName('object')[-1])
# Serialise the modified tree; this rebinds the name ``xml`` to the result.
xml = doc.toxml()

Using XML ElementTree to create list of objects with atrributes

I use the python requests module to get XML from the TeamCity rest api that looks like this:
<triggers count="10">
<trigger id="TRIGGER_1240" type="buildDependencyTrigger">
<properties count="2">
<property name="afterSuccessfulBuildOnly" value="true"/>
<property name="dependsOn" value="bt191"/>
</properties>
</trigger>
<trigger id="TRIGGER_1241" type="buildDependencyTrigger">
<properties count="2">
<property name="afterSuccessfulBuildOnly" value="true"/>
<property name="dependsOn" value="bt171"/>
</properties>
</trigger>
<trigger id="TRIGGER_1242" type="buildDependencyTrigger">
<properties count="2">
<property name="afterSuccessfulBuildOnly" value="true"/>
<property name="dependsOn" value="bt167"/>
</properties>
</trigger>
<trigger id="TRIGGER_1243" type="buildDependencyTrigger">
<properties count="2">
<property name="afterSuccessfulBuildOnly" value="true"/>
<property name="dependsOn" value="bt164"/>
</properties>
</trigger>
<trigger id="TRIGGER_1244" type="buildDependencyTrigger">
<properties count="2">
<property name="afterSuccessfulBuildOnly" value="true"/>
<property name="dependsOn" value="bt364"/>
</properties>
</trigger>
<trigger id="TRIGGER_736" type="buildDependencyTrigger">
<properties count="2">
<property name="afterSuccessfulBuildOnly" value="true"/>
<property name="dependsOn" value="Components_Ratchetdb"/>
</properties>
</trigger>
<trigger id="TRIGGER_149" type="buildDependencyTrigger">
<properties count="2">
<property name="afterSuccessfulBuildOnly" value="true"/>
<property name="dependsOn" value="Components_Filedb"/>
</properties>
</trigger>
<trigger id="TRIGGER_150" type="buildDependencyTrigger">
<properties count="2">
<property name="afterSuccessfulBuildOnly" value="true"/>
<property name="dependsOn" value="bt168"/>
</properties>
</trigger>
<trigger id="TRIGGER_1130" type="buildDependencyTrigger">
<properties count="2">
<property name="afterSuccessfulBuildOnly" value="true"/>
<property name="dependsOn" value="Components_Tbldb"/>
</properties>
</trigger>
<trigger id="vcsTrigger" type="vcsTrigger" inherited="true">
<properties count="3">
<property name="quietPeriod" value="60"/>
<property name="quietPeriodMode" value="USE_DEFAULT"/>
<property name="triggerRules" value="-:version.properties
-:comment=^Incremented:**
-:**/*-schema.sql"/>
</properties>
</trigger>
I am trying to create a list of "trigger" objects using a class. Ideally the object would have id, type, and a list of properties attributes as dictionaries of {name : value}. My code so far is:
class triggerList:
    """A single trigger: its id, its type and a list of property dicts."""

    def __init__(self, triggerId, triggerType):
        """Store the trigger id and type and start with no properties."""
        self.id = triggerId
        self.type = triggerType
        # Each entry is whatever the caller passes to add_property().
        self.properties = []

    def add_property(self, buildProperty):
        """Append *buildProperty* to this trigger's ``properties`` list."""
        self.properties.append(buildProperty)
def getAllTriggers(buildId):
    """Fetch the triggers of build type *buildId* and build ``triggerList`` objects.

    Requests ``<path>buildTypes/id:<buildId>/triggers`` (``path`` and ``auth``
    come from outside this snippet), parses the XML response and appends one
    ``triggerList`` per ``<trigger>`` element to ``listOfTriggers``.
    """
    url = path + 'buildTypes/id:' + buildId + '/triggers'
    r = requests.get(url, auth=auth)
    tree = ElementTree.fromstring(r.content)
    listOfTriggers = []
    for trigger in tree.iter('trigger'):
        triggerType = trigger.get('type')
        triggerId = trigger.get('id')
        triggerName = triggerList(triggerId, triggerType)
        listOfTriggers.append(triggerName)
        # NOTE: this iterates over every <property> in the whole tree, not just
        # those of the current trigger, so each trigger receives all properties.
        # That is the behaviour the question below is asking about.
        for triggerProperty in tree.iter('property'):
            propertyName = triggerProperty.get('name')
            propertyValue = triggerProperty.get('value')
            propDict = {propertyName : propertyValue}
            triggerName.add_property(propDict)
This gives me a list of objects but every object has a list of every property dictionary. This is the output:
a = listOfTriggers[1]
print a.id, a.type, a.properties
>>> TRIGGER_1241 buildDependencyTrigger [{'afterSuccessfulBuildOnly': 'true'}, {'dependsOn': 'bt191'}, {'afterSuccessfulBuildOnly': 'true'}, {'dependsOn': 'bt171'}, {'afterSuccessfulBuildOnly': 'true'}, {'dependsOn': 'bt167'}, {'afterSuccessfulBuildOnly': 'true'}, {'dependsOn': 'bt164'}, {'afterSuccessfulBuildOnly': 'true'}, {'dependsOn': 'bt364'}, {'afterSuccessfulBuildOnly': 'true'}, {'dependsOn': 'Components_Ratchetdb'}, {'afterSuccessfulBuildOnly': 'true'}, {'dependsOn': 'Components_Filedb'}, {'afterSuccessfulBuildOnly': 'true'}, {'dependsOn': 'bt168'}, {'afterSuccessfulBuildOnly': 'true'}, {'dependsOn': 'Components_Tbldb'}, {'quietPeriod': '60'}, {'quietPeriodMode': 'USE_DEFAULT'}, {'triggerRules': '-:version.properties\n-:comment=^Incremented:**\n-:**/*-schema.sql'}]
I don't know how to stop the loop for just the properties for a specific trigger. Is there a way to use ElementTree to only get the properties for a specific trigger? Is there a more efficient way to create this object?
Not directly answering the question, but you may be reinventing the wheel here, check lxml.objectify package:
The main idea is to hide the usage of XML behind normal Python
objects, sometimes referred to as data-binding. It allows you to use
XML as if you were dealing with a normal Python object hierarchy.
Accessing the children of an XML element deploys object attribute
access. If there are multiple children with the same name, slicing and
indexing can be used. Python data types are extracted from XML content
automatically and made available to the normal Python operators.
Simple syntax mistake:
# Iterate over the properties of the current trigger only.
for triggerProperty in trigger.iter('property'):
    propertyName = triggerProperty.get('name')
    propertyValue = triggerProperty.get('value')
    propDict = {propertyName : propertyValue}
    triggerName.add_property(propDict)
I was iterating over the whole tree, rather than the triggers. Should be:
for triggerProperty in trigger.iter('property'):

Parse XML with Python with title and value on different lines

I have the following XML document that i would like to write to a csv file.
<items>
<item>
<attribute type="set" identifier="naadloos">
<name locale="nl_NL">Naadloos</name>
<value locale="nl_NL" identifier="nee">Nee</value>
</attribute>
<attribute type="asset" identifier="short_description">
<value locale="nl_NL">Tom beugel bh</value>
</attribute>
<attribute type="text" identifier="name">
<name locale="nl_NL">Naam</name>
<value>Marie Jo L'Aventure Tom beugel bh</value>
</attribute>
<attribute type="int" identifier="is_backorder">
<name locale="nl_NL">Backorder</name>
<value>2</value>
</attribute>
</item>
</items>
how can i retrieve the data from this format? I need the following output
naadloos, short_description, name, is_Backorder
Nee, Tom beugel bh, Marie Jo L'Adventure Tom beugel bh, 2
so i need the identifier from the attribute line, and the text from the value line.
Any ideas?
Much appreciated
This is my attempt: it gets the elements by attribute and writes them to a specified file with csv.DictWriter.
import lxml.etree as et
import csv
#headers={}
xml= """<items>
<item>
<attribute type="set" identifier="naadloos">
<name locale="nl_NL">Naadloos</name>
<value locale="nl_NL" identifier="nee">Nee</value>
</attribute>
<attribute type="asset" identifier="short_description">
<value locale="nl_NL">Tom beugel bh</value>
</attribute>
<attribute type="text" identifier="name">
<name locale="nl_NL">Naam</name>
<value>Marie Jo L'Aventure Tom beugel bh</value>
</attribute>
<attribute type="int" identifier="is_backorder">
<name locale="nl_NL">Backorder</name>
<value>2</value>
</attribute>
</item>
</items>
"""
tree = et.fromstring(xml)
# Column names: the identifier attribute of every <attribute> element,
# in document order.
header = list(tree.xpath("//attribute/@identifier"))


def dicter(x):
    """Return ``[x, text]`` for the attribute whose identifier is *x*.

    ``text`` is the concatenated text of the ``<value>`` children of every
    ``<attribute>`` whose ``identifier`` equals *x* (an empty string when
    there is none).
    """
    exp = r"//attribute[@identifier='%s']/value/text()"%x
    tmp = ''.join(tree.xpath(exp))
    return [x, tmp]


# Map each identifier to its value text.
data = dict(dicter(i) for i in header)
#Now write data into file
# On Python 3 the csv module needs a text-mode file opened with newline=''.
with open(r"C:\Users\User_Name\Desktop\output.txt", 'w', newline='') as wrt:
    writer = csv.DictWriter(wrt,header)
    writer.writeheader()
    writer.writerow(data)
Written file content-
naadloos,short_description,name,is_backorder
Nee,Tom beugel bh,Marie Jo L'Aventure Tom beugel bh,2

Categories

Resources