xml.etree.ElementTree access subelement without creating - python

I have the following code:
ffdata = ET.Element("FFData")
fForm = ET.SubElement(ffdata, "Form")
fForm.set("FormDefId","{DD0F88DD-A858-4595-AF2F-3643D0069A39}")
fPages = ET.SubElement(fForm, "Pages")
for xml_file in xml_files:
xml_file = os.path.join(*[CurrentFolderPath,xml_file])
tree = ET.parse(xml_file)
xml_data = tree.getroot()
for xPage in xml_data.iter('Page'):
# --- Ignore first element
if int(xPage.attrib['PageNumber']) >1:
#---- Change Paginators index
xPage.set('PageNumber',str(sPageNumber))
# -- Set page number to fields
fFields = ET.SubElement(xPage, "Fields")
fxField = ET.SubElement(fFields, "Field")
fxField.set('PageNumber',str(sPageNumber-1))
fPages.append(xPage) # Add element to root
sPageNumber= sPageNumber +1
else:
if sImoneExists == 0:
fPages.append(xPage) # Add element to root
sImoneExists = 1
fPages.set("Count",str(sPageNumber-1))
indent(ffdata)
tree = ET.ElementTree(ffdata)
xml_file_save = os.path.join(*[CurrentFolderPath,"Merged.ffdata"])
tree.write(xml_file_save)
I am trying to change a sub-element inside the loop:
fFields = ET.SubElement(xPage, "Fields")
fxField = ET.SubElement(fFields, "Field")
fxField.set('PageNumber',str(sPageNumber-1))
But it creates a new element instead of changing the existing one,
so I get:
<FFData>
<Form FormDefId="{DD0F88DD-A858-4595-AF2F-3643D0069A39}">
<Pages Count="41">
<Page PageDefName="1" PageNumber="2">
<Fields Count="135">
<Field Name="L1-1"></Field>
<Field Name="PageNumber">1</Field>
</Fields>
<Fields>
<Field PageNumber="2" />
</Fields>
</Page>
</Pages>
</Form>
</FFData>
expected
<FFData>
<Form FormDefId="{DD0F88DD-A858-4595-AF2F-3643D0069A39}">
<Pages Count="41">
<Page PageDefName="1" PageNumber="2">
<Fields Count="135">
<Field Name="L1-1"></Field>
<Field Name="PageNumber">2</Field>
</Fields>
</Page>
</Pages>
</Form>
</FFData>
So how can I change the existing sub-element of each page as I iterate over the pages?

Related

How can I extract elementary values with ElementTree in Python?

I am trying to extract field values (e.g. the 'Filename' field) from this XML file in Python.
Can you help me?
Here is the 'MC Librarytest.xml' file:
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<MPL Version="2.0" Title="Library">
<Item>
<Field Name="Filename">Y:\Styx\08 - Styx - Snowblind8. Snowblind.flac</Field>
<Field Name="Name">Snowblind</Field>
<Field Name="Artist">Styx</Field>
<Field Name="Album">Paradise Theater</Field>
<Field Name="Genre">Rock</Field>
</Item>
<Item>
<Field Name="Filename">Y:\David Gilmour\04 A Boat Lies Waiting.flac</Field>
<Field Name="Name">A Boat Lies Waiting</Field>
<Field Name="Artist">David Gilmour</Field>
<Field Name="Album">Rattle That Lock (Deluxe)</Field>
<Field Name="Genre">Progressive</Field>
</Item>
</MPL>
I try this :
import xml.etree.ElementTree as ET
xml_file = 'C:/Users/ClientMD/Downloads/MC Librarytest.xml'
tree = ET.parse(xml_file)
root = tree.getroot()
for each in root.findall('.//Field'):
rating = each.find('.//Filename')
print ('Nothing' if rating is None else rating.text)
and I obtain :
Nothing
...
Nothing
Like this:
import xml.etree.ElementTree as ET

# Location of the library export to read.
xml_file = 'C:/Users/ClientMD/Downloads/MC Librarytest.xml'
tree = ET.parse(xml_file)
root = tree.getroot()

# "Filename" is the value of the Name *attribute*, not a tag name, so it has
# to be matched with an attribute predicate: [@Name="Filename"].
for each in root.findall('.//Field[@Name="Filename"]'):
    rating = each.text
    # An empty <Field/> has no text; print a placeholder instead of None.
    print('Nothing' if rating is None else rating)
Output
Y:\Styx\08 - Styx - Snowblind8. Snowblind.flac
Y:\David Gilmour\04 A Boat Lies Waiting.flac
If you want to grab more elements and keep them under a single item context - you can use the below
import xml.etree.ElementTree as ET

# Raw string: the Windows-style backslashes in the paths are kept literally
# instead of being read as (invalid) escape sequences such as "\S" or "\D".
xml = r'''<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<MPL Version="2.0" Title="Library">
<Item>
<Field Name="Filename">Y:\Styx\08 - Styx - Snowblind8. Snowblind.flac</Field>
<Field Name="Name">Snowblind</Field>
<Field Name="Artist">Styx</Field>
<Field Name="Album">Paradise Theater</Field>
<Field Name="Genre">Rock</Field>
</Item>
<Item>
<Field Name="Filename">Y:\David Gilmour\04 A Boat Lies Waiting.flac</Field>
<Field Name="Name">A Boat Lies Waiting</Field>
<Field Name="Artist">David Gilmour</Field>
<Field Name="Album">Rattle That Lock (Deluxe)</Field>
<Field Name="Genre">Progressive</Field>
</Item>
</MPL>'''

# Values of the Name attribute whose <Field> text should be collected.
INTERESTING_NAMES = ['Filename', 'Artist']

# One dict per <Item>, keyed by the interesting field names.
data = []
root = ET.fromstring(xml)
for item in root.findall('.//Item'):
    temp = {}
    for name in INTERESTING_NAMES:
        # [@Name="..."] selects the direct <Field> child whose Name attribute
        # equals the requested name.
        temp[name] = item.find(f'Field[@Name="{name}"]').text
    data.append(temp)
print(data)
output
[{'Filename': 'Y:\\Styx\\08 - Styx - Snowblind8. Snowblind.flac', 'Artist': 'Styx'}, {'Filename': 'Y:\\David Gilmour\\04 A Boat Lies Waiting.flac', 'Artist': 'David Gilmour'}]

complex xml to csv using python [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 1 year ago.
Improve this question
<app>
<doc>
<field name="id">013</field>
<field name="groupid">013</field>
<field name="img_url">8b4</field>
<field name="filetype">HTML</field>
<field name="url">https://calgaryherald.com/pmn/business-pmn/sally-rumbles-toward-u-s-/</field>
<field name="topic">accurate</field>
<field name="topic">additional</field>
<field name="topic">agriculture</field>
<field name="topic">area</field>
<field name="topic">biggest</field>
</doc>
<doc>
<field name="id">0131</field>
<field name="groupid">013</field>
<field name="img_url">8b</field>
<field name="filetype">HTML</field>
<field name="url">https://calgaryherald.com/pmn/business-pmn/sally-rumbles-toward</field>
<field name="topic">accurate</field>
<field name="topic">additional</field>
<field name="topic">agriculture</field>
<field name="topic">area</field>
<field name="topic">biggest1</field>
<field name="topic">biggest2</field>
<field name="topic">biggest3</field>
</doc>
</app>
I have an XML similar to this and I need to convert it to a CSV in Python. Does anyone know how to do it? Note that the number of fields named "topic" differs from doc to doc. The CSV headers should match the field names, and all topics of a doc should go into a single cell, separated by commas.
Expected Output
enter image description here
You could use an XML parser that emits element data as it parses to build the csv. On every end tag, you could either add a value to the row or write the row itself. One advantage of iterparse is that you don't need to load the entire document into memory before processing.
import xml.etree.ElementTree as ET
import io
import csv
# Column order of the generated CSV. Every non-"topic" field name found in the
# XML must be listed here, otherwise DictWriter.writerow() raises ValueError.
field_names = ["id", "groupid", "img_url", "filetype", "url", "topic"]
with open("test.csv", "w", newline="") as out_file:
    writer = csv.DictWriter(out_file, field_names)
    writer.writeheader()
    row = {}    # columns collected for the <doc> currently being parsed
    topic = []  # all "topic" values of the current <doc>
    # iterparse() reports "end" events by default, i.e. an element is yielded
    # once its closing tag (and therefore its text) has been read.
    for _event, elem in ET.iterparse("test.xml"):
        if elem.tag == "doc":
            # doc elem end, write row to csv and setup for next
            row["topic"] = ",".join(topic)
            writer.writerow(row)
            row = {}
            topic = []
            # Drop the finished <doc> subtree; without this the whole
            # document would still accumulate in memory while streaming.
            elem.clear()
        elif elem.tag == "field":
            # field elem end, add to current row
            if elem.attrib["name"] == "topic":
                # An empty <field name="topic"/> has text None, which
                # str.join() would reject.
                topic.append(elem.text or "")
            else:
                row[elem.attrib["name"]] = elem.text
The code below creates CSV-like output. Is that what you are looking for?
Note that in this output you can't tell which values are 'topic' values and which are not.
import xml.etree.ElementTree as ET
xml = '''<?xml version="1.0" encoding="UTF-8"?>
<app>
<doc>
<field name="id">013</field>
<field name="groupid">013</field>
<field name="img_url">8b4</field>
<field name="filetype">HTML</field>
<field name="url">https://calgaryherald.com/pmn/business-pmn/sally-rumbles-toward-u-s-/</field>
<field name="topic">accurate</field>
<field name="topic">additional</field>
<field name="topic">agriculture</field>
<field name="topic">area</field>
<field name="topic">biggest</field>
</doc>
<doc>
<field name="id">0131</field>
<field name="groupid">013</field>
<field name="img_url">8b</field>
<field name="filetype">HTML</field>
<field name="url">https://calgaryherald.com/pmn/business-pmn/sally-rumbles-toward</field>
<field name="topic">accurate</field>
<field name="topic">additional</field>
<field name="topic">agriculture</field>
<field name="topic">area</field>
<field name="topic">biggest1</field>
<field name="topic">biggest2</field>
<field name="topic">biggest3</field>
</doc>
</app>'''
root = ET.fromstring(xml)

# The header line is derived from the first <doc> only and printed once.
first_time = True
headers = set()
for doc in root.iterfind('.//doc'):
    # (field name, field text) pairs of this doc, in document order.
    data = [(field.attrib['name'], field.text) for field in doc.findall('field')]
    if first_time:
        headers.update(field_name for field_name, _ in data)
        print(','.join(sorted(headers)))
        first_time = False
    # Stable sort by field name, so repeated names keep their document order.
    ordered = sorted(data, key=lambda pair: pair[0])
    print(','.join(value for _, value in ordered))
output
filetype,groupid,id,img_url,topic,url
HTML,013,013,8b4,accurate,additional,agriculture,area,biggest,https://calgaryherald.com/pmn/business-pmn/sally-rumbles-toward-u-s-/
HTML,013,0131,8b,accurate,additional,agriculture,area,biggest1,biggest2,biggest3,https://calgaryherald.com/pmn/business-pmn/sally-rumbles-toward

Fetch xml tag values recursively using ElementTree

I have an XML document of this form:
<SCHOOL>
<GROUP name="GetStudInfo">
<DATA>
<NAME type="char">Sahil Jha</NAME>
<STD>11th</STD>
</DATA>
<DATA>
<NAME type="char">Rashmi Kaur</NAME>
<STD>11th</STD>
</DATA>
<DATA>
<NAME type="char">Palak Bisht</NAME>
<STD>11th</STD>
</DATA>
</SCHOOL>
I need to fetch the values of NAME, STD.
I tried doing this:
e = ET.ElementTree(ET.fromstring(getunitinfo_str))
for elt in e.iter():
print("{} {}".format(elt.tag, elt.text))
But this was covering other values as well:
Output:
SCHOOL
GROUP
DATA
NAME Sahil Jha
STD 11th
DATA
NAME Rashmi Kaur
STD 11th
DATA
NAME Palak Bisht
STD 11th
{}
Expected O/p:
{'Sahil Jha':'11th', 'Rashmi Kaur':'11th', 'Palak Bisht':'11th'}
But the formatting should be of the type NAME:STD. Where am I going wrong?
As mentioned by @furas, you can use XPath to find all DATA elements and then find
the NAME and STD elements inside each of them:
import xml.etree.ElementTree as ET
xml = '''<SCHOOL>
<GROUP name="GetStudInfo">
<DATA>
<NAME type="char">Sahil Jha</NAME>
<STD>11th</STD>
</DATA>
<DATA>
<NAME type="char">Rashmi Kaur</NAME>
<STD>11th</STD>
</DATA>
<DATA>
<NAME type="char">Palak Bisht</NAME>
<STD>11th</STD>
</DATA>
</GROUP>
</SCHOOL>'''
e = ET.fromstring(xml)
# DATA elements are grandchildren of the root (SCHOOL > GROUP > DATA). A bare
# 'DATA' path only matches direct children of SCHOOL and would find nothing,
# so search all descendants with './/DATA'.
for data_tag in e.findall('.//DATA'):
    name = data_tag.find('NAME')
    std = data_tag.find('STD')
    print("{} {}".format(name.text, std.text))
Or you can use a dict comprehension to get the dictionary you want:
# Map the NAME text of each DATA element to the STD text of the same element;
# './/DATA' matches DATA elements at any depth below `e`.
my_dict = {
data_tag.find('NAME').text: data_tag.find('STD').text
for data_tag in e.findall('.//DATA')
}
print(my_dict)
You need something more than just print() - you need an if/else that checks elt.tag so that only NAME and STD are handled.
Because NAME and STD are different tags, you have to remember the NAME element in a variable and use it when you reach the corresponding STD.
name = None # default value at start
for elt in e.iter():
if elt.tag == 'NAME':
name = elt # remember element
if elt.tag == 'STD':
print("{}:{}".format(name.text, elt.text))
Or you could use XPath as in @qouify's answer.
Minimal working code
getunitinfo_str = '''
<SCHOOL>
<GROUP name="GetStudInfo">
<DATA>
<NAME type="char">Sahil Jha</NAME>
<STD>11th</STD>
</DATA>
<DATA>
<NAME type="char">Rashmi Kaur</NAME>
<STD>11th</STD>
</DATA>
<DATA>
<NAME type="char">Palak Bisht</NAME>
<STD>11th</STD>
</DATA>
</GROUP>
</SCHOOL>
'''
import xml.etree.ElementTree as ET
e = ET.ElementTree(ET.fromstring(getunitinfo_str))

# Most recently visited NAME element; each STD is printed together with it.
name = None
for elt in e.iter():
    tag = elt.tag
    if tag == 'NAME':
        name = elt
    elif tag == 'STD':
        print("{}:{}".format(name.text, elt.text))
One liner below
import xml.etree.ElementTree as ET
xml = '''<SCHOOL>
<GROUP name="GetStudInfo">
<DATA>
<NAME type="char">Sahil Jha</NAME>
<STD>11th</STD>
</DATA>
<DATA>
<NAME type="char">Rashmi Kaur</NAME>
<STD>116th</STD>
</DATA>
<DATA>
<NAME type="char">Palak Bisht</NAME>
<STD>17th</STD>
</DATA>
</GROUP>
</SCHOOL>'''
# Parse the document, then build {NAME text: STD text} in a single expression;
# './/DATA' matches DATA elements at any depth below the root.
root = ET.fromstring(xml)
data = {x.find("NAME").text: x.find("STD").text for x in root.findall('.//DATA')}
print(data)
output
{'Sahil Jha': '11th', 'Rashmi Kaur': '116th', 'Palak Bisht': '17th'}

Parsing name/value pairs from XML

I am trying to pull account details from XML files supplied by vendors.
I have one vendor that supplied XML files like:
<Accounts>
<Account>
<AccountNumber>1234567</AccountNumber>
<Balance>$200.00</Balance>
</Account>
<Account>
...
</Account>
</Accounts>
And I can parse this fairly easily using python:
mytree = et.parse(xml_path)
myroot = mytree.getroot()
# Walk the <Account> children of the root that was just parsed; the loop has
# to use `myroot`, the only root element defined in this snippet.
for acc in myroot.findall('Account'):
    acctnum = acc.find('AccountNumber').text
    balance = acc.find('Balance').text
    print(acctnum, balance)
Which outputs like this:
1234567 $200.00
However another vendor supplies the XML files in something more like name/value pairs, and I am unsure how to easily access that data. It doesn't work the same way as above:
<Accounts>
<Account>
<field name='AccountNumber' value='1234567' />
<field name='Balance' value='$200.00' />
</Account>
<Account>
...
</Account>
</Accounts>
So far I've got this, but would like to be able to access the values separately and easily:
mytree = et.parse(xml_path)
myroot = mytree.getroot()
for field in myroot.findall('Account'):
for line in field:
print(line.attrib)
Which outputs something like:
{'name': 'AccountNumber', 'value': '1234567'}
{'name': 'Balance', 'value': '$200.00'}
So my question is this - How can I access the values and assign them to variables (based on the name) so that I can make use of them elsewhere in the script, like I have with acctnum and balance in the first example?
Populate a new datastructure (like a dict) from the field when you iterate instead of just discarding:
# Collect each child's name -> value attribute pair into a single dict.
account_d = {}
for account in myroot.findall('Account'):
    for entry in account:
        attrs = entry.attrib
        account_d[attrs['name']] = attrs['value']
# account_d should now be:
# { 'AccountNumber': '1234567', 'Balance': '$200.00' }
You can use a list of lists/tuples too:
# Collect each child's (name, value) attribute pair into a list of tuples.
account_a = []
for field in myroot.findall('Account'):
    for line in field:
        # list.append() takes exactly one argument, so the pair is wrapped in
        # a tuple, and it is appended to the list created above.
        account_a.append((line.attrib['name'], line.attrib['value']))
# account_a should now be:
# [('AccountNumber', '1234567'), ('Balance', '$200.00')]
ElementTree 1.3 has the ability to locate nodes with particular attributes:
from xml.etree import ElementTree as et
data = '''\
<Accounts>
<Account>
<field name='AccountNumber' value='1234567' />
<field name='Balance' value='$200.00' />
</Account>
<Account>
<field name='AccountNumber' value='9999999' />
<field name='Balance' value='$300.00' />
</Account>
</Accounts>'''
tree = et.fromstring(data)
for acc in tree.iterfind('Account'):
    # [@name='...'] selects the <field> child whose name attribute matches;
    # the wanted data lives in that element's value attribute.
    acctnum = acc.find("field[@name='AccountNumber']").attrib['value']
    balance = acc.find("field[@name='Balance']").attrib['value']
    print(acctnum, balance)
1234567 $200.00
9999999 $300.00
You can do it by collecting all the Account element's field attributes into a dictionary and then using the information in it as needed:
accounts.xml sample input file:
<?xml version="1.0"?>
<Accounts>
<Account>
<field name='AccountNumber' value='1234567' />
<field name='Balance' value='$200.00' />
</Account>
<Account>
<field name='AccountNumber' value='8901234' />
<field name='Balance' value='$100.00' />
</Account>
</Accounts>
Code:
import xml.etree.ElementTree as et

xml_path = 'accounts.xml'
mytree = et.parse(xml_path)
myroot = mytree.getroot()
for acct in myroot.iterfind('Account'):
    # Gather this account's name/value attribute pairs into a lookup dict.
    info = {}
    for field in acct.findall('field'):
        info[field.attrib['name']] = field.attrib['value']
    acctnum, balance = info['AccountNumber'], info['Balance']
    print(acctnum, balance)
Result:
1234567 $200.00
8901234 $100.00
Question: How can I access the values and assign them to variables (based on the name)
Convert all Accounts to a Dict[AccountNumber] of Dict[field].
The Attribute name becomes the dict Key:
# Outer dict: account number -> dict of that account's fields.
Accounts = {}
for account in root.iterfind('Account'):
    # Inner dict: each field's name attribute -> its value attribute.
    fields = {
        field.attrib['name']: field.attrib['value']
        for field in account.findall('field')
    }
    print('{a[AccountNumber]} {a[Balance]}'.format(a=fields))
    Accounts[fields['AccountNumber']] = fields
print(Accounts)
Output:
1234567 $200.00
9999999 $300.00
{'9999999': {'AccountNumber': '9999999', 'Balance': '$300.00'}, '1234567': {'AccountNumber': '1234567', 'Balance': '$200.00'}}
Tested with Python: 3.4.2

How can I display the child element of a node from an xml file, in python?

Here is my xml file
<root>
<Module name="ac4" offset="32" width="12">
<register name="xga_control" offset="0x000" width="32" access="R/W">
<field name="reserved" offset="0" bit_span="5"/>
<field name="force_all_fault_clear" bit_span="1" default="0">
<description>Rising edge forces all fault registers to clear</description>
</field>
<field name="force_warning" default="0" bit_span="1">
<description>Forces AC2 to report a Master Warning</description>
</field>
<field name="force_error" default="0" bit_span="1">
<description>Forces AC2 to report a Master Error</description>
</field>
</register>
</Module>
</root>
Right now I can access the names of my registers and display them. However I also want to display the names and attributes of my field elements. How can I do that? Here is my code so far.
input_file = etree.parse('file1.xml')
output=open("ac4.vhd","w+")
output.write("Registers \n")
for node in input_file.iter():
if node.tag=="register":
name=node.attrib.get("name")
print(name)
output.write(name)
output.write("\n")
if node.tag=="field":
name=node.attrib.get("name")
output.write(name)
Right now the output looks like
Registers
xga_control
i_cmd_reg
I want it to look like
Registers
xga_control
reserved
force_all_fault_clear
force_warning
force_error
i_cmd_reg
field name
field name
Any ideas on how to do this?
Instead of iterating over input_file.iter() you can do input_file.getroot() and iterate systematically over that.
This is how you would write your code:
import xml.etree.ElementTree as ET

tree = ET.parse('file1.xml')
root = tree.getroot()
with open('ac4.vhd', 'w+') as fd:
    fd.write('Registers\n')
    for node in root:
        # Skip any top-level element that is not a <Module>.
        if node.tag != 'Module':
            continue
        for module_child in node:
            # Name of each child of the module, one per line ...
            fd.write('{0}\n'.format(module_child.get('name')))
            # ... followed by the tab-indented names of its own children.
            for grandchild in module_child:
                fd.write('\t{0}\n'.format(grandchild.get('name')))
Your output becomes:
Registers
xga_control
reserved
force_all_fault_clear
force_warning
force_error

Categories

Resources