issue: python xml append element inside a for loop - python

thanks for taking the time with this one.
I have an XML file with an element called selectionset. The idea is to take that element and modify some of its subelements' attributes and tails; that part I have done.
What I can't figure out is why, when I try to add the new subelements to the original parent (called selectionsets), only the last item of the list inplist gets pushed.
import xml.etree.ElementTree as etree
from xml.etree.ElementTree import *
from xml.etree.ElementTree import ElementTree
# Load the source document and grab its root element.
tree=ElementTree()
tree.parse('STRUCTURAL.xml')
root = tree.getroot()
# find() returns only the first matching <selectionset>; this one element is reused below.
col=tree.find('selectionsets/selectionset')
#find the value needed
val=tree.findtext('selectionsets/selectionset/findspec/conditions/condition/value/data')
setname=col.attrib['name']
# NOTE(review): listnames, entry and catcher are assigned but never read in this snippet.
listnames=val + " 6"
inplist=["D","E","F","G","H"]
entry=3
catcher=[]
ss=root.find('selectionsets')
outxml=ss
# NOTE(review): indentation was lost in this listing; the statements through
# root[0].append(col) are presumably the loop body -- confirm against the original script.
for i in range(len(inplist)):
# Bare expression: the result of str(val) is discarded.
str(val)
col.set('name',(setname +" "+ inplist[i]))
col.find('findspec/conditions/condition/value/data').text=str(inplist[i]+val[1:3])
#print (etree.tostring(col)) #everything working well til this point
timper=col.find('selectionset')
# `col` is never rebound or copied, so the very same element object is appended here every time.
root[0].append(col)
# new=etree.SubElement(outxml,timper)
#you need to create a tree with element tree before creating the xml file
itree=etree.ElementTree(outxml)
itree.write('Selection Sets.xml')
print (etree.tostring(outxml))
# print (Test_file.selectionset())
#Initial xml
<?xml version="1.0" encoding="UTF-8" ?>
<exchange xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://download.autodesk.com/us/navisworks/schemas/nw-exchange-12.0.xsd" units="ft" filename="STRUCTURAL.nwc" filepath="C:\Users\Ricardo\Desktop\Comun\Taller 3">
<selectionsets>
<selectionset name="Column Location" guid="565f5345-de06-4f5b-aa0f-1ae751c98ea8">
<findspec mode="all" disjoint="0">
<conditions>
<condition test="contains" flags="10">
<category>
<name internal="LcRevitData_Element">Element</name>
</category>
<property>
<name internal="lcldrevit_parameter_-1002563">Column Location Mark</name>
</property>
<value>
<data type="wstring">C-A </data>
</value>
</condition>
</conditions>
<locator>/</locator>
</findspec>
</selectionset>
</selectionsets>
</exchange>
#----Current Output
<selectionsets>
<selectionset guid="565f5345-de06-4f5b-aa0f-1ae751c98ea8" name="Column Location H">
<findspec disjoint="0" mode="all">
<conditions>
<condition flags="10" test="contains">
<category>
<name internal="LcRevitData_Element">Element</name>
</category>
<property>
<name internal="lcldrevit_parameter_-1002563">Column Location Mark</name>
</property>
<value>
<data type="wstring">H-A</data>
</value>
</condition>
</conditions>
<locator>/</locator>
</findspec>
</selectionset>
<selectionset guid="565f5345-de06-4f5b-aa0f-1ae751c98ea8" name="Column Location H">
<findspec disjoint="0" mode="all">
<conditions>
<condition flags="10" test="contains">
<category>
<name internal="LcRevitData_Element">Element</name>
</category>
<property>
<name internal="lcldrevit_parameter_-1002563">Column Location Mark</name>
</property>
<value>
<data type="wstring">H-A</data>
</value>
</condition>
</conditions>
<locator>/</locator>
</findspec>
</selectionset>
<selectionset guid="565f5345-de06-4f5b-aa0f-1ae751c98ea8" name="Column Location H">
<findspec disjoint="0" mode="all">
<conditions>
<condition flags="10" test="contains">
<category>
<name internal="LcRevitData_Element">Element</name>
</category>
<property>
<name internal="lcldrevit_parameter_-1002563">Column Location Mark</name>
</property>
<value>
<data type="wstring">H-A</data>
</value>
</condition>
</conditions>
<locator>/</locator>
</findspec>
</selectionset>
<selectionset guid="565f5345-de06-4f5b-aa0f-1ae751c98ea8" name="Column Location H">
<findspec disjoint="0" mode="all">
<conditions>
<condition flags="10" test="contains">
<category>
<name internal="LcRevitData_Element">Element</name>
</category>
<property>
<name internal="lcldrevit_parameter_-1002563">Column Location Mark</name>
</property>
<value>
<data type="wstring">H-A</data>
</value>
</condition>
</conditions>
<locator>/</locator>
</findspec>
</selectionset>
<selectionset guid="565f5345-de06-4f5b-aa0f-1ae751c98ea8" name="Column Location H">
<findspec disjoint="0" mode="all">
<conditions>
<condition flags="10" test="contains">
<category>
<name internal="LcRevitData_Element">Element</name>
</category>
<property>
<name internal="lcldrevit_parameter_-1002563">Column Location Mark</name>
</property>
<value>
<data type="wstring">H-A</data>
</value>
</condition>
</conditions>
<locator>/</locator>
</findspec>
</selectionset>
<selectionset guid="565f5345-de06-4f5b-aa0f-1ae751c98ea8" name="Column Location H">
<findspec disjoint="0" mode="all">
<conditions>
<condition flags="10" test="contains">
<category>
<name internal="LcRevitData_Element">Element</name>
</category>
<property>
<name internal="lcldrevit_parameter_-1002563">Column Location Mark</name>
</property>
<value>
<data type="wstring">H-A</data>
</value>
</condition>
</conditions>
<locator>/</locator>
</findspec>
</selectionset>
</selectionsets>

Here's what I've been able to put together and it looks like it'll do what you're looking for. Here are the main differences: (1) This will iterate over multiple selectionset items (if you end up with more than one), (2) It creates a deepcopy of the element before modifying the values (I think you were always modifying the original "col"), (3) It appends the new selectionset to the selectionsets tag rather than the root.
Here's the deepcopy documentation
import xml.etree.ElementTree as etree
import copy
# Parse the source document; every existing <selectionset> serves as a
# template that is copied, never modified in place.
tree = etree.ElementTree()
tree.parse('test.xml')
root = tree.getroot()
inplist = ["D", "E", "F", "G", "H"]

# The parent that receives the copies does not change, so look it up once.
selectionsets = root.find('selectionsets')

# findall() returns a list, so appending to <selectionsets> while looping
# does not feed the new copies back into the iteration.
for selectionset in tree.findall('selectionsets/selectionset'):
    for i in inplist:
        # A deep copy gives each suffix its own independent subtree; without
        # it the same element object would be appended repeatedly.
        col = copy.deepcopy(selectionset)
        col.set('name', '%s %s' % (col.attrib['name'], i))
        data = col.find('findspec/conditions/condition/value/data')
        # Swap the first character for the suffix, keeping characters 1-2.
        data.text = '%s%s' % (i, data.text[1:3])
        selectionsets.append(col)

itree = etree.ElementTree(root)
itree.write('Selection Sets.xml')

Related

Python - using element tree to get data from specific nodes in xml

I have been looking around and there are a lot of similar questions, but none that solved my issue sadly.
My XML file looks like this
<?xml version="1.0" encoding="utf-8"?>
<Nodes>
<Node ComponentID="1">
<Settings>
<Value name="Text Box (1)"> SettingA </Value>
<Value name="Text Box (2)"> SettingB </Value>
<Value name="Text Box (3)"> SettingC </Value>
<Value name="Text Box (4)"> SettingD </Value>
<AdvSettings State="On"/>
</Settings>
</Node>
<Node ComponentID="2">
<Settings>
<Value name="Text Box (1)"> SettingA </Value>
<Value name="Text Box (2)"> SettingB </Value>
<Value name="Text Box (3)"> SettingC </Value>
<Value name="Text Box (4)"> SettingD </Value>
<AdvSettings State="Off"/>
</Settings>
</Node>
<Node ComponentID="3">
<Settings>
<Value name="Text Box (1)"> SettingG </Value>
<Value name="Text Box (2)"> SettingH </Value>
<Value name="Text Box (3)"> SettingI </Value>
<Value name="Text Box (4)"> SettingJ </Value>
<AdvSettings State="Yes"/>
</Settings>
</Node>
</Nodes>
With Python I'm trying to get the Values of text box 1 and text box 2 for each Node that has "AdvSettings" set on ON.
So in this case I would like a result like
ComponentID State Textbox1 Textbox2
1 On SettingA SettingB
3 On SettingG SettingH
I have done some attempts but didn't get far. With this I managed to get the AdvSettings tag, but that's as far as I got:
import xml.etree.ElementTree as ET
tree = ET.parse('XMLSearch.xml')
root = tree.getroot()
# iter() walks the whole tree and yields every <AdvSettings> element.
for AdvSettings in root.iter('AdvSettings'):
    print(AdvSettings.tag, AdvSettings.attrib)
You can use an XPath to find all the relevant nodes and then extract the needed data out of them. An example to this will be like below. (Comments as explanation)
from lxml import etree
xml = etree.fromstring('''
<Nodes>...
</Nodes>
''')
# Use XPath to select the relevant nodes: every <Node> whose
# Settings/AdvSettings has State "Yes" or "On" ("@" addresses an attribute).
on_nodes = xml.xpath("//Node[Settings[AdvSettings[@State='Yes' or @State='On']]]")
# Get all needed information from every node. Iterating an element yields
# its children directly; children without text (e.g. <AdvSettings/>) are
# skipped by the `if c.text` filter.
data_collected = [
    dict(
        [("ComponentID", node.attrib['ComponentID'])]
        + [(c.get("name"), c.text) for c in node.find("Settings") if c.text]
    )
    for node in on_nodes
]
# You got a list of dicts with all relevant information
# print it out, I used pandas for formatting. Optional
import pandas
print(pandas.DataFrame.from_records(data_collected).to_markdown(index=False))
Would give you an output like
| ComponentID | Text Box (1) | Text Box (2) | Text Box (3) | Text Box (4) |
|--------------:|:---------------|:---------------|:---------------|:---------------|
| 1 | SettingA | SettingB | SettingC | SettingD |
| 3 | SettingG | SettingH | SettingI | SettingJ |
Below (using python core xml lib)
import xml.etree.ElementTree as ET
import pandas as pd
xml = '''<?xml version="1.0" encoding="utf-8"?>
<Nodes>
<Node ComponentID="1">
<Settings>
<Value name="Text Box (1)"> SettingA </Value>
<Value name="Text Box (2)"> SettingB </Value>
<Value name="Text Box (3)"> SettingC </Value>
<Value name="Text Box (4)"> SettingD </Value>
<AdvSettings State="On"/>
</Settings>
</Node>
<Node ComponentID="2">
<Settings>
<Value name="Text Box (1)"> SettingA </Value>
<Value name="Text Box (2)"> SettingB </Value>
<Value name="Text Box (3)"> SettingC </Value>
<Value name="Text Box (4)"> SettingD </Value>
<AdvSettings State="Off"/>
</Settings>
</Node>
<Node ComponentID="3">
<Settings>
<Value name="Text Box (1)"> SettingG </Value>
<Value name="Text Box (2)"> SettingH </Value>
<Value name="Text Box (3)"> SettingI </Value>
<Value name="Text Box (4)"> SettingJ </Value>
<AdvSettings State="Yes"/>
</Settings>
</Node>
</Nodes>'''

# One dict per <Node> whose AdvSettings State counts as enabled.
data = []
root = ET.fromstring(xml)
nodes = root.findall('.//Node')
for node in nodes:
    adv = node.find('.//AdvSettings')
    if adv is None:
        # Node without an <AdvSettings> element: nothing to evaluate.
        continue
    # A missing State attribute is treated as 'Off'.
    flag = adv.attrib.get('State', 'Off')
    if flag in ('On', 'Yes'):
        # "[@name=...]" filters on the attribute; the raw text carries
        # surrounding spaces in the sample data, hence strip().
        data.append({
            'id': node.attrib.get('ComponentID'),
            'txt_box_1': node.find('.//Value[@name="Text Box (1)"]').text.strip(),
            'txt_box_2': node.find('.//Value[@name="Text Box (2)"]').text.strip(),
        })
df = pd.DataFrame(data)
print(df)
output
id txt_box_1 txt_box_2
0 1 SettingA SettingB
1 3 SettingG SettingH

Change the atribute of the xml tree in python

I have a problem changing an attribute in an XML file.
My tree looks like that
<Objects>
<BigObj Version="2.2" Name="Something">
<ItemList>
<Item Name="s_1" Selected="false"/>
<Item Name="s_2" Selected="false"/>
<Item Name="s_3" Selected="true"/>
<Item Name="s_4" Selected="false"/>
</ItemList>
</BigObj >
</Objects>
I need to check whether each "s_x" name is in a list of names: if it is, change the value of Selected to true; if it is not, set it to false (or keep it false).
I've tried to do that with this code:
# Item names whose "Selected" attribute should end up "true".
lslist = ["s_1","s_4"]
# `root` is not defined in this fragment; it comes from code not shown here.
for child in root.findall("./Objects/BigObj/ItemList/Item"):
for idx in lslist:
# NOTE(review): in the sample XML "Name" is an attribute of <Item>, not a child element, while find() searches child elements.
if idx in child.find("Name").text:
child.set('Selected', "true")
else:
child.set('Selected', "false")
But i have an AttributeError: 'NoneType' object has no attribute 'text'
The below works
import xml.etree.ElementTree as ET
lslist = ["s_1", "s_4"]
xml = '''<Objects>
<BigObj Version="2.2" Name="Something">
<ItemList>
<Item Name="s_1" Selected="false"/>
<Item Name="s_2" Selected="false"/>
<Item Name="s_3" Selected="true"/>
<Item Name="s_4" Selected="false"/>
</ItemList>
</BigObj ></Objects>'''
root = ET.fromstring(xml)
items = root.findall('.//Item')
# Build the lookup once so each membership test is O(1).
selected_names = set(lslist)
for item in items:
    # "Name" is an attribute, so it is read from attrib rather than find().
    # NOTE: str(bool) yields "True"/"False" (capitalised); append .lower()
    # if the document must keep the lowercase "true"/"false" spelling.
    item.attrib['Selected'] = str(item.attrib['Name'] in selected_names)
ET.dump(root)
output
<Objects>
<BigObj Version="2.2" Name="Something">
<ItemList>
<Item Name="s_1" Selected="True" />
<Item Name="s_2" Selected="False" />
<Item Name="s_3" Selected="False" />
<Item Name="s_4" Selected="True" />
</ItemList>
</BigObj></Objects>

How to match a string to mapping xml tag using Python?

I would like to map a value from an XML file.
<Country>
<number no="2008" info="update">
<detail name="man1" class="A1\X4">
<string name="ruth" />
<string name="amy" />
</detail>
<detail name="man2" class="A2">
<string name="lisa" />
<string name="graham" />
</detail>
</number>
</Country>
I need to get the value of the number in here <number no="2008" by mapping with this value class="A1\X4"
I tried this way:
stringno = 'A1'
# "[@class=...]" is an exact attribute match; the trailing "/.." steps back
# up to the parent <number> of each matching <detail>.
for family in ReadXML.findall('number/detail[@class="{}"]/..'.format(stringno)):
    name = family.get('no')
    print(name)
It only works if stringno = "A1\X4", but I need it to match when stringno = 'A1'. Is there any matching function in Python to solve this problem, something like -like or -contains?
Thank you for the information.
Hi what about using an iterative method.
Full code
import xml.etree.ElementTree as ET
tree = ET.parse('myXml.xml')
root = tree.getroot()
stringno = 'A1'
# Element paths only offer exact attribute matches, so the substring test is
# done in Python while walking each <number> and its direct children.
for family in root.findall('number'):
    for elem in family:
        # Default to '' so a child without a "class" attribute is skipped
        # instead of raising TypeError on `in None`.
        if stringno in elem.get('class', ''):
            print('no: {}, name: {}, class: {}'.format(family.get('no'), elem.get('name'), elem.get('class')))
Input
myXml.xml
<Country>
<number no="2008" info="update">
<detail name="man1" class="A1\X4">
<string name="ruth" />
<string name="amy" />
</detail>
<detail name="man2" class="A2">
<string name="lisa" />
<string name="graham" />
</detail>
</number>
<number no="2009" info="update">
<detail name="man1" class="A1\X5">
<string name="ruth" />
<string name="amy" />
</detail>
<detail name="man2" class="A3">
<string name="lisa" />
<string name="graham" />
</detail>
</number>
</Country>
Output
no: 2008, name: man1, class: A1\X4
no: 2009, name: man1, class: A1\X5

etree data extraction from xml with odd tree structure

here is a piece of the xml data before i go any further
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE xmeml>
<xmeml version="5">
<sequence id="episode1">
<media>
<video>
<track>
<generatoritem id="Gen Subtitle1">
<effect>
<name>Gen Subtitle</name>
<effectid>Gen Subtitle</effectid>
<effectcategory>Text</effectcategory>
<effecttype>generator</effecttype>
<mediatype>video</mediatype>
<parameter>
<parameterid>part1</parameterid>
<name>Text Settings</name>
<value/>
</parameter>
<parameter>
<parameterid>str</parameterid>
<name>Text</name>
<value>You're a coward for picking on people
who are weaker than you.</value>
</parameter>
<parameter>
<parameterid>font</parameterid>
<name>Font</name>
<value>Arial</value>
</parameter>
</effect>
</media>
</sequence>
</xmeml>
Now, as you can see, the tree starts with <effect> and inside it there are multiple <parameter> elements, but I'm only after the <value> from the <parameter> elements that also contain
<parameterid>str</parameterid>
<name>Text</name>
so i can get an output of "That child is so cute.
And he's smart."
Here is my code
# NOTE(review): `lst` is assigned here but never read in this fragment.
lst = tree.findall('xmeml/sequence/media/video/track/generatoritem/effect/parameter/value')
# './/value' selects every <value> element at any depth, with no filter on the sibling <parameterid>.
counts = tree.findall('.//value')
for each in counts:
print(each.text)
And this is what i get
And he's smart.
Arial
See below
import xml.etree.ElementTree as ET
xml = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE xmeml>
<xmeml version="5">
<sequence id="episode1">
<effect>
<name>Gen Subtitle</name>
<effectid>Gen Subtitle</effectid>
<effectcategory>Text</effectcategory>
<effecttype>generator</effecttype>
<mediatype>video</mediatype>
<parameter>
<parameterid>part1</parameterid>
<name>Text Settings</name>
<value/>
</parameter>
<parameter>
<parameterid>str</parameterid>
<name>Text</name>
<value>That child is so cute. And he's smart</value>
</parameter>
<parameter>
<parameterid>font</parameterid>
<name>Font</name>
<value>Arial</value>
</parameter>
</effect>
</sequence>
</xmeml>'''
root = ET.fromstring(xml)
# The predicate keeps only <parameter> elements that have a child
# <parameterid> whose text is exactly "str".
str_params = root.findall('.//parameter[parameterid="str"]')
for param in str_params:
    # findtext() returns None when the child is absent, so a parameter
    # without <name> is simply skipped instead of raising AttributeError.
    if param.findtext('name') == 'Text':
        print('The text: {}'.format(param.findtext('value')))
        # Only the first matching parameter is wanted.
        break
output
The text: That child is so cute. And he's smart

Parse XML with Python with title and value on different lines

I have the following XML document that i would like to write to a csv file.
<items>
<item>
<attribute type="set" identifier="naadloos">
<name locale="nl_NL">Naadloos</name>
<value locale="nl_NL" identifier="nee">Nee</value>
</attribute>
<attribute type="asset" identifier="short_description">
<value locale="nl_NL">Tom beugel bh</value>
</attribute>
<attribute type="text" identifier="name">
<name locale="nl_NL">Naam</name>
<value>Marie Jo L'Aventure Tom beugel bh</value>
</attribute>
<attribute type="int" identifier="is_backorder">
<name locale="nl_NL">Backorder</name>
<value>2</value>
</attribute>
</item>
</items>
how can i retrieve the data from this format? I need the following output
naadloos, short_description, name, is_Backorder
Nee, Tom beugel bh, Marie Jo L'Adventure Tom beugel bh, 2
so i need the identifier from the attribute line, and the text from the value line.
Any ideas?
Much appreciated
This is my try it gets elements by attribute and writes them into a specified file by dictwriter!
import lxml.etree as et
import csv
#headers={}
xml = """<items>
<item>
<attribute type="set" identifier="naadloos">
<name locale="nl_NL">Naadloos</name>
<value locale="nl_NL" identifier="nee">Nee</value>
</attribute>
<attribute type="asset" identifier="short_description">
<value locale="nl_NL">Tom beugel bh</value>
</attribute>
<attribute type="text" identifier="name">
<name locale="nl_NL">Naam</name>
<value>Marie Jo L'Aventure Tom beugel bh</value>
</attribute>
<attribute type="int" identifier="is_backorder">
<name locale="nl_NL">Backorder</name>
<value>2</value>
</attribute>
</item>
</items>
"""
tree = et.fromstring(xml)
# "@identifier" selects the identifier attribute of every <attribute>
# element; these values become the CSV column names.
header = list(tree.xpath("//attribute/@identifier"))


def dicter(x):
    """Return ``[x, text]`` where text is the <value> text of attribute *x*.

    *x* is an ``identifier`` attribute value. The text nodes of all matching
    ``<value>`` children are joined, so a missing value yields ``''``.
    """
    exp = r"//attribute[@identifier='%s']/value/text()" % x
    tmp = ''.join(tree.xpath(exp))
    return [x, tmp]


# Each [key, value] pair from dicter() becomes one entry of the row dict.
data = dict(dicter(i) for i in header)
# Now write data into file. The csv module needs a text-mode file opened
# with newline='' on Python 3; binary mode ('wb') raises TypeError there.
with open(r"C:\Users\User_Name\Desktop\output.txt", 'w', newline='') as wrt:
    writer = csv.DictWriter(wrt, header)
    writer.writeheader()
    writer.writerow(data)
Written file content-
naadloos,short_description,name,is_backorder
Nee,Tom beugel bh,Marie Jo L'Aventure Tom beugel bh,2

Categories

Resources