Modify XML file using ElementTree - python

I am trying to do the following with Python:
get "price" value and change it
find "price_qty" and insert new line with new tier and different price based on the "price".
So far I have only managed to find the price, change it, and insert a line in roughly the correct place, but I can't find a way to put an "item" element there with "qty" and "price" attributes; nothing has worked so far...
this is my original xml:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<body start="20.04.2014 10:02:60">
<name>LEO - red pen</name>
<item qty="150" price="28.20" />
<item qty="750" price="26.80" />
<item qty="1500" price="25.60" />
the new xml should look this way:
<name>LEO - red pen</name>
<item qty="10" price="31.20" /> **-this is the new line**
<item qty="150" price="28.20" />
<item qty="750" price="26.80" />
<item qty="1500" price="25.60" />
my code so far:
import xml.etree.cElementTree as ET
from xml.etree.ElementTree import Element, SubElement
tree = ET.ElementTree(file='pricelist.xml')
root = tree.getroot()
# price - raise the main price and insert new tier
for elem in tree.iterfind('pricelist/item/price'):
price = elem.text
newprice = (float(price.replace(",", ".")))*1.2
newtier = "NEW TIER"
SubElement(root[0][pos][5], newtier)
tree.write('pricelist.xml', "UTF-8")
<item price="28.20" qty="150" />
<item price="26.80" qty="750" />
<item price="25.60" qty="1500" />
<NEW TIER /></price_qty>
thank you for any help.

Don't use fixed indexing. You already have the item element, so why not use it?
# For every <item> under <pricelist>, add a qty="10" tier priced at 120% of
# the item's <price> as the first child of its <price_qty> element, then
# write the document back to the same file.
tree = ET.ElementTree(file='pricelist.xml')
root = tree.getroot()
for elem in tree.iterfind('pricelist/item'):
    # Work from the element the loop already yields instead of fixed indexes.
    price = elem.findtext('price')
    # The price text may use a decimal comma; normalise it before converting.
    newprice = float(price.replace(",", ".")) * 1.2
    newtier = ET.Element("item", qty="10", price="%.2f" % newprice)
    # insert(0, ...) places the new tier ahead of the existing tiers.
    elem.find('price_qty').insert(0, newtier)
# Write once, after all items have been updated.
tree.write('pricelist.xml', "UTF-8")


Get items from xml Python

I have an XML document in Python and need to obtain the elements of the "Items" tag as an iterable list.
I need to get an iterable list from this XML, for example like this:
Item 1: Bicycle, value $250, iva_tax: 50.30
Item 2: Skateboard, value $120, iva_tax: 25.0
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<info>Listado de items</info>
<![CDATA[<?xml version="1.0" encoding="UTF-8"?>
<tienda id="tiendaProd" version="1.1.0">
<tax name="iva" value="50.30"></tax>
<tax name="iva" value="25.0"></tax>
<tax name="iva" value="120.50"></tax>
I am working with
import xml.etree.ElementTree as ET
for example
import xml.etree.ElementTree as ET
xml = ET.fromstring(stringBase64)
ite = xml.find('.//detalle').text
tixml = ET.fromstring(ite)
You can use BeautifulSoup4 (BS4) to do this.
from bs4 import BeautifulSoup

# Read the XML file as one string; the parser is given markup text,
# not a list of lines.
with open("example.xml", "r") as f:
    contents = f.read()

# Create the soup object using the XML parser.
soup = BeautifulSoup(contents, 'xml')

# Collect every <item> element in the document.
item_tags = soup.find_all("item")

# Map the text of each item's <nombre> tag to the text of its <valor> tag.
results = {}
for item in item_tags:
    num = item.find("nombre").text
    val = item.find("valor").text
    results[str(num)] = val

# Print the dictionary of key/value pairs taken from the XML.
print(results)

Python - Construct DF From Nested XML Response

What would be the best way to construct a DF from the below nested XML data?
Each "properties" element has three "property" elements nested containing the "name" and "value" of our data. I tried doing two for loops, pandas read_xml option, and a few other pieces but haven't quite gotten the nested logic figured out. My current approach below is closer, but does not keep the names and values together.
Using Python 3.7+ in Jupyter on windows
Sample XML Data:
<?xml version="1.0" encoding="utf-8"?>
<wsu:Timestamp wsu:Id="Timestamp-45333">
<Results xsi:type="DataExtensionObject">
<PartnerKey xsi:nil="true" />
<ObjectID xsi:nil="true" />
<Results xsi:type="DataExtensionObject">
<PartnerKey xsi:nil="true" />
<ObjectID xsi:nil="true" />
<Results xsi:type="DataExtensionObject">
<PartnerKey xsi:nil="true" />
<ObjectID xsi:nil="true" />
What I've Attempted So Far:
data_output = []
for el in soup_de.find_all('Property'):
dict_ = {el.find('Name').text:el.find('Value').text}
# print(data_output)
testing_de_df = pd.DataFrame(data_output)
Desired Output:
details = {'FIELD_NAME': ['asdfdfd12', 'fasd123', '0034P00'],
'FIELD_NAME_2': ['asdf', 'asdfd', 'fasdfs'],
'FIELD_NAME_3': ['fasdsa', 'a0A4f', 'a0fasd']}
desired_output = pd.DataFrame(details)
Since <Property> sits at a shallow part of the XML, simply call pandas.read_xml, narrowing in on that set of nodes while acknowledging the default namespace:
property_df = pd.read_xml(
xpath = ".//rrm:Property",
namespaces = {"rrm": ""}
# Name Value
# 0 FIELD_NAME asdfdfd12
# 1 FIELD_NAME_2 asdf
# 2 FIELD_NAME_3 fasdsa
# 3 FIELD_NAME fasd123
# 4 FIELD_NAME_2 asdfd
# 5 FIELD_NAME_3 a0A4f
# 6 FIELD_NAME 0034P00
# 7 FIELD_NAME_2 fasdfs
# 8 FIELD_NAME_3 a0fasd
To delineate by property, consider creating a property group number with groupby().cumcount() and reshaping data wide with pivot_table:
property_wide_df = (
.assign(property_no = lambda x: x.groupby("Name").cumcount().add(1))
.pivot_table(index="property_no", columns="Name", values="Value", aggfunc="sum")
# property_no
# 1 asdfdfd12 asdf fasdsa
# 2 fasd123 asdfd a0A4f
# 3 0034P00 fasdfs a0fasd

Read xml file to dataframe in python

I have an XML file that I need to read into Python as a DataFrame. This is part of the XML:
<?xml version="1.0" encoding="UTF-8"?>
<data id="root_661191">
<index id="data_162062">
<item id="index_829361_1">173915</item>
<item id="index_829361_2">14712</item>
<item id="index_829361_3">321255</item>
<property_id id="data_809625">
<item id="property_id_844926_1">88942.0</item>
<item id="property_id_844926_2">88162.0</item>
<item id="property_id_844926_3">80553.0</item>
<addr_street id="data_409265">
<item id="addr_street_959977_1">58 Middleton Street</item>
<item id="addr_street_959977_2">24 Royena Road</item>
<item id="addr_street_959977_3">9 Cafardi Boulevard</item>
<price id="data_784942">
<item id="price_225606_1">7480000.0</item>
<item id="price_225606_2">7728000.0</item>
<item id="price_225606_3">7659000.0</item>
I tried some simpler sample data to test my read function, and it works. But when I use my function on this XML file, it only produces None in the output. I think the problem might be the column names, but I don't know how to fix it. Could anyone help me?
The function I used is:
import pandas as pd
import xml.etree.ElementTree as et
def parse_xml(xml_file, df_cols):
xtree = et.parse(xml_file)
xroot = xtree.getroot()
rows = []
for node in xroot:
res = []
for el in df_cols[1:]:
if node is not None and node.find(el) is not None:
rows.append({df_cols[i]: res[i]
for i, _ in enumerate(df_cols)})
out_df = pd.DataFrame(rows, columns=df_cols)
return out_df
df_cols = ['index','property_id','addr_street','price']
I think this is what you want. You should be able to put this in a function if you need to.
# Build one single-column DataFrame per name in df_cols from the <item>
# texts found under the matching element, then join them side by side.
tree = et.parse('myxmlfile.xml')
root = tree.getroot()
df_cols = ['index', 'property_id', 'addr_street', 'price']
mlist = []
for col in df_cols:
    for d in root.findall('data'):
        for c in d.findall(col):
            # Text of every <item> child of this column element, in order.
            lst = [itm.text for itm in c.findall('item')]
            mlist.append(pd.DataFrame({col: lst}))
# axis=1 lines the per-column frames up by row position.
pd.concat(mlist, axis=1)
index property_id addr_street price
0 173915 88942.0 58 Middleton Street 7480000.0
1 14712 88162.0 24 Royena Road 7728000.0
2 321255 80553.0 9 Cafardi Boulevard 7659000.0

Python lxml does not support xpath syntax 'starts-with'?

str = """<ROOT>
xml = etree.fromstring(str)
xpath_str = ".//ITEM[starts-with(REVENUE_YEAR,'2554')]"
result = xml.find(xpath_str)
Hi, the code above raised SyntaxError: invalid predicate. Does this mean lxml does not support starts-with? Is there any other way to locate the REVENUE_YEAR element (2554-02) by XPath with lxml? Thanks!
lxml does support it, but you need to call the xpath() method instead of find():
str = """<ROOT>
xml = etree.fromstring(str)
xpath_str = ".//ITEM[starts-with(REVENUE_YEAR,'2554')]"
result = xml.xpath(xpath_str)
print(result) # which is a list containing only one element

Large XML parsing in Python

I am a novice in python and have the following task on hand.
I have a large xml file like the one below:
<Component Name='ABC'>
<Group Name='DEF'>
<Parameter Name='GHI'>
Some Text
<Item Value='5'/>
<Parameter Name='JKL'>
Some Text
<Item Value='5'/>
<Group Name='MNO'>
<Parameter Name='PQR'>
Some Text
<Item Value='5'/>
<Parameter Name='TUV'>
Some Text
<Item Value='5'/>
In this xml file I have to parse through the component "ABC" go to group "MNO" and then to the parameter "TUV" and under this I have to change the item value to 10.
I have tried using xml.etree.cElementTree, but to no avail. lxml is not supported on the server, as it is running a very old version of Python, and I have no permission to upgrade it.
I have been using the following code to parse and edit a relatively small xml:
def fnXMLModification(ArgStr):
argList = ArgStr.split()
strXMLPath = argList[0]
if not os.path.exists(strXMLPath):
fnlogs("XML File: " + strXMLPath + " does not exist.\n")
return False
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
f=open(strXMLPath, 'rt')
tree = ET.parse(f)
ValueSetFlag = False
AttrSetFlag = False
for strXPath in argList[1:]:
strXPathList = strXPath.split("[")
sxPath = strXPathList[0]
if len(strXPathList)==3:
# both present
AttrSetFlag = True
ValueSetFlag = True
valToBeSet = strXPathList[1].strip("]")
sAttr = strXPathList[2].strip("]")
attrList = sAttr.split(",")
elif len(strXPathList) == 2:
#anyone present
if "=" in strXPathList[1]:
AttrSetFlag = True
sAttr = strXPathList[1].strip("]")
attrList = sAttr.split(",")
ValueSetFlag = True
valToBeSet = strXPathList[1].strip("]")
node = tree.find(sxPath)
if AttrSetFlag:
for att in attrList:
slist = att.split("=")
if ValueSetFlag:
node.text = valToBeSet
fnlogs("XML File: " + strXMLPath + " has been modified successfully.\n")
return True
Using this function I am not able to traverse the current XML, as it has a lot of child attributes or subgroups.
import statement
import xml.etree.cElementTree as ET
Parse content by fromstring method.
root = ET.fromstring(data)
Iterate as required to find the target Item tag and change the value of its Value attribute:
# Walk Component "ABC" -> Group "MNO" -> Parameter "TUV" and set the Value
# attribute of each Item found under that parameter's Value element to "10".
for component_tag in root.iter("Component"):
    if component_tag.get("Name") != "ABC":
        continue
    for group_tag in component_tag.iter("Group"):
        if group_tag.get("Name") != "MNO":
            continue
        # ElementTree attribute predicates are written [@attr='value'].
        for item_tag in group_tag.findall("Parameter[@Name='TUV']/Value/Item"):
            item_tag.set("Value", "10")
We can also use XPath to get the target Item tag directly:
# Select the target Item elements with a single path expression; attribute
# predicates use the [@attr='value'] form.
for item_tag in root.findall(
    "Parameters/Component[@Name='ABC']/Group[@Name='MNO']"
    "/Parameter[@Name='TUV']/Value/Item"
):
    item_tag.set("Value", "10")
Use tostring method to get content.
data = ET.tostring(root)

