Modify XML file using ElementTree - python

I am trying to do the following with Python:
get "price" value and change it
find "price_qty" and insert new line with new tier and different price based on the "price".
So far I could only find the price, change it, and insert a line in roughly the correct place, but I can't find a way to create the new "item" element with its "qty" and "price" attributes; nothing has worked so far...
this is my original xml:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<body start="20.04.2014 10:02:60">
<pricelist>
<item>
<name>LEO - red pen</name>
<price>31,4</price>
<price_snc>0</price_snc>
<price_ao>0</price_ao>
<price_qty>
<item qty="150" price="28.20" />
<item qty="750" price="26.80" />
<item qty="1500" price="25.60" />
</price_qty>
<stock>50</stock>
</item>
</pricelist>
the new xml should look this way:
<pricelist>
<item>
<name>LEO - red pen</name>
<price>31,4</price>
<price_snc>0</price_snc>
<price_ao>0</price_ao>
<price_qty>
<item qty="10" price="31.20" /> **-this is the new line**
<item qty="150" price="28.20" />
<item qty="750" price="26.80" />
<item qty="1500" price="25.60" />
</price_qty>
<stock>50</stock>
</item>
</pricelist>
my code so far:
import xml.etree.cElementTree as ET
from xml.etree.ElementTree import Element, SubElement
tree = ET.ElementTree(file='pricelist.xml')
root = tree.getroot()
pos=0
# price - raise the main price and insert new tier
for elem in tree.iterfind('pricelist/item/price'):
price = elem.text
newprice = (float(price.replace(",", ".")))*1.2
newtier = "NEW TIER"
SubElement(root[0][pos][5], newtier)
pos+=1
tree.write('pricelist.xml', "UTF-8")
result:
...
<price_qty>
<item price="28.20" qty="150" />
<item price="26.80" qty="750" />
<item price="25.60" qty="1500" />
<NEW TIER /></price_qty>
thank you for any help.

Don't use fixed indexing. You already have the item element, so why not use it?
# Load the price list; each <item> under <pricelist> receives one extra tier.
tree = ET.ElementTree(file='pricelist.xml')
root = tree.getroot()

for item in tree.iterfind('pricelist/item'):
    # Prices are written with a decimal comma ("31,4"), so swap it for a
    # dot before converting to float.
    base_price = float(item.findtext('price').replace(",", "."))
    tier_price = "%.2f" % (base_price * 1.2)
    tiers = item.find('price_qty')
    # insert(0, ...) places the new tier ahead of the existing ones.
    tiers.insert(0, ET.Element("item", qty="10", price=tier_price))

tree.write('pricelist.xml', "UTF-8")

Related

Get items from xml Python

I have an XML document in Python and need to obtain the elements of the "Items" tag as an iterable list.
I need to get an iterable list from this XML, for example like this:
Item 1: Bicycle, value $250, iva_tax: 50.30
Item 2: Skateboard, value $120, iva_tax: 25.0
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<data>
<info>Listado de items</info>
<detalle>
<![CDATA[<?xml version="1.0" encoding="UTF-8"?>
<tienda id="tiendaProd" version="1.1.0">
<items>
<item>
<nombre>Bicycle</nombre>
<valor>250</valor>
<data>
<tax name="iva" value="50.30"></tax>
</data>
</item>
<item>
<nombre>Skateboard</nombre>
<valor>120</valor>
<data>
<tax name="iva" value="25.0"></tax>
</data>
</item>
<item>
<nombre>Motorcycle</nombre>
<valor>900</valor>
<data>
<tax name="iva" value="120.50"></tax>
</data>
</item>
</items>
</tienda>]]>
</detalle>
</data>
I am working with
import xml.etree.ElementTree as ET
for example
import xml.etree.ElementTree as ET
xml = ET.fromstring(stringBase64)
ite = xml.find('.//detalle').text
tixml = ET.fromstring(ite)
You can use BeautifulSoup4 (BS4) to do this.
from bs4 import BeautifulSoup

# Read the XML file as one string: BeautifulSoup expects a string or an open
# file object, not the list of lines that readlines() returns.
with open("example.xml", "r") as f:
    contents = f.read()

# Create the soup object using the XML parser.
soup = BeautifulSoup(contents, 'xml')

# Find every <item> tag in the document.
item_tags = soup.find_all("item")

# Map the text of each item's <nombre> tag to the text of its <valor> tag.
results = {}
for item in item_tags:
    num = item.find("nombre").text
    val = item.find("valor").text
    results[str(num)] = val

# Print the dictionary of key/value pairs extracted from the XML.
print(results)

Python - Construct DF From Nested XML Response

What would be the best way to construct a DF from the below nested XML data?
Each "properties" element has three "property" elements nested containing the "name" and "value" of our data. I tried doing two for loops, pandas read_xml option, and a few other pieces but haven't quite gotten the nested logic figured out. My current approach below is closer, but does not keep the names and values together.
Using Python 3.7+ in Jupyter on windows
Sample XML Data:
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope
xmlns:soap="http://www.w3.org/2003/05/soap-envelope"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:wsa="http://schemas.xmlsoap.org/ws/2004/08/addressing"
xmlns:wsse="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd"
xmlns:wsu="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd">
<env:Header
xmlns:env="http://www.w3.org/2003/05/soap-envelope">
<wsa:Action>RetrieveResponse</wsa:Action>
<wsa:MessageID>urn:uuid:1234</wsa:MessageID>
<wsa:RelatesTo>urn:uuid:1234</wsa:RelatesTo>
<wsa:To>http://schemas.xmlsoap.org/ws/2004/08/addressing/role/anonymous</wsa:To>
<wsse:Security>
<wsu:Timestamp wsu:Id="Timestamp-45333">
<wsu:Created>2022-11-07T17:02:44Z</wsu:Created>
<wsu:Expires>2022-11-07T17:07:44Z</wsu:Expires>
</wsu:Timestamp>
</wsse:Security>
</env:Header>
<soap:Body>
<RetrieveResponseMsg
xmlns="http://exacttarget.com/wsdl/partnerAPI">
<OverallStatus>MoreDataAvailable</OverallStatus>
<RequestID>asdfds455</RequestID>
<Results xsi:type="DataExtensionObject">
<PartnerKey xsi:nil="true" />
<ObjectID xsi:nil="true" />
<Type>DataExtensionObject</Type>
<Properties>
<Property>
<Name>FIELD_NAME</Name>
<Value>asdfdfd12</Value>
</Property>
<Property>
<Name>FIELD_NAME_2</Name>
<Value>asdf</Value>
</Property>
<Property>
<Name>FIELD_NAME_3</Name>
<Value>fasdsa</Value>
</Property>
</Properties>
</Results>
<Results xsi:type="DataExtensionObject">
<PartnerKey xsi:nil="true" />
<ObjectID xsi:nil="true" />
<Type>DataExtensionObject</Type>
<Properties>
<Property>
<Name>FIELD_NAME</Name>
<Value>fasd123</Value>
</Property>
<Property>
<Name>FIELD_NAME_2</Name>
<Value>asdfd</Value>
</Property>
<Property>
<Name>FIELD_NAME_3</Name>
<Value>a0A4f</Value>
</Property>
</Properties>
</Results>
<Results xsi:type="DataExtensionObject">
<PartnerKey xsi:nil="true" />
<ObjectID xsi:nil="true" />
<Type>DataExtensionObject</Type>
<Properties>
<Property>
<Name>FIELD_NAME</Name>
<Value>0034P00</Value>
</Property>
<Property>
<Name>FIELD_NAME_2</Name>
<Value>fasdfs</Value>
</Property>
<Property>
<Name>FIELD_NAME_3</Name>
<Value>a0fasd</Value>
</Property>
</Properties>
</Results>
</RetrieveResponseMsg>
</soap:Body>
</soap:Envelope>
What I've Attempted So Far:
data_output = []
for el in soup_de.find_all('Property'):
dict_ = {el.find('Name').text:el.find('Value').text}
data_output.append(dict_)
print(len(data_output))
# print(data_output)
testing_de_df = pd.DataFrame(data_output)
display(testing_de_df.info())
display(testing_de_df.head(25))
Desired Output:
details = {'FIELD_NAME': ['asdfdfd12', 'fasd123', '0034P00'],
'FIELD_NAME_2': ['asdf', 'asdfd', 'fasdfs'],
'FIELD_NAME_3': ['fasdsa', 'a0A4f', 'a0fasd']}
desired_output = pd.DataFrame(details)
print(desired_output)
Since <Property> sits at a shallow part of the XML, simply call pandas.read_xml narrowing in on that set of nodes while acknowledging the default namespace (http://exacttarget.com/wsdl/partnerAPI):
property_df = pd.read_xml(
"Input.xml",
xpath = ".//rrm:Property",
namespaces = {"rrm": "http://exacttarget.com/wsdl/partnerAPI"}
)
print(property_df)
# Name Value
# 0 FIELD_NAME asdfdfd12
# 1 FIELD_NAME_2 asdf
# 2 FIELD_NAME_3 fasdsa
# 3 FIELD_NAME fasd123
# 4 FIELD_NAME_2 asdfd
# 5 FIELD_NAME_3 a0A4f
# 6 FIELD_NAME 0034P00
# 7 FIELD_NAME_2 fasdfs
# 8 FIELD_NAME_3 a0fasd
To delineate by property, consider creating a property group number with groupby().cumcount() and reshaping data wide with pivot_table:
property_wide_df = (
    property_df
    # Number each occurrence of a Name (1, 2, 3, ...) so that repeated names
    # can be lined up as rows.
    .assign(property_no=lambda x: x.groupby("Name").cumcount().add(1))
    # Every (property_no, Name) pair occurs once, so "first" just picks that
    # single string instead of relying on "sum" to concatenate strings.
    .pivot_table(index="property_no", columns="Name", values="Value", aggfunc="first")
)
print(property_wide_df)
# Name FIELD_NAME FIELD_NAME_2 FIELD_NAME_3
# property_no
# 1 asdfdfd12 asdf fasdsa
# 2 fasd123 asdfd a0A4f
# 3 0034P00 fasdfs a0fasd

Read xml file to dataframe in python

I have an XML file that I need to read into Python as a DataFrame; this is part of the XML:
<?xml version="1.0" encoding="UTF-8"?>
<root>
<data id="root_661191">
<index id="data_162062">
<item id="index_829361_1">173915</item>
<item id="index_829361_2">14712</item>
<item id="index_829361_3">321255</item>
</index>
<property_id id="data_809625">
<item id="property_id_844926_1">88942.0</item>
<item id="property_id_844926_2">88162.0</item>
<item id="property_id_844926_3">80553.0</item>
</property_id>
<addr_street id="data_409265">
<item id="addr_street_959977_1">58 Middleton Street</item>
<item id="addr_street_959977_2">24 Royena Road</item>
<item id="addr_street_959977_3">9 Cafardi Boulevard</item>
</addr_street>
<price id="data_784942">
<item id="price_225606_1">7480000.0</item>
<item id="price_225606_2">7728000.0</item>
<item id="price_225606_3">7659000.0</item>
</price>
</data>
</root>
I tried some simpler sample data to test my read function, and it worked. But when I use the function on this XML file, it only produces None in the output. I think it might be the column names, but I don't know how to fix it. Could anyone help me?
The function I used is:
import pandas as pd
import xml.etree.ElementTree as et
def parse_xml(xml_file, df_cols):
    """Build a DataFrame with one row per direct child of the XML root.

    The first name in ``df_cols`` is read as an attribute of each child
    node; every remaining name is looked up as a sub-element of that node
    and its text becomes the cell value. Anything not found is stored as
    None.
    """
    xroot = et.parse(xml_file).getroot()

    records = []
    for node in xroot:
        # First column comes from the node's attributes.
        record = {df_cols[0]: node.attrib.get(df_cols[0])}
        # Remaining columns come from the text of same-named sub-elements.
        for col in df_cols[1:]:
            child = node.find(col)
            record[col] = child.text if child is not None else None
        records.append(record)

    return pd.DataFrame(records, columns=df_cols)
df_cols = ['index','property_id','addr_street','price']
# Call the function with parentheses; square brackets would try to subscript
# the function object and raise TypeError.
parse_xml('myxmlfile.xml', df_cols)
I think this is what you want. You should be able to put this in a function if you need
# Parse the file, then build one single-column DataFrame per column element
# found under each <data> node.
tree = et.parse('myxmlfile.xml')
root = tree.getroot()
df_cols = ['index','property_id','addr_street','price']

mlist = []
for col in df_cols:
    for data_node in root.findall('data'):
        for col_node in data_node.findall(col):
            # The text of each <item> child is one cell of this column.
            values = [item.text for item in col_node.findall('item')]
            mlist.append(pd.DataFrame({col: values}))
mlist
# Place the per-column frames side by side.
pd.concat(mlist, axis=1)
Output:
index property_id addr_street price
0 173915 88942.0 58 Middleton Street 7480000.0
1 14712 88162.0 24 Royena Road 7728000.0
2 321255 80553.0 9 Cafardi Boulevard 7659000.0

Python lxml does not support xpath syntax 'starts-with'?

str = """<ROOT>
<ITEM>
<REVENUE_YEAR>2554-02</REVENUE_YEAR>
<REGION>Central</REGION>
</ITEM>
<ITEM>
<REVENUE_YEAR>2552-02</REVENUE_YEAR>
<REGION>Central</REGION>
</ITEM>
<ITEM>
<REVENUE_YEAR>2552-03</REVENUE_YEAR>
<REGION>Central</REGION>
</ITEM>
</ROOT>"""
xml = etree.fromstring(str)
xpath_str = ".//ITEM[starts-with(REVENUE_YEAR,'2554')]"
result = xml.find(xpath_str)
print(result)
Hi, the code above raises SyntaxError: invalid predicate. Does this mean lxml does not support starts-with? Is there any other way to locate the REVENUE_YEAR element (2554-02) by XPath with lxml? Thanks!
lxml does support it, but you need to use the xpath() method; find() only understands the limited ElementPath syntax:
str = """<ROOT>
<ITEM>
<REVENUE_YEAR>2554-02</REVENUE_YEAR>
<REGION>Central</REGION>
</ITEM>
<ITEM>
<REVENUE_YEAR>2552-02</REVENUE_YEAR>
<REGION>Central</REGION>
</ITEM>
<ITEM>
<REVENUE_YEAR>2552-03</REVENUE_YEAR>
<REGION>Central</REGION>
</ITEM>
</ROOT>"""
xml = etree.fromstring(str)
xpath_str = ".//ITEM[starts-with(REVENUE_YEAR,'2554')]"
result = xml.xpath(xpath_str)
print(result) # which is a list containing only one element

Large XML parsing in Python

I am a novice in python and have the following task on hand.
I have a large xml file like the one below:
<Configuration>
<Parameters>
<Component Name='ABC'>
<Group Name='DEF'>
<Parameter Name='GHI'>
<Description>
Some Text
</Description>
<Type>Integer</Type>
<Restriction>
<Level>5</Level>
</Restriction>
<Value>
<Item Value='5'/>
</Value>
</Parameter>
<Parameter Name='JKL'>
<Description>
Some Text
</Description>
<Type>Integer</Type>
<Restriction>
<Level>5</Level>
</Restriction>
<Value>
<Item Value='5'/>
</Value>
</Parameter>
</Group>
<Group Name='MNO'>
<Parameter Name='PQR'>
<Description>
Some Text
</Description>
<Type>Integer</Type>
<Restriction>
<Level>5</Level>
</Restriction>
<Value>
<Item Value='5'/>
</Value>
</Parameter>
<Parameter Name='TUV'>
<Description>
Some Text
</Description>
<Type>Integer</Type>
<Restriction>
<Level>5</Level>
</Restriction>
<Value>
<Item Value='5'/>
</Value>
</Parameter>
</Group>
</Component>
</Parameters>
</Configuration>
In this xml file I have to parse through the component "ABC" go to group "MNO" and then to the parameter "TUV" and under this I have to change the item value to 10.
I have tried using xml.etree.cElementTree, but to no avail. lxml is not supported on the server, as it is running a very old version of Python, and I have no permission to upgrade it.
I have been using the following code to parse and edit a relatively small xml:
def fnXMLModification(ArgStr):
    """Apply text and/or attribute edits to an XML file described by ArgStr.

    ArgStr is split on whitespace. The first token is the path of the XML
    file; every following token has one of these forms:

        path[text]            set the text of the node
        path[a=1,b=2]         set attributes on the node
        path[text][a=1,b=2]   set both

    ``path`` is passed to ``tree.find``, so the first matching node is
    edited. A token without any ``[...]`` part is ignored.

    Returns True when the file has been rewritten. Returns False, without
    writing anything, when the file does not exist or when a path to be
    edited matches no node.
    """
    argList = ArgStr.split()
    strXMLPath = argList[0]
    if not os.path.exists(strXMLPath):
        fnlogs("XML File: " + strXMLPath + " does not exist.\n")
        return False
    try:
        import xml.etree.cElementTree as ET
    except ImportError:
        import xml.etree.ElementTree as ET

    # Close the input explicitly so the handle is released before the same
    # path is rewritten below. try/finally is used instead of "with" so the
    # code also runs on interpreters that predate the with statement.
    f = open(strXMLPath, 'rt')
    try:
        tree = ET.parse(f)
    finally:
        f.close()

    for strXPath in argList[1:]:
        # Reset for every argument; otherwise the text or attributes parsed
        # for an earlier path would be applied again to later paths.
        valToBeSet = None
        attrList = []

        strXPathList = strXPath.split("[")
        sxPath = strXPathList[0]
        if len(strXPathList) == 3:
            # both present: path[text][attributes]
            valToBeSet = strXPathList[1].strip("]")
            attrList = strXPathList[2].strip("]").split(",")
        elif len(strXPathList) == 2:
            # only one present: an "=" marks an attribute list
            if "=" in strXPathList[1]:
                attrList = strXPathList[1].strip("]").split(",")
            else:
                valToBeSet = strXPathList[1].strip("]")

        if not attrList and valToBeSet is None:
            # Nothing to change for this argument.
            continue

        node = tree.find(sxPath)
        if node is None:
            # Report the missing node instead of failing with an
            # AttributeError on None.
            fnlogs("XML File: " + strXMLPath + " has no node matching " + sxPath + ".\n")
            return False

        for att in attrList:
            # Split on the first "=" only so the value may itself contain "=".
            slist = att.split("=", 1)
            node.set(slist[0].strip(), slist[1].strip())
        if valToBeSet is not None:
            node.text = valToBeSet

    tree.write(strXMLPath)
    fnlogs("XML File: " + strXMLPath + " has been modified successfully.\n")
    return True
Using this function I am not able to traverse the current XML, as it has many nested child elements and subgroups.
import statement
import xml.etree.cElementTree as ET
Parse content by fromstring method.
root = ET.fromstring(data)
Iterate as required to reach the target Item tag and change the value of its Value attribute:
# Walk down Component "ABC" -> Group "MNO", then select the Item of
# Parameter "TUV" and overwrite its Value attribute.
for component_tag in root.iter("Component"):
    if component_tag.get("Name") == "ABC":
        for group_tag in component_tag.iter("Group"):
            if group_tag.get("Name") == "MNO":
                # Attribute predicates are written [@attr='value'].
                for item_tag in group_tag.findall("Parameter[@Name='TUV']/Value/Item"):
                    item_tag.set("Value", "10")
Alternatively, we can use a single XPath expression to get the target Item tag directly:
# Attribute predicates are written [@attr='value']; the path is relative to
# the <Configuration> root element.
for item_tag in root.findall("Parameters/Component[@Name='ABC']/Group[@Name='MNO']/Parameter[@Name='TUV']/Value/Item"):
    item_tag.set("Value", "10")
Use tostring method to get content.
data = ET.tostring(root)

Categories

Resources