Creating XML file in python by iterating over a set of values

Creating XML file in python by iterating over a set of values - python

I have to create an xml file with root name 'structure' and it should consist of various subElements "packets", each of which contains a tree structure of elements. A single packet should look like this:
<L2>
<srcmac>value1</srcmac>
<dstmac>value2</dstmac>
</L2>
<L3>
<dscp>
<timetolive>value3</timetolive>
</dscp>
</L3>
<L4>
<protocol>value4</protocol>
</L4>
The packet should be repeated 'n' times. The code gives an error saying: unbound method write() must be called with ElementTree instance (got str instance instead)
This is the code I have written:
import xml.etree.cElementTree as et
import pcapy
import sys
from struct import*
def main():
# Question's original attempt, reproduced as posted (indentation was lost when the page was extracted).
# Builds a "structure" root with "packet" content and writes it to FileNew.xml.
# NOTE(review): n, value1, value2, value3 and protocol are not assigned anywhere in this snippet.
count=1
root=et.Element("structure")
#creating xml layout
# NOTE(review): the "packet" element is created once, before the while statement, so every
# pass of the loop appends its L2/L3/L4 children to that same single element.
eachpacket=et.SubElement(root,"packet")
# count starts at 1 and the test is count<n, so the loop body runs at most n-1 times.
while count<n:
child1=et.SubElement(eachpacket,"L2")
subchild1=et.SubElement(child1,"smac")
subchild2=et.SubElement(child1,"dmac")
child2=et.SubElement(eachpacket,"L3")
# "sip" and "dip" are created but never given any text below.
sub1=et.SubElement(child2,"sip")
sub2=et.SubElement(child2,"dip")
sub3=et.SubElement(child2,"dscp")
s2=et.SubElement(sub3,"timetolive")
child3=et.SubElement(eachpacket,"L4")
schild1=et.SubElement(child3,"protocol")
try:
count=count+1
subchild1.text=str(value1)
subchild2.text=str(value2)
s2.text=str(value3)
schild1.text=str(protocol)
# NOTE(review): a bare except followed by break hides any error raised above (including NameError).
except:
break
# NOTE(review): without the original indentation it is unclear whether this else belongs to
# the try or to the while statement -- confirm against the asker's real file.
else:
tree=et.ElementTree(root)
tree.write("FileNew.xml")
pass
# Run main() only when the file is executed as a script.
if __name__== "__main__":
main()
Although I get an XML file as output, the last entry seems to overwrite all the previous entries, and only the last iteration of 'packet' is stored in the generated XML file.
What do I need to modify to get a complete tree with all iterations and to fix the write() error?

I made some fixes to your code, including style improvements, and replaced your values such as protocol with placeholders, since I have neither your libraries nor your logic. As far as I have tested, it solves both issues.
# ElementTree is used instead of cElementTree: the latter was deprecated in
# Python 3.3 and removed in Python 3.9, and ElementTree already picks up the
# C accelerator automatically when it is available.
from xml.etree import ElementTree as et

# Placeholder inputs standing in for the values the real program extracts
# from captured packets.
n = 5
value1 = 'value1'
value2 = 'value2'
value3 = 'value3'
protocol = 'protocol'


def main():
    """Build a <structure> document with ``n`` <packet> children and save it.

    Every <packet> holds its own L2 (smac, dmac), L3 (sip, dip,
    dscp/timetolive) and L4 (protocol) subtree.  The result is written to
    ``FileNew.xml`` in the current working directory.
    """
    root = et.Element("structure")
    for _ in range(n):
        # A fresh <packet> must be created on every iteration; creating it
        # once before the loop would pile all L2/L3/L4 groups into a single
        # <packet> element.
        packet = et.SubElement(root, "packet")

        layer2 = et.SubElement(packet, "L2")
        et.SubElement(layer2, "smac").text = str(value1)
        et.SubElement(layer2, "dmac").text = str(value2)

        layer3 = et.SubElement(packet, "L3")
        # sip/dip are part of the layout but carry no value in this example.
        et.SubElement(layer3, "sip")
        et.SubElement(layer3, "dip")
        dscp = et.SubElement(layer3, "dscp")
        et.SubElement(dscp, "timetolive").text = str(value3)

        layer4 = et.SubElement(packet, "L4")
        et.SubElement(layer4, "protocol").text = str(protocol)

    # write() is an instance method: call it on an ElementTree object that
    # wraps the root, passing only the target file name.
    et.ElementTree(root).write("FileNew.xml")


if __name__ == "__main__":
    main()

Related

how to check if an attribute <Reporting_date> YYYYMMDD </Reporting_Date> in a .xml file is equal to a fixed Date value

I am new to python and wondering how to solve below use-case using python script in the shell script.
I have a shell script which holds variables as file name, ODATE value which is fixed in the format YYYYMMDD. It does some checks on file name.
In the next step I want to run a python script which actually checks the attribute <Reporting_date> YYYYMMDD </Reporting_Date> value for every occurrence in the test.xml file is equal to a fixed value from ODATE.
If all the values for <Reporting_date> YYYYMMDD </Reporting_Date> are matching the ODATE then print message "all the attributes are matched".
If we find one mismatch while scanning multiple records in test.xml file, on the 1st mismatch itself stop scanning the entire test.xml file, and print message "Encountered a mismatch"
Could anyone of you please guide me with this use-case. It would be much helpful.
Many thanks in advance.

How about something like this? (I haven't tested this code and I don't know the structure of your XML file, but you should get the idea.)
import xml.etree.ElementTree as ET

# Compare every <Reporting_date> element in test.xml with the expected date.
tree = ET.parse('test.xml')
root = tree.getroot()
ODATE = '20210215'

# any() short-circuits, so scanning stops at the first mismatch.
# The text is stripped because the value may be padded with whitespace
# (e.g. "<Reporting_date> 20210215 </Reporting_date>"); an empty element,
# whose text is None, counts as a mismatch instead of raising.
allok = not any(
    (child.text or '').strip() != ODATE
    for child in root.iter('Reporting_date')
)

if allok:
    print("All attributes are matched")
else:
    print ("Encountered a mismatch")
For more information see https://docs.python.org/3/library/xml.etree.elementtree.html
--- EDIT using findall instead of iter (see comments) ---
import xml.etree.ElementTree as ET

# Compare the REPORTING_DATE of every direct child of the root with ODATE.
tree = ET.parse('test.xml')
root = tree.getroot()
ODATE = '20210215'

allok = True
for directchild in root:
    # findall supports multiple REPORTING_DATE entries in one POSITION
    # structure.  If you know that there is always only one, find would be
    # better.
    # Text is stripped (and None treated as empty) so whitespace padding
    # around the date does not produce a false mismatch.
    if any((child.text or '').strip() != ODATE
           for child in directchild.findall("REPORTING_DATE")):
        allok = False
        print ("Encountered a mismatch")
        # This break leaves the loop over the root's children, so the whole
        # scan stops at the first mismatch and the message prints only once.
        break

if allok:
    print("All attributes are matched")

See below
import xml.etree.ElementTree as ET

# Self-contained sample: two POSITION records with different REPORTING_DATE values.
XML = '''<?xml version="1.0" encoding="UTF-8"?>
<POSITIONS>
<POSITION>
<ISIN>aaaaaaa</ISIN>
<ACCOUNT>7777777</ACCOUNT>
<POSITION>11111</POSITION>
<SETTLEMENT_DATE>20210202</SETTLEMENT_DATE>
<REPORTING_DATE>20210202</REPORTING_DATE>
</POSITION>
<POSITION>
<ISIN>bbbbbbb</ISIN>
<ACCOUNT>66666666</ACCOUNT>
<POSITION>888888888</POSITION>
<SETTLEMENT_DATE>20210203</SETTLEMENT_DATE>
<REPORTING_DATE>20210215</REPORTING_DATE>
</POSITION>
</POSITIONS>'''
ODATE = '20210215'

root = ET.fromstring(XML)
# './/' searches the whole tree, so nesting depth does not matter.
dates = root.findall('.//REPORTING_DATE')

# Report, for every REPORTING_DATE found, whether it equals ODATE.
for date in dates:
    if date.text != ODATE:
        print(f'ODATE {ODATE} and REPORTING_DATE {date.text} are NOT the same dates')
        continue
    print(f'ODATE {ODATE} and REPORTING_DATE {date.text} are the same dates')

XML data extraction in python

I have an XML file like the following:
<AreaModel>
...
<RecipePhase>
<UniqueName>PHASE1</UniqueName>
...
<NumberOfParameterTags>7</NumberOfParameterTags>
...
<DefaultRecipeParameter>
<Name>PARAM1</Name>
----
</DefaultRecipeParameter>
<DefaultRecipeParameter>
<Name>PARAM2</Name>
----
</DefaultRecipeParameter>
<DefaultRecipeParameter>
<Name>PARAM3</Name>
----
</DefaultRecipeParameter>
</RecipePhase>
<RecipePhase>
....
</RecipePhase>
</AreaModel>
I would like to read this file in sequential order and generate two lists: one with the texts of the UniqueName tags, and a list of lists that contains, for each RecipePhase element, the texts of the Name tags under it.
For example, I might have 10 RecipePhase elements, each one with TAG UniqueName and each one containing a different set of children with tag DefaultRecipeParameter.
How can I take into account when I enter into RecipePhase and when I go out of the element during parsing?
I am trying ElementTree but I am not able to find a solution.
cheers,
m

You can use xml python module:
See my example:
from xml.dom import minidom as dom
import urllib2
def fetchPage(url):
    """Download *url* with urllib2 and return the response body as one string."""
    response = urllib2.urlopen(url)
    try:
        # read() returns the whole body, same as joining readlines().
        return response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
def extract(page):
    """Print "<currency>-<value>" for every non-empty <Rate> element in *page*.

    *page* is an XML document given as a string.  <Rate> elements without
    child nodes are skipped.
    """
    document = dom.parseString(page)
    for rate in document.getElementsByTagName('Rate'):
        if rate.hasChildNodes():
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print(rate.getAttribute('currency') + "-" + rate.firstChild.nodeValue)


if __name__ == '__main__':
    page = fetchPage("http://www.bnro.ro/nbrfxrates.xml")
    extract(page)

I solved partially my problem with the following code:
import xml.etree.ElementTree as ET

tree = ET.parse('control_strategies.axml')
root = tree.getroot()

# Text of every <UniqueName> that sits directly under a <RecipePhase>.
phases = [unique.text for unique in root.findall('./RecipePhase/UniqueName')]
n_elem = len(phases)

# One inner list per <RecipePhase>, holding the <Name> texts of its
# <DefaultRecipeParameter> children in document order.  Building the list
# per phase (instead of indexing a pre-sized list with a manual counter)
# cannot raise IndexError when a <RecipePhase> has no <UniqueName>.
param = [
    [name.text for name in recipephase.findall('./DefaultRecipeParameter/Name')]
    for recipephase in root.findall('./RecipePhase')
]

lxml (etree) - Pretty Print attributes of root tag

Is it possible in python to pretty print the root's attributes?
I used etree to extend the attributes of the child tag and then I had overwritten the existing file with the new content. However during the first generation of the XML, we were using a template where the attributes of the root tag were listed one per line and now with the etree I don't manage to achieve the same result.
I found similar questions but they were all referring to the tutorial of etree, which I find incomplete.
Hopefully someone has found a solution for this using etree.
EDIT: This is for custom XML so HTML Tidy (which was proposed in the comments), doesn't work for this.
Thanks!
# Question's snippet as posted (Python 2 print syntax; indentation was lost in extraction).
# For each generated descriptor file: mark its ContextObject children as allow-null true/false,
# sort them, drop the root's "mandatoryobjects" attribute and rewrite the file in place.
# NOTE(review): list_generated_files, generated_descriptors_folder, descriptor_attributes,
# parser and etree are defined outside this snippet.
generated_descriptors = list_generated_files(generated_descriptors_folder)
counter = 0
for g in generated_descriptors:
if counter % 20 == 0:
# NOTE(review): the total shown is len(descriptor_attributes), while the loop runs over
# generated_descriptors -- confirm the two always have the same length.
print "Extending Descriptor # %s out of %s" % (counter, len(descriptor_attributes))
# 'r+b' opens the existing file for both reading and writing in binary mode.
with open(generated_descriptors_folder + "\\" + g, 'r+b') as descriptor:
root = etree.XML(descriptor.read(), parser=parser)
# Go through every ContextObject to check if the block is mandatory
for context_object in root.findall('ContextObject'):
for attribs in descriptor_attributes:
# g[:-11] drops the last 11 characters of the file name -- presumably a fixed suffix; confirm.
if attribs['descriptor_name'] == g[:-11] and context_object.attrib['name'] in attribs['attributes']['mandatoryobjects']:
context_object.set('allow-null', 'false')
elif attribs['descriptor_name'] == g[:-11] and context_object.attrib['name'] not in attribs['attributes']['mandatoryobjects']:
context_object.set('allow-null', 'true')
# Sort the ContextObjects based on allow-null and their name
context_objects = root.findall('ContextObject')
context_objects_sorted = sorted(context_objects, key=lambda c: (c.attrib['allow-null'], c.attrib['name']))
# Slice assignment replaces all of the root's children with the sorted list.
root[:] = context_objects_sorted
# Remove mandatoryobjects from Descriptor attributes and pretty print
root.attrib.pop("mandatoryobjects", None)
# paste new line here
# Convert to string in order to write the enhanced descriptor
xml = etree.tostring(root, pretty_print=True, encoding="UTF-8", xml_declaration=True)
# Write the enhanced descriptor
descriptor.seek(0) # Set cursor at beginning of the file
descriptor.truncate(0) # Make sure that file is empty
descriptor.write(xml)
# NOTE(review): descriptor was opened by the with statement above, which also closes it.
descriptor.close()
counter+=1

Python:XML List index out of range

I'm having trouble getting some values from an XML file. The error is IndexError: list index out of range
XML
<?xml version="1.0" encoding="UTF-8"?>
<nfeProc xmlns="http://www.portalfiscal.inf.br/nfe" versao="3.10">
<NFe xmlns="http://www.portalfiscal.inf.br/nfe">
<infNFe Id="NFe35151150306471000109550010004791831003689145" versao="3.10">
<ide>
<nNF>479183</nNF>
</ide>
<emit>
<CNPJ>3213213212323</CNPJ>
</emit>
<det nItem="1">
<prod>
<cProd>7030-314</cProd>
</prod>
<imposto>
<ICMS>
<ICMS10>
<orig>1</orig>
<CST>10</CST>
<vICMS>10.35</vICMS>
<vICMSST>88.79</vICMSST>
</ICMS10>
</ICMS>
</imposto>
</det>
<det nItem="2">
<prod>
<cProd>7050-6</cProd>
</prod>
<imposto>
<ICMS>
<ICMS00>
<orig>1</orig>
<CST>00</CST>
<vICMS>7.49</vICMS>
</ICMS00>
</ICMS>
</imposto>
</det>
</infNFe>
</NFe>
</nfeProc>
I'm getting the values from XML, it's ok in some xml's, those having vICMS and vICMSST tags:
vicms = doc.getElementsByTagName('vICMS')[i].firstChild.nodeValue
vicmsst = doc.getElementsByTagName('vICMSST')[1].firstChild.nodeValue
This returns:
First returns:
print vicms
>> 10.35
print vicmsst
>> 88.79
The second imposto CRASHES because it doesn't find the vICMSST tag...
**IndexError: list index out of range**
What is the best way to test for it? I'm using xml.etree.ElementTree:
My code:
import os
import sys
import subprocess
import base64,xml.dom.minidom
from xml.dom.minidom import Node
import glob
import xml.etree.ElementTree as ET
# Question's script as posted (Python 2 print syntax; indentation was lost in extraction).
# For every *.xml file in the current directory it prints the nNF value and, per <det>,
# the vICMS and vICMSST values.
origem = 0
# only loops over XML documents in folder
for file in glob.glob("*.xml"):
# NOTE(review): the file object f is never closed in this snippet.
f = open("%s" % file,'r')
data = f.read()
# i is the single index used for the nNF, vICMS and vICMSST lookups below.
i = 0
doc = xml.dom.minidom.parseString(data)
for topic in doc.getElementsByTagName('emit'):
#Get Fiscal Number
nnf= doc.getElementsByTagName('nNF')[i].firstChild.nodeValue
print 'Fiscal Number %s' % nnf
print '\n'
for prod in doc.getElementsByTagName('det'):
vicms = 0
vicmsst = 0
# NOTE(review): both lookups search the whole document (doc), not the current prod element,
# and pick entry number i; a <det> without vICMSST makes the vICMSST list shorter than i+1,
# which is where the IndexError comes from.
#Get value of ICMS
vicms = doc.getElementsByTagName('vICMS')[i].firstChild.nodeValue
#Get value of VICMSST
vicmsst = doc.getElementsByTagName('vICMSST')[i].firstChild.nodeValue
#PRINT INFO
print 'ICMS %s' % vicms
print 'Valor do ICMSST: %s' % vicmsst
print '\n\n'
# NOTE(review): with the indentation lost it is unclear which loop this increment belongs to.
i +=1
print '\n\n'

There is only one vICMSST tag in your XML document. So, when i=1, the following line returns an IndexError.
vicmsst = doc.getElementsByTagName('vICMSST')[1].firstChild.nodeValue
You can restructure this to:
try:
    vicmsst = doc.getElementsByTagName('vICMSST')[i].firstChild.nodeValue
except IndexError:
    # set a default value or deal with this how you like
    # (an except block needs at least one statement; None marks "tag absent")
    vicmsst = None
It's hard to say what you should do upon an exception without knowing more about what you're trying to do.

You are making several general mistakes in your code.
Don't use counters to index into lists you don't know the length of. Normally, iteration with for .. in is a lot better than using indexes anyway.
You have many imports you don't seem to use, get rid of them.
You can use minidom, but ElementTree is better for your task because it supports searching for nodes with XPath and it supports XML namespaces.
Don't read an XML file as a string and then use parseString. Let the XML parser handle the file directly. This way all file encoding related issues will be handled without errors.
The following is a lot better than your original approach.
import glob
import xml.etree.ElementTree as ET
def get_text(context_elem, xpath, xmlns=None):
    """Return the text of the first node matching *xpath* under *context_elem*.

    Args:
        context_elem: Element (or ElementTree) to search from.
        xpath: ElementTree path expression, optionally using prefixes.
        xmlns: Optional mapping of namespace prefix to namespace URI.

    Returns:
        The node's text, or "" when no node matches or the node is empty.
    """
    node = context_elem.find(xpath, xmlns)
    # Compare against None by identity: an Element without children is
    # falsy, so a plain truth test would be wrong here.
    if node is None:
        return ""
    # An empty element has text None; report it as "" like a missing node.
    return node.text or ""
# set up XML namespace URIs
xmlns = {
    "nfe": "http://www.portalfiscal.inf.br/nfe"
}

# Let the parser open each file itself and search relative to each element,
# so a <det> without vICMSST simply yields an empty string.
for path in glob.glob("*.xml"):
    doc = ET.parse(path)
    for infNFe in doc.iterfind('.//nfe:infNFe', xmlns):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Fiscal Number\t%s' % get_text(infNFe, ".//nfe:nNF", xmlns))
        for det in infNFe.iterfind(".//nfe:det", xmlns):
            print(' ICMS\t%s' % get_text(det, ".//nfe:vICMS", xmlns))
            print(' Valor do ICMSST:\t%s' % get_text(det, ".//nfe:vICMSST", xmlns))
        print('\n\n')

How to replace node values in XML with Python

I am new to Python. Now I have to replace a number of values in an XML file with Python. The example snippet of XML is:
<gmd:extent>
<gmd:EX_Extent>
<gmd:description gco:nilReason="missing">
<gco:CharacterString />
</gmd:description>
<gmd:geographicElement>
<gmd:EX_GeographicBoundingBox>
<gmd:westBoundLongitude>
<gco:Decimal>112.907</gco:Decimal>
</gmd:westBoundLongitude>
<gmd:eastBoundLongitude>
<gco:Decimal>158.96</gco:Decimal>
</gmd:eastBoundLongitude>
<gmd:southBoundLatitude>
<gco:Decimal>-54.7539</gco:Decimal>
</gmd:southBoundLatitude>
<gmd:northBoundLatitude>
<gco:Decimal>-10.1357</gco:Decimal>
</gmd:northBoundLatitude>
</gmd:EX_GeographicBoundingBox>
</gmd:geographicElement>
</gmd:EX_Extent>
</gmd:extent>
What I want to do is to replace those decimal values, i.e. 112.907, with a specified value.
<gmd:extent>
<gmd:EX_Extent>
<gmd:description gco:nilReason="missing">
<gco:CharacterString />
</gmd:description>
<gmd:geographicElement>
<gmd:EX_GeographicBoundingBox>
<gmd:westBoundLongitude>
<gco:Decimal>new value</gco:Decimal>
</gmd:westBoundLongitude>
<gmd:eastBoundLongitude>
<gco:Decimal>new value</gco:Decimal>
</gmd:eastBoundLongitude>
<gmd:southBoundLatitude>
<gco:Decimal>new value</gco:Decimal>
</gmd:southBoundLatitude>
<gmd:northBoundLatitude>
<gco:Decimal>new value</gco:Decimal>
</gmd:northBoundLatitude>
</gmd:EX_GeographicBoundingBox>
</gmd:geographicElement>
</gmd:EX_Extent>
</gmd:extent>
I tried a few methods, but none of them worked; I assume the difficulty is with the namespace prefixes gmd and gco.
Please help me out. Thanks in advance!
Cheers, Alex

I couldn't get lxml to process your XML without adding fake namespace declarations at the top, so here is how your input looked:
<gmd:extent xmlns:gmd="urn:x:y:z:1" xmlns:gco="urn:x:y:z:1">
<gmd:EX_Extent>
<gmd:description gco:nilReason="missing">
<gco:CharacterString />
</gmd:description>
<gmd:geographicElement>
<gmd:EX_GeographicBoundingBox>
<gmd:westBoundLongitude>
<gco:Decimal>112.907</gco:Decimal>
</gmd:westBoundLongitude>
<gmd:eastBoundLongitude>
<gco:Decimal>158.96</gco:Decimal>
</gmd:eastBoundLongitude>
<gmd:southBoundLatitude>
<gco:Decimal>-54.7539</gco:Decimal>
</gmd:southBoundLatitude>
<gmd:northBoundLatitude>
<gco:Decimal>-10.1357</gco:Decimal>
</gmd:northBoundLatitude>
</gmd:EX_GeographicBoundingBox>
</gmd:geographicElement>
</gmd:EX_Extent>
</gmd:extent>
I assumed you have two lists one for the current values and one for the new ones like this
old = [112.907, 158.96, -54.7539, -10.1357]
new = [1,2,3,4]
d = dict(zip(old,new))
Here is the full code
#!/usr/bin/env python
import sys
from lxml import etree
def process(fname):
    """Replace every gco:Decimal value in the XML file *fname* and serialize it.

    Each gco:Decimal text is converted to float and looked up in a fixed
    old-value -> new-value table.

    Args:
        fname: Path of the XML file to read.

    Returns:
        The pretty-printed document as produced by ``etree.tostring``.

    Raises:
        KeyError: If a gco:Decimal value is not one of the known old values.
        ValueError: If a gco:Decimal text is not a valid float.
    """
    # Passing the path lets lxml open and close the file itself, so no
    # handle is leaked when parsing fails.
    tree = etree.parse(fname)
    root = tree.getroot()

    old = [112.907, 158.96, -54.7539, -10.1357]
    new = [1, 2, 3, 4]
    d = dict(zip(old, new))

    # root.nsmap supplies the prefix -> URI mapping declared on the root.
    for node in root.findall('.//gco:Decimal', root.nsmap):
        node.text = str(d[float(node.text)])

    return etree.tostring(root, pretty_print=True)
def main():
    """Process the XML file named on the command line and write out.xml."""
    fname = sys.argv[1]
    text = process(fname)
    # etree.tostring() returns a byte string by default, so the output file
    # is opened in binary mode; the with block closes it even on error.
    with open('out.xml', 'wb') as outfile:
        outfile.write(text)


if __name__ == '__main__':
    main()
and here is how the output looked like
<gmd:extent xmlns:gmd="urn:x:y:z:1" xmlns:gco="urn:x:y:z:1">
<gmd:EX_Extent>
<gmd:description gco:nilReason="missing">
<gco:CharacterString/>
</gmd:description>
<gmd:geographicElement>
<gmd:EX_GeographicBoundingBox>
<gmd:westBoundLongitude>
<gco:Decimal>1</gco:Decimal>
</gmd:westBoundLongitude>
<gmd:eastBoundLongitude>
<gco:Decimal>2</gco:Decimal>
</gmd:eastBoundLongitude>
<gmd:southBoundLatitude>
<gco:Decimal>3</gco:Decimal>
</gmd:southBoundLatitude>
<gmd:northBoundLatitude>
<gco:Decimal>4</gco:Decimal>
</gmd:northBoundLatitude>
</gmd:EX_GeographicBoundingBox>
</gmd:geographicElement>
</gmd:EX_Extent>
</gmd:extent>

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Creating XML file in python by iterating over a set of values - python

Related

how to check if an attribute <Reporting_date> YYYYMMDD </Reporting_Date> in a .xml file is equal to a fixed Date value

XML data extraction in python

lxml (etree) - Pretty Print attributes of root tag

Python:XML List index out of range

How to replace node values in XML with Python

Categories

Resources