KeyError while converting XML to csv - python

I am new to Python programming and am getting the following error:
ShotCode = root.attrib['Stops']
KeyError: 'Stops'
Error
# Parse the XML file and open a CSV file named after it for writing.
tree = ET.parse(os.path.join(folderpath, xmlfilename))
root = tree.getroot()
# Drop the extension: everything after the last '.' is discarded.
filename, _ = xmlfilename.rsplit('.', 1)
# NOTE(review): this file handle is not closed anywhere in the snippet shown.
Shot_30AA = open(filename + '.csv', 'w', newline='')
csvwriter = csv.writer(Shot_30AA)
head = []
# This is the line reported in the traceback (KeyError: 'Stops'): it looks up
# an XML *attribute* named "Stops" on the root element.
ShotCode = root.attrib['Stops']
csvwriter.writerow(['Stops', ShotCode])
head.append(ShotCode)
sample Xml:
<Stops> <cmp_name>N/A</cmp_name> <cmp_id>N/A</cmp_id> <pu_DepartureDate>N/A</pu_DepartureDate> <DeliveryName>ABC</DeliveryName> <DeliveryID>RRFF</DeliveryID> <del_DepartureDate>2021-07-26T16:01:24.647</del_DepartureDate> <WorkOrder/> <ReleaseNo>EFFC</ReleaseNo> <NetWeight>38160.00</NetWeight> <TareWeight>0</TareWeight> <baletype>OCC-BALE</baletype> <BaleCount>36.00</BaleCount> <BaleCountLBH>0</BaleCountLBH> <SupplierName>VFGP</SupplierName> <DriverCode>DERG</DriverCode> <Coments>18971852</Coments> <TruckId>18971852</TruckId> <Picture/> </Stops>

I think you are confusing attributes and tags:
Xml attributes example:
<Stops test="attrib1"> </Stops>
In your case, the root element has no attributes:
print(root.attrib) : {}
print(root.tag) : Stops

Related

Search and replace strings in XML using python

I am trying to search and replace certain words in my .xml file and replace it with another, but I struggle a bit.
I have been using this code so far:
import xml.etree.ElementTree as ET
# Parse Rom1.xml and replace 'Rom1' with 'Rom2' in the text of each element.
with open('Rom1.xml', encoding="utf8") as f:
tree = ET.parse(f)
#root = tree.find('ExportedObjects')
root = tree.getroot()
for elem in root.iter():
try:
# Only elem.text is rewritten here; attributes and tail text are not touched.
elem.text = elem.text.replace('Rom1', 'Rom2')
except AttributeError:
# Presumably guards against elements whose text is None.
pass
# NOTE(review): nothing in this snippet writes the modified tree back to disk.
Rom1.xml this is a snapshot from the XML file showing the structure
The XML file is pretty big but it contains the string 'Rom1' 41 times and I would like to replace all of them.
I know a simple search and replace in a text editor does the job, but I want to automate this since I will do it for several hundred files.
Any help is appreciated :)
If there is no possibility of ambiguity then you could just do this:
# Rewrite Rom1.xml in place, replacing every occurrence of 'Rom1' with 'Rom2'.
# The handle is called xml_file so it does not shadow the stdlib "xml" package.
with open('Rom1.xml', encoding='utf-8', mode='r+') as xml_file:
    content = xml_file.read().replace('Rom1', 'Rom2')
    # Rewind so the updated text overwrites the file from the beginning.
    xml_file.seek(0)
    xml_file.write(content)
    # Cut off any leftover tail in case the new content is shorter.
    xml_file.truncate()
In this case the truncate() call is not necessary. However, if the second argument to replace() were shorter than the first, it would be crucial. Just leave it there to account for all eventualities.
Ok so I tried something else with great success:
import xml.etree.ElementTree as ET
# Copy Rom1.xml to "<number>.xml", replacing 'Rom1' with the number entered.
Rom2 = input('Number: ')
input_file = "Rom1.xml"
output_file = Rom2 + ".xml"
with open(input_file) as f:
    xml_content = f.readlines()
with open(output_file, 'w+') as f:
    for line in xml_content:
        f.write(line.replace('Rom1', Rom2))
But if I want to replace a second string, e.g. 'SQ4XXX' with 'SQ4050', then it replaces both and keeps the old as well? I'm confused.
import xml.etree.ElementTree as ET
# Same copy-and-replace script as before, extended with a second placeholder.
Rom2 = input('Number: ')
sq = input('SQ: ')
input_file = "Rom1.xml"
output_file = Rom2+".xml"
with open(input_file) as f:
xml_content = f.readlines()
with open(output_file, 'w+') as f:
for line in xml_content:
# As listed, each input line is written twice: once with only 'Rom1'
# replaced and once with only 'SQ4XXX' replaced.
f.write(line.replace('Rom1', Rom2))
f.write(line.replace('SQ4XXX', sq))
OK, I got it working the way I wanted, thanks for the help guys!
Here's the final code:
import xml.etree.ElementTree as ET
# Copy Rom1.xml to "<number>.xml", substituting the room number and the two
# SQ placeholders with the values entered by the user.
Rom2 = input('Number: ')
sq4 = input('SQ4: ')
sq5 = input('SQ5: ')
input_file = "Rom1.xml"
output_file = Rom2 + ".xml"
# (placeholder, replacement) pairs, applied in this order to every line.
replacements = (('Rom1', Rom2), ('SQ4XXX', sq4), ('SQ5XXX', sq5))
# Read and write with the same explicit encoding so the result does not
# depend on the platform's default locale.
with open(input_file, encoding='utf-8') as f:
    xml_content = f.readlines()
# All replacements happen in a single pass, so the output file is written
# once instead of being re-read and rewritten for each placeholder.
with open(output_file, 'w', encoding='utf-8') as f:
    for line in xml_content:
        for old, new in replacements:
            line = line.replace(old, new)
        f.write(line)

Are there any potential issues with using XPath to find/update XML tag text?

I wrote a script based on some existing StackOverflow questions, but none of them perfectly fit my issue.
The user uses an XPath expression to find an XML tag in a given XML file and updates the tag's text based on user input.
Below is my script, using Python 3 (the most difficult part for me is around the namespaces):
import xml.etree.ElementTree as ET
import sys
# user inputs and variables
filename = 'actors.xml'
xpath = 'actor/name'
value = 'test name'
# Attribute used to park each element's "{uri}" prefix while it is stripped.
temp_namespace = 'temp_namespace'

# get all namespaces (prefix -> URI) declared in the document
all_namespaces = dict(
    node for _, node in ET.iterparse(filename, events=['start-ns'])
)
# register them so the original prefixes are used when the tree is written
for prefix, uri in all_namespaces.items():
    ET.register_namespace(prefix, uri)

# remove all namespaces from elements first
# and temporarily save them in a tag attribute, so that the plain xpath
# above can match without namespace qualifiers.
# The below logic is copied from other Stackoverflow answers
# From **Python 3.8**, we can add the parser to insert comments
it = ET.iterparse(
    filename,
    parser=ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)),
)
for _, el in it:
    namespace, has_namespace, local_name = el.tag.partition('}')
    if has_namespace:
        el.tag = local_name
        el.set(temp_namespace, namespace + has_namespace)

# find and update
root = it.root
for el in root.findall(xpath):
    el.text = str(value)

# get xml comments before root level: every line between the XML
# declaration and the line that opens the root element is kept verbatim
doc_comments = []
with open(filename, 'r') as f:
    for line in f:
        if line.startswith('<?xml'):
            continue
        if line.startswith('<' + root.tag):
            break
        doc_comments.append(line)
def add_tag_namespace(el, attr_name=None):
    """Restore the namespace prefix on *el* and on all of its descendants.

    Each element that carries the temporary attribute gets the attribute's
    value prepended to its tag, and the attribute is then removed.
    Elements without the attribute are left untouched.

    Args:
        el: Root of the (sub)tree to process; modified in place.
        attr_name: Name of the temporary attribute holding the "{uri}"
            prefix. Defaults to the module-level ``temp_namespace``.
    """
    if attr_name is None:
        attr_name = temp_namespace
    # Element.iter() yields el itself followed by every descendant, so no
    # recursion is needed and each element is handled exactly once.
    for node in el.iter():
        namespace = node.attrib.pop(attr_name, None)
        if namespace is not None:
            node.tag = namespace + node.tag
# add all namespace back
# and delete the temp namespace attribute
add_tag_namespace(root)

# write back to xml file
tree = ET.ElementTree(root)
tree.write(filename, encoding='unicode', xml_declaration=True)

# nothing more to do when there were no comments before the root element
if not doc_comments:
    sys.exit()

# write xml comments before root back
# first read all lines
with open(filename, 'r') as f:
    lines = f.readlines()

# second, insert xml comments back into memory, directly after the
# XML declaration line
for i, line in enumerate(lines):
    if line.startswith('<?xml'):
        lines[i + 1:i + 1] = doc_comments
        break

# finally, write all contents to file
with open(filename, 'w') as f:
    f.writelines(lines)
actors.xml:
<?xml version="1.0"?>
<actors xmlns:fictional="http://characters.example.com"
xmlns="http://people.example.com">
<actor>
<name>John Cleese</name>
<fictional:character>Lancelot</fictional:character>
<fictional:character>Archie Leach</fictional:character>
</actor>
<actor>
<name>Eric Idle</name>
<fictional:character>Sir Robin</fictional:character>
<fictional:character>Gunther</fictional:character>
<fictional:character>Commander Clement</fictional:character>
</actor>
</actors>

Parse many XML files to one CSV file

The code below takes an XML file and parses specific elements into a CSV file. I previously had simpler code that produced slightly different output; the code below is the outcome of a lot of help from here.
from xml.etree import ElementTree as ET
from collections import defaultdict
import csv
# Parse one XML file and write selected data of every START element to
# output.csv.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the loops below cannot be recovered with certainty.
tree = ET.parse('thexmlfile.xml')
root = tree.getroot()
with open('output.csv', 'w', newline='') as f:
writer = csv.writer(f)
start_nodes = root.findall('.//START')
# Column order of the CSV; also used to pick values out of each row dict.
headers = ['id', 'service_code', 'rational', 'qualify', 'description_num', 'description_txt', 'set_data_xin', 'set_data_xax', 'set_data_value', 'set_data_x']
writer.writerow(headers)
for sn in start_nodes:
# defaultdict(str) makes any column that was never set come out as ''.
row = defaultdict(str)
for k,v in sn.attrib.items():
row[k] = v
for rn in sn.findall('.//Rational'):
row['rational'] = rn.text
for qu in sn.findall('.//Qualify'):
row['qualify'] = qu.text
for ds in sn.findall('.//Description'):
row['description_txt'] = ds.text
row['description_num'] = ds.attrib['num']
# all other tags except set data must be parsed before this.
for st in sn.findall('.//SetData'):
# SetData attributes are stored under 'set_data_<attribute name>'.
for k,v in st.attrib.items():
row['set_data_'+ str(k)] = v
row_data = [row[i] for i in headers]
writer.writerow(row_data)
row = defaultdict(str)
I'm trying to make this code go through a folder that contains many XML files and parse them all into one single CSV file. Simply put: instead of parsing one XML file, do this for multiple XML files and write them to one CSV file.
What I would normally do is use os.listdir(). The code would look something like this:
# Sketch only: visit every *.xml file in the folder.
# NOTE(review): `df` is not defined anywhere in this snippet.
directory = 'C:/Users/docs/FolderwithXMLs'
for filename in os.listdir(directory):
if filename.endswith(".xml"):
#Something here
df.to_csv("./output.csv")
continue
else:
continue
I have tried different ways to implement this in the code above, without success so far. This process should also be fast.
Try:
from pathlib import Path
# Parse every XML file below `directory` into one CSV file.
# Relies on csv, ET and defaultdict being imported as in the question's code.
directory = 'C:/Users/docs/FolderwithXMLs'
with open('output.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    headers = ['id', 'service_code', 'rational', 'qualify', 'description_num', 'description_txt', 'set_data_xin', 'set_data_xax', 'set_data_value', 'set_data_x']
    # The header row is written once, before any file is processed.
    writer.writerow(headers)
    # '**/*.xml' also matches XML files in sub-folders of `directory`.
    for xml_file in Path(directory).glob('**/*.xml'):
        tree = ET.parse(str(xml_file))
        root = tree.getroot()
        start_nodes = root.findall('.//START')
        for sn in start_nodes:
            row = defaultdict(str)
            # <<<<< Indentation was wrong here
            for k, v in sn.attrib.items():
                row[k] = v
            # Rest of the code here.
Hope that helps.

How to write to csv with multiple for in statements

I have a Python 3.7.3 script that reads an XML, parses what I need and is supposed to export the results to CSV. I had to go deeper in the XML tree using a for in loop for one of the fields, which throws off how the other for in statements append to csv.
When running the below, my output file does not list the different V-ID's (refer to the third for child in root... statement), however all the other fields are correct. The V-ID's display correctly when I remove the last for in statement and move the firstFile.write statement 2 tabs to the left, but then I don't have the status, so I know the problem is in the last statement. BTW, if I move the firstFile.write statement all the way to the left, it only returns one row in the csv, but there should be 5.
Is there a way to create a list from the output and then combine them all, or perhaps move the firstFile.write statement two tabs to the left and append the last for in statement to a specific column (essentially breaking up the firstFile.write statement)? Or do you have any other suggestions?
import os
import sys
import glob
import xml.etree.ElementTree as ET
# Append a header and one line per result from an XCCDF XML file to myfile.csv.
# NOTE(review): indentation was lost in this listing; the nesting of the loops
# and of the final firstFile.write call is the subject of the question and
# cannot be recovered from the text alone.
firstFile = open("myfile.csv", "a")
firstFile.write("V-ID,")
firstFile.write("HostName,")
firstFile.write("Status,")
firstFile.write("Comments,")
firstFile.write("Finding Details,")
firstFile.write("STIG Name,")
basePath = os.path.dirname(os.path.realpath(__file__))
xmlFile = os.path.join(basePath, "C:\\Users\\myUserName\\Desktop\\Scripts\\Python\\XMLtest.xml")
tree = ET.parse(xmlFile)
root = tree.getroot()
# d: text of the <title> element (written in the "STIG Name" position below).
for child in root.findall('{http://checklists.nist.gov/xccdf/1.2}title'):
d = child.text
# b: text of the <target> element (written in the "HostName" position below).
for child in root:
for children in child.findall('{http://checklists.nist.gov/xccdf/1.2}target'):
b = children.text
# a: derived from the string form of each <Group> element's attributes
# (7th '_'-separated piece, minus its last two characters).
for child in root.findall('{http://checklists.nist.gov/xccdf/1.2}Group'):
x = (str(child.attrib))
x = (x.split('_')[6])
a = x[:-2]
# c: 'Completed' when the <result> text contains 'pass', otherwise 'Ongoing'.
for child in root:
for children in child:
for childrens in children.findall('{http://checklists.nist.gov/xccdf/1.2}result'):
x = childrens.text
if ('pass' in x):
c = 'Completed'
else:
c = 'Ongoing'
# The Comments and Finding Details columns are left empty.
firstFile.write("\n" + a + ',' + b + ',' + c + ',' + ',' + ',' + d)
firstFile.close()
Finally, it took about a week to figure this out. I wrote the output to CSV, then read it back into a list for each column, removed the blank entries and wrote it out again. Below is how I did it.
import os
import sys
import glob
import csv
import xml.etree.ElementTree as ET
# Two-stage approach: first dump V-IDs and result rows into myfile.csv, then
# read that file back column by column, drop empty cells and write the
# re-aligned columns to myfile1.csv.
# NOTE(review): indentation was lost in this listing; the nesting of the
# with-blocks and loops cannot be recovered with certainty.
# NOTE(review): firstFile is opened via a relative path while the read-back
# below uses an absolute path — presumably the same file; confirm.
firstFile = open("myfile.csv", "a")
path = 'C:\\Users\\JT\\Desktop\\Scripts\\Python\\xccdf\\'
for fileName in glob.glob(os.path.join(path, '*.xml')):
with open('C:\\Users\\JT\\Desktop\\Scripts\\Python\\myfile1.csv', 'w', newline='') as csvFile1:
csvWriter = csv.writer(csvFile1, delimiter=',')
# do your stuff
tree = ET.parse(fileName)
root = tree.getroot()
# Stig Title
for child in root.findall('{http://checklists.nist.gov/xccdf/1.2}title'):
d = child.text
# hostName
for child in root:
for children in child.findall('{http://checklists.nist.gov/xccdf/1.2}target'):
b = children.text
# V-ID
for child in root.findall('{http://checklists.nist.gov/xccdf/1.2}Group'):
x = (str(child.attrib))
x = (x.split('_')[6])
a = x[:-2]
# Each V-ID goes on a line of its own (first column only).
firstFile.write(a + '\n')
# Status
for child in root:
for children in child:
for childrens in children.findall('{http://checklists.nist.gov/xccdf/1.2}result'):
x = childrens.text
# Result rows start with an empty first column.
firstFile.write(',' + b + ',' + x + ',' + ',' + ',' + d + '\n')
# Read the intermediate file back and collect each column into its own list.
with open('C:\\Users\\JT\\Desktop\\Scripts\\Python\\myfile.csv', 'r') as csvFile:
csvReader = csv.reader(csvFile, delimiter=',')
vIDs = []
hostNames = []
status = []
stigTitles = []
for line in csvReader:
vID = line[0]
vIDs.append(vID)
# NOTE(review): the bare excepts below presumably exist to skip the
# single-column V-ID lines (IndexError); they would also hide any other error.
try:
hostName = line[1]
hostNames.append(hostName)
except:
pass
try:
state = line[2]
status.append(state)
except:
pass
try:
stigTitle = line[5]
stigTitles.append(stigTitle)
except:
pass
with open('C:\\Users\\JT\\Desktop\\Scripts\\Python\\myfile1.csv', 'a', newline='') as csvFile1:
csvWriter = csv.writer(csvFile1, delimiter=',')
# filter(None, ...) drops the empty strings left by the blank cells.
vIDMod = list(filter(None, vIDs))
hostNameMod = list(filter(None, hostNames))
statusMod = list(filter(None, status))
stigTitlesMod = list(filter(None, stigTitles))
# zip pairs the columns positionally and stops at the shortest list.
csvWriter.writerows(zip(vIDMod, hostNameMod, statusMod, stigTitlesMod))
firstFile.close()

Generate XML files based on rows in CSV

I have a CSV and would like to generate an XML file based on each row in the CSV.
Right now it creates an XML file, but only with the last row in the CSV. How can I modify this script to generate an XML file for EACH row, and ideally name each file after the column "File / Entity Name"? See below for what I currently have. Thanks!
# CSV module
import csv
# Stuff from the XML module
from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree
import xml.etree.ElementTree as etree
# Topmost XML element
# NOTE(review): `root` is rebound inside the loop below and `number` is not
# used anywhere in this snippet.
root = Element('root')
number = Element('number')
# Open a file
with open(r'U:\PROJECTS\Technical Graphics\book1.csv') as f:
for row in csv.DictReader(f):
# A new root element and tree are created for every CSV row.
root = Element('gmd:MD_Metadata')
tree = ElementTree(root)
# One child element per column: the tag is the column name, the text its value.
for k, v in row.items():
child = SubElement(root, k)
child.text = v
reader = csv.DictReader(f)
# The output path is the same test.xml on every write.
tree.write(open(r'U:\PROJECTS\Technical Graphics\test.xml','w'))
# Python 2 print statement.
print tostring(root)
You set the value of Root here:
for row in csv.DictReader(f):
    # A fresh root per row, so each output file holds exactly one CSV row.
    root = Element('gmd:MD_Metadata')
    tree = ElementTree(root)
    # Name the output file after the "File / Entity Name" column; looking the
    # value up by column name does not depend on the order of row.items().
    filename = row['File / Entity Name']
    for k, v in row.items():
        child = SubElement(root, k)
        child.text = v
    # A raw string literal cannot end in a backslash, so the final path
    # separator is added as a separate, escaped string. Passing the path
    # (instead of an open file object) lets ElementTree close the file.
    tree.write(r'U:\PROJECTS\Technical Graphics' + '\\' + filename + '.xml')
    # Parenthesised form works as a print statement and as a print function.
    print(tostring(root))
You only want to create the csv.DictReader() class once, rather than for each iteration of your loop.
Similarly, you only want to create your root XML element once.
Finally, the order of the items returned from row.items() is arbitrary, and not reflective of the order of the fields in the file.
Try this:
# CSV module
import csv
# Stuff from the XML module
from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree
import xml.etree.ElementTree as etree
# Open a file
with open(r'U:\PROJECTS\Technical Graphics\book1.csv') as f:
    # Topmost XML element, created once for the whole document
    root = Element('gmd:MD_Metadata')
    tree = ElementTree(root)
    # The reader is created once, outside the row loop
    reader = csv.DictReader(f)
    for row in reader:
        # One <row> element per CSV row
        xml_row = SubElement(root, "row")
        # reader.fieldnames keeps the column order of the CSV header
        for k in reader.fieldnames:
            child = SubElement(xml_row, k)
            child.text = row[k]
# Passing the path (instead of an open file object) lets ElementTree open and
# close the output file itself, so no handle is leaked.
tree.write(r'U:\PROJECTS\Technical Graphics\test.xml')
# Parenthesised form works as a print statement and as a print function.
print(tostring(root))

Categories

Resources