I wrote an XML file with ElementTree. The problem I'm facing is that I want to write multiple logs to the same XML file.
The code:
# xml.etree.cElementTree was deprecated in Python 3.3 and removed in 3.9;
# xml.etree.ElementTree uses the C accelerator automatically when available.
import xml.etree.ElementTree as ET

# Build a single <LOG> record with four child elements.
root = ET.Element('LOG')
DATE = ET.SubElement(root, 'DATE')
DATE.text = "child_1"
TIME = ET.SubElement(root, 'TIME')
TIME.text = "child_2"
CC = ET.SubElement(root, 'CC')
CC.text = "child_3"
AMOUNT = ET.SubElement(root, 'AMOUNT')
AMOUNT.text = "child_4"
tree = ET.ElementTree(root)

# Serialize the tree to disk.
# NOTE(review): the ".xlm" extension looks like a typo for ".xml" - confirm
# before renaming, since other tooling may already read this file name.
tree.write("file_name.xlm")

# Echo the generated file; the context manager closes the handle again.
with open("file_name.xlm") as log_file:
    print(log_file.read())
Current output example:
<LOG><DATE>child_1</DATE><TIME>child_2</TIME><CC>child_3</CC><AMOUNT>child_4</AMOUNT></LOG>
Output that I want:
<LOG><DATE>child_1</DATE><TIME>child_2</TIME><CC>child_3</CC><AMOUNT>child_4</AMOUNT></LOG><LOG><DATE>child_1</DATE><TIME>child_2</TIME><CC>child_3</CC><AMOUNT>child_4</AMOUNT></LOG><LOG><DATE>child_1</DATE><TIME>child_2</TIME><CC>child_3</CC><AMOUNT>child_4</AMOUNT></LOG>...
What you want is just not how xml works. xml documents have one root node, and just one. The closest you can get is by wrapping your <LOG> entries in a container:
<ALL_LOGS>
<LOG>....</LOG>
<LOG>....</LOG>
</ALL_LOGS>
I have a directory that contains several XML files. I would like to process all of them, one by one, and export each one as a CSV file.
For a single file, it works perfectly with the script below:
import xml.etree.ElementTree as ET
import csv
# Convert one XML export into a CSV file with one row per <Document> element.
# The source is parsed first, so a missing or malformed XML file fails before
# the output file is created or truncated.
tree = ET.parse('D:/scripts/xml/download_xml_1.xml')
root = tree.getroot()
col_names = ['Fichier','No. de document','Titre']

# The context manager closes the CSV file even if a row fails to be written.
# newline='' is what the csv module requires to avoid doubled line endings.
with open('D:/scripts/csv/output_1.csv', 'w', newline='', errors='ignore') as data_out:
    csvwriter = csv.writer(data_out)
    csvwriter.writerow(col_names)
    for elem in root.iter(tag='Document'):
        # findtext() yields '' for a missing child instead of raising
        # AttributeError the way find(...).text does.
        csvwriter.writerow([
            elem.findtext('FileType', default=''),
            elem.findtext('DocumentNumber', default=''),
            elem.findtext('Title', default=''),
        ])
But I'm going crazy trying to find a way to do this for every file in the directory, one by one. This is where I am so far:
import xml.etree.ElementTree as ET
import csv
import os
# Convert every XML file in the source directory into a CSV file of the same
# base name, with one row per <Document> element.
xml_dir = 'D:/scripts/xml/'
for my_files in os.listdir(xml_dir):
    # os.listdir() returns every entry (sub-directories, other file types),
    # so only hand real XML files to the parser.
    if not my_files.lower().endswith('.xml'):
        continue
    # os.listdir() yields bare names; without the directory prefix the parser
    # would look for the file in the current working directory.
    tree = ET.parse(os.path.join(xml_dir, my_files))
    root = tree.getroot()
    col_names = ['Fichier','No. de document','Titre']
    # The context manager closes each CSV file even if a row fails.
    with open('D:/scripts/csv/'+ my_files[:-4] +'.csv', 'w', newline='', errors='ignore') as data_out:
        csvwriter = csv.writer(data_out)
        csvwriter.writerow(col_names)
        for elem in root.iter(tag='Document'):
            # findtext() yields '' for a missing child instead of raising
            # AttributeError the way find(...).text does.
            csvwriter.writerow([
                elem.findtext('FileType', default=''),
                elem.findtext('DocumentNumber', default=''),
                elem.findtext('Title', default=''),
            ])
Any help would be greatly appreciated.
Simply generalize your process into a function that receives a file name as input, then pass each file name to it in a loop. Also, consider opening the output file with a `with` context manager so that it is closed automatically.
import os
import csv
import xml.etree.ElementTree as ET
# Directory that is scanned for the source XML files.
xml_path = r'D:\scripts\xml'
# Directory that receives the generated CSV files.
csv_path = r'D:\scripts\csv'
# DEFINED METHOD
def xml_to_csv(xml_file, xml_dir=None, csv_dir=None):
    """Convert one XML file into a CSV file with a row per <Document>.

    Args:
        xml_file: Bare file name of the XML document (no directory part).
        xml_dir: Directory containing ``xml_file``. Defaults to the
            module-level ``xml_path``.
        csv_dir: Directory for the generated CSV. Defaults to the
            module-level ``csv_path``.

    The output is named ``Output_<stem>.csv``, where ``<stem>`` is
    ``xml_file`` without its extension. Child elements that are missing from
    a <Document> produce an empty cell instead of an AttributeError.
    """
    if xml_dir is None:
        xml_dir = xml_path
    if csv_dir is None:
        csv_dir = csv_path

    # splitext() strips whatever extension is present; slicing off four
    # characters is only right for a suffix of exactly that length.
    stem = os.path.splitext(xml_file)[0]
    csv_file = os.path.join(csv_dir, f'Output_{stem}.csv')

    # Parse before opening the output so a bad input leaves no empty CSV.
    tree = ET.parse(os.path.join(xml_dir, xml_file))
    root = tree.getroot()

    with open(csv_file, 'w', newline='', errors='ignore') as data_out:
        csvwriter = csv.writer(data_out)
        col_names = ['Fichier', 'No. de document', 'Titre']
        csvwriter.writerow(col_names)
        for elem in root.iter(tag='Document'):
            row = [elem.findtext('FileType', default=''),
                   elem.findtext('DocumentNumber', default=''),
                   elem.findtext('Title', default='')]
            csvwriter.writerow(row)
# FILE ITERATION
# os.listdir() returns every directory entry, so restrict the run to XML
# files; anything else would make the XML parser raise.
for f in os.listdir(xml_path):
    if f.lower().endswith('.xml'):
        xml_to_csv(f)
EDIT****Other aim:
And I would love to iterate through every excel row and save each row as separate .xml file (filename = invoice.text)
any help appreciated
The problem is that the second .xml file that gets created also contains the data from the first row. Can anybody help me? It would be highly appreciated.
help is appreciated, I want to give ROOT name attributes and the xml a version "" and save each excel row as a separate .xml file
I already setup the excel with openpyxl.
EDIT
Code edited
from lxml import etree
import openpyxl
# Namespace and header values shared by every generated LedgerImport document.
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"

# (child tag under <accountsPayableLedger>, spreadsheet column with its text)
LEDGER_FIELDS = [
    ('bookingText', 21),
    ('invoiceId', 13),
    ('date', 2),
    ('amount', 16),
    ('accountNo', 19),
    ('costCategoryId', 15),
    ('currencyCode', 12),
    ('partyId', 20),
    ('bpAccountNo', 20),
]


def _new_ledger_root():
    """Return a fresh, empty LedgerImport root element."""
    return etree.Element(
        "{" + xmlns + "}LedgerImport",
        version=version,
        attrib={"{" + xsi + "}schemaLocation": schemaLocation},
        generator_info=generator_info,
        generating_system=generating_system,
        nsmap={'xsi': xsi, None: xmlns}
    )


def _cell_text(sheet, row, column):
    """Return a cell value as element text: None stays None, else str(value).

    lxml only accepts strings (or None) for ``.text``, while spreadsheet
    cells may hold numbers or dates.
    """
    value = sheet.cell(row=row, column=column).value
    return None if value is None else str(value)


####open excel file speadsheet
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']

# Build and save one XML document per spreadsheet row (rows 2 to 5).
for i in range(2, 6):
    # A new root per row: reusing one root across iterations keeps appending
    # <consolidate> elements, so every later file would also contain the
    # data of all earlier rows.
    xmlRoot = _new_ledger_root()
    consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={
        'consolidatedAmount': str(sheet.cell(row=i, column=16).value),
        'consolidatedDate': str(sheet.cell(row=i, column=2).value),
        'consolidatedInvoiceId': str(sheet.cell(row=i, column=13).value),
        'consolidatedCurrencyCode': str(sheet.cell(row=i, column=12).value),
    })
    accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
    for tag, column in LEDGER_FIELDS:
        etree.SubElement(accountsPayableLedger, tag).text = _cell_text(sheet, i, column)

    # The file is named after the invoice id (column 13). Writing through
    # lxml emits UTF-8 bytes that match the XML declaration, independent of
    # the platform's default text encoding.
    doc = etree.ElementTree(xmlRoot)
    doc.write(str(sheet.cell(row=i, column=13).value) + ".xml",
              xml_declaration=True, encoding='utf-8', pretty_print=True)
Please help sitting like hours on that.
Thanks so much
I recommend building the document from etree elements and sub-elements and converting the result into an element tree afterwards. This gives you more flexibility while creating the XML, especially when you want to iterate over an existing data structure:
from lxml import etree
# Create root element with namespace information
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"
xmlRoot = etree.Element(
    "{" + xmlns + "}LedgerImport",
    version=version,
    attrib={"{" + xsi + "}schemaLocation": schemaLocation},
    generator_info=generator_info,
    generating_system=generating_system,
    nsmap={'xsi': xsi, None: xmlns}
)

# build the xml tree
consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': "1337.01"})
accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
account = etree.SubElement(accountsPayableLedger, 'bookingText')
account.text = 'amazon'
invoice = etree.SubElement(accountsPayableLedger, 'invoiceId')
invoice.text = "1"

# Convert into an element tree and let lxml write the file itself. This emits
# UTF-8 bytes that match the XML declaration; decoding to str and writing in
# text mode would re-encode with the platform's default encoding instead.
doc = etree.ElementTree(xmlRoot)
doc.write("test1337.xml", xml_declaration=True, encoding='utf-8', pretty_print=True)
The generated file looks like this:
<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://xml.datev.de/bedi/tps/ledger/v040" generating_system="DATEV manuell" generator_info="DATEV Musterdaten" version="4.0" xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
<consolidate consolidatedAmount="1337.01">
<accountsPayableLedger>
<bookingText>amazon</bookingText>
<invoiceId>1</invoiceId>
</accountsPayableLedger>
</consolidate>
</LedgerImport>
I need to parse a directory of xml files into one large csv file, I need certain attributes under the element 'Param' (attributes are 'Name' and 'PNum'). There is another XML file in the directory called Content.xml which I can get all the names of the other XML files and set them as the FileName. The issue is that I cannot figure out how to get these attributes in each XML file as each XML file has a different organisation and some don't seem to have these attributes in the first place.
I have written code that works for one of the XML files in the directory that outputs a CSV file with all the relevant information.
import xml.etree.ElementTree as ET
import csv
import os
# Export the Name/PNum attributes of one XML file's parameter elements to CSV.
FileName = '------.xml'
tree = ET.parse(FileName)
# The elements to export are the children of the root's fifth child.
root = tree.getroot()[4]
# The file name without its extension labels every row of this file.
gen = os.path.splitext(FileName)[0]

# newline='' is what the csv module requires to avoid doubled line endings;
# the context manager closes the file even if a row fails.
with open('CsvOut', 'w', newline='') as csv_out:
    csvwriter = csv.writer(csv_out)
    # The header is written once, even when there are no rows.
    csv_head = ['Generation', 'Parameter Name', 'Parameter Number']
    csvwriter.writerow(csv_head)
    for child in root:
        # One flat row per element; wrapping the generation in its own list
        # would put the list's repr ("['...']") into the first column.
        csvwriter.writerow([gen, child.get('Name'), child.get('PNum')])
It is rather simple, and you can do it in two steps:
First, enumerate all xml files in the directory
Perform your code over these files
import xml.etree.ElementTree as ET
import csv
import os
from glob import glob
# Collect the Name/PNum attributes of every <Param> element found in the XML
# files of the current directory into a single CSV file.
# newline='' is what the csv module requires to avoid doubled line endings;
# the context manager closes the file even if one input fails to parse.
with open('CsvOut', 'w', newline='') as csv_out:
    csvwriter = csv.writer(csv_out)
    # write the header
    csv_head = ['Generation', 'Parameter Name', 'Parameter Number']
    csvwriter.writerow(csv_head)
    # iterate over the xml files in the current directory
    for FileName in glob("*.xml"):
        tree = ET.parse(FileName)
        # The file name without its extension labels every row of this file.
        gen = os.path.splitext(FileName)[0]
        # Search by tag rather than by position: a fixed index such as
        # getroot()[4] raises IndexError (or picks the wrong branch) as soon
        # as a file is organised differently. Files without any <Param>
        # simply contribute no rows.
        # NOTE(review): assumes the wanted elements are all tagged 'Param',
        # as the question describes - confirm against the real files.
        for child in tree.getroot().iter('Param'):
            # One flat row per element; wrapping the generation in its own
            # list would put the list's repr into the first column.
            csvwriter.writerow([gen, child.get('Name'), child.get('PNum')])
I am trying to parse an xml file containing meteo data and to write some value in a csv file.
I'm not sure that this code is elegant but it works.
from qgis.PyQt.QtCore import *
import requests
import xml.etree.ElementTree as ET
# url of xml to parse
baseUrl = ('http://www.arpa.veneto.it/bollettini/meteo/h24/img08/0144.xml')
resp = requests.get(baseUrl)
msg = resp.content
tree = ET.fromstring(msg)

# Open the output once for the whole run instead of once per measurement;
# the context manager closes it even if a record is malformed.
with open('D:/GIS/_Temp/result.csv', 'a') as f:
    for stazione in tree.iter('STAZIONE'):
        idstaz = stazione.find('IDSTAZ').text
        for sensore in stazione.iter('SENSORE'):
            # Named sensor_id so the builtin id() is not shadowed.
            sensor_id = sensore.find('ID').text
            for dati in sensore.iter('DATI'):
                ist = dati.get('ISTANTE')
                vm = dati.find('VM').text
                # One space-separated line per measurement. f.write() works
                # on Python 2 and 3, unlike the "print >> f" statement.
                f.write('{} {} {} {}\n'.format(idstaz, sensor_id, ist, vm))
I'm not sure that this code is elegant but it works.
144 300000864 201701080100 -4.2
144 300000864 201701080200 -4.5
144 300000864 201701080300 -4.8
144 300000864 201701080400 -5.5
...
but I don't know how to add the headers to the columns.
Open the file once, before the for loop, and write the header row to it first:
from qgis.PyQt.QtCore import *
import requests
import xml.etree.ElementTree as ET
# url of xml to parse
baseUrl = ('http://www.arpa.veneto.it/bollettini/meteo/h24/img08/0144.xml')
resp = requests.get(baseUrl)
msg = resp.content
tree = ET.fromstring(msg)

# The file is opened once, before the loops (".csv", not the ".cvs" typo).
# The context manager closes it even if a record is malformed.
with open('D:/GIS/_Temp/result.csv', 'a') as f:
    # In append mode the position starts at the end of the file, so 0 means
    # the file is new or empty: write the header only then, not on every run.
    if f.tell() == 0:
        # One name per data column, terminated by a newline so the first
        # record does not end up on the header line.
        f.write('IDSTAZ,SENSORE,ISTANTE,VM\n')
    for stazione in tree.iter('STAZIONE'):
        idstaz = stazione.find('IDSTAZ').text
        for sensore in stazione.iter('SENSORE'):
            # Named sensor_id so the builtin id() is not shadowed.
            sensor_id = sensore.find('ID').text
            for dati in sensore.iter('DATI'):
                ist = dati.get('ISTANTE')
                vm = dati.find('VM').text
                # Same delimiter as the header. f.write() works on Python 2
                # and 3, unlike the "print >> f" statement.
                f.write('{},{},{},{}\n'.format(idstaz, sensor_id, ist, vm))