xml file to csv file python script

xml file to csv file python script - python

I need a Python script to extract data from an XML file.
I have an XML file as shown below:
<software>
<name>Update Image</name>
<Build>22.02</Build>
<description>Firmware for Delta-M Series </description>
<CommonImages> </CommonImages>
<ModelBasedImages>
<ULT>
<CNTRL_0>
<file type="UI_APP" ver="2.35" crc="1234"/>
<file type="MainFW" ver="5.01" crc="5678"/>
<SIZE300>
<file type="ParamTableDB" ver="1.1.4" crc="9101"/>
</SIZE300>
</CNTRL_0>
<CNTRL_2>
<file type="UI_APP" ver="2.35" crc="1234"/>
<file type="MainFW" ver="5.01" crc="9158"/>
</CNTRL_2>
</ULT>
</ModelBasedImages>
</software>
I want the data in table format like:
type ver crc
UI_APP 2.35 1234
MainFW 5.01 5678
ParamTableDB 1.1.4 9101
UI_APP 2.35 1234
MainFW 5.01 9158
Extract into any type of file csv/doc....
I tried this code:
import xml.etree.ElementTree as ET
import csv
# Parse the XML document and take its root element.
tree = ET.parse("Build_40.01 (copy).xml")
root = tree.getroot()
# open a file for writing
Resident_data = open('ResidentData.csv', 'w')
# create the csv writer object
csvwriter = csv.writer(Resident_data)
resident_head = []
count = 0
# NOTE(review): findall('file') matches only direct children of the root element. In the sample
# XML shown above the <file> elements sit several levels down, so this loop presumably never runs.
for member in root.findall('file'):
resident = []
address_list = []
# First iteration only: emit the header row.
if count == 0:
# NOTE(review): find('type') looks for a child element named "type". In the sample XML,
# type/ver/crc are attributes of <file>, so find() would return None and .tag would fail.
name = member.find('type').tag
resident_head.append(name)
ver = member.find('ver').tag
resident_head.append(ver)
crc = member.find('crc').tag
resident_head.append(crc)
csvwriter.writerow(resident_head)
count = count + 1
# Data row: the text of the same three child elements.
name = member.find('type').text
resident.append(name)
ver = member.find('ver').text
resident.append(ver)
crc = member.find('crc').text
resident.append(crc)
csvwriter.writerow(resident)
Resident_data.close()
Thanks in advance
edited:xml code updated.

Use the xpath expression .//file to find all <file> elements in the XML document, and then use each element's attributes to populate the CSV file through a csv.DictWriter:
import csv
import xml.etree.ElementTree as ET
# Parse the XML document and take its root element.
tree = ET.parse("Build_40.01 (copy).xml")
root = tree.getroot()

# newline='' lets the csv module control line endings itself
# (otherwise blank rows can appear between records on Windows).
with open('ResidentData.csv', 'w', newline='') as f:
    w = csv.DictWriter(f, fieldnames=('type', 'ver', 'crc'))
    # Write the header row built from fieldnames.
    w.writeheader()
    # './/file' matches <file> elements at any depth; each element's attribute
    # dict (type / ver / crc) maps directly onto one CSV row.
    w.writerows(e.attrib for e in root.findall('.//file'))
For your sample input the output CSV file will look like this:
type,ver,crc
UI_APP,2.35,1234
MainFW,5.01,5678
ParamTableDB,1.1.4,9101
UI_APP,2.35,1234
MainFW,5.01,9158
which uses the default delimiter (comma) for a CSV file. You can change the delimiter using the delimiter=' ' option to DictWriter(), however, you will not be able to obtain the same formatting as your sample output, which appears to use fixed width fields (but you might get away with using tab as the delimiter).

Related

create a condition that separates text.tag when parsing xml with python

I have an XML file, file.xml, that I download from a source. Inside its Details element there are two OrderDetail elements. I decode it and write a new XML file, which I then parse to get some information.
<root>
<Details>
<OrderDetail ParentLineID="">H4sIAAAAAAAEAOy963LbyJbn+/
lMxLwDwtO7qnaMYeF+8d7VHZJolV0lWypRLu/u6g4HCC
QljClCmwTLdn+aFzkvd57k4EJSJAGIyJWQ8E+Ve3qqLd7
XSlxW/jLzl3//ty83E+UPNpvHyfTHZ/oL7dm//ev//B9/P
06m4/hqMQvS7==
</OrderDetail>
<OrderDetail ParentLineID="">H4sIAAAAAAAEAOy963LbyJbn+/
lMxLwDwtO7qnaMYeF+8d7VHZJolV0lWypRLu/u6g4HCC
QljClCmwTLdn+aFzkvd57k4EJSJAGIyJWQ8E+Ve3qqLd7
XSlxW/jLzl3//ty83E+UPNpvHyfTHZ/oL7dm//ev//B9/P
06m4/hqMQvS7==
</OrderDetail>
</Details>
</root>
# Parse the downloaded document; root is its top-level element.
tree = ET.parse('file.xml')
root = tree.getroot()
# Single output path shared by every function below.
DEST_FILE_NAME = "XMLparser\\decompresed.xml"
# Decode each <OrderDetail> payload (base64, then zlib) and write the text to DEST_FILE_NAME.
def translate_to_file():
for child in root.iter('OrderDetail'):
# The attribute value is read here but not stored or used.
child.get('ParentLineID')
# wbits = 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
result = zlib.decompress(base64.b64decode(child.text), 16 + zlib.MAX_WBITS).decode('utf-8')
# NOTE(review): mode "w" truncates DEST_FILE_NAME on every open; if this write sits inside the loop
# (indentation was lost here), each OrderDetail overwrites the previous one - confirm.
with open(DEST_FILE_NAME, "w") as file:
file.write(result)
# Return the lines of DEST_FILE_NAME as a list.
def read_file():
with open(DEST_FILE_NAME) as file:
return file.readlines()
# Rewrite DEST_FILE_NAME keeping only the lines that contain non-whitespace characters.
def clean_file(lines):
with open(DEST_FILE_NAME, 'w') as file:
lines = filter(lambda x: x.strip(), lines)
file.writelines(lines)
# Decode the payloads, read the result back, then strip blank lines from it.
def main():
translate_to_file()
lines = read_file()
clean_file(lines)
main()
When this file is decoded, it creates an XML file.
How can I create two separate XML files, one for each OrderDetail? That is, decompress the first base64 payload into one XML file, and decompress the other base64 payload into a separate XML file.

How to avoid double escape using XML

I'm using python to make a program which will have to write data in a XML tag of a specific file.
The line of data I'm willing to write is the following.
<Stream>XXXX-XXXX-XXXX-XXXX?p=0</Stream><URL>rtmp://a.rtmp.youtube.com/live2</URL>
But what I get in my XML file after writing is pretty different.
&lt;Stream&gt;XXXX-XXXX-XXXX-XXXX?p=0&lt;/Stream&gt;&lt;URL&gt;rtmp://a.rtmp.youtube.com/live2&lt;/URL&gt;
The &lt; and &gt; are there on purpose and are NOT < and >. I need to keep this formatting, but when I export the XML file, every & is replaced by &amp;.
I use this code to write data in the xml file:
from lxml import etree as ET
# Formula text containing characters that are special in XML: <, >, ', " and newlines.
Name_with_single_quote = """IF [Calculation_1] = 'Day-1' THEN [begintime] + 1
ELSEIF[Calculation_1] < 'Day-2' THEN [begintime] + 2
ELSEIF [Calculation_1] > "Day-3" THEN [begintime] + 3
ELSE [begintime]
END"""

# Manually replace the special characters with XML entities.
# NOTE(review): the entity literals were lost when this page was extracted and are
# reconstructed here; the newline replacement is presumably "&#10;" - confirm.
# NOTE(review): the serializer escapes the "&" of each entity again when the attribute
# is written out (e.g. "&lt;" becomes "&amp;lt;"); that is the double escape being asked about.
Name_with_single_quote = (
    Name_with_single_quote
    .replace("\n", "&#10;")
    .replace("<", "&lt;")
    .replace("'", "&apos;")
    .replace(">", "&gt;")
    .replace("\"", "&quot;")
)
Name_with_single_quote = str(Name_with_single_quote)

# Minimal document with an empty formula attribute to fill in.
xml = """<?xml version="1.0"?>
<column role="dimension" type="nominal" name="[Calculation_1]" datatype="boolean" caption="">
<calculation formula=""/>
</column>"""
tree = ET.fromstring(xml)
formula = tree.find('.//calculation')
formula.set('formula', Name_with_single_quote)

# Pretty-print through minidom, then drop the whitespace-only lines it inserts.
from xml.dom import minidom
xmlstr = minidom.parseString(ET.tostring(tree)).toprettyxml()
xmlstr = '\n'.join(list(filter(lambda x: len(x.strip()), xmlstr.split('\n'))))
with open('test_for_esc_result.xml', "w") as f:
    f.write(xmlstr)

Convert CSV document to XML

I know the question is redundant but I tried all the Python code that I found and modified for my file but they did not work. I need to find a way to convert my file myData.csv in to a XML format file which can be read by a navigator.
I just started to learn Python this month so I'm a beginner. This is my code:
#! usr/bin/python
# -*- coding: utf-8 -*-
import csv, sys, os
from lxml import etree
csvFile = 'myData.csv' # variable holding the name of the CSV file
reader= csv.reader(open(csvFile), delimiter=';', quoting=csv.QUOTE_NONE) # reader variable that receives the CSV table
# The print statements below use Python 2 syntax.
print "<data>"
for record in reader:
# The first line of the file supplies the column names used as tag names.
if reader.line_num == 1:
header = record
else:
innerXml = ""
dontShow = False
type = ""
for i, field in enumerate(record):
# Wrap each field in a tag named after its lower-cased column header.
innerXml += "<%s>" % header[i].lower() + field + "</%s>" % header[i].lower()
# The wrapper tag name is chosen from the values in columns 1 and 3.
if i == 1 and field == "0":
type = "Next"
elif type == "" and i == 3 and field == "0":
type = "Next"
elif type == "" and i == 3 and field != "0":
type = "film"
# An "X" in column 1 suppresses the whole record.
if i == 1 and field == "X":
dontShow = True
if dontShow == False:
xml = "<%s>" % type
xml += innerXml
xml += "</%s>" % type
print xml
print "</data>"

Consider building your XML with dedicated DOM objects and not a concatenation of strings which you can do with the lxml module. Using methods such as Element(), SubElement(), etc. you can iteratively build XML tree from reading CSV data:
import csv
import lxml.etree as ET
# Tag names for the child elements of each <data> record, in CSV column order.
headers = ['Titre', 'Realisateur', 'Date_Debut_Evenement', 'Date_Fin_Evenement', 'Cadre',
           'Lieu', 'Adresse', 'Arrondissement', 'Adresse_complète', 'Geo_Coordinates']

# INITIALIZING XML FILE
root = ET.Element('root')

# READING CSV FILE AND BUILD TREE
# newline='' lets the csv module handle line endings (and quoted newlines) itself.
with open('myData.csv', newline='') as f:
    csvreader = csv.reader(f)
    # SKIP HEADER - through the reader, so a quoted multi-line header is consumed
    # as one record; the None default tolerates an empty file.
    next(csvreader, None)
    for row in csvreader:
        data = ET.SubElement(root, "data")
        # Pair each value with its tag name; zip stops at the shorter of the
        # two, so a short row yields fewer children instead of an IndexError.
        for header, value in zip(headers, row):
            ET.SubElement(data, header).text = str(value)

# SAVE XML TO FILE
# With an explicit encoding, tostring() returns bytes, hence the 'wb' mode below.
tree_out = ET.tostring(root, pretty_print=True, xml_declaration=True, encoding="UTF-8")

# OUTPUTTING XML CONTENT TO FILE
with open('Output.xml', 'wb') as f:
    f.write(tree_out)
Output
<?xml version='1.0' encoding='UTF-8'?>
<root>
<data>
<Titre>1</Titre>
<Realisateur>BUS PALLADIUM</Realisateur>
<Date_Debut_Evenement>CHRISTOPHER THOMPSON</Date_Debut_Evenement>
<Date_Fin_Evenement>21 mai 2009</Date_Fin_Evenement>
<Cadre>21 mai 2009</Cadre>
<Lieu>EXTERIEUR</Lieu>
<Adresse>PLACE</Adresse>
<Arrondissement>PIGALLE</Arrondissement>
<Adresse_complète>75018</Adresse_complète>
<Geo_Coordinates>PLACE PIGALLE 75018 Paris France</Geo_Coordinates>
</data>
<data>
<Titre>2</Titre>
<Realisateur>LES INVITES DE MON PERE</Realisateur>
<Date_Debut_Evenement>ANNE LE NY</Date_Debut_Evenement>
<Date_Fin_Evenement>20 mai 2009</Date_Fin_Evenement>
<Cadre>20 mai 2009</Cadre>
<Lieu>DOMAINE PUBLIC</Lieu>
<Adresse>SQUARE</Adresse>
<Arrondissement>DU CLIGNANCOURT</Arrondissement>
<Adresse_complète>75018</Adresse_complète>
<Geo_Coordinates>SQUARE DU CLIGNANCOURT 75018 Paris France</Geo_Coordinates>
</data>
<data>
<Titre>3</Titre>
<Realisateur>DEMAIN, A L'AUBE</Realisateur>
<Date_Debut_Evenement>GAEL CABOUAT</Date_Debut_Evenement>
<Date_Fin_Evenement>17 avril 2009</Date_Fin_Evenement>
<Cadre>17 avril 2009</Cadre>
<Lieu>EXTERIEUR</Lieu>
<Adresse>RUE</Adresse>
<Arrondissement>QUINCAMPOIX</Arrondissement>
<Adresse_complète>75004</Adresse_complète>
<Geo_Coordinates>RUE QUINCAMPOIX 75004 Paris France</Geo_Coordinates>
</data>
...

(posted as an answer so I can show a code block)
There are a lot of picky details when writing XML. In Python, you should probably use some version of ElementTree to help with that. One good tutorial is Creating XML Documents. Quoting from there:
from xml.etree.ElementTree import Element, SubElement, Comment, tostring
# Root element, with a comment node attached as its first child.
top = Element('top')
top.append(Comment('Generated for PyMOTW'))

# A plain child that carries only text.
SubElement(top, 'child').text = 'This child contains text.'

# A child with both text (inside the element) and tail (after its closing tag).
tailed = SubElement(top, 'child_with_tail')
tailed.text = 'This child has regular text.'
tailed.tail = 'And "tail" text.'

# The ampersand is escaped to an entity reference when the tree is serialized.
SubElement(top, 'child_with_entity_ref').text = 'This & that'

print(tostring(top))
If you use this as an example of how to create a tree of XML elements, you should be able to translate your code into the XML structure you need.

Importing pandas and saving file name:
import pandas as pd
csvFile = 'myData.csv'
The following will read CSV into a pandas data frame, then convert to XML.
# csvFile is the path defined at the top of this example.
df = pd.read_csv(csvFile)
# With no path argument, to_xml() returns the XML document as a string.
df_xml = df.to_xml()
The code below creates a new file named "csv2xml.xml" and saves the XML data to it.
# The with-block closes the file even if the write raises. The explicit encoding
# matches the UTF-8 that to_xml() declares in the XML header by default.
with open("csv2xml.xml", "w", encoding="utf-8") as f:
    f.write(df_xml)

extract values and construct new file

I have "vtu" format file (for paraview) as a text. The format is like below:
<?xml version="1.0"?>
<VTKFile type="UnstructuredGrid" version="0.1" byte_order="LittleEndian" >
<UnstructuredGrid>
<Piece NumberOfPoints="21" NumberOfCells="20" >
<Points>
<DataArray type="Float64" Name="coordinates" NumberOfComponents="3" format="ascii" >
-3.3333333333e-01 1.1111111111e-01 0.0000000000e+00
-2.7777777778e-01 1.1111111111e-01 0.0000000000e+00
-1.1111111111e-01 4.4444444445e-01 0.0000000000e+00
</DataArray>
</Points>
<Cells>
<DataArray type="UInt64" Name="connectivity" NumberOfComponents="1" format="ascii" >
0 1
2 3
5 4
It is representing a mesh file.
I would like to extract the value for NumberOfPoints and also the first two coordinate and store them in another file as following:
21
-3.3333333333e-01
1.1111111111e-01
-2.7777777778e-01
1.1111111111e-01
-1.1111111111e-01
4.4444444445e-01
I am not familiar with Python; I can only read the file line by line, but I don't know how to construct the file above.
What I have learnt so far is very simple. For the first file I am able to detect the line NumberOfPoints is included by
import xml.etree.ElementTree as ET
# Parse the .vtu file as XML and take its root element.
tree = ET.parse('read.vtu')
root = tree.getroot()
# Visit every <Piece> element at any depth below the root.
for Piece in root.iter('Piece'):
# Python 2 print statement: shows all attributes of the element as a dict.
print Piece.attrib
# NumberOfPoints is an attribute of <Piece>, so it is read with get().
nr = Piece.get('NumberOfPoints')
print nr
Now I have 21 :) The next step is to add the coordinates, but I don't know how to parse them, since I cannot find any node connected to them.

Try this:
import xml.etree.ElementTree as ET
tree = ET.parse('read.vtu')
root = tree.getroot()
pieces = list(root.iter('Piece'))

# open() replaces the Python 2-only file() builtin; the with-block closes the
# output even if reading the coordinates fails part-way.
with open('out.txt', 'w') as o:
    # One line per <Piece>: its point count.
    for Piece in pieces:
        o.write(Piece.get('NumberOfPoints') + '\n')

    # The coordinates are the text of the first <Piece>'s first child (<Points>
    # in the sample) and that element's first child (<DataArray>). Indexing an
    # element replaces getchildren(), which was removed in Python 3.9.
    data = pieces[0][0][0]

    # Each non-blank text line is one point "x y z"; write x and y on their own lines.
    for line in data.text.splitlines():
        fields = line.split()
        if len(fields) < 2:
            # Blank or indentation-only lines surround the data; skip them.
            continue
        o.write(fields[0] + '\n')
        o.write(fields[1] + '\n')

meshio (a project of mine) knows the VTU format, so you could simply
pip install meshio
and then
import meshio
# NOTE(review): this five-way unpacking matches older meshio releases; newer releases presumably
# return a single mesh object (with .points and .cells) - confirm against the installed version.
points, cells, _, _, _ = meshio.read('file.vtu')

Python text file to xml

I have a question about transforming a text file to XML. I have already cleaned up the text file, and it looks like this:
Program: 5 Start: 2013-09-11 05:30:00 Duration 06:15:00 Title: INFOCANALE
And my output in XML will be like
<data>
<eg>
<program>Program 5</program>
<start>2013-09-11 05:30:00</start>
<duration>06:15:00</duration>
<title>INFOCANALE</title>
</eg>
</data>
Can python convert text file to XML?
Can you help me with some advice, or some code.

I think easiest way would be to change your file into csv file like this:
Program,Start,Duration,Title
5,2013-09-11 05:30:00,06:15:00,INFOCANALE
And then convert it like:
from lxml import etree
import csv
root = etree.Element('data')

# The with-block closes the input file; newline='' lets the csv module handle
# line endings itself.
with open("your file name here", newline='') as src:
    rdr = csv.reader(src)
    # The built-in next() replaces the Python 2-only .next() method.
    header = next(rdr)
    for row in rdr:
        # One <eg> element per CSV row, with one child per column named after its header.
        eg = etree.SubElement(root, 'eg')
        for h, v in zip(header, row):
            etree.SubElement(eg, h).text = v

# etree.tostring() returns bytes, so the output file is opened in binary mode.
with open(r"C:\temp\data2.xml", "wb") as f:
    f.write(etree.tostring(root))

# you also can use
# etree.ElementTree(root).write(r"C:\temp\data2.xml")

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

xml file to csv file python script - python

Related

create a condition that separates text.tag when parsing xml with python

How to avoid double escape using XML

Convert CSV document to XML

extract values and construct new file

Python text file to xml

Categories

Resources