Error in parsing XML file using Python's ElementTree - python

I'm trying to import an XML file using ElementTree, but I get the following error:
Traceback (most recent call last):
File "Object_Detection_Script.py", line 3, in <module>
tree = ET.parse('text.xml')
File "ElementTree.py", line 1196, in parse
tree.parse(source, parser)
File "ElementTree.py", line 597, in parse
self._root = parser._parse_whole(source)
File "<string>", line None
xml.etree.ElementTree.ParseError: mismatched tag: line 20, column 6
Here's a part of my XML:
<Stream canTime="729785232" itcMsgCounter="39506" pcTime1="729209" sourceInfo="29.00" streamNumber="5.000" streamRefIndex="22090" vehIndexUsed="0" versionInfo="41240">
<vision_failsafes blurImageFailsafe="false" blurredImageSeverityLevel="0" ddrRamCrcFailure="false" flrMisalignment="false" foggySpotsSeverityLevel="0" frameIndex="0" fullBlockageSeverityLevel="0" heavyRainFailsafe="false" imageIndex="0" invalidAhbcSensitivityParams="false" invalidSpdYawDetected="false" lowSunSeverityLevel="0" lowVisibilitySeverityLevel="0" outOfCalibrationSeverityLevel="0" outOfFocusSeverityLevel="0" partialSolidBlockageFailsafe="false" radarCommErrorCounter="0" radarMisalignSeverityLevel="0" radarVisCorrelationFailsafe="false" rccMisalignment="false" rollAngleFailsafe="false" selfGlareSeverityLevel="0" smearImageSeverityLevel="0" smearedSpotsSeverityLevel="0" splashesSeverityLevel="0" spotRaysSeverityLevel="0" sunRaySeverityLevel="0"/>
<extraInfo>
XML file

Related

Can't write on XML file due to (TypeError: argument of type 'int' is not iterable) using ElementTree in Python

I keep getting
Traceback (most recent call last):
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/xml/etree/ElementTree.py", line 1076, in _escape_attrib
if "&" in text:
TypeError: argument of type 'int' is not iterable
when I'm trying to write an XML file with some attributes I need to edit.
Here's my code:
import xml.etree.ElementTree as ET
tree = ET.parse('TM_GeneralSettings.xml')
root = tree.getroot()
for item in root.iter('Control'):
numX = int(((720/1080)*float(item.attrib.get('LocationX'))))
numY = int(((720 / 1080) * float(item.attrib.get('LocationY'))))
numW = int(((720 / 1080) * float(item.attrib.get('SizeW'))))
numH = int(((720 / 1080) * float(item.attrib.get('SizeH'))))
print(numX, ':', numY, ':', numW, ':', numH)
item.set('LocationX', numX)
item.set('LocationY', numY)
item.set('LocationW', numW)
item.set('LocationH', numH)
tree.write('TM_GeneralSettings2.xml')
I also get these errors when I run my code:
Traceback (most recent call last):
File "/Users/alessandrochiodo/PycharmProjects/pythonProject/main.py", line 17, in <module>
tree.write('TM_GeneralSettings2.xml')
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/xml/etree/ElementTree.py", line 772, in write
serialize(write, self._root, qnames, namespaces,
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/xml/etree/ElementTree.py", line 937, in _serialize_xml
_serialize_xml(write, e, qnames, None,
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/xml/etree/ElementTree.py", line 930, in _serialize_xml
v = _escape_attrib(v)
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/xml/etree/ElementTree.py", line 1099, in _escape_attrib
_raise_serialization_error(text)
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/xml/etree/ElementTree.py", line 1053, in _raise_serialization_error
raise TypeError(
TypeError: cannot serialize 0 (type int)
Can someone help me? I can't find a solution.

FramenetError: Unknown frame

I run:
from nltk.corpus import framenet as fn
fn.frames()
And get the following error:
Traceback (most recent call last):
File "/Users/me/anaconda3/envs/nlp/lib/python3.6/site-packages/nltk/corpus/reader/framenet.py", line 1308, in frame_by_name
elt = XMLCorpusView(locpath, 'frame')[0]
File "/Users/me/anaconda3/envs/nlp/lib/python3.6/site-packages/nltk/corpus/reader/xmldocs.py", line 155, in __init__
encoding = self._detect_encoding(fileid)
File "/Users/me/anaconda3/envs/nlp/lib/python3.6/site-packages/nltk/corpus/reader/xmldocs.py", line 166, in _detect_encoding
with open(fileid, 'rb') as infile:
NotADirectoryError: [Errno 20] Not a directory: '/Users/me/nltk_data/corpora/framenet_v17.zip/framenet_v17/frame/Abandonment.xml'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/me/anaconda3/envs/nlp/lib/python3.6/site-packages/nltk/corpus/reader/framenet.py", line 876, in __repr__
for elt in self:
File "/Users/me/anaconda3/envs/nlp/lib/python3.6/site-packages/nltk/collections.py", line 406, in iterate_from
try: yield self._func(self._lists[0][index])
File "/Users/me/anaconda3/envs/nlp/lib/python3.6/site-packages/nltk/corpus/reader/framenet.py", line 1407, in frame
f = self.frame_by_id(fn_fid_or_fname, ignorekeys)
File "/Users/me/anaconda3/envs/nlp/lib/python3.6/site-packages/nltk/corpus/reader/framenet.py", line 1268, in frame_by_id
return self.frame_by_name(name, ignorekeys, check_cache=False)
File "/Users/me/anaconda3/envs/nlp/lib/python3.6/site-packages/nltk/corpus/reader/framenet.py", line 1310, in frame_by_name
raise FramenetError('Unknown frame: {0}'.format(fn_fname))
nltk.corpus.reader.framenet.FramenetError: Unknown frame: Abandonment
Both the Framenet 1.5 and 1.7 corpora are installed, according to nltk.download()

Python HTMLParser Not Reading Whole File

from HTMLParser import HTMLParser
class HTMLParserDos(HTMLParser):
full_text = ""
def handle_data(self, data):
self.full_text += data
return self.full_text
h = HTMLParserDos()
file = open('emails.txt', 'r')
h.feed(file.read())
file.close()
print h.container
This code is getting an error:
Traceback (most recent call last): File "/Users/laurenstrom/Google
Drive/PYTHON/RANDO_CALRISSIAN/html_parse", line 15, in
h.feed(file.read()) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 108, in feed
self.goahead(0) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 148, in goahead
k = self.parse_starttag(i) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 229, in parse_starttag
endpos = self.check_for_whole_start_tag(i) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 304, in check_for_whole_start_tag
self.error("malformed start tag") File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 115, in error
raise HTMLParseError(message, self.getpos()) HTMLParseError: malformed start tag, at line 7, column 18
I'm not sure what I'm missing about .feed() but I can't seem to find anything about why it won't just read the whole file.
You are asking the HTML parser to parse a file, most of which isn't HTML. It is tripping over line 7 of your file, which is:
Return-Path: <Tom#sjnetworkconsulting.com>
I would imagine it is seeing the < and assuming that what follows is HTML, which of course it is not.

how to make a sample of the generated xml?

I create an XML structure using lxml.etree.Element:
import lxml.etree
import lxml.html
parent = lxml.etree.Element('root')
child = lxml.etree.Element('sub')
child.text = 'text'
parent.append(child)
I need to do the following query:
doc = lxml.html.document_fromstring(parent)
text = doc.xpath('sub/text()')
print(text)
but I get the following error message:
Traceback (most recent call last): File
"C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\6_first_store_names_cat_full_xml_nested\q.py",
line 9, in
doc = lxml.html.document_fromstring(parent) File "C:\Python33\lib\site-packages\lxml\html__init__.py", line 600, in
document_fromstring
value = etree.fromstring(html, parser, **kw) File "lxml.etree.pyx", line 3003, in lxml.etree.fromstring
(src\lxml\lxml.etree.c:67277) File "parser.pxi", line 1784, in
lxml.etree._parseMemoryDocument (src\lxml\lxml.etree.c:101615)
ValueError: can only parse strings
Please help me.
lxml.html.document_fromstring() accepts a string, not an Element as you are passing in. Try passing in lxml.etree.tostring(parent):
s = lxml.etree.tostring(parent)
doc = lxml.html.document_fromstring(s)

Writing Python ElementTree to file throws TypeError

I'm trying to write an XML file using Python's ElementTree package. Basically I make a root element called allDepts, and then in each iteration of my for loop I call a function that returns a deptElement containing a bunch of information about a university department. I add every deptElement to allDepts, make an ElementTree out of allDepts, and try to write it to a file.
def crawl(year, season, campus):
departments = getAllDepartments(year, season, campus)
allDepts = ET.Element('depts')
for dept in departments:
deptElement = getDeptElement(allDepts, dept, year, season, campus)
print ET.tostring(deptElement) #Prints fine here!
ET.SubElement(allDepts, deptElement)
if deptElement == None:
print "ERROR: " + dept
with open(str(year) + season + "_" + campus + "_courses.xml", 'w') as f:
tree = ET.ElementTree(allDepts)
tree.write(f)
For some reason, at the tree.write(f) line, I get this error: "TypeError: cannot concatenate 'str' and 'instance' objects". Each deptElement prints out fine in the for loop, making me think that getDeptElement() is working fine. I never get my "ERROR" message printed out. Does anyone know what I'm doing wrong?
EDIT: Here's the full stack trace:
File "./CourseInfoCrawl.py", line 210, in <module>
crawl("2013", "S", "UBC")
File "./CourseInfoCrawl.py", line 207, in crawl
tree.write(f)
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/xml/etree/ElementTree.py", line 663, in write
self._write(file, self._root, encoding, {})
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/xml/etree/ElementTree.py", line 707, in _write
self._write(file, n, encoding, namespaces)
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/xml/etree/ElementTree.py", line 681, in _write
file.write("<" + _encode(tag, encoding))
It seems the following line is the cause:
print "ERROR: " + dept
Change it as follows and retry:
print "ERROR: ", dept
OR
print "ERROR: " + str(dept)
ADD
The second argument to ET.SubElement should be a str (the tag name of the new child). Is deptElement a str?
If deptElement is an Element, use allDepts.append(deptElement) instead.
http://docs.python.org/2/library/xml.etree.elementtree.html#xml.etree.ElementTree.SubElement
http://docs.python.org/2/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element.append
ADD 2
To reproduce error (Python 2.6):
>>> from xml.etree import ElementTree as ET
>>> allDepts = ET.Element('depts')
>>> ET.SubElement(allDepts, ET.Element('a'))
<Element <Element a at b727b96c> at b727b22c>
>>> with open('a', 'wb') as f:
... tree = ET.ElementTree(allDepts)
... tree.write(f)
...
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
File "/home/falsetru/t/Python-2.6/Lib/xml/etree/ElementTree.py", line 663, in write
self._write(file, self._root, encoding, {})
File "/home/falsetru/t/Python-2.6/Lib/xml/etree/ElementTree.py", line 707, in _write
self._write(file, n, encoding, namespaces)
File "/home/falsetru/t/Python-2.6/Lib/xml/etree/ElementTree.py", line 681, in _write
file.write("<" + _encode(tag, encoding))
TypeError: cannot concatenate 'str' and 'instance' objects
To reproduce error (Python 2.7, different error message):
>>> from xml.etree import ElementTree as ET
>>> allDepts = ET.Element('depts')
>>> ET.SubElement(allDepts, ET.Element('a'))
<Element <Element 'a' at 0xb745a8ec> at 0xb74601ac>
>>> with open('a', 'wb') as f:
... tree = ET.ElementTree(allDepts)
... tree.write(f)
...
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
File "/usr/lib/python2.7/xml/etree/ElementTree.py", line 817, in write
self._root, encoding, default_namespace
File "/usr/lib/python2.7/xml/etree/ElementTree.py", line 886, in _namespaces
_raise_serialization_error(tag)
File "/usr/lib/python2.7/xml/etree/ElementTree.py", line 1052, in _raise_serialization_error
"cannot serialize %r (type %s)" % (text, type(text).__name__)
TypeError: cannot serialize <Element 'a' at 0xb745a8ec> (type Element)

Categories

Resources