Object has no attribute error while using ElementTree - python

I'm a python newbie and have the following problem. If I uncomment the last line of this code:
# traverse all directories
# `root` is the directory path yielded by os.walk for the current level.
for root, dirs, files in os.walk(args.rootfolder):
    for file in files:
        print('Current file name: ' + file)
        if file.endswith('.savx'):
            # read single xml savx file
            currfilename = os.path.join(root,file)
            print('Current full file name: ' + currfilename)
            tree = ET.parse(currfilename)
            # NOTE: `root` is already the os.walk directory path used by
            # os.path.join above; assigning the parsed element to it would
            # replace that path for the files that follow.
            # root = tree.getroot() <-- if I uncomment this line I get errors
I get this error:
Traceback (most recent call last):
File "convert.py", line 30, in <module>
currfilename = os.path.join(root,file)
File "C:\progs\develop\Python34\lib\ntpath.py", line 108, in join
result_drive, result_path = splitdrive(path)
File "C:\progs\develop\Python34\lib\ntpath.py", line 161, in splitdrive
normp = p.replace(_get_altsep(p), sep)
AttributeError: 'xml.etree.ElementTree.Element' object has no attribute 'replace'
It looks like the error appears after the second iteration of the `for file in files` loop.

You used root twice in your code, for different things:
for root, dirs, files in os.walk(args.rootfolder):
# ^^^^
and
root = tree.getroot()
So when you try to use root as a path string when building the next filename to load:
currfilename = os.path.join(root,file)
you'll find you replaced the root path name with an ElementTree object instead.
Use a different name for either the root directory name or your ElementTree object. Use dirname for example:
# `dirname` holds the directory path yielded by os.walk, which leaves the
# name `root` free for the parsed XML root element below.
for dirname, dirs, files in os.walk(args.rootfolder):
    for file in files:
        print('Current file name: ' + file)
        if file.endswith('.savx'):
            # read single xml savx file
            currfilename = os.path.join(dirname, file)
            print('Current full file name: ' + currfilename)
            tree = ET.parse(currfilename)
            root = tree.getroot()

Related

Copy multiple files based on file names

Based on the first 3 characters of a file name, I want to create a folder and then copy the related file into it.
I have a script that works the first time; however, I get an error if I run it multiple times.
I believe I need to check whether the file exists first, but I haven't been able to get that to work.
Alternatively, I could filter the newly created folders out of the os.listdir result.
Any help would be greatly appreciated:
srcpath = 'C:\\temp\\Test'
# NOTE: os.listdir returns every entry in srcpath, directories included.
# destpath below is the same folder, so the prefix folders created by this
# script are part of this listing on any later run.
srcfiles = os.listdir(srcpath)
destpath = 'C:\\temp\\Test'
# extract the three letters from filenames
destdirs = list(set([filename[0:3] for filename in srcfiles]))


def create(destdirs, destpath):
    """Create folder `destdirs` inside `destpath` if missing; return its path.

    `destdirs` is a single folder name here (one three-letter prefix), not
    the module-level list of the same name.
    """
    full_path = os.path.join(destpath, destdirs)
    if not os.path.exists(full_path):
        os.mkdir(full_path)
    return full_path


def copy(filename, dirpath):
    """Copy `filename` from the module-level `srcpath` into `dirpath`."""
    shutil.copy(os.path.join(srcpath, filename), dirpath)


# create destination directories and store their names along with full paths
targets = [
    (folder, create(folder, destpath)) for folder in destdirs
]
# The loop variable rebinds the module-level name `destdirs` to one prefix
# at a time; every listed entry whose first three characters equal that
# prefix is copied into the matching folder.
for destdirs, full_path in targets:
    for filename in srcfiles:
        if destdirs == filename[0:3]:
            copy(filename, full_path)
ERROR
Traceback (most recent call last):
File "C:/Users/Desktop/copy_only.py", line 45, in <module>
copy(filename, full_path)
File "C:/Users/Desktop/copy_only.py", line 35, in copy
shutil.copy(os.path.join(srcpath, filename), dirpath)
File "C:\Python27\lib\shutil.py", line 119, in copy
copyfile(src, dst)
File "C:\Python27\lib\shutil.py", line 82, in copyfile
with open(src, 'rb') as fsrc:
IOError: [Errno 13] Permission denied: 'C:\\temp\\Test\\F12'
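It looks like you are trying to open the directory C:\\temp\\Test\\F12 as the source file in copy. srcpath and destpath are the same folder, so on a second run os.listdir(srcpath) also returns the folders created by the first run (for example F12), and shutil.copy then tries to open that folder as if it were a file. Skipping every entry for which os.path.isfile(os.path.join(srcpath, filename)) is false avoids this.
Otherwise, please check if you have the permission to open/read the file.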

Python Multiple filetype support with glob.glob

I'm trying to use glob.glob to provide support for more than one filetype. The code I have is supposed to take files with the extensions '.pdf', '.xls', and '.xlsx' residing in the directory '/mnt/Test' and execute the code below after files matching have been found.
When I replace the existing for loop with just
for filename in glob.glob("*.xlsx"):
print filename
It works just fine.
When attempting to run the following code:
def main():
    """Look for spreadsheet/PDF files in /mnt/Test and hand them on.

    The matches are passed to doc_placeholder, workspace_upload, the FTP
    connection and send_email, all of which are defined outside this snippet.
    """
    os.chdir("/mnt/Test")
    # NOTE: only the first pattern contains a wildcard; ".xlsx" and ".pdf"
    # match only files that are literally named ".xlsx" or ".pdf".
    extensions = ("*.xls", ".xlsx", ".pdf")
    filename = []
    # NOTE(review): the paste lost its indentation; the statements below are
    # assumed to sit inside this loop -- confirm against the original post.
    for files in extensions:
        filename.extend(glob.glob(files))
        print filename
        sys.stdout.flush()
        # NOTE: `filename` is a list at this point, but it is used below as
        # if it were a single path string (doc_placeholder, open, 'STOR ...').
        doc_id, version = doc_placeholder(filename)
        print 'doc_id:', doc_id, 'version:', version
        workspace_upload(doc_id, version, filename)
        print "%s has been found. Preparing next phase..." % filename
        ftp_connection.cwd(remote_path)
        fh = open(filename, 'rb')
        ftp_connection.storbinary('STOR %s' % timestr + '_' + filename, fh)
        fh.close()
        send_email(filename)
I run across the following error:
Report /mnt/Test/fileTest.xlsx has been added.
[]
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 810, in __bootstrap_inner
self.run()
File "/usr/local/lib/python2.7/dist- packages/watchdog/observers/api.py", line 199, in run
self.dispatch_events(self.event_queue, self.timeout)
File "/usr/local/lib/python2.7/dist- packages/watchdog/observers/api.py", line 368, in dispatch_events
handler.dispatch(event)
File "/usr/local/lib/python2.7/dist-packages/watchdog/events.py", line 330, in dispatch
_method_map[event_type](event)
File "observe.py", line 14, in on_created
fero.main()
File "/home/tesuser/project-a/testing.py", line 129, in main
doc_id, version = doc_placeholder(filename)
File "/home/testuser/project-a/testing.py", line 58, in doc_placeholder
payload = {'documents':[{'document':{'name':os.path.splitext(filename)[0],'parentId':parent_id()}}]}
File "/usr/lib/python2.7/posixpath.py", line 105, in splitext
return genericpath._splitext(p, sep, altsep, extsep)
File "/usr/lib/python2.7/genericpath.py", line 91, in _splitext
sepIndex = p.rfind(sep)
AttributeError: 'list' object has no attribute 'rfind'
How can I edit the code above to achieve what I need?
Thanks in advance, everyone. Appreciate the help.
doc_placeholder includes this snippet: os.path.splitext(filename). Assuming filename is the list you passed in, you have given a list to os.path.splitext when it expects a string.
Fix this by iterating over each filename instead of trying to process the entire list at once.
def main():
os.chdir("/mnt/Test")
extensions = ("*.xls", "*.xlsx", "*.pdf")
filenames = [] # made 'filename' plural to indicate it's a list
# building list of filenames moved to separate loop
for files in extensions:
filenames.extend(glob.glob(files))
# iterate over filenames
for filename in filenames:
print filename
sys.stdout.flush()
doc_id, version = doc_placeholder(filename)
print 'doc_id:', doc_id, 'version:', version
workspace_upload(doc_id, version, filename)
print "%s has been found. Preparing next phase..." % filename
ftp_connection.cwd(remote_path)
fh = open(filename, 'rb')
ftp_connection.storbinary('STOR %s' % timestr + '_' + filename, fh)
fh.close()
send_email(filename)

python parse/process all xml files in folder

I am trying to run my code on all XML files in a folder.
I get a few errors when I run the code; it generates some of the output files,
but not all of them.
Here is my code:
import xml.etree.ElementTree as ET
import os
import glob
path = 'C:/xml/'
for infile in glob.glob( os.path.join(path, '*.xml') ):
    # NOTE: ET.parse raises ParseError for an empty or malformed file.
    tree = ET.parse(infile)
    root = tree.getroot()
    # One output file per input, named after the input path plus 'new.csv'.
    with open(infile+'new.csv','w') as outfile:
        # '@type' tests the event's "type" attribute; without the '@' the
        # predicate would test a child element's text instead.
        for elem in root.findall('.//event[@type="MEDIA"]'):
            mediaidelem = elem.find('./mediaid')
            # Events without a <mediaid> child are skipped.
            if mediaidelem is not None:
                outfile.write("{}\n".format(mediaidelem.text))
Here is the error log:
Traceback (most recent call last):
File "C:\xml\2.py", line 8, in <module>
tree = ET.parse(infile)
File "C:\Python34\lib\xml\etree\ElementTree.py", line 1187, in parse
tree.parse(source, parser)
File "C:\Python34\lib\xml\etree\ElementTree.py", line 598, in parse
self._root = parser._parse_whole(source)
File "<string>", line None
xml.etree.ElementTree.ParseError: no element found: line 1, column 0
Considering the error message, you may have some empty (or malformed) files.
I would add error handling here to warn the user about such an error and then skip the file. Something like:
for infile in glob.glob(os.path.join(path, '*.xml')):
    try:
        tree = ET.parse(infile)
    except ET.ParseError as e:
        # Empty or malformed file: report which one and carry on with the
        # next file. The exception is reached through the `ET` alias because
        # that is the only name the import binds.
        print(infile, str(e))
        continue
    # rest of the loop body goes here, unchanged
    ...
I did not try to reproduce it here; it is just a guess.

creating and saving a pdf in reportlab returns an error

I'm trying to create a PDF using reportlab, but I keep getting an error. Ideally I want to save the created PDF to a specific directory, but this is just for testing, and the save function only saves it to the current working directory.
import os
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
# NOTE: this path ends with '/', so os.path.basename() below returns ''.
folder_path = "/home/ro/A Python Scripts/dest_test/"
folder_name = os.path.basename(folder_path)
# pdf_name is not used anywhere below.
pdf_name = folder_name + '.py'


def generate_pdf(folder_paths, folder_names, speedy_share_links):
    """Draw two lines of text on a new canvas and save it.

    `folder_names` is used both as the canvas file name and as the first
    line of text; `speedy_share_links` is the second line.  `folder_paths`
    is accepted but not used.
    """
    c = canvas.Canvas(folder_names)
    c.drawString(100,780,folder_names)
    c.drawString(100,750,speedy_share_links)
    c.save()


generate_pdf(folder_path, folder_name, "hiya")
I get the following error
Traceback (most recent call last):
File "pdf.py", line 16, in <module>
generate_pdf(folder_path, folder_name, "hiya")
File "pdf.py", line 14, in generate_pdf
c.save()
File "/usr/lib/python2.7/dist-packages/reportlab/pdfgen/canvas.py", line 1209, in save
self._doc.SaveToFile(self._filename, self)
File "/usr/lib/python2.7/dist-packages/reportlab/pdfbase/pdfdoc.py", line 216, in SaveToFile
f = open(filename, "wb")
IOError: [Errno 2] No such file or directory: u''
Your path ends with a slash, so os.path.basename returns an empty string for it, which is why the error shows the empty string.
Replace the line:
folder_path = "/home/ro/A Python Scripts/dest_test/"
With the line:
folder_path = "/home/ro/A Python Scripts/dest_test/foobar.pdf"
and your program will generate foobar.pdf in the current directory.

Renaming files recursively with Python

I have a large directory structure, each directory containing multiple sub-directories, multiple .mbox files, or both. I need to rename all the .mbox files to the respective file name without the extension e.g.
bar.mbox -> bar
foo.mbox -> foo
Here is the script I've written:
# !/usr/bin/python
import os, sys
def walktree(top, callback):
    """Call `callback` with the full path of every file found under `top`."""
    for path, dirs, files in os.walk(top):
        for filename in files:
            fullPath = os.path.join(path, filename)
            callback(fullPath)


def renameFile(file):
    """Rename a '.mbox' file to the same name without the extension.

    Paths that do not end in '.mbox' are left untouched.
    """
    if file.endswith('.mbox'):
        fileName, fileExt = os.path.splitext(file)
        print file, "->", fileName
        # NOTE: this fails when `fileName` already exists as a directory,
        # e.g. foo.mbox sitting next to a folder named foo.
        os.rename(file,fileName)


if __name__ == '__main__':
    # The directory to process is the first command-line argument.
    walktree(sys.argv[1], renameFile)
When I run this using:
python walkthrough.py "directory"
I get the error:
Traceback (most recent call last):
File "./walkthrough.py", line 18, in <module>
walktree(sys.argv[1], renameFile)
File "./walkthrough.py", line 9, in walktree
callback(fullPath)
File "./walkthrough.py", line 15, in renameFile
os.rename(file,fileName)
OSError: [Errno 21] Is a directory
This was solved by adding an extra conditional statement that tests whether the name the file is about to be renamed to already exists as a directory.
If it does, an underscore is appended to the new file name.
Thanks to WKPlus for the hint on this.
BCvery1

Categories

Resources