I'm trying to create a context manager that uses mmap, which is itself a context manager. Initially I had a dumb open-file problem (see "Why isn't mmap closing associated file (getting PermissionError: [WinError 32])?"), and an answer quickly explained why it wasn't working as desired.
Given that information, I've attempted two different ways to correct the issue, but neither one has worked.
The first approach was to use contextlib's @contextmanager decorator:
from contextlib import contextmanager
import os
import mmap
#contextmanager
def memory_map(filename, access=mmap.ACCESS_WRITE):
size = os.path.getsize(filename)
fd = os.open(filename, os.O_RDWR)
print('about to yield')
with mmap.mmap(fd, size, access=access) as m:
yield m
print('in finally clause')
os.close(fd) # Close the associated file descriptor.
test_filename = 'data'
# First create the test file.
size = 1000000
with open(test_filename, 'wb') as f:
f.seek(size - 1)
f.write(b'\x00')
# Read and modify mmapped file in-place.
with memory_map(test_filename) as m: # Causes AttributeError: __enter__
print(len(m))
print(m[0:10])
# Reassign a slice.
m[0:11] = b'Hello World'
# Verify that changes were made
print('reading back')
with open(test_filename, 'rb') as f:
print(f.read(11))
# Delete test file.
# Causes:
# PermissionError: [WinError 32] The process cannot access the file because it
# is being used by another process: 'data'
os.remove(test_filename)
But it results in:
Traceback (most recent call last):
File "memory_map.py", line 27, in <module>
with memory_map(test_filename) as m: # Causes AttributeError: __enter__
AttributeError: __enter__
In the next attempt I tried explicitly creating a context manager class:
import os
import mmap
class MemoryMap:
def __init__(self, filename, access=mmap.ACCESS_WRITE):
print('in MemoryMap.__init__')
size = os.path.getsize(filename)
self.fd = os.open(filename, os.O_RDWR)
self.mmap = mmap.mmap(self.fd, size, access=access)
def __enter__(self):
print('in MemoryMap.__enter__')
return self.mmap
def __exit__(self, exc_type, exc_value, traceback):
print('in MemoryMap.__exit__')
os.close(self.fd) # Close the associated file descriptor.
print(' file descriptor closed')
test_filename = 'data'
# First create the test file.
size = 1000000
with open(test_filename, 'wb') as f:
f.seek(size - 1)
f.write(b'\x00')
# Read and modify mmapped file in-place.
with MemoryMap(test_filename) as m:
print(len(m))
print(m[0:10])
# Reassign a slice.
m[0:11] = b'Hello World'
# Verify that changes were made
print('reading back')
with open(test_filename, 'rb') as f:
print(f.read(11))
# Delete test file.
# Causes PermissionError: [WinError 32] The process cannot access the file
# because it is being used by another process: 'data'
os.remove(test_filename)
This makes it further, but the PermissionError is back—which really confuses me because the file descriptor was closed in that version as you can see in the output produced:
in MemoryMap.__init__
in MemoryMap.__enter__
1000000
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
in MemoryMap.__exit__
file descriptor closed
reading back
b'Hello World'
Traceback (most recent call last):
File "memory_map2.py", line 47, in <module>
os.remove(test_filename)
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'data'
So it seems I'm stuck again. Any ideas on what's wrong (as well as how to fix it)? Also, in the event they can both be fixed, which one is better if you have an opinion?
Solutions
There was an error in both snippets. The first was a simple typographical error: the contextmanager decorator was commented out. It should have been:
@contextmanager  # Leading "#" changed to "@".
def memory_map(filename, access=mmap.ACCESS_WRITE):
    """Memory-map an existing file for the duration of a ``with`` block.

    Args:
        filename: Path of the file to map; it is opened read/write.
        access: ``mmap`` access mode (defaults to ``mmap.ACCESS_WRITE``).

    Yields:
        The ``mmap.mmap`` object covering the whole file.

    Raises:
        OSError: If the file cannot be sized or opened.
        ValueError: If ``mmap`` rejects the mapping (e.g. an empty file).
    """
    size = os.path.getsize(filename)
    fd = os.open(filename, os.O_RDWR)
    try:
        # The mmap is used as a context manager so the map is closed first;
        # closing only the descriptor leaves the file in use on Windows.
        with mmap.mmap(fd, size, access=access) as m:
            yield m
    finally:
        # Runs on normal exit, on an exception in the caller's block, and
        # when mmap.mmap() itself fails.
        os.close(fd)  # Close the associated file descriptor.
In the second it was because the mmap itself was not being closed in the __exit__() method, just the associated file descriptor. That never occurred to me because the exception raised was the same as in the first case.
def __exit__(self, exc_type, exc_value, traceback):
    """Release the memory map, then the file descriptor behind it.

    The exception arguments are ignored and nothing is returned, so an
    exception raised inside the ``with`` block propagates to the caller.
    """
    print('in MemoryMap.__exit__')
    try:
        self.mmap.close()  # ADDED.
    finally:
        # Release the descriptor even if closing the map raises (which can
        # happen, e.g., while a buffer exported from the map is alive).
        os.close(self.fd)  # Close the associated file descriptor.
        print(' file descriptor closed')
In case of your second attempt, you need to close the memory mapped file:
def __exit__(self, exc_type, exc_value, traceback):
    """Close the memory-mapped file, then its file descriptor.

    The exception arguments are ignored and nothing is returned, so an
    exception raised inside the ``with`` block propagates to the caller.
    """
    try:
        # MemoryMap.__init__ stores the map as ``self.mmap``; there is no
        # ``self.mm`` attribute.
        self.mmap.close()
    finally:
        # Always release the descriptor, even if closing the map raises.
        print('in MemoryMap.__exit__')
        os.close(self.fd)  # Close the associated file descriptor.
        print(' file descriptor closed')
Related
I have the following segment of code to create a hdf5 file, and have used "with" statement to ensure the file is correctly closed. However, I still keep having the error message as follows.
filename = 'E30.hdf5'
try:
with h5py.File(filename, 'w-') as f:
print('---')
except:
os.remove(filename)
f = h5py.File(filename, 'w-')
However, I still keep getting the error message shown below. The working directory may already contain an existing file named 'E30.hdf5'. But does that really matter? I tried to delete it from Windows directly. However, Windows does not allow me to delete it, saying the file is open.
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-6-e8ccfbc1b5d2> in vid_to_hdf(En, start, end, chunk)
9 try:
---> 10 with h5py.File(filename, 'w-') as f:
11 print('---')
~\AppData\Local\Continuum\anaconda3\envs\fastai-py37\lib\site-packages\h5py\_hl\files.py in __init__(self, name, mode, driver, libver, userblock_size, swmr, rdcc_nslots, rdcc_nbytes, rdcc_w0, track_order, **kwds)
407 fapl, fcpl=make_fcpl(track_order=track_order),
--> 408 swmr=swmr)
409
~\AppData\Local\Continuum\anaconda3\envs\fastai-py37\lib\site-packages\h5py\_hl\files.py in make_fid(name, mode, userblock_size, fapl, fcpl, swmr)
176 elif mode in ['w-', 'x']:
--> 177 fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
178 elif mode == 'w':
h5py\_objects.pyx in h5py._objects.with_phil.wrapper()
h5py\_objects.pyx in h5py._objects.with_phil.wrapper()
h5py\h5f.pyx in h5py.h5f.create()
OSError: Unable to create file (unable to open file: name = 'E30.hdf5', errno = 17, error message = 'File exists', flags = 15, o_flags = 502)
During handling of the above exception, another exception occurred:
PermissionError Traceback (most recent call last)
<timed eval> in <module>
<ipython-input-6-e8ccfbc1b5d2> in vid_to_hdf(En, start, end, chunk)
11 print('---')
12 except:
---> 13 os.remove(filename)
14 f = h5py.File(filename, 'w-')
15 # Create dataset within file
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'E30.hdf5'
You are running into multiple issues at once.
First, let's start with the h5py.File() access_mode flag.
w- : Create file, fail if exists (avoids accidentally overwriting an existing file)
w : Create file, truncate if exists (means it overwrites an existing file)
r+ : Read/write, file must exist (use to open an existing file to write data).
In your logic below, your try:/except: pattern will execute the try: statement if E30.hdf5 does not exist. It will execute the except: statement if E30.hdf5 exists.
This is complicated by the different h5py.File() usage in each branch. Your try: branch uses the with h5py.File() as f: form. So, when your code executes this logic, the file will close cleanly at the end (without an f.close() statement).
HOWEVER, your except: branch uses f=h5py.File(). So, when your code executes this logic, you need a f.close() statement to ensure closure at the end.
This is the scenario I think you are experiencing:
I assume E30.hdf5 does not exist the first time you run your code.
So, the first time you run, you go through the try: branch and the file is closed cleanly at the end.
The next time you run the code, E30.hdf5 exists, so, you go through the except: branch. As a result, the file is NOT closed at the end of the process, and another process cannot access it (Python or the OS).
Coding suggestions:
Your except: block has the same behavior as mode=w. The code below behaves the same, and will always close the file when the process completes. Also, it is more readable (IMHO). Note: both methods delete E30.hdf5 if it exists.
filename = 'E30.hdf5'
with h5py.File(filename, 'w') as f: # use mode=w
print('---')
Make this change if there is a burning need to keep the try:/except: pattern: (it's useful to use try:/except: for access modes w- and r+ WITHOUT the os.remove(filename).)
filename = 'E30.hdf5'
try:
    # Mode 'w-' creates the file and fails if it already exists.
    with h5py.File(filename, 'w-') as f:
        print('---')
except OSError:
    # h5py reports "File exists" as an OSError; catching only that keeps
    # unrelated failures (typos, KeyboardInterrupt, ...) from silently
    # deleting the file.
    os.remove(filename)
    # Use ``with`` here as well, so this branch also closes the file.
    with h5py.File(filename, 'w-') as f:
        print('+++')
I wanted to ask if it's possible to create PDF/XLS documents as temporary files. I'm doing that to send them using flask afterwards. For pdf/xls files creation I use reportlab and xlsxwriter packages respectively. When I save document using their methods, I get the "Python temporary file permission denied" error. When I try to close using the tempfile methods, files become corrupted. Is there any way to overcome this? Or any other suitable solution?
EDIT:
Some code snippets:
import xlswriter
import tempfile
from flask import after_this_request
#app.route('/some_url', method=['POST'])
def create_doc_function():
#after_this_request
def cleanup(response):
temp.close()
return response
temp = tempfile.TemporaryFile()
book = xlsxwriter.Workbook(temp.name)
# some actions here ...
book.close() # raises "Python temporaty file permission denied" error.
# If missed, Excel book is gonna be corrupted,
# i.e. blank, which make sense
return send_file(temp, as_attachment=True,
attachment_filename='my_document_name.xls')
Similar story with pdf files.
Use tempfile.mkstemp() which will create a standard temp file on disk which will persist until removed:
import tempfile
import os
handle, filepath = tempfile.mkstemp()
f = os.fdopen(handle) # convert raw handle to file object
...
EDIT
tempfile.TemporaryFile() will be destroyed as soon as it's closed, which is why your code above is failing.
You can create and delete a NamedTemporaryFile with a context manager (or the atexit module). It may do the dirty work for you. Example 1:
import os
from tempfile import NamedTemporaryFile
# define class, because everyone loves objects
class FileHandler:
    """Context manager that owns a self-deleting temporary file."""

    def __init__(self):
        """Create the temporary file.

        ``NamedTemporaryFile`` is called with its defaults, so the file is
        removed when it is closed (no explicit ``delete=True`` needed).
        """
        self.file = NamedTemporaryFile()

    def write_into(self, btext):
        """Write the bytes ``btext`` into the temporary file."""
        self.file.write(btext)

    def __enter__(self):
        """Enter the ``with`` block; the handler itself is the target."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close (and thereby delete) the temporary file.

        Reports whether the file exists on disk just before and just
        after the close.
        """
        tmp = self.file
        print('Calling __exit__:')
        print(f'File exists = {os.path.exists(tmp.name)}')
        tmp.close()
        print(f'File exists = {os.path.exists(tmp.name)}')
# use context mamager 'with' to create new instance and do something
with FileHandler() as fh:
fh.write_into(b'Hi happy developer!')
print(f'\nIn this point {fh.file.name} does not exist (exists = {os.path.exists(fh.file.name)})')
Output:
Calling __exit__:
File exists = True
File exists = False
In this point D:\users\fll2cj\AppData\Local\Temp\tmpyv37sp58 does not exist (exists = False)
Or you can use the atexit module, which calls a registered function when the program (cmd) exits. Example 2:
import os, atexit
from tempfile import NamedTemporaryFile
class FileHandler:
    """Temporary-file holder that cleans up when the interpreter exits."""

    def __init__(self):
        """Create the temporary file and schedule its cleanup."""
        self.file = NamedTemporaryFile()
        # Have the interpreter call _cleanup() on shutdown.
        atexit.register(self._cleanup)

    def write_into(self, btext):
        """Write the bytes ``btext`` into the temporary file."""
        self.file.write(btext)

    def _cleanup(self):
        """Close the temporary file.

        The file was created without ``delete=False``, so closing it also
        deletes it.
        """
        self.file.close()
# create new instance and do whatever you need
fh = FileHandler()
fh.write_into(b'Hi happy developer!')
# now the file still exists, but when program quits, _cleanup() is called and file closed and automaticaly deleted.
I'm trying to create and write a file if it does not exist yet, so that it is co-operatively safe from race conditions, and I'm having (probably stupid) problem. First, here's code:
import os
def safewrite(text, filename):
print "Going to open", filename
fd = os.open(filename, os.O_CREAT | os.O_EXCL, 0666) ##### problem line?
print "Going to write after opening fd", fd
os.write(fd, text)
print "Going to close after writing", text
os.close(fd)
print "Going to return after closing"
#test code to verify file writing works otherwise
f = open("foo2.txt", "w")
f.write("foo\n");
f.close()
f = open("foo2.txt", "r")
print "First write contents:", f.read()
f.close()
os.remove("foo2.txt")
#call the problem method
safewrite ("test\n", "foo2.txt")
Then the problem, I get exception:
First write contents: foo
Going to open foo2.txt
Going to write after opening fd 5
Traceback (most recent call last):
File "/home/user/test.py", line 21, in <module>
safewrite ("test\n", "foo2.txt")
File "/home/user/test.py", line 7, in safewrite
os.write(fd, text)
OSError: [Errno 9] Bad file descriptor
Probable problem line is marked in the code above (I mean, what else could it be?), but I can't figure out how to fix it. What is the problem?
Note: above was tested in a Linux VM, with Python 2.7.3. If you try the code and it works for you, please write a comment with your environment.
Alternative code to do the same thing at least as safely is also very welcome.
Change the line:
fd = os.open(filename, os.O_CREAT | os.O_EXCL, 0666)
to be instead:
fd=os.open(filename, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0666)
You must open the file with a flag such that you can write to it (os.O_WRONLY).
From open(2):
DESCRIPTION
The argument flags must include one of the following access modes: O_RDONLY, O_WRONLY, or O_RDWR. These request opening the file read-only,
write-only, or read/write, respectively.
From write(2):
NAME
write - write to a file descriptor
...
ERRORS
EAGAIN The file descriptor fd has been marked non-blocking (O_NONBLOCK) and the write would block.
EBADF fd is not a valid file descriptor or is not open for writing.
I'm trying to process the contents of a tarfile using multiprocessing.Pool. I'm able to successfully use the ThreadPool implementation within the multiprocessing module, but would like to be able to use processes instead of threads as it would possibly be faster and eliminate some changes made for Matplotlib to handle the multithreaded environment. I'm getting an error that I suspect is related to processes not sharing address space, but I'm not sure how to fix it:
Traceback (most recent call last):
File "test_tarfile.py", line 32, in <module>
test_multiproc()
File "test_tarfile.py", line 24, in test_multiproc
pool.map(read_file, files)
File "/ldata/whitcomb/epd-7.1-2-rh5-x86_64/lib/python2.7/multiprocessing/pool.py", line 225, in map
return self.map_async(func, iterable, chunksize).get()
File "/ldata/whitcomb/epd-7.1-2-rh5-x86_64/lib/python2.7/multiprocessing/pool.py", line 522, in get
raise self._value
ValueError: I/O operation on closed file
The actual program is more complicated, but this is an example of what I'm doing that reproduces the error:
from multiprocessing.pool import ThreadPool, Pool
import StringIO
import tarfile
def write_tar():
tar = tarfile.open('test.tar', 'w')
contents = 'line1'
info = tarfile.TarInfo('file1.txt')
info.size = len(contents)
tar.addfile(info, StringIO.StringIO(contents))
tar.close()
def test_multithread():
tar = tarfile.open('test.tar')
files = [tar.extractfile(member) for member in tar.getmembers()]
pool = ThreadPool(processes=1)
pool.map(read_file, files)
tar.close()
def test_multiproc():
tar = tarfile.open('test.tar')
files = [tar.extractfile(member) for member in tar.getmembers()]
pool = Pool(processes=1)
pool.map(read_file, files)
tar.close()
def read_file(f):
print f.read()
write_tar()
test_multithread()
test_multiproc()
I suspect that something's wrong when the TarInfo object is passed into the other process but the parent TarFile is not, but I'm not sure how to fix it in the multiprocess case. Can I do this without having to extract files from the tarball and write them to disk?
You're not passing a TarInfo object into the other process, you're passing the result of tar.extractfile(member) into the other process where member is a TarInfo object. The extractfile(...) method returns a file-like object which has, among other things, a read() method which operates upon the original tar file you opened with tar = tarfile.open('test.tar').
However, you can't use an open file from one process in another process, you have to re-open the file. I replaced your test_multiproc() with this:
def test_multiproc():
    """Read every member of ``test.tar`` in a worker process.

    Only the member *names* (plain strings) are sent to the pool; each
    worker re-opens the archive itself, because an open file object cannot
    be used from another process.
    """
    tar = tarfile.open('test.tar')
    try:
        files = tar.getnames()
    finally:
        # The parent only needs the names, so release the archive even if
        # listing it fails.
        tar.close()

    pool = Pool(processes=1)
    try:
        pool.map(read_file2, files)
    finally:
        # Shut the workers down instead of leaving them to interpreter
        # exit.
        pool.close()
        pool.join()
And added this:
def read_file2(name):
    """Print the contents of member ``name`` of ``test.tar``.

    The archive is opened inside the function, so it can run in a worker
    process that does not share the parent's open file.

    Args:
        name: Name of the archive member to read.
    """
    t2 = tarfile.open('test.tar')
    try:
        # The call form of print with a single argument behaves the same
        # on Python 2 and Python 3.
        print(t2.extractfile(name).read())
    finally:
        # Close the archive even if the member is missing or unreadable.
        t2.close()
and was able to get your code working.
Based on the with statement
The context manager’s __exit__() is loaded for later use.
The context manager’s __enter__() method is invoked.
I have seen one of the with usage with zipfile
Question>
I have checked the source code of zipfile located here:
/usr/lib/python2.6/zipfile.py
I don't know where the __enter__ and __exit__ functions are defined?
Thank you
zipfile.ZipFile is not a context manager in 2.6, this has been added in 2.7.
I've added this as another answer because it is generally not an answer to initial question. However, it can help to fix your problem.
class MyZipFile(zipfile.ZipFile):
    """``zipfile.ZipFile`` with ``with``-statement support.

    Adds the context-manager protocol for Python versions whose ``ZipFile``
    lacks it (it was added to the standard class in 2.7).
    """

    def __init__(self, file, mode='r', *args, **kwargs):
        """Open the archive ``file`` in ``mode``.

        Any further arguments are passed straight to ``ZipFile.__init__``.
        """
        # The instance must be passed explicitly when calling the base
        # initializer through the class.
        zipfile.ZipFile.__init__(self, file, mode, *args, **kwargs)

    def __enter__(self):
        """Return the archive itself as the ``with`` target."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the archive; exceptions from the block propagate."""
        self.close()
Usage:
with MyZipFile('new.zip', 'w') as tempzip: # Use content manager of MyZipFile
tempzip.write('sbdtools.py') # Write file to our archive
If you type
help(MyZipFile)
you can see all the methods of the original zipfile.ZipFile as well as your own methods: __init__, __enter__ and __exit__. You can add other functions of your own if you want.
Good luck!
Example of creating a class using object class:
class ZipExtractor(object):
    """Extract-only wrapper around a zip archive, usable in ``with``.

    Attributes:
        Path: Path given to the constructor.
        Header: First four bytes of the file, or None if it could not be
            read.
        ZipObject: The open ``zipfile.ZipFile``, or None when the file is
            missing or is not a zip archive.
    """

    def __init__(self, path):
        """Open ``path`` if it exists and starts with the zip signature.

        Problems are reported with ``print`` rather than raised; in that
        case ``ZipObject`` stays None.
        """
        import zipfile  # Local import keeps the class self-contained.

        self.Path = path
        self.Header = None
        self.ZipObject = None
        try:
            # Read the signature inside ``with`` so the probe handle is
            # closed again.
            with open(self.Path, 'rb') as temp:
                self.Header = temp.read(4)
        except IOError:
            print('File doesn\'t exist')
            return
        # The file is read in binary mode, so compare against bytes.
        if self.Header != b'\x50\x4B\x03\x04':
            print('Your file is not a zip archive!')
        else:
            self.ZipObject = zipfile.ZipFile(self.Path, 'r')

    def __enter__(self):
        """Return the extractor itself as the ``with`` target."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the archive; exceptions from the block propagate."""
        self.close()

    def SuperExtract(self, member=None, path=None):
        '''Used to extract files from zip archive. If arg 'member'
        was not set, extract all files. If path was set, extract file(s)
        to selected folder.

        Raises RuntimeError if no archive is open.'''
        if self.ZipObject is None:
            raise RuntimeError('No zip archive is open: %s' % self.Path)
        print('Extracting ZIP archive %s' % self.Path)
        print('Archive has header %s' % self.Header)
        if member is None:
            self.ZipObject.extractall(path)
        else:
            self.ZipObject.extract(member, path)

    def close(self):
        """Close the underlying archive, if one was opened."""
        if self.ZipObject is not None:
            self.ZipObject.close()
Usage:
with ZipExtractor('/path/to/zip') as zfile:
zfile.SuperExtract('file') # Extract file to current dir
zfile.SuperExtract(None, path='/your/folder') # Extract all to selected dir
# another way
zfile = ZipExtractor('/path/to/zip')
zfile.SuperExtract('file')
zfile.close() # Don't forget that line to clear memory
If you run 'help(ZipExtractor)', you will see five methods:
__init__, __enter__, __exit__, close, SuperExtract
I hope I've helped you. I didn't test it, so you might have to improve it.
cat-plus-plus is right. But if you want, you can write your own class to add "missed" features. All you need to do is to add two functions in your class (which is based on zipfile):
def __enter__(self):
return(self)
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
That should be enough, AFAIR.