Python, write in-memory zip to file

How do I write an in-memory zipfile to a file?
# Create in memory zip and add files
zf = zipfile.ZipFile(StringIO.StringIO(), mode='w',compression=zipfile.ZIP_DEFLATED)
zf.writestr('file1.txt', "hi")
zf.writestr('file2.txt', "hi")
# Need to write it out
f = file("C:/path/my_zip.zip", "w")
f.write(zf) # what to do here? Also tried f.write(zf.read())
f.close()
zf.close()

StringIO.getvalue() returns the contents of the StringIO object:
>>> import StringIO
>>> f = StringIO.StringIO()
>>> f.write('asdf')
>>> f.getvalue()
'asdf'
Alternatively, you can move the file position back to the start with seek and then read it:
>>> f.read()
''
>>> f.seek(0)
>>> f.read()
'asdf'
Try the following:
mf = StringIO.StringIO()
with zipfile.ZipFile(mf, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
    zf.writestr('file1.txt', "hi")
    zf.writestr('file2.txt', "hi")
with open("C:/path/my_zip.zip", "wb") as f:  # use `wb` mode
    f.write(mf.getvalue())

Modifying falsetru's answer for Python 3:
1) use io.BytesIO instead of StringIO.StringIO (the zip data is binary):
StringIO in python3
2) use b"abc" instead of "abc", or
python 3.5: TypeError: a bytes-like object is required, not 'str' when writing to a file
3) encode the string to bytes: str.encode(s, "utf-8")
Best way to convert string to bytes in Python 3?
import zipfile
import io
mf = io.BytesIO()
with zipfile.ZipFile(mf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
    zf.writestr('file1.txt', b"hi")
    zf.writestr('file2.txt', str.encode("hi"))
    zf.writestr('file3.txt', str.encode("hi", 'utf-8'))
with open("a.txt.zip", "wb") as f:  # use `wb` mode
    f.write(mf.getvalue())
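To sanity-check the result, the archive can be read back (a small optional sketch; "a.txt.zip" is the filename used above):
import zipfile
with zipfile.ZipFile("a.txt.zip") as zf:
    print(zf.namelist())         # ['file1.txt', 'file2.txt', 'file3.txt']
    print(zf.read('file1.txt'))  # b'hi'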
This should also work for gzip: How do I gzip compress a string in Python?
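For example, a minimal sketch of the same in-memory idea with gzip (the buffer and the output filename "hi.gz" are placeholders, not taken from the linked answer):
import gzip, io
buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
    gz.write(b"hi")
with open("hi.gz", "wb") as f:
    f.write(buf.getvalue())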

from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED

with ZipFile(read_file, 'r') as zipread:
    with ZipFile(file_write_buffer, 'w', ZIP_DEFLATED) as zipwrite:
        for item in zipread.infolist():
            # Copy all ZipInfo attributes for each file since defaults are not preserved
            dest = ZipInfo(item.filename)
            dest.CRC = item.CRC
            dest.date_time = item.date_time
            dest.create_system = item.create_system
            dest.compress_type = item.compress_type
            dest.external_attr = item.external_attr
            dest.compress_size = item.compress_size
            dest.file_size = item.file_size
            dest.header_offset = item.header_offset
            zipwrite.writestr(dest, zipread.read(item.filename))
If the resulting zip file reads as corrupted, or you notice missing symlinks or files with wrong timestamps, it may be because these file properties are not being copied over.
The code snippet above is how I solved the problem.
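For illustration, read_file and file_write_buffer in the snippet above could be, say, a path on disk and an in-memory buffer; this pairing is an assumption, not part of the original answer:
import io
read_file = "source.zip"          # hypothetical input archive
file_write_buffer = io.BytesIO()  # in-memory target buffer
# ... run the copy loop from the snippet above, then write the buffer out:
with open("copy.zip", "wb") as f:
    f.write(file_write_buffer.getvalue())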

Related

I am getting the error: "UnsupportedOperation: read"

import pickle
#writing into the file
f = open("essay1.txt","ab+")
list1 = ["Aditya","Arvind","Kunal","Naman","Samantha"]
list2 = ["17","23","12","14","34"]
zipfile = zip(list1,list2)
print(zipfile)
pickle.dump(zipfile,f)
f.close()
#opening the file to read it
f = open("essay1","ab")
zipfile = pickle.load(f)
f.close()
and the output was:
runfile('E:/Aditya Singh/Aditya Singh/untitled3.py', wdir='E:/Aditya Singh/Aditya Singh')
<zip object at 0x0000000008293BC8>
Traceback (most recent call last):
File "E:\Aditya Singh\Aditya Singh\untitled3.py", line 21, in <module>
zipfile = pickle.load(f)
UnsupportedOperation: read
You forgot the file extension .txt on the line where you tried to open the file for reading, and you also opened it in append mode, which is why the returned object does not have the read or readline methods required by pickle.load. I also suggest using the with keyword instead of manually closing the file.
import pickle
# writing into the file
with open("essay1.txt", "ab+") as f:
    list1 = ["Aditya","Arvind","Kunal","Naman","Samantha"]
    list2 = ["17","23","12","14","34"]
    zipfile = zip(list1,list2)
    print(zipfile)
    pickle.dump(zipfile,f)
# opening the file to read it
with open("essay1.txt", "rb") as f:
    zipfile = pickle.load(f)
    for item in zipfile:
        print(item)
Output:
<zip object at 0x7fa6cb30e3c0>
('Aditya', '17')
('Arvind', '23')
('Kunal', '12')
('Naman', '14')
('Samantha', '34')
Do you have a file named essay1, or essay1.txt?
This line tries to open the file without the extension:
f = open("essay1","ab")
so it fails to read.
There are two issues with your code:
You're opening the file to write and not to read.
You're using different filenames for reading and for writing.
Here's a version that works:
import pickle
#writing into the file
f = open("essay1.txt","wb")
list1 = ["Aditya","Arvind","Kunal","Naman","Samantha"]
list2 = ["17","23","12","14","34"]
zipfile = zip(list1,list2)
print(zipfile)
pickle.dump(zipfile,f)
f.close()
#opening the file to read it
f = open("essay1.txt","rb")
zipfile = pickle.load(f)
print(zipfile)
f.close()

How to archive binary data

Use zipfile to archive a data dictionary:
import os, sys, cPickle, zipfile
data = {1: 'one'}
pfile = r'c:\temp\myPickle.p'
cPickle.dump( data, open( pfile, "wb" ))
zfilename = r'c:\temp\myArchive.zip'
zfile = zipfile.ZipFile(zfilename, "w", zipfile.ZIP_DEFLATED)
zfile.write(pfile, os.path.basename(pfile))
zfile.close()
This approach results in two files on the local drive: one is the pickle and the other is the zip.
To get the pickled data back, the zip file needs to be un-archived:
fh = open(zfilename, 'rb')
z = zipfile.ZipFile(fh)
for each in z.namelist():
    z.extract(each, r'c:\temp')
fh.close()
How to simplify the process?
Use gzip instead. It compresses the file as you write it so there is no need for intermediate files.
# (python 2) import cPickle as pickle
import pickle
import gzip
data = {1: 'one'}
pfile = r'test.pkl.gz'
pickle.dump(data, gzip.open(pfile, "w"), pickle.HIGHEST_PROTOCOL)
print pickle.load(gzip.open(pfile))
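For Python 3, the same approach can be written with context managers closing the handles (a sketch, not part of the original answer):
import gzip
import pickle
data = {1: 'one'}
with gzip.open('test.pkl.gz', 'wb') as fh:
    pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL)
with gzip.open('test.pkl.gz', 'rb') as fh:
    print(pickle.load(fh))   # {1: 'one'}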

Renaming a list of PDF files with a for loop

I am trying to rename a list of PDF files by extracting the name from each file using pyPdf. I tried to use a for loop to rename the files, but I always get an error with code 32 saying that the file is being used by another process. I am using Python 2.7.
Here's my code:
import os, glob
from pyPdf import PdfFileWriter, PdfFileReader

# this function extracts the name of the file
def getName(filepath):
    output = PdfFileWriter()
    input = PdfFileReader(file(filepath, "rb"))
    output.addPage(input.getPage(0))
    outputStream = file(filepath + '.txt', 'w')
    output.write(outputStream)
    outputStream.close()
    outText = open(filepath + '.txt', 'rb')
    textString = outText.read()
    outText.close()
    nameStart = textString.find('default">')
    nameEnd = textString.find('_SATB', nameStart)
    nameEnd2 = textString.find('</rdf:li>', nameStart)
    if nameStart:
        testName = textString[nameStart+9:nameEnd]
        if len(testName) <= 100:
            name = testName + '.pdf'
        else:
            name = textString[nameStart+9:nameEnd2] + '.pdf'
    return name

pdfFiles = glob.glob('*.pdf')
m = len(pdfFiles)
for each in pdfFiles:
    newName = getName(each)
    os.rename(each, newName)
Consider using Python's with statement. With it you do not need to handle closing the file yourself:
def getName(filepath):
    output = PdfFileWriter()
    with file(filepath, "rb") as pdfFile:
        input = PdfFileReader(pdfFile)
        ...
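Putting it together, the rename loop might look like this, with the PDF's file handle closed before os.rename is called (a sketch; getNameFromReader is a hypothetical helper holding the name-extraction logic from the question):
import os, glob
from pyPdf import PdfFileReader

for each in glob.glob('*.pdf'):
    with open(each, 'rb') as fh:                        # handle is closed when the block exits
        newName = getNameFromReader(PdfFileReader(fh))  # hypothetical helper
    os.rename(each, newName)                            # safe: no open handle on `each` now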
You're not closing the input stream (the file) used by the pdf reader.
Thus, when you try to rename the file, it's still open.
So, instead of this:
input = PdfFileReader(file(filepath, "rb"))
Try this:
inputStream = file(filepath, "rb")
input = PdfFileReader(inputStream)
(... when done with this file...)
inputStream.close()
It does not look like you close the file object associated with the PdfFileReader object. It may be closed automatically at the end of the function, but to be sure you might want to create a separate file object, pass it to PdfFileReader, and close the file handle when done. Then rename.
The snippet below is from SO: How to close pyPDF "PdfFileReader" Class file handle
import os.path
from pyPdf import PdfFileReader
fname = 'my.pdf'
fh = file(fname, "rb")
input = PdfFileReader(fh)
fh.close()
os.rename(fname, 'my_renamed.pdf')

How to read filenames included in a gz file

I've tried to read a gz file:
with open(os.path.join(storage_path, file), "rb") as gzipfile:
    with gzip.GzipFile(fileobj=gzipfile) as datafile:
        data = datafile.read()
It works, but I need the filenames and the size of every file included in my gz file.
This code prints out the content of the file included in the archive.
How can I read the filenames included in this gz file?
The Python gzip module does not provide access to that information.
The source code skips over it without ever storing it:
if flag & FNAME:
    # Read and discard a null-terminated string containing the filename
    while True:
        s = self.fileobj.read(1)
        if not s or s == '\000':
            break
The filename component is optional, not guaranteed to be present (the commandline gzip -c decompression option would use the original filename sans .gz in that case, I think). The uncompressed filesize is not stored in the header; you can find it in the last four bytes instead.
To read the filename from the header yourself, you'd need to recreate the file header reading code, and retain the filename bytes instead. The following function returns that, plus the decompressed size:
import struct
from gzip import FEXTRA, FNAME

def read_gzip_info(gzipfile):
    gf = gzipfile.fileobj
    pos = gf.tell()

    # Read archive size
    gf.seek(-4, 2)
    size = struct.unpack('<I', gf.read())[0]

    gf.seek(0)
    magic = gf.read(2)
    if magic != '\037\213':
        raise IOError('Not a gzipped file')

    method, flag, mtime = struct.unpack("<BBIxx", gf.read(8))

    if not flag & FNAME:
        # Not stored in the header, use the filename sans .gz
        gf.seek(pos)
        fname = gzipfile.name
        if fname.endswith('.gz'):
            fname = fname[:-3]
        return fname, size

    if flag & FEXTRA:
        # Read & discard the extra field, if present
        gf.read(struct.unpack("<H", gf.read(2))[0])

    # Read a null-terminated string containing the filename
    fname = []
    while True:
        s = gf.read(1)
        if not s or s == '\000':
            break
        fname.append(s)

    gf.seek(pos)
    return ''.join(fname), size
Use the above function with an already-created gzip.GzipFile object:
filename, size = read_gzip_info(gzipfileobj)
GzipFile itself doesn't have this information, but:
The file name is (usually) the name of the archive minus the .gz
If the uncompressed file is smaller than 4G, then the last four bytes of the archive contain the uncompressed size:
In [14]: f = open('fuse-ext2-0.0.7.tar.gz')
In [15]: f.seek(-4, 2)
In [16]: import struct
In [17]: r = f.read()
In [18]: struct.unpack('<I', r)[0]
Out[18]: 7106560
In [19]: len(gzip.open('fuse-ext2-0.0.7.tar.gz').read())
Out[19]: 7106560
(technically, the last four bytes are the size of the original (uncompressed) input data modulo 2**32 (the ISIZE field in the member trailer, http://www.gzip.org/zlib/rfc-gzip.html))
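For reference, the same check written for Python 3 needs the file opened in binary mode (a sketch reusing the filename from the session above):
import struct
with open('fuse-ext2-0.0.7.tar.gz', 'rb') as f:
    f.seek(-4, 2)                              # 2 = os.SEEK_END
    isize = struct.unpack('<I', f.read(4))[0]  # uncompressed size modulo 2**32
print(isize)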
I've solved it this way:
fl = search_files(storage_path)
for f in fl:
    with open(os.path.join(storage_path, f), "rb") as gzipfile:
        with gzip.GzipFile(fileobj=gzipfile) as datafile:
            data = datafile.read()
    print str(storage_path) + "/" + str(f[:-3]) + " : " + str(len(data)) + " bytes"  # pcap file size
I don't know if it's correct. Any suggestions?
The new code:
fl = search_files(storage_path)
for f in fl:
    with open(os.path.join(storage_path, f), "rb") as gzipfile:
        # try with modulo 2**32
        gzipfile.seek(-4, 2)
        r = gzipfile.read()
        print str(storage_path) + "/" + str(f[:-3]) + " : " + str(struct.unpack('<I', r)[0]) + " bytes"  # pcap file size
Martijn's solution is really nice; I've packaged it for Python 3.6+: https://github.com/PierreSelim/gzinfo
You just need to pip install gzinfo.
In your code:
import gzinfo
info = gzinfo.read_gz_info('bar.txt.gz')
# info.name is 'foo.txt'
print(info.fname)

Python: Mat-File into hex-values

filename = r"C:\Dokumente und Einstellungen\sschnei1\Desktop\a.mat"
print open(filename, "r").read().encode("hex")
The code above only works for text files, but I want to read out the hex values of .mat files.
EDIT: my little hex editor:
from textwrap import fill
filename = r"C:\a.mat"
hexvalues = open(filename, "rb").read().encode("hex")
print fill(hexvalues,16)
Try open(filename, "rb") to open it as a binary file.
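On Python 3 there is no "hex" codec for str.encode; a sketch of the equivalent using bytes.hex() (available since Python 3.5), reusing the path from the edit above:
from textwrap import fill
with open(r"C:\a.mat", "rb") as f:
    hexvalues = f.read().hex()
print(fill(hexvalues, 16))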
