Unzipping file results in "BadZipFile" - python

I know there are similar questions, but none of them provided a solution for my problem. I am using the following code:
import os, glob
import zipfile
root = 'E:\\xx\\fashion\\*'
directory = 'E:\\xx\\fashion\\'
extension = ".zip"
date_file_list = []
for folder in glob.glob(root):
if folder.endswith(extension): # check for ".zip" extension
print(folder)
zipfile.ZipFile(os.path.join(directory, folder)).extractall(os.path.join(directory, os.path.splitext(folder)[0]))
os.remove(folder) # delete zipped file_name
And I get the following error:
Traceback (most recent call last):
File "C:/Users/xx/unzip.py", line 12, in <module>
zipfile.ZipFile(os.path.join(directory, folder)).extractall(os.path.join(directory, os.path.splitext(folder)[0]))
File "C:\Users\xx\AppData\Local\Programs\Python\Python35\lib\zipfile.py", line 1026, in __init__
self._RealGetContents()
File "C:\Users\xx\AppData\Local\Programs\Python\Python35\lib\zipfile.py", line 1094, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
Some of the files were compressed with WinZip and some with 7-Zip, and there are too many files to unzip by hand.
Does anybody know why this error is occurring?

Related

"OSError: [Errno 9] Bad file descriptor" error when trying to save excel file with openpyxl in python

I have a couple of excel files I want to merge into one.
I need the second column on all the files to be copied into separate columns in a new Microsoft Excel file.
For this, I am using the openpyxl library in a python script.
This is my code:
import os
from openpyxl import load_workbook
def mergeDataFiles():
path = "C:\\Users\\ethan\\Desktop\\Benzoyl Chloride\\Benzoyl Chloride"
# source excel files
origin_files = list()
for path, subdirs, files in os.walk(path):
for file_index in range(len(files)):
origin_files.append(files[file_index])
# destination excel file
destination_file = path + ".xlsx"
destination_workbook = load_workbook(destination_file)
destination_sheet = destination_workbook["Sheet1"]
# copy data from source files to destination file
for origin_file_index in range(1, len(origin_files)):
origin_workbook = load_workbook(path + "\\" + origin_files[origin_file_index - 1])
origin_sheet = origin_workbook['Data']
destination_sheet.cell(row=1, column=origin_file_index).value = origin_files[origin_file_index - 1]
for i in range(1, 500):
# read cell value from source excel file
data = origin_sheet.cell(row=i, column=2)
# write the value to destination excel file
destination_sheet.cell(row=i + 1, column=origin_file_index).value = data.value
# saving the destination excel file
destination_workbook.save(destination_file)
if __name__ == "__main__":
mergeDataFiles()
When I run the code, I get an error on the last line in the function: OSError: [Errno 9] Bad file descriptor.
Full traceback:
C:\Users\ethan\.venv\Scripts\python.exe "C:/Users/ethan/Coding/Python/Copy Excel Data/main.py"
Traceback (most recent call last):
File "C:\Users\ethan\Coding\Python\Copy Excel Data\main.py", line 32, in <module>
mergeDataFiles()
File "C:\Users\ethan\Coding\Python\Copy Excel Data\main.py", line 28, in mergeDataFiles
destination_workbook.save(destination_file)
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\workbook\workbook.py", line 407, in save
save_workbook(self, filename)
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\writer\excel.py", line 293, in save_workbook
writer.save()
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\writer\excel.py", line 275, in save
self.write_data()
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\writer\excel.py", line 67, in write_data
archive.writestr(ARC_APP, tostring(props.to_tree()))
File "C:\Program Files\Python311\Lib\zipfile.py", line 1830, in writestr
with self.open(zinfo, mode='w') as dest:
File "C:\Program Files\Python311\Lib\zipfile.py", line 1204, in close
self._fileobj.seek(self._zinfo.header_offset)
OSError: [Errno 9] Bad file descriptor
Exception ignored in: <function ZipFile.__del__ at 0x000001D101443D80>
Traceback (most recent call last):
File "C:\Program Files\Python311\Lib\zipfile.py", line 1870, in __del__
self.close()
File "C:\Program Files\Python311\Lib\zipfile.py", line 1892, in close
self._fpclose(fp)
File "C:\Program Files\Python311\Lib\zipfile.py", line 1992, in _fpclose
fp.close()
OSError: [Errno 9] Bad file descriptor
Process finished with exit code 1
I have tried changing the file names and locations, having the destination file open and closed, scouring the internet for solutions and at this point I'm not sure what else I can try.
I am running the code on Windows 10 22H2, with an intel i5 cpu.
Please assist me with this issue, if you know how to solve it.

Having problem with opening same files openpyxl

I am writing a program that shuffles the contents of files. All the files are almost the same, but it doesn't work for some of them, and I can't understand why.
for file in allFiles:
print(file)
items = []
fileName = file
fileIndex = 1
directory = os.path.join(path, fileName[:-5].strip())
if not os.path.exists(directory):
os.mkdir(directory)
theFile = openpyxl.load_workbook(file)
allSheetNames = theFile.sheetnames
After processing a number of files, it shows me this error:
Traceback (most recent call last):
File "D:\staff\Python\NewProject\glow.py", line 25, in <module>
theFile = openpyxl.load_workbook(file)
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\site-packages\openpyxl\reader\excel.py", line 313, in load_workbook
reader = ExcelReader(filename, read_only, keep_vba,
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\site-packages\openpyxl\reader\excel.py", line 124, in __init__
self.archive = _validate_archive(fn)
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\site-packages\openpyxl\reader\excel.py", line 96, in _validate_archive
archive = ZipFile(filename, 'r')
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\zipfile.py", line 1269, in __init__
self._RealGetContents()
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\zipfile.py", line 1336, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
Before that, everything worked fine and there was no error. Can someone guess why? Thanks, everybody.
I am looking for the files this way:
path = os.getcwd()
sourcePath = os.getcwd() + '\source'
extension = 'xlsx'
os.chdir(sourcePath)
allFiles = glob.glob('*.{}'.format(extension))
You iterate over all files regardless of their file type. Probably you or some process added a file to the directory that is not an xlsx file. This is why openpyxl fails to read it.

Tablib xlsx file badZip file issue

I am getting an error when opening an .xlsx file on Windows 8 using the tablib library.
python version - 2.7.14
error is as follows:
python suit_simple_sheet_product.py
Traceback (most recent call last):
File "suit_simple_sheet_product.py", line 19, in <module>
data = tablib.Dataset().load(open(BASE_PATH).read())
File "C:\Python27\lib\site-packages\tablib\core.py", line 446, in load
format = detect_format(in_stream)
File "C:\Python27\lib\site-packages\tablib\core.py", line 1157, in detect_format
if fmt.detect(stream):
File "C:\Python27\lib\site-packages\tablib\formats\_xls.py", line 25, in detect
xlrd.open_workbook(file_contents=stream)
File "C:\Python27\lib\site-packages\xlrd\__init__.py", line 120, in open_workbook
zf = zipfile.ZipFile(timemachine.BYTES_IO(file_contents))
File "C:\Python27\lib\zipfile.py", line 770, in __init__
self._RealGetContents()
File "C:\Python27\lib\zipfile.py", line 811, in _RealGetContents
raise BadZipfile, "File is not a zip file"
zipfile.BadZipfile: File is not a zip file
The path is as follows:
BASE_PATH = 'C:\Users\anju\Downloads\automate\catalog-5090 fabric detail and price list.xlsx'
Excel .xlsx files are actually zip files. For the unzip to work correctly, the file must be opened in binary mode, so you need to open the file using:
import tablib
BASE_PATH = r'c:\my folder\my_test.xlsx'
data = tablib.Dataset().load(open(BASE_PATH, 'rb').read())
print data
Add r before your string to stop Python from trying to interpret the backslash characters in your path.

zipfile.BadZipfile: Bad CRC-32 for file | Read only file

I have a read-only file inside a password-protected zip file, and I need to extract it to the /tmp directory.
I get a CRC-32 error, which suggests that the file is corrupted, yet I know it isn't; it is in fact a read-only file. Any suggestions?
Error:
Traceback (most recent call last):
File "/tmp/usercode.py", line 45, in <module>
zip.extractall('/tmp',pwd = "piso")
File "/usr/lib64/python2.7/zipfile.py", line 1040, in extractall
self.extract(zipinfo, path, pwd)
File "/usr/lib64/python2.7/zipfile.py", line 1028, in extract
return self._extract_member(member, path, pwd)
File "/usr/lib64/python2.7/zipfile.py", line 1084, in _extract_member
shutil.copyfileobj(source, target)
File "/usr/lib64/python2.7/shutil.py", line 49, in copyfileobj
buf = fsrc.read(length)
File "/usr/lib64/python2.7/zipfile.py", line 632, in read
data = self.read1(n - len(buf))
File "/usr/lib64/python2.7/zipfile.py", line 672, in read1
self._update_crc(data, eof=(self._compress_left==0))
File "/usr/lib64/python2.7/zipfile.py", line 647, in _update_crc
raise BadZipfile("Bad CRC-32 for file %r" % self.name)
zipfile.BadZipfile: Bad CRC-32 for file 'alien-12.txt'
Code:
# importing required modules
from zipfile import ZipFile
# specifying the zip file name
file_name = "/tmp/alien-12.zip"
# opening the zip file in READ mode
with ZipFile(file_name, 'r') as zip:
# printing all the contents of the zip file
zip.printdir()
# extracting all the files
print('Extracting all the files now...')
zip.extractall('/tmp',pwd = "piso")
print('Done!')
If I change the line of:
zip.extractall('/tmp',pwd = "piso")
then I get the error of:
IOError: [Errno 30] Read-only file system:
I then tried to diagnose the problem by first checking what is in the zip file.
Calling zipfile.testzip() raises the following error:
Error:
RuntimeError: File alien-12.txt is encrypted, password required for extraction

Extract a .TAR file within a .gz file

I have to untar around fifty *.gz files in a directory. Inside each *.gz file there is a *.TAR file and some other files.
I am trying a python script which extracts the contents of the *.gz files to a directory. But, I am not able to extract the *.TAR files inside the same directory to which the contents of *.gz are extracted.
This is how the script looks:
import tarfile
import os
import glob
basedir = "path_to _dir"
for i in glob.glob(basedir +"*.gz"):
a = os.path.basename(i)
b = os.path.splitext(a)[0]
c = os.path.splitext(b)[0]
os.mkdir(os.path.join(basedir,c))
t1 = tarfile.open(i)
t1.extractall(c)
for j in os.listdir(c):
if j.endswith('.TAR'):
print(j)
t2 = tarfile.open(j)
t2.extractall()
t2.close()
t1.close()
It's giving me the error:
Traceback (most recent call last):
File "./untar.py", line 16, in <module>
t2 = tarfile.open(j)
File "/usr/lib64/python2.7/tarfile.py", line 1660, in open
return func(name, "r", fileobj, **kwargs)
File "/usr/lib64/python2.7/tarfile.py", line 1722, in gzopen
fileobj = bltn_open(name, mode + "b")
IOError: [Errno 2] No such file or directory: '0299_0108060501.TAR'
0299_0108060501.TAR is the file contained inside the *.gz file
It seems to me that I am doing something very wrong fundamentally, but I don't know what.
Since tar.gz files are TAR archives compressed with gzip one should use
t1 = tarfile.open(i, 'r:gz')
as per documentation.
Also, you need to combine the path of the inner file with the directory being inspected, like so:
t2 = tarfile.open(os.path.join(c, j))

Categories

Resources