Good day.
I wrote a little Python program to help me easily create .cbc files for Calibre. A .cbc is just a renamed .zip that contains one zip file per chapter plus a text file called comics.txt for TOC purposes (each line maps a chapter zip to its title, e.g. Chapter 1.zip:Chapter 1).
The issue is that the last zip file written always gives the error "Unexpected end of data". The file itself is not corrupt: if I unzip it and rezip it, it works perfectly. From playing around, the problem seems to be that Python never closes the last zip file after writing it; I can't delete the last zip while the program is still running, because it's still open in Python. Needless to say, Calibre doesn't like the file and fails to convert it unless I manually rezip the affected chapters.
The code is as follows: it checks the folders for non-image files, zips the folders, zips the zips while creating the text file, and "changes" the extension.
import re, glob, os, zipfile, shutil, pathlib, gzip, itertools

Folders = glob.glob("*/")
items = len(Folders)
cn_list = []
cn_list_filtered = []
dirs_filtered = []
ch_id = ["c", "Ch. "]
subdir_im = []
total = 0
Dirs = next(os.walk('.'))[1]

# Count the image files in each chapter folder, flagging anything else.
for i in range(0, len(Dirs)):
    for items in os.listdir("./" + Dirs[i]):
        if items.__contains__('.png') or items.__contains__('.jpg'):
            total += 1
        else:
            print(items + " not an accepted format.")
    subdir_im.append(total)
    total = 0

# Extract the chapter number from each folder name ("c12.5", "Ch. 12", ...).
for fname in Folders:
    if re.search(ch_id[0] + r'\d+[\S]\d+', fname):
        cn = re.findall(ch_id[0] + r"(\d+[\S]\d+)", fname)[0]
        cn_list.append(cn)
    elif re.search(ch_id[0] + r'\d+', fname):
        cn = re.findall(ch_id[0] + r"(\d+)", fname)[0]
        cn_list.append(cn)
    elif re.search(ch_id[1] + r'\d+[\S]\d+', fname):
        cn = re.findall(ch_id[1] + r"(\d+[\S]\d+)", fname)[0]
        cn_list.append(cn)
    elif re.search(ch_id[1] + r'\d+', fname):
        cn = re.findall(ch_id[1] + r"(\d+)", fname)[0]
        cn_list.append(cn)
    else:
        print('Warning: File found without proper filename format.')

cn_list_filtered = set(cn_list)
cn_list_filtered = sorted(cn_list_filtered)
cwd = os.getcwd()
Dirs = Folders
subdir_zi = []
total = 0

# Zip each chapter folder into "Chapter <n>.zip".
for i in range(0, len(cn_list_filtered)):
    for folders in Dirs:
        if folders.__contains__(ch_id[0] + cn_list_filtered[i] + " ") \
                or folders.__contains__(ch_id[1] + cn_list_filtered[i] + " "):
            print('Zipping folder ', folders)
            namezip = "Chapter " + cn_list_filtered[i] + ".zip"
            current_zip = zipfile.ZipFile(namezip, "a")
            for items in os.listdir(folders):
                if items.__contains__('.png') or items.__contains__('.jpg'):
                    current_zip.write(folders + "/" + items, items)
                    total += 1
    subdir_zi.append(total)
    total = 0

print('Folder contents in order:', subdir_im, ' Total:', sum(subdir_im))
print("Number of items per zip: ", subdir_zi, ' Total:', sum(subdir_zi))
if subdir_im == subdir_zi:
    print("All items in folders have been successfully zipped")
else:
    print("Warning: File count in folders and zips do not match. Please check the affected chapters")

# Zip the chapter zips into one archive, add comics.txt, and rename to .cbc.
zips = glob.glob("*.zip")
namezip2 = os.path.basename(os.getcwd()) + ".zip"
zipfinal = zipfile.ZipFile(namezip2, "a")
for i in range(0, len(zips), 1):
    zipfinal.write(zips[i], zips[i])
Data = []
for i in range(0, len(cn_list_filtered), 1):
    Datai = ("Chapter " + cn_list_filtered[i] + ".zip" + ":Chapter " + cn_list_filtered[i] + "\r\n")
    Data.append(Datai)
Dataok = ''.join(Data)
with zipfile.ZipFile(namezip2, 'a') as myzip:
    myzip.writestr("comics.txt", Dataok)
zipfinal.close()
os.rename(namezip2, namezip2 + ".cbc")
os.system("pause")
I am by no means a programmer; this is just a Frankenstein monster of code I eventually managed to stitch together from various threads, but this last issue has me stumped.
Some solutions I tried are:
for i in range(0, len(zips), 1):
    zipfinal.write(zips[i], zips[i])
    zips[i].close()
Fails with:
zips[i].close()
AttributeError: 'str' object has no attribute 'close'
and:
for i in range(0, len(zips), 1):
    zipfinal.write(zips[i], zips[i])
zips[len(zips)].close()
Fails with:
zips[len(zips)].close()
IndexError: list index out of range
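From the AttributeError it looks like zips holds plain path strings returned by glob.glob(), not file objects, so there is nothing to .close() there. What presumably needs closing is each chapter's ZipFile handle inside the zipping loop, something like:

current_zip = zipfile.ZipFile(namezip, "a")
for items in os.listdir(folders):
    if '.png' in items or '.jpg' in items:
        current_zip.write(folders + "/" + items, items)
        total += 1
current_zip.close()  # without this, the last archive never gets its end-of-archive record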
Thanks for the help.
This solved my issue:
import io, zipfile

def generate_zip(file_list, file_name=None):
    zip_buffer = io.BytesIO()
    zf = zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED)
    for file in file_list:
        print(f"Filename: {file[0]}\nData: {file[1]}")
        zf.writestr(file[0], file[1])
    zf.close()  # the crucial step: closing writes the archive's central directory
    with open(file_name, 'wb') as f:
        f.write(zip_buffer.getvalue())
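For what it's worth, a hypothetical call looks like this (chapter names and output name are made up; each tuple is an archive name plus its data):

chapters = ["Chapter 1.zip", "Chapter 2.zip"]  # made-up chapter archives
file_list = [(name, open(name, "rb").read()) for name in chapters]
file_list.append(("comics.txt", "Chapter 1.zip:Chapter 1\r\nChapter 2.zip:Chapter 2\r\n"))
generate_zip(file_list, file_name="MyManga.zip.cbc")  # hypothetical output name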
Related
Python novice here. My simple script takes a given directory and renames all of its files sequentially, but it is deleting the files instead, even though the print shows the file names being renamed; I'm not sure where it's going wrong.
Also, in what order does it retrieve these files?
import os

path = os.path.abspath(r"D:\Desktop\gp")
i = 0
for file_name in os.listdir(path):
    try:
        print(file_name + " - " + str(i))
        os.rename(os.path.join(path, file_name), str(i))
    except WindowsError:
        os.remove(str(i))
        os.rename(os.path.join(path, file_name), str(i))
    i += 1
print(str(i) + " files.")
Edit
Below is the solution with working code: it retrieves all files in a dir sorted by creation date and assigns each an incrementing number while retaining the file extension.
import os

def sorted_dir(folder):
    def getctime(name):
        path = os.path.join(folder, name)
        return os.path.getctime(path)
    return sorted(os.listdir(folder), key=getctime)

path = os.path.abspath(r"D:\Path\Here")
i = 0
for file_name in sorted_dir(path):
    _, ext = os.path.splitext(file_name)
    print(file_name + " - " + str(i) + ext)
    os.rename(os.path.join(path, file_name), os.path.join(path, str(i) + ext))
    i += 1
print(str(i) + " files.")
The problem is that you're using an absolute path for the source, but a relative path for the destination. So the files aren't getting deleted, they're just getting moved into the current working directory.
To fix it so they get renamed into the same directory they were already in, you can do the same thing on the destination you do on the source:
os.rename(os.path.join(path,file_name), os.path.join(path, str(i)))
From a comment, it sounds like you may want to preserve the extensions on these files. To do that:
_, ext = os.path.splitext(file_name)
os.rename(os.path.join(path,file_name), os.path.join(path, str(i) + ext))
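Putting both pieces together, a minimal version of the whole loop might look like this (the directory path is made up):

import os

path = os.path.abspath(r"D:\Desktop\gp")  # hypothetical directory
for i, file_name in enumerate(os.listdir(path)):
    _, ext = os.path.splitext(file_name)
    # rename within the same directory, keeping the original extension
    os.rename(os.path.join(path, file_name), os.path.join(path, str(i) + ext))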
I have a directory containing 50 files, and I want to read them one by one and compare each against another, fixed file. I am using glob.glob, but it didn't work.
Here is how I am reading all the files. If I give a single file name, like path = 'run-01.rbd', it works; with path = '*.rbd' it doesn't.
path = '*.rbd'
path = folder + path
files = sorted(glob.glob(path))
The complete code:
import glob
from itertools import islice
import linecache

num_lines_nonbram = 1891427
bits_perline = 32
total_bit_flips = 0
num_bit_diff_flip_zero = 0
num_bit_diff_flip_ones = 0
folder = "files/"
path = '*.rbd'
path = folder + path
files = sorted(glob.glob(path))
original = open('files/mull-original-readback.rbd', 'r')
#source1 = open(file1, "r")

for filename in files:
    del_lines = 101
    with open(filename, 'r') as f:
        i = 1
        while i <= del_lines:
            line1 = f.readline()
            lineoriginal = original.readline()
            i += 1
        i = 0
        num_bit_diff_flip_zero = 0
        num_bit_diff_flip_ones = 0
        num_lines_diff = 0
        i = 0
        j = 0
        k = 0
        a_write2 = ""
        while i < (num_lines_nonbram - del_lines):
            line1 = f.readline()
            lineoriginal = original.readline()
            while k < bits_perline:
                if lineoriginal[k] == line1[k]:
                    a_write2 += " "
                else:
                    if lineoriginal[k] == "0":
                        #if ((line1[k]=="0" and line1[k]=="1")):
                        num_bit_diff_flip_zero += 1
                    if lineoriginal[k] == "1":
                        #if ((line1[k]=="0" and line1[k]=="1")):
                        num_bit_diff_flip_ones += 1
                    #if ((line1[k]==1 and line1[k]==0)):
                        #a_write_file2 = str(i+1) + " " + str(31-k) + "\n" + a_write_file2
                        #a_write2 += "^"
                        #num_bit_diff_flip_one += 1
                    # else:
                    #     a_write2 += " "
                k += 1
            total_bit_flips = num_bit_diff_flip_zero + num_bit_diff_flip_ones
            i += 1
            k = 0
        i = 0
print files
print "Number of bits flip zero= %d" % num_bit_diff_flip_zero + "\n" + "Number of bits flip one= %d" % num_bit_diff_flip_ones + "\n" + "Total bit flips = %d " % total_bit_flips
f.close()
original.close()
You could use the os module to first list everything in a directory (both files and subdirectories), then use a list comprehension to keep only the files, and a second list comprehension to keep only the files with a specific extension. There is probably a more efficient way of doing it, but this works:
import os

def main():
    path = './'  # the path to the current directory
    # Go through all items in the directory and keep only the files
    files = [file for file in os.listdir(path)
             if os.path.isfile(os.path.join(path, file))]
    # Go through all files and keep only those with .txt (for example)
    specificExtensionFiles = [file for file in files if ".txt" in file]
    # specificExtensionFiles is now a list of the .txt files in the
    # current directory, which you can use in a for loop
    print(specificExtensionFiles)

if __name__ == '__main__':
    main()
For further reference:
How do I list all files of a directory?
The problem is that you're not going back to the beginning of original whenever you start comparing with the next file in the for filename in files: loop. The simplest solution is to put:
original.seek(0)
at the beginning of that loop.
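Concretely, a minimal sketch of the placement (file names are made up):

files = ["run-01.rbd", "run-02.rbd"]  # hypothetical list from glob
original = open("files/mull-original-readback.rbd", "r")
for filename in files:
    original.seek(0)  # rewind the reference file for every new comparison
    with open(filename, "r") as f:
        for lineoriginal, line1 in zip(original, f):
            pass  # compare lineoriginal and line1 here
original.close()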
You could also read the whole file into a list just once before the loop, and use that instead of reading the file repeatedly.
And if you only want to process part of the files, you can read the file into a list, and then use a list slice to get the lines you want.
You also shouldn't be setting num_bit_diff_flip_zero and num_bit_diff_flip_ones to 0 each time through the loop, since these are supposed to be totals across all files.
with open('files/mull-original-readback.rbd', 'r') as original:
    original_lines = list(original)[del_lines:num_lines_nonbram]

for filename in files:
    with open(filename, 'r') as f:
        lines = list(f)[del_lines:num_lines_nonbram]
    for lineoriginal, line1 in zip(original_lines, lines):
        for k in range(bits_perline):
            if lineoriginal[k] == line1[k]:
                a_write2 += " "
            elif lineoriginal[k] == "0":
                num_bit_diff_flip_zero += 1
            else:
                num_bit_diff_flip_ones += 1
total_bit_flips = num_bit_diff_flip_zero + num_bit_diff_flip_ones
I want to write a file-searching script, but I don't know whether the directory I'm searching in has subdirectories, and I want to check for that so I don't get an error like this:
[Error 267] The directory name is invalid: 'C:/Path/To/Directory'
I wrote code like this, where if it finds the file it breaks and stops the program, and if not it goes down a layer, and so on.
filename = raw_input('> ')
path = 'C:/Path/Of/Directory/You/Want/To/Search/In'
fldr = os.listdir(path)
for f in fldr:
    p = path + '/' + f
    sfldr = os.listdir(p)
    if os.path.exists(p + '/' + filename):
        print 'Found file!!', p + '/' + filename
        break
    else:
        for sf in sfldr:
            pp = p + '/' + sf
            ssfldr = os.listdir(pp)
            if os.path.exists(pp + '/' + filename):
                print 'Found file!!', pp + '/' + filename
                break
            else:
                for ssf in ssfldr:
                    ppp = pp + '/' + ssf
                    sssfldr = os.listdir(ppp)
                    if os.path.exists(ppp + '/' + filename):
                        print 'Found file!!', ppp + '/' + filename
                        break
The easy-to-notice error is that when the directory doesn't have three layers of subfolders, the program just breaks with an error message. So if I could check whether a folder has subfolders before entering it, that would be neat.
Use os.scandir(): its directory entries carry cached file-type information, so you can check for subdirectories without extra stat calls (it's also what os.walk() is built on).
Link to docs here!
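A minimal sketch of both ideas, assuming Python 3.6+ (the target filename is made up):

import os

def has_subdirs(path):
    # scandir entries carry cached type info, so is_dir() needs no extra stat call
    with os.scandir(path) as it:
        return any(entry.is_dir() for entry in it)

def find_file(root, filename):
    # os.walk handles arbitrary nesting, so no layer-by-layer loops are needed
    for dirpath, dirnames, filenames in os.walk(root):
        if filename in filenames:
            return os.path.join(dirpath, filename)
    return None

print(find_file('C:/Path/Of/Directory/You/Want/To/Search/In', 'example.txt'))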
Alternatively, use glob:
>>> from glob import glob
>>> paths = glob('*/')
>>> paths
['bin/', 'content/', 'include/', 'lib/', 'output/']
>>>
I have many subdirectories in my main directory and would like to write a script to unzip and convert all the files within them. If possible, I would also like to combine all the CSVs within a single directory into a single CSV. But more importantly, I need help with my nested loop.
import gzip
import csv
import os

subdirlist = os.listdir('/home/user/Desktop/testloop')
subtotal = len(subdirlist)
subcounter = 0
for dirlist in subdirlist:
    print "Working On " + dirlist
    total = len(dirlist)
    counter = 0
    for dir in dirlist:
        print "Working On " + dir
        f = gzip.open('/' + str(subdirlist) + '/' + dir, 'rb')
        file_content = f.read()
        f.close()
        print "25% Complete"
        filename = '/' + str(subdirlist) + '/temp.txt'
        target = open(filename, 'w')
        target.write(file_content)
        target.close()
        print "50% Complete!"
        csv_file = '/' + str(subdirlist) + '/' + str(dir) + '.csv'
        in_txt = csv.reader(open(filename, "rb"), delimiter='\t')
        out_csv = csv.writer(open(csv_file, 'wb'))
        out_csv.writerows(in_txt)
        os.remove(filename)
        os.remove('/' + str(subdirlist) + '/' + dir)
        counter += 1
        print str(counter) + "/" + str(total) + " " + str(dir) + " Complete!"
    print "SubDirectory Converted!"
    print str(subcounter) + "/" + str(subtotal) + " " + str(subdirlist) + " Complete!"
    subcounter += 1
print "All Files Converted!"
Thanks in advance
To get lists of files and subdirectories, you can use os.walk. Below is an implementation I wrote to get all files (optionally, of certain type(s)) in arbitrarily nested subdirectories:
from os import walk, sep
from functools import reduce  # in Python 3.x only

def get_filelist(root, extensions=None):
    """Return a list of files (path and name) within a supplied root directory.

    To filter by extension(s), provide a list of strings, e.g.
    get_filelist(root, ["zip", "csv"])
    """
    return reduce(lambda x, y: x + y,
                  [[sep.join([item[0], name]) for name in item[2]
                    if (extensions is None or
                        name.split(".")[-1] in extensions)]
                   for item in walk(root)])
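For example, applied to the directory from the question (a hypothetical call; pick whatever extensions you need):

# Collect every .gz file under the root, however deeply nested.
for filepath in get_filelist('/home/user/Desktop/testloop', ['gz']):
    print(filepath)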
I want to download some files and save them in a folder, and since there may be duplicate file names, I want to avoid that.
I think this needs an auto-naming system, but I don't know how to build one.
I used shutil and urllib2 to write my function. This is part of my code:
path = 'C:/DL/Others/' + filename + file_ext
with open(path, 'wb') as fp:
    shutil.copyfileobj(req, fp)
As you know, we can check whether a file exists with os.path.exists(path).
I want to rename my files as I save them to avoid duplicate names, following a pattern, for example by adding a number to the file name. So if there were 4 files with the same name, fname, I want them saved in this pattern:
fname - fname(1) - fname(2) - fname(3)
Something like this is probably reasonable:
path = 'c:/DL/Others/%s%s' % (filename, file_ext)
uniq = 1
while os.path.exists(path):
    path = 'c:/DL/Others/%s_%d%s' % (filename, uniq, file_ext)
    uniq += 1
If the original path doesn't exist you get no _1; if it does, the loop counts up until it finds a name that's free.
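If you want the exact fname(1) pattern from the question, the same loop only needs a different format string (a sketch; filename and file_ext are assumed to be set already):

import os

filename, file_ext = 'fname', '.txt'  # hypothetical values
path = 'C:/DL/Others/%s%s' % (filename, file_ext)
uniq = 1
while os.path.exists(path):
    # yields fname(1).txt, fname(2).txt, ... until a free name is found
    path = 'C:/DL/Others/%s(%d)%s' % (filename, uniq, file_ext)
    uniq += 1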
Track each filename's count as you create it:
fname_counts = {}
# ... whatever generates filename and file_ext goes here...
if filename + file_ext in fname_counts:
    fname_counts[filename + file_ext] += 1
else:
    fname_counts[filename + file_ext] = 0

# now check whether it's a dupe when you create the path
# (file_ext already includes the dot)
if fname_counts[filename + file_ext]:
    path = 'C:/DL/Others/%s_%s%s' % (filename, fname_counts[filename + file_ext], file_ext)
else:
    path = 'C:/DL/Others/' + filename + file_ext
Example at work with two duplicates ("test.txt"):
>>> filenames_and_exts = [('test', '.txt'), ('test', '.txt'), ('test2', '.txt'), ('test', '.cfg'), ('different_name', '.txt')]
>>> fname_counts = {}
>>> for filename, file_ext in filenames_and_exts:
        if filename + file_ext in fname_counts:
            fname_counts[filename + file_ext] += 1
        else:
            fname_counts[filename + file_ext] = 0
        if fname_counts[filename + file_ext]:
            path = 'C:/DL/Others/%s_%s%s' % (filename, fname_counts[filename + file_ext], file_ext)
        else:
            path = 'C:/DL/Others/' + filename + file_ext
        print path

C:/DL/Others/test.txt
C:/DL/Others/test_1.txt
C:/DL/Others/test2.txt
C:/DL/Others/test.cfg
C:/DL/Others/different_name.txt