Python: Recursively get the length of a file path

I am trying to recursively scan a given directory and get the length of each file or directory path (not the file or directory size). If the length is more than, say, 35 characters (just to test), I want to output the path and its length to a log file. And if a directory path is already longer than 35 characters, there is little point traversing down any further.
import sys
import os

path = sys.argv[1]
Log = path + "\\PathToLongLog.txt"
fname = []

# Check if path exists
if os.path.exists(path):
    print("Directory exists")

for root, d_names, f_names in os.walk(path):
    print(root, d_names, f_names)
    for f in f_names:
        fname.append(os.path.join(root, f))
        #print("fname = %s" % fname)

for fp in fname:
    Len = len(fp)
    if Len > 35:
        print("fname = %s" % fname, " Length ", str(Len))
        msg = "fname = " + str(fname) + " Length " + str(Len)
        with open(Log, "a") as LogFile:
            LogFile.write(msg + "\n")
The expected output would be one line for each file:
D:\Path\To\Very Long File\Or Directory\My File.txt Length 50
What I'm getting instead is:
fname = ['D:\\Path\\To\\Very Long File\\Or Directory\\My File.txt', 'Path\\To\\File1.ext', 'Path\\To\\File2.ext',etc] length 50
Can anyone see what I'm doing wrong?

You want to log the file name, but you are actually logging the fname variable, which is the list of all files.
Change the code to log the fp variable instead of fname and it will work:
for fp in fname:
    Len = len(fp)
    if Len > 35:
        print("fname = %s" % fp, " Length ", str(Len))
        msg = "fname = " + str(fp) + " Length " + str(Len)
        with open(Log, "a") as LogFile:
            LogFile.write(msg + "\n")
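As an aside, the question notes that once a directory path is already over the limit there is little point traversing further down. Since os.walk() walks top-down by default, you can prune the descent by editing d_names in place; a minimal sketch, reusing path, Log, and the 35-character test limit from the question:

for root, d_names, f_names in os.walk(path):
    # Removing a directory from d_names stops os.walk from descending into it
    d_names[:] = [d for d in d_names if len(os.path.join(root, d)) <= 35]
    for f in f_names:
        fp = os.path.join(root, f)
        if len(fp) > 35:
            with open(Log, "a") as LogFile:
                LogFile.write("fname = %s Length %d\n" % (fp, len(fp)))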

Related

Why is the Python file handling not recognizing the file name?

I want to replace a string with another string in a file. I have the program below to perform the task.
import os
import sys
import traceback
from glob import iglob

def usage():
    print('Usage: python FindAndReplace.py [Old String] [New String] '
          '[File Filters(default:".txt,.xml")] [Directory To Check(.)]')

def search_replace_string(fileName, old_str, new_str):
    if not (os.path.isfile(fileName) and os.access(fileName, os.W_OK)):
        print("Warning: Skipping. File does not exist or is not writeable: " + fileName)
        return False
    fileupdated = False
    # Read the old file
    with open(fileName, 'r') as f:
        newlines = []
        for lines in f.readlines():
            if old_str in lines:
                fileupdated = True
            line = lines.replace(old_str, new_str)
            newlines.append(line)
    # Write changes back to the same file
    if fileupdated:
        print("string Found and Updating File: " + fileName)
        try:
            with open(fileName, 'w') as f:
                for line in newlines:
                    f.write(line)
        except IOError:
            print("Error: Cannot open/access existing file for writing: " + fileName)
    return fileupdated

def main():
    try:
        DEFAULT_PATH = iglob(str('<path_to_file.xml'))
        if len(sys.argv) < 3:
            usage()
            # old/new string are required parameters, exit if not supplied
            sys.exit(-1)
        else:
            oldString = sys.argv[1]
            newString = sys.argv[2]
        if len(sys.argv) < 4:
            patterns = ['.xml', '.txt']
        else:
            stringFilter = sys.argv[3]
            patterns = stringFilter.split(',')
        if len(sys.argv) < 5:
            path = DEFAULT_PATH
        else:
            path = sys.argv[4]
        print('[Old String] :' + oldString)
        print('[New String] :' + newString)
        print('[File Filters] :' + ', '.join(patterns))
        print('[Directory To Check] :' + path)
        if not os.path.exists(path):
            raise Exception("Selected path does not exist: " + path)
        # Walk the directory structure looking for files matching the patterns
        matchingFileList = [os.path.join(dp, f)
                            for dp, dn, filenames in os.walk(path)
                            for f in filenames
                            if os.path.splitext(f)[1] in patterns]
        print('Files found matching patterns: ' + str(len(matchingFileList)))
        filecount = 0
        filesReplaced = 0
        for currFile in matchingFileList:
            filecount += 1
            if search_replace_string(currFile, oldString, newString):
                filesReplaced += 1
        print("Total Files Searched :" + str(filecount))
        print("Total Files Replaced/Updated :" + str(filesReplaced))
    except Exception as err:
        print(traceback.format_exception_only(type(err), err)[0].rstrip())
        sys.exit(-1)

if __name__ == '__main__':
    main()
When I execute it from the command line, I get the error below:
(null): can't open file 'uro.py': [Errno 2] No such file or directory
This is the command I am running:
python uro.py <file_path> <old_str> <new_str>
NOTE: I am using an XML file.
I wanted to develop logic that takes the file name and the old and new strings as command-line arguments. From the error, it looks as though the program is treating the Python script itself as the input file, when it should be taking the file path I supply as a command-line argument.
What's the mistake here? Please suggest. Thank you.
I got it fixed; there was a path issue. Thank you.
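For anyone hitting the same message: (null): can't open file 'uro.py': [Errno 2] No such file or directory comes from the Python interpreter itself, meaning it could not find the script in the current working directory, which matches the path issue mentioned above. Assuming the script is invoked the way its own usage() describes, a working call (run from the script's directory) would look something like:

python FindAndReplace.py "oldString" "newString" ".txt,.xml" "C:\Dir\To\Check"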

Unexpected end of data when zipping zip files in Python

Good day.
I wrote a little Python program to help me easily create .cbc files for Calibre, which are just renamed .zip files containing a text file called comics.txt for TOC purposes. Each chapter is another zip file inside.
The issue is that the last zip file zipped always has the error "Unexpected end of data". The file itself is not corrupt: if I unzip it and rezip it, it works perfectly. Playing around, it seems the problem is that Python doesn't close the last zip file after zipping it, since I can't delete the last zip while the program is still running; it's still open in Python. Needless to say, Calibre doesn't like the file and fails to convert it unless I manually rezip the affected chapters.
The code is as follows: it checks the folders for non-image files, zips the folders, zips the zips while creating the text file, and "changes" the extension.
import re, glob, os, zipfile, shutil, pathlib, gzip, itertools

Folders = glob.glob("*/")
items = len(Folders)
cn_list = []
cn_list_filtered = []
dirs_filtered = []
ch_id = ["c", "Ch. "]
subdir_im = []
total = 0

Dirs = next(os.walk('.'))[1]
for i in range(0, len(Dirs)):
    for items in os.listdir("./" + Dirs[i]):
        if items.__contains__('.png') or items.__contains__('.jpg'):
            total += 1
        else:
            print(items + " not an accepted format.")
    subdir_im.append(total)
    total = 0

for fname in Folders:
    if re.search(ch_id[0] + r'\d+' + r'[\S]' + r'\d+', fname):
        cn = re.findall(ch_id[0] + "(\d+[\S]\d+)", fname)[0]
        cn_list.append(cn)
    elif re.search(ch_id[0] + r'\d+', fname):
        cn = re.findall(ch_id[0] + "(\d+)", fname)[0]
        cn_list.append(cn)
    elif re.search(ch_id[1] + r'\d+' + '[\S]' + r'\d+', fname):
        cn = re.findall(ch_id[1] + "(\d+[\S]\d+)", fname)[0]
        cn_list.append(cn)
    elif re.search(ch_id[1] + r'\d+', fname):
        cn = re.findall(ch_id[1] + "(\d+)", fname)[0]
        cn_list.append(cn)
    else:
        print('Warning: File found without proper filename format.')

cn_list_filtered = set(cn_list)
cn_list_filtered = sorted(cn_list_filtered)
cwd = os.getcwd()
Dirs = Folders
subdir_zi = []
total = 0

for i in range(0, len(cn_list_filtered)):
    for folders in Dirs:
        if folders.__contains__(ch_id[0] + cn_list_filtered[i] + " ") \
                or folders.__contains__(ch_id[1] + cn_list_filtered[i] + " "):
            print('Zipping folder ', folders)
            namezip = "Chapter " + cn_list_filtered[i] + ".zip"
            current_zip = zipfile.ZipFile(namezip, "a")
            for items in os.listdir(folders):
                if items.__contains__('.png') or items.__contains__('.jpg'):
                    current_zip.write(folders + "/" + items, items)
                    total += 1
    subdir_zi.append(total)
    total = 0

print('Folder contents in order:', subdir_im, ' Total:', sum(subdir_im))
print("Number of items per zip: ", subdir_zi, ' Total:', sum(subdir_zi))
if subdir_im == subdir_zi:
    print("All items in folders have been successfully zipped")
else:
    print("Warning: File count in folders and zips do not match. Please check the affected chapters")

zips = glob.glob("*.zip")
namezip2 = os.path.basename(os.getcwd()) + ".zip"
zipfinal = zipfile.ZipFile(namezip2, "a")
for i in range(0, len(zips), 1):
    zipfinal.write(zips[i], zips[i])

Data = []
for i in range(0, len(cn_list_filtered), 1):
    Datai = ("Chapter " + cn_list_filtered[i] + ".zip" + ":Chapter " + cn_list_filtered[i] + "\r\n")
    Data.append(Datai)
Dataok = ''.join(Data)

with zipfile.ZipFile(namezip2, 'a') as myzip:
    myzip.writestr("comics.txt", Dataok)

zipfinal.close()
os.rename(namezip2, namezip2 + ".cbc")
os.system("pause")
I am by no means a programmer; this is just a Frankenstein's monster of code I eventually managed to put together by reading threads, but this last issue has me stumped.
Some solutions I tried are:
for i in range(0, len(zips), 1):
    zipfinal.write(zips[i], zips[i])
    zips[i].close()
Fails with:
zips[i].close()
AttributeError: 'str' object has no attribute 'close'
and:
for i in range(0, len(zips), 1):
    zipfinal.write(zips[i], zips[i])
zips[len(zips)].close()
Fails with:
zips[len(zips)].close()
IndexError: list index out of range
Thanks for the help.
This solved my issue:
import io
import zipfile

def generate_zip(file_list, file_name=None):
    zip_buffer = io.BytesIO()
    zf = zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED)
    for file in file_list:
        print(f"Filename: {file[0]}\nData: {file[1]}")
        zf.writestr(file[0], file[1])
    zf.close()  # the key step: close the archive before writing it out
    with open(file_name, 'wb') as f:
        f.write(zip_buffer.getvalue())
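For context, a hypothetical call matching the question's setup (the chapter zips already exist in the current directory; the archive names and the final .cbc file name here are assumptions):

chapters = []
for name in ["Chapter 1.zip", "Chapter 2.zip"]:  # hypothetical chapter archives
    with open(name, "rb") as f:
        chapters.append((name, f.read()))
# comics.txt maps each inner zip to its chapter title, as in the question
chapters.append(("comics.txt", "Chapter 1.zip:Chapter 1\r\nChapter 2.zip:Chapter 2\r\n"))
generate_zip(chapters, "MyComic.cbc")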

How to check if a folder has subfolders and then list the directories with listdir()?

I want to write a file-searching program, but I don't know whether the directory I'm searching in has subdirectories, and I want to check for that so I don't get an error like this:
[Error 267]The directory name is invalid: 'C:/Path/To/Directory'.
I wrote the code below: if it finds the file, it breaks and stops the program; if not, it goes down a layer, and so on.
import os

filename = raw_input('> ')
path = 'C:/Path/Of/Directory/You/Want/To/Search/In'
fldr = os.listdir(path)
for f in fldr:
    p = path + '/' + f
    sfldr = os.listdir(p)
    if os.path.exists(p + '/' + filename):
        print 'Found file!!', p + '/' + filename
        break
    else:
        for sf in sfldr:
            pp = p + '/' + sf
            ssfldr = os.listdir(pp)
            if os.path.exists(pp + '/' + filename):
                print 'Found file!!', pp + '/' + filename
                break
            else:
                for ssf in ssfldr:
                    ppp = pp + '/' + ssf
                    sssfldr = os.listdir(ppp)
                    if os.path.exists(ppp + '/' + filename):
                        print 'Found file!!', ppp + '/' + filename
                        break
The obvious flaw is that when the directory doesn't have three layers of subfolders, the program just breaks with an error message. So if I could check whether a folder has subfolders before entering it, that would be neat.
Use os.scandir(). It provides better speed than os.walk().
Link to docs here!
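A minimal sketch of that idea, checking whether a directory has any subdirectories before trying to descend into it (the helper name is mine):

import os

def has_subfolders(path):
    # scandir yields lightweight DirEntry objects, so is_dir() is cheap
    with os.scandir(path) as entries:
        return any(entry.is_dir() for entry in entries)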
Alternatively, use glob:
>>> from glob import glob
>>> paths = glob('*/')
>>> paths
['bin/', 'content/', 'include/', 'lib/', 'output/']
>>>
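For the actual search across an arbitrarily nested tree, a hedged alternative is os.walk, which handles any depth without hand-written layers and never trips over calling listdir() on a plain file (the source of the [Error 267] above); the helper name here is mine:

import os

def find_file(root, filename):
    # Visits every directory level; returns the first match or None
    for dirpath, dirnames, filenames in os.walk(root):
        if filename in filenames:
            return os.path.join(dirpath, filename)
    return None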

Converting multiple gz files within subdirectories into CSV

I have many subdirectories in my main directory and would like to write a script to unzip and convert all the files within them. If possible, I would also like to combine all the CSVs within a single directory into a single CSV. But more importantly, I need help with my nested loop.
import gzip
import csv
import os

subdirlist = os.listdir('/home/user/Desktop/testloop')
subtotal = len(subdirlist)
subcounter = 0
for dirlist in subdirlist:
    print "Working On " + dirlist
    total = len(dirlist)
    counter = 0
    for dir in dirlist:
        print "Working On " + dir
        f = gzip.open('/' + str(subdirlist) + '/' + dir, 'rb')
        file_content = f.read()
        f.close()
        print "25% Complete"
        filename = '/' + str(subdirlist) + '/temp.txt'
        target = open(filename, 'w')
        target.write(file_content)
        target.close()
        print "50% Complete!"
        csv_file = '/' + str(subdirlist) + '/' + str(dir) + '.csv'
        in_txt = csv.reader(open(filename, "rb"), delimiter='\t')
        out_csv = csv.writer(open(csv_file, 'wb'))
        out_csv.writerows(in_txt)
        os.remove(filename)
        os.remove('/' + str(subdirlist) + '/' + dir)
        counter += 1
        print str(counter) + "/" + str(total) + " " + str(dir) + " Complete!"
    print "SubDirectory Converted!"
    print str(subcounter) + "/" + str(subtotal) + " " + str(subdirlist) + " Complete!"
    subcounter += 1
print "All Files Converted!"
Thanks in advance
To get lists of files and subdirectories, you can use os.walk. Below is an implementation I wrote to get all files (optionally, of certain type(s)) in arbitrarily nested subdirectories:
from os import walk, sep
from functools import reduce  # in Python 3.x only

def get_filelist(root, extensions=None):
    """Return a list of files (path and name) within a supplied root directory.

    To filter by extension(s), provide a list of strings, e.g.
    get_filelist(root, ["zip", "csv"])
    """
    return reduce(lambda x, y: x + y,
                  [[sep.join([item[0], name]) for name in item[2]
                    if (extensions is None or
                        name.split(".")[-1] in extensions)]
                   for item in walk(root)])
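Applied to the question's layout, a hypothetical call (the directory comes from the question; filtering on the "gz" extension is an assumption) would be:

gz_files = get_filelist("/home/user/Desktop/testloop", ["gz"])
print(str(len(gz_files)) + " gzip files found")
for filepath in gz_files:
    print(filepath)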

Python File Integrity Monitoring

I have written the following code but can't get it working, and I can't see why. The code:
Reads the list of target files
Loops through the directories
Runs an MD5 hash on each file
Checks the activefile for previous MD5 hashes of said file
If the file is new, it logs it as new
If the file already exists but has changed, it writes the change and logs it
If the file is not new and unchanged, it does nothing
Here is the code:
import hashlib
import logging as log
import optparse
import os
import re
import sys
import glob
import shutil

def md5(fileName):
    """Compute md5 hash of the specified file"""
    try:
        fileHandle = open(fileName, "rb")
    except IOError:
        return
    m5Hash = hashlib.md5()
    while True:
        data = fileHandle.read(8192)
        if not data:
            break
        m5Hash.update(data)
    fileHandle.close()
    return m5Hash.hexdigest()

req = open("requested.txt")
for reqline in req:
    reqName = reqline[reqline.rfind('/') + 1:len(reqline) - 1]
    reqDir = reqline[0:reqline.rfind('/') + 1]
    tempFile = open("activetemp.txt", 'w')
    for name in glob.glob(reqDir + reqName):
        fileHash = md5(name)
        actInt = 0
        if fileHash != None:
            actFile = open("activefile.txt")
            for actLine in actFile:
                actNameDir = actLine[0:actLine.rfind(' : ')]
                actHash = actLine[actLine.rfind(' : ') + 3:len(actLine) - 1]
                if actNameDir == name and actHash == fileHash:
                    tempFile.write(name + " : " + fileHash + "\n")
                    actInt = 1
                    print fileHash
                    print actHash
                    print name
                    print actNameDir
                if actNameDir == name and actHash != fileHash:
                    fimlog = open("fimlog.txt", 'a')
                    tempFile.write(name + " : " + actHash + "\n")
                    actInt = 1
                    fimlog.write("FIM Log: The file " + name + " was modified: " + actHash + "\n")
            if actInt == 0:
                fimlog = open("fimlog.txt", 'a')
                fimlog.write("FIM Log: The file " + name + " was created: " + fileHash + "\n")
                tempFile.write(name + " : " + fileHash + "\n")
shutil.copyfile("activetemp.txt", "activefile.txt")
You never really described the problem, but one likely culprit is that you never close tempFile (or the other files, for that matter), so the file copy at the end may fail.
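A minimal sketch of that fix, reusing the file names from the question: wrapping each handle in a with block guarantees everything is flushed and closed before the copy runs. Note it also opens activetemp.txt once, rather than re-opening it in 'w' mode for every line of requested.txt, which truncates earlier results:

import shutil

with open("requested.txt") as req, open("activetemp.txt", "w") as tempFile:
    for reqline in req:
        ...  # the glob / md5 / compare logic from the question, unchanged
shutil.copyfile("activetemp.txt", "activefile.txt")  # both files are closed by now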
