Check if any file was downloaded in Paramiko recursive file transfer - python

I'm not able to figure out why cannot get exit code 1 in this function when item or item.filename which is the absolute path to file is empty ?
def sftp_get_recursive(path, dest, sftp):
item_list = sftp.listdir_attr(path)
dest = str(dest)
if not os.path.isdir(dest):
os.makedirs(dest, exist_ok=True)
for item in item_list:
mode = item.st_mode
if not str(item) :
print(item.filename)
print("No file found")
sys.exit(1)
elif S_ISDIR(mode):
sftp_get_recursive(path + "/" + item.filename, dest + "/" + item.filename, sftp)
else:
sftp.get(path + "/" + item.filename, dest + "/" + item.filename)
print("Sending file from : ",path + "/" + item.filename)
sftp.remove(path + "/" + item.filename)

If you want to return specific exit code, if there is no file anywhere in the directory tree, you will have to recursively count the files.
def sftp_get_recursive(path, dest, sftp):
count = 0
item_list = sftp.listdir_attr(path)
if not os.path.isdir(dest):
os.makedirs(dest, exist_ok=True)
for item in item_list:
source_path = path + "/" + item.filename
dest_path = os.path.join(dest, item.filename)
if S_ISDIR(item.st_mode):
count += sftp_get_recursive(source_path, dest_path, sftp)
else:
print("Sending file from : ", source_path)
sftp.get(source_path, dest_path)
sftp.remove(source_path)
count += 1
return count
(I have refactored your code a bit to avoid repeating code)
And then use the count at the top level:
count = sftp_get_recursive(path, dest, sftp)
if count == 0:
print("No file found")
sys.exit(1)
Note that even if no files are found, the code will still recreate the empty remote directory structure locally. I'm not sure, if that's what you want.

Related

Unexpected end of data when zipping zip files in Python

Good day.
I wrote a little Python program to help me easily create .cbc files for Calibre, which is just a renamed .zip file with a text file called comics.txt for TOC purposes. Each chapter is another zip file.
The issue is that the last zip file zipped always has the error "Unexpected end of data". The file itself is not corrupt, if I unzip it and rezip it it works perfectly. Playing around it seems that the problem is that Python doesn't close the last zip file after zipping it, since I can't delete the last zip while the program is still running since it's still open in Python. Needless to say, Calibre doesn't like the file and fails to convert it unless I manually rezip the affected chapters.
The code is as follows, checking the folders for not-image files, zipping the folders, zipping the zips while creating the text file, and "changing" extension.
import re, glob, os, zipfile, shutil, pathlib, gzip, itertools
Folders = glob.glob("*/")
items = len(Folders)
cn_list = []
cn_list_filtered = []
dirs_filtered = []
ch_id = ["c", "Ch. "]
subdir_im = []
total = 0
Dirs = next(os.walk('.'))[1]
for i in range(0, len(Dirs)):
for items in os.listdir("./" + Dirs[i]):
if items.__contains__('.png') or items.__contains__('.jpg'):
total+=1
else:
print(items + " not an accepted format.")
subdir_im.append(total)
total = 0
for fname in Folders:
if re.search(ch_id[0] + r'\d+' + r'[\S]' + r'\d+', fname):
cn = re.findall(ch_id[0] + "(\d+[\S]\d+)", fname)[0]
cn_list.append(cn)
elif re.search(ch_id[0] + r'\d+', fname):
cn = re.findall(ch_id[0] + "(\d+)", fname)[0]
cn_list.append(cn)
elif re.search(ch_id[1] + r'\d+' + '[\S]' + r'\d+', fname):
cn = re.findall(ch_id[1] + "(\d+[\S]\d+)", fname)[0]
cn_list.append(cn)
elif re.search(ch_id[1] + r'\d+', fname):
cn = re.findall(ch_id[1] + "(\d+)", fname)[0]
cn_list.append(cn)
else:
print('Warning: File found without proper filename format.')
cn_list_filtered = set(cn_list)
cn_list_filtered = sorted(cn_list_filtered)
cwd = os.getcwd()
Dirs = Folders
subdir_zi = []
total = 0
for i in range(0, len(cn_list_filtered)):
for folders in Dirs:
if folders.__contains__(ch_id[0] + cn_list_filtered[i] + " ")\
or folders.__contains__(ch_id[1] + cn_list_filtered[i] + " "):
print('Zipping folder ', folders)
namezip = "Chapter " + cn_list_filtered[i] + ".zip"
current_zip = zipfile.ZipFile(namezip, "a")
for items in os.listdir(folders):
if items.__contains__('.png') or items.__contains__('.jpg'):
current_zip.write(folders + "/" + items, items)
total+=1
subdir_zi.append(total)
total = 0
print('Folder contents in order:', subdir_im, ' Total:', sum(subdir_im))
print("Number of items per zip: ", subdir_zi, ' Total:', sum(subdir_zi))
if subdir_im == subdir_zi:
print("All items in folders have been successfully zipped")
else:
print("Warning: File count in folders and zips do not match. Please check the affected chapters")
zips = glob.glob("*.zip")
namezip2 = os.path.basename(os.getcwd()) + ".zip"
zipfinal = zipfile.ZipFile(namezip2, "a")
for i in range(0, len(zips), 1):
zipfinal.write(zips[i],zips[i])
Data = []
for i in range (0,len(cn_list_filtered),1):
Datai = ("Chapter " + cn_list_filtered[i] + ".zip" + ":Chapter " + cn_list_filtered[i] + "\r\n")
Data.append(Datai)
Dataok = ''.join(Data)
with zipfile.ZipFile(namezip2, 'a') as myzip:
myzip.writestr("comics.txt", Dataok)
zipfinal.close()
os.rename(namezip2, namezip2 + ".cbc")
os.system("pause")
I am by no means a programmer, that is just a Frankenstein monster code I eventually managed to put together by checking threads, but this last issue has me stumped.
Some solutions I tried are:
for i in range(0, len(zips), 1):
zipfinal.write(zips[i],zips[i])
zips[i].close()
Fails with:
zips[i].close()
AttributeError: 'str' object has no attribute 'close'
and:
for i in range(0, len(zips), 1):
zipfinal.write(zips[i],zips[i])
zips[len(zips)].close()
Fails with:
zips[len(zips)].close()
IndexError: list index out of range
Thanks for the help.
This solved my issue:
def generate_zip(file_list, file_name=None):
zip_buffer = io.BytesIO()
zf = zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED)
for file in file_list:
print(f"Filename: {file[0]}\nData: {file[1]}")
zf.writestr(file[0], file[1])
**zf.close()**
with open(file_name, 'wb') as f:
f.write(zip_buffer.getvalue())
f.close()

Python script to run FME workbench

I have more than 500 xml files and each xml file should processed on FME workbench individually (iteration of FME workbench for each xml file).
For such a propose i have to run a python file (loop.py) to iterate FME workbench for each xml file.
The whole process was working in past on other PC without any problem. Now Once i run Module i got the following error:
Traceback (most recent call last):E:\XML_Data
File "E:\XML_Data\process\01_XML_Tile_1.py", line 28, in
if "Translation was SUCCESSFUL" in open(path_log + "\" + data + ".log").read():
IOError: [Errno 2] No such file or directory: 'E:\XML_Data\data_out\log_01\re_3385-5275.xml.log'
Attached the python code(loop.py).
Any help is greatly appreciated.
import os
import time
# Mainpath and Working Folder:
#path_main = r"E:\XML_Data"
path_main = r"E:\XML_Data"
teil = str("01")
# variables
path_in = path_main + r"\data_in\03_Places\teil_" + teil # "Source folder of XML files"
path_in_tile10 = path_main + r"\data_in\01_Tiling\10x10.shp" # "Source folder of Grid shapefile"
path_in_commu = path_main + r"\data_in\02_Communities\Communities.shp" # "Source folder of Communities shapefile"
path_out = path_main + r"\data_out\teil_" + teil # "Output folder of shapefiles that resulted from XML files (tile_01 folder)"
path_log = path_main + r"\data_out\log_" + teil # "Output folder of log files for each run(log_01 folder)"
path_fme = r"%FME_EXE_2015%" # "C:\Program Files\FME2015\fme.exe"
path_fme_workbench = path_main + r"\process\PY_FME2015.fmw" # "path of FME workbench"
datalists = os.listdir(path_in)
count = 0
# loop each file individually in FME
for data in datalists:
if data.find(".xml") != -1:
count +=1
print ("Run-No." + str(count) + ": with data " + data)
os.system (path_fme + " " + path_fme_workbench + " " + "--SourceDataset_XML"+ " " + path_in + "\\" + data + " " + "--SourceDataset_SHAPE" + " " + path_in_tile10 + " " + "--SourceDataset_SHAPE_COMU" + " " + path_in_commu + " " + "--DestDataset_SHAPE" +" " +path_out + " " +"LOG_FILENAME" + " " + path_log + "\\" + data + ".log" )
print ("Data processed: " + data)
shape = str(data[19:28]) + "_POPINT_CENTR_UTM32N.shp"
print ("ResultsFileName: " + shape)
if "Translation was SUCCESSFUL" in open(path_log + "\\" + data + ".log").read():
# Translation was successful and SHP file exists:
if os.path.isfile(path_out + "\\" + shape):
write_log = open(path_out + "\\" + "result_xml.log", "a")
write_log.write(time.asctime(time.localtime()) + " " + shape + "\n")
write_log.close()
print("Everything ok")
#Translation was successful, but SHP file does not exist:
else:
write_log = open(path_out + "\\" + "error_xml.log", "a")
write_log.write(time.asctime(time.localtime()) + " Data: " + shape + " unavailable.\n")
write_log.close()
# Translation was not successful:
else:
write_log = open(path_out + "\\" + "error_xml.log", "a")
write_log.write(time.asctime(time.localtime()) + " Translation " + Data + " not successful.\n")
write_log.close()
print ("Number of calculated files: " + str(count))
Most likely, the script failed at the os.system line, so the log file was not created from the command. Since you mentioned a different computer, it could be caused by many reasons, such as a different version of FME (so the environment variable %FME_EXE_2015% would not exist).
Use a workspace runner transformer to do this.
The FME version is outdated.so first check the version whether it is creating the problem.
subprocess.call(["C:/Program Files/fme/FMEStarter/FMEStarter.exe", "C:/Program Files/fme/fme20238/fme.exe", "/fmefile.fmw" "LOG_FILENAME","logfile"], stdin=None, stdout=None, stderr=None, shell=True, timeout=None)

Converting multiple gz file within subdirectories into csv

I have many subdirectories in my main directory and would like to write a script to unzip and convert all the files within it. If possible, I would also like to combine all the CSV within a single directory into a single CSV. But more importantly, I need help with my nested loop.
import gzip
import csv
import os
subdirlist = os.listdir('/home/user/Desktop/testloop')
subtotal = len(subdirlist)
subcounter = 0
for dirlist in subdirlist:
print "Working On " + dirlist
total = len(dirlist)
counter = 0
for dir in dirlist:
print "Working On " + dir
f = gzip.open('/' + str(subdirlist) + '/' + dir, 'rb')
file_content = f.read()
f.close()
print "25% Complete"
filename = '/' + str(subdirlist) + '/temp.txt'
target = open(filename, 'w')
target.write(file_content)
target.close()
print "50% Complete!"
csv_file = '/' + str(subdirlist) + '/' + str(dir) + '.csv'
in_txt = csv.reader(open(filename, "rb"), delimiter = '\t')
out_csv = csv.writer(open(csv_file, 'wb'))
out_csv.writerows(in_txt)
os.remove(filename)
os.remove('/' + str(subdirlist) + '/' + dir)
counter+=1
print str(counter) + "/" + str(total) + " " + str(dir) + " Complete!"
print "SubDirectory Converted!"
print str(subcounter) + "/" + str(subtotal) + " " + str(subdirlist) + " Complete!"
subcounter+=1
print "All Files Converted!"
Thanks in advance
To get lists of files and subdirectories, you can use os.walk. Below is an implementation I wrote to get all files (optionally, of certain type(s)) in arbitrarily nested subdirectories:
from os import walk, sep
from functools import reduce # in Python 3.x only
def get_filelist(root, extensions=None):
"""Return a list of files (path and name) within a supplied root directory.
To filter by extension(s), provide a list of strings, e.g.
get_filelist(root, ["zip", "csv"])
"""
return reduce(lambda x, y: x+y,
[[sep.join([item[0], name]) for name in item[2]
if (extensions is None or
name.split(".")[-1] in extensions)]
for item in walk(root)])

Need to implement nested try statements with one exception

I am trying to copy a files from different folders under a path to my usb drive.
So my source directory structure looks like this
/user/arun/Music/Songs/
under this I have different sub directories
Songs_1
Songs_2
Songs_3
the target folder is under anyone of these Songs directory
Songs_1/Kid Rock/All summer long.mp3
Songs_2/Linkin Park/In the end.mp3
Now I am constructing my src_dir in a try/except way like this.
for album,song in song_database.iteritems():
for s in song:
try:
src_dir_1 = src_dir + "/" + "Songs_1" + "/" + album + "/" + s + ".mp3"
shutil.copy2(src_dir_1,dest_dir
print src_dir_1
except IOError:
pass
try:
src_dir_1 = src_dir + "/" + "Songs_2" + "/" + album + "/" + s + ".mp3"
shutil.copy2(src_dir_1,dest_dir)
print src_dir_1
except IOError:
pass
try:
src_dir_1 = src_dir + "/" + "Songs_3" + "/" + album + "/" + s + ".mp3"
shutil.copy2(src_dir_1,dest_dir)
print src_dir_1
except IOError:
pass
try:
src_dir_1 = src_dir + "/" + "Songs_4" + "/" + album + "/" + s + ".mp3"
shutil.copy2(src_dir_1,dest_dir)
print src_dir_1
except IOError:
pass
Is there a better way to do this ?
Seems like a loop would be better:
for album,song in song_database.iteritems():
for s in song:
for sdir in 'Songs_1', 'Songs_2', 'Songs_3':
try:
src_dir_1 = src_dir + "/" + sdir + "/" + album + "/" + s + ".mp3"
shutil.copy2(src_dir_1,dest_dir)
print src_dir_1
except IOError:
pass
And, perhaps you would want to add a break statement if you succeed in copying the source to the destination...
As a side note, you might want to use os.path.join instead:
src_dir_1 = os.path.join(src_dir, sdir, album, s + ".mp3")

python function not repeating

I have a python tool to 'touch' (utime) a file, then move to another folder. However, if the file already exists in the destination folder, it is silently overwritten. I would like to check for a file of the same name in the destination folder and, if it exists, rename the one I am moving to its name plus '-n' at the end, where n is a number starting at '1' and, if the file with '-1' at the end already exists, '-2' etc.
For example, say in the source folder is a file 'foo.txt', but there is one 'foo.txt' in the destination folder as well. This function should return '(absolute path)/foo-1.txt'.
So, I have made a function to check for these circumstances and return the modified string, so I can use rename later and not overwrite. However, currently, if the file exists, it returns nothing. Following is the function code, assuming these vars:
fileName - input filepath, absolute path. e.g. /Users/foo/sourceFolder/bar.txt
index - iterator variable, set to '1' at the start of each file being opened (externally to function)
def checkExists(fileName):
global index
print "checkExists(" + fileName + ")"
if exists(fileName):
splitPath = split(newFile)
splitName = splitext(splitPath[1])
newSplitName = splitName[0] + "-" + str(index)
index += 1
newName = splitPath[0] + "/" + newSplitName + splitName[1]
print "newName = " + newName
else:
print "(else) fileName = " + fileName
print "(else) fileName = " + str(type(fileName))
print ""
return fileName
checkExists(newName)
Now it seems that the inner call for checkExists() at the end is not running.
I hope I have been clear in my explanation.
IAmThePiGuy
P.S. I do not want to hear about potential race problems with utime, I know that the files in the source directory will not be accessed by anything else.
Your problem is that, if the file exists, you don't return anything. I think you're intending to use recursion to check the new filename, but you don't return the result of that call. Here's a stab:
def checkExists(fileName, index=0):
print "checkExists(" + fileName + ")"
if exists(fileName):
splitPath = split(newFile)
splitName = splitext(splitPath[1])
newSplitName = splitName[0] + "-" + str(index)
index += 1
newName = splitPath[0] + "/" + newSplitName + splitName[1]
print "newName = " + newName
return checkExists(newName, index) # recurse
else:
print "(else) fileName = " + fileName
print "(else) fileName = " + str(type(fileName))
print ""
return fileName
I also took the liberty of moving the recursive call closer to the generation of newName and removing the global variable and replacing that with recursion as well.
Here's an iterative approach to a similar problem:
def copyfile(path, dstdir, verbose=True, dryrun=False):
"""Copy `path` file to `dstdir` directory incrementing name if necessary."""
filename = os.path.basename(path)
basename, ext = os.path.splitext(filename)
for i in itertools.count(2):
destpath = os.path.join(dstdir, filename)
if not os.path.exists(destpath):
if verbose:
print(path, '->', destpath)
if not dryrun:
shutil.copyfile(path, destpath)
return
# increment filename
filename = "%s_%02d%s" % (basename, i, ext)

Categories

Resources