So I understand the reason for the recursion limit of 1000. I want to run a script continuously, but am I right understanding that eventually the recursion limit will be reached (even if I set it higher) and Python will break?
In the scheme of things, its not a big deal, because I could get the OS to keep re-starting the script, but I thought there may be a more elegant solution I can employ within the script itself (swapping threads??).
My script:
import os
import subprocess
import time
import logging
import datetime
from sys import argv
if len(argv) < 3:
exit('Please provide two arguments - Source Destination')
LOC_DIR = argv[1]
REM_DIR = argv[2]
POLL_INT = 10
RUN_INT = 60
FILE_EXT = '.mov'
# logging setup
logging.basicConfig(filename='%s' % os.path.join(LOC_DIR, '%s the_log.log' % datetime.datetime.now()),level=logging.DEBUG)
# make an easy print and logging function
def printLog(string):
print '%s %s' % (datetime.datetime.now(), string)
logging.info('%s %s' % (datetime.datetime.now(), string))
# get the files with absolute paths
def getFiles(path):
return [os.path.join(path, entry) for entry in os.listdir(path)]
# check if file is still being copied (file size has changed within the poll interval)
def checkSize(path):
same = False
while same is False:
printLog("Processing '%s'" % os.path.basename(path))
printLog('Waiting %s seconds for any filesize change' % POLL_INT)
size1 = os.path.getsize(path)
time.sleep(POLL_INT)
size2 = os.path.getsize(path)
if size1 == size2:
same = True
printLog('File size stayed the same for %s seconds' % POLL_INT)
return same
else:
printLog('File size change detected. Waiting a further %s seconds' % POLL_INT)
# check if correct file extension
def checkExt(path):
if path.endswith(FILE_EXT):
return True
# rsync subprocess
def rsyncFile(path):
printLog("Syncing file '%s'" % os.path.basename(path))
try:
command = ['rsync', '-a', '--remove-source-files', path, REM_DIR]
p = subprocess.Popen(command, stdout=subprocess.PIPE)
for line in p.stdout:
printLog("rsync: '%s'" %line)
p.wait()
if p.returncode == 0:
printLog('<<< File synced successfully :) >>>')
elif p.returncode == 10:
printLog('****** Please check your internet connection!! ****** Rsync error code: %s' % p.returncode)
else:
printLog('There was a problem. Error code: %s' % p.returncode)
except Exception as e:
logging.debug(e)
# main logic
def main():
all_files = getFiles(LOC_DIR)
files = []
for f in all_files:
if checkExt(f):
files.append(f)
if len(files) == 1:
printLog('<<< Found %s matching file >>>' % len(files))
elif len(files) > 1:
printLog('<<< Found %s matching files >>>' % len(files))
for f in files:
if checkSize(f):
rsyncFile(f)
printLog('No files found. Checking again in %s seconds' % RUN_INT)
time.sleep(RUN_INT)
printLog('Checking for files')
main()
if __name__ == "__main__":
main()
CPython has no optimizations for recursion, so you really want to avoid deeply-recursive code in favor of regular loops:
def main():
while True:
all_files = getFiles(LOC_DIR)
files = []
for f in all_files:
if checkExt(f):
files.append(f)
if len(files) == 1:
printLog('<<< Found %s matching file >>>' % len(files))
elif len(files) > 1:
printLog('<<< Found %s matching files >>>' % len(files))
for f in files:
if checkSize(f):
rsyncFile(f)
printLog('No files found. Checking again in %s seconds' % RUN_INT)
time.sleep(RUN_INT)
printLog('Checking for files')
if __name__ == "__main__":
main()
You're going about this in the wrong way.
Replace the main loop with a loop.
# main logic
def main():
while True:
all_files = getFiles(LOC_DIR)
files = []
for f in all_files:
if checkExt(f):
files.append(f)
if len(files) == 1:
printLog('<<< Found %s matching file >>>' % len(files))
elif len(files) > 1:
printLog('<<< Found %s matching files >>>' % len(files))
for f in files:
if checkSize(f):
rsyncFile(f)
printLog('No files found. Checking again in %s seconds' % RUN_INT)
time.sleep(RUN_INT)
printLog('Checking for files')
The recursion limit is only set with recursive functions from my understanding, so If you really want to run something repeatedly, you can simply run.
while True:
#repeated stuff goes here
Recursion is an amazing tool, but handle with care, it often can end up burning you. You were right in the fact that python can only go 1000 calls deep recursively, so if you recursive method doesn't finish by then the exception gets thrown.
Goodluck.
Related
I have code:
class Gallery():
def __init__(self, directory = '.'):
self.imgnames = []
self.directory = directory
def get_files(self):
self.filenames = os.listdir(self.directory)
logging.debug("Directory files: %s" % self.filenames)
def generate_thumbnails(self):
counter = 0
for name in self.filenames:
logging.debug("Trying to open %s" % name)
counter+= 1
if name == "loading.gif":
continue
try:
im = Image.open(name)
self.imgnames.append(name)
im.thumbnail( (300,300) )
logging.debug("Saving thumb-%s" % name)
im.save("thumb-%s" % name)
sys.stdout.write("\r%f%%" % (counter*100.0/len(self.filenames)))
sys.stdout.flush()
except IOError:
logging.debug("File %s cannot be parsed by PIL, ignoring" % name)
def integrate_bootstrap(self):
logging.info("Generating index.html")
head = open("./gallery_files/index.html.head", 'r').read()
tail = open("./gallery_files/index.html.tail", 'r').read()
final = open("index.html", 'w')
and i would like change image thumbnails directory to save to ./gallery_files/thumbs/
when i change im.save("thumb-%s" % name) to im.save("./gallery_files/thumbs/thumb-%s" % name) not working.
Any ideas? Thank you
Unfortunately I cannot comment to your question. Therfore I want to give you an answer:
What is the error?
Does the folder ./gallery_files/thumbs/ exist? If not, then it needs to be created before saving.
I am trying to create a script that will move only new or updated files from the past 24 hours into a new folder. I created a script so far that will move files in general, any leads or suggestions would be greatly appreciated.
import os, shutil
source = os.listdir('C:\Users\Student\Desktop\FolderA')
destination = 'C:\Users\Student\Desktop\FolderB'
os.chdir('C:\Users\Student\Desktop\FolderA')
for files in os.listdir("C:\Users\Student\Desktop\FolderA"):
if files.endswith(".txt"):
src = os.path.join("C:\Users\Student\Desktop\FolderA",files)
dst = os.path.join(destination,files)
shutil.move(src,dst)
I believe I found a solution, let me know what you guys think.
# copy files from folder_a to folder_b
# if the files in folder_a have been modified within the past 24 hours
# copy them to folder_b
#
import shutil
import os
from os import path
import datetime
from datetime import date, time, timedelta
def file_has_changed(fname):
# print 'in file_has_changed with file : %s' % fname
# print str(path.getmtime(fname))
# get file modified time
file_m_time = datetime.datetime.fromtimestamp(path.getmtime(fname))
# print datetime.datetime.now()
# print file_m_time
#get the delta between today and filed mod time
td = datetime.datetime.now() - file_m_time
# print td
# print 'days : %d' % td.days
# file can be archived if mod within last 24 hours
if td.days == 0:
global ready_to_archive
ready_to_archive = ready_to_archive + 1
return True
else: return False
def main():
global ready_to_archive
global archived
ready_to_archive, archived = 0, 0
# src = "c:\users\gail\desktop\foldera"
# dst = "c:\users\gail\desktop\folderb"
for fname in os.listdir('c:\users\gail\Desktop\FolderA'):
src_fname = 'c:\users\gail\Desktop\FolderA\%s' % fname
if file_has_changed(src_fname):
dst_fname = 'c:\users\gail\Desktop\FolderB\%s' % fname
dst_folder = 'c:\users\gail\Desktop\FolderB'
try:
shutil.copy2(src_fname, dst_folder)
global archived;
archived = archived + 1
# print 'Copying file : %s ' % (src_fname)
# print ' To loc : %s ' % (dst_fname)
except IOError as e:
print 'could not open the file: %s ' % e
if __name__ == "__main__":
main()
print '****** Archive Report for %s ******' % datetime.datetime.now()
print '%d files ready for archiving ' % ready_to_archive
print '%d files archived' % archived
print '****** End of Archive Report ******'
I'm obviously doing something very wrong. I'd like to find files, that are in one directory but not in second directory (for instance xxx.phn in one directory and xxx.wav in second directory...
IT seems that I cannot detect, when file is NOT present in second directory (it's always showing like all files are)... I don't get any file displayed, although they exist...
import shutil, random, os, sys
if len(sys.argv) < 4:
print """usage: python del_orphans_dir1_dir2.py source_folder source_ext dest_folder dest_ext
"""
sys.exit(-1)
folder = sys.argv[1]
ext = sys.argv[2]
dest_folder = sys.argv[3]
dest_ext = sys.argv[4]
i = 0
for d, ds, fs in os.walk(folder):
for fname in fs:
basename = os.path.splitext(fname)[0]
if (not os.path.exists(dest_folder+'/'+basename + '.' + dest_ext) ):
print str(i)+': No duplicate for: '+fname
i=i+1
print str(i)+' files found'
Can I suggest that you make the filename you're looking at checking and print it before checking whether it exists..
dest_fname = dest_folder+'/'+basename + '.' + dest_ext
print "dest exists? %s" % dest_fname
os.path.exists(dest_fname)
Also as an aside please join paths using the join() method. (If you really want the basename without the leading path elements there's a basename() function).
I tried your program out and it worked for two simple flat directories. Here are the directory contents:
a\a.txt
a\b.txt # Missing from b directory
a\c.txt
b\a.csv
b\c.csv
And result of your script with a txt b csv as parameters. If your result was different, maybe you used different parameters?
0: No duplicate for: b.txt
1 files found
But when I added subdirectories:
a\a.txt
a\b.txt # Missing from b directory
a\c.txt
a\c\d.txt
a\c\e.txt # Missing from b\c directory
b\a.csv
b\c.csv
b\c\d.csv
Your script gives:
0: No duplicate for: b.txt
1: No duplicate for: d.txt # Error here
2: No duplicate for: e.txt
3 files found
To work with sub-directories you need to compute the path relative to the source directory, and then add it to the destination directory. Here's the result with a few other minor cleanups and prints to see what is going on. Note that fname is always just the file name and needs to be joined with d to get the whole path:
#!python2
import os, sys
if len(sys.argv) < 4:
print """usage: python del_orphans_dir1_dir2.py source_folder source_ext dest_folder dest_ext
"""
sys.exit(-1)
folder = sys.argv[1]
ext = sys.argv[2]
dest_folder = sys.argv[3]
dest_ext = sys.argv[4]
i = 0
for d, ds, fs in os.walk(folder):
for fname in fs:
relpath = os.path.relpath(os.path.join(d,fname),folder)
relbase = os.path.splitext(relpath)[0]
path_to_check = os.path.join(dest_folder,relbase+'.'+dest_ext)
if not os.path.exists(path_to_check):
print '{}: No duplicate for: {}, {} not found.'.format(i,os.path.join(folder,relpath),path_to_check)
i += 1
print i,'files found'
Output:
0: No duplicate for: a\b.txt, b\b.csv not found.
1: No duplicate for: a\c\e.txt, b\c\e.csv not found.
2 files found
What you're doing is looking for are matching files, not duplicate ones. One problem is that you're not using use the source_ext argument when searching. Another is I think the command-line argument handling is messed-up. Here's a corrected version that accomplishes what you're trying to do:
import os
import sys
if len(sys.argv) != 5:
print("usage: python "
"del_orphans_dir1_dir2.py " # argv[0] (script name)
"source_folder " # argv[1]
"source_ext " # argv[2]
"dest_folder " # argv[3]
"dest_ext") # argv[4]
sys.exit(2) # command line error
source_folder, source_ext, dest_folder, dest_ext = sys.argv[1:6]
dest_ext = dest_ext if dest_ext.startswith('.') else '.'+dest_ext # check dot
found = 0
for d, ds, fs in os.walk(source_folder):
for i, fname in enumerate(fs, start=1):
basename, ext = os.path.splitext(fname)
if ext == source_ext:
if os.path.exists(os.path.join(dest_folder, basename+dest_ext)):
found += 1
else:
print '{}: No matching file found for: {}'.format(i, fname)
print '{} matches found'.format(found)
sys.exit(0)
So I understand the reason for the recursion limit of 1000. I want to run a script continuously, but am I right understanding that eventually the recursion limit will be reached (even if I set it higher) and Python will break?
In the scheme of things, its not a big deal, because I could get the OS to keep re-starting the script, but I thought there may be a more elegant solution I can employ within the script itself (swapping threads??).
My script:
import os
import subprocess
import time
import logging
import datetime
from sys import argv
if len(argv) < 3:
exit('Please provide two arguments - Source Destination')
LOC_DIR = argv[1]
REM_DIR = argv[2]
POLL_INT = 10
RUN_INT = 60
FILE_EXT = '.mov'
# logging setup
logging.basicConfig(filename='%s' % os.path.join(LOC_DIR, '%s the_log.log' % datetime.datetime.now()),level=logging.DEBUG)
# make an easy print and logging function
def printLog(string):
print '%s %s' % (datetime.datetime.now(), string)
logging.info('%s %s' % (datetime.datetime.now(), string))
# get the files with absolute paths
def getFiles(path):
return [os.path.join(path, entry) for entry in os.listdir(path)]
# check if file is still being copied (file size has changed within the poll interval)
def checkSize(path):
same = False
while same is False:
printLog("Processing '%s'" % os.path.basename(path))
printLog('Waiting %s seconds for any filesize change' % POLL_INT)
size1 = os.path.getsize(path)
time.sleep(POLL_INT)
size2 = os.path.getsize(path)
if size1 == size2:
same = True
printLog('File size stayed the same for %s seconds' % POLL_INT)
return same
else:
printLog('File size change detected. Waiting a further %s seconds' % POLL_INT)
# check if correct file extension
def checkExt(path):
if path.endswith(FILE_EXT):
return True
# rsync subprocess
def rsyncFile(path):
printLog("Syncing file '%s'" % os.path.basename(path))
try:
command = ['rsync', '-a', '--remove-source-files', path, REM_DIR]
p = subprocess.Popen(command, stdout=subprocess.PIPE)
for line in p.stdout:
printLog("rsync: '%s'" %line)
p.wait()
if p.returncode == 0:
printLog('<<< File synced successfully :) >>>')
elif p.returncode == 10:
printLog('****** Please check your internet connection!! ****** Rsync error code: %s' % p.returncode)
else:
printLog('There was a problem. Error code: %s' % p.returncode)
except Exception as e:
logging.debug(e)
# main logic
def main():
all_files = getFiles(LOC_DIR)
files = []
for f in all_files:
if checkExt(f):
files.append(f)
if len(files) == 1:
printLog('<<< Found %s matching file >>>' % len(files))
elif len(files) > 1:
printLog('<<< Found %s matching files >>>' % len(files))
for f in files:
if checkSize(f):
rsyncFile(f)
printLog('No files found. Checking again in %s seconds' % RUN_INT)
time.sleep(RUN_INT)
printLog('Checking for files')
main()
if __name__ == "__main__":
main()
CPython has no optimizations for recursion, so you really want to avoid deeply-recursive code in favor of regular loops:
def main():
while True:
all_files = getFiles(LOC_DIR)
files = []
for f in all_files:
if checkExt(f):
files.append(f)
if len(files) == 1:
printLog('<<< Found %s matching file >>>' % len(files))
elif len(files) > 1:
printLog('<<< Found %s matching files >>>' % len(files))
for f in files:
if checkSize(f):
rsyncFile(f)
printLog('No files found. Checking again in %s seconds' % RUN_INT)
time.sleep(RUN_INT)
printLog('Checking for files')
if __name__ == "__main__":
main()
You're going about this in the wrong way.
Replace the main loop with a loop.
# main logic
def main():
while True:
all_files = getFiles(LOC_DIR)
files = []
for f in all_files:
if checkExt(f):
files.append(f)
if len(files) == 1:
printLog('<<< Found %s matching file >>>' % len(files))
elif len(files) > 1:
printLog('<<< Found %s matching files >>>' % len(files))
for f in files:
if checkSize(f):
rsyncFile(f)
printLog('No files found. Checking again in %s seconds' % RUN_INT)
time.sleep(RUN_INT)
printLog('Checking for files')
The recursion limit is only set with recursive functions from my understanding, so If you really want to run something repeatedly, you can simply run.
while True:
#repeated stuff goes here
Recursion is an amazing tool, but handle with care, it often can end up burning you. You were right in the fact that python can only go 1000 calls deep recursively, so if you recursive method doesn't finish by then the exception gets thrown.
Goodluck.
can some one please provide me with an explanation of the code especially the use of maxversions and statements following the line "for f in files:".
I want to understand what xrange(MAXVERSION) means? What is the use of indexing i.e
for index in xrange(MAXVERSIONS): backup = '%s.%2.2d' % (destpath, index)
The code:
!/usr/bin/env python
import sys,os, shutil, filecmp
MAXVERSIONS=100
BAKFOLDER = '.bak'
def backup_files(tree_top, bakdir_name=BAKFOLDER):
top_dir = os.path.basename(tree_top)
tree_top += os.sep
for dir, subdirs, files in os.walk(tree_top):
if os.path.isabs(bakdir_name):
relpath = dir.replace(tree_top,'')
backup_dir = os.path.join(bakdir_name, top_dir, relpath)
else:
backup_dir = os.path.join(dir, bakdir_name)
if not os.path.exists(backup_dir):
os.makedirs(backup_dir)
subdirs[:] = [d for d in subdirs if d != bakdir_name]
for f in files:
filepath = os.path.join(dir, f)
destpath = os.path.join(backup_dir, f)
for index in xrange(MAXVERSIONS):
backup = '%s.%2.2d' % (destpath, index)
abspath = os.path.abspath(filepath)
if index > 0:
old_backup = '%s.%2.2d' % (destpath, index-1)
if not os.path.exists(old_backup): break
abspath = os.path.abspath(old_backup)
try:
if os.path.isfile(abspath) and filecmp.cmp(abspath, filepath, shallow=False):
continue
except OSError:
pass
try:
if not os.path.exists(backup):
print 'Copying %s to %s...' % (filepath, backup)
shutil.copy(filepath, backup)
except (OSError, IOError), e:
pass
if __name__=="__main__":
if len(sys.argv)<2:
sys.exit("Usage: %s [directory] [backup directory]" % sys.argv[0])
tree_top = os.path.abspath(os.path.expanduser(os.path.expandvars(sys.argv[1])))
if len(sys.argv)>=3:
bakfolder = os.path.abspath(os.path.expanduser(os.path.expandvars(sys.argv[2])))
else:
bakfolder = BAKFOLDER
if os.path.isdir(tree_top):
backup_files(tree_top, bakfolder)
The script tries to recursively copy the contents of a directory (defaults to current directory) to a backup directory (defaults to .bak in the current directory);
for each filename.ext, it creates a duplicate named filename.ext.00; if filename.ext.00 already exists, it creates filename.ext.01 instead, and so on.
xrange() is a generator which returns all numbers in 0..(MAXVERSION-1), so MAXVERSION controls how many version-suffixes to try, ie how many old versions of the file to keep.