I have a Python script that transfers files from folder A to folder B.
If the source folder contains many files (30, for example), I need to transfer only 5 files at a time.
Here is the code:
#!/usr/bin/python
import os, sys, time

src_path = "/opt/tst1/"
dst_path = "/opt/tst1/consume/"
now = time.time()
cutoff = now - (5 * 60)
count = 5

files = os.listdir(src_path)
for f in files:
    fn = src_path + f
    if not os.path.isfile(fn):
        continue
    t = os.stat(fn)
    c = t.st_ctime
    # skip files changed less than 5 minutes ago
    if c > cutoff:
        continue
    # move the file
    dfn = dst_path + f
    os.rename(fn, dfn)
    count -= 1
    if count == 0:
        break
It moves the entire contents of the folder from one folder to another, as opposed to only 5 files at a time. Is there anything that needs to be added?
This code will send 5 files at a time until all files are exhausted:
files = os.listdir(".")

while files:
    print "COPY 5"
    for i in range(5):
        try:
            next_file = files.pop()  # get the next file if we can
        except IndexError:
            print "DONE!"            # if we can't, we are done
            break
        print next_file
        do_something(next_file)     # placeholder for the actual move
    print "Resting"
    time.sleep(however_long)
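Alternatively, the batching can be done with list slices in a single pass. A minimal sketch, assuming the paths from the question and an arbitrary one-second pause between batches (shutil.move is used so the move also works across filesystems):

import os, shutil, time

src_path = "/opt/tst1/"          # source folder from the question
dst_path = "/opt/tst1/consume/"  # destination folder from the question
batch_size = 5

# consider only regular files in the source folder
names = [n for n in os.listdir(src_path)
         if os.path.isfile(os.path.join(src_path, n))]

# move the files in batches of batch_size
for start in range(0, len(names), batch_size):
    for name in names[start:start + batch_size]:
        shutil.move(os.path.join(src_path, name), os.path.join(dst_path, name))
    time.sleep(1)  # pause between batches; adjust as needed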
I have ten thousand files in one folder, with numerically sorted names. I am trying to move the files, but some of them have already been moved to the new folder, so the code stops when it reaches the number of a file that has already been moved. I tried using try/except to catch the exception, but it's not working. I would like the code to skip this error and continue moving the files.
This is what I have tried:
import os, shutil

path = "I:\\"
moveto = "I:\\"
i = 1
j = 1
try:
    while True:
        f = "{0}.{1}".format(i, j)
        filesrc = f + ".jpg"
        src = path + filesrc
        dst = moveto + filesrc
        shutil.move(src, dst)
        j += 1
        if j > 6:
            i += 1
            j = 1
        if i > 1500:
            break
except OSError as e:
    pass
You need to put the try/except block inside the loop, around the operation that might fail:
import os, shutil

path = "I:\\"
moveto = "I:\\"
i = 1
j = 1
while True:
    f = "{0}.{1}".format(i, j)
    filesrc = f + ".jpg"
    src = path + filesrc
    dst = moveto + filesrc
    try:
        shutil.move(src, dst)
    except OSError:
        # the file was already moved (or is missing); skip it
        pass
    j += 1
    if j > 6:
        i += 1
        j = 1
    if i > 1500:
        break
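Alternatively, if the only expected failure is that a source file was already moved, an explicit existence check avoids silently swallowing every OSError. A minimal sketch, reusing the naming scheme from the question:

import os, shutil

path = "I:\\"
moveto = "I:\\"

# same i.j numbering scheme as in the question: 1.1.jpg ... 1500.6.jpg
for i in range(1, 1501):
    for j in range(1, 7):
        name = "{0}.{1}.jpg".format(i, j)
        src = path + name
        if os.path.exists(src):  # skip files that were already moved
            shutil.move(src, moveto + name)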
I have a Python script that lists the folders and files existing in a given path.
What I want is to be able to check whether an existing folder name starts with the string "pdf" plus a date,
like this: pdf 18-19-06-2020. If the folder starts with just "pdf" and the date is not in the format "dd-dd-mm-yyyy", I need to convert the name to the required format.
I am getting the current date and the date of yesterday.
code:
# packages for listing and copying folders & files
import calendar
import os
import shutil
from os import path
from datetime import date


def main():
    copy(src)  # NOTE: src is not defined anywhere in the posted code


'''
FUNCTION THAT calculates the current date and the 2 dates before
'''
def yesterday():
    days = []
    day = int(date.today().strftime("%d"))
    month = int(date.today().strftime("%m"))
    year = int(date.today().strftime("%Y"))
    if day != 1:
        p = day - 1
        p1 = day - 2
        p2 = day - 3
        print("******", p)
        print("******", p1)
        print("******", p2)
        days.append(p)
        days.append(p1)
        days.append(p2)
        print("******", days, "********")
        return p
    else:
        p = 32 - 1
        print("******", p)
        return p
        # NOTE: everything below is unreachable because of the return above
        long_months = [1, 3, 5, 7, 8, 10, 12]
        if month in long_months:
            print(32 - 1)
            return 32 - 1
        elif month == 2:
            if calendar.isleap(year):
                return 29
            return 28
        else:
            return 30
dst = "E:/KRD2018_Data"
dst2 = "F:/ABpro"
dst3 = "C:/Users/cvd/Documents"
'''
FUNCTION THAT lists the folders and files existing on the USB drive, copies the pdfs and docs to their destinations,
and copies the pdfs in the existing folder to the specified destination
'''
def copy(src):
    # name = folder pdf yesterday + today
    # pdf dd-dd-mm-yyyy ==> 3-04-05-2020
    datefile = "pdf " + str(yesterday()) + date.today().strftime("-%d-%m-%Y")
    src2 = os.path.join(src, datefile)
    ignore_list = ["$RECYCLE.BIN", "System Volume Information"]
    i = 0
    j = 0
    z = 0
    for dirpath, dirnames, files in os.walk(src, topdown=True):
        print(f'Found directory: {dirpath}')
        if len(dirnames) == 0 and len(files) == 0:
            print("this directory is empty")
            continue
        # exclude the ignore list from the os.walk
        dirnames[:] = [d for d in dirnames if d not in ignore_list]
        # check if the path is a directory
        isdir = os.path.isdir(dirpath)
        print(isdir)
        for file in files:
            full_file_name = os.path.join(dirpath, file)
            if os.path.join(dirpath) == src:
                if file.endswith("pdf"):
                    if not os.path.exists(dst2):
                        os.mkdir(dst2)
                    else:
                        print("the path already exists")
                    # shutil.copy(full_file_name, dst2)
                    i += 1
                elif file.endswith("docx") or file.endswith("doc"):
                    # shutil.copy(full_file_name, dst)
                    j += 1
            elif os.path.join(dirpath) == src2:
                if file.endswith("pdf"):
                    numfile = len(files)
                    # shutil.copy(full_file_name, dst3)
                    z += 1
    print("*******number of directories = {}".format(len(dirnames)))
    print("*******number of files = {}".format(len(files)))
    print("{} word files \n".format(j))
    print("{} pdf files \n".format(z))
    print("{} other files \n".format(i))
    print("total copied files {}".format(i + j + z))


if __name__ == "__main__":
    main()
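An editorial note: the manual month-length bookkeeping in yesterday() can be replaced with datetime.timedelta, which handles month and year boundaries automatically. A minimal sketch of building the expected folder name and normalizing a folder that does not match it (normalize_folder is a hypothetical helper, not part of the script above):

import os
from datetime import date, timedelta

def expected_folder_name(today=None):
    # build 'pdf dd-dd-mm-yyyy' from yesterday's and today's dates
    today = today or date.today()
    yesterday = today - timedelta(days=1)  # handles month/year boundaries
    return "pdf {:%d}-{:%d-%m-%Y}".format(yesterday, today)

def normalize_folder(src, name):
    # hypothetical helper: rename a 'pdf...' folder to the required format
    target = expected_folder_name()
    if name.startswith("pdf") and name != target:
        os.rename(os.path.join(src, name), os.path.join(src, target))

print(expected_folder_name())  # e.g. 'pdf 18-19-06-2020'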
Based on this script:
#!/usr/bin/python
# run by crontab
# removes any files in /tmp/ older than 7 days
import os, sys, time
from subprocess import call

now = time.time()
cutoff = now - (7 * 86400)

files = os.listdir("/tmp")
for xfile in files:
    if os.path.isfile("/tmp/" + xfile):
        t = os.stat("/tmp/" + xfile)
        c = t.st_ctime
        # delete file if older than a week
        if c < cutoff:
            os.remove("/tmp/" + xfile)
we can delete files in a path based on their modification time, but how can we delete folders inside other folders based on their modification time?
That is, there are many folders in the main folder; we need to keep the main folder and its subfolders, and delete only those folders whose modification time is older than a specific time.
You can try something along these lines:
import shutil, os, time

top_dir = '/tmp'
now = time.time()
cutoff = now - (7 * 86400)

def del_old_files_and_dirs(top_dir, cutoff_time):
    for root, dirs, files in os.walk(top_dir, topdown=False):
        for cdir in dirs:
            fdir = os.path.join(root, cdir)
            if os.path.getmtime(fdir) < cutoff_time:
                shutil.rmtree(fdir)
            else:
                # Process this dir again recursively
                del_old_files_and_dirs(fdir, cutoff_time)
        for cfile in files:
            ffile = os.path.join(root, cfile)
            if os.path.getmtime(ffile) < cutoff_time:
                os.remove(ffile)

del_old_files_and_dirs(top_dir, cutoff)
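If only the directories directly under the main folder should be candidates for deletion (the main folder itself is never touched), a simpler sketch could be:

import os, shutil, time

main_dir = '/tmp'  # assumed main folder
cutoff = time.time() - (7 * 86400)

# delete only old top-level subfolders; files and the main folder are kept
for entry in os.scandir(main_dir):
    if entry.is_dir() and entry.stat().st_mtime < cutoff:
        shutil.rmtree(entry.path)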
I have a directory containing 50 files that I want to read one by one and compare with another file, which is fixed. I am using glob.glob, but it didn't work.
Here is how I am reading all the files. If, instead of path = '*.rbd', I give a file name like path = 'run-01.rbd', it works.
path = '*.rbd'
path = folder + path
files = sorted(glob.glob(path))

Complete code:
import glob
from itertools import islice
import linecache

num_lines_nonbram = 1891427
bits_perline = 32
total_bit_flips = 0
num_bit_diff_flip_zero = 0
num_bit_diff_flip_ones = 0

folder = "files/"
path = '*.rbd'
path = folder + path
files = sorted(glob.glob(path))
original = open('files/mull-original-readback.rbd', 'r')
#source1 = open(file1, "r")

for filename in files:
    del_lines = 101
    with open(filename, 'r') as f:
        # skip the first del_lines lines in both files
        i = 1
        while i <= del_lines:
            line1 = f.readline()
            lineoriginal = original.readline()
            i += 1
        i = 0
        num_bit_diff_flip_zero = 0
        num_bit_diff_flip_ones = 0
        num_lines_diff = 0
        i = 0
        j = 0
        k = 0
        a_write2 = ""
        while i < (num_lines_nonbram - del_lines):
            line1 = f.readline()
            lineoriginal = original.readline()
            while k < bits_perline:
                if lineoriginal[k] == line1[k]:
                    a_write2 += " "
                else:
                    if lineoriginal[k] == "0":
                        #if ((line1[k]=="0" and line1[k]=="1")):
                        num_bit_diff_flip_zero += 1
                    if lineoriginal[k] == "1":
                        #if ((line1[k]=="0" and line1[k]=="1")):
                        num_bit_diff_flip_ones += 1
                        #if ((line1[k]==1 and line1[k]==0)):
                            #a_write_file2 = str(i+1) + " " + str(31-k) + "\n" + a_write_file2
                            #a_write2 += "^"
                            #num_bit_diff_flip_one += 1
                        # else:
                            # a_write2 += " "
                k += 1
            total_bit_flips = num_bit_diff_flip_zero + num_bit_diff_flip_ones
            i += 1
            k = 0
    i = 0

print files
print "Number of bits flip zero= %d" % num_bit_diff_flip_zero + "\n" + "Number of bits flip one= %d" % num_bit_diff_flip_ones + "\n" "Total bit flips = %d " % total_bit_flips
f.close()
original.close()
You could use the os module to first list everything in a directory (both files and folders), then use a list comprehension to keep only the files, and a second list comprehension to keep only the files with a specific extension. There is probably a more efficient way of doing it, but this works:
import os

def main():
    path = './'  # the path to the current directory
    # Go through all items in the directory and keep only the files
    files = [file for file in os.listdir(path)
             if os.path.isfile(os.path.join(path, file))]
    # Keep only the files with a .txt extension (for example)
    specificExtensionFiles = [file for file in files if file.endswith(".txt")]
    # Now specificExtensionFiles is a list of the .txt files in the current
    # directory, which you can use in a for loop
    print(specificExtensionFiles)

if __name__ == '__main__':
    main()
For further reference:
How do I list all files of a directory?
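Since the question already uses glob, the pattern matching can also stay in glob; building the pattern with os.path.join avoids separator mistakes. A small sketch, with the folder name taken from the question:

import glob, os

folder = "files"
files = sorted(glob.glob(os.path.join(folder, "*.rbd")))
print(files)  # should list run-01.rbd etc. if they exist under files/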
The problem is that you're not going back to the beginning of original whenever you start comparing with the next file in the for filename in files: loop. The simplest solution is to put:
original.seek(0)
at the beginning of that loop.
You could also read the whole file into a list just once before the loop, and use that instead of reading the file repeatedly.
And if you only want to process part of each file, you can read the file into a list and then use a list slice to get the lines you want.
You also shouldn't be setting num_bit_diff_flip_zero and num_bit_diff_flip_ones to 0 each time through the loop, since these are supposed to be the totals across all files.
with open('files/mull-original-readback.rbd', 'r') as original:
    original_lines = list(original)[del_lines:num_lines_nonbram]

for filename in files:
    with open(filename, 'r') as f:
        lines = list(f)[del_lines:num_lines_nonbram]
    for lineoriginal, line1 in zip(original_lines, lines):
        for k in range(bits_perline):
            if lineoriginal[k] == line1[k]:
                a_write2 += " "
            elif lineoriginal[k] == "0":
                num_bit_diff_flip_zero += 1
            else:
                num_bit_diff_flip_ones += 1

total_bit_flips = num_bit_diff_flip_zero + num_bit_diff_flip_ones
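To keep the original script's final report, the accumulated totals can be printed once after the loop, for example:

# report the totals accumulated across all files
print("Number of bits flip zero = %d" % num_bit_diff_flip_zero)
print("Number of bits flip one = %d" % num_bit_diff_flip_ones)
print("Total bit flips = %d" % total_bit_flips)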
I have a Python script that, when run from Eclipse, does what I want without any errors.
I now want to create a batch file that will run my script in a loop (infinitely).
The first problem is that when I run the .bat file, I get a second cmd window that shows the logging from my Python script (which shows me that it is running), but when the main process of the script starts (which can take from 1 minute to some hours), it exits within a few seconds without actually running the whole script. I have used start /wait, but it doesn't seem to work. Here is the simple batch file I have created:
@echo off
:start
start /wait C:\Python32\python.exe C:\Users\some_user\workspace\DMS_GUI\CheckCreateAdtf\NewTest.py
goto start
So I want the .bat file to run my script, wait for it to finish (even if it takes some hours), and then run it again.
I have also tried creating a .bat file that calls the .bat file shown above with start /wait, with no success.
Optimally, I would like it to keep the window open with all the logging that I have in my script, but that is another issue that can be solved later.
def _open_read_file(self):
    logging.debug("Checking txt file with OLD DB-folder sizes")
    content = []
    with open(self._pathToFileWithDBsize) as f:
        content = f.read().splitlines()
    for p in content:
        name, size = p.split(",")
        self._folder_sizes_dic[name] = size

def _check_DB(self):
    logging.debug("Checking current DB size")
    skippaths = ['OtherData', 'Aa', 'Sss', 'asss', 'dss', 'dddd']
    dirlist = [item for item in os.listdir(self._pathToDBparentFolder)
               if os.path.isdir(os.path.join(self._pathToDBparentFolder, item))]
    for skip in skippaths:
        if skip in dirlist:
            dirlist.remove(skip)
    MB = 1024 * 1024.0
    for dir in dirlist:
        folderPath = self._pathToDBparentFolder + "\\" + str(dir)
        fso = com.Dispatch("Scripting.FileSystemObject")
        folder = fso.GetFolder(folderPath)
        size = str("%.5f" % (folder.Size / MB))
        self._DB_folder_sizes_dic[dir] = size

def _compare_fsizes(self):
    logging.debug("Comparing sizes between DB and txt file")
    for (key, value) in self._DB_folder_sizes_dic.items():
        if key in self._folder_sizes_dic:
            diff = float(self._DB_folder_sizes_dic.get(key)) - float(self._folder_sizes_dic.get(key))
            if -30.0 < diff < 100.0:
                pass
            else:
                self._changed_folders.append(key)
        else:
            self._changed_folders.append(key)

def _update_file_with_new_folder_sizes(self):
    logging.debug("Updating txt file with new DB sizes")
    file = open(self._pathToFileWithDBsize, 'w')
    for key, value in self._DB_folder_sizes_dic.items():
        file.write(str(key) + "," + str(value) + "\n")

def _create_paths_for_changed_folders(self):
    logging.debug("Creating paths to parse for the changed folders")
    full_changed_folder_parent_paths = []
    for folder in self._changed_folders:
        full_changed_folder_parent_paths.append(self._pathToDBparentFolder + "\\" + str(folder))
    for p in full_changed_folder_parent_paths:
        for path, dirs, files in os.walk(p):
            if not dirs:
                self._full_paths_to_check_for_adtfs.append(path)

def _find_dat_files_with_no_adtf(self):
    logging.debug("Finding files with no adtf txt")
    for path in self._full_paths_to_check_for_adtfs:
        for path, dirs, files in os.walk(path):
            for f in files:
                if f.endswith('_AdtfInfo.txt'):
                    hasAdtfFilename = f.replace('_AdtfInfo.txt', '.dat')
                    self.hasADTFinfos.add(path + "\\" + hasAdtfFilename)
                    self.adtf_files = self.adtf_files + 1
                elif f.endswith('.dat'):
                    self.dat_files = self.dat_files + 1
                    self._dat_file_paths.append(path + "\\" + f)
    logging.debug("Checking which files have AdtfInfo.txt. This will take some time depending on the number of .dat files")
    for file in self._dat_file_paths:
        if file not in self.hasADTFinfos:
            self._dat_with_no_adtf.append(file)
    self.files_with_no_adtf = len(self._dat_with_no_adtf)
    #self.unique_paths_from_log = set(full_paths_to_check_for_adtfs)
    logging.debug("Files found with no adtf " + str(self.files_with_no_adtf))

def _create_adtf_info(self):
    logging.debug("Creating Adtf txt for dat files")
    files_numbering = 0
    for file in self._dat_with_no_adtf:
        file_name = str(file)
        adtf_file_name_path = file.replace('.dat', '_AdtfInfo.txt')
        exe_path = r"C:\Users\some_user\Desktop\some.exe "
        path_to_dat_file = file_name
        path_to_adtf_file = adtf_file_name_path
        command_to_subprocess = exe_path + path_to_dat_file + " -d " + path_to_adtf_file
        # Call VisionAdtfInfoToCsv
        subprocess.Popen(command_to_subprocess, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_response = subprocess.check_output(command_to_subprocess)
        # if index0 is in the response, the adtf could not be created because the .dat file is probably corrupted
        if "index0" in str(process_response):
            self._corrupted_files_paths.append(path_to_dat_file)
            self._files_corrupted = self._files_corrupted + 1
            self._corrupted_file_exist_flag = True
        else:
            self._files_processed_successfully = self._files_processed_successfully + 1
        files_numbering = files_numbering + 1
The functions are called in this order:
self._open_read_file()
self._check_DB()
self._compare_fsizes()
self._create_paths_for_changed_folders()
self._find_dat_files_with_no_adtf()
self._create_adtf_info()
self._check_DB()
self._update_file_with_new_folder_sizes()
OK, it seems that the .exe in the script was returning an error, and that is why the script was finishing so fast. I thought that the .bat file did not wait. I should have placed the .bat file in the .exe folder, and now the whole thing runs perfectly.
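Such failures surface sooner if the subprocess's exit status and stderr are checked instead of discarded; this also avoids launching the command twice, as the Popen followed by check_output above does. A minimal sketch (the command string is a placeholder mirroring the one built in _create_adtf_info):

import subprocess

# placeholder command, mirroring the one built in the script above
command_to_subprocess = r"C:\Users\some_user\Desktop\some.exe input.dat -d input_AdtfInfo.txt"

# run once, capture output, and surface a nonzero exit status
result = subprocess.run(command_to_subprocess, capture_output=True, text=True)
if result.returncode != 0:
    print("exe failed with code", result.returncode, ":", result.stderr)
else:
    print(result.stdout)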