I want to copy only first 50 files. I know how to copy the files but how do I copy only first 50 files?
This is the code I'm using for copying the file. It copies all the files present in the folder. I want to copy only first 50 files.
import sys, os, time, shutil
# Back up each file found in `path` to C:\Backup, then move it to C:\Dest,
# appending one "<name>,<outcome>" line per file to log.txt.
print(time.ctime())
path = "C:\\temp"
files = os.listdir(path)
print(len(files))
if not files:
    print('No Files Present')
else:
    files.sort()
    # The context manager closes (and flushes) the log even if the loop aborts.
    with open("log.txt", 'a') as fileobj:
        for eachfilename in files:
            src = path + '\\' + eachfilename
            temp = "C:\\Backup\\" + eachfilename
            dst = "C:\\Dest\\" + eachfilename
            # shutil.move() returns None on Python 2, so its return value
            # cannot signal success; a raised exception is the only reliable
            # failure indicator.
            try:
                shutil.copy(src, temp)
                shutil.move(src, dst)
            except (IOError, OSError, shutil.Error) as exc:
                print("File not moved: %s (%s)" % (eachfilename, exc))
                fileobj.write(eachfilename + ',' + 'failed to move' + '\n')
            else:
                print("File moved: " + eachfilename)
                fileobj.write(eachfilename + ',' + 'moved Sucessfully' + '\n')
print(time.ctime())
Is there any function to specify number of files to copy?
You could replace
files = os.listdir(path)
with
files = os.listdir(path)[:50]
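This slices the list and limits the number of files to 50. Note that os.listdir() returns entries in arbitrary order, so if "first 50" should mean the first 50 by name, sort before slicing: files = sorted(os.listdir(path))[:50]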
Related
This is a follow-up to a previous question. I am trying to copy data from multiple .xlsx files and paste it into multiple corresponding destination .xlsm files that are in different sub folders, e.g. copy data from ABC_Report.xlsx and paste it into the existing file ABC_2023.xlsm, copy data from DEF_Report.xlsx and paste it into the existing file DEF_2023.xlsm, where ABC_2023.xlsm is in the sub folder 'C:/folderpath/ABC/' and DEF_2023.xlsm is in another sub folder 'C:/folderpath/DEF/'. So far I have code that does this, but only when all destination files are in one folder. My source files are all in one folder, which is great; the problem is that my destination files are in different individual sub folders.
Source Files
destination files
import os
from glob import glob
import openpyxl
from openpyxl import load_workbook
def copy_data(src_file: str, dst_file: str) -> None:
    """Copy columns A and B from the source workbook into the destination workbook.

    Values are read from the sheet "src sheet" of ``src_file`` starting at
    ``start_row_src`` and written to the sheet "dest sheet" of ``dst_file``
    starting at ``start_row_dst``. The destination workbook is loaded with
    ``keep_vba=True`` and saved back to ``dst_file``.
    """
    # open files
    ws_src = load_workbook(src_file)["src sheet"]
    wb_dst = load_workbook(dst_file, keep_vba=True)
    ws_dst = wb_dst["dest sheet"]

    # configuration
    start_row_src = 2
    start_row_dst = 10
    rows2copy = 100000  # exclusive upper bound of the source row index

    # Source row i is written to destination row i + input_offset, so the
    # first source row (start_row_src) lands on start_row_dst. Applying the
    # offset to the source index instead would read from row 10 and write to
    # row 2, the reverse of what the configuration names describe.
    input_offset = start_row_dst - start_row_src
    for i in range(start_row_src, rows2copy):
        ws_dst[f"A{i + input_offset}"].value = ws_src[f"A{i}"].value
        ws_dst[f"B{i + input_offset}"].value = ws_src[f"B{i}"].value

    # save the modifications; without this nothing is written to disk
    wb_dst.save(dst_file)
# files directories
src_dir_path = "C:/Users/me/Documents/Mysourcefolder/"
dst_dir_path = "C:/Users/me/Documents/Mydestinationfolder/"

# Iterate over all excel files found in the source path. glob() is what this
# snippet imports; pathlib.Path is not imported here, so it cannot be used.
workbooks = glob(os.path.join(src_dir_path, "*.xlsx"))
for src in workbooks:
    # "ABC_Report.xlsx" -> "<dst_dir_path>ABC_2023.xlsm"
    dst = dst_dir_path + os.path.basename(src).replace("_Report.xlsx", "_2023.xlsm")
    copy_data(src, dst)
The code above works perfectly if all my destination files are in one folder, but it doesn't work when they are in different subfolders. How do I factor the destination files' subfolders into my code? PS: there are 40 source files and 40 destination files, each in one of 40 subfolders. The subfolders look like this:
"C:/Users/me/Documents/Mydestinationfolder/ABC/"
"C:/Users/me/Documents/Mydestinationfolder/DEF/"
Just in case it helps someone out there: I figured out how to get the code to copy data from source files in one folder and paste it into destination files that are in different individual sub folders. See below:
import os
from glob import glob
import openpyxl
from openpyxl import load_workbook
def copy_data(src_file: str, dst_file: str) -> None:
    """Copy columns A and B from the source workbook into the destination workbook.

    Values are read from the sheet "src sheet" of ``src_file`` starting at
    ``start_row_src`` and written to the sheet "dest sheet" of ``dst_file``
    starting at ``start_row_dst``. The destination workbook is loaded with
    ``keep_vba=True`` and saved back to ``dst_file``.
    """
    # open files
    ws_src = load_workbook(src_file)["src sheet"]
    wb_dst = load_workbook(dst_file, keep_vba=True)
    ws_dst = wb_dst["dest sheet"]

    # configuration
    start_row_src = 2
    start_row_dst = 10
    rows2copy = 100000  # exclusive upper bound of the source row index

    # Source row i is written to destination row i + input_offset, so the
    # first source row (start_row_src) lands on start_row_dst. Applying the
    # offset to the source index instead would read from row 10 and write to
    # row 2, the reverse of what the configuration names describe.
    input_offset = start_row_dst - start_row_src
    for i in range(start_row_src, rows2copy):
        ws_dst[f"A{i + input_offset}"].value = ws_src[f"A{i}"].value
        ws_dst[f"B{i + input_offset}"].value = ws_src[f"B{i}"].value

    # save the modifications
    wb_dst.save(dst_file)
# Source filepath
src_dir_path = "C:/users/me/sourcefolder/"

# Iterate over all excel files found in the source path. glob() is what this
# snippet imports; pathlib.Path is not imported here, so it cannot be used.
workbooks = glob(os.path.join(src_dir_path, "*.xlsx"))
for src in workbooks:
    src_name = os.path.basename(src)
    # The report prefix ("ABC" for "ABC_Report.xlsx") names the sub folder
    # that holds the matching destination workbook.
    dst_dir_path = "C:/Users/me/destfolder/{f}/".format(
        f=src_name.replace("_Report.xlsx", ""))
    dst = dst_dir_path + src_name.replace("_Report.xlsx", "_2023.xlsm")
    copy_data(src, dst)
The code above will copy data from your source files and paste in corresponding destination files that are inside individual sub folders
So I'm dealing with a script that needs to zip all files into a single folder that share the same name. So, for example, the folder structure looks like this...
001.flt
001.hdr
001.prj
002.flt
002.hdr
002.prj
. .
.
700.flt
700.hdr
700.prj
In order to get a file to zip, I have a script that can handle a single file but does not recognize ["*.flt", "*.hdr", "*.prj"]
Is there a workaround for getting the script to recognize the file names based on their names and group them by name as well? I would like each individual zip file to contain file contents but zip it as
001.zip, 002.zip....
meaning the zip file contains the different file extensions
001.zip(
001.hdr,
001.prj,
001.flt
)
'''
import zipfile, sys, os, glob
inDir = r"\test\DEM"
outDir = r"\test\DEM_out"
# glob pattern selecting the files that each get their own archive
filetype = "*.flt"


def zipfiletypeInDir(inDir, outDir):
    """Create one zip archive in outDir for every file in inDir matching `filetype`.

    Each archive is named after the matched file's base name
    (``001.flt`` -> ``001.zip``). outDir must already exist.

    Returns False when inDir does not exist, True otherwise.
    """
    # Check that input directory exists
    if not os.path.exists(inDir):
        print("Input directory %s does not exist!" % inDir)
        return False

    print("Zipping filetype(s) in folder %s to output folder %s" % (inDir, outDir))

    # Loop through "filetype" in input directory
    for inShp in glob.glob(os.path.join(inDir, filetype)):
        # Build the filename of the output zip file
        outZip = os.path.join(outDir, os.path.splitext(os.path.basename(inShp))[0] + ".zip")
        # Zip the "filetype"
        zipfiletype(inShp, outZip)
    return True


def zipfiletype(infiletype, newZipFN):
    """Write the file(s) matched by infiletype into a new archive newZipFN.

    An existing archive at newZipFN is deleted first. Returns False if that
    deletion did not take effect, True once the archive has been written.
    """
    print('Starting to Zip ' + infiletype + ' to ' + newZipFN)

    # Delete output zipfile if it already exists
    if os.path.exists(newZipFN):
        print('Deleting' + newZipFN)
        os.remove(newZipFN)
        # Output zipfile still exists, exit
        if os.path.exists(newZipFN):
            print('Unable to Delete' + newZipFN)
            return False

    # The context manager closes the archive even when a write fails.
    with zipfile.ZipFile(newZipFN, 'w') as zipobj:
        # NOTE: `filetype` is itself "*.flt", so this replace() leaves the
        # path unchanged and only the input file is matched; files sharing the
        # base name but with other extensions are not picked up. The path is
        # deliberately not lower-cased: that breaks the lookup on
        # case-sensitive file systems.
        for infile in glob.glob(infiletype.replace(filetype, "*.flt")):
            # Skip .zip file extension
            if os.path.splitext(infile)[1].lower() != ".zip":
                print("Zipping %s" % infile)
                # Zip the "filetype" component
                zipobj.write(infile, os.path.basename(infile), zipfile.ZIP_DEFLATED)
    return True


if __name__ == "__main__":
    zipfiletypeInDir(inDir, outDir)
    print("done!")
If the possible duplicate I provided doesn't answer your question....
One way would be to iterate over all the file names and make a dictionary grouping all the files with the same name.
In [54]: import collections, os, zipfile
In [55]: zips = collections.defaultdict(list)
In [55]:
In [56]: for f in os.listdir():
...: name, ext = os.path.splitext(f)
...: zips[name].append(f)
Then iterate over the dictionary; creating a new zip file for each key and adding each key's files to it.
In [57]: outdir = r'zips'
In [58]: for k,v in zips.items():
...: zname = k+'.zip'
...: fpath = os.path.join(outdir,zname)
...: #print(fpath)
...: with zipfile.ZipFile(fpath, 'w') as z:
...: for name in v:
...: z.write(name)
I Found what I was looking for, This script identifies the names of the files and groups them based on that with the Iterator.
#group files into separate zipfolders from single directory based from
#individual file names
import fnmatch, os, glob, zipfile
#edit data folders for in and out variables
path = r"D:\Users\in_path"
out_path = r"C:\Users\out_path"

# Reduce the directory listing to its distinct base names so that each
# archive is built exactly once, not once per extension.
names = sorted({os.path.splitext(entry)[0] for entry in os.listdir(path)})

for name in names:
    # define name of file for the rest of the loop body
    print(name)
    # archive that receives every file sharing this base name
    zip_path = os.path.join(out_path, name + '.zip')
    # The context manager closes the archive even if a write fails.
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        # Match "<name>.*" inside `path` directly; no os.chdir() required.
        for member in glob.glob(os.path.join(path, '%s.*' % name)):
            # store the file under its bare name, without the folder part
            archive.write(member, os.path.basename(member))
    print('All files written to %s' % zip_path)
I have a directory that contains a large amount of sub directories.
Within each of these subdirectories are different jpegs, pngs.
I want to:
Select X amount of random images from these subdirectories
Create a new folder and copy these selected random images inside.
Thanks to help received here already I can print out a random selection of images using os.walk and random.choice.
import os
import random
import shutil
# Collect the full path of every .jpg/.png/.jpeg file below the root folder.
files_list = []
for root, dirs, files in os.walk("/Path/to/Directory"):
    for filename in files:
        # str.endswith accepts a tuple of suffixes
        if filename.endswith((".jpg", ".png", ".jpeg")):
            files_list.append(os.path.join(root, filename))

# count and print the jpg/jpeg/png paths that were found
file_count = len(files_list)
print(file_count)
print(files_list)
# Print two random paths; random.sample raises ValueError when asked for more
# items than the list holds, so cap the request at the number found.
print(random.sample(files_list, min(2, file_count)))
However, my issue is with actually selecting random images (not their names)
I have tried to create a variable imagePath that uses os.walk
#creates a variable imagePath that lets me access all img files in different folders
imagePath = os.walk("/Path/to/Directory")
and a new variable to randomly select a single image from imagePath
#create a variable that lets me choose a random image from imagePath
randomImages = random.choice(os.listdir(imagePath))
and then created a new directory and used shutil.copy to move the randomly selected image into this new directory
#creates a new directory
os.mkdir('testDirectory')
#moves the randomly selected image into new directory
shutil.copy(randomImages, testDirectory)
However, I am getting the following error:
Traceback (most recent call last):
File "crawl.py", line 28, in <module>
randomImages = random.choice(os.listdir(imagePath))
TypeError: coercing to Unicode: need string or buffer, generator found
I have also tried
for root, dirs, files in os.walk("/Path/to/Directory", topdown=False):
imagePath = ("/Path/to/Directory") #creates a variable that lets me access all img files in different folders
randomImages = random.choice(os.listdir(imagePath))
print randomImages
But this returns a random selection of sub directories (not images within) along with .ds store files.
Here is the code, you want to move the files not make another copy I guess.
import os
import random
import shutil
# Collect the full path of every .jpg/.png/.jpeg file below the source folder.
files_list = []
for root, dirs, files in os.walk("<SOURCE_DIR>"):
    for filename in files:
        # str.endswith accepts a tuple of suffixes
        if filename.endswith((".jpg", ".png", ".jpeg")):
            files_list.append(os.path.join(root, filename))

# count and print the number of jpg/jpeg/png files found
file_count = len(files_list)
print(file_count)

# Select two random files; random.sample raises ValueError when asked for
# more items than the list holds, so cap the request at the number found.
filesToCopy = random.sample(files_list, min(2, file_count))

destPath = "<DESTINATION_DIR>"

# if destination dir does not exist, create it
if not os.path.isdir(destPath):
    os.makedirs(destPath)

# Iterate over the selected files and move them.
# NOTE: shutil.move raises shutil.Error if destPath already contains a file
# with the same name.
for selected in filesToCopy:
    shutil.move(selected, destPath)
You should be able to feed shutil.copy() a source and destination file path. It seems to me that you have a list of files already so you can just copy them.
import os
import random
import shutil
# Collect the full path of every .jpg/.png/.jpeg file below the root folder.
files_list = []
for root, dirs, files in os.walk("/Path/to/Directory"):
    for filename in files:
        # str.endswith accepts a tuple of suffixes
        if filename.endswith((".jpg", ".png", ".jpeg")):
            files_list.append(os.path.join(root, filename))

# count and print the jpg/jpeg/png paths that were found
file_count = len(files_list)
print(file_count)
print(files_list)

# Assign two random paths to a list; random.sample raises ValueError when
# asked for more items than the list holds, so cap the request.
selected_files = random.sample(files_list, min(2, file_count))

dest_path = "/path/to/new/folder/"
# makedirs also creates missing parent folders; the isdir guard lets the
# script be re-run when the folder already exists.
if not os.path.isdir(dest_path):
    os.makedirs(dest_path)

for src_path in selected_files:
    shutil.copy(src_path, os.path.join(dest_path, os.path.basename(src_path)))
I'm new to Python and trying to learn. I have a Python script that unzips compressed (zipped) folders from a source folder and extracts everything to a destination folder. In addition, I want to delete the source contents once they are extracted. How would I achieve this? Thanks for the help in advance!
Basically,inside this path "L:\Python\Source Zipped files" I have multiple zipped folders. My query, unzips each folder and extracts to the final destination. I'm looking for an approach, like first when it unzips the first folder, and extracts and then it should be deleted from the source folder. Included a snippet of how the source folder looks like.
enter image description here
Here is my query
import os
import zipfile
import shutil
import json
# Unpack every archive in data_dir into temp_dir, unpack each nested archive
# found there into new_dir, and copy the two XML files of each nested archive
# into final_dir under a running number.
# NOTE(review): logging, time and my_time() are not imported or defined in the
# visible snippet -- confirm they are provided elsewhere.
data_dir = r'L:\Python\Source Zipped files'
temp_dir = r'L:\Python\temp1'
new_dir = r'L:\Python\temp2'
final_dir = r'L:\Python\Destination Unzipped files'

big_list = os.listdir(data_dir)

archive_count = 0  # archives unpacked so far
file_count = 152865  # running number used in the output file names
basename1 = os.path.join(final_dir, 'GENERIC_ROUGHDRAFT')
basename2 = os.path.join(final_dir, 'XACTDOC')

my_time()
# Report the total without touching archive_count: the counter must start at
# zero for the cumulative figure logged inside the loop to be correct.
logging.info('Unzipping {} archives...'.format(len(big_list)))

for folder in big_list:
    prior_count = file_count
    logging.info('Starting: {}'.format(folder))

    # start every archive with an empty scratch folder
    try:
        shutil.rmtree(temp_dir)
    except FileNotFoundError:
        pass
    os.mkdir(temp_dir)

    with zipfile.ZipFile(os.path.join(data_dir, folder), mode='r') as a_zip:
        a_zip.extractall(path=temp_dir)
    archive_count += 1
    logging.info('Cumulative total of {} archive(s) unzipped'.format(archive_count))

    bigger_list = os.listdir(temp_dir)
    logging.info('Current archive contains {} subfolders'.format(len(bigger_list)))

    for sub_folder in bigger_list:
        with zipfile.ZipFile(os.path.join(temp_dir, sub_folder), mode='r') as b_zip:
            b_zip.extractall(path=new_dir)
        # e.g. "<final_dir>\GENERIC_ROUGHDRAFT (152865).xml"
        file1 = "%s (%d).%s" % (basename1, file_count, 'xml')
        file2 = "%s (%d).%s" % (basename2, file_count, 'xml')
        shutil.copy(os.path.join(new_dir, 'GENERIC_ROUGHDRAFT.xml'), file1)
        shutil.copy(os.path.join(new_dir, 'XACTDOC.xml'), file2)
        file_count += 1

    logging.info('{} subfolders unzipped'.format(file_count - prior_count))

my_time()
logging.info('Total of {0} files -- {1} pairs -- should be in {2}'.format(2*(file_count-1), file_count-1, final_dir))
time.sleep(1)
my_time()
I'm trying to put the path of each file in the Data folder into a list, files[], and I want the search to descend through every level of subfolders. At the moment I can read the files in the direct subfolders of Data, but it does not go deeper: it cannot reach a subfolder of a subfolder of Data without my writing another loop. What I want is a loop with unlimited depth over the Data folder, so I can get all the file paths.
For example this is what I get:
['Data/DataReader.py', 'Data/DataReader - Copy.py', 'Data/Dat/DataReader.py', 'Data/fge/er.txt']
This is what I want but it can still go into deeper folders:
['Data/DataReader.py', 'Data/DataReader - Copy.py', 'Data/Dat/DataReader.py', 'Data/fge/er.txt', 'Data/fge/Folder/dummy.png', 'Data/fge/Folder/AnotherFolder/data.dat']
This is my current path, what would i need to add or change?
import os
from os import walk
# Builds `files` (file paths) and `folders` (folder paths) for the 'Data' tree.
files = []
folders = []
# NOTE: os.walk yields (dirpath, dirnames, filenames). The loop variables are
# named in a different order, so `dirname` receives the directory path,
# `dirpath` the list of sub-directory names and `filename` the list of file names.
for (dirname, dirpath, filename) in walk('Data'):
folders.extend(dirpath)
files.extend(filename)
# Stops the walk after the first yielded directory, so only the entries
# directly inside 'Data' are collected here.
break
# Prefix every collected file name with 'Data/'.
filecount = 0
for i in files:
i = 'Data/' + i
files[filecount] = i
filecount += 1
# Prefix every collected folder name with 'Data/'.
foldercount = 0
for i in folders:
i = 'Data/' + i
folders[foldercount] = i
foldercount += 1
subfolders = []
subf_files = []
# Walk each top-level folder in turn; the `break` again stops each walk after
# its first directory, so folders nested any deeper are never visited.
for i in folders:
for (dirname, dirpath, filename) in walk(i):
subfolders.extend(dirpath)
subf_files.extend(filename)
break
subf_files_count = 0
for a in subf_files:
a = i + '/'+a
# no-op self-assignment
files = files
files.append(a)
print files
subf_files = []
print files
print folders
Thanks a lot!
I don't understand what you are trying to do, especially why you break out of your walk after the first element:
import os
# Collect the path of every file and every folder below 'Data', at any depth.
files = []
folders = []
# os.walk visits the whole tree, yielding one (path, dirnames, filenames)
# triple per directory; joining with `path` turns bare names into full paths.
for path, dirnames, filenames in os.walk('Data'):
    folders.extend(os.path.join(path, name) for name in dirnames)
    files.extend(os.path.join(path, name) for name in filenames)

# print() with a single argument behaves the same on Python 2 and Python 3
print(files)
print(folders)