I have a directory that contains a large number of subdirectories.
Each of these subdirectories contains various JPEG and PNG images.
I want to:
Select X amount of random images from these subdirectories
Create a new folder and copy these selected random images inside.
Thanks to help received here already I can print out a random selection of images using os.walk and random.choice.
import os
import random
import shutil
# Collect the full path of every image found anywhere under the directory.
files_list = []
for root, dirs, files in os.walk("/Path/to/Directory"):
    for name in files:
        # str.endswith accepts a tuple, so one call covers every extension.
        if name.endswith((".jpg", ".png", ".jpeg")):
            files_list.append(os.path.join(root, name))

# Count and print the number of jpg, jpeg and png files that were found.
file_count = len(files_list)
print(file_count)
print(files_list)
# Print two random files from the list. The sample size is capped at the
# list length because random.sample raises ValueError when asked for more
# items than the list holds.
print(random.sample(files_list, min(2, file_count)))
However, my issue is with actually selecting random images (not their names)
I have tried to create a variable imagePath that uses os.walk
#creates a variable imagePath from os.walk
# NOTE: os.walk() returns a generator, not a directory path string, so
# imagePath cannot be handed to a function that expects a path.
imagePath = os.walk("/Path/to/Directory")
and a new variable to randomly select a single image from imagePath
#create a variable that lets me choose a random image from imagePath
# NOTE: os.listdir() needs a directory path string as its argument, so
# imagePath has to be a path string for this call to work.
randomImages = random.choice(os.listdir(imagePath))
and then created a new directory and used shutil.copy to copy the randomly selected image into this new directory
#creates a new directory
os.mkdir('testDirectory')
#copies the randomly selected image into the new directory
# NOTE(review): testDirectory is used here as a bare name, but only the
# string 'testDirectory' appears in the code shown - confirm that a variable
# of that name is defined, or pass the string instead.
shutil.copy(randomImages, testDirectory)
However, I am getting the following error:
Traceback (most recent call last):
File "crawl.py", line 28, in <module>
randomImages = random.choice(os.listdir(imagePath))
TypeError: coercing to Unicode: need string or buffer, generator found
I have also tried
for root, dirs, files in os.walk("/Path/to/Directory", topdown=False):
imagePath = ("/Path/to/Directory") #creates a variable that lets me access all img files in different folders
# NOTE: imagePath is always the top-level directory, not the `root` yielded
# by os.walk, so os.listdir() only lists that directory's direct entries.
randomImages = random.choice(os.listdir(imagePath))
print randomImages
But this returns a random selection of subdirectories (not the images within them), along with .DS_Store files.
Here is the code. I assume you want to move the files rather than make another copy.
import os
import random
import shutil
# Collect the full path of every image found anywhere under the source dir.
files_list = []
for root, dirs, files in os.walk("<SOURCE_DIR>"):
    for name in files:
        # str.endswith accepts a tuple, so one call covers every extension.
        if name.endswith((".jpg", ".png", ".jpeg")):
            files_list.append(os.path.join(root, name))

# Count and print the number of jpg, jpeg and png files that were found.
file_count = len(files_list)
print(file_count)

# Pick two random files to move. The sample size is capped at the list
# length because random.sample raises ValueError when asked for more items
# than the list holds.
filesToCopy = random.sample(files_list, min(2, file_count))

destPath = "<DESTINATION_DIR>"
# If the destination directory does not exist, create it.
if not os.path.isdir(destPath):
    os.makedirs(destPath)

# Iterate over the randomly chosen files and move them.
for file_path in filesToCopy:
    shutil.move(file_path, destPath)
You should be able to feed shutil.copy() a source and destination file path. It seems to me that you have a list of files already so you can just copy them.
import os
import random
import shutil
# Collect the full path of every image found anywhere under the directory.
files_list = []
for root, dirs, files in os.walk("/Path/to/Directory"):
    for name in files:
        # str.endswith accepts a tuple, so one call covers every extension.
        if name.endswith((".jpg", ".png", ".jpeg")):
            files_list.append(os.path.join(root, name))

# Count and print the number of jpg, jpeg and png files that were found.
file_count = len(files_list)
print(file_count)
print(files_list)

# Keep the random selection in a list instead of only printing it. The
# sample size is capped because random.sample raises ValueError when asked
# for more items than the list holds.
selected_files = random.sample(files_list, min(2, file_count))

dest_path = "/path/to/new/folder/"
# makedirs also creates missing parent folders; the check avoids the error
# raised when the folder is already there.
if not os.path.isdir(dest_path):
    os.makedirs(dest_path)

for src_path in selected_files:
    shutil.copy(src_path, os.path.join(dest_path, os.path.basename(src_path)))
Related
I want to copy all the images mentioned in the list to the destination folder.
I tried this code, but nothing happened.
Original_folder_Path = 'directory'
Destination_folder_path = 'directory'
List = [['100.jpg',1], ['101.jpg',0],['102.jpg',0],['103.jpg',1]]
the Original folder has all the images named as '100.jpg' till '500.jpg'
import shutil
import os
Original_folder_Path = 'directory'
Destination_folder_path = 'directory'
List = [['100.jpg',1], ['101.jpg',0],['102.jpg',0],['103.jpg',1]]
# NOTE: glob is used below, but only shutil and os are imported in this snippet.
for files in glob.iglob(os.path.join(Original_folder_Path ,"*.jpg")):
# NOTE: '{files}.jpg' is a plain string literal (it has no f prefix), and List
# holds [name, flag] lists rather than strings, so this test never matches.
if '{files}.jpg' in List:
shutil.copy(files, Destination_folder_path )
Here is one way to do it:
import shutil
import os, glob
path = 'source_directory/'
Destination_folder_path = 'destination_directory/'
yourList = [['100.jpg', 1], ['101.jpg', 0], ['102.jpg', 0], ['103.jpg', 1]]

# Keep only the file name from each [name, flag] pair,
# e.g. mylist = ['100.jpg', '101.jpg', ...]
mylist = [values[0] for values in yourList]

# Regular files that are actually present in the source directory.
my_files = [name for name in os.listdir(path) if os.path.isfile(os.path.join(path, name))]
# Of those, the files ending with .jpg.
image_files = [name for name in my_files if name.endswith('.jpg')]

# Full paths of the requested files. Names that are not present on disk are
# skipped so a missing image does not abort the whole copy.
available = set(image_files)
myFilesWithFullPath = [os.path.join(path, item) for item in mylist if item in available]

# shutil.copy needs the destination folder to exist already.
if not os.path.isdir(Destination_folder_path):
    os.makedirs(Destination_folder_path)

for file_path in myFilesWithFullPath:
    shutil.copy(file_path, Destination_folder_path)
I have a data structure format as below:
Dataset:
training-
-Cat
-dog
-monkey
I would like to move 10 percent of the files from each class folder of the training dataset to a validation dataset. How can I do this using Python? It should automatically create the directories as well.
Dataset:
validation-
-Cat
-dog
-monkey
You can try:
import os
source = 'C:/.../training/'
destination = 'C:/.../validation/'
if not os.path.exists(destination):
    # Create a new directory because it does not exist
    os.makedirs(destination)
# NOTE: every entry of `source` is moved (subfolders included), not a
# percentage of the files.
allfiles = os.listdir(source)
for f in allfiles:
    # os.path.join adds a separator only when one is needed, so this works
    # whether or not the directory strings end with a slash.
    os.rename(os.path.join(source, f), os.path.join(destination, f))
Try this; it should help, although it has only been tested on Ubuntu, not on Windows. You may need to adapt it if path strings are different on Windows.
Tested on : Python = 3.6.13, numpy = 1.19.2
from glob import glob
import os
import numpy as np
import shutil
def _path_below_training(src):
    """Return the part of *src* that follows its last ``training`` component.

    Returns an empty string when *src* is the training folder itself. If the
    path has no ``training`` component, the whole path is returned in relative
    form so that joining it to the destination stays below the destination.
    """
    normalized = os.path.normpath(src)
    parts = normalized.split(os.sep)
    if "training" in parts:
        last = len(parts) - 1 - parts[::-1].index("training")
        below = parts[last + 1:]
        return os.path.join(*below) if below else ""
    return os.path.splitdrive(normalized)[1].lstrip(os.sep)


def copy_folder(src, dst, percent_keep=0.1):
    """Copy a random share of the files under *src* into ``<dst>/validation``.

    The folder layout below the ``training`` component of *src* is recreated
    below ``<dst>/validation``; subfolders are handled recursively. The
    original files are left in place (they are copied, not moved).

    Args:
        src: Path of the training folder, or of a folder below it.
        dst: Folder in which the ``validation`` folder is created.
        percent_keep: Fraction of the files of each folder to copy.
    """
    entries = glob(f"{src}/*")
    # select folders
    folders = [entry for entry in entries if os.path.isdir(entry)]
    # select files
    all_files = [entry for entry in entries if os.path.isfile(entry)]
    print(f"There are {len(folders)} folders in {src.split('training')[-1]}")
    print(f"There are {len(all_files)} files in {src.split('training')[-1]}")
    for folder in folders:
        # iterate through subfolders
        copy_folder(folder, dst, percent_keep)
    if len(all_files) > 0:
        # Path to be attached to the validation path. Splitting on path
        # components also works for the training folder itself, where a
        # plain split on "training/" would return the whole absolute path.
        remaining_path = _path_below_training(src)
        new_path = os.path.join(dst, "validation")
        if remaining_path:
            new_path = os.path.join(new_path, remaining_path)
        if not os.path.exists(new_path):
            os.makedirs(new_path)
        # Sample without replacement so no file is picked twice; the count is
        # capped because it cannot exceed the number of files available.
        keep_count = min(len(all_files), int(len(all_files) * percent_keep))
        keep_files = np.random.choice(all_files, keep_count, replace=False)
        print(f"Copying {len(keep_files)} random files")
        for index, file in enumerate(keep_files):
            print(f"\rCopying {index+1} / {len(keep_files)}", end="")
            shutil.copyfile(file, os.path.join(new_path, os.path.basename(file)))
        print("")
if __name__ == "__main__":
    # Path to the training folder.
    src = "/home/user/Dataset/training"
    # Directory one level above the training folder (the dataset root);
    # the validation folder is attached to it later in the code.
    dst = "/home/user/Dataset/"
    copy_folder(src, dst, percent_keep=0.1)
If you don't want to use numpy to select the random files to copy to the validation folder, use the random library.
Something like:
# random.sample picks without replacement, so no file is selected twice
# (random.choices may return the same file more than once).
keep_files = random.sample(all_files, int(len(all_files) * percent_keep))
If you don't want to use shutil or glob, you can use the os library:
os.listdir() # instead of glob
os.rename() # instead of shutil (maybe something different, not tested)
If you don't want random samples, use:
# Take the leading share of the list instead of a random sample.
keep_count = int(len(all_files) * percent_keep)
keep_files = all_files[:keep_count]
I have a folder with images of dogs, named dogID-X.jpg where X is the number of the picture that belongs to one dogID, e.g. 0a08e-1.jpg, 0a08e-2.jpg, 0a08e-3.jpg means there are three images that belong to the same dog.
How do I sort these images into two subfolders based on two lists that have only the dogID [0a08e, 4a45t, ...] i.e. all images with IDs from one list should go to one folder, and all images from another list should go into the other folder. Thanks! The list looks like this: list(y_labels) = ['86e1089a3',
'6296e909a',
'5842f1ff5',
'850a43f90',
'd24c30b4b',
'1caa6fcdb', ...]
# Move each image into the folder named after its PetID.
for image in list(y_labels):
    folder = y_labels.loc[image, 'PetID']
    old = './train_images/{}'.format(image)
    # NOTE(review): the destination starts with '//' (filesystem root) while
    # the source is relative ('./') - confirm that this is intended.
    new = '//train_images_new/{}/{}'.format(folder, image)
    try:
        os.rename(old, new)
    except OSError:
        # Catch only file-system failures (missing source, missing target
        # folder, ...) and report the image that could not be moved. A bare
        # except would also swallow KeyboardInterrupt and programming errors.
        print('{} - {}'.format(image, folder))
Well let's assume you have lis1 and lis2 as 2 lists containing only dogID, there is also a folder which contains all the images and I'll call it "mypath", sub folders will be named "lis1" and "lis2".
import os
# path to the image folder; get all file names in this folder
# and store them in the onlyfiles list
mypath = "PATH TO IMAGES FOLDER"
onlyfiles = [f for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath, f))]
# your lists of dogIDs
lis1 = ["LIST ONE"]
lis2 = ["LIST TWO"]


def _files_for_ids(filenames, dog_ids):
    """Return the names in *filenames* whose dogID is in *dog_ids*.

    The dogID is the text before the last '-' of the file name
    (dogID-X.jpg). Comparing whole IDs avoids the false and duplicate
    matches that a substring test gives when one ID occurs inside another
    file name.
    """
    wanted = set(dog_ids)
    return [name for name in filenames if name.rsplit("-", 1)[0] in wanted]


# create two separate lists from the onlyfiles list based on lis1 and lis2
lis1files = _files_for_ids(onlyfiles, lis1)
lis2files = _files_for_ids(onlyfiles, lis2)
# the two sub folders inside the mypath folder
subfolder1 = os.path.join(mypath, "lis1")
subfolder2 = os.path.join(mypath, "lis2")

for subfolder, names in ((subfolder1, lis1files), (subfolder2, lis2files)):
    # create the sub folder unless it already exists, to prevent an error
    if not os.path.exists(subfolder):
        os.makedirs(subfolder)
    # move the files to their sub folder
    for name in names:
        os.rename(os.path.join(mypath, name), os.path.join(subfolder, name))
I hope it solves your problem.
import os
import shutil
from collections import defaultdict

path = r'C:\Users\user\temp\test\dog_old'  # folder where all dog images are present
# folder below which one folder per dog is created
new_folder = r'C:\Users\user\temp\test\dog_new'

list_name = []
# dogID -> list of (full source path, new file name) pairs
dic = defaultdict(list)
# traverse the root directory and every folder below it
for root, dirs, files in os.walk(path):
    list_name.extend(files)
    for name in files:
        filename, ext = os.path.splitext(name)
        # Split on the last '-' only, so a dogID may itself contain '-'.
        group, sep, img_index = filename.rpartition('-')
        if not sep:
            # not named dogID-X, so there is nothing to sort it by
            continue
        # Remember the folder the file was found in: os.walk also lists files
        # in subfolders, which cannot be addressed through `path` alone. The
        # original extension is kept instead of assuming '.jpg'.
        dic[group].append((os.path.join(root, name), img_index + ext))

for group, images in dic.items():
    new_path = os.path.join(new_folder, group)
    # makedirs also creates new_folder itself when it is missing
    if not os.path.exists(new_path):
        os.makedirs(new_path)
    for old_image, new_image in images:
        shutil.move(old_image, os.path.join(new_path, new_image))
Try this,
import os
pet_names = ['0a08e', '0a08d']
image_ids = ["0a08e-1.jpg", "0a08e-2.jpg", "0a08e-3.jpg", "0a08d-1.jpg", "0a08d-2.jpg", "0a08d-3.jpg"]
image_folder_path = os.getcwd()  # "<image folder path>"

# One folder per pet, named after the pet, inside the image folder. The same
# full path is used for the check and for the creation, so this also works
# when image_folder_path is not the current working directory.
for pet_name in pet_names:
    pet_folder = os.path.join(image_folder_path, pet_name)
    if not os.path.exists(pet_folder):
        print("creating")
        os.makedirs(pet_folder)

# Loop over the image ids, match the pet name and move each image into the
# folder of its pet.
for img_id in image_ids:
    # the pet name is the text before the last '-' (petname-X.jpg)
    owner = img_id.rsplit('-', 1)[0]
    for pet_name in pet_names:
        if owner == pet_name:
            image_full_path_source = os.path.join(image_folder_path, img_id)
            dest_path = os.path.join(image_folder_path, pet_name)
            image_full_path_destination = os.path.join(dest_path, img_id)
            os.rename(image_full_path_source, image_full_path_destination)
            # the file has been moved; a second rename would fail
            break
Hope it helps!
I'm trying to collect the path of every file in the Data folder into a list, files[]. I can read the files in Data itself and in its direct subfolders, but my code does not go any deeper: it cannot reach a subfolder of a subfolder of Data without my writing another loop. What I want is a loop with unlimited depth, so that I get the paths of all the files anywhere under the Data folder.
For example this is what I get:
['Data/DataReader.py', 'Data/DataReader - Copy.py', 'Data/Dat/DataReader.py', 'Data/fge/er.txt']
This is what I want but it can still go into deeper folders:
['Data/DataReader.py', 'Data/DataReader - Copy.py', 'Data/Dat/DataReader.py', 'Data/fge/er.txt', 'Data/fge/Folder/dummy.png', 'Data/fge/Folder/AnotherFolder/data.dat']
This is my current path, what would i need to add or change?
import os
from os import walk
# Builds `files` and `folders` from the 'Data' folder and its direct
# subfolders. NOTE: the indentation of this snippet is not preserved here.
files = []
folders = []
# os.walk yields (current path, subdirectory names, file names); despite the
# variable names, `dirpath` receives the subdirectory names and `filename`
# the list of file names.
for (dirname, dirpath, filename) in walk('Data'):
folders.extend(dirpath)
files.extend(filename)
# NOTE: presumably inside the loop - leaving the walk after its first result
# means only the top level of 'Data' is read.
break
# Prefix every top-level file name with 'Data/'.
filecount = 0
for i in files:
i = 'Data/' + i
files[filecount] = i
filecount += 1
# Prefix every top-level folder name with 'Data/'.
foldercount = 0
for i in folders:
i = 'Data/' + i
folders[foldercount] = i
foldercount += 1
subfolders = []
subf_files = []
# Repeat the one-level listing for each direct subfolder; the break below
# again stops each walk after its first result.
for i in folders:
for (dirname, dirpath, filename) in walk(i):
subfolders.extend(dirpath)
subf_files.extend(filename)
break
subf_files_count = 0
for a in subf_files:
a = i + '/'+a
# the next assignment is a no-op
files = files
files.append(a)
print files
subf_files = []
print files
print folders
Thanks a lot!
I don't understand what you are trying to do, especially why you break out of the walk after the first element:
import os
files = []
folders = []
# os.walk visits 'Data' and every folder below it, however deep, yielding
# the current path with the names of its subfolders and files.
for (path, dirnames, filenames) in os.walk('Data'):
    folders.extend(os.path.join(path, name) for name in dirnames)
    files.extend(os.path.join(path, name) for name in filenames)
# print() with a single argument behaves the same on Python 2 and Python 3
print(files)
print(folders)
I want to copy only first 50 files. I know how to copy the files but how do I copy only first 50 files?
This is the code I'm using for copying the file. It copies all the files present in the folder. I want to copy only first 50 files.
import sys, os, time, shutil
# Copy every file in `path` to the backup folder, move it to the destination
# folder and record the outcome of each file in log.txt.
print(time.ctime())
path = "C:\\temp"
files = os.listdir(path)
print(len(files))
if not files:
    print('No Files Present')
else:
    files.sort()
    # "with" closes the log file even when a copy or move fails
    with open("log.txt", 'a') as fileobj:
        for eachfilename in files:
            src = path + '\\' + eachfilename
            temp = "C:\\Backup\\" + eachfilename
            dst = "C:\\Dest\\" + eachfilename
            try:
                shutil.copy(src, temp)
                shutil.move(src, dst)
            except (IOError, OSError, shutil.Error) as err:
                # The return value of shutil.move is not a success flag (it is
                # None on Python 2), so failure is detected by the exception.
                print(err)
                fileobj.write(eachfilename + ',' + 'failed to move' + '\n')
            else:
                print("File moved: " + eachfilename)
                fileobj.write(eachfilename + ',' + 'moved Sucessfully' + '\n')
print(time.ctime())
Is there any function to specify number of files to copy?
You could replace
files = os.listdir(path)
with
# Sort before slicing: os.listdir returns the entries in arbitrary order, so
# slicing its raw result would not give the first 50 files by name.
files = sorted(os.listdir(path))[:50]
This would slice the list and limit the number of files to 50.