Greetings! I am working on a face recognition project using a convolutional neural network. I took pictures of 3 people and stored them in 3 subfolders of the ''dataset'' folder. I want to convert the image files in those 3 subdirectories into a single CSV file so that I can run train_test_split on it. I based my code on the sample code below, but I ran into a problem. Please help me!
My code:
# Flatten every image found in the sub-folders of `path` into one CSV row of
# greyscale pixel values followed by a numeric class label (one label per
# sub-folder).
import os

import pandas as pd
from PIL import Image

path = 'datasets/'
filename = 'dataset.csv'
dim = (100, 100)

# Sorted so the folder -> label mapping is the same on every run.
folders = sorted(
    entry for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)

columns = [f'pix-{i}' for i in range(1, 1 + (dim[0] * dim[1]))] + ['class']
rows = []
# The label is the folder's position in `folders` (0, 1, 2, ...) instead of a
# single constant, otherwise every row would carry the same class.
for cls, folder in enumerate(folders):
    folder_path = os.path.join(path, folder)
    for file in sorted(os.listdir(folder_path)):
        # Open the individual file by its full path (not the listing itself).
        with Image.open(os.path.join(folder_path, file)) as img:
            # Greyscale + resize guarantees exactly dim[0] * dim[1] scalar
            # values, matching the number of 'pix-*' columns.
            pixels = list(img.convert('L').resize(dim).getdata())
        rows.append(pixels + [cls])

# Rows are collected in a list and turned into a frame once; keying rows by
# bare file name would let same-named files in different folders overwrite
# each other.
df = pd.DataFrame(rows, columns=columns)
df.to_csv(filename, index=False)
print('Completed!')
code example:
import matplotlib.image as image
import os
import pandas as pd
import cv2
from tqdm import tqdm
import numpy as np
from PIL import Image
# Flatten every image of one folder into a CSV row: the pixel values followed
# by a fixed class label.
path = 'Datasets/Resized_syam/'
filename = 'syam.csv'
files = os.listdir(path)
dim = (100, 100)
cls = 1

pixel_columns = [f'pix-{n}' for n in range(1, dim[0] * dim[1] + 1)]
df = pd.DataFrame(columns=pixel_columns + ['class'])

# Row labels run from 1 to len(files), in the order returned by os.listdir.
for i, image_name in enumerate(tqdm(files), start=1):
    img = Image.open(path + image_name)
    df.loc[i] = list(img.getdata()) + [cls]

df.to_csv(filename, index=False)
print('Task Completed')
Related
I'm trying to take files from 'D:\Study\Progs\test\samples' and, after transforming each .wav into a .png, save the result to 'D:\Study\Progs\test\"input value"'. However, after "name = os.path.abspath(file)" the program uses the wrong path, "D:\Study\Progs\test\file.wav", instead of "D:\Study\Progs\test\samples\file.wav". What can I do about it? Here are my debug output and console output.
import librosa.display
import matplotlib.pyplot as plt
import os
# Render a spectrogram PNG for every .wav file in ./samples and store it in a
# folder whose name is read from standard input.
pa = "./"
samples_dir = os.path.join(pa, "samples")
save = pa + input()
# exist_ok lets the script be re-run against an already existing folder.
os.makedirs(save, exist_ok=True)

for file in os.listdir(samples_dir):
    if not file.endswith(".wav"):
        continue
    print(file)
    # os.listdir yields bare file names; abspath() alone would resolve them
    # against the current working directory, so join with the samples
    # directory first.
    name = os.path.abspath(os.path.join(samples_dir, file))
    print(name)
    # Build the output name from the bare file name only: joining `save`
    # with an absolute path would discard `save` entirely.
    ss = os.path.splitext(file)[0] + ".png"
    save_path = os.path.join(save, ss)

    x, sr = librosa.load(name, mono=True, duration=5)
    X = librosa.stft(x)
    Xdb = librosa.amplitude_to_db(abs(X))
    fig = plt.figure(figsize=(20, 10))
    librosa.display.specshow(Xdb, sr=sr)
    plt.savefig(save_path)
    # Close the figure so figures do not pile up across files.
    plt.close(fig)
If you don't mind using pathlib, as @Andrew suggests, I think what you're trying to do can be accomplished by combining the current working directory with the stem of each .wav file to construct the filename for your .png.
from pathlib import Path
# Demo: derive a ".png" output path in the current directory for every
# ".wav" file found in ./samples.
cwd = Path.cwd()  # Current path.
sample_dir = cwd / "samples"  # Source files are here.

# Make some demo files if necessary.
if not sample_dir.exists():
    sample_dir.mkdir()
    (sample_dir / "file1.wav").touch()  # Make empty demo file.
    (sample_dir / "file2.wav").touch()  # Make empty demo file.

for file in sample_dir.glob("*.wav"):
    print(file)
    # Append ".png" to the stem rather than calling with_suffix() on it:
    # with_suffix() replaces whatever follows the last dot, so a stem such
    # as "take.01" would be truncated to "take.png".
    outfile = cwd / f"{file.stem}.png"
    print(f"->{outfile}")
    pass  # Replace this with whatever else needs to be done.
Here's my alternative working variant
import librosa.display
import matplotlib.pyplot as plt
import os
from pathlib import Path
# Ask for an output folder and a samples folder (both relative to the current
# working directory), then write one spectrogram PNG per .wav sample.
cwd = Path.cwd()
print("Vvedite directoriu dlya sohraneniya resultatov:")
sf = input()
save_folder = cwd / sf
print("Vvedite nazvanie directorii s primerami .wav failov:")
smpl = input()
sample_dir = cwd / smpl
# parents/exist_ok: accept nested folder names and allow re-running into an
# existing folder instead of failing.
save_folder.mkdir(parents=True, exist_ok=True)

for file in sample_dir.glob("*.wav"):
    print(file)
    outfile = file.stem + ".png"
    print(f"->{outfile}")
    x, sr = librosa.load(file, mono=True, duration=5)
    save_path = save_folder / outfile
    X = librosa.stft(x)
    Xdb = librosa.amplitude_to_db(abs(X))
    fig = plt.figure(figsize=(20, 10))
    librosa.display.specshow(Xdb, sr=sr)
    plt.savefig(save_path)
    # Close the figure so figures do not pile up across files.
    plt.close(fig)
import os
import glob
from PIL import Image
# Write a half-size copy of every JPEG next to its original, with "_half"
# inserted before the extension.
files = glob.glob('/Users/mac/PycharmProjects/crop001/1/*.jpg')
for f in files:
    img = Image.open(f)
    half_size = (int(img.width / 2), int(img.height / 2))
    img_resize = img.resize(half_size)
    title, ext = os.path.splitext(f)
    img_resize.save(f'{title}_half{ext}')
I want to save new images to
"/Users/mac/PycharmProjects/crop001/1/11/*.jpg"
Not
"/Users/mac/PycharmProjects/crop001/1/*.jpg"
Any help will be appreciated!
You can save your processed images to your preferred directory (/Users/mac/PycharmProjects/crop001/1/11/*.jpg) by changing the parameter for img_resize.save() there.
I assume that you still want to save the processed image with the _half suffix in its filename, so the final code looks like this:
import os
import glob
from PIL import Image
# Write a half-size copy of every JPEG in the source folder into
# DESTINATION_PATH, with "_half" inserted before the extension.
files = glob.glob('/Users/mac/PycharmProjects/crop001/1/*.jpg')
DESTINATION_PATH = '/Users/mac/PycharmProjects/crop001/1/11/'  # The preferred path for saving the processed image

# Saving into a directory that does not exist fails, so create it up front.
os.makedirs(DESTINATION_PATH, exist_ok=True)

for f in files:
    base_filename = os.path.basename(f)
    title, ext = os.path.splitext(base_filename)
    final_filepath = os.path.join(DESTINATION_PATH, title + '_half' + ext)
    # The context manager releases the source file handle after each image.
    with Image.open(f) as img:
        # max(1, ...) keeps 1-pixel-wide/high images from collapsing to size 0.
        half_size = (max(1, img.width // 2), max(1, img.height // 2))
        img_resize = img.resize(half_size)
        img_resize.save(final_filepath)
All of the function's docs I used here can be found here:
https://docs.python.org/3/library/os.path.html
In the code below, all the output files are written into the T1 folder. How can I separate those output files into subfolders with the same names as the original subfolders (where the original CSV files were)? Thanks.
import pandas as pd
import numpy as np
import glob
import os
path = '/root/Desktop/TT1/'
mystep = 0.4


# define the function
def data_splitter(df, name, output_dir="/root/Desktop/T1"):
    """Split df into consecutive 'Time' windows of width mystep, one CSV each.

    Window k holds the rows with k * mystep <= Time < (k + 1) * mystep and is
    written to "<output_dir>/<name>_<k>.csv". The last window has no upper
    bound, so the row(s) at the maximum Time are always kept, even when that
    maximum is an exact multiple of mystep. Rows with negative or missing
    Time fall in no window and are not written.

    Args:
        df: DataFrame with a numeric 'Time' column.
        name: Prefix for the output file names.
        output_dir: Existing directory that receives the files.
    """
    times = df['Time']
    max_time = times.max()  # max value of Time for the current csv file (df)
    if pd.isna(max_time):
        # Empty frame or no valid Time values: nothing to split.
        return
    # Number of windows needed so that the one starting at
    # floor(max_time / mystep) * mystep (the one holding max_time) is included.
    n_bins = int(np.floor(max_time / mystep)) + 1
    lower_edges = np.arange(n_bins) * mystep
    for k in range(n_bins):
        in_bin = times >= lower_edges[k]
        if k + 1 < n_bins:
            # Use the next window's lower edge as the upper bound so
            # neighbouring windows share exactly one boundary value.
            in_bin = in_bin & (times < lower_edges[k + 1])
        df[in_bin].to_csv(os.path.join(output_dir, "{}_{}.csv".format(name, k)))
# Visit every sub-folder below `path` (at any depth) and split each CSV file
# found directly inside it.
for root, dirs, _ in os.walk(path):
    for d in dirs:
        path_sub = os.path.join(root, d)  # this is the current subfolder
        csv_pattern = os.path.join(path_sub, '*.csv')
        for filename in glob.glob(csv_pattern):
            df = pd.read_csv(filename)
            name = os.path.basename(filename)  # file name of the current csv
            data_splitter(df, name)
This should help
Demo:
import pandas as pd
import numpy as np
import glob
import os
path = '/root/Desktop/TT1/'
mystep = 0.4


# define the function
def data_splitter(df, name, dest_folder, basepath="/root/Desktop/"):
    """Split df into 'Time' windows of width mystep, one CSV per window.

    Window k holds the rows with k * mystep <= Time < (k + 1) * mystep and is
    written to "<basepath>/<dest_folder>/<name>_<k>.csv". The last window has
    no upper bound, so the row(s) at the maximum Time are always kept, even
    when that maximum is an exact multiple of mystep. Rows with negative or
    missing Time fall in no window and are not written.

    Args:
        df: DataFrame with a numeric 'Time' column.
        name: Prefix for the output file names.
        dest_folder: Sub-folder of basepath that receives the files; created
            (with parents) if missing.
        basepath: Root directory for the output.
    """
    times = df['Time']
    max_time = times.max()  # max value of Time for the current csv file (df)
    if pd.isna(max_time):
        # Empty frame or no valid Time values: nothing to split.
        return
    # Number of windows needed so that the one holding max_time is included.
    n_bins = int(np.floor(max_time / mystep)) + 1
    lower_edges = np.arange(n_bins) * mystep

    # The destination does not depend on k, so create it once, before the loop.
    dest_f = os.path.join(basepath, dest_folder)
    os.makedirs(dest_f, exist_ok=True)

    for k in range(n_bins):
        in_bin = times >= lower_edges[k]
        if k + 1 < n_bins:
            # Use the next window's lower edge as the upper bound so
            # neighbouring windows share exactly one boundary value.
            in_bin = in_bin & (times < lower_edges[k + 1])
        df[in_bin].to_csv(os.path.join(dest_f, "{}_{}.csv".format(name, k)))
# Walk the whole tree under `path`; every CSV file is split into a destination
# folder named after the directory that contains it.
for root, dirs, files in os.walk(path):
    for f in files:
        if not f.endswith(".csv"):
            continue
        filename = os.path.join(root, f)
        df = pd.read_csv(filename)
        name = os.path.basename(filename)
        dest_folder = os.path.basename(os.path.dirname(filename))
        data_splitter(df, name, dest_folder)
A similar approach should work here:
import pandas as pd
import numpy as np
import glob
import os
input_root = '/root/Desktop/TT1'
output_root = '/root/Desktop/T1'
mystep = 0.4


# define the function
def data_splitter(input_file, output_path, output_basename):
    """Read a CSV and split it into 'Time' windows of width mystep.

    Window k holds the rows with k * mystep <= Time < (k + 1) * mystep and is
    written to "<output_path>/<output_basename>_<k>.csv". The last window has
    no upper bound, so the row(s) at the maximum Time are always kept, even
    when that maximum is an exact multiple of mystep. Rows with negative or
    missing Time fall in no window and are not written.

    Args:
        input_file: Path of the CSV file to read; needs a numeric 'Time' column.
        output_path: Existing directory that receives the files.
        output_basename: Prefix for the output file names.
    """
    df = pd.read_csv(input_file)
    times = df['Time']
    max_time = times.max()  # max value of Time for the current csv file (df)
    if pd.isna(max_time):
        # No rows or no valid Time values: nothing to split.
        return
    # Number of windows needed so that the one holding max_time is included.
    n_bins = int(np.floor(max_time / mystep)) + 1
    lower_edges = np.arange(n_bins) * mystep
    for k in range(n_bins):
        in_bin = times >= lower_edges[k]
        if k + 1 < n_bins:
            # Use the next window's lower edge as the upper bound so
            # neighbouring windows share exactly one boundary value.
            in_bin = in_bin & (times < lower_edges[k + 1])
        df[in_bin].to_csv(os.path.join(output_path, f"{output_basename}_{k}.csv"))
# Walk the whole tree under input_root and split every CSV file into the
# matching sub-folder of output_root.
for dirpath, dirnames, filenames in os.walk(input_root):
    # relpath stays correct when input_root carries a trailing separator,
    # which slicing dirpath by len(input_root) does not.
    sub_folders = os.path.relpath(dirpath, input_root)
    output_path = os.path.normpath(os.path.join(output_root, sub_folders))
    for filename in filenames:
        if not filename.lower().endswith('.csv'):
            continue
        input_file = os.path.join(dirpath, filename)
        os.makedirs(output_path, exist_ok=True)  # Ensure the output folder exists
        # Pass only the file stem: data_splitter joins it with output_path and
        # appends "_<k>.csv" itself, so a full path ending in ".csv" here
        # would produce names like "x.csv_0.csv".
        output_basename = os.path.splitext(filename)[0]
        data_splitter(input_file, output_path, output_basename)
This should recreate the folder structure under your output root folder.
Below code works fine if the images are in a single directory.
from skimage import io, img_as_float
import matplotlib.image as mpimg
import os
import pandas as pd
def load_images(folder):
    """Return a DataFrame listing the dark images found directly in folder.

    Every file in folder is read and converted to float; a file is reported
    (printed and recorded) when the mean of its float pixel values is below
    0.1.

    Args:
        folder: Directory whose files are all expected to be readable images.

    Returns:
        DataFrame with columns 'Folder' and 'File', one row per dark image.
    """
    # Local import: np is used below but is not imported alongside this function.
    import numpy as np

    data = []
    for filename in os.listdir(folder):
        img = img_as_float(mpimg.imread(os.path.join(folder, filename)))
        # NOTE(review): the original indentation was lost; this assumes only
        # dark images are meant to be recorded - confirm.
        if np.mean(img) < 0.1:
            print(filename)
            data.append((folder, filename))
    return pd.DataFrame(data, columns=['Folder', 'File'])


df = load_images('C:/Data/Testing')
df.to_csv('final.csv')
If my 'C:/Data/Testing' folder has many subfolders, and the image files are inside those subfolders, how should I deal with that?
In case if someone is viewing this question, here's the answer to it:
from skimage import io, img_as_float
import matplotlib.image as mpimg
import os
import pandas as pd
def load_images(n):
    """Return a DataFrame listing the dark images found in several folders.

    Every file in each folder of n is read and converted to float; a file is
    reported (printed and recorded) when the mean of its float pixel values
    is below 0.1.

    Args:
        n: Iterable of directory paths; all files in them are expected to be
            readable images.

    Returns:
        DataFrame with columns 'Folder' and 'File', one row per dark image.
    """
    # Local import: np is used below but is not imported alongside this function.
    import numpy as np

    data = []
    for dirs in n:
        for filename in os.listdir(dirs):
            img = img_as_float(mpimg.imread(os.path.join(dirs, filename)))
            # NOTE(review): the original indentation was lost; this assumes
            # only dark images are meant to be recorded - confirm.
            if np.mean(img) < 0.1:
                print(filename)
                data.append((dirs, filename))
    return pd.DataFrame(data, columns=['Folder', 'File'])


from glob import glob

# The trailing "/" in the pattern restricts the matches to directories.
folder = glob("C:/Data/Testing/*/")
df = load_images(folder)
I hope this helps.
Loading and saving images in OpenCV is quite limited, so... what is the preferred ways to load all images from a given folder? Should I search for files in that folder with .png or .jpg extensions, store the names and use imread with every file? Or is there a better way?
Why not just try loading all the files in the folder? If OpenCV can't open it, oh well. Move on to the next. cv2.imread() returns None if the image can't be opened. Kind of weird that it doesn't raise an exception.
import cv2
import os
def load_images_from_folder(folder):
    """Return every file in folder that cv2.imread could load, as a list.

    Entries for which cv2.imread returns None are left out.
    """
    candidates = (
        cv2.imread(os.path.join(folder, entry))
        for entry in os.listdir(folder)
    )
    return [img for img in candidates if img is not None]
I used skimage. You can create a collection and access elements the standard way, i.e. col[index]. This will give you the RGB values.
from skimage.io import imread_collection
# Glob pattern selecting the images to load - replace with your own path.
col_dir = 'cats/*.jpg'
# Create a collection from the files matching the pattern; elements are
# accessed by index, i.e. col[index].
col = imread_collection(col_dir)
import glob
# Load every .jpg file of a directory into a list of images.
cv_img = []
for img in glob.glob("Path/to/dir/*.jpg"):
    n = cv2.imread(img)
    # cv2.imread returns None instead of raising when a file cannot be
    # opened; skip those so the list only holds real images.
    if n is not None:
        cv_img.append(n)
If all images are of the same format:
import cv2
import glob
# Read every .jpg file matched by the pattern, in the order glob returns them.
images = list(map(cv2.imread, glob.glob('path/to/files/*.jpg')))
For reading images of different formats:
import cv2
import glob
# Load all images of several formats from one directory.
imdir = 'path/to/files/'
ext = ['png', 'jpg', 'gif']  # Add image formats here

# Plain loop instead of a list comprehension run only for its side effect
# (which would build and discard a list of None values).
files = []
for e in ext:
    files.extend(glob.glob(imdir + '*.' + e))

images = [cv2.imread(file) for file in files]
you can use glob function to do this. see the example
import cv2
import glob
# Read each .png file matched by the pattern. cv_img is rebound on every
# iteration, so it refers to the last image read once the loop ends.
png_pattern = "path/to/folder/*.png"
for img in glob.glob(png_pattern):
    cv_img = cv2.imread(img)
You can also use matplotlib for this, try this out:
import matplotlib.image as mpimg
def load_images(folder):
    """Read every file in folder with matplotlib and return the images as a list.

    Results that are None are left out.
    """
    loaded = (
        mpimg.imread(os.path.join(folder, entry))
        for entry in os.listdir(folder)
    )
    return [img for img in loaded if img is not None]
import os
import cv2
# Read every file below rootdir (at any depth) with OpenCV. `frame` is
# rebound for each file, so it refers to the last one read after the loops.
rootdir = "directory path"
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        image_path = os.path.join(subdir, file)
        frame = cv2.imread(image_path)
This adds onto the answer from Rishabh so that it can also handle files in the folder that are not images.
import matplotlib.image as mpimg
# Best-effort load of every file in `folder`; files that cannot be read as
# images are reported and skipped.
images = []
folder = './your/folder/'
for filename in os.listdir(folder):
    try:
        img = mpimg.imread(os.path.join(folder, filename))
    except Exception:
        # `Exception` rather than a bare `except:` so that Ctrl-C and
        # SystemExit still stop the script; kept broad because the failure
        # raised for an unreadable file is not fixed to a single type.
        print('Cant import ' + filename)
    else:
        if img is not None:
            images.append(img)
images = np.asarray(images)
Here is a simple script that features OpenCV, scikit-image, and glob:
#!C:\Users\test\anaconda3\envs\data_aquisition\python.exe
import glob
import argparse
from timeit import default_timer as timer
import skimage
from skimage.io import imread_collection
import cv2
def get_args():
    """Parse the command line and return the resulting namespace.

    Two positional arguments are expected: `src_path`, the image directory,
    and `extension`, which must be one of 'jpg', 'png' or 'webp'.
    """
    cli = argparse.ArgumentParser(
        description='script that test the fastest image loading methods'
    )
    cli.add_argument('src_path', help="diractorry that contains the ims")
    cli.add_argument(
        'extension',
        help="extension of the images",
        choices=['jpg', 'png', 'webp'],
    )
    return cli.parse_args()
def load_imgs_scikit_image_collection(path: str):
    """Build a scikit-image collection from `path`, print its size, return it."""
    collection = imread_collection(path)
    print('loaded: ', len(collection), ' imgs')
    return collection
def load_imgs_scikit_image_glob(path):
    """Read every file matching the glob pattern `path` with skimage.io.imread."""
    return [skimage.io.imread(match) for match in glob.glob(path)]
def load_image_opencv(path: str):
    """Read every file matching the glob pattern `path` with cv2.imread.

    Returns a list with one entry per matched file, in glob order.
    """
    imgs = []
    for f in glob.glob(path):
        # append, not extend: extend would iterate over the loaded image and
        # add its rows one by one instead of adding the image itself.
        imgs.append(cv2.imread(f))
    return imgs
def load_image_opencv_glob(path: str):
    """Read the files matching the glob pattern `path` with cv2.imread, in sorted name order."""
    return [cv2.imread(name) for name in sorted(glob.glob(path))]
def laod_images_opencv_extisions(path):
    """Read with cv2.imread every file in directory `path` that has a known image extension.

    Files are gathered extension by extension, in the order listed in `ext`.
    """
    ext = [".jpg", ".gif", ".png", ".tga", ".webp"]  # Add image formats here
    files = []
    # Plain loop instead of a list comprehension run only for its side effect.
    for e in ext:
        files.extend(glob.glob(path + '/*' + e))
    return [cv2.imread(file) for file in files]
def laod_images_ski_extisions(path):
    """Read with skimage.io.imread every file in directory `path` that has a known image extension.

    Files are gathered extension by extension, in the order listed in `ext`.
    """
    ext = [".jpg", ".gif", ".png", ".tga", ".webp"]  # Add image formats here
    files = []
    # Plain loop instead of a list comprehension run only for its side effect.
    for e in ext:
        files.extend(glob.glob(path + '/*' + e))
    return [skimage.io.imread(file) for file in files]
def show_image(img):
    """Show `img` in a window named 'image', call cv2.waitKey(0), then destroy all windows."""
    cv2.imshow('image', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def _timed(label, loader, *args):
    """Call loader(*args), print `label` with the elapsed seconds, return the result."""
    start = timer()
    result = loader(*args)
    end = timer()
    print(label, end - start)
    return result


def _preview(imgs, index=2):
    """Show imgs[index] when it exists and is not None; otherwise say why not."""
    if len(imgs) > index and imgs[index] is not None:
        show_image(imgs[index])
    else:
        print('no image at index', index, 'to preview')


def main():
    """Time each image-loading strategy on the directory given on the command line.

    Prints one timing line per strategy and previews one loaded image after
    most of them.
    """
    args = get_args()
    # Named `pattern` rather than `dir` to avoid shadowing the builtin.
    pattern = args.src_path + '/*.' + args.extension

    imgs = _timed('scikit_image image collection',
                  load_imgs_scikit_image_collection, pattern)
    _preview(imgs)

    _timed('scikit_image and glob', load_imgs_scikit_image_glob, pattern)

    imgs_opencv = _timed('opencv glob', load_image_opencv_glob, pattern)
    _preview(imgs_opencv)

    valid_imgs_opencv = _timed('opencv glob extensions',
                               laod_images_opencv_extisions, args.src_path)
    _preview(valid_imgs_opencv)

    valid_imgs_ski = _timed('scikit_image glob extensions',
                            laod_images_ski_extisions, args.src_path)
    _preview(valid_imgs_ski)


# Guarded so that importing this module does not start the benchmark.
if __name__ == "__main__":
    main()
Command to run script: python best_image_loader.py D:\data\dataset\radar_dome\manual png
png is used to load only png files.
Output
loaded: 876 imgs
scikit_image image collection 0.08248239999999996
scikit_image and glob 14.939381200000001
opencv glob 10.9708085
opencv glob extensions 10.974014100000005
scikit_image glob extensions 14.877048600000002
your_path = 'your_path'
ext = ['*.jpg', '*.png', '*.gif'] # Add image formats here
images = []
not_copy = 0
for item in [your_path + '/' + e for e in ext]:
images += glob(item)