How to set a path for a directory? - python

I'm getting an error:
Not a directory: '/root/.keras/datasets/mask_detection_dataset/train_set/without_mask'
Here is my code:
import os

import tensorflow as tf

_URL = 'https://docs.google.com/uc?export=download&id=1xdjxPV9hT-p9pFZJllpv-3Yjnl3k3JIz'
zip_dir = tf.keras.utils.get_file('mask_detection_dataset', origin=_URL, extract=True)
base_dir = os.path.join(os.path.dirname(zip_dir), 'mask_detection_dataset')

train_dir = os.path.join(base_dir, 'train_set')
validation_dir = os.path.join(base_dir, 'validation_set')

train_withoutmask_dir = os.path.join(train_dir, 'without_mask')
train_withmask_dir = os.path.join(train_dir, 'with_mask')
train_impropermask_dir = os.path.join(train_dir, 'improper_mask')

validation_withoutmask_dir = os.path.join(validation_dir, 'without_mask')
validation_withmask_dir = os.path.join(validation_dir, 'with_mask')
validation_impropermask_dir = os.path.join(validation_dir, 'improper_mask')

num_withoutmask_tr = len(os.listdir(train_withoutmask_dir))
num_withmask_tr = len(os.listdir(train_withmask_dir))
num_impropermask_tr = len(os.listdir(train_impropermask_dir))
total_tr_imgs = num_withoutmask_tr + num_withmask_tr + num_impropermask_tr

print('Total training without mask images :', num_withoutmask_tr)
print('Total training with mask images :', num_withmask_tr)
print('Total training with improper mask images :', num_impropermask_tr)
print('Total training images :', total_tr_imgs)
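A likely cause (an assumption, since the full traceback isn't shown): tf.keras.utils.get_file saves the download under the fname you pass, so zip_dir here is the downloaded archive file itself, named 'mask_detection_dataset'. base_dir then resolves to that file rather than to the extracted folder, and os.listdir fails with "Not a directory". A minimal sketch of one way to check, assuming the download is a zip archive:

import os
import tensorflow as tf

_URL = 'https://docs.google.com/uc?export=download&id=1xdjxPV9hT-p9pFZJllpv-3Yjnl3k3JIz'
# Give the download a .zip extension so the archive file and the extracted
# folder do not end up sharing the same path.
zip_path = tf.keras.utils.get_file('mask_detection_dataset.zip', origin=_URL, extract=True)
print(zip_path)
# Inspect what was actually extracted before hard-coding subfolder names.
print(os.listdir(os.path.dirname(zip_path)))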
Related

Convert image to numpy dataset for Tesseract OCR training

I am trying to create a dataset for Tesseract, but I am unable to do so. The following code should output a CSV file containing the image path and image label features, plus .npz files, but it does not append any files to the CSV:
import numpy as np
import os
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import pandas as pd

image_dataset_dir = "datasets/images"
new_dataset_folder = "datasets/new"

dataset = {
    "image": [],
    "label": []
}
for label in os.listdir(image_dataset_dir):
    images_dir = image_dataset_dir + "/" + label
    if not os.path.isdir(images_dir):
        continue
    for image_file in os.listdir(images_dir):
        # if not image_file.endswith((".jpg", ".png", ".tiff")):
        #     continue
        img = load_img(os.path.join(image_dataset_dir, label, image_file))
        x = img_to_array(img)

        rel_path = label + "/" + os.path.splitext(image_file)[0] + '.npz'
        os.makedirs(new_dataset_folder + "/" + label, exist_ok=True)
        npz_file = os.path.join(new_dataset_folder, rel_path)
        np.savez(npz_file, x)
        # print(rel_path)
        dataset["image"].append(rel_path)
        dataset["label"].append(label)

df = pd.DataFrame(dataset)
df.to_csv(os.path.join(new_dataset_folder, "train.csv"), index=False)
print('Dataset converted to npz and saved here at %s ' % new_dataset_folder)
df.head()
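One thing worth checking first (an assumption, since the directory layout isn't shown): if datasets/images doesn't exist or contains no per-label subdirectories, both loops run zero times and the CSV is written empty. A quick sanity check:

import os

image_dataset_dir = "datasets/images"
print(os.path.isdir(image_dataset_dir))
# Only subdirectories count as labels; plain files are skipped by the isdir check.
print([d for d in os.listdir(image_dataset_dir)
       if os.path.isdir(os.path.join(image_dataset_dir, d))])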
Your objective is to create the files and save the outputs and their values; .npz here is simply NumPy's zipped-array format. Below is a sample using Pandas (a DataFrame, as your requirements state) and TensorFlow:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
BATCH_SIZE = 1
IMG_SIZE = (32, 32)
new_dataset_folder = "F:\\temp\\Python\\excel"
PATH = 'F:\\datasets\\downloads\\cats_name'
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')
train_dataset = tf.keras.utils.image_dataset_from_directory(train_dir, shuffle=True,
batch_size=BATCH_SIZE, image_size=IMG_SIZE)
class_names = train_dataset.class_names
print( 'class_names: ' + str( class_names ) )
print( train_dataset )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Dataset
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
dataset = {
"image" :[],
"label" : []
}
file_order = 0
for data in train_dataset :
file_path = new_dataset_folder + "\\" + str(int(data[1][0])) + ".npz"
dataset["image"].append(file_path)
dataset["label"].append(str(int(data[1][0])))
# Save
encoding = "utf-8"
with open( new_dataset_folder + "\\" + str(file_order), "wb" ) as f:
f.write(str(data[0]).encode(encoding))
file_order = file_order + 1
df = pd.DataFrame(dataset)
df.to_csv(os.path.join(new_dataset_folder, "train.csv"), index=False)
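Note that f.write(str(data[0]).encode(encoding)) stores a text dump of the tensor, not an array that can be reloaded. If the goal is re-loadable .npz features (an assumption about the intent), np.savez is the usual route; a minimal sketch reusing train_dataset and new_dataset_folder from above:

import os
import numpy as np

for i, (images, labels) in enumerate(train_dataset):
    # Save each batch as a real .npz archive that np.load() can read back.
    np.savez(os.path.join(new_dataset_folder, str(i) + '.npz'), images.numpy())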

There is an error in my code: 'WinError 267 The directory name is invalid'

I tried this code in a Jupyter notebook and got this error:
import os

import cv2
from skimage import io, transform

# Set Directory
# -------------
BASE_DIR = "D:\Semester8\Skripsi\data\data_latih_image"
TRAIN_DIR = os.path.join(BASE_DIR, (os.listdir(BASE_DIR)[1] + '/'))
VAL_DIR = os.path.join(BASE_DIR, (os.listdir(BASE_DIR)[2] + '/'))

# Check Image Shape Data Training
# -------------------------------
kelas = os.listdir(TRAIN_DIR)[0]        # [0] = 1L
fpath = os.path.join(TRAIN_DIR, (kelas + '/'))
fname = os.listdir(fpath)[0]            # File Name
fdir = fpath + fname                    # File Directory
shape_old = cv2.imread(fdir).shape      # Shape (tuple)
x = list(shape_old)
x[0] = x[1] = 227                       # Reshape (227,227,3)
input_shape = tuple(x)                  # Input Shape

print('File Directory :', fdir)
print('File Name :', fname)
print('Image Shape :', shape_old, '==> Original')
print('Image Reshape :', input_shape, '==> AlexNet', '\n')

target_size = x = list(input_shape)
target_size.remove(3)
target_size = tuple(target_size)        # Target Size
print('Target Size :', target_size)
print('Kelas :', kelas)

io.imshow(transform.resize(io.imread(fdir), (x[0], x[1])))
io.show()
Error:
NotADirectoryError: [WinError 267] The directory name is invalid: 'D:\Semester8\Skripsi\data\data_latih_image\infeksi/001-I00079.jpg/'
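Reading the traceback: TRAIN_DIR ended up pointing at the class folder 'infeksi' itself, so os.listdir(TRAIN_DIR)[0] returned an image file ('001-I00079.jpg'), and appending '/' to it produced an invalid directory. Indexing os.listdir output is also fragile because its order is arbitrary. A more defensive sketch, assuming the same layout (the printed names tell you which index to use):

import os

BASE_DIR = "D:\\Semester8\\Skripsi\\data\\data_latih_image"

# Keep only entries that are really directories before indexing into them.
subdirs = [d for d in os.listdir(BASE_DIR)
           if os.path.isdir(os.path.join(BASE_DIR, d))]
print(subdirs)  # confirm which entry is the training set and which is validation
TRAIN_DIR = os.path.join(BASE_DIR, subdirs[0])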

Getting KeyError in my code. I have written some code, but I am getting KeyError: 1

Paths given to the directories:
raw = './data/complete_ms_data/'
train_raw_dir = './data/train/'
test_raw_dir = './data/test/'
Number of raw train images:
num_train = 20
Now there is a problem in the given function:
import glob
import os

import cv2
import numpy as np
import scipy.io as scio
import tifffile as tiff

def makedir(new_dir):
    if not os.path.exists(new_dir):
        os.makedirs(new_dir)

if __name__ == '__main__':
    if not os.path.exists(train_raw_dir):
        makedir(train_raw_dir)
    if not os.path.exists(test_raw_dir):
        makedir(test_raw_dir)

    print('begin processing...')

    i = 0
    roots = dict()
    for root, dirs, files in os.walk(raw):
        roots[i] = root
        i += 1
    del roots[0]
    for i in range(0, 64):
        if i % 2:
            del roots[i]
    roots = list(roots.values())  # dict --> list
    # random.Random(487).shuffle(roots)

    img = np.zeros([31, 512, 512])  # 31
    for i, root in enumerate(sorted(roots)):
        # print(i, root)
        png_list = sorted(glob.glob(root + '/*.png'))
        # print(png_list)
        for j, path in enumerate(png_list):
            img[j, :, :] = cv2.imread(path, 0)
        img = img.astype(np.uint8)
        if i < num_train:
            tiff.imsave(train_raw_dir + str(i) + '.tif', img)
        else:
            tiff.imsave(test_raw_dir + str(i - 20) + '.tif', img)
            scio.savemat(test_raw_dir + str(i - 20) + '.mat', {'img': img})
    print('Done')
I wrote this code to prepare the data for training and testing; I want to train on the images. I am getting the error at line 49:
del roots[i]
KeyError: 1
Where am I going wrong?
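A hedged reading of the failure: roots only gets keys for the directories os.walk actually finds under raw, so del roots[1] raising KeyError: 1 means os.walk yielded at most one entry, i.e. raw probably doesn't exist or is empty relative to the working directory. The range(0, 64) loop also assumes exactly 64 entries exist; a sketch that only touches keys that are actually present:

# Delete every odd key that exists, instead of assuming keys 0..63.
for key in list(roots.keys()):
    if key % 2:
        del roots[key]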

OpenCV - How to deal with an unbalanced dataset when developing an image classifier using a Bag of Visual Words Model?

My dataset is represented by a CSV file with two attributes: an image path and its label. I have dozens of different labels, but '51' accounts for around 34% of the dataset and '13' for around 41%, so these two labels alone make up about three quarters of the data, and my classifier ends up classifying everything as '13' (I don't think I've ever seen it predict '51'; could that be part of the problem?). How can I deal with this?
I'll leave the code that I currently have here:
from cv2 import cv2 as cv
import numpy as np
import sys

sys.path.extend(['../../'])

from src import utils

if __name__ == '__main__':
    DICTIONARY_SIZE = 50
    TRAIN_SIZE = 100
    TEST_SIZE = 100
    DETECTOR = cv.KAZE_create()
    MATCHER = cv.FlannBasedMatcher()
    EXTRACTOR = cv.BOWImgDescriptorExtractor(DETECTOR, MATCHER)
    TRAINER = cv.BOWKMeansTrainer(DICTIONARY_SIZE)
    SVM = cv.ml.SVM_create()
    SVM.setType(cv.ml.SVM_C_SVC)
    SVM.setKernel(cv.ml.SVM_LINEAR)
    SVM.setTermCriteria((cv.TERM_CRITERIA_MAX_ITER, 100, 1e-6))

    print("Generating Training and Test Sets...")
    train, test = utils.getTrainingAndTestSets('multiclass.csv', TRAIN_SIZE, TEST_SIZE)

    print("Generating Dictionary...")
    for train_entry in train:
        img_path = train_entry[0]
        img = cv.imread(img_path)
        img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        keypoint, descriptors = DETECTOR.detectAndCompute(img, None)
        if descriptors is not None:
            TRAINER.add(descriptors)
    EXTRACTOR.setVocabulary(TRAINER.cluster())

    print("Preparing Training Data...")
    train_desc = []
    train_labels = []
    for train_entry in train:
        img_path = train_entry[0]
        img_label = int(train_entry[1])
        img = cv.imread(img_path)
        img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        descriptor = EXTRACTOR.compute(img, DETECTOR.detect(img))
        if descriptor is not None:
            train_desc.extend(descriptor)
            train_labels.append(img_label)

    print("Training...")
    SVM.train(np.array(train_desc), cv.ml.ROW_SAMPLE, np.array(train_labels))

    correct_predictions = 0
    samples_tested = len(test)
    print("Testing...")
    for test_entry in test:
        img_path = test_entry[0]
        real_attribute_id = int(test_entry[1])
        img = cv.imread(img_path)
        img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        feature = EXTRACTOR.compute(img, DETECTOR.detect(img))
        try:
            _, prediction = SVM.predict(feature)
            predicted_attribute_id = int(prediction[0][0])
            if predicted_attribute_id == real_attribute_id:
                print("CORRECT PREDICTION! :)")
                correct_predictions += 1
            else:
                print("INCORRECT PREDICTION... :(")
                print("Predicted Label: " + utils.getLabelFromAttributeID(predicted_attribute_id) + "(" + str(predicted_attribute_id) + ")")
                print("Real Label: " + utils.getLabelFromAttributeID(real_attribute_id) + "(" + str(real_attribute_id) + ")")
        except Exception:
            samples_tested -= 1

    correct_percentage = (correct_predictions / samples_tested) * 100
    print("Test Results: " + "{:.2f}".format(correct_percentage) + "% Correct Predictions.")
Feel free to tell me if my current approach has any errors. Thanks.
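On the imbalance itself: with roughly 75% of samples in two labels, both the k-means vocabulary and the SVM get dominated by them, and predicting '13' for everything is a typical symptom. One common mitigation (a sketch with a hypothetical helper, not part of your utils module) is to cap the number of training entries per label before building the dictionary:

import random
from collections import defaultdict

def balance_by_label(entries, per_label_cap, seed=42):
    # Undersample: keep at most per_label_cap (image_path, label) entries per label.
    by_label = defaultdict(list)
    for entry in entries:
        by_label[entry[1]].append(entry)
    rng = random.Random(seed)
    balanced = []
    for items in by_label.values():
        rng.shuffle(items)
        balanced.extend(items[:per_label_cap])
    rng.shuffle(balanced)
    return balanced

# train = balance_by_label(train, per_label_cap=50)

Alternatively, cv.ml.SVM supports per-class weights via setClassWeights, which penalizes mistakes on the rare classes more heavily instead of discarding data.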

How to produce .npy features correctly in sound_classification_ml_production?

I ran the code from the GitHub repo sound_classification_ml_production and got good accuracy, but found that the .npy features were pre-produced by the author. So I want to produce my own .npy files, and I added the code below:
import os
import time

import librosa
import numpy as np

# FeatureExtractor class including librosa audio processing functions
class FeatureExtractor:
    ...

    def compute_save_features(self,
                              mode='mfcc',
                              sample_rate=22050,
                              n_fft=2048,
                              hop_length=512,
                              n_mfcc=40,
                              output_path='features',
                              deltas=False):
        dataset_features = []
        max_pad = self._compute_max_pad_length(self.max_audio_duration,
                                               sample_rate=sample_rate,
                                               n_fft=n_fft,
                                               hop_length=hop_length)
        print('Max Padding = ', max_pad)

        if not os.path.exists(output_path):
            print('Creating output folder: ', output_path)
            os.makedirs(output_path)
        else:
            print('Output folder already existed')
        print('Saving features in ', output_path)

        i = 0
        t = time.time()
        features_path = []
        for relative_filepath in self.dataset_df['filepath']:
            filepath = base_path + relative_filepath
            print('compute_save_features, filepath = ' + str(filepath))
            if i % 100 == 0:
                print('{} files processed in {}s'.format(i, time.time() - t))
            print('compute_save_features, librosa.load, filepath = ' + str(filepath))
            audio_file, sample_rate = librosa.load(filepath, sr=sample_rate, res_type='kaiser_fast')
            if mode == 'mfcc':
                audio_features = self.compute_mfcc(audio_file, sample_rate, n_fft, hop_length, n_mfcc, deltas)
            elif mode == 'stft':
                audio_features = self.compute_stft(audio_file, sample_rate, n_fft, hop_length)
            elif mode == 'mel-spectogram':
                audio_features = self.compute_mel_spectogram(audio_file, sample_rate, n_fft, hop_length)
            audio_features = np.pad(audio_features, pad_width=((0, 0), (0, max_pad - audio_features.shape[1])))
            print('compute_save_features, audio_features = ' + str(type(audio_features)) + ', ' + str(audio_features))
            # Here I add code to save audio_features to my own .npy
            npy_path = os.path.join(output_path, filepath.split('/')[-1].replace('wav', 'npy'))
            print('compute_save_features, npy_path = ' + str(npy_path))
            np.save(npy_path, audio_features)
            ...

fe = FeatureExtractor(base_path + 'UrbanSound8K/metadata/UrbanSound8K.csv')
dataset_df = fe.compute_save_features(mode='mfcc', n_mfcc=13, output_path=base_path + 'my_features_mfcc', deltas=True)
...
After comparing with the author's original .npy files, there are many diffs. Could anyone please tell me how to produce exactly the same .npy features as the repo's? Thanks in advance.
Keras layers are initialized randomly, so to reproduce the same result you have to set the same seed. If you don't have the seed value, you have no chance of reproducing it exactly.
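For reference, seeding the usual RNG sources in a TensorFlow/Keras script looks like this (the seed value below is a placeholder, not the repo author's):

import random

import numpy as np
import tensorflow as tf

SEED = 42  # hypothetical value; the author's actual seed is unknown
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

That said, if the diffs are in the saved MFCC arrays themselves, note that librosa's feature extraction is deterministic for a given librosa version, sample rate, n_fft, hop_length, and n_mfcc, so version and parameter mismatches are also worth ruling out.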
