why I'm not getting the right shape - python

import numpy as np
# load all images in a directory
from os import listdir
from matplotlib import image
# load all images in a directory
loaded_images = list()
path='/content/IDC_regular_ps50_idx5'
for big_folder in listdir(path):
for folder in listdir(path + '/' + big_folder ):
for filename in listdir(path + '/' + big_folder + '/' + folder):
# load image
img_data = image.imread(path + '/' + big_folder +'/' + folder + '/' +filename)
# store loaded image
loaded_images.append(np.vstack(img_data))
#print('> loaded %s %s' % (filename, img_data.shape))
when i press np.array(loaded_images).shape I'm getting (277524) which is the numbers of images , what I desire is the 4D (277524 , 3 , 50 , 50 ) .
note : when I do loaded_images[i].shape I get (3 , 50 ,50 )

Related

Convert image to numpy dataset for tesseract ocr training

I am trying to create a dataset for tesseract. But unable to do so. The following code should output a csv file containing the image path and image label feature and .npz file. But the code does append any files in the csv
import numpy as np
import os
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import pandas as pd
image_dataset_dir = "datasets/images"
new_dataset_folder = "datasets/new"
dataset = {
"image" :[],
"label" : []
}
for label in os.listdir(image_dataset_dir):
images_dir= image_dataset_dir + "/" + label
if not os.path.isdir(images_dir):
continue
for image_file in os.listdir(images_dir):
# if not image_file.endswith(".jpg", ".png",".tiff"):
# continue
img = load_img(os.path.join(image_dataset_dir, label, image_file))
x = img_to_array(img)
rel_path = label + "/" + os.path.splitext(image_file)[0] + '.npz'
os.makedirs(new_dataset_folder + "/" + label, exist_ok=True)
npz_file = os.path.join(new_dataset_folder, rel_path)
np.savez(npz_file, x)
# print(rel_path)
dataset["image"].append(rel_path)
dataset["label"].append(label)
df = pd.DataFrame(dataset)
df.to_csv(os.path.join(new_dataset_folder, "train.csv"), index=False)
print('Dataset converted to npz and saved here at %s '%new_dataset_folder)
df.head()
Your objective, create files and save the output and their values.
.npz is none public zones, try using it with different backgrounds matching patterns.
Sample: Using Pandas ( data frame as your requirements ) and Tensorflow
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
BATCH_SIZE = 1
IMG_SIZE = (32, 32)
new_dataset_folder = "F:\\temp\\Python\\excel"
PATH = 'F:\\datasets\\downloads\\cats_name'
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')
train_dataset = tf.keras.utils.image_dataset_from_directory(train_dir, shuffle=True,
batch_size=BATCH_SIZE, image_size=IMG_SIZE)
class_names = train_dataset.class_names
print( 'class_names: ' + str( class_names ) )
print( train_dataset )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Dataset
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
dataset = {
"image" :[],
"label" : []
}
file_order = 0
for data in train_dataset :
file_path = new_dataset_folder + "\\" + str(int(data[1][0])) + ".npz"
dataset["image"].append(file_path)
dataset["label"].append(str(int(data[1][0])))
# Save
encoding = "utf-8"
with open( new_dataset_folder + "\\" + str(file_order), "wb" ) as f:
f.write(str(data[0]).encode(encoding))
file_order = file_order + 1
df = pd.DataFrame(dataset)
df.to_csv(os.path.join(new_dataset_folder, "train.csv"), index=False)

Trying to divide images in multiple folders to train and test folder, but code is only reading/copying last folder

My current dataset has images inside multiple folders, labeled by class. I want to create a 'train' and 'test' folder, and class folders within those directories. Then, I want to put 70% of the images in the 'train' folders and 30% of the images in the 'test' folder, like so:
Train Folder
Beans folder:
img1
img2
...
Cake folder:
img1
...
My code to do this right now is this (I'm testing on a tiny dataset right now):
classes = ('BEANS', 'CAKE') #'Candy', 'Cereal', 'Chips', 'Chocolate',
# 'Coffee', 'Corn', 'Fish', 'Flour', 'Honey', 'Jam', 'Juice',
# 'Milk', 'Nuts', 'Oil', 'Pasta', 'Rice', 'Soda', 'Spices',
# 'Sugar', 'Tea', 'Tomato Sauce', 'Vinegar', 'Water')
# create sub-folders for each class
OUTPATH = 'C:\\Users\\User\\Documents\\Dataset\\freiburg_groceries_dataset\\sets'
for x in classes:
os.makedirs(OUTPATH+'\\train\\'+x, exist_ok=True)
os.makedirs(OUTPATH+'\\test\\'+x, exist_ok=True)
INPATH = 'C:\\Users\\User\\Documents\\Dataset\\freiburg_groceries_dataset\\imgs'
filenames = os.listdir(INPATH + '\\' + x)
counts = {x:0 for x in classes}
for x in classes:
print(len(filenames))
print(filenames)
testset = len(filenames) / 10 * 0.3 # 30%
for fl in filenames:
for cl in classes:
if cl in fl:
counts[cl] += 1 # increase count +1
if counts[cl] < testset:
shutil.move(INPATH + '\\' + x + '\\' + fl, OUTPATH+'\\test\\'+cl+'\\'+fl)
else:
shutil.move(INPATH + '\\' + x + '\\' + fl, OUTPATH+'\\train\\'+cl+'\\'+fl)
My code creates the folders I need, but then it only reads the CAKE folder and ignores the BEANS folder. It also moves all the cake images to the train folder and leaves the cake-->test folder blank, and does not move any of the BEAN images. Can anyone see where my code is failing to follow the steps to work with the BEANS folder, and to move 30% of cake images to the test folder?
I think there are two places where your code is not functioning as intended.
OUTPATH = 'C:\\Users\\User\\Documents\\Dataset\\freiburg_groceries_dataset\\sets'
for x in classes:
os.makedirs(OUTPATH+'\\train\\'+x, exist_ok=True)
os.makedirs(OUTPATH+'\\test\\'+x, exist_ok=True)
INPATH = 'C:\\Users\\User\\Documents\\Dataset\\freiburg_groceries_dataset\\imgs'
filenames = os.listdir(INPATH + '\\' + x) # <------ source of error no. 1 `x` is already defined in the loop and holds the last value that it contained i.e. `cake`
corrections needed
filenames = []
for x in classes:
filenames.extend(os.listdir(INPATH + '\\' + x))
and the next one at
for x in classes:
print(len(filenames))
print(filenames)
testset = len(filenames) / 10 * 0.3 # <----- source of error no.2 This is not 30% rather 3 percent only (don't divide by 10).
Edit: (the following would be much more simple solution you would be looking for)
classes = ('BEANS', 'CAKE')
# create sub-folders for each class
OUTPATH = 'C:\\Users\\User\\Documents\\Dataset\\freiburg_groceries_dataset\\sets'
INPATH = 'C:\\Users\\User\\Documents\\Dataset\\freiburg_groceries_dataset\\imgs'
for class in classes:
train_path = OUTPATH + '\\train\\' + class + '\\'
test_path = OUTPATH + '\\test\\' + class + '\\'
in_path = INPATH + '\\' + class + '\\'
os.makedirs(train_path , exist_ok=True)
os.makedirs(test_path, exist_ok=True)
filenames = os.listdir(in_path)
no_of_pictures = len(filenames)
test_set = int(len(filenames) * 0.3)
train_set= len(filenames) - test_set
test_imgs = filenames[:test_set]
train_imgs = filenames[test_set:]
for test_img in test_imgs:
shutil.move( inpath + test_img, test_path + test_img)
for train_img in train_imgs:
shutil.move( inpath + train_img, train_path + train_img)

How to add (merge) multiple images in one directory

I have two types (A and B) of images in one directory and I want to add them together (not concatenate) them like this:
A1.jpeg + B1.jpeg = Merged1.jpeg
A2.jpeg + B2.jpeg = Merged2.jpeg
      ...
AN.jpeg + BN.jpeg = MergedN.jpeg
I don't know how to customize my code so it would work for the whole directory:
import cv2
import os
for i,filenames in os.walk('.'):
A1 = cv2.imread('A1.jpeg',0)
B1 = cv2.imread('B1.jpeg',0)
image = cv2.add([A1,B1])
filename = ('Merged' + {i} + '.jpeg')
cv2.imwrite(filename, image)
Any ideas? Thanks
EDIT:
I added counter in for loop, because you cannot define for loop in the way I did before.
import cv2
import os
i=0
for filenames in os.walk('.'):
i = i + 1
A = "A" + str(i) + ".jpeg"
B = "B" + str(i) + ".jpeg"
Ai = cv2.imread(A,0)
Bi = cv2.imread(B,0)
image = cv2.add([Ai,Bi])
filename = ('Merged' + str(i) + '.jpeg')
cv2.imwrite(filename, image)
But it only adds A1 and B1. Is this a wrong way to count in for loop?
Just change the filename in imread with a custom variable which you can use i in.
A = "A" + str(i) + ".jpeg"
B = "B" + str(i) + ".jpeg"
Ai = cv2.imread(A,0)
Bi = cv2.imread(B,0)
I assumed that i is a number.
You can use glob for that it will be easier i think
import glob
for i, a_file in enumerate(glob.glob('./A*')):
A = cv2.imread(a_file,0)
B = cv2.imread(a_file.replace('A', 'B'),0)
image = cv2.add([A,B])
filename = ('Merged' + {i} + '.jpeg')
cv2.imwrite(filename, image)
I solved it like this:
import glob
import cv2
number_of_images = 750
for a_file in glob.glob('./A*'):
for i in range(1,number_of_images):
A = "A" + str(i) + ".jpeg"
B = "B" + str(i) + ".jpeg"
A1 = cv2.imread(A,0)
B1 = cv2.imread(B,0)
image = cv2.add([A1,B1])
filename = ('Merged' + str(i) + '.jpeg')
cv2.imwrite(filename, image)
print(i)
if i==number_of_images-1:
break
Thanks for your advices!

Python Permission Error [WinError32]...being used by another process

I have some images in a folder and if their dimensions are not what I want, I copy them to another folder and then resize them using the thumbnail command. After I've done that I want to rename them and as part of their new name I want to include the pixel sizes.
Anyway I've tried my code and I run into the following error:
File "C:/Anaconda/PhD/Scikit-Learn/Image Resizing3.py", line 55, in resize_img
os.rename(image_directory + '/Resized'+ "/" + image_filenames[i],image_directory + '/Resized'+ "/" + image_filenames[i][0:(len(image_filenames[i])-4)]+ str(img1.size[0]) + ' x ' + str(img1.size[1]) + '.jpg' )
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process:
I thought if I used the with keyword that it would close any processes I was using.
If anyone can see any problems in my code wrt to the error then I'd really appreciate it if you have a solution for it.
import scipy.io as sio
from matplotlib import pyplot as plt
from matplotlib import image as mpimg
from PIL import Image
from collections import OrderedDict
import shutil as sh
import os
image_directory = 'C:/StreetView/Labelled Images with Classes/Folder1 /Labelled/Classes/wood'
def resize_img(image_directory, pixel_width, pixel_height):
image_filenames = []
DIR = os.listdir(image_directory)
for n in DIR:
if n.endswith('.jpg'):
image_filenames.append(n)
image_sizes = []
images = []
for i in range(len(image_filenames)):
with Image.open(image_directory + '/' + image_filenames[i]) as images:
image_sizes.append(tuple(images.size))
ordered_imsize = tuple(OrderedDict.fromkeys(image_sizes))
pixarea = []
for i in range(len(ordered_imsize)):
arg1 = ordered_imsize[i][0]
arg2 = ordered_imsize[i][1]
pixarea.append(arg1 * arg2)
print('The maximum pixel area is', max(pixarea))
print('The image dimensions giving the largest area is:', ordered_imsize[pixarea.index(max(pixarea))] )
print('The minimum pixel area is', min(pixarea))
print('The image dimensions giving the smallest area is:', ordered_imsize[pixarea.index(min(pixarea))] )
if not os.path.exists(image_directory + '/Resized'):
os.mkdir(image_directory + '/Resized') # Then make the folder
for i in range(len(image_directory)):
if (image_sizes[i][0] >= 100) or (image_sizes[i][1] >= 400):
print('Inside the greater than loop')
sh.copy(image_directory + "/" + image_filenames[i],image_directory + '/Resized')
image_sizes1 = []
with Image.open(image_directory + '/Resized'+ "/" + image_filenames[i]) as img1: # need to use the with keyword because this makes sure the file is closed after its been used so it doesn't cause any conflicts like it did when it wasn't used.
print('Checking size: ', img1.size)
img2 = img1.thumbnail((pixel_width,pixel_height)) # img1 is not the thumbnail
image_sizes1.append(tuple(img1.size))
os.rename(image_directory + '/Resized'+ "/" + image_filenames[i],image_directory + '/Resized'+ "/" + image_filenames[i][0:(len(image_filenames[i])-4)]+ str(img1.size[0]) + ' x ' + str(img1.size[1]) + '.jpg' )
print('Image number', i, 'has been resized')
else:
print('inside the else loop')
sh.copy(image_directory + "/" + image_filenames[i],image_directory + '/Resized')
image_sizes2 = []
with Image.open(image_directory + '/Resized'+ "/" + image_filenames[i]) as img3:
image_sizes2.append(tuple(img3.size))
img3.save(image_directory + '/Resized' + "/" + image_filenames[i][0:len(image_filenames[i])-4] + '_' + str(img3.size[0]) + 'x' + str(img3.size[1]) + ".jpg")
#os.remove(image_directory + '/Resized'+ "/" + image_filenames[i])
print('Image number', i, 'has been copied over and not resized')
resize_img(image_directory,100,400)
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process
Solved it, I used shutil.move instead of os.rename.

Script Loop through files in directory

I have the following code which creates the txt file I require from a shp.file with the data I need. I have a folder called profiles containing a few number of shape files named (profil1.shp, profil2.shp, profil3.shp etc.). I was wondering how to create a loop so that the script creates for each file a txt file with the same name (eg. for profil1.shp create profil1.txt, profil2.shp create profil2.txt and so on).
import ogr, os, sys, osr
os.chdir('..\profiles')
file = open('profil1.txt', 'w')
driver = ogr.GetDriverByName('ESRI Shapefile')
datasource = driver.Open('profil1.shp', 0)
if datasource is None:
print 'Could not open file'
sys.exit(1)
layer = datasource.GetLayer()
feature = layer.GetNextFeature()
while feature:
id = feature.GetFieldAsString('ID')
Distanta = feature.GetFieldAsString('DIST')
Z = feature.GetFieldAsString('Z')
geom = feature.GetGeometryRef()
x = str(geom.GetX())
y = str(geom.GetY())
file.write(id + " " + Distanta + " " + "[X]:" + " " + x + ' ' + '[Y]:' + " " + y + " " + " " + "[Z]" + Z + " " + "\n")
feature.Destroy()
feature = layer.GetNextFeature()
datasource.Destroy()
file.close()
edit: the code is returning a Could not open file.Photo of the folder containing the files and their respective names. Safe to assume I am doing something wrong.
import ogr, os, sys, osr,os.path
os.chdir = ('C:\Users\Andrei\Desktop\profil3')
l = os.listdir('C:\Users\Andrei\Desktop\profil3')
for i in l:
if i.endswith('.shp'):
s1 = s.split('.')[0] + '.txt'
file = open(s1, 'w')
driver = ogr.GetDriverByName('ESRI Shapefile')
datasource = driver.Open(i, 0)
if datasource is None:
print 'Could not open file'
sys.exit(1)
layer = datasource.GetLayer()
feature = layer.GetNextFeature()
while feature:
id = feature.GetFieldAsString('ID')
Distanta = feature.GetFieldAsString('DIST')
Z = feature.GetFieldAsString('Z')
geom = feature.GetGeometryRef()
x = str(geom.GetX())
y = str(geom.GetY())
file.write(id + " " + Distanta + " " + "[X]:" + " " + x + ' ' + '[Y]:' + " " + y + " " + " " + "[Z]" + Z + " " + "\n")
feature.Destroy()
feature = layer.GetNextFeature()
datasource.Destroy()
file.close()
You can use os.listdir() to list the files and folders in the current directory.
This returns a list of all files in the current directory (or the directory given to it as parameter , if no parameter is specified it checks the current directory) .
Then you can check for files with the name ending with .shp using string.endswith() function and then use that to create your new files.
Example of a small portion -
import os , os.path
l = os.listdir()
for i in l:
if i.endswith('.shp'):
s1 = s.split('.')[0] + '.txt'
At the end s1 would contain the file with extension as .txt .
Then you can do your logic on this file, and keep on doing like this.
Full code would look something like -
import ogr, os, sys, osr,os.path
os.chdir('..\profiles')
l = os.listdir()
for i in l:
if i.endswith('.shp'):
s1 = s.split('.')[0] + '.txt'
file = open(s1, 'w')
driver = ogr.GetDriverByName('ESRI Shapefile')
datasource = driver.Open(i, 0)
if datasource is None:
print 'Could not open file'
sys.exit(1)
layer = datasource.GetLayer()
feature = layer.GetNextFeature()
while feature:
id = feature.GetFieldAsString('ID')
Distanta = feature.GetFieldAsString('DIST')
Z = feature.GetFieldAsString('Z')
geom = feature.GetGeometryRef()
x = str(geom.GetX())
y = str(geom.GetY())
file.write(id + " " + Distanta + " " + "[X]:" + " " + x + ' ' + '[Y]:' + " " + y + " " + " " + "[Z]" + Z + " " + "\n")
feature.Destroy()
feature = layer.GetNextFeature()
datasource.Destroy()
file.close()
A better way of openning files, etc is using with statement. Look up its tutorial here.

Categories

Resources