I have a folder containing 10K TIFF files. How can I import all the files using Python so that I can do some predictive modelling?
thanks
NK
Use the following approach:
import os
from PIL import Image
import numpy as np

dirname = 'tiff_folder_path'

# os.listdir() returns entries in arbitrary order and also lists
# sub-directories and unrelated files. Keep only TIFF files and sort the
# names so the array rows can be matched back to file names reproducibly.
tiff_names = sorted(
    fname for fname in os.listdir(dirname)
    if fname.lower().endswith(('.tif', '.tiff'))
)

final = []
for fname in tiff_names:
    # The context manager closes the file handle as soon as the pixels have
    # been copied into the NumPy array.
    with Image.open(os.path.join(dirname, fname)) as im:
        final.append(np.array(im))

# Resulting shape is (n_images, height, width[, channels]); stacking into one
# regular array requires every image to have the same dimensions.
final = np.asarray(final)
Related
I am trying to load 1967 JPG images into a NumPy array with the following code. How do I keep the original order of the images in the array?
import cv2
import glob
import numpy as np

# Collect the matching JPEG paths in whatever order glob reports them.
files = glob.glob ("/content/drive/My Drive/CourseWork/images/emotionet_validation/*.jpg")

X_data = []
for myFile in files:
    # Echo the path, then decode the image and keep it.
    print(myFile)
    X_data.append(cv2.imread(myFile))

print('X_data shape:', np.array(X_data).shape)
https://docs.python.org/3/howto/sorting.html
import cv2
import glob
import numpy as np

X_data = []
# glob.glob() returns paths in arbitrary order. sorted() builds a NEW list
# and leaves its argument untouched, so the result has to be assigned.
# NOTE: this is a plain string sort, so "10.jpg" comes before "2.jpg".
files = sorted(glob.glob("/content/drive/My Drive/CourseWork/images/emotionet_validation/*.jpg"))
## Do you need sort to ignore case?
## files = sorted(files, key=str.casefold)
for myFile in files:
    print(myFile)
    image = cv2.imread(myFile)
    X_data.append(image)
print('X_data shape:', np.array(X_data).shape)
*library
The code below uses the well-known libraries NumPy, nibabel and imageio.
# Module names are case-sensitive: the package is "numpy", not "NumPy".
import numpy as np
import os
import nibabel as nib
import imageio
The method below is the code I wrote to convert a NIfTI file to PNG.
Method
Convert a NIfTI (.nii) image to PNG images
def nii_to_image(niifile):
    """Export every NIfTI volume in a folder as a stack of PNG slices.

    For each ``.nii`` / ``.nii.gz`` file in the folder, a sub-folder named
    after the file is created inside the module-level ``imgfile`` output
    directory. One ``<index>.png`` is written per slice along the third
    axis of the volume.

    Args:
        niifile: Path of the folder that contains the NIfTI files.

    Raises:
        ValueError: If a volume's shape does not have exactly three axes
            (the shape is unpacked into three values).
    """
    for f in os.listdir(niifile):
        # Skip anything that is not a NIfTI file; nib.load() would fail on it.
        if f.endswith('.nii.gz'):
            fname = f[:-len('.nii.gz')]
        elif f.endswith('.nii'):
            fname = f[:-len('.nii')]
        else:
            continue

        # Read the volume and get its voxel data as a floating-point array.
        img = nib.load(os.path.join(niifile, f))
        img_fdata = img.get_fdata()

        # One output folder per volume. `imgfile` is looked up as a
        # module-level name; makedirs also creates missing parent folders
        # and does not fail when the folder already exists.
        img_f_path = os.path.join(imgfile, fname)
        os.makedirs(img_f_path, exist_ok=True)

        _, _, depth = img.shape
        for i in range(depth):
            # Index the same axis that `depth` counts. Indexing the first
            # axis with a z-based counter raises IndexError (or silently
            # drops slices) whenever the first and third axes differ in size.
            slice_2d = img_fdata[:, :, i]
            imageio.imwrite(os.path.join(img_f_path, '{}.png'.format(i)), slice_2d)
# Main entry point, where the file paths are given
main
if __name__ == '__main__':
    # Both paths are assigned as module-level names; nii_to_image() looks up
    # `imgfile` (the PNG output root) as a global.
    imgfile = '/content/drive/MyDrive/sem 8/dataset/propi'
    filepath = '/content/drive/MyDrive/sem 8/dataset/pr'
    nii_to_image(filepath)
After you load the NIfTI file as a NumPy array, as you did, iterate over every slice (z from img.shape) and save each slice array to PNG.
Make sure that on each iteration you select only the current slice (the z_slice_number):
# Select the 2-D slice at index z_slice_number along the third axis.
slice = img_fdata[:, :, z_slice_number]
And to save this slice you can do as follows (or use one of the other available methods):
matplotlib.image.imsave('name.png', slice)
I have a folder of dicom images and I stored these images in an array and I would like to print them out in a different folder.
I cannot find a method that will write out each of the images like the cv2.imwrite
import pydicom
import skimage, os
import numpy as np
# Folder that holds the DICOM files (placeholder value).
FolderPathName = r'FolderPathName'
# Read every entry of the folder as a DICOM dataset.
# NOTE(review): folder and file name are joined with plain `+`, so
# FolderPathName presumably has to end with a path separator - confirm.
slices = [pydicom.read_file(FolderPathName + imagename) for imagename in os.listdir(FolderPathName)]
# Sort the dicom slices in their respective order
# (the sort key is each dataset's InstanceNumber converted to int)
slices.sort(key=lambda x: int(x.InstanceNumber))
# The loop body is still missing: this is where each slice should be saved.
for x in range(len(slices)):
#write the images in a new folder
Method 1:
In your case,
The answer is ...
import pydicom
import skimage, os
import numpy as np
import cv2  # required by cv2.imwrite below

FolderPathName = r'FolderPathName'
# os.path.join supplies the path separator that plain `+` concatenation
# omits. dcmread is the current name of the reader; read_file is a
# deprecated alias.
slices = [
    pydicom.dcmread(os.path.join(FolderPathName, imagename))
    for imagename in os.listdir(FolderPathName)
]
# Sort the dicom slices in their respective order
slices.sort(key=lambda ds: int(ds.InstanceNumber))

jpg_folder = ''  # Set your jpg folder
for idx, ds in enumerate(slices):
    # write the images in a new folder, numbered by their sorted position
    jpg_filepath = os.path.join(jpg_folder, "pic-{}.jpg".format(idx))
    # NOTE(review): JPEG stores 8-bit samples while DICOM pixel data is often
    # 16-bit; the array may need rescaling to uint8 first - confirm for your
    # data.
    cv2.imwrite(jpg_filepath, ds.pixel_array)
Method 2:
But there is a better way to process DICOM files ...
import pydicom
import os
import numpy as np
import cv2

dicom_folder = ''  # Set the folder that contains your dicom image files
jpg_folder = ''  # Set the output folder for the jpg files

# Step 1. prepare your input(.dcm) and output(.jpg) filepath
dcm_jpg_map = {}
for dicom_f in os.listdir(dicom_folder):
    dicom_filepath = os.path.join(dicom_folder, dicom_f)
    # Swap a trailing ".dcm" for ".jpg"; names without that extension just
    # get ".jpg" appended. str.replace() would leave such names unchanged,
    # and cv2.imwrite() picks the output format from the file extension.
    stem, ext = os.path.splitext(dicom_f)
    if ext.lower() != '.dcm':
        stem = dicom_f
    jpg_filepath = os.path.join(jpg_folder, stem + '.jpg')
    dcm_jpg_map[dicom_filepath] = jpg_filepath
# Now, dcm_jpg_map is key,value pair of input dcm filepath and output jpg filepath

# Step 2. process your image by input/output information
for dicom_filepath, jpg_filepath in dcm_jpg_map.items():
    # convert dicom file into jpg file
    # (dcmread is the current name of the reader; read_file is a deprecated
    # alias)
    dicom = pydicom.dcmread(dicom_filepath)
    np_pixel_array = dicom.pixel_array
    cv2.imwrite(jpg_filepath, np_pixel_array)
In the above code,
Step 1 focuses on file path processing. This makes it easy to port your code to a different environment.
Step 2 is the main code, which focuses on the image processing itself.
I'm working on the Japanese Female Facial Expression (JAFFE) Database. You can find the database at this link: http://www.kasrl.org/jaffe.html.
When I downloaded the database I got a list of pictures. I would like to convert these image files into a CSV file, but I'm still new to deep learning and I don't know how. Someone proposed that I work with OpenCV. What should I do?
i have simple example
i hope this help you.
from PIL import Image
import numpy as np
import sys
import os
import csv
def createFileList(myDir, format='.jpg'):
    """Return the paths of all files below ``myDir`` with a given extension.

    The directory tree is walked recursively. The extension comparison
    ignores case, so ``.JPG`` files are found when ``.jpg`` is requested.
    The result is sorted so that callers get the same order on every run
    and platform.

    Args:
        myDir: Root directory to search; it is also printed to stdout.
        format: File-name suffix to keep, including the dot.

    Returns:
        A sorted list of paths, each built by joining the walked directory
        and the file name. Empty if nothing matches or ``myDir`` does not
        exist.
    """
    print(myDir)
    suffix = format.lower()
    fileList = []
    for root, dirs, files in os.walk(myDir, topdown=False):
        for name in files:
            if name.lower().endswith(suffix):
                fileList.append(os.path.join(root, name))
    # os.walk() yields entries in an OS-dependent order; sort for determinism.
    fileList.sort()
    return fileList
# load the original image
myFileList = createFileList('path/to/directory/')

# Open the CSV once for the whole run instead of once per image.
# newline='' is what the csv module expects; without it extra blank rows
# appear on Windows. Mode 'a' appends to any rows already in the file.
with open("img_pixels.csv", 'a', newline='') as f:
    writer = csv.writer(f)
    # Iterate the list returned above.
    for file in myFileList:
        print(file)
        with Image.open(file) as img_file:
            # Make image Greyscale
            img_grey = img_file.convert('L')
            # The array has shape (height, width); flatten() lays the pixels
            # out row by row. The builtin int replaces the np.int alias,
            # which recent NumPy releases no longer provide.
            value = np.asarray(img_grey, dtype=int).flatten()
        print(value)
        # One CSV row per image.
        writer.writerow(value)
Install pillow, numpy, pandas
Convert the image to RGB
plot RGB along with x,y co-ordinates in a pandas Dataframe
Save the dataframe as csv
Sample working code as below
from PIL import Image
from numpy import array, moveaxis, indices, dstack
from pandas import DataFrame

image = Image.open("data.tiff")
pixels = image.convert("RGB")
# Converting the PIL image directly gives shape (height, width, 3), i.e.
# rows first. Image.size is (width, height), so reshaping the row-major
# pixel data with it assigns wrong coordinates for non-square images.
rgbArray = array(pixels)
# indices() over (height, width) yields each pixel's row (y) and column (x);
# moveaxis puts that coordinate pair last so it lines up with the RGB triple.
indicesArray = moveaxis(indices(rgbArray.shape[:2]), 0, 2)
# One row per pixel: y, x, red, green, blue.
allArray = dstack((indicesArray, rgbArray)).reshape((-1, 5))
df = DataFrame(allArray, columns=["y", "x", "red", "green", "blue"])
print(df.head())
df.to_csv("data.csv", index=False)
You don't need to write any code, you can just use vips on the command-line on macOS, Linux or Windows.
So, in Terminal (or Command Prompt, if on Windows):
vips im_vips2csv TM.AN1.190.tiff result.csv
will convert the 256x256 greyscale image TM.AN1.190.tiff into a 256 line CSV with 256 entries per line. Simples!
If you want to replace the tab separators by commas, you can do:
tr '\t' , < result.csv > NewFile.csv
I am trying to open a set of images in python, but I am a bit puzzled on how I should do that. I know how to do it with one image, but I don't have a clue on how to handle several hundreds of images.
I have a file folder with a few hundred .jpg images. I want to load them in a python program to do machine learning on them. How can I do this properly?
I don't have any code yet since I am already struggling with this.
But my Idea in pseudocode was
dataset = load(images)
do some manipulations on it
How I have done it before:
from sklearn.svm import LinearSVC
from numpy import genfromtxt,savetxt


def load(x):
    """Read a comma-separated numeric file and drop its first row.

    Args:
        x: Path of the CSV file.

    Returns:
        The parsed float64 array without its first row (presumably the
        header line - confirm against the data files).
    """
    # The context manager closes the file once it has been parsed.
    with open(x, "r") as handle:
        return genfromtxt(handle, delimiter=",", dtype="f8")[1:]


dataset = load("train.csv")
# Column 0 is used as the target, the remaining columns as the features.
train = [x[1:] for x in dataset]
target = [x[0] for x in dataset]
test = load("test.csv")
linear = LinearSVC()
linear.fit(train, target)
savetxt("digit2.csv", linear.predict(test), delimiter=",", fmt="%d")
This worked fine because of the format: all the data was in one file.
If you want to process each image individually (assuming you're using PIL or Pillow) then do so sequentially:
import os
from glob import glob
try:
# PIL
import Image
except ImportError:
# Pillow
from PIL import Image
def process_image(img_path):
    """Open one image file and print its metadata dictionary.

    Args:
        img_path: Path of the image file to open.
    """
    # print() with a single pre-formatted argument runs on Python 2 and 3.
    print("Processing image: %s" % img_path)
    # Open the image. The context manager closes the underlying file;
    # `del img` would only drop the name without closing anything.
    with Image.open(img_path) as img:
        # Do your processing here
        print(img.info)
images_dir = "/home/user/images"

if __name__ == "__main__":
    # List all JPEG files in your directory
    images_list = glob(os.path.join(images_dir, "*.jpg"))
    for img_path in images_list:
        # glob() already returns paths that include images_dir; joining the
        # directory on again would duplicate it whenever images_dir is a
        # relative path.
        process_image(img_path)
Read the documentation on python glob module and in a loop process each of the images in turn.