I'm preparing a set of medical imaging volumes and segmentation masks to be input into a multi-label segmentation neural network for training. I am receiving the following error message when I attempt to load my 5D tensors into a torch TensorDataset:
Traceback (most recent call last):
File (path/project.py), line 122, in <module>
train_dataset = torch.utils.data.TensorDataset(timg, tmask)
File (path/dataset.py), line 365, in __init__
assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors), "Size mismatch between tensors"
File (path/dataset.py)", line 365, in <genexpr>
assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors), "Size mismatch between tensors"
TypeError: 'numpy.int32' object is not callable
My original assumption was that the size mismatch was due to the difference in the dimensions of my tensors -- the feature tensor had dimensions 60x128x128x64x1 and the label tensor had dimensions 60x128x128x64x5. However, making the shape and size of these tensors equal has not resolved the issue. My other theory was that the issue was related to this line of code:
def transt(list):
array = np.asarray(list, ->np.int32<-)
Changing the dtype did not seem to have an effect. I also tried casting the tensor to different dtypes, again with seemingly no effect on the problem.
Attached is the code. Unsure of how to proceed and any advice would be very appreciated.
import numpy as np
import os
import tensorflow as tf
import nibabel as nib
import matplotlib.pyplot as plt
from VNet import VNet
import Layers
import torchvision
from torchvision.transforms import ToTensor
import torch
from torch.utils.data import TensorDataset, DataLoader
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()
# Setting path
dirname = os.path.dirname(__file__)
path = os.path.join(dirname, 'Liver_MR_Dataset')

# Number of leading (name-sorted) cases used for training; the remainder is
# used for validation.
N_TRAIN = 60


def _load_volumes(folder):
    """Load every file of ``path/folder`` with nibabel, in sorted name order.

    Files are opened through their full path, so the process working
    directory does not have to be changed for each folder.

    Returns:
        (names, volumes): the sorted file names and the matching list of
        arrays returned by ``get_fdata()``.
    """
    folder_dir = os.path.join(path, folder)
    names = sorted(os.listdir(folder_dir))
    volumes = [nib.load(os.path.join(folder_dir, name)).get_fdata()
               for name in names]
    return names, volumes


# Loading images/masks
image_path, img_list = _load_volumes('Image')
print(len(img_list))
train_img = img_list[:N_TRAIN]
print(len(train_img))
val_img = img_list[N_TRAIN:]
print(len(val_img))

gall_path, gall_list = _load_volumes('Gall')
train_gall = gall_list[:N_TRAIN]
val_gall = gall_list[N_TRAIN:]

IVC_path, IVC_list = _load_volumes('IVC')
train_IVC = IVC_list[:N_TRAIN]
val_IVC = IVC_list[N_TRAIN:]

kidney_path, kidney_list = _load_volumes('Kidney')
train_kidney = kidney_list[:N_TRAIN]
val_kidney = kidney_list[N_TRAIN:]

liver_path, liver_list = _load_volumes('Liver')
train_liver = liver_list[:N_TRAIN]
val_liver = liver_list[N_TRAIN:]

rib_path, rib_list = _load_volumes('Rib')
train_rib = rib_list[:N_TRAIN]
val_rib = rib_list[N_TRAIN:]

# The original script finished with the dataset folder as working directory;
# kept so that any later relative paths keep resolving the same way.
os.chdir(path)
# Transformations
def transt(list):
    """Pack the training volumes into one int32 tensor of shape (60, 128, 128, 64).

    The input is converted to an int32 array, forced to the fixed target
    shape with ``np.resize`` and wrapped as a TensorFlow tensor.

    NOTE(review): ``np.resize`` does not interpolate; it flattens the data
    and truncates or repeats it to fill the target shape. Presumably the
    volumes are already 128x128x64 -- confirm, otherwise use a real
    resampling routine.
    """
    target_shape = [60, 128, 128, 64]
    as_int = np.asarray(list, np.int32)
    return tf.convert_to_tensor(np.resize(as_int, target_shape))
def transv(list):
    """Pack the validation volumes into one int32 tensor of shape (7, 128, 128, 64).

    The input is converted to an int32 array, forced to the fixed target
    shape with ``np.resize`` and wrapped as a TensorFlow tensor.

    NOTE(review): ``np.resize`` does not interpolate; it flattens the data
    and truncates or repeats it to fill the target shape. Presumably the
    volumes are already 128x128x64 and there are 7 of them -- confirm.
    """
    target_shape = [7, 128, 128, 64]
    as_int = np.asarray(list, np.int32)
    return tf.convert_to_tensor(np.resize(as_int, target_shape))
tgall = transt(train_gall)
vgall = transv(val_gall)
tIVC = transt(train_IVC)
vIVC = transv(val_IVC)
tkidney = transt(train_kidney)
vkidney = transv(val_kidney)
tliver = transt(train_liver)
vliver = transv(val_liver)
trib = transt(train_rib)
vrib = transv(val_rib)
timg4d = transt(train_img)
vimg4d = transv(val_img)

# The image is repeated five times along a new last axis so that it has the
# same shape as the five-channel mask tensor.
timg = tf.stack([timg4d] * 5, axis=4)
print(timg.shape)
print(timg.size)
# Fixed: the validation image stack was built from the *training* tensor.
vimg = tf.stack([vimg4d] * 5, axis=4)
tmask = tf.stack([tgall, tIVC, tkidney, tliver, trib], axis=4)
print(tmask.shape)
print(tmask.size)
vmask = tf.stack([vgall, vIVC, vkidney, vliver, vrib], axis=4)

# Create Datasets
# TensorDataset calls ``tensor.size(0)`` on each argument. On a TensorFlow
# tensor with numpy behaviour enabled, ``.size`` is an integer attribute, which
# is what raised "'numpy.int32' object is not callable". Hand PyTorch real
# torch tensors instead.
# NOTE(review): the channel axis is last here; PyTorch 3D convolutions
# usually expect it second -- confirm what VNet expects.
train_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(timg.numpy()), torch.from_numpy(tmask.numpy()))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=60)
#val_dataset = torch.utils.data.TensorDataset(
#    torch.from_numpy(vimg.numpy()), torch.from_numpy(vmask.numpy()))
#val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=60)
size is a tuple, all(tensors[0].size[0] == tensor.size[0] for tensor in tensors),
Related
I'm doing semantic segmentation for microscope image stacks.
My code works fine but the thing is it only uses one core of my CPU which makes me wait a long time to get the segmented images.
I recently knew that there are ways to use multicore processing with other python libraries, but I don't know how to implement it.
So someone can help me edit my code with one of the multiprocessing libraries?
My code is in the below.
import numpy as np
from patchify import patchify, unpatchify
import os
import cv2
from tqdm import tqdm
from tensorflow import keras
from tensorflow.keras.utils import normalize
import natsort
model = keras.models.load_model("C:/mymodel.h5", compile=False)

# creating recon image directory
recon_image_directory = "C:/Users/recon"
if not os.path.exists(recon_image_directory):
    os.makedirs(recon_image_directory)

large_image_path = "C:/original_images/"
check_images = natsort.natsorted(os.listdir(large_image_path))

PATCH = 256  # patch edge length; patches are cut without overlap

for num, large_image_name in tqdm(enumerate(check_images), total=len(check_images)):
    # endswith() copes with names that contain no dot or several dots, where
    # split('.')[1] raised IndexError or looked at the wrong component.
    if not large_image_name.endswith(".tif"):
        continue

    img = cv2.imread(large_image_path + large_image_name, 0)
    if img is None:
        # cv2.imread returns None instead of raising on an unreadable file.
        print("skipping unreadable image: " + large_image_name)
        continue

    patches = patchify(img, (PATCH, PATCH), step=PATCH)  # (rows, cols, 256, 256)
    n_rows, n_cols = patches.shape[0], patches.shape[1]

    # Run every patch of the image through the network in one predict()
    # call instead of one call per patch; the per-call overhead dominated
    # the run time.
    flat_patches = patches.reshape(-1, PATCH, PATCH)                # (N, 256, 256)
    # axis=2 on the stacked patches normalises the same rows as axis=1 did
    # on a single (256, 256) patch.
    flat_norm = normalize(flat_patches, axis=2).astype(np.float32)
    batch = np.stack((flat_norm,) * 3, axis=-1)                     # (N, 256, 256, 3)
    predicted_patches = (model.predict(batch)[:, :, :, 0] > 0.5).astype(np.uint8)

    predicted_patches_reshaped = np.reshape(
        predicted_patches, (n_rows, n_cols, PATCH, PATCH))
    reconstructed_image = unpatchify(predicted_patches_reshaped, img.shape)
    cv2.imwrite(recon_image_directory + "/recon" + '_' + str(num) + ".tif", reconstructed_image)
Does this snippet work? It should run each prediction in a separate process.
# NOTE: this snippet needs `import ray` in addition to the imports of the
# script above.
# NOTE(review): `model` is a module-level Keras model captured by the remote
# function; presumably it has to be loaded inside each worker instead --
# confirm.
@ray.remote
def predict(large_image_name: str, num=None) -> None:
    """Segment one large image patch by patch and write the result to disk.

    Args:
        large_image_name: file name inside ``large_image_path``.
        num: index used in the output file name ``recon_<num>.tif``. When
            omitted, the input file name without its extension is used, so
            outputs of different images never overwrite each other.
    """
    img = cv2.imread(large_image_path + large_image_name, 0)
    patches = patchify(img, (256, 256), step=256)
    predicted_patches = []
    for i in range(patches.shape[0]):
        for j in range(patches.shape[1]):
            single_patch = patches[i, j, :, :]  # (256, 256)
            single_patch_norm = normalize(np.array(single_patch), axis=1)
            single_patch_input = np.stack((single_patch_norm,) * 3, axis=-1)  # (256, 256, 3)
            single_patch_input = np.expand_dims(single_patch_input, 0)  # (1, 256, 256, 3)
            single_patch_prediction = (model.predict(single_patch_input)[0, :, :, 0] > 0.5).astype(np.uint8)
            predicted_patches.append(single_patch_prediction)
    predicted_patches = np.array(predicted_patches)
    predicted_patches_reshaped = np.reshape(
        predicted_patches, (patches.shape[0], patches.shape[1], 256, 256))
    reconstructed_image = unpatchify(predicted_patches_reshaped, img.shape)
    # `num` used to be read from an undefined name inside the task; it is now
    # an explicit argument.
    tag = num if num is not None else os.path.splitext(large_image_name)[0]
    cv2.imwrite(recon_image_directory + "/recon" + '_' + str(tag) + ".tif", reconstructed_image)


futures = []
for num, large_image_name in tqdm(enumerate(check_images), total=len(check_images)):
    # endswith() also handles names with no dot or with several dots.
    if large_image_name.endswith(".tif"):
        futures.append(predict.remote(large_image_name, num))
# Block until every submitted task has finished.
ray.get(futures)
We also have a high-level abstraction for doing this sort of thing. If you're interested, you should check out the Ray AI Runtime (AIR).
I'm currently documenting how to convert each data type to be compatible with a new deep learning framework. I'll cut out redundant code in the future :)
The following code can be executed on VScode interactive window.
The code has two part.
convert mat file to npy file (.mat -> .npy)
convert npy file to nifti file (.npy -> nii.gz) and add specific name in the path to match with nnU-Net data format. See the nnU-Net dataset_conversion.md if you're interested in it.
How does it actually work?
1)10000001.mat -> 10000001.npy
2)10000001.npy -> AORTA_001_0000.nii.gz
The paths can be adjusted by each individual user.
#%%
import numpy as np
import nibabel as nb
import pathlib
import numpy as np
from torch.utils.data import Dataset
import scipy.io
root_data = '/mnt/intern/code/dataset/test/original'
root_label = '/mnt/intern/code/dataset/test/label'

data_files = list(pathlib.Path(root_data).iterdir())
label_files = list(pathlib.Path(root_label).iterdir())
# Sorted so that the i-th data file is paired with the i-th label file.
examples = sorted(data_files)
examples2 = sorted(label_files)

if len(examples) != len(examples2):
    # Pairing by position is only meaningful when both folders match.
    raise ValueError('found %d data files but %d label files'
                     % (len(examples), len(examples2)))

for fname, fname2 in zip(examples, examples2):
    # matfile data load
    data = scipy.io.loadmat(str(fname))['data']
    label = scipy.io.loadmat(str(fname2))['label']  # (512, 512, 251)

    # Last 8 characters of the file name without extension, e.g. '10000001'.
    # Path.stem is used because splitting the whole path on '.' breaks as
    # soon as a directory name contains a dot.
    case_id = fname.stem[-8:]
    # Image and label are deliberately saved under the same (data) case id.
    np.save('/mnt/intern/mat2npy/original/' + case_id, data)
    np.save('/mnt/intern/mat2npy/label/' + case_id, label)
#%%
# Name change to match with nnU-Net data format
import numpy as np
import nibabel as nb
import pathlib
import numpy as np
from torch.utils.data import Dataset
import scipy.io
import numpy as np
import nibabel as nib
root_data = '/mnt/intern/mat2npy/imagesTr'
root_label = '/mnt/intern/mat2npy/labelsTr'

# Only .npy inputs are picked up: the NIfTI outputs are written into the same
# folders, so without the filter a second run would try to np.load them.
data_files = [p for p in pathlib.Path(root_data).iterdir() if p.suffix == '.npy']
label_files = [p for p in pathlib.Path(root_label).iterdir() if p.suffix == '.npy']
# Sorted so that the i-th data file is paired with the i-th label file.
examples = sorted(data_files)
examples2 = sorted(label_files)

if len(examples) != len(examples2):
    # Pairing by position is only meaningful when both folders match.
    raise ValueError('found %d data files but %d label files'
                     % (len(examples), len(examples2)))

affine = np.eye(4)  # identity affine
for fname, fname2 in zip(examples, examples2):
    data = np.array(np.load(str(fname)), dtype=np.float32)
    # NOTE(review): labels are kept as float32 as before; presumably an
    # integer dtype is more appropriate for segmentation maps -- confirm.
    label = np.array(np.load(str(fname2)), dtype=np.float32)  # (512, 512, 251)

    nifti_data = nib.Nifti1Image(data, affine)
    nifti_label = nib.Nifti1Image(label, affine)

    # Last three characters of the file name without extension, e.g. '001'.
    case_id = fname.stem[-3:]
    # Images carry the channel suffix '_0000'; label files must NOT carry it
    # (nnU-Net expects labelsTr/AORTA_001.nii.gz for imagesTr/AORTA_001_0000.nii.gz).
    nib.save(nifti_data, '/mnt/intern/mat2npy/imagesTr/' + 'AORTA_' + case_id + '_0000.nii.gz')
    nib.save(nifti_label, '/mnt/intern/mat2npy/labelsTr/' + 'AORTA_' + case_id + '.nii.gz')
I have images of [64,512,5] stored in *.npy files which I convert into *.tfrecords files.
I have verified that the reading of said records corresponds correctly with what is present in the *.npy files. However, when I perform some operation in the parser, like adding 1 to each pixel of the image, the result is not the expected one. The result should be 64*512*5 = 163840 but it is 163839.99980013957 (not always the same).
I have tried to perform different operations like tf.subtract, but the results are the same.
Could someone tell me what is wrong?
import re
import ast
import sys, select
import random as rn
from glob import glob
from tqdm import tqdm
from datetime import datetime
from configparser import SafeConfigParser
import numpy as np
import numpy.ma as ma
import scipy.misc
import os.path
from os import mkdir, stat
from os.path import exists, dirname, abspath
from os.path import join as dir_join
import tensorflow as tf
''' File hierarchy
'''
# Each directory below is the parent of the previous one, starting from the
# folder that contains this file.
_code_dir = dirname(abspath(__file__))
_python_dir = dirname(_code_dir)
_model_dir = dirname(_python_dir)
_project_dir = dirname(_model_dir)
_ml_dir = dirname(_project_dir)
_srv_dir = dirname(_ml_dir)
_root_datasets_dir = dir_join(_srv_dir,'machine_learning','data_sets/ssd_prepared')
_config_dir = dir_join(_python_dir, 'config')
'''Data sets directories
'''
THIS_DATA_SET_DIR = 'Sph_50m' #WARNING: Global variable also used in helper.py
_data_dir = dir_join(_root_datasets_dir, THIS_DATA_SET_DIR)
_data_set_dir = dir_join(_data_dir,'ImageSet')  # holds the <split>.txt file lists
_data_npy_dir = dir_join(_data_dir,'data')  # holds the source .npy files
_data_tfRecord_dir = dir_join(_data_dir,'tfRecord')  # TFRecord output root
''' Configuration parser
'''
# NOTE(review): SafeConfigParser is a deprecated alias of ConfigParser and was
# removed in Python 3.12; switch the import when upgrading.
cfg_parser = SafeConfigParser()
cfg_parser.read(dir_join(_config_dir,'cfg_model.ini'))
''' Private variables
'''
# Values read from cfg_model.ini (sections: train, data_shape, data_set).
_batch_size = cfg_parser.getint(section='train', option='batch_size')
_max_epoch = cfg_parser.getint(section='train', option='max_epoch')
_standarize = cfg_parser.getboolean(section='train', option='standarize_input')
# input_shape is stored as a Python literal and parsed with literal_eval.
_input_shape = ast.literal_eval(cfg_parser.get(section='data_shape', option='input_shape'))
# Channel indices into the last axis of the stored arrays.
_label_channel = cfg_parser.getint(section='data_shape', option='label_channel')
_track_channel = cfg_parser.getint(section='data_shape', option='track_channel')
_mask_channel = cfg_parser.getint(section='data_shape', option='mask_channel')
# Names of the train/validation/test splits.
_data_train = cfg_parser.get(section='data_set', option='data_train')
_data_val = cfg_parser.get(section='data_set', option='data_val')
_data_test = cfg_parser.get(section='data_set', option='data_test')
def _int64_feature(value):
    """Wrap an array, flattened to 1-D, as an int64-list ``tf.train.Feature``."""
    flat_values = value.reshape(-1)
    int_list = tf.train.Int64List(value=flat_values)
    return tf.train.Feature(int64_list=int_list)
def _bytes_feature(value):
    """Wrap a single bytes value as a bytes-list ``tf.train.Feature``."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)
def _floats_feature(value):
    """Wrap an array, flattened to 1-D, as a float-list ``tf.train.Feature``."""
    flat_values = value.reshape(-1)
    float_list = tf.train.FloatList(value=flat_values)
    return tf.train.Feature(float_list=float_list)
def numpy_to_TFRecord():
    """Convert the .npy files of every split into sharded TFRecord files.

    For each of the train/validation/test splits the file list
    ``<split>.txt`` is read, every listed ``.npy`` array is split into
    image, label, mask and track channels and written as one
    ``tf.train.Example``. A new shard ``<n>_<split>.tfrecords`` is started
    once roughly 100 MB of serialized examples have been written to the
    current one.

    Raises:
        AssertionError: if a file named in a list does not exist.
    """
    max_shard_bytes = 100 * (10 ** 6)

    if not exists(_data_tfRecord_dir):
        mkdir(_data_tfRecord_dir)

    for dataset in [_data_train, _data_val, _data_test]:
        tfRecord_folder = dir_join(_data_tfRecord_dir, dataset)
        if not exists(tfRecord_folder):
            mkdir(tfRecord_folder)

        # Retrieve list of files
        projections_dir = []
        with open(dir_join(_data_set_dir, dataset + '.txt'), 'r') as file_:
            for x in file_:
                filename = dir_join(_data_npy_dir, x.strip() + '.npy')
                assert exists(filename), "{} doesn't exist".format(filename)
                projections_dir.append(filename)

        writer = None
        shard_bytes = 0
        numFile = 0
        try:
            for projection_dir in tqdm(projections_dir, ncols=100,
                                       desc='TFRecord {}'.format(dataset)):
                scanName = projection_dir.split('/')[-1].split('.')[0]

                if writer is None or shard_bytes > max_shard_bytes:
                    # Close the finished shard before opening the next one;
                    # previously only the last writer was ever closed.
                    if writer is not None:
                        writer.close()
                    # address to save the TFRecords file
                    train_filename = dir_join(
                        tfRecord_folder,
                        str(numFile) + '_' + dataset + '.tfrecords')
                    # open the TFRecords file
                    writer = tf.python_io.TFRecordWriter(train_filename)
                    numFile += 1
                    shard_bytes = 0

                # Load the image
                projection = np.load(projection_dir)
                image = projection[:, :, :_label_channel]
                label = projection[:, :, _label_channel].astype(int)
                mask = projection[:, :, _mask_channel].astype(int)
                track = projection[:, :, _track_channel].astype(int)

                # Create a feature
                feature = {'image': _floats_feature(image),
                           'label': _int64_feature(label),
                           'mask': _int64_feature(mask),
                           'track': _int64_feature(track),
                           'scanName': _bytes_feature(tf.compat.as_bytes(scanName))}
                # Create an example protocol buffer
                example = tf.train.Example(features=tf.train.Features(feature=feature))

                # Serialize to string and write on the file
                serialized = example.SerializeToString()
                writer.write(serialized)
                # Count the bytes handed to the writer instead of stat()-ing
                # the file: the file may not be flushed yet, and adding its
                # cumulative size on every record over-counts.
                shard_bytes += len(serialized)
        finally:
            if writer is not None:
                writer.close()
        sys.stdout.flush()
def readTFRecord():
    """Build the TFRecord input pipeline and sanity-check it against the .npy files.

    The pipeline parses each record back into image, label, mask, track and
    scan name, shuffles, batches and prefetches. One pass over the training
    records is then run, and for every sample the sum of the element-wise
    difference to the original ``.npy`` image is printed when it is non-zero.

    NOTE: images are stored and processed as float32. After float32
    arithmetic in the parser (such as the ``+ 1`` below) each value is rounded
    again, so the summed difference to the source array is only approximately
    the ideal value; compare with a tolerance rather than exactly.

    Returns:
        (training_filenames, validation_filenames, filenames, iterator):
        the two lists of shard paths, the file-name placeholder and the
        initializable iterator.
    """
    image_dim = _input_shape[0] * _input_shape[1] * _label_channel
    label_dim = _input_shape[0] * _input_shape[1]

    mean = np.load(dir_join(_data_dir, 'mean.npy'))
    std = np.load(dir_join(_data_dir, 'std.npy'))
    # Created for standardisation but not applied anywhere in this function.
    mean_tf = tf.convert_to_tensor(mean, dtype=tf.float32, name='mean')
    std_tf = tf.convert_to_tensor(std, dtype=tf.float32, name='std')

    with tf.variable_scope('TFRecord'):
        def _parse_function(example_proto):
            """Parse one serialized example into (image, label, mask, track, scanName)."""
            with tf.variable_scope('parser'):
                features = {'image': tf.FixedLenFeature([image_dim], tf.float32),
                            'label': tf.FixedLenFeature([label_dim], tf.int64),
                            'mask': tf.FixedLenFeature([label_dim], tf.int64),
                            'track': tf.FixedLenFeature([label_dim], tf.int64),
                            'scanName': tf.FixedLenFeature([], tf.string)}
                parsed_features = tf.parse_single_example(example_proto, features)

                # Reshape image data into the original shape
                image = tf.reshape(parsed_features['image'],
                                   [_input_shape[0], _input_shape[1], _label_channel],
                                   name='image')
                label = tf.reshape(parsed_features['label'], _input_shape, name='lable_reshape')
                mask = tf.reshape(parsed_features['mask'], _input_shape, name='mask_reshape')
                track = tf.reshape(parsed_features['track'], _input_shape, name='track_reshape')
                scanName = parsed_features['scanName']

                # Test transformation: shifts every pixel by one (float32).
                image = image + tf.constant(1., dtype=tf.float32)
                return image, label, mask, track, scanName

        training_filenames = glob(dir_join(_data_tfRecord_dir, _data_train, '*.tfrecords'))
        validation_filenames = glob(dir_join(_data_tfRecord_dir, _data_val, '*.tfrecords'))

        filenames = tf.placeholder(tf.string, shape=[None], name='filenames')
        dataset = tf.data.TFRecordDataset(filenames)
        dataset = dataset.map(_parse_function, num_parallel_calls=20)  # Parse the record into tensors.
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(_batch_size, drop_remainder=True)
        dataset = dataset.prefetch(buffer_size=10)
        iterator = dataset.make_initializable_iterator()
        next_batch = iterator.get_next()

    # The session is closed when the check is done.
    with tf.Session() as sess:
        # Initialise once, before the loop: re-initialising on every pass
        # restarted the dataset each time, so OutOfRangeError was never
        # reached for a non-empty dataset and the loop never ended.
        sess.run(iterator.initializer, feed_dict={filenames: training_filenames})
        while True:
            try:
                img, _, _, _, scanX = sess.run(next_batch)
            except tf.errors.OutOfRangeError:
                break
            for i, scan in enumerate(scanX):
                scan_name = scan.decode("utf-8")
                print(scan_name)
                projection = np.load(dir_join(_data_npy_dir, scan_name + '.npy'))
                imagenp = projection[:, :, :_label_channel]
                difference = np.sum(img[i, ...] - imagenp)
                if np.abs(difference) > 0.:
                    print(difference)

    return training_filenames, validation_filenames, filenames, iterator
# Script entry point: write the TFRecord shards, then read them back and
# compare them with the source .npy files.
if __name__ == '__main__':
    numpy_to_TFRecord()
    readTFRecord()
The test I'm doing in the previous code is to convert the *.npy files to *.tfrecords. Then, I compare the *.tfrecords with the *.npy. The value should be 0 if both images were identical.
img, _, _, _, scanX = sess.run(next)
for i, scan in enumerate(scanX):
print(scan.decode("utf-8"))
projection = np.load(dir_join(_data_npy_dir, scan.decode("utf-8") + '.npy'))
imagenp = projection[:,:,:_label_channel]
print(np.sum(img[i,...] - imagenp))
If the data is not preprocessed, these images are the same, however, if we perform some kind of transformation, the results do not match what was expected. In this case we are adding 1 to each pixel of the image, so the total difference should be 64 * 512 * 5.
image = image + tf.constant(1., dtype=tf.float32)
I would like to solve this error, since so far I have not been able to obtain the results obtained by my neural network using feed_dict instead of Tensorflow Dataset API, and this is the only point where I can observe a difference in the input data.
I have problem with preprocessing data set for deep learning.
I am using U-net.
I have training data, label data, test data size of 512x512.
I want patch-based learning, so I am trying to split the 512x512 slices into multiple 64x64 slices so that I can train on 64x64 patches. In my case I want to extract 64x64 patches with a stride of 32 pixels from the original 512x512 images.
For example, first patch is formed from (0,0) ~ (64,64) and the next patch is formed from (32,0) ~ (64 + 32, 64)
This is full code of data.py
#from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import numpy as np
import os
import glob
import cv2
from os.path import *
#from libtiff import TIFF
import matplotlib.pyplot as plt
class dataProcess(object):
    """Load grayscale TIFF stacks (train, label, test) for U-Net training.

    Every image is read as grayscale, scaled to [0, 1] float32 and the
    stack is reshaped to ``(n_images, 512, 512, 1)``.
    """

    # Default dataset locations; each loader accepts an override.
    TRAIN_PATH = 'C:\\Users\\Lee Doyle\\unet\\data\\Train'
    LABEL_PATH = 'C:\\Users\\Lee Doyle\\unet\\data\\Label'
    TEST_PATH = 'C:\\Users\\Lee Doyle\\unet\\data\\Test'

    def __init__(self, out_rows, out_cols):
        """Store the requested output size (not used by the loaders below)."""
        self.out_rows = out_rows
        self.out_cols = out_cols

    @staticmethod
    def _load_gray_stack(directory):
        """Read every .tif in `directory` as float32 grayscale in [0, 1].

        Files are read in sorted path order: ``glob`` gives no ordering
        guarantee, and the i-th training image must line up with the i-th
        label image.

        Returns:
            (paths, stack): the sorted absolute paths and the array built
            from the images.

        Raises:
            IOError: if OpenCV cannot read one of the files.
        """
        paths = sorted(abspath(p) for p in glob.glob(directory + '/*.[tT][iI][fF]'))
        images = []
        for image_file in paths:
            img = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals failure with None instead of raising.
                raise IOError('could not read image: ' + image_file)
            images.append(img.astype(np.float32) / 255.0)
        return paths, np.array(images)

    def load_train_data(self, train_path=None, label_path=None, show_preview=True):
        """Load the training images and their label images.

        Args:
            train_path: folder with the training .tif files
                (default: ``TRAIN_PATH``).
            label_path: folder with the label .tif files
                (default: ``LABEL_PATH``).
            show_preview: when True (default), show the first image/label
                pair in a matplotlib window; ``plt.show()`` blocks until the
                window is closed.

        Returns:
            (train_img, label_img), each of shape ``(n, 512, 512, 1)``.
        """
        imgs_row, imgs_col = 512, 512
        train_path = self.TRAIN_PATH if train_path is None else train_path
        label_path = self.LABEL_PATH if label_path is None else label_path

        ######################Traindata################################
        print('-' * 30)
        print('load train images...')
        print('-' * 30)
        train_list, train_img = self._load_gray_stack(train_path)
        print(len(train_list))
        print(train_img)

        ######################Labeldata################################
        label_list, label_img = self._load_gray_stack(label_path)

        if show_preview:
            fig = plt.figure()
            a = fig.add_subplot(1, 2, 1)
            plt.imshow(train_img[0], cmap='gray')
            a.set_title('trian image')
            a = fig.add_subplot(1, 2, 2)
            # NOTE(review): the second imshow draws over the first on the
            # same axes, so only the plain label is visible -- confirm
            # which of the two was intended.
            plt.imshow(label_img[0] + train_img[0], cmap='gray')
            plt.imshow(label_img[0], cmap='gray')
            a.set_title('Image with GT SEG ')
            plt.show()

        print(train_img.shape)
        train_img = train_img.reshape(train_img.shape[0], imgs_row, imgs_col, 1)
        label_img = label_img.reshape(label_img.shape[0], imgs_row, imgs_col, 1)
        return train_img, label_img

    def load_test_data(self, test_path=None):
        """Load the test images.

        Args:
            test_path: folder with the test .tif files
                (default: ``TEST_PATH``).

        Returns:
            Array of shape ``(n, 512, 512, 1)``.
        """
        imgs_row, imgs_col = 512, 512
        test_path = self.TEST_PATH if test_path is None else test_path

        ######################Testdata#################################
        print('-' * 30)
        print('load test images...')
        print('-' * 30)
        _, test_img = self._load_gray_stack(test_path)

        test_img = test_img.reshape(test_img.shape[0], imgs_row, imgs_col, 1)
        print(test_img.shape)
        return test_img
# Script entry point: load the training and the test stacks once.
if __name__ == "__main__":
    mydata = dataProcess(512,512)
    mydata.load_train_data()
    mydata.load_test_data()
I think I need to add patch making code near this code.
for i in glob.glob(train_path + '/*.[tT][iI][fF]'):
train_list.append(abspath(i))
print(len(train_list))
for i in train_list:
# print(i)
img = cv2.imread(i, cv2.IMREAD_GRAYSCALE)
train_img.append(img.astype(np.float32)/255.0)
train_img = np.array(train_img)
print(train_img)
I appreciate your help.
While I was following the deepdream iPython notebook which is here: https://github.com/google/deepdream/blob/master/dream.ipynb, I successfully ran the code and initialized the network until i get this error:
I0218 20:53:01.108750 12174 net.cpp:283] Network initialization done.
I0218 20:53:06.017426 12174 net.cpp:816] Ignoring source layer data
I0218 20:53:06.139768 12174 net.cpp:816] Ignoring source layer loss
Traceback (most recent call last):
File "/home/andrew/PycharmProjects/deepmeme/deepmeme.py", line 122, in <module>
<IPython.core.display.Image object>
frame = deepdream(net, frame)
File "/home/andrew/PycharmProjects/deepmeme/deepmeme.py", line 78, in deepdream
octaves = [preprocess(net, base_img)]
File "/home/andrew/PycharmProjects/deepmeme/deepmeme.py", line 43, in preprocess
return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']
KeyError: 'data'
This is my code for the python file:
import sys
sys.path.append("/home/andrew/caffe/python")
from cStringIO import StringIO
import numpy as np
import scipy.ndimage as nd
import PIL.Image
from IPython.display import clear_output, Image, display
from google.protobuf import text_format
import caffe
# If your GPU supports CUDA and Caffe was built with CUDA support,
# uncomment the following to run Caffe operations on the GPU.
# caffe.set_mode_gpu()
# caffe.set_device(0) # select GPU device if multiple devices exist
def showarray(a, fmt='jpeg'):
    """Display array `a` inline as an image encoded in format `fmt`.

    Values are clipped to [0, 255] and converted to uint8 before encoding.
    """
    pixels = np.clip(a, 0, 255)
    buf = StringIO()
    encoded = PIL.Image.fromarray(np.uint8(pixels))
    encoded.save(buf, fmt)
    display(Image(data=buf.getvalue()))
model_path = '/home/andrew/caffe/models/bvlc_reference_caffenet/' # substitute your path here
net_fn = model_path + 'deploy.prototxt'
# Same weights file that was previously passed to caffe.Classifier directly;
# the old param_fn named a different file and was never used.
param_fn = model_path + 'caffenet_train_iter_500.caffemodel'

# Patching model to be able to compute gradients.
# Note that you can also manually add "force_backward: true" line to "deploy.prototxt".
model = caffe.io.caffe_pb2.NetParameter()
with open(net_fn) as proto_in:
    text_format.Merge(proto_in.read(), model)
model.force_backward = True
with open('tmp.prototxt', 'w') as proto_out:
    proto_out.write(str(model))

# Load the *patched* definition (the original code wrote a patched file but
# then loaded the unpatched one), and give the classifier a mean so that
# net.transformer.mean['data'] exists. caffe.TEST is no longer passed: the
# third positional argument of caffe.Classifier is image_dims, not a phase.
# NOTE(review): these are the ImageNet channel means used by the deepdream
# notebook; the mean is training-set dependent -- replace with the values of
# your own data.
net = caffe.Classifier('tmp.prototxt', param_fn,
                       mean=np.float32([104.0, 116.0, 122.0]),
                       channel_swap=(2, 1, 0))  # BGR channel order, as in the notebook
# a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
    """Convert an (H, W, C) image to Caffe's input layout.

    The channel axis is moved to the front, the channel order is reversed
    and the result is cast to float32. The transformer's ``'data'`` mean is
    subtracted when one is configured; a net created without a mean is
    handled by skipping the subtraction instead of raising ``KeyError``.
    """
    blob = np.float32(np.rollaxis(img, 2)[::-1])
    mean = net.transformer.mean.get('data')
    if mean is None:
        return blob
    return blob - mean
def deprocess(net, img):
    """Convert a (C, H, W) Caffe blob back to an (H, W, C) image.

    The transformer's ``'data'`` mean is added back when one is configured
    (a net created without a mean is handled by skipping the addition
    instead of raising ``KeyError``), then the channel order is reversed and
    the channels are stacked along the last axis.
    """
    mean = net.transformer.mean.get('data')
    restored = img if mean is None else img + mean
    return np.dstack(restored[::-1])
def objective_L2(dst):
    """Copy the blob's data into its diff array, in place."""
    dst.diff[...] = dst.data
def make_step(net, step_size=1.5, end='inception_4c/output',
              jitter=32, clip=True, objective=objective_L2):
    '''Basic gradient ascent step.

    Args:
        net: network exposing ``blobs``, ``forward``, ``backward`` and
            ``transformer.mean``.
        step_size: step length, applied to the gradient after it has been
            divided by its mean absolute value.
        end: name of the blob whose activations are optimised.
            NOTE(review): the default is taken from the GoogLeNet-based
            notebook; presumably it must be overridden for other
            architectures -- check ``net.blobs.keys()``.
        jitter: maximum random shift, in pixels, applied before the step
            and undone afterwards.
        clip: whether to clip the image to the valid pixel range.
        objective: callable that writes the objective's gradient into
            ``dst.diff``.
    '''
    src = net.blobs['data']  # input image is stored in Net's 'data' blob
    dst = net.blobs[end]

    ox, oy = np.random.randint(-jitter, jitter+1, 2)
    src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2)  # apply jitter shift

    net.forward(end=end)
    objective(dst)  # specify the optimization objective
    net.backward(start=end)
    g = src.diff[0]
    # apply normalized ascent step to the input image
    src.data[:] += step_size/np.abs(g).mean() * g

    src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2)  # unshift image

    if clip:
        # A net created without a mean has no 'data' entry; treat that as a
        # zero bias instead of raising KeyError.
        bias = net.transformer.mean.get('data', 0)
        src.data[:] = np.clip(src.data, -bias, 255-bias)
def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4,
              end='inception_4c/output', clip=True, **step_params):
    """Run the multi-octave gradient-ascent loop on `base_img`.

    The image is downscaled ``octave_n - 1`` times by ``octave_scale``. The
    octaves are then processed from smallest to largest; the detail
    produced on one octave is upscaled and added to the next.

    Args:
        net: the network to run.
        base_img: (H, W, C) input image.
        iter_n: gradient-ascent steps per octave.
        octave_n: number of octaves.
        octave_scale: scale factor between consecutive octaves.
        end: name of the blob to optimise.
            NOTE(review): the default comes from the GoogLeNet-based
            notebook; presumably it must be overridden for other
            architectures -- check ``net.blobs.keys()``.
        clip: forwarded to ``make_step``; when False the visualisation is
            contrast-stretched instead.
        **step_params: extra keyword arguments for ``make_step``.

    Returns:
        The processed image in (H, W, C) layout.
    """
    # prepare base images for all octaves
    octaves = [preprocess(net, base_img)]
    # range() and the formatted print below work on Python 2 and Python 3;
    # xrange and the print statement were Python-2 only.
    for _ in range(octave_n - 1):
        octaves.append(nd.zoom(octaves[-1], (1, 1.0/octave_scale, 1.0/octave_scale), order=1))

    src = net.blobs['data']
    detail = np.zeros_like(octaves[-1])  # allocate image for network-produced details
    for octave, octave_base in enumerate(octaves[::-1]):
        h, w = octave_base.shape[-2:]
        if octave > 0:
            # upscale details from the previous octave
            h1, w1 = detail.shape[-2:]
            detail = nd.zoom(detail, (1, 1.0*h/h1, 1.0*w/w1), order=1)

        src.reshape(1, 3, h, w)  # resize the network's input image size
        src.data[0] = octave_base + detail
        for i in range(iter_n):
            make_step(net, end=end, clip=clip, **step_params)

            # visualization
            vis = deprocess(net, src.data[0])
            if not clip:  # adjust image contrast if clipping is disabled
                vis = vis*(255.0/np.percentile(vis, 99.98))
            showarray(vis)
            print('%s %s %s %s' % (octave, i, end, vis.shape))
            clear_output(wait=True)

        # extract details produced on the current octave
        detail = src.data[0] - octave_base
    # returning the resulting image
    return deprocess(net, src.data[0])
import os

img = np.float32(PIL.Image.open('/home/andrew/caffe/examples/images/cat.jpg'))
showarray(img)
net.blobs.keys()  # only useful interactively: lists the available blob names

frame = img
frame_i = 0
h, w = frame.shape[:2]
s = 0.05  # scale coefficient

# PIL does not create missing directories; make sure the output folder exists.
if not os.path.isdir('frames'):
    os.makedirs('frames')

# range() works on Python 2 and Python 3; xrange was Python-2 only.
for i in range(100):
    frame = deepdream(net, frame)
    PIL.Image.fromarray(np.uint8(frame)).save("frames/%04d.jpg" % frame_i)
    # zoom in slightly around the image centre before the next frame
    frame = nd.affine_transform(frame, [1-s, 1-s, 1], [h*s/2, w*s/2, 0], order=1)
    frame_i += 1

Image(filename='frames/0029.jpg')  # renders only as the last expression of a notebook cell
Does anybody know what's happening? I am using my own data that I successfully trained a model with.
From the deepdream iPython notebook:
net = caffe.Classifier('tmp.prototxt', param_fn,
mean = np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent
channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB
vs your:
net = caffe.Classifier('/home/andrew/caffe/models/bvlc_reference_caffenet/deploy.prototxt', '/home/andrew/caffe/models/bvlc_reference_caffenet/caffenet_train_iter_500.caffemodel', caffe.TEST)
You do not seem to include a mean when you create a caffe.Classifier.
See the definition of caffe.Classifier.
If you don't have a mean, you could probably just remove the mention of mean from preprocess/deprocess:
def preprocess(net, img):
    """Convert an (H, W, C) image to a float32 (C, H, W) array with reversed channel order.

    No mean is subtracted; `net` is accepted for signature compatibility only.
    """
    channels_first = np.rollaxis(img, 2)
    reversed_channels = channels_first[::-1]
    return np.float32(reversed_channels)
def deprocess(net, img):
    """Convert a (C, H, W) array to (H, W, C) with reversed channel order.

    No mean is added back; `net` is accepted for signature compatibility only.
    """
    reversed_channels = img[::-1]
    return np.dstack(reversed_channels)