Related
I am faced with the task of classifying sound by spectrograms. I have a solution to this problem in one way (I will convert all audio recordings into spectrograms -> save them as pictures and train a neural network for this), but I want to go the simpler way, that is, not save pictures, but immediately convert audio files into tensors, but there is a problem, I can't find any useful information on how to create my data set from tensors in TensorFlow. I will give an example of such code on Pytorch.
class SoundDataset(Dataset):
def __init__(self, file_names, labels):
self.file_names = file_names
self.labels = labels
def __getitem__(self,index):
#format the file path and load the file
path = self.file_names[index]
scale, sr = librosa.load(path)
filter_banks = librosa.filters.mel(n_fft=2048, sr=22050, n_mels=10)
mel_spectrogram = librosa.feature.melspectrogram(scale, sr=sr, n_fft=2048, hop_length=512, n_mels=32)
log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)
trch = torch.from_numpy(log_mel_spectrogram)
if log_mel_spectrogram.shape !=(10,87):
delta = 87 - log_mel_spectrogram.shape[1]
trch = torch.nn.functional.pad(trch, (0,delta))
return trch,self.labels[index]
def __len__(self):
return len(self.file_names)
Here a class is being created that takes paths to audio recordings and converts them into tensors, and will pad zeros if the tensors do not fit the shapes. How can I create the same class for TensorFlow. Next is an example of code that creates tuples with file paths and their class and creates an object of the Sound Data set class and generates a dataset from these files accordingly. All this is written for Pytorch. Tell me how it can be implemented for TensorFlow.
path = '/content/drive/MyDrive/МДМА/audiodata/for-rerecorded/training/'
files = []
labels = []
lbl = '1 0'.split()
for lab in lbl:
if lab == '0':
c = 'fake'
else:
c ='real'
names = os.listdir(path+c)
for n in names:
pth = path+c+'/'+n
files.append(pth)
labels.append(int(lab))
train_dataset = SoundDataset(files, labels)
train_loader = torch.utils.data.DataLoader(train_dataset,batch_size = 20)
If you read the documentation there are code patterns.
This is not tested but if you load the index from another data structure which has mapped the files to the indexes then this code can help.
import librosa
import pathlib
import tensorflow as tf
DATASET_PATH = 'data/mini_speech_commands'
data_dir = pathlib.Path(DATASET_PATH)
if not data_dir.exists():
tf.keras.utils.get_file(
'mini_speech_commands.zip',
origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
extract=True,
cache_dir='.', cache_subdir='data')
def load_audio(filename):
scale, sr = librosa.load(filename)
mel_spectrogram = librosa.feature.melspectrogram(scale, sr=sr, n_fft=2048, hop_length=512, n_mels=32)
log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)
spectrogram_numpy = log_mel_spectrogram.numpy()
if log_mel_spectrogram.shape !=(10,87):
delta = 87 - log_mel_spectrogram.shape[1]
spectrogram_numpy = tf.pad(spectrogram_numpy, (0,delta))
return spectrogram_numpy #return index
read_audio = lambda x: tf.py_function(load_audio,
[x],
tf.float64)
filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
files_ds = tf.data.Dataset.from_tensor_slices(filenames)
waveform_ds = files_ds.map(
map_func=read_audio)
The code to pad is converted using TensorFlow directly and you have to test it.
Update : Another way using keras.utils.Sequence is shown in this thread
The problem: I am unable to process CNN model for training 8-channel .TIF images.
Expected Output: Map training data (train_ds) via gdal and train model.
data (images):
n = 600
shape = (256, 256, 8)
data structure:
project_photos/
....classes/
......barren/
......agriculture/
......wooded/
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds
import pathlib
>print (tf.__version__)
2.1.0
data_dir = ".\projects\keras\projectA\project_photos\classes")
data_dir = pathlib.Path(data_dir)
image_count = len(list(data_dir.glob('*/*.tif')))
>print(image_count)
600
list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'), shuffle=False)
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)
batch_size = 32
img_height = 256
img_width = 256
>for f in list_ds.take(5):
> print(f.numpy())
b'/home/projects/keras/projectA/project_photos/classes/barren/12345_b0001.tif'
b'/home/projects/keras/projectA/project_photos/classes/wooded//12345_w0001.tif'
b'/home/projects/keras/projectA/project_photos/classes/barren/12345_b0002.tif'
b'/home/projects/keras/projectA/project_photos/classes/agriculture//12345_a0001.tif'
b'/home/projects/keras/projectA/project_photos/classes/wooded/12345_w0002.tif'
# tree structure
>class_names = np.array(sorted([item.name for item in data_dir.glob('*')]))
print(class_names)
['barren' 'agriculture' 'wooded']
# train/validation split
val_size = int(image_count * 0.2)
train_ds = list_ds.skip(val_size)
val_ds = list_ds.take(val_size)
def get_label(file_path):
# convert the path to a list of path components
parts = tf.strings.split(file_path, os.path.sep)
# The second to last is the class-directory
one_hot = parts[-2] == class_names
# Integer encode the label
return tf.argmax(one_hot)
def decode_img(img):
# convert the compressed string to a 3D uint8 tensor
img = tf.image.decode_jpeg(img, channels=3)
# resize the image to the desired size
return tf.image.resize(img, [img_height, img_width])
def process_path(file_path):
label = get_label(file_path)
# load the raw data from the file as a string
img = tf.io.read_file(file_path)
img = decode_img(img)
return img, label
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(process_path, num_parallel_calls=AUTOTUNE)
I understand that tensorflow has limited support (experimental) for decode_tiff, and even if that did work - I am unable to use the latest version of TF that has that update.
This leaves me with attempting workarounds, the following - which have not succeeded:
"""
Updating decode_img(img) in attempt to process 8-channel .TIF raster
"""
#attempt, adding gdal_Open variable to decode_img
## fails due to image path (train_ds) being stored as byte.
x = gdal.Open(file_path)
Error: Not a string.
#attempt, modifying to extract PATH as str().
def process_path(file_path):
label = get_label(file_path)
# load the raw data from the file as a string
img = ''
for fpath in file_path:
img = fpath.numy()
img = decode_img(img)
return img, label
>train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
ValueError: len requires a non-scalar tensor, got one of shape Tensor("Shape:0", shape=(0,), dtype=int32)
#attempt, processing outside of `.map`, works just fine.
imgList = []
for elem in train_ds:
img = elem.numpy()
img = img.decode()
imgList.append(img)
file_path = imgList[0]
raster = gdal.Open(file_path)
bands = [raster.GetRasterBand(k + 1).ReadAsArray() for k in range (raster.RasterCount)]
n_bands = len(bands)
img_array = np.stack(bands,2)
img = tf.convert_to_tensor(img_array, dtype = tf.float32)
img = tf.image.resize(img, [img_height, img_width])
print(type(img))
print(img.numpy().shape)
<class: 'tensorflow.python.framework.ops.EagerTensor'>
(256, 256, 8)
So, any ideas on how I can get this to work within the TF framework - getting TF to process the raster via .map?
I have images of [64,512,5] stored in *.npy files which I convert into *.tfrecords files.
I have verified that the reading of said records corresponds correctly with what is present in the *.npy files. However, when I perform some operation on the parser, like adding 1 to each pixel of the image, the result is not the expected one. The result should be 65*512*5 = 163840 but it is 163839.99980013957 (not always the same)
I have tried to perform different operations like tf.subtract, but the results are the same.
Could someone tell me what is wrong?
import re
import ast
import sys, select
import random as rn
from glob import glob
from tqdm import tqdm
from datetime import datetime
from configparser import SafeConfigParser
import numpy as np
import numpy.ma as ma
import scipy.misc
import os.path
from os import mkdir, stat
from os.path import exists, dirname, abspath
from os.path import join as dir_join
import tensorflow as tf
''' File hierarchy
'''
_code_dir = dirname(abspath(__file__))
_python_dir = dirname(_code_dir)
_model_dir = dirname(_python_dir)
_project_dir = dirname(_model_dir)
_ml_dir = dirname(_project_dir)
_srv_dir = dirname(_ml_dir)
_root_datasets_dir = dir_join(_srv_dir,'machine_learning','data_sets/ssd_prepared')
_config_dir = dir_join(_python_dir, 'config')
'''Data sets directories
'''
THIS_DATA_SET_DIR = 'Sph_50m' #WARNING: Global variable also used in helper.py
_data_dir = dir_join(_root_datasets_dir, THIS_DATA_SET_DIR)
_data_set_dir = dir_join(_data_dir,'ImageSet')
_data_npy_dir = dir_join(_data_dir,'data')
_data_tfRecord_dir = dir_join(_data_dir,'tfRecord')
''' Configuration parser
'''
cfg_parser = SafeConfigParser()
cfg_parser.read(dir_join(_config_dir,'cfg_model.ini'))
''' Private variables
'''
_batch_size = cfg_parser.getint(section='train', option='batch_size')
_max_epoch = cfg_parser.getint(section='train', option='max_epoch')
_standarize = cfg_parser.getboolean(section='train', option='standarize_input')
_input_shape = ast.literal_eval(cfg_parser.get(section='data_shape', option='input_shape'))
_label_channel = cfg_parser.getint(section='data_shape', option='label_channel')
_track_channel = cfg_parser.getint(section='data_shape', option='track_channel')
_mask_channel = cfg_parser.getint(section='data_shape', option='mask_channel')
_data_train = cfg_parser.get(section='data_set', option='data_train')
_data_val = cfg_parser.get(section='data_set', option='data_val')
_data_test = cfg_parser.get(section='data_set', option='data_test')
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value.reshape(-1)))
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _floats_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value.reshape(-1)))
def numpy_to_TFRecord():
if not exists(_data_tfRecord_dir): mkdir(_data_tfRecord_dir)
for dataset in [_data_train, _data_val, _data_test]:
tfRecord_folder = dir_join(_data_tfRecord_dir, dataset)
if not exists(tfRecord_folder): mkdir(tfRecord_folder)
#Retrieve list of files
projections_dir=[]
file_ = open(dir_join(_data_set_dir, dataset+'.txt'), 'r')
for x in file_.readlines():
file_nat = x.strip()+'.npy'
filename = dir_join(_data_npy_dir, file_nat)
assert exists(filename), "{} doesn't exist".format(filename)
projections_dir.append(filename)
file_.close()
totaltfRecordSize = 0
numFile = 0
for projection_dir in tqdm(projections_dir, ncols= 100, desc = 'TFRecord {}'.format(dataset)):
scanName = projection_dir.split('/')[-1].split('.')[0]
if totaltfRecordSize > 100*(10**6) or totaltfRecordSize == 0:
# address to save the TFRecords file
train_filename = dir_join(tfRecord_folder, \
str(numFile) + '_' + dataset +'.tfrecords')
# open the TFRecords file
writer = tf.python_io.TFRecordWriter(train_filename)
numFile += 1
totaltfRecordSize = 0
# Load the image
projection = np.load(projection_dir)
image = projection[:,:,:_label_channel]
label = projection[:,:,_label_channel].astype(int)
mask = projection[:,:,_mask_channel].astype(int)
track = projection[:,:,_track_channel].astype(int)
# Create a feature
feature = {'image': _floats_feature(image),
'label': _int64_feature(label),
'mask' : _int64_feature(mask),
'track': _int64_feature(track),
'scanName': _bytes_feature(tf.compat.as_bytes(scanName))}
# Create an example protocol buffer
example = tf.train.Example(features=tf.train.Features(feature=feature))
# Serialize to string and write on the file
writer.write(example.SerializeToString())
fileSize = stat(train_filename).st_size
totaltfRecordSize += fileSize
writer.close()
sys.stdout.flush()
def readTFRecord():
# Transforms a scalar string `example_proto` into a pair of a scalar string and
# a scalar integer, representing an image and its label, respectively.
image_dim = _input_shape[0] * _input_shape[1] * _label_channel
label_dim = _input_shape[0] * _input_shape[1]
mean = np.load(dir_join(_data_dir,'mean.npy'))
std = np.load(dir_join(_data_dir,'std.npy'))
mean_tf = tf.convert_to_tensor(mean, dtype=tf.float32, name='mean')
std_tf = tf.convert_to_tensor(std, dtype=tf.float32, name='std')
with tf.variable_scope('TFRecord'):
def _parse_function(example_proto):
with tf.variable_scope('parser'):
features = {'image': tf.FixedLenFeature([image_dim], tf.float32),
'label': tf.FixedLenFeature([label_dim], tf.int64),
'mask' : tf.FixedLenFeature([label_dim], tf.int64),
'track': tf.FixedLenFeature([label_dim], tf.int64),
'scanName': tf.FixedLenFeature([], tf.string)}
parsed_features = tf.parse_single_example(example_proto, features)
# Reshape image data into the original shape
image = tf.reshape(parsed_features['image'], [_input_shape[0], _input_shape[1], _label_channel], name='image')
label = tf.reshape(parsed_features['label'], _input_shape, name='lable_reshape')
mask = tf.reshape(parsed_features['mask'], _input_shape, name='mask_reshape')
track = tf.reshape(parsed_features['track'], _input_shape, name='track_reshape')
scanName = parsed_features['scanName']
image = image + tf.constant(1., dtype=tf.float32)
return image, label, mask, track, scanName
training_filenames = glob(dir_join(_data_tfRecord_dir, _data_train, '*.tfrecords'))
validation_filenames = glob(dir_join(_data_tfRecord_dir, _data_val, '*.tfrecords'))
filenames = tf.placeholder(tf.string, shape=[None], name='filenames')
dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.map(_parse_function, num_parallel_calls=20) # Parse the record into tensors.
dataset = dataset.shuffle(buffer_size=10000)
dataset = dataset.batch(_batch_size, drop_remainder=True)
dataset = dataset.prefetch(buffer_size=10)
iterator = dataset.make_initializable_iterator()
next = iterator.get_next()
sess = tf.Session()
while True:
sess.run(iterator.initializer, feed_dict={filenames: training_filenames})
try:
img, _, _, _, scanX = sess.run(next)
for i, scan in enumerate(scanX):
print(scan.decode("utf-8"))
projection = np.load(dir_join(_data_npy_dir, scan.decode("utf-8") + '.npy'))
imagenp = projection[:,:,:_label_channel]
if np.abs(np.sum(img[i,...] - imagenp)) > 0.:
print(np.sum(img[i,...] - imagenp))
except tf.errors.OutOfRangeError:
break
return training_filenames, validation_filenames, filenames, iterator
if __name__ == '__main__':
numpy_to_TFRecord()
readTFRecord()
The test I'm doing in the previous code is to convert the *.npy files to *.tfrecords. Then, I compare the *.trecords with the *.npy. The value should be 0 if both images were identical.
img, _, _, _, scanX = sess.run(next)
for i, scan in enumerate(scanX):
print(scan.decode("utf-8"))
projection = np.load(dir_join(_data_npy_dir, scan.decode("utf-8") + '.npy'))
imagenp = projection[:,:,:_label_channel]
print(np.sum(img[i,...] - imagenp))
If the data is not preprocessed, these images are the same, however, if we perform some kind of transformation, the results do not match what was expected. In this case we are adding 1 to each pixel of the image, so the total difference should be 64 * 512 * 5.
image = image + tf.constant(1., dtype=tf.float32)
I would like to solve this error, since so far I have not been able to obtain the results obtained by my neural network using feed_dict instead of Tensorflow Dataset API, and this is the only point where I can observe a difference in the input data.
I'm trying a deep learning code for processing my dataset which consists 1,12,120 images. What my code does is the following:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import imageio
from os import listdir
import skimage.transform
import pickle
import sys, os
from sklearn.preprocessing import MultiLabelBinarizer
def get_labels(pic_id):
labels = meta_data.loc[meta_data["Image Index"]==pic_id,"Finding Labels"]
return labels.tolist()[0].split("|")
#Loading Data
meta_data = pd.read_csv(data_entry_path)
bbox_list = pd.read_csv(bbox_list_path)
with open(train_txt_path, "r") as f:
train_list = [ i.strip() for i in f.readlines()]
with open(valid_txt_path, "r") as f:
valid_list = [ i.strip() for i in f.readlines()]
label_eight = list(np.unique(bbox_list["Finding Label"])) + ["No Finding"]
# transform training images
print("training example:",len(train_list))
print("take care of your RAM here !!!")
train_X = []
for i in range(len(train_list)):
image_path = os.path.join(image_folder_path,train_list[i])
img = imageio.imread(image_path)
if img.shape != (1024,1024): # there some image with shape (1024,1024,4) in training set
img = img[:,:,0]
img_resized = skimage.transform.resize(img,(256,256)) # or use img[::4] here
train_X.append((np.array(img_resized)/255).reshape(256,256,1))
if i % 3000==0:
print(i)
train_X = np.array(train_X)
np.save(os.path.join(data_path,"train_X_small.npy"), train_X)
# transform validation images
print("validation example:",len(valid_list))
valid_X = []
for i in range(len(valid_list)):
image_path = os.path.join(image_folder_path,valid_list[i])
img = imageio.imread(image_path)
if img.shape != (1024,1024):
img = img[:,:,0]
img_resized = skimage.transform.resize(img,(256,256))
# if img.shape != (1024,1024):
# train_X.append(img[:,:,0])
# else:
valid_X.append((np.array(img_resized)/255).reshape(256,256,1))
if i % 3000==0:
print(i)
valid_X = np.array(valid_X)
np.save(os.path.join(data_path,"valid_X_small.npy"), valid_X)
# process label
print("label preprocessing")
train_y = []
for train_id in train_list:
train_y.append(get_labels(train_id))
valid_y = []
for valid_id in valid_list:
valid_y.append(get_labels(valid_id))
encoder = MultiLabelBinarizer()
encoder.fit(train_y+valid_y)
train_y_onehot = encoder.transform(train_y)
valid_y_onehot = encoder.transform(valid_y)
train_y_onehot = np.delete(train_y_onehot, [2,3,5,6,7,10,12],1) # delete out 8 and "No Finding" column
valid_y_onehot = np.delete(valid_y_onehot, [2,3,5,6,7,10,12],1) # delete out 8 and "No Finding" column
with open(data_path + "/train_y_onehot.pkl","wb") as f:
pickle.dump(train_y_onehot, f)
with open(data_path + "/valid_y_onehot.pkl","wb") as f:
pickle.dump(valid_y_onehot, f)
with open(data_path + "/label_encoder.pkl","wb") as f:
pickle.dump(encoder, f)
So this is my code My system configration:Intel i7-7700HQ,16GB Ram,256GB ssd,GTX 1050 4GB
Is there a way to split my dataset so and write to the same file again? I'm also posting the error which i got as a screenshot Error From Powershell after executing the code for 30mins
I'm also using python3 in my system 64bit version
Does spliting the 1,12,120 images and taking them as batches will it work here? If yes how?
While I was following the deepdream iPython notebook which is here: https://github.com/google/deepdream/blob/master/dream.ipynb, I successfully ran the code and initialized the network until i get this error:
I0218 20:53:01.108750 12174 net.cpp:283] Network initialization done.
I0218 20:53:06.017426 12174 net.cpp:816] Ignoring source layer data
I0218 20:53:06.139768 12174 net.cpp:816] Ignoring source layer loss
Traceback (most recent call last):
File "/home/andrew/PycharmProjects/deepmeme/deepmeme.py", line 122, in <module>
<IPython.core.display.Image object>
frame = deepdream(net, frame)
File "/home/andrew/PycharmProjects/deepmeme/deepmeme.py", line 78, in deepdream
octaves = [preprocess(net, base_img)]
File "/home/andrew/PycharmProjects/deepmeme/deepmeme.py", line 43, in preprocess
return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']
KeyError: 'data'
This is my code for the python file:
import sys
sys.path.append("/home/andrew/caffe/python")
from cStringIO import StringIO
import numpy as np
import scipy.ndimage as nd
import PIL.Image
from IPython.display import clear_output, Image, display
from google.protobuf import text_format
import caffe
# If your GPU supports CUDA and Caffe was built with CUDA support,
# uncomment the following to run Caffe operations on the GPU.
# caffe.set_mode_gpu()
# caffe.set_device(0) # select GPU device if multiple devices exist
def showarray(a, fmt='jpeg'):
a = np.uint8(np.clip(a, 0, 255))
f = StringIO()
PIL.Image.fromarray(a).save(f, fmt)
display(Image(data=f.getvalue()))
model_path = '/home/andrew/caffe/models/bvlc_reference_caffenet/' # substitute your path here
net_fn = model_path + 'deploy.prototxt'
param_fn = model_path + 'caffe_train_iter_500.caffemodel'
# Patching model to be able to compute gradients.
# Note that you can also manually add "force_backward: true" line to "deploy.prototxt".
model = caffe.io.caffe_pb2.NetParameter()
text_format.Merge(open(net_fn).read(), model)
model.force_backward = True
open('deploy.prototxt', 'w').write(str(model))
net = caffe.Classifier('/home/andrew/caffe/models/bvlc_reference_caffenet/deploy.prototxt', '/home/andrew/caffe/models/bvlc_reference_caffenet/caffenet_train_iter_500.caffemodel', caffe.TEST)
# a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']
def deprocess(net, img):
return np.dstack((img + net.transformer.mean['data'])[::-1])
def objective_L2(dst):
dst.diff[:] = dst.data
def make_step(net, step_size=1.5, end='inception_4c/output',
jitter=32, clip=True, objective=objective_L2):
'''Basic gradient ascent step.'''
src = net.blobs['data'] # input image is stored in Net's 'data' blob
dst = net.blobs[end]
ox, oy = np.random.randint(-jitter, jitter+1, 2)
src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2) # apply jitter shift
net.forward(end=end)
objective(dst) # specify the optimization objective
net.backward(start=end)
g = src.diff[0]
# apply normalized ascent step to the input image
src.data[:] += step_size/np.abs(g).mean() * g
src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2) # unshift image
if clip:
bias = net.transformer.mean['data']
src.data[:] = np.clip(src.data, -bias, 255-bias)
def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4,
end='inception_4c/output', clip=True, **step_params):
# prepare base images for all octaves
octaves = [preprocess(net, base_img)]
for i in xrange(octave_n-1):
octaves.append(nd.zoom(octaves[-1], (1, 1.0/octave_scale,1.0/octave_scale), order=1))
src = net.blobs['data']
detail = np.zeros_like(octaves[-1]) # allocate image for network-produced details
for octave, octave_base in enumerate(octaves[::-1]):
h, w = octave_base.shape[-2:]
if octave > 0:
# upscale details from the previous octave
h1, w1 = detail.shape[-2:]
detail = nd.zoom(detail, (1, 1.0*h/h1,1.0*w/w1), order=1)
src.reshape(1,3,h,w) # resize the network's input image size
src.data[0] = octave_base+detail
for i in xrange(iter_n):
make_step(net, end=end, clip=clip, **step_params)
# visualization
vis = deprocess(net, src.data[0])
if not clip: # adjust image contrast if clipping is disabled
vis = vis*(255.0/np.percentile(vis, 99.98))
showarray(vis)
print octave, i, end, vis.shape
clear_output(wait=True)
# extract details produced on the current octave
detail = src.data[0]-octave_base
# returning the resulting image
return deprocess(net, src.data[0])
img = np.float32(PIL.Image.open('/home/andrew/caffe/examples/images/cat.jpg'))
showarray(img)
net.blobs.keys()
frame = img
frame_i = 0
h, w = frame.shape[:2]
s = 0.05 # scale coefficient
for i in xrange(100):
frame = deepdream(net, frame)
PIL.Image.fromarray(np.uint8(frame)).save("frames/%04d.jpg"%frame_i)
frame = nd.affine_transform(frame, [1-s,1-s,1], [h*s/2,w*s/2,0], order=1)
frame_i += 1
Image(filename='frames/0029.jpg')
Does anybody know what's happening? I am using my own data that I successfully trained a model with.
From the deepdream iPython notebook:
net = caffe.Classifier('tmp.prototxt', param_fn,
mean = np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent
channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB
vs your:
net = caffe.Classifier('/home/andrew/caffe/models/bvlc_reference_caffenet/deploy.prototxt', '/home/andrew/caffe/models/bvlc_reference_caffenet/caffenet_train_iter_500.caffemodel', caffe.TEST)
You do not seem to include a mean when you create a caffe.Classifier.
See the definition of caffe.Classifier.
If you don't have a mean, you could probably just remove the mention of mean from preprocess/deprocess:
def preprocess(net, img):
return np.float32(np.rollaxis(img, 2)[::-1])
def deprocess(net, img):
return np.dstack((img)[::-1])