I want to use TensorFlow's ImageDataGenerator.flow_from_directory() to load my dataset, but my task is regression rather than classification. I therefore used class_mode=None so that no labels are assigned to the data, but now I need to attach my own labels to the training examples (I have them as a list) and I don't know how. Is there a way around this?
Example code:
labels = [0.75, 21.60, 10.12] # example labels
# load dataset from directory
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
train_data = image_generator.flow_from_directory(batch_size=batch_size, directory=train_x_dir, target_size=(224, 224), class_mode=None, shuffle=False)
# assign labels to training examples
# ???
Since I got no direct answer, I assume this can't be done in TF 2.3.
So I referred to a thread mentioned by AerysS, specifically to the answer from user timehaven, and used his code to generate batches from a pandas dataframe using Keras' load_img and img_to_array. The code was written for Python 2.7, so I made a few changes to port it, and it works for me with Python 3.6.8.
data_generator.py
from __future__ import print_function
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
import numpy as np
import pandas as pd
import bcolz
import threading
import os
import sys
import glob
import shutil
bcolz_lock = threading.Lock()
# old_blosc_nthreads = bcolz.blosc_set_nthreads(1)
# assert bcolz.blosc_set_nthreads(1) == 1
def safe_bcolz_open(fname, idx=None, debug=False):
    with bcolz_lock:
        if idx is None:
            X2 = bcolz.open(fname)
        else:
            X2 = bcolz.open(fname)[idx]

        if debug:
            df_debug = pd.DataFrame(X2, index=idx)
            assert X2.shape[0] == len(idx)
            assert X2.shape == df_debug.shape
            df_debug = df_debug.astype(int)
            test_idx = (df_debug.subtract(df_debug.index.values, axis=0) == 0).all(axis=1)
            assert test_idx.all(), df_debug[~test_idx]
    return X2
class threadsafe_iter:
    def __init__(self, it):
        self.it = it
        self.lock = threading.Lock()
        assert self.lock is not bcolz_lock

    def __iter__(self):
        return self

    def next(self):  # Python 2 compatibility
        with self.lock:
            return self.it.next()

    def __next__(self):
        with self.lock:
            return next(self.it)
def threadsafe_generator(f):
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g

@threadsafe_generator
def generator_from_df(df, batch_size, target_size, features=None,
                      debug_merged=False):
    if features is not None:
        assert os.path.exists(features)
        assert safe_bcolz_open(features).shape[0] == df.shape[0], "Features rows must match df!"

    nbatches, n_skipped_per_epoch = divmod(df.shape[0], batch_size)
    count = 1
    epoch = 0

    # New epoch.
    while 1:
        df = df.sample(frac=1)  # frac=1 is the same as shuffling df.
        epoch += 1
        i, j = 0, batch_size

        # Mini-batches within epoch.
        mini_batches_completed = 0
        for _ in range(nbatches):
            sub = df.iloc[i:j]
            try:
                X = np.array([(2 * (img_to_array(load_img(f, target_size=target_size)) / 255.0 - 0.5))
                              for f in sub.imgpath])
                Y = sub.target.values

                if features is None:
                    mini_batches_completed += 1
                    yield X, Y
                else:
                    X2 = safe_bcolz_open(features, sub.index.values, debug=debug_merged)
                    mini_batches_completed += 1
                    yield [X, X2], Y
            except IOError as err:
                count -= 1

            i = j
            j += batch_size
            count += 1
train.py
import os
from glob import glob

import pandas as pd

from data_generator import generator_from_df

def construct_dataframe(img_path, labels_path):
    data = {}
    data['imgpath'] = glob(os.path.join(img_path, '*.png'))
    data['target'] = load_labels(labels_path)  # load_labels is my helper that returns the regression targets
    return pd.DataFrame(data)
train_df = construct_dataframe(train_x_dir, train_y_dir)
train_generator = generator_from_df(train_df, batch_size, (img_size, img_size))
# load and compile model
# ...
model.fit(train_generator, ...)
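As an aside (treat this as an assumption about your TF/Keras version, I haven't verified it on 2.3 exactly): newer releases of tf.keras also expose ImageDataGenerator.flow_from_dataframe, whose class_mode='raw' passes the y_col values through unchanged, which fits regression targets and would avoid the custom generator entirely. A minimal sketch:

import pandas as pd
import tensorflow as tf

# Hypothetical dataframe: one image filename per row plus its float target.
df = pd.DataFrame({'filename': ['img0.png', 'img1.png', 'img2.png'],
                   'target': [0.75, 21.60, 10.12]})

gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
train_data = gen.flow_from_dataframe(df,
                                     directory=train_x_dir,
                                     x_col='filename',
                                     y_col='target',
                                     target_size=(224, 224),
                                     class_mode='raw',  # yield labels as-is (regression)
                                     batch_size=32)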
Related: https://github.com/safwankdb/Neural-Style-Transfer
I created a model file by running the code in the link above. I then converted it to ONNX so I could run it from C++ with OpenCV.
But while the input of the ONNX file I exported is (1, 3, 512, 512), the output comes out as (1, 512, 28, 28). My export code:
from torch.autograd import Variable

VGG.eval()
torch.save(VGG, 'torchmodel.pth')

dummy_input = Variable(torch.randn(1, 3, 512, 512, device='cuda:1'))
input_names = ['input']
output_names = ['output']
onnxfile = 'style.onnx'
torch.onnx.export(VGG, dummy_input, onnxfile, verbose=False,
                  input_names=input_names, opset_version=11,
                  output_names=output_names)
I tried the export parameters suggested on a few sites, but it didn't help.
Does anyone have an opinion on this matter?
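To double-check what the exported graph actually declares and returns, one option (just a sketch, assuming the onnxruntime package is installed) is to load the file back and run a dummy input through it:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('style.onnx')
print([(o.name, o.shape) for o in sess.get_outputs()])  # declared output shapes

x = np.random.randn(1, 3, 512, 512).astype(np.float32)
out = sess.run(None, {'input': x})[0]  # 'input' matches input_names from the export code
print(out.shape)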
Original code :
# -*- coding: utf-8 -*-
"""StyleTransfer.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/15JKaqmpVNr8NhURJWgbkvIl1sd0aKS3o
"""
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision as tv
from PIL import Image
import imageio
import numpy as np
from matplotlib import pyplot as plt
to_tensor = tv.transforms.Compose([
    tv.transforms.Resize((512, 512)),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[1, 1, 1]),
])

unload = tv.transforms.Compose([
    tv.transforms.Normalize(mean=[-0.485, -0.456, -0.406],
                            std=[1, 1, 1]),
    tv.transforms.Lambda(lambda x: x.clamp(0, 1))
])
to_image = tv.transforms.ToPILImage()
style_img = 'udnie.jpg'
input_img = 'chicago.jpg'
style_img = Image.open(style_img)
input_img = Image.open(input_img)
style_img = to_tensor(style_img).cuda()
input_img = to_tensor(input_img).cuda()
def get_features(module, x, y):
    # print('here')
    features.append(y)

def gram_matrix(x):
    b, c, h, w = x.size()
    F = x.view(b, c, h * w)
    G = torch.bmm(F, F.transpose(1, 2)) / (h * w)
    return G
VGG = tv.models.vgg19(pretrained=True).features
VGG.cuda()
for i, layer in enumerate(VGG):
    if i in [0, 5, 10, 19, 21, 28]:
        VGG[i].register_forward_hook(get_features)
    elif isinstance(layer, nn.MaxPool2d):
        VGG[i] = nn.AvgPool2d(kernel_size=2)

VGG.eval()
for p in VGG.parameters():
    p.requires_grad = False
features = []
VGG(input_img.unsqueeze(0))
c_target = features[4].detach()
features = []
VGG(style_img.unsqueeze(0))
f_targets = features[:4]+features[5:]
gram_targets = [gram_matrix(i).detach() for i in f_targets]
alpha = 1
beta = 1e3
iterations = 200
image = input_img.clone().unsqueeze(0)
# image = torch.randn(1,3,512,512).cuda()
images = []
optimizer = optim.LBFGS([image.requires_grad_()], lr=1)
mse_loss = nn.MSELoss(reduction='mean')
l_c = []
l_s = []
counter = 0
for itr in range(iterations):
    features = []

    def closure():
        optimizer.zero_grad()
        VGG(image)
        t_features = features[-6:]
        content = t_features[4]
        style_features = t_features[:4] + t_features[5:]
        t_features = []
        gram_styles = [gram_matrix(i) for i in style_features]
        c_loss = alpha * mse_loss(content, c_target)
        s_loss = 0
        for i in range(5):
            n_c = gram_styles[i].shape[0]
            s_loss += beta * mse_loss(gram_styles[i], gram_targets[i]) / (n_c ** 2)
        total_loss = c_loss + s_loss
        l_c.append(c_loss.item())  # store plain floats so plotting doesn't hold on to the graph
        l_s.append(s_loss.item())
        total_loss.backward()
        return total_loss

    optimizer.step(closure)
    print('Step {}: S_loss: {:.8f} C_loss: {:.8f}'.format(itr, l_s[-1], l_c[-1]))

    if itr % 1 == 0:
        temp = unload(image[0].cpu().detach())
        temp = to_image(temp)
        temp = np.array(temp)
        images.append(temp)
imageio.mimsave('progress.gif', images)
plt.clf()
plt.plot(l_c, label='Content Loss')
plt.legend()
plt.savefig('loss1.png')
plt.clf()
plt.plot(l_s, label='Style Loss')
plt.legend()
plt.savefig('loss2.png')
plt.imsave('last.jpg',images[-1])
The following code
from torch.utils.data import DataLoader
from defect_segmentation.data_loading.DatasetSingleImage import dataset_single_image_default
import numpy as np
if __name__ == "__main__":
import matplotlib.pyplot as plt
def main():
dataset = dataset_single_image_default()
batch_size = 16
shuffle = True
num_workers = 0
loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
for i_batch, sample_batched in enumerate(loader):
fig, axs = plt.subplots(int(np.sqrt(batch_size)), batch_size // int(np.sqrt(batch_size)))
fig.suptitle(f"i_batch = {i_batch}")
for i_sample, ax in zip(range(sample_batched.shape[0]), axs.flat):
ax.set_title(f"Sample #{i_sample}")
ax.axis("off")
ax.imshow(sample_batched[i_sample, :, :])
plt.pause(0.001)
main()
works and outputs figures like so
This is fine.
The problem is that by about the 10th figure, populating each new figure becomes very slow, and I don't know what is causing it.
For completeness, here is the code for creating the dataset (striding over a single image):
from torch.utils.data import Dataset
from Utils.ConfigProvider import ConfigProvider
import cv2
import os
from overrides import overrides # pip install overrides
class DatasetSingleImage(Dataset):
    def __init__(self, image_path: str, sample_shape: tuple, strides: tuple):
        self._path = image_path
        assert os.path.isfile(self._path)
        self._im = cv2.imread(self._path)
        self._shape = self._im.shape
        self._rows, self._cols = self._shape[0], self._shape[1]

        self._sample_shape = sample_shape
        self._sample_rows, self._sample_cols = self._sample_shape[0], self._sample_shape[1]

        self._strides = strides
        self._stride_rows, self._stride_cols = self._strides[0], self._strides[1]

        # self._rows_start_range = range(0, self._rows, self._stride_rows)
        # self._cols_start_range = range(0, self._cols, self._stride_cols)

        self._rows_tuples_range = \
            [(c, min(c + self._sample_rows, self._rows))
             for c in range(0, self._rows - self._sample_rows, self._stride_rows)]
        self._cols_tuples_range = \
            [(r, min(r + self._sample_cols, self._cols))
             for r in range(0, self._cols - self._sample_cols, self._stride_cols)]

        self._n_strides_rows = len(self._rows_tuples_range)
        self._n_strides_cols = len(self._cols_tuples_range)
        self._total_strides = self._n_strides_rows * self._n_strides_cols

    def __len__(self):
        return self._total_strides

    @overrides  # pip install overrides
    def __getitem__(self, ind):
        row_ind = ind // self._n_strides_cols
        col_ind = ind % self._n_strides_cols
        sample_x = self._rows_tuples_range[row_ind]
        sample_y = self._cols_tuples_range[col_ind]
        sample = self._im[sample_x[0]:sample_x[1], sample_y[0]:sample_y[1]]
        assert sample.shape[:2] == self._sample_shape
        return sample

def dataset_single_image_default():
    path = ConfigProvider.config().data.defective_inspected_path1
    sample_shape = (50, 50)
    strides = (25, 25)
    dataset = DatasetSingleImage(path, sample_shape, strides)
    return dataset
What is making the plotting slow, and how to fix it?
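One plausible cause (an assumption on my part, not verified against this exact setup) is that each batch creates a brand-new figure that is never closed, so pyplot keeps every figure alive and slows down as they accumulate. A self-contained sketch of the fix, with dummy data standing in for the DataLoader loop above:

import numpy as np
import matplotlib.pyplot as plt

for i_batch in range(20):
    batch = np.random.rand(16, 50, 50)  # stand-in for sample_batched
    fig, axs = plt.subplots(4, 4)
    fig.suptitle(f"i_batch = {i_batch}")
    for sample, ax in zip(batch, axs.flat):
        ax.imshow(sample)
        ax.axis("off")
    plt.pause(0.001)
    plt.close(fig)  # release the figure so pyplot doesn't accumulate them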
I'm following this tutorial.
I'm at the last part where we combine the models in a regression.
I'm coding this in jupyter as follows:
import shutil
import os
import time
from datetime import datetime
import argparse
import pandas
import numpy as np
from tqdm import tqdm
from tqdm import tqdm_notebook
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchsample.transforms import RandomRotate, RandomTranslate, RandomFlip, ToTensor, Compose, RandomAffine
from torchvision import transforms
import torch.nn.functional as F
from tensorboardX import SummaryWriter
import dataloader
from dataloader import MRDataset
import model
from sklearn import metrics
def extract_predictions(task, plane, train=True):
    assert task in ['acl', 'meniscus', 'abnormal']
    assert plane in ['axial', 'coronal', 'sagittal']

    models = os.listdir('models/')
    model_name = list(filter(lambda name: task in name and plane in name, models))[0]
    model_path = f'models/{model_name}'

    mrnet = torch.load(model_path)
    _ = mrnet.eval()

    train_dataset = MRDataset('data/',
                              task,
                              plane,
                              transform=None,
                              train=train,
                              )
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1,
                                               shuffle=False,
                                               num_workers=10,
                                               drop_last=False)

    predictions = []
    labels = []
    with torch.no_grad():
        for image, label, _ in tqdm_notebook(train_loader):
            logit = mrnet(image.cuda())
            prediction = torch.sigmoid(logit)
            predictions.append(prediction.item())
            labels.append(label.item())

    return predictions, labels
task = 'acl'
results = {}

for plane in ['axial', 'coronal', 'sagittal']:
    predictions, labels = extract_predictions(task, plane)
    results['labels'] = labels
    results[plane] = predictions

X = np.zeros((len(predictions), 3))
X[:, 0] = results['axial']
X[:, 1] = results['coronal']
X[:, 2] = results['sagittal']

y = np.array(labels)

from sklearn.linear_model import LogisticRegression  # missing from the imports above
logreg = LogisticRegression(solver='lbfgs')
logreg.fit(X, y)
task = 'acl'
results_val = {}

for plane in ['axial', 'coronal', 'sagittal']:
    predictions, labels = extract_predictions(task, plane, train=False)
    results_val['labels'] = labels
    results_val[plane] = predictions
y_pred = logreg.predict_proba(X_val)[:, 1]
metrics.roc_auc_score(y_val, y_pred)
However I get this error:
ValueError Traceback (most recent call last)
<ipython-input-2-979acb314bc5> in <module>
3
4 for plane in ['axial', 'coronal', 'sagittal']:
----> 5 predictions, labels = extract_predictions(task, plane)
6 results['labels'] = labels
7 results[plane] = predictions
<ipython-input-1-647731b6b5c8> in extract_predictions(task, plane, train)
54 logit = mrnet(image.cuda())
55 prediction = torch.sigmoid(logit)
---> 56 predictions.append(prediction.item())
57 labels.append(label.item())
58
ValueError: only one element tensors can be converted to Python scalars
Here's the MRDataset code in case it's relevant:
class MRDataset(data.Dataset):
    def __init__(self, root_dir, task, plane, train=True, transform=None, weights=None):
        super().__init__()
        self.task = task
        self.plane = plane
        self.root_dir = root_dir
        self.train = train

        if self.train:
            self.folder_path = self.root_dir + 'train/{0}/'.format(plane)
            self.records = pd.read_csv(
                self.root_dir + 'train-{0}.csv'.format(task), header=None, names=['id', 'label'])
        else:
            transform = None
            self.folder_path = self.root_dir + 'valid/{0}/'.format(plane)
            self.records = pd.read_csv(
                self.root_dir + 'valid-{0}.csv'.format(task), header=None, names=['id', 'label'])

        self.records['id'] = self.records['id'].map(
            lambda i: '0' * (4 - len(str(i))) + str(i))
        self.paths = [self.folder_path + filename +
                      '.npy' for filename in self.records['id'].tolist()]
        self.labels = self.records['label'].tolist()

        self.transform = transform
        if weights is None:
            pos = np.sum(self.labels)
            neg = len(self.labels) - pos
            self.weights = torch.FloatTensor([1, neg / pos])
        else:
            self.weights = torch.FloatTensor(weights)

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        array = np.load(self.paths[index])
        label = self.labels[index]
        if label == 1:
            label = torch.FloatTensor([[0, 1]])
        elif label == 0:
            label = torch.FloatTensor([[1, 0]])

        if self.transform:
            array = self.transform(array)
        else:
            array = np.stack((array,) * 3, axis=1)
            array = torch.FloatTensor(array)

        # if label.item() == 1:
        #     weight = np.array([self.weights[1]])
        #     weight = torch.FloatTensor(weight)
        # else:
        #     weight = np.array([self.weights[0]])
        #     weight = torch.FloatTensor(weight)

        return array, label, self.weights
I've only trained my models with 1 and 2 epochs for each MRI plane instead of the 35 used in the tutorial; I'm not sure if that has anything to do with it. Other than that, I'm stranded as to what this could be. I also removed normalize=False from the train_dataset options because it kept giving me an error, and I read that it could be removed, but I'm not so sure about that either.
Only a tensor that contains a single value can be converted to a scalar with item(). Try printing the contents of prediction; I imagine it is a vector of probabilities indicating which label is most likely. Calling argmax on prediction will give you the actual predicted label (assuming your labels are 0 to n).
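For example, a minimal sketch of that change inside the loop from the question (using the names defined there; since MRDataset returns one-hot labels of shape [1, 2], label needs the same argmax treatment before item()):

with torch.no_grad():
    for image, label, _ in tqdm_notebook(train_loader):
        logit = mrnet(image.cuda())              # shape (1, 2): one logit per class
        prediction = torch.argmax(logit, dim=1)  # index of the most likely class
        predictions.append(prediction.item())    # single-element tensor, so item() now works
        labels.append(torch.argmax(label, dim=-1).item())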
I've tried to run this code from CARLA's command prompt window, but it generates the error NameError: name 'self' is not defined here:
self.model.compile(optimizer='adam', loss=self.conditional_loss,
                   metrics=[self.conditional_loss,
                            keras.metrics.sparse_categorical_accuracy],
                   run_eagerly=True)
But if I remove this, it generates a warning saying that I should either run in eager mode or in graph mode with Keras's model.fit embedded into the code to continue training the model. Any help would be appreciated, thanks!
Full Code for reference:
import glob
import os
import sys
import random
import time
import numpy as np
import cv2
import math
from collections import deque
from keras.applications.xception import Xception
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.models import Model
from keras.callbacks import TensorBoard
import tensorflow as tf
import keras.backend.tensorflow_backend as backend
import tensorflow.compat.v1 as tf
from threading import Thread
from tqdm import tqdm
try:
    sys.path.append(glob.glob('../carla/dist/carla-*%d.%d-%s.egg' % (
        sys.version_info.major,
        sys.version_info.minor,
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    pass
import carla
SHOW_PREVIEW = False
IM_WIDTH = 640
IM_HEIGHT = 480
SECONDS_PER_EPISODE = 10
REPLAY_MEMORY_SIZE = 5_000
MIN_REPLAY_MEMORY_SIZE = 1_000
MINIBATCH_SIZE = 16
PREDICTION_BATCH_SIZE = 1
TRAINING_BATCH_SIZE = MINIBATCH_SIZE // 4
UPDATE_TARGET_EVERY = 5
MODEL_NAME = "Xception"
MEMORY_FRACTION = 0.4
MIN_REWARD = -200
EPISODES = 100
DISCOUNT = 0.99
epsilon = 1
EPSILON_DECAY = 0.95 ## 0.9975 99975
MIN_EPSILON = 0.001
AGGREGATE_STATS_EVERY = 10
# Own Tensorboard class
class ModifiedTensorBoard(TensorBoard):

    # Overriding init to set initial step and writer (we want one log file for all .fit() calls)
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.step = 1

    # Overriding this method to stop creating default log writer
    def set_model(self, model):
        pass

    # Overridden, saves logs with our step number
    # (otherwise every .fit() will start writing from 0th step)
    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(**logs)

    # Overridden
    # We train for one batch only, no need to save anything at epoch end
    def on_batch_end(self, batch, logs=None):
        pass

    # Overridden, so won't close writer
    def on_train_end(self, _):
        pass

    # Custom method for saving own metrics
    # Creates writer, writes custom metrics and closes writer
    def update_stats(self, **stats):
        self._write_logs(stats, self.step)

    def _write_logs(self, logs, index):
        with self.writer.as_default():
            for name, value in logs.items():
                tf.summary.scalar(name, value, step=index)
                self.step += 1
                self.writer.flush()
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10)
])

predictions = model(x_train[:1]).numpy()
predictions

tf.nn.softmax(predictions).numpy()

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
loss_fn(y_train[:1], predictions).numpy()

model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5)

# This is the line that raises the NameError: there is no `self` at module level.
self.model.compile(optimizer='adam', loss=self.conditional_loss,
                   metrics=[self.conditional_loss,
                            keras.metrics.sparse_categorical_accuracy],
                   run_eagerly=True
                   )
class CarEnv:
    SHOW_CAM = SHOW_PREVIEW
    STEER_AMT = 1.0
    im_width = IM_WIDTH
    im_height = IM_HEIGHT
    front_camera = None

    def __init__(self):
        self.client = carla.Client("localhost", 2000)
        self.client.set_timeout(2.0)
        self.world = self.client.get_world()
        self.blueprint_library = self.world.get_blueprint_library()
        self.model_3 = self.blueprint_library.filter("model3")[0]

    def reset(self):
        self.collision_hist = []
        self.actor_list = []

        self.transform = random.choice(self.world.get_map().get_spawn_points())
        self.vehicle = self.world.spawn_actor(self.model_3, self.transform)
        self.actor_list.append(self.vehicle)

        self.rgb_cam = self.blueprint_library.find('sensor.camera.rgb')
        self.rgb_cam.set_attribute("image_size_x", f"{self.im_width}")
        self.rgb_cam.set_attribute("image_size_y", f"{self.im_height}")
        self.rgb_cam.set_attribute("fov", f"110")

        transform = carla.Transform(carla.Location(x=2.5, z=0.7))
        self.sensor = self.world.spawn_actor(self.rgb_cam, transform, attach_to=self.vehicle)
        self.actor_list.append(self.sensor)
        self.sensor.listen(lambda data: self.process_img(data))

        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))
        time.sleep(4)

        colsensor = self.blueprint_library.find("sensor.other.collision")
        self.colsensor = self.world.spawn_actor(colsensor, transform, attach_to=self.vehicle)
        self.actor_list.append(self.colsensor)
        self.colsensor.listen(lambda event: self.collision_data(event))

        while self.front_camera is None:
            time.sleep(0.01)

        self.episode_start = time.time()
        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))

        return self.front_camera

    def collision_data(self, event):
        self.collision_hist.append(event)

    def process_img(self, image):
        i = np.array(image.raw_data)
        #print(i.shape)
        i2 = i.reshape((self.im_height, self.im_width, 4))
        i3 = i2[:, :, :3]
        if self.SHOW_CAM:
            cv2.imshow("", i3)
            cv2.waitKey(1)
        self.front_camera = i3

    def step(self, action):
        if action == 0:
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=-1*self.STEER_AMT))
        elif action == 1:
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=0))
        elif action == 2:
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=1*self.STEER_AMT))

        v = self.vehicle.get_velocity()
        kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))

        if len(self.collision_hist) != 0:
            done = True
            reward = -200
        elif kmh < 50:
            done = False
            reward = -1
        else:
            done = False
            reward = 1

        if self.episode_start + SECONDS_PER_EPISODE < time.time():
            done = True

        return self.front_camera, reward, done, None
class DQNAgent:
    def __init__(self):
        self.model = self.create_model()
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())

        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)

        self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}")
        self.target_update_counter = 0
        self.graph = tf.get_default_graph()

        self.terminate = False
        self.last_logged_episode = 0
        self.training_initialized = False

    def create_model(self):
        base_model = Xception(weights=None, include_top=False, input_shape=(IM_HEIGHT, IM_WIDTH, 3))

        x = base_model.output
        x = GlobalAveragePooling2D()(x)

        predictions = Dense(3, activation="linear")(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        model.compile(loss="mse", optimizer=Adam(lr=0.001), metrics=["accuracy"])
        return model

    def update_replay_memory(self, transition):
        # transition = (current_state, action, reward, new_state, done)
        self.replay_memory.append(transition)

    def train(self):
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        current_states = np.array([transition[0] for transition in minibatch])/255
        with self.graph.as_default():
            current_qs_list = self.model.predict(current_states, PREDICTION_BATCH_SIZE)

        new_current_states = np.array([transition[3] for transition in minibatch])/255
        with self.graph.as_default():
            future_qs_list = self.target_model.predict(new_current_states, PREDICTION_BATCH_SIZE)

        X = []
        y = []

        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            current_qs = current_qs_list[index]
            current_qs[action] = new_q

            X.append(current_state)
            y.append(current_qs)

        log_this_step = False
        if self.tensorboard.step > self.last_logged_episode:
            log_this_step = True
            self.last_logged_episode = self.tensorboard.step  # was `last_log_episode`, which never updated the attribute set in __init__

        with self.graph.as_default():
            self.model.fit(np.array(X)/255, np.array(y), batch_size=TRAINING_BATCH_SIZE, verbose=0, shuffle=False, callbacks=[self.tensorboard] if log_this_step else None)

        if log_this_step:
            self.target_update_counter += 1

        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

    def get_qs(self, state):
        return self.model.predict(np.array(state).reshape(-1, *state.shape)/255)[0]

    def train_in_loop(self):
        X = np.random.uniform(size=(1, IM_HEIGHT, IM_WIDTH, 3)).astype(np.float32)
        y = np.random.uniform(size=(1, 3)).astype(np.float32)
        with self.graph.as_default():
            self.model.fit(X, y, verbose=False, batch_size=1)

        self.training_initialized = True

        while True:
            if self.terminate:
                return
            self.train()
            time.sleep(0.01)
if __name__ == '__main__':
    FPS = 60
    # For stats
    ep_rewards = [-200]

    # For more repetitive results
    random.seed(1)
    np.random.seed(1)
    tf.set_random_seed(1)

    # Memory fraction, used mostly when training multiple agents
    #backend.set_session(tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)))
    #tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction)

    # Create models folder
    if not os.path.isdir('models'):
        os.makedirs('models')

    # Create agent and environment
    agent = DQNAgent()
    env = CarEnv()

    # Start training thread and wait for training to be initialized
    trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
    trainer_thread.start()
    while not agent.training_initialized:
        time.sleep(0.01)

    # Initialize predictions - the first prediction takes longer because of the initialization
    # that has to be done, so it's better to do one prediction before iterating over episode steps
    agent.get_qs(np.ones((env.im_height, env.im_width, 3)))

    # Iterate over episodes
    for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):
        #try:
        env.collision_hist = []

        # Update tensorboard step every episode
        agent.tensorboard.step = episode

        # Restarting episode - reset episode reward and step number
        episode_reward = 0
        step = 1

        # Reset environment and get initial state
        current_state = env.reset()

        # Reset flag and start iterating until episode ends
        done = False
        episode_start = time.time()

        # Play for given number of seconds only
        while True:
            # This part stays mostly the same, the change is to query a model for Q values
            if np.random.random() > epsilon:
                # Get action from Q table
                action = np.argmax(agent.get_qs(current_state))
            else:
                # Get random action
                action = np.random.randint(0, 3)
                # This takes no time, so we add a delay matching 60 FPS (prediction above takes longer)
                time.sleep(1/FPS)

            new_state, reward, done, _ = env.step(action)

            # Transform new continuous state to new discrete state and count reward
            episode_reward += reward

            # Every step we update replay memory
            agent.update_replay_memory((current_state, action, reward, new_state, done))

            current_state = new_state
            step += 1

            if done:
                break

        # End of episode - destroy agents
        for actor in env.actor_list:
            actor.destroy()

        # Append episode reward to a list and log stats (every given number of episodes)
        ep_rewards.append(episode_reward)
        if not episode % AGGREGATE_STATS_EVERY or episode == 1:
            average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
            min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
            max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
            agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)

            # Save model, but only when min reward is greater or equal a set value
            if min_reward >= MIN_REWARD:
                agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

        # Decay epsilon
        if epsilon > MIN_EPSILON:
            epsilon *= EPSILON_DECAY
            epsilon = max(MIN_EPSILON, epsilon)

    # Set termination flag for training thread and wait for it to finish
    agent.terminate = True
    trainer_thread.join()
    agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')
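For what it's worth, the NameError itself is plain Python: self only exists inside instance methods, so a module-level self.model.compile(...) can never work. A tiny sketch of the distinction (hypothetical model, not the one from the question):

import tensorflow as tf

class Agent:
    def __init__(self):
        self.model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
        # Inside a method, `self` is defined, so compiling here is fine:
        self.model.compile(optimizer='adam', loss='mse', run_eagerly=True)

agent = Agent()
# At module level there is no `self`; the next line would raise
# NameError: name 'self' is not defined
# self.model.compile(optimizer='adam', loss='mse')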
I seem to have some issues using the implementation that is provided here.
I am in a situation similar to the poster's, in that I am trying to map an input to an output. The input is samples of an audio file, and the output is a feature vector of length 14 (the length is static). The sequence length is variable since the audio files have different lengths, so the vectors containing the samples have different lengths as well.
I am not solving a classification problem but a regression, so the task is a bit different.
My code looks like this:
import tensorflow as tf
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import rnn
import numpy as np
import librosa
import glob
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join
import os
from os import walk
from os.path import splitext
from os.path import join
import time
rng = np.random
np.set_printoptions(threshold=np.nan)
import functools
start_time = time.time()
print "Preprocessing"
def lazy_property(function):
    attribute = '_' + function.__name__

    @property
    @functools.wraps(function)
    def wrapper(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return wrapper
## Class definition ##
class VariableSequenceLabelling:

    def __init__(self, data, target, num_hidden=200, num_layers=3):
        self.data = data
        self.target = target
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        self.prediction
        self.error
        self.optimize

    @lazy_property
    def length(self):
        used = tf.sign(tf.reduce_max(tf.abs(self.data), reduction_indices=2))
        length = tf.reduce_sum(used, reduction_indices=1)
        length = tf.cast(length, tf.int32)
        return length

    @lazy_property
    def prediction(self):
        # Recurrent network.
        output, _ = tf.nn.dynamic_rnn(
            rnn_cell.GRUCell(self._num_hidden),
            self.data,
            dtype=tf.float32,
            sequence_length=self.length,
        )
        # Softmax layer.
        max_length = int(self.target.get_shape()[1])
        num_classes = int(self.target.get_shape()[2])
        weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
        # Flatten to apply same weights to all time steps.
        output = tf.reshape(output, [-1, self._num_hidden])
        prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
        prediction = tf.reshape(prediction, [-1, max_length, num_classes])
        return prediction

    @lazy_property
    def cost(self):
        # Compute cross entropy for each frame.
        cross_entropy = self.target * tf.log(self.prediction)
        cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
        mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
        cross_entropy *= mask
        # Average over actual sequence lengths.
        cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
        cross_entropy /= tf.cast(self.length, tf.float32)
        return tf.reduce_mean(cross_entropy)

    @lazy_property
    def optimize(self):
        learning_rate = 0.0003
        optimizer = tf.train.AdamOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(
            tf.argmax(self.target, 2), tf.argmax(self.prediction, 2))
        mistakes = tf.cast(mistakes, tf.float32)
        mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
        mistakes *= mask
        # Average over actual sequence lengths.
        mistakes = tf.reduce_sum(mistakes, reduction_indices=1)
        mistakes /= tf.cast(self.length, tf.float32)
        return tf.reduce_mean(mistakes)

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)
#######################
#Converting file to .wav from .sph file format... God dammit!!!
#with open(train_filelist, 'r') as train_filelist, open(test_filelist, 'r') as test_filelist:
#    train_mylist = train_filelist.read().splitlines()
#    test_mylist = test_filelist.read().splitlines()
#    for line in train_mylist:
#        new_line = ' '.join(reversed(line))
#        index_start = new_line.find('h')
#        index_end = new_line.find('/')
#        edited_line = ''.join(reversed(new_line[index_start+5:index_end])).strip().replace(" ","")
#        new_file = edited_line + 'wav'
#        os.system(line + ' >> ' + dnn_train + new_file)
#    for line in test_mylist:
#        new_line = ' '.join(reversed(line))
#        index_start = new_line.find('h')
#        index_end = new_line.find('/')
#        edited_line = ''.join(reversed(new_line[index_start+5:index_end])).strip().replace(" ","")
#        new_file = edited_line + 'wav'
#        os.system(line + ' >> ' + dnn_test + new_file)
path_train = "/home/JoeS/kaldi-trunk/egs/start/s5/data/train"
path_test = "/home/JoeS/kaldi-trunk/egs/start/s5/data/test"
dnn_train = "/home/JoeS/kaldi-trunk/dnn/train/"
dnn_test = "/home/JoeS/kaldi-trunk/dnn/test/"
dnn = "/home/JoeS/kaldi-trunk/dnn/"
path = "/home/JoeS/kaldi-trunk/egs/start/s5/data/"
MFCC_dir = "/home/JoeS/kaldi-trunk/egs/start/s5/mfcc/raw_mfcc_train.txt"
train_filelist = path_train+"/wav_train.txt"
test_filelist = path_test+"/wav_test.txt"
os.chdir(path)
def find_all(a_str, sub):
    start = 0
    while True:
        start = a_str.find(sub, start)
        if start == -1:
            return
        yield start
        start += len(sub)  # use start += 1 to find overlapping matches
def load_sound_files(file_paths, names_input, data_input):
    raw_sounds = []
    names_output = []
    data_output = []
    class_output = []
    for fp in file_paths:
        X, sr = librosa.load(fp)
        raw_sounds.append(X)
        index = list(find_all(fp, '-'))
        input_index = names_input.index(fp[index[1]+1:index[2]])
        names_output.append(names_input[input_index])
        data_output.append(data_input[input_index])
        class_output.append(binify(data_input[input_index][0]))
    return raw_sounds, names_output, data_output, class_output
def generate_list_of_names_data(file_path):
    # Preprocess: extract name and data
    name = []
    data = []
    with open(MFCC_dir) as mfcc_feature_list:
        content = [x.strip('\n') for x in mfcc_feature_list.readlines()]  # remove endlines
        start_index_data = 0
        end_index_data = 2
        for number in range(0, 42):
            start = list(find_all(content[start_index_data], '['))[0]
            end = list(find_all(content[end_index_data], ']'))[0]
            end_name = list(find_all(content[start_index_data], ' '))[0]
            substring_data = content[start_index_data][start+1:] + content[end_index_data][:end]
            substring_name = content[start_index_data][:end_name]
            arr = np.array(substring_data.split(), dtype=float)
            data.append(arr)
            name.append(substring_name)
            start_index_data = start_index_data + 3
            end_index_data = end_index_data + 3
    return name, data
files_train_path = [dnn_train+f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_path = [dnn_test+f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]
files_train_name = [f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_name = [f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]
os.chdir(dnn_train)
train_name,train_data = generate_list_of_names_data(files_train_path)
train_data, train_names, train_output_data, train_class_output = load_sound_files(files_train_path,train_name,train_data)
max_length = 0  # Used for variable sequence input
for element in train_data:
    if element.size > max_length:
        max_length = element.size
NUM_EXAMPLES = len(train_data)/2
test_data = train_data[NUM_EXAMPLES:]
test_output = train_output_data[NUM_EXAMPLES:]
train_data = train_data[:NUM_EXAMPLES]
train_output = train_output_data[:NUM_EXAMPLES]
print("--- %s seconds ---" % (time.time() - start_time))
##-------------------MAIN----------------------------##
if __name__ == '__main__':
    data = tf.placeholder(tf.float32, [None, max_length, 1])
    target = tf.placeholder(tf.float32, [None, 14, 1])
    model = VariableSequenceLabelling(data, target)
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())
    for epoch in range(10):
        for sample_set in range(100):
            batch_train = train_data[sample_set]
            batch_target = train_output[sample_set]
            sess.run(model.optimize, {data: batch_train, target: batch_target})
        test_set = test_data[epoch]
        test_set_output = test_output[epoch]
        error = sess.run(model.error, {data: test_set, target: test_set_output})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
And the error message is
Traceback (most recent call last):
File "tensorflow_datapreprocess_mfcc_extraction_rnn.py", line 239, in <module>
sess.run(model.optimize, {data: batch_train, target: batch_target})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 340, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 553, in _run
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (63945,) for Tensor u'Placeholder:0', which has shape '(?, 138915, 1)'
As I understand it, the error message is due to the use of max_length: the input doesn't have the expected size, meaning it isn't being zero-padded properly. Or am I wrong? If so, how do I fix it? The solution I seek doesn't seem to come natively from TensorFlow. Do other frameworks handle this natively, and would it be advisable to switch to one because of this missing functionality?
The shapes of TensorFlow placeholders must match the shapes of the data that is fed to them. Here, you're trying to feed a [63945] tensor to a [?, 138915, 1] shaped placeholder. These are incompatible shapes, and TensorFlow complains.
You must pad the input tensor with numpy to the desired shape before feeding it to TensorFlow. I suggest using numpy.pad. (Also note that the number of dimensions must match; you can use numpy.reshape to fix that, or change the placeholder shape and use a TensorFlow reshape.)
When working with long sequences, padding to a common sequence length can often cause memory problems. The standard workaround is to bucket sequences into buckets of similar lengths. The TensorFlow seq2seq example might be a useful source of inspiration: https://www.tensorflow.org/versions/r0.11/tutorials/seq2seq/index.html
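For instance, a minimal sketch of that padding step with numpy (shapes taken from the error message above, random data standing in for an actual audio example):

import numpy as np

max_length = 138915
sample = np.random.randn(63945).astype(np.float32)  # stand-in for one audio example

padded = np.pad(sample, (0, max_length - sample.shape[0]), mode='constant')  # zero-pad at the end
batch = padded.reshape(1, max_length, 1)  # match the [?, max_length, 1] placeholder
print(batch.shape)  # (1, 138915, 1)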