I'm trying to define a model function for MCMC.
The idea is to have a mixture of two distributions controlled with a probability ratio.
One of my attempts would look like this:
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
root = tfd.JointDistributionCoroutine.Root
def model_fn():
    rv_p = yield root(tfd.Sample(tfd.Uniform(0.0, 1.0), 1))
    catprobs = tf.stack([rv_p, 1. - rv_p], 0)
    rv_cat = tfd.Categorical(probs=catprobs)
    rv_norm1 = tfd.Sample(tfd.Normal(0.0, 1.0), 1)
    rv_norm2 = tfd.Sample(tfd.Normal(3.0, 1.0), 1)
    rv_mix = yield tfd.Mixture(cat=rv_cat,
                               components=[
                                   rv_norm1,
                                   rv_norm2,
                               ])
jd = tfd.JointDistributionCoroutine(model_fn)
jd.sample(2)
The code fails with:
ValueError: components[0] batch shape must be compatible with cat shape and other component batch shapes ((2, 2) vs ())
Could you give me an example of how to use the Mixture distribution in a way that allows "any" shape of inputs?
I'm using tensorflow 2.4.1 and tensorflow_probability 0.12.1 with Python 3.6.
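As an aside, the error is tfd.Mixture's shape contract: the batch shape of cat must be compatible with the batch shape of every component. A minimal sketch (mine, not part of the model above) of the contract in isolation:

import tensorflow_probability as tfp
tfd = tfp.distributions

# cat and both components all have empty batch shape, so this is valid
mix = tfd.Mixture(
    cat=tfd.Categorical(probs=[0.3, 0.7]),
    components=[tfd.Normal(loc=0., scale=1.),
                tfd.Normal(loc=3., scale=1.)])
mix.sample(5)  # five scalar draws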
I figured it out. For reference, here is sample code:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
tfd = tfp.distributions
tfb = tfp.bijectors
import numpy as np
from time import time
numdata = 10000
data = np.random.normal(0.0,1.0,numdata).astype(np.float32)
data[int(numdata/2):] = 0.0
_=plt.hist(data,30,density=True)
root = tfd.JointDistributionCoroutine.Root
def dist_fn(rv_p, rv_mu):
    rv_cat = tfd.Categorical(probs=tf.stack([rv_p, 1. - rv_p], -1))
    rv_norm = tfd.Normal(rv_mu, 1.0)
    rv_zero = tfd.Deterministic(tf.zeros_like(rv_mu))
    # Independent over the trailing batch dimension, so the mixture's
    # log_prob reduces to a single number per chain
    rv_mix = tfd.Independent(
        tfd.Mixture(cat=rv_cat,
                    components=[rv_norm, rv_zero]),
        reinterpreted_batch_ndims=1)
    return rv_mix

def model_fn():
    rv_p = yield root(tfd.Sample(tfd.Uniform(0.0, 1.0), 1))
    rv_mu = yield root(tfd.Sample(tfd.Uniform(-1., 1.), 1))
    rv_mix = yield dist_fn(rv_p, rv_mu)
jd = tfd.JointDistributionCoroutine(model_fn)
# Pin the observed data as the last variable of the joint distribution
unnormalized_posterior_log_prob = lambda *args: jd.log_prob(args + (data,))
n_chains = 1
p_init = [0.3]
p_init = tf.cast(p_init,dtype=tf.float32)
mu_init = 0.1
mu_init = tf.stack([mu_init]*n_chains,axis=0)
initial_chain_state = [
p_init,
mu_init,
]
bijectors = [
tfb.Sigmoid(), # p
tfb.Identity(), # mu
]
step_size = 0.01
num_results = 50000
num_burnin_steps = 50000
kernel = tfp.mcmc.TransformedTransitionKernel(
    inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=unnormalized_posterior_log_prob,
        num_leapfrog_steps=2,
        step_size=step_size,
        state_gradients_are_stopped=True),
    bijector=bijectors)
kernel = tfp.mcmc.SimpleStepSizeAdaptation(
    inner_kernel=kernel, num_adaptation_steps=int(num_burnin_steps * 0.8))
# XLA optim
# @tf.function(autograph=False, experimental_compile=True)
def graph_sample_chain(*args, **kwargs):
    return tfp.mcmc.sample_chain(*args, **kwargs)
st = time()
trace, stats = graph_sample_chain(
    num_results=num_results,
    num_burnin_steps=num_burnin_steps,
    current_state=initial_chain_state,
    kernel=kernel)
et = time()
print(et-st)
ptrace, mutrace = trace
plt.subplot(121)
_=plt.hist(ptrace.numpy(),100,density=True)
plt.subplot(122)
_=plt.hist(mutrace.numpy(),100,density=True)
print(np.mean(ptrace),np.mean(mutrace))
I am new to machine learning and Python at the same time.
I am trying to run this piece of code.
# note: this custom dataset is not built on top of torch_geometric's inbuilt Dataset
import os
import torch
import glob
import numpy as np
import random
import math
from os import listdir
from os.path import isfile, join
processed_dir="../Human_features/processed/"
npy_file = "./Human_features/npy_file_new(human_dataset).npy"
npy_ar = np.load(npy_file)
print(npy_ar.shape)
from torch.utils.data import Dataset as Dataset_n
from torch_geometric.data import DataLoader as DataLoader_n
class LabelledDataset(Dataset_n):
    def __init__(self, npy_file, processed_dir):
        self.npy_ar = np.load(npy_file)
        self.processed_dir = processed_dir
        self.protein_1 = self.npy_ar[:, 2]
        self.protein_2 = self.npy_ar[:, 5]
        self.label = self.npy_ar[:, 6].astype(float)
        self.n_samples = self.npy_ar.shape[0]

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        prot_1 = os.path.join(self.processed_dir, self.protein_1[index] + ".pt")
        prot_2 = os.path.join(self.processed_dir, self.protein_2[index] + ".pt")
        # print(f'Second prot is {prot_2}')
        prot_1 = torch.load(glob.glob(prot_1)[0])
        # print(f'Here lies {glob.glob(prot_2)}')
        prot_2 = torch.load(glob.glob(prot_2)[0])
        return prot_1, prot_2, torch.tensor(self.label[index])
dataset = LabelledDataset(npy_file=npy_file, processed_dir=processed_dir)
final_pairs = np.load(npy_file)
size = final_pairs.shape[0]
print("Size is : ")
print(size)
seed = 42
torch.manual_seed(seed)
#print(math.floor(0.8 * size))
#Make iterables using dataloader class
trainset, testset = torch.utils.data.random_split(dataset, [math.floor(0.8 * size), size - math.floor(0.8 * size)])
print(trainset[0])
trainloader = DataLoader_n(dataset=trainset, batch_size=4, num_workers=0)
testloader = DataLoader_n(dataset=testset, batch_size=4, num_workers=0)
print("Length")
print(len(trainloader))
print(len(testloader))
When I run this code I get an error. The error is in the DataLoader, coming from this line:
from torch_geometric.data import DataLoader as DataLoader_n
I installed pytorch with
and pytorch geometric with
How can I solve this?
My python version is:
3.9.0
Thank you
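(A hedged guess, since the error text is not quoted above: in torch_geometric 2.x the DataLoader moved out of torch_geometric.data, so the old import path fails. The newer import would be:)

# Assumes torch_geometric >= 2.0, where the loaders live in torch_geometric.loader
from torch_geometric.loader import DataLoader as DataLoader_n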
I am trying to implement the DDPG algorithm, which takes a state of 8 values and outputs an action of size 4.
The actions are lower-bounded by [5,5,0,0] and upper-bounded by [40,40,15,15].
When I train my DDPG it always chooses one of the boundaries, for example [5,40,0,15] or [40,40,0,0].
I implemented the SAC algorithm afterwards and it works; I also tried my DDPG agent on a Gym game, and it works there. Maybe the problem is with upscaling the actions in the policy agent.
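As a quick arithmetic check of that upscaling (my sketch, using the bounds above): the affine map scale * t + bias should send t = -1, 0, 1 to the lower bound, the midpoint, and the upper bound.

import numpy as np

lower = np.array([5., 5., 0., 0.])
upper = np.array([40., 40., 15., 15.])
scale = (upper - lower) / 2.0  # [17.5, 17.5, 7.5, 7.5]
bias = (upper + lower) / 2.0   # [22.5, 22.5, 7.5, 7.5]
for t in (-1.0, 0.0, 1.0):
    print(scale * t + bias)    # lower bound, midpoint, upper bound

So the scaling itself is consistent with the bounds; all-boundary actions would instead point at the tanh output saturating at plus or minus 1.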
Here is the model I have:
class Buffers:
    def __init__(self, buffer_capacity=100000, batch_size=64):
        # Number of "experiences" to store at max
        self.buffer_capacity = buffer_capacity
        num_states = 8
        num_actions = 4
        # Num of tuples to train on.
        self.batch_size = batch_size
        # Tells us the number of times record() was called.
        self.buffer_counter = 0
        # Instead of a list of tuples, as in the classic experience-replay
        # formulation, we use a separate np.array per tuple element
        self.state_buffer = np.zeros((self.buffer_capacity, num_states))
        self.action_buffer = np.zeros((self.buffer_capacity, num_actions))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))

    # Takes an (s, a, r, s') observation tuple as input
    def record(self, obs_tuple):
        # Wrap the index around when buffer_capacity is exceeded,
        # replacing the oldest records
        index = self.buffer_counter % self.buffer_capacity
        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]
        self.buffer_counter += 1
import random
import numpy as np
from collections import deque
import tensorflow as tf
from keras.models import Sequential
from keras.callbacks import History
from keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
import keras.backend as K
import import_ipynb
from Noise import OUActionNoise

keras = tf.keras
# tf.compat.v1.disable_eager_execution()
class DQLearningAgent:
    def __init__(self, seed, discount_factor=0.95):
        self.tau = 0.05
        self.gamma = discount_factor
        self.critic_lr = 0.002
        self.actor_lr = 0.001
        self.std_dev = [0.7, 0.7, 0.2, 0.2]
        self.buffer = Buffers(50000, 64)
        self.M = 16
        self.upper_bound = [40, 40, self.M - 1, self.M - 1]
        self.lower_bound = [5, 5, 0, 0]
        # Affine map from a tanh output in [-1, 1] to [lower_bound, upper_bound]
        self.action_scale = (np.array(self.upper_bound) - np.array(self.lower_bound)) / 2.0
        self.action_bias = (np.array(self.upper_bound) + np.array(self.lower_bound)) / 2.0
        self._state_size = 8  # unchanged
        self._action_size = 4
        self.seed = seed
        # random.seed(self.seed)
        # np.random.seed(self.seed)
        self.actor_model = self.get_actor()
        self.critic_model = self.get_critic()
        self.target_actor = self.get_actor()
        self.target_critic = self.get_critic()
        # Making the target weights equal to the online weights initially
        self.target_actor.set_weights(self.actor_model.get_weights())
        self.target_critic.set_weights(self.critic_model.get_weights())
        self.critic_optimizer = tf.keras.optimizers.Adam(self.critic_lr)
        self.actor_optimizer = tf.keras.optimizers.Adam(self.actor_lr)
        self.ou_noise = OUActionNoise(mean=np.zeros(self._action_size), std_deviation=np.array(self.std_dev))
    def get_actor(self):
        # Initialize the final layer's weights between -3e-3 and 3e-3
        last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
        inputs = layers.Input(shape=(self._state_size,))
        out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(inputs)
        # out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
        # out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
        out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
        outputs = layers.Dense(self._action_size, activation="tanh", kernel_initializer=last_init)(out)

        def antirectifier(x):
            # Rescale the tanh output from [-1, 1] to [lower_bound, upper_bound]
            return self.action_scale * x + self.action_bias

        outputs = layers.Lambda(antirectifier)(outputs)
        model = tf.keras.Model(inputs, outputs)
        return model
    def get_critic(self):
        # State as input
        state_input = layers.Input(shape=(self._state_size,))
        # state_out = layers.Dense(28, activation="relu")(state_input)
        # Action as input
        action_input = layers.Input(shape=(self._action_size,))
        # action_out = layers.Dense(16, activation="relu")(action_input)
        # Both could be passed through separate layers before concatenating
        concat = layers.Concatenate()([state_input, action_input])
        out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(concat)
        # out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
        # out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
        out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
        # Outputs a single value for the given state-action pair
        outputs = layers.Dense(1)(out)
        model = tf.keras.Model([state_input, action_input], outputs)
        return model
    def learn(self):
        # Get sampling range
        record_range = min(self.buffer.buffer_counter, self.buffer.buffer_capacity)
        # Randomly sample indices
        batch_indices = np.random.choice(record_range, self.buffer.batch_size)
        # print(self.buffer.action_buffer[batch_indices].shape)
        # Convert to tensors
        state_batch = tf.convert_to_tensor(self.buffer.state_buffer[batch_indices])
        action_batch = tf.convert_to_tensor(self.buffer.action_buffer[batch_indices])
        reward_batch = tf.convert_to_tensor(self.buffer.reward_buffer[batch_indices])
        reward_batch = tf.cast(reward_batch, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(self.buffer.next_state_buffer[batch_indices])
        return self.update(state_batch, action_batch, reward_batch, next_state_batch)
    def update(self, state_batch, action_batch, reward_batch, next_state_batch):
        # Critic update: regress Q(s, a) towards the bootstrapped target y
        with tf.GradientTape() as tape:
            target_actions_new = self.target_actor(next_state_batch)
            y = reward_batch + self.gamma * self.target_critic([next_state_batch, target_actions_new])
            q = self.critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.math.square(y - q))
        critic_grad = tape.gradient(critic_loss, self.critic_model.trainable_variables)
        self.critic_optimizer.apply_gradients(zip(critic_grad, self.critic_model.trainable_variables))

        # Actor update: ascend the critic's value of the actor's actions
        with tf.GradientTape() as tape:
            actions = self.actor_model(state_batch)
            critic_value = self.critic_model([state_batch, actions])
            actor_loss = -tf.math.reduce_mean(critic_value)
        actor_grad = tape.gradient(actor_loss, self.actor_model.trainable_variables)
        self.actor_optimizer.apply_gradients(zip(actor_grad, self.actor_model.trainable_variables))

        self.update_target(self.target_actor.variables, self.actor_model.variables)
        self.update_target(self.target_critic.variables, self.critic_model.variables)
        return actor_loss, critic_loss

    def update_target(self, target_weights, weights):
        # Polyak (soft) update of the target networks
        for (a, b) in zip(target_weights, weights):
            a.assign(b * self.tau + a * (1 - self.tau))

    def policy(self, state):
        sampled_actions = tf.squeeze(self.actor_model(state))
        noise = self.ou_noise()
        # Adding exploration noise to the action
        sampled_actions = sampled_actions.numpy() + noise
        # We make sure the action is within bounds
        legal_action = np.clip(sampled_actions, self.lower_bound, self.upper_bound)
        return [np.squeeze(legal_action)]
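A saturation check worth running here (my sketch, not part of the original code; agent and state_batch are hypothetical placeholders): if the tanh layer is pinned at plus or minus 1, the Lambda rescaling lands exactly on a bound, which matches the all-boundary actions described above.

import tensorflow as tf

# Expose the tanh output, i.e. the layer just before the Lambda rescaling
pre_scale = tf.keras.Model(agent.actor_model.input,
                           agent.actor_model.layers[-2].output)
raw = pre_scale(state_batch)
# Values pinned near -1 or +1 indicate a saturated actor
print(tf.reduce_min(raw).numpy(), tf.reduce_max(raw).numpy())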
Hi, I'm trying to train an MNIST classifier with an SVM (SVC) from scikit-learn (sklearn), but the training runs endlessly. What should I do?
I tried changing the parameters of SVC, but I'm not sure what I'm doing, and it doesn't work.
The number of training samples is 60,000.
Please help me.
import os
import struct
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from google.colab import drive
drive.mount('/content/gdrive')
def read(dataset = "training", path="."):
if dataset is "training":
fname_img = os.path.join(path, 'train-images-idx3-ubyte.idx3-ubyte')
fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte.idx1-ubyte')
elif dataset is "testing":
fname_img = os.path.join(path, 't10k-images-idx3-ubyte.idx3-ubyte')
fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte.idx1-ubyte')
else:
raise Exception("dataset must be 'testing' or 'training'")
with open(fname_lbl, 'rb') as flbl:
magic, num = struct.unpack(">II", flbl.read(8))
lbl = np.fromfile(flbl, dtype=np.int8)
with open(fname_img, 'rb') as fimg:
magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
img = np.fromfile(fimg, dtype=np.uint8).reshape(len(lbl), rows, cols)
get_img = lambda idx: (lbl[idx], img[idx])
# Create an iterator which returns each image in turn
for i in range(len(lbl)):
yield get_img(i)
tr = list(read("training", "/content/gdrive/My Drive/ColabNotebooks/MNIST"))
tst = list(read("testing", "/content/gdrive/My Drive/ColabNotebooks/MNIST"))
def seperate(data):
    labels = []
    images = []
    for i in data:
        labels.append(int(i[0]))
        images.append(i[1])
    return {"labels": labels, "images": images}
train = seperate(tr)
test = seperate(tst)
clf = SVC(kernel = 'linear', cache_size = 6000, gamma = 0.001, C = 100)
train_len = len(tr)
train_Array = np.array(train["images"]).reshape(train_len, -1)
clf.fit(train_Array, train["labels"])
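A common workaround, sketched here as a suggestion rather than something from the post: kernelized SVC scales roughly quadratically to cubically with the sample count, so fitting 60,000 samples can look like an endless run. The liblinear-based LinearSVC on scaled features is usually far faster.

from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Scale the pixels, then fit a linear SVM with the much faster liblinear solver
clf_fast = make_pipeline(StandardScaler(), LinearSVC())
clf_fast.fit(train_Array, train["labels"])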
I am new to TensorFlow; I just started learning and understanding it.
I am working on the neural style transfer problem, and I am using TensorFlow version 1.14.
I am getting the error "loss passed to Optimizer.compute_gradients should be a function when eager execution is enabled".
I tried to solve the problem by using a TensorFlow graph instead of eager execution, but it's not working. I want to use eager execution because it looks like the more Pythonic way.
Here is my code. Sorry for putting the whole code here; please suggest corrections.
import scipy
import tensorflow as tf
import tensorflow.contrib.eager as tfe
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from scipy import misc
from skimage.transform import resize
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from tensorflow.keras import backend as K
tf.enable_eager_execution()
print('Eager execution {}'.format(tf.executing_eagerly()))
content_path = '800px-Green_Sea_Turtle_grazing_seagrass.jpg'
style_path = '800px-The_Great_Wave_off_Kanagawa.jpg'
content_img = plt.imread(content_path)
plt.imshow(content_img)
style_img = plt.imread(style_path)
plt.imshow(style_img)
MEANS = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3))
content_img = resize(content_img, (552,800,3)) #resized content img because style img has shape (552,800,3)
content_img = np.array(content_img)
content_img = np.reshape(content_img, ((1,)+content_img.shape))
style_img = np.array(style_img)
style_img = np.reshape(style_img, ((1,)+style_img.shape))
noise_img= np.random.uniform(-20,20,(1,552,800,3)).astype('float32')
generated_img = noise_img*0.6 + content_img*0.4
plt.imshow(generated_img[0])
content_img = content_img-MEANS
style_img = style_img-MEANS
model = VGG19(include_top=False, weights='imagenet')
def compute_content_cost(act_content_img, act_generated_img):
    return tf.reduce_mean(tf.square(act_content_img - act_generated_img))

def gram_matrix(A):
    gram = tf.matmul(A, tf.transpose(A))
    return gram

def style_loss_one_layer(act_style_img, act_generated_img):
    m, n_H, n_W, n_C = tf.shape(act_generated_img)  # act_generated_img.get_shape().as_list()
    gram_act_style_img = gram_matrix(act_style_img)
    gram_generated_img = gram_matrix(act_generated_img)
    return tf.reduce_mean(tf.square(gram_act_style_img - gram_generated_img)) * (1 / (4 * n_C**2 * (n_H * n_W)**2))
content_layer = ['block5_conv2']
style_layers = [('block1_conv1',0.2),
('block2_conv1',0.2),
('block3_conv1',0.2),
('block4_conv1',0.2),
('block5_conv1',0.2)]
def compute_style_cost(model, style_layers):
    style_cost = total_style_cost = 0
    for layer, coeff in style_layers:
        act_style_img = model.get_layer(layer).output
        act_generated_img = model.get_layer(layer).output
        style_cost += style_loss_one_layer(act_style_img, act_generated_img)
        total_style_cost += coeff * style_cost
    return total_style_cost

def compute_total_cost(J_content, J_style, alpha=10, beta=40):
    J = (alpha * tf.cast(J_content, tf.float64)) + (beta * J_style)
    return J
act_generated_img = model.get_layer('block5_conv2').output
act_content_img = model.get_layer('block5_conv2').output
J_content = compute_content_cost(act_content_img=act_content_img, act_generated_img=act_generated_img)
print(J_content)
J_style = compute_style_cost(model, style_layers=style_layers)
print(J_style)
J_total_cost = compute_total_cost(J_content, J_style, alpha=10, beta=40)
print(J_total_cost)
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(J_total_cost)  # getting the error here
The above error mainly occurs when you try to run TensorFlow 1.x code while the system is running TensorFlow 2.0.
Initialise TensorFlow using the import below to ensure you are using the 1.x API:
import tensorflow.compat.v1 as tf
You can then make the system disable the 2.x behaviour with the command below, placed right after the import:
tf.disable_v2_behavior()
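If you prefer to stay in eager mode instead, the error message itself points at the fix: in eager execution the optimizer needs a callable (or explicit gradients from tf.GradientTape) rather than a precomputed loss tensor. A sketch under the assumption that the generated image is held in a tf.Variable and that total_cost_fn is a hypothetical function rebuilding J_total_cost from it:

generated = tf.Variable(generated_img, dtype=tf.float32)
optimizer = tf.train.AdamOptimizer(2.0)

for step in range(100):
    with tf.GradientTape() as tape:
        loss = total_cost_fn(generated)  # hypothetical: recomputes J_total_cost from `generated`
    grads = tape.gradient(loss, [generated])
    optimizer.apply_gradients(zip(grads, [generated]))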
Here I have an LSTM autoencoder written in Keras. I want to convert the code to Chainer.
import numpy as np
from keras.layers import Input, GRU
from keras.models import Model
input_feat = Input(shape=(30, 2000))
l = GRU( 100, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(input_feat)
l = GRU(2000, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(l)
model = Model(input_feat, l)
model.compile(optimizer="RMSprop", loss="mean_squared_error")
feat = np.load("feat.npy")
model.fit(feat, feat[:, ::-1, :], epochs=200, batch_size=250)
feat is a NumPy array whose shape is (269, 30, 2000). I could run the above code and the result was reasonable. Then I wrote the Chainer code below.
import numpy as np
from chainer import Chain, Variable, optimizers
import chainer.functions as F
import chainer.links as L
class GRUAutoEncoder(Chain):
    def __init__(self):
        super().__init__()
        with self.init_scope():
            self.encode = L.GRU(2000, 100)
            self.decode = L.GRU(100, 2000)

    def __call__(self, h, mode):
        if mode == "encode":
            h = F.tanh(self.encode(h))
            return h
        if mode == "decode":
            h = F.tanh(self.decode(h))
            return h

    def reset(self):
        self.encode.reset_state()
        self.decode.reset_state()

def main():
    feat = np.load("feat.npy")  # (269, 30, 2000)
    frame_rate = feat.shape[1]  # 30 time steps per sample (assumption: this is the intended frame_rate, which was undefined)
    gru_autoencoder = GRUAutoEncoder()
    optimizer = optimizers.RMSprop(lr=0.01).setup(gru_autoencoder)
    N = len(feat)
    batch_size = 250
    for epoch in range(200):
        index = np.random.randint(0, N - batch_size + 1)
        input_splices = feat[index:index + batch_size]  # (250, 30, 2000)
        # Encoding
        input_vector = np.zeros((30, batch_size, 2000), dtype="float32")
        h = []
        for i in range(frame_rate):
            input_vector[i] = input_splices[:, i, :]  # (250, 2000)
            tmp = Variable(input_vector[i])
            h.append(gru_autoencoder(tmp, "encode"))  # (250, 100)
        # Decoding
        output_vector = []
        for i in range(frame_rate):
            tmp = h[i]
            output_vector.append(gru_autoencoder(tmp, "decode"))
        x = input_vector[0]
        t = output_vector[0]
        for i in range(len(output_vector)):
            x = F.concat((x, input_vector[i]), axis=1)
            t = F.concat((t, output_vector[i]), axis=1)
        loss = F.mean_squared_error(x, t)
        gru_autoencoder.cleargrads()
        loss.backward()
        optimizer.update()
        gru_autoencoder.reset()

if __name__ == "__main__":
    main()
But the result of the above code was not reasonable. I think the Chainer code has something wrong, but I cannot find where it is.
In the Keras code, the target sequence is the time-reversed input:
model.fit(feat, feat[:, ::-1, :])
So I tried reversing output_vector in the Chainer code,
output_vector.reverse()
but the result was still not reasonable.
.. note: This answer is a translation of [Japanese SO](https://ja.stackoverflow.com/questions/52162/keras%E3%81%AE%E3%82%B3%E3%83%BC%E3%83%89%E3%82%92chainer%E3%81%AB%E6%9B%B8%E3%81%8D%E6%8F%9B%E3%81%88%E3%81%9F%E3%81%84lstm-autoencoder%E3%81%AE%E5%AE%9F%E8%A3%85/52213#52213).
You should avoid L.GRU and use L.NStepGRU instead, because with L.GRU you have to write "recurrence-aware" code. In other words, you have to apply L.GRU once per time step of a series, so the batch dimension must be treated with great care. L.NStepGRU (with n_layers=1) wraps that batch processing, so it is much more user-friendly.
An instance of L.NStepGRU takes two input arguments: one is the initial state, and the other is a list of time series that composes a batch. Conventionally, the initial state is None.
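For concreteness, a minimal sketch (mine, with toy sizes) of that calling convention:

import numpy as np
import chainer.links as L

gru = L.NStepGRU(n_layers=1, in_size=4, out_size=3, dropout=0)
# A batch of two variable-length series, each of shape (time, in_size)
xs = [np.random.randn(t, 4).astype(np.float32) for t in (5, 7)]
hy, ys = gru(None, xs)  # hy: final states (1, 2, 3); ys: list of (t, 3) outputs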
Therefore, the full answer to your question is as follows.
### dataset.py
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
    N_SAMPLES = 269
    N_TIMESERIES = 30
    N_DIMS = 2000

    def __init__(self):
        super().__init__()
        self.data = np.random.randn(self.N_SAMPLES, self.N_TIMESERIES, self.N_DIMS) \
            .astype(np.float32)

    def __len__(self):
        return self.N_SAMPLES

    def get_example(self, i):
        return self.data[i, :, :]
### model.py
import chainer
from chainer import links as L
from chainer import functions as F
from chainer.link import Chain
class MyModel(Chain):
    N_IN_CHANNEL = 2000
    N_HIDDEN_CHANNEL = 100
    N_OUT_CHANNEL = 2000

    def __init__(self):
        super().__init__()
        with self.init_scope():
            self.encoder = L.NStepGRU(n_layers=1, in_size=self.N_IN_CHANNEL, out_size=self.N_HIDDEN_CHANNEL, dropout=0)
            self.decoder = L.NStepGRU(n_layers=1, in_size=self.N_HIDDEN_CHANNEL, out_size=self.N_OUT_CHANNEL, dropout=0)

    def to_gpu(self, device=None):
        self.encoder.to_gpu(device)
        self.decoder.to_gpu(device)

    def to_cpu(self):
        self.encoder.to_cpu()
        self.decoder.to_cpu()

    @staticmethod
    def flip_list(source_list):
        # Each series is (time, dims); reverse along the time axis
        return [F.flip(source, axis=0) for source in source_list]

    def __call__(self, source_list):
        """
        .. note:
            This implementation makes use of "auto-encoding"
            by avoiding a redundant copy on the GPU device.
            In the typical implementation, this function would receive
            both ``source_list`` and ``target_list``.
        """
        target_list = self.flip_list(source_list)
        _, h_list = self.encoder(hx=None, xs=source_list)
        _, predicted_list = self.decoder(hx=None, xs=h_list)
        diff_list = [F.mean_squared_error(target, predicted).reshape((1,))
                     for target, predicted in zip(target_list, predicted_list)]
        loss = F.sum(F.concat(diff_list, axis=0))
        chainer.report({'loss': loss}, self)
        return loss
### converter.py (following examples/seq2seq/seq2seq.py)
from chainer.dataset import to_device

def convert(batch, device):
    """
    .. note:
        ``batch`` must be a list (of length batch_size) of arrays
    """
    if device is None:
        return batch
    else:
        return [to_device(device, x) for x in batch]
### train.py
from chainer.iterators import SerialIterator
from chainer.optimizers import RMSprop
from chainer.training.updaters import StandardUpdater
from chainer.training.trainer import Trainer
dataset = MyDataset()
BATCH_SIZE = 32
iterator = SerialIterator(dataset, BATCH_SIZE)
model = MyModel()
optimizer = RMSprop()
optimizer.setup(model)
updater = StandardUpdater(iterator, optimizer, convert, device=0)
trainer = Trainer(updater, (100, 'iteration'))
from chainer.training.extensions import snapshot_object
trainer.extend(snapshot_object(model, "model_iter_{.updater.iteration}"), trigger=(10, 'iteration'))
from chainer.training.extensions import LogReport, PrintReport, ProgressBar
trainer.extend(LogReport(['epoch', 'iteration', 'main/loss'], (1, 'iteration')))
trainer.extend(PrintReport(['epoch', 'iteration', 'main/loss']), trigger=(1, 'iteration'))
trainer.extend(ProgressBar(update_interval=1))
trainer.run()