I'm trying to figure out what the difference is between using a pretrained model from TensorFlow Hub versus using the very same architecture from tf.keras.applications. I've tried training two models with the same architecture, one from TF Hub and the other from tf.keras.applications, which should yield comparable results, but the results are vastly different. Could you please explain the difference?
Here are examples of the two models.
base_model = tf.keras.applications.EfficientNetB0(include_top = False)
base_model.trainable = False
inputs = Input(shape = (224,224,3), name = 'input_layer')
x = base_model(inputs, training = False)
x = GlobalAveragePooling2D(name = 'global_avg_pool_layer')(x)
outputs = Dense(len(class_names), activation = 'softmax', name = 'output_layer')(x)
model_1 = tf.keras.Model(inputs, outputs)
model_1.compile(loss = 'categorical_crossentropy', optimizer = Adam(), metrics = ['accuracy'])
history_1 = model_1.fit(train_data_all_10_percent,
                        epochs = 10,
                        validation_data = test_data,
                        validation_steps = int(0.15 * len(test_data)))
And here is the second model:
efficientnet_url = 'https://tfhub.dev/tensorflow/efficientnet/b0/feature-vector/1'
def create_model(model_url, num_classes = 10):
    feature_extractor_layer = hub.KerasLayer(model_url, trainable = False, name = 'feature_extraction_layer', input_shape = IMG_SIZE + (3,))
    model = Sequential([
        feature_extractor_layer,
        Dense(num_classes, activation = 'softmax', name = 'output_layer')
    ])
    return model
efficientnet_model = create_model(efficientnet_url, num_classes = len(class_names))
efficientnet_model.compile(loss = 'categorical_crossentropy', optimizer = Adam(), metrics = ['accuracy'])
efficientnet_history = efficientnet_model.fit(train_data_all_10_percent,
                                              epochs = 10,
                                              validation_data = test_data,
                                              validation_steps = int(0.15 * len(test_data)))
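As a starting point for the comparison, here is a minimal diagnostic sketch (assuming base_model and efficientnet_model are built as above; hub_layer is just a local name I introduce). It feeds the same random batch to both backbones and prints output shapes and the parameter count. Scaling the batch to [0, 1] for the hub layer is my assumption based on the usual TF Hub image convention, while the Keras EfficientNet application, as far as I know, rescales raw pixel values internally.
import numpy as np

# One random image batch in the raw 0-255 range.
sample = np.random.uniform(0, 255, size=(1, 224, 224, 3)).astype('float32')

# Pull the hub.KerasLayer back out of the Sequential model by its name.
hub_layer = efficientnet_model.get_layer('feature_extraction_layer')

keras_features = base_model(sample, training=False)   # tf.keras.applications backbone
hub_features = hub_layer(sample / 255.0)               # TF Hub layer (assumed [0, 1] inputs)

print('keras.applications output shape:', keras_features.shape)
print('keras.applications params:', base_model.count_params())
print('tf hub output shape:', hub_features.shape)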
I am interested in multi-class segmentation of skin tissues. I have 3000 skin tissue labels classified into 4 classes, and I have created a CNN classification algorithm to train my classification model. I would like to use the classification model for a segmentation task on new skin tissue images and perform feature extraction of the skin tissue belonging to each class.
Following is the code written to train my classification model:
from tensorflow.keras.layers import Input, Concatenate, Dropout, Flatten, Dense, GlobalAveragePooling2D, Conv2D
from tensorflow.keras import backend as K
#from tensorflow.keras.utils import np_utils
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers
from tensorflow.keras.metrics import top_k_categorical_accuracy
from tensorflow.keras.models import Sequential, Model, load_model
import tensorflow as tf
from tensorflow.keras.initializers import he_uniform
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping, CSVLogger, ReduceLROnPlateau
#from tensorflow.compat.keras.backend import KTF
#import keras.backend.tensorflow_backend as KTF
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.inception_v3 import InceptionV3
import os
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
#import numpy as np, Pillow, skimage, imageio, matplotlib
#from scipy.misc import imresize
from skimage.transform import resize
from tqdm import tqdm
from tensorflow.keras import metrics
#### PREPROCESS STAGE ####
# Path to superpixels class files
classes_file = "/home/DEV/SKIN_3000_CLASSES.csv"
concatenated_data= pd.read_csv(classes_file, header=None)
# Instances with targets
targets = concatenated_data[1].tolist()
# Split data according to their classes
class_0 = concatenated_data[concatenated_data[1] == 0]
class_1 = concatenated_data[concatenated_data[1] == 1]
class_2 = concatenated_data[concatenated_data[1] == 2]
class_3 = concatenated_data[concatenated_data[1] == 3]
# Holdout split train/test set (Other options are k-folds or leave-one-out)
split_proportion = 0.8
split_size_0 = int(len(class_0)*split_proportion)
split_size_1 = int(len(class_1)*split_proportion)
split_size_2 = int(len(class_2)*split_proportion)
split_size_3 = int(len(class_3)*split_proportion)
new_class_0_train = np.random.choice(len(class_0), split_size_0, replace=False)
new_class_0_train = class_0.iloc[new_class_0_train]
new_class_0_test = ~class_0.iloc[:][0].isin(new_class_0_train.iloc[:][0])
new_class_0_test = class_0[new_class_0_test]
new_class_1_train = np.random.choice(len(class_1), split_size_1, replace=False)
new_class_1_train = class_1.iloc[new_class_1_train]
new_class_1_test = ~class_1.iloc[:][0].isin(new_class_1_train.iloc[:][0])
new_class_1_test = class_1[new_class_1_test]
new_class_2_train = np.random.choice(len(class_2), split_size_2, replace=False)
new_class_2_train = class_2.iloc[new_class_2_train]
new_class_2_test = ~class_2.iloc[:][0].isin(new_class_2_train.iloc[:][0])
new_class_2_test = class_2[new_class_2_test]
new_class_3_train = np.random.choice(len(class_3), split_size_3, replace=False)
new_class_3_train = class_3.iloc[new_class_3_train]
new_class_3_test = ~class_3.iloc[:][0].isin(new_class_3_train.iloc[:][0])
new_class_3_test = class_3[new_class_3_test]
x_train_list = pd.concat(
    [new_class_0_train, new_class_1_train, new_class_2_train, new_class_3_train])
x_test_list = pd.concat(
    [new_class_0_test, new_class_1_test, new_class_2_test, new_class_3_test])
# Load superpixels files
imagePath = "/home/DEV/SKIN_SET_3000/"
x_train = []
y_train = []
for index, row in tqdm(x_train_list.iterrows(), total=x_train_list.shape[0]):
    try:
        loadedImage = plt.imread(imagePath + str(row[0]) + ".jpg")
        x_train.append(loadedImage)
        y_train.append(row[1])
    except:
        # Try with .png file format if images are not properly loaded
        try:
            loadedImage = plt.imread(imagePath + str(row[0]) + ".png")
            x_train.append(loadedImage)
            y_train.append(row[1])
        except:
            # Print file names whenever it is impossible to load image files
            print(imagePath + str(row[0]))
x_test = []
y_test = []
for index, row in tqdm(x_test_list.iterrows(), total=x_test_list.shape[0]):
    try:
        loadedImage = plt.imread(imagePath + str(row[0]) + ".jpg")
        x_test.append(loadedImage)
        y_test.append(row[1])
    except:
        # Try with .png file format if images are not properly loaded
        try:
            loadedImage = plt.imread(imagePath + str(row[0]) + ".png")
            x_test.append(loadedImage)
            y_test.append(row[1])
        except:
            # Print file names whenever it is impossible to load image files
            print(imagePath + str(row[0]))
# Rescaling of images
img_width, img_height = 139, 139
index = 0
for image in tqdm(x_train):
    #aux = resize(image, (img_width, img_height, 3), "bilinear")
    aux = resize(image, (img_width, img_height))
    x_train[index] = aux / 255.0  # Normalization
    index += 1
index = 0
for image in tqdm(x_test):
    #aux = resize(image, (img_width, img_height, 3), "bilinear")
    aux = resize(image, (img_width, img_height))
    x_test[index] = aux / 255.0  # Normalization
    index += 1
#### TRAINING STAGE ####
os.environ["KERAS_BACKEND"] = "tensorflow"
RANDOM_STATE = 42
def get_session(gpu_fraction=0.8):
    num_threads = os.environ.get('OMP_NUM_THREADS')
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
    if num_threads:
        return tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, intra_op_parallelism_threads=num_threads))
    else:
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
#KTF.set_session(get_session())
def precision(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def recall(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def fbeta_score(y_true, y_pred, beta=1):
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')
    # Set F-score as 0 if there are no true positives (sklearn-like).
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0.0
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score
nb_classes = 4
final_model = []
# Option = InceptionV3
model = InceptionV3(weights="imagenet", include_top=False,
                    input_shape=(img_width, img_height, 3))
# Option = ResNet
# model = ResNet50(weights="imagenet", include_top=False, input_shape=(3,img_width, img_height))
# Creating new outputs for the model
x = model.output
x = Flatten()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
predictions = Dense(nb_classes, activation='softmax')(x)
#predictions = Dense(nb_classes, activation='sigmoid')(x)
final_model = Model(inputs=model.input, outputs=predictions)
# Metrics
learningRate = 0.001
optimizer = optimizers.SGD(learning_rate=learningRate, momentum=0.88, nesterov=True)
# Compiling the model...
final_model.compile(loss="categorical_crossentropy", optimizer=optimizer,
                    metrics=["accuracy", fbeta_score])
final_model.summary()
#final_model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
#model.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
#x_train = np.array(x_train)
#x_test = np.array(x_test)
x_train = np.asarray(x_train).astype(np.float32)
#x_test = np.array(x_test)
x_test = np.asarray(x_test).astype(np.float32)
# Defining targets...
y_train = np.concatenate([np.full((new_class_0_train.shape[0]), 0), np.full((new_class_1_train.shape[0]), 1),
                          np.full((new_class_2_train.shape[0]), 2), np.full((new_class_3_train.shape[0]), 3)])
y_test = np.concatenate([np.full((new_class_0_test.shape[0]), 0), np.full((new_class_1_test.shape[0]), 1),
                         np.full((new_class_2_test.shape[0]), 2), np.full((new_class_3_test.shape[0]), 3)])
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
modelFilename = "/home/DEV/SKIN_SET_3000/model_inception.h5"
trainingFilename = "/home/DEV/SKIN_SET_3000/training.csv"
nb_train_samples = y_train.shape[0]
nb_test_samples = y_test.shape[0]
#epochs = 10000
epochs = 100
batch_size = 24
trainingPatience = 200
decayPatience = trainingPatience / 4
# Setting the data generator...
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    fill_mode="reflect",
    zoom_range=0.2
)
train_generator = train_datagen.flow(x_train, y_train, batch_size=batch_size)
# Saving the model
checkpoint = ModelCheckpoint(modelFilename,
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=False,
                             mode='auto',
                             save_freq=1)
adaptativeLearningRate = ReduceLROnPlateau(monitor='val_accuracy',
                                           factor=0.5,
                                           patience=decayPatience,
                                           verbose=1,
                                           mode='auto',
                                           min_delta=0.0001,
                                           cooldown=0,
                                           min_lr=1e-8)
early = EarlyStopping(monitor='val_accuracy',
                      min_delta=0,
                      patience=trainingPatience,
                      verbose=1,
                      mode='auto')
csv_logger = CSVLogger(trainingFilename, separator=",", append=False)
# Callbacks
callbacks = [checkpoint, early, csv_logger, adaptativeLearningRate]
# Training of the model
final_model.fit(train_generator,
                steps_per_epoch=nb_train_samples / batch_size,
                epochs=epochs,
                shuffle=True,
                validation_data=(x_test, y_test),
                validation_steps=nb_test_samples / batch_size,
                callbacks=callbacks)
final_model.save('/home/DEV/SKIN_SET_3000/model_inception.h5')
#compile metrics
In order to segment my image, I first transformed the input image into superpixels using SLIC:
from skimage.segmentation import slic
from skimage.segmentation import mark_boundaries
from skimage.util import img_as_float
from skimage import io; io.use_plugin('matplotlib')
import cv2
from skimage.color import label2rgb
img_width, img_height = 139, 139
# load the model we saved
model = load_model('/home/DEV/SKIN_SET_3000/model_inception.h5', compile=False)
# Get test image ready
img = img_as_float(io.imread('/home/DEV/SKIN_ULCER.jpg')).astype(np.float32)
plt.imshow(img)
test_image_slic = slic(img, n_segments=500, compactness=10.0)
test_image_slic_out = mark_boundaries(img,test_image_slic)
plt.imshow(test_image_slic_out)
#test_image=test_image/255
test_image_array = np.array(test_image_slic_out)
test_image_resize = cv2.resize(test_image_array,(img_width,img_height))
test_image_reshape = test_image_resize.reshape(1,img_width, img_height,3)
I would like to check whether each superpixel of my input is labeled as one of my 4 target tissue classes, extract the regions belonging to each class as a mask, and quantify the total surface area of each mask.
Any suggestions on how to implement this approach would be appreciated.
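As a starting point, here is a rough sketch of what I have in mind (assuming the loaded model, img, and test_image_slic from above; num_classes and class_masks are names I made up). It classifies each superpixel by cropping its bounding box, builds one binary mask per class, and reports each mask's area in pixels; the normalization here is a guess and should match whatever the classifier saw during training.
import numpy as np
import cv2

num_classes = 4
class_masks = np.zeros((num_classes,) + test_image_slic.shape, dtype=np.uint8)

for sp_label in np.unique(test_image_slic):
    mask = test_image_slic == sp_label
    rows, cols = np.where(mask)
    # Crop the superpixel's bounding box and resize it to the classifier's input size.
    patch = img[rows.min():rows.max() + 1, cols.min():cols.max() + 1]
    patch = cv2.resize(patch, (img_width, img_height))
    probs = model.predict(patch.reshape(1, img_width, img_height, 3), verbose=0)
    predicted_class = int(np.argmax(probs))
    class_masks[predicted_class][mask] = 1  # add this superpixel to the predicted class's mask

for c in range(num_classes):
    print('class', c, 'surface area (pixels):', int(class_masks[c].sum()))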
I am using this tutorial code:
import tensorflow as tf
import numpy as np
import os
import time
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
# Read, then decode for py2 compat.
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print('Length of text: {} characters'.format(len(text)))
# Take a look at the first 250 characters in text
print(text[:250])
# The unique characters in the file
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))
# Creating a mapping from unique characters to indices
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)
text_as_int = np.array([char2idx[c] for c in text])
print('{')
for char, _ in zip(char2idx, range(20)):
    print(' {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print(' ...\n}')
# Show how the first 13 characters from the text are mapped to integers
print('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))
# The maximum length sentence you want for a single input in characters
# seq_length = 100
seq_length = 50
examples_per_epoch = len(text)//(seq_length+1)
# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
for i in char_dataset.take(5):
    print(idx2char[i.numpy()])
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)
for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text
dataset = sequences.map(split_input_target)
for input_example, target_example in dataset.take(1):
    print('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
    print('Target data:', repr(''.join(idx2char[target_example.numpy()])))
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step {:4d}".format(i))
    print(" input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print(" expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))
# Batch size
BATCH_SIZE = 64
# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
dataset
# Length of the vocabulary in chars
vocab_size = len(vocab)
# The embedding dimension
embedding_dim = 128
#embedding_dim = 256
# Number of RNN units
rnn_units = 128
#rnn_units = 1024
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim,
                                  batch_input_shape=[batch_size, None]),
        tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
        tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model
model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
model.summary()
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()
sampled_indices
print("Input: \n", repr("".join(idx2char[input_example_batch[0].numpy()])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
example_batch_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss: ", example_batch_loss.numpy().mean())
model.compile(optimizer='adam', loss=loss)
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)
#EPOCHS = 30
EPOCHS = 10
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])
tf.train.latest_checkpoint(checkpoint_dir)
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))
model.summary()
I have set the parameters so it runs quickly for testing. I would like to measure the perplexity of the model, say on the training set itself or some other test text. How can I do that?
To make the question completely self-contained, given the model made above, how would you compute the perplexity of the string "where"?
The solution can be found in this keras issue: https://github.com/keras-team/keras/issues/8267
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

def perplexity(labels, logits):
    cross_entropy = loss(labels, logits)
    perplexity = tf.keras.backend.exp(cross_entropy)
    return perplexity
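To score a specific string such as "where" with the batch-size-1 model rebuilt above, a minimal sketch along these lines should work; string_perplexity is just a hypothetical helper name, and it averages the per-character cross-entropy before exponentiating.
import tensorflow as tf

def string_perplexity(model, text, char2idx):
    ids = [char2idx[c] for c in text]
    inputs = tf.expand_dims(ids[:-1], 0)   # feed every character except the last
    targets = tf.expand_dims(ids[1:], 0)   # each character's next character
    model.reset_states()                   # the GRU layers are stateful
    logits = model(inputs)                 # shape (1, len(text) - 1, vocab_size)
    cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(targets, logits, from_logits=True)
    return float(tf.exp(tf.reduce_mean(cross_entropy)))

print(string_perplexity(model, "where", char2idx))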
I want to split my dataset into test and validation datasets as my model might be suffering from overfitting. Here's my current architecture:
input_sentences = []
output_sentences = []
output_sentences_inputs = []
count = 0
for line in open(r'/content/drive/My Drive/TEMPPP/123.txt', encoding="utf-8"):
    count += 1
    if count > NUM_SENTENCES:
        break
    if '\t' not in line:
        continue
    input_sentence, output = line.rstrip().split('\t')
    output_sentence = output + ' <eos>'
    output_sentence_input = '<sos> ' + output
    input_sentences.append(input_sentence)
    output_sentences.append(output_sentence)
    output_sentences_inputs.append(output_sentence_input)
input_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
input_tokenizer.fit_on_texts(input_sentences)
input_integer_seq = input_tokenizer.texts_to_sequences(input_sentences)
word2idx_inputs = input_tokenizer.word_index
max_input_len = max(len(sen) for sen in input_integer_seq)
output_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, filters='')
output_tokenizer.fit_on_texts(output_sentences + output_sentences_inputs)
output_integer_seq = output_tokenizer.texts_to_sequences(output_sentences)
output_input_integer_seq = output_tokenizer.texts_to_sequences(output_sentences_inputs)
word2idx_outputs = output_tokenizer.word_index
num_words_output = len(word2idx_outputs) + 1
max_out_len = max(len(sen) for sen in output_integer_seq)
encoder_input_sequences = pad_sequences(input_integer_seq, maxlen=max_input_len)
decoder_input_sequences = pad_sequences(output_input_integer_seq, maxlen=max_out_len, padding='post')
import numpy as np
read_dictionary = np.load('/content/drive/My Drive/TEMPPP/hinvec.npy',allow_pickle='TRUE').item()
num_words = min(MAX_NUM_WORDS, len(word2idx_inputs) + 1)
embedding_matrix = np.zeros((num_words, EMBEDDING_SIZE))
for word, index in word2idx_inputs.items():
    embedding_vector = read_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector
embedding_layer = Embedding(num_words, EMBEDDING_SIZE, weights=[embedding_matrix], input_length=max_input_len)
decoder_targets_one_hot = np.zeros(
    (len(input_sentences), max_out_len, num_words_output),
    dtype='float32'
)
decoder_output_sequences = pad_sequences(output_integer_seq, maxlen=max_out_len, padding='post')
for i, d in enumerate(decoder_output_sequences):
    for t, word in enumerate(d):
        decoder_targets_one_hot[i, t, word] = 1
encoder_inputs_placeholder = Input(shape=(max_input_len,))
x = embedding_layer(encoder_inputs_placeholder)
encoder = LSTM(LSTM_NODES, return_state=True)
encoder_outputs, h, c = encoder(x)
encoder_states = [h, c]
decoder_inputs_placeholder = Input(shape=(max_out_len,))
decoder_embedding = Embedding(num_words_output, LSTM_NODES)
decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder)
decoder_lstm = LSTM(LSTM_NODES, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs_x, initial_state=encoder_states)
decoder_dense = Dense(num_words_output, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
import tensorflow as tf
starter_learning_rate = 0.1
end_learning_rate = 0.01
decay_steps = 2000
learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(
    starter_learning_rate,
    decay_steps,
    end_learning_rate,
    power=0.5)
opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn, epsilon=1e-03, clipvalue=0.5)
model = Model([encoder_inputs_placeholder,
               decoder_inputs_placeholder],
              decoder_outputs)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model.fit(
    [encoder_input_sequences, decoder_input_sequences],
    decoder_targets_one_hot,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.1,
)
After reading the dataset, the sentences are already stored in input_sentences and output_sentences, so I thought I could pass them directly to X and y like this:
from sklearn.model_selection import train_test_split
X=input_sentences
y=output_sentences
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
This way I get my Hindi sentences in X and my English sentences in y. Now I'm really confused about how to implement this split in my model.
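One direction I am considering, sketched below under the assumption that the padded arrays built above (encoder_input_sequences, decoder_input_sequences, decoder_targets_one_hot) are what the model actually consumes: split those arrays with the same random indices so the pairs stay aligned, then pass the held-out portion as validation_data instead of using validation_split. The enc_*/dec_* names are mine.
from sklearn.model_selection import train_test_split

# train_test_split splits all three arrays with the same shuffled indices.
(enc_train, enc_val,
 dec_in_train, dec_in_val,
 dec_tgt_train, dec_tgt_val) = train_test_split(
    encoder_input_sequences,
    decoder_input_sequences,
    decoder_targets_one_hot,
    test_size=0.33,
    random_state=42)

history = model.fit(
    [enc_train, dec_in_train],
    dec_tgt_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=([enc_val, dec_in_val], dec_tgt_val),
)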
I'm working on some NLP code. I want to train and save the model, but this error comes up. I searched some documentation but I didn't find the right solution. How can I solve this problem?
import torch,time
import torch.nn as nn
input_dim = 5
hidden_dim = 10
n_layers = 1
lstm_layer = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
batch_size = 1
seq_len = 1
inp = torch.randn(batch_size, seq_len, input_dim)
hidden_state = torch.randn(n_layers, batch_size, hidden_dim)
cell_state = torch.randn(n_layers, batch_size, hidden_dim)
hidden = (hidden_state, cell_state)
out, hidden = lstm_layer(inp, hidden)
print("Output shape: ", out.shape)
print("Hidden: ", hidden)
seq_len = 3
inp = torch.randn(batch_size, seq_len, input_dim)
out, hidden = lstm_layer(inp, hidden)
print(out.shape)
# Obtaining the last output
out = out.squeeze()[-1, :]
print(out.shape)
import bz2
from collections import Counter
import re
import nltk
import numpy as np
#nltk.download('punkt')
train_file = bz2.BZ2File('C:/Users/DELL/Dogal-Dil-Isleme/Xml-Files/trwiktionary-20200301-pages-articles-multistream.xml.bz2')
test_file = bz2.BZ2File('C:/Users/DELL/Dogal-Dil-Isleme/Xml-Files/trwikisource-20200601-pages-articles.xml.bz2')
train_file = train_file.readlines()
test_file = test_file.readlines()
num_train = 200
num_test = 50
train_file = [x.decode('utf-8') for x in train_file[:num_train]]
test_file = [x.decode('utf-8') for x in test_file[:num_test]]
train_labels = [0 if x.split(' ')[0] == '__label__1' else 1 for x in train_file]
train_sentences = [x.split(' ', 1)[1][:-1].lower() for x in train_file]
test_labels = [0 if x.split(' ')[0] == '__label__1' else 1 for x in test_file]
test_sentences = [x.split(' ', 1)[1][:-1].lower() for x in test_file]
for i in range(len(train_sentences)):
    train_sentences[i] = re.sub('\d', '0', train_sentences[i])
for i in range(len(test_sentences)):
    test_sentences[i] = re.sub('\d', '0', test_sentences[i])
for i in range(len(train_sentences)):
    if 'www.' in train_sentences[i] or 'http:' in train_sentences[i] or 'https:' in train_sentences[i] or '.com' in train_sentences[i]:
        train_sentences[i] = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", "<url>", train_sentences[i])
for i in range(len(test_sentences)):
    if 'www.' in test_sentences[i] or 'http:' in test_sentences[i] or 'https:' in test_sentences[i] or '.com' in test_sentences[i]:
        test_sentences[i] = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", "<url>", test_sentences[i])
words = Counter() # Dictionary that will map a word to the number of times it appeared in all the training sentences
for i, sentence in enumerate(train_sentences):
    train_sentences[i] = []
    for word in nltk.word_tokenize(sentence):
        words.update([word.lower()])
        train_sentences[i].append(word)
    if i%20000 == 0:
        print(str((i*100)/num_train) + "% done")
print("100% done")
words = {k:v for k,v in words.items() if v>1}
words = sorted(words, key=words.get, reverse=True)
words = ['_PAD','_UNK'] + words
word2idx = {o:i for i,o in enumerate(words)}
idx2word = {i:o for i,o in enumerate(words)}
for i, sentence in enumerate(train_sentences):
    train_sentences[i] = [word2idx[word] if word in word2idx else 0 for word in sentence]
for i, sentence in enumerate(test_sentences):
    # For test sentences, we have to tokenize the sentences as well
    test_sentences[i] = [word2idx[word.lower()] if word.lower() in word2idx else 0 for word in nltk.word_tokenize(sentence)]
def pad_input(sentences, seq_len):
    features = np.zeros((len(sentences), seq_len), dtype=int)
    for ii, review in enumerate(sentences):
        if len(review) != 0:
            features[ii, -len(review):] = np.array(review)[:seq_len]
    return features
seq_len = 200 # The length that the sentences will be padded/shortened to
train_sentences = pad_input(train_sentences, seq_len)
test_sentences = pad_input(test_sentences, seq_len)
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
split_frac = 0.5 # 50% validation, 50% test
split_id = int(split_frac * len(test_sentences))
val_sentences, test_sentences = test_sentences[:split_id], test_sentences[split_id:]
val_labels, test_labels = test_labels[:split_id], test_labels[split_id:]
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
train_data = TensorDataset(torch.from_numpy(train_sentences), torch.from_numpy(train_labels))
val_data = TensorDataset(torch.from_numpy(val_sentences), torch.from_numpy(val_labels))
test_data = TensorDataset(torch.from_numpy(test_sentences), torch.from_numpy(test_labels))
batch_size = 200
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()
# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
class SentimentNet(nn.Module):
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        super(SentimentNet, self).__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, hidden):
        batch_size = x.size(0)
        x = x.long()
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        out = self.dropout(lstm_out)
        out = self.fc(out)
        out = self.sigmoid(out)
        out = out.view(batch_size, -1)
        out = out[:, -1]
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
        return hidden
vocab_size = len(word2idx) + 1
output_size = 1
embedding_dim = 400
hidden_dim = 512
n_layers = 2
model = SentimentNet(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
model.to(device)
lr=0.005
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
epochs = 2
counter = 0
print_every = 1000
clip = 5
valid_loss_min = np.Inf
model.train()
for i in range(epochs):
    h = model.init_hidden(batch_size)
    for inputs, labels in train_loader:
        counter += 1
        h = tuple([e.data for e in h])
        inputs, labels = inputs.to(device), labels.to(device)
        model.zero_grad()
        output, h = model(inputs, h)
        loss = criterion(output.squeeze(), labels.float())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        if counter%print_every == 0:
            val_h = model.init_hidden(batch_size)
            val_losses = []
            model.eval()
            for inp, lab in val_loader:
                val_h = tuple([each.data for each in val_h])
                inp, lab = inp.to(device), lab.to(device)
                out, val_h = model(inp, val_h)
                val_loss = criterion(out.squeeze(), lab.float())
                val_losses.append(val_loss.item())
            model.train()
            print("Epoch: {}/{}...".format(i+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))
            if np.mean(val_losses) <= valid_loss_min:
                torch.save(model.state_dict(), 'C:/Users/DELL/Dogal-Dil-Isleme/Models/state_dict.pt')
                print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min, np.mean(val_losses)))
                valid_loss_min = np.mean(val_losses)
            time.sleep(1)
# Loading the best model
model.load_state_dict(torch.load('C:/Users/DELL/Dogal-Dil-Isleme/Models/state_dict.pt'))
test_losses = []
num_correct = 0
h = model.init_hidden(batch_size)
model.eval()
for inputs, labels in test_loader:
    h = tuple([each.data for each in h])
    inputs, labels = inputs.to(device), labels.to(device)
    output, h = model(inputs, h)
    test_loss = criterion(output.squeeze(), labels.float())
    test_losses.append(test_loss.item())
    pred = torch.round(output.squeeze())  # Rounds the output to 0/1
    correct_tensor = pred.eq(labels.float().view_as(pred))
    correct = np.squeeze(correct_tensor.cpu().numpy())
    num_correct += np.sum(correct)
print("Test loss: {:.3f}".format(np.mean(test_losses)))
test_acc = num_correct/len(test_loader.dataset)
print("Test accuracy: {:.3f}%".format(test_acc*100))
I tried creating a new folder and changing the path, but every way I tried still produced an error :)
I read the PyTorch documentation and changed the code as recommended, but the error still came up.
I will share some links for your reading about this issue:
same issue
pytorch documentation
How can I fix this, or is there an alternative way to save the model?
Try changing it to: model.load_state_dict(torch.load('C:/Users/DELL/Dogal-Dil-Isleme/Models/state_dict'))
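If the save itself is what fails, a minimal sketch of an alternative (assuming the same Windows path as above) is to make sure the target folder exists before calling torch.save, since torch.save does not create missing directories:
import os
import torch

save_dir = 'C:/Users/DELL/Dogal-Dil-Isleme/Models'
os.makedirs(save_dir, exist_ok=True)  # create the folder if it does not exist yet
save_path = os.path.join(save_dir, 'state_dict.pt')

torch.save(model.state_dict(), save_path)

# Later, load the weights back into a freshly constructed SentimentNet:
model.load_state_dict(torch.load(save_path))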