I'm trying to train a simple movie recommendation system using the latest-small MovieLens dataset, but I keep getting this error:
Traceback (most recent call last):
File "D:\AI\Python projects\anotherone.py", line 48, in <module>
history = model.fit([train.userId,train.movieId], train.rating,epochs=10, verbose=1)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 780, in fit
steps_name='steps_per_epoch')
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py", line 363, in model_iteration
batch_outs = f(ins_batch)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\backend.py", line 3292, in _call_
run_metadata=self.run_metadata)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1458, in _call_
run_metadata_ptr)
tensorflow.python.framework.errors_impl.InvalidArgumentError: indices[4,0] = 179819 is not in [0, 8984)
[[{{node Movie-Embedding/embedding_lookup}}]]
Code:
# importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
# ignoring warnings
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# importing our data
df = pd.read_csv('D:/AI/Data sets/ml-latest-small/ratings.csv')
train, test = train_test_split(df, test_size=0.2, random_state=42)
# extracting the unique number of users and movies
n_users = len(df.userId.unique())
n_movies = len(df.movieId.unique())
# creating the embedding
movie_input = Input(shape=[1], name="Movie-Input")
movie_embedding = Embedding(n_movies+1, 5, name="Movie-Embedding")(movie_input)
movie_vec = Flatten(name="Flatten-Movie")(movie_embedding)
user_input = Input(shape=[1], name = "User-Input")
user_embedding = Embedding(n_users+1, 5, name="User-Embedding")(user_input)
user_vec = Flatten(name="Flatten-user")(user_embedding)
# concatenating the features
concat = concatenate([movie_vec,user_vec])
# creating our model
layer1 = Dense(128,activation="relu")(concat)
layer2 = Dense(32,activation="relu")(layer1)
outputLayer = Dense(1)(layer2)
model = Model([user_input,movie_input],outputLayer)
model.compile('adam','mean_squared_error')
# training the model
if os.path.exists('multiParam.h5'):
    model = load_model('multiParam.h5')
else:
    history = model.fit([train.userId, train.movieId], train.rating, epochs=10, verbose=1)
    model.save('multiParam.h5')
    plt.plot(history.history['loss'])
    plt.xlabel("Epochs")
    plt.ylabel("Training Error")
# testing the model
print(model.evaluate([test.userId, test.movieId], test.rating))
# running some predictions
predictions = model.predict([test.userId.head(10), test.movieId.head(10)])
[print(predictions[i], test.rating.iloc[i]) for i in range(0,10)]
I'm still new to machine learning, but from my research I understood that I need to provide the number of unique values + 1 to the embedding layer, which I'm doing, but it's still not working. Any help would be appreciated, thank you :)
You should try label encoding both movieId and userId so that they become sequential integers starting from zero:
from sklearn.preprocessing import LabelEncoder
user_enc = LabelEncoder()
df['user'] = user_enc.fit_transform(df['userId'].values)
n_users = df['user'].nunique()
item_enc = LabelEncoder()
df['movie'] = item_enc.fit_transform(df['movieId'].values)
n_movies = df['movie'].nunique()
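With the encoded columns in place, the rest of the script should feed the encoded ids (and the new n_users/n_movies) into the embeddings and into fit/evaluate instead of the raw userId/movieId. A minimal sketch, assuming the encoders above were fit on the full dataframe before splitting:
# split after encoding so train and test share the same integer ids
train, test = train_test_split(df, test_size=0.2, random_state=42)
movie_embedding = Embedding(n_movies + 1, 5, name="Movie-Embedding")(movie_input)
user_embedding = Embedding(n_users + 1, 5, name="User-Embedding")(user_input)
# train and evaluate on the encoded columns, not the raw ids
history = model.fit([train.user, train.movie], train.rating, epochs=10, verbose=1)
print(model.evaluate([test.user, test.movie], test.rating))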
I am using the latest version of foolbox (3.3.1). My code simply loads a ResNet-50 CNN, adds some layers for a transfer learning application, and loads the weights as follows.
from numpy.core.records import array
import tensorflow as tf
from keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
import cv2
import os
import numpy as np
import foolbox as fb
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cityblock
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
from PIL import Image
import math
from foolbox.criteria import Misclassification
#load model
num_classes = 12
#Load model and prepare it for testing
print("Step 1: Load model and weights")
baseModel = ResNet50(weights=None, include_top=False, input_tensor=Input(shape=(224, 224, 3)))
headModel = baseModel.output
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(512, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(num_classes, activation="softmax")(headModel)
model = Model(inputs=baseModel.input, outputs=headModel)
model.load_weights("RESNET-50/weights/train1-test1.h5")
print("Step 2: prepare testing data")
#features is a set of (1200,10,224,224,3) images
features=np.load("features.npy")
labels=np.load("labels.npy")
Now I would like to attack it using the foolbox 3.3.1 Carlini & Wagner attack. Here is how I load the model for foolbox:
#Lets test the foolbox model
bounds = (0, 1)
fmodel = fb.TensorFlowModel(model, bounds=bounds)
My dataset is split into 10 images per document; I will attack these 10 images with a batch size of 10 in foolbox, using the Carlini & Wagner attack:
#for each i, I have 10 images
for i in range(0, features.shape[0]):
    print("document "+str(i))
    #Receive current values
    #This is a batch of (10,224,224,3) images
    features_to_test = features[i,:]
    #Get their labels
    labels_to_test = labels[i,:]

    ######################ATTACK IN THE NORMALIZED DOMAIN###########################
    #lets do the attack
    #We use an interval of epsilons
    epsilons = np.linspace(0.01, 1, num=2)
    attack = fb.attacks.L2CarliniWagnerAttack(fmodel)
    adversarials = attack(features_to_test, labels_to_test, criterion=Misclassification(labels=labels_to_test), epsilons=epsilons)
However, whenever I run the code, this is the error that is returned:
Traceback (most recent call last):
File "test_carlini_wagner.py", line 161, in <module>
adversarials = attack(features_to_test, labels_to_test, criterion=Misclassification(labels=labels_to_test), epsilons=epsilons)
File "/usr/local/lib/python3.8/dist-packages/foolbox/attacks/base.py", line 410, in __call__
xp = self.run(model, x, criterion, early_stop=early_stop, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/foolbox/attacks/carlini_wagner.py", line 100, in run
bounds = model.bounds
AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'bounds'
What could be causing this error? Am I loading my model wrongly? Should I pass different parameters to the attack call? As previously stated, I am on foolbox 3.3.1.
I think you might have mixed up the parameters of the L2CarliniWagnerAttack. Here is a simplified working example with dummy data:
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cityblock
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
from foolbox import TensorFlowModel
from foolbox.criteria import Misclassification
from foolbox.attacks import L2CarliniWagnerAttack
num_classes = 12
print("Step 1: Load model and weights")
baseModel = ResNet50(weights=None, include_top=False, input_tensor=Input(shape=(224, 224, 3)))
headModel = baseModel.output
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(512, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(num_classes, activation="softmax")(headModel)
model = Model(inputs=baseModel.input, outputs=headModel)
bounds = (0, 1)
fmodel = TensorFlowModel(model, bounds=bounds)
images, labels = tf.random.normal((64, 10, 224, 224, 3)), tf.random.uniform((64, 10,), maxval=num_classes, dtype=tf.int32)
for i in range(0, images.shape[0]):
    print("document "+str(i))
    features_to_test = images[i,:]
    labels_to_test = labels[i,:]
    epsilons = np.linspace(0.01, 1, num=2)
    attack = L2CarliniWagnerAttack()
    adversarials = attack(fmodel, features_to_test, criterion=Misclassification(labels_to_test), epsilons=epsilons)
Step 1: Load model and weights
document 0
document 1
document 2
document 3
document 4
document 5
document 6
...
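Note the two changes compared to the snippet in the question: the attack is constructed without a model argument (L2CarliniWagnerAttack()), and fmodel is passed as the first positional argument when the attack is called. In the original code the images tensor ended up in the model slot, which is why foolbox tried to read bounds from an EagerTensor.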
I'm following along with Laurence Moroney's TensorFlow tutorial, coding as I go. Here's the video:
https://www.youtube.com/watch?v=Y_hzMnRXjhI
I'm getting this error message:
Traceback (most recent call last):
File "tensorTest.py", line 66, in <module>
validation_data=(testing_padded, testing_labels), verbose=2)
File "/Users/elliot/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
return method(self, *args, **kwargs)
File "/Users/elliot/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 1063, in fit
steps_per_execution=self._steps_per_execution)
File "/Users/elliot/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1117, in __init__
model=model)
File "/Users/elliot/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 282, in __init__
raise ValueError(msg)
ValueError: Data cardinality is ambiguous:
x sizes: 28618
y sizes: 14309
Please provide data which shares the same first dimension.
I can get the model summary just fine, but when it tries to fit I get the error. Full code below:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd

data = pd.read_json("Sarcasm_Headlines_Dataset_v2.json", lines=True)

sentences = data['headline'].to_list()
labels = data['is_sarcastic'].to_list()
urls = data['article_link'].to_list()

vocab_size = 10000
embedding_dim = 16
max_length = 100
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"
training_size = 14309

training_sentences = sentences[0:training_size]
testing_sentences = sentences[:training_size]
training_labels = labels[0:training_size]
testing_labels = labels[training_size:]

tokenizer = Tokenizer(oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

training_sequences = tokenizer.texts_to_sequences(sentences)
training_padded = pad_sequences(training_sequences, padding=padding_type, maxlen=max_length, truncating=trunc_type)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, padding=padding_type, maxlen=max_length, truncating=trunc_type)

import numpy as np
training_padded = np.array(training_padded)
training_labels = np.array(training_labels)
testing_padded = np.array(testing_padded)
testing_labels = np.array(testing_labels)

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

num_epochs = 30
history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=2)
print(history)
THANKS IN ADVANCE!
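For what it's worth, the cardinality numbers in the error (x sizes: 28618, y sizes: 14309) suggest that training_sequences is built from the full sentences list while training_labels only covers the first training_size rows; the testing slice also reuses sentences[:training_size] instead of the remainder. A minimal sketch of the consistent slicing, keeping everything else as above:
training_sentences = sentences[0:training_size]
testing_sentences = sentences[training_size:]
training_labels = labels[0:training_size]
testing_labels = labels[training_size:]
# tokenize only the matching slice so x and y share the same first dimension
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, padding=padding_type, maxlen=max_length, truncating=trunc_type)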
I am using the Keras Functional API from TensorFlow 2.2 to build a model that uses feature columns. I followed the guide here and the Classify structured data tutorial to put together this code snippet:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    dataframe = dataframe.copy()
    labels = dataframe.pop('target')
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    return ds

if __name__ == '__main__':
    URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
    dataframe = pd.read_csv(URL)

    batch_size = 5  # A small batch size is used for demonstration purposes
    train_ds = df_to_dataset(dataframe, batch_size=batch_size)
    example_batch = next(iter(train_ds))

    feature_columns = []
    inputs = {'age': tf.keras.layers.Input(name='age', shape=(), dtype='float32'),
              'thal': tf.keras.layers.Input(name='thal', shape=(), dtype='string')}

    feature_columns.append(feature_column.numeric_column('age'))

    thal = feature_column.categorical_column_with_vocabulary_list(
        'thal', ['fixed', 'normal', 'reversible'])
    thal_one_hot = feature_column.indicator_column(thal)
    feature_columns.append(thal_one_hot)

    x = layers.DenseFeatures(feature_columns)(inputs)
    preds = layers.Dense(1)(x)

    model = tf.keras.Model(inputs=inputs, outputs=preds)
    print(model(example_batch))
Unfortunately this code fails with an error:
Traceback (most recent call last):
File "test.py", line 42, in <module>
print(model(example_batch))
File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 968, in __call__
outputs = self.call(cast_inputs, *args, **kwargs)
File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\network.py", line 717, in call
return self._run_internal_graph(
File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\network.py", line 837, in _run_internal_graph
y = self._conform_to_reference_input(y, ref_input=x)
File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\network.py", line 961, in _conform_to_reference_input
tensor = math_ops.cast(tensor, dtype=ref_input.dtype)
File "C:\Python38\lib\site-packages\tensorflow\python\util\dispatch.py", line 180, in wrapper
return target(*args, **kwargs)
File "C:\Python38\lib\site-packages\tensorflow\python\ops\math_ops.py", line 789, in cast
x = gen_math_ops.cast(x, base_type, name=name)
File "C:\Python38\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1971, in cast
_ops.raise_from_not_ok_status(e, name)
File "C:\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 6653, in raise_from_not_ok_status
six.raise_from(core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.UnimplementedError: Cast int64 to string is not supported [Op:Cast]
When I drop the categorical column 'thal' from feature_columns, it works without error. Unfortunately, I need to pass the categorical column into my model, and I don't know how to fix this.
There is no need to pass inputs to x = layers.DenseFeatures(feature_columns), because feature_columns already comprises the feature columns corresponding to all the features.
Complete working code for training the model with the heart.csv data is shown below:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    dataframe = dataframe.copy()
    labels = dataframe.pop('target')
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    return ds
URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
dataframe = pd.read_csv(URL)
print(dataframe.head())
batch_size = 5  # A small batch size is used for demonstration purposes
train_ds = df_to_dataset(dataframe, batch_size=batch_size)
example_batch = next(iter(train_ds))
feature_columns = []
age = feature_column.numeric_column('age')
feature_columns.append(age)
thal = feature_column.categorical_column_with_vocabulary_list(
    'thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)
feature_columns.append(thal_one_hot)
print(feature_columns)
x = layers.DenseFeatures(feature_columns)
model = tf.keras.Sequential([
    x,
    layers.Dense(1)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_ds, epochs=5)
#print(model(example_batch))
Please let me know if you need any other information or if you face any other error, and I will be happy to help you.
Hope this helps. Happy learning!
example_batch contains both the feature batch and the label batch, but the model expects only features as inputs.
The following code should work:
feature_batch, label_batch = next(iter(train_ds))
model(feature_batch)
or:
example_batch = next(iter(train_ds))[0]
model(example_batch)
So I've been working on this chatbot project. I'm using an SVM for its ML component and I really want to use cosine similarity as the kernel. I've tried using pykernel (as suggested in this post) and code from other sources, but it's still not working, and I don't know why...
Say that I have train.py code like this:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import pickle, csv, json, timeit, random, os, nltk
from nltk.stem.lancaster import LancasterStemmer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import LabelEncoder as LE
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from my_kernel import my_kernel
def preprocessing(text):
    factory1 = StopWordRemoverFactory()
    StopWord = factory1.create_stop_word_remover()
    text = StopWord.remove(text)
    factory2 = StemmerFactory()
    stemmer = factory2.create_stemmer()
    return (stemmer.stem(text))
le = LE()
tfv = TfidfVectorizer(min_df=1)
file = os.path.join(os.path.dirname(os.path.abspath(__file__)),"scraping","tes.json")
svm_pickle_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"data","svm_model.pickle")
if os.path.exists(svm_pickle_path):
    os.remove(svm_pickle_path)
tit = [] # Title
cat = [] # Category
post = [] # Post
with open(file, "r") as sentences_file:
reader = json.load(sentences_file)
for row in reader:
tit.append(preprocessing(row["Judul"]))
cat.append(preprocessing(row["Kategori"]))
post.append(preprocessing(row["Post"]))
tfv.fit(tit)
le.fit(cat)
features = tfv.transform(tit)
labels = le.transform(cat)
trainx, testx, trainy, testy = tts(features, labels, test_size=.30, random_state=42)
model = SVC(kernel=my_kernel, C=1.5)
f = open(svm_pickle_path, 'wb')
pickle.dump(model.fit(trainx, trainy), f)
f.close()
print("SVC training score:", model.score(testx, testy))
with open(svm_pickle_path, 'rb') as file:
    pickle_model = pickle.load(file)
    score = pickle_model.score(testx, testy)
    print("Test score: {0:.2f} %".format(100 * score))
    Ypredict = pickle_model.predict(testx)
    print(Ypredict)
And for the my_kernel.py code:
import numpy as np
import math
from numpy import linalg as LA
def my_kernel(X, Y):
    norm = LA.norm(X) * LA.norm(Y)
    return np.dot(X, Y.T)/norm
And it shows this every time I run the program:
Traceback (most recent call last):
File "F:\env\chatbot\chatbotProj\chatbotProj\train.py", line 84, in <module>
pickle.dump(model.fit(trainx, trainy), f)
File "F:\env\lib\site-packages\sklearn\svm\base.py", line 212, in fit
fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
File "F:\env\lib\site-packages\sklearn\svm\base.py", line 252, in _dense_fit
X = self._compute_kernel(X)
File "F:\env\lib\site-packages\sklearn\svm\base.py", line 380, in _compute_kernel
kernel = self.kernel(X, self.__Xfit)
File "F:\env\chatbot\chatbotProj\chatbotProj\ChatbotCode\svm.py", line 31, in my_kernel
norm = LA.norm(X) * LA.norm(Y)
File "F:\env\lib\site-packages\numpy\linalg\linalg.py", line 2359, in norm
sqnorm = dot(x, x)
File "F:\env\lib\site-packages\scipy\sparse\base.py", line 478, in __mul__
raise ValueError('dimension mismatch')
ValueError: dimension mismatch
I'm new to Python and this SVM area. Does anybody know what's wrong, or could recommend how to write a cosine similarity kernel better and cleaner?
Oh, and the dimensions from sklearn's train_test_split are: train X (193, 634), train Y (193,), test X (83, 634) and test Y (83,).
Update:
My friend told me it happens because I have a sparse matrix, not a plain array, so I have to densify it and replace the my_kernel.py code with this:
def my_kernel(X, Y):
    X = np.array(X.todense())
    Y = np.array(Y.todense())
    norm = LA.norm(X) * LA.norm(Y)
    return np.dot(X, Y.T)/norm
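For a cleaner kernel, sklearn's cosine_similarity can be used directly; it accepts scipy sparse matrices (no .todense() needed) and normalizes row by row, whereas the version above divides by the norms of the whole matrices. A minimal sketch for my_kernel.py, used with the same train.py as above:
from sklearn.metrics.pairwise import cosine_similarity

def my_kernel(X, Y):
    # row-wise cosine similarity; returns the (n_samples_X, n_samples_Y) Gram matrix SVC expects
    return cosine_similarity(X, Y)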
I am new to RNNs and I have been working on a small binary label classifier. I have been able to get a stable model with satisfactory results.
However, I am having a hard time using the model to classify new inputs and I was wondering if any of you could help me. Please see my code below for reference.
Thank you very much.
from tensorflow.keras import preprocessing
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras import models
from tensorflow.keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.preprocessing import sequence, text
from tensorflow.keras.callbacks import EarlyStopping
from matplotlib import pyplot
class tensor_rnn():
    def __init__(self, hidden_layers=3):
        self.data_path = 'C:\\Users\\cmazz\\PycharmProjects\\InvestmentAnalysis_2.0\\Sentiment\\Finance_Articles\\'
        # self.corp_paths = corpora_paths
        self.h_layers = hidden_layers
        self.num_words = []

        good = pd.read_csv(self.data_path + 'GoodO.csv')
        good['Polarity'] = 'pos'
        for line in good['Head'].tolist():
            counter = len(line.split())
            self.num_words.append(counter)

        bad = pd.read_csv(self.data_path + 'BadO.csv')
        bad['Polarity'] = 'neg'
        for line in bad['Head'].tolist():
            counter = len(line.split())
            self.num_words.append(counter)

        self.features = pd.concat([good, bad]).reset_index(drop=True)
        self.features = shuffle(self.features)
        self.max_len = len(max(self.features['Head'].tolist()))
        # self.train, self.test = train_test_split(features, test_size=0.33, random_state=42)

        X = self.features['Head']
        Y = self.features['Polarity']
        le = LabelEncoder()
        Y = le.fit_transform(Y)
        Y = Y.reshape(-1, 1)

        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(X, Y, test_size=0.30)

        self.tok = preprocessing.text.Tokenizer(num_words=len(self.num_words))
        self.tok.fit_on_texts(self.X_train)
        sequences = self.tok.texts_to_sequences(self.X_train)
        self.sequences_matrix = preprocessing.sequence.pad_sequences(sequences, maxlen=self.max_len)

    def RNN(self):
        inputs = Input(name='inputs', shape=[self.max_len])
        layer = Embedding(len(self.num_words), 30, input_length=self.max_len)(inputs)
        # layer = LSTM(64, return_sequences=True)(layer)
        layer = LSTM(32)(layer)
        layer = Dense(256, name='FC1')(layer)
        layer = Activation('relu')(layer)
        layer = Dropout(0.5)(layer)
        layer = Dense(1, name='out_layer')(layer)
        layer = Activation('sigmoid')(layer)
        model = Model(inputs=inputs, outputs=layer)
        return model

    def model_train(self):
        self.model = self.RNN()
        self.model.summary()
        self.model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])  # RMSprop()

    def model_test(self):
        self.history = self.model.fit(self.sequences_matrix, self.Y_train, batch_size=100, epochs=3,
                                      validation_split=0.30, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
        test_sequences = self.tok.texts_to_sequences(self.X_test)
        test_sequences_matrix = sequence.pad_sequences(test_sequences, maxlen=self.max_len)
        accr = self.model.evaluate(test_sequences_matrix, self.Y_test)
        print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f}'.format(accr[0], accr[1]))

if __name__ == "__main__":
    a = tensor_rnn()
    a.model_train()
    a.model_test()
    a.model.save('C:\\Users\\cmazz\\PycharmProjects\\InvestmentAnalysis_2.0\\RNN_Model.h5', include_optimizer=True)

    b = models.load_model('C:\\Users\\cmazz\\PycharmProjects\\InvestmentAnalysis_2.0\\RNN_Model.h5')
    stringy = ['Fund managers back away from Amazon as they cut FANG exposure']
    prediction = b.predict(np.array(stringy))
    print(prediction)
When I run my code I get the following error:
ValueError: Error when checking input: expected inputs to have shape (39,) but got array with shape (1,)
Based on the ValueError and prediction = b.predict(np.array(stringy)), I think you need to tokenize your input string.
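Something along these lines should work, reusing the tokenizer and max_len fitted during training (a sketch assuming a and b are still in scope as in the script above):
stringy = ['Fund managers back away from Amazon as they cut FANG exposure']
# convert the raw string into the same padded integer sequences the model was trained on
seq = a.tok.texts_to_sequences(stringy)
padded = sequence.pad_sequences(seq, maxlen=a.max_len)
prediction = b.predict(padded)
print(prediction)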