Get a result from Flask - Python

I have trained a deep learning model (LSTM) with Keras, saved it as an .h5 file, and now I want to hit a web service in order to get back a category. This is the first time I have tried to do this, so I am a little confused. I cannot figure out how to get the categories back. Also, when I send a request to http://localhost:8000/predict I get the following error,
The server encountered an internal error and was unable to complete your
request. Either the server is overloaded or there is an error in the
application.
and in the notebook:
ValueError: Tensor Tensor("dense_3/Softmax:0", shape=(?, 6), dtype=float32)
is not an element of this graph.
I tried the solution from a linked answer, but it is not working.
The code so far is below:
from flask import Flask, request, jsonify  # jsonify will return the data
import os
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.models import load_model

app = Flask(__name__)
model = load_model('lstm-final-five-Copy1.h5')

@app.route('/predict', methods=["GET", "POST"])
def predict():
    df_final = pd.read_csv('flask.csv')
    activities = df_final['activity'].value_counts().index
    label = LabelEncoder()
    df_final['label'] = label.fit_transform(df_final['activity'])
    X = df_final[['accx', 'accy', 'accz', 'gyrx', 'gyry', 'gyrz']]
    y = df_final['label']
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    df_final = pd.DataFrame(X, columns=['accx', 'accy', 'accz', 'gyrx', 'gyry', 'gyrz'])
    df_final['label'] = y.values

    Fs = 50
    frame_size = Fs * 2    # 200 samples per frame
    hop_size = frame_size  # non-overlapping frames

    def get_frames(df_final, frame_size, hop_size):
        N_FEATURES = 6  # x, y, z for both accelerometer and gyroscope
        frames = []
        labels = []
        for i in range(0, len(df_final) - frame_size, hop_size):
            accx = df_final['accx'].values[i: i + frame_size]
            accy = df_final['accy'].values[i: i + frame_size]
            accz = df_final['accz'].values[i: i + frame_size]
            gyrx = df_final['gyrx'].values[i: i + frame_size]
            gyry = df_final['gyry'].values[i: i + frame_size]
            gyrz = df_final['gyrz'].values[i: i + frame_size]
            # Retrieve the most frequent label in this segment
            label = stats.mode(df_final['label'][i: i + frame_size])[0][0]
            frames.append([accx, accy, accz, gyrx, gyry, gyrz])
            labels.append(label)
        # Bring the segments into a better shape
        frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
        labels = np.asarray(labels)
        return frames, labels

    X, y = get_frames(df_final, frame_size, hop_size)
    pred = model.predict_classes(X)
    return jsonify({"Prediction": pred}), 201

if __name__ == '__main__':
    app.run(host="localhost", port=8000, debug=False)

It seems that in your '/predict' POST endpoint you aren't returning any values, which is why you aren't getting back a category as you expect.
If you wanted to add a GET method, you could add something like the following:
@app.route('/', methods=['GET'])
def check_server_status():
    return ("Server Running!")
And in your case, the POST method could return the prediction from the endpoint:
@app.route('/predict', methods=['POST'])
def predict():
    # Add in other steps here
    pred = model.predict_classes(X)
    # convert the NumPy array to a plain list so jsonify can serialize it
    return jsonify({"Prediction": pred.tolist()}), 201
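The "Tensor ... is not an element of this graph" error from the question usually comes from Keras loading the model in one graph/thread while Flask serves the request in another. Below is a common workaround, sketched under the assumption of TensorFlow 1.x with standalone Keras (model._make_predict_function() is a private Keras helper often used for this; the dummy X only stands in for the frames produced by get_frames), not an exact fix for your app:

from flask import Flask, jsonify
from keras.models import load_model
import numpy as np
import tensorflow as tf

app = Flask(__name__)
model = load_model('lstm-final-five-Copy1.h5')
model._make_predict_function()      # build the predict function eagerly, before any request arrives
graph = tf.get_default_graph()      # remember the graph the model was loaded into

@app.route('/predict', methods=['POST'])
def predict():
    X = np.zeros((1, 200, 6))       # placeholder; in your app this comes from get_frames
    with graph.as_default():        # run prediction inside the original graph
        pred = model.predict_classes(X)
    return jsonify({"Prediction": pred.tolist()}), 201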

As far as I can see, you also need to install pandas if you haven't already (pip install pandas) and import it with import pandas as pd.
Also, you can allow the "GET" method in your /predict endpoint like:
@app.route("/predict", methods=["GET", "POST"])

Related

How do you include a OneHotEncoder step in a saved model to deploy via Flask?

I have an XGBoost model that predicts whether a student will enter college based on a number of features. Part of the model uses OneHotEncoder to transform a few columns with string values. There's nothing wrong with the model, but I've run into issues building a rudimentary Flask app that takes in a JSON payload to make a prediction. My confusion is where to add the OneHotEncoder step. Would I need to rebuild the model using a pipeline for OneHotEncoding, model parameters, and fitting, save the model again, and then, when I send the JSON via Postman, the saved model puts the data through the OneHotEncoder step? Can I add OneHotEncoder as part of the GridSearchCV step?
optimal_params = GridSearchCV(
    estimator=xgb.XGBClassifier(objective='binary:logistic'),
    param_grid=param_grid,
    scoring='roc_auc',
    verbose=2,
    n_jobs=10,
    cv=3
)
optimal_params.fit(X,
                   y,
                   early_stopping_rounds=10,
                   eval_metric='auc',
                   eval_set=[(X_test, y_test)],
                   verbose=False)
Flask code:
def return_prediction(college_model, sample_json):
    type_school = sample_json["type_school"]
    school_accreditation = sample_json["school_accreditation"]
    gender = sample_json['gender']
    interest = sample_json['interest']
    residence = sample_json['residence']
    parent_age = sample_json['parent_age']
    parent_salary = sample_json['parent salary']
    house_area = sample_json['house_area']
    average_grades = sample_json['average_grades']
    parent_was_in_college = sample_json['parent_was_in_college']
    college = [[type_school, school_accreditation, gender, interest,
                residence, parent_age, parent_salary, house_area,
                average_grades, parent_was_in_college]]
    class_ind = college_model.predict(college)
    return class_ind

app = Flask(__name__)

@app.route("/")
def index():
    return '<h1>Flask Running</h1>'

college_model = joblib.load("college_model.pkl")
column_trans = joblib.load("ohe.pkl")

@app.route('/college', methods=['POST'])
def prediction():
    content = request.json
    results = return_predictions(college_model, column_trans, content)
    results = results.tolist()
    return jsonify(results)

if __name__ == '__main__':
    app.run()
After GridSearchCV, you store the best estimator and save that model to a pkl file.
You also have to put the preprocessing part into your Flask code: the incoming data should be one-hot encoded in the same way as during training before it is passed to predict.
def return_prediction(college_model, sample_json):
    type_school = sample_json["type_school"]
    school_accreditation = sample_json["school_accreditation"]
    gender = sample_json['gender']
    interest = sample_json['interest']
    residence = sample_json['residence']
    parent_age = sample_json['parent_age']
    parent_salary = sample_json['parent salary']
    house_area = sample_json['house_area']
    average_grades = sample_json['average_grades']
    parent_was_in_college = sample_json['parent_was_in_college']
    college = pd.DataFrame([[type_school, school_accreditation, gender, interest,
                             residence, parent_age, parent_salary, house_area,
                             average_grades, parent_was_in_college]],
                           columns=['type_school', 'school_accreditation', 'gender',
                                    'interest', 'residence', 'parent_age', 'parent_salary',
                                    'house_area', 'average_grades', 'parent_was_in_college'])
    # one-hot encode your string columns here, the same way as during training
    college = pd.get_dummies(college, columns=['your string columns'])
    class_ind = college_model.predict(college)
    return class_ind
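To the question of whether OneHotEncoder can live inside the GridSearchCV step: one way to do it (a minimal sketch, not the poster's exact setup; the split into categorical and numeric columns and the param_grid values are assumptions) is to wrap a ColumnTransformer with OneHotEncoder plus the XGBClassifier in a scikit-learn Pipeline, grid-search the pipeline, and pickle the fitted pipeline, so the Flask endpoint only has to call predict on a DataFrame built from the raw JSON:

import joblib
import pandas as pd
import xgboost as xgb
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# column names are taken from the question's JSON payload; which ones are
# categorical is an assumption for the sketch
categorical_cols = ["type_school", "school_accreditation", "gender",
                    "interest", "residence"]

preprocess = ColumnTransformer(
    transformers=[("ohe", OneHotEncoder(handle_unknown="ignore"), categorical_cols)],
    remainder="passthrough",   # numeric columns pass through unchanged
)

pipeline = Pipeline([
    ("preprocess", preprocess),
    ("model", xgb.XGBClassifier(objective="binary:logistic")),
])

# hypothetical grid: parameters of the XGBClassifier step get the "model__" prefix
param_grid = {"model__max_depth": [3, 5], "model__n_estimators": [100, 200]}

search = GridSearchCV(pipeline, param_grid=param_grid, scoring="roc_auc", cv=3, n_jobs=-1)

# X is the raw (un-encoded) DataFrame with the columns above, y the target:
# search.fit(X, y)

# pickle the whole fitted pipeline; at serving time the encoder is applied automatically:
# joblib.dump(search.best_estimator_, "college_pipeline.pkl")

# in the Flask endpoint, a one-row DataFrame built from the JSON goes straight in:
# pipeline = joblib.load("college_pipeline.pkl")
# pred = pipeline.predict(pd.DataFrame([sample_json]))

With this layout the OneHotEncoder is fitted inside every cross-validation fold of GridSearchCV, and the saved pkl already contains the encoding step, so the Flask code no longer needs a separate ohe.pkl.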

How does OneHotEncoder work with a saved XGBoost model?

I built a rudimentary XGBoost model to make predictions on whether a student will enter college, based on a number of features (strings, ints, and booleans). To encode these labels, I used OneHotEncoder after splitting the data into X and y. The model works and I am also using it to make new predictions; however, I'm having some trouble adapting the model to a Flask app. My intention is to build the app, run it locally, and then use Postman to make predictions by posting a JSON payload. The issue is that I do not know where to add the OneHotEncoder step for the new JSON I'm posting in Postman. Do I add OneHotEncoder as part of the GridSearchCV I'm using, or should I reconfigure the model to use a pipeline for OneHotEncoding, grid search, and fitting, so that when I use the saved model, the JSON sent via Postman goes through the OneHotEncoder process? Also, in the part of the Flask app "content['type_school'] = float(session['type_school'])", can I use bool or str instead of float around "session"?
Flask app code:
def return_prediction(model, sample_json):
    type_school = sample_json["type_school"]
    school_accreditation = sample_json["school_accreditation"]
    gender = sample_json['gender']
    interest = sample_json['interest']
    residence = sample_json['residence']
    parent_age = sample_json['parent_age']
    parent_salary = sample_json['parent salary']
    house_area = sample_json['house_area']
    average_grades = sample_json['average_grades']
    parent_was_in_college = sample_json['parent_was_in_college']
    college = [[type_school, school_accreditation, gender, interest,
                residence, parent_age, parent_salary, house_area,
                average_grades, parent_was_in_college]]
    classes = np.array(['TRUE', 'FALSE'])
    class_ind = college_model.predict_classes(college)
    return classes[class_ind][0]
app = Flask(__name__)
app.config['SECRET_KEY'] = 'mysecretkey'

class CollegeForm(FlaskForm):
    type_school = TextField("type_school")
    school_accreditation = TextField("school_accreditation")
    gender = TextField('gender')
    interest = TextField('interest')
    residence = TextField('residence')
    parent_age = TextField('parent_age')
    parent_salary = TextField('parent salary')
    house_area = TextField('house_area')
    average_grades = TextField('average_grades')
    parent_was_in_college = TextField('parent_was_in_college')
    submit = SubmitField('Predict')

@app.route("/", methods=['GET', 'POST'])
def index():
    form = CollegeForm()
    if form.validate_on_submit():
        session['type_school'] = form.type_school.data
        session['school_accreditation'] = form.school_accreditation.data
        session['gender'] = form.gender.data
        session['interest'] = form.interest.data
        session['residence'] = form.residence.data
        session['parent_age'] = form.parent_age.data
        session['parent_salary'] = form.parent_salary.data
        session['house_area'] = form.house_area.data
        session['average_grades'] = form.average_grades.data
        session['parent_was_in_college'] = form.parent_was_in_college.data
        return redirect(url_for("prediction"))
    return render_template('home_2.html', form=form)

college_model = joblib.load("college_model.pkl")

@app.route('/prediction')
def prediction():
    content = {}
    content['type_school'] = float(session['type_school'])
    content['school_accreditation'] = float(session['school_accreditation'])
    content['gender'] = float(session['gender'])
    content['interest'] = float(session['interest'])
    content['residence'] = float(session['residence'])
    content['parent_age'] = float(session['parent_age'])
    content['parent_salary'] = float(session['parent_salary'])
    content['house_area'] = float(session['house_area'])
    content['average_grades'] = float(session['average_grades'])
    content['parent_was_in_college'] = float(session['parent_was_in_college'])
    results = return_prediction(college_model, content)
    return render_template('predictions.html', results=results)

if __name__ == '__main__':
    app.run()

ParameterError: Mono data must have shape (samples,). Received shape=(1, 87488721)

Currently I am working on speaker diarization in Python, where I am using pyannote for embeddings.
My embedding function looks like this:
import torch
import librosa
import numpy as np
from pyannote.core import Segment

def embeddings_(audio_path, resegmented, range):
    model_emb = torch.hub.load('pyannote/pyannote-audio', 'emb')
    embedding = model_emb({'audio': audio_path})
    for window, emb in embedding:
        assert isinstance(window, Segment)
        assert isinstance(emb, np.ndarray)
    y, sr = librosa.load(audio_path)
    myDict = {}
    myDict['audio'] = audio_path
    myDict['duration'] = len(y) / sr
    data = []
    for i in resegmented:
        excerpt = Segment(start=i[0], end=i[0] + range)
        emb = model_emb.crop(myDict, excerpt)
        data.append(emb.T)
    data = np.asarray(data)
    return data.reshape(len(data), 512)
When I run
embeddings = embeddings_(audiofile,resegmented,2)
I get this error:
ParameterError: Mono data must have shape (samples,). Received shape=(1, 87488721)
I got the same error too, but I found a workaround. For me, the error was triggered in pyannote/audio/features/utils.py, when it tries to resample the audio with the line y = librosa.core.resample(y.T, sample_rate, self.sample_rate).T
This is my workaround:
def get_features(self, y, sample_rate):
    # convert to mono
    if self.mono:
        y = np.mean(y, axis=1, keepdims=True)
        y = np.squeeze(y)  # Add this line
    # resample if sample rates mismatch
    if (self.sample_rate is not None) and (self.sample_rate != sample_rate):
        y = librosa.core.resample(y.T, sample_rate, self.sample_rate).T
        sample_rate = self.sample_rate
    # augment data
    if self.augmentation is not None:
        y = self.augmentation(y, sample_rate)
    # TODO: how time consuming is this thing (needs profiling...)
    if len(y.shape) == 1:       # Add this line
        y = y[:, np.newaxis]    # Add this line
    try:
        valid = valid_audio(y[:, 0], mono=True)
    except ParameterError as e:
        msg = f"Something went wrong when augmenting waveform."
        raise ValueError(msg)
    return y
Use np.squeeze on y before librosa.core.resample, then use y[:, np.newaxis] to change its shape back to (samples, 1) so that valid = valid_audio(y[:, 0], mono=True) works.
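For illustration, here is a tiny standalone sketch (with a made-up sample count) of what those two added lines do to the array shapes:

import numpy as np

y = np.zeros((8748, 1))   # mono buffer shaped (samples, 1) after np.mean(..., keepdims=True)
y = np.squeeze(y)         # (8748,) -- the mono shape librosa.core.resample expects
print(y.shape)

y = y[:, np.newaxis]      # back to (8748, 1) so valid_audio(y[:, 0], mono=True) sees a column
print(y.shape)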

How to read from local directory, kmeans streaming pyspark

I need help with reading from a local directory when running streaming k-means with pyspark. There is no good answer on this topic on Stack Overflow.
Here is my code:
if __name__ == "__main__":
    ssc = StreamingContext(sc, 1)

    training_data_raw, training_data_df = prepare_data(TRAINING_DATA_SET)
    trainingData = parse2(training_data_raw)

    testing_data_raw, testing_data_df = prepare_data(TEST_DATA_SET)
    testingData = testing_data_raw.map(parse1)
    # print(testingData)

    trainingQueue = [trainingData]
    testingQueue = [testingData]

    trainingStream = ssc.queueStream(trainingQueue)
    testingStream = ssc.queueStream(testingQueue)

    # We create a model with random clusters and specify the number of clusters to find
    model = StreamingKMeans(k=2, decayFactor=1.0).setRandomCenters(3, 1.0, 0)

    # Now register the streams for training and testing and start the job,
    # printing the predicted cluster assignments on new data points as they arrive.
    model.trainOn(trainingStream)

    result = model.predictOnValues(testingStream.map(lambda lp: (lp.label, lp.features)))
    result.pprint()

    ssc.textFileStream('file:///Users/userrname/PycharmProjects/MLtest/training/data/')

    ssc.start()
    ssc.awaitTermination()
Thanks!!
from pyspark.mllib.linalg import Vectors

trainingData = ssc.textFileStream("/training/data/dir").map(Vectors.parse)

For the test examples:

from pyspark.mllib.regression import LabeledPoint

def parse(lp):
    label = float(lp[lp.find('(') + 1: lp.find(',')])
    vec = Vectors.dense(lp[lp.find('[') + 1: lp.find(']')].split(','))
    return LabeledPoint(label, vec)

testData = ssc.textFileStream("/testing/data/dir").map(parse)
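Putting the two pieces together with the StreamingKMeans setup from the question, a minimal end-to-end sketch could look like the following (the local[2] master and the testing directory path are assumptions; the file:// scheme is what lets textFileStream watch a local directory):

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.mllib.clustering import StreamingKMeans
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.regression import LabeledPoint

sc = SparkContext("local[2]", "StreamingKMeansLocalDir")
ssc = StreamingContext(sc, 1)

def parse(lp):
    # lines look like "(label,[x,y,z])"
    label = float(lp[lp.find('(') + 1: lp.find(',')])
    vec = Vectors.dense(lp[lp.find('[') + 1: lp.find(']')].split(','))
    return LabeledPoint(label, vec)

# new files dropped into these directories become new micro-batches
trainingData = ssc.textFileStream(
    'file:///Users/userrname/PycharmProjects/MLtest/training/data/').map(Vectors.parse)
testData = ssc.textFileStream(
    'file:///Users/userrname/PycharmProjects/MLtest/testing/data/').map(parse)

model = StreamingKMeans(k=2, decayFactor=1.0).setRandomCenters(3, 1.0, 0)
model.trainOn(trainingData)

result = model.predictOnValues(testData.map(lambda lp: (lp.label, lp.features)))
result.pprint()

ssc.start()
ssc.awaitTermination()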

Broadcasting a keras model with pyspark [duplicate]

I am using Caffe to do image classification on macOS with Python.
Right now I know how to classify a list of images using Caffe with plain Python, but I want to make it faster by using Spark.
Therefore, I tried to apply the image classification to each element of an RDD created from a list of image_path values. However, Spark does not allow me to do so.
Here is my code:
This is the code for image classification:
# display image name, class number, predicted label
def classify_image(image_path, transformer, net):
    image = caffe.io.load_image(image_path)
    transformed_image = transformer.preprocess('data', image)
    net.blobs['data'].data[...] = transformed_image
    output = net.forward()
    output_prob = output['prob'][0]
    pred = output_prob.argmax()
    labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
    labels = np.loadtxt(labels_file, str, delimiter='\t')
    lb = labels[pred]
    image_name = image_path.split(images_folder_path)[1]
    result_str = 'image: ' + image_name + ' prediction: ' + str(pred) + ' label: ' + lb
    return result_str
This is the code that generates the Caffe parameters and applies the classify_image method to each element of the RDD:
def main():
    sys.path.insert(0, caffe_root + 'python')
    caffe.set_mode_cpu()
    model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
    model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
    net = caffe.Net(model_def,
                    model_weights,
                    caffe.TEST)
    mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
    mu = mu.mean(1).mean(1)
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_mean('data', mu)
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data', (2, 1, 0))
    net.blobs['data'].reshape(50,
                              3,
                              227, 227)
    image_list = []
    for image_path in glob.glob(images_folder_path + '*.jpg'):
        image_list.append(image_path)
    images_rdd = sc.parallelize(image_list)
    transformer_bc = sc.broadcast(transformer)
    net_bc = sc.broadcast(net)
    image_predictions = images_rdd.map(lambda image_path: classify_image(image_path, transformer_bc, net_bc))
    print image_predictions

if __name__ == '__main__':
    main()
As you can see, here I tried to broadcast the caffe parameters, transformer_bc = sc.broadcast(transformer), net_bc = sc.broadcast(net)
The error is:
RuntimeError: Pickling of "caffe._caffe.Net" instances is not enabled
Before I did the broadcast, the error was:
Driver stacktrace.... Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):....
So, do you know if there is any way I can classify images using Caffe while also taking advantage of Spark?
When you work with complex, non-native objects, initialization has to be moved directly to the workers, for example with a singleton module:
net_builder.py:
import caffe

net = None

def build_net(*args, **kwargs):
    ...  # Initialize net here
    return net

def get_net(*args, **kwargs):
    global net
    if net is None:
        net = build_net(*args, **kwargs)
    return net
main.py:
import net_builder

sc.addPyFile("net_builder.py")

def classify_image(image_path, transformer, *args, **kwargs):
    net = net_builder.get_net(*args, **kwargs)
It means you'll have to distribute all required files as well. It can be done either manually or using the SparkFiles mechanism.
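For the net initialization itself, build_net can reuse the setup from the question's main(), so each worker loads Caffe lazily on first use. A minimal sketch, assuming caffe_root and the model files from the question are available at the same path on every worker:

# net_builder.py -- runs on the workers, not the driver
import caffe

caffe_root = '/path/to/caffe/'   # assumption: same layout on every worker
net = None

def build_net():
    caffe.set_mode_cpu()
    model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
    model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
    return caffe.Net(model_def, model_weights, caffe.TEST)

def get_net():
    global net
    if net is None:      # initialize once per worker process
        net = build_net()
    return net

classify_image would then call net_builder.get_net() instead of receiving a broadcast net, and the transformer can be rebuilt on the workers the same way.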
On a side note, you should take a look at the SparkNet package.
