How to deploy Keras-yolo model to the web with Flask? - python

I have successfully trained a model on my own dataset using the Keras YOLOv3 GitHub project (link)
and I've got good predictions:
I would like to deploy this model on the web using flask to make it work with a stream or with IP cameras.
I have seen many tutorials that explain how to do this but, in reality, none of them covered what I am looking for.
How can I get started?

You can use flask-restful to design a simple rest API.
You can use opencv VideoCapture to grab the video stream and get frames.
import numpy as np
import cv2
# Open a sample video available in sample-videos
vcap = cv2.VideoCapture('URL')
The client will take an image/ frame, encode it using base64, add other details like height, width, and make a request.
import numpy as np
import base64
import zlib
import requests
import time
t1 = time.time()
for _ in range(1000): # 1000 continuous request
frame = np.random.randint(0,256, (416,416,3), dtype=np.uint8) # dummy rgb image
# replace frame with your image
# compress
data = frame # zlib.compress(frame)
data = base64.b64encode(data)
data_send = data
#data2 = base64.b64decode(data)
#data2 = zlib.decompress(data2)
#fdata = np.frombuffer(data2, dtype=np.uint8)
r ="", json={'imgb64' : data_send.decode(), 'w': 416, 'h': 416})
# make a post request
# print the response here
t2 = time.time()
Your server will load the darknet model, and when it receives a post request it will simply return the model output.
from flask import Flask, request
from flask_restful import Resource, Api, reqparse
import json
import numpy as np
import base64
# compression
import zlib
# load keras model
# load_model('model.h5')
app = Flask(__name__)
api = Api(app)
parser = reqparse.RequestParser()
parser.add_argument('imgb64', location='json', help = 'type error')
parser.add_argument('w', type = int, location='json', help = 'type error')
parser.add_argument('h', type = int, location='json', help = 'type error')
class Predict(Resource):
    """REST resource that decodes a base64-encoded frame sent by the client."""

    def post(self):
        """Handle a POST whose JSON body carries ``imgb64``, ``w`` and ``h``.

        Returns:
            A JSON string with the mean pixel value and the channel count of
            the decoded frame, or a dict holding an error ``message`` when the
            image is missing or cannot be decoded into the requested shape.
        """
        import binascii  # local import: only needed to name the decode error

        data = parser.parse_args()
        img = data['imgb64']
        # reqparse yields None for an absent argument, so test for falsiness
        # instead of only comparing against the empty string.
        if not img:
            return {
                'message': 'No file found',
            }
        w = data['w']
        h = data['h']
        try:
            raw = base64.b64decode(img.encode())
            # If the client zlib-compresses the frame, decompress `raw` here.
            # NumPy/OpenCV frames are laid out (height, width, channels), so
            # the row count comes first.
            fdata = np.frombuffer(raw, dtype=np.uint8).reshape(h, w, -1)
        except (binascii.Error, TypeError, ValueError):
            # Bad base64 padding, missing w/h, or a byte count that does not
            # match the requested dimensions.
            return {
                'message': 'Something when wrong',
            }
        # do model inference here
        return json.dumps({
            'mean': np.mean(fdata),
            'channel': fdata.shape[-1],
            'message': 'darknet processed',
        })
if __name__ == '__main__':, host = '', port = 5000, threaded=True)
In the # do model inference here part, just use your detect/predict function.
If you want to use native darknet,
If you want to use gRPC instead of REST,


how to pass a json String to a url using Flask

I created a machine learning model using PyTorch which I want to expose as a web service using Flask. The problem is that I don't understand how I can pass a JSON string to the URL. Below is the code that I wrote to do some tryouts with my model and Flask:
from modelLoader import Model
from imageLoader import Img
import os
from flask import Flask, jsonify, request
app = Flask(__name__)
classes = ["dummy-image", "product-image"]
model_path = os.path.join("data", "models", "model1709", "model1709")
image_path = os.path.join("data", "images", "dummy_images")
m1 = Model(model_path, classes, "cpu")
#app.route('/predict', methods=['POST', 'GET'])
def predict():
    """Classify the image(s) referenced by the ``url`` field of the JSON body.

    ``url`` may be a single URL string or a list of URL strings.

    Returns:
        For a single URL: ``{<url>: <label>, "info": <str>}``.
        For a list: ``{"type": "bulk_upload", "results": {<url>: {...}}}``.
        A 400 response when the body is not a JSON object with a ``url`` field.
    """
    # silent=True makes get_json return None instead of raising on a missing
    # or malformed JSON body, so the request can be rejected cleanly.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or not payload.get('url'):
        return jsonify({"error": "a JSON body with a 'url' field is required"}), 400

    input_data = payload['url']
    # case for handle json
    if isinstance(input_data, list):
        # Keep every prediction instead of discarding it inside the loop.
        results = {}
        for elem in input_data:
            res, info = Img(url=elem).get_prediction(m1)
            results[elem] = {"prediction": res, "info": str(info)}
        return jsonify({"type": "bulk_upload", "results": results})

    res, info = Img(url=input_data).get_prediction(m1)
    return jsonify({input_data: res, "info": str(info)})
if __name__ == '__main__':
This would be a request that I want to make using this code:
Content-Type: application/json
Accept: application/json
"url" : ""
How exactly can I get the prediction for the image inside the json-string, by passing this json-string to the application?
Here the two classes model and imageLoader for completeness:
from torch import argmax, device, load, nn
class Model:
    """Bundle a serialized torch model with the labels of its output classes."""

    def __init__(self, path, class_list=None, dvc=None):
        """Load the model stored at ``path`` and size its ``fc`` head to the labels.

        Args:
            path: Location of the serialized model handed to ``torch.load``.
            class_list: Labels of the output classes; ``[0, 1]`` when omitted.
            dvc: Device string used as ``map_location``; ``'cpu'`` when omitted.
        """
        labels = [0, 1] if class_list is None else class_list
        target = 'cpu' if dvc is None else dvc

        self.class_list = labels
        self.model = load(path, map_location=device(target))
        # The ``fc`` layer is swapped for a new Linear layer whose output
        # width equals the number of labels.
        # NOTE(review): a freshly built layer carries no trained weights -
        # confirm that the state is restored elsewhere.
        in_width = self.model.fc.in_features
        self.model.fc = nn.Linear(in_width, len(labels))
import torchvision.transforms as transforms
import io
from PIL import Image
from torch import argmax, device, load, nn
import requests
class Img:
def __init__(self, url=None, image=None, image_bytes=None):
if url:
img =, stream=True).raw)
img_byte_arr = io.BytesIO(), format=img.format)
self.image_bytes = img_byte_arr.getvalue()
elif image:
f =
self.image_bytes = bytearray(f)
elif image_bytes:
self.image_bytes = image_bytes
def transform_image(self):
data_transforms = transforms.Compose([transforms.Resize((224, 224)),
224), transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224,
image ='RGB')
return data_transforms(image).unsqueeze(0)
def get_prediction(self, model):
    """Run ``model`` on this image and return the label with class probabilities.

    Args:
        model: Object exposing a callable ``model`` attribute and a
            ``class_list`` indexed by class position.

    Returns:
        Tuple ``(label, probabilities)`` where ``label`` is the entry of
        ``model.class_list`` with the highest score and ``probabilities`` is
        the softmax of the raw model output.
    """
    tensor = self.transform_image()
    output = model.model(tensor)
    # nn.Softmax takes the dimension in its constructor and the tensor when
    # called; ``nn.Softmax(output)`` only built a module and never produced
    # probabilities.
    sm = nn.Softmax(dim=-1)(output)
    best = output.argmax().item()
    return model.class_list[best], sm
You cannot do a POST request directly from the browser URL box. There are many applications to test your API and my favorite one is postman. You can also use the curl command tool.
If you want to use the browser URL box only, then consider using a GET request. The format of a GET request is <URL>?parameter1=value1&parameter2=value2. You can access the value of a parameter in Flask using the request module. For example, if your service is at http://localhost:5000/predict, you can send the request as http://localhost:5000/predict?url=<image-url>, and you can fetch the value in Flask as follows:
from flask import request
my_url = request.args.get("url")

Passing a pandas dataframe to FastAPI for NLP ML

I am trying to, for the first time, deploy an NLP ML model. To do this it was suggested that I use FastAPI and uvicorn. I have had some success in getting FastAPI to respond; however, I have not been able to successfully pass the dataframe and have it process it. I've tried using dictionaries and even attempted to convert the passed json to a dataframe.
With data_dict = data.dict() I get:
ValueError: Iterable over raw text documents expected, string object received.
With data_dict = pd.DataFrame(data.dict()) I get:
ValueError: If using all scalar values, you must pass an index
I believe I understand the problem, my Data class is expecting a string which this is not; however, I have not been able to determine how to set and / or pass the expected data so that fit_transform() will work. Ultimately I will have a prediction returned based on the submitted messages value. Bonus if I can pass a dataframe of 1 or more rows and have predictions made and returned for each of the rows. The response will include the id, project, and the prediction so that we are in future able to leverage this response to post the prediction back to the original (requesting) system.
import requests
import pandas as pd
import json
import os
from pprint import pprint
url = ''
df = pd.DataFrame(
'id': ['ab410483801c38', 'cd34148639180'],
'project': ['project1', 'project2'],
'messages': ['This is message 1', 'This is message 2']
to_predict_dict = df.iloc[0].to_dict()
r =, json=to_predict_dict)
#!/usr/bin/env python
# coding: utf-8
import pickle
import pandas as pd
import numpy as np
from pydantic import BaseModel
from sklearn.feature_extraction.text import TfidfVectorizer
# Server
import uvicorn
from fastapi import FastAPI
# Model
import xgboost as xgb
app = FastAPI()
clf = pickle.load(open('data/xgbmodel.pickle', 'rb'))
class Data(BaseModel):
# id: str
project: str
messages: str
async def test():
return {"ping": "pong"}"/predict")
async def predict(data: Data):
# data_dict = data.dict()
data_dict = pd.DataFrame(data.dict())
tfidf_vect = TfidfVectorizer(stop_words="english", analyzer='word', token_pattern=r'\w{1,}')
# to_predict = tfidf_vect.transform(data_dict['messages'])
# prediction = clf.predict(to_predict)
return {"response": "Success"}
Probably not the most elegant solution but I've made progress using the following:
def predict(data: Data):
data_dict = pd.DataFrame(
'id': [],
'project': [data.project],
'messages': [data.messages]
First, encode your DataFrame df as record-oriented JSON:
r =, json=df.to_json(orient='records')).
Then, decode your data inside the /predict/ endpoint with:
df = pd.DataFrame(jsonable_encoder(data))
Remember to import the module from fastapi.encoders import jsonable_encoder.
A new library called pandera now supports passing DataFrames directly through FastAPI without conversion. The docs are a bit basic as of posting this, but may be worth reading:
I was able to address the issue by simply converting data.messages into a list. I also had to make some unrelated changes, I had failed to pickle my vectorizer (string tokenizer).
import pickle
import pandas as pd
import numpy as np
import json
import time
from pydantic import BaseModel
from sklearn.feature_extraction.text import TfidfVectorizer
# Server / endpoint
import uvicorn
from fastapi import FastAPI
# Model
import xgboost as xgb
app = FastAPI(debug=True)
clf = pickle.load(open('data/xgbmodel.pickle', 'rb'))
vect = pickle.load(open('data/tfidfvect.pickle', 'rb'))
class Data(BaseModel):
id: str = None
project: str
messages: str
async def ping():
return {"ping": "pong"}"/predict/")
def predict(data: Data):
    """Vectorize the submitted message and return the classifier's prediction.

    Args:
        data: Request body carrying ``project`` and ``messages``.

    Returns:
        Dict with the ``project``, the ``prediction`` for the message and the
        ``execution_time`` in seconds, rounded to three decimals.
    """
    start = time.time()
    data_l = [data.messages]  # make messages iterable.
    to_predict = vect.transform(data_l)
    prediction = clf.predict(to_predict)
    exec_time = round((time.time() - start), 3)
    return {
        "project": data.project,
        # .item() turns the NumPy scalar into a built-in Python number so the
        # response encoder can serialize it.
        "prediction": np.asarray(prediction)[0].item(),
        "execution_time": exec_time,
    }
if __name__ == "__main__":, host="", port=8000)

Returning matplotlib plots using telegram bot

This code is from here
I have the following code for a telegram bot which i am building:
import pandas as pd
from pandas import datetime
from pandas import DataFrame as df
import matplotlib
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import datetime
import requests
from bottle import (
run, post, response, request as bottle_request
BOT_URL = ''
def get_chat_id(data):
    """
    Method to extract chat id from telegram request.
    """
    chat_id = data['message']['chat']['id']
    return chat_id
def get_message(data):
    """
    Method to extract message text from telegram request.
    """
    message_text = data['message']['text']
    return message_text
def send_message(prepared_data):
Prepared data should be json which includes at least `chat_id` and `text`
message_url = BOT_URL + 'sendMessage', json=prepared_data)
def get_ticker(text):
stock = f'^GSPC'
start =,1,1)
end =
data = web.DataReader(stock, 'yahoo',start, end)
plot = data.plot(y='Open')
return plot
def prepare_data_for_answer(data):
answer = get_ticker(get_message(data))
json_data = {
"chat_id": get_chat_id(data),
"text": answer,
return json_data
def main():
data = bottle_request.json
answer_data = prepare_data_for_answer(data)
send_message(answer_data) # <--- function for sending answer
return response # status 200 OK by default
if __name__ == '__main__':
run(host='localhost', port=8080, debug=True)
When i run this code i am getting the following error:
TypeError: Object of type AxesSubplot is not JSON serializable
What this code is supposed to do is take ticker symbols from the Telegram app and return the corresponding chart.
I know this is because JSON does not handle images.
What can I do to resolve it?
Sorry, I'm a bit late to the party. Here is a possible solution below, though I didn't test it. Hope it works or at least gives you a way to go about solving the issue :)
import datetime
from io import BytesIO
import requests
from pandas_datareader import data as web
from bottle import (
run, post, response, request as bottle_request
BOT_URL = ''
def get_chat_id(data):
    """
    Method to extract chat id from telegram request.
    """
    chat_id = data['message']['chat']['id']
    return chat_id
def get_message(data):
    """
    Method to extract message text from telegram request.
    """
    message_text = data['message']['text']
    return message_text
def send_photo(prepared_data):
    """
    Send the prepared plot to the chat through the `sendPhoto` endpoint.

    Prepared data should be a dict which includes at least `chat_id` and `plot_file`
    """
    data = {'chat_id': prepared_data['chat_id']}
    files = {'photo': prepared_data['plot_file']}
    # The chat id travels as a form field: requests ignores `json=` whenever
    # `files=` is given, because the body becomes multipart/form-data.
    requests.post(BOT_URL + 'sendPhoto', data=data, files=files)
def get_ticker(text):
stock = f'^GSPC'
start =,1,1)
end =
data = web.DataReader(stock, 'yahoo',start, end)
plot = data.plot(y='Open')
return plot
def prepare_data_for_answer(data):
    """
    Render the requested chart and bundle it with the originating chat id.

    Returns a dict with `chat_id` and `plot_file`, a BytesIO holding the PNG.
    """
    plot = get_ticker(get_message(data))
    # Write the plot Figure to a file-like bytes object:
    plot_file = BytesIO()
    fig = plot.get_figure()
    fig.savefig(plot_file, format='png')
    # Rewind the buffer: savefig leaves the position at the end, so a reader
    # starting from the current position would see no bytes.
    plot_file.seek(0)
    prepared_data = {
        "chat_id": get_chat_id(data),
        "plot_file": plot_file,
    }
    return prepared_data
def main():
data = bottle_request.json
answer_data = prepare_data_for_answer(data)
send_photo(answer_data) # <--- function for sending answer
return response # status 200 OK by default
if __name__ == '__main__':
run(host='localhost', port=8080, debug=True)
The idea is not to send a message using the sendMessage Telegram API endpoint, but to send a photo file by using the sendPhoto endpoint. Here, we use savefig call in the prepare_data_for_answer function body to convert AxesSubplot instance, that we get as a return value from the get_ticker function, to a file-like BytesIO object, which we then send as a photo to Telegram using send_photo function (previously named as send_message).
You may use bob-telegram-tools
import TelegramBot
import matplotlib.pyplot as plt
token = '<your_token>'
user_id = int('<your_chat_id>')
bot = TelegramBot(token, user_id)
plt.plot([1, 2, 3, 4])
plt.ylabel('some numbers')
# This method deletes the generated image
You cannot send a matplotlib figure directly. You will need to convert it to bytes and then send it as a multipart message.
data.plot will return a matplotlib.axes.Axes object. You can convert the figure to bytes like this:
import StringIO
img = StringIO.StringIO()
plot.fig.savefig(img, format='png')
yukuku/telebot has some good code on how to send the image as a message. Check this line here.

How to annotate MULTIPLE images from a single call using Google's vision API? Python

I recently started using Google's Vision API. I am trying to annotate a batch of images and therefore followed the 'batch image annotation offline' guide from their documentation.
However, it is not clear to me how I can annotate MULTIPLE images from one API call. So let's say I have stored 10 images in my Google Cloud bucket. How can I annotate all these images at once and store them in one JSON file? Right now, I wrote a program that calls their example function and it works, but to put it simply, why can't I say: 'Look in this folder and annotate all images in it.'?
Thanks in advance.
from batch_image_labeling import sample_async_batch_annotate_images
counter = 0
for file in os.listdir('my_directory'):
filename = file
sample_async_batch_annotate_images('gs://my_bucket/{}'.format(filename), 'gs://my_bucket/{}'.format(counter))
counter += 1
from import vision_v1
from import enums
import six
def sample_async_batch_annotate_images(input_image_uri, output_uri):
    """Annotate one image asynchronously and write the result under ``output_uri``.

    Label detection and image properties are requested for the image at
    ``input_image_uri``. The call blocks until the operation completes.
    """
    client = vision_v1.ImageAnnotatorClient()

    # Byte-string URIs are decoded to text before use.
    if isinstance(input_image_uri, six.binary_type):
        input_image_uri = input_image_uri.decode('utf-8')
    if isinstance(output_uri, six.binary_type):
        output_uri = output_uri.decode('utf-8')

    wanted_features = [
        {'type': enums.Feature.Type.LABEL_DETECTION},
        {'type': enums.Feature.Type.IMAGE_PROPERTIES},
    ]
    annotate_requests = [
        {
            'image': {'source': {'image_uri': input_image_uri}},
            'features': wanted_features,
        },
    ]

    # The max number of responses to output in each JSON file
    responses_per_file = 2
    destination = {
        'gcs_destination': {'uri': output_uri},
        'batch_size': responses_per_file,
    }

    operation = client.async_batch_annotate_images(annotate_requests, destination)

    print('Waiting for operation to complete...')
    result = operation.result()

    # The output is written to GCS with the provided output_uri as prefix
    written_prefix = result.output_config.gcs_destination.uri
    print('Output written to GCS with prefix: {}'.format(written_prefix))
It's somewhat unclear from that example, but your call to async_batch_annotate_images takes a requests parameter which is a list of multiple requests. So you can do something like this:
rom import vision_v1
from import enums
import six
def generate_request(input_image_uri):
    """Build one annotation request for the image at ``input_image_uri``.

    Args:
        input_image_uri: URI of the image, as text or as a UTF-8 byte string.

    Returns:
        Dict with the ``image`` source and the requested ``features``
        (label detection and image properties).
    """
    if isinstance(input_image_uri, six.binary_type):
        input_image_uri = input_image_uri.decode('utf-8')
    # The output URI is not a parameter of this helper; referencing it here
    # raised NameError on every call, so that handling was removed.
    source = {'image_uri': input_image_uri}
    image = {'source': source}
    type_ = enums.Feature.Type.LABEL_DETECTION
    features_element = {'type': type_}
    type_2 = enums.Feature.Type.IMAGE_PROPERTIES
    features_element_2 = {'type': type_2}
    features = [features_element, features_element_2]
    requests_element = {'image': image, 'features': features}
    return requests_element
def sample_async_batch_annotate_images(input_uri, output_uri):
    """Perform async batch image annotation for every file in 'my_directory'.

    Args:
        input_uri: URI template containing one ``{}`` placeholder that is
            filled with each file name, e.g. ``'gs://my_bucket/{}'``.
        output_uri: Prefix under which the result files are written.
    """
    import os  # local import: the directory listing below needs it

    client = vision_v1.ImageAnnotatorClient()
    if isinstance(output_uri, six.binary_type):
        output_uri = output_uri.decode('utf-8')
    # One request per file; all of them are submitted in a single call.
    requests = [
        generate_request(input_uri.format(filename))
        for filename in os.listdir('my_directory')
    ]
    gcs_destination = {'uri': output_uri}
    # The max number of responses to output in each JSON file
    batch_size = 1
    output_config = {'gcs_destination': gcs_destination, 'batch_size': batch_size}
    operation = client.async_batch_annotate_images(requests, output_config)
    print('Waiting for operation to complete...')
    response = operation.result()
    # The output is written to GCS with the provided output_uri as prefix
    gcs_output_uri = response.output_config.gcs_destination.uri
    print('Output written to GCS with prefix: {}'.format(gcs_output_uri))
sample_async_batch_annotate_images('gs://my_bucket/{}', 'gs://my_bucket/results')
This can annotate up to 2,000 images in a single request. The only downside is that you can only specify a single output_uri as a destination, so you won't be able to use counter to put each result in a separate file, but you can set batch_size = 1 to ensure each response is written separately if this is what you want.

Tensorflow serving returns [[[0]]]

I'm trying to use Tensorflow Serving to make predictions for my model.
This is the client that I'm using:
#!/usr/bin/env python2.7
"""A client that talks to tensorflow_model_server loaded with deepspeech model.
The client queries the service with the given audio and prints a ranked list
of decoded outputs to the standard output, one per line.
Typical usage example: --server=localhost:9000 --file audio.wav
import os
import sys
local_tf = os.path.join(os.path.dirname(os.path.dirname(os.path.join(os.path.abspath(__file__)))), 'local_tf')
import threading
from grpc.beta import implementations
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
from util.text import ndarray_to_text
from import audiofile_to_input_vector'server', '', 'PredictionService host:port')'file', '', 'Wave audio file')
# These need to match the constants used when training the deepspeech model'n_input', 26, 'Number of MFCC features')'n_context', 9, 'Number of frames of context')
def _create_rpc_callback(event):
def _callback(result_future):
exception = result_future.exception()
if exception:
print exception
results = tf.contrib.util.make_ndarray(result_future.result().outputs['outputs'])
for result in results[0]:
print ndarray_to_text(result)
return _callback
def do_inference(hostport, audio):
host, port = hostport.split(':')
channel = implementations.insecure_channel(host, int(port))
stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
request = predict_pb2.PredictRequest() = 'deepspeech'
event = threading.Event()
result_future = stub.Predict.future(request, 5.0) # 5 seconds
if event.is_set() != True:
def main(_):
if not FLAGS.server:
print 'please specify server host:port'
if not FLAGS.file:
print 'pleace specify an audio file'
audio_waves = audiofile_to_input_vector(
FLAGS.file, FLAGS.n_input, FLAGS.n_context)
audio = np.array([ audio_waves ])
do_inference(FLAGS.server, audio)
if __name__ == '__main__':
It's taken directly from an older commit of the Mozilla DeepSpeech implementation.
I have a trained model (from the same commit) that I'm trying to use to make predictions. I'm using the LDC193S1.wav file to try and make predictions.
When I run the model, I get back a response that is equal to
dtype: DT_INT64 tensor_shape { dim { size: 1 } dim { size: 1 } dim { size: 1 } } int64_val: 0 [[[0]]]
What can I do to fix this?

