I have this simple webapp written in python (Flask)
models.py
from flask_sqlalchemy import SQLAlchemy
db = SQLAlchemy()
class Coin(db.Model):
    """ORM model mapped to the ``coins`` table."""

    __tablename__ = "coins"

    id = db.Column(db.Integer, primary_key=True)  # primary key
    pair = db.Column(db.String)
    sell_amt = db.Column(db.Float)
    buy_amt = db.Column(db.Float)
app.py
from flask import Flask
from ui import ui
from models import db , Coin
# Create the application, attach the UI blueprint and bind the shared
# SQLAlchemy instance to it.
app = Flask(__name__)
app.register_blueprint(ui)
db.init_app(app)

if __name__ == "__main__":
    app.run(port=8080)
__init__.py in ui folder
from flask import Blueprint ,current_app
from models import db, Coin
from threading import Thread
ui = Blueprint('ui', __name__)
def intro():
    """Bot loop intended to run in a background thread.

    NOTE(review): kept as posted in the question. ``current_app`` is
    evaluated inside the new thread here, which is where the
    "Working outside of application context" error is raised.
    """
    global bot_state
    with current_app.app_context():
        all_coins = Coin.query.filter_by().all()
    # Keep reporting until the stop route clears the flag.
    while bot_state:
        sleep(3)
        print(f" Current time : {time()}")
@ui.route('/startbot')
def start_bot():
    """Mark the bot as running and launch ``intro`` in a daemon thread."""
    global bot_thread, bot_state
    bot_state = True
    bot_thread = Thread(target=intro, daemon=True)
    bot_thread.start()
    return "bot started "
@ui.route('/stopbot')
def stop_bot():
    """Clear the running flag and wait for the bot thread to finish."""
    global bot_state
    bot_state = False
    # NOTE(review): ``bot_thread`` only exists after /startbot was called.
    bot_thread.join()
    return " bot stopped"
When I send a request to /startbot, the app throws an error saying that it is working outside of the application context:
RuntimeError: Working outside of application context.
This typically means that you attempted to use functionality that needed
to interface with the current application object in some way. To solve
this, set up an application context with app.app_context(). See the
documentation for more information.
However, creating a database object, for example new = Coin(), works fine. How do you give a function the application context without writing a function that returns the app? Doing that creates another error (a circular import).
Note this is the bare minimum example and there are other files that require access to the models.py folder (to add orders to the data base created by the bot )
There has to be a better way of doing it, but this is what I managed to do: we create two apps. The first one is the main web app and looks something like this:
app = Flask(__name__)
app.register_blueprint(some_blueprint)
db.init_app(app)
and the second app will be for the bot and will be declared in the same file where the bot core code is written and can be imported into the blueprint and looks like this
bot_app = Flask(__name__)
# Bind the shared SQLAlchemy instance to the bot's own application object
# (the original passed ``app``, which is not the object created above).
db.init_app(bot_app)
Now intro will look sth like this
from bot_file import bot_app
def intro(app):
    """Query all coins inside the context of the imported ``bot_app``.

    The ``app`` argument is accepted but not used by this version.
    """
    with bot_app.app_context():
        all_coins = Coin.query.all()
This way we can use bot_app in the bot_core class without importing the main web app.
This isn't the most preferable code out there but it does solve this problem
The trick is to pass the application object to the thread. This also works with the proxy current_app. In this case, however, you need access to the underlying application object. You can find a short note on this within the documentation here.
from flask import current_app
# ...
def intro(app):
    """Query all coins inside the context of the application passed in."""
    with app.app_context():
        all_coins = Coin.query.all()
@ui.route('/startbot')
def start_bot():
    """Start ``intro`` in a daemon thread, handing it the real app object."""
    bot_thread = Thread(
        target=intro,
        # Pass the object behind the ``current_app`` proxy to the thread.
        args=(current_app._get_current_object(),),  # <- !!!
        daemon=True
    )
    bot_thread.start()
    return "bot started"
Since you don't seem to have fully understood my explanations, the following is how the complete contents of the __init__.py file would look like.
from flask import Blueprint, current_app, render_template
from models import Coin, db
from threading import Event, Lock, Thread
from time import sleep, time
ui = Blueprint('ui', __name__)
thread = None
thread_event = Event()
thread_lock = Lock()
def intro(app, event):
    """Run the bot loop for as long as ``event`` stays set.

    Each pass logs the current time, queries all coins inside an
    application context, then sleeps for whatever is left of 3 seconds.
    On exit the event is cleared and the stop is logged.
    """
    app.logger.info('bot started')
    try:
        while event.is_set():
            tm = time()
            app.logger.info('current time %s', tm)
            with app.app_context():
                all_coins = Coin.query.all()
                # ...
            dur = 3 - (time() - tm)
            if dur > 0:
                sleep(dur)
    finally:
        event.clear()
        app.logger.info('bot stopped')
@ui.route('/startbot')
def start_bot():
    """Set the run event and start the worker thread if none is running."""
    global thread
    thread_event.set()
    with thread_lock:
        # A worker that exited on its own (e.g. after an exception) leaves a
        # dead Thread object behind; treat that like "no thread" so the bot
        # can be started again.
        if thread is None or not thread.is_alive():
            thread = Thread(
                target=intro,
                args=(current_app._get_current_object(), thread_event),
                daemon=True
            )
            thread.start()
    return '', 200
@ui.route('/stopbot')
def stop_bot():
    """Clear the run event, then join and forget the worker thread."""
    global thread
    thread_event.clear()
    with thread_lock:
        if thread is not None:
            thread.join()
            thread = None
    return '', 200
Have fun and success with the further implementation of your project.
Related
I've been struggling with this for awhile now. I Have a flask app that is executed in my app.py file. In this file I have a bunch of endpoints that call different functions from other files. In another file, extensions.py, I've instantiated a class that contains a redis connection. See the file structure below.
#app.py
from flask import Flask
from extensions import redis_obj
app = Flask(__name__)
@app.route('/flush-cache', methods=['POST'])
def flush_redis():
    """Flush the cache through ``redis_obj`` and return its result string."""
    result = redis_obj.flush_redis_cache()
    return result
# extensions.py
from redis_class import CloudRedis
redis_obj = CloudRedis()
# redis_class
import redis
class CloudRedis:
    """Small wrapper around a Redis connection created from a URL."""

    def __init__(self):
        pool = redis.ConnectionPool.from_url('REDIS_URL',
                                             ssl_cert_reqs=None)
        self.conn = redis.Redis(connection_pool=pool)

    def flush_redis_cache(self):
        """Flush the current database.

        Returns 'OK' on success and 'redis flush failed' when Redis
        reports an error.
        """
        try:
            self.conn.flushdb()
        except redis.RedisError:
            # Only Redis errors are turned into a message; anything else
            # is a programming error and should surface.
            return 'redis flush failed'
        return 'OK'
I've been attempting to use monkeypatching in a test to patch flush_redis_cache, so that when I run flush_redis() the call to redis_obj.flush_redis_cache() just returns "OK", since I've already tested the CloudRedis class in other pytests. However, no matter what I've tried, I haven't been able to patch it successfully. This is what I have below.
from extensions import redis_obj
from app import app
@pytest.fixture()
def client():
    """Yield a test client for ``app``."""
    yield app.test_client()
def test_flush_redis_when_redis_flushed(client, monkeypatch):
    """POST /flush-cache returns 'OK' when the Redis flush is stubbed out."""
    # setup
    def get_mock_flush_redis_cache():
        return 'OK'

    # Patch the attribute on the imported ``redis_obj`` instance; the
    # original referred to the undefined names ``cloud_reids`` and
    # ``cloud_redis``.
    monkeypatch.setattr(redis_obj, 'flush_redis_cache', get_mock_flush_redis_cache)

    # act
    res = client.post('/flush-cache')

    # assert
    assert res.status_code == 200
    assert res.data == b'OK'
Does anyone have any ideas on how this can be done?
I am using Flask with Celery and I am trying to lock a specific task so that it can only be run one at a time. In the celery docs it gives a example of doing this Celery docs, Ensuring a task is only executed one at a time. This example that was given was for Django however I am using flask I have done my best to convert this to work with Flask however I still see myTask1 which has the lock can be run multiple times.
One thing that is not clear to me is if I am using the cache correctly, I have never used it before so all of it is new to me. One thing from the doc's that is mentioned but not explained is this
Doc Notes:
In order for this to work correctly you need to be using a cache backend where the .add operation is atomic. memcached is known to work well for this purpose.
I'm not really sure what that means. Should I be using the cache in conjunction with a database, and if so, how would I do that? I am using MongoDB. In my code I just have this setup for the cache: cache = Cache(app, config={'CACHE_TYPE': 'simple'}), as that is what was mentioned in the Flask-Cache docs.
Another thing that is not clear to me is if there is anything different I need to do as I am calling my myTask1 from within my Flask route task1
Here is an example of my code that I am using.
from flask import (Flask, render_template, flash, redirect,
url_for, session, logging, request, g, render_template_string, jsonify)
from flask_caching import Cache
from contextlib import contextmanager
from celery import Celery
from Flask_celery import make_celery
from celery.result import AsyncResult
from celery.utils.log import get_task_logger
from celery.five import monotonic
from flask_pymongo import PyMongo
from hashlib import md5
import pymongo
import time
app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'simple'})
app.config['SECRET_KEY']= 'super secret key for me123456789987654321'
######################
# MONGODB SETUP
#####################
app.config['MONGO_HOST'] = 'localhost'
app.config['MONGO_DBNAME'] = 'celery-test-db'
app.config["MONGO_URI"] = 'mongodb://localhost:27017/celery-test-db'
mongo = PyMongo(app)
##############################
# CELERY ARGUMENTS
##############################
app.config['CELERY_BROKER_URL'] = 'amqp://localhost//'
# NOTE(review): this value is overwritten by the assignment on the next line.
app.config['CELERY_RESULT_BACKEND'] = 'mongodb://localhost:27017/celery-test-db'
app.config['CELERY_RESULT_BACKEND'] = 'mongodb'
app.config['CELERY_MONGODB_BACKEND_SETTINGS'] = {
"host": "localhost",
"port": 27017,
"database": "celery-test-db",
"taskmeta_collection": "celery_jobs",
}
app.config['CELERY_TASK_SERIALIZER'] = 'json'
# NOTE(review): this instance is discarded; ``celery`` is rebound by
# make_celery(app) on the following line.
celery = Celery('task',broker='mongodb://localhost:27017/jobs')
celery = make_celery(app)
LOCK_EXPIRE = 60 * 2 # Lock expires in 2 minutes
@contextmanager
def memcache_lock(lock_id, oid):
    """Context manager that yields the result of ``cache.add(lock_id, ...)``.

    On exit the key is deleted only when it was added by this call and the
    lock has not come within 3 seconds of ``LOCK_EXPIRE``.
    """
    timeout_at = monotonic() + LOCK_EXPIRE - 3
    # cache.add fails if the key already exists
    status = cache.add(lock_id, oid, LOCK_EXPIRE)
    try:
        yield status
    finally:
        # memcache delete is very slow, but we have to use it to take
        # advantage of using add() for atomic locking
        if monotonic() < timeout_at and status:
            # don't release the lock if we exceeded the timeout
            # to lessen the chance of releasing an expired lock
            # owned by someone else
            # also don't release the lock if we didn't acquire it
            cache.delete(lock_id)
@celery.task(bind=True, name='app.myTask1')
def myTask1(self):
    """Do the guarded work if the lock is acquired, otherwise retry in 60 s."""
    self.update_state(state='IN TASK')
    lock_id = self.name
    with memcache_lock(lock_id, self.app.oid) as acquired:
        if acquired:
            # do work if we got the lock
            print('acquired is {}'.format(acquired))
            self.update_state(state='DOING WORK')
            time.sleep(90)
            return 'result'
    # otherwise, the lock was already in use
    raise self.retry(countdown=60)  # redeliver message to the queue, so the work can be done later
@celery.task(bind=True, name='app.myTask2')
def myTask2(self):
    """Unlocked task: report its state and sleep for 120 seconds."""
    print('you are in task2')
    self.update_state(state='STARTING')
    time.sleep(120)
    print('task2 done')
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the index page."""
    return render_template('index.html')
@app.route('/task1', methods=['GET', 'POST'])
def task1():
    """Queue ``myTask1``, record its id in ``job_task_id`` and render the page."""
    print('running task1')
    result = myTask1.delay()
    # get async task id
    taskResult = AsyncResult(result.task_id)
    # push async taskid into db collection job_task_id
    mongo.db.job_task_id.insert({'taskid': str(taskResult), 'TaskName': 'task1'})
    return render_template('task1.html')
@app.route('/task2', methods=['GET', 'POST'])
def task2():
    """Queue ``myTask2``, record its id in ``job_task_id`` and render the page."""
    print('running task2')
    result = myTask2.delay()
    # get async task id
    taskResult = AsyncResult(result.task_id)
    # push async taskid into db collection job_task_id
    mongo.db.job_task_id.insert({'taskid': str(taskResult), 'TaskName': 'task2'})
    return render_template('task2.html')
@app.route('/status', methods=['GET', 'POST'])
def status():
    """Render the state of every task recorded in ``job_task_id``.

    Passes ``status.html`` an iterable of ``(state, task name)`` pairs.
    """
    task_state_list = []
    TaskName_list = []
    for doc in mongo.db.job_task_id.find():
        try:
            task_id = doc['taskid']
            task_name = doc['TaskName']
        except KeyError:
            # Skip incomplete documents so that states and names stay paired.
            print('error with db conneciton in asyncJobStatus')
            continue
        # PASS TASK ID TO ASYNC RESULT TO GET TASK RESULT FOR THAT SPECIFIC TASK
        try:
            state = myTask1.AsyncResult(task_id).state
        except Exception:
            # Any result-backend failure is shown as an unknown state.
            state = 'UNKNOWN'
        task_state_list.append(state)
        TaskName_list.append(task_name)
    return render_template('status.html', data_list=zip(task_state_list, TaskName_list))
Final Working Code
from flask import (Flask, render_template, flash, redirect,
url_for, session, logging, request, g, render_template_string, jsonify)
from flask_caching import Cache
from contextlib import contextmanager
from celery import Celery
from Flask_celery import make_celery
from celery.result import AsyncResult
from celery.utils.log import get_task_logger
from celery.five import monotonic
from flask_pymongo import PyMongo
from hashlib import md5
import pymongo
import time
import redis
from flask_redis import FlaskRedis
app = Flask(__name__)
# ADDING REDIS
redis_store = FlaskRedis(app)
# POINTING CACHE_TYPE TO REDIS
cache = Cache(app, config={'CACHE_TYPE': 'redis'})
app.config['SECRET_KEY']= 'super secret key for me123456789987654321'
######################
# MONGODB SETUP
#####################
app.config['MONGO_HOST'] = 'localhost'
app.config['MONGO_DBNAME'] = 'celery-test-db'
app.config["MONGO_URI"] = 'mongodb://localhost:27017/celery-test-db'
mongo = PyMongo(app)
##############################
# CELERY ARGUMENTS
##############################
# CELERY USING REDIS
app.config['CELERY_BROKER_URL'] = 'redis://localhost:6379/0'
# NOTE(review): this value is overwritten by the assignment on the next line.
app.config['CELERY_RESULT_BACKEND'] = 'mongodb://localhost:27017/celery-test-db'
app.config['CELERY_RESULT_BACKEND'] = 'mongodb'
app.config['CELERY_MONGODB_BACKEND_SETTINGS'] = {
"host": "localhost",
"port": 27017,
"database": "celery-test-db",
"taskmeta_collection": "celery_jobs",
}
app.config['CELERY_TASK_SERIALIZER'] = 'json'
# NOTE(review): this instance is discarded; ``celery`` is rebound by
# make_celery(app) on the following line.
celery = Celery('task',broker='mongodb://localhost:27017/jobs')
celery = make_celery(app)
LOCK_EXPIRE = 60 * 2 # Lock expires in 2 minutes
@contextmanager
def memcache_lock(lock_id, oid):
    """Context manager that yields the result of ``cache.add(lock_id, ...)``.

    On exit the key is deleted only when it was added by this call and the
    lock has not come within 3 seconds of ``LOCK_EXPIRE``.
    """
    timeout_at = monotonic() + LOCK_EXPIRE - 3
    print('in memcache_lock and timeout_at is {}'.format(timeout_at))
    # cache.add fails if the key already exists
    status = cache.add(lock_id, oid, LOCK_EXPIRE)
    try:
        yield status
        print('memcache_lock and status is {}'.format(status))
    finally:
        # memcache delete is very slow, but we have to use it to take
        # advantage of using add() for atomic locking
        if monotonic() < timeout_at and status:
            # don't release the lock if we exceeded the timeout
            # to lessen the chance of releasing an expired lock
            # owned by someone else
            # also don't release the lock if we didn't acquire it
            cache.delete(lock_id)
@celery.task(bind=True, name='app.myTask1')
def myTask1(self):
    """Do the guarded work if the lock is acquired, otherwise retry in 60 s."""
    self.update_state(state='IN TASK')
    print('dir is {} '.format(dir(self)))
    lock_id = self.name
    print('lock_id is {}'.format(lock_id))
    with memcache_lock(lock_id, self.app.oid) as acquired:
        print('in memcache_lock and lock_id is {} self.app.oid is {} and acquired is {}'.format(lock_id, self.app.oid, acquired))
        if acquired:
            # do work if we got the lock
            print('acquired is {}'.format(acquired))
            self.update_state(state='DOING WORK')
            time.sleep(90)
            return 'result'
    # otherwise, the lock was already in use
    raise self.retry(countdown=60)  # redeliver message to the queue, so the work can be done later
@celery.task(bind=True, name='app.myTask2')
def myTask2(self):
    """Unlocked task: report its state and sleep for 120 seconds."""
    print('you are in task2')
    self.update_state(state='STARTING')
    time.sleep(120)
    print('task2 done')
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the index page."""
    return render_template('index.html')
@app.route('/task1', methods=['GET', 'POST'])
def task1():
    """Queue ``myTask1``, record its id in ``job_task_id`` and render the page."""
    print('running task1')
    result = myTask1.delay()
    # get async task id
    taskResult = AsyncResult(result.task_id)
    # push async taskid into db collection job_task_id
    mongo.db.job_task_id.insert({'taskid': str(taskResult), 'TaskName': 'myTask1'})
    return render_template('task1.html')
@app.route('/task2', methods=['GET', 'POST'])
def task2():
    """Queue ``myTask2``, record its id in ``job_task_id`` and render the page."""
    print('running task2')
    result = myTask2.delay()
    # get async task id
    taskResult = AsyncResult(result.task_id)
    # push async taskid into db collection job_task_id
    mongo.db.job_task_id.insert({'taskid': str(taskResult), 'TaskName': 'task2'})
    return render_template('task2.html')
@app.route('/status', methods=['GET', 'POST'])
def status():
    """Render the state of every task recorded in ``job_task_id``.

    Passes ``status.html`` an iterable of ``(state, task name)`` pairs.
    """
    task_state_list = []
    TaskName_list = []
    for doc in mongo.db.job_task_id.find():
        try:
            task_id = doc['taskid']
            task_name = doc['TaskName']
        except KeyError:
            # Skip incomplete documents so that states and names stay paired.
            print('error with db conneciton in asyncJobStatus')
            continue
        # PASS TASK ID TO ASYNC RESULT TO GET TASK RESULT FOR THAT SPECIFIC TASK
        try:
            state = myTask1.AsyncResult(task_id).state
        except Exception:
            # Any result-backend failure is shown as an unknown state.
            state = 'UNKNOWN'
        task_state_list.append(state)
        TaskName_list.append(task_name)
    return render_template('status.html', data_list=zip(task_state_list, TaskName_list))
if __name__ == '__main__':
    # Script entry point: set the secret key and serve on localhost:1234.
    app.secret_key = 'super secret key for me123456789987654321'
    app.run(port=1234, host='localhost')
Here is also a screenshot; you can see that I ran myTask1 two times and myTask2 a single time. Now I have the expected behavior for myTask1: it is run by a single worker, and if another worker attempts to pick it up, it will just keep retrying based on whatever I define.
In your question, you point out this warning from the Celery example you used:
In order for this to work correctly you need to be using a cache backend where the .add operation is atomic. memcached is known to work well for this purpose.
And you mention that you don't really understand what this means. Indeed, the code you show demonstrates that you've not heeded that warning, because your code uses an inappropriate backend.
Consider this code:
with memcache_lock(lock_id, self.app.oid) as acquired:
if acquired:
# do some work
What you want here is for acquired to be true only for one thread at a time. If two threads enter the with block at the same time, only one should "win" and have acquired be true. This thread that has acquired true can then proceed with its work, and the other thread has to skip doing the work and try again later to acquire the lock. In order to ensure that only one thread can have acquired true, .add must be atomic.
Here's some pseudo code of what .add(key, value) does:
1. if <key> is already in the cache:
2.     return False
3. else:
4.     set the cache so that <key> has the value <value>
5.     return True
If the execution of .add is not atomic, this could happen if two threads A and B execute .add("foo", "bar"). Assume an empty cache at the start.
Thread A executes 1. if "foo" is already in the cache and finds that "foo" is not in the cache, and jumps to line 3 but the thread scheduler switches control to thread B.
Thread B also executes 1. if "foo" is already in the cache, and also finds that "foo" is not in the cache. So it jumps to line 3 and then executes line 4 and 5 which sets the key "foo" to the value "bar" and the call returns True.
Eventually, the scheduler gives control back to Thread A, which continues executing 3, 4, 5 and also sets the key "foo" to the value "bar" and also returns True.
What you have here is two .add calls that return True, if these .add calls are made within memcache_lock this entails that two threads can have acquired be true. So two threads could do work at the same time, and your memcache_lock is not doing what it should be doing, which is only allow one thread to work at a time.
You are not using a cache that ensures that .add is atomic. You initialize it like this:
cache = Cache(app, config={'CACHE_TYPE': 'simple'})
The simple backend is scoped to a single process, has no thread-safety, and has an .add operation which is not atomic. (This does not involve Mongo at all, by the way. If you wanted your cache to be backed by Mongo, you'd have to specify a backend specifically made to send data to a Mongo database.)
So you have to switch to another backend, one that guarantees that .add is atomic. You could follow the lead of the Celery example and use the memcached backend, which does have an atomic .add operation. I don't use Flask, but I've done essentially what you are doing with Django and Celery, and used the Redis backend successfully to provide the kind of locking you're using here.
I also found this to be a surprisingly hard problem. Inspired mainly by Sebastian's work on implementing a distributed locking algorithm in redis I wrote up a decorator function.
A key point to bear in mind about this approach is that we lock tasks at the level of the task's argument space, e.g. we allow multiple game update/process order tasks to run concurrently, but only one per game. That's what argument_signature achieves in the code below. You can see documentation on how we use this in our stack at this gist:
import base64
from contextlib import contextmanager
import json
import pickle as pkl
import uuid
from backend.config import Config
from redis import StrictRedis
from redis_cache import RedisCache
from redlock import Redlock
rds = StrictRedis(Config.REDIS_HOST, decode_responses=True, charset="utf-8")
rds_cache = StrictRedis(Config.REDIS_HOST, decode_responses=False, charset="utf-8")
redis_cache = RedisCache(redis_client=rds_cache, prefix="rc", serializer=pkl.dumps, deserializer=pkl.loads)
dlm = Redlock([{"host": Config.REDIS_HOST}])
TASK_LOCK_MSG = "Task execution skipped -- another task already has the lock"
DEFAULT_ASSET_EXPIRATION = 8 * 24 * 60 * 60 # by default keep cached values around for 8 days
DEFAULT_CACHE_EXPIRATION = 1 * 24 * 60 * 60 # we can keep cached values around for a shorter period of time
REMOVE_ONLY_IF_OWNER_SCRIPT = """
if redis.call("get",KEYS[1]) == ARGV[1] then
return redis.call("del",KEYS[1])
else
return 0
end
"""
@contextmanager
def redis_lock(lock_name, expires=60):
    """Context manager that yields whether ``lock_name`` was acquired.

    The key is set with ``nx=True`` and a random value; on exit it is
    removed through ``REMOVE_ONLY_IF_OWNER_SCRIPT``, but only when this
    call acquired it.
    """
    # https://breadcrumbscollector.tech/what-is-celery-beat-and-how-to-use-it-part-2-patterns-and-caveats/
    random_value = str(uuid.uuid4())
    lock_acquired = bool(
        rds.set(lock_name, random_value, ex=expires, nx=True)
    )
    try:
        yield lock_acquired
    finally:
        # Release even when the guarded block raises, so a failed task
        # does not keep the lock until it expires.
        if lock_acquired:
            rds.eval(REMOVE_ONLY_IF_OWNER_SCRIPT, 1, lock_name, random_value)
def argument_signature(*args, **kwargs):
    """Return a base64 string identifying a call's arguments.

    Positional values are joined with ``_``; keyword pairs are rendered as
    ``key:value``, joined with ``_`` and appended after a ``-``. Keyword
    pairs are sorted by key so the same call yields the same signature
    regardless of the order in which keywords were passed.
    """
    arg_list = [str(x) for x in args]
    kwarg_list = [
        f"{str(k)}:{str(v)}"
        for k, v in sorted(kwargs.items(), key=lambda item: item[0])
    ]
    return base64.b64encode(f"{'_'.join(arg_list)}-{'_'.join(kwarg_list)}".encode()).decode()
def task_lock(func=None, main_key="", timeout=None):
    """Decorator that runs a function only while holding a Redis lock.

    The lock name is ``main_key`` plus the signature of the call's
    arguments. When the lock is already held, ``TASK_LOCK_MSG`` is
    returned instead of calling the function. Usable both bare
    (``@task_lock``) and with arguments (``@task_lock(main_key=...)``).

    NOTE(review): ``timeout`` is passed to ``redis_lock`` as ``expires``;
    with the default ``None`` the key is set without an expiry. Confirm
    that this is intended.
    """
    from functools import wraps

    def _dec(run_func):
        @wraps(run_func)  # keep the wrapped function's name and docstring
        def _caller(*args, **kwargs):
            lock_name = f"{main_key}_{argument_signature(*args, **kwargs)}"
            with redis_lock(lock_name, timeout) as acquired:
                if not acquired:
                    return TASK_LOCK_MSG
                return run_func(*args, **kwargs)
        return _caller
    return _dec(func) if func is not None else _dec
Implementation in our task definitions file:
@celery.task(name="async_test_task_lock")
@task_lock(main_key="async_test_task_lock", timeout=UPDATE_GAME_DATA_TIMEOUT)
def async_test_task_lock(game_id):
    """Test task: print the game id, then sleep for ``TASK_LOCK_TEST_SLEEP``."""
    print(f"processing game_id {game_id}")
    time.sleep(TASK_LOCK_TEST_SLEEP)
How we test against a local celery cluster:
from backend.tasks.definitions import async_test_task_lock, TASK_LOCK_TEST_SLEEP
from backend.tasks.redis_handlers import rds, TASK_LOCK_MSG
class TestTaskLocking(TestCase):
    """Checks the per-argument task lock against a running Celery cluster."""

    def test_task_locking(self):
        rds.flushall()
        # Different arguments: both tasks should still be running.
        res1 = async_test_task_lock.delay(3)
        res2 = async_test_task_lock.delay(5)
        self.assertFalse(res1.ready())
        self.assertFalse(res2.ready())

        # Same argument as res2: both calls should be turned away.
        res3 = async_test_task_lock.delay(5)
        res4 = async_test_task_lock.delay(5)
        self.assertEqual(res3.get(), TASK_LOCK_MSG)
        self.assertEqual(res4.get(), TASK_LOCK_MSG)

        # After the first batch has had time to finish, a new call runs.
        time.sleep(TASK_LOCK_TEST_SLEEP)
        res5 = async_test_task_lock.delay(3)
        self.assertFalse(res5.ready())
(as a goodie there's also a quick example of how to setup a redis_cache)
With this setup, you should still expect to see workers receiving the task, since the lock is checked inside of the task itself. The only difference will be that the work won't be performed if the lock is acquired by another worker.
In the example given in the docs, this is the desired behavior; if a lock already exists, the task will simply do nothing and finish as successful. What you want is slightly different; you want the work to be queued up instead of ignored.
In order to get the desired effect, you would need to make sure that the task will be picked up by a worker and performed some time in the future. One way to accomplish this would be with retrying.
@task(bind=True, name='my-task')
def my_task(self):
    """Do the work if the lock is acquired, otherwise retry in 60 seconds."""
    lock_id = self.name
    with memcache_lock(lock_id, self.app.oid) as acquired:
        if acquired:
            # do work if we got the lock
            print('acquired is {}'.format(acquired))
            return 'result'
    # otherwise, the lock was already in use
    raise self.retry(countdown=60)  # redeliver message to the queue, so the work can be done later
I have 2 functions.
1st function stores the data received in a list and 2nd function writes the data into a csv file.
I'm using Flask. Whenever a web service has been called it will store the data and send response to it, as soon as it sends response it triggers the 2nd function.
My Code:
from flask import Flask, flash, request, redirect, url_for, session
import json
app = Flask(__name__)
arr = []
@app.route("/test", methods=['GET','POST'])
def check():
    """Append form fields ``a`` and ``b`` to ``arr`` and return a JSON status."""
    arr.append(request.form['a'])
    arr.append(request.form['b'])
    res = {'Status': True}
    return json.dumps(res)
def trigger():
    """Write the collected ``arr`` values to ``docs/xyz.csv`` as column ``x``."""
    df = pd.DataFrame({'x': arr})
    df.to_csv("docs/xyz.csv", index=False)
    return
Obviously the 2nd function is not called.
Is there a way to achieve this?
P.S: My real life problem is different where trigger function is time consuming and I don't want user to wait for it to finish execution.
One solution would be to have a background thread that will watch a queue. You put your csv data in the queue and the background thread will consume it. You can start such a thread before first request:
import threading
from multiprocessing import Queue
class CSVWriterThread(threading.Thread):
    """Background thread that writes queued arrays to ``docs/xyz.csv``."""

    def __init__(self, *args, **kwargs):
        # ``close()`` and ``run()`` rely on ``join()`` and ``task_done()``,
        # which ``queue.Queue`` provides and ``multiprocessing.Queue`` does
        # not. The consumer is a thread, so an in-process queue is enough.
        import queue

        threading.Thread.__init__(self, *args, **kwargs)
        self.input_queue = queue.Queue()

    def send(self, item):
        """Queue ``item`` for writing."""
        self.input_queue.put(item)

    def close(self):
        """Send the stop sentinel and block until every item is processed."""
        self.input_queue.put(None)
        self.input_queue.join()

    def run(self):
        """Write each queued array until the ``None`` sentinel arrives."""
        while True:
            csv_array = self.input_queue.get()
            if csv_array is None:
                break
            # Do something here ...
            df = pd.DataFrame({'x': csv_array})
            df.to_csv("docs/xyz.csv", index=False)
            self.input_queue.task_done()
            time.sleep(1)
        # Done: acknowledge the sentinel so that close() can return.
        self.input_queue.task_done()
        return
@app.before_first_request
def activate_job_monitor():
    """Start the CSV writer thread and keep it on ``app.csvwriter``."""
    thread = CSVWriterThread()
    app.csvwriter = thread
    thread.start()
And in your code put the message in the queue before returning:
@app.route("/test", methods=['GET','POST'])
def check():
    """Collect form fields ``a`` and ``b``, hand ``arr`` to the writer thread
    and return a JSON status."""
    arr.append(request.form['a'])
    arr.append(request.form['b'])
    res = {'Status': True}
    app.csvwriter.send(arr)
    return json.dumps(res)
P.S: My real life problem is different where trigger function is time consuming and I don't want user to wait for it to finish execution.
Consider using celery which is made for the very problem you're trying to solve. From docs:
Celery is a simple, flexible, and reliable distributed system to process vast amounts of messages, while providing operations with the tools required to maintain such a system.
I recommend you integrate celery with your flask app as described here. your trigger method would then become a straightforward celery task that you can execute without having to worry about long response time.
I'm actually working on another interesting case on my side, where I pass the work off to a Python worker that sends the job to a Redis queue. There are some great blogs about using Redis with Flask; you basically need to ensure Redis is running (and that you are able to connect on port 6379).
The worker would look something like this:
import os
import redis
from rq import Worker, Queue, Connection
listen = ['default']
redis_url = os.getenv('REDISTOGO_URL', 'redis://localhost:6379')
conn = redis.from_url(redis_url)
if __name__ == '__main__':
    # Run a worker over the queues named in ``listen`` using ``conn``.
    with Connection(conn):
        worker = Worker(list(map(Queue, listen)))
        worker.work()
In my example I have a function that queries a database for usage and since it might be a lengthy process i pass it off to the worker (running as a seperate script)
def post(self):
    """Enqueue ``migrate_usage`` and answer 201 with the job id."""
    data = Task.parser.parse_args()
    # NOTE(review): ``(my_args)`` is not a one-element tuple; confirm that
    # ``my_args`` is already a sequence of arguments.
    job = q.enqueue_call(
        func=migrate_usage, args=(my_args),
        result_ttl=5000
    )
    print("Job ID is: {}".format(job.get_id()))
    job_key = job.get_id()
    print(str(Job.fetch(job_key, connection=conn).result))
    if job:
        return {"message": "Job : {} added to queue".format(job_key)}, 201
Credit due to the following article:
https://realpython.com/flask-by-example-implementing-a-redis-task-queue/#install-requirements
You can try use streaming. See next example:
import time
from flask import Flask, Response
app = Flask(__name__)
@app.route('/')
def main():
    """Serve a page whose script requests ``/test`` and appends a ``<div>``
    on every ready-state change."""
    return '''<div>start</div>
<script>
var xhr = new XMLHttpRequest();
xhr.open('GET', '/test', true);
xhr.onreadystatechange = function(e) {
var div = document.createElement('div');
div.innerHTML = '' + this.readyState + ':' + this.responseText;
document.body.appendChild(div);
};
xhr.send();
</script>
'''
@app.route('/test')
def test():
    """Stream the digits 0-4, one per second, as a plain-text response."""
    def generate():
        app.logger.info('request started')
        for i in range(5):
            time.sleep(1)
            yield str(i)
        app.logger.info('request finished')
        yield ''
    return Response(generate(), mimetype='text/plain')
if __name__ == '__main__':
    app.run('0.0.0.0', 8080, True)
All the magic in this example is in the generator, where you can start sending response data, then do some work, and finally yield empty data to end your stream.
For details look at http://flask.pocoo.org/docs/patterns/streaming/.
You can defer route specific actions with limited context by combining after_this_request and response.call_on_close. Note that request and response context won't be available but the route function context remains available. So you'll need to copy any request/response data you'll need into local variables for deferred access.
I moved your array to a local var to show how the function context is preserved. You could change your csv write function to an append so you're not pushing data endlessly into memory.
from flask import Flask, flash, request, redirect, url_for, session
import json
app = Flask(__name__)
@app.route("/test", methods=['GET','POST'])
def check():
    """Return a JSON status and write the CSV once the response is closed."""
    # Only names from ``flask`` are imported at file level, not the module
    # itself, so bring the decorator into scope here.
    from flask import after_this_request

    arr = []
    arr.append(request.form['a'])
    arr.append(request.form['b'])
    res = {'Status': True}

    @after_this_request
    def add_close_action(response):
        @response.call_on_close
        def process_after_request():
            # ``arr`` is captured from the enclosing function.
            df = pd.DataFrame({'x': arr})
            df.to_csv("docs/xyz.csv", index=False)
        return response

    return json.dumps(res)
Share memory areas between celery workers on one machine
In this post the author demonstrates how to share the dictionary among the workers. Thanks to him I have achieved this. But now I want to read that shared memory from my main.py.
I have main.py. I have generatereport.py which generates reports and I added the shared dictionary here. All the workers now have the access to the same dictionary. Now how can I access this shared dictionary from main.
main.py
from report import reportgen
from flask import Flask, request, send_from_directory, url_for
app = Flask(__name__)
@ns.route('/GenerateReport')
@ns.doc('Generate PDF')
class GeneratePdf(Resource):
    """Resource whose GET queues the ``generate`` task."""

    def get(self):
        task = generate.delay(myDict, uuid)
        return {"request":"made"}
@ns.route('/pdfstatus')
@ns.doc('status')
class statusPdf(Resource):
    """Resource whose GET prints the shared status dictionary."""

    def get(self):
        # NOTE(review): only ``reportgen`` is imported from ``report`` in
        # this file, so the name ``report`` is not defined here.
        print(report.serviceStatusDict)
cel.py
from celery import Celery
from report import reportgen
# The user/host separator in the broker URL is ``@`` (it had been turned
# into ``#`` along with the decorators).
app = Celery('tasks', broker='pyamqp://guest@localhost//')


@app.task
def generate(myDict, name):
    """Build the report data, record a status under ``name`` and run
    ``reportgen``."""
    data = generateJson(myDict)
    pdfgen.serviceStatusDict[name] = "Rest Performed"
    reportgen(data, name)
report.py
from multiprocessing import Manager
manag = Manager()
serviceLock = manag.Lock()
serviceStatusDict = manag.dict()
def reportgen():
    """Record the report status in the shared dictionary."""
    serviceStatusDict["uuid"] = "Generated the report"
    # .... (rest of the report generation was elided in the post)
I'm using Flask to expose some data-crunching code as a web service.
I'd like to have some class variables that my Flask functions can access.
Let me walk you through where I'm stuck:
from flask import Flask
app = Flask(__name__)
class MyServer:
    """Question code, kept as posted apart from formatting.

    NOTE(review): ``globalData`` is a local of ``__init__`` and the route
    function takes no ``self``, so the handler cannot see the loaded data.
    This is the problem the question asks about.
    """

    def __init__(self):
        globalData = json.load(filename)

    @app.route('/getSomeData')
    def getSomeData():
        return random.choice(globalData) #select some random data to return
if __name__ == "__main__":
    app.run(host='0.0.0.0')
When I run getSomeData() outside of Flask, it works fine. But, when I run this with Flask, I get 500 internal server error. There's no magic here, and Flask has no idea that it's supposed to initialize a MyServer object. How can I feed an instance of MyServer to the app.run() command?
I could admit defeat and put globalData into a database instead. But, is there an other way?
You can create an instance of MyServer just outside the scope of your endpoints and access its attributes. This worked for me:
class MyServer:
    """Holder for data that the route functions read."""

    def __init__(self):
        self.globalData = "hello"
from flask import Flask
app = Flask(__name__)
my_server = MyServer()
@app.route("/getSomeData")
def getSomeData():
    """Return the data held by the module-level ``my_server`` instance."""
    return my_server.globalData
if __name__ == "__main__":
    app.run(host="0.0.0.0")
I know this is a late reply, but I came across this question while facing a similar issue. I found flask-classful really good.
You inherit your class from FlaskView and register the Flask app with your MyServer class
http://flask-classful.teracy.org/#
In this case, with flask-classful, your code would look like this:
from flask import Flask
from flask_classful import FlaskView, route
app = Flask(__name__)
class MyServer(FlaskView):
    """Class-based view that serves a random entry of the loaded data."""

    def __init__(self):
        # Keep the data on the instance so the view method can reach it.
        # NOTE(review): ``json.load`` expects an open file object; confirm
        # what ``filename`` refers to.
        self.globalData = json.load(filename)

    @route('/getSomeData')
    def getSomeData(self):
        return random.choice(self.globalData)  # select some random data to return
# ``FlaskView.register`` names this keyword ``route_base``.
MyServer.register(app, route_base="/")

if __name__ == "__main__":
    app.run(host='0.0.0.0')
The least-coupled solution is to apply the routes at runtime (instead of at load time):
def init_app(flask_app, database_interface, filesystem_interface):
    """Create a ``MyServer`` with its dependencies and register its routes
    on ``flask_app`` at runtime."""
    server = MyServer(database_interface, filesystem_interface)
    # URL rules have to start with a leading slash.
    flask_app.route('/get_data', methods=['GET'])(server.get_data)
This is very testable--just invoke init_app() in your test code with the mocked/faked dependencies (database_interface and filesystem_interface) and a flask app that has been configured for testing (app.config["TESTING"]=True or something like that) and you're all-set to write tests that cover your entire application (including the flask routing).
The only downside is this isn't very "Flasky" (or so I've been told); the Flask idiom is to use #app.route(), which is applied at load time and is necessarily tightly coupled because dependencies are hard-coded into the implementation instead of injected into some constructor or factory method (and thus complicated to test).
The following code is a simple solution for OOP with Flask:
from flask import Flask, request
class Server:
    """Flask application wrapped in a class; each route calls a method."""

    def __init__(self, name):
        self.app = Flask(name)

        @self.app.route('/')
        def __index():
            return self.index()

        @self.app.route('/hello')
        def __hello():
            return self.hello()

        @self.app.route('/user_agent')
        def __user_agent():
            return self.user_agent()

        # The ``int`` converter rejects non-numeric paths at routing time
        # instead of letting ``int(n)`` fail inside the handler.
        @self.app.route('/factorial/<int:n>', methods=['GET'])
        def __factorial(n):
            return self.factorial(n)

    def index(self):
        return 'Index Page'

    def hello(self):
        return 'Hello, World'

    def user_agent(self):
        """Return the request's User-Agent header."""
        return request.headers.get('User-Agent')

    def factorial(self, n):
        """Return ``n!`` as a string; ``n`` may be an int or numeric string."""
        n = int(n)
        fact = 1
        for num in range(2, n + 1):
            fact = fact * num
        return str(fact)

    def run(self, host, port):
        self.app.run(host=host, port=port)
def main():
    """Create the server and listen on all interfaces, port 5000."""
    server = Server(__name__)
    server.run(host='0.0.0.0', port=5000)


if __name__ == '__main__':
    main()
To test the code, browse the following urls:
http://localhost:5000/
http://localhost:5000/hello
http://localhost:5000/user_agent
http://localhost:5000/factorial/10
A bit late, but here's a quick implementation that I use to register routes at init time:
from flask import Flask,request,render_template
from functools import partial
# Maps a route string to the function registered for it.
registered_routes = {}


def register_route(route=None):
    """Return a decorator that records a function under ``route``.

    The decorated function is stored in ``registered_routes`` and returned
    unchanged.
    """
    # simple decorator for class based views
    def inner(fn):
        registered_routes[route] = fn
        return fn
    return inner
class MyServer(Flask):
    """Flask subclass that binds the recorded routes to itself on creation."""

    def __init__(self,*args,**kwargs):
        if not args:
            kwargs.setdefault('import_name',__name__)
        Flask.__init__(self,*args ,**kwargs)
        # register the routes from the decorator
        for route,fn in registered_routes.items():
            # Bind ``self`` and give the partial the function's name.
            partial_fn = partial(fn,self)
            partial_fn.__name__ = fn.__name__
            self.route(route)(partial_fn)

    @register_route("/")
    def index(self):
        return render_template("my_template.html")
if __name__ == "__main__":
    import os  # not imported at the top of this snippet

    MyServer(template_folder=os.path.dirname(__file__)).run(debug=True)
if you wish to approach MyServer class as a resource
I believe that flask_restful can help you:
from flask import Flask
from flask_restful import Resource, Api
import json
import numpy as np
app = Flask(__name__)
api = Api(app)
class MyServer(Resource):
    """Resource whose GET returns a random entry of the loaded data."""

    def __init__(self):
        # NOTE(review): ``json.load`` expects an open file object; confirm
        # what ``filename`` refers to.
        self.globalData = json.load(filename)

    def get(self):
        return np.random.choice(self.globalData)
api.add_resource(MyServer, '/')
if __name__ == '__main__':
    app.run()