I am trying to run multiple concurrent downstream requests using gevent in my Flask application.
I have:
import gevent
from gevent import monkey
monkey.patch_all(thread=False)
from flask import Flask, request, g
app = Flask(__name__, static_folder='static')
and the following code:
def f1(self):
    @copy_current_request_context
    def _test(t):
        time.sleep(t)
        r = requests.get(
            'https://webhook.site/d14a41a6-9c6a-4da0-bbe6-3cc660daea3d', params=dict(word='test')
        )
        return r.status_code

    jobs = [gevent.spawn(_test, 5), gevent.spawn(_test, 10)]
    results = [job.value for job in gevent.joinall(jobs)]
    return None
If the second _test takes longer than the first one, I get a "Popped wrong app context" error.
If I add another method like:
@copy_current_request_context
def _test_bis(t):
    from random import randint
    time.sleep(t)
    r = requests.get(
        'https://webhook.site/d14a41a6-9c6a-4da0-bbe6-3cc660daea3d', params=dict(word='test')
    )
    return r.status_code
and use it like that:
jobs = [gevent.spawn(_test_bis, 5), gevent.spawn(_test, 10)]
I don't have any error.
Any idea how I could work around that issue?
This has to do with Flask's application context. When you spin up multiple greenlets with spawn, Flask doesn't know which app is the "current" app. You've used the @copy_current_request_context decorator, but that only copies the request context; each spawned greenlet still gets a fresh app context of its own.
To work around this, you can write an analogous decorator that captures the current app context in a closure:
def copy_current_app_context(f):
    from flask.globals import _app_ctx_stack
    appctx = _app_ctx_stack.top

    def _(*args, **kwargs):
        with appctx:
            return f(*args, **kwargs)

    return _
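As a minimal usage sketch (my own illustration, not from the original post), you would decorate the greenlet target with this helper so each greenlet re-enters the captured app context:

def f1(self):
    @copy_current_app_context
    def _test(t):
        time.sleep(t)
        r = requests.get(
            'https://webhook.site/d14a41a6-9c6a-4da0-bbe6-3cc660daea3d', params=dict(word='test')
        )
        return r.status_code

    # Each greenlet now runs inside the app context captured above.
    jobs = [gevent.spawn(_test, 5), gevent.spawn(_test, 10)]
    return [job.value for job in gevent.joinall(jobs)]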
Or you could choose to wrap your thread code in a test_request_context so you have access to context locals:
def f1(self):
    with app.test_request_context():
        def _test(t):
            time.sleep(t)
            r = requests.get(
                'https://webhook.site/d14a41a6-9c6a-4da0-bbe6-3cc660daea3d', params=dict(word='test')
            )
            return r.status_code

        jobs = [gevent.spawn(_test, 5), gevent.spawn(_test, 10)]
        results = [job.value for job in gevent.joinall(jobs)]
    return None
It's worth pointing out that the thread will have a different context than the original request. If you need any interesting request data, extract it before spawning the thread.
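For example, here is a sketch of pulling what you need out of the request up front and passing it to the greenlets as plain arguments (the word parameter is assumed for illustration):

def f1(self):
    # Read from `request` while its context is still active.
    word = request.values.get('word', 'test')

    def _test(t, word):
        time.sleep(t)
        r = requests.get(
            'https://webhook.site/d14a41a6-9c6a-4da0-bbe6-3cc660daea3d', params=dict(word=word)
        )
        return r.status_code

    # The greenlets only see plain values, so no Flask context is required.
    jobs = [gevent.spawn(_test, 5, word), gevent.spawn(_test, 10, word)]
    return [job.value for job in gevent.joinall(jobs)]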
Related
I've been struggling with this for a while now. I have a Flask app that is executed in my app.py file. In this file I have a bunch of endpoints that call different functions from other files. In another file, extensions.py, I've instantiated a class that contains a Redis connection. See the file structure below.
# app.py
from flask import Flask
from extensions import redis_obj

app = Flask(__name__)

@app.route('/flush-cache', methods=['POST'])
def flush_redis():
    result = redis_obj.flush_redis_cache()
    return result
# extensions.py
from redis_class import CloudRedis
redis_obj = CloudRedis()
# redis_class.py
import redis

class CloudRedis:
    def __init__(self):
        self.conn = redis.Redis(connection_pool=redis.ConnectionPool.from_url('REDIS_URL',
                                                                              ssl_cert_reqs=None))

    def flush_redis_cache(self):
        try:
            self.conn.flushdb()
            return 'OK'
        except:
            return 'redis flush failed'
I've been attempting to use monkeypatching in a test to patch flush_redis_cache, so when I run flush_redis() the call to redis_obj.flush_redis_cache() will just return 'OK', since I've already tested the CloudRedis class in other pytests. However, no matter what I've tried, I haven't been able to successfully patch this. This is what I have below.
import pytest

from extensions import redis_obj
from app import app

@pytest.fixture()
def client():
    yield app.test_client()

def test_flush_redis_when_redis_flushed(client, monkeypatch):
    # setup
    def get_mock_flush_redis_cache():
        return 'OK'

    monkeypatch.setattr(cloud_reids, 'flush_redis_cache', get_mock_flush_redis_cache)
    cloud_redis.flush_redis = get_mock_flush_redis_cache

    # act
    res = client.post('/flush-cache')
    result = flush_redis()
Does anyone have any ideas on how this can be done?
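For what it's worth, a minimal sketch of one approach that usually works here: since app.py imports the redis_obj instance itself, patching the method on that shared instance is enough (the lambda stand-in and the assertion are my assumptions, not code from the post):

import pytest

from extensions import redis_obj
from app import app

@pytest.fixture()
def client():
    yield app.test_client()

def test_flush_redis_when_redis_flushed(client, monkeypatch):
    # Patch the bound method on the instance that app.py actually calls.
    monkeypatch.setattr(redis_obj, 'flush_redis_cache', lambda: 'OK')

    res = client.post('/flush-cache')
    assert res.get_data(as_text=True) == 'OK'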
I have 2 functions.
The 1st function stores the received data in a list, and the 2nd function writes the data into a csv file.
I'm using Flask. Whenever the web service is called it should store the data and send the response, and as soon as the response is sent, trigger the 2nd function.
My Code:
from flask import Flask, flash, request, redirect, url_for, session
import json
import pandas as pd

app = Flask(__name__)
arr = []

@app.route("/test", methods=['GET', 'POST'])
def check():
    arr.append(request.form['a'])
    arr.append(request.form['b'])
    res = {'Status': True}
    return json.dumps(res)

def trigger():
    df = pd.DataFrame({'x': arr})
    df.to_csv("docs/xyz.csv", index=False)
    return
Obviously the 2nd function is not called.
Is there a way to achieve this?
P.S: My real life problem is different where trigger function is time consuming and I don't want user to wait for it to finish execution.
One solution would be to have a background thread that will watch a queue. You put your csv data in the queue and the background thread will consume it. You can start such a thread before the first request:
import threading
import time
from queue import Queue  # task_done()/join() are needed below

import pandas as pd

class CSVWriterThread(threading.Thread):
    def __init__(self, *args, **kwargs):
        threading.Thread.__init__(self, *args, **kwargs)
        self.input_queue = Queue()

    def send(self, item):
        self.input_queue.put(item)

    def close(self):
        self.input_queue.put(None)
        self.input_queue.join()

    def run(self):
        while True:
            csv_array = self.input_queue.get()
            if csv_array is None:
                break
            # Do something here ...
            df = pd.DataFrame({'x': csv_array})
            df.to_csv("docs/xyz.csv", index=False)
            self.input_queue.task_done()
            time.sleep(1)
        # Done
        self.input_queue.task_done()
        return

@app.before_first_request
def activate_job_monitor():
    thread = CSVWriterThread()
    app.csvwriter = thread
    thread.start()
And in your code put the message in the queue before returning:
@app.route("/test", methods=['GET', 'POST'])
def check():
    arr.append(request.form['a'])
    arr.append(request.form['b'])
    res = {'Status': True}
    app.csvwriter.send(arr)
    return json.dumps(res)
P.S: My real life problem is different where trigger function is time consuming and I don't want user to wait for it to finish execution.
Consider using Celery, which is made for the very problem you're trying to solve. From the docs:
Celery is a simple, flexible, and reliable distributed system to process vast amounts of messages, while providing operations with the tools required to maintain such a system.
I recommend you integrate Celery with your Flask app as described here. Your trigger method would then become a straightforward Celery task that you can execute without having to worry about long response times.
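A rough sketch of what that could look like (the broker URL and wiring are assumptions here; see the integration guide for the full pattern):

import pandas as pd
from celery import Celery

celery = Celery(app.import_name, broker='redis://localhost:6379/0')

@celery.task
def trigger(data):
    # Runs in the Celery worker, not in the request handler.
    df = pd.DataFrame({'x': data})
    df.to_csv("docs/xyz.csv", index=False)

The view would then call trigger.delay(arr) and return immediately, leaving the CSV write to the worker.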
I'm actually working on another interesting case on my side, where I pass the work off to a Python worker that sends the job to a Redis queue. There are some great blogs on using Redis with Flask; you basically need to ensure Redis is running (and that you are able to connect on port 6379).
The worker would look something like this:
import os

import redis
from rq import Worker, Queue, Connection

listen = ['default']
redis_url = os.getenv('REDISTOGO_URL', 'redis://localhost:6379')
conn = redis.from_url(redis_url)

if __name__ == '__main__':
    with Connection(conn):
        worker = Worker(list(map(Queue, listen)))
        worker.work()
In my example I have a function that queries a database for usage, and since it might be a lengthy process I pass it off to the worker (running as a separate script).
def post(self):
    data = Task.parser.parse_args()
    job = q.enqueue_call(
        func=migrate_usage, args=(my_args),
        result_ttl=5000
    )
    print("Job ID is: {}".format(job.get_id()))
    job_key = job.get_id()
    print(str(Job.fetch(job_key, connection=conn).result))
    if job:
        return {"message": "Job : {} added to queue".format(job_key)}, 201
Credit due to the following article:
https://realpython.com/flask-by-example-implementing-a-redis-task-queue/#install-requirements
You can try using streaming. See the next example:
import time
from flask import Flask, Response

app = Flask(__name__)

@app.route('/')
def main():
    return '''<div>start</div>
    <script>
        var xhr = new XMLHttpRequest();
        xhr.open('GET', '/test', true);
        xhr.onreadystatechange = function(e) {
            var div = document.createElement('div');
            div.innerHTML = '' + this.readyState + ':' + this.responseText;
            document.body.appendChild(div);
        };
        xhr.send();
    </script>
    '''

@app.route('/test')
def test():
    def generate():
        app.logger.info('request started')
        for i in range(5):
            time.sleep(1)
            yield str(i)
        app.logger.info('request finished')
        yield ''
    return Response(generate(), mimetype='text/plain')

if __name__ == '__main__':
    app.run('0.0.0.0', 8080, True)
All the magic in this example is in the generator: you can start sending response data, then do some other work, and finally yield empty data to end your stream.
For details look at http://flask.pocoo.org/docs/patterns/streaming/.
You can defer route specific actions with limited context by combining after_this_request and response.call_on_close. Note that request and response context won't be available but the route function context remains available. So you'll need to copy any request/response data you'll need into local variables for deferred access.
I moved your array to a local var to show how the function context is preserved. You could change your csv write function to an append so you're not pushing data endlessly into memory.
from flask import Flask, flash, request, redirect, url_for, session, after_this_request
import json
import pandas as pd

app = Flask(__name__)

@app.route("/test", methods=['GET', 'POST'])
def check():
    arr = []
    arr.append(request.form['a'])
    arr.append(request.form['b'])
    res = {'Status': True}

    @after_this_request
    def add_close_action(response):
        @response.call_on_close
        def process_after_request():
            df = pd.DataFrame({'x': arr})
            df.to_csv("docs/xyz.csv", index=False)
        return response

    return json.dumps(res)
In Python I want to create an async method in a class that creates a thread without blocking the main thread. When the new thread finishes, I want to return a value from that function/thread.
For example, the class is used to retrieve some information from web pages. I want to run parallel processing in a function that downloads the page and returns an object.
from threading import Thread

class WebDown:
    def display(self, url):
        print 'display(): ' + content

    def download(self, url):
        thread = Thread(target=self.get_info)
        # thread join
        print 'download(): ' + content
        # return the info

    def get_info(self, url):
        # download page
        # retrieve info
        return info

if __name__ == '__main__':
    wd = WebDown()
    ret = wd.download('http://...')
    wd.display('http://...')
In this example, I first call download() to retrieve the info, and afterwards display() to print other information. The print output should be
display(): foo, bar, ....
download(): blue, red, ....
One way to write asynchronous, non-blocking code in Python involves using Python's Twisted. Twisted does not rely on multithreading; it is built around an event-driven reactor loop instead. It gives you a convenient way to create Deferred objects and add callbacks and errbacks to them. The example you give would look like this in Twisted; I'm using the treq (Twisted Requests) library, which makes generating requests a little quicker and easier:
from treq import get
from twisted.internet import reactor

class WebAsync(object):
    def download(self, url):
        request = get(url)
        request.addCallback(self.deliver_body)

    def deliver_body(self, response):
        deferred = response.text()
        deferred.addCallback(self.display)
        return deferred

    def display(self, response_body):
        print response_body
        reactor.stop()

if __name__ == "__main__":
    web_client = WebAsync()
    web_client.download("http://httpbin.org/html")
    reactor.run()
Both the 'download' and 'deliver_body' methods return deferreds; you add callbacks to them that are executed when the result is available.
I would simply use grequests, which combines requests and gevent.
>>> import grequests
>>> urls = [
...     'http://...',
...     'http://...'
... ]
>>> rs = (grequests.get(u) for u in urls)
>>> grequests.map(rs)
[<Response [200]>, <Response [200]>]
I have the following web app:
import bottle

app = bottle.Bottle()

@app.route('/ping')
def ping():
    print 'pong'
    return 'pong'

@app.hook('after_request')
def after():
    print 'foo'
    print bottle.response.body

if __name__ == "__main__":
    app.run(host='0.0.0.0', port='9999', server='cherrypy')
Is there a way to access the response body before sending the response back?
If I start the app and query /ping, I can see in the console that the ping() and after() functions run in the right sequence
$ python bottle_after_request.py
Bottle v0.11.6 server starting up (using CherryPyServer())...
Listening on http://0.0.0.0:9999/
Hit Ctrl-C to quit.
pong
foo
but when in after() I try to access response.body, I don't have anything.
In Flask the after_request decorated functions take in input the response object so it's easy to access it. How can I do the same in Bottle?
Is there something I'm missing?
Is there a way to access the response body before sending the response back?
You could write a simple plugin, which (depending on what you're actually trying to do with the response) might be all you need.
Here's an example from the Bottle plugin docs, which sets a response header. It could just as easily manipulate body.
from bottle import response, install
import time

def stopwatch(callback):
    def wrapper(*args, **kwargs):
        start = time.time()
        body = callback(*args, **kwargs)
        end = time.time()
        response.headers['X-Exec-Time'] = str(end - start)
        return body
    return wrapper

install(stopwatch)
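Since the docs example only touches a header, here is a sketch of a variant that rewrites the body instead (my own illustration; it assumes the route returns a string, as /ping does):

def shout(callback):
    def wrapper(*args, **kwargs):
        body = callback(*args, **kwargs)
        if isinstance(body, str):
            body = body.upper()  # e.g. 'pong' becomes 'PONG'
        return body
    return wrapper

install(shout)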
Hope that works for your purposes.
You can use the plugin approach; this is what I did:
from bottle import response

class BottlePlugin(object):
    name = 'my_custom_plugin'
    api = 2

    def __init__(self, debug=False):
        self.debug = debug
        self.app = None

    def setup(self, app):
        """Handle plugin install"""
        self.app = app

    def apply(self, callback):
        """Handle route callbacks"""
        def wrapper(*a, **ka):
            """Encapsulate the result in the expected api structure"""
            # Check if the client wants a different format.
            # The output depends on what you return from the view;
            # in my case it's a dict with a "data" key.
            output = callback(*a, **ka)
            data = output["data"]
            paging = output.get("paging", {})
            response_data = {
                "data": data,
                "paging": paging
            }
            # In case you want to update the response,
            # e.g. the response code:
            response.status = 200
            return response_data
        return wrapper
So, I have a flask view, which adds a celery task to a queue, and returns a 200 to the user.
from flask.views import MethodView
from app.tasks import launch_task

class ExampleView(MethodView):
    def post(self):
        # Does some verification of the incoming request; if all good:
        launch_task(task, arguments)
        return 'Accepted', 200
The issue is with testing this: I don't want to need a running Celery instance. I just want to know that, after all the verification is OK, the view returns 200 to the user. The Celery launch_task() will be tested elsewhere.
Therefore I'm keen to mock out that launch_task() call so essentially it does nothing, making my unittest independent of the celery instance.
I've tried various incarnations of:
@mock.patch('app.views.launch_task.delay')
def test_launch_view(self, mock_launch_task):
    mock_launch_task.return_value = None
    # post a correct dictionary to the view
    correct_data = {'correct': 'params'}
    rs = self.app.post('/launch/', data=correct_data)
    self.assertEqual(rs.status_code, 200)

@mock.patch('app.views.launch_task')
def test_launch_view(self, mock_launch_task):
    mock_launch_task.return_value = None
    # post a correct dictionary to the view
    correct_data = {'correct': 'params'}
    rs = self.app.post('/launch/', data=correct_data)
    self.assertEqual(rs.status_code, 200)
But can't seem to get it to work, my view just exits with a 500 error. Any assistance would be appreciated!
I also tried every variant of the @patch decorator and it didn't work.
What I found working is mocking in setUp, like this:
import unittest

from mock import patch
from mock import MagicMock

class TestLaunchTask(unittest.TestCase):
    def setUp(self):
        self.patcher_1 = patch('app.views.launch_task')
        mock_1 = self.patcher_1.start()

        launch_task = MagicMock()
        launch_task.as_string = MagicMock(return_value='test')
        mock_1.return_value = launch_task

    def tearDown(self):
        self.patcher_1.stop()
The @task decorator replaces the function with a Task object (see the documentation). If you mock the task itself, you'll replace the (somewhat magic) Task object with a MagicMock and it won't schedule the task at all. Instead, mock the Task object's run() method, like so:
# With CELERY_ALWAYS_EAGER=True
@patch('monitor.tasks.monitor_user.run')
def test_monitor_all(self, monitor_user):
    """
    Test monitor.all task
    """
    user = ApiUserFactory()
    tasks.monitor_all.delay()
    monitor_user.assert_called_once_with(user.key)