I'm working on a program. It reads urls from a file and then makes requests. But handlers don't get called.
Here is my code:
import grequests


def main():
    """Request every extracted site, attaching a response hook to each request."""
    async_list = []
    for site in extractsites():
        action_item = grequests.get(
            'http://' + site, hooks={'response': handleresponse}
        )
        async_list.append(action_item)
    # map() sends all prepared requests and waits for them to finish.
    grequests.map(async_list)


def extractsites():
    """Return the host names to request."""
    return ['google.com']


def handleresponse(response):
    """Print the response.

    NOTE(review): the hook is called with extra keyword arguments, which this
    one-argument signature does not accept -- this is why the handler appears
    never to run. Left as posted.
    """
    print(response)


if __name__ == '__main__':
    main()
Could anyone see why it does not work?
The handler needs a different signature, because additional keyword arguments are passed to it:
def handler(response, **kwargs):
    """Response hook: print the response and the extra keyword arguments."""
    print(response, kwargs)
Complete example:
import grequests


def main():
    """Request every extracted site, attaching a response hook to each request."""
    async_list = []
    for site in extractsites():
        action_item = grequests.get(
            'http://' + site, hooks={'response': handleresponse}
        )
        async_list.append(action_item)
    # map() sends all prepared requests and waits for them to finish.
    grequests.map(async_list)


def extractsites():
    """Return the host names to request."""
    return ['google.com']


def handleresponse(response, **kwargs):
    """Response hook: print the response and the extra keyword arguments."""
    print(response, kwargs)


if __name__ == '__main__':
    main()
Or shorter:
import grequests
# Fully qualified URLs to request.
urls = ['http://google.com']
# The hook accepts the response plus any extra keyword arguments.
handler = lambda response, **kwargs: print(response, kwargs)
# Build one request per URL (each with the hook attached) and send them.
grequests.map(grequests.get(url, hooks={'response': handler}) for url in urls)
Related
I need a scheduler in a Textual application to periodically query an external data source. As a test, I tried to use APScheduler to call a tick() function every second.
However nothing happens although the scheduler should be started.
What is going on and how to debug this?
from textual.app import App, ComposeResult
from textual.containers import Horizontal, Vertical
from textual.widgets import *
from apscheduler.schedulers.background import BackgroundScheduler
class HeaderApp(App):
    """Textual app that tries to write to a TextLog from an APScheduler job."""

    def __init__(self, *args, **kwargs):
        # The scheduler is created and started before the App itself is
        # initialised; the job is meant to call tick() once per second.
        self.sched = BackgroundScheduler()
        self.sched.add_job(self.tick, 'interval', seconds=1)
        self.sched.start()
        super(HeaderApp, self).__init__(*args, **kwargs)

    def compose(self) -> ComposeResult:
        """Yield the widgets of the app: a header and a text log."""
        yield Header()
        yield TextLog()

    def tick(self):
        """Append "tick" to the text log."""
        # NOTE(review): BackgroundScheduler presumably runs this job on its
        # own thread, i.e. outside the app's event loop -- likely why nothing
        # is shown. Left as posted; confirm.
        text_log = self.query_one(TextLog)
        text_log.write("tick")

    def on_mount(self):
        """Log whether the scheduler reports itself as running."""
        text_log = self.query_one(TextLog)
        text_log.write(self.sched.running)


if __name__ == "__main__":
    app = HeaderApp()
    app.run()
I'm not familiar with apscheduler, but since you haven't had a response, could you use the builtin set_interval which will call a method at regular intervals?
from textual.app import App, ComposeResult
from textual.containers import Horizontal, Vertical
from textual.widgets import *
class HeaderApp(App):
    """Textual app that appends "tick" to a TextLog at a fixed interval."""

    def compose(self) -> ComposeResult:
        """Yield the widgets of the app: a header and a text log."""
        yield Header()
        yield TextLog()

    def tick(self):
        """Append "tick" to the text log."""
        text_log = self.query_one(TextLog)
        text_log.write("tick")

    def on_mount(self):
        """Schedule tick() with the app's own timer once it is mounted."""
        # Interval of 1, i.e. one call per second.
        self.set_interval(1, self.tick)


if __name__ == "__main__":
    app = HeaderApp()
    app.run()
According to my search, there are two main approaches:
Install and use the sanic_jinja2
Just use the jinja2
But neither of the two methods above worked for me (maybe it's my fault).
Could anyone give me some example code?
tool/tmp.py
from jinja2 import Environment, PackageLoader
from functools import wraps
from sanic.response import html
# print(__name__) # path of the current module
# NOTE: package_path sets the templates folder path relative to tmp.py
env = Environment(loader=PackageLoader(__name__, package_path='../templates'), enable_async=True)
class Tmp:
    """Namespace for the template-rendering decorator."""

    @staticmethod
    def template(template_name):
        """Return a decorator that renders a handler's result with a template.

        The decorated coroutine is awaited and its return value is passed to
        the template's ``render_async`` as the context; the rendered text is
        wrapped in an HTML response.
        """
        def wrapper(func):
            @wraps(func)
            async def inner(request, *args, **kwargs):
                temp = env.get_template(template_name)
                context = await func(request, *args, **kwargs)
                return html(await temp.render_async(context))
            return inner
        return wrapper
app.py
from sanic import Sanic
from tool.tmp import Tmp
app = Sanic(__name__)
# Serve the ./static directory under the "static" URI.
app.static("static", "./static")
# Instantiate Tmp
br = Tmp()
@app.route('/dag/<br>/<status>', methods=["GET"])
@br.template('dag.html')  # the html file is looked up under templates/
async def search_branch(request, br, status):
    """Build the template context from the route parameters and ``time`` arg."""
    return {
        'request': request,
        '_branch': br,
        '_status': status,
        '_time': request.args.get('time'),
    }
As for now, I've found a lot of examples on how contextvars module behaves with asyncio, but none on how one behaves with threads (asyncio.get_event_loop().run_in_executor, threading.Thread, and so on).
My question is, how can I pass context to a separate thread? Below you can see a code snippet that does not work (python 3.9.8).
import typing
import asyncio
import contextvars
import concurrent.futures
class CustomThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
    """Thread pool that runs each task inside a copy of the submitter's context."""

    def submit(
        self,
        function: typing.Callable,
        *args,
        **kwargs
    ) -> concurrent.futures.Future:
        """Schedule ``function(*args, **kwargs)`` in a copy of the current context.

        The context is copied when ``submit`` is called, so the task sees the
        context variable values of the submitting code.
        """
        context = contextvars.copy_context()
        # Context.run forwards positional and keyword arguments itself, so
        # no functools.partial (and no functools import) is needed.
        return super().submit(context.run, function, *args, **kwargs)
def function():
    """Print the value of ``var`` as seen by the executing thread."""
    print(var.get())


async def main():
    """Run ``function`` in the loop's default executor."""
    # Passing None selects the default executor of the loop.
    await asyncio.get_event_loop().run_in_executor(None, function)


if __name__ == '__main__':
    var = contextvars.ContextVar('variable')
    var.set('Message.')
    # NOTE(review): this passes the executor *class* rather than an instance,
    # and it configures the loop returned here, whereas asyncio.run() below
    # creates a new event loop -- so the custom executor is presumably never
    # used. Left as posted.
    asyncio.get_event_loop().set_default_executor(CustomThreadPoolExecutor)
    asyncio.run(main())
You can use a wrapper function that takes copy_context().items(), sets each variable and then calls your function. functools.partial will help you create the wrapped function to pass to run_in_executor. This is a working test for my decorators:
def test_run_in_thread_pool_executor():
    """Check that the caller's context variables are visible in a pool thread."""

    def init(func, ctx_vars, *args, **kwargs):
        # Re-apply every (ContextVar, value) pair captured by the caller
        # before invoking the real function.
        for var, value in ctx_vars:
            var.set(value)
        return func(*args, **kwargs)

    @async_add_headers('streaming')
    async def wrapper(f):
        loop = asyncio.get_event_loop()
        # Snapshot the context here, on the calling side.
        ctx = contextvars.copy_context()
        executor = futures.ThreadPoolExecutor(max_workers=5)
        return await loop.run_in_executor(
            executor, functools.partial(init, f, ctx.items())
        )

    @add_headers('client')
    def foo():
        assert caller_context_var.get() == 'streaming'

    async def main_test():
        await wrapper(foo)

    asyncio.run(main_test())
Here add_headers and async_add_headers change some contextvars in order of calling functions. caller_context_var.get() would be equal to 'client' without init function.
Unfortunately, this works only with ThreadPoolExecutor and not with ProcessPoolExecutor, because Context objects are not picklable. Check the relevant section of PEP 567. There is also an example with an executor:
# Submit some_function so that it runs inside a copy of the current context.
executor = ThreadPoolExecutor()
current_context = contextvars.copy_context()
executor.submit(current_context.run, some_function)
I'm trying to add a delay between requests in an asynchronous way.
When I use Tornado gen.sleep(x) my function (launch) doesn't get executed.
If I remove yield from yield gen.sleep(1.0), function is called, but no delay is added.
How to add delay between requests in my for loop? I need to control Request per second to external API.
If I use time.sleep the response is delayed after all requests are completed.
I tried adding the @gen.engine decorator to the launch function, with no results.
Code:
import collections
import tornado.httpclient
class BacklogClient(object):
    """Queue HTTP requests and keep at most MAX_CONCURRENT_REQUESTS in flight."""

    MAX_CONCURRENT_REQUESTS = 20

    def __init__(self, ioloop):
        self.ioloop = ioloop
        self.client = tornado.httpclient.AsyncHTTPClient(
            max_clients=self.MAX_CONCURRENT_REQUESTS
        )
        self.client.configure(
            None, defaults=dict(connect_timeout=20, request_timeout=30)
        )
        # Pending (request, wrapped_callback) pairs, oldest first.
        self.backlog = collections.deque()
        # Number of requests started and not yet completed.
        self.concurrent_requests = 0

    def __get_callback(self, function):
        """Wrap ``function`` so that a completed request frees its slot."""
        def wrapped(*args, **kwargs):
            self.concurrent_requests -= 1
            # Start queued requests before handing the result to the caller.
            self.try_run_request()
            # fetch() allows callback=None; there is nothing to call then.
            if function is None:
                return None
            return function(*args, **kwargs)
        return wrapped

    def try_run_request(self):
        """Start queued requests while free slots remain."""
        while self.backlog and self.concurrent_requests < self.MAX_CONCURRENT_REQUESTS:
            request, callback = self.backlog.popleft()
            # NOTE(review): the callback= argument presumably requires an
            # older Tornado release -- confirm against the installed version.
            self.client.fetch(request, callback=callback)
            self.concurrent_requests += 1

    def fetch(self, request, callback=None):
        """Queue ``request``; ``callback`` (optional) receives the response."""
        wrapped = self.__get_callback(callback)
        self.backlog.append((request, wrapped))
        self.try_run_request()
import time
from tornado import ioloop, httpclient, gen
class TornadoBacklog:
    """Fetch a fixed list of URLs through a BacklogClient."""

    def __init__(self):
        self.queue = 0
        self.debug = 1
        self.toProcess = [
            'http://google.com',
            'http://yahoo.com',
            'http://nytimes.com',
            'http://msn.com',
            'http://cnn.com',
            'http://twitter.com',
            'http://facebook.com',
        ]

    def handle_request(self, response):
        """Print the response code and stop the loop when all work is done."""
        print(response.code)
        # Nothing queued and nothing in flight: every request has completed.
        if not self.backlog.backlog and self.backlog.concurrent_requests == 0:
            ioloop.IOLoop.instance().stop()

    def launch(self):
        """Queue every URL, pausing between requests, then start the loop."""
        # NOTE(review): this body contains ``yield`` but has no
        # @gen.coroutine decorator, so calling launch() only creates a
        # generator and nothing below runs. Left as posted.
        self.ioloop = ioloop.IOLoop.current()
        self.backlog = BacklogClient(self.ioloop)
        for item in self.toProcess:
            yield gen.sleep(1.0)
            print(item)
            self.backlog.fetch(
                httpclient.HTTPRequest(
                    item,
                    method='GET',
                    headers=None,
                ),
                self.handle_request
            )
        self.ioloop.start()
def main():
    """Run the scraper and report the elapsed time and item count."""
    start_time = time.time()
    scraper = TornadoBacklog()
    scraper.launch()
    elapsed_time = time.time() - start_time
    print('Process took %f seconds processed %d items.' % (elapsed_time, len(scraper.toProcess)))


if __name__ == "__main__":
    main()
Reference: https://github.com/tornadoweb/tornado/issues/1400
Tornado coroutines have two components:
They contain "yield" statements
They are decorated with "gen.coroutine"
Use the "coroutine" decorator on your "launch" function:
@gen.coroutine
def launch(self):
Run a Tornado coroutine from start to finish like this:
tornado.ioloop.IOLoop.current().run_sync(launch)
Remove the call to "ioloop.start" from your "launch" function: the loop runs the "launch" function, not vice-versa.
I tried to get support on this but I am TOTALLY confused.
Here's my code:
from twisted.internet import reactor
from twisted.web.client import getPage
from twisted.web.error import Error
from twisted.internet.defer import DeferredList
from sys import argv
class GrabPage:
    """Fetch a page with getPage, optionally sending an Authorization header."""

    def __init__(self, page):
        self.page = page

    def start(self, *args):
        """Set up the fetch; pass exactly two strings to send credentials.

        Raises:
            Exception: if the number of extra arguments is neither 0 nor 2.
        """
        if args == ():
            # We apparently don't need authentication for this
            d1 = getPage(self.page)
        elif len(args) == 2:
            # We have our login information
            d1 = getPage(self.page, headers={"Authorization": " ".join(args)})
        else:
            raise Exception('Missing parameters')
        d1.addCallback(self.pageCallback)
        dl = DeferredList([d1])
        d1.addErrback(self.errorHandler)
        dl.addCallback(self.listCallback)

    def errorHandler(self, result):
        # Bad thingy!
        pass

    def pageCallback(self, result):
        return result

    def listCallback(self, result):
        print(result)


a = GrabPage('http://www.google.com')
# NOTE(review): start() has no return statement, and the reactor is never
# run in this script, so no callback fires. Left as posted.
data = a.start()  # Not the HTML
I wish to get the HTML out which is given to pageCallback when start() is called. This has been a pita for me. Ty! And sorry for my sucky coding.
You're missing the basics of how Twisted operates. It all revolves around the reactor, which you're never even running. Think of the reactor like this:
(source: krondo.com)
Until you start the reactor, by setting up deferreds all you're doing is chaining them with no events from which to fire.
I recommend you give the Twisted Intro by Dave Peticolas a read. It's quick and it really gives you all the missing information that the Twisted documentation doesn't.
Anyway, here is the most basic possible usage example of getPage:
from twisted.web.client import getPage
from twisted.internet import reactor
url = 'http://aol.com'


def print_and_stop(output):
    """Print the fetched page, then stop the reactor if it is running."""
    print(output)
    if reactor.running:
        reactor.stop()


if __name__ == '__main__':
    # %-formatting keeps the output identical under Python 2 and 3.
    print('fetching %s' % url)
    d = getPage(url)
    d.addCallback(print_and_stop)
    reactor.run()
Since getPage returns a deferred, I'm adding the callback print_and_stop to the deferred chain. After that, I start the reactor. The reactor fires getPage, which then fires print_and_stop which prints the data from aol.com and then stops the reactor.
Edit to show a working example of OP's code:
class GrabPage:
    """Fetch a page with getPage and keep the result on the instance."""

    def __init__(self, page):
        self.page = page
        ########### I added this:
        self.data = None

    def start(self, *args):
        """Set up the fetch; pass exactly two strings to send credentials.

        Raises:
            Exception: if the number of extra arguments is neither 0 nor 2.
        """
        if args == ():
            # We apparently don't need authentication for this
            d1 = getPage(self.page)
        elif len(args) == 2:
            # We have our login information
            d1 = getPage(self.page, headers={"Authorization": " ".join(args)})
        else:
            raise Exception('Missing parameters')
        d1.addCallback(self.pageCallback)
        dl = DeferredList([d1])
        d1.addErrback(self.errorHandler)
        dl.addCallback(self.listCallback)

    def errorHandler(self, result):
        # Bad thingy!
        pass

    def pageCallback(self, result):
        ########### I added this, to hold the data:
        self.data = result
        return result

    def listCallback(self, result):
        print(result)
        # Added for effect:
        if reactor.running:
            reactor.stop()
a = GrabPage('http://google.com')
########### Just call it without assigning to data
#data = a.start() # Not the HTML
a.start()
########### I added this:
if not reactor.running:
    reactor.run()
########### Reference the data attribute from the class
data = a.data
print('------REACTOR STOPPED------')
print('')
########### First 100 characters of a.data:
print('------a.data[:100]------')
print(data[:100])