concurrent futures jobs not submitted - python

I'm trying to use concurrent futures using the below example, but my job never gets submitted: I don't see the output of the print statement in load_url.
import sys
from concurrent import futures
import multiprocessing
import time
import queue
def load_url(url,q):
# it will take 2 seconds to process a URL
print('load_url')
try:
time.sleep(2)
# put some dummy results in queue
for x in range(5):
print('put in queue')
q.put(x)
except Exception as e:
print('exception')
def main():
print('start')
manager = multiprocessing.Manager()
e = manager.Event()
q = queue.Queue()
with futures.ProcessPoolExecutor(max_workers=5) as executor:
livefutures = {executor.submit(load_url, url, q): url
for url in ['a','b']}
runningfutures = True
print('check_futures')
while runningfutures:
print('here')
runningfutures = [f for f in livefutures if f.running()]
if not runningfutures:
print('not running futures == ', q.empty())
while not q.empty():
print('not running futures1')
yield q.get(False)
if __name__ == '__main__':
for x in main():
print('x=',x)

Probably a bit late but I just ran into your post.
ProcessPoolExecutor is a bit picky: it requires the worker processes to execute simple (picklable) functions, and it sometimes behaves differently on Windows and Linux.
ThreadPoolExecutor is more permissive.
If you replace futures.ProcessPoolExecutor by futures.ThreadPoolExecutor it seems to work.

You are passing Python's standard Queue to your asynchronous processes rather than a multiprocessing-safe Queue implementation. Therefore, your asynchronous job is failing with: TypeError: cannot pickle '_thread.lock' object. However, because you are not calling .result() on the future object, this exception is never raised in the main process.
Instantiate your queue with manager.Queue() and the code works.

Related

Python ThreadPoolExecutor terminate all threads

I am running a piece of python code in which multiple threads are run through threadpool executor. Each thread is supposed to perform a task (fetch a webpage for example). What I want to be able to do is to terminate all threads, even if one of the threads fail. For instance:
with ThreadPoolExecutor(self._num_threads) as executor:
jobs = []
for path in paths:
kw = {"path": path}
jobs.append(executor.submit(start,**kw))
for job in futures.as_completed(jobs):
result = job.result()
print(result)
def start(*args,**kwargs):
#fetch the page
if(success):
return True
else:
#Signal all threads to stop
Is it possible to do so? The results returned by threads are useless to me unless all of them are successful, so if even one of them fails, I would like to save some execution time of the rest of the threads and terminate them immediately. The actual code obviously is doing relatively lengthy tasks with a couple of failure points.
If you are done with threads and want to look into processes, then this piece of code looks very promising and simple; it has almost the same syntax as threads, but uses the multiprocessing module.
When the timeout flag expires the process is terminated, very convenient.
import multiprocessing
def get_page(*args, **kwargs):
    """Download a web page; placeholder for the real fetching code.

    The docstring doubles as the function body so that the stub is valid
    Python: a function whose body consists only of a comment is a
    SyntaxError. All positional and keyword arguments are accepted and,
    in this stub, ignored.
    """
    # your web page downloading code goes here
def start_get_page(timeout, *args, **kwargs):
    """Run get_page in a child process and kill it if it exceeds timeout.

    timeout: seconds to wait for the child before terminating it.
    args / kwargs: forwarded unchanged to get_page.
    """
    p = multiprocessing.Process(target=get_page, args=args, kwargs=kwargs)
    p.start()
    # join() returns after at most `timeout` seconds, finished or not.
    p.join(timeout)
    if p.is_alive():
        # stop the downloading 'thread'
        p.terminate()
        # Join again after terminate() so the child is actually reaped
        # instead of lingering as a zombie process.
        p.join()
        # and then do any post-error processing here
if __name__ == "__main__":
start_get_page(timeout, *args, **kwargs)
I have created an answer for a similar question I had, which I think will work for this question.
Terminate executor using ThreadPoolExecutor from concurrent.futures module
from concurrent.futures import ThreadPoolExecutor, as_completed
from time import sleep
NUM_REQUESTS = 100
def long_request(id, delay=1):
    """Simulate a slow request and return its fake response.

    id: request index; request 10 is the one that produces a bad response.
    delay: seconds to sleep before answering. Defaults to 1, the value
        that used to be hard-coded, so existing callers are unaffected.

    Returns a dict of the form {"data": {"valid": <bool>}}.
    """
    sleep(delay)
    # Simulate bad response
    return {"data": {"valid": id != 10}}
def check_results(results):
    """Return True only if every result is marked valid.

    results: iterable of dicts shaped like {"data": {"valid": <bool>}}.

    An empty iterable yields True. The previous loop overwrote its flag on
    every iteration, so only the last result decided the outcome and an
    earlier invalid result was silently forgotten.
    """
    return all(result["data"]["valid"] for result in results)
def main():
    """Submit NUM_REQUESTS requests and stop at the first invalid response.

    Returns the number of valid responses collected before an invalid one
    was seen (or NUM_REQUESTS if none was invalid).

    Each response is validated as soon as its future completes. On the
    first invalid one, every future that has not started yet is cancelled
    and the loop ends. Requests that are already running cannot be
    interrupted; leaving the ``with`` block waits for those.
    """
    futures = []
    num_requests = 0
    with ThreadPoolExecutor(max_workers=10) as executor:
        for request_index in range(NUM_REQUESTS):
            # Future list
            futures.append(executor.submit(long_request, request_index))
        for future in as_completed(futures):
            response = future.result()
            # Cancel all future requests if one invalid
            if not check_results([response]):
                for pending in futures:
                    # cancel() is a no-op (returns False) for futures that
                    # are already running or finished.
                    pending.cancel()
                break
            # Count valid responses only
            num_requests += 1
    return num_requests
if __name__ == "__main__":
requests = main()
print("Num Requests: ", requests)
In my code I used multiprocessing
import multiprocessing as mp
pool = mp.Pool()
for i in range(threadNumber):
pool.apply_async(publishMessage, args=(map_metrics, connection_parameters...,))
pool.close()
pool.terminate()
This is how I would do it:
import concurrent.futures
def start(*args,**kwargs):
#fetch the page
if(success):
return True
else:
return False
with concurrent.futures.ProcessPoolExecutor() as executor:
    results = [executor.submit(start, {"path": path}) for path in paths]
    # Optional: block until the first task finishes (or 10 s pass).
    concurrent.futures.wait(results, timeout=10, return_when=concurrent.futures.FIRST_COMPLETED)
    for f in concurrent.futures.as_completed(results):
        f_success = f.result()
        if not f_success:
            executor.shutdown(wait=False, cancel_futures=True)  # shutdown if one fails
            # Stop iterating: as_completed() would go on to yield the
            # futures that were just cancelled, and calling result() on a
            # cancelled future raises CancelledError.
            break
        # do stuff here
If any result is not True, everything will be shut down immediately.
You can try to use StoppableThread from func-timeout.
But terminating threads is strongly discouraged. And if you need to kill a thread, you probably have a design problem. Look at alternatives: asyncio coroutines and multiprocessing with legal cancel/terminating functionality.

Multiprocessing callback message

I have a long-running process, and I want to keep track of which state it is currently in. There are N such processes running at the same time, hence the use of multiprocessing.
I pass a Queue into each process so it can report messages about its state, and this Queue is then read (if not empty) in a thread every couple of seconds.
I'm using Spyder on Windows as my environment, and the behavior described below is what I see in its console. I did not try it in a different environment.
from multiprocessing import Process,Queue,Lock
import time
def test(process_msg: Queue):
try:
process_msg.put('Inside process message')
# process...
return # to have exitstate = 0
except Exception as e:
process_msg.put(e)
callback_msg = Queue()
if __name__ == '__main__':
p = Process(target = test,
args = (callback_msg,))
p.start()
time.sleep(5)
print(p)
while not callback_msg.empty():
msg = callback_msg.get()
if type(msg) != Exception:
tqdm.write(str(msg))
else:
raise msg
The problem is that whatever I do with the code, it never reads what is inside the Queue (because nothing is ever put into it). It only works when I switch to the dummy version, which runs similarly to threading on only 1 CPU: from multiprocessing.dummy import Process,Queue,Lock
Apparently the test function has to be in a separate file.

Issue with Pool and Queue of multiprocessing module in Python

I am new to multiprocessing of Python, and I wrote the tiny script below:
import multiprocessing
import os
def task(queue):
print(100)
def run(pool):
queue = multiprocessing.Queue()
for i in range(os.cpu_count()):
pool.apply_async(task, args=(queue, ))
if __name__ == '__main__':
multiprocessing.freeze_support()
pool = multiprocessing.Pool()
run(pool)
pool.close()
pool.join()
I am wondering why the task() method is not executed and there is no output after running this script. Could anyone help me?
It is running, but it's dying with an error outside the main thread, and so you don't see the error. For that reason, it's always good to .get() the result of an async call, even if you don't care about the result: the .get() will raise the error that's otherwise invisible.
For example, change your loop like so:
tasks = []
for i in range(os.cpu_count()):
tasks.append(pool.apply_async(task, args=(queue,)))
for t in tasks:
t.get()
Then the new t.get() will blow up, ending with:
RuntimeError: Queue objects should only be shared between processes through inheritance
In short, passing Queue objects to Pool methods isn't supported.
But you can pass them to multiprocessing.Process(), or to a Pool initialization function. For example, here's a way to do the latter:
import multiprocessing
import os
def pool_init(q):
global queue # make queue global in workers
queue = q
def task():
# can use `queue` here if you like
print(100)
def run(pool):
    """Submit one `task` per CPU to `pool`, then wait for each of them.

    Calling get() on every async result re-raises, in the parent, any
    exception that occurred in a worker instead of losing it silently.
    """
    pending = [pool.apply_async(task) for _ in range(os.cpu_count())]
    for handle in pending:
        handle.get()
if __name__ == '__main__':
queue = multiprocessing.Queue()
pool = multiprocessing.Pool(initializer=pool_init, initargs=(queue,))
run(pool)
pool.close()
pool.join()
On Linux-y systems, you can - as the original error message suggested - use process inheritance instead (but that's not possible on Windows).

python multiprocessing pool timeout

I want to use multiprocessing.Pool, but multiprocessing.Pool can't abort a task after a timeout. I found a solution and modified it slightly.
from multiprocessing import util, Pool, TimeoutError
from multiprocessing.dummy import Pool as ThreadPool
import threading
import sys
from functools import partial
import time
def worker(y):
    """Stay busy for about y seconds, printing y every half second.

    Returns y unchanged once at least y seconds have elapsed.
    """
    print("worker sleep {} sec, thread: {}".format(y, threading.current_thread()))
    began = time.time()
    # Poll in 0.5 s steps until the requested duration has passed.
    while not (time.time() - began >= y):
        time.sleep(0.5)
        # show work progress
        print(y)
    return y
def collect_my_result(result):
print("Got result {}".format(result))
def abortable_worker(func, *args, **kwargs):
    """Run func(*args) in a helper thread; exit the process on timeout.

    func: callable to execute.
    args: positional arguments forwarded to func.
    kwargs: only 'timeout' is read (seconds to wait, None = wait forever);
        keyword arguments are not forwarded to func.

    Returns func's result if it finishes in time. Otherwise calls
    sys.exit(1), which raises SystemExit in the calling worker process.
    """
    timeout = kwargs.get('timeout', None)
    p = ThreadPool(1)
    res = p.apply_async(func, args=args)
    try:
        # Wait timeout seconds for func to complete.
        out = res.get(timeout)
    except TimeoutError:
        # Report the whole args tuple: indexing args[1] raised IndexError
        # for single-argument jobs and masked the intended exit below.
        print("Aborting due to timeout {}".format(args))
        # kill worker itself when get TimeoutError
        sys.exit(1)
    else:
        # The job is done; mark the helper pool closed so it can shut down
        # instead of staying open for every task this process handles.
        p.close()
        return out
def empty_func():
pass
if __name__ == "__main__":
TIMEOUT = 4
util.log_to_stderr(util.DEBUG)
pool = Pool(processes=4)
# k - time to job sleep
featureClass = [(k,) for k in range(20, 0, -1)] # list of arguments
for f in featureClass:
# check available worker
pool.apply(empty_func)
# run job with timeout
abortable_func = partial(abortable_worker, worker, timeout=TIMEOUT)
pool.apply_async(abortable_func, args=f, callback=collect_my_result)
time.sleep(TIMEOUT)
pool.terminate()
print("exit")
The main modification is that the worker process exits with sys.exit(1). This kills the worker process and its job thread, but I'm not sure this solution is a good one. What potential problems can I run into when a process terminates itself while a job is running?
There is no implicit risk in stopping a running job, the OS will take care of correctly terminating the process.
If your job is writing on files, you might end up with lots of truncated files on your disk.
Some small issue might also occur if you write on DBs or if you are connected with some remote process.
Nevertheless, Python standard Pool does not support worker termination on task timeout. Terminating processes abruptly might lead to weird behaviour within your application.
Pebble processing Pool does support timing-out tasks.
from pebble import ProcessPool
from concurrent.futures import TimeoutError
TIMEOUT_SECONDS = 5
def function(one, two):
return one + two
with ProcessPool() as pool:
future = pool.schedule(function, args=(1, 2), timeout=TIMEOUT_SECONDS)
try:
result = future.result()
except TimeoutError:
print("Future: %s took more than 5 seconds to complete" % future)

Python multiprocessing using Slurm: <Finalize object, dead> stalls

I have reserved some nodes on a SLURM cluster and want to run a python script on these nodes.
On one node (server) a python script should fill a queue with jobs and dispatch these jobs to the clients.
Most of the time this works fine, but occasionally the script stalls.
When using Ctrl+C it turns out that in that case one (or sometimes more) nodes seem to be stuck in <Finalize object, dead>:
^Csrun: interrupt (one more within 1 sec to abort)
srun: task 30: running
srun: tasks 0-29,31-39: exited
^Csrun: sending Ctrl-C to job 1075185.14
Exception KeyboardInterrupt: KeyboardInterrupt() in <Finalize object, dead> ignored
srun: Job step aborted: Waiting up to 2 seconds for job step to finish.
slurmd[cluster-112]: *** STEP 1075185.14 KILLED AT 2014-04-03T09:11:23 WITH SIGNAL 9 ***
I have no clue what the reason could be. Maybe, it looks like something related to the garbage collector.
This is the script I run:
#!/usr/bin/env
import os
import multiprocessing.managers
import Queue
import sys
import subprocess
import socket
import errno
class QueueManager(multiprocessing.managers.SyncManager):
pass
def worker(i, my_slurm_proc_id):
print 'hello %i (proc=%i)' % (i, my_slurm_proc_id)
time.sleep(0.1)
pass
def run_server(first_slurm_node, N_procs):
queue = Queue.Queue()
barrier = multiprocessing.BoundedSemaphore(N_procs-1)
QueueManager.register('get_queue', callable=lambda: queue)
QueueManager.register('get_barrier', callable=lambda: barrier)
for i in range(5000):
queue.put(i)
m = QueueManager(address=(first_slurm_node, 50000), authkey='abracadabra')
m.start()
for i in range(N_procs-1):
barrier.acquire(True)
m.get_queue().join() # somehow just 'queue.join()' doesn't work here
def run_client(my_slurm_proc_id, first_slurm_node):
QueueManager.register('get_queue')
QueueManager.register('get_barrier')
m = QueueManager(address=(first_slurm_node, 50000), authkey='abracadabra')
m.connect()
barrier = m.get_barrier()
barrier.acquire(True)
queue = m.get_queue()
while not queue.empty():
try:
data = queue.get_nowait()
except Queue.Empty:
break
worker(data, my_slurm_proc_id)
queue.task_done()
queue = None
barrier.release()
barrier = None
def main():
slurm_job_nodelist = subprocess.check_output('scontrol show hostname'.split(' ') + [os.environ['SLURM_JOB_NODELIST']]).split('\n')
master_node = slurm_job_nodelist[0]
my_slurm_proc_id = int(os.environ['SLURM_PROCID'])
N_procs = int(os.environ['SLURM_NPROCS'])
if my_slurm_proc_id == 0:
run_server(master_node, N_procs)
else:
run_client(my_slurm_proc_id, master_node)
if __name__ == '__main__':
main()

Categories

Resources