Multithread proxy change but once - python

Let say I have a code like this:
def func1(a,b,c):
try:
p = pycurl.Curl()
p.setopt(pycurl.PROXY, "127.0.0.1")
p.setopt(pycurl.PROXYPORT, 9050)
p.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
p.perform()
p.close()
except pycurl.error as error:
if error[0] == 28: # timeout - change proxy
print "Tor timeout, need to change"
queue.put((a,b,c))
new_tor()
return
def new_tor():
# send_signal_for_new_ident_is_here
I start this code in 7 threads.
And when a thread receives error 28 it change the identify.
But it happens that ALL 7 THREADS sending signal to change identify.
How to do this:
If thread received error 28, then it calls new_tor() and other 6 threads don't but waiting for result and only then they proceed to work. How to sync this?

Just put an error "id" into the queue and if you encounter it, put the value back into the queue, and then handle as needed.
You don't wish to end the thread, which is what I did.
So, you can have some unique identifier for each thread, such that if once a thread encounters an error, it also adds the data (it's identifier) that says it encountered this error before, so that if all threads have encountered this error, the error is removed from the queue.
Code:
import threading
import Queue
y = 0
def f1():
global y
y += 1
if y > 100:
raise ValueError('trial')
def f2():
return
class Test(threading.Thread):
def __init__(self, func, name):
threading.Thread.__init__(self)
self.func = func
self.name = name
def run(self):
while True:
x = ''
if not queue.empty():
x = queue.get()
if x == 'error':
queue.put(x)
print 'Stopping %s' % (self.name,)
return
try:
self.func()
except Exception as e:
queue.put('error')
queue = Queue.Queue()
thread1 = Test(f1, '1')
thread2 = Test(f2, '2')
thread1.start()
thread2.start()

Related

Send data to Python thread, then read a response using Queue

It's quite easy to send or receive data through threads using Queue's module when doing each thing at a time, but I didn't figure out how to send something to a thread, then expect for a return properly.
In the below example, I was expecting to send something to thread in order to be processed, then harvest the result, but the t.queue.get() in the main function receives what what just sent above instead of waiting for the thread to return. How can I get around it?
#!/usr/bin/env python3
from threading import Thread
from queue import Queue
class MyThread(Thread):
queue:Queue
def __init__(self, *args, **kwargs):
super().__init__()
self.queue = Queue()
self.daemon = True
# receives a name, then prints "Hello, name!"
def run(self):
while True:
val = self.queue.get()
if not val:
break
self.queue.put(f'Hello, {val}!')
def main():
t = MyThread()
t.start()
# sends string to thread
t.queue.put('Jurandir')
# expects to receive "Hello, Jurandir!",
# but "Jurandir" is immediately returned
ret = t.queue.get()
print(ret)
if __name__ == '__main__':
main()
Thing is that you are getting the alleged result immediately from the queue, and the worker has still not added the result. You can split into an "input queue" and a "results queue". And then wait in the main thread until there's some output in the queue.
#!/usr/bin/env python3
from threading import Thread, Lock
from queue import Queue
class MyThread(Thread):
def __init__(self, *args, **kwargs):
super().__init__()
self.input_queue = Queue()
self.results_queue = Queue()
self.daemon = True
# receives a name, then prints "Hello, name!"
def run(self):
while True:
val = self.input_queue.get()
if not val:
break
self.results_queue.put(f'Hello, {val}!')
def main():
t = MyThread()
t.start()
# sends string to thread
t.input_queue.put('Jurandir')
ret = t.results_queue.get()
while ret is None:
ret = t.results_queue.get()
print(ret)
if __name__ == '__main__':
main()

Python custom signal handling in processes pool

I am dealing with the following problem:
I've implemented a dummy 'Thing' class that sleeps for 10 seconds and logs a message ('foo'). This class is instantiated in a worker function for a Processes Pool and the 'foo' method that implements the above mentioned logic is called.
What I want to achieve is a custom signal handling: as long as the processes haven't terminated, if CTRL+C (SIGINT) is sent, each process will log the signal and they will immediately terminate.
Half of the logic is working: while each process is sleeping, on SIGINT, they'll be interrupted and the Pool will be closed.
Problem: if ALL the process end successfully and SIGINT is sent, the message will be logged but the Pool won't be closed.
Code:
import logging
import signal
import os
import time
from multiprocessing import Pool, current_process
logger = logging.getLogger('test')
SIGNAL_NAMES = dict((k, v) for v, k in reversed(sorted(signal.__dict__.items()))
if v.startswith('SIG') and not v.startswith('SIG_'))
class Thing(object):
def __init__(self, my_id):
self.my_id = my_id
self.logger = logging.getLogger(str(my_id))
def foo(self):
time.sleep(10)
self.logger.info('[%s] Foo after 10 secs!', self.my_id)
class Daemon(object):
def __init__(self, no_processes, max_count):
signal.signal(signal.SIGINT, self.stop)
self.done = False
self.count = 0
self.max_count = max_count
self.pool = Pool(no_processes, initializer=self.pool_initializer)
def stop(self, signum, _):
""" Stop function for Daemon """
sig = SIGNAL_NAMES.get(signum) or signum
logger.info('[Daemon] Stopping (received signal %s', sig)
self.done = True
def _generate_ids(self):
""" Generator function of the IDs for the Processes Pool """
while not self.done:
if self.count < self.max_count:
my_id = "ID-{}".format(self.count)
logger.info('[Daemon] Generated ID %s', my_id)
time.sleep(3)
yield my_id
self.count += 1
time.sleep(1)
def run(self):
""" Main daemon run function """
pid = os.getpid()
logger.info('[Daemon] Started running on PID %s', str(pid))
my_ids = self._generate_ids()
for res in self.pool.imap_unordered(run_thing, my_ids):
logger.info("[Daemon] Finished %s", res or '')
logger.info('[Daemon] Closing & waiting processes to terminate')
self.pool.close()
self.pool.join()
def pool_initializer(self):
""" Pool initializer function """
signal.signal(signal.SIGINT, self.worker_signal_handler)
#staticmethod
def worker_signal_handler(signum, _):
""" Signal handler for the Process worker """
sig = SIGNAL_NAMES.get(signum) or signum
cp = current_process()
logger.info("[%s] Received in worker %s signal %s", WORKER_THING_ID or '', str(cp), sig)
global WORKER_EXITING
WORKER_EXITING = True
WORKER_EXITING = False
WORKER_THING_ID = None
def run_thing(arg):
""" Worker function for processes """
if WORKER_EXITING:
return
global WORKER_THING_ID
WORKER_THING_ID = arg
run_exception = None
logger.info('[%s] START Thing foo-ing', arg)
logging.getLogger('Thing-{}'.format(arg)).setLevel(logging.INFO)
try:
thing = Thing(arg)
thing.foo()
except Exception as e:
run_exception = e
finally:
WORKER_THING_ID = None
logger.info('[%s] STOP Thing foo-ing', arg)
if run_exception:
logger.error('[%s] EXCEPTION on Thing foo-ing: %s', arg, run_exception)
return arg
if __name__ == '__main__':
logging.basicConfig()
logger.setLevel(logging.INFO)
daemon = Daemon(4, 3)
daemon.run()
Your problem is logic in function _generate_ids(). The function never ends so pool.imap_unordered() never finishes by itself, only needs to be interrupted by CTRL-C.
Change it for something like this:
def _generate_ids(self):
""" Generator function of the IDs for the Processes Pool """
for i in range(self.max_count):
time.sleep(3)
my_id = "ID-{}".format(self.count)
logger.info('[Daemon] Generated ID %s', my_id)
if self.done:
break
self.count += 1
yield my_id
And the processes end by themselves normally.

Python Multiprocessing - terminate / restart worker process

I have a bunch of long running processes that I would like to split up into multiple processes. That part I can do no problem. The issue I run into is sometimes these processes go into a hung state. To address this issue I would like to be able to set a time threshold for each task that a process is working on. When that time threshold is exceeded I would like to restart or terminate the task.
Originally my code was very simple using a process pool, however with the pool I could not figure out how to retrieve the processes inside the pool, nevermind how to restart / terminate a process in the pool.
I have resorted to using a queue and process objects as is illustrated in this example (https://pymotw.com/2/multiprocessing/communication.html#passing-messages-to-processes with some changes.
My attempts to figure this out are in the code below. In its current state the process does not actually get terminated. Further to that I cannot figure out how to get the process to move onto the next task after the current task is terminated. Any suggestions / help appreciated, perhaps I’m going about this the wrong way.
Thanks
import multiprocess
import time
class Consumer(multiprocess.Process):
def __init__(self, task_queue, result_queue, startTimes, name=None):
multiprocess.Process.__init__(self)
if name:
self.name = name
print 'created process: {0}'.format(self.name)
self.task_queue = task_queue
self.result_queue = result_queue
self.startTimes = startTimes
def stopProcess(self):
elapseTime = time.time() - self.startTimes[self.name]
print 'killing process {0} {1}'.format(self.name, elapseTime)
self.task_queue.cancel_join_thread()
self.terminate()
# now want to get the process to start procesing another job
def run(self):
'''
The process subclass calls this on a separate process.
'''
proc_name = self.name
print proc_name
while True:
# pulling the next task off the queue and starting it
# on the current process.
task = self.task_queue.get()
self.task_queue.cancel_join_thread()
if task is None:
# Poison pill means shutdown
#print '%s: Exiting' % proc_name
self.task_queue.task_done()
break
self.startTimes[proc_name] = time.time()
answer = task()
self.task_queue.task_done()
self.result_queue.put(answer)
return
class Task(object):
def __init__(self, a, b, startTimes):
self.a = a
self.b = b
self.startTimes = startTimes
self.taskName = 'taskName_{0}_{1}'.format(self.a, self.b)
def __call__(self):
import time
import os
print 'new job in process pid:', os.getpid(), self.taskName
if self.a == 2:
time.sleep(20000) # simulate a hung process
else:
time.sleep(3) # pretend to take some time to do the work
return '%s * %s = %s' % (self.a, self.b, self.a * self.b)
def __str__(self):
return '%s * %s' % (self.a, self.b)
if __name__ == '__main__':
# Establish communication queues
# tasks = this is the work queue and results is for results or completed work
tasks = multiprocess.JoinableQueue()
results = multiprocess.Queue()
#parentPipe, childPipe = multiprocess.Pipe(duplex=True)
mgr = multiprocess.Manager()
startTimes = mgr.dict()
# Start consumers
numberOfProcesses = 4
processObjs = []
for processNumber in range(numberOfProcesses):
processObj = Consumer(tasks, results, startTimes)
processObjs.append(processObj)
for process in processObjs:
process.start()
# Enqueue jobs
num_jobs = 30
for i in range(num_jobs):
tasks.put(Task(i, i + 1, startTimes))
# Add a poison pill for each process object
for i in range(numberOfProcesses):
tasks.put(None)
# process monitor loop,
killProcesses = {}
executing = True
while executing:
allDead = True
for process in processObjs:
name = process.name
#status = consumer.status.getStatusString()
status = process.is_alive()
pid = process.ident
elapsedTime = 0
if name in startTimes:
elapsedTime = time.time() - startTimes[name]
if elapsedTime > 10:
process.stopProcess()
print "{0} - {1} - {2} - {3}".format(name, status, pid, elapsedTime)
if allDead and status:
allDead = False
if allDead:
executing = False
time.sleep(3)
# Wait for all of the tasks to finish
#tasks.join()
# Start printing results
while num_jobs:
result = results.get()
print 'Result:', result
num_jobs -= 1
I generally recommend against subclassing multiprocessing.Process as it leads to code hard to read.
I'd rather encapsulate your logic in a function and run it in a separate process. This keeps the code much cleaner and intuitive.
Nevertheless, rather than reinventing the wheel, I'd recommend you to use some library which already solves the issue for you such as Pebble or billiard.
For example, the Pebble library allows to easily set timeouts to processes running independently or within a Pool.
Running your function within a separate process with a timeout:
from pebble import concurrent
from concurrent.futures import TimeoutError
#concurrent.process(timeout=10)
def function(foo, bar=0):
return foo + bar
future = function(1, bar=2)
try:
result = future.result() # blocks until results are ready
except TimeoutError as error:
print("Function took longer than %d seconds" % error.args[1])
Same example but with a process Pool.
with ProcessPool(max_workers=5, max_tasks=10) as pool:
future = pool.schedule(function, args=[1], timeout=10)
try:
result = future.result() # blocks until results are ready
except TimeoutError as error:
print("Function took longer than %d seconds" % error.args[1])
In both cases, the timing out process will be automatically terminated for you.
A way simpler solution would be to continue using a than reimplementing the Pool is to design a mechanism which timeout the function you are running.
For instance:
from time import sleep
import signal
class TimeoutError(Exception):
pass
def handler(signum, frame):
raise TimeoutError()
def run_with_timeout(func, *args, timeout=10, **kwargs):
signal.signal(signal.SIGALRM, handler)
signal.alarm(timeout)
try:
res = func(*args, **kwargs)
except TimeoutError as exc:
print("Timeout")
res = exc
finally:
signal.alarm(0)
return res
def test():
sleep(4)
print("ok")
if __name__ == "__main__":
import multiprocessing as mp
p = mp.Pool()
print(p.apply_async(run_with_timeout, args=(test,),
kwds={"timeout":1}).get())
The signal.alarm set a timeout and when this timeout, it run the handler, which stop the execution of your function.
EDIT: If you are using a windows system, it seems to be a bit more complicated as signal does not implement SIGALRM. Another solution is to use the C-level python API. This code have been adapted from this SO answer with a bit of adaptation to work on 64bit system. I have only tested it on linux but it should work the same on windows.
import threading
import ctypes
from time import sleep
class TimeoutError(Exception):
pass
def run_with_timeout(func, *args, timeout=10, **kwargs):
interupt_tid = int(threading.get_ident())
def interupt_thread():
# Call the low level C python api using ctypes. tid must be converted
# to c_long to be valid.
res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
ctypes.c_long(interupt_tid), ctypes.py_object(TimeoutError))
if res == 0:
print(threading.enumerate())
print(interupt_tid)
raise ValueError("invalid thread id")
elif res != 1:
# "if it returns a number greater than one, you're in trouble,
# and you should call it again with exc=NULL to revert the effect"
ctypes.pythonapi.PyThreadState_SetAsyncExc(
ctypes.c_long(interupt_tid), 0)
raise SystemError("PyThreadState_SetAsyncExc failed")
timer = threading.Timer(timeout, interupt_thread)
try:
timer.start()
res = func(*args, **kwargs)
except TimeoutError as exc:
print("Timeout")
res = exc
else:
timer.cancel()
return res
def test():
sleep(4)
print("ok")
if __name__ == "__main__":
import multiprocessing as mp
p = mp.Pool()
print(p.apply_async(run_with_timeout, args=(test,),
kwds={"timeout": 1}).get())
print(p.apply_async(run_with_timeout, args=(test,),
kwds={"timeout": 5}).get())
For long running processes and/or long iterators, spawned workers might hang after some time. To prevent this, there are two built-in techniques:
Restart workers after they have delivered maxtasksperchild tasks from the queue.
Pass timeout to pool.imap.next(), catch the TimeoutError, and finish the rest of the work in another pool.
The following wrapper implements both, as a generator. This also works when replacing stdlib multiprocessing with multiprocess.
import multiprocessing as mp
def imap(
func,
iterable,
*,
processes=None,
maxtasksperchild=42,
timeout=42,
initializer=None,
initargs=(),
context=mp.get_context("spawn")
):
"""Multiprocessing imap, restarting workers after maxtasksperchild tasks to avoid zombies.
Example:
>>> list(imap(str, range(5)))
['0', '1', '2', '3', '4']
Raises:
mp.TimeoutError: if the next result cannot be returned within timeout seconds.
Yields:
Ordered results as they come in.
"""
with context.Pool(
processes=processes,
maxtasksperchild=maxtasksperchild,
initializer=initializer,
initargs=initargs,
) as pool:
it = pool.imap(func, iterable)
while True:
try:
yield it.next(timeout)
except StopIteration:
return
To catch the TimeoutError:
>>> import time
>>> iterable = list(range(10))
>>> results = []
>>> try:
... for i, result in enumerate(imap(time.sleep, iterable, processes=2, timeout=2)):
... results.append(result)
... except mp.TimeoutError:
... print("Failed to process the following subset of iterable:", iterable[i:])
Failed to process the following subset of iterable: [2, 3, 4, 5, 6, 7, 8, 9]

Why doesn't class B print every 1 second

Given the code below
from threading import Thread
import Queue
from time import sleep
class myClassA(Thread):
def __init__(self,num,q):
Thread.__init__(self)
self.daemon = True
self.num = num
self.start()
def run(self):
while True:
self.num = self.num+1
q.put(self.num)
sleep(5)
class myClassB(Thread):
def __init__(self,num,q):
Thread.__init__(self)
self.daemon = True
self.num = num
self.start()
def run(self):
while True:
self.num = q.get()
print self.num
sleep(1)
num = 0
q = Queue.Queue()
myClassA(num,q)
myClassB(num,q)
while True:
pass
Why doesn't Class B print every second? I would expect Class B to print five 1's then five 2's etc. Is q.get() a blocking function?
Yes, Queue.get() is blocking by default. From the documentation:
If optional args block is true and timeout is None (the default), block if necessary until an item is available.
Bold emphasis mine. Because q.get() blocks, it won't return until the other thread has put something in the queue for it to fetch.
Even so, removing an item from the queue means it won't be there the next time. q.get() doesn't leave the number there to be fetched again and again.
Instead, if you were to use q.get(False) (or used q.get_nowait()) to prevent blocking, an Empty exception is raised instead.

Regularly check whether a webserver is up with a Thread

I wrote a Threading class which tests whether a webserver is up or not.
import urllib
import threading
import time
import Queue
class Thread_CheckDeviceState(threading.Thread):
def __init__(self, device_ip, queue, inter=0.1):
self._run = True
self._codes = {}
self._queue = queue
self._device_ip = device_ip
self._inter = inter
self._elapsed = 0
threading.Thread.__init__(self)
def stop(self):
self._run = False
def run(self):
start = time.time()
while self._run:
try:
code = urllib.urlopen(self._device_ip).getcode()
except Exception:
code = "nope"
finally:
measure = time.time()
self._elapsed += measure-start
print self._elapsed, code
self._codes.update(
{self._elapsed:code}
)
time.sleep(self._inter)
self._queue.put(self._codes)
q = Queue.Queue()
thread = Thread_CheckDeviceState("http://192.168.1.3", q)
thread.start()
time.sleep(10)
thread.stop()
print q.get()
It works fine - until I disconnect my pc from the network. From that moment on the thread just does nothing until it is stopped. I would expect it to just continue and set the code to "nope", like I wrote it in the exception handler. Why doesn't it work
You need to use urllib2 instead, and specify a timeout parameter when you call urlopen().

Categories

Resources