python Multi processing share object modification not expected - python

I have got a consumer class like the following:
class Consumer(multiprocessing.Process):
def __init__(self, best_list, task_queue)
multiprocessing.Process.__init__(self)
self.best_list = best_list
self.task_queue = task_queue
def get_best_list(self):
return self.best_list
def run(self):
proc_name = self.name
while True:
next_task = self.task_queue.get()
answer = next_task()
if answer != '' and answer != []:
self.best_list += answer()
print (self.best_list)
class Task:
def __call__(self):
return [100]
best_list = []
tasks = multiprocessing.JoinableQueue()
for i in range(100):
tasks.put(Task())
c = Consumer(best_list, tasks)
c.start()
tasks.join()
print (c.get_best_list())
print (best_list)
The result would shows c.get_best_list() and best_list are both as [], but the print (self.best_list) shows the self.best_list is appending.
I have double checked with my code but still not working out the solution and not sure why get_best_list is returning the initial state.
Would someone please give some hints

Related

incorrect code work threading with locked counter in python

here there is part of my code:
lock = Threading.Lock()
value ={'token1':0, 'token2':0...}
def get_data_for_coin(app):
def run(queue_, ):
while True:
try:
item = queue_.get()
........
result = request_data(app, token)
if result == True
with lock:
value[token] += 1
else:
with lock:
value[token] = 0
time.sleep(60)
check_first(value_)
except Exception as e:
print(e)
queue_.task_done()
global value # >>>>>>> this is my counter
queue = Queue()
threads = []
for j in range(4):
t = threading.Thread(target=run, args=(queue, args), daemon=True)
t.start()
names.append(t.name)
threads.append(t)
.......
for i in range(0, 10000):
queue.put(item)
queue.join()
.............
check_final(app)
return data
def request_data(app, token):
........
and my question about incorrect working counter:
after 100 requests, the counter data is still correct, but then it starts to get lost, somehow other threads affect it, although I block them. I tried to push the counter into the decorator and it didn't work there at all. def counter(function):
def inner(*args, **kwargs):
for key in kwargs:
if key == "token":
token_key = kwargs[key]
with lock:
inner.count[token_key] += 1
if inner.count[token_key] > 20:
inner.count[token_key] = 0
return function(*args, **kwargs)
Please tell me what can be done with a regular counter here ?
is there a way to use a decorator ?
please give examples
this is my simpe func to calculete same errors
def check_first(value_):
list2 = []
for key_1 in value_.keys():
list2.append(value_[key_1])
list1.append(list2)
def check_final(app):
list3 = []
deviation_count = 0
for mass in list1:
if mass not in list3:
list3.append(mass)
else:
deviation_count += 1
print(mass)

Queue in singleton object

I'm using the singleton design pattern which holds a queue and look like that:
class MySingleton():
__instance = None
__msg_queue = None
MAX_QUEUE_SIZE = 10000
lock = threading.Lock()
#staticmethod
def get_instance():
if not MySingleton.__instance:
MySingleton.lock.acquire()
if not MySingleton.__instance:
MySingleton.__instance = MySingleton()
MySingleton.lock.release()
return MySingleton.__instance
def __init__(self):
self.__msg_queue = queue.Queue(maxsize=self.MAX_QUEUE_SIZE)
def get_msg(self):
return self.__msg_queue.get()
def put_msg(self, msg):
self.__msg_queue.put(msg)
def is_empty(self):
return self.__msg_queue.empty()
And when I try to use the is_empty method at another thread that way:
while True:
if MySingleton.get_instance().is_empty():
continue
else:
# do something
My code stuck. Can someone see the problem?

Threaded Buffer crashes

I tried to implement a 5 element buffer through threading and a list.
import threading
class Buffer1 :
def __init__(self,size) :
self.empty = True
self.content = None
self.lock = threading.Condition()
self.list = []
def take(self) :
with self.lock :
while not self.list :
self.lock.wait()
help = self.list[len(self.list)-1]
del self.list[len(self.list)-1]
self.lock.notify_all()
return help
def put(self,v) :
with self.lock :
while len(self.list) >4:
self.lock.wait()
#self.content = v
self.list.append(v)
self.empty = False
self.lock.notify_all()
def show_list(self):
return self.list
a = Buffer1(5)
a.put(7)
Theoretically it works, but when you exceed the limitations of the buffer, either by buffering 6 values or by trying to „take()“ when there is no value buffered, the IDE becomes unresponsive. How could I go about fixing this?
You are using only one thread to add elements in the buffer, so the list contains 5 items, and your main thread is waiting indefinitely on self.lock.wait(). You can use another thread that would take some elements in parallel and then it will notify the producer thread.
For example, creation of a consumer thread that takes 5 items:
def consume(buffer):
import time
for i in range(5):
print(threading.current_thread(), "consume", buffer.take())
time.sleep(2)
print(threading.current_thread(), "Bye")
buffer = Buffer1(5)
t = threading.Thread(target=consume, args=(buffer,), name="consumer")
t.start()
buffer.put(1)
buffer.put(2)
buffer.put(3)
buffer.put(4)
buffer.put(5)
buffer.put(6)
print(buffer.show_list())
... the IDE becomes unresponsive. How could I go about fixing this?
You only showed adding to the buffer from the main thread and nothing ever takes anything out.
If the buffer gets filled up or becomes empty the next put/take will cause its Condition (lock) to wait until something notifies it to continue. I didn't see any of that signaling in your example.
The buffer is a shared resource. The buffer and the threads that use it need to have good control so that everyone can stay out of everyone else's way and enough logic to keep from getting stuck somewhere.
Presumably you need a thread that puts stuff into the buffer and a thread that takes stuff out of the buffer - both having ample signaling to notify everyone when they are done messing with the buffer.
Set up logging so that the program execution could be traced with log messages.
Buffer1 changes:
Changed the list to a collections.deque to simplify things a bit.
Added properties for empty and full
Added an Event attribute to stop putting/taking when the process gets shut down.
Added a timeout while waiting to put/take to forestall any timing issues when the threads are shut down
Added notifications for empty and full conditions.
Made two threads: one to add to the buffer and one to take from the buffer.
Each will add/take while its Event is not set.
In each iteration a random number of items are taken or added.
When taking/putting the buffer's Condition is acquired and all Waiter's are notified when complete.
In the main thread:
An Event is created - for signaling the thread to quit
A Timer is created to limit thread execution time - when it times out its callback function sets the Event and uses the buffer's Condition (lock) to notify anyone that is waiting and free them up.
The threads are created, started, and joined.
import threading
import collections
import random
import string
import time
import logging
import sys
# logging setup
root = logging.getLogger()
root.setLevel(logging.INFO)
formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(message)s',
datefmt='%S')
class WarningFilter(logging.Filter):
def filter(self, record):
return record.levelno == logging.WARNING
class InfoFilter(logging.Filter):
def filter(self, record):
return record.levelno == logging.INFO
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
handler.addFilter(InfoFilter())
root.addHandler(handler)
handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.WARNING)
handler.setFormatter(formatter)
handler.addFilter(WarningFilter())
root.addHandler(handler)
# logging setup end
class Buffer1:
'''FILO buffer.
'''
def __init__(self,size,evt) :
self.content = None
self.size = size
self.evt = evt
self.lock = threading.Condition()
self.list = collections.deque()
#property
def full(self):
return len(self.list) >= self.size
#property
def empty(self):
return bool(self.list)
def take(self) :
with self.lock :
while not self.empty:
root.warning('buffer empty waiting to take')
self.lock.wait(timeout=5)
if self.evt.is_set():
help = None
break
else:
help = self.list.pop()
self.lock.notify_all()
return help
def put(self,v):
success = False
with self.lock :
while self.full:
root.warning('buffer full waiting to put')
self.lock.wait(timeout=5)
if self.evt.is_set():
break
else:
self.list.append(v)
success = True
self.lock.notify_all()
return success
def show_list(self):
return self.list
class Prod(threading.Thread):
'''Puts stuff onto buffer, quits on Event.
Contrived toy - periodically puts random n items in buffer.
'''
def __init__(self,buffer,evt):
super().__init__(name='producer')
self.buffer = buffer
self.evt = evt
def run(self):
n = 0
while not self.evt.is_set():
howmany = random.randint(1,9)
payload = random.sample(string.ascii_letters,howmany)
payload = collections.deque(payload)
root.info(f'{self.name} putting {howmany}')
with self.buffer.lock:
while payload and (not self.evt.is_set()):
c = payload.popleft()
root.info(f'{self.name} -----> {c}')
if not self.buffer.put(c):
root.warning(f'{self.name} last put failed')
self.buffer.lock.notify_all()
time.sleep(.04)
n += 1
root.info(f'{self.name} dying n={n}')
with self.buffer.lock:
self.buffer.lock.notify_all()
root.info(f'{self.name} is done')
class Cons(threading.Thread):
'''Takes stuff off of buffer, quits on Event set.
Contrived toy - periodically takes random n items from buffer.
'''
def __init__(self,buffer,evt):
super().__init__(name='consumer')
self.buffer = buffer
self.evt = evt
def run(self):
n = 0
while not self.evt.is_set():
howmany = random.randint(1,9)
root.info(f'{self.name} taking {howmany}')
with self.buffer.lock:
while (howmany > 0) and (not self.evt.is_set()):
c = self.buffer.take()
root.info(f'{self.name} <----- {c}')
howmany -= 1
self.buffer.lock.notify_all()
time.sleep(.04)
n += 1
root.info(f'{self.name} dying n={n}')
with self.buffer.lock:
self.buffer.lock.notify_all()
root.info(f'{self.name} is done')
if __name__ == '__main__':
# use an Event to shut down the whole process
evt = threading.Event()
buffer = Buffer1(5,evt)
def kill(evt=evt,buffer=buffer):
root.warning('killing everything')
evt.set()
with buffer.lock:
buffer.lock.notify_all()
# don't let this toy example run forever
t = threading.Timer(5,kill)
t.start()
p1 = Prod(buffer,evt)
c1 = Cons(buffer,evt)
c1.start()
p1.start()
p1.join()
c1.join()
print('exit')
Here is another take using asyncio instead of threads to exercise your buffer.
import asyncio
import collections
import random
import string
import time
import logging
import sys
# logging setup
root = logging.getLogger()
root.setLevel(logging.INFO)
formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(message)s',
datefmt='%S')
class WarningFilter(logging.Filter):
def filter(self, record):
return record.levelno == logging.WARNING
class InfoFilter(logging.Filter):
def filter(self, record):
return record.levelno == logging.INFO
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
handler.addFilter(InfoFilter())
root.addHandler(handler)
handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.WARNING)
handler.setFormatter(formatter)
handler.addFilter(WarningFilter())
root.addHandler(handler)
class Buffer:
'''FILO buffer.
'''
def __init__(self,size,evt) :
self.content = None
self.size = size
self.stop_evt = evt
self.lock = asyncio.Condition()
self.list = collections.deque()
def full(self):
return len(self.list) >= self.size
def not_full(self):
return len(self.list) < self.size
def empty(self):
return not bool(self.list)
def not_empty(self):
return bool(self.list)
async def take(self) :
async with self.lock:
#root.info(f'take:lock acquired - wait for not empty')
while self.empty():
waiters = [thing for thing in self.lock._waiters]
#root.warning(f'take:{waiters} waiting')
await self.lock.wait()
if self.stop_evt.is_set(): # shutting down
val = None
else:
#root.info('take: not empty')
val = self.list.pop()
self.lock.notify_all()
return val
async def put(self,v):
success = False
async with self.lock:
#root.info(f'put:lock acquired - wait for not full')
while self.full():
waiters = [thing for thing in self.lock._waiters]
#root.warning(f'put:{waiters} waiting')
await self.lock.wait()
if self.stop_evt.is_set(): # shutting down
break
else:
#root.info('put: not full')
self.list.append(v)
success = True
self.lock.notify_all()
return success
def show_list(self):
return self.list
async def random_stuff():
howmany = random.randint(1,9)
payload = random.sample(string.ascii_letters,howmany)
return collections.deque(payload)
async def produce(buffer,stop_evt,name):
puts = []
try:
while True:
payload = await random_stuff()
root.warning(f'producer{name} putting {len(payload)}')
while payload:
c = payload.popleft()
root.info(f'producer{name} -----> {c}')
success = await buffer.put(c)
if not success:
root.warning(f'producer{name} failed to put {c}')
else:
puts.append(c)
await asyncio.sleep(.03)
except asyncio.CancelledError as e:
root.warning('producer canceled')
root.info(f'producer{name} dying n={len(puts)}')
root.info(f'producer{name} is done')
return puts
async def consume(buffer, stop_evt, name):
'''Takes stuff off of buffer, quits on Event set.
Contrived toy - periodically takes random n items from buffer.
'''
takes = []
try:
while True:
howmany = random.randint(1,9)
msg = f'consumer{name} taking {howmany}'
root.warning(f'{msg:>38}')
while howmany > 0:
c = await buffer.take()
takes.append(c)
msg = f'consumer{name} <----- {c}'
root.info(f'{msg:>38}')
howmany -= 1
await asyncio.sleep(.02)
except asyncio.CancelledError as e:
root.warning('consumer canceled')
root.info(f'consumer{name} dying n={len(takes)}')
root.info(f'consumer{name} is done')
return takes
async def timer(n,buffer,evt, tasks):
root.warning('timer started')
await asyncio.sleep(n)
evt.set()
root.warning('timed out - event set')
root.warning('canceling tasks')
for task in tasks:
task.cancel()
async def main():
loop = asyncio.get_running_loop()
loop.set_debug(True)
# use an Event to shut down the whole process
evt = asyncio.Event()
buffer = Buffer(5,evt)
put_task = asyncio.create_task(produce(buffer,evt,1))
take_task = asyncio.create_task(consume(buffer,evt,1))
timer_task = asyncio.create_task(timer(5,buffer,evt,[put_task,take_task]))
root.info('tasks created')
await timer_task
puts = await put_task
takes = await take_task
print('exit')
return puts,takes,buffer.list
if __name__ == '__main__':
puts,takes,remains = asyncio.run(main())
puts = collections.Counter(puts)
takes = collections.Counter(takes)
remains = collections.Counter(remains)
#print(remains == (puts-takes))

Why doesn't SIGVTALRM trigger inside time.sleep()?

I'm trying to use SIGVTALRM to snapshot profile my Python code, but it doesn't seem to be firing inside blocking operations like time.sleep() and socket operations.
Why is that? And is there any way to address that, so I can collect samples while I'm inside blocking operations?
I've also tried using ITIMER_PROF/SIGPROF and ITIMER_REAL/SIGALRM and both seem to produce similar results.
The code I'm testing with follows, and the output is something like:
$ python profiler-test.py
<module>(__main__:1);test_sampling_profiler(__main__:53): 1
<module>(__main__:1);test_sampling_profiler(__main__:53);busyloop(__main__:48): 1509
Note that the timesleep function isn't shown at all.
Test code:
import time
import signal
import collections
class SamplingProfiler(object):
def __init__(self, interval=0.001, logger=None):
self.interval = interval
self.running = False
self.counter = collections.Counter()
def _sample(self, signum, frame):
if not self.running:
return
stack = []
while frame is not None:
formatted_frame = "%s(%s:%s)" %(
frame.f_code.co_name,
frame.f_globals.get('__name__'),
frame.f_code.co_firstlineno,
)
stack.append(formatted_frame)
frame = frame.f_back
formatted_stack = ';'.join(reversed(stack))
self.counter[formatted_stack] += 1
signal.setitimer(signal.ITIMER_VIRTUAL, self.interval, 0)
def start(self):
if self.running:
return
signal.signal(signal.SIGVTALRM, self._sample)
signal.setitimer(signal.ITIMER_VIRTUAL, self.interval, 0)
self.running = True
def stop(self):
if not self.running:
return
self.running = False
signal.signal(signal.SIGVTALRM, signal.SIG_IGN)
def flush(self):
res = self.counter
self.counter = collections.Counter()
return res
def busyloop():
start = time.time()
while time.time() - start < 5:
pass
def timesleep():
time.sleep(5)
def test_sampling_profiler():
p = SamplingProfiler()
p.start()
busyloop()
timesleep()
p.stop()
print "\n".join("%s: %s" %x for x in sorted(p.flush().items()))
if __name__ == "__main__":
test_sampling_profiler()
Not sure about why time.sleep works that way (could it be using SIGALRM for itself to know when to resume?) but Popen.wait does not block signals so worst case you can call out to OS sleep.
Another approach is to use a separate thread to trigger the sampling:
import sys
import threading
import time
import collections
class SamplingProfiler(object):
def __init__(self, interval=0.001):
self.interval = interval
self.running = False
self.counter = collections.Counter()
self.thread = threading.Thread(target=self._sample)
def _sample(self):
while self.running:
next_wakeup_time = time.time() + self.interval
for thread_id, frame in sys._current_frames().items():
if thread_id == self.thread.ident:
continue
stack = []
while frame is not None:
formatted_frame = "%s(%s:%s)" % (
frame.f_code.co_name,
frame.f_globals.get('__name__'),
frame.f_code.co_firstlineno,
)
stack.append(formatted_frame)
frame = frame.f_back
formatted_stack = ';'.join(reversed(stack))
self.counter[formatted_stack] += 1
sleep_time = next_wakeup_time - time.time()
if sleep_time > 0:
time.sleep(sleep_time)
def start(self):
if self.running:
return
self.running = True
self.thread.start()
def stop(self):
if not self.running:
return
self.running = False
def flush(self):
res = self.counter
self.counter = collections.Counter()
return res
def busyloop():
start = time.time()
while time.time() - start < 5:
pass
def timesleep():
time.sleep(5)
def test_sampling_profiler():
p = SamplingProfiler()
p.start()
busyloop()
timesleep()
p.stop()
print "\n".join("%s: %s" %x for x in sorted(p.flush().items()))
if __name__ == "__main__":
test_sampling_profiler()
When doing it this way the result is:
$ python profiler-test.py
<module>(__main__:1);test_sampling_profiler(__main__:62);busyloop(__main__:54): 2875
<module>(__main__:1);test_sampling_profiler(__main__:62);start(__main__:37);start(threading:717);wait(threading:597);wait(threading:309): 1
<module>(__main__:1);test_sampling_profiler(__main__:62);timesleep(__main__:59): 4280
Still not totally fair, but better than no samples at all during sleep.
The absence of SIGVTALRM during a sleep() doesn't surprise me, since ITIMER_VIRTUAL "runs only when the process is executing."
(As an aside, CPython on non-Windows platforms implements time.sleep() in terms of select().)
With a plain SIGALRM, however, I expect a signal interruption and indeed I observe one:
<module>(__main__:1);test_sampling_profiler(__main__:62);busyloop(__main__:54): 4914
<module>(__main__:1);test_sampling_profiler(__main__:62);timesleep(__main__:59): 1
I changed the code somewhat, but you get the idea:
class SamplingProfiler(object):
TimerSigs = {
signal.ITIMER_PROF : signal.SIGPROF,
signal.ITIMER_REAL : signal.SIGALRM,
signal.ITIMER_VIRTUAL : signal.SIGVTALRM,
}
def __init__(self, interval=0.001, timer = signal.ITIMER_REAL): # CHANGE
self.interval = interval
self.running = False
self.counter = collections.Counter()
self.timer = timer # CHANGE
self.signal = self.TimerSigs[timer] # CHANGE
....

Why thread is interrupted before the changes are complete

I'm attempting to create python module for getting MAC adresses by IP addresses.
def getMACs(addressesList):
def _processArp(pkt):
spa = _inet_ntoa(pkt.spa)
if pkt.op == dpkt.arp.ARP_OP_REPLY and spa in _cache.macTable:
lock.acquire()
try:
_cache.macTable[spa] = _packedToMacStr(pkt.sha)
_cache.notFilledMacs -= 1
finally:
lock.release()
if _cache.notFilledMacs == 0:
thrd.stop()
addresses = _parseAddresses(addressesList)
_cache.registerCacheEntry("macTable", {})
_cache.registerCacheEntry("notFilledMacs", 0)
_events.arpPacket += _processArp
lock = threading.Lock()
thrd = _CaptureThread(promisc=False, timeout_ms=30, filter="arp")
thrd.start()
for addr in addresses:
if _sendArpQuery(addr):
_cache.macTable[str(addr)] = None
_cache.notFilledMacs += 1
thrd.join(125)
thrd.stop()
return _cache.macTable
if __name__ == "__main__":
macTable = getMACs([IPAddress("192.168.1.1"), IPAddress("192.168.1.3")])
_pprint.pprint(macTable)
When I run this module I get
{'192.168.1.1': '00:11:95:9E:25:B1', '192.168.1.3': None}
When I debug _processArp step by step I get
{'192.168.1.1': '00:11:95:9E:25:B1', '192.168.1.3': '00:21:63:78:98:8E'}
Class CaptureThread:
class CaptureThread(threading.Thread):
def __init__ (self, name=None, snaplen=65535, promisc=True, timeout_ms=0, immediate=False, filter=None):
threading.Thread.__init__(self)
self.__running = True
self.__name = name
self.__snaplen = snaplen
self.__promisc = promisc
self.__timeout_ms = timeout_ms
self.__immediate = immediate
self.__filter = filter
def stop(self):
self.__running = False
def run(self):
self.__pc = pcap.pcap(self.__name, self.__snaplen, self.__promisc, self.__timeout_ms, self.__immediate)
if self.__filter:
self.__pc.setfilter(self.__filter)
while self.__running:
self.__pc.dispatch(1, self.__processPacket)
def __processPacket(self, timestamp, pkt):
peth = dpkt.ethernet.Ethernet(pkt)
if isinstance(peth.data, dpkt.arp.ARP):
_events.arpPacket(peth.data)
Stupid error. As always when working with threads - because of thread synchronization.
One of my conditions for interrupting thread is "_cache.notFilledMacs == 0". In the main thread _cache.notFilledMacs did not have time to get the value of 2 when in the CaptureThread value is decreased.

Categories

Resources