Queue in a singleton object - Python

I'm using the singleton design pattern, which holds a queue and looks like this:
import queue
import threading

class MySingleton:
    __instance = None
    __msg_queue = None
    MAX_QUEUE_SIZE = 10000
    lock = threading.Lock()

    @staticmethod
    def get_instance():
        if not MySingleton.__instance:
            MySingleton.lock.acquire()
            if not MySingleton.__instance:
                MySingleton.__instance = MySingleton()
            MySingleton.lock.release()
        return MySingleton.__instance

    def __init__(self):
        self.__msg_queue = queue.Queue(maxsize=self.MAX_QUEUE_SIZE)

    def get_msg(self):
        return self.__msg_queue.get()

    def put_msg(self, msg):
        self.__msg_queue.put(msg)

    def is_empty(self):
        return self.__msg_queue.empty()
And when I try to use the is_empty method from another thread like this:
while True:
    if MySingleton.get_instance().is_empty():
        continue
    else:
        # do something
        ...
My code gets stuck. Can someone see the problem?
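A note of my own, not part of the original question: polling is_empty() in a tight loop and then calling get_msg() leaves a gap between the check and the get, and get_msg() wraps Queue.get(), which blocks by default, so another consumer draining the queue in that gap leaves this thread stuck waiting. A minimal sketch that leans on the queue's built-in blocking instead:

# Hedged sketch: let Queue.get() do the waiting instead of busy-polling.
while True:
    msg = MySingleton.get_instance().get_msg()  # blocks until a message arrives
    # do something with msg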

Related

Python multiprocessing: shared object modification not as expected

I have got a consumer class like the following:
import multiprocessing

class Consumer(multiprocessing.Process):
    def __init__(self, best_list, task_queue):
        multiprocessing.Process.__init__(self)
        self.best_list = best_list
        self.task_queue = task_queue

    def get_best_list(self):
        return self.best_list

    def run(self):
        proc_name = self.name
        while True:
            next_task = self.task_queue.get()
            answer = next_task()
            if answer != '' and answer != []:
                self.best_list += answer
            self.task_queue.task_done()  # lets tasks.join() return in the parent
            print(self.best_list)

class Task:
    def __call__(self):
        return [100]
best_list = []
tasks = multiprocessing.JoinableQueue()

for i in range(100):
    tasks.put(Task())

c = Consumer(best_list, tasks)
c.start()
tasks.join()

print(c.get_best_list())
print(best_list)
The result shows that c.get_best_list() and best_list both come out as [], but print(self.best_list) shows that self.best_list is growing inside the process.
I have double-checked my code but still haven't worked out a solution, and I'm not sure why get_best_list returns the initial state.
Would someone please give some hints?
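For context, a note of my own rather than from the original thread: multiprocessing.Process pickles best_list into the child, so the child appends to its own copy and the parent's list never changes. A minimal sketch of one common fix, sharing the list through a multiprocessing.Manager (reusing the Consumer and Task classes above):

import multiprocessing

# Hedged sketch: a Manager-backed list lives in a server process, so
# appends made by the child are visible to the parent through the proxy.
if __name__ == '__main__':
    manager = multiprocessing.Manager()
    best_list = manager.list()     # shared proxy instead of a plain list

    tasks = multiprocessing.JoinableQueue()
    for i in range(100):
        tasks.put(Task())

    c = Consumer(best_list, tasks)
    c.start()
    tasks.join()                   # requires the task_done() call in run()

    print(list(best_list))         # now reflects the child's appends
    c.terminate()                  # the worker loops forever otherwise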

Python equivalent of Java synchronized

In Java, you can make a variable thread safe by just adding the synchronized keyword. Is there anything that can achieve the same results in Python?
You can use with self.lock: and then put your code inside there. See http://theorangeduck.com/page/synchronized-python for more information.
Working code using with self.lock, which also handles any exception that occurs:
Inside Manager we make the Manager methods thread-safe:
from threading import RLock

class Manager:
    def __init__(self):
        self.lock = RLock()
        self.hash: dict[str, int] = dict()

    def containsToken(self, key) -> bool:
        with self.lock:
            return key in self.hash

    def addToken(self, token: str):
        with self.lock:
            token = token.strip()
            if token in self.hash:
                self.hash[token] = self.hash[token] + 1
            else:
                self.hash[token] = 1

    def removeToken(self, token):
        with self.lock:
            if token not in self.hash:
                raise KeyError(f"token: {token} doesn't exist")
            self.hash[token] = self.hash[token] - 1
            if self.hash[token] == 0:
                self.hash.pop(token)
if __name__ == "__main__":
sync = Manager()
sync.addToken("a")
sync.addToken("a")
sync.addToken("a")
sync.addToken("a")
sync.addToken("B")
sync.addToken("B")
sync.addToken("B")
sync.addToken("B")
sync.removeToken("a")
sync.removeToken("a")
sync.removeToken("a")
sync.removeToken("B")
print(sync.hash)
Output:
{'a': 1, 'B': 3}
You can write your own @synchronized decorator.
The example uses a mutex lock:
from functools import wraps
from multiprocessing import Lock

def synchronized(member):
    """
    @synchronized decorator.

    Lock a method for synchronized access only. The lock is stored to
    the function or class instance, depending on what is available.
    """
    @wraps(member)
    def wrapper(*args, **kwargs):
        lock = vars(member).get("_synchronized_lock", None)
        if lock is None:
            lock = vars(member).setdefault("_synchronized_lock", Lock())
        lock.acquire()
        try:
            return member(*args, **kwargs)
        finally:
            # released on both the normal and the exception path
            lock.release()
    return wrapper
Now you are able to decorate a method like this:
class MyClass:
    ...

    @synchronized
    def hello_world(self):
        print("synced hello world")
And there is also an excellent blog post about the missing synchronized decorator.
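As a quick self-contained check of my own (not from the linked post): a few threads bump a shared counter through a @synchronized method, and the lock keeps the read-modify-write sequence atomic:

import threading

class Counter:
    count = 0

    @synchronized
    def increment(self):
        # without the lock, this read-modify-write could interleave
        Counter.count += 1

counter = Counter()
workers = [threading.Thread(target=lambda: [counter.increment() for _ in range(10000)])
           for _ in range(4)]
for w in workers:
    w.start()
for w in workers:
    w.join()
print(Counter.count)  # reliably 40000 with @synchronized in place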

Python threading.join() hangs

My problem is as follows:
I have a class that inherits from threading.Thread that I want to be able to stop gracefully. This class also has a Queue it gets its work from.
Since there are quite a few classes in my project that should have this behaviour, I've created some superclasses to reduce duplicated code, like this:
Thread-related behaviour:
from threading import Thread, Event

class StoppableThread(Thread):
    def __init__(self):
        Thread.__init__(self)
        self._stop = Event()

    def stop(self):
        self._stop.set()

    def stopped(self):
        return self._stop.is_set()
Queue-related behaviour:
from queue import Queue

class Queueable:
    def __init__(self):
        self._queue = Queue()

    def append_to_job_queue(self, job):
        self._queue.put(job)
Combining the two above and adding queue.join() to the stop() call:
class StoppableQueueThread(StoppableThread, Queueable):
    def __init__(self):
        StoppableThread.__init__(self)
        Queueable.__init__(self)

    def stop(self):
        super(StoppableQueueThread, self).stop()
        self._queue.join()
A base class for a datasource:
from abc import ABC, abstractmethod

class DataSource(StoppableThread, ABC):
    def __init__(self, data_parser):
        StoppableThread.__init__(self)
        self.setName("DataSource")
        ABC.__init__(self)
        self._data_parser = data_parser

    def run(self):
        while not self.stopped():
            record = self._fetch_data()
            self._data_parser.append_to_job_queue(record)

    @abstractmethod
    def _fetch_data(self):
        """Implement logic here for obtaining a data piece;
        should return the fetched data."""
An implementation for a datasource:
from csv import reader

class CSVDataSource(DataSource):
    def __init__(self, data_parser, file_path):
        DataSource.__init__(self, data_parser)
        self.file_path = file_path
        self.csv_data = Queue()
        print('loading csv')
        self.load_csv()
        print('done loading csv')

    def load_csv(self):
        """Loops through the csv and adds each row to a queue"""
        with open(self.file_path, 'r') as f:
            self.reader = reader(f)
            next(self.reader, None)  # skip header
            for row in self.reader:
                self.csv_data.put(row)

    def _fetch_data(self):
        """Returns the next item of the queue"""
        item = self.csv_data.get()
        self.csv_data.task_done()
        print(self.csv_data.qsize())
        return item
Suppose there is a CSVDataSource instance called ds. If I want to stop the thread, I call:
ds.stop()
ds.join()
The ds.join() call, however, never returns. I'm not sure why this is, because the run() method does check whether the stop event is set.
Any ideas?
Update
A little more clarity as requested: the application is built up out of several threads. The RealStrategy thread (below) owns all the other threads and is responsible for starting and terminating them. I haven't set the daemon flag for any of the threads, so they should be non-daemonic by default.
The main thread looks like this:
if __name__ == '__main__':
    def exit_handler(signal, frame):
        rs.stop_engine()
        rs.join()
        sys.exit(0)

    signal.signal(signal.SIGINT, exit_handler)

    rs = RealStrategy()
    rs.run_engine()
And here are the rs.run_engine() and rs.stop_engine() methods that are called in main:
class RealStrategy(Thread):
    .....
    .....

    def run_engine(self):
        self.on_start()
        self._order_handler.start()
        self._data_parser.start()
        self._data_source.start()
        self.start()

    def stop_engine(self):
        self._data_source.stop()
        self._data_parser.stop()
        self._order_handler.stop()
        self._data_source.join()
        self._data_parser.join()
        self._order_handler.join()
        self.stop()
If you want to use queue.Queue.join, then you must also use queue.Queue.task_done. You can read the linked documentation, or see the following, copied from the official documentation:
Queue.task_done()
Indicate that a formerly enqueued task is complete.
Used by queue consumer threads. For each get() used to fetch a task, a
subsequent call to task_done() tells the queue that the processing on
the task is complete.
If a join() is currently blocking, it will resume when all items have
been processed (meaning that a task_done() call was received for every
item that had been put() into the queue).
Raises a ValueError if called more times than there were items placed
in the queue.
Queue.join()
Blocks until all items in the queue have been gotten and processed.
The count of unfinished tasks goes up whenever an item is added to the
queue. The count goes down whenever a consumer thread calls
task_done() to indicate that the item was retrieved and all work on it
is complete. When the count of unfinished tasks drops to zero, join()
unblocks.
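To make the pairing concrete, here is a minimal sketch of my own (not part of the quoted documentation): every get() is matched by a task_done(), which is exactly what allows join() to return:

import queue
import threading

q = queue.Queue()

def worker():
    while True:
        item = q.get()            # each get() ...
        if item is None:          # sentinel value signals shutdown
            q.task_done()
            break
        print('processing', item)
        q.task_done()             # ... must be matched by one task_done()

threading.Thread(target=worker).start()
for i in range(5):
    q.put(i)
q.put(None)
q.join()    # returns only once every put() item has been marked done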
To test your problem, an example implementation was created to find out what was going on. It is slightly different from how your program works, but demonstrates one way of solving your problem:
#! /usr/bin/env python3
import abc
import csv
import pathlib
import queue
import sys
import threading
import time

def main():
    source_path = pathlib.Path(r'C:\path\to\file.csv')
    data_source = CSVDataSource(source_path)
    data_source.start()
    processor = StoppableThread(target=consumer, args=[data_source])
    processor.start()
    time.sleep(0.1)
    data_source.stop()

def consumer(data_source):
    while data_source.empty:
        time.sleep(0.001)
    while not data_source.empty:
        task = data_source.get_from_queue(True, 0.1)
        print(*task.data, sep=', ', flush=True)
        task.done()

class StopThread(StopIteration):
    pass

threading.SystemExit = SystemExit, StopThread

class StoppableThread(threading.Thread):
    def _bootstrap(self, stop=False):
        # noinspection PyProtectedMember
        if threading._trace_hook:
            raise RuntimeError('cannot run thread with tracing')

        def terminate():
            nonlocal stop
            stop = True

        self.__terminate = terminate

        # noinspection PyUnusedLocal
        def trace(frame, event, arg):
            if stop:
                raise StopThread

        sys.settrace(trace)
        super()._bootstrap()

    def terminate(self):
        try:
            self.__terminate()
        except AttributeError:
            raise RuntimeError('cannot terminate thread '
                               'before it is started') from None

class Queryable:
    def __init__(self, maxsize=1 << 10):
        self.__queue = queue.Queue(maxsize)

    def add_to_queue(self, item):
        self.__queue.put(item)

    def get_from_queue(self, block=True, timeout=None):
        return self.__queue.get(block, timeout)

    @property
    def empty(self):
        return self.__queue.empty()

    @property
    def full(self):
        return self.__queue.full()

    def task_done(self):
        self.__queue.task_done()

    def join_queue(self):
        self.__queue.join()

class StoppableQueryThread(StoppableThread, Queryable):
    def __init__(self, target=None, name=None, args=(), kwargs=None,
                 *, daemon=None, maxsize=1 << 10):
        super().__init__(None, target, name, args, kwargs, daemon=daemon)
        Queryable.__init__(self, maxsize)

    def stop(self):
        self.terminate()
        self.join_queue()

class DataSource(StoppableQueryThread, abc.ABC):
    @abc.abstractmethod
    def __init__(self, maxsize=1 << 10):
        super().__init__(None, 'DataSource', maxsize=maxsize)

    def run(self):
        while True:
            record = self._fetch_data()
            self.add_to_queue(record)

    @abc.abstractmethod
    def _fetch_data(self):
        pass

class CSVDataSource(DataSource):
    def __init__(self, source_path):
        super().__init__()
        self.__data_parser = self.__build_data_parser(source_path)

    @staticmethod
    def __build_data_parser(source_path):
        with source_path.open(newline='') as source:
            parser = csv.reader(source)
            next(parser, None)
            yield from parser

    def _fetch_data(self):
        try:
            return Task(next(self.__data_parser), self.task_done)
        except StopIteration:
            raise StopThread from None

class Task:
    def __init__(self, data, callback):
        self.__data = data
        self.__callback = callback

    @property
    def data(self):
        return self.__data

    def done(self):
        self.__callback()

if __name__ == '__main__':
    main()

Is the method below thread-safe when implementing the Singleton pattern in Python?

I have written two classes. The first class, MySingletonTSafe, is thread-safe because I use threading.Lock() to control how the instance-creating method is called. But I'm not sure whether the second class, MySingletonNTSafe, is thread-safe. If it is not, how could that be proved?
I have found the thread Creating a singleton in python, but it couldn't solve my puzzle.
The following is my code:
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-

import threading
import Queue
import time
import random

class MySingletonTSafe(object):
    """implement Singleton pattern, thread-safe"""
    _instance = None
    _lock = threading.Lock()

    def __new__(cls):
        if MySingletonTSafe._instance is None:
            with MySingletonTSafe._lock:
                if MySingletonTSafe._instance is None:
                    MySingletonTSafe._instance = super(MySingletonTSafe, cls).__new__(cls)
        return MySingletonTSafe._instance

    def __init__(self):
        MySingletonTSafe._instance = self

class MySingletonNTSafe(object):
    """implement Singleton pattern, not thread-safe"""
    _instance = None

    def __new__(cls):
        if MySingletonNTSafe._instance is None:
            # time.sleep(random.randint(1, 10))
            MySingletonNTSafe._instance = super(MySingletonNTSafe, cls).__new__(cls)
        return MySingletonNTSafe._instance

    def __init__(self):
        MySingletonNTSafe._instance = self

class Test(object):
    """Test whether the MySingleton classes work"""
    def __init__(self, tnum=3):
        self.tnum = tnum
        self.queue_st = Queue.Queue()
        self.queue_nst = Queue.Queue()

    def run(self):
        threads_s = [threading.Thread(target=self._run_st) for i in xrange(self.tnum)]
        for t in threads_s:
            t.start()
            t.join()

        threads_ns = [threading.Thread(target=self._run_nst) for i in xrange(self.tnum)]
        for t in threads_ns:
            t.start()
            t.join()

    def _run_st(self):
        # thread-safe case
        obj = MySingletonTSafe()
        self.queue_st.put(obj)

    def _run_nst(self):
        # not-thread-safe case
        obj = MySingletonNTSafe()
        self.queue_nst.put(obj)

if __name__ == '__main__':
    test = Test(tnum=10)
    test.run()

    objs_st = []
    while not test.queue_st.empty():
        objs_st.append(test.queue_st.get())
    last = objs_st.pop()
    for obj in objs_st:
        if not last == obj:
            print('NOT IDENTICAL')
            break
    else:
        print('IDENTICAL')

    objs_nst = []
    while not test.queue_nst.empty():
        objs_nst.append(test.queue_nst.get())
    last = objs_nst.pop()
    for obj in objs_nst:
        if not last == obj:
            print('NOT IDENTICAL')
            break
    else:
        print('IDENTICAL')
In this case, you can just come up with an execution where MySingletonNTSafe does the wrong thing in a multi-threaded environment. Here's one:
Thread 1 calls MySingletonNTSafe(). It sees that MySingletonNTSafe._instance is None, then gets interrupted.
Thread 2 calls MySingletonNTSafe(). It sees that MySingletonNTSafe._instance is still None, creates an instance, assigns it to _instance, and returns it.
Thread 1 resumes execution. It creates an instance, assigns it to _instance, and returns it.
At this point, we have two instances, and the class has failed. It is not threadsafe.
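A side note of my own, not from the answer above: Test.run() joins each thread immediately after starting it, so the threads execute one at a time and the race can never show up in that test. To actually observe MySingletonNTSafe failing, start all the threads before joining any of them and uncomment the time.sleep in __new__ to widen the window:

# My own sketch (Python 2 style, to match the question): start everything
# first, join afterwards, so the threads really overlap inside __new__.
threads = [threading.Thread(target=test._run_nst) for i in xrange(test.tnum)]
for t in threads:
    t.start()   # all threads can now race past the `_instance is None` check
for t in threads:
    t.join()

With the sleep uncommented, several threads can pass the None check before any assignment happens, each creating its own instance, and the comparison loop then prints NOT IDENTICAL.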

Sub Process in its own Thread

I'm wondering if the following class is sound. I'm using it to launch a bunch of simulators for each test in my test environment.
import subprocess
import threading
import time

class SubProcessInOwnThread(threading.Thread):
    def __init__(self, arguments, currentWorkingDirectory):
        self.arguments = arguments
        self.currentWorkingDirectory = currentWorkingDirectory
        threading.Thread.__init__(self)
        self.isTerminated = False

    def run(self):
        try:
            self.subProcess = subprocess.Popen(self.arguments, cwd=self.currentWorkingDirectory)
            self.subProcess.wait()
        finally:
            self.isTerminated = True

    def kill(self):
        while not self.isTerminated:
            try:
                self.subProcess.kill()
            except:
                time.sleep(0.1)
Some scenarios:
# Normal
subProcessThreadArguments = ["cmd.exe"]
subProcessThread = SubProcessInOwnThread(subProcessThreadArguments, r"C:\\")
subProcessThread.start()
time.sleep(5)
subProcessThread.kill()

# Process killed very quickly
subProcessThreadArguments = ["cmd.exe"]
subProcessThread = SubProcessInOwnThread(subProcessThreadArguments, r"C:\\")
subProcessThread.start()
subProcessThread.kill()

# Incorrect configuration
subProcessThreadArguments = ["cmdsfgfg.exe"]
subProcessThread = SubProcessInOwnThread(subProcessThreadArguments, r"C:\\")
subProcessThread.start()
time.sleep(5)
subProcessThread.kill()
So I can create simulators like this:
subProcessThreadArguments1 = ["sim1.exe"]
subProcessThread1 = SubProcessInOwnThread(subProcessThreadArguments1, r"C:\\")
subProcessThread1.start()

subProcessThreadArguments2 = ["sim2.exe"]
subProcessThread2 = SubProcessInOwnThread(subProcessThreadArguments2, r"C:\\")
subProcessThread2.start()

# do test...

subProcessThread1.kill()
subProcessThread2.kill()
I'd be interested in any improvements. Should I consider the use of the with keyword? If so, what would the benefits be?
Thanks!
I don't see the point of having a separate thread stuck in wait() here. Working directly on the subprocess would look like this:
import subprocess
import time

class SubProcessWithoutThread(object):
    def __init__(self, arguments, currentWorkingDirectory):
        self.arguments = arguments
        self.currentWorkingDirectory = currentWorkingDirectory
        self.isTerminated = False

    def start(self):
        self.subProcess = subprocess.Popen(self.arguments, cwd=self.currentWorkingDirectory)

    def kill(self):
        while self.subProcess.poll() is None:
            try:
                self.subProcess.kill()
            except:
                time.sleep(0.1)

    def __enter__(self):
        self.start()
        return self  # so `with ... as proc:` binds the instance

    def __exit__(self, *x):
        self.kill()
(untested)
I have added the methods for a context manager, but I cannot see how that would help you, since it would mean quite a stack of with statements for you to create, including the necessary indentation.
But maybe I have got your intention wrong...
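One way around that pile of with statements, an addition of my own rather than part of the answer: contextlib.ExitStack enters any number of context managers in a loop and still guarantees cleanup:

import contextlib

# Hedged sketch: ExitStack calls __enter__ on each simulator and runs every
# __exit__ (and therefore kill()) when the block ends, even on exceptions.
with contextlib.ExitStack() as stack:
    sims = [stack.enter_context(SubProcessWithoutThread([exe], r"C:\\"))
            for exe in ("sim1.exe", "sim2.exe")]
    # do test...
# all simulators have been killed here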
