I have created a class that can be passed into different processes, and its values can be read and edited from each of them. Everything works except the add-value function. This is my class:
class data(object):
    def __init__(self):
        Manager = multiprocessing.Manager()
        self.lock = multiprocessing.Lock()
        self.changeAmt = 0
        self.jump = multiprocessing.Value(ctypes.c_float, 0.02)
        self.has_best = multiprocessing.Value(ctypes.c_bool, False)
        self.best_list = Manager.list()
        self.command_original = 'start sequence'
        self.command = multiprocessing.Value(ctypes.c_wchar_p, self.command_original)
        self.get_best = multiprocessing.Value(ctypes.c_bool, False)
        self.done = multiprocessing.Value(ctypes.c_bool, False)

    def get(self, name, model):
        self.model = model
        if model != 'Array':
            with multiprocessing.Lock():
                exec('self.now = self.'+name)
            return self.now.value
        if self.model == 'Array':
            exec('self.now = self.'+name)
            return self.now

    def edit(self, name, model, changeAmt):
        self.changeAmt = changeAmt
        if model != 'Array':
            with multiprocessing.Lock():
                exec('self.'+name+'.value = self.changeAmt')
        if model == 'Array':
            for i in range(len(changeAmt)):
                exec('self.'+name+'.append(changeAmt[i])')

    def addValue(self, name, value):
        self.now = value
        exec('self.'+name+' = self.now')
The class can then be used correctly with these lines of code in the main function:
ult = data()
p1 = Process(target = try2, args=(ult,))
p1.start()
p2 = Process(target = nextTry, args=(ult,))
p2.start()
p1.join()
p2.join()
When I use the addValue function, the first process works fine and can return the value when the get function is called. However, when I call the get function from the second process it fails. One idea for a fix is to use a multiprocessing Queue and have both processes add the value, but I suspect that even if both processes have added the value, an update made by one of them would not be visible to the other. Is what I am trying to accomplish doable, or should I just initialize all of my variables from the start?
I found that the only way to do this is to use a Manager dictionary. I have posted a working example of the initializer and the add-value method below.
    def __init__(self):
        Manager = multiprocessing.Manager()
        self.allDict = Manager.dict()
        self.allDict['status'] = 'nothing'
        self.allDict['done'] = False
        self.allDict['changeAmt'] = 0
        self.allDict['command'] = 'start sequence'
        self.allDict['get_best'] = False
Then, if you want to add a regular value:
    def addValue(self, name, value):
        self.allDict[name] = value
This is honestly probably one of the original use cases for the Manager dictionary. Anyway, I hope this helps anyone who needs this information and stumbles across this post.
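For anyone who wants a runnable version of the pattern: below is a minimal, self-contained sketch, with hypothetical writer/reader functions standing in for my real process targets. Anything one process writes through the Manager dict is visible to the other:

import multiprocessing

class data(object):
    def __init__(self):
        manager = multiprocessing.Manager()
        self.allDict = manager.dict()  # proxy object shared by every process it is passed to
        self.allDict['command'] = 'start sequence'

    def addValue(self, name, value):
        self.allDict[name] = value

    def get(self, name):
        return self.allDict.get(name)

def writer(ult):
    ult.addValue('status', 'running')  # written in one process...

def reader(ult):
    print(ult.get('status'))  # ...read in another

if __name__ == '__main__':
    ult = data()
    p1 = multiprocessing.Process(target=writer, args=(ult,))
    p1.start()
    p1.join()
    p2 = multiprocessing.Process(target=reader, args=(ult,))
    p2.start()
    p2.join()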
Related
How can I initialize just one of these objects, or is that even possible?
startsimulation = {}
startsimulation['obj'] = StartSimulation(client_socket)
startsimulation['threadSimulation'] = Thread(target=startsimulation['obj'].start_simulation, daemon=True)
startreading = {}
startreading['obj'] = StartReading(client_socket)
startreading['threadReading'] = Thread(target=startreading['obj'].start_reading, daemon=True)
Because with the two separate initializations, my code ends up with things like this (it's not wrong, but it's not efficient):
startsimulation['obj'].client = client_socket
startsimulation['obj'].send_handler_connect()
startsimulation['obj'].is_connected = True
startreading['obj'].client = client_socket
startreading['obj'].send_handler_connect()
startreading['obj'].is_connected = True
I would suggest using a class, like this:
class Wrapper():
    def __init__(self, obj, target):
        self.obj = obj
        self.thread = Thread(target=target, daemon=True)

    def send_handler_connect(self):
        self.obj.send_handler_connect()

sim = StartSimulation(client_socket)
reader = StartReading(client_socket)
obj1 = Wrapper(sim, sim.start_simulation)
obj2 = Wrapper(reader, reader.start_reading)
I assume you might expect something like
a = b = 5
But you cannot apply that kind of assignment chaining to your application, as startsimulation and startreading clearly refer to different instances, so the threads must be assigned with separate Thread calls.
But later on, you can simplify it to:
startreading['obj'].is_connected = startsimulation['obj'].is_connected = True
Or, if your aim is brevity rather than explicitness, you could handle the repeated setup with a function, as sketched below.
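A sketch of that last idea (the connect helper name is mine, not from your code); the three repeated lines collapse into one call per handler:

def connect(handler, client_socket):
    # hypothetical helper gathering the repeated setup steps
    handler.client = client_socket
    handler.send_handler_connect()
    handler.is_connected = True

connect(startsimulation['obj'], client_socket)
connect(startreading['obj'], client_socket)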
Assume I have two classes that use threads
class foo(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self, name="foo=>bar")
        self.var1 = {}

    def run(self):
        while True:
            value, name = getvalue()  # name is a string
            self.var1[name] = value
            bar(self)

class bar(threading.Thread):
    def __init__(self, fooInstance):
        threading.Thread.__init__(self, name="bar")

    def run(self):
        while True:
            arg = myfunction()  # some function (not shown for simplicity)
            val = myOtherfunction(fooInstance.var1[arg])  # other function
            print(val)
f = foo()
f.start()
The variable var1 in foo will change over time and bar needs to be aware of these changes. It makes sense to me, but I wonder if there is something fundamental here that could fail eventually. Is this correct in Python?
The actual sharing part is the same question as "how do I share a value with another object?" without threads, and all the same solutions will work.
For example, you're already passing the foo instance into the bar initializer, so just get it from there:
class bar(threading.Thread):
    def __init__(self, fooInstance):
        threading.Thread.__init__(self, name="bar")
        self.var1 = fooInstance.var1
But is this thread-safe?
Well, yes, but only because you never actually start the background thread. But I assume in your real code, you're going to have two threads running at the same time, both accessing that var1 value. In which case it's not thread-safe without some kind of synchronization. For example:
class foo(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self, name="foo=>bar")
        self.var1 = {}
        self.var1lock = threading.Lock()

class bar(threading.Thread):
    def __init__(self, fooInstance):
        threading.Thread.__init__(self, name="bar")
        self.var1 = fooInstance.var1
        self.var1lock = fooInstance.var1lock
And now, instead of this:
self.var1[name] = value
… you do this:
with self.var1lock:
    self.var1[name] = value
And likewise, instead of this:
val = myOtherfunction(fooInstance.var1[arg])  # other function
… you do this:
with self.var1lock:
    var1arg = self.var1[arg]
val = myOtherfunction(var1arg)
Or… as it turns out, in CPython, updating a value for a single key in a dict (only a builtin dict, not a subclass or custom mapping class!) has always been atomic, and probably always will be. If you want to rely on that fact, you can. But I'd only do that if the lock turned out to be a significant performance issue. And I'd comment every use of it to make it clear, too.
If you'd rather pass values than share them, the usual answer is queue.Queue or one of its relatives.
But this requires a redesign of your program. For example, maybe you want to pass each new/changed key-value pair over the queue. That would go something like this:
import copy
import queue
import threading

class foo(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self, name="foo=>bar")
        self.var1 = {}
        self.q = queue.Queue()

    def run(self):
        b = bar(self)
        b.start()
        while True:
            value, name = getvalue()  # name is a string
            self.var1[name] = value
            self.q.put((name, value))

class bar(threading.Thread):
    def __init__(self, fooInstance):
        threading.Thread.__init__(self, name="bar")
        self.var1 = copy.deepcopy(fooInstance.var1)
        self.q = fooInstance.q

    def _checkq(self):
        # drain any pending updates from foo without blocking
        while True:
            try:
                key, val = self.q.get_nowait()
            except queue.Empty:
                break
            else:
                self.var1[key] = val

    def run(self):
        while True:
            self._checkq()
            arg = myfunction()  # some function (not shown for simplicity)
            val = myOtherfunction(self.var1[arg])  # other function
            print(val)
I am trying to change my code to a more object-oriented format. In doing so I am lost on how to 'visualize' what is happening with multiprocessing and how to solve it. On the one hand, the class should track changes to its variables across functions; on the other, I believe multiprocessing works on a copy of the objects, which the original instance would not have access to. I need to figure out a way to manipulate class instances, within a class, using multiprocessing, and have the parent class retain all of the manipulated values in the nested objects.
A simple version (OLD CODE):
def runMultProc():
    ...
    dictReports = {}
    listReports = ['reportName1.txt', 'reportName2.txt']
    tasks = []
    pool = multiprocessing.Pool()
    for report in listReports:
        if report not in dictReports:
            dictReports[today][report] = {}
            tasks.append(pool.apply_async(worker, args=([report, dictReports[today][report]])))
        else:
            continue
    for task in tasks:
        report, currentReportDict = task.get()
        dictReports[report] = currentReportDict

def worker(report, currentReportDict):
    <Manipulate_reports_dict>
    return report, currentReportDict
NEW CODE:
class Transfer():
    def __init__(self):
        self.masterReportDictionary[<todays_date>] = [reportObj1, reportObj2]

    def processReports(self):
        self.pool = multiprocessing.Pool()
        self.pool.map(processWorker, self.masterReportDictionary[<todays_date>])
        self.pool.close()
        self.pool.join()

    def processWorker(self, report):
        # process and manipulate report, currently no return
        report.name = 'foo'
        report.path = '/path/to/report'

class Report():
    def __init__(self):
        self.name = ''
        self.path = ''
        self.errors = {}
        self.runTime = ''
        self.timeProcessed = ''
        self.hashes = {}
        self.attempts = 0
I don't think this code does what I need it to do, which is to have it process the list of reports in parallel AND, as processWorker manipulates each report class object, store those results. As I am fairly new to this I was hoping someone could help.
The big difference between the two is that the first one built a dictionary and returned it. The second model shouldn't really be returning anything; I just need the report objects to finish being processed and end up holding the relevant information.
Thanks!
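Worth noting: a Pool worker runs in a child process on a pickled copy of its argument, so mutations made inside processWorker never reach the parent's Report objects. A common workaround is to return the modified object from the worker and reassemble the results in the parent. A minimal sketch of that idea (the worker body here is a placeholder, not the original processing logic):

import multiprocessing

class Report():
    def __init__(self, name=''):
        self.name = name
        self.path = ''

def processWorker(report):
    # runs in a child process on a copy of report; mutate it and hand it back
    report.path = '/path/to/' + report.name
    return report

if __name__ == '__main__':
    reports = [Report('reportName1.txt'), Report('reportName2.txt')]
    with multiprocessing.Pool() as pool:
        # map returns the modified copies; replace the originals with them
        reports = pool.map(processWorker, reports)
    print([r.path for r in reports])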
I have a class which is pulling JSON data with keys, but the problem is that per instance of this class, the JSON data may not have keys for everything I am trying to grab. Currently, my class is set up like this:
class Show():
    def __init__(self, data):
        self.data = data
        self.status = self.data['status']
        self.rating = self.data['rating']
        self.genres = self.data['genres']
        self.weight = self.data['weight']
        self.updated = self.data['updated']
        self.name = self.data['name']
        self.language = self.data['language']
        self.schedule = self.data['schedule']
        self.url = self.data['url']
        self.image = self.data['image']
And so on; there are more attributes than that. I'm trying to avoid the messiness of having a try-except block for EACH AND EVERY one of those (27) lines. Is there a better way? Ultimately, I want an attribute to be assigned None if its JSON key doesn't exist.
If you're going to set a default value to the attribute if it's not in the data dictionary, use data.get('key') rather than data['key']. The get method will return None if the key does not exist, rather than raising a KeyError exception. If you want a different default value than None, you can pass a second argument to get and that is what will be returned.
So, your code could become:
class Show():
    def __init__(self, data):
        self.data = data
        self.status = self.data.get('status')
        self.rating = self.data.get('rating')
        self.genres = self.data.get('genres')
        self.weight = self.data.get('weight')
        self.updated = self.data.get('updated')
        self.name = self.data.get('name')
        self.language = self.data.get('language')
        self.schedule = self.data.get('schedule')
        self.url = self.data.get('url')
        self.image = self.data.get('image')
Use dict.get, which provides a default value instead of raising an exception for missing keys.
For example, you can change this:
self.status = self.data['status']
into this:
self.status = self.data.get('status')
You could change your code to something like:
class Show():
    def __init__(self, data):
        self.data = data
        self.__dict__.update(data)
data = {'status': True, 'ratings': [1,2,3], 'foo': "blahblah"}
aShow = Show(data)
"""
>>> aShow.status
True
>>> aShow.ratings
[1,2,3]
>>> aShow.something_not_in_dict
AttributeError: Show instance has no attribute 'something_not_in_dict'
"""
This achieves much the same thing in one line, except that accessing an attribute on your Show instance whose key isn't in the data dictionary raises an AttributeError instead of giving you None.
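If you want the brevity of the update approach but still get None for missing keys, as the question asks, one option is to loop over the expected keys with get. A sketch, where the FIELDS list is illustrative rather than the full set of 27 keys:

class Show():
    FIELDS = ['status', 'rating', 'genres', 'weight', 'updated',
              'name', 'language', 'schedule', 'url', 'image']

    def __init__(self, data):
        self.data = data
        for field in self.FIELDS:
            # keys missing from the JSON become None instead of raising an error
            setattr(self, field, data.get(field))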
So the situation is that I have multiple methods, which might be run in threads simultaneously, but each needs its own lock against being run again in another thread until it has finished. They are established by initialising a class with some data-processing options:
class InfrequentDataDaemon(object): pass
class FrequentDataDaemon(object): pass

def addMethod(name):
    def wrapper(f):
        setattr(processor, f.__name__, staticmethod(f))
        return f
    return wrapper

class DataProcessors(object):
    lock = threading.Lock()

    def __init__(self, options):
        self.common_settings = options['common_settings']
        self.data_processing_configurations = options['data_processing_configurations']  # configs for each processing method
        self.data_processing_types = options['data_processing_types']
        self.Data_Processing_Functions = {}

        # I __init__ each processing method as a separate function so that it can be locked
        for type in options['data_processing_types']:
            def bindFunction1(name):
                def func1(self, data=None, lock=None):
                    config = self.data_processing_configurations[data['type']]  # get the right config for the data type
                    with lock:
                        FetchDataBaseStuff(data['type'])
                        # I don't want this to be run more than once at a time per DataProcessing type,
                        # but it's fine if multiple DoSomethings run at once, as long as each data type is different!
                        DoSomething(data, config)
                        WriteToDataBase(data['type'])
                func1.__name__ = "Processing_for_{}".format(type)
                self.Data_Processing_Functions[func1.__name__] = func1  # add this function to the dictionary
            bindFunction1(type)

        # Then I add some methods to a daemon that are going to check if our DataProcessors need to be called
        def fast_process_types(data):
            if not example_condition is True: return
            if not data['type'] in self.data_processing_types: return  # check that we are doing something with this type of data
            threading.Thread(target=self.Data_Processing_Functions["Processing_for_{}".format(data['type'])], args=(self, data, lock)).start()

        def slow_process_types(data):
            if not some_other_condition is True: return
            if not data['type'] in self.data_processing_types: return  # check that we are doing something with this type of data
            threading.Thread(target=self.Data_Processing_Functions["Processing_for_{}".format(data['type'])], args=(self, data, lock)).start()

        addMethod(InfrequentDataDaemon)(slow_process_types)
        addMethod(FrequentDataDaemon)(fast_process_types)
The idea is to lock each method in DataProcessors.Data_Processing_Functions, so that each method is only run by one thread at a time (and any other threads for the same method are queued). How does locking need to be set up to achieve this effect?
I'm not sure I completely follow what you're trying to do here, but could you just create a separate threading.Lock object for each type?
class DataProcessors(object):
    def __init__(self, options):
        self.common_settings = options['common_settings']
        self.data_processing_configurations = options['data_processing_configurations']  # configs for each processing method
        self.data_processing_types = options['data_processing_types']
        self.Data_Processing_Functions = {}
        self.locks = {}

        # __init__ each processing method as a separate function so that it can be locked
        for type in options['data_processing_types']:
            self.locks[type] = threading.Lock()
            def bindFunction1(name):
                def func1(self, data=None):
                    config = self.data_processing_configurations[data['type']]  # get the right config for the data type
                    with self.locks[data['type']]:
                        FetchDataBaseStuff(data['type'])
                        DoSomething(data, config)
                        WriteToDataBase(data['type'])
                func1.__name__ = "Processing_for_{}".format(type)
                self.Data_Processing_Functions[func1.__name__] = func1  # add this function to the dictionary
            bindFunction1(type)

        # Then add some methods to a daemon that are going to check if our DataProcessors need to be called
        def fast_process_types(data):
            if not example_condition is True: return
            if not data['type'] in self.data_processing_types: return  # check that we are doing something with this type of data
            threading.Thread(target=self.Data_Processing_Functions["Processing_for_{}".format(data['type'])], args=(self, data)).start()

        def slow_process_types(data):
            if not some_other_condition is True: return
            if not data['type'] in self.data_processing_types: return  # check that we are doing something with this type of data
            threading.Thread(target=self.Data_Processing_Functions["Processing_for_{}".format(data['type'])], args=(self, data)).start()

        addMethod(InfrequentDataDaemon)(slow_process_types)
        addMethod(FrequentDataDaemon)(fast_process_types)
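Stripped of the surrounding plumbing, the core of this answer is just one threading.Lock per data type: calls for the same type serialize, while calls for different types run in parallel. A minimal, self-contained sketch (process_item and the type names here are placeholders, not from the original code):

import threading
import time

locks = {'fast': threading.Lock(), 'slow': threading.Lock()}

def process_item(data_type, payload):
    # only one thread per data_type gets past this point at a time;
    # threads handling other types are unaffected
    with locks[data_type]:
        time.sleep(0.1)  # stand-in for fetch / process / write
        print(data_type, payload)

threads = [threading.Thread(target=process_item, args=('fast', i)) for i in range(3)]
threads += [threading.Thread(target=process_item, args=('slow', i)) for i in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()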