Communicating and comparing between objects in python-multiprocessing? - python

I am trying to find a way to compare different objects (inherited from the Thread class) in a way that keeps parallelism (real-time processing).
Every worker has three fields (message, count, n). I am updating count every time. Let's say that I have three worker threads. I need to compare in my server based on the field count; how can I access and compare the Worker.count of every worker in a way that keeps parallelism?
from Queue import Queue
from threading import Thread
import time
class Worker(Thread):
    """Worker thread that repeatedly prints its message and bumps a counter.

    Attributes:
        message: text printed on every iteration.
        count:   number of iterations completed so far (read by the server).
        n:       seconds slept between iterations.
    """

    def __init__(self, message, n):
        super().__init__()
        self.message = message
        self.count = 0
        self.n = n

    def run(self):
        # Runs forever: print, advance the counter, pause n seconds.
        while True:
            print(self.message)
            self.count += 1
            time.sleep(self.n)
class Comparator(Thread):
    """Thread meant to periodically report the max `count` across the workers."""

    def __init__(self, message, n):
        Thread.__init__(self)
        self.message = message
        self.n = n

    def run(self):
        while True:
            # NOTE(review): `threads` is a module-level name that is only
            # assigned *after* this thread is started below, so this raises
            # NameError -- the accepted answer passes the list in explicitly.
            # Also, assigning to the name `max` shadows the builtin, so the
            # second loop iteration would fail with TypeError.
            max= max([x.count for x in threads]) # how can I access to other threads
            # Python 2 print statement; the rest of the snippet uses print().
            print "max", max
            time.sleep(self.n)
thread1 = Worker("Test-1", 1)
thread2 = Worker("Test-2", 3)
s = Comparator("Test-3", 2)
s.start()
# NOTE(review): joining here blocks the main thread on the Comparator's
# infinite loop, so the workers below are never started; `threads` is also
# still undefined when Comparator.run() first reads it.
s.join()
threads = [thread1, thread2]
for g in threads:
    g.start()
for worker in threads:
    # wait for workers
    worker.join()
NOTE Using shared object here is not a good solution for me, using Queue() for example is not what I want, I need to do comparision based on updated field in the object that I update on the go (for simplicity, I use max() ).

You can pass the threads list to the Comparator __init__() method:
[...]
class Comparator(Thread):
    """Thread that periodically prints the largest `count` among `threads`.

    Args:
        message: label for this comparator (kept for parity with Worker).
        n:       seconds to sleep between comparisons.
        threads: list of Worker objects whose `count` fields are compared.
    """

    def __init__(self, message, n, threads):
        Thread.__init__(self)
        # Keep message and n as well -- run() sleeps on self.n, so dropping
        # these assignments would raise AttributeError on the first iteration.
        self.message = message
        self.n = n
        self.threads = threads

    def run(self):
        while True:
            # Do not assign to the name `max`: that shadows the builtin and
            # makes the second iteration fail with TypeError.
            highest = max(x.count for x in self.threads)
            print("max", highest)
            time.sleep(self.n)
[...]
threads = [thread1, thread2]
# The worker list is handed to the Comparator up front, so its run() loop
# can read each worker's count without relying on a global name.
s = Comparator("Test-3", 2, threads)

Related

Queues or Dictionaries for shared resources in threads

I have two threads with while loops in them. The first process data that the second needs to elaborate in parallel. I need to share a variable.
let's introduce dummy input:
# Dummy input stream shared by both threads below.
data = iter([1,2,3,4,5,6,7,8,9])
My first class of Thread:
import threading
from queue import Queue
import time
class Thread1(threading.Thread):
    """Producer: pulls items from the global `data` iterator, recording each
    one in self._download (keyed by iteration index) and pushing it onto the
    shared queue for the consumer thread.
    """

    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue
        # Must live on the instance: a bare `_download = {}` inside __init__
        # is a local variable, so run() (and other threads) could never see it.
        self._download = {}

    def run(self):
        i = 0
        while True:
            # Call next() only once per iteration; the original called it
            # twice, silently dropping every other item, and an uncaught
            # StopIteration would kill the thread with a traceback.
            try:
                item = next(data)
            except StopIteration:
                break  # input exhausted -- end the thread cleanly
            self._download[i] = item
            self.queue.put(item)
            time.sleep(1)
            i += 1
My second class of Thread:
class Thread2(threading.Thread):
    """Consumer: endlessly drains one item from the shared queue, pausing
    three seconds between reads."""

    def __init__(self, queue):
        super().__init__()
        self.queue = queue

    def run(self):
        delay = 3  # seconds between reads
        while True:
            _ = self.queue.get()
            time.sleep(delay)
with the main method:
# Wire both threads to the same bounded queue and start them.
q = Queue(maxsize=10)
t = Thread1(q)
s = Thread2(q)
t.start()
s.start()
I illustrated the two alternatives for the case. I can access the queue variable from the second Thread, but I also want the second Thread to access the dictionary.
what can I do to access also the dictionary from Thread2?
for which choice should I opt?

Python Queues and ThreadPoolExecutor

My question is around Queues and using ThreadPoolExecutor. If I understand the Python docs for Queues, I can have code somewhat like this and not have to worry about needing another lock in Class B to control which thread is adding items into the queue, since the Queue implements multi-producer, multi-consumer semantics.
class A:
    # NOTE(review): sketch code -- the methods are missing `self` parameters,
    # and the locals below would need to be `self.pool`, `self.buffer` and
    # `self._lock` for add_record_id's `self._lock` to resolve.
    def __init__(max_worker = 1):
        pool = ThreadPoolExecutor(max_worker)
        buffer = {}
        _lock = threading.RLock()
    def add_record_id(id, item):
        # `id` shadows the builtin of the same name.
        with self._lock:
            buffer[id].add(item, pool)
class B:
    # NOTE(review): sketch code -- `q` would need to be `self.q`, and `id`
    # is not defined in add()'s scope as written.
    def __init__():
        q = queue.Queue()
    def add(item, pool):
        # Indentation reconstructed; the original nesting of pool.submit()
        # relative to the `if` is ambiguous in the source.
        if id >= 0:
            q.put(item)
            pool.submit(background_remover)

MultiThreading with Python

This is a Producer Consumer Problem. I need a single producer and multiple consumers to access the shared data cell and each consumer needs to access the produced data before the producer makes additional data. The code works fine when there is a single consumer. I have attempted to make a list of the Producer and Consumers in order to .join() and .start() them. The program works so far as the first consumer, but hangs up when it gets to the second consumer. I have tried to change the locking mechanisms from "notify" to "notifyAll" in the getData and setData, I am a beginner in python and this stuff is pretty foreign to me but I have been trying stuff for 10 hours and would really appreciate some help.
import time, random
from threading import Thread, currentThread, Condition
class SharedCell(object):
    """One-slot producer/consumer cell guarded by a condition variable.

    `writeable` is True when the producer may store a new value and False
    while a value is waiting to be consumed.
    """

    def __init__(self):
        self.data = -1          # sentinel: nothing produced yet
        self.writeable = True   # producer goes first
        self.condition = Condition()

    def setData(self, data):
        # Block until the previous value has been consumed.
        with self.condition:
            while not self.writeable:
                self.condition.wait()
            print("%s setting data to %d" % \
                (currentThread().getName(), data))
            self.data = data
            self.writeable = False
            self.condition.notifyAll()

    def getData(self):
        # Block until the producer has stored a fresh value.
        with self.condition:
            while self.writeable:
                self.condition.wait()
            print(f'accessing data {currentThread().getName()} {self.data}')
            self.writeable = True
            self.condition.notifyAll()
            # `with` releases the lock on the way out, matching the original
            # release-before-return ordering.
            return self.data
class Producer(Thread):
    """Writes accessCount successive integers (1..accessCount) into the cell,
    sleeping a random 1..sleepMax seconds before each write."""

    def __init__(self, cell, accessCount, sleepMax):
        Thread.__init__(self, name = "Producer")
        self.accessCount = accessCount
        self.cell = cell
        self.sleepMax = sleepMax

    def run(self):
        print("%s starting up" % self.getName())
        for step in range(1, self.accessCount + 1):
            time.sleep(random.randint(1, self.sleepMax))
            self.cell.setData(step)
        print("%s is done producing\n" % self.getName())
class Consumer(Thread):
    """Reads accessCount values from the cell, sleeping a random
    1..sleepMax seconds before each read."""

    def __init__(self, cell, accessCount, sleepMax):
        Thread.__init__(self)
        self.accessCount = accessCount
        self.cell = cell
        self.sleepMax = sleepMax

    def run(self):
        print("%s starting up" % self.getName())
        for _ in range(self.accessCount):
            time.sleep(random.randint(1, self.sleepMax))
            value = self.cell.getData()
        print("%s is done consuming\n" % self.getName())
def main():
    # Drives one producer and two consumers over a single shared cell.
    accessCount = int(input("Enter the number of accesses: "))
    sleepMax = 4
    cell = SharedCell()
    producer = Producer(cell, accessCount, sleepMax)
    consumer = Consumer(cell, accessCount, sleepMax)
    consumerTwo = Consumer(cell, accessCount, sleepMax)
    threads = []
    threads.append(producer)
    threads.append(consumer)
    threads.append(consumerTwo)
    print("Starting the threads")
    for thread in threads:
        thread.start()
        # NOTE(review): join() inside the start loop serializes the threads --
        # each must finish before the next even starts, which is why the
        # program hangs; see the answer below for the corrected two-loop form.
        thread.join()

main()
The join function blocks the current thread and waits until the indicated thread terminates. In your loop at the end of your main function, why do you join each thread immediately after starting it? That would result in starting thread 1, and then waiting for it to terminate before starting thread 2, and then waiting for it to terminate before starting thread 3, and so on.
Perhaps you meant something like this:
# First start every thread, then join them all, so all three run concurrently.
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()
so that every thread is started before you wait for them to terminate.

How to identify if Python Threads with Queue are done with task?

Here i have MazeRunner Class which put all elements of self.boxes in queue and run thread on them until all of the queue becomes empty q.empty() .
Here problem is how do i actually identify if my program is done performing threads on all elements which are in queue of self.boxes & return True.
It looks challenging because our threads are in while loop which keep changes based on self.boxes length & self.threads we defined.
I have tried putting all threads in a list and joining them all with t.join(), but no luck. Any help?
import threading,queue,time
class MazeRunner:
    """Spreads the elements of self.boxes over a queue and processes them
    with 5 worker threads started from __init__."""

    def __init__(self):
        self.q = queue.Queue()
        self.boxes = [1,2,3,4,5,6,7] ## `7` elements of list
        self.threads = 5
        for i in self.boxes:
            self.q.put(i) ### ADDING Every element of list to queue
        # NOTE(review): the Thread objects are never kept, so there is
        # nothing to join() later -- the answer below stores them in a list.
        for j in range(self.threads): ### for i in range(5) threads
            t = threading.Thread(target=self.ProcessQueue)
            t.start() ### Started `5` threads on `7` elements

    def ProcessQueue(self):
        # NOTE(review): empty()-then-get() is racy -- another worker can
        # drain the queue between the two calls, leaving get() blocked.
        while not self.q.empty():
            each_element = self.q.get()
            self.SleepFunction(each_element)
            self.q.task_done()

    def SleepFunction(self,each_element):
        print("STARTING : ",each_element)
        time.sleep(10)
        print("DONE : ",each_element)
lets_try = MazeRunner()
# NOTE(review): a MazeRunner instance never compares equal to True, so this
# branch is unreachable -- the answer below replaces it with check_completed().
if lets_try == True:
    print("All Threads Done on Elements")
You need to wait until all threads are done calling Thread.join:
HOWTO:
Replace your self.threads = 5 expression with class constant:
THREAD_NUM = 5  # class-level constant: how many worker threads to start
Put additional attribute threads (for a list of threads) into your __init__ method:
...
# Collects the started Thread objects so they can be joined later.
self.threads = []
Put each created thread into threads list:
# Keep a reference to every thread before starting it.
for j in range(self.THREAD_NUM):
    t = threading.Thread(target=self.ProcessQueue)
    self.threads.append(t)
    t.start()
Define method like check_completed to ensure all threads are terminated (done):
....
def check_completed(self):
    # join() blocks until each worker terminates, so returning True here
    # means every queued element has been processed.
    for t in self.threads:
        t.join()
    return True
The way you need to check "all done":
m_runner = MazeRunner()
# check_completed() only returns after all worker threads have been joined.
if m_runner.check_completed():
    print("All Threads Done on Elements")

Delete Objects in a List as Passed to Multiprocessing

I need to pass each object in a large list to a function. After the function completes I no longer need the object passed to the function and would like to delete the object to save memory. If I were working with a single process I would do the following:
# Process every object in mylist, dropping each from the list as it is
# consumed so it can be garbage-collected once myfunc returns.
result = []
while mylist:
    # Fixed: the original was missing the closing parenthesis on append(),
    # which is a SyntaxError. pop() removes the object before the call, so
    # the list no longer holds a reference after processing.
    result.append(myfunc(mylist.pop()))
As I loop over mylist I pop off each object in the list such that the object is no longer stored in mylist after it's passed to my function. How do I achieve this same effect in parallel using multiprocessing?
A simple consumer example (credits go here) :
import multiprocessing
import time
import random
class Consumer(multiprocessing.Process):
    """Worker process: pulls Task objects from task_queue until it receives
    the None poison pill, pushing each task's answer onto result_queue."""

    def __init__(self, task_queue, result_queue):
        super().__init__()
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        while True:
            task = self.task_queue.get()
            if task is None:
                # Poison pill means shutdown
                self.task_queue.task_done()
                return
            answer = task.process()
            # task_done() is acknowledged before the answer is published,
            # exactly as in the original ordering.
            self.task_queue.task_done()
            self.result_queue.put(answer)
class Task(object):
    """Unit of work: simulates a tenth of a second of effort, then yields a
    random integer score in [0, 100]."""

    def process(self):
        # pretend to take some time to do the work
        time.sleep(0.1)
        return random.randint(0, 100)
if __name__ == '__main__':
    # Establish communication queues
    tasks = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # Start consumers: twice the core count keeps CPUs busy while some
    # workers sleep inside Task.process().
    num_consumers = multiprocessing.cpu_count() * 2
    # Fixed: `xrange` and the bare `print` statement are Python 2 only; the
    # rest of the page uses Python 3 syntax.
    consumers = [Consumer(tasks, results) for i in range(num_consumers)]
    for consumer in consumers:
        consumer.start()

    # Enqueue jobs
    num_jobs = 10
    for _ in range(num_jobs):
        tasks.put(Task())

    # Add a poison pill for each consumer
    for _ in range(num_consumers):
        tasks.put(None)

    # Wait for all tasks to finish
    tasks.join()

    # Start printing results
    while num_jobs:
        result = results.get()
        print('Result:', result)
        num_jobs -= 1

Categories

Resources