Why is my shared integer variable only 1 byte? - python

I'm pretty new to Python and I'm very new to multiprocessing.
I'm using the multiprocessing library in Python 3.4, and my shared counter variable is maxing out at 127 and then wrapping around to -128, which leads me to believe it's only 1 byte long. Why is it not the size of a normal int?
Here's an example of my code:
import multiprocessing
from multiprocessing import Value

def process(arg1, arg2, counter, checker):
    global attempts
    attempts = counter
    global succeeded
    succeeded = checker
    while not succeeded.value:
        if <expression>:
            succeeded.value = True
            break
        elif <expression>:
            break
        else:
            <code that does stuff>
        with attempts.get_lock():
            attempts.value += 1
            print(attempts.value)

NUMOFPROCESSES = 5
attempts = Value('i', 0, lock=True)
succeeded = Value('b', False)
if __name__ == '__main__':
jobs = []
for i in range(NUMOFPROCESSES):
p = multiprocessing.Process(target=process, args=(arg1, arg2, attempts, succeeded))
jobs.append(p)
p.start()
Output would look something like this:
1
2
3
.
.
.
125
126
127
-128
-127
-126
-125
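(For reference: the width of a Value is fixed by its typecode. The quick sketch below is my addition rather than part of the original post; it shows that 'b' is a one-byte signed char that wraps exactly like this, while 'i' is a full C int.)

# Hypothetical demo, not the poster's code: compare typecode widths.
import ctypes
from multiprocessing import Value

print(ctypes.sizeof(ctypes.c_byte))   # 1 -> typecode 'b' is a signed char
print(ctypes.sizeof(ctypes.c_int))    # typically 4 -> typecode 'i' is a C int

counter = Value('b', 127)             # one signed byte: range -128..127
counter.value += 1                    # ctypes does no overflow checking; wraps around
print(counter.value)                  # -128

counter = Value('i', 127)             # C int: no wrap-around at 127
counter.value += 1
print(counter.value)                  # 128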

Related

ProcessPoolExecutor suddenly stops at 255th process

I'm trying to use concurrent.futures and to increment a number that is allocated in shared_memory. (How to properly close and unlink shared memory of multiprocessing?)
But the incrementing suddenly stops at 255 in the following code. I want to increment the number up to about 500, for my own reasons.
Can someone solve the problem?
from multiprocessing import shared_memory, Lock
from concurrent.futures import ProcessPoolExecutor as Executor, as_completed
import time, random

lock = Lock()

def counter():
    existing_shm = shared_memory.SharedMemory(name='shm')
    c = existing_shm.buf
    with lock:
        old_state = c[0]
        time.sleep(random.random()/10)
        new_state = old_state + 1
        c[0] = new_state
        print(new_state)
    existing_shm.close()

if __name__ == '__main__':
    with Executor(12) as p:
        shm = shared_memory.SharedMemory(create=True, size=1, name='shm')
        buffer = shm.buf
        buffer[0] = 0
        futures = [p.submit(counter) for i in range(500)]
        for future in as_completed(futures):
            pass
        shm.close()
        shm.unlink()
The output is as follows.
1
2
3
*snip*
253
254
255
Your shared memory is only 1 byte in size. Once your worker function, counter, attempts to store a value greater than 255, the maximum value a single byte can hold, counter will raise the following exception:
ValueError: memoryview: invalid value for format 'B'
Unfortunately, you had no code to catch this and so this exception went undetected. Had you called future.result(), that would have been another story.
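For example, a minimal change (my addition, not part of the original answer) that would have surfaced the error:

# Hypothetical fix: call result() on each future so worker exceptions propagate.
futures = [p.submit(counter) for i in range(500)]
for future in as_completed(futures):
    future.result()   # re-raises the ValueError from the worker, if any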
As I recommended in an earlier post of yours, you could use a multiprocessing.Value instance, which also uses shared memory. For example:
from multiprocessing import Value
from concurrent.futures import ProcessPoolExecutor as Executor
import time, random

def init_pool_processes(value):
    global v
    v = value

def counter():
    with v.get_lock():
        old_state = v.value
        time.sleep(random.random()/10)
        new_state = old_state + 1
        v.value = new_state
        print(new_state)

if __name__ == '__main__':
    v = Value('i', 0, lock=True)
    with Executor(12, initializer=init_pool_processes, initargs=(v,)) as p:
        for _ in range(500):
            p.submit(counter)
        # There is an implicit call here to p.shutdown(True), which
        # will wait for all tasks to complete
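If you wanted to keep shared_memory instead, a rough sketch of my own (assuming Python 3.8+, a 4-byte C int, and the fork start method so the module-level lock is inherited, as in your original code) would be to allocate four bytes and view them through memoryview.cast:

from multiprocessing import shared_memory, Lock
from concurrent.futures import ProcessPoolExecutor as Executor

lock = Lock()   # assumes the 'fork' start method, so workers inherit this lock

def counter():
    existing_shm = shared_memory.SharedMemory(name='shm')
    c = existing_shm.buf.cast('i')   # view the raw bytes as a C int
    with lock:
        c[0] += 1
        print(c[0])
    del c                            # drop the view before closing the segment
    existing_shm.close()

if __name__ == '__main__':
    shm = shared_memory.SharedMemory(create=True, size=4, name='shm')
    init = shm.buf.cast('i')
    init[0] = 0
    del init
    with Executor(12) as p:
        futures = [p.submit(counter) for i in range(500)]
        for future in futures:
            future.result()          # surface any worker exception
    shm.close()
    shm.unlink()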

Problems with Python's Multiprocessing Process Class

I am currently trying to get into Python.
To explain the code below: it is a program that compares two roulette strategies over many runs.
The color-doubling strategy without knowing which colors were hit before the start.
The color-doubling strategy knowing that red was hit 10 times before, so I start with the starting value times 2^10.
The "Player" class implements both strategies. The global "globalBal_1" and "globalBal_2" variables count the profit for each strategy.
But the algorithm itself is not the problem. The main problem is that when I run the calculating function "run" with a normal call, it delivers results. The multiprocessing processes, for some reason, do not change the global "globalBal_1" and "globalBal_2" variables and thus deliver no results; the variables keep the value "0" they were initially declared with.
What am I doing wrong there? I'm fairly new to multiprocessing and to Python itself.
Edit:
The expected values for "globalBal_1" and "globalBal_2" are about half of "rounds", so in this case about 500,000 (per process it is 500,000 / number of processes).
But the actual results for the multiprocessing runs are "0".
Code:
from numpy.random import randint
import time
from multiprocessing import Process

threads = 4
rounds = int(1000000 / threads)
globalBal_1 = 0
globalBal_2 = 0

class Player:
    def __init__(self):
        self.balance_1 = 0
        self.balance_2 = 0

    def strat_1(self, sequence):
        counter = 0
        for i in range(len(sequence) - 1):
            if sequence[i]:
                counter += 1
        self.balance_1 += counter

    def strat_2(self, sequence):
        for i in range(len(sequence) - 1 - 1):
            if sequence[i] == 1:
                return
        if sequence[len(sequence) - 1]:
            self.balance_2 += 2 ** (len(sequence) - 0)

    def getBal_1(self):
        return self.balance_1

    def getBal_2(self):
        return self.balance_2

def run(count):
    p1 = Player()
    print("Inside run func")
    global globalBal_1, globalBal_2
    for i in range(count):
        rolls = randint(0, 2, 10)
        p1.strat_1(rolls)
        p1.strat_2(rolls)
    globalBal_1 += p1.getBal_1()
    globalBal_2 += p1.getBal_2()
    print("Finished run func")

if __name__ == '__main__':
    start = time.time()
    procs = [Process(target=run, args=(rounds,)) for t in range(threads)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    tempEnd = time.time()
    print("Multiprocessing result:")
    print(globalBal_1, globalBal_2, tempEnd - start)
    print("\nSingle process:")
    run(rounds)
    end = time.time()
    print(globalBal_1, globalBal_2, end - start)
Solution thanks to @mirmo and @Bing Wang:
def runMulti(count, result1, result2):
    p1 = Player()
    for i in range(count):
        rolls = randint(0, 2, 10)
        p1.strat_1(rolls)
        p1.strat_2(rolls)
    result1.value += p1.getBal_1()
    result2.value += p1.getBal_2()

[...]

profit1 = Value('i', 0)
profit2 = Value('i', 0)
procs = [Process(target=runMulti, args=(rounds, profit1, profit2)) for t in range(threads)]
Please always include the actual and expected output.
The global variables are not updated simply because there are now 4 separate processes created (hence the name multiprocessing) which have no access to the global variables you have created, or more specifically, do not have access to the global variables of the parent process.
Either return the value from each process and add them up at the end, use a queue, or, as mentioned, use a shared object. A rough sketch of the queue approach follows below.
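This sketch is my own illustration; it reuses the Player class, randint, rounds and threads from the question, and runQueue is a hypothetical name:

from multiprocessing import Process, Queue

def runQueue(count, queue):
    p1 = Player()
    for i in range(count):
        rolls = randint(0, 2, 10)
        p1.strat_1(rolls)
        p1.strat_2(rolls)
    # hand the per-process totals back to the parent
    queue.put((p1.getBal_1(), p1.getBal_2()))

if __name__ == '__main__':
    queue = Queue()
    procs = [Process(target=runQueue, args=(rounds, queue)) for t in range(threads)]
    for p in procs:
        p.start()
    results = [queue.get() for _ in procs]   # collect before join() to avoid blocking
    for p in procs:
        p.join()
    globalBal_1 = sum(r[0] for r in results)
    globalBal_2 = sum(r[1] for r in results)
    print(globalBal_1, globalBal_2)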

Fastest way to share a big object between different Python processes

Suppose I have 3 different processes that each run different logic in an endless loop. I want to run all of them in parallel, while each process can access a shared_object, which is a heavy instance of a class. So I tried using multiprocessing with a manager to achieve it, like this:
import multiprocessing
import inspect
from multiprocessing.managers import BaseManager, NamespaceProxy
import time
import random

class SharedObject():
    def __init__(self):
        self.a = 1

    def show_a(self):
        print(self.a)

class ProcessManager(BaseManager):
    pass

class ProxyBase(NamespaceProxy):
    _exposed_ = ('__getattribute__', '__setattr__', '__delattr__')

class ManagerProxy(ProxyBase):
    pass

def register_proxy(name, cls, proxy):
    for attr in dir(cls):
        if callable(getattr(cls, attr)) and not attr.startswith("__"):
            proxy._exposed_ += (attr,)
            setattr(proxy, attr,
                    lambda s: object.__getattribute__(s, '_callmethod')(attr))
    ProcessManager.register(name, cls, proxy)

register_proxy('shared_object', SharedObject, ManagerProxy)
process_manager = ProcessManager()
process_manager.start()
shared_object = process_manager.shared_object()

def process_1():
    while True:
        print('Process 1 see {}'.format(shared_object.a))
        shared_object.a = 1
        time.sleep(1)

def process_2():
    while True:
        print('Process 2 see {}'.format(shared_object.a))
        shared_object.a = 2
        time.sleep(1)

def process_3():
    while True:
        print('Process 3 see {}'.format(shared_object.a))
        shared_object.a = 3
        if random.randint(0,1) == 1:
            shared_object.show_a()
        time.sleep(1)

first_process = multiprocessing.Process(name="First process", target=process_1)
first_process.start()
second_process = multiprocessing.Process(name="Second process", target=process_2)
second_process.start()
third_process = multiprocessing.Process(name="Third process", target=process_3)
third_process.start()
shared_object.show_a()
while True:
    time.sleep(10)
It works, but it is too slow for me since I have to pass around big numpy arrays. Are there any other ways to make this faster (real-time speed)? Thanks a lot.
It looks like the problem solved by multiprocessing.shared_memory, but a) it is only available in Python 3.8+, and b) the code would need to be restructured, at the very least:
assigning the right size
passing the name of the shared object to the processes
and remembering to close it at the end (a rough sketch follows below)
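A rough sketch of that restructuring (my own, assuming Python 3.8+ and that the heavy object is, say, a big numpy array):

import numpy as np
from multiprocessing import Process, shared_memory

def worker(shm_name, shape, dtype):
    shm = shared_memory.SharedMemory(name=shm_name)
    arr = np.ndarray(shape, dtype=dtype, buffer=shm.buf)  # no copy: backed by the segment
    arr[0] += 1                                           # mutate the shared data in place
    del arr                                               # drop the view before closing
    shm.close()

if __name__ == '__main__':
    big = np.zeros(10_000_000, dtype=np.float64)          # the "heavy" object
    shm = shared_memory.SharedMemory(create=True, size=big.nbytes)
    shared = np.ndarray(big.shape, dtype=big.dtype, buffer=shm.buf)
    shared[:] = big                                       # one-time copy into shared memory
    p = Process(target=worker, args=(shm.name, big.shape, big.dtype))
    p.start()
    p.join()
    print(shared[0])                                      # 1.0, written by the child
    del shared
    shm.close()
    shm.unlink()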
EDIT:
Since I couldn't get that to work with Python 3.7, I decided to use the shared memory primitives available since 3.5+, Array (and Value, which could be what you need). The following code runs happily:
import time
import random
from multiprocessing import Process, Array

s1 = Array('i', [1])

def process_1():
    while True:
        print('Process 1 see {}'.format(s1[0]))
        s1[0] = 1
        time.sleep(1)

def process_2():
    while True:
        print('Process 2 see {}'.format(s1[0]))
        s1[0] = 2
        time.sleep(1)

def process_3():
    while True:
        print('Process 3 see {}'.format(s1[0]))
        s1[0] = 3
        if random.randint(0,1) == 1:
            print(s1[0])
        time.sleep(1)

first_process = Process(name="First process", target=process_1)
first_process.start()
second_process = Process(name="Second process", target=process_2)
second_process.start()
third_process = Process(name="Third process", target=process_3)
third_process.start()

while True:
    time.sleep(10)
Getting
Process 1 see 1
Process 2 see 1
Process 3 see 1
Process 1 see 3
Process 2 see 1
Process 3 see 2
3
Process 1 see 3
Process 2 see 1
Process 3 see 2
3
[...]
I would still pass the array to the processes, something like:
def process_1(shared):
    ...
and then
Process(name="First process", args=(s1,), target=process_1)
to make it clearer what each process is working on, though.
Also, since I've not tried it with BIG objects, I am not really sure how it would fare...
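Put together, a version with the array passed explicitly might look like this (my sketch; like the code above, it has not been tried with big objects):

import time
from multiprocessing import Process, Array

s1 = Array('i', [1])

def process_1(shared):
    while True:
        print('Process 1 see {}'.format(shared[0]))
        shared[0] = 1
        time.sleep(1)

if __name__ == '__main__':
    first_process = Process(name="First process", target=process_1, args=(s1,))
    first_process.start()
    time.sleep(5)               # let it run for a bit
    first_process.terminate()   # the worker loops forever, so stop it explicitly
    first_process.join()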

How to share state when using concurrent futures

I am aware that, using the traditional multiprocessing library, I can declare a Value and share its state between processes.
https://docs.python.org/3/library/multiprocessing.html?highlight=multiprocessing#sharing-state-between-processes
When using the newer concurrent.futures library, how can I share state between my processes?
import concurrent.futures

def get_user_object(batch):
    # do some work
    counter = counter + 1
    print(counter)

def do_multithreading(batches):
    with concurrent.futures.ThreadPoolExecutor(max_workers=25) as executor:
        threadingResult = executor.map(get_user_object, batches)

def run():
    data_pools = get_data()
    start = time.time()
    with concurrent.futures.ProcessPoolExecutor(max_workers=PROCESSES) as executor:
        processResult = executor.map(do_multithreading, data_pools)
    end = time.time()
    print("TIME TAKEN:", end - start)

if __name__ == '__main__':
    run()
I want to keep a synchronized value of this counter.
In the previous library I might have used multiprocessing.Value and a Lock.
You can pass an initializer and initargs to ProcessPoolExecutor just as you would to multiprocessing.Pool. Here's an example:
import concurrent.futures
import multiprocessing as mp

def get_user_object(batch):
    with _COUNTER.get_lock():
        _COUNTER.value += 1
        print(_COUNTER.value, end=' ')

def init_globals(counter):
    global _COUNTER
    _COUNTER = counter

def main():
    counter = mp.Value('i', 0)
    with concurrent.futures.ProcessPoolExecutor(
        initializer=init_globals, initargs=(counter,)
    ) as executor:
        for _ in executor.map(get_user_object, range(10)):
            pass
    print()

if __name__ == "__main__":
    import sys
    sys.exit(main())
Use:
$ python3 glob_counter.py
1 2 4 3 5 6 7 8 10 9
Where:
for _ in executor.map(get_user_object, range(10)): lets you iterate over each result. In this case, get_user_object() returns None, so you don't really have anything to process; you just pass and take no further action.
The last print() call gives you an extra newline, because the original print() call does not use a newline (end=' ').

Can this loop be sped up in pure Python?

I was trying out an experiment with Python, trying to find out how many times it could add one to an integer in one minute. Assuming two computers are the same except for the speed of their CPUs, this should give an estimate of how fast some CPU operations may be for the computer in question.
The code below is an example of a test designed to fulfill the requirements given above. This version is about 20% faster than the first attempt and 150% faster than the third attempt. Can anyone make any suggestions as to how to get the most additions in a minute's time span? Higher numbers are desirable.
EDIT 1: This experiment is written in Python 3.1 and is 15% faster than the fourth speed-up attempt.
def start(seconds):
    import time, _thread

    def stop(seconds, signal):
        time.sleep(seconds)
        signal.pop()

    total, signal = 0, [None]
    _thread.start_new_thread(stop, (seconds, signal))
    while signal:
        total += 1
    return total

if __name__ == '__main__':
    print('Testing the CPU speed ...')
    print('Relative speed:', start(60))
EDIT 2: Regarding using True instead of 1 in the while loop: there should be no speed difference. The following experiment proves that they are the same. First, create a file named main.py and copy the following code into it.
def test1():
    total = 0
    while 1:
        total += 1

def test2():
    total = 0
    while True:
        total += 1

if __name__ == '__main__':
    import dis, main
    dis.dis(main)
Running the code should produce the following output, which shows how the code was actually compiled and what the generated Python Virtual Machine instructions turned out to be.
Disassembly of test1:
2 0 LOAD_CONST 1 (0)
3 STORE_FAST 0 (total)
3 6 SETUP_LOOP 13 (to 22)
4 >> 9 LOAD_FAST 0 (total)
12 LOAD_CONST 2 (1)
15 INPLACE_ADD
16 STORE_FAST 0 (total)
19 JUMP_ABSOLUTE 9
>> 22 LOAD_CONST 0 (None)
25 RETURN_VALUE
Disassembly of test2:
7 0 LOAD_CONST 1 (0)
3 STORE_FAST 0 (total)
8 6 SETUP_LOOP 13 (to 22)
9 >> 9 LOAD_FAST 0 (total)
12 LOAD_CONST 2 (1)
15 INPLACE_ADD
16 STORE_FAST 0 (total)
19 JUMP_ABSOLUTE 9
>> 22 LOAD_CONST 0 (None)
25 RETURN_VALUE
The emitted PVMIs (byte codes) are exactly the same, so both loops should run without any difference in speed.
I see almost the same, but consistently better (~2%), results than @Amber's on my machine with Python 3.1.2, for the following code:
import signal

class Alarm(Exception):
    pass

def alarm_handler(signum, frame):
    raise Alarm

def jfs_signal(seconds):
    # set signal handler
    signal.signal(signal.SIGALRM, alarm_handler)
    # raise Alarm in `seconds` seconds
    signal.alarm(seconds)
    total = 0
    try:
        while 1:
            total += 1
    finally:
        signal.alarm(0)  # disable the alarm
        return total
Here's a variant that uses the subprocess module to run an interruptible loop:
#!/usr/bin/env python
# save it as `skytower.py` file
import atexit
import os
import signal
import subprocess
import sys
import tempfile
import time

def loop():
    @atexit.register
    def print_total():
        print(total)

    total = 0
    while 1:
        total += 1

def jfs_subprocess(seconds):
    # start process, redirect stdout/stderr
    f = tempfile.TemporaryFile()
    p = subprocess.Popen([sys.executable, "-c",
                          "from skytower import loop; loop()"],
                         stdout=f, stderr=open(os.devnull, 'wb'))
    # wait
    time.sleep(seconds)
    # raise KeyboardInterrupt
    # NOTE: if it doesn't kill the process then `p.wait()` blocks forever
    p.send_signal(signal.SIGINT)
    p.wait()    # wait for the process to terminate, otherwise the output
                # might be garbled
    # return saved output
    f.seek(0)   # rewind to the beginning of the file
    d = int(f.read())
    f.close()
    return d

if __name__ == '__main__':
    print('total:', jfs_subprocess(60))
It is ~20% slower than the signal.alarm() variant on my machine.
About a 20-25% improvement, FWIW - but like others, I'd suggest that having Python increment integers probably isn't the best benchmarking tool.
def start(seconds):
    import time, _thread

    def stop(seconds):
        time.sleep(seconds)
        _thread.interrupt_main()

    total = 0
    _thread.start_new_thread(stop, (seconds,))
    try:
        while True:
            total += 1
    except:
        return total

if __name__ == '__main__':
    print('Testing the CPU speed ...')
    print('Relative speed:', start(60))
This exercise in learning more about Python and computers was satisfying. This is the final program:
def start(seconds, total=0):
    import _thread, time

    def stop():
        time.sleep(seconds)
        _thread.interrupt_main()

    _thread.start_new_thread(stop, ())
    try:
        while True:
            total += 1
    except KeyboardInterrupt:
        return total

if __name__ == '__main__':
    print('Testing the CPU speed ...')
    print('Relative speed:', start(60))
Running it on Windows 7 Professional with a 2.16 GHz CPU produced the following output within IDLE:
Python 3.1.3 (r313:86834, Nov 27 2010, 18:30:53) [MSC v.1500 32 bit (Intel)]
on win32
Type "copyright", "credits" or "license()" for more information.
>>> ================================ RESTART ================================
>>>
Testing the CPU speed ...
Relative speed: 673991388
>>>
Edit: The code above only runs on one core. The following program was written to fix that problem.
#! /usr/bin/env python3
def main(seconds):
    from multiprocessing import cpu_count, Barrier, SimpleQueue, Process

    def get_all(queue):
        while not queue.empty():
            yield queue.get()

    args = seconds, Barrier(cpu_count()), SimpleQueue()
    processes = [Process(target=run, args=args) for _ in range(cpu_count())]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print('Relative speed:', sorted(get_all(args[-1]), reverse=True))

def run(seconds, barrier, queue):
    from time import sleep
    from _thread import interrupt_main, start_new_thread

    def terminate():
        sleep(seconds)
        interrupt_main()

    total = 0
    barrier.wait()
    start_new_thread(terminate, ())
    try:
        while True:
            total += 1
    except KeyboardInterrupt:
        queue.put(total)

if __name__ == '__main__':
    main(60)
