I was trying an experiment with Python to find out how many times it could add one to an integer in one minute. Assuming two computers are identical except for CPU speed, this should give an estimate of how fast certain CPU operations are on the computer in question.
The code below is an example of a test designed to fulfill the requirements given above. This version is about 20% faster than the first attempt and 150% faster than the third attempt. Can anyone suggest how to get the most additions in a minute's time span? Higher numbers are desirable.
EDIT 1: This experiment was written in Python 3.1 and is 15% faster than the fourth speed-up attempt.
def start(seconds):
    import time, _thread

    def stop(seconds, signal):
        time.sleep(seconds)
        signal.pop()

    total, signal = 0, [None]
    _thread.start_new_thread(stop, (seconds, signal))
    while signal:
        total += 1
    return total

if __name__ == '__main__':
    print('Testing the CPU speed ...')
    print('Relative speed:', start(60))
EDIT 2: Regarding using True instead of 1 in the while loop: there should be no speed difference. The following experiment shows that they compile to the same bytecode. First, create a file named main.py and copy the following code into it.
def test1():
    total = 0
    while 1:
        total += 1

def test2():
    total = 0
    while True:
        total += 1

if __name__ == '__main__':
    import dis, main
    dis.dis(main)
Running the code should produce the following output, showing how the code was actually compiled and which Python Virtual Machine instructions were generated.
Disassembly of test1:
2 0 LOAD_CONST 1 (0)
3 STORE_FAST 0 (total)
3 6 SETUP_LOOP 13 (to 22)
4 >> 9 LOAD_FAST 0 (total)
12 LOAD_CONST 2 (1)
15 INPLACE_ADD
16 STORE_FAST 0 (total)
19 JUMP_ABSOLUTE 9
>> 22 LOAD_CONST 0 (None)
25 RETURN_VALUE
Disassembly of test2:
7 0 LOAD_CONST 1 (0)
3 STORE_FAST 0 (total)
8 6 SETUP_LOOP 13 (to 22)
9 >> 9 LOAD_FAST 0 (total)
12 LOAD_CONST 2 (1)
15 INPLACE_ADD
16 STORE_FAST 0 (total)
19 JUMP_ABSOLUTE 9
>> 22 LOAD_CONST 0 (None)
25 RETURN_VALUE
The emitted PVM instructions (bytecode) are exactly the same, so both loops should run without any difference in speed.
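If you want to double-check empirically rather than by reading bytecode, a minimal timeit sketch along these lines should show the two loop forms finishing in essentially the same time (the loop bound and repeat count here are arbitrary):

import timeit

def loop_one():
    total = 0
    while 1:
        total += 1
        if total == 10**6:
            break

def loop_true():
    total = 0
    while True:
        total += 1
        if total == 10**6:
            break

# time each function 10 times; both numbers should come out roughly equal
print('while 1:   ', timeit.timeit(loop_one, number=10))
print('while True:', timeit.timeit(loop_true, number=10))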
I see almost the same, but consistently better (~2%), results than @Amber's on my machine, on Python 3.1.2, for this code:
import signal

class Alarm(Exception):
    pass

def alarm_handler(signum, frame):
    raise Alarm

def jfs_signal(seconds):
    # set signal handler
    signal.signal(signal.SIGALRM, alarm_handler)
    # raise Alarm in `seconds` seconds
    signal.alarm(seconds)
    total = 0
    try:
        while 1:
            total += 1
    except Alarm:
        pass  # catch the alarm so the total can be returned
    finally:
        signal.alarm(0)  # disable the alarm
    return total
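For completeness, a minimal usage sketch matching the other answers; note that signal.alarm and SIGALRM are only available on Unix, so this variant will not run on Windows.

if __name__ == '__main__':
    print('Testing the CPU speed ...')
    print('Relative speed:', jfs_signal(60))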
Here's a variant that uses the subprocess module to run an interruptible loop:
#!/usr/bin/env python
# save it as `skytower.py` file
import atexit
import os
import signal
import subprocess
import sys
import tempfile
import time

def loop():
    @atexit.register
    def print_total():
        print(total)

    total = 0
    while 1:
        total += 1

def jfs_subprocess(seconds):
    # start process, redirect stdout/stderr
    f = tempfile.TemporaryFile()
    p = subprocess.Popen([sys.executable, "-c",
                          "from skytower import loop; loop()"],
                         stdout=f, stderr=open(os.devnull, 'wb'))
    # wait
    time.sleep(seconds)
    # raise KeyboardInterrupt in the child
    # NOTE: if it doesn't kill the process then `p.wait()` blocks forever
    p.send_signal(signal.SIGINT)
    p.wait()  # wait for the process to terminate, otherwise the output
              # might be garbled
    # return saved output
    f.seek(0)  # rewind to the beginning of the file
    d = int(f.read())
    f.close()
    return d

if __name__ == '__main__':
    print('total:', jfs_subprocess(60))
It is ~20% slower than the signal.alarm() variant on my machine.
About a 20-25% improvement, FWIW - but like others, I'd suggest that incrementing integers in Python probably isn't the best benchmarking tool.
def start(seconds):
    import time, _thread

    def stop(seconds):
        time.sleep(seconds)
        _thread.interrupt_main()

    total = 0
    _thread.start_new_thread(stop, (seconds,))
    try:
        while True:
            total += 1
    except:
        return total

if __name__ == '__main__':
    print('Testing the CPU speed ...')
    print('Relative speed:', start(60))
This exercise in learning more about Python and computers was satisfying. This is the final program:
def start(seconds, total=0):
    import _thread, time

    def stop():
        time.sleep(seconds)
        _thread.interrupt_main()

    _thread.start_new_thread(stop, ())
    try:
        while True:
            total += 1
    except KeyboardInterrupt:
        return total

if __name__ == '__main__':
    print('Testing the CPU speed ...')
    print('Relative speed:', start(60))
Running it on Windows 7 Professional with a 2.16 GHz CPU produced the following output within IDLE:
Python 3.1.3 (r313:86834, Nov 27 2010, 18:30:53) [MSC v.1500 32 bit (Intel)]
on win32
Type "copyright", "credits" or "license()" for more information.
>>> ================================ RESTART ================================
>>>
Testing the CPU speed ...
Relative speed: 673991388
>>>
Edit: The code up above only runs on one core. The following program was written to fix that problem.
#! /usr/bin/env python3
def main(seconds):
    from multiprocessing import cpu_count, Barrier, SimpleQueue, Process

    def get_all(queue):
        while not queue.empty():
            yield queue.get()

    args = seconds, Barrier(cpu_count()), SimpleQueue()
    processes = [Process(target=run, args=args) for _ in range(cpu_count())]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print('Relative speed:', sorted(get_all(args[-1]), reverse=True))

def run(seconds, barrier, queue):
    from time import sleep
    from _thread import interrupt_main, start_new_thread

    def terminate():
        sleep(seconds)
        interrupt_main()

    total = 0
    barrier.wait()
    start_new_thread(terminate, ())
    try:
        while True:
            total += 1
    except KeyboardInterrupt:
        queue.put(total)

if __name__ == '__main__':
    main(60)
Suppose I have 3 different processes that each run different logic in a forever loop. I want to run all of them in parallel, while each process can access a shared_object, which is a heavy object of a class. So I tried using multiprocessing with a manager to achieve it, like this:
import multiprocessing
import inspect
from multiprocessing.managers import BaseManager, NamespaceProxy
import time
import random

class SharedObject():
    def __init__(self):
        self.a = 1

    def show_a(self):
        print(self.a)

class ProcessManager(BaseManager):
    pass

class ProxyBase(NamespaceProxy):
    _exposed_ = ('__getattribute__', '__setattr__', '__delattr__')

class ManagerProxy(ProxyBase):
    pass

def register_proxy(name, cls, proxy):
    for attr in dir(cls):
        if callable(getattr(cls, attr)) and not attr.startswith("__"):
            proxy._exposed_ += (attr,)
            setattr(proxy, attr,
                    lambda s: object.__getattribute__(s, '_callmethod')(attr))
    ProcessManager.register(name, cls, proxy)

register_proxy('shared_object', SharedObject, ManagerProxy)
process_manager = ProcessManager()
process_manager.start()
shared_object = process_manager.shared_object()

def process_1():
    while True:
        print('Process 1 see {}'.format(shared_object.a))
        shared_object.a = 1
        time.sleep(1)

def process_2():
    while True:
        print('Process 2 see {}'.format(shared_object.a))
        shared_object.a = 2
        time.sleep(1)

def process_3():
    while True:
        print('Process 3 see {}'.format(shared_object.a))
        shared_object.a = 3
        if random.randint(0, 1) == 1:
            shared_object.show_a()
        time.sleep(1)

first_process = multiprocessing.Process(name="First process", target=process_1)
first_process.start()
second_process = multiprocessing.Process(name="Second process", target=process_2)
second_process.start()
third_process = multiprocessing.Process(name="Third process", target=process_3)
third_process.start()
shared_object.show_a()
while True:
    time.sleep(10)
It works, but it is too slow for me since I have to pass around a big numpy array. Are there any other ways to make this faster (real-time speed)? Thanks a lot.
It looks like this is the problem solved by multiprocessing.shared_memory, but a) it looks like it's only Python 3.8+ and b) the code would need to be restructured, at the very least:
assigning the right size
passing the name of the shared object to the processes
and remembering to close it at the end, as sketched below
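A minimal sketch of that restructuring, assuming Python 3.8+; the (1000, 1000) float64 array, its shape, and the worker function name are illustrative, not from the question:

import numpy as np
from multiprocessing import Process, shared_memory

def worker(name, shape, dtype):
    # attach to the existing block by name and view it as a numpy array
    shm = shared_memory.SharedMemory(name=name)
    arr = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
    arr[0, 0] = 42.0   # modified in place, no copy between processes
    shm.close()        # detach; only the creator should unlink

if __name__ == '__main__':
    shape, dtype = (1000, 1000), np.float64
    # a) assign the right size for the block
    shm = shared_memory.SharedMemory(create=True,
                                     size=int(np.prod(shape)) * np.dtype(dtype).itemsize)
    arr = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
    arr.fill(0.0)
    # b) pass the *name* of the shared block to the process
    p = Process(target=worker, args=(shm.name, shape, dtype))
    p.start()
    p.join()
    print(arr[0, 0])   # 42.0, written by the child process
    # c) remember to close and unlink at the end
    shm.close()
    shm.unlink()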
EDIT:
Since I couldn't get it to work with Python 3.7, I decided to use the shared-memory primitives available in 3.5+, Array (and Value, which could be what you need). The following code runs happily:
import time
import random
from multiprocessing import Process, Array

s1 = Array('i', [1])

def process_1():
    while True:
        print('Process 1 see {}'.format(s1[0]))
        s1[0] = 1
        time.sleep(1)

def process_2():
    while True:
        print('Process 2 see {}'.format(s1[0]))
        s1[0] = 2
        time.sleep(1)

def process_3():
    while True:
        print('Process 3 see {}'.format(s1[0]))
        s1[0] = 3
        if random.randint(0, 1) == 1:
            print(s1[0])
        time.sleep(1)

first_process = Process(name="First process", target=process_1)
first_process.start()
second_process = Process(name="Second process", target=process_2)
second_process.start()
third_process = Process(name="Third process", target=process_3)
third_process.start()

while True:
    time.sleep(10)
Getting
Process 1 see 1
Process 2 see 1
Process 3 see 1
Process 1 see 3
Process 2 see 1
Process 3 see 2
3
Process 1 see 3
Process 2 see 1
Process 3 see 2
3
[...]
I would still pass the array to the processes, something like:
def process_1(shared):
    ...
and then
Process(name="First process", args=(s1,), target=process_1)
to make it clearer what each process is working on, though.
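A minimal, hedged sketch of that refactor, with only one worker shown and the loop shortened to a few iterations so the example terminates:

import time
from multiprocessing import Process, Array

def process_1(shared):
    # the worker receives the shared Array explicitly instead of using a global
    for _ in range(3):
        print('Process 1 see {}'.format(shared[0]))
        shared[0] = 1
        time.sleep(1)

if __name__ == '__main__':
    s1 = Array('i', [1])
    first_process = Process(name="First process", target=process_1, args=(s1,))
    first_process.start()
    first_process.join()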
Also, since I've not tried it with BIG objects, I am not really sure how it would fare...
I'm pretty new to Python and I'm very new to multiprocessing.
I'm using the multiprocessing library in Python 3.4, and my shared counter variable is maxing out at 127 and then wrapping around to -128, leading me to believe that it's only 1 byte in length. Why is it not the size of a normal int?
Here's an example of my code:
import multiprocessing
from multiprocessing import Value

def process(arg1, arg2, counter, checker):
    global attempts
    attempts = counter
    global succeeded
    succeeded = checker
    while not succeeded.value:
        if <expression>:
            succeeded.value = True
            break
        elif <expression>:
            break
        else:
            <code that does stuff>
        with attempts.get_lock():
            attempts.value += 1
            print(attempts.value)

NUMOFPROCESSES = 5
attempts = Value('i', 0, lock=True)
succeeded = Value('b', False)

if __name__ == '__main__':
    jobs = []
    for i in range(NUMOFPROCESSES):
        p = multiprocessing.Process(target=process, args=(arg1, arg2, attempts, succeeded))
        jobs.append(p)
        p.start()
Output would look something like this:
1
2
3
.
.
.
125
126
127
-128
-127
-126
-125
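The wrap from 127 to -128 is exactly the range of a signed one-byte C type. As a hedged sanity check (the Value typecodes map onto ctypes types, 'b' to a signed char and 'i' to a C int), the underlying sizes can be inspected directly:

import ctypes

print(ctypes.sizeof(ctypes.c_byte))  # 1 byte -> range -128..127, matching the observed wrap
print(ctypes.sizeof(ctypes.c_int))   # typically 4 bytes -> the "normal int" size expected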
@KSFT I'm currently frustrated by my inability to either decipher the formatting on Stack Overflow or make a simple Python script work. What's wrong here? The program prompts for input to determine the value of dwell, but it does not result in an LED turning on.
import threading
import RPi.GPIO as GPIO
import time

GPIO.setmode(GPIO.BCM)
GPIO.setup(7, GPIO.OUT)
frequency = 0.05
dwell = 0.01

def get_input():
    while True:
        dwell = raw_input("Brightness: ")

input_thread = threading.Thread(target=get_input())
input_thread.start()

while True:
    time.sleep(frequency)
    GPIO.output(7, 1)
    time.sleep(dwell)
    GPIO.output(7, 0)
input_thread = threading.Thread(target=get_input())
is wrong!
input_thread = threading.Thread(target=get_input)
is right!
From the threading docs:
class threading.Thread(group=None, target=None, name=None, args=(), kwargs={})
If you want to pass an argument to get_input, you need to pass it through args or kwargs.
Example:
#!/usr/bin/python
# print the current number of threads
import threading
import time

def worker():
    print "test"
    time.sleep(1)

for i in xrange(5):
    t = threading.Thread(target=worker)
    t.start()

print "current has %d threads" % (threading.activeCount() - 1)
Note that the target is target=worker, not target=worker().
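And a hedged sketch of passing an argument through args; the prompt string and the float conversion are just illustrative, matching the question's Python 2 code:

import threading

dwell = 0.01

def get_input(prompt):
    global dwell
    while True:
        dwell = float(raw_input(prompt))   # Python 2, as in the question

input_thread = threading.Thread(target=get_input, args=("Brightness: ",))
input_thread.daemon = True
input_thread.start()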
The code below is almost identical to the official Python Queue example at http://docs.python.org/2/library/queue.html
from Queue import Queue
from threading import Thread
from time import time
import sys

num_worker_threads = int(sys.argv[1])
source = xrange(10000)

def do_work(item):
    for i in xrange(100000):
        pass

def worker():
    while True:
        item = q.get()
        do_work(item)
        q.task_done()

q = Queue()
for item in source:
    q.put(item)

start = time()
for i in range(num_worker_threads):
    t = Thread(target=worker)
    t.daemon = True
    t.start()
q.join()
end = time()

print(end - start)
These are the results on a Xeon 12-core processor:
$ ./speed.py 1
12.0873839855
$ ./speed.py 2
15.9101941586
$ ./speed.py 4
27.5713479519
I expected that increasing the number of workers would reduce the run time, but instead it increases. I repeated the experiment several times, and the results didn't change.
Am I missing something obvious, or does Python's queue/threading just not work well?
Yeah, Maxim's right concerning the GIL. But as soon as you do something worthwhile in the worker, the situation changes in most cases. Typical work done in threads involves waiting for I/O or other operations during which a thread switch works just fine. If you don't just count numbers in your workers but instead simulate work with a sleep, the situation changes dramatically:
#!/usr/bin/env python
from Queue import Queue
from threading import Thread
from time import time, sleep
import sys

num_worker_threads = int(sys.argv[1])
source = xrange(1000)

def do_work(item):
    for i in xrange(10):
        sleep(0.001)

def worker():
    while True:
        item = q.get()
        do_work(item)
        q.task_done()

q = Queue()
for item in source:
    q.put(item)

start = time()
for i in range(num_worker_threads):
    t = Thread(target=worker)
    t.daemon = True
    t.start()
q.join()
end = time()

print(end - start)
This gives the following results:
for i in 1 2 3 4 5 6 7 8 9 10; do echo -n "$i "; ./t.py $i; done
1 11.0209097862
2 5.50820493698
3 3.65133094788
4 2.73591113091
5 2.19623804092
6 1.83647704124
7 1.57275605202
8 1.38150596619
9 1.23809313774
10 1.1111137867
Python is rather poor at multi-threading for CPU-bound work. Due to a global lock (the GIL), only one thread normally makes progress at a time. See http://wiki.python.org/moin/GlobalInterpreterLock
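As a hedged illustration of the difference, the same CPU-bound do_work can be spread across processes with multiprocessing.Pool, which is not limited by the GIL (a sketch, not a drop-in replacement for the original script):

#!/usr/bin/env python
from multiprocessing import Pool
from time import time
import sys

def do_work(item):
    for i in xrange(100000):
        pass

if __name__ == '__main__':
    num_workers = int(sys.argv[1])
    pool = Pool(processes=num_workers)
    start = time()
    # same 10000 items as the original script, but distributed over processes
    pool.map(do_work, xrange(10000))
    pool.close()
    pool.join()
    print(time() - start)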
I have a script that looks something like this:
#!/usr/bin/env python
# encoding: utf-8
import time, random, os, multiprocessing

def main():
    NPROCESSES = 5
    pool = multiprocessing.Pool(processes=NPROCESSES)
    a = [1,2,3,4,5,6,7,8,9,0]
    for _ in pool.imap_unordered(do_task, a):
        pass

def do_task(n):
    try:
        might_crash(n)
    except Hell, e:
        print e, " crashed."

def might_crash(n):
    time.sleep(3*random.random())
    if random.randrange(3) == 0:
        raise Hell(n)
    print n

class Hell(Exception):
    pass

if __name__ == "__main__":
    main()
This script will normally print the values from 'a', but might_crash() will randomly raise an exception.
I want to catch these exceptions and put the current do_task() back in the queue to retry later.
How do I put the current task back in the queue if it should fail?
You could collect results from do_task, check which results are instances of Hell, stuff those tasks into a list new_tasks, and loop until there are no new_tasks:
import time
import random
import os
import multiprocessing as mp

def main():
    NPROCESSES = 5
    pool = mp.Pool(NPROCESSES)
    a = [1,2,3,4,5,6,7,8,9,0]
    new_tasks = a
    while new_tasks:
        a = new_tasks
        new_tasks = []
        for result in pool.imap_unordered(do_task, a):
            if isinstance(result, Hell):
                new_tasks.append(result.args[0])
            else:
                print(result)

def do_task(n):
    try:
        result = might_crash(n)
    except Hell as e:
        print("{0} crashed.".format(e.args[0]))
        result = e
    return result

def might_crash(n):
    time.sleep(3*random.random())
    if random.randrange(3) == 0:
        raise Hell(n)
    return '{0} done'.format(n)

class Hell(Exception):
    pass

if __name__ == "__main__":
    main()
yields
1 done
6 crashed.
4 done
7 crashed.
5 done
9 done
3 done
2 crashed.
8 done
0 crashed.
0 crashed.
2 done
7 crashed.
6 done
0 done
7 done