Python multiprocessing shared memory; one write, multiple read - python

SYSTEM
Linux (Manjaro KDE)
Python 3.8.3
PROGRAM:
I have incoming string data on a UDP port. The main loop spools up the processes prior to using selectors to monitor the UDP port. I want the UDP data, which is constantly updated, available for each process.
TRIED:
Multiprocessing Queues with maxsize = 1 and that became a headache and quickly broke down.
Multiprocessing Arrays (this is where I'm at now)
I have checked, and the Array at each location I'm looking at has the same memory address (I think). For whatever reason, when I try to access the contents of the Array in the child process, the process hangs.
NOT TRIED
Pipes. I have a feeling this may be the way to go. But I'm already deep in uncharted territory; I've never used them before.
WHAT I WANT
I would like to access the UDP data from the child processes - these are the camera_view method.
Dummy UDP string
import socket
import random
import datetime
import time
conn = ('127.0.0.1', 6666)
def rand_value(f_val, t_val):
result = round(random.uniform(f_val, t_val), 2)
result = random.uniform(f_val, t_val)
return result
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
while True:
time.sleep(6)
timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
overlay = timestamp
for i in range(9):
val = rand_value(i*10, i*10+10)
if i == 8: val = 'TASK: Im the real Batman'
overlay = overlay + "," + str(val)
print(overlay)
sock.sendto(overlay.encode(), conn)
My Program
import datetime
import selectors
import socket
import time
from multiprocessing import Lock, Process, Queue
from multiprocessing.sharedctypes import Array
from ctypes import c_char_p
REQUIRED_CAMERAS = 1
CAMERA_CONN = {'name':['Colour Camera'], 'ip':['127.0.0.1'], 'port':[9000]}
OVERLAY_CONN = ('0.0.0.0', 6666)
CONTROL_CONN = ('0.0.0.0', 6667)
NUMBER_OF_ITEMS_IN_OVERLAY = 10
class Camera():
def __init__(self, cam_name, cam_ip, cam_port):
self.ip = cam_ip
self.port = cam_port
self.capture = cv2.VideoCapture(0)
self.frame_width = int(self.capture.get(3))
self.frame_height = int(self.capture.get(4))
self.name = cam_name
def get_overlay(data_packet):
data = data_packet.decode()
data = data.split(',')
field0 = data[0]
field1 = 'KP: ' + str(round(float(data[1]), 3))
field2 = 'DCC: ' + str(round(float(data[2]), 2)) + 'm'
field3 = 'E: ' + str(round(float(data[3]), 2)) + 'm'
field4 = 'N: ' + str(round(float(data[4]), 2)) + 'm'
field5 = 'D: ' + str(round(float(data[5]), 2)) + 'm'
field6 = 'H: ' + str(round(float(data[6]), 2)) # + '°'
field7 = 'R: ' + str(round(float(data[7]), 2)) # + '°'
field8 = 'P: ' + str(round(float(data[8]), 2)) # + '°'
field9 = data[9]
x = []
for i in range(NUMBER_OF_ITEMS_IN_OVERLAY):
x.append(eval('field' + str(i)).encode())
# if i == 0:
# print(x[i])
return x
def socket_reader(sock, mask, q, REQUIRED_CAMERAS, overlay):
data_packet, sensor_ip = sock.recvfrom(1024)
sensor_port = sock.getsockname()[1]
print(f'SENSOR PORT {sensor_port} and SENSOR_IP {sensor_ip}')
if sensor_port == OVERLAY_CONN[1]:
x = get_overlay(data_packet)
for i in range(len(x)):
overlay[i] = x[i]
print(f'Socket Reader {overlay}')
def camera_view(CAMERA_CONN, cam_name, camera, overlay_q, control_q, overlay):
while True:
print(f'PROCESS {camera} RUNNING FOR: {cam_name}')
try:
print(f'Camera View {overlay}')
for i in range(len(overlay)):
print(overlay[i])
except:
pass
time.sleep(1)
def controller(REQUIRED_CAMERAS, CAMERA_CONN, OVERLAY_CONN, CONTROL_CONN):
if REQUIRED_CAMERAS > len(CAMERA_CONN['name']):
print(f'REQURIED_CAMERAS: {REQUIRED_CAMERAS} - more than connections in CAMERA_CONN ')
else:
# Set up a UDP connection for the overlay string and the control commands
sock_overlay = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock_control = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock_overlay.bind(OVERLAY_CONN)
sock_control.bind(CONTROL_CONN)
# Set up the selector to watch over the socket
# and trigger when data is ready for reading
sel = selectors.DefaultSelector()
sel.register(fileobj=sock_overlay, events=selectors.EVENT_READ, data=socket_reader)
sel.register(fileobj=sock_control, events=selectors.EVENT_READ, data=socket_reader)
# create shared memory
overlay_q = Queue(maxsize=1)
control_q = Queue(maxsize=1)
overlay = Array(c_char_p, range(NUMBER_OF_ITEMS_IN_OVERLAY))
print(f'Init Overlay {overlay}')
# Generate the processes; one per camera
processes = []
for camera in range(REQUIRED_CAMERAS):
processes.append(Process(target=camera_view, args=(CAMERA_CONN, CAMERA_CONN['name'][camera], camera, overlay_q, control_q, overlay)))
for process in processes:
process.daemon = True
process.start()
# Spin over the selector
while True:
# Only have one connnection registered, so to stop
# the loop spinning up the CPU, I have made it blocking
# with the timeout = 1 (sec) instead of =0.
events = sel.select(timeout=None)
for key, mask in events:
# the selector callback is the data= from the register above
callback = key.data
# the callback gets the sock, mask and the sensor queues
if key.fileobj == sock_overlay:
callback(key.fileobj, mask, overlay_q, REQUIRED_CAMERAS, overlay)
else:
callback(key.fileobj, mask, control_q, REQUIRED_CAMERAS, overlay)
if __name__ == "__main__":
controller(REQUIRED_CAMERAS, CAMERA_CONN, OVERLAY_CONN, CONTROL_CONN)
EDIT1:
from multiprocessing import Process, Array
from ctypes import c_char_p
import time
def worker(arr):
count = 0
while True:
count += 1
val = 'val' + str(count)
arr[0] = val
print(arr[:])
time.sleep(2)
def main():
arr = Array(c_char_p, 1)
p = Process(target=worker, args=(arr,))
p.daemon = True
p.start()
while True:
print(arr[:])
try:
print(arr[:].decode('utf-8'))
except :
pass
# try:
# val = arr[:]
# val = val.decode('utf-8')
# print(f'main {val}')
# except:
# pass
time.sleep(1)
if __name__ == "__main__":
main()
'''
from multiprocessing import Process, Array
from ctypes import c_char_p
import time
def worker(arr):
count = 0
while True:
count += 1
val = 'val' + str(count)
arr[0] = bytes(val, 'utf-8')
print(arr[:])
time.sleep(2)
def main():
arr = Array(c_char_p, 1)
p = Process(target=worker, args=(arr,))
p.daemon = True
p.start()
while True:
print(arr[:])
try:
print(arr[:].decode('utf-8'))
except :
pass
time.sleep(1)
if __name__ == "__main__":
main()
if __name__ == "__main__":
main()
'''
EDIT2:
Thanks to #RolandSmith, I have persevered with Queues and I think I have got a template on how I can move forward. See below code. If I can't get this to work in program, I'll be back here.
from multiprocessing import Process, Queue
import time
import datetime
def worker(camera, q):
val = ''
while True:
if q.full() == True:
val = q.get()
else:
val = val
print(f'WORKER{camera} {val}')
time.sleep(0.2)
def main():
cameras = 2
processes = []
queues = []
for camera in range(cameras):
queues.append(Queue(maxsize=1))
processes.append(Process(target=worker, args=(camera, queues[camera])))
for process in processes:
process.daemon = True
process.start()
while True:
for q in queues:
if not q.empty():
try:
_ = q.get()
except:
pass
else:
q.put(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
time.sleep(.5)
if __name__ == "__main__":
main()

In my view, using Queue is a less error-prone solution than using an Array.
Here is your second example, converted to using a Queue:
from multiprocessing import Process, Queue
import time
def worker(q):
count = 0
while True:
count += 1
val = 'val' + str(count)
q.put(val)
print('worker:', val)
time.sleep(2)
def main():
q = Queue()
p = Process(target=worker, args=(q, ))
p.daemon = True
p.start()
while True:
if not q.empty():
print('main:', q.get())
time.sleep(1)
if __name__ == "__main__":
main()
This yields:
> python3 test3.py
worker: val1
main: val1
worker: val2
main: val2
worker: val3
main: val3
worker: val4
main: val4
worker: val5
Here is the same example using a Pipe:
from multiprocessing import Process, Pipe
import time
def worker(p):
count = 0
while True:
count += 1
val = 'val' + str(count)
p.send(val)
print('worker:', val)
time.sleep(2)
def main():
child, parent = Pipe()
p = Process(target=worker, args=(child, ))
p.daemon = True
p.start()
while True:
if parent.poll():
print('main:', parent.recv())
time.sleep(1)
if __name__ == "__main__":
main()
This produces the same result as the previous example.
Additionally, by default a pipe is bidirectional.
So you could also send back data from the workers to the parent.

Related

plotting in real-time two plots on the same figure using multi-processing python

currently I'm working on plotting the data from two sensors (audio and vibration) on the same graph in real-time , but I'm facing a problem:
this is my server :
import socket as s
import math as M
import struct
import time
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from datetime import datetime, tzinfo
import pytz
import socket
import sys
import json
import csv
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from datetime import datetime
from datetime import timedelta
import math as M
...
import multiprocessing
from multiprocessing import Lock , Process, Queue
timel = []
visiond = []
import csv
# -----------
freq = 50
seconds = 60 # number of seconds of data to display
gain = 10
wlen = freq / 100
per_lap = 0.9
ch = 'SHZ'
mult = 8.0
displ_samps = seconds * freq
save = False
# -----------
# -----------
def _nearest_pow_2(x):
"""
Find power of two nearest to x
>>> _nearest_pow_2(3)
2.0
>>> _nearest_pow_2(15)
16.0
:type x: float
:param x: Number
:rtype: Int
:return: Nearest power of 2 to x
"""
a = M.pow(2, M.ceil(np.log2(x)))
b = M.pow(2, M.floor(np.log2(x)))
if abs(a - x) < abs(b - x):
return a
else:
return b
nfft1 = int(_nearest_pow_2(wlen * freq))
nlap1 = int(nfft1 * per_lap)
if mult is not None:
mult = int(_nearest_pow_2(mult))
mult = mult * nfft1
# -------------------
# ------------
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(13, 6))
plt.ion()
fig.show()
fig.canvas.draw()
n = 0
stream = [] # our stream (simple list object)
curr = datetime.now()
# --------------
# sudo iptables -A INPUT -p tcp -s 192.168.0.9 --dport 50012 -j ACCEPT
# Set up a TCP/IP server
tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
aud_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
dur = 0.02
# Bind the socket to server address and port 81
server_address = (localhost, 50021)
server_address_aud = (localhost, 50022)
tcp_socket.bind(server_address)
aud_socket.bind(server_address_aud)
# Listen on port 81
tcp_socket.listen(1)
aud_socket.listen(1)
def update_vib(tcp_socket,n,q,lock,curr,stream):
while True:
print("Waiting for connection")
connection, client = tcp_socket.accept()
try:
print("Connected to client IP: {}".format(client))
if connection:
current_time = datetime.now(tz=pytz.UTC)
future_time = current_time + timedelta(seconds=1)
xx = future_time.strftime("%m/%d/%Y, %H:%M:%S")
str_1_encoded = bytes(xx, 'UTF-8')
connection.sendall(str_1_encoded)
# Receive and print data 32 bytes at a time, as long as the client is sending something
while True:
data = connection.recv(1024)
sample = bytes(data).decode("utf-8")
sample = (sample.replace('{', '').replace('}', '').replace(' ', '').split(','))
sample.remove("'SHZ'")
initial_time = float(sample.pop(0))
for i in range(0, len(sample)):
ts = initial_time + i * dur
send_tcp = '%.3f, %s' % (ts, sample[i])
id_machine = '4'
sample_json = {}
sample_json['id'] = id_machine
sample_json['type'] = 'vib'
dataf = send_tcp.split(",")
sample_json['sample_time'] = dataf[0]
sample_json['sample_data'] = dataf[1]
sample_str = json.dumps(sample_json)
str_1_encoded = bytes(sample_str, 'UTF-8')
# dataf = send_tcp.split(",")
# timel.append(float(dataf[0]))
# visiond.append(int(dataf[1]))
# drawnow(makeFig)
# plt.pause(.000001)
fulldata = json.loads(sample_str)
timel.append((fulldata['sample_time']))
visiond.append((fulldata['sample_data']))
if not data:
break
s = data.decode('UTF-8').strip("'{}").split(', ')
if ch in s[0]: # only listen to the specific channel
prev = curr # timing
curr = datetime.now() # timing
fps = 1 / (curr - prev).total_seconds() # timing
Text = "hi"
for smp in s[2:]: # convert strings to ints
stream.append(int(smp))
npts = len(stream)
if npts > displ_samps: # if the number of samples exceeds the display value, slice the array
stream = stream[npts - displ_samps:] # move the array to the right, get rid of old samples
lock.acquire()
plotting_figures()
lock.release()
finally:
connection.close()
amp22=[]
audiotime=[]
def recived_aud(aud_socket,lock) :
while True:
data = b''
payload_size2 = struct.calcsize("dd")
connection, client = aud_socket.accept()
print("Connected to client IP: {}".format(client))
if connection :
while True:
while len(data) < payload_size2:
data = connection.recv(1024)
if not data: break
packed_msg_size = data[:payload_size2]
data = data[payload_size2:]
tup = struct.unpack('dd', packed_msg_size)
audiotime.append(float(tup[0]))
amp22.append(float(tup[1]))
print(tup)
lock.acquire()
plotting_figures()
lock.release()
def plotting_figures():
if stream:
ax[0].clear() # ready the plot axis for a new draw
ax[0].set_xlim(0, len(stream)) # being explicit here helps speed things up slightly
ax[0].set_ylim(min(stream) - 25, max(stream) + 25)
ax[0].plot(stream, linewidth=0.5)
else:
ax[0].clear()
if amp22:
ax[1].clear() # ready the plot axis for a new draw
ax[1].set_xlim(0, len(amp22)) # being explicit here helps speed things up slightly
ax[1].set_ylim(min(amp22) - 25, max(amp22) + 25)
ax[1].plot(amp22, linewidth=0.5)
else:
ax[1].clear()
fig.canvas.draw()
lock=Lock()
q=Queue()
q.put([fig,ax])
qq=q.get()
p1=Process(target=update_vib, args=(tcp_socket,n,q,lock,curr,stream,))
p2=Process(target=recived_aud, args=(aud_socket,lock))
p1.start()
p2.start()
p1.join()
p2.join()
the error I'm receiving:
[xcb] Unknown sequence number while processing queue
[xcb] Most likely this is a multi-threaded client and XInitThreads has not been called
[xcb] Aborting, sorry about that.
python3.8: ../../src/xcb_io.c:260: poll_for_event: Assertion `!xcb_xlib_threads_sequence_lost' failed.
I'm aware that matplotlib does not support multi-threading but I'm using multi-processing instead , please let me know how I can update the two plot on the same figure simultaneously.
and thank you

Python multi connection downloader resuming after pausing makes download run endlessly

I have written a Python script that downloads a single file using 32 connections if available.
I have written a multiconnection downloader that works fine without pausing, but won't stop downloading after resuming, the progress would go beyond 100%...
Like this:
Download mode: Multi-thread (press Space to pause/resume, press Escape to stop)
[████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████] 120% completed, paused: False
Download mode: Multi-thread (press Space to pause/resume, press Escape to stop)
1798.08 MiB downloaded, 1489.83 MiB total, -308.25 MiB remaining, download speed: 22.73 MiB/s
Minimum speed: 0.00 MiB/s, average speed: 4.54 MiB/s, maximum speed: 75.00 MiB/s
Task started on 2021-08-09 16:57:03, 00:06:35 elapsed, ETA: -1:59:47
After progress exceeds 100%, there will be error messages like this:
Exception in thread Thread-78:
Traceback (most recent call last):
File "C:\Program Files\Python39\lib\threading.py", line 973, in _bootstrap_inner
self.run()
File "C:\Program Files\Python39\lib\threading.py", line 910, in run
self._target(*self._args, **self._kwargs)
File "D:\MyScript\downloader.py", line 70, in multidown
mm[position: position+len(chunk)] = chunk
IndexError: mmap slice assignment is wrong size
(The above doesn't include all of the error message)
I have encountered all sorts of errors after resuming, but most importantly, the server will often send extra bytes from previous request, whose connection is dead and needless to say this breaks the whole code.
How should I implement pause and resume correctly?
I am thinking about multiprocessing, I assume the sessions and connections are all PID and port number related, and so far I haven't encountered a new run of the script that received extra bytes from previous runs of the script, so I guess using another process with a new PID and new port number plus requests.session() plus {'connection': 'close'} for each download should guarantee that no extra bytes from previous connections will be received, I just don't know how to share variables between processes...
The code:
downloader.py
import json
import keyboard
import os
import re
import requests
import sys
import time
import validators
from collections import deque
from datetime import datetime, timedelta
from math import inf
from mmap import mmap
from pathlib import Path
from ping3 import ping
from reprint import output
from threading import Thread
def timestring(sec):
sec = int(sec)
m, s = divmod(sec, 60)
h, m = divmod(m, 60)
return f'{h:02d}:{m:02d}:{s:02d}'
class Downloader:
def __init__(self):
self.recent = deque([0] * 12, maxlen=12)
self.recentspeeds = deque([0] * 200, maxlen=200)
self.paused = False
self.progress = dict()
class Multidown:
def __init__(self, obj, id):
self.count = 0
self.position = 0
self.completed = False
self.id = id
self.parent = obj
def multidown(self, url, start, end):
interrupted = False
s = requests.session()
s.headers.update({'connection': 'close', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'})
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
while end - length + (self.id != self.parent.progress['connections'] - 1) != start or r.status_code != 206:
r.close()
s.close()
del r
del s
time.sleep(0.02)
s = requests.session()
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
self.position = start
for chunk in r.iter_content(1048576):
if self.parent.paused:
self.parent.mm.flush()
r.connection.close()
r.close()
s.close()
del r
del s
interrupted = True
break
if chunk:
self.parent.mm[self.position: self.position+len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
if not interrupted:
r.close()
s.close()
if self.count == self.parent.progress[self.id]['length']:
self.completed = True
self.parent.progress[self.id]['completed'] = True
self.parent.mm.flush()
class Singledown:
def __init__(self):
self.count = 0
def singledown(self, url, path):
with requests.get(url, stream=True) as r:
with path.open('wb') as file:
for chunk in r.iter_content(1048576):
if chunk:
self.count += len(chunk)
file.write(chunk)
def download(self, url, filepath, num_connections=32, overwrite=False):
singlethread = False
threads = []
bcontinue = False
filepath = filepath.replace('\\', '/')
if (not re.match('^[a-zA-Z]:/(((?![<>:"/|?*]).)+((?<![ .])/)?)*$', filepath) or
not Path(filepath[:3]).exists()):
print('Invalid windows file path has been inputted, process will now stop.')
return
if not validators.url(url):
print('Invalid url been inputted, process will now stop.')
return
if url.lower().startswith('ftp://'):
print(
"`requests` module doesn't suport File Transfer Protocol, process will now stop")
return
path = Path(filepath)
if not path.exists():
bcontinue = True
else:
if path.is_file():
if overwrite:
bcontinue = True
else:
while True:
answer = input(
f'`{filepath}` already exists, do you want to overwrite it? \n(Yes, No):').lower()
if answer in ['y', 'yes', 'n', 'no']:
if answer.startswith('y'):
os.remove(filepath)
bcontinue = True
break
else:
print('Invalid input detected, retaking input.')
if not bcontinue:
print(
f'Overwritting {filepath} has been aborted, process will now stop.')
return
bcontinue = False
server = url.split('/')[2]
ok = ping(server, timeout=2)
if ok == False:
print(
'The server of the inputted url is non-existent, process will now stop.')
return
if ok:
bcontinue = True
if not ok:
print('Connection has timed out, will reattempt to ping server 5 times.')
for i in range(5):
print(
f'Reattempting to ping server, retrying {i + 1} out of 5')
ok = ping(server, timeout=2)
if ok:
print(
f'Connection successful on retry {i + 1}, process will now continue.')
bcontinue = True
break
else:
print(f'Retry {i + 1} out of 5 timed out' + (i != 4)
* ', reattempting in 1 second.' + (i == 4) * '.')
time.sleep(1)
if not bcontinue:
print('Failed to connect server, connection timed out, process will now stop')
return
bcontinue = False
head = requests.head(url)
if head.status_code == 200:
bcontinue = True
else:
for i in range(5):
print(f'Server responce is invalid, retrying {i + 1} out of 5')
head = requests.head(url)
if head.status_code == 200:
print(
f'Connection successful on retry {i + 1}, process will now continue.')
bcontinue = True
break
else:
print(f'Retry {i + 1} out of 5 failed to access data' +
(i != 4) * ', reattempting in 1 second.' + (i == 4) * '.')
time.sleep(1)
if not bcontinue:
print("Can't establish a connection with access to data, can't download target file, process will now stop.")
return
folder = '/'.join(filepath.split('/')[:-1])
Path(folder).mkdir(parents=True, exist_ok=True)
headers = head.headers
total = headers.get('content-length')
if not total:
print(
f'Cannot find the total length of the content of {url}, the file will be downloaded using a single thread.')
started = datetime.now()
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
sd = self.Singledown()
th = Thread(target=sd.singledown, args=(url, path))
threads.append(sd)
th.start()
total = inf
singlethread = True
else:
total = int(total)
if not headers.get('accept-ranges'):
print(
'Server does not support the `range` parameter, the file will be downloaded using a single thread.')
started = datetime.now()
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
sd = self.Singledown()
th = Thread(target=sd.singledown, args=(url, path))
threads.append(sd)
th.start()
singlethread = True
else:
segment = total / num_connections
started = datetime.now()
lastpressed = started
path.touch()
file = path.open('wb')
file.seek(total - 1)
file.write(b'\0')
file.close()
file = path.open(mode='r+b')
self.mm = mmap(file.fileno(), 0)
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
self.progress['total'] = total
self.progress['connections'] = num_connections
for i in range(num_connections):
md = self.Multidown(self, i)
start = int(segment * i)
end = int(segment * (i + 1)) - (i != num_connections - 1)
length = end - start + (i != num_connections - 1)
th = Thread(target=md.multidown, args=(
url, start, end))
threads.append(md)
self.progress[i] = dict()
self.progress[i]['start'] = start
self.progress[i]['position'] = start
self.progress[i]['end'] = end
self.progress[i]['count'] = 0
self.progress[i]['length'] = length
self.progress[i]['completed'] = False
th.start()
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
downloaded = 0
totalMiB = total / 1048576
speeds = []
interval = 0.04
with output(initial_len=5, interval=0) as dynamic_print:
while True:
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
status = sum([i.completed for i in threads])
downloaded = sum(i.count for i in threads)
self.recent.append(downloaded)
done = int(100 * downloaded / total)
doneMiB = downloaded / 1048576
gt0 = len([i for i in self.recent if i])
if not gt0:
speed = 0
else:
recent = list(self.recent)[12 - gt0:]
if len(recent) == 1:
speed = recent[0] / 1048576 / interval
else:
diff = [b - a for a, b in zip(recent, recent[1:])]
speed = sum(diff) / len(diff) / 1048576 / interval
speeds.append(speed)
self.recentspeeds.append(speed)
nzspeeds = [i for i in speeds if i]
if nzspeeds:
minspeed = min(nzspeeds)
else:
minspeed = 0
maxspeed = max(speeds)
now = datetime.now()
elapsed = (now - started).total_seconds()
meanspeed = downloaded / elapsed / 1048576
remaining = totalMiB - doneMiB
dynamic_print[0] = '[{0}{1}] {2}'.format(
'\u2588' * done, '\u00b7' * (100-done), str(done)) + '% completed' + (not singlethread) * ', paused: {0}'.format(self.paused)
dynamic_print[1] = 'Download mode: ' + singlethread * \
'Single-thread' + (not singlethread) * 'Multi-thread (press Space to pause/resume, press Escape to stop)'
dynamic_print[2] = '{0:.2f} MiB downloaded, {1:.2f} MiB total, {2:.2f} MiB remaining, download speed: {3:.2f} MiB/s'.format(
doneMiB, totalMiB, remaining, speed)
if speed and total != inf:
eta = timestring(remaining / speed)
else:
eta = '99:59:59'
dynamic_print[3] = 'Minimum speed: {0:.2f} MiB/s, average speed: {1:.2f} MiB/s, maximum speed: {2:.2f} MiB/s'.format(
minspeed, meanspeed, maxspeed)
dynamic_print[4] = 'Task started on {0}, {1} elapsed, ETA: {2}'.format(
started.strftime('%Y-%m-%d %H:%M:%S'), timestring(elapsed), eta)
if keyboard.is_pressed('space'):
if not singlethread:
pressed = datetime.now()
if (pressed - lastpressed).total_seconds() > 0.5:
lastpressed = pressed
if self.paused:
for i, md in enumerate(threads):
if not md.completed:
th = Thread(target=md.multidown, args=(
url, self.progress[i]['position'], self.progress[i]['end']))
th.start()
self.paused = not self.paused
if keyboard.is_pressed('esc'):
if not singlethread:
ended = datetime.now()
self.paused = True
break
if status == len(threads):
if not singlethread:
self.mm.close()
ended = datetime.now()
break
time.sleep(interval)
time_spent = (ended - started).total_seconds()
meanspeed = total / time_spent / 1048576
status = sum([i.completed for i in threads])
if status == len(threads):
print('Task completed on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
else:
print('Task interrupted on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
if __name__ == '__main__':
d = Downloader()
d.download(*sys.argv[1:])
For testing purposes this is a dumbed-down version of the script, with all checks removed while retaining the same functionality (sorry it really takes all these lines to show the download information):
import json
import os
import requests
import sys
import time
from collections import deque
from datetime import datetime, timedelta
from math import inf
from mmap import mmap
from pathlib import Path
from reprint import output
from threading import Thread
def timestring(sec):
sec = int(sec)
m, s = divmod(sec, 60)
h, m = divmod(m, 60)
return f'{h:02d}:{m:02d}:{s:02d}'
class Downloader:
def __init__(self):
self.recent = deque([0] * 12, maxlen=12)
self.recentspeeds = deque([0] * 200, maxlen=200)
self.paused = False
self.progress = dict()
self.UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
class Multidown:
def __init__(self, obj, id):
self.count = 0
self.position = 0
self.completed = False
self.id = id
self.parent = obj
self.UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
def multidown(self, url, start, end):
interrupted = False
s = requests.session()
s.headers.update({'connection': 'close', 'user-agent': self.UA})
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
while end - length + (self.id != self.parent.progress['connections'] - 1) != start or r.status_code != 206:
r.close()
s.close()
del r
del s
time.sleep(0.02)
s = requests.session()
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
self.position = start
for chunk in r.iter_content(1048576):
if self.parent.paused:
self.parent.mm.flush()
r.connection.close()
r.close()
s.close()
del r
del s
interrupted = True
break
if chunk:
self.parent.mm[self.position: self.position+len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
if not interrupted:
r.close()
s.close()
if self.count == self.parent.progress[self.id]['length']:
self.completed = True
self.parent.progress[self.id]['completed'] = True
self.parent.mm.flush()
def download(self, url, filepath, num_connections=32, overwrite=False):
singlethread = False
threads = []
bcontinue = False
filepath = filepath.replace('\\', '/')
if Path(filepath).exists():
os.remove(filepath)
folder = '/'.join(filepath.split('/')[:-1])
Path(folder).mkdir(parents=True, exist_ok=True)
head = requests.head(url, headers={'user-agent': self.UA})
path = Path(filepath)
headers = head.headers
total = headers.get('content-length')
if total:
total = int(total)
if headers.get('accept-ranges'):
segment = total / num_connections
started = datetime.now()
lastpressed = started
path.touch()
file = path.open('wb')
file.seek(total - 1)
file.write(b'\0')
file.close()
file = path.open(mode='r+b')
self.mm = mmap(file.fileno(), 0)
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
self.progress['total'] = total
self.progress['connections'] = num_connections
for i in range(num_connections):
md = self.Multidown(self, i)
start = int(segment * i)
end = int(segment * (i + 1)) - (i != num_connections - 1)
length = end - start + (i != num_connections - 1)
th = Thread(target=md.multidown, args=(
url, start, end))
threads.append(md)
self.progress[i] = dict()
self.progress[i]['start'] = start
self.progress[i]['position'] = start
self.progress[i]['end'] = end
self.progress[i]['count'] = 0
self.progress[i]['length'] = length
self.progress[i]['completed'] = False
th.start()
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
downloaded = 0
totalMiB = total / 1048576
speeds = []
interval = 0.04
with output(initial_len=5, interval=0) as dynamic_print:
while True:
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
status = sum([i.completed for i in threads])
downloaded = sum(i.count for i in threads)
self.recent.append(downloaded)
done = int(100 * downloaded / total)
doneMiB = downloaded / 1048576
gt0 = len([i for i in self.recent if i])
if not gt0:
speed = 0
else:
recent = list(self.recent)[12 - gt0:]
if len(recent) == 1:
speed = recent[0] / 1048576 / interval
else:
diff = [b - a for a, b in zip(recent, recent[1:])]
speed = sum(diff) / len(diff) / 1048576 / interval
speeds.append(speed)
self.recentspeeds.append(speed)
nzspeeds = [i for i in speeds if i]
if nzspeeds:
minspeed = min(nzspeeds)
else:
minspeed = 0
maxspeed = max(speeds)
now = datetime.now()
elapsed = (now - started).total_seconds()
meanspeed = downloaded / elapsed / 1048576
remaining = totalMiB - doneMiB
dynamic_print[0] = '[{0}{1}] {2}'.format(
'\u2588' * done, '\u00b7' * (100-done), str(done)) + '% completed' + (not singlethread) * ', paused: {0}'.format(self.paused)
dynamic_print[1] = 'Download mode: ' + singlethread * \
'Single-thread' + (not singlethread) * 'Multi-thread (press Space to pause/resume, press Escape to stop)'
dynamic_print[2] = '{0:.2f} MiB downloaded, {1:.2f} MiB total, {2:.2f} MiB remaining, download speed: {3:.2f} MiB/s'.format(
doneMiB, totalMiB, remaining, speed)
if speed and total != inf:
eta = timestring(remaining / speed)
else:
eta = '99:59:59'
dynamic_print[3] = 'Minimum speed: {0:.2f} MiB/s, average speed: {1:.2f} MiB/s, maximum speed: {2:.2f} MiB/s'.format(
minspeed, meanspeed, maxspeed)
dynamic_print[4] = 'Task started on {0}, {1} elapsed, ETA: {2}'.format(
started.strftime('%Y-%m-%d %H:%M:%S'), timestring(elapsed), eta)
if PAUSE:
if not singlethread:
pressed = datetime.now()
if (pressed - lastpressed).total_seconds() > 0.5:
lastpressed = pressed
if self.paused:
for i, md in enumerate(threads):
if not md.completed:
th = Thread(target=md.multidown, args=(
url, self.progress[i]['position'], self.progress[i]['end']))
th.start()
self.paused = not self.paused
if status == len(threads):
if not singlethread:
self.mm.close()
ended = datetime.now()
break
time.sleep(interval)
time_spent = (ended - started).total_seconds()
meanspeed = total / time_spent / 1048576
status = sum([i.completed for i in threads])
if status == len(threads):
print('Task completed on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
else:
print('Task interrupted on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
if __name__ == '__main__':
import hashlib
global PAUSE
PAUSE = False
chash = '5674E59283D95EFE8C88770515A9BBC80CBB77CB67602389FD91DEF26D26AED2'
d = Downloader()
if sys.argv[1] == '0':
d.download('http://ipv4.download.thinkbroadband.com/1GB.zip', 'C:/test/1GB.zip')
elif sys.argv[1] == '1':
th1 = Thread(target=d.download, args=('http://ipv4.download.thinkbroadband.com/1GB.zip', 'C:/test/1GB.zip'))
th1.start()
def test():
while th1.is_alive():
global PAUSE
PAUSE = not PAUSE
time.sleep(10)
th2 = Thread(target=test)
th2.start()
while th1.is_alive():
pass
sha256_hash = hashlib.sha256()
with open('C:/test/1GB.zip',"rb") as f:
for byte_block in iter(lambda: f.read(1048576),b""):
sha256_hash.update(byte_block)
print(sha256_hash.hexdigest().lower() == chash.lower())
The url isn't accessible without a VPN in my locale, and test 0 always results True, that is, if the connection hasn't gone dead during the download, and test 1 sometimes results True, sometimes results False, sometimes it doesn't finish(progress bar goes beyond 100%)...
How can my code be salvaged?
This might not be your only problem but you have a race condition that could show up if you pause and resume quickly (where the definition of quickly varies greatly depending on your circumstances). Consider that you've got 32 threads each requesting a MB chunk, let's call them threads 0-31. They are sitting their downloading and you pause. The threads do not know that you paused until they get a chunk of data as they are sitting in blocking io. Not sure what speed your connection is or how many cores your machine has (threads will sometimes act in parallel when they don't need the GIL,) but this process could take a lot longer than you expect. Then you unpause and your code creates new threads 32-63 but some or all of threads 0-31 are still waiting for the next chunk. You set threads 32-63 in motion and then you turn off your pause flag. Those threads that didn't end from 0-31 then wake up and see that things aren't paused. Now you have multiple threads accessing the same state variables
self.parent.mm[self.position: self.position + len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
so if thread 0 is downloading the same chunk as thread 31 they both keep updating all the same state and they add to position and count even though they are downloading overlapping parts of the file. You even reuse the objects that the threads live inside of so that state can get really really messed up.
for i, md in enumerate(threads):
if not md.completed:
th = Thread(target=md.multidown, args=(url, self.progress[i]['position'], self.progress[i]['end']))
th.start()
There might be some other problems in your code and it is a lot to sort through so I suggest taking the time to do some refactoring to eliminate duplicate code and organise things into more functions. I don't believe in crazy tiny functions, but you could use a few sub functions like download_multi(download_state) and download_single maybe. I am relatively confident however that your current problem will be solved if you ensure the threads you have running actually end after you pause. To do so you need to actually hold references to your threads
somewhere:
actual_threads = []
When you create your threads (the first time and after you unpause, or preferably this would be in a function and you'd do it there and return the list):
th = Thread(target=md.multidown, args=(
url, start, end))
threads.append(md)
actual_threads.append(th)
Then when you unpause:
self.paused = not self.paused
for th in actual_threads:
th.join()
This way you have the threads working, they quit when you pause and you rebuild them. So join should return as soon as they break out of the blocking io call to iter_content. This way those threads are always dead before you make the new ones.
What I would do myself however would be to create sockets from each thread to the main process. When pause is detected the threads shut down the request and save any data that's already waiting in the OS buffer then go into a blocking receive on the socket (there might be a way to use select with a socket and requests to allow you to even break out of the blocking io involved in r.iter_content immediately but I leave that for your research). When the program is unpaused the main process would send some value to indicate the program should restart (you'd want at least two signals the threads would recognise, one for quitting gracefully and one to resume. The codes can be single characters.) When the value is sent to each thread that thread will unblock and can then restart the download using requests and its previous state like nothing happened.

Multiprocessing.Process do not run process in parallel

I tried to run a very simple multiprocessing code, but the code is still serially processed.
I have tried to run it on Mac(macOS 10.13) and Linux(Ubuntu 18.04) with python 2 and 3, but in both environments I had the same problem.
the function _process has to receive numpy array as arguments, so I decided to use Multiprocess.Process instead of Multiprocess.Pool.map() and Multiprocess.Pool.apply_async() because pickle is broken when use pool.map() in a class. https://stackoverflow.com/a/21345308/4755986
import time
from multiprocessing import Process, Queue
import numpy as np
class model:
def __init__(self):
self.results = []
self.jobs = []
self.start = time.time()
def _process(self, x,y,z):
j= 0
for i in range(10**8):
j = i+j
return j
def work(self,X,Y,Z, result_queue):
start = time.time() -self.start
result = self._process(X,Y,Z)
result_queue.put(result)
print(result)
end = time.time() -self.start
print( 'start time: ', start)
print('end time:', end)
# return result_queue
def fit(self,num):
for i in range(num):
X, Y, Z = np.ones([5,5]), np.ones([3,3]), np.ones([2,2])
result_queue = Queue()
p = Process(target=self.work, args = (X,Y,Z, result_queue))
self.jobs.append(p)
p.start()
print( 'ChildProcess...',i)
result = result_queue.get()
self.results.append(result)
for p in self.jobs:
p.join()
p.close()
return self.results
R = model()
k = R.fit(10)
print(k)
The time of start and end of each process is printed, and the second process only starts after the first process is finished. This is strange because each process should be automatically assign to different core and run in parallel.
result = result_queue.get()
result_queue.get() will block if it is empty. An item will only be added when a process finishes, hence the next process will be spawned only if the previous has finished.
Below is a version that does spawn 10 processes at once. I've marked the section I've added:
import time
from multiprocessing import Process, Queue
import numpy as np
class model:
def __init__(self):
self.results = []
self.jobs = []
self.start = time.time()
def _process(self, x,y,z):
j= 0
for i in range(10**8):
j = i+j
return j
def work(self,X,Y,Z, result_queue):
start = time.time() -self.start
result = self._process(X,Y,Z)
result_queue.put(result)
print(result)
end = time.time() -self.start
print( 'start time: ', start)
print('end time:', end)
# return result_queue
def fit(self,num):
for i in range(num):
X, Y, Z = np.ones([5,5]), np.ones([3,3]), np.ones([2,2])
result_queue = Queue()
p = Process(target=self.work, args = (X,Y,Z, result_queue))
self.jobs.append(p)
p.start()
print( 'ChildProcess...',i)
#result = result_queue.get() # <--- This blocks
#self.results.append(result)
for p in self.jobs:
p.join()
p.close()
for result in result_queue: # <-----
self.results.append(result) # <-----
return self.results
R = model()
k = R.fit(10)
print(k)

Python Threading with Serial ports waiting for other thread to complete

New to Queues and Threads but as I have gotten my program to work and run 2 threads of motors separate from each other, ive noticed that there is still a point in time that one will wait for the other to finish.
I have 2 motors on linear rails. In order to validate their consistency, repeatability, and accuracy it runs a home test. Just giving the motor a fixed number of tests and a position to go to, where it runs back and forth, logs the data and tells me how close we got both in steps and distance. The odd behavior is that one motor is waiting for the other to complete its cycle before starting its next cycle.
My guess is this is a problem with serial reads where my motor is held up waiting for a serial read on one motor and cannot execute the other. Can anyone confirm this or have a work around?
Code for the test, wrapped in a class
def home(self):
DIConf = None
debugStart = time.time()
while DIConf != -1:
self.anyCMD('DI-1')
while DIConf == None:
DIConf = self.checkReturn('DI')
if time.time() - debugStart >= 5:
print 'DIConf = ' + str(DIConf)
return 'Homing Timeout'
self.s.write('SH1H\r')
def homeTest(self, numTests = 30, testPos = 100000):
if numTests == None:
numTests = 30
if testPos == None:
testPos = 100000
self.tests = numTests
self.testingPos = testPos
self.B = np.zeros((tests, 4))
for i in range(0, self.tests):
# print i
self.setPos(0)
self.start = time.time()
self.gotoPos(self.testingPos)
self.s.write('WM\r')
time.sleep(2)
while self.getPos() < self.testingPos:
pass
self.testPosConfirm = self.getPos()
self.home()#(setHomePos = 1)
self.s.write('WM\r')
time.sleep(2)
self.end = time.time()
self.backHome = self.getPos()
self.B[i][0] = self.testPosConfirm
self.B[i][1] = self.backHome
self.B[i][2] = (float(self.backHome)/20000) * 3.175
self.B[i][3] = self.end-self.start
return self.B
and code for the queue/threads
import Queue
import threading
a = Queue.Queue()
b = Queue.Queue()
y = motor(DA = 1)
z = motor(DA = 2)
def yQueue(tempY, tempA, cycles = None, defPos = None):
tempVar = tempY.homeTest(cycles, defPos)
tempA.put(tempVar)
if 1 == 1:
thread = threading.Thread(target = yQueue, args = [y, a, 5, 50000])
thread2 = threading.Thread(target = yQueue, args = [z, b, 5, 600000])
thread.start()
thread2.start()
As suggested below. Attempted the same thing with Multiprocessing and got similar results.
Code:
import multiprocessing as mp
p = mp.Process(target = yQueue, args = [y, a, 3, 40000])
p.start()
p.join
p2 = Process(target = yQueue, args = [z, b, 3, 500000])
p2.start()
p2.join

Python multiprocessing (pool map) freezing

multi() freezes somewhere in the middle of its activity:
def current_proc(): print mp.current_process().name, 'started'
def multi(fn, func):
print 'Process started on',time.strftime('%H:%M:%S')
count = mp.cpu_count()*2
input = nohead(xlsx2array(fn))
parts = chunks(input, 10)
pool = mp.Pool(processes = count, initializer = current_proc, maxtasksperchild = 1)
for part in parts:
with stopwatch() as r: pool.map(func, part)
return r
pool.close()
pool.join()
I am using multiprocessing with the function to get effective urls:
def query(i):
attempts = 2
while attempts:
try:
q = requests.get(i, allow_redirects = True, verify = False, timeout = 2)
match = q.url
match = str(match)
break
except:
attempts -= 1
match = 'pattern not found'
pass
return [ i, match ]
Please advise how can I avoid such freezing. Thanks,

Categories

Resources