Currently I'm working on plotting the data from two sensors (audio and vibration) on the same graph in real time, but I'm facing a problem.
This is my server:
import socket as s
import math as M
import struct
import time
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from datetime import datetime, tzinfo
import pytz
import socket
import sys
import json
import csv
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from datetime import datetime
from datetime import timedelta
import math as M
...
import multiprocessing
from multiprocessing import Lock , Process, Queue
timel = []
visiond = []
import csv
# -----------
freq = 50
seconds = 60 # number of seconds of data to display
gain = 10
wlen = freq / 100
per_lap = 0.9
ch = 'SHZ'
mult = 8.0
displ_samps = seconds * freq
save = False
# -----------
# -----------
def _nearest_pow_2(x):
"""
Find power of two nearest to x
>>> _nearest_pow_2(3)
2.0
>>> _nearest_pow_2(15)
16.0
:type x: float
:param x: Number
:rtype: Int
:return: Nearest power of 2 to x
"""
a = M.pow(2, M.ceil(np.log2(x)))
b = M.pow(2, M.floor(np.log2(x)))
if abs(a - x) < abs(b - x):
return a
else:
return b
nfft1 = int(_nearest_pow_2(wlen * freq))
nlap1 = int(nfft1 * per_lap)
if mult is not None:
mult = int(_nearest_pow_2(mult))
mult = mult * nfft1
# -------------------
# ------------
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(13, 6))
plt.ion()
fig.show()
fig.canvas.draw()
n = 0
stream = [] # our stream (simple list object)
curr = datetime.now()
# --------------
# sudo iptables -A INPUT -p tcp -s 192.168.0.9 --dport 50012 -j ACCEPT
# Set up two TCP/IP servers: one for the vibration stream, one for the audio stream.
tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
aud_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Seconds added per sample index when vibration samples are timestamped.
dur = 0.02
# The host has to be a string: the bare name `localhost` is not defined
# anywhere and raised NameError before either socket could be bound.
server_address = ('localhost', 50021)       # vibration clients
server_address_aud = ('localhost', 50022)   # audio clients
tcp_socket.bind(server_address)
aud_socket.bind(server_address_aud)
# Start listening on both ports with a backlog of one pending connection.
tcp_socket.listen(1)
aud_socket.listen(1)
def update_vib(tcp_socket,n,q,lock,curr,stream):
    """Accept vibration clients forever and feed their samples to the plot.

    Every packet is split on commas after braces and spaces are removed.
    The first field must contain the channel name ``ch``, the second is the
    start time of the packet and the remaining fields are integer samples.
    NOTE(review): the packet layout is inferred from this parsing only --
    confirm it against the sender.

    NOTE(review): the figure is created at import time in the parent process
    while this function draws on it from a child process; that is a likely
    cause of the reported xcb abort.  Consider sending samples to the parent
    through a multiprocessing.Queue and drawing only there.

    :param tcp_socket: listening socket that vibration clients connect to
    :param n: unused, kept so existing callers keep working
    :param q: unused, kept so existing callers keep working
    :param lock: lock held while plotting_figures() runs
    :param curr: unused, kept so existing callers keep working
    :param stream: list of integer samples to display; modified in place
    """
    while True:
        print("Waiting for connection")
        connection, client = tcp_socket.accept()
        try:
            print("Connected to client IP: {}".format(client))
            # Greet the client with a UTC timestamp one second in the future.
            future_time = datetime.now(tz=pytz.UTC) + timedelta(seconds=1)
            greeting = future_time.strftime("%m/%d/%Y, %H:%M:%S")
            connection.sendall(bytes(greeting, 'UTF-8'))
            while True:
                data = connection.recv(1024)
                # An empty read means the peer closed the connection.  This
                # has to be checked before parsing, not after it.
                if not data:
                    break
                text = data.decode('UTF-8')
                fields = (text.replace('{', '').replace('}', '')
                          .replace(' ', '').split(','))
                # Skip malformed packets and packets from other channels.
                if len(fields) < 2 or ch not in fields[0]:
                    continue
                initial_time = float(fields[1])
                samples = fields[2:]
                for i, value in enumerate(samples):
                    # Both lists hold strings, exactly as before: the time is
                    # formatted with three decimals and the value keeps the
                    # leading space it used to get from the ', ' separator.
                    timel.append('%.3f' % (initial_time + i * dur))
                    visiond.append(' %s' % value)
                stream.extend(int(value) for value in samples)
                # Trim in place.  Rebinding the name (stream = stream[...])
                # would leave the list read by plotting_figures() untouched.
                overflow = len(stream) - displ_samps
                if overflow > 0:
                    del stream[:overflow]
                # The context manager releases the lock even if drawing fails.
                with lock:
                    plotting_figures()
        finally:
            connection.close()
amp22=[]
audiotime=[]
def recived_aud(aud_socket,lock) :
    """Accept audio clients forever and plot the value pairs they send.

    The byte stream is consumed as consecutive records of two native doubles
    (struct format ``"dd"``).  The first value of each record is appended to
    ``audiotime`` and the second to ``amp22``; the figure is redrawn after
    every record while ``lock`` is held.

    :param aud_socket: listening socket that audio clients connect to
    :param lock: lock held while plotting_figures() runs
    """
    payload_size2 = struct.calcsize("dd")
    while True:
        connection, client = aud_socket.accept()
        print("Connected to client IP: {}".format(client))
        data = b''
        try:
            while True:
                # Keep appending until one complete record is buffered.  The
                # buffer must grow: replacing it with each recv() result would
                # drop the bytes that were already waiting.
                while len(data) < payload_size2:
                    chunk = connection.recv(1024)
                    if not chunk:
                        break
                    data += chunk
                if len(data) < payload_size2:
                    # Peer closed the connection; go back to accept().
                    break
                packed_msg_size = data[:payload_size2]
                data = data[payload_size2:]
                tup = struct.unpack('dd', packed_msg_size)
                audiotime.append(float(tup[0]))
                amp22.append(float(tup[1]))
                print(tup)
                # The context manager releases the lock even if drawing fails.
                with lock:
                    plotting_figures()
        finally:
            connection.close()
def plotting_figures():
    """Redraw both subplots: ``stream`` on the first axis, ``amp22`` on the second.

    An axis whose series is empty is only cleared.
    """
    for axis, series in ((ax[0], stream), (ax[1], amp22)):
        axis.clear()  # ready the plot axis for a new draw
        if not series:
            continue
        # being explicit about the limits helps speed things up slightly
        axis.set_xlim(0, len(series))
        axis.set_ylim(min(series) - 25, max(series) + 25)
        axis.plot(series, linewidth=0.5)
    fig.canvas.draw()
# Lock handed to both worker processes; each one holds it around plotting_figures().
lock=Lock()
q=Queue()
# The figure and axes are put on the queue and taken straight off again;
# qq is not referenced anywhere else in the code shown.
q.put([fig,ax])
qq=q.get()
# One process per sensor: p1 serves vibration clients, p2 serves audio clients.
p1=Process(target=update_vib, args=(tcp_socket,n,q,lock,curr,stream,))
p2=Process(target=recived_aud, args=(aud_socket,lock))
p1.start()
p2.start()
# Both targets loop forever, so these joins block for the life of the program.
p1.join()
p2.join()
the error I'm receiving:
[xcb] Unknown sequence number while processing queue
[xcb] Most likely this is a multi-threaded client and XInitThreads has not been called
[xcb] Aborting, sorry about that.
python3.8: ../../src/xcb_io.c:260: poll_for_event: Assertion `!xcb_xlib_threads_sequence_lost' failed.
I'm aware that matplotlib does not support multi-threading, but I'm using multiprocessing instead. Please let me know how I can update the two plots on the same figure simultaneously.
and thank you
I have written a Python script that downloads a single file using 32 connections if available.
I have written a multiconnection downloader that works fine without pausing, but won't stop downloading after resuming, the progress would go beyond 100%...
Like this:
Download mode: Multi-thread (press Space to pause/resume, press Escape to stop)
[████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████] 120% completed, paused: False
Download mode: Multi-thread (press Space to pause/resume, press Escape to stop)
1798.08 MiB downloaded, 1489.83 MiB total, -308.25 MiB remaining, download speed: 22.73 MiB/s
Minimum speed: 0.00 MiB/s, average speed: 4.54 MiB/s, maximum speed: 75.00 MiB/s
Task started on 2021-08-09 16:57:03, 00:06:35 elapsed, ETA: -1:59:47
After progress exceeds 100%, there will be error messages like this:
Exception in thread Thread-78:
Traceback (most recent call last):
File "C:\Program Files\Python39\lib\threading.py", line 973, in _bootstrap_inner
self.run()
File "C:\Program Files\Python39\lib\threading.py", line 910, in run
self._target(*self._args, **self._kwargs)
File "D:\MyScript\downloader.py", line 70, in multidown
mm[position: position+len(chunk)] = chunk
IndexError: mmap slice assignment is wrong size
(The above doesn't include all of the error message)
I have encountered all sorts of errors after resuming, but most importantly, the server will often send extra bytes from previous request, whose connection is dead and needless to say this breaks the whole code.
How should I implement pause and resume correctly?
I am thinking about multiprocessing, I assume the sessions and connections are all PID and port number related, and so far I haven't encountered a new run of the script that received extra bytes from previous runs of the script, so I guess using another process with a new PID and new port number plus requests.session() plus {'connection': 'close'} for each download should guarantee that no extra bytes from previous connections will be received, I just don't know how to share variables between processes...
The code:
downloader.py
import json
import keyboard
import os
import re
import requests
import sys
import time
import validators
from collections import deque
from datetime import datetime, timedelta
from math import inf
from mmap import mmap
from pathlib import Path
from ping3 import ping
from reprint import output
from threading import Thread
def timestring(sec):
    """Format a duration in seconds as ``HH:MM:SS``; fractions are truncated."""
    hours, remainder = divmod(int(sec), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d}'
class Downloader:
def __init__(self):
self.recent = deque([0] * 12, maxlen=12)
self.recentspeeds = deque([0] * 200, maxlen=200)
self.paused = False
self.progress = dict()
class Multidown:
def __init__(self, obj, id):
self.count = 0
self.position = 0
self.completed = False
self.id = id
self.parent = obj
def multidown(self, url, start, end):
interrupted = False
s = requests.session()
s.headers.update({'connection': 'close', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'})
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
while end - length + (self.id != self.parent.progress['connections'] - 1) != start or r.status_code != 206:
r.close()
s.close()
del r
del s
time.sleep(0.02)
s = requests.session()
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
self.position = start
for chunk in r.iter_content(1048576):
if self.parent.paused:
self.parent.mm.flush()
r.connection.close()
r.close()
s.close()
del r
del s
interrupted = True
break
if chunk:
self.parent.mm[self.position: self.position+len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
if not interrupted:
r.close()
s.close()
if self.count == self.parent.progress[self.id]['length']:
self.completed = True
self.parent.progress[self.id]['completed'] = True
self.parent.mm.flush()
class Singledown:
def __init__(self):
self.count = 0
def singledown(self, url, path):
with requests.get(url, stream=True) as r:
with path.open('wb') as file:
for chunk in r.iter_content(1048576):
if chunk:
self.count += len(chunk)
file.write(chunk)
def download(self, url, filepath, num_connections=32, overwrite=False):
singlethread = False
threads = []
bcontinue = False
filepath = filepath.replace('\\', '/')
if (not re.match('^[a-zA-Z]:/(((?![<>:"/|?*]).)+((?<![ .])/)?)*$', filepath) or
not Path(filepath[:3]).exists()):
print('Invalid windows file path has been inputted, process will now stop.')
return
if not validators.url(url):
print('Invalid url been inputted, process will now stop.')
return
if url.lower().startswith('ftp://'):
print(
"`requests` module doesn't suport File Transfer Protocol, process will now stop")
return
path = Path(filepath)
if not path.exists():
bcontinue = True
else:
if path.is_file():
if overwrite:
bcontinue = True
else:
while True:
answer = input(
f'`{filepath}` already exists, do you want to overwrite it? \n(Yes, No):').lower()
if answer in ['y', 'yes', 'n', 'no']:
if answer.startswith('y'):
os.remove(filepath)
bcontinue = True
break
else:
print('Invalid input detected, retaking input.')
if not bcontinue:
print(
f'Overwritting {filepath} has been aborted, process will now stop.')
return
bcontinue = False
server = url.split('/')[2]
ok = ping(server, timeout=2)
if ok == False:
print(
'The server of the inputted url is non-existent, process will now stop.')
return
if ok:
bcontinue = True
if not ok:
print('Connection has timed out, will reattempt to ping server 5 times.')
for i in range(5):
print(
f'Reattempting to ping server, retrying {i + 1} out of 5')
ok = ping(server, timeout=2)
if ok:
print(
f'Connection successful on retry {i + 1}, process will now continue.')
bcontinue = True
break
else:
print(f'Retry {i + 1} out of 5 timed out' + (i != 4)
* ', reattempting in 1 second.' + (i == 4) * '.')
time.sleep(1)
if not bcontinue:
print('Failed to connect server, connection timed out, process will now stop')
return
bcontinue = False
head = requests.head(url)
if head.status_code == 200:
bcontinue = True
else:
for i in range(5):
print(f'Server responce is invalid, retrying {i + 1} out of 5')
head = requests.head(url)
if head.status_code == 200:
print(
f'Connection successful on retry {i + 1}, process will now continue.')
bcontinue = True
break
else:
print(f'Retry {i + 1} out of 5 failed to access data' +
(i != 4) * ', reattempting in 1 second.' + (i == 4) * '.')
time.sleep(1)
if not bcontinue:
print("Can't establish a connection with access to data, can't download target file, process will now stop.")
return
folder = '/'.join(filepath.split('/')[:-1])
Path(folder).mkdir(parents=True, exist_ok=True)
headers = head.headers
total = headers.get('content-length')
if not total:
print(
f'Cannot find the total length of the content of {url}, the file will be downloaded using a single thread.')
started = datetime.now()
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
sd = self.Singledown()
th = Thread(target=sd.singledown, args=(url, path))
threads.append(sd)
th.start()
total = inf
singlethread = True
else:
total = int(total)
if not headers.get('accept-ranges'):
print(
'Server does not support the `range` parameter, the file will be downloaded using a single thread.')
started = datetime.now()
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
sd = self.Singledown()
th = Thread(target=sd.singledown, args=(url, path))
threads.append(sd)
th.start()
singlethread = True
else:
segment = total / num_connections
started = datetime.now()
lastpressed = started
path.touch()
file = path.open('wb')
file.seek(total - 1)
file.write(b'\0')
file.close()
file = path.open(mode='r+b')
self.mm = mmap(file.fileno(), 0)
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
self.progress['total'] = total
self.progress['connections'] = num_connections
for i in range(num_connections):
md = self.Multidown(self, i)
start = int(segment * i)
end = int(segment * (i + 1)) - (i != num_connections - 1)
length = end - start + (i != num_connections - 1)
th = Thread(target=md.multidown, args=(
url, start, end))
threads.append(md)
self.progress[i] = dict()
self.progress[i]['start'] = start
self.progress[i]['position'] = start
self.progress[i]['end'] = end
self.progress[i]['count'] = 0
self.progress[i]['length'] = length
self.progress[i]['completed'] = False
th.start()
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
downloaded = 0
totalMiB = total / 1048576
speeds = []
interval = 0.04
with output(initial_len=5, interval=0) as dynamic_print:
while True:
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
status = sum([i.completed for i in threads])
downloaded = sum(i.count for i in threads)
self.recent.append(downloaded)
done = int(100 * downloaded / total)
doneMiB = downloaded / 1048576
gt0 = len([i for i in self.recent if i])
if not gt0:
speed = 0
else:
recent = list(self.recent)[12 - gt0:]
if len(recent) == 1:
speed = recent[0] / 1048576 / interval
else:
diff = [b - a for a, b in zip(recent, recent[1:])]
speed = sum(diff) / len(diff) / 1048576 / interval
speeds.append(speed)
self.recentspeeds.append(speed)
nzspeeds = [i for i in speeds if i]
if nzspeeds:
minspeed = min(nzspeeds)
else:
minspeed = 0
maxspeed = max(speeds)
now = datetime.now()
elapsed = (now - started).total_seconds()
meanspeed = downloaded / elapsed / 1048576
remaining = totalMiB - doneMiB
dynamic_print[0] = '[{0}{1}] {2}'.format(
'\u2588' * done, '\u00b7' * (100-done), str(done)) + '% completed' + (not singlethread) * ', paused: {0}'.format(self.paused)
dynamic_print[1] = 'Download mode: ' + singlethread * \
'Single-thread' + (not singlethread) * 'Multi-thread (press Space to pause/resume, press Escape to stop)'
dynamic_print[2] = '{0:.2f} MiB downloaded, {1:.2f} MiB total, {2:.2f} MiB remaining, download speed: {3:.2f} MiB/s'.format(
doneMiB, totalMiB, remaining, speed)
if speed and total != inf:
eta = timestring(remaining / speed)
else:
eta = '99:59:59'
dynamic_print[3] = 'Minimum speed: {0:.2f} MiB/s, average speed: {1:.2f} MiB/s, maximum speed: {2:.2f} MiB/s'.format(
minspeed, meanspeed, maxspeed)
dynamic_print[4] = 'Task started on {0}, {1} elapsed, ETA: {2}'.format(
started.strftime('%Y-%m-%d %H:%M:%S'), timestring(elapsed), eta)
if keyboard.is_pressed('space'):
if not singlethread:
pressed = datetime.now()
if (pressed - lastpressed).total_seconds() > 0.5:
lastpressed = pressed
if self.paused:
for i, md in enumerate(threads):
if not md.completed:
th = Thread(target=md.multidown, args=(
url, self.progress[i]['position'], self.progress[i]['end']))
th.start()
self.paused = not self.paused
if keyboard.is_pressed('esc'):
if not singlethread:
ended = datetime.now()
self.paused = True
break
if status == len(threads):
if not singlethread:
self.mm.close()
ended = datetime.now()
break
time.sleep(interval)
time_spent = (ended - started).total_seconds()
meanspeed = total / time_spent / 1048576
status = sum([i.completed for i in threads])
if status == len(threads):
print('Task completed on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
else:
print('Task interrupted on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
if __name__ == '__main__':
d = Downloader()
d.download(*sys.argv[1:])
For testing purposes this is a dumbed-down version of the script, with all checks removed while retaining the same functionality (sorry it really takes all these lines to show the download information):
import json
import os
import requests
import sys
import time
from collections import deque
from datetime import datetime, timedelta
from math import inf
from mmap import mmap
from pathlib import Path
from reprint import output
from threading import Thread
def timestring(sec):
    """Format a duration in seconds as ``HH:MM:SS``; fractions are truncated."""
    hours, remainder = divmod(int(sec), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d}'
class Downloader:
def __init__(self):
self.recent = deque([0] * 12, maxlen=12)
self.recentspeeds = deque([0] * 200, maxlen=200)
self.paused = False
self.progress = dict()
self.UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
class Multidown:
def __init__(self, obj, id):
self.count = 0
self.position = 0
self.completed = False
self.id = id
self.parent = obj
self.UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
def multidown(self, url, start, end):
interrupted = False
s = requests.session()
s.headers.update({'connection': 'close', 'user-agent': self.UA})
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
while end - length + (self.id != self.parent.progress['connections'] - 1) != start or r.status_code != 206:
r.close()
s.close()
del r
del s
time.sleep(0.02)
s = requests.session()
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
self.position = start
for chunk in r.iter_content(1048576):
if self.parent.paused:
self.parent.mm.flush()
r.connection.close()
r.close()
s.close()
del r
del s
interrupted = True
break
if chunk:
self.parent.mm[self.position: self.position+len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
if not interrupted:
r.close()
s.close()
if self.count == self.parent.progress[self.id]['length']:
self.completed = True
self.parent.progress[self.id]['completed'] = True
self.parent.mm.flush()
def download(self, url, filepath, num_connections=32, overwrite=False):
singlethread = False
threads = []
bcontinue = False
filepath = filepath.replace('\\', '/')
if Path(filepath).exists():
os.remove(filepath)
folder = '/'.join(filepath.split('/')[:-1])
Path(folder).mkdir(parents=True, exist_ok=True)
head = requests.head(url, headers={'user-agent': self.UA})
path = Path(filepath)
headers = head.headers
total = headers.get('content-length')
if total:
total = int(total)
if headers.get('accept-ranges'):
segment = total / num_connections
started = datetime.now()
lastpressed = started
path.touch()
file = path.open('wb')
file.seek(total - 1)
file.write(b'\0')
file.close()
file = path.open(mode='r+b')
self.mm = mmap(file.fileno(), 0)
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
self.progress['total'] = total
self.progress['connections'] = num_connections
for i in range(num_connections):
md = self.Multidown(self, i)
start = int(segment * i)
end = int(segment * (i + 1)) - (i != num_connections - 1)
length = end - start + (i != num_connections - 1)
th = Thread(target=md.multidown, args=(
url, start, end))
threads.append(md)
self.progress[i] = dict()
self.progress[i]['start'] = start
self.progress[i]['position'] = start
self.progress[i]['end'] = end
self.progress[i]['count'] = 0
self.progress[i]['length'] = length
self.progress[i]['completed'] = False
th.start()
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
downloaded = 0
totalMiB = total / 1048576
speeds = []
interval = 0.04
with output(initial_len=5, interval=0) as dynamic_print:
while True:
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
status = sum([i.completed for i in threads])
downloaded = sum(i.count for i in threads)
self.recent.append(downloaded)
done = int(100 * downloaded / total)
doneMiB = downloaded / 1048576
gt0 = len([i for i in self.recent if i])
if not gt0:
speed = 0
else:
recent = list(self.recent)[12 - gt0:]
if len(recent) == 1:
speed = recent[0] / 1048576 / interval
else:
diff = [b - a for a, b in zip(recent, recent[1:])]
speed = sum(diff) / len(diff) / 1048576 / interval
speeds.append(speed)
self.recentspeeds.append(speed)
nzspeeds = [i for i in speeds if i]
if nzspeeds:
minspeed = min(nzspeeds)
else:
minspeed = 0
maxspeed = max(speeds)
now = datetime.now()
elapsed = (now - started).total_seconds()
meanspeed = downloaded / elapsed / 1048576
remaining = totalMiB - doneMiB
dynamic_print[0] = '[{0}{1}] {2}'.format(
'\u2588' * done, '\u00b7' * (100-done), str(done)) + '% completed' + (not singlethread) * ', paused: {0}'.format(self.paused)
dynamic_print[1] = 'Download mode: ' + singlethread * \
'Single-thread' + (not singlethread) * 'Multi-thread (press Space to pause/resume, press Escape to stop)'
dynamic_print[2] = '{0:.2f} MiB downloaded, {1:.2f} MiB total, {2:.2f} MiB remaining, download speed: {3:.2f} MiB/s'.format(
doneMiB, totalMiB, remaining, speed)
if speed and total != inf:
eta = timestring(remaining / speed)
else:
eta = '99:59:59'
dynamic_print[3] = 'Minimum speed: {0:.2f} MiB/s, average speed: {1:.2f} MiB/s, maximum speed: {2:.2f} MiB/s'.format(
minspeed, meanspeed, maxspeed)
dynamic_print[4] = 'Task started on {0}, {1} elapsed, ETA: {2}'.format(
started.strftime('%Y-%m-%d %H:%M:%S'), timestring(elapsed), eta)
if PAUSE:
if not singlethread:
pressed = datetime.now()
if (pressed - lastpressed).total_seconds() > 0.5:
lastpressed = pressed
if self.paused:
for i, md in enumerate(threads):
if not md.completed:
th = Thread(target=md.multidown, args=(
url, self.progress[i]['position'], self.progress[i]['end']))
th.start()
self.paused = not self.paused
if status == len(threads):
if not singlethread:
self.mm.close()
ended = datetime.now()
break
time.sleep(interval)
time_spent = (ended - started).total_seconds()
meanspeed = total / time_spent / 1048576
status = sum([i.completed for i in threads])
if status == len(threads):
print('Task completed on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
else:
print('Task interrupted on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
if __name__ == '__main__':
import hashlib
global PAUSE
PAUSE = False
chash = '5674E59283D95EFE8C88770515A9BBC80CBB77CB67602389FD91DEF26D26AED2'
d = Downloader()
if sys.argv[1] == '0':
d.download('http://ipv4.download.thinkbroadband.com/1GB.zip', 'C:/test/1GB.zip')
elif sys.argv[1] == '1':
th1 = Thread(target=d.download, args=('http://ipv4.download.thinkbroadband.com/1GB.zip', 'C:/test/1GB.zip'))
th1.start()
def test():
while th1.is_alive():
global PAUSE
PAUSE = not PAUSE
time.sleep(10)
th2 = Thread(target=test)
th2.start()
while th1.is_alive():
pass
sha256_hash = hashlib.sha256()
with open('C:/test/1GB.zip',"rb") as f:
for byte_block in iter(lambda: f.read(1048576),b""):
sha256_hash.update(byte_block)
print(sha256_hash.hexdigest().lower() == chash.lower())
The url isn't accessible without a VPN in my locale, and test 0 always results True, that is, if the connection hasn't gone dead during the download, and test 1 sometimes results True, sometimes results False, sometimes it doesn't finish(progress bar goes beyond 100%)...
How can my code be salvaged?
This might not be your only problem, but you have a race condition that could show up if you pause and resume quickly (where the definition of "quickly" varies greatly depending on your circumstances). Consider that you've got 32 threads each requesting a 1 MB chunk; let's call them threads 0-31. They are sitting there downloading and you pause. The threads do not know that you paused until they get a chunk of data, because they are sitting in blocking I/O. I'm not sure what speed your connection is or how many cores your machine has (threads will sometimes act in parallel when they don't need the GIL), but this process could take a lot longer than you expect. Then you unpause and your code creates new threads 32-63, but some or all of threads 0-31 are still waiting for the next chunk. You set threads 32-63 in motion and then you turn off your pause flag. Those threads from 0-31 that didn't end then wake up and see that things aren't paused. Now you have multiple threads accessing the same state variables:
self.parent.mm[self.position: self.position + len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
so if thread 0 is downloading the same chunk as thread 31 they both keep updating all the same state and they add to position and count even though they are downloading overlapping parts of the file. You even reuse the objects that the threads live inside of so that state can get really really messed up.
for i, md in enumerate(threads):
if not md.completed:
th = Thread(target=md.multidown, args=(url, self.progress[i]['position'], self.progress[i]['end']))
th.start()
There might be some other problems in your code and it is a lot to sort through so I suggest taking the time to do some refactoring to eliminate duplicate code and organise things into more functions. I don't believe in crazy tiny functions, but you could use a few sub functions like download_multi(download_state) and download_single maybe. I am relatively confident however that your current problem will be solved if you ensure the threads you have running actually end after you pause. To do so you need to actually hold references to your threads
somewhere:
actual_threads = []
When you create your threads (the first time and after you unpause, or preferably this would be in a function and you'd do it there and return the list):
th = Thread(target=md.multidown, args=(
url, start, end))
threads.append(md)
actual_threads.append(th)
Then when you unpause:
self.paused = not self.paused
for th in actual_threads:
th.join()
This way you have the threads working, they quit when you pause and you rebuild them. So join should return as soon as they break out of the blocking io call to iter_content. This way those threads are always dead before you make the new ones.
What I would do myself however would be to create sockets from each thread to the main process. When pause is detected the threads shut down the request and save any data that's already waiting in the OS buffer then go into a blocking receive on the socket (there might be a way to use select with a socket and requests to allow you to even break out of the blocking io involved in r.iter_content immediately but I leave that for your research). When the program is unpaused the main process would send some value to indicate the program should restart (you'd want at least two signals the threads would recognise, one for quitting gracefully and one to resume. The codes can be single characters.) When the value is sent to each thread that thread will unblock and can then restart the download using requests and its previous state like nothing happened.
I tried to run a very simple multiprocessing code, but the code is still serially processed.
I have tried to run it on Mac(macOS 10.13) and Linux(Ubuntu 18.04) with python 2 and 3, but in both environments I had the same problem.
the function _process has to receive numpy array as arguments, so I decided to use Multiprocess.Process instead of Multiprocess.Pool.map() and Multiprocess.Pool.apply_async() because pickle is broken when use pool.map() in a class. https://stackoverflow.com/a/21345308/4755986
import time
from multiprocessing import Process, Queue
import numpy as np
class model:
# Starts one child process per job; every job runs the same CPU-bound loop
# and reports its result through a multiprocessing Queue.
def __init__(self):
self.results = []
self.jobs = []
# Reference time; work() prints its start/end times relative to this.
self.start = time.time()
# CPU-bound placeholder: sums range(10**8); x, y and z are not used.
def _process(self, x,y,z):
j= 0
for i in range(10**8):
j = i+j
return j
# Child-process entry point: run _process, put the result on result_queue
# and print the result plus the relative start and end times.
def work(self,X,Y,Z, result_queue):
start = time.time() -self.start
result = self._process(X,Y,Z)
result_queue.put(result)
print(result)
end = time.time() -self.start
print( 'start time: ', start)
print('end time:', end)
# return result_queue
# Start num child processes and return the list of their results.
def fit(self,num):
for i in range(num):
X, Y, Z = np.ones([5,5]), np.ones([3,3]), np.ones([2,2])
result_queue = Queue()
p = Process(target=self.work, args = (X,Y,Z, result_queue))
self.jobs.append(p)
p.start()
print( 'ChildProcess...',i)
# get() blocks until the child has put its result on the queue.
# NOTE(review): indentation was lost in this paste; if this call sits inside
# the for loop, each child must finish before the next one is started.
result = result_queue.get()
self.results.append(result)
for p in self.jobs:
p.join()
p.close()
return self.results
R = model()
k = R.fit(10)
print(k)
The time of start and end of each process is printed, and the second process only starts after the first process is finished. This is strange because each process should be automatically assign to different core and run in parallel.
result = result_queue.get()
result_queue.get() will block if it is empty. An item will only be added when a process finishes, hence the next process will be spawned only if the previous has finished.
Below is a version that does spawn 10 processes at once. I've marked the section I've added:
import time
from multiprocessing import Process, Queue
import numpy as np
class model:
    """Toy workload that runs several CPU-bound jobs in parallel processes."""

    def __init__(self, iterations=10**8):
        """
        :param iterations: number of loop iterations each job performs;
            the default matches the original hard-coded workload.
        """
        self.results = []
        self.jobs = []
        # Reference time; work() prints its start/end times relative to this.
        self.start = time.time()
        self.iterations = iterations

    def _process(self, x,y,z):
        """CPU-bound placeholder: return sum(range(self.iterations)).

        x, y and z are accepted but not used.
        """
        j = 0
        for i in range(self.iterations):
            j = i + j
        return j

    def work(self,X,Y,Z, result_queue):
        """Child-process entry point: compute, queue the result, print timings."""
        start = time.time() - self.start
        result = self._process(X, Y, Z)
        result_queue.put(result)
        print(result)
        end = time.time() - self.start
        print( 'start time: ', start)
        print('end time:', end)

    def fit(self,num):
        """Start ``num`` jobs at once and return their results in start order.

        :param num: number of child processes to start
        :return: ``self.results`` with one entry appended per job
        """
        pending = []
        for i in range(num):
            X, Y, Z = np.ones([5,5]), np.ones([3,3]), np.ones([2,2])
            result_queue = Queue()
            p = Process(target=self.work, args = (X,Y,Z, result_queue))
            p.start()
            # Every job has its own queue, so keep the pair together.
            pending.append((p, result_queue))
            print( 'ChildProcess...',i)
        # All children are running now; only this second loop blocks.
        for p, result_queue in pending:
            # A multiprocessing Queue is not iterable, so read it with get().
            # Read before join(): a child that has put data on a queue may
            # not exit until that data has been consumed.
            self.results.append(result_queue.get())
            p.join()
            p.close()
            # Recorded only after the job is done so that self does not carry
            # live Process handles when it is sent to the next child.
            self.jobs.append(p)
        return self.results
R = model()
k = R.fit(10)
print(k)
I'm new to queues and threads. I have gotten my program to work and run two motors in separate threads, but I've noticed that there is still a point in time at which one will wait for the other to finish.
I have 2 motors on linear rails. In order to validate their consistency, repeatability, and accuracy it runs a home test. Just giving the motor a fixed number of tests and a position to go to, where it runs back and forth, logs the data and tells me how close we got both in steps and distance. The odd behavior is that one motor is waiting for the other to complete its cycle before starting its next cycle.
My guess is this is a problem with serial reads where my motor is held up waiting for a serial read on one motor and cannot execute the other. Can anyone confirm this or have a work around?
Code for the test, wrapped in a class
def home(self):
# Homing routine: sends 'DI-1', polls checkReturn('DI') for a reply and
# writes 'SH1H' to the serial port.
# NOTE(review): indentation was lost in this paste, so the exact nesting of
# the two loops, the timeout check and the final write cannot be confirmed.
DIConf = None
# Start of the 5-second timeout window.
debugStart = time.time()
# Repeat until the controller reports -1 for 'DI'.
while DIConf != -1:
self.anyCMD('DI-1')
# Poll until checkReturn yields something other than None.
while DIConf == None:
DIConf = self.checkReturn('DI')
# Give up once 5 seconds have passed and report the last value seen.
if time.time() - debugStart >= 5:
print 'DIConf = ' + str(DIConf)
return 'Homing Timeout'
self.s.write('SH1H\r')
def homeTest(self, numTests = 30, testPos = 100000):
    """Run repeated out-and-home cycles and record where the motor ends up.

    Each cycle zeroes the position, moves to ``testPos``, homes again and
    stores one row in ``self.B``:

    * column 0: position read after reaching the target
    * column 1: position read after homing
    * column 2: column 1 scaled by 3.175 / 20000
      (NOTE(review): presumably steps converted to a distance -- confirm units)
    * column 3: elapsed seconds for the whole cycle

    :param numTests: number of cycles; ``None`` means 30
    :param testPos: target position for each cycle; ``None`` means 100000
    :return: ``self.B``, an array of shape (numTests, 4)
    """
    if numTests is None:
        numTests = 30
    if testPos is None:
        testPos = 100000
    self.tests = numTests
    self.testingPos = testPos
    # Use self.tests here: a bare `tests` is not defined in this method and
    # raised NameError.
    self.B = np.zeros((self.tests, 4))
    for i in range(self.tests):
        self.setPos(0)
        self.start = time.time()
        self.gotoPos(self.testingPos)
        self.s.write('WM\r')
        time.sleep(2)
        # Busy-wait until the reported position reaches the target.
        while self.getPos() < self.testingPos:
            pass
        self.testPosConfirm = self.getPos()
        self.home()
        self.s.write('WM\r')
        time.sleep(2)
        self.end = time.time()
        self.backHome = self.getPos()
        self.B[i][0] = self.testPosConfirm
        self.B[i][1] = self.backHome
        self.B[i][2] = (float(self.backHome)/20000) * 3.175
        self.B[i][3] = self.end-self.start
    return self.B
and code for the queue/threads
import Queue
import threading
a = Queue.Queue()
b = Queue.Queue()
y = motor(DA = 1)
z = motor(DA = 2)
def yQueue(tempY, tempA, cycles = None, defPos = None):
    """Run ``tempY.homeTest(cycles, defPos)`` and put its result on ``tempA``."""
    tempA.put(tempY.homeTest(cycles, defPos))
if 1 == 1:
thread = threading.Thread(target = yQueue, args = [y, a, 5, 50000])
thread2 = threading.Thread(target = yQueue, args = [z, b, 5, 600000])
thread.start()
thread2.start()
As suggested below. Attempted the same thing with Multiprocessing and got similar results.
Code:
import multiprocessing as mp
# NOTE(review): a and b are Queue.Queue objects, which are presumably not
# shared between processes -- consider mp.Queue() for the results.
p = mp.Process(target = yQueue, args = [y, a, 3, 40000])
# Only `mp` is imported, so the class has to be referenced as mp.Process;
# a bare `Process` raised NameError.
p2 = mp.Process(target = yQueue, args = [z, b, 3, 500000])
# Start both workers before waiting on either one so that they can overlap.
p.start()
p2.start()
# join must be called; a bare `p.join` only names the method and does nothing.
p.join()
p2.join()