Python multi connection downloader resuming after pausing makes download run endlessly

I have written a Python script that downloads a single file using 32 connections when they are available.
The multi-connection downloader works fine without pausing, but it won't stop downloading after resuming; the progress goes beyond 100%...
Like this:
Download mode: Multi-thread (press Space to pause/resume, press Escape to stop)
[████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████] 120% completed, paused: False
Download mode: Multi-thread (press Space to pause/resume, press Escape to stop)
1798.08 MiB downloaded, 1489.83 MiB total, -308.25 MiB remaining, download speed: 22.73 MiB/s
Minimum speed: 0.00 MiB/s, average speed: 4.54 MiB/s, maximum speed: 75.00 MiB/s
Task started on 2021-08-09 16:57:03, 00:06:35 elapsed, ETA: -1:59:47
After progress exceeds 100%, there will be error messages like this:
Exception in thread Thread-78:
Traceback (most recent call last):
  File "C:\Program Files\Python39\lib\threading.py", line 973, in _bootstrap_inner
    self.run()
  File "C:\Program Files\Python39\lib\threading.py", line 910, in run
    self._target(*self._args, **self._kwargs)
  File "D:\MyScript\downloader.py", line 70, in multidown
    mm[position: position+len(chunk)] = chunk
IndexError: mmap slice assignment is wrong size
(The above doesn't include all of the error message)
I have encountered all sorts of errors after resuming, but most importantly, the server will often send extra bytes from the previous request, whose connection is dead; needless to say, this breaks the whole code.
How should I implement pause and resume correctly?
I am thinking about multiprocessing. I assume the sessions and connections are all tied to the PID and port number, and so far I haven't seen a new run of the script receive extra bytes from previous runs of the script, so I guess using another process with a new PID and new port number, plus requests.session(), plus {'connection': 'close'} for each download should guarantee that no extra bytes from previous connections will be received. I just don't know how to share variables between processes...
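For context, this is the kind of cross-process sharing I have in mind; a minimal sketch using multiprocessing.Manager (the names here are illustrative, not from the script below):

from multiprocessing import Manager, Process

def worker(progress, wid):
    # each child process updates its own slot in the managed dict
    progress[wid] = {'position': 0, 'completed': False}

if __name__ == '__main__':
    with Manager() as manager:
        progress = manager.dict()   # proxy object shared across processes
        procs = [Process(target=worker, args=(progress, i)) for i in range(4)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()
        print(dict(progress))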
The code:
downloader.py
import json
import keyboard
import os
import re
import requests
import sys
import time
import validators
from collections import deque
from datetime import datetime, timedelta
from math import inf
from mmap import mmap
from pathlib import Path
from ping3 import ping
from reprint import output
from threading import Thread


def timestring(sec):
    sec = int(sec)
    m, s = divmod(sec, 60)
    h, m = divmod(m, 60)
    return f'{h:02d}:{m:02d}:{s:02d}'


class Downloader:
    def __init__(self):
        self.recent = deque([0] * 12, maxlen=12)
        self.recentspeeds = deque([0] * 200, maxlen=200)
        self.paused = False
        self.progress = dict()

    class Multidown:
        def __init__(self, obj, id):
            self.count = 0
            self.position = 0
            self.completed = False
            self.id = id
            self.parent = obj

        def multidown(self, url, start, end):
            interrupted = False
            s = requests.session()
            s.headers.update({'connection': 'close', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'})
            r = s.get(
                url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
            length = int(r.headers['content-length'])
            while end - length + (self.id != self.parent.progress['connections'] - 1) != start or r.status_code != 206:
                r.close()
                s.close()
                del r
                del s
                time.sleep(0.02)
                s = requests.session()
                r = s.get(
                    url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
                length = int(r.headers['content-length'])
            self.position = start
            for chunk in r.iter_content(1048576):
                if self.parent.paused:
                    self.parent.mm.flush()
                    r.connection.close()
                    r.close()
                    s.close()
                    del r
                    del s
                    interrupted = True
                    break
                if chunk:
                    self.parent.mm[self.position: self.position+len(chunk)] = chunk
                    self.count += len(chunk)
                    self.position += len(chunk)
                    self.parent.progress[self.id]['count'] = self.count
                    self.parent.progress[self.id]['position'] = self.position
            if not interrupted:
                r.close()
                s.close()
            if self.count == self.parent.progress[self.id]['length']:
                self.completed = True
                self.parent.progress[self.id]['completed'] = True
                self.parent.mm.flush()

    class Singledown:
        def __init__(self):
            self.count = 0

        def singledown(self, url, path):
            with requests.get(url, stream=True) as r:
                with path.open('wb') as file:
                    for chunk in r.iter_content(1048576):
                        if chunk:
                            self.count += len(chunk)
                            file.write(chunk)

    def download(self, url, filepath, num_connections=32, overwrite=False):
        singlethread = False
        threads = []
        bcontinue = False
        filepath = filepath.replace('\\', '/')
        if (not re.match('^[a-zA-Z]:/(((?![<>:"/|?*]).)+((?<![ .])/)?)*$', filepath) or
                not Path(filepath[:3]).exists()):
            print('Invalid windows file path has been inputted, process will now stop.')
            return
        if not validators.url(url):
            print('Invalid url has been inputted, process will now stop.')
            return
        if url.lower().startswith('ftp://'):
            print(
                "`requests` module doesn't support File Transfer Protocol, process will now stop")
            return
        path = Path(filepath)
        if not path.exists():
            bcontinue = True
        else:
            if path.is_file():
                if overwrite:
                    bcontinue = True
                else:
                    while True:
                        answer = input(
                            f'`{filepath}` already exists, do you want to overwrite it? \n(Yes, No):').lower()
                        if answer in ['y', 'yes', 'n', 'no']:
                            if answer.startswith('y'):
                                os.remove(filepath)
                                bcontinue = True
                            break
                        else:
                            print('Invalid input detected, retaking input.')
        if not bcontinue:
            print(
                f'Overwriting {filepath} has been aborted, process will now stop.')
            return
        bcontinue = False
        server = url.split('/')[2]
        ok = ping(server, timeout=2)
        if ok == False:
            print(
                'The server of the inputted url is non-existent, process will now stop.')
            return
        if ok:
            bcontinue = True
        if not ok:
            print('Connection has timed out, will reattempt to ping server 5 times.')
            for i in range(5):
                print(
                    f'Reattempting to ping server, retrying {i + 1} out of 5')
                ok = ping(server, timeout=2)
                if ok:
                    print(
                        f'Connection successful on retry {i + 1}, process will now continue.')
                    bcontinue = True
                    break
                else:
                    print(f'Retry {i + 1} out of 5 timed out' + (i != 4)
                          * ', reattempting in 1 second.' + (i == 4) * '.')
                    time.sleep(1)
        if not bcontinue:
            print('Failed to connect to server, connection timed out, process will now stop')
            return
        bcontinue = False
        head = requests.head(url)
        if head.status_code == 200:
            bcontinue = True
        else:
            for i in range(5):
                print(f'Server response is invalid, retrying {i + 1} out of 5')
                head = requests.head(url)
                if head.status_code == 200:
                    print(
                        f'Connection successful on retry {i + 1}, process will now continue.')
                    bcontinue = True
                    break
                else:
                    print(f'Retry {i + 1} out of 5 failed to access data' +
                          (i != 4) * ', reattempting in 1 second.' + (i == 4) * '.')
                    time.sleep(1)
        if not bcontinue:
            print("Can't establish a connection with access to data, can't download target file, process will now stop.")
            return
        folder = '/'.join(filepath.split('/')[:-1])
        Path(folder).mkdir(parents=True, exist_ok=True)
        headers = head.headers
        total = headers.get('content-length')
        if not total:
            print(
                f'Cannot find the total length of the content of {url}, the file will be downloaded using a single thread.')
            started = datetime.now()
            print('Task started on %s.' %
                  started.strftime('%Y-%m-%d %H:%M:%S'))
            sd = self.Singledown()
            th = Thread(target=sd.singledown, args=(url, path))
            threads.append(sd)
            th.start()
            total = inf
            singlethread = True
        else:
            total = int(total)
            if not headers.get('accept-ranges'):
                print(
                    'Server does not support the `range` parameter, the file will be downloaded using a single thread.')
                started = datetime.now()
                print('Task started on %s.' %
                      started.strftime('%Y-%m-%d %H:%M:%S'))
                sd = self.Singledown()
                th = Thread(target=sd.singledown, args=(url, path))
                threads.append(sd)
                th.start()
                singlethread = True
            else:
                segment = total / num_connections
                started = datetime.now()
                lastpressed = started
                path.touch()
                file = path.open('wb')
                file.seek(total - 1)
                file.write(b'\0')
                file.close()
                file = path.open(mode='r+b')
                self.mm = mmap(file.fileno(), 0)
                print('Task started on %s.' %
                      started.strftime('%Y-%m-%d %H:%M:%S'))
                self.progress['total'] = total
                self.progress['connections'] = num_connections
                for i in range(num_connections):
                    md = self.Multidown(self, i)
                    start = int(segment * i)
                    end = int(segment * (i + 1)) - (i != num_connections - 1)
                    length = end - start + (i != num_connections - 1)
                    th = Thread(target=md.multidown, args=(
                        url, start, end))
                    threads.append(md)
                    self.progress[i] = dict()
                    self.progress[i]['start'] = start
                    self.progress[i]['position'] = start
                    self.progress[i]['end'] = end
                    self.progress[i]['count'] = 0
                    self.progress[i]['length'] = length
                    self.progress[i]['completed'] = False
                    th.start()
                Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
        downloaded = 0
        totalMiB = total / 1048576
        speeds = []
        interval = 0.04
        with output(initial_len=5, interval=0) as dynamic_print:
            while True:
                Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
                status = sum([i.completed for i in threads])
                downloaded = sum(i.count for i in threads)
                self.recent.append(downloaded)
                done = int(100 * downloaded / total)
                doneMiB = downloaded / 1048576
                gt0 = len([i for i in self.recent if i])
                if not gt0:
                    speed = 0
                else:
                    recent = list(self.recent)[12 - gt0:]
                    if len(recent) == 1:
                        speed = recent[0] / 1048576 / interval
                    else:
                        diff = [b - a for a, b in zip(recent, recent[1:])]
                        speed = sum(diff) / len(diff) / 1048576 / interval
                speeds.append(speed)
                self.recentspeeds.append(speed)
                nzspeeds = [i for i in speeds if i]
                if nzspeeds:
                    minspeed = min(nzspeeds)
                else:
                    minspeed = 0
                maxspeed = max(speeds)
                now = datetime.now()
                elapsed = (now - started).total_seconds()
                meanspeed = downloaded / elapsed / 1048576
                remaining = totalMiB - doneMiB
                dynamic_print[0] = '[{0}{1}] {2}'.format(
                    '\u2588' * done, '\u00b7' * (100-done), str(done)) + '% completed' + (not singlethread) * ', paused: {0}'.format(self.paused)
                dynamic_print[1] = 'Download mode: ' + singlethread * \
                    'Single-thread' + (not singlethread) * 'Multi-thread (press Space to pause/resume, press Escape to stop)'
                dynamic_print[2] = '{0:.2f} MiB downloaded, {1:.2f} MiB total, {2:.2f} MiB remaining, download speed: {3:.2f} MiB/s'.format(
                    doneMiB, totalMiB, remaining, speed)
                if speed and total != inf:
                    eta = timestring(remaining / speed)
                else:
                    eta = '99:59:59'
                dynamic_print[3] = 'Minimum speed: {0:.2f} MiB/s, average speed: {1:.2f} MiB/s, maximum speed: {2:.2f} MiB/s'.format(
                    minspeed, meanspeed, maxspeed)
                dynamic_print[4] = 'Task started on {0}, {1} elapsed, ETA: {2}'.format(
                    started.strftime('%Y-%m-%d %H:%M:%S'), timestring(elapsed), eta)
                if keyboard.is_pressed('space'):
                    if not singlethread:
                        pressed = datetime.now()
                        if (pressed - lastpressed).total_seconds() > 0.5:
                            lastpressed = pressed
                            if self.paused:
                                for i, md in enumerate(threads):
                                    if not md.completed:
                                        th = Thread(target=md.multidown, args=(
                                            url, self.progress[i]['position'], self.progress[i]['end']))
                                        th.start()
                            self.paused = not self.paused
                if keyboard.is_pressed('esc'):
                    if not singlethread:
                        ended = datetime.now()
                        self.paused = True
                        break
                if status == len(threads):
                    if not singlethread:
                        self.mm.close()
                    ended = datetime.now()
                    break
                time.sleep(interval)
        time_spent = (ended - started).total_seconds()
        meanspeed = total / time_spent / 1048576
        status = sum([i.completed for i in threads])
        if status == len(threads):
            print('Task completed on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
                ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
        else:
            print('Task interrupted on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
                ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))


if __name__ == '__main__':
    d = Downloader()
    d.download(*sys.argv[1:])
For testing purposes, this is a dumbed-down version of the script, with all the checks removed but the same core functionality (sorry, it really takes all these lines to show the download information):
import json
import os
import requests
import sys
import time
from collections import deque
from datetime import datetime, timedelta
from math import inf
from mmap import mmap
from pathlib import Path
from reprint import output
from threading import Thread


def timestring(sec):
    sec = int(sec)
    m, s = divmod(sec, 60)
    h, m = divmod(m, 60)
    return f'{h:02d}:{m:02d}:{s:02d}'


class Downloader:
    def __init__(self):
        self.recent = deque([0] * 12, maxlen=12)
        self.recentspeeds = deque([0] * 200, maxlen=200)
        self.paused = False
        self.progress = dict()
        self.UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'

    class Multidown:
        def __init__(self, obj, id):
            self.count = 0
            self.position = 0
            self.completed = False
            self.id = id
            self.parent = obj
            self.UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'

        def multidown(self, url, start, end):
            interrupted = False
            s = requests.session()
            s.headers.update({'connection': 'close', 'user-agent': self.UA})
            r = s.get(
                url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
            length = int(r.headers['content-length'])
            while end - length + (self.id != self.parent.progress['connections'] - 1) != start or r.status_code != 206:
                r.close()
                s.close()
                del r
                del s
                time.sleep(0.02)
                s = requests.session()
                r = s.get(
                    url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
                length = int(r.headers['content-length'])
            self.position = start
            for chunk in r.iter_content(1048576):
                if self.parent.paused:
                    self.parent.mm.flush()
                    r.connection.close()
                    r.close()
                    s.close()
                    del r
                    del s
                    interrupted = True
                    break
                if chunk:
                    self.parent.mm[self.position: self.position+len(chunk)] = chunk
                    self.count += len(chunk)
                    self.position += len(chunk)
                    self.parent.progress[self.id]['count'] = self.count
                    self.parent.progress[self.id]['position'] = self.position
            if not interrupted:
                r.close()
                s.close()
            if self.count == self.parent.progress[self.id]['length']:
                self.completed = True
                self.parent.progress[self.id]['completed'] = True
                self.parent.mm.flush()

    def download(self, url, filepath, num_connections=32, overwrite=False):
        singlethread = False
        threads = []
        bcontinue = False
        filepath = filepath.replace('\\', '/')
        if Path(filepath).exists():
            os.remove(filepath)
        folder = '/'.join(filepath.split('/')[:-1])
        Path(folder).mkdir(parents=True, exist_ok=True)
        head = requests.head(url, headers={'user-agent': self.UA})
        path = Path(filepath)
        headers = head.headers
        total = headers.get('content-length')
        if total:
            total = int(total)
            if headers.get('accept-ranges'):
                segment = total / num_connections
                started = datetime.now()
                lastpressed = started
                path.touch()
                file = path.open('wb')
                file.seek(total - 1)
                file.write(b'\0')
                file.close()
                file = path.open(mode='r+b')
                self.mm = mmap(file.fileno(), 0)
                print('Task started on %s.' %
                      started.strftime('%Y-%m-%d %H:%M:%S'))
                self.progress['total'] = total
                self.progress['connections'] = num_connections
                for i in range(num_connections):
                    md = self.Multidown(self, i)
                    start = int(segment * i)
                    end = int(segment * (i + 1)) - (i != num_connections - 1)
                    length = end - start + (i != num_connections - 1)
                    th = Thread(target=md.multidown, args=(
                        url, start, end))
                    threads.append(md)
                    self.progress[i] = dict()
                    self.progress[i]['start'] = start
                    self.progress[i]['position'] = start
                    self.progress[i]['end'] = end
                    self.progress[i]['count'] = 0
                    self.progress[i]['length'] = length
                    self.progress[i]['completed'] = False
                    th.start()
                Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
                downloaded = 0
                totalMiB = total / 1048576
                speeds = []
                interval = 0.04
                with output(initial_len=5, interval=0) as dynamic_print:
                    while True:
                        Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
                        status = sum([i.completed for i in threads])
                        downloaded = sum(i.count for i in threads)
                        self.recent.append(downloaded)
                        done = int(100 * downloaded / total)
                        doneMiB = downloaded / 1048576
                        gt0 = len([i for i in self.recent if i])
                        if not gt0:
                            speed = 0
                        else:
                            recent = list(self.recent)[12 - gt0:]
                            if len(recent) == 1:
                                speed = recent[0] / 1048576 / interval
                            else:
                                diff = [b - a for a, b in zip(recent, recent[1:])]
                                speed = sum(diff) / len(diff) / 1048576 / interval
                        speeds.append(speed)
                        self.recentspeeds.append(speed)
                        nzspeeds = [i for i in speeds if i]
                        if nzspeeds:
                            minspeed = min(nzspeeds)
                        else:
                            minspeed = 0
                        maxspeed = max(speeds)
                        now = datetime.now()
                        elapsed = (now - started).total_seconds()
                        meanspeed = downloaded / elapsed / 1048576
                        remaining = totalMiB - doneMiB
                        dynamic_print[0] = '[{0}{1}] {2}'.format(
                            '\u2588' * done, '\u00b7' * (100-done), str(done)) + '% completed' + (not singlethread) * ', paused: {0}'.format(self.paused)
                        dynamic_print[1] = 'Download mode: ' + singlethread * \
                            'Single-thread' + (not singlethread) * 'Multi-thread (press Space to pause/resume, press Escape to stop)'
                        dynamic_print[2] = '{0:.2f} MiB downloaded, {1:.2f} MiB total, {2:.2f} MiB remaining, download speed: {3:.2f} MiB/s'.format(
                            doneMiB, totalMiB, remaining, speed)
                        if speed and total != inf:
                            eta = timestring(remaining / speed)
                        else:
                            eta = '99:59:59'
                        dynamic_print[3] = 'Minimum speed: {0:.2f} MiB/s, average speed: {1:.2f} MiB/s, maximum speed: {2:.2f} MiB/s'.format(
                            minspeed, meanspeed, maxspeed)
                        dynamic_print[4] = 'Task started on {0}, {1} elapsed, ETA: {2}'.format(
                            started.strftime('%Y-%m-%d %H:%M:%S'), timestring(elapsed), eta)
                        if PAUSE:
                            if not singlethread:
                                pressed = datetime.now()
                                if (pressed - lastpressed).total_seconds() > 0.5:
                                    lastpressed = pressed
                                    if self.paused:
                                        for i, md in enumerate(threads):
                                            if not md.completed:
                                                th = Thread(target=md.multidown, args=(
                                                    url, self.progress[i]['position'], self.progress[i]['end']))
                                                th.start()
                                    self.paused = not self.paused
                        if status == len(threads):
                            if not singlethread:
                                self.mm.close()
                            ended = datetime.now()
                            break
                        time.sleep(interval)
                time_spent = (ended - started).total_seconds()
                meanspeed = total / time_spent / 1048576
                status = sum([i.completed for i in threads])
                if status == len(threads):
                    print('Task completed on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
                        ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
                else:
                    print('Task interrupted on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
                        ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))


if __name__ == '__main__':
    import hashlib
    global PAUSE
    PAUSE = False
    chash = '5674E59283D95EFE8C88770515A9BBC80CBB77CB67602389FD91DEF26D26AED2'
    d = Downloader()
    if sys.argv[1] == '0':
        d.download('http://ipv4.download.thinkbroadband.com/1GB.zip', 'C:/test/1GB.zip')
    elif sys.argv[1] == '1':
        th1 = Thread(target=d.download, args=('http://ipv4.download.thinkbroadband.com/1GB.zip', 'C:/test/1GB.zip'))
        th1.start()

        def test():
            while th1.is_alive():
                global PAUSE
                PAUSE = not PAUSE
                time.sleep(10)

        th2 = Thread(target=test)
        th2.start()
        while th1.is_alive():
            pass
        sha256_hash = hashlib.sha256()
        with open('C:/test/1GB.zip', "rb") as f:
            for byte_block in iter(lambda: f.read(1048576), b""):
                sha256_hash.update(byte_block)
        print(sha256_hash.hexdigest().lower() == chash.lower())
The url isn't accessible without a VPN in my locale. Test 0 always results in True, that is, as long as the connection hasn't gone dead during the download, while test 1 sometimes results in True, sometimes in False, and sometimes doesn't finish at all (the progress bar goes beyond 100%)...
How can my code be salvaged?

This might not be your only problem, but you have a race condition that could show up if you pause and resume quickly (where the definition of quickly varies greatly depending on your circumstances).

Consider that you've got 32 threads each requesting a MB chunk; let's call them threads 0-31. They are sitting there downloading and you pause. The threads do not know that you paused until they get a chunk of data, as they are sitting in blocking IO. Not sure what speed your connection is or how many cores your machine has (threads will sometimes act in parallel when they don't need the GIL), but this process could take a lot longer than you expect.

Then you unpause and your code creates new threads 32-63, but some or all of threads 0-31 are still waiting for the next chunk. You set threads 32-63 in motion and then you turn off your pause flag. Those threads that didn't end from 0-31 then wake up and see that things aren't paused. Now you have multiple threads accessing the same state variables:
self.parent.mm[self.position: self.position + len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
so if thread 0 is downloading the same chunk as thread 31, they both keep updating all the same state, and they add to position and count even though they are downloading overlapping parts of the file. You even reuse the objects that the threads live inside of, so that state can get really, really messed up:
for i, md in enumerate(threads):
    if not md.completed:
        th = Thread(target=md.multidown, args=(url, self.progress[i]['position'], self.progress[i]['end']))
        th.start()
There might be some other problems in your code, and it is a lot to sort through, so I suggest taking the time to do some refactoring to eliminate duplicate code and organise things into more functions. I don't believe in crazy tiny functions, but you could use a few sub-functions such as download_multi(download_state) and download_single. I am relatively confident, however, that your current problem will be solved if you ensure the threads you have running actually end after you pause. To do so you need to actually hold references to your threads somewhere:
actual_threads = []
When you create your threads (the first time and after you unpause, or preferably this would be in a function and you'd do it there and return the list):
th = Thread(target=md.multidown, args=(
    url, start, end))
threads.append(md)
actual_threads.append(th)
Then when you unpause:
self.paused = not self.paused
for th in actual_threads:
    th.join()
This way you have the threads working, they quit when you pause, and you rebuild them. join should return as soon as they break out of the blocking IO call to iter_content, so the old threads are always dead before you make the new ones.
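Putting those pieces together, the unpause branch of the monitor loop might look roughly like this (a sketch using the question's names; note the joins happen before the new threads start and before the flag flips back, so the old threads can't wake up unpaused):

if keyboard.is_pressed('space') and not singlethread:
    if self.paused:
        # resuming: make sure every old worker has really exited first
        for th in actual_threads:
            th.join()          # returns once the thread breaks out of iter_content
        actual_threads = []
        for i, md in enumerate(threads):
            if not md.completed:
                th = Thread(target=md.multidown, args=(
                    url, self.progress[i]['position'], self.progress[i]['end']))
                actual_threads.append(th)
                th.start()
    self.paused = not self.paused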
What I would do myself, however, would be to create sockets from each thread to the main process. When a pause is detected, the threads shut down the request, save any data that's already waiting in the OS buffer, and then go into a blocking receive on the socket (there might be a way to use select with a socket and requests to let you break out of the blocking IO in r.iter_content immediately, but I leave that for your research). When the program is unpaused, the main process sends a value over each socket to indicate the download should restart. You'd want at least two signals the threads recognise: one for quitting gracefully and one for resuming; the codes can be single characters. When the value is sent to each thread, that thread unblocks and can then restart the download using requests and its previous state like nothing happened.
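To sketch that signalling idea (this is illustrative, not a drop-in patch; the worker body stands in for the ranged-request loop):

import socket
import threading

RESUME, QUIT = b'R', b'Q'

def worker(ctrl):
    while True:
        # ... run the ranged download here until a pause is requested ...
        cmd = ctrl.recv(1)      # blocks cheaply while paused
        if cmd == QUIT:
            return              # quit gracefully
        # cmd == RESUME: fall through and restart the request

controls = []
for _ in range(4):
    parent_end, child_end = socket.socketpair()
    controls.append(parent_end)
    threading.Thread(target=worker, args=(child_end,), daemon=True).start()

# main process, on unpause (or shutdown):
for c in controls:
    c.send(RESUME)              # or QUIT to stop the workers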

Python multiprocessing shared memory; one write, multiple read

SYSTEM
Linux (Manjaro KDE)
Python 3.8.3
PROGRAM:
I have incoming string data on a UDP port. The main loop spools up the processes prior to using selectors to monitor the UDP port. I want the UDP data, which is constantly updated, available for each process.
TRIED:
Multiprocessing Queues with maxsize=1; that became a headache and quickly broke down.
Multiprocessing Arrays (this is where I'm at now)
I have checked, and the Array at each location I'm looking at has the same memory address (I think). For whatever reason, when I try to access the contents of the Array in the child process, the process hangs.
NOT TRIED
Pipes. I have a feeling this may be the way to go. But I'm already deep in uncharted territory; I've never used them before.
WHAT I WANT
I would like to access the UDP data from the child processes - these run the camera_view function.
Dummy UDP string
import socket
import random
import datetime
import time

conn = ('127.0.0.1', 6666)

def rand_value(f_val, t_val):
    result = round(random.uniform(f_val, t_val), 2)
    result = random.uniform(f_val, t_val)
    return result

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
while True:
    time.sleep(6)
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    overlay = timestamp
    for i in range(9):
        val = rand_value(i*10, i*10+10)
        if i == 8: val = 'TASK: Im the real Batman'
        overlay = overlay + "," + str(val)
    print(overlay)
    sock.sendto(overlay.encode(), conn)
My Program
import datetime
import selectors
import socket
import time

import cv2  # needed by the Camera class below

from multiprocessing import Lock, Process, Queue
from multiprocessing.sharedctypes import Array
from ctypes import c_char_p

REQUIRED_CAMERAS = 1
CAMERA_CONN = {'name':['Colour Camera'], 'ip':['127.0.0.1'], 'port':[9000]}
OVERLAY_CONN = ('0.0.0.0', 6666)
CONTROL_CONN = ('0.0.0.0', 6667)

NUMBER_OF_ITEMS_IN_OVERLAY = 10

class Camera():
    def __init__(self, cam_name, cam_ip, cam_port):
        self.ip = cam_ip
        self.port = cam_port
        self.capture = cv2.VideoCapture(0)
        self.frame_width = int(self.capture.get(3))
        self.frame_height = int(self.capture.get(4))
        self.name = cam_name

def get_overlay(data_packet):
    data = data_packet.decode()
    data = data.split(',')
    field0 = data[0]
    field1 = 'KP: ' + str(round(float(data[1]), 3))
    field2 = 'DCC: ' + str(round(float(data[2]), 2)) + 'm'
    field3 = 'E: ' + str(round(float(data[3]), 2)) + 'm'
    field4 = 'N: ' + str(round(float(data[4]), 2)) + 'm'
    field5 = 'D: ' + str(round(float(data[5]), 2)) + 'm'
    field6 = 'H: ' + str(round(float(data[6]), 2)) # + '°'
    field7 = 'R: ' + str(round(float(data[7]), 2)) # + '°'
    field8 = 'P: ' + str(round(float(data[8]), 2)) # + '°'
    field9 = data[9]
    x = []
    for i in range(NUMBER_OF_ITEMS_IN_OVERLAY):
        x.append(eval('field' + str(i)).encode())
        # if i == 0:
        #     print(x[i])
    return x

def socket_reader(sock, mask, q, REQUIRED_CAMERAS, overlay):
    data_packet, sensor_ip = sock.recvfrom(1024)
    sensor_port = sock.getsockname()[1]
    print(f'SENSOR PORT {sensor_port} and SENSOR_IP {sensor_ip}')
    if sensor_port == OVERLAY_CONN[1]:
        x = get_overlay(data_packet)
        for i in range(len(x)):
            overlay[i] = x[i]
        print(f'Socket Reader {overlay}')

def camera_view(CAMERA_CONN, cam_name, camera, overlay_q, control_q, overlay):
    while True:
        print(f'PROCESS {camera} RUNNING FOR: {cam_name}')
        try:
            print(f'Camera View {overlay}')
            for i in range(len(overlay)):
                print(overlay[i])
        except:
            pass
        time.sleep(1)

def controller(REQUIRED_CAMERAS, CAMERA_CONN, OVERLAY_CONN, CONTROL_CONN):
    if REQUIRED_CAMERAS > len(CAMERA_CONN['name']):
        print(f'REQUIRED_CAMERAS: {REQUIRED_CAMERAS} - more than connections in CAMERA_CONN ')
    else:
        # Set up a UDP connection for the overlay string and the control commands
        sock_overlay = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock_control = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock_overlay.bind(OVERLAY_CONN)
        sock_control.bind(CONTROL_CONN)

        # Set up the selector to watch over the socket
        # and trigger when data is ready for reading
        sel = selectors.DefaultSelector()
        sel.register(fileobj=sock_overlay, events=selectors.EVENT_READ, data=socket_reader)
        sel.register(fileobj=sock_control, events=selectors.EVENT_READ, data=socket_reader)

        # create shared memory
        overlay_q = Queue(maxsize=1)
        control_q = Queue(maxsize=1)
        overlay = Array(c_char_p, range(NUMBER_OF_ITEMS_IN_OVERLAY))
        print(f'Init Overlay {overlay}')

        # Generate the processes; one per camera
        processes = []
        for camera in range(REQUIRED_CAMERAS):
            processes.append(Process(target=camera_view, args=(CAMERA_CONN, CAMERA_CONN['name'][camera], camera, overlay_q, control_q, overlay)))
        for process in processes:
            process.daemon = True
            process.start()

        # Spin over the selector
        while True:
            # Only have one connection registered, so to stop
            # the loop spinning up the CPU, I have made it blocking
            # with the timeout = 1 (sec) instead of =0.
            events = sel.select(timeout=None)
            for key, mask in events:
                # the selector callback is the data= from the register above
                callback = key.data
                # the callback gets the sock, mask and the sensor queues
                if key.fileobj == sock_overlay:
                    callback(key.fileobj, mask, overlay_q, REQUIRED_CAMERAS, overlay)
                else:
                    callback(key.fileobj, mask, control_q, REQUIRED_CAMERAS, overlay)

if __name__ == "__main__":
    controller(REQUIRED_CAMERAS, CAMERA_CONN, OVERLAY_CONN, CONTROL_CONN)
EDIT1:
from multiprocessing import Process, Array
from ctypes import c_char_p
import time

def worker(arr):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        arr[0] = val
        print(arr[:])
        time.sleep(2)

def main():
    arr = Array(c_char_p, 1)
    p = Process(target=worker, args=(arr,))
    p.daemon = True
    p.start()
    while True:
        print(arr[:])
        try:
            print(arr[:].decode('utf-8'))
        except :
            pass
        # try:
        #     val = arr[:]
        #     val = val.decode('utf-8')
        #     print(f'main {val}')
        # except:
        #     pass
        time.sleep(1)

if __name__ == "__main__":
    main()

'''
from multiprocessing import Process, Array
from ctypes import c_char_p
import time

def worker(arr):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        arr[0] = bytes(val, 'utf-8')
        print(arr[:])
        time.sleep(2)

def main():
    arr = Array(c_char_p, 1)
    p = Process(target=worker, args=(arr,))
    p.daemon = True
    p.start()
    while True:
        print(arr[:])
        try:
            print(arr[:].decode('utf-8'))
        except :
            pass
        time.sleep(1)

if __name__ == "__main__":
    main()

if __name__ == "__main__":
    main()
'''
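A note on the hang seen with these Array attempts: c_char_p stores a raw pointer, and the multiprocessing documentation warns that a pointer stored in shared memory refers to an address in the process that assigned it, so dereferencing it from another process is undefined. A fixed-size byte buffer avoids the pointer entirely; a minimal sketch (buffer size is arbitrary here):

from multiprocessing import Process, Array
import time

def worker(buf):
    count = 0
    while True:
        count += 1
        val = ('val' + str(count)).encode('utf-8')
        buf[:len(val)] = val          # copy raw bytes into the shared buffer
        time.sleep(2)

def main():
    buf = Array('c', 32)              # 32 shared bytes, no pointers involved
    p = Process(target=worker, args=(buf,))
    p.daemon = True
    p.start()
    while True:
        print(buf[:].rstrip(b'\x00').decode('utf-8'))
        time.sleep(1)

if __name__ == "__main__":
    main()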
EDIT2:
Thanks to @RolandSmith, I have persevered with Queues and I think I have a template for how I can move forward. See the code below. If I can't get this to work in my program, I'll be back here.
from multiprocessing import Process, Queue
import time
import datetime

def worker(camera, q):
    val = ''
    while True:
        if q.full() == True:
            val = q.get()
        else:
            val = val
        print(f'WORKER{camera} {val}')
        time.sleep(0.2)

def main():
    cameras = 2
    processes = []
    queues = []
    for camera in range(cameras):
        queues.append(Queue(maxsize=1))
        processes.append(Process(target=worker, args=(camera, queues[camera])))
    for process in processes:
        process.daemon = True
        process.start()
    while True:
        for q in queues:
            if not q.empty():
                try:
                    _ = q.get()
                except:
                    pass
            else:
                q.put(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        time.sleep(.5)

if __name__ == "__main__":
    main()
In my view, using Queue is a less error-prone solution than using an Array.
Here is your second example, converted to using a Queue:
from multiprocessing import Process, Queue
import time

def worker(q):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        q.put(val)
        print('worker:', val)
        time.sleep(2)

def main():
    q = Queue()
    p = Process(target=worker, args=(q, ))
    p.daemon = True
    p.start()
    while True:
        if not q.empty():
            print('main:', q.get())
        time.sleep(1)

if __name__ == "__main__":
    main()
This yields:
> python3 test3.py
worker: val1
main: val1
worker: val2
main: val2
worker: val3
main: val3
worker: val4
main: val4
worker: val5
Here is the same example using a Pipe:
from multiprocessing import Process, Pipe
import time

def worker(p):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        p.send(val)
        print('worker:', val)
        time.sleep(2)

def main():
    child, parent = Pipe()
    p = Process(target=worker, args=(child, ))
    p.daemon = True
    p.start()
    while True:
        if parent.poll():
            print('main:', parent.recv())
        time.sleep(1)

if __name__ == "__main__":
    main()
This produces the same result as the previous example.
Additionally, a pipe is bidirectional by default, so you could also send data back from the workers to the parent.
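For example, a small self-contained sketch of two-way traffic over one Pipe (the command string is arbitrary):

from multiprocessing import Process, Pipe

def worker(p):
    p.send('ready')              # worker -> parent
    cmd = p.recv()               # parent -> worker, blocks until a command arrives
    print('worker got:', cmd)

if __name__ == "__main__":
    child, parent = Pipe()       # bidirectional by default
    proc = Process(target=worker, args=(child,))
    proc.start()
    print('main got:', parent.recv())
    parent.send('stop')
    proc.join()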

A problem with compatibility of the script on Windows

This script works perfectly on macOS and Linux, but when I run it on Windows it does not work: the error below appears.
I suspect the problem comes from switch_user(dev); I tried debugging it but couldn't solve it.
When it reaches the Check GPT step, it stops and the error occurs.
PS C:\Users\motc-pc\Desktop\amonet-karnak-\amonet\modules> python main.py
[2020-06-03 22:03:49.653199] Waiting for bootrom
[2020-06-03 22:03:59.339064] Found port = COM22
[2020-06-03 22:03:59.348800] Handshake
* * * If you have a short attached, remove it now * * *
* * * Press Enter to continue * * *
[2020-06-03 22:04:02.368897] Init crypto engine
[2020-06-03 22:04:02.422396] Disable caches
[2020-06-03 22:04:02.424535] Disable bootrom range checks
[2020-06-03 22:04:02.459386] Load payload from ../brom-payload/build/payload.bin = 0x4888 bytes
[2020-06-03 22:04:02.469524] Send payload
[2020-06-03 22:04:03.440416] Let's rock
[2020-06-03 22:04:03.442368] Wait for the payload to come online...
[2020-06-03 22:04:04.163004] all good
[2020-06-03 22:04:04.165239] Check GPT
Traceback (most recent call last):
  File "main.py", line 450, in <module>
    main()
  File "main.py", line 361, in main
    switch_user(dev)
  File "main.py", line 321, in switch_user
    block = dev.emmc_read(0)
  File "main.py", line 196, in emmc_read
    raise RuntimeError("read fail")
RuntimeError: read fail
This is the full script.
As I said earlier, when I run it on Linux or macOS it works normally without problems.
import struct
import os
import sys
import time

from handshake import handshake
from load_payload import load_payload, UserInputThread
from logger import log

import struct
import glob
import serial

from logger import log

BAUD = 115200
TIMEOUT = 5
CRYPTO_BASE = 0x10210000 # for karnak

def serial_ports ():
    """ Lists available serial ports

        :raises EnvironmentError:
            On unsupported or unknown platforms
        :returns:
            A set containing the serial ports available on the system
    """
    if sys.platform.startswith("win"):
        ports = [ "COM{0:d}".format(i + 1) for i in range(256) ]
    elif sys.platform.startswith("linux"):
        ports = glob.glob("/dev/ttyACM*")
    elif sys.platform.startswith("darwin"):
        ports = glob.glob("/dev/cu.usbmodem*")
    else:
        raise EnvironmentError("Unsupported platform")

    result = set()
    for port in ports:
        try:
            s = serial.Serial(port, timeout=TIMEOUT)
            s.close()
            result.add(port)
        except (OSError, serial.SerialException):
            pass

    return result

def p32_be(x):
    return struct.pack(">I", x)

class Device:
    def __init__(self, port=None):
        self.dev = None
        if port:
            self.dev = serial.Serial(port, BAUD, timeout=TIMEOUT)

    def find_device(self, preloader=False):
        if self.dev:
            raise RuntimeError("Device already found")
        if preloader:
            log("Waiting for preloader")
        else:
            log("Waiting for bootrom")
        old = serial_ports()
        while True:
            new = serial_ports()
            # port added
            if new > old:
                port = (new - old).pop()
                break
            # port removed
            elif old > new:
                old = new
            time.sleep(0.25)
        log("Found port = {}".format(port))
        self.dev = serial.Serial(port, BAUD, timeout=TIMEOUT)

    def check(self, test, gold):
        if test != gold:
            raise RuntimeError("ERROR: Serial protocol mismatch")

    def check_int(self, test, gold):
        test = struct.unpack('>I', test)[0]
        self.check(test, gold)

    def _writeb(self, out_str):
        self.dev.write(out_str)
        return self.dev.read()

    def handshake(self):
        # look for start byte
        while True:
            c = self._writeb(b'\xa0')
            if c == b'\x5f':
                break
            self.dev.flushInput()
        # complete sequence
        self.check(self._writeb(b'\x0a'), b'\xf5')
        self.check(self._writeb(b'\x50'), b'\xaf')
        self.check(self._writeb(b'\x05'), b'\xfa')

    def handshake2(self, cmd='FACTFACT'):
        # look for start byte
        c = 0
        while c != b'Y':
            c = self.dev.read()
        log("Preloader ready, sending " + cmd)
        command = str.encode(cmd)
        self.dev.write(command)
        self.dev.flushInput()

    def read32(self, addr, size=1):
        result = []
        self.dev.write(b'\xd1')
        self.check(self.dev.read(1), b'\xd1') # echo cmd
        self.dev.write(struct.pack('>I', addr))
        self.check_int(self.dev.read(4), addr) # echo addr
        self.dev.write(struct.pack('>I', size))
        self.check_int(self.dev.read(4), size) # echo size
        self.check(self.dev.read(2), b'\x00\x00') # arg check
        for _ in range(size):
            data = struct.unpack('>I', self.dev.read(4))[0]
            result.append(data)
        self.check(self.dev.read(2), b'\x00\x00') # status
        # support scalar
        if len(result) == 1:
            return result[0]
        else:
            return result

    def write32(self, addr, words, status_check=True):
        # support scalar
        if not isinstance(words, list):
            words = [ words ]
        self.dev.write(b'\xd4')
        self.check(self.dev.read(1), b'\xd4') # echo cmd
        self.dev.write(struct.pack('>I', addr))
        self.check_int(self.dev.read(4), addr) # echo addr
        self.dev.write(struct.pack('>I', len(words)))
        self.check_int(self.dev.read(4), len(words)) # echo size
        self.check(self.dev.read(2), b'\x00\x01') # arg check
        for word in words:
            self.dev.write(struct.pack('>I', word))
            self.check_int(self.dev.read(4), word) # echo word
        if status_check:
            self.check(self.dev.read(2), b'\x00\x01') # status

    def run_ext_cmd(self, cmd):
        self.dev.write(b'\xC8')
        self.check(self.dev.read(1), b'\xC8') # echo cmd
        cmd = bytes([cmd])
        self.dev.write(cmd)
        self.check(self.dev.read(1), cmd)
        self.dev.read(1)
        self.dev.read(2)

    def wait_payload(self):
        data = self.dev.read(4)
        if data != b"\xB1\xB2\xB3\xB4":
            raise RuntimeError("received {} instead of expected pattern".format(data))

    def emmc_read(self, idx):
        # magic
        self.dev.write(p32_be(0xf00dd00d))
        # cmd
        self.dev.write(p32_be(0x1000))
        # block to read
        self.dev.write(p32_be(idx))
        data = self.dev.read(0x200)
        if len(data) != 0x200:
            raise RuntimeError("read fail")
        return data

    def emmc_write(self, idx, data):
        if len(data) != 0x200:
            raise RuntimeError("data must be 0x200 bytes")
        # magic
        self.dev.write(p32_be(0xf00dd00d))
        # cmd
        self.dev.write(p32_be(0x1001))
        # block to write
        self.dev.write(p32_be(idx))
        # data
        self.dev.write(data)
        code = self.dev.read(4)
        if code != b"\xd0\xd0\xd0\xd0":
            raise RuntimeError("device failure")

    def emmc_switch(self, part):
        # magic
        self.dev.write(p32_be(0xf00dd00d))
        # cmd
        self.dev.write(p32_be(0x1002))
        # partition
        self.dev.write(p32_be(part))

    def reboot(self):
        # magic
        self.dev.write(p32_be(0xf00dd00d))
        # cmd
        self.dev.write(p32_be(0x3000))

    def kick_watchdog(self):
        # magic
        self.dev.write(p32_be(0xf00dd00d))
        # cmd
        self.dev.write(p32_be(0x3001))

    def rpmb_read(self):
        # magic
        self.dev.write(p32_be(0xf00dd00d))
        # cmd
        self.dev.write(p32_be(0x2000))
        data = self.dev.read(0x100)
        if len(data) != 0x100:
            raise RuntimeError("read fail")
        return data

    def rpmb_write(self, data):
        if len(data) != 0x100:
            raise RuntimeError("data must be 0x100 bytes")
        # magic
        self.dev.write(p32_be(0xf00dd00d))
        # cmd
        self.dev.write(p32_be(0x2001))
        # data
        self.dev.write(data)

def switch_boot0(dev):
    dev.emmc_switch(1)
    block = dev.emmc_read(0)
    if block[0:9] != b"EMMC_BOOT" and block != b"\x00" * 0x200:
        dev.reboot()
        raise RuntimeError("what's wrong with your BOOT0?")
    dev.kick_watchdog()

def flash_data(dev, data, start_block, max_size=0):
    while len(data) % 0x200 != 0:
        data += b"\x00"
    if max_size and len(data) > max_size:
        raise RuntimeError("data too big to flash")
    blocks = len(data) // 0x200
    for x in range(blocks):
        print("[{} / {}]".format(x + 1, blocks), end='\r')
        dev.emmc_write(start_block + x, data[x * 0x200:(x + 1) * 0x200])
        if x % 10 == 0:
            dev.kick_watchdog()
    print("")

def flash_binary(dev, path, start_block, max_size=0):
    with open(path, "rb") as fin:
        data = fin.read()
    while len(data) % 0x200 != 0:
        data += b"\x00"
    flash_data(dev, data, start_block, max_size=0)

def dump_binary(dev, path, start_block, max_size=0):
    with open(path, "w+b") as fout:
        blocks = max_size // 0x200
        for x in range(blocks):
            print("[{} / {}]".format(x + 1, blocks), end='\r')
            fout.write(dev.emmc_read(start_block + x))
            if x % 10 == 0:
                dev.kick_watchdog()
    print("")

def force_fastboot(dev, gpt):
    switch_user(dev)
    block = list(dev.emmc_read(gpt["MISC"][0]))
    block[0:16] = "FASTBOOT_PLEASE\x00".encode("utf-8")
    dev.emmc_write(gpt["MISC"][0], bytes(block))
    block = dev.emmc_read(gpt["MISC"][0])

def force_recovery(dev, gpt):
    switch_user(dev)
    block = list(dev.emmc_read(gpt["MISC"][0]))
    block[0:16] = "boot-recovery\x00\x00\x00".encode("utf-8")
    dev.emmc_write(gpt["MISC"][0], bytes(block))
    block = dev.emmc_read(gpt["MISC"][0])

def switch_user(dev):
    dev.emmc_switch(0)
    block = dev.emmc_read(0)
    dev.kick_watchdog()

def parse_gpt(dev):
    data = dev.emmc_read(0x400 // 0x200) + dev.emmc_read(0x600 // 0x200) + dev.emmc_read(0x800 // 0x200) + dev.emmc_read(0xA00 // 0x200)
    num = len(data) // 0x80
    parts = dict()
    for x in range(num):
        part = data[x * 0x80:(x + 1) * 0x80]
        part_name = part[0x38:].decode("utf-16le").rstrip("\x00")
        part_start = struct.unpack("<Q", part[0x20:0x28])[0]
        part_end = struct.unpack("<Q", part[0x28:0x30])[0]
        parts[part_name] = (part_start, part_end - part_start + 1)
    return parts

def main():
    minimal = False
    dev = Device()
    dev.find_device()

    # 0.1) Handshake
    handshake(dev)

    # 0.2) Load brom payload
    load_payload(dev, "../brom-payload/build/payload.bin")
    dev.kick_watchdog()

    if len(sys.argv) == 2 and sys.argv[1] == "minimal":
        thread = UserInputThread(msg = "Running in minimal mode, assuming LK, TZ, LK-payload and TWRP to have already been flashed.\nIf this is correct (i.e. you used \"brick\" option in step 1) press enter, otherwise terminate with Ctrl+C")
        thread.start()
        while not thread.done:
            dev.kick_watchdog()
            time.sleep(1)
        minimal = True

    # 1) Sanity check GPT
    log("Check GPT")
    switch_user(dev)

    # 1.1) Parse gpt
    gpt = parse_gpt(dev)
    log("gpt_parsed = {}".format(gpt))
    if "lk" not in gpt or "tee1" not in gpt or "boot" not in gpt or "recovery" not in gpt:
        raise RuntimeError("bad gpt")

    # 2) Sanity check boot0
    log("Check boot0")
    switch_boot0(dev)

    # 3) Sanity check rpmb
    log("Check rpmb")
    rpmb = dev.rpmb_read()
    if rpmb[0:4] != b"AMZN":
        thread = UserInputThread(msg = "rpmb looks broken; if this is expected (i.e. you're retrying the exploit) press enter, otherwise terminate with Ctrl+C")
        thread.start()
        while not thread.done:
            dev.kick_watchdog()
            time.sleep(1)

    # Clear preloader so we get into bootrom without shorting, should the script stall (we flash preloader as last step)
    # 4) Downgrade preloader
    log("Clear preloader header")
    switch_boot0(dev)
    flash_data(dev, b"EMMC_BOOT" + b"\x00" * ((0x200 * 8) - 9), 0)

    # 5) Zero out rpmb to enable downgrade
    log("Downgrade rpmb")
    dev.rpmb_write(b"\x00" * 0x100)
    log("Recheck rpmb")
    rpmb = dev.rpmb_read()
    if rpmb != b"\x00" * 0x100:
        dev.reboot()
        raise RuntimeError("downgrade failure, giving up")
    log("rpmb downgrade ok")
    dev.kick_watchdog()

    if not minimal:
        # 6) Install preloader
        log("Flash preloader")
        switch_boot0(dev)
        flash_binary(dev, "../bin/preloader.bin", 8)
        flash_binary(dev, "../bin/preloader.bin", 520)

        # 6) Install lk-payload
        log("Flash lk-payload")
        switch_boot0(dev)
        flash_binary(dev, "../lk-payload/build/payload.bin", 1024)

        # 7) Downgrade tz
        log("Flash tz")
        switch_user(dev)
        flash_binary(dev, "../bin/tz.img", gpt["tee1"][0], gpt["tee1"][1] * 0x200)

        # 8) Downgrade lk
        log("Flash lk")
        switch_user(dev)
        flash_binary(dev, "../bin/lk.bin", gpt["lk"][0], gpt["lk"][1] * 0x200)

    # 9) Flash microloader
    log("Inject microloader")
    switch_user(dev)
    boot_hdr1 = dev.emmc_read(gpt["boot"][0]) + dev.emmc_read(gpt["boot"][0] + 1)
    boot_hdr2 = dev.emmc_read(gpt["boot"][0] + 2) + dev.emmc_read(gpt["boot"][0] + 3)
    flash_binary(dev, "../bin/microloader.bin", gpt["boot"][0], 2 * 0x200)
    if boot_hdr2[0:8] != b"ANDROID!":
        flash_data(dev, boot_hdr1, gpt["boot"][0] + 2, 2 * 0x200)

    if not minimal:
        log("Force fastboot")
        force_fastboot(dev, gpt)
    else:
        log("Force recovery")
        force_recovery(dev, gpt)

    # 10) Downgrade preloader
    log("Flash preloader header")
    switch_boot0(dev)
    flash_binary(dev, "../bin/preloader.hdr0", 0, 4)
    flash_binary(dev, "../bin/preloader.hdr1", 4, 4)

    # Reboot (to fastboot or recovery)
    log("Reboot")
    dev.reboot()

if __name__ == "__main__":
    main()

Calculating the amount of time left until completion

I am wondering how to calculate the amount of time it would take to, for example, complete a brute-force word list.
I know how to use the time function and measure elapsed time,
but the problem is that I need to find out how long it would take from within the program itself...
Here is the code (I made this yesterday):
import itertools, math
import os

Alphabet = ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890") # Add or remove whatevs you think will be in the password you're cracking (example, [symbols])
counter = 1
CharLength = 1
range_num = int(raw_input("Enter range: "))
stopper = range_num + 1
filename = "bruteforce_%r.txt" % (range_num)
f = open(filename, 'a')
#n_1 = len(Alphabet)
#n_2 = n_1 - 1 # <-- total useless peice of garbage that could of been great in vurtual life
#n_3 = '0' * n_2
#n = '1' + n_3
x = range_num
y = len(Alphabet)
amount = math.pow(y, x)
total_items = math.pow(y, x)
for CharLength in range(range_num, stopper):
    passwords = (itertools.product(Alphabet, repeat = CharLength))
    for i in passwords:
        counter += 1
        percentage = (counter / total_items) * 100
        amount -= 1
        i = str(i)
        i = i.replace("[", "")
        i = i.replace("]", "")
        i = i.replace("'", "")
        i = i.replace(" ", "")
        i = i.replace(",", "")
        i = i.replace("(", "")
        i = i.replace(")", "")
        f.write(i)
        f.write('\n')
        print "Password: %r\tPercentage: %r/100\tAmount left: %r" % (i, int(percentage), amount)
        if i == '0'* range_num:
            print "*Done"
            f.close()
            exit(0)
        else:
            pass
This is the timer function I managed to make:
#import winsound # Comment this out if you're using linux
import os
import time
from sys import exit

print "This is the timer\nHit CTRL-C to stop the timer\nOtherwise just let it rip until the time's up"
hours = int(raw_input('Enter the hours.\n>>> '))
os.system('clear') # Linux
#os.system('cls') # Windows
minutes = int(raw_input('Enter the minutes.\n>>> '))
os.system('clear') # Linux
#os.system('cls') # Windows
seconds = int(raw_input('Enter the seconds.\n>>> '))
os.system('clear') # Linux
#os.system('cls') # Windows
stop_time = '%r:%r:%r' % (hours, minutes, seconds)
t_hours = 00
t_minutes = 00
t_seconds = 00
while t_seconds <= 60:
    try:
        os.system('clear') # Linux
        #os.system('cls') # Windows
        current_time = '%r:%r:%r' % (t_hours, t_minutes, t_seconds)
        print current_time
        time.sleep(1)
        t_seconds+=1
        if current_time == stop_time:
            print "// Done"
            #winsound.Beep(500,1000)
            #winsound.Beep(400,1000)
            break
        elif t_seconds == 60:
            t_minutes+=1
            t_seconds=0
        elif t_minutes == 60:
            t_hours+=1
            t_minutes = 00
    except KeyboardInterrupt:
        print "Stopped at: %r:%r:%r" % (t_hours, t_minutes, t_seconds)
        raw_input("Hit enter to continue\nHit CTRL-C to end")
        try:
            pass
        except KeyboardInterrupt:
            exit(0)
Now I just can't figure out how to adapt this to calculate how long the run will take, rather than how long it has been running...
You cannot predict the time a script is going to take.
Firstly because two machines wouldn't run the script in the same amount of time, and secondly because the execution time on one machine can vary from one run to another.
What you can do, however, is compute the percentage of execution.
You need to figure out, for example, how many iterations your main loop will do, and calculate at each iteration the ratio current iteration count / total number of iterations.
Here is a minimalist example of what you can do:
n = 10000
for i in range(n):
    print("Processing file {} ({}%)".format(i, 100*i//n))
    process_file(i)
You can take it further and add the time as additional info:
n = 10000
t0 = time.time()
for i in range(n):
    t1 = time.time()
    print("Processing file {} ({}%)".format(i, 100*i//n), end="")
    process_file(i)
    t2 = time.time()
    print(" {}s (total: {}s)".format(t2-t1, t2-t0))
The output will look like this:
...
Processing file 2597 (25%) 0.2s (total: 519.4s)
Processing file 2598 (25%) 0.3s (total: 519.7s)
Processing file 2599 (25%) 0.1s (total: 519.8s)
Processing file 2600 (25%)
This is my implementation, which returns time elapsed, time left, and finish time in H:M:S format.
def calcProcessTime(starttime, cur_iter, max_iter):
    telapsed = time.time() - starttime
    testimated = (telapsed/cur_iter)*(max_iter)
    finishtime = starttime + testimated
    finishtime = dt.datetime.fromtimestamp(finishtime).strftime("%H:%M:%S")  # in time
    lefttime = testimated-telapsed  # in seconds
    return (int(telapsed), int(lefttime), finishtime)
Example:
import time
import datetime as dt

start = time.time()
cur_iter = 0
max_iter = 10
for i in range(max_iter):
    time.sleep(5)
    cur_iter += 1
    prstime = calcProcessTime(start, cur_iter, max_iter)
    print("time elapsed: %s(s), time left: %s(s), estimated finish time: %s" % prstime)
Output:
time elapsed: 5(s), time left: 45(s), estimated finish time: 14:28:18
time elapsed: 10(s), time left: 40(s), estimated finish time: 14:28:18
time elapsed: 15(s), time left: 35(s), estimated finish time: 14:28:18
....
You will never be able to know exactly how long it is going to take to finish. The best you can do is calculate what percentage of the work you have finished, how long that has taken you, and then project that out.
For example if you are doing some work on the range of numbers from 1 to 100 you could do something such as
import time

start_time = time.time()
for i in range(1, 101):
    # Do some work
    current_time = time.time()
    elapsed_time = current_time - start_time
    time_left = 100 * elapsed_time / i - elapsed_time
    print(time_left)

The projection simply scales the elapsed time by the fraction of iterations still remaining.
The following function will calculate the remaining time:
last_times = []

def get_remaining_time(i, total, time):
    last_times.append(time)
    if len(last_times) > 5:
        last_times.pop(0)
    mean_t = sum(last_times) / len(last_times)  # average duration of the last (up to) 5 iterations
    remain_s_tot = int(mean_t * (total - i + 1))
    remain_m = remain_s_tot // 60
    remain_s = remain_s_tot % 60
    return f"{remain_m}m{remain_s}s"
The parameters are:
i : The current iteration
total : the total number of iterations
time : the duration of the last iteration
It uses the average time taken by the last 5 iterations to calculate the remaining time. You can then use it in your code as follows:
import time

last_t = 0
iterations = range(1, 1000)
for i in iterations:
    t = time.time()
    # Do your task here
    last_t = time.time() - t
    print(get_remaining_time(i, len(iterations), last_t))

MultiThreading/Optimization Python Requests?

I am trying to optimize this code; as of right now it runs 340 requests in 10 minutes, and I am trying to get 1800 requests in 30 minutes, since I can run one request every second according to the Amazon API. Can I use multithreading with this code to increase the number of runs?
Also, right now I am reading the full data into the main function - should I split it up, and how can I figure out how many items each thread should take?
def newhmac():
    return hmac.new(AWS_SECRET_ACCESS_KEY, digestmod=sha256)

def getSignedUrl(params):
    hmac = newhmac()
    action = 'GET'
    server = "webservices.amazon.com"
    path = "/onca/xml"
    params['Version'] = '2013-08-01'
    params['AWSAccessKeyId'] = AWS_ACCESS_KEY_ID
    params['Service'] = 'AWSECommerceService'
    params['Timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    key_values = [(urllib.quote(k), urllib.quote(v)) for k,v in params.items()]
    key_values.sort()
    paramstring = '&'.join(['%s=%s' % (k, v) for k, v in key_values])
    urlstring = "http://" + server + path + "?" + \
        ('&'.join(['%s=%s' % (k, v) for k, v in key_values]))
    hmac.update(action + "\n" + server + "\n" + path + "\n" + paramstring)
    urlstring = urlstring + "&Signature=" + \
        urllib.quote(base64.encodestring(hmac.digest()).strip())
    return urlstring

def readData():
    data = []
    with open("ASIN.csv") as f:
        reader = csv.reader(f)
        for row in reader:
            data.append(row[0])
    return data

def writeData(data):
    with open("data.csv", "a") as f:
        writer = csv.writer(f)
        writer.writerows(data)

def main():
    data = readData()
    filtData = []
    i = 0
    count = 0
    while(i < len(data) - 10):
        if (count % 4 == 0):
            time.sleep(1)
        asins = ','.join([data[x] for x in range(i, i+10)])
        params = {'ResponseGroup':'OfferFull,Offers',
                  'AssociateTag':'4chin-20',
                  'Operation':'ItemLookup',
                  'IdType':'ASIN',
                  'ItemId':asins}
        url = getSignedUrl(params)
        resp = requests.get(url)
        responseSoup = BeautifulSoup(resp.text)
        quantity = ['' if product.amount is None else product.amount.text for product in responseSoup.findAll("offersummary")]
        price = ['' if product.lowestnewprice is None else product.lowestnewprice.formattedprice.text for product in responseSoup.findAll("offersummary")]
        prime = ['' if product.iseligibleforprime is None else product.iseligibleforprime.text for product in responseSoup("offer")]
        for zz in zip(asins.split(","), price, quantity, prime):
            print zz
            filtData.append(zz)
        print i, len(filtData)
        i += 10
        count += 1
    writeData(filtData)
    threading.Timer(1.0, main).start()
If you are using Python 3.2+ you can use the concurrent.futures library to make it easy to launch tasks in multiple threads. E.g. here I am simulating running 10 url-parsing jobs in parallel, each of which takes 1 second; if run synchronously it would have taken 10 seconds, but with a thread pool of 10 it should take about 1 second:
import time
from concurrent.futures import ThreadPoolExecutor

def parse_url(url):
    time.sleep(1)
    print(url)
    return "done."

st = time.time()
with ThreadPoolExecutor(max_workers=10) as executor:
    for i in range(10):
        future = executor.submit(parse_url, "http://google.com/%s" % i)
print("total time: %s" % (time.time() - st))
Output:
http://google.com/0
http://google.com/1
http://google.com/2
http://google.com/3
http://google.com/4
http://google.com/5
http://google.com/6
http://google.com/7
http://google.com/8
http://google.com/9
total time: 1.0066466331481934
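To tie this back to the question: you don't have to pre-split the ASIN list across threads. You can keep building one batch of 10 per request, submit each batch to the pool, and pace the submissions to stay within the 1-request-per-second budget. A rough sketch, with fetch_batch standing in for the signed-request-plus-parsing code above:

import time
from concurrent.futures import ThreadPoolExecutor

def fetch_batch(asins):
    # placeholder for getSignedUrl + requests.get + BeautifulSoup parsing
    return asins

data = ['B%09d' % i for i in range(100)]               # stand-in for readData()
batches = [data[i:i + 10] for i in range(0, len(data), 10)]

results = []
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = []
    for batch in batches:
        futures.append(executor.submit(fetch_batch, batch))
        time.sleep(1)              # pace submissions to ~1 request per second
    for f in futures:
        results.extend(f.result())
print(len(results))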

Leaky bucket in python

Hi, I am trying to develop a leaky bucket with unlimited bucket capacity in Python. I want it to be thread-safe and CPU-efficient, with a minimum number of threads. It generally works now, but there are some tiny errors.
I throttle the bandwidth to 500 kbps, yet the third line of output below breaks this. Also, can anybody tell me if this is the right way to implement a leaky bucket? Thanks.
rate: 500.00
rate: 500.00
rate: 550.00
rate: 500.00
rate: 500.00
rate: 500.00
rate: 500.00
rate: 500.00
Code here:
from collections import deque
import threading, time

class LeakyBucket:
    '''the leaky bucket throttling the bit rate'''
    def __init__(self, node, bitsPerSec, measIntv, LBtype):
        self.node = node
        self.bitsPerSec = bitsPerSec  # the rate limit
        self.measIntv = measIntv  # the measure interval; tokens become full at the beginning of each interval
        self.LBtype = LBtype  # the type of the bucket
        self.lastTime = 0  # the start time of the last measure interval
        self.bitsDone = 0  # the bits that have been transmitted
        self.BDLock = threading.Lock()  # the lock for the bits sent
        self.packDQ = deque()  # the packet Q
        self.maxToken = bitsPerSec*float(measIntv)  # the max token (bits)
        self.token = self.maxToken  # the current token
        self.condition = threading.Condition()  # sync lock

    def packIn(self, msg):
        '''Insert a packet'''
        self.condition.acquire()
        self.packDQ.append(msg)
        self.condition.notify()
        self.condition.release()

    def keepPoping(self):
        '''keep popping new packets'''
        self.lastTime = time.time()  # record the start time
        while True:
            timeNow = time.time()
            if timeNow - self.lastTime > self.measIntv:
                # new interval, need to reset the tokens
                self.token = self.maxToken
                self.lastTime = timeNow
            self.condition.acquire()
            if self.packDQ:  # the queue is not empty
                pack = list(self.packDQ)[0]
                packLen = len(pack[2])*8
                if packLen > self.token:  # not enough tokens?
                    #self.packDQ.popleft()
                    self.condition.release()
                    time.sleep(max(self.lastTime+self.measIntv-time.time(), 0))  # wait for enough tokens
                else:  # enough tokens, can send out the packet
                    self.packDQ.popleft()
                    self.condition.release()
                    self.changeBitsDone(packLen)
                    self.token = self.token - packLen  # consume tokens
            else:
                self.condition.wait()
                self.condition.release()

    def begin(self):
        '''begin the leakybucket'''
        aThread = threading.Thread(target = self.keepPoping, args = [])
        aThread.start()

    def getBitsDone(self):
        '''get and reset bitsDone, for testing'''
        self.BDLock.acquire()
        reV = self.bitsDone
        self.bitsDone = 0
        self.BDLock.release()
        return reV

    def changeBitsDone(self, length):
        '''change bitsDone, for testing'''
        self.BDLock.acquire()
        self.bitsDone += length
        self.BDLock.release()

    def measure(self, intv):
        '''measure the throughput of the leaky bucket'''
        while True:
            bitsDone = self.getBitsDone()
            rate = bitsDone / float(intv*1024)
            print 'rate: %.2f' % rate
            time.sleep(intv)

    def startMeasure(self, intv):
        '''start measuring the rate'''
        #print 'here'
        aThread = threading.Thread(target = self.measure, args = [intv])
        aThread.start()

#===============================
def main():
    pack = 1000*'a'
    msg = ('192.168.1.1', 16000, pack)
    print 'here'
    LB = LeakyBucket(None, 500*1024, 1, 'reg')
    LB.begin()
    LB.startMeasure(10)
    numMsg = 0
    while numMsg < 10000:
        LB.packIn(msg)
        #print 'pack in'
        numMsg += 1

if __name__ == '__main__':
    main()
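For comparison: instead of refilling the whole token allowance once per interval (which allows a burst right at each interval boundary, and likely explains the 550.00 reading above), token buckets are often written with continuous refill based on elapsed time. A minimal sketch of that variant (illustrative, not a drop-in replacement for the class above):

import time

class TokenBucket(object):
    def __init__(self, rate, capacity):
        self.rate = rate              # tokens (bits) added per second
        self.capacity = capacity     # maximum burst size in tokens
        self.tokens = capacity
        self.last = time.time()

    def consume(self, amount):
        '''block until `amount` tokens are available, then spend them'''
        while True:
            now = time.time()
            self.tokens = min(self.capacity, self.tokens + (now - self.last) * self.rate)
            self.last = now
            if self.tokens >= amount:
                self.tokens -= amount
                return
            time.sleep((amount - self.tokens) / self.rate)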
