Leaky bucket in python - python

Hi I am trying to develop a leakybucket with unlimited bucket capacity in Python. I want it to be thread-safe and CPU efficient, with a minimum number of threads. It generally works now. But there are some tiny errors.
I throttle the bandwidth to 500 kbps. But the third line seems to break this. Also, can anybody tell me if this is the right way to implement leakybucket? Thanks.
rate: 500.00
rate: 500.00
rate: 550.00
rate: 500.00
rate: 500.00
rate: 500.00
rate: 500.00
rate: 500.00
Code here:
from collections import deque
import threading, time
class LeakyBucket:
'''the leaky bucket throttling the bit rate'''
def __init__(self, node, bitsPerSec, measIntv, LBtype):
self.node = node
self.bitsPerSec = bitsPerSec #the rate limit
self.measIntv = measIntv #the measure interval, tokens will become full at the beginning of each interval
self.LBtype = LBtype #the type of the bucket
self.lastTime = 0 #the start time of the last measure interval
self.bitsDone = 0 #the bits that have been transmitted
self.BDLock = threading.Lock() #the lock for the bits sent
self.packDQ = deque() #the packet Q
self.maxToken = bitsPerSec*float(measIntv) #the max token (bits)
self.token = self.maxToken #the current token
self.condition = threading.Condition() #sync lock
def packIn(self, msg):
'''Insert a packet'''
self.condition.acquire()
self.packDQ.append(msg)
self.condition.notify()
self.condition.release()
def keepPoping(self):
'''keep poping new pack'''
self.lastTime = time.time() #record the start time
while True:
timeNow = time.time()
if timeNow - self.lastTime > self.measIntv:
#new intv, need to reset token
self.token = self.maxToken
self.lastTime = timeNow
self.condition.acquire()
if self.packDQ: # the queue is not empty
pack = list(self.packDQ)[0]
packLen = len(pack[2])*8
if packLen > self.token: #no enough token?
#self.packDQ.popleft()
self.condition.release()
time.sleep(max(self.lastTime+self.measIntv-time.time(),0)) #wait for enough token
else: #enough token, can send out the packet
self.packDQ.popleft()
self.condition.release()
self.changeBitsDone(packLen)
self.token = self.token - packLen #consume token
else:
self.condition.wait()
self.condition.release()
def begin(self):
'''begin the leakybucket'''
aThread = threading.Thread(target = self.keepPoping, args = [])
aThread.start()
def getBitsDone(self):
'''get and reset bitsDone, for testing'''
self.BDLock.acquire()
reV = self.bitsDone
self.bitsDone = 0
self.BDLock.release()
return reV
def changeBitsDone(self,length):
'''change bitsDone, for testing'''
self.BDLock.acquire()
self.bitsDone += length
self.BDLock.release()
def measure(self, intv):
'''measure the throughput of the leaky bucket'''
while True:
bitsDone = self.getBitsDone()
rate = bitsDone / float(intv*1024)
print 'rate: %.2f' % rate
time.sleep(intv)
def startMeasure(self, intv):
'''start measure the rate'''
#print 'here'
aThread = threading.Thread(target = self.measure, args = [intv])
aThread.start()
#===============================
def main():
pack = 1000*'a'
msg = ('192.168.1.1', 16000, pack)
print 'here'
LB = LeakyBucket(None, 500*1024, 1, 'reg')
LB.begin()
LB.startMeasure(10)
numMsg = 0
while numMsg < 10000:
LB.packIn(msg)
#print 'pack in'
numMsg += 1
if __name__ == '__main__':
main()

Related

Cannot unpack noniterable bool object

I'm new to Python and posting on SO.
I'm doing a class project on trying to create an automated climate control system with a Raspberry pi(Pi4 4GB) and a DHT.
Here is my current error and code and I'm not sure what's causing the fail in the loop. It works for a while then errors out. It seems to error when it gets a bad read from the sensor, but until then it works fine.
Traceback (most recent call last):
File "/home/pi/raphael-kit/python/Climatecontrol.py", line 156, in <module>
main()
File "/home/pi/raphael-kit/python/Climatecontrol.py", line 118, in main
humidity, temperature = readDht11()
TypeError: cannot unpack non-iterable bool object
import RPi.GPIO as GPIO
import time
import LCD1602
dhtPin = 17
relayPin = 23 # Set GPIO23 as control pin
set_temp = 25 # this is the required temperature
GPIO.setmode(GPIO.BCM)
# Set relayPin's mode to output,
# and initial level to High(3.3v)
GPIO.setup(relayPin, GPIO.OUT, initial=GPIO.HIGH)
MAX_UNCHANGE_COUNT = 100
STATE_INIT_PULL_DOWN = 1
STATE_INIT_PULL_UP = 2
STATE_DATA_FIRST_PULL_DOWN = 3
STATE_DATA_PULL_UP = 4
STATE_DATA_PULL_DOWN = 5
def readDht11():
GPIO.setup(dhtPin, GPIO.OUT)
GPIO.output(dhtPin, GPIO.HIGH)
time.sleep(0.05)
GPIO.output(dhtPin, GPIO.LOW)
time.sleep(0.02)
GPIO.setup(dhtPin, GPIO.IN, GPIO.PUD_UP)
unchanged_count = 0
last = -1
data = []
while True:
current = GPIO.input(dhtPin)
data.append(current)
if last != current:
unchanged_count = 0
last = current
else:
unchanged_count += 1
if unchanged_count > MAX_UNCHANGE_COUNT:
break
state = STATE_INIT_PULL_DOWN
lengths = []
current_length = 0
for current in data:
current_length += 1
if state == STATE_INIT_PULL_DOWN:
if current == GPIO.LOW:
state = STATE_INIT_PULL_UP
else:
continue
if state == STATE_INIT_PULL_UP:
if current == GPIO.HIGH:
state = STATE_DATA_FIRST_PULL_DOWN
else:
continue
if state == STATE_DATA_FIRST_PULL_DOWN:
if current == GPIO.LOW:
state = STATE_DATA_PULL_UP
else:
continue
if state == STATE_DATA_PULL_UP:
if current == GPIO.HIGH:
current_length = 0
state = STATE_DATA_PULL_DOWN
else:
continue
if state == STATE_DATA_PULL_DOWN:
if current == GPIO.LOW:
lengths.append(current_length)
state = STATE_DATA_PULL_UP
else:
continue
if len(lengths) != 40:
#print ("Data not good, skip")
return False
shortest_pull_up = min(lengths)
longest_pull_up = max(lengths)
halfway = (longest_pull_up + shortest_pull_up) / 2
bits = []
the_bytes = []
byte = 0
for length in lengths:
bit = 0
if length > halfway:
bit = 1
bits.append(bit)
#print ("bits: %s, length: %d" % (bits, len(bits)))
for i in range(0, len(bits)):
byte = byte << 1
if (bits[i]):
byte = byte | 1
else:
byte = byte | 0
if ((i + 1) % 8 == 0):
the_bytes.append(byte)
byte = 0
#print (the_bytes)
checksum = (the_bytes[0] + the_bytes[1] + the_bytes[2] + the_bytes[3]) & 0xFF
#if the_bytes[4] != checksum:
#print ("Data not good, skip")
#return False
return the_bytes[0], the_bytes[2]
def main():
while True:
humidity, temperature = readDht11()
if humidity is not None and temperature is not None:
print("Temp={0:0.1f}*C Humidity={1:0.1f}%".format(temperature, humidity))
# test for low temperature
if temperature < set_temp:
print(GPIO.output(relayPin, GPIO.LOW))
# test for high temperature
if temperature > (set_temp + 1):
print(GPIO.output(relayPin, GPIO.HIGH))
else:
print("Failed to retrieve data from humidity sensor")
time.sleep(5) #this is the time between taking readings and acting on them you can reduce it but not below 5 seconds
# Define a destroy function for clean up everything after
# the script finished
def setup():
LCD1602.init(0x27, 1) # init(slave address, background light)
LCD1602.write(0, 0, 'Temperature: %s C')
LCD1602.write(1, 1, 'humidity: %s %%')
time.sleep(2)
def destroy():
# set relay to off
GPIO.output(relayPin, GPIO.LOW)
# Release resource
GPIO.cleanup()
if __name__ == '__main__':
try:
setup()
except KeyboardInterrupt:
destroy()
as a note I haven't managed to get the LCD working in tandem but im more worried about the main functionality

Why isn't my Simpy resource keeping a queue?

I've been working for around a week to learn SimPy for a discrete simulation I have to run. I've done my best, but I'm just not experienced enough to figure it out quickly. I am dying. Please help.
The system in question goes like this:
order arrives -> resource_1 (there are 2) performs take_order -> order broken into items -> resource_2 (there are 10) performs process_item
My code runs and performs the simulation, but I'm having a lot of trouble getting the queues on the resources to function. As in, queues do not build up on either resource when I run it, and I cannot find the reason why. I try resource.get_queue and get empty lists. There should absolutely be queues, as the orders arrive faster than they can be processed.
I think it has something to do with the logic for requesting resources, but I can't figure it out. Here's how I've structured the code:
import simpy
import random
import numpy as np
total_items = []
total_a = []
total_b = []
total_c = []
order_Q = []
item_Q = []
skipped_visits = []
order_time_dict = {}
order_time_dict2 = {}
total_order_time_dict = {}
var = []
class System:
def __init__(self,env,num_resource_1,num_resource_2):
self.env = env
self.resource_1 = simpy.Resource(env,num_resource_1)
self.resource_2 = simpy.Resource(env,num_resource_2)
def take_order(self, order):
self.time_to_order = random.triangular(30/60,60/60,120/60)
arrive = self.env.now
yield self.env.timeout(self.time_to_order)
def process_item(self,item):
total_process_time = 0
current = env.now
order_num = item[1][0]
for i in range(1,item[1][1]):
if 'a' in item[0]:
total_process_time += random.triangular(.05,7/60,1/6) #bagging time only
#here edit order time w x
if 'b' in item[0]:
total_process_time += random.triangular(.05,.3333,.75)
if 'c' in item[0]:
total_process_time += random.triangular(.05,7/60,1/6)
#the following is handling time: getting to station, waiting on car to arrive at window after finished, handing to cust
total_process_time += random.triangular(.05, 10/60, 15/60)
item_finish_time = current + total_process_time
if order_num in order_time_dict2.keys():
start = order_time_dict2[order_num][0]
if order_time_dict2[order_num][1] < item_finish_time:
order_time_dict2[order_num] = (start, item_finish_time)
else:
order_time_dict2[order_num] = (current, item_finish_time)
yield self.env.timeout(total_process_time)
class Order:
def __init__(self, order_dict,order_num):
self.order_dict = order_dict
self.order_num = order_num
self.order_stripped = {}
for x,y in list(self.order_dict.items()):
if x != 'total':
if y != 0:
self.order_stripped[x] = (order_num,y) #this gives dictionary format {item: (order number, number items) } but only including items in order
self.order_list = list(self.order_stripped.items())
def generate_order(num_orders):
print('running generate_order')
a_demand = .1914 ** 3
a_stdev = 43.684104
b_demand = .1153
b_stdev = 28.507782
c_demand = .0664
c_stdev = 15.5562624349
num_a = abs(round(np.random.normal(a_demand)))
num_b = abs(round(np.random.normal(b_demand)))
num_c = abs(round(np.random.normal(c_demand)))
total = num_orders
total_a.append(num_a)
total_b.append(num_b)
total_c.append(num_c)
total_num_items = num_a + num_b + num_c
total_items.append(total_num_items)
order_dict = {'num_a':num_a, 'num_b':num_b,'num_c':num_c, 'total': total}
return order_dict
def order_process(order_instance,system):
enter_system_at = system.env.now
print("order " + str(order_instance.order_num) + " arrives at " + str(enter_system_at))
if len(system.resource_1.get_queue) > 1:
print("WORKING HERE ******************")
if len(system.resource_1.get_queue) <= 25:
with system.resource_1.request() as req:
order_Q.append(order_instance)
yield req
yield env.process(system.take_order(order_instance))
order_Q.pop()
enter_workstation_at = system.env.now
print("order num " + str(order_instance.order_num) + " enters workstation at " + str(enter_workstation_at))
for item in order_instance.order_list:
item_Q.append(item)
with system.resource_2.request() as req:
yield req
yield env.process(system.process_item(item))
if len(system.resource_2.get_queue) >1:
var.append(1)
item_Q.pop()
leave_workstation_at = system.env.now
print("Order num " + str(order_instance.order_num) + " leaves at " + str(leave_workstation_at))
order_time_dict[order_instance.order_num] = leave_workstation_at-enter_workstation_at
total_order_time_dict[order_instance.order_num]=leave_workstation_at-enter_system_at
else:
skipped_visits.append(1)
def setup(env):
system = System(env,2,15)
order_num = 0
while True:
next_order = random.expovariate(3.5) #where 20 is order arrival mean (lambda)
yield env.timeout(next_order)
order_num+=1
env.process(order_process(Order(generate_order(order_num),order_num),system))
env = simpy.Environment()
env.process(setup(env))
env.run(until=15*60)
print("1: \n", order_time_dict)
I think you are looking at the wrong queue.
the api for getting queued requests for resources is just attribute queue so try using
len(system.resource_1.queue)
get_queue and put_queue is from the base class and used to derive new resource classes.
but wait they are not what any reasonable person would assume, and I find this confusing too, but the doc says
Requesting a resources is modeled as “putting a process’ token into the resources” which means when you call request() the process is put into the put_queue, not the get_queue. And with resource, release always succeeds immediately so its queue (which is the get_queue) is always empty
I think queue is just a alias for the put_queue, but queue is much less confussing

How to reuse class from another file

I import technicals.py into bot.py and want to reuse the variable sl and tp from the class instance process_candles.
If a constant number is given to sl and tp in bot.py, the script is able to work. However, the desired result is to get variable sl and tp which is calculated in the class instance process_candles. from technicals.py.
snippet technicals.py as below:
df['PAIR'] = self.pair
decision = NONE
tp = 0
sl = 0
if c[-2]>o[-2]:
if ca[-1]>h[-2]+0.0010:
decision = BUY
tp = ca[-1]+0.010
sl = l[-2]-0.010
elif o[-2]>c[-2]:
if cb[-1]<l[-2]-0.0010:
decision = SELL
tp = cb[-1]-0.010
sl = h[-2]+0.010
else:
decision = NONE
snippet bot.py
def process_pairs(self):
trades_to_make = []
for pair in self.trade_pairs:
if self.timings[pair].ready == True:
self.log_message(f"Ready to trade {pair}")
techs = Technicals(self.settings[pair], self.api, pair, GRANULARITY, log=self.tech_log)
decision = techs.get_trade_decision(self.timings[pair].last_candle)
print ("process decision")
print (decision)
units = decision * self.settings[pair].units
#tp = "154"
#sl = "153"
if units != 0:
trades_to_make.append({'pair': pair, 'units': units,'take_profit':tp, 'stop_loss':sl})
Full script are as below:
technicals.py
import pandas as pd
import numpy as np
from defs import BUY, SELL, NONE
class Technicals():
def __init__(self, settings, api, pair, granularity, log=None):
self.settings = settings
self.log = log
self.api = api
self.pair = pair
self.granularity = granularity
def log_message(self, msg):
if self.log is not None:
self.log.logger.debug(msg)
def fetch_candles(self, row_count, candle_time):
status_code, df = self.api.fetch_candles(self.pair, count=row_count, granularity=self.granularity)
if df is None:
self.log_message(f"Error fetching candles for pair:{self.pair} {candle_time}, df None")
return None
elif df.iloc[-1].time != candle_time:
self.log_message(f"Error fetching candles for pair:{self.pair} {candle_time} vs {df.iloc[-1].time}")
return None
else:
return df
def process_candles(self, df):
open = df.mid_o
o = np.array(open,dtype='float')
#print (o)
high = df.mid_h
h = np.array(high,dtype='float')
#print (h)
low = df.mid_l
l = np.array(low,dtype='float')
#print (l)
close = df.mid_c
c = np.array(close,dtype='float')
print (c)
close_ask = df.ask_c
ca = np.array(close_ask,dtype='float')
print (ca)
close_bid = df.bid_c
cb = np.array(close_bid,dtype='float')
print (cb)
df['PAIR'] = self.pair
decision = NONE
tp = 0
sl = 0
if c[-2]>o[-2]:
if ca[-1]>h[-2]+0.0010:
decision = BUY
tp = ca[-1]+0.010
sl = l[-2]-0.010
elif o[-2]>c[-2]:
if cb[-1]<l[-2]-0.0010:
decision = SELL
tp = cb[-1]-0.010
sl = h[-2]+0.010
else:
decision = NONE
log_cols = ['time','volume','PAIR','bid_c','ask_c','mid_o','mid_h','mid_l','mid_c']
self.log_message(f"Processed_df\n{df[log_cols].tail(3)}")
self.log_message(f"Trade_decision:{decision}")
self.log_message("")
return decision
def get_trade_decision(self, candle_time):
max_rows = self.settings.long_ma + 2
self.log_message("")
self.log_message(f"get_trade_decision() pair:{self.pair} max_rows:{max_rows}")
df = self.fetch_candles(max_rows, candle_time)
print ("xxxx")
print (df)
if df is not None:
return self.process_candles(df)
print("get trade decision")
print(self.process_candles(df))
return NONE
bot.py
import pprint
import time
from settings import Settings
from log_wrapper import LogWrapper
from timing import Timing
from oanda_api import OandaAPI
from technicals import Technicals
from defs import NONE, BUY, SELL
from trade_manager import TradeManager
GRANULARITY = "M1"
SLEEP = 10.0
class TradingBot():
def __init__(self):
self.log = LogWrapper("Bot")
self.tech_log = LogWrapper("Technicals")
self.trade_log = LogWrapper("Trade")
self.trade_pairs = Settings.get_pairs()
self.settings = Settings.load_settings()
self.api = OandaAPI()
self.trade_manager = TradeManager(self.api, self.settings, self.trade_log)
self.timings = { p: Timing(self.api.last_complete_candle(p, GRANULARITY)) for p in self.trade_pairs }
self.log_message(f"Bot started with\n{pprint.pformat(self.settings)}")
self.log_message(f"Bot Timings\n{pprint.pformat(self.timings)}")
print (self.api)
def log_message(self, msg):
self.log.logger.debug(msg)
def update_timings(self):
for pair in self.trade_pairs:
current = self.api.last_complete_candle(pair, GRANULARITY)
self.timings[pair].ready = False
if current > self.timings[pair].last_candle:
self.timings[pair].ready = True
self.timings[pair].last_candle = current
self.log_message(f"{pair} new candle {current}")
def process_pairs(self):
trades_to_make = []
for pair in self.trade_pairs:
if self.timings[pair].ready == True:
self.log_message(f"Ready to trade {pair}")
techs = Technicals(self.settings[pair], self.api, pair, GRANULARITY, log=self.tech_log)
decision = techs.get_trade_decision(self.timings[pair].last_candle)
print ("process decision")
print (decision)
units = decision * self.settings[pair].units
#tp = "154"
#sl = "153"
if units != 0:
trades_to_make.append({'pair': pair, 'units': units,'take_profit':tp, 'stop_loss':sl})
if len(trades_to_make) > 0:
print("bot")
print(trades_to_make)
self.trade_manager.place_trades(trades_to_make)
def run(self):
while True:
self.update_timings()
self.process_pairs()
time.sleep(SLEEP)
if __name__ == "__main__":
b = TradingBot()
b.run()
defs.py
API_KEY = "xxxx"
ACCOUNT_ID = "xyz"
OANDA_URL = 'https://api-fxpractice.oanda.com/v3'
SECURE_HEADER = {
'Authorization': f'Bearer {API_KEY}',
'Content-Type': 'application/json'
}
BUY = 1
SELL = -1
NONE = 0
Instead of just returning the decision, also return the take profit and stop loss values:
return decision, tp, sl
Then you can unpack the tuple in process_pairs:
decision, tp, sl = techs.get_trade_decision(self.timings[pair].last_candle)
You can define your tp and sl as class variables of Technicals.
class Technicals(object):
tp: int = 0
sl: int = 0
and use them within Technicals as:
cls.tp = ... # if you are inside class-method
self.tp = ... # if you are inside instance-method
And in the TradingBot you can then simple import Technicals and use the class-Vars like:
tp = Technicals.tp # you can use the class
tp = techs.tp # or the instance you already have

Python multi connection downloader resuming after pausing makes download run endlessly

I have written a Python script that downloads a single file using 32 connections if available.
I have written a multiconnection downloader that works fine without pausing, but won't stop downloading after resuming, the progress would go beyond 100%...
Like this:
Download mode: Multi-thread (press Space to pause/resume, press Escape to stop)
[████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████] 120% completed, paused: False
Download mode: Multi-thread (press Space to pause/resume, press Escape to stop)
1798.08 MiB downloaded, 1489.83 MiB total, -308.25 MiB remaining, download speed: 22.73 MiB/s
Minimum speed: 0.00 MiB/s, average speed: 4.54 MiB/s, maximum speed: 75.00 MiB/s
Task started on 2021-08-09 16:57:03, 00:06:35 elapsed, ETA: -1:59:47
After progress exceeds 100%, there will be error messages like this:
Exception in thread Thread-78:
Traceback (most recent call last):
File "C:\Program Files\Python39\lib\threading.py", line 973, in _bootstrap_inner
self.run()
File "C:\Program Files\Python39\lib\threading.py", line 910, in run
self._target(*self._args, **self._kwargs)
File "D:\MyScript\downloader.py", line 70, in multidown
mm[position: position+len(chunk)] = chunk
IndexError: mmap slice assignment is wrong size
(The above doesn't include all of the error message)
I have encountered all sorts of errors after resuming, but most importantly, the server will often send extra bytes from previous request, whose connection is dead and needless to say this breaks the whole code.
How should I implement pause and resume correctly?
I am thinking about multiprocessing, I assume the sessions and connections are all PID and port number related, and so far I haven't encountered a new run of the script that received extra bytes from previous runs of the script, so I guess using another process with a new PID and new port number plus requests.session() plus {'connection': 'close'} for each download should guarantee that no extra bytes from previous connections will be received, I just don't know how to share variables between processes...
The code:
downloader.py
import json
import keyboard
import os
import re
import requests
import sys
import time
import validators
from collections import deque
from datetime import datetime, timedelta
from math import inf
from mmap import mmap
from pathlib import Path
from ping3 import ping
from reprint import output
from threading import Thread
def timestring(sec):
sec = int(sec)
m, s = divmod(sec, 60)
h, m = divmod(m, 60)
return f'{h:02d}:{m:02d}:{s:02d}'
class Downloader:
def __init__(self):
self.recent = deque([0] * 12, maxlen=12)
self.recentspeeds = deque([0] * 200, maxlen=200)
self.paused = False
self.progress = dict()
class Multidown:
def __init__(self, obj, id):
self.count = 0
self.position = 0
self.completed = False
self.id = id
self.parent = obj
def multidown(self, url, start, end):
interrupted = False
s = requests.session()
s.headers.update({'connection': 'close', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'})
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
while end - length + (self.id != self.parent.progress['connections'] - 1) != start or r.status_code != 206:
r.close()
s.close()
del r
del s
time.sleep(0.02)
s = requests.session()
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
self.position = start
for chunk in r.iter_content(1048576):
if self.parent.paused:
self.parent.mm.flush()
r.connection.close()
r.close()
s.close()
del r
del s
interrupted = True
break
if chunk:
self.parent.mm[self.position: self.position+len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
if not interrupted:
r.close()
s.close()
if self.count == self.parent.progress[self.id]['length']:
self.completed = True
self.parent.progress[self.id]['completed'] = True
self.parent.mm.flush()
class Singledown:
def __init__(self):
self.count = 0
def singledown(self, url, path):
with requests.get(url, stream=True) as r:
with path.open('wb') as file:
for chunk in r.iter_content(1048576):
if chunk:
self.count += len(chunk)
file.write(chunk)
def download(self, url, filepath, num_connections=32, overwrite=False):
singlethread = False
threads = []
bcontinue = False
filepath = filepath.replace('\\', '/')
if (not re.match('^[a-zA-Z]:/(((?![<>:"/|?*]).)+((?<![ .])/)?)*$', filepath) or
not Path(filepath[:3]).exists()):
print('Invalid windows file path has been inputted, process will now stop.')
return
if not validators.url(url):
print('Invalid url been inputted, process will now stop.')
return
if url.lower().startswith('ftp://'):
print(
"`requests` module doesn't suport File Transfer Protocol, process will now stop")
return
path = Path(filepath)
if not path.exists():
bcontinue = True
else:
if path.is_file():
if overwrite:
bcontinue = True
else:
while True:
answer = input(
f'`{filepath}` already exists, do you want to overwrite it? \n(Yes, No):').lower()
if answer in ['y', 'yes', 'n', 'no']:
if answer.startswith('y'):
os.remove(filepath)
bcontinue = True
break
else:
print('Invalid input detected, retaking input.')
if not bcontinue:
print(
f'Overwritting {filepath} has been aborted, process will now stop.')
return
bcontinue = False
server = url.split('/')[2]
ok = ping(server, timeout=2)
if ok == False:
print(
'The server of the inputted url is non-existent, process will now stop.')
return
if ok:
bcontinue = True
if not ok:
print('Connection has timed out, will reattempt to ping server 5 times.')
for i in range(5):
print(
f'Reattempting to ping server, retrying {i + 1} out of 5')
ok = ping(server, timeout=2)
if ok:
print(
f'Connection successful on retry {i + 1}, process will now continue.')
bcontinue = True
break
else:
print(f'Retry {i + 1} out of 5 timed out' + (i != 4)
* ', reattempting in 1 second.' + (i == 4) * '.')
time.sleep(1)
if not bcontinue:
print('Failed to connect server, connection timed out, process will now stop')
return
bcontinue = False
head = requests.head(url)
if head.status_code == 200:
bcontinue = True
else:
for i in range(5):
print(f'Server responce is invalid, retrying {i + 1} out of 5')
head = requests.head(url)
if head.status_code == 200:
print(
f'Connection successful on retry {i + 1}, process will now continue.')
bcontinue = True
break
else:
print(f'Retry {i + 1} out of 5 failed to access data' +
(i != 4) * ', reattempting in 1 second.' + (i == 4) * '.')
time.sleep(1)
if not bcontinue:
print("Can't establish a connection with access to data, can't download target file, process will now stop.")
return
folder = '/'.join(filepath.split('/')[:-1])
Path(folder).mkdir(parents=True, exist_ok=True)
headers = head.headers
total = headers.get('content-length')
if not total:
print(
f'Cannot find the total length of the content of {url}, the file will be downloaded using a single thread.')
started = datetime.now()
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
sd = self.Singledown()
th = Thread(target=sd.singledown, args=(url, path))
threads.append(sd)
th.start()
total = inf
singlethread = True
else:
total = int(total)
if not headers.get('accept-ranges'):
print(
'Server does not support the `range` parameter, the file will be downloaded using a single thread.')
started = datetime.now()
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
sd = self.Singledown()
th = Thread(target=sd.singledown, args=(url, path))
threads.append(sd)
th.start()
singlethread = True
else:
segment = total / num_connections
started = datetime.now()
lastpressed = started
path.touch()
file = path.open('wb')
file.seek(total - 1)
file.write(b'\0')
file.close()
file = path.open(mode='r+b')
self.mm = mmap(file.fileno(), 0)
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
self.progress['total'] = total
self.progress['connections'] = num_connections
for i in range(num_connections):
md = self.Multidown(self, i)
start = int(segment * i)
end = int(segment * (i + 1)) - (i != num_connections - 1)
length = end - start + (i != num_connections - 1)
th = Thread(target=md.multidown, args=(
url, start, end))
threads.append(md)
self.progress[i] = dict()
self.progress[i]['start'] = start
self.progress[i]['position'] = start
self.progress[i]['end'] = end
self.progress[i]['count'] = 0
self.progress[i]['length'] = length
self.progress[i]['completed'] = False
th.start()
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
downloaded = 0
totalMiB = total / 1048576
speeds = []
interval = 0.04
with output(initial_len=5, interval=0) as dynamic_print:
while True:
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
status = sum([i.completed for i in threads])
downloaded = sum(i.count for i in threads)
self.recent.append(downloaded)
done = int(100 * downloaded / total)
doneMiB = downloaded / 1048576
gt0 = len([i for i in self.recent if i])
if not gt0:
speed = 0
else:
recent = list(self.recent)[12 - gt0:]
if len(recent) == 1:
speed = recent[0] / 1048576 / interval
else:
diff = [b - a for a, b in zip(recent, recent[1:])]
speed = sum(diff) / len(diff) / 1048576 / interval
speeds.append(speed)
self.recentspeeds.append(speed)
nzspeeds = [i for i in speeds if i]
if nzspeeds:
minspeed = min(nzspeeds)
else:
minspeed = 0
maxspeed = max(speeds)
now = datetime.now()
elapsed = (now - started).total_seconds()
meanspeed = downloaded / elapsed / 1048576
remaining = totalMiB - doneMiB
dynamic_print[0] = '[{0}{1}] {2}'.format(
'\u2588' * done, '\u00b7' * (100-done), str(done)) + '% completed' + (not singlethread) * ', paused: {0}'.format(self.paused)
dynamic_print[1] = 'Download mode: ' + singlethread * \
'Single-thread' + (not singlethread) * 'Multi-thread (press Space to pause/resume, press Escape to stop)'
dynamic_print[2] = '{0:.2f} MiB downloaded, {1:.2f} MiB total, {2:.2f} MiB remaining, download speed: {3:.2f} MiB/s'.format(
doneMiB, totalMiB, remaining, speed)
if speed and total != inf:
eta = timestring(remaining / speed)
else:
eta = '99:59:59'
dynamic_print[3] = 'Minimum speed: {0:.2f} MiB/s, average speed: {1:.2f} MiB/s, maximum speed: {2:.2f} MiB/s'.format(
minspeed, meanspeed, maxspeed)
dynamic_print[4] = 'Task started on {0}, {1} elapsed, ETA: {2}'.format(
started.strftime('%Y-%m-%d %H:%M:%S'), timestring(elapsed), eta)
if keyboard.is_pressed('space'):
if not singlethread:
pressed = datetime.now()
if (pressed - lastpressed).total_seconds() > 0.5:
lastpressed = pressed
if self.paused:
for i, md in enumerate(threads):
if not md.completed:
th = Thread(target=md.multidown, args=(
url, self.progress[i]['position'], self.progress[i]['end']))
th.start()
self.paused = not self.paused
if keyboard.is_pressed('esc'):
if not singlethread:
ended = datetime.now()
self.paused = True
break
if status == len(threads):
if not singlethread:
self.mm.close()
ended = datetime.now()
break
time.sleep(interval)
time_spent = (ended - started).total_seconds()
meanspeed = total / time_spent / 1048576
status = sum([i.completed for i in threads])
if status == len(threads):
print('Task completed on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
else:
print('Task interrupted on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
if __name__ == '__main__':
d = Downloader()
d.download(*sys.argv[1:])
For testing purposes this is a dumbed-down version of the script, with all checks removed while retaining the same functionality (sorry it really takes all these lines to show the download information):
import json
import os
import requests
import sys
import time
from collections import deque
from datetime import datetime, timedelta
from math import inf
from mmap import mmap
from pathlib import Path
from reprint import output
from threading import Thread
def timestring(sec):
sec = int(sec)
m, s = divmod(sec, 60)
h, m = divmod(m, 60)
return f'{h:02d}:{m:02d}:{s:02d}'
class Downloader:
def __init__(self):
self.recent = deque([0] * 12, maxlen=12)
self.recentspeeds = deque([0] * 200, maxlen=200)
self.paused = False
self.progress = dict()
self.UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
class Multidown:
def __init__(self, obj, id):
self.count = 0
self.position = 0
self.completed = False
self.id = id
self.parent = obj
self.UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
def multidown(self, url, start, end):
interrupted = False
s = requests.session()
s.headers.update({'connection': 'close', 'user-agent': self.UA})
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
while end - length + (self.id != self.parent.progress['connections'] - 1) != start or r.status_code != 206:
r.close()
s.close()
del r
del s
time.sleep(0.02)
s = requests.session()
r = s.get(
url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
length = int(r.headers['content-length'])
self.position = start
for chunk in r.iter_content(1048576):
if self.parent.paused:
self.parent.mm.flush()
r.connection.close()
r.close()
s.close()
del r
del s
interrupted = True
break
if chunk:
self.parent.mm[self.position: self.position+len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
if not interrupted:
r.close()
s.close()
if self.count == self.parent.progress[self.id]['length']:
self.completed = True
self.parent.progress[self.id]['completed'] = True
self.parent.mm.flush()
def download(self, url, filepath, num_connections=32, overwrite=False):
singlethread = False
threads = []
bcontinue = False
filepath = filepath.replace('\\', '/')
if Path(filepath).exists():
os.remove(filepath)
folder = '/'.join(filepath.split('/')[:-1])
Path(folder).mkdir(parents=True, exist_ok=True)
head = requests.head(url, headers={'user-agent': self.UA})
path = Path(filepath)
headers = head.headers
total = headers.get('content-length')
if total:
total = int(total)
if headers.get('accept-ranges'):
segment = total / num_connections
started = datetime.now()
lastpressed = started
path.touch()
file = path.open('wb')
file.seek(total - 1)
file.write(b'\0')
file.close()
file = path.open(mode='r+b')
self.mm = mmap(file.fileno(), 0)
print('Task started on %s.' %
started.strftime('%Y-%m-%d %H:%M:%S'))
self.progress['total'] = total
self.progress['connections'] = num_connections
for i in range(num_connections):
md = self.Multidown(self, i)
start = int(segment * i)
end = int(segment * (i + 1)) - (i != num_connections - 1)
length = end - start + (i != num_connections - 1)
th = Thread(target=md.multidown, args=(
url, start, end))
threads.append(md)
self.progress[i] = dict()
self.progress[i]['start'] = start
self.progress[i]['position'] = start
self.progress[i]['end'] = end
self.progress[i]['count'] = 0
self.progress[i]['length'] = length
self.progress[i]['completed'] = False
th.start()
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
downloaded = 0
totalMiB = total / 1048576
speeds = []
interval = 0.04
with output(initial_len=5, interval=0) as dynamic_print:
while True:
Path(filepath + '.progress.json').write_text(json.dumps(self.progress, indent=4))
status = sum([i.completed for i in threads])
downloaded = sum(i.count for i in threads)
self.recent.append(downloaded)
done = int(100 * downloaded / total)
doneMiB = downloaded / 1048576
gt0 = len([i for i in self.recent if i])
if not gt0:
speed = 0
else:
recent = list(self.recent)[12 - gt0:]
if len(recent) == 1:
speed = recent[0] / 1048576 / interval
else:
diff = [b - a for a, b in zip(recent, recent[1:])]
speed = sum(diff) / len(diff) / 1048576 / interval
speeds.append(speed)
self.recentspeeds.append(speed)
nzspeeds = [i for i in speeds if i]
if nzspeeds:
minspeed = min(nzspeeds)
else:
minspeed = 0
maxspeed = max(speeds)
now = datetime.now()
elapsed = (now - started).total_seconds()
meanspeed = downloaded / elapsed / 1048576
remaining = totalMiB - doneMiB
dynamic_print[0] = '[{0}{1}] {2}'.format(
'\u2588' * done, '\u00b7' * (100-done), str(done)) + '% completed' + (not singlethread) * ', paused: {0}'.format(self.paused)
dynamic_print[1] = 'Download mode: ' + singlethread * \
'Single-thread' + (not singlethread) * 'Multi-thread (press Space to pause/resume, press Escape to stop)'
dynamic_print[2] = '{0:.2f} MiB downloaded, {1:.2f} MiB total, {2:.2f} MiB remaining, download speed: {3:.2f} MiB/s'.format(
doneMiB, totalMiB, remaining, speed)
if speed and total != inf:
eta = timestring(remaining / speed)
else:
eta = '99:59:59'
dynamic_print[3] = 'Minimum speed: {0:.2f} MiB/s, average speed: {1:.2f} MiB/s, maximum speed: {2:.2f} MiB/s'.format(
minspeed, meanspeed, maxspeed)
dynamic_print[4] = 'Task started on {0}, {1} elapsed, ETA: {2}'.format(
started.strftime('%Y-%m-%d %H:%M:%S'), timestring(elapsed), eta)
if PAUSE:
if not singlethread:
pressed = datetime.now()
if (pressed - lastpressed).total_seconds() > 0.5:
lastpressed = pressed
if self.paused:
for i, md in enumerate(threads):
if not md.completed:
th = Thread(target=md.multidown, args=(
url, self.progress[i]['position'], self.progress[i]['end']))
th.start()
self.paused = not self.paused
if status == len(threads):
if not singlethread:
self.mm.close()
ended = datetime.now()
break
time.sleep(interval)
time_spent = (ended - started).total_seconds()
meanspeed = total / time_spent / 1048576
status = sum([i.completed for i in threads])
if status == len(threads):
print('Task completed on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
else:
print('Task interrupted on {0}, total time elapsed: {1}, average speed: {2:.2f} MiB/s'.format(
ended.strftime('%Y-%m-%d %H:%M:%S'), timestring(time_spent), meanspeed))
if __name__ == '__main__':
import hashlib
global PAUSE
PAUSE = False
chash = '5674E59283D95EFE8C88770515A9BBC80CBB77CB67602389FD91DEF26D26AED2'
d = Downloader()
if sys.argv[1] == '0':
d.download('http://ipv4.download.thinkbroadband.com/1GB.zip', 'C:/test/1GB.zip')
elif sys.argv[1] == '1':
th1 = Thread(target=d.download, args=('http://ipv4.download.thinkbroadband.com/1GB.zip', 'C:/test/1GB.zip'))
th1.start()
def test():
while th1.is_alive():
global PAUSE
PAUSE = not PAUSE
time.sleep(10)
th2 = Thread(target=test)
th2.start()
while th1.is_alive():
pass
sha256_hash = hashlib.sha256()
with open('C:/test/1GB.zip',"rb") as f:
for byte_block in iter(lambda: f.read(1048576),b""):
sha256_hash.update(byte_block)
print(sha256_hash.hexdigest().lower() == chash.lower())
The url isn't accessible without a VPN in my locale, and test 0 always results True, that is, if the connection hasn't gone dead during the download, and test 1 sometimes results True, sometimes results False, sometimes it doesn't finish(progress bar goes beyond 100%)...
How can my code be salvaged?
This might not be your only problem but you have a race condition that could show up if you pause and resume quickly (where the definition of quickly varies greatly depending on your circumstances). Consider that you've got 32 threads each requesting a MB chunk, let's call them threads 0-31. They are sitting their downloading and you pause. The threads do not know that you paused until they get a chunk of data as they are sitting in blocking io. Not sure what speed your connection is or how many cores your machine has (threads will sometimes act in parallel when they don't need the GIL,) but this process could take a lot longer than you expect. Then you unpause and your code creates new threads 32-63 but some or all of threads 0-31 are still waiting for the next chunk. You set threads 32-63 in motion and then you turn off your pause flag. Those threads that didn't end from 0-31 then wake up and see that things aren't paused. Now you have multiple threads accessing the same state variables
self.parent.mm[self.position: self.position + len(chunk)] = chunk
self.count += len(chunk)
self.position += len(chunk)
self.parent.progress[self.id]['count'] = self.count
self.parent.progress[self.id]['position'] = self.position
so if thread 0 is downloading the same chunk as thread 31 they both keep updating all the same state and they add to position and count even though they are downloading overlapping parts of the file. You even reuse the objects that the threads live inside of so that state can get really really messed up.
for i, md in enumerate(threads):
if not md.completed:
th = Thread(target=md.multidown, args=(url, self.progress[i]['position'], self.progress[i]['end']))
th.start()
There might be some other problems in your code and it is a lot to sort through so I suggest taking the time to do some refactoring to eliminate duplicate code and organise things into more functions. I don't believe in crazy tiny functions, but you could use a few sub functions like download_multi(download_state) and download_single maybe. I am relatively confident however that your current problem will be solved if you ensure the threads you have running actually end after you pause. To do so you need to actually hold references to your threads
somewhere:
actual_threads = []
When you create your threads (the first time and after you unpause, or preferably this would be in a function and you'd do it there and return the list):
th = Thread(target=md.multidown, args=(
url, start, end))
threads.append(md)
actual_threads.append(th)
Then when you unpause:
self.paused = not self.paused
for th in actual_threads:
th.join()
This way you have the threads working, they quit when you pause and you rebuild them. So join should return as soon as they break out of the blocking io call to iter_content. This way those threads are always dead before you make the new ones.
What I would do myself however would be to create sockets from each thread to the main process. When pause is detected the threads shut down the request and save any data that's already waiting in the OS buffer then go into a blocking receive on the socket (there might be a way to use select with a socket and requests to allow you to even break out of the blocking io involved in r.iter_content immediately but I leave that for your research). When the program is unpaused the main process would send some value to indicate the program should restart (you'd want at least two signals the threads would recognise, one for quitting gracefully and one to resume. The codes can be single characters.) When the value is sent to each thread that thread will unblock and can then restart the download using requests and its previous state like nothing happened.

(Almost Done) Python program that closes stream when below audio threshold

I have already made a program that prints the max amplitude or volume, then I made a program that only prints the volume when the threshold is above 2300, now I want to make a program that closes the stream when the threshold is below 2300 for 2 seconds, so not right after it but in 2 seconds only if the threshold hasn't raised again within 2 seconds. Here is what I have:
import pyaudio
import struct
import audioop
import time
INITIAL_THRESHOLD = 0.010
FORMAT = pyaudio.paInt16
SHORT_NORMALIZE = (1.0/32768.0)
CHANNELS = 2
RATE = 44100
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)
OVERSENSITIVE = 15.0/INPUT_BLOCK_TIME
UNDERSENSITIVE = 120.0/INPUT_BLOCK_TIME
MAX_BLOCKS = 0.15/INPUT_BLOCK_TIME
class TEST(object):
def __init__(self):
self.pa = pyaudio.PyAudio()
self.stream = self.open_mic_stream()
self.tap_threshold = INITIAL_THRESHOLD
self.noisycount = MAX_BLOCKS+1
self.quietcount = 0
self.errorcount = 0
def stop(self):
self.stream.close()
def find_input_device(self):
device_index = None
for i in range( self.pa.get_device_count() ):
devinfo = self.pa.get_device_info_by_index(i)
print( "Device %d: %s"%(i,devinfo["name"]) )
for keyword in ["mic","input"]:
if keyword in devinfo["name"].lower():
print( "Found an input: device %d - %s"%(i,devinfo["name"]) )
device_index = i
return device_index
if device_index == None:
print( "No preferred input found; using default input device." )
return device_index
def open_mic_stream( self ):
device_index = self.find_input_device()
stream = self.pa.open( format = FORMAT,
channels = CHANNELS,
rate = RATE,
input = True,
input_device_index = device_index,
frames_per_buffer = INPUT_FRAMES_PER_BLOCK)
return stream
def listen(self):
try:
chunk = self.stream.read(INPUT_FRAMES_PER_BLOCK)
except IOError, e:
self.errorcount += 1
print( "(%d) Error recording: %s"%(self.errorcount,e) )
self.noisycount = 1
return
while True:
mx = audioop.max(chunk, 2)
if mx > 2300: #print the volume level whenever the volume is above 2300
print(mx)
elif mx < 2300: #If the volume is below 2300:
time.sleep(2) #Wait two seconds
if mx > 2300: #If after two seconds the volume is back up, repeat
continue
elif mx < 2300: #If after two seconds the volume is still down, break the loop
break
print("You're Done")
self.stream.close()
print("Stream Closed")
if __name__ == "__main__":
tt = TEST()
for i in range(1000):
tt.listen()
All that does is either print the initial volume level, over and over forever, or it just closes the stream immediately. This depends on whether there is sound when I start the program or not.
EDIT:
While the methods posted in my answer outline ways to break your loop, the real reason your program is not working, is not because of the loop, its because the portion of code where you actually read the audio input is not even in your loop.
You need to read the input stream each time around the loop otherwise you will just keep making a check against the value when when the listen method is called.
def listen(self):
while True:
try:
chunk = self.stream.read(INPUT_FRAMES_PER_BLOCK)
except IOError, e:
self.errorcount += 1
print( "(%d) Error recording: %s"%(self.errorcount,e) )
self.noisycount = 1
return
mx = audioop.max(chunk, 2)
if mx > 2300: #print the volume level whenever the volume is above 2300
print(mx)
elif mx < 2300: #If the volume is below 2300:
time.sleep(2) #Wait two seconds
if mx > 2300: #If after two seconds the volume is back up, repeat
continue
You need to listen during those two seconds, instead of passively waiting.
start = -1
while True:
try:
chunk = self.stream.read(INPUT_FRAMES_PER_BLOCK)
except IOError, e:
self.errorcount += 1
print( "(%d) Error recording: %s"%(self.errorcount,e) )
self.noisycount = 1
return
mx = audioop.max(chunk, 2)
if mx > 2300: #print the volume level whenever the volume is above 2300
print(mx)
start = -1
elif mx < 2300: #If the volume is below 2300:
if start < 0: # INITIALIZE
start = time.time()
else:
if time.time() - start >= 2: #Wait two seconds
break
The problem is you aren't updating mx while (or after) you sleep. You should set a variable for when you last got a big sound, and break/restart based on new samples
last_sound = time.time() # time of last loud sound
while True:
mx = audioop.max(chunk, 2) # new sample
if mx > 2300:
print(mx)
last_sound = time.time() # reset the "timer" because we're loud
else: # I changed this to else
now = time.time()
if now - last_sound >= 2:
break # two seconds of silence!
else:
print(now-last_sound) # this should count up to 2

Categories

Resources