Python: stdout rate is double of what it should be

I wrote a very simple script in Python 2.7 called slowcat.py, which lets me cat a file at a given byte rate per second. The problem is that the specified rate is doubled on the output stream: rate=1 byte results in 2 bytes per second, and rate=4 bytes results in 8 bytes per second. See the code snippet for the actual program:
#!/usr/bin/env python
import argparse
import time
import sys
import os


def get_configuration():
    """
    Returns a populated configuration
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('file', metavar='FILE', type=argparse.FileType('r'),
                        nargs='?', help="Input file")
    parser.add_argument('--rate', type=int, default=666880,
                        help="Output rate in bytes per second")
    return parser.parse_args()


def main():
    cfg = get_configuration()
    size = os.path.getsize(cfg.file.name)
    bytes_read = 0
    t1 = time.time()
    while True:
        n = min(cfg.rate, size - bytes_read)
        if n <= 0:
            break
        buf = cfg.file.read(n)
        sys.stdout.write(buf)
        sys.stdout.flush()
        bytes_read += n
        t2 = time.time()
        if t2 - t1 < 1.0:
            time.sleep(1.0 - (t2 - t1))
        t1 = t2


if __name__ == "__main__":
    main()
Now my question is: why is the output rate double what I pass on the command line? If you copy-paste the code snippet you can easily try it out on your system, e.g. python slowcat.py --rate 2 slowcat.py

The problem is the t1 = t2 at the end of the while loop. That sets t1 to the time before the sleep delay, so the next time you test the elapsed time interval it will be more than 1 second, and thus the sleep call will be skipped.
To fix it, change that last line to
t1 = time.time()
BTW, it may be more accurate to use the newer time.perf_counter() rather than time.time(); note that perf_counter() was added in Python 3.3, so it isn't available on the Python 2.7 this script targets.
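Putting the fix in context, this is a sketch of the corrected pacing loop from main():
t1 = time.time()
while True:
    n = min(cfg.rate, size - bytes_read)
    if n <= 0:
        break
    buf = cfg.file.read(n)
    sys.stdout.write(buf)
    sys.stdout.flush()
    bytes_read += n
    t2 = time.time()
    if t2 - t1 < 1.0:
        time.sleep(1.0 - (t2 - t1))
    t1 = time.time()  # re-read the clock AFTER sleeping, not before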


How to write data to a file every 10 seconds

I'm a JS dev trying to learn a bit of Python while working on a Raspberry-Pi3 project that reads data from a Bluetooth temperature sensor.
I need to write the data to my file.txt every 10 seconds; how could I do that, please? I found a similar topic here (Run certain code every n seconds), but I don't know how to make it work in my current scenario.
#!/usr/bin/env python3
import argparse
import re
import logging
import sys
import time

from btlewrap import available_backends, BluepyBackend, GatttoolBackend, PygattBackend

from mitemp_bt.mitemp_bt_poller import MiTempBtPoller, \
    MI_TEMPERATURE, MI_HUMIDITY, MI_BATTERY


def valid_mitemp_mac(mac, pat=re.compile(r"4C:65:A8:[0-9A-F]{2}:[0-9A-F]{2}:[0-9A-F]{2}")):
    """Check for valid MAC addresses."""
    if not pat.match(mac.upper()):
        raise argparse.ArgumentTypeError('The MAC address "{}" seems to be in the wrong format'.format(mac))
    return mac


def poll(args):
    """Poll data from the sensor."""
    backend = _get_backend(args)
    poller = MiTempBtPoller(args.mac, backend)
    line1 = "Temperature: {}".format(poller.parameter_value(MI_TEMPERATURE))
    line2 = "Humidity: {}".format(poller.parameter_value(MI_HUMIDITY))
    print("Getting data from Mi Temperature and Humidity Sensor")
    print("FW: {}".format(poller.firmware_version()))
    print("Name: {}".format(poller.name()))
    print("Battery: {}".format(poller.parameter_value(MI_BATTERY)))
    print(line1)
    print(line2)
    f = open('file.txt', 'w')
    f.write("%s \n %s \n" % (line1, line2))
    f.close()


def _get_backend(args):
    """Extract the backend class from the command line arguments."""
    if args.backend == 'gatttool':
        backend = GatttoolBackend
    elif args.backend == 'bluepy':
        backend = BluepyBackend
    elif args.backend == 'pygatt':
        backend = PygattBackend
    else:
        raise Exception('unknown backend: {}'.format(args.backend))
    return backend


def list_backends(_):
    """List all available backends."""
    backends = [b.__name__ for b in available_backends()]
    print('\n'.join(backends))


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--backend', choices=['gatttool', 'bluepy', 'pygatt'], default='gatttool')
    parser.add_argument('-v', '--verbose', action='store_const', const=True)
    subparsers = parser.add_subparsers(help='sub-command help')

    parser_poll = subparsers.add_parser('poll', help='poll data from a sensor')
    parser_poll.add_argument('mac', type=valid_mitemp_mac)
    parser_poll.set_defaults(func=poll)

    parser_scan = subparsers.add_parser('backends', help='list the available backends')
    parser_scan.set_defaults(func=list_backends)

    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)

    if not hasattr(args, "func"):
        parser.print_help()
        sys.exit(0)

    args.func(args)


if __name__ == '__main__':
    main()
You can use the time module to pause the program for 10 seconds on each iteration:
from time import sleep

def func(n):
    print(n + 1)

for i in range(5):
    func(i)
    sleep(10)
>1
>2
>3
>4
>5
# (every 10 seconds)
However, this will block the rest of the program from running; a simple multi-threading script that calls the writing function in its own thread would get around that, as sketched below.
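For example, a minimal sketch using the standard threading module (write_data and the file name are illustrative stand-ins for your own writing code):
import threading
import time

def write_data():
    # stand-in for the real sensor-polling/writing code
    while True:
        with open('file.txt', 'a') as f:
            f.write('hello\n')
        time.sleep(10)

# daemon=True lets the main program exit even though the thread loops forever
t = threading.Thread(target=write_data, daemon=True)
t.start()
# ... the rest of the program keeps running here ...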
In relation to the code you are using, insert the sleep call within the poll function and wrap what you have there. If you want to loop the program 10 times:
def poll(args):
    """Poll data from the sensor."""
    for _ in range(10):
        # code things
        f = open('file.txt', 'a')  # << use append here or you will keep overwriting the file contents
        f.write('hello')
        f.close()
        sleep(10)
Or if you want it to run forever, until a KeyboardInterrupt or some other exit:
def poll(args):
    """Poll data from the sensor."""
    while True:
        # code things
        f = open('file.txt', 'a')  # << use append here or you will keep overwriting the file contents
        f.write('hello')
        f.close()
        sleep(10)
You need some kind of loop that polls your sensor - I do not see one glancing over your code. You have while and for loops in JS as well - look them up in http://docs.python.org/3/tutorial if you are unsure about the syntax.
Store the time of the last write in a variable, sleep a bit, poll the next value, and check whether 10 s have passed: write if so, else not. (Or simply sleep 10 s between polls if you do not want intermediate values printed.)
Read up about loops:
for statement
looping techniques
import time

def poll():
    return time.time(), 42

last_write = None  # when did we record to file last?

# loop for as long as you want - while True would loop endlessly
for _ in range(7):
    t, c = poll()  # call poll() to get time and temperature from mocked data reader
    # check if enough time has passed
    if last_write is None or (t - last_write) > 2:  # check if new reading needed
        with open("t.txt", "a") as f:
            f.write(f"{t} {c}\n")
        last_write = t
        print("in file ", t, c)
    else:
        print("just output ", t, c)
    time.sleep(0.7)  # sleep some
Output:
in file 1552978725.5224085 42 # ...25.5
just output 1552978726.2232893 42 # ...26.2 - not 2s passed
just output 1552978726.9241226 42 # ...26.9 - not 2s passed
in file 1552978727.6249442 42 # ...27.6 - 2.1s passed
just output 1552978728.3259027 42 # ...28.3 - not 2s passed
just output 1552978729.0267787 42 # ...29.0 - not 2s passed
in file 1552978729.7275977 42 # ...29.7 - 2.1s passed
More remarks:
Use with open(filename, mode) as f: and scope the file operations below it - it will auto-close your file when the scope ends, and it also handles exceptions by closing the file handle.
Using mode "w" will truncate the file before writing to it - you might want to use append instead: reading and writing files
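A quick illustration of the difference between the two modes (the file name is just an example):
with open("demo.txt", "w") as f:
    f.write("first")
with open("demo.txt", "w") as f:  # "w" truncates: the file now holds only "second"
    f.write("second")
with open("demo.txt", "a") as f:  # "a" appends: the file now holds "secondthird"
    f.write("third")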

Python end function after another function returns result

I may be approaching this all wrong, but still, this is where I'm at. I have very large log files I'm trying to search, up to 30 GB in some cases. I'm writing a script to pull info and have been playing with multiprocessing to speed it up a bit. Right now I'm testing running two functions at the same time to search from the top and the bottom to get results, which seems to work. I'm wondering if it's possible to stop one function on a result from the other, such that if the top function finds a result they both stop. This way I can build it out as needed.
#!/usr/bin/env python
from file_read_backwards import FileReadBackwards
from multiprocessing import Process
import sys

z = "log.log"

rocket = 0

def top():
    target = "test"
    with open(z) as src:
        found = None
        for line in src:
            if len(line) == 0:  # happens at end of file, then stop loop
                break
            if target in line:
                found = line
                break
        print(found)

def bottom():
    target = "text"
    with FileReadBackwards(z) as src:
        found = None
        for line in src:
            if len(line) == 0:  # happens at end of file, then stop loop
                break
            if target in line:
                found = line
                break
        print(found)

if __name__ == '__main__':
    p1 = Process(target=top)
    p1.start()
    p2 = Process(target=bottom)
    p2.start()
Here's a proof-of-concept of the approach I mentioned in the comments:
import os
import random
import sys
from multiprocessing import Process, Value


def search(proc_no, file_name, seek_to, max_size, find, flag):
    stop_at = seek_to + max_size

    with open(file_name) as f:
        if seek_to:
            f.seek(seek_to - 1)
            prev_char = f.read(1)
            if prev_char != '\n':
                # Landed in the middle of a line. Skip back one (or
                # maybe more) lines so this line isn't excluded. Start
                # by seeking back 256 bytes, then 512 if necessary, etc.
                exponent = 8
                pos = seek_to
                while pos >= seek_to:
                    pos = f.seek(max(0, pos - (2 ** exponent)))
                    f.readline()
                    pos = f.tell()
                    exponent += 1

        while True:
            if flag.value:
                break
            line = f.readline()
            if not line:
                break  # EOF
            data = line.strip()
            if data == find:
                flag.value = proc_no
                print(data)
                break
            if f.tell() > stop_at:
                break


if __name__ == '__main__':
    # list.txt contains lines with the numbers 1 to 1000001
    file_name = 'list.txt'
    info = os.stat(file_name)
    file_size = info.st_size

    if len(sys.argv) == 1:
        # Pick a random value from list.txt
        num_lines = 1000001
        choices = list(range(1, num_lines + 1))
        choices.append('XXX')
        find = str(random.choice(choices))
    else:
        find = sys.argv[1]

    num_procs = 4
    chunk_size, remainder = divmod(file_size, num_procs)
    max_size = chunk_size + remainder
    flag = Value('i', 0)
    procs = []

    print(f'Using {num_procs} processes to look for {find} in {file_name}')

    for i in range(num_procs):
        seek_to = i * chunk_size
        proc = Process(target=search, args=(i + 1, file_name, seek_to, max_size, find, flag))
        procs.append(proc)

    for proc in procs:
        proc.start()

    for proc in procs:
        proc.join()

    if flag.value:
        print(find, 'found by proc', flag.value)
    else:
        print(find, 'not found')
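To try the proof-of-concept out you need the list.txt it expects; a one-off sketch to generate it (one line per number from 1 to 1000001, per the comment above):
with open('list.txt', 'w') as f:
    for i in range(1, 1000002):
        f.write(f'{i}\n')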
After reading various posts[1] about reading files with multiprocessing and multithreading, it seems that neither is a great approach due to potential disk thrashing and serialized reads. So here's a different, simpler approach that is way faster (at least for the file with a million lines I was trying it out on):
import mmap
import sys


def search_file(file_name, text, encoding='utf-8'):
    text = text.encode(encoding)
    # Open in binary mode and map read-only (access=mmap.ACCESS_READ
    # is the portable way to get a read-only mapping).
    with open(file_name, 'rb') as f:
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
            index = m.find(text)
            if index > -1:
                # Found a match; now find the beginning of the line that
                # contains the match so we can grab the whole line.
                while index > 0:
                    index -= 1
                    if m[index] == 10:  # ord('\n')
                        index += 1
                        break
                else:
                    index = 0
                m.seek(index)
                line = m.readline()
                return line.decode(encoding)


if __name__ == '__main__':
    file_name, search_string = sys.argv[1:]
    line = search_file(file_name, search_string)
    sys.stdout.write(line if line is not None else f'Not found in {file_name}: {search_string}\n')
I'm curious how this would perform with a 30GB log file.
[1] Including this one
Simple example using a multiprocessing.Pool and callback function.
Terminates remaining pool processes once a result has returned.
You could add an arbitrary number of processes to search from different offsets in the file using this approach.
import math
import time
from multiprocessing import Pool
from random import random


def search(pid, wait):
    """Sleep for wait seconds, return PID
    """
    time.sleep(wait)
    return pid


def done(result):
    """Do something with result and stop other processes
    """
    print("Process: %d done." % result)
    pool.terminate()
    print("Terminate Pool")


pool = Pool(2)
pool.apply_async(search, (1, math.ceil(random() * 3)), callback=done)
pool.apply_async(search, (2, math.ceil(random() * 3)), callback=done)

# do other stuff ...

# Wait for result
pool.close()
pool.join()  # block our main thread
This is essentially the same as Blurp's answer, but I shortened it and changed it a bit to make it more general. As you can see, top should be an infinite loop, but bottom stops it immediately.
from multiprocessing import Process

# NOTE: a plain module-level flag like this is not actually shared between
# processes - each Process gets its own copy - so in practice a shared
# primitive such as multiprocessing.Value or Event is needed (see the
# sketch after this code).
valNotFound = True

def top():
    i = 0
    while valNotFound:
        i += 1

def bottom():
    global valNotFound
    valNotFound = False

p1 = Process(target=top)
p2 = Process(target=bottom)
p1.start()
p2.start()
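Since a plain global isn't shared across processes, here is a minimal sketch of the same idea using a multiprocessing.Event as the shared stop flag (the names are illustrative):
from multiprocessing import Event, Process

def top(stop_flag):
    i = 0
    while not stop_flag.is_set():
        i += 1

def bottom(stop_flag):
    stop_flag.set()  # signal every process watching the flag to stop

if __name__ == '__main__':
    stop_flag = Event()
    p1 = Process(target=top, args=(stop_flag,))
    p2 = Process(target=bottom, args=(stop_flag,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()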

Number of vehicles each T seconds

I wrote the script below in Python and ran it with SUMO, in order to obtain the number of vehicles between two induction loops in a lane, every 60 seconds.
But this one gives a value every second.
#!/usr/bin/env python
# -*-coding:Latin-1 -*
import os, sys
import optparse
import subprocess
import random
import threading
import time

SUMO_HOME = "/home/khadija/Téléchargements/sumo-0.25.0"

try:
    sys.path.append(os.path.join(SUMO_HOME, "tools"))
    from sumolib import checkBinary
except ImportError:
    sys.exit("please declare environment variable 'SUMO_HOME' as the root directory of your sumo installation (it should contain folders 'bin', 'tools' and 'docs')")

import traci

routeFile = "data2/cross.rou.xml"
PORT = 8873

# SIGN CONFIGURATIONS: NESW
NSgreen = "GrGr"
EWgreen = "rGrG"
PROGRAM = (NSgreen, EWgreen)


def nbr_veh_entr_indloop(i, o):
    # i and o are the input and output induction loops
    threading.Timer(60.0, nbr_veh_entr_indloop).start()
    x = traci.inductionloop.getLastStepMeanLength(i) - traci.inductionloop.getLastStepMeanLength(o)
    return x


def run():
    steps = open("data2/step.txt", "w")
    traci.init(int(PORT))
    step = 0
    while step < 7200:
        a = nbr_veh_entr_indloop("4i", "40")
        k = str(a)
        print >> steps, "nombre des veh : " + k  # concatenation
        traci.simulationStep()
        step += 1
    steps.close()
    traci.close()
    sys.stdout.flush()


def get_options():
    optParser = optparse.OptionParser()
    optParser.add_option("--nogui", action="store_true",
                         default=False, help="run the commandline version of sumo")
    options, args = optParser.parse_args()
    return options


# this is the main entry point of this script
if __name__ == "__main__":
    options = get_options()

    # this script has been called from the command line. It will start sumo as a
    # server, then connect and run
    if options.nogui:
        sumoBinary = checkBinary('sumo')
    else:
        sumoBinary = checkBinary('sumo-gui')

    # this is the normal way of using traci. sumo is started as a
    # subprocess and then the python script connects and runs
    sumoProcess = subprocess.Popen([sumoBinary, "-c", "data2/cross.sumocfg", "--tripinfo-output", "tripinfo.xml", "--remote-port", str(PORT)], stdout=sys.stdout, stderr=sys.stderr)
    run()
    sumoProcess.wait()
Thanks in advance for your help.
Regards,
You probably want to have the value every 60 simulation seconds, not every 60 wall-clock seconds, so a timer is pointless here. Simply ask for the value after every 60 simulation steps (assuming you use sumo's default step length of one second). So you could write something like:
if step % 60 == 0:
    print >> steps, "nombre des veh : " + k
This will print the value for the last step every 60 steps. If you want the value for the last minute you need to aggregate (sum up) yourself, as sketched below.
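A minimal sketch of that aggregation inside run(), assuming the threading.Timer call has been removed from nbr_veh_entr_indloop so it just returns the per-step value:
total = 0
step = 0
while step < 7200:
    total += nbr_veh_entr_indloop("4i", "40")  # per-step reading
    step += 1
    if step % 60 == 0:  # once per simulated minute
        print >> steps, "nombre des veh : " + str(total)
        total = 0
    traci.simulationStep()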

Data not printing correct

When running a data-streaming subprocess against a multiprocessing Process, the program freezes after printing a single element of the dictionary:
#!/usr/bin/python
import subprocess, time, timeit
from multiprocessing import Process, Queue
import re, os, pprint, math
from collections import defaultdict

Dict = {}
count = defaultdict(int)
queueVar = Queue()


def __ReadRX__(RX_info):
    lines = iter(RX_info.stdout.readline, "")
    try:
        start = time.clock()
        for line in lines:
            if re.match(r"^\d+.*$", line):
                splitline = line.split()
                del splitline[1:4]
                identifier = splitline[1]
                count[identifier] += 1
                end = time.clock()
                timing = round((end - start) * 10000, 100)
                dlc = splitline[2]
                hexbits = splitline[3:]
                Dict[identifier] = [dlc, hexbits, count[identifier], int(timing)]
                start = end
            for identifier, hexbits in Dict.items():
                queueVar.put(Dict)
    except KeyboardInterrupt:
        pass


procRX = subprocess.Popen('receivetest -f=/dev/pcan32'.split(), stdout=subprocess.PIPE)

if __name__ == '__main__':
    munchCan = Process(target=__ReadRX__, args=(procRX,))
    munchCan.start()
    # munchCan.join()
    printDict = queueVar.get()
    for i in range(len(printDict)):
        print printDict
I know that if I print from __ReadRX__ it prints a constant stream; however, when trying to print from outside of the function I only get a single entry in the dictionary.
Add the following at the top:
from time import sleep
Then after the print statement add:
sleep(1)
This will make the script wait for 1 second. You can adjust that number as necessary.
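Putting it together, a minimal sketch of the main-process side (Python 2 syntax to match the question, and assuming the child keeps putting updated dicts on the queue):
if __name__ == '__main__':
    munchCan = Process(target=__ReadRX__, args=(procRX,))
    munchCan.start()
    while True:  # keep consuming the queue instead of reading it once
        printDict = queueVar.get()
        print printDict
        sleep(1)  # adjust as necessary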

Python 3.3: LAN speed test which calculates the read & write speeds and then returns the figure to Excel

I'm creating a LAN speed test which creates a data file in a specified location of a specified size and records the speed at which it is created/read. For the most part this is working correctly; there is just one problem: the read speed is ridiculously fast because all it's doing is timing how long it takes for the file to open, rather than how long it takes for the file to actually be read (if that makes sense?).
So far I have this:
import time
import pythoncom
from win32com.client import Dispatch
import os

# create file - write speed
myPath = input('Where do you want to write the file?')
size_MB = int(input('What sized file do you want to test with? (MB)'))
size_B = size_MB * 1024 * 1024
fName = '\pydatafile'

# start timer
start = time.clock()

f = open(myPath + fName, 'w')
f.write("\x00" * size_B)
f.close()

# how much time it took
elapsed = (time.clock() - start)
print("It took", elapsed, "seconds to write the", size_MB, "MB file")
time.sleep(1)

writeMBps = size_MB / elapsed
print("That's", writeMBps, "MBps.")
time.sleep(1)

writeMbps = writeMBps * 8
print("Or", writeMbps, "Mbps.")
time.sleep(2)

# open file - read speed
startRead = time.clock()
f = open(myPath + fName, 'r')

# how much time it took
elapsedRead = (time.clock() - startRead)
print("It took", elapsedRead, "seconds to read the", size_MB, "MB file")
time.sleep(1)

readMBps = size_MB / elapsedRead
print("That's", readMBps, "MBps.")
time.sleep(1)

readMbps = readMBps * 8
print("Or", readMbps, "Mbps.")
time.sleep(2)

f.close()

# delete the data file
os.remove(myPath + fName)

# record results on Excel
xl = Dispatch('Excel.Application')
xl.visible = 0
wb = xl.Workbooks.Add(r'C:\File\Location')
ws = wb.Worksheets(1)

# Write speed result
#
# loop until empty cell is found in column
col = 1
row = 1
empty = False
while not empty:
    val = ws.Cells(row, col).value
    print("Looking for next available cell to write to...")
    if val == None:
        print("Writing result to cell")
        ws.Cells(row, col).value = writeMbps
        empty = True
    row += 1

# Read speed result
#
# loop until empty cell is found in column
col = 2
row = 1
empty = False
while not empty:
    val = ws.Cells(row, col).value
    print("Looking for next available cell to write to...")
    if val == None:
        print("Writing result to cell")
        ws.Cells(row, col).value = readMbps
        empty = True
    row += 1

xl.Run('Save')
xl.Quit()
pythoncom.CoUninitialize()
How can I make this so the read speed is correct?
Thanks a lot
Try to actually read the file:
f = open(myPath + fName, 'r')
f.read()
Or (if the file is too large to fit in memory):
f = open(myPath + fName, 'r')
while f.read(1024 * 1024):
    pass
But the operating system could still make the read fast by caching the file content. You've just written it there! And even if you manage to disable caching, your measurement (in addition to network speed) could include the time to write the data to the file server's disk.
If you want network speed only, you need to use 2 separate machines on the LAN. E.g. run an echo server on one machine (e.g. by enabling Simple TCP/IP services or by writing and running your own). Then run a Python echo client on another machine that sends some data to the echo server, makes sure it receives the same data back, and measures the turnaround time, as sketched below.
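A minimal sketch of such an echo client (HOST, PORT, and the payload size are assumptions to adjust for your setup; PORT 7 matches the standard TCP echo service):
import socket
import time

HOST = '192.168.1.10'      # assumed address of the machine running the echo server
PORT = 7                   # standard TCP echo port (Simple TCP/IP services)
payload = b'\x00' * 65536  # 64 KB of data to bounce off the server

with socket.create_connection((HOST, PORT)) as s:
    start = time.perf_counter()
    s.sendall(payload)
    received = b''
    while len(received) < len(payload):
        chunk = s.recv(4096)
        if not chunk:
            break
        received += chunk
    elapsed = time.perf_counter() - start

assert received == payload, "echo server returned different data"
# the payload travels to the server and back, hence the factor of 2
mbps = (2 * len(payload) * 8) / elapsed / 1000000
print("Round trip took", elapsed, "seconds ->", mbps, "Mbps")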
