Python: Dictionary with values higher than the average value

Python: Dictionary with values higher than the average value - python

I am trying to write a program that reads two textfiles (box_a and box_b). These files have the license plate number and the time this car passes two different speed cameras. The format in the files is like this: 6TKJ777, 2018-02-09 09:13:22. I would like the program to calculate the average speed (see avg_speed in the code below) between these cameras (based on the time of the passings on box_a and box_b, and the distance from the variable distance below.). The cars with an avg_speed above the speed limit (the variable speed_limit below) should be added to a dictionary where registration number is key, and avg_speed and the time the car passes box_a as a tuple. This dictionary only contains cars that have broken the speed limit. I seem to have got stuck. The code below probably have several issues, but the latest error is name license_ is not defined. Any ideas?
from datetime import datetime
date_format = ' %Y-%m-%d %H:%M:%S'
def file_to_dictionary(file):
filename = file
filename = open(file, 'r')
readings = []
for line in filename:
line = line.strip('\n')
reg = line.split(',')
readings.append(reg)
filename.close()
dictionary = dict(readings)
for key in dictionary:
print(key, ' : ', dictionary[key])
return dictionary
def list_speeders():
filename_a = "box_a.txt"
filename_b = "box_b.txt"
speed_limit = 60
distance = 5
mydict= {license_:(avg_speed,b_time)}
dict_a = file_to_dictionary(filename_a)
dict_b = file_to_dictionary(filename_b)
a_time = dict_a[license_]
b_time = dict_b[license_]
avg_speed=round(distance/(((datetime.strptime(b_time, date_format) - datetime.strptime(a_time, date_format)).total_seconds())/3600),3)
for line in dict_a:
if avg_speed > speed_limit:
mydict[license_]=avg_speed
print(mydict)
list_speeders()

The code is less cumbersome if you convert the date and time to a timestamp when you first build the dictionaries. Then it's simple:
from datetime import datetime
date_format = '%Y-%m-%d %H:%M:%S'
speed_limit = 60
distance = 5
def to_dictionary(filenames):
alldicts = tuple()
for filename in filenames:
d = {}
with open(filename, encoding='utf-8') as infile:
for line in infile:
reg, dt = line.split(',')
d[reg] = datetime.strptime(dt.strip(), date_format).timestamp()
alldicts += (d, )
return alldicts
box_a, box_b = to_dictionary(('box_a.txt', 'box_b.txt'))
speeders = {}
for k, va in box_a.items():
if (vb := box_b.get(k)):
if (average_speed := distance / abs(va-vb) * 3600) > speed_limit:
speeders[k] = average_speed
print(speeders)

Related

How can I plot the monthly average temperature?

I wrote this code in order to obtain a series of monthly weather observations at Helsinki for a period from 1960 to 2020 and then I saved the data to a local file using package pickle. I used the data available from the API provided by Finnish Meteorological Institute.
import datetime
from pprint import pprint
import pickle
import pandas as pd
class FmiApi:
"""
a minimalistic wrapper for the Finnish Meteorological Institute API
"""
def __init__(self):
self.base_url = 'http://opendata.fmi.fi/wfs'
self.fixed_params = {
'service': 'WFS',
'version':'2.0.0',
}
self.namespaces ={
'wfs':'http://www.opengis.net/wfs/2.0',
'gml':'http://www.opengis.net/gml/3.2',
'BsWfs':'http://xml.fmi.fi/schema/wfs/2.0'
}
try:
with open('wdata.pkl', 'rb') as f:
data=pickle.load(f)
except:
wheaters=[]
for year in range(1960,2021):
for month in range(1,13):
api = FmiApi()
w = api.get_monthly_obs('helsinki', year, month )
wheaters.append(w)
pprint(w)
with open('wdata.pkl', 'wb') as f:
pickle.dump(wheaters, f)
Now I want to use the local file to access the data in order to plot the monthly average temperature for the years 1960 to 2020.
I wrote this code but it doesn't print the average temperature:
def get_data(file_in=None, file_out=None):
assert file_in != None or file_out != None
#read wdata from local file
df_weath = pd.read_csv('wdata.pkl', parse_dates=[0], infer_datetime_format=True)
df_weath.sort_values('Date', inplace=True, ignore_index=True)
df_weath['Date'] = df_weath['Date'].dt.date #convert to datetime objects
#print(df_wdata)
#input()
#get weather data in the format returned by the api
if file_in != None: #read weather data from local file
with open(file_in, 'rb') as f:
wdata = pickle.load(f)
else: #use the FMI API to get weather data
api = FmiApi() #instantiate the api
params = {
'place': u'helsinki',
'maxlocations': '5',
}
d0 = df_weath['date'].values[0]
d1 = df_weath['date'].values[-1]
n_days = (d1 - d0).days + 1 #number of days between d0 and d1
wdata = []
for i_day in range(n_days):
date = d0 + datetime.timedelta(days=i_day)
params['starttime'] = str(date) + 'T00:00:00'
params['endtime'] = str(date) + 'T00:00:00'
try:
print('requesting weather data for %s'%str(date))
weather = api.get_daily_obs(params)
except:
print('getting weather failed, skipping')
continue
wdata.append(weather)
if file_out != None: #save weather data to a file
with open(file_out, 'wb') as f:
pickle.dump(wdata, f)
#move weather data to a pandas dataframe (calculate avg over valid observations)
data = []
variables = ['tday']
for wobs in wdata:
avgs = {}
for pos, obs in wobs.items():
for var, xx in obs.items():
if not var in variables: continue
sdate = xx[1][:10]
date = datetime.date(*[int(z) for z in sdate.split('-')])
if xx[0] != 'NaN':
val = float(xx[0])
else:
val = None
if not var in avgs: avgs[var] = []
if val != None: avgs[var].append(val)
vals = []
for var in variables: #calculate the average when available
if len(avgs[var]) > 0:
vals.append(sum(avgs[var])/len(avgs[var]))
else:
vals.append(None)
row = [date] + vals
data.append(row)
df_weather = pd.DataFrame(data, columns=['date'] + variables)
print(df_weather)
input()
#merge the dataframes by date (pick dates from weather df and add NaNs for missing values)
df = pd.merge(df_weather, df_weath, on='date', how='left', sort=True)
return df
The dictionary is like this:
{(65.90051, 29.06302): {'rrmon': ('18.4', '1985-01-01T00:00:00Z'),
'tmon': ('NaN', '1985-01-01T00:00:00Z')},
(65.91718, 29.22969): {'rrmon': ('15.0', '1985-01-01T00:00:00Z'),
'tmon': ('NaN', '1985-01-01T00:00:00Z')},
(65.95515, 29.1347): {'rrmon': ('16.9', '1985-01-01T00:00:00Z'),
'tmon': ('NaN', '1985-01-01T00:00:00Z')}}
Can you help me to find what I am doing wrong? Or do you know any easier way to plot the monthly average temperature?

process data on user input

I have a scenario where I am uploading a text file then providing a user input and then according to the user input processing further.
Sample file :
DOWN 07.09.2016 08:21:33 - 07.09.2016 08:23:33
UP 07.11.2016 09:41:07 - 09.11.2016 09:20:33
DOWN 09.11.2016 08:26:33 - 09.11.2016 08:46:33
UP 09.11.2016 08:23:33 - 09.11.2016 08:25:33
DOWN 09.11.2016 08:36:33 - 09.11.2016 08:41:33
DOWN 10.11.2016 08:36:33 - 10.11.2016 08:39:33
code :
try:
import Tkinter as Tk
import tkFileDialog as fileDialog
except ImportError:
import tkinter as Tk
import tkinter.filedialog as fileDialog
import datetime
def read_data():
'''
Read data from file and convert to list with datetime
which can be used to calculate time and display.
'''
global data
filename = fileDialog.askopenfilename()
if filename:
# read all lines
with open(filename) as fileHandle:
lines = fileHandle.readlines()
# convert to `datetime` (not `timestamp`)
data = []
for line in lines:
#direction = line[:4].strip()
#dt1 = line[5:24]
#dt2 = line[27:46]
direction, d1, t1, _, d2, t2 = line.split()
dt1 = d1 + ' ' + t1
dt2 = d2 + ' ' + t2
t1 = datetime.datetime.strptime(dt1, "%d.%m.%Y %H:%M:%S")
t2 = datetime.datetime.strptime(dt2, "%d.%m.%Y %H:%M:%S")
seconds = (t2-t1).seconds
data.append([direction, t1, t2, seconds])
print(data)
def processText(lines, selected_date):
total = 0
start = None
print(selected_date)
# if there is `selected_date` then convert to `datetime`
if selected_date:
try:
selected_date = datetime.datetime.strptime(selected_date, "%d.%m.%Y")
except AttributeError as ex:
print("ERROR:", ex)
selected_date = None
# calculate time
for direction, t1, t2, seconds in lines:
if direction == "DOWN":
# if `selected_date` then filter times
if selected_date and t1 <= selected_date:
continue
if not start:
start = t1.strftime("%d.%m.%Y %H:%M:%S")
total += seconds
# convert to minutes after summing all second
total = total//60
return total, start
def calculate():
all_dates = entry.get().split(',')
print(all_dates)
all_dates = [date.strip() for date in all_dates]
txt = ''
for current_date in all_dates:
down, start = processText(data, current_date)
txt += "Total Downtime is {0} min from {1}\n".format(down, start)
textVar.set(txt)
# --- main ---
data = None # to keep data from file
# -
root = Tk.Tk()
button = Tk.Button(root, text="Open", command=read_data)
button.grid(column=1, row=1)
textVar = Tk.StringVar(root)
label = Tk.Label(root, textvariable=textVar)
label.grid(column=1, row=2)
entry = Tk.Entry(root)
entry.grid(column=1, row=3)
button2 = Tk.Button(root, text="Calculate", command=calculate)
button2.grid(column=1, row=4)
root.mainloop()
Above code prompts me to select date in format Date1.Month1.Year1,Date2.Month2.Year2 ... (depending on the number input for date.)
and returns output as :
Total Downtime is x min from date1.month1.year1 xx:xx:xx(time1)
Total Downtime is y min from date2.month2.year2 yy:yy:yy(time2)
Here I have the details of downtime in mins and i want to convert that in percentage till date. For example ->
user input :
1.9.2016,1.11.2016,1.1.2016
Output :
Total Downtime is 30 min from 1.9.2016 08:21:33 & Availability percentage from selected date to till date : xx.xxx%
Total Downtime is 28 min from 1.11.2016 08:26:33 & Availability percentage from selected date to till date : yy.yyy%
Total Downtime is 30 min from 1.11.2016 08:26:33 & Availability percentage from selected date to till date : zz.zzz%
logic behind availability calculation would be
total number of min down from date(which is retrieved)/total number of min till date * 100
I am stuck in this part, Is this achievable? Any help would be great!!

If you run processText() with None instead of date then you get total number of minutes when it was down
total_down, start = processText(data, None)
and you can use it to calculate percentage.
percentage = (down/total_down) * 100
and you can use string formatting {:.2f} to display only two digits after dot
def calculate():
all_dates = entry.get().split(',')
print(all_dates)
all_dates = [date.strip() for date in all_dates]
# calculate total number of minutes when it was down
total_down, start = processText(data, None) # <-- None
print('total_down:', total_down)
txt = ''
for current_date in all_dates:
down, start = processText(data, current_date)
# calculate percetage
percentage = (down/total_down) * 100
# use string formatting {:.2f} to display only two digits after dot
txt += "Total Downtime is {} min from {} ({:.2f}%)\n".format(down, start, percentage)
textVar.set(txt)
If you want total number of minutes when it was down or up then you have to change processText and add new parameter (ie. word) which will check if direction is DOWN or UP or both (word = None)
def processText(lines, selected_date, word="DOWN"):
total = 0
start = None
print(selected_date)
# if there is `selected_date` then convert to `datetime`
if selected_date:
try:
selected_date = datetime.datetime.strptime(selected_date, "%d.%m.%Y")
except AttributeError as ex:
print("ERROR:", ex)
selected_date = None
# calculate time
for direction, t1, t2, seconds in lines:
if not word or word == direction:
# if `selected_date` then filter times
if selected_date and t1 <= selected_date:
continue
if not start:
start = t1.strftime("%d.%m.%Y %H:%M:%S")
total += seconds
# convert to minutes after summing all second
total = total//60
return total, start
def calculate():
all_dates = entry.get().split(',')
print(all_dates)
all_dates = [date.strip() for date in all_dates]
# calculate total number of minutes when it was down and up
total_down, start = processText(data, None, None)
print('total_down:', total_down)
txt = ''
for current_date in all_dates:
down, start = processText(data, current_date, "DOWN")
percentage = (down/total_down) * 100
txt += "Total Downtime is {} min from {} ({:.2f}%)\n".format(down, start, percentage)
textVar.set(txt)

python regex error: NameError: name 're' is not defined

I have some code that reads an infofile and extracts information using python's regex and writes it into a new file. When I test this portion of the code individually in its own script, it works perfectly. However when I add it to the rest of my code, I get this error:
NameError: name 're' is not defined
Below is my entire code. The regex portion is obvious (all the re.search commands):
import glob
import subprocess
import os
import datetime
import matplotlib.pyplot as plt
import csv
import re
import ntpath
x = open('data.txt', 'w')
m = open('graphing_data.txt', 'w')
ckopuspath= '/Volumes/DAVIS/sfit-ckopus/ckopus'
command_init = 'sfit4Layer0.py -bv5 -fh'
subprocess.call(command_init.split(), shell=False)
with open('/Volumes/DAVIS/calpy_em27_neu/spectra_out_demo/info.txt', 'rt') as infofile: # the info.txt file created by CALPY
for count, line in enumerate(infofile):
with open('\\_spec_final.t15', 'w') as t:
lat = re.search('Latitude of location:\s*([^;]+)', line, re.IGNORECASE).group(0)
lat = lat.split()
lat = lat[3]
lat = float(lat)
lon = re.search('Longitude of location:\s*([^;]+)', line, re.IGNORECASE).group(0)
lon = lon.split()
lon = lon[3]
lon = float(lon)
date = re.search('Time of measurement \(UTC\): ([^;]+)', line).group(0)
date = date.split()
yeardate = date[4]
yeardate = yeardate.split('-')
year = int(yeardate[0])
month = int(yeardate[1])
day = int(yeardate[2])
time = date[5]
time = time.split(':')
hour = int(time[0])
minute = int(time[1])
second = float(time[2])
dur = re.search('Duration of measurement \[s\]: ([^;]+)', line).group(0)
dur = dur.split()
dur = float(dur[4])
numpoints = re.search('Number of values of one scan:\s*([^;]+)', line, re.IGNORECASE).group(0)
numpoints = numpoints.split()
numpoints = float(numpoints[6])
fov = re.search('semi FOV \[rad\] :\s*([^;]+)', line, re.IGNORECASE).group(0)
fov = fov.split()
fov = fov[3]
fov = float(fov[1:])
sza = re.search('sun Azimuth \[deg\]:\s*([^;]+)', line, re.IGNORECASE).group(0)
sza = sza.split()
sza = float(sza[3])
snr = 0.0000
roe = 6396.2
res = 0.5000
lowwav = re.search('first wavenumber:\s*([^;]+)', line, re.IGNORECASE).group(0)
lowwav = lowwav.split()
lowwav = float(lowwav[2])
highwav = re.search('last wavenumber:\s*([^;]+)', line, re.IGNORECASE).group(0)
highwav = highwav.split()
highwav = float(highwav[2])
spacebw = (highwav - lowwav)/ numpoints
d = datetime.datetime(year, month, day, hour, minute, second)
t.write('{:>12.5f}{:>12.5f}{:>12.5f}{:>12.5f}{:>8.1f}'.format(sza,roe,lat,lon,snr)) # line 1
t.write("\n")
t.write('{:>10d}{:>5d}{:>5d}{:>5d}{:>5d}{:>5d}'.format(year,month,day,hour,minute,second)) # line 2
t.write("\n")
t.write( ('{:%Y/%m/%d %H:%M:%S}'.format(d)) + "UT Solar Azimuth:" + ('{:>6.3f}'.format(sza)) + " Resolution:" + ('{:>6.4f}'.format(res)) + " Duration:" + ('{:>6.2f}'.format(dur))) # line 3
t.write("\n")
t.write('{:>21.13f}{:>26.13f}{:>24.17e}{:>12f}'.format(lowwav,highwav,spacebw,numpoints)) # line 4
t.write("\n")
calpy_path = '/Volumes/DAVIS/calpy_em27_neu/spectra_out_demo/140803/*' # the CALPY output files!
files1 = glob.glob(calpy_path)
with open(files1[count], 'r') as g:
for line in g:
wave_no, intensity = [float(item) for item in line.split()]
if lowwav <= wave_no <= highwav:
t.write(str(intensity) + '\n')
##########################
subprocess.call(['sfit4Layer0.py', '-bv5', '-fs'],shell=False) #I think this writes the summary file
# this retrieves info from summary and outputs it into data.txt (for readability)
# and graphing_data.txt (for graphing)
road = '/Volumes/DAVIS/calpy_em27_neu/spectra_out_demo/sfit4_trial' # path to summary file that is produced - not sure where this is usually*
for infile in glob.glob(os.path.join(road, 'summary*')):
lines = open(infile, 'r').readlines()
#extract info from summary
x.write('{0} {1} {2} {3} {4}'.format(fitrms, chi2, dofsall, dofstrg, iter))
x.write('\n')
x.close()
m.close()

What is wrong with this code during enumeration

import datetime
with open("fine.txt","r") as f, open("fine1.txt","a") as fine1:
lines = f.read().split("\n")
for i in range(2):
var = input("reg : ") # registration number(reg_num)
enter = input('Time entered camera 1(24hrs)in the format HH:MM:SS: ')
ext = input('Time enterd camera 2 (24hrs)in the format HH:MM:SS : ')
total_time = '%H:%M:%S'
enter_time = datetime.datetime.strptime(enter, total_time)
ext_time = datetime.datetime.strptime(ext, total_time)
if enter_time > ext_time:
ext_time += datetime.timedelta(hours=24)
t_diff = ext_time - enter_time
time = t_diff.total_seconds() / 3600
speed = 1 / time
reg = var[0:1].isalpha() and var[2:3].isdigit() and var[4].isspace() and var[5:7].isalpha() and var.isupper()
if reg == True:
for i, line in enumerate(lines):
if var in line:
num = int("{}".format(i))
var = f.read()
name = (var[num]) #the problem
print(name)
address = (var[num + 0])
if speed > 70:
print("ovrspeeding", (var[num + 0]))
fine1.write(name+address+speed+"\n")
The whole code had to inputted, otherwise you will not understand what i am trying to do.
fine.txt is a file that has already been made and looks like:
reg_num1 aaaaaaaaaaaaaaaaaaa
reg_num2 bbbbbbbbbbbbbbbbbbb
reg_num3 ccccccccccccccccccc
this code takes in inputs of the registration number(e.g. AA01 SSS) and 2 time formats (which will later be used to calculate the speed). i want this code to find the line in fine.txt that have the registration number i inputted and if that vehicle is overspeeding(speed >70mph)the whole line needs to be appended into the file fine1.txt.
the problem is that when i run the code the error massage states that:
name = (var[num])
IndexError: string index out of range
i dont what this means, so can you help me with this.

Using datetime package to convert a date with time to only date.

My function is to read data from a file that consists of dates with times a tweet was written, and sentiments (good, bad or neutral) it's classified as; select date with times, and sentiments between a start and end date; and finally create three dictionaries (positive, negative and neutral) that use the date as key, and number of positive, negative or neutral tweets made in a day.
The problems I have are:
a) How do I get only date to display, and not date and time?.
b) How do I get my program to include both start and end date?
c) How do I separate a key and value with a semi-colon in a dictionary?
def get_sentiment_dates(start_date, end_date):
positive_dict = {}
negative_dict = {}
neutral_dict = {}
f = open("BAC2_answer.csv", "r")
tweets = f.readlines()
bin_use =[]
bin_trash =[]
bin_use_senti = []
bin_trash_senti = []
start_date_obj = datetime.strptime(start_date, '%Y-%m-%d')
end_date_obj = datetime.strptime(end_date, '%Y-%m-%d')
for i in tweets:
specs = i.split(',')
t_and_d = specs[0]
dt_obj = datetime.strptime(t_and_d, "%Y-%m-%d %H:%M:%S")
chars_body = specs[1].strip()
if ((dt_obj >= start_date_obj) and dt_obj <= (end_date_obj)):
bin_use.append(dt_obj)
bin_use_senti.append(chars_body)
else:
bin_trash.append(dt_obj)
bin_trash_senti.append(chars_body)
num_of_pos = 0
num_of_neg = 0
num_of_neut = 0
for i,j in zip(bin_use, bin_use_senti):
if j == 'Bullish':
num_of_pos +=1
positive_dict = (i, num_of_pos)
elif j == 'Bearish':
num_of_neg+=1
negative_dict = (i, num_of_neg)
else:
num_of_neut+=1
neutral_dict = (i, num_of_neut)
# print str(positive_dict) + "," + str(negative_dict) + "," + str(neutral_dict)
f.close()
return [positive_dict,negative_dict,neutral_dict]

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python: Dictionary with values higher than the average value - python

Related

How can I plot the monthly average temperature?

process data on user input

python regex error: NameError: name 're' is not defined

What is wrong with this code during enumeration

Using datetime package to convert a date with time to only date.

Categories

Resources