Optimizing my Benford's law program - Python

lines=[]
count1 = 0
count2 = 0
count3 = 0
count4 = 0
count5 = 0
count6 = 0
count7 = 0
count8 = 0
count9 = 0
allcount = 0
with open('city_all.txt', 'r') as file:
for line in file:
lines.append(line.strip())
for x in range(0,len(lines)):
if lines[x].isdigit():
allcount+=1
string = lines[x]
if string[0]=="1":
count1+=1
elif string[0]=="2":
count2+=1
elif string[0]=="3":
count3+=1
elif string[0]=="4":
count4+=1
elif string[0]=="5":
count5+=1
elif string[0]=="6":
count6+=1
elif string[0]=="7":
count7+=1
elif string[0]=="8":
count8+=1
elif string[0]=="9":
count9+=1
print(count1/allcount)
print('{:.1%}'.format(count1/allcount))
Wondering if there is any way to avoid declaring all of these variables and to compact all the if statements? I'm trying to make a program to help compute Benford's law, so I am reading a txt file into a list, then going through each element and checking what its starting digit is.

You can simplify it a bit:
def count_leading_digits(lines):
    """Tally the leading digit of every all-digit line.

    Args:
        lines: iterable of strings (for example an open text file).
            Surrounding whitespace is ignored and lines that do not consist
            purely of decimal digits are skipped.

    Returns:
        A 10-element list; index d holds how many numbers start with digit d.
    """
    counts = [0] * 10
    for line in (x.strip() for x in lines):
        if line.isdecimal():
            # Benford's law is about the FIRST digit, so index by line[0],
            # not by the value of the whole number.
            counts[int(line[0])] += 1
    return counts


if __name__ == "__main__":
    with open('city_all.txt', 'r') as f:
        counts = count_leading_digits(f)
    # Every counted line lands in exactly one bucket, so the total is the sum.
    allcount = sum(counts)
    if allcount:
        print('{:.1%}'.format(counts[1] / allcount))

Related

ValueError: substring not found on lip reading code

This is what I have gotten while trying to run step 3 of this source code:
https://github.com/carykh/lazykh
Error:
Traceback (most recent call last):
File "C:\Users\User\Desktop\lazykh-main\code\scheduler.py", line 93, in
OS_nextIndex = originalScript.index(wordString,OS_IndexAt)+len(wordString)
ValueError: substring not found
Code:
import argparse
import os.path
import json
import numpy as np
import random
def addPhoneme(p, t):
    """Record a phoneme event unless it repeats the previous one.

    Appends the line "<t>,phoneme,<p>" (t formatted with three decimals) to
    strings[4] and remembers p in the module-level prevPhoneme. Nothing is
    written when p equals prevPhoneme.

    Args:
        p: phoneme / mouth-shape code to record.
        t: time of the event, formatted as a float.
    """
    global prevPhoneme
    if p == prevPhoneme:
        return
    strings[4] += '{0:.3f},phoneme,{1}\n'.format(t, p)
    prevPhoneme = p
def pickNewPose(t):
    """Switch to a randomly chosen pose and log it in the pose section.

    The new pose is drawn from 0..POSE_COUNT-1, avoiding both the current
    pose and the previous one. If POSE_COUNT is too small for that, the
    choice falls back to any pose other than the current one, and finally
    to pose 0, so the function always terminates.

    Side effects: updates the module-level pose and prevPose, appends
    "<t>,pose,<pose>" (t with three decimals) to strings[3], and resets
    prevPhoneme to "na".

    Args:
        t: time of the pose change, formatted as a float.
    """
    global pose
    global prevPose
    global prevPhoneme
    candidates = (
        [p for p in range(POSE_COUNT) if p != pose and p != prevPose]
        or [p for p in range(POSE_COUNT) if p != pose]
        or [0]
    )
    newPose = random.choice(candidates)
    prevPose = pose
    pose = newPose
    strings[3] += '{0:.3f},pose,{1}\n'.format(t, pose)
    prevPhoneme = "na"
strings = [""]*5
POSE_COUNT = 5
emotions = {}
emotions["explain"] = 0
emotions["happy"] = 1
emotions["sad"] = 2
emotions["angry"] = 3
emotions["confused"] = 4
emotions["rq"] = 5
mouthList = [["aa","a"],["ae","a"],["ah","a"],["ao","a"],["aw","au"],
["ay","ay"],["b","m"],["ch","t"],["d","t"],["dh","t"],
["eh","a"],["er","u"],["ey","ay"],["f","f"],["g","t"],
["hh","y"],["ih","a"],["iy","ay"],["jh","t"],["k","t"],
["l","y"],["m","m"],["n","t"],["ng","t"],["ow","au"],
["oy","ua"],["p","m"],["r","u"],["s","t"],["sh","t"],
["t","t"],["th","t"],["uh","u"],["uw","u"],["v","f"],
["w","u"],["y","y"],["z","t"],["zh","t"],
["oov","m"]] # For unknown phonemes, the stick figure will just have a closed mouth ("mmm")
mouths = {}
for x in mouthList:
mouths[x[0]] = x[1]
ENDING_PHONEME = "m"
STOPPERS = [",",";",".",":","!","?"]
parser = argparse.ArgumentParser(description='blah')
parser.add_argument('--input_file', type=str, help='the script')
args = parser.parse_args()
INPUT_FILE = args.input_file
f = open(INPUT_FILE+".txt","r+")
originalScript = f.read()
f.close()
f = open(INPUT_FILE+".json","r+")
fileData = f.read()
f.close()
data = json.loads(fileData)
WORD_COUNT = len(data['words'])
pose = -1
prevPose = -1
prevPhoneme = "na"
emotion = "0"
pararaph = 0
image = 0
OS_IndexAt = 0
pickNewPose(0)
strings[1] += "0,emotion,0\n"
strings[0] += "0,paragraph,0\n"
strings[2] += "0,image,0\n"
strings[4] += "0,phoneme,m\n"
for i in range(WORD_COUNT):
word = data['words'][i]
if "start" not in word:
continue
wordString = word["word"]
timeStart = word["start"]
OS_nextIndex = originalScript.index(wordString,OS_IndexAt)+len(wordString)
if "<" in originalScript[OS_IndexAt:]:
tagStart = originalScript.index("<",OS_IndexAt)
tagEnd = originalScript.index(">",OS_IndexAt)
if OS_nextIndex > tagStart and tagEnd >= OS_nextIndex:
OS_nextIndex = originalScript.index(wordString,tagEnd)+len(wordString)
nextDigest = originalScript[OS_IndexAt:OS_nextIndex]
if "\n" in nextDigest and data['words'][i-1]['case'] != 'not-found-in-audio' and (prevPhoneme == "a" or prevPhoneme == "f" or prevPhoneme == "u" or prevPhoneme == "y"):
addPhoneme("m", data['words'][i-1]["end"])
"""print(wordString)
print(str(OS_IndexAt)+", "+str(OS_nextIndex))
print(nextDigest)
print("")"""
pickedPose = False
for stopper in STOPPERS:
if stopper in nextDigest:
pickNewPose(timeStart)
pickedPose = True
if "<" in nextDigest:
leftIndex = nextDigest.index("<")+1
rightIndex = nextDigest.index(">")
emotion = emotions[nextDigest[leftIndex:rightIndex]]
strings[1] += (str.format('{0:.3f}', timeStart)+",emotion,"+str(emotion)+"\n")
prevPhoneme = "na"
if "\n\n" in nextDigest:
pararaph += 1
image += 1 # The line of the script advances 2 lines whenever we hit a /n/n.
strings[0] += (str.format('{0:.3f}', timeStart)+",paragraph,"+str(pararaph)+"\n")
prevPhoneme = "na"
if "\n" in nextDigest:
image += 1
strings[2] += (str.format('{0:.3f}', timeStart)+",image,"+str(image)+"\n")
prevPhoneme = "na"
if not pickedPose:
pickNewPose(timeStart) # A new image means we also need to have a new pose
phones = word["phones"]
timeAt = timeStart
for phone in phones:
timeAt += phone["duration"]
phoneString = phone["phone"]
if phoneString == "sil":
truePhone = "m"
else:
truePhone = mouths[phoneString[:phoneString.index("_")]]
if len(truePhone) == 2:
addPhoneme(truePhone[0], timeAt-phone["duration"])
addPhoneme(truePhone[1], timeAt-phone["duration"]*0.5)
else:
addPhoneme(truePhone, timeAt-phone["duration"])
OS_IndexAt = OS_nextIndex
f = open(INPUT_FILE+"_schedule.csv","w+")
for i in range(len(strings)):
f.write(strings[i])
if i < len(strings)-1:
f.write("SECTION\n")
f.flush()
f.close()
print(f"Done creating schedule for {INPUT_FILE}.")
The
ValueError: substring not found
occurs when you try to find the index of a substring in a string which does not contain it in the specified (or default) section, using the index function.
The index method takes 3 parameters:
value
start
end
and it searches for the value between start and end.
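So, the error occurred because the substring was not found in the section where it was searched for. According to your traceback, the failing line is
OS_nextIndex = originalScript.index(wordString,OS_IndexAt)+len(wordString)
which searches originalScript for wordString, starting from OS_IndexAt (the position just after the previously matched word). The same search is repeated a few lines later starting from tagEnd, in order to skip over an emotion tag such as
<happy>
, but in your case the word was not found at or after the starting position, for example because the word in the .json transcript is spelled, capitalized or punctuated differently from the .txt script. You can do one of the following to solve the issue: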
you can fix your input if it should always have a match for the search
you can handle the error when the index throws the error
you can use find instead, see https://bobbyhadz.com/blog/python-valueerror-substring-not-found
Note that find also has three parameters, as you can read from https://www.w3schools.com/python/ref_string_find.asp

Get the Size of every function in a file

How would I find the size of every function in a file using python?
For context, I'm learning pyplot as well as scipy, and I wanted to measure the size of the functions in a programming file, and then measure the frequency of that size.
What I'm looking for is a way to read a file, identify a function, count the lines of the function, and then add it to a list, I don't really care if the function returns a list, I can always write a function that condenses the list of lists into one list.
Example of what I'm trying to implement:
Function A has a length of 10 lines
Function B has a length of 16 lines
Function C has a length of 8 lines
Function D has a length of 5 lines
Given the above data, I want to be able to condense that into a list of [10,16,8,5].
Additionally: I'm going to be testing this on a couple of beginner C projects, and only want to test the files with the .c extension, not .h.
Code I have so far:
# Counts the number of lines in the file
def line_counter(file_name):
    """Return how many lines the text file *file_name* contains."""
    with open(file_name, 'r') as handle:
        return sum(1 for _ in handle)
# Counts the number of lines that end with a semicolon
def semi_counter(file_name):
    """Return the number of lines whose last non-whitespace character is ';'."""
    with open(file_name, 'r') as handle:
        # endswith() is False for an empty string, so blank lines are skipped.
        return sum(1 for raw in handle if raw.strip().endswith(';'))
# Returns the maximum nesting depth of the file
def max_depth(file_name):
    """Return the deepest '{' ... '}' nesting level reached in *file_name*.

    Every '{' raises the current level and every '}' lowers it; the highest
    level seen is returned (0 when the file contains no '{').
    """
    deepest = 0
    level = 0
    with open(file_name, 'r') as handle:
        for raw in handle:
            for ch in raw:
                if ch == '{':
                    level += 1
                    # The level can only set a new record right after a '{'.
                    deepest = max(deepest, level)
                elif ch == '}':
                    level -= 1
    return deepest
# Counts the number of characters in a file
def char_counter(file_name):
    """Return the total number of characters (newlines included) in *file_name*."""
    with open(file_name, 'r') as handle:
        return sum(len(raw) for raw in handle)
# Counts the number of comment characters in the file
def comm_counter(file_name):
    """Return how many characters of *file_name* lie inside C comments.

    Counting starts at the second character of an opening marker ("//" or
    "/*") and, for block comments, stops before the final "/" of "*/".
    Line comments end at the end of their line; block comments may span
    several lines.

    Comment markers are only recognised outside of an already open comment,
    so "//*" does not open a block comment and "/*/" does not close one.
    String literals are not parsed: markers inside strings still count.
    """
    comm_chars = 0
    with open(file_name, 'r') as f:
        block_comment = False
        for line in f:
            lch = None
            line_comment = False
            for ch in line:
                if block_comment:
                    if lch == '*' and ch == '/':
                        # The closing '/' ends the comment and is not counted.
                        block_comment = False
                        lch = None
                        continue
                elif not line_comment and lch == '/':
                    if ch == '*':
                        block_comment = True
                        comm_chars += 1
                        # Forget the '*' so that "/*/" cannot close at once.
                        lch = None
                        continue
                    if ch == '/':
                        line_comment = True
                lch = ch
                if line_comment or block_comment:
                    comm_chars += 1
    return comm_chars
# Counts the number of root level functions
def block_counter(file_name):
    """Return how many brace-delimited blocks close back at nesting level 0.

    Every '}' that brings the running '{'/'}' balance to zero is counted once.
    """
    closed_blocks = 0
    level = 0
    with open(file_name, 'r') as handle:
        for raw in handle:
            for ch in raw:
                if ch == '{':
                    level += 1
                elif ch == '}':
                    level -= 1
                    if level == 0:
                        closed_blocks += 1
    return closed_blocks

Opening a file for append error

I'm trying to open a file for appending, but I keep getting the "except" portion of my try/except block, meaning there is some sort of error with the code but I can't seem to find what exactly is wrong with it. It only happens when I try to open a new file like so:
results = open("results.txt", "a")
results.append(score3)
Here's my full code:
import statistics
# input
filename = input("Enter a class to grade: ")
try:
# open file name
open(filename+".txt", "r")
print("Succesfully opened", filename,".txt", sep='')
print("**** ANALYZING ****")
with open(filename+".txt", 'r') as f:
counter1 = 0
counter2 = 0
right = 0
answerkey = "B,A,D,D,C,B,D,A,C,C,D,B,A,B,A,C,B,D,A,C,A,A,B,D,D"
a = []
# validating files
for line in f:
if len(line.split(',')) !=26:
print("Invalid line of data: does not contain exactly 26 values:")
print(line)
counter2 += 1
counter1 -= 1
if line.split(",")[0][1:9].isdigit() != True:
print("Invalid line of data: wrong N#:")
print(line)
counter2 += 1
counter1 -= 1
if len(line.split(",")[0]) != 9:
print("Invalid line of data: wrong N#:")
print(line)
counter2 += 1
counter1 -= 1
counter1 += 1
#grading students
score = len(([x for x in zip(answerkey.split(","), line.split(",")[1:]) if x[0] != x[1]]))
score1 = 26 - score
score2 = score1 / 26
score3 = score2 * 100
a.append(score3)
# results file
results = open("results.txt", "a")
results.write(score3)
# in case of no errors
if counter2 == 0:
print("No errors found!")
# calculating
number = len(a)
sum1 = sum(a)
max1 = max(a)
min1 = min(a)
range1 = max1 - min1
av = sum1/number
# turn to int
av1 = int(av)
max2 = int(max1)
min2 = int(min1)
range2 = int(range1)
# median
sort1 = sorted(a)
number2 = number / 2
number2i = int(number2)
median = a[number2i]
median1 = int(median)
# mode
from statistics import mode
mode = mode(sort1)
imode = int(mode)
# printing
print ("**** REPORT ****")
print ("Total valid lines of data:", counter1)
print ("Total invalid lines of data:", counter2)
print ("Mean (average) score:", av1)
print ("Highest score:", max2)
print("Lowest score:", min2)
print("Range of scores:", range2)
print("Median Score:", median1)
print("Mode score(s):", imode)
results.close()
except:
print("File cannot be found.")
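I don't think there is a method called append for writing to a file; file objects only provide write and writelines. Since you already opened the file in append mode ("a"), write will not change the old data; it adds the new text to the end of the file. Note that write only accepts a string, so a number such as score3 has to be converted first, e.g. results.write(str(score3)).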
# Mode 'a' keeps the existing content and adds new text at the end.
# The with-block closes the file even if write() raises.
with open('ccc.txt', 'a') as f:
    f.write('Hellloooo')
Hope it helps.

Caesar's Cipher decoding program in python

Hey guys, I'm having an issue with my program, which does the following:
1.) takes in one file that generates the relative frequency of letters that will be assumed to be average.
2.) takes a second file that contains the coded message.
3.) tests each possible rotation.
4.) creates a new txt file containing the decoded message as the output
here is my code:
# This is the module that we import to check if a file name exists
import os
# This is the dictionary used later to store individual letter counts, which
# allows us to calculate the relative frequency of each letter
d1 = { }
d1['a'] = 0
d1['b'] = 0
d1['c'] = 0
d1['d'] = 0
d1['e'] = 0
d1['f'] = 0
d1['g'] = 0
d1['h'] = 0
d1['i'] = 0
d1['j'] = 0
d1['k'] = 0
d1['l'] = 0
d1['m'] = 0
d1['n'] = 0
d1['o'] = 0
d1['p'] = 0
d1['q'] = 0
d1['r'] = 0
d1['s'] = 0
d1['t'] = 0
d1['u'] = 0
d1['v'] = 0
d1['w'] = 0
d1['x'] = 0
d1['y'] = 0
d1['z'] = 0
# This asks for the user to enter a file to parse
filename = raw_input("Path to a file to parse: ")
# This is the basic if/else statement that keeps track of each letter counter
# in the dictionary above if the file exists, and displays and error message
# and quits if it doesn't exist.
if os.path.exists(filename):
f = open(filename, 'r')
counter = 0
for line in f:
for j in line:
if j.isalpha():
counter += 1
d1[j.lower()] += 1
f.close()
else:
print "Error: cannot find",filename
quit()
# This is the definition that give us the relative frequency by dividing the
# dictionary key value for each character by the total number of characters
def relfreq(character):
return d1[character] / float(counter)
### This is the end of the previous module's code ###
# This code creates a list of the average frequencies of letter
lof1 = [relfreq('a'), relfreq('b'), relfreq('c'), relfreq('d'), relfreq('e'),
relfreq('f'), relfreq('g'), relfreq('h'), relfreq('i'), relfreq('j'),
relfreq('k'), relfreq('l'), relfreq('m'), relfreq('n'), relfreq('o'),
relfreq('p'), relfreq('q'), relfreq('r'), relfreq('s'), relfreq('t'),
relfreq('u'), relfreq('v'), relfreq('w'), relfreq('x'), relfreq('y'),
relfreq('z')]
# This code finds the relative frequency of the coded message
d2 = { }
d2['a'] = 0
d2['b'] = 0
d2['c'] = 0
d2['d'] = 0
d2['e'] = 0
d2['f'] = 0
d2['g'] = 0
d2['h'] = 0
d2['i'] = 0
d2['j'] = 0
d2['k'] = 0
d2['l'] = 0
d2['m'] = 0
d2['n'] = 0
d2['o'] = 0
d2['p'] = 0
d2['q'] = 0
d2['r'] = 0
d2['s'] = 0
d2['t'] = 0
d2['u'] = 0
d2['v'] = 0
d2['w'] = 0
d2['x'] = 0
d2['y'] = 0
d2['z'] = 0
filename2 = raw_input("Path to encoded message: ")
if os.path.exists(filename2):
f2 = open(filename2, 'r')
counter2 = 0
for line2 in f2:
for j2 in line2:
if j2.isalpha():
counter2 += 1
d2[j2.lower()] += 1
f2.close()
else:
print "Error: cannot find",filename2
quit()
def relfreq2(character):
return d2[character] / float(counter2)
# This code creates a list of relative frequencies of the coded message
lof2 = [relfreq2('a'), relfreq2('b'), relfreq2('c'), relfreq2('d'), relfreq2('e'),
relfreq2('f'), relfreq2('g'), relfreq2('h'), relfreq2('i'), relfreq2('j'),
relfreq2('k'), relfreq2('l'), relfreq2('m'), relfreq2('n'), relfreq2('o'),
relfreq2('p'), relfreq2('q'), relfreq2('r'), relfreq2('s'), relfreq2('t'),
relfreq2('u'), relfreq2('v'), relfreq2('w'), relfreq2('x'), relfreq2('y'),
relfreq2('z')]
##### Not sure if this is correct #####
scores = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
d3 = { }
d3['a'] = 0
d3['b'] = 1
d3['c'] = 2
d3['d'] = 3
d3['e'] = 4
d3['f'] = 5
d3['g'] = 6
d3['h'] = 7
d3['i'] = 8
d3['j'] = 9
d3['k'] = 10
d3['l'] = 11
d3['m'] = 12
d3['n'] = 13
d3['o'] = 14
d3['p'] = 15
d3['q'] = 16
d3['r'] = 17
d3['s'] = 18
d3['t'] = 19
d3['u'] = 20
d3['v'] = 21
d3['w'] = 22
d3['x'] = 23
d3['y'] = 24
d3['z'] = 25
def get_scores():
    """Score each of the 26 possible rotations of the coded message.

    For every rotation r, scores[r] is set to the sum over all letters j of
    lof1[j] * lof2[(j + r) % 26], i.e. how well the coded frequencies line
    up with the reference frequencies when letter j is assumed to have been
    shifted r places to the right. The best match has the highest score.

    Reads the module-level lists lof1 and lof2 and writes the results into
    the module-level list scores (all three have 26 entries).
    """
    for rotation in range(26):
        # Store under the rotation index; the modulo wraps past 'z' to 'a'.
        scores[rotation] = sum(
            lof1[letter] * lof2[(letter + rotation) % 26]
            for letter in range(26)
        )
# This is the code that determines which match is the best match
get_scores()
rotationscore = max(scores)
rotations_ttr = scores.index(rotationscore)
print "Shift",rotations_ttr,"letters to the right"
loa = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r',
's','t','u','v','w','x','y','']
# This code 'decodes' the coded message
if os.path.exists(filename):
f3 = open(filename2, 'r')
counter3 = 0
for line3 in f3:
for j3 in line3:
if j2.isalpha():
counter3 += 1
j3 = d3[j3.lower()]
line3.replace(loa[int(j3)], loa[int(j3 + rotations_ttr)])
print
f.close()
I currently get the error:
Path to a file to parse: ./Phoenix.py Traceback (most recent call
last): File "/Users/atloftus/Desktop/Lecture Code/Labs/decipher.py",
line 85, in
lof1 = [relfreq('a'), relfreq('b'), relfreq('c'), relfreq('d'), relfreq('e'), File "/Users/atloftus/Desktop/Lecture
Code/Labs/decipher.py", line 79, in relfreq
return d1[character] / float(counter) ZeroDivisionError: float division by zero
How do I get rid of that error? It wasn't there earlier and now I don't know what I changed to cause it. Thanks

Unexpected EOF while parsing; trying calculate mean/max/min of each line reading in

the data looks like this:
line = infile.readlines()
line
['56047257 16 17 19 16 12 15 12 20 58 123 59\n',
'97231934 18 16 13 19 16 12 13 18 72 101 55\n',
....same]
I want to get the average of columns 2 to 9 and the max and min of columns 2 to 12 by using the loop below, but it keeps giving me an error:
File "<string>", line unknown
^
SyntaxError: unexpected EOF while parsing
This is what my code looks like :
def main():
#read data in
infile = open('data.txt', 'r')
sun = 0.0
count = 0
line = infile.readline()
while line != "":
ID = line.split(" ")
min_val = float('inf')
max_val = -float('inf')
count_min = 0
count_max = 0
for xStr in line.split(' ')[1:9]:
sun = sun + eval(xStr)
count = count + 1
avg = round(sun / count, 2)
val = eval(xStr)
if val < min_val:
min_val = val
count_min = 1
elif val == min_val:
count_min += 1
if val > max_val:
max_val = val
count_max = 1
elif val == max_val:
count_max += 1
line = infile.readline()
print (ID, ' ',avg,' ',min_val,' ',max_val)
main()
Take note of the issues raised in the comments section of your post, but with that said, this is a much easier way of getting your desired output:
def main():
    """Print the ID, average, minimum and maximum for each line of data.txt.

    Every non-blank line holds an ID followed by whitespace-separated
    integer scores. The average is taken over columns 2 to 9 and rounded to
    two decimals; the minimum and maximum are taken over columns 2 to 12.
    Written for Python 3 (print is used as a function).
    """
    # The with-block closes the file even if a line fails to parse.
    with open('data.txt', 'r') as infile:
        for line in infile:
            fields = line.split()
            if not fields:
                continue  # skip blank lines
            ID = fields[0]
            # int() rather than eval(): the file holds data, not code.
            values = [int(field) for field in fields[1:12]]
            if not values:
                continue  # an ID with no scores has nothing to summarise
            first_eight = values[:8]  # columns 2 to 9
            average = round(sum(first_eight) / float(len(first_eight)), 2)
            print(ID, ' ', average, ' ', min(values), ' ', max(values))
main()
Note: You should try not to assign infinity to variables. There are usually better alternatives.

Categories

Resources