Caesar's Cipher decoding program in python - python
Hey guys I'm having issue with my program that does the following:
1.) takes in one file that generates the relative frequency of letters that will be assumed to be average.
2.) takes a second file that contains the coded message.
3.) tests each possible rotation.
4.) creates a new txt file containing the decoded message as the output
here is my code:
# This is the module that we import to check if a file name exists
import os
# This is the dictionary used later to store individual letter counts, which
# allows us to calculate the relative frequency of each letter
d1 = { }
d1['a'] = 0
d1['b'] = 0
d1['c'] = 0
d1['d'] = 0
d1['e'] = 0
d1['f'] = 0
d1['g'] = 0
d1['h'] = 0
d1['i'] = 0
d1['j'] = 0
d1['k'] = 0
d1['l'] = 0
d1['m'] = 0
d1['n'] = 0
d1['o'] = 0
d1['p'] = 0
d1['q'] = 0
d1['r'] = 0
d1['s'] = 0
d1['t'] = 0
d1['u'] = 0
d1['v'] = 0
d1['w'] = 0
d1['x'] = 0
d1['y'] = 0
d1['z'] = 0
# This asks for the user to enter a file to parse
filename = raw_input("Path to a file to parse: ")
# This is the basic if/else statement that keeps track of each letter counter
# in the dictionary above if the file exists, and displays and error message
# and quits if it doesn't exist.
if os.path.exists(filename):
f = open(filename, 'r')
counter = 0
for line in f:
for j in line:
if j.isalpha():
counter += 1
d1[j.lower()] += 1
f.close()
else:
print "Error: cannot find",filename
quit()
# This is the definition that give us the relative frequency by dividing the
# dictionary key value for each character by the total number of characters
def relfreq(character):
return d1[character] / float(counter)
### This is the end of the previous module's code ###
# This code creates a list of the average frequencies of letter
lof1 = [relfreq('a'), relfreq('b'), relfreq('c'), relfreq('d'), relfreq('e'),
relfreq('f'), relfreq('g'), relfreq('h'), relfreq('i'), relfreq('j'),
relfreq('k'), relfreq('l'), relfreq('m'), relfreq('n'), relfreq('o'),
relfreq('p'), relfreq('q'), relfreq('r'), relfreq('s'), relfreq('t'),
relfreq('u'), relfreq('v'), relfreq('w'), relfreq('x'), relfreq('y'),
relfreq('z')]
# This code finds the relative frequency of the coded message
d2 = { }
d2['a'] = 0
d2['b'] = 0
d2['c'] = 0
d2['d'] = 0
d2['e'] = 0
d2['f'] = 0
d2['g'] = 0
d2['h'] = 0
d2['i'] = 0
d2['j'] = 0
d2['k'] = 0
d2['l'] = 0
d2['m'] = 0
d2['n'] = 0
d2['o'] = 0
d2['p'] = 0
d2['q'] = 0
d2['r'] = 0
d2['s'] = 0
d2['t'] = 0
d2['u'] = 0
d2['v'] = 0
d2['w'] = 0
d2['x'] = 0
d2['y'] = 0
d2['z'] = 0
filename2 = raw_input("Path to encoded message: ")
if os.path.exists(filename2):
f2 = open(filename2, 'r')
counter2 = 0
for line2 in f2:
for j2 in line2:
if j2.isalpha():
counter2 += 1
d2[j2.lower()] += 1
f2.close()
else:
print "Error: cannot find",filename2
quit()
def relfreq2(character):
return d2[character] / float(counter2)
# This code creates a list of relative frequencies of the coded message
lof2 = [relfreq2('a'), relfreq2('b'), relfreq2('c'), relfreq2('d'), relfreq2('e'),
relfreq2('f'), relfreq2('g'), relfreq2('h'), relfreq2('i'), relfreq2('j'),
relfreq2('k'), relfreq2('l'), relfreq2('m'), relfreq2('n'), relfreq2('o'),
relfreq2('p'), relfreq2('q'), relfreq2('r'), relfreq2('s'), relfreq2('t'),
relfreq2('u'), relfreq2('v'), relfreq2('w'), relfreq2('x'), relfreq2('y'),
relfreq2('z')]
##### Not sure if this is correct #####
scores = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
d3 = { }
d3['a'] = 0
d3['b'] = 1
d3['c'] = 2
d3['d'] = 3
d3['e'] = 4
d3['f'] = 5
d3['g'] = 6
d3['h'] = 7
d3['i'] = 8
d3['j'] = 9
d3['k'] = 10
d3['l'] = 11
d3['m'] = 12
d3['n'] = 13
d3['o'] = 14
d3['p'] = 15
d3['q'] = 16
d3['r'] = 17
d3['s'] = 18
d3['t'] = 19
d3['u'] = 20
d3['v'] = 21
d3['w'] = 22
d3['x'] = 23
d3['y'] = 24
d3['z'] = 25
def get_scores():
ii = 0
jj = 0
for ii in range(25):
for jj in range(26):
if ii + jj <26:
scores[jj] += lof1[jj] * lof2[jj + ii]
jj += 1
else:
scores[jj] += lof1[jj] * lof2[jj + ii - 26]
jj += 1
ii += 1
# This is the code that determines which match is the best match
get_scores()
rotationscore = max(scores)
rotations_ttr = scores.index(rotationscore)
print "Shift",rotations_ttr,"letters to the right"
loa = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r',
's','t','u','v','w','x','y','']
# This code 'decodes' the coded message
if os.path.exists(filename):
f3 = open(filename2, 'r')
counter3 = 0
for line3 in f3:
for j3 in line3:
if j2.isalpha():
counter3 += 1
j3 = d3[j3.lower()]
line3.replace(loa[int(j3)], loa[int(j3 + rotations_ttr)])
print
f.close()
I currently get the error:
Path to a file to parse: ./Phoenix.py Traceback (most recent call
last): File "/Users/atloftus/Desktop/Lecture Code/Labs/decipher.py",
line 85, in
lof1 = [relfreq('a'), relfreq('b'), relfreq('c'), relfreq('d'), relfreq('e'), File "/Users/atloftus/Desktop/Lecture
Code/Labs/decipher.py", line 79, in relfreq
return d1[character] / float(counter) ZeroDivisionError: float division by zero
How do I get rid of that error? It wasn't there earlier and now I don't know what I changed to cause it. Thanks
Related
Enumerating the Writing of Different Lines in Txt file in Python
It seems that every time the string should add up the 4, 1, and 4, for column 1, the total result is just 4*3. Could you help me put an enumeration-like function in here? (I am I very new beginner) Thank you for anything! import os import platform pathwindows = os.environ['USERPROFILE'] + r"\Documents\Your_Wordle_Results.txt" pathmac = r'/Mac/Users/%USEPROFILE%/Documents/Your_Wordle_Results.txt' isFileWindows = os.path.exists(pathwindows) isFileMac = os.path.isfile(pathmac) if isFileWindows == True: outfile = open(pathwindows, 'r') if isFileMac == True: outfile = open(pathmac, 'r') totalpoints1 = 0 totalpoints2 = 0 totalpoints3 = 0 totalpoints4 = 0 totalpoints5 = 0 with open(pathwindows, 'r') as fp: lineofinterest = fp.readlines()[2:100] stringlineofinterest = str(lineofinterest) print(*lineofinterest) for line in lineofinterest: print(line.strip()) startline = 22 separation = 4 value1 = (stringlineofinterest[startline + separation * 0]) value2 = (stringlineofinterest[startline + separation * 1]) value3 = (stringlineofinterest[startline + separation * 2]) value4 = (stringlineofinterest[startline + separation * 3]) value5 = (stringlineofinterest[startline + separation * 4]) outfile.close print(value1) print(totalpoints1) The text file is Ben Jackson 1pt 2pt 3pt 4pt 5pt Total Results Will Be Shown Below 4 3 0 1 0 LOSS for audio in 7.28s 1 2 0 2 0 LOSS for audit in 6.18s 4 5 0 1 0 LOSS for audio in 7.28s I expected for the 4 + 1 +4 to add up in the 1 pt column but rather the first "4" was multiplied 3 times meaning that the cycle that beings with "with open" did not enumerate through.
I'm going to answer this the best I can according to the post, there was problems with indentation, use of the correct variable to fetch the values (stringlineofinteres instead of line which is the one in the loop), your code, and finally no line to add vaalues to the totals: import os import platform pathwindows = os.environ['USERPROFILE'] + r"\Documents\Your_Wordle_Results.txt" pathmac = r'/Mac/Users/%USEPROFILE%/Documents/Your_Wordle_Results.txt' pathwindows="enum.txt" isFileWindows = os.path.exists(pathwindows) isFileMac = os.path.isfile(pathmac) if isFileWindows == True: filepath=pathwindows if isFileMac == True: filepath=pathmac totalpoints1 = 0 totalpoints2 = 0 totalpoints3 = 0 totalpoints4 = 0 totalpoints5 = 0 with open(filepath, 'r') as fp: lineofinterest = fp.readlines()[2:100] stringlineofinterest = str(lineofinterest) print(*lineofinterest) for line in lineofinterest: print(line) startline = 22 separation = 4 value1 = (line[startline + separation * 0]) totalpoints1 += int(value1) value2 = (line[startline + separation * 1]) totalpoints2 += int(value2) value3 = (line[startline + separation * 2]) totalpoints3 += int(value3) value4 = (line[startline + separation * 3]) totalpoints4 += int(value4) value5 = (line[startline + separation * 4]) totalpoints5 += int(value5) # write the totals line here with open(filepath,'a') as outfile: outfile.write("totals xxxx") print(totalpoints1, totalpoints2,totalpoints3,totalpoints4,totalpoints5)
Unexpected EOF while parsing; trying calculate mean/max/min of each line reading in
the data looks like this: line = infile.readlines() line ['56047257 16 17 19 16 12 15 12 20 58 123 59\n', '97231934 18 16 13 19 16 12 13 18 72 101 55\n', ....same] I want to get the average of the 2 to 9 column and get max and min of 2 to 12 column by using the loop below, but it keep giving me an error: File "<string>", line unknown ^ SyntaxError: unexpected EOF while parsing This is what my code looks like : def main(): #read data in infile = open('data.txt', 'r') sun = 0.0 count = 0 line = infile.readline() while line != "": ID = line.split(" ") min_val = float('inf') max_val = -float('inf') count_min = 0 count_max = 0 for xStr in line.split(' ')[1:9]: sun = sun + eval(xStr) count = count + 1 avg = round(sun / count, 2) val = eval(xStr) if val < min_val: min_val = val count_min = 1 elif val == min_val: count_min += 1 if val > max_val: max_val = val count_max = 1 elif val == max_val: count_max += 1 line = infile.readline() print (ID, ' ',avg,' ',min_val,' ',max_val) main()
Take note of the issues raised in the comments section of your post, but with that said, this is a much easier way of getting your desired output: def main(): #read data in infile = open('data.txt', 'r') average = max_val = min_val = 0.0 count1=count2 = 0 line = infile.readlines() for x in [x.strip().split() for x in line[:9]]: x = ID =map(int, x) average = (average + (sum(x)/len(x)))/len(x) print average for x in [x.strip().split() for x in line[:12]]: x = map(int, x) val=max(x) if count1 !=0 and val>max_val: max_val = val val=min(x) if count2 !=0 and val<min_val: min_val = val if count1==0: max_val=max(x) min_val=min(x) count1=count2=1 print (ID, ' ',average,' ',min_val,' ',max_val) main() Note: You should try not assign infinty to variables. There are usually always better alternatives
optimizing my Benfold's law program
lines=[] count1 = 0 count2 = 0 count3 = 0 count4 = 0 count5 = 0 count6 = 0 count7 = 0 count8 = 0 count9 = 0 allcount = 0 with open('city_all.txt', 'r') as file: for line in file: lines.append(line.strip()) for x in range(0,len(lines)): if lines[x].isdigit(): allcount+=1 string = lines[x] if string[0]=="1": count1+=1 elif string[0]=="2": count2+=1 elif string[0]=="3": count3+=1 elif string[0]=="4": count4+=1 elif string[0]=="5": count5+=1 elif string[0]=="6": count6+=1 elif string[0]=="7": count7+=1 elif string[0]=="8": count8+=1 elif string[0]=="9": count9+=1 print(count1/allcount) print('{:.1%}'.format(count1/allcount)) Wondering if there is anyway to not have to declare all my variables, and compact all the if statements?Trying to make a program to help compute Benfold's law, so I am putting a txt file into a list, then going through each element and checking what the starting digit is.
You can simplify it a bit: counts = [0 for _ in range (10) ] with open('city_all.txt', 'r') as f: for line in (x.strip () for x in f): if line.isdigit(): allcount += 1 try: counts[int(line)] += 1 except IndexError: pass
Memory overflow in Python
I have 67000 files, I need to read them and extract similarities between the words, but when I run the code my laptop becomes much slower, I can't open any other application, and then a memory overflow error shows up (even when I run on around 10 000 of the files). Is there a way to clear the memory after every for loop maybe, or will running the code on all files be impossible to do? Below is the code: def isAscii(s): for c in s: if c not in string.printable: return False return True windowSize = 2 relationTable = {} probabilities = {} wordCount = {} totalWordCount = 0 def sim(w1, w2): numerator = 0 denominator = 0 if (w1 in relationTable) and (w2 in relationTable): rtw1 = {} rtw2 = {} rtw1 = relationTable[w1] rtw2 = relationTable[w2] for word in rtw1: rtw1_PMI = rtw1[word]['pairPMI'] denominator += rtw1_PMI if(word in rtw2): rtw2_PMI = rtw2[word]['pairPMI'] numerator += (rtw1_PMI + rtw2_PMI) for word in rtw2: rtw2_PMI = rtw2[word]['pairPMI'] denominator += rtw2_PMI if(denominator != 0): return float(numerator)/denominator else: return 0 else: return -1 AllNotes = {} AllNotes = os.listdir("C:/Users/nerry-san/Desktop/EECE 502/MedicalNotes") fileStopPunctuations = open('C:/Users/nerry-san/Desktop/EECE 502/stopPunctuations.txt') stopPunctuations = nltk.word_tokenize(fileStopPunctuations.read()) for x in range (0, 10): fileToRead = open('C:/Users/nerry-san/Desktop/EECE 502/MedicalNotes/%s'%(AllNotes[x])) case1 = fileToRead.read() text = nltk.WordPunctTokenizer().tokenize(case1.lower()) final_text = [] for index in range(len(text)): word = text[index] if (word not in stopPunctuations): final_text.append(word) for index in range (len(final_text)): w1 = final_text[index] if(isAscii(w1)): for index2 in range(-windowSize, windowSize+1): if (index2 != 0): if ( index + index2 ) in range (0, len(final_text)): w2 = final_text[index + index2] if(isAscii(w2)): totalWordCount += 1 if (w1 not in wordCount): wordCount[w1] = {} wordCount[w1]['wCount'] = 0 try: wordCount[w1][w2]['count'] += 1 wordCount[w1]['wCount'] += 1 except KeyError: wordCount[w1][w2] = {'count':1} wordCount[w1]['wCount'] += 1 for word in wordCount: probabilities[word]={} probabilities[word]['wordProb'] = float (wordCount[word]['wCount'])/ totalWordCount for word in wordCount: relationTable[word] = {} for word2 in wordCount[word]: if ( word2 != 'wCount'): pairProb = float(wordCount[word][word2]['count'])/(wordCount[word]['wCount']) relationTable[word][word2] = {} relationTable[word][word2]['pairPMI'] = math.log(float(pairProb)/(probabilities[word]['wordProb'] * probabilities[word2]['wordProb']),2) l = [] for word in relationTable: l.append(word) for index in range (0, len(l)): word = l[index] simValues = [] for index2 in range (0, len(l)): word2 = l[index2] if(word!= word2): simVal = sim(word,word2) if(simVal > 0): simValues.append([word2, simVal]) simValues.sort(key= operator.itemgetter(1), reverse = True)
Every time you open a file, use the "with" statement. This will ensure the file is closed when the loop finishes (or rather when the with block is exited.
walking and averaging values in python
i have to process .txt files presnent in subfolder inside a Folder.like: New Folder>Folder 1 to 6>xx.txt & yy.txt(files present in each folder) each file contain two columns as: arg his asp gln glu his and arg his glu arg arg his glu asp now what I have to do is : 1)count number of occurance of each word for each file > and average total count by dividing with total no. of lines in that file 2)then with values obtained after completing 1st step, divide the values with total no. of files present in the folder for averaging (i.e. 2 in this case) I have tried with my code as follows: but I have succeeded in 1st case but I'm not getting 2nd case. for root,dirs,files in os.walk(path): aspCount = 0 glu_count = 0 lys_count = 0 arg_count = 0 his_count = 0 acid_count = 0 base_count = 0 count = 0 listOfFile = glob.iglob(os.path.join(root,'*.txt') for filename in listOfFile: lineCount = 0 asp_count_col1 = 0 asp_count_col2 = 0 glu_count_col1 = 0 glu_count_col2 = 0 lys_count_col1 = 0 lys_count_col2 = 0 arg_count_col1 = 0 arg_count_col2 = 0 his_count_col1 = 0 his_count_col2 = 0 count += 1 for line in map(str.split,inp): saltCount += 1 k = line[4] m = line[6] if k == 'ASP': asp_count_col1 += 1 elif m == 'ASP': asp_count_col2 += 1 if k == 'GLU': glu_count_col += 1 elif m == 'GLU': glu_count_col2 += 1 if k == 'LYS': lys_count_col1 += 1 elif m == 'LYS': lys_count_col2 += 1 if k == 'ARG': arg_count_col1 += 1 elif m == 'ARG': arg_count_col2 += 1 if k == 'HIS': his_count_col1 += 1 elif m == 'HIS': his_count_col2 += 1 asp_count = (float(asp_count_col1 + asp_count_col2))/lineCount glu_count = (float(glu_count_col1 + glu_count_col2))/lineCount lys_count = (float(lys_count_col1 + lys_count_col2))/lineCount arg_count = (float(arg_count_col1 + arg_count_col2))/lineCount his_count = (float(his_count_col1 + his_count_col2))/lineCount upto this I could be able to get the average value per file. But how could I be able to get average per subfolder(i.e. by dividing with count(total no. of file)). the problem is 2nd part. 1st part is done. The code provided will average values for each file. But I want to add this averages and make a new average by dividing with total no. of files present in the sub-folder.
import os from collections import * aminoAcids = set('asp glu lys arg his'.split()) filesToCounts = {} for root,dirs,files in os.walk(subfolderPath): for file in files: if file.endswith('.txt'): path = os.path.join(root,file) with open(path) as f: acidsInFile = f.read().split() assert all(a in aminoAcids for a in acidsInFile) filesToCounts[file] = Counter(acidsInFile) def averageOfCounts(counts): numberOfAcids = sum(counts.values()) assert numberOfAcids%2==0 numberOfAcidPairs = numberOfAcids/2 return dict((acid,acidCount/numberOfAcidPairs) for acid,acidCount in counts.items()) filesToAverages = dict((file,averageOfCounts(counts)) for file,counts in filesToCounts.items())
Your use of os.walk together with glob.iglob is bogus. Either use one or the other, not both together. Here's how I would do it: import os, os.path, re, pprint, sys #... for root, dirs, files in os.walk(path): counts = {} nlines = 0 for f in filter(lambda n: re.search(r'\.txt$', n), files): for l in open(f, 'rt'): nlines += 1 for k in l.split(): counts[k] = counts[k]+1 if k in counts else 1 for k, v in counts.items(): counts[k] = float(v)/nlines sys.stdout.write('Frequencies for directory %s:\n'%root pprint.pprint(counts)
I like ninjagecko's answer but understand the question differently. Using his code as starting point I propose this: import os from collections import * aminoAcids = set('asp glu lys arg his'.split()) subfolderFreqs = {} for root,dirs,files in os.walk(subfolderPath): cumulativeFreqs = defaultdict(int) fileCount = 0 for file in files: if file.endswith('.txt'): fileCount += 1 path = os.path.join(root,file) with open(path) as f: acidsInFile = f.read().split() counts = Counter(acidsInFile) assert aminoAcids.issuperset(counts) numberOfAcidPairs = len(acidsInFile)/2 for acid, acidCount in counts.items(): cumulativeFreqs[acid] += float(acidCount) / numberOfAcidPairs if fileCount: subfolderFreqs[root] = {acid: cumulative/fileCount for acid, cumulative in cumulativeFreqs.items()} print subfolderFreqs