Caesar's Cipher decoding program in python - python

Hey guys I'm having issue with my program that does the following:
1.) takes in one file that generates the relative frequency of letters that will be assumed to be average.
2.) takes a second file that contains the coded message.
3.) tests each possible rotation.
4.) creates a new txt file containing the decoded message as the output
here is my code:
# This is the module that we import to check if a file name exists
import os
# This is the dictionary used later to store individual letter counts, which
# allows us to calculate the relative frequency of each letter
d1 = { }
d1['a'] = 0
d1['b'] = 0
d1['c'] = 0
d1['d'] = 0
d1['e'] = 0
d1['f'] = 0
d1['g'] = 0
d1['h'] = 0
d1['i'] = 0
d1['j'] = 0
d1['k'] = 0
d1['l'] = 0
d1['m'] = 0
d1['n'] = 0
d1['o'] = 0
d1['p'] = 0
d1['q'] = 0
d1['r'] = 0
d1['s'] = 0
d1['t'] = 0
d1['u'] = 0
d1['v'] = 0
d1['w'] = 0
d1['x'] = 0
d1['y'] = 0
d1['z'] = 0
# This asks for the user to enter a file to parse
filename = raw_input("Path to a file to parse: ")
# This is the basic if/else statement that keeps track of each letter counter
# in the dictionary above if the file exists, and displays and error message
# and quits if it doesn't exist.
if os.path.exists(filename):
f = open(filename, 'r')
counter = 0
for line in f:
for j in line:
if j.isalpha():
counter += 1
d1[j.lower()] += 1
f.close()
else:
print "Error: cannot find",filename
quit()
# This is the definition that give us the relative frequency by dividing the
# dictionary key value for each character by the total number of characters
def relfreq(character):
return d1[character] / float(counter)
### This is the end of the previous module's code ###
# This code creates a list of the average frequencies of letter
lof1 = [relfreq('a'), relfreq('b'), relfreq('c'), relfreq('d'), relfreq('e'),
relfreq('f'), relfreq('g'), relfreq('h'), relfreq('i'), relfreq('j'),
relfreq('k'), relfreq('l'), relfreq('m'), relfreq('n'), relfreq('o'),
relfreq('p'), relfreq('q'), relfreq('r'), relfreq('s'), relfreq('t'),
relfreq('u'), relfreq('v'), relfreq('w'), relfreq('x'), relfreq('y'),
relfreq('z')]
# This code finds the relative frequency of the coded message
d2 = { }
d2['a'] = 0
d2['b'] = 0
d2['c'] = 0
d2['d'] = 0
d2['e'] = 0
d2['f'] = 0
d2['g'] = 0
d2['h'] = 0
d2['i'] = 0
d2['j'] = 0
d2['k'] = 0
d2['l'] = 0
d2['m'] = 0
d2['n'] = 0
d2['o'] = 0
d2['p'] = 0
d2['q'] = 0
d2['r'] = 0
d2['s'] = 0
d2['t'] = 0
d2['u'] = 0
d2['v'] = 0
d2['w'] = 0
d2['x'] = 0
d2['y'] = 0
d2['z'] = 0
filename2 = raw_input("Path to encoded message: ")
if os.path.exists(filename2):
f2 = open(filename2, 'r')
counter2 = 0
for line2 in f2:
for j2 in line2:
if j2.isalpha():
counter2 += 1
d2[j2.lower()] += 1
f2.close()
else:
print "Error: cannot find",filename2
quit()
def relfreq2(character):
return d2[character] / float(counter2)
# This code creates a list of relative frequencies of the coded message
lof2 = [relfreq2('a'), relfreq2('b'), relfreq2('c'), relfreq2('d'), relfreq2('e'),
relfreq2('f'), relfreq2('g'), relfreq2('h'), relfreq2('i'), relfreq2('j'),
relfreq2('k'), relfreq2('l'), relfreq2('m'), relfreq2('n'), relfreq2('o'),
relfreq2('p'), relfreq2('q'), relfreq2('r'), relfreq2('s'), relfreq2('t'),
relfreq2('u'), relfreq2('v'), relfreq2('w'), relfreq2('x'), relfreq2('y'),
relfreq2('z')]
##### Not sure if this is correct #####
scores = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
d3 = { }
d3['a'] = 0
d3['b'] = 1
d3['c'] = 2
d3['d'] = 3
d3['e'] = 4
d3['f'] = 5
d3['g'] = 6
d3['h'] = 7
d3['i'] = 8
d3['j'] = 9
d3['k'] = 10
d3['l'] = 11
d3['m'] = 12
d3['n'] = 13
d3['o'] = 14
d3['p'] = 15
d3['q'] = 16
d3['r'] = 17
d3['s'] = 18
d3['t'] = 19
d3['u'] = 20
d3['v'] = 21
d3['w'] = 22
d3['x'] = 23
d3['y'] = 24
d3['z'] = 25
def get_scores():
ii = 0
jj = 0
for ii in range(25):
for jj in range(26):
if ii + jj <26:
scores[jj] += lof1[jj] * lof2[jj + ii]
jj += 1
else:
scores[jj] += lof1[jj] * lof2[jj + ii - 26]
jj += 1
ii += 1
# This is the code that determines which match is the best match
get_scores()
rotationscore = max(scores)
rotations_ttr = scores.index(rotationscore)
print "Shift",rotations_ttr,"letters to the right"
loa = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r',
's','t','u','v','w','x','y','']
# This code 'decodes' the coded message
if os.path.exists(filename):
f3 = open(filename2, 'r')
counter3 = 0
for line3 in f3:
for j3 in line3:
if j2.isalpha():
counter3 += 1
j3 = d3[j3.lower()]
line3.replace(loa[int(j3)], loa[int(j3 + rotations_ttr)])
print
f.close()
I currently get the error:
Path to a file to parse: ./Phoenix.py Traceback (most recent call
last): File "/Users/atloftus/Desktop/Lecture Code/Labs/decipher.py",
line 85, in
lof1 = [relfreq('a'), relfreq('b'), relfreq('c'), relfreq('d'), relfreq('e'), File "/Users/atloftus/Desktop/Lecture
Code/Labs/decipher.py", line 79, in relfreq
return d1[character] / float(counter) ZeroDivisionError: float division by zero
How do I get rid of that error? It wasn't there earlier and now I don't know what I changed to cause it. Thanks

Related

Enumerating the Writing of Different Lines in Txt file in Python

It seems that every time the string should add up the 4, 1, and 4, for column 1, the total result is just 4*3.
Could you help me put an enumeration-like function in here? (I am I very new beginner)
Thank you for anything!
import os
import platform
pathwindows = os.environ['USERPROFILE'] + r"\Documents\Your_Wordle_Results.txt"
pathmac = r'/Mac/Users/%USEPROFILE%/Documents/Your_Wordle_Results.txt'
isFileWindows = os.path.exists(pathwindows)
isFileMac = os.path.isfile(pathmac)
if isFileWindows == True:
outfile = open(pathwindows, 'r')
if isFileMac == True:
outfile = open(pathmac, 'r')
totalpoints1 = 0
totalpoints2 = 0
totalpoints3 = 0
totalpoints4 = 0
totalpoints5 = 0
with open(pathwindows, 'r') as fp:
lineofinterest = fp.readlines()[2:100]
stringlineofinterest = str(lineofinterest)
print(*lineofinterest)
for line in lineofinterest:
print(line.strip())
startline = 22
separation = 4
value1 = (stringlineofinterest[startline + separation * 0])
value2 = (stringlineofinterest[startline + separation * 1])
value3 = (stringlineofinterest[startline + separation * 2])
value4 = (stringlineofinterest[startline + separation * 3])
value5 = (stringlineofinterest[startline + separation * 4])
outfile.close
print(value1)
print(totalpoints1)
The text file is
Ben Jackson
1pt 2pt 3pt 4pt 5pt Total Results Will Be Shown Below
4 3 0 1 0 LOSS for audio in 7.28s
1 2 0 2 0 LOSS for audit in 6.18s
4 5 0 1 0 LOSS for audio in 7.28s
I expected for the 4 + 1 +4 to add up in the 1 pt column but rather the first "4" was multiplied 3 times meaning that the cycle that beings with "with open" did not enumerate through.
I'm going to answer this the best I can according to the post, there was problems with indentation, use of the correct variable to fetch the values (stringlineofinteres instead of line which is the one in the loop), your code, and finally no line to add vaalues to the totals:
import os
import platform
pathwindows = os.environ['USERPROFILE'] + r"\Documents\Your_Wordle_Results.txt"
pathmac = r'/Mac/Users/%USEPROFILE%/Documents/Your_Wordle_Results.txt'
pathwindows="enum.txt"
isFileWindows = os.path.exists(pathwindows)
isFileMac = os.path.isfile(pathmac)
if isFileWindows == True:
filepath=pathwindows
if isFileMac == True:
filepath=pathmac
totalpoints1 = 0
totalpoints2 = 0
totalpoints3 = 0
totalpoints4 = 0
totalpoints5 = 0
with open(filepath, 'r') as fp:
lineofinterest = fp.readlines()[2:100]
stringlineofinterest = str(lineofinterest)
print(*lineofinterest)
for line in lineofinterest:
print(line)
startline = 22
separation = 4
value1 = (line[startline + separation * 0])
totalpoints1 += int(value1)
value2 = (line[startline + separation * 1])
totalpoints2 += int(value2)
value3 = (line[startline + separation * 2])
totalpoints3 += int(value3)
value4 = (line[startline + separation * 3])
totalpoints4 += int(value4)
value5 = (line[startline + separation * 4])
totalpoints5 += int(value5)
# write the totals line here
with open(filepath,'a') as outfile:
outfile.write("totals xxxx")
print(totalpoints1, totalpoints2,totalpoints3,totalpoints4,totalpoints5)

Unexpected EOF while parsing; trying calculate mean/max/min of each line reading in

the data looks like this:
line = infile.readlines()
line
['56047257 16 17 19 16 12 15 12 20 58 123 59\n',
'97231934 18 16 13 19 16 12 13 18 72 101 55\n',
....same]
I want to get the average of the 2 to 9 column and get max and min of 2 to 12 column by using the loop below, but it keep giving me an error:
File "<string>", line unknown
^
SyntaxError: unexpected EOF while parsing
This is what my code looks like :
def main():
#read data in
infile = open('data.txt', 'r')
sun = 0.0
count = 0
line = infile.readline()
while line != "":
ID = line.split(" ")
min_val = float('inf')
max_val = -float('inf')
count_min = 0
count_max = 0
for xStr in line.split(' ')[1:9]:
sun = sun + eval(xStr)
count = count + 1
avg = round(sun / count, 2)
val = eval(xStr)
if val < min_val:
min_val = val
count_min = 1
elif val == min_val:
count_min += 1
if val > max_val:
max_val = val
count_max = 1
elif val == max_val:
count_max += 1
line = infile.readline()
print (ID, ' ',avg,' ',min_val,' ',max_val)
main()
Take note of the issues raised in the comments section of your post, but with that said, this is a much easier way of getting your desired output:
def main():
#read data in
infile = open('data.txt', 'r')
average = max_val = min_val = 0.0
count1=count2 = 0
line = infile.readlines()
for x in [x.strip().split() for x in line[:9]]:
x = ID =map(int, x)
average = (average + (sum(x)/len(x)))/len(x)
print average
for x in [x.strip().split() for x in line[:12]]:
x = map(int, x)
val=max(x)
if count1 !=0 and val>max_val:
max_val = val
val=min(x)
if count2 !=0 and val<min_val:
min_val = val
if count1==0:
max_val=max(x)
min_val=min(x)
count1=count2=1
print (ID, ' ',average,' ',min_val,' ',max_val)
main()
Note: You should try not assign infinty to variables. There are usually always better alternatives

optimizing my Benfold's law program

lines=[]
count1 = 0
count2 = 0
count3 = 0
count4 = 0
count5 = 0
count6 = 0
count7 = 0
count8 = 0
count9 = 0
allcount = 0
with open('city_all.txt', 'r') as file:
for line in file:
lines.append(line.strip())
for x in range(0,len(lines)):
if lines[x].isdigit():
allcount+=1
string = lines[x]
if string[0]=="1":
count1+=1
elif string[0]=="2":
count2+=1
elif string[0]=="3":
count3+=1
elif string[0]=="4":
count4+=1
elif string[0]=="5":
count5+=1
elif string[0]=="6":
count6+=1
elif string[0]=="7":
count7+=1
elif string[0]=="8":
count8+=1
elif string[0]=="9":
count9+=1
print(count1/allcount)
print('{:.1%}'.format(count1/allcount))
Wondering if there is anyway to not have to declare all my variables, and compact all the if statements?Trying to make a program to help compute Benfold's law, so I am putting a txt file into a list, then going through each element and checking what the starting digit is.
You can simplify it a bit:
counts = [0 for _ in range (10) ]
with open('city_all.txt', 'r') as f:
for line in (x.strip () for x in f):
if line.isdigit():
allcount += 1
try: counts[int(line)] += 1
except IndexError: pass

Memory overflow in Python

I have 67000 files, I need to read them and extract similarities between the words, but when I run the code my laptop becomes much slower, I can't open any other application, and then a memory overflow error shows up (even when I run on around 10 000 of the files). Is there a way to clear the memory after every for loop maybe, or will running the code on all files be impossible to do? Below is the code:
def isAscii(s):
for c in s:
if c not in string.printable:
return False
return True
windowSize = 2
relationTable = {}
probabilities = {}
wordCount = {}
totalWordCount = 0
def sim(w1, w2):
numerator = 0
denominator = 0
if (w1 in relationTable) and (w2 in relationTable):
rtw1 = {}
rtw2 = {}
rtw1 = relationTable[w1]
rtw2 = relationTable[w2]
for word in rtw1:
rtw1_PMI = rtw1[word]['pairPMI']
denominator += rtw1_PMI
if(word in rtw2):
rtw2_PMI = rtw2[word]['pairPMI']
numerator += (rtw1_PMI + rtw2_PMI)
for word in rtw2:
rtw2_PMI = rtw2[word]['pairPMI']
denominator += rtw2_PMI
if(denominator != 0):
return float(numerator)/denominator
else:
return 0
else:
return -1
AllNotes = {}
AllNotes = os.listdir("C:/Users/nerry-san/Desktop/EECE 502/MedicalNotes")
fileStopPunctuations = open('C:/Users/nerry-san/Desktop/EECE 502/stopPunctuations.txt')
stopPunctuations = nltk.word_tokenize(fileStopPunctuations.read())
for x in range (0, 10):
fileToRead = open('C:/Users/nerry-san/Desktop/EECE 502/MedicalNotes/%s'%(AllNotes[x]))
case1 = fileToRead.read()
text = nltk.WordPunctTokenizer().tokenize(case1.lower())
final_text = []
for index in range(len(text)):
word = text[index]
if (word not in stopPunctuations):
final_text.append(word)
for index in range (len(final_text)):
w1 = final_text[index]
if(isAscii(w1)):
for index2 in range(-windowSize, windowSize+1):
if (index2 != 0):
if ( index + index2 ) in range (0, len(final_text)):
w2 = final_text[index + index2]
if(isAscii(w2)):
totalWordCount += 1
if (w1 not in wordCount):
wordCount[w1] = {}
wordCount[w1]['wCount'] = 0
try:
wordCount[w1][w2]['count'] += 1
wordCount[w1]['wCount'] += 1
except KeyError:
wordCount[w1][w2] = {'count':1}
wordCount[w1]['wCount'] += 1
for word in wordCount:
probabilities[word]={}
probabilities[word]['wordProb'] = float (wordCount[word]['wCount'])/ totalWordCount
for word in wordCount:
relationTable[word] = {}
for word2 in wordCount[word]:
if ( word2 != 'wCount'):
pairProb = float(wordCount[word][word2]['count'])/(wordCount[word]['wCount'])
relationTable[word][word2] = {}
relationTable[word][word2]['pairPMI'] = math.log(float(pairProb)/(probabilities[word]['wordProb'] * probabilities[word2]['wordProb']),2)
l = []
for word in relationTable:
l.append(word)
for index in range (0, len(l)):
word = l[index]
simValues = []
for index2 in range (0, len(l)):
word2 = l[index2]
if(word!= word2):
simVal = sim(word,word2)
if(simVal > 0):
simValues.append([word2, simVal])
simValues.sort(key= operator.itemgetter(1), reverse = True)
Every time you open a file, use the "with" statement. This will ensure the file is closed when the loop finishes (or rather when the with block is exited.

walking and averaging values in python

i have to process .txt files presnent in subfolder inside a Folder.like:
New Folder>Folder 1 to 6>xx.txt & yy.txt(files present in each folder)
each file contain two columns as:
arg his
asp gln
glu his
and
arg his
glu arg
arg his
glu asp
now what I have to do is :
1)count number of occurance of each word for each file > and average total count by dividing with total no. of lines in that file
2)then with values obtained after completing 1st step, divide the values with total no. of files present in the folder for averaging (i.e. 2 in this case)
I have tried with my code as follows:
but I have succeeded in 1st case but I'm not getting 2nd case.
for root,dirs,files in os.walk(path):
aspCount = 0
glu_count = 0
lys_count = 0
arg_count = 0
his_count = 0
acid_count = 0
base_count = 0
count = 0
listOfFile = glob.iglob(os.path.join(root,'*.txt')
for filename in listOfFile:
lineCount = 0
asp_count_col1 = 0
asp_count_col2 = 0
glu_count_col1 = 0
glu_count_col2 = 0
lys_count_col1 = 0
lys_count_col2 = 0
arg_count_col1 = 0
arg_count_col2 = 0
his_count_col1 = 0
his_count_col2 = 0
count += 1
for line in map(str.split,inp):
saltCount += 1
k = line[4]
m = line[6]
if k == 'ASP':
asp_count_col1 += 1
elif m == 'ASP':
asp_count_col2 += 1
if k == 'GLU':
glu_count_col += 1
elif m == 'GLU':
glu_count_col2 += 1
if k == 'LYS':
lys_count_col1 += 1
elif m == 'LYS':
lys_count_col2 += 1
if k == 'ARG':
arg_count_col1 += 1
elif m == 'ARG':
arg_count_col2 += 1
if k == 'HIS':
his_count_col1 += 1
elif m == 'HIS':
his_count_col2 += 1
asp_count = (float(asp_count_col1 + asp_count_col2))/lineCount
glu_count = (float(glu_count_col1 + glu_count_col2))/lineCount
lys_count = (float(lys_count_col1 + lys_count_col2))/lineCount
arg_count = (float(arg_count_col1 + arg_count_col2))/lineCount
his_count = (float(his_count_col1 + his_count_col2))/lineCount
upto this I could be able to get the average value per file. But how could I be able to get average per subfolder(i.e. by dividing with count(total no. of file)).
the problem is 2nd part. 1st part is done. The code provided will average values for each file. But I want to add this averages and make a new average by dividing with total no. of files present in the sub-folder.
import os
from collections import *
aminoAcids = set('asp glu lys arg his'.split())
filesToCounts = {}
for root,dirs,files in os.walk(subfolderPath):
for file in files:
if file.endswith('.txt'):
path = os.path.join(root,file)
with open(path) as f:
acidsInFile = f.read().split()
assert all(a in aminoAcids for a in acidsInFile)
filesToCounts[file] = Counter(acidsInFile)
def averageOfCounts(counts):
numberOfAcids = sum(counts.values())
assert numberOfAcids%2==0
numberOfAcidPairs = numberOfAcids/2
return dict((acid,acidCount/numberOfAcidPairs) for acid,acidCount in counts.items())
filesToAverages = dict((file,averageOfCounts(counts)) for file,counts in filesToCounts.items())
Your use of os.walk together with glob.iglob is bogus. Either use one or the other, not both together. Here's how I would do it:
import os, os.path, re, pprint, sys
#...
for root, dirs, files in os.walk(path):
counts = {}
nlines = 0
for f in filter(lambda n: re.search(r'\.txt$', n), files):
for l in open(f, 'rt'):
nlines += 1
for k in l.split():
counts[k] = counts[k]+1 if k in counts else 1
for k, v in counts.items():
counts[k] = float(v)/nlines
sys.stdout.write('Frequencies for directory %s:\n'%root
pprint.pprint(counts)
I like ninjagecko's answer but understand the question differently. Using his code as starting point I propose this:
import os
from collections import *
aminoAcids = set('asp glu lys arg his'.split())
subfolderFreqs = {}
for root,dirs,files in os.walk(subfolderPath):
cumulativeFreqs = defaultdict(int)
fileCount = 0
for file in files:
if file.endswith('.txt'):
fileCount += 1
path = os.path.join(root,file)
with open(path) as f:
acidsInFile = f.read().split()
counts = Counter(acidsInFile)
assert aminoAcids.issuperset(counts)
numberOfAcidPairs = len(acidsInFile)/2
for acid, acidCount in counts.items():
cumulativeFreqs[acid] += float(acidCount) / numberOfAcidPairs
if fileCount:
subfolderFreqs[root] = {acid: cumulative/fileCount for acid, cumulative in cumulativeFreqs.items()}
print subfolderFreqs

Categories

Resources