Basically I want the ciphered phrase as the output with both uppercase being ciphered to uppercase and lowercase being ciphered to lowercase but not any spaces or symbols are ciphered. It can encrypt a paragraph consisting of all upper case and a paragraph consisting of all lower case but not a mix of the two. here is what I have.
def encrypt(phrase,move):
encription=[]
for character in phrase:
a = ord(character)
if a>64 and a<123:
if a!=(91,96):
for case in phrase:
if case.islower():
alph=["a","b","c","d","e","f","g","h","i","j","k","l","m","n",
"o","p","q","r","s","t","u","v","w","x","y","z"]
dic={}
for i in range(0,len(alph)):
dic[alph[i]]=alph[(i+move)%len(alph)]
cipherphrase=""
for l in phrase:
if l in dic:
l=dic[l]
cipherphrase+=l
encription.append(chr(a if 97<a<=122 else 96+a%122))
return cipherphrase
else:
ALPH=["A","B","C","D","E","F","G","H","I","J","K","L","M","N",
"O","P","Q","R","S","T","U","V","W","X","Y","Z"]
DIC={}
for I in range(0,len(ALPH)):
DIC[ALPH[I]]=ALPH[(I+move)%len(ALPH)]
cipherphrase=""
for L in phrase:
if L in DIC:
L=DIC[L]
cipherphrase+=L
encription.append(chr(a if 97<a<=122 else 96+a%122))
return cipherphrase
I know its a lot but as you can see im not very good
import string
def caesar_cipher(msg, shift):
# create a character-translation table
trans = dict(zip(string.lowercase, string.lowercase[shift:] + string.lowercase[:shift]))
trans.update(zip(string.uppercase, string.uppercase[shift:] + string.uppercase[:shift]))
# apply it to the message string
return ''.join(trans.get(ch, ch) for ch in msg)
then
caesar_cipher('This is my 3rd test!', 2) # => 'Vjku ku oa 3tf vguv!'
import string
def rot_cipher(msg,amount):
alphabet1 = string.ascii_lowercase + string.ascii_uppercase
alphabet2 = string.ascii_lowercase[amount:] + string.ascii_lowercase[:amount]\
+ string.ascii_uppercase[amount:] + string.ascii_uppercase[:amount]
tab = str.maketrans(alphabet1,alphabet2)
return msg.translate(tab)
print(rot_cipher("hello world!",13))
Related
This is about plain word counting, to collect which words appear in a document and how how often.
I try to write a function were the input is a list of text lines. I go through all lines, split them into words, accumulate the recognized words and finally return the complete list.
First I have a while-loop that goes through all the characters in the list, and but ignores the white spaces. Inside this while loop I also try to recognize what kind of words I have. In this context, there are three kinds of words:
those starting with a letter;
those starting with a digit;
and those which contain only one character which is neither letter nor digit.
I have three if statements which check what kind of character I have. When I know what kind of word I have encountered, I try to extract the word itself. When the word starts with a letter or a digit, I take all consecutive characters of the same kind as part of the word.
But, in the third if statement, when I take care of the case when the current character is neither a letter nor a digit, I get problems.
When I give the input
wordfreq.tokenize(['15, delicious& Tarts.'])
I want the output to be
['15', ',', 'delicious', '&', 'tarts', '.']
When I test the function in the Python Console, it looks like this:
PyDev console: starting.
Python 3.7.4 (v3.7.4:e09359112e, Jul 8 2019, 14:54:52)
[Clang 6.0 (clang-600.0.57)] on darwin
import wordfreq
wordfreq.tokenize(['15, delicious& Tarts.'])
['15', 'delicious', 'tarts']
The function does not take neither the comma, the ampersand nor the dot into account! How do I fix this?
See below for the code.
(The lower() method is because I want to ignore capitalization, e.g. 'Tarts' and 'tarts' are really the same words.)
# wordfreq.py
def tokenize(lines):
words = []
for line in lines:
start = 0
while start < len(line):
while line[start].isspace():
start = start + 1
if line[start].isalpha():
end = start
while line[end].isalpha():
end = end + 1
word = line[start:end]
words.append(word.lower())
start = end
elif line[start].isdigit():
end = start
while line[end].isdigit():
end = end + 1
word = line[start:end]
words.append(word)
start = end
else:
words.append(line[start])
start = start + 1
return words
I'm not sure why you're doing upper and lower but here's how you can split it:
input = ['15, delicious& Tarts.']
line = input[0]
words = line.split(' ')
words = [word for word in words if word]
out:
['15,', 'delicious&', 'Tarts.']
edit, saw that you edited how you want your output to be. Just skip this line to get that output:
words = [word for word in words if word]
itertools.groupby could simplify this quite a bit. Basically, you group the characters in your string based on the category or type of character - alpha, digit or punctuation. In this example I only defined those three categories, but you can define as many or as little categories as you wish. Any character that doesn't match any of the categories (whitespace, in this example) is ignored:
def get_tokens(string):
from itertools import groupby
from string import ascii_lowercase, ascii_uppercase, digits, punctuation as punct
alpha = ascii_lowercase + ascii_uppercase
yield from ("".join(group) for key, group in groupby(string, key=lambda char: next((category for category in (alpha, digits, punct) if char in category), "")) if key)
print(list(get_tokens("15, delicious& Tarts.")))
Output:
['15', ',', 'delicious', '&', 'Tarts', '.']
>>>
I found what the problem was. The line
start = start + 1
where supposed to be in the last else statement.
So my code looks like this and gives me the desired input specified above:
def tokenize(lines):
words = []
for line in lines:
start = 0
while start < len(line):
while line[start].isspace():
start = start + 1
end = start
if line[start].isalpha():
while line[end].isalpha():
end = end + 1
word = line[start:end]
word = word.lower()
words.append(word)
start = end
elif line[start].isdigit():
while line[end].isdigit():
end = end + 1
word = line[start:end]
words.append(word)
start = end
else:
word = line[start]
words.append(word)
start = start + 1
return words
However, when I use the testing script below to make sure that there is no corner cases that the function 'tokenize' missed out;...
import io
import sys
import importlib.util
def test(fun,x,y):
global pass_tests, fail_tests
if type(x) == tuple:
z = fun(*x)
else:
z = fun(x)
if y == z:
pass_tests = pass_tests + 1
else:
if type(x) == tuple:
s = repr(x)
else:
s = "("+repr(x)+")"
print("Condition failed:")
print(" "+fun.__name__+s+" == "+repr(y))
print(fun.__name__+" returned/printed:")
print(str(z))
fail_tests = fail_tests + 1
def run(src_path=None):
global pass_tests, fail_tests
if src_path == None:
import wordfreq
else:
spec = importlib.util.spec_from_file_location("wordfreq", src_path+"/wordfreq.py")
wordfreq = importlib.util.module_from_spec(spec)
spec.loader.exec_module(wordfreq)
pass_tests = 0
fail_tests = 0
fun_count = 0
def printTopMost(freq,n):
saved = sys.stdout
sys.stdout = io.StringIO()
wordfreq.printTopMost(freq,n)
out = sys.stdout.getvalue()
sys.stdout = saved
return out
if hasattr(wordfreq, "tokenize"):
fun_count = fun_count + 1
test(wordfreq.tokenize, [], [])
test(wordfreq.tokenize, [""], [])
test(wordfreq.tokenize, [" "], [])
test(wordfreq.tokenize, ["This is a simple sentence"], ["this","is","a","simple","sentence"])
test(wordfreq.tokenize, ["I told you!"], ["i","told","you","!"])
test(wordfreq.tokenize, ["The 10 little chicks"], ["the","10","little","chicks"])
test(wordfreq.tokenize, ["15th anniversary"], ["15","th","anniversary"])
test(wordfreq.tokenize, ["He is in the room, she said."], ["he","is","in","the","room",",","she","said","."])
else:
print("tokenize is not implemented yet!")
if hasattr(wordfreq, "countWords"):
fun_count = fun_count + 1
test(wordfreq.countWords, ([],[]), {})
test(wordfreq.countWords, (["clean","water"],[]), {"clean":1,"water":1})
test(wordfreq.countWords, (["clean","water","is","drinkable","water"],[]), {"clean":1,"water":2,"is":1,"drinkable":1})
test(wordfreq.countWords, (["clean","water","is","drinkable","water"],["is"]), {"clean":1,"water":2,"drinkable":1})
else:
print("countWords is not implemented yet!")
if hasattr(wordfreq, "printTopMost"):
fun_count = fun_count + 1
test(printTopMost,({},10),"")
test(printTopMost,({"horror": 5, "happiness": 15},0),"")
test(printTopMost,({"C": 3, "python": 5, "haskell": 2, "java": 1},3),"python 5\nC 3\nhaskell 2\n")
else:
print("printTopMost is not implemented yet!")
print(str(pass_tests)+" out of "+str(pass_tests+fail_tests)+" passed.")
return (fun_count == 3 and fail_tests == 0)
if __name__ == "__main__":
run()
... I get the following output:
/usr/local/bin/python3.7 "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/test.py"
Traceback (most recent call last):
File "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/test.py", line 81, in <module>
run()
File "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/test.py", line 50, in run
test(wordfreq.tokenize, [" "], [])
File "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/test.py", line 10, in test
z = fun(x)
File "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/wordfreq.py", line 44, in tokenize
while line[start].isspace():
IndexError: string index out of range
Why does it say that the string index is out of range? How do I fix that problem?
I am working on a group assignment to read a docx file and then output the word 'carrier' or 'carriers' with the word directly to the right of it. The output we are receiving is only 26 of the total 82 mentions of the word carrier in the document. I would prefer recommendations to what might be causing this. My hunch is that it has something to do with the For loop.
from docx import Document
emptyString = {}
tupl = ()
doc = Document('Interstate Commerce Act.docx')
for i ,paragraph in enumerate(doc.paragraphs):
text = paragraph.text
text = text.split()
#text = text.lower()
if 'carrier' in text:
next = text.index('carrier') + 1
now = text.index('carrier')
#print(text[now], text[next])
tupl = (text[now], text[next])
emptyString[i] = tupl
if 'carriers' in text:
next = text.index('carriers') + 1
now = text.index('carriers')
#print(text[now], text[next])
tupl = (text[now], text[next])
emptyString[i] = tupl
if 'Carriers' in text:
next = text.index('Carriers') + 1
now = text.index('Carriers')
#print(text[now], text[next])
tupl = (text[now], text[next])
emptyString[i] = tupl
if 'Carrier' in text:
next = text.index('Carrier') + 1
now = text.index('Carrier')
#print(text[now], text[next])
tupl = (text[now], text[next])
emptyString[i] = tupl
print(emptyString)
Your text = text.split() line is going to cause certain items to be "hidden". For example, "The carrier is a Carrier." will produce the word list:
["The", "carrier", "is", "a", "Carrier."]
Since the last item is "Carrier." and not "Carrier" it will not be found by your "exact match" test.
Perhaps better to split by word and then check whether a lowercase version includes "carrier":
words = text.split()
for i, word in enumerate(words):
if "carrier" in word.lower():
print("word %d is a match" % i)
Using the lowercase comparison avoids the need for separate tests for all the case varieties.
I've been writing a Countdown program in Python, and in it. I've written this:
#Letters Game
global vowels, consonants
from random import choice, uniform
from time import sleep
from itertools import permutations
startLetter = ""
words = []
def check(word, startLetter):
fileName = startLetter + ".txt"
datafile = open(fileName)
for line in datafile:
print("Checking if", word, "is", line.lower())
if word == line.lower():
return True
return False
def generateLetters():
lettersLeft = 9
output = []
while lettersLeft >= 1:
lType = input("Vowel or consonant? (v/c)")
sleep(uniform(0.5, 1.5))
if lType not in ("v", "c"):
print("Please input v or c")
continue
elif lType == "v":
letter = choice(vowels)
print("Rachel has picked an", letter)
vowels.remove(letter)
output.append(letter)
elif lType == "c":
letter = choice(consonants)
print("Rachel has picked a", letter)
consonants.remove(letter)
output.append(letter)
print("Letters so far:", output)
lettersLeft -= 1
return output
def possibleWords(letters, words):
for i in range(1,9):
print(letters)
print(i)
for item in permutations(letters, i):
item = "".join(list(item))
startLetter = list(item)[0]
if check(item, startLetter):
print("\n\n***Got one***\n", item)
words.append(item)
return words
vowels = ["a"]*15 + ["e"]*21 + ["i"]*13 + ["o"]*13+ ["u"]*5
consonants = ["b"]*2 + ["c"]*3 + ["d"]*6 + ["f"]*2 + ["g"]*3 +["h"]*2 +["j"]*1 +["k"]*1 +["l"]*5 +["m"]*4 +["n"]*8 +["p"]*4 +["q"]*1 +["r"]*9 +["s"]*9 +["t"]*9 + ["v"]*1 +["w"]*1 +["x"]*1 +["y"]*1 +["z"]*1
print("***Let's play a letters game!***")
sleep(3)
letters = generateLetters()
sleep(uniform(1, 1.5))
print("\n\n***Let's play countdown***\n\n\n\n\n")
print(letters)
for count in reversed(range(1, 31)):
print(count)
sleep(1)
print("\n\nStop!")
print("All possible words:")
print(possibleWords(letters, words))
'''
#Code for sorting the dictionary into files
alphabet = "abcdefghijklmnopqrstuvwxyz"
alphabet = list(alphabet)
for letter in alphabet:
allFile = open("Dictionary.txt", "r+")
filename = letter + ".txt"
letterFile = open(filename, "w")
for line in allFile:
if len(list(line.lower())) <= 9:
if list(line.lower())[0] == letter:
print("Writing:", line.lower())
letterFile.write(line.lower())
allFile.close()
letterFile.close()
I have 26 text files called a.txt, b.txt, c.txt... to make the search quicker
(Sorry it's not very neat - I haven't finished it yet)
However, instead of returning what I expect (pan), it returns all words with pan in it (pan, pancake, pans, pandemic...)
Is there any way in Python you can only return the line if it's EXACTLY the same as the string? Do I have to .read() the file first?
Thanks
Your post is strangely written so excuse me if I missmatch
Is there any way in Python you can only return the line if it's EXACTLY the same as the string? Do I have to .read() the file first?
Yes, there is!!!
file = open("file.txt")
content = file.read() # which is a str
lines = content.split('\n') # which is a list (containing every lines)
test_string = " pan "
positive_match = [l for l in lines if test_string in l]
This is a bit hacky since we avoid getting pancake for pan (for instance) but using spaces (and then, what about cases like ".....,pan "?). You should have a look at tokenization function. As pythonists, we hve one of the best library for this: nltk
(because, basically, you are reinventing the wheel)
the program is when user input"8#15#23###23#1#19###9#20"
output should be "HOW WAS IT"
However,it could not work to show space(###).
enter code here
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}
def from_abstract(s):
result = ''
for word in s.split('*'):
result = result +ABSTRACT_SHIFTED.get(word)
return result
This would do the trick:
#!/usr/bin/env python
InputString = "8#15#23###23#1#19###9#20"
InputString = InputString.replace("###", "##")
InputString = InputString.split("#")
DecodedMessage = ""
for NumericRepresentation in InputString:
if NumericRepresentation == "":
NumericRepresentation = " "
DecodedMessage += NumericRepresentation
continue
else:
DecodedMessage += chr(int(NumericRepresentation) + 64)
print(DecodedMessage)
Prints:
HOW WAS IT
you can also use a regex
import re
replacer ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
reversed = {value:key for key,value in replacer.items()}
# Reversed because regex is greedy and it will match 1 before 15
target = '8#15#23###23#1#19###9#20'
pattern = '|'.join(map(lambda x: x + '+', list(reversed.keys())[::-1]))
repl = lambda x: reversed[x.group(0)]
print(re.sub(pattern, string=target, repl=repl))
And prints:
HOW WAS IT
With a couple minimal changes to your code it works.
1) split on '#', not '*'
2) retrieve ' ' by default if a match isn't found
3) use '##' instead of '###'
def from_abstract(s):
result = ''
for word in s.replace('###','##').split('#'):
result = result +ABSTRACT_SHIFTED.get(word," ")
return result
Swap the key-value pairs of ABSTRACT and use simple split + join on input
ip = "8#15#23###23#1#19###9#20"
ABSTRACT = dict((v,k) for k,v in ABSTRACT.items())
''.join(ABSTRACT.get(i,' ') for i in ip.split('#')).replace(' ', ' ')
#'HOW WAS IT'
The biggest challenge here is that "#" is used as a token separator and as the space character, you have to know the context to tell which you've got at any given time, and that makes it difficult to simply split the string. So write a simple parser. This one will accept anything as the first character in a token and then grab everything until it sees the next "#".
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}
user_input = "8#15#23###23#1#19###9#20"
def from_abstract(s):
result = []
while s:
print 'try', s
# tokens are terminated with #
idx = s.find("#")
# ...except at end of line
if idx == -1:
idx = len(s) - 1
token = s[:idx]
s = s[idx+1:]
result.append(ABSTRACT_SHIFTED.get(token, ' '))
return ''.join(result)
print from_abstract(user_input)
I'm trying to compare to see if a word in a list is in a dictionary full or words. I'm writing a program that decode a txt file.
Say here is one the the lines:
['Now', 'we', 'are', 'engaged', 'in', 'a', 'great', 'civil', 'war,']
I want to go the the dictionary and check to see if any of these words are in there. If so, I'll put the strings together and write to a file. All I want to know is how to compare the two. I'd first lowercase the first word in the list since all words are lowercase.
an example of my dictionary would be:
{"now": "", "help": "", "you": ""}
but filled with MANY more words.
If you want to see my overall code just ask :)
Here is my code for making the dictionary. Each line is a word.
f = open('dictionary.txt', "r")
dictionary = {}
for line in f:
word = line.strip()
dictionary[word] = ""
print dictionary
updated
def CaeserCipher(string, k):
#setting up variables to move through
upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'*10000
lower = 'abcdefghijklmnopqrstuvwxyz'*10000
newCipher = ''
#looping each letter and moving it k times
for letter in string:
if letter in upper:
if upper.index(letter) + k > 25:
indexPosition = (upper.index(letter) + k)
newCipher = newCipher + upper[indexPosition]
else:
indexPosition = upper.index(letter) + k
newCipher = newCipher + upper[indexPosition]
elif letter in lower:
if lower.index(letter) + k > 25:
indexPosition = (lower.index(letter) + k)
newCipher = newCipher + lower[indexPosition]
else:
indexPosition = lower.index(letter) + k
newCipher = newCipher + lower[indexPosition]
else:
newCipher = newCipher + letter
return newCipher
f = open('dictionary.txt', "r")
dictionary = set()
for line in f:
word = line.strip()
dictionary.add(word)
print dictionary
#main file
#reading file and encrypting text
f = open('encrypted.txt')
string = ''
out = open("plain1.txt", "w")
#working through each line
for line in f:
for k in range(26):
line = [CaeserCipher(word, k) for word in line]
print line
#listSplit = re.split('[,\[\]]', line)
#print listSplit
string = ("".join(line))
listOfWords = string.split()
lowercase_line = [word.lower() for word in listOfWords]
out.write(dictionary.intersection(lowercase_line))
f.close()
out.close()
If you're willing to represent your dictionary as a set, you can use intersection to find all the words in the dictionary that exist in the line.
dictionary = {"now", "help", "you"}
line = ['Now', 'we', 'are', 'engaged', 'in', 'a', 'great', 'civil', 'war,']
lowercase_line = [word.lower() for word in line]
#todo: also filter out punctuation, so "war," becomes "war"
print dictionary.intersection(lowercase_line)
result:
set(['now'])
if any(word.lower() in your_word_dict for word in line_list):
' '.join(line_list)
# write to file
check to see if any of the words in your word list are in your dictionary and if they are join them into a string and write them to a file