Check if string is exactly the same as line in file - python

I've been writing a Countdown program in Python, and in it I've written this:
#Letters Game
global vowels, consonants
from random import choice, uniform
from time import sleep
from itertools import permutations

startLetter = ""
words = []

def check(word, startLetter):
    fileName = startLetter + ".txt"
    datafile = open(fileName)
    for line in datafile:
        print("Checking if", word, "is", line.lower())
        if word == line.lower():
            return True
    return False

def generateLetters():
    lettersLeft = 9
    output = []
    while lettersLeft >= 1:
        lType = input("Vowel or consonant? (v/c)")
        sleep(uniform(0.5, 1.5))
        if lType not in ("v", "c"):
            print("Please input v or c")
            continue
        elif lType == "v":
            letter = choice(vowels)
            print("Rachel has picked an", letter)
            vowels.remove(letter)
            output.append(letter)
        elif lType == "c":
            letter = choice(consonants)
            print("Rachel has picked a", letter)
            consonants.remove(letter)
            output.append(letter)
        print("Letters so far:", output)
        lettersLeft -= 1
    return output

def possibleWords(letters, words):
    for i in range(1, 9):
        print(letters)
        print(i)
        for item in permutations(letters, i):
            item = "".join(list(item))
            startLetter = list(item)[0]
            if check(item, startLetter):
                print("\n\n***Got one***\n", item)
                words.append(item)
    return words

vowels = ["a"]*15 + ["e"]*21 + ["i"]*13 + ["o"]*13 + ["u"]*5
consonants = ["b"]*2 + ["c"]*3 + ["d"]*6 + ["f"]*2 + ["g"]*3 + ["h"]*2 + ["j"]*1 + ["k"]*1 + ["l"]*5 + ["m"]*4 + ["n"]*8 + ["p"]*4 + ["q"]*1 + ["r"]*9 + ["s"]*9 + ["t"]*9 + ["v"]*1 + ["w"]*1 + ["x"]*1 + ["y"]*1 + ["z"]*1

print("***Let's play a letters game!***")
sleep(3)
letters = generateLetters()
sleep(uniform(1, 1.5))
print("\n\n***Let's play countdown***\n\n\n\n\n")
print(letters)
for count in reversed(range(1, 31)):
    print(count)
    sleep(1)
print("\n\nStop!")
print("All possible words:")
print(possibleWords(letters, words))
'''
#Code for sorting the dictionary into files
alphabet = "abcdefghijklmnopqrstuvwxyz"
alphabet = list(alphabet)
for letter in alphabet:
    allFile = open("Dictionary.txt", "r+")
    filename = letter + ".txt"
    letterFile = open(filename, "w")
    for line in allFile:
        if len(list(line.lower())) <= 9:
            if list(line.lower())[0] == letter:
                print("Writing:", line.lower())
                letterFile.write(line.lower())
    allFile.close()
    letterFile.close()
'''
I have 26 text files called a.txt, b.txt, c.txt... to make the search quicker
(Sorry it's not very neat - I haven't finished it yet)
However, instead of returning what I expect (pan), it returns all words with pan in it (pan, pancake, pans, pandemic...)
Is there any way in Python you can only return the line if it's EXACTLY the same as the string? Do I have to .read() the file first?
Thanks

Your post is strangely written, so excuse me if I mismatch.
Is there any way in Python you can only return the line if it's EXACTLY the same as the string? Do I have to .read() the file first?
Yes, there is!!!
file = open("file.txt")
content = file.read() # which is a str
lines = content.split('\n') # which is a list (containing every lines)
test_string = " pan "
positive_match = [l for l in lines if test_string in l]
This is a bit hacky: we avoid getting pancake for pan (for instance) by using spaces, but then, what about cases like ".....,pan "? You should have a look at tokenization functions. As Pythonistas, we have one of the best libraries for this: nltk
(because, basically, you are reinventing the wheel)
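For the exact-match part of the question: each line read from the file keeps its trailing newline, so comparing with == pits "pan" against "pan\n". A minimal sketch of the check function with the newline stripped (still assuming, as in the question, one lowercase word per line in a.txt, b.txt, ...):
def check(word, startLetter):
    fileName = startLetter + ".txt"
    # 'with' closes the file once the loop is done
    with open(fileName) as datafile:
        for line in datafile:
            # strip() removes the trailing newline, so "pan\n" becomes "pan"
            if word == line.strip().lower():
                return True
    return False
If the same file is searched many times, reading it once into a set (e.g. set(line.strip().lower() for line in datafile)) and testing membership with in would avoid re-reading it for every permutation.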

Related

obfuscation of a text file using python - by reversing the words and inserting a specific number of random characters between them

Beginner coding problem. I am supposed to write code that reverses the contents of a file and then inserts a number of random characters based on a strength the user chooses. It then creates a new file containing the obfuscated text.
For example, if the user chooses strength = 2, it will insert 2 random characters between each letter in the text file: The cat sits ---> sgyt6gilns t7faxdc e3dh1kT
Right now my program inserts too many characters in between and I can't figure out why.
This is what it's doing:
input: CAT
Output of strength = 1: TeAEADQoC
import string
import random

def getRandomChar():
    alpha = string.ascii_letters + string.digits
    return random.choice(alpha)

def randomString(EncrypStrength):
    count = 0
    result = ''
    while count < len(EncrypStrength):
        result += getRandomChar()
        count += 1
    return result

def ReverseString(OrigFile):
    return OrigFile[::-1]

def LineEncrypt(line, EncrypStrength):
    EncrypStrength = ReverseString(line)
    index = 0
    newline = EncrypStrength[index]
    index += 1
    while index < len(EncrypStrength):
        newline += randomString(EncrypStrength)
        newline += EncrypStrength[index]
        index += 1
    return newline

def main():
    OrigFile = input('Original File Name:')
    EncryptedFile = input("obfuscated File Name:")
    EncrypStrength = int(input('Enter the Encryption Strength:'))
    Orig = open(OrigFile, 'r')
    Encrypted = open(EncryptedFile, 'w')
    line = Orig.readline()
    while line != '':
        encryptLine = LineEncrypt(line, EncrypStrength)
        Encrypted.write(encryptLine + "\n")
        line = Orig.readline()
    Orig.close()
    Encrypted.close()

if __name__ == "__main__":
    main()
In the LineEncrypt method you are using EncrypStrength incorrectly: you are overriding the number of characters to insert (EncrypStrength) with the reversed line.
def LineEncrypt(line, EncrypStrength):
    reversedString = ReverseString(line)
    index = 0
    newline = reversedString[index]
    index += 1
    while index < len(reversedString):
        newline += randomString(EncrypStrength)
        newline += reversedString[index]
        index += 1
    return newline
You are confusing EncrypStrength and overriding it, as Ritesh mentioned.
Here is the full corrected code, I hope it will work as you expected.
import string
import random

def getRandomChar():
    alpha = string.ascii_letters + string.digits
    return random.choice(alpha)

def randomString(EncrypStrength):
    count = 0
    result = ''
    while count < EncrypStrength:
        result += getRandomChar()
        count += 1
    return result

def ReverseString(OrigFile):
    return OrigFile[::-1]

def LineEncrypt(line, EncrypStrength):
    RevStr = ReverseString(line)
    index = 0
    newline = RevStr[index]
    index += 1
    while index < len(RevStr):
        newline += randomString(EncrypStrength)
        newline += RevStr[index]
        index += 1
    return newline

def main():
    OrigFile = input('Original File Name:')
    EncryptedFile = input("obfuscated File Name:")
    EncrypStrength = int(input('Enter the Encryption Strength:'))
    Orig = open(OrigFile, 'r')
    Encrypted = open(EncryptedFile, 'w')
    line = Orig.readline()
    while line != '':
        encryptLine = LineEncrypt(line, EncrypStrength)
        Encrypted.write(encryptLine + "\n")
        line = Orig.readline()
    Orig.close()
    Encrypted.close()

if __name__ == "__main__":
    main()
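As a side note (not part of the original answer): the two file handles in main() can be managed with with blocks so they are closed even if an error occurs. A small sketch of just that part, keeping the rest of the corrected code as-is:
def main():
    OrigFile = input('Original File Name:')
    EncryptedFile = input("obfuscated File Name:")
    EncrypStrength = int(input('Enter the Encryption Strength:'))
    # both files are closed automatically when the 'with' block ends
    with open(OrigFile, 'r') as Orig, open(EncryptedFile, 'w') as Encrypted:
        for line in Orig:
            Encrypted.write(LineEncrypt(line, EncrypStrength) + "\n")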

How do I get characters that are non-letters and non-digits appended in a list?

This is about plain word counting: collecting which words appear in a document and how often.
I try to write a function where the input is a list of text lines. I go through all lines, split them into words, accumulate the recognized words and finally return the complete list.
First I have a while-loop that goes through all the characters in the list, but ignores the white spaces. Inside this while loop I also try to recognize what kind of words I have. In this context, there are three kinds of words:
those starting with a letter;
those starting with a digit;
and those which contain only one character which is neither letter nor digit.
I have three if statements which check what kind of character I have. When I know what kind of word I have encountered, I try to extract the word itself. When the word starts with a letter or a digit, I take all consecutive characters of the same kind as part of the word.
But, in the third if statement, when I take care of the case when the current character is neither a letter nor a digit, I get problems.
When I give the input
wordfreq.tokenize(['15, delicious& Tarts.'])
I want the output to be
['15', ',', 'delicious', '&', 'tarts', '.']
When I test the function in the Python Console, it looks like this:
PyDev console: starting.
Python 3.7.4 (v3.7.4:e09359112e, Jul 8 2019, 14:54:52)
[Clang 6.0 (clang-600.0.57)] on darwin
import wordfreq
wordfreq.tokenize(['15, delicious& Tarts.'])
['15', 'delicious', 'tarts']
The function takes neither the comma, the ampersand nor the dot into account! How do I fix this?
See below for the code.
(The lower() method is because I want to ignore capitalization, e.g. 'Tarts' and 'tarts' are really the same words.)
# wordfreq.py
def tokenize(lines):
    words = []
    for line in lines:
        start = 0
        while start < len(line):
            while line[start].isspace():
                start = start + 1
            if line[start].isalpha():
                end = start
                while line[end].isalpha():
                    end = end + 1
                word = line[start:end]
                words.append(word.lower())
                start = end
            elif line[start].isdigit():
                end = start
                while line[end].isdigit():
                    end = end + 1
                word = line[start:end]
                words.append(word)
                start = end
            else:
                words.append(line[start])
            start = start + 1
    return words
I'm not sure why you're doing upper and lower but here's how you can split it:
input = ['15, delicious& Tarts.']
line = input[0]
words = line.split(' ')
words = [word for word in words if word]
out:
['15,', 'delicious&', 'Tarts.']
Edit: I saw that you edited how you want your output to be. Just skip this line to get that output:
words = [word for word in words if word]
itertools.groupby could simplify this quite a bit. Basically, you group the characters in your string based on the category or type of character - alpha, digit or punctuation. In this example I only defined those three categories, but you can define as many or as few categories as you wish. Any character that doesn't match any of the categories (whitespace, in this example) is ignored:
def get_tokens(string):
    from itertools import groupby
    from string import ascii_lowercase, ascii_uppercase, digits, punctuation as punct
    alpha = ascii_lowercase + ascii_uppercase
    yield from ("".join(group)
                for key, group in groupby(string, key=lambda char: next((category for category in (alpha, digits, punct) if char in category), ""))
                if key)

print(list(get_tokens("15, delicious& Tarts.")))
Output:
['15', ',', 'delicious', '&', 'Tarts', '.']
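Another option, not used in the answers above, is a single regular expression. This sketch (the function name and pattern are my own) assumes the same three categories: runs of letters, runs of digits, and single characters that are neither whitespace nor alphanumeric:
import re

def get_tokens_re(string):
    # one or more letters, or one or more digits, or a single
    # character that is neither a word character nor whitespace
    return re.findall(r"[A-Za-z]+|\d+|[^\w\s]", string)

print(get_tokens_re("15, delicious& Tarts."))
# ['15', ',', 'delicious', '&', 'Tarts', '.']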
I found what the problem was. The line
start = start + 1
was supposed to be in the last else statement.
So my code looks like this and gives me the desired output specified above:
def tokenize(lines):
    words = []
    for line in lines:
        start = 0
        while start < len(line):
            while line[start].isspace():
                start = start + 1
            end = start
            if line[start].isalpha():
                while line[end].isalpha():
                    end = end + 1
                word = line[start:end]
                word = word.lower()
                words.append(word)
                start = end
            elif line[start].isdigit():
                while line[end].isdigit():
                    end = end + 1
                word = line[start:end]
                words.append(word)
                start = end
            else:
                word = line[start]
                words.append(word)
                start = start + 1
    return words
However, when I use the testing script below to make sure that there are no corner cases that the function 'tokenize' missed...
import io
import sys
import importlib.util

def test(fun, x, y):
    global pass_tests, fail_tests
    if type(x) == tuple:
        z = fun(*x)
    else:
        z = fun(x)
    if y == z:
        pass_tests = pass_tests + 1
    else:
        if type(x) == tuple:
            s = repr(x)
        else:
            s = "("+repr(x)+")"
        print("Condition failed:")
        print("   "+fun.__name__+s+" == "+repr(y))
        print(fun.__name__+" returned/printed:")
        print(str(z))
        fail_tests = fail_tests + 1

def run(src_path=None):
    global pass_tests, fail_tests
    if src_path == None:
        import wordfreq
    else:
        spec = importlib.util.spec_from_file_location("wordfreq", src_path+"/wordfreq.py")
        wordfreq = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(wordfreq)
    pass_tests = 0
    fail_tests = 0
    fun_count = 0

    def printTopMost(freq, n):
        saved = sys.stdout
        sys.stdout = io.StringIO()
        wordfreq.printTopMost(freq, n)
        out = sys.stdout.getvalue()
        sys.stdout = saved
        return out

    if hasattr(wordfreq, "tokenize"):
        fun_count = fun_count + 1
        test(wordfreq.tokenize, [], [])
        test(wordfreq.tokenize, [""], [])
        test(wordfreq.tokenize, [" "], [])
        test(wordfreq.tokenize, ["This is a simple sentence"], ["this","is","a","simple","sentence"])
        test(wordfreq.tokenize, ["I told you!"], ["i","told","you","!"])
        test(wordfreq.tokenize, ["The 10 little chicks"], ["the","10","little","chicks"])
        test(wordfreq.tokenize, ["15th anniversary"], ["15","th","anniversary"])
        test(wordfreq.tokenize, ["He is in the room, she said."], ["he","is","in","the","room",",","she","said","."])
    else:
        print("tokenize is not implemented yet!")

    if hasattr(wordfreq, "countWords"):
        fun_count = fun_count + 1
        test(wordfreq.countWords, ([],[]), {})
        test(wordfreq.countWords, (["clean","water"],[]), {"clean":1,"water":1})
        test(wordfreq.countWords, (["clean","water","is","drinkable","water"],[]), {"clean":1,"water":2,"is":1,"drinkable":1})
        test(wordfreq.countWords, (["clean","water","is","drinkable","water"],["is"]), {"clean":1,"water":2,"drinkable":1})
    else:
        print("countWords is not implemented yet!")

    if hasattr(wordfreq, "printTopMost"):
        fun_count = fun_count + 1
        test(printTopMost, ({},10), "")
        test(printTopMost, ({"horror": 5, "happiness": 15},0), "")
        test(printTopMost, ({"C": 3, "python": 5, "haskell": 2, "java": 1},3), "python 5\nC 3\nhaskell 2\n")
    else:
        print("printTopMost is not implemented yet!")

    print(str(pass_tests)+" out of "+str(pass_tests+fail_tests)+" passed.")
    return (fun_count == 3 and fail_tests == 0)

if __name__ == "__main__":
    run()
... I get the following output:
/usr/local/bin/python3.7 "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/test.py"
Traceback (most recent call last):
  File "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/test.py", line 81, in <module>
    run()
  File "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/test.py", line 50, in run
    test(wordfreq.tokenize, [" "], [])
  File "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/test.py", line 10, in test
    z = fun(x)
  File "/Users/ericjohannesson/Documents/Fristående kurser/DAT455 – Introduktion till programmering med Python/lab1/Laborations/Laboration_1/wordfreq.py", line 44, in tokenize
    while line[start].isspace():
IndexError: string index out of range
Why does it say that the string index is out of range? How do I fix that problem?
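For reference, the IndexError on the [" "] test comes from the inner whitespace loop: on a line that contains only spaces, start is incremented past the last index before the outer start < len(line) condition is checked again (the alpha and digit scanning loops can run past the end in the same way when a word sits at the very end of the line). One way to guard those scans, as a sketch:
def tokenize(lines):
    words = []
    for line in lines:
        start = 0
        while start < len(line):
            # guard against running past the end on trailing whitespace
            while start < len(line) and line[start].isspace():
                start = start + 1
            if start == len(line):
                break
            end = start
            if line[start].isalpha():
                while end < len(line) and line[end].isalpha():
                    end = end + 1
                words.append(line[start:end].lower())
                start = end
            elif line[start].isdigit():
                while end < len(line) and line[end].isdigit():
                    end = end + 1
                words.append(line[start:end])
                start = end
            else:
                words.append(line[start])
                start = start + 1
    return words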

Python: Rosalind Consensus and Profile

I am trying to solve the "Consensus and Profile" challenge on Rosalind.
The challenge instructions are as follows:
Given: A collection of at most 10 DNA strings of equal length (at most 1 kbp) in FASTA format.
Return: A consensus string and profile matrix for the collection. (If several possible consensus strings exist, then you may return any one of them.)
My code is as follows (I got most of it from another user on this website). My only issue is that some of the DNA strands are broken down into multiple separate lines, so they are being appended to the "allstrings" list as separate strings. I am trying to figure out how to write each consecutive line that does not contain ">" as a single string.
import numpy as np

seq = []
allstrings = []
temp_seq = []
matrix = []
C = []
G = []
T = []
A = []
P = []
consensus = []
position = 1

file = open("C:/Users/knigh/Documents/rosalind_cons (3).txt", "r")
conout = open("C:/Users/knigh/Documents/consensus.txt", "w")

# Right now, this is reading and writing each as an individual line. Thus, it
# is splitting each sequence into multiple small sequences. You need to figure
# out how to read this in FASTA format to prevent this from occurring
desc = file.readlines()
for line in desc:
    allstrings.append(line)

for string in range(1, len(allstrings)):
    if ">" not in allstrings[string]:
        temp_seq.append(allstrings[string])
    else:
        seq.insert(position, temp_seq[0])
        temp_seq = []
        position += 1

# This last insertion into the sequence must be performed after the loop to empty
# out the last remaining string from temp_seq
seq.insert(position, temp_seq[0])

for base in seq:
    matrix.append([pos for pos in base])
M = np.array(matrix).reshape(len(seq), len(seq[0]))

for base in range(len(seq[0])):
    A_count = 0
    C_count = 0
    G_count = 0
    T_count = 0
    for pos in M[:, base]:
        if pos == "A":
            A_count += 1
        elif pos == "C":
            C_count += 1
        elif pos == "G":
            G_count += 1
        elif pos == "T":
            T_count += 1
    A.append(A_count)
    C.append(C_count)
    G.append(G_count)
    T.append(T_count)

profile_matrix = {"A": A, "C": C, "G": G, "T": T}
P.append(A)
P.append(C)
P.append(G)
P.append(T)

profile = np.array(P).reshape(4, len(A))
for pos in range(len(A)):
    if max(profile[:, pos]) == profile[0, pos]:
        consensus.append("A")
    elif max(profile[:, pos]) == profile[1, pos]:
        consensus.append("C")
    elif max(profile[:, pos]) == profile[2, pos]:
        consensus.append("G")
    elif max(profile[:, pos]) == profile[3, pos]:
        consensus.append("T")

conout.write("".join(consensus) + "\n")
for k, v in profile_matrix.items():
    conout.write(k + ": " + " ".join(str(x) for x in v) + "\n")
conout.close()
There are a couple of ways that you can iterate a FASTA file as records. You can use a prebuilt library or write your own.
A widely used library for working with sequence data is biopython. This code snippet will create a list of strings.
from Bio import SeqIO

file = "path/to/your/file.fa"
sequences = []
with open(file, "r") as file_handle:
    for record in SeqIO.parse(file_handle, "fasta"):
        sequences.append(record.seq)
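One detail worth knowing: SeqIO.parse yields SeqRecord objects, so record.seq is a Biopython Seq object rather than a plain str; if the downstream code expects ordinary strings, appending str(record.seq) instead avoids surprises.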
Alternatively, you can write your own FASTA parser. Something like this should work:
def read_fasta(fh):
    # Iterate to get the first FASTA header
    for line in fh:
        if line.startswith(">"):
            name = line[1:].strip()
            break
    # This list will hold the sequence lines
    fa_lines = []
    # Now iterate to collect the (possibly multiline) sequences
    for line in fh:
        if line.startswith(">"):
            # When in this block we have reached the next FASTA record:
            # yield the previous record's name and sequence as a tuple
            # that we can unpack
            yield name, "".join(fa_lines)
            # Reset the sequence lines and save the name of the next record
            fa_lines = []
            name = line[1:].strip()
            # skip to next line
            continue
        fa_lines.append(line.strip())
    yield name, "".join(fa_lines)
You can use this function like so:
file = "path/to/your/file.fa"
sequences = []
with open(file, "r") as file_handle:
for name, seq in read_fasta(file_handle):
sequences.append(seq)

decoding comparing list to dictionary

I'm trying to compare to see if a word in a list is in a dictionary full of words. I'm writing a program that decodes a txt file.
Say here is one of the lines:
['Now', 'we', 'are', 'engaged', 'in', 'a', 'great', 'civil', 'war,']
I want to go to the dictionary and check to see if any of these words are in there. If so, I'll put the strings together and write to a file. All I want to know is how to compare the two. I'd first lowercase the first word in the list since all words in the dictionary are lowercase.
an example of my dictionary would be:
{"now": "", "help": "", "you": ""}
but filled with MANY more words.
If you want to see my overall code just ask :)
Here is my code for making the dictionary. Each line is a word.
f = open('dictionary.txt', "r")
dictionary = {}
for line in f:
    word = line.strip()
    dictionary[word] = ""
print dictionary
Updated:
def CaeserCipher(string, k):
    # setting up variables to move through
    upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'*10000
    lower = 'abcdefghijklmnopqrstuvwxyz'*10000
    newCipher = ''
    # looping each letter and moving it k times
    for letter in string:
        if letter in upper:
            if upper.index(letter) + k > 25:
                indexPosition = (upper.index(letter) + k)
                newCipher = newCipher + upper[indexPosition]
            else:
                indexPosition = upper.index(letter) + k
                newCipher = newCipher + upper[indexPosition]
        elif letter in lower:
            if lower.index(letter) + k > 25:
                indexPosition = (lower.index(letter) + k)
                newCipher = newCipher + lower[indexPosition]
            else:
                indexPosition = lower.index(letter) + k
                newCipher = newCipher + lower[indexPosition]
        else:
            newCipher = newCipher + letter
    return newCipher

f = open('dictionary.txt', "r")
dictionary = set()
for line in f:
    word = line.strip()
    dictionary.add(word)
print dictionary

#main file
#reading file and encrypting text
f = open('encrypted.txt')
string = ''
out = open("plain1.txt", "w")
#working through each line
for line in f:
    for k in range(26):
        line = [CaeserCipher(word, k) for word in line]
        print line
        #listSplit = re.split('[,\[\]]', line)
        #print listSplit
        string = ("".join(line))
        listOfWords = string.split()
        lowercase_line = [word.lower() for word in listOfWords]
        out.write(dictionary.intersection(lowercase_line))
f.close()
out.close()
If you're willing to represent your dictionary as a set, you can use intersection to find all the words in the dictionary that exist in the line.
dictionary = {"now", "help", "you"}
line = ['Now', 'we', 'are', 'engaged', 'in', 'a', 'great', 'civil', 'war,']
lowercase_line = [word.lower() for word in line]
#todo: also filter out punctuation, so "war," becomes "war"
print dictionary.intersection(lowercase_line)
result:
set(['now'])
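To deal with the punctuation mentioned in the todo comment, one option is to strip it from each word with string.punctuation. A sketch ("war" is added to the toy dictionary only to show the effect):
import string

dictionary = {"now", "help", "you", "war"}
line = ['Now', 'we', 'are', 'engaged', 'in', 'a', 'great', 'civil', 'war,']

# lowercase each word and strip leading/trailing punctuation, so "war," becomes "war"
cleaned_line = [word.lower().strip(string.punctuation) for word in line]
print(dictionary.intersection(cleaned_line))
# the result now contains both 'now' and 'war'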
if any(word.lower() in your_word_dict for word in line_list):
    ' '.join(line_list)
    # write to file
Check to see if any of the words in your word list are in your dictionary, and if they are, join them into a string and write them to a file.
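Put together, that idea might look like the following sketch (your_word_dict stands in for the dictionary built above; the file names are the ones from the question):
your_word_dict = {"now": "", "help": "", "you": ""}

with open('encrypted.txt') as infile, open('plain1.txt', 'w') as out:
    for line in infile:
        line_list = line.split()
        # keep the line only if at least one word is in the dictionary
        if any(word.lower() in your_word_dict for word in line_list):
            out.write(' '.join(line_list) + '\n')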

Word count with pattern in Python

So this is the question:
Write a program to read in multiple lines of text and count the number
of words in which the rule i before e, except after c is broken, and
number of words which contain either ei or ie and which don't break
the rule.
For this question, we only care about the c if it is the character
immediately before the ie or the ei. So science counts as breaking the
rule, but mischievous doesn't. If a word breaks the rule twice (like
obeisancies), then it should still only be counted once.
Example given:
Line: The science heist succeeded
Line: challenge accepted
Line:
Number of times the rule helped: 0
Number of times the rule was broken: 2
and my code:
rule = []
broken = []
line = None
while line != '':
    line = input('Line: ')
    line.replace('cie', 'broken')
    line.replace('cei', 'rule')
    line.replace('ie', 'rule')
    line.replace('ei', 'broken')
    a = line.count('rule')
    b = line.count('broken')
    rule.append(a)
    broken.append(b)
print(sum(a)); print(sum(b))
How do I fix my code to work like the question wants it to?
I'm not going to write the code to your exact specification as it sounds like homework but this should help:
import pprint

words = ['science', 'believe', 'die', 'friend', 'ceiling',
         'receipt', 'seize', 'weird', 'vein', 'foreign']

rule = {}
rule['ie'] = []
rule['ei'] = []
rule['cei'] = []
rule['cie'] = []

for word in words:
    if 'ie' in word:
        if 'cie' in word:
            rule['cie'].append(word)
        else:
            rule['ie'].append(word)
    if 'ei' in word:
        if 'cei' in word:
            rule['cei'].append(word)
        else:
            rule['ei'].append(word)

pprint.pprint(rule)
Save it to a file like i_before_e.py and run python i_before_e.py:
{'cei': ['ceiling', 'receipt'],
'cie': ['science'],
'ei': ['seize', 'weird', 'vein', 'foreign'],
'ie': ['believe', 'die', 'friend']}
You can easily count the occurrences with:
for key in rule.keys():
    print "%s occured %d times." % (key, len(rule[key]))
Output:
ei occured 4 times.
ie occured 3 times.
cie occured 1 times.
cei occured 2 times.
Firstly, replace does not change stuff in place. What you need is the return value:
line = 'hello there' # line = 'hello there'
line.replace('there','bob') # line = 'hello there'
line = line.replace('there','bob') # line = 'hello bob'
Also I would assume you want actual totals so:
print('Number of times the rule helped: {0}'.format(sum(rule)))
print('Number of times the rule was broken: {0}'.format(sum(broken)))
You are printing a and b. These are the numbers of times the rule worked and was broken in the last line processed. You want totals.
As a sidenote: Regular expressions are good for things like this. re.findall would make this a lot more sturdy and pretty:
import re

line = 'foo moo goo loo foobar cheese is great '
foo_matches = len(re.findall('foo', line))  # = 2
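Applied to this particular rule, such a regex check might look like the sketch below (the helper names and patterns are mine, not from the answer): cie, or an ei not preceded by c, breaks the rule, while cei, or an ie not preceded by c, follows it.
import re

def breaks_rule(word):
    word = word.lower()
    # 'cie', or an 'ei' that is not preceded by 'c', breaks the rule
    return bool(re.search(r'cie', word) or re.search(r'(?<!c)ei', word))

def follows_rule(word):
    word = word.lower()
    # an 'ie' not preceded by 'c', or 'cei', follows the rule
    return bool(re.search(r'(?<!c)ie', word) or re.search(r'cei', word))

print(breaks_rule('science'), follows_rule('believe'), breaks_rule('ceiling'))
# True True False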
Let's split the logic up into functions; that should help us reason about the code and get it right. To loop over the lines, we can use the iter function:
def rule_applies(word):
    return 'ei' in word or 'ie' in word

def complies_with_rule(word):
    if 'cie' in word:
        return False
    if word.count('ei') > word.count('cei'):
        return False
    return True

helped_count = 0
broken_count = 0

lines = iter(lambda: input("Line: "), '')
for line in lines:
    for word in line.split():
        if rule_applies(word):
            if complies_with_rule(word):
                helped_count += 1
            else:
                broken_count += 1

print("Number of times the rule helped:", helped_count)
print("Number of times the rule was broken:", broken_count)
We can make the code more concise by shortening the complies_with_rule function and by using generator expressions and Counter:
from collections import Counter

def rule_applies(word):
    return 'ei' in word or 'ie' in word

def complies_with_rule(word):
    return 'cie' not in word and word.count('ei') == word.count('cei')

lines = iter(lambda: input("Line: "), '')
words = (word for line in lines for word in line.split())
words_considered = (word for word in words if rule_applies(word))
did_rule_help_count = Counter(complies_with_rule(word) for word in words_considered)

print("Number of times the rule helped:", did_rule_help_count[True])
print("Number of times the rule was broken:", did_rule_help_count[False])
If I understand correctly, your main problem is getting a unique result per word. Is this what you are trying to achieve:
rule_count = 0
break_count = 0
line = None
while line != '':
    line = input('Line: ')
    rule_found = False
    break_found = False
    for word in line.split():
        if 'cie' in line:
            line = line.replace('cie', '')
            break_found = True
        if 'cei' in line:
            line = line.replace('cei', '')
            rule_found = True
        if 'ie' in line:
            rule_found = True
        if 'ei' in line:
            break_found = True
    if rule_found:
        rule_count += 1
    if break_found:
        break_count += 1
print(rule_found); print(break_count)
rule = []
broken = []
tb = 0
tr = 0
line = ' '
while line:
    lines = input('Line: ')
    line = lines.split()
    for word in line:
        if 'ie' in word:
            if 'cie' in word:
                tb += 1
            elif word.count('cie') > 1:
                tb += 1
            elif word.count('ie') > 1:
                tr += 1
            elif 'ie' in word:
                tr += 1
        if 'ei' in word:
            if 'cei' in word:
                tr += 1
            elif word.count('cei') > 1:
                tr += 1
            elif word.count('ei') > 1:
                tb += 1
            elif 'ei' in word:
                tb += 1
print('Number of times the rule helped: {0}'.format(tr))
print('Number of times the rule was broken: {0}'.format(tb))
Done.
