python : the longest word in a string - python

I'm developing a function that returns the longest word from a string, my code is :
# Question code as posted (indentation was lost in this listing).
# Walks `sen` one character at a time, building each run of non-space
# characters into `word` and remembering the longest run in `longestword`.
# NOTE: `max` shadows the built-in of the same name inside this function.
def longestWord(sen):
max = 1
i = 0
ind = 1
while ind != 0 :
if sen[i] == " ":
i = i+1
word = ""
lenth = 0
# `sen[i]` is evaluated before the `i < len(sen)` bound check, so once `i`
# reaches len(sen) this condition raises the IndexError shown in the traceback.
while(sen[i] != " " and i < len(sen)):
word = word + sen[i]
lenth = lenth+1
if(lenth > max):
max = lenth
longestword = word
i = i+1
if i == len(sen)-1:
ind = 0
return longestword
print(longestWord("ceci est un texte"))
When I try to run it an error shows up saying that "string index out of range"
The error message:
Traceback (most recent call last):
File "C:\Users\pc\PycharmProjects\pythonProject2\venv\tp2\longestWord.py", line 25, in <module>
print(longestWord("ceci est un texte"))
File "C:\Users\pc\PycharmProjects\pythonProject2\venv\tp2\longestWord.py", line 11, in longestWord
while(sen[i] != " " and i < len(sen)):
IndexError: string index out of range

Seems like a very complicated way. A simple pythonic way would be:
def longest_word(s):
    """Return the longest whitespace-separated word in ``s``.

    When several words share the maximum length the first one is returned,
    because ``max`` keeps the first maximal element it meets.

    Args:
        s: The text to search.

    Returns:
        The longest word, or ``""`` when ``s`` contains no words.
    """
    # default="" avoids the ValueError max() raises on an empty sequence.
    return max(s.split(), key=len, default="")
Output:
>>> longest_word("ceci est un texte")
"texte"

You can do this in one line
def longest_word(sentence):
    """Return the longest whitespace-separated word in ``sentence``.

    The words are sorted by length with a stable sort and the last one is
    taken, so on a tie the word that appears last in ``sentence`` wins.

    Args:
        sentence: The text to search.

    Returns:
        The longest word, or ``""`` when ``sentence`` contains no words.
    """
    # The original one-liner read ``sencente.split()``, which is a NameError.
    by_length = sorted(sentence.split(), key=len)
    return by_length[-1] if by_length else ""


print(longest_word("ceci est un texte"))

If you want to capture the results in a list, because multiple words may share the maximum length:
def longest_words(sentence):
    """Return every word of ``sentence`` that has the maximum length.

    Args:
        sentence: The text to search.

    Returns:
        A list of the longest words in their original order (duplicates
        kept); an empty list when ``sentence`` contains no words.
    """
    words = sentence.split()
    # default=0 keeps an empty sentence from raising ValueError in max().
    maxlen = max(map(len, words), default=0)
    return [w for w in words if len(w) == maxlen]
Or in one pass:
def longest_words(sentence):
    """Collect the longest words of ``sentence`` in a single pass.

    Args:
        sentence: The text to search.

    Returns:
        A list of the words having the maximum length, in their original
        order; an empty list when ``sentence`` contains no words.
    """
    results = []
    maxlen = 0
    for w in sentence.split():
        curlen = len(w)
        if curlen > maxlen:
            # A strictly longer word invalidates everything collected so far.
            results = [w]
            maxlen = curlen
        elif curlen == maxlen:
            results.append(w)
    return results

Replace your function with this one :
def longestWord(string):
    """Return the first longest whitespace-separated word in ``string``.

    Args:
        string: The text to search.

    Returns:
        The longest word (the earliest one on a tie), or ``""`` when
        ``string`` contains no words.
    """
    result = ""
    for word in string.split():
        # Strict ">" keeps the earliest word when lengths are equal.
        if len(word) > len(result):
            result = word
    return result


print(longestWord("ceci est un texte"))

Related

no such file in directory python problem and how to fix it

I'm not new to opening and closing files in Python, but this time it refuses to let me access the text files.
This is the code; ignore the rest and just focus on the file-handling parts:
def LongFichier(s):
    """Return the number of lines in the text file at path ``s``.

    Args:
        s: Path of the file to read.

    Returns:
        The line count as an int (0 for an empty file).

    Raises:
        OSError: If the file cannot be opened.
    """
    # File objects have no ``lirelignes``/``fermer`` methods; iterating the
    # handle inside ``with`` counts the lines and always closes the file.
    with open(s, "r") as f:
        return sum(1 for _ in f)
#------------------
def NombreLettresNonMajus(ch):
    """Count the characters of ``ch`` that are not ASCII uppercase letters.

    Every character outside ``A``-``Z`` is counted, including digits,
    punctuation and whitespace.

    Args:
        ch: The string to inspect.

    Returns:
        The number of such characters (0 for an empty string).
    """
    majuscules = "abcdefghijklmnopqrstuvwxyz".upper()
    return sum(1 for c in ch if c not in majuscules)
#------------------
def NombreLettresNonMinus(ch):
    """Count the characters of ``ch`` that are not ASCII lowercase letters.

    Every character outside ``a``-``z`` is counted, including digits,
    punctuation and whitespace.

    Args:
        ch: The string to inspect.

    Returns:
        The number of such characters (0 for an empty string).
    """
    minuscules = "abcdefghijklmnopqrstuvwxyz"
    return sum(1 for c in ch if c not in minuscules)
#------------------
def NombreLettresNonAlph(ch):
    """Count the characters of ``ch`` that are not ASCII letters.

    Args:
        ch: The string to inspect.

    Returns:
        The number of characters outside ``a``-``z`` and ``A``-``Z``
        (0 for an empty string).
    """
    minuscules = "abcdefghijklmnopqrstuvwxyz"
    lettres = minuscules + minuscules.upper()
    return sum(1 for c in ch if c not in lettres)
#------------------
def plusLongMin(ch):
    """Return the length of the longest run of ASCII lowercase letters.

    Args:
        ch: The string to inspect.

    Returns:
        The length of the longest unbroken run of ``a``-``z`` characters
        in ``ch``; 0 when there is none.
    """
    minuscules = "abcdefghijklmnopqrstuvwxyz"
    n = 0
    counter = 0
    for c in ch:
        if c in minuscules:
            counter += 1
            if counter > n:
                n = counter
        else:
            # Any other character ends the current run.
            counter = 0
    return n
#------------------
def plusLongMaj(ch):
    """Return the length of the longest run of ASCII uppercase letters.

    Args:
        ch: The string to inspect.

    Returns:
        The length of the longest unbroken run of ``A``-``Z`` characters
        in ``ch``; 0 when there is none.
    """
    majuscules = "abcdefghijklmnopqrstuvwxyz".upper()
    n = 0
    counter = 0
    for c in ch:
        if c in majuscules:
            counter += 1
            if counter > n:
                n = counter
        else:
            # Any other character ends the current run.
            counter = 0
    return n
#------------------
def CalculScore(ch):
    """Compute the numeric score of ``ch`` from the sibling counting helpers.

    The score adds 4 per character, 2 per non-uppercase character, 3 per
    non-lowercase character and 5 per non-letter character, then subtracts
    2 per character of the longest lowercase run and 3 per character of the
    longest uppercase run.

    Args:
        ch: The string to score.

    Returns:
        The score as an int.
    """
    s = len(ch) * 4
    s += NombreLettresNonMajus(ch) * 2
    s += NombreLettresNonMinus(ch) * 3
    s += NombreLettresNonAlph(ch) * 5
    s -= plusLongMin(ch) * 2
    s -= plusLongMaj(ch) * 3
    # The posted version had no return statement, so callers received None.
    return s
#------------------
def rempPass(s, s2, s3):
    """Score every line of file ``s2`` and append the results to ``s`` and ``s3``.

    Each input line is stripped of its trailing newline, scored with
    ``CalculScore`` and written to both output files as
    ``<line> <score> <label>`` followed by a newline.

    Args:
        s: Path of the first output file (opened in append mode).
        s2: Path of the input file, read line by line.
        s3: Path of the second output file (opened in append mode).

    Raises:
        OSError: If any of the three files cannot be opened.
    """
    # ``with`` closes all three handles even if scoring or writing fails.
    with open(s2, "r") as f, open(s, "a") as r, open(s3, "a") as oof:
        # File objects have no ``lireligne`` method; iterate the handle instead.
        for ligne in f:
            # Drop the newline so it is neither scored nor splits the output.
            ligne = ligne.rstrip("\n")
            score = CalculScore(ligne)
            # Cascading upper bounds leave no gap at 20, 40 or 60.
            if score < 20:
                message = "tres faible "
            elif score < 40:
                message = " faible "
            elif score < 60:
                message = " moyen "
            elif score < 80:
                message = " fort "
            else:
                message = " tres fort "
            resultat = ligne + " " + str(score) + " " + message + "\n"
            oof.write(resultat)
            r.write(resultat)
#------------------
# NOTE: these are ordinary (non-raw) string literals, so a backslash can start an escape sequence; "\201" inside "\2015" is an octal escape, which matches the '\x815' in the error message below.
rempPass("school-year\past_bac_exams\2015\passwordScore.txt","school-year\past_bac_exams\2015\password.txt","school-year\past_bac_exams\2015\strongPass.txt")
it displays this error :
File "c:\Users\amine\Desktop\pyyyy\school year\past_bac_exams\2015\problem.py", line 88, in <module>
rempPass("school-year\past_bac_exams\2015\passwordScore.txt","school-year\past_bac_exams\2015\password.txt","school-year\past_bac_exams\2015\strongPass.txt")
File "c:\Users\amine\Desktop\pyyyy\school year\past_bac_exams\2015\problem.py", line 67, in rempPass
f=open(s2,"r")
FileNotFoundError: [Errno 2] No such file or directory: 'school-year\\past_bac_exams\x815\\password.txt'
I'm sure that the directory path is right because I'm using vscode ( you are able right click on a file and copy its relative path ) so what's the problem?
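Python treats the backslash \ as an escape character inside ordinary string literals (for example \t, \a, \n).
In this case the "\201" in "\2015" is read as an octal escape, which is why the error message shows "\x815" instead of "\2015".
So please use two backslashes \\, a forward slash / (like "school-year/past_bac_exams/2015/password.txt"), or a raw string literal such as r"school-year\past_bac_exams\2015\password.txt".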

obfuscation of a text file using python - by reversing the words and inserting a specific number of random characters between them

Beginner coding problem: I am supposed to write code that reverses the contents of a file and then inserts a number of random characters based on a strength the user chooses. It then creates a new file containing the obfuscated text.
For example, if the user chooses strength = 2, it will insert 2 random characters between each letter in the text file: The cat sits ---> sgyt6gilns t7faxdc e3dh1kT
Right now my program inserts too many characters in between and I can't figure out why.
This is what it's doing:
input: CAT
Output of strength = 1: TeAEADQoC
import string
import random
# Returns one character picked at random from the ASCII letters and digits.
def getRandomChar():
alpha = string.ascii_letters + string.digits
return random.choice(alpha)
# Builds a string of random characters, one per loop iteration.
# NOTE: the loop bound is len(EncrypStrength). LineEncrypt passes the reversed
# line here, so one random character is produced per character of that line.
def randomString(EncrypStrength):
count = 0
result = ''
while count < len(EncrypStrength):
result += getRandomChar()
count += 1
return result
# Returns its argument reversed (slice with step -1).
def ReverseString(OrigFile):
return OrigFile[::-1]
# Reverses `line` and inserts the output of randomString() between
# consecutive characters of the reversed text.
def LineEncrypt(line, EncrypStrength):
# NOTE: this overwrites the strength parameter with the reversed line, so the
# number passed in by main() is never used below.
EncrypStrength = ReverseString(line)
index = 0
newline = EncrypStrength[index]
index += 1
while index < len(EncrypStrength):
newline += randomString(EncrypStrength)
newline += EncrypStrength[index]
index += 1
return newline
# Prompts for the input file name, the output file name and the strength,
# then writes LineEncrypt() of every input line to the output file.
def main():
OrigFile =input('Original File Name:')
EncryptedFile = input("obfuscated File Name:")
EncrypStrength = int(input('Enter the Encryption Strength:'))
Orig = open(OrigFile, 'r')
Encrypted = open(EncryptedFile, 'w')
# readline() returns '' only at end of file, which ends the loop.
line = Orig.readline()
while line!= '':
encryptLine = LineEncrypt(line, EncrypStrength)
Encrypted.write(encryptLine +"\n")
line = Orig.readline()
Orig.close()
Encrypted.close()
if __name__=="__main__":
main()
In the LineEncrypt function you are using EncrypStrength incorrectly: you overwrite the number of characters to insert (EncrypStrength) with the reversed line.
def LineEncrypt(line, EncrypStrength):
    """Reverse ``line`` and put random filler between its characters.

    Args:
        line: The text to obfuscate.
        EncrypStrength: Value forwarded unchanged to ``randomString`` for
            every gap between two characters.

    Returns:
        The reversed text with the result of ``randomString(EncrypStrength)``
        inserted between each pair of adjacent characters; ``""`` when
        ``line`` is empty.
    """
    reversedString = ReverseString(line)
    if not reversedString:
        # Nothing to index into; the posted loop would raise IndexError here.
        return ""
    pieces = [reversedString[0]]
    for char in reversedString[1:]:
        pieces.append(randomString(EncrypStrength))
        pieces.append(char)
    # The posted snippet built the string but never returned it.
    return "".join(pieces)
You are confusing EncrypStrength and overriding it as Ritesh mentioned.
Here is the full corrected code, I hope it will work as you expected.
import string
import random
def getRandomChar():
    """Return one random character drawn from ASCII letters and digits."""
    alpha = string.ascii_letters + string.digits
    return random.choice(alpha)
def randomString(EncrypStrength):
    """Return ``EncrypStrength`` random characters joined into one string.

    Args:
        EncrypStrength: Number of random characters to generate (an int).

    Returns:
        A string of that many characters from ``getRandomChar``; ``""`` when
        the count is zero or negative.
    """
    return "".join(getRandomChar() for _ in range(EncrypStrength))
def ReverseString(OrigFile):
    """Return ``OrigFile`` with its characters in reverse order."""
    return OrigFile[::-1]
def LineEncrypt(line, EncrypStrength):
    """Reverse ``line`` and put random filler between its characters.

    Args:
        line: The text to obfuscate.
        EncrypStrength: Number of random characters to insert in every gap
            (forwarded to ``randomString``).

    Returns:
        The reversed text with ``randomString(EncrypStrength)`` inserted
        between each pair of adjacent characters; ``""`` when ``line`` is
        empty.
    """
    RevStr = ReverseString(line)
    if not RevStr:
        # Indexing RevStr[0] on an empty line would raise IndexError.
        return ""
    pieces = [RevStr[0]]
    for char in RevStr[1:]:
        pieces.append(randomString(EncrypStrength))
        pieces.append(char)
    return "".join(pieces)
def main():
    """Prompt for file names and a strength, then write the obfuscated file.

    Reads the original file line by line and writes ``LineEncrypt`` of each
    line (without its trailing newline) to the output file, one result per
    line.

    Raises:
        ValueError: If the strength entered is not an integer.
        OSError: If either file cannot be opened.
    """
    OrigFile = input('Original File Name:')
    EncryptedFile = input("obfuscated File Name:")
    EncrypStrength = int(input('Enter the Encryption Strength:'))
    # ``with`` closes both files even if obfuscating a line fails.
    with open(OrigFile, 'r') as Orig, open(EncryptedFile, 'w') as Encrypted:
        for line in Orig:
            # Strip the newline first; otherwise it is reversed to the front
            # of the output and random characters are inserted after it.
            text = line.rstrip("\n")
            # Blank lines are passed through without calling LineEncrypt.
            encryptLine = LineEncrypt(text, EncrypStrength) if text else ""
            Encrypted.write(encryptLine + "\n")


if __name__ == "__main__":
    main()

my string index is out of range in this cipher code

# Question code as posted (indentation was lost in this listing).
# Reads a numeric key and a word, then looks each letter up in `text` and
# prints the letter found `key` positions further along.
def cipherText():
text = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
key = int(input("Enter numerical key--"))
word = str(input("Type word to be ciphered--"))
i = 0
k = 0
n = len(word)
print(n)
# NOTE: n starts at len(word) and the loop runs while n >= 0, i.e. len(word) + 1
# times, so the final pass reads word[len(word)] and raises the IndexError below.
while n >= 0:
letter = word[i]
i = i + 1
# NOTE: k is initialised once before the outer loop and is never reset in the
# code shown.
while k <= 25:
textLetter = text[k]
if textLetter == letter:
givenLetter = letter
if k < (25 - key):
cipherLength = k + key
else:
cipherLength = k + key - 25
print(text[cipherLength])
k = k + 1
n = n - 1
cipherText()
WHEN I RUN THIS FOLLOWING MESSAGE POPS OUT:
Traceback (most recent call last): File "main.py", line 23, in
cipherText() File "main.py", line 10, in cipherText
letter=word[i] IndexError: string index out of range
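You need to modify the condition while n>=0:. Indices start at 0, so the loop runs len(word) + 1 times and its last pass reads word[len(word)], one past the final valid index.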
this line,
while n>=0:
should be,
while n-1>=0:

Word count with pattern in Python

So this is the question:
Write a program to read in multiple lines of text and count the number
of words in which the rule i before e, except after c is broken, and
number of words which contain either ei or ie and which don't break
the rule.
For this question, we only care about the c if it is the character
immediately before the ie or the ei. So science counts as breaking the
rule, but mischievous doesn't. If a word breaks the rule twice (like
obeisancies), then it should still only be counted once.
Example given:
Line: The science heist succeeded
Line: challenge accepted
Line:
Number of times the rule helped: 0
Number of times the rule was broken: 2
and my code:
# Question code as posted (indentation was lost in this listing).
# Reads lines until an empty one is entered and tries to tally the words that
# follow or break the "i before e, except after c" rule.
rule = []
broken = []
line = None
while line != '':
line = input('Line: ')
# NOTE: str.replace() returns a new string; the four results below are
# discarded, so `line` is unchanged when it is counted.
line.replace('cie', 'broken')
line.replace('cei', 'rule')
line.replace('ie', 'rule')
line.replace('ei', 'broken')
a = line.count('rule')
b = line.count('broken')
rule.append(a)
broken.append(b)
# NOTE: a and b are the int counts of the last line read, not the lists.
print(sum(a)); print(sum(b))
How do I fix my code, to work like the question wants it to?
I'm not going to write the code to your exact specification as it sounds like homework but this should help:
import pprint
# Sample words to classify by the letter pattern they contain.
words = ['science', 'believe', 'die', 'friend', 'ceiling',
         'receipt', 'seize', 'weird', 'vein', 'foreign']

# One bucket per pattern: plain "ie"/"ei", and the same pair preceded by "c".
rule = {'ie': [], 'ei': [], 'cei': [], 'cie': []}

for word in words:
    if 'ie' in word:
        # A "cie" anywhere in the word takes precedence over a plain "ie".
        bucket = 'cie' if 'cie' in word else 'ie'
        rule[bucket].append(word)
    if 'ei' in word:
        # Likewise "cei" takes precedence over a plain "ei".
        bucket = 'cei' if 'cei' in word else 'ei'
        rule[bucket].append(word)

pprint.pprint(rule)
Save it to a file like i_before_e.py and run python i_before_e.py:
{'cei': ['ceiling', 'receipt'],
'cie': ['science'],
'ei': ['seize', 'weird', 'vein', 'foreign'],
'ie': ['believe', 'die', 'friend']}
You can easily count the occurrences with:
# Report how many words landed in each bucket of ``rule``.
for key in rule:
    # Calling print with parentheses works on Python 3 and, for a single
    # argument, on Python 2 as well.
    print("%s occured %d times." % (key, len(rule[key])))
Output:
ei occured 4 times.
ie occured 3 times.
cie occured 1 times.
cei occured 2 times.
Firstly, replace does not change the string in place. What you need is the return value:
line = 'hello there' # line = 'hello there'
line.replace('there','bob') # line = 'hello there'
line = line.replace('there','bob') # line = 'hello bob'
Also I would assume you want actual totals so:
print('Number of times the rule helped: {0}'.format(sum(rule)))
print('Number of times the rule was broken: {0}'.format(sum(broken)))
You are printing a and b. These are the numbers of times the rule worked and was broken in the last line processed. You want totals.
As a sidenote: Regular expressions are good for things like this. re.findall would make this a lot more sturdy and pretty:
line = 'foo moo goo loo foobar cheese is great '
foo_matches = len(re.findall('foo', line)) # = 2
Let's split the logic up into functions, that should help us reason about the code and get it right. To loop over the line, we can use the iter function:
def rule_applies(word):
    """Return True when ``word`` contains ``ei`` or ``ie``."""
    return 'ei' in word or 'ie' in word
def complies_with_rule(word):
    """Return True when ``word`` never breaks "i before e, except after c".

    A word breaks the rule if it contains ``cie``, or if it contains an
    ``ei`` that is not immediately preceded by ``c``.

    Args:
        word: The word to check.

    Returns:
        False if the rule is broken at least once, True otherwise (including
        for words that contain neither ``ei`` nor ``ie``).
    """
    if 'cie' in word:
        return False
    # Every "cei" contains one "ei"; a surplus of "ei" means one lacks the "c".
    if word.count('ei') > word.count('cei'):
        return False
    return True
# Running totals over every word read from standard input.
helped_count = 0
broken_count = 0

# iter(callable, sentinel) keeps calling input() until it returns ''.
lines = iter(lambda: input("Line: "), '')
for line in lines:
    for word in line.split():
        if not rule_applies(word):
            # Words without "ei"/"ie" are not counted at all.
            continue
        if complies_with_rule(word):
            helped_count += 1
        else:
            broken_count += 1

print("Number of times the rule helped:", helped_count)
print("Number of times the rule was broken:", broken_count)
We can make the code more concise by shortening the complies_with_rule function and by using generator expressions and Counter:
from collections import Counter
def rule_applies(word):
    """Tell whether ``word`` contains either of the pairs ``ei`` or ``ie``."""
    return 'ei' in word or 'ie' in word
def complies_with_rule(word):
    """Return True when ``word`` never breaks "i before e, except after c".

    The word complies when it has no ``cie`` and every ``ei`` it contains is
    part of a ``cei`` (the two counts are then equal).
    """
    return 'cie' not in word and word.count('ei') == word.count('cei')
# Lazily read lines from standard input until an empty one is entered.
lines = iter(lambda: input("Line: "), '')
# Flatten the lines into a lazy stream of whitespace-separated words.
words = (token for text in lines for token in text.split())
# Keep only the words that contain "ei" or "ie".
words_considered = filter(rule_applies, words)
# Tally True (rule helped) and False (rule broken) results.
did_rule_help_count = Counter(map(complies_with_rule, words_considered))
print("Number of times the rule helped:", did_rule_help_count[True])
print("Number of times the rule was broken:", did_rule_help_count[False])
If I understand correctly, your main problem is getting a unique result per word. Is this what you are trying to achieve:
# Number of words that follow the rule and number of words that break it.
rule_count = 0
break_count = 0
line = None
while line != '':
    line = input('Line: ')
    for word in line.split():
        # Flags are reset for every word so each word is counted at most once.
        rule_found = False
        break_found = False
        if 'cie' in word:
            # Mask the match so its "ie" is not seen again by the plain test;
            # a placeholder (not '') avoids joining the neighbouring letters.
            word = word.replace('cie', '_')
            break_found = True
        if 'cei' in word:
            word = word.replace('cei', '_')
            rule_found = True
        if 'ie' in word:
            rule_found = True
        if 'ei' in word:
            break_found = True
        # A word that breaks the rule anywhere counts as broken only.
        if break_found:
            break_count += 1
        elif rule_found:
            rule_count += 1
print(rule_count)
print(break_count)
tb = 0  # words that break the rule
tr = 0  # words that contain "ie"/"ei" and do not break the rule
line = ' '
# An empty input line splits into an empty list, which ends the loop.
while line:
    lines = input('Line: ')
    line = lines.split()
    for word in line:
        # Broken: a "cie", or an "ei" that is not part of a "cei".
        breaks_rule = 'cie' in word or word.count('ei') > word.count('cei')
        if breaks_rule:
            # Counted once, however many times the word breaks the rule.
            tb += 1
        elif 'ie' in word or 'ei' in word:
            tr += 1
print('Number of times the rule helped: {0}'.format(tr))
print('Number of times the rule was broken: {0}'.format(tb))
Done.

Memory overflow in Python

I have 67000 files, I need to read them and extract similarities between the words, but when I run the code my laptop becomes much slower, I can't open any other application, and then a memory overflow error shows up (even when I run on around 10 000 of the files). Is there a way to clear the memory after every for loop maybe, or will running the code on all files be impossible to do? Below is the code:
def isAscii(s):
    """Return True when every character of ``s`` is in ``string.printable``.

    Despite the name, the test is membership in ``string.printable``, so a
    non-printable ASCII control character also makes the result False.

    Args:
        s: The string to check.

    Returns:
        True if all characters pass (vacuously True for ``""``), else False.
    """
    return all(c in string.printable for c in s)
# Number of neighbouring positions examined on each side of a word.
windowSize = 2
# relationTable[word][word2]['pairPMI'] is filled in by the script below and read by sim().
relationTable = {}
# probabilities[word]['wordProb'] is filled in by the script below.
probabilities = {}
# wordCount[w1]['wCount'] and wordCount[w1][w2]['count'] are the raw co-occurrence counts.
wordCount = {}
# Incremented once for every (w1, w2) pair that is counted.
totalWordCount = 0
def sim(w1, w2):
    """Compute a similarity value for two words from ``relationTable``.

    The numerator sums both ``pairPMI`` values for every word related to
    both ``w1`` and ``w2``; the denominator sums every ``pairPMI`` value of
    ``w1`` and of ``w2``.

    Args:
        w1: First word.
        w2: Second word.

    Returns:
        ``numerator / denominator`` as a float, 0 when the denominator is
        zero, or -1 when either word is missing from ``relationTable``.
    """
    if w1 not in relationTable or w2 not in relationTable:
        return -1

    rtw1 = relationTable[w1]
    rtw2 = relationTable[w2]
    numerator = 0
    denominator = 0
    for word, entry in rtw1.items():
        rtw1_PMI = entry['pairPMI']
        denominator += rtw1_PMI
        if word in rtw2:
            # Shared neighbour: both PMI values contribute to the numerator.
            numerator += rtw1_PMI + rtw2[word]['pairPMI']
    for entry in rtw2.values():
        denominator += entry['pairPMI']

    if denominator != 0:
        return float(numerator) / denominator
    return 0
# Question code as posted (indentation was lost in this listing): builds
# co-occurrence counts from the note files, derives pairPMI values from them,
# then calls sim() for every pair of distinct words.
AllNotes = {}
AllNotes = os.listdir("C:/Users/nerry-san/Desktop/EECE 502/MedicalNotes")
# NOTE: this file object is never closed in the code shown.
fileStopPunctuations = open('C:/Users/nerry-san/Desktop/EECE 502/stopPunctuations.txt')
stopPunctuations = nltk.word_tokenize(fileStopPunctuations.read())
# Only the first 10 directory entries are processed here.
for x in range (0, 10):
# NOTE: a file is opened on every iteration and never closed in the code shown.
fileToRead = open('C:/Users/nerry-san/Desktop/EECE 502/MedicalNotes/%s'%(AllNotes[x]))
case1 = fileToRead.read()
text = nltk.WordPunctTokenizer().tokenize(case1.lower())
# Keep only the tokens that are not listed in stopPunctuations.
final_text = []
for index in range(len(text)):
word = text[index]
if (word not in stopPunctuations):
final_text.append(word)
for index in range (len(final_text)):
w1 = final_text[index]
if(isAscii(w1)):
# Look at the neighbours up to windowSize positions on either side of w1.
for index2 in range(-windowSize, windowSize+1):
if (index2 != 0):
if ( index + index2 ) in range (0, len(final_text)):
w2 = final_text[index + index2]
if(isAscii(w2)):
totalWordCount += 1
if (w1 not in wordCount):
wordCount[w1] = {}
wordCount[w1]['wCount'] = 0
# The first sighting of a (w1, w2) pair raises KeyError and creates its entry.
try:
wordCount[w1][w2]['count'] += 1
wordCount[w1]['wCount'] += 1
except KeyError:
wordCount[w1][w2] = {'count':1}
wordCount[w1]['wCount'] += 1
# Convert the raw counts into per-word probabilities.
for word in wordCount:
probabilities[word]={}
probabilities[word]['wordProb'] = float (wordCount[word]['wCount'])/ totalWordCount
# Fill relationTable[word][word2]['pairPMI'] with a base-2 logarithm of
# pairProb divided by the product of the two word probabilities.
for word in wordCount:
relationTable[word] = {}
for word2 in wordCount[word]:
if ( word2 != 'wCount'):
pairProb = float(wordCount[word][word2]['count'])/(wordCount[word]['wCount'])
relationTable[word][word2] = {}
relationTable[word][word2]['pairPMI'] = math.log(float(pairProb)/(probabilities[word]['wordProb'] * probabilities[word2]['wordProb']),2)
l = []
for word in relationTable:
l.append(word)
# Nested loops call sim() for every ordered pair of distinct words, so the
# number of calls grows with the square of len(l).
for index in range (0, len(l)):
word = l[index]
simValues = []
for index2 in range (0, len(l)):
word2 = l[index2]
if(word!= word2):
simVal = sim(word,word2)
if(simVal > 0):
simValues.append([word2, simVal])
# NOTE: simValues is rebuilt for each word and is not stored anywhere in the code shown.
simValues.sort(key= operator.itemgetter(1), reverse = True)
Every time you open a file, use the "with" statement. This will ensure the file is closed when the loop finishes (or rather, when the with block is exited).

Categories

Resources