Find consecutive alphabets in string

Find consecutive alphabets in string - python

I want to find runs of consecutive identical characters and print each run as character#count when the run is longer than 3 characters; otherwise print the characters of the run unchanged.
I want to get: B#6CCCBBB
But I get B#5CCCBBB as output. I am missing 0th element.
str1 = "BBBBBBCCCBBB"
def consecutive_alpha(str1):
count = 0
new_string = ""
n = 3
for i in range(0, len(str1)-1):
if str1[i] == str1[i+1]:
count += 1
if i == (len(str1)-2):
if count > n:
new_string = new_string + str1[i] +"#" + str(count)
else:
new_string = new_string + str1[i]*count
else:
if count > n:
new_string = new_string + str1[i] +"#" + str(count)
else:
new_string = new_string + str1[i]*count
count = 1
print new_string
consecutive_alpha(str1)

Why not just use itertools.groupby?
from itertools import groupby
def strict_groupby(iterable, **kwargs):
    """Yield ``(key, run)`` pairs like ``itertools.groupby``, with ``run`` as a string.

    ``groupby`` hands out lazy group iterators; joining each one into a string
    here lets callers take ``len()`` of a run and reuse it more than once.
    Keyword arguments (e.g. ``key=``) are forwarded to ``groupby`` unchanged.
    The items of *iterable* must be strings, because they are joined.
    """
    for key, group in groupby(iterable, **kwargs):
        yield (key, ''.join(group))


def consecutive_alpha(string, threshold=3):
    """Return *string* with long runs of one repeated character abbreviated.

    A run of more than *threshold* identical consecutive characters is
    rewritten as ``<char>#<run length>``; shorter runs are copied as they are.
    With the default threshold of 3, ``'BBBBBBCCCBBB'`` becomes ``'B#6CCCBBB'``.
    """
    return ''.join(
        f'{key}#{len(group)}' if len(group) > threshold else group
        for key, group in strict_groupby(string)
    )
consecutive_alpha('BBBBBBCCCBBB')
Output:
'B#6CCCBBB'

In case you want to try a one-liner:
from itertools import groupby
# Carry each run's key alongside its characters: a name bound inside a nested comprehension is not visible to the outer expression.
''.join(key + '#' + str(len(run)) if len(run) > 3 else ''.join(run) for key, run in ((k, list(g)) for k, g in groupby(str1)))
#B#6CCCBBB

You're getting B#5 because you initialize count = 0. So you're not counting the first character. You get it right when you do count = 1 later in the loop.
You have another problem. If the last character isn't part of a repeated sequence, you never print it, since the loop stops early.
def consecutive_alpha(str1):
    """Print *str1* with long runs of one repeated character abbreviated.

    A run of more than three identical consecutive characters is printed as
    ``<char>#<run length>``; shorter runs are printed unchanged. For example
    ``"BBBBBBCCCBBBD"`` prints ``B#6CCCBBBD``. A one-character string is
    printed as is, and an empty string prints an empty line.
    """
    n = 3  # runs longer than this are abbreviated
    pieces = []
    count = 0  # length of the run currently being scanned
    last_index = len(str1) - 1
    for i, char in enumerate(str1):
        count += 1
        # A run ends at the final character or where the next one differs.
        # Flushing in this single place also covers a trailing lone character
        # and a one-character input.
        if i == last_index or str1[i + 1] != char:
            if count > n:
                pieces.append(char + "#" + str(count))
            else:
                pieces.append(char * count)
            count = 0
    new_string = "".join(pieces)
    print(new_string)
consecutive_alpha("BBBBBBCCCBBBD")
consecutive_alpha("BBBBBBCCCAAAABBBXXXXX")

Related

Counting occurrences of multiple characters in a string, with python

I'm trying to create a function that -given a string- will return the count of non-allowed characters ('error_char'), like so: 'total count of not-allowed / total length of string'.
So far I've tried:
def allowed_characters(s):
s = s.lower()
correct_char = 'abcdef'
error_char = 'ghijklmnopqrstuvwxyz'
counter = 0
for i in s:
if i in correct_char:
no_error = '0'+'/'+ str(len(s))
return no_error
elif i in error_char:
counter += 1
result = str(sum(counter)) + '/' + str(len(s))
return result
but all I get is '0/56' where I'm expecting '22/56' since m,x,y,z are 'not allowed' and m repeats 19 times
allowed_characters('aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbmmmmmmmmmmmmmmmmmmmxyz')
'0/56'
Then I've tried:
def allowed_characters(s):
s = s.lower()
correct_char = 'abcdef'
error_char = 'ghijklmnopqrstuvwxyz'
counter = 0
for i in s:
if i in correct_char:
no_error = '0'+'/'+ str(len(s))
return no_error
elif i in error_char:
import regex as re
rgx_pattern = re.compile([error_char])
count_e = rgx_pattern.findall(error_char, s)
p_error = sum([count_e.count(i) for i in error_char])
result = str(p_error) + '/' + str(len(s))
But I get the same result...
I've also tried these other ways, but keep getting the same:
def allowed_characters1(s):
s = s.lower()
correct_char = 'abcdef'
for i in s:
if i not in correct_char:
counter = sum([s.count(i) for i in s])
p_error = str(counter) + '/' + str(len(s))
return p_error
elif i in correct_char:
no_error = '0'+'/'+ str(len(s))
return no_error
and...
def allowed_characters2(s):
s = s.lower()
correct_char = 'abcdef'
for i in s:
if i not in correct_char:
counter = sum(s.count(i))
p_error = str(counter) + '/' + str(len(s))
return p_error
elif i in correct_char:
no_error = '0'+'/'+ str(len(s))
return no_error
I've even tried changing the logic and iterating over 'correct/error_char' instead, but nothing seems to work... I keep getting the same result over and over. It looks as though the loop stops right after first character or doesn't run the 'elif' part?

Whenever you need fast counting, it is worth thinking about collections.Counter. You can simplify your code like this:
Note: please don't change your problem description while people are in the middle of writing answers. That makes it very hard to keep the answers in sync with the question.
There is still room to improve it, though.
from collections import Counter
def allowed_char(s):
    """Count allowed and not-allowed letters in *s*, ignoring case.

    Letters ``a``-``f`` are allowed and ``g``-``z`` are errors; any other
    character (digits, spaces, punctuation) is counted in neither total.
    The per-character tally is also printed, as a debugging aid.

    Returns:
        A ``(correct_count, error_count)`` tuple.
    """
    s = s.lower()
    correct_char = 'abcdef'
    error_char = 'ghijklmnopqrstuvwxyz'
    # One pass over the string; the sums below then iterate over at most one
    # entry per distinct character rather than over the whole input.
    ok_counts = Counter(s)
    print(f' allowed: {ok_counts} ')
    correct_count = sum(count for c, count in ok_counts.items() if c in correct_char)
    error_count = sum(count for c, count in ok_counts.items() if c in error_char)
    return correct_count, error_count  # print both
s =('aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbmmmmmmmmmmmmmmmmmmmxyz')
print(allowed_char(s)) # (34, 22)
print(allowed_char('abdmmmmxyz')) # (3, 7)
Alternatively, if you really want to use a for-loop and learn to process the string character by character, you could try this:
def loop_count(s):
    """Count allowed and not-allowed letters in *s* with an explicit loop.

    Letters ``a``-``f`` are allowed and ``g``-``z`` are errors, compared
    case-insensitively. Characters in neither set (digits, spaces,
    punctuation) are skipped, so the result agrees with ``allowed_char``.

    Returns:
        A ``(correct_count, error_count)`` tuple.
    """
    # Defined locally so the function does not depend on a global that the
    # snippet never creates.
    correct_char = 'abcdef'
    error_char = 'ghijklmnopqrstuvwxyz'
    correct_count = error_count = 0
    for c in s.lower():
        if c in correct_char:
            correct_count += 1
        elif c in error_char:
            error_count += 1
    return correct_count, error_count

I would use a regex replacement trick here using len():
def allowed_characters(s):
    """Return how many characters of *s* are NOT lowercase ``g``-``z``.

    The substitution deletes every character outside ``g``-``z``, leaving
    only the not-allowed letters; subtracting that length from ``len(s)``
    gives the count of everything else.

    NOTE(review): the question asks for the number of not-allowed characters,
    which is ``len(s)`` minus this return value (equivalently, the length of
    the substituted string). Confirm which of the two is wanted before
    relying on this function.
    """
    import re  # the snippet uses ``re`` but never imports it

    only_disallowed = re.sub(r'[^ghijklmnopqrstuvwxyz]+', '', s)
    return len(s) - len(only_disallowed)
The above returns the length of the input string minus the length of the input with all allowed characters removed (alternatively minus the length of the string with only non allowed characters).

Trying to reverse letter order in words with 5 or more letters in a string. This code does not crash but does not output anything either

def spin(s):
for word in s:
if len(word) >= 5:
w = ' '.join(w[::-1] for w in s.split())
return w
print(w)
s = "Twist and shout"
spin(s)
desired output: "tsiwT and tuohs"

def spin(s):
    """Return *s* with every word of five or more letters reversed.

    Words are separated with ``str.split()``, so runs of whitespace collapse
    to single spaces in the result; shorter words are kept unchanged.
    """
    return ' '.join(
        word[::-1] if len(word) >= 5 else word
        for word in s.split()
    )
print(spin("Twist and shout"))

def reverse(x):
    """Return *x* with every word of five or more letters reversed.

    Words are separated on single spaces (``split(" ")``), so consecutive
    spaces in the input are preserved in the output.
    """
    final_list = []
    # Split the argument itself, not a global named ``s``.
    for word in x.split(" "):
        if len(word) >= 5:
            word = word[::-1]
        final_list.append(word)
    return " ".join(final_list)

Using index placeholders instead of the split operation, just to show how placeholders can be used:
# Split on spaces by hand, tracking indices instead of calling str.split(),
# and reverse every word longer than four characters.
string = "Twist and shout"
count = 0  # number of characters consumed so far
temp = 0   # index at which the current word starts
array = []
for element in range(len(string)):
    count += 1
    # Compare by value: identity (`is`) against a literal only works by
    # accident of string interning.
    if string[element] == " ":
        word = string[temp:count - 1]
        array.append(word[::-1] if len(word) > 4 else word)
        temp = count
# The last word has no trailing space, so flush it after the loop.
word = string[temp:count]
array.append(word[::-1] if len(word) > 4 else word)

Counting while using recursion with strings from inputs

I'm trying to create a game where the score is dependent on what the letters are worth. I'm having trouble with keeping a count on the side while still recursing to the next letter of the string. I'm really stuck & I hope you can help!
def net_zero():
guess_prompt = input('Guess a string: ')
win_display = 'Congratulations you win'
low_vowels = "aeiou" # +1
low_constants = "bcdfghjklmnpqrstvwxyz" # -1
up_vowels = "AEIOU" # +2
up_constants = "BCDFGHJKLMNPQRSTVWXYZ" # -2
ten_digits = "0123456789" # +3
#else -3
count = 0
if len(guess_prompt) == 0:
return count
elif guess_prompt[0] in low_vowels:
return (count + 1) + guess_prompt[1:]
elif guess_prompt[0] in low_constants:
return (count - 1) + guess_prompt[1:]
elif guess_prompt[0] in up_vowels:
return (count + 2) + guess_prompt[1:]
elif guess_prompt[0] in up_constants:
return (count - 2) + guess_prompt[1:]
elif guess_prompt[0] in ten_digits:
return (count + 3) + guess_prompt[1:]
else: return (count - 3) + guess_prompt[1:]

I think you would like to do the following:
count = 0
if len(guess_prompt) == 0:
return count
for letter in guess_prompt:
if letter in low_vowels:
count +=1
if letter in low_constants:
count -=1
...
return count

I feel you can use dict instead of using string content for lookup. It will improve lookup time.
guess_prompt = "aaB4??BBBBB"

# Map every scoring character to its value once, so each lookup in the
# scoring loop is a single dict access rather than several substring scans.
value = {}
for chars, score in (
    ("aeiou", 1),
    ("bcdfghjklmnpqrstvwxyz", -1),
    ("AEIOU", 2),
    ("BCDFGHJKLMNPQRSTVWXYZ", -2),
    ("0123456789", 3),
):
    for char in chars:
        value[char] = score

count = 0
for char in guess_prompt:
    count = count + value.get(char, -3)  # default value -3 for any other character
print(count)  ## PRINTS -13 ##

Text file indexing using python 3.4.3

I try to write a Python 3.4 code to index text document from external
and this my attempt. when run it error message:
raw input is not defined
What I want is:
to tokenize the document which is out of python 34 folder
to remove stop words
to stem
indexing
The code:
import string
def RemovePunc():
line = []
i = 0
text_input = ""
total_text_input = "C:Users\Kelil\Desktop\IRS_Assignment\project.txt"
#This part removes the punctuation and converts input text to lowercase
while i != 1:
text_input = raw_input
if text_input == ".":
i = 1
else:
new_char_string = ""
for char in text_input:
if char in string.punctuation:
char = " "
new_char_string = new_char_string + char
line = line + [new_char_string.lower()]
#This is a list with all of the text that was entered in
total_text_input = (total_text_input + new_char_string).lower()
return line
def RemoveStopWords(line):
line_stop_words = []
stop_words = ['a','able','about','across','after','all','almost','also','am','among',
'an','and','any','are','as','at','be','because','been','but','by','can',
'cannot','could','dear','did','do','does','either','else','ever','every',
'for','from','get','got','had','has','have','he','her','hers','him','his',
'how','however','i','if','in','into','is','it','its','just','least','let',
'like','likely','may','me','might','most','must','my','neither','no','nor',
'not','of','off','often','on','only','or','other','our','own','rather','said',
'say','says','she','should','since','so','some','than','that','the','their',
'them','then','there','these','they','this','tis','to','too','twas','us',
'wants','was','we','were','what','when','where','which','while','who',
'whom', 'why', 'will', 'with', 'would', 'yet', 'you', 'your']
#this part removes the stop words for the list of inputs
line_stop_words = []
sent = ""
word = ""
test = []
for sent in line:
word_list = string.split(sent)
new_string = ""
for word in word_list:
if word not in stop_words:
new_string = new_string + word + " "
new_string = string.split(new_string)
line_stop_words = line_stop_words + [new_string]
return(line_stop_words)
def StemWords(line_stop_words):
leaf_words = "s","es","ed","er","ly","ing"
i=0
while i < 6:
count = 0
length = len(leaf_words[i])
while count < len(line_stop_words):
line = line_stop_words[count]
count2 = 0
while count2 < len(line):
#line is the particular list(or line) that we are dealing with, count if the specific word
if leaf_words[i] == line[count2][-length:]:
line[count2] = line[count2][:-length]
count2 = count2 + 1
line_stop_words[count] = line
count2 = 0
count = count + 1
count = 0
i = i + 1
return(line_stop_words)
def indexDupe(lineCount,occur):
if str(lineCount) in occur:
return True
else:
return False
def Indexing(line_stop_words):
line_limit = len(line_stop_words)
index = []
line_count = 0
while line_count < line_limit:
for x in line_stop_words[line_count]:
count = 0
while count <= len(index):
if count == len(index):
index = index + [[x,[str(line_count+1)]]]
break
else:
if x == index[count][0]:
if indexDupe(line_count+1,index[count][1]) == False:
index[count][1] += str(line_count+1)
break
count = count + 1
line_count = line_count + 1
return(index)
def OutputIndex(index):
print ("Index:")
count = 0
indexLength = len(index)
while count < indexLength:
print (index[count][0],)
count2 = 0
lineOccur = len(index[count][1])
while count2 < lineOccur:
print (index[count][1][count2],)
if count2 == lineOccur -1:
print ("")
break
else:
print (",",)
count2 += 1
count += 1
line = RemovePunc()
line_stop_words = RemoveStopWords(line)
line_stop_words = StemWords(line_stop_words)
index = Indexing(line_stop_words)
OutputIndex(index)

@smichak already put the right answer in the comments: raw_input was renamed to input in Python 3. So you want:
text_input = input()
Don't forget those parentheses, since you want to call the function.

Index Error When Comparing Strings - Python

I am having a bit of trouble with some Python code. I have a large text file called "big.txt". I have iterated over it in my code to sort each word into an array (or list) and then iterated over it again to remove any character that is not in the alphabet. I also have a function called worddistance which looks at how similar two words are and returns a score subsequently. I have another function called autocorrect. I want to pass this function a misspelled word, and print a 'Did you mean...' sentence with words that gave a low score on the worddistance function (the function adds 1 to a counter whenever a difference is noticed - the lower the score, the more similar).
Strangely, I keep getting the error:
"Index Error: string index out of range"
I am at a loss at what is going on!
My code is below.
Thanks in advance for the replies,
Samuel Naughton
f = open("big.txt", "r")
words = list()
temp_words = list()
for line in f:
for word in line.split():
temp_words.append(word.lower())
allowed_characters = 'abcdefghijklmnopqrstuvwxyz'
for item in temp_words:
temp_new_word = ''
for char in item:
if char in allowed_characters:
temp_new_word += char
else:
continue
words.append(temp_new_word)
list(set(words)).sort()
def worddistance(word1, word2):
counter = 0
if len(word1) > len(word2):
counter += len(word1) - len(word2)
new_word1 = word1[:len(word2) + 1]
for char in range(0, len(word2) + 1) :
if word2[char] != new_word1[char]:
counter += 1
else:
continue
elif len(word2) > len(word1):
counter += len(word2) - len(word1)
new_word2 = word2[:len(word1) + 1]
for char in range(0, len(word1) + 1):
if word1[char] != word2[char]:
counter += 1
else:
continue
return counter
def autocorrect(word):
word.lower()
if word in words:
print("The spelling is correct.")
return
else:
suggestions = list()
for item in words:
diff = worddistance(word, item)
if diff == 1:
suggestions.append(item)
print("Did you mean: ", end = ' ')
if len(suggestions) == 1:
print(suggestions[0])
return
else:
for i in range(0, len(suggestions)):
if i == len(suggestons) - 1:
print("or " + suggestions[i] + "?")
return
print(suggestions[i] + ", ", end="")
return

In worddistance(), it looks like for char in range(0, len(word1) + 1): should be:
for char in range(len(word1)):
And for char in range(0, len(word2) + 1) : should be:
for char in range(len(word2)):
And by the way, list(set(words)).sort() is sorting a temporary list, which is probably not what you want. It should be:
words = sorted(set(words))

As mentioned in the other comment, you should range(len(word1)).
In addition to that:
- You should consider the case where word1 and word2 have the same length, i.e. len(word2) == len(word1)
- You should also take care of naming. In the second condition in wordDistance function
if word1[char] != word2[char]:
You should be comparing to new_word2
if word1[char] != new_word2[char]:
- In the autocorrect, you should assign lower to word= word.lower()
# Reduce every token to its allowed letters, then de-duplicate and sort.
# Tokens with no allowed letters at all (e.g. "123" or "--") would become the
# empty string; they are dropped so that '' never ends up in the dictionary
# and is never offered as a spelling suggestion.
words = sorted({
    cleaned
    for cleaned in (
        ''.join(char for char in item if char in allowed_characters)
        for item in temp_words
    )
    if cleaned
})
def worddistance(word1, word2):
    """Return a simple position-wise distance between two words.

    The score is the difference in length plus the number of positions,
    within the shorter word's length, at which the two words differ.
    Identical words score 0. This is not an edit distance: an insertion
    near the start shifts every later character and is counted as many
    mismatches.
    """
    counter = abs(len(word1) - len(word2))
    # zip stops at the shorter word, so no slicing or index arithmetic is
    # needed and no index can run past the end of either string.
    for char1, char2 in zip(word1, word2):
        if char1 != char2:
            counter += 1
    return counter
def autocorrect(word):
    """Print spelling feedback for *word* against the global ``words`` list.

    The word is lower-cased first. If it is in ``words`` the spelling is
    reported as correct; otherwise every entry of ``words`` whose
    ``worddistance`` to it is exactly 1 is printed as a suggestion, one per
    line, with the last one introduced by "or" and followed by "?". Returns
    ``None`` in every case.
    """
    word = word.lower()  # str.lower() returns a new string; keep the result
    if word in words:
        print("The spelling is correct.")
        return

    suggestions = [item for item in words if worddistance(word, item) == 1]
    if not suggestions:
        # Avoid printing a dangling "Did you mean: " with nothing after it.
        print("No suggestions found.")
        return

    print("Did you mean: ")
    if len(suggestions) == 1:
        print(suggestions[0])
        return
    # Every suggestion except the last gets a trailing comma; the last is
    # printed exactly once.
    for item in suggestions[:-1]:
        print(item + ", ")
    print("or " + suggestions[-1] + "?")
Next time, try to use Python built-in functions such as enumerate instead of writing for i in range(len(lst)) and then lst[i], use len instead of a manual counter, and so on.
Eg:
Your distance function could be written this way, or even more simply.
def distance(word1, word2):
    """Return the length difference plus the count of differing positions.

    Only the positions that exist in both words are compared; the surplus
    characters of the longer word are counted through the length difference.
    """
    # zip truncates to the shorter word, so one expression covers the
    # longer / shorter / equal-length cases alike.
    mismatches = sum(1 for x, z in zip(word1, word2) if x != z)
    return abs(len(word1) - len(word2)) + mismatches

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Find consecutive alphabets in string - python

Incase want to try one-liner from itertools import groupby ''.join(_ + '#' + str(len(l)) if len(l)> 3 else ''.join(l) for l in [list(g) for _,g in groupby(str1)]) #B#6CCCBBB

Related

Counting occurrences of multiple characters in a string, with python

Trying to reverse letter order in words with 5 or more letters in a string. This code does not crash but does not output anything either

Counting while using recursion with strings from inputs

Text file indexing using python 3.4.3

Index Error When Comparing Strings - Python

Categories

Resources