Counting occurrences of multiple characters in a string, with python

Counting occurrences of multiple characters in a string, with python - python

I'm trying to create a function that -given a string- will return the count of non-allowed characters ('error_char'), like so: 'total count of not-allowed / total length of string'.
So far I've tried:
def allowed_characters(s):
s = s.lower()
correct_char = 'abcdef'
error_char = 'ghijklmnopqrstuvwxyz'
counter = 0
for i in s:
if i in correct_char:
no_error = '0'+'/'+ str(len(s))
return no_error
elif i in error_char:
counter += 1
result = str(sum(counter)) + '/' + str(len(s))
return result
but all I get is '0/56' where I'm expecting '22/56' since m,x,y,z are 'not allowed' and m repeats 19 times
allowed_characters('aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbmmmmmmmmmmmmmmmmmmmxyz')
'0/56'
Then I've tried:
def allowed_characters(s):
s = s.lower()
correct_char = 'abcdef'
error_char = 'ghijklmnopqrstuvwxyz'
counter = 0
for i in s:
if i in correct_char:
no_error = '0'+'/'+ str(len(s))
return no_error
elif i in error_char:
import regex as re
rgx_pattern = re.compile([error_char])
count_e = rgx_pattern.findall(error_char, s)
p_error = sum([count_e.count(i) for i in error_char])
result = str(p_error) + '/' + str(len(s))
But I get the same result...
I've also tried these other ways, but keep getting the same:
def allowed_characters1(s):
s = s.lower()
correct_char = 'abcdef'
for i in s:
if i not in correct_char:
counter = sum([s.count(i) for i in s])
p_error = str(counter) + '/' + str(len(s))
return p_error
elif i in correct_char:
no_error = '0'+'/'+ str(len(s))
return no_error
and...
def allowed_characters2(s):
s = s.lower()
correct_char = 'abcdef'
for i in s:
if i not in correct_char:
counter = sum(s.count(i))
p_error = str(counter) + '/' + str(len(s))
return p_error
elif i in correct_char:
no_error = '0'+'/'+ str(len(s))
return no_error
I've even tried changing the logic and iterating over 'correct/error_char' instead, but nothing seems to work... I keep getting the same result over and over. It looks as though the loop stops right after first character or doesn't run the 'elif' part?

Whenever it comes to do quicker counting - it's always good to think about Counter You can try to simplify your code like this:
Notes - please don't change your Problem Description during the middle of people's answering posts. That make it very hard to keep in-sync.
There is still room to improve it though.
from collections import Counter
def allowed_char(s):
s = s.lower()
correct_char = 'abcdef'
error_char = 'ghijklmnopqrstuvwxyz'
ok_counts = Counter(s)
print(f' allowed: {ok_counts} ')
correct_count = sum(count for c, count in ok_counts.items() if c in correct_char)
error_count = sum(count for c, count in ok_counts.items() if c in error_char)
#return sum(not_ok.values()) / total
return correct_count, error_count # print both
s =('aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbmmmmmmmmmmmmmmmmmmmxyz')
print(allowed_char(s)) # (34, 22)
print(allowed_char('abdmmmmxyz')) # (3, 7)
Alternatively, you really want to use for-loop and learn to process the string of characters, you could try this:
def loop_count(s):
s = s.lower()
correct_count = error_count = 0
for c in s:
if c in correct_char:
correct_count += 1
else:
error_count += 1
return correct_count, error_count

I would use a regex replacement trick here using len():
def allowed_characters(s):
return len(s) - len(re.sub(r'[^ghijklmnopqrstuvwxyz]+', '', s))
The above returns the length of the input string minus the length of the input with all allowed characters removed (alternatively minus the length of the string with only non allowed characters).

Related

Why does Python 3 for loop output and behave differently?

This is a password generator, I couldn't really determine where the problem is, but from the output, I could say it's around turnFromAlphabet()
The function turnFromAlphabet() converts an alphabetical character to its integer value.
The random module, I think doesn't do anything here as it just decides whether to convert a character in a string to uppercase or lowercase. And if a string is in either, when sent or passed to turnFromAlphabet() it is converted to lowercase first to avoid errors but there are still errors.
CODE:
import random
import re
#variables
username = "oogisjab" #i defined it already for question purposes
index = 0
upperOrLower = []
finalRes = []
index2a = 0
#make decisions
for x in range(len(username)):
decision = random.randint(0,1)
if(decision is 0):
upperOrLower.append(True)
else:
upperOrLower.append(False)
#Apply decisions
for i in range(len(username)):
if(upperOrLower[index]):
finalRes.append(username[index].lower())
else:
finalRes.append(username[index].upper())
index+=1
s = ""
#lowkey final
s = s.join(finalRes)
#reset index to 0
index = 0
def enc(that):
if(that is "a"):
return "#"
elif(that is "A"):
return "4"
elif(that is "O"):
return "0" #zero
elif(that is " "):
# reduce oof hackedt
decision2 = random.randint(0,1)
if(decision2 is 0):
return "!"
else:
return "_"
elif(that is "E"):
return "3"
else:
return that
secondVal = []
for y in range(len(s)):
secondVal.append(enc(s[index]))
index += 1
def turnFromAlphabet(that, index2a):
alp = "abcdefghijklmnopqrstuvwxyz"
alp2 = list(alp)
for x in alp2:
if(str(that.lower()) == str(x)):
return index2a+1
break
else:
index2a += 1
else:
return "Error: Input is not in the alphabet"
#real final
finalOutput = "".join(secondVal)
#calculate some numbers and chars from a substring
amount = len(finalOutput) - round(len(finalOutput)/3)
getSubstr = finalOutput[-(amount):]
index = 0
allFactors = {
};
#loop from substring
for x in range(len(getSubstr)):
hrhe = re.sub(r'\d', 'a', ''.join(e for e in getSubstr[index] if e.isalnum())).replace(" ", "a").lower()
print(hrhe)
#print(str(turnFromAlphabet("a", 0)) + "demo")
alpInt = turnFromAlphabet(hrhe, 0)
print(alpInt)
#get factors
oneDimensionFactors = []
for p in range(2,alpInt):
# if mod 0
if(alpInt % p) is 0:
oneDimensionFactors.append(p)
else:
oneDimensionFactors.append(1)
indexP = 0
for z in oneDimensionFactors:
allFactors.setdefault("index{0}".format(index), {})["keyNumber"+str(p)] = z
index+=1
print(allFactors)

I think that you are getting the message "Error: input is not in the alphabet" because your enc() change some of your characters. But the characters they becomes (for example '#', '4' or '!') are not in your alp variable defined in turnFromAlphabet(). I don't know how you want to fix that. It's up to you.
But I have to say to your code is difficult to understand which may explain why it can be difficult for you to debug or why others may be reluctant to help you. I tried to make sense of your code by removing code that don't have any impact. But even in the end I'm not sure I understood what you tried to do. Here's what I understood of your code:
import random
import re
#username = "oogi esjabjbb"
username = "oogisjab" #i defined it already for question purposes
def transform_case(character):
character_cases = ('upper', 'lower')
character_to_return = character.upper() if random.choice(character_cases) == 'upper' else character.lower()
return character_to_return
username_character_cases_modified = "".join(transform_case(current_character) for current_character in username)
def encode(character_to_encode):
translation_table = {
'a' : '#',
'A' : '4',
'O' : '0',
'E' : '3',
}
character_translated = translation_table.get(character_to_encode, None)
if character_translated is None:
character_translated = character_to_encode
if character_translated == ' ':
character_translated = '!' if random.choice((True, False)) else '_'
return character_translated
final_output = "".join(encode(current_character) for current_character in username_character_cases_modified)
amount = round(len(final_output) / 3)
part_of_final_output = final_output[amount:]
all_factors = {}
for (index, current_character) in enumerate(part_of_final_output):
hrhe = current_character
if not hrhe.isalnum():
continue
hrhe = re.sub(r'\d', 'a', hrhe)
hrhe = hrhe.lower()
print(hrhe)
def find_in_alphabet(character, offset):
alphabet = "abcdefghijklmnopqrstuvwxyz"
place_found = alphabet.find(character)
if place_found == -1 or not character:
raise ValueError("Input is not in the alphabet")
else:
place_to_return = place_found + offset + 1
return place_to_return
place_in_alphabet = find_in_alphabet(hrhe, 0)
print(place_in_alphabet)
def provide_factors(factors_of):
for x in range(1, int(place_in_alphabet ** 0.5) + 1):
(quotient, remainder) = divmod(factors_of, x)
if remainder == 0:
for current_quotient in (quotient, x):
yield current_quotient
unique_factors = set(provide_factors(place_in_alphabet))
factors = sorted(unique_factors)
all_factors.setdefault(f'index{index}', dict())[f'keyNumber{place_in_alphabet}'] = factors
print(all_factors)
Is near what your wanted to do?

Find consecutive alphabets in string

I want to find consecutive number of characters and print them as >3 with alphabets#count otherwise print all alphabets
I want to get: B#6CCCBBB
But I get B#5CCCBBB as output. I am missing 0th element.
str1 = "BBBBBBCCCBBB"
def consecutive_alpha(str1):
count = 0
new_string = ""
n = 3
for i in range(0, len(str1)-1):
if str1[i] == str1[i+1]:
count += 1
if i == (len(str1)-2):
if count > n:
new_string = new_string + str1[i] +"#" + str(count)
else:
new_string = new_string + str1[i]*count
else:
if count > n:
new_string = new_string + str1[i] +"#" + str(count)
else:
new_string = new_string + str1[i]*count
count = 1
print new_string
consecutive_alpha(str1)

Why not just use itertools.groupby?
from itertools import groupby
def strict_groupby(iterable, **kwargs):
for key, group in groupby(iterable, **kwargs):
yield (key, ''.join(group))
def consecutive_alpha(string):
return ''.join(f'{key}#{len(group)}'
if len(group) > 3
else group
for key, group in strict_groupby(string))
consecutive_alpha('BBBBBBCCCBBB')
Output:
'B#6CCCBBB'

Incase want to try one-liner
from itertools import groupby
''.join(_ + '#' + str(len(l)) if len(l)> 3 else ''.join(l) for l in [list(g) for _,g in groupby(str1)])
#B#6CCCBBB

You're getting B#5 because you initialize count = 0. So you're not counting the first character. You get it right when you do count = 1 later in the loop.
You have another problem. If the last character isn't part of a repeated sequence, you never print it, since the loop stops early.
def consecutive_alpha(str1):
count = 1
new_string = ""
n = 3
for i in range(0, len(str1)-1):
if str1[i] == str1[i+1]:
count += 1
if i == (len(str1)-2):
if count > n:
new_string += str1[i] +"#" + str(count)
else:
new_string += str1[i]*count
else:
if count > n:
new_string += str1[i] + "#" + str(count)
else:
new_string += str1[i]*count
count = 1
# Add last character if necessary
if len(str1) > 1 and str1[-1] != str1[-2]:
new_string += str1[-1]
print(new_string)
consecutive_alpha("BBBBBBCCCBBBD")
consecutive_alpha("BBBBBBCCCAAAABBBXXXXX")

how to recursively remove all adjacent characters that have repeated 3 or more times using python

Test Cases
Input: abbbaaccada
Output: ccada
Input: bbccdddcb
Output: (Empty string)
str = input("Enter string: ")
def my_string(string):
if not string:
return ""
if len(string) == 1:
return string
if string[0] == string[1] == string[2]:
return my_string(string[3:])
return string[0] + my_string(string[1:])
print (my_string(str))
I am new to python. and I am trying to remove characters with 3 or more consecutive appearance in a string. In this I could only able to get output of only 1 iteration. e.g. i/p- hhhelllo o/p-eo but for i/p- abbbaaccada o/p is aaaccada but it should be ccada.. please help..
I have done this till 3 repetition but how to generalize it for more than 3 repetition.??

Your problem presents the opportunity to show how else in for loops can be useful. Take a look:
def remover(my_str):
temp = set(my_str)
while True:
for c in temp:
if 3*c in my_str:
my_str = my_str.replace(3*c, '')
break
else:
break
return my_str
test1 = 'abbbaaccada'
print(remover(test1)) # -> ccada
test2 = 'i/p- hhhelllo'
print(remover(test2)) # -> i/p- eo
If you insist on having recursive calls, you can modify the above as follows:
def remover(my_str):
temp = set(my_str)
new_str = my_str
for c in temp:
if 3*c in new_str:
new_str = new_str.replace(3*c, '')
if my_str == new_str:
return new_str
else:
return remover(new_str)

I have added a solution which will work for 3 or more repetition as the above solution didn't work for me. It is a recursive solution.
import re
def format_string(u_str):
f_str = remove_string(u_str)
if f_str == u_str:
return f_str
else:
return format_string(f_str)
def remove_string(u_str):
index = 0 # This will maintain the index while traversing the entire string
while index < len(u_str):
r = re.search(u_str[index]*4 + '*', u_str)
if r:
start, end = r.span() # start and end index of substring matching 3 or more repetition
u_str = u_str[:start] + u_str[end:] # removing the found substring
index = end
else:
index += 1
return u_str
test1 = 'abbbaaccada'
print('output:' + format_string(test1))
test2 = 'bbccdddcb'
print('output:' + format_string(test2))

How to combine two similar functions that convert between hiragana and katakana?

I have two functions that convert between katakana and hiragana and they look the same:
katakana_minus_hiragana = 0x30a1 - 0x3041 # KATAKANA LETTER A - HIRAGANA A
def is_hirgana(char):
return 0x3040 < ord(char[0]) and ord(char[0]) < 0x3097
def is_katakana(char):
return 0x30a0 < ord(char[0]) and ord(char[0]) < 0x30f7
def hiragana_to_katakana(hiragana_text):
katakana_text = ""
max_len = 0
for i, char in enumerate(hiragana_text):
if is_hirgana(char):
katakana_text += chr(ord(char) + katakana_minus_hiragana)
max_len += 1
else:
break
return katakana_text, max_len
def katakana_to_hiragana(katakana_text):
hiragana_text = ""
max_len = 0
for i, char in enumerate(katakana_text):
if is_katakana(char):
hiragana_text += chr(ord(char) - katakana_minus_hiragana)
max_len += 1
else:
break
return hiragana_text, max_len
Is there a way to simplify hiragana_to_katakana() and katakana_to_hiragana() into a duck-type function or a super/meta function?
E.g. something like
def convert_hk_kh(text, charset_range, offset):
charset_start, charset_end = charset_range
output_text = ""
max_len = 0
for i, char in enumerate(text):
if charset_start < ord(char[0]) and ord(char[0]) < charset_end:
output_text += chr(ord(char) + offset)
max_len +=1
else:
break
return output_text, max_len
def katakana_to_hiragana(katakana_text):
return convert_hk_kh(katakana_text, (0x30a0, 0x30f7), -katakana_minus_hiragana)
def hiragana_to_katakana(hiragana_text):
return convert_hk_kh(hiragana_text, (0x3040, 0x3097), katakana_minus_hiragana)
Are there other pythonic ways to simplify the two functions that are very similar?
EDITED
There's also https://github.com/olsgaard/Japanese_nlp_scripts which seems to do the same thing with str.translate. Is that more efficient? More pythonic?

I'd do something like this:
KATAKANA_HIRGANA_SHIFT = 0x30a1 - 0x3041 # KATAKANA LETTER A - HIRAGANA A
def shift_chars_prefix(text, amount, condition):
output = ''
for last_index, char in enumerate(text):
if not condition(char):
break
output += chr(ord(char) + amount)
return output, last_index
def katakana_to_hiragana(text):
return shift_chars_prefix(text, -KATAKANA_HIRGANA_SHIFT, lambda c: '\u30a0' < c < '\u30f7')
def hiragana_to_katakana(text):
return shift_chars_prefix(text, KATAKANA_HIRGANA_SHIFT, lambda c: '\u3040' < c < '\u3097')
You can also use regex if you don't return the length of the replaced prefix:
import re
KATAKANA_HIRGANA_SHIFT = 0x30a1 - 0x3041 # KATAKANA LETTER A - HIRAGANA A
def shift_by(n):
def replacer(match):
return ''.join(chr(ord(c) + n) for c in match.group(0))
return replacer
def katakana_to_hiragana(text):
return re.sub(r'^[\u30a1-\u30f6]+', shift_by(KATAKANA_HIRGANA_SHIFT), text)
def hiragana_to_katakana(text):
return re.sub(r'^[\u3041-\u3096]+', shift_by(-KATAKANA_HIRGANA_SHIFT), text)

Here’s a function that would switch each kind of kana to the other.
Unlike the given functions, it does not stop when it encounters
non-kana, but simply passes those characters through without changing
them.
Note that conversion between kana types is not as simple as this; for
example, in hiragana a long “e” sound is indicated by ええ or えい
(e.g., おねえ older sister, せんせい teacher), while in katakana one
uses a chōonpu (オネー, せんせー). There are kana characters outside the
ranges you use as well.
def switch_kana_type(kana_text):
"""Replace each kind of kana with the other kind. Other characters are
passed through unchanged."""
output_text = ''
for c in kana_text:
if is_hiragana(c): # Note typo fix of "is_hirgana"
output_text += chr(ord(c) + katakana_minus_hiragana)
elif is_katakana(char):
output_text += chr(ord(c) - katakana_minus_hiragana)
else:
output_text += c;
return output_text, len(output_text)

How could I write this function the pythonic way? [duplicate]

I have a string like '....(((...((...' for which I have to generate another string 'ss(4)h5(3)ss(3)h2(2)ss(3)'.
'.' corresponds to 'ss' and the number of continous '.' is in the bracket.
'(' corresponds to 'h5' and the number of continuos '(' is in the bracket.
Currently I'm able to get the output 'ss(4)h5(3)ss(3)' and my code ignores the last two character sequences.
This is what I have done so far
def main():
stringInput = raw_input("Enter the string:")
ssCount = 0
h5Count = 0
finalString = ""
ssString = ""
h5String = ""
ssCont = True
h5Cont = True
for i in range(0, len(stringInput), 1):
if stringInput[i] == ".":
h5Cont = False
if ssCont:
ssCount = ssCount + 1
ssString = "ss(" + str(ssCount) + ")"
ssCont = True
else:
finalString = finalString + ssString
ssCont = True
ssCount = 1
elif stringInput[i] == "(":
ssCont = False
if h5Cont:
h5Count = h5Count + 1
h5String = "h5(" + str(h5Count) + ")"
h5Cont = True
else:
finalString = finalString + h5String
h5Cont = True
h5Count = 1
print finalString
main()
How to modify the code to get the desired output?

I don’t know about modifying your existing code, but to me this can be done very succinctly and pythonically using itertools.groupby. Note that I’m not sure if the 'h2' in your expected output is a typo or if it should be 'h5', which I’m assuming.
from itertools import chain, groupby
string = '....(((...((...'
def character_count(S, labels): # this allows you to customize the labels you want to use
for K, G in groupby(S):
yield labels[K], '(', str(sum(1 for c in G)), ')' # sum() counts the number of items in the iterator G
output = ''.join(chain.from_iterable(character_count(string, {'.': 'ss', '(': 'h5'}))) # joins the components into a single string
print(output)
# >>> ss(4)h5(3)ss(3)h5(2)ss(3)

#Kelvin 's answer is great, however if you want to define a function yourself, you could do it like this:
def h5ss(x):
names = {".": "ss", "(": "h5"}
count = 0
current = None
out = ""
for i in x:
if i == current:
count += 1
else:
if current is not None:
out += "{}({})".format(names[current], count)
count = 1
current = i
if current is not None:
out += "{}({})".format(names[current], count)
return out

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Counting occurrences of multiple characters in a string, with python - python

Related

Why does Python 3 for loop output and behave differently?

Find consecutive alphabets in string

how to recursively remove all adjacent characters that have repeated 3 or more times using python

How to combine two similar functions that convert between hiragana and katakana?

How could I write this function the pythonic way? [duplicate]

Categories

Resources