count the matching characters between two inputs given by user - python

How do i get this python output? counting matches and mismatches
String1: aaabbbccc #aaabbbccc is user input
String2: aabbbcccc #aabbbcccc is user input
Matches: ?
MisMatches: ?
String1: aaAbbBccc #mismatches are capitalize
String2: aaBbbCccc

import itertools
s1 = 'aaabbbccc'
s2 = 'aabbbcccc'
print "Matches:", sum( c1==c2 for c1, c2 in itertools.izip(s1, s2) )
print "Mismatches:", sum( c1!=c2 for c1, c2 in itertools.izip(s1, s2) )
print "String 1:", ''.join( c1 if c1==c2 else c1.upper() for c1, c2 in itertools.izip(s1, s2) )
print "String 2:", ''.join( c2 if c1==c2 else c2.upper() for c1, c2 in itertools.izip(s1, s2) )
This produces:
Matches: 7
Mismatches: 2
String 1: aaAbbBccc
String 2: aaBbbCccc

Assuming you have gotten the string from a file or user input, what about:
import itertools
s1 = 'aaabbbccc'
s2 = 'aabbbcccc'
# This will only consider n characters, where n = min(len(s1), len(s2))
match_indices = [i for (i,(c1, c2)) in enumerate(itertools.izip(s1, s2)) if c1 == c2]
num_matches = len(match_indices)
num_misses = min(len(s1), len(s2)) - num_matches
print("Matches: %d" % num_matches)
print("Mismatches: %d" % num_misses)
print("String 1: %s" % ''.join(c if i in match_indices else c.upper() for (i,c) in enumerate(s1)))
print("String 2: %s" % ''.join(c if i in match_indices else c.upper() for (i,c) in enumerate(s2)))
Output:
Matches: 7
Mismatches: 2
String 1: aaAbbBccc
String 1: aaBbbCccc
If you wanted to count strings of uneven length (where extra characters counted as misses), you could change:
num_misses = min(len(s1), len(s2)) - num_matches
# to
num_misses = max(len(s1), len(s2)) - num_matches

You can try:
index = 0
for letter in String1:
if String1[index] != String2[index]:
mismatches +=1
index += 1
print "Matches:" + (len(String1)-mismatches)
print "Mismatches:" + mismatches

You could try the below.
>>> s1 = 'aaabbbccc'
>>> s2 = 'aabbbcccc'
>>> match = 0
>>> mismatch = 0
>>> for i,j in itertools.izip_longest(s1,s2):
if i == j:
match += 1
else:
mismatch +=1
In python3 use itertools.zip_longest instead of itertools.izip_longest.
If you want to consider a and A as a match, then change the if condition to,
if i.lower() == j.lower():
Finally get the match and mismatch count from the variables match and mismatch .

>>>s= list('aaabbbccc')
>>>s1=list('aabbbcccc')
>>>match=0
>>>mismatch=0
>>>for i in range(0,len(s)):
... if(s[i]==s1[i]):
... match+=1
... else:
... mismatch+=1
... s[i]=s[i].upper()
... s1[i]=s1[i].upper()
>>>print 'Matches:'+ str(match)
>>>print 'MisMatches:'+str(mismatch)
>>>print 'String 1:' +''.join(s)
>>>print 'String 2:' +''.join(s1)

Related

How to get the common ordered characters between 2 strings in python?

How to get the common ordered characters between 2 strings in python?
for example:
str1 = 'AAA'
str2 = 'AXA'
output should be 'AA'
Compare each characters from 2 strings and keep it when they are the same:
str1 = 'AAA'
str2 = 'AXA'
str3 = ''.join(c1 for c1, c2 in zip(str1, str2) if c1 == c2)
print(str3)
# Output
'AA'
Other example:
str1 = 'ABC'
str2 = 'AZB'
str3 = ''.join(c1 for c1, c2 in zip(str1, str2) if c1 == c2)
print(str3)
# Output
'A'
Try like this
result = ''
for i in range(0, len(str1)):
if str1[i] == str2[i]:
result += str1[i]
print(result)

Python - Counting Letter Frequency in a String

I want to write my each string's letter frequencies. My inputs and expected outputs are like this.
"aaaa" -> "a4"
"abb" -> "a1b2"
"abbb cc a" -> "a1b3 c2 a1"
"bbbaaacddddee" -> "b3a3c1d4e2"
"a b" -> "a1 b1"
I found this solution but it gives the frequencies in random order. How can I do this?
Does this satisfy your needs?
from itertools import groupby
s = "bbbaaac ddddee aa"
groups = groupby(s)
result = [(label, sum(1 for _ in group)) for label, group in groups]
res1 = "".join("{}{}".format(label, count) for label, count in result)
# 'b3a3c1 1d4e2 1a2'
# spaces just as spaces, do not include their count
import re
re.sub(' [0-9]+', ' ', res1)
'b3a3c1 d4e2 a2'
For me, it is a little bit trickier that it looks at first. For example, it does look that "bbbaaacddddee" -> "b3a3c1d4e2" needs the count results to be outputted in the order of appearance in the passed string:
import re
def unique_elements(t):
l = []
for w in t:
if w not in l:
l.append(w)
return l
def splitter(s):
res = []
tokens = re.split("[ ]+", s)
for token in tokens:
s1 = unique_elements(token) # or s1 = sorted(set(token))
this_count = "".join([k + str(v) for k, v in list(zip(s1, [token.count(x) for x in s1]))])
res.append(this_count)
return " ".join(res)
print(splitter("aaaa"))
print(splitter("abb"))
print(splitter("abbb cc a"))
print(splitter("bbbaaacddddee"))
print(splitter("a b"))
OUTPUT
a4
a1b2
a1b3 c2 a1
b3a3c1d4e2
a1 b1
If the order of appearance is not a real deal, you can disregard the unique_elements function and simply substitute something like s1 = sorted(set(token)) within splitter, as indicated in the comment.
here is you answer
test_str = "here is your answer"
res = {}
list=[]
list=test_str.split()
# print(list)
for a in list:
res={}
for keys in a:
res[keys] = res.get(keys, 0) + 1
for key,value in res.items():
print(f"{key}{value}",end="")
print(end=" ")
There is no need to iterate every character in every word.
This is an alternate solution. (If you don't want to use itertools, that looked pretty tidy.)
def word_stats(data: str=""):
all = []
for word in data.split(" "):
res = []
while len(word)>0:
res.append(word[:1] + str(word.count(word[:1])))
word = word.replace(word[:1],"")
res.sort()
all.append("".join(res))
return " ".join(all)
print(word_stats("asjssjbjbbhsiaiic ifiaficjxzjooro qoprlllkskrmsnm mmvvllvlxjxj jfnnfcncnnccnncsllsdfi"))
print(word_stats("abbb cc a"))
print(word_stats("bbbaaacddddee"))
This would output:
c5d1f3i1j1l2n7s2
a1b3 c2 a1
a3b3c1d4e2

How to get Count of discrepancy values from two given string?

I have two Strings Paula and Pole. If we check Paula with Pole then we will get three discrepancy a,u,a is present in Paula but not present in Pole so it should return a value 3.
Input:
enter string1: Paula
enter string2: Pole
Expected Output:
3
String 1 is always correct here for a row of names.
I have tried something like below so far
import itertools
def compare(string1, string2, no_match_c=' ', match_c='|'):
if len(string2) < len(string1):
string1, string2 = string2, string1
result = ''
n_diff = 0
for c1, c2 in itertools.izip(string1, string2):
if c1 == c2:
result += match_c
else:
result += no_match_c
n_diff += 1
delta = len(string2) - len(string1)
result += delta * no_match_c
n_diff += delta
return (result, n_diff)
def main():
string1 = 'paula'
string2 = 'pole'
result, n_diff = compare(string1, string2, no_match_c='_')
print(n_diff)
main()
Answer should be in a function
Example of other string
string1 = Michelle
string2 = Michele
Output : 1
This is a simple approach to do what you want, assuming you always want to find the number of chars in string 1 not in string 2.
def compare(str1, str2):
#Return count of chars in str1 not in str2
s1 = set([x for x in str1])
s2 = set([x for x in str2])
ms = s1 ^ s2 & s1 #Finds the chars in string 1 not in str2
rslt = 0
for v in ms:
rslt += str1.count(v)
return rslt
You may try Counter, it can use to count the number of each character in both strings and support subtraction between counts.
from collections import Counter
def diff(s1, s2):
c1 = Counter(s1)
c2 = Counter(s2)
return sum((c1 - c2).values())
print(diff("Paula", "Pole")) # Output: 3
print(diff("Pole", "Paula")) # Output: 2
print(diff("Michelle", "Michele")) # Output: 1
print(diff("Michele", "Michelle")) # Output: 0
You can try that, using a list of zeors in the size of 256 (number of ASCII characters) which represents a counter for all characters.
def compare(string1, string2):
chars_counter = [0]*256
for c1 in string1:
chars_counter[ord(c1)] += 1
for c2 in string2:
if chars_counter[ord(c2)] != 0:
chars_counter[ord(c2)] -= 1
return sum(chars_counter)

Concatenate the single characters in texts

I have a list with company names, some of them has abbreviations. ex:
compNames = ['Costa Limited', 'D B M LTD']
I need to convert compNames of text to a matrix of token counts using the following. But this does not output columns for B D M in D B M LTD
count_vect = CountVectorizer(analyzer='word')
count_vect.fit_transform(compNames).toarray()
What is the best way to concatenate the single characters in a text?
ex: 'D B M LTD' to 'DBM LTD'
import re
string = 'D B M LTD'
print re.sub("([^ ]) ", r"\1", re.sub(" ([^ ]{2,})", r" \1", string))
Awkward, but it should work. It introduces an additional space in front of LTD and then replaces "D " with "D", "B " with "B" and so on.
Here is a short function that breaks a string on white space characters to a list, iterates the list, builds a temporary string if the element is of length 1, appends the temp string to a new list when an element with length greater than one is encounters.
import re
a = 'D B M LTD'
def single_concat(s):
out = []
tmp = ''
for x in re.split(r'\s+', s):
if len(x) == 1:
tmp += x
else:
if tmp:
out.append(tmp)
out.append(x)
tmp = ''
return ' '.join(out)
single_concat(a)
# returns:
'DBM LTD'
import re
s = "D B M LTD"
first_part = ''
for chunk in re.compile("([A-Z]{1})\s").split(s):
if len(chunk) == 1:
first_part += chunk
elif len(chunk) > 1:
last_part = chunk
print(first_part + " " + last_part)
Prints DBM LTD.
import re
string = 'D B M LTD'
print re.sub(r"\+", r"", re.sub(r"\+(\w\B)", r" \1", re.sub(r"(\b\w) ", r"\1+", string)))
I'm using the + character as temporary, assuming there are no + characters in the string. If there are, use some other that doesn't occur.
Look, no re:
def mingle(s):
""" SO: 49692941 """
l = s.split()
r = []
t = []
for e in l:
if len(e) == 1:
t.append(e)
else:
j = "".join(t)
r.append( j )
r.append( e )
t = []
return " ".join(r)
print( mingle('D B M LTD') )
prints
DBM LTD

Insert "X" between identical consecutive letters in a string

Given a string, how can I break it up such that there are no consecutive identical letters, at n, n+1, where n is even.
Meaning, how can i get "abba" to remain "abba", but take "abbb" into "abbXb".
Thanks
Because everyone loves one-liners:
strings = ['ab', 'abba', 'abbb', 'abbba', 'abbababababbbaaaa', 'abcacbbbddbabbdd']
for s in strings:
r = ''.join('X' + v if (k and k % 2 and v == s[k - 1]) else v for (k,v) in enumerate(s))
print s, '->', r
The code reads like this: look at every character in the string. If it's not the first and if it's index is even and it is the same as the character before, prepend an 'X' to the character.
Output:
ab -> ab
abba -> abba
abbb -> abbXb
abbba -> abbXba
abbababababbbaaaa -> abbababababXbbaaXaa
abcacbbbddbabbdd -> abcacbbXbdXdbabXbdXd
Do your own homework, Kevin.
def foo(text, separator):
if len(text) < 2:
return text
result = ""
for i in range(1, len(text), 2):
if text[i] == text[i - 1]:
result += text[i - 1] + separator + text[i]
else:
result += text[i-1:i+1]
if len(text) % 2 != 0:
result += text[-1]
return result
print(foo("ab", "X"))
print(foo("abba", "X"))
print(foo("abbba", "X"))
print(foo("abbababababbbaaaa", "Z"))
Output:
>> ab
>> abba
>> abbXba
>> abbababababZbbaaZaa
You can use itertools.groupby:
from itertools import islice, groupby
import math
def solve(strs, n):
for k, g in groupby(strs):
lis = list(g)
it = iter(lis)
yield 'X'.join(''.join(islice(it, n)) for _ in xrange(int(math.ceil(len(lis)/float(n)))))
Demo:
>>> ''.join(solve("abba", 2))
'abba'
>>> ''.join(solve("abbb", 2))
'abbXb'
>>> ''.join(list(solve('abbbbyyyyy', 2)))
'abbXbbyyXyyXy'
>>> ''.join(solve('abbbbyyyyy', 4))
'abbbbyyyyXy'
May it be good?
from itertools import izip_longest
def X(s):
s_odd = s[::2]
s_even = s[1::2]
output = ''
for o, e in izip_longest(s_odd, s_even):
output += o or ''
if o == e:
output += 'X'
output += e or ''
return output
strings = ['ab', 'abba', 'abbb', 'abbba', 'abbababababbbaaaa', 'abcacbbbddbabbdd']
for s in strings:
print X(s)
result:
ab
abba
abbXb
abbXba
abbababababXbbaaXaa
abcacbbXbdXdbabXbdXd
EDIT
A simpler version:
def X(s):
output = ''
for i in range(0, len(s), 2):
o = s[i]
e = s[i+1] if i < len(s) - 1 else ''
output += o
if o == e:
output += 'X'
output += e
return output
strings = ['ab', 'abba', 'abbb', 'abbba', 'abbababababbbaaaa', 'abcacbbbddbabbdd']
for s in strings:
print X(s)

Categories

Resources