Count character occurrences in a Python string - python

I want to get the count of each character in a given sentence. I tried the code below and it does print the count of every character, but a character that occurs more than once is also printed more than once. How can I remove the repeated entries?
def countwords(x):
    """Print every character of *x* (lower-cased) together with its count.

    This is the asker's attempt, kept as posted apart from the restored
    indentation: it loops over the string itself, so a character that
    occurs k times is reported k times, and spaces are counted as well.
    """
    x = x.lower()
    for i in x:
        print(i, 'in', x.count(i))
# input() already returns a str, so no conversion is needed.
x = input("Enter a paragraph ")
countwords(x)
My output is:
My output should not contain the count for spaces, and each character should be listed only once. What should I do?

check this code :
my_string = "count a character occurance"
my_list = list(my_string)
print(my_list)
# A set keeps each character once, so every character is reported once.
get_unique_char = set(my_list)
print(get_unique_char)
# sorted() gives a stable order; iterating the set directly is unordered.
for key in sorted(get_unique_char):
    if key == ' ':
        continue  # the asker does not want spaces counted
    print(key, my_string.count(key))

There are a few different approaches, most hinted at by jonrsharpe's comment, but I'd suggest a simple set.
The set approach, along with a few others are included below:
# An approach using a set
def countwords_set(s):
    """Print each distinct non-space character of *s* with its count.

    The output order is whatever order the set yields.
    """
    for c in set(s):  # a set yields every character exactly once
        if c == ' ':
            continue  # Skip spaces
        print(c, 'in', s.count(c))
# An approach using a standard dict
def countwords_dict(s):
    """Print each distinct non-space character of *s* with its count."""
    d = dict()
    for c in s:
        if c == ' ':
            continue  # Skip spaces
        # .get supplies 0 the first time a character is seen
        d[c] = d.get(c, 0) + 1
    for c, x in d.items():  # Display results
        print(c, 'in', x)
# An approach using a defaultdict (from the collections module)
def countwords_ddict(s):
    """Print each distinct non-space character of *s* with its count."""
    from collections import defaultdict  # Typically, imports go at the top
    d = defaultdict(int)  # a missing key starts at int() == 0
    for c in s:
        if c == ' ':
            continue  # Skip spaces
        d[c] += 1
    for c, x in d.items():
        print(c, 'in', x)
# An approach using a Counter (from the collections module)
def countwords_counter(s):
    """Print each distinct non-space character of *s* with its count."""
    from collections import Counter  # Typically, imports go at the top
    counter = Counter(s)
    # Counters can be accessed like dicts
    for c, x in counter.items():
        if c == ' ':
            continue  # the space is counted by Counter but not reported
        print(c, 'in', x)
# User input and comparison
s = input("Enter a paragraph ").lower()
# Run every approach on the same text, each followed by a separator line.
for approach in (countwords_set, countwords_dict,
                 countwords_ddict, countwords_counter):
    approach(s)
    print("---")
The output is essentially the same for each approach, although the order of the characters may differ, since sets are unordered (and dictionaries did not preserve insertion order before Python 3.7).

Use a dict.
def countwords(x):
    """Print each distinct character of *x* (lower-cased) with its count.

    One line per character, formatted as "<char> <count>".
    """
    d = dict()
    x = x.lower()
    for i in x:
        d[i] = d.get(i, 0) + 1
    for i in d:
        # The count is an int and must be converted before concatenation;
        # the parenthesised form runs on both Python 2 and Python 3.
        print(i + " " + str(d[i]))

Or use numpy with this one-liner
import numpy as np

# With return_counts=True, np.unique returns the sorted distinct characters
# and a parallel array holding how often each one occurs.
letters, counts = np.unique(np.array(list('count these characters')), return_counts=True)

Related

How to change uppercase & lowercase alternatively in a string?

I want to create a new string from a given string with alternate uppercase and lowercase.
I have tried iterating over the string and changing first to uppercase into a new string and then to lower case into another new string again.
# Asker's attempt, kept exactly as posted (the indentation was lost when the
# page was extracted). It does not run as written; see the notes below.
def myfunc(x):
even = x.upper()
lst = list(even)
for itemno in lst:
# NOTE(review): itemno is a one-character string taken from lst, not an
# index, so `itemno % 2` is string formatting rather than a parity test.
if (itemno % 2) !=0:
# NOTE(review): lst[1::2] is a list, and lists have no .lowercase() method.
even1=lst[1::2].lowercase()
itemno=itemno+1
even2=str(even1)
print(even2)
Since I cant change the given string I need a good way of creating a new string alternate caps.
Here's a one-liner:
"".join([x.upper() if i%2 else x.lower() for i,x in enumerate(mystring)])
You can simply randomly choose for each letter in the old string if you should lowercase or uppercase it, like this:
import random
def myfunc2(old):
    """Return *old* with every character randomly upper- or lower-cased."""
    pieces = []
    for c in old:
        # randint(0, 1) is a coin flip: 1 -> lower-case, 0 -> upper-case.
        if random.randint(0, 1):
            pieces.append(c.lower())
        else:
            pieces.append(c.upper())
    # Joining once avoids rebuilding the string on every character.
    return ''.join(pieces)
Here's one that returns a new string using with alternate caps:
def myfunc(x):
    """Return *x* with even-indexed characters upper-cased and odd-indexed
    characters lower-cased (index 0 counts as even)."""
    return ''.join(
        v.upper() if i % 2 == 0 else v.lower()
        for i, v in enumerate(x)
    )
This does the job also
def foo(input_message):
    """Return *input_message* with even-indexed characters lower-cased and
    odd-indexed characters upper-cased (index 0 counts as even)."""
    # enumerate() replaces the hand-maintained position counter.
    return "".join(
        m.lower() if c % 2 == 0 else m.upper()
        for c, m in enumerate(input_message)
    )
Here's a solution using itertools which utilizes string slicing:
from itertools import chain, zip_longest

x = 'inputstring'
lower_evens = x[::2].lower()   # characters at even positions
upper_odds = x[1::2].upper()   # characters at odd positions
# fillvalue='' pads the shorter slice when len(x) is odd.
zipper = zip_longest(lower_evens, upper_odds, fillvalue='')
res = ''.join(chain.from_iterable(zipper))
# 'iNpUtStRiNg'
Using a string slicing:
from itertools import zip_longest

s = 'example'
# Pair each even-position character with the odd-position one after it;
# fillvalue='' covers the unpaired last character of an odd-length string.
pairs = zip_longest(s[::2], s[1::2], fillvalue='')
new_s = ''.join([x.upper() + y.lower() for x, y in pairs])
# ExAmPlE
Using an iterator:
s_iter = iter(s)
# Passing the same iterator twice makes zip_longest take consecutive pairs.
pairs = zip_longest(s_iter, s_iter, fillvalue='')
new_s = ''.join([x.upper() + y.lower() for x, y in pairs])
# ExAmPlE
Using the function reduce():
def func(x, y):
    """Return *x* + *y*, with *y* cased opposite to the last character of *x*.

    *y* is upper-cased when x[-1] is lower-case and lower-cased otherwise.
    *x* must be non-empty.
    """
    if x[-1].islower():
        return x + y.upper()
    return x + y.lower()
from functools import reduce  # reduce() is not a builtin in Python 3

new_s = reduce(func, s) # eXaMpLe
This code also returns alternative caps string:-
def alternative_strings(strings):
    """Print *strings* with alternating case, without a trailing newline.

    Even-indexed characters are upper-cased, odd-indexed ones lower-cased.
    Returns '' so that wrapping the call in print() adds only the newline.
    """
    for i, x in enumerate(strings):
        if i % 2 == 0:
            print(x.upper(), end="")
        else:
            print(x.lower(), end="")
    return ''


print(alternative_strings("Testing String"))
def myfunc(string):
    """Return *string* with alternating case, and print the result.

    An elementary example of enumerate(), which yields an index together
    with its value while walking the string. Even indexes are upper-cased,
    odd indexes lower-cased, and the characters are joined back together.
    Un-hash the print statements to watch the string being built.
    """
    #print(string)
    retval = ''
    for space, letter in enumerate(string):
        if space % 2 == 0:
            retval = retval + letter.upper()
            #print(retval)
        else:
            retval = retval + letter.lower()
            #print(retval)
    print(retval)
    return retval


myfunc('Thisisanamazingscript')

Check the most frequent letter(s) in a word. Python

My task is:
To write a function that gets a string as an argument and returns the letter(s) with the maximum appearance in it.
Example 1:
s = 'Astana'
Output:
a
Example 2:
s = 'Kaskelen'
Output:
ke
So far, I've got this code(click to run):
# Asker's attempt, kept exactly as posted (the indentation was lost when the
# page was extracted).
a = input()
def most_used(w):
a = list(w)
# indexes collects positions whose letter is reported in the result.
indexes = []
# The running maximum starts as the count of the first character.
g_count_max = a.count(a[0])
for letter in a:
count = 0
i = int()
# Count matches of `letter` (also against the upper-cased character) and
# remember the last matching position in i.
for index in range(len(a)):
if letter == a[index] or letter == a[index].upper():
count += 1
i = index
# NOTE(review): `//` is not comment syntax in Python; the marker below would
# need `#`. With `<=`, any letter that ties or beats the running maximum is
# added, and entries already in indexes are never removed when a larger
# count is found later.
if g_count_max <= count: //here is the problem.
g_count_max = count
if i not in indexes:
indexes.append(i)
letters = str()
for i in indexes:
letters = letters + a[i].lower()
return letters
print(most_used(a))
The problem is that it automatically adds first letter to the array because the sum of appearance of the first element is actually equal to the starter point of appearance(which is basically the first element).
Example 1:
s = 'hheee'
Output:
he
Example 2:
s = 'malaysia'
Output:
ma
I think what you're trying to can be much simplified by using the standard library's Counter object
from collections import Counter
def most_used(word):
    """Return the letter(s) occurring most often in *word*, ignoring case.

    The result is a list of lower-case letters that share the highest
    count (use ''.join(result) for a string). An empty word gives [].
    """
    # this has the form [(letter, count), ...] ordered from most to least common
    most_common = Counter(word.lower()).most_common()
    if not most_common:
        return []
    highest = most_common[0][1]
    result = []
    for letter, count in most_common:
        if count != highest:
            break  # counts only go down from here, so stop early
        result.append(letter)
    return result
You can use a dictionary comprehension with a list comprehension and max():
s = 'Kaskelen'
s_lower = s.lower()  # compare letters case-insensitively
# Map every letter to the number of times it occurs.
counts = {}
for ch in s_lower:
    counts[ch] = s_lower.count(ch)
max_counts = max(counts.values())  # highest occurrence count
# Keep, in first-seen order, every letter that reaches the maximum.
most_common = ''.join([k for k in counts if counts[k] == max_counts])
Yields:
'ke'
try this code using list comprehensions:
word = input('word=').lower()
letters = set(word)
# Count each distinct letter once and reuse the counts below.
occurrences = {item: word.count(item) for item in letters}
max_w = max(occurrences.values())
out = ''.join(item for item in letters if occurrences[item] == max_w)
print(out)
Also you can import Counter lib:
from collections import Counter

a = "dagsdvwdsbd"
# most_common(3) lists the three most frequent letters, best first.
top_three = Counter(a).most_common(3)
print(top_three[0][0])
Then it returns:
d

Reading a text file to print frequency of letters in decreasing order - Python 3

I am doing python basic challenges this is one of them. What all I needed to do is to read through a file and print out the frequency of letters in decreasing order. I am able to do this but I wanted to enhance the program by also printing out the frequency percentage alongside with the letter - frequency - freq%. Something like this: o - 46 - 10.15%
This is what I did so far:
def exercise11():
    """Ask for a file name and print its letters by decreasing frequency.

    Asker's code, kept as posted apart from the restored indentation.
    Punctuation, whitespace and digits are removed and the rest is
    lower-cased before counting.
    """
    import string
    # Keep asking until a file can be opened.
    while True:
        try:
            fname = input('Enter the file name -> ')
            fop = open(fname)
            break
        except:
            print('This file does not exists. Please try again!')
            continue
    counts = {}
    for line in fop:
        line = line.translate(str.maketrans('', '', string.punctuation))
        line = line.translate(str.maketrans('', '', string.whitespace))
        line = line.translate(str.maketrans('', '', string.digits))
        line = line.lower()
        for ltr in line:
            if ltr in counts:
                counts[ltr] += 1
            else:
                counts[ltr] = 1
    lst = []
    countlst = []
    freqlst = []
    for ltrs, c in counts.items():
        lst.append((c, ltrs))  # (count, letter), so sorting orders by count
        countlst.append(c)
    totalcount = sum(countlst)
    for ec in countlst:
        efreq = (ec/totalcount) * 100
        freqlst.append(efreq)
    # freqlst is sorted on its own here and is not used by the output below.
    freqlst.sort(reverse=True)
    lst.sort(reverse=True)
    # Each tuple is (count, letter): here `ltrs` receives the count and `c`
    # the letter, so this prints "letter - count".
    for ltrs, c, in lst:
        print(c, '-', ltrs)
exercise11()
As you can see I am able to calculate and sort the freq% on a different list but I am not able to include it in the tuple of the lst[] list alongside with the letter, freq. Is there any way to solve this problem?
Also if you have any other suggestions for my code. Please do mention.
Output Screen
Modification
Applying a simple modification suggested by @wwii, I got the desired output. All I had to do was add one more argument to the print call while iterating over the list lst. Previously I had built a separate list for the freq%, sorted it, and then tried to insert it into the (count, letter) tuples in the list, which didn't work out.
# lst holds (count, letter) tuples, so unpack them under matching names.
for count, letter in lst:
    print(letter, '-', count, '-', round(count / totalcount * 100, 2), '%')
Output Screen
Your count data is in a dictionary of {letter:count} pairs.
You can use the dictionary to calculate the total count like this:
total_count = sum(counts.values())
Then don't calculate the percentage till you are iterating over the counts...
for letter, count in counts.items():
    # The total computed earlier is named total_count.
    print(f'{letter} - {count} - {100*count/total_count}') #Python v3.6+
    #print('{} - {} - {}'.format(letter, count, 100*count/total_count)) #Python version <3.6+
Or if you want to put it all in a list so you can sort it:
data = []
for letter, count in counts.items():
    # One (letter, count, percentage) tuple per letter.
    data.append((letter, count, 100*count/total_count))
Using operator.itemgetter for the sort key function can help code readability.
import operator

# Key functions picking the letter, count and frequency out of each tuple.
letter, count, frequency = map(operator.itemgetter, (0, 1, 2))
# Three successive in-place sorts, as in the original snippet.
# NOTE(review): these look like alternatives rather than steps - confirm.
for sort_key in (letter, count, frequency):
    data.sort(key=sort_key)
Tuples are immutable which is probably the issue you are finding. The other issue is the simple form of the sort function; A more-advanced sort function would serve you well. See below:
lst is currently a list of tuples, but because tuples are immutable whereas lists are mutable, changing lst to a list of lists is a valid approach. Then, since each element of lst is a list of the form [letter, count, frequency%], sort with a lambda key can order the rows by whichever index you'd like. The following is to be inserted after your for line in fop: loop.
lst = []
for ltrs, c in counts.items():
    lst.append([ltrs, c])
totalcount = sum([x[1] for x in lst])  # sum all 'count' values
for elem in lst:
    # each element is a mutable list, so the percentage can be appended to it
    elem.append((elem[1] / totalcount) * 100)
# sort in place, largest percentage first (index 2 of each row)
lst.sort(reverse=True, key=lambda row: row[2])
The items in freqlst,countlist, and lst are related to each other by their position. If any are sorted that relationship is lost.
zipping the lists together before sorting will maintain the relationship.
Will pick up from your list initialization lines.
lst = []
countlst = []
freqlst = []
for ltr, c in counts.items():
    #change here, lst now only contains letters
    lst.append(ltr)
    countlst.append(c)
totalcount = sum(countlst)
for ec in countlst:
    efreq = (ec/totalcount) * 100
    freqlst.append(efreq)
#New stuff here: zip the three lists so each letter keeps its own count
#and percentage while sorting
zipped = zip(lst, countlst, freqlst)
# reverse=True gives the decreasing order the question asks for
zipped = sorted(zipped, key=lambda x: x[1], reverse=True)
for ltr, c, freq in zipped:
    print("{} - {} - {}%".format(ltr, c, freq)) # love me the format method :)
Basically, zip combines lists together into a list of tuples. Then you can use a lambda function to sort those tuples (very common stack question)
I think I was able to achieve what you wanted by using lists instead of tuples. Tuples cannot be modified, but if you really want to know how click here
(I also added the possibility to quit the program)
Important: Never forget to comment your code
The code:
def exercise11():
    """Ask for a file name and print each letter's count and percentage.

    Punctuation, whitespace and digits are ignored and letters are
    lower-cased. Rows are printed by decreasing count. Entering 0 at the
    prompt quits without reading anything. Returns None.
    """
    import string

    while True:
        # Show the hint before the prompt so it is visible in time.
        print('Press 0 to quit the program')
        fname = input('Enter the file name -> ')
        if fname == '0':
            return  # nothing was opened, so there is nothing to count
        try:
            fop = open(fname)
        except OSError:
            # Only open() failures are retried; Ctrl-C still interrupts.
            print('This file does not exists. Please try again!')
            continue
        break

    # One table that removes punctuation, whitespace and digits in one pass.
    strip_table = str.maketrans(
        '', '', string.punctuation + string.whitespace + string.digits)
    counts = {}
    with fop:  # close the file once it has been read
        for line in fop:
            for ltr in line.translate(strip_table).lower():
                counts[ltr] = counts.get(ltr, 0) + 1

    # [count, letter, percentage] rows; lists so the percentage placeholder
    # can be filled in afterwards.
    lst = [[c, ltrs, 0] for ltrs, c in counts.items()]
    lst.sort(reverse=True)
    # count the total of the letters
    counter = sum(row[0] for row in lst)
    # calculate the percentage for each letter
    for letter in lst:
        percentage = (letter[0] / counter) * 100
        letter[2] += float(format(percentage, '.2f'))
    for i in lst:
        print('The letter {} is repeated {} times, which is {}% '.format(i[1], i[0], i[2]))
exercise11()
<?php
// Count how often each byte value occurs in text.txt and print one line
// per value that is present.
// Read the whole file; fgets() would return only its first line.
$text = @file_get_contents("text.txt");
if ($text === false) {
    die("File does not exist");
}
// Mode 1 returns only the byte values that occur at least once.
$counts = count_chars($text, 1);
foreach ($counts as $byte => $count)
{
    // Escape the character so that '<' or '&' cannot break the HTML output.
    echo "The character <b>' ".htmlspecialchars(chr($byte))." '</b> was found <b>$count</b> times. <br>";
}
?>

Trying to output the x most common words in a text file

I'm trying to write a program that will read in a text file and output a list of most common words (30 as the code is written now) along with their counts. so something like:
word1 count1
word2 count2
word3 count3
... ...
... ...
wordn countn
in order of count1 > count2 > count3 >... >countn. This is what I have so far but I cannot get the sorted function to perform what I want. The error I get now is:
TypeError: list indices must be integers, not tuple
I'm new to python. Any help would be appreciated. Thank you.
# Asker's code (Python 2), kept exactly as posted (the indentation was lost
# when the page was extracted).
# Returns element 1 of its argument; nothing in this snippet calls it.
def count_func(dictionary_list):
return dictionary_list[1]
# Counts lower-cased, whitespace-separated words of `filename` and tries to
# print them with their counts.
def print_top(filename):
word_list = {}
with open(filename, 'r') as input_file:
count = 0
#best
for line in input_file:
for word in line.split():
word = word.lower()
if word not in word_list:
word_list[word] = 1
else:
word_list[word] += 1
#sorted_x = sorted(word_list.items(), key=operator.itemgetter(1))
# items = sorted(word_count.items(), key=get_count, reverse=True)
# From here on word_list is a list of (word, count) tuples, no longer a dict.
# NOTE(review): no reverse=True, so the order is ascending by count.
word_list = sorted(word_list.items(), key=lambda x: x[1])
for word in word_list:
if (count > 30):#19
break
# NOTE(review): `word` is a (word, count) tuple here, so word_list[word]
# indexes a list with a tuple - the TypeError quoted in the question.
print "%s: %s" % (word, word_list[word])
count += 1
# This basic command line argument parsing code is provided and
# calls the print_words() and print_top() functions which you must define.
def main():
    """Run print_words() or print_top() on the file named in sys.argv.

    Expects exactly two arguments: an option (--count or --topcount) and a
    file name. Prints a message and exits with status 1 otherwise.
    """
    if len(sys.argv) != 3:
        # A single parenthesised argument prints the same on Python 2 and 3.
        print('usage: ./wordcount.py {--count | --topcount} file')
        sys.exit(1)

    option = sys.argv[1]
    filename = sys.argv[2]
    if option == '--count':
        print_words(filename)
    elif option == '--topcount':
        print_top(filename)
    else:
        print('unknown option: ' + option)
        sys.exit(1)


if __name__ == '__main__':
    main()
Use the collections.Counter class.
from collections import Counter

# most_common(30) yields the 30 highest-count (word, count) pairs, best first.
for word, count in Counter(words).most_common(30):
    print(word, count)
Some unsolicited advice: Don't make so many functions until everything is working as one big block of code. Refactor into functions after it works. You don't even need a main section for a script this small.
Using itertools' groupby:
from itertools import groupby

# Read inside a with-block so the file is closed once its words are loaded.
with open("/path/to/file") as source:
    words = sorted(w.lower() for w in source.read().split())
# groupby only merges adjacent equal items, hence the sort above.
count = [[item[0], len(list(item[1]))] for item in groupby(words)]
count.sort(key=lambda x: x[1], reverse = True)
for item in count[:5]:
    print(*item)
This will list the file's words, sort them and list unique words and their occurrence. Subsequently, the found list is sorted by occurrence by:
count.sort(key=lambda x: x[1], reverse = True)
The reverse = True is to list the most common words first.
In the line:
for item in count[:5]:
[:5] defines the number of most occurring words to show.
First method as others have suggested i.e. by using most_common(...) doesn't work according to your needs cause it returns the nth first most common words and not the words whose count is less than or equal to n:
Here's using most_common(...): note it just print the first nth most common words:
>>> import re
... from collections import Counter
... def print_top(filename, max_count):
... words = re.findall(r'\w+', open(filename).read().lower())
... for word, count in Counter(words).most_common(max_count):
... print word, count
... print_top('n.sh', 1)
force 1
The correct way would be as follows; note that it prints all the words whose count is less than or equal to max_count:
>>> import re
... from collections import Counter
... def print_top(filename, max_count):
... words = re.findall(r'\w+', open(filename).read().lower())
... for word, count in filter(lambda x: x[1]<=max_count, sorted(Counter(words).items(), key=lambda x: x[1], reverse=True)):
... print word, count
... print_top('n.sh', 1)
force 1
in 1
done 1
mysql 1
yes 1
egrep 1
for 1
1 1
print 1
bin 1
do 1
awk 1
reinstall 1
bash 1
mythtv 1
selections 1
install 1
v 1
y 1
Here is my Python 3 solution. I was asked this question in an interview and the interviewer was happy with this solution, although in a less time-constrained situation the other solutions provided above seem a lot nicer to me.
dict_count = {}
# The with-block closes the log file as soon as its lines have been read.
with open("logdata.txt", "r") as file:
    lines = [line.replace('\n', '') for line in file]
# Count how many times each line occurs.
for line in lines:
    dict_count[line] = dict_count.get(line, 0) + 1
def greatest(words):
    """Return [count, key] for the first entry of *words* with the highest count.

    Only counts greater than 0 are considered; an empty mapping gives [0, ''].
    """
    best_count = 0
    best_key = ''
    for key, val in words.items():
        if val > best_count:
            best_count = val
            best_key = key
    return [best_count, best_key]


most_common = []


def n_most_common_words(n, words):
    """Append [count, key] pairs from *words* to most_common, largest first.

    Appending stops when most_common holds *n* entries or when *words* has
    nothing left to offer. The caller's mapping is left unchanged.
    """
    remaining = dict(words)  # work on a copy so the caller keeps its data
    while len(most_common) < n and remaining:
        top = greatest(remaining)  # computed once per step
        if top[1] not in remaining:
            break  # no positive counts left to pick from
        most_common.append(top)
        del remaining[top[1]]
n_most_common_words(20, dict_count)
print(most_common)

I have a program to find the amount of alphabets in a string but its not complete can you complete it python

# Asker's attempt (Python 2), kept exactly as posted (the indentation was
# lost when the page was extracted).
def encode(source):
dest="";
i=0
while i<len(source):
runLength = 1;
# NOTE(review): this compares source[runLength] with source[runLength-1],
# indexing by the run length rather than by the position i, and nothing
# here keeps runLength below len(source).
while source[runLength] == source[runLength-1]:
runLength=runLength+1
i=i+1
dest+=source[i]
dest+=str(runLength)
i=i+1
return dest
source = raw_input("")
print (encode(source))
sample input:
AABBCCCCDADD
sample output:
3A2B4C3D
please fix it, mostly changing line 6 should do it I think
You can simply do it using dictionary.
x = "AABBCCCCDDD"
d = {}
for i in x:
    d.setdefault(i, 0)  # start every new character at 0
    d[i] = d[i] + 1
# A single parenthesised argument prints the same on Python 2 and Python 3.
print("".join([str(j) + i for i, j in d.items()]))
The best way is to use a dict to keep the count, an OrderedDict will also keep the order for you:
from collections import OrderedDict


def encode(source):
    """Return "<count><char>" for each distinct character of *source*.

    Characters appear in first-seen order. Counts are totals for the whole
    string, so a character that reappears later is not split into runs.
    """
    od = OrderedDict()
    # iterate over input string
    for ch in source:
        # start at 0 the first time a char is seen, then add one per sighting
        od[ch] = od.get(ch, 0) + 1
    # join the value/count and the key/char for each char
    return "".join([str(v) + k for k, v in od.items()])


source = "AABBCCCCDDD"
print (encode(source))
This will only work for strings like your input, where a character does not appear again after its run has ended. If characters can repeat later, we can keep track in a loop and reset the count whenever we meet a character that differs from the previous one:
def encode(source):
    """Run-length encode *source* as each character followed by its run length.

    "AABA" becomes "A2B1A1". An empty input gives "".
    """
    it = iter(source)
    try:
        # set prev to first char from source
        prev = next(it)
    except StopIteration:
        return ""  # nothing to encode
    count = 1
    parts = []
    for ch in it:
        # if prev and char are equal add 1 to count
        if prev == ch:
            count += 1
        # else the run has ended, so record it and start a new run
        else:
            parts.append(prev + str(count))
            count = 1
            prev = ch
    # catch the last run or a single character
    parts.append(prev + str(count))
    return "".join(parts)
Output:
In [7]: source = "AABBCCCCDDDEE"
In [8]: print (encode(source))
A2B2C4D3E2
As an alternative solution, there is a Python library called itertools that has a function which is useful in this situation. It can split your string up into groups of the same letter.
import itertools


def encode(source):
    """Run-length encode *source* as "<run length><char>" per run."""
    # groupby yields (char, group) for each run of equal adjacent characters;
    # the length of the group is the run length.
    return "".join(["%u%s" % (len(list(g)), k) for k, g in itertools.groupby(source)])


# A single parenthesised argument prints the same on Python 2 and Python 3.
print(encode("AABBCCCCDDD"))
This will print out the following:
2A2B4C3D
To see how this works, see the following smaller version:
for k, g in itertools.groupby("AABBCCCCDDD"):
    # print() as a function; on Python 3 this gives the output shown below
    print(k, list(g))
This prints the following:
A ['A', 'A']
B ['B', 'B']
C ['C', 'C', 'C', 'C']
D ['D', 'D', 'D']
You can see k is the key, and g is the group. If we take the length of each group, you have your solution.

Categories

Resources