Creating a histogram to map character frequency

Creating a histogram to map character frequency - python

I am creating a function that returns a histogram with each letter of the alphabet and asterisks that map out how many times each character appears in a string. So far I have:
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
def character_frequency_string(text):
#remove_extraneous function removes anything that is not a letter in the alphabet from the text string
new_text = remove_extraneous(text)
for char in new_text:
if char in new_text:
print(char +' ' + '*'*new_text.count(char))
if char not in new_text:
print(char)
My docstring is the following (with the outputs as they are right now, incorrect):
'''
Examples:
>>> character_frequency_string('hello world!')
h *
e *
l ***
l ***
o **
w *
o **
r *
l ***
d *
>>> character_frequency_string('testing!')
t **
e *
s *
t **
i *
n *
g *
'''
The correct output for 'hello world!' would be:
How could I change my code so that the histogram works as intended (with all of the alphabet in order, displaying an asterisk beside each letter for its character frequency, still displaying letters when they aren't in the text, just with no asterisk.)

Iterate over alphabet:
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
def character_frequency_string(text):
new_text = text
for char in alphabet:
print(char + ' ' + '*' * new_text.count(char))
character_frequency_string('hello world!')
Output
a
b
c
d *
e *
f
g
h *
i
j
k
l ***
m
n
o **
p
q
r *
s
t
u
v
w *
x
y
z
The above solution has O(n^2) time complexity, a more performant alternative is to use collections.Counter.

You could do the following, using a collections.Counter and f-strings:
from collections import Counter
from string import ascii_lowercase as alphabet
def character_frequency_string(text):
c = Counter(text.lower())
for x in alphabet:
print(f"{x} {'*' * c[x]}")
>>> character_frequency_string("hello world!")
a
b
c
d *
e *
f
g
h *
i
j
k
l ***
m
n
o **
p
q
r *
s
t
u
v
w *
x
y
z
Some docs:
Counter
string.ascii_lowercase

A dictionary approach. Notice the text may contain characters which do not belongs to the alphabet such as punctuation and white spaces. In case, such excluded characters are implemented also as string.punctuation and string.whitespace.
import string
alphabet = string.ascii_lowercase
def character_frequency_string(text, sep='*'):
d = dict.fromkeys(alphabet, 0)
for c in tuple(text.lower()):
if c in d:
d[c] += 1
print(*(f'{k} {sep*v}' for k, v in d.items()), sep='\n')
t = 'hello world!'
character_frequency_string(t)

You could use set for getting the same result
import re
strString = input("Please give input value of the string :")
strString = re.sub(r"[^a-zA-Z]","",strString)
strString=strString.lower()
print(strString)
a = list(sorted(set(strString.lower())))
for i in range(len(a)):
count = 0
for k in range(len(strString)):
if a[i] == strString[k]:
count = count + 1
str(a[i])+"is"+str(count)
print(a[i] + " " + "*" *count)

Related

Inserting breaks in variables over a certain length

So basically what I want to do is if the random string of characters generated is over 6 chars long it adds a space in a random place in that string and then the remaining ones are added on after the space so for example: raw output: "aoneicse", what I want : "aoneic se" or "aone icse".
Here is my code:
import random
alist = [' ', 'a', 'b', 'c', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
blist = [' ', 'a', 'b', 'c', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
clist = [' ', 'a', 'b', 'c', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
# define the 3 words
a = ''.join(random.choices(alist, k = random.randint(2,8)))
b = ''.join(random.choices(blist, k = random.randint(2,8)))
c = ''.join(random.choices(clist, k = random.randint(2,8)))
# each letter is a certain word, if this word exceeds 6 characters add a space in a random place
if a length = > 6:
asp = [a + " "]
if b length = > 6:
bsp = [b + " "]
if c length = > 6:
csp = [c + " "]
# or something along the lines of this i guess
This code does not currently work, BTW.

You don't need to create a list of strings with every letter of the alphabet, there's already a string module that does that for you:
import string
print(' ' + string.ascii_lowercase)
# Outputs abcdefghijklmnopqrstuvwxyz
You also don't need to create three different variables, you can just use a single one and use that:
import string
base_str = ' ' + string.ascii_lowercase
Then, you can use a list to generate your words based on this string:
import random
import string
base_str = ' ' + string.ascii_lowercase
words = [''.join(random.choices(base_str, k=random.randint(2,8))) for _ in range(3)]
Now, just apply your space requirement to each word:
import random
import string
base_str = ' ' + string.ascii_lowercase
words = [''.join(random.choices(base_str, k=random.randint(2,8))) for _ in range(3)]
new_words = []
for word in words:
if len(word) < 6:
new_word = word
else:
pos = random.randrange(len(word))
new_word = word[:pos] + ' ' + word[pos:]
new_words.append(new_word)
Using random.seed(0), new_words is equal to ['j xazmxvz', 'oxmg', 'buzns pcb'].

Don't join the choices immediately. Generate the list, and if it is long enough, pick an index in the middle (sufficiently far from either end, depending on your needs), and perform a slice assignment to the empty list at that position. Then join the list into a single string.
import string
a = random.choices(string.ascii_lowercase, k=random.randint(2,8))
if len(a) > 6:
# Adjust the arguments to randint as desired
x = random.randint(2, len(a) - 2)
a[x:x] = ' '
a = ''.join(a)

import random
character = list(map(chr, range(ord('a'), ord('z') + 1)))
def get_char():
return character[random.randint(0, len(character) - 1)]
def gen_word(min_len, max_len):
word = [get_char() for _ in range(random.randint(min_len, max_len))]
if len(word) > 6:
word[random.randint(1, len(word) - 2)] = ' '
return ''.join(word)
for i in range(10):
print(gen_word(4, 10))

I’d use slices to return a space, sliced-in at the right spot; Something along the lines of:
def split(input_str):
if len(input_str) > 6:
space_at = rand(0,len(input_str))
return input_str[:space_at] + ‘ ‘ + input_str[space_at:]
else:
return input_str

random alphabet chooser with no repeats (python)

import random
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
iterations = 1
running = True
def random_picker():
random_choice = random.choice(alphabet)
iterations =+ 1
print(random_choice)
while running == True:
if iterations <=26:
random_picker()
else:
running == False
I'm trying to get a different random letter through each iteration, through all 26. Each letter picked needs to update the random_choice variable.

Your program will continue looping until 26 letters are returned. Since you want a different letter on each iteration, it's probably easier to shuffle the alphabet array and loop over it instead of trying to choose a random letter in each iteration:
random.shuffle(alphabet)
Paul M. EDIT - Here is an example of how you might use it to achieve the desired effect:
from string import ascii_lowercase
from random import shuffle
alphabet = list(ascii_lowercase)
shuffle(alphabet)
for char in alphabet:
print(char)
Output:
j
m
z
w
k
y
d
f
l
c
u
b
t
s
e
p
x
g
a
r
n
h
i
q
o
v
>>>

Python: How to check if element of string is in a string and print the not included string elements?

I want to check each list element of c in alphabet and print the letters of the alphabet, which are not in a list element of c.
For example, shall the first list element of c "aa" print all letters of alphabet in a string excluding the letter a.
alphabet = "abcdefghijklmnopqrstuvwxyz"
c = ['aa', 'bb', 'zz']
for x in c:
if x in alphabet:
print(alphabet)
else:
print('not an element of alphabet')

Something like that:
alphabet = "abcdefghijklmnopqrstuvwxyz"
cases = ['aa', 'bb', 'zz']
for case in cases:
missing_letters = []
for letter in alphabet:
if letter not in case:
missing_letters.append(letter)
print(f"Case {case} misses following alphabeth letters {missing_letters}")
Output:
Case aa misses following alphabeth letters ['b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

If you are sure that the elements in c are all in the format of 'xx' like in your sample, then the following is a solution:
alphabet = "abcdefghijklmnopqrstuvwxyz"
c = ['ad', 'bb', 'zz','ad', 'bt', 'uz']
for x in c:
new_alph = alphabet
for char in x:
new_alph = new_alph.replace(char,'')
if new_alph == alphabet:
print('not an element of alphabet')
else:
print(new_alph)
Output:
bcefghijklmnopqrstuvwxyz
acdefghijklmnopqrstuvwxyz
abcdefghijklmnopqrstuvwxy
bcefghijklmnopqrstuvwxyz
acdefghijklmnopqrsuvwxyz
abcdefghijklmnopqrstvwxy
Another way is to use translate to make the code more compact:
alphabet = "abcdefghijklmnopqrstuvwxyz"
c = ['ad', 'bb', 'zz','ad', 'bt', 'uz']
for x in c:
new_alph = alphabet.translate({ord(char): '' for char in x})
if new_alph == alphabet:
print('not an element of alphabet')
else:
print(new_alph)
Output:
bcefghijklmnopqrstuvwxyz
acdefghijklmnopqrstuvwxyz
abcdefghijklmnopqrstuvwxy
bcefghijklmnopqrstuvwxyz
acdefghijklmnopqrsuvwxyz
abcdefghijklmnopqrstvwxy

As long as the strings in c are only 2 chars this will work
alphabet = "abcdefghijklmnopqrstuvwxyz"
c = ['aa', 'bb', 'zz']
for x in c:
if x[0] in alphabet or x[1] in alphabet:
alphabet.replace(x[0], '').replace(x[1], '')
else:
print('not an element of alphabet')

Using array to generate random text

I am trying to generate random text using letter frequencies that I have obtained. First, I succeeded with the following code:
for i in range(450):
outcome=random.random()
if 0<outcome<0.06775:
sys.stdout.write('a')
if 0.06775<outcome<0.07920:
sys.stdout.write('b')
if 0.07920<outcome<0.098:
sys.stdout.write('c')
....
This until the letter z and spacebar. This give me >50 lines of code and I want to get the same result using an array.
So far I have :
f_list = [0, 0.06775, 0.08242, 0.10199, 0.13522, 0.23703, 0.25514, 0.27324, 0.32793, 0.38483, 0.38577, 0.39278, 0.42999, 0.45023, 0.50728, 0.56756, 0.58256, 0.58391, 0.62924, 0.68509, 0.7616, 0.78481, 0.79229, 0.81161, 0.81251, 0.82718, 0.82773, 0.99998]
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ']
import random
import sys
for i in range(25):
outcome=random.random()
if f_list[i]<outcome<f_list[i+1]:
sys.stdout.write('alphabet[i]')
But it isn't working properly, as the range seems now to relate to the array and not the number of iterations I want. The output is blank.

import random
import sys
import bisect
f_list = [0, 0.06775, 0.08242, 0.10199, 0.13522, 0.23703, 0.25514, 0.27324, 0.32793, 0.38483, 0.38577, 0.39278, 0.42999, 0.45023, 0.50728, 0.56756, 0.58256, 0.58391, 0.62924, 0.68509, 0.7616, 0.78481, 0.79229, 0.81161, 0.81251, 0.82718, 0.82773, 0.99998]
alphabet = 'abcdefghijklmnopqrstuvwxyz '
for i in xrange(450):
sys.stdout.write(alphabet[bisect.bisect(f_list, random.random()) - 1])
does the trick and returns (example):
l wefboethol gsplotfoh ua onpedefh dnolnairnioeiegehhecaworonnfmeuej dsiauhpbfttwcknal ateof ap cgbr sunnee leseaeeecltaiaur u oen vxntgsoio kdeniei ot df htr dcencrsrrfp bwelsuoaslrnr heh ee tpt oeejaldeatcto fi a u idimiadmgglral o m iaielbtnt es oe shlspudwdfrrsvol oo i tlwh d r i swhsnloai p swlooi wbe nn sshth nsawtnrqsud mtw diit pner r nitmah todf zcsehma hl e ros ctee toiouinn i hl hlonphioe nh gan ho heein itrgeylftn epaacrmanhe
alphabet can be defined as a simple string too (accessing its elements - single characters - works like for lists)
bisect.bisect(list, value) takes a sorted list and a value and tells where this value should be put between. More about bisect.

Eumiros answer is perfect, and much simpler then mine, but because i made the effort to modify a older solution to a similar problem, i don't want it to go to waste.
I even had the link still around for the discussion about weighted random generators
from which i borrowed the "King of the hill" algorithm.
from string import lowercase
from random import random
class TextGenerator(object):
def __init__(self, flist, textlength, charmap = lowercase + ' '):
self.text_length = textlength
self.chars = charmap
self.weights = self._get_weight_list(flist)
def create_new_weights(self, flist):
self.weights = self._get_weight_list(flist)
def get_weight(self, char):
return self.weights[self.chars.index(char)]
def change_weight(self, char, weight):
self.weights[self.chars.index(char)] = weight
def _get_weight_list(self, flist):
return map (lambda x, y: y-x,
flist,
flist[1:] + [1.0])[:-1]
def windex(self):
assert(len(self.weights) == len(self.chars))
rnd = random() * sum(self.weights)
for i, w in enumerate(self.weights):
rnd -= w
if rnd < 0:
return i
def create_text(self, flist = None):
weights = self._get_weight_list(flist)if flist else self.weights
return u''.join([self.chars[self.windex()] for i in range(self.text_length)])
flist = [0, 0.067750000000000005, 0.082419999999999993, 0.10199, 0.13522000000000001, 0.23702999999999999, 0.25513999999999998, 0.27323999999999998, 0.32793, 0.38483000000000001, 0.38577, 0.39278000000000002, 0.42998999999999998, 0.45023000000000002, 0.50727999999999995, 0.56755999999999995, 0.58255999999999997, 0.58391000000000004, 0.62924000000000002, 0.68508999999999998, 0.76160000000000005, 0.78481000000000001, 0.79229000000000005, 0.81161000000000005, 0.81250999999999995, 0.82718000000000003, 0.82772999999999997, 0.99997999999999998]
texter = TextGenerator(flist, 1000)
print texter.create_text()
texter.change_weight('i', texter.get_weight('e') * 2)
print texter.create_text()

Python: how to print range a-z?

1. Print a-n: a b c d e f g h i j k l m n
2. Every second in a-n: a c e g i k m
3. Append a-n to index of urls{hello.com/, hej.com/, ..., hallo.com/}: hello.com/a hej.com/b ... hallo.com/n

>>> import string
>>> string.ascii_lowercase[:14]
'abcdefghijklmn'
>>> string.ascii_lowercase[:14:2]
'acegikm'
To do the urls, you could use something like this
[i + j for i, j in zip(list_of_urls, string.ascii_lowercase[:14])]

Assuming this is a homework ;-) - no need to summon libraries etc - it probably expect you to use range() with chr/ord, like so:
for i in range(ord('a'), ord('n')+1):
print chr(i),
For the rest, just play a bit more with the range()

Hints:
import string
print string.ascii_lowercase
and
for i in xrange(0, 10, 2):
print i
and
"hello{0}, world!".format('z')

for one in range(97,110):
print chr(one)

Get a list with the desired values
small_letters = map(chr, range(ord('a'), ord('z')+1))
big_letters = map(chr, range(ord('A'), ord('Z')+1))
digits = map(chr, range(ord('0'), ord('9')+1))
or
import string
string.letters
string.uppercase
string.digits
This solution uses the ASCII table. ord gets the ascii value from a character and chr vice versa.
Apply what you know about lists
>>> small_letters = map(chr, range(ord('a'), ord('z')+1))
>>> an = small_letters[0:(ord('n')-ord('a')+1)]
>>> print(" ".join(an))
a b c d e f g h i j k l m n
>>> print(" ".join(small_letters[0::2]))
a c e g i k m o q s u w y
>>> s = small_letters[0:(ord('n')-ord('a')+1):2]
>>> print(" ".join(s))
a c e g i k m
>>> urls = ["hello.com/", "hej.com/", "hallo.com/"]
>>> print([x + y for x, y in zip(urls, an)])
['hello.com/a', 'hej.com/b', 'hallo.com/c']

import string
print list(string.ascii_lowercase)
# ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

import string
print list(string.ascii_lowercase)
# ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
and
for c in list(string.ascii_lowercase)[:5]:
...operation with the first 5 characters

myList = [chr(chNum) for chNum in list(range(ord('a'),ord('z')+1))]
print(myList)
Output
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

list(string.ascii_lowercase)
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

Try:
strng = ""
for i in range(97,123):
strng = strng + chr(i)
print(strng)

import string
string.printable[10:36]
# abcdefghijklmnopqrstuvwxyz
string.printable[10:62]
# abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

#1)
print " ".join(map(chr, range(ord('a'),ord('n')+1)))
#2)
print " ".join(map(chr, range(ord('a'),ord('n')+1,2)))
#3)
urls = ["hello.com/", "hej.com/", "hallo.com/"]
an = map(chr, range(ord('a'),ord('n')+1))
print [ x + y for x,y in zip(urls, an)]

The answer to this question is simple, just make a list called ABC like so:
ABC = ['abcdefghijklmnopqrstuvwxyz']
And whenever you need to refer to it, just do:
print ABC[0:9] #prints abcdefghij
print ABC #prints abcdefghijklmnopqrstuvwxyz
for x in range(0,25):
if x % 2 == 0:
print ABC[x] #prints acegikmoqsuwy (all odd numbered letters)
Also try this to break ur device :D
##Try this and call it AlphabetSoup.py:
ABC = ['abcdefghijklmnopqrstuvwxyz']
try:
while True:
for a in ABC:
for b in ABC:
for c in ABC:
for d in ABC:
for e in ABC:
for f in ABC:
print a, b, c, d, e, f, ' ',
except KeyboardInterrupt:
pass

This is your 2nd question: string.lowercase[ord('a')-97:ord('n')-97:2] because 97==ord('a') -- if you want to learn a bit you should figure out the rest yourself ;-)

I hope this helps:
import string
alphas = list(string.ascii_letters[:26])
for chr in alphas:
print(chr)

About gnibbler's answer.
Zip -function, full explanation, returns a list of tuples, where the i-th tuple contains the i-th element from each of the argument sequences or iterables. [...] construct is called list comprehension, very cool feature!

# Assign the range of characters
first_char_start = 'a'
last_char = 'n'
# Generate a list of assigned characters (here from 'a' to 'n')
alpha_list = [chr(i) for i in range(ord(first_char), ord(last_char) + 1)]
# Print a-n with spaces: a b c d e f g h i j k l m n
print(" ".join(alpha_list))
# Every second in a-n: a c e g i k m
print(" ".join(alpha_list[::2]))
# Append a-n to index of urls{hello.com/, hej.com/, ..., hallo.com/}
# Ex.hello.com/a hej.com/b ... hallo.com/n
#urls: list of urls
results = [i+j for i, j in zip(urls, alpha_list)]
#print new url list 'results' (concatenated two lists element-wise)
print(results)

Another way to do it
import string
aalist = list(string.ascii_lowercase)
aaurls = ['alpha.com','bravo.com','chrly.com','delta.com',]
iilen = aaurls.__len__()
ans01 = "".join( (aalist[0:14]) )
ans02 = "".join( (aalist[0:14:2]) )
ans03 = "".join( "{vurl}/{vl}\n".format(vl=vlet,vurl=aaurls[vind % iilen]) for vind,vlet in enumerate(aalist[0:14]) )
print(ans01)
print(ans02)
print(ans03)
Result
abcdefghijklmn
acegikm
alpha.com/a
bravo.com/b
chrly.com/c
delta.com/d
alpha.com/e
bravo.com/f
chrly.com/g
delta.com/h
alpha.com/i
bravo.com/j
chrly.com/k
delta.com/l
alpha.com/m
bravo.com/n
How this differs from the other replies
iterate over an arbitrary number of base urls
cycle through the urls using modular arithmetic, and do not stop until we run out of letters
use enumerate in conjunction with list comprehension and str.format

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Creating a histogram to map character frequency - python

Related

Inserting breaks in variables over a certain length

random alphabet chooser with no repeats (python)

Python: How to check if element of string is in a string and print the not included string elements?

Using array to generate random text

Python: how to print range a-z?

Categories

Resources