How to count sequentially using letters instead of numbers? - python

Is there a simple way to count using letters in Python? Meaning, 'A' will be used as 1, 'B' as 2 and so on, and after 'Z' will be 'AA', 'AB' and so on. So below code would generate:
def get_next_letter(last_letter):
return last_letter += 1 # pseudo
>>> get_next_letter('a')
'b'
>>> get_next_letter('b')
'c'
>>> get_next_letter('c')
'd'
...
>>> get_next_letter('z')
'aa'
>>> get_next_letter('aa')
'ab'
>>> get_next_letter('ab')
'ac'
...
>>> get_next_letter('az')
'ba'
>>> get_next_letter('ba')
'bb'
...
>>> get_next_letter('zz')
'aaa'

Based on #Charlie Clark's implementation of the openpyxl util get_column_letter, we can have:
def get_number_letter(n):
letters = []
while n > 0:
n, remainder = divmod(n, 26)
# check for exact division and borrow if needed
if remainder == 0:
remainder = 26
n-= 1
letters.append(chr(remainder+64))
return ''.join(reversed(letters))
This gives the letter representation of a number. Now, to increment, we need the reverse. Based on that logic (and the general number base logic), I wrote:
def number_from_string(letters):
n = 0
for i, c in enumerate(reversed(letters)):
n += (ord(c)-64)*26**i
return n
And now we can combine them to:
def get_next_letter(letters):
return get_number_letter(number_from_string(letters)+1)
Original answer:
This kind of "counting" is very similar to how Excel indexes its columns. Therefore it is possible to take advantage of the openpyxl package, which has two utility functions: get_column_letter and column_index_from_string:
from openpyxl.utils import get_column_letter, column_index_from_string
def get_next_letter(letters):
return get_column_letter(column_index_from_string(letters)+1)
NOTE: as this is based on Excel, it is limited to count up-to 'ZZZ'. i.e. calling the function with 'ZZZ' will raise an exception.
Output example for both implementations:
>>> get_next_letter('A')
'B'
>>> get_next_letter('Z')
'AA'
>>> get_next_letter('BD')
'BE'

Let's start with the simple special case of getting just the single-character strings.
from string import ascii_lowercase
def population():
yield from ascii_lowercase
Then
>>> x = population()
>>> list(x)
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
>>> x = population()
>>> next(x)
'a'
>>> next(x)
'b'
So we'd like to add the two-character sequences next:
from string import ascii_lowercase
from itertools import product
def population():
yield from ascii_lowercase
yield from map(''.join, product(ascii_lowercase, repeat=2)
Note that the single-character strings are just a special case of the product with repeat=1, so we could have written
from string import ascii_lowercase
from itertools import product
def population():
yield from map(''.join, product(ascii_lowercase, repeat=1)
yield from map(''.join, product(ascii_lowercase, repeat=2)
We can write this with a loop:
def population():
for k in range(1, 3):
yield from map(''.join, product(ascii_lowercase, repeat=k)
but we don't necessarily want an artificial upper limit on what strings we can produce; we want, in theory, to produce all of them. For that, we replace range with itertools.count.
from string import ascii_lowercase
from itertools import product, count
def population():
for k in count(1):
yield from map(''.join, product(ascii_lowercase, repeat=k)

all proposed are just way too complicated
I came up with below, using a recursive call,
this is it!
def getNextLetter(previous_letter):
"""
'increments' the provide string to the next letter recursively
raises TypeError if previous_letter is not a string
returns "a" if provided previous_letter was emtpy string
"""
if not isinstance(previous_letter, str):
raise TypeError("the previous letter should be a letter, doh")
if previous_letter == '':
return "a"
for letter_location in range(len(previous_letter) - 1, -1, -1):
if previous_letter[letter_location] == "z":
return getNextLetter(previous_letter[:-1])+"a"
else:
return (previous_letter[:-1])+chr(ord(previous_letter[letter_location])+1)
# EOF

Related

Correct word generation without repetitions

How many five-letter words can you make from a 26-letter alphabet (no repetitions)?
I am writing a program that generates names (just words) from 5 letters in the format: consonant_vowel_consistent_vowel_consonant. Only 5 letters. in Latin. I just want to understand how many times I have to run the cycle for generation. At 65780, for example, repetitions already begin. Can you please tell me how to do it correctly?
import random
import xlsxwriter
consonants = ['B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q',
'R', 'S', 'T', 'V', 'W', 'X', 'Z']
vowels = ['A', 'E', 'I', 'O', 'U', 'Y']
workbook = xlsxwriter.Workbook('GeneratedNames.xlsx')
worksheet = workbook.add_worksheet()
def names_generator(size=5, chars=consonants + vowels):
for y in range(65780):
toggle = True
_id = ""
for i in range(size):
if toggle:
toggle = False
_id += random.choice(consonants)
else:
toggle = True
_id += random.choice(vowels)
worksheet.write(y, 0, _id)
print(_id)
workbook.close()
names_generator()
You can use itertools.combinations to get 3 different consonants and 2 different vowels and get the permutations of those to generate all possible "names".
from itertools import combinations, permutations
names = [a+b+c+d+e for cons in combinations(consonants, 3)
for a, c, e in permutations(cons)
for vow in combinations(vowels, 2)
for b, d in permutations(vow)]
There are only 205,200 = 20x19x18x6x5 in total, so this will take no time at all for 5 letters, but will quickly take longer for more. That is, if by "no repetitions" you mean that no letter should occur more than once. If, instead, you just want that no consecutive letters are repeated (which is already guaranteed by alternating consonants and vowels), or that no names are repeated (which is guaranteed by constructing them without randomness), you can just use itertools.product instead, for a total of 288,000 = 20x20x20x6x6 names:
names = [a+b+c+d+e for a, c, e in product(consonants, repeat=3)
for b, d in product(vowels, repeat=2)]
If you want to generate them in random order, you could just random.shuffle the list afterwards, or if you want just a few such names, you can use random.sample or random.choice on the resulting list.
If you want to avoid duplicates, you shouldn't use randomness but simply generate all such IDs:
from itertools import product
C = consonants
V = vowels
for id_ in map(''.join, product(C, V, C, V, C)):
print(id_)
or
from itertools import cycle, islice, product
for id_ in map(''.join, product(*islice(cycle((consonants, vowels)), 5))):
print(id_)
itertools allows for non repetitive permutations https://docs.python.org/3/library/itertools.html
import itertools, re
names = list(itertools.product(consonants + vowels, repeat=5))
consonants_regex = "(" + "|".join(consonants) + ")"
vowels_regex = "(" + "|".join(vowels) + ")"
search_string = consonants_regex + vowels_regex + consonants_regex + vowels_regex + consonants_regex
names_format = ["".join(name) for name in names if re.match(search_string, "".join(name))]
Output:
>>> len(names)
11881376
>>> len(names_format)
288000
I want to make sure to answer your question
I just want to understand how many times I have to run the cycle for
generation
since I think it is important to get a better intuition about the problem.
You have 20 consonants and 6 vowels and in total that yields 20x6x20x6x20 = 288000 different combinations for words. Since it is sequential, you can split it up to make that easier to understand. You have 20 different consonants you can put as the 1st letter and for each one 6 vowels you can attach afterwards = 20x6 = 120. Then you can keep going and say for those 120 combinations you can add 20 consonants for each = 120x20 = 2400 ... and so on.

List all possible words with n letters

I want to list all possible words with n letters where the first letter can be a1 or a2, the second can be b1, b2 or b3, the third can be c1 or c2, ... Here's a simple example input-output for n=2 with each letter having 2 alternatives:
input = [["a","b"],["c","d"]]
output = ["ac", "ad", "bc", "bd"]
I tried doing this recursively by creating all possible words with the first 2 letters first, so something like this:
def go(l):
if len(l) > 2:
head = go(l[0:2])
tail = l[2:]
tail.insert(0, head)
go(tail)
elif len(l) == 2:
res = []
for i in l[0]:
for j in l[1]:
res.append(i+j)
return res
elif len(l) == 1:
return l
else:
return None
However, this becomes incredibly slow for large n or many alternatives per letter. What would be a more efficient way to solve this?
Thanks
I think you just want itertools.product here:
>>> from itertools import product
>>> lst = ['ab', 'c', 'de']
>>> words = product(*lst)
>>> list(words)
[('a', 'c', 'd'), ('a', 'c', 'e'), ('b', 'c', 'd'), ('b', 'c', 'e')]`
Or, if you wanted them joined into words:
>>> [''.join(word) for word in product(*lst)]
['acd', 'ace', 'bcd', 'bce']
Or, with your example:
>>> lst = [["a","b"],["c","d"]]
>>> [''.join(word) for word in product(*lst)]
['ac', 'ad', 'bc', 'bd']
Of course for very large n or very large sets of letters (size m), this will be slow. If you want to generate an exponentially large set of outputs (O(m**n)), that will take exponential time. But at least it has constant rather than exponential space (it generates one product at a time, instead of a giant list of all of them), and will be faster than what you were on your way to by a decent constant factor, and it's a whole lot simpler and harder to get wrong.
You can use the permutations from the built-in itertools module to achieve this, like so
>>> from itertools import permutations
>>> [''.join(word) for word in permutations('abc', 2)]
['ab', 'ac', 'ba', 'bc', 'ca', 'cb']
Generating all strings of some length with given alphabet :
test.py :
def generate_random_list(alphabet, length):
if length == 0: return []
c = [[a] for a in alphabet[:]]
if length == 1: return c
c = [[x,y] for x in alphabet for y in alphabet]
if length == 2: return c
for l in range(2, length):
c = [[x]+y for x in alphabet for y in c]
return c
if __name__ == "__main__":
for p in generate_random_list(['h','i'],2):
print p
$ python2 test.py
['h', 'h']
['h', 'i']
['i', 'h']
['i', 'i']
Next Way :
def generate_random_list(alphabet, length):
c = []
for i in range(length):
c = [[x]+y for x in alphabet for y in c or [[]]]
return c
if __name__ == "__main__":
for p in generate_random_list(['h','i'],2):
print p
Next Way :
import itertools
if __name__ == "__main__":
chars = "hi"
count = 2
for item in itertools.product(chars, repeat=count):
print("".join(item))
import itertools
print([''.join(x) for x in itertools.product('hi',repeat=2)])
Next Way :
from itertools import product
#from string import ascii_letters, digits
#for i in product(ascii_letters + digits, repeat=2):
for i in product("hi",repeat=2):
print(''.join(i))

Returning the value of an index in a python list based on other values

I have put the letters a-z in a list. How would I find the value of an item in the list depending on what the user typed?
For example if they type the letter a it would return c, f would return h and x would return z.
letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
newletters = []
offset = 2
userInput = input('type a string')
newvalue = chr(ord(userInput)+offset)
split = list(newvalue)
print split
the above works for a character but not for a string..help?!
You can try this:
>>> offset = 2
>>> aString = raw_input("digit a letter: ")
>>> aString
'a'
>>> chr(ord(aString)+offset)
'c'
documentation:
https://docs.python.org/2/library/functions.html#chr
https://docs.python.org/2/library/functions.html#ord
If you want to iterate over an entire string, a simple way is using a for loop. I assume the input string is always lowercase.
EDIT2: I improved the solution to handle the case when a letter is 'y' or 'z' and without "rotation" should begin a not alphabetic character, eg:
# with only offset addiction this return a non-alphabetic character
>>> chr(ord('z')+2)
'|'
# the 'z' rotation return the letter 'b'
>>> letter = "z"
>>> ord_letter = ord(letter)+offset
>>> ord_letter_rotated = ((ord_letter - 97) % 26) + 97
>>> chr(ord_letter_rotated)
'b'
The code solution:
offset = 2
aString = raw_input("digit the string to convert: ")
#aString = "abz"
newString = ""
for letter in aString:
ord_letter = ord(letter)+offset
ord_letter_rotated = ((ord_letter - 97) % 26) + 97
newString += chr(ord_letter_rotated)
print newString
The output of this code for the entire lowercase alphabet:
cdefghijklmnopqrstuvwxyzab
Note: you can obtain the lowercase alphabet for free also this way:
>>> import string
>>> string.lowercase
'abcdefghijklmnopqrstuvwxyz'
See the wikipedia page to learn something about ROT13:
https://en.wikipedia.org/wiki/ROT13
What should happen for z? Should it become b?
You can use Python's maketrans and translate functions to do this as follows:
import string
def rotate(text, by):
s_from = string.ascii_lowercase
s_to = string.ascii_lowercase[by:] + string.ascii_lowercase[:by]
cypher_table = string.maketrans(s_from, s_to)
return text.translate(cypher_table)
user_input = raw_input('type a string: ').lower()
print rotate(user_input, 2)
This works on the whole string as follows:
type a string: abcxyz
cdezab
How does it work?
If you print s_from and s_to they look as follows:
abcdefghijklmnopqrstuvwxyz
cdefghijklmnopqrstuvwxyzab
maketrans creates a mapping table to map characters in s_from to s_to. translate then applies this mapping to your string.

How to sort the letters in a string alphabetically in Python

Is there an easy way to sort the letters in a string alphabetically in Python?
So for:
a = 'ZENOVW'
I would like to return:
'ENOVWZ'
You can do:
>>> a = 'ZENOVW'
>>> ''.join(sorted(a))
'ENOVWZ'
>>> a = 'ZENOVW'
>>> b = sorted(a)
>>> print b
['E', 'N', 'O', 'V', 'W', 'Z']
sorted returns a list, so you can make it a string again using join:
>>> c = ''.join(b)
which joins the items of b together with an empty string '' in between each item.
>>> print c
'ENOVWZ'
Sorted() solution can give you some unexpected results with other strings.
List of other solutions:
Sort letters and make them distinct:
>>> s = "Bubble Bobble"
>>> ''.join(sorted(set(s.lower())))
' belou'
Sort letters and make them distinct while keeping caps:
>>> s = "Bubble Bobble"
>>> ''.join(sorted(set(s)))
' Bbelou'
Sort letters and keep duplicates:
>>> s = "Bubble Bobble"
>>> ''.join(sorted(s))
' BBbbbbeellou'
If you want to get rid of the space in the result, add strip() function in any of those mentioned cases:
>>> s = "Bubble Bobble"
>>> ''.join(sorted(set(s.lower()))).strip()
'belou'
Python functionsorted returns ASCII based result for string.
INCORRECT: In the example below, e and d is behind H and W due it's to ASCII value.
>>>a = "Hello World!"
>>>"".join(sorted(a))
' !!HWdellloor'
CORRECT: In order to write the sorted string without changing the case of letter. Use the code:
>>> a = "Hello World!"
>>> "".join(sorted(a,key=lambda x:x.lower()))
' !deHllloorW'
OR (Ref: https://docs.python.org/3/library/functions.html#sorted)
>>> a = "Hello World!"
>>> "".join(sorted(a,key=str.lower))
' !deHllloorW'
If you want to remove all punctuation and numbers.
Use the code:
>>> a = "Hello World!"
>>> "".join(filter(lambda x:x.isalpha(), sorted(a,key=lambda x:x.lower())))
'deHllloorW'
You can use reduce
>>> a = 'ZENOVW'
>>> reduce(lambda x,y: x+y, sorted(a))
'ENOVWZ'
the code can be used to sort string in alphabetical order without using any inbuilt function of python
k = input("Enter any string again ")
li = []
x = len(k)
for i in range (0,x):
li.append(k[i])
print("List is : ",li)
for i in range(0,x):
for j in range(0,x):
if li[i]<li[j]:
temp = li[i]
li[i]=li[j]
li[j]=temp
j=""
for i in range(0,x):
j = j+li[i]
print("After sorting String is : ",j)
Really liked the answer with the reduce() function. Here's another way to sort the string using accumulate().
from itertools import accumulate
s = 'mississippi'
print(tuple(accumulate(sorted(s)))[-1])
sorted(s) -> ['i', 'i', 'i', 'i', 'm', 'p', 'p', 's', 's', 's', 's']
tuple(accumulate(sorted(s)) -> ('i', 'ii', 'iii', 'iiii', 'iiiim', 'iiiimp', 'iiiimpp', 'iiiimpps', 'iiiimppss', 'iiiimppsss', 'iiiimppssss')
We are selecting the last index (-1) of the tuple

What is the python equivalent to perl "a".."azc"

In perl, to get a list of all strings from "a" to "azc", to only thing to do is using the range operator:
perl -le 'print "a".."azc"'
What I want is a list of strings:
["a", "b", ..., "z", "aa", ..., "az" ,"ba", ..., "azc"]
I suppose I can use ord and chr, looping over and over, this is simple to get for "a" to "z", eg:
>>> [chr(c) for c in range(ord("a"), ord("z") + 1)]
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
But a bit more complex for my case, here.
Thanks for any help !
Generator version:
from string import ascii_lowercase
from itertools import product
def letterrange(last):
for k in range(len(last)):
for x in product(ascii_lowercase, repeat=k+1):
result = ''.join(x)
yield result
if result == last:
return
EDIT: #ihightower asks in the comments:
I have no idea what I should do if I want to print from 'b' to 'azc'.
So you want to start with something other than 'a'. Just discard anything before the start value:
def letterrange(first, last):
for k in range(len(last)):
for x in product(ascii_lowercase, repeat=k+1):
result = ''.join(x)
if first:
if first != result:
continue
else:
first = None
yield result
if result == last:
return
A suggestion purely based on iterators:
import string
import itertools
def string_range(letters=string.ascii_lowercase, start="a", end="z"):
return itertools.takewhile(end.__ne__, itertools.dropwhile(start.__ne__, (x for i in itertools.count(1) for x in itertools.imap("".join, itertools.product(letters, repeat=i)))))
print list(string_range(end="azc"))
Use the product call in itertools, and ascii_letters from string.
from string import ascii_letters
from itertools import product
if __name__ == '__main__':
values = []
for i in xrange(1, 4):
values += [''.join(x) for x in product(ascii_letters[:26], repeat=i)]
print values
Here's a better way to do it, though you need a conversion function:
for i in xrange(int('a', 36), int('azd', 36)):
if base36encode(i).isalpha():
print base36encode(i, lower=True)
And here's your function (thank you Wikipedia):
def base36encode(number, alphabet='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ', lower=False):
'''
Convert positive integer to a base36 string.
'''
if lower:
alphabet = alphabet.lower()
if not isinstance(number, (int, long)):
raise TypeError('number must be an integer')
if number < 0:
raise ValueError('number must be positive')
# Special case for small numbers
if number < 36:
return alphabet[number]
base36 = ''
while number != 0:
number, i = divmod(number, 36)
base36 = alphabet[i] + base36
return base36
I tacked on the lowercase conversion option, just in case you wanted that.
I generalized the accepted answer to be able to start middle and to use other than lowercase:
from string import ascii_lowercase, ascii_uppercase
from itertools import product
def letter_range(first, last, letters=ascii_lowercase):
for k in range(len(first), len(last)):
for x in product(letters, repeat=k+1):
result = ''.join(x)
if len(x) != len(first) or result >= first:
yield result
if result == last:
return
print list(letter_range('a', 'zzz'))
print list(letter_range('BA', 'DZA', ascii_uppercase))
def strrange(end):
values = []
for i in range(1, len(end) + 1):
values += [''.join(x) for x in product(ascii_lowercase, repeat=i)]
return values[:values.index(end) + 1]

Categories

Resources