Is there a simple way to count using letters in Python? Meaning, 'A' will be used as 1, 'B' as 2 and so on, and after 'Z' will be 'AA', 'AB' and so on. So below code would generate:
def get_next_letter(last_letter):
return last_letter += 1 # pseudo
>>> get_next_letter('a')
'b'
>>> get_next_letter('b')
'c'
>>> get_next_letter('c')
'd'
...
>>> get_next_letter('z')
'aa'
>>> get_next_letter('aa')
'ab'
>>> get_next_letter('ab')
'ac'
...
>>> get_next_letter('az')
'ba'
>>> get_next_letter('ba')
'bb'
...
>>> get_next_letter('zz')
'aaa'
Based on @Charlie Clark's implementation of the openpyxl utility get_column_letter, we can write:
def get_number_letter(n):
    """Return the spreadsheet-style (bijective base-26) label for *n*.

    1 -> 'A', 26 -> 'Z', 27 -> 'AA', 703 -> 'AAA'.  For ``n <= 0`` the loop
    never runs and the empty string is returned.
    """
    letters = []
    while n > 0:
        # Shifting to a zero-based digit first makes an exact multiple of 26
        # come out as 'Z' without a separate "borrow" branch.
        n, digit = divmod(n - 1, 26)
        letters.append(chr(ord('A') + digit))
    # Digits were produced least-significant first.
    return ''.join(reversed(letters))
This gives the letter representation of a number. Now, to increment, we need the reverse. Based on that logic (and the general number base logic), I wrote:
def number_from_string(letters):
    """Return the number encoded by a spreadsheet-style letter label.

    'A' -> 1, 'Z' -> 26, 'AA' -> 27; the empty string maps to 0.  Letters
    are accepted in either case.

    Raises:
        ValueError: if *letters* contains a character outside A-Z / a-z.
    """
    n = 0
    for c in letters:
        if not ('A' <= c <= 'Z' or 'a' <= c <= 'z'):
            raise ValueError("invalid letter {!r} in {!r}".format(c, letters))
        # Horner's scheme: shift what we have one place left, add this digit.
        n = n * 26 + (ord(c.upper()) - ord('A') + 1)
    return n
And now we can combine them to:
def get_next_letter(letters):
    """Return the label that follows *letters* ('A' -> 'B', 'Z' -> 'AA').

    The input is upper-cased before decoding because the decoding helper
    works on 'A'..'Z'; an all-lowercase input gets an all-lowercase result,
    anything else gets an upper-case result.
    """
    successor = get_number_letter(number_from_string(letters.upper()) + 1)
    return successor.lower() if letters.islower() else successor
Original answer:
This kind of "counting" is very similar to how Excel indexes its columns. Therefore it is possible to take advantage of the openpyxl package, which has two utility functions: get_column_letter and column_index_from_string:
from openpyxl.utils import get_column_letter, column_index_from_string
def get_next_letter(letters):
    """Return the Excel column label that follows *letters*.

    Converts the label to its column index with openpyxl, adds one, and
    converts back.
    """
    return get_column_letter(column_index_from_string(letters) + 1)
NOTE: as this is based on Excel, it is limited to count up-to 'ZZZ'. i.e. calling the function with 'ZZZ' will raise an exception.
Output example for both implementations:
>>> get_next_letter('A')
'B'
>>> get_next_letter('Z')
'AA'
>>> get_next_letter('BD')
'BE'
Let's start with the simple special case of getting just the single-character strings.
from string import ascii_lowercase
def population():
    """Yield the 26 single-character strings 'a' through 'z', in order."""
    yield from ascii_lowercase
Then
>>> x = population()
>>> list(x)
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
>>> x = population()
>>> next(x)
'a'
>>> next(x)
'b'
So we'd like to add the two-character sequences next:
from string import ascii_lowercase
from itertools import product
def population():
    """Yield 'a'..'z', then every two-letter string 'aa'..'zz'."""
    yield from ascii_lowercase
    # product() gives tuples of characters; join each into a string.
    yield from map(''.join, product(ascii_lowercase, repeat=2))
Note that the single-character strings are just a special case of the product with repeat=1, so we could have written
from string import ascii_lowercase
from itertools import product
def population():
    """Yield every one-letter string, then every two-letter string."""
    # repeat=1 produces 1-tuples, so the single letters need no special case.
    yield from map(''.join, product(ascii_lowercase, repeat=1))
    yield from map(''.join, product(ascii_lowercase, repeat=2))
We can write this with a loop:
def population():
    """Yield every lowercase string of length 1, then of length 2."""
    for k in range(1, 3):
        yield from map(''.join, product(ascii_lowercase, repeat=k))
but we don't necessarily want an artificial upper limit on what strings we can produce; we want, in theory, to produce all of them. For that, we replace range with itertools.count.
from string import ascii_lowercase
from itertools import product, count
def population():
    """Yield every lowercase string, shortest first, without end.

    The generator never terminates; take what you need with ``next()`` or
    ``itertools.islice``.
    """
    for k in count(1):
        yield from map(''.join, product(ascii_lowercase, repeat=k))
All of the proposed solutions seem more complicated than necessary.
I came up with the version below, which uses a recursive call.
This is it:
def getNextLetter(previous_letter):
    """
    'Increments' the provided string to the next one, recursively.

    Only the last character is advanced; a trailing "z" rolls over to "a"
    and the carry is applied to the remaining prefix ("az" -> "ba",
    "zz" -> "aaa").

    Raises TypeError if previous_letter is not a string.
    Returns "a" if previous_letter is the empty string.
    """
    if not isinstance(previous_letter, str):
        raise TypeError("the previous letter should be a letter, doh")
    if previous_letter == '':
        return "a"
    head, last = previous_letter[:-1], previous_letter[-1]
    if last == "z":
        # Roll over and carry into whatever precedes the last character.
        return getNextLetter(head) + "a"
    return head + chr(ord(last) + 1)
# EOF
I have a string list :
li = ['a', 'b', 'c', 'd']
Using the following code in Python, I generated all the possible combination of characters for list li and got a result of 256 strings.
from itertools import product
li = ['a', 'b', 'c', 'd']
# Print every length-4 string over ``li`` (4**4 = 256 lines).
for comb in product(li, repeat=4):
    print(''.join(comb))
Say for example, I know the character of the second and fourth position of the string in the list li which is 'b' and 'c'.
So the result will be a set of only 16 strings which is :
abac
abbc
abcc
abdc
bbac
bbbc
bbcc
bbdc
cbac
cbbc
cbcc
cbdc
dbac
dbbc
dbcc
dbdc
How to get this result? Is there a Pythonic way to achieve this?
Thanks.
Edit: The list li I actually need runs from 'a' to 'z', and the value for repeat is 13. When I tried the above code with those values, Python raised a MemoryError!
Use list comprehension:
from itertools import product
li = ['a', 'b', 'c', 'd']
# Materialise every length-4 arrangement as a list of characters.
combs = list(map(list, product(li, repeat=4)))
# Keep the arrangements whose 2nd character is 'b' and whose 4th is 'c'.
selected_combs = [
    candidate
    for candidate in combs
    if not (candidate[1] != 'b' or candidate[3] != 'c')
]
print(list(map("".join, selected_combs)))
# ['abac', 'abbc', 'abcc', 'abdc', 'bbac', 'bbbc', 'bbcc', 'bbdc', 'cbac', 'cbbc', 'cbcc', 'cbdc', 'dbac', 'dbbc', 'dbcc', 'dbdc']
To save memory in case you do not need all the combinations combs, you can simply do:
li = ['a', 'b', 'c', 'd']
# Give product() one pool per position: the known positions (2nd and 4th)
# get a single-element pool, so only the unknown positions are enumerated
# instead of generating every combination and discarding most of them.
pools = [li, ['b'], li, ['c']]
selected_combs = list(product(*pools))
print(["".join(comb) for comb in selected_combs])
def permute(s):
    """Return every ordering of the items of *s*, each joined with ``+``.

    *s* may be a string or a list of strings.  The result is always a list
    (empty for empty input).
    """
    if len(s) == 1:
        # Always hand back a list, whatever sequence type came in.
        return [s[0]]
    out = []
    for i, let in enumerate(s):
        # Fix ``let`` in front and permute the remaining items.
        for perm in permute(s[:i] + s[i+1:]):
            out.append(let + perm)
    return out


per=permute(['a', 'b', 'c', 'd'])
print(per)
Do you want this?
I want to find all possible permutation of two list of strings within a constant length (5). Assume list_1 = ["A"] and list_2 = ["BB"].
All possible combinations are:
A A A A A
A A A BB
A A BB A
A BB A A
BB A A A
A BB BB
BB A BB
BB BB A
I was trying to implement it with the code below, but I am not sure how to define the length 5 for it.
import itertools
from itertools import permutations
list_1 = ["A"]
list_2 = ["BB"]
unique_combinations = []
# NOTE: list_1 has a single element, and permutations() yields nothing when
# asked for more items (5) than the input holds, so the loop body below
# never runs and an empty list is printed.
permut = itertools.permutations(list_1, 5)
for comb in permut:
    zipped = zip(comb, list_2)
    unique_combinations.append(list(zipped))
print(unique_combinations)
Use recursion:
list_1 = ["A"]
list_2 = ["BB"]
size = 5
strs = list_1 + list_2
res = []


def helper(strs, size, cur, res):
    """Append to *res* every sequence of pieces whose lengths total *size*.

    strs: the pieces that may be used (any number of times each).
    size: number of characters still to fill.
    cur:  pieces chosen so far on this branch.
    res:  output list, extended in place.
    """
    if size == 0:
        res.append(cur)
        return
    if size < 0:
        # Overshot the target length; abandon this branch.
        return
    for s in strs:
        if not s:
            # An empty piece never shrinks ``size`` and would recurse forever.
            continue
        helper(strs, size-len(s), cur+[s], res)


helper(strs, size, [], res)
print(res)
No recursion:
list_1 = ["A"]
list_2 = ["BB"]
size = 5
strs = list_1 + list_2
res = []
# Explicit stack of partial sequences that are still shorter than ``size``.
q = [[]]
while q:
    t = q.pop()
    for s in strs:
        if not s:
            # An empty piece would be re-queued forever without growing.
            continue
        cur = t + [s]
        cursize = sum(map(len, cur))
        if cursize == size:
            res.append(cur)
        elif cursize < size:
            q.append(cur)
        # cursize > size: overshoot, drop the candidate.
print(res)
You could do the following:
import itertools
unique_combinations = []
# Every length-5 string over the characters "A" and "B".
permut = itertools.product(["A","B"], repeat=5)
for comb in permut:
    candidate = "".join(comb)
    # str.count counts non-overlapping matches, so a run of four B's
    # counts as two "BB" pieces.
    c_bb = candidate.count("BB")
    c_a = candidate.count("A")
    # Keep the string only if "BB" pieces and "A"s account for all 5 characters.
    if 2*c_bb + c_a == 5:
        unique_combinations.append(candidate)
print(unique_combinations)
This will give:
['AAAAA', 'AAABB', 'AABBA', 'ABBAA', 'ABBBB', 'BBAAA', 'BBABB', 'BBBBA']
First generate every string of length 5 made up of the characters "A" and "B". Then use str.count to count the occurrences of each substring you are interested in ("BB" and "A"); if 2 × count("BB") + count("A") equals 5, the whole string is made of those pieces, so keep it.
You can use itertools.product to find all the possible combinations of 'A' and 'BB' (with repeat from 3 to 5, as these are the numbers of elements in the acceptable answers), and then filter them based on their total length being 5 characters:
import itertools
all_options = []
# A total of 5 characters needs at least 3 pieces (two 'BB' give only 4)
# and at most 5 pieces (all 'A').
for i in range(3,6):
    all_options.extend(itertools.product(['A', 'BB'], repeat=i))
# Keep only the tuples whose pieces add up to exactly 5 characters.
all_options = [option for option in all_options if len(''.join(option)) == 5]
print(all_options)
Output:
[('A', 'BB', 'BB'), ('BB', 'A', 'BB'), ('BB', 'BB', 'A'), ('A', 'A', 'A', 'BB'), ('A', 'A', 'BB', 'A'), ('A', 'BB', 'A', 'A'), ('BB', 'A', 'A', 'A'), ('A', 'A', 'A', 'A', 'A')]
You need a recursive function like this:
def f(words, N, current = ""):
    """Print every concatenation of *words* that is exactly *N* characters.

    *current* is the string built so far on this branch.  Branches that
    overshoot *N* are dropped silently.
    """
    if len(current) < N:
        for i in words:
            f(words, N, current+i)
    elif len(current) == N:
        print(current)
f(["A", "BB"], 5)
Edit: Unfortunately, this function returns duplicates if two or more words in your list share the same letter. So the correct approach would be to collect all the results in a list and then eliminate the duplicates.
Can anyone help me with finding all the possible substring in a string using python?
E.g:
string = 'abc'
output
a, b, c, ab, bc, abc
P.s : I am a beginner and would appreciate if the solution is simple to understand.
You could do something like:
# Shortest substrings first: ``length`` is the substring length minus one,
# ``index`` is where the substring starts.
for length in range(len(string)):
    for index in range(len(string) - length):
        print(string[index:index+length+1])
Output:
a
b
c
ab
bc
abc
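Another way is to use itertools.combinations. Note that this yields subsequences rather than contiguous substrings, so results such as 'ac' are included: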
from itertools import combinations
s = 'abc'
# For each size 1..len(s), join every combination of that many characters
# (taken in their original order) into a string.
[
    ''.join(picked)
    for width in range(len(s))
    for picked in combinations(s, width + 1)
]
Out
['a', 'b', 'c', 'ab', 'ac', 'bc', 'abc']
Every substring contains a unique start index and a unique end index (which is greater than the start index). You can use two for loops to get all unique combinations of indices.
def all_substrings(s):
    """Return every non-empty contiguous substring of *s*.

    Substrings are ordered by end index, then by start index; the same
    text is repeated if it occurs at several positions.
    """
    return [
        s[start:end]
        for end in range(1, len(s) + 1)
        for start in range(end)
    ]
s = 'abc'
print(all_substrings(s)) # prints ['a', 'ab', 'b', 'abc', 'bc', 'c']
You can do like:
def subString(s):
    """Print every non-empty contiguous substring of *s*, one per line.

    Substrings are grouped by start index, shortest first.
    """
    for i in range(len(s)):
        for j in range(i+1,len(s)+1):
            print(s[i:j])
subString("aashu")
a
aa
aas
aash
aashu
a
as
ash
ashu
s
sh
shu
h
hu
u
Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 3 years ago.
Improve this question
I need to generate every possible combination from a given charset to a given range.
Like,
charset=list(map(str,"abcdefghijklmnopqrstuvwxyz"))
range=10
And the out put should be,
[a,b,c,d..................,zzzzzzzzzy,zzzzzzzzzz]
I know I can do this using already in use libraries.But I need to know how they really works.If anyone can give me a commented code of this kind of algorithm in Python or any programming language readable,I would be very grateful.
Use itertools.product, combined with itertools.chain to put the various lengths together:
from itertools import chain, product
def bruteforce(charset, maxlength):
    """Lazily yield every string over *charset* of length 1 to *maxlength*.

    Shorter strings come first.  Nothing is stored: each candidate is built
    only when the returned generator is advanced.
    """
    return (''.join(candidate)
            for candidate in chain.from_iterable(product(charset, repeat=i)
                                                 for i in range(1, maxlength + 1)))
Demonstration:
>>> list(bruteforce('abcde', 2))
['a', 'b', 'c', 'd', 'e', 'aa', 'ab', 'ac', 'ad', 'ae', 'ba', 'bb', 'bc', 'bd', 'be', 'ca', 'cb', 'cc', 'cd', 'ce', 'da', 'db', 'dc', 'dd', 'de', 'ea', 'eb', 'ec', 'ed', 'ee']
This will efficiently produce progressively larger words with the input sets, up to length maxlength.
Do not attempt to produce an in-memory list of 26 characters up to length 10; instead, iterate over the results produced:
for attempt in bruteforce(string.ascii_lowercase, 10):
# match it against your password, or whatever
if matched:
break
If you REALLY want to brute force it, try this, but it will take you a ridiculous amount of time:
your_list = 'abcdefghijklmnopqrstuvwxyz'
complete_list = []


def build_complete_list(charset, max_length):
    """Return a list of every string over *charset* of length 1..*max_length*.

    Strings are grouped by length; within a length the first character
    varies fastest ('aa', 'ba', 'ab', 'bb' for charset 'ab').  The whole
    result is held in memory, so this is only usable for small inputs.
    """
    result = []
    for current in range(max_length):
        # Start from the single characters, then append one more character
        # ``current`` times.
        a = list(charset)
        for _ in range(current):
            a = [x+i for i in charset for x in a]
        result.extend(a)
    return result


if __name__ == "__main__":
    # WARNING: 26 letters up to length 10 is more than 10**14 strings; this
    # call will exhaust memory long before it finishes.
    complete_list = build_complete_list(your_list, 10)
On a smaller example, where list = 'ab' and we only go up to 5, this prints the following:
['a', 'b', 'aa', 'ba', 'ab', 'bb', 'aaa', 'baa', 'aba', 'bba', 'aab', 'bab', 'abb', 'bbb', 'aaaa', 'baaa', 'abaa', 'bbaa', 'aaba', 'baba', 'abba', 'bbba', 'aaab', 'baab', 'abab', 'bbab', 'aabb', 'babb', 'abbb', 'bbbb', 'aaaaa', 'baaaa', 'abaaa', 'bbaaa', 'aabaa', 'babaa', 'abbaa', 'bbbaa', 'aaaba','baaba', 'ababa', 'bbaba', 'aabba', 'babba', 'abbba', 'bbbba', 'aaaab', 'baaab', 'abaab', 'bbaab', 'aabab', 'babab', 'abbab', 'bbbab', 'aaabb', 'baabb', 'ababb', 'bbabb', 'aabbb', 'babbb', 'abbbb', 'bbbbb']
I found another very easy way to create dictionaries using itertools.
# product() with repeat=k gives every ordered k-tuple of characters;
# chaining k = 1..4 yields lengths 1 to 4 in turn: a, b, ..., dddc, dddd.
generator = itertools.chain.from_iterable(itertools.product('abcd', repeat=k) for k in range(1, 5))
This will iterate through all combinations of 'a', 'b', 'c' and 'd' and create combinations with a total length of 1 to 4, i.e. a, b, c, d, aa, ab, ..., dddc, dddd. generator is an itertools object, and you can loop through it normally like this:
for password in generator:
    # Each item is a tuple of characters; join it to get the actual string.
    candidate = ''.join(password)
Each password is in fact of type tuple, and you can work on them as you normally do.
If you really want a bruteforce algorithm, don't save any big list in the memory of your computer, unless you want a slow algorithm that crashes with a MemoryError.
You could try to use itertools.product like this :
from string import ascii_lowercase
from itertools import product
charset = ascii_lowercase # abcdefghijklmnopqrstuvwxyz
maxrange = 10
def solve_password(password, maxrange, alphabet=None):
    """Brute-force *password* by trying every string up to *maxrange* long.

    Candidates are tried shortest first, starting with the empty string.

    password: the string to look for.
    maxrange: longest candidate length to try.
    alphabet: characters to build candidates from; when omitted, the
        module-level ``charset`` is used.

    Returns the matching string, or None if no candidate matched.
    """
    symbols = charset if alphabet is None else alphabet
    for i in range(maxrange+1):
        for attempt in product(symbols, repeat=i):
            guess = ''.join(attempt)
            if guess == password:
                return guess
    return None
solved = solve_password('solve', maxrange) # This worked for me in 2.51 sec
itertools.product(*iterables) returns the cartesian products of the iterables you entered.
[i for i in product('bar', (42,))] returns e.g. [('b', 42), ('a', 42), ('r', 42)]
The repeat parameter allows you to make exactly what you asked :
[i for i in product('abc', repeat=2)]
Returns
[('a', 'a'),
('a', 'b'),
('a', 'c'),
('b', 'a'),
('b', 'b'),
('b', 'c'),
('c', 'a'),
('c', 'b'),
('c', 'c')]
Note:
You wanted a brute-force algorithm so I gave it to you. Now, it is a very long method when the password starts to get bigger because it grows exponentially (it took 62 sec to find the word 'solved').
itertools is ideally suited for this:
# For each length 1..maxlen, lazily join every product of that length into a
# string, and chain the per-length streams one after another.
itertools.chain.from_iterable(
    map(''.join, itertools.product(charset, repeat=size))
    for size in range(1, maxlen + 1)
)
A solution using recursion:
def brute(string, length, charset):
    """Print every extension of *string* over *charset* up to *length* chars.

    Output is depth-first: each candidate is printed before the longer
    candidates that start with it.  Nothing is printed when *string* is
    already *length* characters or longer.
    """
    # ``>=`` rather than ``==``: a prefix that is already too long must stop
    # here too, otherwise the recursion never terminates.
    if len(string) >= length:
        return
    for char in charset:
        temp = string + char
        print(temp)
        brute(temp, length, charset)
Usage:
brute("", 4, "rce")
import string, itertools
#password = input("Enter password: ")
password = "abc"
characters = string.printable


def iter_all_strings():
    """Yield every string over ``characters``, shortest first, without end."""
    for length in itertools.count(1):
        for s in itertools.product(characters, repeat=length):
            yield "".join(s)
# Try candidates in generation order until one equals the password.
for s in iter_all_strings():
    print(s)
    if s == password:
        print('Password is {}'.format(s))
        break
Simple solution using the itertools and string modules
# modules to easily set characters and iterate over them
import itertools, string
# character limit so you don't run out of ram
maxChar = int(input('Character limit for password: '))
# file to save output to, so you can look over the output without using so much ram;
# 'with' makes sure it is flushed and closed even if generation is interrupted
with open('insert filepath here', 'a+') as output_file:
    # this is the part that actually iterates over the valid characters, and stops at the
    # character limit. product() (unlike permutations()) allows a character to repeat,
    # and looping over every length up to the limit also covers the shorter candidates.
    for length in range(1, maxChar + 1):
        for candidate in itertools.product(string.ascii_lowercase, repeat=length):
            # one candidate per line, written as soon as it is generated, so the
            # full set is never held in memory
            output_file.write(''.join(candidate) + '\n')
I piped the output to a file to save ram, and used the input function so you can set the character limit to something like "hiiworld". Below is the same script but with a more fluid character set using letters, numbers, symbols, and spaces.
import itertools, string
maxChar = int(input('Character limit for password: '))
# Write to the file object (the original called write()/close() on the list
# of candidates, which raises AttributeError); 'with' closes it for us.
with open('insert filepath here', 'a+') as output_file:
    # product() allows repeated characters; every length up to the limit is covered.
    for length in range(1, maxChar + 1):
        for candidate in itertools.product(string.printable, repeat=length):
            # string.printable contains newlines, so write the repr of each
            # candidate to keep one unambiguous entry per line.
            output_file.write(repr(''.join(candidate)) + '\n')
from random import choice
sl = 4 #start length
ml = 8 #max length
ls = '9876543210qwertyuiopasdfghjklzxcvbnm' # list
g = 0
tries = 0
# NOTE(review): nesting reconstructed from the flattened original; confirm.
# Each round writes one random string of every length sl..ml, one per line,
# then prints the round number.
with open("file.txt",'w') as file: #your file
    for j in range(0,len(ls)**4):
        for length in range(sl, ml + 1):
            file.write(''.join(choice(ls) for _ in range(length)))
            file.write('\n')
        print(tries)
        tries += 1
Try this:
import os
import sys
# "Zeichen" is German for "characters": the alphabet used by Gen().
Zeichen=["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"]


def start():
    """Wait for the user to press Enter."""
    input("Enter to start")


def Gen(stellen):
    """Print every string of exactly *stellen* characters over ``Zeichen``.

    ("stellen" is German for "places", i.e. the number of positions.)
    Lengths 1 to 5 are supported; any other value prints "done" instead.
    """
    # Local import: the snippet itself only imports os and sys.
    from itertools import product

    if stellen in (1, 2, 3, 4, 5):
        # product() varies the last position fastest, exactly like the
        # hand-written nested loops it replaces.
        for combo in product(Zeichen, repeat=int(stellen)):
            print("".join(combo))
    else:
        print("done")
#*********************
start()
Gen(1)
Gen(2)
Gen(3)
Gen(4)
Gen(5)