This Python function interlocks the characters of two words (e.g., "sho" + "col" -> "school"). word1-char1 + word2-char1 + word1-char2 + ...
def interlock(a, b):
    """Interleave the characters of two words, e.g. "sho" + "col" -> "school".

    Characters are taken alternately (a[0], b[0], a[1], b[1], ...). Pairing
    stops at the end of the shorter word; surplus characters of the longer
    word are dropped.
    """
    # zip() stops at the shorter input. The previous loop condition
    # ``i < len(a) and len(b)`` never compared ``i`` with ``len(b)``, so it
    # raised IndexError whenever ``a`` was longer than ``b``.
    return "".join(x + y for x, y in zip(a, b))


interlock("sho", "col")
Now, I would like to apply this function to a list of words. The goal is to find out whether any interlock of two words in the list corresponds to another item of the list.
word_list = ["test", "col", "tele", "school", "tel", "sho", "aye"]
To do that, I would first have to create a new list that has all the interlocks in it. This is exactly where I am stuck - I don't know how to iterate over word_list using interlock.
Thanks for your help!
If you want to pass every permutation (ordered pair) of words from the list to interlock without pairing a word with itself, i.e. we won't get interlock("col", "col"):
def interlock(s1, s2):
    """Interleave two strings and append the tail of the longer one.

    Returns s1[0] + s2[0] + s1[1] + s2[1] + ... for as long as both strings
    have characters, followed by whatever remains of the longer string.
    """
    # Pair characters with zip() and join once; re-slicing both strings on
    # every iteration copied them each time (quadratic work).
    shared = min(len(s1), len(s2))
    paired = "".join(x + y for x, y in zip(s1, s2))
    return paired + s1[shared:] + s2[shared:]  # add the remainder of any longer string
from itertools import permutations

# NOTE: the comma between "col" and "aye" was missing, which made Python
# concatenate the two adjacent literals into the single word "colaye".
word_list = ["test", "col", "tele", "school", "tel", "sho", "col", "aye"]
st = set(word_list)
# get all permutations of len 2 from the distinct words of our list;
# de-duplicating first means a repeated word is never paired with its own
# copy and each interlock is reported only once
perms = permutations(dict.fromkeys(word_list), 2)
for a, b in perms:
    res = interlock(a, b)
    if res in st:
        print(res)
school
You can also achieve the same result using itertools.zip_longest using a fillvalue of "" to catch the end of the longer words:
from itertools import permutations, zip_longest

st = set(word_list)
perms = permutations(word_list, 2)
for a, b in perms:
    # Flatten the (char_a, char_b) pairs into one string; the "" fill value
    # lets the tail of the longer word pass through unchanged.
    res = "".join(ch for pair in zip_longest(a, b, fillvalue="") for ch in pair)
    if res in st:
        print(res)
You can do it using product function from itertools module:
from itertools import product

# Interlock every ordered pair of words (a word is also paired with itself)
# and keep the results; the bare loop called interlock() but threw its
# return value away.
interlocks = [interlock(a, b) for a, b in product(word_list, repeat=2)]
https://docs.python.org/2/library/itertools.html#itertools.product
Try this.
def interlockList(A):
    """Interlock all words of ``A`` from left to right into a single string.

    The first two words are interlocked, the result is interlocked with the
    third word, and so on. A one-element list returns that element and an
    empty list returns "". The caller's list is left unmodified.
    """
    from functools import reduce

    if not A:
        return ""
    # reduce() performs the left fold the original loop attempted. That loop
    # called the undefined name ``Len``, removed the wrong second element
    # (indices shift after the first remove) and left ``B`` unbound for
    # lists of fewer than three words.
    return reduce(interlock, A)
Related
I want to write my each string's letter frequencies. My inputs and expected outputs are like this.
"aaaa" -> "a4"
"abb" -> "a1b2"
"abbb cc a" -> "a1b3 c2 a1"
"bbbaaacddddee" -> "b3a3c1d4e2"
"a b" -> "a1 b1"
I found this solution but it gives the frequencies in random order. How can I do this?
Does this satisfy your needs?
import re
from itertools import groupby

s = "bbbaaac ddddee aa"
# groupby() yields one (character, run) pair per run of equal characters
groups = groupby(s)
result = [(label, sum(1 for _ in group)) for label, group in groups]
res1 = "".join("{}{}".format(label, count) for label, count in result)
# 'b3a3c1 1d4e2 1a2'
# spaces just as spaces, do not include their count
# (the substitution result is now kept in res2 instead of being discarded)
res2 = re.sub(r' [0-9]+', ' ', res1)
'b3a3c1 d4e2 a2'
For me, it is a little bit trickier than it looks at first. For example, "bbbaaacddddee" -> "b3a3c1d4e2" suggests that the counts need to be output in the order in which the characters first appear in the passed string:
import re
def unique_elements(t):
    """Return the distinct items of ``t`` as a list, in first-seen order."""
    seen = []
    for item in t:
        if item in seen:
            continue  # already recorded at its first occurrence
        seen.append(item)
    return seen
def splitter(s):
    """Encode each space-separated word of ``s`` as <char><count> pairs.

    Within a word every distinct character is listed once, in order of first
    appearance, followed by its total count in that word. The encoded words
    are joined with single spaces.
    """
    encoded = []
    for token in re.split("[ ]+", s):
        s1 = unique_elements(token) # or s1 = sorted(set(token))
        encoded.append("".join(ch + str(token.count(ch)) for ch in s1))
    return " ".join(encoded)


print(splitter("aaaa"))
print(splitter("abb"))
print(splitter("abbb cc a"))
print(splitter("bbbaaacddddee"))
print(splitter("a b"))
OUTPUT
a4
a1b2
a1b3 c2 a1
b3a3c1d4e2
a1 b1
If the order of appearance is not a real deal, you can disregard the unique_elements function and simply substitute something like s1 = sorted(set(token)) within splitter, as indicated in the comment.
Here is your answer:
test_str = "here is your answer"
encoded_words = []
# Count letters word by word; a dict keeps keys in insertion order, so the
# letters come out in the order they first appear in each word.
for word in test_str.split():
    res = {}
    for char in word:
        res[char] = res.get(char, 0) + 1
    encoded_words.append("".join(f"{key}{value}" for key, value in res.items()))
# Join once so there is no trailing space and the line ends with a newline;
# the builtin name ``list`` is no longer shadowed.
encoded = " ".join(encoded_words)
print(encoded)
There is no need to iterate every character in every word.
This is an alternate solution. (If you don't want to use itertools, that looked pretty tidy.)
def word_stats(data: str=""):
    """Return per-word letter counts, e.g. "abbb cc a" -> "a1b3 c2 a1".

    ``data`` is split on single spaces. Each word is encoded as
    <char><count> pairs, sorted alphabetically by character, and the encoded
    words are joined with single spaces again.
    """
    from collections import Counter

    encoded_words = []  # was named ``all``, which shadowed the builtin
    for word in data.split(" "):
        counts = Counter(word)
        # Characters are unique keys, so sorting the keys gives the same
        # order as sorting the "<char><count>" strings did.
        encoded_words.append("".join(f"{ch}{counts[ch]}" for ch in sorted(counts)))
    return " ".join(encoded_words)


print(word_stats("asjssjbjbbhsiaiic ifiaficjxzjooro qoprlllkskrmsnm mmvvllvlxjxj jfnnfcncnnccnncsllsdfi"))
print(word_stats("abbb cc a"))
print(word_stats("bbbaaacddddee"))
This would output:
c5d1f3i1j1l2n7s2
a1b3 c2 a1
a3b3c1d4e2
I need to figure out a way to delete common characters from two strings if the common characters are in the same position, but it is not working and I am trying to figure this out. This is what I tried so far, it works for some strings, but as soon as the second string is larger than the first, it stops working. EDIT: I also need a way to store the result in a variable before printing it as I need to use it in another function.
Example :
ABCDEF and ABLDKG would result in the "ABD" parts of both strings to be deleted, but the rest of the string would remain the same
CEF and LKG would be the output
def compare(input1, input2):
    """Remove characters that are equal at the same position in both strings.

    Example: "ABCDEF" and "ABLDKG" share A, B and D at positions 0, 1 and 3,
    so the results are "CEF" and "LKG". The strings may differ in length;
    the surplus of the longer one is always kept.

    Both results are printed, as before, and additionally returned as a
    tuple ``(x, y)`` so the caller can store them in variables.
    """
    from itertools import zip_longest

    kept1, kept2 = [], []
    # The "" fill value never equals a real character, so positions past the
    # end of the shorter string are always kept.
    for c1, c2 in zip_longest(input1, input2, fillvalue=""):
        if c1 != c2:
            kept1.append(c1)
            kept2.append(c2)
    x = "".join(kept1)
    y = "".join(kept2)
    print(x)
    print(y)
    return x, y
you could use
from itertools import zip_longest

a, b = "ABCDEF", "ABLDKG"
# Keep only the positions where the two characters differ ...
differing = [pair for pair in zip_longest(a, b, fillvalue="") if pair[0] != pair[1]]
# ... then transpose the pairs back into one string per input. The result is
# bound to a name so it can be reused instead of only being echoed.
remaining = [''.join(column) for column in zip(*differing)]
['CEF', 'LKG']
You can wrap this in a function:
def compare(a, b):
    """Drop characters that match at the same index in ``a`` and ``b``.

    Returns a two-element list ``[rest_of_a, rest_of_b]``. Inputs of
    different length are supported; the surplus of the longer string is kept.
    """
    from itertools import zip_longest

    kept = [pair for pair in zip_longest(a, b, fillvalue="") if pair[0] != pair[1]]
    if not kept:
        # zip(*[]) yields nothing, so identical inputs used to return []
        # instead of two empty strings.
        return ["", ""]
    return ["".join(chars) for chars in zip(*kept)]
compare("ABCDEF","ABLDKG")
['CEF', 'LKG']
compare('asdfq', 'aqdexyz')
['sfq', 'qexyz']
strlist = ["ABCDEF","ABLDKG"]
# Tally how often each character occurs across all strings together.
char_dict = {}
for char in "".join(strlist):
    char_dict[char] = char_dict.get(char, 0) + 1
# Keep only the characters that occur exactly once overall.
new_strlist = ["".join(c for c in item if char_dict[c] < 2) for item in strlist]
Note that this will convert strings that have only duplicates into empty strings rather than removing them altogether.
I want to create alphabetically ascending names like the column names in Excel. That is, I want to have something like a,b,c,...,z,aa,ab,...az,...zz,aaa,aab,....
I have tried:
# Prints one two-letter name per i: div picks the first letter, mod the second.
# NOTE: no import of `string` is shown in this snippet; it must already be in scope.
for i in range(1000):
mod = int(i%26)
div = int(i/26)
# div reaches 26 at i == 26*26, which is past the last index (25) of
# ascii_lowercase, so the lookup below raises IndexError from there on.
# Single-letter names are never produced because two letters are always joined.
print(string.ascii_lowercase[div]+string.ascii_lowercase[mod])
This works until zz but then fails because the index runs past the end of the alphabet:
aa
ab
ac
ad
ae
af
ag
ah
ai
aj
ak
al
.
.
.
zz
IndexError
You could make use of itertools.product():
from itertools import product
from string import ascii_lowercase

# For each length 1..3, print every letter combination of that length on its
# own line, in the order product() generates them.
for i in range(1, 4):
    print(*map(''.join, product(ascii_lowercase, repeat=i)), sep='\n')
First, you want all letters, then all pairs, then all triplets, etc. This is why we first need to iterate through all the string lengths you want (for i in range(...)).
Then, we need all possible associations with the i letters, so we can use product(ascii_lowercase) which is equivalent to a nested for loop repeated i times.
This will generate the tuples of size i required, finally just join() them to obtain a string.
To continuously generate names without limit, replace the for loop with while:
def generate():
    """Yield 'a', 'b', ..., 'z', 'aa', 'ab', ... without ever stopping."""
    length = 1
    while True:
        # every combination of the current length, then move to the next length
        yield from map(''.join, product(ascii_lowercase, repeat=length))
        length += 1


generator = generate()
next(generator) # 'a'
next(generator) # 'b'
...
For a general solution we can use a generator and islice from itertools:
import string
from itertools import islice
def generate():
    """Yield 'a', ..., 'z', 'aa', 'ab', ... endlessly.

    Each round appends every letter to every string of the previous round
    and keeps the new strings as the prefixes for the next round.
    """
    prefixes = ['']
    while True:
        extended = []
        for prefix in prefixes:
            for letter in string.ascii_lowercase:
                extended.append(prefix + letter)
                yield extended[-1]
        prefixes = extended


print('\n'.join(islice(generate(), 1000)))
And the output:
a
b
c
...
z
aa
ab
...
zz
aaa
aab
...
And you can use islice to take as many strings as you need.
Try:
>>import string
>>string.ascii_lowercase
'abcdefghijklmnopqrstuvwxyz'
>>len(string.ascii_lowercase)
26
When the index computed in the line below reaches 26, indexing with it raises an exception
div = int(i/26)
because ascii_lowercase contains only 26 characters.
But you can:
# 26 is len(string.ascii_lowercase); 26*26 iterations cover 'aa' through 'zz'
for i in range(26*26):
    div, mod = divmod(i, 26)  # first-letter index, second-letter index
    print(string.ascii_lowercase[div]+string.ascii_lowercase[mod])
EDIT:
or you can use:
import string

n = 4 # number of chars
small_limit = len(string.ascii_lowercase)
limit = small_limit ** n
i = 0
# Treat i as an n-digit base-26 number and print it with letters as digits.
while i < limit:
    s = ''
    # Emit the most significant digit first so the names come out in
    # ascending alphabetical order ('aaaa', 'aaab', ...); appending the least
    # significant digit first produced 'aaaa', 'baaa', ... instead.
    for c in reversed(range(n)):
        # integer floor division avoids a float round trip
        index = (i // small_limit**c) % small_limit
        s += string.ascii_lowercase[index]
    print(s)
    i += 1
You can use:
from string import ascii_lowercase

# Build the one-, two- and three-letter names level by level; each level
# extends every name of the previous level with every letter.
singles = list(ascii_lowercase)
doubles = [first + second for first in ascii_lowercase for second in ascii_lowercase]
triples = [pair + third for pair in doubles for third in ascii_lowercase]
l = singles + doubles + triples
There's an answer to this question provided on Code Review SE
A slight modification to the answer in the link gives the following which works for an arbitrary number of iterations.
def increment_char(c):
    """Return the character after ``c``; 'z' wraps around to 'a'."""
    if c == 'z':
        return 'a'
    return chr(ord(c) + 1)
def increment_str(s):
    """Return the name that follows ``s`` ('' -> 'a', 'az' -> 'ba', 'zz' -> 'aaa')."""
    lpart = s.rstrip('z')
    # every trailing 'z' rolls over to 'a'
    rolled_over = 'a' * (len(s) - len(lpart))
    if not lpart:
        # s was empty or all 'z': the name grows by one letter
        return 'a' + rolled_over
    return lpart[:-1] + increment_char(lpart[-1]) + rolled_over
s = ''
for _ in range(1000):
s = increment_str(s)
print(s)
I have the following string and I split it:
>>> st = '%2g%k%3p'
>>> l = filter(None, st.split('%'))
>>> print l
['2g', 'k', '3p']
Now I want to print the g letter two times, the k letter one time and the p letter three times:
ggkppp
How is it possible?
You could use a generator expression with isdigit() to check whether the first symbol is a digit or not, and then return the following string repeated the appropriate number of times. Then you could use join to get your output:
''.join(i[1:]*int(i[0]) if i[0].isdigit() else i for i in l)
Demonstration:
In [70]: [i[1:]*int(i[0]) if i[0].isdigit() else i for i in l ]
Out[70]: ['gg', 'k', 'ppp']
In [71]: ''.join(i[1:]*int(i[0]) if i[0].isdigit() else i for i in l)
Out[71]: 'ggkppp'
EDIT
Using re module when first number is with several digits:
''.join(m.group(2) * int(m.group(1)) if (m := re.search(r'(\d+)(\w+)', i)) else i for i in l)
Example:
In [144]: l = ['12g', '2kd', 'h', '3p']
In [145]: ''.join(re.search('(\d+)(\w+)', i).group(2)*int(re.search('(\d+)(\w+)', i).group(1)) if re.search('(\d+)(\w+)', i) else i for i in l)
Out[145]: 'ggggggggggggkdkdhppp'
EDIT2
For your input like:
st = '%2g_%3k%3p'
You could replace _ with an empty string and then add _ back to the end if the item from the list ends with the _ symbol:
import re

st = '%2g_%3k%3p'
l = list(filter(None, st.split('%')))
# Search each item once (raw-string pattern) and keep the joined result in a
# name. \w also matches '_', so underscores are stripped from the repeated
# text and a single '_' is appended again when the item ended with one.
result = ''.join(
    (m.group(2) * int(m.group(1))).replace("_", "") + '_' * i.endswith('_')
    if (m := re.search(r'(\d+)(\w+)', i)) else i
    for i in l
)
Output:
'gg_kkkppp'
EDIT3
Solution without re module but with usual loops working for 2 digits. You could define functions:
def add_str(ind, st):
    """Repeat the text of ``st`` after its first ``ind`` characters.

    ``st[:ind]`` is parsed as the repeat count. A trailing '_' is not
    repeated; it is appended once after the repeated text.
    """
    count = int(st[:ind])
    if st.endswith('_'):
        return st[ind:-1] * count + '_'
    return st[ind:] * count
def collect(l):
    """Expand every '<count><text>' item of ``l`` and concatenate the results.

    Items without a leading digit are copied unchanged. The count may have
    any number of digits.
    """
    parts = []
    for i in l:
        # Length of the leading run of digits. Measuring it directly removes
        # the two-digit limit and the IndexError that ``i[1]`` raised for a
        # one-character item such as '5'.
        ind = len(i) - len(i.lstrip('0123456789'))
        parts.append(add_str(ind, i) if ind else i)
    return ''.join(parts)


l = ['12g_', '3k', '3p']
print(collect(l))
gggggggggggg_kkkppp
One-liner Regex way:
>>> import re
>>> st = '%2g%k%3p'
>>> re.sub(r'%|(\d*)(\w+)', lambda m: int(m.group(1))*m.group(2) if m.group(1) else m.group(2), st)
'ggkppp'
The regex %|(\d*)(\w+) matches every % and, otherwise, captures zero or more digits before any word character into one group and the following word characters into another group. On replacement, all the matched characters are replaced with the value given in the replacement part, so the % characters are dropped.
or
>>> re.sub(r'%(\d*)(\w+)', lambda m: int(m.group(1))*m.group(2) if m.group(1) else m.group(2), st)
'ggkppp'
Assumes you are always printing single letter, but preceding number may be longer than single digit in base 10.
seq = ['2g', 'k', '3p']
pieces = []
for s in seq:
    # everything before the last character is the (possibly empty) count
    repeat, letter = s[:-1], s[-1]
    pieces.append(letter * int(repeat or 1))
result = ''.join(pieces)
assert result == "ggkppp"
LATE FOR THE SHOW BUT READY TO GO
Another way is to define a function which converts nC into CCCC...C (n times), then pass it to map to apply it to every element of the list l coming from the split on %, and finally join them all, as follows:
>>> def f(s):
x = 0
if s:
if len(s) == 1:
out = s
else:
for i in s:
if i.isdigit():
x = x*10 + int(i)
out = x*s[-1]
else:
out = ''
return out
>>> st
'%4g%10k%p'
>>> ''.join(map(f, st.split('%')))
'ggggkkkkkkkkkkp'
>>> st = '%2g%k%3p'
>>> ''.join(map(f, st.split('%')))
'ggkppp'
Or if you want to put all of these into one single function definition:
>>> def f(s):
out = ''
if s:
l = filter(None, s.split('%'))
for item in l:
x = 0
if len(item) == 1:
repl = item
else:
for c in item:
if c.isdigit():
x = x*10 + int(c)
repl = x*item[-1]
out += repl
return out
>>> st
'%2g%k%3p'
>>> f(st)
'ggkppp'
>>>
>>> st = '%4g%10k%p'
>>>
>>> f(st)
'ggggkkkkkkkkkkp'
>>> st = '%4g%101k%2p'
>>> f(st)
'ggggkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkpp'
>>> len(f(st))
107
EDIT :
In case of the presence of _ where the OP does not want this character to be repeated, then the best way in my opinion is to go with re.sub, it will make things easier, this way:
>>> def f(s):
pat = re.compile(r'%(\d*)([a-zA-Z]+)')
out = pat.sub(lambda m:int(m.group(1))*m.group(2) if m.group(1) else m.group(2), s)
return out
>>> st = '%4g_%12k%p__%m'
>>> f(st)
'gggg_kkkkkkkkkkkkp__m'
Loop the list, check first entry for number, and then append the second digit onwards:
string = ''
l = ['2g', 'k', '3p']
for entry in l:
    # Split the entry into its leading digits (the repeat count) and the
    # text that follows. A missing count means "once". This handles counts
    # with several digits and multi-letter entries without a count, both of
    # which broke the single-digit ``int(entry[0])`` parse.
    text = entry.lstrip('0123456789')
    digits = entry[:len(entry) - len(text)]
    string += text * int(digits or 1)
I would like to make a alphabetical list for an application similar to an excel worksheet.
A user would input number of cells and I would like to generate list.
For example a user needs 54 cells. Then I would generate
'a','b','c',...,'z','aa','ab','ac',...,'az', 'ba','bb'
I can generate the list from [ref]
from string import ascii_lowercase

# one list entry per lowercase letter, 'a' through 'z'
L = [*ascii_lowercase]
How do i stitch it together?
A similar question for PHP has been asked here. Does some one have the python equivalent?
Use itertools.product.
from string import ascii_lowercase
import itertools
def iter_all_strings():
    """Yield 'a'..'z', then 'aa'..'zz', then three-letter strings, and so on."""
    size = 1
    while True:
        yield from map("".join, itertools.product(ascii_lowercase, repeat=size))
        size += 1


# print every string up to and including 'bb'
for s in iter_all_strings():
    print(s)
    if s == 'bb':
        break
Result:
a
b
c
d
e
...
y
z
aa
ab
ac
...
ay
az
ba
bb
This has the added benefit of going well beyond two-letter combinations. If you need a million strings, it will happily give you three and four and five letter strings.
Bonus style tip: if you don't like having an explicit break inside the bottom loop, you can use islice to make the loop terminate on its own:
# islice() ends the loop after the first 54 strings. print() is called as a
# function, matching the snippet above and running on Python 3.
for s in itertools.islice(iter_all_strings(), 54):
    print(s)
You can use a list comprehension.
from string import ascii_lowercase

# single letters first, then every two-letter combination in order
L = list(ascii_lowercase)
L.extend(first + second for first in ascii_lowercase for second in ascii_lowercase)
Following #Kevin 's answer :
from string import ascii_lowercase
import itertools
# define the generator itself
def iter_all_strings():
    """Yield every lowercase string: all of length 1, then length 2, and so on."""
    for size in itertools.count(1):
        for letters in itertools.product(ascii_lowercase, repeat=size):
            yield "".join(letters)
The code below enables one to generate strings, that can be used to generate unique labels for example.
# define the generator handler
gen = iter_all_strings()
def label_gen():
    """Return the next unused label from the shared ``gen`` generator.

    Returns None if the generator is ever exhausted, as the previous
    ``for ... return`` form did.
    """
    return next(gen, None)
# call it whenever needed
# (print is called as a function so the snippet also runs on Python 3)
print(label_gen())
print(label_gen())
print(label_gen())
I've ended up doing my own.
I think it can create any number of letters.
def AA(n, s):
    """Prepend the upper-case letter label for the 1-based number ``n`` to ``s``.

    1 -> 'A', 26 -> 'Z', 27 -> 'AA', 702 -> 'ZZ', 703 -> 'AAA'. Call with
    ``s=''`` to get just the label.
    """
    # Last letter: remainders 1..25 map to A..Y, remainder 0 stands for Z (26).
    r = n % 26 or 26
    # Floor division keeps n an int; with ``/`` Python 3 produced a float and
    # ``chr(64 + n)`` below raised TypeError.
    n = (n - r) // 26
    s = chr(64 + r) + s
    if n > 26:
        # more than one letter still to go
        s = AA(n, s)
    elif n > 0:
        s = chr(64 + n) + s
    return s
n = quantity | r = remaining (26 letters A-Z) | s = string
To print the list :
def uprint(nc):
    """Print the lower-case labels for 1..nc, one per line."""
    for x in range(1, nc + 1):
        # print() as a function: the Python 2 print statement is a
        # SyntaxError on Python 3
        print(AA(x, '').lower())
Used VBA before convert to python :
' Prepends a letter label for the number n to the string s and returns it.
' Chr(64 + k) maps k = 1..26 to "A".."Z" (presumably used for Excel-style
' column names - confirm against the caller).
Function AA(n, s)
' r = remainder of n modulo 26; a remainder of 0 is treated as 26
r = n Mod 26
r = IIf(r > 0, r, 26)
' n - r is a multiple of 26, so this yields the count of remaining 26-blocks
n = (n - r) / 26
' prepend the letter for r
s = Chr(64 + r) & s
If n > 26 Then
' more than one letter is still needed: recurse on the reduced n
s = AA(n, s)
ElseIf n > 0 Then
' exactly one more leading letter
s = Chr(64 + n) & s
End If
' assigning to the function name sets the return value
AA = s
End Function
Using neo's insight on a while loop.
For a given iterable with chars in ascending order. 'abcd...'.
n is the Nth position of the representation starting with 1 as the first position.
def char_label(n, chars):
    """Return the label at 1-based position ``n`` built from ``chars``.

    With chars == 'abc' the sequence is a, b, c, aa, ab, ac, ba, ...
    ``n == 0`` gives the empty string.
    """
    base = len(chars)
    label = ''
    while n:
        # Shifting n by one turns the 1..base digit range into a plain
        # 0..base-1 remainder, which indexes ``chars`` directly.
        n, offset = divmod(n - 1, base)
        label = chars[offset] + label
    return label
Later you can print a list of the range n of the 'labels' you need using a for loop:
my_chrs = 'abc'
n = 15
for i in range(1, n+1):
print(char_label(i, my_chrs))
or build a list comprehension etc...
Print the set of Excel cell references for a range of lowercase or uppercase column characters
Upper_case:
from string import ascii_uppercase
import itertools
def iter_range_strings(start_colu):
    """Yield upper-case column labels ('A', ..., 'Z', 'AA', ...) from ``start_colu`` on.

    ``start_colu`` was previously ignored, so iteration always began at 'A'.
    If it is not a non-empty string of upper-case ASCII letters, iteration
    still starts at 'A', as before.
    """
    valid = (
        isinstance(start_colu, str)
        and start_colu != ""
        and all(ch in ascii_uppercase for ch in start_colu)
    )
    # Every label shorter than the start label sorts before it, so begin
    # with labels of the start label's own length.
    first_size = len(start_colu) if valid else 1
    labels = (
        "".join(chars)
        for size in itertools.count(first_size)
        for chars in itertools.product(ascii_uppercase, repeat=size)
    )
    if valid:
        labels = itertools.dropwhile(lambda label: label != start_colu, labels)
    yield from labels


input_colume_range = ['A', 'B']
input_row_range= [1,2]
# NOTE: ``row`` holds the column letters and ``colum`` the row number.
for row in iter_range_strings(input_colume_range[0]):
    for colum in range(int(input_row_range[0]), int(input_row_range[1]+1)):
        print(str(row)+ str(colum))
    if row == input_colume_range[1]:
        break
Result:
A1
A2
B1
B2
In two lines (plus an import):
import itertools
from string import ascii_uppercase as ABC
count = 100
# First `count` labels A..Z, AA.., kept in a name; correct past 'ZZ' too, where the padding-space trick produced wrong labels.
labels = list(itertools.islice(("".join(c) for n in itertools.count(1) for c in itertools.product(ABC, repeat=n)), count))
Wrap it in a function/lambda if you need to reuse.
code:
alphabet = ["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"]
# print every two-letter combination, first letter varying slowest
for first in alphabet:
    for second in alphabet:
        print(first + second)
result:
aa
ab
ac
ad
ae
af
ag
ah
ai
aj
ak
al
am
...