Python find and replace upon condition / with a function - python

String = n76a+q80a+l83a+i153a+l203f+r207a+s211a+s215w+f216a+e283l
I want the script to look at a pair at a time meaning:
evaluate n76a+q80a. if abs(76-80) < 10, then replace '+' with a '_':
else don't change anything.
Then evaluate q80a+l83a next and do the same thing.
The desired output should be:
n76a_q80a_l83a+i153a+l203f_r207a_s211a_s215w_f216a+e283l
What i tried is,
def aa_dist(x):
if abs(int(x[1:3]) - int(x[6:8])) < 10:
print re.sub(r'\+', '_', x)
with open(input_file, 'r') as alex:
oligos_list = alex.read()
aa_dist(oligos_list)
This is what I have up to this point. I know that my code will just replace all '+' into '_' because it only evaluates the first pair and and replace all. How should I do this?

import itertools,re
my_string = "n76a+q80a+l83a+i153a+l203f+r207a+s211a+s215w+f216a+e283l"
#first extract the numbers
my_numbers = map(int,re.findall("[0-9]+",my_string))
#split the string on + (useless comment)
parts = my_string.split("+")
def get_filler((a,b)):
'''this method decides on the joiner'''
return "_" if abs(a-b) < 10 else '+'
fillers = map(get_filler,zip(my_numbers,my_numbers[1:])) #figure out what fillers we need
print "".join(itertools.chain.from_iterable(zip(parts,fillers)))+parts[-1] #it will always skip the last part so gotta add it
is one way you might accomplish this... and is also an example of worthless comments

Through re module only.
>>> s = 'n76a+q80a+l83a+i153a+l203f+r207a+s211a+s215w+f216a+e283l'
>>> m = re.findall(r'(?=\b([^+]+\+[^+]+))', s) # This regex would helps to do a overlapping match. See the demo (https://regex101.com/r/jO6zT2/13)
>>> m
['n76a+q80a', 'q80a+l83a', 'l83a+i153a', 'i153a+l203f', 'l203f+r207a', 'r207a+s211a', 's211a+s215w', 's215w+f216a', 'f216a+e283l']
>>> l = []
>>> for i in m:
if abs(int(re.search(r'^\D*(\d+)', i).group(1)) - int(re.search(r'^\D*\d+\D*(\d+)', i).group(1))) < 10:
l.append(i.replace('+', '_'))
else:
l.append(i)
>>> re.sub(r'([a-z0-9]+)\1', r'\1',''.join(l))
'n76a_q80a_l83a+i153a+l203f_r207a_s211a_s215w_f216a+e283l'
By defining a separate function.
import re
def aa_dist(x):
l = []
m = re.findall(r'(?=\b([^+]+\+[^+]+))', x)
for i in m:
if abs(int(re.search(r'^\D*(\d+)', i).group(1)) - int(re.search(r'^\D*\d+\D*(\d+)', i).group(1))) < 10:
l.append(i.replace('+', '_'))
else:
l.append(i)
return re.sub(r'([a-z0-9]+)\1', r'\1',''.join(l))
string = 'n76a+q80a+l83a+i153a+l203f+r207a+s211a+s215w+f216a+e283l'
print aa_dist(string)
Output:
n76a_q80a_l83a+i153a+l203f_r207a_s211a_s215w_f216a+e283l

Related

How to change uppercase & lowercase alternatively in a string?

I want to create a new string from a given string with alternate uppercase and lowercase.
I have tried iterating over the string and changing first to uppercase into a new string and then to lower case into another new string again.
def myfunc(x):
even = x.upper()
lst = list(even)
for itemno in lst:
if (itemno % 2) !=0:
even1=lst[1::2].lowercase()
itemno=itemno+1
even2=str(even1)
print(even2)
Since I cant change the given string I need a good way of creating a new string alternate caps.
Here's a onliner
"".join([x.upper() if i%2 else x.lower() for i,x in enumerate(mystring)])
You can simply randomly choose for each letter in the old string if you should lowercase or uppercase it, like this:
import random
def myfunc2(old):
new = ''
for c in old:
lower = random.randint(0, 1)
if lower:
new += c.lower()
else:
new += c.upper()
return new
Here's one that returns a new string using with alternate caps:
def myfunc(x):
seq = []
for i, v in enumerate(x):
seq.append(v.upper() if i % 2 == 0 else v.lower())
return ''.join(seq)
This does the job also
def foo(input_message):
c = 0
output_message = ""
for m in input_message:
if (c%2==0):
output_message = output_message + m.lower()
else:
output_message = output_message + m.upper()
c = c + 1
return output_message
Here's a solution using itertools which utilizes string slicing:
from itertools import chain, zip_longest
x = 'inputstring'
zipper = zip_longest(x[::2].lower(), x[1::2].upper(), fillvalue='')
res = ''.join(chain.from_iterable(zipper))
# 'iNpUtStRiNg'
Using a string slicing:
from itertools import zip_longest
s = 'example'
new_s = ''.join(x.upper() + y.lower()
for x, y in zip_longest(s[::2], s[1::2], fillvalue=''))
# ExAmPlE
Using an iterator:
s_iter = iter(s)
new_s = ''.join(x.upper() + y.lower()
for x, y in zip_longest(s_iter, s_iter, fillvalue=''))
# ExAmPlE
Using the function reduce():
def func(x, y):
if x[-1].islower():
return x + y.upper()
else:
return x + y.lower()
new_s = reduce(func, s) # eXaMpLe
This code also returns alternative caps string:-
def alternative_strings(strings):
for i,x in enumerate(strings):
if i % 2 == 0:
print(x.upper(), end="")
else:
print(x.lower(), end= "")
return ''
print(alternative_strings("Testing String"))
def myfunc(string):
# Un-hash print statements to watch python build out the string.
# Script is an elementary example of using an enumerate function.
# An enumerate function tracks an index integer and its associated value as it moves along the string.
# In this example we use arithmetic to determine odd and even index counts, then modify the associated variable.
# After modifying the upper/lower case of the character, it starts adding the string back together.
# The end of the function then returns back with the new modified string.
#print(string)
retval = ''
for space, letter in enumerate(string):
if space %2==0:
retval = retval + letter.upper()
#print(retval)
else:
retval = retval + letter.lower()
#print(retval)
print(retval)
return retval
myfunc('Thisisanamazingscript')

Python Find Word in Text [duplicate]

Python has string.find() and string.rfind() to get the index of a substring in a string.
I'm wondering whether there is something like string.find_all() which can return all found indexes (not only the first from the beginning or the first from the end).
For example:
string = "test test test test"
print string.find('test') # 0
print string.rfind('test') # 15
#this is the goal
print string.find_all('test') # [0,5,10,15]
For counting the occurrences, see Count number of occurrences of a substring in a string.
There is no simple built-in string function that does what you're looking for, but you could use the more powerful regular expressions:
import re
[m.start() for m in re.finditer('test', 'test test test test')]
#[0, 5, 10, 15]
If you want to find overlapping matches, lookahead will do that:
[m.start() for m in re.finditer('(?=tt)', 'ttt')]
#[0, 1]
If you want a reverse find-all without overlaps, you can combine positive and negative lookahead into an expression like this:
search = 'tt'
[m.start() for m in re.finditer('(?=%s)(?!.{1,%d}%s)' % (search, len(search)-1, search), 'ttt')]
#[1]
re.finditer returns a generator, so you could change the [] in the above to () to get a generator instead of a list which will be more efficient if you're only iterating through the results once.
>>> help(str.find)
Help on method_descriptor:
find(...)
S.find(sub [,start [,end]]) -> int
Thus, we can build it ourselves:
def find_all(a_str, sub):
start = 0
while True:
start = a_str.find(sub, start)
if start == -1: return
yield start
start += len(sub) # use start += 1 to find overlapping matches
list(find_all('spam spam spam spam', 'spam')) # [0, 5, 10, 15]
No temporary strings or regexes required.
Here's a (very inefficient) way to get all (i.e. even overlapping) matches:
>>> string = "test test test test"
>>> [i for i in range(len(string)) if string.startswith('test', i)]
[0, 5, 10, 15]
Use re.finditer:
import re
sentence = input("Give me a sentence ")
word = input("What word would you like to find ")
for match in re.finditer(word, sentence):
print (match.start(), match.end())
For word = "this" and sentence = "this is a sentence this this" this will yield the output:
(0, 4)
(19, 23)
(24, 28)
Again, old thread, but here's my solution using a generator and plain str.find.
def findall(p, s):
'''Yields all the positions of
the pattern p in the string s.'''
i = s.find(p)
while i != -1:
yield i
i = s.find(p, i+1)
Example
x = 'banananassantana'
[(i, x[i:i+2]) for i in findall('na', x)]
returns
[(2, 'na'), (4, 'na'), (6, 'na'), (14, 'na')]
You can use re.finditer() for non-overlapping matches.
>>> import re
>>> aString = 'this is a string where the substring "is" is repeated several times'
>>> print [(a.start(), a.end()) for a in list(re.finditer('is', aString))]
[(2, 4), (5, 7), (38, 40), (42, 44)]
but won't work for:
In [1]: aString="ababa"
In [2]: print [(a.start(), a.end()) for a in list(re.finditer('aba', aString))]
Output: [(0, 3)]
Come, let us recurse together.
def locations_of_substring(string, substring):
"""Return a list of locations of a substring."""
substring_length = len(substring)
def recurse(locations_found, start):
location = string.find(substring, start)
if location != -1:
return recurse(locations_found + [location], location+substring_length)
else:
return locations_found
return recurse([], 0)
print(locations_of_substring('this is a test for finding this and this', 'this'))
# prints [0, 27, 36]
No need for regular expressions this way.
If you're just looking for a single character, this would work:
string = "dooobiedoobiedoobie"
match = 'o'
reduce(lambda count, char: count + 1 if char == match else count, string, 0)
# produces 7
Also,
string = "test test test test"
match = "test"
len(string.split(match)) - 1
# produces 4
My hunch is that neither of these (especially #2) is terribly performant.
this is an old thread but i got interested and wanted to share my solution.
def find_all(a_string, sub):
result = []
k = 0
while k < len(a_string):
k = a_string.find(sub, k)
if k == -1:
return result
else:
result.append(k)
k += 1 #change to k += len(sub) to not search overlapping results
return result
It should return a list of positions where the substring was found.
Please comment if you see an error or room for improvment.
This does the trick for me using re.finditer
import re
text = 'This is sample text to test if this pythonic '\
'program can serve as an indexing platform for '\
'finding words in a paragraph. It can give '\
'values as to where the word is located with the '\
'different examples as stated'
# find all occurances of the word 'as' in the above text
find_the_word = re.finditer('as', text)
for match in find_the_word:
print('start {}, end {}, search string \'{}\''.
format(match.start(), match.end(), match.group()))
This thread is a little old but this worked for me:
numberString = "onetwothreefourfivesixseveneightninefiveten"
testString = "five"
marker = 0
while marker < len(numberString):
try:
print(numberString.index("five",marker))
marker = numberString.index("five", marker) + 1
except ValueError:
print("String not found")
marker = len(numberString)
You can try :
>>> string = "test test test test"
>>> for index,value in enumerate(string):
if string[index:index+(len("test"))] == "test":
print index
0
5
10
15
You can try :
import re
str1 = "This dress looks good; you have good taste in clothes."
substr = "good"
result = [_.start() for _ in re.finditer(substr, str1)]
# result = [17, 32]
When looking for a large amount of key words in a document, use flashtext
from flashtext import KeywordProcessor
words = ['test', 'exam', 'quiz']
txt = 'this is a test'
kwp = KeywordProcessor()
kwp.add_keywords_from_list(words)
result = kwp.extract_keywords(txt, span_info=True)
Flashtext runs faster than regex on large list of search words.
This function does not look at all positions inside the string, it does not waste compute resources. My try:
def findAll(string,word):
all_positions=[]
next_pos=-1
while True:
next_pos=string.find(word,next_pos+1)
if(next_pos<0):
break
all_positions.append(next_pos)
return all_positions
to use it call it like this:
result=findAll('this word is a big word man how many words are there?','word')
src = input() # we will find substring in this string
sub = input() # substring
res = []
pos = src.find(sub)
while pos != -1:
res.append(pos)
pos = src.find(sub, pos + 1)
Whatever the solutions provided by others are completely based on the available method find() or any available methods.
What is the core basic algorithm to find all the occurrences of a
substring in a string?
def find_all(string,substring):
"""
Function: Returning all the index of substring in a string
Arguments: String and the search string
Return:Returning a list
"""
length = len(substring)
c=0
indexes = []
while c < len(string):
if string[c:c+length] == substring:
indexes.append(c)
c=c+1
return indexes
You can also inherit str class to new class and can use this function
below.
class newstr(str):
def find_all(string,substring):
"""
Function: Returning all the index of substring in a string
Arguments: String and the search string
Return:Returning a list
"""
length = len(substring)
c=0
indexes = []
while c < len(string):
if string[c:c+length] == substring:
indexes.append(c)
c=c+1
return indexes
Calling the method
newstr.find_all('Do you find this answer helpful? then upvote
this!','this')
This is solution of a similar question from hackerrank. I hope this could help you.
import re
a = input()
b = input()
if b not in a:
print((-1,-1))
else:
#create two list as
start_indc = [m.start() for m in re.finditer('(?=' + b + ')', a)]
for i in range(len(start_indc)):
print((start_indc[i], start_indc[i]+len(b)-1))
Output:
aaadaa
aa
(0, 1)
(1, 2)
(4, 5)
Here's a solution that I came up with, using assignment expression (new feature since Python 3.8):
string = "test test test test"
phrase = "test"
start = -1
result = [(start := string.find(phrase, start + 1)) for _ in range(string.count(phrase))]
Output:
[0, 5, 10, 15]
I think the most clean way of solution is without libraries and yields:
def find_all_occurrences(string, sub):
index_of_occurrences = []
current_index = 0
while True:
current_index = string.find(sub, current_index)
if current_index == -1:
return index_of_occurrences
else:
index_of_occurrences.append(current_index)
current_index += len(sub)
find_all_occurrences(string, substr)
Note: find() method returns -1 when it can't find anything
The pythonic way would be:
mystring = 'Hello World, this should work!'
find_all = lambda c,s: [x for x in range(c.find(s), len(c)) if c[x] == s]
# s represents the search string
# c represents the character string
find_all(mystring,'o') # will return all positions of 'o'
[4, 7, 20, 26]
>>>
if you only want to use numpy here is a solution
import numpy as np
S= "test test test test"
S2 = 'test'
inds = np.cumsum([len(k)+len(S2) for k in S.split(S2)[:-1]])- len(S2)
print(inds)
if you want to use without re(regex) then:
find_all = lambda _str,_w : [ i for i in range(len(_str)) if _str.startswith(_w,i) ]
string = "test test test test"
print( find_all(string, 'test') ) # >>> [0, 5, 10, 15]
please look at below code
#!/usr/bin/env python
# coding:utf-8
'''黄哥Python'''
def get_substring_indices(text, s):
result = [i for i in range(len(text)) if text.startswith(s, i)]
return result
if __name__ == '__main__':
text = "How much wood would a wood chuck chuck if a wood chuck could chuck wood?"
s = 'wood'
print get_substring_indices(text, s)
def find_index(string, let):
enumerated = [place for place, letter in enumerate(string) if letter == let]
return enumerated
for example :
find_index("hey doode find d", "d")
returns:
[4, 7, 13, 15]
Not exactly what OP asked but you could also use the split function to get a list of where all the substrings don't occur. OP didn't specify the end goal of the code but if your goal is to remove the substrings anyways then this could be a simple one-liner. There are probably more efficient ways to do this with larger strings; regular expressions would be preferable in that case
# Extract all non-substrings
s = "an-example-string"
s_no_dash = s.split('-')
# >>> s_no_dash
# ['an', 'example', 'string']
# Or extract and join them into a sentence
s_no_dash2 = ' '.join(s.split('-'))
# >>> s_no_dash2
# 'an example string'
Did a brief skim of other answers so apologies if this is already up there.
def count_substring(string, sub_string):
c=0
for i in range(0,len(string)-2):
if string[i:i+len(sub_string)] == sub_string:
c+=1
return c
if __name__ == '__main__':
string = input().strip()
sub_string = input().strip()
count = count_substring(string, sub_string)
print(count)
I runned in the same problem and did this:
hw = 'Hello oh World!'
list_hw = list(hw)
o_in_hw = []
while True:
o = hw.find('o')
if o != -1:
o_in_hw.append(o)
list_hw[o] = ' '
hw = ''.join(list_hw)
else:
print(o_in_hw)
break
Im pretty new at coding so you can probably simplify it (and if planned to used continuously of course make it a function).
All and all it works as intended for what i was doing.
Edit: Please consider this is for single characters only, and it will change your variable, so you have to create a copy of the string in a new variable to save it, i didnt put it in the code cause its easy and its only to show how i made it work.
By slicing we find all the combinations possible and append them in a list and find the number of times it occurs using count function
s=input()
n=len(s)
l=[]
f=input()
print(s[0])
for i in range(0,n):
for j in range(1,n+1):
l.append(s[i:j])
if f in l:
print(l.count(f))
To find all the occurence of a character in a give string and return as a dictionary
eg: hello
result :
{'h':1, 'e':1, 'l':2, 'o':1}
def count(string):
result = {}
if(string):
for i in string:
result[i] = string.count(i)
return result
return {}
or else you do like this
from collections import Counter
def count(string):
return Counter(string)

Add a start index to a string index generator

I'm currently learning to create generators and to use itertools. So I decided to make a string index generator, but I'd like to add some parameters such as a "start index" allowing to define where to start generating the indexes.
I came up with this ugly solution which can be very long and not efficient with large indexes:
import itertools
import string
class StringIndex(object):
'''
Generator that create string indexes in form:
A, B, C ... Z, AA, AB, AC ... ZZ, AAA, AAB, etc.
Arguments:
- startIndex = string; default = ''; start increment for the generator.
- mode = 'lower' or 'upper'; default = 'upper'; is the output index in
lower or upper case.
'''
def __init__(self, startIndex = '', mode = 'upper'):
if mode == 'lower':
self.letters = string.ascii_lowercase
elif mode == 'upper':
self.letters = string.ascii_uppercase
else:
cmds.error ('Wrong output mode, expected "lower" or "upper", ' +
'got {}'.format(mode))
if startIndex != '':
if not all(i in self.letters for i in startIndex):
cmds.error ('Illegal characters in start index; allowed ' +
'characters are: {}'.format(self.letters))
self.startIndex = startIndex
def getIndex(self):
'''
Returns:
- string; current string index
'''
startIndexOk = False
x = 1
while True:
strIdMaker = itertools.product(self.letters, repeat = x)
for stringList in strIdMaker:
index = ''.join([s for s in stringList])
# Here is the part to simpify
if self.startIndex:
if index == self.startIndex:
startIndexOk = True
if not startIndexOk:
continue
###
yield index
x += 1
Any advice or improvement is welcome. Thank you!
EDIT:
The start index must be a string!
You would have to do the arithmetic (in base 26) yourself to avoid looping over itertools.product. But you can at least set x=len(self.startIndex) or 1!
Old (incorrect) answer
If you would do it without itertools (assuming you start with a single letter), you could do the following:
letters = 'abcdefghijklmnopqrstuvwxyz'
def getIndex(start, case):
lets = list(letters.lower()) if case == 'lower' else list(letters.upper())
# default is 'upper', but can also be an elif
for r in xrange(0,10):
for l in lets[start:]:
if l.lower() == 'z':
start = 0
yield ''.join(lets[:r])+l
I run until max 10 rows of letters are created, but you could ofcourse use an infinite while loop such that it can be called forever.
Correct answer
I found the solution in a different way: I used a base 26 number translator (based on (and fixxed since it didn't work perfectly): http://quora.com/How-do-I-write-a-program-in-Python-that-can-convert-an-integer-from-one-base-to-another)
I uses itertools.count() to count and just loops over all the possibilities.
The code:
import time
from itertools import count
def toAlph(x, letters):
div = 26
r = '' if x > 0 else letters[0]
while x > 0:
r = letters[x % div] + r
if (x // div == 1) and (x % div == 0):
r = letters[0] + r
break
else:
x //= div
return r
def getIndex(start, case='upper'):
alphabet = 'abcdefghijklmnopqrstuvwxyz'
letters = alphabet.upper() if case == 'upper' else alphabet
started = False
for num in count(0,1):
l = toAlph(num, letters)
if l == start:
started = True
if started:
yield l
iterator = getIndex('AA')
for i in iterator:
print(i)
time.sleep(0.1)

Python: Count character in string which are following each other

I have a string in which I want to count the occurrences of # following each other to replace them by numbers to create a increment.
For example:
rawString = 'MyString1_test##_edit####'
for x in xrange(5):
output = doConvertMyString(rawString)
print output
MyString1_test01_edit0001
MyString1_test02_edit0002
MyString1_test03_edit0003
MyString1_test04_edit0004
MyString1_test05_edit0005
Assuming that the number of # is not fixed and that rawString is a user input containing only string.ascii_letters + string.digits + '_' + '#, how can I do that?
Here is my test so far:
rawString = 'MyString1_test##_edit####'
incrDatas = {}
key = '#'
counter = 1
for x in xrange(len(rawString)):
if rawString[x] != key:
counter = 1
continue
else:
if x > 0:
if rawString[x - 1] == key:
counter += 1
else:
pass
# ???
You may use zfill in the re.sub replacement to pad any amount of # chunks. #+ regex pattern matches 1 or more # symbols. The m.group() stands for the match the regex found, and thus, we replace all #s with the incremented x converted to string padded with the same amount of 0s as there are # in the match.
import re
rawString = 'MyString1_test##_edit####'
for x in xrange(5):
output = re.sub(r"#+", lambda m: str(x+1).zfill(len(m.group())), rawString)
print output
Result of the demo:
MyString1_test01_edit0001
MyString1_test02_edit0002
MyString1_test03_edit0003
MyString1_test04_edit0004
MyString1_test05_edit0005
The code below converts the rawString to a format string, using groupby in a list comprehension to find groups of hashes. Each run of hashes is converted into a format directive to print a zero-padded integer of the appropriate width, runs of non-hashes are simply joined back together.
This code works on Python 2.6 and later.
from itertools import groupby
def convert(template):
return ''.join(['{{x:0{0}d}}'.format(len(list(g))) if k else ''.join(g)
for k, g in groupby(template, lambda c: c == '#')])
rawString = 'MyString1_test##_edit####'
fmt = convert(rawString)
print(repr(fmt))
for x in range(5):
print(fmt.format(x=x))
output
'MyString1_test{x:02d}_edit{x:04d}'
MyString1_test00_edit0000
MyString1_test01_edit0001
MyString1_test02_edit0002
MyString1_test03_edit0003
MyString1_test04_edit0004
How about this-
rawString = 'MyString1_test##_edit####'
splitString = rawString.split('_')
for i in xrange(10): # you may put any count
print '%s_%s%02d_%s%04d' % (splitString[0], splitString[1][0:4], i, splitString[2][0:4], i, )
You can try this naive (and probably not most efficient) solution. It assumes that the number of '#' is fixed.
rawString = 'MyString1_test##_edit####'
for i in range(1, 6):
temp = rawString.replace('####', str(i).zfill(4)).replace('##', str(i).zfill(2))
print(temp)
>> MyString1_test01_edit0001
MyString1_test02_edit0002
MyString1_test03_edit0003
MyString1_test04_edit0004
MyString1_test05_edit0005
test_string = 'MyString1_test##_edit####'
def count_hash(raw_string):
str_list = list(raw_string)
hash_count = str_list.count("#") + 1
for num in xrange(1, hash_count):
new_string = raw_string.replace("####", "000" + str(num))
new_string = new_string.replace("##", "0" + str(num))
print new_string
count_hash(test_string)
It's a bit clunky, and only works for # counts of less than 10, but seems to do what you want.
EDIT: By "only works" I mean that you'll get extra characters with the fixed number of # symbols inserted
EDIT2: amended code

Print letters in specific pattern in Python

I have the follwing string and I split it:
>>> st = '%2g%k%3p'
>>> l = filter(None, st.split('%'))
>>> print l
['2g', 'k', '3p']
Now I want to print the g letter two times, the k letter one time and the p letter three times:
ggkppp
How is it possible?
You could use generator with isdigit() to check wheter your first symbol is digit or not and then return following string with appropriate count. Then you could use join to get your output:
''.join(i[1:]*int(i[0]) if i[0].isdigit() else i for i in l)
Demonstration:
In [70]: [i[1:]*int(i[0]) if i[0].isdigit() else i for i in l ]
Out[70]: ['gg', 'k', 'ppp']
In [71]: ''.join(i[1:]*int(i[0]) if i[0].isdigit() else i for i in l)
Out[71]: 'ggkppp'
EDIT
Using re module when first number is with several digits:
''.join(re.search('(\d+)(\w+)', i).group(2)*int(re.search('(\d+)(\w+)', i).group(1)) if re.search('(\d+)(\w+)', i) else i for i in l)
Example:
In [144]: l = ['12g', '2kd', 'h', '3p']
In [145]: ''.join(re.search('(\d+)(\w+)', i).group(2)*int(re.search('(\d+)(\w+)', i).group(1)) if re.search('(\d+)(\w+)', i) else i for i in l)
Out[145]: 'ggggggggggggkdkdhppp'
EDIT2
For your input like:
st = '%2g_%3k%3p'
You could replace _ with empty string and then add _ to the end if the work from list endswith the _ symbol:
st = '%2g_%3k%3p'
l = list(filter(None, st.split('%')))
''.join((re.search('(\d+)(\w+)', i).group(2)*int(re.search('(\d+)(\w+)', i).group(1))).replace("_", "") + '_' * i.endswith('_') if re.search('(\d+)(\w+)', i) else i for i in l)
Output:
'gg_kkkppp'
EDIT3
Solution without re module but with usual loops working for 2 digits. You could define functions:
def add_str(ind, st):
if not st.endswith('_'):
return st[ind:] * int(st[:ind])
else:
return st[ind:-1] * int(st[:ind]) + '_'
def collect(l):
final_str = ''
for i in l:
if i[0].isdigit():
if i[1].isdigit():
final_str += add_str(2, i)
else:
final_str += add_str(1, i)
else:
final_str += i
return final_str
And then use them as:
l = ['12g_', '3k', '3p']
print(collect(l))
gggggggggggg_kkkppp
One-liner Regex way:
>>> import re
>>> st = '%2g%k%3p'
>>> re.sub(r'%|(\d*)(\w+)', lambda m: int(m.group(1))*m.group(2) if m.group(1) else m.group(2), st)
'ggkppp'
%|(\d*)(\w+) regex matches all % and captures zero or moredigit present before any word character into one group and the following word characters into another group. On replacement all the matched chars should be replaced with the value given in the replacement part. So this should loose % character.
or
>>> re.sub(r'%(\d*)(\w+)', lambda m: int(m.group(1))*m.group(2) if m.group(1) else m.group(2), st)
'ggkppp'
Assumes you are always printing single letter, but preceding number may be longer than single digit in base 10.
seq = ['2g', 'k', '3p']
result = ''.join(int(s[:-1] or 1) * s[-1] for s in seq)
assert result == "ggkppp"
LATE FOR THE SHOW BUT READY TO GO
Another way, is to define your function which converts nC into CCCC...C (ntimes), then pass it to a map to apply it on every element of the list l coming from the split over %, the finally join them all, as follows:
>>> def f(s):
x = 0
if s:
if len(s) == 1:
out = s
else:
for i in s:
if i.isdigit():
x = x*10 + int(i)
out = x*s[-1]
else:
out = ''
return out
>>> st
'%4g%10k%p'
>>> ''.join(map(f, st.split('%')))
'ggggkkkkkkkkkkp'
>>> st = '%2g%k%3p'
>>> ''.join(map(f, st.split('%')))
'ggkppp'
Or if you want to put all of these into one single function definition:
>>> def f(s):
out = ''
if s:
l = filter(None, s.split('%'))
for item in l:
x = 0
if len(item) == 1:
repl = item
else:
for c in item:
if c.isdigit():
x = x*10 + int(c)
repl = x*item[-1]
out += repl
return out
>>> st
'%2g%k%3p'
>>> f(st)
'ggkppp'
>>>
>>> st = '%4g%10k%p'
>>>
>>> f(st)
'ggggkkkkkkkkkkp'
>>> st = '%4g%101k%2p'
>>> f(st)
'ggggkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkpp'
>>> len(f(st))
107
EDIT :
In case of the presence of _ where the OP does not want this character to be repeated, then the best way in my opinion is to go with re.sub, it will make things easier, this way:
>>> def f(s):
pat = re.compile(r'%(\d*)([a-zA-Z]+)')
out = pat.sub(lambda m:int(m.group(1))*m.group(2) if m.group(1) else m.group(2), s)
return out
>>> st = '%4g_%12k%p__%m'
>>> f(st)
'gggg_kkkkkkkkkkkkp__m'
Loop the list, check first entry for number, and then append the second digit onwards:
string=''
l = ['2g', 'k', '3p']
for entry in l:
if len(entry) ==1:
string += (entry)
else:
number = int(entry[0])
for i in range(number):
string += (entry[1:])

Categories

Resources