Transform "4CA2CACA" to "CCCCACCACA" - python

I have already tried this (as somebody told me on another question):
import re
def decode(txt):
list = []
for cnt, char in re.findall(r"([\d*])([^d])", txt):
list.extend((char * (int(cnt) if cnt else 1)))
list = "".join(list)
return list
Example:
print(decode("2CACA2CACA3CACACA3CAC"))
This is what I get
CCCCCCCCCC
And this is what I need
CCACACCACACCCACACACCCAC

re.sub can take a named function or lambda as its second argument, and you can use this to accomplish your goal. Using this approach you simply don't do any substitution when a letter does not have a number in front of it.
def decode(s):
    """Expand a run-length-encoded string such as "4CA2CACA".

    Every ASCII letter that is preceded by a decimal count is repeated that
    many times; letters without a count are left exactly as they are.

    :param s: the encoded text
    :return: the expanded text
    """
    # re.sub only rewrites "<count><letter>" pairs, so bare letters pass
    # through untouched without needing a separate branch.
    return re.sub(r'(\d+)([a-zA-Z])',
                  lambda m: m.group(2) * int(m.group(1)),
                  s)
decode("2CACA2CACA3CACACA3CAC")
# 'CCACACCACACCCACACACCCAC'

What you are missing is characters without a digit in front. This will include those:
import re
def decode(txt):
    """Expand a run-length-encoded string such as "2CACA3CAC".

    A non-digit character preceded by a decimal count is repeated that many
    times; a non-digit character with no count in front is kept once.

    :param txt: the encoded text
    :return: the expanded text
    """
    pieces = []
    # First alternative: a (possibly multi-digit) count plus the character it
    # applies to. Second alternative: a character with no count in front.
    # `\d+` rather than `\d`, otherwise "12C" would be read as "2C".
    for cnt, char, single_char in re.findall(r"(\d+)([^\d])|([^\d])", txt):
        if single_char:
            pieces.append(single_char)
        else:
            pieces.append(char * int(cnt))
    return "".join(pieces)
print(decode("2CACA2CACA3CACACA3CAC"))

You can do this easily with functools and re. 1 line of code does all the work.
import re, functools
# Compile the "<count><letter>" pattern once; re.I lets [a-z] match
# upper-case letters as well.
_rle_pair = re.compile(r'(\d+)([a-z])', re.I)


def _expand(match):
    """Return the matched letter repeated <count> times."""
    count, letter = match.groups()
    return letter * int(count)


# Pre-bind the replacement callback as the `repl` argument of `sub`, leaving
# the input string as the only argument a caller still has to supply.
decode = functools.partial(_rle_pair.sub, _expand)
rle = '4CA2CACA2CACACACA'
print(decode(rle))  # CCCCACCACACCACACACA

If you want to do it without any imports, then you could do something like this:
x = '2CACA2CACA3CACACA3CAC0M'
int_string = ""  # digits of the count currently being read
char_list = []   # expanded pieces, joined once at the end
for char in x:
    # isdecimal() accepts exactly the characters int() can parse;
    # isnumeric() also accepts things like '²' or '½', which make int() raise.
    if char.isdecimal():
        int_string += char
        continue
    if not int_string:
        # No count in front of this character: keep it once.
        char_list.append(char)
    else:
        char_list.append(char * int(int_string))
        int_string = ""
print("".join(char_list))
This will work for any non-negative integer count, including zero (a count of zero drops the character), as you can see in the above example.

Related

Leetcode problem 14. Longest Common Prefix (Python)

I tried to solve the problem (you can read description here: https://leetcode.com/problems/longest-common-prefix/) And the following is code I came up with.
It gives prefix value of the first string in strs list and compares prefix with every string from the list, popping all characters that are not equal.
class Solution:
def longestCommonPrefix(self, strs: List[str]) -> str:
prefix = strs[0][0]
for i in range(len(strs)):
for j in range(len(prefix)):
if strs[i][j] != prefix[j]:
prefix.pop(prefix[j])
return prefix
But this code fails in the very first testcase where strs = ["flower","flow","flight"]
Expected output is "fl", while my code returns just "f"
I am struggling to find what is going wrong in my solution. Maybe you can help?
Iterate over the characters in parallel with zip:
strs = ["flower", "flow", "flight"]
n = 0
# zip(*strs) yields one tuple per character position and stops at the
# shortest string, so no explicit length check is needed.
for chars in zip(*strs):
    # More than one distinct character at this position: the prefix ends here.
    if len(set(chars)) > 1:
        break
    n += 1
# length
print(n)  # 2
# prefix
print(strs[0][:n])  # fl
Similar approach as a one-liner using itertools.takewhile:
from itertools import takewhile
# zip(*strs) yields one tuple per character position; takewhile keeps the
# positions where every string has the same character, and x[0] picks that
# shared character. `strs` is the list defined in the previous snippet.
prefix = ''.join([x[0] for x in takewhile(lambda x: len(set(x)) == 1, zip(*strs))])
Alternatively you could try to use the lib in os - commonprefix:
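(os.path.commonprefix compares the strings character by character, so it works on arbitrary strings and not only on paths; it is the related, path-aware os.path.commonpath that was added in Python 3.5)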
import os
from typing import List


class Solution:
    def longestCommonPrefix(self, strs: List[str]) -> str:
        """Return the longest prefix shared by every string in *strs*.

        os.path.commonprefix works character by character (it is not
        path-aware), which is exactly what is needed here. An empty list
        gives an empty string.
        """
        return os.path.commonprefix(strs)


strs = ["flower", "flow", "flight"]
# The method takes `self`, so it has to be called on a Solution instance.
print(Solution().longestCommonPrefix(strs))

Function to remove more than 2 consecutive repetitions of a string not working

Here's my function:
def remove_more_than_two_reps(text):
result = list(text)
for idx,char in enumerate(text):
if(result[:idx].count(char) > 2):
result.remove(char)
return ''.join(result)
expected result:
text = 'teeeexxxxt'
result = remove_more_than_two_reps(text)
>'teexxt'
My function just returns the original string, what is the problem?
Try using append which is O(1) instead of remove which is O(n):
def remove_more_than_two_reps(text: str) -> str:
    """Collapse every run of three or more identical characters to two.

    :param text: the input string
    :return: *text* with each run of equal characters cut to at most two
    """
    result = []
    for ch in text:
        # Keep ch unless the two characters already kept are both ch,
        # i.e. unless it would be the third (or later) repetition in a row.
        if len(result) < 2 or result[-1] != ch or result[-2] != ch:
            result.append(ch)
    return ''.join(result)
text = 'teeeexxxxt'
result = remove_more_than_two_reps(text)
print(result)
Output:
teexxt
Another option could be using a pattern, matching 3 or more times the same character (.)\1{2,} and in the replacement use 2 times the captured group value:
import re
def remove_more_than_two_reps(text):
    """Collapse every run of three or more identical characters to two.

    :param text: the input string
    :return: *text* with each run of equal characters cut to at most two
    """
    # (.)\1{2,} matches a character followed by two or more copies of itself;
    # the replacement keeps exactly two. re.DOTALL lets `.` match newlines
    # too, so runs of blank lines are treated like any other repeated character.
    return re.sub(r'(.)\1{2,}', r'\1\1', text, flags=re.DOTALL)
text = 'teeeexxxxt'
print(remove_more_than_two_reps(text))
Output
teexxt
See a regex demo and a Python demo.
Wanted to share an itertools solution, useful when you have particularly big strings (since it avoids allocating an enormous list):
import itertools as it
def remove_more_than_two_reps(text: str) -> str:
    """Collapse every run of three or more identical characters to two.

    :param text: the input string
    :return: *text* with each run of equal characters cut to at most two
    """
    # groupby() yields one lazy group per run of equal characters; islice()
    # keeps at most the first two characters of each run.
    reps_of_at_most_two = (it.islice(reps, 2) for _, reps in it.groupby(text))
    return ''.join(it.chain.from_iterable(reps_of_at_most_two))

Python lists and for loops. How do I communicate to the for loop that I intend to work on subsequent items and not the first one only?

I am a newbie in python and I am working on a function that I expect to pass a string like abcd and it outputs something like A-Bb-Ccc-Dddd.
I have created the following.
`
def mumbler(s):
chars = list(s)
mumbled = []
result = []
for char in chars:
caps = char.upper()
num = chars.index(char)
low = char.lower()
mumbled.append( caps+ low*num)
for i in mumbled:
result.append(i+'-')
result = ''.join(result)
return result[:-1]
`
It works for most cases. However, when I pass a string like Abcda. It fails to return the expected output, in this case, A-Bb-Ccc-Dddd-Aaaaa.
How should I go about solving this?
Thank you for taking the time to answer this.
You can do it in a much simpler way using list comprehension and enumerate
>>> s = 'abcd'
>>> '-'.join([c.upper() + c.lower()*i for i,c in enumerate(s)])
'A-Bb-Ccc-Dddd'
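If you want to make your own code work, you'll just need to convert the result list to a string outside your second for-loop. Be aware that chars.index(char) returns the position of the first occurrence of a character, so a string that repeats a character in the same case (for example 'abca') still needs enumerate to get the right repeat count: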
def mumbler(s):
    """Turn 'abcd' into 'A-Bb-Ccc-Dddd'.

    The character at (0-based) position i becomes one upper-case copy
    followed by i lower-case copies; the pieces are joined with '-'.

    :param s: the input string
    :return: the "mumbled" string ('' for empty input)
    """
    # enumerate() supplies the position directly. list.index() would return
    # the first occurrence and so under-repeat a character that appears twice.
    mumbled = [char.upper() + char.lower() * position
               for position, char in enumerate(s)]
    return '-'.join(mumbled)
mumbler('Abcda')
'A-Bb-Ccc-Dddd-Aaaaa'
Go for a simple 1-liner - next() on count for maintaining the times to repeat and title() for title-casing:
from itertools import count
s = 'Abcda'
# count(1) yields 1, 2, 3, ...; each next(i) below is the repeat count for
# the next character of s.
i = count(1)
# title() upper-cases the first letter of each repeated run and lower-cases
# the rest.
print('-'.join([(x * next(i)).title() for x in s]))
# A-Bb-Ccc-Dddd-Aaaaa

Return a string from another string based off number and order of specific substrings

Suppose I have a string such as
s = "left-left-right-right-left"
and an empty string n = ''
and going from left to right for that string, read the number of lefts and rights that appear, and add an 'a' for every left and 'b' for every right that appears.
In other words a function like
def convert(s):
would return 'aabba'
I'm thinking along the lines of s.count, but the b's need to be between the a's, and count doesn't tell you where an occurrence of a substring happens.
The easiest way is to replace "left" with "a" and "right" with "b", then remove the "-" separators:
s = "left-left-right-right-left"
# Map the words to letters first, then drop the separators.
s = (
    s.replace("left", "a")
    .replace("right", "b")
    .replace("-", "")
)
Simplest solution.
def convert(s):
    """Map a '-'-separated sequence of 'left'/'right' to a string of 'a'/'b'.

    :param s: e.g. "left-left-right-right-left"
    :return: e.g. "aabba"
    """
    s = s.replace("left", "a")
    s = s.replace("right", "b")
    # Only the separators are left to remove once the words are replaced.
    s = s.replace("-", "")
    return s
OR
def convert(s):
    """Map a '-'-separated sequence of 'left'/'right' to a string of 'a'/'b'."""
    return s.replace("left", "a").replace("right", "b").replace("-", "")
I've tried a recursive solution.
def rec_search(s, n):
    """Recursively map 'left-...-right' to 'a...b', working from the right.

    :param s: '-'-separated sequence of 'left'/'right'
    :param n: prefix returned once *s* is exhausted
    :return: *n* followed by one 'a' per 'left' and one 'b' per 'right'
    """
    # `not s` rather than `len(s) is 0`: `is` tests identity, not equality.
    if not s:
        return n
    if s.endswith('left'):
        # Strip the word together with the '-' in front of it. For the very
        # first word the slice simply leaves the empty string.
        return rec_search(s[:-len('-left')], n) + 'a'
    return rec_search(s[:-len('-right')], n) + 'b'


# print() with a single argument works on both Python 2 and Python 3.
print(rec_search('left-left-right-right-left', ''))
This could also be done using a regular expression sub() as follows:
import re
s = "left-left-right-right-left"
# Build the lookup table once instead of on every match.
replacements = {'left': 'a', 'right': 'b', '-': ''}
# print() with a single argument works on both Python 2 and Python 3.
print(re.sub('left|right|-', lambda x: replacements[x.group(0)], s))
Giving you:
aabba
It works by replacing every left, right or - with the result of a function that looks up the replacement text in a dictionary.

Check and remove particular char from string in python

I'm in a situation where I have a string and a special symbol that is consecutively repeating, such as:
s = 'a.b.c...d..e.g'
How can I check whether it is repeating or not and remove consecutive symbols, resulting in this:
s = 'a.b.c.d.e.g'
import re
result = re.sub(r'\.{2,}', '.', 'a.b.c...d..e.g')
A bit more generalized version:
import re
symbol = '.'
regex_pattern_to_replace = re.escape(symbol)+'{2,}'
# Note that escape sequences are processed in replace_to
# but this time we have no backslash characters in it.
# In case of more complex replacement we could use
# replace_to = replace_to.replace('\\', '\\\\')
# to defend against occasional escape sequences.
replace_to = symbol
result = re.sub(regex_pattern_to_replace, replace_to, 'a.b.c...d..e.g')
The same with compiled regex (added after Cristian Ciupitu's comment):
compiled_regex = re.compile(regex_pattern_to_replace)
# You can store the compiled_regex and reuse it multiple times.
result = compiled_regex.sub(replace_to, 'a.b.c...d..e.g')
Check out the docs for re.sub
Simple and clear:
>>> a = 'a.b.c...d..e.g'
>>> while '..' in a:
a = a.replace('..','.')
>>> a
'a.b.c.d.e.g'
Lots of answers, so why not throw another one into the mix.
You can zip the string with itself off by one and eliminate all matching '.'s:
''.join(x[0] for x in zip(s, s[1:]+' ') if x != ('.', '.'))
Certainly not the fastest, just interesting. It's trivial to turn this into eliminating all repeating elements:
''.join(a for a,b in zip(s, s[1:]+' ') if a != b)
Note: you can use izip_longest (py2) or zip_longest (py3) if ' ' as a filler causes an issue.
My previous answer was a dud so here's another attempt using reduce(). This is reasonably efficient with O(n) time complexity:
def remove_consecutive(s, symbol='.'):
    """Collapse each run of consecutive *symbol* characters to a single one.

    :param s: the input string
    :param symbol: the character whose repetitions are collapsed
    :return: *s* with every run of *symbol* reduced to one occurrence
    """
    # reduce() is a builtin only on Python 2; functools.reduce exists on both.
    from functools import reduce

    def _remover(x, y):
        # x is the text built so far, y the next character. Skip y when it
        # would repeat a symbol that x already ends with. x[-1:] is '' for
        # empty x, so the first character never raises.
        if y == symbol and x[-1:] == y:
            return x
        return x + y

    return reduce(_remover, s, '')


for s in 'abcdefg', '.a.', '..aa..', '..aa...b...c.d.e.f.g.....', '.', '..', '...', '':
    print(remove_consecutive(s))
Output
abcdefg
.a.
.aa.
.aa.b.c.d.e.f.g.
.
.
.
Kind of complicated, but it works and it's being done in a single loop:
import itertools
def remove_consecutive(s, c='.'):
    """Collapse each run of consecutive *c* characters to a single one.

    :param s: the input string
    :param c: the character whose repetitions are collapsed
    :return: *s* with every run of *c* reduced to one occurrence
    """
    return ''.join(
        itertools.chain.from_iterable(
            # groupby() splits s into alternating runs keyed by "equals c".
            # A run of c (k is true) is replaced by a single c; any other
            # run g is passed through unchanged.
            c if k else g
            for k, g in itertools.groupby(s, c.__eq__)
        )
    )

Categories

Resources