case sensitive string replacement in Python - python

I need to replace a string in case sensitive way. For example
abc -> def
Abc -> Def
aBc -> dEf
abC -> deF
What can I do this with Python?

from string import maketrans
"Abc".translate(maketrans("abcABC", "defDEF"))

Expanding on Mark Byers' answer, Here's a solution which works for replacement text of any length.
The trick is to send a function to re.sub().
import re
def case_sensitive_replace(string, old, new):
""" replace occurrences of old with new, within string
replacements will match the case of the text it replaces
"""
def repl(match):
current = match.group()
result = ''
all_upper=True
for i,c in enumerate(current):
if i >= len(new):
break
if c.isupper():
result += new[i].upper()
else:
result += new[i].lower()
all_upper=False
#append any remaining characters from new
if all_upper:
result += new[i+1:].upper()
else:
result += new[i+1:].lower()
return result
regex = re.compile(re.escape(old), re.I)
return regex.sub(repl, string)
print case_sensitive_replace("abc Abc aBc abC ABC",'abc','de')
print case_sensitive_replace("abc Abc aBc abC ABC",'abc','def')
print case_sensitive_replace("abc Abc aBc abC ABC",'abc','defg')
Result:
de De dE de DE
def Def dEf deF DEF
defg Defg dEfg deFg DEFG

Here's a method using regular expressions. The key point is that when it finds a match it first modifies the replacement string to match the casing of the matched string. This works because re.sub can take a function as a replacement instead of just a string.
import re
def case_sensitive_replace(s, before, after):
regex = re.compile(re.escape(before), re.I)
return regex.sub(lambda x: ''.join(d.upper() if c.isupper() else d.lower()
for c,d in zip(x.group(), after)), s)
test = '''
abc -> def
Abc -> Def
aBc -> dEf
abC -> deF
'''
result = case_sensitive_replace(a, 'abc', 'def')
print(result)
Result:
def -> def
Def -> Def
dEf -> dEf
deF -> deF

Long time lurker, thought I'd post a suggestion here as some of these seem fairly convoluted.
print map(lambda a, b: b.lower() if a.islower() else b.upper(), "aBc", "def")
It does assume both strings are the same length, however you could easily replace the lambda with a proper function and check for None on the first input.

Not the most efficient way, and it's very crude, but probably something like this could work:
def case_insensitive_replace(string, old, new):
upper_indices = [idx for idx, char in enumerate(string) if char.isupper()]
replaced = list(string.lower().replace(old.lower(), new.lower()))
for idx in upper_indices:
replaced[idx] = replaced[idx].upper()
return "".join(replaced)

I understand that You want to change the second string case according to the first string.
Am i right? So, my solution is the following. String s2 change its case according to the corresponding string s1. The result is storing in s3. One assumption here is the two strings has the same length.
s1 = "AaBb"
s2 = "cdef"
s3 = ""
index = 0
length = len(s1)
while(True):
if s1[index].isupper():
temp = s2[index].upper()
else:
temp = s2[index].lower()
s3 = s3 + temp
index +=1
if index == length:
break
print s3

This will work, although you probably want to add some checks that the string lengths are the same:
string1 = "AbcDEFghiJKLmnO"
string2 = "some other text"
string2 = "".join((string2[i].upper() if string1[i].isupper() else string2[i].lower()
for i in range(len(string1))))

Related

How to interleave items from 2 lists backward

Word Problem:
Create a function to interleave the letters of two strings (starting with the first string from right to left) and return the resultant string.
def interleave(s1: str, s2: str) -> str:
I was able to solve this word problem but I need help putting it in a function
def interleave(s1: str, s2: str) -> str:
string1 = s1[::-1]
string2 = s2[::-1]
for i in range(len(string1)):
print(string2[i] + string1[i], end = "")
return print
print(interleave("1234", "5678"))
I can't tell from your question what should happen when the strings are unequal in length. My basic solution would be
def interleave(str1, str2):
return ''.join(c1 + c2 for c1, c2 in zip(str1, str2))
but this will stop with the shortest of the two input strings.
Right now, the function prints the results. Instead store the results in a variable, which you return when done.
Like so:
#!/usr/bin/python3
def interleave(s1: str, s2: str) -> str:
string1 = s1[::-1]
string2 = s2[::-1]
interleaved_string = ''
for i in range(len(string1)):
interleaved_string += string2[i] + string1[i]
return interleaved_string
print(interleave("1234", "5678"))
Your whole loop can be made into a one-liner using zip and map (or a generator comprehension):
def interleave(str1: str, str2: str) -> str:
assert len(str1) == len(str2), "Strings must be equal in length." # Alternatively, consider itertools.zip_longest
return ''.join(map(''.join, zip(str1[::-1], str2[::-1])))
Input:
joined_str = interleave("1234", "5678")
print(joined_str)
Output:
'48372615'

Function to remove more than 2 consecutive repetitions of a string not working

Here's my function:
def remove_more_than_two_reps(text):
result = list(text)
for idx,char in enumerate(text):
if(result[:idx].count(char) > 2):
result.remove(char)
return ''.join(result)
expected result:
text = 'teeeexxxxt'
result = remove_more_than_two_reps(text)
>'teexxt'
My function just returns the original string, what is the problem?
Try using append which is O(1) instead of remove which is O(n):
def remove_more_than_two_reps(text: str) -> str:
result = []
for ch in text:
if len(result) < 2 or result[-1] != ch or result[-2] != ch:
result.append(ch)
return ''.join(result)
text = 'teeeexxxxt'
result = remove_more_than_two_reps(text)
print(result)
Output:
teexxt
Another option could be using a pattern, matching 3 or more times the same character (.)\1{2,} and in the replacement use 2 times the captured group value:
import re
def remove_more_than_two_reps(text):
return re.sub(r'(.)\1{2,}', r'\1\1', text)
text = 'teeeexxxxt'
print(remove_more_than_two_reps(text))
Output
teexxt
See a regex demo and a Python demo.
Wanted to share an itertools solution, useful when you have particularly big strings (since it avoids allocating an enormous list):
import itertools as it
def remove_more_than_two_reps(text: str) -> str:
reps_of_at_most_two = (it.islice(reps, 2) for _, reps in it.groupby(text))
return ''.join(it.chain.from_iterable(reps_of_at_most_two))

How can we remove word with repeated single character?

I am trying to remove word with single repeated characters using regex in python, for example :
good => good
gggggggg => g
What I have tried so far is following
re.sub(r'([a-z])\1+', r'\1', 'ffffffbbbbbbbqqq')
Problem with above solution is that it changes good to god and I just want to remove words with single repeated characters.
A better approach here is to use a set
def modify(s):
#Create a set from the string
c = set(s)
#If you have only one character in the set, convert set to string
if len(c) == 1:
return ''.join(c)
#Else return original string
else:
return s
print(modify('good'))
print(modify('gggggggg'))
If you want to use regex, mark the start and end of the string in our regex by ^ and $ (inspired from #bobblebubble comment)
import re
def modify(s):
#Create the sub string with a regex which only matches if a single character is repeated
#Marking the start and end of string as well
out = re.sub(r'^([a-z])\1+$', r'\1', s)
return out
print(modify('good'))
print(modify('gggggggg'))
The output will be
good
g
If you do not want to use a set in your method, this should do the trick:
def simplify(s):
l = len(s)
if l>1 and s.count(s[0]) == l:
return s[0]
return s
print(simplify('good'))
print(simplify('abba'))
print(simplify('ggggg'))
print(simplify('g'))
print(simplify(''))
output:
good
abba
g
g
Explanations:
You compute the length of the string
you count the number of characters that are equal to the first one and you compare the count with the initial string length
depending on the result you return the first character or the whole string
You can use trim command:
take a look at this examples:
"ggggggg".Trim('g');
Update:
and for characters which are in the middle of the string use this function, thanks to this answer
in java:
public static string RemoveDuplicates(string input)
{
return new string(input.ToCharArray().Distinct().ToArray());
}
in python:
used = set()
unique = [x for x in mylist if x not in used and (used.add(x) or True)]
but I think all of these answers does not match situation like aaaaabbbbbcda, this string has an a at the end of string which does not appear in the result (abcd). for this kind of situation use this functions which I wrote:
In:
def unique(s):
used = set()
ret = list()
s = list(s)
for x in s:
if x not in used:
ret.append(x)
used = set()
used.add(x)
return ret
print(unique('aaaaabbbbbcda'))
out:
['a', 'b', 'c', 'd', 'a']

In python, how to 'if finditer(...) has no matches'?

I would like to do something when finditer() does not find anything.
import re
pattern = "1"
string = "abc"
matched_iter = re.finditer(pattern, string)
# <if matched_iter is empty (no matched found>.
# do something.
# else
for m in matched_iter:
print m.group()
The best thing I could come up with is to keep track of found manually:
mi_no_find = re.finditer(r'\w+',"$$%%%%") # not matching.
found = False
for m in mi_no_find:
print m.group()
found = True
if not found:
print "Nothing found"
Related posts that don't answer:
Counting finditer matches: Number of regex matches (I don't need to count, I just need to know if there are no matches).
finditer vs match: different behavior when using re.finditer and re.match (says always have to loop over an iterator returned by finditer)
[edit]
- I have no interest in enumerating or counting total output. Only if found else not found actions.
- I understand I can put finditer into a list, but this would be inefficient for large strings. One objective is to have low memory utilization.
Updated 04/10/2020
Use re.search(pattern, string) to check if a pattern exists.
pattern = "1"
string = "abc"
if re.search(pattern, string) is None:
print('do this because nothing was found')
Returns:
do this because nothing was found
If you want to iterate over the return, then place the re.finditer() within the re.search().
pattern = '[A-Za-z]'
string = "abc"
if re.search(pattern, string) is not None:
for thing in re.finditer(pattern, string):
print('Found this thing: ' + thing[0])
Returns:
Found this thing: a
Found this thing: b
Found this thing: c
Therefore, if you wanted both options, use the else: clause with the if re.search() conditional.
pattern = "1"
string = "abc"
if re.search(pattern, string) is not None:
for thing in re.finditer(pattern, string):
print('Found this thing: ' + thing[0])
else:
print('do this because nothing was found')
Returns:
do this because nothing was found
previous reply below (not sufficient, just read above)
If the .finditer() does not match a pattern, then it will not perform any commands within the related loop.
So:
Set the variable before the loop you are using to iterate over the regex returns
Call the variable after (And outside of) the loop you are using to iterate over the regex returns
This way, if nothing is returned from the regex call, the loop won't execute and your variable call after the loop will return the exact same variable it was set to.
Below, example 1 demonstrates the regex finding the pattern. Example 2 shows the regex not finding the pattern, so the variable within the loop is never set.
Example 3 shows my suggestion - where the variable is set before the regex loop, so if the regex does not find a match (and subsequently, does not trigger the loop), the variable call after the loop returns the initial variable set (Confirming the regex pattern was not found).
Remember to import the import re module.
EXAMPLE 1 (Searching for the characters 'he' in the string 'hello world' will return 'he')
my_string = 'hello world'
pat = '(he)'
regex = re.finditer(pat,my_string)
for a in regex:
b = str(a.groups()[0])
print(b)
# returns 'he'
EXAMPLE 2 (Searching for the characters 'ab' in the string 'hello world' do not match anything, so the 'for a in regex:' loop does not execute and does not assign the b variable any value.)
my_string = 'hello world'
pat = '(ab)'
regex = re.finditer(pat,my_string)
for a in regex:
b = str(a.groups()[0])
print(b)
# no return
EXAMPLE 3 (Searching for the characters 'ab' again, but this time setting the variable b to 'CAKE' before the loop, and calling the variable b after, outside of the loop returns the initial variable - i.e. 'CAKE' - since the loop did not execute).
my_string = 'hello world'
pat = '(ab)'
regex = re.finditer(pat,my_string)
b = 'CAKE' # sets the variable prior to the for loop
for a in regex:
b = str(a.groups()[0])
print(b) # calls the variable after (and outside) the loop
# returns 'CAKE'
It's also worth noting that when designing your pattern to feed into the regex, make sure to use the parenthesis to indicate the start and end of a group.
pattern = '(ab)' # use this
pattern = 'ab' # avoid using this
To tie back to the initial question:
Since nothing found won’t execute the for loop (for a in regex), the user can preload the variable, then check it after the for loop for the original loaded value. This will allow for the user to know if nothing was found.
my_string = 'hello world'
pat = '(ab)'
regex = re.finditer(pat,my_string)
b = 'CAKE' # sets the variable prior to the for loop
for a in regex:
b = str(a.groups()[0])
if b == ‘CAKE’:
# action taken if nothing is returned
If performance isn't an issue, simply use findall or list(finditer(...)), which returns a list.
Otherwise, you can "peek" into the generator with next, then loop as normal if it raises StopIteration. Though there are other ways to do it, this is the simplest to me:
import itertools
import re
pattern = "1"
string = "abc"
matched_iter = re.finditer(pattern, string)
try:
first_match = next(matched_iter)
except StopIteration:
print("No match!") # action for no match
else:
for m in itertools.chain([first_match], matched_iter):
print(m.group())
You can probe the iterator with next and then chain the results back together while excepting StopIteration which means the iterator was empty:
import itertools as it
matches = iter([])
try:
probe = next(matches)
except StopIteration:
print('empty')
else:
for m in it.chain([probe], matches):
print(m)
Regarding your solution you could check m directly, setting it to None beforehand:
matches = iter([])
m = None
for m in matches:
print(m)
if m is None:
print('empty')
It prints the original string if there are no matches in the string.
It will replace the position n of the string.
For more reference: https://docs.python.org/2/howto/regex.html
Input_Str = "FOOTBALL"
def replacing(Input_String, char_2_replace, replaced_char, n):
pattern = re.compile(char_2_replace)
if len(re.findall(pattern, Input_String)) >= n:
where = [m for m in pattern.finditer(Input_String)][n-1]
before = Input_String[:where.start()]
after = Input_String[where.end():]
newString = before + replaced_char + after
else:
newString = Input_String
return newString
print(replacing(Input_Str, 'L', 'X', 4))```
I know this answer is late, but very suitable for Python 3.8+
You can use the new warlus operator := operator along with next(iterator[, default]) to solve for 'no matches' in re.finditer(pattern, string, flags=0) somewhat like this:
import re
pattern_ = "1"
string_ = "abc"
def is_match():
was_found = False
while next((match := re.finditer(pattern_, string_)), None) is not None:
was_found = True
yield match.group() # or just print it
return was_found

Regex with customized word boundaries in Python

I'm using a function called findlist to return a list of all the positions of a certain string within a text, with regex to look for word boundaries. But I want to ignore the character ( and only consider the other word boundaries, so that it will find split in var split but not in split(a). Is there any way to do this?
import re
def findlist(input, place):
return [m.span() for m in re.finditer(input, place)]
str = '''
var a = 'a b c'
var split = a.split(' ')
'''
instances = findlist(r"\b%s\b" % ('split'), str)
print(instances)
You may check if there is a ( after the trailing word boundary with a negative lookahead (?!\():
instances = findlist(r"\b{}\b(?!\()".format('split'), s)
^^^^^^
The (?!\() will trigger after the whole word is found, and if there is a ( immediately to the right of the found word, the match will be failed.
See the Python demo:
import re
def findlist(input_data, place):
return [m.span() for m in re.finditer(input_data, place)]
s = '''
var a = 'a b c'
var split = a.split(' ')
'''
instances = findlist(r"\b{}\b(?!\()".format('split'), s)
print(instances) # => [(21, 26)]

Categories

Resources