Replace empty string with space - python

Hello I started learning Python and I've come across a problem. I want to replace each empty string in a list with a space (" "). For example, if I call the function with function(['', 'x', 'x', '', '', 'y', 'y', '', 'a']) I would like to return a string ' xx yy a'.
def function(a):
for i in a:
if i == None:
a[i] = " "
string = "".join(a)
return string

Use a generator expression instead with a short-circuiting or:
def function(a):
return ''.join(char or ' ' for char in a)
If the character is a non-empty string, it'll be used as is. Otherwise, a space will be used.

to replace None , False and empty strings
>>> a = ['','11',None,'22',False]
>>> b=[elem if elem else " " for elem in a]
>>> ''.join(b)
' 11 22 '
>>>
note that your code would not replace '' as '' is not None
>>>''==None
False
>>>

Related

How would you save/keep the separator for split() function on python? [duplicate]

Here's the simplest way to explain this. Here's what I'm using:
re.split('\W', 'foo/bar spam\neggs')
>>> ['foo', 'bar', 'spam', 'eggs']
Here's what I want:
someMethod('\W', 'foo/bar spam\neggs')
>>> ['foo', '/', 'bar', ' ', 'spam', '\n', 'eggs']
The reason is that I want to split a string into tokens, manipulate it, then put it back together again.
The docs of re.split mention:
Split string by the occurrences of pattern. If capturing
parentheses are used in pattern, then the text of all groups in the
pattern are also returned as part of the resulting list.
So you just need to wrap your separator with a capturing group:
>>> re.split('(\W)', 'foo/bar spam\neggs')
['foo', '/', 'bar', ' ', 'spam', '\n', 'eggs']
If you are splitting on newline, use splitlines(True).
>>> 'line 1\nline 2\nline without newline'.splitlines(True)
['line 1\n', 'line 2\n', 'line without newline']
(Not a general solution, but adding this here in case someone comes here not realizing this method existed.)
another example, split on non alpha-numeric and keep the separators
import re
a = "foo,bar#candy*ice%cream"
re.split('([^a-zA-Z0-9])',a)
output:
['foo', ',', 'bar', '#', 'candy', '*', 'ice', '%', 'cream']
explanation
re.split('([^a-zA-Z0-9])',a)
() <- keep the separators
[] <- match everything in between
^a-zA-Z0-9 <-except alphabets, upper/lower and numbers.
If you have only 1 separator, you can employ list comprehensions:
text = 'foo,bar,baz,qux'
sep = ','
Appending/prepending separator:
result = [x+sep for x in text.split(sep)]
#['foo,', 'bar,', 'baz,', 'qux,']
# to get rid of trailing
result[-1] = result[-1].strip(sep)
#['foo,', 'bar,', 'baz,', 'qux']
result = [sep+x for x in text.split(sep)]
#[',foo', ',bar', ',baz', ',qux']
# to get rid of trailing
result[0] = result[0].strip(sep)
#['foo', ',bar', ',baz', ',qux']
Separator as it's own element:
result = [u for x in text.split(sep) for u in (x, sep)]
#['foo', ',', 'bar', ',', 'baz', ',', 'qux', ',']
results = result[:-1] # to get rid of trailing
Another no-regex solution that works well on Python 3
# Split strings and keep separator
test_strings = ['<Hello>', 'Hi', '<Hi> <Planet>', '<', '']
def split_and_keep(s, sep):
if not s: return [''] # consistent with string.split()
# Find replacement character that is not used in string
# i.e. just use the highest available character plus one
# Note: This fails if ord(max(s)) = 0x10FFFF (ValueError)
p=chr(ord(max(s))+1)
return s.replace(sep, sep+p).split(p)
for s in test_strings:
print(split_and_keep(s, '<'))
# If the unicode limit is reached it will fail explicitly
unicode_max_char = chr(1114111)
ridiculous_string = '<Hello>'+unicode_max_char+'<World>'
print(split_and_keep(ridiculous_string, '<'))
One Lazy and Simple Solution
Assume your regex pattern is split_pattern = r'(!|\?)'
First, you add some same character as the new separator, like '[cut]'
new_string = re.sub(split_pattern, '\\1[cut]', your_string)
Then you split the new separator, new_string.split('[cut]')
You can also split a string with an array of strings instead of a regular expression, like this:
def tokenizeString(aString, separators):
#separators is an array of strings that are being used to split the string.
#sort separators in order of descending length
separators.sort(key=len)
listToReturn = []
i = 0
while i < len(aString):
theSeparator = ""
for current in separators:
if current == aString[i:i+len(current)]:
theSeparator = current
if theSeparator != "":
listToReturn += [theSeparator]
i = i + len(theSeparator)
else:
if listToReturn == []:
listToReturn = [""]
if(listToReturn[-1] in separators):
listToReturn += [""]
listToReturn[-1] += aString[i]
i += 1
return listToReturn
print(tokenizeString(aString = "\"\"\"hi\"\"\" hello + world += (1*2+3/5) '''hi'''", separators = ["'''", '+=', '+', "/", "*", "\\'", '\\"', "-=", "-", " ", '"""', "(", ")"]))
Here is a simple .split solution that works without regex.
This is an answer for Python split() without removing the delimiter, so not exactly what the original post asks but the other question was closed as a duplicate for this one.
def splitkeep(s, delimiter):
split = s.split(delimiter)
return [substr + delimiter for substr in split[:-1]] + [split[-1]]
Random tests:
import random
CHARS = [".", "a", "b", "c"]
assert splitkeep("", "X") == [""] # 0 length test
for delimiter in ('.', '..'):
for _ in range(100000):
length = random.randint(1, 50)
s = "".join(random.choice(CHARS) for _ in range(length))
assert "".join(splitkeep(s, delimiter)) == s
# This keeps all separators in result
##########################################################################
import re
st="%%(c+dd+e+f-1523)%%7"
sh=re.compile('[\+\-//\*\<\>\%\(\)]')
def splitStringFull(sh, st):
ls=sh.split(st)
lo=[]
start=0
for l in ls:
if not l : continue
k=st.find(l)
llen=len(l)
if k> start:
tmp= st[start:k]
lo.append(tmp)
lo.append(l)
start = k + llen
else:
lo.append(l)
start =llen
return lo
#############################
li= splitStringFull(sh , st)
['%%(', 'c', '+', 'dd', '+', 'e', '+', 'f', '-', '1523', ')%%', '7']
replace all seperator: (\W) with seperator + new_seperator: (\W;)
split by the new_seperator: (;)
def split_and_keep(seperator, s):
return re.split(';', re.sub(seperator, lambda match: match.group() + ';', s))
print('\W', 'foo/bar spam\neggs')
If one wants to split string while keeping separators by regex without capturing group:
def finditer_with_separators(regex, s):
matches = []
prev_end = 0
for match in regex.finditer(s):
match_start = match.start()
if (prev_end != 0 or match_start > 0) and match_start != prev_end:
matches.append(s[prev_end:match.start()])
matches.append(match.group())
prev_end = match.end()
if prev_end < len(s):
matches.append(s[prev_end:])
return matches
regex = re.compile(r"[\(\)]")
matches = finditer_with_separators(regex, s)
If one assumes that regex is wrapped up into capturing group:
def split_with_separators(regex, s):
matches = list(filter(None, regex.split(s)))
return matches
regex = re.compile(r"([\(\)])")
matches = split_with_separators(regex, s)
Both ways also will remove empty groups which are useless and annoying in most of the cases.
install wrs "WITHOUT REMOVING SPLITOR" BY DOING
pip install wrs
(developed by Rao Hamza)
import wrs
text = "Now inbox “how to make spam ad” Invest in hard email marketing."
splitor = 'email | spam | inbox'
list = wrs.wr_split(splitor, text)
print(list)
result:
['now ', 'inbox “how to make ', 'spam ad” invest in hard ', 'email marketing.']
I had a similar issue trying to split a file path and struggled to find a simple answer.
This worked for me and didn't involve having to substitute delimiters back into the split text:
my_path = 'folder1/folder2/folder3/file1'
import re
re.findall('[^/]+/|[^/]+', my_path)
returns:
['folder1/', 'folder2/', 'folder3/', 'file1']
I found this generator based approach more satisfying:
def split_keep(string, sep):
"""Usage:
>>> list(split_keep("a.b.c.d", "."))
['a.', 'b.', 'c.', 'd']
"""
start = 0
while True:
end = string.find(sep, start) + 1
if end == 0:
break
yield string[start:end]
start = end
yield string[start:]
It avoids the need to figure out the correct regex, while in theory should be fairly cheap. It doesn't create new string objects and, delegates most of the iteration work to the efficient find method.
... and in Python 3.8 it can be as short as:
def split_keep(string, sep):
start = 0
while (end := string.find(sep, start) + 1) > 0:
yield string[start:end]
start = end
yield string[start:]
May I just leave it here
s = 'foo/bar spam\neggs'
print(s.replace('/', '+++/+++').replace(' ', '+++ +++').replace('\n', '+++\n+++').split('+++'))
['foo', '/', 'bar', ' ', 'spam', '\n', 'eggs']
Use re.split and also your regular expression comes from variable and also you have multi separator ,you can use as the following:
# BashSpecialParamList is the special param in bash,
# such as your separator is the bash special param
BashSpecialParamList = ["$*", "$#", "$#", "$?", "$-", "$$", "$!", "$0"]
# aStr is the the string to be splited
aStr = "$a Klkjfd$0 $? $#%$*Sdfdf"
reStr = "|".join([re.escape(sepStr) for sepStr in BashSpecialParamList])
re.split(f'({reStr})', aStr)
# Then You can get the result:
# ['$a Klkjfd', '$0', ' ', '$?', ' ', '$#', '%', '$*', 'Sdfdf']
reference: GNU Bash Special Parameters
Some of those answers posted before, will repeat delimiter, or have some other bugs which I faced in my case. You can use this function, instead:
def split_and_keep_delimiter(input, delimiter):
result = list()
idx = 0
while delimiter in input:
idx = input.index(delimiter);
result.append(input[0:idx+len(delimiter)])
input = input[idx+len(delimiter):]
result.append(input)
return result
In the below code, there is a simple, very efficient and well tested answer to this question. The code has comments explaining everything in it.
I promise it's not as scary as it looks - it's actually only 13 lines of code! The rest are all comments, docs and assertions
def split_including_delimiters(input: str, delimiter: str):
"""
Splits an input string, while including the delimiters in the output
Unlike str.split, we can use an empty string as a delimiter
Unlike str.split, the output will not have any extra empty strings
Conequently, len(''.split(delimiter))== 0 for all delimiters,
whereas len(input.split(delimiter))>0 for all inputs and delimiters
INPUTS:
input: Can be any string
delimiter: Can be any string
EXAMPLES:
>>> split_and_keep_delimiter('Hello World ! ',' ')
ans = ['Hello ', 'World ', ' ', '! ', ' ']
>>> split_and_keep_delimiter("Hello**World**!***", "**")
ans = ['Hello', '**', 'World', '**', '!', '**', '*']
EXAMPLES:
assert split_and_keep_delimiter('-xx-xx-','xx') == ['-', 'xx', '-', 'xx', '-'] # length 5
assert split_and_keep_delimiter('xx-xx-' ,'xx') == ['xx', '-', 'xx', '-'] # length 4
assert split_and_keep_delimiter('-xx-xx' ,'xx') == ['-', 'xx', '-', 'xx'] # length 4
assert split_and_keep_delimiter('xx-xx' ,'xx') == ['xx', '-', 'xx'] # length 3
assert split_and_keep_delimiter('xxxx' ,'xx') == ['xx', 'xx'] # length 2
assert split_and_keep_delimiter('xxx' ,'xx') == ['xx', 'x'] # length 2
assert split_and_keep_delimiter('x' ,'xx') == ['x'] # length 1
assert split_and_keep_delimiter('' ,'xx') == [] # length 0
assert split_and_keep_delimiter('aaa' ,'xx') == ['aaa'] # length 1
assert split_and_keep_delimiter('aa' ,'xx') == ['aa'] # length 1
assert split_and_keep_delimiter('a' ,'xx') == ['a'] # length 1
assert split_and_keep_delimiter('' ,'' ) == [] # length 0
assert split_and_keep_delimiter('a' ,'' ) == ['a'] # length 1
assert split_and_keep_delimiter('aa' ,'' ) == ['a', '', 'a'] # length 3
assert split_and_keep_delimiter('aaa' ,'' ) == ['a', '', 'a', '', 'a'] # length 5
"""
# Input assertions
assert isinstance(input,str), "input must be a string"
assert isinstance(delimiter,str), "delimiter must be a string"
if delimiter:
# These tokens do not include the delimiter, but are computed quickly
tokens = input.split(delimiter)
else:
# Edge case: if the delimiter is the empty string, split between the characters
tokens = list(input)
# The following assertions are always true for any string input and delimiter
# For speed's sake, we disable this assertion
# assert delimiter.join(tokens) == input
output = tokens[:1]
for token in tokens[1:]:
output.append(delimiter)
if token:
output.append(token)
# Don't let the first element be an empty string
if output[:1]==['']:
del output[0]
# The only case where we should have an empty string in the output is if it is our delimiter
# For speed's sake, we disable this assertion
# assert delimiter=='' or '' not in output
# The resulting strings should be combinable back into the original string
# For speed's sake, we disable this assertion
# assert ''.join(output) == input
return output
>>> line = 'hello_toto_is_there'
>>> sep = '_'
>>> [sep + x[1] if x[0] != 0 else x[1] for x in enumerate(line.split(sep))]
['hello', '_toto', '_is', '_there']

excess trailing concatenation of a string

I'm trying to write a function that will take a list and convert it to a string separated by - or a ,.
I have to use a loop for this so I came up with the following.
My problem is that I can't get rid of trailing separator. any ideas?
Output is : String is r-i-n-g-i-n-g-
should be : String is r-i-n-g-i-n-g
#A list created for the purpose of converting it to a string)
c_list = ['r', 'i', 'n', 'g', 'i', 'n', 'g']
# Function to_string()
def to_string(my_list, sep=', '):
counter = 0
mystring = ''
for n in my_list:
n = str(n)
mystring = mystring + n
mystring = mystring + sep
return mystring
print('String is', to_string(c_list, '-'))
You can use the str.join method instead:
print('String is', '-'.join(c_list))
If you need to use a loop, however, you can make adding the separator the first thing to do in your loop instead, but make it conditional on that there is already content in mystring, so that it does not add the separator in the first iteration:
c_list = ['r', 'i', 'n', 'g', 'i', 'n', 'g']
# Function to_string()
def to_string(my_list, sep=', '):
counter = 0
mystring = ''
for n in my_list:
if mystring:
mystring = mystring + sep
n = str(n)
mystring = mystring + n
return mystring
print('String is', to_string(c_list, '-'))
In return statement you can use string slicing to remove trailing delimiter.
return mystring[:-1]
This will remove the last character in the string.

How to print a string in alternating case?

I want to print a string in Python with alternate cases. For example my string is "Python". I want to print it like "PyThOn". How can I do this?
string = "Python"
for i in string:
if (i%2 == 0):
(string[i].upper())
else:
(string[i].lower())
print (string)
It's simply not Pythonic if you don't manage to work a zip() in there somehow:
string = 'Pythonic'
print(''.join(x + y for x, y in zip(string[0::2].upper(), string[1::2].lower())))
OUTPUT
PyThOnIc
mystring="Python"
newstring=""
odd=True
for c in mystring:
if odd:
newstring = newstring + c.upper()
else:
newstring = newstring + c.lower()
odd = not odd
print newstring
For random caps and small characters
>>> def test(x):
... return [(str(s).lower(),str(s).upper())[randint(0,1)] for s in x]
...
>>> print test("Python")
['P', 'Y', 't', 'h', 'o', 'n']
>>> print test("Python")
['P', 'y', 'T', 'h', 'O', 'n']
>>>
>>>
>>> print ''.join(test("Python"))
pYthOn
>>> print ''.join(test("Python"))
PytHon
>>> print ''.join(test("Python"))
PYTHOn
>>> print ''.join(test("Python"))
PytHOn
>>>
For Your problem code is :
st = "Python"
out = ""
for i,x in enumerate(st):
if (i%2 == 0):
out += st[i].upper()
else:
out += st[i].lower()
print out
Try it:
def upper(word, n):
word = list(word)
for i in range(0, len(word), n):
word[i] = word[i].upper()
return ''.join(word)
You can iterate using list comprehension, and force case depending on each character's being even or odd.
example:
s = "This is a test string"
ss = ''.join([x.lower() if i%2 else x.upper() for i,x in enumerate(s)])
print ss
s = "ThisIsATestStringWithoutSpaces"
ss = ''.join([x.lower() if i%2 else x.upper() for i,x in enumerate(s)])
print ss
output:
~/so_test $ python so_test.py
ThIs iS A TeSt sTrInG
ThIsIsAtEsTsTrInGwItHoUtSpAcEs
~/so_test $

How do I replace multiple spaces with just one character?

Here's my code so far:
input1 = input("Please enter a string: ")
newstring = input1.replace(' ','_')
print(newstring)
So if I put in my input as:
I want only one underscore.
It currently shows up as:
I_want_only_____one______underscore.
But I want it to show up like this:
I_want_only_one_underscore.
This pattern will replace any groups of whitespace with a single underscore
newstring = '_'.join(input1.split())
If you only want to replace spaces (not tab/newline/linefeed etc.) it's probably easier to use a regex
import re
newstring = re.sub(' +', '_', input1)
Dirty way:
newstring = '_'.join(input1.split())
Nicer way (more configurable):
import re
newstring = re.sub('\s+', '_', input1)
Extra Super Dirty way using the replace function:
def replace_and_shrink(t):
'''For when you absolutely, positively hate the normal ways to do this.'''
t = t.replace(' ', '_')
if '__' not in t:
return t
t = t.replace('__', '_')
return replace_and_shrink(t)
First approach (doesn't work)
>>> a = '213 45435 fdgdu'
>>> a
'213 45435 fdgdu '
>>> b = ' '.join( a.split() )
>>> b
'213 45435 fdgdu'
As you can see the variable a contains a lot of spaces between the "useful" sub-strings. The combination of the split() function without arguments and the join() function cleans up the initial string from the multiple white spaces.
The previous technique fails when the initial string contains special characters such as '\n':
>>> a = '213\n 45435\n fdgdu\n '
>>> b = ' '.join( a.split() )
>>> b
'213 45435 fdgdu' (the new line characters have been lost :( )
In order to correct this we can use the following (more complex) solution.
Second approach (works)
>>> a = '213\n 45435\n fdgdu\n '
>>> tmp = a.split( ' ' )
>>> tmp
['213\n', '', '', '', '', '', '', '', '', '45435\n', '', '', '', '', '', '', '', '', '', '', '', '', 'fdgdu\n', '']
>>> while '' in tmp: tmp.remove( '' )
...
>>> tmp
['213\n', '45435\n', 'fdgdu\n']
>>> b = ' '.join( tmp )
>>> b
'213\n 45435\n fdgdu\n'
Third approach (works)
This approach is a little bit more pythonic in my eyes. Check it:
>>> a = '213\n 45435\n fdgdu\n '
>>> b = ' '.join( filter( len, a.split( ' ' ) ) )
>>> b
'213\n 45435\n fdgdu\n'

split string on a number of different characters

I'd like to split a string using one or more separator characters.
E.g. "a b.c", split on " " and "." would give the list ["a", "b", "c"].
At the moment, I can't see anything in the standard library to do this, and my own attempts are a bit clumsy. E.g.
def my_split(string, split_chars):
if isinstance(string_L, basestring):
string_L = [string_L]
try:
split_char = split_chars[0]
except IndexError:
return string_L
res = []
for s in string_L:
res.extend(s.split(split_char))
return my_split(res, split_chars[1:])
print my_split("a b.c", [' ', '.'])
Horrible! Any better suggestions?
>>> import re
>>> re.split('[ .]', 'a b.c')
['a', 'b', 'c']
This one replaces all of the separators with the first separator in the list, and then "splits" using that character.
def split(string, divs):
for d in divs[1:]:
string = string.replace(d, divs[0])
return string.split(divs[0])
output:
>>> split("a b.c", " .")
['a', 'b', 'c']
>>> split("a b.c", ".")
['a b', 'c']
I do like that 're' solution though.
Solution without re:
from itertools import groupby
sep = ' .,'
s = 'a b.c,d'
print [''.join(g) for k, g in groupby(s, sep.__contains__) if not k]
An explanation is here https://stackoverflow.com/a/19211729/2468006
Not very fast but does the job:
def my_split(text, seps):
for sep in seps:
text = text.replace(sep, seps[0])
return text.split(seps[0])

Categories

Resources