I am trying to implement in python a function that checks if the '#' symbol inside a parsed line is part of a string variable.
def comment_part_of_string(line,comment_idx):
"""
:param line: stripped line that has '#' symbol
comment_idx: index of '#' symbol in line
:return: return True when the '#' symbol is inside a string variable
"""
for example, I want the function to return True for:
> line="peace'and#much'love"
> comment_idx=line.find('#')
and False for:
> line="peace#love"
> comment_idx=line.find('#')
How can I check if a char in a parsed line is part of a string variable?
edit
I tried this and it also worked:
def comment_part_of_string(line, comment_idx):
"""
:param comment_idx: index of '#' symbol in line
:param line: stripped line that has '#' symbol
:return: return True when the '#' symbol is inside a string variable
"""
if ((line[:comment_idx].count(b"\'") % 2 == 1 and line[comment_idx:].count(b"\'") % 2 == 1)
or (line[:comment_idx].count(b"\"") % 2 == 1 and line[comment_idx:].count(b"\"") % 2 == 1)):
return True
return False
You can do it by checking the number of single quotes(') before the # symbol. If it is even, that means it is outside a string literal and if its odd, then it is inside a string. Do it like so:
def comment_part_of_string(line, comment_idx):
"""
:param line: stripped line that has '#' symbol
comment_idx: index of '#' symbol in line
:return: return True when the '#' symbol is inside a string variable
"""
count = line.split(line[comment_idx])[0].count("'")
if(count % 2):
return True
else:
return False
Hope this helps :)
I think this should work
def iscomment(line):
line = line.split(" ")
for i in line:
if "#" in i:
if '"' in i or "'" in i:
return True
return False
It splits line for spaces, then is goes through parts of line and if it find ' or " and # in line it returns True.
This can be solved using regex.
Note: Strings can be inside ' or ". So have to consider that also.
import re
def comment_part_of_string(line):
pattern=r'\'.*#.*\'|\".*#.*\"'
if re.findall(pattern,line):
return True
return False
Output:
>>> comment_part_of_string("peace'and#much'love")
True
>>> comment_part_of_string("peace#love")
False
>>> comment_part_of_string('peace"and#much"love')
True
Test Cases
Input: abbbaaccada
Output: ccada
Input: bbccdddcb
Output: (Empty string)
str = input("Enter string: ")
def my_string(string):
if not string:
return ""
if len(string) == 1:
return string
if string[0] == string[1] == string[2]:
return my_string(string[3:])
return string[0] + my_string(string[1:])
print (my_string(str))
I am new to python. and I am trying to remove characters with 3 or more consecutive appearance in a string. In this I could only able to get output of only 1 iteration. e.g. i/p- hhhelllo o/p-eo but for i/p- abbbaaccada o/p is aaaccada but it should be ccada.. please help..
I have done this till 3 repetition but how to generalize it for more than 3 repetition.??
Your problem presents the opportunity to show how else in for loops can be useful. Take a look:
def remover(my_str):
temp = set(my_str)
while True:
for c in temp:
if 3*c in my_str:
my_str = my_str.replace(3*c, '')
break
else:
break
return my_str
test1 = 'abbbaaccada'
print(remover(test1)) # -> ccada
test2 = 'i/p- hhhelllo'
print(remover(test2)) # -> i/p- eo
If you insist on having recursive calls, you can modify the above as follows:
def remover(my_str):
temp = set(my_str)
new_str = my_str
for c in temp:
if 3*c in new_str:
new_str = new_str.replace(3*c, '')
if my_str == new_str:
return new_str
else:
return remover(new_str)
I have added a solution which will work for 3 or more repetition as the above solution didn't work for me. It is a recursive solution.
import re
def format_string(u_str):
f_str = remove_string(u_str)
if f_str == u_str:
return f_str
else:
return format_string(f_str)
def remove_string(u_str):
index = 0 # This will maintain the index while traversing the entire string
while index < len(u_str):
r = re.search(u_str[index]*4 + '*', u_str)
if r:
start, end = r.span() # start and end index of substring matching 3 or more repetition
u_str = u_str[:start] + u_str[end:] # removing the found substring
index = end
else:
index += 1
return u_str
test1 = 'abbbaaccada'
print('output:' + format_string(test1))
test2 = 'bbccdddcb'
print('output:' + format_string(test2))
I received an interesting challenge in an algorithm Meetup. Given an input string, return a string in which all substrings within brackets have been replicated n times, where n is the integer outside the brackets. Characters outside brackets should simply be concatenated to the substring inside. For example:
2[ab] should return abab
a[3[bc]] should return abcbcbc
2[ab[cd]] should return abcdabcd
I've started implementing the solution using a stack, but I've got the feeling that my approach of checking each de-stacked character for a bracket is off, anyone have any suggestions? Code is below
class Stack:
def __init__(self):
self.items = []
def push(self, item):
self.items.append(item)
def pop(self):
return self.items.pop()
def length(self):
return len(self.items)
def is_number(s):
try:
int(s)
return True
except ValueError:
return False
def character_math(charstr):
final_output = ""
substring = ""
for i in charstr:
myStack.push(i)
for m in range(myStack.length() - 2):
destacked = myStack.pop()
# We want to go to the inner-most right bracket
if destacked != "]":
substring += destacked
if destacked == "[":
possible_multiplier = myStack.pop()
if is_number(possible_multiplier):
final_output += int(possible_multiplier) * substring
else:
final_output += possible_multiplier[::-1]
break
final_output += substring[::-1]
return "Final output is ", final_output
myStack = Stack()
# 3[ab[cd]] should return 'abcdabcd'
sample_str = '2[ab[cd]]'
print(character_math(sample_str))
The best way to do that is to use a recursive algorithm. The idea is to repeat a function until a condition is match. Here is the code I used, it works on your examples, and I don't think I forgot one of the possibilities.
# -*-coding:Utf-8 -*
Input = "2[ab[cd]]"
def Treatment(STR):
# Exit the treatment. That's the end condition.
if "[" not in STR:
return STR
# Find the inner [], in this case, the "cd" part
Bound1_ID = len(STR) - STR[::-1].index("[") - 1
Bound2_ID = STR.index("]")
# Separate STR into : First_part + middle between [] + Last_part
Last_part = STR[Bound2_ID + 1:]
# First_part depends if there is a number or not
try:
Multiplier = int(STR[Bound1_ID - 1])
First_part = STR[:Bound1_ID - 1]
except:
Multiplier = 1
First_part = STR[:Bound1_ID]
Middle_part = STR[Bound1_ID + 1: Bound2_ID] * Multiplier
# Assemble the new STR :
New_STR = First_part + Middle_part + Last_part
# Recursive command, repeat the function on the new STR
return Treatment(New_STR)
print (Treatment(Input))
EDIT : That's what it does :
First iteration : "2[ab[cd]]"
Second iteration : "2[abcd]"
Third iteration : abcdabcd => No more "[" so stop here.
I am trying to split some text. Basically I want to separate level-1 brackets, like "('1','a',NULL),(2,'b')" => ["('1','a',NULL)", "(2,'b')]", but I need to be aware of possible quoted strings inside. It needs to at least satisfy the following py.tests:
from splitter import split_text
def test_normal():
assert split_text("('1'),('2')") == ["('1')", "('2')"]
assert split_text("(1),(2),(3)") == ["(1)", "(2)", "(3)"]
def test_complex():
assert split_text("('1','a'),('2','b')") == ["('1','a')", "('2','b')"]
assert split_text("('1','a',NULL),(2,'b')") == ["('1','a',NULL)", "(2,'b')"]
def test_apostrophe():
assert split_text("('\\'1','a'),('2','b')") == ["('\\'1','a')", "('2','b')"]
def test_coma_in_string():
assert split_text("('1','a,c'),('2','b')") == ["('1','a,c')", "('2','b')"]
def test_bracket_in_string():
assert split_text("('1','a)c'),('2','b')") == ["('1','a)c')", "('2','b')"]
def test_bracket_and_coma_in_string():
assert split_text("('1','a),(c'),('2','b')") == ["('1','a),(c')", "('2','b')"]
def test_bracket_and_coma_in_string_apostrophe():
assert split_text("('1','a\\'),(c'),('2','b')") == ["('1','a\\'),(c')", "('2','b')"]
I have tried the following:
1) Regular expressions
This looks like the best solution, but unfortunately I did not come up with anything satisfying all tests.
My best try is:
def split_text(text):
return re.split('(?<=\)),(?=\()', text)
But obviously, that is rather simplistic and fails test_bracket_and_coma_in_string and test_bracket_and_coma_in_string_apostrophe.
2) Finite-state-machine-like solution
I tried to code the FSM myself:
OUTSIDE, IN_BRACKETS, IN_STRING, AFTER_BACKSLASH = range(4)
def split_text(text):
state = OUTSIDE
read = []
result = []
for character in text:
if state == OUTSIDE:
if character == ',':
result.append(''.join(read))
read = []
elif character == '(':
read.append(character)
state = IN_BRACKETS
else:
read.append(character)
elif state == IN_BRACKETS:
read.append(character)
if character == ')':
state = OUTSIDE
elif character == "'":
state = IN_STRING
elif state == IN_STRING:
read.append(character)
if character == "'":
state = IN_BRACKETS
elif character == '\\':
state = AFTER_BACKSLASH
elif state == AFTER_BACKSLASH:
read.append(character)
state = IN_STRING
result.append(''.join(read)) # The rest of string
return result
It works, passes all tests, but is very slow.
3) pyparsing
from pyparsing import QuotedString, ZeroOrMore, Literal, Group, Suppress, Word, nums
null_value = Literal('NULL')
number_value = Word(nums)
string_value = QuotedString("'", escChar='\\', unquoteResults=False)
value = null_value | number_value | string_value
one_bracket = Group(Literal('(') + value + ZeroOrMore(Literal(',') + value) + Literal(')'))
all_brackets = one_bracket + ZeroOrMore(Suppress(',') + one_bracket)
def split_text(text):
parse_result = all_brackets.parseString(text)
return [''.join(a) for a in parse_result]
Also passes all tests, but surprisingly it is even slower than solution #2.
Any ideas how to make the solution fast and robust? I have this feeling that I am missing something obvious.
One way would be to use the newer regex module which supports the (*SKIP)(*FAIL) functionality:
import regex as re
def split_text(text):
rx = r"""'.*?(?<!\\)'(*SKIP)(*FAIL)|(?<=\)),(?=\()"""
return re.split(rx, text)
Broken down it says:
'.*?(?<!\\)' # look for a single quote up to a new single quote
# that MUST NOT be escaped (thus the neg. lookbehind)
(*SKIP)(*FAIL)| # these parts shall fail
(?<=\)),(?=\() # your initial pattern with a positive lookbehind/ahead
This succeeds on all your examples.
I cooked this and it works on given tests.
tests = ["('1'),('2')",
"(1),(2),(3)",
"('1','a'),('2','b')",
"('1','a',NULL),(2,'b')",
"('\\'1','a'),('2','b')",
"('1','a,c'),('2','b')",
"('1','a)c'),('2','b')",
"('1','a),(c'),('2','b')",
"('1','a\\'),(c'),('2','b')"]
for text in tests:
tmp = ''
res = []
bracket = 0
quote = False
for idx,i in enumerate(text):
if i=="'":
if text[idx-1]!='\\':
quote = not quote
tmp += i
elif quote:
tmp += i
elif i==',':
if bracket: tmp += i
else: pass
else:
if i=='(': bracket += 1
elif i==')': bracket -= 1
if bracket: tmp += i
else:
tmp += i
res.append(tmp)
tmp = ''
print res
Output:
["('1')", "('2')"]
['(1)', '(2)', '(3)']
["('1','a')", "('2','b')"]
["('1','a',NULL)", "(2,'b')"]
["('\\'1','a')", "('2','b')"]
["('1','a,c')", "('2','b')"]
["('1','a)c')", "('2','b')"]
["('1','a),(c')", "('2','b')"]
["('1','a\\'),(c')", "('2','b')"]
The code has room for improvement, and edits are welcome. :)
This is the regular expression which seems to work and passes all the tests. Running it on real data it is about 6x faster than finite state machine implemented in Python.
PATTERN = re.compile(
r"""
\( # Opening bracket
(?:
# String
(?:'(?:
(?:\\')|[^'] # Either escaped apostrophe, or other character
)*'
)
|
# or other literal not containing right bracket
[^')]
)
(?:, # Zero or more of them separated with comma following the first one
# String
(?:'(?:
(?:\\')|[^'] # Either escaped apostrophe, or other character
)*'
)
|
# or other literal
[^')]
)*
\) # Closing bracket
""",
re.VERBOSE)
def split_text(text):
return PATTERN.findall(text)