RLE algorithm in Python - python

As the title suggests, I want to implement an RLE (run-length encoding) algorithm, and I have a few problems with it.
for example in RLE algorithm if we take aaaabbbccd it should return a4b3c2d1 as a result
the rle function should return the compressed string of aaabbbccccddddd
rle(data : str) : str
so it would be a3b3c4d5
Here's my code. I know it's wrong, but I don't know whether it was a good way to begin:
# Asker's first attempt at run-length encoding; it does not work as posted.
# NOTE: the indentation of this listing was lost, so nesting must be inferred.
def rle(data):
# The incoming argument is immediately replaced by this hard-coded sample.
data = 'aaabbbccccddddd'
# i is an integer index here, not a character of data.
for i in range(0,len(data)):
# Counts occurrences of the digits of i (e.g. '0', '1'), not of data[i].
if data.count(f'{i}') > 1:
# str.replace returns a new string, which is discarded here; it also
# requires str arguments, so passing ints would raise TypeError if reached.
data.replace(i, data.count(f'{i}'))
print(data)
# NOTE(review): no module-level `data` is defined in the lines shown.
print(rle(data))

# Run-length encode `data` into alternating character / count entries.
data = 'aaabbbccccddddd'
seq = []  # alternating entries: character, then its count stored as a str
r = None  # most recently seen character; None until the first one arrives
for d in data:
    if d != r:
        # A new run starts: record the character with an initial count of 1.
        seq.append(d)
        seq.append(str(1))
        r = d
    else:
        # The run continues: bump the count kept in the last slot.
        # Counts are stored as str so that "".join(seq) works at the end.
        seq[-1] = str(int(seq[-1]) + 1)
print("".join(seq))
I thought that this code snippet is simple, and so didn't explain it...
We have a str and want to convert it to (character, number of repetitions) pairs, like ['a', 3, 'b', 3, 'c', 4, ...]. So we loop over the characters of the str: when a character is new, we add [char, 1] to the list; otherwise, we add 1 to the last element of the list, until we reach a new character.
r variable is for new char recognition and is a temp variable that we store every new char (if a char was not equal to it, replace)
finally, we convert ['a',3,'b',3,'c',4,...] to str, using join
Why do we use str() and int()? Because Python's join method is a bit silly :) and throws an exception if an element of the list is an int. So every time, we convert the count to int to add 1, and then convert it back to str.
Why not map? Because I assume that the OP is a beginner and map would be complicated for them.
and, a more pythonic way:
def rle(data: str) -> str:
    """Run-length encode *data* as character/count pairs.

    Each run of identical consecutive characters becomes the character
    followed by the run length, e.g. ``'aaabbb'`` -> ``'a3b3'``.

    Args:
        data: The text to compress.

    Returns:
        The encoded text; an empty string for empty input.
    """
    if not data:
        # Nothing to encode; also avoids indexing data[0] on an empty string.
        return ''
    # seq alternates [char, count, char, count, ...]; counts stay ints
    # until the final join so they can be incremented in place.
    seq = [data[0], 1]
    for elem in data[1:]:
        if elem != seq[-2]:
            seq += [elem, 1]
        else:
            seq[-1] += 1
    return ''.join(map(str, seq))
and reverse:
def reverse_rle(data: str, end_char = '$') -> str:
    """Expand a run-length encoded string back to its original form.

    The input alternates a character with an optional decimal count, e.g.
    ``'a3b3'`` -> ``'aaabbb'``. A missing (or zero) count is treated as 1.

    Args:
        data: The encoded text. Its first character is always taken as a
            literal character, never as part of a count.
        end_char: Sentinel appended internally to flush the final run. It
            must be a non-digit that does not occur in *data*.

    Returns:
        The decoded text; an empty string for empty input.
    """
    if not data:
        # Avoids indexing data[0] on an empty string.
        return ""

    def convert():
        # Replace the trailing [char, count] pair with the expanded run.
        return seq[:-2] + [seq[-2] * (seq[-1] or 1)]

    seq = [data[0], 0]
    for elem in data[1:] + end_char:
        if elem.isdigit():
            # Accumulate multi-digit counts.
            seq[-1] = seq[-1] * 10 + int(elem)
        else:
            seq = convert()
            if elem != end_char:
                seq += [elem, 0]
    return "".join(seq)
and if you dont want to use end_char:
def reverse_rle(data: str) -> str:
    """Expand a run-length encoded string back to its original form.

    The input alternates a character with an optional decimal count, e.g.
    ``'a3b3'`` -> ``'aaabbb'``. A missing (or zero) count is treated as 1.

    Args:
        data: The encoded text. Its first character is always taken as a
            literal character, never as part of a count.

    Returns:
        The decoded text; an empty string for empty input.
    """
    if not data:
        # Avoids indexing data[0] on an empty string.
        return ""

    def convert():
        # Replace the trailing [char, count] pair with the expanded run.
        return seq[:-2] + [seq[-2] * (seq[-1] or 1)]

    seq = [data[0], 0]
    for elem in data[1:]:
        if elem.isdigit():
            # Accumulate multi-digit counts.
            seq[-1] = seq[-1] * 10 + int(elem)
        else:
            seq = convert() + [elem, 0]
    # The last pair is still unexpanded when the loop ends.
    return "".join(convert())

This should work better
def rle(data):
    """Run-length encode *data* as character/count pairs.

    Each run of identical consecutive characters becomes the character
    followed by the run length, e.g. ``"aaabbb"`` -> ``"a3b3"``.

    Args:
        data: The string to compress.

    Returns:
        The encoded string; an empty string for empty input.
    """
    prev = None
    cnt = 0
    parts = []
    for ch in data:
        if ch == prev:
            # Same character as the previous one: extend the current run.
            cnt += 1
        else:
            # A new run starts; flush the finished one unless this is the
            # very first character.
            if prev is not None:
                parts.append("%s%s" % (prev, cnt))
            prev = ch
            # The character just seen is the first of its run.
            cnt = 1
    # Flush the final run, if there was any input at all.
    if prev is not None:
        parts.append("%s%s" % (prev, cnt))
    return "".join(parts)


print(rle("aaabbbccccddddd"))

Related

finding the longest common prefix of elements inside a list

I have a sequence print(lcp(["flower","flow","flight", "dog"])) which should return fl. Currently I can get it to return flowfl.
I can locate the instances where o or w should be removed, and tried different approaches to remove them. However they seem to hit syntax issue, which I cannot seem to resolve by myself.
I would very much appreciate a little guidance to either have the tools to remedy this issue my self, or learn from a working proposed solution.
# Asker's attempt at a longest-common-prefix function; per the inline note on
# the first call below it returns "flowfl" where "fl" is expected.
# NOTE: the indentation of this listing was lost, so nesting must be inferred.
def lcp(strs):
# Anything that is not a non-empty list yields the empty string.
if not isinstance(strs, list) or len(strs) == 0:
return ""
if len(strs) == 1:
return strs[0]
# Every later word is compared against the first word only.
original = strs[0]
original_max = len(original)
# result is created once and is only ever appended to below, so matches
# from successive words accumulate instead of narrowing the prefix.
result = ""
# The index produced by enumerate is unused.
for _, word in enumerate(strs[1:],1):
current_max = len(word)
i = 0
while i < current_max and i < original_max:
# result is already a str, so copy is equal to result.
copy = "".join(result)
if len(copy) and copy[i-1] not in word:
# result = result.replace(copy[i-1], "")
# result = copy[:i-1]
# Debug output only; result is not modified in this branch.
print(copy[i-1], copy, result.index(copy[i-1]), i, word)
if word[i] == original[i]:
result += word[i]
i += 1
return result
print(lcp(["flower","flow","flight", "dog"])) # returns flowfl should be fl
print(lcp(["dog","car"])) # works
print(lcp(["dog","racecar","car"])) # works
print(lcp([])) # works
print(lcp(["one"])) # works
I worked on an alternative that does not try to remove characters inside the same loop, and instead adds a counter at the end. However, my instincts suggest it can be solved within the for and while loops without increasing code bloat.
# Fragment from the asker's alternative: tally how often each character
# occurs in result and print the tally. `result` comes from code not shown here.
if len(result) > 1:
counter = {char: result.count(char) for char in result}
print(counter)
I have solved this using the below approach.
from typing import List


class Solution:
    """Longest-common-prefix solver using a column-by-column scan."""

    def longestCommonPrefix(self, strs: List[str]) -> str:
        """Return the longest prefix shared by every string in *strs*.

        Args:
            strs: The strings to compare.

        Returns:
            The common prefix; an empty string when *strs* is empty or the
            strings share no leading characters.
        """
        if not strs:
            # No strings at all: there is no prefix to report.
            return ""
        # The common prefix can never be longer than the shortest string.
        shortest_len, shortest = self.get_min_str(strs)
        for i in range(shortest_len):
            ch = shortest[i]
            if any(word[i] != ch for word in strs):
                # First column where some string disagrees ends the prefix.
                return shortest[:i]
        return shortest

    def get_min_str(self, A):
        """Return ``(length, string)`` for the first shortest string in *A*.

        *A* must be non-empty.
        """
        s = min(A, key=len)
        return len(s), s
Returns the longest prefix that the set of words have in common.
def lcp(strs):
    """Return the longest prefix that all strings in *strs* have in common.

    Args:
        strs: A sequence of strings.

    Returns:
        The shared prefix; an empty string when *strs* is empty or the
        strings have no common leading characters.
    """
    if not strs:
        return ""
    result = strs[0]
    for word in strs[1:]:
        # Count matching leading characters explicitly, so that an empty
        # word (or an already-empty result) correctly yields a length of 0
        # instead of relying on a loop variable that was never assigned.
        match_len = 0
        for l1, l2 in zip(result, word):
            if l1 != l2:
                break
            match_len += 1
        result = result[:match_len]
        if not result:
            # Nothing left in common; later words cannot change that.
            break
    return result
Results:
>>> print(lcp(["flower","flow","flight"]))
fl
>>> print(lcp(["flower","flow","flight", "dog"]))
>>> print(lcp(["dog","car"]))
>>> print(lcp(["dog","racecar","car"]))
>>> print(lcp([]))
>>> print(lcp(["one"]))
one
>>> print(lcp(["one", "one"]))
one
You might need to rephrase your goal.
By your description you don't want the longest common prefix, but the prefix that the most words have in common with the first one.
One of your issues is that your tests only test one real case and four edgecases. Make some more real examples.
Here's my proposition: I mostly added the elif to check if we already have a difference on the first letter to then discard the entry.
It also overwrites the original to rebuild the string based on the common prefix with the next word (if there are any)
def lcp(strs):
    """Return the prefix of the first word shared with the related words.

    Unlike a strict longest-common-prefix, a word whose first letter differs
    from the current prefix is discarded instead of emptying the result. The
    prefix is narrowed by every other word in turn.

    Args:
        strs: A list of strings. Any non-list or empty list yields "".

    Returns:
        The narrowed prefix of ``strs[0]``.
    """
    if not isinstance(strs, list) or len(strs) == 0:
        return ""
    original = strs[0]
    for word in strs[1:]:
        if word and original and word[0] != original[0]:
            # Differs on the very first letter: ignore this word entirely.
            continue
        # Stop at the first mismatch so the result is always a real prefix
        # (later coincidental matches must not be appended).
        matched = 0
        for a, b in zip(original, word):
            if a != b:
                break
            matched += 1
        original = original[:matched]
    return original


print(lcp(["flower","flow","flight", "dog"])) # fl
print(lcp(["shift", "shill", "hunter", "shame"])) # sh
print(lcp(["dog","car"])) # dog
print(lcp(["dog","racecar","car"])) # dog
print(lcp(["dog","racecar","dodge"])) # do
print(lcp([])) # [nothing]
print(lcp(["one"])) # one

Type error: function() missing 1 required positional argument: 's'

I'm new to Python and I'm trying to write an algorithm to find the longest substring without repeating characters. I keep getting this error: "lengthOfLongestSubstring() missing 1 required positional argument: 's'", even though I obviously call the function with the argument. Why is this happening? Thanks for any help.
# Asker's code for the longest substring without repeating characters.
# NOTE: the indentation of this listing was lost, so nesting must be inferred.
class Solution:
# Declared with (self, s): two positional parameters.
def lengthOfLongestSubstring(self, s):
# List of the characters of s; not referenced again in the lines shown.
arr = [char1 for char1 in s]
# 26 zeros (i*0 is always 0), used as per-letter "seen" flags.
help_arr = [i*0 for i in range(26)]
sub_strings = []
sub = ""
for char in s:
# Offset from 'a'; presumably expects lowercase a-z input only -- TODO confirm.
index = ord(char) - ord('a')
if help_arr[index] == 0:
help_arr[index] = int(1)
sub += char
else:
# Repeated character: store the current run and start a new one with char.
sub_strings.append(sub)
sub = ""
sub += char
help_arr = [i * 0 for i in range(26)]
help_arr[index] = int(1)
# Calls the max defined below, which shadows the builtin max.
max(sub_strings)
def max(arr):
# Local variable named max; holds the longest length seen so far.
max = 0
index = -1
for i in range(len(arr)):
if len(arr[i]) > max:
max = len(arr[i])
index= i
# print() returns None, so this returns None after printing.
return print("The answer is '{}', with the length of {}.".format(arr[index], max))
#call to function
# Called by bare name with a single argument, although the def above takes (self, s).
lengthOfLongestSubstring("aabcdefffgges")
Error: " lengthOfLongestSubstring() missing 1 required positional argument: 's' "
Look at how you define this function:
def lengthOfLongestSubstring(self, s):
This requires two arguments, but when you call it, you only pass one:
lengthOfLongestSubstring("aabcdefffgges")
In this case, you can solve the problem by not using a class. It isn't needed for what you are doing. This also means you need to remove the self parameter from lengthOfLongestSubstring():
def _longest_unique_substring(s):
    """Return the first longest substring of *s* with no repeated character."""
    last_seen = {}  # character -> index of its most recent occurrence
    start = 0  # left edge of the current repeat-free window
    best_start = best_len = 0
    for pos, char in enumerate(s):
        prev = last_seen.get(char)
        if prev is not None and prev >= start:
            # char already occurs inside the window: move the left edge just
            # past that occurrence instead of discarding the whole window.
            start = prev + 1
        last_seen[char] = pos
        window_len = pos - start + 1
        if window_len > best_len:
            best_start, best_len = start, window_len
    return s[best_start:best_start + best_len]


def lengthOfLongestSubstring(s):
    """Print and return the length of the longest repeat-free substring.

    Works for any characters (not only a-z) and for the empty string.

    Args:
        s: The string to inspect.

    Returns:
        The length of the longest substring without repeated characters.
    """
    longest = _longest_unique_substring(s)
    print("The answer is '{}', with the length of {}.".format(longest, len(longest)))
    return len(longest)


#call to function
lengthOfLongestSubstring("aabcdefffgges")
On a side note, you should not write return print(...). The return does nothing useful here, because print() always returns None, so I removed it.

Manual string 'in' function doesn't work if substring is at the end of the word

I'm trying to manually code the python string in function for an assignment. Using this code, where s is the string and t is the substring I am trying to find:
# Asker's hand-written substring search: s is the string, t the substring.
# NOTE: the indentation of this listing was lost, so nesting must be inferred.
def test(s, t):
# stidx is the position in s where the current match attempt starts.
stidx = 0
while stidx < len(s):
# idx counts how many leading characters of t have matched so far.
idx = 0
for i in s[stidx:]:
if idx < len(t):
if t[idx] == i:
idx += 1
continue
else:
break
# NOTE(review): if this check sits inside the for loop, it only runs on an
# iteration after the last matched character, which would explain the
# reported miss when the match ends at the very end of s -- confirm nesting.
if idx == len(t):
return True
stidx += 1
return False
The above code works, except when I am checking a substring at the very end of the word (e.g. s = 'happy' and t = 'py'). If I add an arbitrary character to the end of s, it works. Why is this?
maybe?
def test(s, t):
"""
:param s: exp: happy
:param t: exp: py
:return:
"""
tmp = result = 0
t_len = len(t)
while tmp <= len(s)-t_len:
if s[tmp: tmp+t_len] == t:
result = 1
break
else:
tmp += 1
continue
return bool(result)

how to recursively remove all adjacent characters that have repeated 3 or more times using python

Test Cases
Input: abbbaaccada
Output: ccada
Input: bbccdddcb
Output: (Empty string)
# Asker's recursive attempt at removing triple characters.
# NOTE: the indentation of this listing was lost, so nesting must be inferred.
# This assignment shadows the builtin str for the rest of the script.
str = input("Enter string: ")
def my_string(string):
if not string:
return ""
if len(string) == 1:
return string
# Only lengths 0 and 1 are guarded above, so a 2-character string reaches
# string[2] here and raises IndexError.
if string[0] == string[1] == string[2]:
# Drops exactly three characters and continues with the remainder.
return my_string(string[3:])
# Otherwise keep the first character and recurse on the rest.
return string[0] + my_string(string[1:])
print (my_string(str))
I am new to Python, and I am trying to remove characters that appear 3 or more times consecutively in a string. With this code I only get the output of a single pass. For example, for the input hhhelllo the output is eo, which is correct, but for the input abbbaaccada the output is aaaccada when it should be ccada. Please help.
I have handled exactly 3 repetitions so far, but how can I generalize it to more than 3 repetitions?
Your problem presents the opportunity to show how else in for loops can be useful. Take a look:
def remover(my_str):
    """Repeatedly delete runs of 3 or more identical adjacent characters.

    Deleting a run can bring equal characters together and form a new run,
    so the process repeats until no such run is left.

    Args:
        my_str: The string to reduce.

    Returns:
        The reduced string (possibly empty).
    """
    import re

    # Removal never introduces new characters, so the candidate set can be
    # computed once up front.
    temp = set(my_str)
    while True:
        for c in temp:
            # '{3,}' matches the whole run, not just its first three
            # characters; re.escape keeps regex metacharacters literal.
            shrunk = re.sub(re.escape(c) + '{3,}', '', my_str)
            if shrunk != my_str:
                my_str = shrunk
                break
        else:
            # The for loop finished without a break: nothing left to remove.
            break
    return my_str


test1 = 'abbbaaccada'
print(remover(test1)) # -> ccada
test2 = 'i/p- hhhelllo'
print(remover(test2)) # -> i/p- eo
If you insist on having recursive calls, you can modify the above as follows:
def remover(my_str):
    """Recursively delete runs of 3 or more identical adjacent characters.

    Each call removes every such run that is currently present; if that
    changed the string, the function recurses to catch runs created by the
    removals.

    Args:
        my_str: The string to reduce.

    Returns:
        The reduced string (possibly empty).
    """
    import re

    new_str = my_str
    for c in set(my_str):
        # '{3,}' matches the whole run, not just its first three characters;
        # re.escape keeps regex metacharacters literal.
        new_str = re.sub(re.escape(c) + '{3,}', '', new_str)
    if my_str == new_str:
        # Fixed point reached: nothing was removed in this pass.
        return new_str
    return remover(new_str)
I have added a solution that works for 3 or more repetitions, as the above solution didn't work for me. It is a recursive solution.
import re


def format_string(u_str):
    """Remove runs of 3+ identical adjacent characters until none remain.

    Args:
        u_str: The string to reduce.

    Returns:
        The fully reduced string (possibly empty).
    """
    f_str = remove_string(u_str)
    if f_str == u_str:
        # A pass that changes nothing means no run is left.
        return f_str
    return format_string(f_str)


def remove_string(u_str):
    """Perform one left-to-right pass removing runs of 3+ identical characters.

    Args:
        u_str: The string to scan.

    Returns:
        The string after this pass; runs created by the removals may remain
        and are handled by the next pass in format_string.
    """
    index = 0  # position of the character currently being examined
    while index < len(u_str):
        # Escape the character so regex metacharacters such as '.' or '('
        # are matched literally; '{3,}' means three or more repetitions.
        r = re.search(re.escape(u_str[index]) + '{3,}', u_str)
        if r:
            start, end = r.span()  # bounds of the run that was found
            u_str = u_str[:start] + u_str[end:]  # cut the run out
            # Continue from where the run used to begin, since the text
            # after it has shifted left.
            index = start
        else:
            index += 1
    return u_str


test1 = 'abbbaaccada'
print('output:' + format_string(test1))
test2 = 'bbccdddcb'
print('output:' + format_string(test2))

Python character math using stack

I received an interesting challenge in an algorithm Meetup. Given an input string, return a string in which all substrings within brackets have been replicated n times, where n is the integer outside the brackets. Characters outside brackets should simply be concatenated to the substring inside. For example:
2[ab] should return abab
a[3[bc]] should return abcbcbc
2[ab[cd]] should return abcdabcd
I've started implementing the solution using a stack, but I've got the feeling that my approach of checking each de-stacked character for a bracket is off, anyone have any suggestions? Code is below
# Asker's work-in-progress solution for expanding "n[...]" strings.
# NOTE: the indentation of this listing was lost, so nesting must be inferred.
# Minimal LIFO stack backed by a Python list.
class Stack:
def __init__(self):
self.items = []
def push(self, item):
self.items.append(item)
def pop(self):
# list.pop() raises IndexError when there are no items left.
return self.items.pop()
def length(self):
return len(self.items)
# Returns True when int(s) succeeds and False when it raises ValueError.
def is_number(s):
try:
int(s)
return True
except ValueError:
return False
# Uses the module-level myStack that is created further down.
def character_math(charstr):
final_output = ""
substring = ""
# Every character of the input is pushed onto the stack.
for i in charstr:
myStack.push(i)
for m in range(myStack.length() - 2):
destacked = myStack.pop()
# We want to go to the inner-most right bracket
# Every popped character other than "]" is collected, in popped order.
if destacked != "]":
substring += destacked
if destacked == "[":
# The character before "[" may be the repeat count.
possible_multiplier = myStack.pop()
if is_number(possible_multiplier):
final_output += int(possible_multiplier) * substring
else:
final_output += possible_multiplier[::-1]
break
final_output += substring[::-1]
# Returns a 2-tuple (label, text), so the print() below shows a tuple.
return "Final output is ", final_output
myStack = Stack()
# 2[ab[cd]] should return 'abcdabcd'
sample_str = '2[ab[cd]]'
print(character_math(sample_str))
One good way to do that is to use a recursive algorithm. The idea is to repeat a function until a condition is met. Here is the code I used; it works on your examples, and I don't think I have missed any of the possibilities.
# -*-coding:Utf-8 -*
Input = "2[ab[cd]]"


def Treatment(STR):
    """Expand bracketed groups, repeating each by the number in front of it.

    A group looks like ``n[text]``; it is replaced by ``text`` repeated
    ``n`` times. A group with no number in front is kept once. Groups may
    be nested or placed side by side, and ``n`` may have several digits.

    Examples: ``2[ab]`` -> ``abab``, ``a[3[bc]]`` -> ``abcbcbc``,
    ``2[ab]3[cd]`` -> ``ababcdcdcd``.

    Args:
        STR: The string to expand.

    Returns:
        The expanded string with all brackets resolved.

    Raises:
        ValueError: If a "[" has no matching "]" (or vice versa while a
            "[" is still present).
    """
    # Stop condition: no group left to expand.
    while "[" in STR:
        # The first "]" always closes an inner-most group, and the last "["
        # before it is the bracket that opens that same group.
        Bound2_ID = STR.index("]")
        Bound1_ID = STR.rindex("[", 0, Bound2_ID)
        # Walk left over the (possibly multi-digit) multiplier.
        Digits_ID = Bound1_ID
        while Digits_ID > 0 and STR[Digits_ID - 1].isdecimal():
            Digits_ID -= 1
        if Digits_ID < Bound1_ID:
            Multiplier = int(STR[Digits_ID:Bound1_ID])
        else:
            # No number in front of the bracket: keep the content once.
            Multiplier = 1
        # Separate STR into: First_part + repeated middle + Last_part
        First_part = STR[:Digits_ID]
        Middle_part = STR[Bound1_ID + 1: Bound2_ID] * Multiplier
        Last_part = STR[Bound2_ID + 1:]
        STR = First_part + Middle_part + Last_part
    return STR


print (Treatment(Input))
EDIT : That's what it does :
First iteration : "2[ab[cd]]"
Second iteration : "2[abcd]"
Third iteration : abcdabcd => No more "[" so stop here.

Categories

Resources