Group string by 3 character to a List - python

for example, I have string "25037654", and I want to Group the string by 3.
but, since the string is 8 character, and 8 % 3 is not 0. it have remainder. and I want the final List is ["25", "037", "654"], the first Index will only accept the remainder, for example 1 or 2 character

I think this code may satisfy your requirement.
First get the idx of every slice, then cut this string to each substring.
str_num = "25037654"
idx = [len(str_num)%3+3*i for i in range(len(str_num)//3+1)]
print(idx) # [2, 5, 8]
if(idx[0]!=0):
idx = [0]+idx
res = [str_num[idx[i]:idx[i+1]] for i in range(len(idx)-1)]
print(res) # ['25', '037', '654']

One easy way, in this case, is to use f-string:
s = "25037654"
output = f"{int(s):,}".split(',')
print(output) # ['25', '037', '654']
The above won't work if the input is not numeric. Then try:
leading = len(s) % 3
output = ([s[:leading]] if leading else []) + [s[i:i+3] for i in range(leading, len(s), 3)]

Thanks everyone who respond and answer My Question.
after a few moments I post my question. I found the answer LoL
it's the Long Way, my version. thanks Guyss
s = "25037654"
if len(s) % 3 == 2:
fn = s[0:2]
xx = s[2:len(s)]
group = list(map(''.join, zip(*[iter(xx)]*3)))
final = fn + " " + " ".join(group)
elif len(s) % 3 == 1:
fn = s[0:1]
xx = s[1:len(s)]
group = list(map(''.join, zip(*[iter(xx)]*3)))
final = fn + " " + " ".join(group)
elif len(s) % 3 == 0:
group = list(map(''.join, zip(*[iter(s)]*3)))
final = " ".join(group)
print(final) # 25 037 654

t = "25037654555"
i, j = divmod(len(t), 3) # i - int part, j - remainder
print([t[:j]] + [t[j+(i-1)*3:j+i*3] for i in range(1,i+1) ])
Result:
['25', '037', '654', '555']

You can try my version:
s = "25037654"
rem = len(s)%3
lst = []
i = 0
while i < len(s)-2:
if i == 0:
lst.append(s[:rem])
i+=rem
else:
lst.append(s[i:i+3])
i+=3

Related

How to increment alphanumeric number in python?

I am creating a passkey of 16 alphanumeric characters where I am generating starting 4 digits with A001, A002, A003 till A999. Once it goes till A999, the alphabet will auto increase to B and digits will again start with 001. And the same process will go till Z999. Once the A-Z series will over, then it will start with AA01 and so on. How to do this thing in python? As I am new in python so I tried it on my own and also tried some examples but I am unable to make the increment of characters.
Any ideas or thoughts would be greatly appreciated.
Many thanks
rec=0
new_list16 = []
def autoIncrement():
global rec
first = 'A'
i = chr(ord(first))
new_list16.append(i)
while True:
pStart = 1 #adjust start value, if req'd
pInterval = 1 #adjust interval value, if req'd
if (rec == 0):
rec += pStart
else:
rec = rec + pInterval
return str(rec).zfill(3)
#print(autoIncrement())
new_list16.append(autoIncrement())
print(*new_list16, sep = '')
Going from A999 to B001 instead of B000 really messes things up a bit, but you can still use this for the A-Z part, and a simple modulo operation for the numbers.
def excel_format(num):
# see https://stackoverflow.com/a/182924/1639625
res = ""
while num:
mod = (num - 1) % 26
res = chr(65 + mod) + res
num = (num - mod) // 26
return res
def full_format(num, d=3):
chars = num // (10**d-1) + 1 # this becomes A..ZZZ
digit = num % (10**d-1) + 1 # this becomes 001..999
return excel_format(chars) + "{:0{}d}".format(digit, d)
for i in range(10000):
print(i, full_format(i, d=2))
Number of digits in the numeric part is controlled with the optional d parameter. I'll use 2 for purpose of demonstration, but 3 works just as well.
0 A01
...
98 A99
99 B01
...
2573 Z99
2574 AA01
...
9998 CW99
9999 CX01
def auto_increment(number):
if number == 'ZZZZ':
return 'ZZZZ'
digits = "".join([i for i in number if i.isdigit()])
chars = "".join([i for i in number if not i.isdigit()])
if int(digits) == int('9' * len(digits)):
digits = "000"
new_char = [ord(i) for i in chars]
if new_char[-1] % ord('Z') == 0:
new_char = "".join([chr(i) for i in new_char]) + 'A'
else:
new_char[-1] = new_char[-1] + 1
new_char = "".join([chr(i) for i in new_char])
else:
new_char = chars
new_digit = int(digits) + 1
l = len(new_char)
ll = len(str(new_digit))
new_digit = (("0" * (3-ll)) + str(new_digit))[(l-1):]
return f"{new_char}{new_digit}"
This function return you the next number, given any number.
for example: A999 will return AB01.
you can now just use this function in a loop.
This probably needs to be tested and refactored more, but here's a start for you:
def leadingZeros(number, digits):
numberString = str(number)
for digit in range(1, digits):
if number < 10**digit:
numberString = '0' + numberString
return numberString
def autoIncrement(oldNumber):
order = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ!'
lastDigitOrder = order.find(oldNumber[3])
newNumber = ''
if order.find(oldNumber[1]) <= 9:
# 3 digit number
number = int(oldNumber[1:]) + 1
letter = oldNumber[0]
if 1000 == number:
letterOrder = order.find(oldNumber[0])
letter = order[letterOrder + 1]
newNumber = letter + leadingZeros(number % 1000, 3)
elif order.find(oldNumber[2]) <= 9:
# 2 digit number
number = int(oldNumber[2:]) + 1
letters = oldNumber[0:2]
if 100 == number:
letterOrder = order.find(oldNumber[1])
letter = order[letterOrder + 1]
letters = oldNumber[0] + letter
newNumber = letters + leadingZeros(number % 100, 2)
elif order.find(oldNumber[3]) <= 9:
# 1 digit number
number = int(oldNumber[3]) + 1
letters = oldNumber[0:3]
if 10 == number:
letterOrder = order.find(oldNumber[2])
letter = order[letterOrder + 1]
letters = oldNumber[0:2] + letter
newNumber = letters + leadingZeros(number % 10, 1)
else:
# just letters
print(oldNumber)
letterOrder = order.find(oldNumber[3])
letter = order[letterOrder + 1]
newNumber = oldNumber[0:3] + letter
# if one of the digits has gone past Z then we need to update the letters
if '!' == newNumber[3]:
# past Z in 4th digit
letterOrder = order.find(oldNumber[2])
newNumber = newNumber[0:2] + order[letterOrder + 1] + 'A'
if '!' == newNumber[2]:
# past Z in 3rd digit
letterOrder = order.find(oldNumber[1])
newNumber = newNumber[0:1] + order[letterOrder + 1] + 'A' + newNumber[3]
if '!' == newNumber[1]:
# past Z in 2nd digit
letterOrder = order.find(oldNumber[0])
newNumber = order[letterOrder + 1] + 'A' + newNumber[2:]
return newNumber
print(autoIncrement('A999'))
print(autoIncrement('AA99'))
print(autoIncrement('AAA9'))
print(autoIncrement('AAAA'))
print(autoIncrement('AZZ9'))
This is not quite what you are asking for, but if your requirement is for 4-character "sequential" strings, let me suggest a far more simpler approach. Why not simply used base 36 numbers? That is, have your numbers go from 0, 1, 2, ... A, B, C, ... Z, 10, 11, 12, ... 1Z, ... Then to convert one of the base 36 strings to an int it is simply:
n = int('12AV', 36)
And to convert an int to a base n string:
def baseN(num, base, numerals="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
return ((num == 0) and numerals[0]) or (baseN(num // base, base, numerals).lstrip(numerals[0]) + numerals[num % base])
Putting it all together:
n = int('12AV', 36)
s = baseN(n + 1, 36)
print(s)
Prints:
12AW
You can, of course, start with 'A001' if you need to. You will then go to A00Z after 35 iterations. You will end up generating the same numbers as in your original method, just in a different order.
Thank you for the solutions you had provided. But I tried something exactly which I want for my question. Please check it and give your comments on it.
def full_format(i):
# limit of first range is 26 letters (A-Z) times 999 numbers (001-999)
if i < 26 * 999:
c,n = divmod(i,999) # quotient c is index of letter 0-25, remainder n is 0-998
c = chr(ord('A') + c) # compute letter
n += 1
return f'{c}{n:03}'
# After first range, second range is 26 letters times 26 letters * 99 numbers (01-99)
elif i < 26*999 + 26*26*99:
i -= 26*999 # remove first range offset
cc,n = divmod(i,99) # remainder n is 0-98, use quotient cc to compute two letters
c1,c2 = divmod(cc,26) # c1 is index of first letter, c2 is index of second letter
c1 = chr(ord('A') + c1) # compute first letter
c2 = chr(ord('A') + c2) # compute second letter
n += 1
return f'{c1}{c2}{n:02}'
else:
raise OverflowError(f'limit is {26*999+26*26*99}')
for i in range(92880, 92898):
print(full_format(i))

How to augment long string containing (0,1,2,3,4,5,6,?) by replacing a '?' with the the largest neighboring number?

I'm trying to augment a lengthy string that can contain multiple number of digits (0,1,2,3,4,5,6,?)
Consider a string "000000000004?0??100001??2?0?10000000".
I'm trying to replace all the question marks (?) by the neighbouring largest digit. The comparison should be done from both left character and right character to the question mark (?).
Input String: "000000000004?0??100001??2?0?10000000"
Output String: "000000000004401110000122220110000000"
I wrote a function that ends up replacing them during the first iteration of the loop itself which results in replacing the ? by the highest number i.e, 4 in this case. Check the code snippet below.
Wrong Output: "000000000004404410000144240410000000"
def method_augment(aug_str):
global pos_of_sec_char, sec_char, preced_char
flag_marks_string_end = 0
list_predef = ['0', '1', '2', '3', '4', '5', '6']
len_aug_str = len(aug_str)
for m in range(0, (len_aug_str - 1)):
if aug_str[m] == '?':
pos_of_first_ques = m
if m != 0:
preced_char = aug_str[m - 1]
# print("preced char:", preced_char)
for n in range((pos_of_first_ques + 1), (len_aug_str - 1)):
if aug_str[n] in list_predef:
pos_of_sec_char = n
sec_char = aug_str[n]
print(sec_char)
if preced_char > sec_char:
aug_str = aug_str.replace(aug_str[pos_of_first_ques], preced_char)
del preced_char, sec_char, pos_of_first_ques, m
else:
aug_str = aug_str.replace(aug_str[pos_of_first_ques], sec_char)
del preced_char, sec_char, pos_of_first_ques
break
else:
flag_marks_string_end += 1
else:
for q in range((pos_of_first_ques + 1), (len_aug_str - 1)):
if aug_str[q] in list_predef:
pos_of_sec_char = q
sec_char = aug_str[q]
aug_str = aug_str.replace(aug_str[pos_of_first_ques], sec_char)
break
# if preced_char > sec_char:
# aug_str = aug_str.replace(aug_str[m], preced_char)
# else:
# aug_str = aug_str.replace(aug_str[m], sec_char)
else:
continue
return aug_str
Input String: "000000000004?0??100001??2?0?10000000"
Expected Output String: "000000000004401110000122220110000000"
Actual Output String: "000000000004404410000144240410000000"
There are multiple strings like this with different combinations of digit and ?. I hope I have explained it well. Please help. Thanks.
Here is a way to do it, with some tests:
def replace_with_largest(s):
out = []
last_left = None
next_right = None
for i, c in enumerate(s):
if c in '0123456789':
out.append(c)
last_left = c
next_right = None # now the next digit to the right is unknown
continue
# Now we have a '?'.
# We need the next digit to the right
if next_right is None:
for j in range(i+1, len(s)):
if s[j] != '?':
next_right = s[j]
break
else:
# No more digit right of here, we'll use the last one on the left
next_right = last_left
out.append(max(last_left, next_right) if last_left is not None else next_right)
return ''.join(out)
The tests, some strings and the expected output:
tests = [("000000000004?0??100001??2?0?10000000", "000000000004401110000122220110000000"),
("??123??1", "11123331"),
("123???", "123333")]
for test in tests:
print(test[0], replace_with_largest(test[0]), replace_with_largest(test[0]) == test[1])
000000000004?0??100001??2?0?10000000 000000000004401110000122220110000000 True
??123??1 11123331 True
123??? 123333 True
Your program sounds overly complicated. I didn't even try to understand. Can you read and understand this?
import re
def method_augment(text: str) -> str:
while "?" in text:
text = replacesingle("0" + text + "0")[1:-1] # consider starting and ending question marks
return text
def replacesingle(text: str) -> str:
match = re.search("\\d\\?+\\d", text)
span = match.span(0)
partialtext = text[span[0]:span[1]]
left = int(partialtext[0])
right = int(partialtext[-1])
larger = left if left > right else right
number_of_question_marks = len(partialtext) - 2
text = text[:span[0] + 1] + str(larger) * number_of_question_marks + text[span[1] - 1:]
return text
assert(method_augment("000000000004?0??100001??2?0?10000000") == "000000000004401110000122220110000000")
assert (method_augment("??123??1??") == "1112333111")
I'm not sure how efficient this is but you can split your list such you retain all consecutive values of ?s and together as separate elements, pad that list with leading and trailing characters that'll never pass as the max value test compared to digits or ?s (and also make accessing indices slightly more convenient), eg:
import re
def augment(text):
w = [' ', *[el for el in re.split(r'(\?+)', text) if el], ' ']
for i in range(1, len(w) - 1):
w[i] = w[i].replace('?', max(w[i - 1][-1], w[i + 1][0]))
return ''.join(w[1:-1]).strip() or None
Then to use it, eg:
cases = [
'000000000004?0??100001??2?0?10000000',
'?????????9',
'9????????0',
'0????????9',
'?0???????9',
'123???????',
'12?????321',
'??????????',
]
for case in cases:
print(case, '->', augment(case))
Which gives you:
000000000004?0??100001??2?0?10000000 -> 000000000004401110000122220110000000
?????????9 -> 9999999999
9????????0 -> 9999999990
0????????9 -> 0999999999
?0???????9 -> 0099999999
123??????? -> 1233333333
12?????321 -> 1233333321
?????????? -> None

How to draw this bow tie pattern using Python 2.7?

I need to draw the following pattern using Python While Loops.
I have spent quite a lot of time and came up with this code which prints it perfectly but this code is so much long and I feel like it is not one of those good codes.
If anybody here can help me out shrinking this code or suggesting a better way to output?
here is the code:
#Question 10, Alternate Approach
temp = 1
pattern = ""
innerSpace = 7
starCount = 1
while temp <= 5:
st = 1
while st <= starCount:
pattern = pattern + "*"
if st != starCount:
pattern = pattern + " "
st = st + 1
sp = 0
if temp == 5:
innerSpace = 1
while sp < innerSpace:
pattern = pattern + " "
sp = sp + 1
st = 1
while st <= starCount:
if temp == 5:
st = st + 1
pattern = pattern + "*"
if st != starCount:
pattern = pattern + " "
st = st + 1
temp = temp + 1
innerSpace = innerSpace - 2
pattern = pattern + "\n"
if temp % 2 == 0:
pattern = pattern + " "
else:
starCount = starCount + 1
starCount = 2
innerSpace = 1
while temp > 5 and temp <= 9:
st = 1
while st <= starCount:
pattern = pattern + "*"
if st != starCount:
pattern = pattern + " "
st = st + 1
sp = 0
while sp < innerSpace:
pattern = pattern + " "
sp = sp + 1
st = 1
while st <= starCount:
pattern = pattern + "*"
if st != starCount:
pattern = pattern + " "
st = st + 1
temp = temp + 1
innerSpace = innerSpace + 2
pattern = pattern + "\n"
if temp % 2 == 0:
starCount = starCount - 1
pattern = pattern + " "
print pattern
Since this looks like an assignment, I'll give you a hint how I would do it.
Take advantage of the symmetry of the bow. It is symmetrical about the horizontal and vertical axis. Therefore, you really only need to solve 1 corner, then copy/mirror the results to get the rest.
This code gives one way of looking at the problem, which is just shifting a initial string (the middle of the bow) to get the desired shape:
m = '*'
size = 4
n = 5 # must be odd
pad = ' ' * n
middle = (m + pad) * size
half = int(n / 2) + 1
print middle
print middle[half*1:]
print middle[half*2:]
print middle[half*3:]
print middle[half*4:]
print middle[half*5:]
print middle[half*6:]
Which yields this:
* * * *
* * *
* * *
* *
* *
*
*
Good luck!
I would use list comprehensions and strings and would exploit the symmetry of the figure.
Not a complete solution, but could be a part of a loop body
In [2]: a = '*' + ' '*8
In [3]: a
Out[3]: '* '
In [24]: result = ''
In [25]: result += a
In [26]: result
Out[26]: '* '
In [27]: result += a[-1::-1]
In [28]: result
Out[28]: '* *'
In [29]: result += '\n'
In [30]: a = ' '+'*' + ' '*7
In [31]: a
Out[31]: ' * '
In [32]: result += a
In [33]: result += a[-1::-1]
In [34]: result += '\n'
In [36]: print result
* *
* *
IMHO you use while loop much as if they where for loops.
I don't think that's what your teacher wants.
The idea behind while is to run until a certain condition is met, not
necessarily when the number of iterations exceed a certain limit.
The condition does not need to be included in the while statement, you can check it later and use the break command to escape the loop
Try for example this:
start = '*'
while True:
print start
if start[0] == '*':
start = ' ' + start
else:
start = '*' + start
if (start == '* * *'):
break
output is just a part of your assignment, think you should be able to work it out to the final, expected result!
Hopefully by this time HW is done. Since I solved this using dynamic programming, I thought I would list solution here.
Observations:
While looking at pattern its observed that bottom half is palindrome of top half. Hence we need to calculate only the top half.
Next we see that for every row count,we have pattern like,
row 1 = 1 , n
row 2 = 2 , n -1
row 3 = 1,3, n-2, n
row 4 = 2, 4 , n-3, n-1
.. and so on.
With iteration index as row count and n as input value we can dynamically calculate remaining values very efficiently.
Source-Code
def get_list(bound, alist):
tmp_list = []
for i in xrange(1,bound + 1):
tmp_list.append(star if i in alist else dot)
return tmp_list
star = "*"
dot = " "
n = 20 #How large of BowTie do you want?
m = (n * 2) - 1
#get top half list
th = []
for idx,k in enumerate(xrange(1,n+1)): #run through 1 - n
row = idx + 1
tmplst = []
if row % 2 != 0:
tmplst.append(i for i in xrange(1,row + 1) if i % 2 != 0)
tmplst.append(i for i in xrange(m, m-row, -1) if i % 2 != 0)
else:
tmplst.append(i for i in xrange(1,row + 1) if i % 2 == 0)
tmplst.append(i for i in xrange(m, m-row, -1) if i % 2 == 0)
#append each row value to top half list.
th.append(sorted(set([j for i in tmplst for j in i])))
#create palindrome of top half which is our bottom half
th = th + th[len(th) -2::-1]
#create list of * and blanks
final = [get_list(m, i) for i in th]
#Print BowTie
for i in final:
print ' '.join(i)
Using a stars and spacing and counting variable
counting=1
star_amount=1
space_amount=6
loop_var=7
while loop_var>0:
loop_var-=1
if space_amount==0:
counting*=-1
stars=" * "*star_amount
spaces=" "*space_amount
print(stars+spaces+stars)
star_amount+=counting
space_amount-= counting*2

Longest substring without repeating characters in python

This is a pretty standard interview question. Find the longest substring without repeating characters. Here are the test cases,
abcabcbb -> 3
bbbbb -> 1
pwwkew -> 3
bpfbhmipx -> 7
tmmzuxt -> 5
Here's my code which uses a pretty simple approach with 2 pointers.
def lengthOfLongestSubstring(s):
checklist = {}
starting_index_of_current_substring = 0
length_of_longest_substring = 0
for i, v in enumerate(s):
if v in checklist:
starting_index_of_current_substring = checklist[v] + 1
else:
length_of_current_substring = i - starting_index_of_current_substring + 1
length_of_longest_substring = max(length_of_current_substring, length_of_longest_substring)
checklist[v] = i
return length_of_longest_substring
My code passes all the test cases except the last one (actual 4, expected 5). Can someone help me modify the code to take care of the last test case. I don't wish to reinvent the algorithm.
Here is a simple tweak in your code with 2 pointers to find the longest sub-string without repeating characters.
Change in your code is instead of calculating the length of longest substring when v is not present in checklist, I am calculating length of longest substring for all cases.
def lengthOfLongestSubstring(s):
checklist = {}
starting_index_of_current_substring = 0
length_of_longest_substring = 0
for i, v in enumerate(s):
if v in checklist:
starting_index_of_current_substring = max(starting_index_of_current_substring, checklist[v] + 1)
checklist[v] = i
length_of_longest_substring = max(length_of_longest_substring, i - starting_index_of_current_substring + 1)
return length_of_longest_substring
## Main
result = {}
for string in ['abcabcbb', 'bbbbb', 'ppwwkew', 'wcabcdeghi', 'bpfbhmipx', 'tmmzuxt', 'AABGAKGIMN', 'stackoverflow']:
result[string] = lengthOfLongestSubstring(string)
print(result)
Sample run:
{'abcabcbb': 3, 'bbbbb': 1, 'ppwwkew': 3, 'wcabcdeghi': 8, 'bpfbhmipx': 7, 'tmmzuxt': 5, 'AABGAKGIMN': 6, 'stackoverflow': 11}
This post is pretty old, but I think my solution fixes the bug in the original code.
def lengthOfLongestSubstring(s):
checklist = {}
starting_index_of_current_substring = 0
length_of_longest_substring = 0
for i, v in enumerate(s):
if v in checklist:
if checklist[v] >= starting_index_of_current_substring:
starting_index_of_current_substring = checklist[v] + 1
length_of_current_substring = i - starting_index_of_current_substring + 1
length_of_longest_substring = max(length_of_current_substring, length_of_longest_substring)
checklist[v] = i
return length_of_longest_substring
This doesnt really iterate upon your solution, but it's a bit simpler approach, just to give you an idea how it could be also solved.
def longest_substr(s):
longest = 0
for start_index in range(len(s)):
contains = set()
for letter in s[start_index:]:
if letter in contains:
break
contains.add(letter)
longest = max(longest, len(contains))
return longest
0
I would prefer this solution>>
Time and Space Management Optimised:
def lengthOfLongestSubstring(self, s: str) -> int:
curlen = maxlen = 0 # curlen saves the max len of substrings ending with current num
for i, num in enumerate(s):
curlen -= s[i-curlen:i].find(num)
maxlen = max(maxlen, curlen)
return maxlen
Find Longest Substring in the string without repeating characters.
def find_non_repeating_substring(input_str):
output_length = 0
longest_sub_str = ''
len_str = len(input_str)
index = 0
while len_str != 1:
l_str = ''
for i in range(index, len(input_str)):
if input_str[i] not in l_str:
l_str = l_str + input_str[i]
else:
break
sub_str_length = len(l_str)
if sub_str_length > output_length:
output_length = sub_str_length
longest_sub_str = l_str
len_str = len_str -1
index = index + 1
return output_length,longest_sub_str
if __name__ == '__main__':
input_str = raw_input("Please enter the string: ")
sub_str_length, sub_str = find_non_repeating_substring(input_str)
print ('Longest Substing lenght is "{0}" and the sub string is "{1}"'.format(sub_str_length, sub_str))```

Create a compress function in Python?

I need to create a function called compress that compresses a string by replacing any repeated letters with a letter and number. My function should return the shortened version of the string. I've been able to count the first character but not any others.
Ex:
>>> compress("ddaaaff")
'd2a3f2'
def compress(s):
count=0
for i in range(0,len(s)):
if s[i] == s[i-1]:
count += 1
c = s.count(s[i])
return str(s[i]) + str(c)
Here is a short python implementation of a compression function:
def compress(string):
res = ""
count = 1
#Add in first character
res += string[0]
#Iterate through loop, skipping last one
for i in range(len(string)-1):
if(string[i] == string[i+1]):
count+=1
else:
if(count > 1):
#Ignore if no repeats
res += str(count)
res += string[i+1]
count = 1
#print last one
if(count > 1):
res += str(count)
return res
Here are a few examples:
>>> compress("ddaaaff")
'd2a3f2'
>>> compress("daaaafffyy")
'da4f3y2'
>>> compress("mississippi")
'mis2is2ip2i'
Short version with generators:
from itertools import groupby
import re
def compress(string):
return re.sub(r'(?<![0-9])[1](?![0-9])', '', ''.join('%s%s' % (char, sum(1 for _ in group)) for char, group in groupby(string)))
(1) Grouping by chars with groupby(string)
(2) Counting length of group with sum(1 for _ in group) (because no len on group is possible)
(3) Joining into proper format
(4) Removing 1 chars for single items when there is a no digit before and after 1
There are several reasons why this doesn't work. You really need to try debugging this yourself first. Put in a few print statements to trace the execution. For instance:
def compress(s):
count=0
for i in range(0, len(s)):
print "Checking character", i, s[i]
if s[i] == s[i-1]:
count += 1
c = s.count(s[i])
print "Found", s[i], c, "times"
return str(s[i]) + str(c)
print compress("ddaaaff")
Here's the output:
Checking character 0 d
Found d 2 times
Checking character 1 d
Found d 2 times
Checking character 2 a
Found a 3 times
Checking character 3 a
Found a 3 times
Checking character 4 a
Found a 3 times
Checking character 5 f
Found f 2 times
Checking character 6 f
Found f 2 times
f2
Process finished with exit code 0
(1) You throw away the results of all but the last letter's search.
(2) You count all occurrences, not merely the consecutive ones.
(3) You cast a string to a string -- redundant.
Try working through this example with pencil and paper. Write down the steps you use, as a human being, to parse the string. Work on translating those to Python.
x="mississippi"
res = ""
count = 0
while (len(x) > 0):
count = 1
res= ""
for j in range(1, len(x)):
if x[0]==x[j]:
count= count + 1
else:
res = res + x[j]
print(x[0], count, end=" ")
x=res
Just another simplest way to perform this:
def compress(str1):
output = ''
initial = str1[0]
output = output + initial
count = 1
for item in str1[1:]:
if item == initial:
count = count + 1
else:
if count == 1:
count = ''
output = output + str(count)
count = 1
initial = item
output = output + item
print (output)
Which gives the output as required, examples:
>> compress("aaaaaaaccddddeehhyiiiuuo")
a7c2d4e2h2yi3u2o
>> compress("lllhhjuuuirrdtt")
l3h2ju3ir2dt
>> compress("mississippi")
mis2is2ip2i
from collections import Counter
def string_compression(string):
counter = Counter(string)
result = ''
for k, v in counter.items():
result = result + k + str(v)
print(result)
input = "mississippi"
count = 1
for i in range(1, len(input) + 1):
if i == len(input):
print(input[i - 1] + str(count), end="")
break
else:
if input[i - 1] == input[i]:
count += 1
else:
print(input[i - 1] + str(count), end="")
count = 1
Output : m1i1s2i1s2i1p2i1
s=input("Enter the string:")
temp={}
result=" "
for x in s:
if x in temp:
temp[x]=temp[x]+1
else:
temp[x]=1
for key,value in temp.items():
result+=str(key)+str(value)
print(result)
Here is something I wrote.
def stringCompression(str1):
counter=0
prevChar = str1[0]
str2=""
charChanged = False
loopCounter = 0
for char in str1:
if(char==prevChar):
counter+=1
charChanged = False
else:
str2 += prevChar + str(counter)
counter=1
prevChar = char
if(loopCounter == len(str1) - 1):
str2 += prevChar + str(counter)
charChanged = True
loopCounter+=1
if(not charChanged):
str2+= prevChar + str(counter)
return str2
Not the best code I guess. But works well.
a -> a1
aaabbbccc -> a3b3c3
This is a solution to the problem. But keep in mind that this method only effectively works if there's a lot of repetition, specifically if consecutive characters are repetitive. Otherwise, it will only worsen the situation.
e.g.,
AABCD --> A2B1C1D1
BcDG ---> B1c1D1G1
def compress_string(s):
result = [""] * len(s)
visited = None
index = 0
count = 1
for c in s:
if c == visited:
count += 1
result[index] = f"{c}{count}"
else:
count = 1
index += 1
result[index] = f"{c}{count}"
visited = c
return "".join(result)
You can simply achieve that by:
gstr="aaabbccccdddee"
last=gstr[0]
count=0
rstr=""
for i in gstr:
if i==last:
count=count+1
elif i!=last:
rstr=rstr+last+str(count)
count=1
last=i
rstr=rstr+last+str(count)
print ("Required string for given string {} after conversion is {}.".format(gstr,rstr))
Here is a short python implementation of a compression function:
#d=compress('xxcccdex')
#print(d)
def compress(word):
list1=[]
for i in range(len(word)):
list1.append(word[i].lower())
num=0
dict1={}
for i in range(len(list1)):
if(list1[i] in list(dict1.keys())):
dict1[list1[i]]=dict1[list1[i]]+1
else:
dict1[list1[i]]=1
s=list(dict1.keys())
v=list(dict1.values())
word=''
for i in range(len(s)):
word=word+s[i]+str(v[i])
return word
Below logic will work irrespective of
Data structure
Group By OR Set or any sort of compression logic
Capital or non-capital characters
Character repeat if not sequential
def fstrComp_1(stng):
sRes = ""
cont = 1
for i in range(len(stng)):
if not stng[i] in sRes:
stng = stng.lower()
n = stng.count(stng[i])
if n > 1:
cont = n
sRes += stng[i] + str(cont)
else:
sRes += stng[i]
print(sRes)
fstrComp_1("aB*b?cC&")
I wanted to do it by partitioning the string.
So aabbcc would become: ['aa', 'bb', 'cc']
This is how I did it:
def compression(string):
# Creating a partitioned list
alist = list(string)
master = []
n = len(alist)
for i in range(n):
if alist[i] == alist[i-1]:
master[-1] += alist[i]
else:
master += alist[i]
# Adding the partitions together in a new string
newString = ""
for i in master:
newString += i[0] + str(len(i))
# If the newString is longer than the old string, return old string (you've not
# compressed it in length)
if len(newString) > n:
return string
return newString
string = 'aabbcc'
print(compression(string))
string = 'aabccccd'
output = '2a3b4c4d'
new_string = " "
count = 1
for i in range(len(string)-1):
if string[i] == string[i+1]:
count = count + 1
else:
new_string = new_string + str(count) + string[i]
count = 1
new_string = new_string + str(count) + string[i+1]
print(new_string)
For a coding interview, where it was about the algorithm, and not about my knowledge of Python, its internal representation of data structures, or the time complexity of operations such as string concatenation:
def compress(message: str) -> str:
output = ""
length = 0
previous: str = None
for char in message:
if previous is None or char == previous:
length += 1
else:
output += previous
if length > 1:
output += str(length)
length = 1
previous = char
if previous is not None:
output += previous
if length > 1:
output += str(length)
return output
For code I'd actually use in production, not reinventing any wheels, being more testable, using iterators until the last step for space efficiency, and using join() instead of string concatenation for time efficiency:
from itertools import groupby
from typing import Iterator
def compressed_groups(message: str) -> Iterator[str]:
for char, group in groupby(message):
length = sum(1 for _ in group)
yield char + (str(length) if length > 1 else "")
def compress(message: str) -> str:
return "".join(compressed_groups(message))
Taking things a step further, for even more testability:
from itertools import groupby
from typing import Iterator
from collections import namedtuple
class Segment(namedtuple('Segment', ['char', 'length'])):
def __str__(self) -> str:
return self.char + (str(self.length) if self.length > 1 else "")
def segments(message: str) -> Iterator[Segment]:
for char, group in groupby(message):
yield Segment(char, sum(1 for _ in group))
def compress(message: str) -> str:
return "".join(str(s) for s in segments(message))
Going all-out and providing a Value Object CompressedString:
from itertools import groupby
from typing import Iterator
from collections import namedtuple
class Segment(namedtuple('Segment', ['char', 'length'])):
def __str__(self) -> str:
return self.char + (str(self.length) if self.length > 1 else "")
class CompressedString(str):
#classmethod
def compress(cls, message: str) -> "CompressedString":
return cls("".join(str(s) for s in cls._segments(message)))
#staticmethod
def _segments(message: str) -> Iterator[Segment]:
for char, group in groupby(message):
yield Segment(char, sum(1 for _ in group))
def compress(message: str) -> str:
return CompressedString.compress(message)
def compress(val):
print(len(val))
end=0
count=1
result=""
for i in range(0,len(val)-1):
#print(val[i],val[i+1])
if val[i]==val[i+1]:
count=count+1
#print(count,val[i])
elif val[i]!=val[i+1]:
#print(end,i)
result=result+val[end]+str(count)
end=i+1
count=1
result=result+val[-1]+str(count)
return result
res=compress("I need to create a function called compress that compresses a string by replacing any repeated letters with a letter and number. My function should return the shortened version of the string. I've been able to count the first character but not any others.")
print(len(res))
Use python's standard library re.
def compress(string):
import re
p=r'(\w+?)\1+' # non greedy, group1 1
sub_str=string
for m in re.finditer(p,string):
num=m[0].count(m[1])
sub_str=re.sub(m[0],f'{m[1]}{num}',sub_str)
return sub_str
string='aaaaaaaabbbbbbbbbcccccccckkkkkkkkkkkppp'
string2='ababcdcd'
string3='abcdabcd'
string4='ababcdabcdefabcdcd'
print(compress(string))
print(compress(string2))
print(compress(string3))
print(compress(string4))
Resut:
a8b9c8k11p3
ab2cd2
abcd2
ab2cdabcdefabcd2
Using generators:
input = "aaaddddffwwqqaattttttteeeeeee"
from itertools import groupby
print(''.join(([char+str(len(list(group))) for char, group in groupby(input)])))
def compress(string):
# taking out unique characters from the string
unique_chars = []
for c in string:
if not c in unique_chars:
unique_chars.append(c)
# Now count the characters
res = ""
for i in range(len(unique_chars)):
count = string.count(unique_chars[i])
res += unique_chars[i]+str(count)
return res
string = 'aabccccd'
compress(string)
from collections import Counter
def char_count(input_str):
my_dict = Counter(input_str)
print(my_dict)
output_str = ""
for i in input_str:
if i not in output_str:
output_str += i
output_str += str(my_dict[i])
return output_str
result = char_count("zddaaaffccc")
print(result)
This is the modification of Patrick Yu's code. It code fails for the below test cases.
SAMPLE INPUT:
c
aaaaaaaaaabcdefgh
EXPECTED OUTPUT:
c1
a10b1c1d1e1f1g1h1
OUPUT OF Patrick's Code:
c
a10bcdefgh
Below is the modified code:
def Compress(S):
Ans = S[0]
count = 1
for i in range(len(S)-1):
if S[i] == S[i+1]:
count += 1
else:
if count >= 1:
Ans += str(count)
Ans += S[i+1]
count = 1
if count>=1:
Ans += str(count)
return Ans
Just the condition must be changed from greater(">") to greater than equal to(">=") when comparing the count with 1.

Categories

Resources