Create a compress function in Python? - python
I need to create a function called compress that compresses a string by replacing any repeated letters with a letter and number. My function should return the shortened version of the string. I've been able to count the first character but not any others.
Ex:
>>> compress("ddaaaff")
'd2a3f2'
def compress(s):
# Asker's original attempt, shown as posted; it does not produce the desired output.
count=0
for i in range(0,len(s)):
# For i == 0, s[i-1] is s[-1], i.e. the last character of the string.
if s[i] == s[i-1]:
# count is incremented here but never read afterwards.
count += 1
# str.count tallies every occurrence of s[i] in the whole string, not just the current run.
c = s.count(s[i])
# Builds the result from the current values of i and c only.
return str(s[i]) + str(c)
Here is a short python implementation of a compression function:
def compress(string):
    """Run-length encode ``string``, writing a count only for repeated characters.

    Each run of identical consecutive characters becomes the character
    followed by the run length; a run of length one is written as the bare
    character, so ``"ddaaaff"`` gives ``"d2a3f2"`` and ``"abc"`` gives ``"abc"``.

    Args:
        string: The text to compress.

    Returns:
        The compressed text; an empty string for empty input.
    """
    # Guard: string[0] below would raise IndexError on empty input.
    if not string:
        return ""

    pieces = []
    run_char = string[0]
    run_length = 1
    for char in string[1:]:
        if char == run_char:
            run_length += 1
            continue
        # The run just ended: emit it, then start a new run at ``char``.
        pieces.append(run_char)
        if run_length > 1:
            pieces.append(str(run_length))
        run_char = char
        run_length = 1

    # Emit the final run, which the loop never closes.
    pieces.append(run_char)
    if run_length > 1:
        pieces.append(str(run_length))
    return "".join(pieces)
Here are a few examples:
>>> compress("ddaaaff")
'd2a3f2'
>>> compress("daaaafffyy")
'da4f3y2'
>>> compress("mississippi")
'mis2is2ip2i'
Short version with generators:
from itertools import groupby
import re
def compress(string):
    """Run-length encode ``string`` and strip the count from single-character runs."""
    # Encode every run as "<char><length>", including runs of length one.
    encoded = ''.join(
        '%s%s' % (char, sum(1 for _ in group))
        for char, group in groupby(string)
    )
    # Remove every "1" that has no digit directly before or after it.
    return re.sub(r'(?<![0-9])[1](?![0-9])', '', encoded)
(1) Grouping by chars with groupby(string)
(2) Counting the length of each group with sum(1 for _ in group) (a group is an iterator, so len() cannot be called on it)
(3) Joining into proper format
(4) Removing the "1" count for single items, i.e. any 1 that has no digit immediately before or after it
There are several reasons why this doesn't work. You really need to try debugging this yourself first. Put in a few print statements to trace the execution. For instance:
def compress(s):
    """Asker's attempt, instrumented with trace prints for debugging.

    The logic is intentionally left as posted (it is the code being
    debugged); only the trace output was added. For ``"ddaaaff"`` it
    returns ``"f2"``: the character and whole-string count from the last
    loop iteration.

    Args:
        s: The string being examined.

    Returns:
        The last character of ``s`` followed by how often it occurs in ``s``.
    """
    count = 0
    for i in range(0, len(s)):
        print("Checking character", i, s[i])
        # For i == 0 this compares against s[-1], the last character.
        if s[i] == s[i-1]:
            count += 1
        # Counts every occurrence in the whole string, not just this run.
        c = s.count(s[i])
        print("Found", s[i], c, "times")
    return str(s[i]) + str(c)


print(compress("ddaaaff"))
Here's the output:
Checking character 0 d
Found d 2 times
Checking character 1 d
Found d 2 times
Checking character 2 a
Found a 3 times
Checking character 3 a
Found a 3 times
Checking character 4 a
Found a 3 times
Checking character 5 f
Found f 2 times
Checking character 6 f
Found f 2 times
f2
Process finished with exit code 0
(1) You throw away the results of all but the last letter's search.
(2) You count all occurrences, not merely the consecutive ones.
(3) You cast a string to a string -- redundant.
Try working through this example with pencil and paper. Write down the steps you use, as a human being, to parse the string. Work on translating those to Python.
# Print every distinct character of x with its total number of occurrences,
# in order of first appearance, by repeatedly stripping the leading character.
x = "mississippi"
res = ""
count = 0
while x:
    head, tail = x[0], x[1:]
    # The leading character itself plus every later copy of it.
    count = 1 + tail.count(head)
    # Whatever is left once all copies of the leading character are removed.
    res = tail.replace(head, "")
    print(head, count, end=" ")
    x = res
Here is another simple way to do this:
def compress(str1):
    """Print the run-length encoding of ``str1``.

    Each run of identical consecutive characters is written as the
    character followed by the run length; the length is omitted for runs
    of a single character (``"mississippi"`` prints ``mis2is2ip2i``).

    Args:
        str1: The text to compress.

    Returns:
        None. The encoded text is printed.
    """
    if not str1:
        # Nothing to encode; avoid indexing str1[0] on an empty string.
        print('')
        return

    output = ''
    initial = str1[0]
    count = 1
    for item in str1[1:]:
        if item == initial:
            count = count + 1
            continue
        # Run finished: write the character, plus its count when repeated.
        output = output + initial + (str(count) if count > 1 else '')
        initial = item
        count = 1
    # Flush the final run too, so a repeated last character keeps its count.
    output = output + initial + (str(count) if count > 1 else '')
    print(output)
Which gives the output as required, examples:
>> compress("aaaaaaaccddddeehhyiiiuuo")
a7c2d4e2h2yi3u2o
>> compress("lllhhjuuuirrdtt")
l3h2ju3ir2dt
>> compress("mississippi")
mis2is2ip2i
from collections import Counter
def string_compression(string):
    """Print each distinct character of ``string`` followed by its total count.

    Counts cover the whole string, not consecutive runs, so
    ``"mississippi"`` prints ``m1i4s4p2``.
    """
    tally = Counter(string)
    # Items come back in first-seen order (dict insertion order, Python 3.7+).
    result = ''.join(char + str(total) for char, total in tally.items())
    print(result)
# Print the run-length encoding of ``text``, always including the count
# (e.g. m1i1s2i1s2i1p2i1). The variable is deliberately not called ``input``
# so the builtin of that name stays usable.
text = "mississippi"
count = 1
for i in range(1, len(text) + 1):
    # Still inside the string and the same character as before: extend the run.
    if i < len(text) and text[i - 1] == text[i]:
        count += 1
    else:
        # Either the character changed or the string ended: emit the run.
        print(text[i - 1] + str(count), end="")
        count = 1
Output : m1i1s2i1s2i1p2i1
# Read a string and print each distinct character followed by its total
# number of occurrences, in order of first appearance.
s = input("Enter the string:")
temp = {}
for x in s:
    temp[x] = temp.get(x, 0) + 1
# Start from an empty string so the printed result has no stray leading space.
result = ""
for key, value in temp.items():
    result += str(key) + str(value)
print(result)
Here is something I wrote.
def stringCompression(str1):
    """Run-length encode ``str1``, always writing the run length.

    ``"a"`` becomes ``"a1"`` and ``"aaabbbccc"`` becomes ``"a3b3c3"``.

    Args:
        str1: The text to compress.

    Returns:
        The encoded text; an empty string for empty input.
    """
    # Guard: str1[0] below would raise IndexError on empty input.
    if not str1:
        return ""

    pieces = []
    prevChar = str1[0]
    counter = 0
    for char in str1:
        if char != prevChar:
            # The previous run ended; record it and start counting afresh.
            pieces.append(prevChar + str(counter))
            prevChar = char
            counter = 0
        counter += 1
    # The last run is never closed inside the loop, so record it here.
    pieces.append(prevChar + str(counter))
    return "".join(pieces)
Not the best code I guess. But works well.
a -> a1
aaabbbccc -> a3b3c3
This is a solution to the problem. But keep in mind that this method only effectively works if there's a lot of repetition, specifically if consecutive characters are repetitive. Otherwise, it will only worsen the situation.
e.g.,
AABCD --> A2B1C1D1
BcDG ---> B1c1D1G1
def compress_string(s):
    """Run-length encode ``s``, always writing the run length.

    ``"AABCD"`` becomes ``"A2B1C1D1"``.

    Args:
        s: The text to compress.

    Returns:
        The encoded text; an empty string for empty input.
    """
    result = []
    visited = None
    count = 0
    for c in s:
        if c == visited:
            count += 1
            # Same run: overwrite its entry with the updated length.
            result[-1] = f"{c}{count}"
        else:
            count = 1
            # New run: append an entry. Growing the list on demand avoids
            # writing past the end when no character repeats.
            result.append(f"{c}{count}")
        visited = c
    return "".join(result)
You can simply achieve that by:
# Run-length encode gstr (count always written) and report the result.
gstr = "aaabbccccdddee"
last = gstr[0]
count = 0
runs = []
for i in gstr:
    if i != last:
        # Character changed: store the finished run and restart the count.
        runs.append(last + str(count))
        last, count = i, 0
    count = count + 1
# Store the run that was still open when the string ended.
runs.append(last + str(count))
rstr = "".join(runs)
print ("Required string for given string {} after conversion is {}.".format(gstr,rstr))
Here is a short python implementation of a compression function:
#d=compress('xxcccdex')
#print(d)
def compress(word):
    """Return each distinct character of ``word``, lower-cased, with its total count.

    Counts are totals over the whole word rather than per consecutive run,
    and characters appear in order of first occurrence.
    """
    totals = {}
    # Lower-case one character at a time, then tally.
    for letter in (ch.lower() for ch in word):
        totals[letter] = totals.get(letter, 0) + 1
    return ''.join(letter + str(total) for letter, total in totals.items())
Below logic will work irrespective of
Data structure
Group By OR Set or any sort of compression logic
Capital or non-capital characters
Character repeat if not sequential
def fstrComp_1(stng):
    """Print each distinct character of ``stng`` with its total count.

    The string is lower-cased first, so upper and lower case are counted
    together. Characters are written in order of first appearance, and the
    count is written only when it is greater than one
    (``"aB*b?cC&"`` prints ``ab2*?c2&``).

    Args:
        stng: The text to summarise.

    Returns:
        None. The result is printed.
    """
    lowered = stng.lower()
    # Track seen characters separately from the output text, so digits
    # written as counts are never mistaken for characters already handled.
    seen = set()
    pieces = []
    for char in lowered:
        if char in seen:
            continue
        seen.add(char)
        n = lowered.count(char)
        pieces.append(char + str(n) if n > 1 else char)
    print("".join(pieces))


fstrComp_1("aB*b?cC&")
I wanted to do it by partitioning the string.
So aabbcc would become: ['aa', 'bb', 'cc']
This is how I did it:
def compression(string):
    """Run-length encode ``string`` unless that would make it longer.

    The string is first partitioned into runs of identical consecutive
    characters (``"aabbcc"`` -> ``["aa", "bb", "cc"]``); each run is then
    written as its character followed by its length.

    Args:
        string: The text to compress.

    Returns:
        The encoded text, or ``string`` itself when the encoding would be
        longer than the input.
    """
    # Creating a partitioned list
    master = []
    for char in string:
        # Compare with the run being built rather than with string[i-1],
        # which for the first character wraps round to the last character.
        if master and master[-1][0] == char:
            master[-1] += char
        else:
            master.append(char)

    # Adding the partitions together in a new string
    newString = "".join(run[0] + str(len(run)) for run in master)

    # If the newString is longer than the old string, return old string
    # (it has not been compressed in length)
    if len(newString) > len(string):
        return string
    return newString


string = 'aabbcc'
print(compression(string))
# Run-length encode ``string`` with the count written before each character.
string = 'aabccccd'
# NOTE(review): ``output`` is never used and differs from what the loop
# below produces for this input ('2a1b4c1d') -- confirm the intent.
output = '2a3b4c4d'
# Start empty so the printed result has no stray leading space.
new_string = ""
count = 1
for i in range(len(string)-1):
    if string[i] == string[i+1]:
        count = count + 1
    else:
        new_string = new_string + str(count) + string[i]
        count = 1
# Close the final run. string[-1] is used instead of string[i+1] because
# ``i`` is undefined when the loop body never runs (one-character input).
if string:
    new_string = new_string + str(count) + string[-1]
print(new_string)
For a coding interview, where it was about the algorithm, and not about my knowledge of Python, its internal representation of data structures, or the time complexity of operations such as string concatenation:
def compress(message: str) -> str:
    """Run-length encode ``message``, writing counts only for repeated characters.

    ``"ddaaaff"`` becomes ``"d2a3f2"``; an empty message yields ``""``.
    """
    output = ""
    run_char = None
    run_length = 0
    for char in message:
        if run_char is not None and char != run_char:
            # A different character closes the current run.
            output += run_char + (str(run_length) if run_length > 1 else "")
            run_length = 0
        run_char = char
        run_length += 1
    if run_char is None:
        # No characters were seen at all.
        return output
    return output + run_char + (str(run_length) if run_length > 1 else "")
For code I'd actually use in production, not reinventing any wheels, being more testable, using iterators until the last step for space efficiency, and using join() instead of string concatenation for time efficiency:
from itertools import groupby
from typing import Iterator
def compressed_groups(message: str) -> Iterator[str]:
    """Yield one encoded piece per run of identical consecutive characters.

    A run of length one is yielded as the bare character; longer runs are
    yielded as the character followed by the run length.
    """
    for char, group in groupby(message):
        run_length = sum(1 for _ in group)
        if run_length > 1:
            yield char + str(run_length)
        else:
            yield char + ""
def compress(message: str) -> str:
    """Return ``message`` run-length encoded by joining its compressed groups."""
    pieces = compressed_groups(message)
    return "".join(pieces)
Taking things a step further, for even more testability:
from itertools import groupby
from typing import Iterator
from collections import namedtuple
class Segment(namedtuple('Segment', ['char', 'length'])):
    """A run of ``length`` copies of ``char``."""

    def __str__(self) -> str:
        """Return the character, followed by the length when it exceeds one."""
        if self.length > 1:
            return self.char + str(self.length)
        return self.char + ""
def segments(message: str) -> Iterator[Segment]:
    """Yield a Segment for each run of identical consecutive characters."""
    for char, group in groupby(message):
        run_length = sum(1 for _ in group)
        yield Segment(char, run_length)
def compress(message: str) -> str:
    """Return ``message`` run-length encoded by joining the text of its segments."""
    encoded_runs = map(str, segments(message))
    return "".join(encoded_runs)
Going all-out and providing a Value Object CompressedString:
from itertools import groupby
from typing import Iterator
from collections import namedtuple
class Segment(namedtuple('Segment', ['char', 'length'])):
    """A run of ``length`` copies of ``char``."""

    def __str__(self) -> str:
        """Return the character, followed by the length when it exceeds one."""
        if self.length > 1:
            return self.char + str(self.length)
        return self.char + ""
class CompressedString(str):
    """A ``str`` subclass holding run-length-encoded text.

    Instances are built with the :meth:`compress` alternate constructor.
    """

    @classmethod
    def compress(cls, message: str) -> "CompressedString":
        """Return ``message`` run-length encoded, as an instance of ``cls``.

        Args:
            message: The text to compress.

        Returns:
            The encoded text: each run's character, followed by the run
            length when that length is greater than one.
        """
        return cls("".join(str(s) for s in cls._segments(message)))

    @staticmethod
    def _segments(message: str) -> Iterator[Segment]:
        """Yield one Segment per run of identical consecutive characters."""
        for char, group in groupby(message):
            yield Segment(char, sum(1 for _ in group))
def compress(message: str) -> str:
    """Return ``message`` run-length encoded, delegating to CompressedString."""
    compressed = CompressedString.compress(message)
    return compressed
def compress(val):
    """Run-length encode ``val``, always writing the run length.

    The length of ``val`` is printed first so it can be compared with the
    length of the result.

    Args:
        val: The text to compress.

    Returns:
        The encoded text (``"aab"`` -> ``"a2b1"``); an empty string for
        empty input.
    """
    print(len(val))
    # Guard: val[-1] below would raise IndexError on empty input.
    if not val:
        return ""

    pieces = []
    count = 1
    # Walk over adjacent pairs; a differing pair marks the end of a run.
    for current, following in zip(val, val[1:]):
        if current == following:
            count = count + 1
        else:
            pieces.append(current + str(count))
            count = 1
    # The final run has no following character to close it.
    pieces.append(val[-1] + str(count))
    return "".join(pieces)


res=compress("I need to create a function called compress that compresses a string by replacing any repeated letters with a letter and number. My function should return the shortened version of the string. I've been able to count the first character but not any others.")
print(len(res))
Use python's standard library re.
def compress(string):
    """Collapse immediately repeated substrings into ``<substring><count>``.

    Works on repeated groups of word characters, not only single letters:
    ``"ababcdcd"`` becomes ``"ab2cd2"`` and ``"abcdabcd"`` becomes
    ``"abcd2"``. Text that is not directly repeated is left unchanged.

    Args:
        string: The text to compress.

    Returns:
        The text with every repeated group replaced by one copy of the
        group followed by the number of repetitions.
    """
    import re

    # Non-greedy group 1 is the shortest repeating unit; \1+ takes its repeats.
    p = r'(\w+?)\1+'

    def _collapse(m):
        # m[0] is the whole repeated stretch, m[1] a single unit of it.
        return f'{m[1]}{m[0].count(m[1])}'

    # Replace each match where it was found. Re-substituting the matched
    # text across the whole string would also rewrite parts of later,
    # longer runs (e.g. the "aaaa" in "aabaaaa").
    return re.sub(p, _collapse, string)


string='aaaaaaaabbbbbbbbbcccccccckkkkkkkkkkkppp'
string2='ababcdcd'
string3='abcdabcd'
string4='ababcdabcdefabcdcd'
print(compress(string))
print(compress(string2))
print(compress(string3))
print(compress(string4))
Result:
a8b9c8k11p3
ab2cd2
abcd2
ab2cdabcdefabcd2
Using generators:
from itertools import groupby

# Run-length encode ``text`` (count always written) and print the result.
# The variable is deliberately not called ``input`` so the builtin of that
# name stays usable.
text = "aaaddddffwwqqaattttttteeeeeee"
# One "<char><run length>" piece per run of identical consecutive characters.
encoded = ''.join(char + str(len(list(group))) for char, group in groupby(text))
print(encoded)
def compress(string):
    """Return each distinct character of ``string`` followed by its total count.

    Counts cover the whole string rather than consecutive runs, and
    characters appear in order of first occurrence
    (``"aabccccd"`` -> ``"a2b1c4d1"``).
    """
    # dict.fromkeys keeps the first occurrence of each character, in order.
    unique_chars = list(dict.fromkeys(string))
    return "".join(char + str(string.count(char)) for char in unique_chars)


string = 'aabccccd'
compress(string)
from collections import Counter
def char_count(input_str):
    """Return each distinct character of ``input_str`` with its total count.

    Counts cover the whole string rather than consecutive runs, and
    characters appear in order of first occurrence
    (``"zddaaaffccc"`` -> ``"z1d2a3f2c3"``). The Counter is printed as
    well.

    Args:
        input_str: The text to summarise.

    Returns:
        The characters and their counts, concatenated.
    """
    my_dict = Counter(input_str)
    print(my_dict)
    # Iterate over the Counter itself (first-seen order) instead of testing
    # membership in the output text, where digits written as counts could
    # be mistaken for input characters already handled.
    return "".join(char + str(total) for char, total in my_dict.items())


result = char_count("zddaaaffccc")
print(result)
This is a modification of Patrick Yu's code. His code fails for the test cases below.
SAMPLE INPUT:
c
aaaaaaaaaabcdefgh
EXPECTED OUTPUT:
c1
a10b1c1d1e1f1g1h1
OUTPUT OF Patrick's Code:
c
a10bcdefgh
Below is the modified code:
def Compress(S):
    """Run-length encode ``S``, always writing the run length.

    ``"c"`` becomes ``"c1"`` and ``"aaaaaaaaaabcdefgh"`` becomes
    ``"a10b1c1d1e1f1g1h1"``.

    Args:
        S: The text to compress.

    Returns:
        The encoded text; an empty string for empty input.
    """
    # Guard: S[0] below would raise IndexError on empty input.
    if not S:
        return ""

    Ans = S[0]
    count = 1
    for i in range(len(S) - 1):
        if S[i] == S[i + 1]:
            count += 1
        else:
            # count is never below 1, so this check always passes; with
            # ">" instead of ">=" single characters would lose their count.
            if count >= 1:
                Ans += str(count)
            Ans += S[i + 1]
            count = 1
    # Write the count of the final run (same always-true check as above).
    if count >= 1:
        Ans += str(count)
    return Ans
The only change needed is in the condition that compares the count with 1: from greater than (">") to greater than or equal to (">=").
Related
Python Inserting a string
I need to insert a string (character by character) into another string at every 3rd position For example:- string_1:-wwwaabkccgkll String_2:- toadhp Now I need to insert string2 char by char into string1 at every third position So the output must be wwtaaobkaccdgkhllp Need in Python.. even Java is ok So i tried this Test_str="hiimdumbiknow" challenge="toadh" new_st=challenge [k] Last=list(test_str) K=0 For i in range(Len(test_str)): if(i%3==0): last.insert(i,new_st) K+=1 and the output i get thitimtdutmbtiknow
You can split test_str into sub-strings to length 2, and then iterate merging them with challenge: def concat3(test_str, challenge): chunks = [test_str[i:i+2] for i in range(0,len(test_str),2)] result = [] i = j = 0 while i<len(chunks) or j<len(challenge): if i<len(chunks): result.append(chunks[i]) i += 1 if j<len(challenge): result.append(challenge[j]) j += 1 return ''.join(result) test_str = "hiimdumbiknow" challenge = "toadh" print(concat3(test_str, challenge)) # hitimoduambdikhnow This method works even if the lengths of test_str and challenge are mismatching. (The remaining characters in the longest string will be appended at the end.)
You can split Test_str in to groups of two letters and then re-join with each letter from challenge in between as follows; import itertools print(''.join(f'{two}{letter}' for two, letter in itertools.zip_longest([Test_str[i:i+2] for i in range(0,len(Test_str),2)], challenge, fillvalue=''))) Output: hitimoduambdikhnow *edited to split in to groups of two rather than three as originally posted
you can try this, make an iter above the second string and iterate over the first one and select which character should be part of the final string according the position def add3(s1, s2): def n(): try: k = iter(s2) for i,j in enumerate(s1): yield (j if (i==0 or (i+1)%3) else next(k)) except: try: yield s1[i+1:] except: pass return ''.join(n())
def insertstring(test_str,challenge): result = '' x = [x for x in test_str] y = [y for y in challenge] j = 0 for i in range(len(x)): if i % 2 != 0 or i == 0: result += x[i] else: if j < 5: result += y[j] result += x[i] j += 1 get_last_element = x[-1] return result + get_last_element print(insertstring(test_str,challenge)) #output: hitimoduambdikhnow
Python: how to replace a substring with a number of its occurences?
Let's say I have a string presented in the following fashion: st = 'abbbccccaaaAAbccc' The task is to encode it so that single characters are followed by a number of their occurences: st = 'a1b3c4a3A2b1c3' I know one possible solution but it's too bulky and primitive. s = str(input()) l = len(s)-1 c = 1 t = '' if len(s)==1: t = t +s+str(c) else: for i in range(0,l): if s[i]==s[i+1]: c +=1 elif s[i]!=s[i+1]: t = t + s[i]+str(c) c = 1 for j in range(l,l+1): if s[-1]==s[-2]: t = t +s[j]+str(c) elif s[-1]!=s[-2]: t = t +s[j]+str(c) c = 1 print(t) Is there any way to solve this shortly and elegantly? P.S: I'm an unexperienced Python user and a new StackOverflow member, so I beg my pardon if the question is asked incorrectly.
Take advantage of the standard library: from itertools import groupby st = "abbbccccaaaAAbccc" print("".join("{}{}".format(key, len(list(group))) for key, group in groupby(st))) Output: a1b3c4a3A2b1c3 >>>
just loop through and count. There are more graceful snippets but this will get the job done and is clear: count = 1 char = st[0] new_st = [] for c in st[1:]: if c == char: count += 1 else: new_st.append(char + str(count)) char = c count = 1 new_st.append(char + str(count)) s2= "".join(new_st) print(s2) # 'a1b3c4a3A2b1c3' If you want a fancy recursive solution: def compose(s): if not s: return "" count = 1 for char in s[1:]: if s[0] != char: break count += 1 return s[0] + str(count) + compose(s[count:])
Add a start index to a string index generator
I'm currently learning to create generators and to use itertools. So I decided to make a string index generator, but I'd like to add some parameters such as a "start index" allowing to define where to start generating the indexes. I came up with this ugly solution which can be very long and not efficient with large indexes: import itertools import string class StringIndex(object): ''' Generator that create string indexes in form: A, B, C ... Z, AA, AB, AC ... ZZ, AAA, AAB, etc. Arguments: - startIndex = string; default = ''; start increment for the generator. - mode = 'lower' or 'upper'; default = 'upper'; is the output index in lower or upper case. ''' def __init__(self, startIndex = '', mode = 'upper'): if mode == 'lower': self.letters = string.ascii_lowercase elif mode == 'upper': self.letters = string.ascii_uppercase else: cmds.error ('Wrong output mode, expected "lower" or "upper", ' + 'got {}'.format(mode)) if startIndex != '': if not all(i in self.letters for i in startIndex): cmds.error ('Illegal characters in start index; allowed ' + 'characters are: {}'.format(self.letters)) self.startIndex = startIndex def getIndex(self): ''' Returns: - string; current string index ''' startIndexOk = False x = 1 while True: strIdMaker = itertools.product(self.letters, repeat = x) for stringList in strIdMaker: index = ''.join([s for s in stringList]) # Here is the part to simpify if self.startIndex: if index == self.startIndex: startIndexOk = True if not startIndexOk: continue ### yield index x += 1 Any advice or improvement is welcome. Thank you! EDIT: The start index must be a string!
You would have to do the arithmetic (in base 26) yourself to avoid looping over itertools.product. But you can at least set x=len(self.startIndex) or 1!
Old (incorrect) answer If you would do it without itertools (assuming you start with a single letter), you could do the following: letters = 'abcdefghijklmnopqrstuvwxyz' def getIndex(start, case): lets = list(letters.lower()) if case == 'lower' else list(letters.upper()) # default is 'upper', but can also be an elif for r in xrange(0,10): for l in lets[start:]: if l.lower() == 'z': start = 0 yield ''.join(lets[:r])+l I run until max 10 rows of letters are created, but you could ofcourse use an infinite while loop such that it can be called forever. Correct answer I found the solution in a different way: I used a base 26 number translator (based on (and fixxed since it didn't work perfectly): http://quora.com/How-do-I-write-a-program-in-Python-that-can-convert-an-integer-from-one-base-to-another) I uses itertools.count() to count and just loops over all the possibilities. The code: import time from itertools import count def toAlph(x, letters): div = 26 r = '' if x > 0 else letters[0] while x > 0: r = letters[x % div] + r if (x // div == 1) and (x % div == 0): r = letters[0] + r break else: x //= div return r def getIndex(start, case='upper'): alphabet = 'abcdefghijklmnopqrstuvwxyz' letters = alphabet.upper() if case == 'upper' else alphabet started = False for num in count(0,1): l = toAlph(num, letters) if l == start: started = True if started: yield l iterator = getIndex('AA') for i in iterator: print(i) time.sleep(0.1)
Longest substring without repeating characters in python
This is a pretty standard interview question. Find the longest substring without repeating characters. Here are the test cases, abcabcbb -> 3 bbbbb -> 1 pwwkew -> 3 bpfbhmipx -> 7 tmmzuxt -> 5 Here's my code which uses a pretty simple approach with 2 pointers. def lengthOfLongestSubstring(s): checklist = {} starting_index_of_current_substring = 0 length_of_longest_substring = 0 for i, v in enumerate(s): if v in checklist: starting_index_of_current_substring = checklist[v] + 1 else: length_of_current_substring = i - starting_index_of_current_substring + 1 length_of_longest_substring = max(length_of_current_substring, length_of_longest_substring) checklist[v] = i return length_of_longest_substring My code passes all the test cases except the last one (actual 4, expected 5). Can someone help me modify the code to take care of the last test case. I don't wish to reinvent the algorithm.
Here is a simple tweak in your code with 2 pointers to find the longest sub-string without repeating characters. Change in your code is instead of calculating the length of longest substring when v is not present in checklist, I am calculating length of longest substring for all cases. def lengthOfLongestSubstring(s): checklist = {} starting_index_of_current_substring = 0 length_of_longest_substring = 0 for i, v in enumerate(s): if v in checklist: starting_index_of_current_substring = max(starting_index_of_current_substring, checklist[v] + 1) checklist[v] = i length_of_longest_substring = max(length_of_longest_substring, i - starting_index_of_current_substring + 1) return length_of_longest_substring ## Main result = {} for string in ['abcabcbb', 'bbbbb', 'ppwwkew', 'wcabcdeghi', 'bpfbhmipx', 'tmmzuxt', 'AABGAKGIMN', 'stackoverflow']: result[string] = lengthOfLongestSubstring(string) print(result) Sample run: {'abcabcbb': 3, 'bbbbb': 1, 'ppwwkew': 3, 'wcabcdeghi': 8, 'bpfbhmipx': 7, 'tmmzuxt': 5, 'AABGAKGIMN': 6, 'stackoverflow': 11}
This post is pretty old, but I think my solution fixes the bug in the original code. def lengthOfLongestSubstring(s): checklist = {} starting_index_of_current_substring = 0 length_of_longest_substring = 0 for i, v in enumerate(s): if v in checklist: if checklist[v] >= starting_index_of_current_substring: starting_index_of_current_substring = checklist[v] + 1 length_of_current_substring = i - starting_index_of_current_substring + 1 length_of_longest_substring = max(length_of_current_substring, length_of_longest_substring) checklist[v] = i return length_of_longest_substring
This doesnt really iterate upon your solution, but it's a bit simpler approach, just to give you an idea how it could be also solved. def longest_substr(s): longest = 0 for start_index in range(len(s)): contains = set() for letter in s[start_index:]: if letter in contains: break contains.add(letter) longest = max(longest, len(contains)) return longest
0 I would prefer this solution>> Time and Space Management Optimised: def lengthOfLongestSubstring(self, s: str) -> int: curlen = maxlen = 0 # curlen saves the max len of substrings ending with current num for i, num in enumerate(s): curlen -= s[i-curlen:i].find(num) maxlen = max(maxlen, curlen) return maxlen
Find Longest Substring in the string without repeating characters. def find_non_repeating_substring(input_str): output_length = 0 longest_sub_str = '' len_str = len(input_str) index = 0 while len_str != 1: l_str = '' for i in range(index, len(input_str)): if input_str[i] not in l_str: l_str = l_str + input_str[i] else: break sub_str_length = len(l_str) if sub_str_length > output_length: output_length = sub_str_length longest_sub_str = l_str len_str = len_str -1 index = index + 1 return output_length,longest_sub_str if __name__ == '__main__': input_str = raw_input("Please enter the string: ") sub_str_length, sub_str = find_non_repeating_substring(input_str) print ('Longest Substing lenght is "{0}" and the sub string is "{1}"'.format(sub_str_length, sub_str))```
Finding longest substring in alphabetical order
EDIT: I am aware that a question with similar task was already asked in SO but I'm interested to find out the problem in this specific piece of code. I am also aware that this problem can be solved without using recursion. The task is to write a program which will find (and print) the longest sub-string in which the letters occur in alphabetical order. If more than 1 equally long sequences were found, then the first one should be printed. For example, the output for a string abczabcd will be abcz. I have solved this problem with recursion which seemed to pass my manual tests. However when I run an automated tests set which generate random strings, I have noticed that in some cases, the output is incorrect. For example: if s = 'hixwluvyhzzzdgd', the output is hix instead of luvy if s = 'eseoojlsuai', the output is eoo instead of jlsu if s = 'drurotsxjehlwfwgygygxz', the output is dru instead of ehlw After some time struggling, I couldn't figure out what is so special about these strings that causes the bug. This is my code: pos = 0 maxLen = 0 startPos = 0 endPos = 0 def last_pos(pos): if pos < (len(s) - 1): if s[pos + 1] >= s[pos]: pos += 1 if pos == len(s)-1: return len(s) else: return last_pos(pos) return pos for i in range(len(s)): if last_pos(i+1) != None: diff = last_pos(i) - i if diff - 1 > maxLen: maxLen = diff startPos = i endPos = startPos + diff print s[startPos:endPos+1]
There are many things to improve in your code but making minimum changes so as to make it work. The problem is you should have if last_pos(i) != None: in your for loop (i instead of i+1) and you should compare diff (not diff - 1) against maxLen. Please read other answers to learn how to do it better. for i in range(len(s)): if last_pos(i) != None: diff = last_pos(i) - i + 1 if diff > maxLen: maxLen = diff startPos = i endPos = startPos + diff - 1
Here. This does what you want. One pass, no need for recursion. def find_longest_substring_in_alphabetical_order(s): groups = [] cur_longest = '' prev_char = '' for c in s.lower(): if prev_char and c < prev_char: groups.append(cur_longest) cur_longest = c else: cur_longest += c prev_char = c return max(groups, key=len) if groups else s Using it: >>> find_longest_substring_in_alphabetical_order('hixwluvyhzzzdgd') 'luvy' >>> find_longest_substring_in_alphabetical_order('eseoojlsuai') 'jlsu' >>> find_longest_substring_in_alphabetical_order('drurotsxjehlwfwgygygxz') 'ehlw' Note: It will probably break on strange characters, has only been tested with the inputs you suggested. Since this is a "homework" question, I will leave you with the solution as is, though there is still some optimization to be done, I wanted to leave it a little bit understandable.
You can use nested for loops, slicing and sorted. If the string is not all lower-case then you can convert the sub-strings to lower-case before comparing using str.lower: def solve(strs): maxx = '' for i in xrange(len(strs)): for j in xrange(i+1, len(strs)): s = strs[i:j+1] if ''.join(sorted(s)) == s: maxx = max(maxx, s, key=len) else: break return maxx Output: >>> solve('hixwluvyhzzzdgd') 'luvy' >>> solve('eseoojlsuai') 'jlsu' >>> solve('drurotsxjehlwfwgygygxz') 'ehlw'
Python has a powerful builtin package itertools and a wonderful function within groupby An intuitive use of the Key function can give immense mileage. In this particular case, you just have to keep a track of order change and group the sequence accordingly. The only exception is the boundary case which you have to handle separately Code def find_long_cons_sub(s): class Key(object): ''' The Key function returns 1: For Increasing Sequence 0: For Decreasing Sequence ''' def __init__(self): self.last_char = None def __call__(self, char): resp = True if self.last_char: resp = self.last_char < char self.last_char = char return resp def find_substring(groups): ''' The Boundary Case is when an increasing sequence starts just after the Decresing Sequence. This causes the first character to be in the previous group. If you do not want to handle the Boundary Case seperately, you have to mak the Key function a bit complicated to flag the start of increasing sequence''' yield next(groups) try: while True: yield next(groups)[-1:] + next(groups) except StopIteration: pass groups = (list(g) for k, g in groupby(s, key = Key()) if k) #Just determine the maximum sequence based on length return ''.join(max(find_substring(groups), key = len)) Result >>> find_long_cons_sub('drurotsxjehlwfwgygygxz') 'ehlw' >>> find_long_cons_sub('eseoojlsuai') 'jlsu' >>> find_long_cons_sub('hixwluvyhzzzdgd') 'luvy'
Simple and easy. Code : s = 'hixwluvyhzzzdgd' r,p,t = '','','' for c in s: if p <= c: t += c p = c else: if len(t) > len(r): r = t t,p = c,c if len(t) > len(r): r = t print 'Longest substring in alphabetical order is: ' + r Output : Longest substring in alphabetical order which appeared first: luvy
Here is a single pass solution with a fast loop. It reads each character only once. Inside the loop operations are limited to 1 string comparison (1 char x 1 char) 1 integer increment 2 integer subtractions 1 integer comparison 1 to 3 integer assignments 1 string assignment No containers are used. No function calls are made. The empty string is handled without special-case code. All character codes, including chr(0), are properly handled. If there is a tie for the longest alphabetical substring, the function returns the first winning substring it encountered. Case is ignored for purposes of alphabetization, but case is preserved in the output substring. def longest_alphabetical_substring(string): start, end = 0, 0 # range of current alphabetical string START, END = 0, 0 # range of longest alphabetical string yet found prev = chr(0) # previous character for char in string.lower(): # scan string ignoring case if char < prev: # is character out of alphabetical order? start = end # if so, start a new substring end += 1 # either way, increment substring length if end - start > END - START: # found new longest? START, END = start, end # if so, update longest prev = char # remember previous character return string[START : END] # return longest alphabetical substring Result >>> longest_alphabetical_substring('drurotsxjehlwfwgygygxz') 'ehlw' >>> longest_alphabetical_substring('eseoojlsuai') 'jlsu' >>> longest_alphabetical_substring('hixwluvyhzzzdgd') 'luvy' >>>
a lot more looping, but it gets the job done s = raw_input("Enter string") fin="" s_pos =0 while s_pos < len(s): n=1 lng=" " for c in s[s_pos:]: if c >= lng[n-1]: lng+=c n+=1 else : break if len(lng) > len(fin): fin= lng`enter code here` s_pos+=1 print "Longest string: " + fin
def find_longest_order(): `enter code here`arr = [] `enter code here`now_long = '' prev_char = '' for char in s.lower(): if prev_char and char < prev_char: arr.append(now_long) now_long = char else: now_long += char prev_char = char if len(now_long) == len(s): return now_long else: return max(arr, key=len) def main(): print 'Longest substring in alphabetical order is: ' + find_longest_order() main()
Simple and easy to understand: s = "abcbcd" #The original string l = len(s) #The length of the original string maxlenstr = s[0] #maximum length sub-string, taking the first letter of original string as value. curlenstr = s[0] #current length sub-string, taking the first letter of original string as value. for i in range(1,l): #in range, the l is not counted. if s[i] >= s[i-1]: #If current letter is greater or equal to previous letter, curlenstr += s[i] #add the current letter to current length sub-string else: curlenstr = s[i] #otherwise, take the current letter as current length sub-string if len(curlenstr) > len(maxlenstr): #if current cub-string's length is greater than max one, maxlenstr = curlenstr; #take current one as max one. print("Longest substring in alphabetical order is:", maxlenstr)
s = input("insert some string: ") start = 0 end = 0 temp = "" while end+1 <len(s): while end+1 <len(s) and s[end+1] >= s[end]: end += 1 if len(s[start:end+1]) > len(temp): temp = s[start:end+1] end +=1 start = end print("longest ordered part is: "+temp)
I suppose this is problem set question for CS6.00.1x on EDX. Here is what I came up with. s = raw_input("Enter the string: ") longest_sub = "" last_longest = "" for i in range(len(s)): if len(last_longest) > 0: if last_longest[-1] <= s[i]: last_longest += s[i] else: last_longest = s[i] else: last_longest = s[i] if len(last_longest) > len(longest_sub): longest_sub = last_longest print(longest_sub)
I came up with this solution def longest_sorted_string(s): max_string = '' for i in range(len(s)): for j in range(i+1, len(s)+1): string = s[i:j] arr = list(string) if sorted(string) == arr and len(max_string) < len(string): max_string = string return max_string
Assuming this is from Edx course: till this question, we haven't taught anything about strings and their advanced operations in python So, I would simply go through the looping and conditional statements string ="" #taking a plain string to represent the then generated string present ="" #the present/current longest string for i in range(len(s)): #not len(s)-1 because that totally skips last value j = i+1 if j>= len(s): j=i #using s[i+1] simply throws an error of not having index if s[i] <= s[j]: #comparing the now and next value string += s[i] #concatinating string if above condition is satisied elif len(string) != 0 and s[i] > s[j]: #don't want to lose the last value string += s[i] #now since s[i] > s[j] #last one will be printed if len(string) > len(present): #1 > 0 so from there we get to store many values present = string #swapping to largest string string = "" if len(string) > len(present): #to swap from if statement present = string if present == s[len(s)-1]: #if no alphabet is in order then first one is to be the output present = s[0] print('Longest substring in alphabetical order is:' + present)
I agree with #Abhijit about the power of itertools.groupby() but I took a simpler approach to (ab)using it and avoided the boundary case problems: from itertools import groupby LENGTH, LETTERS = 0, 1 def longest_sorted(string): longest_length, longest_letters = 0, [] key, previous_letter = 0, chr(0) def keyfunc(letter): nonlocal key, previous_letter if letter < previous_letter: key += 1 previous_letter = letter return key for _, group in groupby(string, keyfunc): letters = list(group) length = len(letters) if length > longest_length: longest_length, longest_letters = length, letters return ''.join(longest_letters) print(longest_sorted('hixwluvyhzzzdgd')) print(longest_sorted('eseoojlsuai')) print(longest_sorted('drurotsxjehlwfwgygygxz')) print(longest_sorted('abcdefghijklmnopqrstuvwxyz')) OUTPUT > python3 test.py luvy jlsu ehlw abcdefghijklmnopqrstuvwxyz >
s = 'azcbobobegghakl' i=1 subs=s[0] subs2=s[0] while(i<len(s)): j=i while(j<len(s)): if(s[j]>=s[j-1]): subs+=s[j] j+=1 else: subs=subs.replace(subs[:len(subs)],s[i]) break if(len(subs)>len(subs2)): subs2=subs2.replace(subs2[:len(subs2)], subs[:len(subs)]) subs=subs.replace(subs[:len(subs)],s[i]) i+=1 print("Longest substring in alphabetical order is:",subs2)
s = 'gkuencgybsbezzilbfg' x = s.lower() y = '' z = [] #creating an empty listing which will get filled for i in range(0,len(x)): if i == len(x)-1: y = y + str(x[i]) z.append(y) break a = x[i] <= x[i+1] if a == True: y = y + str(x[i]) else: y = y + str(x[i]) z.append(y) # fill the list y = '' # search of 1st longest string L = len(max(z,key=len)) # key=len takes length in consideration for i in range(0,len(z)): a = len(z[i]) if a == L: print 'Longest substring in alphabetical order is:' + str(z[i]) break
first_seq=s[0] break_seq=s[0] current = s[0] for i in range(0,len(s)-1): if s[i]<=s[i+1]: first_seq = first_seq + s[i+1] if len(first_seq) > len(current): current = first_seq else: first_seq = s[i+1] break_seq = first_seq print("Longest substring in alphabetical order is: ", current)