Longest Prefix That is Also a Substring in Second String - python

This code (adapted from a Prefix-Suffix code) is quite slow for larger corpora:
s1 = 'gafdggeg'
s2 = 'adagafrd'
Output: gaf
def pref_also_substr(s):
n = len(s)
for res in range(n, 0, -1):
prefix = s[0: res]
if (prefix in s1):
return res
# if no prefix and string2 match occurs
return 0
Any option for an efficient alternative?

I have another approach to solve this question. First you can find all substrings of s2 and replace the key in dictionary d with highest size.
s2 = "'adagafrd'"
# Get all substrings of string
# Using list comprehension + string slicing
substrings = [test_str[i: j] for i in range(len(test_str))
for j in range(i + 1, len(test_str) + 1)]
Now you can use startswith() function to check longest prefix from this list of substring and compare the size of substring.
s1 = 'gafdggeg'
d={}
for substring in substrings:
if s1.startswith(substring):
if not d:
d[substring]=len(substring)
else:
if len(substring)>list(d.values())[0]:
d={}
d[substring]=len(substring)
print(d)
Output:
{'gaf': 3}

def f(s1, s2):
for i in range(len(s1)):
i += 1
p = s1[:i]
if p in s2:
s2 = s2[s2.index(p):]
else:
return i - 1
Check the prefixes starting from length 1.
If find a prefix, discard the chars behind the prefix founded and continue searching.

Related

I need combine indexs of a tuple how can i do this

I was trying a bigger letter to the index of word i did it but it is not in the same of index
k = 'kars'
k = k[0].upper(),k[1],k[2],k[3],k[4]
it is distributing the letters how can i combine them
Python has a title() method:
k = 'kars'
print(k.title())
# OR
print(k[0].upper() + k[1:])
# OR
def uppercaseAtIndex(k, i):
newK = list(k)
newK[i] = newK[i].upper()
return ''.join(newK)
print(uppercaseAtIndex(k, 1))
Out:
Kars
Kars
kArs
Use a generator expression and enumerate to capitalize a specific index. Join the letters with str.join().
index = 1
''.join(letter.upper() if i==index else letter for i,letter in enumerate('thestring'))

Python Inserting a string

I need to insert a string (character by character) into another string at every 3rd position
For example:- string_1:-wwwaabkccgkll
String_2:- toadhp
Now I need to insert string2 char by char into string1 at every third position
So the output must be wwtaaobkaccdgkhllp
Need in Python.. even Java is ok
So i tried this
Test_str="hiimdumbiknow"
challenge="toadh"
new_st=challenge [k]
Last=list(test_str)
K=0
For i in range(Len(test_str)):
if(i%3==0):
last.insert(i,new_st)
K+=1
and the output i get
thitimtdutmbtiknow
You can split test_str into sub-strings to length 2, and then iterate merging them with challenge:
def concat3(test_str, challenge):
chunks = [test_str[i:i+2] for i in range(0,len(test_str),2)]
result = []
i = j = 0
while i<len(chunks) or j<len(challenge):
if i<len(chunks):
result.append(chunks[i])
i += 1
if j<len(challenge):
result.append(challenge[j])
j += 1
return ''.join(result)
test_str = "hiimdumbiknow"
challenge = "toadh"
print(concat3(test_str, challenge))
# hitimoduambdikhnow
This method works even if the lengths of test_str and challenge are mismatching. (The remaining characters in the longest string will be appended at the end.)
You can split Test_str in to groups of two letters and then re-join with each letter from challenge in between as follows;
import itertools
print(''.join(f'{two}{letter}' for two, letter in itertools.zip_longest([Test_str[i:i+2] for i in range(0,len(Test_str),2)], challenge, fillvalue='')))
Output:
hitimoduambdikhnow
*edited to split in to groups of two rather than three as originally posted
you can try this, make an iter above the second string and iterate over the first one and select which character should be part of the final string according the position
def add3(s1, s2):
def n():
try:
k = iter(s2)
for i,j in enumerate(s1):
yield (j if (i==0 or (i+1)%3) else next(k))
except:
try:
yield s1[i+1:]
except:
pass
return ''.join(n())
def insertstring(test_str,challenge):
result = ''
x = [x for x in test_str]
y = [y for y in challenge]
j = 0
for i in range(len(x)):
if i % 2 != 0 or i == 0:
result += x[i]
else:
if j < 5:
result += y[j]
result += x[i]
j += 1
get_last_element = x[-1]
return result + get_last_element
print(insertstring(test_str,challenge))
#output: hitimoduambdikhnow

How to find longest common substring of words in a list?

I have a list of words:
list1 = ['technology','technician','technical','technicality']
I want to check which phrase is repeated in each of the word. In this case, it is 'tech'.
I have tried converting all the characters to ascii values, but I am stuck there as I am unable to think of any logic.
Can somebody please help me with this?
This is generally called the Longest common substring/subsequence problem.
A very basic (but slow) strategy:
longest_substring = ""
curr_substring = ""
# Loop over a particular word (ideally, shortest).
for start_idx in range(shortest_word):
# Select a substring from that word.
for length in range(1, len(shortest_word) - start_idx):
curr_substring = shortest_word[start_idx : start_idx + length]
# Check if substring is present in all words,
# and exit loop or update depending on outcome.
if "curr_substring not in all words":
break
if "new string is longer":
longest_substring = curr_substring
Iterate over first word, increase length of prefix if there is only one prefix in all words checked by set, when difference in prefix is found return last result
list1 = ['technology', 'technician', 'technical', 'technicality']
def common_prefix(li):
s = set()
word = li[0]
while(len(s) < 2):
old_s = s
for i in range(1, len(word)):
s.add(word[:i])
return old_s.pop()
print(common_prefix(list1))
output: techn
Find the shortest word. Iterate over increasingly small chunks of the first word, starting with a chunk equal in length to the shortest word, checking that each is contained in all of the other strings. If it is, return that substring.
list1 = ['technology', 'technician', 'technical', 'technicality']
def shortest_common_substring(lst):
shortest_len = min(map(len, lst))
shortest_word = next((w for w in lst if len(w) == shortest_len), None)
for i in range(shortest_len, 1, -1):
for j in range(0, shortest_len - i):
substr = lst[0][j:i]
if all(substr in w for w in lst[1:]):
return substr
And just for fun, let's replace that loop with a generator expression, and just take the first thing it gives us (or None).
def shortest_common_substring(lst):
shortest_len = min(map(len, lst))
shortest_word = next((w for w in lst if len(w) == shortest_len), 0)
return next((lst[0][j:i] for i in range(shortest_len, 1, -1)
for j in range(0, shortest_len - i)
if all(lst[0][j:i] in w for w in lst[1:])),
None)

How to find the longest repeating sequence using python

I went through an interview, where they asked me to print the longest repeated character sequence.
I got stuck is there any way to get it?
But my code prints only the count of characters present in a string is there any approach to get the expected output
import pandas as pd
import collections
a = 'abcxyzaaaabbbbbbb'
lst = collections.Counter(a)
df = pd.Series(lst)
df
Expected output :
bbbbbbb
How to add logic to in above code?
A regex solution:
max(re.split(r'((.)\2*)', a), key=len)
Or without library help (but less efficient):
s = ''
max((s := s * (c in s) + c for c in a), key=len)
Both compute the string 'bbbbbbb'.
Without any modules, you could use a comprehension to go backward through possible sizes and get the first character multiplication that is present in the string:
next(c*s for s in range(len(a),0,-1) for c in a if c*s in a)
That's quite bad in terms of efficiency though
another approach would be to detect the positions of letter changes and take the longest subrange from those
chg = [i for i,(x,y) in enumerate(zip(a,a[1:]),1) if x!=y]
s,e = max(zip([0]+chg,chg+[len(a)]),key=lambda se:se[1]-se[0])
longest = a[s:e]
Of course a basic for-loop solution will also work:
si,sc = 0,"" # current streak (start, character)
ls,le = 0,0 # longest streak (start, end)
for i,c in enumerate(a+" "): # extra space to force out last char.
if i-si > le-ls: ls,le = si,i # new longest
if sc != c: si,sc = i,c # new streak
longest = a[ls:le]
print(longest) # bbbbbbb
A more long winded solution, picked wholesale from:
maximum-consecutive-repeating-character-string
def maxRepeating(str):
len_s = len(str)
count = 0
# Find the maximum repeating
# character starting from str[i]
res = str[0]
for i in range(len_s):
cur_count = 1
for j in range(i + 1, len_s):
if (str[i] != str[j]):
break
cur_count += 1
# Update result if required
if cur_count > count :
count = cur_count
res = str[i]
return res, count
# Driver code
if __name__ == "__main__":
str = "abcxyzaaaabbbbbbb"
print(maxRepeating(str))
Solution:
('b', 7)

Removing all instances of the second string from the first

The question states: Write code that takes two strings from the user, and returns what is left over if all instances of the second string is removed from the first. The second string is guaranteed to be no longer than two characters.
I started off with the following:
def remove(l1,l2):
string1 = l1
string2 = l2
result = ""
ctr = 0
while ctr < len(l1):
Since it cannot be longer than 2 characters I think I have to put in an if function as such:
if len(sub) == 2:
if (ctr + 1) < len(string) and string[ctr] == sub[0]
You could just use the replace method to remove all occurrences of the the second string from the first:
def remove(s1, s2):
return s1.replace(s2, "")
print remove("hello this is a test", "l")
For a manual method, you can use:
def remove(s1, s2):
newString = []
if len(s2) > 2:
return "The second argument cannot exceed two characters"
for c in s1:
if c not in s2:
newString.append(c)
return "".join(newString)
print remove("hello this is a test", "l")
Yields: heo this is a test
The code looks like this:
def remove(l1,l2):
string1 = l1
string2 = l2
ctr = 0
result = ""
while ctr < len(string1):
if string1[ctr : ctr + len(string2)] == string2:
ctr += len(string2)
else:
result += string1[ctr]
ctr += 1
return result
I got it resolved; just took me a little bit of time.
You could use list comprehension:
st1 = "Hello how are you"
st2 = "This is a test"
st3 = [i for i in st1 if i not in st2]
print ''.join(st3)
Using solely the slice method:
def remove_all(substr,theStr):
num=theStr.count(substr)
for i in range(len(theStr)):
finalStr=""
if theStr.find(substr)<0:
return theStr
elif theStr[i:i+len(substr)]==substr:
return theStr[0:i]+ theStr[i+len(substr*num):len(theStr)]
s1= input()
s2= input()
#get length of each string
l_s1,l_s2= len(s1),len(s2)
#list to store the answer
ans= list()
i=0
#check if more characters are left
#in s1 to b compared
#and length of substring of s1 remaining to
#be compared must be greater than or equal
#to the length of s2
while i<l_s1 and l_s1-i>=l_s2:
j=0
#compare the substring from s1 with s2
while j<l_s2 and s1[i+j]==s2[j]:
j+=1
#if string matches
#discard that substring of s1
#from solution
#and update the pointer i
#accordingly
if j==l_s2:
i+=j
#otherwise append the ith character to
#ans list
else:
ans.append(s1[i])
i+=1
#append if any characters remaining
while i<l_s1:
ans.append(s1[i])
i+=1
print(''.join(ans))
'''
Sample Testcase
1.
kapil
kd
kapil
2.
devansh
dev
ansh
3.
adarsh
ad
arsh
'''

Categories

Resources