Here are my merge and mergeSort functions. merge merges two separate arrays and mergeSort sorts and merges them with recursion:
def merge(arrL, arrR):
    """Build a new list by repeatedly appending the smaller front element.

    Both arguments are measured with ``len()`` and indexed, so each must be
    a sequence; passing a bare number raises ``TypeError`` at the ``len()``
    call below.

    NOTE(review): this is the version being asked about and it has two
    visible problems, both left untouched here:
      * ``range (0, d-1)`` performs ``d - 1`` iterations, so at most
        ``d - 1`` of the ``d`` input elements are ever appended;
      * ``i`` and ``j`` are never compared with the list lengths, so once
        one list is used up the next comparison indexes past its end.
    """
    arrNew = []
    d = len(arrL) + len(arrR)
    i = 0
    j = 0
    for k in range (0, d-1):
        if (arrL[i] < arrR[j]) :
            arrNew.append(arrL[i]) # appends to the end of the array
            i = i + 1
        else:
            arrNew.append(arrR[j])
            j = j + 1
    return arrNew
def mergeSort(arr, m, n):
    """Recursively split the index range ``[m, n)`` of ``arr`` and merge the halves.

    NOTE(review): this is the version being asked about; its defects are
    left untouched here:
      * the base case returns ``arr[m]``, a single element rather than a
        list, and that value is then handed to ``merge``, which calls
        ``len()`` on it;
      * when ``n - m`` is 0 the midpoint ``p`` equals ``m`` and the first
        recursive call repeats with the same arguments.
    """
    if (n - m == 1):
        # Returns the element itself, not a one-element list.
        return arr[m]
    else:
        # Split at the midpoint and sort each half.
        p = (m + n) // 2
        arrL = mergeSort(arr, m, p)
        arrR = mergeSort(arr, p, n)
        arrNew = merge(arrL, arrR)
        return arrNew
I am getting an error from lines 32, 33 and 13:
d = len(arrL) + len(arrR)
TypeError: object of type 'int' has no len()
What is causing this error? merge is taking two arrays as inputs.
What is causing this error? merge is taking two arrays as inputs.
Except when it doesn't.
if(n-m == 1):
return arr[m]
This output of mergeSort is not an array.
My guess is it's this line
if(n-m == 1):
return arr[m]
which presumably is returning the content arr[m] of the array and not an array itself.
Since your code sorts arrays, when this naked element gets recursed on, it will generate the error you're seeing.
There are multiple problems in the code:
in mergeSort, you should return arr instead of arr[m] when the length of the array is less than 2. The test if (n - m == 1) does not allow for empty arrays:
if (n - m < 2):
return arr
in merge, the main loop should run d times, ie: instead of for k in range (0, d-1): you should write:
for k in range (d):
the test in the merge loop should also check if the index value is still in range. If the second slice is exhausted, the element arrL[i] should be selected:
for k in range (d):
if i < len(arrL) and (j >= len(arrR) or arrL[i] < arrR[j]):
arrNew.append(arrL[i])
i = i + 1
else:
arrNew.append(arrR[j])
j = j + 1
Here is a modified version:
def merge(arrL, arrR):
    """Merge two already-sorted sequences into one new sorted list.

    The merge is stable: when an element of ``arrL`` compares equal to an
    element of ``arrR``, the one from ``arrL`` comes first. Only the ``<``
    operator is used on the elements.

    Args:
        arrL: sorted list (left run).
        arrR: sorted list (right run).

    Returns:
        A new list holding every element of both inputs in sorted order.
        Neither input is modified.
    """
    arrNew = []
    i = 0
    j = 0
    len_l = len(arrL)
    len_r = len(arrR)
    while i < len_l and j < len_r:
        # Take from the right only when it is strictly smaller, so that
        # ties keep their left-before-right order (stability).
        if arrR[j] < arrL[i]:
            arrNew.append(arrR[j])
            j += 1
        else:
            arrNew.append(arrL[i])
            i += 1
    # At most one of these tails is non-empty; it is already sorted.
    arrNew.extend(arrL[i:])
    arrNew.extend(arrR[j:])
    return arrNew
def mergeSort(arr, m, n):
    """Return a sorted list of the elements ``arr[m:n]``.

    Args:
        arr: the list to sort; it is not modified.
        m: first index of the range (inclusive).
        n: end index of the range (exclusive).

    Returns:
        A new sorted list containing exactly the elements of ``arr[m:n]``.
    """
    if n - m < 2:
        # Zero or one element: return just that slice. Returning the whole
        # of ``arr`` here would feed unrelated elements into every merge.
        return arr[m:n]
    p = (m + n) // 2
    arrL = mergeSort(arr, m, p)
    arrR = mergeSort(arr, p, n)
    return merge(arrL, arrR)
So i started with the code from an answer to this question Function to find all common substrings in two strings not giving correct output and modified it a little bit to accommodate case-independence (i.e. AbCd is the same as ABCD as Abcd and so on) by turning the string to lowercase. However, for strings like 'ABCDXGHIJ' and 'ghijYAbCd', it only returns ['ghij'], not the desired output ['ABCD', 'GHIJ'].
Here are other examples:
'Bonywasawarrior' and 'Bonywasxwarrior' (output: ['Bonywas', 'warrior', 'wa'], desired output: ['Bonywas', 'warrior'])
'01101001' and '101010' (output: ['1010', '0', '1010', '01', '10', '01'], desired output: ['1010'])
here is my code:
# Reads a test-case count, then two strings per case, and prints the list of
# common runs found by sliding B along A (A[i + j] is compared with B[j]).
# NOTE(review): only non-negative shifts ``i`` of B against A are tried, so a
# run that starts earlier in A than in B is never aligned.
t = int(input()) #t cases
while t > 0:
    A = str(input()) #1st string
    B = str(input()) #2nd string
    # Lower-cased copies are used for comparison only; the collected text is
    # taken from B itself.
    low_A = A.lower()
    low_B = B.lower()
    answer = ""
    anslist=[]
    for i in range(len(A)):
        common = ""
        for j in range(len(B)):
            if (i + j < len(A) and low_A[i + j] == low_B[j]):
                common += B[j]
            else:
                #if (len(common) > len(answer)):
                # A mismatch (or running off the end of A) closes the
                # current run; runs of a single character are dropped here.
                answer = common
                if answer != '' and len(answer) > 1:
                    anslist.append(answer)
                common = ""
        # A run that reaches the end of B is appended without the length
        # check applied above.
        if common != '':
            anslist.append(common)
    if len(anslist) == 0:
        print('[]') #print if no common substring
    else:
        print(anslist)
    t -= 1
You can increment an offset in a while loop to keep concatenating common characters at the offset from the respective indices until they become different instead. To find the longest, non-overlapping common substrings, you can use a function that recursively traverses different paths of substring partitioning, and returns the one with the longest lengths of substrings:
def common_strings(a, b, i=0, j=0):
    """Return the best sequence of case-insensitive common substrings.

    The search starts at ``a[i]`` / ``b[j]``. At every position pair it may
    either take the maximal run of characters that match when lower-cased
    (only runs of two or more characters count) and continue right after
    that run in both strings, or move on to the next position of ``b``.
    When ``b`` is exhausted the scan restarts at the beginning of ``b`` from
    the next position of ``a``. Among all sequences reachable this way, the
    one whose substring lengths, sorted in descending order, compare
    greatest is returned; on a tie, taking the run is preferred.

    The result is computed bottom-up over all position pairs, so it needs
    no recursion and terminates for an empty ``b`` as well.

    Args:
        a: string whose slices (in their original case) are returned.
        b: string to compare against.
        i: non-negative start index into ``a``.
        j: non-negative start index into ``b``.

    Returns:
        A list of substrings of ``a`` in order of appearance; ``[]`` when
        there is no common run of at least two characters.
    """
    len_a = len(a)
    len_b = len(b)
    if i >= len_a:
        return []
    j = min(j, len_b)

    # Lower-case character by character so indices stay aligned with the
    # original strings even when lower-casing changes a character's length.
    fold_a = [ch.lower() for ch in a]
    fold_b = [ch.lower() for ch in b]

    def rank(parts):
        return sorted(map(len, parts), reverse=True)

    # best[x][y]: answer for the search started at a[x], b[y].
    # run[x][y]:  length of the matching run starting at a[x], b[y].
    # Row len_a and column len_b act as sentinels.
    best = [[()] * (len_b + 1) for _ in range(len_a + 1)]
    run = [[0] * (len_b + 1) for _ in range(len_a + 1)]
    for x in range(len_a - 1, i - 1, -1):
        row = best[x]
        run_row = run[x]
        # b exhausted: restart b from the next position of a.
        row[len_b] = best[x + 1][0]
        for y in range(len_b - 1, -1, -1):
            if fold_a[x] == fold_b[y]:
                run_row[y] = run[x + 1][y + 1] + 1
            skip = row[y + 1]
            span = run_row[y]
            if span > 1:
                take = (a[x:x + span],) + best[x + span][y + span]
                # ">=" keeps the "take" alternative on ties.
                row[y] = take if rank(take) >= rank(skip) else skip
            else:
                row[y] = skip
    return list(best[i][j])
so that:
print(common_strings('ABCDXGHIJ', 'ghijYAbCd'))
print(common_strings('Bonywasawarrior', 'Bonywasxwarrior'))
print(common_strings('01101001', '101010'))
outputs:
['ABCD', 'GHIJ']
['Bonywas', 'warrior']
['1010']
This is a duplicate of Finding all the common substrings of given two strings, which offers a solution in Java and for which I have done my best to translate to Python with the "enhancement" of making it case-insensitive:
def find_common(s, t):
    """Return all longest common substrings of ``s`` and ``t``, ignoring case.

    Args:
        s: first string; the returned substrings are slices of ``s`` and
            keep its original case.
        t: second string.

    Returns:
        A set with every distinct slice of ``s`` whose length equals the
        longest common (case-insensitive) substring length. The set is
        empty when the strings share no character.
    """
    # Lower-case one character at a time: lower-casing the whole string can
    # change its length (e.g. 'İ'), which would misalign the indices used
    # to slice ``s``.
    fold_s = [ch.lower() for ch in s]
    fold_t = [ch.lower() for ch in t]

    longest = 0
    result = set()
    # prev[j] / cur[j]: length of the common run ending at s[i-1] / s[i]
    # and t[j]. Only the previous row is ever needed.
    prev = [0] * len(fold_t)
    for i, ch1 in enumerate(fold_s):
        cur = [0] * len(fold_t)
        for j, ch2 in enumerate(fold_t):
            if ch1 != ch2:
                continue
            length = prev[j - 1] + 1 if j else 1
            cur[j] = length
            if length > longest:
                # A strictly longer run makes everything found so far obsolete.
                longest = length
                result.clear()
            if length == longest:
                result.add(s[i - length + 1:i + 1])
        prev = cur
    return result
print(find_common('Bonywasawarrior', 'Bonywasxwarrior'))
print(find_common('01101001', '101010'))
print(find_common('ABCDXGHIJ', 'ghijYAbCd'))
Prints:
{'Bonywas', 'warrior'}
{'1010'}
{'GHIJ', 'ABCD'}
I have two sorted lists of numbers A and B with B being at least as long as A. Say:
A = [1.1, 2.3, 5.6, 5.7, 10.1]
B = [0, 1.9, 2.4, 2.7, 8.4, 9.1, 10.7, 11.8]
I want to associate each number in A with a different number in B but preserving order. For any such mapping we define the total distance to be the sum of the squared distances between mapped numbers.
For example:
If we map 1.1 to 0 then 2.3 can be mapped to any number from 1.9 onwards. But if we had mapped 1.1 to 2.7, then 2.3 could only be mapped to a number in B from 8.4 onwards.
Say we map 1.1->0, 2.3->1.9, 5.6->8.4, 5.7->9.1, 10.1->10.7. This is a valid mapping and has distance (1.1^2+0.4^2+2.8^2+3.4^2+0.6^2).
Another example to show a greedy approach will not work:
A = [1, 2]
B = [0, 1, 10000]
If we map 1->1 then we have to map 2->10000 which is bad.
The task is to find the valid mapping with minimal total distance.
Is this hard to do? I am interested in a method that is fast when the lists are of length a few thousand.
And here is a O(n) solution! (This is the original attempt, see below for a fixed version.)
The idea is as follows. We first solve the problem for every other element, turn that into a very close solution, then use dynamic programming to find the real solution. This is solving a problem that is half the size first, followed by O(n) work. Using the fact that x + x/2 + x/4 + ... = 2x this turns out to be O(n) work.
This very, very much requires sorted lists. And doing a band that is 5 across is overkill, it very much looks like a band that is 3 across always gives the right answer, but I wasn't confident enough to go with that.
def improve_matching(list1, list2, matching):
    """Refine an approximate matching with a banded left-to-right pass.

    For every element ``list1[i]`` only the indices ``matching[i] + delta``
    with ``delta`` in -2..2 are tried. A candidate index ``j`` is chained
    either to the best index chosen for the previous element or, when that
    is not left of ``j``, to ``j - 1`` if the previous row reached it. The
    cost of a chain is the sum of squared differences. Passes repeat, each
    starting from the previous result, until no position moved by more than
    one index.

    Args:
        list1: values to be matched.
        list2: values to match against.
        matching: one starting index into ``list2`` per element of ``list1``.

    Returns:
        The refined list of indices into ``list2``, one per element of
        ``list1``.

    NOTE(review): if no candidate is feasible for some element the pass
    fails on the next lookup; callers are presumably expected to supply a
    starting matching that is close to a valid one - confirm.
    """
    while True:
        # Sentinel row: "nothing matched yet", cost 0, empty chain.
        best_j_last = -1
        last = {-1: (0.0, None)}
        for i in range(len(list1)):
            best_j = None
            best_cost = None
            this = {}
            # The order matters: with "<=" below, later deltas win ties.
            for delta in (-2, 2, -1, 1, 0):
                j = matching[i] + delta
                if j < 0 or len(list2) <= j:
                    continue
                j_prev = best_j_last
                if j <= j_prev:
                    if j - 1 not in last:
                        # Can't push the previous element back this far.
                        continue
                    j_prev = j - 1
                cost = last[j_prev][0] + (list1[i] - list2[j]) ** 2
                # Chains are linked lists [index, rest] so rows share tails.
                this[j] = (cost, [j, last[j_prev][1]])
                if best_j is None or cost <= best_cost:
                    best_j = j
                    best_cost = cost
            best_j_last = best_j
            last = this

        # Unroll the winning chain (stored last-to-first).
        linked_list = last[best_j_last][1]
        matching_rev = []
        while linked_list is not None:
            matching_rev.append(linked_list[0])
            linked_list = linked_list[1]
        matching_new = matching_rev[::-1]

        if all(abs(old - new) <= 1
               for old, new in zip(matching, matching_new)):
            return matching_new
        # Some position moved by more than one: the band may have been too
        # narrow, so run another pass centred on the new matching.
        print("Improving further")
        matching = matching_new
def _closest_index(value, candidates, start, stop):
    """Return the index in ``[start, stop)`` whose candidate is nearest ``value``.

    ``start`` is always considered, even when ``stop <= start``; on ties the
    smallest index wins.
    """
    best_j = start
    best_cost = (value - candidates[start]) ** 2
    for j in range(start + 1, stop):
        cost = (value - candidates[j]) ** 2
        if cost < best_cost:
            best_cost = cost
            best_j = j
    return best_j


def match_lists(list1, list2):
    """Match each element of ``list1`` to an index of ``list2`` (first attempt).

    The problem is first solved for every other element of both lists. That
    coarse result is doubled, each gap is filled with the nearest index
    lying between its neighbours, and the outcome is handed to
    ``improve_matching``.

    Args:
        list1: values to be matched.
        list2: values to match against.

    Returns:
        A list of indices into ``list2``, one per element of ``list1``.
    """
    if len(list1) == 0:
        return []
    if len(list1) == 1:
        return [_closest_index(list1[0], list2, 0, len(list2))]

    # Solve a smaller problem first.
    list1_smaller = [list1[2*i] for i in range((len(list1)+1)//2)]
    list2_smaller = [list2[2*i] for i in range((len(list2)+1)//2)]
    matching_smaller = match_lists(list1_smaller, list2_smaller)

    # Start with that matching: even positions come from the half problem.
    matching = [None] * len(list1)
    for i, j_small in enumerate(matching_smaller):
        matching[2*i] = 2*j_small

    # Fill in the holes between two known neighbours.
    for i in range(len(matching) - 1):
        if matching[i] is None:
            matching[i] = _closest_index(
                list1[i], list2, matching[i-1] + 1, matching[i+1])

    # And fill in the last one if needed (even-length list1).
    if matching[-1] is None:
        if matching[-2] + 1 == len(list2):
            # This will be an invalid matching, but improve will fix that.
            matching[-1] = matching[-2]
        else:
            # The seed cost must be measured from the element being placed,
            # list1[-1], not from its left neighbour.
            matching[-1] = _closest_index(
                list1[-1], list2, matching[-2] + 1, len(list2))

    # And now improve.
    return improve_matching(list1, list2, matching)
def best_matching(list1, list2):
    """Pair up the lists via ``match_lists`` and total the squared differences.

    Returns:
        ``(cost, pairs)`` where ``pairs[i]`` is ``(list1[i], matched value
        from list2)`` and ``cost`` is the float sum of squared differences.
    """
    chosen = match_lists(list1, list2)
    total = 0.0
    pairs = []
    for position, target in enumerate(chosen):
        left = list1[position]
        right = list2[target]
        pairs.append((left, right))
        total = total + (left - right)**2
    return (total, pairs)
UPDATE
There is a bug in the above. It can be demonstrated with match_lists([1, 3], [0, 0, 0, 0, 0, 1, 3]). However the solution below is also O(n) and I believe has no bugs. The difference is that instead of looking for a band of fixed width, I look for a band of width dynamically determined by the previous matching. Since no more than 5 entries can look to match at any given spot, it again winds up O(n) for this array and a geometrically decreasing recursive call. But long stretches of the same value cannot cause a problem.
def match_lists (list1, list2):
    """Match each element of ``list1`` to an index of ``list2``, left to right.

    For lists longer than one element the same problem is first solved for
    every other element of both lists; that coarse result limits which
    indices of ``list2`` are tried for each element. Costs are sums of
    squared differences.

    Returns a list of indices into ``list2``, one per element of ``list1``
    (empty when ``list1`` is empty).

    NOTE(review): presumably both lists must be sorted and ``list2`` at least
    as long as ``list1`` - confirm against the callers.
    """
    prev_matching = []
    if 0 == len(list1):
        # Trivial match
        return prev_matching
    elif 1 < len(list1):
        # Solve a smaller problem first.
        list1_smaller = [list1[2*i] for i in range((len(list1)+1)//2)]
        list2_smaller = [list2[2*i] for i in range((len(list2)+1)//2)]
        prev_matching = match_lists(list1_smaller, list2_smaller)
    # ``last`` maps an index of list2 to (cost so far, chain) for the
    # previous element; chains are linked lists [index, rest]. The -1 entry
    # is the "nothing matched yet" sentinel.
    best_j_last = -1
    last = {-1: (0.0, None)}
    for i in range(len(list1)):
        # Window of indices tried for element i: the whole of list2 unless
        # the half-size matching, two entries to either side, narrows it.
        lowest_j = 0
        highest_j = len(list2) - 1
        if 3 < i:
            lowest_j = 2 * prev_matching[i//2 - 2]
        if i + 4 < len(list1):
            highest_j = 2 * prev_matching[i//2 + 2]
        if best_j_last == highest_j:
            # Have to push it back.
            best_j_last = best_j_last - 1
        # Seed the row with the top of the window.
        best_cost = last[best_j_last][0] + (list1[i] - list2[highest_j])**2
        best_j = highest_j
        this = {best_j: (best_cost, [best_j, last[best_j_last][1]])}
        # Now try the others.
        for j in range(lowest_j, highest_j):
            prev_j = best_j_last
            if j <= prev_j:
                # The previous element must sit strictly left of j.
                prev_j = j - 1
            if prev_j not in last:
                continue
            else:
                cost = last[prev_j][0] + (list1[i] - list2[j])**2
                this[j] = (cost, [j, last[prev_j][1]])
                # Strict "<": the earliest index with the lowest cost wins.
                if cost < best_cost:
                    best_cost = cost
                    best_j = j
        last = this
        best_j_last = best_j
    # Unroll the winning chain, which is stored last element first.
    (final_cost, linked_list) = last[best_j_last]
    matching_rev = []
    while linked_list is not None:
        matching_rev.append( linked_list[0])
        linked_list = linked_list[1]
    matching_new = [x for x in reversed(matching_rev)]
    return matching_new
def best_matching(list1, list2):
    """Return ``(cost, pairs)`` for the matching found by ``match_lists``.

    ``pairs`` holds one ``(list1 value, list2 value)`` tuple per matched
    index; ``cost`` is the float sum of the squared differences of the pairs.
    """
    indices = match_lists(list1, list2)
    result = []
    cost = 0.0
    for k in range(len(indices)):
        a_value, b_value = list1[k], list2[indices[k]]
        result.append((a_value, b_value))
        cost = cost + (a_value - b_value)**2
    return (cost, result)
Note
I was asked to explain why this works.
Here is my heuristic understanding. In the algorithm we solve the half-problem. Then we have to solve the full problem.
The question is how far can an optimal solution for the full problem be forced to be from the optimal solution to the half problem? We push it to the right by having every element in list2 that wasn't in the half problem be large as possible, and every element in list1 that wasn't in the half problem be small as possible. But if we shove the ones from the half problem to the right, and put the duplicate elements where they were then modulo boundary effects, we've got 2 optimal solutions to the half problem and nothing moved by more than to where the next element right was in the half problem. Similar reasoning applies to trying to force the solution left.
Now let's discuss those boundary effects. Those boundary effects are at the end by 1 element. So when we try to shove an element off the end, we can't always. By looking 2 elements instead of 1 over, we add enough wiggle room to account for that as well.
Hence there has to be an optimal solution that is fairly close to the half problem doubled in an obvious way. There may be others, but there is at least one. And the DP step will find it.
I would need to do some work to capture this intuition into a formal proof, but I'm confident that it could be done.
Here's a recursive solution. Pick the middle element of a; map that to each possible element of b (leave enough on each end to accommodate the left and right sections of a). For each such mapping, compute the single-element cost; then recur on each of the left and right fragments of a and b.
Here's the code; I'll leave memoization as an exercise for the student.
test_case = [
[ [1, 2], [0, 1, 10] ],
[ [1.1, 2.3, 5.6, 5.7, 10.1], [0, 1.9, 2.4, 2.7, 8.4, 9.1, 10.7, 11.8] ],
]
import math
indent = ""
def best_match(a, b):
    """
    Find the best match for elements in a mapping to b, preserving order

    The middle element of ``a`` is tried against every element of ``b`` that
    leaves enough room on both sides for the rest of ``a``; the left and
    right fragments are solved the same way. Sub-problems are identified by
    their index ranges and cached, so each one is solved only once.

    Args:
        a: sequence of numbers to be matched.
        b: sequence of numbers to match against.

    Returns:
        ``(cost, mapping)`` where ``mapping`` is a list of
        ``(a_element, b_element)`` pairs in order and ``cost`` is the sum of
        their squared differences. ``(0, [])`` for an empty ``a``;
        ``(math.inf, [])`` when ``b`` is shorter than ``a``.
    """
    from functools import lru_cache

    @lru_cache(maxsize=None)
    def solve(a_lo, a_hi, b_lo, b_hi):
        # Best match of a[a_lo:a_hi] into b[b_lo:b_hi].
        if a_lo == a_hi:
            return 0, ()
        best_cost = math.inf
        best_map = ()
        # Match the middle element of the `a` range to each eligible
        # element of the `b` range.
        left_count = (a_hi - a_lo) // 2
        a_mid = a_lo + left_count
        a_elem = a[a_mid]
        first = b_lo + left_count
        last = first + (b_hi - b_lo) - (a_hi - a_lo)
        for b_pos in range(first, last + 1):
            b_elem = b[b_pos]
            mid_cost = (a_elem - b_elem) ** 2
            l_cost, l_map = solve(a_lo, a_mid, b_lo, b_pos)
            r_cost, r_map = solve(a_mid + 1, a_hi, b_pos + 1, b_hi)
            cand_cost = l_cost + mid_cost + r_cost
            # Strict "<": the leftmost position wins ties.
            if cand_cost < best_cost:
                best_cost = cand_cost
                best_map = l_map + ((a_elem, b_elem),) + r_map
        return best_cost, best_map

    cost, mapping = solve(0, len(a), 0, len(b))
    return cost, list(mapping)
for a, b in test_case:
print('\n', a, b)
print(best_match(a, b))
Output:
a = [1, 2]
b = [0, 1, 10]
2 [(1, 0), (2, 1)]
a = [1.1, 2.3, 5.6, 5.7, 10.1]
b = [0, 1.9, 2.4, 2.7, 8.4, 9.1, 10.7, 11.8]
16.709999999999997 [(1.1, 1.9), (2.3, 2.4), (5.6, 2.7), (5.7, 8.4), (10.1, 10.7)]
For giggles and grins, here is what is hopefully a much faster solution than either of the other working ones. The idea is simple. First we do a greedy match left to right. Then a greedy match right to left. This gives us bounds on where each element can go. Then we can do a DP solution left to right only looking at possible values.
If the greedy approaches agree, this will take linear time. If the greedy approaches are very far apart, this can take quadratic time. But the hope is that the greedy approaches produce reasonably close results, resulting in close to linear performance.
def match_lists(list1, list2):
    """Match ``list1`` into ``list2`` using greedy bounds plus a forward pass.

    A left-to-right greedy matching gives, for every element, the last index
    that is tried; a right-to-left greedy matching gives the first one. A
    left-to-right pass then tries only indices between those bounds,
    chaining each candidate to the best index of the previous element (or to
    the index just left of the candidate when that is not possible).

    Args:
        list1: values to be matched.
        list2: values to match against; expected to be at least as long
            as ``list1``.

    Returns:
        ``(cost, pairs)`` where ``pairs[i]`` is ``(list1[i], matched value
        from list2)`` and ``cost`` is the float sum of squared differences
        accumulated along the chosen chain.
    """
    n1 = len(list1)
    n2 = len(list2)

    # Greedy matching from left to right: advance while the next value is
    # at least as close, but always leave room for the remaining elements.
    match_last = []
    j = 0
    for i in range(n1):
        while (n2 - j > n1 - i
               and abs(list2[j + 1] - list1[i]) <= abs(list2[j] - list1[i])):
            j = j + 1
        match_last.append(j)
        j = j + 1

    # Greedy matching from right to left, leaving room for the elements
    # that still have to be placed before index j.
    match_first = [0] * n1
    j = n2 - 1
    for i in range(n1 - 1, -1, -1):
        while (j > i
               and abs(list2[j - 1] - list1[i]) <= abs(list2[j] - list1[i])):
            j = j - 1
        match_first[i] = j
        j = j - 1

    # Forward pass; chains are linked lists [index, rest] and the -1 entry
    # is the "nothing matched yet" sentinel.
    best_j_last = -1
    last = {-1: (0.0, None)}
    for i in range(n1):
        # We initialize with the last position we could choose.
        best_j = match_last[i]
        best_cost = last[best_j_last][0] + (list1[i] - list2[best_j]) ** 2
        this = {best_j: (best_cost, [best_j, last[best_j_last][1]])}
        # Now try the rest of the range of possibilities.
        for j in range(match_first[i], match_last[i]):
            j_prev = best_j_last
            if j <= j_prev:
                j_prev = j - 1  # Push back to the last place we could match
            if j_prev not in last:
                # The previous element never reached that index, so this
                # candidate cannot be chained to anything.
                continue
            cost = last[j_prev][0] + (list1[i] - list2[j]) ** 2
            this[j] = (cost, [j, last[j_prev][1]])
            if cost < best_cost:
                best_cost = cost
                best_j = j
        last = this
        best_j_last = best_j

    # Unroll the chain (stored last element first) and only then pair it
    # with list1, so that chosen[k] is paired with list1[k].
    (final_cost, linked_list) = last[best_j_last]
    chosen = []
    while linked_list is not None:
        chosen.append(linked_list[0])
        linked_list = linked_list[1]
    chosen.reverse()
    matching = [(value, list2[j]) for value, j in zip(list1, chosen)]
    return (final_cost, matching)
print(match_lists([1.1, 2.3, 5.6, 5.7, 10.1], [0, 1.9, 2.4, 2.7, 8.4, 9.1, 10.7, 11.8]))
Python is not very friendly with recursion so attempting to apply it to a list of thousands of elements might not fare so well. Here is a bottom-up approach that takes advantage of the optimal solution for any a from A as we increase the index for its potential partner from B being non-decreasing. (Works for both sorted and non-sorted input.)
def f(A, B):
    """Find the cheapest order-preserving matching of ``A`` into ``B``.

    ``m[i][j]`` holds ``(cost, index)``: the lowest total squared distance
    for matching ``A[0..i]`` using only ``B[0..j]``, and the index of ``B``
    that ``A[i]`` is matched to in that solution.

    Args:
        A: sequence of numbers to be matched.
        B: sequence of numbers to match against, at least as long as ``A``.

    Returns:
        ``(cost, pairs)`` where ``pairs[i]`` is ``(A[i], matched element of
        B)``. ``(0, [])`` when ``A`` is empty.

    Raises:
        ValueError: if ``B`` is shorter than ``A``.
    """
    n = len(A)
    # Number of positions each A[i] can take: B[i] .. B[i + width - 1].
    width = len(B) - n + 1
    if width < 1:
        raise ValueError("B must be at least as long as A")
    if n == 0:
        return (0, [])

    unreachable = (float('inf'), -1)
    m = [[unreachable] * len(B) for _ in range(n)]
    for i in range(n):
        for j in range(i, i + width):
            d = (A[i] - B[j]) ** 2
            candidate = d if i == 0 else d + m[i-1][j-1][0]
            # Either A[i] takes B[j], or the best solution that does not
            # use B[j] is carried over from the left.
            if j == i or candidate < m[i][j-1][0]:
                m[i][j] = (candidate, j)
            else:
                m[i][j] = m[i][j-1]

    result = m[n-1][len(B)-1][0]

    # Backtrack: each row tells where A[i] went; A[i-1] must go left of it.
    lst = [None] * n
    j = len(B) - 1
    for i in range(n - 1, -1, -1):
        j = m[i][j][1]
        lst[i] = j
        j = j - 1
    return (result, [(A[i], B[j]) for i, j in enumerate(lst)])
# Example runs. print() is called as a function so the lines run on
# Python 3 as well as Python 2.
A = [1, 2]
B = [0, 1, 10000]
print(f(A, B))
print("")
A = [1.1, 2.3, 5.6, 5.7, 10.1]
B = [0, 1.9, 2.4, 2.7, 8.4, 9.1, 10.7, 11.8]
print(f(A, B))
Output:
(2, [(1, 0), (2, 1)])
(16.709999999999997, [(1.1, 1.9), (2.3, 2.4), (5.6, 2.7), (5.7, 8.4), (10.1, 10.7)])
Update
Here's an O(|B|) space implementation. I'm not sure if this still offers a way to backtrack to get the mapping but I'm working on it.
def f(A, B):
    """Find the cheapest order-preserving matching of ``A`` into ``B``.

    Costs are kept in two rolling rows covering only the positions each
    element can take. To recover the mapping, one flag byte per feasible
    (element, position) cell records whether the element was placed there;
    the final row of costs alone is not enough to backtrack.

    Args:
        A: sequence of numbers to be matched.
        B: sequence of numbers to match against, at least as long as ``A``.

    Returns:
        ``(cost, pairs)`` where ``pairs[i]`` is ``(A[i], matched element of
        B)``. ``(0, [])`` when ``A`` is empty.

    Raises:
        ValueError: if ``B`` is shorter than ``A``.
    """
    n = len(A)
    # A[i] can only take B[i + k] for k in range(width).
    width = len(B) - n + 1
    if width < 1:
        raise ValueError("B must be at least as long as A")
    if n == 0:
        return (0, [])

    # prev[k]: best cost of matching A[0..i-1] using B[0..i-1+k].
    prev = None
    placed_rows = []
    for i in range(n):
        cur = [0] * width
        placed = bytearray(width)  # placed[k] == 1: A[i] sits on B[i + k]
        for k in range(width):
            d = (A[i] - B[i + k]) ** 2
            candidate = d if i == 0 else d + prev[k]
            if k == 0 or candidate < cur[k - 1]:
                cur[k] = candidate
                placed[k] = 1
            else:
                # Not using B[i + k] is at least as good: carry over.
                cur[k] = cur[k - 1]
        prev = cur
        placed_rows.append(placed)

    result = prev[width - 1]

    # Backtrack. Stepping from row i to row i-1 and one column left in B
    # keeps the same offset k, so k only ever moves left; placed[0] is
    # always set, which bounds the scan.
    lst = [None] * n
    k = width - 1
    for i in range(n - 1, -1, -1):
        row = placed_rows[i]
        while not row[k]:
            k -= 1
        lst[i] = i + k
    return (result, [(A[i], B[j]) for i, j in enumerate(lst)])
# Example runs. print() is called as a function so the lines run on
# Python 3 as well as Python 2.
A = [1, 2]
B = [0, 1, 10000]
print(f(A, B))
print("")
A = [1.1, 2.3, 5.6, 5.7, 10.1]
B = [0, 1.9, 2.4, 2.7, 8.4, 9.1, 10.7, 11.8]
print(f(A, B))

# Rough timing on random input; only the cost is printed.
import random
import time
A = [random.uniform(0, 10000.5) for i in range(10000)]
B = [random.uniform(0, 10000.5) for i in range(15000)]
start = time.time()
print(f(A, B)[0])
end = time.time()
print(end - start)