strings order (left to right and right to left) - python

I am trying to understand how we can determine whether the second string (s2) follows the same character order as the first string (s1), regardless of whether s1 is read left to right or right to left:
examples:
qwer
asdf
Answer: No
abcdefghi
dfge
Answer: No
qwkedlrfid
kelid
Answer: Yes
abcdefghi
hcba
Answer: Yes
abacdfeag
bca
Answer: Yes (based on the last 'a' in the first string)
One thing that helps filter out "No" results: if a character of s2 does not exist in s1 at all, the answer is automatically "No" (see example 1).
My code is not complete and obviously does not return the right answers, but since the community usually wants to see some effort, I am sharing what I have so far. I am not sure how to proceed:
s1 = input()
s2 = input()
check = any(items in s1 for items in s2)
if check is not True or s1[-1] >= s2[0]:
    print("NO")
elif s2[-1] <= s1[0]:
    print("YES")

You can implement a stack-based backtracking mechanism yourself, or do it recursively for each letter.
I just chose to let Python's regex engine do the job:
import re

def check_contains(s1, s2):
    regex = f"{'.*'.join(s2)}|{'.*'.join(reversed(s2))}"
    return bool(re.search(regex, s1))
I basically create a regex that searches for each of the letters with anything in between, and the same for the reversed string.
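For illustration, here is the pattern this builds for the "qwkedlrfid" / "kelid" pair from the question:

import re

s1, s2 = "qwkedlrfid", "kelid"
regex = f"{'.*'.join(s2)}|{'.*'.join(reversed(s2))}"
print(regex)                       # k.*e.*l.*i.*d|d.*i.*l.*e.*k
print(bool(re.search(regex, s1)))  # True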
I took the liberty of improving on @Timus's answer. Unfortunately, as I anticipated, my regex solution causes catastrophic backtracking on long inputs. Preventing it is not simple, as it requires creating a group for each character or using the external regex module, neither of which I like.
Here is the improved version, which runs in O(n) (the fastest possible):
from functools import reduce

def check_contains(s1, s2):
    def _inner(s2):
        try:
            reduce(lambda location, letter: s1.index(letter, location + 1), s2, -1)
            return True
        except ValueError:
            return False
    return _inner(s2) or _inner(reversed(s2))
Keep in mind it's technically an 8-line solution, but I added comments, documentation, doctests, and optimizations, and made it production-ready. You can strip it down to your liking:
from functools import reduce
from contextlib import suppress
from typing import Iterable, Reversible, Sequence

def check_contains(s1: Sequence[object], s2: Iterable[object]) -> bool:
    """Check if s1 contains all items of s2 in order.

    Examples:
        >>> check_contains("abc", "b")
        True
        >>> check_contains("abc", "d")
        False
        >>> check_contains("abc", "ac")  # Skipping the middle letter
        True
        >>> check_contains("abcd", "cbd")  # Incorrect order
        False
        >>> check_contains("aaab", "aaaa")  # Repeating letters
        False
    """
    index = s1.index  # Cache the index method of the string (entirely optional)

    # Attempt a short circuit. s2 might not allow len().
    with suppress(TypeError):
        if len(s1) < len(s2):
            return False

    # We're using the index method instead of find for the short circuit.
    # Must use -1 and location + 1, otherwise s2 == "aaaa" would find all
    # a's in the same spot. Equivalent to (pseudocode):
    #   s2 = "abc"; pos = s1.index(letter, start)
    #   x = s1.index("a", 0); x = s1.index("b", x + 1); s1.index("c", x + 1)
    try:
        reduce(lambda location, letter: index(letter, location + 1), s2, -1)
        return True
    except ValueError:
        return False

# I do not think this function should exist.
def check_contains_including_reversed(
        s1: Sequence[object], s2: Reversible[object]) -> bool:
    """Check if s1 contains all items of s2 in order or in reversed order.

    Exactly like check_contains(), but includes the following examples:
        >>> check_contains_including_reversed("abc", "bc")  # Normal order
        True
        >>> check_contains_including_reversed("abc", "cb")  # Reversed order
        True
        >>> check_contains_including_reversed("abcd", "cbd")  # Incorrect order
        False
    """
    return check_contains(s1, s2) or check_contains(s1, reversed(s2))
As an added bonus - if you wish to know the position of the letters, just replace functools.reduce with itertools.accumulate.
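For illustration, a minimal sketch of that accumulate variant (the helper name find_positions is my own, and accumulate's initial= argument requires Python 3.8+):

from itertools import accumulate

def find_positions(s1, s2):
    """Return the index in s1 of each letter of s2, matched left to right.

    Raises ValueError if s2 is not an in-order subsequence of s1.
    """
    # Same trick as the reduce version: start at -1 and search from
    # location + 1 so repeated letters don't match the same position.
    positions = accumulate(s2, lambda loc, ch: s1.index(ch, loc + 1), initial=-1)
    return list(positions)[1:]  # drop the initial -1

print(find_positions("qwkedlrfid", "kelid"))  # [2, 3, 5, 8, 9]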

Here's a version without regex, using string slicing and str.find instead:
def check(s1, s2):
    i = 0
    for c in s2:  # looping over the characters in s2
        if i < len(s1):
            incr = s1[i:].find(c) + 1  # looking for c in the rest of s1
            if incr == 0:  # c not found
                break
            i += incr
        else:  # end of s1 reached, but still c's to cover
            break
    else:  # loop went through without a break -> found
        return True
    return False  # loop exited with a break -> not found

def check_contains(s1, s2):
    return check(s1, s2) or check(s1[::-1], s2)
Your examples:
strings = [("qwer", "asdf"), ("abcdefghi", "dfge"), ("qwkedlrfid", "kelid"), ("abcdefghi", "hcba"), ("abacdfeag", "bca")]
for s1, s2 in strings:
print(check_contains(s1, s2))
Result:
False
False
True
True
True
EDIT: check is an obvious candidate for a recursive implementation, which is more compact and performs in the same range:
def check(s1, s2):
    if not s2:
        return True
    if len(s1) < len(s2):
        return False
    i = s1.find(s2[0]) + 1
    if i == 0:
        return False
    return check(s1[i:], s2[1:])
(I also added the sanity check if len(s1) < len(s2): return False.)
I've played around a bit with performance measurements: it seems to me that Bharel's version has an edge over this one for the kind of strings you've provided, but this changes when the strings to search in get larger. I tried the following (check_contains_1 is Bharel's solution, check_contains_2 is the one in this answer):
from random import choices, randint
from string import ascii_lowercase as chars
from time import perf_counter

num = 10_000
max_len_1, max_len_2 = 50, 5
strings = [
    (
        "".join(choices(chars, k=randint(2, max_len_1))),
        "".join(choices(chars, k=randint(2, max_len_2)))
    )
    for _ in range(num)
]

start = perf_counter()
result_1 = [check_contains_1(s1, s2) for s1, s2 in strings]
end = perf_counter()
print(f"Version 1: {end - start:.2f} secs")

start = perf_counter()
result_2 = [check_contains_2(s1, s2) for s1, s2 in strings]
end = perf_counter()
print(f"Version 2: {end - start:.2f} secs")

print(result_1 == result_2)
Output:
Version 1: 1.85 secs
Version 2: 0.04 secs
True
But maybe I made a mistake ...

Related

Remove equal characters from two python strings

I am writing Python code to remove identical characters that lie at the same indices in two strings. For example, remove_same('ABCDE', 'ACBDE') should reduce the two arguments to BC and CB. I know that strings are immutable, so I have converted them to lists. I am getting an index-out-of-range error.
def remove_same(l_string, r_string):
    l_list = list(l_string)
    r_list = list(r_string)
    i = 0
    while i != len(l_list):
        print(f'in {i} length is {len(l_list)}')
        while l_list[i] == r_list[i]:
            l_list.pop(i)
            r_list.pop(i)
            if i == len(l_list) - 1:
                break
        if i != len(l_list):
            i += 1
    return l_list[0] == r_list[0]
I would avoid using a while loop in that case. I think this is a better and clearer solution:
def remove_same(s1, s2):
    l1 = list(s1)
    l2 = list(s2)
    out1 = []
    out2 = []
    for c1, c2 in zip(l1, l2):
        if c1 != c2:
            out1.append(c1)
            out2.append(c2)
    s1_out = "".join(out1)
    s2_out = "".join(out2)
    print(s1_out)
    print(s2_out)
It could be shortened using list comprehensions, but I was trying to be as explicit as possible.
I feel this could be the problem:
while l_list[i] == r_list[i]:
    l_list.pop(i)
    r_list.pop(i)
This shrinks the lists, so their length can drop below i.
Do a dry run with l_list = ["a"] and r_list = ["a"].
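A minimal trace of that failure (my own illustration):

l_list, r_list = ["a"], ["a"]
while l_list[0] == r_list[0]:  # True on the first pass...
    l_list.pop(0)
    r_list.pop(0)
# ...but the condition is then re-evaluated on two empty lists,
# raising IndexError: list index out of range
# (the question's i == len(l_list) - 1 guard doesn't help here: 0 != -1)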
It is in general not a good idea to modify a list in a loop. Here is a cleaner, more Pythonic solution. The two strings are zipped and processed in parallel. Each pair of equal characters is discarded, and the remaining characters are arranged into new strings.
a = 'ABCDE'
b = 'ACFDE'

def remove_same(s1, s2):
    return ["".join(s) for s
            in zip(*[(x, y) for x, y in zip(s1, s2) if x != y])]

remove_same(a, b)
# ['BC', 'CF']
Here you go:
def remove_same(l_string, r_string):
    # if either string is empty, return False
    if not l_string or not r_string:
        return False
    l_list = list(l_string)
    r_list = list(r_string)
    limit = min(len(l_list), len(r_list))
    i = 0
    while i < limit:
        if l_list[i] == r_list[i]:
            l_list.pop(i)
            r_list.pop(i)
            limit -= 1
        else:
            i += 1
    return l_list[0] == r_list[0]

print(remove_same('ABCDE', 'ACBDE'))
Output:
False

How to write a function to find the longest common subsequence using dynamic programming?

To be clear, I am looking for the subsequence itself, not its length. I have written this function, which works the majority of the time, but in some cases it doesn't. I have to write this recursively, without any loops or imports. I used a memoise function to be more efficient, but didn't include it here.
This function works when s1 = "abcde" and s2 = "qbxxd" (it correctly returns "bd"), but it doesn't work when s1 = "Look at me, I can fly!" and s2 = "Look at that, it's a fly", which should return "Look at , a fly"; instead I get "Look at a fly". For whatever reason the comma and the space are ignored. I've tried s1 = "ab, cde" and s2 = "qbxx, d", which correctly returns "b, d".
def lcs(s1, s2):
    """Return the longest common subsequence of s1 and s2."""
    i = len(s1)
    j = len(s2)
    if i == 0 or j == 0:
        return ""
    if s1[i-1] == s2[j-1]:
        return lcs(s1[:-1], s2[:-1]) + s1[i-1]
    else:
        return max(lcs(s1[:-1], s2), lcs(s1, s2[:-1]))
I have a feeling the problem is with the last line and the max function. I've seen solutions with for and while loops, but not without them.
There's only a slight change needed to fix your code (you're right, the problem was in max). Just change max so that it finds the string of maximum length, using its key function:
def lcs(s1, s2):
    """Return the longest common subsequence of s1 and s2."""
    i = len(s1)
    j = len(s2)
    if i == 0 or j == 0:
        return ""
    if s1[i-1] == s2[j-1]:
        return lcs(s1[:-1], s2[:-1]) + s1[i-1]
    else:
        # Find the max based upon the string length
        return max(lcs(s1[:-1], s2), lcs(s1, s2[:-1]), key=len)
However, this is very slow without memoization.
Here is the code with memoization, to improve performance. The memoize decorator is taken from a standard memoization decorator reference:
import functools

def memoize(obj):
    cache = obj.cache = {}

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        if args not in cache:
            cache[args] = obj(*args, **kwargs)
        return cache[args]
    return memoizer

@memoize
def lcs(s1, s2):
    """Return the longest common subsequence of s1 and s2."""
    i = len(s1)
    j = len(s2)
    if i == 0 or j == 0:
        return ""
    if s1[i-1] == s2[j-1]:
        return lcs(s1[:-1], s2[:-1]) + s1[i-1]
    else:
        return max(lcs(s1[:-1], s2), lcs(s1, s2[:-1]), key=len)
Test
s1 = "Look at me, I can fly!"
s2 = "Look at that, it's a fly"
print(lcs(s1, s2))
Output
Look at , a fly
For strings, max picks the string that sorts last lexicographically:
>>> max("a", "b")
'b'
>>> max("aaaaa", "b")
'b'
Certainly not what you need; you are looking for the longer of the two.
You don't need a loop, just a comparison:
lcs1 = lcs(s1[:-1], s2)
lcs2 = lcs(s1, s2[:-1])
return lcs1 if len(lcs1) > len(lcs2) else lcs2
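As a side note (my addition, not part of the answers above): since the two string arguments are hashable, the standard library's functools.lru_cache can stand in for the hand-rolled memoize decorator:

from functools import lru_cache

@lru_cache(maxsize=None)
def lcs(s1, s2):
    """Return the longest common subsequence of s1 and s2."""
    if not s1 or not s2:
        return ""
    if s1[-1] == s2[-1]:
        return lcs(s1[:-1], s2[:-1]) + s1[-1]
    return max(lcs(s1[:-1], s2), lcs(s1, s2[:-1]), key=len)

print(lcs("Look at me, I can fly!", "Look at that, it's a fly"))
# Look at , a fly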

Isomorphic strings algorithm in Python

Question:
Given two strings s and t, determine if they are isomorphic.
Two strings are isomorphic if the characters in s can be replaced to get t.
All occurrences of a character must be replaced with another character while preserving the order of characters. No two characters may map to the same character but a character may map to itself.
My code:
def isIsomorphic(self, s, t):
    # write your code here
    remap = dict()
    if s == t:
        return True
    if len(s) != len(t):
        return False
    for i in range(len(s)):
        if s[i] not in remap.keys() and t[i] in remap.values():
            return False
        elif s[i] not in remap.keys():
            remap[s[i]] = t[i]
        else:
            if remap[s[i]] != t[i]:
                return False
    return True
Error hint:
Your code ran too much time than we expected. Check your time complexity. Time limit exceeded usually caused by infinite loop if your time complexity is the best.
Please advise how I can improve my code.
The strings will be isomorphic if the number of unique characters in each string is the same as the number of unique pairs of corresponding characters between them (they also have to be the same length).
So this function will do it concisely and much faster:
def isIsomorphic(w1, w2):
    if len(w1) != len(w2):
        return False
    return len(set(w1)) == len(set(w2)) == len(set(zip(w1, w2)))
[EDIT] 3.3 seconds on my computer for 1 million iterations of a pair of 25 character strings (vs 12 seconds for Aran-Fey's updated code).
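A quick usage check of the set-based version:

print(isIsomorphic("egg", "add"))  # True  (e -> a, g -> d)
print(isIsomorphic("foo", "bar"))  # False (o would have to map to both a and r)
print(isIsomorphic("ab", "aa"))    # False (a and b would both map to a)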
A good way to do this is to normalize your strings:
import re, string

def normalize(s):
    key = {}
    def replace_ltr(match):
        ltr = match.group(1)
        if ltr not in key:
            key[ltr] = string.printable[len(key)]
        return key[ltr]
    return re.sub("([a-zA-Z])", replace_ltr, s)

print(normalize("Hello"))
print(normalize("ratty"))
print(normalize("SAS") == normalize("QBQ"))
Once you do that, you can simply compare the normalized versions:
def can_transform(s1, s2):
    return normalize(s1) == normalize(s2)
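For example (a quick check of my own):

print(can_transform("egg", "add"))      # True:  both normalize to "011"
print(can_transform("paper", "title"))  # True:  both normalize to "01023"
print(can_transform("foo", "bar"))      # False: "011" vs "012"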
Pulled from Understanding isomorphic strings algorithm
from itertools import groupby
from collections import defaultdict

def isomorphic(a, b):
    a_idxs, b_idxs = defaultdict(set), defaultdict(set)
    for idx, ((a_grp, a_vals), (b_grp, b_vals)) in enumerate(zip(groupby(a), groupby(b))):
        # ensure the runs are of the same length
        if sum(1 for _ in a_vals) != sum(1 for _ in b_vals):
            return False
        # ensure previous occurrences are matching groups
        if a_grp in a_idxs and b_idxs[b_grp] != a_idxs[a_grp] or \
           b_grp in b_idxs and a_idxs[a_grp] != b_idxs[b_grp]:
            return False
        # save indexes for future checks
        a_idxs[a_grp].add(idx)
        b_idxs[b_grp].add(idx)
    return True
One problem in your code is this part:
if ... and t[i] in remap.values():
Since remap.values() is not a set or a dict, membership testing with in is an O(n) operation. This can slow down your code significantly if many characters have to be remapped.
You can speed this up by storing the remapped characters in a set:
def isIsomorphic(s, t):
    remap = dict()
    if s == t:
        return True
    if len(s) != len(t):
        return False
    remapped = set()  # <- add this
    for i in range(len(s)):
        if s[i] not in remap.keys() and t[i] in remapped:  # <- change this
            return False
        elif s[i] not in remap.keys():
            remap[s[i]] = t[i]
            remapped.add(t[i])  # <- and this
        else:
            if remap[s[i]] != t[i]:
                return False
    return True
Timed on two strings with 25 remapped characters and 1 million iterations, we notice a significant speedup:
original code 26.817705629997363 seconds
updated code 19.41265572499833 seconds
I need to check that the mapping between the characters of string 1 and string 2 is consistent in both directions, so I use two dicts: a forward mapping mapper and a reverse mapping revmap.
dict.setdefault is doing a lot of the heavy lifting here - a useful method to know.
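In case dict.setdefault is unfamiliar: it returns the existing value for a key, or inserts the given default and returns it if the key is absent:

d = {}
print(d.setdefault("a", 1))  # 1 -- "a" was absent, so it is inserted with value 1
print(d.setdefault("a", 2))  # 1 -- "a" is already present, the existing value wins
print(d)                     # {'a': 1}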
I was set on writing it as one long main expression, hence the style. I only got this far by creating a few tests, too.
def is_iso(s1, s2):
    mapper, revmap = {}, {}
    return (len(s1) == len(s2)
            and all(((ch1 not in mapper and ch2 not in revmap) or
                     (ch1 in mapper and ch2 in revmap))
                    and ch2 == mapper.setdefault(ch1, ch2)
                    and ch1 == revmap.setdefault(ch2, ch1)
                    for ch1, ch2 in zip(s1, s2))
            ), ' '.join(f'{fr}<->{to}' for fr, to in mapper.items())
The tests:
for s1, s2 in [("11", "aa"), ("ab", "aa"), ('abc', 'aaa'), ("foo", "bar"),
("egg", "add"), ("paper", "title"), ('aabccd', '112334'),
('aabccc', '112334')]:
print( f'is_iso({s1!r}, {s2!r}) = %s \t# mappings: %s' % is_iso(s1, s2))
Output:
is_iso('11', 'aa') = True # mappings: 1<->a
is_iso('ab', 'aa') = False # mappings: a<->a
is_iso('abc', 'aaa') = False # mappings: a<->a
is_iso('foo', 'bar') = False # mappings: f<->b o<->a
is_iso('egg', 'add') = True # mappings: e<->a g<->d
is_iso('paper', 'title') = True # mappings: p<->t a<->i e<->l r<->e
is_iso('aabccd', '112334') = True # mappings: a<->1 b<->2 c<->3 d<->4
is_iso('aabccc', '112334') = False # mappings: a<->1 b<->2 c<->3

Edit Distance with accents

Is there an edit distance implementation in Python that takes accents into account? One where, for example, the following property holds:
d('ab', 'ac') > d('àb', 'ab') > 0
With the Levenshtein module:
In [1]: import unicodedata, string
In [2]: from Levenshtein import distance
In [3]: def remove_accents(data):
...: return ''.join(x for x in unicodedata.normalize('NFKD', data)
...: if x in string.ascii_letters).lower()
In [4]: def norm_dist(s1, s2):
...: norm1, norm2 = remove_accents(s1), remove_accents(s2)
...: d1, d2 = distance(s1, s2), distance(norm1, norm2)
...: return (d1+d2)/2.
In [5]: norm_dist(u'ab', u'ac')
Out[5]: 1.0
In [6]: norm_dist(u'àb', u'ab')
Out[6]: 0.5
Unicode allows decomposition of accented characters into the base character plus a combining accent character; e.g. à decomposes into a followed by a combining grave accent.
You want to convert both strings using normalization form NFKD, which decomposes accented characters and converts compatibility characters to their canonical forms, then use an edit distance metric that ranks substitutions above insertions and deletions.
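A quick demonstration of the decomposition (my own check):

import unicodedata

decomposed = unicodedata.normalize('NFKD', 'à')
print(len(decomposed))  # 2
print([unicodedata.name(ch) for ch in decomposed])
# ['LATIN SMALL LETTER A', 'COMBINING GRAVE ACCENT']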
Here's a solution based on difflib and unicodedata with no dependencies whatsoever:
import unicodedata
from difflib import Differ

# function taken from https://stackoverflow.com/a/517974/1222951
def remove_accents(input_str):
    nfkd_form = unicodedata.normalize('NFKD', input_str)
    only_ascii = nfkd_form.encode('ASCII', 'ignore').decode()
    return only_ascii

def compare(wrong, right):
    # normalize both strings to make sure equivalent (but
    # different) unicode characters are canonicalized
    wrong = unicodedata.normalize('NFKC', wrong)
    right = unicodedata.normalize('NFKC', right)

    num_diffs = 0
    index = 0
    differences = list(Differ().compare(wrong, right))
    while True:
        try:
            diff = differences[index]
        except IndexError:
            break

        # diff is a string like "+ a" (meaning the character "a" was inserted)
        # extract the operation and the character
        op = diff[0]
        char = diff[-1]

        # if the character isn't equal in both
        # strings, increase the difference counter
        if op != ' ':
            num_diffs += 1

        # if a character is wrong, there will be two operations: one
        # "+" and one "-" operation
        # we want to count this as a single mistake, not as two mistakes
        if op in '+-':
            try:
                next_diff = differences[index+1]
            except IndexError:
                pass
            else:
                next_op = next_diff[0]
                if next_op in '+-' and next_op != op:
                    # skip the next operation, we don't want to count
                    # it as another mistake
                    index += 1

                    # we know that the character is wrong, but
                    # how wrong is it?
                    # if the only difference is the accent, it's
                    # a minor mistake
                    next_char = next_diff[-1]
                    if remove_accents(char) == remove_accents(next_char):
                        num_diffs -= 0.5

        index += 1

    # output the difference as a ratio of
    # (# of wrong characters) / (length of longest input string)
    return num_diffs / max(len(wrong), len(right))
Tests:
for w, r in (('ab','ac'),
('àb','ab'),
('être','etre'),
('très','trés'),
):
print('"{}" and "{}": {}% difference'.format(w, r, compare(w, r)*100))
"ab" and "ac": 50.0% difference
"àb" and "ab": 25.0% difference
"être" and "etre": 12.5% difference
"très" and "trés": 12.5% difference

Python set intersection question

I have two lists of three sets each:
s0 = [set([16, 9, 2, 10]), set([16, 14, 22, 15]), set([14, 7])]  # true, 16 and 14
s1 = [set([16, 9, 2, 10]), set([16, 14, 22, 15]), set([7, 8])]   # false
I want a function that will return True if every set in the list intersects with at least one other set in the list. Is there a built-in for this, or a simple list comprehension?
all(any(a & b for a in s if a is not b) for b in s)
Here's a very simple solution that's very efficient for large inputs:
def g(s):
    import collections
    count = collections.defaultdict(int)
    for a in s:
        for x in a:
            count[x] += 1
    return all(any(count[x] > 1 for x in a) for a in s)
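Usage, with the s0 and s1 from the question. An element with count > 1 necessarily appears in at least two sets, which is exactly the witness of an intersection that each set needs:

print(g(s0))  # True  -- every set contains an element seen in another set
print(g(s1))  # False -- no element of {7, 8} appears in any other set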
It's a little verbose, but I think it's a pretty efficient solution. It takes advantage of the fact that when two sets intersect, we can mark them both as connected. It does this by keeping a list of flags, one per set in the list. When set i and set j intersect, it sets the flag for both of them. It then loops over the list of sets and only tries to find an intersection for sets that haven't already been intersected. After reading the comments, I think this is what @Victor was talking about.
s0 = [set([16, 9, 2, 10]), set([16, 14, 22, 15]), set([14, 7])]  # true, 16 and 14
s1 = [set([16, 9, 2, 10]), set([16, 14, 22, 15]), set([7, 8])]   # false

def connected(sets):
    L = len(sets)
    if not L:
        return True
    if L == 1:
        return False
    passed = [False] * L
    i = 0
    while True:
        while passed[i]:
            i += 1
            if i == L:
                return True
        for j, s in enumerate(sets):
            if j == i:
                continue
            if sets[i] & s:
                passed[i] = passed[j] = True
                break
        else:
            return False

print(connected(s0))
print(connected(s1))
I decided that an empty list of sets is connected (if you produce an element of the list, I can produce an element that it intersects ;). A list with only one element is trivially disconnected. It's a one-line change in either case if you disagree.
Here's a more efficient (if much more complicated) solution that performs a linear number of intersections and O(n log n) unions, where n is the length of s:
def f(s):
    import math
    j = int(math.log(len(s) - 1, 2)) + 1
    unions = [set()] * (j + 1)
    for i, a in enumerate(s):
        unions[:j] = [set.union(set(), *s[i+2**k:i+2**(k+1)]) for k in range(j)]
        if not (a & set.union(*unions)):
            return False
        j = int(math.log(i ^ (i + 1), 2))
        unions[j] = set.union(a, *unions[:j])
    return True
Note that this solution only works on Python >= 2.6.
As usual I'd like to give the inevitable itertools solution ;-)
from itertools import combinations, groupby
from operator import itemgetter

def any_intersects(sets):
    # we are doing stuff with combinations of sets
    combined = combinations(sets, 2)
    # group these combinations by their first set
    grouped = (g for k, g in groupby(combined, key=itemgetter(0)))
    # are there any intersections in each group?
    intersected = (any((a & b) for a, b in group) for group in grouped)
    return all(intersected)

s0 = [set([16, 9, 2, 10]), set([16, 14, 22, 15]), set([14, 7])]
s1 = [set([16, 9, 2, 10]), set([16, 14, 22, 15]), set([7, 8])]

print(any_intersects(s0))  # True
print(any_intersects(s1))  # False
This is really lazy and will only do the intersections that are required. It can also be a very confusing and unreadable one-liner ;-)
To answer your question: no, there isn't a built-in or a simple list comprehension that does what you want. Here's another itertools-based solution that is very efficient; surprisingly, it is about twice as fast as @THC4k's itertools answer using groupby() in timing tests on your sample input. It could probably be optimized a bit further, but it is very readable as presented. Like @AaronMcSmooth, I arbitrarily decided what to return when there are no sets or only one set in the input list.
from itertools import combinations

def all_intersect(sets):
    N = len(sets)
    if not N:
        return True
    if N == 1:
        return False
    intersected = [False] * N
    for i, j in combinations(range(N), 2):
        if not intersected[i] or not intersected[j]:
            if sets[i] & sets[j]:
                intersected[i] = intersected[j] = True
    return all(intersected)
This strategy isn't likely to be as efficient as @Victor's suggestion, but it might be more efficient than jchl's answer due to the increased use of set arithmetic (union).
s0 = [set([16, 9, 2, 10]), set([16, 14, 22, 15]), set([14, 7])]
s1 = [set([16, 9, 2, 10]), set([16, 14, 22, 15]), set([7, 8])]

def freeze(list_of_sets):
    """Transform a list of sets into a frozenset of frozensets."""
    return frozenset(frozenset(set_) for set_ in list_of_sets)

def all_sets_have_relatives(set_of_sets):
    """Check if all sets have another set that they intersect with.

    >>> all_sets_have_relatives(s0)  # true, 16 and 14
    True
    >>> all_sets_have_relatives(s1)  # false
    False
    """
    set_of_sets = freeze(set_of_sets)
    def has_relative(set_):
        return set_ & frozenset.union(*(set_of_sets - set((set_,))))
    return all(has_relative(set_) for set_ in set_of_sets)
This may give better performance depending on the distribution of the sets.
def all_intersect(s):
    count = 0
    for x, a in enumerate(s):
        for y, b in enumerate(s):
            if a & b and x != y:
                count += 1
                break
    return count == len(s)
