Intersection with order between two strings

Intersection with order between two strings - python

My Question is that if we need to find the intersect between two strings?
How could we do that?
For example "address" and "dress" should return "dress".
I used a dict to implement my function, but I can only sort these characters and not output them with the original order? So how should I modify my code?
def IntersectStrings(first,second):
a={}
b={}
for c in first:
if c in a:
a[c] = a[c]+1
else:
a[c] = 1
for c in second:
if c in b:
b[c] = b[c]+1
else:
b[c] = 1
l = []
print a,b
for key in sorted(a):
if key in b:
cnt = min(a[key],b[key])
while(cnt>0):
l.append(key)
cnt = cnt-1
return ''.join(l)
print IntersectStrings('address','dress')

There are lots of intersecting strings. One way you could create a set of all substrings of each string and then intersect. If you want the biggest intersection just find the max from the resulting set, e.g.:
def substrings(s):
for i in range(len(s)):
for j in range(i, len(s)):
yield s[i:j+1]
def intersect(s1, s2):
return set(substrings(s1)) & set(substrings(s2))
Then you can see the intersections:
>>> intersect('address', 'dress')
{'re', 'ss', 'ess', 'es', 'ress', 'dress', 'dres', 'd', 'e', 's', 'res', 'r', 'dre', 'dr'}
>>> max(intersect('address', 'dress'), key=len)
'dress'
>>> max(intersect('sprinting', 'integer'), key=len)
'int'

Related

Merging overlapping (str) objects

The problem is the following :
I want to go from having this set
{'A/B', 'B/C', 'C/D', 'D/E', ..., 'U/V', 'V/W', ..., 'X/Y', ..., 'Z', ...}
to this set
{'A/B/C/D/E', ..., 'U/V/W', ..., 'X/Y', ..., 'Z', ...}
where the objects A, B, C ... are just strings of characters. The output solution should be independent of the order in which the objects appears (i.e. if you scramble the objects in the set, the solution should always be the same)
In other words I want to merge overlapping objects.
Inputs of the following form cannot happen :
{"A/B", "B/C", "B/D"}
{"A/B", "B/C", "C/A"}
There can be objects with no '/' in them.
Here is a partial solution I've come up with :
example={'A/B', 'B/C', 'C/D', 'D/E','U/V', 'V/W','X/Y'}
def ext_3plus(unit):
for couple in list(itertools.combinations(list(unit),2)):
if '/' in couple[0] and '/' in couple[1]:
if couple[0].split('/')[0]==couple[1].split('/')[1]:
unit.remove(couple[0])
unit.remove(couple[1])
unit.add(couple[1].split('/')[0]+'/'+couple[0])
if couple[0].split('/')[1]==couple[1].split('/')[0]:
unit.remove(couple[0])
unit.remove(couple[1])
unit.add(couple[0]+'/'+couple[1].split('/')[1])
else: #the input can contain object not having '/'
continue
There is two problems, first it does only one iteration,
the result on {'A/B', 'B/C', 'C/D', 'D/E','U/V', 'V/W','X/Y'}
is :
{'A/B/C', 'C/D/E', 'U/V/W', 'X/Y'}
Second, if I include objects containing no '/', the input being {'A/B', 'B/C', 'C/D', 'D/E','U/V', 'V/W','X/Y','Z'}, the result is different from the previous one :
{'A/B', 'B/C/D', 'D/E', 'U/V/W', 'X/Y', 'Z'}
So there should be a recursive call on the first iteration etc.
How should it be done ?

If I understood correctly this can be seen as a graph problem, and solve as such:
import networkx as nx
example = {'A/B', 'B/C', 'C/D', 'D/E', 'U/V', 'V/W', 'X/Y', "Z"}
# convert each string to a and edge
# each pattern to the side of / is a node
edges = [tuple(s.split("/")) for s in example if "/" in s]
nodes = [s for s in example if "/" not in s]
# create directed graph from edges
g = nx.from_edgelist(edges, create_using=nx.DiGraph)
g.add_nodes_from(nodes)
# find each path using topological sort
runs, current = [], []
for e in nx.topological_sort(g):
# start a new path each time a node with in-degree 0
# in-degree 0 means it is the start of a new path
if g.in_degree(e) == 0:
if current:
runs.append(current)
current = []
current.append(e)
if current:
runs.append(current)
# format the result
result = ["/".join(run) for run in runs]
print(result)
Output
['Z', 'U/V/W', 'X/Y', 'A/B/C/D/E']
If I'm not mistaken the overall complexity of this approach is O(n). More on topological sorting can be found here.
UPDATE
In networkx 2.6.4 use lexicographical_topological_sort

You can use a recursive generator function:
vals = ['A/B', 'B/C', 'C/D', 'D/E', 'U/V', 'V/W', 'X/Y']
data = [i.split('/') for i in vals]
def paths(d, c = [], s = []):
if not (k:=[b for a, b in data if a == d]):
yield c+[d]
if (t:=[a for a, b in data if a not in s+[d]]):
yield from paths(t[0], c = [], s=s+[d])
else:
yield from [j for i in k for j in paths(i, c=c+[d], s=s+[d])]
vals = list(paths(data[0][0]))
Output:
[['A', 'B', 'C', 'D', 'E'], ['U', 'V', 'W'], ['X', 'Y']]
It should be noted, however, that the solution above will only work on inputs that contain standard edge definitions. If the contents of vals can very in the number of items divided by the /, then you can use the solution below:
class Node:
def __init__(self, n, c = []):
self.n, self.c = n, c
def __contains__(self, e):
return e[0] == self.n or e[-1] == self.n or any(e in i for i in self.c)
def add_edge(self, e):
if self.n != e[0] and len(e) > 1 and (m:=[i for i in self.c if i.n == e[-1]]):
self.c = [i for i in self.c if i != m[0]]+[Node(e[0], [m[0]])]
elif self.n == e[0]:
if len(e) > 1 and not any(i.n == e[1] for i in self.c):
self.c = [*self.c, Node(e[1])]
elif (m:=[i for i in self.c if e in i]):
m[0].add_edge(e)
else:
self.c = [*self.c, Node(e[0], [] if len(e) == 1 else [Node(e[1])])]
vals = ['A/B/C', 'A/B', 'B/C', 'C/D', 'D/E', 'U/V', 'V/W', 'X/Y', 'K']
n = Node(None)
for i in vals:
k = i.split('/')
for j in range(len(k)):
n.add_edge(k[j:j+2])
def get_paths(n, c = []):
if not n.c:
yield c+[n.n]
else:
yield from [j for k in n.c for j in get_paths(k, c+[n.n])]
final_result = [i[1:] for i in get_paths(n)]
print(final_result)
Output:
[['A', 'B', 'C', 'D', 'E'], ['U', 'V', 'W'], ['X', 'Y'], ['K']]
With the trie-style approach of class Node, the order of the input (vals) does not matter (no sort is required) and input paths of any depth can be added.

It might not be the most efficient, but you could just repeat the loop until there's nothing modified.
def ext_3plus(unit):
while True:
oldlen = len(unit)
for couple in itertools.combinations(list(unit),2):
if '/' in couple[0] and '/' in couple[1]:
if couple[0].split('/')[0]==couple[1].split('/')
unit.remove(couple[0])
unit.remove(couple[1])
unit.add(couple[1].split('/')[0]+'/'+couple[0])
modified = True
if couple[0].split('/')[1]==couple[1].split('/')[0]
unit.remove(couple[0])
unit.remove(couple[1])
unit.add(couple[0]+'/'+couple[1].split('/')[1])
if len(unit) == oldlen:
# Nothing was merged, so we're done
break

get occurrence of all substring of matching characters from string

e.g. find substring containing 'a', 'b', 'c' in a string 'abca', answer should be 'abc', 'abca', 'bca'
Below code is what I did, but is there better, pythonic way than doing 2 for loops?
Another e.g. for 'abcabc' count should be 10
def test(x):
counter = 0
for i in range(0, len(x)):
for j in range(i, len(x)+1):
if len((x[i:j]))>2:
print(x[i:j])
counter +=1
print(counter)
test('abca')

You can condense it down with list comprehension:
s = 'abcabc'
substrings = [s[b:e] for b in range(len(s)-2) for e in range(b+3, len(s)+1)]
substrings, len(substrings)
# (['abc', 'abca', 'abcab', 'abcabc', 'bca', 'bcab', 'bcabc', 'cab', 'cabc', 'abc'], 10)

You can use combinations from itertools:
from itertools import combinations
string = "abca"
result = [string[x:y] for x, y in combinations(range(len(string) + 1), r = 2)]
result = [item for item in result if 'a' in item and 'b' in item and 'c' in item]

comparing two strings in python with duplicates

I am trying to compare the two strings: 'apple' and 'pear' and return letters that do not belong to the other string.
For example, 'apple' does not contain 'r' in 'pear'
'pear' does not contain 'l' and 'p' in apple (pear contains p but does not contains two p's).
So I want to have a function that returns 'r', 'l', and 'p'.
I tried set, but it ignores the duplicates (p, in this example).
def solution(A, B):
N = len(A)
M = len(B)
letters_not_in_B = list(set([c for c in A if c not in B]))
letters_not_in_A = list(set([c for c in B if c not in A]))
answer = len(letters_not_in_B) + len(letters_not_in_A)
return answer

You can compare the character counts for each separate string resulting from the concatenation of the parameters a and b:
def get_results(a, b):
return list(set([i for i in a+b if a.count(i) != b.count(i)]))
print(get_results('apple', 'pear'))
Output:
['p', 'r', 'l']

Use a Counter
from collections import Counter
Counter('apple') - Counter('pear') # --> Counter({'p': 1, 'l': 1})
Counter('pear') - Counter('apple') # --> Counter({'r': 1})

def solution(a, b):
# create mutable list copies of a and b
list_a = list(a)
list_b = list(b)
for ch in a:
if ch in list_b:
list_b.remove(ch)
for ch in b:
if ch in list_a:
list_a.remove(ch)
return list_a + list_b

Adding certain lengthy elements to a list

I'm doing a project for my school and for now I have the following code:
def conjunto_palavras_para_cadeia1(conjunto):
acc = []
conjunto = sorted(conjunto, key=lambda x: (len(x), x))
def by_size(words, size):
result = []
for word in words:
if len(word) == size:
result.append(word)
return result
for i in range(0, len(conjunto)):
if i > 0:
acc.append(("{} ->".format(i)))
acc.append(by_size(conjunto, i))
acc = ('[%s]' % ', '.join(map(str, acc)))
print( acc.replace(",", "") and acc.replace("'", "") )
conjunto_palavras_para_cadeia1(c)
I have this list: c = ['A', 'E', 'LA', 'ELA'] and what I want is to return a string where the words go from the smallest one to the biggest on in terms of length, and in between they are organized alphabetically. I'm not being able to do that...
OUTPUT: [;1 ->, [A, E], ;2 ->, [LA], ;3 ->, [ELA]]
WANTED OUTPUT: ’[1->[A, E];2->[LA];3->[ELA]]’

Taking a look at your program, the only issue appears to be when you are formatting your output for display. Note that you can use str.format to insert lists into strings, something like this:
'{}->{}'.format(i, sublist)
Here's my crack at your problem, using sorted + itertools.groupby.
from itertools import groupby
r = []
for i, g in groupby(sorted(c, key=len), key=len):
r.append('{}->{}'.format(i, sorted(g)).replace("'", ''))
print('[{}]'.format(';'.join(r)))
[1->[A, E];2->[LA];3->[ELA]]
A breakdown of the algorithm stepwise is as follows -
sort elements by length
group consecutive elements by length
for each group, sort sub-lists alphabetically, and then format them as strings
at the end, join each group string and surround with square brackets []

Shortest solution (with using of pure python):
c = ['A', 'E', 'LA', 'ELA']
result = {}
for item in c:
result[len(item)] = [item] if len(item) not in result else result[len(item)] + [item]
str_result = ', '.join(['{0} -> {1}'.format(res, sorted(result[res])) for res in result])
I will explain:
We are getting items one by one in loop. And we adding them to dictionary by generating lists with index of word length.
We have in result:
{1: ['A', 'E'], 2: ['LA'], 3: ['ELA']}
And in str_result:
1 -> ['A', 'E'], 2 -> ['LA'], 3 -> ['ELA']
Should you have questions - ask

Difference Between Two Lists with Duplicates in Python

I have two lists that contain many of the same items, including duplicate items. I want to check which items in the first list are not in the second list. For example, I might have one list like this:
l1 = ['a', 'b', 'c', 'b', 'c']
and one list like this:
l2 = ['a', 'b', 'c', 'b']
Comparing these two lists I would want to return a third list like this:
l3 = ['c']
I am currently using some terrible code that I made a while ago that I'm fairly certain doesn't even work properly shown below.
def list_difference(l1,l2):
for i in range(0, len(l1)):
for j in range(0, len(l2)):
if l1[i] == l1[j]:
l1[i] = 'damn'
l2[j] = 'damn'
l3 = []
for item in l1:
if item!='damn':
l3.append(item)
return l3
How can I better accomplish this task?

You didn't specify if the order matters. If it does not, you can do this in >= Python 2.7:
l1 = ['a', 'b', 'c', 'b', 'c']
l2 = ['a', 'b', 'c', 'b']
from collections import Counter
c1 = Counter(l1)
c2 = Counter(l2)
diff = c1-c2
print list(diff.elements())

Create Counters for both lists, then subtract one from the other.
from collections import Counter
a = [1,2,3,1,2]
b = [1,2,3,1]
c = Counter(a)
c.subtract(Counter(b))

To take into account both duplicates and the order of elements:
from collections import Counter
def list_difference(a, b):
count = Counter(a) # count items in a
count.subtract(b) # subtract items that are in b
diff = []
for x in a:
if count[x] > 0:
count[x] -= 1
diff.append(x)
return diff
Example
print(list_difference("z y z x v x y x u".split(), "x y z w z".split()))
# -> ['y', 'x', 'v', 'x', 'u']
Python 2.5 version:
from collections import defaultdict
def list_difference25(a, b):
# count items in a
count = defaultdict(int) # item -> number of occurrences
for x in a:
count[x] += 1
# subtract items that are in b
for x in b:
count[x] -= 1
diff = []
for x in a:
if count[x] > 0:
count[x] -= 1
diff.append(x)
return diff

Counters are new in Python 2.7.
For a general solution to substract a from b:
def list_difference(b, a):
c = list(b)
for item in a:
try:
c.remove(item)
except ValueError:
pass #or maybe you want to keep a values here
return c

you can try this
list(filter(lambda x:l1.remove(x),li2))
print(l1)

Try this one:
from collections import Counter
from typing import Sequence
def duplicates_difference(a: Sequence, b: Sequence) -> Counter:
"""
>>> duplicates_difference([1,2],[1,2,2,3])
Counter({2: 1, 3: 1})
"""
shorter, longer = sorted([a, b], key=len)
return Counter(longer) - Counter(shorter)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Intersection with order between two strings - python

Related

Merging overlapping (str) objects

get occurrence of all substring of matching characters from string

comparing two strings in python with duplicates

Adding certain lengthy elements to a list

Difference Between Two Lists with Duplicates in Python

Categories

Resources