comparing two strings in python with duplicates - python

I am trying to compare the two strings: 'apple' and 'pear' and return letters that do not belong to the other string.
For example, 'apple' does not contain the 'r' in 'pear'.
'pear' does not contain the 'l' or the second 'p' in 'apple' (pear contains one p, but not two).
So I want to have a function that returns 'r', 'l', and 'p'.
I tried set, but it ignores the duplicates (p, in this example).
def solution(A, B):
    """Count the distinct letters that occur in exactly one of the two strings.

    The comparison is set-based: a letter is counted at most once per
    direction, so repeated letters (multiplicity) are not taken into account.
    """
    only_in_a = {ch for ch in A if ch not in B}
    only_in_b = {ch for ch in B if ch not in A}
    return len(only_in_a) + len(only_in_b)

You can iterate over the concatenation of a and b and keep every character whose count differs between the two strings:
def get_results(a, b):
    """Return the distinct characters whose counts differ between a and b.

    The result is built from a set, so the order of the returned list is
    not guaranteed.
    """
    combined = a + b
    differing = {ch for ch in combined if a.count(ch) != b.count(ch)}
    return list(differing)


print(get_results('apple', 'pear'))
Output:
['p', 'r', 'l']

Use a Counter
from collections import Counter
# Subtracting one Counter from another keeps only the letters whose count
# stays positive, so each direction lists the letters (with multiplicity)
# that the other word is missing.
Counter('apple') - Counter('pear') # --> Counter({'p': 1, 'l': 1})
Counter('pear') - Counter('apple') # --> Counter({'r': 1})

def solution(a, b):
    """Return the characters left over after pairing off a against b.

    Every character of one string cancels a single matching character of
    the other.  The result lists what remains of ``a`` followed by what
    remains of ``b``, so duplicates are respected.
    """

    def _strike_out(source, to_cancel):
        # Work on a mutable copy; each cancelling character removes at most
        # one (the first) matching entry.
        remaining = list(source)
        for ch in to_cancel:
            try:
                remaining.remove(ch)
            except ValueError:
                continue
        return remaining

    return _strike_out(a, b) + _strike_out(b, a)

Related

Changing letters in string to letter and number of frequency-python [duplicate]

This question already has answers here:
Run length encoding in Python
(10 answers)
Closed 4 years ago.
I'm looking for a solution to my problem. I want to make a program where someone inputs a string and I convert it into something like this:
'ZZZZYYYZZ' -> 'Z4Y3Z2'
I'm open for any suggestions.
The code I did:
def compress(s):
    """Return each distinct letter of ``s`` followed by its total count.

    Counts are accumulated over the whole string (not per consecutive run).
    Returns None when ``s`` is not purely alphabetic, which includes the
    empty string.
    """
    if not s.isalpha():
        return None
    tally = {}
    for ch in s:
        tally[ch] = tally.get(ch, 0) + 1
    return ''.join('{0}{1}'.format(letter, total) for letter, total in tally.items())
# Prompt the user for the text to encode.
s=input("Write string: ")
# NOTE(review): the value returned by compress() is neither printed nor stored here.
compress(s)
This produces the wrong output
Write string: ZZZZYYYZZ
Y3Z6
Grouping of unsorted data into chunks is a job for itertools.groupby.
>>> from itertools import groupby
>>>
>>> s = 'ZZZZYYYZZ'
>>> ''.join('{}{}'.format(c, len(list(g))) for c, g in groupby(s))
'Z4Y3Z2'
Details on what groupby produces here:
>>> [(c, list(g)) for c, g in groupby(s)]
[('Z', ['Z', 'Z', 'Z', 'Z']), ('Y', ['Y', 'Y', 'Y']), ('Z', ['Z', 'Z'])]
~edit~
Slight memory optimization without intermediary lists:
>>> ''.join('{}{}'.format(c, sum(1 for _ in g)) for c, g in groupby(s))
'Z4Y3Z2'
~edit 2~
Instead of C1 can we have just C?
>>> s = 'XYXYXXX'
>>> to_join = []
>>> groups = groupby(s)
>>>
>>> for char, group in groups:
...: group_len = sum(1 for _ in group)
...: if group_len == 1:
...: to_join.append(char)
...: else:
...: to_join.append('{}{}'.format(char, group_len))
...:
>>> ''.join(to_join)
'XYXYX3'
This lends itself to a neat use of zip, allowing you to iterate over each character and the next character:
# Run-length encode ``s`` by walking over each character together with its
# successor: a change of character closes the current run.
s = 'ZZZZYYYZZ'
pieces = []  # encoded runs, joined once at the end
count = 1
for a, b in zip(s[:-1], s[1:]):
    if a != b:
        pieces.append(a + str(count))
        count = 1
    else:
        count += 1
# The last run is never closed inside the loop, so emit it here.  The guard
# keeps an empty ``s`` from raising IndexError on ``s[-1]``.
if s:
    pieces.append(s[-1] + str(count))
out = ''.join(pieces)
which gives out as 'Z4Y3Z2'.

Intersection with order between two strings

My question is: how can we find the intersection of two strings?
For example "address" and "dress" should return "dress".
I used a dict to implement my function, but I can only sort these characters and not output them with the original order? So how should I modify my code?
def IntersectStrings(first, second):
    """Return the characters common to both strings, in sorted order.

    Each character appears as many times as the smaller of its counts in
    ``first`` and ``second``.  The result is sorted by character, so the
    order in which the characters occurred in the inputs is not preserved.
    """
    # Function-scope import keeps this snippet self-contained.
    from collections import Counter

    # ``&`` keeps, for every character, the minimum of the two counts.
    shared = Counter(first) & Counter(second)
    return ''.join(ch * n for ch, n in sorted(shared.items()))


print(IntersectStrings('address', 'dress'))
There are lots of intersecting strings. One way is to create a set of all substrings of each string and then intersect the two sets. If you want the biggest intersection, just take the longest element of the resulting set, e.g.:
def substrings(s):
    """Yield every non-empty contiguous slice of ``s``.

    Slices are produced by start position, and for each start from the
    shortest to the longest.
    """
    length = len(s)
    for start in range(length):
        for stop in range(start + 1, length + 1):
            yield s[start:stop]
def intersect(s1, s2):
    """Return the set of substrings that occur in both ``s1`` and ``s2``."""
    shared = set(substrings(s1))
    shared.intersection_update(substrings(s2))
    return shared
Then you can see the intersections:
>>> intersect('address', 'dress')
{'re', 'ss', 'ess', 'es', 'ress', 'dress', 'dres', 'd', 'e', 's', 'res', 'r', 'dre', 'dr'}
>>> max(intersect('address', 'dress'), key=len)
'dress'
>>> max(intersect('sprinting', 'integer'), key=len)
'int'

How to count items in list recursively

I am looking to count the items in a list recursively. For example, I have a few lists:
a = ['b', 'c', 'h']
b = ['d']
c = ['e', 'f']
h = []
I was trying to find a way in which I find out the length of list 'a'. But in list 'a' I have 'b', 'c' and 'h' ... hence my function then goes into list 'b' and counts the number of elements there... Then list 'c' and then finally list 'h'.
b = ['d']
c = ['e', 'f']
h = []
a = [b, c, h]


def recur(l):
    """Return the combined length of the elements of ``l``, recursively.

    Only one level is inspected: the length of each element is added up,
    and elements nested deeper are not descended into.
    """
    if l:
        # Handle the rest of the list first, then add the first element's length.
        return recur(l[1:]) + len(l[0])
    return 0
In [8]: recur(a)
Out[8]: 3
Added print to help understand the output:
def recur(l, call=1):
    """Sum the lengths of the elements of ``l``, printing a trace per call.

    ``call`` is the ordinal of the current recursive call and is only used
    in the printed trace.
    """
    if l:
        print("l = {} and l[0] = {} on recursive call {}".format(l, l[0], call))
        return recur(l[1:], call + 1) + len(l[0])
    return 0
If you want to get more deeply nested lists you can flatten and get the len():
b = ['d']
c = ['e', 'f', ['x', 'y'], ["x"]]
h = []
a = [b, c, h]

try:
    # The ABCs live in collections.abc; the old alias in ``collections``
    # was removed in Python 3.10.
    from collections.abc import Iterable
except ImportError:  # Python 2
    from collections import Iterable

try:
    _STRING_TYPES = (basestring,)  # Python 2
except NameError:
    _STRING_TYPES = (str, bytes)  # Python 3


def flatten_nest(l):
    """Return the items of an arbitrarily nested list as one flat list.

    Strings are iterable but are treated as atomic values, so they are kept
    whole instead of being split into characters.  An empty input is
    returned unchanged.
    """
    if not l:
        return l
    head = l[0]
    if isinstance(head, Iterable) and not isinstance(head, _STRING_TYPES):
        # The first element is itself a container: flatten it, then the rest.
        return flatten_nest(head) + flatten_nest(l[1:])
    return l[:1] + flatten_nest(l[1:])
In [13]: len(flatten_nest(a))
Out[13]: 6
The solution that worked for me was this:
def recur(arr):
    """Return the number of items in ``arr``, counted recursively."""
    # One for the first item plus the count of everything after it.
    return 1 + recur(arr[1:]) if arr else 0

How to maintain a strict alternating pattern of item "types" in a list?

Given a list of strings, where each string is in the format "A - something" or "B - somethingelse", and list items mostly alternate between pieces of "A" data and "B" data, how can irregularities be removed?
Irregularities being any sequence that breaks the A B pattern.
If there are multiple A's, the next B should also be removed.
If there are multiple B's, the preceding A should also be removed.
After removal of these invalid sequences, list order should be kept.
Example: A B A B A A B A B A B A B A B B A B A B A A B B A B A B
In this case, AAB (see rule 2), ABB (see rule 3) and AABB should be removed.
I'll give it a try with regexp returning indexes of sequences to be removed
>>> import re
>>> data = 'ABABAABABABABABBABABAABBABAB'
>>> [(m.start(0), m.end(0)) for m in re.finditer('(AA+B+)|(ABB+)', data)]
[(4, 7), (13, 16), (20, 24)]
or result of stripping
>>> re.sub('(AA+B+)|(ABB+)', '', data)
'ABABABABABABABABAB'
The drunk-on-itertools solution:
>>> s = 'ABABAABABABABABBABABAABBABAB'
>>> from itertools import groupby, takewhile, islice, repeat, chain
>>> groups = (list(g) for k,g in groupby(s))
>>> pairs = takewhile(bool, (list(islice(groups, 2)) for _ in repeat(None)))
>>> kept_pairs = (p for p in pairs if len(p[0]) == len(p[1]) == 1)
>>> final = list(chain(*chain(*kept_pairs)))
>>> final
['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B']
(Unfortunately I'm now in no shape to think about corner cases and trailing As etc..)
I'd write it as a generator. Repeat:
read as many A's as possible,
read as many B's as possible,
if you've read exactly 1 A and 1 B, yield them; otherwise ignore and proceed.
Also this needs an additional special case in case you want to allow the input to end with an A.
Using itertools.groupby:
from itertools import groupby
def solve(strs):
    """Drop the runs that break a strict A/B alternation and join the rest.

    Consecutive equal items are grouped.  A run of several 'A' is dropped
    together with the group that follows it; a run of several 'B' is dropped
    together with the item kept just before it (if any).  Every other group
    contributes its key once.
    """
    kept = []
    skip_following = False
    for letter, run in groupby(strs):
        repeated = sum(1 for _ in run) > 1
        if skip_following:
            # This group follows a multi-'A' run, so it is discarded.
            skip_following = False
            continue
        if not repeated or letter not in ('A', 'B'):
            kept.append(letter)
        elif letter == 'A':
            # Several 'A' in a row: drop them and the group that comes next.
            skip_following = True
        elif kept:
            # Several 'B' in a row: also discard the previously kept item.
            kept.pop()
    return ''.join(kept)
# Demonstrate solve() on the sample sequence from the question.
strs = 'ABABAABABABABABBABABAABBABAB'
print(solve(strs))
#ABABABABABABABABAB

Difference Between Two Lists with Duplicates in Python

I have two lists that contain many of the same items, including duplicate items. I want to check which items in the first list are not in the second list. For example, I might have one list like this:
l1 = ['a', 'b', 'c', 'b', 'c']
and one list like this:
l2 = ['a', 'b', 'c', 'b']
Comparing these two lists I would want to return a third list like this:
l3 = ['c']
I am currently using some terrible code that I made a while ago that I'm fairly certain doesn't even work properly shown below.
def list_difference(l1, l2):
    """Return the items of ``l1`` that are left after cancelling those in ``l2``.

    Duplicates are respected: every item of ``l2`` cancels at most one equal
    item of ``l1``.  The surviving items keep their relative order from
    ``l1``.  Neither input list is modified.
    """
    unmatched = list(l2)  # working copy so the caller's list stays intact
    l3 = []
    for item in l1:
        try:
            # Pair this item off against one equal entry of l2, if any is left.
            unmatched.remove(item)
        except ValueError:
            l3.append(item)
    return l3
How can I better accomplish this task?
You didn't specify if the order matters. If it does not, you can do this in >= Python 2.7:
from collections import Counter

l1 = ['a', 'b', 'c', 'b', 'c']
l2 = ['a', 'b', 'c', 'b']
# Count both lists; subtracting keeps only the items that l1 has in surplus.
c1 = Counter(l1)
c2 = Counter(l2)
diff = c1 - c2
# elements() expands the counts back into individual items.
print(list(diff.elements()))
Create Counters for both lists, then subtract one from the other.
from collections import Counter

a = [1, 2, 3, 1, 2]
b = [1, 2, 3, 1]
# subtract() updates the counter in place and accepts any iterable of items.
c = Counter(a)
c.subtract(b)
To take into account both duplicates and the order of elements:
from collections import Counter
def list_difference(a, b):
    """Return the items of ``a`` in surplus over ``b``, in ``a``'s order.

    For each value, as many occurrences are kept as ``a`` has beyond ``b``;
    the earliest occurrences in ``a`` are the ones kept.
    """
    surplus = Counter(a)
    surplus.subtract(b)  # what remains positive is the excess of a over b
    kept = []
    for item in a:
        if surplus[item] <= 0:
            continue
        surplus[item] -= 1
        kept.append(item)
    return kept
Example
print(list_difference("z y z x v x y x u".split(), "x y z w z".split()))
# -> ['y', 'x', 'v', 'x', 'u']
Python 2.5 version:
from collections import defaultdict
def list_difference25(a, b):
    """Return the items of ``a`` in surplus over ``b``, in ``a``'s order.

    Counts are kept in a defaultdict instead of a Counter.
    """
    balance = defaultdict(int)  # item -> occurrences in a minus occurrences in b
    for item in a:
        balance[item] = balance[item] + 1
    for item in b:
        balance[item] = balance[item] - 1
    result = []
    for item in a:
        remaining = balance[item]
        if remaining > 0:
            balance[item] = remaining - 1
            result.append(item)
    return result
Counters are new in Python 2.7.
For a general solution to subtract a from b:
def list_difference(b, a):
    """Return a copy of ``b`` with one occurrence removed per item of ``a``.

    Items of ``a`` that have no match left in ``b`` are ignored.
    """
    remaining = list(b)
    for unwanted in a:
        if unwanted in remaining:
            remaining.remove(unwanted)
    return remaining
you can try this
# Cancel one occurrence in l1 for every item of l2 (l1 is modified in place).
# Items of l2 that are absent from l1 are skipped instead of raising ValueError.
for x in l2:
    if x in l1:
        l1.remove(x)
print(l1)
Try this one:
from collections import Counter
from typing import Sequence
def duplicates_difference(a: Sequence, b: Sequence) -> Counter:
    """
    Count the items the longer sequence has in excess of the shorter one.

    The argument order does not matter; when both have the same length,
    ``a`` is subtracted from ``b``.

    >>> duplicates_difference([1,2],[1,2,2,3])
    Counter({2: 1, 3: 1})
    """
    if len(b) < len(a):
        a, b = b, a
    return Counter(b) - Counter(a)

Categories

Resources