Difference Between Two Lists with Duplicates in Python - python

I have two lists that contain many of the same items, including duplicate items. I want to check which items in the first list are not in the second list. For example, I might have one list like this:
l1 = ['a', 'b', 'c', 'b', 'c']
and one list like this:
l2 = ['a', 'b', 'c', 'b']
Comparing these two lists I would want to return a third list like this:
l3 = ['c']
I am currently using some terrible code that I made a while ago that I'm fairly certain doesn't even work properly shown below.
def list_difference(l1, l2):
    """Return the items of ``l1`` left over after cancelling them against ``l2``.

    Each item of ``l2`` cancels at most one equal item of ``l1``, so
    duplicates are respected.  The surviving items keep their order from
    ``l1``.  Neither input list is modified.
    """
    # A private sentinel marks consumed slots; unlike a magic string it can
    # never collide with a real list item.
    used = object()
    remaining = list(l1)
    candidates = list(l2)  # work on copies so the caller's lists stay intact
    for i, item in enumerate(remaining):
        for j, other in enumerate(candidates):
            # Compare against the *second* list and skip entries that have
            # already been paired off.
            if other is not used and item == other:
                remaining[i] = used
                candidates[j] = used
                break  # one item of l2 cancels only one item of l1
    return [item for item in remaining if item is not used]
How can I better accomplish this task?

You didn't specify if the order matters. If it does not, you can do this in >= Python 2.7:
from collections import Counter

l1 = ['a', 'b', 'c', 'b', 'c']
l2 = ['a', 'b', 'c', 'b']
c1 = Counter(l1)
c2 = Counter(l2)
# Counter subtraction keeps only positive counts, so every item of l2
# cancels one equal item of l1.
diff = c1 - c2
# print is called as a function: valid on Python 3 and, with a single
# argument, on Python 2.7 as well.
print(list(diff.elements()))

Create Counters for both lists, then subtract one from the other.
from collections import Counter

a = [1, 2, 3, 1, 2]
b = [1, 2, 3, 1]

# Tally the first list, then decrement once for every element of the second.
# Unlike the ``-`` operator, ``subtract`` works in place and keeps zero and
# negative tallies in ``c``.
c = Counter(a)
c.subtract(b)

To take into account both duplicates and the order of elements:
from collections import Counter
def list_difference(a, b):
    """Return the items of ``a`` not matched by an item of ``b``, in ``a``'s order.

    Every item of ``b`` cancels one equal item of ``a``.  When ``a`` holds
    more copies of an item than ``b`` does, the copies that are kept are the
    earliest ones.
    """
    surplus = Counter(a)
    surplus.subtract(b)  # surplus[x] = occurrences in a minus occurrences in b
    kept = []
    for item in a:
        if surplus[item] <= 0:
            continue  # no unmatched copy of this item is left
        surplus[item] -= 1
        kept.append(item)
    return kept
Example
print(list_difference("z y z x v x y x u".split(), "x y z w z".split()))
# -> ['y', 'x', 'v', 'x', 'u']
Python 2.5 version:
from collections import defaultdict
def list_difference25(a, b):
    """Return the items of ``a`` not matched by an item of ``b``, in ``a``'s order.

    Variant for Python 2.5 that tallies with ``defaultdict`` instead of
    ``Counter``.
    """
    balance = defaultdict(int)  # item -> copies in a minus copies in b
    for item in a:
        balance[item] += 1
    for item in b:
        balance[item] -= 1
    result = []
    for item in a:
        if balance[item] <= 0:
            continue  # every copy of this item has been cancelled or emitted
        balance[item] -= 1
        result.append(item)
    return result

Counters are new in Python 2.7.
For a general solution that subtracts the items of a from b:
def list_difference(b, a):
    """Return a copy of ``b`` with one occurrence removed for each item of ``a``.

    Items of ``a`` that have no remaining match in the copy are ignored
    (or maybe you want to keep those values instead).
    """
    leftover = list(b)
    for unwanted in a:
        if unwanted in leftover:
            leftover.remove(unwanted)  # drops the first remaining match only
    return leftover

you can try this
# Remove one occurrence from l1 for every item of l2; l1 is modified in place.
for x in l2:
    if x in l1:  # list.remove raises ValueError when the item is absent
        l1.remove(x)
print(l1)

Try this one:
from collections import Counter
from typing import Sequence
def duplicates_difference(a: Sequence, b: Sequence) -> Counter:
    """
    Count the elements the longer sequence has in excess of the shorter one.

    When both sequences have the same length, ``b`` is treated as the longer one.

    >>> duplicates_difference([1,2],[1,2,2,3])
    Counter({2: 1, 3: 1})
    """
    if len(b) < len(a):
        shorter, longer = b, a
    else:
        shorter, longer = a, b
    return Counter(longer) - Counter(shorter)

Related

How to get count of non-repeating values in list

I know I can do something like below to get number of occurrences of elements in the list:
from collections import Counter
words = ['a', 'b', 'c', 'a']
Counter(words).keys() # equals to list(set(words))
Counter(words).values() # counts the elements' frequency
Outputs:
['a', 'c', 'b']
[2, 1, 1]
But I want to get the count 2 for b and c as b and c occur exactly once in the list.
Is there any way to do this in concise / pythonic way without using Counter or even using above output from Counter?
You could just make an algorithm that does that, here is a one liner (thanks #d.b):
sum(x for x in Counter(words).values() if x == 1)
Or more than one line:
# Count the words that occur exactly once.  Merely checking whether a word
# was seen before would count every distinct word instead (3, not 2, for
# ['a', 'b', 'c', 'a']).
count = 0
for word in words:
    if words.count(word) == 1:
        count += 1

How to delete repeat elements in this list?

I have a list a = ['R','R','R','B','R','B','B','S','S']. My goal is to delete repeated 'R's and 'S's and then delete the 'B's (if there is only one R or S, just keep it). Therefore, I want the output to be ['R','R','S'], but mine is ['R', 'S'].
Can anyone help me take a look at my code? Thank you.
This is my code
a = ['R','R','R','B','R','B','B','S','S'] # create a list to store R S B
a = [x for x in a if x != 'B'] # Delete all the B's
new_list = [] # create another list to store R and S without repeat
last = None
for x in a:
if last == x and (len(new_list) == 0 or new_list[-1] != x):
new_list.append(last)
last = x
print(new_list)
My output is this
['R', 'S']
but I want this
['R','R','S']
You could use itertools.groupby to group the elements first, then delete the B values:
from itertools import groupby
a = ['R','R','R','B','R','B','S','S'] # create a list to store R S B
[k for k, v in groupby(a) if k != 'B']
Result:
['R', 'R', 'S']
You could try this. This creates a new list without anything that is a repeat, and no 'B's.
a = ['R', 'R', 'R', 'B', 'R', 'B', 'B', 'S', 'S']
new_list = []  # first element of every run of equal values, 'B' runs excluded
last = None  # previous element; None before the first one
for x in a:
    starts_new_run = x != last
    last = x
    if starts_new_run and x != 'B':
        new_list.append(x)
print(new_list)
Another option is to use a list comprehension:
a = ['R', 'R', 'R', 'B', 'R', 'B', 'B', 'S', 'S']
# Keep the first element of every run of equal values, but never a 'B'.
# Testing i == 0 first means a[i-1] is not evaluated for the first element
# (it would wrap around to a[-1]), and a leading 'B' is still dropped.
new_list = [x for i, x in enumerate(a) if x != 'B' and (i == 0 or a[i-1] != x)]
print(new_list)
Output from either example is the same:
['R', 'R', 'S']
Neither of these options requires an import. However, I think the groupby code given by Mark Meyer is what I'd use in most cases.
You can use fromkeys in this case.
mylist = ["a", "b", "a", "c", "c"]
mylist = list(dict.fromkeys(mylist))
print(mylist) # ['a', 'b', 'c']

How do I pick an arbitrary number of an element occurring many times in a list?

I have two variables holding a string each and an empty list:
a = 'YBBB'
b = 'RYBB'
x = []
I want to loop through each of the strings and treat each 'B' in the two lists as an independent element (I wish I could just type a.('B') and b.('B')). What I actually want to do is loop through b and ask whether each of the items in b is in a. If so, the number of occurrences of that item (say 'B') is counted in a. This should give 3. Then I want to compare the counts of the item in the two lists and push the lesser of the two into the empty list. In this case, only two 'B's will be pushed into x.
You can use a nested list comprehension like following:
>>> [i for i in set(b) for _ in range(min(b.count(i), a.count(i)))]
['B', 'B', 'Y']
If the order is important you can use collections.OrderedDict for creating the unique items from b:
>>> from collections import OrderedDict
>>>
>>> [i for i in OrderedDict.fromkeys(b) for _ in range(min(b.count(i), a.count(i)))]
['Y', 'B', 'B']
This is useless text for the moderators.
import collections
a = 'YBBB'
b = 'RYBB'
x = []
a_counter = collections.Counter(a)
b_counter = collections.Counter(b)
print(a_counter)
print(b_counter)
for ch in b:
if a_counter[ch]:
x.append(min(a_counter[ch], b_counter[ch]) * ch)
print(x)
--output:--
Counter({'B': 3, 'Y': 1})
Counter({'B': 2, 'Y': 1, 'R': 1})
['Y', 'BB', 'BB']
Or, if you only want to step through each unique element in b:
for ch in set(b):
if a_counter[ch]:
x.append(min(a_counter[ch], b_counter[ch]) * ch)
print(x)
--output:--
['Y', 'BB']

How to count items in list recursively

I am looking to count the items in a list recursively. For example, I have a few lists:
a = ['b', 'c', 'h']
b = ['d']
c = ['e', 'f']
h = []
I was trying to find a way in which I find out the length of list 'a'. But in list 'a' I have 'b', 'c' and 'h' ... hence my function then goes into list 'b' and counts the number of elements there... Then list 'c' and then finally list 'h'.
b = ['d']
c = ['e', 'f']
h = []
a = [b,c,h]
def recur(l):
    """Return the combined length of the lists contained in ``l``, recursively."""
    if l:
        first, rest = l[0], l[1:]
        # length of the first inner list plus the total for the remaining ones
        return recur(rest) + len(first)
    return 0  # nothing left to count
In [8]: recur(a)
Out[8]: 3
Added print to help understand the output:
def recur(l, call=1):
    """Return the combined length of the lists in ``l``, printing a trace line per call.

    ``call`` is the number of the current recursive call, starting at 1.
    """
    if l:
        print("l = {} and l[0] = {} on recursive call {}".format(l, l[0], call))
        return recur(l[1:], call + 1) + len(l[0])
    return 0
If you want to get more deeply nested lists you can flatten and get the len():
b = ['d']
c = ['e', 'f',['x', 'y'],["x"]]
h = []
a = [b,c,h]
try:
    from collections.abc import Iterable  # the alias in ``collections`` was removed in Python 3.10
except ImportError:  # Python 2
    from collections import Iterable

try:
    _STRING_TYPES = (basestring,)  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = (str, bytes)  # Python 3


def flatten_nest(l):
    """Return a flat list of the leaf items of the nested list ``l``.

    Strings are treated as leaves, not as iterables of characters.  An empty
    ``l`` is returned as is.
    """
    if not l:
        return l
    head = l[0]
    if isinstance(head, Iterable) and not isinstance(head, _STRING_TYPES):
        # The first item is itself a container: flatten it, then the rest.
        return flatten_nest(head) + flatten_nest(l[1:])
    return l[:1] + flatten_nest(l[1:])
In [13]: len(flatten_nest(a))
Out[13]: 6
The solution that worked for me was this:
def recur(arr):
    """Return the number of items in ``arr``, counting one item per recursive call."""
    return 1 + recur(arr[1:]) if arr else 0

All the common elements present in the List of Python

Hi I am new to programming and want to learn python. I am working on a code that should return items that are most redundant in a list. If there are more than 1 then it should return all.
Ex.
List = ['a','b','c','b','d','a'] #then it should return both a and b.
List = ['a','a','b','b','c','c','d'] #then it should return a b and c.
List = ['a','a','a','b','b','b','c','c','d','d','d'] #then it should return a b and d.
Note: We don't know what element is most common in the list so we have to find the most common element and if there are more than one it should return all. If the list has numbers or other strings as elements then also the code has to work
I have no idea how to proceed. I can use a little help.
Here is the whole program:
from collections import Counter
def redundant(List):
    """Return every item of ``List`` that occurs the maximum number of times.

    An empty ``List`` yields an empty result.
    """
    c = Counter(List)
    if not c:
        return []  # nothing to rank; most_common()[0] would raise IndexError
    maximum = max(c.values())
    return [k for k, v in c.items() if v == maximum]
def find_kmers(DNA_STRING, k):
    """Return the most frequent substrings of length ``k`` (k-mers) of ``DNA_STRING``.

    Every window of ``k`` consecutive characters is collected and the list is
    passed to ``redundant`` to pick the most common ones.  When ``DNA_STRING``
    is shorter than ``k`` there are no k-mers and an empty list is returned.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    kmers = [DNA_STRING[a:a + k] for a in range(len(DNA_STRING) - k + 1)]
    if not kmers:
        return []
    # Hand the result back to the caller instead of discarding it.
    return redundant(kmers)
This program should take a DNA string and the length of a k-mer, and find which k-mers of that length are present in that DNA string.
Sample Input:
ACGTTGCATGTCGCATGATGCATGAGAGCT
4
Sample Output:
CATG GCAT
You can use collections.Counter:
from collections import Counter
def solve(lis):
    """Return all items of ``lis`` that share the highest number of occurrences.

    An empty ``lis`` yields an empty list.
    """
    c = Counter(lis)
    if not c:
        return []  # most_common()[0] would raise IndexError on empty input
    mx = max(c.values())
    return [k for k, v in c.items() if v == mx]
print (solve(['a','b','c','b','d','a']))
print (solve(['a','a','b','b','c','c','d']))
print (solve(['a','a','a','b','b','b','c','c','d','d','d'] ))
Output:
['a', 'b']
['a', 'c', 'b']
['a', 'b', 'd']
A slightly different version of the above code using itertools.takewhile:
from collections import Counter
from itertools import takewhile
def solve(lis):
    """Return all items of ``lis`` that share the highest number of occurrences.

    An empty ``lis`` yields an empty list.
    """
    c = Counter(lis)
    # default=0 keeps an empty input from raising ValueError; takewhile then
    # simply yields nothing.
    mx = max(c.values(), default=0)
    # most_common() lists items from most to least frequent, so the items
    # tied for the maximum form a prefix of it.
    return [k for k, v in takewhile(lambda x: x[1] == mx, c.most_common())]
from collections import Counter

inputData = [
    ['a', 'b', 'c', 'b', 'd', 'a'],
    ['a', 'a', 'b', 'b', 'c', 'c', 'd'],
    ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd', 'd'],
]
for myList in inputData:
    temp, result = -1, []  # temp holds the highest count once it is known
    # most_common() yields (item, count) pairs from most to least frequent.
    for char, count in Counter(myList).most_common():
        if temp == -1:
            temp = count
        if temp != count:
            break  # past the items tied for the highest count
        result.append(char)
    # print is called as a function: the Python 2 statement form is a
    # SyntaxError on Python 3.
    print(result)
Output
['a', 'b']
['a', 'c', 'b']
['a', 'b', 'd']
>>> def maxs(L):
... counts = collections.Counter(L)
... maxCount = max(counts.values())
... return [k for k,v in counts.items() if v==maxCount]
...
>>> maxs(L)
['a', 'b']
>>> L = ['a','a','b','b','c','c','d']
>>> maxs(L)
['a', 'b', 'c']
>>> L = ['a','a','a','b','b','b','c','c','d','d','d']
>>> maxs(L)
['d', 'a', 'b']
Just for the sake of giving a solution not using collections & using list comprehensions.
given_list = ['a', 'b', 'c', 'b', 'd', 'a']
# Pair each distinct item with its number of occurrences, counting it once.
occurrences = [(each, given_list.count(each)) for each in set(given_list)]
# Keep only the items that appear more than once.
redundant = [(each, count) for each, count in occurrences if count > 1]
# default=0: when nothing repeats there is no count to take the maximum of,
# and final_list simply ends up empty instead of max() raising ValueError.
count_max = max((count for _, count in redundant), default=0)
final_list = [char for char, count in redundant if count == count_max]
PS - I myself haven't used Counters yet :( Time to learn!

Categories

Resources