set operation on a list of elements - python

I have a list containing thousands of sets similar to this:
set_list = [a, b, c, d]
Each set in the list looks something like this:
a = set([1, 2, 3, 4, 5])
b = set([4, 5, 6, 7, 7, 9])
c = set([1, 2, 6, 8, 10, 12, 45])
d = set([11, 3, 23, 3, 4, 44])
I would like to compute X − (Y ∪ Z ∪ A ∪ B ∪ …) for every set X in the list, where Y, Z, A, B, … are all the other sets in the list.
After applying this operation to every element of set_list, the new sets would look like this:
a = a.difference(b.union(c, d))
b = b.difference(c.union(a, d))
c = c.difference(d.union(b, a))
d = d.difference(a.union(c, b))
How do I accomplish this?

One possibility is to make use of the multiset module to precompute the multiset union of all elements in set_list, like so:
# NOTE(review): `multiset` is presumably a third-party package (the text
# contrasts this answer with a standard-library-only one) — confirm it is
# installed before running.
from multiset import Multiset
# Fold every set in set_list into one Multiset, starting from an empty one.
union = sum(set_list, Multiset())
# Per the accompanying explanation, `union - s` stands for the combined
# contents of all the *other* sets; subtracting that from s is meant to leave
# the elements found only in s.
# NOTE(review): confirm the element type of the resulting list (set vs.
# Multiset) against the multiset package documentation.
set_list = [s - (union - s) for s in set_list]
Here, union - s computes the Y ∪ Z ∪ A ∪ B... in your notation.
See Aran-Fey's answer for the same method implemented (more verbosely) using only the standard library.

If I'm understanding correctly, you want the difference for each set and the union of the rest of the sets. I would use a loop and functools.reduce and operator.or_:
Setup
import functools
import operator
a = {1, 2, 3, 4, 5}
b = {4, 5, 6, 7, 9}
c = {1, 2, 6, 8, 10, 12, 45}
d = {11, 3, 23, 4, 44}
set_list = [a, b, c, d]

# Loop and save results
# I don't know what you want to do with the results so
# I'll save them in a list...
results = []
for position, current in enumerate(set_list):
    # Pick the other sets by position. list.remove() matches by equality and
    # needs a fresh copy of the whole list on every pass.
    others = set_list[:position] + set_list[position + 1:]
    # Seeding reduce() with an empty set keeps a one-set list from raising
    # TypeError (reduce of an empty sequence with no initial value).
    r = current - functools.reduce(operator.or_, others, set())
    results.append(r)
print(results)
# prints [set(), {9, 7}, {8, 10, 12, 45}, {11, 44, 23}]

This is a re-implementation of NPE's answer using collections.Counter from the standard library:
from collections import Counter
def mutual_difference(set_list):
    """Return, for each set, the elements that appear in no other set.

    Args:
        set_list: An iterable of sets (or frozensets).

    Returns:
        A new list, in the same order as the input, where entry ``i`` is
        ``set_list[i]`` minus the union of all the other sets.  The input
        sets are not modified.
    """
    # Materialise once so a one-shot iterable can be walked twice.
    set_list = list(set_list)

    # Count in how many sets each element occurs.  Because a set holds each
    # element at most once, a count above 1 means "also in some other set".
    occurrences = Counter()
    for s in set_list:
        occurrences.update(s)

    # One pass over each set's own elements replaces the original
    # subtract / prune / re-add cycle, which rescanned the whole union for
    # every set in the list.
    return [
        s.difference(item for item in s if occurrences[item] > 1)
        for s in set_list
    ]
Example:
>>> mutual_difference([{1,2}, {2,3}, {1,4,5}])
[set(), {3}, {4, 5}]

[value - {item for subset in set_list[0:index] + set_list[index + 1:] for item in subset} for index, value in enumerate(set_list)]
which means:
result = []
for index, value in enumerate(set_list):
    # Every set except the one at the current position.
    others = [
        subset
        for position, subset in enumerate(set_list)
        if position != index
    ]
    # Merge the other sets into one, then strip those items from this set.
    union = set().union(*others)
    result.append(value - union)
print(result)
Outputs:
[set(), {9, 7}, {8, 10, 12, 45}, {11, 44, 23}]

Related

Python: how to remove tuple from a large list based on similarity with other tuples

i have some issues with a python exercise. I have a large list of tuples of 10 elements, containing all combinations of 20 numbers.
import itertools
Comb = []
data = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
for i in itertools.combinations(data, 10):
Comb.append(i)
I want to reduce the list by removing every tuple that has at least 7 elements in common with any tuple of another list:
L=[(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),(11,12,13,14,15,16,17,18,19,20),(1, 2, 3, 5, 6, 7, 9, 10, 11, 12)]
I've tried to resolve like this:
for x in Comb:
for i, y in enumerate(Comb):
for j in L:
similarity = len(set(y).intersection(set(j)))
if similarity > 7:
del Comb[i]
but that doesn't seem to work.
Any help?
You can't modify a list (or set or dict) while you're looping over it in Python.
The simplest solution is to just create a new list.
import itertools
comb = list(itertools.combinations(range(10), 5))
# We'll remove any element with more than 3 even or odd numbers.
filters = ({0, 2, 4, 6, 8}, {1, 3, 5, 7, 9})
threshold = 3
result = []
for element in comb:
    # Keep an element only when it stays within the threshold for EVERY
    # filter.  Appending once per passing filter (the earlier form) both
    # duplicated elements and kept ones that broke a single filter.
    if all(len(f.intersection(element)) <= threshold for f in filters):
        result.append(element)
# Make it smaller with a list comprehension
result_compact = [
    elt for elt in comb
    if all(len(f.intersection(elt)) <= threshold for f in filters)
]
I have tested this and it works; it prints some logs just to show what is going on:
import itertools
# NOTE(review): the indentation of this snippet was lost in this copy, so the
# comments below describe individual statements, not the block nesting.
# Collect every 10-element combination of the numbers in `data`.
Comb = []
data = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20];
for i in itertools.combinations(data, 10):Comb.append(i);
# Reference tuples that each combination is compared against.
L=[(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),(11,12,13,14,15,16,17,18,19,20),(1, 2, 3, 5, 6, 7, 9, 10, 11, 12)];
# Index passed to `del Comb[...]` further down.
counter=0
# Size before filtering.
print(len(Comb))
# NOTE(review): Comb is iterated here and shrunk by `del Comb[counter]` below;
# removing items from a list while looping over it can skip elements —
# confirm the result against a version that builds a new list.
for x in Comb:
for j in L:
# Number of values shared by the combination and the reference tuple.
similarity = len(set(x).intersection(set(j)))
# NOTE(review): `> 7` means 8 or more shared values, while the question asks
# for "at least 7" — confirm the intended threshold.
if similarity > 7:
# Log the overlap count and both sets.
print(str(similarity)+"\n"+str(set(x))+"\n"+str(set(j))+"\n\n")
del Comb[counter]
# NOTE(review): whether this increment runs per combination or per reference
# tuple depends on the lost indentation — verify.
counter+=1;
# Size after filtering.
print(len(Comb))

Polynomial Multiplication in Python without Scipy or Numpy

I'm trying to write a function that multiplies two polynomials given as input, and I'm facing an issue with ordering the coefficient list.
Suppose, a polynomial can be expressed with two elements: idx (exponents) and coef (coefficient). Key thing to note is that the idx list is ordered by exponential degree and the coefficients are ordered in corresponding order to the respective exponents.
Take a look at the example below.
idx1 = [5, 4, 0], coef1 = [1, 2, 3] would represent polynomial1 = x^5+2x^4+3x^0
idx2 = [5, 3], coef2 = [1, 5] would represent polynomial2 = x^5+5x^3
I want my function mult_coef(idx1, coef1, idx2, coef2) to return two lists: idx_mult and coef_mult, like this:
ideal output:
idx_mult = [10, 9, 8, 7, 5, 3]
coef_mult = [1, 2, 5, 10, 3, 15]
I wrote a function to get the idx part:
def mult_idx(idx1, idx2):
    """Return the exponents present in the product of two polynomials.

    Args:
        idx1: Exponents of the first polynomial.
        idx2: Exponents of the second polynomial.

    Returns:
        The distinct pairwise sums ``e1 + e2`` as a list sorted from the
        highest exponent to the lowest.  Empty if either input is empty.
    """
    # A set comprehension pairs every exponent of idx1 with every exponent of
    # idx2 and de-duplicates in one step; a single sort then orders them.
    return sorted({e1 + e2 for e1 in idx1 for e2 in idx2}, reverse=True)
#matches ideal idx, but still need coefficients
mult_idx(idx1,idx2) >>> [10, 9, 8, 7, 5, 3]
def mult_coef(idx1, coef1, idx2, coef2):
    """Multiply two polynomials given as parallel exponent/coefficient lists.

    Args:
        idx1: Exponents of the first polynomial.
        coef1: Coefficients of the first polynomial; ``coef1[k]`` belongs to
            exponent ``idx1[k]``.
        idx2: Exponents of the second polynomial.
        coef2: Coefficients of the second polynomial, aligned with ``idx2``.

    Returns:
        A pair ``(idx_mult, coef_mult)``: the exponents of the product sorted
        from highest to lowest, and the matching coefficients.  Terms whose
        coefficients cancel to zero are kept with a coefficient of 0.

    Raises:
        ValueError: If an exponent list and its coefficient list differ in
            length.
    """
    if len(idx1) != len(coef1) or len(idx2) != len(coef2):
        raise ValueError(
            "each exponent list must be as long as its coefficient list"
        )

    # Accumulate by resulting exponent so that cross terms landing on the
    # same power are summed, and each coefficient stays tied to its exponent
    # (multiplying the coefficient lists alone loses that pairing).
    terms = {}
    for e1, c1 in zip(idx1, coef1):
        for e2, c2 in zip(idx2, coef2):
            terms[e1 + e2] = terms.get(e1 + e2, 0) + c1 * c2

    idx_mult = sorted(terms, reverse=True)
    coef_mult = [terms[exponent] for exponent in idx_mult]
    return idx_mult, coef_mult
#Doesn't match the ideal coefficient output (order is wrong, should be [1, 2, 5, 10, 3, 15])
mult_coef(idx1, coef1, idx2, coef2) >>> [1, 5, 2, 10, 3, 15]
Any idea how to tackle this problem? I've been stuck for too long, and I don't think SciPy's or NumPy's polynomial implementations can handle this case specifically; if they can, please feel free to use either.
you could easily solve that with a defaultdict (a dict may be a better representation for your polynomials... poly = {exp: coeff})
from collections import defaultdict
mult = defaultdict(int)  # an exponent not seen yet starts at coefficient 0
for exp1, coeff1 in zip(idx1, coef1):
    for exp2, coeff2 in zip(idx2, coef2):
        # Exponents add and coefficients multiply; products that land on
        # the same exponent are summed.
        exponent = exp1 + exp2
        mult[exponent] = mult[exponent] + coeff1 * coeff2
for your input this gives
mult = defaultdict(<class 'int'>, {10: 1, 8: 5, 9: 2, 7: 10, 5: 3, 3: 15})
then you can arrange that into the lists you are interested in:
mult_sorted = tuple(sorted(mult.items(), reverse=True))
idx_mult = [item[0] for item in mult_sorted]
# [10, 9, 8, 7, 5, 3]
coeff_mult = [item[1] for item in mult_sorted]
# [1, 2, 5, 10, 3, 15]
none of this is thoroughly tested!
it may be a bit more elegant to reduce the indentation of the for loops:
from itertools import product
for (i1, c1), (i2, c2) in product(zip(idx1, coef1), zip(idx2, coef2)):
mult[i1 + i2] += c1 * c2

Convert elements of list to a list of consecutive elements in an efficient way

I have a base list [1,4,10] which needs to be converted to a list having consecutive elements of each element in the base list in an efficient way
Examples:
If I need 2 consecutive numbers then [1,4,10] will be [1,2,4,5,10,11].
If 3 consecutive numbers then [1,4,10] will be [1,2,3,4,5,6,10,11,12].
arr=[1,4,10]
con=3
[r + i for r in arr for i in range(con)]
# [1, 2, 3, 4, 5, 6, 10, 11, 12]
Here's a one liner, assuming the list is x and the number of 'consecutives' is c:
# Python 3: reduce() lives in functools, and range objects cannot be added
# together, so each run is turned into a list before concatenating.  The
# trailing [] is the initial value, which makes an empty x yield [].
from functools import reduce
reduce(lambda a, b: a + b, (list(range(start, start + c)) for start in x), [])
a = [1, 4, 10]
k = 3  # how many consecutive values to generate per base element
# One range of k consecutive integers per base value.
x = list(map(lambda start: range(start, start + k), a))
# Concatenate the ranges, in order, into a single flat list.
output = sum(map(list, x), [])
Here is one way. itertools.chain removes the need for explicit nested loops.
from itertools import chain
def consecutiver(lst, n=3):
    """Expand each integer in ``lst`` into a run of ``n`` consecutive integers.

    Args:
        lst: Iterable of integer starting values.
        n: Length of the run generated for each starting value.

    Returns:
        A flat list with ``i, i + 1, ..., i + n - 1`` for each ``i`` in
        ``lst``, in input order.
    """
    # chain.from_iterable joins the per-element ranges without an explicit
    # nested loop.
    return list(chain.from_iterable(range(i, i + n) for i in lst))
res = consecutiver([1, 4, 10], 2)
# [1, 2, 4, 5, 10, 11]
res2 = consecutiver([1, 4, 10], 3)
# [1, 2, 3, 4, 5, 6, 10, 11, 12]

Create dictionary where keys are from a list and values are the sum of corresponding elements in another list

I have two lists L1 and L2. Each unique element in L1 is a key which has a value in the second list L2. I want to create a dictionary where the values are the sum of elements in L2 that are associated to the same key in L1.
I did the following but I am not very proud of this code. Is there any simpler pythonic way to do it ?
L = [2, 3, 7, 3, 4, 5, 2, 7, 7, 8, 9, 4]  # plays the role of L1 (keys)
W = range(len(L))  # plays the role of L2 (values)
# Sum, per key, the values that share that key's positions.
d = {}
for key, weight in zip(L, W):
    d[key] = d.get(key, 0) + weight
EDIT:
Q: How do I know which elements of L2 are associated to a given key element of L1?
A: if they have the same index. For example if the element 7 is repeated 3 times in L1 (e.g. L1[2] == L1[7] == L1[8] = 7), then I want the value of the key 7 to be L2[2]+L2[7]+L2[8]
You can use enumerate() to get each item's index while you loop over the list, and collections.defaultdict() (passing int as its default factory, which yields 0 the first time a key is seen) to accumulate the values whenever a key is encountered again:
>>> from collections import defaultdict
>>> d = defaultdict(int)
>>> for i,j in enumerate(L):
... d[j]+=i
...
>>> d
defaultdict(<type 'int'>, {2: 6, 3: 4, 4: 15, 5: 5, 7: 17, 8: 9, 9: 10})
If you don't need the intermediate dict of lists you can use the collections.Counter:
import collections
L = [2, 3, 7, 3, 4, 5, 2, 7, 7, 8, 9, 4] # as L1
W = range(len(L)) # as L2
d2 = collections.Counter()
# Pair each key with its weight from W rather than with its position, so the
# snippet keeps working when W holds arbitrary values instead of indices.
for value, weight in zip(L, W):
    d2[value] += weight
which behaves like a normal dict:
Counter({2: 6, 3: 4, 4: 15, 5: 5, 7: 17, 8: 9, 9: 10})
Hope this may help you.
L = [2, 3, 7, 3, 4, 5, 2, 7, 7, 8, 9, 4] # as L1
# Start every distinct key at 0 so the loop can add to it unconditionally.
dict_a = dict.fromkeys(set(L), 0)
# enumerate() yields (position, item): the item is the key and the position
# is the value being summed.  The totals are already ints, so no int() cast
# is needed.
for index, key in enumerate(L):
    dict_a[key] += index

find common data python

Using
def compare_lsts(list1, list2):
    """Print and return the items of ``list1`` that also occur in ``list2``.

    Args:
        list1: Items to test; their order and duplicates are preserved.
        list2: Items to test against; its elements must be hashable.

    Returns:
        The list that is printed: every item of ``list1`` found in ``list2``.
    """
    # Membership is checked against a set built once; scanning list2 for
    # every item of list1 made the sets that were built here pointless.
    second_set = set(list2)
    results = [x for x in list1 if x in second_set]
    print(results)
    return results
and running compare_lsts([1,2,3,4,5],[3,8,9,1,7]) gives the numbers contained in both sets, i.e. [1,3].
However making list 1 contain more than 1 list e.g. compare_lsts([[1,2,3,4,5],[5,8,2,9,12],[3,7,19,4,16]],[3,7,2,16,19]) gives [],[],[].
I have used for list in list1 followed by results for the loop. I clearly don't know what I am doing.
Basically the question is: How does one compare items in one static list with as many lists as there are?
First of all, you already started using sets, so you should definitely use them, as they are faster when checking containment. Also, there are already a few helpful built-in features for sets, so for comparing two lists, you can just intersect the sets to get those items that are in both lists:
>>> set1 = set([1, 2, 3, 4, 5])
>>> set2 = set([3, 8, 9, 1, 7])
>>> set1 & set2
{1, 3}
>>> list(set1 & set2) # in case you need a list as the output
[1, 3]
Similarly, you can also find the union of two sets to get those items that are in any of the sets:
>>> set1 | set2
{1, 2, 3, 4, 5, 7, 8, 9}
So, if you want to find all items from list2 that are in any of list1’s sublists, then you could intersect all the sublists with list2 and then union all those results:
>>> sublists = [set([1, 2, 3, 4, 5]), set([5, 8, 2, 9, 12]), set([3, 7, 19, 4, 16])]
>>> otherset = set([3, 7, 2, 16, 19])
>>> intersections = [sublist & otherset for sublist in sublists]
>>> intersections
[{2, 3}, {2}, {16, 3, 19, 7}]
>>> union = set()
>>> for intersection in intersections:
union = union | intersection
>>> union
{16, 19, 2, 3, 7}
You can also do that a little bit nicer using functools.reduce:
>>> import functools
>>> functools.reduce(set.union, intersections)
{16, 19, 2, 3, 7}
Similarly, if you want to actually intersect those results, you could do that as well:
>>> functools.reduce(set.intersection, intersections)
set()
And finally, you can pack that all in a nice function:
def compareLists(mainList, *otherLists):
    """Return the items of ``mainList`` found in at least one other list.

    Args:
        mainList: The list every other list is compared against.
        *otherLists: Any number of iterables of hashable items.

    Returns:
        A set with the items of ``mainList`` that occur in any of
        ``otherLists``; an empty set when no other lists are given.  To
        require membership in *all* other lists instead, use
        ``set(mainList).intersection(*otherLists)``.
    """
    mainSet = set(mainList)
    # set().union(*iterables) accepts zero arguments, whereas reducing an
    # empty list of intersections raises TypeError.  Intersecting with the
    # combined set equals the union of the per-list intersections.
    return mainSet.intersection(set().union(*otherLists))
And use it like this:
>>> compareLists([1, 2, 3, 4, 5], [3, 8, 9, 1, 7])
{1, 3}
>>> compareLists([3, 7, 2, 16, 19], [1, 2, 3, 4, 5], [5, 8, 2, 9, 12], [3, 7, 19, 4, 16])
{16, 19, 2, 3, 7}
Note, that I replaced the order of the arguments in the function, so the main list (in your case list2) is mentioned first as that is the one the others are compared to.
If you're after elements from the first that are in all of the lists:
set(first).intersection(second, third) # fourth, fifth, etc...
>>> set([1, 2, 3]).intersection([2, 3, 4], [3, 4, 5])
set([3])
If you're after elements from the first that are in any of the other lists:
>>> set([1, 2, 3]) & set([2, 4]).union([5])
set([2])
So, then a simple func:
def in_all(fst, *rst):
    """Return the items of ``fst`` that occur in every iterable of ``rst``.

    With no further iterables the result is simply ``set(fst)``.
    """
    return set(fst).intersection(*rst)
def in_any(fst, *rst):
    """Return the items of ``fst`` that occur in at least one of ``rst``.

    With no further iterables the result is an empty set.
    """
    # set().union(*rst) already copes with zero arguments, so there is no
    # need to peel the first iterable off by hand.
    return set(fst).intersection(set().union(*rst))
Not sure if it's the best way but:
def flat(l):
    """Flatten one level of nesting.

    Args:
        l: Iterable whose items are either lists or single values.

    Returns:
        A new list in which every list item of ``l`` is replaced by its
        elements; other items are kept as they are.
    """
    c_l = []
    for i in l:
        if isinstance(i, list):
            # extend() copies the elements immediately.  map() is lazy in
            # Python 3, so using it only for its side effect appended
            # nothing at all.
            c_l.extend(i)
        else:
            c_l.append(i)
    return c_l
def compare_lsts(a, b):
    """Return the values common to ``a`` and ``b`` as a list.

    Either argument may contain sub-lists; those are flattened one level
    with ``flat`` before comparing.

    Returns:
        A list of the shared values, in arbitrary (set) order.
    """
    # Flatten as soon as ANY item is a sub-list.  Requiring all items to be
    # lists left mixed input unflattened, and set() then failed on the
    # unhashable sub-lists.
    if any(isinstance(x, list) for x in a):
        a = flat(a)
    if any(isinstance(x, list) for x in b):
        b = flat(b)
    return list(set(a) & set(b))  # intersection between a and b
# The closing parenthesis used to sit inside the trailing comment, which left
# the call unterminated.
print(compare_lsts([[1,2,3,4,5],[5,8,2,9,12],[3,7,19,4,16]],[3,7,2,16,19]))  # [16, 3, 2, 19, 7]

Categories

Resources