Related
N = 14
SIZE = 6
lst = range(N+1)
sum_n_combs = [
list(comb) for comb in it.combinations_with_replacement(lst, SIZE)
if sum(comb) == N
]
print(sum_n_combs)
output [[0, 0, 0, 0, 0, 14], [0, 0, 0, 0, 1, 13], [0, 0, 0, 0, 2, 12], [0, 0, 0, 0, 3, 11], [0, 0, 0, 0, 4, 10], [0, 0, 0, 0, 5, 9], [0, 0, 0, 0, 6, 8], [0, 0, 0, 0, 7, 7], [0, 0, 0, 1, 1, 12], [0, 0, 0, 1, 2, 11], [0, 0, 0, 1, 3, 10], [0, 0, 0, 1, 4, 9], [0, 0, 0, 1, 5, 8], [0, 0, 0, 1, 6, 7], [0, 0, 0, 2, 2, 10], [0, 0, 0, 2, 3, 9], [0, 0, 0, 2, 4, 8], [0, 0, 0, 2, 5, 7], [0, 0, 0, 2, 6, 6], [0, 0, 0, 3, 3, 8], [0, 0, 0, 3, 4, 7], [0, 0, 0, 3, 5, 6], [0, 0, 0, 4, 4, 6], [0, 0, 0, 4, 5, 5], [0, 0, 1, 1, 1, 11], [0, 0, 1, 1, 2, 10], [0, 0, 1, 1, 3, 9], [0, 0, 1, 1, 4, 8], [0, 0, 1, 1, 5, 7], [0, 0, 1, 1, 6, 6], [0, 0, 1, 2, 2, 9], [0, 0, 1, 2, 3, 8], [0, 0, 1, 2, 4, 7], [0, 0, 1, 2, 5, 6], [0, 0, 1, 3, 3, 7], [0, 0, 1, 3, 4, 6], [0, 0, 1, 3, 5, 5], [0, 0, 1, 4, 4, 5], [0, 0, 2, 2, 2, 8], [0, 0, 2, 2, 3, 7], [0, 0, 2, 2, 4, 6], [0, 0, 2, 2, 5, 5], [0, 0, 2, 3, 3, 6], [0, 0, 2, 3, 4, 5], [0, 0, 2, 4, 4, 4], [0, 0, 3, 3, 3, 5], [0, 0, 3, 3, 4, 4], [0, 1, 1, 1, 1, 10], [0, 1, 1, 1, 2, 9], [0, 1, 1, 1, 3, 8], [0, 1, 1, 1, 4, 7], [0, 1, 1, 1, 5, 6], [0, 1, 1, 2, 2, 8], [0, 1, 1, 2, 3, 7], [0, 1, 1, 2, 4, 6], [0, 1, 1, 2, 5, 5], [0, 1, 1, 3, 3, 6], [0, 1, 1, 3, 4, 5], [0, 1, 1, 4, 4, 4], [0, 1, 2, 2, 2, 7], [0, 1, 2, 2, 3, 6], [0, 1, 2, 2, 4, 5], [0, 1, 2, 3, 3, 5], [0, 1, 2, 3, 4, 4], [0, 1, 3, 3, 3, 4], [0, 2, 2, 2, 2, 6], [0, 2, 2, 2, 3, 5], [0, 2, 2, 2, 4, 4], [0, 2, 2, 3, 3, 4], [0, 2, 3, 3, 3, 3], [1, 1, 1, 1, 1, 9], [1, 1, 1, 1, 2, 8], [1, 1, 1, 1, 3, 7], [1, 1, 1, 1, 4, 6], [1, 1, 1, 1, 5, 5], [1, 1, 1, 2, 2, 7], [1, 1, 1, 2, 3, 6], [1, 1, 1, 2, 4, 5], [1, 1, 1, 3, 3, 5], [1, 1, 1, 3, 4, 4], [1, 1, 2, 2, 2, 6], [1, 1, 2, 2, 3, 5], [1, 1, 2, 2, 4, 4], [1, 1, 2, 3, 3, 4], [1, 1, 3, 3, 3, 3], [1, 2, 2, 2, 2, 5], [1, 2, 2, 2, 3, 4], [1, 2, 2, 3, 3, 3], [2, 2, 2, 2, 2, 4], [2, 2, 2, 2, 3, 3]]
As "combinations with replacement" does, this function only produces the combination. I want permutation of each combination without repetition.
For example
[[0, 0, 0, 0, 0, 14], [0, 0, 0, 0, 14, 0] ... [3, 2, 3, 2, 2, 2], [3, 3, 2, 2, 2]]
When I tried to do this by
ret=[]
for i in range(90):
ret.extend(it.permutations(sum_n_combs[i], SIZE))
Time complexity was exponential, and made repititions
When I tested with one list sum_n_combs[0], which is [0, 0, 0, 0, 0, 14] produced 720 permutations when I only want 6 of them(14 at each different place).
How can I make permutation without repetition for each combination in an efficient way?
You could separate this in two steps:
generate partitions of the targeted sum
generate distinct permutations of each partition
Recursive generators will allow you to get the results efficiently without trial/error filtering and without storing everything in memory:
def partitions(N,size):
if size == 1 :
yield (N,) # base case, only 1 part
return
for a in range(N//size+1): # smaller part followed by
for p in partitions(N-a*size,size-1): # equal or larger ones
yield (a, *(n+a for n in p)) # recursing on delta only
def permuteDistinct(A):
if len(A) == 1:
yield tuple(A) # single value
return
used = set() # track starting value
for i,n in enumerate(A): # for each starting value
if n in used: continue # not yet used
used.add(n)
for p in permuteDistinct(A[:i]+A[i+1:]):
yield (n,*p) # starting value & rest
output:
N = 14
SIZE = 6
PARTITIONS...
for part in partitions(N,SIZE):
print(part)
(0, 0, 0, 0, 0, 14)
(0, 0, 0, 0, 1, 13)
(0, 0, 0, 0, 2, 12)
(0, 0, 0, 0, 3, 11)
(0, 0, 0, 0, 4, 10)
(0, 0, 0, 0, 5, 9)
(0, 0, 0, 0, 6, 8)
(0, 0, 0, 0, 7, 7)
(0, 0, 0, 1, 1, 12)
(0, 0, 0, 1, 2, 11)
(0, 0, 0, 1, 3, 10)
(0, 0, 0, 1, 4, 9)
(0, 0, 0, 1, 5, 8)
(0, 0, 0, 1, 6, 7)
(0, 0, 0, 2, 2, 10)
(0, 0, 0, 2, 3, 9)
(0, 0, 0, 2, 4, 8)
(0, 0, 0, 2, 5, 7)
(0, 0, 0, 2, 6, 6)
(0, 0, 0, 3, 3, 8)
(0, 0, 0, 3, 4, 7)
(0, 0, 0, 3, 5, 6)
(0, 0, 0, 4, 4, 6)
(0, 0, 0, 4, 5, 5)
...
PERMUTED PARTITIONS (DISTINCT):
for part in partitions(N,SIZE):
for permutedPart in permuteDistinct(part):
print(permutedPart)
(0, 0, 0, 0, 0, 14)
(0, 0, 0, 0, 14, 0)
(0, 0, 0, 14, 0, 0)
(0, 0, 14, 0, 0, 0)
(0, 14, 0, 0, 0, 0)
(14, 0, 0, 0, 0, 0)
(0, 0, 0, 0, 1, 13)
(0, 0, 0, 0, 13, 1)
(0, 0, 0, 1, 0, 13)
(0, 0, 0, 1, 13, 0)
(0, 0, 0, 13, 0, 1)
(0, 0, 0, 13, 1, 0)
(0, 0, 1, 0, 0, 13)
(0, 0, 1, 0, 13, 0)
(0, 0, 1, 13, 0, 0)
(0, 0, 13, 0, 0, 1)
(0, 0, 13, 0, 1, 0)
(0, 0, 13, 1, 0, 0)
...
I have several large files in a folder. Each single file fits in my RAM, but all of them not. I have the following loop processing each file:
for dataset_index,path in enumerate(file_paths_train):
with np.load(path) as dataset:
x_batch = dataset['data']
y_batch = dataset['labels']
for i in range(x_batch.shape[0]):
if y_batch[i] in INDICES:
# This selects a minimal subset of the data
data_list.append((y_batch[i], x_batch[i]))
# End loop
(the paths for all files are stored in the variable file_paths_train)
This answer stated that using with ... as ... would automatically delete the variable associated with the file once the program is out of the scope. Except it isn't, memory usage increases until the computer stops working and I need to restart.
Ideas?
Indexing a multidimensional array with a scalar creates a view. If that view is saved in a list, the original array remains, regardless of what happens to its variable references.
In [95]: alist = []
...: for i in range(3):
...: x = np.ones((10,10),int)*i
...: alist.append(x[0])
...:
In [96]: alist
Out[96]:
[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])]
In [97]: [item.base for item in alist]
Out[97]:
[array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]),
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]),
array([[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2]])]
You have to append a copy if you want to truely 'throw-away' the original array.
In [98]: alist = []
...: for i in range(3):
...: x = np.ones((10,10),int)*i
...: alist.append(x[0].copy())
...:
...:
In [99]: alist
Out[99]:
[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])]
In [100]: [item.base for item in alist]
Out[100]: [None, None, None]
I have matrix (below), which represents classes (e.x. 0,1,2). I am plottling it with plotly (python) using heatmap, and I can't find any function which will give me the coordinates of classes' borders.
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 2, 2],
[1, 1, 1, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 0, 0],
[2, 2, 2, 2, 2, 2, 2, 0, 0, 0],
[2, 2, 2, 2, 2, 0, 0, 0, 0, 0],
[2, 2, 2, 2, 0, 0, 0, 0, 0, 0],
[2, 2, 2, 0, 0, 0, 0, 0, 0, 0],
[2, 2, 2, 0, 0, 0, 0, 0, 0, 0]])
The black lines below are borders, could you give some tips to calculate it in python efficient way? Checking every element in array and its neighbour works very slow.
upd. I also tried to look in plotly contour plot, but the shape of lines has interpolation so it looks no as in the exmaple above...
You can use skimage.measure to find the components in the array. Since 0 is considered to be "background", you'll have to change the label to another one, the maximum value +1 for instance. This will give you tuples of slices with the coordinates.
Labeling is necessary if blocks with the same label are not necessarily "attached", or in the same component. Otherwise you can directly use regionprops to find the slices.
from skimage.measure import label, regionprops
a[a==0]=a.max()+1
l = label(a)
for s in regionprops(l):
print(s.slice)
(slice(0, 4, None), slice(0, 10, None))
(slice(2, 10, None), slice(0, 10, None))
(slice(4, 10, None), slice(3, 10, None))
Input data:
a = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 2, 2],
[1, 1, 1, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2, 0, 0],
[2, 2, 2, 2, 2, 2, 2, 0, 0, 0],
[2, 2, 2, 2, 2, 0, 0, 0, 0, 0],
[2, 2, 2, 2, 0, 0, 0, 0, 0, 0],
[2, 2, 2, 0, 0, 0, 0, 0, 0, 0],
[2, 2, 2, 0, 0, 0, 0, 0, 0, 0]])
I have a python series. The series contains lists with different lengths.
0 [2, 0, 2, 0, 2, 1, 0, 0, 0, 1, 1, 2, 2, 0, 2, ...
1 [2, 2]
2 [2]
3 [1, 1, 0, 2, 2, 1, 0, 2, 2, 2, 0, 0, 0, 2, 0, ...
4 [1, 2, 0, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2]
5 [2, 0, 1, 1]
6 [2, 2]
7 [0, 0, 2, 0, 2, 2]
8 [2, 0, 2, 0]
9 [2, 0, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, ...
10 [1, 0]
11 [1, 2, 0, 0, 1, 2, 0, 2, 1, 1]
12 [1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 1, ...
13 [0, 1, 0, 0, 2, 0, 1, 2, 2, 2, 2, 0, 2, 1, 0, ...
14 [0, 0, 0, 2, 1, 0, 0, 2, 1, 2, 2, 2, 2, 0, 2, ...
15 [1, 1, 2, 0, 0, 0, 0, 2, 2]
What I want to do is to measure the Volatility of these lists. As far as I'm concerned, I need to do the normalization work(which means all these lists will share the same length) before measuring. I think parsing each list precentagewise plausibility is a good choice.Sadly, I don't know how to manage it.
Maybe the first step is to transform lists to a given length. Sec step is to calculate the new score in each percentile(something like max_pooling avg? I don't know^).How to extract items from lists by peicentile?
I have a 2D array. I have to initialize the array by marking the number of 1's in the rectangle from the top left point to all points.
Original 2D array:
[0, 1, 0, 0, 0, 1, 0]
[1, 1, 0, 0, 1, 0, 1]
[0, 1, 1, 0, 1, 0, 0]
[0, 0, 0, 0, 0, 0, 1]
1st step (sum vertical elements with the previous one):
[0, 1, 1, 1, 1, 2, 2]
[1, 2, 2, 2, 3, 3, 4]
[0, 1, 2, 2, 3, 3, 3]
[0, 0, 0, 0, 0, 0, 1]
2nd step (sum horizontal elements with the previous one):
[0, 1, 1, 1, 1, 2, 2]
[1, 3, 3, 3, 4, 5, 6]
[1, 4, 5, 5, 7, 8, 9]
[1, 4, 5, 5, 7, 8, 10]
Both of these operations are O(n2). Is there a quicker way to initialize the list?
You cannot avoid quadratic time, but there is no need in two steps
(OK, code with correct answer looks longer a bit :))
lst=[[0, 1, 0, 0, 0, 1, 0]]
lst.append([1, 1, 0, 0, 1, 0, 1])
lst.append([0, 1, 1, 0, 1, 0, 0])
lst.append([0, 0, 0, 0, 0, 0, 1])
for i in range(1.len(lst)):
for j in range(len(lst[0])):
if (i>0):
lst[i][j] += lst[i-1][j]
if (j>0):
lst[i][j] += lst[i][j-1]
if (i>0) & (j>0):
lst[i][j] -= lst[i-1][j-1]
print(lst)
>>>[[0, 1, 1, 1, 1, 2, 2],
[1, 3, 3, 3, 4, 5, 6],
[1, 4, 5, 5, 7, 8, 9],
[1, 4, 5, 5, 7, 8, 10]]
or without if's:
for j in range(1,len(lst[0])):
lst[0][j] += lst[0][j-1]
for i in range(1,len(lst)):
lst[i][0] += lst[i-1][0]
for i in range(1,len(lst)):
for j in range(1,len(lst[0])):
lst[i][j] = lst[i][j] + lst[i-1][j] + lst[i][j-1] - lst[i-1][j-1]