BFS to find all fixed-length cycles - python

I have to find all cycles of length 3 in a given graph. I've implemented it using BFS, but so far it only performs well on relatively small inputs. It still works for bigger ones and gives the correct answer, but the time it takes to find that answer is extremely high. Is there any way to improve the following code to make it more efficient?
num_res = 0
adj_list = []
cycles_list = []

def bfs_cycles(start):
    queue = [(start, [start])]
    depth = 0
    while queue and depth <= 3:
        (vertex, path) = queue.pop(0)
        current_set = set(adj_list[vertex]) - set(path)
        if start in set(adj_list[vertex]):
            current_set = current_set.union([start])
        depth = len(path)
        for node in current_set:
            if node == start:
                if depth == 3 and sorted(path) not in cycles_list:
                    cycles_list.append(sorted(path))
                    yield path + [node]
            else:
                queue.append((node, path + [node]))
if __name__ == "__main__":
    num_towns, num_pairs = [int(x) for x in input().split()]
    adj_list = [[] for x in range(num_towns)]
    adj_matrix = [[0 for x in range(num_towns)] for x in range(num_towns)]
    # EDGE LIST TO ADJACENCY LIST
    for i in range(num_pairs):
        cur_start, cur_end = [int(x) for x in input().split()]
        adj_list[cur_start].append(cur_end)
        adj_list[cur_end].append(cur_start)
    num_cycles = 0
    for i in range(num_towns):
        my_list = list(bfs_cycles(i))
        num_cycles += len(my_list)
    print(num_cycles)
Examples of inputs:
6 15
5 4
2 0
3 1
5 1
4 1
5 3
1 0
4 0
4 3
5 2
2 1
3 0
3 2
5 0
4 2
(output: 20; works ok)
52 1051
48 5
41 28
12 4
33 27
12 5
1 0
15 12
50 8
33 8
38 28
26 10
13 7
39 18
31 11
48 19
41 19
40 25
47 45
27 16
46 25
42 6
5 4
51 2
30 21
41 27
26 25
33 11
45 26
16 7
23 15
17 6
45 22
32 6
29 8
36 20
30 1
36 25
41 6
46 4
46 40
18 8
38 1
28 5
43 22
21 11
39 14
31 29
18 9
50 35
32 17
48 27
49 40
16 1
49 47
41 12
30 28
33 14
48 12
37 20
49 20
48 8
48 6
27 17
46 44
31 12
17 9
32 27
14 11
40 23
36 19
38 10
42 2
35 22
26 23
29 23
30 11
11 7
47 12
30 13
38 34
48 11
46 8
42 31
30 4
35 17
50 2
51 1
12 10
44 25
47 17
45 24
25 2
45 11
39 21
39 31
9 6
16 3
10 6
15 11
37 2
23 6
41 40
34 26
45 33
35 23
45 36
11 4
38 7
36 6
10 3
33 12
39 12
41 24
47 8
33 5
44 18
45 8
48 41
44 37
11 3
16 6
21 10
20 0
44 36
29 4
43 33
48 4
46 35
33 6
42 12
45 19
12 8
37 15
43 41
36 11
12 11
50 37
9 7
51 30
36 0
33 17
36 35
50 36
49 37
50 16
46 21
36 22
49 15
46 28
50 27
20 10
23 0
36 29
35 33
42 17
31 16
48 47
48 23
17 2
40 14
10 5
45 7
48 42
39 32
51 4
42 8
38 19
34 10
50 5
51 36
46 26
42 38
20 12
44 32
34 4
49 6
50 45
37 10
45 41
38 11
42 30
21 20
43 23
42 26
33 1
17 7
26 6
16 12
44 16
21 9
36 30
39 24
26 4
47 10
18 7
36 12
26 17
28 13
18 11
23 7
44 4
43 26
26 16
22 21
37 0
36 28
34 5
22 17
41 20
31 8
27 25
12 2
42 11
29 28
39 33
34 12
30 2
22 8
40 15
42 9
28 7
44 41
41 35
44 17
12 7
13 10
23 20
48 38
43 12
32 19
43 30
50 1
10 1
17 12
32 2
26 14
29 12
32 5
7 6
36 16
49 7
31 1
45 17
33 29
28 11
32 0
49 32
42 36
16 4
45 20
21 14
39 15
34 18
13 8
27 15
19 11
37 36
36 14
28 4
36 13
17 11
38 13
35 28
50 10
39 28
40 2
35 8
32 24
47 34
45 27
41 21
21 4
47 27
48 1
35 30
21 5
20 14
27 26
17 1
28 17
43 7
31 6
20 3
34 21
8 2
21 1
32 9
29 1
45 43
50 39
19 15
22 12
48 7
46 18
45 35
50 42
51 17
37 6
24 23
29 3
39 20
51 50
38 6
50 11
38 14
25 24
14 7
45 44
28 14
50 49
42 28
36 7
35 25
13 4
46 1
48 21
51 11
39 11
17 5
31 0
49 36
40 4
37 21
35 1
23 4
43 4
46 36
38 20
37 27
30 0
44 34
49 10
48 14
48 45
38 31
47 29
40 16
51 20
34 17
51 19
24 9
24 5
5 1
15 13
26 2
19 12
50 14
42 7
35 14
46 20
43 28
8 3
38 37
28 1
21 0
51 5
17 16
38 17
34 30
46 12
17 14
50 9
16 13
30 27
45 0
41 16
41 32
48 18
30 8
51 47
11 8
40 13
34 32
23 11
51 28
42 35
36 2
13 11
28 8
15 10
39 35
27 1
50 7
41 23
46 39
38 9
44 10
46 38
6 4
44 27
36 21
35 9
45 30
44 7
37 1
44 28
9 1
32 31
39 16
4 0
44 13
24 0
17 15
15 1
32 8
39 22
42 34
24 6
49 18
36 1
51 42
38 5
14 12
33 3
51 45
24 18
37 32
46 6
44 12
23 10
32 12
50 26
29 20
41 30
6 0
48 31
39 8
21 19
47 6
47 16
18 3
46 27
11 10
36 3
47 2
17 10
43 6
36 8
4 1
14 9
42 1
44 1
46 22
44 23
40 26
30 17
21 17
42 29
45 16
49 45
11 6
35 7
46 42
14 10
26 13
49 44
19 18
26 12
46 2
50 41
43 20
38 24
48 30
34 29
25 19
32 11
46 16
30 25
38 15
50 38
51 23
47 28
14 5
40 12
21 8
47 36
38 32
32 15
28 21
45 10
44 8
34 0
32 14
43 25
32 21
38 2
27 2
24 17
33 31
49 26
22 13
13 1
32 20
43 0
46 0
45 29
40 32
48 44
45 34
29 2
39 27
14 8
26 3
40 19
45 38
40 11
34 6
43 39
40 8
35 0
18 0
47 25
21 18
24 8
18 4
25 14
20 11
18 17
24 14
27 23
47 15
38 21
19 2
6 1
46 11
51 38
6 3
31 17
3 0
13 2
41 1
51 14
19 5
39 2
41 22
16 9
22 3
13 0
42 21
24 16
44 31
51 25
40 33
46 29
47 31
51 35
35 18
43 1
47 22
20 18
48 29
39 23
31 25
32 25
22 10
46 24
32 3
46 13
24 15
34 13
50 18
41 4
41 2
43 27
29 10
30 20
32 7
50 20
42 10
42 24
15 7
48 25
41 39
32 1
40 36
20 7
32 13
27 3
34 7
48 34
47 39
39 36
40 5
19 0
25 20
38 12
27 14
44 3
36 4
37 4
33 28
37 23
34 9
46 45
25 9
30 16
34 14
46 37
28 26
26 22
18 5
16 0
36 27
45 42
38 33
37 22
27 0
44 15
49 42
34 23
29 11
30 12
17 8
48 28
10 4
36 15
44 14
23 19
43 18
27 5
40 1
18 12
34 20
50 23
9 3
35 4
46 15
37 11
27 4
19 3
45 1
47 1
48 17
9 2
39 26
33 10
38 30
45 25
48 24
29 17
37 28
34 31
51 21
43 8
31 4
20 16
39 25
31 13
24 3
50 43
13 9
32 23
40 18
45 40
37 35
47 38
42 13
51 26
43 31
49 23
18 15
15 0
43 9
7 2
48 46
35 11
42 23
47 40
3 1
25 6
46 3
42 19
28 9
15 3
43 3
35 10
42 41
51 46
9 4
46 34
28 0
6 5
45 14
26 11
48 13
33 23
40 9
23 21
18 16
28 12
43 29
35 31
30 14
36 34
49 38
49 22
24 11
23 14
45 13
49 21
48 16
51 10
39 4
50 46
50 48
43 17
31 18
38 23
2 0
41 0
30 19
20 1
29 19
48 32
30 15
40 22
51 12
50 40
24 4
39 10
31 20
7 0
40 17
41 31
37 29
33 32
30 3
40 6
51 15
46 19
31 28
34 22
31 5
33 7
29 14
34 24
44 6
24 2
44 40
35 6
37 18
47 0
43 42
49 30
49 25
19 1
25 3
49 5
40 10
25 21
48 15
35 19
50 6
36 17
44 33
21 13
15 4
36 32
28 6
49 35
47 9
49 46
47 14
25 4
44 29
38 25
23 12
51 41
20 5
39 34
15 6
47 23
21 6
47 11
22 7
41 29
34 2
43 38
6 2
3 2
40 20
40 24
37 16
32 26
49 31
49 16
50 13
31 2
26 1
5 0
19 16
45 32
42 40
16 5
15 8
38 27
12 6
47 4
39 6
31 19
26 9
47 18
42 32
4 2
42 20
46 10
27 6
41 7
49 2
49 28
20 9
46 33
16 11
14 4
34 1
33 2
30 6
47 44
41 8
23 17
33 25
23 5
24 13
33 20
44 35
47 46
47 7
41 25
45 5
28 23
31 15
31 10
39 9
40 7
45 6
43 11
35 26
51 34
44 38
45 3
24 19
51 22
47 42
34 15
37 33
29 9
49 3
14 3
23 2
39 7
46 23
40 31
33 16
44 43
41 36
37 17
43 40
32 18
46 32
26 18
4 3
39 5
44 11
28 20
44 21
41 26
39 38
36 5
7 3
39 0
27 18
26 20
18 2
50 28
37 26
40 27
17 4
50 3
39 30
32 29
50 34
18 1
20 4
36 23
25 15
49 0
45 39
39 1
37 5
23 16
47 20
27 20
38 4
46 43
34 27
15 5
31 23
39 29
46 7
38 35
41 14
45 9
25 22
10 9
35 21
19 14
37 8
47 35
9 0
35 13
21 16
50 32
37 7
19 8
22 5
51 24
51 9
29 0
51 39
44 19
42 5
31 9
40 30
51 37
25 12
26 0
32 16
25 1
41 13
47 43
25 18
35 29
50 44
45 23
44 20
50 47
22 2
45 4
34 19
48 33
34 16
18 10
29 18
37 13
45 2
43 14
48 10
15 2
28 22
29 16
45 15
19 17
35 16
46 9
9 5
35 27
30 5
49 39
32 28
42 3
48 37
43 32
44 30
37 30
14 2
47 32
20 8
18 13
25 5
44 5
29 15
49 11
42 14
30 29
42 27
19 6
51 49
51 13
12 1
40 34
23 13
27 11
51 43
27 24
19 13
26 19
16 10
23 1
46 5
35 15
30 10
48 3
19 9
25 23
16 14
23 3
34 11
27 9
32 30
39 19
50 33
45 21
50 12
13 3
50 15
25 16
49 14
41 17
47 19
43 36
13 12
30 7
49 48
14 0
24 7
49 27
30 26
47 21
14 6
30 22
22 9
29 5
23 22
51 40
42 37
29 6
8 5
51 29
22 4
28 19
21 3
45 12
47 26
43 35
48 43
20 2
24 21
33 22
24 20
41 5
35 3
43 15
43 34
19 10
47 41
49 8
29 21
51 31
43 19
50 17
47 24
(output: 11061; takes around 10 seconds)

A few problems in your code:

- The check sorted(path) not in cycles_list has O(n) complexity, where n is the size of cycles_list; storing the cycles as tuples in a set makes that lookup O(1).
- queue.pop(0) has O(n) complexity, where n is the size of the queue. You should use collections.deque, not a list, here.
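A minimal sketch of those two fixes applied to your generator, keeping the BFS structure. It assumes adj_sets = [set(ns) for ns in adj_list] has been built next to the adjacency list; everything else mirrors the original:

from collections import deque

cycles_seen = set()  # global, like cycles_list, but with O(1) membership tests

def bfs_cycles(start):
    queue = deque([(start, [start])])  # deque: O(1) pops from the left
    while queue:
        vertex, path = queue.popleft()
        if len(path) == 3:
            # close the cycle if there is an edge back to the start vertex
            if start in adj_sets[vertex]:
                key = tuple(sorted(path))
                if key not in cycles_seen:
                    cycles_seen.add(key)
                    yield path + [start]
            continue
        for node in adj_sets[vertex]:
            if node != start and node not in path:
                queue.append((node, path + [node]))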
As a general note, unless you really need to solve the question using BFS specifically (e.g. because someone asked you to use that method), a simple combination of loops does the job better. Pseudocode:

num_loops = 0
for a in nodes:
    for b in neighbors(a):
        if b > a:
            for c in neighbors(b):
                if c > b and a in neighbors(c):
                    num_loops += 1

The b > a and c > b checks are added to count each loop only once.
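For reference, a runnable version of that pseudocode, assuming the adj_list built in your main block (each neighbor list converted to a set so that the a in neighbors(c) test is O(1)):

adj_sets = [set(neighbors) for neighbors in adj_list]

num_triangles = 0
for a in range(len(adj_sets)):
    for b in adj_sets[a]:
        if b > a:
            for c in adj_sets[b]:
                if c > b and a in adj_sets[c]:
                    num_triangles += 1
print(num_triangles)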

For a small number of steps like 3, you can simply check, for each node, whether you can walk away from the node and back to it in exactly 3 steps.
This works reasonably fast:
import fileinput

graph = {}

# Recursive function to find a goal in a number of steps.
# Note: 'visited' is never mutated, so the mutable default is safe here.
def count_unique_walks(start, goal, length, visited=[]):
    if length == 0:
        # Out of steps
        return 1 if start == goal else 0
    if start in visited:
        # Already been here
        return 0
    result = 0
    for neighbor in graph[start]:
        if neighbor < start and neighbor != goal:
            # Count only unique cycles
            continue
        result += count_unique_walks(neighbor, goal, length - 1, visited + [start])
    return result

# Read input
for line in fileinput.input():
    a, b = map(int, line.split())
    if a not in graph:
        graph[a] = set()
    graph[a].add(b)
    if b not in graph:
        graph[b] = set()
    graph[b].add(a)

# Sum up the cycles of each node
result = 0
for node in graph:
    result += count_unique_walks(node, node, 3)
print(result)


Printing a square number table

n: 8
 0  1  2  3  4  5  6  7
 8  9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63
How can I print a number table like this in Python, with an n that can be any number?
I am using a very stupid way to print it, but the result is not the one expected:
n = int(input('n: '))
if n == 4:
    print(' 0 1 2 3\n4 5 6 7\n8 9 10 11\n12 13 14 15')
if n == 5:
    print(' 0 1 2 3 4\n5 6 7 8 9\n10 11 12 13 14\n15 16 17 18 19\n20 21 22 23 24')
if n == 6:
    print(' 0 1 2 3 4 5\n6 7 8 9 10 11\n12 13 14 15 16 17\n18 19 20 21 22 23\n24 25 26 27 28 29\n30 31 32 33 34 35')
if n == 7:
    print(' 0 1 2 3 4 5 6\n7 8 9 10 11 12 13\n14 15 16 17 18 19 20\n21 22 23 24 25 26 27\n28 29 30 31 32 33 34\n35 36 37 38 39 40 41\n42 43 44 45 46 47 48')
if n == 8:
    print(' 0 1 2 3 4 5 6 7\n8 9 10 11 12 13 14 15\n16 17 18 19 20 21 22 23\n24 25 26 27 28 29 30 31\n32 33 34 35 36 37 38 39\n40 41 42 43 44 45 46 47\n48 49 50 51 52 53 54 55\n56 57 58 59 60 61 62 63')
if n == 9:
    print(' 0 1 2 3 4 5 6 7 8\n9 10 11 12 13 14 15 16 17\n18 19 20 21 22 23 24 25 26\n27 28 29 30 31 32 33 34 35\n36 37 38 39 40 41 42 43 44\n45 46 47 48 49 50 51 52 53\n54 55 56 57 58 59 60 61 62\n63 64 65 66 67 68 69 70 71\n72 73 74 75 76 77 78 79 80')
if n == 10:
    print(' 0 1 2 3 4 5 6 7 8 9\n10 11 12 13 14 15 16 17 18 19\n20 21 22 23 24 25 26 27 28 29\n30 31 32 33 34 35 36 37 38 39\n40 41 42 43 44 45 46 47 48 49\n50 51 52 53 54 55 56 57 58 59\n60 61 62 63 64 65 66 67 68 69\n70 71 72 73 74 75 76 77 78 79\n80 81 82 83 84 85 86 87 88 89\n90 91 92 93 94 95 96 97 98 99')
here is the result:
n: 8
0 1 2 3 4 5 6 7
8 9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63
I won't show you the code directly; here are some tips for you. Do you know the % operator in Python, and how to use it to break lines? As for the formatting, the zfill function will help you. You may also need to learn the for or while statement to solve your problem.
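A minimal sketch of those hints put together (my interpretation, not the tip author's code): zfill pads each number to a fixed width, and the % operator decides where to break lines:

n = int(input('n: '))
width = len(str(n * n - 1))  # width of the largest value in the table
for i in range(n * n):
    # zfill pads with zeros; use str(i).rjust(width) for space padding instead
    print(str(i).zfill(width), end='\n' if (i + 1) % n == 0 else ' ')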
You can do this with a range loop and a list comprehension.
In order for the output to look right, you need to figure out the width of the largest value in the square, and then format each value to fit in that width (right-justified). Something like this:
def number_square(n):
    w = len(str(n*n-1))
    for r in range(n):
        print(*[f'{c:>{w}}' for c in range(r*n, r*n+n)])

number_square(8)
Output:
 0  1  2  3  4  5  6  7
 8  9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63

Create bi-weekly and monthly labels with week numbers in pandas

I have a dataframe with profit values, IDs, and week values. It looks a little like this:

ID  Week  Profit
 A     1       2
 A     2       2
 A     3       0
 A     4       0
I want to create two new columns called "Bi-Weekly" and "Monthly". Week 1 would get label 2, week 2 would also get label 2, week 3 would be labeled 4, week 4 would be labeled 4, and all of them would be labeled month 1, so that I can group by weekly, bi-weekly, or monthly profit as needed. Right now I've created two functions which work, but the weeks are going to go up to a full year (52 weeks), so I was wondering if there's a more efficient way. My bi-weekly function is below.
def biweek(prof_calc):
    if (prof_calc['week']==2):
        return 2
    elif (prof_calc['week']==3):
        return 2
    elif (prof_calc['week']==4):
        return 4
    elif (prof_calc['week']==5):
        return 4
    elif (prof_calc['week']==6):
        return 6
    elif (prof_calc['week']==7):
        return 6
    elif (prof_calc['week']==8):
        return 8
    elif (prof_calc['week']==9):
        return 8
    elif (prof_calc['week']==10):
        return 10
    elif (prof_calc['week']==11):
        return 10

prof_calc['BiWeek'] = prof_calc.apply(biweek, axis=1)
IIUC, you could try:
df["Biweekly"] = (df["Week"]-1)//2+1
df["Monthly"] = (df["Week"]-1)//4+1
>>> df
ID Week Profit Biweekly Monthly
0 A 1 42 1 1
1 A 2 69 1 1
2 A 3 53 2 1
3 A 4 63 2 1
4 A 5 56 3 2
5 A 6 57 3 2
6 A 7 86 4 2
7 A 8 23 4 2
8 A 9 35 5 3
9 A 10 10 5 3
10 A 11 25 6 3
11 A 12 21 6 3
12 A 13 39 7 4
13 A 14 82 7 4
14 A 15 76 8 4
15 A 16 20 8 4
16 A 17 97 9 5
17 A 18 67 9 5
18 A 19 21 10 5
19 A 20 22 10 5
20 A 21 88 11 6
21 A 22 67 11 6
22 A 23 33 12 6
23 A 24 38 12 6
24 A 25 8 13 7
25 A 26 67 13 7
26 A 27 16 14 7
27 A 28 49 14 7
28 A 29 3 15 8
29 A 30 17 15 8
30 A 31 79 16 8
31 A 32 19 16 8
32 A 33 21 17 9
33 A 34 9 17 9
34 A 35 56 18 9
35 A 36 83 18 9
36 A 37 1 19 10
37 A 38 53 19 10
38 A 39 66 20 10
39 A 40 55 20 10
40 A 41 85 21 11
41 A 42 90 21 11
42 A 43 34 22 11
43 A 44 3 22 11
44 A 45 9 23 12
45 A 46 28 23 12
46 A 47 58 24 12
47 A 48 14 24 12
48 A 49 42 25 13
49 A 50 69 25 13
50 A 51 76 26 13
51 A 52 49 26 13
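Note that these labels are consecutive group numbers (weeks 1-2 -> 1, weeks 3-4 -> 2), while the labels described in the question are the even week numbers (weeks 1-2 -> 2, weeks 3-4 -> 4). If you want those exact values, the same integer division can be scaled; a small sketch under that assumption:

df["BiWeek"] = ((df["Week"] - 1) // 2 + 1) * 2  # weeks 1,2 -> 2; weeks 3,4 -> 4
df["Month"] = (df["Week"] - 1) // 4 + 1         # weeks 1-4 -> 1; weeks 5-8 -> 2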

How do you correctly format multiple columns of integers in python?

I have some code here:
for i in range(self.size):
    print('{:6d}'.format(self.data[i], end=' '))
    if (i + 1) % NUMBER_OF_COLUMNS == 0:
        print()
Right now this prints as:
1
1
1
1
1
2
3
3
3
3
(whitespace)
3
3
3
etc.
It creates a new line when it hits 10 digits, but it doesn't print the initial 10 in a row...
This is what I want-
1 1 1 1 1 1 1 2 2 3
3 3 3 3 3 4 4 4 4 5
However, when it hits two-digit numbers it gets messed up -
8 8 8 8 8 9 9 9 9 10
10 10 10 10 10 10 etc.
I want it to be right-aligned like this-
 8  8  8  8  8  9
10 10 10 10 11 12 etc.
When I remove the format piece it will print the rows out, but there won't be the extra spacing in there, of course!
You can align strings by "padding" values using a string's .rjust method. (Note, by the way, that in your code end=' ' is passed to str.format rather than to print, which is why each value ends up on its own line.) Using some dummy data:

NUMBER_OF_COLUMNS = 10
for i in range(100):
    print("{}".format(i//2).rjust(3), end=' ')
    #print("{:3}".format(i//2), end=' ') edit: this also works. Thanks AChampion
    if (i + 1) % NUMBER_OF_COLUMNS == 0:
        print()
#Output:
  0   0   1   1   2   2   3   3   4   4
  5   5   6   6   7   7   8   8   9   9
 10  10  11  11  12  12  13  13  14  14
 15  15  16  16  17  17  18  18  19  19
 20  20  21  21  22  22  23  23  24  24
 25  25  26  26  27  27  28  28  29  29
 30  30  31  31  32  32  33  33  34  34
 35  35  36  36  37  37  38  38  39  39
 40  40  41  41  42  42  43  43  44  44
 45  45  46  46  47  47  48  48  49  49
Another approach is to just chunk up the data into rows and print each row, e.g.:

def chunk(iterable, n):
    # zip(*[iter(iterable)]*n): all n references share one iterator, so zip
    # pulls n consecutive items per row (a final partial row is dropped)
    return zip(*[iter(iterable)]*n)

for row in chunk(self.data, NUMBER_OF_COLUMNS):
    print(' '.join(str(data).rjust(6) for data in row))
e.g.:
In []:
for row in chunk(range(100), 10):
    print(' '.join(str(data//2).rjust(3) for data in row))
Out[]:
  0   0   1   1   2   2   3   3   4   4
  5   5   6   6   7   7   8   8   9   9
 10  10  11  11  12  12  13  13  14  14
 15  15  16  16  17  17  18  18  19  19
 20  20  21  21  22  22  23  23  24  24
 25  25  26  26  27  27  28  28  29  29
 30  30  31  31  32  32  33  33  34  34
 35  35  36  36  37  37  38  38  39  39
 40  40  41  41  42  42  43  43  44  44
 45  45  46  46  47  47  48  48  49  49

Calculating the derivatives of curvatures on a surface

I want to calculate the derivatives of curvatures on a surface using VTK and Python. I first calculate the curvatures using:
curvatures = vtk.vtkCurvatures()
curvatures.SetInputConnection(reader.GetOutputPort())
curvatures.SetCurvatureTypeToGaussian()
and calculate the derivative of curvatures using:
Derivativers = vtk.vtkCellDerivatives()
Derivativers.SetInputConnection(curvatures.GetOutputPort())
It seems that the results are the same with vtkCurvatures and vtkCellDerivatives.
What should I do to get the derivative of curvature on a surface? Many thanks!
I think your code is correct as it is, but we need to be sure that the curvature point data array is the currently active scalar array. I have attached an input data file that you can save under the name 'Test.vtk'. It has two point data arrays -- PointIds (a scalar array) and PointNormals (a vector array).

We first calculate Gaussian curvatures, which become the third scalar array of the point data. We print the names of all the point data arrays, irrespective of whether they are scalars or vectors, and then explicitly set the 'Gauss_Curvature' scalar array as the active scalars. Computing cell derivatives then creates a cell data vector array called 'ScalarGradient', which is the gradient of the curvatures. This is saved in a file 'Output.vtk':
import vtk
rd = vtk.vtkPolyDataReader()
rd.SetFileName('Test.vtk')
curv = vtk.vtkCurvatures()
curv.SetInputConnection(rd.GetOutputPort())
curv.SetCurvatureTypeToGaussian()
curv.Update()
pd = curv.GetOutput()
for i in range(pd.GetPointData().GetNumberOfArrays()):
    print(pd.GetPointData().GetArrayName(i))
# This will print the following:
# PointIds
# PointNormals
# Gauss_Curvature
# To set the active scalar to Gauss_Curvature
pd.GetPointData().SetActiveScalars('Gauss_Curvature')
curvdiff = vtk.vtkCellDerivatives()
curvdiff.SetInputData(pd)
curvdiff.SetVectorModeToComputeGradient()
curvdiff.Update()
writer = vtk.vtkPolyDataWriter()
writer.SetFileName('Output.vtk')
writer.SetInputConnection(curvdiff.GetOutputPort())
writer.Write()
This gives me the following outputs -- first for the curvature and then for the gradient. Notice that the color scales in the two figures are different: the curvature and derivative values are different, even though the color scheme makes them look similar.
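Since 'ScalarGradient' comes out as cell data, you may want it back at the points (for example, to color the surface per vertex). A small sketch using VTK's standard vtkCellDataToPointData filter, appended under that assumption:

# Average the per-cell gradient vectors back onto the points
c2p = vtk.vtkCellDataToPointData()
c2p.SetInputConnection(curvdiff.GetOutputPort())
c2p.Update()
# The point data of c2p.GetOutput() now also carries 'ScalarGradient'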
In case you want to reproduce the results, the input vtk file is as below
# vtk DataFile Version 4.2
vtk output
ASCII
DATASET POLYDATA
POINTS 72 double
2.0927648978 0.33091989273 -0.39812666792 1.6450815105 0.64303293033 -1.236079764 1.7000810807 1.2495041516 -0.44287861593
1.0622264471 1.4540269048 -1.1853937884 0.8533187462 0.72833963362 -1.8409362444 0.161573121 1.415272931 -1.6182009866
-0.4682233113 2.0970647997 -0.17539653223 0.30090053169 1.9778473 -0.80327873468 -0.62604403311 1.746197318 -1.0984268611
0.62604948422 1.746195345 1.0984268742 0.4682298575 2.0970633231 0.17539654742 -0.30089435724 1.9778482191 0.80327874624
1.3794219731 1.1031586743 1.2360880686 1.9321437012 0.84755424016 0.44288858377 1.3329709879 1.6469225081 0.39813606858
-1.3329658439 1.6469266769 -0.39813605266 -1.3794185207 1.1031629885 -1.2360880529 -1.9321410548 0.84756028031 -0.44288857482
-0.16156870247 1.4152734137 1.6182009959 -1.0622219128 1.4540302146 1.1853938087 -0.85331647216 0.72834227646 1.8409362479
-1.7000771766 1.2495094572 0.44287862867 -2.0927638628 0.33092642637 0.39812667143 -1.6450795106 0.64303805991 1.2360797754
0.10502897512 0.5677157381 2.0771002606 -0.54417928828 -0.19289519204 2.0770984773 0.43913323132 -0.37482057542 2.077101172
1.0574135878 0.37481822068 1.8409414841 1.3064404335 -0.56771795917 1.6182050108 1.7903331906 0.19289323113 1.1854016225
-0.72812102639 -1.6469234624 1.18539471 -0.20411225533 -1.1031605232 1.8409380189 -1.1448850389 -0.84755547744 1.6181982897
0.26564737208 -1.7461967516 1.236085002 -0.23207016686 -2.0970637037 0.44288263714 0.75978960067 -1.9778489401 0.39813448025
1.1992202745 -1.4152750453 1.0984284306 1.5819944619 -1.4540310306 0.17539958384 1.8633106814 -0.72834386503 0.80328466622
-1.825278792 -0.33092031521 1.0984201446 -2.0502257619 -0.64303229501 0.17538963068 -1.5624229303 -1.2495043655 0.80327527281
-0.26565282447 -1.7461959014 -1.2360850131 0.23206361633 -2.0970644256 -0.44288265596 -0.7597957797 -1.977846564 -0.39813449851
-1.1992246997 -1.4152712955 -1.0984284473 -1.5819990123 -1.4540260972 -0.17539960215 -1.8633129661 -0.72833804688 -0.80328468018
0.20410881451 -1.1031611451 -1.8409380327 1.1448823984 -0.84755903977 -1.6181983017 0.72811588321 -1.6469257176 -1.1853947189
2.0502237661 -0.64303869999 -0.17538964133 1.5624190405 -1.2495092418 -0.80327529169 1.8252777661 -0.33092600698 -1.0984201511
-0.43913440065 -0.37481918558 -2.0771011678 -0.10502720377 0.56771608521 -2.0771002475 0.54417868626 -0.19289687027 -2.0770984714
-1.3064422115 -0.56771386838 -1.6182050202 -1.7903325818 0.19289882961 -1.185401614 -1.057412421 0.3748215375 -1.8409414839
-0.76083174443 1.3178134523 -1.9919051229 -0.7608358562 -1.3178110596 -1.9919051353 -2.4621262785 3.8465962003e-06 -0.47023127203
1.5216839818 -2.3645462409e-06 -1.991898872 2.4621262803 -3.846902628e-06 0.47023127288 1.2310617434 -2.1322669408 -0.47022115796
-1.2310684033 -2.1322631023 0.47022113869 -1.5216839821 2.3661982943e-06 1.9918988726 0.76083174316 -1.3178134534 1.9919051234
0.76083585779 1.317811059 1.9919051359 -1.2310617441 2.1322669425 0.47022115881 1.2310684021 2.1322631008 -0.47022113785
POLYGONS 140 560
3 12 14 9
3 27 69 24
3 70 21 19
3 1 53 63
3 2 14 13
3 38 36 37
3 28 68 36
3 39 67 23
3 64 38 51
3 13 14 12
3 20 24 18
3 34 35 33
3 40 41 39
3 16 58 17
3 20 18 19
3 26 27 24
3 11 6 70
3 10 14 71
3 22 39 23
3 6 10 7
3 3 5 7
3 29 64 13
3 41 30 32
3 57 45 47
3 54 61 57
3 66 30 41
3 50 43 42
3 30 33 31
3 33 35 36
3 65 37 35
3 37 36 35
3 26 68 28
3 68 33 36
3 27 28 29
3 28 36 38
3 29 28 38
3 38 37 51
3 61 48 42
3 37 65 52
3 66 34 30
3 43 65 35
3 32 30 31
3 30 34 33
3 40 39 22
3 41 32 39
3 66 41 46
3 32 67 39
3 67 32 25
3 33 68 31
3 32 31 25
3 31 26 25
3 27 26 28
3 26 31 68
3 64 29 38
3 12 69 27
3 18 9 11
3 69 12 9
3 18 24 69
3 20 67 25
3 26 24 25
3 24 20 25
3 13 12 29
3 12 27 29
3 18 11 19
3 11 9 10
3 69 9 18
3 9 14 10
3 70 6 15
3 11 10 6
3 10 71 7
3 71 14 2
3 70 15 21
3 6 8 15
3 21 17 22
3 15 8 16
3 20 23 67
3 19 11 70
3 21 23 19
3 23 20 19
3 22 17 62
3 22 23 21
3 15 17 21
3 62 40 22
3 58 57 47
3 62 17 58
3 62 47 40
3 58 16 59
3 15 16 17
3 6 7 8
3 16 60 59
3 48 54 56
3 8 5 60
3 2 1 3
3 7 5 8
3 3 1 4
3 2 3 71
3 71 3 7
3 3 4 5
3 5 55 60
3 49 50 48
3 8 60 16
3 60 55 59
3 4 55 5
3 54 57 59
3 1 63 4
3 56 55 4
3 49 48 56
3 44 45 42
3 63 56 4
3 48 61 54
3 56 54 55
3 54 59 55
3 59 57 58
3 47 62 58
3 40 46 41
3 57 61 45
3 47 45 46
3 43 34 44
3 47 46 40
3 46 44 66
3 37 52 51
3 42 48 50
3 42 43 44
3 43 35 34
3 45 44 46
3 44 34 66
3 61 42 45
3 50 65 43
3 65 50 52
3 56 63 49
3 51 52 53
3 49 63 53
3 50 49 52
3 49 53 52
3 2 0 1
3 1 0 53
3 0 51 53
3 0 64 51
3 13 64 0
3 2 13 0
POINT_DATA 72
SCALARS PointIds vtkIdType
LOOKUP_TABLE default
0 1 2 3 4 5 6 7 8
9 10 11 12 13 14 15 16 17
18 19 20 21 22 23 24 25 26
27 28 29 30 31 32 33 34 35
36 37 38 39 40 41 42 43 44
45 46 47 48 49 50 51 52 53
54 55 56 57 58 59 60 61 62
63 64 65 66 67 68 69 70 71
NORMALS PointNormals double
0.94738623196 0.18727650058 -0.25958975291 0.78313719053 0.35367076216 -0.51148131227 0.83545291047 0.50824408436 -0.20906072109
0.47898857295 0.62402000487 -0.61738884061 0.34465195337 0.40584589543 -0.84646567573 0.15649087604 0.66776200195 -0.72773931766
-0.15609353126 0.97764567412 -0.14086782943 0.059136449433 0.91410106494 -0.40115099829 -0.27742338135 0.85504231805 -0.43810832201
0.27739675558 0.85505949665 0.43809165386 0.1561128187 0.97764027026 0.14088395868 -0.05910174957 0.91410169764 0.40115467037
0.6978536347 0.50139725414 0.51146954756 0.85786633279 0.46941626794 0.20907826874 0.63588503517 0.72681646701 0.25959207486
-0.63587825439 0.72682167945 -0.25959409059 -0.69785435483 0.50138010962 -0.51148537136 -0.85787788306 0.46940090711 -0.20906536337
-0.15651072102 0.66775823558 0.72773850593 -0.47897825964 0.62400201859 0.61741502054 -0.34463970914 0.40587327082 0.84645753521
-0.8354604399 0.50822639825 0.20907362693 -0.94738511041 0.18728497536 0.25958773195 -0.78315168201 0.35366155935 0.51146548701
-0.0042059530133 0.19834561529 0.98012311821 -0.16967339936 -0.10281294988 0.98012266318 0.17387129188 -0.095532679284 0.98012360499
0.52381294065 0.095528092331 0.84645991446 0.65654796659 -0.19833859028 0.72774073074 0.77988819356 0.10280486141 0.61741846913
-0.30091501143 -0.72680688867 0.6174155023 -0.17918019275 -0.50140137515 0.8464579845 -0.50004323045 -0.46941965564 0.72773755886
0.085293841035 -0.8550588207 0.51146786196 -0.022407612419 -0.97764266904 0.20907584885 0.31149794024 -0.91410144601 0.25959117785
0.60180306843 -0.66776342878 0.4380925359 0.76860432156 -0.62401813766 0.14088563004 0.82118451863 -0.40586818759 0.40115707731
-0.87920342686 -0.18729607405 0.43808847833 -0.92471829912 -0.35362213463 0.14088098936 -0.76208606823 -0.50823351579 0.40115273655
-0.08527862936 -0.85505086473 -0.51148369876 0.022426691996 -0.97764498848 -0.20906295701 -0.31150584284 -0.91409817614 -0.25959320919
-0.60177487516 -0.6677778914 -0.43810921854 -0.76861864432 -0.62400413386 -0.14086951598 -0.82120131764 -0.40583781416 -0.40115341765
0.17915035572 -0.50139827931 -0.84646613373 0.5000564084 -0.4694043424 -0.72773838139 0.30092542605 -0.72682480482 -0.61738933507
0.92471333385 -0.3536415386 -0.14086487277 0.7620681671 -0.50826324483 -0.40114907784 0.8792018588 -0.18726442077 -0.43810515655
-0.17386795802 -0.09554490851 -0.98012300434 0.0041936861197 0.19834885542 -0.98012251507 0.16968233044 -0.10280393451 -0.9801220627
-0.65654129971 -0.19835764899 -0.72774155087 -0.77990892195 0.10280480888 -0.61739229403 -0.52379534825 0.095552395563 -0.84646805779
-0.3035934934 0.52568869256 -0.79465866212 -0.30345974591 -0.52576590314 -0.79465866742 -0.98224561547 8.0181630296e-06 -0.18759944248
0.60705977261 7.7220189155e-05 -0.79465616874 0.98224726985 9.1023794904e-07 0.18759078032 0.49111663606 -0.85065410882 -0.18759540755
-0.49112361722 -0.85065199031 0.18758673723 -0.60706560357 -2.8418937296e-07 0.79465171803 0.30352977695 -0.52573221141 0.79465421184
0.30352929122 0.52573248433 0.79465421681 -0.49112519531 0.85065107744 0.18758674521 0.49113052171 0.85064609373 -0.18759539936

Performing differences between rows in pandas based on column values

I have this dataframe, and I'm trying to create a new column that stores the difference in products sold, based on code and date.
For example, this is the starting dataframe:
date code sold
0 20150521 0 47
1 20150521 12 39
2 20150521 16 39
3 20150521 20 38
4 20150521 24 38
5 20150521 28 37
6 20150521 32 36
7 20150521 4 43
8 20150521 8 43
9 20150522 0 47
10 20150522 12 37
11 20150522 16 36
12 20150522 20 36
13 20150522 24 36
14 20150522 28 35
15 20150522 32 31
16 20150522 4 42
17 20150522 8 41
18 20150523 0 50
19 20150523 12 48
20 20150523 16 46
21 20150523 20 46
22 20150523 24 46
23 20150523 28 45
24 20150523 32 42
25 20150523 4 49
26 20150523 8 49
27 20150524 0 39
28 20150524 12 33
29 20150524 16 30
... ... ... ...
150 20150606 32 22
151 20150606 4 34
152 20150606 8 33
153 20150607 0 31
154 20150607 12 30
155 20150607 16 30
156 20150607 20 29
157 20150607 24 28
158 20150607 28 26
159 20150607 32 24
160 20150607 4 30
161 20150607 8 30
162 20150608 0 47
I think this could be a solution...
full_df1 = full_df[full_df.date == '20150609'].reset_index(drop=True)
full_df1['code'] = full_df1['code'].astype(float)
full_df1 = full_df1.sort(['code'], ascending=[False])
code date sold
8 32 20150609 33
7 28 20150609 36
6 24 20150609 37
5 20 20150609 39
4 16 20150609 42
3 12 20150609 46
2 8 20150609 49
1 4 20150609 49
0 0 20150609 50
full_df1.set_index('code')['sold'].diff().reset_index()
That gives me back this output for the single date 20150609:
code difference
0 32 NaN
1 28 3
2 24 1
3 20 2
4 16 3
5 12 4
6 8 3
7 4 0
8 0 1
Is there a better solution to get the same result in a more pythonic way?
I would like to create a new column [difference] and store the data there, ending up with 4 columns [date, code, sold, difference].
This is exactly the kind of thing that pandas' groupby functionality is built for, and I highly recommend reading and working through the pandas groupby documentation.
The following code replicates what you are asking for, but for every date:
df = pd.DataFrame({'date':['Mon','Mon','Mon','Tue','Tue','Tue'],'code':[10,21,30,10,21,30], 'sold':[12,13,34,10,15,20]})
df['difference'] = df.groupby('date')['sold'].diff()
df
code date sold difference
0 10 Mon 12 NaN
1 21 Mon 13 1
2 30 Mon 34 21
3 10 Tue 10 NaN
4 21 Tue 15 5
5 30 Tue 20 5
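To end up with the four columns [date, code, sold, difference] on the full frame from the question, the same pattern applies. A sketch, assuming full_df is your original dataframe and that you want the descending-code order used in your manual attempt:

# Sort each date's rows by code, descending (as in the manual attempt),
# then take the difference of 'sold' within each date
full_df = full_df.sort_values(['date', 'code'], ascending=[True, False])
full_df['difference'] = full_df.groupby('date')['sold'].diff()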
