Altair distance between bar labels in mark_text - python

I need help with my grouped, stacked bar chart: I can't control the spacing between the bar labels in Altair.
This is my code:
# Display order of the price categories, shared by the bar layer and the label
# layer so both place their marks at the same x positions.
price_category_order = ['Низкая', 'Ниже среднего', 'Средняя', 'Выше среднего', 'Высокая', 'Самая высокая']

# Bar layer: one stacked bar per price category, split by country of production.
# `stack` belongs on the quantitative y channel; it has no effect on a nominal x.
chart = alt.Chart(
    chain_and_prices_for_bar,
    title='Распределение средних цен различных ценовых категорий среди аптечных сетей в разрезе страны производства',
).mark_bar().encode(
    x=alt.X(
        'category_of_price:N',
        sort=price_category_order,
        title=None,
        axis=alt.Axis(labelAngle=-45, labelOverlap=False),
    ),
    y=alt.Y(
        'mean_price_of_medicine:Q',
        stack='zero',
        axis=alt.Axis(grid=False, title='Суммарная средняя цена'),
        scale=alt.Scale(domain=[0, 201], bins=list(range(0, 211, 10))),
    ),
    order=alt.Order('is_import', sort='ascending'),
    color=alt.Color(
        'is_import:N',
        scale=alt.Scale(range=['#96ceb4', '#ffcc5c']),
        legend=alt.Legend(title='Страна производства'),
    ),
).properties(width=100, height=600)

# Label layer: text marks are NOT stacked by default, so without an explicit
# `stack='zero'` on y every label is drawn at its raw value and the two labels
# of one bar overlap near the bottom. Stacking y, splitting by `is_import`
# (the field the bars are stacked by) and using the same `order` as the bars
# puts each label at the top of its own bar segment.
text = alt.Chart(chain_and_prices_for_bar).mark_text(
    dx=-1, dy=2, color='black', align='center', baseline='bottom', angle=270,
).encode(
    x=alt.X(
        'category_of_price:N',
        sort=price_category_order,
        title=None,
        axis=alt.Axis(labelAngle=-45, labelOverlap=False),
    ),
    y=alt.Y('mean_price_of_medicine:Q', stack='zero'),
    detail='is_import:N',
    order=alt.Order('is_import', sort='ascending'),
    text=alt.Text('mean_price_of_medicine:Q', format='.2f'),
)

# Layer first, then facet by retail chain. All `configure_*` calls return a new
# chart and must be applied to the final top-level chart, so the title styling
# is chained here instead of being called on `chart` and discarded.
(
    alt.layer(chart, text, data=chain_and_prices_for_bar)
    .facet(
        facet=alt.Column(
            'retail_chain:N',
            title=None,
            sort=list_of_top_pharmacies,
            header=alt.Header(labelFontSize=11, labelFontStyle='bold'),
        ),
    )
    .configure_view(continuousHeight=200, continuousWidth=0.5)
    .configure_facet(spacing=0.5)
    .configure_title(fontSize=18, anchor='middle', align='center', dy=-10)
)
This is what I got:
The numbers overlap, and I need to fix that.
# Build the plotting DataFrame from the sample data.
# NOTE: `my_dict` is defined further down in this post; evaluate that
# definition before running this line.
chain_and_prices_for_bar = pd.DataFrame(my_dict)
# Retail chains in the order they are passed as `sort=` to the facet column.
list_of_top_pharmacies = ['Гродненское РУП Фармация', 'Альфа-аптека', 'Планета Здоровья', 'Моя Аптека', 'Остров здоровья', 'Биотест', 'Искамед', 'ADEL','Inlek']
my_dict = {'retail_chain': {0: 'ADEL',
34: 'Альфа-аптека',
72: 'Моя Аптека',
36: 'Биотест',
38: 'Биотест',
86: 'Остров здоровья',
40: 'Биотест',
42: 'Биотест',
84: 'Остров здоровья',
44: 'Биотест',
46: 'Биотест',
82: 'Моя Аптека',
48: 'Гродненское РУП Фармация',
50: 'Гродненское РУП Фармация',
80: 'Моя Аптека',
52: 'Гродненское РУП Фармация',
106: 'Планета Здоровья',
54: 'Гродненское РУП Фармация',
56: 'Гродненское РУП Фармация',
78: 'Моя Аптека',
58: 'Гродненское РУП Фармация',
60: 'Искамед',
76: 'Моя Аптека',
62: 'Искамед',
64: 'Искамед',
74: 'Моя Аптека',
66: 'Искамед',
68: 'Искамед',
32: 'Альфа-аптека',
90: 'Остров здоровья',
88: 'Остров здоровья',
98: 'Планета Здоровья',
2: 'ADEL',
104: 'Планета Здоровья',
4: 'ADEL',
6: 'ADEL',
102: 'Планета Здоровья',
8: 'ADEL',
10: 'ADEL',
100: 'Планета Здоровья',
12: 'Inlek',
14: 'Inlek',
30: 'Альфа-аптека',
16: 'Inlek',
18: 'Inlek',
70: 'Искамед',
96: 'Планета Здоровья',
20: 'Inlek',
28: 'Альфа-аптека',
92: 'Остров здоровья',
22: 'Inlek',
94: 'Остров здоровья',
24: 'Альфа-аптека',
26: 'Альфа-аптека',
105: 'Планета Здоровья',
73: 'Моя Аптека',
89: 'Остров здоровья',
75: 'Моя Аптека',
103: 'Планета Здоровья',
93: 'Остров здоровья',
87: 'Остров здоровья',
83: 'Моя Аптека',
101: 'Планета Здоровья',
79: 'Моя Аптека',
99: 'Планета Здоровья',
85: 'Остров здоровья',
95: 'Остров здоровья',
81: 'Моя Аптека',
97: 'Планета Здоровья',
77: 'Моя Аптека',
91: 'Остров здоровья',
53: 'Гродненское РУП Фармация',
69: 'Искамед',
27: 'Альфа-аптека',
25: 'Альфа-аптека',
23: 'Inlek',
21: 'Inlek',
19: 'Inlek',
17: 'Inlek',
29: 'Альфа-аптека',
15: 'Inlek',
11: 'ADEL',
9: 'ADEL',
7: 'ADEL',
5: 'ADEL',
3: 'ADEL',
1: 'ADEL',
13: 'Inlek',
31: 'Альфа-аптека',
33: 'Альфа-аптека',
35: 'Альфа-аптека',
67: 'Искамед',
65: 'Искамед',
63: 'Искамед',
61: 'Искамед',
59: 'Гродненское РУП Фармация',
57: 'Гродненское РУП Фармация',
55: 'Гродненское РУП Фармация',
51: 'Гродненское РУП Фармация',
49: 'Гродненское РУП Фармация',
47: 'Биотест',
45: 'Биотест',
43: 'Биотест',
41: 'Биотест',
39: 'Биотест',
37: 'Биотест',
71: 'Искамед',
107: 'Планета Здоровья'},
'category_of_price': {0: 'Низкая',
34: 'Самая высокая',
72: 'Низкая',
36: 'Низкая',
38: 'Ниже среднего',
86: 'Ниже среднего',
40: 'Средняя',
42: 'Выше среднего',
84: 'Низкая',
44: 'Высокая',
46: 'Самая высокая',
82: 'Самая высокая',
48: 'Низкая',
50: 'Ниже среднего',
80: 'Высокая',
52: 'Средняя',
106: 'Самая высокая',
54: 'Выше среднего',
56: 'Высокая',
78: 'Выше среднего',
58: 'Самая высокая',
60: 'Низкая',
76: 'Средняя',
62: 'Ниже среднего',
64: 'Средняя',
74: 'Ниже среднего',
66: 'Выше среднего',
68: 'Высокая',
32: 'Высокая',
90: 'Выше среднего',
88: 'Средняя',
98: 'Ниже среднего',
2: 'Ниже среднего',
104: 'Высокая',
4: 'Средняя',
6: 'Выше среднего',
102: 'Выше среднего',
8: 'Высокая',
10: 'Самая высокая',
100: 'Средняя',
12: 'Низкая',
14: 'Ниже среднего',
30: 'Выше среднего',
16: 'Средняя',
18: 'Выше среднего',
70: 'Самая высокая',
96: 'Низкая',
20: 'Высокая',
28: 'Средняя',
92: 'Высокая',
22: 'Самая высокая',
94: 'Самая высокая',
24: 'Низкая',
26: 'Ниже среднего',
105: 'Высокая',
73: 'Низкая',
89: 'Средняя',
75: 'Ниже среднего',
103: 'Выше среднего',
93: 'Высокая',
87: 'Ниже среднего',
83: 'Самая высокая',
101: 'Средняя',
79: 'Выше среднего',
99: 'Ниже среднего',
85: 'Низкая',
95: 'Самая высокая',
81: 'Высокая',
97: 'Низкая',
77: 'Средняя',
91: 'Выше среднего',
53: 'Средняя',
69: 'Высокая',
27: 'Ниже среднего',
25: 'Низкая',
23: 'Самая высокая',
21: 'Высокая',
19: 'Выше среднего',
17: 'Средняя',
29: 'Средняя',
15: 'Ниже среднего',
11: 'Самая высокая',
9: 'Высокая',
7: 'Выше среднего',
5: 'Средняя',
3: 'Ниже среднего',
1: 'Низкая',
13: 'Низкая',
31: 'Выше среднего',
33: 'Высокая',
35: 'Самая высокая',
67: 'Выше среднего',
65: 'Средняя',
63: 'Ниже среднего',
61: 'Низкая',
59: 'Самая высокая',
57: 'Высокая',
55: 'Выше среднего',
51: 'Ниже среднего',
49: 'Низкая',
47: 'Самая высокая',
45: 'Высокая',
43: 'Выше среднего',
41: 'Средняя',
39: 'Ниже среднего',
37: 'Низкая',
71: 'Самая высокая',
107: 'Самая высокая'},
'is_import': {0: 'Беларусь',
34: 'Беларусь',
72: 'Беларусь',
36: 'Беларусь',
38: 'Беларусь',
86: 'Беларусь',
40: 'Беларусь',
42: 'Беларусь',
84: 'Беларусь',
44: 'Беларусь',
46: 'Беларусь',
82: 'Беларусь',
48: 'Беларусь',
50: 'Беларусь',
80: 'Беларусь',
52: 'Беларусь',
106: 'Беларусь',
54: 'Беларусь',
56: 'Беларусь',
78: 'Беларусь',
58: 'Беларусь',
60: 'Беларусь',
76: 'Беларусь',
62: 'Беларусь',
64: 'Беларусь',
74: 'Беларусь',
66: 'Беларусь',
68: 'Беларусь',
32: 'Беларусь',
90: 'Беларусь',
88: 'Беларусь',
98: 'Беларусь',
2: 'Беларусь',
104: 'Беларусь',
4: 'Беларусь',
6: 'Беларусь',
102: 'Беларусь',
8: 'Беларусь',
10: 'Беларусь',
100: 'Беларусь',
12: 'Беларусь',
14: 'Беларусь',
30: 'Беларусь',
16: 'Беларусь',
18: 'Беларусь',
70: 'Беларусь',
96: 'Беларусь',
20: 'Беларусь',
28: 'Беларусь',
92: 'Беларусь',
22: 'Беларусь',
94: 'Беларусь',
24: 'Беларусь',
26: 'Беларусь',
105: 'Импорт',
73: 'Импорт',
89: 'Импорт',
75: 'Импорт',
103: 'Импорт',
93: 'Импорт',
87: 'Импорт',
83: 'Импорт',
101: 'Импорт',
79: 'Импорт',
99: 'Импорт',
85: 'Импорт',
95: 'Импорт',
81: 'Импорт',
97: 'Импорт',
77: 'Импорт',
91: 'Импорт',
53: 'Импорт',
69: 'Импорт',
27: 'Импорт',
25: 'Импорт',
23: 'Импорт',
21: 'Импорт',
19: 'Импорт',
17: 'Импорт',
29: 'Импорт',
15: 'Импорт',
11: 'Импорт',
9: 'Импорт',
7: 'Импорт',
5: 'Импорт',
3: 'Импорт',
1: 'Импорт',
13: 'Импорт',
31: 'Импорт',
33: 'Импорт',
35: 'Импорт',
67: 'Импорт',
65: 'Импорт',
63: 'Импорт',
61: 'Импорт',
59: 'Импорт',
57: 'Импорт',
55: 'Импорт',
51: 'Импорт',
49: 'Импорт',
47: 'Импорт',
45: 'Импорт',
43: 'Импорт',
41: 'Импорт',
39: 'Импорт',
37: 'Импорт',
71: 'Импорт',
107: 'Импорт'},
'mean_price_of_medicine': {0: 4.92,
34: 78.74,
72: 5.1,
36: 5.09,
38: 15.15,
86: 14.92,
40: 25.95,
42: 38.38,
84: 5.37,
44: 48.12,
46: 84.02,
82: 83.49,
48: 5.28,
50: 15.13,
80: 49.23,
52: 26.11,
106: 86.08,
54: 38.06,
56: 49.25,
78: 37.33,
58: 83.79,
60: 5.18,
76: 26.22,
62: 15.19,
64: 26.29,
74: 14.81,
66: 38.48,
68: 48.93,
32: 47.22,
90: 38.31,
88: 25.82,
98: 15.17,
2: 15.21,
104: 50.87,
4: 26.52,
6: 38.14,
102: 37.9,
8: 46.43,
10: 89.32,
100: 25.85,
12: 5.14,
14: 15.01,
30: 38.04,
16: 26.16,
18: 38.56,
70: 93.85,
96: 5.06,
20: 47.71,
28: 26.08,
92: 50.44,
22: 88.74,
94: 86.42,
24: 5.29,
26: 14.98,
105: 48.25,
73: 7.21,
89: 26.74,
75: 15.85,
103: 37.83,
93: 49.03,
87: 16.1,
83: 87.7,
101: 26.52,
79: 38.01,
99: 16.33,
85: 7.03,
95: 82.19,
81: 48.59,
97: 7.17,
77: 26.46,
91: 38.22,
53: 26.42,
69: 48.85,
27: 15.86,
25: 7.32,
23: 87.21,
21: 48.81,
19: 38.58,
17: 26.51,
29: 26.55,
15: 16.06,
11: 83.96,
9: 48.56,
7: 38.32,
5: 26.66,
3: 16.1,
1: 7.33,
13: 7.21,
31: 38.03,
33: 48.07,
35: 94.97,
67: 38.34,
65: 26.62,
63: 16.09,
61: 7.22,
59: 95.27,
57: 48.59,
55: 38.14,
51: 16.14,
49: 6.9,
47: 89.96,
45: 48.1,
43: 38.06,
41: 26.7,
39: 16.12,
37: 7.17,
71: 108.55,
107: 86.54},
'divided_mean_price_of_medicine': {0: 2.46,
34: 39.37,
72: 2.55,
36: 2.54,
38: 7.57,
86: 7.46,
40: 12.98,
42: 19.19,
84: 2.68,
44: 24.06,
46: 42.01,
82: 41.75,
48: 2.64,
50: 7.56,
80: 24.61,
52: 13.05,
106: 43.04,
54: 19.03,
56: 24.62,
78: 18.67,
58: 41.89,
60: 2.59,
76: 13.11,
62: 7.6,
64: 13.15,
74: 7.41,
66: 19.24,
68: 24.46,
32: 23.61,
90: 19.16,
88: 12.91,
98: 7.59,
2: 7.6,
104: 25.44,
4: 13.26,
6: 19.07,
102: 18.95,
8: 23.22,
10: 44.66,
100: 12.92,
12: 2.57,
14: 7.5,
30: 19.02,
16: 13.08,
18: 19.28,
70: 46.92,
96: 2.53,
20: 23.86,
28: 13.04,
92: 25.22,
22: 44.37,
94: 43.21,
24: 2.65,
26: 7.49,
105: 24.12,
73: 3.6,
89: 13.37,
75: 7.92,
103: 18.92,
93: 24.51,
87: 8.05,
83: 43.85,
101: 13.26,
79: 19.0,
99: 8.17,
85: 3.52,
95: 41.09,
81: 24.3,
97: 3.58,
77: 13.23,
91: 19.11,
53: 13.21,
69: 24.43,
27: 7.93,
25: 3.66,
23: 43.6,
21: 24.4,
19: 19.29,
17: 13.26,
29: 13.28,
15: 8.03,
11: 41.98,
9: 24.28,
7: 19.16,
5: 13.33,
3: 8.05,
1: 3.67,
13: 3.6,
31: 19.01,
33: 24.04,
35: 47.48,
67: 19.17,
65: 13.31,
63: 8.04,
61: 3.61,
59: 47.63,
57: 24.3,
55: 19.07,
51: 8.07,
49: 3.45,
47: 44.98,
45: 24.05,
43: 19.03,
41: 13.35,
39: 8.06,
37: 3.59,
71: 54.27,
107: 43.27}}

There is an example of this in the docs:
import altair as alt
from vega_datasets import data
# Barley sample data from vega_datasets.
source = data.barley()

# Both layers use the same positional encodings, so they are declared once on
# a shared base chart; the stacked x keeps bars and labels aligned.
base = alt.Chart(source).encode(
    x=alt.X('sum(yield):Q', stack='zero'),
    y=alt.Y('variety:N'),
)

# Stacked bars, coloured by site.
bars = base.mark_bar().encode(color=alt.Color('site'))

# One label per site segment, nudged by (dx, dy) pixels from the stacked position.
text = base.mark_text(dx=-15, dy=3, color='white').encode(
    detail='site:N',
    text=alt.Text('sum(yield):Q', format='.1f'),
)

bars + text
With faceting it can look like this:
import altair as alt
from vega_datasets import data
import random
# Barley sample data from vega_datasets.
source = data.barley()

# Give every row a random 'A'/'B' label so there is a column to facet on.
source['group'] = [random.choice(['A', 'B']) for _ in range(len(source))]

# Positional encodings shared by the bar layer and the label layer.
shared_position = {
    'x': alt.X('sum(yield):Q', stack='zero'),
    'y': alt.Y('variety:N'),
}

bars = alt.Chart(source).mark_bar().encode(
    color=alt.Color('site'),
    **shared_position,
)
text = alt.Chart(source).mark_text(dx=-15, dy=3, color='white').encode(
    detail='site:N',
    text=alt.Text('sum(yield):Q', format='.1f'),
    **shared_position,
)

# Layer first, then facet the layered chart into one row per group.
layered = bars + text
layered.facet(row='group')

Related

Python Pandas combinations to build the best team

I can't simplify my data, so I have included it in full.
I would like to build the best possible team of 11 players according to the "niveau" column.
Each "id" has a "niveau" rating for a given "statut".
I think it would be necessary to test all possible combinations of "niveau" values, without any duplicate "id", in order to obtain the best average level for the 11 players, but I don't know how to proceed.
Do you have any ideas, please?
Thank you.
import pandas as pd
data = {'statut': {0: 'titulaire_01', 1: 'titulaire_01', 2: 'titulaire_01', 3: 'titulaire_01', 4: 'titulaire_01', 5: 'titulaire_01', 6: 'titulaire_01', 7: 'titulaire_01', 8: 'titulaire_02', 9: 'titulaire_02', 10: 'titulaire_02', 11: 'titulaire_02', 12: 'titulaire_02', 13: 'titulaire_02', 14: 'titulaire_02', 15: 'titulaire_02', 16: 'titulaire_02', 17: 'titulaire_02', 18: 'titulaire_02', 19: 'titulaire_02', 20: 'titulaire_02', 21: 'titulaire_02', 22: 'titulaire_02', 23: 'titulaire_02', 24: 'titulaire_02', 25: 'titulaire_02', 26: 'titulaire_02', 27: 'titulaire_02', 28: 'titulaire_03', 29: 'titulaire_03', 30: 'titulaire_03', 31: 'titulaire_03', 32: 'titulaire_03', 33: 'titulaire_03', 34: 'titulaire_03', 35:
'titulaire_03', 36: 'titulaire_03', 37: 'titulaire_03', 38: 'titulaire_03', 39: 'titulaire_03', 40: 'titulaire_03', 41: 'titulaire_03', 42: 'titulaire_03', 43: 'titulaire_03', 44: 'titulaire_03', 45: 'titulaire_03', 46: 'titulaire_03', 47: 'titulaire_03', 48: 'titulaire_04', 49: 'titulaire_04', 50: 'titulaire_04', 51: 'titulaire_04', 52: 'titulaire_04', 53: 'titulaire_04', 54: 'titulaire_04', 55: 'titulaire_04', 56: 'titulaire_04', 57: 'titulaire_05', 58: 'titulaire_05', 59: 'titulaire_05', 60: 'titulaire_05', 61: 'titulaire_05', 62: 'titulaire_05', 63: 'titulaire_05', 64: 'titulaire_05', 65: 'titulaire_05', 66: 'titulaire_05', 67: 'titulaire_06', 68: 'titulaire_06', 69: 'titulaire_06', 70: 'titulaire_06', 71: 'titulaire_06', 72: 'titulaire_06', 73: 'titulaire_06', 74: 'titulaire_06', 75: 'titulaire_06', 76: 'titulaire_06', 77: 'titulaire_06', 78: 'titulaire_06', 79: 'titulaire_07', 80: 'titulaire_07', 81: 'titulaire_07', 82: 'titulaire_07', 83: 'titulaire_07', 84: 'titulaire_07', 85: 'titulaire_07', 86: 'titulaire_07', 87: 'titulaire_07', 88: 'titulaire_07', 89: 'titulaire_07', 90: 'titulaire_07', 91: 'titulaire_07', 92: 'titulaire_07', 93: 'titulaire_07', 94: 'titulaire_07', 95: 'titulaire_07', 96: 'titulaire_07', 97: 'titulaire_07', 98: 'titulaire_08', 99: 'titulaire_08', 100: 'titulaire_08', 101: 'titulaire_08', 102: 'titulaire_08', 103: 'titulaire_08', 104: 'titulaire_08', 105: 'titulaire_08', 106: 'titulaire_08', 107: 'titulaire_08', 108: 'titulaire_08', 109: 'titulaire_08', 110: 'titulaire_08', 111: 'titulaire_08', 112: 'titulaire_08', 113: 'titulaire_08', 114: 'titulaire_08', 115: 'titulaire_08', 116: 'titulaire_08', 117: 'titulaire_09', 118: 'titulaire_09', 119: 'titulaire_09', 120: 'titulaire_09', 121: 'titulaire_09', 122: 'titulaire_09', 123: 'titulaire_09', 124: 'titulaire_09', 125: 'titulaire_09', 126: 'titulaire_09', 127: 'titulaire_09', 128: 'titulaire_09', 129: 'titulaire_09', 130: 'titulaire_09', 131: 'titulaire_09', 132: 'titulaire_09', 133: 
'titulaire_09', 134: 'titulaire_09', 135: 'titulaire_09', 136: 'titulaire_10', 137: 'titulaire_10', 138: 'titulaire_10', 139: 'titulaire_10', 140: 'titulaire_10', 141: 'titulaire_10', 142: 'titulaire_10', 143: 'titulaire_10', 144: 'titulaire_10', 145: 'titulaire_10', 146: 'titulaire_10', 147: 'titulaire_10', 148: 'titulaire_10', 149: 'titulaire_10', 150: 'titulaire_10', 151: 'titulaire_10', 152: 'titulaire_10', 153: 'titulaire_10', 154: 'titulaire_10', 155: 'titulaire_10', 156: 'titulaire_10', 157: 'titulaire_10', 158: 'titulaire_11', 159: 'titulaire_11', 160: 'titulaire_11', 161: 'titulaire_11', 162: 'titulaire_11', 163: 'titulaire_11', 164: 'titulaire_11', 165: 'titulaire_11', 166: 'titulaire_11', 167: 'titulaire_11', 168: 'titulaire_11', 169: 'titulaire_11', 170: 'titulaire_11', 171: 'titulaire_11', 172: 'titulaire_11', 173: 'titulaire_11', 174: 'titulaire_11', 175: 'titulaire_11', 176: 'titulaire_11', 177: 'titulaire_11', 178: 'titulaire_11', 179: 'titulaire_11'}, 'id': {0: 2002134607, 1: 2002043469, 2: 67156610, 3: 73201503, 4: 2000165962, 5: 2000143545, 6: 2002042688, 7: 2000055323, 8: 49054631, 9: 48031358, 10: 49048802, 11: 2002042816, 12: 2000045508, 13: 73201458, 14: 67191910, 15: 2002134617, 16: 2002042628, 17: 2000023214, 18: 2000165961, 19: 2000121963, 20: 2000045487, 21: 2000006106, 22: 14196664, 23: 2000055604, 24: 2002043613, 25: 49054633, 26: 49037900, 27: 2002043635, 28: 48031358, 29: 49037900, 30: 2002043635, 31: 2000121963, 32: 2000165961, 33: 67191910, 34: 2002042816, 35: 73201458, 36: 49054633, 37: 2000045487, 38: 2002043613, 39: 2000006106, 40: 2000055604, 41: 2000023214, 42: 2000045508, 43: 2002042628, 44: 14196664, 45: 2002134617, 46: 49054631, 47: 49048802, 48: 49040506, 49: 85126966, 50: 83169864, 51: 2002043476, 52: 2000045508, 53: 2002043613, 54: 2002042669, 55: 2000023214, 56: 73201460, 57: 67211095, 58: 83169864, 59: 13196665, 60: 2000055604, 61: 2000011411, 62: 2000165964, 63: 73201458, 64: 2002042939, 65: 2002043635, 66: 2002043613, 
67: 2000045698, 68: 2002042722, 69: 2000132382, 70: 49054633, 71: 2002042845, 72: 2000045520, 73: 73201505, 74: 73201458, 75: 70137157, 76: 49040506, 77: 2002043635, 78: 2000143548, 79: 73200890, 80: 49060705, 81: 2000045543, 82: 2000045698,
83: 2000011617, 84: 2002042722, 85: 2002042642, 86: 2000113673, 87: 85137101, 88: 19217413, 89: 2000147147, 90: 2002042845, 91: 2002043003, 92: 2002042627, 93: 2002042966, 94: 2000047331, 95: 2002042666, 96: 2000134665, 97: 2002042690, 98: 2000011617, 99: 2000045698, 100: 49060705, 101: 2000047331, 102: 2000147147, 103: 2000134665, 104: 2000113673, 105: 73200890, 106: 2002042845, 107: 19217413, 108: 2000045543, 109: 2002043003, 110: 2002042722, 111: 2002042666, 112: 2002042966, 113: 2002042627, 114: 2002042690, 115: 2002042642, 116: 85137101, 117: 2000134665, 118: 2002042666, 119: 2002042627, 120: 2000047331, 121: 2002042966, 122: 2002043003, 123: 2002042690, 124: 2002042845, 125: 2000147147, 126: 19217413, 127: 85137101, 128: 2002042722, 129: 2002042642, 130: 2000045543, 131: 2000011617, 132: 2000113673, 133: 49060705, 134: 73200890, 135: 2000045698, 136: 62124125, 137: 2002043171, 138: 2000165960, 139: 2002134617, 140: 2002042690, 141: 2000047311, 142: 2000105477, 143: 2002042627, 144: 2000037444, 145: 49060705, 146: 2002042642, 147: 2002134611, 148: 2002043003, 149: 2002042966, 150: 73201412, 151: 2002042813, 152: 67256520, 153: 2000047306, 154: 2002042983, 155: 12092876, 156: 96026541, 157: 2002043636, 158: 2000165960, 159: 49060705, 160: 12092876, 161: 2002042690, 162: 2002134617, 163: 2002042642, 164: 73201412, 165: 62124125, 166: 2000105477, 167: 2002042966, 168: 96026541, 169: 2002042983, 170: 2000047311, 171: 2002043171, 172: 2002134611, 173: 2002042813, 174: 2000047306, 175: 67256520, 176: 2002043003, 177: 2002043636, 178: 2002042627, 179: 2000037444}, 'niveau': {0: 13.605263157894736, 1: 25.13157894736842, 2: 22.473684210526315, 3: 16.236842105263158, 4: 15.789473684210526, 5: 15.342105263157896, 6: 28.394736842105264, 7: 14.789473684210526, 8: 16.727272727272727, 9: 25.741935483870968, 10: 17.424242424242426, 11: 28.03030303030303, 12: 16.696969696969695, 13: 16.636363636363637, 14: 25.454545454545453, 15: 16.484848484848484, 16: 30.606060606060606, 17: 
16.424242424242426, 18: 17.151515151515152, 19: 17.151515151515152, 20: 19.151515151515152, 21: 22.03030303030303, 22: 25.272727272727273, 23: 19.818181818181817, 24: 25.12121212121212, 25: 20.272727272727273, 26: 28.09090909090909, 27: 26.0, 28: 26.06451612903226, 29: 28.545454545454547, 30: 26.242424242424242, 31: 17.454545454545453, 32: 17.606060606060606, 33: 25.757575757575758, 34: 28.333333333333332, 35: 17.09090909090909, 36: 20.575757575757574, 37: 19.454545454545453, 38: 25.272727272727273, 39: 21.575757575757574, 40: 20.12121212121212, 41: 15.969696969696969, 42: 16.393939393939394, 43: 30.303030303030305, 44: 25.515151515151516, 45: 16.939393939393938, 46: 17.03030303030303, 47: 17.87878787878788, 48: 18.142857142857142, 49: 24.37142857142857, 50: 24.057142857142857, 51: 25.4, 52: 15.17142857142857, 53: 23.34285714285714, 54: 28.142857142857142, 55: 15.085714285714285, 56: 16.257142857142856, 57: 23.34285714285714, 58: 23.771428571428572, 59: 22.6, 60: 18.285714285714285, 61: 18.685714285714287, 62: 16.514285714285716, 63: 15.82857142857143, 64: 25.885714285714286, 65: 26.142857142857142, 66: 23.485714285714284, 67: 17.564102564102566, 68: 28.384615384615383, 69: 17.153846153846153, 70: 18.205128205128204, 71: 25.46153846153846, 72: 15.512820512820513, 73: 14.615384615384615, 74: 14.846153846153847, 75: 17.564102564102566, 76: 17.487179487179485, 77: 24.974358974358974, 78: 14.461538461538462, 79: 22.5, 80: 20.0625, 81: 19.84375, 82: 18.9375, 83: 20.25, 84: 31.59375, 85: 33.1875, 86: 18.34375,
87: 24.71875, 88: 26.03125, 89: 18.09375, 90: 28.34375, 91: 29.1875, 92: 32.46875, 93: 30.09375, 94: 18.5625, 95: 31.9375, 96: 15.28125, 97: 32.3125, 98: 19.9375, 99: 18.625, 100: 19.8125, 101: 18.8125, 102: 18.40625, 103: 15.75, 104: 18.03125, 105: 22.1875, 106: 28.09375, 107: 26.34375, 108: 20.15625, 109: 29.4375, 110: 31.34375, 111: 31.78125, 112:
29.84375, 113: 32.21875, 114: 32.625, 115: 33.5, 116: 24.46875, 117: 15.870967741935484, 118: 31.483870967741936, 119: 32.354838709677416, 120: 18.29032258064516, 121: 29.741935483870968, 122: 29.677419354838708, 123: 32.41935483870968, 124: 28.129032258064516, 125: 18.032258064516128, 126: 26.06451612903226, 127: 24.70967741935484, 128: 31.838709677419356, 129: 33.61290322580645, 130: 20.35483870967742, 131: 19.129032258064516, 132: 18.580645161290324, 133: 20.419354838709676, 134: 22.483870967741936, 135: 19.451612903225808, 136: 23.59375, 137: 30.78125, 138: 19.28125, 139: 16.03125, 140: 31.78125, 141: 19.625, 142: 19.09375, 143: 32.0625, 144: 20.65625, 145: 20.625, 146: 32.96875, 147: 20.71875, 148: 29.15625, 149: 29.5, 150: 17.875, 151: 29.0625, 152: 21.28125, 153: 18.84375, 154: 28.4375, 155: 24.84375, 156: 26.53125, 157: 29.0625, 158: 18.8125, 159: 20.375, 160: 24.53125, 161: 32.09375, 162: 15.5625, 163: 33.28125, 164: 18.34375, 165: 23.125, 166: 18.625, 167: 29.25, 168: 26.84375, 169: 28.125, 170: 19.3125, 171: 30.53125, 172: 20.875, 173: 28.75, 174: 18.53125, 175: 21.03125, 176: 29.40625, 177: 29.375, 178: 31.8125, 179: 20.34375}}
df = pd.DataFrame(data)
print(df)
statut id niveau
0 titulaire_01 2002134607 13.605263
1 titulaire_01 2002043469 25.131579
2 titulaire_01 67156610 22.473684
3 titulaire_01 73201503 16.236842
4 titulaire_01 2000165962 15.789474
.. ... ... ...
175 titulaire_11 67256520 21.031250
176 titulaire_11 2002043003 29.406250
177 titulaire_11 2002043636 29.375000
178 titulaire_11 2002042627 31.812500
179 titulaire_11 2000037444 20.343750
If I do groupby("statut") and keep the max of the "niveau" column, I get duplicate "id" values, because an "id" can appear under several "statut" values ("titulaire_01", "titulaire_02", etc.).
The result should be 11 rows with no duplicate "id".
This looks like an optimization (assignment) problem. You can pivot your data to a rectangular format and then use scipy.optimize.linear_sum_assignment:
from scipy.optimize import linear_sum_assignment
# Rectangular score matrix: one row per player id, one column per statut.
# (id, statut) pairs that are absent from `df` are filled with 0.
df2 = df.pivot_table(
    values='niveau',
    index='id',
    columns='statut',
    fill_value=0,  # or fill_value=-np.inf
)

# Pick one distinct row (id) for each column (statut) so that the summed
# niveau is maximal; returns positional row and column indices.
ID, statut = linear_sum_assignment(df2, maximize=True)

# Map the positional indices back to labels and list the team by statut.
out = pd.DataFrame({'statut': df2.columns[statut], 'id': df2.index[ID]})
out = out.sort_values(by='statut', ignore_index=True)
output:
statut id
0 titulaire_01 2002042688
1 titulaire_02 2002042628
2 titulaire_03 49037900
3 titulaire_04 2002042669
4 titulaire_05 2002043635
5 titulaire_06 2002042722
6 titulaire_07 2002042666
7 titulaire_08 2002042690
8 titulaire_09 2002042627
9 titulaire_10 2002043171
10 titulaire_11 2002042642

Statistical Time Series Forecasting Error in Date Output and FVA

I was trying to replicate this code for statistical forecasting in Python, but the monthly frequency of the generated output is incorrect. I am not sure what went wrong here.
Here is the link for reference: https://towardsdatascience.com/time-series-forecasting-with-statistical-models-f08dcd1d24d1
# Script overview: load a long-format table (unique_id, ds, y), hold out the last
# 6 rows of every series, forecast 6 steps with a set of statsforecast models, and
# compare each model's MAPE against the 'historic_average' column (FVA).
import random
from itertools import product
from IPython.display import display, Markdown
from multiprocessing import cpu_count
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsforecast import StatsForecast
from statsforecast.models import (
adida,
croston_classic,
croston_sba,
croston_optimized,
historic_average,
imapa,
naive,
random_walk_with_drift,
seasonal_exponential_smoothing,
seasonal_naive,
seasonal_window_average,
ses,
tsb,
window_average
)
df = pd.read_excel ('C:/2. Path/Sample_Data_2.xlsx')
print (df)
df.info()
# Parse the date column; the format string expects ISO dates such as '2019-01-01'.
df["ds"] = pd.to_datetime(df["ds"],format='%Y-%m-%d')
# Hold out the last 6 rows of each unique_id as the test set ...
df_test = df.groupby('unique_id').tail(6).copy()
# ... and remove exactly those rows from the training frame.
df = df.drop(df_test.index)
df['unique_id'] = df['unique_id'].astype('object')
df = df.set_index('unique_id')
# NOTE: the return value is not assigned, so this line leaves df unchanged.
df.reset_index()
# NOTE(review): the sample data is stamped once per month, so a season length of
# 31 labelled "Daily data" looks inconsistent with the monthly freq used below - confirm.
seasonality = 31 #Daily data
# Plain entries are model functions; tuple entries are presumably
# (model function, *extra arguments) - verify against the statsforecast version in use.
models = [
adida,
croston_classic,
croston_sba,
croston_optimized,
historic_average,
imapa,
naive,
random_walk_with_drift,
(seasonal_exponential_smoothing, seasonality, 0.2),
(seasonal_naive, seasonality),
(seasonal_window_average, seasonality, 2 * seasonality),
(ses, 0.1),
(tsb, 0.3, 0.2),
(window_average, 2 * seasonality)
]
# NOTE(review): in pandas, 'M' is the month-END offset alias and 'MS' is month-START.
# The sample 'ds' values are first-of-month dates, so confirm that the forecast
# timestamps produced with freq='M' line up with df_test in the merge below.
fcst = StatsForecast(df=df, models=models, freq='M', n_jobs=cpu_count())
# `%time` is an IPython magic: this line only runs inside IPython/Jupyter.
%time forecasts = fcst.forecast(6)
# NOTE: the return value is not assigned, so this line has no lasting effect.
forecasts.reset_index()
# Left-join the held-out actuals onto the forecasts by (unique_id, ds); forecast
# rows without a matching test row get NaN in 'y'.
forecasts = forecasts.reset_index().merge(df_test, how='left', on=['unique_id', 'ds'])
# `models` is rebound here: from this point it is the list of forecast column names.
models = forecasts.drop(columns=['unique_id', 'ds', 'y']).columns.to_list()
from nixtlats.losses.numpy import mape
y = forecasts['y'].values
# Benchmark error: MAPE of the 'historic_average' column.
mape_bench = mape(y, forecasts['historic_average'].values)
fva = {}
# FVA per model = benchmark MAPE - model MAPE (positive means lower MAPE than the benchmark).
for model in models:
mape_model = mape(y, forecasts[model].values)
fva[model] = mape_bench - mape_model
# One-row frame transposed to one row per model, sorted by FVA; the result is not
# assigned, so it is only shown when this is the last expression of a notebook cell.
pd.DataFrame(fva, index=['FVA']).T.sort_values('FVA').rename_axis('model').reset_index()
The output Image is given below:
The Dataset is:
{'ds': {0: '2019-01-01', 1: '2019-02-01', 2: '2019-03-01', 3: '2019-04-01', 4: '2019-05-01', 5: '2019-06-01', 6: '2019-07-01', 7: '2019-08-01', 8: '2019-09-01', 9: '2019-10-01', 10: '2019-11-01', 11: '2019-12-01', 12: '2020-01-01', 13: '2020-02-01', 14: '2020-03-01', 15: '2020-04-01', 16: '2020-05-01', 17: '2020-06-01', 18: '2020-07-01', 19: '2020-08-01', 20: '2020-09-01', 21: '2020-10-01', 22: '2020-11-01', 23: '2020-12-01', 24: '2021-01-01', 25: '2021-02-01', 26: '2021-03-01', 27: '2021-04-01', 28: '2021-05-01', 29: '2021-06-01', 30: '2021-07-01', 31: '2021-08-01', 32: '2021-09-01', 33: '2021-10-01', 34: '2021-11-01', 35: '2021-12-01', 36: '2022-01-01', 37: '2022-02-01', 38: '2022-03-01', 39: '2022-04-01', 40: '2022-05-01', 41: '2022-06-01', 42: '2022-07-01', 43: '2022-08-01', 44: '2022-09-01', 45: '2019-01-01', 46: '2019-02-01', 47: '2019-03-01', 48: '2019-04-01', 49: '2019-05-01', 50: '2019-06-01', 51: '2019-07-01', 52: '2019-08-01', 53: '2019-09-01', 54: '2019-10-01', 55: '2019-11-01', 56: '2019-12-01', 57: '2020-01-01', 58: '2020-02-01', 59: '2020-03-01', 60: '2020-04-01', 61: '2020-05-01', 62: '2020-06-01', 63: '2020-07-01', 64: '2020-08-01', 65: '2020-09-01', 66: '2020-10-01', 67: '2020-11-01', 68: '2020-12-01', 69: '2021-01-01', 70: '2021-02-01', 71: '2021-03-01', 72: '2021-04-01', 73: '2021-05-01', 74: '2021-06-01', 75: '2021-07-01', 76: '2021-08-01', 77: '2021-09-01', 78: '2021-10-01', 79: '2021-11-01', 80: '2021-12-01', 81: '2022-01-01', 82: '2022-02-01', 83: '2022-03-01', 84: '2022-04-01', 85: '2022-05-01', 86: '2022-06-01', 87: '2022-07-01', 88: '2022-08-01', 89: '2022-09-01', 90: '2019-01-01', 91: '2019-02-01', 92: '2019-03-01', 93: '2019-04-01', 94: '2019-05-01', 95: '2019-06-01', 96: '2019-07-01', 97: '2019-08-01', 98: '2019-09-01', 99: '2019-10-01', 100: '2019-11-01', 101: '2019-12-01', 102: '2020-01-01', 103: '2020-02-01', 104: '2020-03-01', 105: '2020-04-01', 106: '2020-05-01', 107: '2020-06-01', 108: '2020-07-01', 109: '2020-08-01', 110: 
'2020-09-01', 111: '2020-10-01', 112: '2020-11-01', 113: '2020-12-01', 114: '2021-01-01', 115: '2021-02-01', 116: '2021-03-01', 117: '2021-04-01', 118: '2021-05-01', 119: '2021-06-01', 120: '2021-07-01', 121: '2021-08-01', 122: '2021-09-01', 123: '2021-10-01', 124: '2021-11-01', 125: '2021-12-01', 126: '2022-01-01', 127: '2022-02-01', 128: '2022-03-01', 129: '2022-04-01', 130: '2022-05-01', 131: '2022-06-01', 132: '2022-07-01', 133: '2022-08-01', 134: '2022-09-01', 135: '2019-01-01', 136: '2019-02-01', 137: '2019-03-01', 138: '2019-04-01', 139: '2019-05-01', 140: '2019-06-01', 141: '2019-07-01', 142: '2019-08-01', 143: '2019-09-01', 144: '2019-10-01', 145: '2019-11-01', 146: '2019-12-01', 147: '2020-01-01', 148: '2020-02-01', 149: '2020-03-01', 150: '2020-04-01', 151: '2020-05-01', 152: '2020-06-01', 153: '2020-07-01', 154: '2020-08-01', 155: '2020-09-01', 156: '2020-10-01', 157: '2020-11-01', 158: '2020-12-01', 159: '2021-01-01', 160: '2021-02-01', 161: '2021-03-01', 162: '2021-04-01', 163: '2021-05-01', 164: '2021-06-01', 165: '2021-07-01', 166: '2021-08-01', 167: '2021-09-01', 168: '2021-10-01', 169: '2021-11-01', 170: '2021-12-01', 171: '2022-01-01', 172: '2022-02-01', 173: '2022-03-01', 174: '2022-04-01', 175: '2022-05-01', 176: '2022-06-01', 177: '2022-07-01', 178: '2022-08-01', 179: '2022-09-01', 180: '2019-01-01', 181: '2019-02-01', 182: '2019-03-01', 183: '2019-04-01', 184: '2019-05-01', 185: '2019-06-01', 186: '2019-07-01', 187: '2019-08-01', 188: '2019-09-01', 189: '2019-10-01', 190: '2019-11-01', 191: '2019-12-01', 192: '2020-01-01', 193: '2020-02-01', 194: '2020-03-01', 195: '2020-04-01', 196: '2020-05-01', 197: '2020-06-01', 198: '2020-07-01', 199: '2020-08-01', 200: '2020-09-01', 201: '2020-10-01', 202: '2020-11-01', 203: '2020-12-01', 204: '2021-01-01', 205: '2021-02-01', 206: '2021-03-01', 207: '2021-04-01', 208: '2021-05-01', 209: '2021-06-01', 210: '2021-07-01', 211: '2021-08-01', 212: '2021-09-01', 213: '2021-10-01', 214: '2021-11-01', 215: 
'2021-12-01', 216: '2022-01-01', 217: '2022-02-01', 218: '2022-03-01', 219: '2022-04-01', 220: '2022-05-01', 221: '2022-06-01', 222: '2022-07-01', 223: '2022-08-01', 224: '2022-09-01'}, 'unique_id': {0: 'XYZ|419', 1: 'XYZ|419', 2: 'XYZ|419', 3: 'XYZ|419', 4: 'XYZ|419', 5: 'XYZ|419', 6: 'XYZ|419', 7: 'XYZ|419', 8: 'XYZ|419', 9: 'XYZ|419', 10: 'XYZ|419', 11: 'XYZ|419', 12: 'XYZ|419', 13: 'XYZ|419', 14: 'XYZ|419', 15: 'XYZ|419', 16: 'XYZ|419', 17: 'XYZ|419', 18: 'XYZ|419', 19: 'XYZ|419', 20: 'XYZ|419', 21: 'XYZ|419', 22: 'XYZ|419', 23: 'XYZ|419', 24: 'XYZ|419', 25: 'XYZ|419', 26: 'XYZ|419', 27: 'XYZ|419', 28: 'XYZ|419', 29: 'XYZ|419', 30: 'XYZ|419', 31: 'XYZ|419', 32: 'XYZ|419', 33: 'XYZ|419', 34: 'XYZ|419', 35: 'XYZ|419', 36: 'XYZ|419', 37: 'XYZ|419', 38: 'XYZ|419', 39: 'XYZ|419', 40: 'XYZ|419', 41: 'XYZ|419', 42: 'XYZ|419', 43: 'XYZ|419', 44: 'XYZ|419', 45: 'XYZ|426', 46: 'XYZ|426', 47: 'XYZ|426', 48: 'XYZ|426', 49: 'XYZ|426', 50: 'XYZ|426', 51: 'XYZ|426', 52: 'XYZ|426', 53: 'XYZ|426', 54: 'XYZ|426', 55: 'XYZ|426', 56: 'XYZ|426', 57: 'XYZ|426', 58: 'XYZ|426', 59: 'XYZ|426', 60: 'XYZ|426', 61: 'XYZ|426', 62: 'XYZ|426', 63: 'XYZ|426', 64: 'XYZ|426', 65: 'XYZ|426', 66: 'XYZ|426', 67: 'XYZ|426', 68: 'XYZ|426', 69: 'XYZ|426', 70: 'XYZ|426', 71: 'XYZ|426', 72: 'XYZ|426', 73: 'XYZ|426', 74: 'XYZ|426', 75: 'XYZ|426', 76: 'XYZ|426', 77: 'XYZ|426', 78: 'XYZ|426', 79: 'XYZ|426', 80: 'XYZ|426', 81: 'XYZ|426', 82: 'XYZ|426', 83: 'XYZ|426', 84: 'XYZ|426', 85: 'XYZ|426', 86: 'XYZ|426', 87: 'XYZ|426', 88: 'XYZ|426', 89: 'XYZ|426', 90: 'XYZ|465', 91: 'XYZ|465', 92: 'XYZ|465', 93: 'XYZ|465', 94: 'XYZ|465', 95: 'XYZ|465', 96: 'XYZ|465', 97: 'XYZ|465', 98: 'XYZ|465', 99: 'XYZ|465', 100: 'XYZ|465', 101: 'XYZ|465', 102: 'XYZ|465', 103: 'XYZ|465', 104: 'XYZ|465', 105: 'XYZ|465', 106: 'XYZ|465', 107: 'XYZ|465', 108: 'XYZ|465', 109: 'XYZ|465', 110: 'XYZ|465', 111: 'XYZ|465', 112: 'XYZ|465', 113: 'XYZ|465', 114: 'XYZ|465', 115: 'XYZ|465', 116: 'XYZ|465', 117: 'XYZ|465', 118: 'XYZ|465', 119: 
'XYZ|465', 120: 'XYZ|465', 121: 'XYZ|465', 122: 'XYZ|465', 123: 'XYZ|465', 124: 'XYZ|465', 125: 'XYZ|465', 126: 'XYZ|465', 127: 'XYZ|465', 128: 'XYZ|465', 129: 'XYZ|465', 130: 'XYZ|465', 131: 'XYZ|465', 132: 'XYZ|465', 133: 'XYZ|465', 134: 'XYZ|465', 135: 'XYZ|489', 136: 'XYZ|489', 137: 'XYZ|489', 138: 'XYZ|489', 139: 'XYZ|489', 140: 'XYZ|489', 141: 'XYZ|489', 142: 'XYZ|489', 143: 'XYZ|489', 144: 'XYZ|489', 145: 'XYZ|489', 146: 'XYZ|489', 147: 'XYZ|489', 148: 'XYZ|489', 149: 'XYZ|489', 150: 'XYZ|489', 151: 'XYZ|489', 152: 'XYZ|489', 153: 'XYZ|489', 154: 'XYZ|489', 155: 'XYZ|489', 156: 'XYZ|489', 157: 'XYZ|489', 158: 'XYZ|489', 159: 'XYZ|489', 160: 'XYZ|489', 161: 'XYZ|489', 162: 'XYZ|489', 163: 'XYZ|489', 164: 'XYZ|489', 165: 'XYZ|489', 166: 'XYZ|489', 167: 'XYZ|489', 168: 'XYZ|489', 169: 'XYZ|489', 170: 'XYZ|489', 171: 'XYZ|489', 172: 'XYZ|489', 173: 'XYZ|489', 174: 'XYZ|489', 175: 'XYZ|489', 176: 'XYZ|489', 177: 'XYZ|489', 178: 'XYZ|489', 179: 'XYZ|489', 180: 'XYZ|457', 181: 'XYZ|457', 182: 'XYZ|457', 183: 'XYZ|457', 184: 'XYZ|457', 185: 'XYZ|457', 186: 'XYZ|457', 187: 'XYZ|457', 188: 'XYZ|457', 189: 'XYZ|457', 190: 'XYZ|457', 191: 'XYZ|457', 192: 'XYZ|457', 193: 'XYZ|457', 194: 'XYZ|457', 195: 'XYZ|457', 196: 'XYZ|457', 197: 'XYZ|457', 198: 'XYZ|457', 199: 'XYZ|457', 200: 'XYZ|457', 201: 'XYZ|457', 202: 'XYZ|457', 203: 'XYZ|457', 204: 'XYZ|457', 205: 'XYZ|457', 206: 'XYZ|457', 207: 'XYZ|457', 208: 'XYZ|457', 209: 'XYZ|457', 210: 'XYZ|457', 211: 'XYZ|457', 212: 'XYZ|457', 213: 'XYZ|457', 214: 'XYZ|457', 215: 'XYZ|457', 216: 'XYZ|457', 217: 'XYZ|457', 218: 'XYZ|457', 219: 'XYZ|457', 220: 'XYZ|457', 221: 'XYZ|457', 222: 'XYZ|457', 223: 'XYZ|457', 224: 'XYZ|457'}, 'y': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 791, 11: 833, 12: 478, 13: 343, 14: 543, 15: 560, 16: 427, 17: 302, 18: 391, 19: 279, 20: 405, 21: 580, 22: 824, 23: 767, 24: 1102, 25: 1000, 26: 1032, 27: 668, 28: 540, 29: 477, 30: 353, 31: 427, 32: 28, 33: 2, 34: 914, 35: 718, 36: 44, 
37: 0, 38: 0, 39: 0, 40: 0, 41: 0, 42: 0, 43: 0, 44: 0, 45: 0, 46: 0, 47: 0, 48: 0, 49: 0, 50: 0, 51: 0, 52: 0, 53: 0, 54: 0, 55: 0, 56: 0, 57: 0, 58: 0, 59: 0, 60: 0, 61: 0, 62: 29, 63: 374, 64: 330, 65: 402, 66: 1005, 67: 1533, 68: 1582, 69: 1824, 70: 1168, 71: 193, 72: 895, 73: 613, 74: 651, 75: 267, 76: 233, 77: 135, 78: 173, 79: 564, 80: 789, 81: 343, 82: 275, 83: 383, 84: 181, 85: 96, 86: 499, 87: 53, 88: 84, 89: 23, 90: 0, 91: 0, 92: 0, 93: 0, 94: 0, 95: 0, 96: 0, 97: 0, 98: 0, 99: 0, 100: 0, 101: 0, 102: 0, 103: 0, 104: 0, 105: 0, 106: 0, 107: 44, 108: 292, 109: 240, 110: 364, 111: 806, 112: 1110, 113: 1232, 114: 1207, 115: 753, 116: 571, 117: 731, 118: 0, 119: 174, 120: 0, 121: 23, 122: 86, 123: 31, 124: 559, 125: 857, 126: 316, 127: 217, 128: 182, 129: 93, 130: 50, 131: 323, 132: 42, 133: 48, 134: 23, 135: 481, 136: 179, 137: 295, 138: 187, 139: 180, 140: 78, 141: 535, 142: 164, 143: 172, 144: 340, 145: 495, 146: 445, 147: 469, 148: 230, 149: 163, 150: 187, 151: 222, 152: 147, 153: 154, 154: 140, 155: 194, 156: 379, 157: 402, 158: 533, 159: 659, 160: 545, 161: 269, 162: 277, 163: 187, 164: 4, 165: 80, 166: 149, 167: 129, 168: 192, 169: 396, 170: 446, 171: 0, 172: 0, 173: 0, 174: 0, 175: 0, 176: 0, 177: 0, 178: 0, 179: 0, 180: 181, 181: 80, 182: 74, 183: 150, 184: 665, 185: 187, 186: 335, 187: 238, 188: 149, 189: 281, 190: 696, 191: 440, 192: 619, 193: 349, 194: 310, 195: 396, 196: 251, 197: 202, 198: 165, 199: 176, 200: 166, 201: 249, 202: 167, 203: 364, 204: 411, 205: 327, 206: 326, 207: 396, 208: 6, 209: 107, 210: 177, 211: 136, 212: 6, 213: 0, 214: 0, 215: 0, 216: 0, 217: 0, 218: 0, 219: 0, 220: 0, 221: 0, 222: 0, 223: 0, 224: 0}}
The FVA output is coming up as null, which should not be the case; the image is attached:
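The problem arises because of the freq parameter. Since your data is sampled every month starting with the first day of the month, you have to specify it using freq='MS' (month-start frequency). With freq='M' (month-end frequency) the forecast timestamps fall on the last day of each month, so they never match the first-of-month dates in df_test during the merge; y then becomes NaN and the FVA comes out as null.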
By changing that, I get the following

Python Pandas Dataframe: Everytime I use dataframe.reindex() all of my data in the dataframe gets deleted however index is correct

So my code works like this:
I take 2 dataframes(df1, df2) and make them the same size through some sorting by columns:
# Sort both dataframes by Part Number and then Operation so their rows are in the
# same order before reindexing.
# Note: sort_values returns a new frame and keeps each row's original index label.
df1Query = df1Query.sort_values(by=['Part Number', 'Operation'])
df2Query = df2Query.sort_values(by=['Part Number', 'Operation'])
Then I use:
# Copy df1's index labels into a plain Python list.
newIndex = df1.index.tolist()
To get a list of the indexes, now I want to change df2 to have the same indexes, so I do:
# reset_index(drop=True) relabels df2's rows 0..len(df2)-1; reindex(newIndex) then
# looks rows up BY LABEL, so any label in newIndex outside that range produces an
# all-NaN row instead of renaming an existing one.
df2 = df2.reset_index(drop=True).reindex(newIndex)
However, when I do this, all of my data in df2 gets deleted. The only thing that remains is the new index. Why is this? In my Python Interactive Window in VS Code, both df1 and df2 are listed as type dataframe. Something weird I see though is count for df1 is (35, 44) but it only shows df2 as (35). I can't think of why this is happening.
If I keep them as separate variables for the second dataframe, so that in my Python Interactive Window I can view both df2Query and df2 (after the reindex), the only line of code that has run in between is the reindexing, yet df2Query is still 100% correct while the data in df2 has disappeared.
Every combination of reindexing I have tried has resulted in a blank dataframe so far. Other things I have tried...
# reindex_like conforms df2 to df1's row labels AND column labels; positions with
# no matching label/column in df2 are filled with NaN.
df2 = df2.reindex_like(df1)
or
newIndex = df1.index.tolist()
# Tried only to check whether the index itself was the problem (it appears to be fine).
# NOTE(review): `index` is not defined in the visible snippet - presumably `newIndex` was meant.
df2 = df2.set_index(index)
EDIT Sample Data, first is df2, its a smaller dataframe, shape(35,10). Both df1 and df2 look very similar, just df1 has many more columns:
{'Part Number': {41: 'ME0000272', 42: 'ME0000272', 43: 'ME0000272', 44: 'ME0000272', 45: 'ME0000272', 46: 'ME0000272', 47: 'ME0000272', 48: 'ME0000272', 49: 'ME0000272', 50: 'ME0000272', 51: 'ME0000273', 52: 'ME0000273', 53: 'ME0000273', 54: 'ME0000273', 55: 'ME0000273', 56: 'ME0000273', 57: 'ME0000273', 58: 'ME0000273', 59: 'ME0000273', 60: 'ME0000273', 61: 'ME0000273', 62: 'ME0000273', 63: 'ME0000273', 74: 'ME0000274', 75: 'ME0000274', 76: 'ME0000274', 77: 'ME0000274', 78: 'ME0000274', 79: 'ME0000274', 80: 'ME0000274', 81: 'ME0000274', 82: 'ME0000274', 83: 'ME0000274', 84: 'ME0000274', 85: 'ME0000274'}, 'Planning Revision': {41: '001B.000', 42: '001B.000', 43: '001B.000', 44: '001B.000', 45: '001B.000', 46: '001B.000', 47: '001B.000', 48: '001B.000', 49: '001B.000', 50: '001B.000', 51: '001A.000', 52: '001A.000', 53: '001A.000', 54: '001A.000', 55: '001A.000', 56: '001A.000', 57: '001A.000', 58: '001A.000', 59: '001A.000', 60: '001A.000', 61: '001A.000', 62: '001A.000', 63: '001A.000', 74: '001A.000', 75: '001A.000', 76: '001A.000', 77: '001A.000', 78: '001A.000', 79: '001A.000', 80: '001A.000', 81: '001A.000', 82: '001A.000', 83: '001A.000', 84: '001A.000', 85: '001A.000'}, 'Operation': {41: '0100-00-0', 42: '0200-00-0', 43: '0300-00-0', 44: '0400-00-0', 45: '0500-00-0', 46: '0600-00-0', 47: '0700-00-0', 48: '0800-00-0', 49: '0900-00-0', 50: '1000-00-0', 51: '0100-00-0', 52: '0200-00-0', 53: '0300-00-0', 54: '0350-00-0', 55: '0400-00-0', 56: '0500-00-0', 57: '0600-00-0', 58: '0700-00-0', 59: '0800-00-0', 60: '0900-00-0', 61: '1000-00-0', 62: '1100-00-0', 63: '1200-00-0', 74: '0100-00-0', 75: '0200-00-0', 76: '0300-00-0', 77: '0400-00-0', 78: '0500-00-0', 79: '0600-00-0', 80: '0700-00-0', 81: '0800-00-0', 82: '0900-00-0', 83: '1000-00-0', 84: '1100-00-0', 85: '1200-00-0'}, 'Part Description': {41: 'SEAL, LABYRINTH-COMB AFT', 42: 'SEAL, LABYRINTH-COMB AFT', 43: 'SEAL, LABYRINTH-COMB AFT', 44: 'SEAL, LABYRINTH-COMB AFT', 45: 'SEAL, LABYRINTH-COMB AFT', 46: 'SEAL, 
LABYRINTH-COMB AFT', 47: 'SEAL, LABYRINTH-COMB AFT', 48: 'SEAL, LABYRINTH-COMB AFT', 49: 'SEAL, LABYRINTH-COMB AFT', 50: 'SEAL, LABYRINTH-COMB AFT', 51: 'SEAL ASSY, REAR, FUEL SLINGER', 52: 'SEAL ASSY, REAR, FUEL SLINGER', 53: 'SEAL ASSY, REAR, FUEL SLINGER', 54: 'SEAL ASSY, REAR, FUEL SLINGER', 55: 'SEAL ASSY, REAR, FUEL SLINGER', 56: 'SEAL ASSY, REAR, FUEL SLINGER', 57: 'SEAL ASSY, REAR, FUEL SLINGER', 58: 'SEAL ASSY, REAR, FUEL SLINGER', 59: 'SEAL ASSY, REAR, FUEL SLINGER', 60: 'SEAL ASSY, REAR, FUEL SLINGER', 61: 'SEAL ASSY, REAR, FUEL SLINGER', 62: 'SEAL ASSY, REAR, FUEL SLINGER', 63: 'SEAL ASSY, REAR, FUEL SLINGER', 74: 'SEAL ASSY, REAR, FUEL SLINGER', 75: 'SEAL ASSY, REAR, FUEL SLINGER', 76: 'SEAL ASSY, REAR, FUEL SLINGER', 77: 'SEAL ASSY, REAR, FUEL SLINGER', 78: 'SEAL ASSY, REAR, FUEL SLINGER', 79: 'SEAL ASSY, REAR, FUEL SLINGER', 80: 'SEAL ASSY, REAR, FUEL SLINGER', 81: 'SEAL ASSY, REAR, FUEL SLINGER', 82: 'SEAL ASSY, REAR, FUEL SLINGER', 83: 'SEAL ASSY, REAR, FUEL SLINGER', 84: 'SEAL ASSY, REAR, FUEL SLINGER', 85: 'SEAL ASSY, REAR, FUEL SLINGER'}, 'Operation Description': {41: 'ISSUE MATERIAL', 42: 'CLEAN PARTS', 43: 'PROG #8-27: P1270 OR 1272 (PRECIP HARDEN INCO718 # 1325/480 MIN)', 44: 'MACHINE - SIDE 1', 45: 'MACHINE - SIDE 2', 46: 'DEBURR', 47: 'CLEAN PARTS', 48: 'IDENTIFY', 49: 'FINAL INSPECTION', 50: 'STORE', 51: 'ISSUE', 52: 'ROUGH MACHINE I.D.', 53: 'ROUGH MACHINE O.D.', 54: 'DEBURR', 55: 'CLEAN PARTS', 56: 'PROG #8-27: P1270 OR 1272 (PRECIP HARDEN INCO 718 # 1325 / 480 MIN)', 57: 'FINISH MACHINE ID', 58: 'FINISH MACHINE OD', 59: 'DEBURR', 60: 'CLEAN PARTS', 61: 'IDENTIFY', 62: 'FINAL INSPECTION', 63: 'STORE', 74: 'ISSUE', 75: 'ROUGH MACHINE ID', 76: 'ROUGH MACHINE OD', 77: 'CLEAN PARTS', 78: 'PROG #8-27: P1270 OR 1272 (PRECIP HARDEN INCO 718 # 1325 / 480 MIN)', 79: 'FINISH MACHINE ID', 80: 'FINISH MACHINE OD', 81: 'DEBURR', 82: 'CLEAN PARTS', 83: 'IDENTIFY', 84: 'FINAL INSPECTION', 85: 'STORE'}, 'Planning Status': {41: 'W4_RELEASED', 42: 
'W4_RELEASED', 43: 'W4_RELEASED', 44: 'W4_RELEASED', 45: 'W4_RELEASED', 46: 'W4_RELEASED', 47: 'W4_RELEASED', 48: 'W4_RELEASED', 49: 'W4_RELEASED', 50: 'W4_RELEASED', 51: 'W4_RELEASED', 52: 'W4_RELEASED', 53: 'W4_RELEASED', 54: 'W4_RELEASED', 55: 'W4_RELEASED', 56: 'W4_RELEASED', 57: 'W4_RELEASED', 58: 'W4_RELEASED', 59: 'W4_RELEASED', 60: 'W4_RELEASED', 61: 'W4_RELEASED', 62: 'W4_RELEASED', 63: 'W4_RELEASED', 74: 'W4_RELEASED', 75: 'W4_RELEASED', 76: 'W4_RELEASED', 77: 'W4_RELEASED', 78: 'W4_RELEASED', 79: 'W4_RELEASED', 80: 'W4_RELEASED', 81: 'W4_RELEASED', 82: 'W4_RELEASED', 83: 'W4_RELEASED', 84: 'W4_RELEASED', 85: 'W4_RELEASED'}, 'Resopnsible Cell': {41: 'H01', 42: 'H01', 43: 'H01', 44: 'H01', 45: 'H01', 46: 'H01', 47: 'H01', 48: 'H01', 49: 'H01', 50: 'H01', 51: 'H44', 52: 'H44', 53: 'H44', 54: 'H44', 55: 'H44', 56: 'H44', 57: 'H44', 58: 'H44', 59: 'H44', 60: 'H44', 61: 'H44', 62: 'H44', 63: 'H44', 74: 'H44', 75: 'H44', 76: 'H44', 77: 'H44', 78: 'H44', 79: 'H44', 80: 'H44', 81: 'H44', 82: 'H44', 83: 'H44', 84: 'H44', 85: 'H44'}, 'Work Center': {41: '500SSA', 42: '500CPW', 43: '534HFI', 44: '500LNP', 45: '500LNP', 46: '500YDC', 47: '500CPW', 48: '500YIC', 49: '500QII', 50: '500SSA', 51: '500SSA', 52: '500LNP', 53: '500LNP', 54: '500YDC', 55: '500CPW', 56: '534HFI', 57: '500LNP', 58: '500LNP', 59: '500YDC', 60: '500CPW', 61: '500YIC', 62: '500QII', 63: '500SSA', 74: '500SSA', 75: '500LNP', 76: '500LNP', 77: '500CPW', 78: '534HFI', 79: '500LNP', 80: '500LNP', 81: '500YDC', 82: '500CPW', 83: '500YIC', 84: '500QII', 85: '500SSA'}, 'Work Center Description': {41: 'HQ Cell Stores', 42: 'PARTS WASHER', 43: 'VACUUM FURNACE', 44: 'PUMA LATHE GT3100M', 45: 'PUMA LATHE GT3100M', 46: 'HAND DEBURR', 47: 'PARTS WASHER', 48: 'HAND IDENTIFY', 49: 'FINAL INSPECTION - HQ', 50: 'HQ Cell Stores', 51: 'HQ Cell Stores', 52: 'PUMA LATHE GT3100M', 53: 'PUMA LATHE GT3100M', 54: 'HAND DEBURR', 55: 'PARTS WASHER', 56: 'VACUUM FURNACE', 57: 'PUMA LATHE GT3100M', 58: 'PUMA LATHE GT3100M', 
59: 'HAND DEBURR', 60: 'PARTS WASHER', 61: 'HAND IDENTIFY', 62: 'FINAL INSPECTION - HQ', 63: 'HQ Cell Stores', 74: 'HQ Cell Stores', 75: 'PUMA LATHE GT3100M', 76: 'PUMA LATHE GT3100M', 77: 'PARTS WASHER', 78: 'VACUUM FURNACE', 79: 'PUMA LATHE GT3100M', 80: 'PUMA LATHE GT3100M', 81: 'HAND DEBURR', 82: 'PARTS WASHER', 83: 'HAND IDENTIFY', 84: 'FINAL INSPECTION - HQ', 85: 'HQ Cell Stores'}, 'Engine / Project': {41: None, 42: None, 43: None, 44: None, 45: None, 46: None, 47: None, 48: None, 49: None, 50: None, 51: None, 52: None, 53: None, 54: None, 55: None, 56: None, 57: None, 58: None, 59: None, 60: None, 61: None, 62: None, 63: None, 74: None, 75: None, 76: None, 77: None, 78: None, 79: None, 80: None, 81: None, 82: None, 83: None, 84: None, 85: None}}
df1 is too large to fit, but its shape is (35,44) and contains these column headers (10 of the headers are the exact same as df2's):
Index(['Part Number', 'Operation', 'Part Description', 'Engine / Project',
'Resopnsible Cell', 'Lead', 'Support', 'Component Status',
'Planning Revision', 'Priority', 'ECD / Status', 'Planning Status',
'Tool Design', 'Tool Available', 'NC Program Status', 'Gage Status',
'Gage Available', 'Perishable Tool Status', 'CMM Fixture Design',
'CMM Fixture Available', 'CMM Program Status', 'CMM Programmer',
'Work Center', 'Work Center Type', 'Work Center Description',
'Operation Description', 'Date On Machine', 'FAIR', 'Notes',
'On Machine?', 'Quantity On Hand', 'Earliest MRP/HRP Date',
'Open Order Quantity', 'Behind', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
'Aug', 'Total To Aug', 'Demand To 3/26/2023', 'Demand To 5/6/2022',
'O/P Quote Received'],
dtype='object')
The ideal output would be:
df1
index data
1000 ABC
1001 DEF
1002 GHI
1003 JKL
df2
index data
0 MNO
1 PQR
2 STU
3 VWX
and then...
df2.'change index'() =
index data
1000 MNO
1001 PQR
1002 STU
1003 VWX

Perform a single task on (220 choose 5) combination in a dataframe

I have data with 220 rows. Initially I choose 5 rows at random and apply an operation to them. Now I have to perform the same task on every (220 choose 5) combination (that means 4102565544 data frames with 5 rows each). Python hits memory issues when I use list(itertools.combinations(list(range(0,222)),5)), and looping over each 5-row data frame is far too time-consuming. Below I have attached my data as a dictionary and replicated my problem setup.
Data
df={'Name': {0: '004737367A89', 1: '006D631822DA', 2: '007FEEEF095D', 3: '015EA8035B5D', 4: '0168C7824FB3', 5: '02236A01C769', 6: '026A35601C28', 7: '03939D273F7D', 8: '05BE3A6A6344', 9: '0735B7F399C8', 10: '075F90DEDAAC', 11: '079D00DB87B6', 12: '08321FDDA475', 13: '084147D3DE00', 14: '08693ADAF466', 15: '08EE69FF7C9B', 16: '0996F835D14B', 17: '0A061E004649', 18: '0BDADD43DF2D', 19: '0D580A803B2C', 20: '11DCF10E0F76', 21: '1241EC5AC73C', 22: '150595F71A7A', 23: '160D7B436114', 24: '1805135DA1B7', 25: '18D26316EA11', 26: '1B744908A7E9', 27: '1CB417508187', 28: '1EA75E92E370', 29: '1F1B4DA40CE4', 30: '209D86760A9C', 31: '228BC53DB280', 32: '235D0F9A5E0E', 33: '2452814BCC90', 34: '2923CA6C88B1', 35: '2CB60EF30BAA', 36: '2CD7BD1FC443', 37: '2D03FAC79D60', 38: '2F34FFA27A7C', 39: '2F8F282FDCEE', 40: '3.03891E+11', 41: '31B4A8BDBA5F', 42: '34EC4E7D8E15', 43: '3695444ADBFF', 44: '370F1D138305', 45: '3826943C86AF', 46: '39F11738A59D', 47: '39F2FF0A2E05', 48: '3A8B6F61E548', 49: '3B256CE48F60', 50: '3C09C2C73655', 51: '3D6858B43366', 52: '3D94154B544C', 53: '3DDD62DDF6C4', 54: '3EBDAFB8E7EE', 55: '408B3D0EAF85', 56: '40ED913F4BB6', 57: '43380E855E4E', 58: '44C8332521DE', 59: '4817047FFAC1', 60: '481896BC4240', 61: '49263E82B2B8', 62: '4AF76F8D6BBB', 63: '4BC2016E5222', 64: '4CCF2D4FF5EC', 65: '4E9750936994', 66: '4F61F6A5588D', 67: '505F16F25595', 68: '50756E6D3B32', 69: '50E1E1F5F31D', 70: '516B4C9C3F45', 71: '52608C24A09E', 72: '52B2EBC622A6', 73: '539B8164BD32', 74: '5462E581A288', 75: '55149C502434', 76: '55D8B9306A65', 77: '5808368AFA0A', 78: '58F6BA305E2A', 79: '58FE73C690DA', 80: '596857EDC73F', 81: '599DF7F0CB41', 82: '59F1F27E85F4', 83: '5AE11428142F', 84: '5B27B574EA5B', 85: '5D3FA98DDD61', 86: '5DE6CFC7E471', 87: '5DF85F5EA21C', 88: '5EA87B759595', 89: '5EAA2E0BEAA2', 90: '5EAFEBA99A30', 91: '5EFC03FC84DF', 92: '5F6A8D18E234', 93: '6008B6021BAA', 94: '63765F49AC32', 95: '64099F419232', 96: '652349DF5059', 97: '6551FB43EE37', 98: '6613C12B0634', 99: 
'66C312BFDFD6', 100: '66D964D2E1D0', 101: '6790A35547E2', 102: '67A2603888E5', 103: '6991A9411704', 104: '6CFC28C22836', 105: '6D5DAED137C9', 106: '6EBB87FAD022', 107: '6EF1206450AF', 108: '70C74C90C3E2', 109: '71168B36CCFD', 110: '7177392ADD8B', 111: '74AF6AA78FB9', 112: '759CFBB05E2F', 113: '771E8EA5A4C7', 114: '7740740D57BE', 115: '7926DFB85C8B', 116: '7A6091203844', 117: '7C23D53CE5DD', 118: '7C4ED1AA239F', 119: '7E0C21E0010F', 120: '80E9914A0BF8', 121: '82867FEAF519', 122: '82C735B34C85', 123: '85EF1FFBAC47', 124: '872F22A4D018', 125: '87C72000AAB2', 126: '8978B70E88C3', 127: '8ADEF3F17E42', 128: '8B5F4EE22DF5', 129: '8B757ED14D67', 130: '8E0C10341AA8', 131: '90289E4E68F6', 132: '9259DEED6524', 133: '92754763710B', 134: '92B164934E01', 135: '96DBA1873BFF', 136: '97E7144ECEF9', 137: '9AE4EB9DF4F0', 138: '9CAC53908EE1', 139: '9F31161E7BDF', 140: 'A090B8A939CB', 141: 'A12E89E87CB5', 142: 'A31CA572620F', 143: 'A4263AA51F9A', 144: 'A540D6615FA0', 145: 'A56804CE6BAF', 146: 'A60313C4FC06', 147: 'A612803F81BA', 148: 'A77E12FFA171', 149: 'A87B6602E946', 150: 'AADE28D99973', 151: 'AEB37BE9DBFF', 152: 'B04ACAB6A193', 153: 'B41004303288', 154: 'B454AAFDA2AF', 155: 'B701B4E2F2BF', 156: 'B7EF621EC0AE', 157: 'B9084B8E2378', 158: 'BA8C4B0E8378', 159: 'BBD01B2776A8', 160: 'BE5377A632DF', 161: 'BE8D95B26DEE', 162: 'BEEB25AC3BB3', 163: 'BF585F42B5F6', 164: 'BF889C615B6A', 165: 'C1934D47BC69', 166: 'C31934680839', 167: 'C43F40D3D865', 168: 'C4955BCC1F0C', 169: 'C4F03F22DE3E', 170: 'C5BC9B26046C', 171: 'C5D2BE738C56', 172: 'C762399CAF83', 173: 'C7B9B444D117', 174: 'C943B9F6FDDF', 175: 'C9C7138CAF65', 176: 'CB66BE597E30', 177: 'CC7DA44E344E', 178: 'CE81A7E65B6B', 179: 'CE971F87D0B5', 180: 'CECC8C16ECAB', 181: 'D111860A3AC1', 182: 'D159C02757AE', 183: 'D33BB70DCA77', 184: 'D386F0671D80', 185: 'D43B801CCCA9', 186: 'D465BE3D4A94', 187: 'D49E08EEC650', 188: 'D4BD5D5DD7E4', 189: 'D64F455CB56A', 190: 'D6D99F00B58B', 191: 'D7774555E609', 192: 'D7CDFD417C01', 193: 'DBF16B9938A4', 194: 
'DCC2FA798C09', 195: 'DE6E090827B8', 196: 'E25F5A55A4D8', 197: 'E5A82C4E86C7', 198: 'E5AC30A8337B', 199: 'E6EBC0EFBF18', 200: 'EB9BBBA2FEB9', 201: 'EC8A20CAC153', 202: 'EC8EA44FDACD', 203: 'ECB284CBDDA7', 204: 'EED0F8B3B968', 205: 'EF4B578B0902', 206: 'F13986786A7A', 207: 'F17F0E81FC73', 208: 'F34CFBCB7A28', 209: 'F396C1E8BF59', 210: 'F40ED923507F', 211: 'F87A72CF9671', 212: 'F8CDE15A2FCB', 213: 'F9032EE897A9', 214: 'FAC08B5AA521', 215: 'FB3071FBA3BC', 216: 'FC6435726337', 217: 'FD5F2F4D32D7', 218: 'FD6E925243AA', 219: 'FDA85734568D', 220: 'FF18E7D41654', 221: 'FFEC03758A05'}, 'Code': {0: 375000, 1: 275000, 2: 225000, 3: 275000, 4: 175000, 5: 275000, 6: 295000, 7: 525000, 8: 175000, 9: 135000, 10: 275000, 11: 250000, 12: 275000, 13: 350000, 14: 225000, 15: 175000, 16: 395000, 17: 275000, 18: 225000, 19: 195000, 20: 225000, 21: 175000, 22: 135000, 23: 225000, 24: 250000, 25: 225000, 26: 250000, 27: 295000, 28: 275000, 29: 250000, 30: 275000, 31: 250000, 32: 295000, 33: 195000, 34: 275000, 35: 195000, 36: 275000, 37: 175000, 38: 525000, 39: 225000, 40: 350000, 41: 135000, 42: 295000, 43: 195000, 44: 495000, 45: 495000, 46: 275000, 47: 375000, 48: 295000, 49: 250000, 50: 250000, 51: 225000, 52: 175000, 53: 250000, 54: 475000, 55: 135000, 56: 350000, 57: 225000, 58: 250000, 59: 275000, 60: 225000, 61: 295000, 62: 225000, 63: 250000, 64: 225000, 65: 250000, 66: 135000, 67: 175000, 68: 295000, 69: 175000, 70: 295000, 71: 295000, 72: 225000, 73: 225000, 74: 365000, 75: 295000, 76: 225000, 77: 195000, 78: 225000, 79: 225000, 80: 225000, 81: 295000, 82: 135000, 83: 195000, 84: 295000, 85: 550000, 86: 250000, 87: 225000, 88: 275000, 89: 225000, 90: 295000, 91: 250000, 92: 250000, 93: 225000, 94: 175000, 95: 250000, 96: 175000, 97: 350000, 98: 175000, 99: 275000, 100: 295000, 101: 225000, 102: 225000, 103: 195000, 104: 175000, 105: 350000, 106: 175000, 107: 275000, 108: 275000, 109: 175000, 110: 195000, 111: 225000, 112: 275000, 113: 375000, 114: 135000, 115: 135000, 116: 
395000, 117: 295000, 118: 195000, 119: 275000, 120: 195000, 121: 375000, 122: 195000, 123: 275000, 124: 275000, 125: 175000, 126: 325000, 127: 275000, 128: 250000, 129: 135000, 130: 175000, 131: 195000, 132: 550000, 133: 225000, 134: 250000, 135: 350000, 136: 495000, 137: 275000, 138: 135000, 139: 175000, 140: 175000, 141: 225000, 142: 175000, 143: 275000, 144: 325000, 145: 295000, 146: 275000, 147: 275000, 148: 175000, 149: 350000, 150: 550000, 151: 250000, 152: 350000, 153: 325000, 154: 175000, 155: 250000, 156: 175000, 157: 250000, 158: 275000, 159: 225000, 160: 195000, 161: 175000, 162: 225000, 163: 275000, 164: 225000, 165: 135000, 166: 250000, 167: 225000, 168: 175000, 169: 275000, 170: 175000, 171: 275000, 172: 175000, 173: 195000, 174: 325000, 175: 275000, 176: 295000, 177: 350000, 178: 350000, 179: 425000, 180: 225000, 181: 135000, 182: 150000, 183: 135000, 184: 350000, 185: 225000, 186: 375000, 187: 175000, 188: 295000, 189: 195000, 190: 350000, 191: 175000, 192: 225000, 193: 195000, 194: 195000, 195: 350000, 196: 250000, 197: 175000, 198: 175000, 199: 395000, 200: 175000, 201: 225000, 202: 175000, 203: 350000, 204: 175000, 205: 250000, 206: 375000, 207: 275000, 208: 525000, 209: 175000, 210: 375000, 211: 295000, 212: 275000, 213: 175000, 214: 325000, 215: 250000, 216: 195000, 217: 275000, 218: 250000, 219: 135000, 220: 195000, 221: 135000}}
What I want is to select random 5 rows first
import random
import pandas as pd

# Build the working frame from the `df` dict literal shown above (columns Name, Code).
data = pd.DataFrame(df)

# Draw 5 distinct row labels from 10..29 and keep the matching Name values.
# (The original read from `data1`, which is never defined; `data` is the only frame built here.)
inputt = pd.DataFrame({"NameID": data.Name[random.sample(range(10, 30), 5)]})

# Collect the rows whose Name equals each sampled NameID, in sample order.
# pd.concat replaces DataFrame.append (removed in pandas 2.0) and, unlike the
# original loop, does not need D2 to exist before the first row is added.
D2 = pd.concat([data[data["Name"] == name] for name in inputt["NameID"]])

# Reference total: sum of Code over the sampled rows.
values = D2.Code
real_sum = values.sum()
and then I want to perform the same operation on the rest of the rows in the data frame and figure out which 5-row data frames have a sum less than real_sum. Is there any simulation technique I can apply here, or anything else?
Thanks
To avoid the memory issues you don't need to access the whole information directly. What I mean is that you can be "lazy" about it and use it only when needed. -> Enter Lazy evaluation
In programming language theory, lazy evaluation, or call-by-need,[1] is an evaluation strategy which delays the evaluation of an expression until its value is needed
https://en.wikipedia.org/wiki/Lazy_evaluation
This means that you don't need to evaluate the result from the combinations completely at first, but only when needed:
import itertools

# Lazy iterator over every 5-element combination of the row positions 0..221;
# no combination is produced until the iterator is consumed.
combos = itertools.combinations(range(222), 5)
and use it afterwards like this:
# Walk the lazy `combos` iterator and compute the Code total of each 5-row combination.
# `df` is the dict literal from the question and `combos` is the itertools iterator
# created just before this snippet.
data = pd.DataFrame(df)
D2 = pd.DataFrame()  # stays empty only if `combos` yields nothing
for combo in combos:
    # Names of the rows selected by this combination of row labels.
    inputt = pd.DataFrame({"NameID": data.Name[list(combo)]})
    # Rebuild D2 from scratch for every combination: appending to one shared D2
    # would mix rows from earlier combinations into the sum. pd.concat also
    # replaces DataFrame.append, which was removed in pandas 2.0.
    D2 = pd.concat([data[data["Name"] == name] for name in inputt["NameID"]])
    values = D2.Code
    # NOTE: real_sum is overwritten on every iteration - compare or store it here,
    # inside the loop, if the per-combination totals are needed afterwards.
    real_sum = values.sum()

Python plotly Express Histogram: Graph not showing all unique TIME_BUCKET values, it clubbing TIME_BUCKETs in hourly value

My CSV has three columns and about 1.1K rows. It holds values for 5-minute TIME_BUCKETs such as 03:40:00+00:00, 03:45:00+00:00, etc.
I expect the graph to plot a histogram over all of these distinct TIME_BUCKETs, but it is actually plotting the graph for hourly time buckets like 03:00, 04:00, etc.
My code is like below
import pandas as pd
import plotly.express as px
# Load the report log and show a histogram over TIME_BUCKET, coloured by
# REPORT_NAME.
csv_path = "D:/Work/Issue/5MinTimeBucketHistogramNotWorking1.csv"
df = pd.read_csv(csv_path)
graph = px.histogram(
    df,
    x='TIME_BUCKET',
    color='REPORT_NAME',
    title='Report Category Wise Execution Count (5 minuntes sample size)',
)
graph.show()
My CSV content looks like the sample below; the full file has about 1.1K rows. The whole CSV is shared here for reference.
,REPORT_NAME,TIME_BUCKET
23,DashboardReport,2021-01-20 03:30:00+00:00
33,DashboardReport,2021-01-20 03:40:00+00:00
69,ExportReport,2021-01-20 03:40:00+00:00
74,ExportReport,2021-01-20 03:40:00+00:00
97,ExportReport,2021-01-20 03:40:00+00:00
98,ExportReport,2021-01-20 03:40:00+00:00
99,ExportReport,2021-01-20 03:40:00+00:00
101,ExportReport,2021-01-20 03:40:00+00:00
103,ExportReport,2021-01-20 03:40:00+00:00
2821,DashboardReport,2021-01-20 15:40:00+00:00
2822,DashboardReport,2021-01-20 15:40:00+00:00
2823,DashboardReport,2021-01-20 15:45:00+00:00
2896,DashboardReport,2021-01-20 16:15:00+00:00
3283,SQLReport,2021-01-20 19:00:00+00:00
3285,DashboardReport,2021-01-20 19:00:00+00:00
3288,DashboardReport,2021-01-20 19:05:00+00:00
3289,DashboardReport,2021-01-20 19:05:00+00:00
3292,ImportReport,2021-01-20 19:05:00+00:00
3293,DashboardReport,2021-01-20 19:05:00+00:00
3294,DashboardReport,2021-01-20 19:05:00+00:00
3295,DashboardReport,2021-01-20 19:10:00+00:00
3297,DashboardReport,2021-01-20 19:10:00+00:00
3298,SQLReport,2021-01-20 19:10:00+00:00
3300,DashboardReport,2021-01-20 19:10:00+00:00
3303,SQLReport,2021-01-20 19:15:00+00:00
3307,ImportReport,2021-01-20 19:15:00+00:00
3309,DashboardReport,2021-01-20 19:15:00+00:00
3312,DashboardReport,2021-01-20 19:15:00+00:00
3313,DashboardReport,2021-01-20 19:15:00+00:00
3314,SQLReport,2021-01-20 19:15:00+00:00
3315,DashboardReport,2021-01-20 19:15:00+00:00
3316,DashboardReport,2021-01-20 19:15:00+00:00
3317,DashboardReport,2021-01-20 19:15:00+00:00
3318,ImportReport,2021-01-20 19:15:00+00:00
3319,DashboardReport,2021-01-20 19:15:00+00:00
3324,DashboardReport,2021-01-20 19:20:00+00:00
3328,SQLReport,2021-01-20 19:20:00+00:00
3331,ImportReport,2021-01-20 19:20:00+00:00
3332,ImportReport,2021-01-20 19:20:00+00:00
3335,DashboardReport,2021-01-20 19:20:00+00:00
3336,ImportReport,2021-01-20 19:20:00+00:00
3337,DashboardReport,2021-01-20 19:20:00+00:00
3339,DashboardReport,2021-01-20 19:20:00+00:00
3344,DashboardReport,2021-01-20 19:20:00+00:00
3345,DashboardReport,2021-01-20 19:20:00+00:00
3349,DBReport,2021-01-20 19:20:00+00:00
3350,SQLReport,2021-01-20 19:20:00+00:00
3354,DashboardReport,2021-01-20 19:20:00+00:00
3355,DashboardReport,2021-01-20 19:20:00+00:00
3356,DashboardReport,2021-01-20 19:20:00+00:00
3357,DashboardReport,2021-01-20 19:20:00+00:00
3358,DashboardReport,2021-01-20 19:20:00+00:00
3359,DashboardReport,2021-01-20 19:20:00+00:00
3360,DashboardReport,2021-01-20 19:20:00+00:00
3368,DashboardReport,2021-01-20 19:25:00+00:00
3370,DashboardReport,2021-01-20 19:25:00+00:00
3375,DashboardReport,2021-01-20 19:25:00+00:00
3377,DashboardReport,2021-01-20 19:30:00+00:00
3379,DashboardReport,2021-01-20 19:30:00+00:00
3381,DashboardReport,2021-01-20 19:30:00+00:00
3384,DashboardReport,2021-01-20 19:30:00+00:00
3396,ImportReport,2021-01-20 19:40:00+00:00
3398,DashboardReport,2021-01-20 19:40:00+00:00
3403,DashboardReport,2021-01-20 19:45:00+00:00
3404,DashboardReport,2021-01-20 19:45:00+00:00
3408,DashboardReport,2021-01-20 19:45:00+00:00
3410,DashboardReport,2021-01-20 19:45:00+00:00
3418,DashboardReport,2021-01-20 19:50:00+00:00
3419,SQLReport,2021-01-20 19:50:00+00:00
3421,DashboardReport,2021-01-20 19:50:00+00:00
3422,DashboardReport,2021-01-20 19:50:00+00:00
3429,DashboardReport,2021-01-20 19:50:00+00:00
3434,DashboardReport,2021-01-20 19:55:00+00:00
3443,ImportReport,2021-01-20 20:00:00+00:00
3444,ImportReport,2021-01-20 20:00:00+00:00
3450,DBReport,2021-01-20 20:05:00+00:00
3451,DBReport,2021-01-20 20:05:00+00:00
3489,SQLReport,2021-01-20 20:20:00+00:00
3490,ImportReport,2021-01-20 20:20:00+00:00
3496,DashboardReport,2021-01-20 20:20:00+00:00
3499,ImportReport,2021-01-20 20:25:00+00:00
3501,DashboardReport,2021-01-20 20:25:00+00:00
3505,DashboardReport,2021-01-20 20:25:00+00:00
3513,SQLReport,2021-01-20 20:30:00+00:00
3514,DashboardReport,2021-01-20 20:35:00+00:00
3521,SQLReport,2021-01-20 20:35:00+00:00
3522,DashboardReport,2021-01-20 20:35:00+00:00
3523,DashboardReport,2021-01-20 20:35:00+00:00
3527,DashboardReport,2021-01-20 20:40:00+00:00
3537,DashboardReport,2021-01-20 20:40:00+00:00
3538,DashboardReport,2021-01-20 20:40:00+00:00
3540,DashboardReport,2021-01-20 20:45:00+00:00
3549,DashboardReport,2021-01-20 20:50:00+00:00
3552,DashboardReport,2021-01-20 20:55:00+00:00
3555,SQLReport,2021-01-20 20:55:00+00:00
3556,DashboardReport,2021-01-20 20:55:00+00:00
3557,SQLReport,2021-01-20 20:55:00+00:00
3558,DashboardReport,2021-01-20 20:55:00+00:00
The output looks like below
Your df['TIME_BUCKET'] is unsurprisingly interpreted by plotly as continuous time, and is shown as such on a continuous x-axis. If you'd like to display the bucket values as categories, just as they occur in your dataframe, add:
# Switch the x-axis type to 'category' so the bucket values are shown as
# discrete categories rather than on a continuous time axis.
fig.update_xaxes(type='category')
If you adjust the font size of the ticktext a bit as well, then you'll end up with this:
Notice that I've used a formatted version of df['TIME_BUCKET'] in:
# Characters 11-15 of each timestamp string are kept as the bucket label
# (the "HH:MM" part of values such as '2021-01-20 03:40:00+00:00').
df['buckets'] = df['TIME_BUCKET'].map(lambda stamp: stamp[11:16])
If you don't you'll end up with this:
Complete code with data sample:
import pandas as pd
import plotly.express as px
# NOTE: the sample was presumably exported from the original frame with
# df.to_dict(). That call is not executed here: df does not exist yet at this
# point, so invoking it would raise NameError.
# Each row is (original CSV row number, REPORT_NAME, TIME_BUCKET).
_SAMPLE_ROWS = [
    (23, 'DashboardReport', '2021-01-20 03:30:00+00:00'),
    (33, 'DashboardReport', '2021-01-20 03:40:00+00:00'),
    (69, 'ExportReport', '2021-01-20 03:40:00+00:00'),
    (74, 'ExportReport', '2021-01-20 03:40:00+00:00'),
    (97, 'ExportReport', '2021-01-20 03:40:00+00:00'),
    (98, 'ExportReport', '2021-01-20 03:40:00+00:00'),
    (99, 'ExportReport', '2021-01-20 03:40:00+00:00'),
    (101, 'ExportReport', '2021-01-20 03:40:00+00:00'),
    (103, 'ExportReport', '2021-01-20 03:40:00+00:00'),
    (2821, 'DashboardReport', '2021-01-20 15:40:00+00:00'),
    (2822, 'DashboardReport', '2021-01-20 15:40:00+00:00'),
    (2823, 'DashboardReport', '2021-01-20 15:45:00+00:00'),
    (2896, 'DashboardReport', '2021-01-20 16:15:00+00:00'),
    (3283, 'SQLReport', '2021-01-20 19:00:00+00:00'),
    (3285, 'DashboardReport', '2021-01-20 19:00:00+00:00'),
    (3288, 'DashboardReport', '2021-01-20 19:05:00+00:00'),
    (3289, 'DashboardReport', '2021-01-20 19:05:00+00:00'),
    (3292, 'ImportReport', '2021-01-20 19:05:00+00:00'),
    (3293, 'DashboardReport', '2021-01-20 19:05:00+00:00'),
    (3294, 'DashboardReport', '2021-01-20 19:05:00+00:00'),
    (3295, 'DashboardReport', '2021-01-20 19:10:00+00:00'),
    (3297, 'DashboardReport', '2021-01-20 19:10:00+00:00'),
    (3298, 'SQLReport', '2021-01-20 19:10:00+00:00'),
    (3300, 'DashboardReport', '2021-01-20 19:10:00+00:00'),
    (3303, 'SQLReport', '2021-01-20 19:15:00+00:00'),
    (3307, 'ImportReport', '2021-01-20 19:15:00+00:00'),
    (3309, 'DashboardReport', '2021-01-20 19:15:00+00:00'),
    (3312, 'DashboardReport', '2021-01-20 19:15:00+00:00'),
    (3313, 'DashboardReport', '2021-01-20 19:15:00+00:00'),
    (3314, 'SQLReport', '2021-01-20 19:15:00+00:00'),
    (3315, 'DashboardReport', '2021-01-20 19:15:00+00:00'),
    (3316, 'DashboardReport', '2021-01-20 19:15:00+00:00'),
    (3317, 'DashboardReport', '2021-01-20 19:15:00+00:00'),
    (3318, 'ImportReport', '2021-01-20 19:15:00+00:00'),
    (3319, 'DashboardReport', '2021-01-20 19:15:00+00:00'),
    (3324, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3328, 'SQLReport', '2021-01-20 19:20:00+00:00'),
    (3331, 'ImportReport', '2021-01-20 19:20:00+00:00'),
    (3332, 'ImportReport', '2021-01-20 19:20:00+00:00'),
    (3335, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3336, 'ImportReport', '2021-01-20 19:20:00+00:00'),
    (3337, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3339, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3344, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3345, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3349, 'DBReport', '2021-01-20 19:20:00+00:00'),
    (3350, 'SQLReport', '2021-01-20 19:20:00+00:00'),
    (3354, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3355, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3356, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3357, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3358, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3359, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3360, 'DashboardReport', '2021-01-20 19:20:00+00:00'),
    (3368, 'DashboardReport', '2021-01-20 19:25:00+00:00'),
    (3370, 'DashboardReport', '2021-01-20 19:25:00+00:00'),
    (3375, 'DashboardReport', '2021-01-20 19:25:00+00:00'),
    (3377, 'DashboardReport', '2021-01-20 19:30:00+00:00'),
    (3379, 'DashboardReport', '2021-01-20 19:30:00+00:00'),
    (3381, 'DashboardReport', '2021-01-20 19:30:00+00:00'),
    (3384, 'DashboardReport', '2021-01-20 19:30:00+00:00'),
    (3396, 'ImportReport', '2021-01-20 19:40:00+00:00'),
    (3398, 'DashboardReport', '2021-01-20 19:40:00+00:00'),
    (3403, 'DashboardReport', '2021-01-20 19:45:00+00:00'),
    (3404, 'DashboardReport', '2021-01-20 19:45:00+00:00'),
    (3408, 'DashboardReport', '2021-01-20 19:45:00+00:00'),
    (3410, 'DashboardReport', '2021-01-20 19:45:00+00:00'),
    (3418, 'DashboardReport', '2021-01-20 19:50:00+00:00'),
    (3419, 'SQLReport', '2021-01-20 19:50:00+00:00'),
    (3421, 'DashboardReport', '2021-01-20 19:50:00+00:00'),
    (3422, 'DashboardReport', '2021-01-20 19:50:00+00:00'),
    (3429, 'DashboardReport', '2021-01-20 19:50:00+00:00'),
    (3434, 'DashboardReport', '2021-01-20 19:55:00+00:00'),
    (3443, 'ImportReport', '2021-01-20 20:00:00+00:00'),
    (3444, 'ImportReport', '2021-01-20 20:00:00+00:00'),
    (3450, 'DBReport', '2021-01-20 20:05:00+00:00'),
    (3451, 'DBReport', '2021-01-20 20:05:00+00:00'),
    (3489, 'SQLReport', '2021-01-20 20:20:00+00:00'),
    (3490, 'ImportReport', '2021-01-20 20:20:00+00:00'),
    (3496, 'DashboardReport', '2021-01-20 20:20:00+00:00'),
    (3499, 'ImportReport', '2021-01-20 20:25:00+00:00'),
    (3501, 'DashboardReport', '2021-01-20 20:25:00+00:00'),
    (3505, 'DashboardReport', '2021-01-20 20:25:00+00:00'),
    (3513, 'SQLReport', '2021-01-20 20:30:00+00:00'),
    (3514, 'DashboardReport', '2021-01-20 20:35:00+00:00'),
    (3521, 'SQLReport', '2021-01-20 20:35:00+00:00'),
    (3522, 'DashboardReport', '2021-01-20 20:35:00+00:00'),
    (3523, 'DashboardReport', '2021-01-20 20:35:00+00:00'),
    (3527, 'DashboardReport', '2021-01-20 20:40:00+00:00'),
    (3537, 'DashboardReport', '2021-01-20 20:40:00+00:00'),
    (3538, 'DashboardReport', '2021-01-20 20:40:00+00:00'),
    (3540, 'DashboardReport', '2021-01-20 20:45:00+00:00'),
    (3549, 'DashboardReport', '2021-01-20 20:50:00+00:00'),
    (3552, 'DashboardReport', '2021-01-20 20:55:00+00:00'),
    (3555, 'SQLReport', '2021-01-20 20:55:00+00:00'),
    (3556, 'DashboardReport', '2021-01-20 20:55:00+00:00'),
    (3557, 'SQLReport', '2021-01-20 20:55:00+00:00'),
    (3558, 'DashboardReport', '2021-01-20 20:55:00+00:00'),
]
# The row-number column keeps the ' ' (single space) name used in the export.
df = pd.DataFrame(_SAMPLE_ROWS, columns=[' ', 'REPORT_NAME', 'TIME_BUCKET'])
# Keep characters 11-15 of each timestamp string (the "HH:MM" part) as a
# short tick label.
df['buckets'] = [dat[11:16] for dat in df['TIME_BUCKET']]
# Plot the shortened labels. The original passed x='TIME_BUCKET', which left
# the 'buckets' column unused and put the full timestamps on the axis.
fig = px.histogram(df, x='buckets', color='REPORT_NAME', title='Report Category Wise Execution Count (5 minuntes sample size)')
# A category axis shows the bucket values as discrete categories instead of
# placing them on a continuous time axis.
fig.update_xaxes(type='category')
# Smaller tick font for the x-axis labels.
fig.layout.xaxis.tickfont.size = 10
fig.show()

Categories

Resources