I want to collapse the rows of a dataframe such that each row represents an hour (the source data goes down to minute granularity). I don't want to lose the data, instead I want to create a dict so that the key of the dict is the minute (the source data doesn't go down to seconds) and then have the value be the value for that minute. See my brute force example at the bottom for what I'm really getting at.
I have data that looks something like this
import pandas as pd
from datetime import datetime, timedelta
import itertools
import random
from copy import deepcopy
# Demo frame: every fruit is paired with the same 48 timestamps, each one
# x * randint(1, 9) minutes after midnight on 2022-01-01.
fruits = ['apple', 'banana', 'pear']
start = datetime(2022, 1, 1)
dts = [start + timedelta(minutes=x * random.randint(1, 9)) for x in range(48)]
df = pd.DataFrame(list(itertools.product(fruits, dts)), columns=['fruit', 'dt'])
# Three independent columns of random integers drawn from range(0, 100).
for value_col in ('value1', 'value2', 'value3'):
    df[value_col] = [random.randrange(0, 100) for _ in range(len(df))]
# fruit dt value1 value2 value3
# 0 apple 2022-01-01 00:00:00 56 55 65
# 1 apple 2022-01-01 00:02:00 98 67 16
# 2 apple 2022-01-01 00:12:00 52 19 23
# 3 apple 2022-01-01 00:09:00 0 60 82
# 4 apple 2022-01-01 00:08:00 94 51 22
# ... ... ... ... ... ...
# 139 pear 2022-01-01 00:43:00 14 49 37
# 140 pear 2022-01-01 02:56:00 92 98 73
# 141 pear 2022-01-01 02:15:00 9 5 99
# 142 pear 2022-01-01 03:04:00 92 39 10
# 143 pear 2022-01-01 05:29:00 52 61 65
The best I can do using reasonable syntax is:
# Split each timestamp into the pieces used as grouping keys.
df['minute'] = [stamp.minute for stamp in df['dt']]
# The hour label is shifted by one (00:xx is labelled hour 1).
df['hour'] = [stamp.hour + 1 for stamp in df['dt']]
# Same timestamp with its hour and minute reset to zero.
df['date'] = [stamp.replace(hour=0, minute=0) for stamp in df['dt']]
def splitdict(x):
    """Build a dict from an iterable of pairs.

    Each element of *x* must be indexable: its first item becomes the key
    and its second item the value. If a key occurs more than once, the
    last occurrence wins.
    """
    # A single pass is enough; no intermediate list copy is needed.
    return {elem[0]: elem[1] for elem in x}
# Attempt to build one {minute: value1} dict per (fruit, date, hour) group.
# NOTE: the lambda never uses its group argument x; it zips the columns of the
# whole df, so every group is handed the same pairs.
df.groupby(['fruit','date','hour']).apply(lambda x: splitdict(zip(df.minute,df.value1)))
Of course, this doesn't actually work (so I'll spare you the output). It returns the same dict for every row and it would only work for value1 (ie. I don't know how to get the results for value1, value2, value3 at once) I thought if I did a deepcopy that would resolve the repeating dict but it didn't so I took that out.
I did a horrendous for loop to brute force what I want. That looks like this:
#brute force
key_cols = ['fruit', 'date', 'hour']
value_cols = ['value1', 'value2', 'value3']

# One row per distinct (fruit, date, hour) combination, in first-seen order.
uniq = df.drop_duplicates(key_cols)[key_cols]

results = []
for _, row in uniq.iterrows():
    # Select the source rows that belong to this fruit/date/hour.
    in_group = (
        (df['date'] == row['date'])
        & (df['hour'] == row['hour'])
        & (df['fruit'] == row['fruit'])
    )
    records = df.loc[in_group, ['minute'] + value_cols].to_dict('records')
    # For each value column, map minute -> value within this group.
    per_minute = {
        col: {rec['minute']: rec[col] for rec in records}
        for col in value_cols
    }
    # Wrap each dict in a list so it lands in a single cell of a one-row frame.
    results.append(pd.DataFrame({
        'date': row['date'],
        'hour': row['hour'],
        'fruit': row['fruit'],
        'value1': [per_minute['value1']],
        'value2': [per_minute['value2']],
        'value3': [per_minute['value3']],
    }))
results = pd.concat(results)
# date hour fruit value1 value2 value3
# 0 2022-01-01 1 apple {0: 56, 2: 98, 12: 52, 9: 0, 8: 94, 30: 90, 48: 92, 21: 16, 45: 9, 40: 58, 24: 31, 13: 45, 28: 86, 57: 47, 56: 66, 34: 69, 39: 12, 41: 92, 43: 12} {0: 55, 2: 67, 12: 19, 9: 60, 8: 51, 30: 95, 48: 62, 21: 97, 45: 1, 40: 89, 24: 52, 13: 5, 28: 67, 57: 16, 56: 13, 34: 54, 39: 86, 41: 45, 43: 50} {0: 65, 2: 16, 12: 23, 9: 82, 8: 22, 30: 76, 48: 98, 21: 23, 45: 14, 40: 87, 24: 45, 13: 87, 28: 29, 57: 75, 56: 26, 34: 25, 39: 70, 41: 97, 43: 89}
# 0 2022-01-01 2 apple {12: 31, 39: 42, 42: 87, 20: 86, 45: 62, 28: 86, 55: 59, 44: 83, 0: 26, 2: 97, 36: 55, 24: 44} {12: 74, 39: 55, 42: 16, 20: 12, 45: 92, 28: 2, 55: 98, 44: 44, 0: 63, 2: 54, 36: 76, 24: 55} {12: 97, 39: 81, 42: 76, 20: 71, 45: 26, 28: 56, 55: 61, 44: 93, 0: 90, 2: 87, 36: 28, 24: 52}
# 0 2022-01-01 3 apple {15: 72, 8: 84, 24: 97, 0: 54, 30: 10, 12: 50, 55: 51, 56: 31} {15: 30, 8: 42, 24: 96, 0: 76, 30: 58, 12: 44, 55: 82, 56: 57} {15: 41, 8: 11, 24: 40, 0: 89, 30: 22, 12: 51, 55: 57, 56: 60}
# 0 2022-01-01 5 apple {3: 14, 56: 79, 0: 25} {3: 3, 56: 61, 0: 49} {3: 2, 56: 29, 0: 32}
# 0 2022-01-01 4 apple {52: 53, 0: 50, 4: 53} {52: 32, 0: 64, 4: 26} {52: 88, 0: 28, 4: 59}
# 0 2022-01-01 6 apple {42: 40, 29: 52} {42: 30, 29: 82} {42: 50, 29: 33}
# 0 2022-01-01 1 banana {0: 6, 2: 10, 12: 43, 9: 77, 8: 19, 30: 59, 48: 67, 21: 17, 45: 92, 40: 69, 24: 25, 13: 86, 28: 81, 57: 72, 56: 35, 34: 22, 39: 61, 41: 8, 43: 56} {0: 63, 2: 92, 12: 49, 9: 22, 8: 2, 30: 92, 48: 96, 21: 21, 45: 62, 40: 23, 24: 77, 13: 41, 28: 64, 57: 49, 56: 30, 34: 59, 39: 63, 41: 54, 43: 85} {0: 27, 2: 83, 12: 35, 9: 37, 8: 70, 30: 94, 48: 16, 21: 19, 45: 71, 40: 5, 24: 26, 13: 91, 28: 16, 57: 42, 56: 8, 34: 31, 39: 93, 41: 57, 43: 65}
# 0 2022-01-01 2 banana {12: 0, 39: 53, 42: 13, 20: 91, 45: 88, 28: 66, 55: 46, 44: 24, 0: 41, 2: 32, 36: 69, 24: 11} {12: 83, 39: 21, 42: 41, 20: 81, 45: 79, 28: 37, 55: 28, 44: 2, 0: 51, 2: 87, 36: 63, 24: 82} {12: 6, 39: 57, 42: 18, 20: 3, 45: 74, 28: 58, 55: 46, 44: 25, 0: 45, 2: 96, 36: 37, 24: 14}
# 0 2022-01-01 3 banana {15: 10, 8: 14, 24: 60, 0: 48, 30: 82, 12: 70, 55: 39, 56: 4} {15: 81, 8: 55, 24: 8, 0: 64, 30: 72, 12: 45, 55: 79, 56: 45} {15: 95, 8: 90, 24: 36, 0: 47, 30: 88, 12: 12, 55: 86, 56: 75}
# 0 2022-01-01 5 banana {3: 47, 56: 39, 0: 60} {3: 60, 56: 15, 0: 72} {3: 48, 56: 86, 0: 16}
# 0 2022-01-01 4 banana {52: 49, 0: 30, 4: 86} {52: 39, 0: 85, 4: 5} {52: 64, 0: 22, 4: 96}
# 0 2022-01-01 6 banana {42: 2, 29: 26} {42: 15, 29: 54} {42: 61, 29: 58}
# 0 2022-01-01 1 pear {0: 39, 2: 55, 12: 25, 9: 98, 8: 14, 30: 82, 48: 59, 21: 77, 45: 8, 40: 75, 24: 19, 13: 92, 28: 39, 57: 63, 56: 95, 34: 77, 39: 77, 41: 41, 43: 14} {0: 54, 2: 11, 12: 63, 9: 12, 8: 38, 30: 34, 48: 96, 21: 27, 45: 19, 40: 87, 24: 83, 13: 28, 28: 22, 57: 25, 56: 38, 34: 66, 39: 80, 41: 80, 43: 49} {0: 98, 2: 71, 12: 97, 9: 54, 8: 70, 30: 22, 48: 31, 21: 4, 45: 47, 40: 42, 24: 28, 13: 68, 28: 65, 57: 73, 56: 32, 34: 1, 39: 73, 41: 39, 43: 37}
# 0 2022-01-01 2 pear {12: 93, 39: 57, 42: 12, 20: 98, 45: 69, 28: 60, 55: 77, 44: 16, 0: 96, 2: 16, 36: 76, 24: 15} {12: 23, 39: 63, 42: 59, 20: 15, 45: 66, 28: 50, 55: 18, 44: 87, 0: 33, 2: 15, 36: 9, 24: 90} {12: 80, 39: 50, 42: 98, 20: 12, 45: 54, 28: 90, 55: 67, 44: 37, 0: 86, 2: 2, 36: 51, 24: 64}
# 0 2022-01-01 3 pear {15: 9, 8: 71, 24: 3, 0: 94, 30: 53, 12: 90, 55: 28, 56: 92} {15: 5, 8: 85, 24: 77, 0: 53, 30: 26, 12: 62, 55: 2, 56: 98} {15: 99, 8: 4, 24: 14, 0: 86, 30: 50, 12: 17, 55: 70, 56: 73}
# 0 2022-01-01 5 pear {3: 57, 56: 2, 0: 65} {3: 32, 56: 16, 0: 90} {3: 88, 56: 74, 0: 80}
# 0 2022-01-01 4 pear {52: 23, 0: 32, 4: 92} {52: 4, 0: 93, 4: 39} {52: 30, 0: 97, 4: 10}
# 0 2022-01-01 6 pear {42: 43, 29: 52} {42: 63, 29: 61} {42: 80, 29: 65}
IIUC
# Columns whose per-minute values are collapsed into one dict per group.
cols_to_dict = ['value1', 'value2', 'value3']
# Derive date / hour / minute from dt, move minute into the index, then
# aggregate each (date, hour, fruit) group's value columns with dict; the
# result is sorted by date, fruit, hour and the group keys are turned back
# into ordinary columns. hour here is dt.hour itself (no +1 shift).
out = df.assign(date=df['dt'].dt.date, hour=df['dt'].dt.hour, minute=df['dt'].dt.minute) \
.set_index('minute').groupby(['date', 'hour', 'fruit'])[cols_to_dict] \
.agg(dict).sort_index(level=['date', 'fruit', 'hour']).reset_index()
Output:
>>> out
date hour fruit value1 value2 value3
0 2022-01-01 0 apple {0: 67, 9: 82, 6: 33, 21: 74, 16: 99, 20: 82, ... {0: 46, 9: 47, 6: 57, 21: 21, 16: 8, 20: 96, 1... {0: 25, 9: 42, 6: 1, 21: 99, 16: 63, 20: 47, 1...
1 2022-01-01 1 apple {3: 30, 0: 36, 5: 81, 24: [51, 57, 59], 25: 77... {3: 40, 0: 32, 5: 14, 24: [18, 49, 58], 25: 60... {3: 83, 0: 80, 5: 17, 24: [37, 24, 34], 25: 16...
2 2022-01-01 2 apple {0: 49, 24: [64, 26], 40: 6, 18: 48, 4: 11, 16... {0: 77, 24: [53, 77], 40: 80, 18: 11, 4: 76, 1... {0: 0, 24: [62, 75], 40: 44, 18: 13, 4: 56, 16...
3 2022-01-01 3 apple {20: [88, 88], 28: 94, 54: 48, 25: 34} {20: [68, 34], 28: 55, 54: 15, 25: 46} {20: [16, 49], 28: 50, 54: 11, 25: 26}
4 2022-01-01 4 apple {36: 91} {36: 86} {36: 95}
5 2022-01-01 5 apple {33: 21, 36: 70, 8: 89} {33: 34, 36: 66, 8: 69} {33: 39, 36: 38, 8: 92}
6 2022-01-01 6 apple {45: 87} {45: 80} {45: 24}
7 2022-01-01 0 banana {0: 67, 9: 67, 6: 64, 21: 80, 16: 29, 20: 85, ... {0: 30, 9: 33, 6: 4, 21: 64, 16: 29, 20: 59, 1... {0: 92, 9: 37, 6: 93, 21: 40, 16: 49, 20: 97, ...
8 2022-01-01 1 banana {3: 12, 0: 73, 5: 6, 24: [19, 81, 44], 25: 11,... {3: 41, 0: 22, 5: 86, 24: [68, 41, 7], 25: 25,... {3: 94, 0: 52, 5: 15, 24: [19, 59, 12], 25: 2,...
9 2022-01-01 2 banana {0: 49, 24: [36, 87], 40: 35, 18: 26, 4: 21, 1... {0: 96, 24: [30, 45], 40: 86, 18: 33, 4: 2, 16... {0: 53, 24: [39, 22], 40: 84, 18: 7, 4: 47, 16...
10 2022-01-01 3 banana {20: [45, 27], 28: 58, 54: 96, 25: 90} {20: [14, 22], 28: 49, 54: 14, 25: 90} {20: [94, 50], 28: 19, 54: 26, 25: 65}
11 2022-01-01 4 banana {36: 64} {36: 62} {36: 43}
12 2022-01-01 5 banana {33: 83, 36: 4, 8: 34} {33: 13, 36: 36, 8: 37} {33: 2, 36: 63, 8: 94}
13 2022-01-01 6 banana {45: 22} {45: 2} {45: 0}
14 2022-01-01 0 pear {0: 55, 9: 29, 6: 30, 21: 57, 16: 37, 20: 63, ... {0: 74, 9: 38, 6: 18, 21: 47, 16: 47, 20: 34, ... {0: 25, 9: 75, 6: 36, 21: 60, 16: 94, 20: 68, ...
15 2022-01-01 1 pear {3: 20, 0: 22, 5: 1, 24: [94, 27, 44], 25: 73,... {3: 59, 0: 50, 5: 7, 24: [34, 15, 28], 25: 24,... {3: 90, 0: 71, 5: 75, 24: [4, 4, 63], 25: 73, ...
16 2022-01-01 2 pear {0: 42, 24: [83, 98], 40: 83, 18: 34, 4: 58, 1... {0: 55, 24: [13, 50], 40: 39, 18: 37, 4: 68, 1... {0: 69, 24: [80, 49], 40: 80, 18: 82, 4: 13, 1...
17 2022-01-01 3 pear {20: [33, 50], 28: 47, 54: 16, 25: 0} {20: [28, 26], 28: 74, 54: 66, 25: 13} {20: [40, 67], 28: 88, 54: 96, 25: 4}
18 2022-01-01 4 pear {36: 46} {36: 28} {36: 97}
19 2022-01-01 5 pear {33: 27, 36: 23, 8: 57} {33: 71, 36: 82, 8: 57} {33: 23, 36: 88, 8: 91}
20 2022-01-01 6 pear {45: 83} {45: 45} {45: 3}
I have a dataframe like this,
I want to create a dictionary from this to remap a column in another dataframe. (For example, row 330 contains both 524 and 545; I want each of those values to map to the single value 330 in the other dataframe.)
So I used this code to create a dictionary:
# Transpose so each original row label becomes a key, then collect that row's
# values into a list.
di = new2.transpose().to_dict(orient='list')
But the dictionary i get is this,
{0: ['-1'],
1: ['187'],
2: ['212'],
3: ['30'],
4: ['209'],
5: ['213'],
6: ['214'],
7: ['238'],
8: ['544'],
9: ['557'],
10: ['317'],
11: ['516'],
12: ['571'],
13: ['184, 549'],
14: ['64'],
15: ['43'],
16: ['584'],
17: ['185'],
18: ['190'],
19: ['218'],
20: ['174'],
21: ['550'],
22: ['138'],
23: ['1'],
24: ['311'],
25: ['576'],
26: ['500'],
27: ['208, 241'],
28: ['16'],
29: ['327'],
30: ['3, 34, 50'],
31: ['332'],
32: ['520'],
33: ['491'],
34: ['535'],
35: ['523'],
36: ['119'],
37: ['482'],
38: ['574'],
39: ['165'],
40: ['370'],
41: ['51, 62, 73, 87, 101, 120, 199, 240, 304, 360, 506'],
And when i invert it using this code,
# Inverts di: every element of each value list becomes a key that points back
# to the original key. An element such as '184, 549' is one string, so it is
# used as a single key rather than being split into separate numbers.
{value: key for key, values in di.items() for value in values}
It becomes this,
{'-1': 0,
'187': 1,
'212': 2,
'30': 3,
'209': 4,
'213': 5,
'214': 6,
'238': 7,
'544': 8,
'557': 9,
'317': 10,
'516': 11,
'571': 12,
'184, 549': 13,
'64': 14,
'43': 15,
'584': 16,
'185': 17,
'190': 18,
'218': 19,
'174': 20,
'550': 21,
'138': 22,
'1': 23,
'311': 24,
'576': 25,
'500': 26,
'208, 241': 27,
'16': 28,
'327': 29,
'3, 34, 50': 30,
'332': 31,
'520': 32,
'491': 33,
'535': 34,
'523': 35,
'119': 36,
'482': 37,
'574': 38,
'165': 39,
'370': 40,
'51, 62, 73, 87, 101, 120, 199, 240, 304, 360, 506': 41,
'525': 42,
But what I want is to map them individually, such as:
184: 13,
549: 13,
instead of this,
'184, 549': 13,
and use .map() function to map it using the dictionary
The problem is that each value is a single string inside a list; just split that string:
di = {
    27: ['208, 241'],
    28: ['16'],
    29: ['327'],
    30: ['3, 34, 50'],
    31: ['332'],
    32: ['520'],
    33: ['491'],
}

# Each value is a one-element list holding a ', '-separated string: split it
# and point every piece back at the key it came from.
result = {}
for key, values in di.items():
    for value in values[0].split(', '):
        result[value] = key
print(result)
Output
{'208': 27, '241': 27, '16': 28, '327': 29, '3': 30, '34': 30, '50': 30, '332': 31, '520': 32, '491': 33}
Note that I used a small fraction of di as an example, this can be applied to the whole dictionary.
I have the following dictionary:
'{0: 0, 1: 11, 2: 26, 3: 43, 4: 14, 5: 29, 6: 34, 7: 49, 8: 49, 9: 108, 10: 124, 11: 108, 12: 361, 13: 290, 14: 2118, 15: 5408, 16: 43473, 17: 109462, 18: 111490, 19: 244675, 20: 115878, 21: 6960}'
For this dictionary I want to write a function that returns the three key-value pairs with the highest values (in this case, keys 18, 19 and 20).
I came up with the following:
# Module-level mapping read by top_3_centuries(); it is produced by
# nr_of_objects_per_century(), which is not shown here.
cachedict = nr_of_objects_per_century() #Dictionary mentioned above
def top_3_centuries():
    """Print the three largest values of the module-level ``cachedict``.

    The values are printed as a list in ascending order. When ``cachedict``
    holds fewer than three entries, all of its values are printed.
    """
    # Slicing clamps at the start of the list, so short inputs cannot wrap
    # around to negative indices and repeat entries.
    top_3_values = sorted(cachedict.values())[-3:]
    print(top_3_values)
This gives me a list of the max-values I want to lookup. But how do I proceed from here? Is there a way to do this without a reverse-lookup (Which is slow for dictionaries, right?) I have the feeling that I can do this task much more efficiently/pythonic.
You could also use collections.Counter with most_common (which internally uses a heap queue):
from collections import Counter
dct = {
    0: 0, 1: 11, 2: 26, 3: 43, 4: 14, 5: 29, 6: 34, 7: 49, 8: 49,
    9: 108, 10: 124, 11: 108, 12: 361, 13: 290, 14: 2118, 15: 5408,
    16: 43473, 17: 109462, 18: 111490, 19: 244675, 20: 115878, 21: 6960,
}
# Treat the dict values as counts and ask for the three largest.
count = Counter(dct)
top_three = count.most_common(3)
print(top_three)  # [(19, 244675), (20, 115878), (18, 111490)]
heapq.nlargest
You can avoid a full sort here by using a heap queue:
from heapq import nlargest
from operator import itemgetter
dct = {
    0: 0, 1: 11, 2: 26, 3: 43, 4: 14, 5: 29, 6: 34, 7: 49, 8: 49,
    9: 108, 10: 124, 11: 108, 12: 361, 13: 290, 14: 2118, 15: 5408,
    16: 43473, 17: 109462, 18: 111490, 19: 244675, 20: 115878, 21: 6960,
}
# Rank the (key, value) pairs by their value, i.e. element 1 of each pair.
by_value = itemgetter(1)
res = nlargest(3, dct.items(), key=by_value)
print(res)
# [(19, 244675), (20, 115878), (18, 111490)]
You can use this:
a = {0: 0, 1: 11, 2: 26, 3: 43, 4: 14, 5: 29, 6: 34, 7: 49, 8: 49,
     9: 108, 10: 124, 11: 108, 12: 361, 13: 290, 14: 2118, 15: 5408,
     16: 43473, 17: 109462, 18: 111490, 19: 244675, 20: 115878, 21: 6960}
# sorted() accepts any iterable and always returns a new list, so the items
# view does not need to be copied into a list first.
l = sorted(a.items(), key=lambda tup: tup[1], reverse=True)[:3]
print(l)  # [(19, 244675), (20, 115878), (18, 111490)]
It takes the (key, value) pairs of dictionary a, sorts them by value (tup[1]) in descending order, and keeps the first 3.
You can do it like so:
dct = {
    0: 0, 1: 11, 2: 26, 3: 43, 4: 14, 5: 29, 6: 34, 7: 49, 8: 49, 9: 108,
    10: 124, 11: 108, 12: 361, 13: 290, 14: 2118, 15: 5408, 16: 43473,
    17: 109462, 18: 111490, 19: 244675, 20: 115878, 21: 6960,
}
# The three largest values, biggest first.
top_values = sorted(dct.values(), reverse=True)[:3]
# For each of them, the first key in dct that carries that value.
res = [next(k for k in dct if dct[k]==v) for v in top_values]
print(res)  # -> [19, 20, 18]
Break-down:
sorted(dct.values(), reverse=True)[:3]:: Takes the 3 max dictionary values.
next(k for k in dct if dct[k]==v):: returns the dictionary key, for which the value is one of the above 3 (iteratively).
in two simple steps :
# Sort (value, key) pairs ascending, then flip the last three back to
# (key, value) order.
aux = sorted((count, key) for key, count in dic.items())
res = [(key, count) for count, key in aux[-3:]]
#[(18, 111490), (20, 115878), (19, 244675)]
faster than nlargest and Counter.most_common on this example.
This returns what you want:
d = {0: 0, 1: 11, 2: 26, 3: 43, 4: 14, 5: 29, 6: 34, 7: 49, 8: 49, 9: 108, 10: 124, 11: 108, 12: 361, 13: 290, 14: 2118, 15: 5408, 16: 43473, 17: 109462, 18: 111490, 19: 244675, 20: 115878, 21: 6960}
# Work out the three largest values once instead of re-sorting all values
# for every item that is tested.
top_values = sorted(d.values())[-3:]
# Keep the items carrying one of those values; sorting orders them by key.
top_3 = sorted((key, value) for key, value in d.items() if value in top_values)[-3:]
print(top_3)
#[(18, 111490), (19, 244675), (20, 115878)]
d = {
    0: 0, 1: 11, 2: 26, 3: 43, 4: 14, 5: 29, 6: 34, 7: 49, 8: 49, 9: 108,
    10: 124, 11: 108, 12: 361, 13: 290, 14: 2118, 15: 5408, 16: 43473,
    17: 109462, 18: 111490, 19: 244675, 20: 115878, 21: 6960,
}
# Order the (key, value) pairs from the largest value to the smallest.
d_items_sorted = list(d.items())
d_items_sorted.sort(key=lambda pair: pair[1], reverse=True)
d_items_sorted[:3]
Returns :
[(19, 244675), (20, 115878), (18, 111490)]
This is the easiest code I could get, but sorting the dictionary cost O(nlogn) and you should be able to do the same in O(n)
Are you looking for the most efficient way, or just a reasonable trade-off between performance and algorithmic simplicity?
If it's the latter may be you should consider sorting dictionary items as tuples (you can get them with cachedict.items()) like in this answer https://stackoverflow.com/a/613218/10453363
Just sort tuples by the value and then get the last 3 tuples (which are key/value pairs)
I have following data frame (represented by dictionary below):
{'Name': {0: '204',
1: '110838',
2: '110999',
3: '110998',
4: '111155',
5: '111710',
6: '111157',
7: '111156',
8: '111144',
9: '118972',
10: '111289',
11: '111288',
12: '111145',
13: '121131',
14: '118990',
15: '110653',
16: '110693',
17: '110694',
18: '111577',
19: '111702',
20: '115424',
21: '115127',
22: '115178',
23: '111578',
24: '115409',
25: '115468',
26: '111711',
27: '115163',
28: '115149',
29: '115251'},
'Sequence_new': {0: 1.0,
1: 2.0,
2: 3.0,
3: 4.0,
4: 5.0,
5: 6.0,
6: 7.0,
7: 8.0,
8: 9.0,
9: 10.0,
10: 11.0,
11: 12.0,
12: nan,
13: 13.0,
14: 14.0,
15: 15.0,
16: 16.0,
17: 17.0,
18: 18.0,
19: 19.0,
20: 20.0,
21: 21.0,
22: 22.0,
23: 23.0,
24: 24.0,
25: 25.0,
26: 26.0,
27: 27.0,
28: 28.0,
29: 29.0},
'Sequence_old': {0: 1,
1: 2,
2: 3,
3: 4,
4: 5,
5: 6,
6: 7,
7: 8,
8: 9,
9: 10,
10: 11,
11: 12,
12: 13,
13: 14,
14: 15,
15: 16,
16: 17,
17: 18,
18: 19,
19: 20,
20: 21,
21: 22,
22: 23,
23: 24,
24: 25,
25: 26,
26: 27,
27: 28,
28: 29,
29: 30}}
I am trying to understand what changed between the old and new sequences. If, for a given Name, Sequence_old equals Sequence_new, nothing changed. If Sequence_new is NaN, the Name was removed. Can you please help me implement this in pandas?
What I have tried so far, without success:
# Row-by-row attempt: compare the two sequence columns and write 'N' (equal)
# or 'Y' (different) into a 'New' column.
# NOTE(review): Merge.iloc[i]['New'] = ... is chained indexing; pandas
# documents that such an assignment may act on a temporary copy of the row, so
# Merge itself is probably left unchanged - confirm against the pandas docs.
for i in range(0, len(Merge)):
if Merge.iloc[i]['Sequence_x'] == Merge.iloc[i]['Sequence_y']:
Merge.iloc[i]['New'] = 'N'
else:
Merge.iloc[i]['New'] = 'Y'
Thank you
You can use a nested numpy.where, with isnull as the outer condition:
# Rows whose new sequence number is missing are flagged 'Removed'; every other
# row gets 'N' when old and new match and 'Y' when they differ.
mask = df['Sequence_old'] == df['Sequence_new']
is_removed = df['Sequence_new'].isnull()
changed_flag = np.where(mask, 'N', 'Y')
df['New'] = np.where(is_removed, 'Removed', changed_flag)
print(df)
Name Sequence_new Sequence_old New
0 204 1.0 1 N
1 110838 2.0 2 N
2 110999 3.0 3 N
3 110998 4.0 4 N
4 111155 5.0 5 N
5 111710 6.0 6 N
6 111157 7.0 7 N
7 111156 8.0 8 N
8 111144 9.0 9 N
9 118972 10.0 10 N
10 111289 11.0 11 N
11 111288 12.0 12 N
12 111145 NaN 13 Removed
13 121131 13.0 14 Y
14 118990 14.0 15 Y
15 110653 15.0 16 Y
16 110693 16.0 17 Y
17 110694 17.0 18 Y
18 111577 18.0 19 Y
19 111702 19.0 20 Y
20 115424 20.0 21 Y
21 115127 21.0 22 Y
22 115178 22.0 23 Y
23 111578 23.0 24 Y
24 115409 24.0 25 Y
25 115468 25.0 26 Y
26 111711 26.0 27 Y
27 115163 27.0 28 Y
28 115149 28.0 29 Y
29 115251 29.0 30 Y
dic_new = {0: 1.0, 1: 2.0, 2: 3.0, 3: 4.0, 4: 5.0, 5: 6.0, 6: 7.0, 7: 8.0, 8: 9.0, 9: 10.0, 10: 11.0, 11: 12.0,
           12: 'Nan', 13: 13.0, 14: 14.0, 15: 15.0, 16: 16.0, 17: 17.0, 18: 18.0, 19: 19.0, 20: 20.0, 21: 21.0,
           22: 22.0, 23: 23.0, 24: 24.0, 25: 25.0, 26: 26.0, 27: 27.0, 28: 28.0, 29: 29.0}
dic_old = {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16,
           16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29,
           29: 30}


def _is_removed(value):
    """Return True when *value* marks a removed entry ('Nan' text or a float NaN)."""
    # Most values are numbers and have no .lower(), so compare the text form.
    return str(value).lower() == 'nan'


# Compare the new and old sequence number key by key, skipping removed
# entries. The removal marker lives in dic_new, so that is the value checked.
for key, new_value in dic_new.items():
    if not _is_removed(new_value):
        # You can add whatever print statement you want here
        print(new_value == dic_old.get(key))

# Same comparison as the loop above, collected into a list first and then
# printed one result per line.
comparisons = [
    new_value == dic_old.get(key)
    for key, new_value in dic_new.items()
    if not _is_removed(new_value)
]
print(*comparisons, sep='\n')