Related
I'm trying to narrow down a list of dicts by filtering it by the value of one of the keys.
The current code does that, but I don't know how to retain the entire dictionary rather than only the fields I filter by.
# Gather page["JobRuns"] from each item yielded by each job_status() result.
final_list = []
jobs = [glue_client.job_status(e) for e in j]
for e in jobs:
    for page in e:
        final_list.append(page["JobRuns"])

# Flatten the collected lists into a single list of run dicts.
flat_list = [item for sublist in final_list for item in sublist]
sorted_list = sorted(flat_list, key=lambda k: (k['JobName'], k['StartedOn']), reverse=True)

#need to have following keys: "JobName", "JobRunState", "StartedOn" and "Id"
# groupby() only merges *adjacent* items with equal keys, so it has to be fed
# the list that is ordered by 'JobName' (sorted_list), not the raw flat_list.
latest_jobs = [
    {'JobName': key, 'StartedOn': max(item['StartedOn'] for item in values)}
    for key, values in groupby(sorted_list, lambda dct: dct['JobName'])
]
print(latest_jobs)
The data in the sorted_list variable looks like this:
list_of_dicts = [
{'JobName': 'a', 'StartedOn': datetime.datetime(2022, 10, 18, 13, 0, 47, 306000, tzinfo=tzlocal()), 'JobRunState': 'fail', 'id': 'xyz'},
{'JobName': 'a', 'StartedOn': datetime.datetime(2021, 10, 18, 13, 0, 47, 306000, tzinfo=tzlocal()), 'JobRunState': 'ok', 'id': 'xyz'},
{'JobName': 'b', 'StartedOn': datetime.datetime(2022, 10, 18, 13, 0, 47, 306000, tzinfo=tzlocal()), 'JobRunState': 'fail', 'id': 'xyz'},
{'JobName': 'a', 'StartedOn': datetime.datetime(2020, 10, 18, 13, 0, 47, 306000, tzinfo=tzlocal()), 'JobRunState': 'fai;', 'id': 'xyz'},
{'JobName': 'b', 'StartedOn': datetime.datetime(2021, 10, 18, 13, 0, 47, 306000, tzinfo=tzlocal()), 'JobRunState': 'ok', 'id': 'xyz'}
]
Expected output:
filtered_list = [
{'JobName': 'a', 'StartedOn': datetime.datetime(2022, 10, 18, 13, 0, 47, 306000, tzinfo=tzlocal()), 'JobRunState': 'fail', 'id': 'xyz'},
{'JobName': 'b', 'StartedOn': datetime.datetime(2022, 10, 18, 13, 0, 47, 306000, tzinfo=tzlocal()), 'JobRunState': 'fail', 'id': 'xyz'}
]
Some judicious use of itertools.groupby, sorted, and max.
import datetime
from itertools import groupby
from operator import itemgetter

list_of_dicts = [
    {'JobName': 'a', 'StartedOn': datetime.datetime(2022, 10, 18, 13, 0, 47, 306000), 'JobRunState': 'fail', 'id': 'xyz'},
    {'JobName': 'a', 'StartedOn': datetime.datetime(2021, 10, 18, 13, 0, 47, 306000), 'JobRunState': 'ok', 'id': 'xyz'},
    {'JobName': 'b', 'StartedOn': datetime.datetime(2022, 10, 18, 13, 0, 47, 306000), 'JobRunState': 'fail', 'id': 'xyz'},
    {'JobName': 'a', 'StartedOn': datetime.datetime(2020, 10, 18, 13, 0, 47, 306000), 'JobRunState': 'fai;', 'id': 'xyz'},
    {'JobName': 'b', 'StartedOn': datetime.datetime(2021, 10, 18, 13, 0, 47, 306000), 'JobRunState': 'ok', 'id': 'xyz'},
]

# groupby() only merges *adjacent* items with equal keys, so the list has to
# be sorted by the grouping key before it is grouped.
lst = sorted(list_of_dicts, key=itemgetter('JobName'))

# For every job name keep the complete run dict with the greatest 'StartedOn'.
# The result is bound to a name so the snippet also works when run as a
# script, not only when typed into an interactive session.
filtered_list = [
    max(jobs, key=itemgetter('StartedOn'))
    for _, jobs in groupby(lst, key=itemgetter('JobName'))
]
print(filtered_list)
# [{'JobName': 'a', 'StartedOn': datetime.datetime(2022, 10, 18, 13, 0, 47, 306000), 'JobRunState': 'fail', 'id': 'xyz'},
# {'JobName': 'b', 'StartedOn': datetime.datetime(2022, 10, 18, 13, 0, 47, 306000), 'JobRunState': 'fail', 'id': 'xyz'}]
I'm writing a Python script. I need to return the lines that contain the latest 'timestamp' field from a text file. For example, given the text file below:
{'uid': 3167, 'user_id': '6', 'timestamp': datetime.datetime(2021, 3, 10, 18, 7, 13), 'status': 1, 'punch': 1}, {'uid': 3168, 'user_id': '198', 'timestamp': datetime.datetime(2021, 3, 10, 18, 10, 42), 'status': 2, 'punch': 1}, {'uid': 3169, 'user_id': '3', 'timestamp': datetime.datetime(2021, 3, 10, 18, 13, 53), 'status': 1, 'punch': 1}, {'uid': 3170, 'user_id': '13', 'timestamp': datetime.datetime(2021, 3, 10, 18, 22, 2), 'status': 1, 'punch': 1}, {'uid': 3171, 'user_id': '9', 'timestamp': datetime.datetime(2021, 3, 10, 18, 22, 43), 'status': 1, 'punch': 1}, {'uid': 3172, 'user_id': '15', 'timestamp': datetime.datetime(2021, 3, 10, 18, 32, 30), 'status': 2, 'punch': 1}, {'uid': 3173, 'user_id': '4', 'timestamp': datetime.datetime(2021, 3, 10, 19, 42, 26), 'status': 1, 'punch': 1}, {'uid': 3174, 'user_id': '1', 'timestamp': datetime.datetime(2021, 3, 10, 19, 42, 34), 'status': 1, 'punch': 1}, {'uid': 3175, 'user_id': '3', 'timestamp': datetime.datetime(2021, 3, 11, 8, 48, 6), 'status': 1, 'punch': 1}, {'uid': 3176, 'user_id': '7', 'timestamp': datetime.datetime(2021, 3, 11, 9, 2, 30), 'status': 2, 'punch': 1}, {'uid': 3177, 'user_id': '5', 'timestamp': datetime.datetime(2021, 3, 11, 9, 12, 40), 'status': 1, 'punch': 1}, {'uid': 3178, 'user_id': '6', 'timestamp': datetime.datetime(2021, 3, 11, 9, 40, 47), 'status': 1, 'punch': 1}, {'uid': 3179, 'user_id': '15', 'timestamp': datetime.datetime(2021, 3, 11, 9, 49, 59), 'status': 2, 'punch': 1},
The returned text for today's date (11/3/2021) should be, for example:
{'uid': 3175, 'user_id': '3', 'timestamp': datetime.datetime(2021, 3, 11, 8, 48, 6), 'status': 1, 'punch': 1}, {'uid': 3176, 'user_id': '7', 'timestamp': datetime.datetime(2021, 3, 11, 9, 2, 30), 'status': 2, 'punch': 1}, {'uid': 3177, 'user_id': '5', 'timestamp': datetime.datetime(2021, 3, 11, 9, 12, 40), 'status': 1, 'punch': 1}, {'uid': 3178, 'user_id': '6', 'timestamp': datetime.datetime(2021, 3, 11, 9, 40, 47), 'status': 1, 'punch': 1}, {'uid': 3179, 'user_id': '15', 'timestamp': datetime.datetime(2021, 3, 11, 9, 49, 59), 'status': 2, 'punch': 1},
It seems you're dealing with tabular data and pandas is very natural for that.
import datetime
import pandas as pd
# Load the records into a DataFrame, one row per dict.
df = pd.DataFrame(
    [
        {'uid': 3167, 'user_id': '6', 'timestamp': datetime.datetime(2021, 3, 10, 18, 7, 13), 'status': 1, 'punch': 1},
        {'uid': 3168, 'user_id': '198', 'timestamp': datetime.datetime(2021, 3, 10, 18, 10, 42), 'status': 2, 'punch': 1},
        {'uid': 3169, 'user_id': '3', 'timestamp': datetime.datetime(2021, 3, 10, 18, 13, 53), 'status': 1, 'punch': 1},
        {'uid': 3170, 'user_id': '13', 'timestamp': datetime.datetime(2021, 3, 10, 18, 22, 2), 'status': 1, 'punch': 1},
        {'uid': 3171, 'user_id': '9', 'timestamp': datetime.datetime(2021, 3, 10, 18, 22, 43), 'status': 1, 'punch': 1},
        {'uid': 3172, 'user_id': '15', 'timestamp': datetime.datetime(2021, 3, 10, 18, 32, 30), 'status': 2, 'punch': 1},
        {'uid': 3173, 'user_id': '4', 'timestamp': datetime.datetime(2021, 3, 10, 19, 42, 26), 'status': 1, 'punch': 1},
        {'uid': 3174, 'user_id': '1', 'timestamp': datetime.datetime(2021, 3, 10, 19, 42, 34), 'status': 1, 'punch': 1},
        {'uid': 3175, 'user_id': '3', 'timestamp': datetime.datetime(2021, 3, 11, 8, 48, 6), 'status': 1, 'punch': 1},
        {'uid': 3176, 'user_id': '7', 'timestamp': datetime.datetime(2021, 3, 11, 9, 2, 30), 'status': 2, 'punch': 1},
        {'uid': 3177, 'user_id': '5', 'timestamp': datetime.datetime(2021, 3, 11, 9, 12, 40), 'status': 1, 'punch': 1},
        {'uid': 3178, 'user_id': '6', 'timestamp': datetime.datetime(2021, 3, 11, 9, 40, 47), 'status': 1, 'punch': 1},
        {'uid': 3179, 'user_id': '15', 'timestamp': datetime.datetime(2021, 3, 11, 9, 49, 59), 'status': 2, 'punch': 1},
    ]
)
# normalize() resets the time-of-day to midnight, giving the start of today.
today = pd.to_datetime('today').normalize()
# Keep the rows whose timestamp is at or after the start of today.
rows = df[df['timestamp'].ge(today)]
Which gives
uid user_id timestamp status punch
8 3175 3 2021-03-11 08:48:06 1 1
9 3176 7 2021-03-11 09:02:30 2 1
10 3177 5 2021-03-11 09:12:40 1 1
11 3178 6 2021-03-11 09:40:47 1 1
12 3179 15 2021-03-11 09:49:59 2 1
If you want the result in a list of dicts, you can then do rows.to_dict('records').
Without pandas it'd be a similar approach of getting today's datetime and iterating over your data to filter them.
lines = [
    {'uid': 3167, 'user_id': '6', 'timestamp': datetime.datetime(2021, 3, 10, 18, 7, 13), 'status': 1, 'punch': 1},
    {'uid': 3168, 'user_id': '198', 'timestamp': datetime.datetime(2021, 3, 10, 18, 10, 42), 'status': 2, 'punch': 1},
    {'uid': 3169, 'user_id': '3', 'timestamp': datetime.datetime(2021, 3, 10, 18, 13, 53), 'status': 1, 'punch': 1},
    {'uid': 3170, 'user_id': '13', 'timestamp': datetime.datetime(2021, 3, 10, 18, 22, 2), 'status': 1, 'punch': 1},
    {'uid': 3171, 'user_id': '9', 'timestamp': datetime.datetime(2021, 3, 10, 18, 22, 43), 'status': 1, 'punch': 1},
    {'uid': 3172, 'user_id': '15', 'timestamp': datetime.datetime(2021, 3, 10, 18, 32, 30), 'status': 2, 'punch': 1},
    {'uid': 3173, 'user_id': '4', 'timestamp': datetime.datetime(2021, 3, 10, 19, 42, 26), 'status': 1, 'punch': 1},
    {'uid': 3174, 'user_id': '1', 'timestamp': datetime.datetime(2021, 3, 10, 19, 42, 34), 'status': 1, 'punch': 1},
    {'uid': 3175, 'user_id': '3', 'timestamp': datetime.datetime(2021, 3, 11, 8, 48, 6), 'status': 1, 'punch': 1},
    {'uid': 3176, 'user_id': '7', 'timestamp': datetime.datetime(2021, 3, 11, 9, 2, 30), 'status': 2, 'punch': 1},
    {'uid': 3177, 'user_id': '5', 'timestamp': datetime.datetime(2021, 3, 11, 9, 12, 40), 'status': 1, 'punch': 1},
    {'uid': 3178, 'user_id': '6', 'timestamp': datetime.datetime(2021, 3, 11, 9, 40, 47), 'status': 1, 'punch': 1},
    {'uid': 3179, 'user_id': '15', 'timestamp': datetime.datetime(2021, 3, 11, 9, 49, 59), 'status': 2, 'punch': 1},
]
# Midnight (00:00:00) of the current local date, as a naive datetime.
today = datetime.datetime.combine(datetime.date.today(), datetime.time.min)
# Keep every record whose timestamp is at or after the start of today.
result = list(filter(lambda record: record['timestamp'] >= today, lines))
I have a list with barline ticks and midi notes that can overlap the barlines. So I made a list of 'barlineticks':
barlinepos = [0, 768.0, 1536.0, 2304.0, 3072.0, 3840.0, 4608.0, 5376.0, 6144.0, 6912.0, 0, 576.0, 1152.0, 1728.0, 2304.0, 2880.0, 3456.0, 4032.0, 4608.0, 5184.0, 5760.0, 6336.0, 6912.0, 7488.0]
And a MidiFile:
{'type': 'time_signature', 'numerator': 4, 'denominator': 4, 'time': 0, 'duration': 768, 'ID': 0}
{'type': 'set_tempo', 'tempo': 500000, 'time': 0, 'ID': 1}
{'type': 'track_name', 'name': 'Tempo Track', 'time': 0, 'ID': 2}
{'type': 'track_name', 'name': 'New Instrument', 'time': 0, 'ID': 3}
{'type': 'note_on', 'time': 0, 'channel': 0, 'note': 48, 'velocity': 100, 'ID': 4, 'duration': 956}
{'type': 'time_signature', 'numerator': 3, 'denominator': 4, 'time': 768, 'duration': 6911, 'ID': 5}
{'type': 'note_on', 'time': 768, 'channel': 0, 'note': 46, 'velocity': 100, 'ID': 6, 'duration': 575}
{'type': 'note_off', 'time': 956, 'channel': 0, 'note': 48, 'velocity': 0, 'ID': 7}
{'type': 'note_off', 'time': 1343, 'channel': 0, 'note': 46, 'velocity': 0, 'ID': 8}
{'type': 'end_of_track', 'time': 7679, 'ID': 9}
And I want to check whether a midi note overlaps a barline. Every note_on message has a 'time' and a 'duration' value. I have to check whether one of the barline ticks (in the list) falls inside the range of the note (given by its 'time' and 'duration'). I tried:
if barlinepos in range(0, 956):
print(True)
Of course this doesn't work because barlinepos is a list. How can I check if one of the values in the list results in True?
Simple iteration to solve the requirement:
def overlaps_barline(note, barlines):
    """Return True if any barline tick lies within the note's time span.

    note: a message dict with a 'time' (start tick) and a 'duration' key.
    barlines: an iterable of barline tick positions.

    Both ends of the span are inclusive, so a note that starts or ends
    exactly on a barline tick counts as overlapping it.
    """
    start = note["time"]
    end = start + note["duration"]
    return any(start <= tick <= end for tick in barlines)


if __name__ == "__main__":
    for message in midifile:
        # Only note_on messages describe a note; other message types
        # (set_tempo, track_name, note_off, ...) may carry no 'duration'
        # and would raise KeyError.
        if message.get("type") != "note_on":
            continue
        # Exactly one True/False line is printed per note.
        print(overlaps_barline(message, barlinepos))
I have a Django view in which I need to query different models, combine the results, and then order them by date ('created_at'). Right now, when combining the models, I get a list of dicts like the one below. How can I sort this by date?
[{'content': u'Just another another message', 'created_at':
datetime.datetime(2018, 4, 22, 15, 35, 11, 577175, tzinfo=<UTC>),
u'successmatch_id': 5, u'id': 8, 'reciever': u'UserA'},
{'content': u'testing blah', 'created_at': datetime.datetime(2018, 4,
22, 15, 33, 28, 84469, tzinfo=<UTC>), u'successmatch_id': 5, u'id': 7,
'reciever': u'UserB'}, {'content': u'Hi how are you',
'created_at': datetime.datetime(2018, 4, 22, 13, 29, 49, 516701,
tzinfo=<UTC>), u'successmatch_id': 5, u'id': 6, 'reciever':
u'UserA'}]
Python's built-in sorting has the ability to specify what metric to sort by:
x = [{"test": 1}, {"test": 2}, {"test": 0}]
x.sort(key=lambda item: item["test"])
x is edited in place, and is now:
[{'test': 0}, {'test': 1}, {'test': 2}]
So, in your case, assuming your list is called my_list, you'd want to do:
my_list.sort(key=lambda item: item["created_at"])
Or, if you wanted the newest dicts to occur first,
my_list.sort(key=lambda item: item["created_at"], reverse=True)
If you are happy using a 3rd party library, you can use pandas, which accepts a list of dictionaries.
But note that datetime objects may be converted to pandas.Timestamp objects.
import pandas as pd
import datetime
lst = [
    {
        'content': u'Just another another message',
        'created_at': datetime.datetime(2018, 4, 22, 15, 35, 11, 577175, tzinfo=None),
        u'successmatch_id': 5,
        u'id': 8,
        'reciever': u'UserA',
    },
    {
        'content': u'testing blah',
        'created_at': datetime.datetime(2018, 4, 22, 15, 33, 28, 84469, tzinfo=None),
        u'successmatch_id': 5,
        u'id': 7,
        'reciever': u'UserB',
    },
    {
        'content': u'Hi how are you',
        'created_at': datetime.datetime(2018, 4, 22, 13, 29, 49, 516701, tzinfo=None),
        u'successmatch_id': 5,
        u'id': 6,
        'reciever': u'UserA',
    },
]
# Build a frame, order its rows by 'created_at' (oldest first), then
# transpose so that to_dict() yields one dict per original row.
res = (
    pd.DataFrame(lst)
    .sort_values(by='created_at')
    .transpose()
    .to_dict()
    .values()
)
Result:
dict_values([{'content': 'Hi how are you', 'created_at': Timestamp('2018-04-22 13:29:49.516701'),
'id': 6, 'reciever': 'UserA', 'successmatch_id': 5},
{'content': 'testing blah', 'created_at': Timestamp('2018-04-22 15:33:28.084469'),
'id': 7, 'reciever': 'UserB', 'successmatch_id': 5},
{'content': 'Just another another message', 'created_at': Timestamp('2018-04-22 15:35:11.577175'),
'id': 8, 'reciever': 'UserA', 'successmatch_id': 5}])
input={11: {'perc': 0, 'name': u'B test', 'cid': 11, 'total': 0, 'pending': 0, 'complete': 0}, 10: {'perc': 0, 'name': u'C test', 'cid': 10, 'total': 0, 'pending': 0,'complete': 0}, 3: {'perc': 9, 'name': u'Atest Pre-requisites', 'cid': 3, 'total': 11, 'pending': 10, 'complete': 1}}
I want to sort this dict based on the name field. I'm new to Python; could anyone please help me?
First, you should avoid using built-in names (such as input) as variable names: input is not a reserved word, so the assignment is legal, but it shadows the built-in, and input no longer calls the function input().
Also, a dictionary cannot be sorted. If you don't need the keys, you can transform the dictionary into a list, and then sort it. The code would be like this:
input_dict = {
    11: {'perc': 0, 'name': u'B test', 'cid': 11, 'total': 0, 'pending': 0, 'complete': 0},
    10: {'perc': 0, 'name': u'C test', 'cid': 10, 'total': 0, 'pending': 0, 'complete': 0},
    3: {'perc': 9, 'name': u'Atest Pre-requisites', 'cid': 3, 'total': 11, 'pending': 10, 'complete': 1},
}
# Copy the inner dicts into a list (the outer keys are dropped), then order
# that list by each entry's 'name'.
input_list = list(input_dict.values())
input_list.sort(key=lambda entry: entry['name'])
print(input_list)
# prints [{'perc': 9, 'complete': 1, 'cid': 3, 'total': 11, 'pending': 10, 'name': u'Atest Pre-requisites'}, {'perc': 0, 'complete': 0, 'cid': 11, 'total': 0, 'pending': 0, 'name': u'B test'}, {'perc': 0, 'complete': 0, 'cid': 10, 'total': 0, 'pending': 0, 'name': u'C test'}]
EDIT
If you wish to keep the keys and use iteritems() as you said in the comments, use this code instead (note that iteritems() exists only in Python 2; on Python 3 use items()):
input_dict = {
    11: {'perc': 0, 'name': u'B test', 'cid': 11, 'total': 0, 'pending': 0, 'complete': 0},
    10: {'perc': 0, 'name': u'C test', 'cid': 10, 'total': 0, 'pending': 0, 'complete': 0},
    3: {'perc': 9, 'name': u'Atest Pre-requisites', 'cid': 3, 'total': 11, 'pending': 10, 'complete': 1},
}
# items() yields (key, value) pairs on both Python 2 and Python 3, whereas
# iteritems() exists only on Python 2 and raises AttributeError on Python 3.
# Each pair is ordered by the 'name' of its value, so the keys are kept.
input_list = sorted(input_dict.items(), key=lambda x: x[1]['name'])
print(input_list)
# prints [(3, {'perc': 9, 'complete': 1, 'cid': 3, 'total': 11, 'pending': 10, 'name': u'Atest Pre-requisites'}), (11, {'perc': 0, 'complete': 0, 'cid': 11, 'total': 0, 'pending': 0, 'name': u'B test'}), (10, {'perc': 0, 'complete': 0, 'cid': 10, 'total': 0, 'pending': 0, 'name': u'C test'})]