I need to create a tally dictionary of time stamps on our server log files with the hours as keys
I don't want to do a long-winded, case-by-case regular-expression check and append (it's Python... there must be a better way).
e.g. say I have a list:
times = ['02:49:04', '02:50:03', '03:21:23', '03:21:48', '03:24:29', '03:30:29', '03:30:30', '03:44:54', '03:50:11', '03:52:03', '03:52:06', '03:52:30', '03:52:48', '03:54:50', '03:55:21', '03:56:50', '03:57:31', '04:05:10', '04:35:59', '04:39:50', '04:41:47', '04:46:43']
How do I (in a pythonic manner) produce something like so:
where "0200" would hold the number of times a value between 02:00:00 to 02:59:59 occurs
result = { "0200":2, "0300":15, "0400":5 }
something like:
from collections import Counter
# Map every 'HH:MM:SS' stamp to its hour bucket ('HH' + '00') and tally the buckets.
counts = Counter(map(lambda stamp: stamp[:2] + '00', times))
from collections import defaultdict

# Tally timestamps per hour. The key is the two-digit hour plus "00", which is
# the format the question asks for (e.g. {"0200": 2, "0300": 15, "0400": 5}).
countDict = defaultdict(int)
for t in times:
    countDict[t[:2] + "00"] += 1
print(countDict)
If you don't want to use counter. You can do:
# Plain-dict tally (no imports needed). The result is deliberately not named
# `dict`, which would hide the builtin type for the rest of the script.
hour_counts = {}
for i in times:
    # Hour field of 'HH:MM:SS' plus "00", computed once per timestamp.
    hour_key = i.split(':')[0] + "00"
    hour_counts[hour_key] = hour_counts.get(hour_key, 0) + 1
Here's one more way with itertools.
import itertools

# groupby only merges *adjacent* items with equal keys, so the timestamps are
# sorted by the same key (the two-digit hour) before grouping.
key = lambda x: x[:2]
result = {}
for hour, group in itertools.groupby(sorted(times, key=key), key=key):
    # Count the group's members without building a throwaway list.
    result[hour + '00'] = sum(1 for _ in group)
Related
How do I code a function in python which can:
iterate through a list of word strings which may contain duplicate words and referencing to a dictionary,
find the word with the highest absolute sum, and
output it along with the corresponding absolute value.
The function also has to ignore words which are not in the dictionary.
For example,
Assume the function is called H_abs_W().
Given the following list and dict:
list_1 = ['apples','oranges','pears','apples']
Dict_1 = {'apples':5.23,'pears':-7.62}
Then calling the function as:
H_abs_W(list_1,Dict_1)
Should give the output:
'apples',10.46
EDIT:
I managed to do it in the end with the code below. Looking over the answers, turns out I could have done it in a shorter fashion, lol.
def H_abs_W(list_1,Dict_1):
    """Return the word with the largest absolute summed value, and that value.

    A word's score is ``abs(occurrences in list_1 * Dict_1[word])``. Words that
    are missing from ``Dict_1`` score 0 and therefore never win. On ties the
    word seen first in ``list_1`` is kept.

    Returns a ``(word, score)`` tuple, or ``('', 0)`` when no word scores
    above zero (empty list, or no word present in ``Dict_1``).
    """
    from collections import Counter  # local import keeps the snippet self-contained

    ASum_W = 0
    i_word = ''
    for word, occurrences in Counter(list_1).items():
        score = abs(float(occurrences) * float(Dict_1.get(word, 0)))
        # Strict '>' keeps the earliest word on ties and skips zero scores.
        if score > ASum_W:
            ASum_W = score
            i_word = word
    return (i_word, ASum_W)
list_1 = ['apples','oranges','pears','apples']
Dict_1 = {'apples':5.23,'pears':-7.62}

# Add each word's value once per occurrence; words missing from Dict_1 stay at 0.
d = {k: 0 for k in list_1}
for x in list_1:
    if x in Dict_1:
        d[x] += Dict_1[x]

# Choose the winner from the summed totals in d (not from the raw Dict_1
# values), ranking by absolute value so large negative sums count too.
m = max(d, key=lambda k: abs(d[k]))
print(m, abs(d[m]))
try this,
# Rank dictionary entries by |occurrences in list_1 * value| so that repeated
# words and large negative values are both taken into account.
key, value = max(Dict_1.items(), key=lambda x: abs(list_1.count(x[0]) * x[1]))
print(f"{key}, {abs(list_1.count(key) * value)}")
# apples, 10.46
you can use Counter to calculate the frequency(number of occurrences) of each item in the list.
max(counter.values()) will give us the count of maximum occurring element
max(counter, key=counter.get) will give the which item in the list is
associated with that highest count.
========================================================================
from collections import Counter
def H_abs_W(list_1, Dict_1):
    """Return the word with the highest absolute summed value, and that value.

    Only words present in ``Dict_1`` are counted. A word's score is
    ``abs(occurrences in list_1 * Dict_1[word])``.

    Returns an ``(item, score)`` tuple, or ``('', 0)`` when no word of
    ``list_1`` appears in ``Dict_1``.
    """
    # Count only the words that have a value, so unknown words are ignored.
    counter = Counter(word for word in list_1 if word in Dict_1)
    if not counter:
        return '', 0
    # Rank by the absolute sum, not by raw frequency: a rare word with a large
    # value can beat a frequent word with a small one.
    item = max(counter, key=lambda word: abs(counter[word] * Dict_1[word]))
    return item, abs(counter[item] * Dict_1[item])
I have a list given below:
ticket_list=["AI567:MUM:LON:014","AI077:MUM:LON:056", "BA896:MUM:LON:067", "SI267:MUM:SIN:145","AI077:MUM:CAN:060","SI267:BLR:MUM:148","AI567:CHE:SIN:015","AI077:MUM:SIN:050","AI077:MUM:LON:051","SI267:MUM:SIN:146"]
I have to find the number of passengers per flight but I am unable to join the count and elem with : in a finallist. I have given the function below and comments are for description and output format.
# NOTE(review): the indentation of this snippet was lost in the paste, so the nesting of the two loops cannot be read from the text.
def find_passengers_per_flight():
'''Write the logic to find and return a list having number of passengers traveling per flight based on the details in the ticket_list
In the list, details should be provided in the format:
[flight_no:no_of_passengers, flight_no:no_of_passengers, etc.].'''
listairline=[]
count=0
finallist=[]
# Collect the text before the first ':' of every ticket (the flight number).
for i in ticket_list:
# listairline=[]
list2=i.split(":")
listairline.append(list2[0])
for elem in listairline:
# Number of tickets carrying this flight number.
count=listairline.count(elem)
# NOTE(review): finallist is meant to hold "flight:count" entries, so a bare flight number is never found in it and this check cannot filter out repeats.
if elem not in finallist:
# NOTE(review): list.append() takes exactly one argument; this three-argument call raises TypeError.
finallist.append(elem,":",count) #here it is the line which needs to be modified. Kindly help
# The result is printed; nothing is returned although the docstring asks for a returned list.
print (finallist)
Let's do that with a one-liner:
ticket_list=["AI567:MUM:LON:014","AI077:MUM:LON:056", "BA896:MUM:LON:067", "SI267:MUM:SIN:145","AI077:MUM:CAN:060","SI267:BLR:MUM:148","AI567:CHE:SIN:015","AI077:MUM:SIN:050","AI077:MUM:LON:051","SI267:MUM:SIN:146"]
# One "flight:count" string per distinct flight number, in sorted flight order.
[flight+':'+str([t.split(':')[0] for t in ticket_list].count(flight)) for flight in sorted({t.split(':')[0] for t in ticket_list})]
Or in a simpler way:
# First the flight number of every ticket, then one "flight:count" entry per
# distinct flight number in sorted order.
res = [ticket.partition(':')[0] for ticket in ticket_list]
res = [flight + ':' + str(res.count(flight)) for flight in sorted(set(res))]
Or a faster way:
from collections import Counter
# A single pass over the tickets counts every flight number.
res = Counter(ticket.split(':')[0] for ticket in ticket_list)
[flight + ':' + str(tickets) for flight, tickets in res.items()]
All the above gives:
['AI567:2', 'AI077:4', 'BA896:1', 'SI267:3']
Use collections.defaultdict to count the tickets per flight:
from collections import defaultdict

ticket_list=["AI567:MUM:LON:014","AI077:MUM:LON:056", "BA896:MUM:LON:067", "SI267:MUM:SIN:145","AI077:MUM:CAN:060","SI267:BLR:MUM:148","AI567:CHE:SIN:015","AI077:MUM:SIN:050","AI077:MUM:LON:051","SI267:MUM:SIN:146"]

# Each ticket adds one to the tally of its flight number, which is the first
# ':'-separated field; the remaining fields are discarded.
flight_ticket_counts = defaultdict(int)
for ticket in ticket_list:
    flight_no, *_ = ticket.split(":")
    flight_ticket_counts[flight_no] += 1

print([f"{k}:{v}" for k, v in flight_ticket_counts.items()])
# ['AI567:2', 'AI077:4', 'BA896:1', 'SI267:3']
Or as a one-liner using collections.Counter:
from collections import Counter
# Counter consumes the flight numbers directly; no explicit loop is needed.
flight_ticket_counts = Counter(ticket.partition(":")[0] for ticket in ticket_list)
print([f"{flight}:{tickets}" for flight, tickets in flight_ticket_counts.items()])
# ['AI567:2', 'AI077:4', 'BA896:1', 'SI267:3']
If you want the number of passengers, then you can modify the above approach to do so:
# Sum the numeric fourth field of every ticket per flight number.
# NOTE(review): reading the fourth field as a passenger count is this answer's
# assumption — confirm what that field actually means.
flight_passenger_counts = defaultdict(int)
for flight in ticket_list:
    # Exactly four ':'-separated fields are expected; anything else raises ValueError.
    flight_no, _, _, no_passengers = flight.split(":")
    flight_passenger_counts[flight_no] += int(no_passengers)

print([f"{k}:{v}" for k, v in flight_passenger_counts.items()])
# ['AI567:29', 'AI077:217', 'BA896:67', 'SI267:439']
To me it looks like the question is asking for you to convert your answers to a string to get the same format your input was in.
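You only need to change the line to this (note that the `if elem not in finallist` check must then test the same formatted string, otherwise every ticket appends a duplicate entry):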
finallist.append(elem + ":" + str(count))
I have the following random selection script:
import random

# Draw num_selections distinct values from 0 .. length_of_list - 1.
length_of_list, num_selections = 200, 10
my_list = list(range(length_of_list))
numbers = random.sample(my_list, num_selections)
It looks at a list of predetermined size and randomly selects 10 numbers. Is there a way to run this section 500 times and then get the top 10 numbers which were selected the most? I was thinking that I could feed the numbers into a dictionary and then get the top 10 numbers from there. So far, I've done the following:
# NOTE(review): my_dict is not defined anywhere in this snippet; indentation was lost in the paste.
for run in range(0, 500):
numbers = random.sample(my_list, num_selections)
for number in numbers:
# current_number and key_number are assigned but never used below.
current_number = my_dict.get(number)
key_number = number
# NOTE(review): `number = ...` inside the call is a keyword argument, so it names the literal key 'number' rather than the key held in the variable number.
my_dict.update(number = number+1)
print(my_dict)
Here I want the code to take the current number assigned to that key and then add 1, but I cannot manage to make it work. It seems like the key for the dictionary update has to be that specific key, cannot insert a variable.. Also, I think having this nested loop might not be so efficient as I have to run this 500 times 1500 times 23... so I am concerned about performance. If anyone has an idea of what I should try, it would be great! Thanks
SOLUTION:
import random
from collections import defaultdict
from collections import OrderedDict
length_of_list = 50
my_list = list(range(length_of_list))
num_selections = 10
my_dict = dict.fromkeys(my_list)

# Tally how often each number is drawn across 500 independent samples.
di = defaultdict(int)
for run in range(0, 500):
    numbers = random.sample(my_list, num_selections)
    for number in numbers:
        di[number] += 1


def get_top_numbers(data, n, order=False):
    """Return the n entries of ``data`` with the largest values.

    data: mapping of number -> count.
    n: how many entries to keep.
    order: when true, return an OrderedDict instead of a plain dict.

    Entries are sorted by count, highest first; the sort is stable, so equal
    counts keep the order in which they appear in ``data``.
    """
    top = sorted(data.items(), key=lambda x: x[1], reverse=True)[:n]
    if order:
        return OrderedDict(top)
    return dict(top)


print(get_top_numbers(di, n=10))
my_dict.update(number = number+1): in this line you are assigning a new value to a variable inside the parentheses of a function call. Unless you're giving the function a kwarg called number with the value number+1, this results in the following error:
TypeError: 'number' is an invalid keyword argument for this function
Also dict.update doesn't accept an integer but another dictionary. You should read the documentation about this function: https://www.tutorialspoint.com/python3/dictionary_update.htm
There it says that dict.update(dict2) takes a dictionary, which it merges into dict. See the example below:
# NOTE(review): naming a variable `dict` rebinds the builtin `dict` type for the rest of the script.
dict = {'Name': 'Zara', 'Age': 17}
dict2 = {'Gender': 'female' }
# update() copies dict2's key/value pairs into dict in place.
dict.update(dict2)
print ("updated dict : ", dict)
Gives as result:
updated dict : {'Gender': 'female', 'Age': 17, 'Name': 'Zara'}
So far for the errors in your code, I see a good answer is already given so I won't repeat him.
Checkout defaultdict of collections module,
So basically, you create a defaultdict with default value 0 and then iterate over your numbers list and update the value of the number to +=1
from collections import defaultdict

# Missing keys start at int() == 0, so every drawn number can be incremented
# directly without checking whether it has been seen before.
di = defaultdict(int)
for run in range(0, 500):
    numbers = random.sample(my_list, num_selections)
    for number in numbers:
        di[number] += 1
print(di)
For this task you can use collections.Counter, which supports addition. You use two counters: one that accumulates the sum over all runs, and a second that holds the counts of the current sample.
import collections

# `counter` accumulates over all runs; `sample_counter` holds one run's draw.
counter = collections.Counter()
for run in range(500):
    # The question names the sample size num_selections.
    samples = random.sample(my_list, num_selections)
    sample_counter = collections.Counter(samples)
    # In-place addition avoids building a new Counter on every run.
    counter += sample_counter
# counter.most_common(10) then yields the ten most frequently drawn numbers.
I have a dictionary which looks like this:
cq={'A1_B2M_01':2.04, 'A2_B2M_01':2.58, 'A3_B2M_01':2.80, 'B1_B2M_02':5.00,
'B2_B2M_02':4.30, 'B2_B2M_02':2.40 etc.}
I need to calculate mean of triplets, where the keys[2:] agree. So, I would ideally like to get another dictionary which will be:
new={'_B2M_01': 2.47, '_B2M_02': 3.9}
The data is/should be in triplets so in theory I could just get the means of the consecutive values, but first of all, I have it in a dictionary so the keys/values will likely get reordered, besides I'd rather stick to the names, as a quality check for the triplets assigned to names (I will later add a bit showing error message when there will be more than three per group).
I've tried creating a dictionary where the keys would be _B2M_01 and _B2M_02 and then loop through the original dictionary to first append all the values that are assigned to these groups of keys so I could later calculate an average, but I am getting errors even in the first step and anyway, I am not sure if this is the most effective way to do this...
cq={'A1_B2M_01':2.4, 'A2_B2M_01':5, 'A3_B2M_01':4, 'B1_B2M_02':3, 'B2_B2M_02':7, 'B3_B2M_02':6}
# Distinct group names: every key with its first two characters removed.
trips=set([x[2:] for x in cq.keys()])
new={}
for each in trips:
for k,v in cq.iteritems():
if k[2:]==each:
# NOTE(review): new starts out empty, so new[each] has no list to append to yet — this lookup is what raises the KeyError in the traceback below.
new[each].append(v)
Traceback (most recent call last):
File "<pyshell#28>", line 4, in <module>
new[each].append(v)
KeyError: '_B2M_01'
I would be very grateful for any suggestions. It seems like a fairly easy operation but I got stuck.
An alternative result which would be even better would be to get a dictionary which contains all the names used as in cq, but with values being the means of the group. So the end result would be:
final={'A1_B2M_01':2.47, 'A2_B2M_01':2.47, 'A3_B2M_01':2.47, 'B1_B2M_02':3.9,
'B2_B2M_02':3.9, 'B2_B2M_02':3.9}
Something like this should work. You can probably make it a little more elegant.
# The last key is 'B3_B2M_02': repeating 'B2_B2M_02' in the literal would
# silently overwrite the 4.30 entry and leave only two values in that group.
cq = {'A1_B2M_01':2.04, 'A2_B2M_01':2.58, 'A3_B2M_01':2.80, 'B1_B2M_02':5.00, 'B2_B2M_02':4.30, 'B3_B2M_02':2.40 }

# Running total and member count per group. The group name is the key with its
# two-character prefix removed (e.g. '_B2M_01'). The accumulator is not named
# `sum`, which would hide the builtin.
totals = {}
count = {}
mean = {}
for k in cq:
    group = k[2:]
    totals[group] = totals.get(group, 0) + cq[k]
    count[group] = count.get(group, 0) + 1
for group in totals:
    # float() keeps true division even for integer values on Python 2.
    mean[group] = totals[group] / float(count[group])
cq={'A1_B2M_01':2.4, 'A2_B2M_01':5, 'A3_B2M_01':4, 'B1_B2M_02':3, 'B2_B2M_02':7, 'B3_B2M_02':6}

# sums maps group -> [running total, member count]. The group is everything
# after the first '_' of the key (e.g. 'B2M_01').
sums = dict()
for k, v in cq.items():  # items() works on both Python 2 and 3
    _, p2 = k.split('_', 1)
    if p2 not in sums:
        sums[p2] = [0, 0]
    sums[p2][0] += v
    sums[p2][1] += 1

res = {}
for k, v in sums.items():
    # float() forces true division for the integer totals.
    res[k] = v[0]/float(v[1])
print(res)
also could be done with one iteration
Grouping:
SEPARATOR = '_'
cq={'A1_B2M_01':2.4, 'A2_B2M_01':5, 'A3_B2M_01':4, 'B1_B2M_02':3, 'B2_B2M_02':7, 'B3_B2M_02':6}

# Collect the values of every group. The group key is the original key with
# its first SEPARATOR-delimited field dropped (e.g. 'B2M_01').
groups = {}
for key in cq:
    group_key = SEPARATOR.join(key.split(SEPARATOR)[1:])
    # setdefault creates the list on first sight of a group, then appends.
    groups.setdefault(group_key, []).append(cq[key])
Generate means:
def means(groups):
    """Yield a ``(group, mean)`` tuple for every entry of ``groups``.

    groups: mapping of group name -> sequence of numbers.
    Raises ZeroDivisionError if a group's sequence is empty.
    """
    for group, group_vals in groups.items():  # items() works on Python 2 and 3
        # float() forces true division for integer-only groups.
        yield (group, float(sum(group_vals)) / len(group_vals),)
# Materialise the generator so the (group, mean) pairs are printed, not the generator object.
print(list(means(groups)))
I currently have two set lists that combine "steps" and "time":
step = 1,1,1,1,2,2,2,2
time = 1,2,5,6,1,3,5,6
These values pair up element by element, giving a list of tuples like [(1,1),(1,2),(1,5),(1,6),(2,1),(2,3),(2,5),(2,11)]
Basically I'm trying to find the max value for step 1, and the min value of step one, as well as min/max for step 2
minstep1 = 1
maxstep1 = 6
minstep2 = 1
maxstep2 = 11
how can I accomplish this in python? do i need to create a multidimensional list? is there a function that can iterate keyvalue pairs of a tuple that I can just use the zip function?
Thanks!
You're looking for itertools.groupby. Here is some example code for your question:
step = 1,1,1,1,2,2,2,2
time = 1,2,5,6,1,3,5,6

from itertools import groupby
from operator import itemgetter

# Pair every step with its time, then group the pairs by step. groupby only
# merges adjacent equal keys, which is enough here because step is already
# laid out in runs.
for key, group in groupby(zip(step, time), itemgetter(0)):
    # Keep only the time component of each (step, time) pair.
    group = [item[1] for item in group]
    print('Step:', key, 'Min:', min(group), 'Max:', max(group))
It groups time by step then finds the min and max for each group. Alternatively, you could do something like:
# Walk step in lockstep with time: groupby calls the key function once per
# element, so each call hands back the step that belongs to that time value.
# An iterator is used instead of reverse()/pop() because step is a tuple
# (which has neither method), and so that step itself is left untouched.
steps = iter(step)
for key, group in groupby(time, lambda _: next(steps)):
    group = tuple(group)
    print('Step:', key, 'Min:', min(group), 'Max:', max(group))
To group by step without zipping with time.
How about this approach?
step = [1,1,1,1,2,2,2,2]
time = [1,2,5,6,1,3,5,6]

from collections import defaultdict

# Collect the distinct time values seen for each step; unlike groupby, this
# does not require equal steps to be adjacent.
dd = defaultdict(set)
for s,t in zip(step, time):
    dd[s].add(t)

for k,v in dd.items():  # items() works on both Python 2 and 3
    print("step %d min: %d max: %d" %(k, min(v), max(v)))