Construct python dict from DeepDiff result - python

I have a DeepDiff result which is obtained by comparing two JSON files. I have to construct a python dictionary from the deepdiff result as follows.
json1 = {"spark": {"ttl":3, "poll":34}}
json2 = {"spark": {"ttl":3, "poll":34, "toll":23}, "cion": 34}
deepdiffresult = {'dictionary_item_added': {"root['spark']['toll']", "root['cion']"}}
expecteddict = {"spark" : {"toll":23}, "cion":34}
How can this be achieved?

There is probably a better way to do this. But you can parse the returned strings and chain together a new dictionary with the result you want.
json1 = {"spark": {"ttl":3, "poll":34}}
json2 = {"spark": {"ttl":3, "poll":34, "toll":23}, "cion": 34}
deepdiffresult = {'dictionary_item_added': {"root['spark']['toll']", "root['cion']"}}
added = deepdiffresult['dictionary_item_added']
def convert(s, j):
    """Build a nested dict holding only the value addressed by a DeepDiff path.

    Args:
        s: A DeepDiff path string such as "root['spark']['toll']".
        j: The (nested) dict that the path refers to.

    Returns:
        A dict nested along the keys of ``s`` whose innermost value is the
        value found in ``j`` (``None`` when the path cannot be followed).
        Unquoted segments such as ``[0]`` are kept as strings.

    Raises:
        ValueError: If ``s`` contains no bracketed key.
    """
    import re

    # Drop only the leading "root"; a blanket str.replace would also mangle
    # keys that merely contain the text "root".
    path = s[len('root'):] if s.startswith('root') else s
    # One match per "[...]" segment. Quoted keys may themselves contain
    # brackets, so the segments are parsed instead of split on "]".
    segments = re.findall(r"\[(?:'([^']*)'|\"([^\"]*)\"|([^\]]*))\]", path)
    keys = [single or double or bare for single, double, bare in segments]
    if not keys:
        raise ValueError('no keys found in path: %r' % (s,))

    # Walk down j to the addressed value.
    value = j
    for key in keys:
        value = value.get(key) if isinstance(value, dict) else None

    # Wrap the value from the innermost key outwards.
    result = value
    for key in reversed(keys):
        result = {key: result}
    return result
def _merge_nested(target, extra):
    """Recursively merge ``extra`` into ``target`` in place and return ``target``."""
    for key, value in extra.items():
        if isinstance(value, dict) and isinstance(target.get(key), dict):
            _merge_nested(target[key], value)
        else:
            target[key] = value
    return target


added_dict = {}
for added_str in added:
    # A plain dict.update would replace a whole top-level branch, losing
    # earlier additions that share a parent key (e.g. two new keys under
    # 'spark'), so the per-path dicts are merged level by level instead.
    _merge_nested(added_dict, convert(added_str, json2))
print(added_dict)
#returns:
{'cion': 34, 'spark': {'toll': 23}}

Simple answer:
There is a third-party package called dictdiffer (it is not built into Python) that you can try.
$ pip install dictdiffer
Examples:
from dictdiffer import diff

# diff() lazily yields (action, path, changes) tuples rather than returning a
# dict, so materialise it before printing.
result = list(diff(json1, json2))
print(result)
# Expected shape (order may differ):
# [('add', 'spark', [('toll', 23)]), ('add', '', [('cion', 34)])]
References:
DictDiffer

Related

What's the best method to create a dictionary from outputs of multiple for loops

This is my code:
def get_coin_tickers(url, timeout=10):
    """Fetch ``url`` and return its body decoded from JSON.

    Args:
        url: Address of the JSON endpoint to query.
        timeout: Seconds to wait for the server before giving up; without a
            timeout the request may block indefinitely.

    Returns:
        The parsed JSON payload.
    """
    req = requests.get(url, timeout=timeout)
    return json.loads(req.text)
pair_a_list = ["BTC_USDT", "EOS_USDT", "ETH_USDT"]
pair_b_list = ["SOL_USDT", "MATIC_USDT", "SUSHI_USDT"]

# Each loop overwrites its ask/bid variables on every pass, so once a loop has
# finished only the values for the last symbol of its list remain.
for pair_a in pair_a_list:
    orderbook_url = f'https://api.pionex.com/api/v1/market/depth?symbol={pair_a}&limit=5'
    pair_a_prices_json = get_coin_tickers(orderbook_url)
    pair_a_ask = pair_a_prices_json['data']['asks'][0][0]
    pair_a_bid = pair_a_prices_json['data']['bids'][0][0]

for pair_b in pair_b_list:
    orderbook_url = f'https://api.pionex.com/api/v1/market/depth?symbol={pair_b}&limit=5'
    # Named for pair b (the original reused the pair_a_prices_json name here).
    pair_b_prices_json = get_coin_tickers(orderbook_url)
    pair_b_ask = pair_b_prices_json['data']['asks'][0][0]
    pair_b_bid = pair_b_prices_json['data']['bids'][0][0]

# Built once, after both loops, hence from the last symbol of each list only.
keys = ['pair_a', 'pair_a_ask', 'pair_a_bid', 'pair_b', 'pair_b_ask', 'pair_b_bid']
values = [pair_a, pair_a_ask, pair_a_bid, pair_b, pair_b_ask, pair_b_bid]
mydict = dict(zip(keys, values))
print(mydict)
I'm able to create a dictionary, but it contains only one symbol pair from each list, apparently just the output for the last symbol of each list:
{'pair_a': 'ETH_USDT', 'pair_a_ask': '1254.18', 'pair_a_bid': '1253.51', 'pair_b': 'SUSHI_USDT', 'pair_b_ask': '0.9815', 'pair_b_bid': '0.9795'}
I'm expecting to see a combined dictionary with values of both lists as keys (with their API-Values) in the final list (after) iterating through both lists using for Loops
Found a solution to my question based on suggestion from #JonSG, plus adapted to the suggestion. I've included the code below for others to use as and if the need arises.
# Use distinct loop names: reusing pair_a_list / pair_b_list / pair_c_list as
# loop variables would rebind the lists themselves to single symbols.
for pair_a, pair_b, pair_c in zip(pair_a_list, pair_b_list, pair_c_list):
    orderbook_a_url = f'https://api.pionex.com/api/v1/market/depth?symbol={pair_a}&limit=1'
    pair_a_prices_json = get_coin_tickers(orderbook_a_url)
    pair_a_ask = pair_a_prices_json['data']['asks'][0][0]
    pair_a_bid = pair_a_prices_json['data']['bids'][0][0]
    my_dict_a = {
        'pair_a_ask': pair_a_ask,
        'pair_a_bid': pair_a_bid
    }

    orderbook_b_url = f'https://api.pionex.com/api/v1/market/depth?symbol={pair_b}&limit=1'
    pair_b_prices_json = get_coin_tickers(orderbook_b_url)
    pair_b_ask = pair_b_prices_json['data']['asks'][0][0]
    pair_b_bid = pair_b_prices_json['data']['bids'][0][0]
    my_dict_b = {
        'pair_b_ask': pair_b_ask,
        'pair_b_bid': pair_b_bid
    }

    orderbook_c_url = f'https://api.pionex.com/api/v1/market/depth?symbol={pair_c}&limit=1'
    pair_c_prices_json = get_coin_tickers(orderbook_c_url)
    pair_c_ask = pair_c_prices_json['data']['asks'][0][0]
    pair_c_bid = pair_c_prices_json['data']['bids'][0][0]
    my_dict_c = {
        'pair_c_ask': pair_c_ask,
        'pair_c_bid': pair_c_bid
    }
    # (Use either option below.)
    # my_dict = {**my_dict_a, **my_dict_b, **my_dict_c}
    # my_dict = my_dict_a | my_dict_b | my_dict_c

Easy way to map values from list of list to dictionary

Inputs
I have two lists of lists.
rule_seq =
[['#1', '#2', '#3'],
['#1', '#2', '#3']]
KG_seq =
[['nationality', 'placeOfBirth', 'locatedIn'],
['nationality', 'hasFather', 'nationality']]
I have to map the values in the same index to the dictionary with the value of rule_seq as the key in the list of above.
My desired output is
Output
unify_dict =
{'#1': ['nationality'],
'#2': ['placeOfBirth', 'hasFather'],
'#3': ['locatedIn', 'nationality']}
I made a dictionary as follows by flattening and zipping both lists of lists to check whether keys and values are in the dictionary.
My code is as follows.
def create_unify_dict(rule_seq, KG_seq):
    """Map each rule placeholder to the distinct relations seen at its position.

    Both inputs are flattened and paired item by item; for every key the
    values are collected in first-seen order without duplicates.

    Args:
        rule_seq: List of lists of keys (e.g. '#1', '#2', ...).
        KG_seq: List of lists of values, parallel to ``rule_seq``.

    Returns:
        A ``collections.defaultdict(list)`` from key to its unique values.
    """
    unify_dict = collections.defaultdict(list)
    flat_rules = itertools.chain.from_iterable(rule_seq)
    flat_relations = itertools.chain.from_iterable(KG_seq)
    # A plain loop: the appends are side effects, not a list being built.
    for key, val in zip(flat_rules, flat_relations):
        if val not in unify_dict[key]:
            unify_dict[key].append(val)
    return unify_dict
unify_dict = create_unify_dict(rule_seq, KG_seq)
Is there a simpler way to get the result I want?
You can just call append on the same defaultdict inside a second level of nested loops.
from collections import defaultdict

result = defaultdict(list)
for keyList, valueList in zip(rule_seq, KG_seq):
    for key, item in zip(keyList, valueList):
        # Keep first-seen order and skip values already recorded for this key.
        if item not in result[key]:
            result[key].append(item)
OUTPUT:
defaultdict(<class 'list'>,
{'#1': ['nationality'],
'#2': ['placeOfBirth', 'hasFather'],
'#3': ['locatedIn', 'nationality']})
You can use collections:
import collections

# Create a defaultdict with list as value type
result = collections.defaultdict(list)
for s0, s1 in zip(rule_seq, KG_seq):
    for v0, v1 in zip(s0, s1):
        if v1 not in result[v0]:
            result[v0].append(v1)
# dict(...) gives a plain dict so the printed form omits the defaultdict wrapper.
print(dict(result))
# {
# '#1': ['nationality'],
# '#2': ['placeOfBirth', 'hasFather'],
# '#3': ['locatedIn', 'nationality'],
# }
This would be my homemade approach using no modules. Vanilla Python.
# zip(*KG_seq) transposes the rows, giving one tuple per position; set() then
# removes duplicates (note: set order is arbitrary).
combine = [list(set(column)) for column in zip(*KG_seq)]
dct = dict(zip(rule_seq[0], combine))
output
{'#1': ['nationality'], '#2': ['hasFather', 'placeOfBirth'], '#3': ['nationality', 'locatedIn']}
oversimplified version
# Gather the values found at each position across all rows of KG_seq.
combine = []
for i in range(len(KG_seq[0])):
    group = []
    for lst in KG_seq:
        group.append(lst[i])
    combine.append(group)

# Drop duplicates within each position while keeping first-seen order.
newComb = []
for simp in combine:
    newComb.append(list(dict.fromkeys(simp)))

# Pair each placeholder with its de-duplicated values (newComb, not combine,
# otherwise the duplicates removed above would reappear in the result).
dct = {}
for place, st in zip(rule_seq[0], newComb):
    dct[place] = st
print(dct)
undersimplified
dct = {place:st for place,st in zip(rule_seq[0],[list(set(l)) for l in [[lst[i] for lst in KG_seq] for i in range(len(KG_seq[0]))]])}
Based on the following assumptions there could be several forms to what your method look like
rule_seq and kg_seq are equal in length
rule_seq and kg_seq items are also equal in length
One liner
def one_liner(rule_seq, kg_seq):
    """Group values by key across two parallel lists of lists.

    Args:
        rule_seq: List of lists of keys.
        kg_seq: List of lists of values, parallel to ``rule_seq``.

    Returns:
        A dict mapping each key to the set of values paired with it.
    """
    ret = {}
    pairs = ((idx, val) for arr_idx, arr_val in zip(rule_seq, kg_seq) for idx, val in zip(arr_idx, arr_val))
    for idx, val in pairs:
        ret.setdefault(idx, set()).add(val)
    return ret
Single loop + one liner
def one_loop(rule_seq, kg_seq):
    """Group values by key, walking the outer lists with one explicit loop.

    Args:
        rule_seq: List of lists of keys.
        kg_seq: List of lists of values, parallel to ``rule_seq``.

    Returns:
        A dict mapping each key to the set of values paired with it.
    """
    ret = {}
    for arr_idx, arr_val in zip(rule_seq, kg_seq):
        for idx, val in zip(arr_idx, arr_val):
            ret.setdefault(idx, set()).add(val)
    return ret
Nested loops
def nested_loop(rule_seq, kg_seq):
    """Group values by key using two nested loops.

    Args:
        rule_seq: List of lists of keys.
        kg_seq: List of lists of values, parallel to ``rule_seq``.

    Returns:
        A dict mapping each key to the set of values paired with it.
    """
    ret = {}
    for arr_idx, arr_val in zip(rule_seq, kg_seq):
        for idx, val in zip(arr_idx, arr_val):
            # Add to the existing set instead of building a new one per item.
            ret.setdefault(idx, set()).add(val)
    return ret
Testing these out
one_liner(rule_seq, KG_seq)
{'#1': {'nationality'},
'#2': {'hasFather', 'placeOfBirth'},
'#3': {'locatedIn', 'nationality'}}
one_loop(rule_seq, KG_seq)
{'#1': {'nationality'},
'#2': {'hasFather', 'placeOfBirth'},
'#3': {'locatedIn', 'nationality'}}
nested_loop(rule_seq, KG_seq)
{'#1': {'nationality'},
'#2': {'hasFather', 'placeOfBirth'},
'#3': {'locatedIn', 'nationality'}}
d = {}
# Walk both structures in lockstep instead of indexing with range(len(...)).
for rules, kgs in zip(rule_seq, KG_seq):
    for rule, kg in zip(rules, kgs):
        seen = d.setdefault(rule, [])
        if kg not in seen:
            seen.append(kg)
result:
{'#1': ['nationality'], '#2': ['placeOfBirth', 'hasFather'], '#3': ['locatedIn', 'nationality']}

Performance problem when using pandas apply on big dataframes

I'm having some performance issues with the code below, mostly because of the apply function that I'm using on a huge dataframe. I want to update the semi_dict dictionary with some other data that I'm calculating with some functions. Is there any way to improve this?
def my_function_1(semi_dict, row):
    """Append one record computed for ``row`` to ``semi_dict["data"]``.

    NOTE(review): ``some_data`` and ``more_data`` are placeholders for values
    derived from ``row``; they are not defined in this snippet.
    """
    # do some calculation/other stuff based on the row data and append it to the dictionary
    random_dict = dict(data=some_data, more_data=more_data)
    semi_dict["data"].append(random_dict)
def my_function_2(semi_dict, row):
    """Append one record computed for ``row`` to ``semi_dict["data2"]``.

    NOTE(review): ``some_data`` and ``more_data`` are placeholders for values
    derived from ``row``; they are not defined in this snippet.
    """
    # do some calculation/other stuff based on the row data and append it to the dictionary
    random_dict = dict(data=some_data, more_data=more_data)
    semi_dict["data2"].append(random_dict)
dictionary_list = []
for v in values:
    # Filter into fresh names: rebinding df_1_rows / df_2_rows here would make
    # every later iteration search the already-filtered frame.
    matching_1 = df_1_rows[(df_1_rows.values == v)]
    matching_2 = df_2_rows[(df_2_rows.values == v)]
    semi_dict = dict(value=v, data=[], data2=[])
    function = partial(my_function_1, semi_dict)
    function_2 = partial(my_function_2, semi_dict)
    # The partials already accept a row, so no lambda wrapper is needed.
    matching_1.apply(function, axis=1)
    matching_2.apply(function_2, axis=1)
    dictionary_list.append(semi_dict)
This answer uses dictionary merge from How to merge dictionaries of dictionaries?, but depending on your use case, you might not need it in the end:
import pandas as pd
import random
len_df = 10
row_values = list("ABCD")
extra_col_values = list("12345")
df_1 = pd.DataFrame([[random.choice(row_values), random.choice(extra_col_values)] for _ in range(len_df)], columns=['col1', 'extra1'])
df_2 = pd.DataFrame([[random.choice(row_values), random.choice(extra_col_values)] for _ in range(len_df)], columns=['col2', 'extra2'])
def make_dict(df):
    """Summarise a frame as ``{'data': [...]}`` holding its first row as a list.

    Args:
        df: A pandas DataFrame (here: one group of a groupby).

    Returns:
        A dict with key ``'data'`` whose value is a list containing the first
        row's values, or an empty list when ``df`` has no rows.
    """
    # some calculations on the df
    return {
        'data': df.head(1).values.tolist(),
    }
def make_dict_2(df):
    """Summarise a frame as ``{'data_2': [...]}`` holding its first row as a list.

    Args:
        df: A pandas DataFrame (here: one group of a groupby).

    Returns:
        A dict with key ``'data_2'`` whose value is a list containing the
        first row's values, or an empty list when ``df`` has no rows.
    """
    # some calculations on the df
    return {
        'data_2': df.head(1).values.tolist(),
    }
def merge(a, b, path=None):
    """Recursively merge ``b`` into ``a`` in place and return ``a``.

    Taken from
    https://stackoverflow.com/questions/7204805/how-to-merge-dictionaries-of-dictionaries

    Args:
        a: Destination dict; it is modified.
        b: Dict whose entries are merged into ``a``.
        path: Keys walked so far, used only to build the conflict message.

    Returns:
        ``a`` after merging.

    Raises:
        Exception: If both dicts hold different non-dict values under the
            same key path.
    """
    if path is None:
        path = []
    for key, b_value in b.items():
        if key not in a:
            a[key] = b_value
            continue
        a_value = a[key]
        if isinstance(a_value, dict) and isinstance(b_value, dict):
            merge(a_value, b_value, path + [str(key)])
        elif a_value == b_value:
            continue  # same leaf value
        else:
            raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
    return a
dict1 = df_1.groupby('col1').apply(make_dict).to_dict()
dict2 = df_2.groupby('col2').apply(make_dict_2).to_dict()
result = merge(dict1, dict2)
result

python key dict error multilevel dict

I'm not entirely sure why im getting a dictionary key error. I'm trying to create a multi level dict with = sign and getting a key error on metrics, but not on the first two.
doc['timestamp']
and
doc['instance_id']
both work fine, but when it gets to metrics it gives me a metrics key error. I'm not entirely sure why.
# Start from an empty dict: it has no keys at all yet.
doc = {}
doc['timestamp'] = datetime.now()
#doc['instance_id'] = get_cloud_app_name()
doc['instance_id'] = "MyMac"
cpu_dict_returned = get_cpu_info()
# doc['metrics'] is looked up (not assigned) before the nested assignment can
# happen, and doc has no 'metrics' key, so this line raises KeyError.
doc['metrics']['cpu_usage']['user_cpu'] = cpu_dict_returned['user_cpu']
doc['metrics']["cpu_usage"]['system_cpu'] = cpu_dict_returned['system_cpu']
doc['metrics']["cpu_usage"]['idle_cpu'] = cpu_dict_returned['idle_cpu']
doc['metrics']["cpu_usage"]['cpu_count'] = cpu_dict_returned['cpu_count']
You must create the sub-dictionaries before using them:
doc = {}
doc['timestamp'] = datetime.now()
doc['instance_id'] = "MyMac"
cpu_dict_returned = get_cpu_info()
# Create each nesting level explicitly, outermost first, so the nested
# assignments below have an existing dict to write into.
doc['metrics'] = {}
doc['metrics']['cpu_usage'] = {}
doc['metrics']['cpu_usage']['user_cpu'] = cpu_dict_returned['user_cpu']
doc['metrics']["cpu_usage"]['system_cpu'] = cpu_dict_returned['system_cpu']
doc['metrics']["cpu_usage"]['idle_cpu'] = cpu_dict_returned['idle_cpu']
doc['metrics']["cpu_usage"]['cpu_count'] = cpu_dict_returned['cpu_count']
You can do this more succinctly using a dictionary comprehension:
doc = {}
doc['timestamp'] = datetime.now()
doc['instance_id'] = "MyMac"
cpu_dict_returned = get_cpu_info()
# Build the whole 'metrics' -> 'cpu_usage' structure in a single assignment.
doc['metrics'] = {
'cpu_usage':
# Copies the four listed entries; .get() is used instead of indexing
# (presumably cpu_dict_returned is a dict, so a missing key gives None).
{k: cpu_dict_returned.get(k)
for k in ['user_cpu', 'system_cpu', 'idle_cpu', 'cpu_count']}
}
Note that the sub dictionary cpu_usage is first created, and then the nested dictionary is inserted.

Create a sublist by datedelta in Python

I have a list of data points that contains a measurement every 5 minutes for 24 hours. I need to create a new list with the average of that measurement for each hour in the list. What's the best way to accomplish that?
Date Amount
2015-03-14T00:00:00.000-04:00 12545.869
2015-03-14T00:05:00.000-04:00 12467.326
2015-03-14T00:10:00.000-04:00 12416.948
2015-03-14T00:15:00.000-04:00 12315.698
2015-03-14T00:20:00.000-04:00 12276.38
2015-03-14T00:25:00.000-04:00 12498.696
2015-03-14T00:30:00.000-04:00 12426.145
2015-03-14T00:35:00.000-04:00 12368.659
2015-03-14T00:40:00.000-04:00 12322.785
2015-03-14T00:45:00.000-04:00 12292.719
2015-03-14T00:50:00.000-04:00 12257.965
2015-03-14T00:55:00.000-04:00 12221.375
2015-03-14T01:00:00.000-04:00 12393.725
2015-03-14T01:05:00.000-04:00 12366.674
2015-03-14T01:10:00.000-04:00 12378.578
2015-03-14T01:15:00.000-04:00 12340.754
2015-03-14T01:20:00.000-04:00 12288.511
2015-03-14T01:25:00.000-04:00 12266.136
2015-03-14T01:30:00.000-04:00 12236.639
2015-03-14T01:35:00.000-04:00 12181.668
2015-03-14T01:40:00.000-04:00 12171.992
2015-03-14T01:45:00.000-04:00 12164.298
2015-03-14T01:50:00.000-04:00 12137.282
2015-03-14T01:55:00.000-04:00 12116.486
2015-03-14T02:00:02.000-04:00 12090.439
2015-03-14T02:05:00.000-04:00 12085.924
2015-03-14T02:10:00.000-04:00 12034.78
2015-03-14T02:15:00.000-04:00 12037.367
2015-03-14T02:20:00.000-04:00 12006.649
2015-03-14T02:25:00.000-04:00 11985.588
2015-03-14T02:30:00.000-04:00 11999.41
2015-03-14T02:35:00.000-04:00 11943.121
2015-03-14T02:40:00.000-04:00 11934.346
2015-03-14T02:45:00.000-04:00 11928.568
2015-03-14T02:50:00.000-04:00 11918.63
2015-03-14T02:55:00.000-04:00 11885.698
2015-03-14T03:00:00.000-04:00 11863.065
2015-03-14T03:05:00.000-04:00 11883.256
2015-03-14T03:10:00.000-04:00 11870.095
2015-03-14T03:15:00.000-04:00 11849.104
2015-03-14T03:20:00.000-04:00 11849.18
2015-03-14T03:25:00.000-04:00 11834.229
2015-03-14T03:30:00.000-04:00 11826.603
2015-03-14T03:35:00.000-04:00 11823.516
2015-03-14T03:40:00.000-04:00 11849.386
2015-03-14T03:45:00.000-04:00 11832.385
2015-03-14T03:50:00.000-04:00 11847.059
2015-03-14T03:55:00.000-04:00 11831.807
2015-03-14T04:00:00.000-04:00 11844.027
2015-03-14T04:05:00.000-04:00 11873.114
2015-03-14T04:10:00.000-04:00 11904.105
2015-03-14T04:15:00.000-04:00 11879.018
2015-03-14T04:20:00.000-04:00 11899.658
2015-03-14T04:25:00.000-04:00 11887.808
2015-03-14T04:30:00.000-04:00 11879.875
2015-03-14T04:35:00.000-04:00 11924.149
2015-03-14T04:40:00.000-04:00 11929.499
2015-03-14T04:45:00.000-04:00 11932.086
2015-03-14T04:50:00.000-04:00 11989.847
2015-03-14T04:55:00.000-04:00 12000.971
This is a beautiful use of itertools.groupby because you can actually take advantage of the generators it returns instead of instantly making them lists or something:
import itertools, pprint
d = {}
# groupby only merges adjacent rows, so lst must already be ordered by time.
# The key is the two-digit hour taken from the ISO timestamp.
for key, gen in itertools.groupby(lst, key=lambda row: int(row[0][11:13])):
    # The timestamp is unused here; "_" avoids reusing the name of the result
    # dict ``d`` inside the generator expression.
    d[key] = sum(value for _, value in gen)
pprint.pprint(d)
And for average instead of sum:
import itertools, pprint
def avg(gf):
    """Return the arithmetic mean of the numbers produced by ``gf``.

    Args:
        gf: Any iterable of numbers; generators are consumed in one pass.

    Returns:
        The mean as a float.

    Raises:
        ValueError: If ``gf`` yields no values.
    """
    total = 0
    count = 0
    # enumerate from 1 so ``count`` ends up as the number of items seen.
    for count, value in enumerate(gf, 1):
        total += value
    if not count:
        raise ValueError('avg() requires at least one value')
    return float(total) / count
d = {}
# groupby only merges adjacent rows, so lst must already be ordered by time.
for key, gen in itertools.groupby(lst, key=lambda row: int(row[0][11:13])):
    # Feed only the measurement of each [timestamp, value] row to avg().
    d[key] = avg(value for _, value in gen)
pprint.pprint(d)
Output (of the sum version above):
{0: 148410.565,
1: 147042.743,
2: 143850.52000000002,
3: 142159.685,
4: 142944.15699999998}
Where the key of the dictionary ([0,1,2,3,4]) corresponds to the hour of the timestamp.
Input:
lst = [
['2015-03-14T00:00:00.000-04:00', 12545.869 ],
['2015-03-14T00:05:00.000-04:00', 12467.326],
['2015-03-14T00:10:00.000-04:00', 12416.948],
['2015-03-14T00:15:00.000-04:00', 12315.698],
['2015-03-14T00:20:00.000-04:00', 12276.38],
['2015-03-14T00:25:00.000-04:00', 12498.696],
['2015-03-14T00:30:00.000-04:00', 12426.145],
['2015-03-14T00:35:00.000-04:00', 12368.659],
['2015-03-14T00:40:00.000-04:00', 12322.785],
['2015-03-14T00:45:00.000-04:00', 12292.719],
['2015-03-14T00:50:00.000-04:00', 12257.965],
['2015-03-14T00:55:00.000-04:00', 12221.375],
['2015-03-14T01:00:00.000-04:00', 12393.725],
['2015-03-14T01:05:00.000-04:00', 12366.674],
['2015-03-14T01:10:00.000-04:00', 12378.578],
['2015-03-14T01:15:00.000-04:00', 12340.754],
['2015-03-14T01:20:00.000-04:00', 12288.511],
['2015-03-14T01:25:00.000-04:00', 12266.136],
['2015-03-14T01:30:00.000-04:00', 12236.639],
['2015-03-14T01:35:00.000-04:00', 12181.668],
['2015-03-14T01:40:00.000-04:00', 12171.992],
['2015-03-14T01:45:00.000-04:00', 12164.298],
['2015-03-14T01:50:00.000-04:00', 12137.282],
['2015-03-14T01:55:00.000-04:00', 12116.486],
['2015-03-14T02:00:02.000-04:00', 12090.439],
['2015-03-14T02:05:00.000-04:00', 12085.924],
['2015-03-14T02:10:00.000-04:00', 12034.78],
['2015-03-14T02:15:00.000-04:00', 12037.367],
['2015-03-14T02:20:00.000-04:00', 12006.649],
['2015-03-14T02:25:00.000-04:00', 11985.588],
['2015-03-14T02:30:00.000-04:00', 11999.41],
['2015-03-14T02:35:00.000-04:00', 11943.121],
['2015-03-14T02:40:00.000-04:00', 11934.346],
['2015-03-14T02:45:00.000-04:00', 11928.568],
['2015-03-14T02:50:00.000-04:00', 11918.63],
['2015-03-14T02:55:00.000-04:00', 11885.698],
['2015-03-14T03:00:00.000-04:00', 11863.065],
['2015-03-14T03:05:00.000-04:00', 11883.256],
['2015-03-14T03:10:00.000-04:00', 11870.095],
['2015-03-14T03:15:00.000-04:00', 11849.104],
['2015-03-14T03:20:00.000-04:00', 11849.18],
['2015-03-14T03:25:00.000-04:00', 11834.229],
['2015-03-14T03:30:00.000-04:00', 11826.603],
['2015-03-14T03:35:00.000-04:00', 11823.516],
['2015-03-14T03:40:00.000-04:00', 11849.386],
['2015-03-14T03:45:00.000-04:00', 11832.385],
['2015-03-14T03:50:00.000-04:00', 11847.059],
['2015-03-14T03:55:00.000-04:00', 11831.807],
['2015-03-14T04:00:00.000-04:00', 11844.027],
['2015-03-14T04:05:00.000-04:00', 11873.114],
['2015-03-14T04:10:00.000-04:00', 11904.105],
['2015-03-14T04:15:00.000-04:00', 11879.018],
['2015-03-14T04:20:00.000-04:00', 11899.658],
['2015-03-14T04:25:00.000-04:00', 11887.808],
['2015-03-14T04:30:00.000-04:00', 11879.875],
['2015-03-14T04:35:00.000-04:00', 11924.149],
['2015-03-14T04:40:00.000-04:00', 11929.499],
['2015-03-14T04:45:00.000-04:00', 11932.086],
['2015-03-14T04:50:00.000-04:00', 11989.847],
['2015-03-14T04:55:00.000-04:00', 12000.971],
]
Edit: per discussion in comments, what about:
import itertools, pprint
def avg(gf):
    """Return the arithmetic mean of the numbers produced by ``gf``.

    Args:
        gf: Any iterable of numbers; generators are consumed in one pass.

    Returns:
        The mean as a float.

    Raises:
        ValueError: If ``gf`` yields no values.
    """
    total = 0
    count = 0
    # enumerate from 1 so ``count`` ends up as the number of items seen.
    for count, value in enumerate(gf, 1):
        total += value
    if not count:
        raise ValueError('avg() requires at least one value')
    return float(total) / count
d = {}
# groupby only merges adjacent rows, so lst must already be ordered by time.
for _, gen in itertools.groupby(lst, key=lambda row: int(row[0][11:13])):
    vals = list(gen)  # Materialise the group: it is read twice below
    # Label the group with the date-and-hour prefix of its first timestamp,
    # e.g. '2015-03-14T00'.
    key = vals[0][0][:13]
    d[key] = avg(value for _, value in vals)
pprint.pprint(d)
You can do this pretty easily using a variety of tools, but I'll use a simple loop for simplicity sake:
with open("listfile.txt", "r") as e:
    list_ = e.read().splitlines()
list_ = list_[1:]  # Grab all but the first line

# hour prefix (e.g. '2015-03-14T00') -> [running sum, number of samples]
totals = {}
for row in list_:
    if not row.strip():
        continue  # ignore blank lines
    date, value = row.split()
    hour = date.split(':')[0]  # Grab only date and hour info
    bucket = totals.setdefault(hour, [0.0, 0])
    bucket[0] += float(value)
    bucket[1] += 1

# Divide by the real sample count so incomplete hours are averaged correctly.
dateValue = {hour: total / count for hour, (total, count) in totals.items()}
If you want to export it to lists, just do:
# Keys in sorted order, and the matching averages in the same order.
listDate = sorted(dateValue)
listAmount = [dateValue[k] for k in listDate]
quick and dirty way
reads= [
'2015-03-14T00:00:00.000-04:00 12545.869',
'2015-03-14T00:05:00.000-04:00 12467.326',
'2015-03-14T00:10:00.000-04:00 12416.948',
'2015-03-14T00:15:00.000-04:00 12315.698',
'2015-03-14T00:20:00.000-04:00 12276.38',
'2015-03-14T00:25:00.000-04:00 12498.696',
'2015-03-14T00:30:00.000-04:00 12426.145',
'2015-03-14T00:35:00.000-04:00 12368.659',
'2015-03-14T00:40:00.000-04:00 12322.785',
'2015-03-14T00:45:00.000-04:00 12292.719',
'2015-03-14T00:50:00.000-04:00 12257.965',
'2015-03-14T00:55:00.000-04:00 12221.375',
'2015-03-14T01:00:00.000-04:00 12393.725',
'2015-03-14T01:05:00.000-04:00 12366.674',
'2015-03-14T01:10:00.000-04:00 12378.578',
'2015-03-14T01:15:00.000-04:00 12340.754',
'2015-03-14T01:20:00.000-04:00 12288.511',
'2015-03-14T01:25:00.000-04:00 12266.136',
'2015-03-14T01:30:00.000-04:00 12236.639',
'2015-03-14T01:35:00.000-04:00 12181.668',
'2015-03-14T01:40:00.000-04:00 12171.992',
'2015-03-14T01:45:00.000-04:00 12164.298',
'2015-03-14T01:50:00.000-04:00 12137.282',
'2015-03-14T01:55:00.000-04:00 12116.486'
]
sums = {}
counts = {}
for read in reads:
    # Everything before the first ':' is the date plus hour, e.g. '2015-03-14T00'.
    hour = read.split(':')[0]
    value = float(read.split()[-1])
    sums[hour] = sums.get(hour, 0.0) + value
    counts[hour] = counts.get(hour, 0) + 1

# Divide by the number of readings actually seen for the hour rather than
# assuming exactly twelve.
avg = {hour: sums[hour] / counts[hour] for hour in sums}
print(avg)

Categories

Resources