The price of using infinite_defaultdict

The price of using infinite_defaultdict - python

For me, this is heaven-sent:
>>> from collections import defaultdict
>>> infinite_defaultdict = lambda: defaultdict(infinite_defaultdict)
>>> d = infinite_defaultdict()
>>> d['x']['y']['z'] = 10
by Raymond Hettinger on Twitter
Having that I don't see why we should do these anymore:
mydict = defaultdict(list)
mydict = defaultdict(lambda: defaultdict(float))
etc....
But I may be wrong.
Is there a case where you want to avoid infinite_defaultdict?
Update:
I tried to benchmark the time
from collections import defaultdict
def infdd():
infinite_defaultdict = lambda: defaultdict(infinite_defaultdict)
idd = infinite_defaultdict()
idd['x'] = [1,2,3]
def plaindd():
ddl = defaultdict(list)
ddl['x'] = [1,2,3]
if __name__ == '__main__':
import timeit
print "Infd = %.3f" % (timeit.timeit("infdd()",setup="from __main__ import infdd"))
print "Plaind = %.3f" % (timeit.timeit("plaindd()",setup="from __main__ import plaindd"))
Apparently infinite_defaultdict is almost twice as slow as a plain defaultdict:
Infd = 0.632
Plaind = 0.387

If you need the default value to be something other than a dict, then you should not use infinite_defaultdict. For example, if you want to count items or accumulate arrays of items, you'll want the default value to be a number or an array.
def group_by(key, items):
    """Group ``items`` into lists keyed by ``key(item)``.

    Args:
        key: Callable applied to every item; its (hashable) return value
            selects the bucket the item is appended to.
        items: Iterable of items to group.

    Returns:
        A ``defaultdict(list)`` mapping each key value to the items that
        produced it, in the order they were encountered.
    """
    result = defaultdict(list)
    for item in items:
        result[key(item)].append(item)
    return result
group_by(len, ['here', 'are', 'some', 'words'])
# -> { 3: ['are'], 4: ['here', 'some'], 5: ['words'] }

Related

Count occurrences of Enum in a string

I am attempting to count the number of occurrences of an ENUM in a string value e.g.
class numbers(Enum):
one = 1
two = 2
string = "121212123324"
string.count(str(numbers.one.value))
This just seems very unintuitive to convert the enum back to string - are there any quicker ways?

Your solution is good. The runtimes of five approaches are compared below:
from timeit import timeit
from collections import Counter
from enum import Enum
class numbers(Enum):
one = 1
two = 2
three = 3
four = 4
def approach1(products):
return Counter(products)[str(numbers.one.value)]
def approach2(products):
return products.count(str(numbers.one.value))
def approach3(products):
lst = list(map(int, products))
return lst.count(int(numbers.one.value))
def approach4(products):
    """Count the digits of all four ``numbers`` members with one Counter pass.

    Args:
        products: String of digit characters to scan.

    Returns:
        A 4-tuple of occurrence counts for ``one``, ``two``, ``three`` and
        ``four``, in that order.
    """
    cnt = Counter(products)
    # Every element indexes the Counter. Previously the 2nd and 4th elements
    # were the bare strings str(numbers.two.value) / str(numbers.four.value),
    # not counts.
    return (cnt[str(numbers.one.value)], cnt[str(numbers.two.value)],
            cnt[str(numbers.three.value)], cnt[str(numbers.four.value)])
def approach5(products):
cnt_o = products.count(str(numbers.one.value))
cnt_t = products.count(str(numbers.two.value))
cnt_h = products.count(str(numbers.three.value))
cnt_f = products.count(str(numbers.four.value))
return (cnt_o , cnt_t , cnt_h , cnt_f)
funcs = approach1, approach2, approach3 , approach4, approach5
products = "121212123324"*10000000
for _ in range(3):
for func in funcs:
t = timeit(lambda: func(products), number=1)
print('%.3f s ' % t, func.__name__)
print()
Output:
6.279 s approach1
0.140 s approach2
17.172 s approach3
6.403 s approach4
0.491 s approach5
6.340 s approach1
0.139 s approach2
16.049 s approach3
6.559 s approach4
0.474 s approach5
6.245 s approach1
0.143 s approach2
15.876 s approach3
6.172 s approach4
0.475 s approach5

'itertools._grouper' object has no attribute 'user'

Why can't I convert the loop group in groupby as list? Currently, I am working on Django==2.2.1 and when I try this data = [...] below into python console, it is working fine.
from itertools import groupby
from operator import itemgetter
#login_required
def list(request, template_name='cart/list.html'):
    """Print the dummy order rows grouped by ``agent_name``.

    NOTE(review): the line above is presumably ``@login_required`` mangled by
    extraction -- confirm.

    NOTE(review): this view is named ``list``, so inside this module the name
    ``list`` refers to the view, not the builtin. ``list(group)`` therefore
    called the view again with a groupby group as ``request``; presumably that
    is where "'itertools._grouper' object has no attribute 'user'" came from.
    The body below avoids the name ``list`` entirely.

    Args:
        request: The incoming request (unused by the visible code).
        template_name: Template path (unused by the visible code).
    """
    # I also try with this dummy data
    test_data = [
        {'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':5.0},
        {'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':5.0},
        {'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':6.0},
        {'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},
        {'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},
        {'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':7.0},
    ]
    print(type(test_data))  # a list (the original referenced an undefined `data`)

    # groupby only merges *adjacent* rows, so sort by the same key it groups on.
    agent_key = itemgetter('agent_name')
    sorted_totals = sorted(test_data, key=agent_key)
    for agent_name, group in groupby(sorted_totals, key=agent_key):
        # Unpack the group instead of calling list(): that name is this view.
        print(agent_name, [*group])
But, I am getting an error looking like this when I try it at views in Django.
I also tried it with defaultdict
from collections import defaultdict
#login_required
def list(request, template_name='cart/list.html'):
    """Sum the dummy order rows per ``agent_name`` into ``grouped_out``.

    NOTE(review): the line above is presumably ``@login_required`` mangled by
    extraction -- confirm.

    NOTE(review): this view is named ``list``, so ``defaultdict(list)`` and
    ``isinstance(group, list)`` both received the view function instead of the
    builtin type. The body below avoids the name ``list`` entirely.

    Args:
        request: The incoming request (unused by the visible code).
        template_name: Template path (unused by the visible code).
    """
    test_data = [
        {'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':5.0},
        {'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':5.0},
        {'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':6.0},
        {'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},
        {'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},
        {'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':7.0},
    ]

    # Plain dict + setdefault gives the same buckets without needing a
    # default factory (which would have been this view function).
    grouped = {}
    for data_total in test_data:
        grouped.setdefault(data_total['agent_name'], []).append(data_total)

    grouped_out = []
    for agent_name, group in grouped.items():
        total_order = 0
        total_pcs = 0
        total_kg = 0
        # Every bucket is a list by construction, so no type check is needed.
        for data_total in group:
            total_order += data_total.get('total_order')
            total_pcs += data_total.get('total_pcs')
            total_kg += data_total.get('total_kg')
        grouped_out.append({
            'agent_name': agent_name,
            'total_order': total_order,
            'total_pcs': total_pcs,
            'total_kg': total_kg
        })
But the error I get is raised inside the wrapper view. As in the previous attempt, the traceback points at _wrapped_view.

Finally, I fixed it manually by using a dict.
test_data = [
    {'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':5.0},
    {'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':5.0},
    {'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':6.0},
    {'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},
    {'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},
    {'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':7.0},
]

# Accumulate one totals dict per agent, keyed by agent name.
grouped = {}
for data_total in test_data:
    agent_name = data_total.get('agent_name')
    if agent_name in grouped:
        totals = grouped[agent_name]  # dict, updated in place
        totals['total_order'] += data_total.get('total_order')
        totals['total_pcs'] += data_total.get('total_pcs')
        totals['total_kg'] += data_total.get('total_kg')
    else:
        # Store a copy: keeping the row itself would make the running totals
        # overwrite the first row of each agent inside test_data.
        grouped[agent_name] = dict(data_total)
And the resulting grouped looks like this:
{'agent123': {'agent_name': 'agent123',
'total_kg': 18.0,
'total_order': 3,
'total_pcs': 3},
'agentbeli': {'agent_name': 'agentbeli',
'total_kg': 17.0,
'total_order': 3,
'total_pcs': 3}}

Construct python dict from DeepDiff result

I have a DeepDiff result which is obtained by comparing two JSON files. I have to construct a python dictionary from the deepdiff result as follows.
json1 = {"spark": {"ttl":3, "poll":34}}
json2 = {"spark": {"ttl":3, "poll":34, "toll":23}, "cion": 34}
deepdiffresult = {'dictionary_item_added': {"root['spark']['toll']", "root['cion']"}}
expecteddict = {"spark" : {"toll":23}, "cion":34}
How can this be achieved?

There is probably a better way to do this. But you can parse the returned strings and chain together a new dictionary with the result you want.
json1 = {"spark": {"ttl":3, "poll":34}}
json2 = {"spark": {"ttl":3, "poll":34, "toll":23}, "cion": 34}
deepdiffresult = {'dictionary_item_added': {"root['spark']['toll']", "root['cion']"}}
added = deepdiffresult['dictionary_item_added']
def convert(s, j):
    """Rebuild the nested dict described by one DeepDiff path string.

    Args:
        s: Path such as ``"root['spark']['toll']"``.
        j: The dict the path refers to; the leaf value is looked up in it
            with ``.get`` at every level.

    Returns:
        A nested dict containing only that path, e.g.
        ``{'spark': {'toll': 23}}``. An empty dict is returned when ``s``
        contains no ``['key']`` segment.

    NOTE(review): only single-quoted string keys are recognised; integer
    indices such as ``[0]`` are ignored -- extend the pattern if needed.
    """
    import re  # local import: this snippet has no import block of its own

    # Extract each ['key'] segment. The former chain of str.replace calls
    # also stripped "root", "[" and "'" from inside the key names themselves.
    keys = re.findall(r"\['([^']*)'\]", s)
    if not keys:
        return {}

    # Walk down to the leaf value, always continuing from the previous level
    # (the old `if not v` test restarted from `j` on any falsy intermediate).
    value = j
    for key in keys:
        value = value.get(key)

    # Wrap the leaf from the innermost key outwards.
    nested = value
    for key in reversed(keys):
        nested = {key: nested}
    return nested
def _merge_nested(target, extra):
    """Recursively merge dict ``extra`` into dict ``target`` in place; return ``target``."""
    for key, value in extra.items():
        if isinstance(value, dict) and isinstance(target.get(key), dict):
            _merge_nested(target[key], value)
        else:
            target[key] = value
    return target


added_dict = {}
for added_str in added:
    # A plain dict.update would replace a whole top-level branch, dropping
    # earlier added paths that share the same first key.
    _merge_nested(added_dict, convert(added_str, json2))
added_dict
#returns:
{'cion': 34, 'spark': {'toll': 23}}

Simple Answer,
There is a third-party package called dictdiffer (it is not built in) that provides a diff function. You can try it:
$ pip install dictdiffer
Examples:
from dictdiffer import diff

# diff() produces its change records lazily, so materialise them before
# printing. The records are change tuples, not the nested dict from the
# question, so comparing the result with that dict is always False.
result = list(diff(json1, json2))
print(result)
References:
DictDiffer

Python Case Insensitive Replace All of multiple strings

I want to replace all occurrences of a set of strings in a text line. I came up with this approach, but I am sure there is a better way of doing this:
myDict = {}
test = re.compile(re.escape('pig'), re.IGNORECASE)
myDict['car'] = test
test = re.compile(re.escape('horse'), re.IGNORECASE)
myDict['airplane'] = test
test = re.compile(re.escape('cow'), re.IGNORECASE)
myDict['bus'] = test
mystring = 'I have this Pig and that pig with a hOrse and coW'
for key in myDict:
regex_obj = myDict[key]
mystring = regex_obj.sub(key, mystring)
print mystring
I have this car and that car with a airplane and bus
Based on @Paul Rooney's answer below, ideally I would do this:
def init_regex():
rd = {'pig': 'car', 'horse':'airplane', 'cow':'bus'}
myDict = {}
for key,value in rd.iteritems():
pattern = re.compile(re.escape(key), re.IGNORECASE)
myDict[value] = pattern
return myDict
def strrep(mystring, patternDict):
for key in patternDict:
regex_obj = patternDict[key]
mystring = regex_obj.sub(key, mystring)
return mystring

Try
import itertools
import re
mystring = 'I have this Pig and that pig with a hOrse and coW'
rd = {'pig': 'car', 'horse':'airplane', 'cow':'bus'}
# Compiled patterns, keyed by search string, shared across strrep() calls.
cachedict = {}


def strrep(orig, repdict):
    """Replace every key of ``repdict`` in ``orig``, ignoring case.

    Args:
        orig: Text to process.
        repdict: Mapping of search string -> replacement string. Search
            strings are matched literally.

    Returns:
        ``orig`` with all replacements applied, in ``repdict`` iteration order.
    """
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for k, v in repdict.items():
        pattern = cachedict.get(k)
        if pattern is None:
            # Escape the key so characters like "." or "(" are matched
            # literally instead of being interpreted as regex syntax.
            pattern = re.compile(re.escape(k), re.IGNORECASE)
            cachedict[k] = pattern
        orig = pattern.sub(v, orig)
    return orig
print strrep(mystring, rd)
This answer was initially written for python2, but for python 3 you would use repdict.items instead of repdict.iteritems.

Create a sublist by datedelta in Python

I have a list of data points that contains a measurement every 5 minutes for 24 hours. I need to create a new list with the average of that measurement for each hour in the list. What's the best way to accomplish that?
Date Amount
2015-03-14T00:00:00.000-04:00 12545.869
2015-03-14T00:05:00.000-04:00 12467.326
2015-03-14T00:10:00.000-04:00 12416.948
2015-03-14T00:15:00.000-04:00 12315.698
2015-03-14T00:20:00.000-04:00 12276.38
2015-03-14T00:25:00.000-04:00 12498.696
2015-03-14T00:30:00.000-04:00 12426.145
2015-03-14T00:35:00.000-04:00 12368.659
2015-03-14T00:40:00.000-04:00 12322.785
2015-03-14T00:45:00.000-04:00 12292.719
2015-03-14T00:50:00.000-04:00 12257.965
2015-03-14T00:55:00.000-04:00 12221.375
2015-03-14T01:00:00.000-04:00 12393.725
2015-03-14T01:05:00.000-04:00 12366.674
2015-03-14T01:10:00.000-04:00 12378.578
2015-03-14T01:15:00.000-04:00 12340.754
2015-03-14T01:20:00.000-04:00 12288.511
2015-03-14T01:25:00.000-04:00 12266.136
2015-03-14T01:30:00.000-04:00 12236.639
2015-03-14T01:35:00.000-04:00 12181.668
2015-03-14T01:40:00.000-04:00 12171.992
2015-03-14T01:45:00.000-04:00 12164.298
2015-03-14T01:50:00.000-04:00 12137.282
2015-03-14T01:55:00.000-04:00 12116.486
2015-03-14T02:00:02.000-04:00 12090.439
2015-03-14T02:05:00.000-04:00 12085.924
2015-03-14T02:10:00.000-04:00 12034.78
2015-03-14T02:15:00.000-04:00 12037.367
2015-03-14T02:20:00.000-04:00 12006.649
2015-03-14T02:25:00.000-04:00 11985.588
2015-03-14T02:30:00.000-04:00 11999.41
2015-03-14T02:35:00.000-04:00 11943.121
2015-03-14T02:40:00.000-04:00 11934.346
2015-03-14T02:45:00.000-04:00 11928.568
2015-03-14T02:50:00.000-04:00 11918.63
2015-03-14T02:55:00.000-04:00 11885.698
2015-03-14T03:00:00.000-04:00 11863.065
2015-03-14T03:05:00.000-04:00 11883.256
2015-03-14T03:10:00.000-04:00 11870.095
2015-03-14T03:15:00.000-04:00 11849.104
2015-03-14T03:20:00.000-04:00 11849.18
2015-03-14T03:25:00.000-04:00 11834.229
2015-03-14T03:30:00.000-04:00 11826.603
2015-03-14T03:35:00.000-04:00 11823.516
2015-03-14T03:40:00.000-04:00 11849.386
2015-03-14T03:45:00.000-04:00 11832.385
2015-03-14T03:50:00.000-04:00 11847.059
2015-03-14T03:55:00.000-04:00 11831.807
2015-03-14T04:00:00.000-04:00 11844.027
2015-03-14T04:05:00.000-04:00 11873.114
2015-03-14T04:10:00.000-04:00 11904.105
2015-03-14T04:15:00.000-04:00 11879.018
2015-03-14T04:20:00.000-04:00 11899.658
2015-03-14T04:25:00.000-04:00 11887.808
2015-03-14T04:30:00.000-04:00 11879.875
2015-03-14T04:35:00.000-04:00 11924.149
2015-03-14T04:40:00.000-04:00 11929.499
2015-03-14T04:45:00.000-04:00 11932.086
2015-03-14T04:50:00.000-04:00 11989.847
2015-03-14T04:55:00.000-04:00 12000.971

This is a beautiful use of itertools.groupby because you can actually take advantage of the generators it returns instead of instantly making them lists or something:
import itertools, pprint
d = {}
for (key,gen) in itertools.groupby(lst, key=lambda l: int(l[0][11:13])):
d[key] = sum(v for (d,v) in gen)
pprint.pprint(d)
And for average instead of sum:
import itertools, pprint
def avg(gf):
    """Return the arithmetic mean of the numbers yielded by ``gf`` as a float.

    Args:
        gf: Any iterable of numbers (a generator is consumed once).

    Raises:
        ValueError: if ``gf`` yields no values.
    """
    _sum = 0
    count = 0
    for e in gf:
        _sum += e
        count += 1
    if not count:
        # Previously an empty iterable failed on the unbound loop index.
        raise ValueError("avg() requires at least one value")
    return float(_sum) / count
d = {}
for (key,gen) in itertools.groupby(lst, key=lambda l: int(l[0][11:13])):
#d[key] = sum(v for (d,v) in gen)
d[key] = avg(v for (d,v) in gen)
pprint.pprint(d)
Output (of the sum version):
{0: 148410.565,
1: 147042.743,
2: 143850.52000000002,
3: 142159.685,
4: 142944.15699999998}
Where the key of the dictionary ([0,1,2,3,4]) corresponds to the hour of the timestamp.
Input:
lst = [
['2015-03-14T00:00:00.000-04:00', 12545.869 ],
['2015-03-14T00:05:00.000-04:00', 12467.326],
['2015-03-14T00:10:00.000-04:00', 12416.948],
['2015-03-14T00:15:00.000-04:00', 12315.698],
['2015-03-14T00:20:00.000-04:00', 12276.38],
['2015-03-14T00:25:00.000-04:00', 12498.696],
['2015-03-14T00:30:00.000-04:00', 12426.145],
['2015-03-14T00:35:00.000-04:00', 12368.659],
['2015-03-14T00:40:00.000-04:00', 12322.785],
['2015-03-14T00:45:00.000-04:00', 12292.719],
['2015-03-14T00:50:00.000-04:00', 12257.965],
['2015-03-14T00:55:00.000-04:00', 12221.375],
['2015-03-14T01:00:00.000-04:00', 12393.725],
['2015-03-14T01:05:00.000-04:00', 12366.674],
['2015-03-14T01:10:00.000-04:00', 12378.578],
['2015-03-14T01:15:00.000-04:00', 12340.754],
['2015-03-14T01:20:00.000-04:00', 12288.511],
['2015-03-14T01:25:00.000-04:00', 12266.136],
['2015-03-14T01:30:00.000-04:00', 12236.639],
['2015-03-14T01:35:00.000-04:00', 12181.668],
['2015-03-14T01:40:00.000-04:00', 12171.992],
['2015-03-14T01:45:00.000-04:00', 12164.298],
['2015-03-14T01:50:00.000-04:00', 12137.282],
['2015-03-14T01:55:00.000-04:00', 12116.486],
['2015-03-14T02:00:02.000-04:00', 12090.439],
['2015-03-14T02:05:00.000-04:00', 12085.924],
['2015-03-14T02:10:00.000-04:00', 12034.78],
['2015-03-14T02:15:00.000-04:00', 12037.367],
['2015-03-14T02:20:00.000-04:00', 12006.649],
['2015-03-14T02:25:00.000-04:00', 11985.588],
['2015-03-14T02:30:00.000-04:00', 11999.41],
['2015-03-14T02:35:00.000-04:00', 11943.121],
['2015-03-14T02:40:00.000-04:00', 11934.346],
['2015-03-14T02:45:00.000-04:00', 11928.568],
['2015-03-14T02:50:00.000-04:00', 11918.63],
['2015-03-14T02:55:00.000-04:00', 11885.698],
['2015-03-14T03:00:00.000-04:00', 11863.065],
['2015-03-14T03:05:00.000-04:00', 11883.256],
['2015-03-14T03:10:00.000-04:00', 11870.095],
['2015-03-14T03:15:00.000-04:00', 11849.104],
['2015-03-14T03:20:00.000-04:00', 11849.18],
['2015-03-14T03:25:00.000-04:00', 11834.229],
['2015-03-14T03:30:00.000-04:00', 11826.603],
['2015-03-14T03:35:00.000-04:00', 11823.516],
['2015-03-14T03:40:00.000-04:00', 11849.386],
['2015-03-14T03:45:00.000-04:00', 11832.385],
['2015-03-14T03:50:00.000-04:00', 11847.059],
['2015-03-14T03:55:00.000-04:00', 11831.807],
['2015-03-14T04:00:00.000-04:00', 11844.027],
['2015-03-14T04:05:00.000-04:00', 11873.114],
['2015-03-14T04:10:00.000-04:00', 11904.105],
['2015-03-14T04:15:00.000-04:00', 11879.018],
['2015-03-14T04:20:00.000-04:00', 11899.658],
['2015-03-14T04:25:00.000-04:00', 11887.808],
['2015-03-14T04:30:00.000-04:00', 11879.875],
['2015-03-14T04:35:00.000-04:00', 11924.149],
['2015-03-14T04:40:00.000-04:00', 11929.499],
['2015-03-14T04:45:00.000-04:00', 11932.086],
['2015-03-14T04:50:00.000-04:00', 11989.847],
['2015-03-14T04:55:00.000-04:00', 12000.971],
]
Edit: per discussion in comments, what about:
import itertools, pprint
def avg(gf):
    """Return the arithmetic mean of the numbers yielded by ``gf`` as a float.

    Args:
        gf: Any iterable of numbers (a generator is consumed once).

    Raises:
        ValueError: if ``gf`` yields no values.
    """
    _sum = 0
    count = 0
    for e in gf:
        _sum += e
        count += 1
    if not count:
        # Previously an empty iterable failed on the unbound loop index.
        raise ValueError("avg() requires at least one value")
    return float(_sum) / count
d = {}
for (key,gen) in itertools.groupby(lst, key=lambda l: int(l[0][11:13])):
vals = list(gen) # Unpack generator
key = vals[0][0][:13]
d[key] = avg(v for (d,v) in vals)
pprint.pprint(d)

You can do this pretty easily using a variety of tools, but I'll use a simple loop for simplicity sake:
with open("listfile.txt", "r") as e:
    list_ = e.read().splitlines()
list_ = list_[1:]  # Grab all but the first line

# Maps "date + hour" (the timestamp text before the first ':') to the mean
# of that hour's readings.
dateValue = dict()
for row in list_:
    date, value = row.split()  # was "-": a subtraction, not an assignment
    if ":00:" in date:
        # Start new value
        amount = float(value)  # readings are decimals; int() would reject them
    elif ":55:" in date:
        # End new value: add this last reading before averaging
        amount += float(value)
        hour = date.split(':')[0]  # Grab only date and hour info
        dateValue[hour] = amount / 12.
        del amount  # Just in case the data isn't uniform, so it raises an error
    else:
        amount += float(value)  # was "date += ...", which never touched the total
If you want to export it to lists, just do:
>>> listDate = list()
>>> listAmount = list()
>>> for k in sorted(dateValue.keys() ):
>>> v = dateValue.get(k)
>>>
>>> listDate.append(k)
>>> listAmount.append(v)

quick and dirty way
reads= [
'2015-03-14T00:00:00.000-04:00 12545.869',
'2015-03-14T00:05:00.000-04:00 12467.326',
'2015-03-14T00:10:00.000-04:00 12416.948',
'2015-03-14T00:15:00.000-04:00 12315.698',
'2015-03-14T00:20:00.000-04:00 12276.38',
'2015-03-14T00:25:00.000-04:00 12498.696',
'2015-03-14T00:30:00.000-04:00 12426.145',
'2015-03-14T00:35:00.000-04:00 12368.659',
'2015-03-14T00:40:00.000-04:00 12322.785',
'2015-03-14T00:45:00.000-04:00 12292.719',
'2015-03-14T00:50:00.000-04:00 12257.965',
'2015-03-14T00:55:00.000-04:00 12221.375',
'2015-03-14T01:00:00.000-04:00 12393.725',
'2015-03-14T01:05:00.000-04:00 12366.674',
'2015-03-14T01:10:00.000-04:00 12378.578',
'2015-03-14T01:15:00.000-04:00 12340.754',
'2015-03-14T01:20:00.000-04:00 12288.511',
'2015-03-14T01:25:00.000-04:00 12266.136',
'2015-03-14T01:30:00.000-04:00 12236.639',
'2015-03-14T01:35:00.000-04:00 12181.668',
'2015-03-14T01:40:00.000-04:00 12171.992',
'2015-03-14T01:45:00.000-04:00 12164.298',
'2015-03-14T01:50:00.000-04:00 12137.282',
'2015-03-14T01:55:00.000-04:00 12116.486'
]
# Per-hour running total and reading count, keyed by the timestamp text
# before the first ':'.
sums = {}
counts = {}
for read in reads:
    hour = read.split(':')[0]
    value = float(read.split().pop())
    sums[hour] = sums.get(hour, 0.0) + value
    counts[hour] = counts.get(hour, 0) + 1

avg = {}
for s in sums:
    # Divide by the number of readings actually seen for the hour instead of
    # assuming exactly 12 per hour.
    avg[s] = sums[s] / counts[s]
print(avg)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

The price of using infinite_defaultdict - python

Related

Count occurrences of Enum in a string

'itertools._grouper' object has no attribute 'user'

Construct python dict from DeepDiff result

Python Case Insensitive Replace All of multiple strings

Create a sublist by datedelta in Python

Categories

Resources