Looking at the following sample code from the "Fluent Python" book that explains the "bidirectional tunnel" functionality of yield from, I have the following question.
from collections import namedtuple

# Final tally a finished averager reports back through `yield from`.
Result = namedtuple('Result', 'count average')


# the subgenerator
def averager():  # <1>
    """Coroutine that accumulates numbers sent into it.

    Sending None terminates the loop; the coroutine then returns a
    Result(count, average), which the delegating generator receives as
    the value of its `yield from` expression.
    """
    running_total = 0.0
    n_terms = 0
    mean = None
    while True:
        sent = yield  # <2> value arrives via .send()
        if sent is None:  # <3> None is the agreed termination sentinel
            break
        running_total += sent
        n_terms += 1
        mean = running_total / n_terms
    # Only reachable after the sentinel; surfaces as StopIteration.value.
    return Result(n_terms, mean)  # <4>
# the delegating generator
def grouper(results, key):  # <5>
    """Delegate to a fresh averager() and store each final Result under *key*.

    The infinite loop gives the generator a place to suspend again after
    each averager finishes; without it the generator would fall off its
    end and the caller's send(None) would raise StopIteration.
    """
    while True:  # <6>
        # `yield from` forwards send()/throw() to the subgenerator and
        # evaluates to its return value once it terminates.
        results[key] = yield from averager()  # <7>
# the client code, a.k.a. the caller
def main(data):  # <8>
    """Drive one grouper coroutine per key in *data*, then print results."""
    results = {}
    for key, values in data.items():
        group = grouper(results, key)  # <9>
        next(group)  # <10> prime the coroutine up to its first suspension
        for value in values:
            group.send(value)  # <11> piped through grouper into averager
        group.send(None)  # important! <12> terminates the current averager
        print("wrapped up grouper")  # debug marker, once per key
    print(results)
# Sample measurements keyed by '<group>;<unit>'.
data = {
    'girls;kg':
        [40.9, 38.5, 44.3, 42.2, 45.2, 41.7, 44.5, 38.0, 40.6, 44.5],
    'girls;m':
        [1.6, 1.51, 1.4, 1.3, 1.41, 1.39, 1.33, 1.46, 1.45, 1.43],
    'boys;kg':
        [39.0, 40.8, 43.2, 40.8, 43.1, 38.6, 41.4, 40.6, 36.3],
    'boys;m':
        [1.38, 1.5, 1.32, 1.25, 1.37, 1.48, 1.25, 1.49, 1.46],
}
Why do I get a StopIteration exception when I replace the delegating generator above with the following delegating generator?
def grouper(results, key):
    # NOTE(review): this is the variant the question asks about. With no
    # loop (or extra bare yield) after the `yield from`, execution runs
    # off the end of the generator as soon as averager() returns, so the
    # caller's terminating send(None) surfaces as StopIteration.
    results[key] = yield from averager()
From what I have learned so far, it seems in theory that removing the while True should be fine. group.send(None) would cause the averager() coroutine to break and return the Result(...), which would be passed up to the delegating generator. And then the delegating generator would finish by assigning that Result(...) to results[key].
But what's happening instead is the following.
Traceback (most recent call last):
File "coroaverager3.py", line 111, in <module>
main(data)
File "coroaverager3.py", line 83, in main
group.send(None) # important! <12>
StopIteration
Any insight?
You’re right that the delegating generator assigns to results[key], but it doesn’t finish by doing that. Its execution continues, since there’s nowhere for it to suspend. Of course, it immediately falls off the end, causing the send(None) to raise StopIteration (since there’s no value (from a yield) for it to return).
The while True: is a sort of silly way of adding another place to suspend; a single extra yield after the yield from would be a bit more obvious. (It could provide a value distinct from those supplied by the inner generator if the client didn’t always know when the execution was finished.)
Related
I am trying to implement multiprocessing to speed up traversing a relationship graph. I want to capture items that have a total less than 1000. If the parent is over 1000, process the children until there's no more to check.
I've mocked up an illustration that shows that ThreadPoolExecutor only processes the initial items provided to the class while the class.search_queue_list list is still populated. I also tried using a Queue instead of a list with similar results. Synchronous processing works as expected for list and Queue. Is there a way to make multiprocessing work here when the initial array of items can change?
from concurrent.futures import ThreadPoolExecutor
from queue import Queue
from time import sleep
# Fake relationship graph: id -> {'total': ..., 'children': [...]}.
dummy_data = {
    'id1': {'total': 1001, 'children': ['id101','id102']}, # over 1000, children will be processed
    'id2': {'total': 999, 'children': ['id201','id202']}, # under 1000, children won't be processed
    'id101': {'total': 501, 'children': ['more_children']},
    'id102': {'total': 500, 'children': ['more_children']},
    'id201': {'total': 499,'children': ['more_children']},
    'id202': {'total': 500, 'children': ['more_children']},
}
class SearchDummy(object):
    """Toy search keeping ids whose total is <= 1000.

    Has parallel list- and Queue-backed queues, each with a synchronous
    driver (works) and a ThreadPoolExecutor driver (stops early — this
    is the behavior the question reports; see NOTE comments below).
    """

    def __init__(self, start_list):
        # with list
        self.search_queue_list = start_list
        # with Queue
        self.search_queue_queue = Queue()
        for item in self.search_queue_list:
            self.search_queue_queue.put(item)
        # ids that passed the <= 1000 check
        self.good_ids = []

    def get_total(self, search_id):
        # artificial delay
        sleep(0.5)
        return dummy_data[search_id]['total']

    def get_children(self, search_id):
        # artificial delay
        sleep(0.5)
        return dummy_data[search_id]['children']

    # START LIST
    def current_check_list(self):
        # get first element in search_queue_list
        current_id = self.search_queue_list.pop(0)
        # check if current_id['total'] is over 1000
        if self.get_total(current_id) <= 1000:
            self.good_ids.append(current_id)
        else:
            # prepend children to search_queue_list
            self.search_queue_list.extend(self.get_children(current_id))

    def search_list(self):
        while self.search_queue_list:
            self.current_check_list()

    def multi_search_list(self):
        # NOTE(review): the while condition is evaluated in the main
        # thread, which races the workers. Workers pop() immediately but
        # only extend() with children ~1s later, so the list looks empty
        # and the loop exits before any children arrive; the executor's
        # shutdown then merely waits for in-flight tasks. Presumably this
        # is why only the initial ids get processed — confirm by tracing.
        with ThreadPoolExecutor() as e:
            while self.search_queue_list:
                e.submit(self.current_check_list)
    # END LIST

    # START QUEUE
    def current_check_queue(self):
        # get item from search_queue_queue
        current_id = self.search_queue_queue.get()
        # check if current_id['total'] is over 1000
        if self.get_total(current_id) <= 1000:
            self.good_ids.append(current_id)
        else:
            # put children in search_queue_queue
            for child in self.get_children(current_id):
                self.search_queue_queue.put(child)

    def search_queue(self):
        while not self.search_queue_queue.empty():
            self.current_check_queue()

    def multi_search_queue(self):
        # NOTE(review): same race as multi_search_list — empty() is
        # checked before the workers have put the children back.
        with ThreadPoolExecutor() as e:
            while not self.search_queue_queue.empty():
                e.submit(self.current_check_queue)
    # END QUEUE
# Demo runs: the synchronous drivers drain the whole graph; the threaded
# drivers stop after the initial items (the reported problem).

# synchronous list
s = SearchDummy(['id1','id2'])
s.search_list()
print('List output', s.good_ids) # returns ['id101', 'id102', 'id2']
print('Remaining list size', len(s.search_queue_list)) # returns 0
# synchronous queue
s = SearchDummy(['id1','id2'])
s.search_queue()
print('Queue output', s.good_ids) # returns ['id101', 'id102', 'id2']
print('Remaining queue size', s.search_queue_queue.qsize()) # returns 0
# multiprocessing list
s = SearchDummy(['id1','id2'])
s.multi_search_list()
print('Multi list output', s.good_ids) # returns ['id2']
print('Multi list remaining', s.search_queue_list) # returns ['id101', 'id102']
# multiprocessing queue
s = SearchDummy(['id1','id2'])
s.multi_search_queue()
print('Multi queue output', s.good_ids) # returns ['id2']
print('Multi queue remaining', list(s.search_queue_queue.queue)) # returns ['id101', 'id102']
I am trying to implement a recursive function, in which my base condition is working fine, another condition is also working properly. But when I jump to the recursive condition it gives me error that,"class name" object has no attribute "function name" .
My class:
class NGram(object):
    """N-gram probability lookup with a stupid-backoff style recursion.

    NOTE(review): the recursive branch of get_prob contains the bug the
    question reports — see the BUG comment on the final return.
    """

    def __init__(self):
        self.n = None
        self.ngramprobability = None
        # n-grams for which a probability is stored in self.out
        self.uniquegrams = [
            ["sample", "this"],
            ["is", "a"],
            ["this", "is"],
            ["sample"],
            ["this"],
            ["a"],
            ["is"],
            ["a", "sample"],
        ]
        # out[0]: unigram entries, out[1]: bigram entries; each entry is
        # the gram's words followed by its probability.
        self.out = [
            [
                ["sample", 0.16666666666666666],
                ["this", 0.3333333333333333],
                ["a", 0.16666666666666666],
                ["is", 0.3333333333333333],
            ],
            [
                ["sample", "this", 1.0],
                ["is", "a", 0.5],
                ["this", "is", 1.0],
                ["a", "sample", 1.0],
            ],
        ]

    def get_prob(self, words):
        """Return the stored probability of *words*, backing off with 0.4."""
        if len(words) == 1:
            probability = [j[-1] for i in self.out for j in i if j[:-1] == words][0]
            return probability  # condition works fine
        elif words in self.uniquegrams:
            probability = [j[-1] for i in self.out for j in i if j[:-1] == words][0]
            return probability  # condition works fine
        else:
            # BUG: `self` must not be passed explicitly on a bound-method
            # call — Python supplies it, so this invokes get_prob with
            # three positional arguments. Should be:
            #     return self.get_prob(words[1:]) * 0.4
            return self.get_prob(self, words[1:]) * 0.4
My script that is raising errors:
# train bi_gram
bi_gram = NGram()
# ["this", "sample"] is not in uniquegrams, so this takes the (buggy)
# recursive branch and raises.
c = bi_gram.get_prob(["this", "sample"])
print(c)
I am not able to understand where I am making mistake. Please help me in solving this error.
Since you don't seem to understand what minimal reproducible example means, here are two examples (you'll notice that I removed everything that's irrelevant and only kept what's necessary to reproduce the issues) :
1/ forgetting to use self. to reference the method:
# Deliberately broken minimal repro #1: the recursive call omits the
# `self.` prefix, so name lookup falls through to a (nonexistent) global.
class NGram(object):
    def get_prob(self, words):
        if len(words) == 1:
            return 1
        else:
            return get_prob(words[1:]) * 0.4  # NameError on any multi-word input

ng = NGram()
print(ng.get_prob(["foo"]))
print(ng.get_prob(["foo", "bar"]))  # raises NameError: 'get_prob' is not defined
which constantly raises a NameError on the second call:
1
Traceback (most recent call last):
File "probs.py", line 10, in <module>
print(ng.get_prob(["foo", "bar"]))
File "probs.py", line 6, in get_prob
return get_prob(words[1:]) * 0.4
NameError: global name 'get_prob' is not defined
2/ using self. to reference the method but incorrectly passing self as argument:
# Deliberately broken minimal repro #2: `self` is passed explicitly to a
# bound method, so get_prob receives three positional arguments.
class NGram(object):
    def get_prob(self, words):
        if len(words) == 1:
            return 1
        else:
            return self.get_prob(self, words[1:]) * 0.4  # TypeError: 3 args given

ng = NGram()
print(ng.get_prob(["foo"]))
print(ng.get_prob(["foo", "bar"]))  # raises TypeError on the recursion
which constantly raises a TypeError on the second call:
1
Traceback (most recent call last):
File "probs.py", line 10, in <module>
print(ng.get_prob(["foo", "bar"]))
File "probs.py", line 6, in get_prob
return self.get_prob(self, words[1:]) * 0.4
TypeError: get_prob() takes 2 positional arguments but 3 were given
And to address your issue - which you wouldn't have if you had done the tutorial -, the correct way is:
class NGram(object):
    """Correct version: the recursion goes through `self`, letting Python
    bind the instance automatically."""

    def get_prob(self, words):
        """Probability of *words*: 1 for a single word, otherwise back off
        by dropping the leading word and applying a 0.4 discount."""
        if len(words) == 1:
            return 1
        # guard clause above replaces the original if/else nesting
        return self.get_prob(words[1:]) * 0.4
ng = NGram()
print(ng.get_prob(["foo"]))
print(ng.get_prob(["foo", "bar"]))
which works as expected:
1
0.4
Now may I kindly suggest you do the full official tutorial, and for future questions do some search before asking here (which [you are supposed to do](https://stackoverflow.com/help/how-to-ask)), and provide all relevant information and (when relevant) a real minimal and reproducible example?
Short answer: replace your code at line #22 from
return self.get_prob(self, words[1:]) * 0.4
to
return self.get_prob(words[1:]) * 0.4
You are not supposed to give self as an argument when calling any function of a class (it's only included in the definition).
Long answer: Check #Bruno's answer
I know there are many questions with the same title. My situation is a little different. I have a string like:
"Cat(Money(8)Points(80)Friends(Online(0)Offline(8)Total(8)))Mouse(Money(10)Points(10000)Friends(Online(10)Offline(80)Total(90)))"
(Notice that there are parenthesis nested inside another)
and I need to parse it into nested dictionaries like for example:
d["Cat"]["Money"] == 8
d["Cat"]["Points"] = 80
d["Mouse"]["Friends"]["Online"] == 10
and so on. I would like to do this without libraries and regex. If you choose to use these, please explain the code in great detail.
Thanks in advance!
Edit:
Although this code will not make any sense, this is what I have so far:
o_str = "Jake(Money(8)Points(80)Friends(Online(0)Offline(8)Total(8)))Mouse(Money(10)Points(10000)Friends(Online(10)Offline(80)Total(90)))"
spl = o_str.split("(")
def reverseIndex(str1, str2):
try:
return len(str1) - str1.rindex(str2)
except Exception:
return len(str1)
def app(arr,end):
new_arr = []
for i in range(0,len(arr)):
if i < len(arr)-1:
new_arr.append(arr[i]+end)
else:
new_arr.append(arr[i])
return new_arr
spl = app(spl,"(")
ends = []
end_words = []
op = 0
cl = 0
for i in range(0,len(spl)):
print i
cl += spl[i].count(")")
op += 1
if cl == op-1:
ends.append(i)
end_words.append(spl[i])
#break
print op
print cl
print
print end_words
The end words are the sections at the beginning of each statement. I plan on using recursive to do the rest.
Now that was interesting. You really nerd-sniped me on this one...
def parse(tokens):
    """Recursively build a nested dict (or int leaf) from a token iterator.

    The iterator is shared across recursion levels; each level consumes
    tokens until the ')' that closes it. A name followed immediately by
    ')' is a leaf and is returned as an int.
    """
    node = {}
    for tok in tokens:
        # A ')' here closes the current dict. Peeking at the following
        # token also advances the shared iterator past the '(' that
        # opens a child scope.
        if tok == ")" or next(tokens) == ")":
            break
        node[tok] = parse(tokens)
    # An empty dict means we were parked on a leaf value.
    return node or int(tok)
Setup and demo:
>>> s = "Cat(Money(8)Points(80)Friends(Online(0)Offline(8)Total(8)))Mouse(Money(10)Points(10000)Friends(Online(10)Offline(80)Total(90)))"
>>> tokens = iter(s.replace("(", " ( ").replace(")", " ) ").split())
>>> pprint(parse(tokens))
{'Cat': {'Friends': {'Offline': 8, 'Online': 0, 'Total': 8},
'Money': 8,
'Points': 80},
'Mouse': {'Friends': {'Offline': 80, 'Online': 10, 'Total': 90},
'Money': 10,
'Points': 10000}}
Alternatively, you could also use a series of string replacements to turn that string into an actual Python dictionary string and then evaluate that, e.g. like this:
# Build a dict-literal string out of s and evaluate it.
# FIX: the chained .replace() calls must sit inside parentheses — as
# originally printed (bare leading-dot lines) this was a SyntaxError.
# NOTE(review): eval is only acceptable because *s* is a trusted literal
# defined above; never do this with untrusted input.
as_dict = eval(
    "{'"
    + s.replace(")", "'}, ")
       .replace("(", "': {'")
       .replace(", ", ", '")
       .replace(", ''", "")[:-3]
    + "}"
)
This will wrap the 'leafs' in singleton sets of strings, e.g. {'8'} instead of 8, but this should be easy to fix in a post-processing step.
I am writing a Python class to model a process and I want to initialized the parameters from a file, say 'input.dat'. The format of the input file looks like this.
'input.dat' file:
Z0: 0 0
k: 0.1
g: 1
Delta: 20
t_end: 300
The code I wrote is the following. It works but appears redundant and inflexible. Is there a better way to do the job? Such as a loop to do readline() and then match the keyword?
def load(self, filename="input.dat"):
    """Populate parameters on *self* from *filename*.

    Lines are read in a fixed order: Z0 (two floats), then k, g, Delta
    and t_end (one float each). Lines with an unexpected field count are
    silently skipped, as in the original.
    """
    # FIXES: 'with' guarantees the handle is closed (the original leaked
    # it), and '==' replaces 'is' — identity comparison between ints is a
    # CPython implementation detail, not a reliable equality test.
    with open(filename) as fh:
        s = fh.readline().split()
        if len(s) == 3:
            self.z0 = [float(s[1]), float(s[2])]  # initial state
        s = fh.readline().split()
        if len(s) == 2:
            self.k = float(s[1])  # kappa
        s = fh.readline().split()
        if len(s) == 2:
            self.g = float(s[1])
        s = fh.readline().split()
        if len(s) == 2:
            self.D = float(s[1])  # Delta
        s = fh.readline().split()
        if len(s) == 2:
            self.T = float(s[1])  # end time
Assuming the params are coming from a safe place (made by you or users, not the internet), just make the parameters file a Python file, params.py:
Z0 = (0, 0)
k = 0.1
g = 1
Delta = 20
t_end = 300
Then in your code all you need is:
import params
fancy_calculation(10, k=params.k, delta=params.Delta)
The beauty of this is two-fold: 1) simplicity, and 2) you can use the power of Python in your parameter descriptions -- particularly useful here, for example:
k = 0.1
Delta = 20
g = 3 * k + Delta
Alternatively, you could use Python's built-in JSON or ConfigParser .INI parser modules.
If you are open to some other kind of file where you can keep your parameters, I would suggest you to use a YAML file.
The Python library is PyYAML. This is how you can easily use it with Python.
For a better introduction, look at this Wikipedia article: http://en.wikipedia.org/wiki/YAML.
The benefit is you can read the parameter values as lists or maps.
You would love it!
Try the following:
def load(self, filename="input.dat"):
    """Read 'label: value(s)' lines and set the mapped attribute on self.

    Multi-field values become lists of floats; single fields become a
    bare float. Unknown labels raise KeyError, as in the original.
    """
    # file label -> attribute name on the object
    d = {"Z0": "z0", "k": "k", "g": "g", "Delta": "D", "t_end": "T"}
    with open(filename) as fh:  # FIX: context manager closes the handle
        for line in fh:
            name, value = line.split(":")
            value = value.strip()
            if " " in value:
                # FIX: list comprehension instead of bare map() — under
                # Python 3, map() returns a lazy iterator, so the
                # attribute would not be the list the answer's "proof"
                # (run on Python 2) shows.
                value = [float(x) for x in value.split()]
            else:
                value = float(value)
            setattr(self, d[name], value)
Proof that it works:
>>> class A(object): pass
...
>>> a = A()
>>> load(a)
>>> a.__dict__
{'k': 0.10000000000000001, 'z0': [0.0, 0.0], 'D': 20.0, 'g': 1.0, 'T': 300.0}
As others have mentioned, in Python you can create object attributes dynamically "on the fly". That means you could do something like the following to create Params objects as they're read-in. I've tried to make the code as data-driven as possible, so relatively flexible.
# maps each file label to its attribute name followed by the converter
# (type) for every expected data field
label_attr_map = {
    "Z0:": ["z0", float, float],
    "k:": ["k", float],
    "g:": ["g", float],
    "Delta:": ["D", float],
    "t_end:": ["T", float],
}


class Params(object):
    """Parameter bag whose attributes are populated from a labelled file."""

    def __init__(self, input_file_name):
        with open(input_file_name, 'r') as input_file:
            for line in input_file:
                fields = line.split()
                label = fields[0]
                payload = fields[1:]  # everything after the label is data
                spec = label_attr_map[label]
                attr_name = spec[0]
                casts = spec[1:]  # one converter per expected field
                converted = [casts[i](item) for i, item in enumerate(payload)]
                # single values are unwrapped; multi-value lines stay lists
                self.__dict__[attr_name] = converted if len(converted) > 1 else converted[0]
params = Params('input.dat')
print 'params.z0:', params.z0
print 'params.k:', params.k
print 'params.g:', params.g
print 'params.D:', params.D
print 'params.T:', params.T
Output:
params.z0: [0.0, 0.0]
params.k: 0.1
params.g: 1.0
params.D: 20.0
params.T: 300.0
Perhaps this might give you what you need:
def load(self, filename='input.dat'):
    """Set one attribute per 'name: value(s)' line of *filename*.

    Values are kept as strings: a single field is stored as-is, multiple
    fields as a list of strings. No error checking, as in the original.
    """
    with open(filename) as fh:
        for line in fh:
            s = line.split()
            # FIX: the original used s[1]/s[2] for the attribute name and
            # value — an off-by-one that raised IndexError on two-field
            # lines and used a data value as the attribute name. The
            # label is s[0] (with its trailing ':' stripped) and the
            # values are everything after it.
            name = s[0].rstrip(':')
            if len(s) == 2:
                setattr(self, name, s[1])
            elif len(s) == 3:
                setattr(self, name, s[1:])
Something like this:
def load(self,filename="input.dat"):
# maps names to number of fields they need
# only necessary for variables with more than 1 field
argmap = dict(Z0=2)
# maps config file names to their attribute names on the object
# if name is the same both places, no need
namemap = dict(Z0="z0", Delta="D", t_end="T")
with open(filename) as FILE:
for line in FILE:
s = line.split()
var = s[0].rstrip(":")
try:
val = [float(x) for x in s[1:]]
except ValueError:
continue
if len(val) == varmap.get(var, 1):
if len(val) == 1:
val = val[0]
setattr(self, namemap.get(var, var), val)
Python objects have a built-in __dict__ member. You can modify it, and then refer to properties as obj.key.
class Data(object):
    """Object whose attributes come straight from a 'label: values' file,
    stored via the instance __dict__."""

    def __init__(self, path='infile.dat'):
        with open(path, 'r') as fo:
            for line in fo.readlines():
                if len(line) < 2:
                    continue  # skip blank / near-empty lines
                # split off the label, then strip colon/space/newline noise
                pieces = [chunk.strip(' :\n') for chunk in line.split(' ', 1)]
                values = [float(tok) for tok in pieces[1].split()]
                # single values are unwrapped so obj.k is a float, while
                # multi-value lines (e.g. Z0) stay lists
                if len(values) == 1:
                    values = values[0]
                self.__dict__[pieces[0]] = values
obj = Data('infile.dat')
print obj.g
print obj.Delta
print obj.Z0
At the end of this, we print out a few of the keys. Here's the output of those.
1.0
20.0
[0.0, 0.0]
For consistency, you can remove the line marked "optional" in my code, and have all objects in lists -- regardless of how many elements they have. That will make using them quite a bit easier, because you never have to worry about obj.g[0] returning an error.
Here's another one
def splitstrip(s):
    """Return the field after the first ':' with surrounding whitespace removed."""
    pieces = s.split(':')
    return pieces[1].strip()
with open('input.dat','r') as f:
    # NOTE(review): 'a' is assumed to already exist in the caller's scope
    # — this snippet does not define it; confirm before reuse.
    # First line holds the two-component initial state.
    a.z0 = [float(x) for x in splitstrip(f.readline()).split(' ')]
    # Remaining four lines are single floats, in file order.
    a.k, a.g, a.D, a.T = tuple([float(splitstrip(x)) for x in f.read().rstrip().split('\n')])
;)
I have strings like this:
"MSE 2110, 3030, 4102"
I would like to output:
[("MSE", 2110), ("MSE", 3030), ("MSE", 4102)]
This is my way of going about it, although I haven't quite gotten it yet:
# pyparsing parse action (Python 2 code, third-party pyparsing grammar).
def makeCourseList(str, location, tokens):
    # NOTE(review): 'str' shadows the builtin; pyparsing passes the
    # original string, match location and token list to parse actions.
    print "before: %s" % tokens
    # Rewrite each bare course number into a (dept, number) tuple using
    # the department captured in the first token group.
    for index, course_number in enumerate(tokens[1:]):
        tokens[index + 1] = (tokens[0][0], course_number)
    print "after: %s" % tokens

course = Group(DEPT_CODE + COURSE_NUMBER)  # .setResultsName("Course")
# a course followed by comma-separated extra numbers, post-processed above
course_data = (course + ZeroOrMore(Suppress(',') + COURSE_NUMBER)).setParseAction(makeCourseList)
This outputs:
>>> course.parseString("CS 2110")
([(['CS', 2110], {})], {})
>>> course_data.parseString("CS 2110, 4301, 2123, 1110")
before: [['CS', 2110], 4301, 2123, 1110]
after: [['CS', 2110], ('CS', 4301), ('CS', 2123), ('CS', 1110)]
([(['CS', 2110], {}), ('CS', 4301), ('CS', 2123), ('CS', 1110)], {})
Is this the right way to do it, or am I totally off?
Also, the output isn't quite correct - I want course_data to emit a list of course symbols that are in the same format as each other. Right now, the first course is different from the others. (It has a {}, whereas the others don't.)
This solution memorizes the department when parsed, and emits a (dept,coursenum) tuple when a number is found.
# Answer's solution (Python 2, third-party pyparsing): remember the
# department when it is parsed, emit (dept, number) for each number.
from pyparsing import Suppress,Word,ZeroOrMore,alphas,nums,delimitedList

data = '''\
MSE 2110, 3030, 4102
CSE 1000, 2000, 3000
'''

def memorize(t):
    # stash the most recent department on the function object (poor man's state)
    memorize.dept = t[0]

def token(t):
    # pair the remembered department with this course number
    return (memorize.dept,int(t[0]))

course = Suppress(Word(alphas).setParseAction(memorize))
number = Word(nums).setParseAction(token)
line = course + delimitedList(number)
lines = ZeroOrMore(line)

print lines.parseString(data)
Output:
[('MSE', 2110), ('MSE', 3030), ('MSE', 4102), ('CSE', 1000), ('CSE', 2000), ('CSE', 3000)]
Is this the right way to do it, or am
I totally off?
It's one way to do it, though of course there are others (e.g. use as parse actions two bound method -- so the instance the method belongs to can keep state -- one for the dept code and another for the course number).
The return value of the parseString call is harder to bend to your will (though I'm sure sufficiently dark magic will do it and I look forward to Paul McGuire explaining how;-), so why not go the bound-method route as in...:
from pyparsing import *
# Alternative (Python 2, pyparsing): keep state on an instance and use a
# bound method as the parse action.
DEPT_CODE = Regex(r'[A-Z]{2,}').setResultsName("DeptCode")
COURSE_NUMBER = Regex(r'[0-9]{4}').setResultsName("CourseNumber")

class MyParse(object):
    """Holds the parse result so the bound-method action can store it."""

    def __init__(self):
        self.result = None

    def makeCourseList(self, str, location, tokens):
        # NOTE(review): 'str' shadows the builtin (pyparsing action signature)
        print "before: %s" % tokens
        dept = tokens[0][0]
        # normalize: first token group and bare numbers all become (dept, num)
        newtokens = [(dept, tokens[0][1])]
        newtokens.extend((dept, tok) for tok in tokens[1:])
        print "after: %s" % newtokens
        self.result = newtokens

course = Group(DEPT_CODE + COURSE_NUMBER).setResultsName("Course")
inst = MyParse()
course_data = (course + ZeroOrMore(Suppress(',') + COURSE_NUMBER)
               ).setParseAction(inst.makeCourseList)
ignore = course_data.parseString("CS 2110, 4301, 2123, 1110")
print inst.result
this emits:
before: [['CS', '2110'], '4301', '2123', '1110']
after: [('CS', '2110'), ('CS', '4301'), ('CS', '2123'), ('CS', '1110')]
[('CS', '2110'), ('CS', '4301'), ('CS', '2123'), ('CS', '1110')]
which seems to be what you require, if I read your specs correctly.
data = '''\
MSE 2110, 3030, 4102
CSE 1000, 2000, 3000'''
def get_courses(data):
    """Yield (department, number) string pairs from rows shaped like
    'MSE 2110, 3030, 4102', one pair per course number."""
    for row in data.splitlines():
        # dropping the commas first lets a single split() tokenize the row
        department, *numbers = row.replace(",", "").split()
        yield from ((department, number) for number in numbers)
This would give a generator for the course codes. A list can be made with list() if need be, or you can iterate over it directly.
Sure, everybody loves PyParsing. For easy stuff like this split is sooo much easier to grok:
data = '''\
MSE 2110, 3030, 4102
CSE 1000, 2000, 3000'''

# NOTE: 'all' shadows the builtin, but the name is kept for compatibility
# with the original snippet.
all = []
for row in data.split('\n'):
    # first token is the department; the remainder is the comma list
    klass, num_l = row.split(' ', 1)
    for num in num_l.split(','):
        all.append((klass, int(num)))