calling setattr before 'self' is returned - python

I suspect this is a bit of a klugefest on my part, but I'm working with the Luigi and Sciluigi modules, which set a number of critical parameters PRIOR to 'self' being returned by __init__. And if I try to manhandle these parameters AFTER self is returned, the Luigi.Parameter object masks them in such a way that I can't do what I need to do.
The luigi and sciluigi classes (as one uses them) contain no __init__. And if I try to insert an __init__ or call super(ChildClass, self).__init__(*args, **kwargs), I get weird 'unexpected parameter' errors.
So a Sciluigi class looks like this...
class MyTask(sciluigi.Task):
    param1 = sciluigi.Parameter(default='Yes')  # String only
    param2 = sciluigi.Parameter(default='No')   # String only

    def out_target(self):
        return sciluigi.TargetInfo(self, self.out)

    def run(self):
        with self.out_target().open('w') as foofile:
            foofile.write('foo\n')
So...I'm hoping I can dynamically set some parameters via setattr PRIOR to 'self' actually being returned. But setattr requires the object.
I was hoping I could do something like...
setattr(inspect.stack()[?][?], 'DynamicVar', sciluigi.Parameter(default='Yes') )
EDIT: @Charles Duffy
Well, I'm not sure what info would be most helpful.
The first issue is: I can't add an __init__. The actual code is below, with an __init__ method added. I've included the resulting error I get when I try to run it. It's the same error as when I try the super call to __init__.
class FileConverter(sciluigi.Task):
    """
    """
    in_target = None                          # Mandatory
    out = sciluigi.Parameter()                # <file>
    exepath = sciluigi.Parameter(default="")

    def __init__(self):
        self.var = 'anything'

    def out_target(self):
        log.debug("In 'FileConverter.out_target'... ")
        return sciluigi.TargetInfo(self, self.out)

    def run(self):
        result = None
        command = ''.join([
            self.exepath, _slash, "FileConverter ",
            " -in ", self.in_target().path,
            " -out ", self.out_target().path,
            " -out_type ", self.file_type
        ])
        log.info("RUNNING COMMAND: " + command)
        result = self.ex(command)
        log.info("result: " + result[1])
Error
2017-02-24 17:01:48 | WARNING | Will not run MyWorkflow(instance_name=sciluigi_workflow) or any dependencies due to error in deps() method:
Traceback (most recent call last):
File "/Library/Python/2.7/site-packages/luigi/worker.py", line 697, in _add
deps = task.deps()
File "/Library/Python/2.7/site-packages/luigi/task.py", line 572, in deps
return flatten(self._requires())
File "/Library/Python/2.7/site-packages/luigi/task.py", line 544, in _requires
return flatten(self.requires()) # base impl
File "/Library/Python/2.7/site-packages/sciluigi/workflow.py", line 105, in requires
workflow_output = self.workflow()
File "/Users/mikes/Documents/Eclipseworkspace/Bioproximity/OpenMS-Python-Luigi/site-packages/Bioproximity/sciluigi_tasks/PipelineTest1.py", line 33, in workflow
exepath = "/Applications/OpenMS-2.1.0/TOPP"
File "/Library/Python/2.7/site-packages/sciluigi/workflow.py", line 145, in new_task
newtask = sciluigi.new_task(instance_name, cls, self, **kwargs)
File "/Library/Python/2.7/site-packages/sciluigi/task.py", line 37, in new_task
newtask = cls.from_str_params(kwargs)
File "/Library/Python/2.7/site-packages/luigi/task.py", line 412, in from_str_params
return cls(**kwargs)
File "/Library/Python/2.7/site-packages/luigi/task_register.py", line 99, in __call__
h[k] = instantiate()
File "/Library/Python/2.7/site-packages/luigi/task_register.py", line 80, in instantiate
return super(Register, cls).__call__(*args, **kwargs)
TypeError: __init__() got an unexpected keyword argument 'instance_name'
The second issue is:
If I wait for self to return, I can no longer differentiate between (for example using the above code)...
in_target = None # Mandatory
out = sciluigi.Parameter() # <file>
If I do a type(out), type reports that the parameter is just a string (not a sciluigi.Parameter object), so if I try to use isinstance(out, sciluigi.Parameter)...it returns False.
The bottom line is:
I need to be able to set the sciluigi.Parameter objects dynamically (programmatically) and subsequently be able to differentiate between a sciluigi.Parameter() object variable (like out) and a 'real' str() object (like in_target).
I hope this makes sense.

Accept and silently discard arguments in your constructor, like so:
class FileConverter(sciluigi.Task):
    def __init__(self, *_args, **_kwargs):
        self.var = 'anything'
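If the parent class still needs to run its own setup (which is usually the case for luigi/sciluigi tasks), a slightly fuller sketch, assuming you also want the framework's initialization to happen, would forward the swallowed arguments to the base class:

class FileConverter(sciluigi.Task):
    def __init__(self, *_args, **_kwargs):
        # Forward whatever luigi/sciluigi passes in (e.g. instance_name)
        # so the framework's own initialization still runs.
        super(FileConverter, self).__init__(*_args, **_kwargs)
        self.var = 'anything'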

Just for future reference, the answer to the isolated question, "How to...
setattr(<thisClassObject>, 'DynamicVar', sciluigi.Parameter(default='Yes') )
...is to use the locals() built-in function, i.e.
locals()['DynamicVar'] = sciluigi.Parameter(default='Yes') #String only
This is a snippet of how I solved my particular kluge ;)
deleteme.py
import sciluigi

class MyFooWriter(sciluigi.Task):
    locals()['outfile'] = sciluigi.Parameter(default='./foo.txt')  # String only
    locals()['normalvar'] = 'Normalstring'
    print "pre-self-returned outfile type =", type(outfile)
    print "pre-self-returned normalvar type =", type(normalvar)
    # locals()['param1'] =

    def out_foo(self):
        # raw_input("Enter...")
        return sciluigi.TargetInfo(self, self.outfile)

    def run(self):
        print "self.outfile type =", type(self.outfile)
        print "self.normalvar type =", type(self.normalvar)
        # raw_input("Enter...")
        with self.out_foo().open('w') as foofile:
            foofile.write('foo\n')

class MyWorkflow(sciluigi.WorkflowTask):
    def workflow(self):
        print 'Starting workflow...'
        foowriter = self.new_task('foowriter', MyFooWriter, outfile='testfile.txt')
        return foowriter

if __name__ == '__main__':
    sciluigi.run_local(main_task_cls=MyWorkflow)
OUTPUT
pre-self-returned outfile type = <class 'sciluigi.parameter.Parameter'>
pre-self-returned normalvar type = <type 'str'>
Starting workflow...
2017-02-27 12:08:37 | INFO | --------------------------------------------------------------------------------
2017-02-27 12:08:37 | INFO | SciLuigi: MyWorkflow Workflow Started (logging to log/workflow_myworkflow_started_20170227_110837_278707.log)
2017-02-27 12:08:37 | INFO | --------------------------------------------------------------------------------
2017-02-27 12:08:37 | INFO | Task foowriter started
self.outfile type = <type 'str'>
self.normalvar type = <type 'str'>
2017-02-27 12:08:37 | INFO | Task foowriter finished after 0.001s
Starting workflow...
2017-02-27 12:08:37 | INFO | --------------------------------------------------------------------------------
2017-02-27 12:08:37 | INFO | SciLuigi: MyWorkflow Workflow Finished (workflow log at log/workflow_myworkflow_started_20170227_110837_278707.log)
2017-02-27 12:08:37 | INFO | --------------------------------------------------------------------------------
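As a footnote on the "bottom line" above (telling a sciluigi.Parameter apart from a plain string): after instantiation the parameter attributes hold their string values, but the Parameter objects are still visible on the class itself. A minimal sketch of that check, reusing the MyFooWriter class above:

import sciluigi

def declared_parameters(task_cls):
    """Names of attributes declared as sciluigi.Parameter on the class (not the instance)."""
    return [name for name, value in vars(task_cls).items()
            if isinstance(value, sciluigi.Parameter)]

print declared_parameters(MyFooWriter)  # ['outfile'] -- 'normalvar' stays a plain str on the class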

Related

How to recursively chain a Celery task that returns a list into a group?

I started from this question: How to chain a Celery task that returns a list into a group?
But I want to expand twice. So in my use case I have:
task A: determines total number of items for a given date
task B: downloads 1000 metadata entries for that date
task C: download the content for one item
So at each step I'm expanding the number of items for the next step. I can do it by looping through the results in my task and calling .delay() on the next task function, but I thought I'd try not to make my main tasks do that. Instead they'd return a list of tuples - each tuple would then be expanded into the arguments for a call to the next function.
The above question has an answer that appears to meet my need, but I can't work out the correct way of chaining it for a two level expansion.
Here is a very cut down example of my code:
from celery import group
from celery.task import subtask
from celery.utils.log import get_task_logger

from .celery import app

logger = get_task_logger(__name__)

@app.task
def task_range(upper=10):
    # wrap in list to make JSON serializer work
    return list(zip(range(upper), range(upper)))

@app.task
def add(x, y):
    logger.info(f'x is {x} and y is {y}')
    char = chr(ord('a') + x)
    char2 = chr(ord('a') + x*2)
    result = x + y
    logger.info(f'result is {result}')
    return list(zip(char * result, char2 * result))

@app.task
def combine_log(c1, c2):
    logger.info(f'combine log is {c1}{c2}')

@app.task
def dmap(args_iter, celery_task):
    """
    Takes an iterator of argument tuples and queues them up for celery to run with the function.
    """
    logger.info(f'in dmap, len iter: {len(args_iter)}')
    callback = subtask(celery_task)
    run_in_parallel = group(callback.clone(args) for args in args_iter)
    return run_in_parallel.delay()
I've then tried various ways to make my nested mapping work. First, a one level mapping works fine, so:
pp = (task_range.s() | dmap.s(add.s()))
pp(2)
Produces the kind of results I'd expect, so I'm not totally off.
But when I try to add another level:
ppp = (task_range.s() | dmap.s(add.s() | dmap.s(combine_log.s())))
Then in the worker I see the error:
[2019-11-23 22:34:12,024: ERROR/ForkPoolWorker-2] Task proj.tasks.dmap[e92877a9-85ce-4f16-88e3-d6889bc27867] raised unexpected: TypeError("add() missing 2 required positional arguments: 'x' and 'y'",)
Traceback (most recent call last):
File "/home/hdowner/.venv/play_celery/lib/python3.6/site-packages/celery/app/trace.py", line 385, in trace_task
R = retval = fun(*args, **kwargs)
File "/home/hdowner/.venv/play_celery/lib/python3.6/site-packages/celery/app/trace.py", line 648, in __protected_call__
return self.run(*args, **kwargs)
File "/home/hdowner/dev/playground/celery/proj/tasks.py", line 44, in dmap
return run_in_parallel.delay()
File "/home/hdowner/.venv/play_celery/lib/python3.6/site-packages/celery/canvas.py", line 186, in delay
return self.apply_async(partial_args, partial_kwargs)
File "/home/hdowner/.venv/play_celery/lib/python3.6/site-packages/celery/canvas.py", line 1008, in apply_async
args=args, kwargs=kwargs, **options))
File "/home/hdowner/.venv/play_celery/lib/python3.6/site-packages/celery/canvas.py", line 1092, in _apply_tasks
**options)
File "/home/hdowner/.venv/play_celery/lib/python3.6/site-packages/celery/canvas.py", line 578, in apply_async
dict(self.options, **options) if options else self.options))
File "/home/hdowner/.venv/play_celery/lib/python3.6/site-packages/celery/canvas.py", line 607, in run
first_task.apply_async(**options)
File "/home/hdowner/.venv/play_celery/lib/python3.6/site-packages/celery/canvas.py", line 229, in apply_async
return _apply(args, kwargs, **options)
File "/home/hdowner/.venv/play_celery/lib/python3.6/site-packages/celery/app/task.py", line 532, in apply_async
check_arguments(*(args or ()), **(kwargs or {}))
TypeError: add() missing 2 required positional arguments: 'x' and 'y'
And I'm not sure why changing the argument to dmap() from a plain task signature to a chain changes how the arguments get passed into add(). My impression was that it shouldn't, it just means the return value of add() would get passed on. But apparently that is not the case ...
Turns out the problem is that the clone() method on a chain instance does not pass the arguments through at some point - see https://stackoverflow.com/a/53442344/3189 for the full details. If I use the method in that answer, my dmap() code becomes:
from copy import deepcopy  # clone_signature below uses deepcopy

@app.task
def dmap(args_iter, celery_task):
    """
    Takes an iterator of argument tuples and queues them up for celery to run with the function.
    """
    callback = subtask(celery_task)
    run_in_parallel = group(clone_signature(callback, args) for args in args_iter)
    return run_in_parallel.delay()

def clone_signature(sig, args=(), kwargs=(), **opts):
    """
    Turns out that a chain clone() does not copy the arguments properly - this
    clone does.
    From: https://stackoverflow.com/a/53442344/3189
    """
    if sig.subtask_type and sig.subtask_type != "chain":
        raise NotImplementedError(
            "Cloning only supported for Tasks and chains, not {}".format(sig.subtask_type)
        )
    clone = sig.clone()
    if hasattr(clone, "tasks"):
        task_to_apply_args_to = clone.tasks[0]
    else:
        task_to_apply_args_to = clone
    args, kwargs, opts = task_to_apply_args_to._merge(args=args, kwargs=kwargs, options=opts)
    task_to_apply_args_to.update(args=args, kwargs=kwargs, options=deepcopy(opts))
    return clone
And then when I do:
ppp = (task_range.s() | dmap.s(add.s() | dmap.s(combine_log.s())))
everything works as expected.
Thanks for the great answer. I had to tweak the code to make sure it could handle tasks with single arguments. I am sure this is awful, but it works! Any improvements appreciated.
@celery_app.task(name='app.worker.dmap')
def dmap(args_iter, celery_task):
    """
    Takes an iterator of argument tuples and queues them up for celery to run with the function.
    """
    callback = subtask(celery_task)
    print(f"ARGS: {args_iter}")
    args_list = []
    run_in_parallel = group(clone_signature(callback, args if type(args) is list else [args]) for args in args_iter)
    print(f"Finished Loops: {run_in_parallel}")
    return run_in_parallel.delay()
Specifically - I added:
if type(args) is list else [args]
to this line:
run_in_parallel = group(clone_signature(callback, args if type(args) is list else [args]) for args in args_iter)
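For what it's worth, a small hedged variation on that tweak (not from the original answer) pulls the normalization into a helper so that single values, lists, and tuples are all treated the same way:

def as_arg_tuple(args):
    """Normalize a single value, a list, or a tuple into a tuple of positional args."""
    if isinstance(args, (list, tuple)):
        return tuple(args)
    return (args,)

run_in_parallel = group(clone_signature(callback, as_arg_tuple(args)) for args in args_iter)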

When patching 2 objects, the second returns the first patched value

I am writing a unit test for one of my functions where I have to patch 2 objects.
@patch('mypackage.models.db_models.MongoClient',
       return_value={})
@patch('mypackage.models.db_models.GridFS')
def test_file_in_db(self, mock_mongoclient, mock_gridfs):
    print "*"*80
    print mock_gridfs
    print mock_gridfs.return_value
    print "*"*80
    mock_gridfs.return_value.new_file.return_value = {}
This gives error:
----------------------------------------------------------------------
Traceback (most recent call last):
File "/venv/lib/python2.7/site-packages/mock/mock.py", line 1305, in patched
return func(*args, **keywargs)
File "/tests/models/test_db_models.py", line 29, in test_file_in_db
mock_gridfs.return_value.new_file.return_value = {}
AttributeError: 'dict' object has no attribute 'new_file'
-------------------- >> begin captured stdout << ---------------------
********************************************************************************
<MagicMock name='MongoClient' id='4385486992'>
{}
********************************************************************************
--------------------- >> end captured stdout << ----------------------
When I access the second argument, i.e. mock_gridfs, why does it return the Mock object for MongoClient?
You have them in the wrong order; the parameters go in the reverse order of the decorators (the bottom decorator supplies the first argument).
@patch('mypackage.models.db_models.MongoClient',
       return_value={})
@patch('mypackage.models.db_models.GridFS')
def test_file_in_db(self, mock_gridfs, mock_mongoclient):
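To make the ordering rule concrete, here is a minimal self-contained sketch (it reuses the question's module path, so it only runs where mypackage actually exists): decorators apply bottom-up, so the innermost @patch supplies the first mock argument after self.

from mock import patch
import unittest

class TestPatchOrder(unittest.TestCase):
    @patch('mypackage.models.db_models.MongoClient', return_value={})  # outermost -> last argument
    @patch('mypackage.models.db_models.GridFS')                        # innermost -> first argument
    def test_file_in_db(self, mock_gridfs, mock_mongoclient):
        # mock_gridfs really is the GridFS mock, so configuring new_file works as intended
        mock_gridfs.return_value.new_file.return_value = {}
        self.assertEqual(mock_mongoclient.return_value, {})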

Python - Selenium Web Driver error - self._driver.execute - AttributeError: 'unicode' object has no attribute 'id'

I found an answer here, but my code already does what it suggested and still produces the same error, so I'm hoping for another answer.
This is my code that calls ActionChains:
elif first_col_value == first_col_in_assign:
    res2, assign_string = assign_cmd(spreadsheet_name, row)
    print "Got to main 8 - res2/cmd_string: %s %s" % (res2, assign_string)
    # assign_string2 = u"search_field = driver.find_element_by_name(“q”)"
    if not res2:
        exit(False)
    else:
        action = webdriver.ActionChains(driver).move_to_element(assign_string)
        action.perform()
        continue
This is what the assign_string looks like built from the spreadsheet:
In assign_cmd - param1 = %s search_field
In assign_cmd - param2 = %s driver.find_element_by_name
In assign_cmd - param3 = %s “q”
In assign_cmd - param4 = %s #
Got to main 8 - res2/assign_string: True search_field = driver.find_element_by_name(“q”)
and this is the error:
Traceback (most recent call last):
File "/home/susan/PycharmProjects/justPython/test1.py", line 397, in <module>
action.perform()
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/common/action_chains.py", line 70, in perform
action()
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/common/action_chains.py", line 215, in <lambda>
self._driver.execute(Command.MOVE_TO, {'element': to_element.id}))
AttributeError: 'unicode' object has no attribute 'id'
Process finished with exit code 1
I tried putting the unicode string directly into my code that is the commented out line above but it produces the same error. I am stuck and really appreciate any help you can give me. Many thanks.
move_to_element() assumes you are passing in an element found before, not a string:
move_to_element(to_element)
    Moving the mouse to the middle of an element.
For example:
element = driver.find_element_by_id('myid')
action = webdriver.ActionChains(driver).move_to_element(element)
action.perform()
If you have control over the incoming spreadsheet configuration, I'd reorganize it a bit. Instead of having find_element_by_* strings, I'd have two things for each element: a type of locator and a locator itself, e.g.:
| type  | value             |
| xpath | //div[@id="test"] |
| name  | q                 |
...
Then, in your tests, you can use the find_element() method, which receives exactly that: a locator type and a value:
locator_type, locator_value = get_locator_from_spreadsheet(...)
element = driver.find_element(locator_type, locator_value)
action = webdriver.ActionChains(driver).move_to_element(element)
action.perform()
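get_locator_from_spreadsheet() above is only a placeholder for reading your sheet; a minimal sketch of what it could look like (assuming each row stores a locator-type string and a locator value) is:

from selenium.webdriver.common.by import By

LOCATOR_TYPES = {
    'id': By.ID,
    'name': By.NAME,
    'xpath': By.XPATH,
    'css': By.CSS_SELECTOR,
}

def get_locator_from_spreadsheet(row):
    # row is e.g. ('name', 'q') or ('xpath', '//div[@id="test"]')
    locator_type, locator_value = row
    return LOCATOR_TYPES[locator_type.strip().lower()], locator_value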

Python unittesting initiate values

Sorry if this question is stupid. I created a unittest class which needs to take given inputs and outputs from outside. Thus, I guess these values should be initialized. However, I ran into some errors with the following code:
CODE:
import unittest
from StringIO import StringIO

########## Inputs and outputs from outside ##########
a = [1, 2]
b = [2, 3]
out = [3, 4]
#####################################################

def func1(a, b):
    return a + b

class MyTestCase(unittest.TestCase):
    def __init__(self, a, b, out):
        self.a = a
        self.b = b
        self.out = out

    def testMsed(self):
        for i in range(self.tot_iter):
            print i
            fun = func1(self.a[i], self.b[i])
            value = self.out[i]
            testFailureMessage = "Test of function name: %s iteration: %i expected: %i != calculated: %i" % ("func1", i, value, fun)
            self.assertEqual(round(fun, 3), round(value, 3), testFailureMessage)

if __name__ == '__main__':
    f = MyTestCase(a, b, out)
    from pprint import pprint
    stream = StringIO()
    runner = unittest.TextTestRunner(stream=stream, verbosity=2)
    result = runner.run(unittest.makeSuite(MyTestCase(a, b, out)))
    print 'Tests run', result.testsRun
However, I got the following error
Traceback (most recent call last):
File "C:testing.py", line 33, in <module>
result = runner.run(unittest.makeSuite(MyTestCase(a,b,out)))
File "C:\Python27\lib\unittest\loader.py", line 310, in makeSuite
return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromTestCase(testCaseClass)
File "C:\Python27\lib\unittest\loader.py", line 50, in loadTestsFromTestCase
if issubclass(testCaseClass, suite.TestSuite):
TypeError: issubclass() arg 1 must be a class
Can anyone give me some suggestions? Thanks!
The root of the problem is this line,
result = runner.run(unittest.makeSuite(MyTestCase(a,b,out)))
unittest.makeSuite expects a class, not an instance of a class. So just MyTestCase, not MyTestCase(a, b, out). This means that you can't pass parameters to your test case in the manner you are attempting to. You should probably move the code from __init__ to a setUp function. Either access a, b, and out as globals inside setUp or take a look at this link for information regarding passing parameters to a unit test.
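A minimal sketch of that setUp approach, reusing the module-level a, b and out from the question and adding the tot_iter the original loop relied on:

class MyTestCase(unittest.TestCase):
    def setUp(self):
        # setUp runs before every test method; pull the module-level data in here.
        self.a = a
        self.b = b
        self.out = out
        self.tot_iter = len(self.a)

    def testMsed(self):
        for i in range(self.tot_iter):
            fun = func1(self.a[i], self.b[i])
            value = self.out[i]
            msg = "func1 iteration %i: expected %i != calculated %i" % (i, value, fun)
            self.assertEqual(round(fun, 3), round(value, 3), msg)

if __name__ == '__main__':
    unittest.main()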
By the way, here is the source file within python where the problem originated. Might be informative to read.

How to send a function to a remote Pyro object

I am trying to set up some code using Pyro to process Python functions on a remote host and get results back. After starting the name server, I would execute this code on the remote host (actually still on localhost):
import Pyro4

class Server(object):
    def evaluate(self, func, args):
        return func(*args)

def main():
    server = Server()
    Pyro4.Daemon.serveSimple(
        {
            server: "server"
        },
        ns=True)

if __name__ == '__main__':
    main()
On the client side I have this code, which is an example of the behaviour I am trying to set up.
import Pyro4

remoteServer = Pyro4.Proxy('PYRONAME:server')

def square(x):
    return x**2

print remoteServer.evaluate(square, 4)
However, this code results in the following exception:
/usr/lib/python2.7/site-packages/Pyro4/core.py:155: UserWarning: HMAC_KEY not set,
protocol data may not be secure
warnings.warn("HMAC_KEY not set, protocol data may not be secure")
Traceback (most recent call last):
File "/home/davide/Projects/rempy/example-api-pyro.py", line 7, in <module>
print remoteServer.evaluate(square, 4)
File "/usr/lib/python2.7/site-packages/Pyro4/core.py", line 149, in __call__
return self.__send(self.__name, args, kwargs)
File "/usr/lib/python2.7/site-packages/Pyro4/core.py", line 289, in _pyroInvoke
raise data
AttributeError: 'module' object has no attribute 'square'
It seems to me that the function object is pickled correctly and is sent to the Server instance on the remote host, but there is some problem in the namespace.
How can I solve this problem?
Thanks
I think I know your problem:
The module the function is defined in is called '__main__', and every running Python process has its own.
pickle does not transfer the source code, only a reference:
__main__.square
So you have two possibilities:
Factor square out and make the main module as short as possible, such as:
# main.py
def square(x):
    return x**2

import Pyro4

def main():
    remoteServer = Pyro4.Proxy('PYRONAME:server')
    print remoteServer.evaluate(square, 4)

and:

# __main__.py
import main
main.main()
Then the server can import exactly the same module from the file.
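For completeness, a hedged sketch of the matching server side (essentially the same server as in the question; the only extra requirement is that main.py is importable on the server, so that unpickling can resolve the reference main.square):

# server.py -- assumes main.py from above sits on the server's import path
import Pyro4
import main  # make the module 'main' importable so pickle can resolve main.square

class Server(object):
    def evaluate(self, func, args):
        return func(*args)

Pyro4.Daemon.serveSimple({Server(): "server"}, ns=True)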
or create a module with my code:
import pickle  # dump_and_load below uses pickle

class ThisShallNeverBeCalledError(Exception):
    pass

class _R(object):
    def __init__(self, f, *args):
        self.ret = (f, args)
    def __reduce__(self):
        return self.ret
    def __call__(self, *args):
        raise ThisShallNeverBeCalledError()
    @classmethod
    def fromReduce(cls, value):
        ret = cls(None)
        ret.ret = value
        return ret

def dump_and_load(obj):
    '''pickle and unpickle the object once'''
    s = pickle.dumps(obj)
    return pickle.loads(s)

# this string creates an object of an anonymous type that can
# be called to create an R object or that can be reduced by pickle
# and creates another anonymous type when unpickled
# you may not inherit from this MetaR object because it is not a class
PICKLABLE_R_STRING = "type('MetaR', (object,), " \
    " {'__call__' : lambda self, f, *args: "\
    " type('PICKLABLE_R', "\
    " (object,), "\
    " {'__reduce__' : lambda self: (f, args), "\
    " '__module__' : 'pickleHelp_', "\
    " '__name__' : 'PICKLABLE_R', "\
    " '__call__' : lambda self: None})(), "\
    " '__reduce__' : lambda self: "\
    " self(eval, meta_string, "\
    " {'meta_string' : meta_string}).__reduce__(), "\
    " '__module__' : 'pickleHelp_', "\
    " '__name__' : 'R'})()".replace(' ', '')

PICKLABLE_R = _R(eval, PICKLABLE_R_STRING, \
                 {'meta_string' : PICKLABLE_R_STRING})
R = dump_and_load(PICKLABLE_R)
del PICKLABLE_R, PICKLABLE_R_STRING

PICKLABLE___builtins__ = R(vars, R(__import__, '__builtin__'))
PICKLABLE_FunctionType = R(type, R(eval, 'lambda:None'))

##R.__module__ = __name__
##R.__name__ = 'PICKLABLE_R'

def packCode(code, globals = {}, add_builtins = True, use_same_globals = False, \
             check_syntax = True, return_value_variable_name = 'obj',
             __name__ = __name__ + '.packCode()'):
    '''return an object that executes code in globals when unpickled
    use_same_globals
        if use_same_globals is True all codes sent through
        one pickle connection share the same globals
        by default they don't
    return_value_variable_name
        if a variable with the name in return_value_variable_name exists
        in globals after the code execution
        it is returned as result of the pickling operation
        if not, None is returned
    __name__
    '''
    if check_syntax:
        compile(code, '', 'exec')
    # copying locals is important
    # locals is transferred through pickle for all code identical
    # copying it prevents different code from being executed in same globals
    if not use_same_globals:
        globals = globals.copy()
    if add_builtins:
        globals['__builtins__'] = PICKLABLE___builtins__
    globals.setdefault('obj', None)
    # get the compilation code
    # do not marshal or unmarshal code objects because the platforms may vary
    code = R(compile, code, __name__, 'exec')
    # the final object that can reduce, dump and load itself
    obj = R(R(getattr, tuple, '__getitem__'), (
        R(R(PICKLABLE_FunctionType, code, globals)),
        R(R(getattr, type(globals), 'get'), globals, \
          return_value_variable_name, None)
        ), -1)
    return obj
and then send this to the other side:
packCode('''
def square(...):
...
''', return_value_variable_name = 'square')
and the function will come out on the other side; no module code is needed to transfer this Python function to the server side.
If something does not work out please tell me.
