IO operation timeout - python

I had an issue with os.path.exists() when working with UNCs and network paths in general.
Some servers tend to die in a weird fashion: instead of returning an error they hang for 130 seconds and then return False (I guess Samba has some odd internal timeout that I was unable to find and configure).
So my question is: How to timeout such (atomic) operations?
I just need it to finish in under 2 seconds. I've tried using threading and mutable object for value returning like this:
import time
import threading
import os.path
class ValueContainer(object):
    """Mutable single-value holder.

    (The original docstring said "imutable", but the point of this class is
    the opposite: the worker thread mutates ``value`` in place so the caller
    can read the result after the thread finishes.)
    """

    def __init__(self, value=None):
        # Result slot; stays None until the wrapped call completes.
        self.value = value
def timed_out_function(target, timeout, *args, **kwargs):
    """Run ``target(*args, **kwargs)`` in a daemon thread, bounded by ``timeout``.

    Parameters:
        target: callable to execute in a worker thread.
        timeout: seconds to wait for the call to finish.
        *args, **kwargs: forwarded to ``target``.

    Returns:
        Whatever ``target`` returned.

    Raises:
        Exception('Timeout'): if the call did not finish within ``timeout``.
        Any exception raised by ``target`` is re-raised in the caller
        (the original version silently swallowed worker exceptions and
        returned None).
    """
    # Plain dict instead of the ValueContainer helper: keeps this function
    # self-contained and lets us distinguish "no result yet" from "error".
    outcome = {}

    def inner():
        try:
            outcome['value'] = target(*args, **kwargs)
        except BaseException as exc:
            outcome['error'] = exc

    worker = threading.Thread(target=inner)
    # Daemon thread: a hung IO call must not keep the interpreter alive.
    worker.daemon = True
    worker.start()
    worker.join(timeout)
    if worker.is_alive():
        # Note: the thread itself keeps running; we merely stop waiting.
        raise Exception('Timeout')
    if 'error' in outcome:
        raise outcome['error']
    return outcome.get('value')
# Demo: both exists() calls finish well inside the 2 s budget; sleep(5)
# exceeds it and triggers the Timeout exception shown in the transcript below.
print(timed_out_function(os.path.exists, 2, 'python_non_blocking_io.py'))
print(timed_out_function(os.path.exists, 2, 'Nope nope nope'))
timed_out_function(time.sleep, 2, 5)
# True
# False
# Traceback (most recent call last):
# File "D:\tmp\python_non_blocking_io.py", line 39, in <module>
# timed_out_function(time.sleep, 2, 5)
# File "D:\tmp\python_non_blocking_io.py", line 32, in timed_out_function
# raise Exception('Timeout')
# Exception: Timeout
But I'm not sure whether that will create too many parallel IO requests, threads, or similar problems (there is a continuous stream of requests, one starting every 2 seconds, while each can hang for 130).
Do you have any experience with this kind of workarounds?

Related

How to know actual result of `proc_output.waitFor` if `expected_output` is wrong?

I am trying to follow a ROS2 testing tutorial which tests a topic listener to understand how ROS2 testing works. Here is a screenshot of the related code at 21:15
I have a node target_control_node which subscribes the topic turtle1/pose and then move the turtle to a new random pose.
import math
import random
import rclpy
from geometry_msgs.msg import Twist
from rclpy.node import Node
from turtlesim.msg import Pose
class TargetControlNode(Node):
    """Drives the turtlesim turtle toward a random goal pose.

    Subscribes to turtle1/pose, publishes turtle1/cmd_vel, and recomputes
    the velocity command once per second.
    """

    def __init__(self):
        super().__init__("target_control_node")
        self.get_logger().info("target_control_node")
        # Latest pose received on turtle1/pose; None until the first message.
        self._target_pose = None
        self._cmd_vel_publisher = self.create_publisher(Twist, "turtle1/cmd_vel", 10)
        self.create_subscription(Pose, "turtle1/pose", self.subscribe_target_pose, 10)
        # Control loop runs at 1 Hz.
        self.create_timer(1.0, self.control_loop)

    def subscribe_target_pose(self, msg):
        # Cache the most recent pose for the control loop.
        self._target_pose = msg

    def control_loop(self):
        """Publish a Twist steering toward a freshly drawn random goal."""
        # Nothing to do until the first pose has arrived.
        if self._target_pose is None:
            return
        # NOTE(review): a new random goal is drawn on EVERY tick, so the
        # turtle never converges on a single target -- confirm this is intended.
        target_x = random.uniform(0.0, 10.0)
        target_y = random.uniform(0.0, 10.0)
        dist_x = target_x - self._target_pose.x
        dist_y = target_y - self._target_pose.y
        distance = math.sqrt(dist_x**2 + dist_y**2)
        msg = Twist()
        # position
        msg.linear.x = 1.0 * distance
        # orientation
        goal_theta = math.atan2(dist_y, dist_x)
        diff = goal_theta - self._target_pose.theta
        # Wrap the heading error into [-pi, pi].
        if diff > math.pi:
            diff -= 2 * math.pi
        elif diff < -math.pi:
            diff += 2 * math.pi
        msg.angular.z = 2 * diff
        self._cmd_vel_publisher.publish(msg)
def main(args=None):
    """Entry point: initialize rclpy, spin the node, and always clean up.

    Fixed: the original called destroy_node()/shutdown() only after spin()
    returned normally, so a SIGINT during spin (as in the launch_testing
    transcript) left the node and context undestroyed. try/finally
    guarantees teardown on any exit path.
    """
    rclpy.init(args=args)
    node = TargetControlNode()
    try:
        rclpy.spin(node)
    finally:
        node.destroy_node()
        rclpy.shutdown()


if __name__ == "__main__":
    main()
I am trying to write a simple test for the subscription part based on the tutorial above to understand how it works.
Here is my initial test code. Note inside I am using expected_output=str(msg), however, it is wrong, and I am not sure what to put there.
import pathlib
import random
import sys
import time
import unittest
import uuid
import launch
import launch_ros
import launch_testing
import pytest
import rclpy
import std_msgs.msg
from geometry_msgs.msg import Twist
from turtlesim.msg import Pose
@pytest.mark.rostest
def generate_test_description():
    """Launch the node under test plus a ReadyToTest marker.

    Fixed: the decorator had been mangled into the comment
    `#pytest.mark.rostest`, so launch_testing never recognized this
    function as a rostest entry point.

    Returns:
        (LaunchDescription, dict): the launch description and a mapping that
        injects `target_control_node` into test method signatures.
    """
    src_path = pathlib.Path(__file__).parent.parent
    target_control_node = launch_ros.actions.Node(
        executable=sys.executable,
        arguments=[src_path.joinpath("turtle_robot/target_control_node.py").as_posix()],
        # Unbuffered output so launch_testing sees print()s immediately.
        additional_env={"PYTHONUNBUFFERED": "1"},
    )
    return (
        launch.LaunchDescription(
            [
                target_control_node,
                launch_testing.actions.ReadyToTest(),
            ]
        ),
        {
            "target_control_node": target_control_node,
        },
    )
class TestTargetControlNodeLink(unittest.TestCase):
    """Integration test: publish a Pose and wait for the node's output.

    Fixed: both `#classmethod` lines were mangled decorators; without
    `@classmethod`, setUpClass/tearDownClass receive the wrong first
    argument and unittest's class-level setup breaks.
    """

    @classmethod
    def setUpClass(cls):
        # One rclpy context for the whole test case.
        rclpy.init()

    @classmethod
    def tearDownClass(cls):
        rclpy.shutdown()

    def setUp(self):
        self.node = rclpy.create_node("target_control_test_node")

    def tearDown(self):
        self.node.destroy_node()

    def test_target_control_node(self, target_control_node, proc_output):
        pose_pub = self.node.create_publisher(Pose, "turtle1/pose", 10)
        try:
            msg = Pose()
            msg.x = random.uniform(0.0, 10.0)
            msg.y = random.uniform(0.0, 10.0)
            msg.theta = 0.0
            msg.linear_velocity = 0.0
            msg.angular_velocity = 0.0
            pose_pub.publish(msg)
            success = proc_output.waitFor(
                # `str(msg)` is wrong, however, I am not sure what to put here.
                expected_output=str(msg), process=target_control_node, timeout=1.0
            )
            assert success
        finally:
            self.node.destroy_publisher(pose_pub)
When I run launch_test src/turtle_robot/test/test_target_control_node.py, it only prints this without telling me what is actual output:
[INFO] [launch]: All log files can be found below /home/parallels/.ros/log/2023-01-02-16-37-27-631032-ubuntu-linux-22-04-desktop-1439830
[INFO] [launch]: Default logging verbosity is set to INFO
test_target_control_node (test_target_control_node.TestTargetControlNodeLink) ... [INFO] [python3-1]: process started with pid [1439833]
[python3-1] [INFO] [1672706247.877402445] [target_control_node]: target_control_node
FAIL
======================================================================
FAIL: test_target_control_node (test_target_control_node.TestTargetControlNodeLink)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/my-ros/src/turtle_robot/test/test_target_control_node.py", line 91, in test_target_control_node
assert success
AssertionError
----------------------------------------------------------------------
Ran 1 test in 1.061s
FAILED (failures=1)
[INFO] [python3-1]: sending signal 'SIGINT' to process[python3-1]
[python3-1] Traceback (most recent call last):
[python3-1] File "/my-ros/src/turtle_robot/turtle_robot/target_control_node.py", line 59, in <module>
[python3-1] main()
[python3-1] File "/my-ros/src/turtle_robot/turtle_robot/target_control_node.py", line 53, in main
[python3-1] rclpy.spin(node)
[python3-1] File "/opt/ros/humble/local/lib/python3.10/dist-packages/rclpy/__init__.py", line 222, in spin
[python3-1] executor.spin_once()
[python3-1] File "/opt/ros/humble/local/lib/python3.10/dist-packages/rclpy/executors.py", line 705, in spin_once
[python3-1] handler, entity, node = self.wait_for_ready_callbacks(timeout_sec=timeout_sec)
[python3-1] File "/opt/ros/humble/local/lib/python3.10/dist-packages/rclpy/executors.py", line 691, in wait_for_ready_callbacks
[python3-1] return next(self._cb_iter)
[python3-1] File "/opt/ros/humble/local/lib/python3.10/dist-packages/rclpy/executors.py", line 588, in _wait_for_ready_callbacks
[python3-1] wait_set.wait(timeout_nsec)
[python3-1] KeyboardInterrupt
[ERROR] [python3-1]: process has died [pid 1439833, exit code -2, cmd '/usr/bin/python3 /my-ros/src/turtle_robot/turtle_robot/target_control_node.py --ros-args'].
----------------------------------------------------------------------
Ran 0 tests in 0.000s
OK
I checked the source code of waitFor, but still no clue.
Is there a way to print the actual output so that I can give correct expected_output? Thanks!
To answer your question(and give some more general tips): You can always print out the msg inside the node. That said, the reason you're getting an error is because msg is a ros message type, meaning it's an object. So by doing str(msg) your expected output will be something like <object of type Pose at (some_address)>; and not the actual message values. Also since it appears you're just testing a subscriber, it doesn't usually make sense to have a unit test that's expecting stdout. I would also note that you're publishing a message before actually waiting for the result, this should always fail. The reason is because by the time your subscriber is started, the message has already been published and is gone.
Finally, you shouldn't have a publisher included in any unit tests. It adds an extra dependency and since it's a prepackaged transport layer testing if it works is irrelevant. Instead to fix your problem you should simply be calling the individual methods of your node directly. Basically import the script, instantiate the object, and don't try to deal with the whole node lifecycle.
Edit based on comments
Looking at your subscriber code, you'll need to actually print something out. Unfortunately because it's not a std_msg(i.e. it has more fields than just .data) you'll need to decide how you want to go about confirming the data is right. You could simply look at one field or all of them in order. For example you might have in your test:
success = proc_output.waitFor(
expected_output=str(msg.x),
process=target_control_node, timeout=1.0)
And in your control node:
def subscribe_target_pose(self, msg):
    # Cache the pose and echo one field to stdout so launch_testing's
    # proc_output.waitFor(expected_output=str(msg.x), ...) can match it.
    self._target_pose = msg
    print(msg.x)
That said, this IO handling method doesn't seem like the best to me. Mainly because it relies on stdout which isn't something you always want.

Error in multiprocessing using concurrent.futures

This is a follow-up question for the question I asked here. I tried to parallelize my code as follows:
import concurrent.futures as futures
class A(object):
    """Toy CPU-bound workload for the pool demo (Python 2 code: uses xrange)."""

    def __init__(self, q):
        # Stored but never read by add(); kept only to give instances state.
        self.p = q

    def add(self, num):
        """Return num summed 10000 times (busy work for the process pool)."""
        r = 0
        for _ in xrange(10000):
            r += num
        return r
num_instances = 5
instances = []
for i in xrange(num_instances):
    instances.append(A(i))
n = 20
# Create a pool of processes. By default, one is created for each CPU in your machine.
results = []
pool = futures.ProcessPoolExecutor(max_workers=num_instances)
for inst in instances:
    # NOTE(review): submitting a bound method (inst.add) is what raises the
    # PicklingError on Python 2 -- instance methods are not picklable there.
    future = pool.submit(inst.add, n)
    # .result() blocks immediately, so jobs effectively run sequentially.
    results.append(future.result())
# NOTE(review): Executor objects have no join(); this line raises
# AttributeError -- pool.shutdown(wait=True) is the intended call.
pool.join()
print(results)
But, I got this error:
Traceback (most recent call last): File
"/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/queues.py",
line 268, in _feed
send(obj) PicklingError: Can't pickle : attribute lookup builtin.instancemethod failed
Any idea why I get this error? I know we can use map function to assign jobs, but I intentionally don't want to do that.

Python process not cleaned for reuse

Processes not cleaned for reuse
Hi there,
I stumbled upon a problem with ProcessPoolExecutor, where processes access
data, they should not be able to. Let me explain:
I have a situation similar to the below example: I got several runs to start
with different arguments each. They compute their stuff in parallel and have no
reason to interact with each other. Now, as I understand it, when a process
forks, it duplicates itself. The child process has the same (memory) data, as
its parent, but should it change anything, it does so on its own copy. If I
would want the changes to survive the lifetime of the child process, I would
call in queues, pipes and other IPC stuff.
But I actually don't! The processes each manipulate data for their own, which
should not carry over to any of the other runs. The example below shows
otherwise, though. The next runs (not parallel running ones) can access the
data of their previous run, implying that the data has not been scrubbed
from the process.
Code/Example
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import current_process, set_start_method
class Static:
    # Class-level attribute: lives in the module namespace of each worker
    # process, so it survives across tasks executed by the same reused process.
    integer: int = 0
def inprocess(run: int) -> None:
    """Worker task: raises if a previous task in this process left state behind."""
    cp = current_process()
    # Print current state
    print(f"[{run:2d} {cp.pid} {cp.name}] int: {Static.integer}", flush=True)
    # Check value
    if Static.integer != 0:
        raise Exception(f"[{run:2d} {cp.pid} {cp.name}] Variable already set!")
    # Update value -- mutates the class attribute, which persists in this
    # worker process and trips the check above on the process's next task.
    Static.integer = run + 1
def pooling():
    """Map inprocess() over range(4) on a 2-worker process pool."""
    cp = current_process()
    # Get master's pid
    print(f"[{cp.pid} {cp.name}] Start")
    with ProcessPoolExecutor(max_workers=2) as executor:
        for i, _ in enumerate(executor.map(inprocess, range(4))):
            print(f"run #{i} finished", flush=True)
if __name__ == '__main__':
    # "fork" makes workers inherit the parent's memory (POSIX only), which is
    # what lets the demonstrated state leak reproduce deterministically.
    set_start_method("fork")  # enforce fork
    pooling()
Output
[1998 MainProcess] Start
[ 0 2020 Process-1] int: 0
[ 2 2020 Process-1] int: 1
[ 1 2021 Process-2] int: 0
[ 3 2021 Process-2] int: 2
run #0 finished
run #1 finished
concurrent.futures.process._RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.6/concurrent/futures/process.py", line 175, in _process_worker
r = call_item.fn(*call_item.args, **call_item.kwargs)
File "/usr/lib/python3.6/concurrent/futures/process.py", line 153, in _process_chunk
return [fn(*args) for args in chunk]
File "/usr/lib/python3.6/concurrent/futures/process.py", line 153, in <listcomp>
return [fn(*args) for args in chunk]
File "<stdin>", line 14, in inprocess
Exception: [ 2 2020 Process-1] Variable already set!
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<stdin>", line 29, in <module>
File "<stdin>", line 24, in pooling
File "/usr/lib/python3.6/concurrent/futures/process.py", line 366, in _chain_from_iterable_of_lists
for element in iterable:
File "/usr/lib/python3.6/concurrent/futures/_base.py", line 586, in result_iterator
yield fs.pop().result()
File "/usr/lib/python3.6/concurrent/futures/_base.py", line 425, in result
return self.__get_result()
File "/usr/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result
raise self._exception
Exception: [ 2 2020 Process-1] Variable already set!
This behaviour can also be reproduced with max_workers=1, as the process is
re-used. The start-method has no influence on the error (though only "fork"
seems to use more than one process).
So to summarise: I want each new run in a process with all previous data, but
no new data from any of the other runs. Is that possible? How would I achieve
it? Why does the above not do exactly that?
I appreciate any help.
I found multiprocessing.pool.Pool where one can set maxtasksperchild=1, so
a worker process is destroyed, when its task is finished. But I dislike the
multiprocessing interface; the ProcessPoolExecutor is more comfortable to
use. Additionally, the whole idea of the pool is to save process setup time,
which would be dismissed, when killing the hosting process after each run.
Brand new processes in Python do not share memory state. However, ProcessPoolExecutor reuses process instances — it's a pool of active processes, after all. I assume this is done to avoid the OS overhead of stopping and starting processes all the time.
You see the same behavior in other distribution technologies like celery where if you're not careful you can bleed global state between executions.
I recommend you manage your namespace better to encapsulate your data. Using your example, you could for example encapsulate your code and data in a parent class which you instantiate in inprocess(), instead of storing it in a shared namespace like a static field in classes or directly in a module. That way the object will ultimate be cleaned up by the garbage collector:
class State:
    """Per-run state container; instantiate one per task so nothing leaks
    between pool tasks (the instance is garbage-collected when the task ends).
    """

    def __init__(self):
        # Counter local to this instance, never shared across runs.
        self.integer: int = 0

    def do_stuff(self):
        # Fixed: the original declared `def do_stuff():` without `self`,
        # so calling it on an instance raised TypeError and the body's
        # `self.integer` was an unresolved name.
        self.integer += 42
def use_global_function(state):
    """Mutate *state* in place: subtract 1664 from its counter, then let the
    object run its own update via do_stuff()."""
    state.integer = state.integer - 1664
    state.do_stuff()
def inprocess(run: int) -> None:
    """Worker task using a fresh State per invocation, so no data survives
    into the next task run by the same pooled process."""
    cp = current_process()
    # New instance every call -> the check below can never see stale state.
    state = State()
    print(f"[{run:2d} {cp.pid} {cp.name}] int: {state.integer}", flush=True)
    if state.integer != 0:
        raise Exception(f"[{run:2d} {cp.pid} {cp.name}] Variable already set!")
    state.integer = run + 1
    state.do_stuff()
    use_global_function(state)
I have been facing some potentially similar problems and saw some interesting posts, such as High Memory Usage Using Python Multiprocessing, which points towards using gc.collect(); however, in your case it did not work. So I thought about how the Static class was initialized. Some points:
Unfortunately, I cannot reproduce your minimal example the value error prompts:
ValueError: cannot find context for 'fork'
Considering 1, I use set_start_method("spawn")
A quick fix then could be to initialize every time the Static class as below:
# Fixed: the snippet was wrapped in stray `{` ... `}` braces (markup
# garbage), which made it a SyntaxError as pasted.
class Static:
    """Holds a class-level counter; a fresh Static() reads the class default."""

    integer: int = 0

    def __init__(self):
        pass


def inprocess(run: int) -> None:
    """Worker task: checks and 'updates' the counter through new Static() objects."""
    cp = current_process()
    # Print current state
    print(f"[{run:2d} {cp.pid} {cp.name}] int: {Static().integer}", flush=True)
    # Check value
    if Static().integer != 0:
        raise Exception(f"[{run:2d} {cp.pid} {cp.name}] Variable already set!")
    # Update value -- NOTE(review): this assigns to an attribute of a
    # throwaway instance, so Static.integer itself is never changed; that is
    # why every run prints 0 in the quoted output.
    Static().integer = run + 1


def pooling():
    """Map inprocess() over range(4) on a 2-worker pool."""
    cp = current_process()
    # Get master's pid
    print(f"[{cp.pid} {cp.name}] Start")
    with ProcessPoolExecutor(max_workers=2) as executor:
        for i, _ in enumerate(executor.map(inprocess, range(4))):
            print(f"run #{i} finished", flush=True)


if __name__ == "__main__":
    print("start")
    # set_start_method("fork") # enforce fork , ValueError: cannot find context for 'fork'
    set_start_method("spawn")  # Alternative
    pooling()
This returns:
[ 0 1424 SpawnProcess-2] int: 0
[ 1 1424 SpawnProcess-2] int: 0
run #0 finished
[ 2 17956 SpawnProcess-1] int: 0
[ 3 1424 SpawnProcess-2] int: 0
run #1 finished
run #2 finished
run #3 finished

raise AttributeError(name) AttributeError: LCC_GetChannelHandle

I am very new to Python cffi. I have to access my temperature module by using its index or its channel name. I am trying both, as you can see in my QmixTC class. I am getting an AttributeError. In the other classes there are no errors. Can someone help me understand where the problem is? I am including my code as well as the error trace. Thanks.
main code with name qmix.py (importing it in to sample code):
class QmixTC (object):
    """cffi wrapper around one temperature-controller channel of the labbCAN
    Controller API (labbCAN_Controller_API.dll).

    The channel handle is resolved either by numeric *index*
    (LCC_GetChannelHandle) or taken over from an existing *handle*
    (LCC_LookupChanByName).

    NOTE(review): DLL_DIR, CONTROLLER_HEADER, FFI and CHK are module-level
    names defined elsewhere in qmix.py (not visible in this excerpt).
    """

    def __init__(self, index=0, handle=None, name=''):
        self.dll_dir = DLL_DIR
        self.dll_file = os.path.join(self.dll_dir,
                                     'labbCAN_Controller_API.dll')
        self._ffi = FFI()
        # Declare the API from the C header text, then load the DLL.
        self._ffi.cdef(CONTROLLER_HEADER)
        self._dll = self._ffi.dlopen(self.dll_file)
        self._handle = self._ffi.new('dev_hdl *', 0)
        if handle is None:
            # Resolve the channel handle from its numeric index.
            self.index = index
            self._handle = self._ffi.new('dev_hdl *', 0)
            self._call('LCC_GetChannelHandle', self.index, self._handle)
        else:
            # Reuse a caller-supplied handle and look the channel up by name.
            self.index = None
            self._handle = handle
            self._ch_name = "QmixTC_1_DO0_INA"
            self._channel = self._ch_name + str(index)
            self._call('LCC_LookupChanByName',
                       bytes(self._channel, 'utf8'),
                       self._handle)
        self.name = name

    def _call(self, func_name, *args):
        """Look up *func_name* on the loaded DLL, call it with *args*, and
        pass the return code through CHK.

        NOTE(review): the reported `AttributeError: LCC_GetChannelHandle`
        means cffi could not find that symbol -- i.e. it is missing from
        CONTROLLER_HEADER's cdef text (or not exported by the DLL); the
        lookup code here is working as designed.
        """
        func = getattr(self._dll, func_name)
        r = func(*args)
        r = CHK(r, func_name, *args)
        return r

    def Setpoint_write(self, setpoint):
        """
        Write setpoint value to controller device.
        Parameters
        [in] ChanHdl Valid handle of open controller channel
        [in] fSetPointValue The setpoint value to write
        Returns
        Error code - ERR_NOERR indicates success
        """
        self._call('LCC_WriteSetPoint', self._handle[0], setpoint)

    def enable_controllLoop(self, enable):
        """
        Enables / disables a control loop.
        If the control loop is enabled, then the output value is calculated periodically.
        Parameters
        ChanHdl Valid handle of a controller channel
        Enable 1 = enable, 0 = disable
        Returns
        Error code - ERR_NOERR indicates success
        """
        self._call('LCC_EnableControlLoop', self._handle[0], enable)

    def read_temp_value(self, actualvalue):
        """
        Read actual value from device.
        Parameters
        [in] ChanHdl Valid handle of open controller channel
        [out] pfActualValue Returns the actual controller value
        Returns
        Error code - ERR_NOERR indicates success
        """
        self._call('LCC_ReadActualValue', self._handle[0], actualvalue)
if __name__ == '__main__':
    import os.path as op
    # Windows paths to the QmixSDK install and the device configuration.
    dll_dir = op.normpath('C:\\Users\\Ravikumar\\AppData\\Local\\QmixSDK')
    config_dir = op.normpath('C:\\Users\\Public\\Documents\\QmixElements\\Projects\\QmixTC_Pump\\Configurations\\QmixTC_pump')
    # Bring up the CAN bus, then open controller channel 0 by index.
    bus = QmixBus(config_dir=config_dir)
    bus.open()
    bus.start()
    controller_0 = QmixTC(index=0)
    controller_0.enable_controllLoop(1)
sample program:
from __future__ import division, print_function
from win32api import GetSystemMetrics
import numpy as np
import os
import qmix
import pandas as pd
#%% CHANNEL INITIALIZATION
if __name__ == '__main__':
    # Windows paths to the QmixSDK install and the device configuration.
    dll_dir = ('C:\\Users\\Ravikumar\\AppData\\Local\\QmixSDK')
    config_dir = ('C:\\Users\\Public\\Documents\\QmixElements\\Projects\\QmixTC_test1\\Configurations\\QmixTC_test1')
    # Bring up the CAN bus, then talk to controller channel 0.
    qmix_bus = qmix.QmixBus(config_dir=config_dir, dll_dir=dll_dir)
    qmix_bus.open()
    qmix_bus.start()
    controller_0 = qmix.QmixTC(index=0)
    controller_0.Setpoint_write(50)
error:
Traceback (most recent call last):
File "<ipython-input-5-40d4a3db9493>", line 17, in <module>
controller_0 = qmix.QmixTC(index=0)
File "qmix.py", line 921, in __init__
self._call('LCC_GetChannelHandle', self.index, self._handle)
File "qmix.py", line 937, in _call
func = getattr(self._dll, func_name)
File "C:\Users\Ravikumar\Anaconda2\lib\site-packages\cffi\api.py", line 875, in __getattr__
make_accessor(name)
File "C:\Users\Ravikumar\Anaconda2\lib\site-packages\cffi\api.py", line 870, in make_accessor
raise AttributeError(name)
AttributeError: LCC_GetChannelHandle

Lazy Deferred List reaching maximum recursion depth

I have a large list of documents to upsert into MongoDB (possibly n > 100000). I don't want to create 100000 deferreds all at once, but I don't want to execute and wait for each query sequentially because I have a connection pool to MongoDB and I want to utilize it fully. So I have a generator function that will yield deferreds to be consumed by a DeferredLazyList.
def generate_update_deferreds(collection, many_docs):
    """Lazily yield one upsert Deferred per document in *many_docs*.

    Deferreds are created only as the consumer pulls them, so no more than
    the consumer's batch size are in flight at once.
    """
    for document in many_docs:
        yield collection.update({'_id': document['_id']}, document, upsert=True)
This is the code linking the generation of the deferred upserts, and the DeferredLazyList.
@defer.inlineCallbacks
def update_docs(collection, many_docs):
    """Upsert *many_docs* into *collection*, at most pool_size at a time.

    Fixed: the decorator had been mangled into the comment
    `#defer.inlineCallbacks`, so this generator was never wrapped and the
    yield had no effect.

    NOTE(review): pool_size and DeferredLazyList are defined elsewhere in
    this file.
    """
    gen_deferreds = generate_update_deferreds(collection, many_docs)
    results = yield DeferredLazyList(gen_deferreds, count=pool_size, consume_errors=True)
The DeferredLazyList is similar to DeferredList, but instead of accepting a list of deferreds to wait for it accepts an iterator. The deferreds are retrieved from the iterator while only having count deferreds active simultaneously. This is used to effectively batch deferreds because they are created as they are yielded.
class DeferredLazyList(defer.Deferred):
    """
    The ``DeferredLazyList`` class is used for collecting the results of
    many deferreds. This is similar to ``DeferredList``
    (``twisted.internet.defer.DeferredList``) but works with an iterator
    yielding deferreds. This will only maintain a certain number of
    deferreds simultaneously. Once one of the deferreds finishes, another
    will be obtained from the iterator.
    """

    def __init__(self, deferreds, count=None, consume_errors=None):
        # deferreds: iterator yielding Deferreds; count: max in flight;
        # consume_errors: swallow failures instead of propagating them.
        defer.Deferred.__init__(self)
        if count is None:
            count = 1
        self.__consume_errors = bool(consume_errors)
        self.__iter = enumerate(deferreds)
        self.__results = []
        for _i in xrange(count):
            # Start specified number of simultaneous deferreds.
            if not self.called:
                self.__next_save_result(None, None, None)
            else:
                # Iterator exhausted before `count` slots were filled.
                break

    def __next_save_result(self, result, success, index):
        """
        Called when a deferred completes.

        index is None only for the priming calls from __init__ (no result
        to store yet).
        """
        # Make sure we can save result at index.
        if index is not None:
            results_len = len(self.__results)
            if results_len <= index:
                # Pad with placeholders so results stay ordered by index.
                self.__results += [NO_RESULT] * (index - results_len + 1)
            # Save result.
            self.__results[index] = (success, result)
        # Get next deferred.
        try:
            i, d = self.__iter.next()
            # NOTE(review): if d has already fired, addCallbacks() invokes
            # this method synchronously, so one already-fired deferred per
            # stack frame accumulates -- the reported maximum-recursion error.
            d.addCallbacks(self.__next_save_result, self.__next_save_result,
                           callbackArgs=(True, i), errbackArgs=(False, i))
        except StopIteration:
            # Iterator is exhausted, callback self with results.
            self.callback(self.__results)
        # Pass through result.
        return result if success or not self.__consume_errors else None
The problem is when the deferreds are yielded from generate_update_deferreds() their .called is already set to True which is causing DeferredLazyList to recursively call itself.
What's happening is:
In DeferredLazyList.__init__(), self.__next_save_result() is called count times (say 5).
Each call to self.__next_save_result() consumes 1 deferred from self.__iter, and itself is added as a callback.
Because the yielded deferred has .called set to True, d.addCallbacks(self.__next_save_result, ...) immediately calls self.__next_save_result() and this loop continues until a RuntimeError is raised because recursion depth has been reached.
I've printed a stacktrace before the recursion limit was reached to confirm that this is the cause of the problem:
File "/home/caleb/it/Development/projects/python/amazon/bin/feeds-daemon/lib/server.py", line 937, in update_many_docs
results = yield DeferredLazyList(gen_deferreds, count=self.mongo_connections, consume_errors=True, return_results=True)
File "/home/caleb/it/Development/projects/python/amazon/bin/feeds-daemon/lib/twisted.py", line 157, in __init__
self.__next_save_result(None, None, None)
File "/home/caleb/it/Development/projects/python/amazon/bin/feeds-daemon/lib/twisted.py", line 222, in __next_save_result
d.addCallbacks(self.__next_save_result, self.__next_save_result, callbackArgs=(True, i), errbackArgs=(False, i))
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 290, in addCallbacks
self._runCallbacks()
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 551, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/home/caleb/it/Development/projects/python/amazon/bin/feeds-daemon/lib/twisted.py", line 222, in __next_save_result
d.addCallbacks(self.__next_save_result, self.__next_save_result, callbackArgs=(True, i), errbackArgs=(False, i))
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 290, in addCallbacks
self._runCallbacks()
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 551, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/home/caleb/it/Development/projects/python/amazon/bin/feeds-daemon/lib/twisted.py", line 222, in __next_save_result
d.addCallbacks(self.__next_save_result, self.__next_save_result, callbackArgs=(True, i), errbackArgs=(False, i))
# Repeated until the RuntimeError
exceptions.RuntimeError: maximum recursion depth exceeded
Any help would be greatly appreciated. By the way, I am running Python 2.7.3 with Twisted 12.1.0 and the MongoDB stuff is really only relevant to understand the context.
I wanted the result from each deferred, but cooperate() doesn't return those so I added a callback to each deferred before yielding them to the CooperativeTasks:
from twisted.internet.defer import DeferredList, inlineCallbacks
from twisted.internet.task import cooperate
# Sentinel distinguishing "no result yet" from a genuine None result.
NO_RESULT = object()


def generate_update_deferreds(collection, many_docs, save_results):
    """Yield one upsert Deferred per document, recording each outcome in
    *save_results* via an addBoth callback.

    Fixed: the loop iterated `update_docs` (the function defined below)
    instead of the `many_docs` argument.
    """
    for i, doc in enumerate(many_docs):
        d = collection.update({'_id': doc['_id']}, doc, upsert=True)
        d.addBoth(save_result, i, save_results)  # Save result
        yield d


def save_result(result, i, save_results):
    """Record *result* (value or Failure) at position *i*."""
    save_results[i] = result


@inlineCallbacks
def update_docs(collection, many_docs, count=4):
    """Upsert *many_docs* with *count* cooperating workers sharing one generator.

    Fixed relative to the pasted snippet: the decorator was mangled to
    `#inlineCallbacks`; the worker count was an undefined global (now the
    defaulted `count` parameter); a stray extra `)` broke the generator
    call; and `defer.DeferredList` was used although only `DeferredList`
    itself is imported above.
    """
    save_results = [NO_RESULT] * len(many_docs)
    gen_deferreds = generate_update_deferreds(collection, many_docs, save_results)
    workers = [cooperate(gen_deferreds).whenDone() for _i in xrange(count)]
    yield DeferredList(workers)
    # Handle save_results...
There are some tools in Twisted that will help you do this more easily. For example, cooperate:
from twisted.internet.task import cooperate


def generate_update_deferreds(collection, many_docs):
    """Yield one upsert Deferred per document in *many_docs*.

    Fixed: the loop iterated the undefined name `update_docs` instead of
    the `many_docs` parameter.
    """
    for doc in many_docs:
        d = collection.update({'_id': doc['_id']}, doc, upsert=True)
        yield d


# Share ONE generator between `count` cooperative workers; each worker pulls
# the next deferred as soon as its current one fires, so at most `count`
# upserts are in flight without any recursion.
work = generate_update_deferreds(...)
worker_tasks = []
for i in range(count):
    task = cooperate(work)
    worker_tasks.append(task)

# Fires once every worker has exhausted the shared generator.
all_done_deferred = DeferredList([task.whenDone() for task in worker_tasks])

Categories

Resources