Passing a variable from another function in a class to Pool - python

The code below simulates a stock price and calculates the option's payoff. I am trying to use multiprocessing to speed up the simulations. The problem is in CallUpAndOut, where I have pool.map: I am not sure how to access total from Simulations.
I have tried several things like self.Simulations.total and self.total, but nothing works.
import numpy as np
from multiprocessing import Pool
import time

class PricingSimulatedBarrierOption:
    def __init__(self, spot, strike, barrier, rate, sigma, time, sims, steps):
        self.spot = spot
        self.strike = strike
        self.barrier = barrier
        self.rate = rate
        self.sigma = sigma
        self.time = time
        self.sims = sims
        self.steps = steps
        self.dt = self.time / self.steps

    def Simulations(self):
        total = np.zeros((self.sims, self.steps+1), float)
        pathwiseS = np.zeros((self.steps+1), float)
        for j in range(self.sims):
            pathwiseS[0] = self.spot
            total[j, 0] = self.spot
            for i in range(1, self.steps+1):
                phi = np.random.normal()
                pathwiseS[i] = pathwiseS[i-1]*(1 + self.rate*self.dt + self.sigma*phi*np.sqrt(self.dt))
                total[j, i] = pathwiseS[i]
        return total.reshape(self.sims, self.steps+1)

    def CallUpAndOut(self):
        start_time = time.time()
        p = Pool()
        getpayoff = p.map(self.Simulations(), self.total)  ### How to pass total here?
        p.close()
        p.join()
        end_time = time.time() - start_time
        print(end_time)
        # getpayoff = self.Simulations()
        callpayoff = np.zeros((self.sims), float)
        for j in range(self.sims):
            if max(getpayoff[j,]) >= self.barrier:
                callpayoff[j] = 0
            else:
                callpayoff[j] = max(getpayoff[j, self.steps-1] - self.strike, 0)
        return np.exp(-self.rate*self.time)*np.average(callpayoff)

c = PricingSimulatedBarrierOption(100, 100, 170, 0.05, 0.2, 1, 10000, 252)
print(c.CallUpAndOut())

In the function definition, add a parameter; see the example below:
def CallUpAndOut(self, total):
And pass the array of total values to map; see the example below:
total = [1, 2, 3]
getpayoff = p.map(self.Simulations, total)
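Two details this pattern relies on (shown here with a toy class, since the thread's Simulations takes no arguments): the mapped callable must accept each item as an argument, and it must be passed to map without parentheses, otherwise its return value is mapped instead of the function. In Python 3 a bound method pickles fine as long as the instance itself is picklable.

import multiprocessing

class Demo:
    def simulate(self, seed):              # the mapped callable takes one argument
        return seed * 2

if __name__ == '__main__':
    d = Demo()
    with multiprocessing.Pool(2) as pool:
        print(pool.map(d.simulate, [1, 2, 3]))   # d.simulate, not d.simulate()
    # prints [2, 4, 6]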

To get this to work I had to move the declaration outside. The code below is now able to accept a variable in the Pool function.
import numpy as np
from multiprocessing import Pool
import time

class PricingSimulatedBarrierOption:
    def __init__(self, spot, strike, barrier, rate, sigma, time, sims, steps):
        self.spot = spot
        self.strike = strike
        self.barrier = barrier
        self.rate = rate
        self.sigma = sigma
        self.time = time
        self.sims = sims
        self.steps = steps
        self.dt = self.time / self.steps
        self.pathwiseS = np.zeros((self.steps+1), float)

    def Simulations(self):
        print("Called")
        total = np.zeros((self.sims, self.steps+1), float)
        self.pathwiseS = np.zeros((self.steps+1), float)
        for j in range(self.sims):
            self.pathwiseS[0] = self.spot
            total[j, 0] = self.spot
            for i in range(1, self.steps+1):
                phi = np.random.normal()
                self.pathwiseS[i] = self.pathwiseS[i-1]*(1 + self.rate*self.dt + self.sigma*phi*np.sqrt(self.dt))
                total[j, i] = self.pathwiseS[i]
        return total.reshape(self.sims, self.steps+1)

    def CallUpAndOut(self):
        start_time = time.time()
        p = Pool()
        getpayoff = p.map(self.Simulations(), self.pathwiseS)
        p.close()
        p.join()
        end_time = time.time() - start_time
        print(end_time)
        # getpayoff = self.Simulations()
        callpayoff = np.zeros((self.sims), float)
        for j in range(self.sims):
            if max(getpayoff[j,]) >= self.barrier:
                callpayoff[j] = 0
            else:
                callpayoff[j] = max(getpayoff[j, self.steps-1] - self.strike, 0)
        return np.exp(-self.rate*self.time)*np.average(callpayoff)
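Note that map is still handed the result of self.Simulations() here rather than the function itself, so this version remains fragile. For reference, a minimal sketch of one way the Monte Carlo loop could actually be split across processes; the chunking scheme and per-worker seeding below are my assumptions, not code from the thread:

import numpy as np
from multiprocessing import Pool

def simulate_chunk(args):
    # Simulate n_paths price paths; returns an (n_paths, steps+1) array.
    n_paths, spot, rate, sigma, dt, steps, seed = args
    rng = np.random.default_rng(seed)   # per-worker seed so workers don't repeat paths
    paths = np.empty((n_paths, steps + 1))
    paths[:, 0] = spot
    for i in range(1, steps + 1):
        phi = rng.standard_normal(n_paths)
        paths[:, i] = paths[:, i-1] * (1 + rate*dt + sigma*phi*np.sqrt(dt))
    return paths

if __name__ == '__main__':
    sims, steps, spot, rate, sigma, T = 10000, 252, 100.0, 0.05, 0.2, 1.0
    dt, n_workers = T / steps, 4
    chunks = [(sims // n_workers, spot, rate, sigma, dt, steps, s) for s in range(n_workers)]
    with Pool(n_workers) as p:
        total = np.concatenate(p.map(simulate_chunk, chunks))
    print(total.shape)   # (10000, 253)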


Is there a way to find the last list (not the last value) generated from a Python iteration?

A = 300
Delta = 0.01
p0 = 100
ev0 = 0
dev = 0.001
dp0 = A*p0*dev
p = []
dp = []
ev = []

class Nonlinear():
    def __init__(self):
        self.p, self.dp, self.ev = p0, 0, ev0-dev
    def __iter__(self):
        return self
    def __next__(self):
        self.dp, self.p, self.ev = A*(self.p+self.dp)*dev, self.p + self.dp, self.ev+dev
        p.append(self.p)
        dp.append(self.dp)
        if self.p > 1500:
            raise StopIteration()
        return p

for n in Nonlinear():
    print(n)
The above is all the code I use as a replacement for Excel iteration. It always gives me every intermediate list rather than only the last one, which is what I need.
I wonder:
Whether there is a way to get only the last list in Python 3.8.
Since the calculated results will be used to plot a figure with matplotlib, I want the relationship between p and ev from the iteration. It is also important to convert the lists into numpy arrays and to obtain the final lists of p and ev, including all ev values.
I use Python 3.8. If you have any ideas about solving the issue, please let me know.
I think this does what you want:
class Nonlinear():
    def __init__(self, p0 = 100, dp0 = 0, A = 300, dev = 0.001, maxval = 1500):
        self.p = p0
        self.dp = dp0
        self.A = A
        self.dev = dev
        self.maxval = maxval
    def __iter__(self):
        while self.p + self.dp <= self.maxval:
            self.p += self.dp
            self.dp = self.A * (self.p + self.dp) * self.dev
            yield self.p

n = Nonlinear()
print(*n)
# 100 130.0 178.0 245.8 339.88 470.068 650.1448 899.21128 1243.694608
Or
n = [_ for _ in Nonlinear()]
# [100, 130.0, 178.0, 245.8, 339.88, 470.068, 650.1448, 899.21128, 1243.694608]
You could also write as a generator:
def nonlinear(p = 100, dp = 0, A = 300, dev = 0.001, maxval = 1500):
    Adev = A * dev
    while (p + dp <= maxval):
        p += dp
        dp = Adev * (p + dp)
        yield p
As a note, be careful how you define your variables. Entering them line by line as I've done above gives different answers for p than having them listed in one line the way you have.
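To address the numpy part of the question: since the values are produced one at a time, the whole trajectory and its last element can be collected directly. A small sketch built on the nonlinear generator above; reconstructing ev as stepping by dev from 0 is my reading of the original code:

import numpy as np

p_arr = np.fromiter(nonlinear(), dtype=float)   # all p values as a numpy array
ev_arr = 0.001 * np.arange(len(p_arr))          # matching ev values, stepping by dev
print(p_arr[-1])                                # the last iterate, 1243.694608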

Why is multithreading worse than a single thread? [duplicate]

This question already has answers here:
Python: Why is threaded function slower than non thread
(2 answers)
Closed 2 years ago.
I need to implement a search function for an array of a complex class, but when I changed to multiple threads, I found that it became slower than before!
So I tested the simple code below and confirmed that it is true.
My code:
import numpy as np
import threading
import time

class Test(object):
    def __init__(self):
        self.num_workers = 1
        self.workers = []
        self.poses = []
        self.arr = None
    def search(self, arr, val):
        self.poses = []
        for i, a in enumerate(arr):
            if a == val:
                self.poses.append(i)
        return self.poses
    def search_worker(self, val, ID):
        search_len = int(len(self.arr) / self.num_workers)
        prefix = ID * search_len
        if ID == self.num_workers - 1:
            search_len = int(len(self.arr) - prefix)
        for i in range(search_len):
            if self.arr[prefix + i] == val:
                self.poses.append(i)
    def search_multi_thread(self, arr, val):
        self.arr = arr
        self.poses = []
        self.num_workers = 5
        for i in range(self.num_workers):
            worker = threading.Thread(target=self.search_worker, args=(val, i,))
            worker.start()
            self.workers.append(worker)
        for i in range(self.num_workers):
            self.workers[i].join()
        return self.poses

if __name__ == '__main__':
    t = Test()
    sample = np.random.randint(1000, size=50000000)
    t1 = time.perf_counter()
    res = t.search(sample, 65)
    t2 = time.perf_counter()
    print(F'Elapsed time to search = {t2 - t1}')
    t1 = time.perf_counter()
    res = t.search_multi_thread(sample, 65)
    t2 = time.perf_counter()
    print(F'Elapsed time to search with multiple thread = {t2 - t1}')
Result:
Elapsed time to search = 13.291269699999999
Elapsed time to search with multiple thread = 17.8231911
Environment:
OS = windows 10
python = 3.7.7
CPU = Intel core i7 6700HQ
What am I doing wrong?
How can I solve this problem?
(I read about multiprocessing, but it seems that each process has a different stack, so they can't access a single array.)
Note: when working with threads, one thing to keep in mind is that threads tend to increase the efficiency of your program only when there are stretches where the processor would otherwise sit idle (whatever the cause: I/O, sleeps, user interaction, etc.) during the processing of your work. If that is not the case, the overhead of thread switching simply degrades the performance of your program further. In CPython there is an additional constraint: the Global Interpreter Lock lets only one thread execute Python bytecode at a time, so CPU-bound threads cannot run in parallel at all.
In your case, there is very little chance of the processor sitting idle for a significant time. Moreover, you are using far too many threads for this task. So the thread switching simply outweighs whatever performance you gain from the threads, and the performance of your program degrades further.
Massive performance boost after using multiprocessing.
import numpy as np
import time
import multiprocessing

class Test(object):
    def __init__(self):
        self.num_workers = 1
        self.workers = []
        self.poses = []
        self.arr = None
    def search(self, arr, val):
        self.poses = []
        for i, a in enumerate(arr):
            if a == val:
                self.poses.append(i)
        return self.poses
    def search_worker(self, val, ID):
        search_len = int(len(self.arr) / self.num_workers)
        prefix = ID * search_len
        if ID == self.num_workers - 1:
            search_len = int(len(self.arr) - prefix)
        for i in range(search_len):
            if self.arr[prefix + i] == val:
                self.poses.append(i)
    def search_multi_thread(self, arr, val):
        self.arr = arr
        self.poses = []
        self.num_workers = 5
        for i in range(self.num_workers):
            worker = multiprocessing.Process(target=self.search_worker, args=(val, i,))
            worker.start()
            self.workers.append(worker)
        for i in range(self.num_workers):
            self.workers[i].join()
        return self.poses

if __name__ == '__main__':
    t = Test()
    sample = np.random.randint(1000, size=50000000)
    t1 = time.perf_counter()
    res = t.search(sample, 65)
    t2 = time.perf_counter()
    print(F'Elapsed time to search = {t2 - t1}')
    t1 = time.perf_counter()
    res = t.search_multi_thread(sample, 65)
    t2 = time.perf_counter()
    print(F'Elapsed time to search with multiprocessing = {t2 - t1}')
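One caveat worth noting about the version above: each multiprocessing.Process gets its own copy of the Test object, so the appends to self.poses inside the workers never reach the parent, and the returned list stays empty even though the timing improves. A minimal sketch of one way to actually collect the matches with a Pool follows; the chunking and names are my own, and for this particular problem a vectorized np.flatnonzero(sample == val) would be faster than any of these:

import numpy as np
from multiprocessing import Pool

def find_positions(args):
    # Scan one chunk; return absolute indices where arr == val.
    arr, val, offset = args
    return [offset + i for i, a in enumerate(arr) if a == val]

if __name__ == '__main__':
    sample = np.random.randint(1000, size=50000000)
    val, n_workers = 65, 5
    bounds = np.linspace(0, len(sample), n_workers + 1, dtype=int)
    chunks = [(sample[lo:hi], val, lo) for lo, hi in zip(bounds, bounds[1:])]
    with Pool(n_workers) as p:
        poses = [ix for part in p.map(find_positions, chunks) for ix in part]
    print(len(poses))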

How to parallelize access to a class?

I've implemented a simple direct N-body simulation in Python. I'm looking to parallelize it, since we do the same operation again and again. In C++ I would have used OpenMP, but Python doesn't have it.
So I was thinking of using the multiprocessing module. From what I understand, I would need a manager to manage the class (and the list of particles?), and I was thinking of using a starmap pool.
I'm quite lost on how to use these functions to achieve any semblance of parallelization, so any help is appreciated.
PS: I'm open to using other modules too, the easier the better. The class is ditchable: if using numpy arrays (for position, velocity, mass) solves the problem, I'll go with that.
Code:
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing as mp

class particle:
    def __init__(self, xy, uv, m):
        self.xy = xy  # position
        self.uv = uv  # velocity
        self.m = m    # mass
        self.force = np.zeros([2])  # at t=0s, force = 0
    def update(self, dt):
        self.uv += self.force/self.m * dt
        self.xy += self.uv*dt
        self.force = np.zeros([2])
    def interaction(self, p, jj, eps):
        dr = p[jj].xy - self.xy
        dr_norm = np.linalg.norm(dr + eps)
        self.force += G*self.m*p[jj].m/(dr_norm**2) * dr/dr_norm
        p[jj].force -= G*self.m*p[jj].m/(dr_norm**2) * dr/dr_norm

def init_world(n_part):
    p = []
    for ii in range(n_part):
        p.append(particle(np.random.uniform(0,50,size=(2))*1e15, np.random.uniform(-10,10,size=(2))*0, np.random.uniform(2,25)*1e28))
    return p

G = 6.67e-11  # in SI units
dt = 1e5      # in seconds, 86400 s = one day
niter = 10000
n_part = 300
eps = 1e8     # softening to avoid infinite force at small distances

p = init_world(n_part)
xy = np.asarray([p[ii].xy for ii in range(n_part)])
fig, ax1 = plt.subplots()
im = ax1.scatter(xy[:,0], xy[:,1])
plt.show()
for tt in range(niter):
    for ii in range(n_part):
        for jj in range(ii+1, n_part):
            p[ii].interaction(p, jj, eps)
    for ii in range(n_part):
        p[ii].update(dt)
    xy = np.asarray([p[ii].xy for ii in range(n_part)])
    ax1.set_title(tt)
    im.set_offsets(xy)
    plt.pause(0.01)
If you want to share a list of custom objects (such as particle in the question) among processes, you can consider a simplified example here:
import multiprocessing
from multiprocessing.managers import BaseManager

TOTAL_PROCESS = 3

class Particle():
    def __init__(self, x, y):
        self.x = x
        self.y = y
    def multiply(self, z):
        self.x *= z
        self.y *= z
    def __repr__(self):
        return f'(x={self.x},y={self.y})'

def worker(sharedList, ix):
    # Call multiply() for the specific item in the list
    sharedList[ix].multiply(2)

def main():
    BaseManager.register('Particle', Particle)  # Register your custom class
    clsManager = BaseManager()  # A manager to manage Particle objects
    clsManager.start()
    manager = multiprocessing.Manager()  # Another manager to manage a shared list
    # Create a list of Particle objects
    sharedList = manager.list([clsManager.Particle(x, x+1) for x in range(0, TOTAL_PROCESS)])
    # See the original list
    for x in sharedList:
        print(x, end=' ')
    else:
        print()
    # Run multiple processes and make each of them work on a specific object only
    processes = []
    for i in range(TOTAL_PROCESS):
        p = multiprocessing.Process(target=worker, args=[sharedList, i])
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    # See the updated list of Particle objects
    for x in sharedList:
        print(x, end=' ')
    else:
        print()

if __name__ == '__main__':
    main()
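A note on the design above: every method call on a proxied Particle is an inter-process round trip, which is likely to dominate an N-body inner loop. Since the question mentions being open to ditching the class for numpy arrays, here is a hedged sketch of a vectorized force computation over (n, 2) position arrays; the softening is applied to the pair distance here, slightly differently from the question's interaction method:

import numpy as np

def net_forces(xy, m, G=6.67e-11, eps=1e8):
    # Pairwise displacement: dr[i, j] = xy[j] - xy[i]
    dr = xy[None, :, :] - xy[:, None, :]
    r = np.linalg.norm(dr, axis=-1) + eps     # softened pair distances
    np.fill_diagonal(r, np.inf)               # suppress self-interaction
    f = G * (m[:, None] * m[None, :] / r**3)[:, :, None] * dr
    return f.sum(axis=1)                      # net force on each particle

# Usage with the question's initial conditions:
n = 300
xy = np.random.uniform(0, 50, size=(n, 2)) * 1e15
m = np.random.uniform(2, 25, size=n) * 1e28
print(net_forces(xy, m).shape)   # (300, 2)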

Why is ROS Publisher not publishing values?

I am currently trying to write a Python ROS program that can be executed as a ROS node (using rosrun) and that implements the defs declared in a separate Python file arm.py (available at: https://github.com/nortega1/dvrk-ros/...). The program initially examines the current Cartesian position of the arm. Subsequently, when provided with a series of points that the arm must pass through, the program calculates a polynomial equation and, given a range of x values, evaluates the equation to find the corresponding y values.
Within the arm.py file there is a publisher set_position_cartesian_pub that sets the Cartesian position of the arm as follows:
self.__set_position_cartesian_pub = rospy.Publisher(self.__full_ros_namespace + '/set_position_cartesian', Pose, latch = True, queue_size = 1)
The issue is that the publisher set_position_cartesian is not publishing the values of newPose to the robot. Can anyone figure out what the issue might be? I can confirm that the def lagrange correctly calculates the values of the x and y coordinates, which are printed to the terminal via rospy.loginfo(newPose). Any help would be greatly appreciated, as I've been trying to solve this issue for the last 2 days!
#! /usr/bin/python
import rospy
import sys
from std_msgs.msg import String, Bool, Float32
from geometry_msgs.msg import Pose
from geometry_msgs.msg import PoseStamped
from geometry_msgs.msg import Vector3
from geometry_msgs.msg import Quaternion
from geometry_msgs.msg import Wrench

class example_application:
    def callback(self, data):
        self.position_cartesian_current = data.pose
        rospy.loginfo(data.pose)
    def configure(self, robot_name):
        self._robot_name = 'PSM1'
        ros_namespace = '/dvrk/PSM1'
        rospy.Subscriber('/dvrk/PSM1/position_cartesian_current', PoseStamped, self.callback)
        self.set_position_cartesian = rospy.Publisher('/dvrk/PSM1/set_position_cartesian', Pose, latch=True, queue_size=10)
        rospy.sleep(3)
        rospy.init_node('listener', anonymous=True)
        rospy.spin()
    def lagrange(self, f, x):
        total = 0
        n = len(f)
        for i in range(n):
            xi, yi = f[i]
            def g(i, n):
                g_tot = 1
                for j in range(n):
                    if i == j:
                        continue
                    xj, yj = f[j]
                    g_tot *= (x - xj) / float(xi - xj)
                return g_tot
            total += yi * g(i, n)
        return total
    def trajectoryMover(self):
        newPose = Pose()
        points = [(0.0156561,0.123151),(0.00715134,0.0035123151),(0.001515177,0.002123151),(0.0071239751,0.09123150)]
        xlist = [i*0.001 for i in range(10)]
        ylist = [self.lagrange(points, xlist[i])*0.001 for i in range(10)]
        for x, y in zip(xlist, ylist):
            newPose.position.x = x
            newPose.position.y = y
            newPose.position.z = 0.001
            newPose.orientation.x = 0.001
            newPose.orientation.y = 0.001
            newPose.orientation.z = 0.005
            newPose.orientation.w = 0.002
            rospy.sleep(1)
            self.set_position_cartesian.publish(newPose)
            rospy.loginfo(newPose)
        rospy.spin()
    def run(self):
        # self.home()
        self.trajectoryMover()

if __name__ == '__main__':
    try:
        if (len(sys.argv) != 2):
            print(sys.argv[0] + ' requires one argument, i.e. name of dVRK arm')
        else:
            application = example_application()
            application.configure(sys.argv[1])
            application.run()
    except rospy.ROSInterruptException:
        pass
You are not publishing because the code gets stuck at rospy.spin() when you call application.configure(). From what I understand of what you are trying to do, the code should publish 10 poses to a topic, and after that you don't need it anymore.
I've moved the location of rospy.spin(), but the code needs more revision than that.
#! /usr/bin/python
import rospy
import sys
from std_msgs.msg import String, Bool, Float32
from geometry_msgs.msg import Pose
from geometry_msgs.msg import PoseStamped
from geometry_msgs.msg import Vector3
from geometry_msgs.msg import Quaternion
from geometry_msgs.msg import Wrench

class example_application(object):
    def callback(self, data):
        self.position_cartesian_current = data.pose
        rospy.loginfo(data.pose)
    def configure(self, robot_name):
        self._robot_name = 'PSM1'
        ros_namespace = '/dvrk/PSM1'
        rospy.Subscriber('/dvrk/PSM1/position_cartesian_current', PoseStamped, self.callback)
        self.set_position_cartesian = rospy.Publisher('/dvrk/PSM1/set_position_cartesian', Pose, latch=True, queue_size=10)
    def lagrange(self, f, x):
        total = 0
        n = len(f)
        for i in range(n):
            xi, yi = f[i]
            def g(i, n):
                g_tot = 1
                for j in range(n):
                    if i == j:
                        continue
                    xj, yj = f[j]
                    g_tot *= (x - xj) / float(xi - xj)
                return g_tot
            total += yi * g(i, n)
        return total
    def trajectoryMover(self):
        newPose = Pose()
        points = [(0.0156561,0.123151),(0.00715134,0.0035123151),(0.001515177,0.002123151),(0.0071239751,0.09123150)]
        xlist = [i*0.001 for i in range(10)]
        ylist = [self.lagrange(points, xlist[i])*0.001 for i in range(10)]
        for x, y in zip(xlist, ylist):
            newPose.position.x = x
            newPose.position.y = y
            newPose.position.z = 0.001
            newPose.orientation.x = 0.001
            newPose.orientation.y = 0.001
            newPose.orientation.z = 0.005
            newPose.orientation.w = 0.002
            self.set_position_cartesian.publish(newPose)
            rospy.loginfo(newPose)
    def run(self):
        # self.home()
        self.trajectoryMover()

if __name__ == '__main__':
    if (len(sys.argv) != 2):
        print(sys.argv[0] + ' requires one argument, i.e. name of dVRK arm')
    else:
        application = example_application()
        application.configure(sys.argv[1])
        application.run()
    try:
        rospy.spin()
    except KeyboardInterrupt:
        rospy.loginfo("Keyboard Interrupt")
Think about:
making the script argument a parameter of the node;
moving the configure method into the __init__ method;
taking the g() function outside lagrange().
It's good practice to use relative topic names instead of absolute ones (absolute topic names start with /, e.g. '/dvrk/PSM1'); see the sketch below.
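For illustration, the difference looks like this (a sketch; how a relative name resolves depends on the namespace the node runs in):

import rospy
from geometry_msgs.msg import Pose

# Absolute: always resolves to /dvrk/PSM1/set_position_cartesian
pub_abs = rospy.Publisher('/dvrk/PSM1/set_position_cartesian', Pose, latch=True, queue_size=10)

# Relative: resolves under the node's namespace; running the node with
# ROS_NAMESPACE=/dvrk yields the same /dvrk/PSM1/set_position_cartesian
pub_rel = rospy.Publisher('PSM1/set_position_cartesian', Pose, latch=True, queue_size=10)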

Python - high disk usage in SumTree

I've encountered some weird behaviour in my Python program. Basically, when I try to create and fill a SumTree of length larger than 1000, my disk usage increases a lot, to ~300 MB/s, and then the program dies.
I'm pretty sure there's no file r/w involved in this process, and the problem is with the add function. The code is shown below.
import numpy as np

class SumTree():
    trans_idx = 0
    def __init__(self, capacity):
        self.num_samples = 0
        self.capacity = capacity
        self.tree = np.zeros(2 * capacity - 1)
        self.transitions = np.empty(self.capacity, dtype=object)
    def add(self, p, experience):
        tree_idx = self.trans_idx + self.capacity - 1
        self.transitions[self.trans_idx] = experience
        self.update(tree_idx, p)
        self.trans_idx += 1
        if self.trans_idx >= self.capacity:
            self.trans_idx = 0
        self.num_samples = min(self.num_samples + 1, self.capacity)
    def update(self, tree_idx, p):
        diff = p - self.tree[tree_idx]
        self.tree[tree_idx] = p
        while tree_idx != 0:
            tree_idx = (tree_idx - 1) // 2
            self.tree[tree_idx] += diff
    def get_leaf(self, value):
        parent_idx = 0
        while True:
            childleft_idx = 2 * parent_idx + 1
            childright_idx = childleft_idx + 1
            if childleft_idx >= len(self.tree):
                leaf_idx = parent_idx
                break
            else:
                if value <= self.tree[childleft_idx]:
                    parent_idx = childleft_idx
                else:
                    value -= self.tree[childleft_idx]
                    parent_idx = childright_idx
        data_idx = leaf_idx - self.capacity + 1
        return leaf_idx, self.tree[leaf_idx], self.transitions[data_idx]
    @property
    def total_p(self):
        return self.tree[0]  # the root
    @property
    def volume(self):
        return self.num_samples  # number of transitions stored
Here's an example of where this SumTree object is used:
def add(self, experience):
    max_p = np.max(self.tree.tree[-self.tree.capacity:])
    if max_p == 0:
        max_p = 1.0
    exp = self.Experience(*experience)
    self.tree.add(max_p, exp)
where Experience is a namedtuple and self.tree is a SumTree instance. When I removed the last line, the high disk usage disappeared.
Can anyone help me with this?
I finally sorted this out: each experience was a tuple of namedtuples, and I was creating another namedtuple Experience from it. Fixed by changing each experience to a tuple of numpy arrays.
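For illustration only (the original Experience fields are not shown anywhere, so the names below are hypothetical), the shape of that fix is roughly:

import numpy as np
from collections import namedtuple

# Hypothetical field names. Flat numpy arrays instead of nested namedtuples
# keep per-entry overhead small when the tree stores many transitions.
Experience = namedtuple('Experience', ['state', 'action', 'reward', 'next_state'])

experience = (np.zeros(4, dtype=np.float32),    # state
              np.int64(1),                      # action
              np.float32(0.5),                  # reward
              np.zeros(4, dtype=np.float32))    # next_state
exp = Experience(*experience)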
