I have a script which has a function that is used in various classes and other functions throughout the script.
For example:
from scipy.stats import beta
import matplotlib.pyplot as plt

def function(i):
    x = beta.pdf(i, a=10, b=2, scale=100, loc=-50)
    return x

def plotme():
    graphme = []
    for i in range(500):
        graphme.append(function(i))
    plt.plot(graphme)

def average():
    averageme = []
    for i in range(500):
        averageme.append(function(i))
    average = sum(averageme) / float(len(averageme))
    return print(average)
Now if I import the module and call plotme() or average(), they use the values hard-coded in function(i). But is there a way for me to change the values of a, b, scale, and loc in function(i) when importing the module? I know I could change each function to accept them as arguments, but I am hoping I can just adjust the initial function.
Ideally, I would like to be able to do something like this:
import mymodule
mymodule.function(i, a=500, b=200, scale=50, loc=0)
mymodule.plotme()
And plotme() would then be based on the new values, not what is hard-coded in the script.
Without touching your module, you could monkey-patch it after importing it. One way, using your example:
import mymodule
from scipy.stats import beta

def function(i):
    x = beta.pdf(i, a=500, b=200, scale=50, loc=0)
    return x

mymodule.function = function
mymodule.plotme()
I agree with commenter jasonharper that mymodule's functions could have a better API.
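For example, one possible shape for that better API (a sketch of my own, not necessarily what jasonharper proposed) is to let the callers forward keyword arguments down to function():

from scipy.stats import beta
import matplotlib.pyplot as plt

def function(i, a=10, b=2, scale=100, loc=-50):
    return beta.pdf(i, a=a, b=b, scale=scale, loc=loc)

def plotme(**params):
    # overrides such as plotme(a=500, scale=50) flow straight through to function()
    plt.plot([function(i, **params) for i in range(500)])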
Ideally, if you can change the existing implementation, it should use a class that makes it possible to modify these parameters:
from scipy.stats import beta
import matplotlib.pyplot as plt

class WhateverYouCallIt:
    def __init__(self, a=10, b=2, scale=100, loc=-50):
        self.a = a
        self.b = b
        self.scale = scale
        self.loc = loc

    def function(self, i):
        return beta.pdf(i, a=self.a, b=self.b, scale=self.scale, loc=self.loc)

    def plotme(self):
        graphme = []
        for i in range(500):
            graphme.append(self.function(i))
        plt.plot(graphme)

    def average(self):
        averageme = []
        for i in range(500):
            averageme.append(self.function(i))
        average = sum(averageme) / float(len(averageme))
        return print(average)
Then you can have several differently parameterized instances:
default_one = WhateverYouCallIt() # the default
default_one.plotme()
default_one.average()
a_different_one = WhateverYouCallIt(a=500, b=200, scale=50, loc=0)
a_different_one.plotme()
a_different_one.average()
You can use functools.partial:
from functools import partial
import mymodule

# any time you need, assign a partial function in place of the original
mymodule.function = partial(mymodule.function, a=500, b=200, scale=50, loc=0)
...
mymodule.function(i)
...
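One caveat: the assignment above rebinds mymodule.function, and it assumes function() has been changed to accept a, b, scale, and loc as keyword arguments (as in the "ideally" snippet in the question). A small sketch that also keeps a handle to the original so it can be restored (the _original name is mine):

import mymodule
from functools import partial

_original = mymodule.function  # keep the unpatched function around
mymodule.function = partial(_original, a=500, b=200, scale=50, loc=0)
mymodule.plotme()              # plotme() now picks up the new defaults
mymodule.function = _original  # restore the original behavior when done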
I'm writing code like this:
import random

class A:
    def __init__(self):
        self.att = self.set_att()

    def set_att(self):
        x = random.randint(0, 10)
        if x == 1:
            return "att1"
        elif x == 2:
            return "att2"
        # ... and so on
My question is: should I do it like this, or is there a better, more Pythonic way? I just want to call set_att in __init__.
Thank you.
PyCharm says I should use @staticmethod, but I don't understand the difference.
Here are two ideas I can think of:
import random

POSSIBLE_ATT_VALUES = [f'att{x}' for x in range(1, 11)]

class A:
    def __init__(self):
        # Produce a one-time random value.
        self.att = random.choice(POSSIBLE_ATT_VALUES)

class B:
    @property
    def att(self):
        # Every access returns a new random value.
        return random.choice(POSSIBLE_ATT_VALUES)
>>> a = A()
>>> a.att
'att3'  # Possible value.
>>> a.att
'att3'  # The same value.
>>> b = B()
>>> b.att
'att5'  # Possible value.
>>> b.att
'att1'  # Possibly different value.
I have a large list of elements (~100000) and need to map over it as follows:
from multiprocessing import Pool
import itertools

def mark_diff(args):
    item = args[0]
    pi = args[1]
    item.marked_diff = (item.p/pi[0] + item.c/pi[1] + item.f/pi[2] - 3)**2
    return item

def mark(f_set, goal):
    with Pool(3) as p:
        data = p.map(mark_diff, zip(f_set, itertools.repeat(goal)))
    return data
The default value of item.marked_diff is 0, and item is a mongoengine document.
I am resorting to multiprocessing because mark_diff is substantially more complicated than shown here and involves a lot of exponents and logarithms, for which I am using numpy.
Now for the problem: the returned data still has item.marked_diff as 0, while if I add a print statement at the bottom of mark_diff, correct non-zero values are being assigned.
Definition of item:
import random
import mongoengine

class F(mongoengine.Document):
    p = mongoengine.FloatField()
    c = mongoengine.FloatField()
    f = mongoengine.FloatField()
    marked_diff = 0

f_sets = F.objects.all()
goal = [0.2, 0.35, 0.45]
So something is going on in what you didn't show. When I flesh this out into a complete, executable program, it appears to work fine. Here's the output from one run under Python 3.6.1:
0.7024116548559156
13.468354599594324
6.036133666404753
0.16520292241977205
0.17073749475275496
1.903674418518389
0.2432159511273063
7.743326563037492
4.1990243814914425
19.36243187965931
And here's the full program:
from multiprocessing import Pool
import random
import itertools

class F:
    def __init__(self):
        self.p = random.random()
        self.c = random.random()
        self.f = random.random()

def mark_diff(args):
    item = args[0]
    pi = args[1]
    item.marked_diff = (item.p/pi[0] + item.c/pi[1] + item.f/pi[2] - 3)**2
    return item

def mark(f_set, goal):
    with Pool(3) as p:
        data = p.map(mark_diff, zip(f_set, itertools.repeat(goal)))
    return data

if __name__ == "__main__":
    f_set = [F() for _ in range(10)]
    goal = [0.2, 0.35, 0.45]
    xs = mark(f_set, goal)
    for x in xs:
        print(x.marked_diff)
Is it possible that you're looking at marked_diff in the original f_set instead of in the items returned by mark()?
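For anyone hitting this elsewhere, a minimal self-contained sketch of that pitfall (the names here are mine, not from the question): Pool pickles each item on its way to a worker, so the worker mutates a copy, and only the returned objects carry the new values.

from multiprocessing import Pool

class Item:
    def __init__(self):
        self.marked_diff = 0

def work(item):
    item.marked_diff = 42      # mutates the worker's pickled copy
    return item

if __name__ == "__main__":
    originals = [Item() for _ in range(3)]
    with Pool(2) as p:
        returned = p.map(work, originals)
    print([it.marked_diff for it in originals])  # [0, 0, 0] -- unchanged
    print([it.marked_diff for it in returned])   # [42, 42, 42]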
I am writing code for a project in particle physics (using PyROOT).
In my first draft, I use the following line:
for i in MyTree:
    pion.SetXYZM(K_plus_PX, K_plus_PY, K_plus_PZ, K_plus_MM)
This basically assigns to the pion the values of the variables in the parentheses, i.e. the momenta and invariant mass of the kaon.
Physics aside, I would like to write a function "of the form":
def myfunc(particle):
    return %s_PX % particle
I know this is wrong. What I would like to achieve is a function that, for a given particle, sets particle_PX, particle_PY, etc. as the arguments of SetXYZM.
Thank you for your help,
B
To access class attributes from string variables you can use Python's getattr:
import ROOT

inputfile = ROOT.TFile.Open("somefile.root", "read")
inputtree = inputfile.Get("NameOfTTree")
inputtree.Print()
# observe that there are branches
# K_plus_PX
# K_plus_PY
# K_plus_PZ
# K_plus_MM
# K_minus_PX
# K_minus_PY
# K_minus_PZ
# K_minus_MM
# pi_minus_PX
# pi_minus_PY
# pi_minus_PZ
# pi_minus_MM

def getx(ttree, particlename):
    return getattr(ttree, particlename + "_PX")

def gety(ttree, particlename):
    return getattr(ttree, particlename + "_PY")

def getz(ttree, particlename):
    return getattr(ttree, particlename + "_PZ")

def getm(ttree, particlename):
    return getattr(ttree, particlename + "_MM")

def getallfour(ttree, particlename):
    x = getattr(ttree, particlename + "_PX")
    y = getattr(ttree, particlename + "_PY")
    z = getattr(ttree, particlename + "_PZ")
    m = getattr(ttree, particlename + "_MM")
    return x, y, z, m

for entry in range(inputtree.GetEntries()):
    inputtree.GetEntry(entry)
    pion1 = ROOT.TLorentzVector()
    x = getx(inputtree, "K_plus")
    y = gety(inputtree, "K_plus")
    z = getz(inputtree, "K_plus")
    m = getm(inputtree, "K_plus")
    pion1.SetXYZM(x, y, z, m)
    x, y, z, m = getallfour(inputtree, "pi_minus")
    pion2 = ROOT.TLorentzVector()
    pion2.SetXYZM(x, y, z, m)
As linked by Josh Caswell, you can similarly access variable names:
def getx(particlename):
    x = globals()[particlename + "_PX"]
    return x
though that can get messy quickly, depending on whether your variables are global or local and, for locals, in which scope they live.
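A pattern that sidesteps dynamic name lookups entirely (my sketch, not part of the original answer) is to store the per-particle components in a dict keyed by particle name, so plain indexing replaces globals():

# Hypothetical layout: components live in a dict instead of separate variables.
particles = {
    "K_plus": {"PX": 1.0, "PY": 2.0, "PZ": 3.0, "MM": 493.7},  # made-up numbers
}

def get_component(particlename, component):
    return particles[particlename][component]

x = get_component("K_plus", "PX")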
I am new to Python and I am trying to plot a function.
This is my code:
class Optic():
    def __init__(self):
        '''
        param define
        '''
        global time, lamda, light_speed, Fc, D, z, beta, T0
        self.__time = np.linspace(-400, 400, 1600)  # Picosec
        self.__lamda = 1550000  # Picometer
        self.__light_speed = 0.3  # meter/Picosec
        self.__Fc = light_speed/lamda  # Hz
        self.__D = 17*np.power(10, -6)  # sec/meter
        self.__z = [0, 10000, 20000, 30000]  # meter
        self.__beta = (-(np.power(lamda, 2)/(2*np.pi*light_speed))*D)  # sec/meter
        self.__T0 = 200

        # time = np.linspace(-400*np.power(10,-12), 0.5*np.power(10,-12), 400*np.power(10,-12))  # sec
        # lamda = 1550*np.power(10,-9)  # meter
        # light_speed = 3*np.power(10,8)  # sec/meter
        # Fc = light_speed/lamda  # Hz
        # D = 17*np.power(10,-6)  # sec/meter
        # z = [0,10000,20000,30000]  # meter
        # beta = (-(np.power(lamda,2)/(2*np.pi*light_speed))*D)  # sec/meter
        # T0 = 20*np.power(10,-12)

    def firstPulse(self):
        global first
        first = np.exp(-0.5*np.power(time/T0, 2))

def main():
    plt.plot(first, time)
    plt.show()

if __name__ == "__main__":
    main()
but I get:
NameError: name 'first' is not defined
BTW, I am using Spyder.
You have to instantiate Optic and call firstPulse(), which is what defines that variable.
Something like this:
def main():
    opt = Optic()
    opt.firstPulse()
    plt.plot(first, time)
    plt.show()
When you call
plt.plot(first, time)
you haven't defined first.
You could put
first = 0
somewhere in your code.
You may then see problems with time (and plt unless you have the appropriate imports).
It might make more sense to pass first into Optic. In fact, all the global variables could be passed to the init function instead.
def __init__(self):
could become
def __init__(self, first):
And then you could refer to self.first later if you set it up in the init method.
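A minimal sketch of that idea, with names borrowed from the question (assumes numpy is imported as np):

import numpy as np

class Optic:
    def __init__(self, time, T0):
        self.time = time
        self.T0 = T0
        # compute the pulse once and keep it on the instance
        self.first = np.exp(-0.5 * np.power(self.time / self.T0, 2))

opt = Optic(time=np.linspace(-400, 400, 1600), T0=200)
# opt.first and opt.time can now be handed to plt.plot(opt.first, opt.time)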
As a general rule, globals are best avoided.
I've been implementing a model with Spark via a Python class. I had some headaches calling class methods on an RDD defined in the class (see this question for details), but finally have made some progress. Here is an example of a class method I'm working with:
@staticmethod
def alpha_sampler(model):
    # all the variables in this block are numpy arrays or floats
    var_alpha = model.params.var_alpha
    var_rating = model.params.var_rating
    b = model.params.b
    beta = model.params.beta
    S = model.params.S
    Z = model.params.Z
    x_user_g0_inner_over_var = model.x_user_g0_inner_over_var

    def _alpha_sampler(row):
        feature_arr = row[2]
        var_alpha_given_rest = 1/((1/var_alpha) + feature_arr.shape[0]*(1/var_rating))
        i = row[0]
        items = row[1]
        O = row[3] - np.inner(feature_arr, b) - beta[items] - np.inner(S[i], Z[items])
        E_alpha_given_rest = var_alpha_given_rest * (x_user_g0_inner_over_var[i] + O.sum()/var_rating)
        return np.random.normal(E_alpha_given_rest, np.sqrt(var_alpha_given_rest))
    return _alpha_sampler
As you can see, to avoid serialization errors, I define a static method that returns a function that is in turn applied to each row of an RDD (model is the parent class here, and this is called from within another method of model):
# self.grp_user is the RDD
self.params.alpha = np.array(self.grp_user.map(model.alpha_sampler(self)).collect())
Now, this all works fine, but is not leveraging Spark's broadcast variables at all. Ideally, all the variables I'm passing in this function (var_alpha, beta, S, etc.) could first be broadcast to the workers, so that I wasn't redundantly passing them as part of the map. But I'm not sure how to do this.
My question, then, is the following: How/where should I make these into broadcast variables such that they are available to the alpha_sampler function that I map to grp_user? One thing I believe will work would be to make them globals, e.g.
global var_alpha
var_alpha = sc.broadcast(model.params.var_alpha)
# and similarly for the other variables...
Then the alpha_sampler could be much simplified:
@staticmethod
def _alpha_sampler(row):
    feature_arr = row[2]
    var_alpha_given_rest = 1/((1/var_alpha.value) + feature_arr.shape[0]*(1/var_rating.value))
    i = row[0]
    items = row[1]
    O = row[3] - np.inner(feature_arr, b.value) - beta.value[items] - np.inner(S.value[i], Z.value[items])
    E_alpha_given_rest = var_alpha_given_rest * (x_user_g0_inner_over_var.value[i] + O.sum()/var_rating.value)
    return np.random.normal(E_alpha_given_rest, np.sqrt(var_alpha_given_rest))
But of course this is really dangerous use of globals that I would like to avoid. Is there a better way that lets me leverage broadcast variables?
Assuming that the variables you use here are simply scalars, there is probably nothing to gain from a performance perspective, and using broadcast variables will make your code less readable, but you can either pass a broadcast variable as an argument to the static method:
class model(object):
    @staticmethod
    def foobar(a_model, mu):
        y = a_model.y
        def _foobar(x):
            return x - mu.value + y
        return _foobar

    def __init__(self, sc):
        self.sc = sc
        self.y = -1
        self.rdd = self.sc.parallelize([1, 2, 3])

    def get_mean(self):
        return self.rdd.mean()

    def run_foobar(self):
        mu = self.sc.broadcast(self.get_mean())
        self.data = self.rdd.map(model.foobar(self, mu))
or initialize it there:
class model(object):
    @staticmethod
    def foobar(a_model):
        mu = a_model.sc.broadcast(a_model.get_mean())
        y = a_model.y
        def _foobar(x):
            return x - mu.value + y
        return _foobar

    def __init__(self, sc):
        self.sc = sc
        self.y = -1
        self.rdd = self.sc.parallelize([1, 2, 3])

    def get_mean(self):
        return self.rdd.mean()

    def run_foobar(self):
        self.data = self.rdd.map(model.foobar(self))
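A quick usage sketch under the same assumptions (the local SparkContext setup is mine, not from the original answer); with rdd = [1, 2, 3], the broadcast mean is 2.0 and y is -1, so each element maps to x - 3:

from pyspark import SparkContext

sc = SparkContext("local[2]", "broadcast-demo")
m = model(sc)              # either version of the class above
m.run_foobar()
print(m.data.collect())    # [-2.0, -1.0, 0.0]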