Why is my optional python argument throwing an error? - python

I'm trying to add two optional arguments to a function that trains a GLM using the statsmodel package. I used this question to guide the development of the function: How do I create a Python function with optional arguments?
Basically, I want to give the user the ability to use OR not use weights and offsets.
This is the function:
def model_train(df, formula, *args, **kwargs):
'''
run non discrete model
df = model set
formula = model formula
weight = column used for weights
offset = column used for offsets
'''
weight = kwargs.get(df[weight], None)
print(f"Weights initialized....Starting to intialize offsets")
offset_factor = kwargs.get(df[offset], None)
#print(f"Offset initialized....starting matrix development")
y, x = patsy.dmatrices(formula, df, return_type = 'dataframe')
print(f"Matrix done...starting to instantiate model")
glm = sm.GLM(y, x, family = sm.families.Poisson(), var_weights = weight, offset = offset_factor)
print(f"Model instantiated....starting to fit")
glm_results = glm.fit()
print("Model fit. If you are reading this, you're done. Run 'model_object'[0].summary() to get summary statistics")
return glm_results, x, y
This is the error it throws:
---------------------------------------------------------------------------
UnboundLocalError Traceback (most recent call last)
<ipython-input-34-0ce97f02e15e> in <module>
----> 1 model_80150 = model_train(df = train_model1, formula=formula_80150, weight = 'eunit', offset = None)
~\Documents\GitHub\Edit\run_model.py in model_train(df, formula, *args, **kwargs)
7 offset = column used for offsets
8 '''
----> 9 weight = kwargs.get(df[weight], None)
10 print(f"Weights initialized....Starting to intialize offsets")
11
UnboundLocalError: local variable 'weight' referenced before assignment
EDIT UPDATE:
I've tried the following with a TypeError: unsupported operand type(s) for &: 'NoneType' and 'str' error
def model_train(df, formula, *args, **kwargs):
'''
run non discrete model
df = model set
formula = model formula
weight = column used for weights
offset = column used for offsets
'''
weight_value = kwargs.get('weight', None)
print(f"Weights initialized....Starting to intialize offsets")
offset_factor = kwargs.get('offset', None)
print(f"Offset initialized....starting matrix development")
y, x = patsy.dmatrices(formula, df, return_type = 'dataframe')
print(f"Matrix done...starting to instantiate model")
if weight_value == None:
glm = sm.GLM(y, x, family = sm.families.Poisson())
elif weight_value == None & offset_factor != None:
glm = sm.GLM(y, x, family = sm.families.Poisson(), offset = df[offset_factor])
elif weight_value != None and offset_factor == None:
glm = sm.GLM(y, x, family = sm.families.Poisson(), var_weights = df[weight_value])
else:
glm = sm.GLM(y, x, family = sm.families.Poisson(), var_weights = df[weight_value], offset = df[offset_factor])
print(f"Model instantiated....starting to fit")
glm_results = glm.fit()
print("Model fit. If you are reading this, you're done. Run 'model_object'[0].summary() to get summary statistics")
return glm_results, x, y

Related

Unable to save Tensorflow model due to custom metric

I have set some callbacks in place in order to save my Tensorflow model, however it fails with the following warning:
WARNING:Can save best model only with val_Multi_MeanIoU available, skipping.
I pass the following to callbacks to model.fit:
monitor_mode = "max"
es_patience = 8
log_dir = "/some/directory/"
monitor_metric = "val_" + met.__name__
cllbs = [
#ks.callbacks.ReduceLROnPlateau(monitor = "val_loss", factor = 0.2,
# patience = 5, min_lr = 0.001),
ks.callbacks.EarlyStopping(monitor = monitor_metric, mode = monitor_mode, \
patience = es_patience, verbose = 1),
ks.callbacks.ModelCheckpoint(os.path.join(cptdir, \
"Epoch.{epoch:02d}.hdf5"), \
monitor = monitor_metric, \
mode = monitor_mode, \
save_best_only = True, \
save_freq = 1),
ks.callbacks.TensorBoard(log_dir = logdir, histogram_freq = 5)
]
cb_metric is an argument I pass to the Python script and it is set to None.
The metric is defined as:
class MulticlassMeanIoU(tf.keras.metrics.MeanIoU):
def __init__(self,
y_true = None,
y_pred = None,
num_classes = None,
name = "MultiMeanIoU",
dtype = None):
super(MulticlassMeanIoU, self).__init__(num_classes = num_classes,
name = name, dtype = dtype)
self.__name__ = name
def update_state(self, y_true, y_pred, sample_weight = None):
y_pred = tf.math.argmax(y_pred, axis = -1)
return super().update_state(y_true, y_pred, sample_weight)
met = MulticlassMeanIoU(num_classes = N_CLASSES)
metrix = [met, "sparse_categorical_accuracy"]
model.compile.(..., metrics = metrix)
where N_CLASSES is the number of classes for my semantic segmentation model.
Initially, I monitored different metrics in different callbacks, but I read an answer to another question stating the metric mus always have the same name. I fixed that, but it is still not working.
Any ideas?
Edit: For some reason. tf.Keras inserts an underscore in the metric name. I added this underscore to where I defined the metric name. Now the names match but it still fails with the same error.
When I do this:
hist = model.fit(...)
with open("/some/path/Hist.txt", "w") as f:
for key in hist.history:
f.write(key)
f.write("\n")
I get the following contents in the .txt file:
loss
Multi_MeanIoU
sparse_categorical_accuracy
val_loss
val_Multi_MeanIoU
val_sparse_categorical_accuracy

how to fix missing 1 required positional argument PyTorch

I tried to check the length of my training data to train the model but I got this error. I am implementing this in PyTorch. I have 3 main functions. dataset, extract beat and extract signal. can someone help to fix this issue, please?
This is my dataset class
class MyDataset(Dataset):
def __init__(self, patient_ids,bih2aami=True):#This method runs once when we call this class, and we pass the data or its references here with the label data.
self.patient_ids = patient_ids # list of patients ID
self.directory="C:\\Users\\User\\Downloads\\list\mit-bih-arrhythmia-database-1.0.0\\" # path
self.nb_qrs = 99 #number of beats extracted for each patient, found that each recording had at least 99 normal beats
self.idx_tuples = flatten([[(patient_idx, rpeak_idx) for rpeak_idx in range(self.nb_qrs)]
for patient_idx in range(len(patient_ids))])
self.bih2aami=bih2aami
#if bih2aami==True:
# self.y = self.bih2aami(self.y)
def __len__(self):#returns the size of the data set.
return len(self.idx_tuples) # length of the dataset
def __getitem__(self, idx): # get one sample from the dataset
patient_idx, rpeak_idx = self.idx_tuples[idx]
patient_id = self.patient_ids[patient_idx]
file = self.directory + patient_id
signal, normal_qrs_pos = get_signal(file)
qrs_pos = normal_qrs_pos[rpeak_idx]
beat, label = extract_beat(signal, qrs_pos)
#sample = {'signal': torch.tensor(beat).float(),
# 'label': torch.tensor(label).float()}
print(patient_id, patient_idx, beat.shape,label.shape) # bug : what if label null ??
X, y = torch.tensor(beat).float(), torch.tensor(label).float()
return X,y
Get signal function
def get_signal(file):
record = wfdb.rdrecord(file, channels=[0])
df = pd.DataFrame(record.p_signal, columns=record.sig_name)
lead = df.columns[0]
signal = df[lead] #getting the 1D signal
annotation = wfdb.rdann(file, 'atr') #getting the annotation
relabeled_ann = bih2lamedo(annotation.symbol)
annotations = pd.DataFrame(relabeled_ann,annotation.sample)
normal_qrs_pos = list(annotations[annotations[0]=='N'].index) #normal beats
#normal_qrs_pos = list(annotations[annotations[0]!='O'].index) #beats
#normal_qrs_pos = list(annotations.index) #normal beats
return signal, normal_qrs_pos
Get beat function
def extract_beat(signal, win_pos, qrs_positions, win_msec=40, fs=360, start_beat=36, end_beat=108):
"""
win_pos position at which you place the window of your beat
qrs_positions (list) the qrs indices from the annotations (read them from the atr file)-->obtained from annotation.sample
win_msec in milliseconds
"""
#extract signal
signal = np.array(signal)
#print(signal.shape)
#beat_array = np.zeros(start_beat+end_beat)#number of channels
start = int(max(win_pos-start_beat,0))
stop=start+start_beat+end_beat
#print(beat_array.shape,signal.shape)
beat = signal[start:stop]
#compute the nearest neighbor of win_pos among qrs_positions
tolerance = fs*win_msec//1000 #samples at a distance <tolrance are matched
nbr = NearestNeighbors(n_neighbors=1).fit(qrs_positions)
distances, indices = nbr.kneighbors(np.array([[win_pos]]).reshape(-1,1))
#label
if distances[0][0] <= tolerance:
label = 1
else:
label = 0
print(distances[0],tolerance,label)
return beat, label

SARIMAX Rolling Window with exogen Variables

at the moment i am trying to build a SARIMAX Model in python with exogen variables.
Unfortunately i am getting this error: "cannot perform reduce with flexible type"
# Function for Rolling Forecast with Sarima
def rolling_forecast(traindata,test_data, Modell_order = None , Sarima_order = None, eliminate_0 = True, exogen= None, exogen_test=None):
history = [x for x in traindata]
history_exogen = [x for x in exogen]
predictions = list()
for t in range(len(test_data)):
Sarima_Modell_same = SARIMAX(history,order = Modell_order ,seasonal_order= Sarima_order, exog=history_exogen)
model_fit = Sarima_Modell_same.fit()
output = model_fit.forecast(steps = 1,exog=history_exogen)
yhat = output[0]
obs = test_data[t]
obs_ex = exogen_test[t]
predictions.append(yhat)
history.append(obs)
history_exogen.append(obs_ex)
#print('predicted=%f, expected=%f' % (yhat, obs))
series_predicted = pd.Series(predictions, dtype='float64')
series_predicted.index = test_data.index
if eliminate_0 is True:
# Eliminate 0 values --> (for differenced Time Series not applyable because of negativ values)
series_predicted = series_predicted.apply(lambda x : x if x > 0 else 0)
test_data.plot()
series_predicted.plot(color = 'red')
else:
test_data.plot()
series_predicted.plot(color = 'red')
#print(sqrt(mean_squared_error(test_data, series_predicted)))
Is there any way to do this?
Ist More about the multivariate Part. Without the exogen variable it is working but if i try to include it the error appears.
Would appreciate any help.

python scipy cannot pass a dataframe generated from class attribute?

I am learning a little bit about using classes to make my code easier to modify. I was working on defining a ml_setup class which calls a spearman calculation from a separate function.
class ml_setup:
def __init__(self, df, dropcols, ycol, **kwargs ):
self.ycol = ycol
self.df = df
if 'stratify' in kwargs:
self.stratify = kwargs['stratify']
else:
self.stratify = None
self.train_Y = df[ycol]
self.train_X = df.drop(columns=dropcols)
if 'seed' in kwargs:
self.seed = kwargs['seed']
else:
self.seed = self.seed_gen()
if 'test' in kwargs:
self.test = kwargs['test']
else:
self.test = 0.3
if 'final_model' in kwargs:
self.final_model = kwargs['final_model']
else:
self.final_model = None
def seed_gen(self):
seed = np.random.randint(0,2**32 - 1)
return seed
def linear_reg(self, positive=False):
self.regr = linear_model.LinearRegression(positive=positive)
if self.final_model is None:
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.train_X, self.train_Y, test_size=self.test, random_state=self.seed, shuffle=True, stratify=self.stratify)
#for test/train
self.regr.fit(self.X_train, self.y_train)
self.predictions = self.regr.predict(self.X_test)
#print(y_test)
#print(self.predictions)
#print(self.y_test[self.ycol])
self.p, self.s = pearson_stat(self.y_test[self.ycol], self.predictions, print_out='no')
self.r2 = r_squared(self.y_test, self.predictions)
and the pearson_stat function look like the following,
def pearson_stat (x_data, y_data, print_out='no'):
import scipy.stats as ss
p = ss.pearsonr(x_data, y_data)
s = ss.spearmanr(x_data, y_data)
if print_out == 'yes':
print('Pearson rho = {:.4f}, P = {:.4g}'
.format(*p))
print('Spearman r = {:.4f}, P = {:.4g}'
.format(*s))
return p[0], s[0]
While this code perfectly works if I pass the x and y to pearson_stat function following way.
a, s = pearson_stat(ml_mods[seed].y_test['exp_val'],ml_mods[seed].y_test['exp_val'])
But if I now set the 'exp_val' from the class attribute, it doesn't work. gives me the following error.
a, s = pearson_stat(ml_mods[seed].y_test[ml_mods[seed].ycol],ml_mods[seed].y_test[ml_mods[seed].ycol])
xmean = x.mean(dtype=dtype)
File "../anaconda3/envs/py3/lib/python3.6/site-packages/numpy/core/_methods.py", line 160, in _mean
ret = umr_sum(arr, axis, dtype, out, keepdims)
TypeError: No loop matching the specified signature and casting was found for ufunc add
Can you help me understand this?
So after the direction from #hpaulj I realized that the way I call for the column resulted in a DataFrame rather than a Series. For Pearson calculation, I needed the data in form of Series (i.e., array-like).
type(ml_mods[seed].y_test[ml_mods[seed].ycol])
<class 'pandas.core.frame.DataFrame'>
type(ml_mods[seed].y_test['exp_val'])
<class 'pandas.core.series.Series'>

Using PyMC3 to compute ODE parameter posterior: Bad initial energy error

I am trying to sample the parameter posterior of an ODE's parameters using a Likelihood that has mean equal to the logarithm of those ODE solutions for a particular choice of parameter and initial value. This is based on the tutorial found here. I can replicate the tutorial, but can't make my model work. My model's ODE is:
dQ(t)/dt = (1/K)*(R(t) - Q(t))
where R(t) is based on rainfall data that I input.
I am assigning priors to the noise standard deviation \sigma, the initial value Q(0) and parameter K.
Any help on how to overcome the error would be much appreciated :)
This is my code:
from scipy.integrate import odeint
from scipy.interpolate import interp1d
import numpy as np
import pandas as pd
import theano
from theano import *
import pymc3 as pm
import theano.tensor as tt
THEANO_FLAGS='optimizer=fast_compile'
theano.config.exception_verbosity= 'high'
theano.config.floatX = 'float64'
n_states = 1
n_odeparams = 1
n_ivs = 1
class LinearReservoirModel(object):
def __init__(self, n_states, n_odeparams, n_ivs,net_rainfall_data, y0=None):
self._n_states = n_states
self._n_odeparams = n_odeparams
self._n_ivs = n_ivs
self._y0 = y0
self._nr = net_rainfall_data
def simulate(self, parameters, times):
return self._simulate(parameters, times, self._nr, False)
def simulate_with_sensitivities(self, parameters, times):
return self._simulate(parameters, times, self._nr, True)
def _simulate(self, parameters, times, net_rainfall_data, sensitivities):
k, q0 = [x for x in parameters]
# Interpolate net_rainfall
nr_int = interp1d(times, net_rainfall_data,fill_value="extrapolate",kind='slinear')
def r(q,time,k,nrint):
return (nrint(time) - q) * (1./k)
if sensitivities:
def jac(k):
ret = np.zeros((self._n_states, self._n_states))
ret[0, 0] = (-1./k)
return ret
def dfdp(x,t,k,nrint):
ret = np.zeros((self._n_states,
self._n_odeparams + self._n_ivs))
ret[0, 0] = (-1./(k**2)) * (nrint(t) - x)
return ret
def rhs(q_and_dqdp, t, k, nrint):
q = q_and_dqdp[0:self._n_states]
dqdp = q_and_dqdp[self._n_states:].reshape((self._n_states,
self._n_odeparams + self._n_ivs))
dqdt = r(q, t, k, nrint)
# print('jacobian',jac(q))
# print('dqdp',dqdp)
# print('dfdp',dfdp(q,t,nrint))
d_dqdp_dt = jac(k)*dqdp + dfdp(q,t,k,nrint) # CHANGED CODE HERE np.matmul(jac(q), dqdp) + dfdp(q,t,nrint)
return np.concatenate((dqdt, d_dqdp_dt.reshape(-1)))
y0 = np.zeros( (n_states*(n_odeparams+n_ivs)) + n_states ) # CHANGED CODE HERE 2*
y0[2] = 1. #\frac{\partial [X]}{\partial Xt0} at t==0, and same below for Y
y0[0:n_states] = q0
result = odeint(rhs, y0, times, (k,nr_int),rtol=1e-6,atol=1e-5)
values = result[:, 0:self._n_states]
dvalues_dp = result[:, self._n_states:].reshape((len(times),
self._n_states,
self._n_odeparams + self._n_ivs))
return values, dvalues_dp
else:
q = odeint(r,q0,times,args=(k,nr_int),rtol=1e-6,atol=1e-5)
q_flat = [item for sublist in q for item in sublist]
return q_flat
q = [0.01, 0.084788051,0.289827287,0.487426902,0.623592162,0.855202214,0.901709887,0.87936577,0.857067839,0.775516564,0.701725939,0.675138958,0.68101658,0.64644605,0.701305112,0.747128907,0.676039744,0.668502137,0.731464651,0.766588801]
nr = [1.618666063,0.0001,4.405308823,0.394073731,3.392555321,2.733285785,0.0001,1.31186209,0.0001,0.0001,0.0001,0.83074128,0.646141131,0.0001,2.405660466,0.0001,0.0001,1.174002978,1.481146447,0.73244669]
ode_model = LinearReservoirModel(n_states, n_odeparams, n_ivs, nr)
class ODEGradop(theano.Op):
def __init__(self, numpy_vsp):
self._numpy_vsp = numpy_vsp
def make_node(self, x, g):
x = theano.tensor.as_tensor_variable(x)
g = theano.tensor.as_tensor_variable(g)
node = theano.Apply(self, [x, g], [g.type()])
return node
def perform(self, node, inputs_storage, output_storage):
x = inputs_storage[0]
g = inputs_storage[1]
out = output_storage[0]
out[0] = self._numpy_vsp(x, g) # get the numerical VSP
class ODEop(theano.Op):
def __init__(self, state, numpy_vsp):
self._state = state
self._numpy_vsp = numpy_vsp
def make_node(self, x):
x = theano.tensor.as_tensor_variable(x)
return theano.Apply(self, [x], [x.type()])
def perform(self, node, inputs_storage, output_storage):
x = inputs_storage[0]
out = output_storage[0]
out[0] = self._state(x) # get the numerical solution of ODE states
def grad(self, inputs, output_grads):
x = inputs[0]
g = output_grads[0]
grad_op = ODEGradop(self._numpy_vsp) # pass the VSP when asked for gradient
grad_op_apply = grad_op(x, g)
return [grad_op_apply]
class solveCached(object):
def __init__(self, times, n_params, n_outputs):
self._times = times
self._n_params = n_params
self._n_outputs = n_outputs
self._cachedParam = np.zeros(n_params)
self._cachedSens = np.zeros((len(times), n_outputs, n_params))
self._cachedState = np.zeros((len(times),n_outputs))
def __call__(self, x):
if np.all(x==self._cachedParam):
state, sens = self._cachedState, self._cachedSens
else:
state, sens = ode_model.simulate_with_sensitivities(x, times)
return state, sens
times = np.arange(0, len(q)) # number of measurement points (see below)
cached_solver=solveCached(times, n_odeparams + n_ivs, n_states)
def state(x):
State, Sens = cached_solver(np.array(x,dtype=np.float64))
cached_solver._cachedState, cached_solver._cachedSens, cached_solver._cachedParam = State, Sens, x
return State.reshape((len(State),))
def numpy_vsp(x, g):
numpy_sens = cached_solver(np.array(x,dtype=np.float64))[1].reshape((n_states*len(times),len(x)))
return numpy_sens.T.dot(g)
# Define the data matrix
Q = np.vstack((q))
# Now instantiate the theano custom ODE op
my_ODEop = ODEop(state,numpy_vsp)
# The probabilistic model
with pm.Model() as LR_model:
# Priors for unknown model parameters
k = pm.Uniform('k', lower=0.01, upper=10)
# Priors for initial conditions and noise level
q0 = pm.Lognormal('q0', mu=np.log(1.2), sd=1)
sigma = pm.Lognormal('sigma', mu=-1, sd=1, shape=1)
# Forward model
all_params = pm.math.stack([k,q0],axis=0)
ode_sol = my_ODEop(all_params)
forward = ode_sol.reshape(Q.shape)
# log_forward = pm.math.log(forward)
# log_forward_print = tt.printing.Print('log_forward')(log_forward.shape)
# tt.printing.Print('sigma')(sigma.shape)
# Likelihood
Q_obs = pm.Lognormal('Q_obs', mu=pm.math.log(forward), sd=sigma, observed=Q)
print(LR_model.check_test_point())
# Y_obs_print = tt.printing.Print('Y_obs')(Y_obs)
trace = pm.sample(n_init=1500, tune=1000, chains=1, init='adapt_diag')
trace['diverging'].sum()
If you run the code above you should be able to reproduce the following error:
Traceback (most recent call last):
File "examples/myexample.py", line 195, in <module>
trace = pm.sample(1500, tune=1000, chains=1, init='adapt_diag')
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 457, in sample
trace = _sample_many(**sample_args)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 503, in _sample_many
step=step, random_seed=random_seed[i], **kwargs)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 544, in _sample
for it, strace in enumerate(sampling):
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/tqdm/std.py", line 1091, in __iter__
for obj in iterable:
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 633, in _iter_sample
point, states = step.step(point)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py", line 247, in step
apoint, stats = self.astep(array)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 144, in astep
raise SamplingError("Bad initial energy")
pymc3.exceptions.SamplingError: Bad initial energy
PyMC3 Version: 3.7
Theano Version: 1.0.4
Python Version: 3.6.5
Operating system: macOS Catalina (v10.15.1)
How did you install PyMC3: pip (managed in a pyenv virtualenv)

Categories

Resources