Add cross-entropy loss plot with focal loss - python

import numpy as np
from scipy import optimize
from scipy import special


class FocalLoss:

    def __init__(self, gamma, alpha=None):
        self.alpha = alpha
        self.gamma = gamma

    def at(self, y):
        if self.alpha is None:
            return np.ones_like(y)
        return np.where(y, self.alpha, 1 - self.alpha)

    def pt(self, y, p):
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return np.where(y, p, 1 - p)

    def __call__(self, y_true, y_pred):
        at = self.at(y_true)
        pt = self.pt(y_true, y_pred)
        return -at * (1 - pt) ** self.gamma * np.log(pt)

    def grad(self, y_true, y_pred):
        y = 2 * y_true - 1  # {0, 1} -> {-1, 1}
        at = self.at(y_true)
        pt = self.pt(y_true, y_pred)
        g = self.gamma
        return at * y * (1 - pt) ** g * (g * pt * np.log(pt) + pt - 1)

    def hess(self, y_true, y_pred):
        y = 2 * y_true - 1  # {0, 1} -> {-1, 1}
        at = self.at(y_true)
        pt = self.pt(y_true, y_pred)
        g = self.gamma
        u = at * y * (1 - pt) ** g
        du = -at * y * g * (1 - pt) ** (g - 1)
        v = g * pt * np.log(pt) + pt - 1
        dv = g * np.log(pt) + g + 1
        return (du * v + u * dv) * y * (pt * (1 - pt))

    def init_score(self, y_true):
        res = optimize.minimize_scalar(
            lambda p: self(y_true, p).sum(),
            bounds=(0, 1),
            method='bounded'
        )
        p = res.x
        log_odds = np.log(p / (1 - p))
        return log_odds

    def lgb_obj(self, preds, train_data):
        y = train_data.get_label()
        p = special.expit(preds)
        return self.grad(y, p), self.hess(y, p)

    def lgb_eval(self, preds, train_data):
        y = train_data.get_label()
        p = special.expit(preds)
        is_higher_better = False
        return 'focal_loss', self(y, p).mean(), is_higher_better
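# Usage note: lgb_obj/lgb_eval above are meant to be handed to LightGBM as
# custom objective/metric hooks. A rough sketch, assuming a pre-4.0 LightGBM
# (where lgb.train still accepts fobj/feval) and hypothetical training arrays
# X_fit/y_fit:
#
#     import lightgbm as lgb
#     fl = FocalLoss(alpha=0.25, gamma=2)
#     fit = lgb.Dataset(X_fit, y_fit,
#                       init_score=np.full(len(y_fit), fl.init_score(y_fit)))
#     model = lgb.train({'learning_rate': 0.05}, fit, num_boost_round=100,
#                       fobj=fl.lgb_obj, feval=fl.lgb_eval)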
import matplotlib
import matplotlib.pyplot as plt
from scipy import special

fig, ax = plt.subplots(figsize=(10, 7))
matplotlib.rc('font', size=14)

np.random.seed(10)
y = np.random.randint(2, size=500)  # random 0s and 1s

for alpha in [.1, .25, .5, .65, .7]:
    for gamma in [2]:
        fl = FocalLoss(alpha=alpha, gamma=gamma)
        ps = np.linspace(5e-2, 1 - 5e-2, 100)
        ls = [fl(y, p).sum() for p in ps]
        curve = ax.plot(ps, ls, label=r'$\alpha$ = %s, $\gamma$ = %s' % (alpha, gamma))[0]
        p = special.expit(fl.init_score(y))
        ax.axvline(p, color=curve.get_color(), linestyle='--')

ax.legend()
ax.grid()
ax.set_title('Obtained initialization constants')
ax.set_xlabel(r'$p$')
ax.set_ylabel('Focal loss value')
fig.savefig('focal_loss_min.png')
which gives me the plot of the focal loss curves with dashed vertical lines at the obtained initialization constants (image omitted).
I wanted to add the cross-entropy loss to the same plot, but I couldn't find a way to do it. This is what I have so far:
import numpy as np
import matplotlib.pyplot as plt

'''
Hypothesis Function - Sigmoid function
'''
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

'''
yHat represents the predicted value / probability value calculated as output of hypothesis / sigmoid function
y represents the actual label
'''
def cross_entropy_loss(yHat, y):
    if y == 1:
        return -np.log(yHat)
    else:
        return -np.log(1 - yHat)
How can I add the cross-entropy loss curve to the focal loss plot? I am interested in comparing them visually. Thanks!
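One possible approach, sketched on the assumption that fig, ax and y from the first snippet are still in scope: binary cross-entropy is exactly the focal loss with gamma = 0 and no alpha weighting, so the FocalLoss class above can draw the extra curve on the same axes:

ce = FocalLoss(gamma=0, alpha=None)
ps = np.linspace(5e-2, 1 - 5e-2, 100)
ce_ls = [ce(y, p).sum() for p in ps]  # summed cross-entropy over the same labels y
ax.plot(ps, ce_ls, color='black', lw=2, label='cross-entropy')
ax.legend()
fig.savefig('focal_vs_ce.png')

Summing cross_entropy_loss(p, yi) over the labels would give the same curve; reusing FocalLoss just keeps the probability clipping consistent between the two plots.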
Reference
https://maxhalford.github.io/blog/lightgbm-focal-loss/

Related

Moving Points with 1D Noise

I'd like to move points in X & Y with 1D noise. To clarify, I don't want each point to move by a unique random number, but rather to apply a larger noise over the whole line, with gradients moving the points. The noise serves as a multiplier for a move amount and takes values between -1 and 1. For example, if the noise value were 0.8, it would scale the move applied to a point's X & Y by that amount.
How would I go about this?
This is what I have so far (the black line is the original line). I think it's wrong, because the frequency is 1 but there appear to be multiple waves in the noise.
import numpy as np
import matplotlib.pyplot as plt
import random
import math
from enum import Enum

# PerlinNoise by alexandr-gnrk
class Interp(Enum):
    LINEAR = 1
    COSINE = 2
    CUBIC = 3


class PerlinNoise():
    def __init__(self,
                 seed, amplitude=1, frequency=1,
                 octaves=1, interp=Interp.COSINE, use_fade=False):
        self.seed = random.Random(seed).random()
        self.amplitude = amplitude
        self.frequency = frequency
        self.octaves = octaves
        self.interp = interp
        self.use_fade = use_fade
        self.mem_x = dict()

    def __noise(self, x):
        # memoized to improve performance
        if x not in self.mem_x:
            self.mem_x[x] = random.Random(self.seed + x).uniform(-1, 1)
        return self.mem_x[x]

    def __interpolated_noise(self, x):
        prev_x = int(x)       # previous integer
        next_x = prev_x + 1   # next integer
        frac_x = x - prev_x   # fractional part of x
        if self.use_fade:
            frac_x = self.__fade(frac_x)
        # interpolate x
        if self.interp is Interp.LINEAR:
            res = self.__linear_interp(
                self.__noise(prev_x),
                self.__noise(next_x),
                frac_x)
        elif self.interp is Interp.COSINE:
            res = self.__cosine_interp(
                self.__noise(prev_x),
                self.__noise(next_x),
                frac_x)
        else:
            res = self.__cubic_interp(
                self.__noise(prev_x - 1),
                self.__noise(prev_x),
                self.__noise(next_x),
                self.__noise(next_x + 1),
                frac_x)
        return res

    def get(self, x):
        frequency = self.frequency
        amplitude = self.amplitude
        result = 0
        for _ in range(self.octaves):
            result += self.__interpolated_noise(x * frequency) * amplitude
            frequency *= 2
            amplitude /= 2
        return result

    def __linear_interp(self, a, b, x):
        return a + x * (b - a)

    def __cosine_interp(self, a, b, x):
        x2 = (1 - math.cos(x * math.pi)) / 2
        return a * (1 - x2) + b * x2

    def __cubic_interp(self, v0, v1, v2, v3, x):
        p = (v3 - v2) - (v0 - v1)
        q = (v0 - v1) - p
        r = v2 - v0
        s = v1
        return p * x**3 + q * x**2 + r * x + s

    def __fade(self, x):
        # useful only for linear interpolation
        return (6 * x**5) - (15 * x**4) + (10 * x**3)


x = np.linspace(10, 10, 20)  # constant x = 10 (a vertical line)
y = np.linspace(0, 10, 20)

seed = 10
gen_x = PerlinNoise(seed=seed, amplitude=5, frequency=1, octaves=1, interp=Interp.CUBIC, use_fade=True)
noise_x = np.array([gen_x.get(pos) for pos in y])

fig, ax = plt.subplots(1)
ax.set_aspect("equal")
ax.plot(x, y, linewidth=2, color="k")
ax.scatter(x, y, s=20, zorder=4, color="k")
ax.plot(x + noise_x, y, linewidth=2, color="blue")
ax.scatter(x + noise_x, y, s=80, zorder=4, color="red")
plt.show()
Thank you!
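A possible explanation for the multiple waves, offered as a guess: PerlinNoise draws an independent random value at every integer of x * frequency, so the y range 0..10 crosses roughly ten lattice cells even with frequency=1. Scaling the frequency by the length of the line should give a single smooth wave; a minimal sketch under that assumption:

line_length = y.max() - y.min()  # 10 for the y = np.linspace(0, 10, 20) above
gen_x = PerlinNoise(seed=seed, amplitude=5, frequency=1 / line_length,
                    octaves=1, interp=Interp.CUBIC, use_fade=True)
noise_x = np.array([gen_x.get(pos) for pos in y])  # now ~one wave over the line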

What is my problem with the implementation of multivariate_gauss pdf?

I use Python to calculate the multivariate Gaussian pdf, but I don't know what's wrong. Here is the code:
# calculate multi-d gaussian pdf
def mul_gauss(x, mu, sigma) -> float:
    d = len(x[0])
    front = 1 / math.sqrt(((2 * math.pi) ** d) * np.linalg.det(sigma))
    tmp = (np.array(x) - np.array(mu))
    tmp_T = np.transpose(tmp)
    back = -0.5 * (np.matmul(np.matmul(tmp, np.linalg.inv(sigma)), tmp_T))[0][0]
    return front * math.exp(back)
I compared the result with scipy.stats.multivariate_normal(x,mu,sigma)
x = [[2, 2]]
mu = [[4, 4]]
sigma = [[3, 0], [0, 3]]
ret_1 = mul_gauss(x, mu, sigma)
ret_2 = scipy.stats.multivariate_normal(x[0], mu[0], sigma).pdf(x[0])
print('ret_1=', ret_1)
print('ret_2=', ret_2)
and output is
ret_1=0.013984262505331654
ret_2=0.03978873577297383
Could anyone help me?
In the line where you compute ret_2, you construct the frozen distribution with the wrong arguments: scipy.stats.multivariate_normal(x[0], mu[0], sigma) takes x[0] as the mean and mu[0] as the covariance, so .pdf(x[0]) evaluates a different density. Call stats.multivariate_normal.pdf(x[0], mu[0], sigma) instead.
Here is a fix:
# calculate multi-d gaussian pdf
import math
import numpy as np
from scipy import stats

def mul_gauss(x, mu, sigma) -> float:
    d = x[0].shape[0]
    coeff = 1 / np.sqrt((2 * math.pi) ** d * np.linalg.det(sigma))
    tmp = x - mu
    exponent = -0.5 * (np.matmul(np.matmul(tmp, np.linalg.inv(sigma)), tmp.T))[0][0]
    return coeff * math.exp(exponent)

x = np.array([[2, 2]])
mu = np.array([[4, 4]])
sigma = np.array([[3, 0], [0, 3]])

ret_1 = mul_gauss(x, mu, sigma)
ret_2 = stats.multivariate_normal.pdf(x[0], mu[0], sigma)
print('ret_1=', ret_1)
print('ret_2=', ret_2)
Output:
ret_1= 0.013984262505331654
ret_2= 0.013984262505331658
Cheers.
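For what it's worth, the frozen-distribution form should give the same number once it is constructed with the right arguments; a small check using the documented scipy API:

rv = stats.multivariate_normal(mean=mu[0], cov=sigma)  # freeze with (mean, cov)
print(rv.pdf(x[0]))  # ~0.013984, matching ret_1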

Log likelihood is going down as I train my Maximum Likelihood Estimation program

I implemented a Maximum Likelihood Estimation program. The log likelihood should go up after each full-batch epoch on the dataset, but mine goes down.
I believe there is a logical error, but I couldn't find it. Could someone more experienced help me with this? I am also open to improvements.
import scipy.io
import math
import numpy as np

dataset = scipy.io.loadmat('dataset.mat')
data = dataset['hog_features_train']         # Size is [2000, 324]
labels = dataset['superclass_labels_train']  # Size is [2000, 1]
NUMBER_OF_FEATURES = len(dataset['hog_features_train'][0])  # Is 324

# Initialize weights with last weight as bias
w = np.random.normal(0, 0.01, NUMBER_OF_FEATURES + 1)

# linear(x) = b₀ + b₁x₁ + ⋯
def linear(w, observation):
    return np.dot(w[:NUMBER_OF_FEATURES], observation) + w[NUMBER_OF_FEATURES]

# sigmoid(x) = 1 / (1 + exp(−x))
def sigmoid(x):
    return 1 / (1 + math.exp(-x))

# prob(x) = 1 / (1 + exp(−linear(x)))
def prob(w, observation):
    return sigmoid(linear(w, observation))

# LLF = Σᵢ(yᵢ log(prob(xᵢ)) + (1 − yᵢ) log(1 − prob(xᵢ)))
def log_likelyhood(w, data, labels):
    sum = 0
    for i in range(len(data)):
        sum += labels[i] * np.log(prob(w, data[i])) + (1 - labels[i]) * np.log(1 - prob(w, data[i]))
    return sum

# NOTE: d/dx(log(1/(1 + e^(a*x + b)))) = -(a * e^(a*x + b)) / (e^(a*x + b) + 1)
def gradient(w, data, labels):
    # Initialize gradient vector
    gradient = np.zeros(len(w))
    # For input coefficients
    for i in range(len(w) - 1):
        for j in range(len(data)):
            power = math.exp(linear(w, data[j]))
            gradient[i] += - labels[j] * data[j][i] * power / (1 + power) + (1 - labels[j]) * (1 + data[j][i] * power / (1 + power))
    # Gradient term for bias
    for j in range(len(data)):
        power = math.exp(linear(w, data[j]))
        gradient[len(w) - 1] += - labels[j] * power / (1 + power) + (1 - labels[j]) * (1 + power / (1 + power))
    return gradient

LEARNING_RATE = 0.0001
EPOCH = 1000

# Calculate the LLF
likelyhood = log_likelyhood(w, data, labels)
print('likelyhood at the beginning: ', likelyhood)

# Gradient ascent algorithm
for i in range(EPOCH):
    gradient1 = gradient(w, data, labels)
    w += gradient1 * LEARNING_RATE
    likelyhood = log_likelyhood(w, data, labels)
    print('likelyhood after epoch', i + 1, ': ', likelyhood)
Dataset if you want to reproduce my results:
https://drive.google.com/open?id=1tCHXDnxql-_mEvEjFh4OVmev9N_Nreu5
Your code is very slow; learn to use vectorization. You are also computing the derivatives incorrectly. I did not double-check the code, but here is a vectorized version of your code with corrected gradients. See how fast it runs this way:
import scipy.io
import math
import numpy as np

dataset = scipy.io.loadmat('dataset.mat')
data = dataset['hog_features_train'].astype('float64')  # Size is [2000, 324]
bias_term = np.ones(shape=(2000, 1))
data = np.concatenate((bias_term, data), axis=1)  # add bias term as an extra 1 in data features
labels = dataset['superclass_labels_train'].astype('float16')  # Size is [2000, 1]
NUMBER_OF_FEATURES = data.shape[1]  # Is 325

# Initialize weights (the bias is now the first weight, matching the prepended ones column)
w = np.random.normal(0, 0.01, NUMBER_OF_FEATURES)

# linear(x) = b₀ + b₁x₁ + ⋯
def linear(w, observation):
    return np.matmul(observation, w)

# sigmoid(x) = 1 / (1 + exp(−x))
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# prob(x) = 1 / (1 + exp(−linear(x)))
def prob(w, observation):
    return sigmoid(linear(w, observation))

# LLF = Σᵢ(yᵢ log(prob(xᵢ)) + (1 − yᵢ) log(1 − prob(xᵢ)))
def log_likelyhood(w, data, labels):
    return np.sum(prob(w, data))

# NOTE: d/dw(log(1/(1 + e^(-w*x + b)))) = x / (1 + e^(w*x + b))
def gradient(w, data, labels):
    # Initialize gradient vector
    denom = (np.exp(linear(w, data)) + 1)
    denom = np.expand_dims(denom, axis=1)  # reshape from (2000,) to (2000, 1) for broadcasting
    gradient = np.zeros_like(w)
    gradient[1:] = np.sum((data[:, 1:] * labels) / denom, axis=0)
    gradient[0] = np.sum(-1 / denom)
    return gradient

LEARNING_RATE = 0.0001
EPOCH = 1000

# Calculate the LLF
likelyhood = log_likelyhood(w, data, labels)
print('likelyhood at the beginning: ', likelyhood)

# Gradient ascent algorithm
for i in range(EPOCH):
    gradient1 = gradient(w, data, labels)
    w += gradient1 * LEARNING_RATE
    likelyhood = log_likelyhood(w, data, labels)
    print('likelyhood after epoch', i + 1, ': ', likelyhood)
The issue seems to be that not all the parameters are updated simultaneously in the gradient() function.
import scipy.io
import math
import numpy as np
import matplotlib.pyplot as plt

# Load the data
dataset = scipy.io.loadmat('dataset.mat')
# Extract the feature matrix
data = dataset['hog_features_train']  # Size is [2000, 324]
# Extract the labels
labels = dataset['superclass_labels_train']  # Size is [2000, 1]
# Extract the number of features
NUMBER_OF_FEATURES = data.shape[1]  # Is 324

# Initialize weights with last weight as bias
w = np.random.normal(0, 0.01, NUMBER_OF_FEATURES + 1)

# linear(x) = b₀ + b₁x₁ + ⋯
def linear(w, observation):
    return np.dot(w, np.hstack([observation, 1]))

# sigmoid(x) = 1 / (1 + exp(−x))
def sigmoid(x):
    return 1 / (1 + math.exp(-x))

# prob(x) = 1 / (1 + exp(−linear(x)))
def prob(w, observation):
    return sigmoid(linear(w, observation))

# LLF = Σᵢ(yᵢ log(prob(xᵢ)) + (1 − yᵢ) log(1 − prob(xᵢ)))
def log_likelihood(w, data, labels):
    sum = 0
    for i in range(len(data)):
        sum += labels[i] * np.log(prob(w, data[i, :])) + (1 - labels[i]) * np.log(1 - prob(w, data[i, :]))
    return sum

# gradient = Σᵢ xᵢ (yᵢ − prob(xᵢ))
def gradient(w, data, labels):
    # Initialize gradient vector
    gradient = np.zeros(len(w))
    # Update gradient vector
    for i in range(len(data)):
        gradient += np.hstack([data[i, :], 1]) * (labels[i] - prob(w, data[i, :]))
    return gradient

LEARNING_RATE = 0.0001
EPOCH = 1000

# Calculate the LLF
loglikelihood = [log_likelihood(w, data, labels)[0]]
print('loglikelihood at the beginning: ', loglikelihood[0])

# Run the gradient ascent algorithm
for i in range(EPOCH):
    gradient1 = gradient(w, data, labels)
    w += gradient1 * LEARNING_RATE
    LLF = log_likelihood(w, data, labels)[0]
    loglikelihood.append(LLF)
    print('likelihood after epoch', i + 1, ': ', LLF)

# Plot the loglikelihood
plt.plot(np.arange(1 + EPOCH), loglikelihood)
plt.xlabel('Epoch')
plt.ylabel('Loglikelihood')
plt.show()
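For completeness, a fully vectorized form of the correct update above, sketched under the same convention (bias appended as the last weight); it should match the loop version while running far faster:

def gradient_vectorized(w, data, labels):
    # Append a ones column so the bias multiplies the last weight.
    X = np.hstack([data, np.ones((data.shape[0], 1))])
    p = 1 / (1 + np.exp(-X @ w))        # probabilities for all rows at once
    return X.T @ (labels.ravel() - p)   # Σᵢ xᵢ (yᵢ − prob(xᵢ))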

Nullcline Plot for Nonlinear System of ODEs

I am attempting to plot the nullcline (steady state) curves of the Oregonator model to assert the existence of a limit cycle by applying the Poincare-Bendixson Theorem. I am close, but for some reason the plot that is produced shows two straight lines. I think it has something to do with the plotting stage. Any ideas?
Also any hints for how to construct a quadrilateral to apply the theorem with would be most appreciated.
Code:
import numpy as np
import matplotlib.pyplot as plt

# Dimensionless parameters
eps = 0.04
q = 0.0008
f = 1

# Oregonator model as numpy array
def Sys(Y, t=0):
    return np.array((Y[0] * (1 - Y[0] - ((Y[0] - q) * f * Y[1]) / (Y[0] + q)) / eps, Y[0] - Y[1]))

# Oregonator model steady states
def g(x, z):
    return (x * (1 - x) + ((q - x) * f * z) / (q + x)) / eps

def h(x, z):
    return x - z

# Initial lists containing values
x = []
z = []

def sys(iv1, iv2, dt, time):
    # initial values:
    x.append(iv1)
    z.append(iv2)
    # Compute and fill lists
    for i in range(time):
        x.append(x[i] + (g(x[i], z[i])) * dt)
        z.append(z[i] + (h(x[i], z[i])) * dt)
    return x, z

sys(1, 0.5, 0.01, 30)

# Locate and find equilibrium points
eqp = []

def find_fixed_points(r):
    for x in range(r):
        for z in range(r):
            if (g(x, z) == 0) and (h(x, z) == 0):
                eqp.append((x, z))
    return eqp

# Plot nullclines
plt.plot([0, 2], [2, 0], 'r-', lw=2, label='x-nullcline')
plt.plot([1, 1], [0, 2], 'b-', lw=2, label='z-nullcline')

# Plot equilibrium points
for point in eqp:
    plt.plot(point[0], point[1], "red", marker="o", markersize=10.0)
plt.legend(loc='best')

x = np.linspace(0, 2, 20)
z = np.linspace(0, 2, 20)
X1, Z1 = np.meshgrid(x, z)  # Create a grid
DX1, DZ1 = Sys([X1, Z1])    # Compute reaction rate on the grid
M = np.hypot(DX1, DZ1)      # Norm of the reaction rate
M[M == 0] = 1.              # Avoid zero division errors
DX1 /= M                    # Normalise each arrow
DZ1 /= M

plt.quiver(X1, Z1, DX1, DZ1, M, pivot='mid')
plt.xlabel("x(\u03C4)")
plt.ylabel("z(\u03C4)")
plt.legend()
plt.grid()
plt.show()
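One likely fix for the straight lines, offered as a sketch: the nullclines are the zero level sets of g and h, so rather than hard-coding plt.plot([0, 2], [2, 0]) and plt.plot([1, 1], [0, 2]), evaluate g and h on the existing meshgrid and trace their zero contours:

G = g(X1, Z1)  # x-nullcline: points where g(x, z) = 0
H = h(X1, Z1)  # z-nullcline: points where h(x, z) = 0
plt.contour(X1, Z1, G, levels=[0], colors='r', linewidths=2)
plt.contour(X1, Z1, H, levels=[0], colors='b', linewidths=2)

As for the Poincare-Bendixson step, a trapping region is usually built by checking the sign of the vector field on each edge of a candidate quadrilateral; the quiver arrows above are a good guide for where the flow points inward.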

Python: Gibbs sampler for regression model

I am trying to write a Gibbs sampler in the Bayesian framework. I got the code from this [website][1], which covers a straightforward regression model. However, I am tackling a more complicated model: y = beta0 + beta1 * x + x^gamma * sigma * epsilon, where sigma is the variance of the model. That means I need to estimate p(beta0 | y, x, beta1, sigma, gamma) and so on in the Gibbs sampler. My question is: how should I modify the code to sample beta0, beta1 and the other variables, given that there are extra variables to condition on?
My code is:
import numpy as np
import pymc as pm
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

plt.rcParams['figure.figsize'] = (10, 5)
# conda install -c conda-forge pymc3=3.0  (shell command; run outside Python)

def sample_beta_0(y, x, beta_1, sigma, gamma, mu_0, tau_0):
    N = len(y)
    assert len(x) == N
    tau_i = 1 / ((x ** gamma) * sigma) ** 2
    precision = tau_0 + sum(tau_i)
    mean = tau_0 * mu_0 + np.sum((y - beta_1 * x) * tau_i)
    mean /= precision
    return np.random.normal(mean, 1 / np.sqrt(precision))

def sample_beta_1(y, x, beta_0, sigma, mu_1, sigma_1):
    N = len(y)
    assert len(x) == N
    precision = sigma_1 + sigma * np.sum(x * x)
    mean = sigma_1 * mu_1 + sigma * np.sum((y - beta_0) * x)
    mean /= precision
    return np.random.normal(mean, 1 / np.sqrt(precision))

def sample_sigma(y, x, beta_0, beta_1, alpha, beta):
    N = len(y)
    alpha_new = alpha + N / 2
    resid = y - beta_0 - beta_1 * x
    beta_new = beta + np.sum(resid * resid) / 2
    return np.random.gamma(alpha_new, 1 / beta_new)

beta_0_true = -1
beta_1_true = 2
sigma_true = 1

N = 50
x = np.random.uniform(low=0, high=4, size=N)
y = np.random.normal(beta_0_true + beta_1_true * x, 1 / np.sqrt(sigma_true))

synth_plot = plt.plot(x, y, "o")
plt.xlabel("x")
plt.ylabel("y")
# print('Y are', y)
# print('X are', x)
plt.show()

"""GIBBS Sampler"""
# specify initial values
init = {"beta_0": 0,
        "beta_1": 0,
        "sigma": 2}

# specify hyper parameters
hypers = {"mu_0": 0,
          "sigma_0": 1,
          "mu_1": 0,
          "sigma_1": 1,
          "alpha": 2,
          "beta": 1}

def gibbs(y, x, iters, init, hypers):
    assert len(y) == len(x)
    beta_0 = init["beta_0"]
    beta_1 = init["beta_1"]
    sigma = init["sigma"]
    trace = np.zeros((iters, 3))  # trace to store values of beta_0, beta_1, sigma
    for it in range(iters):
        beta_0 = sample_beta_0(y, x, beta_1, sigma, hypers["mu_0"], hypers["sigma_0"])
        beta_1 = sample_beta_1(y, x, beta_0, sigma, hypers["mu_1"], hypers["sigma_1"])
        sigma = sample_sigma(y, x, beta_0, beta_1, hypers["alpha"], hypers["beta"])
        trace[it, :] = np.array((beta_0, beta_1, sigma))
    trace = pd.DataFrame(trace)
    trace.columns = ['beta_0', 'beta_1', 'sigma']
    print(trace)
    return trace

iters = 1000
trace = gibbs(y, x, iters, init, hypers)

traceplot = trace.plot()
traceplot.set_xlabel("Iteration")
traceplot.set_ylabel("Parameter value")

trace_burnt = trace[500:999]
hist_plot = trace_burnt.hist(bins=30, layout=(1, 3))
print(trace_burnt.median())
print(trace_burnt.std())
I know it's really long, but please help!
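Not a full answer, just a sketch of the pattern: with the noise term x^gamma * sigma * epsilon, each observation gets its own precision tau_i = 1 / ((x_i^gamma) * sigma)^2, which is exactly what sample_beta_0 above already computes. Each remaining conditional gains the same per-observation weights while everything else is held fixed; for beta_1 under a N(mu_1, 1/tau_1) prior, the conditional might look like this (a sketch under those assumptions):

def sample_beta_1(y, x, beta_0, sigma, gamma, mu_1, tau_1):
    # Per-observation precision from the x**gamma * sigma noise scale.
    tau_i = 1 / ((x ** gamma) * sigma) ** 2
    precision = tau_1 + np.sum(tau_i * x * x)
    mean = (tau_1 * mu_1 + np.sum(tau_i * (y - beta_0) * x)) / precision
    return np.random.normal(mean, 1 / np.sqrt(precision))

Note that once gamma enters the variance, sigma and gamma themselves generally lose conjugacy, so those two are usually updated with a Metropolis step inside the Gibbs scan rather than drawn from a closed-form conditional.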
