Cannot pass one test case using Bellman Ford Algorithm - python

Problem Description
Task. Given a directed graph with possibly negative edge weights and with 𝑛 vertices and 𝑚 edges, check
whether it contains a cycle of negative weight.
Input Format. A graph is given in the standard format.
Constraints. 1 ≤ 𝑛 ≤ 10³, 0 ≤ 𝑚 ≤ 10⁴, edge weights are integers of absolute value at most 10³.
Output Format. Output 1 if the graph contains a cycle of negative weight and 0 otherwise.
Here is my solution:
import sys

def negative_cycle(adj, cost):
    #write your code here
    distance = [float('inf')] * len(adj)
    distance[0] = 0
    edges = []
    for i in range(len(adj)):
        for j in adj[i]:
            edges.append([i, j])
    for _ in range(len(adj) - 1):
        for i in edges:
            a = i[0]
            b = adj[a].index(i[1])
            if distance[i[1]] > distance[a] + cost[a][b] and distance[a] != float('inf'):
                distance[i[1]] = distance[a] + cost[a][b]
    for i in edges:
        a, b = i[0], adj[i[0]].index(i[1])
        if distance[i[1]] > distance[a] + cost[a][b] and distance[a] != float('inf'):
            return 1
    return 0

if __name__ == '__main__':
    input = sys.stdin.read()
    data = list(map(int, input.split()))
    n, m = data[0:2]
    data = data[2:]
    edges = list(zip(zip(data[0:(3 * m):3], data[1:(3 * m):3]), data[2:(3 * m):3]))
    data = data[3 * m:]
    adj = [[] for _ in range(n)]
    cost = [[] for _ in range(n)]
    for ((a, b), w) in edges:
        adj[a - 1].append(b - 1)
        cost[a - 1].append(w)
    print(negative_cycle(adj, cost))
My code works for most test cases but fails on one test case.
Failed case #12/19: Wrong answer
(Time used: 0.23/10.00, memory used: 14229504/2147483648.)
The input format is:
Number of vertices, number of edges
Vertex 1, vertex 2, weight of edge
...
(one such line for every edge)
May I know what's the error in this code?

cost[a - 1].append(w)
Here, you're not taking the cost of the edges properly. It should be cost[a - 1][b - 1] = w, the cost of the edge going from A -> B.
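Another way to sidestep the lookup problem while keeping the same adj/cost layout is to store each edge's weight next to its endpoints when building the edge list, so the relaxation never calls adj[a].index(...) (which also returns the wrong slot when there are parallel edges between the same pair of vertices). A minimal sketch of that variant:

def negative_cycle(adj, cost):
    n = len(adj)
    dist = [0] * n  # zero-initialise every vertex so cycles unreachable from vertex 0 are caught too
    # Keep each weight next to its endpoints; no adj[a].index(...) lookup needed.
    edges = [(u, v, w) for u in range(n) for v, w in zip(adj[u], cost[u])]
    for _ in range(n):  # n passes: a relaxation still happening on the n-th pass implies a negative cycle
        updated = False
        for u, v, w in edges:
            if dist[u] + w < dist[v]:
                dist[v] = dist[u] + w
                updated = True
        if not updated:
            return 0  # distances stabilised early: no negative cycle
    return 1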

Related

minimum spanning tree remove all extra edges

Extra edge: an edge made with 2 points, where each point is also connected by another edge.
I want to disconnect the MST by deleting these edges.
What is the best approach to minimize the weight of the new disconnected MST,
and in what order should I delete these edges (deleting one could affect the others)?
My approach is to delete the extra edges with the biggest weight first.
https://prnt.sc/1xq1msp
In this case, removing 7 (C-D) means no more edges can be deleted.
But you could also remove B-C and then D-E, which is a better solution.
Here’s an exact solution with NumPy/SciPy/OR-Tools that uses a k-d tree
to enumerate a sparse subset of edges that could possibly be included in
an optimal solution, then formulates and solves a mixed integer program.
Not sure it will scale to your needs though; you could set a gap
limit if you’re willing to settle for an approximation.
import collections
import numpy
import scipy.spatial
from ortools.linear_solver import pywraplp
from random import random


def min_edge_cover(points):
    # Enumerate the candidate edges.
    candidate_edges = set()
    tree = scipy.spatial.KDTree(points)
    min_distances = numpy.ndarray(len(points))
    for i, p in enumerate(points):
        if i % 1000 == 0:
            print(i)
        distances, indexes = tree.query(p, k=2)
        # Ignore p itself.
        d, j = (
            (distances[1], indexes[1])
            if indexes[0] == i
            else (distances[0], indexes[0])
        )
        candidate_edges.add((min(i, j), max(i, j)))
        min_distances[i] = d
    for i, p in enumerate(points):
        if i % 1000 == 0:
            print(i)
        # An edge is profitable only if it's shorter than the sum of the
        # distance from each of its endpoints to that endpoint's nearest
        # neighbor.
        indexes = tree.query_ball_point(p, 2 * min_distances[i])
        for j in indexes:
            if i == j:
                continue
            discount = (
                min_distances[i] + min_distances[j]
            ) - scipy.spatial.distance.euclidean(points[i], points[j])
            if discount >= 0:
                candidate_edges.add((min(i, j), max(i, j)))
    candidate_edges = sorted(candidate_edges)
    # Formulate and solve a mixed integer program to find the minimum distance
    # edge cover. There's a way to do this with general weighted matching, but
    # OR-Tools doesn't expose that library yet.
    solver = pywraplp.Solver.CreateSolver("SCIP")
    objective = 0
    edge_variables = []
    coverage = collections.defaultdict(lambda: 0)
    for i, j in candidate_edges:
        x = solver.BoolVar("x{}_{}".format(i, j))
        objective += scipy.spatial.distance.euclidean(points[i], points[j]) * x
        coverage[i] += x
        coverage[j] += x
        edge_variables.append(x)
    solver.Minimize(objective)
    for c in coverage.values():
        solver.Add(c >= 1)
    solver.EnableOutput()
    assert solver.Solve() == pywraplp.Solver.OPTIMAL
    return {e for (e, x) in zip(candidate_edges, edge_variables) if x.solution_value()}


def random_point():
    return complex(random(), random())


def test(points, graphics=False):
    cover = min_edge_cover(points)
    if not graphics:
        return
    with open("out.ps", "w") as f:
        print("%!PS", file=f)
        print(0, "setlinewidth", file=f)
        inch = 72
        scale = 7 * inch
        print((8.5 * inch - scale) / 2, (11 * inch - scale) / 2, "translate", file=f)
        for x, y in points:
            print(scale * x, scale * y, 1, 0, 360, "arc", "fill", file=f)
        for i, j in cover:
            xi, yi = points[i]
            xj, yj = points[j]
            print(
                scale * xi,
                scale * yi,
                "moveto",
                scale * xj,
                scale * yj,
                "lineto",
                file=f,
            )
        print("stroke", file=f)
        print("showpage", file=f)


test(numpy.random.rand(100, 2), graphics=True)
test(numpy.random.rand(10000, 2))
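Regarding the gap limit mentioned above: with the SCIP backend, pywraplp lets you pass solver-specific parameters as a string. A sketch (the 1% value is arbitrary, and Solve() may then return FEASIBLE rather than OPTIMAL, so the assert needs relaxing):

# Hypothetical: let SCIP stop once the incumbent is provably within 1% of optimal.
solver.SetSolverSpecificParametersAsString("limits/gap = 0.01")
status = solver.Solve()
assert status in (pywraplp.Solver.OPTIMAL, pywraplp.Solver.FEASIBLE)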

igraph barabasi model with the possibility of a 2-way directed edge(in generation linking existing node to a future node)

I need to create a network where the generated graph can link an existing node (the node being generated) to one or more of the future nodes (the nodes that are going to be generated, i.e. node_ID_future > node_ID_current), and not only to already-generated nodes.
import numpy as np
from igraph import Graph

N = 50
degree_mean = 5
m = list(np.random.poisson(degree_mean, N))
g = Graph.Barabasi(N, m, outpref=True, directed=True, power=1, zero_appeal=1, implementation="psumtree", start_from=None)
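For concreteness, the effect I'm after could be faked by post-processing: since Barabási arcs point from newer (higher-numbered) nodes to older ones, reversing a random subset of arcs produces edges from lower-numbered to higher-numbered nodes. This is only a sketch of the desired edge directions, not the generative model itself, and the 30% flip probability is arbitrary:

import random

g2 = Graph.Barabasi(N, m, outpref=True, directed=True, power=1, zero_appeal=1,
                    implementation="psumtree")
# Pick ~30% of the arcs and reverse them, so arcs like 2 -> 19 can appear.
to_flip = [e.index for e in g2.es if random.random() < 0.3]
flipped = [(g2.es[i].target, g2.es[i].source) for i in to_flip]
g2.delete_edges(to_flip)
g2.add_edges(flipped)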
Below is my original code/model, but I couldn't get it to run; my python.exe crashed every time. I'm trying to get a model like the Barabási model that is not limited to directed edges going only from a higher-numbered node to lower-numbered ones (i.e. node 2 can only have edges to node 1 or 0 {2->1 or 2->0}, and node 20 only to nodes lower than 20), whereas I need directed edges such as 2->19 or 4->35.
import numpy as np
from numpy import zeros, nonzero, array, setxor1d

N = 20
in_k = 3
out_k = in_k
indegree = np.random.poisson(in_k, N)   # degrees for each node
outdegree = np.random.poisson(out_k, N) # degrees for each node
# let's create a storage array for the ID and col columns
columns = 3
shape = (N, columns)
a = zeros(shape)
ID = list(range(0, N, 1))
# Assigning the ID to col 1 and the degree to col 2
for k in range(0, len(ID)):
    a[k, 0] = ID[k]
    a[k, 1] = outdegree[k]
    a[k, 2] = indegree[k]

def return_vertices(deg1_indices, deg2_indices, unused_indices, required_out):
    # function to choose edges based on degree and probability
    out_indices = zeros(int(required_out))
    pvals = [0.42, 0.33, 0.25]  # probability of joining to 1deg, 2deg, or no degree node
    iterations = int(required_out)
    for h in range(0, iterations):
        chosen_type = np.where(np.random.multinomial(1, pvals))[0][0]  # choose type from probabilities
        if (chosen_type < 1):
            type_options = len(deg1_indices)
            and_the_winner_is = np.random.randint(type_options)  # choose random vertex of type selected
            out_indices[h] = (deg1_indices[np.random.randint(type_options)])
        if (chosen_type < 2) & (chosen_type >= 1):
            type_options = len(deg2_indices)
            and_the_winner_is = np.random.randint(type_options)
            out_indices[h] = (deg2_indices[np.random.randint(type_options)])
        if (chosen_type > 1):
            type_options = len(unused_indices)
            and_the_winner_is = np.random.randint(type_options)
            out_indices[h] = (unused_indices[np.random.randint(type_options)])
    return out_indices

# now we need to set up an adjacency matrix with the in and out degrees
# also I would like to add a higher probability of an edge Eji existing if there is an edge Eij
# Blank matrix for storage
A = zeros([N, N])
for i in range(0, N):  # loop through vertices using rows as index
    #for j in range(0,N):
    if (A[i].sum() < a[i, 1]):  # if vertex needs outgoing edges
        if (A[:, i].sum() < 1):  # if vertex is not receiving any edges, pick vertices at random
            edges_to_add = a[i, 1] - A[i].sum()
            Column_selected = np.random.randint(N, size=int(edges_to_add))
            A[i, Column_selected] = 1
        if (A[:, i].sum() >= 1):  # if vertex is receiving edges, higher probability of returning an edge based on degree
            edges_to_add = a[i, 1] - A[i].sum()
            deg1_indices = nonzero(A[:, i])  # find 1st degree verts
            for l in range(0, len(deg1_indices)):  # find 2nd degree verts
                deg2_indices = nonzero(A[:, deg1_indices[l]])
            #non_degree_count = N - len(deg1_indices) - len(deg2_indices)
            print(type(deg1_indices[np.random.randint(1)]))
            #V_options = array([[zeros(len(deg1_indices))],[zeros(non_degree_count)]])
            used_indices = array([deg1_indices + deg2_indices])
            fitter_used = [None] * (N + 1)
            fitter_ID = [None] * (N + 1)
            for x in range(0, len(used_indices)):
                fitter_used[x] = used_indices[x]
            for y in range(0, len(ID)):
                fitter_ID[y] = ID[y]
            unused_indices = setxor1d(fitter_ID, fitter_used)
            #unused_indices = [2,5,9]
            the_chosen_ones = return_vertices(deg1_indices, deg2_indices, unused_indices, edges_to_add)
            for m in range(0, len(the_chosen_ones)):
                A[i, int(the_chosen_ones[m])] = 1
#print(A)
conn_indices = np.where(A)
edges = zip(*conn_indices)
I'm going to be working on this for the next week, so hopefully I will be able to post my own answer in the next couple of days.

Adaptive mesh refinement - Python

I'm currently incredibly stuck on what isn't working in my code and have been staring at it for hours. I have created some functions to approximate the solution to the Laplace equation adaptively using the finite element method, then estimate its error using the dual weighted residual. The error function should give a vector of errors (one error for each element); I then choose the biggest errors, add more elements around them, solve again, and recheck the error. However, I have no idea why my error estimate isn't changing!
My first 4 functions are correct, but I will include them in case someone wants to try the code:
import math
import numpy as np
import scipy.sparse.linalg
from scipy.sparse import diags
from scipy.interpolate import interp1d

def Poisson_Stiffness(x0):
    """Finds the Poisson equation stiffness matrix with any non uniform mesh x0"""
    x0 = np.array(x0)
    N = len(x0) - 1  # The amount of elements; x0, x1, ..., xN
    h = x0[1:] - x0[:-1]
    a = np.zeros(N+1)
    a[0] = 1  # BOUNDARY CONDITIONS
    a[1:-1] = 1/h[1:] + 1/h[:-1]
    a[-1] = 1/h[-1]
    a[N] = 1  # BOUNDARY CONDITIONS
    b = -1/h
    b[0] = 0  # BOUNDARY CONDITIONS
    c = -1/h
    c[N-1] = 0  # BOUNDARY CONDITIONS: DIRICHLET
    data = [a.tolist(), b.tolist(), c.tolist()]
    Positions = [0, 1, -1]
    Stiffness_Matrix = diags(data, Positions, (N+1, N+1))
    return Stiffness_Matrix

def NodalQuadrature(x0):
    """Finds the Nodal Quadrature Approximation of sin(pi x)"""
    x0 = np.array(x0)
    h = x0[1:] - x0[:-1]
    N = len(x0) - 1
    approx = np.zeros(len(x0))
    approx[0] = 0  # BOUNDARY CONDITIONS
    for i in range(1, N):
        approx[i] = math.sin(math.pi*x0[i])
        approx[i] = (approx[i]*h[i-1] + approx[i]*h[i])/2
    approx[N] = 0  # BOUNDARY CONDITIONS
    return approx

def Solver(x0):
    Stiff_Matrix = Poisson_Stiffness(x0)
    NodalApproximation = NodalQuadrature(x0)
    NodalApproximation[0] = 0
    U = scipy.sparse.linalg.spsolve(Stiff_Matrix, NodalApproximation)
    return U

def Dualsolution(rich_mesh, qoi_rich_node):  # BOUNDARY CONDITIONS?
    """Find Z from stiffness matrix Z = K^-1 Q over richer mesh"""
    K = Poisson_Stiffness(rich_mesh)
    Q = np.zeros(len(rich_mesh))
    Q[qoi_rich_node] = 1.0
    Z = scipy.sparse.linalg.spsolve(K, Q)
    return Z
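For reference, these are called like so (a quick sanity check on a uniform mesh, using the imports above):

x0 = np.linspace(0, 1, 11)   # uniform mesh with 10 elements
U = Solver(x0)               # nodal FEM approximation of the Poisson problem
Z = Dualsolution(x0, 5)      # dual solution for a quantity of interest at node 5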
My error indicator function takes in an approximation Uh, with the mesh it is solved over, and finds eta = (f - Bu)z.
def Error_Indicators(Uh, U_mesh, Z, Z_mesh, f):
    """Take in U, Interpolate to same mesh as Z then solve for eta vector"""
    u_inter = interp1d(U_mesh, Uh)  # Interpolation of old mesh
    U2 = u_inter(Z_mesh)  # New function u for the new mesh to use in
    Bz = Poisson_Stiffness(Z_mesh)
    Bz = Bz.tocsr()
    eta = np.empty(len(Z_mesh))
    for i in range(len(Z_mesh)):
        for j in range(len(Z_mesh)):
            eta[i] += (f[i] - Bz[i,j]*U2[j])
    for i in range(len(Z)):
        eta[i] = eta[i]*Z[i]
    return eta
My next function seems to adapt the mesh very well to the given error indicator! I just have no idea why the indicator seems to stay the same regardless.
def Mesh_Refinement(base_mesh, tolerance, refinement, z_mesh, QOI_z_mesh):
    """Solve for U on a normal mesh, Take in Z, Find error indicators, adapt. OUTPUT NEW MESH"""
    New_mesh = base_mesh
    Z = Dualsolution(z_mesh, QOI_z_mesh)  # Solve dual solution only once
    f = np.empty(len(z_mesh))
    for i in range(len(z_mesh)):
        f[i] = math.sin(math.pi*z_mesh[i])
    U = Solver(New_mesh)
    eta = Error_Indicators(U, base_mesh, Z, z_mesh, f)
    while max(abs(k) for k in eta) > tolerance:
        orderedeta = np.sort(eta)  # Sort error indicators LENGTH 40
        biggest = np.flipud(orderedeta[int((1-refinement)*len(eta)):len(eta)])
        position = np.empty(len(biggest))
        ratio = float(len(New_mesh))/float(len(z_mesh))
        for i in range(len(biggest)):
            position[i] = eta.tolist().index(biggest[i])*ratio  # GIVES WHAT NUMBER NODE TO REFINE
        refine = np.zeros(len(position))
        for i in range(len(position)):
            refine[i] = math.floor(position[i])+0.5  # AT WHAT NODE TO PUT NEW ELEMENT 5.5 ETC
        refine = np.flipud(sorted(set(refine)))
        for i in range(len(refine)):
            New_mesh = np.insert(New_mesh, refine[i]+0.5, (New_mesh[refine[i]+0.5]+New_mesh[refine[i]-0.5])/2)
        U = Solver(New_mesh)
        eta = Error_Indicators(U, New_mesh, Z, z_mesh, f)
        print eta
An example input for this would be:
Mesh_Refinement(np.linspace(0,1,3),0.1,0.2,np.linspace(0,1,60),20)
I understand there is a lot of code here, but I am at a loss; I have no idea where to turn!
Please consider this piece of code from def Error_Indicators:
eta = np.empty(len(Z_mesh))
for i in range(len(Z_mesh)):
    for j in range(len(Z_mesh)):
        eta[i] = (f[i] - Bz[i,j]*U2[j])
Here you overwrite eta[i] on each j iteration, so the inner cycle is useless and you could go directly to the last possible j. Did you mean to compute the sum of the (f[i] - Bz[i,j]*U2[j]) terms?
eta = np.zeros(len(Z_mesh))  # zeros, not empty: += into uninitialised memory gives garbage
for i in range(len(Z_mesh)):
    for j in range(len(Z_mesh)):
        eta[i] += (f[i] - Bz[i,j]*U2[j])
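As a side note, if the quantity intended is the per-node residual eta = (f - Bu)z stated in the question (where f enters once per node, unlike in the double sum above), the loops collapse to one vectorized line. A sketch, assuming Bz is the CSR stiffness matrix and f, U2, Z are the arrays above:

# eta_i = (f_i - (B u)_i) * z_i, via a sparse matrix-vector product
eta = (f - Bz.dot(U2)) * Z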

Perceptron problems

I am trying to make a training set of data points by making a line (perceptron) f and labelling the points on one side +1 and those on the other -1. Then I make a new line g and try to get it as close to f as possible by updating with w = w + y(t)x(t), where w is the weight vector, y(t) is +1 or -1, and x(t) is the coordinates of a misclassified point. After implementing this, though, I am not getting a very good fit from g to f. Here is my code and some sample outputs.
import random
random.seed()
points = [[1, random.randint(-25, 25), random.randint(-25, 25), 0] for k in range(1000)]
weights = [.1, .1, .1]
misclassified = []
############################################################# Function f
interceptf = (0, random.randint(-5, 5))
slopef = (random.randint(-10, 10), random.randint(-10, 10))
point1f = ((interceptf[0] + slopef[0]), (interceptf[1] + slopef[1]))
point2f = ((interceptf[0] - slopef[0]), (interceptf[1] - slopef[1]))
############################################################# Function G starting
interceptg = (-weights[0], weights[2])
slopeg = (-weights[1], weights[2])
point1g = ((interceptg[0] + slopeg[0]), (interceptg[1] + slopeg[1]))
point2g = ((interceptg[0] - slopeg[0]), (interceptg[1] - slopeg[1]))
#############################################################
def isLeft(a, b, c):
    return ((b[0] - a[0])*(c[1] - a[1]) - (b[1] - a[1])*(c[0] - a[0])) > 0
for i in points:
    if isLeft(point1f, point2f, i):
        i[3] = 1
    else:
        i[3] = -1
for i in points:
    if (isLeft(point1g, point2g, i)) and (i[3] == -1):
        misclassified.append(i)
    if (not isLeft(point1g, point2g, i)) and (i[3] == 1):
        misclassified.append(i)
print len(misclassified)
while misclassified:
    first = misclassified[0]
    misclassified.pop(0)
    a = [first[0], first[1], first[2]]
    b = first[3]
    a[:] = [x*b for x in a]
    weights = [(x + y) for x, y in zip(weights, a)]
interceptg = (-weights[0], weights[2])
slopeg = (-weights[1], weights[2])
point1g = ((interceptg[0] + slopeg[0]), (interceptg[1] + slopeg[1]))
point2g = ((interceptg[0] - slopeg[0]), (interceptg[1] - slopeg[1]))
check = 0
for i in points:
    if (isLeft(point1g, point2g, i)) and (i[3] == -1):
        check += 1
    if (not isLeft(point1g, point2g, i)) and (i[3] == 1):
        check += 1
print weights
print check
117 <--- number of original misclassified points with g
[-116.9, -300.9, 190.1] <--- final weights
617 <--- number of misclassified points with g after the algorithm
956 <--- number of original misclassified points with g
[-33.9, -12769.9, -572.9] <--- final weights
461 <--- number of misclassified points with g after the algorithm
There are at least a few problems with your algorithm:
Your "while" condition is wrong: perceptron learning is not about iterating once through all currently misclassified points, as you do now. The algorithm should iterate through all the points for as long as any of them is misclassified. In particular, each update can turn some correctly classified point into a wrong one, so you have to keep iterating through all of them and check that everything is fine.
I am pretty sure that what you actually wanted is an update rule of the form (y(i) - p(i))x(i), where p(i) is the predicted label and y(i) is the true label (but this obviously degenerates to your method if you only update on misclassified points).
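A minimal sketch of the repeat-until-converged loop described above (the names w, X, y are illustrative, not from the original code; labels are +1/-1, and each x includes a leading 1 for the bias term):

def perceptron(X, y, max_epochs=1000):
    # X: list of feature vectors with a leading 1 (bias); y: labels in {+1, -1}.
    w = [0.0] * len(X[0])
    for _ in range(max_epochs):  # cap the passes in case the data is not separable
        updated = False
        for xi, yi in zip(X, y):
            # Sign of w . xi disagrees with the label: update on this point.
            if yi * sum(wj * xj for wj, xj in zip(w, xi)) <= 0:
                w = [wj + yi * xj for wj, xj in zip(w, xi)]
                updated = True
        if not updated:  # a full pass with no mistakes: converged
            break
    return w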

Python Implementation of Viterbi Algorithm

I'm doing a Python project in which I'd like to use the Viterbi Algorithm. Does anyone know of a complete Python implementation of the Viterbi algorithm? The correctness of the one on Wikipedia seems to be in question on the talk page. Does anyone have a pointer?
Here's mine. It's paraphrased directly from the pseudocode implementation on Wikipedia. It uses NumPy for the convenience of its ndarray but is otherwise a pure Python 3 implementation.
import numpy as np

def viterbi(y, A, B, Pi=None):
    """
    Return the MAP estimate of state trajectory of Hidden Markov Model.

    Parameters
    ----------
    y : array (T,)
        Observation state sequence. int dtype.
    A : array (K, K)
        State transition matrix. See HiddenMarkovModel.state_transition for
        details.
    B : array (K, M)
        Emission matrix. See HiddenMarkovModel.emission for details.
    Pi: optional, (K,)
        Initial state probabilities: Pi[i] is the probability x[0] == i. If
        None, uniform initial distribution is assumed (Pi[:] == 1/K).

    Returns
    -------
    x : array (T,)
        Maximum a posteriori probability estimate of hidden state trajectory,
        conditioned on observation sequence y under the model parameters A, B,
        Pi.
    T1: array (K, T)
        the probability of the most likely path so far
    T2: array (K, T)
        the x_j-1 of the most likely path so far
    """
    # Cardinality of the state space
    K = A.shape[0]
    # Initialize the priors with default (uniform dist) if not given by caller
    Pi = Pi if Pi is not None else np.full(K, 1 / K)
    T = len(y)
    T1 = np.empty((K, T), 'd')
    T2 = np.empty((K, T), 'B')

    # Initialize the tracking tables from first observation
    T1[:, 0] = Pi * B[:, y[0]]
    T2[:, 0] = 0

    # Iterate through the observations updating the tracking tables
    for i in range(1, T):
        T1[:, i] = np.max(T1[:, i - 1] * A.T * B[np.newaxis, :, y[i]].T, 1)
        T2[:, i] = np.argmax(T1[:, i - 1] * A.T, 1)

    # Build the output, optimal model trajectory
    x = np.empty(T, 'B')
    x[-1] = np.argmax(T1[:, T - 1])
    for i in reversed(range(1, T)):
        x[i - 1] = T2[x[i], i]

    return x, T1, T2
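A quick usage sketch (the numbers are made up for illustration): two hidden states, three observation symbols, and a short observation sequence. Rows of A and B must each sum to 1.

A = np.array([[0.7, 0.3],
              [0.4, 0.6]])
B = np.array([[0.5, 0.4, 0.1],
              [0.1, 0.3, 0.6]])
y = np.array([0, 2, 1, 2])

x, T1, T2 = viterbi(y, A, B)
print(x)  # most likely hidden-state sequence, one state index per observation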
I found the following code in the example repository of Artificial Intelligence: A Modern Approach. Is something like this what you're looking for?
def viterbi_segment(text, P):
    """Find the best segmentation of the string of characters, given the
    UnigramTextModel P."""
    # best[i] = best probability for text[0:i]
    # words[i] = best word ending at position i
    n = len(text)
    words = [''] + list(text)
    best = [1.0] + [0.0] * n
    ## Fill in the vectors best, words via dynamic programming
    for i in range(n+1):
        for j in range(0, i):
            w = text[j:i]
            if P[w] * best[i - len(w)] >= best[i]:
                best[i] = P[w] * best[i - len(w)]
                words[i] = w
    ## Now recover the sequence of best words
    sequence = []; i = len(words)-1
    while i > 0:
        sequence[0:0] = [words[i]]
        i = i - len(words[i])
    ## Return sequence of best words and overall probability
    return sequence, best[-1]
Hmm, I can post mine. It's not pretty though; please let me know if you need clarification. I wrote this relatively recently, specifically for part-of-speech tagging.
import copy

class Trellis:
    trell = []

    def __init__(self, hmm, words):
        self.trell = []
        temp = {}
        for label in hmm.labels:
            temp[label] = [0, None]
        for word in words:
            self.trell.append([word, copy.deepcopy(temp)])
        self.fill_in(hmm)

    def fill_in(self, hmm):
        for i in range(len(self.trell)):
            for token in self.trell[i][1]:
                word = self.trell[i][0]
                if i == 0:
                    self.trell[i][1][token][0] = hmm.e(token, word)
                else:
                    max = None
                    guess = None
                    c = None
                    for k in self.trell[i-1][1]:
                        c = self.trell[i-1][1][k][0] + hmm.t(k, token)
                        if max == None or c > max:
                            max = c
                            guess = k
                    max += hmm.e(token, word)
                    self.trell[i][1][token][0] = max
                    self.trell[i][1][token][1] = guess

    def return_max(self):
        tokens = []
        token = None
        for i in range(len(self.trell)-1, -1, -1):
            if token == None:
                max = None
                guess = None
                for k in self.trell[i][1]:
                    if max == None or self.trell[i][1][k][0] > max:
                        max = self.trell[i][1][k][0]
                        token = self.trell[i][1][k][1]
                        guess = k
                tokens.append(guess)
            else:
                tokens.append(token)
                token = self.trell[i][1][token][1]
        tokens.reverse()
        return tokens
I have just corrected the pseudo-code implementation of Viterbi in Wikipedia. From the initial (incorrect) version, it took me a while to figure out where I was going wrong, but I finally managed it, thanks partly to Kevin Murphy's implementation of viterbi_path.m in the MatLab HMM toolbox.
In the context of an HMM object with variables as shown:
hmm = HMM()
hmm.priors = np.array([0.5, 0.5]) # pi = prior probs
hmm.transition = np.array([[0.75, 0.25], # A = transition probs. / 2 states
                           [0.32, 0.68]])
hmm.emission = np.array([[0.8, 0.1, 0.1], # B = emission (observation) probs. / 3 obs modes
                         [0.1, 0.2, 0.7]])
The Python function to run Viterbi (best-path) algorithm is below:
import operator  # for itemgetter

def viterbi(self, observations):
    """Return the best path, given an HMM model and a sequence of observations"""
    # A - initialise stuff
    nSamples = len(observations[0])  # observations passed as a 1 x T row
    nStates = self.transition.shape[0] # number of states
    c = np.zeros(nSamples) # scale factors (necessary to prevent underflow)
    viterbi = np.zeros((nStates, nSamples)) # initialise viterbi table
    psi = np.zeros((nStates, nSamples), dtype=int) # initialise the best path table
    best_path = np.zeros(nSamples, dtype=int) # this will be your output
    # B - appoint initial values for viterbi and best path (bp) tables - Eq (32a-32b)
    viterbi[:, 0] = self.priors.T * self.emission[:, observations[0][0]]
    c[0] = 1.0/np.sum(viterbi[:, 0])
    viterbi[:, 0] = c[0] * viterbi[:, 0] # apply the scaling factor
    psi[0] = 0
    # C - Do the iterations for viterbi and psi for time>0 until T
    for t in range(1, nSamples): # loop through time
        for s in range(0, nStates): # loop through the states #(t-1)
            trans_p = viterbi[:, t-1] * self.transition[:, s]
            psi[s, t], viterbi[s, t] = max(enumerate(trans_p), key=operator.itemgetter(1))
            viterbi[s, t] = viterbi[s, t]*self.emission[s, observations[0][t]]
        c[t] = 1.0/np.sum(viterbi[:, t]) # scaling factor
        viterbi[:, t] = c[t] * viterbi[:, t]
    # D - Back-tracking
    best_path[nSamples-1] = viterbi[:, nSamples-1].argmax() # last state
    for t in range(nSamples-1, 0, -1): # states of (last-1)th to 0th time step
        best_path[t-1] = psi[best_path[t], t]
    return best_path
This is an old question, but none of the other answers were quite what I needed because my application doesn't have specific observed states.
Taking after @Rhubarb, I've also re-implemented Kevin Murphy's Matlab implementation (see viterbi_path.m), but I've kept it closer to the original. I've included a simple test case as well.
import numpy as np

def viterbi_path(prior, transmat, obslik, scaled=True, ret_loglik=False):
    '''Finds the most-probable (Viterbi) path through the HMM state trellis
    Notation:
        Z[t] := Observation at time t
        Q[t] := Hidden state at time t
    Inputs:
        prior: np.array(num_hid)
            prior[i] := Pr(Q[0] == i)
        transmat: np.ndarray((num_hid,num_hid))
            transmat[i,j] := Pr(Q[t+1] == j | Q[t] == i)
        obslik: np.ndarray((num_hid,num_obs))
            obslik[i,t] := Pr(Z[t] | Q[t] == i)
        scaled: bool
            whether or not to normalize the probability trellis along the way
            doing so prevents underflow by repeated multiplications of probabilities
        ret_loglik: bool
            whether or not to return the log-likelihood of the best path
    Outputs:
        path: np.array(num_obs)
            path[t] := Q[t]
    '''
    num_hid = obslik.shape[0] # number of hidden states
    num_obs = obslik.shape[1] # number of observations (not observation *states*)

    # trellis_prob[i,t] := Pr((best sequence of length t-1 goes to state i), Z[1:(t+1)])
    trellis_prob = np.zeros((num_hid, num_obs))
    # trellis_state[i,t] := best predecessor state given that we ended up in state i at t
    trellis_state = np.zeros((num_hid, num_obs), dtype=int) # int because its elements will be used as indices
    path = np.zeros(num_obs, dtype=int) # int because its elements will be used as indices

    trellis_prob[:, 0] = prior * obslik[:, 0] # element-wise mult
    if scaled:
        scale = np.ones(num_obs) # only instantiated if necessary to save memory
        scale[0] = 1.0 / np.sum(trellis_prob[:, 0])
        trellis_prob[:, 0] *= scale[0]

    trellis_state[:, 0] = 0 # arbitrary value since t == 0 has no predecessor
    for t in xrange(1, num_obs):
        for j in xrange(num_hid):
            trans_probs = trellis_prob[:, t-1] * transmat[:, j] # element-wise mult
            trellis_state[j, t] = trans_probs.argmax()
            trellis_prob[j, t] = trans_probs[trellis_state[j, t]] # max of trans_probs
            trellis_prob[j, t] *= obslik[j, t]
        if scaled:
            scale[t] = 1.0 / np.sum(trellis_prob[:, t])
            trellis_prob[:, t] *= scale[t]

    path[-1] = trellis_prob[:, -1].argmax()
    for t in range(num_obs-2, -1, -1):
        path[t] = trellis_state[(path[t+1]), t+1]

    if not ret_loglik:
        return path
    else:
        if scaled:
            loglik = -np.sum(np.log(scale))
        else:
            p = trellis_prob[path[-1], -1]
            loglik = np.log(p)
        return path, loglik


if __name__ == '__main__':
    # Assume there are 3 observation states, 2 hidden states, and 5 observations
    priors = np.array([0.5, 0.5])
    transmat = np.array([
        [0.75, 0.25],
        [0.32, 0.68]])
    emmat = np.array([
        [0.8, 0.1, 0.1],
        [0.1, 0.2, 0.7]])
    observations = np.array([0, 1, 2, 1, 0], dtype=int)
    obslik = np.array([emmat[:, z] for z in observations]).T
    print viterbi_path(priors, transmat, obslik)                                #=> [0 1 1 1 0]
    print viterbi_path(priors, transmat, obslik, scaled=False)                  #=> [0 1 1 1 0]
    print viterbi_path(priors, transmat, obslik, ret_loglik=True)               #=> (array([0, 1, 1, 1, 0]), -7.776472586614755)
    print viterbi_path(priors, transmat, obslik, scaled=False, ret_loglik=True) #=> (array([0, 1, 1, 1, 0]), -8.0120386579275227)
Note that this implementation does not use emission probabilities directly but uses a variable obslik. Generally, emissions[i,j] := Pr(observed_state == j | hidden_state == i), making emissions.shape == (num_hidden_states, num_obs_states).
However, given a sequence observations[t] := observation at time t, all the Viterbi algorithm requires is the likelihood of that observation for each hidden state. Hence, obslik[i,t] := Pr(observations[t] | hidden_state == i). The actual value of the observed state isn't necessary.
I have modified @Rhubarb's answer for the condition where the marginal probabilities are already known (e.g. by computing the forward-backward algorithm).
import operator
import numpy as np

def viterbi(transition_probabilities, conditional_probabilities):
    # Initialise everything
    num_samples = conditional_probabilities.shape[1]
    num_states = transition_probabilities.shape[0] # number of states
    c = np.zeros(num_samples) # scale factors (necessary to prevent underflow)
    viterbi = np.zeros((num_states, num_samples)) # initialise viterbi table
    best_path_table = np.zeros((num_states, num_samples)) # initialise the best path table
    best_path = np.zeros(num_samples).astype(np.int32) # this will be your output
    # B - appoint initial values for viterbi and best path (bp) tables - Eq (32a-32b)
    viterbi[:, 0] = conditional_probabilities[:, 0]
    c[0] = 1.0/np.sum(viterbi[:, 0])
    viterbi[:, 0] = c[0] * viterbi[:, 0] # apply the scaling factor
    # C - Do the iterations for viterbi and psi for time>0 until T
    for t in range(1, num_samples): # loop through time
        for s in range(0, num_states): # loop through the states #(t-1)
            trans_p = viterbi[:, t-1] * transition_probabilities[:, s] # transition probs of each state transitioning
            best_path_table[s, t], viterbi[s, t] = max(enumerate(trans_p), key=operator.itemgetter(1))
            viterbi[s, t] = viterbi[s, t] * conditional_probabilities[s][t]
        c[t] = 1.0/np.sum(viterbi[:, t]) # scaling factor
        viterbi[:, t] = c[t] * viterbi[:, t]
    ## D - Back-tracking
    best_path[num_samples-1] = viterbi[:, num_samples-1].argmax() # last state
    for t in range(num_samples-1, 0, -1): # states of (last-1)th to 0th time step
        best_path[t-1] = best_path_table[best_path[t], t]
    return best_path
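A hedged usage sketch (the numbers are made up; conditional_probabilities would normally be the per-state observation likelihoods or marginals from a forward-backward pass, shaped (num_states, num_samples)):

transition_probabilities = np.array([[0.9, 0.1],
                                     [0.2, 0.8]])
conditional_probabilities = np.array([[0.8, 0.3, 0.4, 0.7],
                                      [0.2, 0.7, 0.6, 0.3]])
print(viterbi(transition_probabilities, conditional_probabilities))  # array of 4 state indices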
