Topological sort (Kahn's algorithm) trouble - python

I am having trouble wrapping my head around my code in the nested for loop. I am following the Kahn's algorithm here on wiki: Kahn's. I don't understand how to test for if outgoingEdge has incoming edges for each endArray element (m).
Here is what I have so far:
def topOrdering(self, graph):
retList = []
candidates = set()
left = []
right = []
for key in graph:
left.append(key)
right.append(graph[key])
flattenedRight = [val for sublist in right for val in sublist]
for element in left:
if element not in flattenedRight:
#set of all nodes with no incoming edges
candidates.add(element)
candidates = sorted(candidates)
while len(candidates) != 0:
a = candidates.pop(0)
retList.append(a)
endArray = graph[a]
for outGoingEdge in endArray:
if outGoingEdge not in flattenedRight:
candidates.append(outGoingEdge)
#flattenedRight.remove(outGoingEdge)
del outGoingEdge
if not graph:
return "the input graph is not a DAG"
else:
return retList
Here is a picture visualizing my algorithm. Graph is in a form of an adjacency list.

You can store indegree (number of incoming edges) separately and decrement the count every time you remove a vertex from empty set. When count becomes 0 add the vertex to empty set to be processed later. Here's example:
def top_sort(adj_list):
# Find number of incoming edges for each vertex
in_degree = {}
for x, neighbors in adj_list.items():
in_degree.setdefault(x, 0)
for n in neighbors:
in_degree[n] = in_degree.get(n, 0) + 1
# Iterate over edges to find vertices with no incoming edges
empty = {v for v, count in in_degree.items() if count == 0}
result = []
while empty:
# Take random vertex from empty set
v = empty.pop()
result.append(v)
# Remove edges originating from it, if vertex not present
# in adjacency list use empty list as neighbors
for neighbor in adj_list.get(v, []):
in_degree[neighbor] -= 1
# If neighbor has no more incoming edges add it to empty set
if in_degree[neighbor] == 0:
empty.add(neighbor)
if len(result) != len(in_degree):
return None # Not DAG
else:
return result
ADJ_LIST = {
1: [2],
2: [3],
4: [2],
5: [3]
}
print(top_sort(ADJ_LIST))
Output:
[1, 4, 5, 2, 3]

Related

Attribute Error for Prim's Algorithm implementation in Python

I've been struggling with the code used to implement Prim's Algorithm in Python. I really hope that someone help me as I needed this specific code for my Case Study. Thank you!
Here's the code:
# A Python program for Prims's MST for
# adjacency list representation of graph
from collections import defaultdict
import sys
class Heap():
def __init__(self):
self.array = []
self.size = 0
self.pos = []
def newMinHeapNode(self, v, dist):
minHeapNode = [v, dist]
return minHeapNode
# A utility function to swap two nodes of
# min heap. Needed for min heapify
def swapMinHeapNode(self, a, b):
t = self.array[a]
self.array[a] = self.array[b]
self.array[b] = t
# A standard function to heapify at given idx
# This function also updates position of nodes
# when they are swapped. Position is needed
# for decreaseKey()
def minHeapify(self, idx):
smallest = idx
left = 2 * idx + 1
right = 2 * idx + 2
if left < self.size and self.array[left][1] < \
self.array[smallest][1]:
smallest = left
if right < self.size and self.array[right][1] < \
self.array[smallest][1]:
smallest = right
# The nodes to be swapped in min heap
# if idx is not smallest
if smallest != idx:
# Swap positions
self.pos[ self.array[smallest][0] ] = idx
self.pos[ self.array[idx][0] ] = smallest
# Swap nodes
self.swapMinHeapNode(smallest, idx)
self.minHeapify(smallest)
# Standard function to extract minimum node from heap
def extractMin(self):
# Return NULL wif heap is empty
if self.isEmpty() == True:
return
# Store the root node
root = self.array[0]
# Replace root node with last node
lastNode = self.array[self.size - 1]
self.array[0] = lastNode
# Update position of last node
self.pos[lastNode[0]] = 0
self.pos[root[0]] = self.size - 1
# Reduce heap size and heapify root
self.size -= 1
self.minHeapify(0)
return root
def isEmpty(self):
return True if self.size == 0 else False
def decreaseKey(self, v, dist):
# Get the index of v in heap array
i = self.pos[v]
# Get the node and update its dist value
self.array[i][1] = dist
# Travel up while the complete tree is not
# hepified. This is a O(Logn) loop
while i > 0 and self.array[i][1] < \
self.array[(i - 1) / 2][1]:
# Swap this node with its parent
self.pos[ self.array[i][0] ] = (i-1)/2
self.pos[ self.array[(i-1)/2][0] ] = i
self.swapMinHeapNode(i, (i - 1)/2 )
# move to parent index
i = (i - 1) / 2;
# A utility function to check if a given vertex
# 'v' is in min heap or not
def isInMinHeap(self, v):
if self.pos[v] < self.size:
return True
return False
def printArr(parent, n):
for i in range(1, n):
print ("% d - % d" % (parent[i], i))
class Graph():
def __init__(self, V):
self.V = V
self.graph = defaultdict(list)
# Adds an edge to an undirected graph
def addEdge(self, src, dest, weight):
# Add an edge from src to dest. A new node is
# added to the adjacency list of src. The node
# is added at the begining. The first element of
# the node has the destination and the second
# elements has the weight
newNode = [dest, weight]
self.graph[src].insert(0, newNode)
# Since graph is undirected, add an edge from
# dest to src also
newNode = [src, weight]
self.graph[dest].insert(0, newNode)
# The main function that prints the Minimum
# Spanning Tree(MST) using the Prim's Algorithm.
# It is a O(ELogV) function
def PrimMST(self):
# Get the number of vertices in graph
V = self.V
# key values used to pick minimum weight edge in cut
key = []
# List to store contructed MST
parent = []
# minHeap represents set E
minHeap = Heap()
# Initialize min heap with all vertices. Key values of all
# vertices (except the 0th vertex) is is initially infinite
for v in range(V):
parent.append(-1)
key.append(sys.maxint)
minHeap.array.append( minHeap.newMinHeapNode(v, key[v]) )
minHeap.pos.append(v)
# Make key value of 0th vertex as 0 so
# that it is extracted first
minHeap.pos[0] = 0
key[0] = 0
minHeap.decreaseKey(0, key[0])
# Initially size of min heap is equal to V
minHeap.size = V;
# In the following loop, min heap contains all nodes
# not yet added in the MST.
while minHeap.isEmpty() == False:
# Extract the vertex with minimum distance value
newHeapNode = minHeap.extractMin()
u = newHeapNode[0]
# Traverse through all adjacent vertices of u
# (the extracted vertex) and update their
# distance values
for pCrawl in self.graph[u]:
v = pCrawl[0]
# If shortest distance to v is not finalized
# yet, and distance to v through u is less than
# its previously calculated distance
if minHeap.isInMinHeap(v) and pCrawl[1] < key[v]:
key[v] = pCrawl[1]
parent[v] = u
# update distance value in min heap also
minHeap.decreaseKey(v, key[v])
printArr(parent, V)
def main():
#pass
graph = Graph(9)
graph.addEdge(0, 7, 8)
graph.addEdge(1, 2, 8)
graph.addEdge(1, 7, 11)
graph.addEdge(2, 3, 7)
graph.addEdge(2, 8, 2)
graph.addEdge(2, 5, 4)
graph.addEdge(3, 4, 9)
graph.addEdge(3, 5, 14)
graph.addEdge(4, 5, 10)
graph.addEdge(5, 6, 2)
graph.addEdge(6, 7, 1)
graph.addEdge(6, 8, 6)
graph.addEdge(7, 8, 7)
graph.PrimMST()
if __name__ == '__main__':
main()
When I tried to run it, it returns an error 'Graph' object has no attribute 'addEdge'. I specifically need this code since this kind of Prim's Algorithm used a Edge List (node, node, weight) in adding an edge unlike other code that use Adjacency Matrix. Therefore, it is much easier to use this kind of code.

Given a list of words, determine whether the words can be chained to form a circle

Given a list of words, determine whether the words can be chained to form a circle. A word X
can be placed in front of another word Y in a circle if the last character of X is the same as
the first character of Y.
For example, the words ['chair', 'height', 'racket', touch', 'tunic'] can form the following circle:
chair --> racket --> touch --> height --> tunic --> chair
The output it has to be a txt file with one word per line, ex:
chair
racket
touch
height
tunic
I searched for the solution, but i only managed to get the partial solution which answers wether or not it can be a circle.
# Python program to check if a given directed graph is Eulerian or not
CHARS = 26
# A class that represents an undirected graph
class Graph(object):
def __init__(self, V):
self.V = V # No. of vertices
self.adj = [[] for x in range(V)] # a dynamic array
self.inp = [0] * V
# function to add an edge to graph
def addEdge(self, v, w):
self.adj[v].append(w)
self.inp[w]+=1
# Method to check if this graph is Eulerian or not
def isSC(self):
# Mark all the vertices as not visited (For first DFS)
visited = [False] * self.V
# Find the first vertex with non-zero degree
n = 0
for n in range(self.V):
if len(self.adj[n]) > 0:
break
# Do DFS traversal starting from first non zero degree vertex.
self.DFSUtil(n, visited)
# If DFS traversal doesn't visit all vertices, then return false.
for i in range(self.V):
if len(self.adj[i]) > 0 and visited[i] == False:
return False
# Create a reversed graph
gr = self.getTranspose()
# Mark all the vertices as not visited (For second DFS)
for i in range(self.V):
visited[i] = False
# Do DFS for reversed graph starting from first vertex.
# Starting Vertex must be same starting point of first DFS
gr.DFSUtil(n, visited)
# If all vertices are not visited in second DFS, then
# return false
for i in range(self.V):
if len(self.adj[i]) > 0 and visited[i] == False:
return False
return True
# This function returns true if the directed graph has an eulerian
# cycle, otherwise returns false
def isEulerianCycle(self):
# Check if all non-zero degree vertices are connected
if self.isSC() == False:
return False
# Check if in degree and out degree of every vertex is same
for i in range(self.V):
if len(self.adj[i]) != self.inp[i]:
return False
return True
# A recursive function to do DFS starting from v
def DFSUtil(self, v, visited):
# Mark the current node as visited and print it
visited[v] = True
# Recur for all the vertices adjacent to this vertex
for i in range(len(self.adj[v])):
if not visited[self.adj[v][i]]:
self.DFSUtil(self.adj[v][i], visited)
# Function that returns reverse (or transpose) of this graph
# This function is needed in isSC()
def getTranspose(self):
g = Graph(self.V)
for v in range(self.V):
# Recur for all the vertices adjacent to this vertex
for i in range(len(self.adj[v])):
g.adj[self.adj[v][i]].append(v)
g.inp[v]+=1
return g
# This function takes an of strings and returns true
# if the given array of strings can be chained to
# form cycle
def canBeChained(arr, n):
# Create a graph with 'alpha' edges
g = Graph(CHARS)
# Create an edge from first character to last character
# of every string
for i in range(n):
s = arr[i]
g.addEdge(ord(s[0])-ord('a'), ord(s[len(s)-1])-ord('a'))
# The given array of strings can be chained if there
# is an eulerian cycle in the created graph
return g.isEulerianCycle()
# Driver program
arr1 = ["for", "geek", "rig", "kaf"]
n1 = len(arr1)
if canBeChained(arr1, n1):
print ("Can be chained")
else:
print ("Cant be chained")
arr2 = ["aab", "abb"]
n2 = len(arr2)
if canBeChained(arr2, n2):
print ("Can be chained")
else:
print ("Can't be chained")
Source: https://www.geeksforgeeks.org/given-array-strings-find-strings-can-chained-form-circle/
This solution only returns the Boolean statement of the list, it means that if there is a circle it will output True. The goal for me is to try and expand this solution to give the list separated, i will give another example:
Input:
{"for", "geek", "rig", "kaf"}
Output:
for
rig
geek
kaf
for
The problem you're describing is the Eulerian circuit problem.
There is an algorithm implemented in module networkx:
networkx.algorithms.euler.eulerian_circuit
from networkx import DiGraph, eulerian_circuit
words = ['chair', 'height', 'racket', 'touch', 'tunic']
G = DiGraph()
G.add_weighted_edges_from(((w[0], w[-1], w) for w in words), weight='word')
result = [G[a][b]['word'] for a,b in eulerian_circuit(G)]
print(result)
# ['chair', 'racket', 'touch', 'height', 'tunic']
This seems like a lot of effort to solve this problem. Consider a simple solution like:
from collections import defaultdict
words = ['chair', 'height', 'racket', 'touch', 'tunic']
def findChains(words):
dictionary = defaultdict(list)
for word in words:
dictionary[word[0]].append(word)
chains = [[words[0]]] # start with an arbitrary word
while True:
new_chains = []
for chain in chains:
for follower in dictionary[chain[-1][-1]]:
if follower in chain:
continue
new_chains.append([*chain, follower])
if new_chains:
chains = new_chains
else:
break
return [chain for chain in chains if len(chain) == len(words) and chain[-1][-1] == chain[0][0]]
print(findChains(words))
OUTPUT
% python3 test.py
[['chair', 'racket', 'touch', 'height', 'tunic']]
%
Is the issue that a simple algorithm like the above becomes unworkable as the list of words gets longer? You also seem to assume a single solution, but with enough start and end letter redundancy, there could be multiple solutions. You need to code for multiple even if in the end you just pick one.

Undirected Breadth First Search for calculating distance

Trying to make a Breadth First Search, to create list of distance from one node to another. However i get confused with the for loop and the table.
I keep getting error 'list index out of range'
Expected output = [0,1,2,2,3]
#graph of nodes and vertex
G = {0: [1], 1: [2, 3], 2: [1, 3], 3: [4]}
print(G)
distance = []
for u in range(len(G)):
distance.append(float('inf')) #filling the table with infinity, just in case theres a disconnected nodes
#In the case looking for distance from point 0
distance[0] = 0
distance.append(0)
print(distance)
while distance!=[]:
u = distance.pop()
for v in G[u]: #v as the neighbor for every nodes
print(v)
if distance[v] == float('inf'):
distance[v] = distance[u]+1
print(distance)
distance.append(v)

Sort edges in BFS/DFS

I am working a problem of sorting edges, where edges are stored in a tuple form (node_i, node_j) like below
>> edgeLst
>> [('123','234'),
('123','456'),
('123','789'),
('456','765'),
('456','789')
('234','765')]
Note that edges are unique, and if you see ('123', '234'), you won't see ('234', '123') (the graph is undirected). And there might be a loop in the graph. Since the graph is very large, can anyone show me the efficient way to sort edges in BFS and DFS with a given start node, e.g., '123'?
Demo output:
>> edgeSorting(input_lst=edgeLst, by='BFS', start_node='123')
>> [('123','234'),
('123','456'),
('123','789'),
('234','765')
('456','765'),
('456','789')]
Here is how you could do it for BFS and DFS:
from collections import defaultdict
def sorted_edges(input_lst, by, start_node):
# Key the edges by the vertices
vertices = defaultdict(lambda: [])
for u, v in input_lst:
vertices[u].append(v)
vertices[v].append(u)
if by == 'DFS':
# Sort the lists
for lst in vertices:
lst = sorted(lst)
# Perform DFS
visited = set()
def recurse(a):
for b in vertices[a]:
if not (a, b) in visited:
yield ((a,b))
# Make sure this edge is not visited anymore in either direction
visited.add((a,b))
visited.add((b,a))
for edge in recurse(b):
yield edge
for edge in recurse(start_node):
yield edge
else: #BFS
# Collect the edges
visited = set()
queue = [start_node]
while len(queue):
for a in queue: # Process BFS level by order of id
level = []
for b in vertices[a]:
if not (a, b) in visited:
yield ((a,b))
# Add to next level to process
level.append(b)
# Make sure this edge is not visited anymore in either direction
visited.add((a,b))
visited.add((b,a))
queue = sorted(level)
edgeLst = [('123','234'),
('123','456'),
('123','789'),
('456','765'),
('456','789'),
('234','765')]
print (list(sorted_edges(edgeLst, 'DFS', '123')))
In Python 3, you can simplify:
for edge in recurse(b):
yield edge
to:
yield from recurse(b)
... and same for start_node.

Calculating the number of graphs created and the number of vertices in each graph from a list of edges

Given a list of edges such as, edges = [[1,2],[2,3],[3,1],[4,5]]
I need to find how many graphs are created, by this I mean how many groups of components are created by these edges. Then get the number of vertices in the group of components.
However, I am required to be able to handle 10^5 edges, and i am currently having trouble completing the task for large number of edges.
My algorithm is currently getting the list of edges= [[1,2],[2,3],[3,1],[4,5]] and merging each list as set if they have a intersection, this will output a new list that now contains group components such as , graphs = [[1,2,3],[4,5]]
There are two connected components : [1,2,3] are connected and [4,5] are connected as well.
I would like to know if there is a much better way of doing this task.
def mergeList(edges):
sets = [set(x) for x in edges if x]
m = 1
while m:
m = 0
res = []
while sets:
common, r = sets[0], sets[1:]
sets = []
for x in r:
if x.isdisjoint(common):
sets.append(x)
else:
m = 1
common |= x
res.append(common)
sets = res
return sets
I would like to try doing this in a dictionary or something efficient, because this is toooo slow.
A basic iterative graph traversal in Python isn't too bad.
import collections
def connected_components(edges):
# build the graph
neighbors = collections.defaultdict(set)
for u, v in edges:
neighbors[u].add(v)
neighbors[v].add(u)
# traverse the graph
sizes = []
visited = set()
for u in neighbors.keys():
if u in visited:
continue
# visit the component that includes u
size = 0
agenda = {u}
while agenda:
v = agenda.pop()
visited.add(v)
size += 1
agenda.update(neighbors[v] - visited)
sizes.append(size)
return sizes
Do you need to write your own algorithm? networkx already has algorithms for this.
To get the length of each component try
import networkx as nx
G = nx.Graph()
G.add_edges_from([[1,2],[2,3],[3,1],[4,5]])
components = []
for graph in nx.connected_components(G):
components.append([graph, len(graph)])
components
# [[set([1, 2, 3]), 3], [set([4, 5]), 2]]
You could use Disjoint-set data structure:
edges = [[1,2],[2,3],[3,1],[4,5]]
parents = {}
size = {}
def get_ancestor(parents, item):
# Returns ancestor for a given item and compresses path
# Recursion would be easier but might blow stack
stack = []
while True:
parent = parents.setdefault(item, item)
if parent == item:
break
stack.append(item)
item = parent
for item in stack:
parents[item] = parent
return parent
for x, y in edges:
x = get_ancestor(parents, x)
y = get_ancestor(parents, y)
size_x = size.setdefault(x, 1)
size_y = size.setdefault(y, 1)
if size_x < size_y:
parents[x] = y
size[y] += size_x
else:
parents[y] = x
size[x] += size_y
print(sum(1 for k, v in parents.items() if k == v)) # 2
In above parents is a dict where vertices are keys and ancestors are values. If given vertex doesn't have a parent then the value is the vertex itself. For every edge in the list the ancestor of both vertices is set the same. Note that when current ancestor is queried the path is compressed so following queries can be done in O(1) time. This allows the whole algorithm to have O(n) time complexity.
Update
In case components are required instead of just number of them the resulting dict can be iterated to produce it:
from collections import defaultdict
components = defaultdict(list)
for k, v in parents.items():
components[v].append(k)
print(components)
Output:
defaultdict(<type 'list'>, {3: [1, 2, 3], 5: [4, 5]})

Categories

Resources