Python Connected Components edges list

Python Connected Components edges list - python

I use the following algorithm in Python to find the connected components of a graph given as a list of edges.
# Module-level accumulator: components found so far. It is shared across calls,
# so calling connected_components() twice keeps extending the same partition.
components = []


def connected_components(pairs):
    """Group the nodes of an edge list into connected components.

    Parameters
    ----------
    pairs : iterable of (a, b)
        Undirected edges. Nodes only need to support ``==``.

    Returns
    -------
    list of list
        The module-level ``components`` list (mutated in place). Each inner
        list holds the nodes of one component, in order of first insertion.

    Notes
    -----
    Both endpoints are looked up *before* anything is modified. The previous
    single-pass scan appended ``a`` to ``b``'s component whenever that
    component happened to be scanned first, leaving ``a`` in two components
    instead of merging them.
    """
    for a, b in pairs:
        comp_a = next((c for c in components if a in c), None)
        comp_b = next((c for c in components if b in c), None)

        if comp_a is None and comp_b is None:
            # Neither endpoint seen yet: start a new component.
            # A self-loop contributes its node only once.
            components.append([a] if a == b else [a, b])
        elif comp_a is None:
            comp_b.append(a)
        elif comp_b is None:
            comp_a.append(b)
        elif comp_a is not comp_b:
            # The edge bridges two existing components: merge b's into a's.
            comp_a.extend(comp_b)
            # Remove by identity so an equal-looking list is never dropped.
            components[:] = [c for c in components if c is not comp_b]
    return components
This algorithm returns the components like this:
[ [n1,n2,n4],[n3,n5] ]
I would like to get the list of all edges in each connected component, like this:
[ [(n1,n2),(n2,n4),(n4,n1)],[(n3,n5)] ]
in the same order as the previous list, but I don't know how to create this list.
Thank you for your help.

Note: This doesn't require any python dependency.
I will share my approach, which uses a recursive depth-first search. I am assuming the graph is undirected; the following code can easily be adapted for a directed graph.
pairs = []  # edge list: (a, b) tuples of hashable nodes
adj_list = {}  # adjacency list: node -> list of neighbours
vis = {}  # visited flags: node -> bool
connected_components = []  # one list of nodes per connected component
temp_component = []  # nodes collected by the DFS currently running


# basic depth first search
def dfs(node):
    """Visit every node reachable from ``node`` and record it.

    Marks each reached node in ``vis`` and appends it to the module-level
    ``temp_component`` list. The search is recursive, so a very long chain of
    nodes can exceed Python's recursion limit.
    """
    vis[node] = True
    temp_component.append(node)
    for neighbour in adj_list[node]:
        if not vis[neighbour]:
            dfs(neighbour)


# main: build the undirected adjacency list and mark every node unvisited
for a, b in pairs:
    adj_list.setdefault(a, []).append(b)
    adj_list.setdefault(b, []).append(a)
    vis[a] = False
    vis[b] = False

# one DFS per not-yet-visited node yields one connected component
for a, b in pairs:
    if not vis[a]:
        temp_component = []
        dfs(a)
        connected_components.append(temp_component)

# once you have the connected components you can get the edge list of each one:
# answer[k] holds the edges of connected_components[k], in the order they
# appear in ``pairs`` and without duplicates
component_index = {
    node: index
    for index, component in enumerate(connected_components)
    for node in component
}
answer = [[] for _ in connected_components]
seen_edges = set()
for i, j in pairs:
    if (i, j) not in seen_edges:
        seen_edges.add((i, j))
        # both endpoints of an edge lie in the same component, so i suffices
        answer[component_index[i]].append((i, j))

Create a mini graph in python using apgl library.
You can use the SparseGraph class from apgl:
from apgl.graph import SparseGraph
Initiate a sparsegraph with number of nodes required.
graph = SparseGraph(num_vertices)
Then you can create a mini graph by adding edges between nodes of graph.
graph.addEdge(component1, component2)
Then just use findConnectedComponents function to find connected components.
graph.findConnectedComponents()

I know this is an old question, but I came across the same problem and was not happy with the performance of the given answers. So I wanted to share my own solution, which uses scipy's connected_components and shortest_path functions to handle arbitrary edge soups.
from scipy.sparse import csr_matrix, csgraph
import numpy as np
def coords_to_indices(coords):
    """
    Split coordinate-defined primitives into unique vertices plus index tuples.

    Parameters
    ----------
    coords : array like
        NxMxD array: N primitives, M vertices per primitive, D dimensions
        per vertex.

    Returns
    -------
    vertices : NDArray
        UxD array of the unique vertices (as ordered by ``np.unique``).
    primitives : NDArray
        NxM array; each entry is the row of ``vertices`` used at that position.
    """
    coords = np.asarray(coords)
    n_prims, verts_per_prim = coords.shape[0], coords.shape[1]
    # one row per vertex occurrence, so that duplicates can be collapsed
    flat_vertices = coords.reshape((-1, coords.shape[-1]))
    vertices, inverse = np.unique(flat_vertices, axis=0, return_inverse=True)
    # the inverse mapping, folded back to one row per primitive
    return vertices, inverse.reshape((n_prims, verts_per_prim))
def is_ordered(edge_indices) -> bool:
    """
    Check whether consecutive edges are chained end-to-start.

    Parameters
    ----------
    edge_indices : Nx2 array of indices, or Nx2xD array of coordinates
        The two vertices of every edge. A flat sequence is interpreted as
        consecutive (start, end) pairs.

    Returns
    -------
    bool
        True if the second vertex of every edge equals the first vertex of
        the following edge. Zero or one edge counts as ordered.

    Notes
    -----
    Vertices are compared as whole rows. The earlier flatten-based comparison
    mixed up individual coordinate components when given Nx2xD coordinates
    and could report either answer incorrectly.
    """
    edge_indices = np.asarray(edge_indices)
    if edge_indices.ndim < 2:
        edge_indices = edge_indices.reshape(-1, 2)
    if edge_indices.shape[0] < 2:
        return True
    # end vertex of edge k versus start vertex of edge k + 1
    return bool(np.all(edge_indices[:-1, 1] == edge_indices[1:, 0]))
def reorder_edges(connected_edges):
    """
    Reorder an unsorted list of edges (indices Nx2) or coordinates (Nx2xM) so that:
    - each edge is connected to the next edge
    - the connection runs from the second vertex of an edge to the first
      vertex of the following edge

    Parameters
    ----------
    connected_edges : array-like, Nx2 indices or Nx2xM coordinates
        Edges that build one segment. All edges have to be connected and every
        vertex has to be shared by exactly 2 edges. If the segment is not
        closed, exactly two vertices (the ends) appear in only one edge.

    Returns
    -------
    NDArray - like the input
        The edges, reordered. The orientation of individual edges may be
        flipped to obtain a consistent walking direction.

    Raises
    ------
    AssertionError
        If the input has the wrong shape/dtype or is not one contiguous chain.
    """
    connected_edges = np.asarray(connected_edges)
    if connected_edges.ndim == 3 and connected_edges.shape[1] == 2 and np.issubdtype(connected_edges.dtype, np.floating):
        # Vertex coordinates given: transform to edge indices and back.
        # This is done before the is_ordered() test so that ordering is always
        # decided on vertex indices, never on raw coordinate components.
        verts, edges = coords_to_indices(connected_edges)
        e_ordered = reorder_edges(edges)
        return verts[e_ordered]
    if is_ordered(connected_edges):
        return connected_edges
    assert np.issubdtype(connected_edges.dtype, np.integer) and connected_edges.ndim == 2 and connected_edges.shape[1] == 2, "Wrong shape"
    edges = connected_edges
    # find the unique indices and counts of the vertices
    idxs, counts = np.unique(connected_edges.flat, return_counts=True)
    if np.all(counts == 2):
        # Cyclic contour (all vertices are counted twice).
        # We have to cut the cycle and restore it afterwards; otherwise it is
        # hard to follow the two valid paths.
        new_edges = reorder_edges(edges[1:])
        # Add the missing piece. After reordering, it always connects the very
        # last vertex back to the very first one.
        # np.vstack replaces np.row_stack, which is deprecated in NumPy 2.0.
        new_edges = np.vstack((new_edges, (new_edges[-1, -1], new_edges[0, 0])))
        return new_edges
    # open contour
    n_edges = edges.shape[0]
    n_verts = edges.max() + 1
    # find the open ends in the chain of segments
    ends = idxs[counts == 1]
    assert len(ends) == 2, "More than 2 unconnected segments found. not a contiguous contour"
    # begin the connection walk at the end with the smaller index
    start_index, end_index = np.sort(ends)
    # build sparse matrix of edge relations
    csm = csr_matrix((np.full(n_edges, 1, dtype=np.bool_), (edges[:, 0], edges[:, 1])), (n_verts, n_verts))
    # get shortest path and number of hops from the start vertex
    n_hops, prev_idx = csgraph.shortest_path(csm, directed=False, indices=start_index, return_predecessors=True, unweighted=True)
    # limit to the existing vertices
    n_hops = n_hops[idxs]
    prev_idx = prev_idx[idxs]
    vert_order = np.argsort(n_hops)
    assert np.all(np.isfinite(n_hops)), "Unreachable parts detected"
    # check that the hop count rises by exactly one per vertex,
    # otherwise the edges do not form a single chain
    dst = n_hops[vert_order]
    assert np.all(dst[1:] - dst[:-1] == 1), "path not contiguous... something went wrong"
    # predecessors in walking order give every vertex of the chain but the last
    ordered_idxs = prev_idx[vert_order][1:]
    # add the farthest node
    ordered_idxs = np.append(ordered_idxs, end_index)
    new_edges = np.column_stack((ordered_idxs[:-1], ordered_idxs[1:]))
    return new_edges
def find_connected(edges):
    """
    Find and return connected components in a soup of edges. Each component
    is returned as an ordered list of edges.

    Parameters
    ----------
    edges : array_like
        A list of edges, either as an Nx2 list of vertex indices or as an
        Nx2xD list of vertex coordinates per edge.

    Returns
    -------
    List[NDArray]
        A list of Mx2 or Mx2xD arrays (depending on the input type), each
        containing an ordered list of connected (and maybe cyclic) edges.
        Empty input yields an empty list.
    """
    # accept plain lists as documented, not only ndarrays
    edges = np.asarray(edges)
    if edges.size == 0:
        return []
    if edges.ndim == 3 and edges.shape[1] == 2 and np.issubdtype(edges.dtype, np.floating):
        # vertex coordinates given, transform to edge indices and back
        verts, edges = coords_to_indices(edges)
        connected_edges = find_connected(edges)
        # return the result as coordinate arrays
        return [verts[e] for e in connected_edges]
    # get number of edges and maximum number of vertices
    n_edges = edges.shape[0]
    n_verts = edges.max() + 1
    # create a sparse matrix of the relations
    csm = csr_matrix((np.full(n_edges, 1, dtype=np.bool_), (edges[:, 0], edges[:, 1])), (n_verts, n_verts))
    # get number and labels of the connected components; labels refer to the vertices
    n_c, labels = csgraph.connected_components(csm, directed=False, return_labels=True)
    # Map the vertex labels onto the edges. Both endpoints of an edge must
    # carry the same label, so either column can be used below.
    edge_labels = labels[edges]
    assert np.all(edge_labels[:, 0] == edge_labels[:, 1])
    connected_edges = []
    for label in range(n_c):
        edge_mask = edge_labels[:, 0] == label
        if not np.any(edge_mask):
            # label belongs to a vertex index that is not used by any edge
            continue
        # reorder_edges returns already-ordered input unchanged
        connected_edges.append(reorder_edges(edges[edge_mask, :]))
    return connected_edges
Short Example:
def makePoly(npts=360, r=10):
    """Return x and y coordinates of ``npts`` points on a circle of radius ``r``.

    The angles run from 0 to 2*pi inclusive, so the last point coincides with
    the first one.
    """
    theta = np.linspace(0, 2*np.pi, npts)
    return np.sin(theta) * r, np.cos(theta) * r
# create a polygon: 16 (x, y) vertices, one per row
verts = np.array(makePoly(16)).T
# create the edges: vertex k is connected to vertex k + 1
edges = np.column_stack((np.arange(verts.shape[0]), np.arange(verts.shape[0])+1))
# close the loop: the last edge leads back to vertex 0
edges[-1, -1] = 0
# shuffle all edges (fixed seed, so the example is reproducible)
np.random.seed(123)
np.random.shuffle(edges)
# remove some edges, so we get multiple chains
edges = np.delete(edges, [1, 2, 3], axis=0)
# swap the vertex order of some edges
swap = [4, 5, 6, 7, 8, 9]
edges[swap, :] = edges[swap, :][:, [1, 0]]
# now reorder everything
edges_ordered = find_connected(edges)
print(edges_ordered)
Result is
[
array([[ 0, 15],
[15, 14],
[14, 13],
[13, 12],
[12, 11]]),
array([[1, 2],
[2, 3],
[3, 4]]),
array([[ 5, 6],
[ 6, 7],
[ 7, 8],
[ 8, 9],
[ 9, 10]])
]

Related

reading a file and assigning it to a variable

I recently started coding and tried out a "maximum flow algorithm" implementation I found. I started changing parts of it to see how they work, and when I tried to change
graph = [[0, 16, 13, 0, 0, 0],
[0, 0, 10, 12, 0, 0],
[0, 4, 0, 0, 14, 0],
[0, 0, 9, 0, 0, 20],
[0, 0, 0, 7, 0, 4],
[0, 0, 0, 0, 0, 0]]
into
fileobject = open("Testdata.txt","r")
graph = fileobject.readlines();
i get these errors:
Traceback (most recent call last):
File "C:/Users/DELL/Desktop/maxflow.py", line 92, in <module>
print ("The maximum possible flow is %d " % g.FordFulkerson(source, sink))
File "C:/Users/DELL/Desktop/maxflow.py", line 57, in FordFulkerson
while self.BFS(source, sink, parent) :
File "C:/Users/DELL/Desktop/maxflow.py", line 38, in BFS
if visited[ind] == False and val > 0 :
TypeError: '>' not supported between instances of 'str' and 'int'
this is the complete code:
from collections import defaultdict
#This class represents a directed graph using adjacency matrix representation
class Graph:
    """Directed graph stored as an adjacency (capacity) matrix."""

    def __init__(self, graph):
        # The matrix is used directly as the residual graph (no copy is made).
        self.graph = graph
        self.ROW = len(graph)

    def BFS(self, s, t, parent):
        """Return True if the residual graph has a path from ``s`` to ``t``.

        ``parent`` is filled in place: ``parent[v]`` is the vertex from which
        ``v`` was first reached.
        """
        seen = [False] * self.ROW
        seen[s] = True
        frontier = [s]

        while frontier:
            current = frontier.pop(0)
            # enqueue every unvisited vertex reachable over residual capacity
            for neighbour, capacity in enumerate(self.graph[current]):
                if seen[neighbour]:
                    continue
                if capacity > 0:
                    frontier.append(neighbour)
                    seen[neighbour] = True
                    parent[neighbour] = current

        return seen[t]

    def FordFulkerson(self, source, sink):
        """Return the maximum flow from ``source`` to ``sink``.

        The capacity matrix is updated in place while augmenting.
        """
        parent = [-1] * self.ROW
        total = 0

        # keep augmenting while BFS still finds a source -> sink path
        while self.BFS(source, sink, parent):
            # bottleneck = smallest residual capacity along the found path
            bottleneck = float("Inf")
            node = sink
            while node != source:
                bottleneck = min(bottleneck, self.graph[parent[node]][node])
                node = parent[node]

            total += bottleneck

            # push the flow: reduce forward edges, grow the reverse edges
            node = sink
            while node != source:
                prev = parent[node]
                self.graph[prev][node] -= bottleneck
                self.graph[node][prev] += bottleneck
                node = prev

        return total
# Create a graph given in the above diagram
# NOTE: the file handle is never closed here.
fileobject = open("Testdata.txt","r")
# readlines() returns a list of text lines (str), not a matrix of integers;
# this is what leads to the "'>' not supported between 'str' and 'int'" error.
graph = fileobject.readlines();
g = Graph(graph)
source = 0; sink = 5
print ("The maximum possible flow is %d " % g.FordFulkerson(source, sink))
I need to assign these values to the variable "graph" by reading them from a file, without getting any of these errors. Any suggestions to improve this code?

I think you need to cast val to an int. When you read the graph from a file, all the numbers are read as strings.
from collections import defaultdict
#This class represents a directed graph using adjacency matrix representation
class Graph:
    """Directed graph using an adjacency (capacity) matrix representation.

    The constructor accepts either a matrix of numbers or the raw lines of a
    text file (one row per line, values separated by whitespace and/or
    commas — assumed format). Text values are converted to ``int`` once, up
    front, so that every later comparison and arithmetic operation works on
    numbers. Casting only inside ``BFS`` is not enough: ``min()`` and ``-=``
    in ``FordFulkerson`` would still operate on strings.
    """

    def __init__(self, graph):
        # Residual graph. The rows are rebuilt, so the caller's input is not
        # modified by FordFulkerson. Blank text lines are skipped.
        self.graph = [
            self._parse_row(row)
            for row in graph
            if not (isinstance(row, str) and not row.strip())
        ]
        self.ROW = len(self.graph)

    @staticmethod
    def _parse_row(row):
        """Turn one matrix row (text line or sequence) into a list of numbers."""
        if isinstance(row, str):
            row = row.replace(",", " ").split()
        return [int(cell) if isinstance(cell, str) else cell for cell in row]

    def BFS(self, s, t, parent):
        """Return True if there is a path from source ``s`` to sink ``t`` in
        the residual graph. Also fills ``parent`` in place to store the path.
        """
        # Mark all the vertices as not visited
        visited = [False] * self.ROW
        # Mark the source node as visited and enqueue it
        queue = [s]
        visited[s] = True
        # Standard BFS loop
        while queue:
            u = queue.pop(0)
            # Enqueue every unvisited vertex that is adjacent to u through an
            # edge with remaining capacity
            for ind, val in enumerate(self.graph[u]):
                if not visited[ind] and val > 0:
                    queue.append(ind)
                    visited[ind] = True
                    parent[ind] = u
        # True if the sink was reached from the source
        return visited[t]

    def FordFulkerson(self, source, sink):
        """Return the maximum flow from ``source`` to ``sink``."""
        # This array is filled by BFS and stores the augmenting path
        parent = [-1] * self.ROW
        max_flow = 0  # There is no flow initially
        # Augment the flow while there is a path from source to sink
        while self.BFS(source, sink, parent):
            # Find the minimum residual capacity of the edges along the path
            # filled by BFS, i.e. the maximum flow through the path found.
            path_flow = float("Inf")
            s = sink
            while s != source:
                path_flow = min(path_flow, self.graph[parent[s]][s])
                s = parent[s]
            # Add path flow to overall flow
            max_flow += path_flow
            # Update residual capacities of the edges and reverse edges
            # along the path
            v = sink
            while v != source:
                u = parent[v]
                self.graph[u][v] -= path_flow
                self.graph[v][u] += path_flow
                v = parent[v]
        return max_flow
# Create the graph from the file: one matrix row per line, integers separated
# by whitespace and/or commas (assumed file format). The values are converted
# to int here, because text read from a file is always str.
with open("Testdata.txt", "r") as fileobject:
    graph = [
        [int(token) for token in line.replace(",", " ").split()]
        for line in fileobject
        if line.strip()  # ignore blank lines
    ]
g = Graph(graph)
source = 0
sink = 5
print("The maximum possible flow is %d " % g.FordFulkerson(source, sink))

Indexerror on prim's algorithm

I'm running this Prim's algorithm code with negative weights, but I'm getting "IndexError: list assignment index out of range" at the line adjMatrix[G[i][0]][G[i][1]] = G[i][2], and I don't know what the problem is.
thanks for your time and help.
def createAdjMatrix(V, G):
    """Build a symmetric V x V adjacency matrix from an edge list.

    Args:
        V: number of vertices; valid vertex ids are 0 .. V-1.
        G: sequence of edges, each indexable as [u, v, weight].

    Returns:
        A list of V lists of length V. Entry [u][v] holds the edge weight;
        0 means "no edge", so zero-weight edges cannot be represented.

    Raises:
        IndexError: if an edge refers to a vertex outside 0 .. V-1. Negative
            ids are rejected too; plain list indexing would silently wrap
            them around to the end of the matrix.
    """
    # create V x V matrix filled with 0 edge weights between all vertices
    adjMatrix = [[0] * V for _ in range(V)]
    # populate adjacency matrix with correct edge weights
    for edge in G:
        u, v, weight = edge[0], edge[1], edge[2]
        if not (0 <= u < V and 0 <= v < V):
            raise IndexError(
                "edge (%r, %r) refers to a vertex outside 0..%d; "
                "V must be greater than the largest vertex id" % (u, v, V - 1)
            )
        adjMatrix[u][v] = weight
        adjMatrix[v][u] = weight
    return adjMatrix
def prims(V, G):
    """Compute a minimum spanning tree with Prim's algorithm.

    Args:
        V: number of vertices (ids 0 .. V-1).
        G: edge list of [u, v, weight] entries; negative weights are fine.

    Returns:
        A list of V-1 edges [from_vertex, to_vertex, weight] in the order in
        which they were added to the tree. Empty for V <= 1.

    Raises:
        ValueError: if the graph is not connected, so no spanning tree exists.
    """
    # create adj matrix from graph
    adjMatrix = createAdjMatrix(V, G)
    if V <= 1:
        return []
    # arbitrarily choose initial vertex from graph
    vertex = 0
    MST = []
    edges = []  # candidate edges leaving the tree built so far
    visited = set()  # set gives O(1) membership tests
    # run until the MST contains every vertex from the graph
    while len(MST) != V - 1:
        # mark this vertex as visited
        visited.add(vertex)
        # add each edge of the new vertex to the list of potential edges
        for r in range(V):
            if adjMatrix[vertex][r] != 0:
                edges.append([vertex, r, adjMatrix[vertex][r]])
        # edges that lead back into the tree can never be chosen: drop them
        edges = [edge for edge in edges if edge[1] not in visited]
        if not edges:
            raise ValueError(
                "graph is not connected: no edge leaves the visited vertices"
            )
        # smallest weight wins; on ties the earliest candidate is taken
        minEdge = min(edges, key=lambda edge: edge[2])
        # remove min weight edge from list of edges and push it to the MST
        edges.remove(minEdge)
        MST.append(minEdge)
        # continue from the newly reached vertex
        vertex = minEdge[1]
    return MST
# Edge list: each entry is [vertex_a, vertex_b, weight]; vertex ids run from 0 to 10.
graph = [
[0,1,6807],
[1,2,-8874],
[2,3,-1055],
[3,4,4414],
[4,5,1728],
[5,6,-2237],
[6,7,-7507],
[7,8,7990],
[8,9,-5012],
[9,10,7353]]
## pass the # of vertices and the graph to run prims algorithm
## (11 vertices, because the ids 0..10 are used above)
print (prims(11, graph))
Edit: this is the traceback
adjMatrix[G[i][0]][G[i][1]] = G[i][2]
IndexError: list assignment index out of range

Finding the minimum and maximum value of a cluster for cyclic data

How can the minimum and maximum values of a cluster be determined for cyclic data, here in the range from 0 to 24, taking into account that the cluster goes beyond the limit of the value range?
Looking at the blue cluster, I would like to determine the values 22 and 2 as the boundaries of the cluster. Which algorithm can solve this problem?

I've found a solution to the problem.
Suppose the data is in the following format:
#!/usr/bin/env python3
import numpy as np
# Cyclic sample values; labels[k] is the cluster index of data[k].
data = np.array([0, 1, 2, 12, 13, 14, 15, 21, 22, 23])
labels = np.array([0, 0, 0, 1, 1, 1, 1, 0, 0, 0])
# get_cluster_bounds is the function shown further below.
bounds = get_cluster_bounds(data, labels)
print(bounds) # {0: array([21, 2]), 1: array([12, 15])}
You can find the function here:
#!/usr/bin/env python3
import numpy as np
def get_cluster_bounds(data: np.ndarray, labels: np.ndarray) -> dict:
    """
    Determine the start and end value of every cluster on a cyclic axis.

    After sorting the data there are five ways in which the points of a
    cluster can be placed on the value range:

    1. The cluster wraps around the edge of the cycle
       (|#####____________#####|): it starts after the last foreign point
       and ends before the first foreign point.
    2. The cluster lies at the beginning of the value range without
       wrapping (|##########____________|).
    3. The cluster lies at the end of the value range without wrapping
       (|____________##########|).
    4. The cluster lies inside the value range without touching a border
       (|_______##########_____|).
    5. The cluster covers the whole range and no other label exists
       (|######################|).

    Args:
        data: (n,) or (n, 1) array containing all data points.
        labels: (n,) or (n, 1) array containing the label of each point.
            Any label values are accepted; they need not be 0..k-1.

    Returns:
        bounds: A dictionary whose key is the cluster label and whose value
                is an array holding the start and end point of the cluster.
                Empty input yields an empty dictionary.
    """
    # Flatten so that (n, 1) column vectors behave like 1-D input.
    data = np.asarray(data).ravel()
    labels = np.asarray(labels).ravel()

    # Sort the data in ascending order.
    order = data.argsort()
    data = data[order]
    labels = labels[order]

    bounds = {}
    # Iterate over the labels that actually occur (as plain Python scalars),
    # instead of assuming that they are exactly 0 .. k-1.
    for label in np.unique(labels).tolist():
        mask = labels == label
        outside = np.flatnonzero(~mask)  # positions of foreign points
        inside = np.flatnonzero(mask)  # positions of the cluster's points

        if mask[0] and mask[-1]:
            if outside.size == 0:
                # Case 5
                start, end = data[0], data[-1]
            else:
                # Case 1
                start, end = data[outside[-1] + 1], data[outside[0] - 1]
        elif mask[0]:
            # Case 2
            start, end = data[0], data[outside[0] - 1]
        elif mask[-1]:
            # Case 3
            start, end = data[outside[-1] + 1], data[-1]
        else:
            # Case 4
            start, end = data[inside[0]], data[inside[-1]]

        bounds[label] = np.array([start, end])
    return bounds

Algorithm to multiply edges of a Networkx graph

So my problem is to find the longest path from a node to another node (or the same node) in a graph implemented with Networkx library.
I don't want to add the edges' weights but multiply them and take the biggest result. Obviously, passing only once by each node or not at all.
For example if I want to go from node 1 to node 4, the best result would be : 2 x 14 x 34 x 58
Graph example
Thank you for your help !

This may work:
import networkx as nx
G = nx.Graph()
# create the graph
G.add_edge(1, 2, weight=2)
G.add_edge(1, 4, weight=5)
G.add_edge(2, 3, weight=14)
G.add_edge(2, 4, weight=5)
G.add_edge(2, 5, weight=4)
G.add_edge(3, 5, weight=34)
G.add_edge(4, 5, weight=58)
start = 1  # start node
end = 4  # end node
# every simple path visits each node at most once
all_paths = list(nx.all_simple_paths(G, start, end))
# initialize result (None means "no path found yet")
largest_path = None
largest_path_weight = None
# iterate through all paths to find the one with the largest weight product
for p in all_paths:
    product = 1  # reset product for this path's calculation
    for pair in zip(p, p[1:]):  # consecutive nodes form the edges of the path
        an_edge = G.get_edge_data(pair[0], pair[1])  # get this edge's data
        product *= an_edge['weight']  # multiply all weights
    # Test for None explicitly: comparing a number with None raises a
    # TypeError on Python 3.
    if largest_path_weight is None or product > largest_path_weight:
        largest_path = p
        largest_path_weight = product
# display result (this form prints identically on Python 2 and 3)
print('largest path: {}'.format(largest_path))
print('weight: {}'.format(largest_path_weight))
for this example:
largest path: [1, 2, 3, 5, 4]
weight: 55216

python + maya: Returns me a list of nodes with incorrect names

I've created a function which creates a grid of circles and I need to collect the circle nodes created into a list so I can later manipulate the nodes. The problem is that I noticed the nodeList is given the nodes name before it's auto-renamed by maya to be unique. You'll notice when you run this script that the collected names are all the same but when you selected them in maya they are incremented to be unique.
I'm returned this
[u'mainShape_00', u'makeNurbCircle1']
[u'|mainShape_00', u'makeNurbCircle2']
[u'|mainShape_00', u'makeNurbCircle3']...
When it should be
[u'mainShape_00', u'makeNurbCircle1']
[u'|mainShape_01', u'makeNurbCircle2']
[u'|mainShape_02', u'makeNurbCircle3']...
Here is the script
# Import Modules
import maya.cmds as cmds
import random
# Scene setup
try:
    cmds.select(all=True)
    cmds.delete()
except Exception:
    # Best-effort scene cleanup: a failure here is deliberately ignored
    # (presumably it happens when the scene is already empty — TODO confirm).
    # `Exception` rather than a bare `except` keeps Ctrl-C / SystemExit working.
    pass
# create 2D grid of circles
numRows = 4
numColumns = 3
radiusMin = .1
radiusMax = .75
# create empty group for nodes
nodeGroup = cmds.group(em=True, name='main_group_00')
nodeList = []
for r in range(numRows):
    for c in range(numColumns):
        # Calculate random radius
        radius = random.uniform(radiusMin, radiusMax)
        # Create circle shape and transform it
        node = cmds.circle(n='mainShape_00', ch=True, o=True, nr=(0, 0, 1), c=(0, 0, 0), r=radius)
        cmds.xform(node, t=(r*(radiusMax*2), c*(radiusMax*2), 0))
        # Parent node under the group node
        # NOTE: `node` still holds the name the circle had *before* parenting;
        # the return value of cmds.parent() is not captured here.
        cmds.parent(node[0], nodeGroup, relative=False)
        # Append nodes to list
        nodeList.append(node)
for n in nodeList:
    shape = n
    # print(...) with a single argument behaves the same on Python 2 and 3
    print(shape)

node is 'mainShape_00' because at that time, that's what it's named. There is no collision until it's parented under nodeGroup. Grab the real name after parenting:
node[0] = cmds.parent(node[0], nodeGroup, relative=False)[0]
This substitutes the original node[0] with the newly parented node[0]

Why not name the nodes yourself, like this:
x = 0
padding = str(x).zfill(2)
mainShapeName = 'mainShape_' + padding
x += 1
# Create circle shape and transform it
node = cmds.circle(n=mainShapeName, ch=True, o=True, nr=(0, 0, 1), c=(0, 0, 0), r=radius)
cmds.xform(node, t=(r*(radiusMax*2), c*(radiusMax*2), 0) )
By incrementing the counter yourself, you avoid Maya's automatic renaming.
You should also give names that are unique across all groups.
Cheers.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python Connected Components edges list - python

Related

reading a file and assigning it to a variable

Indexerror on prim's algorithm

Finding the minimum and maximum value of a cluster for cyclic data

Algorithm to multiply edges of a Networkx graph

python + maya: Returns me a list of nodes with incorrect names

Categories

Resources