I am using NetworkX for drawing graphs. While searching the NetworkX documentation I found the code of the AntiGraph class, which confused me: I can't understand some lines of it. Please help me understand this code.
I attached this code:
import networkx as nx
from networkx.exception import NetworkXError
import matplotlib.pyplot as plt
class AntiGraph(nx.Graph):
    """
    Class for complement graphs.

    The main goal is to be able to work with big and dense graphs with
    a low memory footprint.

    In this class you add the edges that *do not exist* in the dense graph,
    the report methods of the class return the neighbors, the edges and
    the degree as if it was the dense graph. Thus it's possible to use
    an instance of this class with some of NetworkX functions.
    """

    # One attribute dict shared by every edge reported for the dense graph.
    all_edge_dict = {"weight": 1}

    def single_edge_dict(self):
        """Return the attribute dict shared by all edges."""
        return self.all_edge_dict

    edge_attr_dict_factory = single_edge_dict

    def __getitem__(self, n):
        """Return a dict of neighbors of node n in the dense graph.

        Parameters
        ----------
        n : node
           A node in the graph.

        Returns
        -------
        adj_dict : dictionary
           The adjacency dictionary for nodes connected to n.
        """
        # All nodes, minus the nodes stored as adjacent to n, minus n itself.
        return {
            node: self.all_edge_dict for node in set(self.adj) - set(self.adj[n]) - {n}
        }

    def neighbors(self, n):
        """Return an iterator over all neighbors of node n in the
        dense graph.

        Raises
        ------
        NetworkXError
            If looking up n in the stored adjacency raises KeyError.
        """
        try:
            return iter(set(self.adj) - set(self.adj[n]) - {n})
        except KeyError as e:
            raise NetworkXError(f"The node {n} is not in the graph.") from e

    def degree(self, nbunch=None, weight=None):
        """Return an iterator for (node, degree) in the dense graph.

        The node degree is the number of edges adjacent to the node.

        Parameters
        ----------
        nbunch : iterable container, optional (default=all nodes)
            A container of nodes.  The container will be iterated
            through once.  If nbunch is itself a node of the graph, the
            degree of that single node is returned as an int.

        weight : string or None, optional (default=None)
           The edge attribute that holds the numerical value used
           as a weight.  If None, then each edge has weight 1.
           The degree is the sum of the edge weights adjacent to the node.
           Not consulted when nbunch is a single node.

        Returns
        -------
        nd_iter : iterator or int
            The iterator returns two-tuples of (node, degree).  For a
            single node the return value is its degree as an int.

        See Also
        --------
        degree

        Examples
        --------
        >>> A = AntiGraph(nx.complement(nx.path_graph(4)))
        >>> A.degree(0)  # node 0 with degree 1
        1
        >>> list(A.degree([0, 1]))
        [(0, 1), (1, 2)]
        """
        if nbunch is None:
            nodes_nbrs = (
                (
                    n,
                    {
                        v: self.all_edge_dict
                        for v in set(self.adj) - set(self.adj[n]) - {n}
                    },
                )
                for n in self.nodes()
            )
        elif nbunch in self:
            # nbunch is a single node here: report its dense-graph degree.
            nbrs = set(self.nodes()) - set(self.adj[nbunch]) - {nbunch}
            return len(nbrs)
        else:
            nodes_nbrs = (
                (
                    n,
                    {
                        v: self.all_edge_dict
                        for v in set(self.nodes()) - set(self.adj[n]) - {n}
                    },
                )
                for n in self.nbunch_iter(nbunch)
            )

        if weight is None:
            return ((n, len(nbrs)) for n, nbrs in nodes_nbrs)
        else:
            # AntiGraph is a ThinGraph so all edges have weight 1
            return (
                (n, sum((nbrs[nbr].get(weight, 1)) for nbr in nbrs))
                for n, nbrs in nodes_nbrs
            )

    def adjacency_iter(self):
        """Return an iterator of (node, adjacency set) tuples for all nodes
        in the dense graph.

        This is the fastest way to look at every edge.
        For directed graphs, only outgoing adjacencies are included.

        Returns
        -------
        adj_iter : iterator
           An iterator of (node, adjacency set) for all nodes in
           the graph.
        """
        for n in self.adj:
            yield (n, set(self.adj) - set(self.adj[n]) - {n})
# Build several pairs of graphs, a regular graph
# and the AntiGraph of its complement, which behaves
# as if it were the original graph.
Gnp = nx.gnp_random_graph(20, 0.8, seed=42)
Anp = AntiGraph(nx.complement(Gnp))
Gd = nx.davis_southern_women_graph()
Ad = AntiGraph(nx.complement(Gd))
Gk = nx.karate_club_graph()
Ak = AntiGraph(nx.complement(Gk))
pairs = [(Gnp, Anp), (Gd, Ad), (Gk, Ak)]

# test connected components
for G, A in pairs:
    gc = [set(c) for c in nx.connected_components(G)]
    ac = [set(c) for c in nx.connected_components(A)]
    for comp in ac:
        assert comp in gc

# test biconnected components
for G, A in pairs:
    gc = [set(c) for c in nx.biconnected_components(G)]
    ac = [set(c) for c in nx.biconnected_components(A)]
    for comp in ac:
        assert comp in gc

# test degree
for G, A in pairs:
    node = list(G.nodes())[0]
    nodes = list(G.nodes())[1:4]
    assert G.degree(node) == A.degree(node)
    assert sum(d for n, d in G.degree()) == sum(d for n, d in A.degree())
    # AntiGraph is a ThinGraph, so all the weights are 1
    assert sum(d for n, d in A.degree()) == sum(d for n, d in A.degree(weight="weight"))
    assert sum(d for n, d in G.degree(nodes)) == sum(d for n, d in A.degree(nodes))

nx.draw(Gnp)
plt.show()
I can't understand these two lines:
(1) for v in set(self.adj) - set(self.adj[n]) - {n}
(2) nbrs = set(self.nodes()) - set(self.adj[nbunch]) - {nbunch}
To understand these lines, let's break down each term carefully. For the purpose of explanation, I will create the following graph:
import networkx as nx
# Endpoints of each edge, paired position by position.
source = [1, 2, 3, 4, 2, 3]
dest = [2, 3, 4, 6, 5, 5]
edge_list = list(zip(source, dest))
G = nx.Graph()
# The list built above is named edge_list; `ed_ls` was never defined.
G.add_edges_from(edge_list)
The Graph has the following edges:
print(G.edges())
# EdgeView([(1, 2), (2, 3), (2, 5), (3, 4), (3, 5), (4, 6)])
Now lets understand the terms in the above code:
set(self.adj)
If we print this out, we can see it is the set of nodes in the Graph:
# Outside the class there is no `self`; the example graph is named G.
print(set(G.adj))
# {1, 2, 3, 4, 5, 6}
set(self.adj[n])
This is the set of nodes adjacent to node n:
print(set(G.adj[2]))
# {1, 3, 5}
Now lets look at the first line that you asked in your question
for v in set(self.adj) - set(self.adj[n]) - {n}
This can be translated as follows:
for v in set of all nodes - set of nodes adjacent to node N - node N
Here, set of all nodes - set of nodes adjacent to node N gives the nodes that are not adjacent to node N (which still includes node N itself); subtracting {N} then removes node N. The result is exactly the set of neighbors of N in the complement of the graph.
Lets, look at an example:
# Lazily pair every node n of G with a dict keyed by the nodes that are
# NOT adjacent to n in G; each value is a one-entry weight dict.
nodes_nbrs = (
    (
        n,
        {
            v: {'weight': 1}
            # all nodes, minus n's neighbors in G, minus n itself
            for v in set(G.adj) - set(G.adj[n]) - {n}
        },
    )
    for n in G.nodes()
)
This will have the following value:
Node 1: {3: {'weight': 1}, 4: {'weight': 1}, 5: {'weight': 1}, 6: {'weight': 1}}
Node 2: {4: {'weight': 1}, 6: {'weight': 1}}
Node 3: {1: {'weight': 1}, 6: {'weight': 1}}
Node 4: {1: {'weight': 1}, 2: {'weight': 1}, 5: {'weight': 1}}
Node 6: {1: {'weight': 1}, 2: {'weight': 1}, 3: {'weight': 1}, 5: {'weight': 1}}
Node 5: {1: {'weight': 1}, 4: {'weight': 1}, 6: {'weight': 1}}
So if you look closely, for each node we get the nodes that were not adjacent to it.
For say, node 2, the calculation would look something like this:
{1, 2, 3, 4, 5, 6} - {1, 3, 5} - {2} = {4, 6}
Now lets come to the second line:
nbrs = set(self.nodes()) - set(self.adj[nbunch]) - {nbunch}
Here nbunch is a single node: this line sits in the `elif nbunch in self:` branch, which is only taken when nbunch is itself a node of the graph. So set(self.adj[nbunch]) is the set of nodes adjacent to that one node, exactly like set(self.adj[n]) in the first line, and set(self.nodes()) is again the set of all nodes.
So the expression can be translated as follows:
Set of all nodes - Set of nodes adjacent to node nbunch - node nbunch
This is the same as the first expression: it is the set of nodes that are not adjacent to nbunch. The method then returns len(nbrs), i.e. the degree of that node in the dense graph. When nbunch is instead a container of several nodes, the else branch is used and the same expression is evaluated once for each node n in nbunch.
Related
I have a network of Twitter users and their followers which I am modelling using NetworkX. I am trying to find a bidirectional link between users, i.e. if one node follows one of its neighbors, does that neighbor also follow that node.
Is there a built in function within NetworkX that would accomplish this? I tried using nx.reciprocity() but it just returns a single value rather than a dictionary.
You can determine if there is an edge connection between two nodes with networkx.Graph.has_edge. This could then be used to test if there are opposite directed edges between nodes.
import networkx as nx
def have_bidirectional_relationship(G, node1, node2):
    """Return True if G has an edge in both directions between two nodes.

    Parameters
    ----------
    G : graph
        Any object exposing ``has_edge(u, v)``.
    node1, node2 : node
        The two endpoints to check.

    Returns
    -------
    bool
        True only when both ``has_edge(node1, node2)`` and
        ``has_edge(node2, node1)`` hold.
    """
    return G.has_edge(node1, node2) and G.has_edge(node2, node1)
# Directed graph with a reciprocated pair (1 <-> 2) and a one-way arc (3 -> 4).
G = nx.DiGraph()
G.add_edges_from([(1, 2), (2, 1), (3, 4)])
print(f"Nodes 1, 2 have opposite edges: {have_bidirectional_relationship(G, 1, 2)}")
print(f"Nodes 3, 4 have opposite edges: {have_bidirectional_relationship(G, 3, 4)}")
Output
Nodes 1, 2 have opposite edges: True
Nodes 3, 4 have opposite edges: False
Find all nodes with bidirectional relationships
# Collect every pair of nodes linked in both directions, stored once as
# (smaller, larger).
biconnections = set()
for u, v in G.edges():
    if u > v:  # Avoid duplicates, such as (1, 2) and (2, 1)
        v, u = u, v
    if have_bidirectional_relationship(G, u, v):
        biconnections.add((u, v))
print(biconnections)
Output
{(1, 2)}
I am implementing a graph class and would like to write a function that calculates whether a given path is valid. I'm getting a key error in my is_path_valid function.
My graph is represented as {a:{b:c}} where a and b are a vertex connected to each other, and c is the weight of the edge
Given:
{0: {1: 5.0, 2: 10.0}, 1: {3: 3.0, 4: 6.0}, 3: {2: 2.0, 4: 2.0, 5: 2.0}, 4: {6: 6.0}, 5: {6: 2.0}, 7: {9: 1.0}, 8: {7: 2.0, 9: 4.0}}
Vertex 2 to 3 is a valid path.
My graph class:
class Graph:
    """Weighted graph stored in ``self.vertex`` as ``{u: {v: w}}``."""

    def __init__(self, n):
        """
        Constructor
        :param n: Number of vertices
        """
        self.order = n
        self.size = 0
        self.vertex = {}

    def insert_edge(self, u, v, w): #works fine
        """Store weight w under vertex[u][v] if it is not already present.

        Raises IndexError when neither branch below accepts (u, v), e.g.
        when u is new and u or v is not below ``self.order``.
        """
        if u in self.vertex and v < self.order:
            if not v in self.vertex[u]:
                self.vertex[u][v] = w
                self.size += 1
        elif u not in self.vertex and u < self.order and v < self.order:
            self.vertex[u] = {}
            self.vertex[u][v] = w
            self.size += 1
        else:
            raise IndexError

    def is_path_valid(self, path):
        """Check a path given as a list of vertices.

        Note that ``path`` is consumed: vertices are popped off its front.
        """
        while True:
            try:
                s = path.pop(0)
            except IndexError:
                break
            if path:
                d = path.pop(0)
                # This is the line the question is about: when s is not a key
                # of self.vertex the left operand is True, so `and` goes on to
                # evaluate self.vertex[s], which raises KeyError.
                if s not in self.vertex and d not in self.vertex[s]: #ERROR
                    return False
                s = d
        return True
My main function:
def main():
    """Build the sample graph and check a few paths against expectations."""
    g = Graph(10)
    g.insert_edge(0,1,5.0)
    g.insert_edge(0,2,10.0)
    g.insert_edge(1,3,3.0)
    g.insert_edge(1,4,6.0)
    g.insert_edge(3,2,2.0)
    g.insert_edge(3,4,2.0)
    g.insert_edge(3,5,2.0)
    g.insert_edge(4,6,6.0)
    g.insert_edge(5,6,2.0)
    g.insert_edge(7,9,1.0)
    g.insert_edge(8,7,2.0)
    g.insert_edge(8,9,4.0)
    # Expected results, written as assertions: `True(...)` / `False(...)`
    # would try to call a bool and fail with TypeError.
    assert g.is_path_valid([0, 2])
    assert g.is_path_valid([2, 3])
    assert g.is_path_valid([0, 2, 3])
    assert not g.is_path_valid([0, 1, 8])
    assert not g.is_path_valid([0, 4, 3])
    print(g.vertex) #to see the graph
    print(g.is_path_valid([2,3]))


if __name__ == '__main__':
    main()
My error:
if s not in self.vertex and d not in self.vertex[s]:
KeyError: 2
You are mixing up arcs (directed) and edges (undirected), which leads to unexpected behavior; you have to choose one of the two.
On the other hand, you can have an oriented graph and still have a function that adds "edges", in the sense that it adds both arcs (u, v) and (v, u). I put "edges" in quotes because they are not really edges (the term edge only has meaning in a non-oriented graph).
from collections import defaultdict
class Graph:
    """Directed weighted graph stored as ``{u: {v: weight}}``.

    Arcs are one-way links. An "edge" here means a pair of opposite arcs
    that carry the same weight.
    """

    def __init__(self):
        self._arcs = defaultdict(dict)

    def insert_arc(self, u, v, w):
        """Set the weight of the arc u -> v to w, replacing any old weight."""
        self._arcs[u][v] = w

    def is_arc(self, u, v):
        """Return True if the arc u -> v exists."""
        # Test `u in` first so the defaultdict never creates an empty entry.
        return u in self._arcs and v in self._arcs[u]

    def is_path_valid(self, path):
        """Return True if every consecutive pair in path is an arc.

        A path with fewer than two vertices has no pairs and is valid.
        """
        return all(self.is_arc(u, v) for u, v in zip(path, path[1:]))

    # We add the notion of "edges" with the following methods:
    def insert_edge(self, u, v, w):
        """Insert both arcs u -> v and v -> u with the same weight w."""
        self.insert_arc(u, v, w)
        self.insert_arc(v, u, w)

    @property
    def edges(self):
        """Set of ``((u, v), w)`` for every arc that is part of an edge.

        Each edge appears twice, once per direction.
        """
        return {
            ((u, v), w)
            for u, nbrs in self._arcs.items()
            for v, w in nbrs.items()
            if self.is_edge(u, v)
        }

    def is_edge(self, u, v):
        """Return True if both arcs exist and have equal weights."""
        is_symmetric = self.is_arc(u, v) and self.is_arc(v, u)
        if not is_symmetric:
            return False
        return self._arcs[u][v] == self._arcs[v][u]
You can now add either edges or arcs to your graph:
g = Graph()

# A one-way arc:
g.insert_arc(1, 8, 1.0)

# Both directions exist, but the weights differ:
g.insert_arc(1, 0, 3.0)
g.insert_arc(0, 1, 2.0)

# Symmetric links with equal weights (i.e. "edges"):
for u, v, w in ((1, 2, 7.0), (2, 3, 5.0), (0, 3, 13.0)):
    g.insert_edge(u, v, w)

# Only the arc (1, 8) was added, so this prints True, then False:
for path in ([1, 8], [8, 1]):
    print(g.is_path_valid(path))

# All True:
for path in ([0, 3], [2, 3], [0, 1, 2, 3, 0]):
    print(g.is_path_valid(path))

# One extra step makes this False, since (0, 2) doesn't exist:
print(g.is_path_valid([0, 1, 2, 3, 0, 2]))
We can use the edges property to find all "edges" (symmetric arcs with the same weight in both directions):
>>> print(g.edges)
{((3, 0), 13.0), ((3, 2), 5.0), ((2, 1), 7.0), ((1, 2), 7.0), ((2, 3), 5.0), ((0, 3), 13.0)}
Notice how (0, 1) is not part of the set of edges, that's because the link exists in both directions but the weight is not the same. The arc (1, 8) is obviously not here as (8, 1) is not part of the graph.
I have a graph of different locations:
import networkx as nx
G = nx.Graph()
# One weighted edge per Edge row; the row's distance becomes the weight.
for edge in Edge.objects.all():
    G.add_edge(edge.from_location, edge.to_location, weight=edge.distance)
The locations (nodes) have different types (toilets, building entrances, etc.) I need to find the shortest way from some given location to any location of a specific type. (For example: Find the nearest entrance from a given node.)
Is there some method in the Networkx library to solve that without loops? Something like:
nx.shortest_path(
G,
source=start_location,
target=[first_location, second_location],
weight='weight'
)
The result will be the shortest path to either the first_location or the second_location, if both locations are of the same type.
And is there some method that also returns path length?
We will do it in three steps.
Step 1: Let's create a dummy graph to illustrate
Step 2: Plot the graph and color nodes to indicate edge lengths and special node types (toilets, entrances etc.)
Step 3: From any given node (source) calculate shortest path to all reachable nodes, then subset to the node types of interest and select path with the minimum length.
The code below can definitely be optimized, but this might be easier to follow.
Step 1: Create the graph
# (from, to, distance) triples for every edge of the example graph
edge_objects = [(1, 2, 0.4), (1, 3, 1.7), (2, 4, 1.2), (3, 4, 0.3), (4, 5, 1.9),
                (4, 6, 0.6), (1, 7, 0.4), (3, 5, 1.7), (2, 6, 1.2), (6, 7, 0.3),
                (6, 8, 1.9), (8, 9, 0.6)]
toilets = [5, 9]  # Mark two nodes (5 & 9) to be toilets
entrances = [2, 7]  # Mark two nodes (2 & 7) to be Entrances
common_nodes = [1, 3, 4, 6, 8]  # all the other nodes
node_types = [(9, 'toilet'), (5, 'toilet'),
              (7, 'entrance'), (2, 'entrance')]

# create the networkx Graph with node types and specifying edge distances
G = nx.Graph()
for n, typ in node_types:
    G.add_node(n, type=typ)  # add each typed node to the graph
for from_loc, to_loc, dist in edge_objects:
    G.add_edge(from_loc, to_loc, distance=dist)  # add all the edges
Step 2: Draw the graph
# Draw the graph (optional step)
pos = nx.spring_layout(G)
nx.draw(G, pos, with_labels=True)

# Label each edge with its 'distance' attribute.
edge_labels = nx.get_edge_attributes(G, 'distance')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

# Colour each group of nodes: toilets blue, entrances green, the rest red.
for group, colour in ((toilets, 'b'), (entrances, 'g'), (common_nodes, 'r')):
    nx.draw_networkx_nodes(G, pos, nodelist=group, node_color=colour)
plt.show()
Step 3: create small functions to find the shortest path to node type
def subset_typeofnode(G, typestr):
    """Return the nodes of graph G whose 'type' attribute equals typestr.

    Nodes without a 'type' attribute are skipped. The result is a list in
    the order ``G.nodes(data=True)`` yields the nodes.
    """
    return [
        name
        for name, attrs in G.nodes(data=True)
        if 'type' in attrs and attrs['type'] == typestr
    ]
#All computations happen in this function
def find_nearest(typeofnode, fromnode):
    """Find the closest node of a given type, starting from fromnode.

    Uses the module-level graph ``G`` and its 'distance' edge attribute.

    Parameters
    ----------
    typeofnode : str
        Value of the 'type' node attribute to look for.
    fromnode : node
        Node to start from.

    Returns
    -------
    tuple
        ``(nearest, length, path)``, or ``(None, None, None)`` when no
        node of that type is reachable from fromnode.
    """
    # Lengths and paths must be computed with the same edge weight;
    # otherwise the returned path may not be the one whose length is reported.
    lengths = nx.single_source_dijkstra_path_length(G, fromnode, weight='distance')
    paths = nx.single_source_dijkstra_path(G, fromnode, weight='distance')

    # We are only interested in a particular type of node
    subnodes = set(subset_typeofnode(G, typeofnode))
    subdict = {k: v for k, v in lengths.items() if k in subnodes}

    if not subdict:  # not found, no path from source to typeofnode
        return (None, None, None)

    # key with the smallest length among the candidate nodes
    nearest = min(subdict, key=subdict.get)
    return (nearest, subdict[nearest], paths[nearest])
Test:
find_nearest('entrance', fromnode=5)
produces:
(7, 2.8, [5, 4, 6, 7])
Meaning: The nearest 'entrance' node from 5 is 7, the path length is 2.8 and the full path is: [5, 4, 6, 7]. Hope this helps you move forward. Please ask if anything is not clear.
I have this code. It reads a list of sentences, and then uses sklearn's CountVectorizer to compute word co-occurrences.
from sklearn.feature_extraction.text import CountVectorizer
# Three sample sentences to vectorize
data = ['this is a sentence', 'this was a monkey', 'all this is nice']
count_model = CountVectorizer(ngram_range=(1,1)) # default unigram model
X = count_model.fit_transform(data)
Xc = (X.T * X) # this is the co-occurrence matrix in sparse csr format
Xc.setdiag(0) # zero the diagonal: a word's co-occurrence with itself
matrix_dense = Xc.todense() # matrix in dense format
import networkx as nx
# Build a graph from the dense co-occurrence matrix
G=nx.from_numpy_matrix(matrix_dense)
If I do G.edges(data=True), it outputs this:
[(0, 1, {'weight': 1}),
(0, 3, {'weight': 1}),
(0, 5, {'weight': 1}),
(1, 3, {'weight': 1}),
(1, 4, {'weight': 1}),
(1, 5, {'weight': 2})
and so on. How can I get words instead of numbers as source, target?
EDIT:
This is a:
labels = count_model.get_feature_names()  # get the word labels
G = nx.from_numpy_matrix(matrix_dense)  # create graph
# NOTE(review): newer NetworkX releases spell this G.nodes[node] -- confirm
# against the installed version.
for node, label in zip(G.nodes(), labels):  # add labels to the graph
    G.node[node]['label'] = label
With networkx you can replace one set of node names with another. This is done with relabel_nodes.
Here is the example from the documentation. It creates a 3 node graph and then creates a copy of that graph with the new node names. You can also do directly to G by setting the optional argument copy to False in the function call.
G = nx.path_graph(3)
sorted(G)
> [0, 1, 2]
mapping = {0: 'a', 1: 'b', 2: 'c'}
H = nx.relabel_nodes(G, mapping)
sorted(H)
> ['a', 'b', 'c']
Is there a simpler, easier way to convert coordinates (long, lat) to a "networkx"-graph, than nested looping over those coordinates and adding weighted nodes/edges for each one?
# Add one weighted edge for every ordered pair of points (including i == i).
for idx1, itm1 in enumerate(data):
    for idx2, itm2 in enumerate(data):
        # NOTE(review): geopy distance functions appear to expect
        # (latitude, longitude) order -- confirm before relying on this.
        pos1 = (itm1["lng"], itm1["lat"])
        pos2 = (itm2["lng"], itm2["lat"])
        distance = vincenty(pos1, pos2).meters #geopy distance
        # print(idx1, idx2, distance)
        graph.add_edge(idx1, idx2, weight=distance)
The target is representing points as a graph in order to use several functions on this graph.
Edit: Using an adjacency_matrix would still need a nested loop
You'll have to do some kind of loop. But if you are using an undirected graph you can eliminate half of the graph.add_edge() calls (you only need to add u-v and not v-u). Also, as @EdChum suggests, you can use graph.add_weighted_edges_from() to make it go faster.
Here is a nifty way to do it
In [1]: from itertools import combinations
In [2]: import networkx as nx
In [3]: data = [10,20,30,40]
In [4]: edges = ( (s[0],t[0],s[1]+t[1]) for s,t in combinations(enumerate(data),2))
In [5]: G = nx.Graph()
In [6]: G.add_weighted_edges_from(edges)
In [7]: G.edges(data=True)
Out[7]:
[(0, 1, {'weight': 30}),
(0, 2, {'weight': 40}),
(0, 3, {'weight': 50}),
(1, 2, {'weight': 50}),
(1, 3, {'weight': 60}),
(2, 3, {'weight': 70})]