deepcopy() is highly inefficient in copying a large graph object

deepcopy() is highly inefficient in copying a large graph object - python

Code is in Python.
There is a graph object, which in turn consists of vertex objects and edge objects.
I have to copy the complete graph object into a new graph object.
Using deepcopy is proving to be highly inefficient, not sure why.
It takes 170 secs out of total runtime of 200 secs just to copy using deepcopy.
One execution of deepcopy takes around 1 sec. Since, there is a loop, so all executions of deepcopy account for 170 secs out of total 200 secs
Sharing the code below:
import copy
class Vertex:
def __init__(self, key):
self.key = key
self.label = None
self.neighbors = set()
self.incident_edges = set()
self.in_left = None
def get_edge(self, neighbor):
for e in self.incident_edges:
if neighbor in e.vertices:
return e
return False
def set_label(self, label):
self.label = label
def set_in_left(self, in_left):
self.in_left = in_left
def filter_neighbors(self):
new_neighbors = set()
for v in self.neighbors:
for e in self.incident_edges:
if v == e.vertices[0] or v == e.vertices[1]:
new_neighbors.add(v)
break
self.neighbors = new_neighbors
class Edge:
def __init__(self, v1, v2, weight = math.inf, heur = False):
'''Edge constructor.
Parameters
----------
v1 : str, required (endpoint1 key)
v2 : str, required (endpoint2 key)
weight : int, optional (default = inf)
'''
self.vertices = [v1, v2]
self.weight = weight
if heur == True:
self.typeOfWeight = 'h' # can be 'h' - for heuristic or 'a' - for actual weight
else:
self.typeOfWeight = None
def get_pair_vertex(self, v):
for vertex in self.vertices:
if ( vertex != v.key ):
return vertex
def __eq__(self, e):
'''Edges with equal endpoints and weights are equal.'''
return (self.vertices == e.vertices
and self.weight == e.weight)
def __hash__(self):
'''Hash the vertices (frozen set) and weight.'''
return hash((frozenset(self.vertices), self.weight))
# ------------------------------------------------------------------------------------------------------------------------
class Graph:
def __init__( self, G = {}, heur = False ):
self.vertices = {}
# If the input graph is in the form of an adjacency matrix (then we won't have vertex labels such as 'r1', 't1' in the input; rather we need to create them)
goals_lessThan_robots = False # change 999
if type(G) is not dict:
# change 999 - for handling case when no of robots != no of goals
if G.shape[1] < G.shape[0]: # that is, if the no of goals is < nof of robots, then we change the left-right orientation
goals_lessThan_robots = True
for i in range(G.shape[0]):
v1 = 'r' + str(i)
for j in range(G.shape[1]):
v2 = 'g' + str(j)
# self.add_edge(v1, v2, G[i][j]) change 999
self.add_edge(v1, v2, G[i][j], goals_lessThan_robots, heur) # change 999
else: # if the input graph is in the form of dictionary
for v1 in G:
for v2 in G[v1]:
# self.add_edge(v1, v2, G[v1][v2]) # change 999
self.add_edge(v1, v2, G[v1][v2], goals_lessThan_robots, heur)
def add_vertex(self, key):
'''
Adds a vertex to the graph.
'''
self.vertices[key] = Vertex(key) # aks - instantiates a Vertex object and adds in Graphs' vertex dictionary
def add_edge( self, v1, v2, weight = math.inf, goals_lessThan_robots = False, heur = False ):
'''
Adds an edge to the graph.
'''
if v1 not in self.vertices:
self.add_vertex(v1)
if v2 not in self.vertices:
self.add_vertex(v2)
e = Edge(v1, v2, weight, heur)
self.vertices[v1].neighbors.add(v2)
self.vertices[v2].neighbors.add(v1)
self.vertices[v1].incident_edges.add(e)
self.vertices[v2].incident_edges.add(e)
self.vertices[v1].set_in_left(True) # aks 98
self.vertices[v2].set_in_left(False) # aks 98
def edge_in_equality_subgraph(self, e):
'''
Determines whether edge is in equality subgraph
'''
e_endpoints = list(e.vertices)
if (self.vertices[e_endpoints[0]].label == None or
self.vertices[e_endpoints[1]].label == None):
return False
return e.weight == (self.vertices[e_endpoints[0]].label +
self.vertices[e_endpoints[1]].label)
def equality_subgraph(self):
'''
Creates an equality subgraph with respect to labeling
'''
eq_H = copy.deepcopy(self)
for v in eq_H.vertices:
eq_H.vertices[v].incident_edges = list(filter(
self.edge_in_equality_subgraph,
eq_H.vertices[v].incident_edges))
eq_H.vertices[v].filter_neighbors()
return eq_H
In another method, I have below calls:
G = Graph( _G, heur = True )
eq_G = G.equality_subgraph()
The first line in equality_subgraph function is using deepcopy and is the culprit.
I created a bipartite graph, with 100 vertices on left and 100 vertices on right, so 200 vertices in total.
Request for help.

Related

Python Dictionary not being filled?

I'm trying to generate a random graph by first adding 20 vertices to it and then randomly match each of these with another vertex. The adjacency list is a dictionary.
When I try to add these to the graph I just get a key error and I'm not sure why.
Is the dictionary not being filled correctly?
Below is the code.
'''
class Vertex:
# Constructor for a new Vertex object. All vertex objects
# start with a distance of positive infinity.
def __init__(self, label):
self.label = label
self.distance = float('inf')
self.pred_vertex = None
class Graph:
def __init__(self):
self.adjacency_list = {}
self.edge_weights = {}
def add_vertex(self, new_vertex):
self.adjacency_list[new_vertex] = []
def add_directed_edge(self, from_vertex, to_vertex, weight = 1.0):
self.edge_weights[(from_vertex, to_vertex)] = weight
self.adjacency_list[from_vertex].append(to_vertex)
def add_undirected_edge(self, vertex_a, vertex_b, weight = 1.0):
self.add_directed_edge(vertex_a, vertex_b, weight)
self.add_directed_edge(vertex_b, vertex_a, weight)
g = Graph()
l = []
#add vertices 0-19 to the graph
for i in range(20):
l.append(i)
s = str(i)
g.add_vertex(Vertex(s))
#connect a random vertex (0-19) with another random vertex
edges = [[random.choice(l), random.randrange(0, 20)] for i in range(10)]
#add these random edges to the graph
for i in range(10):
g.add_undirected_edge(str(edges[i][0]), str(edges[i][1]), random.randint(0,100))
'''
Thank you!

The keys in your adjacency list are not numbers, but Vertex objects. That is why you are getting a key error, you are trying to look up the vertex using an str(integer), but you need to pass a Vertex object.
The simplest solution is to remove the Vertex class and just use the strings of the integers for your vertex labels and adjacency keys. That would reduce your code to this:
class Graph:
def __init__(self):
self.adjacency_list = {}
self.edge_weights = {}
def add_vertex(self, new_vertex):
self.adjacency_list[new_vertex] = []
def add_directed_edge(self, from_vertex, to_vertex, weight = 1.0):
self.edge_weights[(from_vertex, to_vertex)] = weight
self.adjacency_list[from_vertex].append(to_vertex)
def add_undirected_edge(self, vertex_a, vertex_b, weight = 1.0):
self.add_directed_edge(vertex_a, vertex_b, weight)
self.add_directed_edge(vertex_b, vertex_a, weight)
g = Graph()
l = []
#add vertices 0-19 to the graph
for i in range(20):
l.append(i)
s = str(i)
g.add_vertex(s)
#connect a random vertex (0-19) with another random vertex
edges = [[random.choice(l), random.randrange(0, 20)] for i in range(10)]
#add these random edges to the graph
for i in range(10):
g.add_undirected_edge(str(edges[i][0]), str(edges[i][1]), random.randint(0,100))
Another option is to store the adjacency using the string labels, but also store a mapping between the labels and the Vertex objects so they can be retrieved.
class Vertex:
# Constructor for a new Vertex object. All vertex objects
# start with a distance of positive infinity.
def __init__(self, label):
self.label = label
self.distance = float('inf')
self.pred_vertex = None
def __repr__(self):
return f'<Vertex {self.label!r}>'
class Graph:
def __init__(self):
self.adjacency_list = {}
self.vertex_map = {}
self.edge_weights = {}
def add_vertex(self, vertex_label):
self.adjacency_list[vertex_label] = []
self.vertex_map[vertex_label] = Vertex(vertex_label)
def add_directed_edge(self, from_vertex, to_vertex, weight = 1.0):
self.edge_weights[(from_vertex, to_vertex)] = weight
self.adjacency_list[from_vertex].append(to_vertex)
def add_undirected_edge(self, vertex_a, vertex_b, weight = 1.0):
self.add_directed_edge(vertex_a, vertex_b, weight)
self.add_directed_edge(vertex_b, vertex_a, weight)
g = Graph()
l = []
#add vertices 0-19 to the graph
for i in range(20):
l.append(i)
s = str(i)
g.add_vertex(s)
#connect a random vertex (0-19) with another random vertex
edges = [[random.choice(l), random.randrange(0, 20)] for i in range(10)]
#add these random edges to the graph
for i in range(10):
g.add_undirected_edge(str(edges[i][0]), str(edges[i][1]), random.randint(0,100))

How to efficiently copy a large Graph object in python?

I have a graph object, which in turn consists of vertex objects and edge objects.
I have to copy the entire graph object into a new Graph object.
Using deepcopy is proving to be highly inefficient, don't know why.
It takes 170 secs out of total runtime of 200 secs just to copy using deepcopy.
I request you to help me please.
One execution of deepcopy takes around 1 sec. Since, there is a loop, so all executions of deepcopy account for 170 secs out of total 200 secs
Sharing the code below:
import copy
class Vertex:
def __init__(self, key):
self.key = key
self.label = None
self.neighbors = set()
self.incident_edges = set()
self.in_left = None
def get_edge(self, neighbor):
for e in self.incident_edges:
if neighbor in e.vertices:
return e
return False
def set_label(self, label):
'''Label the vertex.'''
self.label = label
def set_in_left(self, in_left):
self.in_left = in_left
def filter_neighbors(self):
new_neighbors = set()
for v in self.neighbors:
for e in self.incident_edges:
if v == e.vertices[0] or v == e.vertices[1]:
new_neighbors.add(v)
break
self.neighbors = new_neighbors
class Edge:
def __init__(self, v1, v2, weight = math.inf, heur = False):
'''Edge constructor.
Parameters
----------
v1 : str, required (endpoint1 key)
v2 : str, required (endpoint2 key)
weight : int, optional (default = inf)
'''
self.vertices = [v1, v2]
self.weight = weight
if heur == True:
self.typeOfWeight = 'h' # can be 'h' - for heuristic or 'a' - for actual weight
else:
self.typeOfWeight = None
def get_pair_vertex(self, v):
for vertex in self.vertices:
if ( vertex != v.key ):
return vertex
def __eq__(self, e):
'''Edges with equal endpoints and weights are equal.'''
return (self.vertices == e.vertices
and self.weight == e.weight)
def __hash__(self):
'''Hash the vertices (frozen set) and weight.'''
return hash((frozenset(self.vertices), self.weight))
# ------------------------------------------------------------------------------------------------------------------------
class Graph:
def __init__( self, G = {}, heur = False ):
self.vertices = {}
# If the input graph is in the form of an adjacency matrix (then we won't have vertex labels such as 'r1', 't1' in the input; rather we need to create them)
goals_lessThan_robots = False # change 999
if type(G) is not dict:
# change 999 - for handling case when no of robots != no of goals
if G.shape[1] < G.shape[0]: # that is, if the no of goals is < nof of robots, then we change the left-right orientation
goals_lessThan_robots = True
for i in range(G.shape[0]):
v1 = 'r' + str(i)
for j in range(G.shape[1]):
v2 = 'g' + str(j)
# self.add_edge(v1, v2, G[i][j]) change 999
self.add_edge(v1, v2, G[i][j], goals_lessThan_robots, heur) # change 999
else: # if the input graph is in the form of dictionary
for v1 in G:
for v2 in G[v1]:
# self.add_edge(v1, v2, G[v1][v2]) # change 999
self.add_edge(v1, v2, G[v1][v2], goals_lessThan_robots, heur) # change 999
def add_vertex(self, key):
'''
Adds a vertex to the graph.
'''
self.vertices[key] = Vertex(key) # aks - instantiates a Vertex object and adds in Graphs' vertex dictionary
def add_edge( self, v1, v2, weight = math.inf, goals_lessThan_robots = False, heur = False ):
'''
Adds an edge to the graph.
'''
if v1 not in self.vertices:
self.add_vertex(v1)
if v2 not in self.vertices:
self.add_vertex(v2)
e = Edge(v1, v2, weight, heur)
self.vertices[v1].neighbors.add(v2)
self.vertices[v2].neighbors.add(v1)
self.vertices[v1].incident_edges.add(e)
self.vertices[v2].incident_edges.add(e)
self.vertices[v1].set_in_left(True) # aks 98
self.vertices[v2].set_in_left(False) # aks 98
def edge_in_equality_subgraph(self, e):
'''
Determines whether edge is in equality subgraph
'''
e_endpoints = list(e.vertices)
if (self.vertices[e_endpoints[0]].label == None or
self.vertices[e_endpoints[1]].label == None):
return False
return e.weight == (self.vertices[e_endpoints[0]].label +
self.vertices[e_endpoints[1]].label)
def equality_subgraph(self):
'''
Creates an equality subgraph with respect to labeling
'''
eq_H = copy.deepcopy(self)
for v in eq_H.vertices:
eq_H.vertices[v].incident_edges = list(filter(
self.edge_in_equality_subgraph,
eq_H.vertices[v].incident_edges))
eq_H.vertices[v].filter_neighbors()
return eq_H
In another method, I have below calls:
G = Graph( _G, heur = True )
eq_G = G.equality_subgraph()
The first line in equality_subgraph function is using deepcopy and is the culprit.
I created a bipartite graph, with 100 vertices on left and 100 vertices on right, so 200 vertices in total.

Why are the elements in my heapq not ordered python?

I am using a simple heapq in python with custom elements on which I implemented the lt function.
class Edge:
def __init__(self, cost, u, v):
self.u = u
self.v = v
self.cost = cost
def weight(self):
w = self.cost
v = self.v
while v.parent is not None:
w += v.const
v = v.parent
return w
def __lt__(self, other):
return self.weight() < other.weight()
Then I keep a heap of these elements in another array called P:
class Vertex:
def __init__(self, node=None):
#other stuff omited #####
self.P = []
def add_incoming_nodes(self, subgraph):
for node, costs in subgraph.items():
#if costs[self.vertex] is not 0: #node is not self
#push endpoints of the edge from another vertex to this vertex
heapq.heappush(self.P, Edge(costs[self.vertex], node, self))
The problem is that when I heappop an element, I would expect it to be the smallest element in my array right ? But this assertion here fails
#select arbitrary vertex
a = all_nodes[0]
while a.P: #while P[a] is not ∅
edge = heapq.heappop(a.P)
for a_edge in a.P:
assert edge.weight() < a_edge.weight()

(Python 2.7) Access variable from class with accessor/mutator

(Python 2.7) I'm trying to access the vertices variable from the SierpinskiTriangle class and use it in the second bit of code listed but it shows
TypeError: 'property' object is not iterable
I can only assume it is due to the accessors/mutators
Base code:
class Fractal(object):
# the constructor
def __init__(self, dimensions):
# the canvas dimensions
self.dimensions = dimensions
# the default number of points to plot is 50,000
self.num_points = 50000
# the default distance ratio is 0.5 (halfway)
self.r = 0.5
# accessors and mutators
#property
def vertices(self):
return self._vertices
#vertices.setter
def vertices(self, v):
self._vertices = v
class SierpinskiTriangle(Fractal):
# the constructor
def __init__(self, canvas):
# call the constructor in the superclass
Fractal.__init__(self, canvas)
# define the vertices based on the fractal size
v1 = Point(self.dimensions["mid_x"], self.dimensions["min_y"])
v2 = Point(self.dimensions["min_x"], self.dimensions["max_y"])
v3 = Point(self.dimensions["max_x"], self.dimensions["max_y"])
self.vertices = [ v1, v2, v3 ]
Code to get vertices in:
class ChaosGame(Canvas):
vertex_radius = 2
vertex_color = "red"
point_radius = 0
point_color = "black"
def __init__(self, master):
Canvas.__init__(self, master, bg = "white")
self.pack(fill = BOTH, expand = 1)
# a function that takes a string that represents the fractal to create
def make(self, f):
if f == "SierpinskiTriangle":
vertices = SierpinskiTriangle.vertices
if f == "SierpinskiCarpet":
vertices = []
if f == "Pentagon":
vertices = []
if f == "Hexagon":
vertices = []
if f == "Octagon":
vertices = []
print vertices
for point in vertices:
self.plot_point(self, point, ChaosGame.vertex_color, ChaosGame.vertex_radius)

This is because you are accessing the class instead of an object of that type.
Let's try it on a minimal example:
class Container:
def __init__(self):
self._content = range(10)
#property
def content(self):
return self._content
#content.setter
def set_content(self, c):
self._content = c
This works:
c = Container()
for number in c.content:
print(number)
(prints out numbers from 0 to 9).
But this fails:
for number in Container.content:
print(number)
with the error
TypeError Traceback (most recent call last)
<ipython-input-27-f1df89781355> in <module>()
1 # This doesn't:
----> 2 for number in Container.content:
3 print(number)
TypeError: 'property' object is not iterable
Besides of the problems with the properties, you didn't initialize an object, so the __init__ function of the class was never called and Container._content was not initialized.
In fact, you would get a similar problem if you had just used
class Container:
def __init__(self):
self.container = range(10)
(only that it would be an attribute error in this case).
Final note: This
for number in Container().content: # note the '()'!!
print(number)
works again, because we create a container object on the fly.

Make undirected graph from adjacency list

I'm trying to make an undirected graph from an adjacency list to practice the Karger's Min Cut algorithm. The following is my code
class Vertex(object):
'''Represents a vertex, with the indices of edges
incident on it'''
def __init__(self,name,edgeIndices=[]):
self.name = name
self.edgeIndices = edgeIndices
def getName(self):
return self.name
def addEdge(self,ind):
self.edgeIndices.append(ind)
def getEdges(self):
return self.edgeIndices
def __eq__(self,other):
return self.name == other.name
class Edge(object):
'''Represents an edge with the indices of its endpoints'''
def __init__(self,ends):
self.ends = ends
def getEnds(self):
return self.ends
def __eq__(self,other):
return (self.ends == other.ends)\
or ((self.ends[1],self.ends[0]) == other.ends)
class Graph(object):
def __init__(self,vertices,edges):
self.edges = edges
self.vertices = vertices
def createGraph(filename):
'''Input: Adjacency list
Output: Graph object'''
vertices = []
edges = []
with open(filename) as f:
for line in f:
elements = line.split()
newVert = Vertex(elements[0])
if newVert not in vertices:
vertices.append(newVert)
for verts in elements[1:]:
otherVert = Vertex(verts)
if otherVert not in vertices:
vertices.append(otherVert)
end1 = vertices.index(newVert)
end2 = vertices.index(otherVert)
newEdge = Edge((end1,end2))
if newEdge not in edges:
edges.append(newEdge)
newVert.addEdge(edges.index(newEdge))
return Graph(vertices,edges)
Suppose the adjacency list is as follows with vertices represented by integers
1 -> 2,3,4
2 -> 1,3
3 -> 1,2,4
4 -> 1,3
In total, this graph will have five edges, so the length of list holding indices of edges a vertex is associated with can't more than 5 long.
For instance, I expect the vertex '2' to have indices of just two edges, i.e. edges with vertices 1 and 3. Instead, what I get is [0, 1, 2, 3, 0, 2, 1, 3].
I need help to figure out what is going wrong.

First error comes from the Vertex init. When passing a list as default argument, Python instantiates it once, and share this instance with all future instances of Vertex.
Pass None, and use a local list if no list is given.
class Vertex(object):
def __init__(self,name,edgeIndices=None):
self.name = name
self.edgeIndices = edgeIndices if edgeIndices else []
In the createGraph method, when the vertex already exists in the graph you need to use it. See the added else: newVert = ...
You also seem to have an issue with the ligne splitting. See the iteration over elements[2].split(',').
def createGraph(filename):
'''Input: Adjacency list
Output: Graph object'''
vertices = []
edges = []
with open(filename) as f:
for line in f:
elements = line.split()
newVert = Vertex(elements[0])
if newVert not in vertices:
vertices.append(newVert)
else:
newVert = vertices[vertices.index(newVert)]
for verts in elements[2].split(','):
otherVert = Vertex(verts)
if otherVert not in vertices:
vertices.append(otherVert)
end1 = vertices.index(newVert)
end2 = vertices.index(otherVert)
newEdge = Edge((end1,end2))
if newEdge not in edges:
edges.append(newEdge)
newVert.addEdge(edges.index(newEdge))
return Graph(vertices,edges)
As a side note, I would try to use a dict to store the vertices (and edges) and do the lookup. List.index is used a lot, and you may create a lot of objects for nothing.

I would recommend to take a look at Dict, OrderedDict, Linked List based graph implementations. The are far more effective then based on lists and indexes.
To make you code work you can do the following:
Change a Vertex to avoid issue described in previous answer:
class Vertex(object):
def __init__(self,name, edgeIndices=None):
self.name = name
self.edgeIndices = edgeIndices or []
Let the graph do some work:
class Graph(object):
def __init__(self):
self.edges = []
self.vertices = []
def add_vertex(self, name):
vertex = Vertex(name)
if vertex not in self.vertices:
self.vertices.append(vertex)
def add_edge(self, *names):
self._add_vertices(names)
edge = self._add_edge(names)
self._update_vertices_links(edge, names)
def get_vertex_index(self, name):
vertex = Vertex(name)
return self.vertices.index(vertex)
def get_vertex(self, name):
return self.vertices[self.get_vertex_index(name)]
def _update_vertices_links(self, edge, names):
for name in names:
vertex = self.get_vertex(name)
vertex.addEdge(self.edges.index(edge))
def _add_edge(self, names):
edge = Edge((self.get_vertex_index(names[0]), self.get_vertex_index(names[1])))
if edge not in self.edges:
self.edges.append(edge)
return edge
def _add_vertices(self, names):
for name in names:
self.add_vertex(name)
def __repr__(self):
return "Vertices: %s\nEdges: %s" % (self.vertices, self.edges)
Create Graph:
def createGraph(filename):
with open(filename) as f:
graph = Graph()
for line in f:
elements = line.strip().split()
graph.add_vertex(elements[0])
for element in elements[2].split(","):
graph.add_edge(elements[0], element)
return graph
Run it:
graph = createGraph('input.txt')
print graph
Output for your input:
Vertices: [<Name:1 Edges:[0, 1, 2]>, <Name:2 Edges:[0, 3]>, <Name:3 Edges:[1, 3, 4]>, <Name:4 Edges:[2, 4]>]
Edges: [(0, 1), (0, 2), (0, 3), (1, 2), (2, 3)]

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

deepcopy() is highly inefficient in copying a large graph object - python

Related

Python Dictionary not being filled?

How to efficiently copy a large Graph object in python?

Why are the elements in my heapq not ordered python?

(Python 2.7) Access variable from class with accessor/mutator

Make undirected graph from adjacency list

Categories

Resources