Related
The problem is the following :
I want to go from having this set
{'A/B', 'B/C', 'C/D', 'D/E', ..., 'U/V', 'V/W', ..., 'X/Y', ..., 'Z', ...}
to this set
{'A/B/C/D/E', ..., 'U/V/W', ..., 'X/Y', ..., 'Z', ...}
where the objects A, B, C ... are just strings of characters. The output should be independent of the order in which the objects appear (i.e. if you scramble the objects in the set, the solution should always be the same).
In other words I want to merge overlapping objects.
Inputs of the following form cannot happen :
{"A/B", "B/C", "B/D"}
{"A/B", "B/C", "C/A"}
There can be objects with no '/' in them.
Here is a partial solution I've come up with :
example = {'A/B', 'B/C', 'C/D', 'D/E', 'U/V', 'V/W', 'X/Y'}


def ext_3plus(unit):
    """Merge overlapping '/'-separated chains of ``unit`` in a single pass.

    ``unit`` is a set of strings and is modified in place; nothing is
    returned. Only the pairs that exist when the call starts are examined,
    so a chain produced by a merge is not merged again during the same call
    and the outcome depends on the iteration order of the set.
    """
    import itertools  # local import: the snippet never imports it

    # Iterate over a snapshot because ``unit`` is mutated inside the loop.
    for first, second in itertools.combinations(list(unit), 2):
        # An earlier merge in this pass may already have consumed either one;
        # removing it a second time would raise KeyError.
        if first not in unit or second not in unit:
            continue
        # The input can contain objects without '/'; they never merge.
        if '/' not in first or '/' not in second:
            continue
        first_parts = first.split('/')
        second_parts = second.split('/')
        if first_parts[0] == second_parts[-1]:
            merged = second_parts + first_parts[1:]
        elif first_parts[-1] == second_parts[0]:
            merged = first_parts + second_parts[1:]
        else:
            continue
        unit.remove(first)
        unit.remove(second)
        unit.add('/'.join(merged))
There are two problems. First, it performs only one iteration:
the result on {'A/B', 'B/C', 'C/D', 'D/E','U/V', 'V/W','X/Y'}
is :
{'A/B/C', 'C/D/E', 'U/V/W', 'X/Y'}
Second, if I include objects containing no '/', the input being {'A/B', 'B/C', 'C/D', 'D/E','U/V', 'V/W','X/Y','Z'}, the result is different from the previous one :
{'A/B', 'B/C/D', 'D/E', 'U/V/W', 'X/Y', 'Z'}
So there should be a recursive call on the first iteration etc.
How should it be done ?
If I understood correctly this can be seen as a graph problem, and solve as such:
import networkx as nx

example = {'A/B', 'B/C', 'C/D', 'D/E', 'U/V', 'V/W', 'X/Y', "Z"}

# convert each string to an edge
# each pattern on either side of / is a node
edges = [tuple(s.split("/")) for s in example if "/" in s]
nodes = [s for s in example if "/" not in s]

# create directed graph from edges
g = nx.from_edgelist(edges, create_using=nx.DiGraph)
g.add_nodes_from(nodes)

# The input rules out branches and cycles, so every weakly connected
# component is exactly one chain. Sorting each component on its own does not
# rely on the global topological order keeping a chain's nodes next to each
# other, which a topological sort is free not to do.
runs = [
    list(nx.topological_sort(g.subgraph(component)))
    for component in nx.weakly_connected_components(g)
]

# format the result
result = ["/".join(run) for run in runs]
print(result)
Output
['Z', 'U/V/W', 'X/Y', 'A/B/C/D/E']
If I'm not mistaken the overall complexity of this approach is O(n). More on topological sorting can be found here.
UPDATE
In networkx 2.6.4 use lexicographical_topological_sort
You can use a recursive generator function:
vals = ['A/B', 'B/C', 'C/D', 'D/E', 'U/V', 'V/W', 'X/Y']
data = [i.split('/') for i in vals]


def _chain_heads():
    """Return, in input order, the sources in ``data`` that are never a target."""
    targets = {b for _, b in data}
    return [a for a, _ in data if a not in targets]


def paths(d, c=None, s=None):
    """Yield every chain of the module-level ``data`` as a list of nodes.

    d: node to continue from; pass the first node of a chain.
    c: nodes of the current chain collected so far (defaults to empty).
    s: nodes already visited (defaults to empty).

    When a chain ends, the walk restarts from the first unvisited chain head.
    Restarting only from heads keeps the result independent of the order of
    ``data``; restarting from any unvisited source could begin in the middle
    of a chain and emit a partial duplicate.
    """
    c = [] if c is None else c
    seen = ([] if s is None else s) + [d]
    if not (k := [b for a, b in data if a == d]):
        yield c + [d]
        if (t := [a for a in _chain_heads() if a not in seen]):
            yield from paths(t[0], c=[], s=seen)
    else:
        yield from [j for i in k for j in paths(i, c=c + [d], s=seen)]


vals = list(paths(_chain_heads()[0]))
Output:
[['A', 'B', 'C', 'D', 'E'], ['U', 'V', 'W'], ['X', 'Y']]
It should be noted, however, that the solution above will only work on inputs that contain standard edge definitions. If the contents of vals can vary in the number of items separated by the /, then you can use the solution below:
class Node:
    """Tree node holding a name ``n`` and a list of child nodes ``c``."""

    def __init__(self, n, c=None):
        # A fresh list per instance; a literal [] default would be shared by
        # every node created without children.
        self.n, self.c = n, ([] if c is None else c)

    def __contains__(self, e):
        """Return True if either end of ``e`` names this node or a descendant."""
        return e[0] == self.n or e[-1] == self.n or any(e in i for i in self.c)

    def add_edge(self, e):
        """Insert ``e``, a list of one or two names, below this node."""
        if self.n != e[0] and len(e) > 1 and (m := [i for i in self.c if i.n == e[-1]]):
            # The target is already a direct child: put the source above it.
            self.c = [i for i in self.c if i != m[0]] + [Node(e[0], [m[0]])]
        elif self.n == e[0]:
            # The edge starts here: add the target unless it is already a child.
            if len(e) > 1 and not any(i.n == e[1] for i in self.c):
                self.c = [*self.c, Node(e[1])]
        elif (m := [i for i in self.c if e in i]):
            # Some subtree already mentions the edge: delegate to the first one.
            m[0].add_edge(e)
        else:
            # Unknown so far: start a new child chain.
            self.c = [*self.c, Node(e[0], [] if len(e) == 1 else [Node(e[1])])]
vals = ['A/B/C', 'A/B', 'B/C', 'C/D', 'D/E', 'U/V', 'V/W', 'X/Y', 'K']
n = Node(None)
for i in vals:
    k = i.split('/')
    # Feed every adjacent pair of names; the last slice holds a single name,
    # which is how an item without '/' (such as 'K') gets registered.
    for j in range(len(k)):
        n.add_edge(k[j:j+2])
def get_paths(n, c=None):
    """Yield one list of names per root-to-leaf path below ``n``.

    n: an object with a name ``n.n`` and a list of children ``n.c``.
    c: names collected on the way down to ``n`` (defaults to empty).
    """
    prefix = ([] if c is None else c) + [n.n]
    if not n.c:
        yield prefix
    else:
        for child in n.c:
            yield from get_paths(child, prefix)
# Every path produced by get_paths starts with the name of the node it was
# called on; n was created as Node(None), so i[1:] drops that leading None.
final_result = [i[1:] for i in get_paths(n)]
print(final_result)
Output:
[['A', 'B', 'C', 'D', 'E'], ['U', 'V', 'W'], ['X', 'Y'], ['K']]
With the trie-style approach of class Node, the order of the input (vals) does not matter (no sort is required) and input paths of any depth can be added.
It might not be the most efficient, but you could just repeat the loop until there's nothing modified.
def ext_3plus(unit):
    """Merge overlapping '/'-separated chains in ``unit`` until none overlap.

    ``unit`` is a set of strings and is modified in place; nothing is
    returned. Two items merge when the last segment of one equals the first
    segment of the other. Items without '/' are left untouched.
    """
    import itertools  # local import: the snippet never imports it

    merged_any = True
    while merged_any:
        merged_any = False
        # Iterate over a snapshot because ``unit`` is mutated inside the loop.
        for first, second in itertools.combinations(list(unit), 2):
            # A merge earlier in this pass may already have consumed either one.
            if first not in unit or second not in unit:
                continue
            if '/' not in first or '/' not in second:
                continue
            first_parts = first.split('/')
            second_parts = second.split('/')
            # Compare the first and last segments: after the first pass the
            # items can have more than two segments.
            if first_parts[0] == second_parts[-1]:
                chain = second_parts + first_parts[1:]
            elif first_parts[-1] == second_parts[0]:
                chain = first_parts + second_parts[1:]
            else:
                continue
            unit.remove(first)
            unit.remove(second)
            unit.add('/'.join(chain))
            merged_any = True
I am new to python and algorithms. I have been trying to implement a topological sorting algorithm for a while but can't seem to create a structure that works. The functions I have made run on a graph represented in an adj list.
When I run a DFS, the nodes are discovered top-down, and nodes that have already been visited are not processed again:
def DFS(location, graph, visited=None):
    """Depth-first walk of ``graph`` starting at ``location``.

    graph: adjacency list indexed by node number.
    visited: per-node flags; a new all-False list is created when omitted.

    Appends every newly reached node to the module-level ``node_visited``
    list. Returns ``visited``, or None when ``location`` was already visited.
    """
    if visited is None:
        visited = [False] * len(graph)
    if visited[location]:
        return
    visited[location] = True
    node_visited.append(location)
    for node in graph[location]:
        DFS(node, graph, visited)
    return visited
When I am trying to build a topological sort algorithm, I create a new function which essentially checks the "availability" of that node to be added to the sorted list (ie: whether its neighbouring nodes have been visited already)
def availability(graph, node):
    """Return True when no neighbour of ``node`` is in ``available_nodes``.

    ``available_nodes`` is a module-level collection defined outside this
    function.
    """
    return not any(neighbour in available_nodes for neighbour in graph[node])
However, my issue is that once I have followed a path to the bottom of the graph, the DFS does not allow me to revisit those nodes. Hence, any updates I make once I discover the end of the path cannot be processed.
My approach may be totally off, but I am wondering if someone could help improve my implementation design, or explain how the implementation is commonly done. Thanks in advance.
You don't need that availability check to do a topological sort with DFS.
DFS itself ensures that you don't leave a node until its children have already been processed, so if you add each node to a list when DFS finishes with it, they will be added in (reverse) topological order.
Don't forget to do the whole graph, though, like this:
def toposort(graph):
    """Return the nodes of ``graph`` in DFS finishing order.

    graph: adjacency list indexed by node number (``graph[i]`` lists the
    nodes reachable from ``i`` by one edge).

    A node is appended only after everything reachable from it has been
    appended, so for an acyclic graph the result is a reverse topological
    order. The walk uses an explicit stack so that deep graphs do not hit
    the recursion limit.
    """
    visited = [False] * len(graph)
    result = []
    for root in range(len(graph)):
        if visited[root]:
            continue
        visited[root] = True
        # Each entry pairs a node with the iterator over its unexplored edges.
        stack = [(root, iter(graph[root]))]
        while stack:
            node, neighbours = stack[-1]
            for adj in neighbours:
                if not visited[adj]:
                    visited[adj] = True
                    stack.append((adj, iter(graph[adj])))
                    break
            else:
                # All edges explored: the node is finished.
                stack.pop()
                result.append(node)
    return result
class Graph:
    """Directed graph stored as a dict mapping each node to its successors."""

    def __init__(self):
        self.edges = {}

    def addNode(self, node):
        """Register ``node`` with an empty successor list."""
        self.edges[node] = []

    def addEdge(self, node1, node2):
        """Add the edge node1 -> node2; ``node1`` must already be registered."""
        self.edges[node1].append(node2)

    def getSub(self, node):
        """Return the successor list of ``node``."""
        return self.edges[node]

    def DFSrecu(self, start, path):
        """Append to ``path`` every node reachable from ``start``, successors first.

        ``path`` is extended in place and also returned.
        """
        for node in self.getSub(start):
            if node not in path:
                path = self.DFSrecu(node, path)
        if start not in path:
            path.append(start)
        return path

    def topological_sort(self, start):
        """Return all nodes in DFS finishing order, beginning the walk at ``start``."""
        topo_ordering_list = self.DFSrecu(start, [])
        # ``start`` is arbitrary, so some nodes may be unreachable from it;
        # walk from every node that has not been placed yet. This must use
        # ``self``: relying on a global graph variable breaks any instance
        # that is not bound to that exact name.
        for node in self.edges:
            if node not in topo_ordering_list:
                topo_ordering_list = self.DFSrecu(node, topo_ordering_list)
        return topo_ordering_list
if __name__ == "__main__":
    # Build the sample graph; 'C' is deliberately left without any edge.
    g = Graph()
    for node in ['S', 'B', 'A', 'C', 'G', 'I', "L", 'D', 'H']:
        g.addNode(node)
    g.addEdge("S", "A")
    g.addEdge("S", "B")
    g.addEdge("B", "D")
    g.addEdge("D", "H")
    g.addEdge("D", "G")
    g.addEdge("H", "I")
    g.addEdge("I", "L")
    g.addEdge("G", "I")
    last_path1 = g.topological_sort("D")
    last_path2 = g.topological_sort("S")
    print("Start From D: ",last_path1)
    print("start From S: ",last_path2)
Output:
Start From D: ['L', 'I', 'H', 'G', 'D', 'A', 'B', 'S', 'C']
start From S: ['A', 'L', 'I', 'H', 'G', 'D', 'B', 'S', 'C']
You can see that 'C' is included in the topologically sorted list even though it is not connected to any other node: it is still a node of the graph, so it has to be visited.
That is why the for loop in topological_sort() is needed.
# Sample input: one record per line, "<label>:<nodes>", with the nodes
# separated by ',' or ';'.
s = """
1:A,B,C,D;E,F
2:G,H;J,K
&:L,M,N
"""
# NOTE(review): indentation was lost in this listing and the code is left
# exactly as posted. As described in the question, each node is printed one
# by one; the function has no return statement, so nothing is handed back to
# the caller as a list.
def read_nodes(gfile):
for line in gfile.split():
# Keep the part after ':' and treat ';' like ',' before splitting.
nodes = line.split(":")[1].replace(';',',').split(',')
for node in nodes:
print node
print read_nodes(s)
I am expected to get ['A','B','C','D','E',.....'N'], but I get A B C D E .....N and it's not a list. I spent a lot of time debugging, but could not find the right way.
I believe this is what you're looking for:
s = """
1:A,B,C,D;E,F
2:G,H;J,K
&:L,M,N
"""


def read_nodes(gfile):
    """Return every node named in ``gfile`` as one flat list.

    Each whitespace-separated record looks like "<label>:<nodes>", where the
    nodes are separated by ',' or ';'.
    """
    # One sub-list of nodes per record...
    per_line = [line.split(":")[1].replace(';', ',').split(',') for line in gfile.split()]
    # ...flattened into a single list.
    return [node for group in per_line for node in group]


print(read_nodes(s))  # prints: ['A','B','C','D','E',.....'N']
What you were doing wrong is that, for each sub-list you created, you were iterating over that sub-list and printing out its contents.
The code above uses list comprehension to first iterate over the gfile and create a list of lists. The list is then flattened with the second line. Afterwards, the flatten list is returned.
If you still want to do it your way, Then you need a local variable to store the contents of each sub-list in, and then return that variable:
s = """
1:A,B,C,D;E,F
2:G,H;J,K
&:L,M,N
"""


def read_nodes(gfile):
    """Return every node named in ``gfile`` as one flat list.

    Each whitespace-separated record looks like "<label>:<nodes>", where the
    nodes are separated by ',' or ';'.
    """
    all_nodes = []
    for line in gfile.split():
        nodes = line.split(":")[1].replace(';', ',').split(',')
        all_nodes.extend(nodes)
    return all_nodes


print(read_nodes(s))
Each line you read will create a new list called nodes. You need to create a list outside this loop and store all the nodes.
s = """
1:A,B,C,D;E,F
2:G,H;J,K
&:L,M,N
"""


def read_nodes(gfile):
    """Return every node named in ``gfile`` as one flat list.

    Each whitespace-separated record looks like "<label>:<nodes>", where the
    nodes are separated by ',' or ';'.
    """
    # The accumulator lives outside the per-line loop so it survives it.
    allNodes = []
    for line in gfile.split():
        nodes = line.split(":")[1].replace(';', ',').split(',')
        for node in nodes:
            allNodes.append(node)
    return allNodes


print(read_nodes(s))
Not quite sure what you are ultimately trying to accomplish but this will print what you say you are expecting:
s = """
1:A,B,C,D;E,F
2:G,H;J,K
&:L,M,N
"""


def read_nodes(gfile):
    """Return every node named in ``gfile`` as one flat list.

    Each whitespace-separated record looks like "<label>:<nodes>", where the
    nodes are separated by ',' or ';'.
    """
    nodes = []
    for line in gfile.split():
        nodes += line.split(":")[1].replace(';', ',').split(',')
    return nodes


print(read_nodes(s))
Add the following code so that the output is
['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N']
//Code to be added
def read_nodes(gfile):
    """Return every node named in ``gfile`` as one flat list.

    Each whitespace-separated record looks like "<label>:<nodes>", where the
    nodes are separated by ',' or ';'. A new list is built on every call, so
    repeated calls do not accumulate nodes.
    """
    collected = []
    for line in gfile.split():
        collected.extend(line.split(":")[1].replace(';', ',').split(','))
    return collected


# ``s`` is the sample input string defined earlier.
nodes_list = read_nodes(s)
print(nodes_list)
I need to iterate a tree/graph and produce a certain output but following some rules:
_ d
/ / \
b c _e
/ / |
a f g
The expected output should be (order irrelevant):
{'bde', 'bcde', 'abde', 'abcde', 'bdfe', 'bdfge', 'abdfe', ...}
The rules are:
The top of the tree 'bde' (leftmost_root_children+root+rightmost_root_children) should always be present
The left-right order should be preserved so for example the combinations 'cb' or 'gf' are not allowed.
All paths follow the left to right direction.
I need to find all paths following these rules. Unfortunately I don't have a CS background and my head is exploding. Any tip will be helpful.
EDIT: This structure represents my tree very closely:
class N:
    """Tree node with a name and ordered lists of left and right children."""

    def __init__(self, name, lefts, rights):
        self.name = name
        self.lefts = lefts
        self.rights = rights


# The sample tree: 'd' is the root, 'b' (with left child 'a') and 'c' are its
# left children, and 'e' (with left children 'f' and 'g') is its right child.
tree = N('d', [N('b', [N('a', [], [])], []), N('c', [], [])],
         [N('e', [N('f', [], []), N('g', [], [])],
            [])])
or may be more readable:
# The same constructor call, with lefts/rights spelled out as keyword
# arguments for the root, 'b' and 'e'. The result is not assigned here.
N('d', lefts =[N('b', lefts=[N('a', [], [])], rights=[]), N('c', [], [])],
rights=[N('e', lefts=[N('f', [], []), N('g', [], [])], rights=[])])
So this can be treated as a combination of two problems. My code below will assume the N class and tree structure have already been defined as in your problem statement.
First: given a tree structure like yours, how do you produce an in-order traversal of its nodes? This is a pretty straightforward problem, so I'll just show a simple recursive generator that solves it:
def inorder(node):
    """Yield node names left to right: left children, the node, right children.

    ``node`` is a single node or a list of nodes. Children are read from the
    ``lefts`` and ``rights`` attributes; a missing attribute counts as no
    children.
    """
    nodes = node if isinstance(node, list) else [node]
    for n in nodes:
        yield from inorder(getattr(n, 'lefts', []))
        yield n.name
        yield from inorder(getattr(n, 'rights', []))
# print as a function call works on both Python 2 and Python 3.
print(list(inorder(tree)))
# ['a', 'b', 'c', 'd', 'f', 'g', 'e']
Second: Now that we have the "correct" ordering of the nodes, we next need to figure out all possible combinations of these that a) maintain this order, and b) contain the three "anchor" elements ('b', 'd', 'e'). This we can accomplish using some help from the always-handy itertools library.
The basic steps are:
Identify the anchor elements and partition the list into four pieces around them
Figure out all combinations of elements for each partition (i.e. the power set)
Take the product of all such combinations
Like so:
from itertools import chain, combinations
# powerset recipe taken from itertools documentation
def powerset(iterable):
    """Return an iterator over every subset of ``iterable`` as a tuple.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
def traversals(tree):
    """Yield every order-preserving selection of node names that keeps the anchors.

    The anchors are the first left child of the root, the root itself and
    the first right child of the root. The in-order node list is cut into
    four parts around them, and every combination of subsets of those parts
    is joined, in order, with the anchors. Node names are assumed to be
    unique, since the anchors are located with ``list.index``.
    """
    # ``product`` is needed below but is not part of the snippet's import line.
    from itertools import chain, product

    left, mid, right = tree.lefts[0].name, tree.name, tree.rights[0].name
    nodes = list(inorder(tree))
    l_i, m_i, r_i = (nodes.index(x) for x in (left, mid, right))
    parts = nodes[:l_i], nodes[l_i+1:m_i], nodes[m_i+1:r_i], nodes[r_i+1:]
    psets = [powerset(x) for x in parts]
    for p1, p2, p3, p4 in product(*psets):
        yield ''.join(chain(p1, [left], p2, [mid], p3, [right], p4))
# print as a function call works on both Python 2 and Python 3.
print(list(traversals(tree)))
# ['bde', 'bdfe', 'bdge', 'bdfge', 'bcde', 'bcdfe',
# 'bcdge', 'bcdfge', 'abde', 'abdfe', 'abdge', 'abdfge',
# 'abcde', 'abcdfe', 'abcdge', 'abcdfge']
# Undirected sample graph as an adjacency list: node -> list of neighbours.
graph={ 0:[1,3,4], 1:[0,2,4], 2:[1,6], 3:[0,4,6], 4:[0,1,3,5], 5:[4], 6:[2,3] }
# NOTE(review): indentation was lost in this listing and the code is left
# exactly as posted.
# NOTE: path=[] is a mutable default; the same list object is reused by every
# call that does not pass ``path``.
def bfs(graph, start, path=[]):
queue = [start]
while queue:
vertex = queue.pop(0)
if vertex not in path:
path.append(vertex)
# NOTE: graph[vertex] and path are both lists, and lists do not support the
# '-' operator, so this line raises TypeError.
queue.extend(graph[vertex] - path)
return path
print bfs(graph, 0)
Guys! Can someone help me with this bfs code? I can't understand how to solve this queue line.
To extend your queue with all nodes not yet seen on the path, use set operations:
queue.extend(set(graph[vertex]).difference(path))
or use a generator expression:
queue.extend(node for node in graph[vertex] if node not in path)
Lists don't support subtraction.
You don't really need to filter the nodes, however, your code would work with a simple:
queue.extend(graph[vertex])
as the if vertex not in path: test also guards against re-visiting nodes.
You should not use a list as default argument, see "Least Astonishment" and the Mutable Default Argument; you don't need a default argument here at all:
def bfs(graph, start):
path = []
Demo:
>>> graph={ 0:[1,3,4], 1:[0,2,4], 2:[1,6], 3:[0,4,6], 4:[0,1,3,5], 5:[4], 6:[2,3] }
>>> def bfs(graph, start):
... path = []
... queue = [start]
... while queue:
... vertex = queue.pop(0)
... if vertex not in path:
... path.append(vertex)
... queue.extend(graph[vertex])
... return path
...
>>> print bfs(graph, 0)
[0, 1, 3, 4, 2, 6, 5]
queue.extend(graph[vertex] - path)
This line is giving TypeError: unsupported operand type(s) for -: 'list' and 'list', because you are not allowed to subtract two lists. You could convert them to a different collection that does support differences. For example:
from collections import deque

graph = {0: [1, 3, 4], 1: [0, 2, 4], 2: [1, 6], 3: [0, 4, 6], 4: [0, 1, 3, 5], 5: [4], 6: [2, 3]}


def bfs(graph, start, path=None):
    """Return the nodes reachable from ``start`` in breadth-first order.

    graph: mapping from each node to the list of its neighbours.
    path: optional list of nodes to treat as already visited; newly visited
        nodes are appended to it and it is returned. A new list is created
        when omitted, so separate calls do not share state.
    """
    if path is None:
        path = []
    seen = set(path)  # constant-time membership tests
    queue = deque([start])
    while queue:
        vertex = queue.popleft()
        if vertex not in seen:
            seen.add(vertex)
            path.append(vertex)
            # Filter with a comprehension instead of a set difference so the
            # neighbours keep the order given in the adjacency list.
            queue.extend(node for node in graph[vertex] if node not in seen)
    return path


print(bfs(graph, 0))
Result:
[0, 1, 3, 4, 2, 6, 5]
By the way, it may be good to modify the argument list so that you don't have a mutable list as a default:
def bfs(graph, start, path=None):
if path == None: path = []
Bug is that there is no list difference method. Either you can convert it to set and use set difference method or you can use list comprehension as
queue.extend(graph[vertex] - path)
can be replaced by
queue += [i for i in graph[vertex] if i not in path].
#USE BELOW CODE FOR SIMPLE UNDERSTANDING
graph = {
'A' : ['B' , 'C','D'],
'B' : ['E'],
'C' : ['F'],
'D' : ['G'],
'E' : [],
'F' : ['Z'],
'G' : [],
'Z' : [],
}
visited = [] #Store visted nodes
queue = [] #BFS uses queue structure so this varible will work like QUEUE ( LIFO)
final_result = []
def bfs(visited,graph,node):
visited.append(node)
queue.append(node)
while queue:
s = queue.pop(0)
print(s,end=" ")
#final_result.append(s)
for neighbour in graph[s]:
if neighbour not in visited:
visited.append(neighbour)
queue.append(neighbour)
bfs(visited,graph,'A')
print(final_result)