taxonomy, hierarchical vertices, how do i fetch top parent and selective children? - python

I really enjoy ArangoDB's graph traversals, which let me visit any path or node with very little query effort. However, I'm stuck on a use case that I have already implemented in Neo4j; I believe anyone using ArangoDB might find this useful in the future.
I have successfully imported the list of product categories google product taxonomy into arangodb database. in a vertex collection named taxonomy and edge collection named catof.
If i m correct, from this query, i m able to fetch all vertices and linked edges.
// For every vertex t in the taxonomy collection...
FOR t IN taxonomy
// ...visit each vertex c reached from t over an inbound catof edge
for c in inbound t catof
// order the reached vertices alphabetically by name
sort c.name asc
// {c} is shorthand for {"c": c}
return {c}
While feeding the taxonomy documents, a parent vertex does not get an edge if either of _from or _to is null. I should mention that I'm using flask-script and python-arango for these operations; they have been helpful.
# Command manager wrapping the application object (``app`` is defined outside this snippet).
manager = Manager(app)
# Handle to the 'taxonomy' vertex collection of ``storegraph`` (one document per category name).
tax_item = storegraph.vertex_collection('taxonomy')
# Handle to the 'catof' edge collection of ``storegraph`` (links between taxonomy documents).
catof = storegraph.edge_collection('catof')
#manager.command
def fetch_tree():
    """Create ``catof`` edges (child -> parent) from the paths in ``input.csv``.

    Each line of ``input.csv`` is one taxonomy path whose levels are
    separated by ``' > '`` (e.g. ``Computers > Hardwares > Hard disk``).
    The first level of a path is recorded as a root (no parent); every
    following level is recorded with the level just before it as parent.

    For each child/parent pair the matching documents are looked up by
    ``name`` in ``tax_item`` and, when both exist, an edge
    ``{'_from': child_id, '_to': parent_id}`` is inserted into ``catof``.
    Roots have no parent and therefore never produce an edge.

    NOTE: the vertices themselves must already exist; inserting them is
    a separate step, e.g. ``tax_item.insert({"name": key})`` for every
    key collected below.
    """
    parent_of = {}
    with open('input.csv') as file:
        for row in file:
            things = row.strip().split(' > ')
            if things == ['']:
                # Blank line: nothing to record.
                continue
            parent_of[things[0]] = None
            # Walk consecutive (parent, child) pairs along the path.
            for parent, child in zip(things, things[1:]):
                parent_of[child] = parent

    # ``items()`` works on Python 2 and 3 (``iteritems()`` is Python 2 only).
    for child, parent in parent_of.items():
        if parent is None:
            # Root category: no parent vertex to link to, skip the lookups.
            continue
        # Both lookups must stay in one statement; materialise the results
        # so they can be tested for emptiness and indexed.
        c = list(tax_item.find({'name': child}))
        p = list(tax_item.find({'name': parent}))
        if c and p:
            catof.insert({'_from': c[0]['_id'], '_to': p[0]['_id']})
After operation, i have the following sample vertices.
[{"_key": "5246198", "_id": "taxonomy/5246198","name": "Computers"},
{"_key": "5252911", "_id": "taxonomy/5252911","name": "Hardwares"},
{"_key": "5257587", "_id": "taxonomy/5257587", "name": "Hard disk"
}]
and edges
[
{ "_key": "5269883", "_id": "catof/5269883", "_from": "taxonomy/5246198", "_to": "taxonomy/5252911"},
{"_key": "5279833", "_id": "catof/5279833", "_from": "taxonomy/5252911",
"_to": "taxonomy/5257587"}]
Now my question is:
How do I fetch only the top-level parent documents, i.e. Computers?
From the parent documents, how do I print all of their children, in a format such as: Computers, Hardwares, Hard disk?

Related

Why it resulting a "list index out of range error" on this dijkstra algorithm

I tried to implement a dijkstra algorithm on my own graph but it doesn't run instead it says that "list index out of range"
here is the code that i tried
It works on a smaller graph (for example one with 6 nodes), but mine has 39 nodes. The author of this code said: "At every iteration analyze the list and how the list is accessed using print()..it will help troubleshoot the error" — what is that supposed to mean?
After a while I realized the error occurs because the algorithm reaches a dead end at nodes such as A, B, C and AL: when I try to go from A to F
it reaches a dead end at B. Can I get some help on how to fix this, please?
import sys
from heapq import heapify, heappush, heappop
def dijsktra(graph, src, dest):
    """Print and return the cheapest route from ``src`` to ``dest``.

    Args:
        graph: mapping ``node -> {neighbour: edge_cost}``. Edge costs are
            assumed to be non-negative (required by Dijkstra's algorithm).
            Nodes that only appear as neighbours are accepted.
        src: name of the start node.
        dest: name of the target node.

    Returns:
        ``(cost, path)`` where ``path`` is the list of node names from
        ``src`` to ``dest`` inclusive. If ``dest`` cannot be reached,
        ``cost`` is ``sys.maxsize`` and ``path`` is empty.
    """
    inf = sys.maxsize
    cost_to = {src: 0}
    path_to = {src: [src]}
    visited = set()
    # One frontier shared by the whole search: when the current node is a
    # dead end, the next-cheapest node discovered earlier is still queued,
    # so the heap is never indexed while empty.
    frontier = [(0, src)]
    while frontier:
        cost, node = heappop(frontier)
        if node in visited:
            # Stale entry: the node was already settled at a lower cost.
            continue
        visited.add(node)
        if node == dest:
            break
        for neighbour, weight in graph.get(node, {}).items():
            if neighbour in visited:
                continue
            candidate = cost + weight
            if candidate < cost_to.get(neighbour, inf):
                cost_to[neighbour] = candidate
                path_to[neighbour] = path_to[node] + [neighbour]
                heappush(frontier, (candidate, neighbour))

    best_cost = cost_to.get(dest, inf)
    # ``[dest]`` rather than ``list(dest)``: the latter would split a
    # multi-character name such as 'AM' into ['A', 'M'].
    best_path = path_to.get(dest, [])
    print("Shortest Distance: " + str(best_cost))
    print("Shortest Path: " + str(best_path))
    return best_cost, best_path
if __name__ == "__main__":
    # Adjacency map: node -> {neighbour: edge cost}.
    # NOTE(review): a few reverse edges disagree (e.g. L->M is 68 but
    # M->L is 25.9; AA->AH is 258.5 but AH->AA is 285.5), and 'U' lists
    # 'A' at 84.35 although 'T' is the node that lists U at that cost --
    # confirm these against the real map.
    graph = {
        'A': {'D': 105.3},
        'B': {'E': 65},
        'C': {'AM': 103.4},
        'D': {'A': 105.3, 'E': 132.8, 'J': 165.8},
        'E': {'B': 65, 'D': 132.8, 'F': 176.6, 'H': 78.3},
        'F': {'E': 176.6, 'R': 181.8, 'AM': 20.3},
        'G': {'H': 63, 'K': 57.2},
        'H': {'E': 78.3, 'G': 63, 'I': 65, 'O': 101.2},
        'I': {'H': 65, 'P': 104},
        'J': {'D': 165.8, 'K': 125.6, 'L': 25.9},
        'K': {'G': 57.2, 'J': 125.6, 'N': 37.5},
        'L': {'J': 25.9, 'M': 68, 'Y': 177.7},
        'M': {'L': 25.9, 'N': 56, 'V': 124},
        'N': {'K': 37.5, 'M': 56, 'O': 77.4},
        'O': {'H': 101.2, 'N': 77.4, 'P': 70.2, 'W': 128.6},
        'P': {'I': 104, 'O': 70.2, 'Q': 68},
        'Q': {'P': 68, 'R': 45, 'T': 102.9},
        'R': {'F': 181.8, 'Q': 45, 'S': 51},
        'S': {'R': 51, 'U': 104.3, 'AM': 193.3},
        'T': {'Q': 102.9, 'U': 84.35, 'X': 21.6},
        'U': {'S': 104.3, 'A': 84.35, 'AF': 160.7},
        'V': {'M': 124, 'W': 128, 'Z': 45},
        'W': {'O': 128.6, 'V': 128, 'X': 150.7, 'AD': 132.9},
        'X': {'T': 21.6, 'W': 150.7, 'AE': 166.8},
        'Y': {'L': 177.7, 'Z': 100.9, 'AA': 39.8},
        'Z': {'V': 45, 'Y': 100.9, 'AB': 34},
        'AA': {'Y': 39.8, 'AB': 100.3, 'AH': 258.5},
        'AB': {'Z': 34, 'AA': 100.3, 'AC': 47.8},
        'AC': {'AB': 47.8, 'AD': 126, 'AH': 60.37},
        'AD': {'W': 132.9, 'AE': 110.2, 'AK': 93.14, 'AC': 126},
        'AE': {'X': 166.8, 'AI': 82.2, 'AD': 110.2},
        'AF': {'U': 160.7, 'AG': 13.7, 'AI': 181.2},
        'AG': {'AF': 13.7},
        'AH': {'AA': 285.5, 'AC': 60.37, 'AJ': 33.8},
        'AI': {'AE': 82.2, 'AF': 181.2, 'AK': 110},
        'AJ': {'AH': 33.8, 'AK': 119.3, 'AL': 52},
        'AK': {'AD': 93.14, 'AI': 110, 'AJ': 119.3},
        # Was a second 'AI' key, which silently replaced the 'AI' entry
        # above; AJ lists 'AL' at cost 52, so this is node 'AL'.
        'AL': {'AJ': 52},
        'AM': {'C': 103.4, 'S': 193.3, 'F': 20.3},
    }
    source = 'A'
    destination = 'F'
    dijsktra(graph, source, destination)
it said error instead on this line
temp = min_heap[0][1]

How to Get All Combination Node (Given dictionary and list) and sort by the longest node in Python

How to get all combinations (listed) from a given dictionary, in python ?
My Dictionary Input :
node_data = {
"1":["2","3","4","5"],#1
"2":["7","8"],#2
"3":["6"],#3
"4":[],#4
"5":[],#5
"6":["11"],#6
"7":[],#7
"8":["9","10",],#8
"9":["12"],#9
"10":[],#10
"11":["13"],#11
"12":[],#12
"13":["14"],#13
"14":[]#14
}
Desired output (sorted with the longest path first):
["1","3","6","11","13","14"]
["1","2","8","9","12"]
["1","2","8","10"]
["1","2","7"]
["1","4"]
["1","5"]
I did something like this and it seems to work:
def recurse(current, nodes, path, all_path):
    """Collect every root-to-leaf path below ``current``.

    ``nodes`` maps a node to the list of its children. ``current`` is
    appended to ``path`` (the list passed in is modified), and each child
    is explored with its own copy of that path. When ``current`` has no
    children, ``path`` itself is appended to ``all_path``.

    Returns ``all_path``, the same list object that was passed in.
    """
    path.append(current)
    children = nodes[current]
    if not children:
        # Leaf reached: the path built so far is complete.
        all_path.append(path)
        return all_path
    for child in children:
        recurse(child, nodes, list(path), all_path)
    return all_path
if __name__ == '__main__':
    # Parent -> children adjacency list; an empty list marks a leaf.
    node_data = {
        "1": ["2", "3", "4", "5"],
        "2": ["7", "8"],
        "3": ["6"],
        "4": [],
        "5": [],
        "6": ["11"],
        "7": [],
        "8": ["9", "10"],
        "9": ["12"],
        "10": [],
        "11": ["13"],
        "12": [],
        "13": ["14"],
        "14": [],
    }
    # All root-to-leaf paths starting at "1", longest path first.
    toto = sorted(recurse("1", node_data, [], []), key=len, reverse=True)
    print(toto)
Hope it'll help you

find all possible paths in a tree in python

I am trying to create a list with all possible paths in a tree. I have following structure given (subset from DB):
text = """
1,Product1,INVOICE_FEE,
3,Product3,INVOICE_FEE,
7,Product7,DEFAULT,
2,Product2,DEFAULT,7
4,Product4,DEFAULT,7
5,Product5,DEFAULT,2
"""
where the columns are: ID, product-name, invoice-type, reference-to-parent-ID.
I would like to create list with all possible paths, like in the example:
[[Product1],[Product3],[Product7,Product2,Product5],[Product7,Product4]]
I do following:
# One stripped line per record of ``text``.
lines = [raw.strip() for raw in text.strip().splitlines()]
# Each record becomes a tuple of its comma-separated fields.
hierarchy = [tuple(entry.split(',')) for entry in lines]
# Group the records by their fourth field (the parent ID; '' for top-level rows).
parents = defaultdict(list)
for record in hierarchy:
    parents[record[3]].append(record)
to create the tree, and then I would like to find all paths:
def pathsMet(parents, node=''):
    """Return one list per child of ``node``: the child's name followed by its own sub-results.

    ``parents`` maps a parent ID to a list of
    ``(ID, productName, invoiceType, parentID)`` records.

    NOTE: the recursive result is concatenated as-is after the product
    name, so deeper levels show up as nested lists rather than as flat
    paths. This is the behaviour the question asks about.
    """
    children = parents.get(node)
    if not children:
        return []
    return [
        [product_name] + pathsMet(parents, child_id)
        for child_id, product_name, _invoice_type, _parent_id in children
    ]
print(pathsMet(parents))
The result which I got is following:
[['FeeCashFlow1'], ['FeeCashFlow3'], ['PrincipalCashFlow7', ['AmortisationCashFlow3', ['AmortisationCashFlow2']], ['AmortisationCashFlow4']]]
How to correct the code to have following output:
[['FeeCashFlow1'], ['FeeCashFlow3'], ['PrincipalCashFlow7', 'AmortisationCashFlow3', 'AmortisationCashFlow2'], ['PrincipalCashFlow7','AmortisationCashFlow4']]
You can do this by first building a tree of your data nodes and then going through all branches to build a list of paths:
text = """
1,Product1,INVOICE_FEE,
3,Product3,INVOICE_FEE,
7,Product7,DEFAULT,
2,Product2,DEFAULT,7
4,Product4,DEFAULT,7
5,Product5,DEFAULT,2
"""
# Split every non-blank line into its comma-separated fields.
data = []
for line in text.split("\n"):
    if line.strip():
        data.append(line.split(","))

# ID -> product name, used to translate key paths into name paths.
keys = {row[0]: row[1] for row in data}

# Every ID starts with an empty child dict; the '' entry is the root
# that holds all rows without a parent.
tree = {key: {} for key in keys}
tree[""] = root = dict()
for key, _name, _kind, parent in data:
    tree[parent][key] = tree[key]  # hang each node under its parent

# Expand key paths in first-in, first-out order, starting from the
# root's children.
nodes = [[key] for key in root]
paths = []
while nodes:
    kPath = nodes.pop(0)
    children = tree[kPath[-1]]
    if not children:
        # Leaf: translate the key path into product names.
        paths.append([keys[key] for key in kPath])
        continue
    for child in children:
        nodes.append(kPath + [child])
output:
print(paths)
[['Product1'], ['Product3'], ['Product7', 'Product4'], ['Product7', 'Product2', 'Product5']]
Your code seems correct except that you are appending entire list to the paths variable, instead of list elements.
Try this modification:
def pathsMet(parents, node=''):
    """Return every path of product names from the children of ``node`` down to a leaf.

    ``parents`` maps a parent ID to a list of
    ``(ID, productName, invoiceType, parentID)`` records. A node without
    children yields ``[[]]`` (one empty tail), so that its caller can
    prepend the product name and obtain exactly one path.
    """
    children = parents.get(node)
    if not children:
        return [[]]
    collected = []
    for child_id, product_name, _invoice_type, _parent_id in children:
        # Prefix this child's name to each path found beneath it.
        collected.extend(
            [product_name] + tail for tail in pathsMet(parents, child_id)
        )
    return collected

Trying to iterate over a list attached to an object under a variable class(?)

Full code is at the end.
I've written a program that reads in data from a csv file. It creates a class of variable called "Facility". Each facility can have multiple water sources, so there is another class called "WaterSource" which appends a list of attributes for an individual water source to each Facility. If I call :
data['00312']
I get output:
Facility 00312 US Aggregates Inc IN
If I ask for data['00312'].records:
[ WaterSource 00312 WELL Willshire 80 683175 4511625,
WaterSource 00312 WELL Willshire 80 682550 4511750,
WaterSource 00312 INTAKE Willshire 1200 Unnamed Quarry 683225 4512075,
WaterSource 00312 INTAKE Willshire 1200 Unnamed Quarry 683225 4512050]
I need to create a report that iterates over every variable in the class and returns a list of Facilities that have multiple water sources. Thus the final output would a list of [RegNo, Facility Name, No. of WaterSources] such as:
[Facility 00312 US Aggregates Inc 4]
The issue I'm having is understanding how to iterate over the Facilities to count the records of the water sources appended to each Facilities object. I think I could add a method into the class somewhere, but I can't quite figure out where. I'm a python beginner, so please forgive me if this isn't quite the right vocabulary. I'm not even sure where to start, so any suggestions you could offer would be helpful.
class Facilities:
    """A registered facility together with the records attached to it."""

    def __init__(self, regno, name, mwu):
        """Store the registration number, facility name and MWU code."""
        self.regno = regno
        self.name = name
        self.mwu = mwu
        self.records = []

    def add_record(self, record):
        """Append ``record`` to this object's ``records`` list."""
        self.records.append(record)

    def __repr__(self):
        '''Makes a string representation'''
        return 'Facility {0} {1} {2}'.format(self.regno, self.name, self.mwu)


# The base class is ``Facilities``; the name ``Facility`` is not defined
# anywhere in this snippet and raised NameError at class creation.
class WaterSource(Facilities):
    '''holds info about the water source'''

    def __init__(self, regno, source, quad, cap, body, utmE, utmN):
        """Store the attributes of one water source.

        ``Facilities.__init__`` is deliberately not called: it requires a
        facility name and MWU code, which a water source does not have.
        """
        self.regno = regno
        self.source = source
        self.quad = quad
        self.cap = cap
        self.body = body
        self.utmE = utmE
        self.utmN = utmN
        self.records = []

    def source_data(self):
        """Return the attributes of ``self.records`` column by column.

        Returns a 7-tuple of lists
        ``(regnos, sources, quads, caps, bodies, utmEs, utmNs)``, each
        holding that attribute for every record in ``self.records``.
        """
        fields = ('regno', 'source', 'quad', 'cap', 'body', 'utmE', 'utmN')
        return tuple(
            [getattr(record, field) for record in self.records]
            for field in fields
        )

    def __repr__(self):
        return ' WaterSource {0} {1} {2} {3} {4} {5} {6}'.format(
            self.regno, self.source, self.quad, self.cap, self.body,
            self.utmE, self.utmN)
def read_data(filename):
    """Read the CSV file ``filename`` into a dict of facilities keyed by RegNo.

    A ``Facilities`` object is created the first time a ``RegNo`` is
    seen; every row (including that first one) adds one ``WaterSource``
    record to the facility with the row's ``RegNo``.

    Returns:
        dict mapping ``RegNo`` to its ``Facilities`` object.
    """
    rv = {}
    # ``with`` closes the file once all rows are read; ``newline=''`` is
    # what the csv module asks for when it is handed a file object.
    with open(filename, 'r', encoding='UTF-8', newline='') as handle:
        for r in csv.DictReader(handle):
            regno = r['RegNo']
            if regno not in rv:
                rv[regno] = Facilities(regno, r['Facility'], r['MWU Code'])
            rv[regno].add_record(WaterSource(
                regno, r['Source Code'], r['Quadrangle'],
                r['Capacity (GPM)'], r['Water Body Name'],
                r['UTM East'], r['UTM North']))
    return rv
# Load every facility from the CSV; ``data`` maps each RegNo to its Facilities object.
data = read_data('Fac-2013-2016.csv')
[Facility 00312 US Aggregates Inc 4]
The issue I'm having is understanding how to iterate over the
Facilities to count the records of the water sources appended to each
Facilities object.
From my understanding, simply add a method and return a count of the objects or straight up count the records using len unless there is something more to what you are asking for?
class Facilities:
    """A facility plus its attached records; the repr includes the record count."""

    def __init__(self, regno, name, mwu):
        """Store the facility attributes and start with no records."""
        self.records = []
        self.regno, self.name, self.mwu = regno, name, mwu

    def add_record(self, record):
        """Attach one more record to this facility."""
        self.records.append(record)

    def __repr__(self):
        """Return 'Facility <regno> <name> <mwu> <number of records>'."""
        record_count = len(self.records)
        template = 'Facility {0} {1} {2} {3}'
        return template.format(self.regno, self.name, self.mwu, record_count)
All of your Facilities are stored as values in the dictionary data using the facility's RegNo for the keys. You can iterate over all the data using the dictionary items method. The length of each facility's records attribute is the number of water sources. You can build a format string to use the information you need.
# Only the facility objects are needed here, so iterate over the dict's values.
for facility in data.values():
    # The number of water sources is the length of the facility's records list.
    no_of_sources = len(facility.records)
    print(f'Facility {facility.regno} {facility.name} {no_of_sources}') #Python v3.6+
    #print('Facility {} {} {}'.format(facility.regno, facility.name, no_of_sources)) #Python versions <3.6

format into json with array that belong to a certain parent

I have result set of rows in a database that all relate to each other through a parent child relationship
Each row is represented as follows objectid, id, parent, child, name, level so when I read an example from the database in my program it looks like this
Organization1
Component1
Department1
Sections1
Sections2
Department2
Sections3
Component2
Department3
Sections4
Sections5
Department4
Sections6
Where Organizations has many departments and departments has many Components and Components has many sections
my code thus far looks like this and that works but I need to put it into json format and the json format has to look like the below
# Sort every row into the list for its level. Rows are laid out as
# (objectid, id, parent, child, name, level), so v[2] is the parent,
# v[3] the child, v[4] the name and v[5] the level.
# A fresh dict is built per row and appended directly; the previous code
# appended names that were never assigned (InstDic, CollegeDic,
# SubjectDic) instead of the dict it had just filled.
for v in result:
    level = v[5]
    child = v[3]
    parent = v[2]
    if level == 0:
        Organizations.append({'InstID': v[4], 'Child': child, 'Parent': parent})
    elif level == 1:
        Components.append({'CollegeID': v[4], 'Child': child, 'Parent': parent})
    elif level == 2:
        Departments.append({'DepartmentID': v[4], 'Child': child, 'Parent': parent})
    elif level == 3:
        Sections.append({'SubjectID': v[4], 'Child': child, 'Parent': parent})

# Print the hierarchy by matching each entry's 'Child' with the 'Parent'
# of the entries one level below. The printed keys are the ones stored
# above. print(...) with a single argument behaves the same on Python 2
# and Python 3.
for w in Organizations:
    print(w['InstID'])
    for x in Components:
        if w['Child'] == x['Parent']:
            print(x['CollegeID'])
            for y in Departments:
                if x['Child'] == y['Parent']:
                    print(y['DepartmentID'])
                    for z in Sections:
                        if y['Child'] == z['Parent']:
                            print(z['SubjectID'])
JSON FORMAT
{
"Eff_Date": "08/02/2013",
"Tree":
[
{
"OrganizationID": "Organization1",
"Components":
[
{"ComponentID": "Component1",
"Departments":
[
{"DepartmentID": "Dep1",
"Sections":
[
{"SectionID": "Section1"},
{"SectionID": "Section2"}
]},
{"DepartmentID": "Dep2",
"Sections":
[
{"SectionID": "Section3"}
]}
]}
]
}
]
}
basically, all you have to do is dump the json after your first snippet (given that snippet does correctly create the tree you exposed, I did not thoroughly check it, but it looks coherent):
import json
# Serialise the tree as JSON; print(...) with one argument works on Python 2 and 3.
print(json.dumps({"Eff_Date": "08/02/2013", "Tree": Organizations}))
and tada!
I was able to do it the following way
# Sample rows: [parent_id, id, name, attr]; parent_id -1 marks the root.
# (``data[]`` is not valid Python; the list has to be created with ``=``.)
# NOTE(review): "name8" and "name9" both use id 7 -- confirm this is intended.
data = [
    [-1, 0, "name1", 0],
    [0, 1, "name2", 1],
    [1, 2, "name3", 1],
    [2, 3, "name4", 2],
    [2, 4, "name5", 2],
    [1, 5, "name6", 2],
    [5, 6, "name7", 3],
    [5, 7, "name8", 1],
    [5, 7, "name9", 2],
]
def listToDict(input):
    """Turn ``(parent_id, id, name, attr)`` rows into a nested dict tree.

    The row whose ``parent_id`` is -1 names the root. Every other row
    becomes ``{'name': name}`` and is appended to the ``'children'`` list
    of the node registered under its ``parent_id``, so a parent row has
    to come before its children. ``attr`` is not used.

    Returns the root dict.
    """
    root = {}
    lookup = {}
    for parent_id, node_id, name, _attr in input:
        if parent_id == -1:
            root['name'] = name
            node = root
        else:
            node = {'name': name}
            siblings = lookup[parent_id].setdefault('children', [])
            siblings.append(node)
        # Register the node so later rows can attach children to it.
        lookup[node_id] = node
    return root
# Build the nested tree, then show it both as a Python dict and as JSON.
# print(...) with a single argument behaves the same on Python 2 and 3.
result = listToDict(data)
print(result)
print(json.dumps(result))
In my case my data was a result set from a database so I had to loop through it as follows
# Keep only columns 2..5 of every database row, in that order, and
# collect them in ``pc``.
for v in result:
    pc.append([v[column] for column in (2, 3, 4, 5)])

Categories

Resources