I have a list of tuples like
list_of_tuples = [(number, name, id, parent_id),
                  (number, name, id, parent_id),
                  ]
I am trying to organize it into a nested structure like:
{
 parent: [(id, name), (id, name)],
 parent: {parent: [(id, name)]},
}
So any node can have a parent and/or children.
I tried this:
tree = defaultdict(lambda: [None, ()])
ancestors = set([item[3] for item in list_of_tuples])
for items in list_of_tuples:
    children_root = {}
    descendants = []
    number, name, id, parent = items
    if parent is None:
        tree[id] = [(id, name)]
    elif parent:
        if parent not in tree.keys():
            node = tree.get(parent)
            node.append((id, name))
        children = (id, name)
        tree[parent].append(children)
But I'm losing the deeper hierarchy whenever a node has both a parent and children.
How do I make the nesting work correctly?
I propose to represent the tree nodes as tuples ((id, name), dict_of_children).
list_of_tuples = [(1, 'name1', 1, None),
                  (2, 'name2', 2, 1),
                  (3, 'name3', 3, 1),
                  (4, 'name4', 4, 2),
                  (5, 'name5', 5, 2),
                  (6, 'name5', 6, None),
                  (7, 'name5', 7, 6),
                  ]
def build_tree(list_of_tuples):
    """
    >>> import pprint
    >>> pprint.pprint(build_tree(list_of_tuples))
    {1: ((1, 'name1'),
         {2: ((2, 'name2'), {4: ((4, 'name4'), {}), 5: ((5, 'name5'), {})}),
          3: ((3, 'name3'), {})}),
     6: ((6, 'name5'), {7: ((7, 'name5'), {})})}
    """
    all_nodes = {n[2]: ((n[2], n[1]), {}) for n in list_of_tuples}
    root = {}
    for item in list_of_tuples:
        number, name, id, parent = item
        if parent is not None:
            all_nodes[parent][1][id] = all_nodes[id]
        else:
            root[id] = all_nodes[id]
    return root
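As a quick sanity check, a recursive walk over the returned structure prints the hierarchy with indentation (a minimal sketch of my own; the helper name print_tree is not part of the answer above):

def print_tree(nodes, depth=0):
    # Each value is ((id, name), dict_of_children); recurse into the children.
    for (node_id, name), children in nodes.values():
        print('  ' * depth + '{}: {}'.format(node_id, name))
        print_tree(children, depth + 1)

print_tree(build_tree(list_of_tuples))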
I'm a university student and we were tasked to implement Dijkstra's algorithm on the given graph below.
[Image: the graph we were asked to run Dijkstra's algorithm on]
We were given code to use and/or modify to help answer the question.
import heapq
import math

def dijkstra(G, S):
    pq = []
    entry_finder = {}
    costs = {}
    pred = {S: None}
    REMOVED = 'removed'

    def add_entry(label, priority):
        if label in entry_finder:
            remove_entry(label)
        entry = [priority, label]
        entry_finder[label] = entry
        heapq.heappush(pq, entry)

    def remove_entry(label):
        entry = entry_finder.pop(label)
        entry[-1] = REMOVED

    def pop_entry():
        while pq:
            priority, label = heapq.heappop(pq)
            if label != REMOVED:
                del entry_finder[label]
                return priority, label
        return None, None

    for v in G:
        if v == S:
            add_entry(S, 0)
        else:
            add_entry(v, math.inf)

    while pq:
        d_u, u = pop_entry()
        if u is not None and u != REMOVED:
            costs[u] = d_u
            for e in G[u]:
                v, w = e
                entry_v = entry_finder[v]
                d_v = entry_v[0]
                if d_v > d_u + w:
                    add_entry(v, d_u + w)
                    pred[v] = u
    return costs, pred
This code was shown to work for a separate graph used as an example in our lectures. That graph was converted into code as follows.
G = {
    '0': [('1', 2), ('2', 6), ('3', 7)],
    '1': [('3', 3), ('4', 6)],
    '2': [('4', 1)],
    '3': [('4', 5)],
    '4': []
}

costs, pred = dijkstra(G, '0')
print(costs, pred)
So I know for a fact that the given code works. The problem arose when I converted our assigned graph into code: it gave me a KeyError: 'D'. My implementation of the graph is as follows.
G = {
    'A': [('B', 56), ('C', 96), ('D', 78)],
    'B': [('D', 18), ('F', 208), ('E', 110)],
    'C': [('D', 20), ('F', 90)],
    'D': [('F', 112)],
    'E': [('F', 16), ('G', 46), ('I', 108)],
    'F': [('G', 20), ('H', 62)],
    'G': [('H', 40)],
    'H': [('I', 29), ('J', 56)],
    'I': [('J', 21)],
    'J': []
}

costs, pred = dijkstra(G, 'A')
print(costs, pred)
The error also comes with:

line 41, in dijkstra
    entry_v = entry_finder[v]

I'd like to know whether the error came from my encoding of the graph or whether the given sample code itself has a bug.
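One thing worth checking: pop_entry deletes a vertex from entry_finder once it is finalized, so the lookup entry_finder[v] raises a KeyError whenever an edge points to an already-finalized vertex (in this graph 'D' is finalized before 'C' is popped, and C still has an edge to D). A hedged sketch of a guard for that case, my own change rather than part of the assignment code, would be to skip neighbours that are no longer pending:

for e in G[u]:
    v, w = e
    if v not in entry_finder:
        # v has already been finalized; its distance cannot improve, so skip it.
        continue
    entry_v = entry_finder[v]
    d_v = entry_v[0]
    if d_v > d_u + w:
        add_entry(v, d_u + w)
        pred[v] = u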
I have a parent class Character and child classes Warrior, Sorcerer, and Drood that inherit from it.
Each time a new Warrior object is created, I need to print the next message from a defined list; if there are no more sentences, it should wrap back around to the first one.
The print call should be in the Character class, not in Warrior - that is a condition.
I would be happy if you could explain the approach itself and maybe advise me what to read. Plain answers are also welcome, but I would like to understand the solution in the end. Thank you!
class Character:
    def __init__(self, life_points, name, hair_color):
        self.life_points = life_points
        self.name = name
        self.hair_color = hair_color
        print(self.__class__.welcome_words)  # "welcome_words" should receive the next sentence from the list "welcome_words_list"

class Warrior(Character):
    welcome_words_list = ["Grrr!", "AAAAAAA", "BBBBBBB", "CCCCCCC", "DDDDDDD"]

class Sorcerer(Character):
    welcome_words_list = ["Baboo!", "FFFFFFF", "GGGGGG", "HHHHHHH", "IIIIIIII"]

class Drood(Character):
    welcome_words_list = ["WoooWaaa!", "JJJJJJJ", "KKKKKKK", "LLLLLLL", "MMMMMMM"]

my_warrior = Warrior(80, "Kartman", "red")
my_warrior1 = Warrior(80, "David", "green")
my_warrior2 = Warrior(80, "Kate", "blue")

my_sorcerer = Sorcerer(80, "Lucia", "brown")
my_sorcerer1 = Sorcerer(80, "Mark", "yellow")
my_sorcerer2 = Sorcerer(80, "Smith", "brown")

my_drood = Drood(80, "Boby", "blue")
my_drood1 = Drood(80, "Jenifer", "green")
my_drood2 = Drood(80, "Sam", "red")
I can't figure out how to advance to the next message each time a new object is created.
Question: Each time a new Character object is created, I need to print the next message from a list of words defined in the subclass, e.g. Warrior.
To share an object among all instances of the same class, you can use a class variable.
To cycle through the list of words indefinitely, there is the function cycle in the itertools module.
9.3.2. Class Objects - class variable
A variable that is defined at class level (not on an instance of the class) and is shared by all instances of the class.
itertools.cycle(iterable)
Make an iterator returning elements from the iterable. Repeats indefinitely.
import itertools

class Character:
    def __init__(self, life_points, name):
        self.life_points = life_points
        self.name = name

        # "welcome_words"
        print('{}'.format((life_points, name,
                           self.__class__.__name__,
                           next(self.WORD_LIST))))

class Warrior(Character):
    WORD_LIST = itertools.cycle(
        ("Grrr!", "AAAAAAA", "BBBBBBB", "CCCCCCC", "DDDDDDD"))

class Sorcerer(Character):
    WORD_LIST = itertools.cycle(
        ("Baboo!", "FFFFFFF", "GGGGGG", "HHHHHHH", "IIIIIIII"))

name = ("Kartman", "David", "Kate")
for n in range(1, 11):
    Warrior(n, name[int(n % 3)])
    Sorcerer(n, name[int(n % 3)])
Output:
(1, 'David', 'Warrior', 'Grrr!')
(1, 'David', 'Sorcerer', 'Baboo!')
(2, 'Kate', 'Warrior', 'AAAAAAA')
(2, 'Kate', 'Sorcerer', 'FFFFFFF')
(3, 'Kartman', 'Warrior', 'BBBBBBB')
(3, 'Kartman', 'Sorcerer', 'GGGGGG')
(4, 'David', 'Warrior', 'CCCCCCC')
(4, 'David', 'Sorcerer', 'HHHHHHH')
(5, 'Kate', 'Warrior', 'DDDDDDD')
(5, 'Kate', 'Sorcerer', 'IIIIIIII')
(6, 'Kartman', 'Warrior', 'Grrr!')
(6, 'Kartman', 'Sorcerer', 'Baboo!')
(7, 'David', 'Warrior', 'AAAAAAA')
(7, 'David', 'Sorcerer', 'FFFFFFF')
(8, 'Kate', 'Warrior', 'BBBBBBB')
(8, 'Kate', 'Sorcerer', 'GGGGGG')
(9, 'Kartman', 'Warrior', 'CCCCCCC')
(9, 'Kartman', 'Sorcerer', 'HHHHHHH')
(10, 'David', 'Warrior', 'DDDDDDD')
(10, 'David', 'Sorcerer', 'IIIIIIII')
Tested with Python: 3.6
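Because WORD_LIST is a class variable, every instance of the same subclass advances the same cycle iterator, which is what produces the wrap-around after the fifth message. A small check of that behaviour, reusing the Warrior class above (my own snippet, not part of the original answer):

# With a fresh cycle, creating six Warriors walks through all five messages
# and then starts over with the first one.
for i in range(6):
    Warrior(1, "Test")
# The sixth line printed repeats the first message: (1, 'Test', 'Warrior', 'Grrr!')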
I am trying to save every function's arguments, as it runs, into a container. The container is shared by all functions run in the script. How do I ensure the container's existing contents are NOT saved again every time I save function arguments?
The decorator below saves function arguments:
import inspect
from datetime import datetime
import time

def func_logger(method):
    def wrapper(*args, **kw):
        method_args = inspect.signature(method).bind(*args, **kw).arguments
        runtime = str(datetime.now())
        name = method.__name__
        module = method.__module__
        signature = runtime + ': ' + '.'.join([module, name])
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        kw['log'][signature] = {}
        kw['log'][signature]['time'] = round(te - ts, 2)
        kw['log'][signature]['args'] = method_args
        return result
    return wrapper
And an example function:
@func_logger
def test(a, b=4, c='blah-blah', *args, **kwargs):
    return 4**4**8
When I am running the following snippet:
log = {}
output = test(1,4,2,4,1,par=1, log=log)
output = test(1,4,2,4,1,par=1, log=log)
log
I receive this output:
{'2019-05-17 13:48:25.214094: __main__.test': {'time': 0.0,
'args': OrderedDict([('a', 1),
('b', 4),
('c', 2),
('args', (4, 1)),
('kwargs', {'par': 1, 'log': {...}})])},
'2019-05-17 13:48:25.215092: __main__.test': {'time': 0.0,
'args': OrderedDict([('a', 1),
('b', 4),
('c', 2),
('args', (4, 1)),
('kwargs', {'par': 1, 'log': {...}})])}}
I already tried a workaround: a function that removes the 'log' entry from the dictionary. However, every new item in the log stores a snapshot of the log's contents at that point. So when I try this:
list( log.items() )[-1][-1]['args']
The output is this:
OrderedDict([('a', 1),
('b', 4),
('c', 2),
('args', (4, 1)),
('kwargs',
{'par': 1,
'log': {'2019-05-17 13:45:45.748722: __main__.test': {'time': 0.0,
'args': OrderedDict([('a', 1),
('b', 4),
('c', 2),
('args', (4, 1)),
('kwargs', {'par': 1, 'log': {...}})])},
'2019-05-17 13:45:45.749221: __main__.test': {'time': 0.0,
'args': OrderedDict([('a', 1),
('b', 4),
('c', 2),
('args', (4, 1)),
('kwargs', {'par': 1, 'log': {...}})])},
'2019-05-17 13:45:45.750218: __main__.test': {'time': 0.0,
'args': OrderedDict(...)}}})])
So essentially, such a workaround won't work, because over time memory would get clogged quickly.
Is there any way the decorator could avoid saving the log's own contents every time I save function arguments? What I would rather like to avoid is creating a new 'log = {}' container every time I want to dump arguments from a new function.
You could simply store the log parameter if present and remove it from **kw:
def func_logger(method):
    def wrapper(*args, **kw):
        try:
            log = kw['log']
            del kw['log']
        except KeyError:
            log = None
        method_args = inspect.signature(method).bind(*args, **kw).arguments
        runtime = str(datetime.now())
        name = method.__name__
        module = method.__module__
        signature = runtime + ': ' + '.'.join([module, name])
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        if log is not None:
            log[signature] = {}
            log[signature]['time'] = round(te - ts, 2)
            log[signature]['args'] = method_args
        return result
    return wrapper
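With that change, calling the example function as before should leave the log free of self-references (a quick check, reusing the test function and log dict from the question):

log = {}
output = test(1, 4, 2, 4, 1, par=1, log=log)
# The recorded kwargs now contain only {'par': 1}; the log is no longer nested inside itself.
print(list(log.values())[-1]['args'])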
Alternatively, use a global log inside func_logger:
log = {}

def func_logger(method):
    def wrapper(*args, **kw):
        # pass
        log[signature] = {...}
        return result
    return wrapper
Then call output = test(1, 4, 2, 4, 1, par=1) without passing log.
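Filling in the elided parts, a minimal sketch of that global-log variant might look like this (my own completion, mirroring the fields recorded by the original decorator):

import inspect
import time
from datetime import datetime

log = {}  # module-level container shared by every decorated function

def func_logger(method):
    def wrapper(*args, **kw):
        method_args = inspect.signature(method).bind(*args, **kw).arguments
        signature = str(datetime.now()) + ': ' + '.'.join([method.__module__, method.__name__])
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        # Write into the module-level log instead of a keyword argument.
        log[signature] = {'time': round(te - ts, 2), 'args': method_args}
        return result
    return wrapper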
What is the best way to left join the following PCollections in Apache Beam?
pcoll1 = [('key1', [[('a', 1)],[('b', 2)], [('c', 3)], [('d', 4)],[('e', 5)], [('f', 6)]]), ('key2',[[('a', 12)],[('b', 21)], [('c', 13)]]), ('key3',[[('a', 21)],[('b', 23)], [('c', 31)]])]
pcoll2 = [('key1', [[('x', 10)]]), ('key2', [[('x', 20)]])]
The expected output is:
[('a', 1), ('x', 10)]
[('b', 2), ('x', 10)]
[('c', 3), ('x', 10)]
[('d', 4), ('x', 10)]
[('e', 5), ('x', 10)]
[('f', 6), ('x', 10)]
[('a', 12), ('x', 20)]
[('b', 21), ('x', 20)]
[('c', 13), ('x', 20)]
[('a', 21)]
[('b', 23)]
[('c', 31)]
I have implemented a left join using CoGroupByKey() and ParDo(). Is there any other way to implement a left join in the Beam Python SDK?
left_joined = (
    {'left': pcoll1, 'right': pcoll2}
    | 'LeftJoiner: Combine' >> beam.CoGroupByKey()
    | 'LeftJoiner: ExtractValues' >> beam.Values()
    | 'LeftJoiner: JoinValues' >> beam.ParDo(LeftJoinerFn())
)
class LeftJoinerFn(beam.DoFn):

    def __init__(self):
        super(LeftJoinerFn, self).__init__()

    def process(self, row, **kwargs):
        left = row['left']
        right = row['right']
        if left and right:
            for each in left:
                yield each + right[0]
        elif left:
            for each in left:
                yield each
You can use the following code to pass the right side of the join as a side input, assuming the right side always has exactly one element per key and is therefore much smaller than the left PCollection.
Also, if your PCollection is created by reading from an external source instead of an in-memory list, you will need to pass right_list=beam.pvalue.AsList(pcoll2) instead of right_list=pcoll2 to the ParDo. Check here for more info.
class LeftJoinerFn(beam.DoFn):

    def __init__(self):
        super(LeftJoinerFn, self).__init__()

    def process(self, row, **kwargs):
        right_dict = dict(kwargs['right_list'])
        left_key = row[0]
        if left_key in right_dict:
            for each in row[1]:
                yield each + right_dict[left_key]
        else:
            for each in row[1]:
                yield each


class Display(beam.DoFn):
    def process(self, element):
        LOG.info(str(element))
        yield element


p = beam.Pipeline(options=pipeline_options)

pcoll1 = [('key1', [[('a', 1)], [('b', 2)], [('c', 3)], [('d', 4)], [('e', 5)], [('f', 6)]]),
          ('key2', [[('a', 12)], [('b', 21)], [('c', 13)]]),
          ('key3', [[('a', 21)], [('b', 23)], [('c', 31)]])
          ]

pcoll2 = [('key1', [[('x', 10)]]), ('key2', [[('x', 20)]])]

left_joined = (
    pcoll1
    | 'LeftJoiner: JoinValues' >> beam.ParDo(LeftJoinerFn(), right_list=pcoll2)
    | 'Display' >> beam.ParDo(Display())
)

p.run()
If the second collection is always smaller, an alternative approach would be to use side inputs. This would require making the right collection a side-input that is broadcast to all the workers, then writing a ParDo that processes elements from the left collection and reads in the right collection.
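A minimal sketch of that side-input variant, reusing the sample pcoll1/pcoll2 data from the question and assuming the right collection fits in worker memory (the transform labels and the left_join function are my own, not from either answer):

import apache_beam as beam

with beam.Pipeline() as p:
    left = p | 'CreateLeft' >> beam.Create(pcoll1)
    right = p | 'CreateRight' >> beam.Create(pcoll2)

    def left_join(row, right_map):
        # right_map is the broadcast dict built from the right collection.
        key, values = row
        match = right_map.get(key)
        for each in values:
            if match:
                yield each + match[0]
            else:
                yield each

    joined = (left
              | 'LeftJoin' >> beam.FlatMap(left_join, right_map=beam.pvalue.AsDict(right))
              | 'Print' >> beam.Map(print))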
left.txt
149633CM,Marco,10
212539MU,Rebekah,10
231555ZZ,Itoe,10
right.txt
149633CM,Australia
212539MU,India
Code for the left join:
from apache_beam.io.gcp.internal.clients import bigquery
import apache_beam as beam

def retTuple(element):
    thisTuple = element.split(',')
    return (thisTuple[0], thisTuple[1:])

def jstr(j):
    import datetime
    jlist = []
    for k in j[1]['left_data']:
        if len(j[1]['right_data']) == 0:
            id, name, rank = [j[0]] + k
            json_str = {"id": id, "name": name, "rank": rank}
            jlist.append(json_str)
        else:
            for l in j[1]['right_data']:
                # print([j[0]] + k + l)
                id, name, rank, country = [j[0]] + k + l
                json_str = {"id": id, "name": name, "rank": rank, "country": country}
                jlist.append(json_str)
    return jlist

table_spec = 'project:dataset.table_name'
table_schema = 'id:STRING,name:STRING,rank:INTEGER,country:STRING'
gcs = 'gs://dataflow4bigquery/temp'

p1 = beam.Pipeline()

left_rows = (
    p1
    | "Reading File 1" >> beam.io.ReadFromText('left.txt')
    | 'Pair each employee with key' >> beam.Map(retTuple)  # ('149633CM', ['Marco', '10'])
)

right_rows = (
    p1
    | "Reading File 2" >> beam.io.ReadFromText('right.txt')
    | 'Pair each country with key' >> beam.Map(retTuple)  # ('149633CM', ['Australia'])
)

results = ({'left_data': left_rows, 'right_data': right_rows}
           | beam.CoGroupByKey()
           | beam.FlatMap(jstr)
           | beam.io.WriteToBigQuery(
               custom_gcs_temp_location=gcs,
               table=table_spec,
               schema=table_schema,
               write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE,
               create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
           )
           )

p1.run().wait_until_finish()
I have this code in views.py:
def pins_info(request):
    if request.method == "GET":
        getpin = request.GET.get('pin', None)
        m = ButuanMaps.objects.filter(clandpin=getpin).values_list('landproperty__ctaxdec')
        n = ButuanMaps.objects.filter(clandpin=getpin).values_list('ssectionid__sbrgyid__cbrgyname')
        return HttpResponse(json.dumps({'taxdec': list(m), 'brgy': list(n)}),
                            content_type='application/json')
It works fine, but it is not very efficient when I want to get other values. I can access the result in my template like this:
success: function(data) {
    taxdec = data['taxdec'];
    brgy = data['brgy'];
    var inputform = $('#forminput').val();
    if (inputform == "Select Land PIN") {
        alert('Please Select Land PIN');
    }
    else {
        $('#status').append(
            "<p>Tax Declaration: " + taxdec + "<br/>Barangay: " + brgy + "</p>"
        );
    }
}
How can I simplify my code to make it more efficient, like this:
m = ButuanMaps.objects.filter(clandpin=getpin).values_list('landproperty__ctaxdec', 'ssectionid__sbrgyid__cbrgyname')
But how do I pass it to my template?
If we take your m and n queries as:
m = range(5)
n = range(6, 11)
Then your single query m = ButuanMaps.objects.filter(clandpin=getpin).values_list('landproperty__ctaxdec', 'ssectionid__sbrgyid__cbrgyname') is equivalent in structure to:
new = zip(m, n)
#[(0, 6), (1, 7), (2, 8), (3, 9), (4, 10)]
So you can "transpose" that:
zip(*new)
# [(0, 1, 2, 3, 4), (6, 7, 8, 9, 10)]
Then build a dict from that and your keys:
results = dict(zip(['taxdec', 'brgy'], zip(*new)))
# {'brgy': (6, 7, 8, 9, 10), 'taxdec': (0, 1, 2, 3, 4)}
Then json.dumps results.
Or use an OrderedDict for your JSON name and column names values and generalise further:
from collections import OrderedDict

keyvals = OrderedDict([
    ('taxdec', 'landproperty__ctaxdec'),
    ('brgy', 'ssectionid__sbrgyid__cbrgyname'),
])

m = ButuanMaps.objects.filter(clandpin=getpin).values_list(*keyvals.values())
result = dict(zip(keyvals, zip(*m)))
That way, you can add/remove columns to be selected and their associated JSON values in one place for the same query.
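Putting it together, the view might end up looking roughly like this (a sketch under the assumptions above; the model import is omitted, and an empty queryset simply produces an empty dict):

from collections import OrderedDict
import json

from django.http import HttpResponse

def pins_info(request):
    if request.method == "GET":
        getpin = request.GET.get('pin', None)
        keyvals = OrderedDict([
            ('taxdec', 'landproperty__ctaxdec'),
            ('brgy', 'ssectionid__sbrgyid__cbrgyname'),
        ])
        m = ButuanMaps.objects.filter(clandpin=getpin).values_list(*keyvals.values())
        # zip(*m) transposes the rows into per-column tuples keyed by the JSON names.
        result = dict(zip(keyvals, zip(*m)))
        return HttpResponse(json.dumps(result),
                            content_type='application/json')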