nested if for variable names with pattern - python

Recently I wrote a program as follows:
# Collect amounts from every row: amtN is added to the list when the
# matching numN field is non-zero.
# NOTE(review): the original indentation was lost; per the title these checks
# were presumably nested inside one another -- confirm before reusing.
empty_list = []
for row in rows:
# Field pair 1.
if row.num1 != 0:
empty_list.extend(row.amt1)
# Field pair 2.
if row.num2 != 0:
empty_list.extend(row.amt2)
# Field pair 3.
if row.num3 != 0:
empty_list.extend(row.amt3)
# Field pair 4.
if row.num4 != 0:
empty_list.extend(row.amt4)
this pattern goes on till num10.
Basically, we keep extending the list until we find a row.num{d} that equals 0.
My question is: since the variables are numbered, can we do this in a more Pythonic way, for example in a loop?
The example is simplified. We are importing legacy data from an old dbase format.

I'm guessing the class structure of a row is like this.
# This is a mock class
class Row:
    """Mock record with paired ``numN`` / ``amtN`` attributes (N = 1..6).

    Every ``numN`` is an int and every ``amtN`` is a one-element list.
    ``num6`` is 0, so code that scans for the first zero ``numN`` has
    something to find.
    """

    def __init__(self):
        self.num1 = 32
        self.num2 = 23
        self.num3 = 323
        self.num4 = 213
        self.num5 = 23
        self.num6 = 0
        self.amt1 = [20]
        self.amt2 = [320]
        self.amt3 = [320]
        self.amt4 = [340]
        self.amt5 = [30]
        self.amt6 = [330]
Then you can use getattr like this
row = Row()
empty_list = []
i = 1
# Walk num1, num2, ... and stop at the first zero.  The getattr default of 0
# also ends the loop cleanly when the numbered fields run out, instead of
# raising AttributeError on a row that contains no zero marker.
while getattr(row, "num{}".format(i), 0) != 0:
    empty_list.extend(getattr(row, "amt{}".format(i)))
    i += 1

import itertools
import contextlib
for row in rows:
    # AttributeError means we ran past the last numbered field of this row;
    # suppress it and carry on with the next row.
    with contextlib.suppress(AttributeError):
        for i in itertools.count(start=1):
            # Stop at the first zero numN; everything before it is collected.
            if getattr(row, f'num{i}') == 0:
                break
            empty_list.extend(getattr(row, f'amt{i}'))
This should work. For each row, getattr is used until an AttributeError is raised, and then the next row is processed.
Note: the above code uses f-strings, so you need python 3.6+ to use them. If you use an older version, simply use .format().

You can try to use eval() function
empty_list = []
for row in rows:
    for i in range(1, 11):
        # NOTE(review): eval() executes whatever text it is given.  It is
        # only acceptable here because the expression is built from a
        # literal prefix and an int; getattr(row, 'num' + str(i)) does the
        # same lookup without evaluating code.
        if eval('row.num' + str(i)) != 0:
            # The attribute is amtN (the original spelled it 'atm').
            empty_list.extend(eval('row.amt' + str(i)))

Related

How to create multiple subclasses from a list in python?

I'm trying to get my data in an hierarchical way, so I decided to turn to subclasses. The file I'm getting the data from is formatted like this:
2WQZ_chain_A
Score = 338.0
53-164
208-317
327-595
611-654
2WQZ_chain_B
Score = 344.0
53-164
205-317
327-595
611-655
2XB6_chain_A
Score = 319.0
64-163
211-317
327-596
613-654
2XB6_chain_B
Score = 329.0
53-163
212-317
327-596
613-654
And what I want to obtain is a first class called as the PDB name (i.e: 2WQZ) with the subclasses called chain_A, chain_B and so on. These subclasses should contain an object called "score" and a third subclass called "intervals" containing the possibles intervals. The general idea is something like this.
At the moment I tried using a dictionary but ended up with the correct PDB class, but only the second chain, my code is
# One PDB entry, identified only by its name (stored as pdbid).
class PDB(object):
def __init__(self, pdbname):
self.pdbid = pdbname
# A chain, modelled as a subclass of PDB: every Chain instance carries its own
# pdbid (set by PDB.__init__) in addition to chainid and score.
class Chain(PDB):
def __init__(self, chainame, score, pdbname):
self.chainid = chainame
self.score = score
super().__init__(pdbname)
# Read the whole file as a list of lines.
making_class = open("covered_intervals.txt", "r").readlines()
# Collect the first four characters of every line mentioning "chain".
pdblist = []
for i in making_class:
if "chain" in i:
pdblist.append(i[:4])
# De-duplicate while keeping first-seen order.
pdblist = list(dict.fromkeys(pdblist))
# Dict keyed by PDB name; values start as None and are filled in below.
pdblist2 = dict.fromkeys(pdblist)
for i in pdblist:
pdblist2[i] = PDB(i)
for j in making_class:
if i in j:
chainame = j[5:12]
# NOTE(review): this assignment replaces the value stored under pdblist2[i]
# each time it runs, so only the last matching chain survives.
pdblist2[i] = Chain(chainame, 4, i)
4 is a placeholder, and I see why I get only the last chain, but have no idea how to get the two chains under the same PDB.
In this case, a dictionary can be created for the top-level nodes, and since it is a fixed-depth tree, class nesting is not required. The Chain class will have three components:
Chain Name
Score
List of Ranges - I have implemented a class for range
class Chain():
    """One chain: its name, an optional score and a list of ranges."""

    def __init__(self, chainame, score=None):
        """Store the chain name and score; start with no ranges."""
        self.chainid = chainame
        self.score = score
        # A fresh list per instance, so chains never share ranges.
        self.ranges = []

    def add_range(self, range):
        """Append one range object to this chain."""
        # The parameter keeps its original name for keyword callers; it
        # shadows the builtin ``range`` only inside this method.
        self.ranges.append(range)

    def add_score(self, score):
        """Set (or replace) the score of this chain."""
        self.score = score
class range1():
    """Integer interval parsed from text of the form ``"start-end"``."""

    def __init__(self, str):
        """Parse *str* (e.g. ``"53-164"``) into ``start`` and ``end`` ints.

        Raises ValueError if a part is not an integer and IndexError if
        there is no ``-`` separator.
        """
        # The parameter keeps its original name for keyword callers; it
        # shadows the builtin ``str`` only inside this method.
        parts = str.split("-")
        self.start = int(parts[0])
        self.end = int(parts[1])

    def __repr__(self):
        return "range1({!r})".format("{}-{}".format(self.start, self.end))
# Position inside the current block: 0 = expecting the "PDB_chain" header,
# 1 = expecting the score line, 2 and above = range lines.
counter = 0
pdb = ""
ch = None
data = {}  # PDB name -> list of Chain objects

with open("covered_intervals.txt", "r") as f:
    for line in f:
        line = line.strip()
        if line == "":
            # A blank line closes the current block.  Only store a chain
            # that was actually started, so a leading blank line or several
            # blank lines in a row do not add None or duplicate entries.
            if ch is not None:
                data.setdefault(pdb, []).append(ch)
                ch = None
            counter = 0
        elif counter == 0:
            x = line.split("_", 1)
            pdb = x[0]
            chainname = x[1]
            ch = Chain(chainname)
            counter = counter + 1
        elif counter == 1:
            # Score line looks like "Score = 338.0".
            ch.add_score(float(line.split("=")[1]))
            counter = counter + 1
        else:
            ch.add_range(range1(line))

# The file may end without a trailing blank line: store the last chain too.
if ch is not None:
    data.setdefault(pdb, []).append(ch)
First I would suggest creating something, that can parse one textblock from the file into usable variables, for example like this:
def parse_block(lines):
    """Split one text block into ``(pdb_name, chain, score, intervals)``.

    *lines* is a list of strings: a header such as ``"2WQZ_chain_A"``, a
    score line such as ``"Score = 338.0"``, then any number of interval
    lines.  The score is returned as the stripped text after ``=`` and the
    intervals as the remaining lines, unchanged.

    Raises IndexError if the block has fewer than two lines or the score
    line has no ``=``.
    """
    # Split on the first underscore instead of assuming a 4-character name.
    pdb_name, _, chain = lines[0].partition("_")
    score = lines[1].split("=")[1].strip()
    intervals = lines[2:]
    return (pdb_name, chain, score, intervals)
Using this, you could build your classes, or use a nested dictionary, which would fit the data structure well, too.
from collections import defaultdict
with open("pdbdata", "r") as f:
    content = f.read()

# pdb name -> chain name -> {"score": ..., "intervals": [...]}
pdb_dict = defaultdict(dict)
for block in content.split("\n\n"):
    block_lines = [ln for ln in block.splitlines() if ln.strip()]
    if not block_lines:
        # Trailing or repeated blank lines produce empty blocks; skip them
        # instead of letting parse_block fail on a missing header line.
        continue
    pdb_name, chain, score, intervals = parse_block(block_lines)
    pdb_dict[pdb_name][chain] = {"score": score, "intervals": intervals}
The resulting nested dict looks like this:
{'2WQZ': {'chain_A': {'intervals': ['53-164', '208-317', '327-595', '611-654'],
'score': '338.0'},
'chain_B': {'intervals': ['53-164', '205-317', '327-595', '611-655'],
'score': '344.0'}},
'2XB6': {'chain_A': {'intervals': ['64-163', '211-317', '327-596', '613-654'],
'score': '319.0'},
'chain_B': {'intervals': ['53-163', '212-317', '327-596', '613-654'],
'score': '329.0'}}}

Program does not recognize value as a valid variable value?

The program I am trying to create involves writing a method called monster_fight(monster1,monster2) to have the two monsters "Fight". However I am having one issue retrieving the damage value stored in each of the monster object dictionaries named 'self.attacks'.
I am trying to retrieve a value from dictionary 'monster1.attacks' to reduce the hp of the monster2 object. However with the current code I have in place, the program does not recognize the value of the keys when I call the dictionary. Can anybody show what I am doing wrong?
Thanks!
class Monster():
    """A monster with hit points, experience and a set of known attacks.

    ``attacks`` maps the names of the attacks this monster knows to their
    damage; ``possible_attacks`` is the catalogue it may learn from.
    """

    def __init__(self, name, max_hp = 20, hp=20):
        self.name = name
        # NOTE(review): there is no ``type`` parameter, so this stores the
        # builtin ``type``; kept unchanged because the intent is unknown.
        self.type = type
        # win_fight() and lose_fight() reset current_hp to max_hp, so it has
        # to be stored (the original never set it and raised AttributeError).
        self.max_hp = max_hp
        # NOTE(review): ``hp`` is accepted but unused; hit points start at
        # max_hp, as in the original.
        self.current_hp = max_hp
        self.attacks = {'wait': 0}
        self.possible_attacks = {'sneak_attack': 1,
                                 'slash': 2,
                                 'ice_storm': 3,
                                 'fire_storm': 3,
                                 'whirlwind': 3,
                                 'earthquake': 2,
                                 'double_hit': 4,
                                 'wait': 0}
        self.exp = 0

    def add_attack(self, attack_name):
        """Learn *attack_name*; return True if it is a known possible attack."""
        if attack_name in self.possible_attacks:
            self.attacks[attack_name] = self.possible_attacks.get(attack_name)
            return True
        # The original had a further membership check after this point, but
        # both branches had already returned, so it could never run.
        return False

    def remove_attack(self, attack_name):
        """Forget *attack_name*; return False if the monster did not know it."""
        if attack_name not in self.attacks:
            return False
        self.attacks.pop(attack_name)
        if not self.attacks:
            # A monster always keeps at least the no-damage 'wait' attack.
            self.attacks['wait'] = 0
        return True

    def win_fight(self):
        """Award 5 exp and restore hit points to max_hp."""
        self.exp += 5
        self.current_hp = self.max_hp

    def lose_fight(self):
        """Award 1 exp and restore hit points to max_hp."""
        self.exp += 1
        self.current_hp = self.max_hp
# Simulate a fight between two monsters that alternate through their attacks.
# Returns a tuple (round number, winner's name, winner's moves), or
# (-1, "None", "None").
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements after the while line is not certain.
def monster_fight(monster1,monster2):
round1 = 0
moves1 = []
moves2 = []
list1 = []
list2 = []
# list1 receives the damage VALUES of monster1's attacks, not their names.
for i in monster1.attacks:
values = ''
values = monster1.attacks.get(i)
list1.append(values)
# NOTE(review): here .get() is called with an integer index, while the keys
# of attacks are attack-name strings, so every lookup yields None.
for i in range(0,len(monster2.attacks)):
values = monster2.attacks.get(i)
list2.append(values)
while monster1.current_hp > 0 or monster2.current_hp > 0:
round1 += 1
# list1 holds damage values, so this indexes attacks by a number such as 3
# instead of by an attack name -- the source of the reported KeyError.
monster1_attack = int(monster1.attacks[list1[(round1-1)%len(list1)]])
monster2.current_hp -= monster1_attack
moves1.append(list1[(round1-1)%len(list1)])
monster2_attack= monster2.attacks[list2[(round1-1)%len(list2)]]
monster1.current_hp -= monster2_attack
moves2.append(list2[(round1-1)%len(list2)])
if monster1.current_hp <= 0:
monster1.lose_fight()
monster2.win_fight()
return round1, monster2.name, moves2
# NOTE(review): this repeats the monster1 test from the branch above;
# presumably monster2.current_hp was meant -- confirm.
elif monster1.current_hp <= 0:
monster2.lose_fight()
monster1.win_fight()
return round1,monster1.name, moves1
else:
return -1,"None","None"
# Two monsters with 9 max hp each.
a = Monster("a", 9)
b = Monster("b", 9)
# Each learns ice_storm and forgets the default 'wait' attack.
a.add_attack("ice_storm")
b.add_attack("ice_storm")
b.remove_attack("wait")
a.remove_attack("wait")
round1, winner, moves = monster_fight(a, b)
print(round1)
# NOTE(review): monster_fight returns the winner's name (or "None"), not a
# Monster object, so .name on it presumably fails -- confirm.
print(winner.name)
print(moves)
monster1_attack = int(monster1.attacks[list1[(round1-1)%len(list1)]])
KeyError: 3
Well, let's see. You're calling:
monster1_attack = int(monster1.attacks[list1[(round1-1)%len(list1)]])
monster1 is a Monster object, created from:
a = Monster("a", 9)
a.add_attack("ice_storm")
a.remove_attack("wait")
So monster1.attacks looks like:
{
'ice_storm': 3,
}
You're trying to access that dictionary using a key derived from list1[(round1-1)%len(list1)].
list1 is set here:
for i in monster1.attacks:
values = ''
values = monster1.attacks.get(i)
list1.append(values)
After the above code runs, list1 is a list that looks like:
[3]
(Because you ask for monster1.attacks.get(i), which will return the value associated with key i.)
So when you ask for list1[(round1-1)%len(list1)], you get the value 3, which means you're asking for monster1.attacks[3].
There is no key named 3 in monster1.attacks. As we saw earlier, the only key is ice_storm which has the value 3. It looks like you're trying to figure out the damage that monster1's attack will do. That is actually what you have in list1, so in theory you could just write:
monster1_attack = list1[(round1-1)%len(list1)]
monster2.current_hp -= monster1_attack
I think your logic here may be a bit convoluted. You should probably think carefully about exactly what you're trying to accomplish and try to simplify your code with that goal in mind. Using the Python debugger to see the value of your variables prior to the error -- or using print statements to accomplish the same thing -- can help diagnose this sort of problem.

Easy Logical Error In Python - My Function Calling Isn't Working Properly

I can't get my "doLogicForColumn5" function to affect my program. Column 5 of the CSV output should be filled with various values, but instead it's all the same number (12.12).
Lines 14-27 to be specific--they're simply not working!
Any help greatly appreciated!
# -*- coding: utf-8 -*-
import csv
import string # for capwords
date = "7/5/2015"
monthlybudget = 100000
# Daily budget is the monthly budget divided by 30.
dailybudget = monthlybudget/30
#campaign variables
# Every option below is wrapped in its own one-element list.
levels = [["1"], ["2"], ["3"], ["4"], ["5"], ["6"]]
language = [["english"], ["spanish"]]
variables = [["1"], ["2"]]
nouns = [["wordA1"], ["+wordA2"]]
adjectives1 = [["wordB1"], ["wordB2"]]
adjectives2 = [["wordC1"], ["wordC2"]]
# Compute the budget cell (column5) for a Row from its language and level.
# Starts from dailybudget/36 and scales it by per-language, per-level factors.
# NOTE(review): Row.__init__ stores str(...) of each argument, so comparing
# against a list such as ["spanish"] or ["1"] is never true here.
def doLogicForColumn5(self): # budget
self.column5 = dailybudget/36
if self.language == ["spanish"]:
self.column5 = self.column5 * .6
if self.level == ["1"]:
self.column5 = self.column5*.8
else:
self.column5 = self.column5*.1
else: # language is not ["spanish"]
self.column5 = self.column5*.4
if self.level == ["1"]:
self.column5 = self.column5*.2
else:
self.column5 = self.column5*.3
# One output row of the CSV file.
class Row(object):
# Class-level defaults for the five output columns.
column1 = "column1"
column2 = "column2"
column3 = "column3"
column4 = "column4"
column5 = "budget"
def __init__(self, level, language, noun, adjective1, adjective2, variable):
# Each argument is stored and then replaced by its str() form, so a
# one-element list such as ["1"] is kept as the text "['1']".
self.level = level
self.level = str(self.level)
self.language = language
self.language = str(self.language)
self.noun = noun
self.noun = str(self.noun)
self.adjective1 = adjective1
self.adjective1 = str(self.adjective1)
self.adjective2 = adjective2
self.adjective2 = str(self.adjective2)
self.variable = variable
self.variable = str(self.variable)
# Compute column5 and write the five columns through the module-level
# file_writer. The parameters are not used in the body.
def rowEntry(self, level, language, noun, adjective1, adjective2, variable):
doLogicForColumn5(self)
lol = [[self.column1], [self.column2], [self.column3], [self.column4], [self.column5]]
# Each nested one-element list is immediately replaced by the plain value.
lol[0] = self.column1
lol[1] = self.column2
lol[2] = self.column3
lol[3] = self.column4
lol[4] = self.column5
file_writer.writerow([o for o in lol])
# Write one CSV row for every combination of level, language, noun,
# adjective1, adjective2 and variable.
# NOTE(review): binary mode "wb" is the Python 2 way to open a file for
# csv.writer; Python 3 expects text mode with newline="".
with open("test.csv", "wb") as test_file:
file_writer = csv.writer(test_file)
for a in range(0, len(levels)):
for e in range(0, len(language)):
for y in range (0, len(nouns)):
for x in range (0, len(adjectives1)):
for w in range (0, len(adjectives2)):
for n in range(0, len(variables)):
# city and stateVersion are assigned but not used in this listing.
city = "n/a"
stateVersion = "n/a"
food = Row(levels[a], language[e], nouns[y], adjectives1[x], adjectives2[w], variables[n])
food.rowEntry(levels[a], language[e], nouns[y], adjectives1[x], adjectives2[w], variables[n])
First:
for a in range(0, len(levels)):
so a is a number. Then:
food = Row(levels[a],
and so levels[a] is a list such as ["1"]. Then, when you __init__() a Row:
self.level = level
self.level = str(self.level)
So now self.level is a string. Then, in doLogicForColumn5():
if self.level == ["1"]
Recall that self.level was cast as a string earlier. So self.level will never be equal to a list of ["1"]. It may, however, be equal to a string of "['1']", so it would make more sense to compare to that. Better yet, turn the global levels into a flat list like ["1", "2", "3", "4", "5", "6"]. The best option would be for levels to simply be '123456' and refactor your program to use that simpler structure.
Same goes for self.language and any other variables which have been set to a single-element list within a larger list.
Check your code for other instances of unnecessary complexity, as that sort of thing will eventually bite you (as another example, the doLogicForColumn5() function should really just be a method in Row, as commenters are suggesting).

tables in python - having some errors - not sure why

I have some questions about "technical" and basic functions in python.
I have a table like this:
Name,Gender,Age,Salary,Height
Menny, M, 1, 1, 1
James, J, 2, 2, 2
Sami, S, 3, 3, 3
# Table loaded from a delimited text file (Python 2 code: print statement).
class Table:
def __init__(self,path,sep):
try:
f = open(path, "r")
read_file = f.read()
f.close()
# NOTE(review): a bare except catches every exception type.
except:
print "cannot create a table from this file"
return
# Split into lines, then each line into cells.
# NOTE: table is a local variable; nothing is stored on self, so other
# methods cannot reach it.
table = read_file.split("\n")
for i in range (len(table)):
table[i] = table[i].split(sep)
if len(table) > 0:
# Column names in the first row must be unique.
for i in range(len(table[0])):
if table[0][i] in table[0][0:i]:
raise ValueError
row_names = []
# Every later row must be as long as the first row and have a unique
# first cell.
for i in range(1,len(table)):
if len(table[i]) != len(table[0]):
raise ValueError
if table[i][0] in row_names:
raise ValueError
row_names.append(table[i][0])
Now I want to use functions:
1. to know how many cells there are. here I have 12 cells. The height of the table is len(table). Then the width is len(table[0]). The number of cells is height*width.
so:
# Number of cells as height * width.
# NOTE(review): table is not an attribute of self and is not defined in this
# method; inside a class the calls to len() still reach the builtin.
def len(self):
height = len(table)
width = len(table[0])
return height * width
and if I tried this:
# NOTE(review): the product is computed but there is no return statement,
# so calling this yields None.
def len(self):
len(self.nestedList)*len(self.nestedList[0])
I get "None"
If in the shell I write the name Menny, Sami etc, then print the rest of the line (age, salary etc)....
So I thought about it:
# Attempt to return the row whose name is rowname.
# NOTE(review): rows_checking collects whole rows (lists), so testing a name
# string for membership in it presumably never matches -- confirm.
def the_row (self, rowname):
rows_checking = []
for i in range(1, len(table)):
rows_checking.append(table[i])
if rowname in rows_checking:
# table[0:0] is an empty list.
table[i].remove(table[0:0])
return table[i]
almost the same thing like in the second task, but this time the function will print the value that is common to 2 thing. For example - the code will print "1" if I write Menny and Age.
Again, I think I'll do it almost the same as I did in the pre task, but this time:
get_the_value(self,rowname,colname)
So far seems to be good ideas, I hope so...
but I get errors:
AttributeError: Table instance has no attribute '__len__'
or
AttributeError: Table instance has no attribute 'len'
Probably because I didn't used "self" here, right? So what I can do?
You don't have to feed me by the spoon and tell me the codes as it should be, but just give me advices as possible as you can, please.
edited code:
# Table loaded from a delimited text file (Python 2 code: print statement).
# After construction: headlines is the first line's cells, headrows the first
# cell of each remaining line, g the remaining cells per row, matrix a
# per-column view built from g.
# NOTE(review): indentation was lost in this listing, so the exact extent of
# the try block and of the loops is not certain.
class Table:
def __init__(self,path,sep):
self.path=path
self.sep=sep
self.g=[]
self.count=0
self.headlines=[]
self.matrix=[]
self.headrows=[]
self.postionrow=0
self.postioncolmn=0
try:
f=open(self.path,'r')
read_file=f.read()
# str.split() with no argument splits on any run of whitespace, not only on
# newlines.
split_file=read_file.split()
for line in split_file:
list_the_line=line.split(self.sep)
self.g.append(list_the_line)
# Header cells must be unique: each one is counted against all header cells.
self.count=0
for z in range (len(self.g[0])):
self.count=0
for d in range(len(self.g[0])):
if self.g[0][z]==self.g[0][d]:
self.count+=1
if self.count>=2:
raise ValueError
# Every line must have as many cells as the first line.
num_first_line=len(self.g[0])
for k in range (len(self.g)):
if len(self.g[k])!= num_first_line:
raise ValueError
# The first line becomes the header and is removed from the data rows.
self.headlines=self.g[0]
self.g.remove(self.g[0])
# Row names (first cell of each row) must be unique.
self.count=0
for row_name1 in range (len(self.g)):
self.count=0
for row_name2 in range(len(self.g)):
if self.g[row_name1][0]==self.g[row_name2][0]:
self.count+=1
if self.count>=2:
raise ValueError
# Move each row's name into headrows and drop it from the row.
for i in range (len(self.g)):
self.headrows.append(self.g[i][0])
self.g[i].remove(self.g[i][0])
# Build matrix from g.
# NOTE(review): op ranges over the number of rows and od over the number of
# columns, yet g is indexed as g[od][op] -- verify the index order.
ezer=[]
for op in range (len(self.g)):
ezer=[]
for od in range (len(self.g[0])):
ezer.append(self.g[od][op])
self.matrix.append(ezer)
f.close()
# NOTE(review): a bare except catches every exception type.
except :
print "cannot creat a table object from this file"
return
# Number of data cells: rows * columns of g (names and header excluded).
def len(self):
num_rows=len(self.g)
num_cols=len(self.g[0])
return num_rows*num_cols
# Return the cells of the row named rowname and remember its position.
# NOTE(review): the loop runs over len(self.headlines) but indexes
# self.headrows -- confirm the two always have compatible lengths.
def get_row(self,rowname):
for i in range (len(self.headlines)):
if rowname==self.headrows[i]:
self.postionrow=i
return self.g[i]
if not rowname in self.headrows :
raise ValueError
# Return the column named colname and remember its position.
# i-1 is used because headlines still contains the label of the row-name
# column, which was removed from the data rows.
def get_column(self,colname):
for i in range (len(self.headlines)):
if colname==self.headlines[i]:
self.postioncolmn=i-1
return self.matrix[i-1]
if not colname in self.headlines :
raise ValueError
# Return one cell; get_row/get_column are called for the positions they
# store on self.
def get_value(self,rowname,colname):
self.get_row(rowname)
self.get_column(colname)
if not rowname in self.headrows :
raise ValueError
if not colname in self.headlines :
raise ValueError
return self.g[self.postionrow][self.postioncolmn]
# Return the name of the first row whose cell in colname equals the column
# maximum. max() is applied to the cells as stored (strings from split).
def get_row_name_with_max_value(self,colname):
if not colname in self.headlines :
raise ValueError
max_colmn=max(self.get_column(colname))
for i in range (len(self.matrix)):
if max_colmn == self.g[i][self.postioncolmn]:
return self.headrows[i]
and what should be the result:
>>> table = Table("table_examp1111111","\t")
cannot create a table from this file
>>> table = Table("table_example1.txt","\t")
>>> print table.len()
12
>>> print table.get_row("Menny")
['M', '1', '1', '1']
>>> print table.get_column("Height")
['1', '2', '3']
>>> print table.get_value("Sami","Age")
3
>>> print table.get_row_name_with_max_value("Height")
Sami
>>> print table.get_row_name_with_max_value("Salary")
Sami
This code works, but I want to make it more Pythonic. Please don't change the structure and don't add or remove functions; just fix my syntax.
Thanks.
Whenever you call the function len() on an object, Python tries to call the __len__ method of that object. So if you define it, this should work:
def __len__(self):
    """Return the number of cells: rows of ``self.table`` times its width.

    The width is taken from the first row.  An empty table has 0 cells
    (the original raised IndexError on ``self.table[0]``).
    """
    if not self.table:
        return 0
    height = len(self.table)
    width = len(self.table[0])
    return height * width
You are trying to call __len__ on the Table class, when it looks like you should be calling it on the table (the list of rows) built in the constructor.
You should create an attribute self.table, and then either use the len function on that, or
def numOfCells(self):
    """Return rows of ``self.table`` times the width of its first row.

    An empty table has 0 cells (the original raised IndexError).
    """
    if not self.table:
        return 0
    return len(self.table) * len(self.table[0])
This looks like a perfect place to use the csv module:
import csv
def load_csv(fname, **kwargs):
    """Read the file *fname* with ``csv.reader`` and return a list of rows.

    Extra keyword arguments (for example ``delimiter``) are passed on to
    ``csv.reader``.  Each row is a list of strings.
    """
    # On Python 3 the csv module needs a text-mode file opened with
    # newline=''; the original binary mode ('rb') makes csv.reader fail
    # because it receives bytes.
    with open(fname, newline='') as inf:
        return list(csv.reader(inf, **kwargs))
class Table:
    """Labelled table read from a delimited file.

    The first line of the file is the header; the first cell of every other
    line is that row's label.

    Attributes:
        table:  data rows (header removed), each a list of strings.
        header: list of column names.
        cols:   number of columns, label column included.
        labels: row label -> 0-based index into ``table``.
    """

    def __init__(self, path, sep=','):
        """Load *path* and validate row lengths and label uniqueness.

        Raises ValueError if the file has no rows, a row has the wrong
        number of cells, or two rows share a label.
        """
        self.table = load_csv(path, delimiter=sep)
        if len(self.table) == 0:
            raise ValueError('No data in file {}'.format(path))

        self.header = self.table.pop(0)
        self.cols = len(self.header)
        self.labels = {}

        # i is the 1-based data-row number, used in the error messages.
        for i, row in enumerate(self.table, 1):
            # make sure rows are all same length
            if len(row) != self.cols:
                raise ValueError('row {} contains {} items - should be {}'.format(i, len(row), self.cols))
            # make sure rows-labels are unique
            lbl = row[0]
            if lbl in self.labels:
                # labels stores 0-based indexes; add 1 so both numbers in
                # the message use the same 1-based numbering.
                raise ValueError('rows {} and {} - duplicate labels'.format(self.labels[lbl] + 1, i))
            self.labels[lbl] = i - 1

    # The listing showed "#property" here -- a garbled "@property".  Without
    # the decorator, ``self.rows * (self.cols - 1)`` below multiplies a bound
    # method and fails.
    @property
    def rows(self):
        """Number of data rows."""
        return len(self.table)

    @property
    def cells(self):
        """Number of data cells, not counting the row labels."""
        return self.rows * (self.cols - 1)  # omit row labels

    def get_row_by_label(self, lbl):
        """Return the full row (label included) for *lbl*; KeyError if unknown."""
        return self.table[self.labels[lbl]]

    def get_item(self, lbl, attr):
        """Return the cell of row *lbl* in column *attr*.

        Raises KeyError for an unknown row label and ValueError for an
        unknown column name.
        """
        ndx = self.header.index(attr)
        return self.get_row_by_label(lbl)[ndx]
def main():
    """Load 'tbl.csv' and print the cell count, one row and one item."""
    t = Table('tbl.csv')
    print(t.cells)
    print(t.get_row_by_label("Menny"))
    print(t.get_item("Menny", "Age"))


if __name__=="__main__":
    main()
EDIT:
Ok, this is for your FIRST question. From what I understand, you are wanting a function that will return the number of cells in your table. This number does not include the names of people in the rows, and does not include the first row at all. If I understand correctly, then this should work:
If table is:
Name,Gender,Age,Salary,Height
Menny, M, 1, 1, 1
James, J, 2, 2, 2
Sami, S, 3, 3, 3
Then number of cells is '12'... so:
Example:
class Table:
    """Table read from a delimited text file and kept in ``self.table``.

    ``self.table`` is a list of rows, each a list of cell strings; row 0 is
    the header and the first cell of every other row is the row name.
    """

    def __init__(self, path, sep):
        """Read *path*, split every line on *sep* and validate the result.

        If the file cannot be read, a message is printed and the table is
        left empty.  Raises ValueError for duplicate column names, rows
        whose length differs from the header, or duplicate row names.
        """
        # Always defined, so the other methods work after a failed load.
        self.table = []
        try:
            with open(path) as f:
                read_file = f.read()  # the file is closed by the with block
        except IOError:
            print("cannot create a table from this file")
            return

        # splitlines() does not produce a spurious empty last row when the
        # file ends with a newline (split('\n') did, which then failed the
        # row-length check below).
        self.table = [line.split(sep) for line in read_file.splitlines()]

        if len(self.table) > 0:
            header = self.table[0]
            # column names must be unique
            if len(set(header)) != len(header):
                raise ValueError
            row_names = set()
            for row in self.table[1:]:
                if len(row) != len(header):
                    raise ValueError
                if row[0] in row_names:
                    raise ValueError
                row_names.add(row[0])

    # now a function that could return the table length
    def get_num_cells(self):
        """Return the number of data cells, excluding header row and row names."""
        if len(self.table) < 2:
            return 0
        data_rows = len(self.table) - 1
        data_cols = max(len(self.table[0]) - 1, 0)
        return data_rows * data_cols
Using self.table will make this easier, as you don't have to include it in the other function args, as above in get_num_cells, I just used self.table without putting it in the args of the function.
To call this function you would do the following:
app = Table(path, sep)
app.get_num_cells()
# this code would be placed at the end of your file, and not inside the class
Example:
class Table:
def __init__(self, path, sep):
etc.
etc.etc.etc.
# now the code to create an instance of Table and call a method here like this
app = Table(path, sep) # path would be a filepath "C:/etc./file.txt", and sep = "," etc.
app.get_num_cells()
For your other questions, I am not entirely sure what you want yet, but if you write again in the comments for this, I will try. Please let me know if this works for you.

Creating a tree/deeply nested dict from an indented text file in python

Basically, I want to iterate through a file and put the contents of each line into a deeply nested dict, the structure of which is defined by the amount of whitespace at the start of each line.
Essentially the aim is to take something like this:
a
    b
        c
    d
        e
And turn it into something like this:
{"a":{"b":"c","d":"e"}}
Or this:
apple
    colours
        red
        yellow
        green
    type
        granny smith
    price
        0.10
into this:
{"apple":{"colours":["red","yellow","green"],"type":"granny smith","price":0.10}}
So that I can send it to Python's JSON module and make some JSON.
At the moment I'm trying to make a dict and a list in steps like such:
{"a":""} ["a"]
{"a":"b"} ["a"]
{"a":{"b":"c"}} ["a","b"]
{"a":{"b":{"c":"d"}}} ["a","b","c"]
{"a":{"b":{"c":"d"},"e":""}} ["a","e"]
{"a":{"b":{"c":"d"},"e":"f"}} ["a","e"]
{"a":{"b":{"c":"d"},"e":{"f":"g"}}} ["a","e","f"]
etc.
The list acts like 'breadcrumbs' showing where I last put in a dict.
To do this I need a way to iterate through the list and generate something like dict["a"]["e"]["f"] to get at that last dict. I've had a look at the AutoVivification class that someone has made which looks very useful however I'm really unsure of:
Whether I'm using the right data structure for this (I'm planning to send it to the JSON library to create a JSON object)
How to use AutoVivification in this instance
Whether there's a better way in general to approach this problem.
I came up with the following function but it doesn't work:
# Attempt to follow the keys in array down into the nested dict.
# NOTE(review): the function takes three parameters but both recursive calls
# pass only two, so the recursion raises TypeError.
# NOTE(review): the parameter named dict shadows the builtin inside this
# function.
def get_nested(dict,array,i):
if i != None:
i += 1
if array[i] in dict:
return get_nested(dict[array[i]],array)
else:
return dict
else:
i = 0
return get_nested(dict[array[i]],array)
Would appreciate help!
(The rest of my extremely incomplete code is here:)
#Import relevant libraries
import codecs
import sys
#Functions
# Return str without leading indentation and without trailing newline
# characters. Reads the module-level flag tab_spaced: when it is set only
# leading tabs are removed, otherwise all leading whitespace.
def stripped(str):
if tab_spaced:
return str.lstrip('\t').rstrip('\n\r')
else:
return str.lstrip().rstrip('\n\r')
# Return the number of leading whitespace characters of the module-level
# variable line (the line currently being processed by the main loop).
# Reads the module-level settings whitespacing and tab_spaced.
def current_ws():
if whitespacing == 0 or not tab_spaced:
return len(line) - len(line.lstrip())
if tab_spaced:
return len(line) - len(line.lstrip('\t\n\r'))
# Attempt to follow the keys in anarray down into the nested dict adict.
# NOTE(review): the function takes three parameters but both recursive calls
# pass only two, so the recursion raises TypeError.
def get_nested(adict,anarray,i):
if i != None:
i += 1
if anarray[i] in adict:
return get_nested(adict[anarray[i]],anarray)
else:
return adict
else:
i = 0
return get_nested(adict[anarray[i]],anarray)
#initialise variables
jsondict = {}
unclosed_tags = []
debug = []
vividfilename = 'simple.vivid'
# vividfilename = sys.argv[1]
# The output name comes from the second command-line argument when given,
# otherwise from the input name with a .json extension.
if len(sys.argv)>2:
jsfilename = sys.argv[2]
else:
jsfilename = vividfilename.split('.')[0] + '.json'
# whitespacing: size of one indent step, 0 until the first indented line.
whitespacing = 0
# History of leading-whitespace counts; the two zeros let [-2] be read on
# the first line.
whitespace_array = [0,0]
tab_spaced = False
#open the file
with codecs.open(vividfilename,'rU', "utf-8-sig") as vividfile:
for line in vividfile:
#work out how many whitespaces at start
whitespace_array.append(current_ws())
#For first line with whitespace, work out the whitespacing (eg tab vs 4-space)
if whitespacing == 0 and whitespace_array[-1] > 0:
whitespacing = whitespace_array[-1]
if line[0] == '\t':
tab_spaced = True
#strip out whitespace at start and end
stripped_line = stripped(line)
# A line without indentation becomes a top-level key.
if whitespace_array[-1] == 0:
jsondict[stripped_line] = ""
unclosed_tags.append(stripped_line)
# The line is indented deeper than the previous one.
# NOTE(review): get_nested receives whitespace_array (indent counts), not a
# list of keys -- confirm this is intended.
if whitespace_array[-2] < whitespace_array[-1]:
oldnested = get_nested(jsondict,whitespace_array,None)
print oldnested
# jsondict.pop(unclosed_tags[-1])
# jsondict[unclosed_tags[-1]]={stripped_line:""}
# unclosed_tags.append(stripped_line)
print jsondict
print unclosed_tags
print jsondict
print unclosed_tags
Here is an object oriented approach based on a composite structure of nested Node objects.
Input:
# Sample input.  The indentation inside the literal is what encodes the
# tree, so it is restored here (4 spaces per level) to match the result
# shown for this example.
indented_text = \
"""
apple
    colours
        red
        yellow
        green
    type
        granny smith
    price
        0.10
"""
a Node class
class Node:
    """One line of indented text, with the deeper lines below it as children.

    Attributes:
        children: child Node objects, in input order.
        level:    number of leading whitespace characters of the line.
        text:     the line with surrounding whitespace removed.
    """

    def __init__(self, indented_line):
        self.children = []
        self.level = len(indented_line) - len(indented_line.lstrip())
        self.text = indented_line.strip()

    def add_children(self, nodes):
        """Attach nodes from the front of *nodes* as descendants of this node.

        *nodes* is a list of Node objects in document order.  It is consumed
        in place: processing stops, leaving the rest in the list, at the
        first node whose level is not deeper than this node's own level.
        """
        if not nodes:
            # Nothing to attach (the original raised IndexError here).
            return
        # The first node defines the indentation of direct children.
        childlevel = nodes[0].level
        while nodes:
            node = nodes.pop(0)
            if node.level == childlevel:  # add node as a child
                self.children.append(node)
            elif node.level > childlevel:  # add nodes as grandchildren of the last child
                nodes.insert(0, node)
                self.children[-1].add_children(nodes)
            elif node.level <= self.level:  # this node is a sibling, no more children
                nodes.insert(0, node)
                return
            # NOTE(review): a node with self.level < level < childlevel
            # matches no branch and is dropped, as in the original.

    def as_dict(self):
        """Return this subtree as nested dicts, lists and strings.

        Several children give ``{text: [child, ...]}``, a single child gives
        ``{text: child}``, and a leaf is just its text.
        """
        if len(self.children) > 1:
            return {self.text: [node.as_dict() for node in self.children]}
        elif len(self.children) == 1:
            return {self.text: self.children[0].as_dict()}
        else:
            return self.text
To parse the text, first create a root node.
Then, remove empty lines from the text, and create a Node instance for every line, pass this to the add_children method of the root node.
# Artificial root node that holds the top-level lines.
root = Node('root')
# One Node per non-blank line; add_children sorts them into a tree.
root.add_children([Node(line) for line in indented_text.splitlines() if line.strip()])
# Drop the artificial 'root' wrapper from the result.
d = root.as_dict()['root']
print(d)
result:
{'apple': [
{'colours': ['red', 'yellow', 'green']},
{'type': 'granny smith'},
{'price': '0.10'}]
}
I think that it should be possible to do it in one step, where you simply call the constructor of Node once, with the indented text as an argument.
Here is a recursive solution. First, transform the input in the following way.
Input:
person:
	address:
		street1: 123 Bar St
		street2:
		city: Madison
		state: WI
		zip: 55555
	web:
		email: boo@baz.com
First-step output:
[{'name':'person','value':'','level':0},
{'name':'address','value':'','level':1},
{'name':'street1','value':'123 Bar St','level':2},
{'name':'street2','value':'','level':2},
{'name':'city','value':'Madison','level':2},
{'name':'state','value':'WI','level':2},
{'name':'zip','value':55555,'level':2},
{'name':'web','value':'','level':1},
{'name':'email','value':'boo@baz.com','level':2}]
This is easy to accomplish with split(':') and by counting the number of leading tabs:
def tab_level(astr):
    """Count number of leading tabs in a string
    """
    # Only tab characters are stripped, so leading spaces count as level 0.
    return len(astr) - len(astr.lstrip('\t'))
Then feed the first-step output into the following function:
def ttree_to_json(ttree, level=0):
    """Convert a flat list of ``{'name', 'value', 'level'}`` records to a dict.

    *ttree* lists the records in document order and *level* is the depth
    being collected by this call.  A record followed by a deeper record
    becomes a nested dict (built by a recursive call on the rest of the
    list); any other record contributes its plain ``value``.  Repeated
    names are merged by ``dict_insert_or_append``.
    """
    result = {}
    for i, cn in enumerate(ttree):
        # Look one record ahead; level -1 stands for "end of input".  An
        # explicit bounds check replaces the original bare except.
        nn = ttree[i + 1] if i + 1 < len(ttree) else {'level': -1}

        # Edge cases
        if cn['level'] > level:
            # Deeper records belong to a recursive call and were handled there.
            continue
        if cn['level'] < level:
            # Back above our depth: this call is finished.
            return result

        # Recursion
        if nn['level'] > level:
            rr = ttree_to_json(ttree[i + 1:], level=nn['level'])
            dict_insert_or_append(result, cn['name'], rr)
        else:
            dict_insert_or_append(result, cn['name'], cn['value'])
            if nn['level'] < level:
                # The next record is shallower, so this was the last one here.
                return result
    return result
where:
def dict_insert_or_append(adict, key, val):
    """Insert a value in dict at key if one does not exist
    Otherwise, convert value to list and append
    """
    if key not in adict:
        adict[key] = val
        return
    # Second value for this key: wrap the existing value in a list first.
    if not isinstance(adict[key], list):
        adict[key] = [adict[key]]
    adict[key].append(val)
The following code will take a block-indented file and convert into an XML tree; this:
foo
bar
baz
    ban
    bal
...becomes:
<cmd>foo</cmd>
<cmd>bar</cmd>
<block>
<name>baz</name>
<cmd>ban</cmd>
<cmd>bal</cmd>
</block>
The basic technique is:
Set indent to 0
For each line, get the indent
If > current, step down and save current block/ident on a stack
If == current, append to current block
If < current, pop from the stack until you get to the matching indent
So:
from lxml import builder
C = builder.ElementMaker()
def indent(line):
    """Return ``(leading whitespace count, line without that whitespace)``."""
    stripped = line.lstrip()
    return len(line) - len(stripped), stripped
def parse_blockcfg(data):
    """Parse block-indented text into an element tree and return its root.

    Every line becomes a ``<cmd>`` element.  When the following line is
    indented deeper, that ``<cmd>`` is turned into a ``<block>`` whose
    ``<name>`` holds the original text, and the deeper lines are added
    inside it.  The root is a ``<config>`` element.

    Raises Exception if a line dedents to an indentation that no enclosing
    block uses, or if the very first line is indented.
    """
    top = current_block = C.config()
    # (indent, block) pairs of the enclosing blocks, innermost last.
    stack = []
    current_indent = 0
    # Element created for the previous line; None before the first line.
    prev = None

    for raw_line in data.split('\n'):
        i, line = indent(raw_line)

        if i > current_indent:
            if prev is None:
                # The original failed here with an unbound-variable error.
                raise Exception('first line must not be indented')
            # we've gone down a level, convert the <cmd> to a block
            # and then save the current indent and block to the stack
            prev.tag = 'block'
            prev.append(C.name(prev.text))
            prev.text = None
            stack.append((current_indent, current_block))
            current_indent = i
            current_block = prev
        elif i < current_indent:
            # we've gone up one or more levels, pop the stack
            # until we find out which level and return to it
            found = False
            while stack:
                parent_indent, parent_block = stack.pop()
                if parent_indent == i:
                    found = True
                    break
            if not found:
                raise Exception('indent not found in parent stack')
            current_indent = i
            current_block = parent_block

        prev = C.cmd(line)
        current_block.append(prev)

    return top
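First of all, don't use dict (or str) as a variable name: it is not a reserved word, but it is the name of a Python built-in, and reusing it hides the built-in inside that scope, which can cause confusing errors. array is not a built-in, but it is the name of a standard-library module, so a more descriptive name is better there too.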
OK so if I get you correctly, you have a tree given in a text file, with parenthood indicated by indentations, and you want to recover the actual tree structure. Right?
Does the following look like a valid outline? Because I have trouble putting your current code into context.
# Outline only: the three branches contain comments and no statements yet,
# and parse() and f are not defined in this sketch.
result = {}
last_indentation = 0
# NOTE(review): xreadlines() is a Python 2 file method.
for l in f.xreadlines():
(c, i) = parse(l) # create parse to return character and indentation
if i==last_indentation:
# sibling to last
elif i>last_indentation:
# child to last
else:
# end of children, back to a higher level
OK then your list are the current parents, that's in fact right - but I'd keep them pointed to the dictionary you've created, not the literal letter
just starting some stuff here
# Outline only: two branches contain comments and no statements yet, and
# parse() and f are not defined in this sketch.
result = {}
# parents maps an indentation level to the dict created at that level.
parents = {}
last_indentation = 1 # start with 1 so 0 is the root of tree
parents[0] = result
# NOTE(review): xreadlines() is a Python 2 file method.
for l in f.xreadlines():
(c, i) = parse(l) # create parse to return character and indentation
if i==last_indentation:
# Sibling of the previous entry: create a new dict under the parent level
# and remember it as the current dict for level i.
new_el = {}
parents[i-1][c] = new_el
parents[i] = new_el
elif i>last_indentation:
# child to last
else:
# end of children, back to a higher level

Categories

Resources