Accumulate conditions during recursion on classification tree

Accumulate conditions during recursion on classification tree - python

I have the following function, which generates Python code from a scikit-learn classification tree:
def mxTreeToCode(tree, feature_names, mx_name = 'mxTree', rm_file = False):
    """Append a function mirroring a fitted decision tree to 'tree.py'.

    The generated function is a chain of nested ``if``/``else`` tests on
    ``x[<feature>]``; every leaf returns whatever is currently stored in the
    module-level string ``col_name``.

    Args:
        tree: object exposing ``tree_`` with ``feature``, ``threshold``,
            ``children_left`` and ``children_right``.
        feature_names: sequence indexed by the entries of ``tree_.feature``.
        mx_name: name given to the generated function.
        rm_file: when true, delete an existing './tree.py' first.

    Note:
        ``col_name`` must already exist at module level.  It is cleared at
        every leaf, so a right-hand branch does not carry the conditions of
        its ancestors.
    """
    if rm_file:
        import os
        # A missing file is fine: there is simply nothing to clean up.
        try:
            os.remove('./tree.py')
        except OSError:
            pass

    fitted = tree.tree_
    labels = []
    for idx in fitted.feature:
        if idx != _tree.TREE_UNDEFINED:
            labels.append(feature_names[idx])
        else:
            labels.append("undefined!")

    out = open('tree.py', 'a')
    out.write('def ' + mx_name + '(x):\n')

    def walk(node, depth):
        global col_name
        pad = " " * depth

        # Leaf: emit the accumulated conditions and start over.
        if fitted.feature[node] == _tree.TREE_UNDEFINED:
            out.write(pad + 'return ' + str(col_name) + '\n')
            col_name = ""
            return

        feat = labels[node]
        cut = str(fitted.threshold[node])

        out.write(pad + "if x['" + feat + "'] <= " + cut + ':\n')
        col_name += "'" + feat + "_<=" + cut + "'"
        walk(fitted.children_left[node], depth + 1)

        out.write(pad + "else: # if x['" + feat + "'] > " + cut + '\n')
        col_name += "'" + feat + "_>" + cut + "'"
        walk(fitted.children_right[node], depth + 1)

    walk(0, 1)
    out.close()
With this I obtain the following output on file 'tree.py' for a given classification tree:
def mxTree(x):
if x['V1'] <= 0.5:
if x['V2'] <= 0.5:
return 'V1_<=0.5''V2_<=0.5'
else: # if x['V2'] > 0.5
return 'V2_>0.5'
else: # if x['V1'] > 0.5
return 'V1_>0.5'
While I can accumulate the conditions on the IF side and return their concatenation, I fail to carry the accumulated conditions over to the ELSE side (the right child of a tree node). The output I want is the following:
def mxTree(x):
if x['V1'] <= 0.5:
if x['V2'] <= 0.5:
return 'V1_<=0.5''V2_<=0.5'
else: # if x['V2'] > 0.5
return 'V1_<=0.5''V2_>0.5' # 'V1<=0.5' must be added
else: # if x['V1'] > 0.5
return 'V1_>0.5'
I would appreciate any suggestion.

Since the left and right sides of each node are visited from the same recursive call, I just created an additional variable that saves the conditions accumulated on reaching each node. After the left side has been processed, I concatenate the saved value back onto the variable col_name:
# Scratch state shared with the recursion inside mxTreeToCode.
col_name = ""    # conditions accumulated on the current root-to-node path
names_list = {}  # node id -> value of col_name when that node was entered


def mxTreeToCode(tree, feature_names, mx_name = 'mxTree', rm_file = False):
    """Append a function mirroring a fitted decision tree to 'tree.py'.

    The generated function is a chain of nested ``if``/``else`` tests on
    ``x[<feature>]``.  Each leaf returns the quoted conditions
    (``'<feature>_<=<threshold>'`` / ``'<feature>_><threshold>'``) met on the
    path from the root to that leaf, written one after another.

    Args:
        tree: object exposing ``tree_`` with ``feature``, ``threshold``,
            ``children_left`` and ``children_right``.
        feature_names: sequence indexed by the entries of ``tree_.feature``.
        mx_name: name given to the generated function.
        rm_file: when true, delete an existing './tree.py' first; otherwise
            the new definition is appended to whatever is already there.
    """
    global col_name
    # Start clean: a previous call that raised part-way through would
    # otherwise leave stale conditions behind for this one.
    col_name = ""
    names_list.clear()

    if rm_file:
        import os
        try:
            os.remove('./tree.py')
        except OSError:
            pass  # no earlier file to remove

    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]

    def recurse(node, depth):
        global col_name
        indent = " " * depth
        # Remember the path conditions seen so far; they are needed again
        # before descending into the right child.
        names_list[node] = col_name
        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            name = feature_name[node]
            threshold = str(tree_.threshold[node])
            file.write(indent + "if x['" + name + "'] <= " + threshold + ':' + '\n')
            col_name += "'" + name + '_' + '<=' + threshold + "'"
            recurse(tree_.children_left[node], depth + 1)
            file.write(indent + "else: # if x['" + name + "'] > " + threshold + '\n')
            # Every leaf clears col_name, so it is empty here: re-attach the
            # conditions of the ancestors before adding this node's own.
            col_name += names_list[node]
            col_name += "'" + name + '_' + '>' + threshold + "'"
            recurse(tree_.children_right[node], depth + 1)
        else:
            file.write(indent + 'return ' + str(col_name) + '\n')
            col_name = ""

    # The context manager closes the file even if the recursion raises.
    with open('tree.py', 'a') as file:
        file.write('def ' + mx_name + '(x):' + '\n')
        recurse(0, 1)
I wonder if there are other working approaches.

Related

how to feed strings in an empty list?

I am trying to store the values obtained from Excel sheet cells in a list. The code below collects data from consecutive rows and columns and builds a string from those values. I got as far as building the string, but I don't know how to store the strings in a list. Can anyone help me with this?
# Walk the NR x NC block of cells that starts at (rowStart, ColumnStart) and
# describe every input value as maximum / minimum / intermediate.
for i in range(NR):
    print("This TC checks the output for")
    row = rowStart + i
    for j in range(NC):
        col = ColumnStart + j
        # The input's name sits in the header row just above the data.
        inputVariable = str(ws[get_column_letter(col) + str(rowStart - 1)].value)
        c = str(ws.cell(row=row, column=col).value)
        if ws.cell(row=row, column=col).value == ws.cell(row=MaxValRow, column=col).value:
            b = '(maximum)'
        elif ws.cell(row=row, column=col).value == ws.cell(row=MinValRow, column=col).value:
            b = '(minimum)'
        else:
            b = '(intermediate)'
        Commentstr = str(j + 1) + '. The value of input ' + inputVariable + ' =' + " " + c + b
        # need to create a list here to store the commentstr for each iteration
NR = no. of rows, NC = no. of columns

# Collect one descriptive string per cell of the NR x NC block that starts at
# (rowStart, ColumnStart).
my_list = []
for i in range(NR):
    print("This TC checks the output for")
    for j in range(NC):
        # The input's name sits in the header row just above the data.
        inputVariable = str(ws[get_column_letter(ColumnStart+j) + str(rowStart-1)].value)
        value = ws.cell(row=rowStart + i, column=ColumnStart + j).value
        c = str(value)
        if value == ws.cell(row=MaxValRow, column=ColumnStart + j).value:
            b = '(maximum)'
        elif value == ws.cell(row=MinValRow, column=ColumnStart + j).value:
            b = '(minimum)'
        else:
            b = '(intermediate)'
        Commentstr = str(j+1) + '. The value of input ' + inputVariable + ' =' + " " + c + b
        # Assigning to my_list[x] on an empty list raises IndexError (and an
        # index reset for every row would overwrite earlier rows); append
        # grows the list instead.
        my_list.append(Commentstr)

python: TypeError: 'NoneType' object has no attribute '__getitem__' again

The following code raises a TypeError. I have tried a lot, but I do not know what to do. The code is below; if anybody could help, it would be great:
import sys
from elasticsearch import Elasticsearch
start = 0
fetch = 1
sz = 50
sStatus = True
es = Elasticsearch(host='distribution.virk.dk', port=80, http_auth='USERID:CODEID')
def getPage():
global start
#result = es.search(index='cvr-permanent-prod-20151209', doc_type="virksomhed", from_=start, size=sz, q= 'Vrvirksomhed.cvrNummer:36549807')
if (sStatus):
result = es.search(index='cvr-permanent-prod-20151209', doc_type="virksomhed", size=sz, q='Vrvirksomhed.virksomhedMetadata.sammensatStatus:NORMAL', body= '{"filter":{"range":{"Vrvirksomhed.cvrNummer":{"gt":'+str(start)+'}}},"sort":[{"Vrvirksomhed.cvrNummer":{"order":"asc"}}]}')
else:
result = es.search(index='cvr-permanent-prod-20151209', doc_type="virksomhed", size=sz, body= '{"filter":{"range":{"Vrvirksomhed.cvrNummer":{"gt":'+str(start)+'}}},"sort":[{"Vrvirksomhed.cvrNummer":{"order":"asc"}}]}')
#result = es.search(index='cvr-permanent-prod-20151209', doc_type="virksomhed", from_=start, size=sz)
for entry in result['hits']['hits']:
start = entry['_source']['Vrvirksomhed']['cvrNummer']
print str(entry['_source']['Vrvirksomhed']['cvrNummer']) + ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteNavn']['navn'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteNavn']['navn'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['sammensatStatus'].encode('utf-8')) + '",',
print str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['postnummer']) + "," if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['postnummer'] !=None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['kommune']['kommuneNavn'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['kommune']['kommuneNavn'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['postdistrikt'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['postdistrikt'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['landekode'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['landekode'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['bynavn'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['bynavn'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['vejnavn'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['vejnavn'] != None) else ",",
print str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['husnummerFra']) + "," if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['husnummerFra'] != None) else ",",
print str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['husnummerTil']) + "," if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['husnummerTil'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['etage'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['etage'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['sidedoer'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['sidedoer'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['bogstavFra'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['bogstavFra'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['bogstavTil'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['bogstavTil'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['fritekst'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteBeliggenhedsadresse']['fritekst'] != None) else ",",
if ('reklamebeskyttet' in entry['_source'] and entry['_source']['reklamebeskyttet'] != None):
print str(entry['_source']['reklamebeskyttet'].encode('utf-8')),
print ",",
if ('telefonNummer' in entry['_source']):
tmp = ''
for tel in entry['_source']['telefonNummer']:
tmp += str(tel['kontaktoplysning'].encode('utf-8')) + " | "
print str(tmp),
print ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteVirksomhedsform']['kortBeskrivelse'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteVirksomhedsform']['kortBeskrivelse'] != None) else ",",
if ('nyesteKvartalsbeskaeftigelse' in entry['_source']['Vrvirksomhed']['virksomhedMetadata'] and entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteKvartalsbeskaeftigelse'] != None):
print str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteKvartalsbeskaeftigelse']['kvartal']) + "," if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteKvartalsbeskaeftigelse']['kvartal'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteKvartalsbeskaeftigelse']['intervalKodeAntalAnsatte'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteKvartalsbeskaeftigelse']['intervalKodeAntalAnsatte'] != None) else ",",
else:
print ", ,",
if ('nyesteHovedbranche' in entry['_source']['Vrvirksomhed']['virksomhedMetadata'] and entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteHovedbranche'] != None):
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteHovedbranche']['branchekode'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteHovedbranche']['branchekode'] != None) else ",",
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteHovedbranche']['branchetekst'].encode('utf-8')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteHovedbranche']['branchetekst'] != None) else ",",
else:
print ", ,",
navne = '"'
vaerdi = '"'
for dr in entry['_source']['Vrvirksomhed']['deltagerRelation']:
for org in dr['organisationer']:
for orN in org['organisationsNavn']:
if (orN['navn'] == "EJERREGISTER"):
if ('deltager' in dr and dr['deltager'] != None and 'navne' in dr['deltager'] and dr['deltager']['navne'] != None):
for de in dr['deltager']['navne']:
navne += str(de['navn'].encode('utf-8')) + " | "
break
for md in org['medlemsData']:
for att in md['attributter']:
if (att['type'] != "EJERANDEL_PROCENT"):
break
for vrdier in att['vaerdier']:
vaerdi += str(vrdier['vaerdi'].encode('utf-8')) + " | "
print str(navne) + '",',
print str(vaerdi) + '",',
print "\n",
if __name__ == '__main__':
if (len(sys.argv) >= 3):
start = long(sys.argv[1])
fetch = int(sys.argv[2])
print "cvrNummer,navn,sammensatStatus,postnummer,kommuneNavn,postdistrikt,landekode,bynavn,vejnavn,husnummerFra,husnummerTil,etage,sidedoer,bogstavFra,bogstavTil,fritekst,reklamebeskyttet,kontaktoplysning,kortBeskrivelse,kvartal,intervalKodeAntalAnsatte,branchekode,branchetekst,ejerregister,ejerandel_procent"
for i in range(0,fetch):
getPage()
And gives this ERROR, when getting the data:
Traceback (most recent call last):
File "es_json2csv.py", line 94, in <module>
getPage()
File "es_json2csv.py", line 30, in getPage
print '"' + str(entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteNavn']['navn'].encode('utf-8
')) + '",' if (entry['_source']['Vrvirksomhed']['virksomhedMetadata']['nyesteNavn']['navn'] != None) else ",",
TypeError: 'NoneType' object has no attribute '__getitem__'
What am I doing wrong?

Python comma and divide an integer

def SetHP(self, hpPercentage, curHP, maxHP):
    """Refresh the HP gauge and show the text '<curHP> / <maxHP>'.

    If the gauge is not shown yet, the widget is resized, the gauge is shown
    and the position is updated before the values are applied.
    """
    gauge_hidden = not self.hpGauge.IsShow()
    if gauge_hidden:
        self.SetSize(200 + 7*self.nameLength, 70)
        self.hpGauge.Show()
        self.UpdatePosition()

    self.hpGauge.SetPercentage(hpPercentage, 100)

    hp_text = " / ".join([str(curHP), str(maxHP)])
    self.broadCastHP.SetText(hp_text)
Example output is: 8993 / 18782
I have seen some similar questions, but all of them were about "float".
I want to make these integers like that:
8,9K / 18,7K
What is the "proper" way to do that?

Try this function:
def HPformat(str):
    """Shorten a non-negative integer to thousands with one decimal digit.

    The decimal digit is truncated (not rounded) and a comma is used as the
    decimal separator: '8993' -> '8,9K', '18782' -> '18,7K'.  Values below
    1000 and input that is not made of digits are returned unchanged.

    Args:
        str: the amount, either as an int or as its decimal string.

    Returns:
        The shortened text.
    """
    # The parameter shadows the builtin of the same name (kept so existing
    # callers are unaffected), so the builtin cannot be used for conversion.
    text = "{}".format(str).strip()
    if not text.isdigit():
        return text
    amount = int(text)
    if amount < 1000:
        return text
    thousands, remainder = divmod(amount, 1000)
    return "%d,%dK" % (thousands, remainder // 100)
And replace your final line of code with the function:
def SetHP(self, hpPercentage, curHP, maxHP):
    """Refresh the HP gauge and show both HP values shortened by HPformat.

    If the gauge is not shown yet, the widget is resized, the gauge is shown
    and the position is updated before the values are applied.
    """
    gauge_hidden = not self.hpGauge.IsShow()
    if gauge_hidden:
        self.SetSize(200 + 7*self.nameLength, 70)
        self.hpGauge.Show()
        self.UpdatePosition()

    self.hpGauge.SetPercentage(hpPercentage, 100)

    # HPformat works on the decimal text of each amount.
    shortened = [HPformat(str(amount)) for amount in (curHP, maxHP)]
    self.broadCastHP.SetText(shortened[0] + " / " + shortened[1])
Also, if you don't want to add a new function you could just do:
def SetHP(self, hpPercentage, curHP, maxHP):
    """Refresh the HP gauge and show both HP values in shortened form.

    Amounts of 1000 or more are shown in thousands with one truncated
    decimal digit and a comma as separator (8993 -> '8,9K',
    18782 -> '18,7K'); smaller amounts are shown as they are.

    Args:
        hpPercentage: value passed to the gauge, out of 100.
        curHP: current HP (int, or anything ``int()`` accepts).
        maxHP: maximum HP (int, or anything ``int()`` accepts).
    """
    if not self.hpGauge.IsShow():
        self.SetSize(200 + 7*self.nameLength, 70)
        self.hpGauge.Show()
        self.UpdatePosition()
    self.hpGauge.SetPercentage(hpPercentage, 100)

    # Work on the numbers rather than on fixed character positions, so the
    # result is right for any number of digits.
    shortened = []
    for amount in (int(curHP), int(maxHP)):
        if amount >= 1000:
            thousands, remainder = divmod(amount, 1000)
            shortened.append("%d,%dK" % (thousands, remainder // 100))
        else:
            shortened.append("%d" % amount)
    newCurHP, newMaxHP = shortened
    self.broadCastHP.SetText(newCurHP + " / " + newMaxHP)

Python - key error when using "if in dict"

I am receiving the following error when running a script to parse contents of an XML file.
if iteration.findtext("Iteration_query-def") in ecdict:
KeyError: 'XLOC_000434'
I was under the impression that using "if in dict" would mean that if the key is not found in the dictionary, the script will continue past the if statement and proceed with the rest of the code. Below is the problematic section of the code I am using. I realise this is quite a basic question, but I am unsure what else I can say, and I don't understand why I am receiving this error.
import xml.etree.ElementTree as ET
tree = ET.parse('507.FINAL_14.2.14_2_nr.out_fmt5.out')
blast_iteration = tree.find("BlastOutput_iterations")
for iteration in blast_iteration.findall("Iteration"):
query = iteration.findtext("Iteration_query-def").strip().strip("\n")
if query in score:
continue
if iteration.findtext("Iteration_message") == "No hits found":
if iteration.findtext("Iteration_query-def") in tair:
tairid = tair[iteration.findtext("Iteration_query-def")][0]
tairdes = tair[iteration.findtext("Iteration_query-def")][1]
else:
tairid = "-"
tairdes = "-"
goterms = ""
ecterms = ""
if iteration.findtext("Iteration_query-def") in godict:
for x in godict[iteration.findtext("Iteration_query-def")][:-1]:
goterms = goterms + x + ";"
goterms = goterms + godict[iteration.findtext("Iteration_query-def")][-1]
else:
goterms = "-"
if iteration.findtext("Iteration_query-def") in ecdict:
for x in ecdict[iteration.findtext("Iteration_query-def")][:-1]:
ecterms = ecterms + x + ";"
ecterms = ecterms + ecdict[iteration.findtext("Iteration_query-def")][-1]
else:
ecterms = "-"
if iteration.findtext("Iteration_query-def") in godescr:
desc = godescr[iteration.findtext("Iteration_query-def")]
else:
desc = "-"
n += 1
p = "PvOAK_up"+str(n) + "\t" + tranlen[iteration.findtext("Iteration_query-def")] + "\t" + orflen[iteration.findtext("Iteration_query-def")] + "\t" + "-" + "\t" + "-" + "\t" + tairid + "\t" + tairdes + "\t" + goterms + "\t" + ecterms + "\t" + desc + "\t" + str(flower[query][2]) + "\t" + str('{0:.2e}'.format(float(flower[query][1]))) + "\t" + str('{0:.2f}'.format(float(flower[query][0]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][2]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][1]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][0])))
print p
Hope you can help,
Thanks.
edit: I should say that godict and ecdict were previously created as follows - I can submit the entire code if needs be:
godict = {}
ecdict = {}
godescr = {}
f = open("507.FINAL_14.2.14_2_nr.out_fmt5.out.annot")
for line in f:
line = line.split("\t")
if len(line) > 2:
godescr[line[0]] = line[2]
line[1] = line[1].strip("\n")
if line[1].startswith("EC"):
if line[0] in ecdict:
a = ecdict[line[0]]
a.append(line[1])
ecdict[line[0]] = a
else:
ecdict[line[0]] = [line[1]]
else:
if line[0] in godict:
a = godict[line[0]]
a.append(line[1])
godict[line[0]] = a
else:
godict[line[0]] = [line[1]]
Traceback:
Traceback (most recent call last):
File "2d.test.py", line 170, in <module>
p = "PvOAK_up"+str(n) + "\t" + tranlen[iteration.findtext("Iteration_query-def")] + "\t" + orflen[iteration.findtext("Iteration_query-def")] + "\t" + "-" + "\t" + "-" + "\t" + tairid + "\t" + tairdes + "\t" + goterms + "\t" + ecterms + "\t" + desc + "\t" + str(flower[query][2]) + "\t" + str('{0:.2e}'.format(float(flower[query][1]))) + "\t" + str('{0:.2f}'.format(float(flower[query][0]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][2]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][1]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][0])))
KeyError: 'XLOC_000434'

avoiding code duplication in Python code redux

This is a followup to an earlier question. I got some good suggestions for that, so I thought I would try my luck again.
from itertools import takewhile

# Reading stops at the '[Controls]' marker.  When K is given it also stops
# once K records have been written (the lambda reads the counter i below).
if K is not None:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]' and i < K
else:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]'

af = open('a')
bf = open('b', 'w')
cf = open('c', 'w')
i = 0

if K is None:
    # No limit: every accepted line goes to b (reformatted) and c (verbatim).
    for line in takewhile(illuminacond, af):
        fields = line.split(',')
        prefix = fields[1][:3]
        record = ','.join([fields[1], fields[2], fields[3][1] + fields[3][3],
                           fields[15], fields[9], fields[10]])
        if prefix not in ('cnv', 'hCV'):
            i += 1
            bf.write(record.strip('"') + '\n')
            cf.write(line)
else:
    # Limited to K records: only b is written.
    for line in takewhile(illuminacond, af):
        fields = line.split(',')
        prefix = fields[1][:3]
        record = ','.join([fields[1], fields[2], fields[3][1] + fields[3][3],
                           fields[15], fields[9], fields[10]])
        if prefix not in ('cnv', 'hCV'):
            i += 1
            bf.write(record.strip('"') + '\n')
Is it possible to compactify this code? If I have some stuff in common in two loops like this,
one obvious possibility is to just factor out the common code, but here, eww.
The annoying thing is that the only difference here is the writing to c.
Brief summary of the code as written: if K is None, loop over all of a (up to the '[Controls]' line) and write to both b and c. Otherwise, stop once K lines have been written and write only to b.

Why not use only one loop, but including the condition inside that loop? Also, you can get rid of the redundancy in that lambda, I think.
from itertools import takewhile

k_is_none = K is None


def illuminacond(x):
    """Return True while *x* should still be processed.

    Processing stops at the '[Controls]' marker and, when K is set, once
    the module-level counter i has reached K.
    """
    global i
    global K
    is_data = x.split(',')[0] != '[Controls]'
    return is_data and (k_is_none or i < K)


af = open('a')
bf = open('b', 'w')
cf = open('c', 'w')
i = 0

for line in takewhile(illuminacond, af):
    fields = line.split(',')
    prefix = fields[1][:3]
    record = ','.join([fields[1], fields[2], fields[3][1] + fields[3][3],
                       fields[15], fields[9], fields[10]])
    if prefix in ('cnv', 'hCV'):
        continue
    i += 1
    bf.write(record.strip('"') + '\n')
    # The verbatim copy is only kept when no limit was requested.
    if k_is_none:
        cf.write(line)

One check, one loop, no classes, psyco-optimizable.
from itertools import takewhile

# Choose the stop condition and the per-line action once, up front, so the
# loop below contains no test on K.
if K is not None:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]' and i < K

    def action(cf, line):
        """Do nothing: with a limit set, c is not written."""
        pass
else:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]'

    def action(cf, line):
        """Copy the accepted line verbatim to c."""
        cf.write(line)

af = open('a')
bf = open('b', 'w')
cf = open('c', 'w')
i = 0

for line in takewhile(illuminacond, af):
    fields = line.split(',')
    prefix = fields[1][:3]
    record = ','.join([fields[1], fields[2], fields[3][1] + fields[3][3],
                       fields[15], fields[9], fields[10]])
    if prefix in ('cnv', 'hCV'):
        continue
    i += 1
    bf.write(record.strip('"') + '\n')
    action(cf, line)

Why not just:
from itertools import takewhile

# Number of records written so far.  It has to exist before the loop starts:
# the loop increments it and, when K is set, the stop condition reads it.
i = 0


def illuminacond(x):
    """Return True while *x* should still be processed.

    Processing stops at the '[Controls]' marker and, when K is not None,
    once i records have reached the limit K.
    """
    return x.split(',')[0] != '[Controls]' and (K is None or i < K)


# The context manager closes all three files even if a malformed line raises.
with open('a') as af, open('b', 'w') as bf, open('c', 'w') as cf:
    for line in takewhile(illuminacond, af):
        line_split = line.split(',')
        pid = line_split[1][0:3]
        out = line_split[1] + ',' + line_split[2] + ',' + line_split[3][1] + line_split[3][3] + ',' \
            + line_split[15] + ',' + line_split[9] + ',' + line_split[10]
        if pid not in ('cnv', 'hCV'):
            i = i + 1
            bf.write(out.strip('"') + '\n')
            # The verbatim copy is only kept when no limit was requested.
            if K is None:
                cf.write(line)

How about this (second class based version)?
from itertools import takewhile


class Foo:
    """Filter file 'a' into 'b' (reformatted) and, without a limit, 'c'.

    Reading stops at the '[Controls]' marker, or after K records have been
    written when K is given.
    """

    def __init__(self, K = None):
        self.bf = open('b', 'w')
        self.cf = open('c', 'w')
        self.count = 0  # records written so far
        self.K = K

    def Go(self):
        """Process 'a' line by line until the stop condition is met."""
        keep_going = self.Lamda()
        for raw in takewhile(keep_going, open('a')):
            self.line = raw
            self.SplitLine()
            if not self.IsValidPid():
                continue
            self.WriteLineToFiles()

    def SplitLine(self):
        """Split the current line on commas into self.lineSplit."""
        self.lineSplit = self.line.split(',')

    def Lamda(self):
        """Return the predicate that decides whether reading continues."""
        if self.K is not None:
            return lambda x: x.split(',')[0] != '[Controls]' and self.count < self.K
        return lambda x: x.split(',')[0] != '[Controls]'

    def IsValidPid(self):
        """Return False for ids whose first three characters are 'cnv' or 'hCV'."""
        return self.lineSplit[1][0:3] not in ('cnv', 'hCV')

    def WriteLineToFiles(self):
        """Count the record and write it to b, and to c when K is None."""
        self.count += 1
        self.bf.write(self.ParseLine())
        if self.K is None:
            self.cf.write(self.line)

    def ParseLine(self):
        """Build the output record from the split fields of the current line."""
        fields = self.lineSplit
        pieces = [fields[1], fields[2], fields[3][1] + fields[3][3],
                  fields[15], fields[9], fields[10]]
        return ','.join(pieces).strip('"') + '\n'


Foo().Go()
Original version:
from itertools import takewhile

# Reading stops at the '[Controls]' marker and, when K is given, once the
# counter i below has reached K.
if K is not None:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]' and i < K
else:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]'


def Parse(line):
    """Build the output record from the already split fields in *line*."""
    pieces = [line[1], line[2], line[3][1] + line[3][3],
              line[15], line[9], line[10]]
    return ','.join(pieces).strip('"') + '\n'


def IsValidPid(line_split):
    """Return False for ids whose first three characters are 'cnv' or 'hCV'."""
    return line_split[1][0:3] not in ('cnv', 'hCV')


bf = open('b', 'w')
cf = open('c', 'w')


def WriteLineToFiles(line, line_split):
    """Write the reformatted record to b, and the raw line to c when K is None."""
    bf.write(Parse(line_split))
    if K is None:
        cf.write(line)


i = 0
for line in takewhile(illuminacond, open('a')):
    line_split = line.split(',')
    if not IsValidPid(line_split):
        continue
    WriteLineToFiles(line, line_split)
    i += 1

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Accumulate conditions during recursion on classification tree - python

Related

how to feed strings in an empty list?

python: TypeError: 'NoneType' object has no attribute '__getitem__' again

Python comma and divide an integer

Python - key error when using "if in dict"

avoiding code duplication in Python code redux

Categories

Resources

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Accumulate conditions during recursion on classification tree - python

Related

how to feed strings in an empty list?

python: TypeError: 'NoneType' object has no attribute '__getitem__' again

Python comma and divide an integer

Python - key error when using "if in dict"

avoiding code duplication in Python code redux

Categories

Resources

python: TypeError: 'NoneType' object has no attribute '__getitem__' again