avoiding code duplication in Python code redux - python

This is a followup to an earlier question. I got some good suggestions for that, so I thought I would try my luck again.
from itertools import takewhile
if K is None:
illuminacond = lambda x: x.split(',')[0] != '[Controls]'
else:
illuminacond = lambda x: x.split(',')[0] != '[Controls]' and i < K
af=open('a')
bf=open('b', 'w')
cf=open('c', 'w')
i = 0
if K is None:
for line in takewhile(illuminacond, af):
line_split=line.split(',')
pid=line_split[1][0:3]
out = line_split[1] + ',' + line_split[2] + ',' + line_split[3][1] + line_split[3][3] + ',' \
+ line_split[15] + ',' + line_split[9] + ',' + line_split[10]
if pid!='cnv' and pid!='hCV' and pid!='cnv':
i = i+1
bf.write(out.strip('"')+'\n')
cf.write(line)
else:
for line in takewhile(illuminacond, af):
line_split=line.split(',')
pid=line_split[1][0:3]
out = line_split[1] + ',' + line_split[2] + ',' + line_split[3][1] + line_split[3][3] + ',' \
+ line_split[15] + ',' + line_split[9] + ',' + line_split[10]
if pid!='cnv' and pid!='hCV' and pid!='cnv':
i = i+1
bf.write(out.strip('"')+'\n')
Is it possible to compactify this code? If I have some stuff in common in two loops like this,
one obvious possibility is to just factor out the common code, but here, eww.
The annoying thing is that the only difference here is the writing to c.
Brief summary of code: If K is None, loop over all of a (up to the [Controls] line) and write to both b and c. Otherwise, stop once K lines have been written and write only to b.

Why not use only one loop, but including the condition inside that loop? Also, you can get rid of the redundancy in that lambda, I think.
from itertools import takewhile

# K is defined by the surrounding program; None means "no line limit".
k_is_none = K is None


def illuminacond(x):
    """Tell takewhile whether to keep consuming input lines.

    Reading stops at the line whose first comma-separated field is
    '[Controls]' and, when K is set, as soon as K lines have been written
    (``i`` counts the written lines).
    """
    # i and K are only read here, so no ``global`` declarations are needed.
    if x.split(',')[0] == '[Controls]':
        return False
    return k_is_none or i < K


i = 0
# ``with`` closes (and therefore flushes) all three files, even when a line
# is malformed and raises; the original never closed them.
with open('a') as af, open('b', 'w') as bf, open('c', 'w') as cf:
    for line in takewhile(illuminacond, af):
        line_split = line.split(',')
        pid = line_split[1][0:3]
        out = ','.join([
            line_split[1],
            line_split[2],
            line_split[3][1] + line_split[3][3],
            line_split[15],
            line_split[9],
            line_split[10],
        ])
        # Records whose id starts with 'cnv' or 'hCV' are skipped.
        if pid not in ('cnv', 'hCV'):
            i += 1
            bf.write(out.strip('"') + '\n')
            # The raw line is copied to c only in unlimited mode.
            if k_is_none:
                cf.write(line)

One check, one loop, no classes, psyco-optimizable.
from itertools import takewhile

# The K check happens exactly once, here; the loop below is branch-free.
# K is defined by the surrounding program; None means "no line limit".
if K is None:
    def illuminacond(x):
        """Keep reading until the '[Controls]' marker line."""
        return x.split(',')[0] != '[Controls]'

    def action(cf, line):
        """Copy the raw input line to c (unlimited mode)."""
        cf.write(line)
else:
    def illuminacond(x):
        """Keep reading until '[Controls]' or until K lines were written."""
        return x.split(',')[0] != '[Controls]' and i < K

    def action(cf, line):
        """Write nothing to c (limited mode)."""


i = 0
# ``with`` closes (and flushes) the files even on error; the original
# left all three open.
with open('a') as af, open('b', 'w') as bf, open('c', 'w') as cf:
    for line in takewhile(illuminacond, af):
        line_split = line.split(',')
        pid = line_split[1][0:3]
        out = ','.join([
            line_split[1],
            line_split[2],
            line_split[3][1] + line_split[3][3],
            line_split[15],
            line_split[9],
            line_split[10],
        ])
        # Records whose id starts with 'cnv' or 'hCV' are skipped.
        if pid not in ('cnv', 'hCV'):
            i += 1
            bf.write(out.strip('"') + '\n')
            action(cf, line)

Why not just:
from itertools import takewhile


def illuminacond(x):
    """Keep reading until '[Controls]' or, when K is set, until K lines were written.

    K comes from the surrounding program (None means "no limit"); ``i`` is
    the module-level count of lines written so far.
    """
    return x.split(',')[0] != '[Controls]' and (K is None or i < K)


# The counter must exist before the first predicate call; without this line
# the loop fails with NameError as soon as K is not None (and on the first
# ``i += 1`` otherwise).
i = 0
# ``with`` closes (and flushes) the files even on error.
with open('a') as af, open('b', 'w') as bf, open('c', 'w') as cf:
    for line in takewhile(illuminacond, af):
        line_split = line.split(',')
        pid = line_split[1][0:3]
        out = ','.join([
            line_split[1],
            line_split[2],
            line_split[3][1] + line_split[3][3],
            line_split[15],
            line_split[9],
            line_split[10],
        ])
        # Records whose id starts with 'cnv' or 'hCV' are skipped.
        if pid not in ('cnv', 'hCV'):
            i += 1
            bf.write(out.strip('"') + '\n')
            # The raw line is copied to c only in unlimited mode.
            if K is None:
                cf.write(line)

How about this (second class based version)?
from itertools import takewhile


class Foo:
    """Copy selected fields from file 'a' to 'b', and raw lines to 'c'.

    With ``K=None`` every accepted line up to the '[Controls]' marker is
    written to both b and c. With an integer K, processing stops after K
    accepted lines and only b is written.

    The output files are opened in ``__init__``; call ``close()`` (or use
    the instance as a context manager) to release them.
    """

    def __init__(self, K=None):
        self.bf = open('b', 'w')
        self.cf = open('c', 'w')
        self.count = 0  # number of lines written so far
        self.K = K

    def close(self):
        """Close both output files."""
        self.bf.close()
        self.cf.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()
        return False

    def Go(self):
        """Process file 'a' line by line until the stop condition triggers."""
        # The input file is closed deterministically; the original leaked it.
        with open('a') as af:
            for self.line in takewhile(self.Lamda(), af):
                self.SplitLine()
                if self.IsValidPid():
                    self.WriteLineToFiles()
        # Make the output visible on disk even if the caller never closes.
        self.bf.flush()
        self.cf.flush()

    def SplitLine(self):
        """Split the current line into its comma-separated fields."""
        self.lineSplit = self.line.split(',')

    def Lamda(self):
        """Return the takewhile predicate matching the configured K."""
        if self.K is None:
            return lambda x: x.split(',')[0] != '[Controls]'
        return lambda x: (x.split(',')[0] != '[Controls]'
                          and self.count < self.K)

    def IsValidPid(self):
        """Return False for records whose id starts with 'cnv' or 'hCV'."""
        return self.lineSplit[1][0:3] not in ('cnv', 'hCV')

    def WriteLineToFiles(self):
        """Write the parsed record to b and, in unlimited mode, the raw line to c."""
        self.count += 1
        self.bf.write(self.ParseLine())
        if self.K is None:
            self.cf.write(self.line)

    def ParseLine(self):
        """Build the output record for b from the current split line."""
        fields = self.lineSplit
        record = ','.join([
            fields[1],
            fields[2],
            fields[3][1] + fields[3][3],
            fields[15],
            fields[9],
            fields[10],
        ])
        return record.strip('"') + '\n'
Foo().Go()
Original version:
from itertools import takewhile

# K is defined by the surrounding program; None means "no line limit".
if K is None:
    def illuminacond(x):
        """Keep reading until the '[Controls]' marker line."""
        return x.split(',')[0] != '[Controls]'
else:
    def illuminacond(x):
        """Keep reading until '[Controls]' or until K lines were written."""
        return x.split(',')[0] != '[Controls]' and i < K


def Parse(line):
    """Build the output record for b from an already split input line."""
    record = ','.join([
        line[1],
        line[2],
        line[3][1] + line[3][3],
        line[15],
        line[9],
        line[10],
    ])
    return record.strip('"') + '\n'


def IsValidPid(line_split):
    """Return False for records whose id starts with 'cnv' or 'hCV'."""
    return line_split[1][0:3] not in ('cnv', 'hCV')


def WriteLineToFiles(line, line_split):
    """Write the parsed record to b and, in unlimited mode, the raw line to c.

    Uses the module-level file objects ``bf`` and ``cf`` opened below.
    """
    bf.write(Parse(line_split))
    if K is None:
        cf.write(line)


i = 0
# ``with`` closes (and flushes) all three files even on error; the original
# never closed any of them, including the anonymous open('a').
with open('a') as af, open('b', 'w') as bf, open('c', 'w') as cf:
    for line in takewhile(illuminacond, af):
        line_split = line.split(',')
        if IsValidPid(line_split):
            WriteLineToFiles(line, line_split)
            i += 1

Related

how to feed strings in an empty list?

I am trying to store the values obtained from Excel sheet cells in a list. The code below collects data from consecutive rows and columns and builds a string from those values. I got as far as building the string, but I don't know how to store the strings in a list. Can anyone help me with this?
for i in range(NR):
print("This TC checks the output for")
for j in range(NC):
inputVariable = str(ws[get_column_letter(ColumnStart+j) + str(rowStart-1)].value)
c = str((ws.cell(row = (rowStart + i),column = (ColumnStart +j)).value))
if (ws.cell(row = (rowStart + i),column = (ColumnStart+j)).value) == (ws.cell(row = (MaxValRow),column = (ColumnStart+j)).value):
b = '(maximum)'
elif (ws.cell(row = (rowStart + i),column = (ColumnStart+j)).value) == (ws.cell(row = (MinValRow),column = (ColumnStart+j)).value):
b = '(minimum)'
else:
b ='(intermediate)'
Commentstr = str(j+1) + '. The value of input ' + inputVariable + ' =' + " " + c + b
# need to create a list here to store the commentstr for each iteration
NR = no. of rows, NC = no. of columns
# Collects one comment string per (row, column) cell, in iteration order.
my_list = []
for i in range(NR):
    print("This TC checks the output for")
    for j in range(NC):
        column = ColumnStart + j
        inputVariable = str(ws[get_column_letter(column) + str(rowStart - 1)].value)
        value = ws.cell(row=rowStart + i, column=column).value
        c = str(value)
        # Label the cell by comparing it with the max/min reference rows.
        if value == ws.cell(row=MaxValRow, column=column).value:
            b = '(maximum)'
        elif value == ws.cell(row=MinValRow, column=column).value:
            b = '(minimum)'
        else:
            b = '(intermediate)'
        Commentstr = str(j + 1) + '. The value of input ' + inputVariable + ' =' + " " + c + b
        # append() grows the list; assigning to my_list[x] on an empty list
        # raises IndexError, and the manual index x is not needed at all.
        my_list.append(Commentstr)

Accumulate conditions during recursion on classification tree

I have the following function which produces code from a sci-kit learn classification tree:
def mxTreeToCode(tree, feature_names, mx_name = 'mxTree', rm_file = False):
# Remove pre-existent file
if rm_file:
import os
try:
os.remove('./tree.py')
except OSError:
pass
tree_ = tree.tree_
feature_name = [
feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
for i in tree_.feature
]
file = open('tree.py', 'a')
file.write('def ' + mx_name + '(x):'+ '\n')
#col_name = ''
def recurse(node, depth):
global col_name
indent = " " * depth
if tree_.feature[node] != _tree.TREE_UNDEFINED:
name = feature_name[node]
threshold = tree_.threshold[node]
file.write(indent +"if x['"+ name + "'] <= " + str(threshold) + ':' + '\n')
col_name += "'"+name + '_' + '<=' + str(threshold) +"'"
recurse(tree_.children_left[node], depth + 1)
file.write(indent + "else: # if x['"+ name +"'] > " + str(threshold) + '\n')
col_name += "'"+name + '_' + '>' + str(threshold) +"'"
recurse(tree_.children_right[node], depth + 1)
else:
file.write(indent + 'return '+str(col_name) + '\n')
#print(col_name)
col_name = ""
recurse(0, 1)
file.close()
With this I obtain the following output on file 'tree.py' for a given classification tree:
def mxTree(x):
if x['V1'] <= 0.5:
if x['V2'] <= 0.5:
return 'V1_<=0.5''V2_<=0.5'
else: # if x['V2'] > 0.5
return 'V2_>0.5'
else: # if x['V1'] > 0.5
return 'V1_>0.5'
While I can accumulate the conditions on the IF side and return their concatenation, I fail to accumulate them when the ELSE side (the right child of a tree node) follows. This is the output I want:
def mxTree(x):
if x['V1'] <= 0.5:
if x['V2'] <= 0.5:
return 'V1_<=0.5''V2_<=0.5'
else: # if x['V2'] > 0.5
return 'V1_<=0.5''V2_>0.5' # 'V1<=0.5' must be added
else: # if x['V1'] > 0.5
return 'V1_>0.5'
I would appreciate any suggestion.
Since the left/right side of each node are recursed at the same time, I just created an additional variable which saves the output for each side. Finally I concatenate to variable col_name:
def mxTreeToCode(tree, feature_names, mx_name='mxTree', rm_file=False):
    """Append a function to ./tree.py that mirrors a fitted decision tree.

    The generated function ``mx_name(x)`` walks the same if/else structure
    as the tree and, at each leaf, returns the concatenation of every
    condition on the path from the root to that leaf.

    Parameters:
        tree: fitted estimator exposing ``tree_`` (feature, threshold,
            children_left, children_right).
        feature_names: names indexed by the feature ids in ``tree_.feature``.
        mx_name: name of the generated function.
        rm_file: when True, delete an existing ./tree.py first; otherwise
            the new function is appended to it.
    """
    # Remove pre-existent file
    if rm_file:
        import os
        try:
            os.remove('./tree.py')
        except OSError:
            pass

    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]

    def recurse(out, node, depth, conditions):
        """Emit the code for ``node``; ``conditions`` is the path so far.

        Passing the accumulated path down as an argument gives each branch
        its own copy, so no module-level state has to be saved, restored
        or reset between branches or between calls.
        """
        indent = " " * depth
        if tree_.feature[node] == _tree.TREE_UNDEFINED:
            # Leaf: return everything accumulated on the way down.
            out.write(indent + 'return ' + conditions + '\n')
            return
        name = feature_name[node]
        threshold = str(tree_.threshold[node])
        out.write(indent + "if x['" + name + "'] <= " + threshold + ':' + '\n')
        recurse(out, tree_.children_left[node], depth + 1,
                conditions + "'" + name + '_' + '<=' + threshold + "'")
        out.write(indent + "else: # if x['" + name + "'] > " + threshold + '\n')
        recurse(out, tree_.children_right[node], depth + 1,
                conditions + "'" + name + '_' + '>' + threshold + "'")

    # ``with`` closes the file even if the recursion raises.
    with open('tree.py', 'a') as file:
        file.write('def ' + mx_name + '(x):' + '\n')
        recurse(file, 0, 1, "")
I wonder if there are other working approaches.

Is there a way to solve this TypeError [duplicate]

This question already has answers here:
Why do I get an IndexError (or TypeError, or just wrong results) from "ar[i]" inside "for i in ar"?
(4 answers)
Closed 6 months ago.
I am having trouble figuring out what is wrong with my code. I need help.
next(fhr) # skip header row
customer_list = [] # initialize empty customer list
for line in fhr:
ls = line.split(',')
customer_list.append([ls[0], ls[1], ls[2], ls[3], ls[4], ls[5], ls[6], int(ls[7]), ls[8], ls[9], ls[10].strip('\n')])
from operator import itemgetter
customer_list.sort(key=itemgetter(7), reverse=True)
print(customer_list)
writepath = './loan-data-output-v1.csv'
fwh = open(writepath, 'w', encoding='utf-8')
fwh.write('Name' +','+ 'State' +','+'Age' +','+'Annual Income'+','+ 'Loan Type' +','+' Loan Amount' +','+ 'Length of Loan in Years' +','+ 'Days Delinquent' +','+ 'Interest Rate' +','+ 'Number of Loans Prior' +','+'Years as Customer' + '\n')
for i in customer_list:
if customer_list[i][7] >= 90:
fwh.write(customer_list[i][0] + ',' + customer_list[i][1] + ',' + customer_list[i][2] + ',' + customer_list[i][3] + ',' + customer_list[i][4] + ',' + customer_list[i][5] + ',' + customer_list[i][6] + ',' + customer_list[i][7] + ',' + customer_list[i][8] + ',' + customer_list[i][9] + ','+ customer_list[i][10] + '\n')
fhr.close()
fwh.close()
I am getting this error for the last for loop and I'm not sure what to do about it. Can someone help.
TypeError: list indices must be integers or slices, not list
You are using a list of lists, so when you write for i in customer_list, i is itself a list (one row), not an integer index.
for i in customer_list:
    # i is one customer row (a list). Field 7 was converted with int() when
    # the row was built, so compare it with the integer 90: on Python 3,
    # int >= str raises TypeError.
    if i[7] >= 90:
        # str() is needed because field 7 is an int; the rest are strings.
        fwh.write(','.join(str(field) for field in i) + '\n')
fhr.close()
fwh.close()
Alternatively you can use,
for i in range(len(customer_list)):
    row = customer_list[i]
    # Field 7 is an int (converted when the row was built), so compare it
    # with the integer 90: on Python 3, int >= str raises TypeError.
    if row[7] >= 90:
        # str() is needed because field 7 is an int; the rest are strings.
        fwh.write(','.join(str(field) for field in row) + '\n')
fhr.close()
fwh.close()
EDIT: Second method assumes that your length of customer_list is constant or in other words you are not adding anything to customer_list during the loop. Thanks to DanielRoseman for pointing out potential bug in second code..
EDIT 2:
Thanks to quamrana for suggesting this way,
for i in customer_list:
    # Field 7 is an int (converted when the row was built), so compare it
    # with the integer 90: on Python 3, int >= str raises TypeError.
    if i[7] >= 90:
        # Convert while joining instead of overwriting i[7], so the rows in
        # customer_list keep their integer field for any later use.
        fwh.write(','.join(str(field) for field in i[0:11]) + '\n')
fhr.close()
fwh.close()

Python comma and divide an integer

def SetHP(self, hpPercentage, curHP, maxHP):
if not self.hpGauge.IsShow():
self.SetSize(200 + 7*self.nameLength, 70)
self.hpGauge.Show()
self.UpdatePosition()
self.hpGauge.SetPercentage(hpPercentage, 100)
strCurHP = str(curHP)
strMaxHP = str(maxHP)
self.broadCastHP.SetText(strCurHP + " / " + strMaxHP)
Example output is: 8993 / 18782
I have seen some similar questions, but all of them were about floats.
I want to make these integers like that:
8,9K / 18,7K
What is the "proper" way to do that?
Try this function:
def HPformat(str):
    """Abbreviate a decimal digit string to thousands, e.g. '18782' -> '18,7K'.

    The value is truncated (not rounded) to one digit after the comma.
    Strings shorter than four characters are returned unchanged; the
    original returned None for every length other than 4 or 5, which made
    the caller's string concatenation fail.

    The parameter keeps its original name for backward compatibility even
    though it shadows the builtin inside this function.
    """
    if len(str) < 4:
        return str
    # Everything before the last three digits is the thousands part; the
    # first of the last three digits is the single decimal.
    return str[:-3] + ',' + str[-3] + 'K'
And replace your final line of code with the function:
def SetHP(self, hpPercentage, curHP, maxHP):
if not self.hpGauge.IsShow():
self.SetSize(200 + 7*self.nameLength, 70)
self.hpGauge.Show()
self.UpdatePosition()
self.hpGauge.SetPercentage(hpPercentage, 100)
strCurHP = str(curHP)
strMaxHP = str(maxHP)
self.broadCastHP.SetText(HPformat(strCurHP) + " / " + HPformat(strMaxHP))
Also, if you don't want to add a new function you could just do:
def SetHP(self, hpPercentage, curHP, maxHP):
    """Update the HP gauge and show 'cur / max' abbreviated to thousands.

    Values of 1000 or more are shown as e.g. '8,9K' (truncated to one
    decimal); smaller values are shown unchanged.
    """
    if not self.hpGauge.IsShow():
        self.SetSize(200 + 7*self.nameLength, 70)
        self.hpGauge.Show()
        self.UpdatePosition()
    self.hpGauge.SetPercentage(hpPercentage, 100)
    labels = []
    for hp in (curHP, maxHP):
        digits = str(hp)
        # Slice relative to the end so any number of digits works; the
        # original hard-coded a 4-digit current and a 5-digit maximum.
        if len(digits) >= 4:
            digits = digits[:-3] + ',' + digits[-3] + 'K'
        labels.append(digits)
    self.broadCastHP.SetText(" / ".join(labels))

Python - key error when using "if in dict"

I am receiving the following error when running a script to parse contents of an XML file.
if iteration.findtext("Iteration_query-def") in ecdict:
KeyError: 'XLOC_000434'
I was under the impression that using "if in dict" would mean that if the key is not found in the dictionary, the script will continue past the if statement and proceed with the rest of the code. Below is the problematic section of the code I am using. I realise this is quite a basic question, but I am unsure what else I can say, and I don't understand why I am receiving this error.
import xml.etree.ElementTree as ET
tree = ET.parse('507.FINAL_14.2.14_2_nr.out_fmt5.out')
blast_iteration = tree.find("BlastOutput_iterations")
for iteration in blast_iteration.findall("Iteration"):
query = iteration.findtext("Iteration_query-def").strip().strip("\n")
if query in score:
continue
if iteration.findtext("Iteration_message") == "No hits found":
if iteration.findtext("Iteration_query-def") in tair:
tairid = tair[iteration.findtext("Iteration_query-def")][0]
tairdes = tair[iteration.findtext("Iteration_query-def")][1]
else:
tairid = "-"
tairdes = "-"
goterms = ""
ecterms = ""
if iteration.findtext("Iteration_query-def") in godict:
for x in godict[iteration.findtext("Iteration_query-def")][:-1]:
goterms = goterms + x + ";"
goterms = goterms + godict[iteration.findtext("Iteration_query-def")][-1]
else:
goterms = "-"
if iteration.findtext("Iteration_query-def") in ecdict:
for x in ecdict[iteration.findtext("Iteration_query-def")][:-1]:
ecterms = ecterms + x + ";"
ecterms = ecterms + ecdict[iteration.findtext("Iteration_query-def")][-1]
else:
ecterms = "-"
if iteration.findtext("Iteration_query-def") in godescr:
desc = godescr[iteration.findtext("Iteration_query-def")]
else:
desc = "-"
n += 1
p = "PvOAK_up"+str(n) + "\t" + tranlen[iteration.findtext("Iteration_query-def")] + "\t" + orflen[iteration.findtext("Iteration_query-def")] + "\t" + "-" + "\t" + "-" + "\t" + tairid + "\t" + tairdes + "\t" + goterms + "\t" + ecterms + "\t" + desc + "\t" + str(flower[query][2]) + "\t" + str('{0:.2e}'.format(float(flower[query][1]))) + "\t" + str('{0:.2f}'.format(float(flower[query][0]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][2]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][1]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][0])))
print p
Hope you can help,
Thanks.
edit: I should say that godict and ecdict were previously created as follows - I can submit the entire code if needs be:
godict = {}
ecdict = {}
godescr = {}
f = open("507.FINAL_14.2.14_2_nr.out_fmt5.out.annot")
for line in f:
line = line.split("\t")
if len(line) > 2:
godescr[line[0]] = line[2]
line[1] = line[1].strip("\n")
if line[1].startswith("EC"):
if line[0] in ecdict:
a = ecdict[line[0]]
a.append(line[1])
ecdict[line[0]] = a
else:
ecdict[line[0]] = [line[1]]
else:
if line[0] in godict:
a = godict[line[0]]
a.append(line[1])
godict[line[0]] = a
else:
godict[line[0]] = [line[1]]
Traceback:
Traceback (most recent call last):
File "2d.test.py", line 170, in <module>
p = "PvOAK_up"+str(n) + "\t" + tranlen[iteration.findtext("Iteration_query-def")] + "\t" + orflen[iteration.findtext("Iteration_query-def")] + "\t" + "-" + "\t" + "-" + "\t" + tairid + "\t" + tairdes + "\t" + goterms + "\t" + ecterms + "\t" + desc + "\t" + str(flower[query][2]) + "\t" + str('{0:.2e}'.format(float(flower[query][1]))) + "\t" + str('{0:.2f}'.format(float(flower[query][0]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][2]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][1]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][0])))
KeyError: 'XLOC_000434'

Categories

Resources