I wrote some code that lets the user check what percentage of the money they earned has been spent. Almost every step performs normally, except the final part.
a_c[('L'+row_t)].value returns:
=<Cell 'Sheet1'.B5>/<Cell 'Sheet1'.J5>
but I expected it to be a numeric value.
Code:
# Writes summary formulas into the row below the sheet's max_row and prints the
# ratio of one spending category to total income.
# NOTE(review): the indentation of this snippet was lost; the statement(s) after
# `if` presumably form its body — confirm against the original script.
st_column = st_column_r.capitalize()
# Last used row and the row right after it, as strings for building cell references.
row_s = str(a_c.max_row)
row_t = str(a_c.max_row + 1)
row = int(row_t)
# NOTE(review): SUM with a comma adds two individual cells; a range would need a
# colon — confirm which is intended here and in the category formula below.
a_c[('J'+row_t)] = ('=SUM(I2,J'+row_s+')') # total income
# Reads back what was just stored, i.e. the formula text, not a computed number.
errorprevention = a_c[('J'+row_t)].value
a_c[(st_column+row_t)] = ('=SUM('+(st_column+'2')+','+(st_column+row_s)+')')
a_c['L'+row_t].number_format = FORMAT_PERCENTAGE_00
if errorprevention != 0:
# str() of a cell object yields its repr (e.g. <Cell 'Sheet1'.B5>), which is why
# the stored formula reads "=<Cell ...>/<Cell ...>" instead of "=B5/J5".
a_c[('L'+row_t)] = ('='+str(a_c[(st_column+row_t)])+'/'+str(a_c[('J'+row_t)]))
# NOTE(review): .value here is the formula string written above; presumably a
# calculated result is only available after a spreadsheet application has
# evaluated and saved the workbook — verify.
print('過往支出中,'+inputtype[st_column]+'類別佔總收入的比率為:'+a_c[('L'+row_t)].value)
Try changing the formula creation to:
# Build the ratio formula from each cell's coordinate string instead of str(cell).
a_c[('L' + row_t)].value = '=' + a_c[(st_column + row_t)].coordinate + '/' + a_c[('J' + row_t)].coordinate
or use an f string
# Same formula as the concatenated form, spelled as an f-string.
a_c[('L' + row_t)].value = f"={a_c[(st_column + row_t)].coordinate}/{a_c[('J' + row_t)].coordinate}"
I want to define my own function as below:
# First attempt: combine two address columns of `df` into a new 'NEWADD' column.
# NOTE(review): parameters without defaults follow `ADD2 = None`, which Python
# rejects as a syntax error. The body also indexes the literal column names
# 'ADD1'/'ADD2' rather than the ADD1/ADD2 arguments.
def myown(df, ADD1, ADD2 = None, OtherArgument_1, OtherArgument_2):
# `tmp` is another name for `df`, not a copy, so the caller's frame is modified.
tmp = df
tmp['NEWADD'] = (tmp['ADD1'] + ' ' + tmp['ADD2']).str.strip()
return tmp
I know this is incorrect, so I can add an if statement to the function:
# Second attempt: use only ADD1 when ADD2 is not given, otherwise join both
# columns with a space and strip the result.
# NOTE(review): the signature still places non-default parameters after
# `ADD2 = None` (a syntax error), and this version has no return statement.
def myown(df, ADD1, ADD2 = None, OtherArgument_1, OtherArgument_2):
# `tmp` is another name for `df`, not a copy.
tmp = df
if ADD2 == None:
tmp['NEWADD'] = tmp[ADD1].str.strip()
else:
tmp['NEWADD'] = (tmp[ADD1] + ' ' + tmp[ADD2]).str.strip()
However, if I don't know in advance how many ADD inputs there will be, how can I modify this?
For example, there may be 5 ADD columns to combine this time and only 3 next time. It is impractical to rewrite the function each time like this:
# Hard-coded five-column variant: joins ADD1..ADD5 with single spaces and strips
# the combined string. Shown as the form that would need rewriting whenever the
# number of columns changes.
def myown(df, ADD1, ADD2, ADD3, ADD4, ADD5, OtherArgument_1, OtherArgument_2):
# `tmp` is another name for `df`, not a copy.
tmp = df
tmp['NEWADD'] = (tmp[ADD1] + ' ' + tmp[ADD2] + ' ' + tmp[ADD3] + ' ' + tmp[ADD4] + ' ' + tmp[ADD5]).str.strip()
You can accomplish this by using loops and lists like this:
def myown(df, add_args, OtherArgument_1, OtherArgument_2):
    """Combine any number of text columns of ``df`` into a 'NEWADD' column.

    The columns are joined with single spaces in the order given and the
    combined text is stripped of leading/trailing whitespace, matching the
    two-column expression ``(df[a] + ' ' + df[b]).str.strip()``.

    Args:
        df: DataFrame holding the columns; it is modified in place.
        add_args: Sequence of column names to combine, e.g. ``[ADD1, ADD2]``.
        OtherArgument_1: Not used by this function.
        OtherArgument_2: Not used by this function.

    Returns:
        The same DataFrame object, with the 'NEWADD' column added.
    """
    tmp = df  # same object as df, not a copy: the caller's frame gets the column
    columns = list(add_args)
    if columns:
        combined = tmp[columns[0]]
        for name in columns[1:]:
            combined = combined + ' ' + tmp[name]
        tmp['NEWADD'] = combined.str.strip()
    else:
        # Nothing to combine: keep the empty-string result of the previous version.
        tmp['NEWADD'] = ''
    return tmp
Your add_args parameter must be a list, which looks like this:
# Example: the column names to combine, listed in the order they should be joined.
add_args = [ADD1, ADD2, ADDn]
So I am working on a certain code to modify a text file. When I use this function individually, it works perfectly
# Single-file invocation, with the file name given directly.
TextRotation.rotTextC("cv.txt")
But when I use it in batch as a list like this
def files_LTXT(pathF):
    """Return the paths matched by the glob pattern ``pathF`` + ``*.txt``.

    ``pathF`` is used as a plain string prefix: no path separator is inserted
    between it and the wildcard.
    """
    pattern = "".join((pathF, "*.txt"))
    return glob.glob(pattern)
# Rotate every listed text file in turn. listFileTXT is defined outside this
# snippet (presumably the result of files_LTXT — confirm).
for i in range (len(listFileTXT)):
TextRotation.rotTextC(listFileTXT[i])
it gives the following error:
File "C:\Users\Administrator\PycharmProjects\openCV\TextRotation.py", line
9, in rotLineC
0
valueObj = int(lineStr[c1])
0.472917 0.713281 0.845833 0.376563
IndexError: string index out of range
Function rotLineC is as follows:
def rotLineC(lineStr, c1):
    """Rotate one annotation line of the form ``obj xc yc w h``.

    The fields are read from ``lineStr`` starting at index ``c1`` and are split
    on whitespace, so multi-digit object ids and floats of any width are
    accepted. The rotated box uses ``1 - yc`` as its new x-centre, the old
    x-centre as its new y-centre, and swaps width and height; every value is
    rounded to 6 decimals.

    Args:
        lineStr: Text line holding an integer object id followed by four
            floats, separated by whitespace.
        c1: Index in ``lineStr`` at which the object id starts.

    Returns:
        The rotated line as ``"obj xc yc w h"``, or ``None`` when the line is
        two characters or shorter or holds no fields from ``c1`` onwards
        (e.g. a blank line, or ``c1`` past the end of the line).

    Raises:
        ValueError: If fewer than five fields are present or a field is not
            numeric.
    """
    if len(lineStr) <= 2:
        return None

    fields = lineStr[c1:].split()
    if not fields:
        # Whitespace-only remainder: nothing to rotate.
        return None
    if len(fields) < 5:
        raise ValueError(
            "expected 'obj xc yc w h' but got %r" % lineStr[c1:]
        )

    valueObj = int(fields[0])
    print(valueObj)
    valueXC, valueYC, valueW, valueH = (float(field) for field in fields[1:5])

    rotated = (
        round(1 - valueYC, 6),  # new x-centre
        round(valueXC, 6),      # new y-centre
        round(valueH, 6),       # new width
        round(valueW, 6),       # new height
    )
    coords = " ".join(str(value) for value in rotated)
    rotString = str(valueObj) + " " + coords
    print(coords)
    print(rotString)
    return rotString
This function works fine!
# Rotate every listed image in turn; listFileJPG is defined outside this snippet.
for i in range (len(listFileJPG)):
ImageRotation.rotImage(listFileJPG[i])
Make sure the path ends with a /! (I am assuming a UNIX environment here.)
If the path is 'dev/my_pat', for example, your function will fail, because the pattern becomes 'dev/my_pat*.txt' and no longer looks inside the directory. The path must end with a /. You can add this check to your function:
...
# When pathF does not end with '/', put one between the directory and the wildcard.
# NOTE(review): fragment — the case where pathF already ends with '/' is left to
# the elided code around it.
if pathF[-1] != '/':
return glob.glob(pathF + "/*.txt")
...
Also, do not iterate using indices, use the pythonic way!
# Iterate over the returned paths directly instead of indexing into a list.
# files_LTXT is the function that builds the list; listFileTXT is the list itself
# and cannot be called.
for file in files_LTXT(my_path):
    TextRotation.rotTextC(file)
I have two DFs from 2 excel files.
1st file(awcProjectMaster)(1500 records)
projectCode projectName
100101 kupwara
100102 kalaroos
100103 tangdar
2nd file(village master)(more than 10 million records)
villageCode villageName
425638 wara
783651 tangdur
986321 kalaroo
I need to compare projectName with villageName and report the percentage match.
The following code works, but it is slow. How can I do the same thing more efficiently?
import pandas as pd
from datetime import datetime
# Load the two Excel workbooks: df is iterated for project columns and df1 for
# state/district/village columns further down.
df = pd.read_excel("C:\\Users\\Desktop\\awcProjectMaster.xlsx")
df1 = pd.read_excel("C:\\Users\\Desktop\\prjToVillageStateWise\\stCodeVillage1To6.xlsx")
def compare(prjCode, prjName, stCode, stName, dCode, dName, sdCode, sdName, vCode, vName):
    """Append one comma-separated line to the log when one name contains the other.

    The score written is the length of the shorter name as a percentage of the
    longer name's length, rounded to an integer. Nothing is written when
    neither name occurs inside the other. The log file is opened in append
    mode on every call.
    """
    with open(r"C:\\Users\\Desktop\\prjToVillageStateWise\\stCodeVillage1To6.txt", "a") as log:
        # Search the longer name for the shorter one; on equal lengths the
        # project name is the one searched.
        if len(vName) > len(prjName):
            haystack, needle = vName, prjName
        else:
            haystack, needle = prjName, vName
        if needle not in haystack:
            return
        score = (len(needle) / len(haystack)) * 100
        fields = [
            prjCode,
            prjName,
            vCode,
            vName,
            str(round(score)),
            stCode,
            stName,
            dCode,
            dName + sdCode,  # written without a separator between the two
            sdName,
        ]
        log.write(",".join(fields) + "\n")
# Run compare() on every (project, village) pair of the two frames.
for _, project in df.iterrows():
    for _, village in df1.iterrows():
        compare(
            prjCode=str(project["ProjectCode"]),
            prjName=project["ProjectName"].lower(),
            stCode=str(village["StateCensusCode"]),
            stName=village["StateName"],
            dCode=str(village["DistrictCode"]),
            dName=village["DistrictName"],
            sdCode=str(village["SubDistrictCode"]),
            sdName=village["SubDistrictNameInEnglish"],
            vCode=str(village["VillageCode"]),
            vName=village["VillageNameInEnglish"].lower(),
        )
Your distance metric for the strings isn't too accurate, but if it works for you, fine. (You may want to look into other options like the builtin difflib, or the Python-Levenshtein module, though.)
If you really do need to compare 1,500 x 10,000,000 records pairwise, things are bound to take some time, but there are a couple things that we can do pretty easily to speed things up:
open the log file only once; there's overhead, sometimes significant, in that
refactor your comparison function into a separate unit, then apply the lru_cache() memoization decorator to make sure each pair is compared only once, and the subsequent result is cached in memory. (In addition, see how we sort the vName/prjName pair – since the actual order of the two strings doesn't matter, we end up with half the cache size.)
Then for general cleanliness,
use the csv module for streaming CSV into a file (the output format is slightly different than with your code, but you can change this with the dialect parameter to csv.writer()).
Hope this helps!
import pandas as pd
from datetime import datetime
from functools import lru_cache
import csv
# Load the two Excel workbooks compared below.
df = pd.read_excel("C:\\Users\\Desktop\\awcProjectMaster.xlsx")
df1 = pd.read_excel("C:\\Users\\Desktop\\prjToVillageStateWise\\stCodeVillage1To6.xlsx")
# Open the log once for the whole run. The csv module expects its file to be
# opened with newline="" so the writer controls line endings itself; without it
# rows can end in "\r\r\n" on Windows.
log_file = open(r"C:\\Users\\Desktop\\prjToVillageStateWise\\stCodeVillage1To6.txt", "a", newline="")
log_writer = csv.writer(log_file)
# Bounded cache: repeated name pairs are scored once, while memory use stays
# capped even though the number of distinct pairs can be very large.
@lru_cache(maxsize=65536)
def compare_vname_prjname(vName, prjName):
    """Score how much of the longer name is covered by the shorter one.

    The result does not depend on the order of the two arguments.

    Args:
        vName: Village name.
        prjName: Project name.

    Returns:
        ``len(shorter) / len(longer) * 100`` when the shorter name occurs
        inside the longer one, otherwise ``None``. Two empty names also give
        ``None`` (there is nothing to compare and the ratio would be 0/0).
    """
    # sorted() is stable, so on equal lengths vName is treated as the shorter
    # name and searched for inside prjName.
    shorter, longer = sorted((vName, prjName), key=len)
    if not longer:
        return None
    if shorter in longer:
        return (len(shorter) / len(longer)) * 100
    return None
def compare(prjCode, prjName, stCode, stName, dCode, dName, sdCode, sdName, vCode, vName):
    """Write one CSV row to the log when the project and village names match.

    Args:
        prjCode, prjName: Project code and (lower-cased) project name.
        stCode, stName: State code and name.
        dCode, dName: District code and name.
        sdCode, sdName: Sub-district code and name.
        vCode, vName: Village code and (lower-cased) village name.

    Returns:
        ``True`` when a row was written, ``False`` when the names do not match.
    """
    # Hand the pair to the scorer in a canonical (sorted) order so that (a, b)
    # and (b, a) share one cache entry. Only the scorer's arguments are
    # reordered; prjName and vName themselves stay untouched so each is logged
    # in its own column.
    first, second = sorted((vName, prjName))
    percent_match = compare_vname_prjname(first, second)
    if percent_match is None:  # No match
        return False
    log_writer.writerow(
        [
            prjCode,
            prjName,
            vCode,
            vName,
            round(percent_match),
            stCode,
            stName,
            dCode,
            dName,  # district name and sub-district code are separate columns
            sdCode,
            sdName,
        ]
    )
    return True
# Compare every project with every village. The log file is closed (and thereby
# flushed) even if the run is interrupted or a row raises.
try:
    for project in df.itertuples(index=False):
        # Per-project values do not change in the inner loop: convert them once.
        prj_code = str(project.ProjectCode)
        prj_name = project.ProjectName.lower()
        # itertuples avoids building a Series per row, which matters for the
        # very large village frame.
        for village in df1.itertuples(index=False):
            compare(
                prj_code,
                prj_name,
                str(village.StateCensusCode),
                village.StateName,
                str(village.DistrictCode),
                village.DistrictName,
                str(village.SubDistrictCode),
                village.SubDistrictNameInEnglish,
                str(village.VillageCode),
                village.VillageNameInEnglish.lower(),
            )
finally:
    log_file.close()
def SetHP(self, hpPercentage, curHP, maxHP):
    """Update the HP gauge and its "current / maximum" caption.

    When the gauge is not shown yet, the widget is first resized, the gauge is
    shown and the position is refreshed. The gauge percentage is then set out
    of 100 and the caption is set to the two HP values as text.
    """
    gauge_hidden = not self.hpGauge.IsShow()
    if gauge_hidden:
        self.SetSize(200 + 7*self.nameLength, 70)
        self.hpGauge.Show()
        self.UpdatePosition()

    self.hpGauge.SetPercentage(hpPercentage, 100)

    caption = " / ".join((str(curHP), str(maxHP)))
    self.broadCastHP.SetText(caption)
Example output is: 8993 / 18782
I have seen some similar questions, but all of them were about "float" values.
I want to format these integers like this:
8,9K / 18,7K
What is the "proper" way to do that?
Try this function:
def HPformat(str):
    """Abbreviate an HP value to thousands, e.g. ``"8993"`` -> ``"8,9K"``.

    The value is truncated (not rounded) to one digit after the comma, so
    ``"18782"`` becomes ``"18,7K"``.

    Args:
        str: The HP value as a string of digits; an int is accepted as well.
            The parameter name shadows the builtin and is kept only so that
            existing callers keep working.

    Returns:
        The abbreviated text for values of 1000 and above. Values below 1000,
        and anything that is not purely digits, are returned unchanged as text.
    """
    digits = "{}".format(str)
    if not digits.isdigit() or len(digits) < 4:
        return digits
    # Everything before the last three digits is whole thousands; the next
    # digit is the hundreds, shown after the comma.
    return digits[:-3] + ',' + digits[-3] + 'K'
And replace your final line of code with the function:
def SetHP(self, hpPercentage, curHP, maxHP):
    """Update the HP gauge and show both HP values passed through HPformat.

    When the gauge is not shown yet, the widget is first resized, the gauge is
    shown and the position is refreshed. The gauge percentage is then set out
    of 100 and the caption is set to "<current> / <maximum>".
    """
    gauge_hidden = not self.hpGauge.IsShow()
    if gauge_hidden:
        self.SetSize(200 + 7*self.nameLength, 70)
        self.hpGauge.Show()
        self.UpdatePosition()

    self.hpGauge.SetPercentage(hpPercentage, 100)

    current_text = str(curHP)
    maximum_text = str(maxHP)
    caption = HPformat(current_text) + " / " + HPformat(maximum_text)
    self.broadCastHP.SetText(caption)
Also, if you don't want to add a new function you could just do:
def SetHP(self, hpPercentage, curHP, maxHP):
    """Update the HP gauge and show abbreviated HP values, e.g. "8,9K / 18,7K".

    When the gauge is not shown yet, the widget is first resized, the gauge is
    shown and the position is refreshed. The gauge percentage is then set out
    of 100.

    Each HP value of 1000 or more is shortened to whole thousands plus one
    truncated digit and a "K" suffix, whatever its number of digits; smaller
    values are shown unchanged.
    """
    if not self.hpGauge.IsShow():
        self.SetSize(200 + 7*self.nameLength, 70)
        self.hpGauge.Show()
        self.UpdatePosition()
    self.hpGauge.SetPercentage(hpPercentage, 100)

    labels = []
    for amount in (curHP, maxHP):
        digits = str(amount)
        # Slice relative to the end so any length of 4+ digits works, instead of
        # assuming a 4-digit current and a 5-digit maximum value.
        if digits.isdigit() and len(digits) > 3:
            digits = digits[:-3] + ',' + digits[-3] + 'K'
        labels.append(digits)
    self.broadCastHP.SetText(" / ".join(labels))