I have written a short script in Python to extract certain data from a long text file. This is the code.
# Python 2 script (raw_input / print statement): adds up the number captured
# after "money=" on each line of a file and prints the total.
# NOTE(review): indentation was reconstructed from the statement order.
fname = raw_input("Enter file name: ")
if ( len(fname) < 1 ) : fname = 'test.v2'  # empty answer falls back to this file name
rdf = open(fname)
import re
totalmoney = 0
totallent = 0
totaldebt = 0
for line in rdf:
    line = line.rstrip()
    # findall returns a list of the captured digit strings; the list is empty
    # when the line contains no "money=".
    money = re.findall('.*money=([0-9]*)', line)
    #lent = re.findall('.*money_lent=([0-9]*)', line)
    #debt = re.findall('.*debt=([0-9]*)', line)
    try:
        # money[0] raises IndexError on an empty list and int('') raises
        # ValueError. Either one jumps straight to the bare except below, so
        # every statement after the failing one is skipped for this line.
        money = int(money[0])
        totalmoney = totalmoney + money
        #lent = int(lent[0])
        #totallent = totallent + lent
        #debt = int(debt[0])
        #totaldebt = totaldebt + debt
    except: continue
print 'money total:',totalmoney
#print 'lent total:',totallent
#print 'debt total:',totaldebt
As you can see, the second and third meaningful data extractor lines are currently inactive (#), that is because when they are on it finds the value as zero, which I know is incorrect, because when I run them separately, they find a real value. I suspect that for some reason it cannot run all the three lines in the same loop, but I do not know why.
Could you please tell me how to extract three kinds of info from the same line?
Thanks.
Swallowing exceptions is not a good practice - you may never find out the reason why things go wrong. It is better not to swallow exceptions at all and do it right. This should work.
import re

# One pattern per total. "+" rather than "*" so that a key with no digits after
# "=" is simply not matched instead of handing an empty string to int().
FIELD_PATTERNS = {
    'money': re.compile(r'money=([0-9]+)'),
    'lent': re.compile(r'money_lent=([0-9]+)'),
    'debt': re.compile(r'debt=([0-9]+)'),
}


def sum_fields(lines):
    """Return {'money': ..., 'lent': ..., 'debt': ...} summed over *lines*.

    Each field is looked up independently on every line, so a line that has
    only one of the three keys still contributes to that key's total. Only the
    first occurrence of a key on a line is counted.
    """
    totals = dict.fromkeys(FIELD_PATTERNS, 0)
    for line in lines:
        for name, pattern in FIELD_PATTERNS.items():
            match = pattern.search(line)
            if match:
                totals[name] += int(match.group(1))
    return totals


if __name__ == '__main__':
    # Works on both Python 2 (raw_input) and Python 3 (input).
    try:
        read_input = raw_input
    except NameError:
        read_input = input

    fname = read_input("Enter file name: ")
    if len(fname) < 1:
        fname = 'test.v2'
    # The with-block closes the file even if parsing raises.
    with open(fname) as rdf:
        totals = sum_fields(rdf)
    # %-formatting prints the same text on Python 2 and 3; print(a, b) would
    # print a tuple on Python 2.
    print('money total: %d' % totals['money'])
    print('lent total: %d' % totals['lent'])
    print('debt total: %d' % totals['debt'])
Have fun playing Victoria 2 - I myself wasted a lot of time playing that game :)
Related
Here is my code:
# Reads Employees.txt, rebuilds every comma-separated field into one flat list,
# and appends the joined result to Emp_Bonus.txt.
# NOTE(review): indentation was reconstructed; the ** ... ** pair below is the
# post's bold markup around the part the author asks about, not Python syntax.
inputFile = open("Employees.txt", "r").read()
inputList = inputFile.split("\n")
fList = []
def listString(s):
    # Concatenate the items of s with no separator.
    string = ""
    return (string.join(s))
for i in inputList:
    for x in i.split(","):
        fList.append(x)
for y in range (len(fList)):
    # Only these three literal salary strings are recognised; each is replaced
    # by the raised value plus a newline (ending the record). Every other
    # field just gets a trailing comma.
    **if fList[y] == "90000":
        fList[y] = str(90000 * 1.05) + "\n"
    elif fList[y] == "75000":
        fList[y] = str(75000 * 1.05) + "\n"
    elif fList[y] == "110000":
        fList[y] = str(110000 * 1.05) + "\n"
    else:
        fList[y] = fList[y] + ","**
print(listString(fList))
file = open("Emp_Bonus.txt", "a")  # "a": appends to any existing content
file.write(listString(fList))
Employees.txt contains the following:
Adam Lee,Programmer,90000
Morris Heather,DA,75000
John Lee,PM,110000
I am trying to get the following output:
Adam Lee,Programmer,94500
Morris Heather,DA,78750
John Lee,PM,115500
The part of the code that is in bold is the problem, The input salaries need to be able to be different values instead of the code only working for the sample input. The input salaries have to be multiplied by 1.05. How should I go about doing this? Thanks!
Another way without any library. Just read lines of the file as a list using readlines() and then iterate each line. Only modify the last part after splitting it using split(',') e.g salary of each line and finally create the new file as per the requirements.
multiply = 1.05


def apply_raise(lines, factor=multiply):
    """Return "name,designation,salary" rows with each salary scaled by *factor*.

    *lines* is any iterable of "name,designation,salary" strings, for example
    an open file. Blank lines are skipped, so a trailing empty line in the
    input no longer raises IndexError. The salary is written as a float
    (e.g. 94500.0).
    """
    final_result = []
    for line in lines:
        if not line.strip():
            continue
        employee_info = line.split(',')
        name = employee_info[0]
        designation = employee_info[1]
        # float() ignores the surrounding whitespace/newline left on the last field.
        salary = float(employee_info[2].strip()) * factor
        final_result.append(f"{name},{designation},{salary}")
    return final_result


if __name__ == '__main__':
    with open('Employees.txt', 'r') as f:
        final_result = apply_raise(f)
    # Leave Emp_Bonus.txt untouched when there is nothing to write.
    if final_result:
        with open('Emp_Bonus.txt', 'w') as f:
            f.write('\n'.join(final_result))
Output:
Adam Lee,Programmer,94500.0
Morris Heather,DA,78750.0
John Lee,PM,115500.0
I will like to use Pandas:
import pandas as pd

# header=None: the file has no header row, so the columns are labelled 0, 1, 2.
df = pd.read_csv("Employees.txt", header=None)
# Scale every salary, not only a fixed list of known values (rows outside such
# a list would become NaN and make the integer cast fail).
# round() first, because astype(int) truncates towards zero.
df[2] = (df[2] * 1.05).round().astype(int)
# index=False keeps the row index out of the file; header=False writes no
# column labels. mode="a" appends to an existing Emp_Bonus.txt.
df.to_csv("Emp_Bonus.txt", mode="a", header=False, index=False)
# Print every "X-DSPAM-Confidence:" line of the chosen file, then how many
# such lines were found.
PREFIX = "X-DSPAM-Confidence:"
fname = input("Enter file name: ")
fh = open(fname)
count = 0
for line in fh:
    if not line.startswith(PREFIX):
        continue
    print(line)
    count += 1
print(count)
There is a file with 27 lines like X-DSPAM-Confidence : 0.xxxxx, I need to extract the numerical value from each of them to be used for calculations.
Try to use split(':'):
Code:
count = 0
fname = input("Enter file name: ")
# The with-block closes the file when the loop is done.
with open(fname) as fh:
    for line in fh:
        if line.startswith("X-DSPAM-Confidence:"):
            print(line)
            # Everything after the last ':' is the value text.
            value = line.split(':')[-1]
            # Remove whitespace and the line's newline first; otherwise a
            # trailing ',' is not the last character and strip(',') leaves it.
            value = value.strip().strip(',')
            value = float(value)
            print(value)
            count = count + 1
print(count)
As long as the format is exactly as you described it, you can use the code below:
float(line.split(':')[1])
If there's more variation in the text than what you described, you might need to try regex.
You can use str.rfind(':') to get the position of : and then do a string slice to get the value.
count = 0
fname = input("Enter file name: ")
with open(fname) as fh:
    for line in fh:
        if line.startswith("X-DSPAM-Confidence:"):
            print(line)
            # Slice from one past the last ':' so the colon itself is not part
            # of the value; float() ignores the surrounding whitespace/newline.
            value = float(line[line.rfind(':') + 1:])
            print(value)
            count = count + 1
print(count)
# Average of the numbers that follow "X-DSPAM-Confidence:" in the chosen file.
fname = input("Enter file name: ")
count = 0
total = 0
with open(fname) as fh:
    for line in fh:
        if not line.startswith("X-DSPAM-Confidence:"):
            continue
        count = count + 1
        # The value is everything after the first ':'.
        pos = line.find(':')
        total = total + float(line[pos + 1:])
# No matching line means there is no average; avoid dividing by zero.
avg = total / count if count else None
fname = input("Enter file name: ")
val = 0
count = 0
with open(fname) as fh:
    for line in fh:
        if not line.startswith("X-DSPAM-Confidence:"):
            continue
        count = count + 1
        # Take the text after the first ':'. Searching for the first '0'
        # instead would parse "1.0000" as "0000".
        val = val + float(line.split(':', 1)[1])
if count:
    print("Average spam confidence:", val / count)
else:
    # No matching line: nothing to average, and dividing would raise.
    print("No X-DSPAM-Confidence lines found")
fname = input("Enter file name:")
count = 0
s = 0
with open(fname) as fh:
    for line in fh:
        if not line.startswith("X-DSPAM-Confidence:"):
            continue
        count = count + 1
        # The number starts right after the ':'. Locating the first '0' in the
        # line would drop the leading digit of a value such as "1.0000".
        pos = line.find(':') + 1
        s += float(line[pos:])
if count:
    print(s / count)
else:
    # Avoid ZeroDivisionError when the file has no matching line.
    print("No X-DSPAM-Confidence lines found")
# Asks twice for a registration number and two camera times, computes a speed,
# and looks the registration up in fine.txt.
# NOTE(review): indentation was reconstructed from the statement order.
import datetime
with open("fine.txt","r") as f, open("fine1.txt","a") as fine1:
    # The whole of fine.txt is consumed here; f is now positioned at end of file.
    lines = f.read().split("\n")
    for i in range(2):
        var = input("reg : ") # registration number(reg_num)
        enter = input('Time entered camera 1(24hrs)in the format HH:MM:SS: ')
        ext = input('Time enterd camera 2 (24hrs)in the format HH:MM:SS : ')
        total_time = '%H:%M:%S'  # strptime format string, despite the name
        enter_time = datetime.datetime.strptime(enter, total_time)
        ext_time = datetime.datetime.strptime(ext, total_time)
        # An exit time earlier than the entry time is treated as the next day.
        if enter_time > ext_time:
            ext_time += datetime.timedelta(hours=24)
        t_diff = ext_time - enter_time
        time = t_diff.total_seconds() / 3600  # elapsed time in hours
        # NOTE(review): presumably the cameras are 1 mile apart, which would
        # make this mph - confirm. Equal times give time == 0 and this divides by zero.
        speed = 1 / time
        # Checks character 0 is a letter, character 2 a digit, character 4
        # whitespace, characters 5-6 letters, and that the text is upper case.
        reg = var[0:1].isalpha() and var[2:3].isdigit() and var[4].isspace() and var[5:7].isalpha() and var.isupper()
        if reg == True:
            # This loop reuses the name i from the outer for loop.
            for i, line in enumerate(lines):
                if var in line:
                    num = int("{}".format(i))  # same value as i
                    # f was already read to the end above, so this read()
                    # returns '' and var becomes an empty string.
                    var = f.read()
                    # Indexing the empty string raises
                    # "IndexError: string index out of range"; var[num] would
                    # be a single character in any case, whereas lines[num]
                    # holds the matching line.
                    name = (var[num]) #the problem
                    print(name)
                    address = (var[num + 0])
                    if speed > 70:
                        print("ovrspeeding", (var[num + 0]))
                        # speed is a float, so str + float here would raise TypeError.
                        fine1.write(name+address+speed+"\n")
The whole code had to be included; otherwise you would not understand what I am trying to do.
fine.txt is a file that has already been made and looks like:
reg_num1 aaaaaaaaaaaaaaaaaaa
reg_num2 bbbbbbbbbbbbbbbbbbb
reg_num3 ccccccccccccccccccc
this code takes in inputs of the registration number(e.g. AA01 SSS) and 2 time formats (which will later be used to calculate the speed). i want this code to find the line in fine.txt that have the registration number i inputted and if that vehicle is overspeeding(speed >70mph)the whole line needs to be appended into the file fine1.txt.
The problem is that when I run the code, the error message states:
name = (var[num])
IndexError: string index out of range
I don't know what this means, so can you help me with this?
EDIT: Have updated post for better clarity, no answers have yet to help!
Alright, so my assignment is to take a text file, that would have 4 entries per line, those being firstName, lastName, hours, payRate. I'm to do some calculations and throw all this information in a formatted table in python. Now, I've got the code to enter the data into the table, but it only works for the first entry in the text file, and I can't quite make it loop. I honestly feel like an idiot, and that this is just a simple fix.
My output is supposed to look like this:
http://i.imgur.com/bIOBqye.png
Could really use some pointers on making this loop through and print the data from each line of the text file. Here's how my current code looks:
# Prints a payroll table header, then processes the lines of the file 'emps'.
# NOTE(review): indentation was reconstructed; os and fileQuestion are not
# defined in this excerpt and are presumably set up earlier - confirm.
heading1 = "{0:15s}{1:15s}{2:10s}{3:15s}{4:20s}{5:15s}".format("First Name", "Last Name", "Hours", "Payrate", "Overtime Hours", "Gross Pay")
heading2= "=============================================================================================================="
print(heading1)
print(heading2)
# The existence check uses fileQuestion, but the file opened is always 'emps'.
if os.path.isfile(fileQuestion) == True:
    file = open('emps', 'r')
    data = file.readlines()
    for tmp in data:
        # tmp (the current line) is never used: data2 is rebuilt from the
        # whole file on every pass and data2[0] is always the first line, so
        # each iteration processes the same record.
        data2= [word.rstrip("\n") for word in data]
        first = data2[0].split()
        lastName = first[0]
        firstName = first[1]
        first[2]=(int(first[2]))
        first[3]=(int(first[3]))
        initialHours = first[2]
        payRate = first[3]
        # Hours above 40 are paid at 1.5 times the rate.
        if initialHours > 40:
            overHours = initialHours - 40
            regHours = 40
            regPay = payRate * regHours
            otPay = overHours * (payRate * 1.5)
            grossPay = regPay + otPay
        else:
            regHours = first[2]
            grossPay = initialHours * payRate
            overHours = 0
        heading3= "{0:15s}{1:15s}{2:2d}{3:10d}{4:14d} {5:24.2f}".format(firstName, lastName, regHours, payRate, overHours, grossPay)
        # The "total" and "average" rows show this one record's grossPay, and
        # the overtime total is the literal 33; nothing is accumulated.
        heading4= "{0:15s}{1:21.2f}".format("Total Gross Pay", grossPay)
        heading5= "{0:15s}{1:19.2f}".format("Average Gross Pay", grossPay)
        heading6= "{0:15s}{1:16d}".format("Total Overtime Hours", 33)
        spaceHeading = " "
        print(heading3)
        print(spaceHeading)
        print(heading4)
        print(heading5)
        print(heading6)
Please let me know if I haven't done this correctly or anything, first time here. Thanks.
I found the duplicate, but I think some people here were rude ;/ They focused on enforcing Stack Overflow's rules instead of on the programmer's practical problem :(
Here is my complete answer for your problem:
1)
First of all, you must remember that in Python indentation marks code blocks, in place of the brackets known from other languages.
I reformatted your code; remember that every line needs extra spaces at the beginning when you paste it here ;)
2) like it was said:
first = word.split()
fix "not changing" of lines in loop.
3) Total overtime hours have hardcoded number:
heading6= "{0:15s}{1:16d}".format("Total Overtime Hours", overHours)
Also, overHours(All?) should be not "zeroed" in 'else' block in loop. You must initialize it before loop.
I change some other places i.e. some hardcoded ints, maybe it not ideal and in your style, but you have code with my fixes below...
Best, if you use GitHub or Bitbucket or another repo accesible by web, because you help to contribute if you want it, and also - yourself, to find all the changes which was done. And then, just ask here to help in extremely unknown problems. In the begging of learning it is always hard to find out, but later - you could achieve more!
Here is code after my changes:
from os.path import isfile as isFileExsist
import sys


def pay_breakdown(initialHours, payRate):
    """Return (regHours, overHours, grossPay) for one employee.

    Hours above 40 count as overtime and are paid at 1.5 times payRate.
    """
    if initialHours > 40:
        regHours = 40
        overHours = initialHours - 40
        grossPay = payRate * regHours + overHours * (payRate * 1.5)
    else:
        regHours = initialHours
        overHours = 0
        grossPay = initialHours * payRate
    return regHours, overHours, grossPay


def main():
    """Ask for a file of "lastName firstName hours payRate" lines and print the pay table."""
    pos = ['y', 'Y', 'yes', 'Yes']
    neg = ['n', 'N', 'no', 'No']

    filePath = input("What is the name of your file?: ")
    while not isFileExsist(filePath):
        answer = input("File not found! Do you want to start again? (y-yes/n-no)")
        if answer in neg:
            # sys.exit is always available; the bare exit() helper is only
            # installed by the site module.
            sys.exit("Bye!")
        elif answer in pos:
            filePath = input("What is the name of your file?: ")
        else:
            print("Not sure what is the answer. Try again!")

    # The with-block closes the file once the lines are read.
    with open(filePath, 'r') as source:
        data = source.readlines()

    print("{0:15s}{1:15s}{2:10s}{3:15s}{4:20s}{5:15s}".format("First Name", "Last Name", "Hours", "Payrate", "Overtime Hours", "Gross Pay"))
    print("==============================================================================================================")

    overHoursAll = 0
    grossPayAll = 0
    count = 0
    for line in data:
        words = line.split()
        # Skip blank lines: they are not employees and must not be counted in
        # the average.
        if not words:
            continue
        lastName = words[0]
        firstName = words[1]
        initialHours = int(words[2])
        payRate = int(words[3])
        regHours, overHours, grossPay = pay_breakdown(initialHours, payRate)
        grossPayAll += grossPay
        overHoursAll += overHours
        count += 1
        # one table row per employee
        print("{0:15s}{1:15s}{2:2d}{3:10d}{4:14d}{5:24.2f}".format(firstName, lastName, regHours, payRate, overHours, grossPay))

    # spacer line before the totals
    print(" ")
    # Average over the employees actually processed; 0 when there were none.
    averagePay = grossPayAll / count if count else 0.0
    print("{0:15s}{1:21.2f}".format("Total Gross Pay", grossPayAll))
    print("{0:15s}{1:19.2f}".format("Average Gross Pay", averagePay))
    print("{0:15s}{1:16d}".format("Total Overtime Hours", overHoursAll))


if __name__ == "__main__":
    main()
Best regards, I am sorry for my English.
Well, I think, you probably want data2 = [word.rstrip("\n") for word in tmp], but without seeing sample input and desired output it's hard to tell.
Also,
first[2]=(int(first[2]))
first[3]=(int(first[3]))
initialHours = first[2]
payRate = first[3]
Could be:
initialHours = int(first[2])
payRate = int(first[3])
But you'd also need to change other references to first[2]
Finally, I'd change
if os.path.isfile(fileQuestion) == True:
file = open('emps', 'r')
data = file.readlines()
for tmp in data:
to:
if os.path.isfile(fileQuestion) == True:
with open('emps', 'r') as myfile:
for tmp in myfile:
This ensures that the file gets closed properly (your code doesn't close it), and iterates directly through the file, rather than using readlines(), which needlessly reads the entire file into memory before doing anything else. Note that file is a Python builtin, so it is a bad choice of variable name.
You are using these lines:
data = file.readlines()
for tmp in data:
which already splits your data into lines, and iterates through them. That means that this line [data2= [word.rstrip("\n") for word in data]] is setting data2 to be the first line EVERY TIME, which renders the original for loop useless.
Try instead:
tmp = tmp.split()
which will split each line as you iterate, you can now call tmp as a list, like you called first except it will reflect the values for each line.
You could also change your original for loop to:
for tmp in file:
since file objects in Python are iterators that yield one line at a time (this saves you some memory)
try these changes:
# Expects `data` to be the list of lines read from the employee file, each
# line being "firstName lastName hours payRate".
totothrs = 0
totgross = 0.0
employees = 0
for tmp in data:
    # Skip blank lines so they are not counted as employees.
    if not tmp.strip():
        continue
    employees += 1
    # Field order follows the file: hours come before the pay rate.
    fname, lname, hrs, rate = tmp.split()
    hrs = int(hrs)
    rate = float(rate)
    othrs = 0
    if hrs > 40:
        othrs = hrs - 40
        hrs = hrs - othrs  # regular hours are capped at 40
    totothrs += othrs
    gross = rate * hrs + (1.5*rate)*othrs
    totgross += gross
    # rate is a float, so it needs a float format code ("d" would raise ValueError).
    heading3 = "{0:15s}{1:15s}{2:2d}{3:10.2f}{4:14d} {5:24.2f}".format(fname, lname, hrs, rate, othrs, gross)
    print(heading3)
spaceHeading = " "
# Guard the average against an empty file.
avggross = totgross / employees if employees else 0.0
heading4 = "{0:15s}{1:21.2f}".format("Total Gross Pay", totgross)
heading5 = "{0:15s}{1:19.2f}".format("Average Gross Pay", avggross)
heading6 = "{0:15s}{1:16d}".format("Total Overtime Hours", totothrs)
print(heading4)
print(heading5)
print(heading6)
Note: you don't need to define the "headingN" variables; you can just print the formatted strings directly.
import os.path
import sys

STANDARD_WORKING_HOURS = 40
# Overtime hours are paid at this multiple of the normal rate.
OVERTIME_MULTIPLIER = 1.5


def compute_gross_pay(hours, payrate, standard_working_hours=STANDARD_WORKING_HOURS):
    """Return (overtime, gross_pay) for one employee.

    Hours up to standard_working_hours are paid at payrate; the remainder is
    overtime, paid at OVERTIME_MULTIPLIER times payrate. Adding a full extra
    payrate per overtime hour on top of hours * payrate would pay overtime
    twice over.
    """
    overtime = max(hours - standard_working_hours, 0)
    if overtime:
        regular_pay = standard_working_hours * payrate
        gross_pay = regular_pay + overtime * payrate * OVERTIME_MULTIPLIER
    else:
        gross_pay = hours * payrate
    return overtime, gross_pay


def main():
    """Print the pay table for a file of "first_name last_name hours payrate" lines."""
    #fileQuestion = input("What is the name of your file?: ")
    fileQuestion = "Testfile.txt"

    heading1 = "{0:15s}{1:15s}{2:10s}{3:15s}{4:20s}{5:15s}".format("First Name", "Last Name", "Hours", "Payrate", "Overtime Hours", "Gross Pay")
    heading2= "=============================================================================================================="
    print(heading1)
    print(heading2)

    if not os.path.isfile(fileQuestion):
        print("File not found:", fileQuestion)
        return

    total_gross_pay = 0
    number_of_employees = 0
    total_overtime = 0
    # Iterate over the file directly instead of readlines(); the with-block
    # closes it afterwards.
    with open(fileQuestion, 'r') as file_handle:
        for i in file_handle:
            data = i.strip().split()
            # Blank lines carry no employee.
            if not data:
                continue
            first_name, last_name, hours, payrate = data
            hours = int(hours)
            payrate = int(payrate)
            overtime, gross_pay = compute_gross_pay(hours, payrate)
            total_overtime += overtime
            total_gross_pay += gross_pay
            number_of_employees += 1
            print("{0:15s}{1:15s}{2:10s}{3:15s}{4:20s}{5:15s}".format(first_name, last_name, str(hours), str(payrate), str(overtime), str(gross_pay)))

    print('\n')
    # Avoid dividing by zero when the file lists no employees.
    average_gross_pay = total_gross_pay / number_of_employees if number_of_employees else 0
    print("Total Gross Pay: ",total_gross_pay)
    print("Average Gross Pay: ",average_gross_pay)
    print("Total overtime: ",total_overtime)


if __name__ == "__main__":
    main()
The Code Below I wrote takes input from a sample file which contains First and Last names. Then it converts those names to sample emails. For some reason the Script keeps printing the same Last name over and over.
namess.txt looks like this:
firstname,lastname
CODE:
import os, re, time, getpass, linecache
Original = os.path.join(os.path.expanduser('~'), 'Desktop','namess.txt')
File = os.path.join(os.path.expanduser('~'), 'Desktop','output.txt')
badNames = []
Names = []
def RemCommas():
    """Copy the names file to the output file with every comma replaced by a space.

    Reads the module-level path Original and writes the module-level path
    File, the same file ReadStoreFile reads back, instead of rebuilding a
    hard-coded C:\\Users\\<user>\\Desktop path.
    """
    # with-blocks close both handles; the read handle used to be left open.
    with open(Original) as source:
        Filedata = source.read()
    with open(File, 'w') as outfile:
        outfile.write(re.sub(',', ' ', Filedata))
def ClassNum():
    """Return, as a string, the current four-digit year minus 2013 plus 6."""
    current_year = int(time.strftime('%Y'))
    # (year - 2013) + 6 is the same as year - 2007.
    return str(current_year - 2007)
def ReadStoreFile():
    """Append every line of the module-level File to the module-level badNames list.

    Lines keep their trailing newline.
    """
    # Read the file once and close it. Counting the lines and then re-fetching
    # each one through linecache left the handle open and could return stale
    # cached content after the file had been rewritten.
    with open(File) as OpenFile:
        badNames.extend(OpenFile.readlines())
def CleanNames():
    """Append each entry of badNames, with trailing whitespace removed, to Names."""
    for raw_name in badNames:
        Names.append(raw_name.rstrip())
def NamePrint():
    # For each entry of Names ("first last"), appends one line to emails.txt:
    # first two letters + last name + ClassNum() + Interns.
    # NOTE(review): indentation was reconstructed from the statement order.
    Interns = 'makchessclub.org'
    # arrayname, i and j are created once here and never reset inside the
    # outer loop, so the letters collected for the first name processed stay
    # in arrayname, and i / j keep their final values for later names.
    arrayname = []
    i = 0
    j = 0
    m = 0
    while m < len(Names):
        Name = Names[m]
        Name = Name.lower()
        InternName = Name[0] + Name[1]  # first two characters of the entry
        #------------Checking for space and first name--
        # Advance i to the first space in Name.
        while i < len(Name):
            if Name[i] == ' ':
                i = Name.index(' ')
                break;
            i += 1
        #---------------adding last name in an array----
        # Copy the characters after the space, one per pass.
        Namelen = len(Name) - (i+1)
        while j < Namelen:
            arrayname.append(Name[i+1])
            j += 1
            i += 1
        #---------------Final Name Print----------------
        Lastname = ''.join(arrayname)
        #print arrayname
        #Lastname = Lastname.strip(' ')
        #print InternName + Lastname + ClassNum() + Interns
        # Opened in append mode and closed again for every name.
        file = open('C:\\Users\\username\\Desktop\\emails.txt', 'a')
        file.write(InternName + Lastname + ClassNum() + Interns + '\n')
        file.close()
        m += 1
RemCommas()
ReadStoreFile()
CleanNames()
NamePrint()
print ''
os.system('pause')
The reason the last name doesn't change is because you are not resetting arrayname in your loop. You keep appending names to it, and the program picks the first one. So you should put your arrayname = [] after the while m < len(Names):
I guess this what you are trying to do:
import os
import re
import time
def create_mails(input_path, output_path, year, addr):
    """Turn "first,last" lines of input_path into lower-case addresses in output_path.

    Every "first , last" pair (optional whitespace around the comma) becomes
    first + last + year + addr followed by a newline. The whole result is
    lower-cased before it is written. Prints a confirmation with output_path.
    """
    suffix = '%s%s\n' % (year, addr)

    def to_address(match):
        # A function replacement inserts year/addr literally, so a backslash
        # in either one is not interpreted as a regex template escape.
        return match.group(1) + match.group(2) + suffix

    with open(input_path, 'r') as data:
        mail = re.sub(r'(\w+)\s*,\s*(\w+)\n?', to_address, data.read())
    with open(output_path, 'w') as output:
        output.write(mail.lower())
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Mail addresses generated and saved to ' + output_path)
Demo:
create_mails(
os.path.join(os.path.expanduser('~'), 'Desktop', 'namess.txt'),
os.path.join(os.path.expanduser('~'), 'Desktop', 'output.txt'),
str(int(time.strftime('%Y')) - 2013 + 6),
'#makchessclub.org'
)
If namess.txt is something like this:
First, Last
John,Doe
Spam, Ham
Cabbage, egg
Then output.txt is going to be like this:
firstlast6#makchessclub.org
johndoe6#makchessclub.org
spamham6#makchessclub.org
cabbageegg6#makchessclub.org