Splitting data in file in Python

Splitting data in file in Python - python

class Student:
def __init__(self, name, hours, qpoints):
self.name = name
self.hours = float(hours)
self.qpoints = float(qpoints)
def getName(self):
return self.name
def getHours(self):
return self.hours
def getQPoints(self):
return self.qpoints
def gpa(self):
return self.qpoints/self.hours
def makeStudent(infoStr):
name, hours, qpoints = infoStr.split("\t")
return Student(name, hours, qpoints)
def main():
fileName = input("Enter file name: ")
infile = open(fileName, "r")
best = makeStudent(infile.readline())
for line in infile:
s = makeStudent(line)
if s.gpa() > best.gpa():
best = s
infile.close()
print("The best student is:", best.getName())
print("hours:", best.getHours())
print("GPA:", best.gpa())
if __name__ == '__main__':
main()
I want to read line from a text file, split it in by "\t" or "," so I can assign it to variables, and I get "ValueError: not enough values to unpack (expected 3, got 1) in makeStudent(infoStr) function. File I use is written correctly, I get same error if I edit file and code to "," instead of "\t". Why is that happening? Edit: Issue was in skipping lines in text. Solved.

Sometimes the infoStr line may not contain the character you're splitting on (e.g. a blank line ''). Wrap this in a try block and you should be fine.
try:
name, hours, qpoints = infoStr.split('\t')
except ValueError:
name, hours, qpoints = None, None, None
You'll then need to handle the None case before instantiating Student.

I bet this is a classic tabs vs. spaces problem. Your file might actually be space separated due to IDE formatting or search and replace going haywire.
Try this:
def makeStudent(infoStr):
FAKE_TAB = ' '
name, hours, qpoints = infoStr.split(FAKE_TAB)
return Student(name, hours, qpoints)
If that doesn't work, determine how many spaces are between each value in each line manually and then replace FAKE_TAB with that. Admittedly, its a slightly sketchy patch...

Notice that you are already iterating over the file lines with the block starting in for line in infile, so there's no need to do infile.readline() within.
Also you could check your line format before sending it to your function (or checking the format in the fuction, whatever you prefer).
{truncated code}
# This loop will put on each iteration the next line of the file in the "line" var.
for line in infile:
# You need two commas in your line to be able to split it in 3 values.
if line.count(",") != 2:
print("WARN: Invalid format in line: "+line)
# Of course that you could implement some counter to identify
# the problematic line location within the file...
quit()
s = makeStudent(line)
if s.gpa() > best.gpa():
best = s
{truncated code}

Related

Replacing multiple words in a string from different data sets in Python

Essentially I have a python script that loads in a number of files, each file contains a list and these are used to generate strings. For example: "Just been to see $film% in $location%, I'd highly recommend it!" I need to replace the $film% and $location% placeholders with a random element of the array of their respective imported lists.
I'm very new to Python but have picked up most of it quite easily but obviously in Python strings are immutable and so handling this sort of task is different compared to other languages I've used.
Here is the code as it stands, I've tried adding in a while loop but it would still only replace the first instance of a replaceable word and leave the rest.
#!/usr/bin/python
import random
def replaceWord(string):
#Find Variable Type
if "url" in string:
varType = "url"
elif "film" in string:
varType = "film"
elif "food" in string:
varType = "food"
elif "location" in string:
varType = "location"
elif "tvshow" in string:
varType = "tvshow"
#LoadVariableFile
fileToOpen = "/prototype/default_" + varType + "s.txt"
var_file = open(fileToOpen, "r")
var_array = var_file.read().split('\n')
#Get number of possible variables
numberOfVariables = len(var_array)
#ChooseRandomElement
randomElement = random.randrange(0,numberOfVariables)
#ReplaceWord
oldValue = "$" + varType + "%"
newString = string.replace(oldValue, var_array[randomElement], 1)
return newString
testString = "Just been to see $film% in $location%, I'd highly recommend it!"
Test = replaceWord(testString)
This would give the following output: Just been to see Harry Potter in $location%, I'd highly recommend it!
I have tried using while loops, counting the number of words to replace in the string etc. however it still only changes the first word. It also needs to be able to replace multiple instances of the same "variable" type in the same string, so if there are two occurrences of $film% in a string it should replace both with a random element from the loaded file.

The following program may be somewhat closer to what you are trying to accomplish. Please note that documentation has been included to help explain what is going on. The templates are a little different than yours but provide customization options.
#! /usr/bin/env python3
import random
PATH_TEMPLATE = './prototype/default_{}s.txt'
def main():
"""Demonstrate the StringReplacer class with a test sting."""
replacer = StringReplacer(PATH_TEMPLATE)
text = "Just been to see {film} in {location}, I'd highly recommend it!"
result = replacer.process(text)
print(result)
class StringReplacer:
"""StringReplacer(path_template) -> StringReplacer instance"""
def __init__(self, path_template):
"""Initialize the instance attribute of the class."""
self.path_template = path_template
self.cache = {}
def process(self, text):
"""Automatically discover text keys and replace them at random."""
keys = self.load_keys(text)
result = self.replace_keys(text, keys)
return result
def load_keys(self, text):
"""Discover what replacements can be made in a string."""
keys = {}
while True:
try:
text.format(**keys)
except KeyError as error:
key = error.args[0]
self.load_to_cache(key)
keys[key] = ''
else:
return keys
def load_to_cache(self, key):
"""Warm up the cache as needed in preparation for replacements."""
if key not in self.cache:
with open(self.path_template.format(key)) as file:
unique = set(filter(None, map(str.strip, file)))
self.cache[key] = tuple(unique)
def replace_keys(self, text, keys):
"""Build a dictionary of random replacements and run formatting."""
for key in keys:
keys[key] = random.choice(self.cache[key])
new_string = text.format(**keys)
return new_string
if __name__ == '__main__':
main()

The varType you are assigning will be set in only one of your if-elif-else sequence and then the interpreter will go outside. You would have to run all over it and perform operations. One way would be to set flags which part of sentence you want to change. It would go that way:
url_to_change = False
film_to_change = False
if "url" in string:
url_to_change = True
elif "film" in string:
film_to_change = True
if url_to_change:
change_url()
if film_to_change:
change_film()
If you want to change all occurances you could use a foreach loop. Just do something like this in the part you are swapping a word:
for word in sentence:
if word == 'url':
change_word()
Having said this, I'd reccomend introducing two improvements. Push changing into separate functions. It would be easier to manage your code.
For example function for getting items from file to random from could be
def load_variable_file(file_name)
fileToOpen = "/prototype/default_" + file_name + "s.txt"
var_file = open(fileToOpen, "r")
var_array = var_file.read().split('\n')
var_file.clos()
return var_array
Instead of
if "url" in string:
varType = "url"
you could do:
def change_url(sentence):
var_array = load_variable_file(url)
numberOfVariables = len(var_array)
randomElement = random.randrange(0,numberOfVariables)
oldValue = "$" + varType + "%"
return sentence.replace(oldValue, var_array[randomElement], 1)
if "url" in sentence:
setnence = change_url(sentence)
And so on. You could push some part of what I've put into change_url() into a separate function, since it would be used by all such functions (just like loading data from file). I deliberately do not change everything, I hope you get my point. As you see with functions with clear names you can write less code, split it into logical, reusable parts, no needs to comment the code.

A few points about your code:
You can replace the randrange with random.choice as you just
want to select an item from an array.
You can iterate over your types and do the replacement without
specifying a limit (the third parameter), then assign it to the same object, so you keep all your replacements.
readlines() do what you want for open, read from the file as store the lines as an array
Return the new string after go through all the possible replacements
Something like this:
#!/usr/bin/python
import random
def replaceWord(string):
#Find Variable Type
types = ("url", "film", "food", "location", "tvshow")
for t in types:
if "$" + t + "%" in string:
var_array = []
#LoadVariableFile
fileToOpen = "/prototype/default_" + varType + "s.txt"
with open(fname) as f:
var_array = f.readlines()
tag = "$" + t + "%"
while tag in string:
choice = random.choice(var_array)
string = string.replace(tag, choice, 1)
var_array.remove(choice)
return string
testString = "Just been to see $film% in $location%, I'd highly recommend it!"
new = replaceWord(testString)
print(new)

NameError in Python - not sure if the class or the function is causing the error

I have a text file describing resumes where each line looks like:
name university sex filename
So one line would say something like
John Texas M resume1.doc
The file has standard formatting and does not contain any errors. There are four possible names and four possible universities, randomized to create 64 resumes. I'm trying to write a program that reads through the text file, creates a resume object with attributes for the name, university, sex, and filename, and adds these objects to a list of resume objects. I have a lot of experience in C++, but this is my first Python program and I'm getting thrown off by an error:
File "mycode.py", line 142, in <module>
resumes()
File "mycode.py", line 65, in resumes
r = resume(name,uni,sex,filename)
NameError: global name "name" is not defined
My code looks like:
class resume:
def __init__(self, name, uni, sex, filename)
self.name = name
self.uni = uni
self.sex = sex
self.filename = filename
mylist[]
def resumes():
f = open("resumes.txt",'r')
for line in f:
for word in line.split():
if word == ("John" or "Fred" or "Jim" or "Michael"):
name = word
elif word == ("Texas" or "Georgia" or "Florida" or "Montana"):
uni = word
elif word == "M":
sex = word
elif re.match(r'\w\.doc',word):
filename = word
r = resume(name,uni,sex,filename)
mylist.insert(r)
I'm not sure if the error is in the class or the function. My computer isn't showing any syntax errors but I'm new to this so if there are, please feel free to tell me how to fix them.
I've tried defining name, uni, etc. outside the "for word in line.split()" loop but the program still had an issue with the line "r = resume(name,uni,sex,filename)" so I'm not sure what the issue is. I've read through other answers about NameError but I'm new to Python and couldn't figure out the equivalent problem in my code.

The NameError is caused by undefined variables in cases where no values are found in the text file. Define them within the function before you try to assign values from the text file to them:
def resumes():
f = open("resumes.txt",'r')
for line in f:
name = ""
uni = ""
sex = ""
filename = ""
for word in line.split():
...
You can also pre-define the variables in your class initialization by using keyword arguments if you like (this isn't the cause of the NameError though):
class resume:
def __init__(self, name="", uni="", sex="", filename="")
self.name = name
self.uni = uni
self.sex = sex
self.filename = filename
Defining a list in python is done by typing mylist = [], not mylist[]. Also, at the moment, the list would be defined in the global namespace which is generally discouraged. Instead, you can make resumes return a list and assign this value to mylist:
def resumes():
resume_list = []
f = open("resumes.txt",'r')
for line in f:
for word in line.split():
if word == ("John" or "Fred" or "Jim" or "Michael"):
name = word
elif word == ("Texas" or "Georgia" or "Florida" or "Montana"):
uni = word
elif word == "M":
sex = word
elif re.match(r'\w\.doc',word):
filename = word
r = resume(name,uni,sex,filename)
resume_list.insert(r)
return resume_list
Then you can do the following anywhere in your code:
mylist = resumes()
Remember to close files after opening them; in your case by calling f.close() after processing all the lines. Even better, have python manage it automatically by using the context manager with so you don't have to call f.close():
def resumes():
with open("resumes.txt",'r') as f:
for line in f:
...
Typically, you'd use append rather than insert when working with lists. insert takes two arguments (position/index, and the element to insert) so mylist.insert(r) should raise a TypeError: insert() takes exactly 2 arguments (1 given). Instead, do mylist.append(r) to insert r after the last element in the list.
As, johnrsharpe pointed out in the comments, your word comparisons probably aren't doing what you expect. See this example:
>>> word = "John"
>>> word == ("John" or "Fred" or "Jim" or "Michael")
True
>>> word = "Fred"
>>> word == ("John" or "Fred" or "Jim" or "Michael")
False
>>>
Instead, use a tuple or a set and the keyword in to check if word equals any of the four names:
>>> word = "John"
>>> word in {"John", "Fred", "Jim", "Michael"}
True
>>> word = "Fred"
>>> word in {"John", "Fred", "Jim", "Michael"}
True
>>>
>>> type({"John", "Fred", "Jim", "Michael"})
<type 'set'>
>>>
Finally, as Daniel pointed out, remember the colon, :, after function definitions such as def __init__(...)

Your code is throwing a NameError because at some point in the iteration of your file, some word variable doesn't fulfill any of the conditionals in this line of your function: if word == ("John" or "Fred" or "Jim" or "Michael"):, and name doesn't get defined.
The simplest way to workaround this error is to assign default values to your variables outside the scopes of your class and function (or within the scope of your function):
name = "name"
uni = "uni"
sex = "sex"
filename = "filename"
class resume:
# rest of your code
As an alternative, you could include conditional checks within your function for your variables; if the variable isn't yet defined, assign it a default value:
if "name" not in locals():
name = "name"
r = resume(name,uni,sex,filename)
Finally, you'll want to append a colon to this line, from this:
def __init__(self, name, uni, sex, filename)
to this:
def __init__(self, name, uni, sex, filename):
change this line where you intialize mylist from this:
mylist[]
to this:
mylist = []
and change:
mylist.insert(r)
to:
mylist.append(r)

Saving name, number and alias from a dynamic phonebook to a file

I'm making a dynamic phone-book where you can save name, number and alias. When you are done you can choose too save everything to a file, this is where the problem comes. I've figured out how to save my names and numbers, but not the aliases.
this is a piece of the function where i save my alias:
def main()
..stuff
def alias(person_list, input_list):
..stuff..
for persons in list(person_list):
..stuff..
person_list[person_list.index(persons)].addAlias(newname)
print "Alias saved"
the methods i use:
class person():
..stuff..
def addAlias(self, alias):
self.alias.append(alias)
def hasAlias(self, alias):
if alias in self.alias:
return True
else:
return False
this is the function where i want to save everything:
def save(input_list, person_list):
filename = input_list[1]
f = open(filename, "w")
for i in range(0, len(person_list)):
line = person_list[i].number + ";" + person_list[i].name + ";" + "\n"
f.write(line,)
f.close
I can find out if the element person_list[i] has an alias with the method hasAlias, but I can't figure out how I can get out the value alias, not just True and False, and print that together with the name and number.

Your problem description isn't quite clear enough for me to be sure what you want. Please refer to MCVE.
I think that you're merely missing a method to retrieve the alias, such as
getAlais(self):
return self.alias
Then you simply include that call in your output line. If this isn't what you mean by "get out the value alias", then please clarify.

Why does my program add ('', ' to the name of my file?

Here is my code (sorry for the messy code):
def main():
pass
if __name__ == '__main__':
main()
from easygui import *
import time
import os
import random
import sys
##multenterbox(msg='Fill in values for the fields.', title=' ', fields=(), values=())
msg = "Enter your personal information"
title = "Credit Card Application"
fieldNames = ["First name",'Last name','email',"Street Address","City","State","ZipCode",'phone','phone 2)']
fieldValues = [] # we start with blanks for the values
fieldValues = multenterbox(msg,title, fieldNames)
# make sure that none of the fields was left blank
def make(x):
xys = x,".acc"
xyzasd = str(xys)
tf = open(xyzasd,'a+')
tf.writelines(lifes)
tf.writelines("\n")
tf.writelines("credits = 0")
tf.close
def add(x):
nl = "\n"
acc = ".acc"
xy = x + acc
exyz = xy
xyz = exyz
xxx = str(xyz)
tf = open('accounts.dat',"a+")
tf.writelines(nl)
tf.writelines(xxx)
tf.close
while 1:
if fieldValues == None: break
errmsg = ""
for i in range(len(fieldNames)-1):
if fieldValues[i].strip() == "":
errmsg += ('"%s" is a required field.\n\n' % fieldNames[i])
if errmsg == "":
break # no problems found
fieldValues = multenterbox(errmsg, title, fieldNames, fieldValues)
names = enterbox(msg= ('confirm FIRST name and the FIRST LETTER of the persons LAST name'))
##txt = "acc"
##na = str(name)
##name = (names)
life = ( str(fieldValues))
lifes = life,'\n'
herro = ("Reply was: %s" % str(fieldValues))
correct = buttonbox(msg=(herro,'\n is that correct'),choices = ('yes','no','cancel'))
if correct == "yes":
make(names)
add(names)
elif correct == "no":
os.system('openacc.py')
time.sleep(0.5)
sys.exit()
else:
os.system('cellocakes-main.py')
sys.exit()
os.system('cellocakes-main.py')
I don't know what the problem is also I am sorry about how sloppy it was programmed I have a white board to help me out still new to programming (I'm only 13) sorry. Personally I think the issue is in the def add area's syntax but because I am still new I don't see the issue personally I am hoping to have a more experienced programmer help me out.

This is an answer not directly answering your question.
Alas, comment fields are STILL not capable to hold formatted code, so I choose this way.
def main():
pass
if __name__ == '__main__':
main()
This is a nice coding pattern, but used by you in a useless way.
It is supposed to prevent executing of the stuff if it is imported as a module and not executed as a script.
Nevertheless, it is not bad to use it always, but then put your code inside the main() function instead of adding it below.
fieldNames = ["First name",'Last name','email',"Street Address","City","State","ZipCode",'phone','phone 2)']
There is a ) too much.
fieldValues = [] # we start with blanks for the values
fieldValues = multenterbox(msg,title, fieldNames)
The second line makes the first one useless, as you don't use fieldValues in-between.
It would be different if you expected multenterbox() to fail and would want [] as a default value.
def make(x):
xys = x,".acc"
xyzasd = str(xys)
tf = open(xyzasd,'a+')
tf.writelines(lifes)
tf.writelines("\n")
tf.writelines("credits = 0")
tf.close
You was already told about this: x, ".acc" creates a tuple, not a string. To create a string, use x + ".acc".
Besides, your close call is no call, because it is missing the (). This one just references the function and ignores the value.
A better way to write this would be (please name your variables appropriately)
with open(xyzs, 'a+') as tf:
tf.writelines(lifes)
tf.writelines("\n")
tf.writelines("credits = 0")
The with statement automatically closes the file, even if an error occurs.
Besides, you use writelines() wrong: it is supposed to take a sequence of strings and write each element to the file. As it doesn't add newlines in-between, the result looks the same,. but in your case, it writes each byte separately, making it a little bit more inefficient.
Additionally, you access the global variable lifes from within the function. You should only do such things if it is absolutely necessary.
def add(x):
Here the same remarks hold as above, plus
xy = x + acc
exyz = xy
xyz = exyz
xxx = str(xyz)
why that? Just use xy; the two assignments do nothing useful and the str() call is useless as well, as you already have a string.
for i in range(len(fieldNames)-1):
if fieldValues[i].strip() == "":
errmsg += ('"%s" is a required field.\n\n' % fieldNames[i])
Better:
for name, value in zip(fieldNames, fieldValues):
if not value.strip(): # means: empty
errmsg += '"%s" is a required field.\n\n' % name
Then:
life = ( str(fieldValues))
makes a string from a list.
lifes = life,'\n'
makes a tuple from these 2 strings.
os.system('openacc.py')
os.system('cellocakes-main.py')
Please don't use os.system(); it is deprecated. Better use the subprocess module.

The problem of the question is here:
# assign the tuple (x, ".acc") to xys
xys = x,".acc"
# now xyzasd is the tuple converted to a string, thus
# making the name of your file into '("content of x", ".acc")'
xyzasd = str(xys)
# and open file named thus
tf = open(xyzasd,'a+')
What you wanted to do is:
# use proper variable and function names!
def make_account(account):
filename = account + '.acc'
the_file = open(filename, 'a+')
....
On the other hand there are other problems with your code, for example the
def main():
pass
if __name__ == '__main__':
main()
is utterly useless.

"Backspace" over last character written to file

I have a Python application which outputs an SQL file:
sql_string = "('" + name + "', " + age + "'),"
output_files['sql'].write(os.linesep + sql_string)
output_files['sql'].flush()
This is not done in a for loop, it is written as data becomes available. Is there any way to 'backspace' over the last comma character when the application is done running, and to replace it with a semicolon? I'm sure that I could invent some workaround by outputting the comma before the newline, and using a global Bool to determine if any particular 'write' is the first write. However, I think that the application would be much cleaner if I could just 'backspace' over it. Of course, being Python maybe there is such an easier way!
Note that having each insert value line in a list and then imploding the list is not a viable solution in this use case.

Use seek to move your cursor one byte (character) backwards, then write the new character:
f.seek(-1, os.SEEK_CUR)
f.write(";")
This is the easiest change, maintaining your current code ("working code" beats "ideal code") but it would be better to avoid the situation.

How about adding the commas before adding the new line?
first_line = True
...
sql_string = "('" + name + "', " + age + "')"
if not first_line:
output_files['sql'].write(",")
first_line = False
output_files['sql'].write(os.linesep + sql_string)
output_files['sql'].flush()
...
output_files['sql'].write(";")
output_files['sql'].flush()
You did mention this in your question - I think this is a much clearer to a maintainer than seeking commas and overwriting them.
EDIT: Since the above solution would require a global boolean in your code (which is not desirable) you could instead wrap the file writing behaviour into a helper class:
class SqlFileWriter:
first_line = True
def __init__(self, file_name):
self.f = open(file_name)
def write(self, sql_string):
if not self.first_line:
self.f.write(",")
self.first_line = False
self.f.write(os.linesep + sql_string)
self.f.flush()
def close(self):
self.f.write(";")
self.f.close()
output_files['sql'] = SqlFileWriter("myfile.sql")
output_files['sql'].write("('" + name + "', '" + age + "')")
This encapsulates all the SQL notation logic into a single class, keeping the code readable and at the same time simplifying the caller code.

Try opening the file to write as binary: 'wb' instead of 'w'.

Use generators, e.g.:
def with_separator(data, sep):
first = True:
for datum in data:
if first:
first = False
else:
yield sep
yield datum
with open("sdfasdfas", "w") as outf:
for x in with_separator(sql_get_rows(), ",\n"):
outf.write(x)
# flush if needed
For hardcore iterator use, this should get you started:
In [11]: list( itertools.imap("".join, itertools.izip(itertools.chain([""], itertools.repeat(",\n")), "abc")) )
Out[11]: ['a', ',\nb', ',\nc']
If your data uses imperative API, that is not iterable, send() your data to generator:
def write_with_separator(filename, sep):
with file(filename, "w"):
first = True
yield None
while True:
datum = yield None
if first:
first = False
else:
fout.write(sep)
fout.write(datum)
# flush if needed
writer = write_with_separator("somefilename", ",\n")
writer.next() # can't send to just-started generator
# to be called when you get data
for row in sql_get_rows():
writer.send(row)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Splitting data in file in Python - python

Related

Replacing multiple words in a string from different data sets in Python

NameError in Python - not sure if the class or the function is causing the error

Saving name, number and alias from a dynamic phonebook to a file

Why does my program add ('', ' to the name of my file?

"Backspace" over last character written to file

Categories

Resources