Rewrite the code without using objects in python - python

When not using objects, I am finding it difficult to store and parse the data. I am looking for ways to shorten the code using generator comprehensions,
for a problem where input to the program code is as below
Courses
TRAN~Transfiguration~1~2011-2012~Minerva McGonagall
CHAR~Charms~1~2011-2012~Filius Flitwick
Students
SLY2301~Hannah Abbott
SLY2302~Euan Abercrombie
SLY2303~Stewart Ackerley
SLY2304~Bertram Aubrey
SLY2305~Avery
SLY2306~Malcolm Baddock
SLY2307~Marcus Belby
SLY2308~Katie Bell
SLY2309~Sirius Orion Black
Grades
TRAN~1~2011-2012~SLY2301~AB
TRAN~1~2011-2012~SLY2302~B
TRAN~1~2011-2012~SLY2303~B
TRAN~1~2011-2012~SLY2305~A
TRAN~1~2011-2012~SLY2306~BC
TRAN~1~2011-2012~SLY2308~A
TRAN~1~2011-2012~SLY2309~AB
CHAR~1~2011-2012~SLY2301~A
CHAR~1~2011-2012~SLY2302~BC
CHAR~1~2011-2012~SLY2303~B
CHAR~1~2011-2012~SLY2305~BC
CHAR~1~2011-2012~SLY2306~C
CHAR~1~2011-2012~SLY2307~B
CHAR~1~2011-2012~SLY2308~AB
EndOfInput
Expected Output
SLY2301~Hannah Abbott~9.5
SLY2302~Euan Abercrombie~7.5
SLY2303~Stewart Ackerley~8.0
SLY2304~Bertram Aubrey~0
SLY2305~Avery~8.5
SLY2306~Malcolm Baddock~6.5
SLY2307~Marcus Belby~8.0
SLY2308~Katie Bell~9.5
SLY2309~Sirius Orion Black~9.0
I managed to solve it using objects but is there any other way to write the code without using objects?
import sys
from Courses.Courses import Course
from Students.Students import Student
from Grades.Grades import Grade
courses = []
students = []
grades = []
gradeDict = {'A':10,'AB':9,'B':8,'BC':7,'C':6,'CD':5,'D':4}
courseCodeDict = {}
def readInput():
    """Read the tilde-separated input from the console until ``EndOfInput``.

    A line that is exactly ``Courses``, ``Students`` or ``Grades`` switches
    the current section. Every other line is passed to
    ``extractDataFromRawData`` together with the section it belongs to.
    After the ``EndOfInput`` sentinel the grade averages are calculated.
    """
    # Stays None until the first header is seen, so a data line that arrives
    # too early is passed on with no section instead of raising a NameError.
    section = None
    while True:
        # Reading data from console
        input_var = raw_input()
        if input_var == "EndOfInput":
            break
        # Compare against whole header names: the former substring test
        # (`in "Courses Students Grades"`) also matched "", "s", "Grade", ...
        if input_var in ("Courses", "Students", "Grades"):
            section = input_var
        else:
            extractDataFromRawData(input_var, section)
    calculateGradeAverage(grades, courses)
def calculateGradeAverage(grades, courses):
    """Print courses taken, total grade points and average per roll number.

    ``courses`` items are read through ``course_code``; ``grades`` items
    through ``roll_number``, ``course_code`` and the numeric ``grade``.
    Every course code is registered in the module-level ``courseCodeDict``
    with weight 1. Running totals are stored on each grade object
    (``totalGradePoint``, ``totalCourseTaken``, ``avgGrade``), so the last
    grade of a roll number carries that student's final figures.

    Raises:
        ValueError: if a grade refers to a course code not in ``courses``.
    """
    print("Calculating Average now...")
    for course in courses:
        courseCodeDict.update({course.course_code: 1})

    gradeRollNumberDict = {}
    courseGradeDict = {}
    for grade in grades:
        weight = courseCodeDict.get(grade.course_code)
        if weight is None:
            # Previously this surfaced later as an opaque TypeError.
            raise ValueError("Unknown course code: {0}".format(grade.course_code))
        roll = grade.roll_number
        grade.totalGradePoint = grade.grade + gradeRollNumberDict.get(roll, 0)
        gradeRollNumberDict[roll] = grade.totalGradePoint
        grade.totalCourseTaken = weight + courseGradeDict.get(roll, 0)
        courseGradeDict[roll] = grade.totalCourseTaken
        # float() keeps the fractional part under Python 2 integer division;
        # rounding to whole numbers would turn 9.5 into 9.0.
        grade.avgGrade = round(float(grade.totalGradePoint) / grade.totalCourseTaken, 2)

    # Walk backwards so the first entry met for each roll number is the one
    # holding the complete totals; reversed() leaves the caller's list intact.
    seenGrades = set()
    uniqueGrades = []
    for grade in reversed(grades):
        if grade.roll_number not in seenGrades:
            uniqueGrades.append(grade)
            seenGrades.add(grade.roll_number)
    for a in uniqueGrades:
        print(a.roll_number)

    uniqueGrades.sort(key=lambda grade: grade.roll_number)
    for grade in uniqueGrades:
        print("RollNumber: {0} \t Total CourseTaken: {1} \t Total Grade Point: {2} \t Avg Grade: {3}".format(grade.roll_number,grade.totalCourseTaken,grade.totalGradePoint,grade.avgGrade))
def extractDataFromRawData(input_data, section):
    """Parse one raw input line and store it in the list of its section.

    ``section`` selects the target: "Courses", "Students" or "Grades".
    Any other value prints an error message and terminates the program.
    """
    if section == "Courses":
        courses.append(createCourseObject(input_data))
        return
    if section == "Students":
        students.append(createStudentObject(input_data))
        return
    if section == "Grades":
        grades.append(createGradeObject(input_data))
        return
    print("Invalid input!!! Exiting the system...")
    sys.exit()
def createCourseObject(input_data):
courseInputData = input_data.split("~")
course = Course(courseInputData[0],courseInputData[1],courseInputData[2],courseInputData[3],courseInputData[4])
return course
def createStudentObject(input_data):
studentInputData = input_data.split("~")
student = Student(studentInputData[0],studentInputData[1])
return student
def createGradeObject(input_data):
gradeInputData = input_data.split("~")
grade = Grade(gradeInputData[0],gradeInputData[1],gradeInputData[2],gradeInputData[3],gradeDict[gradeInputData[4]])
return grade
def printData(courses,students,grades):
printObject(courses,"Courses")
printObject(students,"Students")
printObject(grades,"Grades")
def printObject(list,object):
print("Printing %s"%object)
for data in list:
print(data)
if __name__ == '__main__':
readInput()

Code:
from collections import OrderedDict
from pprint import pprint as pp
SEPARATOR = "~"
GRADE_DICT = {
"A": 10,
"AB": 9,
"B": 8,
"BC": 7,
"C": 6,
"CD": 5,
"D": 4
}
def read_input_from_file(file_name="input.txt"):
    """Read the sectioned input file and return its raw record lines.

    Lines are stripped. A line equal to "Courses", "Students" or "Grades"
    selects the list that following lines are appended to; "EndOfInput"
    stops the reading. Non-blank lines seen before any section header are
    reported and ignored.

    Returns:
        A ``(course_list, student_list, grade_list)`` tuple of string lists.
    """
    course_list = []
    student_list = []
    grade_list = []
    section_map = {
        "Courses": course_list,
        "Students": student_list,
        "Grades": grade_list,
    }
    current_item = None
    with open(file_name) as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # A blank line is not a record; storing it would make any
                # later separator-based unpacking of the entry fail.
                continue
            if line == "EndOfInput":
                break
            if line in section_map:
                current_item = section_map[line]
            elif current_item is not None:
                current_item.append(line)
            else:
                print("Ignoring line: {}".format(line))
    return course_list, student_list, grade_list
def convert_names(name_list):
    """Map student ids to names, keeping the order of ``name_list``.

    Each element is expected to look like ``"<id><SEPARATOR><name>"``.
    An element without the separator raises ValueError.
    """
    ret = OrderedDict()
    for element in name_list:
        # Split on the first separator only, so a name that itself contains
        # the separator does not break the two-value unpacking.
        student_id, name = element.split(SEPARATOR, 1)
        ret[student_id] = name
    return ret
def convert_grades(grade_list):
    """Group grade entries as ``{student_id: {course_id: grade_id}}``.

    The last two separator-delimited fields of an entry are the student id
    and the grade; everything before them is used as the course id.
    """
    grades_by_student = {}
    for entry in grade_list:
        course_id, student_id, grade_id = entry.rsplit(SEPARATOR, 2)
        if student_id not in grades_by_student:
            grades_by_student[student_id] = {}
        grades_by_student[student_id][course_id] = grade_id
    return grades_by_student
def main():
    """Load the input file and print every student's average grade.

    Prints the parsed students, the grades grouped per student, and then one
    ``id~name~average`` line per student in input order. A student without
    any grade is printed with "0.0".
    """
    course_list, student_list, grade_list = read_input_from_file()
    student_dict = convert_names(student_list)
    print("\n[SECTION 0]: Student IDs and names:\n")
    pp(student_dict)
    exam_stat_dict = convert_grades(grade_list)
    print("\n[SECTION 1]: Grades organized by students and courses:\n")
    pp(exam_stat_dict)
    print("\n[SECTION 2]: Final Grades:\n")
    for student_id, student_name in student_dict.items():
        grade_dict = exam_stat_dict.get(student_id)
        if grade_dict:
            grades_sum = sum(GRADE_DICT.get(item, 0) for item in grade_dict.values())
            # float() first: under Python 2 int / int would truncate 9.5 to 9.
            average = float(grades_sum) / len(grade_dict)
            print(SEPARATOR.join([student_id, student_name, str(average)]))
        else:
            print(SEPARATOR.join([student_id, student_name, "0.0"]))
if __name__ == "__main__":
main()
Output (I'm placing it before the Notes, since I'm going to refer to it from there):
(py35x64_test) c:\Work\Dev\StackOverflow\q45987148>python a.py
[SECTION 0]: Student IDs and names:
OrderedDict([('SLY2301', 'Hannah Abbott'),
('SLY2302', 'Euan Abercrombie'),
('SLY2303', 'Stewart Ackerley'),
('SLY2304', 'Bertram Aubrey'),
('SLY2305', 'Avery'),
('SLY2306', 'Malcolm Baddock'),
('SLY2307', 'Marcus Belby'),
('SLY2308', 'Katie Bell'),
('SLY2309', 'Sirius Orion Black')])
[SECTION 1]: Grades organized by students and courses:
{'SLY2301': {'CHAR~1~2011-2012': 'A', 'TRAN~1~2011-2012': 'AB'},
'SLY2302': {'CHAR~1~2011-2012': 'BC', 'TRAN~1~2011-2012': 'B'},
'SLY2303': {'CHAR~1~2011-2012': 'B', 'TRAN~1~2011-2012': 'B'},
'SLY2305': {'CHAR~1~2011-2012': 'BC', 'TRAN~1~2011-2012': 'A'},
'SLY2306': {'CHAR~1~2011-2012': 'C', 'TRAN~1~2011-2012': 'BC'},
'SLY2307': {'CHAR~1~2011-2012': 'B'},
'SLY2308': {'CHAR~1~2011-2012': 'AB', 'TRAN~1~2011-2012': 'A'},
'SLY2309': {'TRAN~1~2011-2012': 'AB'}}
[SECTION 2]: Final Grades:
SLY2301~Hannah Abbott~9.5
SLY2302~Euan Abercrombie~7.5
SLY2303~Stewart Ackerley~8.0
SLY2304~Bertram Aubrey~0.0
SLY2305~Avery~8.5
SLY2306~Malcolm Baddock~6.5
SLY2307~Marcus Belby~8.0
SLY2308~Katie Bell~9.5
SLY2309~Sirius Orion Black~9.0
Notes:
This is a "slightly" modified version of your code, that only uses stuff from Python standard library
Code explanation:
read_input_from_file (since it's only a helper function, I'm not going to insist much on it):
I saved the input (copy/paste) in a file (called it input.txt), and every time the program runs, it loads the data from there (the reason is obvious)
It (populates and) returns 3 lists (courses, students and grades from your code)
convert_names:
Converts every student name entry (as given in input) into a dictionary*: {id: name} (e.g. "SLY2301~Hannah Abbott" -> {"SLY2301": "Hannah Abbott"}) - the key will be id
*In Python versions before 3.7, a regular Python dictionary ([Python]: Mapping Types — dict) does not guarantee that its elements are kept in insertion order: the iteration order depends on the hashes of the keys (and the hash function can change between Python versions). As an example, in such a version you could type {1:2, 0:1} in the Python console and you'll see that it will output {0: 1, 1: 2}. That is why I'm using [Python]: class collections.OrderedDict([items]), which guarantees the insertion order of the keys (since Python 3.7, a plain dict preserves insertion order as well)
The return value can be seen in program output (SECTION 0)
convert_grades:
This is where (most of) the magic takes place
Converts every grade entry (as given in input) in a dictionary: {student_id : {course_id: grade_id}} (the last 2 values are aggregated in an inner dictionary; e.g. "TRAN~1~2011-2012~SLY2301~AB" -> {"SLY2301": {"TRAN~1~2011-2012": "AB"}}). For that, I'm using [Python]: str.rsplit(sep=None, maxsplit=-1) with a maxsplit value of 2, as I don't care about the ~s in TRAN~1~2011-2012
If a student_id is present more than once (was to more than 1 course exam), I am just adding the course_id and grade_id in the inner dictionary (this is where [Python]: setdefault(key[, default]) comes into play)
The return value can be seen in program output (SECTION 1)
main:
The program main function. Here, I'm making use of the other functions and display the final data in a proper manner to the user (SECTION 2)
If there was a student that wasn't at any exam, like Bertram Aubrey (the id is not present in the exam statistics dictionary), I just print the id, name and 0.0
Otherwise, I calculate the arithmetic average from the grades in the inner dictionary (I am using [Python]: list Comprehensions to convert the grades into actual numbers, [Python]: sum(iterable[, start]) to sum the grades, then I divide the total by the number of inner dictionary keys) and display it, together with the id and name
The code runs with Python3 and Python2
#EDIT0:
Adding read_input function (read_input_from_file with minimum and trivial modifications) to read input from keyboard:
def read_input():
    """Read the sectioned input from standard input.

    A line equal to "Courses", "Students" or "Grades" selects the list that
    following lines are appended to. Reading stops at "EndOfInput" or when
    the input is exhausted. Non-blank lines seen before any section header
    are reported and ignored.

    Returns:
        A ``(course_list, student_list, grade_list)`` tuple of string lists.
    """
    course_list = []
    student_list = []
    grade_list = []
    section_map = {
        "Courses": course_list,
        "Students": student_list,
        "Grades": grade_list,
    }
    current_item = None
    while True:
        try:
            line = input()
        except EOFError:
            # Piped input may end without the sentinel; treat that as the end.
            break
        if line == "EndOfInput":
            break
        if not line.strip():
            # Blank lines are not records and would break later parsing.
            continue
        if line in section_map:
            current_item = section_map[line]
        elif current_item is not None:
            current_item.append(line)
        else:
            print("Ignoring line: {}".format(line))
    return course_list, student_list, grade_list
Notes:
In order for this function to work with Python2, this code should be added at the beginning of the file:
import sys
if sys.version_info.major < 3:
input = raw_input
You can also use the input.txt file to test the code with large datasets (like provided in the question, without having to manually type all the data) like this:
python a.py < input.txt

@CristiFati, using your code I managed to tweak it to solve my problem, as below.
from collections import OrderedDict
import sys
SEPARATOR = "~"
GRADE_DICT = {
"A": 10,
"AB": 9,
"B": 8,
"BC": 7,
"C": 6,
"CD": 5,
"D": 4
}
def read_input_from_file():
    """Read the sectioned input from standard input (despite the name).

    A line equal to "Courses", "Students" or "Grades" selects the list that
    following lines are appended to. Reading stops at "EndOfInput" or when
    the input is exhausted. Non-blank lines seen before any section header
    are reported and ignored.

    Returns:
        A ``(course_list, student_list, grade_list)`` tuple of string lists.
    """
    course_list = []
    student_list = []
    grade_list = []
    section_map = {
        "Courses": course_list,
        "Students": student_list,
        "Grades": grade_list,
    }
    current_item = None
    while True:
        try:
            line = input()
        except EOFError:
            # Input ended without the sentinel; stop instead of crashing.
            break
        if line == "EndOfInput":
            break
        if not line.strip():
            # Blank lines are not records and would break later parsing.
            continue
        if line in section_map:
            current_item = section_map[line]
        elif current_item is not None:
            current_item.append(line)
        else:
            print("Ignoring line: {}".format(line))
    return course_list, student_list, grade_list
def convert_names(name_list):
    """Map student ids to names, keeping the order of ``name_list``.

    Each element is expected to look like ``"<id><SEPARATOR><name>"``.
    An element without the separator raises ValueError.
    """
    ret = OrderedDict()
    for element in name_list:
        # maxsplit=1 keeps a separator inside the name from breaking the
        # two-value unpacking.
        student_id, name = element.split(SEPARATOR, 1)
        ret[student_id] = name
    return ret
def convert_grades(grade_list):
    """Build ``{student_id: {course_id: grade_id}}`` from raw grade lines.

    The two right-most separator-delimited fields are the student id and the
    grade; the remaining left part is kept whole as the course id.
    """
    per_student = {}
    for record in grade_list:
        fields = record.rsplit(SEPARATOR, 2)
        course_id, student_id, grade_id = fields
        courses_of_student = per_student.setdefault(student_id, {})
        courses_of_student[course_id] = grade_id
    return per_student
def main():
    """Read the input and print ``id~name~average`` for each student.

    Students are processed in sorted order of their raw input lines. The
    average is rounded to two decimals; a student without grades gets "0".
    """
    course_list, student_list, grade_list = read_input_from_file()
    student_list.sort()
    student_dict = convert_names(student_list)
    exam_stat_dict = convert_grades(grade_list)
    for student_id, student_name in student_dict.items():
        grade_dict = exam_stat_dict.get(student_id)
        if grade_dict:
            grades_sum = sum(GRADE_DICT.get(item, 0) for item in grade_dict.values())
            # Convert before dividing; float(a / b) converts too late to
            # prevent integer truncation where "/" is integer division.
            average = round(float(grades_sum) / len(grade_dict), 2)
            print(SEPARATOR.join([student_id, student_name, str(average)]))
        else:
            print(SEPARATOR.join([student_id, student_name, "0"]))
if __name__ == "__main__":
main()

Related

Python dictionary is ordered alphabetically - how do I order it chronologically?

I'm following a tutorial (uploaded oct. 2020) about web scraping and storing the data in a dictionary. Everything works fine except the data in my dictionary starts with the newest entry and ends with the first one.
Should: {title,........, budget}
Is: {budget,........, title}
What could be the reason for this to happen?
Part of the code:
def get_content_value(row_data):
    """Return the cleaned text of a table cell.

    When the cell contains ``li`` elements, a list with the text of each
    item is returned; otherwise the text of the whole cell. Non-breaking
    spaces are replaced by regular spaces.
    """
    def _clean_text(node):
        return node.get_text(" ", strip = True).replace("\xa0", " ")

    if not row_data.find("li"):
        return _clean_text(row_data)
    return [_clean_text(item) for item in row_data.find_all("li")]
movie_info = {}
for index, row in enumerate(info_rows):
if index == 0:
movie_info['title'] = row.find("th").get_text(" ", strip = True)
elif index == 1:
continue
else:
content_key = row.find("th").get_text(" ", strip = True)
content_value = get_content_value(row.find("td"))
movie_info[content_key] = content_value
movie_info
index == 0 is the title
index == 1 is a picture i don't want to have
EDIT:
It's not reversed, it's in alphabetical order! Why? And how can i change it to chronological order?
Dictionaries are inherently not sorted/ordered. This is unlike lists and tuples that are ordered. To get around this issue, the collections library has OrderedDict.
You can use something like this:
# Import the OrderedDict object from `collections`
from collections import OrderedDict
def get_content_value(row_data):
if row_data.find("li"):
return [li.get_text(" ", strip = True).replace("\xa0", " ") for li in row_data.find_all("li")]
else:
return row_data.get_text(" ", strip = True).replace("\xa0", " ")
# Instead of using a regular dictionary ("{}"), we set `movie_info` to be an OrderedDict
movie_info = OrderedDict()
for index, row in enumerate(info_rows):
if index == 0:
movie_info['title'] = row.find("th").get_text(" ", strip = True)
elif index == 1:
continue
else:
content_key = row.find("th").get_text(" ", strip = True)
content_value = get_content_value(row.find("td"))
movie_info[content_key] = content_value
movie_info
It's a bit tricky, but you can do it like below.
from collections import OrderedDict
dictionary = {'one': 1, 'two': 2, 'three':3}
res = OrderedDict(reversed(list(dictionary.items())))

python function Namerror: name 'dictionary' is not defined

I would like to load a file and convert it to a dictionary. then, would like to use the loaded file to display the data. user input for choice is between 1 and 2, in order to decide which function to run.
The catch is that if I press 2 before 1, it should display the message "dictionary is empty". Also, when I load the file and then try to run display data, it shows "NameError: name 'dictionary' is not defined".
The code is as below: (Thanks in advance)
def load_data():
    """Load ``file.csv`` into a dict keyed by the first column of each row.

    Returns:
        ``{row[0]: row[1:]}`` for every non-empty row of the file.
    """
    import csv
    # The with-block closes the file even if parsing fails; newline='' is
    # what the csv module asks for when it is given a file object.
    with open('file.csv', 'r', newline='') as open_data:
        datasets = csv.reader(open_data)
        # Blank lines are yielded as empty lists and have no key column.
        return {row[0]: row[1:] for row in datasets if row}
def display_data(my_dict):
    """Print every key of ``my_dict`` followed by its value.

    An empty dictionary prints "dictionary is empty" instead.
    """
    if my_dict == {}:
        print("dictionary is empty")
        return
    for name, fields in my_dict.items():
        print(name)
        print(fields)
def main():
while True:
choice = int(input("select 1 or 2"))
if choice == 1:
my_dict = load_data()
print(my_dict)
elif choice == 2:
display_data(my_dict)
main()
First of all, the code you provided has several faults.
The key point is that you should create the variable my_dict before the loop, so that it either stores the dict you loaded or is still an empty dict when you type 2 before 1.
Try the code list below:
import csv
def load_data():
open_data = open('file.csv', 'r')
datasets = csv.reader(open_data)
mydict = {row[0]:row[1:] for row in datasets}
return mydict
def display_data(my_dict):
ds = my_dict
if ds == {}:
print("dictionary is empty")
else:
for key, value in ds.items():
print(key)
print(value)
def main():
my_dict = {}
while True:
choice = int(input("select 1 or 2"))
if choice == 1:
my_dict = load_data()
print(my_dict)
elif choice == 2:
display_data(my_dict)
main()
You have not defined any my_dict in line 24, that is why it is giving a name error.
You shall add the line :
my_dict = {}
If you want your desired output.
In choice 2 you have no data or haven't assigned any value to my_dict before calling display_data you should load those data in my_dict and pass it to the function
Here is the code
def load_data():
import csv
open_data = open('file.csv', 'r')
datasets = csv.reader(open_data)
mydict = {row[0]:row[1:] for row in datasets}
return mydict
def display_data(my_dict):
ds = my_dict
if ds == {}:
print("dictionary is empty")
else:
for key, value in ds.items():
print(key)
print(value)
def main():
while True:
choice = int(input("select 1 or 2"))
if choice == 1:
my_dict = load_data()
print(my_dict)
elif choice == 2:
my_dict = load_data()
display_data(my_dict)
main()

Average of a nested dictionary

I have a nested dictionary for a grade-book program that looks like this(this is just an example, it could be any number of students or tests):
# Example grade book: student name -> {test name: score}.
workDictionary = {'kevin': {'Test1': 97, 'Test2': 84, 'Test3': 89},
                  'Bob': {'Test1': 67, 'Test2': 74, 'Test3': 59},
                  'carol': {'Test1': 47, 'Test2': 94, 'Test3': 79},
                  'ted': {'Test1': 67, 'Test2': 64, 'Test3': 99}}
And I want to get the average of the innermost values, for instance:
finalGrade = {}
for k,v in workDictionary.iteritems():
finalGrade[k] = sum(v)/ float(len(v))
There are other factors however, i'm using pickling and an undefined amount of students and tests. This is the full program:
# Modules
import pickle
def dumpPickle(fileName):
pickle.dump(workDictionary, open(fileName,'wb'))
return
def loadUnPickle(fileName):
global workDictionary
workDictionary = pickle.load(open(fileName, 'rb'))
return(workDictionary)
workDictionary = {}
keys = workDictionary.keys()
values = workDictionary.values()
def myMenu():
mySelect = -1
while mySelect != 0:
print("\n1. Open Dictionary File\n"+
"2. Create/Write to a Dictionary\n"+
"3. Add a New Student\n"+
"4. Find a Student's Scores\n"+
"5. Add a New Student Score\n"+
"6. Display Dictionary Data\n"+
"0. Exit\n"
)
mySelect = int(input("Enter Menu Number: "))
if mySelect == 1:
fileName = input("Enter file name")
print("\nyour file is now open")
loadUnPickle(fileName)
elif mySelect == 2:
fileName = input("please create a new file.")
print("\nyour new file is now open")
elif mySelect == 3:
newStudent = input("Enter the new student's name")
firstTest = input("Enter the name of the first test")
testGrade = input("Enter the new student's first grade")
addDictionary = {newStudent:{firstTest:testGrade}}
workDictionary.update(addDictionary)
print("\n" + newStudent + str(workDictionary[newStudent]))
dumpPickle(fileName)
elif mySelect == 4:
print("\nEnter student name")
myName = input()
for name in workDictionary:
if name == myName:
print("\n",workDictionary.get(myName))
elif mySelect == 5:
print("\nEnter student name ")
myName = input()
print("\nEnter assignment to add or update")
myValue = input()
for name in workDictionary:
if name == myName:
newGrade = input("Enter new Grade")
workDictionary[name][myValue]= newGrade
dumpPickle(fileName)
print("\n" + name + str(workDictionary[name]))
elif mySelect == 6:
print(workDictionary)
return
# Main Loop
I want to add another menu selection that takes the average of a certain student and displays it.
This is what I wrote, but you can rewrite it so that it fits better in your program:
def student_avg(student):
    """Return the average of one student's grades as a float.

    Args:
        student: mapping of test name to grade. Grades may be numbers or
            numeric strings (e.g. values collected with ``input()``).

    Raises:
        ZeroDivisionError: if ``student`` holds no grades.
        ValueError: if a grade cannot be converted to a number.
    """
    # float() accepts both numbers and numeric strings, and also keeps the
    # division from truncating on Python 2.
    summ = sum(float(grade) for grade in student.values())
    average = summ / len(student)
    return average
average = student_avg(workDict["kevin"])
You can use a Dict Comprehension
from statistics import mean
avg_grades = {name: mean(tests.values()) for (name, tests) in workDictionary.items()}
The result stored in avg_grades will be:
{'Bob': 66.66666666666667,
'carol': 73.33333333333333,
'kevin': 90.0,
'ted': 76.66666666666667}

Writing a standard deviation function

I have a dictionary of words as keys and ints as value. It outputs as such:
print (word_ratings_dict)
{'hate': [1, 2, 2, 1, 1, 3, 0, 2, 3, 2, 0, 4, 1, 1], 'joy': [3, 4, 3, 3, 2, 4, 1]}
For each key word in the dictionary, I need to calculate its standard deviation WITHOUT using the statistics module.
Heres what I have so far:
def menu_validate(prompt, min_val, max_val):
    """Prompt until an integer within [min_val, max_val] is entered.

    Entering "quit" or "q" (any case) exits the program via SystemExit.
    Any other input prints a hint and asks again.

    Returns:
        The validated menu number as an int.
    """
    while True:
        answer = input(prompt).strip()
        # Check for the quit words on the raw text: after int() conversion
        # there is no string left to inspect.
        if answer.lower() in ("quit", "q"):
            raise SystemExit
        try:
            menu = int(answer)
        except ValueError:
            menu = None
        if menu is not None and min_val <= menu <= max_val:
            return menu
        print("You must enter a number value from {} to {}.".format(min_val, max_val))
def open_file(prompt):
    """Ask for a file name until the file can be opened for reading.

    ".txt" is appended when the entered name does not contain it. Returns
    the open file object; closing it is up to the caller.
    """
    while True:
        try:
            file_name = str(input(prompt))
            target = file_name if ".txt" in file_name else file_name+".txt"
            return open(target, 'r')
        except FileNotFoundError:
            print("You must enter a valid file name. Make sure the file you would like to open is in this programs root folder.")
def make_list(file):
    """Split every line of ``file`` on single spaces, dropping the last token.

    Returns a list with one token list per line.
    """
    return [line.split(' ')[:-1] for line in file]
def rating_list(lst):
    '''Return the first element of every inner list of ``lst``.'''
    return [row[0] for row in lst]
def word_cnt(lst, word : str):
    """Count how often ``word`` occurs in a list of token lists.

    Args:
        lst: list of lists of tokens.
        word: the token to count.
    """
    # The former inner loop reused the name `word`, which overwrote the
    # parameter and counted every token instead of the requested one.
    return sum(tokens.count(word) for tokens in lst)
def words_list(file):
    """Return the items yielded by iterating ``file`` as a list."""
    return [entry for entry in file]
def word_rating(word, ratings_lst):
    '''Collect the ratings of all lines that contain ``word``.

    Each line is split on whitespace; its first token is the rating, which
    is converted to int. Returns the list of ratings in line order.
    '''
    split_lines = (line.split() for line in ratings_lst)
    return [int(tokens[0]) for tokens in split_lines if word in tokens]
cnt_list = []
while True:
menu = menu_validate("1. Get sentiment for all words in a file? \nQ. Quit \n", 1, 1)
if menu == True:
ratings_file = open("sample.txt")
ratings_list = make_list(ratings_file)
word_ratings_dict = {}
word_avg_dict = {}
std_dev_dict = {}
word_file = open_file("Enter the name of the file with words to score \n")
word_list = words_list(word_file)
for word in word_list:
#counts the words
cnt = word_cnt(ratings_list, word)
cnt_dict[word] = cnt
word_ratings_dict[word] = word_rating(word, ratings_list)
total_rating = 0
for i in range (0, cnt):
total_rating += word_ratings_dict[word][i]
word_avg_dict[word] = total_rating/cnt
std_dev_dict[word] =
These will do the job nicely:
def mean(data):
    """Return the arithmetic mean of ``data`` as a float."""
    # Convert before dividing so integer data is not truncated where "/"
    # is integer division (Python 2).
    return float(sum(data)) / len(data)


def variance(data):
    """Return the population variance of ``data`` (divides by n)."""
    mu = mean(data)
    return mean([(x - mu) ** 2 for x in data])


def stddev(data):
    """Return the population standard deviation of ``data``."""
    # Imported here because the snippet never brought sqrt into scope.
    from math import sqrt
    return sqrt(variance(data))
or you can do it in one function :
data = []
def StdDev( data ):
    """Print and return the sample standard deviation of ``data``.

    Uses the n-1 divisor, so ``data`` needs at least two elements; fewer
    raise ZeroDivisionError.
    """
    # The mean is the same for every element, so compute it once instead of
    # re-summing the whole list on each iteration.
    mu = sum(data)/len(data)
    suma = 0
    for x in data:
        suma += (x - mu)**2  # squared distance of each element from the mean
    result = (suma/(len(data)-1))**0.5  # divide by n-1, then take the square root
    print(result)
    return result

Reading from raw_data.txt and writing to a results.txt file with processing in Python

This is my first post. I have a function that asks for the number of students. Then, for each student, first three lines contain the following information: Student ID, name, and number of courses taken that semester. Now, for each course, the course number, credit hour, and the percentage of scores earned by the student is listed.
def rawdata():
    """Interactively collect student records and append them to raw_data.txt.

    For every student a block is written: "Start", the student ID, the name,
    the number of courses, then course number / credit hours / percentage
    for each course, and finally "End". Each value is on its own line.
    """
    # The with-block closes the file even when an int()/float() conversion
    # of the user's input raises.
    with open('raw_data.txt', 'a') as raw_file:
        num_students = int(input('Enter number of students: '))
        for student in range(num_students):
            raw_file.write('Start' + '\n')
            student_id = input('Enter student ID: ')
            name = input('Enter Name: ')
            num_classes = int(input('Enter number of courses taken: '))
            raw_file.write(student_id + '\n')
            raw_file.write(name + '\n')
            raw_file.write(str(num_classes) + '\n')
            for classes in range(num_classes):
                course_number = input('Enter Course Number: ')
                credits = int(input('Enter Credit Hours: '))
                GPA1 = float(input('Enter percentage grade for class: '))
                raw_file.write(course_number + '\n')
                raw_file.write(str(credits) + '\n')
                raw_file.write(str(GPA1) + '\n')
            raw_file.write('End' + '\n')
    print('Data has been written')
All the data is listed to a txt file and now I need to PULL this information from my raw_data.txt which looks like(varies with inputs):
Start
eh2727
Josh D
2
MAT3000
4
95.0
COM3000
4
90.0
End
Start
ah2718
Mary J
1
ENG3010
4
100.0
End
and process it so that I can calculate each students GPA. I have each students block of info contained by a Start/End and I don't know how to read this info in my processing function in order for me to calculate their GPA. This is what I have so far:
def process():
data = open('raw_data.txt', 'r')
results = open('process_results.txt', 'w')
buffer = []
for line in data:
if line.startswith('Start'):
buffer = []
buffer.append(line)
if line.startswith("End"):
for outline in buffer:
results.write(outline)
This simply writes it all into my results text and I don't know how to individually process each block of information to calculate the GPA. Any help would be greatly appreciated.
Since it is your own code writing out the data to the .txt file, you might consider writing it in an easier and/or more fault tolerant format for machine reading, for example JSON or XML. Alternatively, you might want to consider using pickle or cpickle to serialize the data and read it in again.
Anyway, on to your question: how to read the file. Unfortunately, you do not tell us what you want to do with the parsed data. I assume here you want to print it. Normally you would of course create a nice class or classes describing students and courses.
For parsing of files like yours, I use the string method split() a lot. split() is your best friend. See the python docs for more info on string methods.
f = open('raw_data.txt', 'rt')
data = f.read()
students = data.split('Start\n')[1:]
for s in students:
lines = s.split('\n')
id = lines[0]
name = lines[1]
nrcourses = int(lines[2])
line = 2
courses = []
for n in range(nrcourses):
number = lines[line+1]
credit = lines[line+2]
score = lines[line+3]
courses.append((number, credit, score))
line += 3
print 'id: %s; name %s; course list %s' % (id, name, courses)
f.close()
You need to develop a state machine for processing a student record. You're on the right track with your 'if line.strip() == 'Start', that's a sentinel indicating the beginning of a record. What you can do at this point is set a flag, processStudentRecord = true, so the next pass through in 'for line in data' you know the line you get is a part of a record. After setting the flag you should break that if loop so you don't have to have a bunch of elifs.
processStudentRecord = False
for line in data:
if line.strip() == 'Start':
processStudentRecord = True
expecting = "student_id"
# break here so you go immediately to the next line
if line.strip() == 'End':
processStudentRecord = False
# break here so you go immediately to the next line
if processStudentRecord:
# keep track of where you are in the student record
if expecting == "student_id":
# extract the student name and then proceed to the next expected line
expecting = "student_name"
elif expecting == ""
And so on and so forth. Note, this is a "procedural" method of doing this - one can invent object-oriented or functional solutions.
This is quite a bit of code, but if you trace it through until you understand how it works you'll learn a lot.
First we need to take a class mark and convert it to points. You could write this as a cascade of 13 if's, but I like the data-driven approach:
import bisect
def grade_points(pct):
    """Convert a percentage mark (0..100) into grade points.

    The result is the grade-point value of the highest threshold that is
    less than or equal to ``pct``.

    Raises:
        ValueError: if ``pct`` is outside 0..100.
    """
    if not 0 <= pct <= 100:
        raise ValueError('pct value should be in 0..100, not {}'.format(pct))
    thresholds = [ 0,  50,  53,  57,  60,  63,  67,  70,  73,  77,  80,  85,  90]
    values = [0.0, 0.7, 1.0, 1.3, 1.7, 2.0, 2.3, 2.7, 3.0, 3.3, 3.7, 4.0, 4.0]
    # bisect_right gives the insertion point after any equal threshold, so
    # one step back is the highest threshold <= pct.
    position = bisect.bisect_right(thresholds, pct) - 1
    return values[position]
Next we want a Student class to make it easy to track per-student data
class Student(object):
    """A student with an id, a name and a list of (code, hours, grade) classes."""

    str_format = '{id:>8} {name} {gpa}'

    def __init__(self, *args):
        """Create a student from another student or from (id, name, classes).

        Raises:
            ValueError: if called with a different number of arguments.
        """
        if len(args)==1:    # copy constructor
            student = args[0]
            self.id, self.name, self.classes = student.id, student.name, student.classes
        elif len(args)==3:  # "id", "name", [classes,]
            self.id, self.name, self.classes = args
        else:
            raise ValueError('Failed call to {}.__init__{}'.format(type(self), args))

    # Must be a property: __str__ formats self.gpa as a value, not a method.
    @property
    def gpa(self):
        """Credit-hour weighted grade-point average over all classes."""
        points = sum(hour*grade_points(grade) for code,hour,grade in self.classes)
        hours = sum(hour for code,hour,grade in self.classes)
        return points / hours

    def __str__(self):
        return type(self).str_format.format(id=self.id, name=self.name, classes=self.classes, gpa=self.gpa)

    def __repr__(self):
        return "{}('{}', '{}', {})".format(type(self).__name__, self.id, self.name, self.classes)
So you can create a student and find her GPA like so:
sally = Student('ab2773', 'S Atkins', [
('MAT3000', 4, 81.0),
('ENG3010', 4, 85.0)
])
print sally # ' ab2773 S Atkins 3.85'
Now we need to be able to stream Students to and from a file. This is a bit painful from an OOP point of view, because a Student object really shouldn't need to know anything about a File object or vice-versa, and more so because we want to upgrade you to a better file format - a Student object definitely doesn't need to know about multiple incompatible File types.
I've approached this by subclassing Student; I wrote the Student.__init__ method in such a way that I can cast back and forth and don't have to rewrite it for the subclasses, so the subclass just knows how to convert itself to and from your nasty file format
class NastyFileStudent(Student):
    """Student that converts itself to and from the line-based file format."""

    # Alternate constructor: it is called on the class, so it needs the
    # classmethod decorator to receive `cls` instead of the first argument.
    @classmethod
    def from_strings(cls, strings):
        """Build a Student from the lines of one record.

        ``strings`` holds id, name and course count, followed by three lines
        (code, hours, grade) per course. Returns None when the number of
        lines does not match the announced course count.
        """
        if len(strings) > 3 and len(strings) == 3 + int(strings[2])*3:
            codes = strings[3::3]
            hours = map(int, strings[4::3])
            grades = map(float, strings[5::3])
            # list() keeps the classes re-iterable where zip() is lazy.
            return Student(strings[0], strings[1], list(zip(codes, hours, grades)))
        else:
            # not enough data returned - probably end of file
            return None

    def __str__(self):
        """Return the record as newline-joined lines (without Start/End)."""
        data = [self.id, self.name, str(len(self.classes))] + [str(i) for c in self.classes for i in c]
        return '\n'.join(data)
and the file just knows it has Student data but nothing about the contents
class NastyFile(object):
    """Iterable file of student records delimited by Start/End lines."""

    START = 'Start'
    END = 'End'

    # Called as NastyFile._read_until(...) without an instance, so it has to
    # be a static method.
    @staticmethod
    def _read_until(endfn, seq):
        """Consume ``seq`` up to the end marker and return the items before it.

        ``endfn`` is either a predicate or a value compared with ``==``.
        """
        is_end = endfn if callable(endfn) else lambda s: s==endfn
        data = []
        for s in seq:
            if is_end(s):
                break
            else:
                data.append(s)
        return data

    def __init__(self, name, mode='r'):
        self.name = name
        self.mode = mode
        self._f = open(name, mode)
        self.lines = (ln.strip() for ln in self._f)

    def __del__(self):
        # _f is missing when open() failed inside __init__.
        f = getattr(self, '_f', None)
        if f is not None:
            f.close()

    def __iter__(self):
        return self

    def next(self):
        """Return the next student; raise StopIteration when none is left."""
        _ = NastyFile._read_until(NastyFile.START, self.lines)
        strings = NastyFile._read_until(NastyFile.END, self.lines)
        student = NastyFileStudent.from_strings(strings)
        if student is None:
            raise StopIteration()
        else:
            return student

    # Python 3 spelling of the iterator protocol method.
    __next__ = next

    def read(self):
        """Return all remaining students as a list."""
        return list(self)

    def write(self, s):
        """Write one student or an iterable of students to the file."""
        if not hasattr(s, '__iter__'):
            s = [s]
        for student in s:
            self._f.write('{}\n{}\n{}\n'.format(NastyFile.START, str(NastyFileStudent(student)), NastyFile.END))
Now we can read and write files of student records like so
>>> students = NastyFile('student_records.txt').read()
>>> for s in students:
... print s
eh2727 Josh D 4.0
ah2718 Mary J 4.0
>>> students.append(sally)
>>> students.sort(key=lambda s: s.name.rsplit(None,1)[-1]) # sort by last name
>>> for s in students:
... print s
ab2773 S Atkins 3.85
eh2727 Josh D 4.0
ah2718 Mary J 4.0
>>> newfile = NastyFile('new_records.txt', 'w')
>>> newfile.write(students)
>>> for i,s in enumerate(NastyFile('new_records.txt'), 1):
... print '{:>2}: {}'.format(i, s)
1: ab2773 S Atkins 3.85
2: eh2727 Josh D 4.0
3: ah2718 Mary J 4.0

Categories

Resources