Python remove newlines from a column in csv file

Python remove newlines from a column in csv file - python

I tried a lot of suggestions, but I am unable to remove the carriage returns. I am new to Python and am trying it out by cleaning a CSV file.
import csv

# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\S" or "\D".
filepath_i = r'C:\Source Files\Data Source\Flat File Source\PatientRecords.csv'
filepath_o = r'C:\Source Files\Data Source\Flat File Source\PatientRecords2.csv'
rows = []
# Plain 'r' replaces 'rU': the 'U' flag is rejected by open() on Python 3.11+.
# newline='' hands line endings to the csv module untouched, so a line break
# inside a quoted field stays part of that field.
with open(filepath_i, 'r', newline='') as csv_file:
    #filtered = (line.replace('\r\n', '') for line in csv_file)
    # NOTE: this generator is never consumed and the reader below is built from
    # csv_file, so the replace() has no effect on the parsed rows.
    filtered = (line.replace('\r', '') for line in csv_file)
    csv_reader = csv.reader(csv_file, delimiter=',')
    # Print only the first ten parsed rows.
    for i, row in enumerate(csv_reader, start=1):
        print(row)
        if i == 10:
            break
#with open(filepath_o, 'w',newline='' ) as writeFile:
# writer = csv.writer(writeFile,lineterminator='\r')
# for row in csv_reader:
# #rows.append(row.strip())
# rows.append(row.strip())
# writer.writerows(rows)
Input
DRG Definition,Provider Id,Provider Name,Provider Street Address,Provider City,Provider State,Provider Zip Code,Hospital Referral Region Description,Hospital Category,Hospital Type, Total Discharges ,Covered Charges , Total Payments ,Medicare Payments
039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,AL - Dothan,Specialty Centers,Government Funded,91,"$32,963.07 ","$5,777.24 ","$4,763.73 "
039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10005,MARSHALL MEDICAL CENTER SOUTH,"2505 U S HIGHWAY
431 NORTH",BOAZ,AL,35957,AL - Birmingham,Specialty Centers,Private Institution,14,"$15,131.85 ","$5,787.57 ","$4,976.71 "
039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10006,ELIZA COFFEE MEMORIAL HOSPITAL,205 MARENGO STREET,FLORENCE,AL,35631,AL - Birmingham,Rehabilitation Centers,Private Institution,24,"$37,560.37 ","$5,434.95 ","$4,453.79 "
Output (4th column 'Provider Street Address')
['DRG Definition', 'Provider Id', 'Provider Name', 'Provider Street Address', 'Provider City', 'Provider State', 'Provider Zip Code', 'Hospital Referral Region Description', 'Hospital Category', 'Hospital Type', ' Total Discharges ', 'Covered Charges ', ' Total Payments ', 'Medicare Payments']
['039 - EXTRACRANIAL PROCEDURES W/O CC/MCC', '10001', 'SOUTHEAST ALABAMA MEDICAL CENTER', '1108 ROSS CLARK CIRCLE', 'DOTHAN', 'AL', '36301', 'AL - Dothan', 'Specialty Centers', 'Government Funded', '91', '$32,963.07 ', '$5,777.24 ', '$4,763.73 ']
['039 - EXTRACRANIAL PROCEDURES W/O CC/MCC', '10005', 'MARSHALL MEDICAL CENTER SOUTH', '2505 U S HIGHWAY \n431 NORTH', 'BOAZ', 'AL', '35957', 'AL - Birmingham', 'Specialty Centers', 'Private Institution', '14', '$15,131.85 ', '$5,787.57 ', '$4,976.71 ']

I ran this on my side and it works:
# Plain 'r' replaces 'rU': the 'U' flag is rejected by open() on Python 3.11+.
# newline='' lets the csv module keep quoted fields that span several lines
# together as one field.
with open(filepath_i, 'r', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        # Index 3 is the 'Provider Street Address' column; strip the line
        # breaks embedded in its value.
        row[3] = row[3].replace("\n", "").replace("\r", "")
        print(row)
Output:
['DRG Definition', 'Provider Id', 'Provider Name', 'Provider Street Address', 'Provider City', 'Provider State', 'Provider Zip Code', 'Hospital Referral Region Description', 'Hospital Category', 'Hospital Type', ' Total Discharges ', 'Covered Charges ', ' Total Payments ', 'Medicare Payments']
['039 - EXTRACRANIAL PROCEDURES W/O CC/MCC', '10001', 'SOUTHEAST ALABAMA MEDICAL CENTER', '1108 ROSS CLARK CIRCLE', 'DOTHAN', 'AL', '36301', 'AL - Dothan', 'Specialty Centers', 'Government Funded', '91', '$32,963.07 ', '$5,777.24 ', '$4,763.73 ']
['039 - EXTRACRANIAL PROCEDURES W/O CC/MCC', '10005', 'MARSHALL MEDICAL CENTER SOUTH', '2505 U S HIGHWAY 431 NORTH', 'BOAZ', 'AL', '35957', 'AL - Birmingham', 'Specialty Centers', 'Private Institution', '14', '$15,131.85 ', '$5,787.57 ', '$4,976.71 ']
['039 - EXTRACRANIAL PROCEDURES W/O CC/MCC', '10006', 'ELIZA COFFEE MEMORIAL HOSPITAL', '205 MARENGO STREET', 'FLORENCE', 'AL', '35631', 'AL - Birmingham', 'Rehabilitation Centers', 'Private Institution', '24', '$37,560.37 ', '$5,434.95 ', '$4,453.79 ']

Related

PFF data scraping not recognizing text

I am trying to scrape PFF.com for football grades with Selenium; specifically, I am trying to get one particular grade for every quarterback. The problem is that it doesn't seem to capture the text: `.text` returns nothing, yet I am not getting any NoSuchElementException.
Here's my code:
# Start Chrome and open the quarterback passing page.
service = Service(executable_path="C:\\chromedriver.exe")
op = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=service, options=op)
driver.get("https://premium.pff.com/nfl/positions/2022/REG/passing?position=QB")
sleep(2)

# Open the sign-in form, fill in the credentials and submit it.
sign_in = driver.find_element(By.XPATH, '/html/body/div/div/header/div[3]/button')
sign_in.click()
sleep(2)
email = driver.find_element(By.XPATH, '/html/body/div/div/div/div/div/div/form/div[1]/input')
email.send_keys(my_email)
password = driver.find_element(
    By.XPATH, '/html/body/div/div/div/div/div/div/form/div[2]/input')
password.send_keys(my_password)
sleep(2)
sign_in_2 = driver.find_element(
    By.XPATH, '/html/body/div/div/div/div/div/div/form/button')
sign_in_2.click()
sleep(2)

# The selector must be a single string literal; a literal split across two
# physical lines is a syntax error.
all_off_grades = driver.find_elements(
    By.CSS_SELECTOR, '.kyber-table .kyber-grade-badge__info-text div')
all_qb_names = driver.find_elements(By.CSS_SELECTOR, '.kyber-table .p-1 a')

# Collect the visible text of every matched element.
qb_grades = [grade.text for grade in all_off_grades]
qb_names = [qb_name.text for qb_name in all_qb_names]
print(qb_grades)
print(qb_names)
The lists keep showing as empty.
Here are the elements I am trying to pull, for every QB. I have already confirmed that the other QBs have the same class names for their grade and name.
<div class="kyber-grade-badge__info-text">91.5</div>
need to pull the 91.5
<a class="p-1" href="/nfl/players/2022/REG/josh-allen/46601/passing">Josh Allen</a>
need to pull Josh Allen

@Jbuck3 I tried modifying the locators and it works for me. I am also including the output I get. Let me know if that is what you were expecting.
all_off_grades = driver.find_elements(By.CSS_SELECTOR, '.kyber-table-body__scrolling-rows-container .kyber-grade-badge__info-text')
all_qb_names = driver.find_elements(By.CSS_SELECTOR, "a[data-gtm-id = 'player_name']")
And the output I got is:
['91.5', '90.3', '74.6', '-', '-', '60.0', '84.3', '78.3', '78.1', '-', '-', '60.0', '82.8', '83.4', '-', '-', '-', '60.0']
['Josh Allen ', 'Geno Smith ', 'Kirk Cousins ', 'Marcus Mariota ', 'Jameis Winston ', 'Trey Lance ', 'Derek Carr ', 'Justin Fields ', 'Trevor Lawrence ', 'Russell Wilson ', 'Ryan Tannehill ', 'Tom Brady ', 'Tua Tagovailoa ', 'Mac Jones ', 'Davis Mills ', 'Matthew Stafford ', 'Baker Mayfield ', 'Lamar Jackson ', 'Joe Flacco ', 'Matt Ryan ', 'Jalen Hurts ', 'Daniel Jones ', 'Kyler Murray ', 'Justin Herbert ', 'Joe Burrow ', 'Aaron Rodgers ', 'Patrick Mahomes ', 'Mitchell Trubisky ', 'Dak Prescott ', 'Jacoby Brissett ', 'Carson Wentz ', 'Jared Goff ']

Python to print out to excel or csv from a for loop

I have been working on this code:
Section = {'East':['Alan','Bob'],
'North':['Alan','Michael'],
'South':['Tom'],
'West':['Bob','Michael','Tom']}
Name = {'Alan':['Subaru','Chevvy','Honda'],
'Bob':['Toyota','Honda','Camry'],
'Michael':['Camry','Ford'],
'Tom':['Ford','Toyota']}
Inventory = {'East':['Toyota','Honda','Camry'],
'North':['Ford','Chevvy','Ferrari','Subaru'],
'South':['Subaru','Acura','Lexus','BMW'],
'West':['Ford','Subaru','Camry']}
for name,sections in Section.items():
for section in sections:
haveInventory = Name[name]
needInventory = Inventory[section]
for inventory in needInventory:
if inventory not in haveInventory:
print(str(name) + ' ' + str(section) + ' ' + str(inventory))
I'm not sure why, but the code does not run.
How can I export the result to an Excel or CSV file in the following format:
Alan East Camry
Alan East Honda
Tom South Subaru

Have a look at this:
for name,sections in Section.items(): # ('East', ['Alan','Bob'])
for section in sections:
haveInventory = Name[name] # Name['East'] Causes error
needInventory = Inventory[section]
As you can see, in that particular iteration you are trying to get the value for the key 'East' from the dictionary `Name` (to assign it to `haveInventory`), but that dictionary doesn't have that key.

I have changed these two lines:
for name,sections in Section.items():
for section in sections:
To
for sections,names in Section.items():
for name in names:
import csv

# Section -> people assigned to that section.
Section = {'East': ['Alan', 'Bob'],
           'North': ['Alan', 'Michael'],
           'South': ['Tom'],
           'West': ['Bob', 'Michael', 'Tom']}
# Person -> items that person already has.
Name = {'Alan': ['Subaru', 'Chevvy', 'Honda'],
        'Bob': ['Toyota', 'Honda', 'Camry'],
        'Michael': ['Camry', 'Ford'],
        'Tom': ['Ford', 'Toyota']}
# Section -> items that section needs.
Inventory = {'East': ['Toyota', 'Honda', 'Camry'],
             'North': ['Ford', 'Chevvy', 'Ferrari', 'Subaru'],
             'South': ['Subaru', 'Acura', 'Lexus', 'BMW'],
             'West': ['Ford', 'Subaru', 'Camry']}

# The with-block closes (and therefore flushes) the file even if the loop
# raises; a bare open() left the handle open and rows could stay in the buffer.
# newline='' leaves line endings to the csv writer.
with open('file.csv', 'w', newline='') as f1:
    writer = csv.writer(f1, delimiter='\t', lineterminator='\n')
    for section, names in Section.items():
        for name in names:
            haveInventory = Name[name]
            needInventory = Inventory[section]
            for inventory in needInventory:
                if inventory not in haveInventory:
                    # One "name section item" line per item the person lacks.
                    line = f"{name} {section} {inventory}"
                    print(line)
                    writer.writerow([line])
Alan East Toyota
Alan East Camry
Alan North Ford
Alan North Ferrari
Michael North Chevvy
Michael North Ferrari
Michael North Subaru
Tom South Subaru
Tom South Acura
Tom South Lexus
Tom South BMW
Bob West Ford
Bob West Subaru
Michael West Subaru
Tom West Subaru
Tom West Camry

import pandas as pd

# Section -> people assigned to that section.
Section = {'East': ['Alan', 'Bob'],
           'North': ['Alan', 'Michael'],
           'South': ['Tom'],
           'West': ['Bob', 'Michael', 'Tom']}
# Person -> items that person already has.
Name = {'Alan': ['Subaru', 'Chevvy', 'Honda'],
        'Bob': ['Toyota', 'Honda', 'Camry'],
        'Michael': ['Camry', 'Ford'],
        'Tom': ['Ford', 'Toyota']}
# Section -> items that section needs.
Inventory = {'East': ['Toyota', 'Honda', 'Camry'],
             'North': ['Ford', 'Chevvy', 'Ferrari', 'Subaru'],
             'South': ['Subaru', 'Acura', 'Lexus', 'BMW'],
             'West': ['Ford', 'Subaru', 'Camry']}

# One (name, section, item) tuple per item a person lacks; a single list of
# rows replaces three parallel lists that had to be kept in step.
rows = []
for section, names in Section.items():
    for name in names:
        haveInventory = Name[name]
        needInventory = Inventory[section]
        for inventory in needInventory:
            if inventory not in haveInventory:
                print(str(name) + ' ' + str(section) + ' ' + str(inventory))
                rows.append((name, section, inventory))

result = pd.DataFrame(rows, columns=['Name', 'Section', 'inventory'])
# index=False keeps pandas' automatic row numbers out of the file, so the CSV
# holds exactly the three requested columns.
result.to_csv('result.csv', index=False)

from time import gmtime, strftime, time
import csv

# Timestamp (from gmtime) that makes each run write to its own file.
current_time = strftime("%Y-%m-%d_%H-%M-%S", gmtime())
with open('result_' + current_time + '.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter='\t', lineterminator='\n')
    for section, names in Section.items():
        for name in names:
            owned = Name[name]
            wanted = Inventory[section]
            for inventory in wanted:
                if inventory in owned:
                    continue
                # Echo the line to the console and write it as a one-cell row.
                line = ' '.join((str(name), str(section), str(inventory)))
                print(line)
                writer.writerow([line])

Save 2D array to CSV

I want a function that takes lists as input and saves each list as one row.
My attempt to save a two-dimensional array:
my_list = [['\ufeffUser Name', 'First Name', 'Last Name', 'Display Name', 'Job Title', 'Department', 'Office Number', 'Office Phone', 'Mobile Phone', 'Fax', 'Address', 'City', 'State or Province', 'ZIP or Postal Code', 'Country or Region'], ['chris#contoso.com', 'Chris', 'Green', 'Chris Green', 'IT Manager', 'Information Technology', '123451', '123-555-1211', '123-555-6641', '123-555-9821', '1 Microsoft way', 'Redmond', 'Wa',
'98052', 'United States'], ['ben#contoso.com', 'Ben', 'Andrews', 'Ben Andrews', 'IT Manager', 'Information Technology', '123452', '123-555-1212', '123-555-6642', '123-555-9822', '1 Microsoft way', 'Redmond', 'Wa', '98052', 'United States'], ['david#contoso.com', 'David', 'Longmuir', 'David Longmuir', 'IT Manager', 'Information Technology', '123453', '123-555-1213', '123-555-6643', '123-555-9823', '1 Microsoft way', 'Redmond', 'Wa', '98052', 'United States'], ['cynthia#contoso.com', 'Cynthia', 'Carey', 'Cynthia Carey', 'IT Manager', 'Information Technology', '123454', '123-555-1214', '123-555-6644', '123-555-9824', '1 Microsoft way', 'Redmond', 'Wa', '98052', 'United States'], ['melissa#contoso.com', 'Melissa', 'MacBeth', 'Melissa MacBeth', 'IT Manager', 'Information Technology', '123455', '123-555-1215', '123-555-6645', '123-555-9825', '1 Microsoft way', 'Redmond', 'Wa', '98052', 'United States']]
unifile.dump.excel("file.csv", "array", "utf-8", my_list)
My attempt to save lists to CSV files:
l1 = ["aa", "ftyg"]
l2 = ["fgghg", "ftyfuv"]
unifile.dump.excel("file.csv", "list", "utf-8", l1, l2)
My function
def excel(file_path: str, mode: str = "array", t_encoding: str = "utf-8", write_mode: str = "w", *data: list):
    """Write ``data`` to ``file_path`` as comma-separated rows.

    Any existing file at ``file_path`` is deleted first, so every call starts
    from an empty file.

    Args:
        file_path: Destination CSV path.
        mode: ``"list"`` writes each item of ``data`` as one row. ``"array"``
            writes each row of every two-dimensional item as its own row, and
            still writes a flat item as a single row. Any other value writes
            nothing (the old file is still removed).
        t_encoding: Text encoding of the output file.
        write_mode: ``open()`` mode used in ``"array"`` mode; ``"list"`` mode
            always uses ``"w"``. If a non-string is passed here it is treated
            as the first item of ``data``, so calls such as
            ``excel(path, "list", "utf-8", row1, row2)`` keep ``row1``.
        *data: The rows (``"list"``) or tables (``"array"``) to write.
    """
    # Callers commonly pass the first row right after the encoding, where it
    # lands in the write_mode slot. Previously that row was silently dropped
    # in "list" mode and caused a TypeError in "array" mode.
    if not isinstance(write_mode, str):
        data = (write_mode, *data)
        write_mode = "w"

    # Best-effort removal of a previous file; a missing file is not an error.
    try:
        os.remove(file_path)
    except OSError:
        pass

    if mode not in ("list", "array"):
        return

    open_mode = "w" if mode == "list" else write_mode
    # newline="" leaves line endings to the csv writer (avoids blank lines
    # between rows on Windows).
    with open(file_path, open_mode, encoding=t_encoding, newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',')
        for item in data:
            is_table = (
                mode == "array"
                and bool(item)
                and all(isinstance(row, (list, tuple)) for row in item)
            )
            if is_table:
                # A 2-D array: one CSV row per inner list.
                csv_writer.writerows(item)
            else:
                csv_writer.writerow(item)
User Name,First Name,Last Name,Display Name,Job Title,Department,Office Number,Office Phone,Mobile Phone,Fax,Address,City,State or Province,ZIP or Postal Code,Country or Region
chris#contoso.com,Chris,Green,Chris Green,IT Manager,Information Technology,123451,123-555-1211,123-555-6641,123-555-9821,1 Microsoft way,Redmond,Wa,98052,United States
ben#contoso.com,Ben,Andrews,Ben Andrews,IT Manager,Information Technology,123452,123-555-1212,123-555-6642,123-555-9822,1 Microsoft way,Redmond,Wa,98052,United States
david#contoso.com,David,Longmuir,David Longmuir,IT Manager,Information Technology,123453,123-555-1213,123-555-6643,123-555-9823,1 Microsoft way,Redmond,Wa,98052,United States
cynthia#contoso.com,Cynthia,Carey,Cynthia Carey,IT Manager,Information Technology,123454,123-555-1214,123-555-6644,123-555-9824,1 Microsoft way,Redmond,Wa,98052,United States
melissa#contoso.com,Melissa,MacBeth,Melissa MacBeth,IT Manager,Information Technology,123455,123-555-1215,123-555-6645,123-555-9825,1 Microsoft way,Redmond,Wa,98052,United States
The problem is that this code replaces all of the previous lines with the last line.

The Pandas module is really useful. This worked for me:
import pandas as pd

my_list = pd.DataFrame(my_list)
# header=False and index=False keep pandas' automatic 0..n column and row
# labels out of the file, so it contains only the original rows.
my_list.to_csv("example.csv", header=False, index=False)

Python: split strings and form dictionaries in the list

I have a list of string as follows:
e = ['Website: Alabama Office of the Attorney General',
'Toll Free: 1-800-392-5658',
'Website: State Banking Department',
'Toll Free: 1-866-465-2279',
'Website: Department of Insurance',
'Phone Number: 334-241-4141',
'Website: Securities Commission',
'Phone Number: 334-242-2984',
'Website: Public Service Commission',
'Toll Free: 1-800-392-8050']
I want to form dictionaries by splitting the strings at ":" and form dictionaries of each two elements in the list like:
e = [{'Website': 'Alabama Office of the Attorney General',
'Toll Free': '1-800-392-5658'},
{'Website': 'State Banking Department',
'Toll Free': '1-866-465-2279'},
{'Website': 'Department of Insurance',
'Phone Number': '334-241-4141'},
{'Website': 'Securities Commission',
'Phone Number': '334-242-2984'},
{'Website': 'Public Service Commission',
'Toll Free': '1-800-392-8050'}]
Thank you for your help as always.

If you want one dictionary per two lines, you can use:
ei = iter(e)
[{k:v for k,v in (x.split(':',1) for x in xs)} for xs in zip(ei,ei)]
generating:
>>> [{k:v for k,v in (x.split(':',1) for x in xs)} for xs in zip(ei,ei)]
[{'Website': ' Alabama Office of the Attorney General', 'Toll Free': ' 1-800-392-5658'}, {'Website': ' State Banking Department', 'Toll Free': ' 1-866-465-2279'}, {'Website': ' Department of Insurance', 'Phone Number': ' 334-241-4141'}, {'Website': ' Securities Commission', 'Phone Number': ' 334-242-2984'}, {'Website': ' Public Service Commission', 'Toll Free': ' 1-800-392-8050'}]
Or better formatted:
>>> [{k:v for k,v in (x.split(':',1) for x in xs)} for xs in zip(ei,ei)]
[{'Website': ' Alabama Office of the Attorney General', 'Toll Free': ' 1-800-392-5658'},
{'Website': ' State Banking Department', 'Toll Free': ' 1-866-465-2279'},
{'Website': ' Department of Insurance', 'Phone Number': ' 334-241-4141'},
{'Website': ' Securities Commission', 'Phone Number': ' 334-242-2984'},
{'Website': ' Public Service Commission', 'Toll Free': ' 1-800-392-8050'}]
If you want to remove the spaces in the value, we can use strip():
ei = iter(e)
[{k:v.strip() for k,v in (x.split(':',1) for x in xs)} for xs in zip(ei,ei)]
If there are n lines per dictionary, we can use:
n = 2
ei = iter(e)
[{k:v for k,v in (x.split(':',1) for x in xs)} for xs in zip(*((ei,)*n))]

# Assumption: e holds an even number of entries, two per dictionary.
ans = []
# range() replaces xrange(), which exists only in Python 2.
for i in range(0, len(e), 2):
    # Split on the first ':' only, so a value that itself contains ':' is
    # kept whole.
    first_key, first_value = e[i].split(':', 1)
    second_key, second_value = e[i + 1].split(':', 1)
    # strip() removes the space that follows the ':' in each entry.
    ans.append({first_key.strip(): first_value.strip(),
                second_key.strip(): second_value.strip()})

Python - creating multiple objects for a class

In python I need to create 43 instances of a class 'Student' that includes the variables first_name, middle_name, last_name, student_id by reading in a file (Students.txt) and parsing it. The text file appears like this:
Last Name Midle Name First Name Student ID
----------------------------------------------
Howard Moe howar1m
Howard Curly howar1c
Fine Lary fine1l
Howard Shemp howar1s
Besser Joe besse1j
DeRita Joe Curly derit1cj
Tiure Desilijic Jaba tiure1jd
Tharen Bria thare1b
Tai Besadii Durga tai1db
Hego Damask hego1d
Lannister Tyrion lanni1t
Stark Arya stark1a
Clegane Sandor clega1s
Targaryen Daenerys targa1d
Bombadil Tom bomba1t
Brandybuck Meriadoc brand1m
Took Pregrin took1p
McCoy Leonard mccoy1l
Scott Montgomery scott1m
Crusher Wesley crush1w
Montoya Inigo monto1i
Rugen Tyrone rugen1t
Solo Han solo1h
Corey Carl corey1c
Flaumel Evelyn flaum1e
Taltos Vlad talto1v
e'Drien Morrolan edrie1m
Watson John watso1j
McCoy Ebenezar mccoy1e
Carpenter Molly carpe1m
Graystone Zoe grays1z
Adama William adama1w
Adama Joseph Leland adama1l
Roslin Laura rosli1l
Baltar Gaius balta1g
Tigh Ellen tigh1e
Tigh Saul tigh1s
Cottle Sherman cottl1s
Zarek Thomas zarek1t
Murphy James Alexander murph1a
Sobchak Walter sobch1w
Dane Alexander dane1a
Gruber Hans grube1h
Biggs John Gil biggs1gj
The class student is:
class Student(object):
    """A student identified by first, middle and last name plus a student ID."""

    def __init__(self, first_name, middle_name, last_name, student_id):
        # Double-underscore attributes are name-mangled by Python and end up
        # stored as _Student__first_name, _Student__middle_name, and so on.
        (self.__first_name,
         self.__middle_name,
         self.__last_name,
         self.__student_id) = (first_name, middle_name, last_name, student_id)
What would be the easiest way to read into 'Students.txt' and create each instance of student?

Step by step tutorial
To read the file content, use io.open. Don't forget to specify the file encoding if any name has accented characters.
with io.open('students.txt', mode="r", encoding="utf8") as fd:
content = fd.read()
Here, you read the whole content and store it in memory (amount of data is small). You can also use an iterator.
Then, you can split the content line by line with str.splitlines():
lines = content.splitlines()
# print(lines)
You get something like:
['Last Name Midle Name First Name Student ID ',
'----------------------------------------------',
'Howard Moe howar1m ',
'Howard Curly howar1c ',
'Fine Lary fine1l ',
'Howard Shemp howar1s ',
'Besser Joe besse1j ',
'DeRita Joe Curly derit1cj ',
'Tiure Desilijic Jaba tiure1jd ',
'Tharen Bria thare1b ']
You have (nearly) fixed-length lines, so you can use slices to extract the fields.
Here is what you can do for the header:
header = lines.pop(0)
fields = header[0:8], header[11:21], header[23:33], header[36:46]
# print(fields)
You get:
('Last Nam', 'Midle Name', 'First Name', 'Student ID')
You can drop the line of hyphens:
lines.pop(0)
For each line, you can extract values using slices too. Note: slice indices are slightly different:
for line in lines:
record = line[0:8], line[12:21], line[23:34], line[36:46]
# print(record)
You'll get values with trailing space:
('Howard ', ' ', ' Moe ', 'howar1m ')
('Howard ', ' ', ' Curly ', 'howar1c ')
('Fine ', ' ', ' Lary ', 'fine1l ')
('Howard ', ' ', ' Shemp ', 'howar1s ')
('Besser ', ' ', ' Joe ', 'besse1j ')
('DeRita ', 'Joe ', ' Curly ', 'derit1cj ')
('Tiure ', 'Desilijic', ' Jaba ', 'tiure1jd ')
('Tharen ', ' ', ' Bria ', 'thare1b ')
To avoid trailing spaces, use str.strip() function:
for line in lines:
record = line[0:8], line[12:21], line[23:34], line[36:46]
record = [v.strip() for v in record]
# print(record)
You get:
['Howard', '', 'Moe', 'howar1m']
['Howard', '', 'Curly', 'howar1c']
['Fine', '', 'Lary', 'fine1l']
['Howard', '', 'Shemp', 'howar1s']
['Besser', '', 'Joe', 'besse1j']
['DeRita', 'Joe', 'Curly', 'derit1cj']
['Tiure', 'Desilijic', 'Jaba', 'tiure1jd']
['Tharen', '', 'Bria', 'thare1b']
At this point, I recommend you to store your record as a dict in a list:
records = []
for line in lines:
    # Fixed-width columns: last name, middle name, first name, student ID.
    record = line[0:8], line[12:21], line[23:34], line[36:46]
    record = [v.strip() for v in record]
    # Pair each value with its column title from `fields`. Zipping with
    # `header` (the raw header string) would use its first four characters
    # as the keys instead.
    records.append(dict(zip(fields, record)))
You get:
[{'First Name': 'Moe', 'Last Nam': 'Howard', 'Midle Name': '', 'Student ID': 'howar1m'},
{'First Name': 'Curly', 'Last Nam': 'Howard', 'Midle Name': '', 'Student ID': 'howar1c'},
{'First Name': 'Lary', 'Last Nam': 'Fine', 'Midle Name': '', 'Student ID': 'fine1l'},
{'First Name': 'Shemp', 'Last Nam': 'Howard', 'Midle Name': '', 'Student ID': 'howar1s'},
{'First Name': 'Joe', 'Last Nam': 'Besser', 'Midle Name': '', 'Student ID': 'besse1j'},
{'First Name': 'Curly', 'Last Nam': 'DeRita', 'Midle Name': 'Joe', 'Student ID': 'derit1cj'},
{'First Name': 'Jaba', 'Last Nam': 'Tiure', 'Midle Name': 'Desilijic', 'Student ID': 'tiure1jd'},
{'First Name': 'Bria', 'Last Nam': 'Tharen', 'Midle Name': '', 'Student ID': 'thare1b'}]
But you can also use a class:
class Student(object):
    """A student record with public name and ID attributes and a readable repr."""

    # Attributes shown by __repr__, in display order.
    _REPR_FIELDS = ("first_name", "middle_name", "last_name", "student_id")

    def __init__(self, first_name, middle_name, last_name, student_id):
        self.first_name = first_name
        self.middle_name = middle_name
        self.last_name = last_name
        self.student_id = student_id

    def __repr__(self):
        """Return ``<Student('first', 'middle', 'last', 'id')>``."""
        fmt = "<Student('{first_name}', '{middle_name}', '{last_name}', '{student_id}')>"
        values = {attr: getattr(self, attr) for attr in self._REPR_FIELDS}
        return fmt.format(**values)
And construct a list of students:
students = []
for line in lines:
record = line[0:8], line[12:21], line[23:34], line[36:46]
record = [v.strip() for v in record]
students.append(Student(*record))
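You get (note that each record is ordered last name, middle name, first name, ID, so `Student(*record)` stores the last name in `first_name` and the first name in `last_name`; reorder the values if that matters):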
[<Student('Howard', '', 'Moe', 'howar1m')>,
<Student('Howard', '', 'Curly', 'howar1c')>,
<Student('Fine', '', 'Lary', 'fine1l')>,
<Student('Howard', '', 'Shemp', 'howar1s')>,
<Student('Besser', '', 'Joe', 'besse1j')>,
<Student('DeRita', 'Joe', 'Curly', 'derit1cj')>,
<Student('Tiure', 'Desilijic', 'Jaba', 'tiure1jd')>,
<Student('Tharen', '', 'Bria', 'thare1b')>]

list_of_students = []
with open('students.txt') as f:
for line in f:
data = line.split()
if len(data) == 3:
firstname, lastname, id = data
list_of_students.append(Student(firstname, '', lastname, id))
elif len(data) == 4:
list_of_students.append(Student(*data))
else:
raise ValueError
I'm not sure exactly how your input file is laid out, so there's a little processing here to handle the cases where there is no middle name.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python remove newlines from a column in csv file - python

Related

PFF data scraping not recognizing text

Python to print out to excel or csv from a for loop

Save 2D array to CSV

Python: split strings and form dictionaries in the list

Python - creating multiple objects for a class

Categories

Resources