First off, I do not have the pandas library and am unable to install it, so I am hoping to solve this problem without pandas.
I am trying to clean my data in Python by removing rows that contain empty cells.
This is my code:
import csv

input_file = 'test.csv'
output_file = 'test1.csv'
cols_to_remove = [0, 1, 9, 11, 14, 15, 23, 28, 29, 32, 33, 37, 38, 39, 41, 43, 44, 45, 46, 47, 48, 49]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0

with open(input_file, "r") as source:  # to run and delete column
    reader = csv.reader(source)
    with open(output_file, "w") as result:
        writer = csv.writer(result)
        for row in reader:
            row_count += 1
            print('\r{0}'.format(row_count))  # Print rows processed
            for col_index in cols_to_remove:
                del row[col_index]
            writer.writerow(row)
            print(row)
I have tried code from other similar questions, but it just produces an empty output file.
Assuming that the empty row is in fact one that looks like this
,,,,,,,,,,,,,, (and many more)
you can do the following:
import csv

input_file = 'test.csv'
output_file = 'test1.csv'
cols_to_remove = [0, 1, 9, 11, 14, 15, 23, 28, 29, 32, 33, 37, 38, 39, 41, 43, 44, 45, 46, 47, 48, 49]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0

with open(input_file, "r") as source:  # to run and delete column
    reader = csv.reader(source)
    with open(output_file, "w") as result:
        writer = csv.writer(result)
        for row in reader:
            row_count += 1
            print('\r{0}'.format(row_count))  # Print rows processed
            all_empty = False            # new
            for cell in row:             # new
                if len(cell) == 0:       # new
                    all_empty = True     # new
                    break                # new
            if all_empty:                # new
                continue                 # new
            for col_index in cols_to_remove:
                del row[col_index]
            writer.writerow(row)
            print(row)
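If the rows to drop really are completely empty (every cell is the empty string), the check can be written more compactly, because empty strings are falsy in Python. This is just a sketch of that shortcut, reusing the same reader, writer and cols_to_remove as above:

for row in reader:
    if not any(row):  # True when every cell is '' (or the row has no cells)
        continue      # skip the fully empty row
    for col_index in cols_to_remove:
        del row[col_index]
    writer.writerow(row)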
Try skipping the row if any cell is empty, like this:
if any(cel is None or cel == '' for cel in row):
    continue
Here's the code:
import csv

input_file = 'hey.txt'
output_file = 'test1.csv'
cols_to_remove = [0, 1, 9, 11, 14, 15, 23, 28, 29, 32, 33, 37, 38]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0

with open(input_file, "r+") as source:  # to run and delete column
    reader = csv.reader(source)
    with open(output_file, "w+") as result:
        writer = csv.writer(result)
        for row in reader:
            row_count += 1
            print('\r{0}'.format(row_count))  # Print rows processed
            if any(cel is None or cel == '' for cel in row):
                continue
            for col_index in cols_to_remove:
                try:
                    del row[col_index]
                except Exception:
                    pass
            writer.writerow(row)
            print(row)
The file names and cols_to_remove were changed; please use your own.
The skipping step could also be moved after the column deletion, so that only the remaining cells are checked.
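A minimal sketch of that variant, assuming the same reader, writer and cols_to_remove as above, deletes the columns first and only tests the cells that remain:

for row in reader:
    for col_index in cols_to_remove:
        try:
            del row[col_index]
        except Exception:
            pass
    # only the cells that survived the column deletion are checked
    if any(cel is None or cel == '' for cel in row):
        continue
    writer.writerow(row)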
Checking the length of the row, as someone suggested, might not work, because empty rows in a CSV file may contain empty strings ''. In that case len(row) would not return 0, since the row is a list of empty strings.
To simply delete empty rows in a CSV file, try adding the following check:
skip_row = True
for item in row:
    if item is not None and item != '':
        skip_row = False
if not skip_row:
    # process row
Full code would be
import csv

input_file = 'test.csv'
output_file = 'test1.csv'
cols_to_remove = [0, 1, 9, 11, 14, 15, 23, 28, 29, 32, 33, 37, 38, 39, 41, 43, 44, 45, 46, 47, 48, 49]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0

with open(input_file, "r") as source:  # to run and delete column
    reader = csv.reader(source)
    with open(output_file, "w", newline='') as result:
        writer = csv.writer(result)
        for row in reader:
            row_count += 1
            print('\r{0}'.format(row_count))  # Print rows processed
            skip_row = True
            for item in row:
                if item is not None and item != '':
                    skip_row = False
            if not skip_row:
                for col_index in cols_to_remove:
                    del row[col_index]
                writer.writerow(row)
                print(row)
Here we check whether each element of a row is None or an empty string and decide whether to skip that row.
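The same loop-and-flag check can be collapsed into a single all() expression; this is only an equivalent shorthand, not a change of approach:

# skip the row when every cell is None or an empty string
if all(item is None or item == '' for item in row):
    continue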
You can use this to delete all the rows that have a null value,
data = data.dropna()
and this one to delete a column,
data = data.drop(['column'], axis=1)
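The original question asked for a solution without pandas, but if pandas is available, a minimal end-to-end sketch (the file and column names here are placeholders) would be:

import pandas as pd

data = pd.read_csv('test.csv')          # load the CSV into a DataFrame
data = data.dropna()                    # drop every row containing a null value
data = data.drop(['column'], axis=1)    # drop an unwanted column by name
data.to_csv('test1.csv', index=False)   # write the cleaned data back out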
My file contains a "Name" column and 5 eye-movement values (TFF, TFD, TVD, FB, FC). I want to sum up the eye-movement values for rows that have the same value in the Name column. The code seems to run without any errors, but my output files stay empty. Could anyone give me some pointers on where it went wrong? Here's the code:
import csv

file = open("P01_All.csv", "r")  # Open CSV File in Read Mode
reader = csv.reader(file)        # Create reader object which iterates over lines

outfile = open("Name.csv", "w")
outfile2 = open("TFF.csv", "w")
outfile3 = open("TFD.csv", "w")
outfile4 = open("TVD.csv", "w")
outfile5 = open("FB.csv", "w")
outfile6 = open("FC.csv", "w")

class Object:  # Object to store unique data
    def __init__(self, Name, TFF, TFD, TVD, FB, FC):
        self.Name = Name
        self.TFF = TFF
        self.TFD = TFD
        self.TVD = TVD
        self.FB = FB
        self.FC = FC

rownum = 0  # Row Number currently iterating over
list = []   # List to store objects

def checkList(Name, TFF, TFD, TVD, FB, FC):
    for object in list:  # Iterate through list
        if object.Name == Name:
            object.TFF += float(TFF)
            object.TFD += float(TFD)
            object.TVD += float(TVD)
            object.FB += float(FB)
            object.FC += float(FC)
            return
    newObject = Object(Name, float(TFF), float(TFD), float(TVD), float(FB), float(FC))  # Create a new object with new eye and TFF
    list.append(newObject)  # Add to list and break out

for row in reader:  # Iterate through all the rows
    if rownum == 0:  # Store header row seperately to not get confused
        header = row
    else:
        Name = row[0]
        TFF = row[1]
        TFD = row[2]
        TVD = row[3]
        FB = row[4]
        FC = row[5]
        if len(list) == 0:  # Default case if list = 0
            newObject = Object(Name, float(TFF), float(TFD), float(TVD), float(FB), float(FC))
            list.append(newObject)
        else:  # If not...
            checkList(Name, TFF, TFD, TVD, FB, FC)

rownum += 1

for each in list:  # Print out result
    # print(each.Name, each.TFF, each.TFD, each.TVD, each.FB, each.FC)
    outfile.write(each.Name + "\n")
    outfile2.write(str(each.TFF) + "\n")
    outfile3.write(str(each.TFD) + "\n")
    outfile4.write(str(each.TVD) + "\n")
    outfile5.write(str(each.FB) + "\n")
    outfile6.write(str(each.FC) + "\n")

file.close()  # Close file
outfile.close()
outfile2.close()
outfile3.close()
outfile4.close()
outfile5.close()
outfile6.close()
Like @zwer said, the reason you have nothing in your output files is that you don't increment rownum while you are iterating over the rows of your input file. By indenting the line rownum += 1 you put it inside the loop where you read each row. With minimal modification it would look like this:
import csv

file = open("P01_All.csv", "r")  # Open CSV File in Read Mode
reader = csv.reader(file)        # Create reader object which iterates over lines

outfile = open("Name.csv", "w")
outfile2 = open("TFF.csv", "w")
outfile3 = open("TFD.csv", "w")
outfile4 = open("TVD.csv", "w")
outfile5 = open("FB.csv", "w")
outfile6 = open("FC.csv", "w")

class Movement_value:  # Object to store unique data
    def __init__(self, Name, TFF, TFD, TVD, FB, FC):
        self.Name = Name
        self.TFF = TFF
        self.TFD = TFD
        self.TVD = TVD
        self.FB = FB
        self.FC = FC

rownum = 0     # Row Number currently iterating over
notebook = []  # List to store objects

def checkList(Name, TFF, TFD, TVD, FB, FC):
    for value in notebook:  # Iterate through list
        if value.Name == Name:
            value.TFF += float(TFF)
            value.TFD += float(TFD)
            value.TVD += float(TVD)
            value.FB += float(FB)
            value.FC += float(FC)
            return
    newObject = Movement_value(Name, float(TFF), float(TFD), float(TVD), float(FB), float(FC))  # Create a new object with new eye and TFF
    notebook.append(newObject)  # Add to list and break out

for row in reader:  # Iterate through all the rows
    if rownum == 0:  # Store header row seperately to not get confused
        header = row
    else:
        Name = row[0]
        TFF = row[1]
        TFD = row[2]
        TVD = row[3]
        FB = row[4]
        FC = row[5]
        if len(notebook) == 0:  # Default case if list = 0
            newObject = Movement_value(Name, float(TFF), float(TFD), float(TVD), float(FB), float(FC))
            notebook.append(newObject)
        else:  # If not...
            checkList(Name, TFF, TFD, TVD, FB, FC)
    rownum += 1

for each in notebook:  # Print out result
    # print(each.Name, each.TFF, each.TFD, each.TVD, each.FB, each.FC)
    outfile.write(each.Name + "\n")
    outfile2.write(str(each.TFF) + "\n")
    outfile3.write(str(each.TFD) + "\n")
    outfile4.write(str(each.TVD) + "\n")
    outfile5.write(str(each.FB) + "\n")
    outfile6.write(str(each.FC) + "\n")

file.close()  # Close file
outfile.close()
outfile2.close()
outfile3.close()
outfile4.close()
outfile5.close()
outfile6.close()
I have made an additional change: it's better not to use list or object as variable names, because they are built-in names in Python and reusing them shadows their meaning. You could get a bad surprise eventually.
But we can do more:
We don't need to create a class to hold the values.
We can work with the files using context managers, so that no file is kept open longer than necessary.
Here's a version that is shorter than yours:
import csv
import pathlib

input_filepath = pathlib.Path("Input.csv")
output_filepath = pathlib.Path("")

with open(input_filepath, newline="") as input_file:
    # Where our data will be kept
    input_data = {}
    csv_reader = csv.reader(input_file)
    # Skip the first line
    next(csv_reader)
    for (Name, *rest_of_data) in csv_reader:
        if Name in input_data:
            for (index_of_data_to_update, data_to_update) in enumerate(rest_of_data):
                input_data[Name][index_of_data_to_update] += float(data_to_update)
        else:
            input_data[Name] = [float(x) for x in rest_of_data]

output_rows = ([name] + list(data) for (name, data) in input_data.items())

output_filenames = [
    "Name.csv",
    "TFF.csv",
    "TFD.csv",
    "TVD.csv",
    "FB.csv",
    "FC.csv"
]
output_files = [open(output_filepath / filename, "w") for filename in output_filenames]

# Open all the files
with output_files[0], output_files[1], output_files[2], output_files[3], \
        output_files[4], output_files[5]:
    for row in output_rows:
        for (output_file, data) in zip(output_files, row):
            output_file.write("{}\n".format(data))
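As a side note on the design, the per-name accumulation could also be done with collections.defaultdict, which removes the explicit `if Name in input_data` test. This is only a sketch of that alternative, assuming the same csv_reader (header already skipped) and the same five numeric columns:

import collections

# each new name starts with five zeroed totals
totals = collections.defaultdict(lambda: [0.0] * 5)
for (name, *rest_of_data) in csv_reader:
    for (i, value) in enumerate(rest_of_data):
        totals[name][i] += float(value)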
I am new to Python and I am trying to read CSV data using Python code.
Everything works the first time, but when I edit my .csv file an error occurs that says:
File "D:/wamp/www/optimizer_new/new_code/optimal_lineup.py", line 310, in get_player_list
if (int(row[4]) == -1):
IndexError: list index out of range
I just put an extra space inside my .csv file.
Here is my sample code:
def get_player_list(possible_name):
    file_name = ""
    if (len(possible_name) > 0):
        file_name = possible_name
    else:
        file_name = 'basketball_data2.csv'
    player_list = []
    with open(file_name) as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        reader.next()
        for row in reader:
            if (int(row[4]) == -1):
                #print("Skipping %s" % (row[0]))
                continue
            name = row[0]
            pos_p = get_possible_positions(row[1])
            c = row[2]
            v = row[3]
            my_p = player(int(c) / 100, float(v), name, pos_p, int(row[4]))
            player_list.append(my_p)
            '''
            name = row['Player Name']
            c = row['Salary']
            v = row['FP']
            pos_p = get_possible_positions(row['Pos'])
            player_list.append(player(c, v, name, pos_p))
            '''
    return player_list
My CSV contains these columns:
Player Name,Pos,Salary,FP,Keep/exclude
Any suggestions?
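The traceback suggests that after the edit the file contains a row (for example a blank line or a stray trailing line) with fewer than five fields, so row[4] no longer exists. One defensive sketch, assuming the same loop as above, is to skip rows that are blank or too short:

for row in reader:
    # skip blank lines and rows that are missing columns
    if len(row) < 5 or not any(cell.strip() for cell in row):
        continue
    if (int(row[4]) == -1):
        continue
    # ... process the row as before ...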
#!/usr/bin/python
import csv
import re

string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')
#searched = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

print "hello Pythong! "

#def does_match(string):
#    stringl = string.lower()
#    return any(s in stringl for s in searched)

inFile = open('data.csv', "rb")
reader = csv.reader(inFile)

outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

for row in reader:
    found = False
    for col in row:
        if col in [string_1, string_2, string_3] and not found:
            writer.writerow(row)
            found = True

#for row in reader:
#    if any(does_match(col) for col in row):
#        writer.writerow(row[:2])  # write only 2 first columns

inFile.close()
outFile.close()
I'm trying to figure out how to search a CSV file for 3 items. If those items exist, print the row. Ideally I would like only columns 1 and 3 printed to a new file.
Sample Data File
LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300
Try this:
import csv

search_for = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

with open('in.csv') as inf, open('out.csv', 'w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        if row[0] in search_for:
            print('Found: {}'.format(row))
            writer.writerow(row)
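Note that row[0] in search_for only keeps rows whose first cell is exactly one of the three strings, so a sample row like "LinkRunner AT Video,10,20" would not match, and the whole row is written rather than just columns 1 and 3. A sketch of a substring-matching variant that writes only those two columns (same placeholder file names assumed):

import csv

search_for = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

with open('in.csv') as inf, open('out.csv', 'w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        # keep the row if any search term appears anywhere in the first cell
        if any(term in row[0] for term in search_for):
            writer.writerow([row[0], row[2]])  # columns 1 and 3 only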
#!/usr/bin/python
import csv
import numpy as np

class search_csv(object):
    def __init__(self, infile, outfile):
        infile = open(infile, 'rb')
        read_infile = [i for i in csv.reader(infile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)]
        self.non_numpy_data = read_infile
        self.data = np.array(read_infile, dtype=None)
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    def write_to(self, matched_values):
        self.writer_.writerows(matched_values)
        print ' Matched Values Written '
        return True

    def searcher(self, items, return_cols=[0, 2]):  ##// items should be passed as a list -> ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
        find_these = np.array(items, dtype=None)
        matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0]
        matching_data = self.data[matching_y][:, return_cols]
        self.write_to(matching_data)
        self.outfile.close()
        return True

    def non_numpy_search(self, items, return_cols=[0, 2]):
        lst = []
        for i in self.non_numpy_data:
            for ii in items:
                if ii in i:
                    z = []
                    for idx in return_cols:
                        z.append(i[idx])
                    lst.append(z)
                    break
        self.write_to(lst)
        return True

### now use the class ###
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'

search_csv(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)
By the phrasing of your question I'm assuming you just want to complete the task at hand and don't really care how. So copy and paste this in, use your data file as the IN_FILE value and the file name you want to write to as the OUT_FILE value, and place the values you want to search for in the SEARCHING_FOR list, and you're done.
Things to note....
SEARCHING_FOR should be a list.
The values in SEARCHING_FOR are matched EXACTLY, so 'A' will not match 'a' (a case-insensitive sketch follows below). If you want to use a regex or something more complex, let me know.
In the function 'non_numpy_search' there is a 'return_cols' parameter. It defaults to the first and third columns.
If you don't have numpy let me know.
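For the case-sensitivity note above, a case-insensitive version of the exact-cell match used inside non_numpy_search could look like this sketch (it is not part of the original class):

def row_matches(row, items):
    # exact cell match, ignoring case
    lowered_cells = [cell.lower() for cell in row]
    return any(item.lower() in lowered_cells for item in items)

Using if row_matches(i, items): in place of the inner for ii in items loop would leave the rest of the method unchanged.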
#!/usr/bin/python
import csv
import re
import sys
import gdata.docs.service

#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')

searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of a row
    0 if the row contains searched[0]
    1 if the row contains searched[1]
    etc
    -1 if not found
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
    return -1

def does_match(string):
    stringl = string.lower()
    return any(s in stringl for s in searched)

# Opens Input file for read and output file to write.
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)

outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

#for row in reader:
#    found = False
#    for col in row:
#        if col in [string_1, string_2, string_3] and not found:
#            writer.writerow(row)
#            found = True

"""Build a list of items to sort. If row 12 contains 'LinkRunner AT' (group 1),
one stores a triple (1, 12, row).
When the triples are sorted later, all rows in group 0 will come first, then
all rows in group 1, etc.
"""
stored = []
for i, row in enumerate(reader):
    g = find_group(row)
    if g >= 0:
        stored.append((g, i, row))
stored.sort()

for g, i, row in stored:
    writer.writerow(tuple(row[k] for k in (0, 2)))  # output columns 1 & 3

#for row in reader:
#    if any(does_match(col) for col in row):
#        writer.writerow(row[:2])  # write only 2 first columns

# Closing Input and Output files.
inFile.close()
outFile.close()
I have a CSV file that has a single cell that I want to edit.
I can write a pretty simple function that, for instance, can look up an ID field in the file and return the row of the ID in question:
id = 3  # column number of the ID field
csvfile = open(os.path.join(LOCAL_FOLDER, "csvfile.csv"), "rU")
csvFile = csv.reader(csvfile, delimiter=",")

def lookup(ID):
    rowNo = 1
    for row in csvFile:
        if row[id] == ID:
            return rowNo
        else:
            rowNo += 1
    return 0
What I want to do is to write a corresponding replace function that will take in an ID, a column variable and a data variable:
def replace(ID, col, data):
    row = lookup(ID)
    #use a CSV writer to replace the item at row, col with data
I have no idea how to do this; all of the examples I can find for using the writer only show how to completely rewrite an entire .csv file, which is not what I'm looking to do. I want an equivalent of a PUT rather than a POST.
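For context, a CSV file is just text with variable-length rows, so a single cell cannot be overwritten in place; the usual approach is to read the rows, change the one cell, and write everything back out, which is essentially what the rewrite below does with a temporary file. A minimal Python 3 sketch of the in-memory version (the file path is a placeholder):

import csv

def replace_cell(path, row_no, col, data):
    # read the whole file into a list of rows
    with open(path, newline='') as f:
        rows = list(csv.reader(f))
    # change the one cell, then rewrite the entire file
    rows[row_no][col] = data
    with open(path, 'w', newline='') as f:
        csv.writer(f).writerows(rows)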
fwiw, per inspectorG4dget's suggestion, I rewrote my code as follows:
LOCAL_FOLDER = os.getcwd()
CSV_FILE = "csvfile.csv"

def lookup(ID):
    csvfile = open(os.path.join(LOCAL_FOLDER, CSV_FILE), "rU")
    csvFile = csv.reader(csvfile, delimiter=",")
    rowNo = 1
    for row in csvFile:
        if row[id] == ID:
            csvfile.close()
            return rowNo
        else:
            rowNo += 1
    csvfile.close()
    return 0

def replace(ID, col, data):
    index = 1
    row = lookup(ID)
    if row == 0:
        return 0
    csvwritefile = open(os.path.join(LOCAL_FOLDER, "temp.csv"), "w")
    csvWriteFile = csv.writer(csvwritefile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)  # obviously change this if you want a diff quoting format
    csvreadfile = open(os.path.join(LOCAL_FOLDER, CSV_FILE), "rU")
    csvReadFile = csv.reader(csvreadfile, delimiter=",")
    for readrow in csvReadFile:
        if index == row:
            temp = readrow
            temp[col] = data
            csvWriteFile.writerow(temp)
            index += 1
        else:
            index += 1
            csvWriteFile.writerow(readrow)
    csvwritefile.close()
    csvreadfile.close()
    os.rename(os.path.join(LOCAL_FOLDER, "temp.csv"), os.path.join(LOCAL_FOLDER, CSV_FILE))
    return 1
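A hypothetical call, assuming the ID values are strings and the third column should be updated, would look like:

# update column index 2 of the row whose ID is "1003"
if replace("1003", 2, "new value"):
    print("cell updated")
else:
    print("ID not found")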