Python: Replace one cell in a CSV file - python

I have a CSV file that has a single cell that I want to edit.
I can write a pretty simple function that, for instance, can look up an ID field in the file and return the row of the ID in question:
id = 3 #column number of the ID field
csvfile = open(os.path.join(LOCAL_FOLDER, "csvfile.csv"), "rU")
csvFile= csv.reader(csvfile, delimiter=",")
def lookup(ID):
rowNo = 1
for row in csvFile:
if row[id] == ID:
return rowNo
else:
rowNo += 1
return 0
What I want to do is to write a corresponding replace function that will take in an ID, a column variable and a data variable:
def replace(ID, col, data):
row = lookup(ID)
#use a CSV writer to replace the item at row, col with data
I have no idea how to do this, all of the examples I can find for how to use the writer only show you how to completely rewrite an entire .CSV file, which is not what I'm looking to do; I want an equivalent of a PUT rather than a POST.

fwiw, per inspectorG4dget's suggestion, I rewrote my code as follows:
LOCAL_FOLDER = os.getcwd()
CSV_FILE = "csvfile.csv"
def lookup(ID):
csvfile = open(os.path.join(LOCAL_FOLDER, CSV_FILE), "rU")
csvFile= csv.reader(csvfile, delimiter=",")
rowNo = 1
for row in csvFile:
if row[id] == ID:
csvfile.close()
return rowNo
else:
rowNo += 1
csvfile.close()
return 0
def replace(ID, col, data):
index = 1
row = lookup(ID)
if row == 0:
return 0
csvwritefile = open(os.path.join(LOCAL_FOLDER, "temp.csv"), "w")
csvWriteFile = csv.writer(csvwritefile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) #obviously change this if you want a diff quoting format
csvreadfile = open(os.path.join(LOCAL_FOLDER, CSV_FILE), "rU")
csvReadFile= csv.reader(csvreadfile, delimiter=",")
for readrow in csvReadFile:
if index == row:
temp = readrow
temp[col] = data
csvWriteFile.writerow(temp)
index += 1
else:
index += 1
csvWriteFile.writerow(readrow)
csvwritefile.close()
csvreadfile.close()
os.rename(os.path.join(LOCAL_FOLDER, "temp.csv"), os.path.join(LOCAL_FOLDER, CSV_FILE))
return 1

Related

how to remove entire row that has empty cells in csv file using python

First off, I do not have pandas framework and am unable to install it. I am hoping that I can solve this problem without pandas.
I am trying to clean my data using python framework, by removing rows that contain empty cells.
this is my code:
import csv
input_file = 'test.csv'
output_file = 'test1.csv'
cols_to_remove =[0,1,9,11,14,15,23,28,29,32,33,37,38,39,41,43,44,45,46,47,48,49]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0
with open(input_file, "r") as source: #to run and delete column
reader = csv.reader(source)
with open(output_file, "w") as result:
writer = csv.writer(result)
for row in reader:
row_count += 1
print('\r{0}'.format(row_count)) # Print rows processed
for col_index in cols_to_remove:
del row[col_index]
writer.writerow(row)
print(row)
I have tried codes from other similar questions asked, however it prints into an empty file.
Assuming that the empty row is in fact one that looks like this
,,,,,,,,,,,,,, (and many more)
you can do the following:
import csv
input_file = 'test.csv'
output_file = 'test1.csv'
cols_to_remove =[0,1,9,11,14,15,23,28,29,32,33,37,38,39,41,43,44,45,46,47,48,49]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0
with open(input_file, "r") as source: #to run and delete column
reader = csv.reader(source)
with open(output_file, "w") as result:
writer = csv.writer(result)
for row in reader:
row_count += 1
print('\r{0}'.format(row_count)) # Print rows processed
all_empty = False # new
for cell in row: # new
if len(cell) == 0: # new
all_empty = True # new
break# new
if all_empty: # new
continue # new
for col_index in cols_to_remove:
del row[col_index]
writer.writerow(row)
print(row)
try to skip row if any cell empty like this:
if any(cel is None or cel == '' for cel in row):
continue
Here's the code:
import csv
input_file = 'hey.txt'
output_file = 'test1.csv'
cols_to_remove = [0, 1, 9, 11, 14, 15, 23, 28, 29, 32, 33, 37, 38]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0
with open(input_file, "r+") as source: # to run and delete column
reader = csv.reader(source)
with open(output_file, "w+") as result:
writer = csv.writer(result)
for row in reader:
row_count += 1
print('\r{0}'.format(row_count)) # Print rows processed
if any(cel is None or cel == '' for cel in row):
continue
for col_index in cols_to_remove:
try:
del row[col_index]
except Exception:
pass
writer.writerow(row)
print(row)
files and cols_to_remove were changed, please, use your own.
step of skipping might be moving after row deleting.
Checking length of row as someone suggested might not work as empty rows in csv may contain empty Strings ''. So len(row) wouldn't return 0 because it is a list of empty strings.
To simply delete empty rows in csv try adding the following check
skip_row = True
for item in row:
if item != None and item != '':
skip_row = False
if not skip_row:
# process row
Full code would be
import csv
input_file = 'test.csv'
output_file = 'test1.csv'
cols_to_remove = [0,1,9,11,14,15,23,28,29,32,33,37,38,39,41,43,44,45,46,47,48,49]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0
with open(input_file, "r") as source: #to run and delete column
reader = csv.reader(source)
with open(output_file, "w", newline='' ) as result:
writer = csv.writer(result)
for row in reader:
row_count += 1
print('\r{0}'.format(row_count)) # Print rows processed
skip_row = True
for item in row:
if item != None and item != '':
skip_row = False
if not skip_row:
for col_index in cols_to_remove:
del row[col_index]
writer.writerow(row)
print(row)
Here we check if each element of a row is None type or an empty string and decide if we should skip that row or not.
you can use this for delete all the row that have a null value,
data = data.dropna()
and this one for delete column,
data = data.drop(['column'], axis=1)

Calculating row and column totals form csv files

I have the following CSV file about family expenses:
Family, Medical, Travel, Education
Smith, 346, 566, 45
Taylor, 56,837,848
I want to be able to calculate the row totals and column totals. For example:
Smith = 346+566+45
Taylor = 56+837+848
Medical = 346+56
Travel = 566+837
Education = 45+848
I have the following so far:
import csv
file = open('Family expenses.csv', newline='')
reader = csv.reader(file)
header = next(reader)
data = [row for row in header]
ndata = []
x = 0
for x in range(0, 3):
for i in data[x]:
i.split(',')
x += 1
ndata.append(i)
rdata = [int(s) if s.isdecimal() else s for s in ndata]
There's no need for pandas for this; using DictReader makes it easy:
import csv
file = open("Family expenses.csv", newline="")
reader = csv.DictReader(file, skipinitialspace=True)
results = {}
for row in reader:
results[row["Family"]] = 0 # initialize result for each family name
for key, value in row.items():
if key == "Family":
continue
if key not in results: # initialize result for each category
results[key] = 0
results[key] += float(value) # add value for category
results[row["Family"]] += float(value) # add value for family name
for key, result in results.items():
print(key, result)
I used skipinitialspace because there were some whitespaces in your CSV data.
#Using a list in Python. Here you go
import csv
file = open('Family expenses.csv', newline='')
reader = csv.reader(file)
header = next(reader) #read first row & skip first row (header)
header.pop(0) #removing [0,0] first row first column for column wise sum heading
num_of_cols = len(header) #counting #columns
sum_col=[0,0,0] #a list for columnwise sum
j,temp=0,0
for row in reader:
sum_row,i = 0,0
print(row[0])
for i in range(1,len(row)):
sum_row+=int(row[i])
sum_col[i-1]=int(sum_col[i-1])+int(row[i])
print(sum_row)
print(header)
print(sum_col)`

Write variable output to a specific column in a CSV?

I'm working on a Python script that scrapes data from an Excel doc, then writes the output to a .csv.
I was able to grab the data and get it to write to the .csv, but all of the data goes into the first column.
I need the bar data to go into the 4th and the foo to go into the 5th column, so I tried to use csv.reader to select the row, and this runs without error but doesn't actually write to the .csv file.
Here's my code:
import xlrd
import csv
###Grab the data
def get_row_values(workSheet, row):
to_return = []
num_cells = myWorksheet.ncols - 1
curr_cell = -1
while curr_cell < num_cells:
curr_cell += 1
cell_value = myWorksheet.cell_value(row, curr_cell)
to_return.append(cell_value)
return to_return
file_path = 'map_test.xlsx'
output = []
output_bar = []
output_foo = []
myWorkbook = xlrd.open_workbook(file_path)
myWorksheet = myWorkbook.sheet_by_name('Sheet1')
num_rows = myWorksheet.nrows - 1
curr_row = 0
column_names = get_row_values(myWorksheet, curr_row)
print len(column_names)
while curr_row < num_rows:
curr_row += 1
row = myWorksheet.row(curr_row)
this_row = get_row_values(myWorksheet, curr_row)
x = 0
while x <len(this_row):
if this_row[x] == 'x':
output.append([this_row[0], column_names[x]])
output_bar.append([column_names[x]])
output_foo.append([this_row[0]])
print output
myData = [["number", "name", "version", "bar",
"foo"]]
##### Next section is the code in question, it
####doesn't error out, but won't write to the .csv######
myFile = open("test123.csv", "w")
writer = csv.writer(myFile)
with open('test123.csv', 'r') as csvfile:
reader = csv.reader(csvfile, delimiter=',')
for row in reader:
row[5] = myFile.readline()
writer.writerows(output_foo)
row[4] = myFile.readline()
writer.writerows(outpu_bar)
#####This successfully writes to the csv, but
#####all data to first column#####
# myFile = open('test123.csv', 'w')
# with myFile:
# writer = csv.writer(myFile)
# writer.writerows(myData)
# #writer.writerows(output)
# writer.writerows(output_foo)
# writer.writerows(output_bar)
x += 1
print ("CSV Written")

Writing columns to a CSV file

I would like to update a column called Score for a specific row in a csv file. When a button is pressed, I would like the code to search the csv file until the row with the specified name is found (which is stored in variable name and randomly pulled from the csv file in a previous function called NameGenerator()), and update the relevant cell in the Score column to increment by 1.
Please note I am using an excel file saved as a .csv for this.
Any ideas how to do this? The code below does not work. Any help would be appreciated.
def Correct():
writer = csv.writer(namelist_file)
score=0
for row in writer:
if row[0] == name:
score=score+1
writer.writerow([col[1]] = score)
![The CSV file looks as follows
]1
So for example if the name tom is selected (elsewhere in the code, however stored in variable name), his score of 3 should be incremented by 1, turning into 4.
Here is what the function which pulls a random name from the csv file looks like:
def NameGenerator():
namelist_file = open('StudentNames&Questions.csv')
reader = csv.reader(namelist_file)
rownum=0
global array
array=[]
for row in reader:
if row[0] != '':
array.append(row[0])
rownum=rownum+1
length = len(array)-1
i = random.randint(1,length)
name = array[i]
return name
Can you please check if this works :
import sys
import random,csv
def update(cells):
d=""
for cell in cells:
d=d + str(cell)+","
return d[:-1]
def update_score(name):
with open('StudentNames&Questions.csv', 'r') as file:
data = file.readlines()
name_index = - 1
score_index = -1
headers = data[0]
for index,header in enumerate(headers.split(",")):
if header.strip() == 'Names':
name_index=index
if header.strip() == 'Score':
score_index=index
if name_index == -1 or score_index == -1:
print "Headers not found"
sys.exit()
for index,row in enumerate(data):
cells = row.split(",")
if cells[name_index] == name:
cells[score_index] = int(cells[score_index]) + 1
data[index]=update(cells)
with open('/Users/kgautam/tmp/tempfile-47', 'w') as file:
file.writelines(data)
def NameGenerator():
namelist_file = open('StudentNames&Questions.csv')
reader = csv.reader(namelist_file)
rownum=0
global array
array=[]
for row in reader:
if row[0] != '':
array.append(row[0])
rownum=rownum+1
length = len(array)-1
i = random.randint(1,length)
name = array[i]
return name
randome_name=NameGenerator()
update_score(randome_name)

Search for string in CSV Files using python and write the results

#!/usr/bin/python
import csv
import re
string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')
#searched = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
print "hello Pythong! "
#def does_match(string):
# stringl = string.lower()
# return any(s in stringl for s in searched)
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)
for row in reader:
found = False
for col in row:
if col in [string_1, string_2, string_3] and not found:
writer.writerow(row)
found = True
#for row in reader:
# if any(does_match(col) for col in row):
# writer.writerow(row[:2]) # write only 2 first columns
inFile.close()
outFile.close()
I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.
Sample Data File
LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300
I'm trying to figure out how to search a CSV file for 3 items. If
those items exist print the row. Ideally I would like only Columns 1
and 3 to print to a new file.
Try this:
import csv
search_for = ['OneTouch AT','LinkRunner AT','AirCheck']
with open('in.csv') as inf, open('out.csv','w') as outf:
reader = csv.reader(inf)
writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
for row in reader:
if row[0] in search_for:
print('Found: {}'.format(row))
writer.writerow(row)
#!/usr/bin/python
import csv
import numpy as np
class search_csv(object):
def __init__(self, infile, outfile):
infile = open(infile, 'rb')
read_infile = [i for i in csv.reader(infile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)]
self.non_numpy_data = read_infile
self.data = np.array(read_infile, dtype=None)
self.outfile = open(outfile, 'wb')
self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
def write_to(self, matched_values):
self.writer_.writerows(matched_values)
print ' Matched Values Written '
return True
def searcher(self, items, return_cols=[0,2]): ##// items should be passed as list -> ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
find_these = np.array(items, dtype=None)
matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0]
matching_data = self.data[matching_y][:,return_cols]
self.write_to(matching_data)
self.outfile.close()
return True
def non_numpy_search(self, items, return_cols=[0,2]):
lst = []
for i in self.non_numpy_data:
for ii in items:
if ii in i:
z = []
for idx in return_cols:
z.append(i[idx])
lst.append(z)
break
self.write_to(lst)
return True
### now use the class ###
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'
non_numpy_search(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)
By the phrasing of your question I'm assuming you just want to complete the task at hand and don't really care how. So copy and paste this in and use your data file as the 'IN_FILE' value and the file name you want to write to as the 'OUT_FILE' value. Place the values you want to search for in the 'SEARCHING_FOR' list as you're done.
Things to note....
SEARCHING_FOR should be a list.
the values in SEARCHING_FOR are matched EXACTLY so 'A' will not match 'a'. If you want a to use a regex or something more complex let me know.
In function 'non_numpy_search' there is a 'return_cols' parameter. It defaults to the first and 3rd column.
If you don't have numpy let me know.
#!/usr/bin/python
import csv
import re
import sys
import gdata.docs.service
#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')
searched = ['aircheck', 'linkrunner at', 'onetouch at']
def find_group(row):
"""Return the group index of a row
0 if the row contains searched[0]
1 if the row contains searched[1]
etc
-1 if not found
"""
for col in row:
col = col.lower()
for j, s in enumerate(searched):
if s in col:
return j
return -1
def does_match(string):
stringl = string.lower()
return any(s in stringl for s in searched)
#Opens Input file for read and output file to write.
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)
#for row in reader:
# found = False
# for col in row:
# if col in [string_1, string_2, string_3] and not found:
# writer.writerow(row)
# found = True
"""Built a list of items to sort. If row 12 contains 'LinkRunner AT' (group 1),
one stores a triple (1, 12, row)
When the triples are sorted later, all rows in group 0 will come first, then
all rows in group 1, etc.
"""
stored = []
for i, row in enumerate(reader):
g = find_group(row)
if g >= 0:
stored.append((g, i, row))
stored.sort()
for g, i, row in stored:
writer.writerow(tuple(row[k] for k in (0,2))) # output col 1 & 5
#for row in reader:
# if any(does_match(col) for col in row):
# writer.writerow(row[:2]) # write only 2 first columns
# Closing Input and Output files.
inFile.close()
outFile.close()

Categories

Resources