I made a script, which reads a given input-file (csv), manipulates the data somehow and writes an output-file (csv).
In my case, my given input-file looks like this:
| sku | article_name |
| 1 | MyArticle |
For my output-file, I need to re-arrange these columns (there are plenty more, but I think i might be able to solve it, when someone shows me the way)
My output-file should look like this:
| article_name | another_column | sku |
| MyArticle | | 1 |
Note, that here is a new column, that isn't in the source csv-file, but it has to be printed anyway (the order is important as well)
This is what I have so far:
#!/usr/bin/env python
# -*- coding: latin_1 -*-
import csv
import argparse
import sys
# Maps source-csv column names to the column names wanted in the output file.
# Only columns listed here are carried over from the input.
header_mappings = {'attr_artikel_bezeichnung1': 'ARTICLE LABEL',
                   'sku': 'ARTICLE NUMBER',
                   'Article label locale': 'Article label locale',
                   'attr_purchaseprice': 'EK-Preis',
                   'attr_salesPrice': 'EuroNettoPreis',
                   'attr_salesunit': 'Einheit',
                   'attr_salesvatcode': 'MwSt.-Satz',
                   'attr_suppliercode': 'Lieferantennummer',
                   'attr_suppliersitemcode': 'Artikelnummer Lieferant',
                   'attr_isbatchitem': 'SNWarenausgang'}
# Per-column value translations applied to each data row, keyed by the
# *output* column name: e.g. in column 'Einheit' the value 'pc' becomes 'St.'.
row_mapping = {'Einheit': {'pc': 'St.'},
               'MwSt.-Satz': {'3': '19'}}
def remap_header(header):
    """For every known source column present in *header* (a mapping of
    source column name -> value), yield (output_column_name, value).
    """
    for source_name, target_name in header_mappings.items():
        if source_name in header:
            yield target_name, header[source_name]
def map_header(header):
    """Yield (column_name, index_of_first_occurrence) for each entry of *header*.

    Matches the original ``header.index(elem)`` behaviour exactly (duplicate
    names all map to their first position) but precomputes the indices once,
    turning the original O(n**2) scan into O(n).
    """
    first_index = {}
    for position, name in enumerate(header):
        # setdefault keeps the *first* position, mirroring list.index().
        first_index.setdefault(name, position)
    for name in header:
        yield name, first_index[name]
def read_csv(filename):
    """Open *filename* (Python 2 binary mode) and lazily yield each
    ';'-separated row as a list of strings.
    """
    with open(filename, 'rb') as incsv:
        for record in csv.reader(incsv, delimiter=';'):
            yield record
def add_header(header, fields=()):
    """Append the extra column names in *fields* to *header*.

    *header* is mutated in place and also returned for convenience.
    Uses list.extend instead of the original manual append loop.
    """
    header.extend(fields)
    return header
def duplicate(csv_row, header_name, fields):
    """Copy the value of column *header_name* into column *fields*.

    Column positions are looked up in the module-level ``new_csv_header``
    list; *csv_row* is modified in place and returned.
    """
    source = new_csv_header.index(header_name)
    target = new_csv_header.index(fields)
    csv_row[target] = csv_row[source]
    return csv_row
def do_new_row(csv_row):
    """Yield the values of *csv_row* in the order of the module-level
    ``new_csv_header``, after applying the value translations in
    ``row_mapping``.  Relies on the module-level ``mapped_header`` dict
    (output column name -> source column index).
    """
    for header_name in new_csv_header:
        # NOTE(review): this inner loop re-applies every row_mapping rule
        # once per output column.  It is idempotent (already-translated
        # values no longer match a mapping key) but redundant; it could be
        # hoisted out of the header loop.
        for r_map in row_mapping:
            row_content = csv_row[mapped_header.get(r_map)]
            if row_content in row_mapping.get(r_map):
                csv_row[mapped_header.get(r_map)] = row_mapping.get(r_map).get(row_content)
        try:
            yield csv_row[mapped_header.get(header_name)]
        except TypeError:
            # mapped_header.get() returns None for output-only columns
            # (e.g. 'Article label locale'); csv_row[None] raises TypeError
            # and the column is skipped here (padded with '' by the caller).
            continue
if __name__ == '__main__':
    # Command line: -i/--infile and -o/--outfile name the csv files.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', metavar='CSV')
    parser.add_argument('-o', '--outfile', metavar='CSV')
    args = parser.parse_args()
    arguments = vars(args)
    # Called without any arguments: show usage and quit.
    if len(sys.argv[1:]) == 0:
        parser.print_usage()
        sys.exit(0)
    # print arguments
    # parse_csv(**arguments)
    """
    """
    csv_reader_iter = read_csv(arguments.get('infile'))
    # new csv header
    new_csv_header = list()
    # First row of the input file is the old header.
    csv_header = next(csv_reader_iter)
    for h in csv_header:
        if h in header_mappings:
            new_csv_header.append(header_mappings.get(h))
    # print new_csv_header
    # Output-only columns (not present in the source file) go at the end.
    new_csv_header = add_header(new_csv_header, ('Article label locale', 'Nummer'))
    # mapped_header: output column name -> index of that column in the input.
    mapped_header = dict(remap_header(dict(map_header(csv_header))))
    # print mapped_header
    with open(arguments.get('outfile'), 'wb') as outcsv:
        csv_writer = csv.writer(outcsv, delimiter=';')
        csv_writer.writerow(new_csv_header)
        for row in csv_reader_iter:
            row = list(do_new_row(row))
            # Pad the row with empty strings for the output-only columns.
            delta = len(new_csv_header) - len(row)
            if delta > 0:
                row = row + (delta * [''])
            # duplicate(row, 'SNWarenausgang', 'SNWareneingang')
            # duplicate(row, 'SNWarenausgang', 'SNWareneingang')
            csv_writer.writerow(row)
    print "Done."
    """
    print new_csv_header
    for row in csv_reader_iter:
        row = list(do_new_row(row))
        delta = len(new_csv_header) - len(row)
        if delta > 0:
            row = row + (delta * [''])
        duplicate(row, 'Herstellernummer', 'Nummer')
        duplicate(row, 'SNWarenausgang', 'SNWareneingang')
        print row
    """
Right now, even though it says "ARTICLE LABEL" first, the sku is printed first. My guess: This is due the order of the csv-file, since sku is the first field there... right?
If you use the DictWriter from the csv lib you can specify the order of the columns. Use DictReader to read in rows from your file as dicts. Then you just explicitly specify the order of the keys when you create your DictWriter.
https://docs.python.org/2/library/csv.html#csv.DictReader
As riotburn already suggested, you can use a DictWriter and its fieldnames argument to adjust the order of columns in the new file.
Reordering a file could look like this:
def read_csv(filename):
    """Lazily yield each row of *filename* as a dict (';'-delimited)."""
    with open(filename) as handle:
        for record in csv.DictReader(handle, delimiter=';'):
            yield record
# Desired output column order; 'another_column' does not exist in the source.
columns = ['article_name', 'another_column', 'sku']
with open('newfile.csv', 'w+') as outfh:
    out = csv.DictWriter(outfh, columns, delimiter=';')
    out.writeheader()
    for record in read_csv('oldfile.csv'):
        # add a property
        record['another_column'] = 'foo'
        # write row (using the order specified in columns)
        out.writerow(record)
Related
I have a large dataset that looks like the following
party,cp,qualifier,amount
ABC,DEF,GOOGLE_2,100
ABC,DEF,GOOGLE_2,200
GHI,JKL,FACEBOOK_1,500
GHI,JKL,FACEBOOK_1,-600
I would like to output :
ABC,DEF,GOOGLE,300
GHI,JKL,FACEBOOK,-100
Here is my python code so far:
# NOTE(review): this is the question's broken code, kept as-is for context.
headers = ["valuation_date","party_group_name","type","party_name","cp_group_name","cp_name","qualifier","amount"]
data = {}
with open(t1file,'rb') as f:
    reader = csv.reader(f)
    headers = reader.next()  # Python 2 API; overwrites the list above
    for row in reader:
        party = row[headers.index('party')]
        cp = row[headers.index('cp')]
        qualifier = row[headers.index('qualifier')]
        amount = row[headers.index('amount')]
        if row[headers.index('type')] == "Equity":
            new_qualifier = qualifier.split("_")[0]
            # NOTE(review): all three membership tests check the TOP-LEVEL
            # dict instead of the nested levels, so the branches below
            # never build the intended nesting.
            if party in data.keys():
                if cp in data.keys():
                    if new_qualifier in data.keys():
                        data[party][cp][new_qualifier] += float(amount)
                    else:
                        # This line raises the reported TypeError: data[party]
                        # holds a *string* (see below), so data[party][cp]
                        # indexes a str with a str.
                        data[party][cp][qualifier][amount] = data[party][cp][new_qualifier][amount]
                else:
                    data[cp] = cp      # stores a string, not a nested dict
            else:
                data[party] = party    # stores a string, not a nested dict
When I run the above code I get the following error:
data[party][cp][qualifier][amount] = data[party][cp][new_qualifier][amount]
TypeError: string indices must be integers, not str
Very rusty with Python — apologies if it's glaringly obvious, but any insights as to what I'm doing wrong?
Thanks !
you can use pandas.drop_duplicates to drop duplicates of multiple columns and combine it with pandas.groupby() & sum to get the desired result
>>>import pandas as pd
>>>#read file using pandas.read_csv()
>>>df
party cp qualifier amount
0 ABC DEF GOOGLE_2 100
1 ABC DEF GOOGLE_2 200
2 GHI JKL FACEBOOK_1 500
3 GHI JKL FACEBOOK_1 -600
>>>df['Total'] = df.groupby(['party','cp','qualifier'])['amount'].transform('sum')
>>>print(df.drop_duplicates(subset=['party','cp','qualifier'], keep='last'))
party cp qualifier amount Total
1 ABC DEF GOOGLE_2 200 300
3 GHI JKL FACEBOOK_1 -600 -100
Below
from collections import defaultdict

# Column positions in the input csv.
PARTY_IDX = 0
CP_IDX = 1
QUALIFIER_IDX = 2
AMOUNT_IDX = 3

# Sum amounts grouped by (party, cp, base qualifier).
data = defaultdict(int)
with open('del-me.csv') as f:
    lines = [l.strip() for l in f.readlines()]
    for idx, line in enumerate(lines):
        if idx > 0:  # skip the header row
            fields = line.split(',')
            party = fields[PARTY_IDX]
            cp = fields[CP_IDX]
            qualifier = fields[QUALIFIER_IDX]
            # BUG FIX: the original used qualifier[:qualifier.find('_')],
            # which silently drops the LAST character when there is no '_'
            # (str.find returns -1).  partition keeps the whole string then.
            qualifier = qualifier.partition('_')[0]
            key = ','.join([party, cp, qualifier])
            amount = int(fields[AMOUNT_IDX])
            data[key] += amount
with open('out.csv', 'w') as f:
    for k, v in data.items():
        f.write('{},{}\n'.format(k, v))
del-me.csv
party,cp,qualifier,amount
ABC,DEF,GOOGLE_2,100
ABC,DEF,GOOGLE_2,200
GHI,JKL,FACEBOOK_1,500
GHI,JKL,FACEBOOK_1,-600
out.csv
ABC,DEF,GOOGLE,300
GHI,JKL,FACEBOOK,-100
You have already enough answers, but let me correct your own code to help you derive the answer and understand the original issue:
import csv as csv
# NOTE(review): this hard-coded header list is immediately overwritten by the
# file's real header below.
headers = ["valuation_date","party_group_name","party_name","cp_group_name","cp_name","qualifier","amount"]
data = {}
with open('test_data.csv','rt', encoding='utf-8') as f:
    reader = csv.reader(f)
    headers = next(reader)
    for row in reader:
        party = row[headers.index('party')]
        cp = row[headers.index('cp')]
        qualifier = row[headers.index('qualifier')]
        amount = row[headers.index('amount')]
        # NOTE(review): assumes the file has a 'type' column; the sample data
        # shown in the question does not -- confirm against the real input.
        if row[headers.index('type')] == "Equity":
            new_qualifier = qualifier.split("_")[0]
            if party in data.keys():
                cp_ = data[party]
                if cp in cp_.keys():
                    qualifier_ = data[party][cp]
                    if new_qualifier in qualifier_.keys():
                        # Seen this (party, cp, base qualifier) before: sum.
                        data[party][cp][new_qualifier] += float(amount)
                    else:
                        # NOTE(review): still looks wrong -- indexes with the
                        # *unsplit* qualifier and the amount, and discards the
                        # amount by assigning {}.
                        data[party][cp][qualifier][amount] = {}
                else:
                    # NOTE(review): writes the cp at the TOP level rather than
                    # under data[party].
                    data[cp] = {}
            else:
                # First time this party is seen: build the full nesting.
                data[party] = {}
                data[party][cp] = {}
                data[party][cp][qualifier.split("_")[0]] = float(amount)
print(data)
This gives you
{'ABC': {'DEF': {'GOOGLE': 300.0}}, 'GHI': {'JKL': {'FACEBOOK': -100.0}}}
The problem was how you were populating your dictionary and how you were accessing it.
In order to simplify things, you might use just one key for the dict which is composed out of the identifying parts of a given line.
You might have to extract values by the header names like you already did. The following is based on the specified input. rsplit is used to split the string once at the end in order to use the party,cp,qualifier combination as a key and extract the amount.
def sumUp(path=None):
    """Sum the trailing amount column grouped by the 'party,cp,qualifier'
    prefix of each line and return the result as a dict.

    path -- csv file to read; defaults to the module-level ``t1file``
            (backward compatible with the original zero-argument call).

    Fixes two defects in the original: the result dict was computed but
    never returned, and the file was opened in 'rb' mode while the body
    does str operations (broken under Python 3).  Caveat kept from the
    original: any line containing the substring 'party' is skipped as a
    header.
    """
    if path is None:
        path = t1file
    d = {}
    with open(path) as f:
        for line in f:
            if 'party' in line:
                continue  # skip header
            key, value = line.rsplit(',', 1)  # split once at the end
            d[key] = d[key] + int(value) if key in d else int(value)
    return d
You can do it like this:
from csv import DictReader, DictWriter

# Aggregate amounts per (party, cp, qualifier) combination, then write the
# summed rows back out.
map_dic = dict()
with open('test1.csv', 'r') as fr:
    for record in DictReader(fr, delimiter=','):
        group_key = '{}_{}_{}'.format(record['party'], record['cp'], record['qualifier'])
        if group_key in map_dic:
            map_dic[group_key]['amount'] += int(record['amount'])
        else:
            map_dic[group_key] = {'party': record['party'],
                                  'cp': record['cp'],
                                  'qualifier': record['qualifier'],
                                  'amount': int(record['amount'])}
with open('test2.csv', 'w') as csvfile:
    writer = DictWriter(csvfile, fieldnames=['party', 'cp', 'qualifier', 'amount'])
    writer.writeheader()
    for summed_row in map_dic.values():
        writer.writerow(summed_row)
I have a csv file which contains among other things the names and the phone numbers. I'm only interested in a name only if I've its phone number.
# Collect the names of everyone who has a phone number on file.
# NOTE(review): phone_numbers (the filename) is defined elsewhere.
with open(phone_numbers) as f:
    reader = csv.DictReader(f)
    names = [record['Name'] for record in reader if record['phone']]
But I also want the respective phone number, so I've tried this:
user_data = {}
with open(phone_numbers) as f:
    reader = csv.DictReader(f)
    user_data['Name'] = [record['Name'] for record in reader if record['phone']]
    # NOTE(review): reader is an iterator and is exhausted by the
    # comprehension above, so this second pass sees no rows and produces an
    # empty list -- this is the bug the question is about.
    user_data['phone'] = [record['phone'] for record in reader if record['phone']]
But for the second item I got an empty list. I'm guessing that reader is a generator, and that's why I can't iterate over it twice.
I've tried to use tuples, but it only worked this way:
user_data = {}
with open(phone_numbers) as f:
    reader = csv.DictReader(f)
    # Stores (name, phone) tuples together under the single key 'Name' --
    # works, but mixes both fields into one list.
    user_data['Name'] = [(record['Name'],record['phone']) for record in reader if record['phone']]
In that case I have the two variables, phone and Name stored in user_data['Name'], that isn't what I want.
And if I try this:
user_data = {}
with open(phone_numbers) as f:
    reader = csv.DictReader(f)
    # NOTE(review): this tries to unpack the whole list of (name, phone)
    # tuples into exactly two names, so it raises the reported ValueError
    # unless the file contains exactly two matching rows.
    user_data['Name'],user_data['phone'] = [(record['Name'],record['phone']) for record in reader if record['phone']]
I got the following error:
ValueError: too many values to unpack
Edit:
This is a sample of the table:
+--------+---------------+
| Name   | Phone         |
+--------+---------------+
| Luis | 000 111 22222 |
+--------+---------------+
| Paul | 000 222 3333 |
+--------+---------------+
| Andrea | |
+--------+---------------+
| Jorge | 111 222 3333 |
+--------+---------------+
So all rows have a Name but not all have phones.
You can use dict to convert your list of tuple into dictionary. Also you need to use get if you have record without phone value.
import csv
# Build {name: phone} for every record that actually has a phone.
user_data = {}
with open(phone_numbers) as f:
    reader = csv.DictReader(f)
    # FIX: the original line was a SyntaxError -- dict([ ... ) with the
    # closing ']' missing.  Also give .get() a default so a missing 'phone'
    # field (None) cannot crash the .strip() call.
    user_data = {record['Name']: record['phone']
                 for record in reader
                 if record.get('phone', '').strip()}
If you want a list of names and phones separately you can use the * expression
# Split names and phones into two parallel sequences with zip(*...).
with open(phone_numbers) as f:
    reader = csv.DictReader(f)
    # FIX: the original read record['name'] / record['value'], but the file's
    # columns (per the question's sample) are 'Name' and 'phone' -- TODO
    # confirm against the real header.  .get() gets a default so rows
    # without a phone cannot crash .strip().
    names, phones = zip(*[(record['Name'], record['phone'])
                          for record in reader
                          if record.get('phone', '').strip()])
I think there is a much easier approach Because it is a csv file since there are column headings as you indicate then there is a value for phone in each row, it is either nothing or something - so this tests for nothing and if not nothing adds the name and phone to user_data
import csv
# Build a list of {'Name': ..., 'phone': ...} dicts, keeping only rows that
# have a phone.  ('rb' kept: this snippet targets Python 2.)
user_data = []
with open(f,'rb') as fh:
    my_reader = csv.DictReader(fh)
    for row in my_reader:
        if row['phone'] != '':  # FIX: the original was missing this colon
            user_details = dict()
            user_details['Name'] = row['Name']
            user_details['phone'] = row['phone']
            user_data.append(user_details)
By using DictReader we are letting the magic happen so we don't have to worry about seek etc.
If I did not understand and you want a dictionary then easy enough
import csv
# Build a {name: phone} dictionary from rows that have a phone.
user_data = dict()
with open(f,'rb') as fh:
    my_reader = csv.DictReader(fh)
    for row in my_reader:
        if row['phone'] != '':  # FIX: the original was missing this colon
            # FIX: the original did user_data['Name'] = row['phone'], which
            # keeps only the last phone under the literal key 'Name'; the
            # intent (a dictionary of users) needs the person's name as key.
            user_data[row['Name']] = row['phone']
Your guess is quite right. If this is the approach you want take - iteration twice, you should use seek(0)
reader = csv.DictReader(f)
user_data['Name'] = [record['Name'] for record in reader if record['phone']]
f.seek(0)  # rewind to the beginning of the file ...
# A fresh DictReader is needed so the header row is consumed again.
reader = csv.DictReader(f)
user_data['phone'] = [record['phone'] for record in reader if record['phone']]
However, this is not very efficient and you should try and get your data in one roll. The following should do it in one roll:
user_data = {}
def extract_user(user_data, record):
    """If *record* has a phone, store it in *user_data* keyed by its name.

    The 'name' key is popped off the record, so the stored value holds the
    remaining fields only.  Records without a phone are ignored.
    """
    if not record['phone']:
        return
    name = record.pop('name')
    user_data[name] = record
[extract_user(user_data, record) for record in reader]
Example:
In [20]: cat phones.csv
name,phone
hans,01768209213
grettel,
henzel,123457123
In [21]: f = open('phones.csv')
In [22]: reader = csv.DictReader(f)
In [24]: %paste
user_data = {}
def extract_user(user_data, record):
if record['phone']:
name = record.pop('name')
user_data.update({name: record})
[extract_user(user_data, record) for record in reader]
## -- End pasted text --
Out[24]: [None, None, None]
In [25]: user_data
Out[25]: {'hans': {'phone': '01768209213'}, 'henzel': {'phone': '123457123'}}
Is it possible that what you're looking for is throwing away some info in your data file?
In [26]: !cat data00.csv
Name,Phone,Address
goofey,,ade
mickey,1212,heaven
tip,3231,earth
In [27]: f = open('data00.csv')
In [28]: r = csv.DictReader(f)
In [29]: lod = [{'Name':rec['Name'], 'Phone':rec['Phone']} for rec in r if rec['Phone']]
In [30]: lod
Out[30]: [{'Name': 'mickey', 'Phone': '1212'}, {'Name': 'tip', 'Phone': '3231'}]
In [31]:
On the other hand, should your file contain ONLY Name and Phone columns, it's
just
In [31]: lod = [rec for rec in r if rec['Phone']]
I normally use row indexing:
# NOTE(review): pseudo-code -- the <...> placeholders must be replaced with
# real column indexes before this runs; 'input' also shadows the builtin of
# the same name, and the file handle is never closed.
input = open('mycsv.csv', 'r')
user_data = {}
for row in csv.reader(input):
    if row[<row # containing phone>]:
        name = row[<row # containing name>]
        user_data[name] = row[<row # containing phone>]
You were correct the whole time, except for the unpacking.
# Pair each name with its phone, keeping only records that have a phone.
result = [(record["name"], record["phone"]) for record in reader if record["phone"]]
# this gives [(name1, phone1), (name2,phone2),....]
You have to do [dostuff for name, phone in result] not name,phone = result, which does not make sense semantically and syntactically.
I have an application that works. But in the interest of attempting to understand functions and python better. I am trying to split it out into various functions.
I'm stuck on the file_IO function. I'm sure the reason it does not work is because the main part of the application does not understand reader or writer. To better explain, here is a full copy of the application.
Also I'm curious about using csv.DictReader and csv.DictWriter. Do either provide any advantages/disadvantages to the current code?
I suppose another way of doing this is via classes which honestly I would like to know how to do it that way as well.
#!/usr/bin/python
""" Description This script will take a csv file and parse it looking for specific criteria.
A new file is then created based of the original file name containing only the desired parsed criteria.
"""
import csv
import re
import sys
# Search terms; a row's group is the index of the first term found.
searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of *row*: the index into ``searched`` of the
    first search term contained (case-insensitively) in any cell, or -1 if
    no cell matches.
    """
    for col in (cell.lower() for cell in row):
        matches = [j for j, s in enumerate(searched) if s in col]
        if matches:
            return matches[0]
    return -1
#Prompt for File Name
def file_IO():
    """Prompt for a base filename and return (csv_reader, csv_writer).

    NOTE(review): this is the source of the question's problem -- the call
    below discards the return value, so 'reader' and 'writer' are never
    bound in the module scope where the main code uses them.  The open file
    handles in_File/out_File are also locals here, so they can never be
    closed by the later in_File.close()/out_File.close() calls.
    """
    print "Please Enter a File Name, (Without .csv extension): ",
    base_Name = raw_input()  # Python 2 input function
    print "You entered: ",base_Name
    in_Name = base_Name + ".csv"
    out_Name = base_Name + ".parsed.csv"
    print "Input File: ", in_Name
    print "OutPut Files: ", out_Name
    #Opens Input file for read and output file to write.
    in_File = open(in_Name, "rU")
    reader = csv.reader(in_File)
    out_File = open(out_Name, "wb")
    writer = csv.writer(out_File, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    return (reader, writer)
# Return value discarded -- should be: reader, writer = file_IO()
file_IO()
# Read header
# NOTE(review): 'reader', 'writer', 'in_File' and 'out_File' are all unbound
# here because file_IO()'s return value was discarded and its locals never
# escape -- this is why the refactoring fails (NameError at the next line).
header = reader.next()  # Python 2 iterator API
stored = []
# Write only columns 1 and 4 of the header.
writer.writerow([header[0], header[3]])
# Collect (group, original_index, row) triples so the later sort orders rows
# by group first, then by original file position.
for i, row in enumerate(reader):
    g = find_group(row)
    if g >= 0:
        stored.append((g, i, row))
stored.sort()
for g, i, row in stored:
    writer.writerow([row[0], row[3]])
# Closing Input and Output files.
in_File.close()
out_File.close()
If I were you, I'd only separate find_group.
import csv
def find_group(row):
    """Return the index of the group whose name equals one of *row*'s
    cells (compared lower-case, whole-cell match), or -1 if none does.
    """
    GROUPS = ['aircheck', 'linkrunner at', 'onetouch at']
    lowered = [cell.lower() for cell in row]
    for idx, group in enumerate(GROUPS):
        if group in lowered:
            return idx
    return -1
def get_filenames():
    """Prompt for a base filename and return (input_name, output_name)."""
    # this might be the only other thing you'd want to factor
    # into a function, and frankly I don't really like getting
    # user input this way anyway....
    basename = raw_input("Enter a base filename (no extension): ")  # Python 2
    infilename = basename + ".csv"
    outfilename = basename + ".parsed.csv"
    return infilename, outfilename
# notice that I don't open the files yet -- let main handle that
infilename, outfilename = get_filenames()
with open(infilename, 'rU') as inf, open(outfilename, 'wb') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter=',',
                        quotechar='"', quoting=csv.QUOTE_ALL)
    header = next(reader)
    # FIX: the original had an unbalanced '[[' here (SyntaxError).
    writer.writerow([header[0], header[3]])
    # FIX: the original comprehension had a stray ')' after enumerate(reader)
    # (SyntaxError).  Sort key: (group, original position).
    stored = sorted((find_group(row), idx, row)
                    for idx, row in enumerate(reader)
                    if find_group(row) >= 0)
    for _, _, row in stored:
        writer.writerow([row[0], row[3]])
#!/usr/bin/python
import csv
import re
# The three exact cell values we search for.
string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')
#searched = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
print "hello Pythong! "
#def does_match(string):
#    stringl = string.lower()
#    return any(s in stringl for s in searched)
# Open input for reading and output for writing (Python 2 binary modes).
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)
# Copy each row that has a cell EXACTLY equal to one of the three strings;
# the 'found' flag ensures a row is written at most once even if several
# cells match.
for row in reader:
    found = False
    for col in row:
        if col in [string_1, string_2, string_3] and not found:
            writer.writerow(row)
            found = True
#for row in reader:
#    if any(does_match(col) for col in row):
#        writer.writerow(row[:2]) # write only 2 first columns
inFile.close()
outFile.close()
I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.
Sample Data File
LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300
I'm trying to figure out how to search a CSV file for 3 items. If
those items exist print the row. Ideally I would like only Columns 1
and 3 to print to a new file.
Try this:
import csv
# Rows whose FIRST cell equals one of these are echoed and copied to out.csv.
search_for = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
with open('in.csv') as inf, open('out.csv','w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"',
                        quoting=csv.QUOTE_MINIMAL)
    for record in reader:
        if record[0] in search_for:
            print('Found: {}'.format(record))
            writer.writerow(record)
#!/usr/bin/python
import csv
import numpy as np
class search_csv(object):
    """Load a tab-separated csv and write rows matching given items to an
    output file, via numpy or a plain-Python path.
    """
    def __init__(self, infile, outfile):
        # NOTE(review): this input handle is rebound over the parameter and
        # never closed.
        infile = open(infile, 'rb')
        read_infile = [i for i in csv.reader(infile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)]
        self.non_numpy_data = read_infile     # rows as lists of strings
        self.data = np.array(read_infile, dtype=None)  # same rows as ndarray
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    def write_to(self, matched_values):
        """Write all matched rows to the output file."""
        self.writer_.writerows(matched_values)
        print ' Matched Values Written '
        return True
    # NOTE(review): mutable default argument [0,2] -- harmless here because
    # it is never mutated, but a tuple (0, 2) would be safer.
    def searcher(self, items, return_cols=[0,2]): ##// items should be passed as list -> ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
        """numpy path: select rows containing any of *items* and write the
        columns in *return_cols*; closes the output file afterwards.
        """
        find_these = np.array(items, dtype=None)
        matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0]
        matching_data = self.data[matching_y][:,return_cols]
        self.write_to(matching_data)
        self.outfile.close()
        return True
    def non_numpy_search(self, items, return_cols=[0,2]):
        """Pure-Python path: for each row whose cells contain an EXACT match
        of any item (list membership, so whole-cell equality), collect the
        columns in *return_cols* and write them out.  Does NOT close the
        output file (unlike searcher).
        """
        lst = []
        for i in self.non_numpy_data:
            for ii in items:
                if ii in i:
                    z = []
                    for idx in return_cols:
                        z.append(i[idx])
                    lst.append(z)
                    break
        self.write_to(lst)
        return True
### now use the class ###
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'
# FIX: the original called non_numpy_search(IN_FILE, OUT_FILE), but
# non_numpy_search is a *method*; the class to instantiate is search_csv.
search_csv(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)
By the phrasing of your question I'm assuming you just want to complete the task at hand and don't really care how. So copy and paste this in and use your data file as the 'IN_FILE' value and the file name you want to write to as the 'OUT_FILE' value. Place the values you want to search for in the 'SEARCHING_FOR' list, and you're done.
Things to note....
SEARCHING_FOR should be a list.
the values in SEARCHING_FOR are matched EXACTLY, so 'A' will not match 'a'. If you want to use a regex or something more complex, let me know.
In function 'non_numpy_search' there is a 'return_cols' parameter. It defaults to the first and 3rd column.
If you don't have numpy let me know.
#!/usr/bin/python
import csv
import re
import sys
import gdata.docs.service
#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')
# Lower-cased search terms; a row's group is the index of the first term
# found as a substring of any cell.
searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the index in ``searched`` of the first term contained in any
    (lower-cased) cell of *row*, or -1 if no cell contains any term.
    """
    for cell in row:
        haystack = cell.lower()
        for group_index, term in enumerate(searched):
            if term in haystack:
                return group_index
    return -1
def does_match(string):
    """True if any term in the module-level ``searched`` list occurs in
    *string* (case-insensitive substring match).
    """
    lowered = string.lower()
    for term in searched:
        if term in lowered:
            return True
    return False
#Opens Input file for read and output file to write (Python 2 binary modes).
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)
#for row in reader:
#    found = False
#    for col in row:
#        if col in [string_1, string_2, string_3] and not found:
#            writer.writerow(row)
#            found = True
"""Built a list of items to sort. If row 12 contains 'LinkRunner AT' (group 1),
one stores a triple (1, 12, row)
When the triples are sorted later, all rows in group 0 will come first, then
all rows in group 1, etc.
"""
stored = []
for i, row in enumerate(reader):
    g = find_group(row)
    if g >= 0:
        stored.append((g, i, row))
# Sorts by group first, then original file position (tuple ordering).
stored.sort()
for g, i, row in stored:
    writer.writerow(tuple(row[k] for k in (0,2))) # output cols 1 & 3 (indexes 0 and 2)
#for row in reader:
#    if any(does_match(col) for col in row):
#        writer.writerow(row[:2]) # write only 2 first columns
# Closing Input and Output files.
inFile.close()
outFile.close()
I have a CSV file that has a single cell that I want to edit.
I can write a pretty simple function that, for instance, can look up an ID field in the file and return the row of the ID in question:
# NOTE(review): 'id' shadows the builtin of the same name.
id = 3 #column number of the ID field
# NOTE(review): LOCAL_FOLDER is presumably defined earlier -- verify.
csvfile = open(os.path.join(LOCAL_FOLDER, "csvfile.csv"), "rU")
csvFile= csv.reader(csvfile, delimiter=",")
def lookup(ID):
    """Return the 1-based row number of the first row whose ID column equals
    *ID*, or 0 if not found.

    NOTE(review): iterates the module-level csvFile reader, which is
    consumed in the process -- a second lookup() call sees no rows.
    """
    rowNo = 1
    for row in csvFile:
        if row[id] == ID:
            return rowNo
        else:
            rowNo += 1
    return 0
What I want to do is to write a corresponding replace function that will take in an ID, a column variable and a data variable:
def replace(ID, col, data):
row = lookup(ID)
#use a CSV writer to replace the item at row, col with data
I have no idea how to do this, all of the examples I can find for how to use the writer only show you how to completely rewrite an entire .CSV file, which is not what I'm looking to do; I want an equivalent of a PUT rather than a POST.
fwiw, per inspectorG4dget's suggestion, I rewrote my code as follows:
LOCAL_FOLDER = os.getcwd()
CSV_FILE = "csvfile.csv"
def lookup(ID):
    """Return the 1-based row number of the first row whose ID column equals
    *ID*, or 0 if not found.  Opens and closes the file itself, so repeated
    calls work (unlike the earlier version).
    """
    csvfile = open(os.path.join(LOCAL_FOLDER, CSV_FILE), "rU")
    csvFile= csv.reader(csvfile, delimiter=",")
    rowNo = 1
    for row in csvFile:
        # NOTE(review): 'id' here must be the module-level column index from
        # the earlier snippet; if it is not defined, this picks up the
        # *builtin* id function and fails -- confirm it is in scope.
        if row[id] == ID:
            csvfile.close()
            return rowNo
        else:
            rowNo += 1
    csvfile.close()
    return 0
def replace(ID, col, data):
    """Replace the value at (row of *ID*, column *col*) with *data*.

    Rewrites the whole file through a temp copy, then renames it over the
    original (csv files cannot be edited in place).  Returns 1 on success,
    0 if *ID* was not found.
    """
    index = 1                    # 1-based, matching lookup()'s numbering
    row = lookup(ID)
    if row == 0:
        return 0
    csvwritefile = open(os.path.join(LOCAL_FOLDER, "temp.csv"), "w")
    csvWriteFile = csv.writer(csvwritefile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) #obviously change this if you want a diff quoting format
    csvreadfile = open(os.path.join(LOCAL_FOLDER, CSV_FILE), "rU")
    csvReadFile= csv.reader(csvreadfile, delimiter=",")
    for readrow in csvReadFile:
        if index == row:
            # Target row: patch the one cell, then copy it out.
            temp = readrow
            temp[col] = data
            csvWriteFile.writerow(temp)
            index += 1
        else:
            index += 1
            csvWriteFile.writerow(readrow)
    csvwritefile.close()
    csvreadfile.close()
    # Atomically (on POSIX) swap the temp copy in for the original.
    os.rename(os.path.join(LOCAL_FOLDER, "temp.csv"), os.path.join(LOCAL_FOLDER, CSV_FILE))
    return 1