Parse CSV file and aggregate values across multiple columns - python

I would like to adapt the post here (Parse CSV file and aggregate the values) to sum multiple columns instead of just one.
So for these data:
CITY,AMOUNT,AMOUNT2,AMOUNTn
London,20,21,22
Tokyo,45,46,47
London,55,56,57
New York,25,26,27
How can I get this:
CITY,AMOUNT,AMOUNT2,AMOUNTn
London,75,77,79
Tokyo,45,46,47
New York,25,26,27
I will have several thousand columns eventually, and unfortunately I cannot use the pandas package for this task. Here is the code I have; it just aggregates all three AMOUNT columns into one, which is not what I am after:
from __future__ import division
import csv
from collections import defaultdict

def default_factory():
    return [0, None, None, 0]

reader = csv.DictReader(open('test_in.txt'))
cities = defaultdict(default_factory)
for row in reader:
    headers = [r for r in row.keys()]
    headers.remove('CITY')
    for i in headers:
        amount = int(row[i])
        cities[row["CITY"]][0] += amount
        max = cities[row["CITY"]][1]
        cities[row["CITY"]][1] = amount if max is None else amount if amount > max else max
        min = cities[row["CITY"]][2]
        cities[row["CITY"]][2] = amount if min is None else amount if amount < min else min
        cities[row["CITY"]][3] += 1
for city in cities:
    cities[city][3] = cities[city][0]/cities[city][3]  # calculate mean
with open('test_out.txt', 'wb') as myfile:
    writer = csv.writer(myfile, delimiter="\t")
    writer.writerow(["CITY", "AMOUNT", "AMOUNT2", "AMOUNTn", "max", "min", "mean"])
    writer.writerows([city] + cities[city] for city in cities)
Thank you for any help

Here is one way using itertools.groupby.
import StringIO
import csv
import itertools
data = """CITY,AMOUNT,AMOUNT2,AMOUNTn
London,20,21,22
Tokyo,45,46,47
London,55,56,57
New York,25,26,27"""
# I use StringIO to create a file-like object for demo purposes
f = StringIO.StringIO(data)
fieldnames = f.readline().strip().split(',')
key = lambda x: x[0] # the first column will be a grouping key
# rows must be sorted by city before passing to itertools.groupby
rows_sorted = sorted(csv.reader(f), key=key)
outfile = StringIO.StringIO('')
writer = csv.DictWriter(outfile, fieldnames=fieldnames, lineterminator='\n')
writer.writeheader()
for city, rows in itertools.groupby(rows_sorted, key=key):
    # remove city column for aggregation, convert to ints
    rows = [[int(x) for x in row[1:]] for row in rows]
    agg = [sum(column) for column in zip(*rows)]
    writer.writerow(dict(zip(fieldnames, [city] + agg)))
print outfile.getvalue()
# CITY,AMOUNT,AMOUNT2,AMOUNTn
# London,75,77,79
# New York,25,26,27
# Tokyo,45,46,47
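Note that itertools.groupby only merges adjacent rows with equal keys, which is why the input is sorted first; on unsorted data each repeated city would come out as its own group. A quick illustration:
import itertools
rows = [['London', '20'], ['Tokyo', '45'], ['London', '55']]
print [k for k, g in itertools.groupby(rows, key=lambda x: x[0])]
# ['London', 'Tokyo', 'London'] -- the two London rows are not merged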

Here is how I would do it.
import csv
from StringIO import StringIO
data = '''CITY,AMOUNT,AMOUNT2,AMOUNTn
London,20,21,22
Tokyo,45,46,47
London,55,56,57,99
New York,25,26,27'''
file_ = StringIO(data)
reader = csv.reader(file_)
headers = next(reader)
rows = {}
def add(col1, col2):
    # element-wise sum; if col2 is longer, its extra values are appended as-is
    l = len(col1)
    for i, n in enumerate(col2):
        if i >= l:
            col1.extend(col2[i:])
            break
        col1[i] += n
    return col1

for row in reader:
    key = row[0]
    nums = map(int, row[1:])
    if key in rows:
        rows[key] = add(rows[key], nums)
    else:
        rows[key] = nums
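The snippet above builds the aggregated rows but stops before writing anything out; a minimal sketch of the missing output step, reusing headers and rows from above:
out = StringIO()
writer = csv.writer(out, lineterminator='\n')
writer.writerow(headers)
for city, nums in rows.items():
    writer.writerow([city] + nums)
print out.getvalue()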

Related

How do I write a csv based on comparing two csv files [column based]

I have two csv files:
csv1
csv2
(note: the headers can differ)
csv1 has a single column and csv2 has 5 columns.
Column 1 of csv1 has some values matching column 2 of csv2.
My question is: how can I write a csv containing only the rows of csv2 whose column 2 value does NOT match any value in column 1 of csv1?
I have attached three files: csv1, csv2 and the expected output.
Expected Output:
ProfileID,id,name,class ,rollnumber
1,lkha,prince,sfasd,DAS
2,hgfhfk,kabir,AD,AD
5,jlh,antriskh,ASDA,AD
CSV 1:
id,name
10927,prince
109582,kabir
f546416,rahul
g44674,saini
r7341,antriskh
CSV 2:
ProfileID,id,name,class ,rollnumber
1,lkha,prince,sfasd,DAS
2,hgfhfk,kabir,AD,AD
3,f546416,rahul,AD,FF
44,g44674,saini,DD,FF
5,jlh,antriskh,ASDA,AD
I tried converting them into dictionaries and matching csv1 keys to csv2 values, but it is not working as expected:
def read_csv1(filename):
    prj_structure = {}
    f = open(filename, "r")
    data = f.read()
    f.close()
    lst = data.split("\n")
    prj = ""
    for i in range(0, len(lst)):
        val = lst[i].split(",")
        if len(val) > 0:
            prj = val[0]
            if prj != "":
                if prj not in prj_structure.keys():
                    prj_structure[prj] = []
                prj_structure[prj].append([val[1], val[2], val[3], val[4]])
    return prj_structure

def read_csv2(filename):
    prj_structure = {}
    f = open(filename, "r")
    data = f.read()
    f.close()
    lst = data.split("\n")
    prj = ""
    for i in range(0, len(lst)):
        val = lst[i].split(",")
        if len(val) > 0:
            prj = val[0]
            if prj != "":
                if prj not in prj_structure.keys():
                    prj_structure[prj] = []
                prj_structure[prj].append([val[0]])
    return prj_structure

csv1_data = read_csv1("csv1.csv")
csv2_data = read_csv2("csv2.csv")
for k, v in csv1_data.items():
    for ks, vs in csv2_data.items():
        if k == vs[0][0]:
            # here it is not working
            sublist = []
            sublist.append(k)
Use the DictReader from the csv package.
import csv
f1 = open('csv1.csv')
csv_1 = csv.DictReader(f1)
f2 = open('csv2.csv')
csv_2 = csv.DictReader(f2)
first_dict = {}
for row in csv_1:
    first_dict[row['name']] = row
f1.close()
f_out = open('output.csv', 'w')
csv_out = csv.DictWriter(f_out, csv_2.fieldnames)
csv_out.writeheader()
for second_row in csv_2:
    if second_row['name'] in first_dict:
        first_row = first_dict[second_row['name']]
        if first_row['id'] != second_row['id']:
            csv_out.writerow(second_row)
f2.close()
f_out.close()
If you have the option, I have always found pandas as a great tool to import and manipulate CSV files.
import pandas as pd
# Read in both the CSV files
df_1 = pd.read_csv('csv1.csv')
df_2 = pd.read_csv('csv2.csv')
# Iterate over both DataFrames and if any ids in df_2 match
# df_1, remove them from df_2
for num1, row1 in df_1.iterrows():
    for num2, row2 in df_2.iterrows():
        if row1['id'] == row2['id']:
            df_2.drop(num2, inplace=True)
df_2.head()
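Note that the nested iterrows loops scan df_2 once per row of df_1; a vectorized sketch using isin (assuming the same 'id' column names as above) does the same filtering in one pass:
import pandas as pd

df_1 = pd.read_csv('csv1.csv')
df_2 = pd.read_csv('csv2.csv')
# keep only the rows of df_2 whose id does not appear anywhere in df_1's id column
df_2 = df_2[~df_2['id'].isin(df_1['id'])]
df_2.to_csv('output.csv', index=False)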
For any kind of csv processing, using the builtin csv module makes most of the error-prone processing trivial. Given your example values, the following code should produce the desired results. I use comprehensions to do the filtering.
import csv
import io
# example data, as StringIO that will behave like file objects
raw_csv_1 = io.StringIO('''\
id,name
10927,prince
109582,kabir
f546416,rahul
g44674,saini
r7341,antriskh''')
raw_csv_2 = io.StringIO('''\
ProfileID,id,name,class,rollnumber
1,lkha,prince,sfasd,DAS
2,hgfhfk,kabir,AD,AD
3,f546416,rahul,AD,FF
44,g44674,saini,DD,FF
5,jlh,antriskh,ASDA,AD''')
# in your actual data, you would use actual file objects instead, like
# with open('location/of/your/csv_1') as file_1:
#     raw_csv_1 = io.StringIO(file_1.read())
# with open('location/of/your/csv_2') as file_2:
#     raw_csv_2 = io.StringIO(file_2.read())
Then we need to transform them into csv.reader objects:
csv_1 = csv.reader(raw_csv_1)
next(csv_1) # consume once to skip the header
csv_2 = csv.reader(raw_csv_2)
header = next(csv_2) # consume once to skip the header, but store it
Last but not least, materialize the readers into lists, collect the ids of the first csv in a set to use as a lookup table, filter the second csv with it, and write it back as 'result.csv' into your file system.
vals_1 = list(csv_1)
vals_2 = list(csv_2)
skip_keys = {id_ for id_, name in vals_1}
result = [row for row in vals_2 if row[1] not in skip_keys]
# at this point, result contains
# [['1', 'lkha', 'prince', 'sfasd', 'DAS'],
# ['2', 'hgfhfk', 'kabir', 'AD', 'AD'],
# ['5', 'jlh', 'antriskh', 'ASDA', 'AD']]
with open('result.csv', 'w', newline='') as result_file:
    csv.writer(result_file).writerows([header] + result)

Split a row into multiple rows and keep the maximum of the second value for each gene

I am new to Python and I prepared a script that should modify the following csv file as follows:
1) Each row that contains multiple Gene entries separated by the /// such as:
C16orf52 /// LOC102725138 1.00551
should be transformed to:
C16orf52 1.00551
LOC102725138 1.00551
2) The same gene may have different ratio values
AASDHPPT 0.860705
AASDHPPT 0.983691
and we want to keep only the pair with the highest ratio value (delete the pair AASDHPPT 0.860705)
Here is the script I wrote but it does not assign the correct ratio values to the genes:
import csv
import pandas as pd

with open('2column.csv', 'rb') as f:
    reader = csv.reader(f)
    a = list(reader)

gene = []
ratio = []
for t in range(len(a)):
    if '///' in a[t][0]:
        s = a[t][0].split('///')
        gene.append(s[0])
        gene.append(s[1])
        ratio.append(a[t][1])
        ratio.append(a[t][1])
    else:
        gene.append(a[t][0])
        ratio.append(a[t][1])
    gene[t] = gene[t].strip()

newgene = []
newratio = []
for i in range(len(gene)):
    g = gene[i]
    r = ratio[i]
    if g not in newgene:
        newgene.append(g)
        for j in range(i+1, len(gene)):
            if g == gene[j]:
                if ratio[j] > r:
                    r = ratio[j]
        newratio.append(r)

for i in range(len(newgene)):
    print newgene[i] + '\t' + newratio[i]
if len(newgene) > len(set(newgene)):
    print 'missionfailed'
Thank you very much for any help or suggestion.
Try this:
with open('2column.csv') as f:
    lines = f.read().splitlines()

new_lines = {}
for line in lines:
    cols = line.split(',')
    for part in cols[0].split('///'):
        part = part.strip()
        if part not in new_lines:
            new_lines[part] = cols[1]
        else:
            if float(cols[1]) > float(new_lines[part]):
                new_lines[part] = cols[1]

import csv
with open('clean_2column.csv', 'wb') as csvfile:
    writer = csv.writer(csvfile, delimiter=' ',
                        quotechar='|', quoting=csv.QUOTE_MINIMAL)
    for k, v in new_lines.items():
        writer.writerow([k, v])
First of all, if you're importing Pandas, know that you have I/O Tools to read CSV files.
So first, let's read the file in that way:
import pandas as pd
df = pd.read_csv('2column.csv')
Then, you can extract the indexes where you have your '///' pattern:
l = list(df[df['Gene Symbol'].str.contains('///')].index)
Then, you can create your new rows:
for i in l:
    for sub in df['Gene Symbol'][i].split('///'):
        df = df.append(pd.DataFrame([[sub, df['Ratio(ifna vs. ctrl)'][i]]], columns=df.columns))
Then, drop the old ones:
df = df.drop(df.index[l])
Then, I'll do a little trick to remove your lowest duplicate values. First, I'll sort by 'Ratio(ifna vs. ctrl)', then I'll drop all the duplicates but the first one:
df = df.sort_values('Ratio(ifna vs. ctrl)', ascending=False).drop_duplicates('Gene Symbol', keep='first')
If you want to keep your sorting by Gene Symbol and reset indexes to have simpler ones, simply do:
df = df.sort_values('Gene Symbol').reset_index(drop=True)
If you want to re-export your modified data to your csv, do:
df.to_csv('2column.csv')
EDIT: I edited my answer to correct syntax errors, I've tested this solution with your csv and it worked perfectly :)
This should work.
It uses the dictionary suggestion of Peter.
import csv

with open('2column.csv', 'r') as f:
    reader = csv.reader(f)
    original_file = list(reader)

# gets rid of the header
original_file = original_file[1:]

# create an empty dictionary
genes_ratio = {}

# loop over every row in the original file
for row in original_file:
    gene_name = row[0]
    gene_ratio = row[1]
    # check if /// is in the string; if so, split the string
    if '///' in gene_name:
        gene_names = gene_name.split('///')
        # loop over all the resulting components
        for gene in gene_names:
            # check if the component is in the dictionary
            # if not in dictionary, set value to gene_ratio
            if gene not in genes_ratio:
                genes_ratio[gene] = gene_ratio
            # if in dictionary, compare value in dictionary to gene_ratio
            # if dictionary value is smaller, overwrite value
            # (compare as floats so '10.5' is not considered smaller than '9.5')
            elif float(genes_ratio[gene]) < float(gene_ratio):
                genes_ratio[gene] = gene_ratio
    else:
        if gene_name not in genes_ratio:
            genes_ratio[gene_name] = gene_ratio
        elif float(genes_ratio[gene_name]) < float(gene_ratio):
            genes_ratio[gene_name] = gene_ratio

# loop over dictionary and print gene names and their ratio values
for key in genes_ratio:
    print key, genes_ratio[key]

Python: reading the same rows from a csv file - logic

I have a problem with appending data for missing rows in the csv file: I am reading rows from a csv file for each customer and appending lists with the data the rows have. Each customer needs to have the same id's that are highlighted in green in the example image. If the next customer doesn't have the rows with all needed id's, I still need to append 0 values to the lists for these missing rows. So the customer highlighted in yellow needs to have same number of values appended to the data lists as the one in green.
I am trying to read each row and compare its id with the list of all possible id's that I created, but I am always stuck on the first id. I am not sure whether the right way to go is to read the previous row again until its id is equal to the id from the list of possible id's (I do this to add the missing row's data to the list). Please let me know if you have any suggestions.
Note: if we take into consideration only the column with id's, for these two customers I would like the list to look like this: list_with_ids = [410, 409, 408, 407, 406, 405, 403, 402, **410, 409, 408, 407, 406, 405, 403, 402**]. So I am looking for a way - once I am on row 409 in yellow - to first append the first needed id 410, and only then 409 and so forth. And the same at the end - append the two missing ids: 403, 402.
Code:
def write_data(workbook):
    [...]
    # Lists.
    list_cust = []
    list_quantity = []  # from Some_data columns
    # Get the start row in the csv file.
    for row in range(worksheet.nrows):
        base_id = str(410)
        value = worksheet.cell(row, 1).value
        start = str(value)
        if base_id[0] == start[0]:
            num_of_row_for_id = row
    # Append the first id.
    first_cust = str(worksheet.cell(num_of_row_for_id, 0).value)
    list_cust.append(first_cust)
    # Needed to count id's.
    count = 0
    # List with all needed id's for each customer.
    # instead of .... - all id's in green from the image.
    all_ids = [....]
    # Get data.
    for row in range(worksheet.nrows):
        next_id = str(worksheet.cell(num_of_row_for_id, 1).value)
        cust = str(worksheet.cell(num_of_row_for_id, 0).value)
        # Append id to the list.
        list_cust.append(cust)
        # Needed to separate rows for each customer.
        if list_cust[len(list_cust)-1] == list_cust[len(list_cust)-2]:
            # Get data: I read columns to get data.
            # Let's say I read col 4 to 21.
            for col_num in range(3, 20):
                # Here is the problem: ############################
                if next_id != all_ids[count]:
                    list_quantity.append(0)
                if next_id == all_ids[count]:
                    qty = worksheet.cell(num_of_row_for_id, col_num).value
                    list_quantity.append(qty)
        # Get the next row in reverse order.
        num_of_row_for_id -= 1
        # Increment count for id's index.
        if list_cust[len(list_cust)-1] == list_cust[len(list_cust)-2]:
            # 8 possible id's.
            if count < 7:
                count += 1
            else:
                count = 0
Consider the following data wrangling with list comprehensions and loops, using the following input containing random data columns:
Input Data
# Cust ID Data1 Data2 Data3 Data4 Data5
# 2011 62,404 0.269101238 KPT 0.438881697 UAX 0.963170513
# 2011 62,405 0.142397746 XYD 0.51668728 PTQ 0.761695425
# 2011 62,406 0.782342616 QCN 0.259141256 FNX 0.870971924
# 2011 62,407 0.221750017 EIU 0.358439487 MAN 0.13633062
# 2011 62,408 0.097509568 CRU 0.410058705 BFK 0.680228327
# 2011 62,409 0.322871333 LAC 0.489425167 GUX 0.449476844
# 919 62,403 0.371461633 PUR 0.626146074 KWX 0.525711736
# 919 62,404 0.384859932 AJZ 0.223408599 JSU 0.914916663
# 919 62,405 0.020630503 SFY 0.260778598 VUU 0.213559498
# 919 62,406 0.952425138 EBI 0.59595738 ZYU 0.283794413
# 919 62,407 0.410368534 BTT 0.252698401 FFY 0.41080646
# 919 62,408 0.553390336 GMA 0.846309022 BIN 0.049852419
# 919 62,409 0.193437955 NBB 0.877311494 XQX 0.080656637
Python code
import csv
i = 0
data = []
# READ CSV AND CAPTURE HEADERS AND DATA
with open('Input.csv', 'r') as f:
    rdr = csv.reader(f)
    for line in rdr:
        if i == 0:
            headers = line
        else:
            line[1] = int(line[1].replace(',', ''))
            data.append(line)
        i += 1

# CREATE NEEDED LISTS
cust_list = list(set([i[0] for i in data]))
id_list = [62402,62403,62404,62405,62406,62407,62408,62409,62410]

# CAPTURE MISSING IDS BY CUSTOMER
for c in cust_list:
    currlist = [d[1] for d in data if d[0] == c]
    missingids = [i for i in id_list if i not in currlist]
    for m in missingids:
        data.append([c, m, '', '', '', '', ''])

# WRITE DATA TO NEW CSV IN SORTED ORDER
with open('Output.csv', 'w') as f:
    wtr = csv.writer(f, lineterminator='\n')
    wtr.writerow(headers)
    for c in cust_list:
        for i in sorted(id_list, reverse=True):
            for d in data:
                if d[0] == c and d[1] == i:
                    wtr.writerow(d)
Output Data
Consider even Python third-party modules such as pandas, the data analysis package; and even an SQL solution using pyodbc since Windows' built-in Jet/ACE SQL Engine can query CSV files directly.
You will notice in both the solution below and the previous one that quite a bit of handling is needed to remove the thousands separators (commas) in the ID column, since the modules read them as strings at first. If you remove such commas from the original csv file, you can reduce the lines of code.
Pandas (with left merge on two dataframes)
import pandas as pd
df = pd.read_csv('Input.csv')
cust_list = df['Cust'].unique()
id_list = [62402,62403,62404,62405,62406,62407,62408,62409,62410]
ids = pd.DataFrame({'Cust': [int(c) for i in id_list for c in cust_list],
                    'ID': [int(i) for i in id_list for c in cust_list]})
df['ID'] = df['ID'].str.replace(',', '').astype(int)
df = ids.merge(df, on=['Cust', 'ID'], how='left').\
         sort_values(['Cust', 'ID'], ascending=[True, False])
df.to_csv('Output_pandas.csv', index=False)
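One caveat with the merge approach: the left join leaves NaN in the data columns for the padded ids. to_csv writes NaN as empty cells anyway, but if you want that to be explicit (or want another placeholder), you could fill them before exporting, e.g.:
# NaN from the left merge becomes an empty cell; make that explicit:
df = df.fillna('')   # or fillna(0), depending on what downstream expects
df.to_csv('Output_pandas.csv', index=False)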
PyODBC (works only for Windows machines using left join on two csv files)
import csv
import pyodbc

conn = pyodbc.connect(r'Driver=Microsoft Access Text Driver (*.txt, *.csv);' + \
                      'DBQ=C:\Path\To\CSV\Files;Extensions=asc,csv,tab,txt;',
                      autocommit=True)
cur = conn.cursor()
cust_list = [i[0] for i in cur.execute("SELECT DISTINCT c.Cust FROM Input.csv c")]
id_list = [62402,62403,62404,62405,62406,62407,62408,62409,62410]
cur.close()

with open('ID_list.csv', 'w') as f:
    wtr = csv.writer(f, lineterminator='\n')
    wtr.writerow(['Cust', 'ID'])
    for item in [[int(c), int(i)] for c in cust_list for i in id_list]:
        wtr.writerow(item)

i = 0
with open('Input.csv', 'r') as f1, open('Input_without_commas.csv', 'w') as f2:
    rdr = csv.reader(f1); wtr = csv.writer(f2, lineterminator='\n')
    for line in rdr:
        if i > 0:
            line[1] = int(line[1].replace(',', ''))
        wtr.writerow(line)
        i += 1

strSQL = "SELECT i.Cust, i.ID, c.Data1, c.Data2, c.Data3, c.Data4, c.Data5 " +\
         " FROM ID_list.csv i" +\
         " LEFT JOIN Input_without_commas.csv c" +\
         " ON i.Cust = c.Cust AND i.ID = c.ID" +\
         " ORDER BY i.Cust, i.ID DESC"
cur = conn.cursor()
with open('Output_sql.csv', 'w') as f:
    wtr = csv.writer(f, lineterminator='\n')
    wtr.writerow(['Cust', 'ID', 'Data1', 'Data2', 'Data3', 'Data4', 'Data5'])
    for i in cur.execute(strSQL):
        wtr.writerow(i)
cur.close()
conn.close()
Output (for both above solutions)

How to write column 0 and column 2 headers from the original CSV file?

I was wondering if someone could tell me how to write out the header for columns 0 and 2 from the original CSV file to the new CSV file? I'm also curious if anyone has any experience with pushing to Google Docs?
#!/usr/bin/python
import csv
import re
import sys
import gdata.docs.service

email = "myemail#gmail.com"
password = "password"

#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')

searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of a row
    0 if the row contains searched[0]
    1 if the row contains searched[1]
    etc
    -1 if not found
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
    return -1

#def does_match(string):
#    stringl = string.lower()
#    return any(s in stringl for s in searched)

# Opens input file for read and output file to write.
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)

# Read header
header = reader.next()

#for row in reader:
#    found = False
#    for col in row:
#        if col in [string_1, string_2, string_3] and not found:
#            writer.writerow(row)
#            found = True

#writer.writerow(header(0,2))

"""Build a list of items to sort. If row 12 contains 'LinkRunner AT' (group 1),
one stores a triple (1, 12, row).
When the triples are sorted later, all rows in group 0 will come first, then
all rows in group 1, etc.
"""
stored = []
writer.writerow(row[header] for header in (0,2))
for i, row in enumerate(reader):
    g = find_group(row)
    if g >= 0:
        stored.append((g, i, row))
stored.sort()
for g, i, row in stored:
    writer.writerow(tuple(row[k] for k in (0,2)))  # output col 1 & 3

#for row in reader:
#    if any(does_match(col) for col in row):
#        writer.writerow(row[:2])  # write only 2 first columns

# Closing input and output files.
inFile.close()
outFile.close()
I think what you're looking for is this:
writer.writerow([header[0], header[2]])
You could also use either of the two more complicated mechanisms you use later in the same script:
writer.writerow(header[i] for i in (0,2))
writer.writerow(tuple(header[k] for k in (0,2)))
… but there's really no good reason to. In fact, you'd be better off changing those lines to do things the simple way. Also, you'd be better off not trying to re-use the variable header as a loop index variable… So:
for g, i, row in stored:
    writer.writerow([row[0], row[2]])

Parse CSV file and aggregate the values

I'd like to parse a CSV file and aggregate the values. The CITY column has repeating values (sample):
CITY,AMOUNT
London,20
Tokyo,45
London,55
New York,25
After parsing the result should be something like:
CITY,AMOUNT
London,75
Tokyo,45
New York,25
I've written the following code to extract the unique city names:
def main():
    contrib_data = list(csv.DictReader(open('contributions.csv', 'rU')))
    combined = []
    for row in contrib_data:
        if row['OFFICE'] not in combined:
            combined.append(row['OFFICE'])
How do I then aggregate values?
Tested in Python 3.2.2:
import csv
from collections import defaultdict
reader = csv.DictReader(open('test.csv', newline=''))
cities = defaultdict(int)
for row in reader:
    cities[row["CITY"]] += int(row["AMOUNT"])
writer = csv.writer(open('out.csv', 'w', newline = ''))
writer.writerow(["CITY", "AMOUNT"])
writer.writerows([city, cities[city]] for city in cities)
Result:
CITY,AMOUNT
New York,25
London,75
Tokyo,45
As for your added requirements:
import csv
from collections import defaultdict
def default_factory():
    return [0, None, None, 0]

reader = csv.DictReader(open('test.csv', newline=''))
cities = defaultdict(default_factory)
for row in reader:
    amount = int(row["AMOUNT"])
    cities[row["CITY"]][0] += amount
    max = cities[row["CITY"]][1]
    cities[row["CITY"]][1] = amount if max is None else amount if amount > max else max
    min = cities[row["CITY"]][2]
    cities[row["CITY"]][2] = amount if min is None else amount if amount < min else min
    cities[row["CITY"]][3] += 1
for city in cities:
    cities[city][3] = cities[city][0]/cities[city][3]  # calculate mean
writer = csv.writer(open('out.csv', 'w', newline=''))
writer.writerow(["CITY", "AMOUNT", "max", "min", "mean"])
writer.writerows([city] + cities[city] for city in cities)
This gives you
CITY,AMOUNT,max,min,mean
New York,25,25,25,25.0
London,75,55,20,37.5
Tokyo,45,45,45,45.0
Note that under Python 2, you'll need the additional line from __future__ import division at the top to get correct results.
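A quick illustration of the difference:
from __future__ import division   # must be the first statement in the module

print 75 / 2   # 37.5 with the import; plain Python 2 floors this to 37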
Using a dict with the value as the AMOUNT might do the trick. Something like the following:
Suppose you read one line at a time, city holds the current city and amount the current amount:
main_dict = {}
# --- for loop over rows here ---
if city in main_dict:
    main_dict[city] = main_dict[city] + amount
else:
    main_dict[city] = amount
# --- end for loop ---
At the end of the loop you will have aggregate values in main_dict.
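For completeness, a runnable version of that sketch (assuming the same test.csv layout used in the answers above):
import csv

main_dict = {}
with open('test.csv', newline='') as f:
    for row in csv.DictReader(f):
        city, amount = row['CITY'], int(row['AMOUNT'])
        if city in main_dict:
            main_dict[city] = main_dict[city] + amount
        else:
            main_dict[city] = amount

print(main_dict)  # e.g. {'London': 75, 'Tokyo': 45, 'New York': 25}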
