Converting a CSV file to another CSV choosing specific columns in Python

I am trying to convert a CSV file with the following columns:
ID,Name,Postcode,State,Suburb,Lat,Lon
1,Hurstville Store,1493,NSW,Hurstville,-33.975869,151.088939
I want to make a new CSV with only the Name, Lat and Lon columns, but I'm getting this error:
header = csvReader.next()
AttributeError: '_csv.reader' object has no attribute 'next'
Here is my code so far:
import csv
# Set up input and output variables for the script
storeLoc = open("store_locations.csv", "r")
# Set up CSV reader and process the header
csvReader = csv.reader(storeLoc)
header = csvReader.next()
nameIndex = header.index("Name")
latIndex = header.index("Lat")
lonIndex = header.index("Lon")
# Make an empty list
coordList = []
# Loop through the lines in the file and get each coordinate
for row in csvReader:
    name = row[nameIndex]
    lat = row[latIndex]
    lon = row[lonIndex]
    coordList.append([name,lat,lon])
# Print the coordinate list
print(coordList)
stores = open('store_coords.csv','w', newline='')
Thanks for any feedback.

That code will work in Python 2, i.e. csv.reader objects have a next() method. However, in Python 3 there is no such method.
Instead, and this works in both versions of Python, use next(reader):
import csv
# Set up input and output variables for the script
storeLoc = open("store_locations.csv", "r")
# Set up CSV reader and process the header
csvReader = csv.reader(storeLoc)
header = next(csvReader)
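From there, a minimal sketch of finishing the job (reusing the file names from the question) could select the three columns and write them out with csv.writer:
import csv

with open("store_locations.csv", "r", newline="") as storeLoc, \
     open("store_coords.csv", "w", newline="") as stores:
    csvReader = csv.reader(storeLoc)
    csvWriter = csv.writer(stores)

    # Read the header and find the columns to keep
    header = next(csvReader)
    nameIndex = header.index("Name")
    latIndex = header.index("Lat")
    lonIndex = header.index("Lon")

    # Write a new header, then one trimmed row per input row
    csvWriter.writerow(["Name", "Lat", "Lon"])
    for row in csvReader:
        csvWriter.writerow([row[nameIndex], row[latIndex], row[lonIndex]])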
Here is a concise way of writing it using the csv module:
import csv
from operator import itemgetter
name_lat_lon = itemgetter(1, 5, 6)
with open('store_locations.csv') as infile, open('store_coords.csv', 'w') as outfile:
    csv.writer(outfile).writerows(name_lat_lon(row) for row in csv.reader(infile))
More concise still:
import csv
with open('store_locations.csv') as infile, open('store_coords.csv', 'w') as outfile:
    csv.writer(outfile).writerows((row[1], row[5], row[6]) for row in csv.reader(infile))
Or even more so if certain assumptions are made about the CSV delimiter:
with open('store_locations.csv') as infile, open('store_coords.csv', 'w') as outfile:
    outfile.writelines(','.join((row[1], row[5], row[6])) for row in (line.split(',') for line in infile))

Related

How to add data to existing rows of a CSV file? [duplicate]

I already have an existing CSV file that I am accessing, and I want to append the data to the first row, but it writes the data at the end of the file.
What I am getting:
But I want the data to append like this:
Code I have done so far:
import csv

with open('explanation.csv', 'a', newline="") as file:
    myFile = csv.writer(file)
    myFile.writerow(["1"])
What you actually want to do is replace data in an existing CSV file with new values; however, in order to update a CSV file you must rewrite the whole thing.
One way to do that is to read the whole thing into memory, update the data, and then use it to overwrite the existing file. Alternatively, you could process the file a row at a time, store the results in a temporary file, and then replace the original with the temporary file when all the updates are finished.
The code to do the latter is shown below:
import csv
import os
from pathlib import Path
from tempfile import NamedTemporaryFile
filepath = Path('explanation.csv') # CSV file to update.
with open(filepath, 'r', newline='') as csv_file, \
     NamedTemporaryFile('w', newline='', dir=filepath.parent, delete=False) as tmp_file:
    reader = csv.reader(csv_file)
    writer = csv.writer(tmp_file)

    # Replace value in the first column of the first 5 rows.
    for data_value in range(1, 6):
        row = next(reader)
        row[0] = data_value
        writer.writerow(row)

    writer.writerows(reader)  # Copy remaining rows of original file.

# Replace original file with updated version.
os.replace(tmp_file.name, filepath)
print('CSV file updated')
You could read in the entire file, append your rows in memory, and then write the entire file:
import csv

def append(fname, data):
    with open(fname) as f:
        reader = csv.reader(f)
        data = list(reader) + list(data)
    with open(fname, 'w') as f:
        writer = csv.writer(f)
        writer.writerows(data)
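A quick usage sketch (the file name and the extra rows here are just made-up examples):
# Rewrites explanation.csv with two hypothetical rows appended at the end
append('explanation.csv', [['6', 'foo'], ['7', 'bar']])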

Combine two rows into one in a csv file with Python

I am trying to combine multiple rows in a CSV file together. I could easily do it in Excel, but I want to do this for hundreds of files, so I need it in code. I have tried to store rows in arrays but it doesn't seem to work. I am using Python to do it.
So let's say I have a CSV file:
1,2,3
4,5,6
7,8,9
All I want is a CSV file like this:
1,2,3,4,5,6,7,8,9
The code I have tried is this:
fin = open("C:\\1.csv", 'r+')
fout = open("C:\\2.csv",'w')
for line in fin.xreadlines():
    new = line.replace(',', ' ', 1)
    fout.write(new)
fin.close()
fout.close()
Could you please help?
You should be using the csv module for this, as splitting CSV data manually on commas is very error-prone (a single column can contain a string with commas in it, and you would incorrectly end up splitting it into multiple columns). The csv module represents each row as a list of values.
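To see why, here is a tiny, self-contained illustration (the sample row is made up): a quoted field containing a comma survives csv.reader intact but gets mangled by a plain str.split:
import csv
from io import StringIO

# A made-up row whose second field contains a comma inside quotes
line = '1,"Hurstville, NSW",-33.97\n'

print(line.split(','))                   # ['1', '"Hurstville', ' NSW"', '-33.97\n'] -- wrongly split into 4 pieces
print(next(csv.reader(StringIO(line))))  # ['1', 'Hurstville, NSW', '-33.97'] -- correctly parsed into 3 fields
With that caveat out of the way, the combining code itself looks like this: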
import csv

def return_contents(file_name):
    with open(file_name) as infile:
        reader = csv.reader(infile)
        return list(reader)

data1 = return_contents('csv1.csv')
data2 = return_contents('csv2.csv')
print(data1)
print(data2)

combined = []
for row in data1:
    combined.extend(row)
for row in data2:
    combined.extend(row)

with open('csv_out.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(combined)
That code gives you the basis of the approach but it would be ugly to extend this for hundreds of files. Instead, you probably want os.listdir to pull all the files in a single directory, one by one, and add them to your output. This is the reason that I packed the reading code into the return_contents function; we can repeat the same process millions of times on different files with only one set of code to do the actual reading. Something like this:
import csv
import os
def return_contents(file_name):
    with open(file_name) as infile:
        reader = csv.reader(infile)
        return list(reader)

all_files = os.listdir('my_csvs')
combined_output = []

for file in all_files:
    data = return_contents('my_csvs/{}'.format(file))
    for row in data:
        combined_output.extend(row)

with open('csv_out.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(combined_output)
If you are specifically dealing with the CSV file format, I recommend you use the csv package for the file operations. If you also use the with...as statement, you don't need to worry about closing the file, etc. You just need to define PATH and the program will iterate over all .csv files.
Here is what you can do:
import csv
import os

PATH = "your folder path"

def order_list():
    data_list = []
    for filename in os.listdir(PATH):
        if filename.endswith(".csv"):
            with open(os.path.join(PATH, filename)) as csvfile:
                read_csv = csv.reader(csvfile, delimiter=',', quoting=csv.QUOTE_NONNUMERIC)
                for row in read_csv:
                    data_list.extend(row)
    print(data_list)

if __name__ == '__main__':
    order_list()
Store your data in a pandas DataFrame:
import pandas as pd
df = pd.read_csv('file.csv')
Store the modified DataFrame in a new one:
df_2 = df.groupby('Column_Name').agg(lambda x: ' '.join(x)).reset_index()  # use the name of your column
Write the new DataFrame to a new CSV:
df_2.to_csv("file_modified.csv")
You could also do it like this:
fIn = open("test.csv", "r")
fOut = open("output.csv", "w")
fOut.write(",".join([line for line in fIn]).replace("\n",""))
fIn.close()
fOut.close()
If you now want to run it on multiple files, you can run it as a script with arguments:
import sys
fIn = open(sys.argv[1], "r")
fOut = open(sys.argv[2], "w")
fOut.write(",".join([line for line in fIn]).replace("\n",""))
fIn.close()
fOut.close()
Now, assuming you use some Linux system and the script is called csvOnliner.py, you could call it with:
for i in *.csv; do python csvOnliner.py $i changed_$i; done
On Windows you could do it like this:
FOR %i IN (*.csv) DO csvOnliner.py %i changed_%i
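If you would rather stay in Python instead of shell syntax, a rough cross-platform sketch using the standard glob module (the changed_ output naming just mirrors the shell examples above) could look like this:
import glob

# Turn every .csv file in the current directory into a one-line changed_<name> copy
for path in glob.glob('*.csv'):
    with open(path, 'r') as fIn, open('changed_' + path, 'w') as fOut:
        fOut.write(','.join(line for line in fIn).replace('\n', ''))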

Convert from CSV to array in Python

I have a CSV file containing the following.
0.000264,0.000352,0.000087,0.000549
0.00016,0.000223,0.000011,0.000142
0.008853,0.006519,0.002043,0.009819
0.002076,0.001686,0.000959,0.003107
0.000599,0.000133,0.000113,0.000466
0.002264,0.001927,0.00079,0.003815
0.002761,0.00288,0.001261,0.006851
0.000723,0.000617,0.000794,0.002189
I want to convert the values into an array in Python, keeping the same order (rows and columns). How can I achieve this?
I have tried different functions but ended up with errors.
You should use the csv module:
import csv
results = []
with open("input.csv") as csvfile:
    reader = csv.reader(csvfile, quoting=csv.QUOTE_NONNUMERIC)  # change contents to floats
    for row in reader:  # each row is a list
        results.append(row)
This gives:
[[0.000264, 0.000352, 8.7e-05, 0.000549],
[0.00016, 0.000223, 1.1e-05, 0.000142],
[0.008853, 0.006519, 0.002043, 0.009819],
[0.002076, 0.001686, 0.000959, 0.003107],
[0.000599, 0.000133, 0.000113, 0.000466],
[0.002264, 0.001927, 0.00079, 0.003815],
[0.002761, 0.00288, 0.001261, 0.006851],
[0.000723, 0.000617, 0.000794, 0.002189]]
If your file doesn't contain parentheses, you can also parse it by hand:
with open('input.csv') as f:
    output = [float(s) for line in f.readlines() for s in line[:-1].split(',')]
print(output)
The csv module was created to do just this. The following implementation of the module is taken straight from the Python docs.
import csv
with open('file.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in reader:
        pass  # add data to a list or other data structure here
The delimiter is the character that separates data entries, and the quotechar is the character used to wrap fields that contain the delimiter or other special characters.
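For instance, a small sketch (the record is invented) of how a quotechar of '|' lets a field contain the delimiter without being split:
import csv
from io import StringIO

# One made-up record whose second field contains a comma, protected by the | quotechar
sample = StringIO('2,|Sydney, NSW|,2015-09-17\n')

reader = csv.reader(sample, delimiter=',', quotechar='|')
print(next(reader))  # ['2', 'Sydney, NSW', '2015-09-17']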

Parsing CSV files using Python 2.7

I'm trying to write a script that will open a CSV file and write rows from that file to a new CSV file based on the match criteria of a unique telephone number in column 4 of csv.csv. The phone numbers are always in column 4, and are often duplicated in the file, however the other columns are often unique, thus each row is inherently unique.
A row from the csv file I'm reading looks like this: (the TN is 9259991234)
2,PPS,2015-09-17T15:44,9259991234,9DF51758-A2BD-4F65-AAA2
I hit an error with the code below saying that '_csv.writer' is not iterable and I'm not sure how to modify my code to solve the problem.
import csv
import sys
import os
os.chdir(r'C:\pTest')
with open(r'csv.csv', 'rb') as f:
    reader = csv.reader(f, delimiter=',')
    with open(r'new_csv.csv', 'ab') as new_f:
        writer = csv.writer(new_f, delimiter=',')
        for row in reader:
            if row[3] not in writer:
                writer.writerow(new_f)
Your error stems from this expression:
row[3] not in writer
You cannot test for membership against a csv.writer() object. If you want to track whether you have already processed a phone number, use a separate set() object for that:
with open(r'csv.csv', 'rb') as f:
    reader = csv.reader(f, delimiter=',')
    with open(r'new_csv.csv', 'ab') as new_f:
        writer = csv.writer(new_f, delimiter=',')
        seen = set()
        for row in reader:
            if row[3] not in seen:
                seen.add(row[3])
                writer.writerow(row)
Note that I also changed your writer.writerow() call; you want to write the row, not the file object.

Python script to turn input csv columns into output csv row values

I have an input CSV that looks like:
email,trait1,trait2,trait3
foo#gmail,biz,baz,buzz
bar#gmail,bizzy,bazzy,buzzy
foobars#gmail,bizziest,bazziest,buzziest
and I need the output format to look like
Indv,AttrName,AttrValue,Start,End
foo#gmail,"trait1",biz,,,
foo#gmail,"trait2",baz,baz,,
foo#gmail,"trait3",buzz,,,
For each row in my input file I need to write one output row per trait column (the N-1 non-email columns) in the input CSV. The Start and End fields in the output file can be empty in some cases.
I'm trying to read in the data using a DictReader. So far I've been able to read in the data with:
import unicodecsv
import os
import codecs
with open('test.csv') as csvfile:
    reader = unicodecsv.csv.DictReader(csvfile)
    outfile = codecs.open("test-write", "w", "utf-8")
    outfile.write("Indv", "ATTR", "Value", "Start", "End\n")
    for row in reader:
        outfile.write([row['email'], "trait1", row['trait1'], '', ''])
        outfile.write([row['email'], "trait2", row['trait2'], row['trait2'], ''])
        outfile.write([row['email'], "trait3", row['trait3'], '', ''])
This doesn't work (I think I need to cast the list to a string), and it is also very brittle since I'm hardcoding the column names for each row. The bigger issue is that the data within the for loop isn't written to "test-write"; only the line
outfile.write("Indv", "ATTR", "Value", "Start","End\n") actually writes out to the file. Is DictReader the appropriate class to use in my case?
This uses a unicodecsv.DictWriter and the zip() function to do what you want, and the code is fairly readable in my opinion.
import unicodecsv
import os
import codecs
with open('test.csv') as infile, \
     codecs.open('test-write.csv', 'w', 'utf-8') as outfile:
    reader = unicodecsv.DictReader(infile)
    fieldnames = 'Indv,AttrName,AttrValue,Start,End'.split(',')
    writer = unicodecsv.DictWriter(outfile, fieldnames)
    writer.writeheader()
    for row in reader:
        email = row['email']
        trait1, trait2, trait3 = row['trait1'], row['trait2'], row['trait3']
        writer.writerows([  # writes three rows of output from each row of input
            dict(zip(fieldnames, [email, 'trait1', trait1])),
            dict(zip(fieldnames, [email, 'trait2', trait2, trait2])),
            dict(zip(fieldnames, [email, 'trait3', trait3]))])
Here's the contents of the test-write.csv file it produced from your example input csv file:
Indv,AttrName,AttrValue,Start,End
foo#gmail,trait1,biz,,
foo#gmail,trait2,baz,baz,
foo#gmail,trait3,buzz,,
bar#gmail,trait1,bizzy,,
bar#gmail,trait2,bazzy,bazzy,
bar#gmail,trait3,buzzy,,
foobars#gmail,trait1,bizziest,,
foobars#gmail,trait2,bazziest,bazziest,
foobars#gmail,trait3,buzziest,,
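A note on how the empty Start/End columns come about: dict(zip(fieldnames, values)) stops at the shorter of the two sequences, so a three- or four-item value list simply produces a dict without the trailing keys, and DictWriter then fills in the missing fields with its restval, which defaults to an empty string. A tiny sketch of that behaviour:
fieldnames = ['Indv', 'AttrName', 'AttrValue', 'Start', 'End']

# zip() truncates to the shorter sequence, so Start and End are simply absent from the dict
print(dict(zip(fieldnames, ['foo#gmail', 'trait1', 'biz'])))
# {'Indv': 'foo#gmail', 'AttrName': 'trait1', 'AttrValue': 'biz'}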
I may be completely off since I don't do a lot of work with unicode, but it seems to me that the following should work:
import csv
with open('test.csv', 'ur') as csvin, open('test-write', 'w') as csvout:
    reader = csv.DictReader(csvin)
    writer = csv.DictWriter(csvout, fieldnames=['Indv', 'AttrName',
                                                'AttrValue', 'Start', 'End'])
    for row in reader:
        for traitnum in range(1, 4):
            key = "trait{}".format(traitnum)
            writer.writerow({'Indv': row['email'], 'AttrName': key,
                             'AttrValue': row[key]})
import pandas as pd
pd1 = pd.read_csv('input_csv.csv')
pd2 = pd.melt(pd1, id_vars=['email'], value_vars=['trait1','trait2','trait3'], var_name='AttrName', value_name='AttrValue').rename(columns={'email': 'Indv'}).sort_values(by=['Indv','AttrName']).reset_index(drop=True)
pd2.to_csv('output_csv.csv', index=False)
Unclear on what the Start and End fields represent, but this gets you everything else.
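If empty Start and End columns are wanted anyway, so the header matches the expected output in the question, one possible follow-up (column names taken from that expected header) is to add them before writing:
# Add empty Start/End columns so the header becomes Indv,AttrName,AttrValue,Start,End
pd2['Start'] = ''
pd2['End'] = ''
pd2.to_csv('output_csv.csv', index=False)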
