I have the following list of numbers: ['Number', 1,2,3,4]
If I have the following CSV file:
`Name`
`First`
`Second`
`Third`
`Fourth`
How do I add my list of numbers to it and make it look like this:
`Name Number`
`First 1`
`Second 2`
`Third 3`
`Fourth 4`
You can use fileinput.input with inplace=True to modify the original file:
import fileinput
import sys

l = ['Number', 1, 2, 3, 4]

# With inplace=True, anything written to stdout replaces the corresponding line of the original file.
for ind, line in enumerate(fileinput.input("in.csv", inplace=True)):
    sys.stdout.write("{} {}\n".format(line.rstrip(), l[ind]))
Input:
Name
First
Second
Third
Fourth
Output:
Name Number
First 1
Second 2
Third 3
Fourth 4
Or write to a tempfile and move with shutil.move to replace the original file:
import csv
from shutil import move
from tempfile import NamedTemporaryFile

l = ['Number', 1, 2, 3, 4]

with open('in.csv') as csvfile, NamedTemporaryFile("w", dir=".", delete=False) as temp:
    r = csv.reader(csvfile)
    wr = csv.writer(temp, delimiter=" ")
    for row, new in zip(r, l):
        wr.writerow(row + [new])

move(temp.name, "in.csv")
Not an elegant way, but it works:
#!/usr/bin/python
# Note: written for Python 2 (csv files opened in binary mode).
import csv
import sys


def csv_to_dict(csv_file_path):
    csv_file = open(csv_file_path, 'rb')
    csv_file.seek(0)
    sniffdialect = csv.Sniffer().sniff(csv_file.read(10000), delimiters='\t,;')
    csv_file.seek(0)
    dict_reader = csv.DictReader(csv_file, dialect=sniffdialect)
    csv_file.seek(0)
    dict_data = []
    for record in dict_reader:
        dict_data.append(record)
    csv_file.close()
    return dict_data


def dict_to_csv(csv_file_path, dict_data):
    csv_file = open(csv_file_path, 'wb')
    writer = csv.writer(csv_file, dialect='excel')
    headers = dict_data[0].keys()
    writer.writerow(headers)
    for dat in dict_data:
        line = []
        for field in headers:
            line.append(dat[field])
        writer.writerow(line)
    csv_file.close()


if __name__ == '__main__':
    org_path = sys.argv[1]
    new_path = sys.argv[2]
    your_array = ['Number', 1, 2, 3, 4]
    org_csv = csv_to_dict(org_path)
    new_data = []
    for line in org_csv:
        new_line = dict()
        new_line['Name'] = line['Name']
        new_line[your_array[0]] = your_array[org_csv.index(line) + 1]
        new_data.append(new_line)
    if new_data:
        dict_to_csv(new_path, new_data)
Hope that will help!
import csv

with open('existing_file.csv', 'rb') as infile:
    reader = csv.reader(infile)
    your_list = list(reader)

list2 = ['Number', 1, 2, 3, 4]

# zip() pairs each row with its number; flatten each pair so the number
# becomes an extra column instead of a nested list.
zipped = [row + [value] for row, value in zip(your_list, list2)]

with open("test.csv", "wb") as outfile:
    writer = csv.writer(outfile)
    writer.writerows(zipped)
Related
I have a txt file of data. It looks as follows:
time pos
0.02 1
0.1 2
...
and so on, with the columns in each line separated by a space. I need to convert it into a CSV file, like:
time,pos
0.02,1
0.1,2
0.15,3
How can I do it with Python? This is what I have tried:
time = []
pos = []

def get_data(filename):
    with open(filename, 'r') as csvfile:
        csvFileReader = csv.reader(csvfile)
        next(csvFileReader)
        for row in csvFileReader:
            time.append((row[0].split(' ')[0]))
            pos.append((row[1]))
    return
Since the columns are separated by a single space, the simplest approach is a plain text replacement:
with open(filename) as infile, open('outfile.csv', 'w') as outfile:
    for line in infile:
        outfile.write(line.replace(' ', ','))
You can read the file with the csv module, using a space as the delimiter:
import csv

with open(filename, newline='') as f:
    reader = csv.reader(f, delimiter=' ')
    for row in reader:
        print(row)
For writing, just use the default options and it will save the file with a comma as the delimiter.
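For example, a minimal sketch of the full conversion, assuming placeholder names data.txt and out.csv:
import csv

# Read space-delimited rows and write them back out with the default comma delimiter.
with open('data.txt', newline='') as src, open('out.csv', 'w', newline='') as dst:
    reader = csv.reader(src, delimiter=' ')
    writer = csv.writer(dst)
    writer.writerows(reader)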
Try pandas:
import pandas as pd

with open(filename, 'r') as fo:
    data = fo.readlines()

# The first line holds the column headings; the remaining lines hold the values.
column_headings = data[0].split()
rows = [line.split() for line in data[1:]]

pd.DataFrame(rows, columns=column_headings).to_csv('csv_file.csv', index=False)
You can use this:
import csv

def get_data(filename):
    with open(filename, 'r') as csvfile:
        csvfile1 = csv.reader(csvfile, delimiter=' ')
        with open(filename.replace('.txt', '.csv'), 'w') as outfile:
            writer = csv.writer(outfile, delimiter=',')
            for row in csvfile1:
                writer.writerow(row)
I am trying to output a CSV file, but the headers are gone. I tried looking at my code line by line, but I can't find what's wrong with it.
My sample data is :
ABC.csv (assuming there are duplicate rows in it, so I also added code to remove them):
KeyID,GeneralID
145258,KL456
145259,BG486
145260,HJ789
145261,KL456
145259,BG486
145259,BG486
My code:
import csv
import fileinput
from collections import Counter

file_path_1 = "ABC.csv"
key_id = []
general_id = []
total_general_id = []

with open(file_path_1, 'rU') as f:
    reader = csv.reader(f)
    header = next(reader)
    lines = [line for line in reader]
    counts = Counter([l[1] for l in lines])
    new_lines = [l + [str(counts[l[1]])] for l in lines]

with open(file_path_1, 'wb') as f:
    writer = csv.writer(f)
    writer.writerow(header + ['Total_GeneralID'])
    writer.writerows(new_lines)

with open(file_path_1, 'rU') as f:
    reader = csv.DictReader(f)
    for row in reader:
        key_id.append(row['KeyID'])
        general_id.append(row['GeneralID'])
        total_general_id.append(row['Total_GeneralID'])

New_List = [[] for _ in range(len(key_id))]
for attr in range(len(key_id)):
    New_List[attr].append(key_id[attr])
    New_List[attr].append(general_id[attr])
    New_List[attr].append(total_general_id[attr])

with open('result_id_with_total.csv', 'wb+') as newfile:
    header = ['KEY ID', 'GENERAL ID', 'TOTAL GENERAL ID']
    wr = csv.writer(newfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
    wr.writerow(header)  # I already add the headers here, but it won't work.
    for item in New_List:
        if item not in newfile:
            wr.writerow(item)
Unfortunately, my output looks like this (result_id_with_total.csv):
145258,KL456,2
145259,BG486,1
145260,HJ789,1
145261,KL456,2
What I am trying to achieve:
KEY ID,GENERAL ID,TOTAL GENERAL ID
145258,KL456,2
145259,BG486,1
145260,HJ789,1
145261,KL456,2
My main problem in this code:
wr.writerow(header)
won't work.
This is to do with opening the file with wb+ (write binary). When you write to a file in binary mode, you need to pass it bytes, not strings.
I get this error in the console when I run it:
TypeError: a bytes-like object is required, not 'str'
Try changing wb+ to just w; this does the trick.
with open('result_id_with_total.csv', 'w') as newfile:
    header = ['KEY ID', 'GENERAL ID', 'TOTAL GENERAL ID']
    wr = csv.writer(newfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
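As a side note, the csv docs also recommend opening the file with newline='' in Python 3 so the writer can handle line endings itself (otherwise you can get blank lines between rows on Windows). A minimal sketch of the write step, reusing the New_List built in the question:
import csv

# Text mode plus newline='' is the recommended way to hand a file to csv.writer in Python 3.
with open('result_id_with_total.csv', 'w', newline='') as newfile:
    wr = csv.writer(newfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
    wr.writerow(['KEY ID', 'GENERAL ID', 'TOTAL GENERAL ID'])
    wr.writerows(New_List)  # New_List comes from the question's code above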
I want to sort the data in a csv file alphabetically according to the contents in the first column. For example, if the file contained:
city/month,Jan,Feb,Mar,Apr
Melbourne,41.2,35.5,37.4,29.3
Brisbane,31.3,40.2,37.9,29
Darwin,34,34,33.2,34.5
it would be sorted as:
city/month,Jan,Feb,Mar,Apr
Brisbane,31.3,40.2,37.9,29
Darwin,34,34,33.2,34.5
Melbourne,41.2,35.5,37.4,29.3
What I've done so far sorts correctly, but it doesn't return the answer correctly: instead of returning it in table format, it returns everything as a list. Any idea why that is?
import csv
import operator

def sort_records(csv_filename, new_filename):
    f = open(csv_filename)
    csv1 = csv.reader(f, delimiter=',')
    new_filename = sorted(csv1)
    return new_filename
    f.close()
>>> import csv
>>> import operator
>>> def sort_records(csv_filename, new_filename):
...     with open(csv_filename, 'r') as i, open(new_filename, 'w') as o:
...         reader = csv.reader(i, delimiter=',')
...         writer = csv.writer(o, delimiter=',')
...         writer.writerow(next(reader))  # header row
...         writer.writerows(sorted(reader, key=operator.itemgetter(0)))
...
>>> sort_records('a.csv', 'b.csv')
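As to why your version returned a list: csv.reader yields each row as a list of strings, and sorted() simply collects those rows into a plain Python list, which is what the function returns (the f.close() after the return is never reached, and nothing is ever written back out as CSV). Continuing the session above, and assuming a.csv holds the sample data from the question:
>>> with open('a.csv') as f:
...     rows = sorted(csv.reader(f, delimiter=','))
...
>>> rows[0]
['Brisbane', '31.3', '40.2', '37.9', '29']
Writing the sorted rows with csv.writer, as in the function above, is what restores the table format.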
I am working on a simple program to open a file, read certain rows, and print them to another new file, but I want to cut those rows and remove them from the original CSV. How do I do that? This is what I have tried:
import csv

f = open('1.csv')
csv_f = csv.reader(f)
content_value = []
for row in csv_f:
    if 'yepme' in row[2]:
        content_value.append(row)

g = open('output.csv', 'wb')
wr = csv.writer(g, dialect='excel')
wr.writerows(content_value)
Edit: I found the answer:
import csv

f = open('1.csv')
csv_f = csv.reader(f)
content_value = []
old_value = []
for row in csv_f:
    if 'yepme' in row[2]:
        content_value.append(row)
    else:
        old_value.append(row)

g = open('output.csv', 'wb')
wr = csv.writer(g, dialect='excel')
wr.writerows(content_value)

h = open('2.csv', 'wb')
ws = csv.writer(h, dialect='excel')
ws.writerows(old_value)
A similar problem is mentioned in this question.
Short solution: write two files, one with the extracted lines and one with the leftovers.
Coded solution:
import csv

with open('1.csv', 'r') as f:
    csv_f = csv.reader(f)
    new_content = []
    old_content = []
    for row in csv_f:
        if 'yepme' in row[2]:
            new_content.append(row)
        else:
            old_content.append(row)

with open('output.csv', 'wb') as f:
    wr = csv.writer(f, dialect='excel')
    wr.writerows(new_content)

# Overwrite the original file with only the leftover rows.
with open('1.csv', 'wb') as f:
    wr = csv.writer(f, dialect='excel')
    wr.writerows(old_content)
I never used the csv module much, but you should get the idea. If your CSV file is very large, you should probably read and write line by line to avoid memory issues; a line-by-line version is sketched below.
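For instance, a minimal line-by-line sketch of that idea, written for Python 3 and keeping the file names and the 'yepme' test from the question; the name 1_remaining.csv is just a temporary file I introduce:
import csv
import os

# Stream rows from the source straight into the two output files.
with open('1.csv', 'r', newline='') as src, \
     open('output.csv', 'w', newline='') as matched_out, \
     open('1_remaining.csv', 'w', newline='') as rest_out:
    matched = csv.writer(matched_out)
    rest = csv.writer(rest_out)
    for row in csv.reader(src):
        (matched if 'yepme' in row[2] else rest).writerow(row)

# Replace the original file with the leftover rows.
os.replace('1_remaining.csv', '1.csv')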
#!/usr/bin/python
import csv
import re

string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')
#searched = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

print "hello Python! "

#def does_match(string):
#    stringl = string.lower()
#    return any(s in stringl for s in searched)

inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

for row in reader:
    found = False
    for col in row:
        if col in [string_1, string_2, string_3] and not found:
            writer.writerow(row)
            found = True

#for row in reader:
#    if any(does_match(col) for col in row):
#        writer.writerow(row[:2])  # write only 2 first columns

inFile.close()
outFile.close()
I'm trying to figure out how to search a CSV file for 3 items. If those items exist, print the row. Ideally, I would like only columns 1 and 3 to be written to a new file.
Sample Data File
LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300
I'm trying to figure out how to search a CSV file for 3 items. If
those items exist print the row. Ideally I would like only Columns 1
and 3 to print to a new file.
Try this:
import csv

search_for = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

with open('in.csv') as inf, open('out.csv', 'w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        if row[0] in search_for:
            print('Found: {}'.format(row))
            writer.writerow(row)
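Since the question asks for only columns 1 and 3 in the new file, here is a small variation of the loop above (same assumed in.csv / out.csv names) that writes just those two fields:
import csv

search_for = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

with open('in.csv') as inf, open('out.csv', 'w', newline='') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf)
    for row in reader:
        if row[0] in search_for:
            writer.writerow([row[0], row[2]])  # keep only columns 1 and 3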
#!/usr/bin/python
import csv
import numpy as np


class search_csv(object):
    def __init__(self, infile, outfile):
        infile = open(infile, 'rb')
        read_infile = [i for i in csv.reader(infile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)]
        self.non_numpy_data = read_infile
        self.data = np.array(read_infile, dtype=None)
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    def write_to(self, matched_values):
        self.writer_.writerows(matched_values)
        print ' Matched Values Written '
        return True

    def searcher(self, items, return_cols=[0, 2]):  # items should be passed as a list -> ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
        find_these = np.array(items, dtype=None)
        matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0]
        matching_data = self.data[matching_y][:, return_cols]
        self.write_to(matching_data)
        self.outfile.close()
        return True

    def non_numpy_search(self, items, return_cols=[0, 2]):
        lst = []
        for i in self.non_numpy_data:
            for ii in items:
                if ii in i:
                    z = []
                    for idx in return_cols:
                        z.append(i[idx])
                    lst.append(z)
                    break
        self.write_to(lst)
        return True


### now use the class ###
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'

search_csv(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)
By the phrasing of your question I'm assuming you just want to complete the task at hand and don't really care how. So copy and paste this in, use your data file as the 'IN_FILE' value and the file name you want to write to as the 'OUT_FILE' value, place the values you want to search for in the 'SEARCHING_FOR' list, and you're done.
Things to note....
SEARCHING_FOR should be a list.
The values in SEARCHING_FOR are matched EXACTLY, so 'A' will not match 'a'. If you want to use a regex or something more complex, let me know; a case-insensitive sketch follows below.
The 'non_numpy_search' function has a 'return_cols' parameter; it defaults to the first and third columns.
If you don't have numpy, let me know.
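For reference, a minimal sketch of a case-insensitive substring match, written for Python 3 and independent of the class above; SEARCH_TERMS and the file names are just placeholders:
import csv

SEARCH_TERMS = ['onetouch at', 'linkrunner at', 'aircheck']

with open('in_file.csv', newline='') as inf, open('out_file.csv', 'w', newline='') as outf:
    writer = csv.writer(outf, delimiter='\t')
    for row in csv.reader(inf):
        # Lower-case each cell so 'AirCheck' matches 'aircheck', 'AIRCHECK', etc.
        if any(term in cell.lower() for cell in row for term in SEARCH_TERMS):
            writer.writerow([row[0], row[2]])  # keep the first and third columns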
#!/usr/bin/python
import csv
import re
import sys
import gdata.docs.service

#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')

searched = ['aircheck', 'linkrunner at', 'onetouch at']


def find_group(row):
    """Return the group index of a row:
    0 if the row contains searched[0],
    1 if the row contains searched[1],
    etc.,
    -1 if not found.
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
    return -1


def does_match(string):
    stringl = string.lower()
    return any(s in stringl for s in searched)


# Open input file for reading and output file for writing.
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

#for row in reader:
#    found = False
#    for col in row:
#        if col in [string_1, string_2, string_3] and not found:
#            writer.writerow(row)
#            found = True

"""Build a list of items to sort. If row 12 contains 'LinkRunner AT' (group 1),
store the triple (1, 12, row).
When the triples are sorted later, all rows in group 0 come first, then
all rows in group 1, etc.
"""
stored = []
for i, row in enumerate(reader):
    g = find_group(row)
    if g >= 0:
        stored.append((g, i, row))
stored.sort()

for g, i, row in stored:
    writer.writerow(tuple(row[k] for k in (0, 2)))  # output columns 1 & 3

#for row in reader:
#    if any(does_match(col) for col in row):
#        writer.writerow(row[:2])  # write only 2 first columns

# Close input and output files.
inFile.close()
outFile.close()