I'm trying to make the first row/header lowercase, in multiple csv files in a directory using python. The code and error are below. Is there any way to fix the code or some other way?
import csv
import glob
path = (r'C:\Users\Documents')
for fname in glob(path):
with open(fname, newline='') as f:
reader = csv.reader(f)
row1 = next(reader)
for row1 in reader:
data = [row1.lower() for row1 in row1]
os.rename(row1, data)
The error is:
TypeError: rename: src should be string, bytes or os.PathLike, not list
I think you're getting rows and columns mixed-up. Here's some untested code that does what you want, I think:
import csv
from glob import glob
path = (r'C:\Users\Documents\*.csv') # Note wildcard character added for glob().
for fname in glob(path):
with open(fname, newline='') as f:
reader = csv.reader(f)
header = next(reader) # Get the header row.
header = [column.lower() for column in header] # Lowercase the headings.
rows = [header] + list(reader) # Read the rest of the rows.
with open(fname, 'w', newline='') as f:
writer = csv.writer(f)
writer.writerows(rows) # Write new header & original rows back to file.
Related
I am trying to leave just the first column of a csv file. But it seems not to be working for me and can not find the working solution.
def leavethefirstcolumn(filename):
with open(filename) as f, open('out.csv', "w") as out:
reader = csv.reader(f)
for row in reader:
out.write(row[0])
import csv
def leavethefirstcolumn(filename):
with open(filename) as file, open('out.csv', "w") as out:
reader = csv.reader(file)
for row in reader:
out.write(row[0] + "\n")
# example of using the function
leavethefirstcolumn("in.csv")
You are calling csv.reader(file) while on the previous line, you wrote with open(filename) as f instead of with open(filename) as file.
Also when you are writing to out, you should add a new line
character '\n'
I want to replace the header row of a cvs file text.csv.
header_list = ['column_1', 'column_2', 'column_3']
The header will look like this;
column_1, column_2, column_3
Here is my code;
import csv
with open('text.csv', 'w') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(header_list)
The header of the csv file was replaced correctly. However, the rest of the rows in the csv file were deleted. How do I replace only the header leaving the other rows intact?
I am using python v3.6
Here is a proper way to do it using csv module.
csv.DictReader reads the content of csv file into a list of dicts. It takes an optional fieldnames argument which if set applies a custom header and ignores an original header and treats it as a data row. So, all you need to do is read your csv
file with csv.DictReader and write data with csv.DictWriter. You will have to drop the first row in the reader because it contains the old header and write the new header. It does make sense to write the new data to a separate file though.
import csv
header = ["column_1", "column_2", "column_3"]
with open('text.csv', 'r') as fp:
reader = csv.DictReader(fp, fieldnames=header)
# use newline='' to avoid adding new CR at end of line
with open('output.csv', 'w', newline='') as fh:
writer = csv.DictWriter(fh, fieldnames=reader.fieldnames)
writer.writeheader()
header_mapping = next(reader)
writer.writerows(reader)
Use this:
import csv
header_list = ['column_1', 'column_2', 'column_3']
mystring = ",".join(header_list)
def line_prepender(filename, line):
with open(filename, 'r+') as csvfile:
content = csvfile.read()
csvfile.seek(0, 0)
csvfile.write(line.rstrip('\r\n') + '\n' + content)
line_prepender("text.csv", mystring)
I have a file "TAB.csv" with many columns. I would like to choose one column without header (index of that column is 3) from CSV file. Then create a new text file "NEW.txt" and write there that column (without header).
Below code reads that column but with the header. How to omit the header and save that column in a new text file?
import csv
with open('TAB.csv','rb') as f:
reader = csv.reader(f)
for row in reader:
print row[3]
This is the solution #tmrlvi was talking: it skips the first row (header) via next function:
import csv
with open('TAB.csv','rb') as input_file:
reader = csv.reader(input_file)
output_file = open('output.csv','w')
next(reader, None)
for row in reader:
row_str = row[3]
output_file.write(row_str + '\n')
output_file.close()
Try this:
import csv
with open('TAB.csv', 'rb') as f, open('out.txt', 'wb') as g:
reader = csv.reader(f)
next(reader) # skip header
g.writelines(row[3] + '\n' for row in reader)
enumerate is a nice function that returns a tuple. It enables to to view the index while running over an iterator.
import csv
with open('NEW.txt','wb') as outfile:
with open('TAB.csv','rb') as f:
reader = csv.reader(f)
for index, row in enumerate(reader):
if index > 0:
outfile.write(row[3])
outfile.write("\n")
Another solution would be to read one line from the file (in order to skip the header).
It's an old question but I would like to add my answer about Pandas library, I would like to say. It's better to use Pandas library for such tasks instead of writing your own code. And the simple code with Pandas will be like :
import pandas as pd
reader = pd.read_csv('TAB.csv', header = None)
I have some data that needs to be written to a CSV file. The data is as follows
A ,B ,C
a1,a2 ,b1 ,c1
a2,a4 ,b3 ,ct
The first column has comma inside it. The entire data is in a list that I'd like to write to a CSV file, delimited by commas and without disturbing the data in column A. How can I do that? Mentioning delimiter = ',' splits it into four columns on the whole.
Just use the csv.writer from the csv module.
import csv
data = [['A','B','C']
['a1,a2','b1','c1']
['a2,a4','b3','ct']]
fname = "myfile.csv"
with open(fname,'wb') as f:
writer = csv.writer(f)
for row in data:
writer.writerow(row)
https://docs.python.org/library/csv.html#csv.writer
No need to use the csv module since the ',' in the first column is already part of your data, this will work:
with open('myfile.csv', 'w') as f:
for row in data:
f.write(', '.join(row))
f.write('\n')
You could try the below.
Code:
import csv
import re
with open('infile.csv', 'r') as f:
lst = []
for line in f:
lst.append(re.findall(r',?(\S+)', line))
with open('outfile.csv', 'w', newline='') as w:
writer = csv.writer(w)
for row in lst:
writer.writerow(row)
Output:
A,B,C
"a1,a2",b1,c1
"a2,a4",b3,ct
I have hundreds of large CSV files that I would like to merge into one. However, not all CSV files contain all columns. Therefore, I need to merge files based on column name, not column position.
Just to be clear: in the merged CSV, values should be empty for a cell coming from a line which did not have the column of that cell.
I cannot use the pandas module, because it makes me run out of memory.
Is there a module that can do that, or some easy code?
The csv.DictReader and csv.DictWriter classes should work well (see Python docs). Something like this:
import csv
inputs = ["in1.csv", "in2.csv"] # etc
# First determine the field names from the top line of each input file
# Comment 1 below
fieldnames = []
for filename in inputs:
with open(filename, "r", newline="") as f_in:
reader = csv.reader(f_in)
headers = next(reader)
for h in headers:
if h not in fieldnames:
fieldnames.append(h)
# Then copy the data
with open("out.csv", "w", newline="") as f_out: # Comment 2 below
writer = csv.DictWriter(f_out, fieldnames=fieldnames)
for filename in inputs:
with open(filename, "r", newline="") as f_in:
reader = csv.DictReader(f_in) # Uses the field names in this file
for line in reader:
# Comment 3 below
writer.writerow(line)
Comments from above:
You need to specify all the possible field names in advance to DictWriter, so you need to loop through all your CSV files twice: once to find all the headers, and once to read the data. There is no better solution, because all the headers need to be known before DictWriter can write the first line. This part would be more efficient using sets instead of lists (the in operator on a list is comparatively slow), but it won't make much difference for a few hundred headers. Sets would also lose the deterministic ordering of a list - your columns would come out in a different order each time you ran the code.
The above code is for Python 3, where weird things happen in the CSV module without newline="". Remove this for Python 2.
At this point, line is a dict with the field names as keys, and the column data as values. You can specify what to do with blank or unknown values in the DictReader and DictWriter constructors.
This method should not run out of memory, because it never has the whole file loaded at once.
For those of us using 2.7, this adds an extra linefeed between records in "out.csv". To resolve this, just change the file mode from "w" to "wb".
The solution by #Aaron Lockey, which is the accepted answer has worked well for me except, there were no headers for the file. The out put had no headers and only the row data. Each column was without headings (keys). So I inserted following:
writer.writeheader()
and it worked perfectly fine for me! So now the entire code appears like this:
import csv
inputs = ["in1.csv", "in2.csv"] # etc
# First determine the field names from the top line of each input file
fieldnames = []
for filename in inputs:
with open(filename, "r", newline="") as f_in:
reader = csv.reader(f_in)
headers = next(reader)
for h in headers:
if h not in fieldnames:
fieldnames.append(h)
# Then copy the data
with open("out.csv", "w", newline="") as f_out:
writer = csv.DictWriter(f_out, fieldnames=fieldnames)
writer.writeheader() #this is the addition.
for filename in inputs:
with open(filename, "r", newline="") as f_in:
reader = csv.DictReader(f_in) # Uses the field names in this file
for line in reader:
writer.writerow(line)
You can use the pandas module to do this pretty easily. This snippet assumes all your csv files are in the current folder.
import pandas as pd
import os
all_csv = [file_name for file_name in os.listdir(os.getcwd()) if '.csv' in file_name]
li = []
for filename in all_csv:
df = pd.read_csv(filename, index_col=None, header=0, parse_dates=True, infer_datetime_format=True)
li.append(df)
frame = pd.concat(li, axis=0, ignore_index=True)
frame.to_csv('melted_csv.csv', index=False)
I've faced a situation where not only the number of columns are different, but also some column names are missing. For this kind of situation and obviously for your case, this code snippet can help you :)
The tricky part is naming the columns which have no names and adding them to dictionary. The read_csv_file function is playing the main role here.
def read_csv_file(csv_file_path):
headers = []
data = []
with open(csv_file_path, 'r') as f:
csv_reader = csv.reader(f)
rows = []
for i, row in enumerate(csv_reader):
if i == 0:
for j in range(len(row)):
if row[j].strip() == "":
col_name = f"col-{j+1}"
else:
col_name = row[j]
if col_name not in headers:
headers.append(col_name)
else:
rows.append(row)
if len(row) > len(headers):
for j in range(len(row)):
if j+1 > len(headers):
col_name = f"col-{j+1}"
if col_name not in headers:
headers.append(col_name)
for i, row in enumerate(rows):
row_data = {}
for j in range(len(headers)):
if len(row) > j:
row_data[headers[j]] = row[j]
else:
row_data[headers[j]] = ''
data.append(row_data)
return headers, data
def write_csv_file(file_path, rows):
if len(rows) > 0:
headers = list(rows[0].keys())
with open(file_path, 'w', newline='', encoding='UTF8') as f:
writer = csv.DictWriter(f, fieldnames=headers)
writer.writeheader()
writer.writerows(rows)
# The list of the csv file paths which will be merged
files_to_be_merged = [
'file-1.csv',
'file-2.csv',
'file-3.csv'
]
# Read and store all the file data in new_file_data
final_headers = []
new_file_data = []
for f1 in files_to_be_merged:
single_file_data = read_csv_file(f1)
for h in single_file_data[0]:
if h not in final_headers:
final_headers.append(h)
new_file_data += single_file_data[1]
# Add the missing keys to the dictionaries
for d in new_file_data:
for h in final_headers:
if d.get(h) is None:
d[h] = ""
# Write a new file
target_file_name = 'merged_file.csv'
write_csv_file(target_file_name, new_file_data)