I'm trying to convert any .csv file from this path to an excel file. The code works but I need to rename that .csv file manually. Is there a way to read and convert whatever .csv file without renaming it? Your input is highly appreciated. Thanks a lot!
import pandas as pd
# Load one specific, hard-coded CSV file into a DataFrame.
read_file = pd.read_csv(r'C:/Processed_Report/Source1.csv')
# Write that DataFrame to a fixed .xlsx path; index=None is falsy, so the row
# index is not written, and header=True keeps the column names.
read_file.to_excel (r'C:/Processed_Report/Source.xlsx', index = None, header=True)
I'm not sure I understood correctly, but this is how you can read all the files inside a folder and convert them to Excel files.
import os
# Walk the report folder (sub-folders included) and write an .xlsx file next
# to every CSV file that is found.
for root, dirs, files in os.walk("C:/Processed_Report/", topdown=False):
    for name in files:
        base_name, ext = os.path.splitext(name)  # split name / extension
        # Compare the extension exactly (case-insensitively). A substring test
        # such as `ext in ".cvs"` never matches ".csv" and does match files
        # that have no extension at all.
        if ext.lower() != ".csv":
            continue
        df = pd.read_csv(os.path.join(root, name))
        df.to_excel(os.path.join(root, f"{base_name}.xlsx"))
Using pathlib
from pathlib import Path
import pandas as pd
file_path = r"C:/Processed_Report"
# "*.csv" (with the dot) only matches real .csv files; "*csv" would also match
# any name that merely ends in the letters "csv".
files = list(Path(file_path).glob("*.csv"))
# A plain loop is used because the conversion is a side effect, not a value
# worth collecting in a list.
for csv_path in files:
    pd.read_csv(csv_path).to_excel(csv_path.with_suffix(".xlsx"), index=False)
Related
I would like to name new CSV files similar to their corresponding xlsx files
import pandas as pd
# NOTE(review): `my_path` is not defined in this snippet; presumably it is a
# pathlib.Path pointing at the folder with the workbooks.
for filename in my_path.glob('*.xlsx'):
    # Read the named sheet of the current workbook.
    read_file = pd.read_excel(str(filename), sheet_name='My Excel sheet name')
    # Placeholder output name kept from the question: every workbook is
    # written to this same CSV name.
    read_file.to_csv('XLSX NAME SHOULD = CSV NAME.csv', index=None, header=True)
To get the filename with path but without extension use os.path.splitext
from os import path
# Named file_path, not path: rebinding `path` would shadow the os.path module
# imported above and make path.splitext(...) fail.
file_path = "/path/to/file.txt"
path.splitext(file_path)
# -> ("/path/to/file", ".txt")   (a tuple; the extension keeps its dot)
To get the filename without the path :
from os import path
# Named file_path, not path: rebinding `path` would shadow the os.path module
# imported above and make path.basename(...) fail.
file_path = "/path/to/file.txt"
path.basename(file_path)
# -> "file.txt"
So to change the extension from xlsx to csv :
from os import path
# Named file_path, not path: rebinding `path` would shadow the os.path module
# imported above and make path.splitext(...) fail.
file_path = "/path/to/file.xlsx"
# Drop the old extension, then append the new one.
filename = path.splitext(file_path)[0] + '.csv'
# -> "/path/to/file.csv"
And if you need to change the path to save the file in another folder, then you can use basename first.
I need to transform csv files into Excel files in an automatic way. I am failing in naming Excel files with the name of the corresponding csv file.
I saved csv files as 'Trials_1', 'Trials_2', 'Trials_3', but with the code that I wrote Python gives me an error and asks me for a csv file named 'Trials_4'. Then, if I rename the csv file 'Trials_1' to 'Trials_4', the program works and generates an Excel file named 'Trials_1'.
How can I correct my code?
'''
import csv
import openpyxl as xl
import os, os.path
# Input and output folders.
# NOTE(review): these are raw strings, so every `\\` stays as two literal backslashes.
directory=r'C:\\Users\\PycharmProjects\\input\\'
folder=r'C:\\Users\\PycharmProjects\\output\\'
# Iterate over every entry of the input folder.
# NOTE(review): indentation was lost in this listing; presumably everything below sits inside this loop — confirm.
# The loop variable csv_file is never passed to csv_to_excel; the function's own parameter has the same name.
for csv_file in os.listdir(directory):
# Copy the rows of one CSV file into a new workbook and save it under `folder`.
def csv_to_excel(csv_file, excel_file):
csv_data=[]
# csv_file is joined onto `directory` before being opened.
with open(os.path.join(directory, csv_file)) as file_obj:
reader=csv.reader(file_obj)
# Buffer every row of the CSV in memory.
for row in reader:
csv_data.append(row)
workbook= xl.Workbook()
sheet=workbook.active
# Append the buffered rows to the active sheet, one by one.
for row in csv_data:
sheet.append(row)
workbook.save(os.path.join(folder,excel_file))
if __name__=="__main__":
# m = number of regular files currently in the input folder.
m = sum(1 for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)))
# The CSV name is built from that count plus one (3 files -> "Trial_4.csv"),
# not from the name of any file that actually exists.
new_name = "{}Trial_{}.csv".format(directory, m + 1)
# k = number of regular files currently in the output folder.
k = sum(1 for file in os.listdir(folder) if os.path.isfile(os.path.join(folder, file)))
# The Excel name is likewise built from the output-folder count plus one.
new_name_e = "{}Trial_{}.xlsx".format(folder, k + 1)
csv_to_excel(new_name,new_name_e)
'''
Thanks.
Hi Annachiara welcome to StackOverflow,
I would modify the "csv_to_excel" function by using only pandas.
Before that you should install 'xlsxwriter' with:
pip install XlsxWriter
Then the function would be like this:
def csv_to_excel(csv_file,excel_file,csv_sep=';'):
    """Convert one CSV file into a single-sheet Excel workbook.

    Args:
        csv_file: Path of the CSV file to read.
        excel_file: Path of the .xlsx file to create.
        csv_sep: Field separator handed to ``pd.read_csv`` (default ``';'``).
    """
    # read the csv file with pandas
    df = pd.read_csv(csv_file, sep=csv_sep)
    # The with-block saves and closes the workbook when it exits, even if
    # to_excel raises; ExcelWriter.save() no longer exists in pandas 2.x.
    with pd.ExcelWriter(excel_file, engine='xlsxwriter') as writer:
        # copy the csv content (df) into the excel file
        df.to_excel(writer, index=False)
    # print what you converted for reference
    print(f'csv file {csv_file} saved as excel in {excel_file}')
Just make sure that the csv is read correctly: I only added the separator parameter, but you might want to add other parameters as well (such as parse_dates).
Then you can convert the list of csv files with a for loop (I used more steps to make it clearer)
# NOTE(review): these are raw strings, so every `\\` stays as two literal backslashes.
dir_in=r'C:\\Users\\PycharmProjects\\input\\'
dir_out=r'C:\\Users\\PycharmProjects\\output\\'
# os.listdir returns every entry (other file types, sub-folders), so keep
# only the names that end in .csv before handing them to the converter
csvs_to_convert = [
    name for name in os.listdir(dir_in) if name.lower().endswith('.csv')
]
for csv_file_in in csvs_to_convert:
    # remove extension from csv files
    file_name_no_extension = os.path.splitext(csv_file_in)[0]
    # add excel extension .xlsx
    excel_name_out = file_name_no_extension + '.xlsx'
    # write names with their directories
    complete_excel_name_out = os.path.join(dir_out, excel_name_out)
    complete_csv_name_in = os.path.join(dir_in, csv_file_in)
    # convert csv file to excel file
    csv_to_excel(complete_csv_name_in, complete_excel_name_out, csv_sep=';')
Each CSV as a separate Excel file
import glob
import pandas as pd
import os
csv_files = glob.glob('*.csv')
for filename in csv_files:
    # Swap only the trailing extension. str.replace would rewrite every
    # '.csv' inside the name and would miss an upper-case '.CSV'.
    excel_name = os.path.splitext(os.path.basename(filename))[0] + '.xlsx'
    df = pd.read_csv(filename)
    df.to_excel(excel_name, index=False)
All CSVs in the same Excel file, each on a different sheet
import glob
import pandas as pd
import os
csv_files = glob.glob('*.csv')
# Create excel file; the with-block saves and closes it once the loop is done
# (ExcelWriter.save() no longer exists in pandas 2.x).
with pd.ExcelWriter('all_csv.xlsx') as writer:
    for filename in csv_files:
        # Sheet name = file name without folder and without its extension.
        # NOTE: Excel limits sheet names to 31 characters.
        sheet_name = os.path.splitext(os.path.basename(filename))[0]
        df = pd.read_csv(filename)
        # Append each csv as sheet
        df.to_excel(writer, sheet_name=sheet_name, index=False)
Assuming you would like to keep the same structure of your code, I just fixed some technical issues in your code to make it work (please change the folders path to your own):
import csv
import openpyxl as xl
import glob, os, os.path
directory = 'input'
folder = '../output'  # Since 'input' would be my cwd, need to step back a directory to reach 'output'


# Using your function, just passing different arguments for convenience.
def csv_to_excel(f_path, f_name):
    """Copy the rows of one CSV file into a new Excel workbook.

    Args:
        f_path: Path of the CSV file to read.
        f_name: Output file name without extension; the workbook is saved as
            ``<f_name>.xlsx`` inside the module-level ``folder``.
    """
    workbook = xl.Workbook()
    sheet = workbook.active
    # newline='' is what the csv module asks for, so that line breaks inside
    # quoted fields are parsed correctly.
    with open(f_path, 'r', newline='') as file_obj:
        # Rows are appended as they are read; no intermediate list is needed.
        for row in csv.reader(file_obj):
            sheet.append(row)
    workbook.save(os.path.join(folder, f_name + ".xlsx"))
def main():
    """Convert every ``*.csv`` file found in ``directory`` to an Excel file."""
    os.chdir(directory)  # Defining input directory as your cwd
    # Searching for all files with csv extension and sending each to your function
    for file in glob.glob("*.csv"):
        # Absolute path of the file, built without a hard-coded '\\' so it
        # also works where the path separator is not a backslash.
        f_path = os.path.abspath(file)
        f_name = os.path.splitext(file)[0]  # Saving the name of the file
        csv_to_excel(f_path, f_name)


if __name__ == "__main__":
    main()
P.S:
Please avoid defining a function inside a loop; a function only needs to be defined once.
I have tens of tab-delimited text files in my local directory. When I copy and paste a text file into an Excel sheet, it becomes a file with hundreds of columns. Now, I would like to read all the text files and convert them to corresponding Excel files.
If there was a single file, I would have done the following way:
import pandas as pd
# Read one tab-separated text file into a DataFrame.
df = pd.read_csv("H:\\Yugeen\\text1.txt", sep='\t')
# Write it to a fixed workbook path; 'Sheet1' is passed positionally
# (presumably as the sheet name — confirm against the installed pandas version)
# and index=False leaves the row index out.
df.to_excel('H:\\Yugeen\\output1.xlsx', 'Sheet1', index = False)
Is there any way to achieve the solution that I am looking for?
I use this function to list all files in a directory, along with their file path:
import os
def list_files_in_directory(path):
    """Collect the files found below ``'.' + path``.

    The argument is appended to a leading ``'.'`` before walking, so it is
    meant to look like ``'/subdir'``. Each result is the walked folder and
    the file name joined with ``'/'``.
    """
    return [
        folder + '/' + entry
        for folder, _subdirs, entries in os.walk('.' + path)
        for entry in entries
    ]
Selecting for only text files:
# NOTE(review): list_files_in_directory prepends '.' to its argument, so
# passing '.' walks '..' — confirm that the parent folder is really intended.
files = list_files_in_directory('.')
# Match on the extension only; a substring test would also pick up names such
# as 'notes.txt.bak' or anything inside a folder called 'x.txt'.
filtered_files = [i for i in files if i.endswith('.txt')]
Like Sophia demonstrated, you can use pandas to create a dataframe. I'm assuming you want to merge these files as well.
import pandas as pd
# Parse every selected tab-separated file into its own DataFrame.
dfs = [pd.read_csv(txt_file, sep='\t') for txt_file in filtered_files]
# axis=1 places the frames side by side (column-wise) in one master frame.
df_master = pd.concat(dfs, axis=1)
# Save the merged frame as CSV without the row index.
filename = 'master_dataframe.csv'
df_master.to_csv(filename, index=False)
The saved file can then be opened in Excel.
Are you talking about how to get the filenames? You can use the glob library.
import glob
import os

import pandas as pd

file_paths = glob.glob(os.path.join('your-directory', '*.txt'))
# Iterate over file_paths (the list defined above).
for file in file_paths:
    df = pd.read_csv(file, sep='\t')
    # Name each workbook after its source file; a constant output name would
    # be overwritten on every pass, leaving only the last file's data.
    stem = os.path.splitext(os.path.basename(file))[0]
    df.to_excel(os.path.join('output-directory', stem + '.xlsx'), index=False)
Does this answer your question?
I have several csv files in a folder that I need to read, and I need to do the same thing to each file. I want to name each dataframe that is created after its file name, but am not sure how. Could I store the file names in a list and then refer to them later somehow? My current code is below. Thank you in advance.
import os
# Folder that holds the CSV files.
# NOTE(review): in Python 3 a non-raw "\U" starts a unicode escape, so this literal would not parse — confirm the target Python version.
Path = "C:\Users\DATA"
# Names of every entry in that folder.
filelist = os.listdir(Path)
# NOTE(review): `pd` is used below but is not imported in this snippet.
for x in filelist:
# Presumably inside the loop (indentation was lost): RawData is rebound on
# every pass, so only the last file's DataFrame remains afterwards.
RawData = pd.read_csv("C:\Users\DATA\%s" % x)
What if you have just one dataframe with all files?
import os
# Raw string: in a normal literal "\U" starts a unicode escape and the line
# does not even parse in Python 3.
path = r"C:\Users\DATA"
# Map each file name to its DataFrame. The name is joined onto `path`;
# os.path.abspath(name) would resolve it against the current working
# directory instead of the data folder.
raw_data = {
    name: pd.read_csv(os.path.join(path, name)) for name in os.listdir(path)
}
I have a folder with lots of .txt files. How can I read all the files in the folder and get their content with pandas? I tried the following:
import pandas as pd
# A wildcard pattern is handed straight to read_csv.
# NOTE(review): presumably read_csv does not expand the pattern — this is what the question is about.
list_=pd.read_csv("/path/of/the/directory/*.txt",header=None)
# Python 2 print statement.
print list_
Something like this:
import glob
# One DataFrame per matching .txt file, in the order glob returns them.
l = list(map(pd.read_csv, glob.glob("/path/*.txt")))
# Stack the frames on top of each other (row-wise).
df = pd.concat(l, axis=0)
You have to take into account the header, for example if you want to ignore it take a look at the skiprows option in read_csv.
I used this in my project for merging the csv files
import pandas as pd
import os
path = "path of the file"
# Skip hidden entries such as .DS_Store.
files = [file for file in os.listdir(path) if not file.startswith('.')]
# Read every file first and concatenate once: calling pd.concat inside the
# loop copies all previously merged rows again on every iteration.
frames = [
    pd.read_csv(os.path.join(path, file), encoding="ISO-8859-1")
    for file in files
]
# pd.concat rejects an empty list, so fall back to an empty DataFrame when
# the folder holds no files.
all_data = pd.concat(frames) if frames else pd.DataFrame()