Convert Excel to CSV using Python - python

I would like to name new CSV files similar to their corresponding xlsx files
import pandas as pd
for filename in my_path.glob('*.xlsx'):
read_file = pd.read_excel (str(filename)', sheet_name='My Excel sheet name')
read_file.to_csv ("XLSX NAME SHOULD = CSV NAME.csv', index = None, header=True)

To get the filename with path but without extension use os.path.splitext
from os import path

# Keep the string under its own name: rebinding `path` would shadow the
# os.path module and make path.splitext unavailable.
file_path = "/path/to/file.txt"
path.splitext(file_path)
# -> ("/path/to/file", ".txt")   (a tuple; the extension keeps its dot)
To get the filename without the path :
from os import path

# Keep the string under its own name so the os.path module stays usable.
file_path = "/path/to/file.txt"
path.basename(file_path)
# -> "file.txt"
So to change the extension from xlsx to csv :
from os import path

# Keep the string under its own name so the os.path module stays usable.
file_path = "/path/to/file.xlsx"
# splitext(...)[0] is everything before the extension; append the new one.
filename = path.splitext(file_path)[0] + '.csv'
# -> "/path/to/file.csv"
And if you need to change the path to save the file in another folder, then you can use basename first.

Related

How to convert any .csv file to an excel file using Python?

I'm trying to convert any .csv file from this path to an excel file. The code works but I need to rename that .csv file manually. Is there a way to read and convert whatever .csv file without renaming it? Your input is highly appreciated. Thanks a lot!
import pandas as pd
read_file = pd.read_csv(r'C:/Processed_Report/Source1.csv')
read_file.to_excel (r'C:/Processed_Report/Source.xlsx', index = None, header=True)
I'm not sure I understood correctly, but this is how you can read all the files inside a folder and convert them to Excel files.
import os

import pandas as pd

# Walk the report folder and write an .xlsx next to every .csv that is found.
for root, dirs, files in os.walk("C:/Processed_Report/", topdown=False):
    for name in files:
        base_name, ext = os.path.splitext(name)  # Split name, extension
        # Use an equality test on the full suffix: a substring test such as
        # `ext in ".csv"` would also accept "" (files with no extension).
        if ext.lower() == ".csv":
            df = pd.read_csv(os.path.join(root, name))
            # index=False keeps the DataFrame's row index out of the sheet.
            df.to_excel(os.path.join(root, f'{base_name}.xlsx'), index=False)
Using pathlib
from pathlib import Path
import pandas as pd
file_path = r"C:/Processed_Report"
# "*.csv" (with the dot) so names that merely end in "csv" are not matched.
files = list(Path(file_path).glob("*.csv"))
# A plain loop, because the conversion is done purely for its side effect.
for csv_file in files:
    # with_suffix keeps the folder and stem and swaps only the extension.
    pd.read_csv(csv_file).to_excel(csv_file.with_suffix(".xlsx"), index=False)

saving csv files to new directory

I am trying to use this code to write my edited csv files to a new directory. Does anyone know how I specify the directory?
I have tried this but it doesn't seem to be working.
dir = r'C:/Users/PycharmProjects/pythonProject1' # raw string for windows.
csv_files = [f for f in Path(dir).glob('*.csv')] # finds all csvs in your folder.
cols = ['Temperature']
for csv in csv_files: #iterate list
df = pd.read_csv(csv) #read csv
df[cols].to_csv('C:/Users/Desktop', csv.name, index=False)
print(f'{csv.name} saved.')
I think your only problem is the way you're calling to_csv(), passing a directory and a filename. I tried that and got this error:
IsADirectoryError: [Errno 21] Is a directory: '/Users/zyoung/Desktop/processed'
because to_csv() is expecting a path to a file, not a directory path and a file name.
You need to join the output directory and CSV's file name, and pass that, like:
out_dir = PurePath(base_dir, r"processed")
# ...
# ...
# Join with the bare file name: csv_in may itself carry a directory part
# (or be absolute), which would end up inside -- or replace -- out_dir.
csv_out = PurePath(out_dir, csv_in.name)
df[cols].to_csv(csv_out, index=False)
I'm writing to the subdirectory processed, in my current dir ("."), and using the PurePath() function to do smart joins of the path components.
Here's the complete program I wrote for myself to test this:
import os
from pathlib import Path, PurePath
import pandas as pd
base_dir = r"."
out_dir = PurePath(base_dir, r"processed")
csv_files = list(Path(base_dir).glob("*.csv"))

# exist_ok=True avoids the race between a separate exists() check and mkdir().
os.makedirs(out_dir, exist_ok=True)

cols = ["Temperature"]
for csv_in in csv_files:
    df = pd.read_csv(csv_in)
    # Use only the file name: csv_in includes base_dir (and may be absolute),
    # so joining the whole path would not land inside out_dir.
    csv_out = PurePath(out_dir, csv_in.name)
    df[cols].to_csv(csv_out, index=False)
    print(f"Saved {csv_out.name}")

Convert files from different paths using Python

I´m trying to convert Excel files from different paths but it only converts the file in the last path in path list.
What is the proper way to loop through the paths in the list to get the files to be converted?
import pandas as pd
import glob, os
import csv, json
import openpyxl
from pathlib import Path
list_path = Path("excel_files/PLM", "excel_files/PTR", "excel_files/TMR")
for xlsx_file in glob.glob(os.path.join(list_path,"*.xlsx*")):
data_xls = pd.read_excel(xlsx_file, 'Relatório - DADOS', index_col=None, engine = 'openpyxl')
csv_file = os.path.splitext(xlsx_file)[0]+".csv"
data_xls.to_csv(csv_file, encoding='utf-8', index=False)
Path("excel_files/PLM", "excel_files/PTR", "excel_files/TMR") returns a single path, not a list of paths:
>>> Path("excel_files/PLM", "excel_files/PTR", "excel_files/TMR")
PosixPath('excel_files/PLM/excel_files/PTR/excel_files/TMR')
I'm not sure why it finds any files at all, to be honest.
Instead, you will probably have to do another loop - something like:
# Visit each source folder in turn and glob for workbooks inside it.
source_dirs = ("excel_files/PLM", "excel_files/PTR", "excel_files/TMR")
for path in source_dirs:
    pattern = os.path.join(path, "*.xlsx*")
    for xlsx_file in glob.glob(pattern):
        ...

How to automatically name Excel files just generated from csv files with Python

I need to transform csv files into Excel files in an automatic way. I am failing in naming Excel files with the name of the corresponding csv file.
I saved csv files as 'Trials_1', 'Trials_2', 'Trials_3' but with the code that I wrote Python gives me an error and asks me for a csv file named 'Trials_4'. Then, if I rename csv file 'Trials_1' into 'Trials_4' the program works and generates an Excel file named 'Trials_1'.
How can I correct my code?
'''
import csv
import openpyxl as xl
import os, os.path
directory=r'C:\\Users\\PycharmProjects\\input\\'
folder=r'C:\\Users\\PycharmProjects\\output\\'
for csv_file in os.listdir(directory):
def csv_to_excel(csv_file, excel_file):
csv_data=[]
with open(os.path.join(directory, csv_file)) as file_obj:
reader=csv.reader(file_obj)
for row in reader:
csv_data.append(row)
workbook= xl.Workbook()
sheet=workbook.active
for row in csv_data:
sheet.append(row)
workbook.save(os.path.join(folder,excel_file))
if __name__=="__main__":
m = sum(1 for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)))
new_name = "{}Trial_{}.csv".format(directory, m + 1)
k = sum(1 for file in os.listdir(folder) if os.path.isfile(os.path.join(folder, file)))
new_name_e = "{}Trial_{}.xlsx".format(folder, k + 1)
csv_to_excel(new_name,new_name_e)
'''
Thanks.
Hi Annachiara welcome to StackOverflow,
I would modify the "csv_to_excel" function by using only pandas.
Before that you should install 'xlsxwriter' with:
pip install XlsxWriter
Then the function would be like this:
import pandas as pd


def csv_to_excel(csv_file, excel_file, csv_sep=';'):
    """Convert one CSV file into an Excel workbook.

    Args:
        csv_file: Path of the CSV file to read.
        excel_file: Path of the .xlsx file to create.
        csv_sep: Field separator used in the CSV file.
    """
    # read the csv file with pandas
    df = pd.read_csv(csv_file, sep=csv_sep)
    # The context manager saves and closes the workbook on exit, even if
    # writing fails; ExcelWriter.save() was removed in pandas 2.0.
    with pd.ExcelWriter(excel_file, engine='xlsxwriter') as writer:
        # copy the csv content (df) into the excel file
        df.to_excel(writer, index=False)
    # print what you converted for reference
    print(f'csv file {csv_file} saved as excel in {excel_file}')
Just make sure that the csv is read correctly: I added only the separator parameter, but you might want to add other parameters (like parse dates etc.)
Then you can convert the list of csv files with a for loop (I used more steps to make it clearer)
import os

dir_in = r'C:\\Users\\PycharmProjects\\input\\'
dir_out = r'C:\\Users\\PycharmProjects\\output\\'
csvs_to_convert = os.listdir(dir_in)
for csv_file_in in csvs_to_convert:
    # split the name into stem and extension
    file_name_no_extension, extension = os.path.splitext(csv_file_in)
    # os.listdir returns every entry of the folder: skip anything that is
    # not a csv file so it is never handed to the converter
    if extension.lower() != '.csv':
        continue
    # add excel extension .xlsx
    excel_name_out = file_name_no_extension + '.xlsx'
    # write names with their directories
    complete_excel_name_out = os.path.join(dir_out, excel_name_out)
    complete_csv_name_in = os.path.join(dir_in, csv_file_in)
    # convert csv file to excel file
    csv_to_excel(complete_csv_name_in, complete_excel_name_out, csv_sep=';')
Each csv as a separate excel file
import glob
import pandas as pd
import os
csv_files = glob.glob('*.csv')
for filename in csv_files:
    # Swap only the trailing extension; str.replace would also rewrite a
    # ".csv" that happens to occur earlier in the file name.
    stem = os.path.splitext(os.path.basename(filename))[0]
    excel_name = stem + '.xlsx'
    df = pd.read_csv(filename)
    df.to_excel(excel_name, index=False)
All csv in same excel in different sheet
import glob
import pandas as pd
import os
csv_files = glob.glob('*.csv')
# Create excel file; the context manager saves and closes it on exit
# (ExcelWriter.save() was removed in pandas 2.0).
with pd.ExcelWriter('all_csv.xlsx') as writer:
    for filename in csv_files:
        # Sheet name = file name without directory and extension
        sheet_name = os.path.splitext(os.path.basename(filename))[0]
        df = pd.read_csv(filename)
        # Append each csv as sheet
        df.to_excel(writer, sheet_name=sheet_name, index=False)
Assuming you would like to keep the same structure of your code, I just fixed some technical issues in your code to make it work (please change the folders path to your own):
import csv
import openpyxl as xl
import glob, os, os.path
directory = 'input'
folder = '../output'  # Since 'input' would be my cwd, need to step back a directory to reach 'output'


# Using your function, just passing different arguments for convenience.
def csv_to_excel(f_path, f_name):
    """Copy the rows of one CSV file into a new workbook saved in `folder`.

    Args:
        f_path: Path of the CSV file to read.
        f_name: Output file name without extension; ".xlsx" is appended.
    """
    workbook = xl.Workbook()
    sheet = workbook.active
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields that contain line breaks are parsed correctly.
    with open(f_path, 'r', newline='') as file_obj:
        # Rows go straight into the sheet; no intermediate list is needed.
        for row in csv.reader(file_obj):
            sheet.append(row)
    workbook.save(os.path.join(folder, f_name + ".xlsx"))


def main():
    """Convert every CSV file in `directory` to an .xlsx file in `folder`."""
    os.chdir(directory)  # Defining input directory as your cwd
    # Searching for all files with csv extension and sending each to your function
    for file in glob.glob("*.csv"):
        # abspath joins with the cwd using the platform's own separator,
        # instead of a hard-coded Windows backslash.
        f_path = os.path.abspath(file)  # Saving the absolute path to the file
        f_name = os.path.splitext(file)[0]  # Saving the name of the file
        csv_to_excel(f_path, f_name)


if __name__ == "__main__":
    main()
P.S:
Please avoid iterating a definition of a function since you only need to define a function once.

How to copy files selected from a txt file to another folder python

Folder A has more than 100 files, folder B is my destination folder. I want to copy 10 files in folder A to folder B. The 10 files names are in the text file C.
import os
import shutil
from glob import glob
namelist = open('/Users/C.txt').read().splitlines()
input = '/Users/A'
output = '/Users/B'
path = '/Users/A'
files = glob(path)
for path in files:
filedir, filename = os.path.split(path)
for filename in namelist:
shutil.copy2(input,output)
It returns an Error. Please help me to do it in Python, thanks a lot!
There are a lot of things that you can do with your code:
import os
import shutil
from glob import glob
# A context manager takes care of closing the file after it has been read.
# Lines read from a file keep their trailing newline, so strip each one
# (and skip blank lines) to get bare file names.
with open('/Users/C.txt') as fp:
    namelist = [line.strip() for line in fp if line.strip()]

input = '/Users/A'
output = '/Users/B'
path = '/Users/A'

# glob is not needed here because os already provides listdir.
# os.listdir returns bare entry names (no directory prefix); a set makes
# the membership test below cheap.
files = set(os.listdir(path))

# loop only through files mentioned in the text file and see if they are
# available in folder A
for file_name in namelist:
    # Compare the bare name: the listing holds names, not joined paths.
    if file_name in files:
        file_path = os.path.join(input, file_name)
        dest_path = os.path.join(output, file_name)
        shutil.copy(file_path, dest_path)
#for path in files:
# filedir, filename = os.path.split(path)
# for filename in namelist:
# shutil.copy2(input,output)
I do not have sample data or error message to check. From what i can see in your code,
for path in files:
    filedir, filename = os.path.split(path)
    if filename in namelist:
        # Copy the matched file itself; `input` is the whole source folder.
        shutil.copy2(path, output)
Your paths are from the root folder because of the starting forward slash. Try putting a dot in front of them if the folders and files are relative to the location of your .py file or no preceding slash:
./Users/A or Users/A

Categories

Resources