Renaming excel files based on content - python

I am trying to rename the Excel files in a folder based on the content of cell A1. I am using pandas to parse each file and read the value of cell A1, and then using that value to rename the file.
The code is as follows:
import os
import pandas as pd
import glob
# Folder that holds the .xlsx workbooks to rename.
source_dir = r'C:\Users\Ahmed_Abdelmuniem\Desktop\RenameFolder'
# glob.glob returns plain str paths (not pathlib.Path objects).
file_names = glob.glob(os.path.join(source_dir, '*.xlsx'))
for file_name in file_names:
df1 = pd.read_excel(file_name)
# Value at row 0, column 0 of the parsed frame is used as the new name.
new_name = df1.iat[0,0]
# print (new_name)
# NOTE(review): file_name is a str here, so .rename()/.with_name() do not
# exist on it -- this is the line the AttributeError in the traceback points at.
file_name.rename(file_name.with_name(new_name))
I get the following traceback:
C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\python.exe C:/Users/Ahmed_Abdelmuniem/PycharmProjects/Renamer/main.py
Traceback (most recent call last):
File "C:\Users\Ahmed_Abdelmuniem\PycharmProjects\Renamer\main.py", line 13, in <module>
file_name.rename(file_name.with_name(f"new_name"))
AttributeError: 'str' object has no attribute 'rename'
Process finished with exit code 1
I am not sure what it means by AttributeError: 'str' object has no attribute 'rename', as the value stored in new_name is a str, I tested it out with print. It produces the following result:
XXX.xlsx
YYY.xlsx

The variable file_name you are trying to rename is just a str (glob.glob returns plain strings), and str has no rename or with_name method; those belong to pathlib.Path objects. You can rename a file using os.rename. See the code below.
import os
import pandas as pd
import glob
source_dir = ''


def build_new_path(file_name, cell_value):
    """Return the path that *file_name* should be renamed to.

    Args:
        file_name: Current path of the workbook, as returned by ``glob.glob``.
        cell_value: Content of cell A1; converted with ``str`` so numeric
            cells work too.

    Returns:
        A path in the same directory as *file_name* whose base name is the
        cell value, with ``.xlsx`` appended only when it is not already there.
    """
    new_name = str(cell_value)
    # The cell may already contain the extension (e.g. "XXX.xlsx").
    if not new_name.lower().endswith('.xlsx'):
        new_name += '.xlsx'
    # glob already returned the directory part; reuse it instead of joining
    # source_dir a second time (which breaks for a relative source_dir).
    return os.path.join(os.path.dirname(file_name), new_name)


file_names = glob.glob(os.path.join(source_dir, '*.xlsx'))
for file_name in file_names:
    # header=None keeps the first sheet row as data, so iat[0, 0] is cell A1
    # rather than A2 (the default treats row 1 as the column header).
    df1 = pd.read_excel(file_name, header=None)
    os.rename(file_name, build_new_path(file_name, df1.iat[0, 0]))

Related

Renaming multiple csv files within a folder in Python

I have a folder with 50 .csv files. The .csv files are auto-generated and a results/ output from a process-based model (long and automatically named). For example, sandbox_username_vetch_scaleup_IA_1.csv; sandbox_username_vetch_scaleup_IA_2.csv, and it continues till sandbox_username_vetch_scaleup_IA_50.csv.
I am trying to shorten the file names so that the files are named IA_1, IA_2, ... up to IA_50, and then add the new .csv file name as a column to the data frame. Here is what I have tried so far:
# import necessary libraries
import pandas as pd
import os
import glob
import sys
from pathlib import Path
import re
# Input and output folders (both point at the same directory here).
data_p = "/Users/Username/Documents/HV_Scale/CWAD"
output_p = "/Users/Username/Documents/HV_Scale/CWAD"
retval = os.getcwd()
print (retval) # see in which folder you are
os.chdir(data_p) # move to the folder with your data
os.getcwd()
# Relative names of every .csv in the current directory, sorted.
filenames = sorted(glob.glob('*.csv'))
fnames = list(filenames) # get the names of all your files
#print(fnames)
#Loop over
for f in range(len(fnames)):
print(f'fname: {fnames[f]}\n')
pfile = pd.read_csv(fnames[f], delimiter=",") # read in file
#extract filename
filename = fnames[f]
parts = filename.split(".") # pieces of the name around each "."
only_id = parts[0].split("_") # pieces of the first part around each "_"
# get IA from your file
filestate = pfile["IA"][0] # assuming this is on the first row
filestate = str(filestate)
# get new filename
# NOTE(review): only_id[0] is the text before the first "_" and parts[1] is
# the text after the first "." (no dot), so the result has no "." before it.
newfilename = only_id[0]+"-"+filestate+parts[1]
# save your file (don't put a slash at the end of your directories on top)
pfile.to_csv(output_p+"/"+newfilename, index = False, header = True)
Here is the code for adding the csv file name as a column
import glob
import os
import shutil
import sys
import pandas as pd
path = '/Users/Username/Documents/HV_Scale/IA_CWAD/short'
# Full paths of every .csv directly inside `path`.
all_files = glob.glob(os.path.join(path, "*.csv"))
# NOTE(review): this pattern uses a backslash separator, unlike the
# os.path.join call above, and `names` is not used in the lines that follow.
names = [os.path.basename(x) for x in glob.glob(path+'\*.csv')]
df = pd.DataFrame()
for file_ in all_files:
file_df = pd.read_csv(file_,sep=';', parse_dates=[0], infer_datetime_format=True,header=None )
# file_ is the path string returned by glob, so that is what gets stored.
file_df['file_name'] = file_
df = df.append(file_df)
#However, this adds the old csv file name and not the renamed one
In order to rename and move these files, all you need is:
import glob
import os
import shutil
import sys
SOURCE = '<Your source directory>'
TARGET = '<Your target directory>'


def short_name(path):
    """Return the shortened file name for *path*.

    The name is the part of the file's base name that starts at ``IA_``,
    e.g. ``sandbox_username_vetch_scaleup_IA_1.csv`` -> ``IA_1.csv``.

    Only the base name is searched, so a directory whose name happens to
    contain ``_IA_`` cannot leak into the result.

    Raises:
        ValueError: If the base name does not contain ``_IA_``.
    """
    base = os.path.basename(path)
    # +1 skips the leading underscore of the "_IA_" marker.
    return base[base.index('_IA_') + 1:]


for file in glob.glob(os.path.join(SOURCE, '*_IA_*.csv')):
    target = os.path.join(TARGET, short_name(file))
    if os.path.exists(target):
        # Never overwrite an existing file; report it and move on.
        print(f'Target file {target} already exists', file=sys.stderr)
    else:
        shutil.copy(file, target)
As there's nothing in the OP's question that tries to handle modification of the CSV files, that is left as an exercise for the OP.
Source and target directories should be different otherwise this can lead to ambiguous results

zipped multiple excel files than merge its content into one file using python

I am trying to create two functions with Python:
The first function zips multiple Excel files that exist in the given
path.
The second function reads the content of the zip file, then merges all
the files in it into one Excel file (all files have the same structure).
The problem is that when I run the script, it crashes when it tries to read the zip file and displays the error below:
AttributeError: 'ZipFile' object has no attribute 'seek'
code:
import pandas as pd
import numpy as np
import zipfile
import os
def get_all_file_path(directory):
    """Return the path of every file found anywhere below *directory*.

    The tree is traversed with ``os.walk``; each result is the containing
    folder joined with the file name. A directory with no files (or one
    that does not exist) yields an empty list.
    """
    return [
        os.path.join(parent, entry)
        for parent, _subdirs, entries in os.walk(directory)
        for entry in entries
    ]
# Excel file merge function
def excel_file_merge(zip_file_name):
    """Merge every ``.xlsx`` member of a zip archive into one DataFrame.

    Args:
        zip_file_name: Path of the zip archive (anything accepted by
            ``zipfile.ZipFile``).

    Returns:
        A DataFrame holding the rows of all workbooks in the archive, with
        an extra ``Note`` column naming the member each row came from. An
        empty DataFrame is returned when the archive has no ``.xlsx`` member.
    """
    frames = []
    # Open the archive once; the context manager closes it on every path.
    with zipfile.ZipFile(zip_file_name, "r") as archive:
        for member in archive.namelist():
            if not member.endswith('.xlsx'):
                continue
            with archive.open(member) as xlfile:
                df_xl = pd.read_excel(xlfile, engine='openpyxl')
            # Add a note indicating the file name that this dataframe originates from
            df_xl['Note'] = member
            frames.append(df_xl)
    if not frames:
        return pd.DataFrame()
    # One concat at the end replaces the removed DataFrame.append and avoids
    # re-copying the accumulated rows on every iteration.
    return pd.concat(frames, ignore_index=True)
# Folder whose files get zipped.
uploaded_file = 'F:/AIenv/test_zip'
file_paths = get_all_file_path(uploaded_file)
print("following files will be zipped: ")
for file_name in file_paths:
print(file_name)
# Write every collected file into a new archive in the working directory.
with zipfile.ZipFile("my _python_files.zip","w")as f:
for file in file_paths:
f.write(file)
f.close()
print("All Files Zipped successfully")
# NOTE(review): f is the ZipFile object, not the archive's file name;
# excel_file_merge passes its argument straight to zipfile.ZipFile, which is
# presumably where the "'ZipFile' object has no attribute 'seek'" error arises.
df = excel_file_merge(f)
print(df)

Python Error: AttributeError: 'NoneType' object has no attribute 'to_excel'

I am trying to combine all files in a directory and the save the combined file into another directory.
I am using Python 3.8.
When I run the code I get the following with a AttributeError:
c:\test\Upload test\Book1.xlsx
c:\test\Upload test\Book2.xlsx
c:\test\Upload test\Book3.xlsx
Traceback (most recent call last):
File "C:/Python/PythonDev/combine.py", line 104, in <module>
newdf.to_excel(writer,"All")
AttributeError: 'NoneType' object has no attribute 'to_excel'
the code:
import pandas as pd
import globe
# All workbooks in the upload folder.
filelist = glob.glob(r'c:\test\Upload test\*.xlsx')
file1 = "*.*"
for i in filelist:
file2 = pd.read_excel(i)
# Record which workbook each row came from.
file2['FileName'] = i
# NOTE(review): file1 is rebound to a list holding the string 'newdf', and
# list.append returns None, so newdf is None -- matching the AttributeError
# reported for newdf.to_excel.
file1 = ['newdf']
newdf = file1.append(file2)
print (i)
writer = pd.ExcelWriter(r'c:\test\Uploaded\uploadfile.xlsx', engine= 'xlsxwriter')
newdf.to_excel(writer,"All")
writer.save()
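list.append modifies the list in place and returns None, so newdf ends up being None. (A DataFrame is different: combining frames produces a new DataFrame, and that result has to be kept.)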
Try something like this:
import pandas as pd
import glob
def combine_frames(frames):
    """Stack an iterable of DataFrames into a single DataFrame.

    Args:
        frames: Iterable of DataFrames (may be empty or a generator).

    Returns:
        The row-wise concatenation with a fresh 0..n-1 index, or an empty
        DataFrame when *frames* yields nothing (``pd.concat`` would raise).
    """
    frames = list(frames)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


raw_files = glob.glob(r'c:\test\Upload test\*.xlsx')
# The combined result must be kept: appending to a DataFrame never modified
# it in place, so the previous loop always left pd_files empty.
pd_files = combine_frames(pd.read_excel(file) for file in raw_files)
if raw_files:
    # Raw string: "\t" and "\U" in a normal literal are escape sequences.
    # Skipped when nothing matched so an earlier output is not replaced by
    # an empty workbook.
    pd_files.to_excel(r'c:\test\Uploaded\uploadfile.xlsx')

AttributeError: 'list' object has no attribute 'to_hdf'

I am running the following code, which imports CSV files and appends all of the data to a single DATA list. But when storing this list in HDF5, I keep getting the error AttributeError: 'list' object has no attribute 'to_hdf'.
Please help me understand what I am missing.
import pandas as pd
import h5py
import glob
import os
path = "Z:\Test Data"
def get_CSV_files(path):
    """Return the paths of all ``.csv`` files below *path*.

    The extension check is case-insensitive. ``os.walk`` already descends
    into every subdirectory, so no manual recursion is needed; recursing
    again would list files in nested folders more than once.

    Args:
        path: Root directory to search.

    Returns:
        List of file paths, each appearing exactly once; empty if none match.
    """
    return [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(path)
        for file in files
        if os.path.splitext(file)[-1].lower() == '.csv'
    ]
def store_all_data_hdf5(path):
# Collect every CSV under `path` via get_CSV_files.
files = get_CSV_files(path)
# Plain Python list that accumulates one DataFrame per file.
DATA=[]
for file_name in files:
data = pd.DataFrame.from_csv(file_name, sep="\t")
DATA.append(data)
store = pd.HDFStore('STORE.h5')
# NOTE(review): DATA is a list, so it has no to_hdf method -- this is the
# call the reported AttributeError refers to. `store` is opened and closed
# but nothing is written through it.
DATA.to_hdf('STORE.h5','table', append=True)
store.close()
return DATA
DATA is a list you define by - DATA=[] and it does not have attribute to_hdf.
You can find some example of how to use pandas HDFStore here
And you would probably need something like -
def store_all_data_hdf5(path):
    """Load every CSV under *path* and append it to ``STORE.h5``.

    Args:
        path: Root directory handed to ``get_CSV_files``.

    Returns:
        List with one DataFrame per CSV file, in the order they were read.
    """
    files = get_CSV_files(path)
    DATA = []
    # The context manager closes the store even if reading a file fails.
    with pd.HDFStore('STORE.h5') as store:
        for file_name in files:
            # DataFrame.from_csv no longer exists in current pandas; read_csv
            # with index_col=0 and parse_dates=True matches its defaults.
            data = pd.read_csv(file_name, sep="\t", index_col=0, parse_dates=True)
            DATA.append(data)
            # append=True adds each frame to the same 'my_file' key.
            store.put('my_file', data, append=True)
    return DATA

how to sort out files according to excel

I have an Excel file that contains long product tag name like(for now, just working on 3 of them):
4049047000037
4049047000044
4049047118954
and i have a folder on my desktop called "1" containing .jpg files with tag names like:
4049047000037.jpg
4049047000044.jpg
4049047118954.jpg
I want to write code that copies a .jpg file to another folder if its tag name appears in my Excel file.
import os
import pandas as pd
# NOTE(review): these are not raw strings, so a backslash followed by a digit
# ("\1", "\2") is read as an octal escape rather than a folder name; neither
# variable is used in the lines below.
movdir = ["C:\Users\muhammedcan\Desktop\1"]
basedir = "C:\Users\muhammedcan\Desktop\2"
i=0
#to see what i have in my folder
print os.listdir("C:/Users/muhammedcan/Desktop/1/")
df= pd.read_excel("deneme.xls", sheetname= "sayfa4")
# Label of the second column, then the values of that column.
df1= df.columns[1]
listSepalWidth = df[df1]
print listSepalWidth
#to make file name and product tag name same
for i in listSepalWidth:
i=str(i)+(".jpg")
print i
Can you help me copy a file into another folder if it exists in my Excel file?
this is my result so far:
['4049047000037.jpg', '4049047000044.jpg', '4049047000068.jpg',
'4049047000075.jpg', '4049047000082.jpg', '4049047000105.jpg',
'4049047118947.jpg', '4049047118954.jpg']
4049047000037.jpg
4049047000044.jpg
4049047118954.jpg
4049047000068.jpg
4049047000075.jpg
4049047000082.jpg
4049047118947.jpg
4049047000105.jpg
I used following code, and I am recieving error.
from shutil import copyfile
# NOTE(review): both arguments are the two folder paths described above, not
# paths of individual files, and the strings are not raw (see "\1" / "\2").
copyfile("C:\Users\muhammedcan\Desktop\1", "C:\Users\muhammedcan\Desktop\2")
Error is:
C:\Python27\python.exe "C:/Users/muhammedcan/Desktop/summer
courses/programing/MP4/solution/my_work.py"
Traceback (most recent call last):
File "C:/Users/muhammedcan/Desktop/summer courses/programing/MP4/solution/my_work.py", line 3, in <module>
copyfile("C:\Users\muhammedcan\Desktop\1",
Process finished with exit code 1
The following should do what you are looking for:
import os
import glob
import pandas as pd
import shutil
source_folder = r"C:\Users\muhammedcan\Desktop\1"
destination_folder = r"C:\Users\muhammedcan\Desktop\2"

# Base names of every .jpg currently present in the source folder.
available_filenames = [
    os.path.basename(fn)
    for fn in glob.glob(os.path.join(source_folder, '*.jpg'))
]

# sheet_name is the current keyword; the old spelling "sheetname" is rejected
# by recent pandas releases.
df = pd.read_excel("deneme.xls", sheet_name="sayfa4")

# The second column of the sheet holds the product tag names.
for tag_name in df.iloc[:, 1]:
    filename = "{}.jpg".format(tag_name)
    if filename in available_filenames:
        # print(...) with one argument works on both Python 2 and Python 3.
        print("{} - found".format(filename))
        shutil.copyfile(
            os.path.join(source_folder, filename),
            os.path.join(destination_folder, filename),
        )
    else:
        print("{} - not found".format(filename))
It first creates a list of .jpg filenames found in the source_folder. It then loads the Excel file into pandas and iterates over the second column. If the tag name is found in the list of available_filenames, the shutil.copyfile() function is used to copy the file from folder 1 to folder 2. Note that os.path.join() is used to safely join the parts of a file path together.
To make it into a function to let you also do 'pdf' you could do:
import os
import glob
import pandas as pd
import shutil
source_folder = r"C:\Users\muhammedcan\Desktop\1"
destination_folder = r"C:\Users\muhammedcan\Desktop\2"
# sheet_name is the current keyword; the old spelling "sheetname" is rejected
# by recent pandas releases.
df = pd.read_excel("deneme.xls", sheet_name="sayfa4")


def copy_files(source_folder, destination_folder, extension):
    """Copy the files named in the spreadsheet from one folder to another.

    For every value in the second column of the module-level ``df``, the
    file ``<value>.<extension>`` is copied when it exists in
    *source_folder*; a "found" / "not found" line is printed either way.

    Args:
        source_folder: Folder searched for the files.
        destination_folder: Folder the matching files are copied into.
        extension: File extension without the leading dot, e.g. ``'jpg'``.
    """
    pattern = os.path.join(source_folder, '*.{}'.format(extension))
    available_filenames = [os.path.basename(fn) for fn in glob.glob(pattern)]

    for tag_name in df.iloc[:, 1]:
        filename = "{}.{}".format(tag_name, extension)
        if filename in available_filenames:
            # print(...) with one argument works on both Python 2 and 3.
            print("{} - found".format(filename))
            shutil.copyfile(
                os.path.join(source_folder, filename),
                os.path.join(destination_folder, filename),
            )
        else:
            print("{} - not found".format(filename))


copy_files(source_folder, destination_folder, 'jpg')
copy_files(source_folder, destination_folder, 'pdf')
This assumes the same deneme.xls is used for both. If not it could be passed as another argument to the function.

Categories

Resources