I was doing "robot" which it should run through the contents of the folder and folders inside the folders, and save in excel a list of files lying there in the format. Everything is doing great before going inside the folder which is located in a folder. Like fixedtest --> test --> insidetest. The issue that when I run debug, passing through "if not os.path.isdir(file): " for some reason, he fulfills this condition, considering that this folder is not a folder.
I'm beginner in python and os library, so may be idk something and how it's work, but everything seems to be correct and it's confused. What I should to change, how I can solve the problem?
import openpyxl
import os
def add_row(rowN, folderName, fileName, ext):
sheet.cell(row=rowN, column=1).value = rowN
sheet.cell(row=rowN, column=2).value = folderName
sheet.cell(row=rowN, column=3).value = fileName
sheet.cell(row=rowN, column=4).value = ext
path = os.path.abspath(os.getcwd())
folders = []
i = 0
wb = openpyxl.Workbook()
sheet = wb.active
sheet['A1'] = 'Number of row'
sheet['B1'] = 'Folder where file located'
sheet['C1'] = 'File name'
sheet['D1'] = 'File extension'
folders.append(path)
for folder in folders:
try:
for file in os.listdir(folder):
if not os.path.isdir(file):
i = i + 1
add_row(i, os.path.basename(folder), os.path.splitext(file)[0],
os.path.splitext(file)[1])
else:
folders.append(file)
except:
print(folders)
wb.save("test.xlsx")
wb.close()
Related
I have a code, which is working properly if I manually insert strings for path, directory and file name, here is the code:
path = r"test//ab3b//ab3b_all_anal.xlsx"
directory = "test"
file1 = "test//ab3b//ab3b80.csv"
df1 = all_calc_80(file1, directory)
file2 = "test//ab3b//ab3b80m.csv"
df2 = all_calc_80m(file2, directory)
writer = pd.ExcelWriter(path, engine = 'xlsxwriter')
df1.to_excel(writer, sheet_name = '80')
df2.to_excel(writer, sheet_name = '80m')
writer.close()
Test directory has subdirectories named as ab3b, bg3a, ge3b etc. and in each of subdirectories there are files named in same way: ab3b80.csv, ab3b80m.csv; bg3a80.csv, bg3a80m.csv; ge3b80.csv, ge3b80m.csv.
Each of files based on ending 80.csv or 80m.csv use different function for analysing.
The final output is one excel workbook with sheets names after ending of csv files.
Now I am working on iterating through whole directory test, where I just give the name of the directory and everything is proceed automatically from there. So far I have:
import os
import xlsxwriter
rootdir = 'test'
slovar = {}
for subdir, dirs, files in os.walk(rootdir):
slovar[subdir] = files
for key, value in slovar.items():
if len(key) > 4: #to get just subdirectories I need
end = key[-4:]
path = 'r' + '\'' + key + '\\\\' + end + '_all_anal.xlsx' + '\''
print(path)
for vrednost in value:
if vrednost.endswith('80.csv'):
file1 = vrednost
df1 = all_calc_80(file1, rootdir)
elif vrednost.endswith('80m.csv'):
file2 = vrednost
df2 = all_calc_80m(file2, rootdir)
writer = pd.ExcelWriter(path, engine = 'xlsxwriter')
df1.to_excel(writer, sheet_name = '80')
df2.to_excel(writer, sheet_name = '80m')
writer.close()
But I got error message: Invalid extension for engine '<property object at 0x000002123659D0E0>': 'xlsx''.
I think there might be some problems due to /and \ in windows paths or types of object, even though when I print out just keys and values, I get usefull output, also name of the path is written properly.
But I don't really understand why manually everything works and automated not.
If someone will still search for this answer, I had found a solution.
Main discovery was regarding how to append path and file name to the list.
It is done with os.path.join(dirpath, filename), if you use os.walk.
Here is the working code:
seznam80 = []
seznam80m = []
seznam120 = []
seznam120m = []
seznam150 = []
seznam150m = []
seznamSMT = []
dirp = []
for dirpath, dirnames, filenames in os.walk(directory): #directory with all folders of participants
for filename in [f for f in filenames if f.endswith("80.csv")]: #search for all 80 files
seznam80.append(os.path.join(dirpath, filename))
dirp.append(dirpath)
for dirpath, dirnames, filenames in os.walk(directory): #directory with all folders of participants
for filename in [f for f in filenames if f.endswith("80m.csv")]: #search for all 80m files
seznam80m.append(os.path.join(dirpath, filename))
for vsak80, vsak80m pot in zip(seznam80, seznam80m, dirp):
path = pot + '_all_anal.xlsx'
file1 = vsak80
df1 = all_calc_80(file1, directory)
file2 = vsak80m
df2 = all_calc_80m(file2, directory)
writer = pd.ExcelWriter(path, engine = 'xlsxwriter')
df1.to_excel(writer, sheet_name = '80')
df2.to_excel(writer, sheet_name = '80m')
writer.close()
I have the below code, the folder to where it contains the Excel file with two columns (Original Name of the PDF file, and New Name desired column called "Matched Results.xls"; as well as all of the original name PDF files that are contained in this folder. How do I run this code so that all of my PDFs will be renamed?
def rename_file(file_to_rename, source_file):
p = Path(file_to_rename)
filename = p.stem
wb = xlrd.open_workbook(r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python\\Matched_Results.xls')
# excel file to get new filename????
sheet = wb.sheet_by_index(0)
for row_num in range(sheet.nrows):
row_value = sheet.row_values(row_num)
col = 2 # 'john smith' col number
if row_value[col] == filename:
new_filename = f'{row_value[col+1]}' # format as you want
p.rename(Path(p.parent, new_filename + p.suffix)) # rename
break
def get_paths_in_directory(directory):
return Path(directory).glob('*.pdf')
if __name__ == "__main__":
source_file = r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python\\Matched_Results.xlsx' # excel file to get new filename
source_directory = r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python' # directory where your files to rename are.
# iterate all pdf files in the given directory
paths = get_paths_in_directory(source_directory)
for file_to_rename in paths:
rename_file(str(file_to_rename), source_file)
The point of if __name__ == "__main__": is to check if the code was run directly and not imported. Since that you are not importing:
def rename_file(file_to_rename, source_file):
p = Path(file_to_rename)
filename = p.stem
wb = xlrd.open_workbook(r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python\\Matched_Results.xls')
# wb should be the below variable instead
#wb = xlrd.open_workbook(source_file)
# excel file to get new filename????
sheet = wb.sheet_by_index(0)
for row_num in range(sheet.nrows):
row_value = sheet.row_values(row_num)
col = 2 # 'john smith' col number
if row_value[col] == filename:
new_filename = f'{row_value[col+1]}' # format as you want
p.rename(Path(p.parent, new_filename + p.suffix)) # rename
print("the if was matched!", new_filename, Path(p.parent, new_filename + p.suffix))
break
def get_paths_in_directory(directory):
return Path(directory).glob('*.pdf')
if __name__ == "__main__":
source_file = r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python\\Matched_Results.xlsx' # excel file to get new filename
source_directory = r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python' # directory where your files to rename are.
# iterate all pdf files in the given directory
paths = get_paths_in_directory(source_directory)
for file_to_rename in paths:
rename_file(str(file_to_rename), source_file)
You can replace everything after and including if __name__ == "__main__": with:
def my_func():
source_file = r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python\\Matched_Results.xlsx' # excel file to get new filename
source_directory = r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python' # directory where your files to rename are.
# iterate all pdf files in the given directory
paths = get_paths_in_directory(source_directory)
for file_to_rename in paths:
rename_file(str(file_to_rename), source_file)
and call the function with:
my_func()
so your whole jupyter field would look like:
def rename_file(file_to_rename, source_file):
p = Path(file_to_rename)
filename = p.stem
wb = xlrd.open_workbook(r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python\\Matched_Results.xls')
# excel file to get new filename????
sheet = wb.sheet_by_index(0)
for row_num in range(sheet.nrows):
row_value = sheet.row_values(row_num)
col = 3 # 'john smith' col number
if row_value[col] == filename:
new_filename = f'{row_value[col+1]}' # format as you want
p.rename(Path(p.parent, new_filename + p.suffix)) # rename
print("the if was matched!", new_filename, Path(p.parent, new_filename + p.suffix))
break
def get_paths_in_directory(directory):
return Path(directory).glob('*.pdf')
def my_func():
source_file = r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python\\Matched_Results.xlsx' # excel file to get new filename
source_directory = r'C:\\Users\Chris Lee\\Desktop\\File_Rename_Python' # directory where your files to rename are.
# iterate all pdf files in the given directory
paths = get_paths_in_directory(source_directory)
for file_to_rename in paths:
rename_file(str(file_to_rename), source_file)
my_func()
provided you have all the needed imports.
EDIT: A function I put together with some threads on SO - you will need to figure out how you want to rename your file, create the algorithm, then implement it in the if statement.
import os
def rename_func(directory):
d = os.fsencode(directory)
for file in os.listdir(d):
filename = os.fsdecode(file)
if filename.endswith(".pdf"):
# this is what your file will be renamed to
os.rename(os.path.join(d, filename), 'renamed_file.pdf')
rename_func(r"C:\my_dir")
I have a folder (Molecules) with many sdf files (M00001.sdf, M00002.sdf and so on) representing different molecules. I also have a csv where each row represents the a molecule (M00001, M00002 etc).
I'm writing a code in order to get files on Molecules folder if their name is a row on the csv file.
First attempt
import os
path_to_files = '/path_to_folder/Molecules' # path to Molecules folder
for files in os.listdir(path_to_files):
names = os.path.splitext(files)[0] # get the basename (molecule name)
with open('molecules.csv') as ligs: # Open the csv file of molecules names
for hits in ligs:
if names == hits:
print names, hits
else:
print 'File is not here'
However this returns nothing on the command line (literally nothing). What is wrong with this code?
I am not sure that this is the best way (I only know that the following code works for my data) but if your molecule.csv has the standard csv format, i.e. "molecule1,molecule2,molecule3 ...", you can try to rearrange your code in this way:
import os
import csv
path_to_files = '/path_to_folder/Molecules' # path to Molecules folder
for files in os.listdir(path_to_files):
names = os.path.basename(files)
names = names.replace(".sdf","")
with open('molecules.csv','r') as ligs:
content = csv.reader(ligs)
for elem in content:
for hits in elem:
if names == hits:
print names, hits
else:
print 'File is not here'
See csv File Reading and Writing for csv module
I solved the problem with a rather brute approach
import os
import csv
import shutil
path_to_files = None # path to Molecules folder
new_path = None # new folder to save files
os.mkdir(new_path) # create the folder to store the molecules
hits = open('molecules.csv', 'r')
ligands = []
for line in hits:
lig = line.rstrip('\n')
ligands.append(lig)
for files in os.listdir(path_to_files):
molecule_name = os.path.splitext(files)[0]
full_name = '/' + molecule_name + '.sdf'
old_file = path_to_files + full_name
new_file = new_path + full_name
if molecule_name in ligands:
shutil.copy(old_file, new_file)
I wrote the following code to make an inventory of every files in a library. the idea is that the 3 columns have infromation about the file.
1) complete path 2) name of the parent directory 3) filename.
import os
import openpyxl
def crearlista (*arg, **kw):
inventario = openpyxl.Workbook(encoding = "Utf-8")
sheet = inventario.active
i = 1
f = 1
e = ""
for dirpath, subdirs, files in os.walk(*arg, **kw):
for name in subdirs:
e = os.path.join (name)
for name in files:
sheet.cell(row=i, column=3).value = name
sheet.cell(row=i, column=1).value = dirpath
sheet.cell(row=i, column=2).value = e
i = i + 1
inventario.save("asd3.xlsx")
crearlista("//media//rayeus/Datos/Mis Documentos/Nueva carpeta/", topdown=False)
The problem is that it iterates first through the files in the first folder and after that starts filling the 'e' variable with the name of the first folder.
That way it starts writing late the names in the folder column. And it writes theme as many times as files in the next folder, not as many files there are in THAT folder.
How can i solve this?
Here is my code:
f = open("myfile.txt")
f.write("Writing something")
f.close()
plt.savefig('plot1.png')
plt.savefig('plot2.png')
workbook = xlsxwriter.Workbook('results.xlsx')
workbook.close()
At the moment, these files are being saved to my desktop. How would i save them to a file that the user specifies the name for? So i would have a widget like so:
self.directoryname= tkinter.Entry(self.master)
self.directoryname["width"] = 60
self.directoryname.focus_set()
self.directoryname.grid(row=1, column=1)
foldername = (self.directoryname.get())
How can i take foldername and create a directory with that name, and store the files in there?
Use the os.path.join() function and if you want to create a folder os.makedirs()
import os
path = "some/path/to/location"
foldername = (self.directoryname.get())
file_path = os.path.join(path,foldername)
os.makedirs(file_path)
filename = 'results.xlsx'
full_path = os.path.join(file_path,filename)
workbook = xlsxwriter.Workbook(full_path)
workbook.close()