batch rename files by common unique value - python

I have a folder containing over 500 images. and a text file with new names for these images. I have tried to create a renaming script in python but I am struggling with the robustness issue. I would like to rename the file by selecting the new name by unique identifier
I wrote this...
import os
import csv
img_dir = str(raw_input("Path to Images Folder as C:/path/to/IMAGES : "))
os.chdir(img_dir)
newNames_file = str(raw_input("csv file path and name as C:/path/to/loc_data.csv : "))
with open(newNames_file, 'rb') as f:
reader = csv.reader(f)
newNames = list(reader)
newNames = [l[0] for l in newNames]
print"Current Working Directory is: " + os.getcwd()
path = os.getcwd()
filenames = [os.path.join(img_dir,fn) for fn in next(os.walk(img_dir))[2]]
assert len(filenames) == len(newNames)
for i in range(len(newNames)):
os.rename(str(filenames[i]), str(newNames[i]))
but the results are unreliable.. e.g. the order inst guaranteed to match.
How can I use the unique ID to pass the correct value top the rename function?
the file names are raw e.g. IMG_3036.tif
and the new values are in the form 7700_50fpl_4_1_3036
where 3036 is the unique ID

I write a script and i think it will do the trick.
You can rewrite the get_name_uid function to get your new files' uid and old files's uid if you want.
Make sure you change the glob name and new_names file name as you use.
import glob
import os
old_image_filenames = glob.glob('*.img')
new_names = open('new_names').read().strip().split(',')
def get_name_uid(name):
uid = name.split('.')[0]
uid = uid.split('_')[-1]
return uid
def get_uid_filename_map(filenames, get_uid_func):
uid_filename_map = {}
for filename in filenames:
uid_filename_map[get_uid_func(filename)] = filename
return uid_filename_map
uid_old_filenames = get_uid_filename_map(old_image_filenames, get_name_uid)
uid_new_filenames = get_uid_filename_map(new_names, get_name_uid)
for uid in uid_old_filenames.keys():
if uid in uid_new_filenames:
os.rename(uid_old_filenames[uid], uid_new_filenames[uid])
The folder i run script before
$ ls
new_names play.py zasdf_3036.img zxcsdf_3037.img
after run script
$ ls
7700_50fpl_4_1_3036.img 7700_50fpl_4_2_3037.img new_names play.py
file new_names
$ cat new_names
7700_50fpl_4_1_3036.img,7700_50fpl_4_2_3037.img

This is what i made in the end. Thank you for your help and input.
def main():
pass
if __name__ == '__main__':
main()
import os
import glob
import csv
import numpy as np
# Establish path to images that need renamed
path_to_img = raw_input("your path to your imgs\n")
glob_name = os.path.join(path_to_img, '*.tif')
old_image_filenames = glob.glob(glob_name)
# Make list from csv of new names
path_to_namefile = raw_input('your path to name file\n')
f = open(path_to_namefile)
csv_f = csv.reader(f)
csv_names = []
for i in csv_f:
csv_names.append(i)
new_names = []
for i in csv_names:
new_names.append(str(i)[2:-2])
print new_names
def get_name_uid(name):
uid = name.split('.')[0]
uid = uid.split('_')[-1]
return uid
def get_uid_filename_map(filenames, get_uid_func):
uid_filename_map = {}
for filename in filenames:
uid_filename_map[get_uid_func(filename)] = filename
return uid_filename_map
uid_old_filename = get_uid_filename_map(old_image_filenames, get_name_uid)
uid_new_filename = get_uid_filename_map(new_names, get_name_uid)
for uid in uid_old_filename.keys():
if uid in uid_new_filename:
os.rename(os.path.join(path_to_img, uid_old_filename[uid]), os.path.join(path_to_img, uid_new_filename[uid]))
I had to work through the reading the csv part a bit. And i dont think there is actually a numpy module... not sure how that wound up in there but this works well so i didnt change it anymore.

Related

Renaming multiple csv files within a folder in Python

I have a folder with 50 .csv files. The .csv files are auto-generated and a results/ output from a process-based model (long and automatically named). For example, sandbox_username_vetch_scaleup_IA_1.csv; sandbox_username_vetch_scaleup_IA_2.csv, and it continues till sandbox_username_vetch_scaleup_IA_50.csv.
I am trying to shorten the file names in a way so that the files are names are IA_1, IA_2 ...up to IA_50 and subsequently the new .csv file name gets added as a column to the data frame. Here is what I have tried so far
# import necessary libraries
import pandas as pd
import os
import glob
import sys
from pathlib import Path
import re
data_p = "/Users/Username/Documents/HV_Scale/CWAD"
output_p = "/Users/Username/Documents/HV_Scale/CWAD"
retval = os.getcwd()
print (retval) # see in which folder you are
os.chdir(data_p) # move to the folder with your data
os.getcwd()
filenames = sorted(glob.glob('*.csv'))
fnames = list(filenames) # get the names of all your files
#print(fnames)
#Loop over
for f in range(len(fnames)):
print(f'fname: {fnames[f]}\n')
pfile = pd.read_csv(fnames[f], delimiter=",") # read in file
#extract filename
filename = fnames[f]
parts = filename.split(".") # giving you the number in file name and .csv
only_id = parts[0].split("_") # if there is a bracket included
# get IA from your file
filestate = pfile["IA"][0] # assuming this is on the first row
filestate = str(filestate)
# get new filename
newfilename = only_id[0]+"-"+filestate+parts[1]
# save your file (don't put a slash at the end of your directories on top)
pfile.to_csv(output_p+"/"+newfilename, index = False, header = True)
Here is the code for adding the csv file name as a column
import glob
import os
import shutil
import sys
import pandas as pd
path = '/Users/Username/Documents/HV_Scale/IA_CWAD/short'
all_files = glob.glob(os.path.join(path, "*.csv"))
names = [os.path.basename(x) for x in glob.glob(path+'\*.csv')]
df = pd.DataFrame()
for file_ in all_files:
file_df = pd.read_csv(file_,sep=';', parse_dates=[0], infer_datetime_format=True,header=None )
file_df['file_name'] = file_
df = df.append(file_df)
#However, this adds the old csv file name and not the renamed one
In order to rename and move these files, all you need is:
import glob
import os
import shutil
import sys
SOURCE = '<Your source directory>'
TARGET = '<Your target directory>'
for file in glob.glob(os.path.join(SOURCE, '*_IA_*.csv')):
idx = file.index('_IA_')
filename = file[idx+1:]
target = os.path.join(TARGET, filename)
if os.path.exists(target):
print(f'Target file {target} already exists', file=sys.stderr)
else:
shutil.copy(file, target)
As there's nothing in the OP's question that tries to handle modification of the CSV files, that is left as an exercise for the OP.
Source and target directories should be different otherwise this can lead to ambiguous results

shutil move based on csv filename

hi guys currently moving file based on filename on my csv file but it always move the files first and then read the filename so it always got error already exist like this
Error: Destination path 'Sortir/Membuka kertas contekan/aug1_Data16_133_86.jpg' already exists
CODE
import pandas as pd
data = pd.read_csv('train.csv')
filenames = data['filename'].values
filenames = filenames.tolist()
classes = data['class'].values
classes = classes.tolist()
print(filenames)
print(classes)
import shutil
import os
for index, row in data.iterrows():
print(row['filename'], os.path.join("Sortir",row['class']))
if not os.path.exists(os.path.join("Sortir",row['class'])):
print("[INFO] 'creating {}' directory".format(os.path.join("Sortir",row['class'])))
os.mkdir(os.path.join("Sortir",row['class']))
shutil.move(os.path.join("images",row["filename"]), os.path.join("Sortir",row['class']))
Anyone know how to do the read the row first and then move the file? or maybe keep continue to read other row even if the file that I want to move being already moved?
Found the Answer Code here :
import shutil
import os
import pandas as pd
data = pd.read_csv('test.csv')
filenames = data['filename'].values
filenames = filenames.tolist()
classes = data['class'].values
classes = classes.tolist()
print(filenames)
print(classes)
for index, row in data.iterrows():
if not os.path.exists(os.path.join("SortirTest",row['class'])):
print("[INFO] 'creating {}' directory".format(os.path.join("SortirTest",row['class'])))
os.mkdir(os.path.join("SortirTest",row['class']))
input_name = os.path.join("images", row["filename"])
output_name = os.path.join("SortirTest", row['class'], row['filename'])
if os.path.exists(input_name):
dest = shutil.move(input_name, output_name)
print("This File Has Been Moved:", input_name)
else:
print("This File Doesnt Exist :", input_name)
continue
In shutil.move() function you have to add the filename in the new directory too:
input_name = os.path.join("images", row["filename"])
output_name = os.path.join("Sortir", row['class'], row['filename'])
shutil.move(input_name, output_name)
Have you tried to clear the 'Sortir' folder before running the script?

Trying to use multiprocessing over files but freezes when I have the user input a path

I'm pretty new to Python, and I'm working on an assignment where multiprocessing is a requirement. I have a folder with a few .csv files in it and I want output of the form [file name, gradient, intercept] for each file in the folder. I can get this to work using a for loop, and the multiprocessing works when I put the file names manually into the code, like this:
import numpy
import os
import csv
from multiprocessing import Pool
files_txt = ['Data_set_1.csv','Data_set_2.csv','Data_set_3.csv','Data_set_4.csv']
print("files: ",files_txt)
lines = []
def GetLines(name):
f = open(name)
csv_f = csv.reader(f)
csv_x = []
csv_y = []
for row in csv_f:
try:
csv_x.append(float(row[0]))
csv_y.append(float(row[1]))
except ValueError:
pass
f.close()
x = numpy.array(csv_x)
y = numpy.array(csv_y)
m, b = numpy.polyfit(x, y, 1)
lines.append([name,m,b])
return lines
if __name__ == '__main__':
pool = Pool(processes = 2)
result = pool.map(GetLines,files_txt)
print(result)
This gives me my desired output, however when I change the first line to:
path = input("path: ")
files = os.listdir(path)
files_txt = [a for a in files if a.endswith(".csv")]
It prints out the names of my files in the folder and then just does nothing.
I have no idea how to remedy this and I've been googling all night.

Is there a way to load data from all files in a directory using Python?

My question: Is there a way to load data from all files in a directory using Python
Input: Get all files in a given directory of mine (wow.txt, testting.txt,etc.)
Process: I want to run all the files through a def function
Output: I want the output to be all the files names and their respective content below it.For example:
/home/file/wow.txt
"all of its content"
/home/file/www.txt
"all of its content"
Here is my code:
# Import Functions
import os
import sys
# Define the file path
path="/home/my_files"
file_name="wow.txt"
#Load Data Function
def load_data(path,file_name):
"""
Input : path and file_name
Purpose: loading text file
Output : list of paragraphs/documents and
title(initial 100 words considered as title of document)
"""
documents_list = []
titles=[]
with open( os.path.join(path, file_name) ,"rt", encoding='latin-1') as fin:
for line in fin.readlines():
text = line.strip()
documents_list.append(text)
print("Total Number of Documents:",len(documents_list))
titles.append( text[0:min(len(text),100)] )
return documents_list,titles
#Output
load_data(path,file_name)
Here is my output:
My Problem is that my output only takes one file and shows its content. Obviously, i defined the path and file name in my code to one file but I am confused as to how to write the path in a way to load all the files and output each of its contents separately. Any suggestions?
Using glob:
import glob
files = glob.glob("*.txt") # get all the .txt files
for file in files: # iterate over the list of files
with open(file, "r") as fin: # open the file
# rest of the code
Using os.listdir():
import os
arr = os.listdir()
files = [x for x in arr if x.endswith('.txt')]
for file in files: # iterate over the list of files
with open(file, "r") as fin: # open the file
# rest of the code
Try this:
import glob
for file in glob.glob("test/*.xyz"):
print(file)
if my directory name was "test" and I had lots of xyz files in them...
You can use glob and pandas
import pandas as pd
import glob
path = r'some_directory' # use your path
all_files = glob.glob(path + "/*.txt")
li = []
for filename in all_files:
#read file here
# if you decide to use pandas you might need to use the 'sep' paramaeter as well
df = pd.read_csv(filename, index_col=None, header=0)
li.append(df)
# get it all together
frame = pd.concat(li, axis=0, ignore_index=True)
I will take advantage of the function you have already written, so use the following:
data = []
path="/home/my_files"
dirs = os.listdir( path )
for file in dirs:
data.append(load_data(path, file))
In this case you will have all data in the list data.
Hi you can use a for loop on a listdir:
os.listdir(<path of your directory>)
this gives you the list of files in your directory, but this gives you also the name of folders in that directory
Try generating a file list first, then passing that to a modified version of your function.
def dir_recursive(dirName):
import os
import re
fileList = list()
for (dir, _, files) in os.walk(dirName):
for f in files:
path = os.path.join(dir, f)
if os.path.exists(path):
fileList.append(path)
fList = list()
prog = re.compile('.txt$')
for k in range(len(fileList)):
binMatch = prog.search(fileList[k])
if binMatch:
fList.append(binMatch.string)
return fList
def load_data2(file_list):
documents_list = []
titles=[]
for file_path in file_list:
with open( file_path ,"rt", encoding='latin-1') as fin:
for line in fin.readlines():
text = line.strip()
documents_list.append(text)
print("Total Number of Documents:",len(documents_list))
titles.append( text[0:min(len(text),100)] )
return documents_list,titles
# Generate a file list & load the data from it
file_list = dir_recursive(path)
documents_list, titles = load_data2(file_list)

Check if a file exists using a text file

I have a folder (Molecules) with many sdf files (M00001.sdf, M00002.sdf and so on) representing different molecules. I also have a csv where each row represents the a molecule (M00001, M00002 etc).
I'm writing a code in order to get files on Molecules folder if their name is a row on the csv file.
First attempt
import os
path_to_files = '/path_to_folder/Molecules' # path to Molecules folder
for files in os.listdir(path_to_files):
names = os.path.splitext(files)[0] # get the basename (molecule name)
with open('molecules.csv') as ligs: # Open the csv file of molecules names
for hits in ligs:
if names == hits:
print names, hits
else:
print 'File is not here'
However this returns nothing on the command line (literally nothing). What is wrong with this code?
I am not sure that this is the best way (I only know that the following code works for my data) but if your molecule.csv has the standard csv format, i.e. "molecule1,molecule2,molecule3 ...", you can try to rearrange your code in this way:
import os
import csv
path_to_files = '/path_to_folder/Molecules' # path to Molecules folder
for files in os.listdir(path_to_files):
names = os.path.basename(files)
names = names.replace(".sdf","")
with open('molecules.csv','r') as ligs:
content = csv.reader(ligs)
for elem in content:
for hits in elem:
if names == hits:
print names, hits
else:
print 'File is not here'
See csv File Reading and Writing for csv module
I solved the problem with a rather brute approach
import os
import csv
import shutil
path_to_files = None # path to Molecules folder
new_path = None # new folder to save files
os.mkdir(new_path) # create the folder to store the molecules
hits = open('molecules.csv', 'r')
ligands = []
for line in hits:
lig = line.rstrip('\n')
ligands.append(lig)
for files in os.listdir(path_to_files):
molecule_name = os.path.splitext(files)[0]
full_name = '/' + molecule_name + '.sdf'
old_file = path_to_files + full_name
new_file = new_path + full_name
if molecule_name in ligands:
shutil.copy(old_file, new_file)

Categories

Resources