Copy and rename images based on a CSV, managing duplicates (Python)

I'm working on a script in Python to copy and rename a bunch of images based on a CSV.
The image folder, named "originals", contains:
Alpha_1.jpg
Beta_2.jpg
And the CSV contains:
Alpha_1.jpg 4474094_1
Beta_2.jpg 4474094_2
Beta_2.jpg 4474094_3
So the result should leave, in a new folder named "newnames", the same images as:
4474094_1.jpg
4474094_2.jpg
4474094_3.jpg
where 4474094_2.jpg and 4474094_3.jpg are the same picture as Beta_2.jpg.
I have the following code, which is not working; I would be most grateful for any advice!
import os
import csv
import sys
import shutil

def copy_rename():
    os.chdir(r"C:\Transformer-SSBI\Original")
    saved_path = os.getcwd()
    file_list = os.listdir(saved_path)
    src_dir = r"C:\Transformer-SSBI\Originals"
    dst_dir = r"C:\Transformer-SSBI\Newnames"
    IDs = {}
    with open(r'transformer.csv', 'rb') as csvfile:
        Reader = csv.reader(csvfile, delimiter=';')
        for row in Reader:
            IDs[row[0]] = row[1] + '.jpg'
    for row in IDs:
        for file_name in file_list:
            if file_name in row:
                oldname = shutil.copy(file_name, dst_dir)
                newname = IDs[file_name]
                os.rename(oldname, newname)

copy_rename()

Since you store the mapping in a dict, and Beta_2.jpg is renamed to two files, there can be only one entry per key in the dict, so it will only be renamed to 4474094_3.jpg, not 4474094_2.jpg. You can avoid building the dict altogether and just do the copying while iterating over the CSV rows:
import os
import csv
import sys
import shutil

def copy_rename():
    src_dir = r"C:\Transformer-SSBI\Originals"
    dst_dir = r"C:\Transformer-SSBI\Newnames"
    os.chdir(dst_dir)
    with open(r'transformer.csv', 'rb') as csvfile:
        Reader = csv.reader(csvfile, delimiter=',')
        for row in Reader:
            oldname = row[0]
            newname = row[1] + ".jpg"
            if os.path.exists(src_dir + "\\" + oldname):
                # copy into the destination, then rename the copy;
                # the cwd is dst_dir, so the bare names resolve there
                shutil.copy(src_dir + "\\" + oldname, dst_dir)
                os.rename(oldname, newname)

copy_rename()
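If you would rather keep a lookup structure, a sketch (untested, using the same paths and comma delimiter as above) that maps each source file to a list of target names avoids losing duplicates:
import os
import csv
import shutil
from collections import defaultdict

def copy_rename_dict(src_dir, dst_dir, csv_path):
    # map each original name to *all* of its new names
    new_names = defaultdict(list)
    with open(csv_path, 'rb') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            new_names[row[0]].append(row[1] + '.jpg')
    for oldname, targets in new_names.items():
        oldpath = os.path.join(src_dir, oldname)
        if not os.path.exists(oldpath):
            continue
        for newname in targets:
            # one copy per target name; shutil.copy accepts a full destination path
            shutil.copy(oldpath, os.path.join(dst_dir, newname))

copy_rename_dict(r"C:\Transformer-SSBI\Originals", r"C:\Transformer-SSBI\Newnames", r"transformer.csv")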

This is like @Samuelliyi's answer, except it avoids any race conditions and is (slightly) more cross-platform by using os.path.join.
import os
import csv
import sys
import errno
import shutil

def copy_rename(src_dir, dst_dir, csv_path=None):
    if csv_path is None:
        csv_path = os.path.join(dst_dir, 'transformer.csv')
    with open(csv_path, mode='rb') as csvfile:
        Reader = csv.reader(csvfile, delimiter=',')
        for row in Reader:
            oldname = row[0]
            # keep whatever extension the original file had
            newname = row[1] + os.path.splitext(oldname)[1]
            oldpath = os.path.join(src_dir, oldname)
            newpath = os.path.join(dst_dir, newname)
            try:
                # the rename is implicit in the copy operation
                shutil.copy(oldpath, newpath)
            except (IOError, OSError) as e:
                # only re-raise if it is something other than the
                # source file not existing (Python 2 raises IOError here)
                if e.errno != errno.ENOENT:
                    raise

src_dir = r"C:\Transformer-SSBI\Originals"
dst_dir = r"C:\Transformer-SSBI\Newnames"
copy_rename(src_dir, dst_dir)
Also, the function is now more general and can be used on any two directories that have the same structure (don't hardcode what you can pass in as a parameter).
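One assumption in the function above is that dst_dir already exists; if that is not guaranteed, a small guard (my addition, hedged) keeps the first copy from failing:
import os
import errno

def ensure_dir(path):
    # create the directory if missing; tolerate it already existing
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

# hypothetical usage, before calling copy_rename:
# ensure_dir(dst_dir)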

Related

Move PNGs to the appropriate folder in Python

I have a CSV with two columns, Directory and File Name. Each row in the CSV shows which directory each file belongs to, like so:
Directory, File Name
DIR18, IMG_42.png
DIR12, IMG_16.png
DIR4, IMG_65.png
So far I have written code that grabs each directory and file name from the CSV and then moves each file to its destination, like so:
movePng.py
import shutil
import os
import csv
from collections import defaultdict

columns = defaultdict(list)  # each value in each column is appended to a list
with open('/User/Results.csv') as f:
    reader = csv.DictReader(f)
    for row in reader:
        for (k, v) in row.items():
            columns[k].append(v)

source = '/User/PNGItems'
files = os.listdir(source)
for f in files:
    pngName = f[:-4]
    for filename in columns['File Name']:
        fileName = filename[:-4]
        if pngName == fileName:
            # GET THIS POSITION IN columns['File Name'] for columns['Directory']
            shutil.move(f, source + '/' + DIRECTORY)
How do I get the index into columns['File Name'] and grab the corresponding directory out of columns['Directory']?
You should read the assignments into a dictionary and then query that:
import csv

folder_assignment_file = "folders.csv"
file_folder = dict()
with open(folder_assignment_file, "r") as fh:
    reader = csv.reader(fh)
    for folder, filename in reader:
        file_folder[filename] = folder
And then get the target folder like so: DIRECTORY = file_folder[fileName].
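Putting it together, a minimal sketch of the move loop (untested; it assumes the file_folder dict built above with stray spaces stripped from the CSV values, and the source folder from the question):
import os
import shutil

source = '/User/PNGItems'
for f in os.listdir(source):
    if f in file_folder:
        # look the folder up in the dict instead of scanning a parallel list
        shutil.move(os.path.join(source, f),
                    os.path.join(source, file_folder[f]))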
Some other hints:
filename and fileName are not good variable names together; that will only lead to hard-to-find bugs, because Python is case-sensitive
use os.path.splitext to split the extension off the filename (see the example below)
if not all your files are in one folder, the glob module and os.walk might come in handy
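For example, os.path.splitext splits a name into base and extension in one call:
import os

base, ext = os.path.splitext("IMG_42.png")
# base == "IMG_42", ext == ".png"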
Edit:
Creating the dict can be made even nicer like so:
with open(folder_assignment_file, "r") as fh:
    reader = csv.reader(fh)
    file_folders = {filename: folder for folder, filename in reader}
To solve this I used @Peter Wood's suggestion and it worked beautifully. Also, I had to modify the shutil.move call.
Here is the code below
for f in files:
    pngName = f[:-4]
    for filename, directory in zip(columns['File Name'], columns['Directory']):
        fileName = filename[:-4]
        if pngName == fileName:
            directoryName = directory[1:]  # drop the stray leading character from the CSV value
            shutil.move(os.path.join(source, f), source + '/' + directoryName)

Create many files with random names in Python and zip them

I'm a newbie in Python.
I need to create a large quantity of files with random names in Dest_Dir (my destination directory), and then zip them into one file.
Does anyone have an idea how to do that?
I managed to create files like that with a for loop into a specific folder, but it doesn't fit the case where I want to create a large number of files (let's say 100), and the names I created aren't random.
import os
import sys
import platform

SRC_Dir = os.path.dirname(__file__)
Dest_Dir = os.path.join(SRC_Dir, 'dest')
items = ["one", "two", "three"]
for item in items:
    with open(os.path.join(Dest_Dir, item), 'wb') as f:
        f.write("This is my first line of code")
        f.write("\nThis is my second line of code with {} the first item in my list".format(item))
        f.write("\nAnd this is my last line of code")
You could make use of the built-in tempfile module:
import os
import tempfile

for _ in range(100):
    file_descriptor, file_path = tempfile.mkstemp(".txt", "prefix-", Dest_Dir)
    file_handle = open(file_path, "wb")
    # do stuff
    os.close(file_descriptor)
    file_handle.close()
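A slightly tighter variant (a sketch under the same assumptions, with Dest_Dir as in the question) wraps the descriptor that mkstemp returns with os.fdopen, so only one handle is ever open:
import os
import tempfile

for _ in range(100):
    file_descriptor, file_path = tempfile.mkstemp(".txt", "prefix-", Dest_Dir)
    # fdopen reuses the already-open descriptor instead of opening the path twice
    with os.fdopen(file_descriptor, "wb") as file_handle:
        file_handle.write("some content")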
Since a comment was made about the zip part, I figured I would add that as well:
import os
import tempfile
import zipfile

new_files = []
for _ in range(10):
    file_descriptor, file_path = tempfile.mkstemp(".txt", "prefix-", "/tmp")
    file_handle = open(file_path, "wb")
    file_handle.write("HELLO")
    os.close(file_descriptor)
    file_handle.close()
    new_files.append(file_path)

with zipfile.ZipFile("/tmp/zipped.zip", "w") as zipped:
    for file_path in new_files:
        zipped.write(file_path, os.path.basename(file_path))
The zipped.write arguments here assume that only the filename (and not the path) is desired for the archived name.
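If you want to pick the random names yourself instead of letting tempfile choose, one hedged alternative (untested; Dest_Dir as in the question) is uuid4 hex strings:
import os
import uuid
import zipfile

new_files = []
for _ in range(100):
    # uuid4().hex gives a 32-character random name with no awkward path characters
    file_path = os.path.join(Dest_Dir, uuid.uuid4().hex + ".txt")
    with open(file_path, "wb") as f:
        f.write("HELLO")
    new_files.append(file_path)

with zipfile.ZipFile(os.path.join(Dest_Dir, "zipped.zip"), "w") as zipped:
    for file_path in new_files:
        zipped.write(file_path, os.path.basename(file_path))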

I have a ".txt "file which consists of various filenames and I want to search each filename in a folder where these files are actually kept

Suppose I have a text file, aiq_hits.txt.
Each line in this file corresponds to a filename:
ant1.aiq
ant2.aiq
ant3.aiq
ant4.aiq
I want to match each line of my text file (ant1.aiq, ant2.aiq and so on) against the filenames present at some specific place (R:\Sample) and extract the matching files to some other place (R:\Sample\wsa).
I have an idea that I need to use functions like os.walk(), fnmatch.fnmatch() and shutil.copy(), but I am not able to implement them.
My code:
import os
import shutil
import fnmatch

with open("aiq_hits.txt", "r") as in_file:
    for line in in_file:
        # I am stuck here
import os
import shutil

sourceDir = "R:\\Sample"
targetDir = "R:\\Sample\\wsa"

existingFiles = set(f for f in os.listdir(sourceDir)
                    if os.path.isfile(os.path.join(sourceDir, f)))

infilepath = "aiq_hits.txt"
with open(infilepath) as infile:
    for line in infile:
        fname = line.strip()
        if fname not in existingFiles:
            continue
        shutil.move(os.path.join(sourceDir, fname), os.path.join(targetDir, fname))
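One caveat (my note, not from the original answer): shutil.move fails if targetDir does not exist yet, so it can be worth creating it first:
import os

# create the destination folder once, before the move loop
if not os.path.isdir(targetDir):
    os.makedirs(targetDir)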
I hope this will suffice:
import os

def match_files(url, file_read, dest):
    f = open(file_read, 'rb')
    file_list = os.listdir(url)
    print(file_list)
    saved_path = os.getcwd()
    print("Current working directory is " + saved_path)
    os.chdir(url)
    match = []
    for file_name in f:
        file_name = file_name.strip()
        if file_name in file_list:
            match.append(file_name)
            os.rename(os.path.join(url, file_name), os.path.join(dest, file_name))
    os.chdir(saved_path)
    print(match)
Here, url is the source directory or folder from which you want to match files, file_read is the name of the file (with path) containing the list of file names, and dest is the destination folder.
This code moves the matching files from url to dest, i.e. these files won't remain in url after running the code.
Alternatively, you could use the glob module, which lets you give an expression for the file name/extension and returns a list you can loop over.
I'd use this module if the source directory can contain files with other extensions that you want to exclude from being looped over.
Also, I'm assuming that the file name list is not large, so storing it in a list won't be an issue.
e.g. (I haven't tested the below):
from glob import glob
import os
import shutil

src = 'R:\\Sample'
dst = "R:\\Sample\\wsa"
in_file_list = "aiq_hits.txt"

list_Of_files = glob(os.path.join(src, 'ant*.aiq'))
# strip the trailing newlines so the names compare equal below
with open(in_file_list) as reader:
    data = [line.strip() for line in reader]

for row in list_Of_files:
    file_path, file_name = os.path.split(row)
    if file_name in data:
        shutil.copy2(row, os.path.join(dst, file_name))
        # or if you want to move the file
        # shutil.move(row, os.path.join(dst, file_name))

Transposing all csv files within a folder

I got help the last time I asked a question on this site regarding batch-processing CSV files within a folder using glob.glob() with Python. I am trying to use it this time to transpose all CSV files within a folder. The script below only processes the last file and stops. What am I doing wrong?
import csv
import os
import glob

directory = raw_input("INPUT Folder")
output = raw_input("OUTPUT Folder:")
in_files = os.path.join(directory, '*.csv')

for in_file in glob.glob(in_files):
    with open(in_file) as input_file:
        reader = csv.reader(input_file)
        cols = []
        for row in reader:
            cols.append(row)

filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'
with open(os.path.join(output, filename), 'wb') as output_file:
    writer = csv.writer(output_file)
    for i in range(len(max(cols, key=len))):
        writer.writerow([(c[i] if i < len(c) else '') for c in cols])
You need to indent the "output" portion of the code so that it runs once for each iteration of the for in_file loop:
import csv
import os
import glob

directory = raw_input("INPUT Folder")
output = raw_input("OUTPUT Folder:")
in_files = os.path.join(directory, '*.csv')

for in_file in glob.glob(in_files):
    with open(in_file) as input_file:
        reader = csv.reader(input_file)
        cols = []
        for row in reader:
            cols.append(row)

    # "outdent" this code so it only needs to run once for each in_file
    filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'

    # Indent this to the same level as the rest of the "for in_file" loop!
    with open(os.path.join(output, filename), 'wb') as output_file:
        writer = csv.writer(output_file)
        for i in range(len(max(cols, key=len))):
            writer.writerow([(c[i] if i < len(c) else '') for c in cols])
In your version, that code only runs once, after the for in_file loop has completed, and therefore only outputs the cols data left over from the final iteration of that loop.
I have also "outdented" the filename = ... statement to the for in_file level, as this only needs to be done once per in_file, not once for each row of each in_file.
You can get a lot of mileage with data manipulation using pandas:
import os
import pandas as pd

for filename in os.listdir('.'):
    # We save an augmented filename later, so using splitext is useful
    # for more than just checking the extension.
    prefix, ext = os.path.splitext(filename)
    if ext.lower() != '.csv':
        continue

    # Load the data into a dataframe
    df = pd.DataFrame.from_csv(filename,
                               header=None,
                               index_col=None,
                               parse_dates=False)

    # Transpose is easy, but you could do TONS of data processing here.
    # pandas is awesome.
    df_transposed = df.T

    # Save to a new file with an augmented name
    df_transposed.to_csv(prefix + '_T' + ext, header=True, index=False)
The os.walk version is not much different, if you need to dig into subfolders as well.
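Note that DataFrame.from_csv was deprecated and later removed from pandas, so with a current version the equivalent read is pd.read_csv; a small untested sketch of the same loop:
import os
import pandas as pd

for filename in os.listdir('.'):
    prefix, ext = os.path.splitext(filename)
    if ext.lower() != '.csv':
        continue
    # read_csv replaces the removed DataFrame.from_csv
    df = pd.read_csv(filename, header=None)
    # transpose and write out without the numeric row/column labels
    df.T.to_csv(prefix + '_T' + ext, header=False, index=False)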
Here is a working one; I had to google for an hour, but it works and was tested on Python 3.3:
import csv
import os
import glob

directory = r'C:\Python33\csv'
output = r'C:\Python33\csv2'
in_files = os.path.join(directory, '*.csv')

for in_file in glob.glob(in_files):
    with open(in_file) as input_file:
        reader = csv.reader(input_file)
        cols = []
        for row in reader:
            cols.append(row)

    # "outdent" this code so it only needs to run once for each in_file
    filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'

    # Indent this to the same level as the rest of the "for in_file" loop!
    with open(os.path.join(output, filename), 'w') as output_file:
        writer = csv.writer(output_file)
        for i in range(len(max(cols, key=len))):
            writer.writerow([(c[i] if i < len(c) else '') for c in cols])
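One detail worth adding for Python 3 (my note, not part of the original answer): the csv module wants output files opened with newline='', otherwise extra blank rows can appear on Windows:
# open with newline='' so csv.writer controls the line endings itself
with open(os.path.join(output, filename), 'w', newline='') as output_file:
    writer = csv.writer(output_file)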
in_files will only return a single result in that format. Try returning a list:
in_files = [f for f in os.listdir(directory) if f.endswith('.csv')]

Read CSV from within Zip File

I have a directory of zip files (approximately 10,000 small files), and within each is a CSV file I am trying to read and split into a number of different CSV files.
I managed to write the code to split the CSV files from a directory of CSVs, shown below; it reads the first attribute of each row and, depending what it is, writes the row to the relevant CSV.
import csv
import os
import sys
import re
import glob

reader = csv.reader(open("C:/Projects/test.csv", "rb"), delimiter=',', quotechar='"')
write10 = csv.writer(open('output10.csv', 'w'), delimiter=',', lineterminator='\n', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
write15 = csv.writer(open('output15.csv', 'w'), delimiter=',', lineterminator='\n', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)

headings10 = ["RECORD_IDENTIFIER", "CUSTODIAN_NAME", "LOCAL_CUSTODIAN_NAME", "PROCESS_DATE", "VOLUME_NUMBER", "ENTRY_DATE", "TIME_STAMP", "VERSION", "FILE_TYPE"]
write10.writerow(headings10)
headings15 = ["RECORD_IDENTIFIER", "CHANGE_TYPE", "PRO_ORDER", "USRN", "STREET_DESCRIPTION", "LOCALITY_NAME", "TOWN_NAME", "ADMINSTRATIVE_AREA", "LANGUAGE"]
write15.writerow(headings15)

for row in reader:
    type = row[0]
    if "10" in type:
        write10.writerow(row)
    elif "15" in type:
        write15.writerow(row)
So I am now trying to read the zip files directly rather than wasting time extracting them first.
This is what I have so far, after following as many tutorials as I could find:
import glob
import os
import csv
import zipfile
import StringIO

for name in glob.glob('C:/Projects/abase/*.zip'):
    base = os.path.basename(name)
    filename = os.path.splitext(base)[0]
    datadirectory = 'C:/Projects/abase/'
    dataFile = filename
    archive = '.'.join([dataFile, 'zip'])
    fullpath = ''.join([datadirectory, archive])
    csv = '.'.join([dataFile, 'csv'])

    filehandle = open(fullpath, 'rb')
    zfile = zipfile.ZipFile(filehandle)
    data = StringIO.StringIO(zfile.read(csv))
    reader = csv.reader(data)
    for row in reader:
        print row
However, an error gets thrown:
AttributeError: 'str' object has no attribute 'reader'
Hopefully someone can show me how to change my working CSV-reading code to read from the zip files.
Much appreciated,
Tim
Simple fix. You're shadowing the csv module with your local csv variable. Just change the name of that variable:
import glob
import os
import csv
import zipfile
import StringIO

for name in glob.glob('C:/Projects/abase/*.zip'):
    base = os.path.basename(name)
    filename = os.path.splitext(base)[0]
    datadirectory = 'C:/Projects/abase/'
    dataFile = filename
    archive = '.'.join([dataFile, 'zip'])
    fullpath = ''.join([datadirectory, archive])
    csv_file = '.'.join([dataFile, 'csv'])  # all fixed

    filehandle = open(fullpath, 'rb')
    zfile = zipfile.ZipFile(filehandle)
    data = StringIO.StringIO(zfile.read(csv_file))  # don't forget this line!
    reader = csv.reader(data)
    for row in reader:
        print row
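For reference, StringIO.StringIO no longer exists on Python 3; a hedged, untested sketch of the same loop using zipfile.ZipFile.open and io.TextIOWrapper instead:
import csv
import glob
import io
import os
import zipfile

for name in glob.glob('C:/Projects/abase/*.zip'):
    csv_name = os.path.splitext(os.path.basename(name))[0] + '.csv'
    with zipfile.ZipFile(name) as zfile:
        # ZipFile.open returns a binary file object; wrap it for text-mode csv
        with zfile.open(csv_name) as raw:
            reader = csv.reader(io.TextIOWrapper(raw, encoding='utf-8'))
            for row in reader:
                print(row)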
