I have a csv with two columns Directory and Filename. Each row in the csv shows what directory each file belongs like so
Directory, File Name
DIR18, IMG_42.png
DIR12, IMG_16.png
DIR4, IMG_65.png
So far I have written code that grabs each directory and filename from the csv and then all files at their destination like so:
movePng.py
import shutil
import os
import csv
from collections import defaultdict
columns = defaultdict(list) # each value in each column is appended to a list
with open('/User/Results.csv') as f:
reader = csv.DictReader(f)
for row in reader:
for (k,v) in row.items():
columns[k].append(v)
source = '/User/PNGItems'
files = os.listdir(source)
for f in files:
pngName = f[:-4]
for filename in columns['File Name']:
fileName = filename[:-4]
if pngName == fileName
# GET THIS POSITION IN columns['File Name'] for columns['Directory']
shutil.move(f, source + '/' + DIRECTORY)
How do I get the index of the columns['File Name'] and grab the corresponding directory out of columns['Directory'] ?
You should read the assignments into a dictionary and then query that:
folder_assignment_file = "folders.csv"
file_folder = dict()
with open(folder_assignment_file, "r") as fh:
reader = csv.reader(fh)
for folder, filename in reader:
file_folder[filename] = folder
And then get the target folder like so: DIRECTORY = file_folder[fileName].
Some other hints:
filename, fileName are not good variable names, this will only lead to hard to find bugs because Python is case sensitive
use os.path.splitext to split the extension off the filename
if not all your files are in one folder the glob module and os.walk might come in handy
Edit:
Creating the dict can be made even nicer like so:
with open(folder_assignment_file, "r") as fh:
reader = csv.reader(fh)
file_folders = {filename: folder for folder, filename in reader}
To solve this I used #Peter Wood suggestion and it worked beautifully. Also I had to modify shutil.
Here is the code below
for f in files:
pngName = f[:-4]
for filename, directory in zip(columns['File Name'], columns['Directory']):
fileName = filename[:-4]
if pngName == fileName:
directoryName = directory[1:]
shutil.move(os.path.join(source, f), source + '/' + directoryName)
Related
I have the following txt files (10000s) in multiple directories eg.
BaseDirectory\04_April\2019-04-14\UniqeDirectoryName1 (username)\345308457384745637.txt
BaseDirectory\04_April\2019-04-14\UniqeDirectoryName2 (username)\657453456456546543.txt
BaseDirectory\04_April\2019-04-14\UniqeDirectoryName3 (username)\234545743564356774.txt
BaseDirectory\05_May\2019-05-14\UniqeDirectoryName1 (username)\266434564564563565.txt
BaseDirectory\05_May\2019-05-14\UniqeDirectoryName2 (username)\934573845739632048.txt
BaseDirectory\05_May\2019-05-14\UniqeDirectoryName3 (username)\634534534535654501.txt
so in other words in each date folder there are multiple directories that again contains text files.
import os
import re
import csv
for path, subdirs, files in os.walk("E:\\BaseDir\\"):
for name in files:
file_fullinfo = os.path.join(path, name)
path, filename = os.path.split(file_fullinfo)
NoExtension = os.path.splitext(file_fullinfo)[0]
file_noext = str(NoExtension)
file_splitinfo = re.split('\\\\', file_noext, 0)
file_month = file_splitinfo[2]
file_date = file_splitinfo[3]
file_folder = re.sub(r'\([^)]*\)', '', file_splitinfo[4])
file_name = file_splitinfo[5]
file_category = file_folder
My script generates the following..
['E:', 'BaseDirectory', '04_April', '2019-04-09', 'UniqeDirectoryName', '345308457384745637.txt', 'UniqeDirectoryName']
So far so good, writing this to a generic CSV file is also straight forward, but I want to create a new CSV file based on the changing date like this.
E:\BaseDir\2019-04-09.csv
file_folder, file_name, file_category
'UniqeDirectoryName', '543968732948754398','UniqeDirectoryName'
'UniqeDirectoryName', '345308457384745637','UniqeDirectoryName'
'UniqeDirectoryName', '324089734983987439','UniqeDirectoryName'
E:\BaseDir\2019-05-14.csv
file_folder, file_name, file_category
'UniqeDirectoryName', '543968732948754398','UniqeDirectoryName'
'UniqeDirectoryName', '345308457384745637','UniqeDirectoryName'
'UniqeDirectoryName', '324089734983987439','UniqeDirectoryName'
How can I accomplise this can't quite wrap my head a around it, the struggle of being a Python noob is real.. :)
If you can live without the first line as a header row it can be achieved quite simply.
output_file_path = 'D:/output_files/' + file_date + '.csv'
with open(file=output_file_path, mode='a') as csv_file: #open a csv file to write to in append mode
csv_file.write("my data\n")
if you absolutely must have the header then you can test if the file exists first, if it doesn't exist write the header row
import os.path
output_file_path = 'D:/output_files/' + file_date + '.csv'
if not os.path.exists(output_file_path): #open a csv file to write header row if doesn't exist
with open(file=output_file_path, mode='a') as csv_file:
csv_file.write("my header row\n")
with open(file=output_file_path, mode='a') as csv_file: #open a csv file to write to in append mode
csv_file.write("my data\n")
My question: Is there a way to load data from all files in a directory using Python
Input: Get all files in a given directory of mine (wow.txt, testting.txt,etc.)
Process: I want to run all the files through a def function
Output: I want the output to be all the files names and their respective content below it.For example:
/home/file/wow.txt
"all of its content"
/home/file/www.txt
"all of its content"
Here is my code:
# Import Functions
import os
import sys
# Define the file path
path="/home/my_files"
file_name="wow.txt"
#Load Data Function
def load_data(path,file_name):
"""
Input : path and file_name
Purpose: loading text file
Output : list of paragraphs/documents and
title(initial 100 words considered as title of document)
"""
documents_list = []
titles=[]
with open( os.path.join(path, file_name) ,"rt", encoding='latin-1') as fin:
for line in fin.readlines():
text = line.strip()
documents_list.append(text)
print("Total Number of Documents:",len(documents_list))
titles.append( text[0:min(len(text),100)] )
return documents_list,titles
#Output
load_data(path,file_name)
Here is my output:
My Problem is that my output only takes one file and shows its content. Obviously, i defined the path and file name in my code to one file but I am confused as to how to write the path in a way to load all the files and output each of its contents separately. Any suggestions?
Using glob:
import glob
files = glob.glob("*.txt") # get all the .txt files
for file in files: # iterate over the list of files
with open(file, "r") as fin: # open the file
# rest of the code
Using os.listdir():
import os
arr = os.listdir()
files = [x for x in arr if x.endswith('.txt')]
for file in files: # iterate over the list of files
with open(file, "r") as fin: # open the file
# rest of the code
Try this:
import glob
for file in glob.glob("test/*.xyz"):
print(file)
if my directory name was "test" and I had lots of xyz files in them...
You can use glob and pandas
import pandas as pd
import glob
path = r'some_directory' # use your path
all_files = glob.glob(path + "/*.txt")
li = []
for filename in all_files:
#read file here
# if you decide to use pandas you might need to use the 'sep' paramaeter as well
df = pd.read_csv(filename, index_col=None, header=0)
li.append(df)
# get it all together
frame = pd.concat(li, axis=0, ignore_index=True)
I will take advantage of the function you have already written, so use the following:
data = []
path="/home/my_files"
dirs = os.listdir( path )
for file in dirs:
data.append(load_data(path, file))
In this case you will have all data in the list data.
Hi you can use a for loop on a listdir:
os.listdir(<path of your directory>)
this gives you the list of files in your directory, but this gives you also the name of folders in that directory
Try generating a file list first, then passing that to a modified version of your function.
def dir_recursive(dirName):
import os
import re
fileList = list()
for (dir, _, files) in os.walk(dirName):
for f in files:
path = os.path.join(dir, f)
if os.path.exists(path):
fileList.append(path)
fList = list()
prog = re.compile('.txt$')
for k in range(len(fileList)):
binMatch = prog.search(fileList[k])
if binMatch:
fList.append(binMatch.string)
return fList
def load_data2(file_list):
documents_list = []
titles=[]
for file_path in file_list:
with open( file_path ,"rt", encoding='latin-1') as fin:
for line in fin.readlines():
text = line.strip()
documents_list.append(text)
print("Total Number of Documents:",len(documents_list))
titles.append( text[0:min(len(text),100)] )
return documents_list,titles
# Generate a file list & load the data from it
file_list = dir_recursive(path)
documents_list, titles = load_data2(file_list)
There are text files of various names in the folder 'a'. I want to read all of these text files and add the letter 'b' to each text file. What should I do?
cwd = os.getcwd()
input_dir = os.path.join(cwd, "my .txt files dir")
sorts = sorted(glob(input_dir), key = lambda x:(len(x) , x))
for f in sorts :
f = open(input_dir, 'a')
data = "add text"
f.write(data)
f.close()
Append data to file:
- first: get all file in folder a.
- second: find extension with .txt.
- third: open it and do something('append', or 'rewrite').
Demo:
import os
# your .txt files dir
path = 'a'
# append data what you want
appendData = 'b'
fileNames = list(os.walk(path))[0][2]
fileNames.sort(key=len)
fileNums = len(fileNames)
# your dst file extension
fileExt = '.txt'
# # Extract extension from filename
# fileExt = os.path.splitext(fileNames[0])[1]
for fileName in fileNames:
if fileName.endswith(fileExt):
fileFullPath = os.path.join(path, fileName)
with open(fileFullPath, 'a') as f:
f.write(appendData)
Like the others said, this is an easy question that could easily be find on google. Anyway here's how to do it:
from os import listdir
from os.path import isfile, isdir, join
files = [file for file in listdir("files") if isfile(join("files", file))]
directories = [directory for directory in listdir("files") if isdir(join("files", directory))]
print(files)
for file_name in files:
try:
file = open("files/" + file_name, "a")
file.write("b")
file.close()
except IOError as err:
print("Could not open file because : ", err)
Replace "file" with the directory where your files are or the path to that directory like "directory0/directory1/directory_with_files"
Avoid to open files with
f = open(input_dir, 'a')
f.close()
Instead
with open(input_dir, 'a') as inputFile:
Do something
Also what you want is
import os
import glob # We will use this module to open only .txt files
path = 'your/path'
for filename in glob.glob(os.path.join(path, '*.txt'))
with open(filename, 'a') as inputFile:
inputFile.write('b')
import os, unicodecsv as csv
# open and store the csv file
IDs = {}
with open('labels.csv','rb') as csvfile:
timeReader = csv.reader(csvfile, delimiter = ',')
# build dictionary with associated IDs
for row in timeReader:
IDs[row[0]] = row[1]
# move files
path = 'train/'
tmpPath = 'train2/'
for oldname in os.listdir(path):
# ignore files in path which aren't in the csv file
if oldname in IDs:
try:
os.rename(os.path.join(path, oldname), os.path.join(tmpPath, IDs[oldname]))
except:
print 'File ' + oldname + ' could not be renamed to ' + IDs[oldname] + '!'
I am trying to sort my files according to this csv file. But the file contains many ids with same name. Is there a way to move files with same name to 1 folder or adding a number in front of a file if the file with same name already exist in directory?
Example-
id name
001232131hja1.jpg golden_retreiver
0121221122ld.jpg black_hound
0232113222kl.jpg golden_retreiver
0213113jjdsh.jpg alsetian
05hkhdsk1233a.jpg black_hound
I actually want to move all the files having id corresponding to golden_retreiver to one folder and so on.
Based on what you describe, here is my approach:
import csv
import os
SOURCE_ROOT = 'train'
DEST_ROOT = 'train2'
with open('labels.csv') as infile:
next(infile) # Skip the header row
reader = csv.reader(infile)
seen = set()
for dogid, breed in reader:
# Create a new directory if needed
if breed not in seen:
os.mkdir(os.path.join(DEST_ROOT, breed))
seen.add(breed)
src = os.path.join(SOURCE_ROOT, dogid + '.jpg')
dest = os.path.join(DEST_ROOT, breed, dogid + '.jpg')
try:
os.rename(src, dest)
except WindowsError as e:
print e
Notes
For every line in the data file, I create the breed directory at the destination. I use the set seen to make sure that I only create each directory once.
After that, it is a trivia matter of moving files into place
One possible move error: file does not exist in the source dir. In which case, the code just prints out the error and ignore it.
Suppose I have a text file aiq_hits.txt.
Each line in this file corresponds a filename
ant1.aiq
ant2.aiq
ant3.aiq
ant4.aiq
I want to match each line of my textfile (ant1.aiq,ant2.aiq and so on) with filenames which are present at some specific place(R:\Sample) and extract matching files into some other place (R:\sample\wsa).
I have an idea that I need to use functions like os.walk() and fnmatch.fnmatch(), shutil.copy() but I am not able to implement them
My code:
import os
import shutil
import fnmatch
with open("aiq_hits.txt","r") as in_file:
for line in in_file:
I am stuck here
import os
import shutil
sourceDir = "R:\\Sample"
targetDir = "R:\\Sample\\wsa"
existingFiles = set(f for f in os.listdir(sourceDir) if os.path.isfile(os.path.join(sourceDir, f)))
infilepath = "aiq_hits.txt"
with open(infilepath) as infile:
for line in infile:
fname = line.strip()
if fname not in existingFiles: continue
shutil.move(os.path.join(sourceDir, fname), os.path.join(targetDir, fname))
I hope this will suffice:
import os
def match_files(url,file_read, dest):
f = open(file_read, 'rb')
file_list = os.listdir(url)
print(file_list)
saved_path = os.getcwd()
print("Current working directory is " + saved_path)
os.chdir(url)
match = []
for file_name in f:
file_name = file_name.strip()
if file_name in file_list:
match.append(file_name)
os.rename(os.path.join(url, file_name), os.path.join(dest, file_name))
os.chdir(saved_path)
print match
here, url is source directory or folder from which u want to match files, file_read is the name of file (with path) in which list of file names is given, dest is the destination folder.
this code moves the matching files from url to dest, i.e. these files won't remin in url after running the code.
Alternatively you could use the glob module which allows you to enter in a expression for the file name\extension which will then return a list that you can loop over.
I'd use this module if the source directory can have files with the same extension that you want to exclude from being looped over
Also I'm assuming that the file name list is not large and so storing it in a list wont be an issue
eg (I haven't tested the below )
from glob import glob
import os
import shutil
src = 'R:\\Sample'
dst = "R:\\Sample\\wsa"
in_file_list = "aiq_hits.txt"
list_Of_files = glob(os.path.join(src, 'ant*.aiq'))
data = []
with open(in_file_list) as reader:
data += reader.readlines()
for row in list_Of_files:
file_path, file_name = os.path.split(row)
if file_name in data:
shutil.copy2(row, os.path.join(dst, file_name))
# or if you want to move the file
# shutil.move(row, os.path.join(dst, file_name))