Print statement not responding in my file management system - Python

I have 2 folders: Source and Destination. Each of those folders has 3 subfolders inside it named A, B and C. The 3 subfolders in Source all contain multiple files. The 3 subfolders in Destination are still empty.
I need the full paths of all of them because my goal is to overwrite the files from Source A, B and C into Destination A, B and C.
How come my two print statements are not printing anything? I have zero errors.
import os

src = r'c:\data\AM\Desktop\Source'
dst = r'c:\data\AM\Desktop\Destination'
os.chdir(src)

for root, subdirs, files in os.walk(src):
    for f in subdirs:
        subdir_paths = os.path.join(src, f)
        subdir_paths1 = os.path.join(dst, f)
        for a in files:
            file_paths = os.path.join(subdir_paths, a)
            file_paths1 = os.path.join(subdir_paths1, a)
            print(file_paths)
            print(file_paths1)

Problem
As jasonharper said in a comment,
You are misunderstanding how os.walk() works. The files returned in files are in the root directory; you are acting as if they existed in each of the subdirs directories, which are actually in root themselves.
The reason nothing is printed is that, on the first iteration, files is empty, so for a in files is not entered. Then on the following iterations (where root is A, B and C respectively), subdirs is empty, so for f in subdirs is not entered.
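To see what is going on, it can help to print the raw tuples that os.walk() produces. For the tree described in the question they look roughly like this (the file names below are made-up placeholders):

import os
for entry in os.walk(r'c:\data\AM\Desktop\Source'):
    print(entry)

# ('c:\\data\\AM\\Desktop\\Source',    ['A', 'B', 'C'], [])
# ('c:\\data\\AM\\Desktop\\Source\\A', [],              ['file1.txt', ...])
# ('c:\\data\\AM\\Desktop\\Source\\B', [],              ['file2.txt', ...])
# ('c:\\data\\AM\\Desktop\\Source\\C', [],              ['file3.txt', ...])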
Solution
In fact you can ignore subdirs entirely. Instead walk the current dir, and join src/dst + root + a:
import os

src = r'c:\data\AM\Desktop\Source'
dst = r'c:\data\AM\Desktop\Destination'
os.chdir(src)

for root, subdirs, files in os.walk('.'):
    src_dir = os.path.join(src, root)
    dst_dir = os.path.join(dst, root)
    for a in files:
        src_file = os.path.join(src_dir, a)
        dst_file = os.path.join(dst_dir, a)
        print(src_file)
        print(dst_file)
The output will have an extra dot directory between src/dst and root. If anyone could tell me how to get rid of it, I'm all ears.
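One way to drop that extra dot component (a sketch, not part of the original answer) is to normalize the joined paths with os.path.normpath, which collapses '.' segments:

for root, subdirs, files in os.walk('.'):
    src_dir = os.path.normpath(os.path.join(src, root))
    dst_dir = os.path.normpath(os.path.join(dst, root))
    for a in files:
        src_file = os.path.join(src_dir, a)
        dst_file = os.path.join(dst_dir, a)
        print(src_file)
        print(dst_file)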

Related

How can I confirm and remove the original files after sorting and copying them into several folders?

I'm a newbie and I'm trying to make office work a little less tedious. I currently have a little program that sorts and copies .pdf files from a folder into several folders, based on who these files need to be sent to later.
It works great. There's just the issue that I keep double-checking if it did its job. So then I added a bit where it counts the copied files to make checking easier.
Now I've been trying to figure out if I could make the program compare the list of files in the original folder with a list of files from all the other destination folders and then delete the originals if the files are indeed copied.
I've also resorted to having the program print the resulting file paths, but it's ugly and still requires me to manually compare.
Here's my code:
import os
import shutil
import pathlib
import pprint

dir = ('[path to original folder]')
files = os.listdir(dir)

user_data = [
    ('Karl H. Preusse', [Path to Karl]),
    ('Rom', [Path to Rom]),
    ('Hochschule', [Path to Hochschule]),
    ('Kiefer', [Path to Kiefer]),
    ('Penny', [Path to Penny]),
    ('Steigenberger', [Path to Steigenberger]),
    ('Penzkofer', [Path to Penzkofer]),
    ('Stoffel', [Path to Stoffel]),
    ('Cavertitzer', [Path to Cavertitzer])
]

for pattern, dest_dir in user_data:
    matching_files = [f for f in files if pattern in f]
    for filename in matching_files:
        full_filename = os.path.join(dir, filename)
        if os.path.isfile(full_filename):
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)
            shutil.copy(full_filename, dest_dir)
            pprint.pprint(shutil.copy(full_filename, dest_dir))

stetje_datotek = sum(len(files) for _, _, files in os.walk([Path to directory that holds the copy folders]))  # defines files to count
print('Stevilo datotek v mapi Posiljanje je: {}'.format(stetje_datotek))  # Prints out how many files are in the target folders.
Below are my attempts at getting things automated.
#I commented this function out as I couldn't figure out how to get the data out of it.
#def sub_files(folder):
#    relpath = os.path.relpath
#    join = os.path.join
#    for path, _, files in os.walk([Path to directory that holds the copy folders]):
#        relative = relpath(path, [Path to directory that holds the copy folders])
#        for file in files:
#            yield join(relative, file)
#print(sub_files)
Here I thought to use inputs to individually check each folder:
#print(os.listdir([Path to directory that holds the copy folders]))
#if input() == 'Penzkofer':
#    pprint.pprint(os.listdir([Path to Penzkofer folder]))
And here I tried to compare lists, but I get a TypeError: unhashable type: 'list' error
prvotne_datoteke = set(os.listdir(dir))
kopirane_datoteke = set(os.walk([Path to directory that holds the copy folders]))
set(prvotne_datoteke).intersection(kopirane_datoteke)
Any help is appreciated. Thank you.
One approach is to print, for each copied file, the names of its recipients and how many there are, and then delete the original file once all intended recipients are covered.
to_be_copied = set()  # holds the names of all files being copied

for pattern, dest_dir in user_data:
    matching_files = [f for f in files if pattern in f]
    for filename in matching_files:
        full_filename = os.path.join(dir, filename)
        to_be_copied.add(filename)  # remember this file name
        if os.path.isfile(full_filename):
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)
            shutil.copy(full_filename, dest_dir)
            pprint.pprint(shutil.copy(full_filename, dest_dir))

# Iterates through copied files
for original_file in to_be_copied:
    count = 0
    recipients = []
    # Iterates through potential recipients
    for pattern, dest_dir in user_data:
        complete_name = os.path.join(dest_dir, original_file)
        if os.path.isfile(complete_name):
            count += 1
            recipients.append(pattern)
    print(original_file + ' sent to ' + str(count) + ' people:')
    print(recipients)
    # Quick manual check, could be changed to checking if count/recipients is correct
    print('Delete original file? (Y or N): ')
    delete = input()
    if delete == 'Y':
        os.remove(os.path.join(dir, original_file))
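If you still want the set comparison you attempted, note that os.walk() yields (dirpath, dirnames, filenames) tuples, and those tuples contain lists, which is what triggers the unhashable type error when they are passed straight to set(). A minimal sketch (copy_root stands in for the directory that holds the copy folders; the variable names are made up):

copy_root = '[Path to directory that holds the copy folders]'  # placeholder

prvotne_datoteke = set(os.listdir(dir))

kopirane_datoteke = set()
for _, _, filenames in os.walk(copy_root):
    kopirane_datoteke.update(filenames)  # collect only the file names

missing = prvotne_datoteke - kopirane_datoteke  # originals not found in any copy folder
print(missing)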

Scanning for file paths with glob

I am searching for all .csv's located in a subfolder with glob like so:
def scan_for_files(path):
    file_list = []
    for path, dirs, files in os.walk(path):
        for d in dirs:
            for f in glob.iglob(os.path.join(path, d, '*.csv')):
                file_list.append(f)
    return file_list
If I call:
path = r'/data/realtimedata/trades/bitfinex/'
scan_for_files(path)
I get the correct recursive list of files:
['/data/realtimedata/trades/bitfinex/btcusd/bitfinex_btcusd_trades_2018_05_12.csv',
'/data/realtimedata/trades/bitfinex/btcusd/bitfinex_btcusd_trades_2018_05_13.csv',
'/data/realtimedata/trades/bitfinex/btcusd/bitfinex_btcusd_trades_2018_05_15.csv',
'/data/realtimedata/trades/bitfinex/btcusd/bitfinex_btcusd_trades_2018_05_11.csv',
'/data/realtimedata/trades/bitfinex/btcusd/bitfinex_btcusd_trades_2018_05_09.csv',
'/data/realtimedata/trades/bitfinex/btcusd/bitfinex_btcusd_trades_2018_05_10.csv',
'/data/realtimedata/trades/bitfinex/btcusd/bitfinex_btcusd_trades_2018_05_08.csv',
'/data/realtimedata/trades/bitfinex/btcusd/bitfinex_btcusd_trades_2018_05_14.csv',
'/data/realtimedata/trades/bitfinex/ethusd/bitfinex_ethusd_trades_2018_05_14.csv',
'/data/realtimedata/trades/bitfinex/ethusd/bitfinex_ethusd_trades_2018_05_12.csv',
'/data/realtimedata/trades/bitfinex/ethusd/bitfinex_ethusd_trades_2018_05_10.csv',
'/data/realtimedata/trades/bitfinex/ethusd/bitfinex_ethusd_trades_2018_05_08.csv',
'/data/realtimedata/trades/bitfinex/ethusd/bitfinex_ethusd_trades_2018_05_09.csv',
'/data/realtimedata/trades/bitfinex/ethusd/bitfinex_ethusd_trades_2018_05_15.csv',
'/data/realtimedata/trades/bitfinex/ethusd/bitfinex_ethusd_trades_2018_05_11.csv',
'/data/realtimedata/trades/bitfinex/ethusd/bitfinex_ethusd_trades_2018_05_13.csv']
However, when using the actual sub-directory containing the files I want, it returns an empty list. Any idea why this is happening? Thanks.
path = r'/data/realtimedata/trades/bitfinex/btcusd/'
scan_for_files(path)
returns: []
Looks like btcusd is a bottom-level directory. That means that when you call os.walk with the r'/data/realtimedata/trades/bitfinex/btcusd/' path, the dirs variable will be an empty list [], so the inner loop for d in dirs: does not execute at all.
My advice would be to rewrite your function to iterate over the files directly rather than over the directories... don't worry, os.walk will still get to every file eventually, that's the nature of walking a directory tree.
def scan_for_files(path):
    file_list = []
    for root, _, files in os.walk(path):
        for f in files:
            if f.endswith('.csv'):
                file_list.append(os.path.join(root, f))
    return file_list
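With that version the bottom-level directory works too, because the CSV names now come from the files list of each visited directory rather than from dirs (the expected result below is inferred from the listing above, not verified output):

path = r'/data/realtimedata/trades/bitfinex/btcusd/'
scan_for_files(path)   # should now return the btcusd CSVs instead of []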
However, on more recent versions of Python (3.5+), you can use recursive glob:
def scan_for_files(path):
    return glob.glob(os.path.join(path, '**', '*.csv'), recursive=True)
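A pathlib equivalent (a sketch, not from the original answer) would be:

from pathlib import Path

def scan_for_files(path):
    return [str(p) for p in Path(path).rglob('*.csv')]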

Make a list of root with os.walk

I am trying to set up a specific folder/file structure, which I will then copy into my test setup. I want a list of unique folders that I can then create.
How do I get root into a list?
If I do the following:
for root, dirs, filenames in os.walk(path):
    print root
I get:
/Users/Me/Folder
/Users/Me/Folder/SubFolder
But when I try to use it in a for-loop, it gets messed up.
for root, dirs, filenames in os.walk(path):
    for x in root:
        print x
and I get this result:
/
U
s
e
r
s
/
M
e
/
F
o
l
... and so on
To get the variable you are iterating over into a list, simply append it to a list:
folders = []
for root, dirs, filenames in os.walk(path):
    folders.append(root)
To then create the folders from that list you can simply use os.mkdir(path):
for path in folders:
    os.mkdir(path)
If you want an additional print statement to see which folders you created, use:
for path in folders:
    os.mkdir(path)
    print("created:{}".format(path))
Finally I found the answer:
for root, dirs, filenames in os.walk(src_path):
    for x in root.splitlines():
        print x

Extracting folder name from file through iteration - slow

I have a program where I need to loop through the files and sub-directories. I need to extract the name of the subfolder each file came from.
I have a dictionary, d, that contains all the subfolder names that I need to work with. Then, while iterating through the files, I need to check whether their directory is in d or not.
Here is my code:
d = {'folder_1': 'a', 'folder_2': 'b', 'folder_3': 'c'}
dir_path = "/Users/user_1/Desktop/images_testing"

for root, directories, files in os.walk(dir_path):
    for filename in files:
        filepath = os.path.join(root, filename)
        temp_path = os.path.dirname(filepath)
        temp_sub_dir = temp_path.split("/")
        if temp_sub_dir[-1] in d:
            #do some work
This works fine but is SUPER slow. Is there any way to make this process faster?
My main problem is on these lines:
temp_path = os.path.dirname(filepath)
temp_sub_dir = temp_path.split("/")
I do not need the full path, I just need the folder name where this file came from.
How about doing it like this:
for root, directories, files in os.walk(dir_path):
    temp_sub_dir = os.path.basename(root)
    if temp_sub_dir in d:
        for filename in files:
            filepath = os.path.join(root, filename)
            #do some work
As you 'walk' through, check whether the current directory is one of those listed in d. If it is, and if the file stored at that dictionary entry is in the current directory, then 'do something'. Seems simpler.
import os

d = {'folder_1': 'a', 'folder_2': 'b', 'folder_3': 'c'}
dir_path = "/Users/user_1/Desktop/images_testing"

for dirpath, dirnames, filenames in os.walk(dir_path):
    folder = os.path.split(dirpath)[1]  # the last path component, i.e. the folder name
    if folder in d and d[folder] in filenames:
        #do some work

count number of folders with given name

I am looking to get a count of folders and subfolders with a given name... Here I am searching for the number of subfolders named "L-4". It returns zero and I am sure that's not true. What did I miss?
import os

path = "R:\\"
i = 0
for (path, dirs, files) in os.walk(path):
    if os.path.dirname == "L-4":
        i += 1
print i
os.path.dirname is a reference to the standard library function, not a string. Perhaps you wanted to use os.path.dirname(path) instead here.
You could instead count how many times L-4 appears in the dirs list:
i = 0
for root, dirs, files in os.walk(path):
    i += dirs.count('L-4')
print i
or, as a one-liner:
print sum(dirs.count('L-4') for _, dirs, _ in os.walk(path))
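On Python 3, a pathlib version of the same count (a sketch, not part of the original answer) could be:

from pathlib import Path

count = sum(1 for p in Path(path).rglob('L-4') if p.is_dir())
print(count)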
