Loop through subfolder and copy file with specific extension - python

I have a parent folder, let's call it "workspace". Within this parent folder there are subfolders, which in turn contain further subfolders that follow a specific naming convention. It looks something like this:
- Workspace
- Subfolder A
- Name
- Image
- Class
- Subfolder B
- Name
- Image
- Class
- Subfolder C
- Name
- Image
- Class
I need help or some direction in writing a script that iterates through subfolders A-C within the workspace and copies all files in the "Image" folder of each subfolder to a new destination.
This is what I have so far:
import os
import arcpy
import shutil
import fnmatch
# Placeholder locations: the folder that will hold scratch.gdb and the root folder to search.
workspace = "source"
pfolder = "rootdir"
files = os.listdir(workspace)
print (files)
# Path of the scratch geodatabase inside the workspace folder.
test = workspace + "\\scratch.gdb"
# Always start from a fresh geodatabase: delete an existing one, then create it again.
if os.path.exists(test):
print ("Scratch GDB already exists")
shutil.rmtree(test)
scratch = arcpy.CreateFileGDB_management(workspace,"scratch")
print ("Original Scratch GDB removed and new GDB created ")
else:
scratch = arcpy.CreateFileGDB_management(workspace,"scratch")
print ("Scratch GDB has been created")
# Walk pfolder and try to copy every *.jpg into the scratch geodatabase.
# NOTE(review): nothing in this snippet calls main(), so the loop below never runs as posted.
def main():
for dirname, dirnames, filenames in os.walk(pfolder):
for file in filenames:
if fnmatch.fnmatch(file,"*.jpg")==True:
# NOTE(review): `file` is only the name yielded by os.walk; it is passed on without `dirname`.
shutil.copy2(file,scratch)
print("Files have been copied!")
else:
print("Error in copying files")
I want to copy all jpg files in that subdirectory and place them in a geodatabase. For some reason it does not run the line of code that executes the loop and copy.

shutil may not work here: to put a raster file into a geodatabase, the output name must not include the file extension.
The code below is your code with minimal modifications (such as using CopyRaster_management instead of copy2) to make it work. It may not be the best code, because I was not concerned with that, but it works:
import os
import arcpy
import shutil
import fnmatch
# Folder that holds the scratch geodatabase, and the root folder searched for rasters.
workspace = "C:\\Teste\\"
pfolder = r'C:\Teste\\'
files = os.listdir(workspace)
print(files)

# Full path of the scratch geodatabase; os.path.join avoids doubled separators.
tests = os.path.join(workspace, "scratch.gdb")
sGdbP = tests

# Always start from an empty geodatabase: remove a previous one before creating it.
if os.path.exists(tests):
    print("Scratch GDB already exists")
    shutil.rmtree(tests)
    scratch = arcpy.CreateFileGDB_management(workspace, "scratch")
    print("Original Scratch GDB removed and new GDB created ")
else:
    scratch = arcpy.CreateFileGDB_management(workspace, "scratch")
    print("Scratch GDB has been created")

for dirname, dirnames, filenames in os.walk(pfolder):
    # The geodatabase lives under the folder being walked; do not descend into it.
    dirnames[:] = [d for d in dirnames if not d.lower().endswith(".gdb")]
    for file in filenames:
        if not fnmatch.fnmatch(file, "*.tif"):
            continue
        in_data = os.path.join(dirname, file)
        # The raster name inside the geodatabase cannot carry the file extension.
        out_data = os.path.join(sGdbP, os.path.splitext(file)[0])
        try:
            arcpy.CopyRaster_management(in_data, out_data)
        except Exception:
            # Best effort: report the failure and carry on with the next raster.
            print("Raster To Geodatabase example failed.")
            print(arcpy.GetMessages())
        else:
            print("Files have been copied!")

print("End of script")

Related

How to use os.system to convert all files in a folder at once using external python script

I've managed to find out the method to convert a file from one file extension to another (.evtx to .xml) using an external script. Below is what I am using:
os.system("file_converter.py file1.evtx > file1.xml")
This successfully converts a file from .evtx to .xml using the external script I called (file_converter.py).
I am now trying to find out a method on how I can use 'os.system' or perhaps another method to convert more than one file at once, I would like for my program to dive into a folder and convert all of the 10 files I have at once to .xml format.
The questions I have are how is this possible as os.system only takes 1 argument and I'm not sure on how I could make it locate through a directory as unlike the first file I converted was on my standard home directory, but the folder I want to access with the 10 files is inside of another folder, I am trying to find out a way to address this argument and for the conversion to be done at once, I also want the file name to stay the same for each individual file with the only difference being the '.xml' being changed from '.evtx' at the end.
The file "file_converter.py" is downloadable from here
import threading
import os
def file_converter(file):
    """Convert one ``.evtx`` file to XML by running ``file_converter.py`` on it.

    *file* is the path of the input file. The output is written next to it
    with the extension replaced by ``.xml``.
    """
    # Swap only the final extension, so the output can never be the input file itself.
    xml_file = os.path.splitext(file)[0] + ".xml"
    # Quote both paths so names containing spaces survive the shell.
    os.system('file_converter.py "{0}" > "{1}"'.format(file, xml_file))


base_dir = "C:\\Users\\carlo.zanocco\\Desktop\\test_dir\\"

for name in os.listdir(base_dir):
    if not name.endswith(".evtx"):
        continue
    # os.listdir yields bare names; the converter needs the full path.
    full_path = os.path.join(base_dir, name)
    threading.Thread(target=file_converter, args=(full_path,)).start()
Here my sample code.
You can start multiple threads to run the conversions concurrently. The program goes through all the files in the directory and converts them.
EDIT python2.7 version
Now that we have more information about what you want I can help you.
This program can handle multiple files from one folder concurrently; it also looks into the subfolders.
import subprocess
import os
base_dir = "C:\\Users\\carlo.zanocco\\Desktop\\test_dir\\"
commands_to_run = list()


#Search all files
def file_list(directory):
    """Recursively collect the ``.evtx`` files below *directory*.

    For every ``.evtx`` file found, a shell command that converts it to a
    sibling ``.xml`` file is appended to the module-level
    ``commands_to_run`` list.

    Returns:
        A list with the full path of every ``.evtx`` file found.
    """
    found = list()
    for entry in os.listdir(directory):
        full_path = os.path.join(directory, entry)
        if os.path.isdir(full_path):
            # Directory: search it for more files.
            found.extend(file_list(full_path))
        elif entry.endswith(".evtx"):
            # Replace only the final extension; a ".evtx" inside a folder
            # name must stay untouched.
            xml_path = os.path.splitext(full_path)[0] + ".xml"
            # Quote both paths so folders with spaces do not split the command.
            commands_to_run.append(
                'C:\\Python27\\python.exe file_converter.py "{0}" > "{1}"'.format(
                    full_path, xml_path))
            found.append(full_path)
    return found
# Driver for the Python 2.7 version (print statements, not print() calls).
print "Searching for files"
# Fills commands_to_run with one command per .evtx file found under base_dir.
file_list(base_dir)
print "Running conversion"
# Start one shell process per queued command; Popen does not wait for them.
processes = [subprocess.Popen(command, shell=True) for command in commands_to_run]
print "Waiting for converted files"
# Block until every conversion process has exited.
for process in processes:
process.wait()
print "Conversion done"
The subprocess module can be used in two ways:
subprocess.Popen: it starts the process and continues execution without waiting for it
subprocess.call: it runs the process and waits for it to finish; this function returns the exit status. A value of zero indicates that the process terminated successfully
EDIT python3.7 version
If you want to solve all your problems, just use the code you shared from GitHub directly in your program. You can easily wrap it in a function.
import threading
import os
import Evtx.Evtx as evtx
import Evtx.Views as e_views
base_dir = "C:\\Users\\carlo.zanocco\\Desktop\\test_dir\\"
def convert(file_in, file_out):
    """Convert the event log *file_in* into an XML document written to *file_out*.

    All records are rendered first and written in one go. A record that
    fails to render is reported with ``print`` and skipped.
    """
    tmp_list = list()
    with evtx.Evtx(file_in) as log:
        tmp_list.append(e_views.XML_HEADER)
        tmp_list.append("<Events>")
        for record in log.records():
            try:
                tmp_list.append(record.xml())
            except Exception as e:
                print(e)
        tmp_list.append("</Events>")
    # Explicit encoding: the platform default may be unable to encode
    # non-ASCII text found in the records.
    # NOTE(review): presumably this matches the encoding declared in XML_HEADER - confirm.
    with open(file_out, 'w', encoding='utf-8') as final:
        final.writelines(tmp_list)
#Search all files
def file_list(directory):
    """Recursively start one conversion thread per ``.evtx`` file below *directory*.

    Returns:
        A list with the full path of every ``.evtx`` file a thread was started for.
    """
    started = list()
    for entry in os.listdir(directory):
        full_path = os.path.join(directory, entry)
        if os.path.isdir(full_path):
            # Directory: search it for more files.
            started.extend(file_list(full_path))
        elif entry.endswith(".evtx"):
            # Replace only the final extension; a ".evtx" inside a folder
            # name must stay untouched.
            xml_path = os.path.splitext(full_path)[0] + ".xml"
            threading.Thread(target=convert, args=(full_path, xml_path)).start()
            started.append(full_path)
    return started


print("Searching and converting files")
file_list(base_dir)
If you want to see each output file grow while it is being generated, just change the convert function as shown below:
def convert(file_in, file_out):
    """Convert the event log *file_in* into an XML document, streaming it to *file_out*.

    Each record is written as soon as it is rendered. A record that fails
    to render is reported with ``print`` and skipped.
    """
    with evtx.Evtx(file_in) as log:
        # 'w' rather than 'a': running the conversion again must replace the
        # previous output, not append a second document to it.
        # NOTE(review): utf-8 presumably matches the encoding declared in XML_HEADER - confirm.
        with open(file_out, 'w', encoding='utf-8') as final:
            final.write(e_views.XML_HEADER)
            final.write("<Events>")
            for record in log.records():
                try:
                    final.write(record.xml())
                except Exception as e:
                    print(e)
            final.write("</Events>")
UPDATE
If you want to delete the '.evtx' files after the conversion you can simply add the following rows at the end of the convert function:
# Delete the source file once it has been converted.
try:
    os.remove(file_in)
except OSError as ex:
    # Say which file could not be deleted, then let the error propagate.
    print("Could not delete {0}: {1}".format(file_in, ex))
    raise
A plain try .. except is enough here, because a thread is only started when the input path is a file.
If the file doesn't exist, this function throws an exception, so it's necessary to check os.path.isfile() first.
import os, sys
# Directory to process: the first command-line argument when one is given,
# otherwise the built-in default.
DIR = sys.argv[1] if len(sys.argv) > 1 else "D:/Test"

for f in os.listdir(DIR):
    path = os.path.join(DIR, f)
    name, ext = os.path.splitext(f)
    # Only regular files are renamed; a folder whose name ends in ".txt" is left alone.
    if ext == ".txt" and os.path.isfile(path):
        new_path = os.path.join(DIR, f"{name}.xml")
        os.rename(path, new_path)
This iterates over a directory and renames every .txt file to .xml (only the extension changes; the file contents are left as they are).

Python - how to change directory

I am doing a school assignment where I have to take input from a user and save it to a text file.
My file structure will be something like:
- Customer register
- Customer ID
- .txt files 1-5
It can be saved in the python folder and I can make the folders like this:
os.makedirs("Customer register/Customer ID")
My question is, how do I set the path the text files are to be stored in, in the directory when I don't know the directory? So that no matter where the program is run it is saved in the "Customer ID" folder I create (but on the computer the program is run on)?
Also, how do I make this work on both windows and mac?
I also want to program to be able to be executed several times, and check if the folder is there and save to the "Customer ID" folder if it already exists. Is there a way to do that?
EDIT:
This is the code I am trying to use:
# Create "Folder" under the current directory and change into it.
try:
dirs = os.makedirs("Folder")
path = os.getcwd()
os.chdir(path + "/Folder")
print (os.getcwd())
except:
# NOTE(review): `os.path.exists` is referenced here without being called, so the condition is always true.
if os.path.exists:
path = os.getcwd()
# Random name intended for the per-customer folder.
unique_filename = str(uuid.uuid4())
customerpath = os.getcwd()
# NOTE(review): no line in this snippet creates the directory named by unique_filename before changing into it.
os.chdir(customerpath + "/Folder/" + unique_filename)
I am able to create a folder and change the directory (everything in "try" works as I want).
When this folder is created I want to create a second folder with a random generated folder name (used for saving customer files). I can't get this to work in the same way.
Error:
FileNotFoundError: [WinError 2] The system cannot find the file specified: 'C:\Users\48736\PycharmProjects\tina/Folder/979b9026-b2f6-4526-a17a-3b53384f60c4'
EDIT 2:
# Create "Folder" under the current directory (or reuse it) and change into it.
try:
os.makedirs("Folder")
path = os.getcwd()
os.chdir(path + "/Folder")
print (os.getcwd())
except:
# NOTE(review): `os.path.exists` is referenced here without being called, so the condition is always true.
if os.path.exists:
path = os.getcwd()
os.chdir(os.path.join(path, 'Folder'))
print(os.getcwd())
# Create a folder named folderid in the current directory if it does not exist yet.
def userId(folderid):
try:
if not os.path.exists(folderid):
os.makedirs(folderid)
except:
# NOTE(review): the only os.chdir into folderid sits in this except handler,
# so it is reached only when the try body raises.
if os.path.exists(folderid):
os.chdir(path + "/Folder/" + folderid)
userId(str(uuid.uuid4()))
print(os.getcwd())
So I can now create a folder, change directory to the folder I have created and create a new folder with a unique filename within that folder.
But I can't change the directory again to the folder with the unique filename.
Any suggestions?
I have tried:
os.chdir(path + "/Folder/" + folderid)
os.chdir(path, 'Folder', folderid)
os.chdir(os.path.join(path, 'Folder', folderid))
But is still just stays in: C:\Users\47896\PycharmProjects\tina\Folder
You can use relative paths in your create directory command, i.e.
os.makedirs("./Customer register/Customer ID")
to create folder in project root (=where the primary caller is located) or
os.makedirs("../Customer register/Customer ID") in parent directory.
You can, of course, traverse the files tree as you need.
For specific options mentioned in your question, please, see makedirs documentation at Python 3 docs
here is solution
import os
import shutil
import uuid
# Base location for the data: the directory the script is started from.
path_on_system = os.getcwd()
# Name of the working directory kept below that base location.
path = r'Folder'
dir_path = os.path.join(path_on_system, path)

# Create the working directory only when it is missing, so the script can be re-run.
if not os.path.exists(dir_path):
    os.makedirs(dir_path)

# Name of the file that is expected to have been created already.
file_name = str(uuid.uuid4())

if not (os.path.exists(file_name) and os.path.exists(dir_path)):
    print(" {} does not exist".format(file_name))
else:
    # Both the file and the working directory exist: move the file into it.
    target = os.path.join(dir_path, file_name)
    shutil.move(file_name, target)

files not deletes when using os module

I tried to write a program that deletes all of the empty files (those whose size is zero). I then ran it by dragging the script file into the command prompt and executing it.
However, no empty files were deleted (although I do have some).
Please help me find the error in my code.
import os
# Names of the entries inside C:\Python27.
a = os.listdir('C:\\Python27')
for folder in a :
# NOTE(review): the stat call uses the fixed directory path, not the current entry `folder`.
sizes = os.stat('C:\\Python27')
b = sizes.st_size
s = folder
if b == 0 :
# NOTE(review): `remove` is called unqualified although only `import os` is visible,
# and the path literal ends in the characters "\s" rather than using the variable s.
remove('C:\\Python27\s')
You're assigning the values iterator os.listdir returns to folder and yet you aren't using it at all in os.stat or os.remove, but instead you are passing to them fixed values that you don't need.
You should do something like this:
import os
# Folder whose empty files are deleted (sub-folders are not searched).
dir_path = 'C:\\Python27'

for file_name in os.listdir(dir_path):
    file_path = os.path.join(dir_path, file_name)
    # Regular files only: os.listdir also returns sub-folders, and os.remove
    # raises an error when it is given a directory.
    if os.path.isfile(file_path) and os.path.getsize(file_path) == 0:
        os.remove(file_path)
You can delete the empty files with something like the following code; note that you need some exception handling. I have used a test folder name to demonstrate.
import os
import sys
# Folder to clean up; every sub-folder below it is visited as well.
dir = 'c:/temp/testfolder'

for root, dirs, files in os.walk(dir):
    for file in files:
        fname = os.path.join(root, file)
        try:
            # Files that hold any data are left alone.
            if os.path.getsize(fname) != 0:
                continue
            print("Removing file %s" %(fname))
            os.remove(fname)
        except:
            # Report the failure, then let the original error propagate.
            print("error: unable to remove 0 byte file")
            raise

How do I iteratively copy logs from the local drive to a network share?

I'm new to Python. I'm running version 3.3. I'd like to iteratively copy all wildcard named folders and files from the C drive to a network share. Wildcard named folders are called "Test_1", "Test_2", etc. with folders containing the same named folder, "Pass". The files in "Pass" end with .log. I do NOT want to copy the .log files in the Fail folder. So, I have this:
C:\Test_1\Pass\a.log
C:\Test_1\Fail\a.log
C:\Test_1\Pass\b.log
C:\Test_1\Fail\b.log
C:\Test_2\Pass\y.log
C:\Test_2\Fail\y.log
C:\Test_2\Pass\z.log
C:\Test_2\Fail\z.log
but only want to copy
C:\Test_1\Pass\a.log
C:\Test_1\Pass\b.log
C:\Test_2\Pass\y.log
C:\Test_2\Pass\z.log
to:
\\share\Test_1\Pass\a.log
\\share\Test_1\Pass\b.log
\\share\Test_2\Pass\y.log
\\share\Test_2\Pass\z.log'
The following code works but I don't want to copy tons of procedural code. I'd like to make it object oriented.
import shutil, os
from shutil import copytree
# Copy the C:\Test_1\Pass folder to the share when it exists locally.
def main():
source = ("C:\\Test_1\\Pass\\")
destination = ("\\\\share\\Test_1\\Pass\\")
if os.path.exists ("C:\\Test_1\\Pass\\"):
shutil.copytree (source, destination)
print ('Congratulations! Copy was successfully completed!')
else:
# NOTE(review): '%source' is a plain string literal; no formatting is applied to it,
# so the text is printed verbatim.
print ('There is no Actual folder in %source.')
main()
Also, I noticed it is not printing the "else" print statement when the os path does not exist. How do I accomplish this? Thanks in advance!
This is not a perfect example but you could do this:
import glob, os, shutil
#root directory
start_dir = 'C:\\'


def copy_to_remote(local_folders, remote_path, root=None):
    """Copy each folder in *local_folders* below *remote_path*.

    The path of every source folder relative to *root* is re-created under
    *remote_path*. A destination that already exists, or that may not be
    written, is reported with ``print`` and skipped.

    Args:
        local_folders: iterable of directory paths located below *root*.
        remote_path: destination directory; nothing is copied when it does
            not exist.
        root: directory the sources are made relative to. Defaults to the
            module-level ``start_dir``.
    """
    if root is None:
        root = start_dir
    if not os.path.exists(remote_path):
        print('{} - does not exist'.format(remote_path))
        return
    for source in local_folders:
        # relpath removes root as a path prefix. str.lstrip(root) would strip
        # every leading character that occurs anywhere in root, which can
        # eat the start of the first folder name.
        dest = os.path.join(remote_path, os.path.relpath(source, root))
        try:
            shutil.copytree(source, dest)
            print('Congratulations! Copy was successfully completed!')
        except FileExistsError as fe_err:
            print(fe_err)
        except PermissionError as pe_err:
            print(pe_err)


# Find all directories that start start_dir\Test_ and have subdirectory Pass
dir_list = glob.glob(os.path.join(start_dir, 'Test_*\\Pass'))
if dir_list:
    copy_to_remote(dir_list, '\\\\Share\\')
Documentation for glob can be found here.
def remotecopy(local, remote):
    """Copy the directory tree *local* to *remote*.

    When *local* does not exist nothing is copied and a notice naming the
    missing folder is printed instead.
    """
    if os.path.exists(local):
        shutil.copytree(local, remote)
        print('Congratulations! Copy was successfully completed!')
    else:
        # Interpolate the path; a bare '%local' inside the literal would be printed as-is.
        print('There is no Actual folder in %s.' % local)
Then just call remotecopy(r"C:\Local\Whatever", r"C:\Remote\Whatever") (raw strings keep the backslashes literal).

Creating a directory within directories using the python scripting language

Please find my python script below:
import os;
import sys;
# Root folder that holds the numbered directories, taken from the command line.
dir_dst = sys.argv[1]
for x in range(150) :
dirname = str(x)
# Numbered directory, e.g. <dir_dst>/0
dst_dir = os.path.join(dir_dst, dirname)
dirname = "annotation"
# Target directory, e.g. <dir_dst>/0/annotation
dst = os.path.join(dst_dir, dirname)
print dst
# NOTE(review): the existence test is made on dst_dir (the numbered directory),
# while the directory being created is dst.
if not os.path.exists(dst_dir):
os.mkdir(dst)
The aim is to create a directory called "annotation" within each of the numbered directories ranging as in the code above. This code doesn't do it and on printing the value of "dst", here's an example of what it shows:
NLP/test data/reconcile/0\annotation
NLP/test data/reconcile/1\annotation
How can this be resolved?
Change the second to last line to
if not os.path.exists(dst):
Right now you're checking if the original directory exists.

Categories

Resources