Walk directories and remove file extensions - python

I'm trying to remove all the outlook .ost and .nst files from the user's folder on a network PC, as well as I'm trying to get it to write what files were removed into a CSV file.
I'm able to get it to find all the files in the directory and write it to a CSV file but when I try to remove the files with os.remove it doesn't seem to run, I hashed it out for the time being.
I added in the try and except, to skip the files that are in use.
import os
import sys
sys.stdout = open("output_file.csv", "w")
try:
for rootDir, subdir, files in os.walk("//network_pc_name/c$/Users"):
for filenames in files:
if filenames.endswith((".nst",".ost")):
foundfiles = os.path.join(rootDir, filenames)
#os.remove(os.path.join(rootDir, filenames))
print(foundfiles)
except:
pass
sys.stdout.close()
I made some change to the script as suggested and it appears to run alot quicker, however, I can't seem to figure out how to ignore files which are in use.
I switched the files extensions to .xlsx and .txt files to simulate the .xlsx file being open receiving the permissions error and to see if the script would continue to run and remove the .txt file.
I got the following error:
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: '//DESKTOP-HRLS19N/c$/globtest\Book1.xlsx
import glob
import os
files = [i for i in glob.glob("//DESKTOP-HRLS19N/c$/globtest/**", recursive = True) if i.endswith((".xlsx",".txt"))]
[os.remove(f) for f in files]
with open("output_file.csv", "w") as f:
f.writelines("\n".join(files))

In my experience glob is much easier:
print([i for i in glob.glob("//network_pc_name/c$/Users/**", recursive=True) if i.endswith((".nst", ".ost"))])
Assuming that prints out the files you're expecting:
files = [i for i in glob.glob("//network_pc_name/c$/Users/**", recursive=True) if i.endswith((".nst", ".ost"))]
removed_files = []
for file in files:
try:
size = os.path.getsize(file)
os.remove(file)
removed_files.append(file + " Bytes: " + size)
except Exception as e:
print("Could not remove file: " + file)
with open("output_file.csv", "w") as f:
f.writelines("\n".join(removed_files))

Related

Unable to load contents while reading a .txt file in Python3

I am intending to extract some data stored in a .txt file using python 3, however, when I tried to print out the file content, the program does not display any thing in the console. This is the code snippet I use to read the file:
def get_data(directory):
entries = os.listdir(directory)
#print(entries)
count = 0;
for file in entries:
#print(file)
if file.endswith('.txt'):
with open(file) as curr_file:
#print(curr_file)
#read data and write it to an
#excel worksheet
print(curr_file.readline())
curr_file.close()
What kind of changes am I supposed to make to let the program display contents of the file?
Update: I tried to print out all files saved in entries and the result looks fine. The following is the code snippet I used to unzip files in the directory, I am not sure whether there're anything wrong with it.
def read_zip(path):
file_list = os.listdir(path)
#print(file_list)
#create a new directory and store
#the extracted file there
directory = 'C:/Users/chent/Desktop/Test'
try:
if not os.path.exists(directory):
os.makedirs(directory, exist_ok=True)
print('Folder created')
except FileExistsError:
print ('Directory not created')
for file in file_list:
if file.endswith('.zip'):
filePath=path+'/'+file
zip_file = zipfile.ZipFile(filePath)
for names in zip_file.namelist():
zip_file.extract(names, directory)
get_data(directory)
zip_file.close()
Solution: It turns out that I didn't specify the file path when use with open() statement, which caused the program unable to locate files. To fix it, use with open(file_path, file, "r") as curr_file. See details in my updated code:
def get_data(path):
files = os.listdir(path)
for file in files:
#print(file)
try:
if file.endswith('.txt'):
print(file)
with open('C:/Users/chent/Desktop/Test/' + file, "r", ) as curr_file:
# print(curr_file.readlines())
print(curr_file)
line = curr_file.readline()
print(line)
except FileNotFoundError:
print ('File not found')
path = 'C:/Users/chent/Desktop/Test'
get_data(path)
The problem is that you use curr_file.readline() which only returns the first line.
Use curr_file.read() to get the whole file contents.

How to use os.system to convert all files in a folder at once using external python script

I've managed to find out the method to convert a file from one file extension to another (.evtx to .xml) using an external script. Below is what I am using:
os.system("file_converter.py file1.evtx > file1.xml")
This successfully converts a file from .txt to .xml using the external script I called (file_converter.py).
I am now trying to find out a method on how I can use 'os.system' or perhaps another method to convert more than one file at once, I would like for my program to dive into a folder and convert all of the 10 files I have at once to .xml format.
The questions I have are how is this possible as os.system only takes 1 argument and I'm not sure on how I could make it locate through a directory as unlike the first file I converted was on my standard home directory, but the folder I want to access with the 10 files is inside of another folder, I am trying to find out a way to address this argument and for the conversion to be done at once, I also want the file name to stay the same for each individual file with the only difference being the '.xml' being changed from '.evtx' at the end.
The file "file_converter.py" is downloadable from here
import threading
import os
def file_converter(file):
os.system("file_converter.py {0} > {1}".format(file, file.replace(".evtx", ".xml")))
base_dir = "C:\\Users\\carlo.zanocco\\Desktop\\test_dir\\"
for file in os.listdir(base_dir):
threading.Thread(target=file_converter, args=(file,)).start()
Here my sample code.
You can generate multiple thread to run the operation "concurrently". The program will check for all files in the directory and convert it.
EDIT python2.7 version
Now that we have more information about what you want I can help you.
This program can handle multiple file concurrently from one folder, it check also into the subfolders.
import subprocess
import os
base_dir = "C:\\Users\\carlo.zanocco\\Desktop\\test_dir\\"
commands_to_run = list()
#Search all files
def file_list(directory):
allFiles = list()
for entry in os.listdir(directory):
fullPath = os.path.join(directory, entry)
#if is directory search for more files
if os.path.isdir(fullPath):
allFiles = allFiles + file_list(fullPath)
else:
#check that the file have the right extension and append the command to execute later
if(entry.endswith(".evtx")):
commands_to_run.append("C:\\Python27\\python.exe file_converter.py {0} > {1}".format(fullPath, fullPath.replace(".evtx", ".xml")))
return allFiles
print "Searching for files"
file_list(base_dir)
print "Running conversion"
processes = [subprocess.Popen(command, shell=True) for command in commands_to_run]
print "Waiting for converted files"
for process in processes:
process.wait()
print "Conversion done"
The subprocess module can be used in two ways:
subprocess.Popen: it run the process and continue the execution
subprocess.call: it run the process and wait for it, this function return the exit status. This value if zero indicate that the process terminate succesfully
EDIT python3.7 version
if you want to solve all your problem just implement the code that you share from github in your program. You can easily implement it as function.
import threading
import os
import Evtx.Evtx as evtx
import Evtx.Views as e_views
base_dir = "C:\\Users\\carlo.zanocco\\Desktop\\test_dir\\"
def convert(file_in, file_out):
tmp_list = list()
with evtx.Evtx(file_in) as log:
tmp_list.append(e_views.XML_HEADER)
tmp_list.append("<Events>")
for record in log.records():
try:
tmp_list.append(record.xml())
except Exception as e:
print(e)
tmp_list.append("</Events>")
with open(file_out, 'w') as final:
final.writelines(tmp_list)
#Search all files
def file_list(directory):
allFiles = list()
for entry in os.listdir(directory):
fullPath = os.path.join(directory, entry)
#if is directory search for more files
if os.path.isdir(fullPath):
allFiles = allFiles + file_list(fullPath)
else:
#check that the file have the right extension and append the command to execute later
if(entry.endswith(".evtx")):
threading.Thread(target=convert, args=(fullPath, fullPath.replace(".evtx", ".xml"))).start()
return allFiles
print("Searching and converting files")
file_list(base_dir)
If you want to show your files generate, just edit as above:
def convert(file_in, file_out):
tmp_list = list()
with evtx.Evtx(file_in) as log:
with open(file_out, 'a') as final:
final.write(e_views.XML_HEADER)
final.write("<Events>")
for record in log.records():
try:
final.write(record.xml())
except Exception as e:
print(e)
final.write("</Events>")
UPDATE
If you want to delete the '.evtx' files after the conversion you can simply add the following rows at the end of the convert function:
try:
os.remove(file_in)
except(Exception, ex):
raise ex
Here you just need to use try .. except because you run the thread only if the input value is a file.
If the file doesn't exist, this function throws an exception, so it's necessary to check os.path.isfile() first.
import os, sys
DIR = "D:/Test"
# ...or as a command line argument
DIR = sys.argv[1]
for f in os.listdir(DIR):
path = os.path.join(DIR, f)
name, ext = os.path.splitext(f)
if ext == ".txt":
new_path = os.path.join(DIR, f"{name}.xml")
os.rename(path, new_path)
Iterates over a directory, and changes all text files to XML.

File not found while looping through folder of .txt files?

I wrote a program to loop through a folder of text files, and for each one, read it and write its edited contents to a new txt file. When I write to a new file, I add "JSP" to the file name, and so I included an if statement to avoid editing a file with JSP in its name. It gives me an error message that suggests that it tried to do the method writeToFile on a JSP file, and it couldn't be found within the folder. This confuses me because
if it's looping through the files and gets to that specific file, it should exist, and
it shouldn't even enter the if statement if it has "JSP" in its filename.
Any ideas?
import program
import os
def main():
directoryStr = "/Users/Elle/Documents/TMR/txtfiles/untitled folder"
directory = os.fsencode(directoryStr)
for file in os.listdir(directory):
filename = os.fsdecode(file)
if ".txt" in filename and "JSP" not in filename:
storedProcedure = program.StoredProcedure(filename)
storedProcedure.writeToFile()
main()
newFile = open(self.newName + ".txt", "w", encoding="utf16")
FileNotFoundError: [Errno 2] No such file or directory: 'JSP_Pgm_JpgmAPARCustSummary_Ctrl_Pay/Rec_summedbycustid_LtorGr0.txt'
Try doing things this way — as I said in a comment, os.listdir() only gives you a list of filenames, not complete file paths.
import program
import os
def main():
directory = "/Users/Elle/Documents/TMR/txtfiles/untitled folder"
for filename in os.listdir(directory):
if ".txt" in filename and "JSP" not in filename:
filepath = os.path.join(directory, filename)
storedProcedure = program.StoredProcedure(filepath)
storedProcedure.writeToFile()
main()

python 2 [Error 32] The process cannot access the file because it is being used by another process

I'm working with python 2 and have read several posts about this error i.e(this post).
However, I'm still getting the error.
What I do is:
I read the files in a directory, if any of the files contains a specific string, I delete the directory.
def select_poo():
path = os.walk('/paila_candonga/')
texto = 'poo'
extension = '.tex'
for root, dirs, files in path:
for documento in files:
if extension in documento:
with open(os.path.join(root, documento), 'r') as fin:
for lines in fin:
if texto in lines:
shutil.rmtree(root)
else:
continue
Then I get the error:
WindowsError: [Error 32] The process cannot access the file because it is being used by another process
I have also tried using the absolute path:
def select_poo():
path = os.walk('/paila_candonga/')
texto = 'poo'
extension = '.tex'
for root, dirs, files in path:
for documento in files:
if extension in documento:
with open(os.path.join(root, documento), 'r') as fin:
for lines in fin:
if texto in lines:
route = (os.path.join(root, documento))
files = os.path.basename(route)
folder = os.path.dirname(route)
absolut= os.path.dirname(os.path.abspath(route))
todo = os.path.join(absolut, files)
print todo
else:
continue
Then I will get:
C:\paila_candonga\la_Arepa.tex
C:\paila_candonga\sejodio\laOlla.tex
C:\paila_candonga\sejodio\laPaila.tex
If I remove one file at a time, using the same absolute path and os.remove(''), I won't have problems. If I try to delete all files at once using select_poo() and shutil.rmtree(folder) or os.remove(absolut), I will have the Error 32.
Is there a way I can do a loop through each of the paths in todo and remove them without having the error 32?
Thanks,
it happens here :
with open(os.path.join(root, documento), 'r') as fin:
So you have your file open and locked, that is why you are not able delete this folder using:
shutil.rmtree(root)
within this statement, you have to do outside of with statement

os.renames for ftp in python

I want to move a large number of files from a windows system to a unix ftp server using python. I have a csv which has the current full path and filename and the new base bath to send it to (see here for an example dataset).
I have got a script using os.renames to do the transfer and directory creation in windows but can figure out a way to easily do it via ftp.
import os, glob, arcpy, csv, sys, shutil, datetime
top=os.getcwd()
RootOutput = top
startpath=top
FileList = csv.reader(open('FileList.csv'))
filecount=0
successcount=0
errorcount=0
# Copy/Move to FTP when required
ftp = ftplib.FTP('xxxxxx')
ftp.login('xxxx', 'xxxx')
directory = '/TransferredData'
ftp.cwd(directory)
##f = open(RootOutput+'\\Success_LOG.txt', 'a')
##f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
##f.close()
##
for File in FileList:
infile=File[0]
# local network ver
#outfile=RootOutput+File[4]
#os.renames(infile, outfile)
# ftp netowrk ver
# outfile=RootOutput+File[4]
# ftp.mkd(directory)
print infile, outfile
I tried the process in http://forums.arcgis.com/threads/17047-Upload-file-to-FTP-using-Python-ftplib but this is for moving all files in a directory, I have the old and new full file names and just need it to create the intermediate directories.
Thanks,
The following might work (untested):
def mkpath(ftp, path):
path = path.rsplit('/', 1)[0] # parent directory
if not path:
return
try:
ftp.cwd(path)
except ftplib.error_perm:
mkpath(ftp, path)
ftp.mkd(path)
ftp = FTP(...)
directory = '/TransferredData/'
for File in FileList:
infile = File[0]
outfile = File[4].split('\\') # need forward slashes in FTP
outfile = directory + '/'.join(outfile)
mkpath(ftp, outfile)
ftp.storbinary('STOR '+outfile, open(infile, 'rb'))

Categories

Resources