How to use os.system to convert all files in a folder at once using external python script - python

I've managed to find out the method to convert a file from one file extension to another (.evtx to .xml) using an external script. Below is what I am using:
os.system("file_converter.py file1.evtx > file1.xml")
This successfully converts a file from .txt to .xml using the external script I called (file_converter.py).
I am now trying to find out a method on how I can use 'os.system' or perhaps another method to convert more than one file at once, I would like for my program to dive into a folder and convert all of the 10 files I have at once to .xml format.
The questions I have are how is this possible as os.system only takes 1 argument and I'm not sure on how I could make it locate through a directory as unlike the first file I converted was on my standard home directory, but the folder I want to access with the 10 files is inside of another folder, I am trying to find out a way to address this argument and for the conversion to be done at once, I also want the file name to stay the same for each individual file with the only difference being the '.xml' being changed from '.evtx' at the end.
The file "file_converter.py" is downloadable from here

import threading
import os
def file_converter(file):
os.system("file_converter.py {0} > {1}".format(file, file.replace(".evtx", ".xml")))
base_dir = "C:\\Users\\carlo.zanocco\\Desktop\\test_dir\\"
for file in os.listdir(base_dir):
threading.Thread(target=file_converter, args=(file,)).start()
Here my sample code.
You can generate multiple thread to run the operation "concurrently". The program will check for all files in the directory and convert it.
EDIT python2.7 version
Now that we have more information about what you want I can help you.
This program can handle multiple file concurrently from one folder, it check also into the subfolders.
import subprocess
import os
base_dir = "C:\\Users\\carlo.zanocco\\Desktop\\test_dir\\"
commands_to_run = list()
#Search all files
def file_list(directory):
allFiles = list()
for entry in os.listdir(directory):
fullPath = os.path.join(directory, entry)
#if is directory search for more files
if os.path.isdir(fullPath):
allFiles = allFiles + file_list(fullPath)
else:
#check that the file have the right extension and append the command to execute later
if(entry.endswith(".evtx")):
commands_to_run.append("C:\\Python27\\python.exe file_converter.py {0} > {1}".format(fullPath, fullPath.replace(".evtx", ".xml")))
return allFiles
print "Searching for files"
file_list(base_dir)
print "Running conversion"
processes = [subprocess.Popen(command, shell=True) for command in commands_to_run]
print "Waiting for converted files"
for process in processes:
process.wait()
print "Conversion done"
The subprocess module can be used in two ways:
subprocess.Popen: it run the process and continue the execution
subprocess.call: it run the process and wait for it, this function return the exit status. This value if zero indicate that the process terminate succesfully
EDIT python3.7 version
if you want to solve all your problem just implement the code that you share from github in your program. You can easily implement it as function.
import threading
import os
import Evtx.Evtx as evtx
import Evtx.Views as e_views
base_dir = "C:\\Users\\carlo.zanocco\\Desktop\\test_dir\\"
def convert(file_in, file_out):
tmp_list = list()
with evtx.Evtx(file_in) as log:
tmp_list.append(e_views.XML_HEADER)
tmp_list.append("<Events>")
for record in log.records():
try:
tmp_list.append(record.xml())
except Exception as e:
print(e)
tmp_list.append("</Events>")
with open(file_out, 'w') as final:
final.writelines(tmp_list)
#Search all files
def file_list(directory):
allFiles = list()
for entry in os.listdir(directory):
fullPath = os.path.join(directory, entry)
#if is directory search for more files
if os.path.isdir(fullPath):
allFiles = allFiles + file_list(fullPath)
else:
#check that the file have the right extension and append the command to execute later
if(entry.endswith(".evtx")):
threading.Thread(target=convert, args=(fullPath, fullPath.replace(".evtx", ".xml"))).start()
return allFiles
print("Searching and converting files")
file_list(base_dir)
If you want to show your files generate, just edit as above:
def convert(file_in, file_out):
tmp_list = list()
with evtx.Evtx(file_in) as log:
with open(file_out, 'a') as final:
final.write(e_views.XML_HEADER)
final.write("<Events>")
for record in log.records():
try:
final.write(record.xml())
except Exception as e:
print(e)
final.write("</Events>")
UPDATE
If you want to delete the '.evtx' files after the conversion you can simply add the following rows at the end of the convert function:
try:
os.remove(file_in)
except(Exception, ex):
raise ex
Here you just need to use try .. except because you run the thread only if the input value is a file.
If the file doesn't exist, this function throws an exception, so it's necessary to check os.path.isfile() first.

import os, sys
DIR = "D:/Test"
# ...or as a command line argument
DIR = sys.argv[1]
for f in os.listdir(DIR):
path = os.path.join(DIR, f)
name, ext = os.path.splitext(f)
if ext == ".txt":
new_path = os.path.join(DIR, f"{name}.xml")
os.rename(path, new_path)
Iterates over a directory, and changes all text files to XML.

Related

Walk directories and remove file extensions

I'm trying to remove all the outlook .ost and .nst files from the user's folder on a network PC, as well as I'm trying to get it to write what files were removed into a CSV file.
I'm able to get it to find all the files in the directory and write it to a CSV file but when I try to remove the files with os.remove it doesn't seem to run, I hashed it out for the time being.
I added in the try and except, to skip the files that are in use.
import os
import sys
sys.stdout = open("output_file.csv", "w")
try:
for rootDir, subdir, files in os.walk("//network_pc_name/c$/Users"):
for filenames in files:
if filenames.endswith((".nst",".ost")):
foundfiles = os.path.join(rootDir, filenames)
#os.remove(os.path.join(rootDir, filenames))
print(foundfiles)
except:
pass
sys.stdout.close()
I made some change to the script as suggested and it appears to run alot quicker, however, I can't seem to figure out how to ignore files which are in use.
I switched the files extensions to .xlsx and .txt files to simulate the .xlsx file being open receiving the permissions error and to see if the script would continue to run and remove the .txt file.
I got the following error:
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: '//DESKTOP-HRLS19N/c$/globtest\Book1.xlsx
import glob
import os
files = [i for i in glob.glob("//DESKTOP-HRLS19N/c$/globtest/**", recursive = True) if i.endswith((".xlsx",".txt"))]
[os.remove(f) for f in files]
with open("output_file.csv", "w") as f:
f.writelines("\n".join(files))
In my experience glob is much easier:
print([i for i in glob.glob("//network_pc_name/c$/Users/**", recursive=True) if i.endswith((".nst", ".ost"))])
Assuming that prints out the files you're expecting:
files = [i for i in glob.glob("//network_pc_name/c$/Users/**", recursive=True) if i.endswith((".nst", ".ost"))]
removed_files = []
for file in files:
try:
size = os.path.getsize(file)
os.remove(file)
removed_files.append(file + " Bytes: " + size)
except Exception as e:
print("Could not remove file: " + file)
with open("output_file.csv", "w") as f:
f.writelines("\n".join(removed_files))

files not deletes when using os module

I tried to make a program which delete all of the empty files ( whose size is zero ). Then, i run the program by dragging the script file in "command prompt" and run it .
However, no empty files had deleted (but i have some of them).
Please help me to find the error in my code.
import os
a = os.listdir('C:\\Python27')
for folder in a :
sizes = os.stat('C:\\Python27')
b = sizes.st_size
s = folder
if b == 0 :
remove('C:\\Python27\s')
You're assigning the values iterator os.listdir returns to folder and yet you aren't using it at all in os.stat or os.remove, but instead you are passing to them fixed values that you don't need.
You should do something like this:
import os
dir = 'C:\\Python27'
for file_name in os.listdir(dir):
file_path = os.path.join(dir, file_name)
if os.stat(file_path).st_size == 0:
os.remove(file_path)
You can delete something like the following code and you need to add some exception handling. I have used a test folder name to demonstrate.
import os
import sys
dir = 'c:/temp/testfolder'
for root, dirs, files in os.walk(dir):
for file in files:
fname = os.path.join(root, file)
try:
if os.path.getsize(fname) == 0:
print("Removing file %s" %(fname))
os.remove(fname)
except:
print("error: unable to remove 0 byte file")
raise

command line arguments using python

I have this code that will let the user choose which file he wants to update by passing an argument in the command line, and then it do some more things but I have not included that here:
import sys
import os
from sys import argv
path = "/home/Desktop/python/test"
files = os.walk( path )
filename = argv[1]
if filename in files:
inputFile = open(filename, 'r')
else:
print "no match found"
sys.exit()
inputFile.close()
When I run the script it keeps giving me "no match found" but im pretty sure the file is there. I cant see what Im doing wrong
os.walk() returns a generator, one that produces tuples with (root, directories, files) values for each iteration.
You can't use that generator to test for a single file, not with a simple in membership test.
You'll also need to re-instate the whole path; you can't just open an unclassified filename without the directory it lives in. Just use a for loop here, and break once you found it. The else suite on a for loop only executes when you did not use break (e.g. the file was not found):
path = "/home/Desktop/python/test"
filename = argv[1]
for root, directories, files in os.walk(path):
if filename in files:
full_path = os.path.join(root, filename)
break
else:
print "no match found"
sys.exit()
with open(full_path) as input_file:
# do something with the file
I added a with statement to handle the lifetime of the file object; once the with block is exited the file is automatically closed for you.
Alternatively, you may use following code snippet.
import os.path
filename = argv[1]
path = "/home/Desktop/python/test/"
if os.path.isfile(path + filename):
inputFile = open(path + filename, "r")
else:
print "File Not Found"

Python - Watch a folder for new .zip file and upload via FTP

I am working on creating a script to watch a folder, grab any new .zip files, and then upload them via FTP to a predetermined area. Right now FTP testing is being performed Locally, since the environment isnt yet created.
The strategy I am taking is to first, unzip into a local folder. Then, perform ftplib.storbinary , on the file from the local folder, to the ftpdestination. However, the unzipping process appears to be working but I am getting a "file does not exist" error, all though I can see it in the folder itself.
Also, is there anyway to unzip directly into an FTP location? I havent been able to find a way hence the approach I am taking.
Thanks, local ftp info removed from code. All paths that are relevant in this code will be changed, most likely to dynamic fashion, but for now this is a local environment
extractZip2.py
import zipfile
import ftplib
import os
import logging
import time
from socket import error as socket_error
#Logging Setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('__name__')
FTPaddress = ''
FTPusername = ''
FTPpassword = ''
ftp_destination_location = ''
path_to_watch = "C:/Users/206420055/Desktop/test2/"
before = dict ([(f,None) for f in os.listdir(path_to_watch)])
temp_destination_location = "C:/Users/206420055/Desktop/temp/"
def unzip(fullPath,temporaryPath):
with zipfile.ZipFile(fullPath, "r") as z :
logger.info("Unzipping {0}".format(fullPath))
z.extractall(temporaryPath)
logger.info("Unzipped into local directory {0}".format(temp_destination_location))
def check_or_create_ftp(session, folder):
"""
Checks to see if necessary folder for currenttab is available.
Creates the folder if not found, and enters it.
"""
if folder not in session.nlst():
logger.info('Directory for {0} does not exist, creating directory\n'.format(folder))
session.mkd(folder)
session.cwd(folder)
def check_or_create(temp_destination):
"""
Checks to see if local savepath exists. Will create savepath if not exists.
"""
if not os.path.exists(temp_destination):
logger.info('Directory for %s does not exist, creating directory\n' % temp_destination)
os.makedirs(str(temp_destination))
def transfer(address,username,password,filename,destination):
logger.info("Creating Session")
try:
session = session_init(address,username,password,destination)
except (socket_error,ftplib.error_perm) as e:
logger.error(str(e))
logger.error("Error in Session Init")
else:
try:
logger.info("Sending File {0}".format(filename))
send_file(session,filename)
except (IOError, OSError, ftplib.error_perm) as e:
logger.error(e)
def session_init(address,username,password,path):
session = ftplib.FTP(address,username,password)
check_or_create_ftp(session,path)
logger.info("Session Established")
return session
def send_file(session,filename):
file = open(filename,'rb')
logger.info('Sending File : STOR '+filename)
session.storbinary('STOR '+ filename, file)
file.close()
def delete_local_files(savepath, file):
logger.info("Cleaning Up Folder {0}".format(savepath))
os.remove(file)
while 1:
time.sleep(5)
after = dict ([(f,None) for f in os.listdir(path_to_watch)])
added = [f for f in after if not f in before]
removed = [f for f in before if not f in after]
if added: print "Added: ",", ".join(added)
before = after
check_or_create(temp_destination_location)
if added :
for file in added:
print file
if file.endswith('.zip'):
unzip(path_to_watch+file, temp_destination_location)
temp_files = os.listdir(temp_destination_location)
print("Temp Files {0}".format(temp_files))
for tf in temp_files:
print("TF {0}".format(tf))
transfer(FTPaddress,FTPusername,FTPpassword,tf,ftp_destination_location)
#delete_local_files(temp_destination_location,tf)
else:
pass
edit: adding error image
Seen above, we see the file in the temp folder. But the console obviously shows the error.
just change it to
from glob import glob
zips_in_path = dict ([(f,None) for f in glob("{base_path}/*.zip".format(base_path = path_to_watch)])
os.listdir does not include the path_to_watch part of the path it is just the filenames, however glob does.
so you could also do
after = dict ([(os.path.join(path_to_watch,f),None) for f in os.listdir(path_to_watch)])
using either of these methods you should be able to get the full path to the files in the path

Python - IOError when running through folder tree

I am trying to read in a series of DICOM files in a folder tree and I am using the below code to run through the tree, reading in each file as I go. The problem is I am getting IOErrors for files that definitely exist, I have checked file permissions and other SO threads such as Python: IOError: [Errno 2] No such file or directory but I haven't managed to get it working without these IOErrors yet. Does anyone have any ideas?
for root, dirs, files in os.walk(path):
for fname in files:
name = os.path.basename(os.path.abspath(fname))
if name.startswith('.') == True:
pass
else:
try:
plan=dicom.read_file(fname)
ds=dicom.read_file(fname, stop_before_pixels = True)
kVp = TagChecker([0x18,0x60]) #KVP
Target = TagChecker([0x18,0x1191]) #ANODE
Filter = TagChecker([0x18,0x7050]) #
write_results.writerow([Survey_Number, Patient_ID, View_Protocol, int(kVp), Target, Filter, Thickness, mAs_Exposure, LPad_Yes_No, autoorman, AECMode, AECDset, Patient_Age, Comment, Compression_Force])
#print(fname)
except IOError:
print "IOError: ", "//" + os.path.join(root, fname) + "//"
except InvalidDicomError:
# This exception line prints an error message to the command line, checks to see if an error log
# has been generated for this session, writes a new one if not and then writes the error to the log file
print "Invalid Dicom File: ", fname
Usually a method that takes a filename, like dicom.read_file(fname), will take an absolute filename (or assume that the filename is relative to the dir that your main python program is running in, the cwd()). Can I suggest that you put this line in front of the first read_file() call:
print "reading: %s" % os.path.abspath(fname)
Then you'll see the filename that you're actually trying to read. I'm guessing it's not the file (or droids) you think you're looking for.
In order to fix your problem.. join the dir and the fname before you read.. e.g.
full_fname = os.path.join(dir, fname)
dicom.read_file(full_fname)
In other words, I think you're reading files with relative paths and you want to be using absolute paths.

Categories

Resources