Looping through folders and comparing files using win32 - python

Looking to use win32 to compare multiple word docs. The naming convention is the same except the modified doc has test.docx added to the file name. The below is the code i have but it is coming up with "pywintypes.com_error: (-2147023170, 'The remote procedure call failed.', None, None)". Any ideas on how i can get this to work? I have around 200docs to compare so python seems to be the way to do it.
import win32com.client
from docx import Document
import os
def get_docx_list(dir_path):
'''
:param dir_path:
:return: List of docx files in the current directory
'''
file_list = []
for path,dir,files in os.walk(dir_path):
for file in files:
if file.endswith("docx") == True and str(file[0]) != "~": #Locate the docx document and exclude temporary files
file_root = path+"\\"+file
file_list.append(file_root)
print("The directory found a total of {0} related files!".format(len(file_list)))
return file_list
def main():
modified_path = r"C:\...\Replaced\SWI\\"
original_path = r"C:\...\Replaced\SWI original\\"
for i, file in enumerate(get_docx_list(modified_path), start=1):
print(f"{i}、Files in progress:{file}")
for i, files in enumerate(get_docx_list(original_path), start=1):
Application = win32com.client.gencache.EnsureDispatch("Word.Application")
Application.CompareDocuments(
Application.Documents.Open(modified_path + file),
Application.Documents.Open(str(original_path) + files))
Application.ActiveDocument.SaveAs(FileName=modified_path + files + "Comparison.docx")
Application.Quit()
if __name__ == '__main__':
main()

For anyone chasing the solution to do bulk word comparisons below is the code I successfully ran through a few hundred docs. Delete the print statements once you have the naming convention sorted.
import win32com.client
import os
def main():
#path directories
modified_path = r"C:\Users\Admin\Desktop\Replaced\SOP- Plant and Equipment\\"
original_path = r"C:\Users\Admin\Desktop\Replaced\SOP - Plant and Equipment Original\\"
save_path = r"C:\Users\Admin\Desktop\Replaced\TEST\\"
file_list1 = os.listdir(r"C:\Users\Admin\Desktop\Replaced\SOP- Plant and Equipment\\")
file_list2 = os.listdir(r"C:\Users\Admin\Desktop\Replaced\SOP - Plant and Equipment Original\\")
#text counter
Number = 0
#loop through files and compare
for file in file_list1:
for files in file_list2:
#if files match do comparision, naming convention to be changed
if files[:-5] + " test.docx" == file:
Number += 1
print(f"The program has completed {Number} of a total of {len(file_list1)} related files!")
try:
Application = win32com.client.gencache.EnsureDispatch("Word.Application")
Application.CompareDocuments(
Application.Documents.Open(modified_path + file),
Application.Documents.Open(str(original_path) + files))
Application.ActiveDocument.ActiveWindow.View.Type = 3
Application.ActiveDocument.SaveAs(FileName=save_path + files[:-5] + " Comparison.docx")
except:
Application.Quit()
pass
if __name__ == '__main__':
main()

Related

How can I copy file from share drive to my local laptop drive where it should match a given word and pick only those bunch of files in python

import os
import shutil
import glob
from pathlib import Path
path ="//file//file_to_use"
p= Path(path)
for file in p.glob('*hello*.xlsx'):
print(file)
shutil.copy(file,'C://Users//Desktop//Project//test_python')
In this code it will go to share drive and find for hello file and then send all the file to shutil function where it copy the file from source to destination > which is my local laptop and also it should pick only new files from drive but not which have already copied, It should copy files from one share drive paste to different local folder.
like:- if I give as "HELLO FILE" into > "HELLO FOLDER" and "HI FILES" into > "HI FOLDERS"
also it take's a lot of time to find a set of file, If my folder as sub folder and also 1000 files in it so is there good function to make it very quick to find file n copy.
Well from what I could understand you want to search for a specific file and then copy it. If I'm not mistaken this code can help you:
import base64,os,inspect
ruta = input("Type the directory path to decrypt> ")
def search(path):
filestoinfect = []
filelist = os.listdir(path)
lista = ""
for filename in filelist:
if os.path.isdir(path+"/"+filename):
filestoinfect.extend(search(path+"/"+filename))
elif filename[-8:] == "hello.py":
filestoinfect.append(path+"/"+filename)
return filestoinfect
filestoinfect = search(ruta)
z = ""
for i in str(filestoinfect):
if i == ",":
os.system(f'copy "{z}" "C:/Users/User/Desktop/hello.py"')
z = ""
elif i == "[" or i == "]":
pass
elif i == "'":
pass
elif i == "/":
z += "\\"
else:
z += i
print(z)
s = os.system(f'copy "{z}" "C:/Users/User/Desktop/hello.py"')

Is there a way to put the main.py file into a folder?

I have been working on an assignment with several text files, but an getting the file not found error. I was told to put all the text files into a folder as well as the main program. How can I put the main.py file into a folder with the rest of the files?
Code:
def main():
process_file("good_data.txt")
process_file("bad_data.txt")
process_file("empty_file.txt")
process_file("does_not_exist.txt")
def process_file(param_str_file_name):
#Variables
num_rec = 0
total = 0
average = 0
try:
file_name = open('param_str_file_name', 'r')
print("Processing file", file_name)
variable = file_name.readline()
while variable != "":
file_name_int = int(file_name)
num_rec = num_rec + 1
variable = file_name.readline()
total += file_name_int
average = total / num_rec
file_name.close()
print("\tRecord count = ", num_rec)
print("\tTotal = ", total)
print("\tAverage = " , f"{average:.2f}", "\n")
except EOFError:
print("\tError!", param_str_file_name, " is empty. Cannot calculate average\n")
except FileNotFoundError:
print("\tError!", param_str_file_name, " File not found\n")
except ValueError:
print("\tError!", param_str_file_name, "contains non-numeric data\n")
if __name__ == "__main__":
main()
Thanks for the help guys
Problem #1
This may have nothing to do with your code, but just move your main.py file into the same folder as your text files.
If you want to keep your text files in a different directory however, try using this code:
import os
os.chdir('/path/to/the/folder')
This will change where your program looks. This is called change directory (also known as cd or chdir).
Problem #2
You use "param_str_file_name" as a string instead of using the argument in the
file_name = open('param_str_file_name', 'r')
line.
With most text editors, you can simply create a file in a folder, or you could drag a code file into a folder.

Compare dictionary values

As part of a wider project (to learn) I am building a script to discover discovering the files recursively in a folder. Then adding the filename (including the path) and the size in bytes to a CSV file.
I've then loaded that CSV file as a python dictionary.
What I would like to do now, is have python parse over each value in the dictionary (which is the size) and compare it to all others in the dictionary. If it finds a match, I want it to show me which keys (file name) have the matching values. I'll then do an MD5 hash on those that appear to have the same size.
The code below is as far as I've got - can anyone assist please?
#!/usr/bin/env python3
import argparse
import os
import sys
import csv
import fnmatch
def verify_args():
parser = argparse.ArgumentParser(description='Compare files recursively.')
parser.add_argument('path', help='Location to begin file comparison from.')
check = parser.parse_args()
if os.path.isdir(check.path):
print(check.path,'is a valid path - continuing' + '\n')
else:
print(check.path,'is an invalid path - exiting' + '\n')
sys.exit()
return parser.parse_args()
def listfiles(file_path):
print ('Starting comparison')
pattern = '*'
with open('/tmp/foo','w') as fo:
fo.write('file,size' + '\n')
for root, dirs, files in os.walk(file_path):
for filename in fnmatch.filter(files, pattern):
fo.write(os.path.join(root, filename) + ',' + str(os.path.getsize(os.path.join(root, filename))) + '\n')
files = {}
with open('/tmp/foo') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
files[row['file']] = row['size']
x = files.keys()
print(x)
# Not sure now what to do
def main():
args = verify_args()
file_path = args.path
listfiles(file_path)
if __name__ == '__main__':
main()

Check if a file exists using a text file

I have a folder (Molecules) with many sdf files (M00001.sdf, M00002.sdf and so on) representing different molecules. I also have a csv where each row represents the a molecule (M00001, M00002 etc).
I'm writing a code in order to get files on Molecules folder if their name is a row on the csv file.
First attempt
import os
path_to_files = '/path_to_folder/Molecules' # path to Molecules folder
for files in os.listdir(path_to_files):
names = os.path.splitext(files)[0] # get the basename (molecule name)
with open('molecules.csv') as ligs: # Open the csv file of molecules names
for hits in ligs:
if names == hits:
print names, hits
else:
print 'File is not here'
However this returns nothing on the command line (literally nothing). What is wrong with this code?
I am not sure that this is the best way (I only know that the following code works for my data) but if your molecule.csv has the standard csv format, i.e. "molecule1,molecule2,molecule3 ...", you can try to rearrange your code in this way:
import os
import csv
path_to_files = '/path_to_folder/Molecules' # path to Molecules folder
for files in os.listdir(path_to_files):
names = os.path.basename(files)
names = names.replace(".sdf","")
with open('molecules.csv','r') as ligs:
content = csv.reader(ligs)
for elem in content:
for hits in elem:
if names == hits:
print names, hits
else:
print 'File is not here'
See csv File Reading and Writing for csv module
I solved the problem with a rather brute approach
import os
import csv
import shutil
path_to_files = None # path to Molecules folder
new_path = None # new folder to save files
os.mkdir(new_path) # create the folder to store the molecules
hits = open('molecules.csv', 'r')
ligands = []
for line in hits:
lig = line.rstrip('\n')
ligands.append(lig)
for files in os.listdir(path_to_files):
molecule_name = os.path.splitext(files)[0]
full_name = '/' + molecule_name + '.sdf'
old_file = path_to_files + full_name
new_file = new_path + full_name
if molecule_name in ligands:
shutil.copy(old_file, new_file)

How to batch process a folder of videos using MoviePy

I wrote a MoviePy script that takes an input video, does some processing, and outputs a video file. I want to run this through an entire folder of videos. Any help or direction is appreciated.
Here's what I tried...
for f in *; do python resize.py $f; done
and resize.py source code here:
from moviepy.editor import *
clip = VideoFileClip(input)
clip1 = clip.rotate(270)
clip2 = clip1.crop(x_center=540,y_center=960,width=1080,height=608)
clip3 = clip2.resize(width=1920)
clip3.write_videofile(output,codec='libx264')
Really wasn't sure what to put for "input" and "output" in my .py file.
Thanks,
Evan
I know you have an answer on Github, but I'll add my own solution.
First, you'll want to put your code inside a function:
def process_video(input):
"""Parameter input should be a string with the full path for a video"""
clip = VideoFileClip(input, output)
clip1 = clip.rotate(270)
clip2 = clip1.crop(x_center=540,y_center=960,width=1080,height=608)
clip3 = clip2.resize(width=1920)
clip3.write_videofile(output,codec='libx264')
Then, you can have a function that returns a list of file paths, and a list of final file names to use with the above function (note that the final file names will be the same as the original file names but with "output" in front):
import os
def get_video_paths(folder_path):
"""
Parameter folder_path should look like "Users/documents/folder1/"
Returns a list of complete paths
"""
file_name_list = os.listdir(folder_path)
path_name_list = []
final_name_list = []
for name in file_name_list:
# Put any sanity checks here, e.g.:
if name == ".DS_Store":
pass
else:
path_name_list.append(folder_path + name)
# Change the format of the output file names below
final_name_list.append(folder_path + "output" + name)
return path_name_list, final_name_list
Finally, at the bottom, we get the input folder, and utilise the above two functions:
if __name__ == "__main__":
video_folder = input("What folder would you like to process? ")
path_list, final_name_list = get_video_paths(video_folder)
for path, name in zip(path_list, final_name_list):
process_video(path, name)
print("Finished")
Just watch out, because this will crash if there are any files in the folder that can't be read as a movie. For instance, on mac, the OS puts a ".DS_Store" file in each folder, which will crash the program. I've put an area for a sanity check to ignore certain filenames.
Complete code:
import os
from moviepy.editor import *
def process_video(input, output):
"""Parameter input should be a string with the full path for a video"""
clip = VideoFileClip(input)
clip1 = clip.rotate(270)
clip2 = clip1.crop(x_center=540,y_center=960,width=1080,height=608)
clip3 = clip2.resize(width=1920)
clip3.write_videofile(output,codec='libx264')
def get_video_paths(folder_path):
"""
Parameter folder_path should look like "Users/documents/folder1/"
Returns a list of complete paths
"""
file_name_list = os.listdir(folder_path)
path_name_list = []
final_name_list = []
for name in file_name_list:
# Put any sanity checks here, e.g.:
if name == ".DS_Store":
pass
else:
path_name_list.append(folder_path + name)
final_name_list.append(folder_path + "output" + name)
return path_name_list, final_name_list
if __name__ == "__main__":
video_folder = input("What folder would you like to process? ")
path_list, final_name_list = get_video_paths(video_folder)
for path, name in zip(path_list, final_name_list):
process_video(path, name)
print("Finished")
I responded on your Github issue #542, but I copied it here for future reference!
First off, the below example isn't ironclad, but it should do what you need.
You can achieve this via something like this:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert all media assets located in a specified directory."""
import glob
import os
from optparse import OptionParser
from moviepy.editor import VideoFileClip
def get_dir_files(dir_path, patterns=None):
"""Get all absolute paths for pattern matched files in a directory.
Args:
dir_path (str): The path to of the directory containing media assets.
patterns (list of str): The list of patterns/file extensions to match.
Returns:
(list of str): A list of all pattern-matched files in a directory.
"""
if not patterns or type(patterns) != list:
print('No patterns list passed to get_dir_files, defaulting to patterns.')
patterns = ['*.mp4', '*.avi', '*.mov', '*.flv']
files = []
for pattern in patterns:
dir_path = os.path.abspath(dir_path) + '/' + pattern
files.extend(glob.glob(dir_path))
return files
def modify_clip(path, output):
"""Handle conversion of a video file.
Args:
path (str): The path to the directory of video files to be converted.
output (str): The filename to associate with the converted file.
"""
clip = VideoFileClip(path)
clip = clip.rotate(270)
clip = clip.crop(x_center=540, y_center=960, width=1080, height=608)
clip = clip.resize(width=1920)
clip.write_videofile(output, codec='libx264')
print('File: {} should have been created.'.format(output))
if __name__ == '__main__':
status = 'Failed!'
parser = OptionParser(version='%prog 1.0.0')
parser.add_option('-p', '--path', action='store', dest='dir_path',
default='.', type='string',
help='the path of the directory of assets, defaults to .')
options, args = parser.parse_args()
print('Running against directory path: {}'.format(options.dir_path))
path_correct = raw_input('Is that correct?').lower()
if path_correct.startswith('y'):
dir_paths = get_dir_files(options.dir_path)
for dir_path in dir_paths:
output_filename = 'converted_' + os.path.basename(dir_path)
modify_clip(path=dir_path, output=output_filename)
status = 'Successful!'
print('Conversion {}'.format(status))
With the above example, you can simply drop that into the directory of assets you wish to convert and run: python this_file.py and it should convert the files for you in the same directory with the name prepended with: converted_
Likewise, you can drop that file anywhere and run it against an absolute path:
python this_file.py -p /Users/thisguy/media and it will convert all files with the extensions: ['*.mp4', '*.avi', '*.mov', '*.flv']
Either way, let me know if you have any questions (or if this resolves your issue) and I'll do my best to help you out!
Thanks for using moviepy!

Categories

Resources