Optimize execution time for retrieving data from xml and copy images

Optimize execution time for retrieving data from xml and copy images - python

the purpose of the following code is to copy image files from one directory to another directory and reorganize the images in a hierarchical structure based on information extracted from XML files.
from bs4 import BeautifulSoup as bs
import shutil
import os
import glob
campaign_name = "CAMPAIGN_2020"
xml_directory = r'XML_DIRECTORY'
picture_directory = r'PICTURE_DIRECTORY'
output_directory = r'OUTPUT_DIRECTORY'
def copy_files(content, picture_files):
bs_content = bs(content, "lxml")
images = bs_content.find_all("images")
for picture in picture_files:
for i, image_group in enumerate(images):
for image in image_group.find_all('img'):
if os.path.basename(image['src']) == os.path.basename(picture):
src = image['src']
station = image['station']
first_field = image.parent.parent.data['first_field']
second_field = image.parent.parent.data['second_field']
start = int(image.parent.parent.data['start'])
end = int(image.parent.parent.data['end'])
length = start - end
class_name = image.parent.parent.assignment['class_name']
number = image.parent.parent.assignment['number']
img_nr = int(image['img_nr'])
location = image.parent.parent.assignment['location']
date = image.parent.parent['date']
# set the complete picture path
picture_path = f'{class_name}{number}\{first_field}_{second_field}_{length}_{start}_{end}\{adjust_date(date)}\{campaign_name}\{adjust_location(location)}\{adjust_img_nr(img_nr)}\{station.zfill(5)}.jpg'
# create new subdirectories if they do not already exist
os.makedirs(os.path.join(output_directory, os.path.dirname(picture_path)), exist_ok=True)
src_file = picture # original picture path
dst_file = os.path.join(output_directory, picture_path) # assembled target path
shutil.copy(src_file, dst_file)
picture_list = []
for pic in glob.glob(picture_directory + '\**\*.jpg', recursive=True): # consider files in all subdirectories that end with .jpg, adjust if necessary
picture_list.append(pic)
for path in os.listdir(xml_directory):
if path.endswith(".xml"): # only consider files that end with .xml
with open(os.path.join(xml_directory, path), "r") as file:
xml_content = file.readlines()
xml_content = "".join(xml_content)
copy_files(xml_content, picture_list)
I tested the code and it works for the most part. To copy 20 pictures the tool needs around 2 hours, so i have to drasticly improve the execution time. How can I do that?
To give you an idea: I have around 8k xml files and around 400k pictures :D

Related

How do I select all images from a folder? -MoviePy, Python

I'm pretty new in python and trying to make a python script to put images together into a video(.mp4) using MoviePy.
However, I have multiple files and would like to be more efficient by sort of.... naming the folder and selecting all images within that folder than having to select all images individually.
Here's my Code:
from moviepy.editor import *
import os
clips = []
clip1 = ImageClip('imagesfolder\images0.jpg').set_duration(4)
clip2 = ImageClip('imagesfolder\images1.jpg').set_duration(4)
clip3 = ImageClip('imagesfolder\images2.jpg').set_duration(4)
clip4 = ImageClip('imagesfolder\images3.jpg').set_duration(4)
clip5 = ImageClip('imagesfolder\images4.jpg').set_duration(4)
clip6 = ImageClip('imagesfolder\images5.jpg').set_duration(4)
clip7 = ImageClip('imagesfolder\images6.jpg').set_duration(4)
clip8 = ImageClip('imagesfolder\images7.jpg').set_duration(4)
clip9 = ImageClip('imagesfolder\images8.jpg').set_duration(4)
clip10 = ImageClip('imagesfolder\images9.jpg').set_duration(4)
clips.append(clip1)
clips.append(clip2)
clips.append(clip3)
clips.append(clip4)
clips.append(clip5)
clips.append(clip6)
clips.append(clip7)
clips.append(clip8)
clips.append(clip9)
clips.append(clip10)
video_clip = concatenate_videoclips(clips, method='compose')
video_clip.write_videofile("memes.mp4", fps=24, remove_temp=True, codec="libx264",
audio_codec="aac")

You can use a function called glob to find all files in a directly which match a pattern.
Eg
from glob import glob
clips = [ImageClip(clip).set_duration(4) for clip in glob("imagesfolder\*.gif")]
video_clip = concatenate_videoclips(clips, method="compose")

Iterate through each XML file

So currently i have a code that passed the information to Report Portal from a XML file, this xml file located on its own folder and it applies to many folder. Currently, the parser only pass the last xml data that are stored in the memory even though it recognize all the other file
this is my code for now:
launch = service.start_launch(name=launch_name,
attributes=rp_attributes,
start_time=timestamp(),
description=launch_doc)
r_path='\\\\esw-fs01\\esw_niagara_no_bck\\BuildResults\\master\\0.1.52.68_390534\\installation_area\\autotestlogs_top'
root = os.listdir(r_path)
for entry in root:
subdir_path = os.path.join(r_path, entry) # create the absolute path of the subdir
if os.path.isdir(subdir_path): # check if it is a folder
subdir_entries = os.listdir(subdir_path) # get the content of the subdir
for subentry in subdir_entries:
if subentry.endswith('.xml'):
subentry_path = os.path.join(subdir_path, subentry)
tree = ET.parse(subentry_path)
root=tree.getroot()
for subentry_path in root.iter('entry'):
if subentry_path.get('type') == "TM":
if suite_item_id != None:
service.finish_test_item(item_id=suite_item_id, end_time=timestamp(), status=tm_verdict)
suite=subentry_path.find('name').text
description=subentry_path.find('messages').text
verdict=subentry_path.find('verdict').text
if verdict=="True":
tm_verdict="PASSED"
elif verdict=="False":
tm_verdict="FAILED"
suite_item_id = service.start_test_item(name=suite,
description=description,
attributes=rp_attributes,
start_time=timestamp(),
item_type="SUITE")
if subentry_path.get('type') == "TR":
name = subentry_path.find('name').text
verdict = subentry_path.find('verdict').text
link = subentry_path.find('link').text
duration = 10
description = subentry_path.find('messages').text
if verdict=="True":
verdict="PASSED"
elif verdict=="False":
verdict="FAILED"
start_time=timestamp()
item_id = service.start_test_item(name=name,
description=description,
start_time=start_time,
parent_item_id=suite_item_id,
item_type="STEP",
parameters={"key1": "val1",
"key2": "val2"})
the goal is to make it read all the files, any help will be apreciated

You could first build a list of paths, then in the second loop parse the files.
# don't use 'root' here
src = os.listdir(r_path)
files = list()
for entry in src:
subdir_path = os.path.join(r_path, entry)
if os.path.isdir(subdir_path):
subdir_entries = os.listdir(subdir_path)
for subentry in subdir_entries:
if subentry.endswith('.xml'):
subentry_path = os.path.join(subdir_path, subentry)
files.append(subentry_path)
for f in files:
tree = ET.parse(f)
root = tree.getroot()
for subentry_path in root.iter('entry'):
...
As a side note, it's advisable to use something more canonical to get all the files, like glob:
import glob
filelist = glob.glob(os.path.join(rpath, "**/*.xml"), recursive=True)
filelist is now a list of paths to all the xml files in the source directory. This will save you a couple of lines and indentations.

How to run python on multiple folder to create pdf?

The following code is to combine multiple images into one pdf. I am trying to run this code on multiple folder where each folder has several images as result, each folder will has one pdf.
import os
from PIL import Image
from fpdf import FPDF
pdf = FPDF()
sdir = "imageFolder/"
w,h = 0,0
for i in range(1, 100):
fname = sdir + "IMG%.3d.png" % i
if os.path.exists(fname):
if i == 1:
cover = Image.open(fname)
w,h = cover.size
pdf = FPDF(unit = "pt", format = [w,h])
image = fname
pdf.add_page()
pdf.image(image,0,0,w,h)
else:
print("File not found:", fname)
print("processed %d" % i)
pdf.output("output.pdf", "F")
print("done")
I was thinking to create another loop to bring the folder path which will come before the first loop:
For j in range(1 to 70):
folderP=sdir+folder%1
And loop in each folder
Sorry I am still learning python. Any suggestion would be great!

You can use glob to get the paths of all pdfs and add them to a list, then you just iterate through the list and you wouldn't even need to check if they exist:
from glob import glob
sDir = 'imageFolder/'
pdfPaths = []
pdfPaths.extend(glob(f'{sDir}**/*.pdf', recursive=True))
for pdf in pdfPaths:
# do stuff

Paraview - Using python script to export data in x3d format

I am trying to export in x3d format OpenFOAM results using paraview-python script. When I do it via paraview graphical interface it works and results can be visualized in Blender, see the following picture
However, when I try to do the same operation using the following script
from paraview.simple import *
import fnmatch
import os
import shutil
#create alist of all vtk files
vtkFiles = []
for root, dirnames, filenames in os.walk('.'):
for filename in fnmatch.filter(filenames, '*.vtk'):
vtkFiles.append(os.path.join(root, filename))
vtkFilesGroups=[
'U',
]
def ResetSession():
pxm = servermanager.ProxyManager()
pxm.UnRegisterProxies()
del pxm
Disconnect()
Connect()
def x3dExport(output,r):
#export in x3d format
exporters = servermanager.createModule("exporters")
Show(r)
view = GetActiveView()
render = Render()
x3dExporter = exporters.X3DExporter(FileName=output)
x3dExporter.SetView(view)
x3dExporter.Write()
ResetSession()
# group VTK files by gruop (fields in openfoam "vtkFilesGroups")
# then loop over all and save it into different formats
groupedVtkFiles=[]
for group in vtkFilesGroups:
vtkDir = os.path.join('.', group, 'vtk')
if not os.path.exists(vtkDir):
os.makedirs(vtkDir)
vtuDir = os.path.join('.', group, 'vtu')
if not os.path.exists(vtuDir):
os.makedirs(vtuDir)
x3dDir = os.path.join('.', group, 'x3d')
if not os.path.exists(x3dDir):
os.makedirs(x3dDir)
for stepFile in vtkFiles:
tmp = stepFile.split(os.sep)
oldFileName = tmp[-1].split('.')[0]
time = tmp[-2]
fileNameVtk = '{}_{}.vtk'.format(oldFileName, time)
fileNameVtp = '{}_{}.vtp'.format(oldFileName, time)
fileNameX3d = '{}_{}.x3d'.format(oldFileName, time)
r = LegacyVTKReader(FileNames=[stepFile])
w = XMLUnstructuredGridWriter()
w.FileName = os.path.join(vtuDir, fileNameVtp)
w.UpdatePipeline()
x3dExport(os.path.join(x3dDir, fileNameX3d), r)
the field values (velocity U) are not exported as you can see from this picture!
Can someone tell me what I am doing wrong?
Thank you!

Your problem is that the .foam file it's not a scientific visualization file, as VTK, .foam file is only used for ParaView (by its extension, not by its content) to identify the reader OpenFOAMReader and then us it for post-processing.
I have two solutions for you:
Read the reader documentation to find a way to do this.
Convert the results into VTK files with FoamToVTK and then loop over the results.
EDIT
I Use this code to transform do that thing long time ago:
from paraview.simple import *
import fnmatch
import os
import shutil
#create alist of all vtk files
vtkFiles = []
for root, dirnames, filenames in os.walk('.'):
for filename in fnmatch.filter(filenames, '*.vtk'):
vtkFiles.append(os.path.join(root, filename))
vtkFilesGroups=('p', 'U')
def ResetSession():
pxm = servermanager.ProxyManager()
pxm.UnRegisterProxies()
del pxm
Disconnect()
Connect()
def x3dExport(output,r):
#export in x3d format
exporters = servermanager.createModule("exporters")
Show(r)
view = GetActiveView()
render = Render()
x3dExporter = exporters.X3DExporter(FileName=output)
x3dExporter.SetView(view)
x3dExporter.Write()
ResetSession()
# group VTK files by gruop (fields in openfoam "vtkFilesGroups")
# then loop over all and save it into different formats
for group in vtkFilesGroups:
x3dDir = os.path.join('.', group, 'x3d')
if not os.path.exists(x3dDir):
os.makedirs(x3dDir)
for stepFile in (f for f in vtkFiles if group in f):
tmp = stepFile.split(os.sep)
oldFileName = tmp[-1].split('.')[0]
time = tmp[-2]
fileNameX3d = '{}_{}.x3d'.format(oldFileName, time)
x3dExport(os.path.join(x3dDir, fileNameX3d), r)

You need to color your data in your script, with something like :
ColorBy(yourRep, ('POINTS', ('YourScalar', 'YourComp'))
Documentation

How to batch process a folder of videos using MoviePy

I wrote a MoviePy script that takes an input video, does some processing, and outputs a video file. I want to run this through an entire folder of videos. Any help or direction is appreciated.
Here's what I tried...
for f in *; do python resize.py $f; done
and resize.py source code here:
from moviepy.editor import *
clip = VideoFileClip(input)
clip1 = clip.rotate(270)
clip2 = clip1.crop(x_center=540,y_center=960,width=1080,height=608)
clip3 = clip2.resize(width=1920)
clip3.write_videofile(output,codec='libx264')
Really wasn't sure what to put for "input" and "output" in my .py file.
Thanks,
Evan

I know you have an answer on Github, but I'll add my own solution.
First, you'll want to put your code inside a function:
def process_video(input):
"""Parameter input should be a string with the full path for a video"""
clip = VideoFileClip(input, output)
clip1 = clip.rotate(270)
clip2 = clip1.crop(x_center=540,y_center=960,width=1080,height=608)
clip3 = clip2.resize(width=1920)
clip3.write_videofile(output,codec='libx264')
Then, you can have a function that returns a list of file paths, and a list of final file names to use with the above function (note that the final file names will be the same as the original file names but with "output" in front):
import os
def get_video_paths(folder_path):
"""
Parameter folder_path should look like "Users/documents/folder1/"
Returns a list of complete paths
"""
file_name_list = os.listdir(folder_path)
path_name_list = []
final_name_list = []
for name in file_name_list:
# Put any sanity checks here, e.g.:
if name == ".DS_Store":
pass
else:
path_name_list.append(folder_path + name)
# Change the format of the output file names below
final_name_list.append(folder_path + "output" + name)
return path_name_list, final_name_list
Finally, at the bottom, we get the input folder, and utilise the above two functions:
if __name__ == "__main__":
video_folder = input("What folder would you like to process? ")
path_list, final_name_list = get_video_paths(video_folder)
for path, name in zip(path_list, final_name_list):
process_video(path, name)
print("Finished")
Just watch out, because this will crash if there are any files in the folder that can't be read as a movie. For instance, on mac, the OS puts a ".DS_Store" file in each folder, which will crash the program. I've put an area for a sanity check to ignore certain filenames.
Complete code:
import os
from moviepy.editor import *
def process_video(input, output):
"""Parameter input should be a string with the full path for a video"""
clip = VideoFileClip(input)
clip1 = clip.rotate(270)
clip2 = clip1.crop(x_center=540,y_center=960,width=1080,height=608)
clip3 = clip2.resize(width=1920)
clip3.write_videofile(output,codec='libx264')
def get_video_paths(folder_path):
"""
Parameter folder_path should look like "Users/documents/folder1/"
Returns a list of complete paths
"""
file_name_list = os.listdir(folder_path)
path_name_list = []
final_name_list = []
for name in file_name_list:
# Put any sanity checks here, e.g.:
if name == ".DS_Store":
pass
else:
path_name_list.append(folder_path + name)
final_name_list.append(folder_path + "output" + name)
return path_name_list, final_name_list
if __name__ == "__main__":
video_folder = input("What folder would you like to process? ")
path_list, final_name_list = get_video_paths(video_folder)
for path, name in zip(path_list, final_name_list):
process_video(path, name)
print("Finished")

I responded on your Github issue #542, but I copied it here for future reference!
First off, the below example isn't ironclad, but it should do what you need.
You can achieve this via something like this:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert all media assets located in a specified directory."""
import glob
import os
from optparse import OptionParser
from moviepy.editor import VideoFileClip
def get_dir_files(dir_path, patterns=None):
"""Get all absolute paths for pattern matched files in a directory.
Args:
dir_path (str): The path to of the directory containing media assets.
patterns (list of str): The list of patterns/file extensions to match.
Returns:
(list of str): A list of all pattern-matched files in a directory.
"""
if not patterns or type(patterns) != list:
print('No patterns list passed to get_dir_files, defaulting to patterns.')
patterns = ['*.mp4', '*.avi', '*.mov', '*.flv']
files = []
for pattern in patterns:
dir_path = os.path.abspath(dir_path) + '/' + pattern
files.extend(glob.glob(dir_path))
return files
def modify_clip(path, output):
"""Handle conversion of a video file.
Args:
path (str): The path to the directory of video files to be converted.
output (str): The filename to associate with the converted file.
"""
clip = VideoFileClip(path)
clip = clip.rotate(270)
clip = clip.crop(x_center=540, y_center=960, width=1080, height=608)
clip = clip.resize(width=1920)
clip.write_videofile(output, codec='libx264')
print('File: {} should have been created.'.format(output))
if __name__ == '__main__':
status = 'Failed!'
parser = OptionParser(version='%prog 1.0.0')
parser.add_option('-p', '--path', action='store', dest='dir_path',
default='.', type='string',
help='the path of the directory of assets, defaults to .')
options, args = parser.parse_args()
print('Running against directory path: {}'.format(options.dir_path))
path_correct = raw_input('Is that correct?').lower()
if path_correct.startswith('y'):
dir_paths = get_dir_files(options.dir_path)
for dir_path in dir_paths:
output_filename = 'converted_' + os.path.basename(dir_path)
modify_clip(path=dir_path, output=output_filename)
status = 'Successful!'
print('Conversion {}'.format(status))
With the above example, you can simply drop that into the directory of assets you wish to convert and run: python this_file.py and it should convert the files for you in the same directory with the name prepended with: converted_
Likewise, you can drop that file anywhere and run it against an absolute path:
python this_file.py -p /Users/thisguy/media and it will convert all files with the extensions: ['*.mp4', '*.avi', '*.mov', '*.flv']
Either way, let me know if you have any questions (or if this resolves your issue) and I'll do my best to help you out!
Thanks for using moviepy!

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Optimize execution time for retrieving data from xml and copy images - python

Related

How do I select all images from a folder? -MoviePy, Python

Iterate through each XML file

How to run python on multiple folder to create pdf?

Paraview - Using python script to export data in x3d format

How to batch process a folder of videos using MoviePy

Categories

Resources