MoviePy - video freezes after set_duration - python

I'm trying to make a video the same duration as audio clips
This kinda works, but after 2 seconds (subclip duration), the image just freezes as the audio continues
I was trying to achieve the same behavior as in this tutorial, where it seems that the video repeats itself. My original video has only 2 seconds
import moviepy.editor as mp
raw_video = mp.VideoFileClip("videotest.mp4", audio=False)
raw_audio = mp.AudioFileClip("frei.mp3")
raw_video = raw_video.subclip(0, 2)
my_video = raw_video.set_duration(raw_audio.duration)
my_video.audio = raw_audio
my_video.write_videofile('result.mp4')

This is the solution I've found, but don't really know if there is a better way. Is taking too long to write the video
import moviepy.editor as mp
import math
raw_video = mp.VideoFileClip("videotest.mp4", audio=False)
raw_audio = mp.AudioFileClip("frei.mp3")
# array de vídeos até completar a duração do áudio
amount = math.ceil(raw_audio.duration / raw_video.duration)
list = [raw_video for i in range(amount)]
final_video = mp.concatenate_videoclips(list, method='compose')
final_video.audio = raw_audio
final_video.write_videofile('result42.mp4')

Related

Computer crashing when using python tools in same script

I am attempting to use the speech recognition toolkit VOSK and the speech diarization package Resemblyzer to transcibe audio and then identify the speakers in the audio.
Tools:
https://github.com/alphacep/vosk-api
https://github.com/resemble-ai/Resemblyzer
I can do both things individually but run into issues when trying to do them when running the one python script.
I used the following guide when setting up the diarization system:
https://medium.com/saarthi-ai/who-spoke-when-build-your-own-speaker-diarization-module-from-scratch-e7d725ee279
Computer specs are as follows:
Intel(R) Core(TM) i3-7100 CPU # 3.90GHz, 3912 Mhz, 2 Core(s), 4 Logical Processor(s)
32GB RAM
The following is my code, I am not to sure if using threading is appropriate or if I even implemented it correctly, how can I best optimize this code as to achieve the results I am looking for and not crash.
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
import json
import sys
import os
import subprocess
import datetime
from resemblyzer import preprocess_wav, VoiceEncoder
from pathlib import Path
from resemblyzer.hparams import sampling_rate
from spectralcluster import SpectralClusterer
import threading
import queue
import gc
def recognition(queue, audio, FRAME_RATE):
model = Model("Vosk_Models/vosk-model-small-en-us-0.15")
rec = KaldiRecognizer(model, FRAME_RATE)
rec.SetWords(True)
rec.AcceptWaveform(audio.raw_data)
result = rec.Result()
transcript = json.loads(result)#["text"]
#return transcript
queue.put(transcript)
def diarization(queue, audio):
wav = preprocess_wav(audio)
encoder = VoiceEncoder("cpu")
_, cont_embeds, wav_splits = encoder.embed_utterance(wav, return_partials=True, rate=16)
print(cont_embeds.shape)
clusterer = SpectralClusterer(
min_clusters=2,
max_clusters=100,
p_percentile=0.90,
gaussian_blur_sigma=1)
labels = clusterer.predict(cont_embeds)
def create_labelling(labels, wav_splits):
times = [((s.start + s.stop) / 2) / sampling_rate for s in wav_splits]
labelling = []
start_time = 0
for i, time in enumerate(times):
if i > 0 and labels[i] != labels[i - 1]:
temp = [str(labels[i - 1]), start_time, time]
labelling.append(tuple(temp))
start_time = time
if i == len(times) - 1:
temp = [str(labels[i]), start_time, time]
labelling.append(tuple(temp))
return labelling
#return
labelling = create_labelling(labels, wav_splits)
queue.put(labelling)
def identify_speaker(queue1, queue2):
transcript = queue1.get()
labelling = queue2.get()
for speaker in labelling:
speakerID = speaker[0]
speakerStart = speaker[1]
speakerEnd = speaker[2]
result = transcript['result']
words = [r['word'] for r in result if speakerStart < r['start'] < speakerEnd]
#return
print("Speaker",speakerID,":",' '.join(words), "\n")
def main():
queue1 = queue.Queue()
queue2 = queue.Queue()
FRAME_RATE = 16000
CHANNELS = 1
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
podcast = podcast.set_channels(CHANNELS)
podcast = podcast.set_frame_rate(FRAME_RATE)
first_thread = threading.Thread(target=recognition, args=(queue1, podcast, FRAME_RATE))
second_thread = threading.Thread(target=diarization, args=(queue2, podcast))
third_thread = threading.Thread(target=identify_speaker, args=(queue1, queue2))
first_thread.start()
first_thread.join()
gc.collect()
second_thread.start()
second_thread.join()
gc.collect()
third_thread.start()
third_thread.join()
gc.collect()
# transcript = recognition(podcast,FRAME_RATE)
#
# labelling = diarization(podcast)
#
# print(identify_speaker(transcript, labelling))
if __name__ == '__main__':
main()
When I say crash I mean everything freezes, I have to hold down the power button on the desktop and turn it back on again. No blue/blank screen, just frozen in my IDE looking at my code. Any help in resolving this issue would be greatly appreciated.
Pydubs AudioSegment was not returning a suitable type for the Resembylzer function preprocess_wav.
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
preprocess_wav instead requires a Numpy Array / Path.
audio_file_path = 'Podcast_Audio/WAV-Film-Release-Clip.wav'
wav_fpath = Path(audio_file_path)
wav = preprocess_wav(wav_fpath)
Additionally preprocess_wav functionality can be achieved using Librosa if desired.
import librosa
def preprocess_wav(waveform, sr):
waveform = librosa.resample(waveform, orig_sr=sr, target_sr=16000)
waveform = waveform.astype(np.float32) / np.max(np.abs(waveform))
return waveform
waveform, sr = librosa.load('Podcast_Audio/WAV-Film-Release-Clip.wav')
wav = preprocess_wav(waveform, sr)

Pytesseract really slow

so I'm trying to read out text from MS Teams and use that text to make inputs on the keyboard.
Right now, I work with the threading module to have one thread for the input and one thread for the image_to_string. Following is the function for the image_to_string.
def imToString():
global message
print("Image getting read")
pytesseract.pytesseract.tesseract_cmd ='C:\\Users\\gornicec\\AppData\\Local\\Programs\\Tesseract-OCR\\tesseract.exe'
while(True):
print("preIMGgrab")
cap = ImageGrab.grab(bbox=(177, 850, 283, 881))
grayCap = cv2.cvtColor(np.array(cap), cv2.COLOR_BGR2GRAY)
print("postIMGgrab")
t = time.perf_counter()
print("preMSG" + str(t))
message = pytesseract.image_to_string(
grayCap,
lang ='deu',config='--psm 6')
print(str(message) + "was read" + str(time.perf_counter() - t))
I don't know how but it takes about 8 seconds to read an image thats 1000 pixels big. I need this to be at highest 2 seconds. I'll add the whole code at the end. If there is any way to make it faster or to do it differently please tell me so.
WHOLE CODE:
import numpy as np
import time
import pytesseract
from win32gui import GetWindowText, GetForegroundWindow
import win32api
import cv2
import pyautogui
from PIL import ImageGrab
import threading
from ahk import AHK
import keyboard
message = ""
ahk = AHK(executable_path='C:\\Program Files\\AutoHotkey\\AutoHotkey.exe')
def Controls():
global message
while True:
booleanVal = True
if booleanVal:
#imToString()
print("message")
#print("rechts" in message.lower())
#print(f'LÄNGE: {len(message)}')
if "vorne" in message.lower():
# Control(message, 'w')
ahk.key_press('w')
#message = ""
if "hinten" in message.lower():
# Control(message, 's')
ahk.key_press('s')
#message = ""
if "links" in message.lower():
# Control(message, 'a')
ahk.key_press('a')
#message = ""
if "rechts" in message.lower():
# Control(message, 'd')
#print("HAHAHA")
ahk.key_press('d')
#message = ""
if "greif" in message.lower():
ahk.key_press('space')
#message = ""
time.sleep(0.5)
#IMGTOSTRING---
controls = threading.Thread(target=Controls)
controls.start()
grab = threading.Thread(target=imToString)
grab.start()
pytesseract is not suit for large amount of images or images that are already in memory, its write them to a file and then pass the file path to tesseract cli, if you want to improve the performance of you script try using library that works directly with tesseract api.
like this: https://pypi.org/project/tess-py-api/

File size limit in making timelapse video on the fly with OpenCV, Picamera and an IR camera

I am creating a raspberry pi timelapse camera encoding video with CV2 videowriter
Each image captured with picamera is added to the videowriter and once the intended number of images are taken the videowriter closes.
However - while this works for a few thousand images - it stops at some limit with a filesize of 366Mb which is now frustrating me and I ask you - the internet and hoard of coders to tell me why I am bad a coding and how to fix this - you must be tempted by this..
Here is my offering of garbage for you to laugh pitifully at
import os, cv2
from picamera import PiCamera
from picamera.array import PiRGBArray
from datetime import datetime
from time import sleep
now = datetime.now()
x = now.strftime("%Y")+"-"+now.strftime("%m")+"-"+now.strftime("%d")+"-"+now.strftime("%H")+"-"+now.strftime("%M") #string of dateandtimestart
print(x)
def main():
imagenum = 10000 #how many images
period = 1 #seconds between images
os.chdir ("/home/pi/t_lapse")
os.mkdir(x)
os.chdir(x)
filename = x + ".avi"
camera = PiCamera()
camera.resolution=(1920,1088)
camera.vflip = True
camera.hflip = True
camera.color_effects = (128,128) #makes a black and white image for IR camera
sleep(0.1)
out = cv2.VideoWriter(filename, cv2.cv.CV_FOURCC(*'XVID'), 30, (1920,1088))
for c in range(imagenum):
with PiRGBArray(camera, size=(1920,1088)) as output:
camera.capture(output, 'bgr')
imagec = output.array
out.write(imagec)
output.truncate(0) #trying to get more than 300mb files..
pass
sleep(period-0.5)
camera.close()
out.release()
if __name__ == '__main__':
main()
This example is a part of the whole code I've written (https://github.com/gchennell/RPi-PiLapse) which has an OLED display and buttons and selection of how many images as I have this all in an enclosure - the number of images seems to be limited to about 3000-4000 and then it just gives up and goes home... I tried adding the output.truncate(0)
I have also recreated this in python3 before you cry "BUT CV2.CV2.VIDEOWRITER!!!!" and that hasn't changed a thing - I'm missing something here...

How to reduce pause between two Audio files in python?

I want to merge both audio files but it seems as if there is a pause of 2 Sec. Can anyone look into it further? It would be a great help.
import simpleaudio as sa
filename = '3.wav'
wave_obj = sa.WaveObject.from_wave_file(filename)
play_obj = wave_obj.play()
play_obj.wait_done()
filename = '4.wav'
wave_obj = sa.WaveObject.from_wave_file(filename)
play_obj = wave_obj.play()
play_obj.wait_done()`
I believe the problem is that after 3.wav ended, it takes a little time for the program to process the rest of the code. Let the program process both before starting one:
import simpleaudio as sa
filename1 = '3.wav'
filename2 = '4.wav'
wave_obj1 = sa.WaveObject.from_wave_file(filename1)
wave_obj2 = sa.WaveObject.from_wave_file(filename2)
play_obj1 = wave_obj1.play()
play_obj1.wait_done()
play_obj2 = wave_obj2.play()
play_obj2.wait_done()

Print image and record audio input

I am very green about programming but wish to learn and develop.
I want to write a simple application that will be useful in linguistic treatments - but at first it is simple demo.
The application is about to display image and record sound during projection.
There are few variables - interval and image/sound/movie clip paths - taken from external txt file (for the beginning - later I would like to perform some creator with presaved configurations).
The config file now looks like:
10
path1
path2
...
The first line is about to input interval in seconds, next there are paths to images, sounds or movie clips (I tried with images for now).
#!/usr/bin/python
# main.py
import sys
from PyQt4 import QtGui, QtCore
from Tkinter import *
import numpy as np
import pyaudio
import wave
import time
from PIL import Image, ImageTk
import multiprocessing
import threading
from threading import Thread
master = Tk()
conf_file = open("conf.txt", "r") #open conf file read only
conf_lines = conf_file.readlines()
conf_file.close()
interwal = conf_lines[0] #interval value from conf.txt file
bodziec1 = conf_lines[1] #paths to stimulus file (img / audio / video)
bodziec2 = conf_lines[2]
bodziec3 = conf_lines[3]
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = interwal #every stimulus has it's own audio record file for further work
timestr = time.strftime("%Y%m%d-%H%M%S") #filename is set to year / month / day - hour / minute / second for easier systematization
def nagrywanie(): #recording action - found somewhere in the all-knowing web
p = pyaudio.PyAudo()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print("* nagrywanie") #info about record to start
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print("* koniec nagrywania") #info about record to end
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open(timestr, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
def bod1(): #stimulus 1st to display / play
image = Image.open(bodziec1)
photo = ImageTk.PhotoImage(image)
def bod2():
image = Image.open(bodziec2) #stimulus 2nd to display / play
photo = ImageTk.PhotoImage(image)
def bod3():
image = Image.open(bodziec3) #stimulus 3rd to display / play
photo = ImageTk.PhotoImage(image)
def odpal(): #attemption to run display and recording at the same time
Thread(target = bod1).start()
Thread(target = nagrywanie).start()
# Wait interwal for odpal #give impetus for time in first line of the conf.txt
time.sleep(interwal)
# Terminate odpal #stop giving impetus
bod1.terminate()
# Cleanup #?? this part is also copied from all-knowing internet
p.join()
b = Button(master, text="OK", command=odpal) #wanted the program to be easier for non-programmers to operate so few buttons are necessary
b.pack()
mainloop()
When asked few programmers about the code it is as simple as riding a bike, so I wanted to learn how to write it by myself.
I guess it is peace of cake for professionals - 1000s of thanks to these ones who want even to read this junk.
It takes a lot of time for me to understand and figure out the exact commends that is why I am asking politely about the help - not only for education but also for better diagnosis.
Excuse me for the language - English is not my native language.

Categories

Resources