There is no problem without multiprocessing.
Using multiprocessing alone causes path problems.
No matter how hard I search, I can't find the answer to the content, so I ask for help
import numpy as np
import tensorflow as tf
import pandas as pd
from keras.preprocessing.image import array_to_img, img_to_array, load_img
import time
from multiprocessing import Pool
def b(path):
for f in path:
print (f)
new_img = load_img(f,target_size=(256,256))
arr_img = img_to_array(new_img)
return arr_img
def main():
start = int(time.time())
num_cores = 4
pool = Pool(num_cores)
pool.map(b, 'C:\\Users\\003.png')
print("***run time(sec) :", int(time.time()) - start)
if __name__ == "__main__":
main()
Error message
load_img
with open(path, 'rb') as f:
FileNotFoundError: [Errno 2] No such file or directory: 'C'
The error message is the same even if it is put as a variable as follows.
def main():
start = int(time.time())
num_cores = 4
pool = Pool(num_cores)
bb = 'C:\\Users\\003.png'
pool.map(b, bb)
print("***run time(sec) :", int(time.time()) - start)
this piece of code has the problem.
pool.map(b, 'C:\\Users\\003.png')
you are using map, the first parameter is a function(which is okay for you), and the second need to be iterable, so it need to be like(a list for example) ['C:\\Users\\003.png']
because you gave it like 'C:\\Users\\003.png' map is trying to iterate it like C , then :\ and so on. which is throwing the error. so please change your code to (i.e a list)
pool.map(b, ['C:\\Users\\003.png'])
Related
I am attempting to use the speech recognition toolkit VOSK and the speech diarization package Resemblyzer to transcibe audio and then identify the speakers in the audio.
Tools:
https://github.com/alphacep/vosk-api
https://github.com/resemble-ai/Resemblyzer
I can do both things individually but run into issues when trying to do them when running the one python script.
I used the following guide when setting up the diarization system:
https://medium.com/saarthi-ai/who-spoke-when-build-your-own-speaker-diarization-module-from-scratch-e7d725ee279
Computer specs are as follows:
Intel(R) Core(TM) i3-7100 CPU # 3.90GHz, 3912 Mhz, 2 Core(s), 4 Logical Processor(s)
32GB RAM
The following is my code, I am not to sure if using threading is appropriate or if I even implemented it correctly, how can I best optimize this code as to achieve the results I am looking for and not crash.
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
import json
import sys
import os
import subprocess
import datetime
from resemblyzer import preprocess_wav, VoiceEncoder
from pathlib import Path
from resemblyzer.hparams import sampling_rate
from spectralcluster import SpectralClusterer
import threading
import queue
import gc
def recognition(queue, audio, FRAME_RATE):
model = Model("Vosk_Models/vosk-model-small-en-us-0.15")
rec = KaldiRecognizer(model, FRAME_RATE)
rec.SetWords(True)
rec.AcceptWaveform(audio.raw_data)
result = rec.Result()
transcript = json.loads(result)#["text"]
#return transcript
queue.put(transcript)
def diarization(queue, audio):
wav = preprocess_wav(audio)
encoder = VoiceEncoder("cpu")
_, cont_embeds, wav_splits = encoder.embed_utterance(wav, return_partials=True, rate=16)
print(cont_embeds.shape)
clusterer = SpectralClusterer(
min_clusters=2,
max_clusters=100,
p_percentile=0.90,
gaussian_blur_sigma=1)
labels = clusterer.predict(cont_embeds)
def create_labelling(labels, wav_splits):
times = [((s.start + s.stop) / 2) / sampling_rate for s in wav_splits]
labelling = []
start_time = 0
for i, time in enumerate(times):
if i > 0 and labels[i] != labels[i - 1]:
temp = [str(labels[i - 1]), start_time, time]
labelling.append(tuple(temp))
start_time = time
if i == len(times) - 1:
temp = [str(labels[i]), start_time, time]
labelling.append(tuple(temp))
return labelling
#return
labelling = create_labelling(labels, wav_splits)
queue.put(labelling)
def identify_speaker(queue1, queue2):
transcript = queue1.get()
labelling = queue2.get()
for speaker in labelling:
speakerID = speaker[0]
speakerStart = speaker[1]
speakerEnd = speaker[2]
result = transcript['result']
words = [r['word'] for r in result if speakerStart < r['start'] < speakerEnd]
#return
print("Speaker",speakerID,":",' '.join(words), "\n")
def main():
queue1 = queue.Queue()
queue2 = queue.Queue()
FRAME_RATE = 16000
CHANNELS = 1
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
podcast = podcast.set_channels(CHANNELS)
podcast = podcast.set_frame_rate(FRAME_RATE)
first_thread = threading.Thread(target=recognition, args=(queue1, podcast, FRAME_RATE))
second_thread = threading.Thread(target=diarization, args=(queue2, podcast))
third_thread = threading.Thread(target=identify_speaker, args=(queue1, queue2))
first_thread.start()
first_thread.join()
gc.collect()
second_thread.start()
second_thread.join()
gc.collect()
third_thread.start()
third_thread.join()
gc.collect()
# transcript = recognition(podcast,FRAME_RATE)
#
# labelling = diarization(podcast)
#
# print(identify_speaker(transcript, labelling))
if __name__ == '__main__':
main()
When I say crash I mean everything freezes, I have to hold down the power button on the desktop and turn it back on again. No blue/blank screen, just frozen in my IDE looking at my code. Any help in resolving this issue would be greatly appreciated.
Pydubs AudioSegment was not returning a suitable type for the Resembylzer function preprocess_wav.
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
preprocess_wav instead requires a Numpy Array / Path.
audio_file_path = 'Podcast_Audio/WAV-Film-Release-Clip.wav'
wav_fpath = Path(audio_file_path)
wav = preprocess_wav(wav_fpath)
Additionally preprocess_wav functionality can be achieved using Librosa if desired.
import librosa
def preprocess_wav(waveform, sr):
waveform = librosa.resample(waveform, orig_sr=sr, target_sr=16000)
waveform = waveform.astype(np.float32) / np.max(np.abs(waveform))
return waveform
waveform, sr = librosa.load('Podcast_Audio/WAV-Film-Release-Clip.wav')
wav = preprocess_wav(waveform, sr)
I wrote a code to remove the background of 8000 images but that whole code is taking approximately 8 hours to give the result.
How to improve its time complexity as I have to work on a large dataset in future?
Or do I have to write a whole new code? If it is, please suggest some sample codes.
from rembg import remove
import cv2
import glob
for img in glob.glob('../images/*.jpg'):
a = img.split('../images/')
a1 = a[1].split('.jpg')
try:
cv_img = cv2.imread(img)
output = remove(cv_img)
except:
continue
cv2.imwrite('../output image/' + str(a1[0]) + '.png', output)
One simple approach would be to divide the work into multiple threads. See ThreadPoolExecutor for more.
You can play around with max_workers= to see what get's the best results. Note that max-workers can be any number between 1 and 32.
This sample code is ready to run. It assumes the image files are in the same directory as your main.py and the output_image directory exits.
import cv2
import rembg
import sys
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
out_dir = Path("output_image")
in_dir = Path(".")
def is_image(absolute_path: Path):
return absolute_path.is_file and str(absolute_path).endswith('.png')
input_filenames = [p for p in filter(is_image, Path(in_dir).iterdir())]
def process_image(in_dir):
try:
image = cv2.imread(str(in_dir))
if image is None or not image.data:
raise cv2.error("read failed")
output = rembg.remove(image)
in_dir = out_dir / in_dir.with_suffix(".png").name
cv2.imwrite(str(in_dir), output)
except Exception as e:
print(f"{in_dir}: {e}", file=sys.stderr)
executor = ThreadPoolExecutor(max_workers=4)
for result in executor.map(process_image, input_filenames):
print(f"Processing image: {result}")
Check out the U^2Net repository. Like u2net_test.py, Writing your own remove function and using dataloaders can speed up the process. if it is not necessary skip the alpha matting else you can add the alpha matting code from rembg.
def main():
# --------- 1. get image path and name ---------
model_name='u2net'#u2netp
image_dir = os.path.join(os.getcwd(), 'test_data', 'test_images')
prediction_dir = os.path.join(os.getcwd(), 'test_data', model_name + '_results' + os.sep)
model_dir = os.path.join(os.getcwd(), 'saved_models', model_name, model_name + '.pth')
img_name_list = glob.glob(image_dir + os.sep + '*')
print(img_name_list)
#1. dataloader
test_salobj_dataset = SalObjDataset(img_name_list = img_name_list,
lbl_name_list = [],
transform=transforms.Compose([RescaleT(320),
ToTensorLab(flag=0)])
)
test_salobj_dataloader = DataLoader(test_salobj_dataset,
batch_size=1,
shuffle=False,
num_workers=1)
for i_test, data_test in enumerate(test_salobj_dataloader):
print("inferencing:",img_name_list[i_test].split(os.sep)[-1])
inputs_test = data_test['image']
inputs_test = inputs_test.type(torch.FloatTensor)
if torch.cuda.is_available():
inputs_test = Variable(inputs_test.cuda())
else:
inputs_test = Variable(inputs_test)
d1,d2,d3,d4,d5,d6,d7= net(inputs_test)
# normalization
pred = d1[:,0,:,:]
pred = normPRED(pred)
# save results to test_results folder
if not os.path.exists(prediction_dir):
os.makedirs(prediction_dir, exist_ok=True)
save_output(img_name_list[i_test],pred,prediction_dir)
del d1,d2,d3,d4,d5,d6,d7
Try to use parallelization with multiprocessing like Mark Setchell mentioned in his comment. I rewrote your code according to Method 8 from here. Multiprocessing should speed up your execution time. I did not test the code, try if it works.
import glob
from multiprocessing import Pool
import cv2
from rembg import remove
def remove_background(filename):
a = filename.split("../images/")
a1 = a[1].split(".jpg")
try:
cv_img = cv2.imread(filename)
output = remove(cv_img)
except:
continue
cv2.imwrite("../output image/" + str(a1[0]) + ".png", output)
files = glob.glob("../images/*.jpg")
pool = Pool(8)
results = pool.map(remove_background, files)
Ah, you used the example from https://github.com/danielgatis/rembg#usage-as-a-library as template for your code. Maybe try the other example with PIL image instead of OpenCV. The latter is mostly less fast, but who knows. Try it with maybe 10 images and compare execution time.
Here is your code using PIL instead of OpenCV. Not tested.
import glob
from PIL import Image
from rembg import remove
for img in glob.glob("../images/*.jpg"):
a = img.split("../images/")
a1 = a[1].split(".jpg")
try:
cv_img = Image.open(img)
output = remove(cv_img)
except:
continue
output.save("../output image/" + str(a1[0]) + ".png")
I have logic for bulk calculating image hash.
Script 1
import dhash
import glob
from PIL import Image
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager
PATH = '*.jpg'
def makehash(t):
filename, d = t
with Image.open(filename) as image:
image.draft('L', (32,32))
row, col = dhash.dhash_row_col(image)
d[filename] = dhash.format_hex(row, col)
def main():
with Manager() as manager:
d = manager.dict()
with ProcessPoolExecutor() as executor:
executor.map(makehash, [(jpg, d) for jpg in glob.glob(PATH)])
print(d)
if __name__ == '__main__':
main()
For around 10,000 JPEGs, it runs for less than a minute. However, if I put the logic into a class, it runs indefinitely:
import numpy
import cv2
import glob
import os
import dhash
from timeit import default_timer as timer
from datetime import timedelta
from wand.image import Image
from itertools import chain
from alive_progress import alive_bar
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager
FOLDER_DUPLICATE = 'duplicate'
def listdir_nohidden(path):
return glob.glob(os.path.join(path, '*'))
def create_dir(directory):
if not os.path.exists(directory):
os.makedirs(directory)
def print_elapsed(sec):
print("Elapsed Time: ", timedelta(seconds=sec))
class ImgToolkit:
file_list = {}
def __init__(self):
# initialize something
with Manager() as manager:
self.file_list = manager.dict()
print("loaded")
def find_duplicates(self):
if os.path.exists(FOLDER_DUPLICATE) and listdir_nohidden(FOLDER_DUPLICATE):
print("ERROR: Duplicate folder exists and not empty. Halting")
else:
start = timer()
print("Phase 1 - Hashing")
imgs = glob.glob('*.jpg')
def get_photo_hashes_pillow(t):
filename, self.file_list = t
with Image.open(filename) as image:
image.draft('L', (32, 32))
row, col = dhash.dhash_row_col(image)
self.file_list[filename] = dhash.format_hex(row, col)
with ProcessPoolExecutor() as executor:
executor.map(get_photo_hashes_pillow, [(jpg, self.file_list) for jpg in imgs])
print(self.file_list)
end = timer()
print_elapsed(end-start)
And I use the class as follow:
from imgtoolkit import imgtoolkit
if __name__ == '__main__':
kit = imgtoolkit.ImgToolkit()
kit.find_duplicates()
What did I miss? I am quite new in Python.
UPDATE
I found that the function get_photo_hashes_pillow never get called, as I put a print() line in the 1st line of function. But why?
so I'm trying to read out text from MS Teams and use that text to make inputs on the keyboard.
Right now, I work with the threading module to have one thread for the input and one thread for the image_to_string. Following is the function for the image_to_string.
def imToString():
global message
print("Image getting read")
pytesseract.pytesseract.tesseract_cmd ='C:\\Users\\gornicec\\AppData\\Local\\Programs\\Tesseract-OCR\\tesseract.exe'
while(True):
print("preIMGgrab")
cap = ImageGrab.grab(bbox=(177, 850, 283, 881))
grayCap = cv2.cvtColor(np.array(cap), cv2.COLOR_BGR2GRAY)
print("postIMGgrab")
t = time.perf_counter()
print("preMSG" + str(t))
message = pytesseract.image_to_string(
grayCap,
lang ='deu',config='--psm 6')
print(str(message) + "was read" + str(time.perf_counter() - t))
I don't know how but it takes about 8 seconds to read an image thats 1000 pixels big. I need this to be at highest 2 seconds. I'll add the whole code at the end. If there is any way to make it faster or to do it differently please tell me so.
WHOLE CODE:
import numpy as np
import time
import pytesseract
from win32gui import GetWindowText, GetForegroundWindow
import win32api
import cv2
import pyautogui
from PIL import ImageGrab
import threading
from ahk import AHK
import keyboard
message = ""
ahk = AHK(executable_path='C:\\Program Files\\AutoHotkey\\AutoHotkey.exe')
def Controls():
global message
while True:
booleanVal = True
if booleanVal:
#imToString()
print("message")
#print("rechts" in message.lower())
#print(f'LĂ„NGE: {len(message)}')
if "vorne" in message.lower():
# Control(message, 'w')
ahk.key_press('w')
#message = ""
if "hinten" in message.lower():
# Control(message, 's')
ahk.key_press('s')
#message = ""
if "links" in message.lower():
# Control(message, 'a')
ahk.key_press('a')
#message = ""
if "rechts" in message.lower():
# Control(message, 'd')
#print("HAHAHA")
ahk.key_press('d')
#message = ""
if "greif" in message.lower():
ahk.key_press('space')
#message = ""
time.sleep(0.5)
#IMGTOSTRING---
controls = threading.Thread(target=Controls)
controls.start()
grab = threading.Thread(target=imToString)
grab.start()
pytesseract is not suit for large amount of images or images that are already in memory, its write them to a file and then pass the file path to tesseract cli, if you want to improve the performance of you script try using library that works directly with tesseract api.
like this: https://pypi.org/project/tess-py-api/
I have three modules: GetInput, Main and Converter. In the GetInput file there are all the inputs values and excel data in the form of list. In the Converter file I am using those input values from Getinput file and in the main file I am connecting both these files here. I am doing this so that my code can look more organized.
GetInput.py:
import pandas as pd
import numpy as np
import time
def getInputs():
df = pd.read_excel('input.xlsx')
actual = df['actual'].values.tolist()
schedule = df['schedule'].values.tolist()
freq = df['frequency'].values.tolist()
ACP = df['acp'].values.tolist()
modelInput = {
'actual': actual, 'schedule': schedule, 'freq': freq, 'ACP': ACP,'df' : df
}
return modelInput
Converter.py
import pandas as pd
def fun(modelInput):
underdraw = []
overdraw = []
for i,j, in zip(schedule, actual):
dev = j - i
if dev < 0:
underdraw.append(dev)
else:
underdraw.append(0)
if dev > 0:
overdraw.append(dev)
else:
overdraw.append(0)
df['underdraw'] = pd.Series(underdraw)
df['overdraw'] = pd.Series(overdraw)
df.to_excel('mainfile.xlsx')
Main.py
import pandas as pd
import numpy as np
from convert import *
from GetInputs import *
def fun1():
inpu = getInputs()
con = fun(inpu)
fun1()
This whole program works when I run it in a single module but it throw errors when I try divide my code into separate modules. Basically it throw error in GetInput.py and in Converter.py (df is not defined) file. I know its a very basic thing but I don't know how to make it work. There is no desired output for this program, I am already getting an output when I run it in a single file. I just want to divide my code in this format as I mentioned above: GetIput File, Converter File and Main File.
Keep all the files in same directory or else mention the file paths at the top of main code using os module.
You have misspelled the following in the main code:
from convert import *
from GetInputs import *
It should be:
from Converter import *
from GetInput import *
I have tested this using the following:
MainModule.py
from Converter import *
from GetInputs import *
def fun1():
inpu = getInputs()
con = fun(inpu)
fun1()
Converter.py
import pandas as pd
def fun(modelInput):
print("HIE" + modelInput)
GetInputs.py
def getInputs():
return "modelInput"