I am using the library midi2img to generate MIDI from images.
From this library, this is the file I am using:
from PIL import Image
import numpy as np
from music21 import instrument, note, chord, stream

lowerBoundNote = 21
def column2notes(column):
    notes = []
    for i in range(len(column)):
        if column[i] > 255/2:
            notes.append(i+lowerBoundNote)
    return notes

resolution = 0.25
def updateNotes(newNotes,prevNotes):
    res = {}
    for note in newNotes:
        if note in prevNotes:
            res[note] = prevNotes[note] + resolution
        else:
            res[note] = resolution
    return res

def image2midi(image_path):
    with Image.open(image_path) as image:
        im_arr = np.fromstring(image.tobytes(), dtype=np.uint8)
        try:
            im_arr = im_arr.reshape((image.size[1], image.size[0]))
        except:
            im_arr = im_arr.reshape((image.size[1], image.size[0],3))
            im_arr = np.dot(im_arr, [0.33, 0.33, 0.33])

    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    prev_notes = updateNotes(im_arr.T[0,:],{})
    for column in im_arr.T[1:,:]:
        notes = column2notes(column)
        # pattern is a chord
        notes_in_chord = notes
        old_notes = prev_notes.keys()
        for old_note in old_notes:
            if not old_note in notes_in_chord:
                new_note = note.Note(old_note,quarterLength=prev_notes[old_note])
                new_note.storedInstrument = instrument.Piano()
                if offset - prev_notes[old_note] >= 0:
                    new_note.offset = offset - prev_notes[old_note]
                    output_notes.append(new_note)
                elif offset == 0:
                    new_note.offset = offset
                    output_notes.append(new_note)
                else:
                    print(offset,prev_notes[old_note],old_note)

        prev_notes = updateNotes(notes_in_chord,prev_notes)

        # increase offset each iteration so that notes do not stack
        offset += resolution

    for old_note in prev_notes.keys():
        new_note = note.Note(old_note,quarterLength=prev_notes[old_note])
        new_note.storedInstrument = instrument.Piano()
        new_note.offset = offset - prev_notes[old_note]
        output_notes.append(new_note)

    prev_notes = updateNotes(notes_in_chord,prev_notes)

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=image_path.split("/")[-1].replace(".jpeg",".mid"))

import sys
image_path = sys.argv[1]
image2midi(image_path)
and this is the code I execute in the terminal to generate MIDI from an image:
python img2midi.py samples/image.png
I need to make the above code loop over all the input images I put inside the samples folder and generate a MIDI file for each one, not just one file at a time.
Any help would be much appreciated.
You can do this by getting the list of images from the directory and iterating over them.
import sys
import os
sample_folder_path = sys.argv[1]
images = os.listdir(sample_folder_path) # getting all images stored in sample folder
images_path = [os.path.abspath(f"{sample_folder_path}/{image}") for image in images] # gets absolute path for all images
for image_path in images_path:
    image2midi(image_path)
Usage:
python img2midi.py folder_path
Here folder_path is the path of the folder that contains the images; it can be either a relative or an absolute path.
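If the samples folder can also contain non-image files (or the generated .mid files end up in the same place), it may be safer to filter by extension first. Here is a minimal sketch using pathlib; it assumes this loop replaces the single-file call at the bottom of img2midi.py, and the set of accepted extensions is my own choice:

import sys
from pathlib import Path

IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg"}  # assumed input formats

def convert_folder(sample_folder_path):
    # Convert every image file in the folder, skipping anything else.
    for image_path in sorted(Path(sample_folder_path).iterdir()):
        if image_path.suffix.lower() in IMAGE_EXTENSIONS:
            image2midi(str(image_path))

convert_folder(sys.argv[1])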
I am attempting to use the speech recognition toolkit VOSK and the speaker diarization package Resemblyzer to transcribe audio and then identify the speakers in the audio.
Tools:
https://github.com/alphacep/vosk-api
https://github.com/resemble-ai/Resemblyzer
I can do both things individually, but I run into issues when trying to do them both from the one python script.
I used the following guide when setting up the diarization system:
https://medium.com/saarthi-ai/who-spoke-when-build-your-own-speaker-diarization-module-from-scratch-e7d725ee279
Computer specs are as follows:
Intel(R) Core(TM) i3-7100 CPU @ 3.90GHz, 3912 Mhz, 2 Core(s), 4 Logical Processor(s)
32GB RAM
The following is my code. I am not too sure if using threading is appropriate, or if I even implemented it correctly. How can I best optimize this code to achieve the results I am looking for without crashing?
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
import json
import sys
import os
import subprocess
import datetime
from resemblyzer import preprocess_wav, VoiceEncoder
from pathlib import Path
from resemblyzer.hparams import sampling_rate
from spectralcluster import SpectralClusterer
import threading
import queue
import gc
def recognition(queue, audio, FRAME_RATE):
model = Model("Vosk_Models/vosk-model-small-en-us-0.15")
rec = KaldiRecognizer(model, FRAME_RATE)
rec.SetWords(True)
rec.AcceptWaveform(audio.raw_data)
result = rec.Result()
transcript = json.loads(result)#["text"]
#return transcript
queue.put(transcript)
def diarization(queue, audio):
wav = preprocess_wav(audio)
encoder = VoiceEncoder("cpu")
_, cont_embeds, wav_splits = encoder.embed_utterance(wav, return_partials=True, rate=16)
print(cont_embeds.shape)
clusterer = SpectralClusterer(
min_clusters=2,
max_clusters=100,
p_percentile=0.90,
gaussian_blur_sigma=1)
labels = clusterer.predict(cont_embeds)
def create_labelling(labels, wav_splits):
times = [((s.start + s.stop) / 2) / sampling_rate for s in wav_splits]
labelling = []
start_time = 0
for i, time in enumerate(times):
if i > 0 and labels[i] != labels[i - 1]:
temp = [str(labels[i - 1]), start_time, time]
labelling.append(tuple(temp))
start_time = time
if i == len(times) - 1:
temp = [str(labels[i]), start_time, time]
labelling.append(tuple(temp))
return labelling
#return
labelling = create_labelling(labels, wav_splits)
queue.put(labelling)
def identify_speaker(queue1, queue2):
transcript = queue1.get()
labelling = queue2.get()
for speaker in labelling:
speakerID = speaker[0]
speakerStart = speaker[1]
speakerEnd = speaker[2]
result = transcript['result']
words = [r['word'] for r in result if speakerStart < r['start'] < speakerEnd]
#return
print("Speaker",speakerID,":",' '.join(words), "\n")
def main():
queue1 = queue.Queue()
queue2 = queue.Queue()
FRAME_RATE = 16000
CHANNELS = 1
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
podcast = podcast.set_channels(CHANNELS)
podcast = podcast.set_frame_rate(FRAME_RATE)
first_thread = threading.Thread(target=recognition, args=(queue1, podcast, FRAME_RATE))
second_thread = threading.Thread(target=diarization, args=(queue2, podcast))
third_thread = threading.Thread(target=identify_speaker, args=(queue1, queue2))
first_thread.start()
first_thread.join()
gc.collect()
second_thread.start()
second_thread.join()
gc.collect()
third_thread.start()
third_thread.join()
gc.collect()
# transcript = recognition(podcast,FRAME_RATE)
#
# labelling = diarization(podcast)
#
# print(identify_speaker(transcript, labelling))
if __name__ == '__main__':
main()
When I say crash, I mean everything freezes: I have to hold down the power button on the desktop and turn it back on again. There is no blue/blank screen, just a frozen IDE with my code on screen. Any help in resolving this issue would be greatly appreciated.
Pydub's AudioSegment was not returning a suitable type for the Resemblyzer function preprocess_wav.
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
preprocess_wav instead requires a NumPy array or a Path.
audio_file_path = 'Podcast_Audio/WAV-Film-Release-Clip.wav'
wav_fpath = Path(audio_file_path)
wav = preprocess_wav(wav_fpath)
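If you would rather keep everything in memory instead of writing a WAV file first, the AudioSegment can also be converted to a float32 NumPy array and passed to preprocess_wav directly. This is only a sketch and not from the original post; it assumes the mono, 16 kHz segment from the question, so no extra resampling argument is passed to preprocess_wav:

import numpy as np

def audiosegment_to_float_array(segment):
    # pydub stores integer PCM samples; scale them to float32 in [-1.0, 1.0]
    samples = np.array(segment.get_array_of_samples(), dtype=np.float32)
    return samples / float(1 << (8 * segment.sample_width - 1))

wav = preprocess_wav(audiosegment_to_float_array(podcast))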
Additionally, the preprocess_wav functionality can be achieved using Librosa if desired.
import librosa
import numpy as np

def preprocess_wav(waveform, sr):
    waveform = librosa.resample(waveform, orig_sr=sr, target_sr=16000)
    waveform = waveform.astype(np.float32) / np.max(np.abs(waveform))
    return waveform

waveform, sr = librosa.load('Podcast_Audio/WAV-Film-Release-Clip.wav')
wav = preprocess_wav(waveform, sr)
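As a side note, librosa.load can do the resampling itself if you pass the target rate, so the explicit resample call is only needed when you load at the native rate:

import librosa
import numpy as np

# sr=16000 makes librosa resample while loading; normalization is still done manually
waveform, sr = librosa.load('Podcast_Audio/WAV-Film-Release-Clip.wav', sr=16000)
wav = waveform.astype(np.float32) / np.max(np.abs(waveform))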
I wrote code to remove the background of 8,000 images, but that whole run takes approximately 8 hours to produce the results.
How can I improve its running time, given that I will have to work on larger datasets in the future?
Or do I have to write completely new code? If so, please suggest some sample code.
from rembg import remove
import cv2
import glob
for img in glob.glob('../images/*.jpg'):
    a = img.split('../images/')
    a1 = a[1].split('.jpg')
    try:
        cv_img = cv2.imread(img)
        output = remove(cv_img)
    except:
        continue
    cv2.imwrite('../output image/' + str(a1[0]) + '.png', output)
One simple approach would be to divide the work into multiple threads. See ThreadPoolExecutor for more.
You can play around with max_workers= to see what gets the best results. max_workers can be any positive integer; if you leave it unset, recent Python versions default to min(32, os.cpu_count() + 4).
This sample code is ready to run. It assumes the image files are in the same directory as your main.py and that the output_image directory exists.
import cv2
import rembg
import sys
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
out_dir = Path("output_image")
in_dir = Path(".")
def is_image(absolute_path: Path):
    # is_file() must be called; also accept the .jpg files from the question, not only .png
    return absolute_path.is_file() and absolute_path.suffix.lower() in (".png", ".jpg", ".jpeg")

input_filenames = [p for p in filter(is_image, Path(in_dir).iterdir())]
def process_image(img_path: Path):
    try:
        image = cv2.imread(str(img_path))
        if image is None or not image.data:
            raise cv2.error("read failed")
        output = rembg.remove(image)
        out_path = out_dir / img_path.with_suffix(".png").name
        cv2.imwrite(str(out_path), output)
        return img_path
    except Exception as e:
        print(f"{img_path}: {e}", file=sys.stderr)
executor = ThreadPoolExecutor(max_workers=4)
for result in executor.map(process_image, input_filenames):
    print(f"Processed image: {result}")
Check out the U^2-Net repository. As in u2net_test.py, writing your own remove function and using dataloaders can speed up the process. If alpha matting is not necessary, skip it; otherwise you can add the alpha matting code from rembg.
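# Note: this snippet is adapted from u2net_test.py and assumes that script's imports and
# helpers (torch, Variable, DataLoader, transforms, SalObjDataset, RescaleT, ToTensorLab,
# normPRED, save_output) plus a loaded `net` model are already available.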
def main():
    # --------- 1. get image path and name ---------
    model_name='u2net'#u2netp
    image_dir = os.path.join(os.getcwd(), 'test_data', 'test_images')
    prediction_dir = os.path.join(os.getcwd(), 'test_data', model_name + '_results' + os.sep)
    model_dir = os.path.join(os.getcwd(), 'saved_models', model_name, model_name + '.pth')

    img_name_list = glob.glob(image_dir + os.sep + '*')
    print(img_name_list)

    #1. dataloader
    test_salobj_dataset = SalObjDataset(img_name_list = img_name_list,
                                        lbl_name_list = [],
                                        transform=transforms.Compose([RescaleT(320),
                                                                      ToTensorLab(flag=0)])
                                        )
    test_salobj_dataloader = DataLoader(test_salobj_dataset,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=1)

    for i_test, data_test in enumerate(test_salobj_dataloader):
        print("inferencing:",img_name_list[i_test].split(os.sep)[-1])

        inputs_test = data_test['image']
        inputs_test = inputs_test.type(torch.FloatTensor)

        if torch.cuda.is_available():
            inputs_test = Variable(inputs_test.cuda())
        else:
            inputs_test = Variable(inputs_test)

        d1,d2,d3,d4,d5,d6,d7 = net(inputs_test)

        # normalization
        pred = d1[:,0,:,:]
        pred = normPRED(pred)

        # save results to test_results folder
        if not os.path.exists(prediction_dir):
            os.makedirs(prediction_dir, exist_ok=True)
        save_output(img_name_list[i_test],pred,prediction_dir)

        del d1,d2,d3,d4,d5,d6,d7
Try to use parallelization with multiprocessing, as Mark Setchell mentioned in his comment. I rewrote your code according to Method 8 from here. Multiprocessing should speed up your execution time. I did not test the code, so try it and see if it works.
import glob
from multiprocessing import Pool
import cv2
from rembg import remove
def remove_background(filename):
    a = filename.split("../images/")
    a1 = a[1].split(".jpg")
    try:
        cv_img = cv2.imread(filename)
        output = remove(cv_img)
    except Exception:
        return  # 'continue' is only valid inside a loop; skip this image instead
    cv2.imwrite("../output image/" + str(a1[0]) + ".png", output)

if __name__ == "__main__":
    files = glob.glob("../images/*.jpg")
    with Pool(8) as pool:
        results = pool.map(remove_background, files)
Ah, you used the example from https://github.com/danielgatis/rembg#usage-as-a-library as a template for your code. Maybe try the other example, with a PIL image instead of OpenCV. The latter is mostly less fast, but who knows. Try it with maybe 10 images and compare the execution times.
Here is your code using PIL instead of OpenCV. Not tested.
import glob
from PIL import Image
from rembg import remove
for img in glob.glob("../images/*.jpg"):
a = img.split("../images/")
a1 = a[1].split(".jpg")
try:
cv_img = Image.open(img)
output = remove(cv_img)
except:
continue
output.save("../output image/" + str(a1[0]) + ".png")
My goal is to add a text overlay to a series of clips at a given timestamp and then concatenate them all to make a single video. Currently, the video that is output as 'movie.mp4' only plays the first text overlay and none of the others.
I have looked at other posts (here) to try to recreate this, but I have been unsuccessful.
In the comments of create_final_video(clips, texts, totalDuration) you can see what else I have tried in order to concatenate the clips. That approach also requires removing clip = CompositeVideoClip([clip, text]).set_duration(float(clip_data['clipDuration'])). This second version concatenates the videos, but the text overlay's position ends up at the top right instead of the bottom left, and the text overlays play back to back after one another rather than at the end of each clip.
Below is the first version:
import os
import json
from moviepy.editor import VideoFileClip, CompositeVideoClip, concatenate_videoclips, vfx, TextClip
# reads clip valid clip names from file
def read_valid_clips_list():
# gets clip data
def get_clip_data(filename):
def create_clips(clip_list):
clips = []
texts = []
currentDuration = 0
for filename in clip_list:
filename = filename.strip()
clip_data = get_clip_data(filename)
video_file_path = os.path.join(PATH_TO_RAW_CLIPS, filename)
# create video clip
clip = VideoFileClip(video_file_path)
clip = clip.set_duration(float(clip_data['clipDuration']))
clip = clip.fx(vfx.fadein, .1).fx(vfx.fadeout, .15)
# create text overlay for clip
text = create_text_overlay(clip_data, currentDuration)
# combine clip and text before concatenation
clip = CompositeVideoClip([clip, text]).set_duration(float(clip_data['clipDuration']))
currentDuration += float(clip_data['clipDuration'])
texts.append(text)
clips.append(clip)
return clips, texts, currentDuration
def create_text_overlay(clip_data, currentDuration):
streamerName = str(clip_data.get('streamerName'))
text_clip = TextClip(txt = streamerName, font = FONT_PATH, size = (400,0), color = 'rgb(145, 70, 255)')
tc_width, tc_height = text_clip.size
print(currentDuration)
text_clip = text_clip.set_start(currentDuration)
text_clip = text_clip.set_position(('left', 'bottom'))
text_clip = text_clip.set_duration(2.5)
text_clip = text_clip.crossfadein(0.2).crossfadeout(0.5)
return text_clip
def create_final_video(clips, texts, totalDuration):
vid_clips = concatenate_videoclips(clips, method='compose').set_duration(totalDuration)
# print(type(vid_clips))
# text_clips = concatenate_videoclips(texts).set_duration(totalDuration)
# print(type(text_clips))
# final_movie = CompositeVideoClip([vid_clips, text_clips], size=(1920,1080)).set_duration(totalDuration)
return vid_clips
def create_movie():
valid_list = read_valid_clips_list()
clips, texts, totalDuration = create_clips(valid_list)
movie = create_final_video(clips, texts, totalDuration)
return movie
movie = create_movie()
movie.write_videofile('VideoCompilation\VideoFiles\\videos\movie.mp4')
The solution was to use text_clip = text_clip.set_start(0) instead of set_start(currentDuration): since each text clip is composited into its own video clip before concatenation, its start time is relative to that clip, not to the final concatenated video.
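A minimal sketch of the adjusted overlay function, based on the poster's own create_text_overlay (the currentDuration argument is no longer needed once the start time is relative to each composited clip):

def create_text_overlay(clip_data):
    streamerName = str(clip_data.get('streamerName'))
    text_clip = TextClip(txt=streamerName, font=FONT_PATH, size=(400, 0), color='rgb(145, 70, 255)')
    text_clip = text_clip.set_start(0)  # start relative to the clip it is composited onto
    text_clip = text_clip.set_position(('left', 'bottom'))
    text_clip = text_clip.set_duration(2.5)
    text_clip = text_clip.crossfadein(0.2).crossfadeout(0.5)
    return text_clip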
In a SLURM cluster I am submitting a shell script that calls a python script (both scripts can be found below). When the shell script executes, it gets to the point where the python script is called, but then nothing happens: there is no output, no error message, and the SLURM job keeps running.
I assume the entire contents of the python script are not relevant (but I included it anyway for completeness). For debugging purposes I inserted the print("script started") line at the very beginning to see if it gets run, but it doesn't. The last thing I see in the output is moved to directory.
I tried calling a test.py script containing print("test") right before this, and it gets executed normally.
What could be the reason the python script doesn't start and how can I fix it?
Edit: As user jakub recommended, changing print("script started") to print("script started", flush=True) makes the line get printed successfully. Including several more of these statements revealed that the script was actually running perfectly fine; it just didn't output anything. Adding the same statement inside the for loop that runs continuously also makes all of the previously missing print() statements appear.
The question then becomes: why do the print() statements need flush=True in this script but not in other scripts?
Shell script:
#!/bin/bash
#SBATCH --mail-user=lukas.baehler@pathology.unibe.ch
#SBATCH --mail-type=end,fail
#SBATCH --output=out-ncl
#SBATCH --error=err-ncl
#SBATCH --job-name="Mask RCNN nucleus training and detection"
#SBATCH --time=24:00:00
#SBATCH --partition=gpu
#SBATCH --mem-per-cpu=64G
#SBATCH --gres=gpu:gtx1080ti:1
#SBATCH --constraint=rtx2080
conda init bash
source ~/.bashrc
conda activate nucl
cd MRCNN/samples/nucleus
echo "moved to directory"
python nucleus-pipeline2.py splitTMA
echo "Split TMAs"
Python script:
print("script started")
if __name__ == '__main__':
import argparse
import os
# Copied from later in script because the argparse part was moved up and is
# needed as default in --logs.
ROOT_DIR = os.path.abspath("../../")
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
# Parse command line arguments
parser = argparse.ArgumentParser(
description='Mask R-CNN for nuclei counting and segmentation')
parser.add_argument("command",
metavar="<command>",
help="'splitTMA', 'splitSpot', 'structure', 'train' or 'detect'")
parser.add_argument('--dataset', required=False,
metavar="/path/to/dataset/",
help='Root directory of the dataset')
parser.add_argument('--weights', required=False,
metavar="/path/to/weights.h5",
help="Path to weights .h5 file or 'coco'")
parser.add_argument('--logs', required=False,
default=DEFAULT_LOGS_DIR,
metavar="/path/to/logs/",
help='Logs and checkpoints directory (default=logs/)')
parser.add_argument('--subset', required=False,
metavar="Dataset sub-directory",
help="Subset of dataset to run prediction on")
# Own arguments
parser.add_argument("--input", required=False,
metavar="path/to/input/folder",
help="Optionally specify the input directory. Should only be used with splitTMA, splitSpot and structure.")
parser.add_argument("--output", required=False,
metavar="path/to/output/folder",
help="Optionally specify the output directory. Should only be used with splitTMA, splitSpot and structure.")
args = parser.parse_args()
assert args.command in ["train", "detect", "splitTMA", "splitSpot", "structure"], "Must set command."
################################################################################
# splitTMA
################################################################################
# The original script for this is tma-spot.py
# Splits a TMA into images of its spots.
if args.command == "splitTMA":
import os
import cv2
import numpy as np
from openslide import open_slide
from matplotlib import pyplot as plt
###################
# CONFIGURATION
# Defines the level of resolution for spot recognition
level = 7 # Default 7
# Defines the level of resolution to use for the new images
newLevel = 0 # Default 0 (higest resolution)
# Defines the spot size in pixels (has to be changed if newLevel is changed)
SpotSize = 3072 # Default 3500
# # Shift values are for alignment of the two slides.
# shiftX = 445 - 10
# shiftY = -64 + 10
print("Using the following parameters:\nlevel = {}\nnewLevel = {}\nSpotSize = {}".format(level, newLevel, SpotSize))
###################
# NUCLEUS_DIR = "MRCNN/samples/nucleus"
NUCLEUS_DIR = os.path.abspath("")
os.chdir(NUCLEUS_DIR)
if args.input:
INPUT_DIR = args.input
else:
INPUT_DIR = "slides"
print("Using '{}' as input folder.".format(INPUT_DIR))
if args.output:
OUTPUT_DIR = args.output
else:
OUTPUT_DIR = "spots"
print("Using '{}' as output folder.".format(OUTPUT_DIR))
# mrxs_filenames = [filename for filename in os.listdir("slides") if filename[-5:] == ".mrxs"]
mrxs_filenames = [filename for filename in os.listdir(INPUT_DIR) if filename[-5:] == ".mrxs"]
print("\nFound {} MIRAX files.".format(len(mrxs_filenames)))
# Loop through all .mrxs files.
for filename in mrxs_filenames:
print("\nReading {}\n".format(filename))
# filename = mrxs_filenames[0]
img = open_slide("{}/{}".format(INPUT_DIR, filename))
# # Use if you want to to see the resolution of all the levels.
# for i in range(img.level_count):
# print("Level", i, "dimension", img.level_dimensions[i],"down factor",img.level_downsamples[i])
# Use the level set previously and read the slide as an RGB image.
x_img = img.read_region((0,0), level, img.level_dimensions[level])
x_img = np.array(x_img)
rgb = np.zeros_like(x_img)
rgb[x_img==0] = 255
rgba_im = cv2.add(rgb,x_img)
imgLevel = cv2.cvtColor(rgba_im,cv2.COLOR_RGBA2RGB)
# plt.imsave("./Output/level" + str(level) + ".png", imgLevel) # <---------- USE FOR DEBUGGING
# Converts the image to gray levels and applies a gussian blur.
gray = cv2.cvtColor(imgLevel, cv2.COLOR_BGR2GRAY)
gray_blur = cv2.GaussianBlur(gray, (3, 3), 0)
# cv2.imwrite( "./Output/gray.png", gray_blur) # <-------------------------- USE FOR DEBUGGING
# Use an Otsu binarization to generate a mask for where tissue is.
ret3, thresh = cv2.threshold(gray_blur, 8, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
thresh = ~thresh
cont_img = thresh.copy()
# cv2.imwrite( "spots/cd3/contour.png", cont_img) # <------------------------ USE FOR DEBUGGING
# Finds the contours of the mask generated.
contours, hierarchy = cv2.findContours(cont_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Loop through all contours
spot_nr = 0
for cnt in contours:
# Decide based on the area of the contour if it is a spot
area = cv2.contourArea(cnt)
spotInfo = []
x, y, w, h = cv2.boundingRect(cnt)
if area < 100 or area > 2000:
spotInfo.append([-1, x, y, w, h])
continue
if len(cnt) < 5:
spotInfo.append([-1, x, y, w, h])
continue
# Calculate the center of the spot
centerX = x + int(w/2)
centerY = y + int(h/2)
# Calculate how much it needs to be scaled
factorOld = img.level_downsamples[level]
factorNew = img.level_downsamples[newLevel]
# Read the spot region
spot = img.read_region((int(centerX * factorOld)-int(SpotSize/2),
int(centerY * factorOld)-int(SpotSize/2)),
newLevel, (SpotSize, SpotSize))
spot = cv2.cvtColor(np.array(spot), cv2.COLOR_RGBA2RGB)
# Create directory and save the image
if not os.path.isdir("{}/{}".format(OUTPUT_DIR, filename[:-5])):
os.makedirs("{}/{}".format(OUTPUT_DIR, filename[:-5]))
spot_name = "{0}/{1}/{1}-{2}.png".format(OUTPUT_DIR, filename[:-5],str(spot_nr).zfill(3))
plt.imsave(spot_name, spot)
spot_nr += 1
print("Spot {} saved - Center X and Y: {}, {}".format(spot_nr, centerX, centerY))
exit()
################################################################################
# splitSpot
################################################################################
# This is copied from spot-annotation.py
# Splits spots into tiles
if args.command == "splitSpot":
import os
import sys
import argparse
import re
import numpy as np
import cv2
from matplotlib import pyplot as plt
# VARIABLES
# Change the resolution of the tiles here. Note the image resolution
# must be an integer multiple of the tile resolutions (both dimensions).
tile_resolution = [768, 768]
# NUCLEUS_DIR = "MRCNN/samples/nucleus"
NUCLEUS_DIR = os.path.abspath("")
os.chdir(NUCLEUS_DIR)
if args.input:
INPUT_DIR = args.input
else:
INPUT_DIR = "spots"
print("\nUsing '{}' as input folder.".format(INPUT_DIR))
if args.output:
OUTPUT_DIR = args.output
else:
OUTPUT_DIR = "tiles"
print("Using '{}' as output folder.".format(OUTPUT_DIR))
# EXECUTION
TMA_folders = os.listdir(INPUT_DIR)
spot_names = []
spot_count = 0
for name in TMA_folders:
spot_names.append(os.listdir("{}/{}".format(INPUT_DIR, name)))
spot_count += len(spot_names[-1])
print("\nFound {} TMA folders with a total of {} spot images.".format(len(TMA_folders), spot_count))
for a, TMA in enumerate(TMA_folders):
for b, spot in enumerate(spot_names[a]):
print("TMA: {}/{} - Spot: {}/{}".format(a+1, len(TMA_folders), b+1, len(spot_names[a])), end="\r")
# Read the image
img = cv2.imread("{}/{}/{}".format(INPUT_DIR,TMA, spot))
# Calculate how many tiles will be produced
tilesX = img.shape[0]/tile_resolution[0]
tilesY = img.shape[1]/tile_resolution[1]
assert (tilesX == int(tilesX) and tilesY == int(tilesY)), "Image resolution is not an integer multiple of the tile resolution."
tilesX, tilesY = int(tilesX), int(tilesY)
# Create the np array that will hold the tiles
tiles = np.zeros([tilesY,tilesX,tile_resolution[0],tile_resolution[1],3])
# Loop through all tiles and store them in tiles
for i in range(tilesX):
for j in range(tilesY):
tiles[j,i] = img[i*tile_resolution[0]:(i+1)*tile_resolution[0],
j*tile_resolution[1]:(j+1)*tile_resolution[1]]
tiles = tiles.astype("uint8")
# print("\nImage was split into {} tiles.".format(tiles.shape[0]*tiles.shape[1]))
# Save all the tiles
for x in range(tiles.shape[0]):
for y in range(tiles.shape[1]):
# Displays progression
# print("Saving {}/{} images...".format(str(x*tiles.shape[0]+y+1),tiles.shape[0]*tiles.shape[1]), end="\r")
# Using the plt.imsave() gives alterations in color which is
# presumably bad. Using cv2.imwrite() is also ca. 10 times faster.
imdir = "{}/{}/{}".format(OUTPUT_DIR, TMA, spot[:-4])
imname = "{}-{}-{}.png".format(spot[:-4], str(x).zfill(2), str(y).zfill(2))
if not os.path.isdir(imdir):
os.makedirs(imdir)
cv2.imwrite("{}/{}".format(imdir, imname), tiles[x,y])
print("\nSaved images in {} as [spot_name]-x-y.png.".format(OUTPUT_DIR))
exit()
################################################################################
# Prepare Data Structure
################################################################################
# Adapted from prepare-data-structure.py
# Creates the data structure required for the network
if args.command == "structure":
import os
from shutil import copyfile
NUCLEUS_DIR = os.path.abspath("")
os.chdir(NUCLEUS_DIR)
# Setting input and output directories
if args.input:
INPUT_DIR = args.input
else:
INPUT_DIR = "tiles"
print("\nUsing '{}' as input folder.".format(INPUT_DIR))
if args.output:
OUTPUT_DIR = args.output
else:
OUTPUT_DIR = "data"
print("Using '{}' as output folder.".format(OUTPUT_DIR))
# Creates a list with the paths of all tiles. Also stores just the
# filename itself with and without file extension
file_names = []
for path,_,files in os.walk(INPUT_DIR):
for f in files:
file_names.append(["{}/{}".format(path, f), f, f[:-4]])
print("\nFound {} images.".format(len(file_names)))
assert file_names != [], "No images found in input folder."
# The dataset needs to be stored inside another folder (default "own_data")
subset = "own_data"
# For each file creates the appropriate sub-folders and copies the file.
skip_count = 0
for i,info in enumerate(file_names):
print("Saving {}/{} images.".format(i+1, len(file_names)), end="\r")
dirname = "{}/{}/{}/images".format(OUTPUT_DIR, subset, info[2])
try:
os.makedirs(dirname)
except:
skip_count += 1
continue
copyfile(info[0], "{}/{}".format(dirname, info[1]))
print("\n\nSaved dataset in {}/{}".format(OUTPUT_DIR, subset))
if skip_count > 0:
print("Skipped {} files because they already existed.".format(skip_count))
print("")
exit()
Python buffers stdin, stdout, and stderr by default. print() writes to stdout by default, so you will see this buffered behavior.
From https://stackoverflow.com/a/14258511/5666087 :
Python opens the stdin, -out and -error streams in a buffered mode; it'll read or write in larger chunks, keeping data in memory until a threshold is reached.
You can forcibly flush this buffer by passing flush=True to print. See the documentation for more information. If you have multiple print statements in a row, you need only use flush=True in the last one.
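The difference from other scripts is most likely that under SLURM stdout is redirected to a file, so Python block-buffers it instead of line-buffering it as it would on an interactive terminal, and a long-running loop may never fill the buffer. You can flush individual prints or switch the whole stream to line buffering; running the interpreter with python -u (or setting PYTHONUNBUFFERED=1 in the batch script) has the same effect. A short sketch:

import sys

print("script started", flush=True)  # flush just this one statement

# Or make stdout line-buffered for the rest of the run
# (equivalent to running `python -u` or setting PYTHONUNBUFFERED=1):
sys.stdout.reconfigure(line_buffering=True)
print("subsequent prints now appear immediately")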
This code creates TFRecords, TensorFlow's standard input format, holding the audio and labels taken from video samples. The resulting files are given as input for training a neural network.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import menpo
import tensorflow as tf
import numpy as np
import os
from io import BytesIO
from pathlib import Path
from moviepy.editor import VideoFileClip
from menpo.visualize import progress_bar_str, print_progress
from moviepy.audio.AudioClip import AudioArrayClip
root_dir = Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/RECOLA') #Where RECOLA is located
portion_to_id = dict(
train = [1], # 25
valid = [70, 71],
test = [80, 81] # 54, 53
) #samples taken
def get_samples(subject_id): #location of arousal and valence files and appropriate video sample
arousal_label_path = root_dir / 'ratings_individual/arousal/{}.csv'.format(subject_id)
valence_label_path = root_dir / 'ratings_individual/valence/{}.csv'.format(subject_id)
clip = VideoFileClip(str(root_dir /"Video_recordings_MP4/{}.mp4".format(subject_id)))
subsampled_audio = clip.audio.set_fps(16000)
audio_frames = []
for i in range(1, 7501): #extract audio sample
try:
time = 0.04 * i
audio = np.array(list(subsampled_audio.subclip(time - 0.04, time).iter_frames()))
audio = audio.mean(1)[:640]
audio_frames.append(audio.astype(np.float32))
except ValueError:
print('Not float')
quit()
try:
arousal = np.loadtxt(str(arousal_label_path), delimiter=',')[:+1][1:]
valence = np.loadtxt(str(valence_label_path), delimiter=',')[:+1][1:]
return audio_frames, np.dstack([arousal, valence])[0].astype(np.float32) #return audio frames
except ValueError:
print('problem')
def get_jpg_string(im):
# Gets the serialized jpg from a menpo `Image`.
fp = BytesIO()
menpo.io.export_image(im, fp, extension='jpg')
fp.seek(0)
return fp.read()
def _int_feauture(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feauture(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def serialize_sample(writer, subject_id):
subject_name = 'P{}'.format(subject_id)
print(subject_name)
print(get_samples)
#repeat for each sample
for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):
example = tf.train.Example(features=tf.train.Features(feature={
'sample_id': _int_feauture(i),
'subject_id': _int_feauture(subject_id),
'label': _bytes_feauture(label.tobytes()),
'raw_audio': _bytes_feauture(audio.tobytes()),
})) #extract sample_id,subject_id,label and raw_audio
writer.write(example.SerializeToString())
del audio, label
def main(directory):
print('In Main')
for portion in portion_to_id.keys():
print(portion)
for subj_id in print_progress(portion_to_id[portion]):
temp = (directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
).as_posix() #display sample
print(temp)
writer = tf.python_io.TFRecordWriter(
(directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
).as_posix()) #write to tfrecords
serialize_sample(writer, subj_id)
if __name__ == "__main__":
print("Calling Main")
main(Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/records')) #save tfrecord
This code raises an error and terminates. I have given all the paths needed to locate the input video.
Error
for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):
TypeError: zip() argument after * must be an iterable, not NoneType
Why do I get this error?
Do you have the following video/audio files in your test, train and valid folders:
train = P1.mp4
valid = P70.mp4 , P71.mp4
test = P80.mp4 , P81.mp4 ??
Because the call zip(*get_samples(subject_name)) seems to be unable to fetch the data: get_samples is returning None.
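To surface the real failure instead of the zip() TypeError, it may help to check what get_samples returns before unpacking it. This is only a sketch that adapts the poster's serialize_sample; the rest of the script is assumed unchanged:

def serialize_sample(writer, subject_id):
    subject_name = 'P{}'.format(subject_id)
    samples = get_samples(subject_name)
    if samples is None:
        # get_samples prints 'problem' and falls through to return None when
        # loading the arousal/valence CSV files fails for this subject.
        raise RuntimeError("get_samples({!r}) returned None - check the input files".format(subject_name))
    audio_frames, labels = samples
    for i, (audio, label) in enumerate(zip(audio_frames, labels)):
        ...  # build and write the tf.train.Example as in the original loop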