RuntimeError: Error opening, System error, when reading a wav file - Python

First of all, I am a Python beginner. I need to read my clean.wav (cut into 50 slices, haha0.wav to haha49.wav) and noise.wav, and mix them at SNRs of 5, 10 and 15 dB, then plot the waveform and spectrogram before and after mixing. But the terminal always tells me RuntimeError: Error opening 'D:\python\pythonnoisy\add_white\haha0_white_snr5.wav': System error. However, I can see this file in my folder.
import os
import re
import sys
import wave
import librosa
import matplotlib
import numpy as np
import pylab as pl
import soundfile as sf
import matplotlib.pyplot as plt
from scipy.fftpack import fft
wavedir = r"D:\python"
noisydir = wavedir+"\\pythonnoisy"
noisedir = wavedir+"\\pythonnoise"
cleandir = wavedir+"\\pythonclean"
def add_noise(noisydir, noisedir, cleandir, snr):  # noisy
    noisewav = "white.wav"
    noise, fs = sf.read(noisedir+"\\"+noisewav)  # read the white-noise .wav
    noisy_splitdir = noisydir+"\\"+"add_"+noisewav[:-4]+"\\"
    # loop over every file in the clean-speech folder
    for cleanwav in os.listdir(cleandir):
        clean, Fs = sf.read(cleandir+"\\"+cleanwav)  # read the clean .wav file
        # sample rate: clean == noise, and duration: clean <= noise
        if fs == Fs and len(clean) <= len(noise):
            cleanenergy = np.sum(np.power(clean, 2))  # power of the clean file (amplitude squared)
            # random start index in [1, len(noise) - len(clean) + 1)
            ind = np.random.randint(1, len(noise) - len(clean) + 1)
            noiselen = noise[ind:len(clean) + ind]
            noiseenergy = np.sum(np.power(noiselen, 2))  # power of the noise segment
            # scale so that 10*log10(cleanenergy / (noiseenergy * ratio2**2)) == snr
            ratio2 = np.sqrt(
                (cleanenergy / noiseenergy) / (np.power(10, snr * 0.1)))
            noisyAudio = clean + noiselen * ratio2  # mixed waveform
            # output path + filename for the mixed file
            noisywavname = noisy_splitdir + \
                cleanwav[:-4]+"_"+noisewav[:-4]+"_snr"+str(snr)+".wav"
            sf.write(noisywavname, noisyAudio, 44100)  # write the mixed .wav
def draw_clean(cleandir, j):
    f = wave.open(
        r"D:\python\pythonclean\haha" + str(j) + ".wav", "rb")
    params = f.getparams()
    nchannels, sampwidth, framerate, nframes = params[:4]
    str_data = f.readframes(nframes)
    f.close()
    wave_data = np.fromstring(str_data, dtype=np.short)
    wave_data.shape = -1, 2
    wave_data = wave_data.T
    time = np.arange(0, nframes) * (1.0 / framerate)
    plt.subplot(4, 2, 7)
    plt.plot(time, wave_data[0])
    plt.xlabel("time(s)")
    plt.subplot(4, 2, 8)
    plt.specgram(wave_data[0], Fs=framerate)
    plt.xlabel("time(s)")
def draw_noisy(noisydir, j):
    noisydirr = noisydir+"\\add_white\\haha"
    for k in range(5, 20, 5):
        f = wave.open(r"D:\python\pythonnoisy\add_white\haha" +
                      str(j)+"_white_snr"+str(k)+".wav", "rb")
        params = f.getparams()
        nchannels, sampwidth, framerate, nframes = params[:4]
        str_data = f.readframes(nframes)
        wave_data = np.fromstring(str_data, dtype=np.short)
        wave_data.shape = -1, 2
        wave_data = wave_data.T
        time = np.arange(0, nframes) * (1.0 / framerate)
        plt.subplot(4, 2, k/5*2-1)
        plt.plot(time, wave_data[0])
        plt.subplot(4, 2, k/5*2)
        plt.specgram(wave_data[0], Fs=framerate)
wavedir = r"D:\python"
noisydir = wavedir+"\\pythonnoisy"
noisedir = wavedir+"\\pythonnoise"
cleandir = wavedir+"\\pythonclean"
level = [5, 10, 15]
for snr in level:
    add_noise(noisydir, noisedir, cleandir, snr)
for j in range(0, 51):
    draw_clean(cleandir, j)
    draw_noisy(noisydir, j)
    picture = noisydir+"\picture\hahawhite"+str(j)+".png"
    plt.savefig(picture)
    plt.close()
[screenshot: file location]
[screenshot: terminal output]
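For what it's worth, soundfile raises a RuntimeError ending in "System error" whenever the underlying library cannot open the path at all, and one common trigger (an assumption here, not something the screenshots confirm) is that sf.write is asked to create haha0_white_snr5.wav inside an add_white folder that does not exist yet. Below is a minimal sketch of guarding the write under that assumption; the helper name write_noisy is mine and not part of the original script:

import os
import soundfile as sf

def write_noisy(noisywavname, noisyAudio, samplerate=44100):
    # sf.write cannot create intermediate directories itself, so make sure
    # the destination folder exists before writing the mixed file.
    os.makedirs(os.path.dirname(noisywavname), exist_ok=True)
    sf.write(noisywavname, noisyAudio, samplerate)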

Related

Small unpacking buffer for wave.struct

I am building an SSTV program that can convert a 180x136 picture into sound using its RGB components. But when I try to read data from the microphone, it gives me
struct.error: unpack requires a buffer of 400 bytes
when I use only 200 bytes.
I expected the output to be the decoded microphone input (in short: microphone -> decode signal -> image).
I tried using chunk*2, but it gave me the same error:
struct.error: unpack requires a buffer of 800 bytes
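One way to see the size relationship is a small sketch that derives the unpack count from the buffer actually returned instead of from chunk; this assumes 16-bit samples, and bytes_to_samples is a hypothetical helper, not code from the program below:

import struct
import numpy as np

def bytes_to_samples(data, sampwidth=2):
    # Size the format string from the buffer itself, so the number of
    # 16-bit values requested always matches the bytes available.
    count = len(data) // sampwidth
    return np.array(struct.unpack("%dh" % count, data))

If count differs from chunk, the Blackman window would also have to be rebuilt to the same length (np.blackman(count)) before the element-wise multiplication.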
Here is the code for decoder:
import numpy as np
import pyaudio
import wave
from tkinter import *
root = Tk()
c = Canvas(root, width=90*8, height=68*8, bg='white')
c.pack()
def pix(x, y, rr, rg, rb):  # r + (r/g/b) is raw rgb because we are decoding frequency
    if rr < 0:
        r = 0
    if rg < 0:
        g = 0
    if rb < 0:
        b = 0
    try:
        r = min(255,round(rr * (255 / 1124)))
    except:
        r = 0
    try:
        g = min(255,round(rg * (255 / 1124)))
    except:
        g = 0
    try:
        b = min(255,round(rb * (255 / 1124)))
    except:
        b = 0
    if r < 0:
        r = 0
    if g < 0:
        g = 0
    if b < 0:
        b = 0
    print(r,g,b)
    n = hex(r).replace("0x","")
    rh = (2 - len(n)) * "0" + n
    n = hex(g).replace("0x", "")
    gh = (2 - len(n)) * "0" + n
    n = hex(b).replace("0x", "")
    bh = (2 - len(n)) * "0" + n
    c.create_line(x, y, x + 4, y, fill=f"#{rh}{gh}{bh}")
    c.create_line(x, y+1, x + 4, y+1, fill=f"#{rh}{gh}{bh}")
    c.create_line(x, y+2, x + 4, y+2, fill=f"#{rh}{gh}{bh}")
    c.create_line(x, y+3, x + 4, y+3, fill=f"#{rh}{gh}{bh}")
chunk = 100
# open up a wave
wf = wave.open('input.wav', 'rb')
swidth = wf.getsampwidth()
RATE = wf.getframerate()
# use a Blackman window
window = np.blackman(chunk)
# open stream
p = pyaudio.PyAudio()
stream = p.open(format =
                p.get_format_from_width(wf.getsampwidth()),
                channels = wf.getnchannels(),
                rate = RATE,
                output = True)
microphone = p.open(format =
                    p.get_format_from_width(wf.getsampwidth()),
                    channels = wf.getnchannels(),
                    rate = RATE,
                    input = True)
# read some data
data = wf.readframes(chunk)
#print(len(data))
#print(chunk*swidth)
pc = 0
x = 0
e = [255,252]
totaly = 0
rrgb = [255, 255, 255] # raw rgb
# play stream and find the frequency of each chunk
while True:
    data = microphone.read(chunk)
    # write data out to the audio stream
    stream.write(data)
    indata = np.array(wave.struct.unpack("%dh"%((len(data))/swidth),data))*window  #"%dh"%((len(data)-1)/swidth)
    # Take the fft and square each value
    fftData = abs(np.fft.rfft(indata))**2
    # find the maximum
    which = fftData[1:].argmax() + 1
    # use quadratic interpolation around the max
    if which != len(fftData)-1:
        y0,y1,y2 = np.log(fftData[which-1:which+2:])
        x1 = (y2 - y0) * .5 / (2 * y1 - y2 - y0)
        # find the frequency and output it
        thefreq = (which+x1)*RATE/chunk
        if thefreq>3030 or totaly==540:
            #print("PING!",f"The freq is %f Hz. {pc}" % (thefreq))
            if pc==0:
                totaly = x
            pc+=4
            x=0
            root.update()
        else:
            #print(f"The freq is %f Hz. min {min(e)} max {max(e)}, {x//4}" % (thefreq), end=" ")
            if x//4 % 3 == 0:
                pix(x / 3, pc, rrgb[0], rrgb[1], rrgb[2])  # /0.77039274924
                rrgb[0] = thefreq
                #print("R")
            elif x//4 % 3 == 1:
                rrgb[1] = thefreq
                #print("G")
            elif x//4 % 3 == 2:
                rrgb[2] = thefreq
                #print("B")
            #e.append(thefreq)
            #pix()
            x+=4  #print("The freq is %f Hz." % (thefreq))
    else:
        thefreq = which*RATE/chunk
        #print("The freq is %f Hz." % (thefreq))
    # read some more data
    data = microphone.read(chunk)
    if data:
        stream.write(data)
stream.close()
p.terminate()
root.mainloop()
Here is the code for encoder:
import numpy as np
# Importing Image from PIL package
from PIL import Image
# creating a image object
im = Image.open(r"original.bmp")
px = im.load()
arx = []
art = []
pix = ()
pixels = list(im.getdata())
width, height = im.size
pixels = [pixels[i * width:(i + 1) * width] for i in range(height)]
print(pixels)
for ypos in range(0,136):
    for xpos in range(0,180):
        pix = pixels[ypos][xpos]
        #gray = ((pix[0])+(pix[1])+(pix[2]))/3
        rgb = (pix[0],pix[1],pix[2])
        arx.append(rgb)
    arx.append(406)
    art.append(arx)
rate = 44100 # samples per second
T = 3 # sample duration (seconds)
slowdown = int(input("Speed in Hz:"))
f = 100.0/slowdown # sound frequency (Hz)
print("speed multiplier is",slowdown)
encoding = "ansi"
total = []
xpos2 = 0
c = 0
for ypos in art:
    for xpos in ypos:
        #print(xpos)
        t = np.linspace(0, 0.01, (slowdown), endpoint=False)
        if xpos==406:
            sig = np.sin(2 * np.pi * f * 500 * t)
            total.extend(sig)
            sig = np.sin(2 * np.pi * f * 700 * t)
            total.extend(sig)
            continue
        sig = np.sin(2 * np.pi * f * xpos[0] * t)
        total.extend(sig)
        sig = np.sin(2 * np.pi * f * xpos[1] * t)
        total.extend(sig)
        sig = np.sin(2 * np.pi * f * xpos[2] * t)
        total.extend(sig)
        print(xpos)
    sig = np.sin(2 * np.pi * f * 300 * t)
    break
import wavio
wavio.write("input.wav", total, rate, sampwidth=2)
input("done, saved as \'input.wav\'")

Add alpha channel to an .exr file in python

I'm trying to add an alpha channel to an .exr file (RGB) using the OpenEXR Python library or OpenCV, but I couldn't figure out how. Here's where I am.
import OpenEXR,Imath,array,os
import numpy as np
def write_exr_alpha(dir,output_dir):
    z_file = OpenEXR.InputFile(dir)
    FLOAT = Imath.PixelType(Imath.PixelType.FLOAT)
    (z,_,_) = [array.array('f', z_file.channel(Chan, FLOAT)).tolist() for Chan in ("R", "G", "B") ]
    dw = z_file.header()['dataWindow']
    sz = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1)
    os.makedirs(os.path.dirname(output_dir), exist_ok=True)
    # get alpha values
    As = np.ones(np.array(z).shape)
    pos = np.where(np.array(z) > 100)[0]
    As[pos] = 0
    zs = array.array('f',z).tobytes()
    out = OpenEXR.OutputFile(output_dir, OpenEXR.Header(sz[0], sz[1]))
    # write to .exr with alpha channel
    out.writePixels({'R' : zs, 'G' : zs, 'B' : zs ,'A': As})
and OpenCV
import os,cv2
import numpy as np
def write_exr_alpha(dir,output_dir):
    os.makedirs(os.path.dirname(output_dir), exist_ok=True)
    image = cv2.imread(dir, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
    normal_b, normal_g, normal_r = cv2.split(image)
    As = np.ones(normal_b.shape, dtype=normal_b.dtype)
    pos = np.where(np.array(image[:,:,0]) > 100)
    for i in range(0,len(pos)):
        As[pos[0][i]][pos[1][i]] = 0
    image_out = cv2.merge((normal_r, normal_g, normal_b, As))
    cv2.imwrite(output_dir,image_out)
Edit: A test sample.
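As a point of reference for the OpenEXR route: a header created with OpenEXR.Header(width, height) only declares R, G and B, so an 'A' channel has to be added to header['channels'], and every plane passed to writePixels should be raw bytes of the declared pixel type. Below is a hedged sketch under those assumptions, reusing the z > 100 cutoff from the code above; the paths and the helper name add_alpha are placeholders:

import OpenEXR, Imath
import numpy as np

def add_alpha(src_path, dst_path, cutoff=100.0):
    exr = OpenEXR.InputFile(src_path)
    FLOAT = Imath.PixelType(Imath.PixelType.FLOAT)
    dw = exr.header()['dataWindow']
    width, height = dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1

    # read one channel as float32 and derive a binary alpha mask from it
    r = np.frombuffer(exr.channel('R', FLOAT), dtype=np.float32)
    alpha = np.where(r > cutoff, 0.0, 1.0).astype(np.float32)

    header = OpenEXR.Header(width, height)
    header['channels']['A'] = Imath.Channel(FLOAT)  # declare the extra channel
    out = OpenEXR.OutputFile(dst_path, header)
    out.writePixels({'R': r.tobytes(), 'G': r.tobytes(),
                     'B': r.tobytes(), 'A': alpha.tobytes()})
    out.close()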

Deploy model to the web with flask

I am trying to deploy a low-light image enhancement model to the web using Flask. The main thing I want to do is run a webpage on localhost; when I upload a low-light image, the model should enhance it and show the result on the same webpage.
Here is my test.py code:
from glob import glob
import numpy as np
import scipy
import keras
import os
import Network
import utls
import time
import cv2
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--input", "-i", type=str, default='../input', help='test image folder')
parser.add_argument("--result", "-r", type=str, default='../result', help='result folder')
parser.add_argument("--model", "-m", type=str, default='Syn_img_lowlight_withnoise', help='model name')
parser.add_argument("--com", "-c", type=int, default=1, help='output with/without origional image and mid result')
parser.add_argument("--highpercent", "-hp", type=int, default=95, help='should be in [85,100], linear amplification')
parser.add_argument("--lowpercent", "-lp", type=int, default=5, help='should be in [0,15], rescale the range [p%,1] to [0, 1]')
parser.add_argument("--gamma", "-g", type=int, default=8, help='should be in [6,10], increase the saturability')
parser.add_argument("--maxrange", "-mr", type=int, default=8, help='linear amplification range')
arg = parser.parse_args()
result_folder = arg.result
if not os.path.isdir(result_folder):
    os.makedirs(result_folder)
input_folder = arg.input
path = glob(input_folder+'/*.*')
model_name = arg.model
mbllen = Network.build_mbllen((None, None, 3))
mbllen.load_weights('../models/'+model_name+'.h5')
opt = keras.optimizers.Adam(lr=2 * 1e-04, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
mbllen.compile(loss='mse', optimizer=opt)
flag = arg.com
lowpercent = arg.lowpercent
highpercent = arg.highpercent
maxrange = arg.maxrange/10.
hsvgamma = arg.gamma/10.
for i in range(len(path)):
    img_A_path = path[i]
    img_A = utls.imread_color(img_A_path)
    img_A = img_A[np.newaxis, :]
    starttime = time.process_time()
    out_pred = mbllen.predict(img_A)
    endtime = time.process_time()
    print('The ' + str(i+1)+'th image\'s Time:' +str(endtime-starttime)+'s.')
    fake_B = out_pred[0, :, :, :3]
    fake_B_o = fake_B
    gray_fake_B = fake_B[:, :, 0] * 0.299 + fake_B[:, :, 1] * 0.587 + fake_B[:, :, 1] * 0.114
    percent_max = sum(sum(gray_fake_B >= maxrange))/sum(sum(gray_fake_B <= 1.0))
    # print(percent_max)
    max_value = np.percentile(gray_fake_B[:], highpercent)
    if percent_max < (100-highpercent)/100.:
        scale = maxrange / max_value
        fake_B = fake_B * scale
        fake_B = np.minimum(fake_B, 1.0)
    gray_fake_B = fake_B[:,:,0]*0.299 + fake_B[:,:,1]*0.587 + fake_B[:,:,1]*0.114
    sub_value = np.percentile(gray_fake_B[:], lowpercent)
    fake_B = (fake_B - sub_value)*(1./(1-sub_value))
    imgHSV = cv2.cvtColor(fake_B, cv2.COLOR_RGB2HSV)
    H, S, V = cv2.split(imgHSV)
    S = np.power(S, hsvgamma)
    imgHSV = cv2.merge([H, S, V])
    fake_B = cv2.cvtColor(imgHSV, cv2.COLOR_HSV2RGB)
    fake_B = np.minimum(fake_B, 1.0)
    if flag:
        outputs = np.concatenate([img_A[0,:,:,:], fake_B_o, fake_B], axis=1)
    else:
        outputs = fake_B
    filename = os.path.basename(path[i])
    img_name = result_folder+'/' + filename
    # scipy.misc.toimage(outputs * 255, high=255, low=0, cmin=0, cmax=255).save(img_name)
    outputs = np.minimum(outputs, 1.0)
    outputs = np.maximum(outputs, 0.0)
    utls.imwrite(img_name, outputs)
The data_load.py code:
from glob import glob
import numpy as np
import random
import scipy
import os
import cv2 as cv
class Dataloader():
    def __init__(self, dataset_name, crop_shape=(256, 256)):
        self.dataset_name = dataset_name
        self.crop_shape = crop_shape

    def imread_color(self, path):
        img = cv.imread(path, cv.IMREAD_COLOR | cv.IMREAD_ANYDEPTH)/255.
        b, g, r = cv.split(img)
        img_rgb = cv.merge([r, g, b])
        return img_rgb

    def imwrite(self, path, img):
        r, g, b = cv.split(img)
        img_rgb = cv.merge([b, g, r])
        cv.imwrite(path, img_rgb)

    def load_data(self, batch_size=16):
        path = glob('../dataset/train/*.jpg')
        self.n_batches = int(len(path) / batch_size)
        while 1:
            random.shuffle(path)
            for i in range(self.n_batches - 1):
                batch_path = path[i * batch_size:(i + 1) * batch_size]
                input_imgs = np.empty((batch_size, self.crop_shape[0], self.crop_shape[1], 6), dtype="float32")
                gt = np.empty((batch_size, self.crop_shape[0], self.crop_shape[1], 3), dtype="float32")
                number = 0
                for img_B_path in batch_path:
                    img_B = self.imread_color(img_B_path)
                    path_mid = os.path.split(img_B_path)
                    path_A_1 = path_mid[0] + '_' + self.dataset_name
                    path_A = os.path.join(path_A_1, path_mid[1])
                    img_A = self.imread_color(path_A)
                    nw = random.randint(0, img_B.shape[0] - self.crop_shape[0])
                    nh = random.randint(0, img_B.shape[1] - self.crop_shape[1])
                    crop_img_A = img_A[nw:nw + self.crop_shape[0], nh:nh + self.crop_shape[1], :]
                    crop_img_B = img_B[nw:nw + self.crop_shape[0], nh:nh + self.crop_shape[1], :]
                    if np.random.randint(2, size=1)[0] == 1:  # random flip
                        crop_img_A = np.flipud(crop_img_A)
                        crop_img_B = np.flipud(crop_img_B)
                    if np.random.randint(2, size=1)[0] == 1:
                        crop_img_A = np.fliplr(crop_img_A)
                        crop_img_B = np.fliplr(crop_img_B)
                    if np.random.randint(2, size=1)[0] == 1:  # random transpose
                        crop_img_A = np.transpose(crop_img_A, (1, 0, 2))
                        crop_img_B = np.transpose(crop_img_B, (1, 0, 2))
                    input_imgs[number, :, :, :] = np.concatenate([crop_img_A, crop_img_B], axis=-1)
                    gt[number, :, :, :] = crop_img_B
                    number += 1
                yield input_imgs, gt
The utls.py code:
import tensorflow as tf
from keras import backend as K
import numpy as np
import scipy
import os
import cv2 as cv
def bright_mae(y_true, y_pred):
    return K.mean(K.abs(y_pred[:,:,:,:3] - y_true[:,:,:,:3]))

def bright_mse(y_true, y_pred):
    return K.mean((y_pred[:,:,:,:3] - y_true[:,:,:,:3])**2)

def bright_AB(y_true, y_pred):
    return K.abs(K.mean(y_true[:,:,:,:3])-K.mean(y_pred[:,:,:,:3]))

def log10(x):
    numerator = K.log(x)
    denominator = K.log(K.constant(10, dtype=numerator.dtype))
    return numerator / denominator

def bright_psnr(y_true, y_pred):
    mse = K.mean((K.abs(y_pred[:,:,:,:3] - y_true[:,:,:,:3])) ** 2)
    max_num = 1.0
    psnr = 10 * log10(max_num ** 2 / mse)
    return psnr

def _tf_fspecial_gauss(size, sigma):
    """Function to mimic the 'fspecial' gaussian MATLAB function
    """
    x_data, y_data = np.mgrid[-size//2 + 1:size//2 + 1, -size//2 + 1:size//2 + 1]
    x_data = np.expand_dims(x_data, axis=-1)
    x_data = np.expand_dims(x_data, axis=-1)
    y_data = np.expand_dims(y_data, axis=-1)
    y_data = np.expand_dims(y_data, axis=-1)
    x = tf.constant(x_data, dtype=tf.float32)
    y = tf.constant(y_data, dtype=tf.float32)
    g = tf.exp(-((x**2 + y**2)/(2.0*sigma**2)))
    return g / tf.reduce_sum(g)

def tf_ssim(img1, img2, cs_map=False, mean_metric=True, size=11, sigma=1.5):
    window = _tf_fspecial_gauss(size, sigma)  # window shape [size, size]
    K1 = 0.01
    K2 = 0.03
    L = 1  # depth of image (255 in case the image has a differnt scale)
    C1 = (K1*L)**2
    C2 = (K2*L)**2
    mu1 = tf.nn.conv2d(img1, window, strides=[1,1,1,1], padding='VALID')
    mu2 = tf.nn.conv2d(img2, window, strides=[1,1,1,1], padding='VALID')
    mu1_sq = mu1*mu1
    mu2_sq = mu2*mu2
    mu1_mu2 = mu1*mu2
    sigma1_sq = tf.nn.conv2d(img1*img1, window, strides=[1,1,1,1], padding='VALID') - mu1_sq
    sigma2_sq = tf.nn.conv2d(img2*img2, window, strides=[1,1,1,1], padding='VALID') - mu2_sq
    sigma12 = tf.nn.conv2d(img1*img2, window, strides=[1,1,1,1], padding='VALID') - mu1_mu2
    if cs_map:
        value = (((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*
                 (sigma1_sq + sigma2_sq + C2)),
                 (2.0*sigma12 + C2)/(sigma1_sq + sigma2_sq + C2))
    else:
        value = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*
                 (sigma1_sq + sigma2_sq + C2))
    if mean_metric:
        value = tf.reduce_mean(value)
    return value

def tf_ms_ssim(img1, img2, mean_metric=True, level=5):
    weight = tf.constant([0.0448, 0.2856, 0.3001, 0.2363, 0.1333], dtype=tf.float32)
    mssim = []
    mcs = []
    for l in range(level):
        ssim_map, cs_map = tf_ssim(img1, img2, cs_map=True, mean_metric=False)
        mssim.append(tf.reduce_mean(ssim_map))
        mcs.append(tf.reduce_mean(cs_map))
        filtered_im1 = tf.nn.avg_pool(img1, [1,2,2,1], [1,2,2,1], padding='SAME')
        filtered_im2 = tf.nn.avg_pool(img2, [1,2,2,1], [1,2,2,1], padding='SAME')
        img1 = filtered_im1
        img2 = filtered_im2
    # list to tensor of dim D+1
    mssim = tf.stack(mssim, axis=0)
    mcs = tf.stack(mcs, axis=0)
    value = (tf.reduce_prod(mcs[0:level-1]**weight[0:level-1])*
             (mssim[level-1]**weight[level-1]))
    if mean_metric:
        value = tf.reduce_mean(value)
    return value

def bright_SSIM(y_true, y_pred):
    SSIM_loss = tf_ssim(tf.expand_dims(y_pred[:,:,:,0], -1), tf.expand_dims(y_true[:,:,:,0], -1)) + tf_ssim(tf.expand_dims(y_pred[:,:,:,1], -1), tf.expand_dims(y_true[:,:,:,1], -1)) + tf_ssim(tf.expand_dims(y_pred[:,:,:,2], -1), tf.expand_dims(y_true[:,:,:,2], -1))
    return SSIM_loss/3

def psnr_cau(y_true, y_pred):
    mse = np.mean((np.abs(y_pred - y_true)) ** 2)
    max_num = 1.0
    psnr = 10 * np.log10(max_num ** 2 / mse)
    return psnr

def save_model(model, name, epoch, batch_i):
    modelname = './Res_models/' + str(epoch) + '_' + str(batch_i) + name + '.h5'
    model.save_weights(modelname)

def imread_color(path):
    img = cv.imread(path, cv.IMREAD_COLOR | cv.IMREAD_ANYDEPTH) / 255.
    b, g, r = cv.split(img)
    img_rgb = cv.merge([r, g, b])
    return img_rgb
    # return scipy.misc.imread(path, mode='RGB').astype(np.float) / 255.

def imwrite(path, img):
    r, g, b = cv.split(img*255)
    img_rgb = cv.merge([b, g, r])
    cv.imwrite(path, img_rgb)
    # scipy.misc.toimage(img * 255, high=255, low=0, cmin=0, cmax=255).save(path)

def range_scale(x):
    return x * 2 - 1.
In the code below, I wanted to make a webpage with an upload button that takes low-light images from users so the model can enhance them. But I could not deploy the model either.
main.py:
from flask import Flask, request
from werkzeug.utils import secure_filename
import torch
def update_image():
app = Flask(
    __name__,
    static_url_path='',
    static_folder=''
)

def get_form():
    return """
    <form action="/" enctype="multipart/form-data" method="POST">
        <input type="file" id="file" name="file">
        <input type="submit">
    </form>
    """

#app.route("/", methods = ['GET', 'POST'])
def hello_world():
    if request.method == 'GET':
        return get_form()
    elif request.method == 'POST':
        f = request.files['file']
        print(f.filename)
        f.save(secure_filename(f.filename))
        update_image(f.filename)
        return f'{get_form()} <img src="{f.filename}"/>'
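As posted, main.py never registers the route (the decorator line is commented out), never calls app.run, and leaves update_image without a body, so nothing is actually served. Below is a minimal, hypothetical skeleton of the upload-enhance-display flow; enhance_image is a placeholder for whatever the test.py pipeline does and is not a function from the MBLLEN code:

from flask import Flask, request
from werkzeug.utils import secure_filename

app = Flask(__name__, static_url_path='', static_folder='')

def enhance_image(path):
    # Placeholder: load the model once at startup, enhance the uploaded
    # image and overwrite (or save next to) the file at `path`.
    pass

@app.route("/", methods=['GET', 'POST'])
def index():
    form = """
    <form action="/" enctype="multipart/form-data" method="POST">
      <input type="file" id="file" name="file">
      <input type="submit">
    </form>
    """
    if request.method == 'POST':
        f = request.files['file']
        filename = secure_filename(f.filename)
        f.save(filename)
        enhance_image(filename)          # hypothetical helper
        return f'{form} <img src="{filename}"/>'
    return form

if __name__ == '__main__':
    app.run(debug=True)                  # serves on localhost:5000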

Heatmapped PNGs not being created on disk

I'm creating a heatmap for a CNN as per this tutorial.
In the last part:
def create_patiens_cam(case, plane):
    patient_id = case['id']
    mri = case['mri']
    folder_path = f'./CAMS/{plane}/{patient_id}/'
    if os.path.isdir(folder_path):
        shutil.rmtree(folder_path)
    os.makedirs(folder_path)
    os.makedirs(folder_path + 'slices/')
    os.makedirs(folder_path + 'cams/')
    params = list(mrnet.parameters())
    weight_softmax = np.squeeze(params[-2].cpu().data.numpy())
    num_slices = mri.shape[1]
    global feature_blobs
    feature_blobs = []
    mri = mri.to(device)
    logit = mrnet(mri)
    size_upsample = (256, 256)
    feature_conv = feature_blobs[0]
    h_x = F.softmax(logit, dim=1).data.squeeze(0)
    probs, idx = h_x.sort(0, True)
    probs = probs.cpu().numpy()
    idx = idx.cpu().numpy()
    slice_cams = returnCAM(feature_blobs[-1], weight_softmax, idx[:1])
    for s in tqdm_notebook(range(num_slices), leave=False):
        slice_pil = (transforms
                     .ToPILImage()(mri.cpu()[0][s] / 255))
        slice_pil.save(folder_path + f'slices/{s}.png',
                       dpi=(300, 300))
        img = mri[0][s].cpu().numpy()
        img = img.transpose(1, 2, 0)
        heatmap = (cv2
                   .cvtColor(cv2.applyColorMap(
                       cv2.resize(slice_cams[s], (256, 256)),
                       cv2.COLORMAP_JET),
                       cv2.COLOR_BGR2RGB)
                   )
        result = heatmap * 0.3 + img * 0.5
        pil_img_cam = Image.fromarray(np.uint8(result))
        pil_img_cam.save(folder_path + f'cams/{s}.png', dpi=(300, 300))
I have created a folder "CAMS" in my 'mrnet' folder. However, when running this last piece of code (in a Jupyter notebook) I get no errors, but no PNGs are created. Does anyone have an idea what could be wrong, or where I could look to find out, given that I get no errors?
FULL CODE:
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 13 21:54:40 2021
#author: GlaDOS
"""
import os
import io
import requests
from PIL import Image
from torchvision import models, transforms
from torch.autograd import Variable
from torch.nn import functional as F
import numpy as np
import cv2
import pdb
from matplotlib import pyplot as plt
import sys
sys.path.append('C:/Users/GlaDOS/mrnet')
import shutil
import torch
import model
from dataloader import MRDataset
from tqdm import tqdm_notebook
task = 'acl'
plane = 'sagittal'
prefix = 'sag'
model_name = [name for name in os.listdir('C:/Users/GlaDOS/mrnet/models/')
              if (task in name) and
              (plane in name) and
              (prefix in name)][0]
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")
mrnet = torch.load(f'C:/Users/GlaDOS/mrnet/models/{model_name}')
mrnet = mrnet.to(device)
_ = mrnet.eval()
dataset = MRDataset('C:/Users/GlaDOS/mrnet/data/',
                    task,
                    plane,
                    transform=None,
                    train=False)
loader = torch.utils.data.DataLoader(dataset,
                                     batch_size=1,
                                     shuffle=False,
                                     num_workers=0,
                                     drop_last=False)
def returnCAM(feature_conv, weight_softmax, class_idx):
    size_upsample = (256, 256)
    bz, nc, h, w = feature_conv.shape
    slice_cams = []
    for s in range(bz):
        for idx in class_idx:
            cam = weight_softmax[idx].dot(feature_conv[s].reshape((nc, h*w)))
            cam = cam.reshape(h, w)
            cam = cam - np.min(cam)
            cam_img = cam / np.max(cam)
            cam_img = np.uint8(255 * cam_img)
            slice_cams.append(cv2.resize(cam_img, size_upsample))
    return slice_cams
patients = []
for i, (image, label, _) in tqdm_notebook(enumerate(loader), total=len(loader)):
    patient_data = {}
    patient_data['mri'] = image
    patient_data['label'] = label[0][0][1].item()
    patient_data['id'] = '0' * (4 - len(str(i))) + str(i)
    patients.append(patient_data)
acl = list(filter(lambda d: d['label'] == 1, patients))
def create_patiens_cam(case, plane):
    patient_id = case['id']
    mri = case['mri']
    folder_path = f'C:/Users/GlaDOS/mrnet/cams/{plane}/{patient_id}/'
    if os.path.isdir(folder_path):
        shutil.rmtree(folder_path)
    os.makedirs(folder_path)
    os.makedirs(folder_path + 'slices/')
    os.makedirs(folder_path + 'cams/')
    params = list(mrnet.parameters())
    weight_softmax = np.squeeze(params[-2].cpu().data.numpy())
    num_slices = mri.shape[1]
    global feature_blobs
    feature_blobs = []
    mri = mri.to(device)
    logit = mrnet(mri)
    size_upsample = (256, 256)
    feature_conv = feature_blobs[0]
    h_x = F.softmax(logit, dim=1).data.squeeze(0)
    probs, idx = h_x.sort(0, True)
    probs = probs.cpu().numpy()
    idx = idx.cpu().numpy()
    slice_cams = returnCAM(feature_blobs[-1], weight_softmax, idx[:1])
    for s in tqdm_notebook(range(num_slices), leave=False):
        slice_pil = (transforms
                     .ToPILImage()(mri.cpu()[0][s] / 255))
        slice_pil.save(folder_path + f'slices/{s}.png',
                       dpi=(300, 300))
        img = mri[0][s].cpu().numpy()
        img = img.transpose(1, 2, 0)
        heatmap = (cv2
                   .cvtColor(cv2.applyColorMap(
                       cv2.resize(slice_cams[s], (256, 256)),
                       cv2.COLORMAP_JET),
                       cv2.COLOR_BGR2RGB)
                   )
        result = heatmap * 0.3 + img * 0.5
        pil_img_cam = Image.fromarray(np.uint8(result))
        pil_img_cam.save(folder_path + f'cams/{s}.png', dpi=(300, 300))
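One thing that stands out in the full script is that create_patiens_cam is defined but never called, and feature_blobs is only ever reset to an empty list, so there are no activations for returnCAM to work with and no PNG is ever written; because the folders are recreated silently, no error shows up either. A hedged sketch of the two missing pieces is below; the attribute name mrnet.features is a guess and would have to be replaced with the last convolutional module of the actual model:

# Hypothetical wiring; adjust the module attribute to the real MRNet definition.
feature_blobs = []

def hook_feature(module, inp, output):
    # keep the activations of the last conv layer for returnCAM
    feature_blobs.append(output.data.cpu().numpy())

mrnet.features.register_forward_hook(hook_feature)  # 'features' is a guess

# actually invoke the CAM generation for every positive case
for case in tqdm_notebook(acl):
    create_patiens_cam(case, plane)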
Use seaborn:
import seaborn as sns
sns_plot.savefig('output.png')

Handwriting neural network weights don't change

from struct import unpack
import gzip
import numpy
from numpy import *
import matplotlib.pyplot as plt
learningRate = 0.1
def get_labeled_data(imagefile, labelfile):
    """Read input-vector (image) and target class (label, 0-9) and return
    it as list of tuples.
    """
    # Open the images with gzip in read binary mode
    images = gzip.open(imagefile, 'rb')
    labels = gzip.open(labelfile, 'rb')
    # Read the binary data
    # We have to get big endian unsigned int. So we need '>I'
    # Get metadata for images
    images.read(4)  # skip the magic_number
    number_of_images = images.read(4)
    number_of_images = unpack('>I', number_of_images)[0]
    rows = images.read(4)
    rows = unpack('>I', rows)[0]
    cols = images.read(4)
    cols = unpack('>I', cols)[0]
    # Get metadata for labels
    labels.read(4)  # skip the magic_number
    N = labels.read(4)
    N = unpack('>I', N)[0]
    if number_of_images != N:
        raise Exception('number of labels did not match the number of images')
    # Get the data
    x = zeros((N, rows, cols), dtype="float32")  # Initialize numpy array
    y = zeros((N, 1), dtype="uint8")  # Initialize numpy array
    for i in range(N):
        if i % 1000 == 0:
            print("i: %i" % i)
        for row in range(rows):
            for col in range(cols):
                tmp_pixel = images.read(1)  # Just a single byte
                tmp_pixel = unpack('>B', tmp_pixel)[0]
                x[i][row][col] = tmp_pixel
        tmp_label = labels.read(1)
        y[i] = unpack('>B', tmp_label)[0]
    return (x, y)
ld = get_labeled_data("C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-images-idx3-ubyte.gz", "C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-labels-idx1-ubyte.gz")
def sigmoid(x):
    return 1/(1+numpy.exp(-x))

def sigmoid_P(x):
    return sigmoid(x) * (1 - sigmoid(x))

def cost(i, t):
    return (i - t) ** 2

def cost_P(i, t):
    return 2 * (i - t)
# 10x28x28 - number x row x column
weights = numpy.random.random((10, 28, 28))
biases = numpy.random.random((10, 28, 28))
dr = 0
da = 0
for loopi in range(10000):
    r = numpy.random.randint(0, len(ld[0][0]))
    targets = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    image = ld[0][r]
    label = ld[1][r][0]
    # weighted 3D Matrix of [number][row][column]
    predictions = []
    wPredictions = []
    # average of predictions for each number
    avgPred = []
    avgPred2 = []
    img = list(image)
    for i in range(10):
        x = []
        y = []
        for b, w in zip(biases[i], weights[i]):
            x.append(sigmoid(numpy.dot(w, image) + b))
            y.append(numpy.dot(w, image) + b)
        predictions.append(x)
        avgPred.append(numpy.average(list(x)))
        avgPred2.append(numpy.average(list(y)))
    for i in range(10):
        sqError = cost(avgPred[i], targets[i])
        # derivative of the cost with respect to each of the weights and biases
        dc_dp = cost_P(avgPred[i], targets[i])
        dp_dz = sigmoid_P(avgPred2[i])
        #for b, w in zip(biases[i], weights[i]):
        for imgRow in range(28):
            for imgCol in range(28):
                dz_dw = image[imgRow][imgCol]
                dz_db = 1
                print("dc_dp: " + str(dc_dp) + "\ndp_dz: "+ str(dp_dz) + "\ndz_dw: " + str(dz_dw))
                dc_dw = dc_dp * dp_dz * dz_dw
                dc_db = dc_dp * dp_dz * dz_db
                dr = dc_dw
                weights[i][imgRow][imgCol] -= learningRate * dc_dw
                da = weights[i][imgRow][imgCol]
                biases[i][imgRow][imgCol] -= learningRate * dc_db
while True:
    big = 0
    intid = int(input())
    imag = ld[0][intid]
    for l in range(10):
        papa = []
        for b, w in zip(biases[i], weights[i]):
            papa.append(sigmoid(numpy.dot(w, imag) + b))
        lol = numpy.average(papa)
        if(lol > big):
            big = l
    print(str(dr) + " " + str(da))
    print(big)
The weights aren't changing because dp_dz is always 0, and I'm not sure what's causing that. I don't mean that they change by only a very small amount; they are literally not changing at all. I believe it has to do with my approach in general, but I'm not sure how else I could approach this problem, as I'm very new to neural networks. Any help would be greatly appreciated!
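One reading of why dp_dz collapses: avgPred2[i] averages raw pre-activations, which are dot products of 28 unscaled pixel values (0-255) with weights drawn from [0, 1), so they are typically in the hundreds; sigmoid saturates there and its derivative underflows to 0, killing every update. A small sketch of the effect and of the usual remedy (scale pixels to [0, 1] and start from small weights); the numbers are illustrative only:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_P(x):
    return sigmoid(x) * (1 - sigmoid(x))

# Unscaled: 28 pixels of ~200 times weights in [0, 1) gives z in the hundreds,
# where sigmoid is flat and the gradient is numerically zero.
z_raw = np.dot(np.random.random(28), np.full(28, 200.0))
print(sigmoid_P(z_raw))      # ~0.0 -> weights never move

# Scaled inputs and small initial weights keep z near 0, where sigmoid_P
# is close to its maximum of 0.25, so updates are non-zero.
z_scaled = np.dot(np.random.random(28) * 0.01, np.full(28, 200.0) / 255.0)
print(sigmoid_P(z_scaled))   # a usable gradient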
