I am building an SSTV program that converts a 180x136 picture into sound using its RGB components. But when I try to read data from the microphone, it gives me
struct.error: unpack requires a buffer of 400 bytes
even though the data I read is only 200 bytes.
I expected the output to be the decoded microphone input (in short: microphone -> decode signal -> image).
I tried using chunk*2, but it gave me the same error:
struct.error: unpack requires a buffer of 800 bytes
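From what I can tell, struct's 'h' format is a 2-byte signed short, so an unpack format of n shorts needs exactly 2*n bytes; a minimal reproduction of the size mismatch:
import struct
print(struct.calcsize("200h"))  # 400: "200h" always needs a 400-byte buffer
struct.unpack("200h", b"\x00" * 200)  # struct.error: unpack requires a buffer of 400 bytes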
Here is the code for the decoder:
import numpy as np
import pyaudio
import wave
from tkinter import *
root = Tk()
c = Canvas(root, width=90*8, height=68*8, bg='white')
c.pack()
def pix(x, y, rr, rg, rb):  # rr/rg/rb are the raw values because we are decoding frequency
    # scale each raw frequency into 0-255, clamping and falling back to 0 on bad input
    try:
        r = min(255, round(rr * (255 / 1124)))
    except Exception:
        r = 0
    try:
        g = min(255, round(rg * (255 / 1124)))
    except Exception:
        g = 0
    try:
        b = min(255, round(rb * (255 / 1124)))
    except Exception:
        b = 0
    r, g, b = max(0, r), max(0, g), max(0, b)
    print(r, g, b)
    color = f"#{r:02x}{g:02x}{b:02x}"  # same zero-padded hex as before, via format specs
    for dy in range(4):  # draw the pixel as a 4x4 block of 1-px lines
        c.create_line(x, y + dy, x + 4, y + dy, fill=color)
chunk = 100
# open up a wave
wf = wave.open('input.wav', 'rb')
swidth = wf.getsampwidth()
RATE = wf.getframerate()
# use a Blackman window
window = np.blackman(chunk)
# open stream
p = pyaudio.PyAudio()
stream = p.open(format =
p.get_format_from_width(wf.getsampwidth()),
channels = wf.getnchannels(),
rate = RATE,
output = True)
microphone = p.open(format =
p.get_format_from_width(wf.getsampwidth()),
channels = wf.getnchannels(),
rate = RATE,
input = True)
# read some data
data = wf.readframes(chunk)
#print(len(data))
#print(chunk*swidth)
pc = 0
x = 0
e = [255,252]
totaly = 0
rrgb = [255, 255, 255] # raw rgb
# play stream and find the frequency of each chunk
while True:
    data = microphone.read(chunk)
    # write data out to the audio stream
    stream.write(data)
    # unpack the raw bytes into samples; this is the line that raises struct.error
    indata = np.array(wave.struct.unpack("%dh" % (len(data) / swidth), data)) * window
    # take the FFT and square each value
    fftData = abs(np.fft.rfft(indata)) ** 2
    # find the maximum
    which = fftData[1:].argmax() + 1
    # use quadratic interpolation around the max
    if which != len(fftData) - 1:
        y0, y1, y2 = np.log(fftData[which-1:which+2:])
        x1 = (y2 - y0) * .5 / (2 * y1 - y2 - y0)
        # find the frequency and output it
        thefreq = (which + x1) * RATE / chunk
        if thefreq > 3030 or totaly == 540:  # sync tone: start a new row
            if pc == 0:
                totaly = x
            pc += 4
            x = 0
            root.update()
        else:
            if x // 4 % 3 == 0:
                pix(x / 3, pc, rrgb[0], rrgb[1], rrgb[2])
                rrgb[0] = thefreq
            elif x // 4 % 3 == 1:
                rrgb[1] = thefreq
            elif x // 4 % 3 == 2:
                rrgb[2] = thefreq
            x += 4
    else:
        thefreq = which * RATE / chunk
    # read some more data
    data = microphone.read(chunk)
    if data:
        stream.write(data)
stream.close()
p.terminate()
root.mainloop()
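One idea (a rough sketch of my own, assuming mono audio and signed integer samples) is to derive the struct format character from the sample width instead of hard-coding 'h', so the unpack count always matches the buffer size:
fmt_for_width = {1: 'b', 2: 'h', 4: 'i'}  # signed 8-, 16- and 32-bit samples

def unpack_chunk(data, swidth):
    count = len(data) // swidth
    return wave.struct.unpack("%d%s" % (count, fmt_for_width[swidth]), data)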
Here is the code for the encoder:
import numpy as np
# Importing Image from PIL package
from PIL import Image
# creating a image object
im = Image.open(r"original.bmp")
px = im.load()
arx = []
art = []
pix = ()
pixels = list(im.getdata())
width, height = im.size
pixels = [pixels[i * width:(i + 1) * width] for i in range(height)]
print(pixels)
for ypos in range(0, 136):
    for xpos in range(0, 180):
        pix = pixels[ypos][xpos]
        # gray = (pix[0] + pix[1] + pix[2]) / 3
        rgb = (pix[0], pix[1], pix[2])
        arx.append(rgb)
    arx.append(406)  # 406 marks the end of a row
    art.append(arx)
rate = 44100 # samples per second
T = 3 # sample duration (seconds)
slowdown = int(input("Speed in Hz:"))
f = 100.0/slowdown # sound frequency (Hz)
print("speed multiplier is",slowdown)
encoding = "ansi"
total = []
xpos2 = 0
c = 0
for ypos in art:
    for xpos in ypos:
        t = np.linspace(0, 0.01, slowdown, endpoint=False)
        if xpos == 406:
            # end-of-row marker: emit the 500/700 sync pair
            sig = np.sin(2 * np.pi * f * 500 * t)
            total.extend(sig)
            sig = np.sin(2 * np.pi * f * 700 * t)
            total.extend(sig)
            continue
        # one tone per colour channel
        sig = np.sin(2 * np.pi * f * xpos[0] * t)
        total.extend(sig)
        sig = np.sin(2 * np.pi * f * xpos[1] * t)
        total.extend(sig)
        sig = np.sin(2 * np.pi * f * xpos[2] * t)
        total.extend(sig)
        print(xpos)
    sig = np.sin(2 * np.pi * f * 300 * t)
    break
import wavio
wavio.write("input.wav", total, rate, sampwidth=2)
input("done, saved as \'input.wav\'")
I am trying to deploy a low-light image enhancement model to the web using Flask. The main thing I want to do is run a webpage on localhost, and when I upload a low-light image, the model should enhance it and show the result on the same webpage.
Here is my test.py code:
from glob import glob
import numpy as np
import scipy
import keras
import os
import Network
import utls
import time
import cv2
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--input", "-i", type=str, default='../input', help='test image folder')
parser.add_argument("--result", "-r", type=str, default='../result', help='result folder')
parser.add_argument("--model", "-m", type=str, default='Syn_img_lowlight_withnoise', help='model name')
parser.add_argument("--com", "-c", type=int, default=1, help='output with/without origional image and mid result')
parser.add_argument("--highpercent", "-hp", type=int, default=95, help='should be in [85,100], linear amplification')
parser.add_argument("--lowpercent", "-lp", type=int, default=5, help='should be in [0,15], rescale the range [p%,1] to [0, 1]')
parser.add_argument("--gamma", "-g", type=int, default=8, help='should be in [6,10], increase the saturability')
parser.add_argument("--maxrange", "-mr", type=int, default=8, help='linear amplification range')
arg = parser.parse_args()
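# (For reference, test.py is run with the defaults above, roughly like:
#   python test.py -i ../input -r ../result -m Syn_img_lowlight_withnoise )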
result_folder = arg.result
if not os.path.isdir(result_folder):
    os.makedirs(result_folder)
input_folder = arg.input
path = glob(input_folder+'/*.*')
model_name = arg.model
mbllen = Network.build_mbllen((None, None, 3))
mbllen.load_weights('../models/'+model_name+'.h5')
opt = keras.optimizers.Adam(lr=2 * 1e-04, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
mbllen.compile(loss='mse', optimizer=opt)
flag = arg.com
lowpercent = arg.lowpercent
highpercent = arg.highpercent
maxrange = arg.maxrange/10.
hsvgamma = arg.gamma/10.
for i in range(len(path)):
    img_A_path = path[i]
    img_A = utls.imread_color(img_A_path)
    img_A = img_A[np.newaxis, :]
    starttime = time.process_time()
    out_pred = mbllen.predict(img_A)
    endtime = time.process_time()
    print('The ' + str(i+1) + 'th image\'s Time:' + str(endtime-starttime) + 's.')
    fake_B = out_pred[0, :, :, :3]
    fake_B_o = fake_B
    # Rec.601 luma; 0.114 is the blue coefficient, so it belongs on channel 2
    gray_fake_B = fake_B[:, :, 0] * 0.299 + fake_B[:, :, 1] * 0.587 + fake_B[:, :, 2] * 0.114
    percent_max = sum(sum(gray_fake_B >= maxrange)) / sum(sum(gray_fake_B <= 1.0))
    max_value = np.percentile(gray_fake_B[:], highpercent)
    if percent_max < (100 - highpercent) / 100.:
        scale = maxrange / max_value
        fake_B = fake_B * scale
        fake_B = np.minimum(fake_B, 1.0)
    gray_fake_B = fake_B[:, :, 0] * 0.299 + fake_B[:, :, 1] * 0.587 + fake_B[:, :, 2] * 0.114
    sub_value = np.percentile(gray_fake_B[:], lowpercent)
    fake_B = (fake_B - sub_value) * (1. / (1 - sub_value))
    imgHSV = cv2.cvtColor(fake_B, cv2.COLOR_RGB2HSV)
    H, S, V = cv2.split(imgHSV)
    S = np.power(S, hsvgamma)
    imgHSV = cv2.merge([H, S, V])
    fake_B = cv2.cvtColor(imgHSV, cv2.COLOR_HSV2RGB)
    fake_B = np.minimum(fake_B, 1.0)
    if flag:
        outputs = np.concatenate([img_A[0, :, :, :], fake_B_o, fake_B], axis=1)
    else:
        outputs = fake_B
    filename = os.path.basename(path[i])
    img_name = result_folder + '/' + filename
    outputs = np.minimum(outputs, 1.0)
    outputs = np.maximum(outputs, 0.0)
    utls.imwrite(img_name, outputs)
Here is the data_load.py code:
from glob import glob
import numpy as np
import random
import scipy
import os
import cv2 as cv
class Dataloader():
    def __init__(self, dataset_name, crop_shape=(256, 256)):
        self.dataset_name = dataset_name
        self.crop_shape = crop_shape

    def imread_color(self, path):
        img = cv.imread(path, cv.IMREAD_COLOR | cv.IMREAD_ANYDEPTH) / 255.
        b, g, r = cv.split(img)
        img_rgb = cv.merge([r, g, b])
        return img_rgb

    def imwrite(self, path, img):
        r, g, b = cv.split(img)
        img_rgb = cv.merge([b, g, r])
        cv.imwrite(path, img_rgb)

    def load_data(self, batch_size=16):
        path = glob('../dataset/train/*.jpg')
        self.n_batches = int(len(path) / batch_size)
        while 1:
            random.shuffle(path)
            for i in range(self.n_batches - 1):
                batch_path = path[i * batch_size:(i + 1) * batch_size]
                input_imgs = np.empty((batch_size, self.crop_shape[0], self.crop_shape[1], 6), dtype="float32")
                gt = np.empty((batch_size, self.crop_shape[0], self.crop_shape[1], 3), dtype="float32")
                number = 0
                for img_B_path in batch_path:
                    img_B = self.imread_color(img_B_path)
                    path_mid = os.path.split(img_B_path)
                    path_A_1 = path_mid[0] + '_' + self.dataset_name
                    path_A = os.path.join(path_A_1, path_mid[1])
                    img_A = self.imread_color(path_A)
                    nw = random.randint(0, img_B.shape[0] - self.crop_shape[0])
                    nh = random.randint(0, img_B.shape[1] - self.crop_shape[1])
                    crop_img_A = img_A[nw:nw + self.crop_shape[0], nh:nh + self.crop_shape[1], :]
                    crop_img_B = img_B[nw:nw + self.crop_shape[0], nh:nh + self.crop_shape[1], :]
                    if np.random.randint(2, size=1)[0] == 1:  # random flip
                        crop_img_A = np.flipud(crop_img_A)
                        crop_img_B = np.flipud(crop_img_B)
                    if np.random.randint(2, size=1)[0] == 1:
                        crop_img_A = np.fliplr(crop_img_A)
                        crop_img_B = np.fliplr(crop_img_B)
                    if np.random.randint(2, size=1)[0] == 1:  # random transpose
                        crop_img_A = np.transpose(crop_img_A, (1, 0, 2))
                        crop_img_B = np.transpose(crop_img_B, (1, 0, 2))
                    input_imgs[number, :, :, :] = np.concatenate([crop_img_A, crop_img_B], axis=-1)
                    gt[number, :, :, :] = crop_img_B
                    number += 1
                yield input_imgs, gt
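A minimal usage sketch of the generator ('dark' is a placeholder dataset_name; it assumes the ../dataset/train layout plus the matching '<folder>_<dataset_name>' folder that load_data expects):
loader = Dataloader('dark', crop_shape=(256, 256))
batches = loader.load_data(batch_size=4)
input_imgs, gt = next(batches)
print(input_imgs.shape, gt.shape)  # (4, 256, 256, 6) (4, 256, 256, 3)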
Here is the utls.py code:
import tensorflow as tf
from keras import backend as K
import numpy as np
import scipy
import os
import cv2 as cv
def bright_mae(y_true, y_pred):
    return K.mean(K.abs(y_pred[:, :, :, :3] - y_true[:, :, :, :3]))

def bright_mse(y_true, y_pred):
    return K.mean((y_pred[:, :, :, :3] - y_true[:, :, :, :3])**2)

def bright_AB(y_true, y_pred):
    return K.abs(K.mean(y_true[:, :, :, :3]) - K.mean(y_pred[:, :, :, :3]))

def log10(x):
    numerator = K.log(x)
    denominator = K.log(K.constant(10, dtype=numerator.dtype))
    return numerator / denominator

def bright_psnr(y_true, y_pred):
    mse = K.mean((K.abs(y_pred[:, :, :, :3] - y_true[:, :, :, :3])) ** 2)
    max_num = 1.0
    psnr = 10 * log10(max_num ** 2 / mse)
    return psnr

def _tf_fspecial_gauss(size, sigma):
    """Function to mimic the 'fspecial' gaussian MATLAB function."""
    x_data, y_data = np.mgrid[-size//2 + 1:size//2 + 1, -size//2 + 1:size//2 + 1]
    x_data = np.expand_dims(x_data, axis=-1)
    x_data = np.expand_dims(x_data, axis=-1)
    y_data = np.expand_dims(y_data, axis=-1)
    y_data = np.expand_dims(y_data, axis=-1)
    x = tf.constant(x_data, dtype=tf.float32)
    y = tf.constant(y_data, dtype=tf.float32)
    g = tf.exp(-((x**2 + y**2) / (2.0 * sigma**2)))
    return g / tf.reduce_sum(g)

def tf_ssim(img1, img2, cs_map=False, mean_metric=True, size=11, sigma=1.5):
    window = _tf_fspecial_gauss(size, sigma)  # window shape [size, size]
    K1 = 0.01
    K2 = 0.03
    L = 1  # depth of image (255 in case the image has a different scale)
    C1 = (K1 * L)**2
    C2 = (K2 * L)**2
    mu1 = tf.nn.conv2d(img1, window, strides=[1, 1, 1, 1], padding='VALID')
    mu2 = tf.nn.conv2d(img2, window, strides=[1, 1, 1, 1], padding='VALID')
    mu1_sq = mu1 * mu1
    mu2_sq = mu2 * mu2
    mu1_mu2 = mu1 * mu2
    sigma1_sq = tf.nn.conv2d(img1 * img1, window, strides=[1, 1, 1, 1], padding='VALID') - mu1_sq
    sigma2_sq = tf.nn.conv2d(img2 * img2, window, strides=[1, 1, 1, 1], padding='VALID') - mu2_sq
    sigma12 = tf.nn.conv2d(img1 * img2, window, strides=[1, 1, 1, 1], padding='VALID') - mu1_mu2
    if cs_map:
        value = (((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) *
                 (sigma1_sq + sigma2_sq + C2)),
                 (2.0 * sigma12 + C2) / (sigma1_sq + sigma2_sq + C2))
    else:
        value = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) *
                (sigma1_sq + sigma2_sq + C2))
    if mean_metric:
        value = tf.reduce_mean(value)
    return value

def tf_ms_ssim(img1, img2, mean_metric=True, level=5):
    weight = tf.constant([0.0448, 0.2856, 0.3001, 0.2363, 0.1333], dtype=tf.float32)
    mssim = []
    mcs = []
    for l in range(level):
        ssim_map, cs_map = tf_ssim(img1, img2, cs_map=True, mean_metric=False)
        mssim.append(tf.reduce_mean(ssim_map))
        mcs.append(tf.reduce_mean(cs_map))
        filtered_im1 = tf.nn.avg_pool(img1, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
        filtered_im2 = tf.nn.avg_pool(img2, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
        img1 = filtered_im1
        img2 = filtered_im2
    # list to tensor of dim D+1
    mssim = tf.stack(mssim, axis=0)
    mcs = tf.stack(mcs, axis=0)
    value = (tf.reduce_prod(mcs[0:level-1]**weight[0:level-1]) *
             (mssim[level-1]**weight[level-1]))
    if mean_metric:
        value = tf.reduce_mean(value)
    return value

def bright_SSIM(y_true, y_pred):
    SSIM_loss = (tf_ssim(tf.expand_dims(y_pred[:, :, :, 0], -1), tf.expand_dims(y_true[:, :, :, 0], -1))
                 + tf_ssim(tf.expand_dims(y_pred[:, :, :, 1], -1), tf.expand_dims(y_true[:, :, :, 1], -1))
                 + tf_ssim(tf.expand_dims(y_pred[:, :, :, 2], -1), tf.expand_dims(y_true[:, :, :, 2], -1)))
    return SSIM_loss / 3

def psnr_cau(y_true, y_pred):
    mse = np.mean((np.abs(y_pred - y_true)) ** 2)
    max_num = 1.0
    psnr = 10 * np.log10(max_num ** 2 / mse)
    return psnr

def save_model(model, name, epoch, batch_i):
    modelname = './Res_models/' + str(epoch) + '_' + str(batch_i) + name + '.h5'
    model.save_weights(modelname)

def imread_color(path):
    img = cv.imread(path, cv.IMREAD_COLOR | cv.IMREAD_ANYDEPTH) / 255.
    b, g, r = cv.split(img)
    img_rgb = cv.merge([r, g, b])
    return img_rgb

def imwrite(path, img):
    r, g, b = cv.split(img * 255)
    img_rgb = cv.merge([b, g, r])
    cv.imwrite(path, img_rgb)

def range_scale(x):
    return x * 2 - 1.
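As a quick sanity check of the pure-numpy helpers (the values here are my own):
a = np.zeros((4, 4))
b = np.full((4, 4), 0.1)
print(psnr_cau(a, b))    # mse = 0.01, so 10 * log10(1 / 0.01) = 20.0
print(range_scale(0.5))  # maps [0, 1] to [-1, 1], so 0.0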
In the code below, I wanted to make a webpage with an upload button that takes low-light images from users so the model can enhance them. But I could not get the model deployed either.
main.py:
from flask import Flask, request
from werkzeug.utils import secure_filename
import torch

app = Flask(
    __name__,
    static_url_path='',
    static_folder=''
)

def update_image(filename):
    # TODO: run the enhancement model on `filename` and overwrite it in place
    pass

def get_form():
    return """
    <form action="/" enctype="multipart/form-data" method="POST">
        <input type="file" id="file" name="file">
        <input type="submit">
    </form>
    """

@app.route("/", methods=['GET', 'POST'])
def hello_world():
    if request.method == 'GET':
        return get_form()
    elif request.method == 'POST':
        f = request.files['file']
        print(f.filename)
        f.save(secure_filename(f.filename))
        update_image(f.filename)
        return f'{get_form()} <img src="{f.filename}"/>'
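What I expect update_image ultimately has to do is reuse the pieces from test.py; a rough sketch (model name and relative paths copied from test.py, error handling omitted):
import numpy as np
import Network
import utls

mbllen = Network.build_mbllen((None, None, 3))
mbllen.load_weights('../models/Syn_img_lowlight_withnoise.h5')

def update_image(filename):
    img = utls.imread_color(filename)  # float RGB in [0, 1]
    out = mbllen.predict(img[np.newaxis, :])[0, :, :, :3]
    out = np.clip(out, 0.0, 1.0)
    utls.imwrite(filename, out)  # overwrite in place so the <img> tag shows the result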
I'm creating a heatmap for a CNN as per this tutorial.
In the last part:
def create_patiens_cam(case, plane):
    patient_id = case['id']
    mri = case['mri']
    folder_path = f'./CAMS/{plane}/{patient_id}/'
    if os.path.isdir(folder_path):
        shutil.rmtree(folder_path)
    os.makedirs(folder_path)
    os.makedirs(folder_path + 'slices/')
    os.makedirs(folder_path + 'cams/')
    params = list(mrnet.parameters())
    weight_softmax = np.squeeze(params[-2].cpu().data.numpy())
    num_slices = mri.shape[1]
    global feature_blobs
    feature_blobs = []
    mri = mri.to(device)
    logit = mrnet(mri)
    size_upsample = (256, 256)
    feature_conv = feature_blobs[0]
    h_x = F.softmax(logit, dim=1).data.squeeze(0)
    probs, idx = h_x.sort(0, True)
    probs = probs.cpu().numpy()
    idx = idx.cpu().numpy()
    slice_cams = returnCAM(feature_blobs[-1], weight_softmax, idx[:1])
    for s in tqdm_notebook(range(num_slices), leave=False):
        slice_pil = transforms.ToPILImage()(mri.cpu()[0][s] / 255)
        slice_pil.save(folder_path + f'slices/{s}.png', dpi=(300, 300))
        img = mri[0][s].cpu().numpy()
        img = img.transpose(1, 2, 0)
        heatmap = cv2.cvtColor(
            cv2.applyColorMap(cv2.resize(slice_cams[s], (256, 256)),
                              cv2.COLORMAP_JET),
            cv2.COLOR_BGR2RGB)
        result = heatmap * 0.3 + img * 0.5
        pil_img_cam = Image.fromarray(np.uint8(result))
        pil_img_cam.save(folder_path + f'cams/{s}.png', dpi=(300, 300))
I have created a folder "CAMS" in my 'mrnet' folder. However, when I run this last piece of code (in a Jupyter notebook) I get no errors, but no PNGs are being created. Does anyone have any idea what could be wrong, or where I could look to see what is going wrong, given that there are no errors?
FULL CODE:
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 13 21:54:40 2021
@author: GlaDOS
"""
import os
import io
import requests
from PIL import Image
from torchvision import models, transforms
from torch.autograd import Variable
from torch.nn import functional as F
import numpy as np
import cv2
import pdb
from matplotlib import pyplot as plt
import sys
sys.path.append('C:/Users/GlaDOS/mrnet')
import shutil
import torch
import model
from dataloader import MRDataset
from tqdm import tqdm_notebook
task = 'acl'
plane = 'sagittal'
prefix = 'sag'
model_name = [name for name in os.listdir('C:/Users/GlaDOS/mrnet/models/')
if (task in name) and
(plane in name) and
(prefix in name)][0]
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")
mrnet = torch.load(f'C:/Users/GlaDOS/mrnet/models/{model_name}')
mrnet = mrnet.to(device)
_ = mrnet.eval()
dataset = MRDataset('C:/Users/GlaDOS/mrnet/data/',
task,
plane,
transform=None,
train=False)
loader = torch.utils.data.DataLoader(dataset,
batch_size=1,
shuffle=False,
num_workers=0,
drop_last=False)
def returnCAM(feature_conv, weight_softmax, class_idx):
    size_upsample = (256, 256)
    bz, nc, h, w = feature_conv.shape
    slice_cams = []
    for s in range(bz):
        for idx in class_idx:
            # weighted sum of the feature maps for this class, reshaped back to h x w
            cam = weight_softmax[idx].dot(feature_conv[s].reshape((nc, h*w)))
            cam = cam.reshape(h, w)
            cam = cam - np.min(cam)
            cam_img = cam / np.max(cam)
            cam_img = np.uint8(255 * cam_img)
            slice_cams.append(cv2.resize(cam_img, size_upsample))
    return slice_cams
patients = []
for i, (image, label, _) in tqdm_notebook(enumerate(loader), total=len(loader)):
    patient_data = {}
    patient_data['mri'] = image
    patient_data['label'] = label[0][0][1].item()
    patient_data['id'] = '0' * (4 - len(str(i))) + str(i)
    patients.append(patient_data)
acl = list(filter(lambda d: d['label'] == 1, patients))
def create_patiens_cam(case, plane):
    patient_id = case['id']
    mri = case['mri']
    folder_path = f'C:/Users/GlaDOS/mrnet/cams/{plane}/{patient_id}/'
    if os.path.isdir(folder_path):
        shutil.rmtree(folder_path)
    os.makedirs(folder_path)
    os.makedirs(folder_path + 'slices/')
    os.makedirs(folder_path + 'cams/')
    params = list(mrnet.parameters())
    weight_softmax = np.squeeze(params[-2].cpu().data.numpy())
    num_slices = mri.shape[1]
    global feature_blobs
    feature_blobs = []
    mri = mri.to(device)
    logit = mrnet(mri)
    size_upsample = (256, 256)
    feature_conv = feature_blobs[0]
    h_x = F.softmax(logit, dim=1).data.squeeze(0)
    probs, idx = h_x.sort(0, True)
    probs = probs.cpu().numpy()
    idx = idx.cpu().numpy()
    slice_cams = returnCAM(feature_blobs[-1], weight_softmax, idx[:1])
    for s in tqdm_notebook(range(num_slices), leave=False):
        slice_pil = transforms.ToPILImage()(mri.cpu()[0][s] / 255)
        slice_pil.save(folder_path + f'slices/{s}.png', dpi=(300, 300))
        img = mri[0][s].cpu().numpy()
        img = img.transpose(1, 2, 0)
        heatmap = cv2.cvtColor(
            cv2.applyColorMap(cv2.resize(slice_cams[s], (256, 256)),
                              cv2.COLORMAP_JET),
            cv2.COLOR_BGR2RGB)
        result = heatmap * 0.3 + img * 0.5
        pil_img_cam = Image.fromarray(np.uint8(result))
        pil_img_cam.save(folder_path + f'cams/{s}.png', dpi=(300, 300))
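In case it matters: nowhere above is create_patiens_cam actually invoked, so a call such as the following (my own sketch) is what I would expect to produce the PNGs:
for case in tqdm_notebook(acl):
    create_patiens_cam(case, plane)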
from struct import unpack
import gzip
import numpy
from numpy import *
import matplotlib.pyplot as plt
learningRate = 0.1
def get_labeled_data(imagefile, labelfile):
    """Read input-vector (image) and target class (label, 0-9) and return
    it as a list of tuples.
    """
    # Open the images with gzip in read binary mode
    images = gzip.open(imagefile, 'rb')
    labels = gzip.open(labelfile, 'rb')
    # Read the binary data
    # We have to get big endian unsigned int. So we need '>I'
    # Get metadata for images
    images.read(4)  # skip the magic_number
    number_of_images = images.read(4)
    number_of_images = unpack('>I', number_of_images)[0]
    rows = images.read(4)
    rows = unpack('>I', rows)[0]
    cols = images.read(4)
    cols = unpack('>I', cols)[0]
    # Get metadata for labels
    labels.read(4)  # skip the magic_number
    N = labels.read(4)
    N = unpack('>I', N)[0]
    if number_of_images != N:
        raise Exception('number of labels did not match the number of images')
    # Get the data
    x = zeros((N, rows, cols), dtype="float32")  # Initialize numpy array
    y = zeros((N, 1), dtype="uint8")  # Initialize numpy array
    for i in range(N):
        if i % 1000 == 0:
            print("i: %i" % i)
        for row in range(rows):
            for col in range(cols):
                tmp_pixel = images.read(1)  # Just a single byte
                tmp_pixel = unpack('>B', tmp_pixel)[0]
                x[i][row][col] = tmp_pixel
        tmp_label = labels.read(1)
        y[i] = unpack('>B', tmp_label)[0]
    return (x, y)
ld = get_labeled_data("C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-images-idx3-ubyte.gz", "C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-labels-idx1-ubyte.gz")
def sigmoid(x):
    return 1 / (1 + numpy.exp(-x))

def sigmoid_P(x):
    return sigmoid(x) * (1 - sigmoid(x))

def cost(i, t):
    return (i - t) ** 2

def cost_P(i, t):
    return 2 * (i - t)
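# Note (added for context): sigmoid_P(x) = sigmoid(x) * (1 - sigmoid(x)) shrinks
# very fast as |x| grows: sigmoid_P(0) = 0.25, sigmoid_P(10) ≈ 4.5e-5, and once
# numpy.exp(-x) underflows (|x| above roughly 745) it is exactly 0.0 in float64.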
# 10x28x28 - number x row x column
weights = numpy.random.random((10, 28, 28))
biases = numpy.random.random((10, 28, 28))
dr = 0
da = 0
for loopi in range(10000):
    r = numpy.random.randint(0, len(ld[0][0]))
    targets = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    image = ld[0][r]
    label = ld[1][r][0]
    # weighted 3D Matrix of [number][row][column]
    predictions = []
    wPredictions = []
    # average of predictions for each number
    avgPred = []
    avgPred2 = []
    img = list(image)
    for i in range(10):
        x = []
        y = []
        for b, w in zip(biases[i], weights[i]):
            x.append(sigmoid(numpy.dot(w, image) + b))
            y.append(numpy.dot(w, image) + b)
        predictions.append(x)
        avgPred.append(numpy.average(list(x)))
        avgPred2.append(numpy.average(list(y)))
    for i in range(10):
        sqError = cost(avgPred[i], targets[i])
        # derivative of the cost with respect to each of the weights and biases
        dc_dp = cost_P(avgPred[i], targets[i])
        dp_dz = sigmoid_P(avgPred2[i])  # 0.0 whenever avgPred2[i] is large in magnitude
        for imgRow in range(28):
            for imgCol in range(28):
                dz_dw = image[imgRow][imgCol]
                dz_db = 1
                print("dc_dp: " + str(dc_dp) + "\ndp_dz: " + str(dp_dz) + "\ndz_dw: " + str(dz_dw))
                dc_dw = dc_dp * dp_dz * dz_dw
                dc_db = dc_dp * dp_dz * dz_db
                dr = dc_dw
                weights[i][imgRow][imgCol] -= learningRate * dc_dw
                da = weights[i][imgRow][imgCol]
                biases[i][imgRow][imgCol] -= learningRate * dc_db
while True:
    big = 0
    intid = int(input())
    imag = ld[0][intid]
    for l in range(10):
        papa = []
        for b, w in zip(biases[i], weights[i]):  # note: this still uses i from the loop above, not l
            papa.append(sigmoid(numpy.dot(w, imag) + b))
        lol = numpy.average(papa)
        if lol > big:
            big = l
    print(str(dr) + " " + str(da))
    print(big)
The weights aren't changing because dp_dz is always 0, and I'm not sure what's causing that. I don't mean that they're changing by only a very small amount; they're literally not changing at all. I believe it has to do with my approach in general, but I'm not sure how else I could approach this problem, as I'm very new to neural networks. Any help would be greatly appreciated!
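For what it's worth, this is the kind of rescaling I am considering trying so the pre-activations stay in a range where sigmoid_P is non-zero (the scaling constants are my own guess, not from any tutorial):
x, y = ld
x = x / 255.0  # pixels into [0, 1] instead of [0, 255]
weights = numpy.random.uniform(-0.01, 0.01, (10, 28, 28))  # small signed initial weights
biases = numpy.zeros((10, 28, 28))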