I am currently playing with the LeNet model provided by Caffe.
The example (in path/to/caffe/examples/mnist/convert_mnist_data.cpp) provides a C++ program to convert the MNIST data to LMDB.
I wrote a Python program to do the same thing, but the resulting LMDB (480 MB) is much larger than the one produced by the C++ program (60 MB).
The test accuracy is almost the same (98%).
I want to know why the sizes differ so much.
Here is the program. I use the python-mnist module (https://pypi.python.org/pypi/python-mnist/) to load the binary MNIST data.
from mnist import MNIST
import numpy as np
import cv2
import lmdb
import caffe
mndata = MNIST('./data')
images, labels = mndata.load_training()
labels = np.array(labels)
images = np.array(images).reshape(len(labels), 28, 28).astype(np.uint8)
print type(images[0][0][0])
count = 0
env = lmdb.open('mnist_lmdb', map_size=1000*1000*1000)
txn = env.begin(write=True)
for i in xrange(len(labels)):
    print i
    datum = caffe.proto.caffe_pb2.Datum()
    datum.channels = 1
    datum.height = 28
    datum.width = 28
    datum.data = images[i].tobytes()
    datum.label = labels[i]
    str_id = '{:08}'.format(i)
    txn.put(str_id, datum.SerializeToString())
    count = count + 1
    if count % 1000 == 0:
        txn.commit()
        txn = env.begin(write=True)
if count % 1000 != 0:
    txn.commit()
env.close()
Thank you.
env = lmdb.open('mnist_lmdb', map_size=1000*1000*1000)
The DB size mainly depends on the map_size, so you can reduce the map_size.
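A rough sketch of how one might size the map from the data instead of using a flat 1 GB; the 64-byte per-record allowance below is a guess, not a measured value:
import lmdb

# 60,000 MNIST training images, each Datum holding 28*28 = 784 bytes of pixel
# data plus a small amount of protobuf/key overhead (guessed at 64 bytes).
n_images = 60000
bytes_per_record = 28 * 28 + 64
map_size = n_images * bytes_per_record * 2   # 2x headroom, still around 100 MB

env = lmdb.open('mnist_lmdb', map_size=map_size)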
I have trained a multiclass classifier for speech recognition using TensorFlow, then converted the model with the TFLite converter. The model can predict, but it always outputs a single class. I suppose the problem is in the inference code, because the .h5 model predicts multiple classes without any issue. I have been searching online for several days for some insight, but I can't quite figure it out. Here is my code. Any suggestions would be really appreciated.
import sounddevice as sd
import numpy as np
import scipy.signal
import timeit
import python_speech_features
import tflite_runtime.interpreter as tflite
import importlib
# Parameters
debug_time = 0
debug_acc = 0
word_threshold = 0.95
rec_duration = 0.5 # 0.5
sample_length = 0.5
window_stride = 0.5 # 0.5
sample_rate = 8000 # The mic requires at least 44100 Hz to work
resample_rate = 8000
num_channels = 1
num_mfcc = 16
model_path = 'model.tflite'
mfccs_old = np.zeros((32, 25))
# Load model (interpreter)
interpreter = tflite.Interpreter(model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details)
# Filter and downsample
def decimate(signal, old_fs, new_fs):
    # Check to make sure we're downsampling
    if new_fs > old_fs:
        print("Error: target sample rate higher than original")
        return signal, old_fs
    # Downsampling is possible only by an integer factor
    dec_factor = old_fs / new_fs
    if not dec_factor.is_integer():
        print("Error: can only downsample by integer factor")
        return signal, old_fs
    # Do decimation
    resampled_signal = scipy.signal.decimate(signal, int(dec_factor))
    return resampled_signal, new_fs
# Callback that gets called every 0.5 seconds
def sd_callback(rec, frames, time, status):
    # Start timing for debug purposes
    start = timeit.default_timer()
    # Notify errors
    if status:
        print('Error:', status)
    global mfccs_old
    # Compute MFCCs
    mfccs = python_speech_features.base.mfcc(rec,
                                             samplerate=resample_rate,
                                             winlen=0.02,
                                             winstep=0.02,
                                             numcep=num_mfcc,
                                             nfilt=26,
                                             nfft=512,  # 2048
                                             preemph=0.0,
                                             ceplifter=0,
                                             appendEnergy=True,
                                             winfunc=np.hanning)
    delta = python_speech_features.base.delta(mfccs, 2)
    mfccs_delta = np.append(mfccs, delta, axis=1)
    mfccs_new = mfccs_delta.transpose()
    mfccs = np.append(mfccs_old, mfccs_new, axis=1)
    # mfccs = np.insert(mfccs, [0], 0, axis=1)
    mfccs_old = mfccs_new
    # Run inference and make predictions
    in_tensor = np.float32(mfccs.reshape(1, mfccs.shape[0], mfccs.shape[1], 1))
    interpreter.set_tensor(input_details[0]['index'], in_tensor)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    val = np.amax(output_data)  # DEFINED FOR BINARY CLASSIFICATION, CHANGE TO MULTICLASS
    ind = np.where(output_data == val)
    prediction = ind[1].astype(int)
    if val > word_threshold:
        print('index:', ind[1])
        print('accuracy', val, '\n')
        print(int(prediction))
    if debug_acc:
        # print('accuracy:', val)
        # print('index:', ind[1])
        print('out tensor:', output_data)
    if debug_time:
        print(timeit.default_timer() - start)
# Start recording from microphone
with sd.InputStream(channels=num_channels,
                    samplerate=sample_rate,
                    blocksize=int(sample_rate * rec_duration),
                    callback=sd_callback):
    while True:
        pass
Since I figured out the issue, I am answering it myself in case others find it useful.
The issue was not having a "background noise" class in the dataset. Also make sure you have enough data for background noises. If you look at Google's Teachable Machine audio project (https://teachablemachine.withgoogle.com/train/audio), a "background noise" class is already there; you cannot delete or disable it.
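As an illustration of how that plays out at inference time, here is a small sketch of multiclass handling with a background-noise class; the label list, its order, and the threshold are assumptions, not taken from the trained model:
import numpy as np

# Hypothetical label list: the order must match the label order used at training
# time, and a dedicated "background noise" class is assumed to sit at index 0.
labels = ["background_noise", "yes", "no", "up", "down"]
word_threshold = 0.95

def handle_prediction(output_data):
    # output_data: model output of shape (1, num_classes)
    idx = int(np.argmax(output_data[0]))
    score = float(output_data[0][idx])
    # Report only confident, non-background predictions.
    if labels[idx] != "background_noise" and score > word_threshold:
        print(labels[idx], score)

handle_prediction(np.array([[0.01, 0.96, 0.01, 0.01, 0.01]]))  # prints: yes 0.96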
I tested with both the code in TensorFlow's GitHub example (https://github.com/tensorflow/examples/blob/master/lite/examples/sound_classification/raspberry_pi/classify.py) and the one on TensorFlow's website (https://www.tensorflow.org/tutorials/audio/simple_audio). They both predict well as long as you have enough background-noise samples in your dataset for the particular environment you are testing in.
I made slight changes to the TensorFlow GitHub code to output the category name and confidence score.
# Loop until the user closes the classification results plot.
while True:
    # Wait until at least interval_between_inference seconds has passed since
    # the last inference.
    now = time.time()
    diff = now - last_inference_time
    if diff < interval_between_inference:
        time.sleep(pause_time)
        continue
    last_inference_time = now
    # Load the input audio and run classify.
    tensor_audio.load_from_audio_record(audio_record)
    result = classifier.classify(tensor_audio)
    for category in result.classifications[0].categories:
        print(category.category_name, category.score)
Hope it's helpful for people playing around with similar projects.
I keep running out of memory, even after I bought Google Colab Pro, which provides 25 GB of RAM. I have no idea why this is happening. I tried every kernel possible (Google Colab, Google Colab Pro, Kaggle kernel, Amazon SageMaker, Google Cloud Platform). I reduced my batch size to 8, with no success whatsoever.
My goal is to train BERT in DeepPavlov (with the Russian text classification extension) to predict the emotion of a tweet. It is a multiclass classification problem with 5 classes.
Here is my whole code:
!pip3 install deeppavlov
import pandas as pd
train_df = pd.read_csv('train_pikabu.csv')
test_df = pd.read_csv('test_pikabu.csv')
val_df = pd.read_csv('validation_pikabu.csv')
from deeppavlov.dataset_readers.basic_classification_reader import BasicClassificationDatasetReader
# read data from particular columns of `.csv` file
data = BasicClassificationDatasetReader().read(
    data_path='./',
    train='train_pikabu.csv',
    valid="validation_pikabu_a.csv",
    test="test_pikabu.csv",
    x='content',
    y='emotions'
)
from deeppavlov.dataset_iterators.basic_classification_iterator import BasicClassificationDatasetIterator
# initializing an iterator
iterator = BasicClassificationDatasetIterator(data, seed=42, shuffle=True)
!python -m deeppavlov install squad_bert
from deeppavlov.models.preprocessors.bert_preprocessor import BertPreprocessor
bert_preprocessor = BertPreprocessor(vocab_file="./bert/vocab.txt",
do_lower_case=False,
max_seq_length=256)
from deeppavlov.core.data.simple_vocab import SimpleVocabulary
vocab = SimpleVocabulary(save_path="./binary_classes.dict")
iterator.get_instances(data_type="train")
vocab.fit(iterator.get_instances(data_type="train")[1])
from deeppavlov.models.preprocessors.one_hotter import OneHotter
one_hotter = OneHotter(depth=vocab.len,
single_vector=True # means we want to have one vector per sample
)
from deeppavlov.models.classifiers.proba2labels import Proba2Labels
prob2labels = Proba2Labels(max_proba=True)
from deeppavlov.models.bert.bert_classifier import BertClassifierModel
from deeppavlov.metrics.accuracy import sets_accuracy
bert_classifier = BertClassifierModel(
n_classes=vocab.len,
return_probas=True,
one_hot_labels=True,
bert_config_file="./bert/bert_config.json",
pretrained_bert="./bert/bert_model.ckpt",
save_path="sst_bert_model/model",
load_path="sst_bert_model/model",
keep_prob=0.5,
learning_rate=1e-05,
learning_rate_drop_patience=5,
learning_rate_drop_div=2.0
)
# Method `get_instances` returns all the samples of particular data field
x_valid, y_valid = iterator.get_instances(data_type="valid")
# You need to save model only when validation score is higher than previous one.
# This variable will contain the highest accuracy score
best_score = 0.
patience = 2
impatience = 0
# let's train for 3 epochs
for ep in range(3):
    nbatches = 0
    for x, y in iterator.gen_batches(batch_size=8,
                                     data_type="train", shuffle=True):
        x_feat = bert_preprocessor(x)
        y_onehot = one_hotter(vocab(y))
        bert_classifier.train_on_batch(x_feat, y_onehot)
        print("Batch done\n")
        nbatches += 1
        if nbatches % 1 == 0:
            # validate after every batch (change 1 to 100 to validate every 100 batches)
            y_valid_pred = bert_classifier(bert_preprocessor(x_valid))
            score = sets_accuracy(y_valid, vocab(prob2labels(y_valid_pred)))
            print("Batches done: {}. Valid Accuracy: {}".format(nbatches, score))
    y_valid_pred = bert_classifier(bert_preprocessor(x_valid))
    score = sets_accuracy(y_valid, vocab(prob2labels(y_valid_pred)))
    print("Epochs done: {}. Valid Accuracy: {}".format(ep + 1, score))
    if score > best_score:
        bert_classifier.save()
        print("New best score. Saving model.")
        best_score = score
        impatience = 0
    else:
        impatience += 1
        if impatience == patience:
            print("Out of patience. Stop training.")
            break
It runs for one batch and then crashes.
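One knob that directly affects BERT's memory use in this setup is the max_seq_length passed to BertPreprocessor (256 above), since activation memory grows quickly with sequence length. A minimal sketch with a shorter limit, assuming 128 tokens is enough for tweets (an assumption, not a verified fix for this crash):
from deeppavlov.models.preprocessors.bert_preprocessor import BertPreprocessor

# Same preprocessor as above, but with a shorter (assumed) maximum sequence length.
bert_preprocessor = BertPreprocessor(vocab_file="./bert/vocab.txt",
                                     do_lower_case=False,
                                     max_seq_length=128)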
I am a TensorFlow newbie. I have a model generated using convNetKerasLarge.py and saved as a TFLite model.
I am trying to test this saved model as follows:
import tensorflow as tf
import numpy as np
import glob
from skimage.transform import resize
from skimage import io
# out of previously used training and test set
start = 4001
# no of images
row_count = 1
end = start + row_count
n_image_rows = 106
n_image_cols = 106
np_val_images = np.zeros(shape=(1, 1))
np_val_labels = np.zeros(shape=(1, 1))
def prepare_validation_set():
    global np_val_images
    global np_val_labels
    positive_samples = glob.glob('datasets/drunk_resize_frontal_faces/pos/*')[start:end]
    # negative_samples = glob.glob('datasets/drunk_resize_frontal_faces/neg/*')[start:end]
    # negative_samples = random.sample(negative_samples, len(positive_samples))
    val_images = []
    val_labels = []
    for i in range(len(positive_samples)):
        val_images.append(resize(io.imread(positive_samples[i]), (n_image_rows, n_image_cols)))
        val_labels.append(1)
    # for i in range(len(negative_samples)):
    #     val_images.append(resize(io.imread(negative_samples[i]), (n_image_rows, n_image_cols)))
    #     val_labels.append(0)
    np_val_images = np.array(val_images)
    np_val_labels = np.array(val_labels)

def run_tflite_model(tflite_file, index):
    prepare_validation_set()
    # Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    test_image = np_val_images[index]
    test_image = np.expand_dims(test_image, axis=0).astype(input_details["dtype"])
    interpreter.set_tensor(input_details["index"], test_image)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details["index"])[0]
    print(output_details)
    prediction = output.argmax()
    print(prediction)

if __name__ == '__main__':
    test_image_index = 1
    tflite_model_file = "models/converted/model.tflite"
    run_tflite_model(tflite_model_file, 0)
If I run this, I get a prediction of 0 even though the label should be 1, since I am inputting a positive image. (FYI: test loss 0.08881912380456924, test accuracy 0.9729166626930237 after 10 epochs.) I am confident there is a mistake in my code that causes this; please help me find it.
The script you linked normalizes the data before training by subtracting the mean (here 0.5) and dividing by the standard deviation (here 1):
mean = np.array([0.5,0.5,0.5])
std = np.array([1,1,1])
X_train = X_train.astype('float')
X_test = X_test.astype('float')
for i in range(3):
    X_train[:,:,:,i] = (X_train[:,:,:,i] - mean[i]) / std[i]
    X_test[:,:,:,i] = (X_test[:,:,:,i] - mean[i]) / std[i]
If you don't repeat the same operations before doing a prediction with the model, the input you pass to the model will not have the same characteristics as the data you trained it with.
You could fix it by subtracting the mean (0.5) from the images when preparing the data, i.e.:
np_val_images = np.array(val_images) - 0.5
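For completeness, a sketch of prepare_validation_set with the same per-channel normalization as the training snippet above (std is 1 here, so the division is a no-op, but it keeps the two pipelines aligned); the paths, shapes, globals, and imports are the ones already defined in the question's script:
def prepare_validation_set():
    global np_val_images
    global np_val_labels
    mean = np.array([0.5, 0.5, 0.5])
    std = np.array([1, 1, 1])
    positive_samples = glob.glob('datasets/drunk_resize_frontal_faces/pos/*')[start:end]
    val_images = []
    val_labels = []
    for i in range(len(positive_samples)):
        val_images.append(resize(io.imread(positive_samples[i]), (n_image_rows, n_image_cols)))
        val_labels.append(1)
    np_val_images = np.array(val_images).astype('float')
    np_val_labels = np.array(val_labels)
    # Apply the same per-channel normalization used at training time.
    for c in range(3):
        np_val_images[:, :, :, c] = (np_val_images[:, :, :, c] - mean[c]) / std[c]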
I have trained a model with images.
Now I would like to extract the fc6 features to .npy files.
I'm using caffe.set_mode_gpu() to run the caffe.Classifier and extract the features.
Instead of extracting and saving the features per frame, I save all the features of a folder to a temporary variable and write the result for the complete video to a single .npy file (decreasing the number of write operations to disk).
I have also heard that I could use caffe.Net and pass a batch of images, but I'm not sure what preprocessing has to be done and whether this is faster.
import os
import shutil
import sys
import glob
from multiprocessing import Pool
import numpy as np
import os, sys, getopt
import time
def keep_fldrs(path, listr):
    ll = list()
    for x in listr:
        if os.path.isdir(path + x):
            ll.append(x)
    return ll

def keep_img(path, listr):
    ll = list()
    for x in listr:
        if os.path.isfile(path + str(x)) & str(x).endswith('.jpg'):
            ll.append(x)
    return ll

def ifdir(path):
    if not os.path.isdir(path):
        os.makedirs(path)
# Main path to your caffe installation
caffe_root = '/home/anilil/projects/lstm/lisa-caffe-public/python'
# Model prototxt file
model_prototxt = '/home/anilil/projects/caffe2tensorflow/deploy_singleFrame.prototxt'
# Model caffemodel file
model_trained = '/home/anilil/projects/caffe2tensorflow/snapshots_singleFrame_flow_v2_iter_55000.caffemodel'
sys.path.insert(0, caffe_root)
import caffe
caffe.set_mode_gpu()
net = caffe.Classifier(model_prototxt, model_trained,
mean=np.array([128, 128, 128]),
channel_swap=(2,1,0),
raw_scale=255,
image_dims=(255, 255))
Root='/media/anilil/Data/Datasets/UCf_scales/ori_mv_vis/Ori_MV/'
Out_fldr='/media/anilil/Data/Datasets/UCf_scales/ori_mv_vis/feat_fc6/'
allcalsses=keep_fldrs(Root,os.listdir(Root))
for classin in allcalsses:
    temp_class = Root + classin + '/'
    temp_out_class = Out_fldr + classin + '/'
    ifdir(temp_out_class)
    allvids_folders = keep_fldrs(temp_class, os.listdir(temp_class))
    for each_vid_fldr in allvids_folders:
        temp_pres_dir = temp_class + each_vid_fldr + '/'
        temp_out_pres_dir = temp_out_class + each_vid_fldr + '/'
        ifdir(temp_out_pres_dir)
        all_images = keep_img(temp_pres_dir, os.listdir(temp_pres_dir))
        frameno = 0
        if os.path.isfile(temp_out_pres_dir + 'video.npy'):
            continue
        start = time.time()
        temp_npy = np.ndarray((len(all_images), 4096), dtype=np.float32)
        for each_image in all_images:
            input_image = caffe.io.load_image(temp_pres_dir + each_image)
            prediction = net.predict([input_image], oversample=False)
            temp_npy[frameno, :] = net.blobs['fc6'].data[0]
            frameno = frameno + 1
        np.save(temp_out_pres_dir + 'video.npy', temp_npy)
        end = time.time()
        print "lenght of imgs {} and time taken is {}".format(len(all_images), (end - start))
    print ('Class {} done'.format(classin))
Output
lenght of imgs 426 and time taken is 388.539139032
lenght of imgs 203 and time taken is 185.467905998
Time needed per image: around 0.9 seconds.
I found the best answer, which I'm sharing in this post.
Until now I had used
net = caffe.Classifier(model_prototxt, model_trained,
mean=np.array([128, 128, 128]),
channel_swap=(2,1,0),
raw_scale=255,
image_dims=(255, 255))
to initialize a model and get the output per image.
But this method is really slow, requiring around 0.9 seconds per image.
The best idea is to pass a batch of images (maybe 100, 200, or 250), depending on how much memory you have on your GPU.
For this I set caffe.set_mode_gpu(), as I have a GPU and it's faster when you send large batches.
Initialize the model with your trained model:
net=caffe.Net(model_prototxt,model_trained,caffe.TEST)
Create a Transformer and make sure to set the mean and other values depending on how you trained your model:
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1)) # height*width*channel -> channel*height*width
mean_file = np.array([128, 128, 128])
transformer.set_mean('data', mean_file) #### subtract mean ####
transformer.set_raw_scale('data', 255) # pixel value range
transformer.set_channel_swap('data', (2,1,0)) # RGB -> BGR
data_blob_shape = net.blobs['data'].data.shape
data_blob_shape = list(data_blob_shape)
Read a group of images and convert them to the network input:
net.blobs['data'].reshape(len(all_images), data_blob_shape[1], data_blob_shape[2], data_blob_shape[3])
images = [temp_pres_dir+str(x) for x in all_images]
net.blobs['data'].data[...] = map(lambda x:
transformer.preprocess('data',caffe.io.load_image(x)), images)
Pass the batch of images through the network:
out = net.forward()
You can use this output as you wish.
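Since the original goal was the fc6 features rather than the final prediction, here is a small sketch of how the per-video save from the question could look with this batched setup; the blob name 'fc6', the 4096-dimensional feature size, and the output path come from the question's code, and the snippet itself is an untested sketch:
# After net.forward() on a batch, each blob holds the activations for the whole
# batch, so the fc6 features for all frames can be copied out in one call.
fc6_feats = net.blobs['fc6'].data.copy()          # shape: (len(all_images), 4096)
np.save(temp_out_pres_dir + 'video.npy', fc6_feats.astype(np.float32))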
The speed is now around 20 ms per image.
Hey guys, I need a bit of help with my PyBrain code. Everything loads fine, but after the first training pass the training error doesn't go down; it just stays stuck at exactly 13.3484055174. I've checked my code many times and compared it with other examples, but I consistently get the same problem. I've also tried changing the number of hidden units, the learning rate, momentum, and weight decay, to no avail. I've checked the parameters: they start off in [-1, 1] and then blow up to ~240-250. I was wondering if anyone can see why it's not working. I'm sure it's a really simple one-liner that I'm missing.
I'm working on the Kaggle 0-9 digit classification dataset. I've already gotten a random forest to work, but I really want to make this neural network work too. Any help would be greatly appreciated.
#learn digit classification with a neural network
import pybrain
from pybrain.datasets import *
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError
import numpy
print "Importing training and test data"
data = numpy.genfromtxt('trainR.csv', delimiter = ',')
data = data[1:]
traindata = data[:(len(data)/2)]
testdata = data[(len(data)/2)+1:]
print "Importing actual data"
actualdata = numpy.genfromtxt('trainR.csv', delimiter = ',')
print "Adding samples to dataset and setting up neural network"
ds = ClassificationDataSet(784, 10, nb_classes = 10)
for x in traindata:
    ds.addSample(tuple(x[1:]), tuple(x[0:1]))
ds._convertToOneOfMany( bounds=[0,1] )
net = buildNetwork(784, 100, 10, bias=True, outclass=SoftmaxLayer)
print "Training the neural network"
trainer = BackpropTrainer(net, dataset=ds, momentum = 0.1,
verbose = True, weightdecay = 0.01)
for i in range(3):
    # train the network for 1 epoch
    trainer.trainEpochs( 1 )
    # evaluate the result on the training and test data
    trnresult = percentError( trainer.testOnClassData(), [x[0] for x in traindata] )
    # print the result
    print "epoch: " + str(trainer.totalepochs) + " train error: " + str(trnresult)
    print ""
print "Predicting with the neural network"
answerlist = []
for row in testdata:
    answer = numpy.argmax(net.activate(row[1:]))
    answerlist.append(answer)
tstresult = percentError(answerlist, [x[0] for x in testdata])
print "Test error: " + str(tstresult)
Try changing
ds = ClassificationDataSet(784, 10, nb_classes = 10)
to
ds = ClassificationDataSet(784, 1, nb_classes = 10)
I think ClassificationDataSet's second argument is the dimension of the targets rather than the number of classes; that is given by nb_classes. It depends on how your data is organized. The best thing is to enter each target as a single integer class index and then use _convertToOneOfMany().
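A minimal sketch of that pattern, reusing the 784-feature digit data from the question (the bounds value is carried over from the original code):
from pybrain.datasets import ClassificationDataSet

# `traindata` is the question's array: column 0 is the digit label, columns 1-784 are pixels.
ds = ClassificationDataSet(784, 1, nb_classes=10)
for x in traindata:
    ds.addSample(tuple(x[1:]), (int(x[0]),))
# Expand the integer targets into 10-dimensional one-of-many (one-hot) vectors for training.
ds._convertToOneOfMany(bounds=[0, 1])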
It would be useful if you provided your first sample.