Using CIFAR-100 dataset with VGG19 model in simple_fedavg example - python

I'm using the simple_fedavg example from the TensorFlow Federated GitHub repository. I tried changing the dataset and the model, but the accuracy never improves; it stays at 1%, which is chance level for 100 classes.
Below is the code; relative to the simple_fedavg example I only changed the model and the dataset. Any idea? I tried different optimizers, but still no luck.
# Copyright 2020, The TensorFlow Federated Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Simple FedAvg to train EMNIST.
This is intended to be a minimal stand-alone experiment script built on top of
core TFF.
"""
import collections
import functools
import os
import random

from absl import app
from absl import flags
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff

import simple_fedavg_tf
import simple_fedavg_tff

np.set_printoptions(precision=None, suppress=None)

# Training hyperparameters
flags.DEFINE_integer('total_rounds', 50, 'Number of total training rounds.')
flags.DEFINE_integer('rounds_per_eval', 1, 'How often to evaluate')
flags.DEFINE_integer('train_clients_per_round', 4,
                     'How many clients to sample per round.')
flags.DEFINE_integer('client_epochs_per_round', 5,
                     'Number of epochs in the client to take per round.')
flags.DEFINE_integer('batch_size', 16, 'Batch size used on the client.')
flags.DEFINE_integer('test_batch_size', 128, 'Minibatch size of test data.')

# Optimizer configuration (this defines one or more flags per optimizer).
flags.DEFINE_float('server_learning_rate', 1, 'Server learning rate.')
flags.DEFINE_float('client_learning_rate', 0.0005, 'Client learning rate.')

FLAGS = flags.FLAGS


def create_vgg19_model():
  model = tf.keras.applications.VGG19(include_top=True,
                                      weights=None,
                                      input_shape=(32, 32, 3),
                                      classes=100)
  return model


def get_cifar100_dataset():
  cifar100_train, cifar100_test = tff.simulation.datasets.cifar100.load_data()

  def element_fn(element):
    return collections.OrderedDict(
        x=tf.expand_dims(element['image'], -1), y=element['label'])

  def preprocess_train_dataset(dataset):
    # Use buffer_size same as the maximum client dataset size,
    # 418 for Federated EMNIST.
    return dataset.map(element_fn).shuffle(buffer_size=418).repeat(
        count=FLAGS.client_epochs_per_round)
    # .batch(FLAGS.batch_size, drop_remainder=False)

  def preprocess_test_dataset(dataset):
    return dataset.map(element_fn).batch(
        FLAGS.test_batch_size, drop_remainder=False)

  cifar100_train = cifar100_train.preprocess(preprocess_train_dataset)
  cifar100_test = preprocess_test_dataset(
      cifar100_test.create_tf_dataset_from_all_clients())
  return cifar100_train, cifar100_test


def server_optimizer_fn():
  return tf.keras.optimizers.SGD(learning_rate=FLAGS.server_learning_rate)


def client_optimizer_fn():
  return tf.keras.optimizers.Adam(learning_rate=FLAGS.client_learning_rate)


def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  client_devices = tf.config.list_logical_devices('GPU')
  print(client_devices)
  server_device = tf.config.list_logical_devices('CPU')[0]
  tff.backends.native.set_local_execution_context(
      server_tf_device=server_device, client_tf_devices=client_devices)

  train_data, test_data = get_cifar100_dataset()

  def tff_model_fn():
    """Constructs a fully initialized model for use in federated averaging."""
    # keras_model = create_original_fedavg_cnn_model(only_digits=False)
    keras_model = create_vgg19_model()
    # keras_model.summary()
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    return simple_fedavg_tf.KerasModelWrapper(keras_model,
                                              test_data.element_spec, loss)

  iterative_process = simple_fedavg_tff.build_federated_averaging_process(
      tff_model_fn, tff_model_fn, tff_model_fn, tff_model_fn,
      server_optimizer_fn, client_optimizer_fn)
  server_state = iterative_process.initialize()
  metric = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
  model = tff_model_fn()

  for round_num in range(FLAGS.total_rounds):
    sampled_clients = np.random.choice(
        train_data.client_ids,
        size=FLAGS.train_clients_per_round,
        replace=False)
    sampled_train_data = [
        train_data.create_tf_dataset_for_client(client).batch(
            FLAGS.batch_size, drop_remainder=False)
        for client in sampled_clients
    ]
    server_state, train_metrics = iterative_process.next(
        server_state, sampled_train_data)
    print(f'Round {round_num} training loss: {train_metrics}')
    if round_num % FLAGS.rounds_per_eval == 0:
      model.from_weights(server_state.model_weights)
      accuracy = simple_fedavg_tf.keras_evaluate(model.keras_model, test_data,
                                                 metric)
      print(f'Round {round_num} validation accuracy: {accuracy * 100.0}')


if __name__ == '__main__':
  app.run(main)
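One thing worth double-checking in the code above (an observation, not a verified fix): element_fn was kept from the EMNIST version, where images are 28x28 and need a channel axis appended. CIFAR-100 images are already (32, 32, 3), so tf.expand_dims(element['image'], -1) produces (32, 32, 3, 1) tensors that do not match VGG19's input_shape=(32, 32, 3), and the uint8 pixels are never scaled. A sketch of a CIFAR-100-appropriate version:

def element_fn(element):
  # CIFAR-100 images are already (32, 32, 3); no expand_dims needed.
  # Casting to float32 and scaling to [0, 1] is a common choice when
  # training VGG19 from scratch; raw uint8 inputs often stall near
  # chance accuracy (1% for 100 classes).
  return collections.OrderedDict(
      x=tf.cast(element['image'], tf.float32) / 255.0,
      y=element['label'])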

Related

Using Python on Ubuntu, training stops with: Segmentation fault (core dumped)

I'm studying TTS, and during training the run stops with Segmentation fault (core dumped).
import os

# Trainer: Where the ✨️ happens.
# TrainerArgs: Defines the set of arguments of the Trainer.
from trainer import Trainer, TrainerArgs

# Tacotron2Config: all model related values for training, validating and testing.
from TTS.tts.configs.tacotron2_config import Tacotron2Config
# BaseDatasetConfig: defines name, formatter and path of the dataset.
from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.tacotron2 import Tacotron2
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.config.shared_configs import BaseAudioConfig

# We use the same path as this script as our training folder.
output_path = os.path.dirname(os.path.abspath(__file__))

# DEFINE DATASET CONFIG
# Set LJSpeech as our target dataset and define its path.
# You can also use a simple Dict to define the dataset and pass it to your custom formatter.
dataset_config = BaseDatasetConfig(
    name="ljspeech",
    meta_file_train="metadata.csv",
    path=os.path.join(output_path, "../LJSpeech-1.1/"),
)

# INITIALIZE THE TRAINING CONFIGURATION
# Configure the model. Every config class inherits the BaseTTSConfig.
audio_config = BaseAudioConfig(
    sample_rate=22050,
    do_trim_silence=True,
    trim_db=60.0,
    signal_norm=False,
    mel_fmin=0.0,
    mel_fmax=8000,
    spec_gain=1.0,
    log_func="np.log",
    ref_level_db=20,
    preemphasis=0.0,
)

config = Tacotron2Config(  # This is the config that is saved for future use
    audio=audio_config,
    batch_size=4,
    eval_batch_size=4,
    num_loader_workers=2,
    num_eval_loader_workers=2,
    run_eval=True,
    test_delay_epochs=-1,
    r=6,
    gradual_training=[[0, 6, 4], [10000, 4, 4], [50000, 3, 4], [100000, 2, 4]],
    double_decoder_consistency=True,
    epochs=1000,
    text_cleaner="phoneme_cleaners",
    use_phonemes=True,
    phoneme_language="en-us",
    phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
    print_step=10,
    print_eval=True,
    mixed_precision=False,
    output_path=output_path,
    datasets=[dataset_config],
)

# INITIALIZE THE AUDIO PROCESSOR
# Audio processor is used for feature extraction and audio I/O.
# It mainly serves the dataloader and the training loggers.
ap = AudioProcessor.init_from_config(config)

# INITIALIZE THE TOKENIZER
# Tokenizer is used to convert text to sequences of token IDs.
# If characters are not defined in the config, default characters are passed to the config.
tokenizer, config = TTSTokenizer.init_from_config(config)

# LOAD DATA SAMPLES
# Each sample is a list of ```[text, audio_file_path, speaker_name]```
# You can define your custom sample loader returning the list of samples.
# Or define your custom formatter and pass it to the `load_tts_samples`.
# Check `TTS.tts.datasets.load_tts_samples` for more details.

# custom formatter implementation
def formatter(root_path, manifest_file, **kwargs):  # pylint: disable=unused-argument
    """Assumes each line is ```<filename>|<transcription>```"""
    txt_file = os.path.join(root_path, manifest_file)
    items = []
    speaker_name = "my_speaker"
    with open(txt_file, "r", encoding="utf-8") as ttf:
        for line in ttf:
            cols = line.split("|")
            # wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
            wav_file = "/media/DATA-2/TTS/coqui/LJSpeech-1.1/wavs/" + cols[0] + ".wav"
            text = cols[1]
            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name})
    return items

# load training samples
train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, formatter=formatter)

# INITIALIZE THE MODEL
# Models take a config object and a speaker manager as input.
# Config defines the details of the model like the number of layers, the size of the embedding, etc.
# Speaker manager is used by multi-speaker models.
print("================== train_samples ========================")
print("len data : ", len(train_samples))
print(train_samples)
print("======================== eval_samples ================")
print("len data : ", len(eval_samples))
print(eval_samples)
model = Tacotron2(config, ap, tokenizer, speaker_manager=None)

# INITIALIZE THE TRAINER
# Trainer provides a generic API to train all the 🐸TTS models with all its perks like
# mixed-precision training, distributed training, etc.
trainer = Trainer(
    TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples
)

# AND... 3,2,1... 🚀
trainer.fit()
Training runs until step 647/2162 and then stops like this:
--> STEP: 647/2162 -- GLOBAL_STEP: 647
| > decoder_loss: 35.02273 (33.81891)
| > postnet_loss: 37.02569 (35.82565)
| > stopnet_loss: 0.82287 (0.85986)
| > decoder_coarse_loss: 35.01795 (33.80500)
| > decoder_ddc_loss: 0.00264 (0.00408)
| > ga_loss: 0.00451 (0.00664)
| > decoder_diff_spec_loss: 0.42732 (0.43585)
| > postnet_diff_spec_loss: 4.44786 (4.47058)
| > decoder_ssim_loss: 0.99999 (0.99978)
| > postnet_ssim_loss: 0.99983 (0.99947)
| > loss: 29.33145 (28.48289)
| > align_error: 0.98594 (0.97906)
| > grad_norm: 3.32803 (3.73880)
| > current_lr: 0.00000
| > step_time: 0.59430 (0.45785)
| > loader_time: 0.00150 (0.00150)
Segmentation fault (core dumped)
There was a "CUBLAS_STATUS_EXECUTION_FAILED" error before, so I checked the PyTorch version; I'm using 1.11.0.
I also reduced the batch size because it previously ran out of memory.
What should I do?
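One way to narrow a crash like this down (a debugging sketch under my own assumptions, not a confirmed fix): segfaults during training usually come either from a data-loader worker process or from the CUDA stack, and both can be isolated with small config changes before touching the model:

# Debugging sketch: run the loader in-process, so a worker crash becomes a
# normal Python traceback instead of a bare "Segmentation fault".
config.num_loader_workers = 0
config.num_eval_loader_workers = 0

# Running a few steps with the GPU hidden separates cuBLAS/CUDA problems
# (like the earlier CUBLAS_STATUS_EXECUTION_FAILED) from data problems;
# replace train.py with whatever this script is saved as:
#   CUDA_VISIBLE_DEVICES="" python train.py

If the crash disappears with workers disabled, the problem is in the dataset/formatter path; if it disappears on CPU, it points back at the PyTorch/CUDA combination.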

Changing the name of the TensorBoard log in a detectron2 model

I want to change the name of my TensorBoard event files (for example events.out.tfevents.1649248617.AlienwareArea51R5.51093) to a custom name. The logs are saved in the cfg.OUTPUT_DIR location, but I cannot find where to change the name. Should I change it somewhere in the cfg settings, or is it done with setup_logger()?
Thank you in advance!
My current code:
from detectron2.utils.logger import setup_logger
setup_logger()

import os
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine.MyTrainer import MyTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("kernel_train_data",)
cfg.DATASETS.TEST = ("kernel_test_data",)
cfg.DATALOADER.NUM_WORKERS = 2
# Let training initialize from the model zoo
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 6000    # seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []         # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64  # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only one class (balloon); see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets
cfg.TEST.EVAL_PERIOD = 500

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = MyTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
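For what it's worth, the events.out.tfevents.<timestamp>.<hostname>.<pid> name is generated by TensorBoard's own SummaryWriter, not by detectron2's cfg or by setup_logger(). A sketch of one option, assuming MyTrainer subclasses DefaultTrainer: override build_writers() and pass SummaryWriter keyword arguments through detectron2's TensorboardXWriter (which forwards extra kwargs to SummaryWriter), e.g. filename_suffix, which is appended to the generated name rather than replacing it:

import os
from detectron2.engine import DefaultTrainer
from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter

class MyTrainer(DefaultTrainer):
    def build_writers(self):
        return [
            CommonMetricPrinter(self.max_iter),
            JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")),
            # filename_suffix is appended after the pid; the
            # timestamp.hostname.pid prefix itself is not configurable.
            TensorboardXWriter(self.cfg.OUTPUT_DIR, filename_suffix=".my_experiment"),
        ]

Alternatively, pointing TensorboardXWriter at a per-run subdirectory (e.g. os.path.join(cfg.OUTPUT_DIR, "my_experiment")) gives recognizable names without fighting the file-name convention.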

How to make sure my code runs on the GPU and not the CPU?

I am new to deep learning and TensorFlow. I have the following code. Whenever I run it, my system administrator notifies me that it is running on the CPU and not the GPU, even though we have a GPU in the system and I have only installed tensorflow-gpu. What changes should I make to my code so that it runs on the GPU and not the CPU?
import math
import tempfile

import numpy as np
from tensorflow.python.keras.layers import BatchNormalization, Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.python.keras.models import Sequential

import fastestimator as fe
from fastestimator.dataset.data import cifair10
from fastestimator.architecture.tensorflow import WideResidualNetwork
from fastestimator.op.numpyop.meta import Sometimes
from fastestimator.op.numpyop.multivariate import HorizontalFlip, PadIfNeeded, RandomCrop
from fastestimator.op.numpyop.univariate import CoarseDropout, Normalize
from fastestimator.op.tensorop.loss import CrossEntropy, SuperLoss
from fastestimator.op.tensorop.model import ModelOp, UpdateOp
from fastestimator.trace.io import BestModelSaver
from fastestimator.trace.metric import MCC, Accuracy
from fastestimator.trace.xai import LabelTracker

# Training parameters
epochs = 100
batch_size = 128
max_train_steps_per_epoch = None
max_eval_steps_per_epoch = None
save_dir = tempfile.mkdtemp()

train_data, eval_data = cifair10.load_data()
test_data = eval_data.split(0.5)


def corrupt_dataset(dataset, n_classes=10, corruption_fraction=0.4):
    # Keep track of which samples were corrupted for visualization later
    corrupted = [0 for _ in range(len(dataset))]
    # Perform the actual label corruption
    n_samples_per_class = len(dataset) // n_classes  # 50000 // 10 = 5000
    n_to_corrupt_per_class = math.floor(corruption_fraction * n_samples_per_class)  # 0.4 * 5000 = 2000
    n_corrupted = [0] * n_classes
    i = 0
    while any([elem < n_to_corrupt_per_class for elem in n_corrupted]):  # while any class is left to be corrupted
        current_class = dataset[i]['y'].item()
        if n_corrupted[current_class] < n_to_corrupt_per_class:  # has this class reached its corruption quota yet?
            dataset[i]['y'] = (dataset[i]['y'] + np.random.randint(1, n_classes)) % n_classes  # change the y value of the sample
            n_corrupted[current_class] += 1
            corrupted[i] = 1
        i += 1
    # Put the corruption labels into the dataset for visualization
    dataset['data_labels'] = np.array(corrupted, dtype=int).reshape((len(dataset), 1))


corrupt_dataset(train_data)


def get_wrn():
    return WideResidualNetwork((32, 32, 3))


def build_estimator(loss_op):
    pipeline = fe.Pipeline(train_data=train_data,
                           eval_data=eval_data,
                           test_data=test_data,
                           batch_size=batch_size,
                           ops=[Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
                                PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
                                RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
                                Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
                                CoarseDropout(inputs="x", outputs="x", max_holes=1, mode="train"),
                                ])
    model = fe.build(model_fn=get_wrn, optimizer_fn='adam')
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        loss_op,  # <<<----------------------------- This is where the secret sauce will go
        UpdateOp(model=model, loss_name="ce")
    ])
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        MCC(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=save_dir, metric="mcc", save_best_mode="max", load_best_final=True),
        # We will also visualize the difference between the normal and corrupted image confidence scores.
        # You could follow this with an ImageViewer trace, but we will get the data out of the system
        # summary instead later for viewing.
        LabelTracker(metric="confidence", label="data_labels", label_mapping={"Normal": 0, "Corrupted": 1}, mode="train", outputs="label_confidence"),
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=max_train_steps_per_epoch,
                             eval_steps_per_epoch=max_eval_steps_per_epoch,
                             log_steps=300)
    return estimator


loss = SuperLoss(CrossEntropy(inputs=("y_pred", "y"), outputs="ce"), output_confidence="confidence")  # output_confidence is only needed if you want to visualize
estimator_super = build_estimator(loss)
superL = estimator_super.fit("SuperLoss")
print("before test")
summary = estimator_super.test()
print("after test")
print(summary.history["test"])
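Before changing the training code, it is worth checking whether TensorFlow can see the GPU at all; if it cannot, it silently falls back to the CPU, and the usual cause is the environment (a CPU-only TensorFlow build, or a CUDA/cuDNN version mismatch) rather than anything in the script. A minimal check using standard TensorFlow 2.x APIs:

import tensorflow as tf

print("TF version:", tf.__version__)
print("GPUs visible to TF:", tf.config.list_physical_devices('GPU'))

# Log the device every op is placed on; training output will then show
# explicitly whether kernels run on /GPU:0 or fall back to the CPU.
tf.debugging.set_log_device_placement(True)

If the GPU list prints empty, no change to the FastEstimator code above will help; the TensorFlow/CUDA installation has to be fixed first.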

How to do Topic Detection in Unsupervised Aspect Based Sentiment Analysis

I want to build an ABSA system in Python where the sentiment of pre-defined aspects (e.g. delivery, quality, service) is analyzed from online reviews. I want to do it unsupervised, because this saves me from manually labeling reviews and lets me analyze far more review data (around 100k reviews). My dataset therefore consists only of reviews, with no ratings. I would like a model that first detects the aspect category and then assigns the sentiment polarity. E.g. when a review says "The shipment went smoothly, but the product is broken", I want the model to assign the word "shipment" to the aspect category "delivery", and "smoothly" relates to a positive sentiment.
I have searched for approaches to take, and I would like to know if anyone has experience with this and could point me in a helpful direction. It will be highly appreciated!
Aspect Based Sentiment Analysis (ABSA), where the task is first to extract aspects or features of an entity (i.e. Aspect Term Extraction or ATE) from a given text, and second to determine the sentiment polarity (SP), if any, towards each aspect of that entity. The importance of ABSA led to the creation of the ABSA task.

A B-LSTM & CRF classifier will be used for feature extraction and aspect term detection for both supervised and unsupervised ATE.
https://www.researchgate.net/profile/Andreea_Hossmann/publication/319875533_Unsupervised_Aspect_Term_Extraction_with_B-LSTM_and_CRF_using_Automatically_Labelled_Datasets/links/5a3436a70f7e9b10d842b0eb/Unsupervised-Aspect-Term-Extraction-with-B-LSTM-and-CRF-using-Automatically-Labelled-Datasets.pdf
https://github.com/songyouwei/ABSA-PyTorch/blob/master/infer_example.py
# -*- coding: utf-8 -*-
# file: infer.py
# author: songyouwei <youwei0314#gmail.com>
# Copyright (C) 2019. All Rights Reserved.
import torch
import torch.nn.functional as F
import argparse

from data_utils import build_tokenizer, build_embedding_matrix
from models import IAN, MemNet, ATAE_LSTM, AOA


class Inferer:
    """A simple inference example"""

    def __init__(self, opt):
        self.opt = opt
        self.tokenizer = build_tokenizer(
            fnames=[opt.dataset_file['train'], opt.dataset_file['test']],
            max_seq_len=opt.max_seq_len,
            dat_fname='{0}_tokenizer.dat'.format(opt.dataset))
        embedding_matrix = build_embedding_matrix(
            word2idx=self.tokenizer.word2idx,
            embed_dim=opt.embed_dim,
            dat_fname='{0}_{1}_embedding_matrix.dat'.format(str(opt.embed_dim), opt.dataset))
        self.model = opt.model_class(embedding_matrix, opt)
        print('loading model {0} ...'.format(opt.model_name))
        self.model.load_state_dict(torch.load(opt.state_dict_path))
        self.model = self.model.to(opt.device)
        # switch model to evaluation mode
        self.model.eval()
        torch.autograd.set_grad_enabled(False)

    def evaluate(self, raw_texts):
        context_seqs = [self.tokenizer.text_to_sequence(raw_text.lower().strip()) for raw_text in raw_texts]
        aspect_seqs = [self.tokenizer.text_to_sequence('null')] * len(raw_texts)
        context_indices = torch.tensor(context_seqs, dtype=torch.int64).to(self.opt.device)
        aspect_indices = torch.tensor(aspect_seqs, dtype=torch.int64).to(self.opt.device)
        t_inputs = [context_indices, aspect_indices]
        t_outputs = self.model(t_inputs)
        t_probs = F.softmax(t_outputs, dim=-1).cpu().numpy()
        return t_probs


if __name__ == '__main__':
    model_classes = {
        'atae_lstm': ATAE_LSTM,
        'ian': IAN,
        'memnet': MemNet,
        'aoa': AOA,
    }
    # set your trained models here
    model_state_dict_paths = {
        'atae_lstm': 'state_dict/atae_lstm_restaurant_acc0.7786',
        'ian': 'state_dict/ian_restaurant_acc0.7911',
        'memnet': 'state_dict/memnet_restaurant_acc0.7911',
        'aoa': 'state_dict/aoa_restaurant_acc0.8063',
    }

    class Option(object):
        pass

    opt = Option()
    opt.model_name = 'ian'
    opt.model_class = model_classes[opt.model_name]
    opt.dataset = 'restaurant'
    opt.dataset_file = {
        'train': './datasets/semeval14/Restaurants_Train.xml.seg',
        'test': './datasets/semeval14/Restaurants_Test_Gold.xml.seg'
    }
    opt.state_dict_path = model_state_dict_paths[opt.model_name]
    opt.embed_dim = 300
    opt.hidden_dim = 300
    opt.max_seq_len = 80
    opt.polarities_dim = 3
    opt.hops = 3
    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    inf = Inferer(opt)
    t_probs = inf.evaluate(['happy memory', 'the service is terrible', 'just normal food'])
    print(t_probs.argmax(axis=-1) - 1)
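For the unsupervised aspect-category step specifically, one common starting point (a sketch under my own assumptions, not taken from the linked repository; the seed words and threshold are hand-picked for illustration) is to match the nouns in a review against seed words for each pre-defined category using word-vector similarity, for example with spaCy's en_core_web_md model (pip install spacy, then python -m spacy download en_core_web_md):

import spacy

nlp = spacy.load("en_core_web_md")  # medium model ships with word vectors

# Hand-picked seed words per pre-defined aspect category.
aspect_seeds = {
    "delivery": nlp("delivery shipping shipment arrival"),
    "quality": nlp("quality material durability condition"),
    "service": nlp("service support staff friendliness"),
}

def detect_aspects(review, threshold=0.5):
    """Return {noun: best-matching category} for nouns above the threshold."""
    matches = {}
    for token in nlp(review):
        if token.pos_ != "NOUN" or not token.has_vector:
            continue
        best_cat, best_sim = None, threshold
        for cat, seeds in aspect_seeds.items():
            sim = max(token.similarity(seed) for seed in seeds)
            if sim > best_sim:
                best_cat, best_sim = cat, sim
        if best_cat:
            matches[token.text] = best_cat
    return matches

print(detect_aspects("The shipment went smoothly, but the product is broken"))
# e.g. {'shipment': 'delivery', ...}

The sentence containing each matched noun can then be fed to a polarity classifier (e.g. the Inferer above) with the noun as the aspect term.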

TensorFlow feed-forward network session doesn't stop

I am trying to build a simple feed-forward neural network using TensorFlow and its TFRecord format. I have been using TensorFlow's tutorials and examples as a reference:
https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/how_tos/reading_data
Given "food" float values, I want to predict the "happiness" float value that it produces.
food_test.json is a a JSON file that contains the "food" value and its associated "happiness" value. This is the format the data is stored in.
food_to_record.py is based off of tensorflow's convert_to_records.py. It reads in the food_test.json and converts it to a food_record.tfr file.
food_reader.py is based off of tensorflow's fully_connected_reader.py. It reads in the food_record.tfr file and runs the data through a neural network.
I run the program in this order:
1. food_to_record.py
2. food_reader.py
When food_reader.py is run, it starts a TensorFlow session, but the session never terminates. Does anyone know the reason for this?
food_test.json:
[
    {
        "food": 1.0,
        "happiness": 2.0
    },
    {
        "food": 1.4,
        "happiness": 5.4
    }
]
food_to_record.py:
# based off of tensorflow's convert_to_records.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import sys
import json

import tensorflow as tf

FLAGS = None


# feature for integers
def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


# feature for floats
def _float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


# feature for strings and others
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def main(unused_argv):
    print("food_to_record:main")
    script_dir = os.path.dirname(__file__)
    file_path = os.path.join(script_dir, 'food_test.json')
    with open(file_path) as data_file:
        data = json.load(data_file)
    print(data)

    num_examples = 2
    name = 'food_record'
    filename = os.path.join(FLAGS.directory, name + '.tfrecords')
    print('Writing', filename)
    writer = tf.python_io.TFRecordWriter(filename)
    for index in range(num_examples):
        example = tf.train.Example(features=tf.train.Features(feature={
            'food': _float_feature(data[index]['food']),
            'happiness': _float_feature(data[index]['happiness'])
        }))
        writer.write(example.SerializeToString())
    writer.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--directory',
        type=str,
        default='.',
        help='Directory to download data files and write the converted result'
    )
    parser.add_argument(
        '--validation_size',
        type=int,
        default=5000,
        help="""\
        Number of examples to separate from the training data for the validation
        set.\
        """
    )
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
food_reader.py:
# based off of tensorflow's fully_connected_reader
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os.path
import sys
import time

import tensorflow as tf

# Basic model parameters as external flags.
FLAGS = None

# Constants used for dealing with the files
TRAIN_FILE = 'food_record.tfrecords'
# For simple testing purposes, use training file for validation
VALIDATION_FILE = 'food_record.tfrecords'


def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'food': tf.FixedLenFeature([], tf.float32),
            'happiness': tf.FixedLenFeature([], tf.float32)
        })
    food = tf.cast(features['food'], tf.float32)
    happiness = tf.cast(features['happiness'], tf.float32)
    food = tf.expand_dims(food, -1)
    print("food shape: ", tf.shape(food))
    print("happiness shape: ", tf.shape(happiness))
    return food, happiness


def inputs(train, batch_size, num_epochs):
    """Reads input data num_epochs times.

    Args:
      train: Selects between the training (True) and validation (False) data.
      batch_size: Number of examples per returned batch.
      num_epochs: Number of times to read the input data, or 0/None to
        train forever.

    Returns:
      A tuple (images, labels), where:
      * images is a float tensor with shape [batch_size, mnist.IMAGE_PIXELS]
        in the range [-0.5, 0.5].
      * labels is an int32 tensor with shape [batch_size] with the true label,
        a number in the range [0, mnist.NUM_CLASSES).

    Note that a tf.train.QueueRunner is added to the graph, which
    must be run using e.g. tf.train.start_queue_runners().
    """
    if not num_epochs:
        num_epochs = None
    filename = os.path.join(FLAGS.train_dir,
                            TRAIN_FILE if train else VALIDATION_FILE)
    with tf.name_scope('input'):
        filename_queue = tf.train.string_input_producer(
            [filename], num_epochs=num_epochs)
        # Even when reading in multiple threads, share the filename queue.
        food, happiness = read_and_decode(filename_queue)
        # Shuffle the examples and collect them into batch_size batches.
        # (Internally uses a RandomShuffleQueue.)
        # We run this in two threads to avoid being a bottleneck.
        foods, happinesses = tf.train.shuffle_batch(
            [food, happiness], batch_size=batch_size, num_threads=2,
            capacity=1000 + 3 * batch_size,
            # Ensures a minimum amount of shuffling of examples.
            min_after_dequeue=1000)
    return foods, happinesses


def main(_):
    with tf.Graph().as_default():
        # Input images and labels.
        foods, happinesses = inputs(train=True, batch_size=FLAGS.batch_size,
                                    num_epochs=FLAGS.num_epochs)
        HIDDEN_UNITS = 4
        INPUTS = 1
        OUTPUTS = 1
        weights_1 = tf.Variable(tf.truncated_normal([INPUTS, HIDDEN_UNITS]))
        biases_1 = tf.Variable(tf.zeros([HIDDEN_UNITS]))
        layer_1_outputs = tf.nn.sigmoid(tf.matmul(foods, weights_1) + biases_1)
        weights_2 = tf.Variable(tf.truncated_normal([HIDDEN_UNITS, OUTPUTS]))
        biases_2 = tf.Variable(tf.zeros([OUTPUTS]))
        logits = tf.nn.sigmoid(tf.matmul(layer_1_outputs, weights_2) + biases_2)
        # loss = tf.reduce_mean(logits)
        labels = tf.to_int64(happinesses)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits, name='xentropy')
        # loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
        loss = tf.reduce_sum(tf.sub(logits, happinesses))
        learning_rate = 0.01
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.minimize(loss)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess = tf.Session()
        sess.run(init_op)
        print('starting iteration', 0)
        _, loss = sess.run([train_op, loss])
        print(loss)
        sess.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.01,
        help='Initial learning rate.'
    )
    parser.add_argument(
        '--num_epochs',
        type=int,
        default=2,
        help='Number of epochs to run trainer.'
    )
    parser.add_argument(
        '--hidden1',
        type=int,
        default=128,
        help='Number of units in hidden layer 1.'
    )
    parser.add_argument(
        '--hidden2',
        type=int,
        default=32,
        help='Number of units in hidden layer 2.'
    )
    parser.add_argument(
        '--batch_size',
        type=int,
        default=100,
        help='Batch size.'
    )
    parser.add_argument(
        '--train_dir',
        type=str,
        default='.',
        help='Directory with the training data.'
    )
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
You must call tf.train.start_queue_runners to populate the queue before you call run or eval to execute the reading of files. Otherwise, reading blocks while it waits for filenames from the queue. Please check the run_training method from the original example, or TensorFlow's documentation on how_tos/reading_data.
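A minimal sketch of that fix inside main (this is the standard Coordinator pattern used by the fully_connected_reader example; variable names match the question's code):

sess = tf.Session()
sess.run(init_op)

# Start the queue-runner threads that feed the filename and example
# queues; without this, sess.run blocks forever on an empty queue.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    print('starting iteration', 0)
    _, loss_value = sess.run([train_op, loss])
    print(loss_value)
finally:
    coord.request_stop()   # ask the reader threads to stop...
    coord.join(threads)    # ...and wait for them to finish
    sess.close()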
