Changing the name of the TensorFlow event log in a detectron2 model - python

I want to change the name of my TensorFlow event logs (e.g. events.out.tfevents.1649248617.AlienwareArea51R5.51093) to a custom name. The logs are saved in the cfg.OUTPUT_DIR location, but I cannot find where to change the name. Should I change it somewhere in the cfg settings, or is it done with setup_logger()?
Thank you in advance!
My current function:
'''
import os

from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine.MyTrainer import MyTrainer  # custom trainer module

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("kernel_train_data",)
cfg.DATASETS.TEST = ("kernel_test_data",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")  # let training initialize from the model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 6000  # 300 iterations is enough for a toy dataset; train longer for a practical dataset
cfg.SOLVER.STEPS = []  # do not decay the learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64  # faster, and good enough for a toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only one class (balloon); see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets
cfg.TEST.EVAL_PERIOD = 500

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = MyTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()'''
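For what it's worth, the events.out.tfevents.* file name is produced neither by cfg nor by setup_logger(): it comes from TensorBoard's SummaryWriter, which derives the name from the current timestamp and hostname. That prefix cannot be replaced outright, but SummaryWriter accepts a filename_suffix argument, and detectron2's TensorboardXWriter forwards extra keyword arguments to SummaryWriter. Below is a minimal sketch of tagging the event file through a custom trainer, assuming MyTrainer subclasses DefaultTrainer; the ".my_experiment" suffix is a made-up example:

import os

from detectron2.engine import DefaultTrainer
from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter


class MyTrainer(DefaultTrainer):
    def build_writers(self):
        # Same writers as DefaultTrainer, but the TensorBoard event file
        # gets a custom suffix appended after the default
        # events.out.tfevents.<timestamp>.<hostname> prefix.
        return [
            CommonMetricPrinter(self.max_iter),
            JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")),
            TensorboardXWriter(self.cfg.OUTPUT_DIR, filename_suffix=".my_experiment"),
        ]

Alternatively, renaming the file after training, or pointing cfg.OUTPUT_DIR at a per-run directory, achieves the same organizational effect without touching the writers.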

Related

Pytorch model weights change when put on GPU

I noticed very strange behaviour with the 3D ResNet by facebookresearch. Using their sample code from the website, I get different results when putting the model on the GPU. On the CPU the correct class (archery) is predicted, but the model fails to predict it on the GPU. Can anyone replicate this and confirm that this is indeed the case? Does anyone know why this is happening and how to prevent it? Below is some code to quickly test it out:
import torch
import json
import urllib
from pytorchvideo.data.encoded_video import EncodedVideo
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample
)


def predict_archery(model, device):
    json_url = "https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json"
    json_filename = "kinetics_classnames.json"
    try:
        urllib.URLopener().retrieve(json_url, json_filename)
    except:
        urllib.request.urlretrieve(json_url, json_filename)
    with open(json_filename, "r") as f:
        kinetics_classnames = json.load(f)

    # Create an id to label name mapping
    kinetics_id_to_classname = {}
    for k, v in kinetics_classnames.items():
        kinetics_id_to_classname[v] = str(k).replace('"', "")

    side_size = 256
    mean = [0.45, 0.45, 0.45]
    std = [0.225, 0.225, 0.225]
    crop_size = 256
    num_frames = 8
    sampling_rate = 8
    frames_per_second = 30

    # Note that this transform is specific to the slow_R50 model.
    transform = ApplyTransformToKey(
        key="video",
        transform=Compose(
            [
                UniformTemporalSubsample(num_frames),
                Lambda(lambda x: x / 255.0),
                NormalizeVideo(mean, std),
                ShortSideScale(size=side_size),
                CenterCropVideo(crop_size=(crop_size, crop_size))
            ]
        ),
    )

    # The duration of the input clip is also specific to the model.
    clip_duration = (num_frames * sampling_rate) / frames_per_second

    url_link = "https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4"
    video_path = 'archery.mp4'
    try:
        urllib.URLopener().retrieve(url_link, video_path)
    except:
        urllib.request.urlretrieve(url_link, video_path)

    # Select the duration of the clip to load by specifying the start and end duration.
    # The start_sec should correspond to where the action occurs in the video.
    start_sec = 0
    end_sec = start_sec + clip_duration

    # Initialize an EncodedVideo helper class and load the video
    video = EncodedVideo.from_path(video_path)

    # Load the desired clip
    video_data = video.get_clip(start_sec=start_sec, end_sec=end_sec)

    # Apply a transform to normalize the video input
    video_data = transform(video_data)

    # Move the inputs to the desired device
    inputs = video_data["video"]
    inputs = inputs.to(device)

    # Pass the input clip through the model
    preds = model(inputs[None, ...])

    # Get the predicted classes
    post_act = torch.nn.Softmax(dim=1)
    preds = post_act(preds)
    pred_classes = preds.topk(k=5).indices[0]

    # Map the predicted classes to the label names
    pred_class_names = [kinetics_id_to_classname[int(i)] for i in pred_classes]
    print("Top 5 predicted labels: %s" % ", ".join(pred_class_names))


if __name__ == '__main__':
    # Choose device
    # device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    device = torch.device("cpu")

    # Choose the `slow_r50` model
    model = torch.hub.load('facebookresearch/pytorchvideo', 'slow_r50', pretrained=True).to(device)
    model = model.eval()

    predict_archery(model, device)
Results on CPU:
Top 5 predicted labels: archery, throwing axe, playing paintball,
stretching arm, riding or walking with horse
Results on GPU:
Top 5 predicted labels: flying kite, air drumming, beatboxing,
smoking, reading book
Edit:
Apparently, this issue cannot be reproduced on Google Colab. I therefore assume the issue is related to the specific hardware / CUDA version. I am using an NVIDIA TITAN Xp and CUDA version 11.4.
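One way to narrow this down (a minimal sketch, reusing the `model` loaded in the script above on a machine with a GPU; the tolerance value is an arbitrary assumption) is to feed the same random tensor through the model on both devices and compare the raw logits, which separates a numerical-precision gap from a preprocessing bug:

import torch

# Compare CPU and GPU forward passes on identical input.
x = torch.randn(1, 3, 8, 256, 256)  # (batch, channels, frames, height, width) for slow_r50

model_cpu = model.to("cpu").eval()
with torch.no_grad():
    out_cpu = model_cpu(x)

model_gpu = model.to("cuda").eval()
with torch.no_grad():
    out_gpu = model_gpu(x.to("cuda")).cpu()

print("max abs diff:", (out_cpu - out_gpu).abs().max().item())
print("allclose:", torch.allclose(out_cpu, out_gpu, atol=1e-4))

A small difference points at normal floating-point divergence; a large one (as the flipped top-5 labels here suggest) points at a driver/CUDA-level problem on that machine.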

Using Python on Ubuntu, training stops with the message: Segmentation fault (core dumped)

I'm studying TTS, and during training the run stops with the message Segmentation fault (core dumped):
import os

# Trainer: Where the ✨️ happens.
# TrainingArgs: Defines the set of arguments of the Trainer.
from trainer import Trainer, TrainerArgs

# Tacotron2Config: all model related values for training, validating and testing.
from TTS.tts.configs.tacotron2_config import Tacotron2Config

# BaseDatasetConfig: defines name, formatter and path of the dataset.
from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.tacotron2 import Tacotron2
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.config.shared_configs import BaseAudioConfig

# We use the same path as this script as our training folder.
output_path = os.path.dirname(os.path.abspath(__file__))

# DEFINE DATASET CONFIG
# Set LJSpeech as our target dataset and define its path.
# You can also use a simple Dict to define the dataset and pass it to your custom formatter.
dataset_config = BaseDatasetConfig(
    name="ljspeech", meta_file_train="metadata.csv", path=os.path.join(output_path, "../LJSpeech-1.1/")
)

# INITIALIZE THE TRAINING CONFIGURATION
# Configure the model. Every config class inherits the BaseTTSConfig.
audio_config = BaseAudioConfig(
    sample_rate=22050,
    do_trim_silence=True,
    trim_db=60.0,
    signal_norm=False,
    mel_fmin=0.0,
    mel_fmax=8000,
    spec_gain=1.0,
    log_func="np.log",
    ref_level_db=20,
    preemphasis=0.0,
)

config = Tacotron2Config(  # This is the config that is saved for future use
    audio=audio_config,
    batch_size=4,
    eval_batch_size=4,
    num_loader_workers=2,
    num_eval_loader_workers=2,
    run_eval=True,
    test_delay_epochs=-1,
    r=6,
    gradual_training=[[0, 6, 4], [10000, 4, 4], [50000, 3, 4], [100000, 2, 4]],
    double_decoder_consistency=True,
    epochs=1000,
    text_cleaner="phoneme_cleaners",
    use_phonemes=True,
    phoneme_language="en-us",
    phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
    print_step=10,
    print_eval=True,
    mixed_precision=False,
    output_path=output_path,
    datasets=[dataset_config],
)

# INITIALIZE THE AUDIO PROCESSOR
# Audio processor is used for feature extraction and audio I/O.
# It mainly serves the dataloader and the training loggers.
ap = AudioProcessor.init_from_config(config)

# INITIALIZE THE TOKENIZER
# Tokenizer is used to convert text to sequences of token IDs.
# If characters are not defined in the config, default characters are passed to the config.
tokenizer, config = TTSTokenizer.init_from_config(config)

# LOAD DATA SAMPLES
# Each sample is a list of ```[text, audio_file_path, speaker_name]```
# You can define your custom sample loader returning the list of samples.
# Or define your custom formatter and pass it to `load_tts_samples`.
# Check `TTS.tts.datasets.load_tts_samples` for more details.

# custom formatter implementation
def formatter(root_path, manifest_file, **kwargs):  # pylint: disable=unused-argument
    """Assumes each line as ```<filename>|<transcription>```"""
    txt_file = os.path.join(root_path, manifest_file)
    items = []
    speaker_name = "my_speaker"
    with open(txt_file, "r", encoding="utf-8") as ttf:
        for line in ttf:
            cols = line.split("|")
            # wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
            wav_file = "/media/DATA-2/TTS/coqui/LJSpeech-1.1/wavs/" + cols[0] + ".wav"
            text = cols[1]
            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name})
    return items

# load training samples
train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, formatter=formatter)

# INITIALIZE THE MODEL
# Models take a config object and a speaker manager as input.
# Config defines the details of the model like the number of layers, the size of the embedding, etc.
# Speaker manager is used by multi-speaker models.
print("================== train_samples ========================")
print("len data : ", len(train_samples))
print(train_samples)
print("======================== eval_samples ================")
print("len data : ", len(eval_samples))
print(eval_samples)

model = Tacotron2(config, ap, tokenizer, speaker_manager=None)

# INITIALIZE THE TRAINER
# Trainer provides a generic API to train all the 🐸TTS models with all its perks like
# mixed-precision training, distributed training, etc.
trainer = Trainer(
    TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples
)

# AND... 3,2,1... 🚀
trainer.fit()
Training runs until step 647/2162 and then stops like this:
--> STEP: 647/2162 -- GLOBAL_STEP: 647
| > decoder_loss: 35.02273 (33.81891)
| > postnet_loss: 37.02569 (35.82565)
| > stopnet_loss: 0.82287 (0.85986)
| > decoder_coarse_loss: 35.01795 (33.80500)
| > decoder_ddc_loss: 0.00264 (0.00408)
| > ga_loss: 0.00451 (0.00664)
| > decoder_diff_spec_loss: 0.42732 (0.43585)
| > postnet_diff_spec_loss: 4.44786 (4.47058)
| > decoder_ssim_loss: 0.99999 (0.99978)
| > postnet_ssim_loss: 0.99983 (0.99947)
| > loss: 29.33145 (28.48289)
| > align_error: 0.98594 (0.97906)
| > grad_norm: 3.32803 (3.73880)
| > current_lr: 0.00000
| > step_time: 0.59430 (0.45785)
| > loader_time: 0.00150 (0.00150)
Segmentation fault (core dumped)
There was a "CUBLAS_STATUS_EXECUTION_FAILED" error before, so I checked the PyTorch version: I'm using 1.11.0.
I also reduced the batch size because it was previously running out of memory.
What should I do?
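A segmentation fault kills the interpreter itself, so Python prints no traceback. A first step is to find out where it dies; a minimal sketch using the standard-library faulthandler module (equivalently, run the script with python -X faulthandler) enables a traceback dump at the top of the training script:

import faulthandler

# On a fatal signal (e.g. SIGSEGV), dump the Python traceback of all
# threads to stderr instead of exiting silently.
faulthandler.enable()

If the dump points into DataLoader workers, temporarily setting num_loader_workers=0 in the config above tests whether worker processes are involved; and given the earlier CUBLAS_STATUS_EXECUTION_FAILED error, it is also worth verifying that the installed PyTorch wheel matches the system's CUDA version.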

How to make sure my code runs on the GPU and not the CPU?

I am new to deep learning and TensorFlow. I have the following code. Whenever I run it, my system administrator notifies me that my code is running on the CPU and not the GPU, even though we have a GPU in the system and I have installed only tensorflow-gpu. What changes should I make to my code so that it runs on the GPU and not the CPU?
import math
import tempfile

import numpy as np
from tensorflow.python.keras.layers import BatchNormalization, Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.python.keras.models import Sequential

import fastestimator as fe
from fastestimator.dataset.data import cifair10
from fastestimator.architecture.tensorflow import WideResidualNetwork
from fastestimator.op.numpyop.meta import Sometimes
from fastestimator.op.numpyop.multivariate import HorizontalFlip, PadIfNeeded, RandomCrop
from fastestimator.op.numpyop.univariate import CoarseDropout, Normalize
from fastestimator.op.tensorop.loss import CrossEntropy, SuperLoss
from fastestimator.op.tensorop.model import ModelOp, UpdateOp
from fastestimator.trace.io import BestModelSaver
from fastestimator.trace.metric import MCC, Accuracy
from fastestimator.trace.xai import LabelTracker

# training parameters
epochs = 100
batch_size = 128
max_train_steps_per_epoch = None
max_eval_steps_per_epoch = None
save_dir = tempfile.mkdtemp()

train_data, eval_data = cifair10.load_data()
test_data = eval_data.split(0.5)


def corrupt_dataset(dataset, n_classes=10, corruption_fraction=0.4):
    # Keep track of which samples were corrupted for visualization later
    corrupted = [0 for _ in range(len(dataset))]
    # Perform the actual label corruption
    n_samples_per_class = len(dataset) // n_classes  # 50000 samples / 10 classes = 5000 per class
    n_to_corrupt_per_class = math.floor(corruption_fraction * n_samples_per_class)  # 0.4 * 5000 = 2000
    n_corrupted = [0] * n_classes
    i = 0
    while any([elem < n_to_corrupt_per_class for elem in n_corrupted]):  # while any class is left to be corrupted
        current_class = dataset[i]['y'].item()
        if n_corrupted[current_class] < n_to_corrupt_per_class:  # has this class reached its corruption quota yet?
            dataset[i]['y'] = (dataset[i]['y'] + np.random.randint(1, n_classes)) % n_classes  # change the y value
            n_corrupted[current_class] += 1
            corrupted[i] = 1
        i += 1
    # Put the corruption labels into the dataset for visualization
    dataset['data_labels'] = np.array(corrupted, dtype=int).reshape((len(dataset), 1))


corrupt_dataset(train_data)


def get_wrn():
    return WideResidualNetwork((32, 32, 3))


def build_estimator(loss_op):
    pipeline = fe.Pipeline(train_data=train_data,
                           eval_data=eval_data,
                           test_data=test_data,
                           batch_size=batch_size,
                           ops=[Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
                                PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
                                RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
                                Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
                                CoarseDropout(inputs="x", outputs="x", max_holes=1, mode="train"),
                                ])
    model = fe.build(model_fn=get_wrn, optimizer_fn='adam')
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        loss_op,  # <<<----------------------------- This is where the secret sauce will go
        UpdateOp(model=model, loss_name="ce")
    ])
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        MCC(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=save_dir, metric="mcc", save_best_mode="max", load_best_final=True),
        # We will also visualize the difference between the normal and corrupted image confidence scores.
        # You could follow this with an ImageViewer trace, but we will get the data out of the system
        # summary instead later for viewing.
        LabelTracker(metric="confidence", label="data_labels", label_mapping={"Normal": 0, "Corrupted": 1}, mode="train", outputs="label_confidence"),
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=max_train_steps_per_epoch,
                             eval_steps_per_epoch=max_eval_steps_per_epoch,
                             log_steps=300)
    return estimator


loss = SuperLoss(CrossEntropy(inputs=("y_pred", "y"), outputs="ce"), output_confidence="confidence")  # output_confidence is only needed if you want to visualize
estimator_super = build_estimator(loss)
superL = estimator_super.fit("SuperLoss")
print("before test")
summary = estimator_super.test()
print("after test")
print(summary.history["test"])
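Independent of FastEstimator, a quick first check is whether TensorFlow can see the GPU at all. A minimal sketch (if the list prints empty, the issue is the CUDA/cuDNN installation rather than this training script):

import tensorflow as tf

# An empty list means TensorFlow found no usable GPU and will
# silently place everything on the CPU.
print("GPUs visible to TensorFlow:", tf.config.list_physical_devices('GPU'))

# Run a small op and print the device it was actually placed on.
x = tf.random.uniform((1000, 1000))
print("matmul placed on:", (x @ x).device)

If a GPU is listed but utilization stays at zero, a mismatch between the tensorflow-gpu build and the installed CUDA/cuDNN versions is a common culprit.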

spaCy v3 train NER based on existing model or add custom trained NER to existing model

In spaCy < 3.0 I was able to train the NER component within the trained en_core_web_sm model:
python -m spacy train en model training validation --base-model en_core_web_sm --pipeline "ner" -R -n 10
Specifically, I need the tagger and the parser of the en_core_web_sm model.
spaCy's new version doesn't take these command-line options anymore; they need to be set in the config file. According to spaCy's website, these components can be added with the corresponding source and then listed under frozen_components in the training section of the config file (I will provide my full config at the end of this question):
[components]
[components.tagger]
source = "en_core_web_sm"
replace_listeners = ["model.tok2vec"]
[components.parser]
source = "en_core_web_sm"
replace_listeners = ["model.tok2vec"]
.
.
.
[training]
frozen_components = ["tagger","parser"]
When I'm debugging, the following error occurs:
ValueError: [E922] Component 'tagger' has been initialized with an output dimension of 49 - cannot add any more labels.
When I put the tagger into the disabled components in the nlp section of the config file, or if I delete everything related to the tagger, debugging and training work. However, when applying the trained model to a text loaded into a doc, only the trained NER works and none of the other components do; e.g. the parser predicts that everything is ROOT.
I also tried to train the NER model on its own and then add it to the loaded en_core_web_sm model:
MODEL_PATH = 'data/model/model-best'
nlp = spacy.load(MODEL_PATH)
english_nlp = spacy.load("en_core_web_sm")
ner_labels = nlp.get_pipe("ner")
english_nlp.add_pipe('ner_labels')
This leads to the following error:
ValueError: [E002] Can't find factory for 'ner_labels' for language English (en). This usually happens when spaCy calls `nlp.create_pipe` with a custom component name that's not registered on the current language class. If you're using a Transformer, make sure to install 'spacy-transformers'. If you're using a custom component, make sure you've added the decorator `@Language.component` (for function components) or `@Language.factory` (for class components).
Available factories: attribute_ruler, tok2vec, merge_noun_chunks, merge_entities, merge_subtokens, token_splitter, parser, beam_parser, entity_linker, ner, beam_ner, entity_ruler, lemmatizer, tagger, morphologizer, senter, sentencizer, textcat, textcat_multilabel, en.lemmatizer
Does anyone have a suggestion for how I can either train my NER with the en_core_web_sm model, or how I could integrate my trained component into it?
Here's the full config file:
[paths]
train = "training"
dev = "validation"
vectors = null
init_tok2vec = null
[system]
gpu_allocator = null
seed = 0
[nlp]
lang = "en"
pipeline = ["tok2vec","tagger","parser","ner"]
batch_size = 1000
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
[components]
[components.tagger]
source = "en_core_web_sm"
replace_listeners = ["model.tok2vec"]
[components.parser]
source = "en_core_web_sm"
replace_listeners = ["model.tok2vec"]
[components.ner]
factory = "ner"
moves = null
update_with_oracle_cut_size = 100
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = true
nO = null
[components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
upstream = "*"
[components.tok2vec]
factory = "tok2vec"
[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
width = ${components.tok2vec.model.encode.width}
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
rows = [5000,2500,2500,2500]
include_static_vectors = false
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = 256
depth = 8
window_size = 1
maxout_pieces = 3
[corpora]
[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 2000
gold_preproc = false
limit = 0
augmenter = null
[training]
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
dropout = 0.1
accumulate_gradient = 1
patience = 1600
max_epochs = 0
max_steps = 20000
eval_frequency = 200
frozen_components = ["tagger","parser"]
before_to_disk = null
[training.batcher]
@batchers = "spacy.batch_by_words.v1"
discard_oversize = false
tolerance = 0.2
get_length = null
[training.batcher.size]
@schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001
t = 0.0
[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = false
[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001
learn_rate = 0.001
[training.score_weights]
ents_per_type = null
ents_f = 1.0
ents_p = 0.0
ents_r = 0.0
[pretraining]
[initialize]
vectors = "en_core_web_lg"
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null
[initialize.components]
[initialize.tokenizer]
I provided a longer answer on spaCy's discussion forum here, but in a nutshell: if you want to source and freeze your parser/tagger, use this in the config:
[components.tagger]
source = "en_core_web_sm"
replace_listeners = ["model.tok2vec"]
[components.parser]
source = "en_core_web_sm"
replace_listeners = ["model.tok2vec"]
[components.tok2vec]
source = "en_core_web_sm"
i.e. make sure that the tagger & parser can connect to the correct tok2vec instance they were initially trained on.
You can then create an independent NER component either on top of the sourced (and pretrained) tok2vec, or with a new internal tok2vec component for the NER, or with a second tok2vec component under a distinct name that you refer to in the upstream argument of the NER's Tok2VecListener. A sketch of the second approach from the question appears below.
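For the question's second attempt, the [E002] error comes from passing a made-up string to add_pipe: its first argument must be a registered factory name. In spaCy v3, a trained component is copied between pipelines with the source argument instead. A minimal sketch, assuming both pipelines were trained with compatible vocab/tok2vec settings (which is exactly what the listener advice above is about):

import spacy

nlp = spacy.load("data/model/model-best")   # pipeline containing the custom-trained NER
english_nlp = spacy.load("en_core_web_sm")  # pipeline to extend

# en_core_web_sm already ships its own "ner", so remove it first,
# then copy the trained component across via source=.
english_nlp.remove_pipe("ner")
english_nlp.add_pipe("ner", source=nlp)

doc = english_nlp("Some example text about Berlin.")
print([(ent.text, ent.label_) for ent in doc.ents])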

How to do Topic Detection in Unsupervised Aspect Based Sentiment Analysis

I want to build an ABSA system in Python where the sentiment of pre-defined aspects (e.g. delivery, quality, service) is analyzed from online reviews. I want to do it unsupervised, because this saves me from manually labeling reviews and lets me analyze far more review data (around 100k reviews). My dataset therefore consists of only reviews and no ratings. I would like a model that first detects the aspect category and then assigns the sentiment polarity. E.g. when a review says "The shipment went smoothly, but the product is broken", I want the model to assign the word "shipment" to the aspect category "delivery", with "smoothly" indicating a positive sentiment.
I have searched for approaches to take, and I would like to know if anyone has experience with this and could point me in a helpful direction. It will be highly appreciated!
Aspect Based Sentiment Analysis (ABSA) is the task of first extracting aspects or features of an entity (i.e. Aspect Term Extraction, or ATE) from a given text, and second determining the sentiment polarity (SP), if any, towards each aspect of that entity. The importance of ABSA led to the creation of the ABSA task.
In the paper below, a B-LSTM & CRF classifier is used for feature extraction and aspect term detection, for both supervised and unsupervised ATE:
https://www.researchgate.net/profile/Andreea_Hossmann/publication/319875533_Unsupervised_Aspect_Term_Extraction_with_B-LSTM_and_CRF_using_Automatically_Labelled_Datasets/links/5a3436a70f7e9b10d842b0eb/Unsupervised-Aspect-Term-Extraction-with-B-LSTM-and-CRF-using-Automatically-Labelled-Datasets.pdf
https://github.com/songyouwei/ABSA-PyTorch/blob/master/infer_example.py
# -*- coding: utf-8 -*-
# file: infer.py
# author: songyouwei <youwei0314@gmail.com>
# Copyright (C) 2019. All Rights Reserved.
import torch
import torch.nn.functional as F
import argparse

from data_utils import build_tokenizer, build_embedding_matrix
from models import IAN, MemNet, ATAE_LSTM, AOA


class Inferer:
    """A simple inference example"""
    def __init__(self, opt):
        self.opt = opt
        self.tokenizer = build_tokenizer(
            fnames=[opt.dataset_file['train'], opt.dataset_file['test']],
            max_seq_len=opt.max_seq_len,
            dat_fname='{0}_tokenizer.dat'.format(opt.dataset))
        embedding_matrix = build_embedding_matrix(
            word2idx=self.tokenizer.word2idx,
            embed_dim=opt.embed_dim,
            dat_fname='{0}_{1}_embedding_matrix.dat'.format(str(opt.embed_dim), opt.dataset))
        self.model = opt.model_class(embedding_matrix, opt)
        print('loading model {0} ...'.format(opt.model_name))
        self.model.load_state_dict(torch.load(opt.state_dict_path))
        self.model = self.model.to(opt.device)
        # switch model to evaluation mode
        self.model.eval()
        torch.autograd.set_grad_enabled(False)

    def evaluate(self, raw_texts):
        context_seqs = [self.tokenizer.text_to_sequence(raw_text.lower().strip()) for raw_text in raw_texts]
        aspect_seqs = [self.tokenizer.text_to_sequence('null')] * len(raw_texts)
        context_indices = torch.tensor(context_seqs, dtype=torch.int64).to(self.opt.device)
        aspect_indices = torch.tensor(aspect_seqs, dtype=torch.int64).to(self.opt.device)
        t_inputs = [context_indices, aspect_indices]
        t_outputs = self.model(t_inputs)
        t_probs = F.softmax(t_outputs, dim=-1).cpu().numpy()
        return t_probs


if __name__ == '__main__':
    model_classes = {
        'atae_lstm': ATAE_LSTM,
        'ian': IAN,
        'memnet': MemNet,
        'aoa': AOA,
    }
    # set your trained models here
    model_state_dict_paths = {
        'atae_lstm': 'state_dict/atae_lstm_restaurant_acc0.7786',
        'ian': 'state_dict/ian_restaurant_acc0.7911',
        'memnet': 'state_dict/memnet_restaurant_acc0.7911',
        'aoa': 'state_dict/aoa_restaurant_acc0.8063',
    }

    class Option(object):
        pass

    opt = Option()
    opt.model_name = 'ian'
    opt.model_class = model_classes[opt.model_name]
    opt.dataset = 'restaurant'
    opt.dataset_file = {
        'train': './datasets/semeval14/Restaurants_Train.xml.seg',
        'test': './datasets/semeval14/Restaurants_Test_Gold.xml.seg'
    }
    opt.state_dict_path = model_state_dict_paths[opt.model_name]
    opt.embed_dim = 300
    opt.hidden_dim = 300
    opt.max_seq_len = 80
    opt.polarities_dim = 3
    opt.hops = 3
    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    inf = Inferer(opt)
    t_probs = inf.evaluate(['happy memory', 'the service is terrible', 'just normal food'])
    print(t_probs.argmax(axis=-1) - 1)  # maps class ids {0, 1, 2} to polarities {-1, 0, 1}
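As a lighter-weight unsupervised baseline (a minimal sketch, distinct from the B-LSTM/CRF and neural models above; the seed words are hypothetical and NLTK's vader_lexicon/punkt resources are downloaded on first run), one can map sentences to the pre-defined aspect categories with seed-word matching and score each matched sentence with a lexicon-based sentiment model:

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize

nltk.download("vader_lexicon", quiet=True)
nltk.download("punkt", quiet=True)

# Hypothetical seed words per pre-defined aspect category; extend per domain,
# e.g. with word-embedding nearest neighbours.
ASPECT_SEEDS = {
    "delivery": {"shipment", "shipping", "delivery", "arrived", "late"},
    "quality": {"quality", "broken", "sturdy", "defective", "material"},
    "service": {"service", "support", "staff", "helpful", "rude"},
}

sia = SentimentIntensityAnalyzer()

def aspect_sentiments(review):
    """Return {aspect: mean VADER compound score in [-1, 1]} for sentences hitting a seed word."""
    scores = {}
    for sent in sent_tokenize(review):
        tokens = {tok.strip(".,!?").lower() for tok in sent.split()}
        for aspect, seeds in ASPECT_SEEDS.items():
            if tokens & seeds:
                scores.setdefault(aspect, []).append(sia.polarity_scores(sent)["compound"])
    return {aspect: sum(vals) / len(vals) for aspect, vals in scores.items()}

print(aspect_sentiments("The delivery was fast. Sadly the product quality is terrible."))

Sentence-level matching means one sentence mentioning two aspects gets a single shared score; splitting on contrastive conjunctions such as "but" (as in the question's example review) is a common refinement.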
