General question: How can you prevent a model from being rebuilt for each inference request?
I'm trying to develop a web service that contains multiple trained models which can be used to request a prediction. Producing a result is currently very time-consuming because the model needs to be rebuilt for each request.
The inference itself only takes 30 ms, but importing the model takes more than a second.
I'm having difficulty splitting the importing and the inference into two separate methods because both need the same session.
The solution I came up with is to use an InteractiveSession that is stored in a variable. On creation of the object, the model gets loaded inside this session, which remains open. When a request is submitted, this preloaded model is then used to generate the result.
Problem with this solution:
When creating multiple of these objects for different models, multiple InteractiveSessions are open at the same time. TensorFlow generates the following warning:
Nesting violated for default stack of <class 'tensorflow.python.framework.ops.Graph'> objects
Any ideas on how to manage multiple sessions and preload models?
class model_inference:
    def __init__(self, language_name, base_module="models"):
        """
        Load a network that can be used to perform inference.

        Args:
            language_name (str): The name of an importable language class,
                returning an instance of `BaseLanguageModel`. This class
                should be importable from `base_module`.
            base_module (str): The module from which to import the
                `language_name` class.

        Attributes:
            ckpt (str): The model checkpoint value.
            infer_model (g2p_tensor.nmt.model_helper.InferModel):
                The language infer_model instance.
        """
        language_instance = getattr(
            importlib.import_module(base_module), language_name
        )()
        self.ckpt = language_instance.checkpoint
        self.infer_model = language_instance.infer_model
        self.hparams = language_instance.hparams
        self.rebuild_infer_model()

    def rebuild_infer_model(self):
        """
        Recreate the infer model after changing hparams.
        This is time-consuming.
        :return:
        """
        self.session = tf.InteractiveSession(
            graph=self.infer_model.graph, config=utils.get_config_proto()
        )
        self.model = model_helper.load_model(
            self.infer_model.model, self.ckpt, self.session, "infer"
        )

    def infer_once(self, in_string):
        """
        Entry point of the service; should not contain rebuilding of the model.
        """
        in_data = tokenize_input_string(in_string)
        self.session.run(
            self.infer_model.iterator.initializer,
            feed_dict={
                self.infer_model.src_placeholder: [in_data],
                self.infer_model.batch_size_placeholder: self.hparams.infer_batch_size,
            },
        )
        subword_option = self.hparams.subword_option
        beam_width = self.hparams.beam_width
        tgt_eos = self.hparams.eos
        num_translations_per_input = self.hparams.num_translations_per_input
        num_sentences = 0
        num_translations_per_input = max(
            min(num_translations_per_input, beam_width), 1
        )
        nmt_outputs, _ = self.model.decode(self.session)
        if beam_width == 0:
            nmt_outputs = np.expand_dims(nmt_outputs, 0)
        batch_size = nmt_outputs.shape[1]
        num_sentences += batch_size
        for sent_id in range(batch_size):
            for beam_id in range(num_translations_per_input):
                translation = nmt_utils.get_translation(
                    nmt_outputs[beam_id],
                    sent_id,
                    tgt_eos=tgt_eos,
                    subword_option=subword_option,
                )
        return untokenize_output_string(translation.decode("utf-8"))

    def __del__(self):
        self.session.close()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.session.close()
With the help of jdehesa's comments I understood what went wrong.
When you don't specify which graph should be used, TensorFlow creates a new graph instance and adds the operations to it. That's why just changing the InteractiveSession to a normal Session (to avoid nesting interactive sessions) throws a new error: ValueError: Operation name: "init_all_tables" op: "NoOp" is not an element of this graph.
Using an InteractiveSession worked because it sets the given graph as the default instead of creating a new instance. The problem with an InteractiveSession is that it is bad practice to leave multiple sessions open at the same time; TensorFlow will throw a warning.
The solution was the following:
When changing the InteractiveSession to a normal Session, you need to explicitly define in which graph you want to reload the model with model_helper.load_model.
This can be done by defining a context: with self.infer_model.graph.as_default():
The eventual solution was the following:
def rebuild_infer_model(self):
    """
    Recreate the infer model after changing hparams.
    This is time-consuming.
    :return:
    """
    self.session = tf.Session(
        graph=self.infer_model.graph, config=utils.get_config_proto()
    )
    # added line:
    # the model must be loaded within the same graph as used for inferring!
    with self.infer_model.graph.as_default():
        model_helper.load_model(
            self.infer_model.model, self.ckpt, self.session, "infer"
        )

def infer_once(self, in_string):
    """
    Turn an orthographic transcription into a phonetic transcription.
    The transcription is processed all at once.
    Long transcriptions may result in incomplete phonetic output.
    :param in_string: orthographic transcription
    :return: string of the phonetic representation
    """
    # added line:
    with self.infer_model.graph.as_default():
        in_data = tokenize_input_string(in_string)
        self.session.run(
            self.infer_model.iterator.initializer,
            feed_dict={
                self.infer_model.src_placeholder: [in_data],
                self.infer_model.batch_size_placeholder: self.hparams.infer_batch_size,
            },
        )
        subword_option = self.hparams.subword_option
        beam_width = self.hparams.beam_width
        tgt_eos = self.hparams.eos
        num_translations_per_input = self.hparams.num_translations_per_input
        num_sentences = 0
        num_translations_per_input = max(
            min(num_translations_per_input, beam_width), 1
        )
        nmt_outputs, _ = self.infer_model.model.decode(self.session)
        if beam_width == 0:
            nmt_outputs = np.expand_dims(nmt_outputs, 0)
        batch_size = nmt_outputs.shape[1]
        num_sentences += batch_size
        for sent_id in range(batch_size):
            for beam_id in range(num_translations_per_input):
                translation = nmt_utils.get_translation(
                    nmt_outputs[beam_id],
                    sent_id,
                    tgt_eos=tgt_eos,
                    subword_option=subword_option,
                )
        return untokenize_output_string(translation.decode("utf-8"))
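With this fix, several models can be preloaded side by side, each owning its own graph and plain Session. As a minimal sketch (the registry and its names are hypothetical, assuming the model_inference class above), the web service can build each model once at startup and reuse it across requests:

# Hypothetical registry: build each model_inference once, then reuse it,
# so every request only pays the ~30 ms decode cost instead of the
# one-second-plus model import.
_loaded_models = {}

def get_model(language_name):
    if language_name not in _loaded_models:
        _loaded_models[language_name] = model_inference(language_name)
    return _loaded_models[language_name]

# e.g. inside a request handler:
# result = get_model("english").infer_once("hello world")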
I have a very difficult problem with a ROS 2 topic that for some reason keeps more than one message. My project is rather simple: I have a planner in which I can create targets and edit them. The planner consists of several nodes, one for changing each value of a target. List of my nodes:
/add_target
/change_comment
/change_target_index
/clear_state
/remove_target
/rename_target
/set_target
/toggle_select_target
/toggle_visible
Each node extends StateNode (see implementation below), which helps to keep the same state for each node.
The idea is simple: a node receives a service call, for example /planner/rename_target, finds the specific target from the node's state, modifies it, and publishes new state to /planner/state. Each node is subscribed to /planner/state and sets the state to the message received. The idea is to keep the state consistent across all nodes, so each node has access to all state data and can modify it.
I have set my quality-of-service profile to keep ONLY the latest message. However, my problem is that after making service calls to different nodes, sometimes when running, for example,
ros2 topic echo --qos-history keep_last --qos-depth 1 --qos-durability transient_local --qos-reliability reliable /planner/state
I receive multiple messages. The order of the messages changes randomly. The state of each node seems to be the same, BUT there seem to be old messages "floating around" in the topic. My QoS should allow only the latest message to persist.
For example, if I first call the service twice:
ros2 service call /planner/add_target mtms_interfaces/srv/AddTarget "{target: {position:{x: 0.0,y: 0.0,z: 0.0}, orientation: {alpha: 0.0,beta: 0.0,gamma: 0.0}}}"
my topic echo looks normal, but if I then run
ros2 service call /planner/rename_target mtms_interfaces/srv/RenameTarget "{name: 'Target-0', new_name: 'example'}"
suddenly my topic echo shows two messages. In one of the messages the target has not been modified, and in the other the target has been modified.
What could be the problem here?
Here are some examples of my nodes.
StateNode implementation:
class StateNode(Node):
    def __init__(self, name):
        super().__init__(name)

        # Persist the latest sample.
        qos = QoSProfile(
            depth=1,
            durability=DurabilityPolicy.TRANSIENT_LOCAL,
            history=HistoryPolicy.KEEP_LAST,
            reliability=ReliabilityPolicy.RELIABLE
        )
        self._state_publisher = self.create_publisher(
            PlannerState,
            "/planner/state",
            qos
        )
        self._state_subscriber = self.create_subscription(
            PlannerState,
            '/planner/state',
            self.state_updated,
            10
        )
        self._state = None

    def state_updated(self, msg):
        self._state = msg
RenameTargetNode implementation:
class RenameTargetNode(StateNode):
    def __init__(self):
        super().__init__('rename_target')
        self.create_service(RenameTarget, '/planner/rename_target', self.rename_target_callback)

    def rename_target_callback(self, request, response):
        state = self._state
        if state is None:
            response.success = False
            return response

        self.get_logger().info('Renaming {} to {}'.format(request.name, request.new_name))

        i = 0
        for target in state.targets:
            # Name already exists
            if target.name == request.new_name:
                response.success = False
                return response

            # Save index of target in case new_name is unique
            if target.name == request.name:
                i = state.targets.index(target)

        state.targets[i].name = request.new_name
        self._state_publisher.publish(state)

        response.success = True
        return response
AddTargetNode implementation:
class AddTargetNode(StateNode):
    def __init__(self):
        super().__init__('add_target')
        self.create_service(AddTarget, '/planner/add_target', self.add_target_callback)

    def first_available_target_name(self):
        if self._state is None:
            return "Target-0"

        target_names = [target.name for target in self._state.targets]

        idx = 0
        while True:
            target_name = "Target-{}".format(idx)
            if target_name not in target_names:
                break
            idx += 1

        return target_name

    def create_new_target(self, pose):
        target = Target()
        target.name = self.first_available_target_name()
        target.type = "Target"
        target.comment = ""
        target.selected = False
        target.target = False  # XXX: Misnomer
        target.pose = pose
        target.intensity = 100.0
        target.iti = 100.0
        return target

    def add_target_callback(self, request, response):
        self.get_logger().info('Incoming request')

        target = self.create_new_target(
            pose=request.target  # XXX: Misnomer
        )

        if self._state is None:
            msg = PlannerState()
            msg.targets = [
                target
            ]
        else:
            msg = self._state
            msg.targets.append(target)

        self._state_publisher.publish(msg)

        response.success = True
        return response
System information:
Ubuntu 20.04, kernel 5.14.0-1042-oem, x86_64
I'm running the ROS nodes in one Docker container created from osrf/ros:galactic-desktop.
The problem here was that I had several publishers on the same topic with DurabilityPolicy.TRANSIENT_LOCAL, which is described as follows: "the publisher becomes responsible for persisting samples for “late-joining” subscriptions." In practice this means that when a new subscriber joins, each publisher sends it its last message, so the subscriber receives multiple messages.
There are several solutions to this, for example creating a master node that subscribes to an inner state topic updated by each node, while only the master node is responsible for publishing the state "outside"; a minimal sketch follows.
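For illustration only (the internal topic name and node name are made up, and PlannerState is assumed to be importable from the project's mtms_interfaces package): worker nodes publish to an internal, non-latched topic, and only this node republishes on /planner/state with TRANSIENT_LOCAL, so a late-joining subscriber receives exactly one latched message.

import rclpy
from rclpy.node import Node
from rclpy.qos import (QoSProfile, DurabilityPolicy,
                       HistoryPolicy, ReliabilityPolicy)
from mtms_interfaces.msg import PlannerState  # assumed message location

class StateMasterNode(Node):
    def __init__(self):
        super().__init__('state_master')
        latched_qos = QoSProfile(
            depth=1,
            durability=DurabilityPolicy.TRANSIENT_LOCAL,
            history=HistoryPolicy.KEEP_LAST,
            reliability=ReliabilityPolicy.RELIABLE
        )
        # The single latched publisher visible to the outside.
        self._state_publisher = self.create_publisher(
            PlannerState, '/planner/state', latched_qos)
        # Worker nodes publish here with default, non-latched QoS.
        self._inner_subscriber = self.create_subscription(
            PlannerState, '/planner/state_internal', self.republish, 10)

    def republish(self, msg):
        # Being the only TRANSIENT_LOCAL publisher, this node is the
        # only one that latches a sample for late joiners.
        self._state_publisher.publish(msg)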
Tried accessing the OpenAI example "Explain code".
But it shows an error:
InvalidRequestError: Engine not found
response = openai.Completion.create(
    engine="code-davinci-002",
    prompt="class Log:\n def __init__(self, path):\n dirname = os.path.dirname(path)\n os.makedirs(dirname, exist_ok=True)\n f = open(path, \"a+\")\n\n # Check that the file is newline-terminated\n size = os.path.getsize(path)\n if size > 0:\n f.seek(size - 1)\n end = f.read(1)\n if end != \"\\n\":\n f.write(\"\\n\")\n self.f = f\n self.path = path\n\n def log(self, event):\n event[\"_event_id\"] = str(uuid.uuid4())\n json.dump(event, self.f)\n self.f.write(\"\\n\")\n\n def state(self):\n state = {\"complete\": set(), \"last\": None}\n for line in open(self.path):\n event = json.loads(line)\n if event[\"type\"] == \"submit\" and event[\"success\"]:\n state[\"complete\"].add(event[\"id\"])\n state[\"last\"] = event\n return state\n\n\"\"\"\nHere's what the above class is doing:\n1.",
    temperature=0,
    max_tokens=64,
    top_p=1.0,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    stop=["\"\"\""]
)
I've been trying to access the engine named code-davinci-002, which is a private beta engine, so without access it's not possible to use it. It seems only the GPT-3 models are available for public use. You need to join the OpenAI Codex Private Beta Waitlist in order to access Codex models through the API.
Please note that your code is not very readable.
However, from the given error, I think it has to do with the missing colon : in the engine name.
Change this line from:
engine="code-davinci-002",
to
engine="code-davinci:002",
If you are using a finetuned model instead of an engine, you'd want to use model= instead of engine=.
response = openai.Completion.create(
    model="<finetuned model>",
    prompt=
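For reference, a complete call with model= might look like the sketch below; the model name is a placeholder for your own fine-tune, and the response access follows the Completion API of the pre-1.0 openai Python package:

import openai

# Hypothetical fine-tuned model name; replace with your own.
response = openai.Completion.create(
    model="davinci:ft-your-org-2022-01-01-00-00-00",
    prompt="Explain what the following class is doing:\n...",
    temperature=0,
    max_tokens=64,
)
print(response["choices"][0]["text"])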
I have created the following VPC class using the Python AWS CDK. I need help understanding how to dynamically set the env variable through self.node.try_get_context('env') to represent the environment where the stack will be deployed (for example prod, dev, stg, etc.), since I'm reusing it in my logic to formulate the naming convention for the stack.
I have assigned env variables in cdk.json as:
"env_stg": "stg",
"env_prd": "prd",
I can call them individually but don't understand how to call them dynamically to switch environments on the fly.
I really appreciate any help.
class VPC(Stack):
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        env = self.node.try_get_context('env')

        self.vpc = ec2.Vpc(self, "Stg",
            cidr = '10.0.0.0/16',
            max_azs = 2,
            enable_dns_support = True,
            enable_dns_hostnames = True,
            subnet_configuration = [
                ec2.SubnetConfiguration(
                    name = 'Public',
                    subnet_type = ec2.SubnetType.PUBLIC,
                    cidr_mask = 24
                ),
                ec2.SubnetConfiguration(
                    name = 'Isolated',
                    subnet_type = ec2.SubnetType.PRIVATE_ISOLATED,
                    cidr_mask = 24
                )
            ]
        )

        # Store all private subnets in Parameter Store
        private_subnets = [subnet.subnet_id for subnet in self.vpc.private_subnets]
        # public_subnets = [subnet.subnet_id for subnet in self.vpc.public_subnets]

        count = 1
        for subnets in private_subnets:
            ssm.StringParameter(self, 'private-subnet-'+str(count),
                string_value = subnets,
                parameter_name = '/'+env+'/private-subnet-'+str(count)
            )
            count += 1
You don't need new env vars or context. Stacks can introspect their Environment (= account + region) at synth-time. Use Python's language features to derive account-specific labels from the Stack's account.
# vpc_stack.py
env_label = "stg"
if self.account == "123456789012":
    env_label = "prod"
Even better, lift the label logic up to the app level, using the CDK-provided CDK_DEFAULT_ACCOUNT environment variable. Its value is set at synth time based on the CLI --profile flag. Pass the labels down to the stacks in the stack props (kwargs). This way, configuration is more central, visible, and reusable across stacks.
# app.py
account = os.environ["CDK_DEFAULT_ACCOUNT"]
env_label = "stg"
if account == "123456789012":
    env_label = "prod"
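To illustrate passing the label down via stack props, here is a minimal sketch; the account ID, stack name, and env_label keyword are hypothetical, and CDK v2 (aws_cdk as cdk) is assumed:

# app.py
import os
import aws_cdk as cdk

app = cdk.App()

account = os.environ["CDK_DEFAULT_ACCOUNT"]
env_label = "prod" if account == "123456789012" else "stg"

# The stack accepts env_label in its __init__ and can use it for naming,
# e.g. parameter_name='/' + env_label + '/private-subnet-1'.
VPC(app, "vpc-" + env_label, env_label=env_label)

app.synth()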
You can get even fancier than this, but these are some basic patterns.
I am trying to do some custom manipulation of a torch.utils.data.DataLoader in AzureML but cannot get it to instantiate directly from my azureml.core.Datastore:
ws = Workspace( # ... etc ... )
ds = Datastore.get(ws, datastore_name='my_ds')
am = ds.as_mount()

# HOW DO I GET base_path, data_file from am?

dataloader = DataLoader(
    ListDataset(base_path, data_file), # ... etc ...
)
The value of am.path() is "$AZUREML_DATAREFERENCE_my_ds", but I cannot figure out how to get from that to a pathlib.Path as expected by the constructor of ListDataset. Things I've tried include Path(am.path()) and Path(os.environ[am.path()]), but they don't work.
It's clear that there's some answer, since:
script_params = {
    '--base_path': ds.as_mount(),
    '--epochs': 30,
    '--batch_size': 16,
    '--use_cuda': 'true'
}

torch = PyTorch(source_directory='./',
                script_params=script_params,
                compute_target=compute_target,
                entry_script='train.py',
                pip_packages=packages,
                use_gpu=True)
seems to create a legit object.
You can perhaps try using the DataPath class. It exposes attributes such as path_on_datastore, which might be the path you're looking for.
To construct this class from your DataReference object (i.e. the variable am), you can use the create_from_data_reference() method.
Example:
ds = Datastore.get(ws, datastore_name='my_ds')
am = ds.as_mount()
dp = DataPath().create_from_data_reference(am)
base_path = dp.path_on_datastore
The above code generated an error for me; removing the parentheses after DataPath, as shown below, made the code run.
ds = Datastore.get(ws, datastore_name='my_ds')
am = ds.as_mount()
dp = DataPath.create_from_data_reference(am)
base_path = dp.path_on_datastore
Thank you for the code snippet, very useful!
I am using the nidaqmx-python library for acquiring data. Is it possible to access an existing task which is already defined in NI MAX?
My solution, thanks to the tip from @nekomatic, is:
import nidaqmx

system = nidaqmx.system.System.local()  # load the local system
task_names = system.tasks.task_names  # returns a list of task names
task = system.tasks[0]  # select the first task
loaded_task = task.load()  # load the task

sent_samples = []  # list for saving acquired data

with loaded_task:
    loaded_task.timing.cfg_samp_clk_timing(
        rate=2560,
        sample_mode=nidaqmx.constants.AcquisitionType.CONTINUOUS,
        samps_per_chan=1000)

    def callback(task_handle, every_n_samples_event_type,
                 number_of_samples, callback_data):
        """
        Callback function.
        """
        print('Every N Samples callback invoked.')
        samples = loaded_task.read(number_of_samples_per_channel=2560)
        sent_samples.extend(samples)
        return 0

    loaded_task.register_every_n_samples_acquired_into_buffer_event(
        200, callback)

    loaded_task.start()

    input('Running task. Press Enter to stop.\n')