`produce()` only called once in asynchronous kafka implementation? - python

I'm doing a simple Kafka produce/consume test on my local machine using Docker.
docker-compose file: https://github.com/confluentinc/cp-all-in-one/blob/6.2.1-post/cp-all-in-one/docker-compose.yml
And I've written simple Python code like the below:
import json
import random
import asyncio
from collections import namedtuple
from confluent_kafka.schema_registry.avro import AvroSerializer, AvroDeserializer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry import Schema
from confluent_kafka import SerializingProducer, DeserializingConsumer
from faker import Faker
from dataclasses import dataclass, field, asdict

faker = Faker()
registry_client = SchemaRegistryClient({"url": "http://localhost:8081"})


@dataclass
class CIS:
    user_id: str = field(default_factory=faker.user_name)
    question_id: int = field(default_factory=lambda: random.randint(1, 20000))
    is_correct: bool = field(default_factory=lambda: random.choice([True, False]))


async def produce(topic_name, serializer):
    p = SerializingProducer({
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "value.serializer": serializer
    })
    while True:
        p.produce(
            topic=topic_name,
            value=CIS(),
        )
        print("put!")
        await asyncio.sleep(1)


async def consume(topic_name, deserializer):
    c = DeserializingConsumer(
        {
            'bootstrap.servers': "PLAINTEXT://localhost:9092",
            # 'key.deserializer': string_deserializer,
            'value.deserializer': deserializer,
            'group.id': "123",
            'auto.offset.reset': "latest"
        }
    )
    c.subscribe([topic_name])
    while True:
        message = c.poll(0.1)
        if message is None:
            print(message)
            continue
        else:
            print(message.value())
        await asyncio.sleep(1)


if __name__ == "__main__":
    topic_name = "my_topic"
    schema_str = json.dumps(
        {
            "type": "record",
            "name": "cis",
            "namespace": "interaction",
            "fields": [
                {"name": "user_id", "type": "string"},
                {"name": "question_id", "type": "int"},
                {"name": "is_correct", "type": "boolean"}
            ]
        }
    )

    def to_dict(obj, ctx):
        return asdict(obj)

    def to_obj(obj, ctx):
        return CIS(
            user_id=obj["user_id"],
            question_id=obj["question_id"],
            is_correct=obj["is_correct"],
        )

    avro_serializer = AvroSerializer(registry_client, schema_str, to_dict)
    avro_deserializer = AvroDeserializer(registry_client, schema_str, to_obj)

    loop = asyncio.get_event_loop()
    t1 = loop.create_task(produce(topic_name, avro_serializer))
    t2 = loop.create_task(consume(topic_name, avro_deserializer))
    results = loop.run_until_complete(asyncio.gather(t1, t2))
When I run this code, the output is:
>>>
put!
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
....
I don't see why produce() is only called the first time.

confluent_kafka is not asyncio compatible; it uses blocking calls.
For asyncio code I can suggest aiokafka. The project's README has code snippets that illustrate how to write an async producer and consumer.
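For reference, a minimal sketch of what the produce/consume loops could look like with aiokafka. This sends plain JSON bytes instead of going through the Avro serializer, and it assumes the same localhost:9092 broker and my_topic topic as in your setup:

import asyncio
import json

from aiokafka import AIOKafkaProducer, AIOKafkaConsumer


async def produce(topic_name):
    producer = AIOKafkaProducer(bootstrap_servers="localhost:9092")
    await producer.start()
    try:
        while True:
            # values must be bytes; here we just JSON-encode a plain dict
            await producer.send_and_wait(topic_name, json.dumps({"ping": 1}).encode())
            print("put!")
            await asyncio.sleep(1)
    finally:
        await producer.stop()


async def consume(topic_name):
    consumer = AIOKafkaConsumer(topic_name, bootstrap_servers="localhost:9092", group_id="123")
    await consumer.start()
    try:
        async for message in consumer:
            print(json.loads(message.value))
    finally:
        await consumer.stop()


async def main(topic_name):
    await asyncio.gather(produce(topic_name), consume(topic_name))


if __name__ == "__main__":
    asyncio.run(main("my_topic"))

Because both coroutines only ever await (nothing blocks the event loop), the producer keeps printing "put!" every second while the consumer receives messages.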

Related

how to add many ids to one list?

I have code that records the user ID and the streamer's name; when the streamer starts the stream, the user who entered the command is notified.
How can I correctly store all the user IDs so that it works for everyone, and not just for one user?
import requests
import pymongo
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from aiogram import Bot, types
from aiogram.dispatcher import Dispatcher
from aiogram.utils import executor

TOKEN = ''
bot = Bot(token=TOKEN)
dp = Dispatcher(bot)
scheduler = AsyncIOScheduler(timezone="Europe/Kiev")

client = pymongo.MongoClient('')
db = client['Users']
collection = db['twitch']


def add_user(streamer_name, chat_id):
    collection.update_one({
        "_id": streamer_name
    }, {"$set": {
        'online': '-',
        'chat_id': chat_id
    }}, upsert=True)


def set_online(streamers):
    collection.update_one({
        '_id': streamers
    }, {'$set': {
        'online': 'True'
    }})


def set_offline(streamers):
    collection.update_one({
        '_id': streamers
    }, {'$set': {
        'online': 'False'
    }})


async def check(streamer_name, chat_id):
    client_id = ''
    client_secret = ''
    body = {
        'client_id': client_id,
        'client_secret': client_secret,
        "grant_type": 'client_credentials'
    }
    r = requests.post('https://id.twitch.tv/oauth2/token', body)
    keys = r.json()
    headers = {
        'Client-ID': client_id,
        'Authorization': 'Bearer ' + keys['access_token']
    }
    all_records = collection.find()
    users = list(all_records)
    for i in users:
        streamers = i['_id']
        send_users = i['chat_id']
        online = i['online']
        stream = requests.get('https://api.twitch.tv/helix/streams?user_login=' + streamers, headers=headers)
        stream_data = stream.json()
        if len(stream_data['data']) == 1:
            live = (streamers + ' is live: ' + stream_data['data'][0]['title'])
            if online == 'False':
                await bot.send_message(send_users, live)
                set_online(streamers)
            if online == 'True':
                print('streamer online')
        else:
            set_offline(streamers)
    scheduler.add_job(check, "interval", seconds=5, args=(streamer_name, chat_id))


@dp.message_handler(commands='check')
async def check_stream(message: types.Message):
    streamer_name = message.text[7:]
    chat_id = message.chat.id
    add_user(streamer_name, chat_id)
    await check(streamer_name, chat_id)


if __name__ == "__main__":
    scheduler.start()
    executor.start_polling(dp, skip_updates=True)
And when the streamer starts the stream, many messages come in instead of just one.

How to post a list of pydantic objects that contain datetime property by using aiohttp to fastapi endpoint?

I have a small web server:
# app.py
from typing import List

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class Item(BaseModel):
    id: int
    name: str


@app.post("/items")
async def items_list(items: List[Item]):
    return items


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
and a python file that posts to the endpoint:
# req.py
import asyncio
from typing import List
import datetime as dt

import aiohttp

from app import Item


async def main():
    data = [
        Item(id=1, name='A').dict(),
        Item(id=2, name='B').dict()
    ]
    async with aiohttp.ClientSession() as session:
        async with session.post(
            'http://localhost:8000/items',
            json=data
        ) as response:
            print(f'response: {await response.json()}')


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
It works, and I get output like:
response: [{'id': 1, 'name': 'A'}, {'id': 2, 'name': 'B'}]
If I set data as (no .dict()):
data = [
    Item(id=1, name='A'),
    Item(id=2, name='B')
]
it doesn't work, because Item is not json serializable.
My goal is to post a list of items to the fastapi endpoint.
This working example stops working if I extend Item like this:
class Item(BaseModel):
    id: int
    name: str
    created_at: dt.datetime
created_at is a datetime, and even if I use Item(...).dict() it is not json serializable.
Funny thing is that if I create an Item as:
Item(id=1, name='A',created_at=dt.datetime.utcnow()).json()
its json is perfect:
{"id": 1, "name": "A", "created_at": "2021-12-15T21:10:36.077435"}
but since aiohttp's session.post(json=...) uses a non-pydantic json encoder, Item is not json serializable.
I tried to create a new pydantic object:
class ItemsList(BaseModel):
    data: List[Item]
and set it as:
data = [
    Item(id=1, name='A', created_at=dt.datetime.utcnow()),
    Item(id=2, name='B', created_at=dt.datetime.utcnow())
]
data_list = ItemsList(data=data)
Again, pydantic is clever enough to produce proper json:
data_list.json()
{"data": [{"id": 1, "name": "A", "created_at": "2021-12-15T21:17:34.368555"}, {"id": 2, "name": "B", "created_at": "2021-12-15T21:17:34.368555"}]}
but I am not sure how to send such json using an aiohttp post.
My question is: how do I post a list of pydantic objects that contain a datetime property to a fastapi endpoint using aiohttp?
I would be satisfied with sending/receiving a list of items ([Item, Item, ... Item]).
One way to do this is to simply pass your Pydantic JSON string as the raw request body:
# Using the "data_list" Pydantic object from one of your examples.
async with session.post(
    'http://localhost:8000/items',
    # Pass the JSON string as `data`.
    data=data_list.json(),
    # Manually set the content-type header.
    headers={"Content-Type": "application/json"}
) as response:
    print(f'response: {await response.json()}')
That way you bypass the automatic serialization that isn't compatible with Pydantic.
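If you would rather keep the original List[Item] endpoint instead of wrapping the items in a data field, one alternative sketch (assuming the extended Item with created_at from your example) is to let pydantic serialize each item and hand aiohttp plain dicts:

import json

# item.json() turns datetimes into ISO strings; json.loads gives plain dicts back
payload = [json.loads(item.json()) for item in data]

async with session.post('http://localhost:8000/items', json=payload) as response:
    print(f'response: {await response.json()}')

Here aiohttp's default JSON encoder only ever sees strings and numbers, so no custom encoder is needed, and FastAPI parses the ISO strings back into datetime objects.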

Python - multiprocessing - API query

I am preparing code for querying some endpoints. The code works, but it takes too much time. I would like to use Python's multiprocessing module to speed it up. My main target is to run 12 API queries in parallel. Once the jobs are processed I would like to fetch the results and put them into a list of dictionaries, one response per dictionary. The API response is in json format. I am new to Python and don't have experience with this kind of case.
The code I want to run in parallel is below:
def api_query_process(cloud_type, api_name, cloud_account, resource_type):
    url = "xxx"
    payload = {
        "limit": 0,
        "query": f'config from cloud.resource where cloud.type = \'{cloud_type}\' AND api.name = \'{api_name}\' AND '
                 f'cloud.account = \'{cloud_account}\'',
        "timeRange": {
            "relativeTimeType": "BACKWARD",
            "type": "relative",
            "value": {
                "amount": 0,
                "unit": "minute"
            }
        },
        "withResourceJson": True
    }
    headers = {
        "content-type": "application/json; charset=UTF-8",
        "x-redlock-auth": api_token_input
    }

    response = requests.request("POST", url, json=payload, headers=headers)
    result = response.json()

    resource_count = len(result["data"]["items"])
    if resource_count:
        dictionary = dictionary_create(cloud_type, cloud_account, resource_type, resource_count)
        property_list_summary.append(dictionary)
    else:
        dictionary = dictionary_create(cloud_type, cloud_account, resource_type, 0)
        property_list_summary.append(dictionary)
Interesting problem, and I think you should think about idempotency: what would happen if you hit the endpoint several times in a row? You can use multiprocessing with or without a lock.
Without Lock:
import multiprocessing

with multiprocessing.Pool(processes=12) as pool:
    jobs = []
    for _ in range(12):
        jobs.append(pool.apply_async(api_query_process, args))
    for job in jobs:
        job.wait()
With Lock:
import multiprocessing

multiprocessing_lock = multiprocessing.Lock()

def locked_api_query_process(cloud_type, api_name, cloud_account, resource_type):
    with multiprocessing_lock:
        api_query_process(cloud_type, api_name, cloud_account, resource_type)

with multiprocessing.Pool(processes=12) as pool:
    jobs = []
    for _ in range(12):
        jobs.append(pool.apply_async(locked_api_query_process, args))
    for job in jobs:
        job.wait()
Can't really do an End-2-End test but hopefully this general setup helps you get it up and running.
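If the goal is to end up with the list of dictionaries mentioned in the question, here is a small sketch of collecting the results. It assumes api_query_process is changed to return its dictionary instead of appending to a global list, and that api_inputs is an iterable of the four-argument tuples; both are assumptions, not code from the question:

import multiprocessing

if __name__ == "__main__":
    with multiprocessing.Pool(processes=12) as pool:
        # api_query_process is assumed to *return* the dictionary here
        async_results = [pool.apply_async(api_query_process, args) for args in api_inputs]
        # .get() blocks until each worker finishes and returns its result
        property_list_summary = [result.get() for result in async_results]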
Since an HTTP request is an I/O-bound operation, you do not need multiprocessing; you can use threads to get better performance. Something like the following would help.
MAX_WORKERS says how many requests you want to send in parallel
API_INPUTS are all the requests you want to make
Untested code sample:
from concurrent.futures import ThreadPoolExecutor

import requests

API_TOKEN = "xyzz"
MAX_WORKERS = 4
API_INPUTS = (
    ("cloud_type_one", "api_name_one", "cloud_account_one", "resource_type_one"),
    ("cloud_type_two", "api_name_two", "cloud_account_two", "resource_type_two"),
    ("cloud_type_three", "api_name_three", "cloud_account_three", "resource_type_three"),
)


def make_api_query(api_token_input, cloud_type, api_name, cloud_account):
    url = "xxx"
    payload = {
        "limit": 0,
        "query": f'config from cloud.resource where cloud.type = \'{cloud_type}\' AND api.name = \'{api_name}\' AND '
                 f'cloud.account = \'{cloud_account}\'',
        "timeRange": {
            "relativeTimeType": "BACKWARD",
            "type": "relative",
            "value": {
                "amount": 0,
                "unit": "minute"
            }
        },
        "withResourceJson": True
    }
    headers = {
        "content-type": "application/json; charset=UTF-8",
        "x-redlock-auth": api_token_input
    }

    response = requests.request("POST", url, json=payload, headers=headers)
    return response.json()


def main():
    futures = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        for (cloud_type, api_name, cloud_account, resource_type) in API_INPUTS:
            futures.append(
                pool.submit(make_api_query, API_TOKEN, cloud_type, api_name, cloud_account)
            )

    property_list_summary = []
    for future, api_input in zip(futures, API_INPUTS):
        api_response = future.result()
        cloud_type, api_name, cloud_account, resource_type = api_input
        resource_count = len(api_response["data"]["items"])
        dictionary = dictionary_create(cloud_type, cloud_account, resource_type, resource_count)
        property_list_summary.append(dictionary)
I think using async functions would help a lot in speeding this up.
Your code blocks while it waits for a response from the external API, so using more processes or threads is overkill; you don't need more resources on your end. Instead, you should make your code start the next request instead of idling until the response arrives. This can be done using coroutines.
You could use aiohttp instead of requests, collect the individual tasks, and execute them in an event loop.
Here is a small example that runs GET requests and collects the JSON bodies from the responses. It should be easy to adapt to your use case:
from aiohttp import ClientSession
import asyncio

RESULTS = dict()


async def get_url(url, session):
    async with session.get(url) as response:
        print("Status:", response.status)
        print("Content-type:", response.headers['content-type'])
        result = await response.json()
        RESULTS[url] = result


async def get_all_urls(urls):
    async with ClientSession() as session:
        tasks = [get_url(url, session) for url in urls]
        await asyncio.gather(*tasks)


if __name__ == "__main__":
    urls = [
        "https://accounts.google.com/.well-known/openid-configuration",
        "https://www.facebook.com/.well-known/openid-configuration/"
    ]
    asyncio.run(get_all_urls(urls=urls))
    print(RESULTS.keys())
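Adapting this to the POST query from the question is mostly a one-line change; a sketch follows, where url, payload and headers stand for the dictionaries built in the question's code, so treat them as placeholders:

async def post_query(url, payload, headers, session):
    # same pattern as get_url above, but sends the JSON payload with session.post
    async with session.post(url, json=payload, headers=headers) as response:
        return await response.json()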

Python: dynamic json with string interpolation

I created a class of functions that provision some cloud infrastructure.
response = self.ecs_client.register_task_definition(
    containerDefinitions=[
        {
            "name": "redis-283C462837EF23AA",
            "image": "redis:3.2.7",
            "cpu": 1,
            "memory": 512,
            "essential": True,
        },
        ...
This is a very long json; I'm showing just the beginning.
Then I refactored the code to use parameters instead of the hard-coded hash, memory, and cpu values.
response = self.ecs_client.register_task_definition(
    containerDefinitions=[
        {
            "name": f"redis-{git_hash}",
            "image": "redis:3.2.7",
            "cpu": {num_cpu},
            "memory": {memory_size},
            "essential": True,
        },
        ...
I read the values of git_hash, num_cpu and memory_size from a config file prior to this code.
Now, I also want to read the entire json from a file.
The problem is that if I save {num_cpu} etc. in a file, the string interpolation won't work.
How can I extract the json from my logic and still use string interpolation or variables?
You can use Template from the string module.
{
    "name": "redis-${git_hash}",
    "image": "redis:3.2.7",
    "cpu": ${num_cpu},
    "memory": ${memory_size},
    "essential": true
}
from string import Template
import json

if __name__ == '__main__':
    data = dict(
        num_cpu=1,
        memory_size=1,
        git_hash=1
    )
    with open('test.json', 'r') as json_file:
        content = ''.join(json_file.readlines())

    template = Template(content)
    configuration = json.loads(template.substitute(data))
    print(configuration)
    # {'name': 'redis-1', 'image': 'redis:3.2.7', 'cpu': 1, 'memory': 1, 'essential': True}
Opinion: I think the overall approach is wrong; there is a reason why this method is not as popular as others. You can separate your configuration into two files, (1) a static list of options and (2) your compact changeable configuration, and compose them in your code.
EDIT: You can create an object that reads the configuration from a standard (static or changeable) JSON file, FileConfig, and then compose instances using another object, something like ComposedConfig.
This will allow you to extend the behaviour and add, for example, a run-time configuration into the mix. This way the configuration from your JSON file no longer depends on the run-time params, and you can separate what is changeable from what is static in your system.
PS: The get method is just an example for explaining the composed behaviour; you can use other methods/designs.
import json
from abc import ABC, abstractmethod


class Configuration(ABC):
    @abstractmethod
    def get(self, key: str, default: str) -> str:
        pass


class FileConfig(Configuration):
    def __init__(self, file_path):
        self.__content = {}
        with open(file_path, 'r') as json_file:
            self.__content = json.load(json_file)

    def get(self, key: str, default: str) -> str:
        return self.__content.get(key, default)


class RunTimeConfig(Configuration):
    def __init__(self, option: str):
        self.__content = {'option': option}

    def get(self, key: str, default: str) -> str:
        return self.__content.get(key, default)


class ComposedConfig:
    def __init__(self, first: Configuration, second: Configuration):
        self.__first = first
        self.__second = second

    def get(self, key: str, default: str) -> str:
        return self.__first.get(key, self.__second.get(key, default))


if __name__ == '__main__':
    static = FileConfig("static.json")
    changeable = FileConfig("changeable.json")
    runTime = RunTimeConfig(option="a")

    config = ComposedConfig(static, changeable)
    alternative = ComposedConfig(static, runTime)

    print(config.get("image", "test"))  # redis:3.2.7
    print(alternative.get("option", "test"))  # a

how to pass json object directly to train in rasa nlu from python

I am using rasa nlu to train data. As per the documentation at http://nlu.rasa.ai/python.html, the following code has to be used to train on data that exists in the file demo-rasa.json:
from rasa_nlu.converters import load_data
from rasa_nlu.config import RasaNLUConfig
from rasa_nlu.model import Trainer
training_data = load_data('data/examples/rasa/demo-rasa.json')
trainer = Trainer(RasaNLUConfig("sample_configs/config_spacy.json"))
trainer.train(training_data)
model_directory = trainer.persist('./projects/default/')
But how do we instead read the data from a json object for training?
If you look at the implementation of load_data, it performs two steps:
guess the file format
load the file using the appropriate loading method
The simplest solution would be to write your json object into a file or StringIO object.
Alternatively, you could pick the specific loading function you need, for example load_rasa_data, and separate the file reading from it. For this example, you could probably just take the whole function and remove the line data = _read_json_from_file(filename).
I am somewhat surprised to see that currently there is no way to read an already loaded json object. If you decide to adapt the functions to this, you might consider writing a pull request for it.
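For the first option, here is a minimal sketch (assuming the same rasa_nlu version and load_data import as in the question; the helper name is made up) that dumps the already-loaded json object to a temporary file and hands it to load_data:

import json
import tempfile

from rasa_nlu.converters import load_data


def load_training_data_from_dict(training_json):
    # write the in-memory json object to a temporary file so that load_data
    # can guess the format and parse it as usual
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        json.dump(training_json, f)
        temp_path = f.name
    return load_data(temp_path)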
I have made a flask app which takes the JSON object from the request body instead of reading it from a file.
This code converts an existing LUIS json, using spaCy for entities and sklearn-crfsuite for intent recognition.
from flask import Flask, jsonify, request
from flask_cors import CORS
import json, os, msvcrt, psutil, subprocess, datetime

app = Flask(__name__)
CORS(app)

with app.app_context():
    with app.test_request_context():

        #region REST based RASA API
        serverExecutablePID = 0
        hasAPIStarted = False
        configFileDirectory = "C:\\Code\\RasaAPI\\RASAResources\\config"
        chitChatModel = "ChitChat"
        assetsDirectory = "C:\\Code\\RasaAPI\\RASAResources"

        def createSchema(SchemaPath, dataToBeWritten):
            try:
                #write LUIS or RASA JSON Schema in json file locking the file to avoid race condition using Python's Windows msvcrt binaries
                with open(SchemaPath, "w") as SchemaCreationHandle:
                    msvcrt.locking(SchemaCreationHandle.fileno(), msvcrt.LK_LOCK, os.path.getsize(SchemaPath))
                    json.dump(dataToBeWritten, SchemaCreationHandle, indent=4, sort_keys=False)
                    SchemaCreationHandle.close()

                #Check if written file actually exists on disk or not
                doesFileExist = os.path.exists(SchemaPath)
                return doesFileExist

            except Exception as ex:
                return str(ex.args)

        def appendTimeStampToModel(ModelName):
            return ModelName + '_{:%Y%m%d-%H%M%S}.json'.format(datetime.datetime.now())

        def appendTimeStampToConfigSpacy(ModelName):
            return ModelName + '_config_spacy_{:%Y%m%d-%H%M%S}.json'.format(datetime.datetime.now())

        def createConfigSpacy(ModelName, DataPath, ConfigSpacyPath, TrainedModelsPath, LogDataPath):
            try:
                with open(ConfigSpacyPath, "w") as configSpacyFileHandle:
                    msvcrt.locking(configSpacyFileHandle.fileno(), msvcrt.LK_LOCK, os.path.getsize(ConfigSpacyPath))
                    configDataToBeWritten = dict({
                        "project": ModelName,
                        "data": DataPath,
                        "path": TrainedModelsPath,
                        "response_log": LogDataPath,
                        "log_level": "INFO",
                        "max_training_processes": 1,
                        "pipeline": "spacy_sklearn",
                        "language": "en",
                        "emulate": "luis",
                        "cors_origins": ["*"],
                        "aws_endpoint_url": None,
                        "token": None,
                        "num_threads": 2,
                        "port": 5000
                    })
                    json.dump(configDataToBeWritten, configSpacyFileHandle, indent=4, sort_keys=False)
                return os.path.getsize(ConfigSpacyPath) > 0

            except Exception as ex:
                return str(ex.args)

        def TrainRASA(configFilePath):
            try:
                trainingString = 'start /wait python -m rasa_nlu.train -c ' + '\"' + os.path.normpath(configFilePath) + '\"'
                returnCode = subprocess.call(trainingString, shell=True)
                return returnCode
            except Exception as ex:
                return str(ex.args)

        def StartRASAServer(configFileDirectory, ModelName):
            #region Server starting logic
            try:
                global hasAPIStarted
                global serverExecutablePID
                #1) for finding which is the most recent config_spacy
                root, dirs, files = next(os.walk(os.path.normpath(configFileDirectory)))
                configFiles = [configFile for configFile in files if ModelName in configFile]
                configFiles.sort(key=str.lower, reverse=True)
                mostRecentConfigSpacy = os.path.join(configFileDirectory, configFiles[0])

                serverStartingString = 'start /wait python -m rasa_nlu.server -c ' + '\"' + os.path.normpath(mostRecentConfigSpacy) + '\"'
                serverProcess = subprocess.Popen(serverStartingString, shell=True)
                serverExecutablePID = serverProcess.pid

                pingReturnCode = 1
                while(pingReturnCode):
                    pingReturnCode = os.system("netstat -na | findstr /i 5000")
                    if(pingReturnCode == 0):
                        hasAPIStarted = True

                return pingReturnCode
            except Exception as ex:
                return jsonify({"message": "Failed because: " + str(ex.args), "success": False})
            #endregion

        def KillProcessWindow(hasAPIStarted, serverExecutablePID):
            if(hasAPIStarted == True and serverExecutablePID != 0):
                me = psutil.Process(serverExecutablePID)
                for child in me.children():
                    child.kill()

        @app.route('/api/TrainRASA', methods=['POST'])
        def TrainRASAServer():
            try:
                #get request body of POST request
                postedJSONData = json.loads(request.data, strict=False)
                if postedJSONData["data"] is not None:
                    print("Valid data")
                    #region JSON file building logic
                    modelName = postedJSONData["modelName"]
                    modelNameWithExtension = appendTimeStampToModel(modelName)
                    schemaPath = os.path.join(assetsDirectory, "data", modelNameWithExtension)
                    print(createSchema(schemaPath, postedJSONData["data"]))
                    #endregion

                    #region config file creation logic
                    configFilePath = os.path.join(assetsDirectory, "config", appendTimeStampToConfigSpacy(modelName))
                    logsDirectory = os.path.join(assetsDirectory, "logs")
                    trainedModelDirectory = os.path.join(assetsDirectory, "models")
                    configFileCreated = createConfigSpacy(modelName, schemaPath, configFilePath, trainedModelDirectory, logsDirectory)
                    #endregion

                    if(configFileCreated == True):
                        #region Training RASA NLU with schema
                        TrainingReturnCode = TrainRASA(configFilePath)
                        #endregion

                        if(TrainingReturnCode == 0):
                            return jsonify({"message": "Successfully trained RASA NLU with modelname: " + modelName, "success": True})

                            # KillProcessWindow(hasAPIStarted, serverExecutablePID)
                            # serverStartingReturnCode = StartRASAServer(configFileDirectory, modelName)
                            # #endregion

                            # if serverStartingReturnCode == 0:
                            #     return jsonify({"message": "Successfully started RASA server on port 5000", "success": True})

                            # elif serverStartingReturnCode is None:
                            #     return jsonify({"message": "Could not start RASA server, request timed out", "success": False})
                        else:
                            return jsonify({"message": "Something went wrong while training RASA NLU!", "success": False})
                    else:
                        return jsonify({"message": "Could not create config file for RASA NLU", "success": False})

                #throw exception if request body is empty
                return jsonify({"message": "Please enter some JSON, JSON seems to be empty", "success": False})
            except Exception as ex:
                return jsonify({"Reason": "Failed because" + str(ex.args), "success": False})

        @app.route('/api/StopRASAServer', methods=['GET'])
        def StopRASAServer():
            try:
                global serverExecutablePID
                if(serverExecutablePID != 0 or serverExecutablePID != None):
                    me = psutil.Process(serverExecutablePID)
                    for child in me.children():
                        child.kill()

                return jsonify({"message": "Server stopped....", "success": True})
            except Exception as ex:
                return jsonify({"message": "Something went wrong while shutting down the server because: " + str(ex.args), "success": True})

        if __name__ == "__main__":
            StartRASAServer(configFileDirectory, chitChatModel)
            app.run(debug=False, threaded=True, host='0.0.0.0', port=5050)
There is a simple way of doing it, but due to RASA's poor code documentation it is difficult to find.
You will have to create a json object in the following format:
training_data = {'rasa_nlu_data': {"common_examples": training_examples,
                                   "regex_features": [],
                                   "lookup_tables": [],
                                   "entity_synonyms": []
                                   }}
In this JSON, training_examples is a list and it should contain the data as represented below:
training_examples = [
    {
        "intent": "greet",
        "text": "Hello"
    },
    {
        "intent": "greet",
        "text": "Hi, how are you ?"
    },
    {
        "intent": "sad",
        "text": "I am not happy with the service"
    },
    {
        "intent": "praise",
        "text": "You're a genius"
    }
]
With this, you can now train it like this :)
from rasa.nlu import config
from rasa.nlu.model import Trainer  # Trainer was not imported in the original snippet

# Even the config can be loaded from a dict like this
def get_train_config():
    return {'language': 'en',
            'pipeline': [
                {'name': 'WhitespaceTokenizer'},
                {'name': 'ConveRTFeaturizer'},
                {'name': 'EmbeddingIntentClassifier'}
            ],
            'data': None,
            'policies': [
                {'name': 'MemoizationPolicy'},
                {'name': 'KerasPolicy'},
                {'name': 'MappingPolicy'}
            ]}

trainer = Trainer(config._load_from_dict(get_train_config()))
interpreter = trainer.train(data)
