I have the following mapping and settings for an index:
def init_index():
ES_CLIENT.indices.create(
index = "social_media",
body = {
"settings": {
"index": {
"number_of_shards": 3,
"number_of_replicas": 0
},
"analysis": {
"analyzer": {
"my_english": {
"type": "standard",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"cust_stop",
"my_snow"
]
},
"my_english_shingle": {
"type": "standard",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"cust_stop",
"my_snow",
"shingle_filter"
]
}
},
"filter": {
"cust_stop": {
"type": "stop",
"stopwords_path": "stoplist.txt",
},
"shingle_filter" : {
"type" : "shingle",
"min_shingle_size" : 2,
"max_shingle_size" : 2,
"output_unigrams": True
},
"my_snow" : {
"type" : "snowball",
"language" : "English"
}
}
}
}
}
)
press_mapping = {
"tweet": {
"dynamic": "strict",
"properties": {
"_id": {
"type": "string",
"store": True,
"index": "not_analyzed"
},
"text": {
"type": "multi_field",
"fields": {
"text": {
"include_in_all": False,
"type": "string",
"store": False,
"index": "not_analyzed"
},
"_analyzed": {
"type": "string",
"store": True,
"index": "analyzed",
"term_vector": "with_positions_offsets",
"analyzer": "my_english"
},
"_analyzed_shingles": {
"type": "string",
"store": True,
"index": "analyzed",
"term_vector": "with_positions_offsets",
"analyzer": "my_english_shingle"
}
}
}
}
}
}
constants.ES_CLIENT.indices.put_mapping (
index = "social_media",
doc_type = "tweet",
body = press_mapping
)
I notice that except lowercase no other filter is working. The termvectors for both the analyzers are same since shingle_filter is also not working.
GET /social_media/_analyze?analyzer=my_english_shingle&text=complaining when should remove when, stem complaining to complain and return a shingle complain _ but instead it gives me:
{
"tokens": [
{
"token": "complaining",
"start_offset": 0,
"end_offset": 11,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "when",
"start_offset": 12,
"end_offset": 16,
"type": "<ALPHANUM>",
"position": 2
}
]
}
What could be the reason??
Since you are trying to define new custom analyzers and not new standard analyzers you need to change the types in the mapping of both your analyzers from standard to custom. Standard analyzers actually don't take any of the settings you are passing in the mapping - personally would prefer ES to throw an exception in such a case but instead he is just creating new standard analyzers with no custom fields and ignores everything else you passed in (try removing lowercase form both your analyzers and re-run your analyzer, the output will still be lowercased!):
"analyzer": {
"my_english": {
"type": "custom", // <--- CUSTOM
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"stop",
"my_snow"
]
},
"my_english_shingle": {
"type": "custom", // <--- CUSTOM
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"stop",
"my_snow",
"shingle_filter"
]
}
With this the query (I changed the query and the custom stop words to just stop as I don't have your file) GET /social_media/_analyze?analyzer=my_english_shingle&text=COMPLAINING TEST returns:
{
"tokens": [
{
"token": "complain",
"start_offset": 0,
"end_offset": 11,
"type": "word",
"position": 1
},
{
"token": "complain test",
"start_offset": 0,
"end_offset": 16,
"type": "shingle",
"position": 1
},
{
"token": "test",
"start_offset": 12,
"end_offset": 16,
"type": "word",
"position": 2
}
]
}
Also not sure about your ES version but my requires boolean values true and false to be lowercased.
Related
I would like to modify the value of a field on a specific index of a nested type depending on another value of the same nested object or a field outside of the nested object.
As example, I have the current mapping of my index feed:
{
"feed": {
"mappings": {
"properties": {
"attacks_ids": {
"type": "keyword"
},
"created_by": {
"type": "keyword"
},
"date": {
"type": "date"
},
"groups_related": {
"type": "keyword"
},
"indicators": {
"type": "nested",
"properties": {
"date": {
"type": "date"
},
"description": {
"type": "text"
},
"role": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"malware_families": {
"type": "keyword"
},
"published": {
"type": "boolean"
},
"references": {
"type": "keyword"
},
"tags": {
"type": "keyword"
},
"targeted_countries": {
"type": "keyword"
},
"title": {
"type": "text"
},
"tlp": {
"type": "keyword"
}
}
}
}
}
Take the following document as example:
{
"took": 194,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "feed",
"_type": "_doc",
"_id": "W3CS7IABovFpcGfZjfyu",
"_score": 1,
"_source": {
"title": "Test",
"date": "2022-05-22T16:21:09.159711",
"created_by": "finch",
"tlp": "white",
"published": true,
"references": [
"test",
"test"
],
"tags": [
"tag1",
"tag2"
],
"targeted_countries": [
"Italy",
"Germany"
],
"malware_families": [
"family1",
"family2"
],
"groups_related": [
"group1",
"griup2"
],
"attacks_ids": [
""
],
"indicators": [
{
"value": "testest",
"description": "This is a test",
"type": "sha256",
"role": "file",
"date": "2022-05-22T16:21:09.159560"
},
{
"value": "testest2",
"description": "This is a test 2",
"type": "ipv4",
"role": "c2",
"date": "2022-05-22T16:21:09.159699"
}
]
}
}
]
}
}
I would like to make this update: indicators[0].value = 'changed'
if _id == 'W3CS7IABovFpcGfZjfyu'
or if title == 'some_title'
or if indicators[0].role == 'c2'
I already tried with a script, but it seems I can't manage to get it work, I hope the explanation is clear, ask any question if not, thank you.
Edit 1:
I managed to make it work, however it needs the _id, still looking for a way to do that without it.
My partial solution:
update = Pulse.get(id="XHCz7IABovFpcGfZWfz9") #Pulse is my document
update.update(script="for (indicator in ctx._source.indicators) {if (indicator.value=='changed2') {indicator.value='changed3'}}")
# Modify depending on the value of a field inside the same nested object
I am trying to update my original index settings.
My initial setting looks like this:
client.create(index = "movies", body= {
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"my_custom_stop_words": {
"type": "stop",
"stopwords": stop_words
}
},
"analyzer": {
"my_custom_analyzer": {
"filter": [
"lowercase",
"my_custom_stop_words"
],
"type": "custom",
"tokenizer": "standard"
}
}
}
},
"mappings": {
"properties": {
"body": {
"type": "text",
"analyzer": "my_custom_analyzer",
"search_analyzer": "my_custom_analyzer",
"search_quote_analyzer": "my_custom_analyzer"
}
}
}
},
ignore=400
)
And I am trying to add the synonym filter to my existing analyzer (my_custom_analyzer) using client.put_settings:
client.put_settings(index='movies', body={
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"analyzer": {
"my_custom_analyzer": {
"filter": [
"lowercase",
"my_stops",
"my_synonyms"
],
"type": "custom",
"tokenizer": "standard"
}
},
"filter": {
"my_custom_stops": {
"type": "stop",
"stopwords": stop_words
},
"my_custom_synonyms": {
"ignore_case": "true",
"type": "synonym",
"synonyms": ["Harry Potter, HP => HP", "Terminator, TM => TM"]
}
}
}
},
"mappings": {
"properties": {
"body": {
"type": "text",
"analyzer": "my_custom_analyzer",
"search_analyzer": "my_custom_analyzer",
"search_quote_analyzer": "my_custom_analyzer"
}
}
}
},
ignore=400
)
However, when I issue a search query (searching for "HP") that queries the movies index and I'm trying to rank the documents so that the document containing "Harry Potter" 5 times is the top element in the list. Right now, it seems like the document with "HP" 3 times tops the list, so the synonyms filter isn't working. I've closed movies index before I do client.put_settings and then re-opened the index.
Any help would be greatly appreciated!
You should re-index all your data in order to apply the updated settings on all your data and fields.
The data that had already been indexed won't be affected by the updated analyzer, only documents that has been indexed after you updated the settings will be affected.
Not re-indexing your data might produce incorrect results since your old data is analyzed with the old custom analyzer and not with the new one.
The most efficient way to resolve this issue is to create a new index, and move your data from the old one to the new one with the updated settings.
Reindex Api
Follow these steps:
POST _reindex
{
"source": {
"index": "movies"
},
"dest": {
"index": "new_movies"
}
}
DELETE movies
PUT movies
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"analyzer": {
"my_custom_analyzer": {
"filter": [
"lowercase",
"my_custom_stops",
"my_custom_synonyms"
],
"type": "custom",
"tokenizer": "standard"
}
},
"filter": {
"my_custom_stops": {
"type": "stop",
"stopwords": "stop_words"
},
"my_custom_synonyms": {
"ignore_case": "true",
"type": "synonym",
"synonyms": [
"Harry Potter, HP => HP",
"Terminator, TM => TM"
]
}
}
}
},
"mappings": {
"properties": {
"body": {
"type": "text",
"analyzer": "my_custom_analyzer",
"search_analyzer": "my_custom_analyzer",
"search_quote_analyzer": "my_custom_analyzer"
}
}
}
}
POST _reindex?wait_for_completion=false
{
"source": {
"index": "new_movies"
},
"dest": {
"index": "movies"
}
}
After you've verified all your data is in place you can delete new_movies index. DELETE new_movies
Hope these help
The following deploys a azure function that run the specified C#. How do I do the same for a function that should run python?
I tried just changing the name to __init__.py as is generated when you use the azure-function-core-tools func command with the --python switch, but couldn't even find error messages as to why things weren't working.
{
"$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"appName": {
"type": "string",
"metadata": {
"description": "The name of the function app that you wish to create."
}
},
"storageAccountType": {
"type": "string",
"defaultValue": "Standard_LRS",
"allowedValues": [
"Standard_LRS",
"Standard_GRS",
"Standard_ZRS",
"Premium_LRS"
],
"metadata": {
"description": "Storage Account type"
}
}
},
"variables": {
"functionAppName": "[parameters('appName')]",
"hostingPlanName": "[parameters('appName')]",
"storageAccountName": "[concat(uniquestring(resourceGroup().id), 'azfunctions')]"
},
"resources": [
{
"type": "Microsoft.Storage/storageAccounts",
"name": "[variables('storageAccountName')]",
"apiVersion": "2015-06-15",
"location": "[resourceGroup().location]",
"properties": {
"accountType": "[parameters('storageAccountType')]"
}
},
{
"type": "Microsoft.Web/serverfarms",
"apiVersion": "2015-04-01",
"name": "[variables('hostingPlanName')]",
"location": "[resourceGroup().location]",
"properties": {
"name": "[variables('hostingPlanName')]",
"computeMode": "Dynamic",
"sku": "Dynamic"
}
},
{
"apiVersion": "2015-08-01",
"type": "Microsoft.Web/sites",
"name": "[variables('functionAppName')]",
"location": "[resourceGroup().location]",
"kind": "functionapp",
"properties": {
"name": "[variables('functionAppName')]",
"serverFarmId": "[resourceId('Microsoft.Web/serverfarms', variables('hostingPlanName'))]"
},
"dependsOn": [
"[resourceId('Microsoft.Web/serverfarms', variables('hostingPlanName'))]",
"[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]"
],
"resources": [
{
"apiVersion": "2016-03-01",
"name": "appsettings",
"type": "config",
"dependsOn": [
"[resourceId('Microsoft.Web/sites', variables('functionAppName'))]",
"[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]"
],
"properties": {
"AzureWebJobsStorage": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageAccountName'),';AccountKey=',listkeys(resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName')), '2015-05-01-preview').key1,';')]",
"AzureWebJobsDashboard": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageAccountName'),';AccountKey=',listkeys(resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName')), '2015-05-01-preview').key1,';')]",
"FUNCTIONS_EXTENSION_VERSION": "latest"
}
},
{
"apiVersion": "2015-08-01",
"name": "TestFunctionCM",
"type": "functions",
"dependsOn": [
"[resourceId('Microsoft.Web/sites', variables('functionAppName'))]"
],
"properties": {
"config": {
"bindings": [
{
"authLevel": "anonymous",
"name": "req",
"type": "httpTrigger",
"direction": "in"
},
{
"name": "res",
"type": "http",
"direction": "out"
}
]
},
"files": {
"run.csx": "using System.Net;\r\n\r\n public static HttpResponseMessage Run(HttpRequestMessage req, TraceWriter log)\r\n\r\n {\r\n\r\nreturn req.CreateResponse(\"Hello from MyFunction\", HttpStatusCode.OK);\r\n\r\n }"
}
}
}
]
}
]
}
Thank you.
You will probably need the following:
Runtime under appsettings
"FUNCTIONS_WORKER_RUNTIME": "python"
My template looks bit different but does deploy a python function, here is the resource from the same:
{
"type": "Microsoft.Web/sites",
"apiVersion": "2018-11-01",
"name": "[parameters('name')]",
"location": "[parameters('location')]",
"dependsOn": [
"microsoft.insights/components/mycoolfunction",
"[concat('Microsoft.Web/serverfarms/', parameters('hostingPlanName'))]",
"[concat('Microsoft.Storage/storageAccounts/', parameters('storageAccountName'))]"
],
"tags": {},
"kind": "functionapp,linux",
"properties": {
"name": "[parameters('name')]",
"siteConfig": {
"appSettings": [
{
"name": "FUNCTIONS_WORKER_RUNTIME",
"value": "python"
},
{
"name": "FUNCTIONS_EXTENSION_VERSION",
"value": "~2"
},
{
"name": "APPINSIGHTS_INSTRUMENTATIONKEY",
"value": "[reference('microsoft.insights/components/mycoolfunction', '2015-05-01').InstrumentationKey]"
},
{
"name": "AzureWebJobsStorage",
"value": "[concat('DefaultEndpointsProtocol=https;AccountName=',parameters('storageAccountName'),';AccountKey=',listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2019-06-01').keys[0].value,';EndpointSuffix=','core.windows.net')]"
}
]
},
"serverFarmId": "[concat('/subscriptions/', parameters('subscriptionId'),'/resourcegroups/', parameters('serverFarmResourceGroup'), '/providers/Microsoft.Web/serverfarms/', parameters('hostingPlanName'))]",
"hostingEnvironment": "[parameters('hostingEnvironment')]",
"clientAffinityEnabled": false
}
}
I'm attempting to develop a Google Action with the Dialogflow v2 API
My function saves a value to userstorage as follows
def save_value(value):
res = {
"fulfillmentText": "Set value to {}".format(int(value)),
"payload": {
"google": {
"userStorage": str(value)
}
}
}
print ("Saved value")
response = jsonify(res)
return response
And I get the following back from testing in Dialogflow
{
"fulfillmentText": "Set value to 36237269",
"payload": {
"google": {
"userStorage": "36237269"
}
}
}
This works for the duration of session, I am able to use this in later intents via
value = request_json['originalRequest']['data']['user']['userStorage']
However, the data is only stored for one session - if I invoke my action again, there is nothing saved.
Is this the correct way of using userstorage? Has anyone successfully used it with Python?
Failed "response"
{
"responseMetadata": {
"status": {
"code": 10,
"message": "Failed to parse Dialogflow response into AppResponse because of empty speech response",
"details": [
{
"#type": "type.googleapis.com/google.protobuf.Value",
"value": "{\"id\":\"816605a7-f7e0-4d37-a490-c84ff63fb7dd\",\"timestamp\":\"2018-11-08T17:18:49.422Z\",\"lang\":\"en-us\",\"result\":{},\"alternateResult\":{},\"status\":{\"code\":206,\"errorType\":\"partial_content\",\"errorDetails\":\"Webhook call failed. Error: 500 Internal Server Error\"},\"sessionId\":\"ABwppHHai3qsY2WPZWezmh9Q_bUF45aD51GbQ81sUDF7iSrRLA2m8KFgZ1ZYavnCv3fAckW1tcoJdydZTXQY5Nw\"}"
}
]
}
}
}
Working "response"
{
"conversationToken": "[]",
"finalResponse": {
"richResponse": {
"items": [
{
"simpleResponse": {
"textToSpeech": "Text"
}
}
]
}
},
"responseMetadata": {
"status": {
"message": "Success (200)"
},
"queryMatchInfo": {
"queryMatched": true,
"intent": "047ad9d9-0180-47f9-88bd-e5ffc8936c08"
}
},
"userStorage": "36237269"
}
Working "Request"
{
"user": {
"userId": "ABwppHE5H0FKrXKk8PjJyzZJ12OSMQcjxuT2NnfPAgLvai0UsfWEoYE8R_L8qLQdqY29sOnsZhQhE5G4XVVXiGs",
"locale": "en-US",
"lastSeen": "2018-11-08T17:18:16Z",
"userStorage": "36237269"
},
"conversation": {
"conversationId": "ABwppHE6BwK2zIBKxHA8hc9uBGumVgKbbNGHhRVFz7O6yrxxa1WJ_xtKNqhesj3EwNCVlestM-bF6tDWzZhqUXE",
"type": "ACTIVE",
"conversationToken": "[]"
},
"inputs": [
{
"intent": "actions.intent.TEXT",
"rawInputs": [
{
"inputType": "KEYBOARD",
"query": "when is the next bus"
}
],
"arguments": [
{
"name": "text",
"rawText": "when is the next bus",
"textValue": "when is the next bus"
}
]
}
],
"surface": {
"capabilities": [
{
"name": "actions.capability.MEDIA_RESPONSE_AUDIO"
},
{
"name": "actions.capability.WEB_BROWSER"
},
{
"name": "actions.capability.AUDIO_OUTPUT"
},
{
"name": "actions.capability.SCREEN_OUTPUT"
}
]
},
"isInSandbox": true,
"availableSurfaces": [
{
"capabilities": [
{
"name": "actions.capability.WEB_BROWSER"
},
{
"name": "actions.capability.AUDIO_OUTPUT"
},
{
"name": "actions.capability.SCREEN_OUTPUT"
}
]
}
],
"requestType": "SIMULATOR"
}
Failed "request"
{
"user": {
"userId": "ABwppHE5H0FKrXKk8PjJyzZJ12OSMQcjxuT2NnfPAgLvai0UsfWEoYE8R_L8qLQdqY29sOnsZhQhE5G4XVVXiGs",
"locale": "en-US",
"lastSeen": "2018-11-08T17:18:41Z"
},
"conversation": {
"conversationId": "ABwppHHai3qsY2WPZWezmh9Q_bUF45aD51GbQ81sUDF7iSrRLA2m8KFgZ1ZYavnCv3fAckW1tcoJdydZTXQY5Nw",
"type": "ACTIVE",
"conversationToken": "[]"
},
"inputs": [
{
"intent": "actions.intent.TEXT",
"rawInputs": [
{
"inputType": "KEYBOARD",
"query": "when is the next bus"
}
],
"arguments": [
{
"name": "text",
"rawText": "when is the next bus",
"textValue": "when is the next bus"
}
]
}
],
"surface": {
"capabilities": [
{
"name": "actions.capability.WEB_BROWSER"
},
{
"name": "actions.capability.MEDIA_RESPONSE_AUDIO"
},
{
"name": "actions.capability.SCREEN_OUTPUT"
},
{
"name": "actions.capability.AUDIO_OUTPUT"
}
]
},
"isInSandbox": true,
"availableSurfaces": [
{
"capabilities": [
{
"name": "actions.capability.WEB_BROWSER"
},
{
"name": "actions.capability.SCREEN_OUTPUT"
},
{
"name": "actions.capability.AUDIO_OUTPUT"
}
]
}
],
"requestType": "SIMULATOR"
}
I have a JSON schema for user's new message:
message_creation = {
"title": "Message",
"type": "object",
"properties": {
"post": {
"oneOf": [
{
"type": "object",
"properties": {
"content": {
"type": "string"
}
},
"additionalProperties": False,
"required": ["content"]
},
{
"type": "object",
"properties": {
"image": {
"type": "string"
}
},
"additionalProperties": False,
"required": ["image"]
},
{
"type": "object",
"properties": {
"video_path": {
"type": "string"
}
},
"additionalProperties": False,
"required": ["video"]
}
]
},
"doc_type": {
"type": "string",
"enum": ["text", "image", "video"]
}
},
"required": ["post", "doc_type"],
"additionalProperties": False
}
It's simple as that! There is two fields one is type and the other is post. So a payload like below succeeds:
{
"post": {
"image": "Hey there!"
},
"type": "image"
}
Now problem is that if user sets type value to text I cannot validate if text's schema has been given. How should I verify this? How should I check in case type is set to image then make sure that image exists inside of post?
You can do it, but it's complicated. This uses a boolean logic concept called implication to ensure that if schema A matches then schema B must also match.
{
"type": "object",
"properties": {
"post": {
"type": "object",
"properties": {
"content": { "type": "string" },
"image": { "type": "string" },
"video_path": { "type": "string" }
},
"additionalProperties": false
},
"doc_type": {
"type": "string",
"enum": ["text", "image", "video"]
}
},
"required": ["post", "doc_type"],
"additionalProperties": false,
"allOf": [
{ "$ref": "#/definitions/image-requires-post-image" },
{ "$ref": "#/definitions/text-requires-post-content" },
{ "$ref": "#/definitions/video-requires-post-video-path" }
],
"definitions": {
"image-requires-post-image": {
"anyOf": [
{ "not": { "$ref": "#/definitions/type-image" } },
{ "$ref": "#/definitions/post-image-required" }
]
},
"type-image": {
"properties": {
"doc_type": { "const": "image" }
}
},
"post-image-required": {
"properties": {
"post": { "required": ["image"] }
}
},
"text-requires-post-content": {
"anyOf": [
{ "not": { "$ref": "#/definitions/type-text" } },
{ "$ref": "#/definitions/post-content-required" }
]
},
"type-text": {
"properties": {
"doc_type": { "const": "text" }
}
},
"post-content-required": {
"properties": {
"post": { "required": ["content"] }
}
},
"video-requires-post-video-path": {
"anyOf": [
{ "not": { "$ref": "#/definitions/type-video" } },
{ "$ref": "#/definitions/post-video-path-required" }
]
},
"type-video": {
"properties": {
"doc_type": { "const": "video" }
}
},
"post-video-path-required": {
"properties": {
"post": { "required": ["video_path"] }
}
}
}
}