How to push object data into an Elasticsearch array field - python

I want to push an object data type inside an array field and I get an error; I think the problem is the mapping.
I use Python to work with Elasticsearch.
Create the mappings for the customer
def create_customer_index():
    """Manually create the 'customers' index with an explicit mapping.

    Declares 'created'/'updated' as longs (epoch millis) and
    'shopping_cart' as a text field with a 'keyword' sub-field
    (values longer than 256 chars are not indexed as keywords).
    """
    index_body = {
        "mappings": {
            "customer": {
                "properties": {
                    "created": {"type": "long"},
                    "updated": {"type": "long"},
                    "shopping_cart": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256,
                            },
                        },
                    },
                },
            },
        },
    }
    es.indices.create(index='customers', body=index_body)
#end
With this method I add the product the user selects, and the quantity, inside the array
def add_item_in_shopping_cart(uid, product_id, quantity=1):
    """Append a product entry to a customer's shopping_cart via a scripted update.

    Parameters:
        uid: document id of the customer in the 'customers' index.
        product_id: id of the product to add.
        quantity: units to record for the product (default 1).

    Returns:
        (body, status) tuple: 500 if the product is already in the cart or
        the update fails, otherwise 200 with a JSON success message.
    """
    print('Start adding a new item in shopping cart')
    print('Product id to add is: => ', product_id)
    customer_shoping_cart = get_customer_shopping_cart(uid)
    print('CUSTUMER SHOPING CART => ', customer_shoping_cart)
    # Duplicate check: equivalent to the flag-and-loop scan, but idiomatic.
    if product_id in customer_shoping_cart:
        print('Item already exist in shopping_cart')
        return 'item_already_exist_in_shopping_cart', 500
    print('Item dont exist in shopping_cart, i gona added')
    # Painless script appends the new entry to the stored array. For this to
    # parse, the index mapping must declare shopping_cart as an object field
    # (see the mapper_parsing_exception discussed above).
    doc = {
        "script": {
            # NOTE(review): "inline" is the pre-6.x parameter name; newer
            # clusters expect "source" — confirm against the server version.
            "inline": "ctx._source.shopping_cart.add(params.data)",
            "params": {
                "data": {
                    "product_id": product_id,
                    "quantity": quantity,
                }
            }
        }
    }
    try:
        es.update(index="customers", doc_type='customer', id=uid, body=doc)
        # Refresh so the added item is immediately visible to searches.
        es.indices.refresh(index="customers")
        return jsonify({'message': 'item_added_in_shopping_cart'}), 200
    except Exception as e:
        print('ERROR => ', e)
        return 'we have error', 500
#end
And I have this error
ERROR => TransportError(400, 'mapper_parsing_exception', 'failed to parse [shopping_cart]')

From the mapping you posted it looks like ElasticSearch expects documents like this:
{
"created": 1510094303,
"updated": 1510094303,
"shopping_cart": "I am in a shopping cart"
}
Or like this:
{
"created": 1510094303,
"updated": 1510094303,
"shopping_cart": [
"I am in a shopping cart",
"me too!"
]
}
And you are trying to treat "shopping_cart" as an array of objects, which it is not (it is an array of strings). ElasticSearch does not allow putting objects that do not fit the mapping into the index.
What you should try first is to change your mapping to something like this:
# Corrected mapping: "shopping_cart" becomes an object field whose entries
# carry "product_id" and "quantity", matching the documents the update
# script appends — this resolves the 'failed to parse [shopping_cart]'
# mapper_parsing_exception shown above.
mapping = {
"mappings": {
"customer": {
"properties": {
"created": {
"type": "long"
},
"updated": {
"type": "long"
},
# object sub-properties instead of "type": "text"
"shopping_cart": {
"properties": {
"product_id": {
"type": "keyword"
},
"quantity": {
"type": "integer"
}
}
},
}
}
}
}
Moreover, consider also changing the document entirely on the client side, i.e. in your script, and putting the new version document in the index (replacing the previous one), since it will probably be easier in the implementation logic (for instance, no need for ElasticSearch-side scripts to update the document).
Hope that helps.

Related

ElasticSearch 7 & Kibana unexpected behavior

I am trying to store data into an Elasticsearch index; the data of one column looks as below
C ID
1234
5678
NA
123D D5614 A7890
Now I know this data is kind of mixed and so I have selected the text field for this with below properties
"mappings": {
"properties":{
"C ID":{"type":"text" , "fields" :{'keyword': {'type':'keyword'}}},
}
}
Even after this I am always getting the error.
failed to parse field[C ID] of type long in document id 4
Please help me out with this. I have not given any reference to type long; I don't know why I am getting this error.
Update
My code base
from elasticsearch import Elasticsearrch
ESConnector is a class responsible for the Kerberos login. We are calling Elasticsearch inside the ESConnector class
es = ESConnector()
# NOTE(review): 'ex' is undefined here — presumably a typo for 'es'. Also,
# the elasticsearch-py client method is 'exists', not 'exist'.
if not ex.indices.exist(INDEX):
# NOTE(review): this dict literal is missing a closing brace, and the name
# 'set' shadows the built-in type.
set = {"settings":{"index":{"number_of_shards":1, "number_of_replicas":1}}
es.indices.create(INDEX, body = set)
mbody = {
"mappings": {
"properties":{
"C ID":{"type":"text" , "fields" :{'keyword': {'type':'keyword'}}},
}
}
}
es.indices.put_mapping(INDEX, body = mbody)
You can create the index with the mapping in a single call
# Create the index with settings AND mappings in one create() call instead
# of create() followed by a separate put_mapping().
# Fixed: the elasticsearch-py client method is 'exists', not 'exist'.
if not es.indices.exists(INDEX):
    body = {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 1
            }
        },
        "mappings": {
            "properties": {
                # text field with a keyword sub-field so mixed values like
                # '123D D5614 A7890' are accepted (no long coercion).
                "C ID": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword"
                        }
                    }
                }
            }
        }
    }
    es.indices.create(INDEX, body=body)
It should work this way.

Sort ElasticSearch results by a custom Compare function on field

If I want to fetch Driver data from elastic sorted on rating where rating could be ["good", "ok", "bad"], how to write the query which can help me get data in sorted order considering good > ok > bad
Ex sorted response list:
[{
"name": "driver1",
"rating": "good"
},
{
"name": "driver3",
"rating": "good"
},
{
"name": "driver2",
"rating": "ok"
},
{
"name": "driver4",
"rating": "bad"
}]
For changing score value based on a field in your index you can use script score query, your query should look like below example:
GET /my-index-2/_search
{
"query": {
"script_score": {
"query": {
"match_all":{}
},
"script": {
"source": "if (doc['rating.keyword'].value == 'good'){2} else if(doc['rating.keyword'].value == 'ok') {1} else if(doc['rating.keyword'].value == 'bad') {0}"
}
}
}
}
For more information about script score query you can check Elastic official documentation here.

Elasticsearch multi field query request in Python

I'm a beginner in Elasticsearch and Python and I have an index created in Elasticsearch with some data, and I want to perform a query request on those data with python. This is my data mapping created in Kibana's Dev tools:
PUT /main-news-test-data
{
"mappings": {
"properties": {
"content": {
"type": "text"
},
"title": {
"type": "text"
},
"lead": {
"type": "text"
},
"agency": {
"type": "keyword"
},
"date_created": {
"type": "date"
},
"url": {
"type": "keyword"
},
"image": {
"type": "keyword"
},
"category": {
"type": "keyword"
},
"id":{
"type": "keyword"
}
}
}
}
and here is my Python code, in which we give it a keyword and a category number and it has to check in title, lead and content fields of the elastic data for the matching keyword and also check the entered category number with the data category number and return/print out any object that matches this criteria:
from elasticsearch import Elasticsearch
import json,requests
# NOTE(review): the second constructor call below discards the HOST/PORT
# configuration made on the first line.
es = Elasticsearch(HOST="http://localhost", PORT=9200)
es = Elasticsearch()
def QueryMaker (keyword,category):
# NOTE(review): this multi_match body is invalid — multi_match takes
# "query" and "fields" parameters, not field names as keys; this is what
# triggers the parsing_exception quoted below.
response = es.search(index="main-news-test-data",body={"from":0,"size":5,"query":{"multi_match":{
"content":keyword,"category":category,"title":keyword,"lead":keyword}}})
return(response)
if __name__ == '__main__':
keyword = input('Enter Keyword: ')
category = input('Enter Category: ')
#startDate = input('Enter StartDate: ')
#endDate = input('Enter EndDate: ')
data = QueryMaker(keyword,category)
print(data)
but I receive this error when I give the data to the input:
elasticsearch.exceptions.RequestError: RequestError(400, 'parsing_exception', '[multi_match] query does not support [content]')
What am I doing wrong?
Edit: the keyword has to be included in the title, lead and content but it doesn't have to be the same as them
Your multi_match query syntax is wrong here, also I think you need something like this, See more: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html
{
"from":0,
"size":5,
"query": {
"bool": {
"should": [
{
"multi_match" : {
"query": keyword,
"fields": [ "content", "title","lead" ]
}
},
{
"multi_match" : {
"query": category,
"fields": [ "category" ]
}
}
]
}
}
}

How to store a particular portion of json in a variable based on certain condition in Python?

I have a json which looks like below
# Flat list under "user_input": each {"rule": ...} dict is a section marker,
# and the {"des"/"value"} dicts that follow belong to that rule until the
# next marker. NOTE: this is a Python dict literal, not strict JSON (it has
# a trailing comma before the closing bracket).
result_json = {
"status":"Gov info",
"user_input":[
{
"rule":"Location"
},
{
"des": "This is for location1",
"value": 1
},
{
"des": "This is for location2",
"value": 2
},
{
"rule":"District"
},
{
"des": "This is for district1",
"value": 1
},
{
"des": "This is for district2",
"value": 2
},
{
"des": "This is for district3",
"value": 3
},
{
"des": "This is for district4",
"value": 4
},
{
"rule":"Country"
},
{
"des": "This is for country1",
"value": 1
},
{
"rule":"Continent"
},
{
"des": "This is for continent1",
"value": 1
},
{
"des": "This is for continent2",
"value": 2
},
],
"source":"Gov",
"id":"5ass1"
}
I also have a list like so
lookup = [u'Location', u'District', u'Country', u'Continent']
Now what I want to do is that I look at each value of the list, check against the json for the same value (the value is stored against rule key) and get the sub json right after it until I hit the next rule. For example
The first value in the list lookup is Location. Now I loop through user_input key's value, check against the sub key rule and find out that the value Location matches and right after that store the subsequent dictionaries until I hit the next key rule. So for lookup value Location, after checking against the json and collecting the subsequent dictionary, this is how I will store
filtered_output = {
"Location":[
{
"des":"This is for location1",
"value":1
},
{
"des":"This is for location2",
"value":2
}
]
}
Now I look for next lookup value which is District and the subsequent part of json that will be stored is
filtered_output = {
"Location":[
{
"des":"This is for location1",
"value":1
},
{
"des":"This is for location2",
"value":2
}
],
"District":[
{
"des":"This is for district1",
"value":1
},
{
"des":"This is for district2",
"value":2
},
{
"des":"This is for district3",
"value":3
},
{
"des":"This is for district4",
"value":4
}
]
}
I tried doing something like below
filtered_output = {}
# NOTE(review): 'lookout' is undefined — the list defined earlier is named
# 'lookup'.
for i in lookout:
temp_json = []
# NOTE(review): only the {"rule": ...} marker dict itself matches the
# condition, so the member entries after it are never collected, and
# temp_json is never merged into filtered_output.
for j in result_json["user_input"]:
if j.get("rule") == i:
temp_json.append(j)
Here it only stores the dictionary that contains the key rule but doesn't continue further until it hits the next rule key. I am not sure how to make this work. Any help will be appreciated.
I would first transform your input into the format you want, and then I would only filter the keys — something like this:
user_input = result_json["user_input"]
transformed_user_input = {}
# One pass over the flat list: a {"rule": ...} marker opens a fresh bucket,
# and every subsequent entry is appended to the most recent bucket until
# the next marker appears.
for entry in user_input:
    if "rule" in entry:
        current_rule = entry["rule"]
        transformed_user_input[current_rule] = []
    else:
        transformed_user_input[current_rule].append(entry)
lookup = [u'Location', u'District', u'Country', u'Continent']
# Keep only the rules the caller asked for, in lookup order.
filtered_user_input = {key: transformed_user_input[key] for key in lookup}
This way, you process your input only once (don't know how big it is).

Detect if a json schema has a oneOf type schema

I want to check if a schema has just a single schema in it or if it has several schemas within it with a oneOf property.
The python code should be something like this
If schema1 has oneOf property:
Some code1
If schema1 is just a single schema:
Some code2
Essentially I want to be able to distinguish between these 2 types of schemas
Schema1
"schema1": {
"definitions": {
"schema": {
"type": "object",
"properties": {
"name": {
"type": ["string", "null"]
}
}
}
}
}
Schema2
"schema2": {
"definitions": {
"schema": {
"oneOf": [
{
"type": ["null"]
},
{
"type": ["string"],
"enum": ["NONE"]
}
]
}
}
}
How can I do this in Python?
Edit: Corrected my example schema
Here is an example showing a way of recursively checking if there is a oneOf property in the json supplied. You'll need to check the parent property if you specifically want to only check the 'schema' portion of the json.
#!/usr/bin/env python
import json
def objectHasKey(object_, key_):
    """Return True if *key_* occurs as a dict key anywhere in *object_*.

    Recurses into nested dict values only — keys hidden inside list values
    (e.g. dicts inside a JSON array) are NOT examined.

    Fixed: Python 2 'print' statement replaced with the print() function
    (runs on Python 2 and 3 with identical output) and type() comparison
    replaced with isinstance().
    """
    found = False
    if isinstance(object_, dict):
        for current_key in object_.keys():
            print(current_key)  # debug trace of visited keys (kept from original)
            if isinstance(object_[current_key], dict):
                found = objectHasKey(object_[current_key], key_)
            if current_key == key_:
                found = True
            if found:
                break
    return found
# Fixture 1: a plain schema with no "oneOf" anywhere.
firstJSONText = '''
{
"definitions": {
"schema": {
"type": "object",
"properties": {
"name": {
"type": [
"string",
"null"
]
}
}
}
}
}
'''
first = json.loads(firstJSONText)
# Fixture 2: a schema that nests "oneOf" under definitions.schema.
secondJSONText = '''
{
"definitions": {
"schema": {
"oneOf": [
{
"type": [
"null"
]
},
{
"type": [
"string"
],
"enum": [
"NONE"
]
}
]
}
}
}
'''
second = json.loads(secondJSONText)
# Dispatch on the presence of "oneOf". Fixed: Python 2 print statements
# replaced with single-argument print() calls, which produce the same
# output on Python 2 and 3.
target = first
if objectHasKey(target, 'oneOf'):
    print("Handle oneOf with first")
else:
    print("Handle default with first")
target = second
if objectHasKey(target, 'oneOf'):
    print("Handle oneOf with second")
else:
    print("Handle default with second")
Example call with output
csmu-macbook-pro-2:detect-if-a-json-schema-has-a-oneof-type-schema admin$ ./test-for-schema.py
definitions
schema
type
properties
name
type
Handle default with first
definitions
schema
oneOf
Handle oneOf with second

Categories

Resources