Convert an Elasticsearch query to an elasticsearch-dsl query in Python

I tried to convert this Elasticsearch query to an elasticsearch-dsl query, but I didn't get the same result.
I am using Elasticsearch version 5.x, with the matching version of elasticsearch-dsl.
Here is the Elasticsearch query:
qry = {
    "_source": [
        "accurate_call_duration",
        "call_duration",
        "called"
    ],
    "query": {
        "bool": {
            "should": [
                {
                    "has_child": {
                        "type": "outgoing",
                        "query": {
                            "match_all": {}
                        },
                        "inner_hits": {
                            "size": 5000,
                            "_source": [
                                "accurate_call_duration",
                                "accurate_ringing_duration",
                                "anonymous_call"
                            ]
                        }
                    }
                }
            ],
            "must": [
                {
                    "term": {
                        "client": 1212
                    }
                },
                {
                    "range": {
                        "created_time": {
                            "gte": date_from,
                            "lte": date_to
                        }
                    }
                },
                {
                    "term": {
                        "type": "incoming"
                    }
                }
            ]
        }
    },
    "size": 5000
}
elasticsearch-dsl query:
result = Search(using=escli, index="cdr").source([
    "accurate_call_duration",
    "call_duration", "called"]).filter(
    'range', created_time={'gt': date_from, 'lte': date_to}).filter(
    "term", type="incoming").extra(size=5000).execute()
How can I get the same result as the Elasticsearch query (with inner_hits)?
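A minimal sketch of one way to express the full query with elasticsearch-dsl, assuming escli, date_from and date_to are defined as above. Q() passes extra keyword arguments through into the serialized query, so the has_child clause can carry inner_hits just like the raw query does. (Note also that the attempt above uses 'gt' where the original query uses 'gte'.)

from elasticsearch_dsl import Q, Search

s = (
    Search(using=escli, index="cdr")
    .source(["accurate_call_duration", "call_duration", "called"])
    .query(Q(
        "bool",
        should=[Q(
            "has_child",
            type="outgoing",
            query=Q("match_all"),
            # inner_hits is serialized verbatim into the has_child clause
            inner_hits={
                "size": 5000,
                "_source": [
                    "accurate_call_duration",
                    "accurate_ringing_duration",
                    "anonymous_call",
                ],
            },
        )],
        must=[
            Q("term", client=1212),
            Q("range", created_time={"gte": date_from, "lte": date_to}),
            Q("term", type="incoming"),
        ],
    ))
    .extra(size=5000)
)
result = s.execute()

The inner hits for each parent document should then be available on each hit via hit.meta.inner_hits.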

Related

How to perform a sum aggregation on the n latest documents after sorting records (descending) in Elasticsearch?

I want the latest 30 documents between 2022-04-20 and 2022-06-20, and I want to perform a sum aggregation on the duration_seconds field of those 30 latest documents. We tried multiple aggregations for this, such as top_hits and terms for sorting, but we always got the sum over all documents in the range.
{
    "size": 1,
    "query": {
        "bool": {
            "must": [
                {
                    "range": {
                        "create_datetime": {
                            "gte": "2022-04-20",
                            "lte": "2022-06-20"
                        }
                    }
                }
            ]
        }
    },
    "sort": [
        {
            "create_datetime": {
                "order": "desc"
            }
        }
    ],
    "aggs": {
        "videosession": {
            "sampler": {
                "shard_size": 30
            },
            "aggs": {
                "sum_duration_seconds": {
                    "sum": {
                        "field": "duration_seconds"
                    }
                }
            }
        }
    }
}
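No answer was recorded for this question here. Note that a sampler aggregation samples the top-scoring documents per shard, not the documents matching your sort order, so it won't reliably pick the 30 newest documents. A minimal workaround sketch (an assumption, not from the original thread) is to fetch the 30 latest hits in the range and sum duration_seconds client-side; the index name my-index is hypothetical:

from elasticsearch import Elasticsearch

es = Elasticsearch()
# Fetch the 30 newest documents in the range, then sum client-side.
resp = es.search(index="my-index", body={
    "size": 30,
    "_source": ["duration_seconds"],
    "query": {"bool": {"must": [{"range": {"create_datetime": {
        "gte": "2022-04-20", "lte": "2022-06-20"}}}]}},
    "sort": [{"create_datetime": {"order": "desc"}}]
})
total = sum(h["_source"]["duration_seconds"] for h in resp["hits"]["hits"])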

Query an Elasticsearch index by an attribute, with a given range?

I want to query my index so that it matches whenever a particular attribute called sitename shows up, but I want all the data from a certain time range. I thought it might be something like the below, but I'm unsure:
{
    "query": {
        "range": {
            "timestamp": {
                "gte": "now-1h/h",
                "lt": "now/h"
            }
        },
        "match": {"sitename": "HARB00ZAF0"}
    }
}
You're almost there, but you need to leverage the bool query:
{
    "query": {
        "bool": {
            "filter": [
                {
                    "range": {
                        "timestamp": {
                            "gte": "now-1h/h",
                            "lt": "now/h"
                        }
                    }
                }
            ],
            "must": [
                {
                    "match": {
                        "sitename": "HARB00ZAF0"
                    }
                }
            ]
        }
    }
}
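For consistency with the rest of this page, here is a minimal elasticsearch-dsl sketch of the same bool query; the client es and the index name my-index are assumptions:

from elasticsearch_dsl import Search

s = (
    Search(using=es, index="my-index")
    # .filter() lands in bool.filter (no scoring), .query() in bool.must
    .filter("range", timestamp={"gte": "now-1h/h", "lt": "now/h"})
    .query("match", sitename="HARB00ZAF0")
)
response = s.execute()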

How do you filter on not-null values in Elasticsearch?

I am trying to filter on values that are not null.
Example in SQL:
SELECT * FROM Mytable WHERE field_1 IS NOT NULL AND field_2 = 'alpha'
How should I write this query in elasticsearch-dsl (Python)?
I tried things like:
s = Mytable.search().query(
    Q('match', field_2='alpha')
).filter(~Q('missing', field='field_1'))
but it returns elements with null values of field_1.
I also tried the following, but it didn't work:
field_name_1 = 'field_2'
value_1 = "alpha"
field_name_2 = 'field_1'
value_2 = " "
filter = {
    "query": {
        "bool": {
            "must": [
                {
                    "match": {
                        field_name_1: value_1
                    }
                },
                {
                    "bool": {
                        "should": [
                            {
                                "bool": {
                                    "must_not": [
                                        {
                                            field_name_2: {
                                                "textContent": "*"
                                            }
                                        }
                                    ]
                                }
                            }
                        ]
                    }
                }
            ]
        }
    }
}
I am not familiar with elasticsearch-dsl (Python), but the following search query will get you the same search result that you want:
SELECT * FROM Mytable WHERE field_1 IS NOT NULL AND field_2 = 'alpha'
With the help of the search query below, the result will contain only documents where name = "alpha" AND the item field is not null. You can refer to the exists query to know more about this.
Index Data:
{ "name": "alpha","item": null }
{ "name": "beta","item": null }
{ "name": "alpha","item": 1 }
{ "name": "alpha","item": [] }
Search query:
You can combine a bool query with an exists query like this:
{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {
                        "name": "alpha"
                    }
                },
                {
                    "exists": {
                        "field": "item"
                    }
                }
            ]
        }
    }
}
Search Result:
"hits": [
    {
        "_index": "my-index",
        "_type": "_doc",
        "_id": "4",
        "_score": 1.6931472,
        "_source": {
            "name": "alpha",
            "item": 1
        }
    }
]
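Note that exists matches only documents with at least one non-null value in the field, which is why the documents with "item": null and "item": [] are excluded from the result above.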
You can try this one:
s = Mytable.search().query(
    Q('bool', must=[Q('match', field_2='alpha'), Q('exists', field='field_1')])
)
This is the way to use a boolean compound query:
query: {
    bool: {
        must: [
            {
                bool: {
                    must_not: {
                        missing: {
                            field: 'follower',
                            existence: true,
                            null_value: true,
                        },
                    },
                },
            },
            {
                nested: {
                    path: 'follower',
                    query: {
                        match: {
                            'follower.id': req.currentUser?.id,
                        },
                    },
                },
            },
        ],
    },
},
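Note: the missing query used above was removed in Elasticsearch 5.0; on current versions, express "not null" as a bool query with must_not wrapping an exists query, as in the previous answer.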

elasticsearch.exceptions.TransportError: TransportError 503: Data too large

I'm trying to get the response from ES by hitting it from Python code, but it is showing the error below:
elasticsearch.exceptions.TransportError: TransportError(503, u'search_phase_execution_exception', u'[request] Data too large, data for [<agg [POSCodeModifier]>] would be [623327280/594.4mb], which is larger than the limit of [623326003/594.4mb]')
If I hit the same query from Kibana I get the results, but using Python I'm getting this error. I'm using aggregations in my code. Can someone explain whether I need to set some properties, or how to optimise the query?
Below is the structure of the request I'm sending; if I set the start and end dates more than 5 days apart it gives me the error, otherwise I get the results:
unmtchd_ESdata = es.search(index='cstore_new', body={
    'size': 0,
    "aggs": {
        "filtered": {
            "filter": {
                "bool": {
                    "must_not": [
                        {
                            "match": {
                                "CSPAccountNo": store_id
                            }
                        }
                    ],
                    "must": [
                        {
                            "range": {
                                "ReportDate": {
                                    "gte": start_dt,
                                    "lte": end_dt
                                }
                            }
                        }
                    ]
                }
            },
            "aggs": {
                "POSCode": {
                    "terms": {
                        "field": "POSCode",
                        "size": 10000
                    },
                    "aggs": {
                        "POSCodeModifier": {
                            "terms": {
                                "field": "POSCodeModifier",
                                "size": 10000
                            },
                            "aggs": {
                                "CSP": {
                                    "terms": {
                                        "field": "CSPAccountNo",
                                        "size": 10000
                                    },
                                    "aggs": {
                                        "per_stock": {
                                            "date_histogram": {
                                                "field": "ReportDate",
                                                "interval": "week",
                                                "format": "yyyy-MM-dd",
                                                "min_doc_count": 0,
                                                "extended_bounds": {
                                                    "min": start_dt,
                                                    "max": end_dt
                                                }
                                            },
                                            "aggs": {
                                                "avg_week_qty_sales": {
                                                    "sum": {
                                                        "field": "TotalCount"
                                                    }
                                                }
                                            }
                                        },
                                        "market_week_metrics": {
                                            "extended_stats_bucket": {
                                                "buckets_path": "per_stock>avg_week_qty_sales"
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}, request_timeout=1000)
Edit 1: result variables needed from the Elasticsearch response:
for bucket in unmtchd_ESdata['aggregations']['filtered']['POSCode']['buckets']:
    metrics = bucket['POSCodeModifier']['buckets'][0]['CSP']['buckets'][0]['market_week_metrics']
    list6.append(metrics['avg'])
    list7.append(bucket['key'])
    list8.append(metrics['max'] - metrics['min'])
    list9.append(metrics['max'])
    list10.append(metrics['min'])
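No answer was recorded for this question here. The 503 is Elasticsearch's request circuit breaker tripping: three nested terms aggregations of size 10000 each can require far more memory than the configured limit. One mitigation sketch (an assumption, not from the original thread) is to page the outer terms aggregation with partitions, so each request aggregates only a slice of the POSCode keys:

# Sketch only: split the POSCode keys into num_partitions slices and
# query one slice at a time, keeping each response under the breaker limit.
num_partitions = 20
for part in range(num_partitions):
    page = es.search(index='cstore_new', body={
        "size": 0,
        # Nest the bool filter and sub-aggregations here as in the original.
        "aggs": {"POSCode": {"terms": {
            "field": "POSCode",
            "size": 10000,
            "include": {"partition": part, "num_partitions": num_partitions}
        }}}
    }, request_timeout=1000)

(Terms partitioning requires Elasticsearch 5.2 or later.)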

Mongo Cursor not returning a cursor but an object

test_cursor = db.command({
    "aggregate": "New_layout",
    "pipeline": [
        {"$match": {"$and": [
            {"FIRST_DATE": {"$gte": new_date}},
            {"CHAIN_ID": {"$ne": ""}}
        ]}},
        {"$unwind": {"path": "$ENTERS", "includeArrayIndex": "Date"}},
        {"$project": {
            "_id": 0,
            "SITE_ID": "$SITE_ID",
            "CHAIN_ID": "$CHAIN_ID",
            "SEGMENT_ID": "$SEGMENT_ID",
            "ZIP": "$ZIP",
            "ZIP3": "$ZIP3",
            "MARKET_ID": "$MARKET_ID",
            "REGION": "$REGION",
            "MALL_CODE": "$MALL_CODE",
            "MALL_AREA": "$MALL_AREA",
            "MALL_NAME": "$MALL_NAME",
            "FIRST_DATE": "$FIRST_DATE",
            "MARKET_AREA": "$MARKET_AREA",
            "REGION_AREA": "$REGION_AREA",
            "ZIP_AREA": "$ZIP_AREA",
            "ZIP3_AREA": "$ZIP3_AREA",
            "DATE": "$Date",
            "ENTERS": "$ENTERS"
        }}
    ],
    "allowDiskUse": True,
    "cursor": {}
})
asd = list(test_cursor)
The returned "cursor" only contains the following keys:
[u'cursor', u'ok', u'waitedMS']
However, with an $out stage, the output collection has the expected contents.
I am running pymongo v3.2.2 and mongo 3.2. I was told this problem occurs with v3.0 or lower, but this is something I am not able to figure out.
You should use aggregate() instead of command().
test_cursor = db.New_layout.aggregate([
    {"$match": {"$and": [
        {"FIRST_DATE": {"$gte": new_date}},
        {"CHAIN_ID": {"$ne": ""}}
    ]}},
    {"$unwind": {"path": "$ENTERS", "includeArrayIndex": "Date"}},
    {"$project": {
        "_id": 0,
        "SITE_ID": "$SITE_ID",
        "CHAIN_ID": "$CHAIN_ID",
        "SEGMENT_ID": "$SEGMENT_ID",
        "ZIP": "$ZIP",
        "ZIP3": "$ZIP3",
        "MARKET_ID": "$MARKET_ID",
        "REGION": "$REGION",
        "MALL_CODE": "$MALL_CODE",
        "MALL_AREA": "$MALL_AREA",
        "MALL_NAME": "$MALL_NAME",
        "FIRST_DATE": "$FIRST_DATE",
        "MARKET_AREA": "$MARKET_AREA",
        "REGION_AREA": "$REGION_AREA",
        "ZIP_AREA": "$ZIP_AREA",
        "ZIP3_AREA": "$ZIP3_AREA",
        "DATE": "$Date",
        "ENTERS": "$ENTERS"
    }}
], allowDiskUse=True)
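aggregate() returns a CommandCursor, so iterating it now yields the documents themselves:
asd = list(test_cursor)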
