Query subdocument in Monogo DB - python

I have the following Mongo Document:
{
"_id" : ObjectId("5ea0576c2671f799e1ad65db"),
"model" : "Volkswagen",
"make" : "Volkswagen",
"year" : 1969,
"mileage" : 15000.0,
"vi_number" : "ba37d9b98fe940dba100fdf0330c30f0",
"engine" : {
"horsepower" : 400,
"liters" : 5.0,
"mpg" : 20.0,
"serial_number" : "a9ff4120-ee34-4588-af1a-77f62457531f"
},
"service_history" : [
{
"date" : ISODate("2020-04-22T12:10:55.622Z"),
"description" : "Cleaning",
"price" : 150.0,
"customer_rating" : 5
},
{
"date" : ISODate("2020-04-22T12:14:53.439Z"),
"description" : "Flat tire",
"price" : 100.0,
"customer_rating" : 2
},
{
"date" : ISODate("2020-04-22T12:30:54.071Z"),
"description" : "Waxing",
"price" : 77.0,
"customer_rating" : 4
}
]
}
I created the following query in Python and it returns all subdocuments:
cars = Car.objects().filter(service_history__customer_rating__lte=2)
When I run the following query directly in Mongo:
db.getCollection('cars').find({'service_history.customer_rating': {$lte: 2}})
It returns again all the subdocuments instead of just one.
I checked the documentation and everything seems correct.

db.getCollection('cars').find({'service_history.customer_rating': {$lte: 2}})
edit:
What happens here is it checks for the customer_rating and since it finds a match it returns the Document. The whole thing that is returned is the document.
PS: Sorry for the confusion earlier.

please look at the following query. Hope this will help you. You need to use MongoDB Aggregations to find $lte operator.
db.collection.aggregate([
{
$unwind: "$service_history"
},
{
$match: {
"_id": ObjectId("5ea0576c2671f799e1ad65db")
}
},
{
$match: {
$expr: {
$lte: [
"$service_history.customer_rating",
2
]
}
}
}
])
This would be the sample data you would be getting if you would use the above query.
[
{
"_id": ObjectId("5ea0576c2671f799e1ad65db"),
"engine": {
"horsepower": 400,
"liters": 5,
"mpg": 20,
"serial_number": "a9ff4120-ee34-4588-af1a-77f62457531f"
},
"make": "Volkswagen",
"mileage": 15000,
"model": "Volkswagen",
"service_history": {
"customer_rating": 2,
"date": ISODate("2020-04-22T12:14:53.439Z"),
"description": "Flat tire",
"price": 100
},
"vi_number": "ba37d9b98fe940dba100fdf0330c30f0",
"year": 1969
}
]
Updated query
db.collection.aggregate([
{
$unwind: "$service_history"
},
{
$match: {
$expr: {
$lte: [
"$service_history.customer_rating",
2
]
}
}
}
])
You can refer to this code.

Related

How to retrieve elasticsearch data from index based on timestamp?

I want to retrieve data from elasticsearch based on timestamp. The timestamp is in epoch_millis and I tried to retrieve the data like this:
{
"query": {
"bool": {
"must":[
{
"range": {
"TimeStamp": {
"gte": "1632844180",
"lte": "1635436180"
}
}
}
]
}
},
"size": 10
}
But the response is this:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
How can I retrieve data for a given period of time from a certain index?
The data looks like this:
{
"_index" : "my-index",
"_type" : "_doc",
"_id" : "zWpMNXcBTeKmGB84eksSD",
"_score" : 1.0,
"_source" : {
"Source" : "Market",
"Category" : "electronics",
"Value" : 20,
"Price" : 45.6468,
"Currency" : "EUR",
"TimeStamp" : 1611506922000 }
Also, the result has 10.000 hits when using the _search on the index. How could I access other entries? (more than 10.000 results) and to be able to choose the desired timestamp interval.
For your first question, assume that you have the mappings like this:
{
"mappings": {
"properties": {
"Source": {
"type": "keyword"
},
"Category": {
"type": "keyword"
},
"Value": {
"type": "integer"
},
"Price": {
"type": "float"
},
"Currency": {
"type": "keyword"
},
"TimeStamp": {
"type": "date"
}
}
}
}
Then I indexed 2 sample documents (1 is yours above, but the timestamp is definitely not in your range):
[{
"Source": "Market",
"Category": "electronics",
"Value": 30,
"Price": 55.6468,
"Currency": "EUR",
"TimeStamp": 1633844180000
},
{
"Source": "Market",
"Category": "electronics",
"Value": 20,
"Price": 45.6468,
"Currency": "EUR",
"TimeStamp": 1611506922000
}]
If you really need to query using the range above, you will first need to convert your TimeStamp field to seconds (/1000), then query based on that field:
{
"runtime_mappings": {
"secondTimeStamp": {
"type": "long",
"script": "emit(doc['TimeStamp'].value.millis/1000);"
}
},
"query": {
"bool": {
"must": [
{
"range": {
"secondTimeStamp": {
"gte": 1632844180,
"lte": 1635436180
}
}
}
]
}
},
"size": 10
}
Then you will get the first document.
About your second question, by default, Elasticsearch's max_result_window is only 10000. You can increase this limit by updating the settings, but it will increase the memory usage.
PUT /index/_settings
{
"index.max_result_window": 999999
}
You should use the search_after API instead.

PyMongo not returning results on aggregation

I'm a total beginner in PyMongo. I'm trying to find activities that are registered multiple times. This code is returning an empty list. Could you please help me in finding the mistake:
rows = self.db.Activity.aggregate( [
{ '$group':{
"_id":
{
"user_id": "$user_id",
"transportation_mode": "$transportation_mode",
"start_date_time": "$start_date_time",
"end_date_time": "$end_date_time"
},
"count": {'$sum':1}
}
},
{'$match':
{ "count": { '$gt': 1 } }
},
{'$project':
{"_id":0,
"user_id":"_id.user_id",
"transportation_mode":"_id.transportation_mode",
"start_date_time":"_id.start_date_time",
"end_date_time":"_id.end_date_time",
"count": 1
}
}
]
)
5 rows from db:
{ "_id" : 0, "user_id" : "000", "start_date_time" : "2008-10-23 02:53:04", "end_date_time" : "2008-10-23 11:11:12" }
{ "_id" : 1, "user_id" : "000", "start_date_time" : "2008-10-24 02:09:59", "end_date_time" : "2008-10-24 02:47:06" }
{ "_id" : 2, "user_id" : "000", "start_date_time" : "2008-10-26 13:44:07", "end_date_time" : "2008-10-26 15:04:07" }
{ "_id" : 3, "user_id" : "000", "start_date_time" : "2008-10-27 11:54:49", "end_date_time" : "2008-10-27 12:05:54" }
{ "_id" : 4, "user_id" : "000", "start_date_time" : "2008-10-28 00:38:26", "end_date_time" : "2008-10-28 05:03:42" }
Thank you
When you pass _id: 0 in the $project stage, it will not project the sub-objects even if they are projected in the follow up, since the rule is overwritten.
Try the below $project stage.
{
'$project': {
"user_id":"_id.user_id",
"transportation_mode":"_id.transportation_mode",
"start_date_time":"_id.start_date_time",
"end_date_time":"_id.end_date_time",
"count": 1
}
}
rows = self.db.Activity.aggregate( [
{
'$group':{
"_id": {
"user_id": "$user_id",
"transportation_mode": "$transportation_mode",
"start_date_time": "$start_date_time",
"end_date_time": "$end_date_time"
},
"count": {'$sum':1}
}
},
{
'$match':{
"count": { '$gt': 1 }
}
},
{
'$project': {
"user_id":"_id.user_id",
"transportation_mode":"_id.transportation_mode",
"start_date_time":"_id.start_date_time",
"end_date_time":"_id.end_date_time",
"count": 1,
}
}
])
Your group criteria is likely too narrow.
The $group stage will create a separate output document for each distinct value of the _id field. The pipeline in the question will only include two input documents in the same group if they have exactly the same value in all four of those fields.
In order for a count to be greater than 1, there must exist 2 documents with the same user, mode, and exactly the same start and end.
In the same data you show, there are no two documents that would be in the same group, so all of the output documents from the $group stage would have a count of 1, and therefore none of them satisfy the $match, and the return is an empty list.

Search a list of comma separated strings on elasticsearch

I have elasticsearch configured for my django project.
Elasticsearch index has two fields user_id and address, my goal is to search a list of comma separated addresses on elasticsearch.
Example:
i have this list of addresses ["abc", "def","ghi","jkl","mno"] and i want to search them on elasticsearch in one hit, the result i'm expecting for the above list is ["abc", "def","ghi"] if these three addresses "abc", "def" and "ghi" (individually) exist on elasticsearch in address field.
Ingest data
POST test_foki/_doc
{
"user_id": 1,
"address": "abc"
}
POST test_foki/_doc
{
"user_id": 2,
"address": "def"
}
POST test_foki/_doc
{
"user_id": 3,
"address": "ghi"
}
If you want to do exact matches then you can use a terms query to filter up by an array of addresses.
Request
We use filter because we dont care about score on exact matches (it matches or not)
POST test_foki/_search
{
"query": {
"bool": {
"filter": [
{
"terms": {
"address.keyword": [
"abc",
"def",
"ghi",
"jkl",
"mno"
]
}
}
]
}
}
}
Response
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "test_foki",
"_type" : "_doc",
"_id" : "YzkL4HcBv0SJscHMrZB8",
"_score" : 1.0,
"_source" : {
"user_id" : 1,
"address" : "abc"
}
},
{
"_index" : "test_foki",
"_type" : "_doc",
"_id" : "ZDkL4HcBv0SJscHMsZAx",
"_score" : 1.0,
"_source" : {
"user_id" : 2,
"address" : "def"
}
},
{
"_index" : "test_foki",
"_type" : "_doc",
"_id" : "ZTkL4HcBv0SJscHMtpAd",
"_score" : 1.0,
"_source" : {
"user_id" : 3,
"address" : "ghi"
}
}
]
}
}
If you want to do full text searches you will have to do a boolean query
POST test_foki/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"address": "abc"
}
},
{
"match": {
"address": "def"
}
},
{
"match": {
"address": "ghi"
}
},
{
"match": {
"address": "jkl"
}
},
{
"match": {
"address": "mno"
}
}
]
}
}
}
This produces the same Lucene query address:abc address:def address:ghi address:jkl address:mno
POST test_foki/_search
{
"query": {
"match": {
"address": "abc def ghi jkl mno"
}
}
}

Elastic Search composite grouping with range

Consider the following documents are in my elastic search . I want to group the documents based on rank, but any rank below 1000 must be displayed individually and anything above 1000 must be grouped how do I achieve this using composite aggregation, I am new and I am using composite because I want to use the after key function to allow pagination.
Documents
{
rank : 200,
name:abcd,
score1 :100,
score2:200
},
{
rank 300,
name:abcd,
score1:100,
score2:200
}
Expected Result:
{
key:{
rank:101
},
doc_count:1,
_score1: {value:3123}
_score2 : {value :3323}
}
{
key:{
rank:1000-*
},
doc_count:1,
_score1: {value:3123}
_score2 : {value :3323}
},
{
key:{
rank:300
},
doc_count:1,
_score1: {value:3123}
_score2 : {value :3323}
}
######## QUery that I tried
{
"query":{"match_all":{}},
"aggs":{
"_scores":{
"composite"{
"sources":[
{"_rank":{"terms":{"field":"rank"}}}
]
}
},
"aggs":{
"_ranks":{
"field":"rank:[
{"to":1000},
{"from":1000}
]
}
"_score1": {"sum": {"field": "score1"}}
"_score2": {"sum": {"field": "score2"}}
}
}
}
From what I understand, you want to
Group the aggregations whose value is below 1000 rank to their own buckets
Group the aggregations whose value is 1000 and above to a single bucket with key 1000-*
And for each buckets, calculate the sum of _score1 of all buckets
Similarly calculate the sum of _score2 of all buckets
For this scenario, you can simply make use of Terms Aggregation as I've mentioned in below answer.
I've mentioned sample mapping, sample documents, query and response so that you'll have clarity on what's happening.
Mapping:
PUT my_sample_index
{
"mappings": {
"properties": {
"rank":{
"type": "integer"
},
"name":{
"type": "keyword"
},
"_score1": {
"type":"integer"
},
"_score2":{
"type": "integer"
}
}
}
}
Sample Documents:
POST my_sample_index/_doc/1
{
"rank": 100,
"name": "john",
"_score1": 100,
"_score2": 100
}
POST my_sample_index/_doc/2
{
"rank": 1001, <--- Rank > 1000
"name": "constantine",
"_score1": 200,
"_score2": 200
}
POST my_sample_index/_doc/3
{
"rank": 200,
"name": "bruce",
"_score1": 100,
"_score2": 100
}
POST my_sample_index/_doc/4
{
"rank": 2001, <--- Rank > 1000
"name": "arthur",
"_score1": 200,
"_score2": 200
}
Aggregation Query:
POST my_sample_index/_search
{
"size":0,
"aggs": {
"_score": {
"terms": {
"script": {
"source": """
if(doc['rank'].value < 1000){
return doc['rank'];
}else
return '1000-*';
"""
}
},
"aggs":{
"_score1_sum":{
"sum": {
"field": "_score1"
}
},
"_score2_sum":{
"sum":{
"field": "_score2"
}
}
}
}
}
}
Note that I've used Scripted Terms Aggregation where I've mentioned by logic in the script. Logic I believe is self-explainable once you go through it.
Response:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"_score" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1000-*", <---- Note this
"doc_count" : 2, <---- Note this
"_score2_sum" : {
"value" : 400.0
},
"_score1_sum" : {
"value" : 400.0
}
},
{
"key" : "100",
"doc_count" : 1,
"_score2_sum" : {
"value" : 100.0
},
"_score1_sum" : {
"value" : 100.0
}
},
{
"key" : "200",
"doc_count" : 1,
"_score2_sum" : {
"value" : 100.0
},
"_score1_sum" : {
"value" : 100.0
}
}
]
}
}
}
Note that there are two keys having rank > 1000, both of their scores for _score1 and _score2 sum to 400, which is what is expected.
Let me know if this helps!

Mongodb aggregation with $or nested clauses

I have mongodb documents like this:
{
"_id" : ObjectId("5d35ba501545d248c383871f"),
"key1" : 1,
"currentTime" : ISODate("2019-07-18T19:41:54.000Z"),
"iState" : "START - 1",
"errGyro" : -4.0,
"states" : [
{
"ts" : 3,
"accY" : -165.877227783203,
"gyroZ" : 8.2994499206543,
},
{
"ts" : 4,
"accY" : -15.843573,
"gyroZ" : 12.434643,
},
{
"ts" : 3,
"accY" : 121.32667,
"gyroZ" : 98.45566,
}
]
}
I want to return all the states objects and the parent document where "ts" is 3 or 5.
I tried this query at first:
db.getCollection('logs').find(
{"states" :
{ "$elemMatch" : { "$or":[
{ "ts":
{ "$eq" : 3}
},
{ "ts":
{ "$eq" : 5}
}
]
}
}
},{"states.$":1 })
But this returns only the first "state" document where the "eq" occurred.
How can I return all the matching documents?
Thank you.
You can use aggregation pipelines
db.getCollection('logs').aggregate([
{
$unwind: "$states"
},
{
$match: {
$or: [
{ "states.ts": 3 },
{ "states.ts": 5 },
]
}
},
{
$group: {
_id: "$_id",
"key1": { $first: "key1" },
"currentTime": { $first: "currentTime" },
"iState": { $first: "$iState" },
"errGyro": { $first: "$errGyro" },
states: { $push: "$states" }
}
}
])
As $elemMatch returns only the first matching element of array, you have to use aggregation to achieve your goal. Here's the query :
db.collection.aggregate([
{
$match: {
$or: [
{
"states.ts": {
$eq: 3
}
},
{
"states.ts": {
$eq: 5
}
}
]
}
},
{
$project: {
states: 1
}
},
{
$unwind: "$states"
},
{
$match: {
$or: [
{
"states.ts": {
$eq: 3
}
},
{
"states.ts": {
$eq: 5
}
}
]
}
},
{
$group: {
_id: "$_id",
states: {
$push: "$states"
}
}
}
])
First $match and $project stages are here for query optimization and memory save, if many documents are returned.

Categories

Resources