MongoDB - How to aggregate data for each record - python

I have some stored data like this:
{
"_id" : 1,
"serverAddresses" : {
"name" : "0.0.0.0:8000",
"name2": "0.0.0.0:8001"
}
}
I need aggregated data to this:
[
{
"gameId": "1",
"name": "name1",
"url": "0.0.0.0:8000"
},
{
"gameId": "1",
"name": "name2",
"url": "0.0.0.0:8001"
}
]
What is the solution without using for loop?

$project - Add addresses field by converting $serverAddress to (key-value) array.
$unwind - Descontruct addresses field to multiple documents.
$replaceRoot - Decorate the output document based on (2).
db.collection.aggregate([
{
"$project": {
"addresses": {
"$objectToArray": "$serverAddresses"
}
}
},
{
$unwind: "$addresses"
},
{
"$replaceRoot": {
"newRoot": {
gameId: "$_id",
name: "$addresses.k",
address: "$addresses.v"
}
}
}
])
Sample Mongo Playground

Related

cant do case insensitive search in elastic search

I'm new to elastic search and trying to do this query right.
So I'm having a document like this:
{
"id": 1,
"name": "Văn Hiến"
}
I want to get that document in 3 cases:
1/ User input is: "v" or "h" or "i",...
2/ User input is: "Văn" or "văn" or "hiến",...
3/ User input is: "va" or "van" or "van hi",...
I'm currently can search for case 1 and 2, but not case 3, where the user input don't have the 'tonal' of the Vietnamese language
This is my query, I'm using Python:
query = {
"bool": {
"should": [
{
"match": {
"name": name.lower()
}
},
{
"wildcard": {
"name": {
"value": f"*{name.lower()}*"
}
}
}
]
}
}
Can anyone help me with this? Any helps will be apperciated
Use the lowercase_filter and mapping_character_filter functions in your mapping.
the following mapping and query will work for all the three usecases you mentioned
Mapping Example:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"type": "custom",
"tokenizer": "my_tokenizer",
"filter": [
"lowercase"
],
"char_filter": [
"my_mappings_char_filter"
]
}
},
"char_filter": {
"my_mappings_char_filter": {
"type": "mapping",
"mappings": [
"ă => a",
"ế => e"
]
}
},
"tokenizer": {
"my_tokenizer": {
"type": "ngram",
"min_gram": 1,
"max_gram": 10,
"token_chars": [
"letter"
]
}
}
},
"max_ngram_diff" : "9"
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "my_analyzer",
"fields": {
"facet": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
Example Query:
{
"query" : {
"query_string" :{
"query":"van hi",
"type": "best_fields",
"default_field": "name"
}
}
}

How to retrieve elasticsearch data from index based on timestamp?

I want to retrieve data from elasticsearch based on timestamp. The timestamp is in epoch_millis and I tried to retrieve the data like this:
{
"query": {
"bool": {
"must":[
{
"range": {
"TimeStamp": {
"gte": "1632844180",
"lte": "1635436180"
}
}
}
]
}
},
"size": 10
}
But the response is this:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
How can I retrieve data for a given period of time from a certain index?
The data looks like this:
{
"_index" : "my-index",
"_type" : "_doc",
"_id" : "zWpMNXcBTeKmGB84eksSD",
"_score" : 1.0,
"_source" : {
"Source" : "Market",
"Category" : "electronics",
"Value" : 20,
"Price" : 45.6468,
"Currency" : "EUR",
"TimeStamp" : 1611506922000 }
Also, the result has 10.000 hits when using the _search on the index. How could I access other entries? (more than 10.000 results) and to be able to choose the desired timestamp interval.
For your first question, assume that you have the mappings like this:
{
"mappings": {
"properties": {
"Source": {
"type": "keyword"
},
"Category": {
"type": "keyword"
},
"Value": {
"type": "integer"
},
"Price": {
"type": "float"
},
"Currency": {
"type": "keyword"
},
"TimeStamp": {
"type": "date"
}
}
}
}
Then I indexed 2 sample documents (1 is yours above, but the timestamp is definitely not in your range):
[{
"Source": "Market",
"Category": "electronics",
"Value": 30,
"Price": 55.6468,
"Currency": "EUR",
"TimeStamp": 1633844180000
},
{
"Source": "Market",
"Category": "electronics",
"Value": 20,
"Price": 45.6468,
"Currency": "EUR",
"TimeStamp": 1611506922000
}]
If you really need to query using the range above, you will first need to convert your TimeStamp field to seconds (/1000), then query based on that field:
{
"runtime_mappings": {
"secondTimeStamp": {
"type": "long",
"script": "emit(doc['TimeStamp'].value.millis/1000);"
}
},
"query": {
"bool": {
"must": [
{
"range": {
"secondTimeStamp": {
"gte": 1632844180,
"lte": 1635436180
}
}
}
]
}
},
"size": 10
}
Then you will get the first document.
About your second question, by default, Elasticsearch's max_result_window is only 10000. You can increase this limit by updating the settings, but it will increase the memory usage.
PUT /index/_settings
{
"index.max_result_window": 999999
}
You should use the search_after API instead.

How to filter elements of array in mongodb

i have a document in mongodb:
{
"company": "npcompany",
"department": [
{
"name": "it",
"employeeIds": [
"emp1",
"emp2",
"emp3"
]
},
{
"name": "economy",
"employeeIds": [
"emp1",
"emp3",
"emp4"
]
}
]
}
I want to find "emp4". In this case i want to get "economy" department data only. If i found "emp1" then i want to get "npcompany" and "economy" datas. How can i do it in mongodb (or pymongo)?
play
db.collection.aggregate([ //As you need to fetch all matching array elements, reshape them
{
$unwind: "$department"
},
{
"$match": {//look for match
"department.employeeIds": "emp4"
}
},
{
$group: {//regroup them
"_id": "$_id",
data: {
"$push": "$$ROOT"
}
}
}
])

Update or edit a subarray in a MongoDB document

I am currently using this to push a 'review' to my array of reviews in my perfumes collection:
mongo.db.perfumes.update(
{"_id": perfume["_id"]},
{
"$push": {
"reviews": {
"_id": review_id,
"review_content": form.review.data,
"reviewer": current_user.username,
"date_reviewed": datetime.utcnow(),
"reviewer_picture": current_user.avatar,
}
}
},
)
So as a result my document is:
[
{
"_id": {
"$oid": "5ebf29dd1f3fe19434e41761"
},
"author": "Guillermo",
"brand": "A test brand",
"name": "A test perfume",
"perfume_type": "Woody",
"description": "<p>A test description</p>",
"date_updated": {
"$date": "2020-05-15T23:46:37.242Z"
},
"public": false,
"picture": "generic.png",
"reviews": [
{
"_id": {
"$oid": "5ebf29e90000000000000000"
},
"review_content": "<p>A test review</p>",
"reviewer": "Guillermo",
"date_reviewed": {
"$date": "2020-05-15T23:46:49.308Z"
},
"reviewer_picture": "a92de23ae01cdfde.jpg"
}
]
}
]
I want to create another route to update or edit the contents of my review (review_content).
What's the way to update that subarray in my collection?
Thank you!!
Let's assume you want to update review_content of a particular review you will use below query
mongo.db.perfumes.update(
{"_id": perfume["_id"], "reviews._id": review["_id"]},
{ $set: { "reviews.$.review_content" : "This is my new content"} },
)

How do you query an array in mongodb

{
"_id":123456,
"Menu":[{
"Dish":"Apple pie",
"Rating": "Good",
"Method": "Oven Baked"
},
{
"Dish":"Pumpkin Pie",
"Rating": "Bad",
"Method": "Baked"
},
{
"Dish":"Tomato Soup",
"Rating": "Good",
"Method": "Boiled"
}]
}
How do I query this array if I would only like to display the values in the field "Method"?
How do I solve this with Pymongo?
Use the below Mongo Shell query
db.mycollection.find({_id:123456}, {"Menu.Method":1})
This query will yield the below result on your sample document
{
"_id" : 123456,
"Menu" : [
{
"Method" : "Oven Baked"
},
{
"Method" : "Baked"
},
{
"Method" : "Boiled"
}
]
}

Categories

Resources