How do you query an array in MongoDB with Python?

{
  "_id": 123456,
  "Menu": [
    {
      "Dish": "Apple pie",
      "Rating": "Good",
      "Method": "Oven Baked"
    },
    {
      "Dish": "Pumpkin Pie",
      "Rating": "Bad",
      "Method": "Baked"
    },
    {
      "Dish": "Tomato Soup",
      "Rating": "Good",
      "Method": "Boiled"
    }
  ]
}
How do I query this array so that only the values in the "Method" field are displayed?
How do I solve this with PyMongo?

Use the following mongo shell query:
db.mycollection.find({ _id: 123456 }, { "Menu.Method": 1 })
Against your sample document, this query yields:
{
  "_id": 123456,
  "Menu": [
    {
      "Method": "Oven Baked"
    },
    {
      "Method": "Baked"
    },
    {
      "Method": "Boiled"
    }
  ]
}
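Since you ask about PyMongo: the same projection works unchanged from Python. A minimal sketch (the connection URI, database name, and collection name are assumptions; adjust them to your setup):
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")  # assumed URI
collection = client["mydatabase"]["mycollection"]  # assumed names

# The second argument is the projection: include only Menu.Method
# (_id is returned by default).
doc = collection.find_one({"_id": 123456}, {"Menu.Method": 1})

if doc is not None:
    methods = [entry["Method"] for entry in doc["Menu"]]
    print(methods)  # ['Oven Baked', 'Baked', 'Boiled']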

MongoDB - How to aggregate data for each record

I have some stored data like this:
{
  "_id": 1,
  "serverAddresses": {
    "name1": "0.0.0.0:8000",
    "name2": "0.0.0.0:8001"
  }
}
I need the data aggregated like this:
[
  {
    "gameId": "1",
    "name": "name1",
    "url": "0.0.0.0:8000"
  },
  {
    "gameId": "1",
    "name": "name2",
    "url": "0.0.0.0:8001"
  }
]
What is the solution without using a for loop?
1. $project - Add an addresses field by converting $serverAddresses to a (key-value) array.
2. $unwind - Deconstruct the addresses field into multiple documents.
3. $replaceRoot - Shape the output document based on (2).
db.collection.aggregate([
  {
    "$project": {
      "addresses": {
        "$objectToArray": "$serverAddresses"
      }
    }
  },
  {
    "$unwind": "$addresses"
  },
  {
    "$replaceRoot": {
      "newRoot": {
        "gameId": "$_id",
        "name": "$addresses.k",
        "url": "$addresses.v"
      }
    }
  }
])
Sample Mongo Playground
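If you are calling this from Python, the same pipeline can be passed verbatim to PyMongo's aggregate() (a sketch; the collection handle is an assumption):
pipeline = [
    {"$project": {"addresses": {"$objectToArray": "$serverAddresses"}}},
    {"$unwind": "$addresses"},
    {"$replaceRoot": {"newRoot": {
        "gameId": "$_id",
        "name": "$addresses.k",
        "url": "$addresses.v",
    }}},
]
# collection is a pymongo.collection.Collection for your data
results = list(collection.aggregate(pipeline))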

How to retrieve Elasticsearch data from an index based on timestamp?

I want to retrieve data from Elasticsearch based on timestamp. The timestamp is in epoch_millis, and I tried to retrieve the data like this:
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "TimeStamp": {
              "gte": "1632844180",
              "lte": "1635436180"
            }
          }
        }
      ]
    }
  },
  "size": 10
}
But the response is this:
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 0,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  }
}
How can I retrieve data for a given period of time from a certain index?
The data looks like this:
{
  "_index": "my-index",
  "_type": "_doc",
  "_id": "zWpMNXcBTeKmGB84eksSD",
  "_score": 1.0,
  "_source": {
    "Source": "Market",
    "Category": "electronics",
    "Value": 20,
    "Price": 45.6468,
    "Currency": "EUR",
    "TimeStamp": 1611506922000
  }
}
Also, the result is capped at 10,000 hits when using _search on the index. How could I access the other entries (more than 10,000 results) while still being able to choose the desired timestamp interval?
For your first question, assume that you have a mapping like this:
{
  "mappings": {
    "properties": {
      "Source": { "type": "keyword" },
      "Category": { "type": "keyword" },
      "Value": { "type": "integer" },
      "Price": { "type": "float" },
      "Currency": { "type": "keyword" },
      "TimeStamp": { "type": "date" }
    }
  }
}
Then I indexed 2 sample documents (one is yours from above, whose timestamp is definitely not in your range):
[
  {
    "Source": "Market",
    "Category": "electronics",
    "Value": 30,
    "Price": 55.6468,
    "Currency": "EUR",
    "TimeStamp": 1633844180000
  },
  {
    "Source": "Market",
    "Category": "electronics",
    "Value": 20,
    "Price": 45.6468,
    "Currency": "EUR",
    "TimeStamp": 1611506922000
  }
]
If you really need to query using the range above (which is in epoch seconds), you will first need to convert your TimeStamp field from milliseconds to seconds (/1000) with a runtime field, then query against that field:
{
  "runtime_mappings": {
    "secondTimeStamp": {
      "type": "long",
      "script": "emit(doc['TimeStamp'].value.millis/1000);"
    }
  },
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "secondTimeStamp": {
              "gte": 1632844180,
              "lte": 1635436180
            }
          }
        }
      ]
    }
  },
  "size": 10
}
Then you will get the first document.
About your second question: by default, Elasticsearch's max_result_window is only 10,000. You can increase this limit by updating the index settings, but doing so will increase memory usage.
PUT /index/_settings
{
  "index.max_result_window": 999999
}
You should use the search_after parameter instead.
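A minimal search_after sketch in Python, in the requests style used elsewhere on this page (the index name and page size are assumptions): sort the results, then feed the last hit's sort values into the next request.
import requests

uri = "http://localhost:9200/my-index/_search"  # index name is an assumption
body = {
    "size": 1000,
    "query": {"match_all": {}},
    # Sort on TimeStamp; add a unique tiebreaker field to the sort if
    # several documents can share the same TimeStamp value.
    "sort": [{"TimeStamp": "asc"}],
}

all_docs = []
while True:
    hits = requests.get(uri, json=body).json()["hits"]["hits"]
    if not hits:
        break
    all_docs.extend(hit["_source"] for hit in hits)
    # Resume the next page right after the last hit of this one.
    body["search_after"] = hits[-1]["sort"]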

Update or edit a subarray in a MongoDB document

I am currently using this to push a 'review' to my array of reviews in my perfumes collection:
mongo.db.perfumes.update(
    {"_id": perfume["_id"]},
    {
        "$push": {
            "reviews": {
                "_id": review_id,
                "review_content": form.review.data,
                "reviewer": current_user.username,
                "date_reviewed": datetime.utcnow(),
                "reviewer_picture": current_user.avatar,
            }
        }
    },
)
So as a result my document is:
[
  {
    "_id": {
      "$oid": "5ebf29dd1f3fe19434e41761"
    },
    "author": "Guillermo",
    "brand": "A test brand",
    "name": "A test perfume",
    "perfume_type": "Woody",
    "description": "<p>A test description</p>",
    "date_updated": {
      "$date": "2020-05-15T23:46:37.242Z"
    },
    "public": false,
    "picture": "generic.png",
    "reviews": [
      {
        "_id": {
          "$oid": "5ebf29e90000000000000000"
        },
        "review_content": "<p>A test review</p>",
        "reviewer": "Guillermo",
        "date_reviewed": {
          "$date": "2020-05-15T23:46:49.308Z"
        },
        "reviewer_picture": "a92de23ae01cdfde.jpg"
      }
    ]
  }
]
I want to create another route to update or edit the contents of my review (review_content).
What's the way to update that subarray in my collection?
Thank you!!
Let's assume you want to update the review_content of a particular review. You can use the query below, where the positional operator $ targets the matched element of the reviews array:
mongo.db.perfumes.update(
    {"_id": perfume["_id"], "reviews._id": review["_id"]},
    {"$set": {"reviews.$.review_content": "This is my new content"}},
)
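Note that Collection.update() is deprecated in PyMongo 3.x and removed in 4.0; update_one() is the modern equivalent of the same query:
mongo.db.perfumes.update_one(
    {"_id": perfume["_id"], "reviews._id": review["_id"]},
    {"$set": {"reviews.$.review_content": "This is my new content"}},
)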

How to query MongoDB to get data in the following format?

Expected Query Output
food = {
    'fruit': ['apple', 'banana', 'cherry'],
    'vegetables': ['onion', 'cucumber'],
}
Data Format in Database
[
  { "category": "fruit", "name": "banana" },
  { "category": "fruit", "name": "apple" },
  { "category": "fruit", "name": "cherry" },
  { "category": "vegetables", "name": "onion" },
  { "category": "vegetables", "name": "cucumber" }
]
Basically, I need to fetch each distinct category and the list of names under it from MongoDB.
TIA
db.collection.aggregate([
  {
    "$group": {
      "_id": "$category",
      "list": { "$addToSet": "$name" }
    }
  },
  {
    "$addFields": {
      "array": [{ "k": "$_id", "v": "$list" }]
    }
  },
  {
    "$replaceRoot": {
      "newRoot": { "$arrayToObject": "$array" }
    }
  }
])
Working example: https://mongoplayground.net/p/bccPDlORK7W
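If you are querying from Python, you only need the $group stage: the last two stages just reshape the documents server-side, and a dict comprehension does the same job client-side. A sketch (the collection handle is an assumption):
pipeline = [
    {"$group": {"_id": "$category", "list": {"$addToSet": "$name"}}},
]
# One document per category, e.g. {"_id": "fruit", "list": [...]}
food = {doc["_id"]: doc["list"] for doc in collection.aggregate(pipeline)}
# food == {'fruit': [...], 'vegetables': [...]} (set order is not guaranteed)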

Improving Elasticsearch performance

I'm using Elasticsearch in a Python web app in order to query news documents. There are about 100,000 documents in the database.
The original database is MongoDB, and Elasticsearch is plugged in through the mongoriver plugin.
The problem is that the function takes ~850 ms to return the results. I'd like to decrease that number as much as possible.
Here's the Python code I'm using to query the db (the limit is usually 16):
def search_news(term, limit, page, flagged_articles):
    query = {
        "query": {
            "from": page * limit,
            "size": limit,
            "multi_match": {
                "query": term,
                "fields": ["title^3", "category^5", "entities.name^5", "art_text^1", "summary^1"]
            }
        },
        "filter": {
            "not": {
                "filter": {
                    "ids": {
                        "values": flagged_articles
                    }
                },
                "_cache": True
            }
        }
    }
    es_query = json_util.dumps(query)
    uri = 'http://localhost:9200/newsidx/_search'
    r = requests.get(uri, data=es_query)
    results = json.loads(r.text)
    data = []
    for res in results['hits']['hits']:
        data.append(res['_source'])
    return data
And here's the index mapping:
{
  "news": {
    "properties": {
      "actual_rank": { "type": "long" },
      "added": { "type": "date", "format": "dateOptionalTime" },
      "api_id": { "type": "long" },
      "art_text": { "type": "string" },
      "category": { "type": "string" },
      "downvotes": { "type": "long" },
      "entities": {
        "properties": {
          "etype": { "type": "string" },
          "name": { "type": "string" }
        }
      },
      "flags": {
        "properties": {
          "a": { "type": "long" },
          "b": { "type": "long" },
          "bad_image": { "type": "long" },
          "c": { "type": "long" },
          "d": { "type": "long" },
          "innapropiate": { "type": "long" },
          "irrelevant_info": { "type": "long" },
          "miscategorized": { "type": "long" }
        }
      },
      "media": { "type": "string" },
      "published": { "type": "string" },
      "published_date": { "type": "date", "format": "dateOptionalTime" },
      "show": { "type": "boolean" },
      "source": { "type": "string" },
      "source_rank": { "type": "double" },
      "summary": { "type": "string" },
      "times_showed": { "type": "long" },
      "title": { "type": "string" },
      "top_entities": {
        "properties": {
          "einfo_test": { "type": "string" },
          "etype": { "type": "string" },
          "name": { "type": "string" }
        }
      },
      "tweet_article_poster": { "type": "string" },
      "tweet_favourites": { "type": "long" },
      "tweet_retweets": { "type": "long" },
      "tweet_user_rank": { "type": "double" },
      "upvotes": { "type": "long" },
      "url": { "type": "string" }
    }
  }
}
Edit: The response time was measured on the server, based on the Tornado server's information output.
I've rewritten your query somewhat here, moving from and size to the outer scope, wrapping the query in a filtered clause, and changing your not filter to a bool/must_not filter, which should be cached by default:
{
  "query": {
    "filtered": {
      "query": {
        "multi_match": {
          "query": term,
          "fields": ["title^3", "category^5", "entities.name^5", "art_text^1", "summary^1"]
        }
      },
      "filter": {
        "bool": {
          "must_not": {
            "ids": { "values": flagged_articles }
          }
        }
      }
    }
  },
  "from": page * limit,
  "size": limit
}
I haven't tested this, and I haven't made sense of your mapping as it is jumbled, so there might be some improvements to be made there.
Edit: This is a great read on why to use the bool filter: http://www.elasticsearch.org/blog/all-about-elasticsearch-filter-bitsets/ - in short, bool uses 'bitsets', which are very fast on subsequent queries.
First of all, you can add the boosts to your mapping (assuming this doesn't interfere with your other queries) like this:
"title": {
"boost": 3.0,
"type": "string"
},
"category": {
"boost": 5.0,
"type": "string"
},
etc.
Then set up a bool query with field (or term) queries like this:
"query": {
"bool" : {
"should" : [ {
"field" : {
"title" : term
}
}, {
"field" : {
"category" : term
}
} ],
"must_not" : {
"ids" : {"values" : flagged_articles}
}
}
}
"from": page * limit,
"size": limit
This should perform better, but without access to your setup I can't test it :)
