Update or edit a subarray in a MongoDB document - python

I am currently using this to push a 'review' to my array of reviews in my perfumes collection:
mongo.db.perfumes.update(
{"_id": perfume["_id"]},
{
"$push": {
"reviews": {
"_id": review_id,
"review_content": form.review.data,
"reviewer": current_user.username,
"date_reviewed": datetime.utcnow(),
"reviewer_picture": current_user.avatar,
}
}
},
)
So as a result my document is:
[
{
"_id": {
"$oid": "5ebf29dd1f3fe19434e41761"
},
"author": "Guillermo",
"brand": "A test brand",
"name": "A test perfume",
"perfume_type": "Woody",
"description": "<p>A test description</p>",
"date_updated": {
"$date": "2020-05-15T23:46:37.242Z"
},
"public": false,
"picture": "generic.png",
"reviews": [
{
"_id": {
"$oid": "5ebf29e90000000000000000"
},
"review_content": "<p>A test review</p>",
"reviewer": "Guillermo",
"date_reviewed": {
"$date": "2020-05-15T23:46:49.308Z"
},
"reviewer_picture": "a92de23ae01cdfde.jpg"
}
]
}
]
I want to create another route to update or edit the contents of my review (review_content).
What's the way to update that subarray in my collection?
Thank you!!

Let's assume you want to update review_content of a particular review you will use below query
mongo.db.perfumes.update(
{"_id": perfume["_id"], "reviews._id": review["_id"]},
{ $set: { "reviews.$.review_content" : "This is my new content"} },
)

Related

cant do case insensitive search in elastic search

I'm new to elastic search and trying to do this query right.
So I'm having a document like this:
{
"id": 1,
"name": "Văn Hiến"
}
I want to get that document in 3 cases:
1/ User input is: "v" or "h" or "i",...
2/ User input is: "Văn" or "văn" or "hiến",...
3/ User input is: "va" or "van" or "van hi",...
I'm currently can search for case 1 and 2, but not case 3, where the user input don't have the 'tonal' of the Vietnamese language
This is my query, I'm using Python:
query = {
"bool": {
"should": [
{
"match": {
"name": name.lower()
}
},
{
"wildcard": {
"name": {
"value": f"*{name.lower()}*"
}
}
}
]
}
}
Can anyone help me with this? Any helps will be apperciated
Use the lowercase_filter and mapping_character_filter functions in your mapping.
the following mapping and query will work for all the three usecases you mentioned
Mapping Example:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"type": "custom",
"tokenizer": "my_tokenizer",
"filter": [
"lowercase"
],
"char_filter": [
"my_mappings_char_filter"
]
}
},
"char_filter": {
"my_mappings_char_filter": {
"type": "mapping",
"mappings": [
"ă => a",
"ế => e"
]
}
},
"tokenizer": {
"my_tokenizer": {
"type": "ngram",
"min_gram": 1,
"max_gram": 10,
"token_chars": [
"letter"
]
}
}
},
"max_ngram_diff" : "9"
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "my_analyzer",
"fields": {
"facet": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
Example Query:
{
"query" : {
"query_string" :{
"query":"van hi",
"type": "best_fields",
"default_field": "name"
}
}
}

How to update a value inside an object which is inside array of another object in mongodb using python code

My collection looks like this
{"ingr": [
{
"ingrName": [
{
"_id": "57aa56e2a06b57b",
"name": "abc",
"type": "ingr"
}
],
"_id": {
"$oid": "62232cd70ce38c50"
},
"quantity": "1.0",
},
{
"ingr": [
{
"_id": "607e7fcca57aa",
"name": "xyz",
"type": "ingr"
}
],
"_id": {
"$oid": "62232cd70ce38c"
},
"quantity": "1.0"
}
}}
I just want to change the id and type based on the object id. what i tried is
db1.update_one({
'ingr.$._id': ObjectId("62232cd70ce38c50")
},
{
'$set': {
"ingr.ingrName.$.type":"alternate",
"ingr.ingrName.$._id":"abc123"
}
})
But the values are not changing.Help me to find the mistake I making. Thanks
expected output
{"ingr": [
{
"ingrName": [
{
"_id": "abc123",
"name": "abc",
"type": "alternate"
}
],
"_id": {
"$oid": "62232cd70ce38c50"
},
"quantity": "1.0",
}
i need to change the id and type

groupby query on joined collection in flask mongoDB

I am currently stuck in this problem, i am relatively new to MongoDB, and i have to retrieve number of reports(count of reports done by users ) for a specific user with his name(name), last reported time(time of last reported post), last reason(report_description) ,
i am stuck here since 2 days now, help will be appreciated .
reported posts collection
{
"created_at": {
"$date": "2021-12-21T18:45:27.489Z"
},
"updated_at": {
"$date": "2021-12-21T18:45:27.489Z"
},
"post_id": {
"$oid": "61955ac35b3475f1d9759255"
},
"user_id": 2,
"report_type": "this is test",
"report_description": "this"
}
Post collection
{
"created_at": {
"$date": "2021-11-17T19:24:53.484Z"
},
"updated_at": {
"$date": "2021-11-17T19:24:53.484Z"
},
"user_id": 8,
"privacy_type": "public",
"post_type": "POST",
"post": "Om Sai Ram",
"total_like": 7,
"total_comment": 0,
"total_share": 0,
"image_url_list": [{
"image_url": "post_images/user-8/a31e39334987463bb9faa964391a935e.jpg",
"image_ratio": "1"
}],
"video_url_list": [],
"tag_list": [],
"is_hidden": false
}
User collection
{
"name": "sathish",
"user_id": 1,
"device_id": "faTOi3aVTjyQnBPFz0L7xm:APA91bHNLE9anWYrKWfwoHgmGWL2BlbWqgiVjU5iy7JooWxu26Atk9yZFxVnNp2OF1IXrXm4I6HdVJPGukEppQjSiUPdMoQ64KbOt78rpctxnYWPWliLrdxc9o1VdKL0DGYwE7Y6hx1H",
"user_name": "sathishkumar",
"updated_at": {
"$date": "2021-11-17T19:13:52.668Z"
},
"profile_picture_url": "1"
}
flask_snip.py
flagged_posts = mb.db_report.aggregate([{
'$group':{
'_id':'$user_id',
}
}])
expected out should be list e.g
[
{
'user_id':1,
'name' :'somename',
'no_of_reports':30,
'last_reported_time':sometime,
'reason':'reason_of lastreported_post',
'post_link':'someurl',
},
{
'user_id':2,
'name' :'somename',
'no_of_reports':30,
'last_reported_time':sometime,
'reason':'reason_of last_reported_post',
'post_link':'someurl',
},
{
'user_id':3,
'name' :'somename',
'no_of_reports':30,
'last_reported_time':sometime,
'reason':'reason_of lastreported_post',
'post_link':'someurl',
},
]
Starting from the reported collection, you can $group to get the last_reason and last_reported_time. Then, perform a $lookup to user collection to get the name.
db.reported.aggregate([
{
"$sort": {
updated_at: -1
}
},
{
"$group": {
"_id": "$user_id",
"last_reported_time": {
"$first": "$updated_at"
},
"last_reason": {
"$first": "$report_description"
},
"no_of_reports": {
$sum: 1
}
}
},
{
"$lookup": {
"from": "user",
"localField": "_id",
"foreignField": "user_id",
"as": "userLookup"
}
},
{
"$unwind": "$userLookup"
},
{
"$project": {
"user_id": "$_id",
"name": "$userLookup.user_name",
"no_of_reports": 1,
"last_reported_time": 1,
"last_reason": 1
}
}
])
Here is the Mongo playground for your reference.

Elasticsearch - IndicesClient.put_settings not working

I am trying to update my original index settings.
My initial setting looks like this:
client.create(index = "movies", body= {
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"my_custom_stop_words": {
"type": "stop",
"stopwords": stop_words
}
},
"analyzer": {
"my_custom_analyzer": {
"filter": [
"lowercase",
"my_custom_stop_words"
],
"type": "custom",
"tokenizer": "standard"
}
}
}
},
"mappings": {
"properties": {
"body": {
"type": "text",
"analyzer": "my_custom_analyzer",
"search_analyzer": "my_custom_analyzer",
"search_quote_analyzer": "my_custom_analyzer"
}
}
}
},
ignore=400
)
And I am trying to add the synonym filter to my existing analyzer (my_custom_analyzer) using client.put_settings:
client.put_settings(index='movies', body={
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"analyzer": {
"my_custom_analyzer": {
"filter": [
"lowercase",
"my_stops",
"my_synonyms"
],
"type": "custom",
"tokenizer": "standard"
}
},
"filter": {
"my_custom_stops": {
"type": "stop",
"stopwords": stop_words
},
"my_custom_synonyms": {
"ignore_case": "true",
"type": "synonym",
"synonyms": ["Harry Potter, HP => HP", "Terminator, TM => TM"]
}
}
}
},
"mappings": {
"properties": {
"body": {
"type": "text",
"analyzer": "my_custom_analyzer",
"search_analyzer": "my_custom_analyzer",
"search_quote_analyzer": "my_custom_analyzer"
}
}
}
},
ignore=400
)
However, when I issue a search query (searching for "HP") that queries the movies index and I'm trying to rank the documents so that the document containing "Harry Potter" 5 times is the top element in the list. Right now, it seems like the document with "HP" 3 times tops the list, so the synonyms filter isn't working. I've closed movies index before I do client.put_settings and then re-opened the index.
Any help would be greatly appreciated!
You should re-index all your data in order to apply the updated settings on all your data and fields.
The data that had already been indexed won't be affected by the updated analyzer, only documents that has been indexed after you updated the settings will be affected.
Not re-indexing your data might produce incorrect results since your old data is analyzed with the old custom analyzer and not with the new one.
The most efficient way to resolve this issue is to create a new index, and move your data from the old one to the new one with the updated settings.
Reindex Api
Follow these steps:
POST _reindex
{
"source": {
"index": "movies"
},
"dest": {
"index": "new_movies"
}
}
DELETE movies
PUT movies
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"analyzer": {
"my_custom_analyzer": {
"filter": [
"lowercase",
"my_custom_stops",
"my_custom_synonyms"
],
"type": "custom",
"tokenizer": "standard"
}
},
"filter": {
"my_custom_stops": {
"type": "stop",
"stopwords": "stop_words"
},
"my_custom_synonyms": {
"ignore_case": "true",
"type": "synonym",
"synonyms": [
"Harry Potter, HP => HP",
"Terminator, TM => TM"
]
}
}
}
},
"mappings": {
"properties": {
"body": {
"type": "text",
"analyzer": "my_custom_analyzer",
"search_analyzer": "my_custom_analyzer",
"search_quote_analyzer": "my_custom_analyzer"
}
}
}
}
POST _reindex?wait_for_completion=false
{
"source": {
"index": "new_movies"
},
"dest": {
"index": "movies"
}
}
After you've verified all your data is in place you can delete new_movies index. DELETE new_movies
Hope these help

How to flatten JSON response from Surveymonkey API

I'm setting up a Python function to use the Surveymonkey API to get survey responses from Surveymonkey.
The API returns responses in a JSON format with a deep recursive file structure.
I'm having issues trying to flatten this JSON so that it can go into Google Cloud Storage.
I have tried to flatten the response using the following code. Which works; however, it does not transform it to the format that I am looking for.
{
"per_page": 2,
"total": 1,
"data": [
{
"total_time": 0,
"collection_mode": "default",
"href": "https://api.surveymonkey.com/v3/responses/5007154325",
"custom_variables": {
"custvar_1": "one",
"custvar_2": "two"
},
"custom_value": "custom identifier for the response",
"edit_url": "https://www.surveymonkey.com/r/",
"analyze_url": "https://www.surveymonkey.com/analyze/browse/",
"ip_address": "",
"pages": [
{
"id": "73527947",
"questions": [
{
"id": "273237811",
"answers": [
{
"choice_id": "1842351148"
},
{
"text": "I might be text or null",
"other_id": "1842351149"
}
]
},
{
"id": "273240822",
"answers": [
{
"choice_id": "1863145815",
"row_id": "1863145806"
},
{
"text": "I might be text or null",
"other_id": "1863145817"
}
]
},
{
"id": "273239576",
"answers": [
{
"choice_id": "1863156702",
"row_id": "1863156701"
},
{
"text": "I might be text or null",
"other_id": "1863156707"
}
]
},
{
"id": "296944423",
"answers": [
{
"text": "I might be text or null"
}
]
}
]
}
],
"date_modified": "1970-01-17T19:07:34+00:00",
"response_status": "completed",
"id": "5007154325",
"collector_id": "50253586",
"recipient_id": "0",
"date_created": "1970-01-17T19:07:34+00:00",
"survey_id": "105723396"
}
],
"page": 1,
"links": {
"self": "https://api.surveymonkey.com/v3/surveys/123456/responses/bulk?page=1&per_page=2"
}
}
answers_df = json_normalize(data=response_json['data'],
record_path=['pages', 'questions', 'answers'],
meta=['id', ['pages', 'questions', 'id'], ['pages', 'id']])
Instead of returning a row for each question id, I need it to return a column for each question id, choice_id, and text field.
The columns I would like to see are total_time, collection_mode, href, custom_variables.custvar_1, custom_variables.custvar_2, custom_value, edit_url, analyze_url, ip_address, pages.id, pages.questions.0.id, pages.questions.0.answers.0.choice_id, pages.questions.0.answers.0.text, pages.questions.0.answers.0.other_id
Instead of the each Question ID, Choice_id, text and answer being on a separate row. I would like a column for each one. So that there is only 1 row per survey_id or index in data

Categories

Resources