Return certain fields in mongodb - python

I am trying to return just the array elements that meet my criteria. Here is what I have:
{
"_id": 1,
"awardAmount": 20000,
"url": "www.url.com",
"numAwards": 2,
"award": "Faculty Research Grant",
"Type": "faculty",
"Applicants": [
{
"preAwards": "NO1",
"Name": "Omar1",
"School": "SCSU1",
"citizenship": "YES1",
"budget": 1,
"Advisor": "Dr. DaPonte1",
"Major": "CSC1",
"appId": 100,
"Research": "Test data entry1",
"Time": "12 months1",
"URL": "www.url.com",
"Evaluators": [
{
"abstractScore": 11,
"evalNum": 1,
"goalsObjectivesScore": 11
},
{
"abstractScore": 22,
"evalNum": 2,
"goalsObjectivesScore": 22
}
]
},
{
"preAwards": "NO2",
"citizenship": "YES2",
"Major": "CSC2",
"Time": "12 months2",
"budget": 2,
"URL": "www.2.com",
"appId": 200,
"Advisor": "Dr. DaPonte2",
"Name": "Omar2",
"Research": "Test data entry2",
"School": "SCSU2",
"url": "www.2.com"
},
{
"preAwards": "NO3",
"citizenship": "YES3",
"Major": "CSC3",
"Time": "12 months3",
"budget": 3,
"URL": "www.3.com",
"appId": 300,
"Advisor": "Dr. DaPonte3",
"Name": "Omar3",
"Research": "Test data entry3",
"School": "SCSU3",
"url": "www.3.com",
"Evaluators": [
{
"abstractScore": 454,
"evalNum": 1,
"goalsObjectivesScore": 4546
}
]
}
]
}
I want to get back just the applicants that don't have an Evaluators field:
{
"_id": 1,
"awardAmount": 20000,
"url": "www.url.com",
"numAwards": 2,
"award": "Faculty Research Grant",
"Type": "faculty",
"Applicants": [
{
"preAwards": "NO2",
"citizenship": "YES2",
"Major": "CSC2",
"Time": "12 months2",
"budget": 2,
"URL": "www.2.com",
"appId": 200,
"Advisor": "Dr. DaPonte2",
"Name": "Omar2",
"Research": "Test data entry2",
"School": "SCSU2",
"url": "www.2.com"
}
]
}
This is just an example of one document. I want all the Applicants with no Evaluators fields in all documents.

Using aggregation with pymongo
col.aggregate([{"$unwind": "$Applicants"}, {"$match" : {"Applicants.Evaluators": {"$exists": False}}}]))
Output
{'ok': 1.0,
'result': [{'Applicants': {'Advisor': 'Dr. DaPonte2',
'Major': 'CSC2',
'Name': 'Omar2',
'Research': 'Test data entry2',
'School': 'SCSU2',
'Time': '12 months2',
'URL': 'www.2.com',
'appId': 200,
'budget': 2,
'citizenship': 'YES2',
'preAwards': 'NO2',
'url': 'www.2.com'},
'Type': 'faculty',
'_id': 1,
'award': 'Faculty Research Grant',
'awardAmount': 20000,
'numAwards': 2,
'url': 'www.url.com'}]}

In mongo shell you can do this:
db.test.find(
{
Applicants : { $elemMatch : { "Evaluators" : { $exists : 0 } }}
},
{
"_id" : 1,
"awardAmount" : 1,
"url" : 1,
"numAwards" : 2,
"award" : 1,
"Type" : 1,
'Applicants.$' : 1,
});
One problem is that the above query returns only one applicant without an Evaluators field (the positional $ projection returns just the first matching array element). The complete solution can be achieved via aggregation:
db.test.aggregate(
[
{ $match : { Applicants : { $elemMatch : { "Evaluators" : { $exists : 0 } } } } },
{ $unwind : "$Applicants" },
{ $match : { "Applicants.Evaluators" : { $exists : 0 } } },
{
$group :
{
_id : '$_id',
Applicants : { $push : '$Applicants' },
awardAmount : { $first : '$awardAmount' } ,
url : { $first : '$url' } ,
numAwards : { $first : '$numAwards' } ,
award : { $first : '$award' } ,
Type : { $first : '$Type' } ,
}
}
]
)

If I understand your question correctly I would suggest using the aggregation pipeline to $unwind the documents on your 'Applicants' field. You can then filter the resulting documents using $match to remove the documents where 'Evaluators' exist then $group them back together using $first and $push. Hope this is of some help.

Related

MongoDB Aggregation - Creating variable for $sum

Sample input:
{
"students":[
{
"name" : "John",
"semesters":[
{
"semester": "fall",
"grades": [
{"EXAM_1" : 25},
{"EXAM_2" : 45},
{"EXAM_3" : 22}
]
},
{
"semester": "winter",
"grades": [
{"EXAM_1" : 85},
{"EXAM_2" : 32},
{"EXAM_3" : 17}
]
}
]
},{
"name" : "Abraham",
"semesters":[
{
"semester": "fall",
"grades": [
{"EXAM_1" : 5},
{"EXAM_2" : 91},
{"EXAM_3" : 51}
]
},
{
"semester": "winter",
"grades": [
{"EXAM_1" : 55},
{"EXAM_2" : 62},
{"EXAM_3" : 17}
]
}
]
},{
"name" : "Zach",
"semesters":[
{
"semester": "spring",
"grades": [
{"EXAM_1" : 18},
{"EXAM_2" : 19},
{"EXAM_3" : 26}
]
},
{
"semester": "winter",
"grades": [
{"EXAM_1" : 100},
{"EXAM_2" : 94},
{"EXAM_3" : 45}
]
}
]
}
]
}
So this is what I have so far
data = await db.userstats.aggregate([
{ "$unwind": "$students.semesters" },
{ "$unwind": "$students.semesters.fall" },
{ "$unwind": f"$students.semesters.fall.grades" },
{
{ "$sum": [
{"$match" : { "$students.semesters.fall.grades" : "EXAM_3" } },
{"$multiply": [2, {"$match" : { "$students.semesters.fall.grades" : "EXAM_1" } }]}
]
}
},
{
"$project": {
"name" : "$name",
"character" : "$students.semesters.fall",
"exam_name" : "$students.semesters.fall.grades",
"exam_value" : "2*exam 1 + exam 3"
}
},
{ "$sort": { "exam_value": -1 }},
{ '$limit' : 30 }
]).to_list(length=None)
print(data)
I've been trying to implement a calculation performed on exam grades for each student in a data sample and comparing it to other students. I am stuck on how to properly perform the calculation. The basic rundown is that I need the output to be sorted calculations of
2*exam 1 + exam3.
I understand that $sum cannot be used in the pipeline stage, but I am unaware of how to use the $match command within the $sum operator.
Sample output:
{name: John, calculated_exam_grade: 202, 'semester':'winter'},
{name: Abraham, calculated_exam_grade: 101, 'semester':'fall'},
{name: John, calculated_exam_grade: 95, 'semester':'fall'},
etc...
Based on the expected result provided, the query is almost similar to the link I posted in the comment.
$unwind - Deconstruct students array.
$unwind - Deconstruct students.semesters array.
$project - Decorate output documents with the calculation for the calculated_exam_grade field.
$sort
$limit
db.collection.aggregate([
{
"$unwind": "$students"
},
{
"$unwind": "$students.semesters"
},
{
"$project": {
_id: 0,
"name": "$students.name",
"semester": "$students.semesters.semester",
"calculated_exam_grade": {
$sum: [
{
"$multiply": [
2,
{
$sum: [
"$students.semesters.grades.EXAM_1"
]
}
]
},
{
$sum: [
"$students.semesters.grades.EXAM_3"
]
}
]
}
}
},
{
"$sort": {
"calculated_exam_grade": -1
}
},
{
"$limit": 30
}
])
Sample Mongo Playground

groupby query on joined collection in flask mongoDB

I am currently stuck on this problem. I am relatively new to MongoDB, and I have to retrieve, for a specific user, the number of reports (count of reports made by users) along with the user's name (name), the last reported time (time of the last reported post), and the last reason (report_description).
I have been stuck on this for 2 days now; any help will be appreciated.
reported posts collection
{
"created_at": {
"$date": "2021-12-21T18:45:27.489Z"
},
"updated_at": {
"$date": "2021-12-21T18:45:27.489Z"
},
"post_id": {
"$oid": "61955ac35b3475f1d9759255"
},
"user_id": 2,
"report_type": "this is test",
"report_description": "this"
}
Post collection
{
"created_at": {
"$date": "2021-11-17T19:24:53.484Z"
},
"updated_at": {
"$date": "2021-11-17T19:24:53.484Z"
},
"user_id": 8,
"privacy_type": "public",
"post_type": "POST",
"post": "Om Sai Ram",
"total_like": 7,
"total_comment": 0,
"total_share": 0,
"image_url_list": [{
"image_url": "post_images/user-8/a31e39334987463bb9faa964391a935e.jpg",
"image_ratio": "1"
}],
"video_url_list": [],
"tag_list": [],
"is_hidden": false
}
User collection
{
"name": "sathish",
"user_id": 1,
"device_id": "faTOi3aVTjyQnBPFz0L7xm:APA91bHNLE9anWYrKWfwoHgmGWL2BlbWqgiVjU5iy7JooWxu26Atk9yZFxVnNp2OF1IXrXm4I6HdVJPGukEppQjSiUPdMoQ64KbOt78rpctxnYWPWliLrdxc9o1VdKL0DGYwE7Y6hx1H",
"user_name": "sathishkumar",
"updated_at": {
"$date": "2021-11-17T19:13:52.668Z"
},
"profile_picture_url": "1"
}
flask_snip.py
flagged_posts = mb.db_report.aggregate([{
'$group':{
'_id':'$user_id',
}
}])
The expected output should be a list, e.g.:
[
{
'user_id':1,
'name' :'somename',
'no_of_reports':30,
'last_reported_time':sometime,
'reason':'reason_of lastreported_post',
'post_link':'someurl',
},
{
'user_id':2,
'name' :'somename',
'no_of_reports':30,
'last_reported_time':sometime,
'reason':'reason_of last_reported_post',
'post_link':'someurl',
},
{
'user_id':3,
'name' :'somename',
'no_of_reports':30,
'last_reported_time':sometime,
'reason':'reason_of lastreported_post',
'post_link':'someurl',
},
]
Starting from the reported collection, you can $group to get the last_reason and last_reported_time. Then, perform a $lookup to user collection to get the name.
db.reported.aggregate([
{
"$sort": {
updated_at: -1
}
},
{
"$group": {
"_id": "$user_id",
"last_reported_time": {
"$first": "$updated_at"
},
"last_reason": {
"$first": "$report_description"
},
"no_of_reports": {
$sum: 1
}
}
},
{
"$lookup": {
"from": "user",
"localField": "_id",
"foreignField": "user_id",
"as": "userLookup"
}
},
{
"$unwind": "$userLookup"
},
{
"$project": {
"user_id": "$_id",
"name": "$userLookup.user_name",
"no_of_reports": 1,
"last_reported_time": 1,
"last_reason": 1
}
}
])
Here is the Mongo playground for your reference.

Obtain records based on matching key value pairs and comparing date in Python

I have the following collection in MongoDB:
{
"_id" : ObjectId("5bbc86e5c16a27f1e1bd39f8"),
"name" : "swetha",
"nameId" : 123,
"source" : "Blore",
"sourceId" : 10,
"LastUpdate" : "10-Oct-2018"
}
{
"_id" : ObjectId("5bbc86e5c16a27f1e1bd39f9"),
"name" : "swetha",
"nameId" : 123,
"source" : "Mlore",
"sourceId" : "11",
"LastUpdate" : "11-Oct-2018"
}
{
"_id" : ObjectId("5bbc86e5c16a27f1e1bd39fa"),
"name" : "swathi",
"nameId" : 124,
"source" : "Mlore",
"sourceId" : "11",
"LastUpdate" : "9-Oct-2018"
}
I am a beginner to Python and want to compare the 'LastUpdate' between the above records based on matching 'name' or 'nameId' and want to push the record with latest date to another collection. E.g. name:'Swetha' is same in first two records. So compare 'LastUpdate' between them and output the record with latest date.
I have written the following code to read records from MongoDB and print them. I don't understand how to compare records that share the same key and compare their timestamps, even though I referred to a few resources on Google.
import json
import pandas as pd
from pymongo import MongoClient
try:
client = MongoClient()
print("Connected successfully!!!")
except:
print("Could not connect to MongoDB")
# database
db = client.conn
collection = db.contactReg
df = collection.find()
for row in df:
print(row)
Links that I referred to:
Is there a better way to compare dictionary values
https://gis.stackexchange.com/questions/87276/how-to-compare-values-from-a-column-in-attribute-table-with-values-in-dictionary
Comparing two dictionaries and printing key value pair in python and few more.
I think what you need is an aggregation. This might look big, but once you get the hang of Mongo aggregations you'll get comfortable with them.
df = collection.aggregate([
{
"$project": {
"_id": 0,
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"LastUpdateArray": {
"$split": [
"$LastUpdate",
"-"
]
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"LastUpdateArray": 1,
"LastUpdateMonth": {
"$arrayElemAt": [
"$LastUpdateArray",
1
]
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"Year": {
"$arrayElemAt": [
"$LastUpdateArray",
2
]
},
"Date": {
"$arrayElemAt": [
"$LastUpdateArray",
0
]
},
"Month": {
"$switch": {
"branches": [
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Jan"
]
},
"then": "01"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Feb"
]
},
"then": "02"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Mar"
]
},
"then": "03"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Apr"
]
},
"then": "04"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"May"
]
},
"then": "05"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Jun"
]
},
"then": "06"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Jul"
]
},
"then": "07"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Aug"
]
},
"then": "08"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Sep"
]
},
"then": "09"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Oct"
]
},
"then": "10"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Nov"
]
},
"then": "11"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Dec"
]
},
"then": "12"
}
],
"default": "01"
}
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"Year": 1,
"Date": 1,
"Month": 1,
"DateString": {
"$concat": [
"$Year",
"-",
"$Month",
"-",
"$Date"
]
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"Date": {
"$dateFromString": {
"dateString": "$DateString"
}
}
}
},
{
"$sort": {
"Date": -1
}
},
{
"$group": {
"_id": "$name",
"name": {
"$first": "$name"
},
"nameId": {
"$first": "$nameId"
},
"source": {
"$first": "$source"
},
"sourceId": {
"$first": "$sourceId"
},
"LastUpdate": {
"$first": "$LastUpdate"
},
"Date": {
"$first": "$Date"
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1
}
}
])
In the first 5 stages of the aggregation, I convert LastUpdate into a date; the next stage sorts descending by that date. In the $group stage, I group by name and take the first document that comes for each name.
Hope this helps.
I'm assuming what you need is duplicate records and I'm taking the first one that comes. Reference : https://stackoverflow.com/a/26985011/7630071
df = collection.aggregate([
{
"$group": {
"_id": "$name",
"count": {
"$sum": 1
},
"data": {
"$push": {
"nameId": "$nameId",
"source": "$source",
"sourceId": "$sourceId",
"LastUpdate": "$LastUpdate"
}
}
}
},
{
"$match": {
"_id": {
"$ne": null
},
"count": {
"$gt": 1
}
}
}
])

Python ElasticSearch query error

I have used the mapping query to perform a search in ElasticSearch, and it works fine as below.
{
"query": {
"bool" : {
"must" : {
"match_all" : {}
},
"filter" : {
"geo_distance" : {
"distance" : "{}mi".format(list_info.radius_b),
'location': {
"lat": zip.lat,
"lon": zip.lng
}
}
},
},
},
"sort" : [
{
"_geo_distance" : {
'location': {"lat": zip.lat,
"lon": zip.lng},
"order" : "asc",
"unit" : "mi",
"mode" : "min",
"distance_type" : "sloppy_arc"
}
}
],
"from": 0,
"size": 0,
}
However, when I add a "term" clause, I get this error: TransportError(400, u'parsing_exception', u'[term] malformed query, expected [END_OBJECT] but found [FIELD_NAME]')
{
"query": {
"bool" : {
"must" : {
"match_all" : {}
},
"filter" : {
"geo_distance" : {
"distance" : "{}mi".format(list_info.radius_b),
'location': {
"lat": zip.lat,
"lon": zip.lng
}
}
},
},
"term" : { "status" : "approved" }
},
"sort" : [
{
"_geo_distance" : {
'location': {"lat": zip.lat,
"lon": zip.lng},
"order" : "asc",
"unit" : "mi",
"mode" : "min",
"distance_type" : "sloppy_arc"
}
}
],
"from": 0,
"size": 0,
}
Your new term query must be located inside the bool/filter query:
{
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": [
{
"geo_distance": {
"distance": "{}mi".format(list_info.radius_b),
"location": {
"lat": zip.lat,
"lon": zip.lng
}
}
},
{
"term": {
"status": "approved"
}
}
]
}
},
"sort": [
{
"_geo_distance": {
"location": {
"lat": zip.lat,
"lon": zip.lng
},
"order": "asc",
"unit": "mi",
"mode": "min",
"distance_type": "sloppy_arc"
}
}
],
"from": 0,
"size": 0
}

Optimizing MongoDB Aggregation Pipeline (Group, Lookup, Match)

I'm new to NoSQL databases and I chose MongoDB as my first one. I made an aggregation pipeline to show the data that I want; here are my sample documents:
Document sample from Users Collection
{
"_id": 9,
"name": "Sample Name",
"email": "email#example.com",
"password": "password hash"
}
Document sample from Pages Collection (this one doesn't really matter)
{
"_id": 42,
"name": "Product Name",
"description": "Product Description",
"user_id": 8,
"rating_categories": [{
"_id": 114,
"name": "Build Quality"
}, {
"_id": 115,
"name": "Price"
}, {
"_id": 116,
"name": "Feature"
}, {
"_id": 117,
"name": "Comfort"
}, {
"_id": 118,
"name": "Switch"
}]
}
Document sample from Reviews Collection
{
"_id": 10,
"page_id": 42, #ID reference from pages collection
"user_id": 8, #ID reference from users collection
"review": "The review of the product",
"ratings": [{
"_id": 114, #ID Reference from pages collection of what rating category it is
"rating": 5
}, {
"_id": 115,
"rating":4
}, {
"_id": 116,
"rating": 5
}, {
"_id": 117,
"rating": 3
}, {
"_id": 118,
"rating": 4
}],
"created": "1582825968963", #Date Object
"votes": {
"downvotes": [],
"upvotes": [9] #IDs of users who upvote this review
}
}
I want to get reviews by page_id, which can be accessed from the API I made. Here's the expected result from the aggregation:
[
{
"_id": 10, #Review of the ID
"created": "Thu, 27 Feb 2020 17:52:48 GMT",
"downvote_count": 0, #Length of votes.downvotes from reviews collection
"page_id": 42, #Page ID
"ratings": [ #Stores what rate at what rating category id
{
"_id": 114,
"rating": 5
},
{
"_id": 115,
"rating": 4
},
{
"_id": 116,
"rating": 5
},
{
"_id": 117,
"rating": 3
},
{
"_id": 118,
"rating": 4
}
],
"review": "The Review",
"upvote_count": 0, #Length of votes.upvotes from reviews collection
"user": { #User who reviewed
"_id": 8, #User ID
"downvote_count": 0, #How many downvotes this user receive from all of the user's reviews
"name": "Sample Name", #Username
"review_count": 1, #How many reviews the user made
"upvote_count": 1 #How many upvotes this user receive from all of the user's reviews
},
"vote_state": 0 #Determining vote state from the user (who requested to the API) for this review, 0 for no vote, -1 for downvote, 1 for upvote
},
...
]
Here's the aggregation pipeline for the reviews collection that I made to produce the result above:
user_id = 9
page_id = 42
pipeline = [
{"$group": {
"_id": {"user_id":"$user_id", "page_id": "$page_id"},
"review_id": {"$last": "$_id"},
"page_id": {"$last": "$page_id"},
"user_id" : {"$last": "$user_id"},
"ratings": {"$last": "$ratings"},
"review": {"$last": "$review"},
"created": {"$last": "$created"},
"votes": {"$last": "$votes"},
"upvote_count": {"$sum":
{"$cond": [
{"$ifNull": ["$votes.upvotes", False]},
{"$size": "$votes.upvotes"},
0
]}
},
"downvote_count": {"$sum":
{"$cond": [
{"$ifNull": ["$votes.downvotes", False]},
{"$size": "$votes.downvotes"},
0
]}
}}},
{"$lookup": {
"from": "users",
"localField": "user_id",
"foreignField": "_id",
"as": "user"
}},
{"$unwind": "$user"},
{"$lookup": {
"from": "reviews",
"localField": "user._id",
"foreignField": "user_id",
"as": "user.reviews"
}},
{"$addFields":{
"_id": "$review_id",
"user.review_count": {"$size": "$user.reviews"},
"user.upvote_count": {"$sum":{
"$map":{
"input":"$user.reviews",
"in":{"$cond": [
{"$ifNull": ["$$this.votes.upvotes", False]},
{"$size": "$$this.votes.upvotes"},
0
]}
}
}},
"user.downvote_count": {"$sum":{
"$map":{
"input":"$user.reviews",
"in":{"$cond": [
{"$ifNull": ["$$this.votes.downvotes", False]},
{"$size": "$$this.votes.downvotes"},
0
]}
}
}},
"vote_state": {"$switch": {
"branches": [
{"case": { "$and" : [
{"$ifNull": ["$votes.upvotes", False]},
{"$in": [user_id, "$votes.upvotes"]}
]}, "then": 1
},
{"case": { "$and" : [
{"$ifNull": ["$votes.downvotes", False]},
{"$in": [user_id, "$votes.downvotes"]}
]}, "then": -1
},
],
"default": 0
}},
}},
{"$project":{
"user.password": 0,
"user.email": 0,
"user_id": 0,
"review_id" : 0,
"votes": 0,
"user.reviews": 0
}},
{"$sort": {"created": -1}},
{"$match": {"page_id": page_id}},
]
Note: User can make multiple reviews for same page_id, but only the latest will be shown
I'm using pymongo btw, that's why operators have quotation mark
My questions are:
Is there any room to optimize my aggregation pipeline?
Is it considered good practice to run multiple small aggregations to get data like the above, or is it always better to have one big aggregation (or as few as possible) to get the data that I want?
As you can see, every time I want to access votes.upvotes or votes.downvotes from a document in the reviews collection, I check whether the field is null. That's because the votes.upvotes and votes.downvotes fields aren't created when a user makes a review; instead, they are created when a user votes on that review. Should I create empty votes.upvotes and votes.downvotes fields when a user makes a review and remove the $ifNull? Will that increase the performance of the aggregation?
Thanks
Check if this aggregation has better performance.
Create this index if you don't already have it:
db.reviews.create_index([("page_id", 1)])
Note: We can improve the performance even more by avoiding the second $lookup on reviews.
db.reviews.aggregate([
{
$match: {
page_id: page_id
}
},
{
$addFields: {
request_user_id: user_id
}
},
{
$group: {
_id: {
page_id: "$page_id",
user_id: "$user_id",
request_user_id: "$request_user_id"
},
data: {
$push: "$$ROOT"
}
}
},
{
$lookup: {
"from": "users",
"let": {
root_user_id: "$_id.user_id"
},
"pipeline": [
{
$match: {
$expr: {
$eq: [
"$$root_user_id",
"$_id"
]
}
}
},
{
$lookup: {
"from": "reviews",
"let": {
root_user_id: "$$root_user_id"
},
"pipeline": [
{
$match: {
$expr: {
$eq: [
"$$root_user_id",
"$user_id"
]
}
}
},
{
$project: {
user_id: 1,
downvote_count: {
$size: "$votes.downvotes"
},
upvote_count: {
$size: "$votes.upvotes"
}
}
},
{
$group: {
_id: null,
review_count: {
$sum: {
$cond: [
{
$eq: [
"$$root_user_id",
"$user_id"
]
},
1,
0
]
}
},
upvote_count: {
$sum: "$upvote_count"
},
downvote_count: {
$sum: "$downvote_count"
}
}
},
{
$unset: "_id"
}
],
"as": "stats"
}
},
{
$project: {
tmp: {
$mergeObjects: [
{
_id: "$_id",
name: "$name"
},
{
$arrayElemAt: [
"$stats",
0
]
}
]
}
}
},
{
$replaceWith: "$tmp"
}
],
"as": "user"
}
},
{
$addFields: {
first: {
$mergeObjects: [
"$$ROOT",
{
$arrayElemAt: [
"$data",
0
]
},
{
user: {
$arrayElemAt: [
"$user",
0
]
},
created: {
$toDate: {
$toLong: {
$arrayElemAt: [
"$data.created",
0
]
}
}
},
downvote_count: {
$reduce: {
input: "$data.votes.downvotes",
initialValue: 0,
in: {
$add: [
"$$value",
{
$size: "$$this"
}
]
}
}
},
upvote_count: {
$reduce: {
input: "$data.votes.upvotes",
initialValue: 0,
in: {
$add: [
"$$value",
{
$size: "$$this"
}
]
}
}
},
vote_state: {
$cond: [
{
$gt: [
{
$size: {
$filter: {
input: "$data.votes.upvotes",
cond: {
$in: [
"$_id.request_user_id",
"$$this"
]
}
}
}
},
0
]
},
1,
{
$cond: [
{
$gt: [
{
$size: {
$filter: {
input: "$data.votes.downvotes",
cond: {
$in: [
"$_id.request_user_id",
"$$this"
]
}
}
}
},
0
]
},
-1,
0
]
}
]
}
}
]
}
}
},
{
$unset: [
"first.data",
"first.votes",
"first.user_id",
"first.request_user_id"
]
},
{
$replaceWith: "$first"
},
{
"$sort": {
"created": -1
}
}
])
MongoPlayground

Categories

Resources