Adding additional information to aggregated results

Adding additional information to aggregated results - python

Example below of what I'm trying but Im having trouble figuring out the addFields part.
I'd like to add the created_at and amount fields from the matching expr in the pipeline from $$items. Currently it returns every created_at in the $$items
User model
class User(BaseModel):
__schema__ = DictField(dict(
user_id=StringField(required=True),
name=StringField(required=True),
item_list=ListField(default=[])
))
class InventoryAggregateManipulator(Manipulator):
def transform_outgoing(self, doc, model):
cur = User.aggregate([
{"$lookup": {
"from": "item",
"let": {"items": "$item_list"},
"pipeline": [
{"$match": {"$expr": {"$in": ["$_id", "$$items.id"]}}},
{"$project": {"name": "$name", 'rate': "$rate", "payout": "$payout"}},
{"$addFields": {"last_run": "$$items.created_at"}}
],
"as": "inventory"
}},
{"$match": {"_id": doc['_id']}}
])
for doc in cur:
return doc
example item collection:
[
{
"_id": 1,
"name": "some_item",
"rate": 60,
"payout": 15
},
{
"_id": 2,
"name": "another_item",
"rate": 30,
"payout": 20
}
]
example user:
{
'_id': 1,
'user_id': 1234,
'name': 'user123',
'item_list':[
{
"id": 1,
"created_at": datetime,
"amount": 3
},
{
"id": 2,
"created_at": datetime,
"amount": 5
}
]
}
expected result
{
'user_id': 1234,
'name': 'user123',
'item_list':[
{
"id": 1,
"created_at": datetime,
"amount": 3
},
{
"id": 2,
"created_at": datetime,
"amount": 5
}
],
'inventory':[
{
"name": "some_item",
"rate": 60,
"payout": 15,
"last_run": [datetime from item_list],
"amount": [amount from item_list]
},
{
"name": "another_item",
"rate": 30,
"payout": 20,
"last_run": [datetime from item_list],
"amount": [amount from item_list]
}
]
}

Related

Importing a JSON object to a Google Cloud SQL instance authentication error

I have been stuck on this for the last 2 days, first time in a long time I had to use stackoverflow. But I love this community
What I am trying to do?
I am trying to upload a JSON object to update a table in a cloud SQL instance.
Error I am getting
I have tried using a service account and 02Auth credentials but I still get these errors.
Code below
from pprint import pprint
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
from google.oauth2 import service_account
import google.auth.credentials
credentials = GoogleCredentials.get_application_default()
quota_project_id = "alert-groove-360914"
scopes = ["https://www.googleapis.com/auth/cloud-platform","https://www.googleapis.com/auth/sqlservice.admin"]
#credentials, _ = google.auth.load_credentials_from_file(
# "alert-groove-360914-0d5821a7921c.json", scopes=scopes, quota_project_id=quota_project_id
# )
#credentials = service_account.Credentials.from_service_account_file("alert-groove-360914-0d5821a7921c.json")
service = discovery.build('sqladmin', 'v1beta4', credentials=credentials)
# Project ID of the project that contains the instance.
project = 'XXXXXXXX' # TODO: Update placeholder value.
# Cloud SQL instance ID. This does not include the project ID.
instance = 'employment-types' # TODO: Update placeholder value.
database_instance_body = {
"id": 3,
"first_name": "XXXXXXXX",
"last_name": "XXXXXXX",
"email": "XXXXXX28#gmail.com",
"mobile_number": "XXXXXXXXX",
"country_id": 224,
"new_user": False,
"status": 1,
"reason": 'null',
"duration": 'null',
"linkedin_id": 'null',
"sync_contacts": False,
"linkedin_sync": False,
"available": True,
"radar_visibility": 'null',
"details": {
"organization": "Foras",
"isSelfEmployed": False,
"gender_id": 2,
"location_id": 457,
"gender": 'null',
"about": "The best around and nothing will ever get me down",
"image": "https:\/\/foras.tedmob.com\/storage\/users\/image-8596.jpg",
"lat": "25.20082000253009",
"long": "55.280527271443894",
"Gender_Rel": "Male",
"job_title": "Engineer",
"Experience": "10+ years",
"Location": "Dubai"
},
"interests": [
{
"id": 106,
"title": "Nightlife",
"icon": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-5908.png",
"image": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-7960.png",
"category": {
"category": "Culture"
}
},
{
"id": 102,
"title": "Music",
"icon": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-5908.png",
"image": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-7960.png",
"category": {
"category": "Culture"
}
},
{
"id": 98,
"title": "Personal finance",
"icon": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-5908.png",
"image": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-7960.png",
"category": {
"category": "Finance"
}
},
{
"id": 101,
"title": "Crypto currency ",
"icon": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-5908.png",
"image": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-7960.png",
"category": {
"category": "Finance"
}
},
{
"id": 104,
"title": "Breaking",
"icon": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-5908.png",
"image": "https:\/\/foras.tedmob.com\/storage\/interests\/basket-ball-7960.png",
"category": {
"category": "Culture"
}
}
],
"languages": [],
"goals": [
{
"id": 2,
"title": "Find a new job",
"icon": "https:\/\/foras.tedmob.com\/storage\/goals\/find-a-new-job-6804.png",
"pivot": {
"user_id": 3,
"goal_id": 2
},
"category": {
"category": "Career"
}
},
{
"id": 11,
"title": "Explore a career change",
"icon": "https:\/\/foras.tedmob.com\/storage\/goals\/explore-a-career-change-5885.png",
"pivot": {
"user_id": 3,
"goal_id": 11
},
"category": {
"category": "Career"
}
},
{
"id": 12,
"title": "Find co-founders",
"icon": "https:\/\/foras.tedmob.com\/storage\/goals\/find-co-founders-2170.png",
"pivot": {
"user_id": 3,
"goal_id": 12
},
"category": {
"category": "Networking"
}
}
],
"country": {
"id": 224,
"name": "United Arab Emirates",
"iso": "AE",
"phone_code": "+971"
},
"academic_levels": [
{
"id": 4,
"title": "Master's degree",
"pivot": {
"user_id": 3,
"academic_level_id": 4
}
}
],
"experiences": [
{
"id": 1,
"title": "test1",
"company_name": "test",
"isCurrentlyWorking": True,
"start_date": "2021-06-15 00:00:00",
"end_date": "2022-09-13 00:00:00",
"end_position": True,
"headline": "test",
"industry": "test",
"description": "description",
"employment_type": 1
}
]
}
request = service.instances().update(project=project, instance=instance, body=database_instance_body)
request.execute()

Search for key in Nested Json in Python

I have below Json.
My requirement is to search this Json and get 'id' value if 'name' is equal to 'Latisha Chase'
[
{
"_id": "5d3121cd001453772160a791",
"friends": [
{
"id": 6,
"name": "Mcknight Tran"
},
{
"id": 7,
"name": "Helena Bowers"
},
{
"id": 8,
"name": "Dorsey Ayala"
}
]
},
{
"_id": "5d3121cd838efa513e7dda96",
"friends": [ {
"friends": [
{
"id": 90,
"name": "w Stark"
},
{
"id": 91,
"name": "w Jacobs"
},
{
"id": 93,
"name": "w Garner"
}
]},
{
"id": 10,
"name": "Amalia Stark"
},
{
"id": 11,
"name": "Myra Jacobs"
},
{
"id": 12,
"name": "Norton Garner"
}
]
}
]
This is sample code that I have. Could anyone help me with this.?
I tried recursive codes online but didn't work with my example here.
Update:
Its not necessary that 'friends' will have single depth. it can have friends inside friends. ex: friends [{ friends[ {}]}]

A more general approach using recursion:
def recursive_function(name, l):
if isinstance(l,list):
for i in l:
recursive_function(name, i)
elif isinstance(l,dict):
if l.get("name") == name:
print (l.get("id"))
for v in l.values():
if isinstance(v, list) or isinstance(v, dict):
recursive_function(name, v)
recursive_function("Latisha Chase",json_obj)
Result:
3

Try this
j = [{
"_id": "5d3121cd001453772160a791",
"friends": [{
"id": 6,
"name": "Mcknight Tran"
},
{
"id": 7,
"name": "Helena Bowers"
},
{
"id": 8,
"name": "Dorsey Ayala"
}
]
},
{
"_id": "5d3121cded44d8ba6ad96b78",
"friends": [{
"id": 2,
"name": "June Gilbert"
},
{
"id": 3,
"name": "Latisha Chase"
},
{
"id": 4,
"name": "Franco Carlson"
}
]
},
{
"_id": "5d3121cd838efa513e7dda96",
"friends": [{
"id": 10,
"name": "Amalia Stark"
},
{
"id": 11,
"name": "Myra Jacobs"
},
{
"id": 12,
"name": "Norton Garner"
}
]
}
]
for x in j:
for y in x.get('friends'):
if y.get('name') == 'Latisha Chase':
print y.get('id')

Read a JSON file and select node like in Pandas dataframe

I need to read a JSON config file like the example below and change some of its values with a querying structure like in Pandas.
Ex:
[
{
"_id": "5d1f5d0289725ba2c32695ac",
"index": 0,
"guid": "d1a8c2e2-1011-4db2-97a8-b68777c2d18b",
"isActive": false,
"name": {
"first": "Barnett",
"last": "Obrien"
},
"latitude": "-76.327744",
"longitude": "-131.003501",
"friends": [
{
"friend_id": 0,
"name": "Burnett Burke"
},
{
"friend_id": 1,
"name": "Lawrence Hunt"
},
{
"friend_id": 2,
"name": "Nola Benjamin"
}
]
},
{
"_id": "5d1f5d023ef4523b5e326ae2",
"index": 1,
"guid": "6b0ad8a7-2b10-4892-9b91-fc7445038aca",
"isActive": true,
"name": {
"first": "Valerie",
"last": "Preston"
},
"latitude": "27.995886",
"longitude": "170.930419",
"friends": [
{
"friend_id": 0,
"name": "Gretchen Hobbs"
},
{
"friend_id": 1,
"name": "Irene Fox"
},
{
"friend_id": 2,
"name": "Porter King"
}
]
}
]
Then I wanted to change the value for the friend_id == 1 and object with guid == 6b0ad8a7-2b10-4892-9b91-fc7445038aca from Irene Fox to something else.
With Pandas I can have something like this:
valerie = dataframe['guid'] == '6b0ad8a7-2b10-4892-9b91-fc7445038aca'
friend1 = dataframe['friend_1'] == 1
dataframe[valerie & friend1]['name'] = 'Karen Smith'
How can I achieve this without having to add Pandas dependency?

With simple loop:
import json
import sys
data = json.load(open('input.json'))
for d in data:
if d["guid"] == "6b0ad8a7-2b10-4892-9b91-fc7445038aca":
for f in d["friends"]:
if f["friend_id"] == 1:
f["name"] = "Karen Smith"
# break <- uncomment if only one match is implied
# replace sys.stdout with output file pointer
json.dump(data, sys.stdout, indent=4)
you may also break the outer for loop if items/dicts have unique guids.
The output (for demonstration):
[
{
"_id": "5d1f5d0289725ba2c32695ac",
"index": 0,
"guid": "d1a8c2e2-1011-4db2-97a8-b68777c2d18b",
"isActive": false,
"name": {
"first": "Barnett",
"last": "Obrien"
},
"latitude": "-76.327744",
"longitude": "-131.003501",
"friends": [
{
"friend_id": 0,
"name": "Burnett Burke"
},
{
"friend_id": 1,
"name": "Lawrence Hunt"
},
{
"friend_id": 2,
"name": "Nola Benjamin"
}
]
},
{
"_id": "5d1f5d023ef4523b5e326ae2",
"index": 1,
"guid": "6b0ad8a7-2b10-4892-9b91-fc7445038aca",
"isActive": true,
"name": {
"first": "Valerie",
"last": "Preston"
},
"latitude": "27.995886",
"longitude": "170.930419",
"friends": [
{
"friend_id": 0,
"name": "Gretchen Hobbs"
},
{
"friend_id": 1,
"name": "Karen Smith"
},
{
"friend_id": 2,
"name": "Porter King"
}
]
}
]

Merging two json files using python

I'm new to python, I want to merge two JSON files
there should not be any duplicate:
if the values and name are same then I will add both the keys and maintain a single record, otherwise, I will keep the record
File 1:
[ {
"key": 1,
"name": "test",
"value": "NY"
},
{
"key": 1,
"name": "test",
"value": "CA"
},
{
"key": 1,
"name": "test",
"value": "MA"
},
{
"key": 1,
"name": "test",
"value": "MA"
}
]
File 2:
[ {
"key": 1,
"name": "test",
"value": "NJ"
},
{
"key": 1,
"name": "test",
"value": "CA"
},
{
"key": 1,
"name": "test",
"value": "TX"
},
{
"key": 1,
"name": "test",
"value": "MA"
}
]
and the merged file output should be:
[
{
"key": 1,
"name": "test",
"value": "NY"
},
{
"key": 3,
"name": "test",
"value": "MA"
},
{
"key": 1,
"name": "test",
"value": "NJ"
},
{
"key": 2,
"name": "test",
"value": "CA"
},
{
"key": 1,
"name": "test",
"value": "TX"
}
]
order of the record does not matter.
I have tried several approaches, like merging the files and then iterating over then, parsing both files separately but I'm facing issues, being new to python.

This should help.
# -*- coding: utf-8 -*-
f1 = [ {
"key": 1,
"value": "NY"
},
{
"key": 1,
"value": "CA"
},
{
"key": 1,
"value": "MA"
}
]
f2 = [ {
"key": 1,
"value": "NJ"
},
{
"key": 1,
"value": "CA"
},
{
"key": 1,
"value": "TX"
}
]
check = [i["value"] for i in f1] #check list to see if the value already exist in f1.
for i in f2:
if i['value'] not in check:
f1.append(i)
print(f1)
Output:
[{'value': 'NY', 'key': 1}, {'value': 'CA', 'key': 1}, {'value': 'MA', 'key': 1}, {'value': 'NJ', 'key': 1}, {'value': 'TX', 'key': 1}]

Optimizing MongoDB Aggregation Pipeline (Group, Lookup, Match)

I'm new on NoSQL Database and i choose MongoDB as my first NoSQL Database. I made an aggregation pipeline to shows the data that i want, here's my document sample:
Document sample from Users Collection
{
"_id": 9,
"name": "Sample Name",
"email": "email#example.com",
"password": "password hash"
}
Document sample from Pages Collection (this one doesn't really matter)
{
"_id": 42,
"name": "Product Name",
"description": "Product Description",
"user_id": 8,
"rating_categories": [{
"_id": 114,
"name": "Build Quality"
}, {
"_id": 115,
"name": "Price"
}, {
"_id": 116,
"name": "Feature"
}, {
"_id": 117,
"name": "Comfort"
}, {
"_id": 118,
"name": "Switch"
}]
}
Document sample from Reviews Collection
{
"_id": 10,
"page_id": 42, #ID reference from pages collection
"user_id": 8, #ID reference from users collection
"review": "The review of the product",
"ratings": [{
"_id": 114, #ID Reference from pages collection of what rating category it is
"rating": 5
}, {
"_id": 115,
"rating":4
}, {
"_id": 116,
"rating": 5
}, {
"_id": 117,
"rating": 3
}, {
"_id": 118,
"rating": 4
}],
"created": "1582825968963", #Date Object
"votes": {
"downvotes": [],
"upvotes": [9] #IDs of users who upvote this review
}
}
I want to get reviews by page_id which can be accessed from the API i made, here's the expected result from the aggregation:
[
{
"_id": 10, #Review of the ID
"created": "Thu, 27 Feb 2020 17:52:48 GMT",
"downvote_count": 0, #Length of votes.downvotes from reviews collection
"page_id": 42, #Page ID
"ratings": [ #Stores what rate at what rating category id
{
"_id": 114,
"rating": 5
},
{
"_id": 115,
"rating": 4
},
{
"_id": 116,
"rating": 5
},
{
"_id": 117,
"rating": 3
},
{
"_id": 118,
"rating": 4
}
],
"review": "The Review",
"upvote_count": 0, #Length of votes.upvotes from reviews collection
"user": { #User who reviewed
"_id": 8, #User ID
"downvote_count": 0, #How many downvotes this user receive from all of the user's reviews
"name": "Sample Name", #Username
"review_count": 1, #How many reviews the user made
"upvote_count": 1 #How many upvotes this user receive from all of the user's reviews
},
"vote_state": 0 #Determining vote state from the user (who requested to the API) for this review, 0 for no vote, -1 for downvote, 1 for upvote
},
...
]
Here's the pipeline of the aggregation for reviews collection that i made for the result above:
user_id = 9
page_id = 42
pipeline = [
{"$group": {
"_id": {"user_id":"$user_id", "page_id": "$page_id"},
"review_id": {"$last": "$_id"},
"page_id": {"$last": "$page_id"},
"user_id" : {"$last": "$user_id"},
"ratings": {"$last": "$ratings"},
"review": {"$last": "$review"},
"created": {"$last": "$created"},
"votes": {"$last": "$votes"},
"upvote_count": {"$sum":
{"$cond": [
{"$ifNull": ["$votes.upvotes", False]},
{"$size": "$votes.upvotes"},
0
]}
},
"downvote_count": {"$sum":
{"$cond": [
{"$ifNull": ["$votes.downvotes", False]},
{"$size": "$votes.downvotes"},
0
]}
}}},
{"$lookup": {
"from": "users",
"localField": "user_id",
"foreignField": "_id",
"as": "user"
}},
{"$unwind": "$user"},
{"$lookup": {
"from": "reviews",
"localField": "user._id",
"foreignField": "user_id",
"as": "user.reviews"
}},
{"$addFields":{
"_id": "$review_id",
"user.review_count": {"$size": "$user.reviews"},
"user.upvote_count": {"$sum":{
"$map":{
"input":"$user.reviews",
"in":{"$cond": [
{"$ifNull": ["$$this.votes.upvotes", False]},
{"$size": "$$this.votes.upvotes"},
0
]}
}
}},
"user.downvote_count": {"$sum":{
"$map":{
"input":"$user.reviews",
"in":{"$cond": [
{"$ifNull": ["$$this.votes.downvotes", False]},
{"$size": "$$this.votes.downvotes"},
0
]}
}
}},
"vote_state": {"$switch": {
"branches": [
{"case": { "$and" : [
{"$ifNull": ["$votes.upvotes", False]},
{"$in": [user_id, "$votes.upvotes"]}
]}, "then": 1
},
{"case": { "$and" : [
{"$ifNull": ["$votes.downvotes", False]},
{"$in": [user_id, "$votes.downvotes"]}
]}, "then": -1
},
],
"default": 0
}},
}},
{"$project":{
"user.password": 0,
"user.email": 0,
"user_id": 0,
"review_id" : 0,
"votes": 0,
"user.reviews": 0
}},
{"$sort": {"created": -1}},
{"$match": {"page_id": page_id}},
]
Note: User can make multiple reviews for same page_id, but only the latest will be shown
I'm using pymongo btw, that's why operators have quotation mark
My questions are:
Is there any room to optimize my aggregation pipeline?
Is it considered as a good practice to have multiple small aggregate execution to get datas like above, or its always better to have 1 big aggregation (or as less as possible) to get the data that i want?
As you can see, every time i want to access votes.upvotes or votes.downvotes from a document on review collection, i checked whether the field is null or not, that's because the field votes.upvotes and votes.downvotes isn't being made when user make a review, instead it's being made when an user gives a vote to that review. Should i make an empty field on votes.upvotes and votes.downvotes when user make a review and remove the $ifNull? Will that increase the performance of the aggregation?
Thanks

Check if this aggregation has better performance.
Create these indexes if you don't have already:
db.reviews.create_index([("page_id", 1)])
Note: We can improve even more the performance avoiding $lookup reviews again.
db.reviews.aggregate([
{
$match: {
page_id: page_id
}
},
{
$addFields: {
request_user_id: user_id
}
},
{
$group: {
_id: {
page_id: "$page_id",
user_id: "$user_id",
request_user_id: "$request_user_id"
},
data: {
$push: "$$ROOT"
}
}
},
{
$lookup: {
"from": "users",
"let": {
root_user_id: "$_id.user_id"
},
"pipeline": [
{
$match: {
$expr: {
$eq: [
"$$root_user_id",
"$_id"
]
}
}
},
{
$lookup: {
"from": "reviews",
"let": {
root_user_id: "$$root_user_id"
},
"pipeline": [
{
$match: {
$expr: {
$eq: [
"$$root_user_id",
"$user_id"
]
}
}
},
{
$project: {
user_id: 1,
downvote_count: {
$size: "$votes.downvotes"
},
upvote_count: {
$size: "$votes.upvotes"
}
}
},
{
$group: {
_id: null,
review_count: {
$sum: {
$cond: [
{
$eq: [
"$$root_user_id",
"$user_id"
]
},
1,
0
]
}
},
upvote_count: {
$sum: "$upvote_count"
},
downvote_count: {
$sum: "$downvote_count"
}
}
},
{
$unset: "_id"
}
],
"as": "stats"
}
},
{
$project: {
tmp: {
$mergeObjects: [
{
_id: "$_id",
name: "$name"
},
{
$arrayElemAt: [
"$stats",
0
]
}
]
}
}
},
{
$replaceWith: "$tmp"
}
],
"as": "user"
}
},
{
$addFields: {
first: {
$mergeObjects: [
"$$ROOT",
{
$arrayElemAt: [
"$data",
0
]
},
{
user: {
$arrayElemAt: [
"$user",
0
]
},
created: {
$toDate: {
$toLong: {
$arrayElemAt: [
"$data.created",
0
]
}
}
},
downvote_count: {
$reduce: {
input: "$data.votes.downvotes",
initialValue: 0,
in: {
$add: [
"$$value",
{
$size: "$$this"
}
]
}
}
},
upvote_count: {
$reduce: {
input: "$data.votes.upvotes",
initialValue: 0,
in: {
$add: [
"$$value",
{
$size: "$$this"
}
]
}
}
},
vote_state: {
$cond: [
{
$gt: [
{
$size: {
$filter: {
input: "$data.votes.upvotes",
cond: {
$in: [
"$_id.request_user_id",
"$$this"
]
}
}
}
},
0
]
},
1,
{
$cond: [
{
$gt: [
{
$size: {
$filter: {
input: "$data.votes.downvotes",
cond: {
$in: [
"$_id.request_user_id",
"$$this"
]
}
}
}
},
0
]
},
-1,
0
]
}
]
}
}
]
}
}
},
{
$unset: [
"first.data",
"first.votes",
"first.user_id",
"first.request_user_id"
]
},
{
$replaceWith: "$first"
},
{
"$sort": {
"created": -1
}
}
])
MongoPlayground

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Adding additional information to aggregated results - python

Related

Importing a JSON object to a Google Cloud SQL instance authentication error

Search for key in Nested Json in Python

Read a JSON file and select node like in Pandas dataframe

Merging two json files using python

Optimizing MongoDB Aggregation Pipeline (Group, Lookup, Match)

Categories

Resources