I want to combine two JSON files that have the same structure, summing the counts of any entries that already exist in both:
JSON a:
[
{
"productTitle": "Product1",
"api-activity": {
"api1": 1
},
"totalCalls": 1
},
{
"productTitle": "Product2",
"api-activity": {
"api1": 1,
"api2": 2,
"api3": 3,
"api4": 4
},
"totalCalls": 10
}
]
JSON b:
[
{
"productTitle": "Product1",
"api-activity": {
"api1": 1
},
"totalCalls": 1
},
{
"productTitle": "Product2",
"api-activity": {
"api1": 1,
"api2": 2,
"api3": 3,
"api4": 4
},
"totalCalls": 10
},
{
"productTitle": "Product3",
"api-activity": {
"api1": 2
},
"totalCalls": 2
}
]
To obtain something like this:
[
{
"productTitle": "Product1",
"api-activity": {
"api1": 2
},
"totalCalls": 2
},
{
"productTitle": "Product2",
"api-activity": {
"api1": 2,
"api2": 4,
"api3": 6,
"api4": 8
},
"totalCalls": 20
},
{
"productTitle": "Product3",
"api-activity": {
"api1": 2
},
"totalCalls": 2
}
]
I tried to combine them using a previous script of mine, where I compare the existing JSON to a data list (which here is our second JSON), like this:
import json

with open('testa.json') as json_data:
    json_a = json.load(json_data)
with open('testb.json') as json_data:
    json_b = json.load(json_data)

with open('outputMerge.json', 'w') as f:
    data_list = json_a
    for data in json_b:
        title = data["productTitle"]  # get all product titles
        exist = False
        for existing_data in data_list:  # loop over data_list
            if data["api-activity"] in existing_data["api-activity"]:
                print("true")
but I get an error with the keys I use:
Traceback (most recent call last):
File "merge.py", line 17, in
if data["api-activity"] in existing_data["api-activity"]:
TypeError: unhashable type: 'dict'
Can you help me debug this? I think I'm close, but if you have a better solution, that works too.
I would approach this by loading your counts into a Counter() object, using the tuple (title, api) as the key, and then converting that back into your output structure. The combined Counter looks like this:
Counter({('Product2', 'api4'): 8, ('Product2', 'api3'): 6, ('Product2', 'api2'): 4, ('Product1', 'api1'): 2, ('Product2', 'api1'): 2, ('Product3', 'api1'): 2})
This can be done as follows:
from collections import Counter
from itertools import groupby
import json

api_counts = Counter()

def update_counters(json_filename):
    with open(json_filename) as f_json:
        for product in json.load(f_json):
            title = product['productTitle']
            api_counts.update({(title, api): count for api, count in product['api-activity'].items()})

update_counters('testa.json')
update_counters('testb.json')

output = []

for product, apis in groupby(sorted(api_counts.items()), lambda x: x[0][0]):
    api_activity = {}
    total_calls = 0
    for (p, api), count in apis:
        api_activity[api] = count
        total_calls += count
    output.append({'productTitle': product, 'api-activity': api_activity, 'totalCalls': total_calls})

with open('outputMerge.json', 'w') as f_output:
    json.dump(output, f_output, indent=4)
Giving you the following output:
[
{
"productTitle": "Product1",
"api-activity": {
"api1": 2
},
"totalCalls": 2
},
{
"productTitle": "Product2",
"api-activity": {
"api1": 2,
"api2": 4,
"api3": 6,
"api4": 8
},
"totalCalls": 20
},
{
"productTitle": "Product3",
"api-activity": {
"api1": 2
},
"totalCalls": 2
}
]
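Alternatively, if you would rather stay close to your original loop, here is a minimal sketch that merges b into a in place, keyed on productTitle (it assumes each title appears at most once per file and reuses the file names from the question):
import json

with open('testa.json') as f:
    merged = json.load(f)
with open('testb.json') as f:
    json_b = json.load(f)

# Index the existing entries by productTitle for quick lookup
by_title = {entry["productTitle"]: entry for entry in merged}

for entry in json_b:
    existing = by_title.get(entry["productTitle"])
    if existing is None:
        # Product only exists in b: copy it over unchanged
        merged.append(entry)
        by_title[entry["productTitle"]] = entry
    else:
        # Product exists in both: add up the per-API counts and totalCalls
        for api, count in entry["api-activity"].items():
            existing["api-activity"][api] = existing["api-activity"].get(api, 0) + count
        existing["totalCalls"] += entry["totalCalls"]

with open('outputMerge.json', 'w') as f:
    json.dump(merged, f, indent=4)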
Related
I have a DataFrame with lists in one column.
I want to pretty print the data as JSON.
How can I use indentation without the list values in each cell also being indented (i.e. spread over multiple lines)?
An example:
df = pd.DataFrame(range(3))
df["lists"] = [list(range(i+1)) for i in range(3)]
print(df)
output:
   0      lists
0  0        [0]
1  1     [0, 1]
2  2  [0, 1, 2]
Now I want to print the data as JSON using:
print(df.to_json(orient="index", indent=2))
output:
{
"0":{
"0":0,
"lists":[
0
]
},
"1":{
"0":1,
"lists":[
0,
1
]
},
"2":{
"0":2,
"lists":[
0,
1,
2
]
}
}
desired output:
{
"0":{
"0":0,
"lists":[0]
},
"1":{
"0":1,
"lists":[0,1]
},
"2":{
"0":2,
"lists":[0,1,2]
}
}
If you don't mind the lists not being real JSON arrays in the output, you can just convert the list column to string temporarily when printing the DataFrame:
print(df.astype({'lists':'str'}).to_json(orient="index", indent=2))
{
"0":{
"0":0,
"lists":"[0]"
},
"1":{
"0":1,
"lists":"[0, 1]"
},
"2":{
"0":2,
"lists":"[0, 1, 2]"
}
}
If you don't want to see the quote marks, you can use a regex to remove them:
import re

result = re.sub(r'("lists":)"([^"]*)"', r"\1 \2",
                df.astype({'lists':'str'}).to_json(orient="index", indent=2))
print(result)
{
"0":{
"0":0,
"lists": [0]
},
"1":{
"0":1,
"lists": [0, 1]
},
"2":{
"0":2,
"lists": [0, 1, 2]
}
}
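If you need this in more than one place, the same idea can be wrapped in a small helper. This is just a sketch: df_to_json_compact_lists is a made-up name, and it assumes the list values themselves contain no double quotes:
import re
import pandas as pd

def df_to_json_compact_lists(df, list_cols, **to_json_kwargs):
    # Cast the list columns to strings so to_json keeps them on one line,
    # then strip the surrounding quotes from just those columns.
    text = df.astype({col: 'str' for col in list_cols}).to_json(**to_json_kwargs)
    for col in list_cols:
        text = re.sub(rf'("{col}":)"([^"]*)"', r"\1 \2", text)
    return text

df = pd.DataFrame(range(3))
df["lists"] = [list(range(i + 1)) for i in range(3)]
print(df_to_json_compact_lists(df, ["lists"], orient="index", indent=2))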
I have a dict stored under the variable parsed:
{
"8119300029": {
"store": 4,
"total": 4,
"web": 4
},
"8119300030": {
"store": 2,
"total": 2,
"web": 2
},
"8119300031": {
"store": 0,
"total": 0,
"web": 0
},
"8119300032": {
"store": 1,
"total": 1,
"web": 1
},
"8119300033": {
"store": 0,
"total": 0,
"web": 0
},
"8119300034": {
"store": 2,
"total": 2,
"web": 2
},
"8119300036": {
"store": 0,
"total": 0,
"web": 0
},
"8119300037": {
"store": 0,
"total": 0,
"web": 0
},
"8119300038": {
"store": 2,
"total": 2,
"web": 2
},
"8119300039": {
"store": 3,
"total": 3,
"web": 3
},
"8119300040": {
"store": 3,
"total": 3,
"web": 3
},
"8119300041": {
"store": 0,
"total": 0,
"web": 0
}
}
I am trying to get the "web" value from each JSON entry but can only get the key values.
for x in parsed:
    print(x["web"])
I tried doing this ^ but kept getting this error: "string indices must be integers". Can somebody explain why this is wrong?
That's because your x variable is the dict key (a string), not the nested dict:
for x in parsed:
    print(parsed[x]['web'])
A little information on your parsed data there: this is basically a dictionary of dictionaries. I won't go into too much of the nitty gritty but it would do well to read up a bit on json: https://www.w3schools.com/python/python_json.asp
In your example, for x in parsed iterates through the keys of the parsed dictionary, e.g. 8119300029, 8119300030, etc. So x is a key (in this case, a string), not a dictionary. The reason you're getting an error about string indices needing to be integers is that you're trying to index a string -- for example, x[0] would give you the first character 8 of the key 8119300029.
If you need to get each web value, then you need to access that key in the parsed[x] dictionary:
for x in parsed:
    print(parsed[x]["web"])
Output:
4
2
0
...
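Equivalently, you can iterate over the values (or the key/value pairs) directly, which avoids the extra lookup:
for entry in parsed.values():
    print(entry["web"])

# or, if you also need the key:
for key, entry in parsed.items():
    print(key, entry["web"])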
I have a CSV file in a format similar to this
order_id, customer_name, item_1_id, item_1_quantity, Item_2_id, Item_2_quantity, Item_3_id, Item_3_quantity
1, John, 4, 1, 24, 4, 16, 1
2, Paul, 8, 3, 41, 1, 33, 1
3, Andrew, 1, 1, 34, 4, 8, 2
I want to export it to JSON; currently I am doing this:
df = pd.read_csv('simple.csv')
print ( df.to_json(orient = 'records') )
And the output is
[
{
"Item_2_id": 24,
"Item_2_quantity": 4,
"Item_3_id": 16,
"Item_3_quantity": 1,
"customer_name": "John",
"item_1_id": 4,
"item_1_quantity": 1,
"order_id": 1
},
......
However, I would like the output to be
[
{
"customer_name": "John",
"order_id": 1,
"items": [
{ "id": 4, "quantity": 1 },
{ "id": 24, "quantity": 4 },
{ "id": 16, "quantity": 1 },
]
},
......
Any suggestions on a good way to do this?
In this particular project, there will not be more than 5 items per order.
Try the following:
import pandas as pd
import json

output_lst = []
## specify the first row as header
df = pd.read_csv('simple.csv', header=0)

## iterate through all the rows
for index, row in df.iterrows():
    order_dict = {}  # renamed from "dict" so it doesn't shadow the built-in
    items_lst = []
    ## column_list is a list of column headers
    column_list = df.columns.values
    for i, col_name in enumerate(column_list):
        ## for the first 2 columns simply copy the value into the dictionary
        if i < 2:
            element = row[col_name]
            if isinstance(element, str):
                ## strip if it is a string type value
                element = element.strip()
            order_dict[col_name] = element
        elif "_id" in col_name:
            ## i+1 is used assuming that the item quantity comes right after the corresponding item id
            item_dict = {"id": row[col_name], "quantity": row[column_list[i + 1]]}
            items_lst.append(item_dict)
    order_dict["items"] = items_lst
    output_lst.append(order_dict)

print(json.dumps(output_lst))
If you run the above with the simple.csv described in the question, you get the following output:
[
{
"order_id": 1,
"items": [
{
"id": 4,
"quantity": 1
},
{
"id": 24,
"quantity": 4
},
{
"id": 16,
"quantity": 1
}
],
" customer_name": "John"
},
{
"order_id": 2,
"items": [
{
"id": 8,
"quantity": 3
},
{
"id": 41,
"quantity": 1
},
{
"id": 33,
"quantity": 1
}
],
" customer_name": "Paul"
},
{
"order_id": 3,
"items": [
{
"id": 1,
"quantity": 1
},
{
"id": 34,
"quantity": 4
},
{
"id": 8,
"quantity": 2
}
],
" customer_name": "Andrew"
}
]
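Note that the leading space in the " customer_name" key comes from the spaces after the commas in the CSV header; if you want clean keys, read_csv can strip that whitespace for you:
df = pd.read_csv('simple.csv', header=0, skipinitialspace=True)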
Source DF:
In [168]: df
Out[168]:
order_id customer_name item_1_id item_1_quantity Item_2_id Item_2_quantity Item_3_id Item_3_quantity
0 1 John 4 1 24 4 16 1
1 2 Paul 8 3 41 1 33 1
2 3 Andrew 1 1 34 4 8 2
Solution:
In [169]: %paste
import re

x = df[['order_id','customer_name']].copy()

x['id'] = \
    pd.Series(df.loc[:, df.columns.str.contains(r'item_.*?_id', flags=re.I)].values.tolist(),
              index=df.index)

x['quantity'] = \
    pd.Series(df.loc[:, df.columns.str.contains(r'item_.*?_quantity', flags=re.I)].values.tolist(),
              index=df.index)

x.to_json(orient='records')
## -- End pasted text --
Out[169]: '[{"order_id":1,"customer_name":"John","id":[4,24,16],"quantity":[1,4,1]},{"order_id":2,"customer_name":"Paul","id":[8,41,33],"quantity":[3,1,1]},{"order_id":3,"customer_name":"Andrew","id":[1,34,8],"quantity":[1,4,2]}]'
Intermediate helper DF:
In [82]: x
Out[82]:
order_id customer_name id quantity
0 1 John [4, 24, 16] [1, 4, 1]
1 2 Paul [8, 41, 33] [3, 1, 1]
2 3 Andrew [1, 34, 8] [1, 4, 2]
Alternatively, the same JSON can be produced in a single chain:
j = df.set_index(['order_id','customer_name']) \
      .groupby(lambda x: x.split('_')[-1], axis=1) \
      .agg(lambda x: x.values.tolist()) \
      .reset_index() \
      .to_json(orient='records')
import json
Beautified result:
In [122]: print(json.dumps(json.loads(j), indent=2))
[
{
"order_id": 1,
"customer_name": "John",
"id": [
4,
24,
16
],
"quantity": [
1,
4,
1
]
},
{
"order_id": 2,
"customer_name": "Paul",
"id": [
8,
41,
33
],
"quantity": [
3,
1,
1
]
},
{
"order_id": 3,
"customer_name": "Andrew",
"id": [
1,
34,
8
],
"quantity": [
1,
4,
2
]
}
]
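Both variants give you parallel id and quantity lists rather than the nested "items" structure asked for in the question; if you need exactly that shape, the helper DF x above can be post-processed, for example (a sketch reusing x and the json import from above):
records = []
for rec in x.to_dict(orient='records'):
    records.append({
        'order_id': rec['order_id'],
        'customer_name': rec['customer_name'],
        'items': [{'id': i, 'quantity': q}
                  for i, q in zip(rec['id'], rec['quantity'])],
    })
print(json.dumps(records, indent=2))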
I have three tables. One defines groups, with a groupid column, and the other two define users and events, which can each belong to a group.
class InterestGroup(db.Model):
    __tablename__ = "interest_groups"
    groupid = db.Column(db.Integer, primary_key=True)
    groupcode = db.Column(db.String(10))
    groupname = db.Column(db.String(200), unique=True)

class InterestGroupEvent(db.Model):
    __tablename__ = "interest_group_events"
    eventid = db.Column(db.Integer, db.ForeignKey("social_events.eventid"), primary_key=True)
    groupid = db.Column(db.Integer, db.ForeignKey("interest_groups.groupid"), primary_key=True)

class InterestGroupUser(db.Model):
    __tablename__ = "interest_group_users"
    userid = db.Column(db.Integer, db.ForeignKey("users.userid"), primary_key=True)
    groupid = db.Column(db.Integer, db.ForeignKey("interest_groups.groupid"), primary_key=True)
I want to list all the groups, with a count of how many users and how many events belong to each one, even if there aren't any.
I can get all the data I need, as follows:
IGs = InterestGroup.query.all()

IGEs = (
    db.session.query(
        InterestGroupEvent.groupid,
        func.count(InterestGroupEvent.groupid)
    )
    .group_by(InterestGroupEvent.groupid)
    .all()
)

IGUs = (
    db.session.query(
        InterestGroupUser.groupid,
        func.count(InterestGroupUser.groupid)
    )
    .group_by(InterestGroupUser.groupid)
    .all()
)

return json.dumps({
    "groups": [{"groupid": IG.groupid} for IG in IGs],
    "events": [{"groupid": IGE.groupid, "count": IGE[1]} for IGE in IGEs],
    "users": [{"groupid": IGU.groupid, "count": IGU[1]} for IGU in IGUs]
})
which returns the following:
{
"events": [
{
"count": 2,
"groupid": 1
},
{
"count": 1,
"groupid": 2
}
],
"groups": [
{
"groupid": 1
},
{
"groupid": 2
},
{
"groupid": 3
}
],
"users": [
{
"count": 2,
"groupid": 1
},
{
"count": 1,
"groupid": 3
}
]
}
but what I want is the following:
[
{
"groupid": 1,
"eventcount": 2,
"usercount": 2
},
{
"groupid": 2,
"eventcount": 1,
"usercount": 0
},
{
"groupid": 3,
"eventcount": 0,
"usercount": 1
}
]
Obviously I could merge them manually, but I'm sure there's a way to get this directly from the database in a single query. I've tried the following:
IGs = (
    db.session.query(
        InterestGroup.groupid,
        func.count(InterestGroupEvent.groupid),
        func.count(InterestGroupUser.groupid)
    )
    .group_by(InterestGroup.groupid)
    .all()
)
return json.dumps([{"groupid": IG[0], "eventcount": IG[1], "usercount": IG[2]} for IG in IGs])
but that returns this:
[
{
"usercount": 9,
"groupid": 1,
"eventcount": 9
},
{
"usercount": 9,
"groupid": 2,
"eventcount": 9
},
{
"usercount": 9,
"groupid": 3,
"eventcount": 9
}
]
Hmm, this is close:
IGs = (
    db.session.query(
        InterestGroup.groupid,
        func.count(InterestGroupEvent.groupid),
        func.count(InterestGroupUser.groupid)
    )
    .outerjoin(InterestGroupEvent)
    .outerjoin(InterestGroupUser)
    .group_by(InterestGroup.groupid)
    .all()
)
return json.dumps([
    {"groupid": IG[0], "eventcount": IG[1], "usercount": IG[2]} for IG in IGs
])
It returns:
[
{
"usercount": 4,
"groupid": 1,
"eventcount": 4
},
{
"usercount": 0,
"groupid": 2,
"eventcount": 1
},
{
"usercount": 1,
"groupid": 3,
"eventcount": 0
}
]
but the counts for group 1 are wrong: both come back as 4, presumably because the two outer joins cross-multiply the group's 2 events with its 2 users, giving 4 joined rows per group. How should I go about it, please?
EDIT
In the absence of a better solution, this seems to work and will do for now:
IGEs = (
    InterestGroup.query
    .outerjoin(InterestGroupEvent)
    .add_columns(func.count(InterestGroupEvent.groupid))
    .group_by(InterestGroup.groupid)
    .order_by(InterestGroup.groupid)
    .all()
)
IGUs = (
    InterestGroup.query
    .outerjoin(InterestGroupUser)
    .add_columns(func.count(InterestGroupUser.groupid))
    .group_by(InterestGroup.groupid)
    .order_by(InterestGroup.groupid)
    .all()
)

results = []
for i in range(len(IGEs)):
    results.append({"groupid": IGEs[i][0].groupid, "eventcount": IGEs[i][1], "usercount": IGUs[i][1]})
return json.dumps(results)
But I'd still like to know how to do it with a single query.
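One way to do it in a single query is to pre-aggregate each association table in its own subquery and then outer-join both subqueries onto InterestGroup, so the two counts can no longer multiply each other. A sketch (untested, assuming the models and db session from the question plus from sqlalchemy import func):
from sqlalchemy import func

event_counts = (
    db.session.query(
        InterestGroupEvent.groupid.label("groupid"),
        func.count().label("eventcount"),
    )
    .group_by(InterestGroupEvent.groupid)
    .subquery()
)
user_counts = (
    db.session.query(
        InterestGroupUser.groupid.label("groupid"),
        func.count().label("usercount"),
    )
    .group_by(InterestGroupUser.groupid)
    .subquery()
)
rows = (
    db.session.query(
        InterestGroup.groupid,
        func.coalesce(event_counts.c.eventcount, 0),
        func.coalesce(user_counts.c.usercount, 0),
    )
    .outerjoin(event_counts, InterestGroup.groupid == event_counts.c.groupid)
    .outerjoin(user_counts, InterestGroup.groupid == user_counts.c.groupid)
    .all()
)
return json.dumps([
    {"groupid": g, "eventcount": e, "usercount": u} for g, e, u in rows
])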
I created a nested dictionary in Python like this:
{
    "Laptop": {
        "sony": 1,
        "apple": 2,
        "asus": 5
    },
    "Camera": {
        "sony": 2,
        "sumsung": 1,
        "nikon": 4
    },
}
But I couldn't figure out how to write this nested dict to a JSON file. Any comments would be appreciated!
import json

d = {
    "Laptop": {
        "sony": 1,
        "apple": 2,
        "asus": 5,
    },
    "Camera": {
        "sony": 2,
        "sumsung": 1,
        "nikon": 4,
    },
}

with open("my.json", "w") as f:
    json.dump(d, f)
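For a more readable file you can also pass indent (e.g. json.dump(d, f, indent=4)), and you can read the data back in with json.load:
with open("my.json") as f:
    loaded = json.load(f)
print(loaded["Laptop"]["sony"])  # 1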