I want to combine two JSON files that have the same structure, summing the counts of any entries that already exist in both:
JSON a:
[
{
"productTitle": "Product1",
"api-activity": {
"api1": 1
},
"totalCalls": 1
},
{
"productTitle": "Product2",
"api-activity": {
"api1": 1,
"api2": 2,
"api3": 3,
"api4": 4
},
"totalCalls": 10
}
]
JSON b:
[
{
"productTitle": "Product1",
"api-activity": {
"api1": 1
},
"totalCalls": 1
},
{
"productTitle": "Product2",
"api-activity": {
"api1": 1,
"api2": 2,
"api3": 3,
"api4": 4
},
"totalCalls": 10
},
{
"productTitle": "Product3",
"api-activity": {
"api1": 2
},
"totalCalls": 2
}
]
To obtain something like this:
[
{
"productTitle": "Product1",
"api-activity": {
"api1": 2
},
"totalCalls": 2
},
{
"productTitle": "Product2",
"api-activity": {
"api1": 2,
"api2": 4,
"api3": 6,
"api4": 8
},
"totalCalls": 20
},
{
"productTitle": "Product3",
"api-activity": {
"api1": 2
},
"totalCalls": 2
}
]
I tried to combine them using a previous script of mine, where I compare the existing JSON to a data list (which here is our second JSON), like this:
import json

with open('testa.json') as json_data:
    json_a = json.load(json_data)
with open('testb.json') as json_data:
    json_b = json.load(json_data)

with open('outputMerge.json', 'w') as f:
    data_list = json_a
    for data in json_b:
        title = data["productTitle"]  # get all product titles
        exist = False
        for existing_data in data_list:  # loop over data_list
            if data["api-activity"] in existing_data["api-activity"]:
                print("true")
but I get an error with the keys I use:
Traceback (most recent call last):
File "merge.py", line 17, in
if data["api-activity"] in existing_data["api-activity"]:
TypeError: unhashable type: 'dict'
Can you help me debug this? I think I'm close, but if you have a better solution, that works too.
I would approach this by loading your counts into a Counter() object, using the tuple (title, api) as the key, and then converting that back into your output structure. The combined Counter looks like this:
Counter({('Product2', 'api4'): 8, ('Product2', 'api3'): 6, ('Product2', 'api2'): 4, ('Product1', 'api1'): 2, ('Product2', 'api1'): 2, ('Product3', 'api1'): 2})
This can be done as follows:
from collections import Counter
from itertools import groupby
import json

api_counts = Counter()

def update_counters(json_filename):
    with open(json_filename) as f_json:
        for product in json.load(f_json):
            title = product['productTitle']
            api_counts.update({(title, api): count for api, count in product['api-activity'].items()})

update_counters('testa.json')
update_counters('testb.json')

output = []

for product, apis in groupby(sorted(api_counts.items()), lambda x: x[0][0]):
    api_activity = {}
    total_calls = 0
    for (p, api), count in apis:
        api_activity[api] = count
        total_calls += count
    output.append({'productTitle': product, 'api-activity': api_activity, 'totalCalls': total_calls})

with open('outputMerge.json', 'w') as f_output:
    json.dump(output, f_output, indent=4)
Giving you the following output:
[
{
"productTitle": "Product1",
"api-activity": {
"api1": 2
},
"totalCalls": 2
},
{
"productTitle": "Product2",
"api-activity": {
"api1": 2,
"api2": 4,
"api3": 6,
"api4": 8
},
"totalCalls": 20
},
{
"productTitle": "Product3",
"api-activity": {
"api1": 2
},
"totalCalls": 2
}
]
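Alternatively, if you would rather stay close to your original loop, here is a minimal sketch that merges b into a in place, keyed on productTitle (it assumes each title appears at most once per file and reuses the file names from the question):
import json

with open('testa.json') as f:
    merged = json.load(f)
with open('testb.json') as f:
    json_b = json.load(f)

# Index the existing entries by productTitle for quick lookup
by_title = {entry["productTitle"]: entry for entry in merged}

for entry in json_b:
    existing = by_title.get(entry["productTitle"])
    if existing is None:
        # Product only exists in b: copy it over unchanged
        merged.append(entry)
        by_title[entry["productTitle"]] = entry
    else:
        # Product exists in both: add up the per-API counts and totalCalls
        for api, count in entry["api-activity"].items():
            existing["api-activity"][api] = existing["api-activity"].get(api, 0) + count
        existing["totalCalls"] += entry["totalCalls"]

with open('outputMerge.json', 'w') as f:
    json.dump(merged, f, indent=4)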
Related
I have a DataFrame with lists in one column.
I want to pretty print the data as JSON.
How can I use indentation without the list values in each cell also being indented (i.e. spread over multiple lines)?
An example:
df = pd.DataFrame(range(3))
df["lists"] = [list(range(i+1)) for i in range(3)]
print(df)
output:
   0      lists
0  0        [0]
1  1     [0, 1]
2  2  [0, 1, 2]
Now I want to print the data as JSON using:
print(df.to_json(orient="index", indent=2))
output:
{
"0":{
"0":0,
"lists":[
0
]
},
"1":{
"0":1,
"lists":[
0,
1
]
},
"2":{
"0":2,
"lists":[
0,
1,
2
]
}
}
desired output:
{
"0":{
"0":0,
"lists":[0]
},
"1":{
"0":1,
"lists":[0,1]
},
"2":{
"0":2,
"lists":[0,1,2]
}
}
If you don't mind the lists not being real JSON arrays in the output, you can just convert the list column to string temporarily when printing the DataFrame:
print(df.astype({'lists':'str'}).to_json(orient="index", indent=2))
{
"0":{
"0":0,
"lists":"[0]"
},
"1":{
"0":1,
"lists":"[0, 1]"
},
"2":{
"0":2,
"lists":"[0, 1, 2]"
}
}
If you don't want to see the quote marks, you can use a regex to remove them:
import re

result = re.sub(r'("lists":)"([^"]*)"', r"\1 \2",
                df.astype({'lists':'str'}).to_json(orient="index", indent=2))
print(result)
{
"0":{
"0":0,
"lists": [0]
},
"1":{
"0":1,
"lists": [0, 1]
},
"2":{
"0":2,
"lists": [0, 1, 2]
}
}
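If you need this in more than one place, the same idea can be wrapped in a small helper. This is just a sketch: df_to_json_compact_lists is a made-up name, and it assumes the list values themselves contain no double quotes:
import re
import pandas as pd

def df_to_json_compact_lists(df, list_cols, **to_json_kwargs):
    # Cast the list columns to strings so to_json keeps them on one line,
    # then strip the surrounding quotes from just those columns.
    text = df.astype({col: 'str' for col in list_cols}).to_json(**to_json_kwargs)
    for col in list_cols:
        text = re.sub(rf'("{col}":)"([^"]*)"', r"\1 \2", text)
    return text

df = pd.DataFrame(range(3))
df["lists"] = [list(range(i + 1)) for i in range(3)]
print(df_to_json_compact_lists(df, ["lists"], orient="index", indent=2))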
I have a dict stored under the variable parsed:
{
"8119300029": {
"store": 4,
"total": 4,
"web": 4
},
"8119300030": {
"store": 2,
"total": 2,
"web": 2
},
"8119300031": {
"store": 0,
"total": 0,
"web": 0
},
"8119300032": {
"store": 1,
"total": 1,
"web": 1
},
"8119300033": {
"store": 0,
"total": 0,
"web": 0
},
"8119300034": {
"store": 2,
"total": 2,
"web": 2
},
"8119300036": {
"store": 0,
"total": 0,
"web": 0
},
"8119300037": {
"store": 0,
"total": 0,
"web": 0
},
"8119300038": {
"store": 2,
"total": 2,
"web": 2
},
"8119300039": {
"store": 3,
"total": 3,
"web": 3
},
"8119300040": {
"store": 3,
"total": 3,
"web": 3
},
"8119300041": {
"store": 0,
"total": 0,
"web": 0
}
}
I am trying to get the "web" value from each JSON entry but can only get the key values.
for x in parsed:
    print(x["web"])
I tried doing this ^ but kept getting this error: "string indices must be integers". Can somebody explain why this is wrong?
That's because your x variable is the dict key (a string), not the nested dict:
for x in parsed:
    print(parsed[x]['web'])
A little information on your parsed data there: this is basically a dictionary of dictionaries. I won't go into too much of the nitty gritty but it would do well to read up a bit on json: https://www.w3schools.com/python/python_json.asp
In your example, for x in parsed iterates through the keys of the parsed dictionary, e.g. 8119300029, 8119300030, etc. So x is a key (in this case, a string), not a dictionary. The reason you're getting an error about string indices needing to be integers is that you're trying to index a string -- for example, x[0] would give you the first character 8 of the key 8119300029.
If you need to get each web value, then you need to access that key in the parsed[x] dictionary:
for x in parsed:
    print(parsed[x]["web"])
Output:
4
2
0
...
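Equivalently, you can iterate over the values (or the key/value pairs) directly, which avoids the extra lookup:
for entry in parsed.values():
    print(entry["web"])

# or, if you also need the key:
for key, entry in parsed.items():
    print(key, entry["web"])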
I have a CSV file in a format similar to this
order_id, customer_name, item_1_id, item_1_quantity, Item_2_id, Item_2_quantity, Item_3_id, Item_3_quantity
1, John, 4, 1, 24, 4, 16, 1
2, Paul, 8, 3, 41, 1, 33, 1
3, Andrew, 1, 1, 34, 4, 8, 2
I want to export it to JSON; currently I am doing this:
df = pd.read_csv('simple.csv')
print ( df.to_json(orient = 'records') )
And the output is
[
{
"Item_2_id": 24,
"Item_2_quantity": 4,
"Item_3_id": 16,
"Item_3_quantity": 1,
"customer_name": "John",
"item_1_id": 4,
"item_1_quantity": 1,
"order_id": 1
},
......
However, I would like the output to be
[
{
"customer_name": "John",
"order_id": 1,
"items": [
{ "id": 4, "quantity": 1 },
{ "id": 24, "quantity": 4 },
{ "id": 16, "quantity": 1 },
]
},
......
Any suggestions on a good way to do this?
In this particular project, there will not be more than 5 items per order.
Try the following:
import pandas as pd
import json

output_lst = []
## specify the first row as header
df = pd.read_csv('simple.csv', header=0)

## iterate through all the rows
for index, row in df.iterrows():
    order_dict = {}  # renamed from "dict" so it doesn't shadow the built-in
    items_lst = []
    ## column_list is a list of column headers
    column_list = df.columns.values
    for i, col_name in enumerate(column_list):
        ## for the first 2 columns simply copy the value into the dictionary
        if i < 2:
            element = row[col_name]
            if isinstance(element, str):
                ## strip if it is a string type value
                element = element.strip()
            order_dict[col_name] = element
        elif "_id" in col_name:
            ## i+1 is used assuming that the item quantity comes right after the corresponding item id
            item_dict = {"id": row[col_name], "quantity": row[column_list[i + 1]]}
            items_lst.append(item_dict)
    order_dict["items"] = items_lst
    output_lst.append(order_dict)

print(json.dumps(output_lst))
If you run the above with the simple.csv described in the question, you get the following output:
[
{
"order_id": 1,
"items": [
{
"id": 4,
"quantity": 1
},
{
"id": 24,
"quantity": 4
},
{
"id": 16,
"quantity": 1
}
],
" customer_name": "John"
},
{
"order_id": 2,
"items": [
{
"id": 8,
"quantity": 3
},
{
"id": 41,
"quantity": 1
},
{
"id": 33,
"quantity": 1
}
],
" customer_name": "Paul"
},
{
"order_id": 3,
"items": [
{
"id": 1,
"quantity": 1
},
{
"id": 34,
"quantity": 4
},
{
"id": 8,
"quantity": 2
}
],
" customer_name": "Andrew"
}
]
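Note that the leading space in the " customer_name" key comes from the spaces after the commas in the CSV header; if you want clean keys, read_csv can strip that whitespace for you:
df = pd.read_csv('simple.csv', header=0, skipinitialspace=True)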
Source DF:
In [168]: df
Out[168]:
order_id customer_name item_1_id item_1_quantity Item_2_id Item_2_quantity Item_3_id Item_3_quantity
0 1 John 4 1 24 4 16 1
1 2 Paul 8 3 41 1 33 1
2 3 Andrew 1 1 34 4 8 2
Solution:
In [169]: %paste
import re

x = df[['order_id','customer_name']].copy()

x['id'] = \
    pd.Series(df.loc[:, df.columns.str.contains(r'item_.*?_id', flags=re.I)].values.tolist(),
              index=df.index)

x['quantity'] = \
    pd.Series(df.loc[:, df.columns.str.contains(r'item_.*?_quantity', flags=re.I)].values.tolist(),
              index=df.index)

x.to_json(orient='records')
## -- End pasted text --
Out[169]: '[{"order_id":1,"customer_name":"John","id":[4,24,16],"quantity":[1,4,1]},{"order_id":2,"customer_name":"Paul","id":[8,41,33],"quantity":[3,1,1]},{"order_id":3,"customer_name":"Andrew","id":[1,34,8],"quantity":[1,4,2]}]'
Intermediate helper DF:
In [82]: x
Out[82]:
order_id customer_name id quantity
0 1 John [4, 24, 16] [1, 4, 1]
1 2 Paul [8, 41, 33] [3, 1, 1]
2 3 Andrew [1, 34, 8] [1, 4, 2]
Alternatively, the same JSON can be produced in a single chain:
j = df.set_index(['order_id','customer_name']) \
      .groupby(lambda x: x.split('_')[-1], axis=1) \
      .agg(lambda x: x.values.tolist()) \
      .reset_index() \
      .to_json(orient='records')
import json
Beautified result:
In [122]: print(json.dumps(json.loads(j), indent=2))
[
{
"order_id": 1,
"customer_name": "John",
"id": [
4,
24,
16
],
"quantity": [
1,
4,
1
]
},
{
"order_id": 2,
"customer_name": "Paul",
"id": [
8,
41,
33
],
"quantity": [
3,
1,
1
]
},
{
"order_id": 3,
"customer_name": "Andrew",
"id": [
1,
34,
8
],
"quantity": [
1,
4,
2
]
}
]
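Both variants give you parallel id and quantity lists rather than the nested "items" structure asked for in the question; if you need exactly that shape, the helper DF x above can be post-processed, for example (a sketch reusing x and the json import from above):
records = []
for rec in x.to_dict(orient='records'):
    records.append({
        'order_id': rec['order_id'],
        'customer_name': rec['customer_name'],
        'items': [{'id': i, 'quantity': q}
                  for i, q in zip(rec['id'], rec['quantity'])],
    })
print(json.dumps(records, indent=2))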
I have three tables. One defines groups, with a groupid column, and the other two define users and events, which can each belong to a group.
class InterestGroup(db.Model):
    __tablename__ = "interest_groups"
    groupid = db.Column(db.Integer, primary_key=True)
    groupcode = db.Column(db.String(10))
    groupname = db.Column(db.String(200), unique=True)

class InterestGroupEvent(db.Model):
    __tablename__ = "interest_group_events"
    eventid = db.Column(db.Integer, db.ForeignKey("social_events.eventid"), primary_key=True)
    groupid = db.Column(db.Integer, db.ForeignKey("interest_groups.groupid"), primary_key=True)

class InterestGroupUser(db.Model):
    __tablename__ = "interest_group_users"
    userid = db.Column(db.Integer, db.ForeignKey("users.userid"), primary_key=True)
    groupid = db.Column(db.Integer, db.ForeignKey("interest_groups.groupid"), primary_key=True)
I want to list all the groups, with a count of how many users and how many events belong to each one, even if there aren't any.
I can get all the data I need, as follows:
IGs = InterestGroup.query.all()

IGEs = (
    db.session.query(
        InterestGroupEvent.groupid,
        func.count(InterestGroupEvent.groupid)
    )
    .group_by(InterestGroupEvent.groupid)
    .all()
)

IGUs = (
    db.session.query(
        InterestGroupUser.groupid,
        func.count(InterestGroupUser.groupid)
    )
    .group_by(InterestGroupUser.groupid)
    .all()
)

return json.dumps({
    "groups": [{"groupid": IG.groupid} for IG in IGs],
    "events": [{"groupid": IGE.groupid, "count": IGE[1]} for IGE in IGEs],
    "users": [{"groupid": IGU.groupid, "count": IGU[1]} for IGU in IGUs]
})
which returns the following:
{
"events": [
{
"count": 2,
"groupid": 1
},
{
"count": 1,
"groupid": 2
}
],
"groups": [
{
"groupid": 1
},
{
"groupid": 2
},
{
"groupid": 3
}
],
"users": [
{
"count": 2,
"groupid": 1
},
{
"count": 1,
"groupid": 3
}
]
}
but what I want is the following:
[
{
"groupid": 1,
"eventcount": 2,
"usercount": 2
},
{
"groupid": 2,
"eventcount": 1,
"usercount": 0
},
{
"groupid": 3,
"eventcount": 0,
"usercount": 1
}
]
Obviously I could merge them manually, but I'm sure there's a way to get this directly from the database in a single query. I've tried the following:
IGs = (
    db.session.query(
        InterestGroup.groupid,
        func.count(InterestGroupEvent.groupid),
        func.count(InterestGroupUser.groupid)
    )
    .group_by(InterestGroup.groupid)
    .all()
)
return json.dumps([{"groupid": IG[0], "eventcount": IG[1], "usercount": IG[2]} for IG in IGs])
but that returns this:
[
{
"usercount": 9,
"groupid": 1,
"eventcount": 9
},
{
"usercount": 9,
"groupid": 2,
"eventcount": 9
},
{
"usercount": 9,
"groupid": 3,
"eventcount": 9
}
]
Hmm, this is close:
IGs = (
    db.session.query(
        InterestGroup.groupid,
        func.count(InterestGroupEvent.groupid),
        func.count(InterestGroupUser.groupid)
    )
    .outerjoin(InterestGroupEvent)
    .outerjoin(InterestGroupUser)
    .group_by(InterestGroup.groupid)
    .all()
)
return json.dumps([
    {"groupid": IG[0], "eventcount": IG[1], "usercount": IG[2]} for IG in IGs
])
It returns:
[
{
"usercount": 4,
"groupid": 1,
"eventcount": 4
},
{
"usercount": 0,
"groupid": 2,
"eventcount": 1
},
{
"usercount": 1,
"groupid": 3,
"eventcount": 0
}
]
but the counts for group 1 are wrong: both come back as 4, presumably because the two outer joins cross-multiply the group's 2 events with its 2 users, giving 4 joined rows per group. How should I go about it, please?
EDIT
In the absence of a better solution, this seems to work and will do for now:
IGEs = (
    InterestGroup.query
    .outerjoin(InterestGroupEvent)
    .add_columns(func.count(InterestGroupEvent.groupid))
    .group_by(InterestGroup.groupid)
    .order_by(InterestGroup.groupid)
    .all()
)
IGUs = (
    InterestGroup.query
    .outerjoin(InterestGroupUser)
    .add_columns(func.count(InterestGroupUser.groupid))
    .group_by(InterestGroup.groupid)
    .order_by(InterestGroup.groupid)
    .all()
)

results = []
for i in range(len(IGEs)):
    results.append({"groupid": IGEs[i][0].groupid, "eventcount": IGEs[i][1], "usercount": IGUs[i][1]})
return json.dumps(results)
But I'd still like to know how to do it with a single query.
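One way to do it in a single query is to pre-aggregate each association table in its own subquery and then outer-join both subqueries onto InterestGroup, so the two counts can no longer multiply each other. A sketch (untested, assuming the models and db session from the question plus from sqlalchemy import func):
from sqlalchemy import func

event_counts = (
    db.session.query(
        InterestGroupEvent.groupid.label("groupid"),
        func.count().label("eventcount"),
    )
    .group_by(InterestGroupEvent.groupid)
    .subquery()
)
user_counts = (
    db.session.query(
        InterestGroupUser.groupid.label("groupid"),
        func.count().label("usercount"),
    )
    .group_by(InterestGroupUser.groupid)
    .subquery()
)
rows = (
    db.session.query(
        InterestGroup.groupid,
        func.coalesce(event_counts.c.eventcount, 0),
        func.coalesce(user_counts.c.usercount, 0),
    )
    .outerjoin(event_counts, InterestGroup.groupid == event_counts.c.groupid)
    .outerjoin(user_counts, InterestGroup.groupid == user_counts.c.groupid)
    .all()
)
return json.dumps([
    {"groupid": g, "eventcount": e, "usercount": u} for g, e, u in rows
])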
I created a nested dictionary in Python like this:
{
    "Laptop": {
        "sony": 1,
        "apple": 2,
        "asus": 5
    },
    "Camera": {
        "sony": 2,
        "sumsung": 1,
        "nikon": 4
    },
}
But I couldn't figure out how to write this nested dict to a JSON file. Any comments would be appreciated!
import json

d = {
    "Laptop": {
        "sony": 1,
        "apple": 2,
        "asus": 5,
    },
    "Camera": {
        "sony": 2,
        "sumsung": 1,
        "nikon": 4,
    },
}

with open("my.json", "w") as f:
    json.dump(d, f)
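For a more readable file you can also pass indent (e.g. json.dump(d, f, indent=4)), and you can read the data back in with json.load:
with open("my.json") as f:
    loaded = json.load(f)
print(loaded["Laptop"]["sony"])  # 1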