Convert Nested JSON into CSV File

Convert Nested JSON into CSV File - python

I want to create a CSV file with the columns 'Duration' and 'Attention' from a nested JSON file that looks like this:
[
{
"Id": {
"Address": "",
"Address": "",
"Name": "Avg"
},
"position": 0,
"sender": [
false
],
"Atten": 0,
"Duration": 5,
"End": 1645,
"EndRaw": 16040.395,
"Index": 0,
"__v": 0
},
{
"Id": {
"local": "",
"Address": "",
"trafficPairName": "Avg"
},
"position": 0,
"sender": [
false
],
"Atten": 1,
"Duration": 5,
"End": 1652,
"EndRaw": 166140.3618,
"Index": 1,
"__v": 0
}
]
I am getting an error on the for loop that says "list indices must be integers or slices, not str". How do I properly parse this JSON file and create the CSV file?
Here is my Python code:
import json
import csv
with open('averages.json') as f:
data = json.load(f)
filename = "data.csv"
with open(filename, "w") as file:
csv_file = csv.writer(file)
csv_file.writerow(["Duration", "Attention"])
for item in data["Id"]:
csv_file.writerow([])

data is list of dicts, and from each dict you want the values for pair of keys.
for item in data["pairId"]:
csv_file.writerow([])
should be
for item in data:
csv_file.writerow([item.get("stepDuration"), item.get("stepIndexAtten")])
or in one-line list comprehension
csv_file.writerows(([item.get("stepDuration"), item.get("stepIndexAtten")] for item in data))

Not sure why are you taking the data["pairId"] if you need the stepDuration and stepIndexAtten?
This should work for you,
import json
import csv
with open('averageThroughputVsTime-Aggregated.json') as f:
data = json.load(f)
fname = "output.csv"
with open(fname, "w") as file:
csv_file = csv.writer(file)
csv_file.writerow(["stepDuration", "stepIndexAtten"])
for key, value in enumerate(data):
csv_file.writerow([key, value["stepDuration"], value["stepIndexAtten"]])
output.csv
stepDuration,stepIndexAtten
0,5,0
1,5,1

Related

generate JSON file content

I am biggener with JSON
I have the following in my JSON file (file-1) :
{
"Aps": [
{
"type": "opo",
"price_min": 7,
"price_max":10,
"app_time": 0,
"arguments": {
"prices": 15,
"apps_num": "112"
},
"attributes": {
"name":"user1",
"priority":"100"
}
}
}
How write python code that generates another JSON file that contain the same content of file-1 but duplicated 100 time in each time the name of user is different user2, user3 ... user100 and also it's priority.
I have tried the following but it is not working :
for lp in range(100):
with open("sample.json", "w") as outfile:
outfile.write(json_object)
but it is not working ..
the required output is as follow :
{
"Aps": [
{
"type": "opo",
"price_min": 7,
"price_max":10,
"app_time": 0,
"arguments": {
"prices": 15,
"apps_num": "112"
},
"attributes": {
"name":"user1",
"priority":"100"
}
},
{
"type": "opo",
"price_min": 7,
"price_max":10,
"app_time": 0,
"arguments": {
"prices": 15,
"apps_num": "112"
},
"attributes": {
"name":"user2",
"priority":"90"
}
},
{
"type": "opo",
"price_min": 7,
"price_max":10,
"app_time": 0,
"arguments": {
"prices": 15,
"apps_num": "112"
},
"attributes": {
"name":"user2",
"priority":"80"
}
},
..............
}

I made a little code here using json and copy module
json for reading and writing json files
copy because I had some trouble with reference variables see documentation for copy; if I changed 'temp' dict then it would affect all occurrences in 'file' dict
import json
import copy
repeats = 100
file = json.loads(open('file.json', 'r').read())
temp1 = json.loads(open('file.json', 'r').read())['Aps'][0]
for repeat in range(repeats):
temp = copy.deepcopy(temp1)
temp['attributes']['name'] = f"user{repeat + 2}"
temp['attributes']['priority'] = f"{repeat*10+100 - repeat*20}"
file['Aps'].append(temp)
temp1 = copy.deepcopy(temp)
json.dump(file, open('file1.json', 'w'), indent=4)

You should first convert your json file to a python object (dict):
import json
file = open('sample.json')
data = json.load(file)
file.close()
Now you can do stuff with your Aps list, like appending the first object 100 times to your list.
for dups in range(100):
data['Aps'].append(data['Aps'][0])
Then you save your dict to a json file again:
with open("sample.json", "w") as outputfile:
json.dump(data, outputfile)

Convert CSV with nested headers to JSON

So far, I have this code (with help from a tutorial):
import csv, json
csvFilePath = "convertcsv.csv"
jsonFilePath = "newResult.json"
# Read the CSV and add the data to a dictionary...
data = {}
with open(csvFilePath) as csvFile:
csvReader = csv.DictReader(csvFile)
for csvRow in csvReader:
data = csvRow
# Write data to a JSON file...
with open(jsonFilePath, "w") as jsonFile:
jsonFile.write(json.dumps(data, indent=4))
My desired output is this:
{
"userID": "string",
"username": "string",
"age": "string",
"location": {
"streetName": "string",
"streetNo": "string",
"city": "string"
}
}
I don't know how to represent the "location".
My actual result is this:
{
"userID": "string",
"username": "string",
"age": "string",
"location/streetName": "string",
"location/streetNo": "string",
"location/city": "string",
}
How can I seperate streetName, streetNo and city and put them into "location"?

Below is a simple script should do what you want. The result will be a json object with the "userID" as keys. Note that, to test deeper nesting, I used a csv file with slightly different headers - but it will work just as well with your original example.
import csv, json
infile = 'convertcsv.csv'
outfile = 'newResult.json'
data = {}
def process(header, value, record):
key, other = header.partition('/')[::2]
if other:
process(other, value, record.setdefault(key, {}))
else:
record[key] = value
with open(infile) as stream:
reader = csv.DictReader(stream)
for row in reader:
data[row['userID']] = record = {}
for header, value in row.items():
process(header, value, record)
with open(outfile, "w") as stream:
json.dump(data, stream, indent=4)
INPUT:
userID,username,age,location/street/name,location/street/number,location/city
0,AAA,20,This Street,5,This City
1,BBB,42,That Street,5,That City
2,CCC,34,Other Street,5,Other City
OUTPUT:
{
"0": {
"userID": "0",
"username": "AAA",
"age": "20",
"location": {
"street": {
"name": "This Street",
"number": "5"
},
"city": "This City"
}
},
"1": {
"userID": "1",
"username": "BBB",
"age": "42",
"location": {
"street": {
"name": "That Street",
"number": "5"
},
"city": "That City"
}
},
"2": {
"userID": "2",
"username": "CCC",
"age": "34",
"location": {
"street": {
"name": "Other Street",
"number": "5"
},
"city": "Other City"
}
}
}

I'd add some custom logic to achieve this, note that this is for the first level only, if you want more, you should create a recoursive function:
# Write data to a JSON file...
with open(jsonFilePath, "w") as jsonFile:
for i, v in data.items():
if '/' in i:
parts = i.split('/', 1)
data[parts[0]] = {parts[1]: v}
data.pop(i)
jsonFile.write(json.dumps(data, indent=4))

You can use something like this:
# https://www.geeksforgeeks.org/convert-csv-to-json-using-python/
import csv
import json
# Function to convert a CSV to JSON
# Takes the file paths as arguments
def make_json(csvFilePath, jsonFilePath):
# create a dictionary
data = {}
# Open a csv reader called DictReader
with open(csvFilePath, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
# Convert each row into a dictionary
# and add it to data
for rows in csvReader:
# Assuming a column named 'No' to
# be the primary key
key = rows['No']
data[key] = rows
# Open a json writer, and use the json.dumps()
# function to dump data
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, indent=4))
# Driver Code
# Decide the two file paths according to your
# computer system
csvFilePath = r'Names.csv'
jsonFilePath = r'Names.json'
# Call the make_json function
make_json(csvFilePath, jsonFilePath)
For more information check out https://www.geeksforgeeks.org/convert-csv-to-json-using-python/

Python TypeError: '_csv.writer' object is not iterable

I am parsing json to csv. But am getting error as below:
for i in data:
TypeError: '_csv.writer' object is not iterable
Code:
import json
import csv
with open("Data.json", 'r') as file:
data = json.load(file)
CSV_File = 'Data.csv'
with open(CSV_File, 'w') as file:
data = csv.writer(file)
data.writerow([])
for i in data:
data.writerow([])
Data
{
"id": "kljhfksdhkhd",
"name": "BOB",
"birthday": "08/03/1993",
"languages": [
{
"id": "106059522759137",
"name": "English language"
},
{
"id": "107617475934611",
"name": "Telugu language"
},
{
"id": "112969428713061",
"name": "Hindi"
},
{
"id": "343306413260",
"name": "Tamil language"
},
{
"id": "100904156616786",
"name": "Kannada"
}
],
"games": {
"data": [
{
"name": "Modern Combat",
"id": "12134323",
"created_time": "2019-02-21T18:39:41+0000"
},
{
"name": "Cards",
"id": "343232",
"created_time": "2011-06-01T11:13:31+0000"
},
{
"name": "Shuttle Badminton",
"id": "43214321",
"created_time": "2011-06-01T11:13:31+0000"
},
{
"name": "Carrom",
"id": "49y497",
"created_time": "2011-06-01T11:13:31+0000"
},
{
"name": "Chess",
"id": "0984080830",
"created_time": "2011-06-01T11:13:31+0000"
}
],
"paging": {
"cursors": {
"before": "dkkskd",
"after": "dlldlkd"
}
}
}
}

First off, the name data has been assigned to two different objects. Python permits this each assignment overwrites the previous. In the code, data is initially the data from the json file, then a csv.writer instance. A sensible improvement, therefore, is to name the writer writer, and change the code accordingly:
import json
import csv
with open("Data.json", 'r') as file:
data = json.load(file)
CSV_File = 'Data.csv'
with open(CSV_File, 'w') as file:
writer = csv.writer(file)
writer.writerow([])
for i in data:
writer.writerow([])
Now let's deal with how we are writing to the file. writer.writerow expects a list, but writing an empty list: writer.writerow([]) isn't very useful. Probably you want to write the json data to the csv file, so leet's get rid of the empty lists, and indent the writing loop so that it's inside the with block (otherwise the file will be closed).
import json
import csv
with open("Data.json", 'r') as file:
data = json.load(file)
CSV_File = 'Data.csv'
with open(CSV_File, 'w') as file:
writer = csv.writer(file)
for row in data:
writer.writerow(row)
This will work if the json data is a list of lists, that is it looks like this:
[[...], [...], [...], ...]
because each element of the outer list is a list, so iterating over it (for row in data:) yields a list, which writer.writerow can handle. However it's not uncommon for json data to be in the form of a dictionary:
{"k1": [....], "k2": [...], "k3": [...], ...}
In this case, you might want to iterate over the dictionary's values, if they are list:
for row in data.values():
writer.writerow(row)
Finally, the json may be an irregular mix of lists and dictionaries, and may be arbitrarily nested. It's up to you to determine how to map nested json data to the flat csv format.

How to delete values (in my case: 'phones') from json file

I'm new to json file syntax and I was wondering how to delete "phones" and its values from my json file so that end, start, alignedWord, and word are my only values in the words key.
My Code:
with open('dest_file.json', 'w') as dest_file:
with open('test1.json', 'r') as j:
contents = json.loads(j.read())
for value in contents['words']:
del(value['case'])
del(value['endOffset'])
del(value['startOffset'])
dest_file.write(json.dumps(contents,indent = 4))
Example JSON object:
"words": [
{
"alignedWord": "the",
"end": 6.31,
"phones": [
{
"duration": 0.09,
"phone": "dh_B"
},
{
"duration": 0.05,
"phone": "iy_E"
}
],
"start": 6.17,
"word": "The"
},
In addition to this, what exactly datatype is phones exactly and why was I not able to delete it via my current code?

"phones" in above case will be a list of dictionaries. Also the "words" in itself is list of dictionaries itself. You need to access items in list first and then can jump on to dictionary pops as suggested above.
words =[
{
"alignedWord": "the",
"end": 6.31,
"phones": [
{
"duration": 0.09,
"phone": "dh_B"
},
{
"duration": 0.05,
"phone": "iy_E"
}
],
"start": 6.17,
"word": "The"
}
]
to display the single element:
words[0]['phones']
to remove:
words[0].pop('phones')
you can use above code to iterate and pop as suggested!

Try this:
If you want to save the edited json to a new file:
import json
with open('dest_file.json', 'w') as dest_file:
with open('source_file.json', 'r') as source_file:
for line in source_file:
element = json.loads(line.strip())
if 'phones' in element:
del element['phones']
dest_file.write(json.dumps(element))
If you want to re-write to the same json after editing:
import json
with open('dest_file.json') as data_file:
data = json.load(data_file)
for element in data:
if 'phones' in element:
del element['phones']
with open('dest_file.json', 'w') as data_file:
data = json.dump(data, data_file)

Inside a JSON elements, you can manipulate data either like an array / list, or as a dict. In this case, you can do value.pop("phones").
e.g.
import json
data = '''
[
{
"alignedWord": "the",
"end": 6.31,
"phones": [
{
"duration": 0.09,
"phone": "dh_B"
},
{
"duration": 0.05,
"phone": "iy_E"
}
],
"start": 6.17,
"word": "The"
}]'''
contents = json.loads(data)
for value in contents:
value.pop('phones')
print(json.dumps(contents))
See it running: https://repl.it/repls/DarkgreyLightcoralInstitution

Convert csv to json multi-document?

I have below two requirement using Python:
Convert csv to multi-document json.
Ignore "" or null objects.
Have mentioned both code and csv. Currently I am getting only json array objects but I need to create in multi-document json.
my csv
_id,riderDetails.0.category,riderDetails.0.code,riderDetails.1.category,riderDetails.1.code
1111,re,remg,er,error
2111,we,were,ty,
code
import csv
import json
def make_record(row):
return {
"_id" : row["_id"],
"riderDetails" : [
{
"category" : row["riderDetails.0.category"],
"code" : row["riderDetails.0.code"],
},
{
"category" : row["riderDetails.1.category"] ,
"code" : row["riderDetails.1.code"],
}
]
}
with open('N:/Exide/Mongo/rr22.csv', 'r', newline='') as csvfile:
reader = csv.DictReader(csvfile, delimiter=',')
with open('N:/Exide/Mongo/mm22.json', 'w') as jsonfile:
out = json.dumps([make_record(row) for row in reader])
jsonfile.write(out)
Code Output
[{
"_id": "1111",
"riderDetails": [
{
"category": "re",
"code": "remg"
},
{
"category": "er",
"code": "error"
}
]
},
{
"_id": "2111",
"riderDetails": [
{
"category": "we",
"code": "were"
},
{
"category": "",
"code": ""
}
]
}]
Expected Output
{
"_id": "1111",
"riderDetails": [
{
"category": "re",
"code": "remg"
},
{
"category": "er",
"code": "error"
}
]
}
{
"_id": "2111",
"riderDetails": [
{
"category": "we",
"code": "were"
}
]
}
Can someone help me in achieving expected output?

The data in the my csv file in your question doesn't produce the output shown, but that's probably due to a minor posting error, so I'll ignore it.
Also note that the file you are producing isn't a strictly valid JSON format file — perhaps that's what you meant by the term "multi-document json"…
Regardless, you accomplish what you need by modifying the make_record() function so it "cleans-up" the record and removes any empty/missing values before it returns it.
This is done in two steps.
First go through from each detail in riderDetails and remove any keys that have empty values.
Lastly, go though each riderDetails again and remove any details that are completely empty (because the first step removed all of it contents or none were provided in the csv file being read).
import csv
import json
csv_inp = 'rr22.csv'
json_outp = 'mm22.json'
def make_record(row):
# Reformat data is row.
record = {
"_id": row["_id"],
"riderDetails": [
{
"category": row["riderDetails.0.category"],
"code": row["riderDetails.0.code"],
},
{
"category": row["riderDetails.1.category"],
"code": row["riderDetails.1.code"],
}
]
}
# Remove empty values from each riderDetail.
record['riderDetails'] = [{key: value for key, value in riderDetail.items() if value}
for riderDetail in record['riderDetails']]
# Remove completely empty riderDetails.
record['riderDetails'] = [riderDetail for riderDetail in record['riderDetails']
if riderDetail]
return record
with open(csv_inp, 'r', newline='') as csvfile, \
open(json_outp, 'w') as jsonfile:
for row in csv.DictReader(csvfile, delimiter=','):
jsonfile.write(json.dumps(make_record(row), indent=4) + '\n')
# jsonfile.write(json.dumps(make_record(row)) + '\n')

using glob
import glob, os
pt = 'N:/Exide/Mongo/*.csv'
for file in glob.glob(pt):
get_name = file.split("/")[-1].replace(".csv",".json")
with open(file , 'r', newline='') as csvfile:
reader = csv.DictReader(csvfile, delimiter=',')
out = [make_record(row) for row in reader]
saving_path = os.path.join('N:/Exide/Mongo/',get_name)
with open(saving_path , 'w') as jsonfile:
json.dump(out , jsonfile)
you get [{},{}] becuse you writing list of dictionary into file

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Convert Nested JSON into CSV File - python

Related

generate JSON file content

Convert CSV with nested headers to JSON

Python TypeError: '_csv.writer' object is not iterable

How to delete values (in my case: 'phones') from json file

Convert csv to json multi-document?

Categories

Resources