Convert CSV with nested headers to JSON - python

So far, I have this code (with help from a tutorial):
import csv, json
csvFilePath = "convertcsv.csv"
jsonFilePath = "newResult.json"
# Read the CSV and add the data to a dictionary...
data = {}
with open(csvFilePath) as csvFile:
csvReader = csv.DictReader(csvFile)
for csvRow in csvReader:
data = csvRow
# Write data to a JSON file...
with open(jsonFilePath, "w") as jsonFile:
jsonFile.write(json.dumps(data, indent=4))
My desired output is this:
{
"userID": "string",
"username": "string",
"age": "string",
"location": {
"streetName": "string",
"streetNo": "string",
"city": "string"
}
}
I don't know how to represent the "location".
My actual result is this:
{
"userID": "string",
"username": "string",
"age": "string",
"location/streetName": "string",
"location/streetNo": "string",
"location/city": "string",
}
How can I separate streetName, streetNo and city and put them into "location"?

Below is a simple script that should do what you want. The result will be a JSON object with the "userID" values as keys. Note that, to test deeper nesting, I used a CSV file with slightly different headers - but it will work just as well with your original example.
import csv, json
infile = 'convertcsv.csv'
outfile = 'newResult.json'
data = {}
def process(header, value, record):
    """Store *value* in *record* under the nested path named by *header*.

    *header* is a '/'-separated path such as ``location/street/name``.
    Every segment before the last one becomes a nested dictionary (created
    on demand); the last segment is the key that receives *value*.

    Args:
        header: Column header, optionally containing '/' separators.
        value: The cell value to store.
        record: The dictionary to update in place.

    Raises:
        ValueError: If a segment that has to hold nested keys already holds
            a plain value, e.g. when both ``location`` and ``location/city``
            appear as headers.
    """
    key, other = header.partition('/')[::2]
    while other:
        child = record.setdefault(key, {})
        if not isinstance(child, dict):
            # Without this check the failure would surface later as an
            # obscure "object does not support item assignment" TypeError.
            raise ValueError(
                "header %r conflicts with an existing non-nested value "
                "at %r" % (header, key)
            )
        # Descend one level and split off the next path segment.
        record = child
        key, other = other.partition('/')[::2]
    record[key] = value
# newline='' leaves newline handling to the csv module, as its documentation
# recommends; otherwise quoted fields containing line breaks are misread.
with open(infile, newline='') as stream:
    reader = csv.DictReader(stream)
    for row in reader:
        # One output object per CSV row, stored under that row's userID.
        data[row['userID']] = record = {}
        for header, value in row.items():
            process(header, value, record)

# Serialise the collected records as one pretty-printed JSON object.
with open(outfile, "w") as stream:
    json.dump(data, stream, indent=4)
INPUT:
userID,username,age,location/street/name,location/street/number,location/city
0,AAA,20,This Street,5,This City
1,BBB,42,That Street,5,That City
2,CCC,34,Other Street,5,Other City
OUTPUT:
{
"0": {
"userID": "0",
"username": "AAA",
"age": "20",
"location": {
"street": {
"name": "This Street",
"number": "5"
},
"city": "This City"
}
},
"1": {
"userID": "1",
"username": "BBB",
"age": "42",
"location": {
"street": {
"name": "That Street",
"number": "5"
},
"city": "That City"
}
},
"2": {
"userID": "2",
"username": "CCC",
"age": "34",
"location": {
"street": {
"name": "Other Street",
"number": "5"
},
"city": "Other City"
}
}
}

I'd add some custom logic to achieve this. Note that this handles the first level of nesting only; if you want more levels, you should write a recursive function:
# Fold "parent/child" keys into nested dictionaries (first level only).
# A new dict is built rather than editing `data` in place: adding and
# removing keys while looping over data.items() raises RuntimeError, and
# assigning data[parent] = {child: v} for each key would keep only the
# last child of every parent.
nested = {}
for key, value in data.items():
    parent, sep, child = key.partition('/')
    if sep:
        nested.setdefault(parent, {})[child] = value
    else:
        nested[key] = value
data = nested

# Write data to a JSON file...
with open(jsonFilePath, "w") as jsonFile:
    jsonFile.write(json.dumps(data, indent=4))

You can use something like this:
# https://www.geeksforgeeks.org/convert-csv-to-json-using-python/
import csv
import json
# Function to convert a CSV to JSON
# Takes the file paths as arguments
def make_json(csvFilePath, jsonFilePath, keyColumn='No'):
    """Convert a CSV file into a JSON object keyed by one of its columns.

    Each CSV row becomes a dictionary of ``column name -> cell text``; the
    rows are collected into one dictionary whose keys are the values found
    in *keyColumn*. Rows sharing a key overwrite each other (the last wins).

    Args:
        csvFilePath: Path of the CSV file to read (UTF-8, with header row).
        jsonFilePath: Path of the JSON file to write (UTF-8).
        keyColumn: Name of the column used as the primary key.

    Raises:
        KeyError: If *keyColumn* is not a column of the CSV file.
    """
    # newline='' leaves newline handling to the csv module, as the csv
    # documentation recommends for files handed to a reader.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        data = {row[keyColumn]: row for row in csv.DictReader(csvf)}

    # Write the collected rows as one pretty-printed JSON object.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(data, jsonf, indent=4)
# Driver code: runs the conversion once when this script is executed.
# Adjust the two paths below to where the files live on your system;
# as written they are resolved relative to the current working directory.
csvFilePath = r'Names.csv'
jsonFilePath = r'Names.json'
# Read csvFilePath and write its rows to jsonFilePath via make_json.
make_json(csvFilePath, jsonFilePath)
For more information check out https://www.geeksforgeeks.org/convert-csv-to-json-using-python/

Related

Python TypeError: '_csv.writer' object is not iterable

I am parsing json to csv. But am getting error as below:
for i in data:
TypeError: '_csv.writer' object is not iterable
Code:
import json
import csv
with open("Data.json", 'r') as file:
data = json.load(file)
CSV_File = 'Data.csv'
with open(CSV_File, 'w') as file:
data = csv.writer(file)
data.writerow([])
for i in data:
data.writerow([])
Data
{
"id": "kljhfksdhkhd",
"name": "BOB",
"birthday": "08/03/1993",
"languages": [
{
"id": "106059522759137",
"name": "English language"
},
{
"id": "107617475934611",
"name": "Telugu language"
},
{
"id": "112969428713061",
"name": "Hindi"
},
{
"id": "343306413260",
"name": "Tamil language"
},
{
"id": "100904156616786",
"name": "Kannada"
}
],
"games": {
"data": [
{
"name": "Modern Combat",
"id": "12134323",
"created_time": "2019-02-21T18:39:41+0000"
},
{
"name": "Cards",
"id": "343232",
"created_time": "2011-06-01T11:13:31+0000"
},
{
"name": "Shuttle Badminton",
"id": "43214321",
"created_time": "2011-06-01T11:13:31+0000"
},
{
"name": "Carrom",
"id": "49y497",
"created_time": "2011-06-01T11:13:31+0000"
},
{
"name": "Chess",
"id": "0984080830",
"created_time": "2011-06-01T11:13:31+0000"
}
],
"paging": {
"cursors": {
"before": "dkkskd",
"after": "dlldlkd"
}
}
}
}
First off, the name data has been assigned to two different objects. Python permits this: each assignment overwrites the previous one. In the code, data is initially the data from the json file, then a csv.writer instance - and a csv.writer cannot be iterated over, which is why for i in data: raises the TypeError. A sensible improvement, therefore, is to name the writer writer, and change the code accordingly:
import json
import csv
with open("Data.json", 'r') as file:
data = json.load(file)
CSV_File = 'Data.csv'
with open(CSV_File, 'w') as file:
writer = csv.writer(file)
writer.writerow([])
for i in data:
writer.writerow([])
Now let's deal with how we are writing to the file. writer.writerow expects a list, but writing an empty list: writer.writerow([]) isn't very useful. Probably you want to write the json data to the csv file, so let's get rid of the empty lists, and indent the writing loop so that it's inside the with block (otherwise the file will already be closed when the loop runs).
import json
import csv
with open("Data.json", 'r') as file:
    data = json.load(file)

CSV_File = 'Data.csv'
# newline='' stops the text layer from translating the line endings the
# csv writer emits; without it every row is followed by a blank line on
# Windows (see the csv module documentation).
with open(CSV_File, 'w', newline='') as file:
    writer = csv.writer(file)
    # The loop stays inside the with block so the file is still open.
    for row in data:
        writer.writerow(row)
This will work if the json data is a list of lists, that is it looks like this:
[[...], [...], [...], ...]
because each element of the outer list is a list, so iterating over it (for row in data:) yields a list, which writer.writerow can handle. However it's not uncommon for json data to be in the form of a dictionary:
{"k1": [....], "k2": [...], "k3": [...], ...}
In this case, you might want to iterate over the dictionary's values, if they are lists:
for row in data.values():
writer.writerow(row)
Finally, the json may be an irregular mix of lists and dictionaries, and may be arbitrarily nested. It's up to you to determine how to map nested json data to the flat csv format.

Convert csv to json multi-document?

I have below two requirement using Python:
Convert csv to multi-document json.
Ignore "" or null objects.
Have mentioned both code and csv. Currently I am getting only json array objects but I need to create in multi-document json.
my csv
_id,riderDetails.0.category,riderDetails.0.code,riderDetails.1.category,riderDetails.1.code
1111,re,remg,er,error
2111,we,were,ty,
code
import csv
import json
def make_record(row):
return {
"_id" : row["_id"],
"riderDetails" : [
{
"category" : row["riderDetails.0.category"],
"code" : row["riderDetails.0.code"],
},
{
"category" : row["riderDetails.1.category"] ,
"code" : row["riderDetails.1.code"],
}
]
}
with open('N:/Exide/Mongo/rr22.csv', 'r', newline='') as csvfile:
reader = csv.DictReader(csvfile, delimiter=',')
with open('N:/Exide/Mongo/mm22.json', 'w') as jsonfile:
out = json.dumps([make_record(row) for row in reader])
jsonfile.write(out)
Code Output
[{
"_id": "1111",
"riderDetails": [
{
"category": "re",
"code": "remg"
},
{
"category": "er",
"code": "error"
}
]
},
{
"_id": "2111",
"riderDetails": [
{
"category": "we",
"code": "were"
},
{
"category": "",
"code": ""
}
]
}]
Expected Output
{
"_id": "1111",
"riderDetails": [
{
"category": "re",
"code": "remg"
},
{
"category": "er",
"code": "error"
}
]
}
{
"_id": "2111",
"riderDetails": [
{
"category": "we",
"code": "were"
}
]
}
Can someone help me in achieving expected output?
The data in the "my csv" file in your question doesn't produce the output shown, but that's probably due to a minor posting error, so I'll ignore it.
Also note that the file you are producing isn't a strictly valid JSON format file — perhaps that's what you meant by the term "multi-document json"…
Regardless, you can accomplish what you need by modifying the make_record() function so it "cleans up" the record and removes any empty/missing values before it returns it.
This is done in two steps.
First, go through each detail in riderDetails and remove any keys that have empty values.
Lastly, go through riderDetails again and remove any details that are completely empty (because the first step removed all of their contents, or because none were provided in the csv file being read).
import csv
import json
csv_inp = 'rr22.csv'
json_outp = 'mm22.json'
def make_record(row):
    """Build one output document from a CSV row, dropping empty values.

    Columns named ``riderDetails.<n>.<field>`` are grouped by their numeric
    index ``<n>`` into a list of detail dictionaries, ordered by index. Any
    number of rider details is supported, not just two.

    Args:
        row: Mapping of column name to cell text, as produced by
            ``csv.DictReader``.

    Returns:
        A dict with the row's ``_id`` and a ``riderDetails`` list that holds
        only non-empty fields and only non-empty details.

    Raises:
        KeyError: If the row has no ``_id`` column.
    """
    prefix = "riderDetails."

    # Reformat data in row: collect the fields of each rider detail.
    grouped = {}
    for column, value in row.items():
        # DictReader may use a non-string key for surplus cells; skip it
        # along with every column that is not a rider-detail column.
        if not isinstance(column, str) or not column.startswith(prefix):
            continue
        index, _, field = column[len(prefix):].partition(".")
        if not index.isdecimal() or not field:
            continue
        grouped.setdefault(int(index), {})[field] = value

    record = {
        "_id": row["_id"],
        "riderDetails": [grouped[index] for index in sorted(grouped)],
    }

    # Remove empty values ("" or None) from each riderDetail.
    record['riderDetails'] = [
        {key: value for key, value in riderDetail.items() if value}
        for riderDetail in record['riderDetails']
    ]

    # Remove completely empty riderDetails.
    record['riderDetails'] = [
        riderDetail for riderDetail in record['riderDetails'] if riderDetail
    ]

    return record
# Stream the CSV row by row and emit one pretty-printed JSON document per
# row, each terminated by a newline ("multi-document" output).
with open(csv_inp, 'r', newline='') as csvfile:
    with open(json_outp, 'w') as jsonfile:
        reader = csv.DictReader(csvfile, delimiter=',')
        for row in reader:
            json.dump(make_record(row), jsonfile, indent=4)
            jsonfile.write('\n')
            # Drop indent=4 above to get one compact document per line.
using glob
import glob, os
pt = 'N:/Exide/Mongo/*.csv'
for file in glob.glob(pt):
    # glob can return paths that use the platform's own separator, so take
    # the file name with os.path instead of splitting on "/"; splitext only
    # swaps the extension, whereas str.replace could also hit ".csv" in the
    # middle of a name.
    stem = os.path.splitext(os.path.basename(file))[0]
    saving_path = os.path.join('N:/Exide/Mongo/', stem + '.json')

    with open(file, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=',')
        out = [make_record(row) for row in reader]

    # `out` is a list, so each output file holds a single JSON array.
    with open(saving_path, 'w') as jsonfile:
        json.dump(out, jsonfile)
You get [{},{}] because you are writing a list of dictionaries to the file.

How to add new dictionary into existed json file with dictionary?

I have a json file saved in local server, such as:
{
"user": "user1",
"id": "21779"
}
and I want to write another dict into this json file, I need new content like this:
{
{
"user": "user1",
"id": "21779"
},
{
"user": "user2",
"id": "21780"
}
}
Or:
[
{
"user": "user1",
"id": "21779"
},
{
"user": "user2",
"id": "21780"
}
]
I try to use json.dump() to add the new element, but is displayed as:
{
"user": "user1",
"id": "21779"
}
{
"user": "user2",
"id": "21780"
}
It is not a valid json file.
How can I do use json.dump, json.load, or other methods?
Thanks for help me!
You have to read your JSON file and then convert it to list instead of dict. Then you just need to append to that list and overwrite your JSON file.
import json
# read the current content; the with block closes the file right away
# instead of leaving the handle open until garbage collection
with open('data.json') as infile:
    data = json.load(infile)

# convert data to a list if the file held a single object
if isinstance(data, dict):
    data = [data]

# append new item to data list
data.append({
    "user": "user2",
    "id": "21780"
})

# write list to file
with open('data.json', 'w') as outfile:
    json.dump(data, outfile)
You can do this with a list, but not with a dict. Try the solution below and see if it helps:
import json
def appendList(path="test.json", entry=None):
    """Append *entry* to the JSON array stored in *path* and rewrite the file.

    If the file currently holds a single JSON object rather than an array,
    that object is first wrapped in a list so the result is a valid array.

    Args:
        path: JSON file to update.
        entry: Dictionary to append; defaults to the sample user entry.
    """
    if entry is None:
        entry = {"user": "user3", "id": "21574"}

    with open(path, mode='r', encoding='utf-8') as f:
        feeds = json.load(f)
    print(feeds)

    # Update the data BEFORE opening the file for writing: opening with 'w'
    # truncates it, so a failure in between would lose the existing content.
    if isinstance(feeds, dict):
        feeds = [feeds]
    feeds.append(entry)

    with open(path, mode='w', encoding='utf-8') as feedsjson:
        # json.dump returns None, so there is nothing useful to print here.
        json.dump(feeds, feedsjson)

Json value separate from the convert csv to json

I want to convert the data in a CSV file to JSON with Python. My script runs fine and produces the result below.
[{"id": "1", "name": "billy", "job": "web-develop"}, {"id": "1", "name": "smith", "job": "programming"}]
Nevertheless, I would like to know how to break the JSON output onto separate lines after each "," between objects, as shown below.
[{"id": "1", "name": "billy", "job": "web-develop"},
{"id": "1", "name": "smith", "job": "programming"}]
This myscript
import csv
import json
import sys
input = str(sys.argv[1])
csvfile = open(input, 'r')
fieldnames = ("id","name","job")
reader = csv.DictReader( csvfile, fieldnames)
out = json.dumps( [ row for row in reader ] )
sys.stdout.write(out)

Store new JSON data to existing file in python

I've been looking around the web and I cannot find a way on adding new JSON data to array.
Example: I would like to add player_two, player_three through python.
{
"players": {
"player_one": {
"name": "Bob",
"age": "0",
"email": "bob#example.com"
}
}
}
How can I achieve doing this through python?
What I've tried:
with open("/var/www/html/api/toons/details.json", 'w') as outfile:
json.dump(avatarDetails, outfile)
Here is a simple example: read the file as a dict, update the dict, then use json.dump() to write the JSON data back to the file:
import json
# Load the current contents of the file into a dict.
with open('jsonfile') as source:
    jsondata = json.load(source)

# Register the new player under its own key inside "players".
new_player = {'email': 'kevin#example.com', 'name': 'Kevin', 'age': '0'}
jsondata['players']['player_two'] = new_player

# Re-open the same file for writing (this truncates it) and store the
# updated dict, indented and with keys sorted.
with open('jsonfile', 'w') as target:
    target.write(json.dumps(jsondata, indent=4, sort_keys=True))
Now the file looks like:
{
"players": {
"player_one": {
"age": "0",
"email": "bob#example.com",
"name": "Bob"
},
"player_two": {
"age": "0",
"email": "kevin#example.com",
"name": "Kevin"
}
}
}
Assuming that your file contains this JSON:
{
"players": {
"player_one": {
"name": "Bob",
"age": "0",
"email": "bob#example.com"
}
}
}
You can parse the data into a Python dictionary using json.load():
with open('/var/www/html/api/toons/details.json') as f:
data = json.load(f)
Add your new players:
data['players']['player_two'] = dict(name='Bobbie', age=100, email='b#blah.com')
data['players']['player_three'] = dict(name='Robert', age=22, email='robert#blah.com')
Then save it back to a file:
with open('/var/www/html/api/toons/details.json', 'w') as f:
json.dump(data, f)

Categories

Resources