convert csv file to multiple nested json format - python

I have written code to convert a CSV file to nested JSON format. I have multiple columns to nest, so I assign them separately for each column. The problem is that I'm getting two fields for the same column in the JSON output.
import csv
import json
from collections import OrderedDict

csv_file = 'data.csv'
json_file = csv_file + '.json'

def main(input_file):
    csv_rows = []
    with open(input_file, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='|')
        for row in reader:
            row['TYPE'] = 'REVIEW',  # adding new key, value
            row['RAWID'] = 1,
            row['CUSTOMER'] = {
                "ID": row['CUSTOMER_ID'],
                "NAME": row['CUSTOMER_NAME']
            }
            row['CATEGORY'] = {
                "ID": row['CATEGORY_ID'],
                "NAME": row['CATEGORY']
            }
            del (row["CUSTOMER_NAME"], row["CATEGORY_ID"],
                 row["CATEGORY"], row["CUSTOMER_ID"])  # deleting since fields occurring twice
            csv_rows.append(row)
    with open(json_file, 'w') as f:
        json.dump(csv_rows, f, sort_keys=True, indent=4, ensure_ascii=False)
        f.write('\n')
The output is as below:
[
    {
        "CATEGORY": {
            "ID": "1",
            "NAME": "Consumers"
        },
        "CATEGORY_ID": "1",
        "CUSTOMER_ID": "41",
        "CUSTOMER": {
            "ID": "41",
            "NAME": "SA Port"
        },
        "CUSTOMER_NAME": "SA Port",
        "RAWID": [
            1
        ]
    }
]
I'm getting two entries for the fields I have assigned using row[''].
Is there any other way to get rid of this? I want only one entry for a particular field in each record.
Also, how can I convert the keys to lower case after reading them with csv.DictReader()? All the columns in my CSV file are in upper case, so I use the same case when assigning, but I want to convert all of them to lower case.

To convert the keys to lower case, it is simpler to build a new dict per row. Incidentally, that should also be enough to get rid of the duplicate fields:
for row in reader:
    orow = OrderedDict()  # uses the OrderedDict imported above
    orow['type'] = 'REVIEW'  # adding new key, value (no trailing comma, or the value becomes a tuple)
    orow['rawid'] = 1
    orow['customer'] = {
        "id": row['CUSTOMER_ID'],
        "name": row['CUSTOMER_NAME']
    }
    orow['category'] = {
        "id": row['CATEGORY_ID'],
        "name": row['CATEGORY']
    }
    csv_rows.append(orow)
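If you would rather keep every original column and only normalise the key case, a dict comprehension over each row works too (a minimal sketch, assuming the same pipe-delimited data.csv as above):
import csv

csv_rows = []
with open('data.csv', 'r') as csvfile:
    reader = csv.DictReader(csvfile, delimiter='|')
    for row in reader:
        # lower-case every key coming from the CSV header
        csv_rows.append({key.lower(): value for key, value in row.items()})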


How can I convert CSV to JSON the way I want

Hello, here is my problem:
I wrote the following to convert my CSV to JSON, but the result is not exactly what I want.
main.py
import csv

filename = "forcebrute.csv"

# opening the file using "with"
# statement
with open(filename, 'r') as data:
    for line in csv.DictReader(data):
        print(line)
csv
name;price;profit
Action-1;20;5
Action-2;30;10
Action-3;50;15
Action-4;70;20
Action-5;60;17
The result I have:
{'name;price;profit': 'Action-1;20;5'}
{'name;price;profit': 'Action-2;30;10'}
{'name;price;profit': 'Action-3;50;15'}
{'name;price;profit': 'Action-4;70;20'}
{'name;price;profit': 'Action-5;60;17'}
And I would like this result:
You will need to specify the column delimiter; then you can use json.dumps() to produce the required output format:
import csv
import json
with open('forcebrute.csv') as data:
    print(json.dumps([d for d in csv.DictReader(data, delimiter=';')], indent=2))
Output:
[
  {
    "name": "Action-1",
    "price": "20",
    "profit": "5"
  },
  {
    "name": "Action-2",
    "price": "30",
    "profit": "10"
  },
  {
    "name": "Action-3",
    "price": "50",
    "profit": "15"
  },
  {
    "name": "Action-4",
    "price": "70",
    "profit": "20"
  },
  {
    "name": "Action-5",
    "price": "60",
    "profit": "17"
  }
]
You will need to use DictReader from the csv library to read the contents of the CSV file, convert the contents to a list, and then use json.dumps to turn the data into JSON.
import csv
import json

filename = "forcebrute.csv"

# Open the CSV file and read the contents into a list of dictionaries
with open(filename, 'r') as f:
    reader = csv.DictReader(f, delimiter=';')
    csv_data = list(reader)

# Convert the data to a JSON string and print it to the console
json_data = json.dumps(csv_data)
print(json_data)
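If you want the result in a file rather than printed to the console, json.dump can write the same list directly (a small sketch building on the csv_data list above; the output filename is just an example):
with open('forcebrute.json', 'w') as out_file:
    json.dump(csv_data, out_file, indent=2)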
An easy approach is to use pandas, which is also quite fast with large CSV files. It might need some tweaking, but you get the idea:
import pandas as pd
import json

df = pd.read_csv("forcebrute.csv", sep=';')
data = json.dumps(df.to_dict('records'))
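If a file is the end goal, pandas can also write the JSON itself via DataFrame.to_json, so the json module is not strictly needed (a sketch; the indent argument assumes a reasonably recent pandas version):
import pandas as pd

df = pd.read_csv("forcebrute.csv", sep=';')
df.to_json("forcebrute.json", orient='records', indent=2)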

Writing a List of Dictionaries to separate JSONs

I have a dictionary in which each value is a list.
I want to write the individual items to separate JSON files.
For example
data_to_write = {"Names":["name1", "name2", "name3"], "email":["mail1", "mail2", "mail3"]}
Now I want three JSON files, i.e. data1.json, data2.json, data3.json, in the following (approximate) format.
data1.json
{
    Name: name1,
    email: mail1
}
data2.json
{
    Name: name2,
    email: mail2
}
and so on.
My current approach is:
for file_no in range(no_of_files):
    for count, (key, info_list) in enumerate(data_to_write.items()):
        for info in info_list:
            with open(
                os.path.join(self.path_to_output_dir, str(file_no)) + ".json",
                "a",
            ) as resume:
                json.dump({key: info}, resume)
But this is wrong. Any help is appreciated.
You could use pandas to do the work for you. Read the dictionary into a dataframe, then iterate the rows of the dataframe to produce the json for each row:
import pandas as pd
data_to_write = {"Names":["name1", "name2", "name3"], "email":["mail1", "mail2", "mail3"]}
df = pd.DataFrame(data_to_write).rename(columns={'Names':'Name'})
for i in range(len(df)):
    jstr = df.iloc[i].to_json()
    with open(f"data{i+1}.json", "w") as f:
        f.write(jstr)
Output (each line is in a separate file):
{"Name":"name1","email":"mail1"}
{"Name":"name2","email":"mail2"}
{"Name":"name3","email":"mail3"}
Try:
import json
data_to_write = {
"Names": ["name1", "name2", "name3"],
"email": ["mail1", "mail2", "mail3"],
}
for i, val in enumerate(zip(*data_to_write.values()), 1):
    d = dict(zip(data_to_write, val))
    with open(f"data{i}.json", "w") as f_out:
        json.dump(d, f_out, indent=4)
This writes data(1..3).json with content:
# data1.json
{
    "Names": "name1",
    "email": "mail1"
}

# data2.json
{
    "Names": "name2",
    "email": "mail2"
}
...
import json

data_to_write = {
    "Names": ["name1", "name2", "name3"],
    "email": ["mail1", "mail2", "mail3"],
}

for ind, val in enumerate(zip(*data_to_write.values())):
    jsn = dict(zip(data_to_write, val))
    print(jsn)
    # note: enumerate starts at 0 here, so the files are data0.json, data1.json, data2.json
    with open("data{}.json".format(ind), "w") as f:
        f.write(json.dumps(jsn))

convert a CSV file to JSON file

I am trying to convert CSV file to JSON file based on a column value. The csv file looks somewhat like this.
ID Name Age
CSE001 John 18
CSE002 Marie 20
ECE001 Josh 22
ECE002 Peter 23
Currently I am using the following code to obtain the JSON file.
import csv
import json

def csv_to_json(csv_file_path, json_file_path):
    data_dict = {}
    with open(csv_file_path, encoding='utf-8') as csv_file_handler:
        csv_reader = csv.DictReader(csv_file_handler)
        for rows in csv_reader:
            key = rows['ID']
            data_dict[key] = rows
    with open(json_file_path, 'w', encoding='utf-8') as json_file_handler:
        json_file_handler.write(json.dumps(data_dict, indent=4))
OUTPUT:
{
    "CSE001": {
        "ID": "CSE001",
        "Name": "John",
        "Age": 18
    },
    "CSE002": {
        "ID": "CSE002",
        "Name": "Marie",
        "Age": 20
    },
    "ECE001": {
        "ID": "ECE001",
        "Name": "Josh",
        "Age": 22
    },
    "ECE002": {
        "ID": "ECE002",
        "Name": "Peter",
        "Age": 23
    }
}
I want my output to generate two separate JSON files, for CSE and ECE, based on the ID value. Is there a way to achieve this output?
Required Output:
CSE.json:
{
    "CSE001": {
        "ID": "CSE001",
        "Name": "John",
        "Age": 18
    },
    "CSE002": {
        "ID": "CSE002",
        "Name": "Marie",
        "Age": 20
    }
}
ECE.json:
{
    "ECE001": {
        "ID": "ECE001",
        "Name": "Josh",
        "Age": 22
    },
    "ECE002": {
        "ID": "ECE002",
        "Name": "Peter",
        "Age": 23
    }
}
I would suggest using pandas; that way it will be easier.
The code may look like:
import pandas as pd

def csv_to_json(csv_file_path):
    df = pd.read_csv(csv_file_path)
    df_CSE = df[df['ID'].str.contains('CSE')]
    df_ECE = df[df['ID'].str.contains('ECE')]
    df_CSE.to_json('CSE.json')
    df_ECE.to_json('ECE.json')
You can create a dataframe and then do the following operation:
import pandas as pd

df = pd.DataFrame.from_dict({
    "CSE001": {
        "ID": "CSE001",
        "Name": "John",
        "Age": 18
    },
    "CSE002": {
        "ID": "CSE002",
        "Name": "Marie",
        "Age": 20
    },
    "ECE001": {
        "ID": "ECE001",
        "Name": "Josh",
        "Age": 22
    },
    "ECE002": {
        "ID": "ECE002",
        "Name": "Peter",
        "Age": 23
    }
}, orient='index')

df["id_"] = df["ID"].str[0:2]  # temp column for storing the first two chars
grps = df.groupby("id_")[["ID", "Name", "Age"]]
for k, v in grps:
    print(v.to_json(orient="index"))  # you can create a json file as well
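If you want actual files instead of printed strings, the same loop can write one JSON per group (a small sketch; note that id_ holds only the first two characters, so the filenames here would be CS.json and EC.json):
for k, v in grps:
    # one file per group, keyed by the two-character prefix
    v.to_json(f"{k}.json", orient="index")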
You could store each row in a two-level dictionary, with the top level keyed by the first three characters of the ID.
These can then be written out into separate files, with the key as part of the filename:
from collections import defaultdict
import csv
import json

def csv_to_json(csv_file_path, json_base_path):
    data_dict = defaultdict(dict)
    with open(csv_file_path, encoding='utf-8') as csv_file_handler:
        csv_reader = csv.DictReader(csv_file_handler)
        for row in csv_reader:
            key = row['ID'][:3]
            data_dict[key][row['ID']] = row
    for key, values in data_dict.items():
        with open(f'{json_base_path}_{key}.json', 'w', encoding='utf-8') as json_file_handler:
            json_file_handler.write(json.dumps(values, indent=4))

csv_to_json('input.csv', 'output')
The defaultdict is used to avoid needing to first test if a key is already present before using it.
This would create output_CSE.json and output_ECE.json, e.g.
{
    "ECE001": {
        "ID": "ECE001",
        "Name": "Josh",
        "Age": "22"
    },
    "ECE002": {
        "ID": "ECE002",
        "Name": "Peter",
        "Age": "23"
    }
}
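For reference, defaultdict(dict) is doing the same job as setdefault on a plain dict; a minimal equivalent of the grouping loop without defaultdict would be:
data_dict = {}
for row in csv_reader:
    key = row['ID'][:3]
    # create the inner dict on first use, then insert the row under its full ID
    data_dict.setdefault(key, {})[row['ID']] = row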

Getting value from a JSON file based on condition

In Python I'm trying to get the value(s) of the key "relativePaths" from a JSON element if that element contains the value "concept" for the key "tags". The JSON file has the following format.
    ]
},
{
    "fileName": "#Weizman.2011",
    "relativePath": "Text/#Weizman.2011.md",
    "tags": [
        "text",
        "concept"
    ],
    "frontmatter": {
        "authors": "Weizman",
        "year": 2011,
        "position": {
            "start": {
                "line": 0,
                "col": 0,
                "offset": 0
            },
            "end": {
                "line": 4,
                "col": 3,
                "offset": 120
            }
        }
    },
    "aliases": [
        "The least of all possible evils - humanitarian violence from Arendt to Gaza"
    ],
I have tried the following code:
import json

with open("/Users/metadata.json") as jsonFile:
    data = json.load(jsonFile)
    for s in range(len(data)):
        if 'tags' in s in range(len(data)):
            if data[s]["tags"] == "concept":
                files = data[s]["relativePaths"]
                print(files)
Which results in the error message:
TypeError: argument of type 'int' is not iterable
I then tried:
with open("/Users/metadata.json") as jsonFile:
data = json.load(jsonFile)
for s in str(data):
if 'tags' in s in str(data):
print(s["relativePaths"])
That code seems to work. But I don't get any output from the print command. What am I doing wrong?
Assuming your JSON is a list of the type you put in your question, you can get those values like this:
with open("/Users/metadata.json") as jsonFile:
data = json.load(jsonFile)
for item in data: # Assumes the first level of the json is a list
if ('tags' in item) and ('concept' in item['tags']): # Assumes that not all items have a 'tags' entry
print(item['relativePaths']) # Will trigger an error if relativePaths is not in the dictionary
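If some items might lack that key entirely, dict.get avoids the KeyError mentioned in the comment above (a small variation; use whichever spelling, relativePath or relativePaths, your file actually contains):
for item in data:
    if 'concept' in item.get('tags', []):
        # returns None instead of raising when the key is missing
        print(item.get('relativePaths'))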
Figured it out:
import json

f = open("/Users/metadata.json")

# returns JSON object as
# a dictionary
data = json.load(f)

# Iterating through the json
# list
for i in data:
    if "tags" in i:
        if "concept" in i["tags"]:
            print(i["relativePaths"])

# Closing file
f.close()
I think this will do what you want. It is more "pythonic" because it doesn't use numerical indices to access elements of the list, which makes it easier to write and read.
import json

with open("metadata.json") as jsonFile:
    data = json.load(jsonFile)

for elem in data:
    if 'tags' in elem and 'concept' in elem['tags']:
        files = elem["relativePath"]
        print(files)

How to get every value of a key of a JSON file with multiple dicts?

How can I get every value of a key from a JSON file with multiple dicts? I want to extract every value of the "username" key.
data.json
{
    "1476439722046238725": {
        "tweet_id": "1476439722046238725",
        "username": "elonmusk",
    },
    "1476437555717541893": {
        "tweet_id": "1476437555717541893",
        "username": "billgate",
    },
    "1476437555717541893": {
        "tweet_id": "1476437555717541893",
        "username": "jeffbezos",
This is my code so far, but it gives me this error: KeyError: 'username'.
main.py
import json

with open("data.json", "r") as f:
    data = json.load(f)
    print(data["username"])
You need to iterate over the values of the outer dictionary.
import json

with open("data.json", "r") as f:
    data = json.load(f)

for val in data.values():
    print(val['username'])
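If you want the usernames collected into a list instead of printed one by one, a list comprehension over the same values works (a minimal sketch):
usernames = [entry["username"] for entry in data.values()]
print(usernames)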
