I have a dictionary in which each value is a list.
I want to write individual items to separate JSON files.
For example
data_to_write = {"Names":["name1", "name2", "name3"], "email":["mail1", "mail2", "mail3"]}
Now I want 3 JSON files, i.e. data1.json, data2.json, data3.json, in the following (approximate) format.
data1.json
{
Name: name1,
email: mail1
}
data2.json
{
Name: name2,
email: mail2
}
and so on.
My current approach is
# Original (buggy) approach from the question, annotated.
# BUG: the three nested loops form a cross product — for every file
# number we re-iterate every key and every item of its list, so each
# file receives len(keys) * len(items) separate {key: value} objects
# instead of one merged record per index.
# BUG: mode "a" appends, so re-runs keep growing the files, and a file
# holding several back-to-back JSON objects is not itself valid JSON.
for file_no in range(no_of_files):
    for count, (key, info_list) in enumerate(data_to_write.items()):
        for info in info_list:
            with open(
                os.path.join(self.path_to_output_dir, str(file_no)) + ".json",
                "a",
            ) as resume:
                json.dump({key: info}, resume)
But this is wrong. Any help is appreciated.
You could use pandas to do the work for you. Read the dictionary into a dataframe, then iterate the rows of the dataframe to produce the json for each row:
import pandas as pd

# Load the dictionary into a DataFrame and rename the "Names" column to
# the singular "Name" used in the desired output.
data_to_write = {"Names": ["name1", "name2", "name3"], "email": ["mail1", "mail2", "mail3"]}
frame = pd.DataFrame(data_to_write).rename(columns={'Names': 'Name'})

# Each row becomes its own file: data1.json, data2.json, ...
for row_number, row in frame.iterrows():
    with open(f"data{row_number + 1}.json", "w") as handle:
        handle.write(row.to_json())
Output (each line is in a separate file):
{"Name":"name1","email":"mail1"}
{"Name":"name2","email":"mail2"}
{"Name":"name3","email":"mail3"}
Try:
import json

data_to_write = {
    "Names": ["name1", "name2", "name3"],
    "email": ["mail1", "mail2", "mail3"],
}

# zip(*values) transposes the column lists into per-record tuples:
# ("name1", "mail1"), ("name2", "mail2"), ...
keys = list(data_to_write)
for file_index, row in enumerate(zip(*data_to_write.values()), start=1):
    record = {key: value for key, value in zip(keys, row)}
    with open(f"data{file_index}.json", "w") as handle:
        json.dump(record, handle, indent=4)
This writes data(1..3).json with content:
# data1.json
{
"Names": "name1",
"email": "mail1"
}
# data2.json
{
"Names": "name2",
"email": "mail2"
}
...
import json

data_to_write = {
    "Names": ["name1", "name2", "name3"],
    "email": ["mail1", "mail2", "mail3"],
}

# zip(*values) regroups the column lists into per-record tuples.
# FIX: enumerate starts at 1 so the files are data1.json..data3.json as
# the question asks (the original started at 0 and wrote data0.json),
# matching the other answer's naming.
for ind, val in enumerate(zip(*data_to_write.values()), 1):
    jsn = dict(zip(data_to_write, val))
    print(jsn)
    # json.dump writes straight to the handle — no dumps() + write().
    with open("data{}.json".format(ind), "w") as f:
        json.dump(jsn, f)
Related
I have excel file in the format :
Name
Question
Answer
N1
Q1
a1
N2
Q2
a2
N3
Q3
a3
N4
Q4
a4
N3
Q5
a3
Here some names are the same, and their corresponding answers are also the same. I want to convert this into JSON in a format where all the rows with the same name are merged.
[
    {
        "name": "N1",
        "exampleSentences": ["Q1"],
        "defaultReply": {
            "text": ["a1"],
            "type": "text"
        }
    },
    {
        "name": "N2",
        "exampleSentences": ["Q2"],
        "defaultReply": {
            "text": ["a2"],
            "type": "text"
        }
    },
    {
        "name": "N3",
        "exampleSentences": ["Q3", "Q5"],
        "defaultReply": {
            "text": ["a3"],
            "type": "text"
        }
    },
    {
        "name": "N4",
        "exampleSentences": ["Q4"],
        "defaultReply": {
            "text": ["a4"],
            "type": "text"
        }
    }
]
Here is the code that I wrote:
# Import the required python modules
import pandas as pd
import math
import json
import csv

# Define the name of the Excel file
fileName = "FAQ_eng"

# Read the Excel file
df = pd.read_excel("{}.xlsx".format(fileName))

intents = []
intentNames = df["Name"]

# Loop through the list of Names and create a new intent for each row
# BUG (why duplicates are not merged): a brand-new intent dict is
# appended for EVERY row, so a repeated name like "N3" yields a second
# entry instead of extending the first entry's exampleSentences list.
for index, name in enumerate(intentNames):
    # NOTE(review): pandas represents missing cells as NaN, not None,
    # so this check never filters anything out.
    if name is not None:
        exampleSentences = []
        defaultReplies = []
        # NOTE(review): `x is not float` compares against the type
        # object `float` and is therefore always True; it does not test
        # whether the cell holds a NaN float.
        if df["Question"][index] is not None and df["Question"][index] is not float:
            try:
                exampleSentences = df["Question"][index]
                exampleSentences = [exampleSentences]
                defaultReplies = df["Answer"][index]
                defaultReplies = [defaultReplies]
            except:
                continue
            intents.append({
                "name": name,
                "exampleSentences": exampleSentences,
                "defaultReply": {
                    "text": defaultReplies,
                    "type": "text"
                }
            })

# Write the list of created intents into a JSON file
with open("{}.json".format(fileName), "w", encoding="utf-8") as outputFile:
    json.dump(intents, outputFile, ensure_ascii=False)
My code adds another JSON object
{
"name":"N3",
"exampleSentences": ["Q5"],
"defaultReply": {
"text": ["a3"],
"type": "text"
}
instead of merging Q3 and Q5. What should I do?
The problem in your code is you are iterating through a set of items and at every iteration you should check the previous items to see if your current element is already present. You can avoid this problem if you use an initially empty dictionary d storing key, value pairs in the form d[name] = {"exampleSentences": [question], "text": [answer]}. You can iterate so over df["Name"] like below:
intentNames = df["Name"]

# Group the rows by name: each entry accumulates every question seen
# for that name, plus the (single) answer from its first occurrence.
d = {}
for row_index, current_name in enumerate(intentNames):
    if current_name in d:
        # Name already seen — just collect the additional question.
        d[current_name]["exampleSentences"].append(df["Question"][row_index])
    else:
        d[current_name] = {
            "exampleSentences": [df["Question"][row_index]],
            "text": [df["Answer"][row_index]],
        }
Then you can use the created dictionary to create the json file with the expected output like below:
intentNames = df["Name"]

# First pass: group questions by name so duplicate names are merged.
d = {}
for row_index, current_name in enumerate(intentNames):
    current_question = df["Question"][row_index]
    current_answer = df["Answer"][row_index]
    if current_name in d:
        d[current_name]["exampleSentences"].append(current_question)
    else:
        d[current_name] = {
            "exampleSentences": [current_question],
            "text": [current_answer],
        }

# Second pass: shape the grouped data into the intent records.
intents = [
    {
        "name": intent_name,
        "exampleSentences": details["exampleSentences"],
        "defaultReply": {
            "text": details["text"],
            "type": "text",
        },
    }
    for intent_name, details in d.items()
]

# Write the list of created intents into a JSON file
with open("{}.json".format(fileName), "w", encoding="utf-8") as outputFile:
    json.dump(intents, outputFile, ensure_ascii=False)
I have text file and I want to convert it to JSON:
red|2022-09-29|03:15:00|info 1
blue|2022-09-29|10:50:00|
yellow|2022-09-29|07:15:00|info 2
So I wrote a script to convert this file into JSON:
import json

filename = 'input_file.txt'

dict1 = {}
fields = ['name', 'date', 'time', 'info']

with open(filename) as fh:
    # enumerate replaces the hand-maintained line counter `l`.
    for line_number, line in enumerate(fh, start=1):
        description = line.strip().split("|", 4)
        print(description)
        # Pair each field name with the column at the same position.
        record = {}
        for position, field_name in enumerate(fields):
            record[field_name] = description[position]
        dict1['name' + str(line_number)] = record

with open("json_file.json", "w") as out_file:
    json.dump(dict1, out_file, indent=4)
and output looks like this:
{
"name1": {
"name": "red",
"date": "2022-09-29",
"time": "03:15:00",
"info": "info 1"
},
"name2": {
"name": "blue",
"date": "2022-09-29",
"time": "10:50:00",
"info": ""
},
"name3": {
"name": "yellow",
"date": "2022-09-29",
"time": "07:15:00",
"info": "info 2"
}
}
As you can see it works, but now I want to change the layout of this JSON file. How can I change it to make my output look like this:
[
{"name":"red", "date": "2022-09-29", "time": "03:15:00", "info":"info 1"},
{"name":"blue", "date": "2022-09-29", "time": "10:50:00", "info":""},
{"name":"yellow", "date": "2022-09-29", "time": "07:15:00", "info":"info 2"}
]
If you see your required json output, it is a list and not a dict like you have right now. So using a list(data) instead of dict(dict1) should give the correct output.
Following updated code should generate the json data in required format -
import json

filename = 'input_file.txt'

# Records are collected in a list (not a dict) because the required
# output is a JSON array of objects.
data = []
fields = ['name', 'date', 'time', 'info']

with open(filename) as fh:
    for line in fh:
        description = line.strip().split("|", 4)
        print(description)
        # Pair field names with columns; zip stops at the shorter
        # sequence, so a short line no longer crashes with IndexError.
        data.append(dict(zip(fields, description)))

# FIXES vs. the original: the unused `l` / `sno` counters (leftovers
# from the dict version) are gone, and the output file is opened in a
# context manager so it is closed even if json.dump raises.
with open("json_file.json", "w") as out_file:
    json.dump(data, out_file, indent=4)
I would use pandas, it allows you to solve your problem in one statement and avoid reinventing a wheel:
import pandas as pd

# Read the pipe-delimited file, replace the NaNs produced by the empty
# trailing field with empty strings, and emit one JSON object per row.
table = pd.read_table("input_file.txt", sep="|", header=None,
                      names=["name", "date", "time", "info"])
table.fillna("").to_json("json_file.json", orient="records")
I am completely new to python and trying to covert nested json files to csv. The current code I am trying to use is:
import json
def read_json(filename: str) -> dict:
    """Load *filename* as JSON and return the parsed object.

    Raises Exception naming the file when it cannot be read or parsed.
    (The original used a bare ``except`` — which also swallows
    KeyboardInterrupt/SystemExit — and an f-string with no placeholder,
    so the message never said which file failed.)
    """
    try:
        with open(filename, "r") as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError) as err:
        raise Exception(f"Reading {filename} encountered an error") from err
    return data
def normalize_json(data: dict) -> dict:
    """Flatten nested containers into a single-level dict.

    Nested dicts contribute underscore-joined keys
    (``{"a": {"b": 1}}`` -> ``{"a_b": 1}``), and lists are flattened by
    index (``{"a": [{"b": 1}]}`` -> ``{"a_0_b": 1}``).  Scalars are
    copied as-is; empty lists are kept so the column stays visible.

    The original only flattened one level of dict nesting and left
    lists untouched — which is exactly why keys such as
    ``currentEmployments_firmId`` never reached the CSV.
    """
    new_data = dict()

    def _flatten(prefix: str, value) -> None:
        # Recurse into containers; everything else is a leaf value.
        if isinstance(value, dict):
            for k, v in value.items():
                _flatten(prefix + "_" + k if prefix else k, v)
        elif isinstance(value, list):
            if not value:
                new_data[prefix] = value
            else:
                for i, v in enumerate(value):
                    _flatten(f"{prefix}_{i}", v)
        else:
            new_data[prefix] = value

    for key, value in data.items():
        _flatten(key, value)
    return new_data
def generate_csv_data(data: dict) -> str:
    """Render *data* as a two-line CSV string: header row, then values.

    Uses the csv module so values containing commas, quotes, or
    newlines are properly quoted — the original's plain ``",".join``
    silently corrupted such rows.  Column order follows the dict's
    insertion order, as before.
    """
    import csv
    import io

    buffer = io.StringIO()
    # lineterminator="\n" matches the original's output exactly
    # (csv.writer defaults to "\r\n").
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(data.keys())
    # Single record: stringify each value, as the original did.
    writer.writerow([str(value) for value in data.values()])
    return buffer.getvalue()
def write_to_file(data: str, filepath: str) -> bool:
    """Write *data* to *filepath*; return True on success.

    Raises Exception naming the path on I/O failure.  The original used
    a bare ``except`` and, despite the ``-> bool`` annotation, never
    returned a value (callers always got None).
    """
    try:
        with open(filepath, "w+") as f:
            f.write(data)
    except OSError as err:
        raise Exception(f"Saving data to {filepath} encountered an error") from err
    return True
def main():
    """Read test2.json, flatten it, and save the result as data2.csv."""
    # Read the JSON file as python dictionary
    data = read_json(filename="test2.json")
    # Normalize the nested python dict
    new_data = normalize_json(data=data)
    # Pretty print the new dict object
    print("New dict:", new_data)
    # Generate the desired CSV data
    csv_data = generate_csv_data(data=new_data)
    # Save the generated CSV data to a CSV file
    # BUG FIX: the original read `filepath=data2.csv")` — the opening
    # quote was missing, which is a SyntaxError.
    write_to_file(data=csv_data, filepath="data2.csv")


if __name__ == '__main__':
    main()
It works partly: I get a CSV file that contains all values. However, for the nested key fields it only gives me the "highest" level (e.g. I get "currentEmployments" but not "currentEmployments_firmId").
Could someone help me with this?
Sample json file:
{
"basicInformation": {
"individualId": 10000,
"firstName": "Name",
"middleName": "middleName.",
"lastName": "lastName",
"bcScope": "Active",
"iaScope": "NotInScope",
"daysInIndustryCalculatedDate": "1/1/2000"
},
"currentEmployments": [
{
"firmId": 001,
"firmName": "firm1",
"iaOnly": "N",
"registrationBeginDate": "1/1/2005",
"firmBCScope": "ACTIVE",
"firmIAScope": "ACTIVE",
"iaSECNumber": "10000",
"iaSECNumberType": "100",
"bdSECNumber": "1000",
"branchOfficeLocations": [
{
"locatedAtFlag": "Y",
"supervisedFromFlag": "N",
"privateResidenceFlag": "N",
"branchOfficeId": "10000",
"street1": "street1",
"city": "city",
"state": "MD",
"country": "United States",
"zipCode": "10000"
}
]
}
],
"currentIAEmployments": [],
"previousEmployments": [
{
"iaOnly": "N",
"bdSECNumber": "20000",
"firmId": 200,
"firmName": "firm2",
"street1": "street",
"city": "city",
"state": "MD",
"country": "UNITED STATES",
"zipCode": "10000",
}
],
"examsCount": {
"stateExamCount": 0,
"principalExamCount": 0,
"productExamCount": 1
}
}
I am scraping some information off the web and want to show write the information into a JSON file with this format:
[
{
"name" : "name1",
"value" : 1
},
{
"name" : "name2",
"value" : 2
},
{
"name" : "name3",
"value" : 3
},
{
"name" : "name4",
"value" : 4
},
{
"name" : "name5",
"value" : 5
}
]
I am looping through everything I am scraping but don't know how to convert that information to this format. I tried to create a dictionary and then add to it after every loop but it does not give me the output I want.
# Original attempt from the question, annotated.
# BUG: `dictionary` is initialised to None, so the subscript assignment
# in the loop raises TypeError.  Even with `dictionary = {}` it would
# still be wrong: every iteration writes to the same "" key (keeping
# only the last item), and a dict can never serialise to the required
# JSON *list* of objects.
dictionary = None
name = None
value = None
for item in someList:
    name = item.name
    value = item.value
    dictionary[""] = {"name": name, "value": value}

with open("data.json", "w") as file:
    json.dump(dictionary, file, indent=4)
Try this:
import json

# Collect one dict per scraped item, then dump the whole list at once:
# a list of objects serialises to exactly the required JSON array.
myList = []
for item in someList:
    myList.append({"name": item.name, "value": item.value})

with open("data.json", "w") as file:
    json.dump(myList, file, indent=4)
The answer was simpler than I thought. I just needed to make a list of dictionaries and use that list in the json.dumps() function. Like this:
# A list of dicts serialises to exactly the required JSON array.
# FIX: the `name = None` / `value = None` pre-declarations (and the
# temporaries inside the loop) were dead code — Python needs no
# declarations and both were only ever used to build the dict.
myList = list()
for item in someList:
    myList.append({"name": item.name, "value": item.value})

with open("data.json", "w") as file:
    json.dump(myList, file, indent=4)
The format you show is a list not a dictionary. So you can make a list and append to it the different dictionaries.
# BUG FIX: the original created `arr` but then appended to `dictionary`
# and dumped `array` — both undefined names (NameError at runtime).
# The list is now consistently called `arr`.
arr = []
for item in someList:
    arr.append({"name": item.name, "value": item.value})

with open("data.json", "w") as file:
    json.dump(arr, file, indent=4)
I have written a code to convert csv file to nested json format. I have multiple columns to be nested hence assigning separately for each column. The problem is I'm getting 2 fields for the same column in the json output.
import csv
import json
from collections import OrderedDict  # NOTE(review): imported but not used in this snippet

# Input CSV and the derived output name (data.csv -> data.csv.json).
csv_file = 'data.csv'
json_file = csv_file + '.json'
def main(input_file):
    """Convert the pipe-delimited CSV *input_file* into nested JSON.

    CUSTOMER_* and CATEGORY_* columns are folded into nested CUSTOMER /
    CATEGORY objects; the flat originals are deleted so each field
    appears only once.  The result is written to the module-level
    ``json_file`` path.
    """
    csv_rows = []
    with open(input_file, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='|')
        for row in reader:
            # BUG FIX: the original lines ended with a stray comma
            # (`row['TYPE'] = 'REVIEW',`), which stores a 1-tuple —
            # that is exactly why RAWID serialised as [1].
            row['TYPE'] = 'REVIEW'  # adding new key, value
            row['RAWID'] = 1
            row['CUSTOMER'] = {
                "ID": row['CUSTOMER_ID'],
                "NAME": row['CUSTOMER_NAME']
            }
            row['CATEGORY'] = {
                "ID": row['CATEGORY_ID'],
                "NAME": row['CATEGORY']
            }
            # Drop the flat columns so the fields do not occur twice.
            del (row["CUSTOMER_NAME"], row["CATEGORY_ID"],
                 row["CATEGORY"], row["CUSTOMER_ID"])
            csv_rows.append(row)
    with open(json_file, 'w') as f:
        json.dump(csv_rows, f, sort_keys=True, indent=4, ensure_ascii=False)
        f.write('\n')
The output is as below:
[
{
"CATEGORY": {
"ID": "1",
"NAME": "Consumers"
},
"CATEGORY_ID": "1",
"CUSTOMER_ID": "41",
"CUSTOMER": {
"ID": "41",
"NAME": "SA Port"
},
"CUSTOMER_NAME": "SA Port",
"RAWID": [
1
]
}
]
I'm getting 2 entries for the fields I have assigned using row[''].
Is there any other way to get rid of this? I want only one entry for a particular field in each record.
Also how can I convert the keys to lower case after reading from csv.DictReader(). In my csv file all the columns are in upper case and hence I'm using the same to assign. But I want to convert all of them to lower case.
In order to convert the keys to lower case, it would be simpler to generate a new dict per row. BTW, it should be enough to get rid of the duplicate fields:
for row in reader:
    # BUG FIX: the module is `collections` — `collection.OrderedDict()`
    # raises NameError as written in the answer.
    orow = collections.OrderedDict()
    # BUG FIX: no trailing commas — `'REVIEW',` stores a 1-tuple,
    # reproducing the RAWID-as-list artefact from the question.
    orow['type'] = 'REVIEW'  # adding new key, value
    orow['rawid'] = 1
    orow['customer'] = {
        "id": row['CUSTOMER_ID'],
        "name": row['CUSTOMER_NAME']
    }
    orow['category'] = {
        "id": row['CATEGORY_ID'],
        "name": row['CATEGORY']
    }
    csv_rows.append(orow)