Currently, I have a CSV file with the following example --
File,skill,experience,overall_experience
1,Java,1.5,3
1,Python,1.0,3
1,SQL,0.5,3
There are multiple entries for many such files, but I need to merge the skills and their respective experience values under a single key per file, something like this -
{
    "1": {
        "file": "1",
        "skill": ["Java", "Python", "SQL"],
        "experience": [1.5, 1.0, 0.5],
        "Overall_exp": 3.0
    }
}
I tried some Python code for this, but it gives me only the value of the last skill and the last experience (and not the whole thing in a list).
Here is the code I was using --
import csv
import json

# Function to convert a CSV to JSON
# Takes the file paths as arguments
def make_json(csvFilePath, jsonFilePath):
    # Create a dictionary
    data = {}
    # Open a csv reader called DictReader
    with open(csvFilePath, encoding='utf-8') as csvf:
        csvReader = csv.DictReader(csvf)
        # Convert each row into a dictionary
        # and add it to data
        for rows in csvReader:
            # Assuming a column named 'file' to
            # be the primary key
            key = rows['file']
            data[key] = rows
    # Open a json writer, and use the json.dumps()
    # function to dump data
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))

# Decide the two file paths according to your
# computer system
csvFilePath = 'skill_matrix.csv'
jsonFilePath = 'skill_matrix.json'

# Call the make_json function
make_json(csvFilePath, jsonFilePath)
The output that I get here is this --
{
    "1": {
        "file": "1",
        "skill": "SQL",
        "experience": "0.5",
        "Overall_exp": "3.0"
    }
}
How can I get the former JSON format instead of the latter?
You can use pandas to read your csv, group by File and export to json:
import pandas as pd
import numpy as np

df = pd.read_csv(your_csv)
df = df.groupby('File', as_index=False).agg({'skill': list, 'experience': list, 'overall_experience': np.mean})
print(df.to_json(orient='index', indent=4))
Note: you can specify the aggregation function for each of your columns in the dictionary passed to agg (see the sketch after the output below)
Output:
{
    "0":{
        "File":1,
        "skill":[
            "Java",
            "Python",
            "SQL"
        ],
        "experience":[
            1.5,
            1.0,
            0.5
        ],
        "overall_experience":3.0
    }
}
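For instance, a minimal sketch of swapping in other aggregations per column (the file name and column names are assumed from the question's CSV):

import pandas as pd

df = pd.read_csv('skill_matrix.csv')  # file name assumed from the question
# Any aggregation accepted by .agg() can go in the dictionary:
df = df.groupby('File', as_index=False).agg({
    'skill': '; '.join,           # join the skills into one string
    'experience': list,           # collect the values into a list
    'overall_experience': 'max',  # named aggregations work too
})
print(df.to_json(orient='index', indent=4))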
I think loading everything into pandas first and then narrowing down from there is cleaner and easier. You can use the following code to parse your data into a JSON file:
import pandas as pd
import json

# Load the CSV into pandas
df = pd.read_csv('1.csv', header=0)
data = df.to_dict(orient='list')

# Delete / change as you wish
data['File'] = str(data['File'][0])
data['overall_experience'] = data['overall_experience'][0]

# Save as json
with open('1.json', 'w', encoding='utf-8') as jsonf:
    jsonf.write(json.dumps(data, indent=4))
Result (1.json)
{
    "File": "1",
    "skill": [
        "Java",
        "Python",
        "SQL"
    ],
    "experience": [
        1.5,
        1.0,
        0.5
    ],
    "overall_experience": 3
}
I suppose that you have multiple file ids in one CSV file; your given example is rather minimal. In that case you can create a master dictionary and add your smaller ones to it as follows:
import pandas as pd
import json

# Load the CSV into pandas
df = pd.read_csv('1.csv', header=0)

# Master dictionary
master_dict = {}
for idx, file_id in enumerate(df["File"].unique()):
    data = df[df['File'] == file_id].to_dict(orient='list')
    # Delete / change as you wish
    data['File'] = str(data['File'][0])
    data['overall_experience'] = data['overall_experience'][0]
    master_dict[idx] = data

# Save as json
with open('1.json', 'w', encoding='utf-8') as jsonf:
    jsonf.write(json.dumps(master_dict, indent=4))
Result (1.json)
{
    "0": {
        "File": "1",
        "skill": [
            "Java",
            "Python",
            "SQL"
        ],
        "experience": [
            1.5,
            1.0,
            0.5
        ],
        "overall_experience": 3
    },
    "1": {
        "File": "2",
        "skill": [
            "Java",
            "Python"
        ],
        "experience": [
            2.0,
            2.5
        ],
        "overall_experience": 1
    }
}
If you don't want to use Pandas, you could try:
import csv
import json

def make_json(csvfile_path, jsonfile_path):
    data = {}
    with open(csvfile_path, "r") as csvfile:
        next(csvfile)  # Skip header line
        for row in csv.reader(csvfile):
            fdata = data.setdefault(row[0], {"file": row[0]})
            fdata.setdefault("skill", []).append(row[1])
            fdata.setdefault("experience", []).append(float(row[2]))
            fdata.setdefault("overall_experience", []).append(float(row[3]))
    with open(jsonfile_path, "w") as jsonfile:
        json.dump(data, jsonfile)
The main difference to your approach is the explicit structuring of the inner dicts: values are lists (except for the 'file' key). dict.setdefault() is great here: it sets a value for a key if the key isn't in the dict yet, and returns the value (either the newly set one or the existing one). So you can put a list in the dict, get it back, and immediately .append() to it.
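A tiny standalone illustration of that behaviour:

d = {}
d.setdefault("skill", []).append("Java")    # key missing: inserts [], then appends
d.setdefault("skill", []).append("Python")  # key present: returns the existing list
print(d)  # {'skill': ['Java', 'Python']}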
If you want to use a csv.DictReader:
def make_json(csvfile_path, jsonfile_path):
    data = {}
    with open(csvfile_path, "r") as csvfile:
        for row in csv.DictReader(csvfile):
            fdata = data.setdefault(row["file"], {"file": row["file"]})
            for key, value in list(row.items())[1:]:
                fdata.setdefault(key, []).append(
                    value if key == "skill" else float(value)
                )
    with open(jsonfile_path, "w") as jsonfile:
        json.dump(data, jsonfile)
(I haven't tested this version, since I wasn't sure about the actual column names.)
Related
I am trying to convert a CSV file to a JSON file based on a column value. The CSV file looks somewhat like this:
ID      Name   Age
CSE001  John   18
CSE002  Marie  20
ECE001  Josh   22
ECE002  Peter  23
Currently I am using the following code to obtain the JSON file:
import csv
import json

def csv_to_json(csv_file_path, json_file_path):
    data_dict = {}
    with open(csv_file_path, encoding='utf-8') as csv_file_handler:
        csv_reader = csv.DictReader(csv_file_handler)
        for rows in csv_reader:
            key = rows['ID']
            data_dict[key] = rows
    with open(json_file_path, 'w', encoding='utf-8') as json_file_handler:
        json_file_handler.write(json.dumps(data_dict, indent=4))
OUTPUT:
{
    "CSE001": {
        "ID": "CSE001",
        "Name": "John",
        "Age": 18
    },
    "CSE002": {
        "ID": "CSE002",
        "Name": "Marie",
        "Age": 20
    },
    "ECE001": {
        "ID": "ECE001",
        "Name": "Josh",
        "Age": 22
    },
    "ECE002": {
        "ID": "ECE002",
        "Name": "Peter",
        "Age": 23
    }
}
I want my output to generate two separate JSON files for CSE and ECE based on the ID value. Is there a way to achieve this output?
Required Output:
CSE.json:
{
    "CSE001": {
        "ID": "CSE001",
        "Name": "John",
        "Age": 18
    },
    "CSE002": {
        "ID": "CSE002",
        "Name": "Marie",
        "Age": 20
    }
}
ECE.json:
{
    "ECE001": {
        "ID": "ECE001",
        "Name": "Josh",
        "Age": 22
    },
    "ECE002": {
        "ID": "ECE002",
        "Name": "Peter",
        "Age": 23
    }
}
I would suggest you use pandas; that way it will be easier.
The code may look like:
import pandas as pd

def csv_to_json(csv_file_path):
    df = pd.read_csv(csv_file_path)
    df_CSE = df[df['ID'].str.contains('CSE')]
    df_ECE = df[df['ID'].str.contains('ECE')]
    df_CSE.to_json('CSE.json')
    df_ECE.to_json('ECE.json')
You can create a dataframe and then do the following operation:
import pandas as pd

df = pd.DataFrame.from_dict({
    "CSE001": {"ID": "CSE001", "Name": "John", "Age": 18},
    "CSE002": {"ID": "CSE002", "Name": "Marie", "Age": 20},
    "ECE001": {"ID": "ECE001", "Name": "Josh", "Age": 22},
    "ECE002": {"ID": "ECE002", "Name": "Peter", "Age": 23}
}, orient='index')

df["id_"] = df["ID"].str[0:2]  # temp column for storing first two chars
grps = df.groupby("id_")[["ID", "Name", "Age"]]
for k, v in grps:
    print(v.to_json(orient="index"))  # you can create a json file as well
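Following the comment above, a small sketch of writing each group to its own file instead of printing (file names derived from the two-character prefix are an assumption):

# Write each group to a JSON file named after its prefix, e.g. CS.json, EC.json
for k, v in grps:
    v.to_json(f"{k}.json", orient="index")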
You could store each row in a two-level dictionary, with the top level being the first 3 characters of the ID.
These could then be written out into separate files, with the key forming part of the filename:
from collections import defaultdict
import csv
import json

def csv_to_json(csv_file_path, json_base_path):
    data_dict = defaultdict(dict)
    with open(csv_file_path, encoding='utf-8') as csv_file_handler:
        csv_reader = csv.DictReader(csv_file_handler)
        for row in csv_reader:
            key = row['ID'][:3]
            data_dict[key][row['ID']] = row
    for key, values in data_dict.items():
        with open(f'{json_base_path}_{key}.json', 'w', encoding='utf-8') as json_file_handler:
            json_file_handler.write(json.dumps(values, indent=4))

csv_to_json('input.csv', 'output')
The defaultdict is used to avoid needing to first test if a key is already present before using it.
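A quick standalone illustration of the difference:

from collections import defaultdict

dd = defaultdict(dict)
dd['CSE']['CSE001'] = {'Name': 'John'}  # inner dict created automatically on first access
# With a plain dict, data_dict['CSE']['CSE001'] = ... would raise KeyError: 'CSE'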
This would create output_CSE.json and output_ECE.json, e.g.
{
    "ECE001": {
        "ID": "ECE001",
        "Name": "Josh",
        "Age": "22"
    },
    "ECE002": {
        "ID": "ECE002",
        "Name": "Peter",
        "Age": "23"
    }
}
I am trying to delete an element in a JSON file. Here is my JSON file:
Before:
{
    "names": [
        {
            "PrevStreak": false,
            "Streak": 0,
            "name": "Brody B#3719",
            "points": 0
        },
        {
            "PrevStreak": false,
            "Streak": 0,
            "name": "XY_MAGIC#1111",
            "points": 0
        }
    ]
}
After running the script:
{
    "names": [
        {
            "PrevStreak": false,
            "Streak": 0,
            "name": "Brody B#3719",
            "points": 0
        }
    ]
}
How would I do this in Python? The file is stored locally, and I decide which element to delete by the name in each element.
Thanks
I would load the file, remove the item, and then save it again. Example:
import json

with open("filename.json") as f:
    data = json.load(f)

data["names"].pop(1)  # or iterate through the entries to find the matching name

with open("filename.json", "w") as f:
    json.dump(data, f)
You will have to read the file, convert it to a native Python data type (e.g. a dictionary), delete the element, and then save the file. In your case something like this could work:
import json

filepath = 'data.json'
with open(filepath, 'r') as fp:
    data = json.load(fp)

del data['names'][1]

with open(filepath, 'w') as fp:
    json.dump(data, fp)
Try this:
# importing the module
import ast

# reading the data from the file
with open('dictionary.txt') as f:
    data = f.read()
print("Data type before reconstruction:", type(data))

# reconstructing the data as a dictionary
a_dict = ast.literal_eval(data)

# keeping every entry except the one with the unwanted name
result = {"names": [a for a in a_dict["names"] if a.get("name") != "XY_MAGIC#1111"]}
import json

with open("test.json", 'r') as f:
    data = json.loads(f.read())

names = data.get('names')
for idx, name in enumerate(names):
    if name['name'] == 'XY_MAGIC#1111':
        del names[idx]
        break
print(names)
In order to read the file, the best approach is the with statement; after that you can just use Python's json library to convert the JSON string into a Python dict. Once you have the dict, you can access the values and do the operations you need. Then you can convert it back to JSON with json.dumps() and save it.
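Put together, a minimal sketch of that read/modify/write cycle (file name and target name taken from the question and the answers above):

import json

# Read the file inside a with-block and parse it into a dict
with open("test.json", 'r') as f:
    data = json.load(f)

# Keep only the entries whose name doesn't match
data['names'] = [n for n in data['names'] if n['name'] != 'XY_MAGIC#1111']

# Serialize back to JSON and save
with open("test.json", 'w') as f:
    f.write(json.dumps(data, indent=4))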
This does the right thing using the Python json module, and pretty-prints the JSON back to the file afterwards:
import json

jsonpath = '/path/to/json/file.json'

with open(jsonpath) as file:
    j = json.loads(file.read())

names_to_remove = ['XY_MAGIC#1111']
# Iterate over a copy: removing items from a list while iterating over it skips elements
for element in j['names'][:]:
    if element['name'] in names_to_remove:
        j['names'].remove(element)

with open(jsonpath, 'w') as file:
    file.write(json.dumps(j, indent=4))
I have two files: a JSON file and a text file.
I would like to replace one of the dictionary values with the value in the text file.
Let us say that in the text file file.text I have the following list: [11, 15, 10].
In the JSON file, I have the following dictionary:
"aa": {
"bb": [
"25",
"40",
"05"
],
"cc": [
"20"
]
}
I would like to overwrite the cc value with the text file above.
file.json
"aa": {
"bb": [
"25",
"40",
"05"
],
"cc": [
"11", "15", "10"
]
}
I have tried something in Python.
def replace(text_file, json_file):
    tex_file_path = 'C:/Documents/file.txt'
    with open(os.path.join(tex_file_path, text_file), 'r') as f:
        read_text = f.read()
    json_file_path = 'C:/Documents/file.json'
    with open(os.path.join(json_file_path, json_file), 'r') as f:
        read_json = json.load(f)
    text_to_be_replaced = read_json.get('aa')
    for value in text_to_be_replaced.items():
        for element in value:
            # statement
I was wondering if someone could help with this.
Although you've named it .text, the contents of the file appear to be JSON, so you can use json.load() on it as well. Then convert the integers in the list to strings and insert the result at the desired place in the JSON data.
There's no need to loop over the dictionary items. Just address the specific element you want to replace.
import os
import json

def replace(text_file, json_file):
    tex_file_path = 'C:/Documents'
    with open(os.path.join(tex_file_path, text_file), 'r') as f:
        read_text = json.load(f)
    read_text = list(map(str, read_text))

    json_file_path = 'C:/Documents'
    with open(os.path.join(json_file_path, json_file), 'r') as f:
        read_json = json.load(f)

    read_json["aa"]["cc"] = read_text

    with open(os.path.join(json_file_path, json_file), 'w') as f:
        json.dump(read_json, f)
Also, your XXX_path variables should just be directories; the filename comes from the function parameter.
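For example, a hypothetical call with the file names from the question (assuming the directories in the code above) would be:

# Hypothetical usage with the question's file names
replace('file.text', 'file.json')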
Here's a simple example using a StringIO to demonstrate reading / writing from a file-like object:
import json
from io import StringIO

json_file_obj = StringIO("""
{"aa": {
    "bb": [
        "25",
        "40",
        "05"
    ],
    "cc": [
        "20"
    ]
  }
}
""")
text_file_obj = StringIO("[11, 15, 10]")

def replace(src_file_obj: StringIO, repl_file_obj: StringIO):
    # Load file contents into a Python object
    data = json.load(src_file_obj)
    # Read in txt file contents
    new_cc_value = json.load(repl_file_obj)
    # But now the result is a list of int; here we want a list of string
    new_cc_value = list(map(str, new_cc_value))
    # Replace the desired value
    data['aa']['cc'] = new_cc_value
    # Now we write to our file-like object, `src_file_obj`
    # This is to demonstrate replacing the original file contents
    src_file_obj = StringIO()
    json.dump(data, src_file_obj)
    # Seek to the start of the file
    src_file_obj.seek(0)
    return src_file_obj

json_file_obj = replace(json_file_obj, text_file_obj)
print(json_file_obj.read())
Output:
{"aa": {"bb": ["25", "40", "05"], "cc": ["11", "15", "10"]}}
Hint - If you want to write the output to an actual file, you can replace these lines below:
src_file_obj = StringIO()
json.dump(data, src_file_obj)
src_file_obj.seek(0)
With these lines:
with open("file_name.txt", 'w') as out_file:
json.dump(data, out_file)
I have a JSON file with 20 objects, each containing a resource parameter with an associated value. I would like to extract the value of resource for each object, and save that value as a line in a txt file.
The structure of the JSON is:
"objects": [
{"created": "2020-10-04", "name": "john", "resource": "api/john/",}
{"created": "2020-10-04", "name": "paul", "resource": "api/paul/",}
{"created": "2020-10-04", "name": "george", "resource": "api/george/",}
{"created": "2020-10-04", "name": "ringo", "resource": "api/ringo/",}
]
So far, I have got the following code; however, this can only get the resource value from a single object, and it does not let me write it to a txt file using Python.
with open(input_json) as json_file:
    data = json.load(json_file)
    resource = data["objects"][1]["resource"]
    values = resource.items()
    k = {str(key): str(value) for key, value in values}
with open('resource-list.txt', 'w') as resource_file:
    resource_file.write(k)
You have to iterate over the list of objects:
txtout = ""
with open(input_json) as json_file:
    data = json.load(json_file)
    objects = data["objects"]
    for jobj in objects:
        txtout = txtout + jobj["resource"] + "\n"
with open('resource-list.txt', 'w') as resource_file:
    resource_file.write(txtout)
Hi there, new Pythonista!
Well, the thing you missed here is the part where you iterate over your JSON object.
with open(input_json) as json_file:
    data = json.load(json_file)
    resource = data["objects"][1]["resource"]  # right here you simply took the second object (which is the [1] position)
A decent fix would be:
with open(input_json) as json_file:
    data = json.load(json_file)
    all_items = []  # let's keep all resource values here
    for item in data["objects"]:  # iterate over all the items
        all_items.append(item["resource"])  # push the necessary info

# let's concat every item into one string; since it's only made of 20 items, it will not make our buffer explode
to_write = "\n".join(all_items)

with open("resource-list.txt", "w") as f:
    f.write(to_write)
and we’re done!
I have written code to convert a CSV file to nested JSON format. I have multiple columns to be nested, hence I'm assigning them separately for each column. The problem is that I'm getting two fields for the same column in the JSON output.
import csv
import json
from collections import OrderedDict

csv_file = 'data.csv'
json_file = csv_file + '.json'

def main(input_file):
    csv_rows = []
    with open(input_file, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='|')
        for row in reader:
            row['TYPE'] = 'REVIEW',  # adding new key, value
            row['RAWID'] = 1,
            row['CUSTOMER'] = {
                "ID": row['CUSTOMER_ID'],
                "NAME": row['CUSTOMER_NAME']
            }
            row['CATEGORY'] = {
                "ID": row['CATEGORY_ID'],
                "NAME": row['CATEGORY']
            }
            del (row["CUSTOMER_NAME"], row["CATEGORY_ID"],
                 row["CATEGORY"], row["CUSTOMER_ID"])  # deleting since fields are occurring twice
            csv_rows.append(row)
    with open(json_file, 'w') as f:
        json.dump(csv_rows, f, sort_keys=True, indent=4, ensure_ascii=False)
        f.write('\n')
The output is as below:
[
    {
        "CATEGORY": {
            "ID": "1",
            "NAME": "Consumers"
        },
        "CATEGORY_ID": "1",
        "CUSTOMER_ID": "41",
        "CUSTOMER": {
            "ID": "41",
            "NAME": "SA Port"
        },
        "CUSTOMER_NAME": "SA Port",
        "RAWID": [
            1
        ]
    }
]
I'm getting two entries for the fields I have assigned using row['']. Is there any way to get rid of this? I want only one entry for a particular field in each record.
Also, how can I convert the keys to lower case after reading them from csv.DictReader()? In my CSV file all the columns are in upper case, hence I'm using the same case to assign, but I want to convert all of them to lower case.
In order to convert the keys to lower case, it would be simpler to generate a new dict per row. By the way, this should also be enough to get rid of the duplicate fields:
import collections

for row in reader:
    orow = collections.OrderedDict()
    orow['type'] = 'REVIEW'  # adding new key, value (no trailing comma, which would make it a tuple)
    orow['rawid'] = 1
    orow['customer'] = {
        "id": row['CUSTOMER_ID'],
        "name": row['CUSTOMER_NAME']
    }
    orow['category'] = {
        "id": row['CATEGORY_ID'],
        "name": row['CATEGORY']
    }
    csv_rows.append(orow)
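If you'd rather keep your row processing as it is, a general way to lower-case the keys coming out of csv.DictReader is a dict comprehension; a small standalone sketch (the sample row is for illustration only):

# Lower-case every key of a row dict
row = {"CUSTOMER_ID": "41", "CUSTOMER_NAME": "SA Port"}  # sample row, for illustration
lower_row = {key.lower(): value for key, value in row.items()}
print(lower_row)  # {'customer_id': '41', 'customer_name': 'SA Port'}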