How can I convert CSV to JSON in the format I want - python

Hello, here is my problem:
I wrote this to convert my CSV to JSON, but the result is not exactly what I want.
main.py
import csv

filename = "forcebrute.csv"

# Open the file with a "with" statement so it is closed automatically.
# NOTE: no delimiter is passed, so csv.DictReader splits on ',' (its
# default); a ';'-separated file is therefore read as a single column per
# row, which is what the result below shows.
with open(filename, 'r') as data:
    for line in csv.DictReader(data):
        print(line)
csv
name;price;profit
Action-1;20;5
Action-2;30;10
Action-3;50;15
Action-4;70;20
Action-5;60;17
result i have:
{'name;price;profit': 'Action-1;20;5'}
{'name;price;profit': 'Action-2;30;10'}
{'name;price;profit': 'Action-3;50;15'}
{'name;price;profit': 'Action-4;70;20'}
{'name;price;profit': 'Action-5;60;17'}
And I would like this result:

You will need to specify the column delimiter; then you can use json.dumps() to give you the required output format:
import csv
import json

# The file is ';'-separated, so the delimiter must be given explicitly;
# otherwise DictReader assumes ',' and reads each row as one column.
# newline='' is what the csv module recommends for files passed to a reader.
with open('forcebrute.csv', newline='') as data:
    rows = list(csv.DictReader(data, delimiter=';'))

# One JSON object per CSV row, pretty-printed as a JSON array.
print(json.dumps(rows, indent=2))
Output:
[
{
"name": "Action-1",
"price": "20",
"profit": "5"
},
{
"name": "Action-2",
"price": "30",
"profit": "10"
},
{
"name": "Action-3",
"price": "50",
"profit": "15"
},
{
"name": "Action-4",
"price": "70",
"profit": "20"
},
{
"name": "Action-5",
"price": "60",
"profit": "17"
}
]

You will need to use DictReader from the csv library (with the ';' delimiter) to read the contents of the CSV file, and then convert the contents to a list before using json.dumps to turn the data into JSON.
import csv
import json

filename = "forcebrute.csv"

# Open the CSV file and read the contents into a list of dictionaries.
# newline='' lets the csv module handle line endings itself.
with open(filename, 'r', newline='') as f:
    reader = csv.DictReader(f, delimiter=';')
    csv_data = list(reader)

# Convert the data to a JSON string and print it to the console
json_data = json.dumps(csv_data)
print(json_data)

An easy approach would be to use pandas, which is also quite fast with large CSV files. It might need some tweaking, but you get the point.
import pandas as pd
import json

# Path of the ';'-separated CSV file from the question.
filename = "forcebrute.csv"

# Load the CSV, then serialize one dict per row as a JSON array.
df = pd.read_csv(filename, sep=';')
data = json.dumps(df.to_dict(orient='records'))

Related

convert a CSV file to JSON file

I am trying to convert CSV file to JSON file based on a column value. The csv file looks somewhat like this.
ID Name Age
CSE001 John 18
CSE002 Marie 20
ECE001 Josh 22
ECE002 Peter 23
currently I am using the following code to obtain json file.
import csv
import json
def csv_to_json(csv_file_path, json_file_path):
    """Convert a CSV file into a JSON object keyed by each row's ``ID``.

    Args:
        csv_file_path: Path of the comma-separated input file; its header
            row must contain an ``ID`` column.
        json_file_path: Path of the JSON file to write (overwritten).

    Raises:
        KeyError: If the rows have no ``ID`` column.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(csv_file_path, encoding='utf-8', newline='') as csv_file_handler:
        # Rows sharing an ID overwrite each other: the last one wins.
        data_dict = {row['ID']: row for row in csv.DictReader(csv_file_handler)}

    with open(json_file_path, 'w', encoding='utf-8') as json_file_handler:
        json_file_handler.write(json.dumps(data_dict, indent=4))
OUTPUT:
**{
"CSE001":{
"ID":"CSE001",
"Name":"John",
"Age":18
}
"CSE002":{
"ID":"CSE002",
"Name":"Marie",
"Age":20
}
"ECE001":{
"ID":"ECE001",
"Name":"Josh",
"Age":22
}
"ECE002":{
"ID":"ECE002",
"Name":"Peter",
"Age":23
}
}**
I want my output to generate two separate json files for CSE and ECE based on the ID value. Is there a way to achieve this output.
Required Output:
CSE.json:
{
"CSE001":{
"ID":"CSE001",
"Name":"John",
"Age":18
}
"CSE002":{
"ID":"CSE002",
"Name":"Marie",
"Age":20
}
}
ECE.json:
{
"ECE001":{
"ID":"ECE001",
"Name":"Josh",
"Age":22
}
"ECE002":{
"ID":"ECE002",
"Name":"Peter",
"Age":23
}
}
I would suggest using pandas; that way it will be easier.
Code may look like:
import pandas as pd
def csv_to_json(csv_file_path):
    """Split a CSV file into ``CSE.json`` and ``ECE.json`` by ``ID`` prefix.

    Both output files are written to the current working directory.

    Args:
        csv_file_path: Path of a CSV file that has an ``ID`` column.
    """
    df = pd.read_csv(csv_file_path)
    # Match on the prefix only, so an ID that merely contains the code
    # somewhere else in the string is not picked up.
    df_CSE = df[df['ID'].str.startswith('CSE')]
    df_ECE = df[df['ID'].str.startswith('ECE')]
    df_CSE.to_json('CSE.json')
    df_ECE.to_json('ECE.json')
You can create dataframe and then do the following operation
import pandas as pd
df = pd.DataFrame.from_dict({
    "CSE001": {
        "ID": "CSE001",
        "Name": "John",
        "Age": 18
    },
    "CSE002": {
        "ID": "CSE002",
        "Name": "Marie",
        "Age": 20
    },
    "ECE001": {
        "ID": "ECE001",
        "Name": "Josh",
        "Age": 22
    },
    "ECE002": {
        "ID": "ECE002",
        "Name": "Peter",
        "Age": 23
    }
}, orient='index')

# Group rows by the three-letter prefix of the ID ("CSE", "ECE").
# Grouping on the derived Series directly avoids adding a temporary column
# to the frame that would then have to be excluded from the output.
grps = df.groupby(df["ID"].str[:3].rename("prefix"))
for k, v in grps:
    print(v.to_json(orient="index"))  # you can create json file as well
You could store each row in a two-level dictionary, with the top level keyed by the first 3 characters of the ID.
These could then be written out into separate files with the key being part of the filename:
from collections import defaultdict
import csv
import json
def csv_to_json(csv_file_path, json_base_path, prefix_length=3):
    """Split a CSV file into one JSON file per ``ID`` prefix.

    Rows are grouped by the leading characters of their ``ID``; each group
    is written to ``<json_base_path>_<prefix>.json`` as an object keyed by
    the full ID.

    Args:
        csv_file_path: Path of the comma-separated input file; its header
            row must contain an ``ID`` column.
        json_base_path: Path prefix shared by all output files.
        prefix_length: Number of leading ID characters that form the group
            key (default 3, e.g. ``"CSE"``).

    Raises:
        KeyError: If the rows have no ``ID`` column.
    """
    # defaultdict creates the inner dict the first time a prefix is seen.
    data_dict = defaultdict(dict)

    # newline='' lets the csv module handle line endings itself.
    with open(csv_file_path, encoding='utf-8', newline='') as csv_file_handler:
        for row in csv.DictReader(csv_file_handler):
            row_id = row['ID']
            data_dict[row_id[:prefix_length]][row_id] = row

    for key, values in data_dict.items():
        with open(f'{json_base_path}_{key}.json', 'w', encoding='utf-8') as json_file_handler:
            json.dump(values, json_file_handler, indent=4)
csv_to_json('input.csv', 'output')
The defaultdict is used to avoid needing to first test if a key is already present before using it.
This would create output_CSE.json and output_ECE.json, e.g.
{
"ECE001": {
"ID": "ECE001",
"Name": "Josh",
"Age": "22"
},
"ECE002": {
"ID": "ECE002",
"Name": "Peter",
"Age": "23"
}
}

Separate large JSON object into many different files

I have a JSON file with 10000 data entries like below in a file.
{
"1":{
"name":"0",
"description":"",
"image":""
},
"2":{
"name":"1",
"description":"",
"image":""
},
...
}
I need to write each entry in this object into its own file.
For example, the output of each file looks like this:
1.json
{
"name": "",
"description": "",
"image": ""
}
I have the following code, but I'm not sure how to proceed from here. Can anyone help with this?
import json

# Load the whole JSON document into a Python dict.
with open('sample.json', 'r') as openfile:
    # Reading from json file
    json_object = json.load(openfile)
You can use a for loop to iterate over all the fields in the outer object, and then create a new file for each inner object:
import json

with open('sample.json', 'r') as input_file:
    json_object = json.load(input_file)

# Each top-level key becomes a file name ("1" -> "1.json") that holds the
# inner object stored under that key.
for key, value in json_object.items():
    with open(f'{key}.json', 'w') as output_file:
        json.dump(value, output_file)

Convert CSV into Json in Python. Format problem

I have written Python code to convert a CSV file into a JSON file, but the output is not what I expected. Please take a look and suggest modifications.
Below is the expected json file.
[
{
"id": "1",
"MobileNo": "923002546363"
},
{
"id": "2",
"MobileNo": "923343676143"
}
]
below is the code that I have written in python.
import csv, json
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert a CSV file into a JSON array with one object per row.

    Args:
        csvFilePath: Path of the comma-separated input file (UTF-8); the
            header row supplies the keys of every object.
        jsonFilePath: Path of the JSON file to write (overwritten).
    """
    # read csv file; newline='' lets the csv module handle line endings
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        # DictReader yields one dict per row, keyed by the header names
        jsonArray = list(csv.DictReader(csvf))

    # convert python jsonArray to JSON String and write to file
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(jsonArray, indent=4))
csvFilePath = r'my_csv_data.csv'
jsonFilePath = r'data.json'
csv_to_json(csvFilePath, jsonFilePath)
As your post doesn't provide current output, I just created a csv file to run your code:
id,MobileNo
1,923002546363
2,923343676143
3,214134367614
And works just fine:
[
{
"id": "1",
"MobileNo": "923002546363"
},
{
"id": "2",
"MobileNo": "923343676143"
},
{
"id": "3",
"MobileNo": "214134367614"
}
]
Check if your csv file isn't corrupted. And if possible, edit your post with current output and your csv file.

Replace json nested dictionary value with a file value

I have two files: json and text files.
I would like to replace one of the dictionary values with the value stored in the text file.
Let us say that the text file file.text contains the following list: [11, 15, 10].
In the json file, I have the following dictionary.
"aa": {
"bb": [
"25",
"40",
"05"
],
"cc": [
"20"
]
}
I would like to overwrite the cc value with the text file above.
file.json
"aa": {
"bb": [
"25",
"40",
"05"
],
"cc": [
"11", "15", "10"
]
}
I have tried something in Python.
# Asker's unfinished attempt: reads the text file and the JSON file, then
# starts looping over the "aa" entry; the innermost loop has no body yet.
def replace(text_file, json_file):
# NOTE(review): this value already ends in a file name, yet it is joined with
# text_file below - presumably it should be the directory only.
tex_file_path = 'C:/Documents/file.txt'
with open(os.path.join(tex_file_path, text_file), 'r') as f:
read_text= f.read()
# NOTE(review): same as above - a full file path is joined with json_file.
json_file_path = 'C:/Documents/file.json'
with open(os.path.join(json_file_path, json_file), 'r') as f:
read_json = json.load(f)
text_to_be_replaced = read_json.get('aa')
# .items() yields (key, value) pairs, so `value` here is a 2-tuple.
for value in text_to_be_replaced.items():
for element in value:
# statement
I was wondering if someone can really help with this.
Although you've named it .text, the contents of the file appear to be JSON, so you can use json.load() as well. Then convert the integers in the list to strings and insert it into the desired place in the JSON file.
There's no need to loop over the dictionary items. Just address the specific element you want to replace.
def replace(text_file, json_file, directory='C:/Documents'):
    """Overwrite ``["aa"]["cc"]`` in a JSON file with a list from a text file.

    Args:
        text_file: Name of the file holding a JSON list, e.g. ``[11, 15, 10]``.
        json_file: Name of the JSON file to update in place.
        directory: Directory that contains both files.

    Raises:
        KeyError: If the JSON document has no ``"aa"`` entry.
    """
    source = os.path.join(directory, text_file)
    target = os.path.join(directory, json_file)

    with open(source, 'r') as f:
        # The text file's content is valid JSON, so json.load can parse it.
        read_text = json.load(f)
    # The existing lists hold strings, so convert the integers to match.
    read_text = list(map(str, read_text))

    with open(target, 'r') as f:
        read_json = json.load(f)

    # No loop needed: address the element to replace directly.
    read_json["aa"]["cc"] = read_text

    with open(target, 'w') as f:
        json.dump(read_json, f)
Also, your XXX_path variables should just be directories, the filename comes from the function parameter.
Here's a simple example using a StringIO to demonstrate reading / writing from a file-like object:
import json
from io import StringIO
json_file_obj = StringIO("""
{"aa": {
"bb": [
"25",
"40",
"05"
],
"cc": [
"20"
]
}
}
""")
text_file_obj = StringIO("[11, 15, 10]")
def replace(src_file_obj: StringIO, repl_file_obj: StringIO):
    """Return a new file-like object with ``["aa"]["cc"]`` replaced.

    Args:
        src_file_obj: File-like object containing the JSON document.
        repl_file_obj: File-like object containing a JSON list; its items
            are converted to strings before being stored.

    Returns:
        A fresh ``StringIO`` holding the updated JSON, positioned at the
        start so it can be read immediately.
    """
    # Load file contents into a Python object
    data = json.load(src_file_obj)
    # Read in txt file contents
    new_cc_value = json.load(repl_file_obj)
    # But now result will be a list of int, here we want a list of string
    new_cc_value = list(map(str, new_cc_value))
    # Replace desired value
    data['aa']['cc'] = new_cc_value
    # Now we write to our file-like object, `src_file_obj`
    # This is to demonstrate replacing the original file contents
    src_file_obj = StringIO()
    json.dump(data, src_file_obj)
    # Seek to the start of the file
    src_file_obj.seek(0)
    return src_file_obj
json_file_obj = replace(json_file_obj, text_file_obj)
print(json_file_obj.read())
Output:
{"aa": {"bb": ["25", "40", "05"], "cc": ["11", "15", "10"]}}
Hint - If you want to write the output to an actual file, you can replace these lines below:
src_file_obj = StringIO()
json.dump(data, src_file_obj)
src_file_obj.seek(0)
With these lines:
# Write the updated object straight to a real file instead of a StringIO.
with open("file_name.txt", 'w') as out_file:
    json.dump(data, out_file)

convert csv file to multiple nested json format

I have written a code to convert csv file to nested json format. I have multiple columns to be nested hence assigning separately for each column. The problem is I'm getting 2 fields for the same column in the json output.
import csv
import json
from collections import OrderedDict
csv_file = 'data.csv'
json_file = csv_file + '.json'
def main(input_file):
    """Read a '|'-delimited CSV and dump its rows as nested JSON.

    The result is written to the module-level ``json_file`` path.

    Args:
        input_file: Path of the '|'-delimited CSV file to read.
    """
    csv_rows = []
    with open(input_file, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='|')
        for row in reader:
            # NOTE: the trailing commas make these two values 1-tuples,
            # which json serializes as arrays (hence "RAWID": [1] below).
            row['TYPE'] = 'REVIEW',  # adding new key, value
            row['RAWID'] = 1,
            row['CUSTOMER'] = {
                "ID": row['CUSTOMER_ID'],
                "NAME": row['CUSTOMER_NAME']
            }
            row['CATEGORY'] = {
                "ID": row['CATEGORY_ID'],
                "NAME": row['CATEGORY']
            }
            # NOTE: row["CATEGORY"] was just reassigned to the nested dict,
            # so deleting it here removes that nested dict as well.
            del (row["CUSTOMER_NAME"], row["CATEGORY_ID"],
                 row["CATEGORY"], row["CUSTOMER_ID"])  # deleting since fields occurring twice
            csv_rows.append(row)
    with open(json_file, 'w') as f:
        json.dump(csv_rows, f, sort_keys=True, indent=4, ensure_ascii=False)
        f.write('\n')
The output is as below:
[
{
"CATEGORY": {
"ID": "1",
"NAME": "Consumers"
},
"CATEGORY_ID": "1",
"CUSTOMER_ID": "41",
"CUSTOMER": {
"ID": "41",
"NAME": "SA Port"
},
"CUSTOMER_NAME": "SA Port",
"RAWID": [
1
]
}
]
I'm getting 2 entries for the fields I have assigned using row[''].
Is there any other way to get rid of this? I want only one entry for a particular field in each record.
Also how can I convert the keys to lower case after reading from csv.DictReader(). In my csv file all the columns are in upper case and hence I'm using the same to assign. But I want to convert all of them to lower case.
In order to convert the keys to lower case, it would be simpler to generate a new dict per row. BTW, that should also be enough to get rid of the duplicate fields:
for row in reader:
    # Build a fresh dict per row: only the wanted (lower-case) keys are
    # added, so the duplicated source columns never reach the output.
    orow = OrderedDict()
    # No trailing comma here: `'REVIEW',` would store a 1-tuple, which
    # json serializes as an array instead of a plain value.
    orow['type'] = 'REVIEW'  # adding new key, value
    orow['rawid'] = 1
    orow['customer'] = {
        "id": row['CUSTOMER_ID'],
        "name": row['CUSTOMER_NAME']
    }
    orow['category'] = {
        "id": row['CATEGORY_ID'],
        "name": row['CATEGORY']
    }
    csv_rows.append(orow)

Categories

Resources