CSV to JSON Mass converter in Python - python

I have a folder called DATA. Inside that folder there are multiple .logs files, each formatted as CSV. I want to convert every .logs file inside the DATA folder to JSON using Python.
import csv
import json
import glob, os
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert one CSV file into a JSON file holding a list of row objects.

    Args:
        csvFilePath: Path of the UTF-8 CSV file to read. Its first row
            supplies the keys used for every record.
        jsonFilePath: Path of the JSON file to write. An existing file at
            this path is overwritten.
    """
    # newline='' leaves line-ending handling to the csv module, so quoted
    # fields that contain line breaks come through unchanged.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        # DictReader yields one dict per data row, keyed by the header row.
        jsonArray = list(csv.DictReader(csvf))

    # Serialize straight into the file rather than building the string first.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonArray, jsonf, indent=4)
# Work from the folder that holds the .logs files; any directory works.
# Without the raw-string prefix the backslashes would have to be doubled (\\).
os.chdir(r"C:\Users\Arda\Desktop\DATA")
for logs_path in glob.glob("**/*.logs", recursive=True):
    # Every input is written to the same output path, so each call replaces
    # the data.json produced for the previous file.
    csv_to_json(logs_path, r'data.json')
Only one file ends up converted, even though there are multiple .logs files in CSV format.
Of the files listed below, only the last one, "T1555.logs", was converted to JSON:
T1003.001.logs
T1003.002.logs
T1003.003.logs
T1003.004.logs
T1003.logs
T1552.002.logs
T1552.004.logs
T1555.003.logs
T1555.logs

I'd re-arrange where you're traversing the files so that all of the results are stored in a single jsonArray, then written to the file at the end:
import csv
import json
import glob, os
def csvs_to_json(csvFilePaths, jsonFilePath):
    """Merge the rows of several CSV files into a single JSON file.

    Args:
        csvFilePaths: Iterable of paths of UTF-8 CSV files. Each file's
            first row supplies the keys for that file's records.
        jsonFilePath: Path of the JSON file to write. It receives one list
            with the rows of all inputs, in the order the paths are given.
    """
    jsonArray = []
    for csvFilePath in csvFilePaths:
        # newline='' leaves line-ending handling to the csv module, so
        # quoted fields that contain line breaks come through unchanged.
        with open(csvFilePath, encoding='utf-8', newline='') as csvf:
            # DictReader yields one dict per data row of this file.
            jsonArray.extend(csv.DictReader(csvf))

    # Write once, after every input has been read.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonArray, jsonf, indent=4)
# Work from the folder that holds the .logs files; any directory works.
# Without the raw-string prefix the backslashes would have to be doubled (\\).
os.chdir(r"C:\Users\Arda\Desktop\DATA")
jsonFilePath = r'data.json'
# glob.glob already returns a list of the matching relative paths.
files = glob.glob("**/*.logs", recursive=True)
csvs_to_json(files, jsonFilePath)
Let me know if this works for you!

Related

Parsing JSON into CSV in Python

I'm trying to parse JSON files into CSV. I've been able to get the headers of the JSON file to be output into the CSV but I can't figure out how to get the data into the file.
# Python program to convert
# JSON file to CSV
# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the nesting of the statements below is inferred -- TODO confirm.
import json
import csv
# Opening JSON file and loading the data
# into the variable data
with open('test1.json') as json_file:
data = json.load(json_file)
# Each pass rebinds training_data to the 'profile' value of one entry of
# data; presumably only the last entry's profile is left after the loop --
# verify against the intended nesting.
for i in range(len(data)):
training_data = data[i]['profile']
# now we will open a file for writing
data_file = open('data_file.csv', 'w')
# create the csv writer object
csv_writer = csv.writer(data_file)
# Counter variable used for writing
# headers to the CSV file
count = 0
#type(training_data)
# The loop variable profile is not used in the body: the header and the
# row are both taken from training_data itself.
for profile in training_data:
# Write the header row only once, on the first pass.
if count == 0:
header = training_data.keys()
csv_writer.writerow(header)
count += 1
csv_writer.writerow(training_data.values())
data_file.close()
This is the file I'm trying to parse:
https://textdoc.co/OuphoV5saiwWYS8g
If someone could help me out I'd be eternally grateful
would something like this work for you?
import pandas as pd
# NOTE(review): json_file is not defined in this snippet; presumably it is the
# path of (or an open handle on) the JSON file -- confirm against the caller.
df = pd.read_json(json_file)
# Write the loaded DataFrame to data_file.csv.
df.to_csv('data_file.csv')
or for more complex nested json, you may have to load as a dictionary and manipulate:
# Parse the JSON text, then copy the records stored under the 'data' key
# into a list that pandas can turn into a table.
data = json.loads(json_str)
data_transformed = list(data['data'])
df = pd.DataFrame(data_transformed)
# Write the resulting DataFrame to data_file.csv.
df.to_csv('data_file.csv')

python nested json to csv Incomplete conversion

this is my json file
{"_index":"core-bvd-locations","_type":"_doc","_id":"75b82cba4a80784f4fa36d14c86f6d85","_score":1,"_source":{"a_id":"FR518077177","a_id_type":"BVD ID","a_name":"Moisan Patrick Roger","a_name_normal":"MOISAN PATRICK ROGER","a_country_code":"FR","a_country":"France","a_in_compliance_db":false,"a_nationality":"FR","a_street_address":"Les Carmes","a_city":"Decize","a_postcode":"58300","a_region":"Bourgogne-Franche-Comte|Nievre","a_phone":"+33 603740000","a_latitude":46.79402777777778,"a_longitude":3.496277777777778,"a_national_ids":{"European VAT number":["FR58 518077177"],"SIREN number":["518077177"],"TIN":["518077177"],"SIRET number":["518077177-00013"]},"relationship":"Location info","file_name":"/media/hedwig/iforce/data/BvD/s3-transfer/SuperTable_v3_json/locations/part-00021-1f62c713-17a0-410d-9b18-32328d9836d6-c000.json","a_geo_point":{"lat":46.79402777777778,"lon":3.496277777777778}}}
this is my code
import csv
import json
import sys
import codecs
def trans(path):
    """Convert a file with one JSON object per line into a CSV file.

    The input and output locations are hard-coded in the body. The keys of
    the first object become the header row, and every object contributes one
    row made of its top-level values. Nested objects are not flattened: the
    csv module writes them in their ``str()`` form.

    Args:
        path: Accepted for compatibility with existing callers; not used.
    """
    # Context managers close both files even when a line fails to parse.
    with codecs.open('F:\\1.json', 'r', 'utf-8') as jsonData, \
            open('F:\\1.csv', 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        flag = True  # True until the header row has been written
        for line in jsonData:
            dic = json.loads(line)
            if flag:
                keys = list(dic.keys())
                print(keys)
                writer.writerow(keys)
                flag = False
            writer.writerow(list(dic.values()))
if __name__ == '__main__':
    # sys.argv[0] is the path of the running script itself.
    path = sys.argv[0]
    print(path)
    trans(path)
C:\Users\jeri\PycharmProjects\pythonProject9\venv\Scripts\python.exe C:\Users\jeri\PycharmProjects\pythonProject9\zwc_count_file.py
C:\Users\jeri\PycharmProjects\pythonProject9\zwc_count_file.py
['_index', '_type', '_id', '_score', '_source']
Process finished with exit code 0
output jie
enter image description here
The information in the nested JSON cannot be parsed. How can I modify the code?
import json
import pandas as pd
# Read and parse the JSON document; the with-block closes the file handle.
with open("json_filname.json", 'r') as json_file:
    data = json.load(json_file)
# json_normalize flattens nested objects into one column per nested key.
df = pd.json_normalize(data)
df
json.load(): json.load() accepts file object, parses the JSON data, populates a Python dictionary with the data and returns it back to you.
import json
# Open the JSON file; the with-block closes it once parsing is done.
with open('data.json') as f:
    # json.load returns the parsed document (a dictionary when the
    # top-level JSON value is an object).
    data = json.load(f)
writer.writerow writes an entire row at once; the correct syntax is:
writer.writerow(#iterable_object#)

Converting a large CSV file to multiple JSON files using Python

I am currently using the following code to convert a large CSV file to a JSON file.
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert one CSV file into a JSON file holding a list of row objects.

    Args:
        csvFilePath: Path of the UTF-8 CSV file to read. Its first row
            supplies the keys used for every record.
        jsonFilePath: Path of the JSON file to write. An existing file at
            this path is overwritten.
    """
    # newline='' leaves line-ending handling to the csv module, so quoted
    # fields that contain line breaks come through unchanged.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        jsonArray = list(csv.DictReader(csvf))

    # Serialize straight into the file rather than building the string first.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonArray, jsonf, indent=4)
# Example invocation: the CSV file to read and the JSON file to produce.
csvFilePath = r'test_data.csv'
jsonFilePath = r'test_data.json'
csv_to_json(csvFilePath, jsonFilePath)
This code works fine and I am able to convert the CSV to JSON without any issues. However, as the CSV file contains 600,000+ rows and hence as many items in my JSON, it has become very difficult to manage the JSON file.
I would like to modify my above code such that for every 5000 rows of the CSV, the data is written into a new JSON file. Ideally, I would be having 120 (600,000/5000) JSON files in this case.
How can I do the same?
Split up your read and write methods and add a simple threshold:
JSON_ENTRIES_THRESHOLD = 5000  # modify to whatever you see suitable


def write_json(json_array, filename):
    """Write ``json_array`` to ``filename`` as UTF-8 encoded JSON."""
    with open(filename, 'w', encoding='utf-8') as jsonf:
        # json.dump serializes directly into the open file object.
        json.dump(json_array, jsonf)


def csv_to_json(csvFilePath, jsonFilePath, threshold=None):
    """Split a CSV file into numbered JSON files of at most ``threshold`` rows.

    Output names are derived from ``jsonFilePath`` by inserting ``-<index>``
    before the extension, e.g. ``test_data.json`` gives ``test_data-0.json``,
    ``test_data-1.json`` and so on. ``.json`` is used when ``jsonFilePath``
    has no extension.

    Args:
        csvFilePath: Path of the UTF-8 CSV file to read. Its first row
            supplies the keys used for every record.
        jsonFilePath: Template path for the output files.
        threshold: Maximum number of rows per output file. Defaults to
            ``JSON_ENTRIES_THRESHOLD``.
    """
    import os  # local import: the surrounding snippet does not import os

    if threshold is None:
        threshold = JSON_ENTRIES_THRESHOLD
    base, ext = os.path.splitext(jsonFilePath)
    ext = ext or '.json'

    jsonArray = []
    filename_index = 0
    # newline='' leaves line-ending handling to the csv module.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        for row in csv.DictReader(csvf):
            jsonArray.append(row)
            if len(jsonArray) >= threshold:
                # Chunk is full: write it out and start the next one.
                write_json(jsonArray, f"{base}-{filename_index}{ext}")
                filename_index += 1
                jsonArray = []

    # Write the remainder. Skip it when the last chunk ended exactly on the
    # threshold, but still emit one (empty) file for a CSV with no rows.
    if jsonArray or filename_index == 0:
        write_json(jsonArray, f"{base}-{filename_index}{ext}")

Converting JSON to CSV, CSV is empty

I'm attempting to convert yelps data set that is in JSON to a csv format. The new csv file that is created is empty.
I've tried different ways to iterate through the JSON but they all give me a zero bytes file.
The json file looks like this:
{"business_id":"1SWheh84yJXfytovILXOAQ","name":"Arizona Biltmore Golf Club","address":"2818 E Camino Acequia Drive","city":"Phoenix","state":"AZ","postal_code":"85016","latitude":33.5221425,"longitude":-112.0184807,"stars":3.0,"review_count":5,"is_open":0,"attributes":{"GoodForKids":"False"},"categories":"Golf, Active Life","hours":null}
import json
import csv
# Open the JSON source and the CSV destination.
infile = open("business.json", "r")
outfile = open("business2.csv", "w")
# Parse the whole input as a single JSON document, then release the input.
data = json.load(infile)
infile.close()
out = csv.writer(outfile)
# Header comes from the first record's keys; one CSV row per record follows.
# Note that outfile is not closed anywhere in this snippet.
out.writerow(data[0].keys())
out.writerows(row.values() for row in data)
I get an "extra data" message when the code runs. The new business2 csv file is empty and the size is zero bytes.
If your JSON file has only one row, then try this:
# Parse the single JSON object; the with-block closes the input file.
with open("business.json", "r") as infile:
    data = json.load(infile)
# Leaving the with-block closes the output file, which flushes the rows to
# disk. newline="" keeps the csv module's own line endings untouched.
with open("business2.csv", "w", newline="") as outfile:
    out = csv.writer(outfile)
    out.writerow(data.keys())    # header row
    out.writerow(data.values())  # the one data row
Hi, please try the code below. When a file is opened with a with statement, it is closed automatically as soon as control leaves the with block.
# Parse the single JSON object; the with-block closes the input file.
with open("business.json", "r") as infile:
    data = json.load(infile)
headers = list(data.keys())
values = list(data.values())
# Open the output exactly once. newline="" keeps the csv module's own
# line endings untouched.
with open("business2.csv", "w", newline="") as outfile:
    out = csv.writer(outfile)
    out.writerow(headers)
    out.writerow(values)
You need to use with to close file.
import json
import csv
# Parse the single JSON object; the with-block closes the input file even
# if parsing raises.
with open("business.json", "r") as infile:
    data = json.load(infile)
# newline="" keeps the csv module's own line endings untouched.
with open("business2.csv", "w", newline="") as outfile:
    out = csv.writer(outfile)
    out.writerow(list(data.keys()))
    out.writerow(list(data.values()))

Running Python program with a file

I want to run this program to convert the JSON file into a dictionary. I'm using Linux Mint. What commands do I use to run the program and convert the file?
import csv
import json
class Coverters:
    """Helpers for reading delimited text into dicts and writing JSON as CSV."""

    def __init__(self, labels=None):
        """Store the column labels used by ``covert_to_dict``.

        Args:
            labels: Sequence of field names for the columns of the input
                file, or None to take the names from the file's first row.
        """
        self.__labels = labels

    def covert_to_dict(self, filename):
        """Read data from a file and transform it into a list of dictionaries.

        Args:
            filename: Path of the delimited text file to read.

        Returns:
            A list with one dict per row, keyed by the stored labels.
        """
        out = []
        # newline="" leaves line-ending handling to the csv module.
        with open(filename, "r", newline="") as file:
            output = csv.DictReader(file, fieldnames=self.__labels)
            print(output)
            for row in output:
                print(row)
                out.append(row)
        return out

    def json_to_csv_file(self, csv_filename, json_filename):
        """Write the items of a JSON file to a CSV file, one row per item.

        Each row holds the item's ``ts`` and ``visitor_uuid`` followed by
        the values of its ``fields`` mapping.

        Args:
            csv_filename: Path of the CSV file to write.
            json_filename: Path of the JSON file to read.
        """
        with open(json_filename) as file:
            data = json.load(file)
        # csv.writer needs a text-mode file on Python 3.
        with open(csv_filename, "w", newline="") as file:
            csv_file = csv.writer(file)
            for item in data:
                # dict.values() is a view, so copy it into a list before
                # concatenating it with the leading columns.
                csv_file.writerow(
                    [item['ts'], item['visitor_uuid']]
                    + list(item['fields'].values())
                )
import csv
import json
def covert_to_dict(filename, labels=None):
    """Read data from a file and transform it into a list of dictionaries.

    Args:
        filename: Path of the delimited text file to read.
        labels: Sequence of field names for the columns, or None to take
            the names from the file's first row.

    Returns:
        A list with one dict per row.
    """
    out = []
    # newline="" leaves line-ending handling to the csv module.
    with open(filename, "r", newline="") as file:
        # A standalone function has no self, so the labels are passed in.
        output = csv.DictReader(file, fieldnames=labels)
        print(output)
        for row in output:
            print(row)
            out.append(row)
    return out
covert_to_dict("filename") # replace "filename" with the name of the file to convert
Save the above code in a file and name it something (e.g. xyz.py).
Place the file to convert in the same directory.
Open a terminal, go to that directory, and run this command: python xyz.py

Categories

Resources