How to convert CSV to JSON in Python? - python

I'm very new to programming; I've been learning Python for the past 3-4 weeks, and
this is one of the assignments I was given.
Input
A, B, C, D
1, 2, 3, 4
5, 6, 7, 8
Output
{{A:"1", B:"2", C:"3", D:"4"}, {A:"5", B:"6", C:"7", D:"8"}}
I've been trying with this code:
import csv
import json

csvfile = open('test.csv', 'r')
jsonfile = open('test.json', 'w')

x = ("a", "b", "c", "d")
reader = csv.DictReader(csvfile, x)
for row in reader:
    json.dump(row, jsonfile)
The output for this code comes as below:
{"a": "1", "null": ["5", "6", "7", "8", "9"], "c": "3", "b": "2", "d": "4"}
Can anyone help me on this?

Read all the rows first, then dump the whole list in one call.
import csv
import json

with open('test.csv') as f:
    reader = csv.DictReader(f)
    rows = list(reader)

with open('test.json', 'w') as f:
    json.dump(rows, f)

For those who like one-liners:
import csv
import json
json_data = [json.dumps(d) for d in csv.DictReader(open('file.csv'))]
Check out this fiddle for a working example:
https://pyfiddle.io/fiddle/5992b8f4-552f-4970-91b6-a52cdee16ebc/?i=true
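Note that the comprehension above gives you a list of separate JSON strings, one per row, rather than a single JSON document. If you want one JSON array for the whole file instead, a minimal variation (untested sketch) is:

import csv
import json

# Serialize all rows into one JSON array instead of one string per row.
with open('file.csv') as f:
    json_data = json.dumps(list(csv.DictReader(f)))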

import csv
import json

# Constants to make everything easier
CSV_PATH = './csv.csv'
JSON_PATH = './json'

# Reads the file the same way that you did
csv_file = csv.DictReader(open(CSV_PATH, 'r'))

# Create a list and add the rows to it
json_list = []
for row in csv_file:
    json_list.append(row)

# Write the JSON output to the file
open(JSON_PATH, 'w').write(json.dumps(json_list))

Convert CSV to JSON in Python
import csv
import urllib2

url = '<YOURCSVURL>'
response = urllib2.urlopen(url)
cr = csv.reader(response)

line = {}
data = []
for index, row in enumerate(cr):
    if index:
        for index, col in enumerate(row):
            line[name[index]] = col
        data.append(line.copy())
    else:
        name = row

print data
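The answer above is written for Python 2 (urllib2 and the print statement). A rough Python 3 equivalent, offered only as a sketch, would use urllib.request and decode the response before handing it to csv:

import csv
import io
import json
import urllib.request

url = '<YOURCSVURL>'
with urllib.request.urlopen(url) as response:
    text = response.read().decode('utf-8')

# DictReader takes the first row as the field names, so no manual header handling is needed.
data = list(csv.DictReader(io.StringIO(text)))
print(json.dumps(data, indent=2))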

You can attempt it using this code:
def inputfunction(headers, lists):
    tmpdict = {}
    for element_index in range(len(lists)):
        tmpdict[headers[element_index]] = lists[element_index]
    return tmpdict

def run(filename):
    filelist = [eachline.strip().split(',') for eachline in open(filename, 'r')]
    headers = filelist[0]
    values = filelist[1:]
    finallist = []
    for lists in values:
        finallist.append(inputfunction(headers, lists))
    return finallist

Related

Converting data to JSON in same format as CSV

I have the following code which prints the object as CSV:
title = ['Username', 'Name', 'Job']
for x in title:
    print(x, end=",")
for d in data:
    line = d.get_username() + "," + d.get_name() + "," + d.get_role()
    print(line)
I get:
Username,Name,Job
rob,robert,developer
danny21,danny,developer
I want to print the same data as JSON in order to get:
[
  {
    "Username": "rob",
    "Name": "robert",
    "Job": "developer"
  },
  {
    "Username": "danny21",
    "Name": "danny",
    "Job": "developer"
  }
]
From previous topics I learned that we can use json.dumps, but I'm not sure if it helps in this case.
What is the proper way to achieve it?
You could simply do:
l = []
for d in data:
    user_dictionary = {}
    user_dictionary[title[0]] = d.get_username()
    user_dictionary[title[1]] = d.get_name()
    user_dictionary[title[2]] = d.get_role()
    l.append(user_dictionary)
to get a JSON-like list of dicts.
You can also avoid appending and do:
def get_user_data(user):
    user_dictionary = {}
    user_dictionary[title[0]] = user.get_username()
    user_dictionary[title[1]] = user.get_name()
    user_dictionary[title[2]] = user.get_role()
    return user_dictionary

l = list(map(get_user_data, data))
You can use json.dump to dump l to a file:
import json

with open('data.json', 'w') as outfile:
    json.dump(l, outfile)
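If you only need to print it in the format shown in the question rather than write a file, json.dumps on the same list should do (a small usage example; l is the list of dicts built above):

import json

# Pretty-print the list of dicts; indent is optional.
print(json.dumps(l, indent=2))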

Edit CSV file in Python which reads values from another JSON file

I want to edit a CSV file which reads its value from another JSON file of mine, in Python 2.7.
My CSV is a.csv:
a,b,c,d
,10,12,14
,11,14,15
My JSON file is a.json:
{"a":20}
I want the column 'a' to be matched against the JSON file; if there is a match, the value from the JSON should be copied into my CSV file, so that the final output of my CSV file looks like this:
a,b,c,d
20,10,12,14
20,11,14,15
What I have tried so far:
fileCSV = open('a.csv', 'a')
fileJSON = open('a.json', 'r')
jsonData = fileJSON.json()
for k in range(jsonData):
    for i in csvRow:
        for j in jsonData.keys():
            if i == j:
                if self.count == 0:
                    self.data = jsonData[j]
                    self.count = 1
                else:
                    self.data = self.data + "," + jsonData[j]
                    self.count = 0
        fileCSV.write(self.data)
        fileCSV.write("\n")
    k += 1
fileCSV.close()
print("File created successfully")
I will be really thankful if anyone can help me with this.
Please ignore any syntax and indentation errors.
Thank you.
Some basic string parsing will get you there. I wrote a script which works for the simple scenario you refer to.
Check if this solves your problem:
import json
from collections import OrderedDict

def list_to_csv(listdat):
    csv = ""
    for val in listdat:
        csv = csv + "," + str(val)
    return csv[1:]

lines = []
csvfile = "csvfile.csv"
outcsvfile = "outcsvfile.csv"
jsonfile = "jsonfile.json"

with open(csvfile, encoding='UTF-8') as a_file:
    for line in a_file:
        lines.append(line.strip())

columns = lines[0].split(",")
data = lines[1:]

whole_data = []
for row in data:
    fields = row.split(",")
    i = 0
    rowData = OrderedDict()
    for column in columns:
        rowData[columns[i]] = fields[i]
        i += 1
    whole_data.append(rowData)

with open(jsonfile) as json_file:
    jsondata = json.load(json_file)

keys = list(jsondata.keys())
for key in keys:
    value = jsondata[key]
    for each_row in whole_data:
        each_row[key] = value

with open(outcsvfile, mode='w', encoding='UTF-8') as b_file:
    b_file.write(list_to_csv(columns) + '\n')
    for row_data in whole_data:
        row_list = []
        for ecolumn in columns:
            row_list.append(row_data.get(ecolumn))
        b_file.write(list_to_csv(row_list) + '\n')
CSV output is not written to the source file but to a different file.
The output file is also always truncated and written, hence the 'w' mode.
I would recommend using the csv.DictReader and csv.DictWriter classes, which read CSV rows into Python dicts and write them back out. This would make it easier to modify the dict values that you read in from the JSON file.
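A minimal sketch of that DictReader/DictWriter approach, assuming Python 3 and the file names from the question (the output name a_out.csv is just an illustration):

import csv
import json

with open('a.json') as jf:
    json_data = json.load(jf)              # e.g. {"a": 20}

with open('a.csv', newline='') as inf:
    reader = csv.DictReader(inf)
    fieldnames = reader.fieldnames
    rows = list(reader)

# Overwrite every column whose header matches a key in the JSON file.
for row in rows:
    for key, value in json_data.items():
        if key in row:
            row[key] = value

with open('a_out.csv', 'w', newline='') as outf:
    writer = csv.DictWriter(outf, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)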

Adding a new column to a CSV with Python

I have the following list of numbers: ['Number', 1,2,3,4]
If I have the following CSV file:
`Name`
`First`
`Second`
`Third`
`Fourth`
How do I add my list of numbers to it and make it look like this:
`Name Number`
`First 1`
`Second 2`
`Third 3`
`Fourth 4`
You can use fileinput.input with inplace=True to modify the original file:
import fileinput
import sys

l = ['Number', 1, 2, 3, 4]
for ind, line in enumerate(fileinput.input("in.csv", inplace=True)):
    sys.stdout.write("{} {}\n".format(line.rstrip(), l[ind]))
Input:
Name
First
Second
Third
Fourth
Output:
Name Number
First 1
Second 2
Third 3
Fourth 4
Or write to a tempfile and move with shutil.move to replace the original file:
import csv
from shutil import move
from tempfile import NamedTemporaryFile

l = ['Number', 1, 2, 3, 4]

with open('in.csv') as csvfile, NamedTemporaryFile("w", dir=".", delete=False) as temp:
    r = csv.reader(csvfile)
    wr = csv.writer(temp, delimiter=" ")
    for row, new in zip(r, l):
        wr.writerow(row + [new])

move(temp.name, "in.csv")
Not an elegant way, but it works:
#!/usr/bin/python
import csv
import sys

def csv_to_dict(csv_file_path):
    csv_file = open(csv_file_path, 'rb')
    csv_file.seek(0)
    sniffdialect = csv.Sniffer().sniff(csv_file.read(10000), delimiters='\t,;')
    csv_file.seek(0)
    dict_reader = csv.DictReader(csv_file, dialect=sniffdialect)
    csv_file.seek(0)
    dict_data = []
    for record in dict_reader:
        dict_data.append(record)
    csv_file.close()
    return dict_data

def dict_to_csv(csv_file_path, dict_data):
    csv_file = open(csv_file_path, 'wb')
    writer = csv.writer(csv_file, dialect='excel')
    headers = dict_data[0].keys()
    writer.writerow(headers)
    for dat in dict_data:
        line = []
        for field in headers:
            line.append(dat[field])
        writer.writerow(line)
    csv_file.close()

if __name__ == '__main__':
    org_path = sys.argv[1]
    new_path = sys.argv[2]
    your_array = ['Number', 1, 2, 3, 4]
    org_csv = csv_to_dict(org_path)
    new_data = []
    for line in org_csv:
        new_line = dict()
        new_line['Name'] = line['Name']
        new_line[your_array[0]] = your_array[org_csv.index(line) + 1]
        new_data.append(new_line)
    if new_data:
        dict_to_csv(new_path, new_data)
Hope that will help!
import csv

with open('existing_file.csv', 'rb') as infile:
    reader = csv.reader(infile)
    your_list = list(reader)

list2 = ['Number', 1, 2, 3, 4]
zipped = [row + [new] for row, new in zip(your_list, list2)]

with open("test.csv", "wb") as outfile:
    writer = csv.writer(outfile)
    writer.writerows(zipped)

Trying to read xlrd, extract data, and write CSV

I am trying to read an excel file, extract some data, and write it out as a csv. This is pretty new to me and I'm messing up somewhere: I keep getting an empty csv. I'm sure I'm missing something very basic, but darned if I can see it. Here is the code:
import xlrd
import os
import csv
from zipfile import ZipFile
import datetime

datafile = "./2013_ERCOT_Hourly_Load_Data.xls"
outfile = "./2013_Max_Loads.csv"

def parse_file(datafile):
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)
    data = None
    outputlist = []
    for col in range(1, sheet.ncols):
        cv = sheet.col_values(col, start_rowx=1, end_rowx=None)
        header = sheet.cell_value(0, col)
        maxval = max(cv)
        maxpos = cv.index(maxval) + 1
        maxtime = sheet.cell_value(maxpos, 0)
        realtime = xlrd.xldate_as_tuple(maxtime, 0)
        year = realtime[0]
        month = realtime[1]
        day = realtime[2]
        hour = realtime[3]
        data = [
            'Region:', header,
            'Year:', year,
            'Month:', month,
            'Day:', day,
            'Hour:', hour,
            maxpos,
            maxtime,
            realtime,
            maxval,
        ]
    path = "./2013_Max_Loads.csv"
    return outputlist

def save_file(data, filename):
    with open(filename, "wb") as f:
        writer = csv.writer(f, delimiter='|')
        for line in data:
            writer.writerow(line)

parse_file(datafile)
save_file(parse_file(datafile), "2013_Max_Loads.csv")
You declare outfile but you don't use it.
You aren't passing a directory (path) for the file to be saved in.
I also think that calling parse_file twice might be messing you up. Just pass the filename and call it from within the save_file function.
I also found that you were returning outputlist as a blank list.
So here, try this. I will assume your xlrd commands are correct, because I have not personally used the module.
import csv
import xlrd

def parse_file(datafile):
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)
    outputlist = []
    outputlist_append = outputlist.append
    for col in range(1, sheet.ncols):
        cv = sheet.col_values(col, start_rowx=1, end_rowx=None)
        header = sheet.cell_value(0, col)
        maxval = max(cv)
        maxpos = cv.index(maxval) + 1
        maxtime = sheet.cell_value(maxpos, 0)
        realtime = xlrd.xldate_as_tuple(maxtime, 0)
        year = realtime[0]
        month = realtime[1]
        day = realtime[2]
        hour = realtime[3]
        data = [
            'Region:', header,
            'Year:', year,
            'Month:', month,
            'Day:', day,
            'Hour:', hour,
            maxpos,
            maxtime,
            realtime,
            maxval,
        ]
        outputlist_append(data)
    return outputlist

def save_file(data, filename):
    data = parse_file(data)
    with open(filename, 'wb') as f:
        writer = csv.writer(f, delimiter='|')
        for line in data:
            writer.writerow(line)
    return

datafile = "./2013_ERCOT_Hourly_Load_Data.xls"
outfile = "./2013_Max_Loads.csv"

save_file(datafile, outfile)
UPDATE: edited the code in save_file() to implement #wwii's suggestion.
Try substituting the new save_file() below:
def save_file(data, filename):
    data = parse_file(data)
    with open(filename, 'wb') as f:
        wr = csv.writer(f, delimiter='|')
        wr.writerows(data)
    return
Also, change the variable (you used writer) to something like wr. You really want to avoid any possible conflicts with having a variable with the same name as a method, a function, or class you are calling.

How to convert CSV file to multiline JSON?

Here's my code, really simple stuff...
import csv
import json

csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')

fieldnames = ("FirstName", "LastName", "IDNumber", "Message")
reader = csv.DictReader(csvfile, fieldnames)
out = json.dumps([row for row in reader])
jsonfile.write(out)
Declare some field names; the reader uses csv to read the file, and the field names to dump the file to a JSON format. Here's the problem...
Each record in the CSV file is on a different row. I want the JSON output to be the same way. The problem is it dumps it all on one giant, long line.
I've tried using something like for line in csvfile: and then running my code below that with reader = csv.DictReader( line, fieldnames) which loops through each line, but it does the entire file on one line, then loops through the entire file on another line... continues until it runs out of lines.
Any suggestions for correcting this?
Edit: To clarify, currently I have: (every record on line 1)
[{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"},{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}]
What I'm looking for: (2 records on 2 lines)
{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"}
{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}
Not each individual field indented/on a separate line, but each record on its own line.
Some sample input.
"John","Doe","001","Message1"
"George","Washington","002","Message2"
The problem with your desired output is that it is not a valid JSON document; it's a stream of JSON documents!
That's okay, if it's what you need, but that means that for each document you want in your output, you'll have to call json.dumps.
Since the newline you want separating your documents is not contained in those documents, you're on the hook for supplying it yourself. So we just need to pull the loop out of the call to json.dump and interpose newlines for each document written.
import csv
import json

csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')

fieldnames = ("FirstName", "LastName", "IDNumber", "Message")
reader = csv.DictReader(csvfile, fieldnames)
for row in reader:
    json.dump(row, jsonfile)
    jsonfile.write('\n')
You can use a Pandas DataFrame to achieve this, with the following example:
import pandas as pd
csv_file = pd.DataFrame(pd.read_csv("path/to/file.csv", sep = ",", header = 0, index_col = False))
csv_file.to_json("/path/to/new/file.json", orient = "records", date_format = "epoch", double_precision = 10, force_ascii = True, date_unit = "ms", default_handler = None)
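Note that orient = "records" on its own writes the whole file as a single JSON array on one line. If you want one record per line, as in the question, to_json also accepts lines = True together with orient = "records" in reasonably recent pandas versions; a small sketch:

import pandas as pd

# One JSON object per line (JSON Lines) instead of a single array.
pd.read_csv("path/to/file.csv").to_json("/path/to/new/file.json", orient="records", lines=True)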
import csv
import json

file = 'csv_file_name.csv'
json_file = 'output_file_name.json'

# Read CSV file
def read_CSV(file, json_file):
    csv_rows = []
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile)
        field = reader.fieldnames
        for row in reader:
            csv_rows.extend([{field[i]: row[field[i]] for i in range(len(field))}])
        convert_write_json(csv_rows, json_file)

# Convert CSV data into JSON
def convert_write_json(data, json_file):
    with open(json_file, "w") as f:
        f.write(json.dumps(data, sort_keys=False, indent=4, separators=(',', ': ')))  # for pretty output
        # f.write(json.dumps(data))  # compact alternative

read_CSV(file, json_file)
Documentation of json.dumps()
I took #SingleNegationElimination's response and simplified it into a three-liner that can be used in a pipeline:
import csv
import json
import sys

for row in csv.DictReader(sys.stdin):
    json.dump(row, sys.stdout)
    sys.stdout.write('\n')
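If you save those three lines as, say, csv2jsonl.py (the name is only an illustration), you can run it in a shell pipeline like python csv2jsonl.py < file.csv > file.json, which writes one JSON document per CSV row.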
You can try this
import csvmapper

# how the object looks
mapper = csvmapper.DictMapper([
    [
        {'name': 'FirstName'},
        {'name': 'LastName'},
        {'name': 'IDNumber', 'type': 'int'},
        {'name': 'Messages'}
    ]
])

# parser instance
parser = csvmapper.CSVParser('sample.csv', mapper)

# conversion service
converter = csvmapper.JSONConverter(parser)
print converter.doConvert(pretty=True)
Edit:
Simpler approach
import csvmapper
fields = ('FirstName', 'LastName', 'IDNumber', 'Messages')
parser = csvmapper.CSVParser('sample.csv', csvmapper.FieldMapper(fields))
converter = csvmapper.JSONConverter(parser)
print converter.doConvert(pretty=True)
I see this is old, but I needed the code from SingleNegationElimination; however, I had an issue with the data containing non-UTF-8 characters. These appeared in fields I was not overly concerned with, so I chose to ignore them, but that took some effort. I am new to Python, so with some trial and error I got it to work. The code is a copy of SingleNegationElimination's with the extra handling of UTF-8. I tried to do it with https://docs.python.org/2.7/library/csv.html but in the end gave up. The code below worked.
import csv, json

csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')

fieldnames = ("Scope", "Comment", "OOS Code", "In RMF", "Code", "Status", "Name", "Sub Code", "CAT", "LOB", "Description", "Owner", "Manager", "Platform Owner")
reader = csv.DictReader(csvfile, fieldnames)
code = ''
for row in reader:
    try:
        print('+' + row['Code'])
        for key in row:
            row[key] = row[key].decode('utf-8', 'ignore').encode('utf-8')
        json.dump(row, jsonfile)
        jsonfile.write('\n')
    except:
        print('-' + row['Code'])
        raise
Add the indent parameter to json.dumps
data = {'this': ['has', 'some', 'things'],
'in': {'it': 'with', 'some': 'more'}}
print(json.dumps(data, indent=4))
Also note that you can simply use json.dump with the open jsonfile:
json.dump(data, jsonfile)
Use pandas and the json library:
import pandas as pd
import json

filepath = "inputfile.csv"
output_path = "outputfile.json"

df = pd.read_csv(filepath)

# Create a multiline json
json_list = json.loads(df.to_json(orient="records"))

with open(output_path, 'w') as f:
    for item in json_list:
        f.write(json.dumps(item) + "\n")
How about using Pandas to read the csv file into a DataFrame (pd.read_csv), then manipulating the columns if you want (dropping them or updating values) and finally converting the DataFrame back to JSON (pd.DataFrame.to_json).
Note: I haven't checked how efficient this will be but this is definitely one of the easiest ways to manipulate and convert a large csv to json.
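A rough sketch of that idea (the column names and paths here are made up purely for illustration):

import pandas as pd

df = pd.read_csv("inputfile.csv")

# Manipulate the columns however you need before converting.
df = df.drop(columns=["UnwantedColumn"], errors="ignore")
df["Message"] = df["Message"].fillna("None")

# Convert back to JSON, one object per CSV row.
df.to_json("outputfile.json", orient="records")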
As a slight improvement to #MONTYHS's answer, iterating through a tuple of fieldnames:
import csv
import json

csvfilename = 'filename.csv'
jsonfilename = csvfilename.split('.')[0] + '.json'

csvfile = open(csvfilename, 'r')
jsonfile = open(jsonfilename, 'w')
reader = csv.DictReader(csvfile)

fieldnames = ('FirstName', 'LastName', 'IDNumber', 'Message')

output = []
for each in reader:
    row = {}
    for field in fieldnames:
        row[field] = each[field]
    output.append(row)

json.dump(output, jsonfile, indent=2, sort_keys=True)
import csv
import json

def read():
    noOfElem = 200  # number of rows you want to import
    csv_file_name = "hashtag_donaldtrump.csv"  # csv file name
    json_file_name = "hashtag_donaldtrump.json"  # json file name

    with open(csv_file_name, mode='r') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        with open(json_file_name, 'w') as json_file:
            i = 0
            json_file.write("[")
            for row in csv_reader:
                i = i + 1
                if i == noOfElem:
                    json_file.write("]")
                    return
                json_file.write(json.dumps(row))
                if i != noOfElem - 1:
                    json_file.write(",")

Change the above three parameters and everything will be done.
import csv
import json

csvfile = csv.DictReader(open('filename.csv', 'r'))

output = []
for each in csvfile:
    row = {}
    row['FirstName'] = each['FirstName']
    row['LastName'] = each['LastName']
    row['IDNumber'] = each['IDNumber']
    row['Message'] = each['Message']
    output.append(row)

json.dump(output, open('filename.json', 'w'), indent=4, sort_keys=False)
