I'm trying to parse JSON files into CSV. I've been able to get the headers of the JSON file to be output into the CSV but I can't figure out how to get the data into the file.
# Python program to convert
# JSON file to CSV
import json
import csv
# Opening JSON file and loading the data
# into the variable data
with open('test1.json') as json_file:
data = json.load(json_file)
for i in range(len(data)):
training_data = data[i]['profile']
# now we will open a file for writing
data_file = open('data_file.csv', 'w')
# create the csv writer object
csv_writer = csv.writer(data_file)
# Counter variable used for writing
# headers to the CSV file
count = 0
#type(training_data)
for profile in training_data:
if count == 0:
header = training_data.keys()
csv_writer.writerow(header)
count += 1
csv_writer.writerow(training_data.values())
data_file.close()
This is the file im trying to parse
https://textdoc.co/OuphoV5saiwWYS8g
If someone could help me out I'd be eternally grateful
would something like this work for you?
import pandas as pd
df = pd.read_json(json_file)
df.to_csv('data_file.csv')
or for more complex nested json, you may have to load as a dictionary and manipulate:
data = json.loads(json_str)
data_transformed = [i for i in data['data']]
df = pd.DataFrame(data_transformed )
df.to_csv('data_file.csv')
Related
I am trying to convert below JSON format text to pandas or spark data frame, but it is giving below error.
ERROR: JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
Python CODE:
# import pandas to read json file
import json
path = "sample.json"
with open(path, 'r') as myfile:
data=myfile.read()
data = data.replace('\t','')
data = data.replace('\n','')
data = data.replace(',}','}')
data = data.replace(',]',']')
obj = json.loads(data)
JSON file format
Output of data after reading .json file by using open function
How can I convert above text as a data frame?
I got, I added few lines of code
path = "sample.json"
with open(path, 'r') as myfile:
data=myfile.read()
data = data.replace('\t','')
data = data.replace('\n','')
data = data.replace(',}','}')
data = data.replace(',]',']')
data = data.replace("null", "''")
liss = []
data1 = data[1:-1]
data2 = data1.split("},")
for i in data2:
last_value = i[len(i)-1]
if last_value != "}":
new_text = i+"}"
liss.append(new_text)
else:
new_text = i
liss.append(new_text)
sample_df = pd.DataFrame({"Col1":liss})
sample_df["Col1"] = sample_df["Col1"].apply(lambda x : dict(eval(x)) )
df3 = sample_df["Col1"].apply(pd.Series )
df3
I think you can read the json and save it in a dictionary.
Once you have this dictionary you can create a spark dataframe with the following line of code
df = spark.createDataFrame(dict)
I'm attempting to convert yelps data set that is in JSON to a csv format. The new csv file that is created is empty.
I've tried different ways to iterate through the JSON but they all give me a zero bytes file.
The json file looks like this:
{"business_id":"1SWheh84yJXfytovILXOAQ","name":"Arizona Biltmore Golf Club","address":"2818 E Camino Acequia Drive","city":"Phoenix","state":"AZ","postal_code":"85016","latitude":33.5221425,"longitude":-112.0184807,"stars":3.0,"review_count":5,"is_open":0,"attributes":{"GoodForKids":"False"},"categories":"Golf, Active Life","hours":null}
import json
import csv
infile = open("business.json","r")
outfile = open("business2.csv","w")
data = json.load(infile)
infile.close()
out = csv.writer(outfile)
out.writerow(data[0].keys())
for row in data:
out.writerow(row.values())
I get an "extra data" message when the code runs. The new business2 csv file is empty and the size is zero bytes.
if you JSON has only one row.. then try this
infile = open("business.json","r")
outfile = open("business2.csv","w")
data = json.load(infile)
infile.close()
out = csv.writer(outfile)
#print(data.keys())
out.writerow(data.keys())
out.writerow(data.values())
Hi Please try the below code, by using with command the file access will automatically get closed when the control moves out of scope of with
infile = open("business.json","r")
outfile = open("business2.csv","w")
data = json.load(infile)
infile.close()
headers = list(data.keys())
values = list(data.values())
with open("business2.csv","w") as outfile:
out = csv.writer(outfile)
out.writerow(headers)
out.writerow(values)
You need to use with to close file.
import json
import csv
infile = open("business.json","r")
data = json.load(infile)
infile.close()
with open("business2.csv","w") as outfile:
out = csv.writer(outfile)
out.writerow(list(data.keys()))
out.writerow(list(data.values()))
The process I am following is -
Converting Avro to JSON
Then converting JSON to CSV
Is there any direct way to convert the Avro file to CSV?
This is how I converted an avro file to csv:
from fastavro import reader
import csv
head = True
count = 0
f = csv.writer(open("test.csv", "w+"))
with open('abc.avro', 'rb') as fo:
avro_reader = reader(fo)
for emp in avro_reader:
#print(emp)
if head == True:
header = emp.keys()
f.writerow(header)
head = False
count += 1
f.writerow(emp.values())
print(count)
Below, is the json structure I am pulling from my online weather station. I am also including a json_to_csv python script that is supposed to convert json data to csv output, but only returns a "Key" error. I want to pull data from "current_observation": only.
{
"response": {
"features": {
"conditions": 1
}
}
, "current_observation": {
"display_location": {
"latitude":"40.466442",
"longitude":"-85.362709",
"elevation":"280.4"
},
"observation_time_rfc822":"Fri, 26 Jan 2018 09:40:16 -0500",
"local_time_rfc822":"Sun, 28 Jan 2018 11:22:47 -0500",
"local_epoch":"1517156567",
"local_tz_short":"EST",
"weather":"Clear",
"temperature_string":"44.6 F (7.0 C)",
}
}
import csv, json, sys
inputFile = open("pywu.cache.json", 'r') #open json file
outputFile = open("CurrentObs.csv", 'w') #load csv file
data = json.load(inputFile) #load json content
inputFile.close() #close the input file
output = csv.writer(outputFile) #create a csv.write
output.writerow(data[0].keys())
for row in data:
output = csv.writer(outputFile) #create a csv.write
output.writerow(data[0].keys())
for row in data:
output.writerow(row.values()) #values row
What's the best method to retrieve the temperature string and convert to .csv format? Thank you!
import pandas as pd
df = pd.read_json("pywu.cache.json")
df = df.loc[["local_time_rfc822", "weather", "temperature_string"],"current_observation"].T
df.to_csv("pywu.cache.csv")
maybe pandas can be of help for you. the .read_json() function creates a nice dataframe, from which you can easily choose the desired rows and columns. and it can save as csv as well.
to add latitude and longitude to the csv-line, you can do this:
df = pd.read_json("pywu.cache.csv")
df = df.loc[["local_time_rfc822", "weather", "temperature_string", "display_location"],"current_observation"].T
df = df.append(pd.Series([df["display_location"]["latitude"], df["display_location"]["longitude"]], index=["latitude", "longitude"]))
df = df.drop("display_location")
df.to_csv("pywu.cache.csv")
to print the location in numeric values, you can do this:
df = pd.to_numeric(df, errors="ignore")
print(df['latitude'], df['longitude'])
This will find all keys (e.g. "temperature_string") specified inside of the json blob and then write them to a csv file. You can modify this code to get multiple keys.
import csv, json, sys
def find_deep_value(d, key):
# Find a the value of keys hidden within a dict[dict[...]]
# Modified from https://stackoverflow.com/questions/9807634/find-all-occurrences-of-a-key-in-nested-python-dictionaries-and-lists
# #param d dictionary to search through
# #param key to find
if key in d:
yield d[key]
for k in d.keys():
if isinstance(d[k], dict):
for j in find_deep_value(d[k], key):
yield j
inputFile = open("pywu.cache.json", 'r') # open json file
outputFile = open("mypws.csv", 'w') # load csv file
data = json.load(inputFile) # load json content
inputFile.close() # close the input file
output = csv.writer(outputFile) # create a csv.write
# Gives you a list of temperature_strings from within the json
temps = list(find_deep_value(data, "temperature_string"))
output.writerow(temps)
outputFile.close()
I wanted to edit a csv file which reads the value from one of my another json file in python 2.7
my csv is : a.csv
a,b,c,d
,10,12,14
,11,14,15
my json file is a.json
{"a":20}
i want my where the column 'a' will try to match in json file. if their is a match. it should copy that value from json and paste it to my csv file and the final output of my csv file should be looks like this.
a,b,c,d
20,10,12,14
20,11,14,15
Till now I what I have tried is
fileCSV = open('a.csv', 'a')
fileJSON = open('a.json', 'r')
jsonData = fileJSON.json()
for k in range(jsonData):
for i in csvRow:
for j in jsonData.keys():
if i == j:
if self.count == 0:
self.data = jsonData[j]
self.count = 1
else:
self.data = self.data + "," + jsonData[j]
self.count = 0
fileCSV.write(self.data)
fileCSV.write("\n")
k += 1
fileCSV.close()
print("File created successfully")
I will be really thankful if anyone can help me for this.
please ignore any syntactical and indentation error.
Thank You.
Some basic string parsing will get you here.. I wrote a script which works for the simple scenario which you refer to.
check if this solves your problem:
import json
from collections import OrderedDict
def list_to_csv(listdat):
csv = ""
for val in listdat:
csv = csv+","+str(val)
return csv[1:]
lines = []
csvfile = "csvfile.csv"
outcsvfile = "outcsvfile.csv"
jsonfile = "jsonfile.json"
with open(csvfile, encoding='UTF-8') as a_file:
for line in a_file:
lines.append(line.strip())
columns = lines[0].split(",")
data = lines[1:]
whole_data = []
for row in data:
fields = row.split(",")
i = 0
rowData = OrderedDict()
for column in columns:
rowData[columns[i]] = fields[i]
i += 1
whole_data.append(rowData)
with open(jsonfile) as json_file:
jsondata = json.load(json_file)
keys = list(jsondata.keys())
for key in keys:
value = jsondata[key]
for each_row in whole_data:
each_row[key] = value
with open(outcsvfile, mode='w', encoding='UTF-8') as b_file:
b_file.write(list_to_csv(columns)+'\n')
for row_data in whole_data:
row_list = []
for ecolumn in columns:
row_list.append(row_data.get(ecolumn))
b_file.write(list_to_csv(row_list)+'\n')
CSV output is not written to the source file but to a different file.
The output file is also always truncated and written, hence the 'w' mode.
I would recommend using csv.DictReader and csv.DictWriter classes which will read into and out of python dicts. This would make it easier to modify the dict values that you read in from the JSON file.