Python Json to csv, Extract the specified keys,KeyError: - python

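I need to convert a JSON file to CSV, extracting only a few specific keys. Some of those keys are missing from some records in the JSON file, and I would like the missing values to be filled in automatically (for example, with an empty or default value) instead of raising an error.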
import csv
import json
import sys
import codecs
# Convert a file of JSON objects (one per line) into a CSV file with the
# columns dob, firstname and lastname.
# NOTE: the `path` argument is accepted but never used; the input and output
# locations are hard-coded below.
def trans(path):
jsonData = codecs.open('C:/Users/jeri/Desktop/1.json', 'r', 'utf-8')
# csvfile = open(path + '.csv', 'w')
# csvfile = open(path + '.csv', 'wb')
csvfile = open('C:/Users/jeri/Desktop/1.csv', 'w', newline='', encoding='utf-8')
writer = csv.writer(csvfile, delimiter=',')
keys = ['dob','firstname','lastname']
# Header row.
writer.writerow(keys)
for line in jsonData:
dic = json.loads(line)
# Plain indexing raises KeyError as soon as a record lacks one of the keys;
# this is the statement reported in the traceback that follows the code.
writer.writerow([dic['dob'],dic['firstname'],dic['lastname'],])
jsonData.close()
csvfile.close()
if __name__ == '__main__':
# sys.argv[0] is the path of the running script itself, not a user argument.
path = str(sys.argv[0])
print(path)
trans(path)
Console prompt::
Traceback (most recent call last):
File "C:\Users\jeri\PycharmProjects\pythonProject9\main.py", line 25, in <module>
trans(path)
File "C:\Users\jeri\PycharmProjects\pythonProject9\main.py", line 17, in trans
writer.writerow([dic['dob'],dic['firstname'],dic['lastname'],])
KeyError: 'dob'

If the key 'dob' might be missing, instead of dic['dob'], do dic.get('dob', None). That provides the default you want.

I think this would solve your problem.
(I defined a function that checks whether each item exists in the JSON record: if it exists, the function returns its value; if it does not, it returns 'N/A'.)
def getValue(dic, item, default='N/A'):
    """Return ``dic[item]``, or ``default`` when the lookup cannot succeed.

    Args:
        dic: The decoded JSON value to read from (normally a dict).
        item: The key to look up.
        default: Value returned when the key is absent. Defaults to ``'N/A'``.

    Returns:
        The stored value (which may itself be ``None``), or ``default`` when
        ``item`` is missing or ``dic`` does not support that lookup.
    """
    try:
        return dic[item]
    # LookupError covers a missing key/index; TypeError covers a JSON line that
    # decoded to something that cannot be indexed this way (e.g. null or a
    # list). A bare ``except:`` would also swallow KeyboardInterrupt and
    # SystemExit, which must be allowed to propagate.
    except (LookupError, TypeError):
        return default
# Write one CSV row per JSON line, substituting getValue's fallback for any
# key that the record does not contain.
for raw_line in jsonData:
    record = json.loads(raw_line)
    row = [getValue(record, key) for key in ('dob', 'firstname', 'lastname')]
    writer.writerow(row)

you can transform your for loop into something like this.
# Write one CSV row per JSON line; a key that is absent from the record is
# written as None (which csv.writer emits as an empty field).
for raw_line in jsonData:
    record = json.loads(raw_line)
    writer.writerow([
        record[key] if key in record else None
        for key in ('dob', 'firstname', 'lastname')
    ])

Related

python nested json to csv Incomplete conversion

this is my json file
{"_index":"core-bvd-locations","_type":"_doc","_id":"75b82cba4a80784f4fa36d14c86f6d85","_score":1,"_source":{"a_id":"FR518077177","a_id_type":"BVD ID","a_name":"Moisan Patrick Roger","a_name_normal":"MOISAN PATRICK ROGER","a_country_code":"FR","a_country":"France","a_in_compliance_db":false,"a_nationality":"FR","a_street_address":"Les Carmes","a_city":"Decize","a_postcode":"58300","a_region":"Bourgogne-Franche-Comte|Nievre","a_phone":"+33 603740000","a_latitude":46.79402777777778,"a_longitude":3.496277777777778,"a_national_ids":{"European VAT number":["FR58 518077177"],"SIREN number":["518077177"],"TIN":["518077177"],"SIRET number":["518077177-00013"]},"relationship":"Location info","file_name":"/media/hedwig/iforce/data/BvD/s3-transfer/SuperTable_v3_json/locations/part-00021-1f62c713-17a0-410d-9b18-32328d9836d6-c000.json","a_geo_point":{"lat":46.79402777777778,"lon":3.496277777777778}}}
this is my code
import csv
import json
import sys
import codecs
# Convert a file of JSON objects (one per line) to CSV, using the keys of the
# first object as the header row.
# NOTE: `path` is accepted but unused; input and output paths are hard-coded.
def trans(path):
jsonData = codecs.open('F:\\1.json', 'r', 'utf-8')
# csvfile = open(path+'.csv', 'w') #
# csvfile = open(path+'.csv', 'wb') # python2
csvfile = open('F:\\1.csv', 'w', encoding='utf-8', newline='') #
writer = csv.writer(csvfile, delimiter=',')
# True until the header row has been written.
flag = True
for line in jsonData:
dic = json.loads(line)
if flag:
# Only the top-level keys are taken, so a nested object such as "_source"
# becomes a single column rather than one column per inner field.
keys = list(dic.keys())
print(keys)
writer.writerow(keys)
flag = False
# Top-level values only; a nested dict is written as its string form.
writer.writerow(list(dic.values()))
jsonData.close()
csvfile.close()
if __name__ == '__main__':
# sys.argv[0] is the script's own path, not a user-supplied argument.
path=str(sys.argv[0]) #
print(path)
trans(path)
C:\Users\jeri\PycharmProjects\pythonProject9\venv\Scripts\python.exe C:\Users\jeri\PycharmProjects\pythonProject9\zwc_count_file.py
C:\Users\jeri\PycharmProjects\pythonProject9\zwc_count_file.py
['_index', '_type', '_id', '_score', '_source']
Process finished with exit code 0
output jie
enter image description here
The information in the nested part of the JSON file is not parsed. How can I modify the code?
import json
import pandas as pd

# Parse the JSON document; the with-block guarantees the handle is closed
# (the bare open(...).read() form leaves that to the garbage collector).
with open("json_filname.json", 'r') as json_file:
    data = json.load(json_file)

# json_normalize flattens nested objects into dotted column names.
df = pd.json_normalize(data)
# A bare `df` only displays something in a notebook/REPL; print works everywhere.
print(df)
json.load(): json.load() accepts file object, parses the JSON data, populates a Python dictionary with the data and returns it back to you.
import json

# Open the JSON file and parse it into a Python object (a dictionary when the
# document's top level is a JSON object). The with-block closes the file even
# if parsing fails.
with open('data.json') as f:
    data = json.load(f)
writer.writerow writes an entire row at once; the correct syntax is:
writer.writerow(#iterable_object#)

Search for list values in a csv file python

There is a csv file, there is a unique id column and a column with a date. The task of this section of the code is that you need to scan the lines using the id key, find the id in the line, write the line to a new file called id. Faced with the problem that the interpreter returns an error by id. Although everything is logical and correct. Where did I go wrong?
id, Numder, Date
123456, 89654535556, 25.11.2021 15:35:00
321654, 96554412255, 23.11.2021 18:50:00
524163, 38095224444, 18.11.2021 13:30:00
from csv import DictReader
from csv import DictWriter
from os.path import isfile
# Copy every row of `master_csv` whose `key_id` column equals `user_id` into a
# per-user file named `user_id + extension`, keeping only the `fieldnames`
# columns. The output file is opened in append mode, and the header is written
# only when the file did not exist before the call.
def export_csv(user_id, master_csv, fieldnames, key_id, extension=".csv"):
filename = user_id + extension
# Checked before opening: mode "a" below would create the file.
file_exists = isfile(filename)
with open(file=master_csv) as in_file, open(
file=filename, mode="a", newline=""
) as out_file:
# Create reading and writing objects
csv_reader = DictReader(in_file)
csv_writer = DictWriter(out_file, fieldnames=fieldnames)
# Only write header once
if not file_exists:
csv_writer.writeheader()
# Go through lines and match ids
for line in csv_reader:
# NOTE(review): DictReader takes its keys verbatim from the header line, so
# any extra characters around "id" there (a byte-order mark, spaces after
# the commas as in the sample header) make this lookup raise KeyError —
# confirm against the real master.csv.
if line[key_id] == user_id:
# Modify line and append to file
line = {k: v.strip() for k, v in line.items() if k in fieldnames}
csv_writer.writerow(line)
export_csv(
user_id="512863",
master_csv="master.csv",
fieldnames=["Number", "Date"],
key_id="id")
Traceback (most recent call last):
File "C:/Users/sedei/PycharmProjects/convector/1.py", line 58, in
export_csv(
File "C:/Users/sedei/PycharmProjects/convector/1.py", line 52, in export_csv
if line[key_id] == user_id:
KeyError: 'id'

How to Convert Multiple Text Files Into Multiple json Files

I have multiple text files which I need to convert to json files. For each text file I want an individual json file.
Text file content
File-1.txt
['education~25,850,103,23', 'experience~28,94,107,27', 'skills~29,904,59,27']
File-2.txt
['introduction~211,143,87,13', 'education~169,302,131,17', 'skills~322,421,84,15', 'experience~325,142,112,14', 'reference~320,699,68,14']
and so on ...
The expected output is a json file which contains:
Keyword(Class name)
Values(Coordinates)
This is what I tried with this code I was able to write data into txt--
with open(PATH_TO_RESULTS + '/' + os.path.join(os.path.basename(os.path.dirname(image_path))) + '.txt', 'w') as f:
image_name = os.path.splitext(os.path.basename(image_path))[0]
# f.write((image_path + '|'))
req_fields = []
for key, value in field_item.items():
#print("=====================")
# print(key)
# print((scores[0, index]))
# print(value)
# print("==================")
merge = str(key.decode('utf-8')) + '~' + str(value)
req_fields.append(merge)
f.write(str(req_fields))
print("#######################Required Fields###########################",req_fields)
And one more thing, the json file name should also be the same as txt file name.
I think this is what you need, or at least close to it.
You can improve and adapt it (for example, with better naming).
import glob, os
import json
os.chdir(".")
def read_file(file):
    """Return the whole text content of the file at path ``file``."""
    handle = open(file, 'r')
    try:
        text = handle.read()
    finally:
        # Always release the handle, whether or not the read succeeded.
        handle.close()
    return text
def write_json(file, data):
    """Write ``data`` to the path ``file`` as JSON indented by four spaces."""
    encoder = json.JSONEncoder(indent=4)
    with open(file, 'w') as sink:
        # Stream the document chunk by chunk, as json.dump does internally.
        for chunk in encoder.iterencode(data):
            sink.write(chunk)
for file in glob.glob("*.txt"):
content = read_file(file)
to_parse_in_rows = content.replace('[', '').replace(']', '').split(', ')
rows = []
for part in to_parse_in_rows:
field12, field3, field4, field5 = part.replace("'", '').split(',')
field1, field2 = field12.split('~')
row = {
'class': field1,
'field2': int(field2),
'field3': int(field3),
'field4': int(field4),
'field5': int(field5)
}
rows.append(row)
write_json(file.replace('.txt', '.json'), rows)

How to open a csv file for reading purpose using mmap in python?

I want to open csv file for reading purpose. But I'm facing some exceptions regarding to that.
I'm using Python 2.7.
main.python-
if __name__ == "__main__":
# Binary mode: everything read through the mmap below is a bytes object.
f = open('input.csv','r+b')
m = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
# Feeds DictReader one raw line at a time from the memory map. Under Python 3
# those lines are bytes, which csv rejects (see the traceback after the code).
reader = csv.DictReader(iter(m.readline, ""))
for read in reader:
num = read['time']
print num
output-
Traceback (most recent call last):
File "/home/PycharmProjects/time_gap_Task/main.py", line 22, in <module>
for read in reader:
File "/usr/lib/python3.4/csv.py", line 109, in __next__
self.fieldnames
File "/usr/lib/python3.4/csv.py", line 96, in fieldnames
self._fieldnames = next(self.reader)
_csv.Error: iterator should return strings, not bytes (did you open the file in text mode?)
How can I resolve this error, and what is the correct way to read a CSV file through mmap together with the csv module?
I know you asked this a while ago, but I actually created a module for myself that does this, because I do a lot of work with large CSV files, and sometimes I need to convert them into dictionaries, based on a key. Below is the code I've been using. Please feel free to modify as needed.
def MmapCsvFileIntoDict(csvFilePath, skipHeader = True, transform = lambda row: row, keySelector = lambda o: o):
    """
    Read a CSV file through mmap and return its rows as a dictionary.

    Each line is decoded as UTF-8, stripped, and parsed on its own with
    csv.reader, so a quoted field that spans several lines is not supported.
    Blank lines are skipped.

    Parameters:
        csvFilePath: path of the CSV file to read.
        skipHeader: when true, the first line of the file is ignored.
        transform: callable applied to each parsed row (a list of strings) to
            produce the stored value. If it is not callable, the row itself is
            stored.
        keySelector: callable applied to the stored value to produce its
            dictionary key. If it is not callable, it is used as the key
            itself, so every row overwrites the previous one.

    Returns:
        A dict mapping each key to its value; later rows win on duplicate
        keys. An empty dict is returned for an empty file or when the file
        cannot be opened (the error is reported through PrintWithTs).

    Note:
        Keys must be hashable. With both defaults the key would be the row
        list itself, which raises TypeError, so pass a keySelector such as
        ``lambda row: row[0]`` or a transform that returns a hashable object.
    """
    contents = {}
    try:
        # The file is only read, so open it read-only; "r+b" would demand
        # write permission for no benefit.
        with open(csvFilePath, "rb") as f:
            # mmap refuses to map a zero-length file: treat it as "no rows".
            f.seek(0, 2)  # 2 == seek relative to end of file
            if f.tell() == 0:
                return contents
            # Length 0 maps the whole file; the with-block releases the mapping.
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                headerPending = skipHeader
                for line in iter(mm.readline, b''):
                    if headerPending:
                        headerPending = False
                        continue
                    text = line.decode('utf-8').strip()
                    # csv.reader yields [] for a blank line; skip those rows.
                    row = next(csv.reader([text]), None)
                    if not row:
                        continue
                    value = transform(row) if callable(transform) else row
                    key = keySelector(value) if callable(keySelector) else keySelector
                    contents[key] = value
    except IOError as ie:
        PrintWithTs('Error decomposing the companies: {0}'.format(ie))
        return {}
    return contents
When you call this method, you have some options.
Assume you have a file that looks like:
Id, Name, PhoneNumber
1, Joe, 7175551212
2, Mary, 4125551212
3, Vince, 2155551212
4, Jane, 8145551212
The easiest way to call it is like this:
dict = MmapCsvFileIntoDict('/path/to/file.csv', keySelector = lambda row: row[0])
What you get back is a dict looking like this:
{ '1' : ['1', 'Joe', '7175551212'], '2' : ['2', 'Mary', '4125551212'] ...
One thing I like to do is create a class or a namedtuple to represent my data:
class CsvData:
    """One CSV row exposed as attributes: Id (int), Name (upper-cased), Phone (int)."""

    def __init__(self, row):
        # Column position -> (attribute name, converter), applied in order.
        converters = (
            ("Id", int),
            ("Name", lambda text: text.upper()),
            ("Phone", int),
        )
        for position, (attribute, convert) in enumerate(converters):
            setattr(self, attribute, convert(row[position]))
And then when I call the method, I pass in a second lambda to transform each row in the file to an object I can work with:
dict = MmapCsvFileIntoDict('/path/to/file.csv', transform = lambda row: CsvData(row), keySelector = lambda o: o.Id)
What I get back that time looks like:
{ 1 : <object instance>, 2 : <object instance>...
I hope this helps! Best of luck
When open a file with the flag b like this:
f = open('input.csv','r+b')
You read the file as bytes and not as string.
So, try to change the flags to r:
f = open('input.csv','r')
If you just want to read specific columns from a CSV file, try:
import csv

# Text mode is what the csv module expects on Python 3; newline='' lets the
# csv module do its own newline handling, as its documentation recommends.
with open('input.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # print() as a function: the statement form is a SyntaxError on the
        # Python 3 interpreter shown in the question's traceback.
        print(row['time'])

How to convert CSV file to multiline JSON?

Here's my code, really simple stuff...
import csv
import json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
# Column names assigned to every row; passing them to DictReader means the
# first line of the file is treated as data, not as a header.
fieldnames = ("FirstName","LastName","IDNumber","Message")
reader = csv.DictReader( csvfile, fieldnames)
# All rows are serialised as one JSON array in a single dumps() call, which is
# why the whole output ends up on one line.
out = json.dumps( [ row for row in reader ] )
jsonfile.write(out)
I declare some field names; the reader uses them to read the CSV file, and the same field names become the keys when the rows are dumped to JSON. Here's the problem...
Each record in the CSV file is on a different row. I want the JSON output to be the same way. The problem is it dumps it all on one giant, long line.
I've tried using something like for line in csvfile: and then running my code below that with reader = csv.DictReader( line, fieldnames) which loops through each line, but it does the entire file on one line, then loops through the entire file on another line... continues until it runs out of lines.
Any suggestions for correcting this?
Edit: To clarify, currently I have: (every record on line 1)
[{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"},{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}]
What I'm looking for: (2 records on 2 lines)
{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"}
{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}
Not each individual field indented/on a separate line, but each record on its own line.
Some sample input.
"John","Doe","001","Message1"
"George","Washington","002","Message2"
The problem with your desired output is that it is not a valid JSON document; it's a stream of JSON documents!
That's okay, if it's what you need, but it means that for each document you want in your output, you'll have to call json.dumps.
Since the newline you want separating your documents is not contained in those documents, you're on the hook for supplying it yourself. So we just need to pull the loop out of the call to json.dump and interpose newlines for each document written.
import csv
import json

fieldnames = ("FirstName","LastName","IDNumber","Message")

# The with-block closes both files, which also guarantees the JSON output is
# flushed to disk; newline='' is the csv module's recommended way to open input.
with open('file.csv', 'r', newline='') as csvfile, open('file.json', 'w') as jsonfile:
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        # One JSON document per CSV record, each terminated by its own newline.
        json.dump(row, jsonfile)
        jsonfile.write('\n')
You can use Pandas DataFrame to achieve this, with the following Example:
import pandas as pd

# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap is needed.
csv_file = pd.read_csv("path/to/file.csv", sep = ",", header = 0, index_col = False)
# lines=True (valid with orient="records") writes one JSON object per line;
# without it all records are emitted as a single array on one line.
csv_file.to_json("/path/to/new/file.json", orient = "records", lines = True, date_format = "epoch", double_precision = 10, force_ascii = True, date_unit = "ms", default_handler = None)
import csv
import json
file = 'csv_file_name.csv'
json_file = 'output_file_name.json'
# Read CSV file
def read_CSV(file, json_file):
    """Read the CSV at ``file`` and hand its rows to ``convert_write_json``.

    The first line of the CSV is used as the header. Each row becomes a dict
    restricted to the header's column names.

    Args:
        file: Path of the CSV file to read.
        json_file: Path of the JSON file to produce.
    """
    # newline='' is how the csv module's documentation says to open input files.
    with open(file, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # fieldnames is None for an empty file; there are no rows in that case.
        field = reader.fieldnames or []
        csv_rows = [{name: row[name] for name in field} for row in reader]
    convert_write_json(csv_rows, json_file)
# Convert csv data into json
def convert_write_json(data, json_file, pretty=True):
    """Write ``data`` to ``json_file`` as exactly one JSON document.

    Writing both the pretty and the compact form back to back, as before,
    produces a file that is not valid JSON, so only one form is written.

    Args:
        data: JSON-serialisable object (here, the list of row dicts).
        json_file: Path of the output file; it is overwritten.
        pretty: When true (the default), indent by four spaces; otherwise
            write the compact single-line form.
    """
    with open(json_file, "w") as f:
        if pretty:
            json.dump(data, f, sort_keys=False, indent=4, separators=(',', ': '))
        else:
            json.dump(data, f)
read_CSV(file,json_file)
Documentation of json.dumps()
I took @SingleNegationElimination's response and simplified it into a three-liner that can be used in a pipeline:
import csv
import json
import sys
# Pipeline filter: CSV (with a header line) on stdin, one JSON object per line on stdout.
for record in csv.DictReader(sys.stdin):
    sys.stdout.write(json.dumps(record))
    sys.stdout.write('\n')
You can try this
import csvmapper

# how does the object look
mapper = csvmapper.DictMapper([
    [
        { 'name' : 'FirstName'},
        { 'name' : 'LastName' },
        { 'name' : 'IDNumber', 'type':'int' },
        { 'name' : 'Messages' }
    ]
])
# parser instance
parser = csvmapper.CSVParser('sample.csv', mapper)
# conversion service
converter = csvmapper.JSONConverter(parser)
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the print statement is a SyntaxError on Python 3.
print(converter.doConvert(pretty=True))
Edit:
Simpler approach
import csvmapper

fields = ('FirstName', 'LastName', 'IDNumber', 'Messages')
# Only the module is imported, so CSVParser must be reached through it;
# the bare name would raise NameError.
parser = csvmapper.CSVParser('sample.csv', csvmapper.FieldMapper(fields))
converter = csvmapper.JSONConverter(parser)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(converter.doConvert(pretty=True))
I see this is old but I needed the code from SingleNegationElimination however I had issue with the data containing non utf-8 characters. These appeared in fields I was not overly concerned with so I chose to ignore them. However that took some effort. I am new to python so with some trial and error I got it to work. The code is a copy of SingleNegationElimination with the extra handling of utf-8. I tried to do it with https://docs.python.org/2.7/library/csv.html but in the end gave up. The below code worked.
import csv, json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
# Column names assigned to every row (the file's first line is treated as data).
fieldnames = ("Scope","Comment","OOS Code","In RMF","Code","Status","Name","Sub Code","CAT","LOB","Description","Owner","Manager","Platform Owner")
reader = csv.DictReader(csvfile , fieldnames)
# Not used anywhere in the code shown.
code = ''
for row in reader:
try:
# '+' marks a record about to be converted.
print('+' + row['Code'])
for key in row:
# Python 2 byte-string idiom: decoding with 'ignore' drops bytes that are not
# valid UTF-8, and re-encoding gives back a clean UTF-8 byte string.
# (Python 3 str objects have no .decode method.)
row[key] = row[key].decode('utf-8', 'ignore').encode('utf-8')
json.dump(row, jsonfile)
jsonfile.write('\n')
except:
# '-' marks the record that failed; the exception is then re-raised unchanged.
print('-' + row['Code'])
raise
Add the indent parameter to json.dumps
data = {'this': ['has', 'some', 'things'],
'in': {'it': 'with', 'some': 'more'}}
print(json.dumps(data, indent=4))
Also note that, you can simply use json.dump with the open jsonfile:
json.dump(data, jsonfile)
Use pandas and the json library:
import pandas as pd
import json

filepath = "inputfile.csv"
output_path = "outputfile.json"
df = pd.read_csv(filepath)
# Create a multiline json: one record per line.
json_list = json.loads(df.to_json(orient = "records"))
with open(output_path, 'w') as f:
    for item in json_list:
        # json.dumps emits real JSON (double quotes, null/true/false);
        # "%s" % item would write the Python dict repr, which is not JSON.
        f.write(json.dumps(item) + "\n")
How about using Pandas to read the csv file into a DataFrame (pd.read_csv), then manipulating the columns if you want (dropping them or updating values) and finally converting the DataFrame back to JSON (pd.DataFrame.to_json).
Note: I haven't checked how efficient this will be but this is definitely one of the easiest ways to manipulate and convert a large csv to json.
As a slight improvement to @MONTYHS's answer, iterating through a tuple of field names:
import csv
import json
import os

csvfilename = 'filename.csv'
# splitext removes only the final extension; split('.')[0] would truncate any
# path that contains another dot (e.g. './data/file.v2.csv').
jsonfilename = os.path.splitext(csvfilename)[0] + '.json'
fieldnames = ('FirstName', 'LastName', 'IDNumber', 'Message')

# Keep only the wanted columns of every record (the CSV's first line is its header).
with open(csvfilename, 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    output = [{field: each[field] for field in fieldnames} for each in reader]

# The with-blocks close both files, so the JSON is fully flushed to disk.
with open(jsonfilename, 'w') as jsonfile:
    json.dump(output, jsonfile, indent=2, sort_keys=True)
def read(csv_file_name="hashtag_donaldtrump.csv",
         json_file_name="hashtag_donaldtrump.json",
         noOfElem=200):
    """Export the first ``noOfElem`` records of a CSV file as a JSON array.

    Args:
        csv_file_name: Path of the CSV file to read; its first line is the header.
        json_file_name: Path of the JSON file to write (overwritten).
        noOfElem: Maximum number of records to export. If the CSV holds fewer
            records, all of them are exported.

    The output is always a complete, valid JSON array: the closing bracket is
    written even when the CSV runs out before ``noOfElem`` records, and no
    trailing comma is left behind.
    """
    from itertools import islice

    with open(csv_file_name, mode='r', newline='') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        with open(json_file_name, 'w') as json_file:
            json_file.write("[")
            # islice stops after noOfElem records or at end of file,
            # whichever comes first.
            for index, row in enumerate(islice(csv_reader, noOfElem)):
                if index:
                    # Separator goes before every record except the first,
                    # so the array never ends with a dangling comma.
                    json_file.write(",")
                json_file.write(json.dumps(row))
            json_file.write("]")
Change the three values above (noOfElem, csv_file_name and json_file_name) to suit your data; nothing else needs editing.
import csv
import json

output = []
# DictReader needs an open file (or any iterable of lines), not a file name;
# the CSV's first line supplies the column names.
with open('filename.csv', 'r', newline='') as source:
    csvfile = csv.DictReader(source)
    for each in csvfile:
        row = {}
        row['FirstName'] = each['FirstName']
        row['LastName'] = each['LastName']
        row['IDNumber'] = each['IDNumber']
        row['Message'] = each['Message']
        output.append(row)

# The with-block closes the output file so the JSON is fully written to disk.
with open('filename.json', 'w') as target:
    json.dump(output, target, indent=4, sort_keys=False)

Categories

Resources