Convert csv to json multi-document? - python

I have below two requirement using Python:
Convert csv to multi-document json.
Ignore "" or null objects.
Have mentioned both code and csv. Currently I am getting only json array objects but I need to create in multi-document json.
my csv
_id,riderDetails.0.category,riderDetails.0.code,riderDetails.1.category,riderDetails.1.code
1111,re,remg,er,error
2111,we,were,ty,
code
import csv
import json
def make_record(row):
return {
"_id" : row["_id"],
"riderDetails" : [
{
"category" : row["riderDetails.0.category"],
"code" : row["riderDetails.0.code"],
},
{
"category" : row["riderDetails.1.category"] ,
"code" : row["riderDetails.1.code"],
}
]
}
with open('N:/Exide/Mongo/rr22.csv', 'r', newline='') as csvfile:
reader = csv.DictReader(csvfile, delimiter=',')
with open('N:/Exide/Mongo/mm22.json', 'w') as jsonfile:
out = json.dumps([make_record(row) for row in reader])
jsonfile.write(out)
Code Output
[{
"_id": "1111",
"riderDetails": [
{
"category": "re",
"code": "remg"
},
{
"category": "er",
"code": "error"
}
]
},
{
"_id": "2111",
"riderDetails": [
{
"category": "we",
"code": "were"
},
{
"category": "",
"code": ""
}
]
}]
Expected Output
{
"_id": "1111",
"riderDetails": [
{
"category": "re",
"code": "remg"
},
{
"category": "er",
"code": "error"
}
]
}
{
"_id": "2111",
"riderDetails": [
{
"category": "we",
"code": "were"
}
]
}
Can someone help me in achieving expected output?

The data in the my csv file in your question doesn't produce the output shown, but that's probably due to a minor posting error, so I'll ignore it.
Also note that the file you are producing isn't a strictly valid JSON format file — perhaps that's what you meant by the term "multi-document json"…
Regardless, you accomplish what you need by modifying the make_record() function so it "cleans-up" the record and removes any empty/missing values before it returns it.
This is done in two steps.
First go through from each detail in riderDetails and remove any keys that have empty values.
Lastly, go though each riderDetails again and remove any details that are completely empty (because the first step removed all of it contents or none were provided in the csv file being read).
import csv
import json
csv_inp = 'rr22.csv'
json_outp = 'mm22.json'
def make_record(row):
# Reformat data is row.
record = {
"_id": row["_id"],
"riderDetails": [
{
"category": row["riderDetails.0.category"],
"code": row["riderDetails.0.code"],
},
{
"category": row["riderDetails.1.category"],
"code": row["riderDetails.1.code"],
}
]
}
# Remove empty values from each riderDetail.
record['riderDetails'] = [{key: value for key, value in riderDetail.items() if value}
for riderDetail in record['riderDetails']]
# Remove completely empty riderDetails.
record['riderDetails'] = [riderDetail for riderDetail in record['riderDetails']
if riderDetail]
return record
with open(csv_inp, 'r', newline='') as csvfile, \
open(json_outp, 'w') as jsonfile:
for row in csv.DictReader(csvfile, delimiter=','):
jsonfile.write(json.dumps(make_record(row), indent=4) + '\n')
# jsonfile.write(json.dumps(make_record(row)) + '\n')

using glob
import glob, os
pt = 'N:/Exide/Mongo/*.csv'
for file in glob.glob(pt):
get_name = file.split("/")[-1].replace(".csv",".json")
with open(file , 'r', newline='') as csvfile:
reader = csv.DictReader(csvfile, delimiter=',')
out = [make_record(row) for row in reader]
saving_path = os.path.join('N:/Exide/Mongo/',get_name)
with open(saving_path , 'w') as jsonfile:
json.dump(out , jsonfile)
you get [{},{}] becuse you writing list of dictionary into file

Related

Convert parameter file to python dict or json and exclude certain parameters

I have a parameter file formatted like this
NAMES.DIRECTORY_PATH=(example1,example2)
SQLNET.ALLOWED_LOGON_VERSION_SERVER=example
SQLNET.ALLOWED_LOGON_VERSION_CLIENT=example
SQLNET.INBOUND_CONNECT_TIMEOUT=example
SQLNET.EXPIRE_TIME=example
SSL_CLIENT_AUTHENTICATION=example
SSL_VERSION=example
SQLNET.AUTHENTICATION_SERVICES=(example1)
SSL_CLIENT_AUTHENTICATION=example
WALLET_LOCATION=
(SOURCE=
(METHOD=FILE)
(METHOD_DATA=
(DIRECTORY=somedir)
I'm trying to load this file into python, change values for a certain key, and then save off the rest of the values.
Here's what I have so far...
# Open file to remove spaces from parameters,values
with open('tempfile', 'r') as f:
lines = f.readlines()
# remove spaces
lines = [line.replace(' ', '') for line in lines]
# finally, write lines in the file
with open('tempfile', 'w') as f:
f.writelines(lines)
# Change value in file, if found
d = {}
with open('tempfile') as f:
d = dict(x.rstrip().split('=', 1) for x in f)
print("dictionary before",d)
d['SQLNET.AUTHENTICATION_SERVICES'] = '(somevalue)'
print("dictionary after modification-", d)
If I have the ability to define the config file, I would likely use json.
config.json:
{
"NAMES": {
"DIRECTORY_PATH": ["example1", "example2"]
},
"SQLNET" : {
"ALLOWED_LOGON_VERSION_SERVER": "example",
"ALLOWED_LOGON_VERSION_CLIENT": "example",
"INBOUND_CONNECT_TIMEOUT": "example",
"EXPIRE_TIME": "example",
"AUTHENTICATION_SERVICES": ["example1"]
},
"SSL_CLIENT_AUTHENTICATION": "example",
"SSL_VERSION": "example",
"WALLET_LOCATION": {
"SOURCE": {"METHOD" : "FILE"},
"METHOD_DATA": {"DIRECTORY" : "somedir"}
}
}
This makes reading and manipulating the data fairly easy.
import json
with open("config.json", "r", encoding="utf-8") as file_in:
config = json.load(file_in)
config["SQLNET"]["AUTHENTICATION_SERVICES"] = ["somevalue"]
with open("config.new.json", "w", encoding="utf-8") as file_out:
json.dump(config, file_out, indent=4)
Of course you could write back to the original file if you wanted.
You should get a result like:
config.new.json:
{
"NAMES": {
"DIRECTORY_PATH": [
"example1",
"example2"
]
},
"SQLNET": {
"ALLOWED_LOGON_VERSION_SERVER": "example",
"ALLOWED_LOGON_VERSION_CLIENT": "example",
"INBOUND_CONNECT_TIMEOUT": "example",
"EXPIRE_TIME": "example",
"AUTHENTICATION_SERVICES": [
"somevalue"
]
},
"SSL_CLIENT_AUTHENTICATION": "example",
"SSL_VERSION": "example",
"WALLET_LOCATION": {
"SOURCE": {
"METHOD": "FILE"
},
"METHOD_DATA": {
"DIRECTORY": "somedir"
}
}
}

Json nested encryption value - Python

I have a json output file and I am trying to encrypt a value of key(name) in it using sha256 encryption method. Have two occurence of name in a list of dict but everytime when I write, the changes reflecting once. Can anybody tell me where I am missing?
Json structure:
Output.json
{
"site": [
{
"name": "google",
"description": "Hi I am google"
},
{
"name": "microsoft",
"description": "Hi, I am microsoft"
}
],
"veg": [
{
"status": "ok",
"slot": null
},
{
"status": "ok"
}
]
}
Code:
import hashlib
import json
class test():
def __init__(self):
def encrypt(self):
with open("Output.json", "r+") as json_file:
res = json.load(json_file)
for i in res['site']:
for key,val in i.iteritems():
if 'name' in key:
hs = hashlib.sha256(val.encode('utf-8')).hexdigest()
res['site'][0]['name'] = hs
json_file.seek(0)
json_file.write(json.dumps(res,indent=4))
json_file.truncate()
Current Output.json
{
"site": [
{
"name": "bbdefa2950f49882f295b1285d4fa9dec45fc4144bfb07ee6acc68762d12c2e3",
"description": "Hi I am google"
},
{
"name": "microsoft",
"description": "Hi, I am microsoft"
}
],
"veg": [
{
"status": "ok",
"slot": null
},
{
"status": "ok"
}
]
}
I think your problem is in this line:
res['site'][0]['name'] = hs
you are always changing the name field of the first map in the site list. I think you want this to be:
i['name'] = hs
so that you are updating the map you are currently working on (pointed to by i).
Instead of iterating over each item in the dictionary, you could make use of the fact that dictionaries are made for looking up values by key, and do this:
if 'name' in i:
val = i['name']
hs = hashlib.sha256(val.encode('utf-8')).hexdigest()
i['name'] = hs
json_file.seek(0)
json_file.write(json.dumps(res, indent=4))
json_file.truncate()
instead of this:
for key,val in i.iteritems():
if 'name' in key:
...
Also, iteritems() should be items(), and if 'name' in key should be if key == 'name', as key is a string. As it is, you'd be matching any entry with a key name containing the substring 'name'.
UPDATE: I noticed that you are writing the entire file multiple times, once for each name entry that you encrypt. Even without this I would recommend that you open the file twice...once for reading and once for writing. This is preferred over opening a file for both reading and writing, and having to seek and truncate. So, here are all of my suggested changes, along with a few other tweaks, in a full version of your code:
import hashlib
import json
class Test:
def encrypt(self, infile, outfile=None):
if outfile is None:
outfile = infile
with open(infile) as json_file:
res = json.load(json_file)
for i in res['site']:
if 'name' in i:
i['name'] = hashlib.sha256(i['name'].encode('utf-8')).hexdigest()
with open(outfile, "w") as json_file:
json.dump(res, json_file, indent=4)
Test().encrypt("/tmp/input.json", "/tmp/output.json")
# Test().encrypt("/tmp/Output.json") # <- this form will read and write to the same file
Resulting file contents:
{
"site": [
{
"name": "bbdefa2950f49882f295b1285d4fa9dec45fc4144bfb07ee6acc68762d12c2e3",
"description": "Hi I am google"
},
{
"name": "9fbf261b62c1d7c00db73afb81dd97fdf20b3442e36e338cb9359b856a03bdc8",
"description": "Hi, I am microsoft"
}
],
"veg": [
{
"status": "ok",
"slot": null
},
{
"status": "ok"
}
]
}

Convert CSV with nested headers to JSON

So far, I have this code (with help from a tutorial):
import csv, json
csvFilePath = "convertcsv.csv"
jsonFilePath = "newResult.json"
# Read the CSV and add the data to a dictionary...
data = {}
with open(csvFilePath) as csvFile:
csvReader = csv.DictReader(csvFile)
for csvRow in csvReader:
data = csvRow
# Write data to a JSON file...
with open(jsonFilePath, "w") as jsonFile:
jsonFile.write(json.dumps(data, indent=4))
My desired output is this:
{
"userID": "string",
"username": "string",
"age": "string",
"location": {
"streetName": "string",
"streetNo": "string",
"city": "string"
}
}
I don't know how to represent the "location".
My actual result is this:
{
"userID": "string",
"username": "string",
"age": "string",
"location/streetName": "string",
"location/streetNo": "string",
"location/city": "string",
}
How can I seperate streetName, streetNo and city and put them into "location"?
Below is a simple script should do what you want. The result will be a json object with the "userID" as keys. Note that, to test deeper nesting, I used a csv file with slightly different headers - but it will work just as well with your original example.
import csv, json
infile = 'convertcsv.csv'
outfile = 'newResult.json'
data = {}
def process(header, value, record):
key, other = header.partition('/')[::2]
if other:
process(other, value, record.setdefault(key, {}))
else:
record[key] = value
with open(infile) as stream:
reader = csv.DictReader(stream)
for row in reader:
data[row['userID']] = record = {}
for header, value in row.items():
process(header, value, record)
with open(outfile, "w") as stream:
json.dump(data, stream, indent=4)
INPUT:
userID,username,age,location/street/name,location/street/number,location/city
0,AAA,20,This Street,5,This City
1,BBB,42,That Street,5,That City
2,CCC,34,Other Street,5,Other City
OUTPUT:
{
"0": {
"userID": "0",
"username": "AAA",
"age": "20",
"location": {
"street": {
"name": "This Street",
"number": "5"
},
"city": "This City"
}
},
"1": {
"userID": "1",
"username": "BBB",
"age": "42",
"location": {
"street": {
"name": "That Street",
"number": "5"
},
"city": "That City"
}
},
"2": {
"userID": "2",
"username": "CCC",
"age": "34",
"location": {
"street": {
"name": "Other Street",
"number": "5"
},
"city": "Other City"
}
}
}
I'd add some custom logic to achieve this, note that this is for the first level only, if you want more, you should create a recoursive function:
# Write data to a JSON file...
with open(jsonFilePath, "w") as jsonFile:
for i, v in data.items():
if '/' in i:
parts = i.split('/', 1)
data[parts[0]] = {parts[1]: v}
data.pop(i)
jsonFile.write(json.dumps(data, indent=4))
You can use something like this:
# https://www.geeksforgeeks.org/convert-csv-to-json-using-python/
import csv
import json
# Function to convert a CSV to JSON
# Takes the file paths as arguments
def make_json(csvFilePath, jsonFilePath):
# create a dictionary
data = {}
# Open a csv reader called DictReader
with open(csvFilePath, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
# Convert each row into a dictionary
# and add it to data
for rows in csvReader:
# Assuming a column named 'No' to
# be the primary key
key = rows['No']
data[key] = rows
# Open a json writer, and use the json.dumps()
# function to dump data
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, indent=4))
# Driver Code
# Decide the two file paths according to your
# computer system
csvFilePath = r'Names.csv'
jsonFilePath = r'Names.json'
# Call the make_json function
make_json(csvFilePath, jsonFilePath)
For more information check out https://www.geeksforgeeks.org/convert-csv-to-json-using-python/

Python TypeError: '_csv.writer' object is not iterable

I am parsing json to csv. But am getting error as below:
for i in data:
TypeError: '_csv.writer' object is not iterable
Code:
import json
import csv
with open("Data.json", 'r') as file:
data = json.load(file)
CSV_File = 'Data.csv'
with open(CSV_File, 'w') as file:
data = csv.writer(file)
data.writerow([])
for i in data:
data.writerow([])
Data
{
"id": "kljhfksdhkhd",
"name": "BOB",
"birthday": "08/03/1993",
"languages": [
{
"id": "106059522759137",
"name": "English language"
},
{
"id": "107617475934611",
"name": "Telugu language"
},
{
"id": "112969428713061",
"name": "Hindi"
},
{
"id": "343306413260",
"name": "Tamil language"
},
{
"id": "100904156616786",
"name": "Kannada"
}
],
"games": {
"data": [
{
"name": "Modern Combat",
"id": "12134323",
"created_time": "2019-02-21T18:39:41+0000"
},
{
"name": "Cards",
"id": "343232",
"created_time": "2011-06-01T11:13:31+0000"
},
{
"name": "Shuttle Badminton",
"id": "43214321",
"created_time": "2011-06-01T11:13:31+0000"
},
{
"name": "Carrom",
"id": "49y497",
"created_time": "2011-06-01T11:13:31+0000"
},
{
"name": "Chess",
"id": "0984080830",
"created_time": "2011-06-01T11:13:31+0000"
}
],
"paging": {
"cursors": {
"before": "dkkskd",
"after": "dlldlkd"
}
}
}
}
First off, the name data has been assigned to two different objects. Python permits this each assignment overwrites the previous. In the code, data is initially the data from the json file, then a csv.writer instance. A sensible improvement, therefore, is to name the writer writer, and change the code accordingly:
import json
import csv
with open("Data.json", 'r') as file:
data = json.load(file)
CSV_File = 'Data.csv'
with open(CSV_File, 'w') as file:
writer = csv.writer(file)
writer.writerow([])
for i in data:
writer.writerow([])
Now let's deal with how we are writing to the file. writer.writerow expects a list, but writing an empty list: writer.writerow([]) isn't very useful. Probably you want to write the json data to the csv file, so leet's get rid of the empty lists, and indent the writing loop so that it's inside the with block (otherwise the file will be closed).
import json
import csv
with open("Data.json", 'r') as file:
data = json.load(file)
CSV_File = 'Data.csv'
with open(CSV_File, 'w') as file:
writer = csv.writer(file)
for row in data:
writer.writerow(row)
This will work if the json data is a list of lists, that is it looks like this:
[[...], [...], [...], ...]
because each element of the outer list is a list, so iterating over it (for row in data:) yields a list, which writer.writerow can handle. However it's not uncommon for json data to be in the form of a dictionary:
{"k1": [....], "k2": [...], "k3": [...], ...}
In this case, you might want to iterate over the dictionary's values, if they are list:
for row in data.values():
writer.writerow(row)
Finally, the json may be an irregular mix of lists and dictionaries, and may be arbitrarily nested. It's up to you to determine how to map nested json data to the flat csv format.

Converting a Text file to JSON format using Python

I am not new to programming but not good at python data structures. I would like to know a way to convert a text file into JSON format using python since I heard using python the task is much easier with a module called import.json.
The file looks like
Source Target Value
B cells Streptococcus pneumoniae 226
B cells Candida albicans 136
B cells Mycoplasma 120
For the first line "B cells" is the source, target is the "Streptococcus pneumoniae" and value is "226". I just started with the code, but couldnot finish it. Please help
import json
prot2_names = {}
tmpfil = open("file.txt", "r");
for lin in tmpfil.readlines():
flds = lin.rstrip().split("\t")
prot2_names[flds[0]] = "\"" + flds[1] + "\""
print prot2_names+"\t",
tmpfil.close()
Wants the output to be like
{
"nodes": [
{
"name": "B cells"
},
{
"name": "Streptococcus pneumoniae"
},
{
"name": "Candida albicans"
},
{
"name": "Mycoplasma"
},
{
"links": [
{
"source": 0,
"target": 1,
"value": "226"
},
{
"source": 0,
"target": 2,
"value": "136"
},
{
"source": 0,
"target": 3,
"value": "120"
}
]
}
You can read it as a csv file and convert it into json. But, be careful with spaces as you've used it as separator, the values with spaces should be carefully handled. Otherwise, if possible make the separator , instead of space.
the working code for what you're trying,
import csv
import json
with open('file.txt', 'rb') as csvfile:
filereader = csv.reader(csvfile, delimiter=' ')
i = 0
header = []
out_data = []
for row in filereader:
row = [elem for elem in row if elem]
if i == 0:
i += 1
header = row
else:
row[0:2] = [row[0]+" "+row[1]]
_dict = {}
for elem, header_elem in zip(row, header):
_dict[header_elem] = elem
out_data.append(_dict)
print json.dumps(out_data)
output,
[
{
"Source":"B cells",
"Target":"Streptococcus",
"Value":"pneumoniae"
},
{
"Source":"B cells",
"Target":"Candida",
"Value":"albicans"
},
{
"Source":"B cells",
"Target":"Mycoplasma",
"Value":"120"
},
{
"Source":"B cells",
"Target":"Neisseria",
"Value":"111"
},
{
"Source":"B cells",
"Target":"Pseudomonas",
"Value":"aeruginosa"
}
]
UPDATE: Just noticed your updated question with json sample that you require. Hope, you could build it with the above example I've written.

Categories

Resources