Converting two CSV's into one json

Converting two CSV's into one json - python

This code produces the records in terminal but when I open the json it's blank. Can someone help me?
import csv
import json
# Open the two input CSV files and the output JSON file.
# NOTE(review): none of these handles is closed anywhere in this listing, so
# whatever json.dump writes to redrock.json is never explicitly flushed.
refcsvfile = open('referralsource.csv', 'r')
jsonfile = open('redrock.json', 'w')
concsvfile = open('contacts.csv', 'r')
# Column names applied to the referral-source CSV.
reffieldnames = ("ReferralSource_Name","OrganizationType","PrimaryRelationshipManager","ReferralSourceStatus","RSContactSourceType"
)
# Column names applied to the contacts CSV.
confieldnames = ("ReferralSource_Name","OrganizationName","IsOrganizationContact","FirstName","Middle","LastName","Role","Line1","City","State","Zip","Phone","PhoneType","PhonePrimary","OkToLeaveVM","PhoneActive","E-mail","OkToSendEmail","ContactPrimaryRelationshipManager","IsPrimaryContact","ContactSourceType"
)
# Field names are passed explicitly, so every line of each file is read as a
# data row.
refreader = csv.DictReader( refcsvfile, reffieldnames)
conreader = csv.DictReader( concsvfile, confieldnames)
output=[];
refcount=0
# NOTE(review): indentation was lost in this listing, so the nesting below is
# presumed: for each referral-source row, rewind the contacts file and collect
# the contact rows whose ReferralSource_Name matches.
for refrow in refreader:
refrow['ReferralSourceContacts'] = []
output.append(refrow)
concsvfile.seek(0)
for conrow in conreader:
if(conrow["ReferralSource_Name"]==refrow["ReferralSource_Name"]):
refrow['ReferralSourceContacts'].append(conrow)
# NOTE(review): refrow was already appended to output above; this second
# append adds the same dict object again.
output.append(refrow)
# refcount is incremented but never read in this listing.
refcount = refcount +1
print(output)
json.dump(output, jsonfile,sort_keys=True)
I am wanting the json to appear something like:
{
"ReferralSource_Name": "Demo Facility",
"OrganizationType": "Hospital",
"RSContactSourceType": "DirectInboundTelephone",
"ReferralSourceStatus": "Active",
"PrimaryRelationshipManager": "John Doe",
},
"ReferralSourceContacts": [
{
"IsOrganizationContact": true,
"OrganizationName": "Demo Facility",
"FirstName": "John",
"LastName": "Smith",
"Role": "Doctor",
"Line1": "123 abc Street",
"Zip": "44720",
"City": "Canton",
"State": "OH",
"Phone": "555-555-555",
"PhoneType": "Office",
"PhonePrimary": "True",
"PhoneActive": "True",
"Email": "doc#doc.doc",
"OkToLeaveVm": true,
"OkToSendEmail": true,
"ContactSourceType": "DirectInboundTelephone"
"ContactPrimaryRelationshipManager": "John Doe"
}
"IsOrganizationContact": true,
"OrganizationName": "Test Facility",
"FirstName": "Jane",
"LastName": "Smith",
"Role": "Doctor",
"Line1": "123 abc Street",
"Zip": "44720",
"City": "Canton",
"State": "OH",
"Phone": "555-555-555",
"PhoneType": "Office",
"PhonePrimary": "True",
"PhoneActive": "True",
"Email": "doc2#doc.doc",
"OkToLeaveVm": true,
"OkToSendEmail": true,
"ContactSourceType": "DirectInboundTelephone"
"ContactPrimaryRelationshipManager": "John Doe"
}
]
Basically I have a file for the parent entity referral source (think of as Companies), and another csv for the contacts (Think of them as the people at the companies). I need these two combined into the mentioned JSON for an import.

Something like this oughta work...
Use with to manage files.
Assuming the contacts list isn't huge, it's much better to load it into memory instead of seeking the file to the start on every row.
Even better, since it's now in memory, we can use a collections.defaultdict to pre-group by the referrer field, making creating the output a simple dict lookup.
import csv
import json
import collections
reffieldnames = (
"ReferralSource_Name",
"OrganizationType",
"PrimaryRelationshipManager",
"ReferralSourceStatus",
"RSContactSourceType",
)
confieldnames = (
"ReferralSource_Name",
"OrganizationName",
"IsOrganizationContact",
"FirstName",
"Middle",
"LastName",
"Role",
"Line1",
"City",
"State",
"Zip",
"Phone",
"PhoneType",
"PhonePrimary",
"OkToLeaveVM",
"PhoneActive",
"E-mail",
"OkToSendEmail",
"ContactPrimaryRelationshipManager",
"IsPrimaryContact",
"ContactSourceType",
)
# Group the contacts by their referral source in a single pass, so that each
# referral-source row below needs only one dictionary lookup.
con_map = collections.defaultdict(list)
# newline="" lets the csv module do its own newline handling (needed for
# quoted fields that contain line breaks).
with open("contacts.csv", "r", newline="") as concsvfile:
    for con_row in csv.DictReader(concsvfile, confieldnames):
        con_map[con_row["ReferralSource_Name"]].append(con_row)

# Attach the matching contacts to every referral source; the defaultdict
# yields an empty list for a source that has no contacts.
output = []
with open("referralsource.csv", "r", newline="") as refcsvfile:
    for refrow in csv.DictReader(refcsvfile, reffieldnames):
        refrow["ReferralSourceContacts"] = con_map[refrow["ReferralSource_Name"]]
        output.append(refrow)

print("len(output):", len(output))

# The with-block closes (and therefore flushes) the JSON file after writing.
with open("redrock.json", "w") as jsonfile:
    json.dump(output, jsonfile, sort_keys=True)

Related

Converting nested JSON to CSV without hard coding column values

I am a rookie at Python and I have data files that I would like to convert from JSON to CSV. There are two issues: my code returns an error I am unable to resolve, and the data varies from file to file, so I would like one script that can be applied to multiple files by just changing the file location. I would also like to avoid hard-coding the company name and company type, but I don't know how to go about that. The data is structured as follows:
{
"company_name": "Google",
"company_type": "Public",
"employees": [{
"staff": [{
"name": "John Doe",
"type": "FTE",
"id": "1111111111",
"region": "Northeast"
}, {
"name": "Jane Doe",
"type": "FTE",
"id": "222222222",
"region": "Northwest"
}],
"setup": [{
"description": "Onsite",
"location": "New York City"
}, {
"description": "Hybrid",
"location": "Seattle"
}],
"role": [{
"description": "Business Analyst",
"salary": "70000"
}, {
"description": "Manager",
"salary": "90000"
}]
}, {
"contractors": [{
"name": "Jessica Smith",
"type": "PTE",
"id": "333333333",
"region": "Southeast"
}],
"setup": [{
"description": "Remote",
"location": "Miami"
}],
"role": [{
"description": "Project Manager",
"salary": "80000"
}]
}]
}
The code I have so far is:
import json
import csv
import ijson
# NOTE(review): this opens sample_file.json for writing, the same path that
# is opened for reading below; mode "w" truncates an existing file.
file = open("C:/Users/User1/sample_file.json","w")
file_writer = csv.writer(file)
# Header row; "Description" appears twice (once for setup, once for role).
file_writer.writerow(("Company Name","Company Type","Name","Type","ID","Region","Description","Location","Description","Salary"))
with open("C:/Users/User1/sample_file.json","rb") as f:
# Company fields are hard-coded instead of being read from the file.
company_name = "Google"
company_type = "Public"
# Stream each element of the top-level "employees" array.
for record in ijson.items(f,"employees.item"):
# Only the first entry ([0]) of each list is read, and only the 'staff' key
# is looked up for the person fields.
name = record['staff'][0]['name']
type = record['staff'][0]['type']
id = record['staff'][0]['id']
region = record['staff'][0]['region']
description = record['setup'][0]['description']
location = record['setup'][0]['location']
# NOTE(review): this rebinds description, replacing the setup description
# assigned two lines above.
description = record['role'][0]['description']
salary = record['role'][0]['salary']
# NOTE(review): comapny_name is not defined in this listing; the variable
# assigned above is company_name.
file_writer.writerow((comapny_name, company_type, name, type, id, region, description, location, description, salary))
file.close()
Any help is greatly appreciated.

Assuming that all of your files have the same general structure, using a csv.DictWriter should work. Just iterate through the employee sections creating a single dictionary to represent each employee and call writer.writerow() once all of the data has been collected.
For example:
import csv
import json
# Load the source document. `filename` must be set to the path of the input
# JSON file; the with-block closes the handle once parsing is done.
with open(filename) as json_file:
    data = json.load(json_file)
# CSV columns in output order.
columns = ["company name","company type","name","type","id","region","description","location","salary"]
def convert(data, headers, out_path="employees.csv"):
    """Flatten the nested employee document into one CSV row per person.

    Each element of ``data["employees"]`` is a dict of parallel lists
    (for example staff/setup/role). Entries at the same index in those
    lists are merged into a single row, prefixed with the company name and
    type. When two lists share a key (such as ``description``), the value
    from the later list wins.

    Args:
        data: Parsed JSON document with ``company_name``, ``company_type``
            and ``employees`` keys.
        headers: Column names, in output order. Keys of a row that are not
            listed here are ignored.
        out_path: Destination CSV path; defaults to ``employees.csv``.
    """
    # newline="" stops the csv module's line endings from being translated
    # again by the text layer (which shows up as blank rows on Windows).
    with open(out_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=headers, extrasaction="ignore", restval=None
        )
        writer.writeheader()
        for group in data["employees"]:
            rows = []
            for entries in group.values():
                for index, entry in enumerate(entries):
                    # Start a new row the first time this index is seen.
                    if len(rows) <= index:
                        rows.append({"company name": data["company_name"],
                                     "company type": data["company_type"]})
                    rows[index].update(entry)
            writer.writerows(rows)
convert(data, columns)
OUTPUT
company name,company type,name,type,id,region,description,location,salary
Google,Public,John Doe,FTE,1111111111,Northeast,Business Analyst,New York City,70000
Google,Public,Jane Doe,FTE,222222222,Northwest,Manager,Seattle,90000
Google,Public,Jessica Smith,PTE,333333333,Southeast,Project Manager,Miami,80000

Want to remove square brackets from json file with script python

I have a JSON file, and I want to remove all fields after the square bracket with scripts in Python.
My JSON file is this:
{
"Employees": [
{
"userId": "krish",
"jobTitle": "Developer",
"firstName": "Krish",
"lastName": "Lee",
"employeeCode": "E1",
"region": "CA",
"phoneNumber": "123456",
"emailAddress": "krish.lee#learningcontainer.com"
},
{
"userId": "devid",
"jobTitle": "Developer",
"firstName": "Devid",
"lastName": "Rome",
"employeeCode": "E2",
"region": "CA",
"phoneNumber": "1111111",
"emailAddress": "devid.rome#learningcontainer.com"
},
{
"userId": "tin",
"jobTitle": "Program Directory",
"firstName": "tin",
"lastName": "jonson",
"employeeCode": "E3",
"region": "CA",
"phoneNumber": "2222222",
"emailAddress": "tin.jonson#learningcontainer.com"
}
]
}
My script is this:
import json
import re
# Parse the whole file; for the document shown above this gives a dict with
# the single key "Employees".
with open('data.json')as f:
data = json.load(f)
# Iterating a dict yields its keys, so item is a key string here.
for item in data:
# NOTE(review): re.sub returns a new string; the result is discarded.
re.sub(" *\[.*\] *"," ",item)
with open('new_data.json','w') as f:
# Dumps item (the last key string), not data.
json.dump(item, f)
I expect this:
{
"Employees":
}
but I receive this:
"Employees"
Why are the braces removed, and how can I solve this problem?

JSON is a serialization of a data structure and has no canonical format. This means that doing any kind of text matching or regular expressions on it is a very bad idea and just asking for trouble.
The proper way to do it is to use a JSON parser to convert it into objects, then synthesize whatever output you want based on the object data.
In your case, the parsed object will be a dictionary with a single key which you can obtain for example like this:
# Parse the document, take its first top-level key, and print it inside a
# hand-built brace template.
parsed = json.load(f)
first_key = list(parsed.keys())[0]
print("{{\n \"{0}\":\n}}".format(first_key))
Result:
{
"Employees":
}
For what it's worth, this is not valid JSON, so I'm not sure why you need it.

Convert CSV with nested headers to JSON

So far, I have this code (with help from a tutorial):
import csv, json
# Input CSV path and output JSON path.
csvFilePath = "convertcsv.csv"
jsonFilePath = "newResult.json"
# Read the CSV and add the data to a dictionary...
data = {}
with open(csvFilePath) as csvFile:
csvReader = csv.DictReader(csvFile)
for csvRow in csvReader:
# NOTE(review): data is rebound on every iteration, so only the last row of
# the CSV is kept. The column headers are used verbatim as keys; nothing in
# this listing splits them on "/".
data = csvRow
# Write data to a JSON file...
with open(jsonFilePath, "w") as jsonFile:
jsonFile.write(json.dumps(data, indent=4))
My desired output is this:
{
"userID": "string",
"username": "string",
"age": "string",
"location": {
"streetName": "string",
"streetNo": "string",
"city": "string"
}
}
I don't know how to represent the "location".
My actual result is this:
{
"userID": "string",
"username": "string",
"age": "string",
"location/streetName": "string",
"location/streetNo": "string",
"location/city": "string",
}
How can I separate streetName, streetNo and city and put them into "location"?

Below is a simple script that should do what you want. The result will be a JSON object with the "userID" values as keys. Note that, to test deeper nesting, I used a CSV file with slightly different headers - but it will work just as well with your original example.
import csv, json
infile = 'convertcsv.csv'
outfile = 'newResult.json'
data = {}
def process(header, value, record):
    """Store ``value`` in ``record`` under the '/'-separated path ``header``.

    Every path segment before the last becomes (or reuses) a nested dict;
    the last segment receives ``value``. ``record`` is modified in place.
    """
    node = record
    key, _, rest = header.partition('/')
    # Walk down one level for each segment that still has something after it.
    while rest:
        node = node.setdefault(key, {})
        key, _, rest = rest.partition('/')
    node[key] = value
# newline="" lets the csv module do its own newline handling.
with open(infile, newline="") as stream:
    for row in csv.DictReader(stream):
        # One nested record per CSV row, keyed by that row's userID.
        record = data[row['userID']] = {}
        for header, value in row.items():
            process(header, value, record)

with open(outfile, "w") as stream:
    json.dump(data, stream, indent=4)
INPUT:
userID,username,age,location/street/name,location/street/number,location/city
0,AAA,20,This Street,5,This City
1,BBB,42,That Street,5,That City
2,CCC,34,Other Street,5,Other City
OUTPUT:
{
"0": {
"userID": "0",
"username": "AAA",
"age": "20",
"location": {
"street": {
"name": "This Street",
"number": "5"
},
"city": "This City"
}
},
"1": {
"userID": "1",
"username": "BBB",
"age": "42",
"location": {
"street": {
"name": "That Street",
"number": "5"
},
"city": "That City"
}
},
"2": {
"userID": "2",
"username": "CCC",
"age": "34",
"location": {
"street": {
"name": "Other Street",
"number": "5"
},
"city": "Other City"
}
}
}

I'd add some custom logic to achieve this. Note that this handles the first level only; if you want more levels, you should create a recursive function:
# Write data to a JSON file...
# Build the nested structure in a new dict: adding or removing keys of `data`
# while iterating over data.items() raises RuntimeError.
nested = {}
for column, value in data.items():
    if '/' in column:
        parent, child = column.split('/', 1)
        # setdefault reuses the parent's dict, so several "parent/..." columns
        # accumulate instead of replacing one another.
        nested.setdefault(parent, {})[child] = value
    else:
        nested[column] = value
data = nested
with open(jsonFilePath, "w") as jsonFile:
    jsonFile.write(json.dumps(data, indent=4))

You can use something like this:
# https://www.geeksforgeeks.org/convert-csv-to-json-using-python/
import csv
import json
# Function to convert a CSV to JSON
# Takes the file paths as arguments
def make_json(csvFilePath, jsonFilePath, key_column='No'):
    """Convert a CSV file to a JSON object keyed by one of its columns.

    Args:
        csvFilePath: Path of the CSV file to read; its first row is the
            header.
        jsonFilePath: Path of the JSON file to write.
        key_column: Header of the column whose value becomes each row's key
            in the output object. Defaults to 'No'. Rows that share a key
            value overwrite one another; the last one wins.

    Raises:
        KeyError: If ``key_column`` is not one of the CSV's headers.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        data = {row[key_column]: row for row in csv.DictReader(csvf)}

    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))
# Driver Code
# Decide the two file paths according to your
# computer system
csvFilePath = r'Names.csv'
jsonFilePath = r'Names.json'
# Call the make_json function
make_json(csvFilePath, jsonFilePath)
For more information check out https://www.geeksforgeeks.org/convert-csv-to-json-using-python/

Converting a pipe delimited CSV file to a JSON file in a certain format

I am trying to convert from a CSV file each row into JSON format.
When I convert it to JSON, the output includes square brackets at the beginning and the end; how can they be omitted? I am also looking for a way to split the pipe-separated values into a list of separate hobbies.
This is what I am getting as output:
[
{
"Name": "John",
"Age": "23",
"Hobby": "Kayaking|Football",
"Location": "Miami",
"Profession": "Sales",
},
{
"Name": "Peter",
"Age": "35",
"Hobby": "Football|Basketball|Swimming",
"Location": "Turin",
"Profession": "Mechanic",
},
{
"Name": "James",
"Age": "50",
"Hobby": "Golf",
"Location": "Berlin",
"Profession": "Accountant",
}
]
My desired output
{
"Name": "John",
"Age": "23",
"Hobby": ["Kayaking","Football"],
"Location": "Miami",
"Profession": "Sales",
},
{
"Name": "Peter",
"Age": "35",
"Hobby": ["Football","Basketball","Swimming"],
"Location": "Turin",
"Profession": "Mechanic",
},
{
"Name": "James",
"Age": "50",
"Hobby": "Golf",
"Location": "Berlin",
"Profession": "Accountant",
}
My code:
import glob
import os
import csv
import json
if __name__ == '__main__':
# Registers a '|'-delimited dialect named 'piper'.
csv.register_dialect('piper', delimiter='|', quoting=csv.QUOTE_NONE)
# NOTE(review): this is not a raw string, so "\f" in the pattern is the
# form-feed escape rather than a backslash followed by "file".
for filename in glob.glob('path_to_csv\file.csv'):
# Strip the extension here; '.csv' is appended again when the file is opened.
csvfile = os.path.splitext(filename)[0]
jsonfile = 'jsfile.json'
fieldnames = ("Name","Age","Hobby","Location", "Profession")
with open(csvfile+'.csv') as f:
# The dialect argument is commented out, so the 'piper' dialect registered
# above is not used by this reader.
reader = csv.DictReader(f,fieldnames)#, dialect='piper')
# rows is a list of row dicts, which json.dump serializes as a JSON array.
rows = list(reader)
with open(jsonfile, 'w') as f:
json.dump(rows, f, sort_keys=True, indent=2, separators=(',', ': '))
f.write('\n')

rows = list(reader)
There is no need to wrap it into the list since it is already a list. Removing this line will fix the brackets.
for row in reader:
    # str.split already returns a list, so no extra list() call is needed.
    row['Hobby'] = row['Hobby'].split('|')
We need to split the string of hobbies on the pipe character to turn it into a list.

Python- writing json file as list of dictionaries

I am writing a json file from information extracted from a url. How do I print each element of the dictionary on a separate line?
This is my current code:
# dct is a one-element list that wraps the record dict.
dct=[{"name": name,
"cuisine": cuisine,
"price-range": price,
"address": address,
"rating": rating,
"reviews": score,
"district": district,
"url": link
}]
# Mode 'a' appends, so each run adds another serialized list to the end of
# the file.
with open('openrice_data.json', 'a') as file:
# json.dumps is called without indent, so the output is a single line.
file.write(json.dumps(dct))
For example, it currently prints like this:
[{"cuisine": ["Japanese", "Hot Pot", "Buffet"], "rating": [3.5], "address": [22.3825, 114.1901], "url": ["https://www.openrice.com/en/hongkong/r-wagyu-more-sha-tin-japanese-hot-pot-r172321"], "reviews": [35, 17, 8], "name": "Wagyu More", "price-range": ["$101-200"], "district": ["Sha Tin"]}]
I would like it to print like this:
[
{"name": "Chan Kun Kee",
"cuisine": ["Guang Dong", "Dai Pai Dong"],
"price-range": "$51-100",
"address": [22.3884, 114.1958],
"rating": 3.5,
"reviews": [216, 95, 38],
"district": "Shatin",
"url": "www.openrice.com/en/hongkong/r-chan-kun-kee-sha-tin-guangdong-r7918"
}
]

Update Actually what you have is a list of dictionaries. When you want to add more elements you need to remove the [] around the dictionary.
To solve your specific problem you want to use indent=0. Also consider using json.dump directly.
import json
# The record to store.
dct = {
    "name": 'name',
    "cuisine": 'cuisine',
    "price-range": 'price',
    "address": 'address',
    "rating": 'rating',
    "reviews": 'score',
    "district": 'district',
    "url": 'link',
}
# The file holds a list of records, so wrap the dict in one.
l = [dct]
# indent=0 puts every element on its own line without leading spaces.
with open('openrice_data.json', 'w') as file:
    json.dump(l, file, indent=0)
Output:
[
{
"name": "name",
"cuisine": "cuisine",
"price-range": "price",
"address": "address",
"rating": "rating",
"reviews": "score",
"district": "district",
"url": "link"
}
]
Continuing
To add more elements you need to do this:
# Read back the list that is already stored in the file.
with open('openrice_data.json') as f:
    l = json.load(f)

# The additional record.
dct2 = {
    "name": 'name',
    "cuisine": 'cuisine',
    "price-range": 'price',
    "address": 'address',
    "rating": 'rating',
    "reviews": 'score',
    "district": 'district',
    "url": 'link',
}

# Add it to the list and rewrite the whole file.
l.append(dct2)
with open('openrice_data.json', 'w') as file:
    json.dump(l, file, indent=0)
Output now contains a list with 2 dicts.
[
{
"name": "name",
"cuisine": "cuisine",
"price-range": "price",
"address": "address",
"rating": "rating",
"reviews": "score",
"district": "district",
"url": "link"
},
{
"name": "name",
"cuisine": "cuisine",
"price-range": "price",
"address": "address",
"rating": "rating",
"reviews": "score",
"district": "district",
"url": "link"
}
]

Don't use json, pprint is perfect for this job.
from pprint import pprint
# Sample data: a list holding one record.
obj = [
    {
        "cuisine": ["Japanese", "Hot Pot", "Buffet"],
        "rating": [3.5],
        "address": [22.3825, 114.1901],
        "url": ["https://www.openrice.com/en/hongkong/r-wagyu-more-sha-tin-japanese-hot-pot-r172321"],
        "reviews": [35, 17, 8],
        "name": "Wagyu More",
        "price-range": ["$101-200"],
        "district": ["Sha Tin"],
    }
]
# pprint sends its formatted output to the given stream instead of stdout.
with open('dumpfile.json', 'w+') as f:
    pprint(obj, stream=f)
There are a few parameters for customization, please check the doc for more details :
https://docs.python.org/3/library/pprint.html

Use prettyprinter:
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(dct)
Also: you are currently putting the dict in a list. [] is a list {} is a dict in python.
By putting [{}] you are putting the dict into a list. Just remove the [].

Other people have remarked on using pprint, but I would like to add that pprint prints the representation of the Python values in your dictionary. They are not always the same as their JSON counterparts, for example:
>>> from pprint import pprint
>>> d1 = {"value": None}
>>> pprint(d1)
{'value': None}
(the correct JSON serialization here is {"value": null})
The better option, for these kinds of values, is to use json.dump or json.dumps. You can use the indent parameter to sort of make it print one line per element. Note though that this will also print each list element into their separate lines (so you don't exactly get one line per one JSON key):
>>> d2 = [
... {"name": "Chan Kun Kee",
... "cuisine": ["Guang Dong", "Dai Pai Dong"],
... "price-range": "$51-100",
... "address": [22.3884, 114.1958],
... "rating": 3.5,
... "reviews": [216, 95, 38],
... "district": "Shatin",
... "url": "www.openrice.com/en/hongkong/r-chan-kun-kee-sha-tin-guangdong-r7918"
... }
... ]
>>> print(json.dumps(d2, indent=2))
[
{
"name": "Chan Kun Kee",
"cuisine": [
"Guang Dong",
"Dai Pai Dong"
],
"price-range": "$51-100",
"address": [
22.3884,
114.1958
],
"rating": 3.5,
"reviews": [
216,
95,
38
],
"district": "Shatin",
"url": "www.openrice.com/en/hongkong/r-chan-kun-kee-sha-tin-guangdong-r7918"
}
]
But you're guaranteed to at least always get the correct JSON. Plus, you can also extend the behavior with your own JSON encoder. This allows you, for example, to serialize Python datetime objects into JSON strings.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Converting two CSV's into one json - python

Related

Converting nested JSON to CSV without hard coding column values

Want to remove square brackets from json file with script python

Convert CSV with nested headers to JSON

Converting a pipe delimited CSV file to a JSON file in a certain format

Python- writing json file as list of dictionaries

Categories

Resources