I am working with googlemaps API with python and would like to read a csv file containing LOCATIONID, LOCATION, X, Y, LAT, LNG, REF, ID with each of those as a dictionary key
Excerpt from Data Set
ChIJWzKeTARawokRjoqe_C9poOA,THEGIRLSOUTFITS,0,17,40.71380629999999,-74.0222972,dc1885d8c12ac669e9de3a73fedfb08c40deefd3,CmRSAAAA4Mvy3PVnmnYQKIZg-VX9-UrnrhlwOs7YFeY40gdXw1JfuUeEZndlhWxoIdI2K0nt1voCQDg2mqUVKV6EBgOKMTrHjDsMHOy7MJYBCUWguzLroifP1UMWTYSfUJD6E6sMEhB3psbJCGcDh5iS1PHyX4k5GhRBXigoOyADpURVG3NG2bIqXJ2i_g
ChIJDaBbDPlZwokRuHXkJGK9ER0,NYNJ State Line,0,29,40.7245721,-74.02168689999999,3d2cfc0a35b583760187c2369bb957b5a8cd9755,CmRRAAAAN25LD8IWAiu8CAIFM2eMS7eJVReV_UHzmR6sbaRkhDlJgiX-wK0Xlb85hC1qmDkarwQrGoM2u60zfSMr1MxcyzMAg3UDgL6nYwwu3WgG1aNlbpiqR2HlLFlK5XhimqI4EhDrfqXHOxKeztoT01Aicp3hGhSETo6Pj7gTk-dajA0RWco1djlrVA
ChIJQX6hfdlZwokRoCdPrD-8wSY,Shipyard Marina,0,59,40.751581,-74.021287,5f5e1aa75a39ce1880085d5094694a9ceda1ff43,CmRRAAAAp_VV6L2SQgnjC_TeMYRe_m2EYyjo_IdAOUqSeuRM9kbEMHSyguWco9jDV6rdYxk5s1E5s21BX0P7flWfK2LCegcKi1m_AOp2LuYtuzv0ql4-Jeq6-yx50Z180J9U4nrHEhAihnIm2i8MWwk7ATToiLBgGhR-w02BFnO9EGniCms-XWq00VrGOg
ChIJi3MPPNNZwokRDu8nE0ldrwk,Lincoln Harbor,0,68,40.759708,-74.02225,a3192a26d0a5a779da702a7f871612992e26f606,CmRRAAAAGcYQNoc-WgRCwh3rlBzeEmlG_XQgk_coHa4qUpzWU_9DiDWnU8eLjhInrZqpR7nKibXc0dcZIYiZEb-ehKvIrF6OfbLGrpmbf47YNaofAebceKirPZ_g6jMsc-_dcITaEhAKqkWagpXbgd3CcQf8Rtz-GhQtzlAGJlP7gOzV2KfXRGFIF2ZbAA
ChIJq35iOdNZwokRKvT9_KFP8jg,Lincoln Harbor,0,68,40.75984079999999,-74.0223211,38ea8de2bbf6d8a710bec3104d180d71b3d00735,CmRRAAAASNnGG2MHHIDGcXfgH-8iWUQOEahJdMZz6dy24azZrqyXqEtMb_yP1MJRGMV6Cp6lX05MgU2vNIfHcOGBfXiG0yWU7Qe0-t9_Uhm7JxNuAOEge9ZaCwNlrtxuxr8xSYsJEhAkL9vA29DV4nReD4e85D1TGhTbZYRfvNNRZi5a3NMUA0gyMYoR_A
ChIJ07hkOtNZwokRfY7FzKrxTkA,852858 Harbor Blvd,0,68,40.7599265,-74.02213569999999,29cfd2338edc10f94f5464b2fe152ab7bab3fca6,CmRbAAAAdlTDQx7qWhNnvgDQ_1afttKRpJ_MzBULrJLNX0TFEwpvKTjCvCZoXfV1F0A_diKH5D8qDjjucrjUC3gV8iVxqOYoHRGYwifE2FSiVycUrjunQxiDYAd-C6Q7QOqswAomEhAewuH-Q09vSAW_hNbQ0AUOGhQqRXCRrgxc6EAl2xRv1Z73M4MMug
ChIJnYuB1ixYwokR3w26K1mr3JQ,1500 Harbor Blvd,0,69,40.7609258,-74.02135559999999,ed57711af76a2534406a70f21d8a7119170b0f72,CmRbAAAAbPsHkeqeX3cx5MdED0Ao6ic0PE56xhGMdda0zXolIU7KsPmgPbT1CjXfyZ0p4ws_GYgdORZB3qP0idFvWBtMpdYpXCq0VknITsvfAoNfwYXoccYGsP7QvfpBPb1oTnywEhDwyrKX9JiQkYx9YaIXhkOwGhRCtFkL2futi7BkUxo_dMYEtsWVWA
ChIJk7Y29CxYwokRLcD4dsTPCCM,800816 Waterfront Terrace,0,71,40.7629756,-74.0223474,727cf2af87d28ec3a45f11e76e0746acfbec03b0,CmRbAAAAzgEUAIfexip_ePX2VQ29-iZMDPe-5RA4aGhFRoDkEGHrTaUON7ZGyn1r16erRSWIAhESFzycoG4Rwuw4OijSlLjKfK3_HCvC4fkrX-d6I7g7ffkIotwc1KK77UkHPMz-EhBoESHGx4Ke3H9y6XfDt9iaGhSGqEI5BmtpMu4URYwFmrT-2npHWQ
ChIJsZ2B5CxYwokRL8NrRMKxD9Q,Estuary Marketplace,0,71,40.7622809,-74.021577,1b6c2bd55b1bbabe3aac0d1eb2153e812c1026fd,CmRSAAAAO_HAcE9pNnA6r0FDp1eVsfS6jVn31DxU_JhcZNACGz9kfI5xOPjTbgM77JhJLxZPJqLgO_AnNXIIyEQV_wqO5Pr-YWJaIyhCEYA8Ene36VXgQ_90NMQwa_HNJJnWywkpEhBYQL6JbpsO0FJcOawnw5-2GhRE_QU6uh-cP-RKArGSyfEE2cxL3A
ChIJ64dzUNRZwokRIHBPnvi-eJs,Estuary Apartments,0,71,40.76240009999999,-74.0216088,8219ad9d883d8891a651bc35ff7bf7abe7c7b72a,CmRSAAAA7t8RT2MVUpaxNWBPItFU3pTQwNqXDuRY28GxZ_cywaLqOFIJ7taeXDHvGg19Y0MoIerm94HrD0iZQ_iIyoUrCKueeUETgHUGf8LQJMl9mqu784B5iIIXdf9-YrylAFJ9EhCvaowPZkGZvaEVLENy8fywGhRSEtZOm86qEyOCksoFWNJp9L_daw
ChIJWexh_9hZwokRgU6sf8pvbUg,Hoboken Weehawken Notary Public Dorian Cattani,0,71,40.762501,-74.021528,11be9c0505e8db6a7fcaea5fd4003d336b8667f2,CmRRAAAA9hteDDXN0TehPTxfnx6LxRjZTVHZOyOgbE1GJ42XOzB2v6htFgfliFz39e6llMHSlqvFS3uBiIKuH9bNu9qIfeSD8LlrSW5UXKP4U7sN-Zi1-IWH5QJw5S2hQCZHaNfeEhAT4Y9Q2hC0uG0vVi7CJPuVGhQPb5J4VpGisRfvRfiqEyPgTR8F5w
ChIJRaQT-CxYwokRlEhCGnjf6fs,EcoPure Home Cleaning Service Weehawken,0,72,40.76302460000001,-74.0218549,33800368abbf808a39da3e805c685b245e5baac4,CmRSAAAAyod24y6jY17fcd2b2mk6qgIoN_KWOCNxEN1zDniW9n7RHoWTm-MPXFN6N77XwYzORC-WarFmyP9jULhochuKcXcYP2y2ni7SWFviXVXOBtxvYVlHmfsyHctEvBy_GxKbEhB5ihU8my5kq4lQpQrwGIrQGhT-0cYHqRnMImt55xhU7CBDPPlA1w
ChIJFU-XRyxYwokRo5Do2LLO_mE,New Jersey 495,0,73,40.76429109999999,-74.0220365,23e5baf02ae51b7aeaf94efa56624c064bdbb456,CmRbAAAA_30BmNHIeCFJ7SVhLHWIbSa8llsfKQfYpVRC8X0feMrRlQ9ih9_vKtBfq-KRC6lcagEYQCBRdCfDob2divgzQEbrVkc9dR4v3oIfdyc5l9mlRyTnl1fOBxSeR8xMz78sEhD8FAIlIOza3aIMvqsdfjzrGhTj2R00Cv-lzmKIFEwLumKKd8g1cQ
The current code I'm trying:
import pandas as pd
import numpy as np
import numpy.random as rdm
import matplotlib.pyplot as plt
import math as m
data = np.genfromtxt('PlacesFinalNew.csv', delimiter = ',')
size = int(data.size/8)
dict = {'LOCATIONID':[], 'LOCATION':[], 'X':[],'Y':[],'LAT':[],'LNG':[],'ID':[],'REF':[]}
for i in range(size):
dict['LOCATIONID'].append(data[i][0])
dict['LOCATION'].append(data[i][1])
dict['X'].append(data[i][2])
dict['Y'].append(data[i][3])
dict['LAT'].append(data[i][4])
dict['LNG'].append(data[i][5])
dict['ID'].append(data[i][6])
dict['REF'].append(data[i][7])
This works for every key except for Location. When I print out dict['LOCATION'] I get a list of nan. Could someone please point out the problem to me?
You can use zip:
import csv
headers = ['LOCATIONID', 'LOCATION', 'X', 'Y', 'LAT', 'LNG', 'REF', 'ID']
with open('filename.csv') as f:
data = [dict(zip(headers, i)) for i in csv.reader(f)]
final_results = {i:[c[i] for c in data] for i in headers}
import json
print(json.dumps(final_results, indent=4))
Output:
{
"LOCATIONID": [
"ChIJWzKeTARawokRjoqe_C9poOA",
"ChIJDaBbDPlZwokRuHXkJGK9ER0",
"ChIJQX6hfdlZwokRoCdPrD-8wSY",
"ChIJi3MPPNNZwokRDu8nE0ldrwk",
"ChIJq35iOdNZwokRKvT9_KFP8jg",
"ChIJ07hkOtNZwokRfY7FzKrxTkA",
"ChIJnYuB1ixYwokR3w26K1mr3JQ",
"ChIJk7Y29CxYwokRLcD4dsTPCCM",
"ChIJsZ2B5CxYwokRL8NrRMKxD9Q",
"ChIJ64dzUNRZwokRIHBPnvi-eJs",
"ChIJWexh_9hZwokRgU6sf8pvbUg",
"ChIJRaQT-CxYwokRlEhCGnjf6fs",
"ChIJFU-XRyxYwokRo5Do2LLO_mE"
],
"LOCATION": [
"THEGIRLSOUTFITS",
"NYNJ State Line",
"Shipyard Marina",
"Lincoln Harbor",
"Lincoln Harbor",
"852858 Harbor Blvd",
"1500 Harbor Blvd",
"800816 Waterfront Terrace",
"Estuary Marketplace",
"Estuary Apartments",
"Hoboken Weehawken Notary Public Dorian Cattani",
"EcoPure Home Cleaning Service Weehawken",
"New Jersey 495"
],
"X": [
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"0",
"0"
],
"Y": [
"17",
"29",
"59",
"68",
"68",
"68",
"69",
"71",
"71",
"71",
"71",
"72",
"73"
],
"LAT": [
"40.71380629999999",
"40.7245721",
"40.751581",
"40.759708",
"40.75984079999999",
"40.7599265",
"40.7609258",
"40.7629756",
"40.7622809",
"40.76240009999999",
"40.762501",
"40.76302460000001",
"40.76429109999999"
],
"LNG": [
"-74.0222972",
"-74.02168689999999",
"-74.021287",
"-74.02225",
"-74.0223211",
"-74.02213569999999",
"-74.02135559999999",
"-74.0223474",
"-74.021577",
"-74.0216088",
"-74.021528",
"-74.0218549",
"-74.0220365"
],
"REF": [
"dc1885d8c12ac669e9de3a73fedfb08c40deefd3",
"3d2cfc0a35b583760187c2369bb957b5a8cd9755",
"5f5e1aa75a39ce1880085d5094694a9ceda1ff43",
"a3192a26d0a5a779da702a7f871612992e26f606",
"38ea8de2bbf6d8a710bec3104d180d71b3d00735",
"29cfd2338edc10f94f5464b2fe152ab7bab3fca6",
"ed57711af76a2534406a70f21d8a7119170b0f72",
"727cf2af87d28ec3a45f11e76e0746acfbec03b0",
"1b6c2bd55b1bbabe3aac0d1eb2153e812c1026fd",
"8219ad9d883d8891a651bc35ff7bf7abe7c7b72a",
"11be9c0505e8db6a7fcaea5fd4003d336b8667f2",
"33800368abbf808a39da3e805c685b245e5baac4",
"23e5baf02ae51b7aeaf94efa56624c064bdbb456"
],
"ID": [
"CmRSAAAA4Mvy3PVnmnYQKIZg-VX9-UrnrhlwOs7YFeY40gdXw1JfuUeEZndlhWxoIdI2K0nt1voCQDg2mqUVKV6EBgOKMTrHjDsMHOy7MJYBCUWguzLroifP1UMWTYSfUJD6E6sMEhB3psbJCGcDh5iS1PHyX4k5GhRBXigoOyADpURVG3NG2bIqXJ2i_g",
"CmRRAAAAN25LD8IWAiu8CAIFM2eMS7eJVReV_UHzmR6sbaRkhDlJgiX-wK0Xlb85hC1qmDkarwQrGoM2u60zfSMr1MxcyzMAg3UDgL6nYwwu3WgG1aNlbpiqR2HlLFlK5XhimqI4EhDrfqXHOxKeztoT01Aicp3hGhSETo6Pj7gTk-dajA0RWco1djlrVA",
"CmRRAAAAp_VV6L2SQgnjC_TeMYRe_m2EYyjo_IdAOUqSeuRM9kbEMHSyguWco9jDV6rdYxk5s1E5s21BX0P7flWfK2LCegcKi1m_AOp2LuYtuzv0ql4-Jeq6-yx50Z180J9U4nrHEhAihnIm2i8MWwk7ATToiLBgGhR-w02BFnO9EGniCms-XWq00VrGOg",
"CmRRAAAAGcYQNoc-WgRCwh3rlBzeEmlG_XQgk_coHa4qUpzWU_9DiDWnU8eLjhInrZqpR7nKibXc0dcZIYiZEb-ehKvIrF6OfbLGrpmbf47YNaofAebceKirPZ_g6jMsc-_dcITaEhAKqkWagpXbgd3CcQf8Rtz-GhQtzlAGJlP7gOzV2KfXRGFIF2ZbAA",
"CmRRAAAASNnGG2MHHIDGcXfgH-8iWUQOEahJdMZz6dy24azZrqyXqEtMb_yP1MJRGMV6Cp6lX05MgU2vNIfHcOGBfXiG0yWU7Qe0-t9_Uhm7JxNuAOEge9ZaCwNlrtxuxr8xSYsJEhAkL9vA29DV4nReD4e85D1TGhTbZYRfvNNRZi5a3NMUA0gyMYoR_A",
"CmRbAAAAdlTDQx7qWhNnvgDQ_1afttKRpJ_MzBULrJLNX0TFEwpvKTjCvCZoXfV1F0A_diKH5D8qDjjucrjUC3gV8iVxqOYoHRGYwifE2FSiVycUrjunQxiDYAd-C6Q7QOqswAomEhAewuH-Q09vSAW_hNbQ0AUOGhQqRXCRrgxc6EAl2xRv1Z73M4MMug",
"CmRbAAAAbPsHkeqeX3cx5MdED0Ao6ic0PE56xhGMdda0zXolIU7KsPmgPbT1CjXfyZ0p4ws_GYgdORZB3qP0idFvWBtMpdYpXCq0VknITsvfAoNfwYXoccYGsP7QvfpBPb1oTnywEhDwyrKX9JiQkYx9YaIXhkOwGhRCtFkL2futi7BkUxo_dMYEtsWVWA",
"CmRbAAAAzgEUAIfexip_ePX2VQ29-iZMDPe-5RA4aGhFRoDkEGHrTaUON7ZGyn1r16erRSWIAhESFzycoG4Rwuw4OijSlLjKfK3_HCvC4fkrX-d6I7g7ffkIotwc1KK77UkHPMz-EhBoESHGx4Ke3H9y6XfDt9iaGhSGqEI5BmtpMu4URYwFmrT-2npHWQ",
"CmRSAAAAO_HAcE9pNnA6r0FDp1eVsfS6jVn31DxU_JhcZNACGz9kfI5xOPjTbgM77JhJLxZPJqLgO_AnNXIIyEQV_wqO5Pr-YWJaIyhCEYA8Ene36VXgQ_90NMQwa_HNJJnWywkpEhBYQL6JbpsO0FJcOawnw5-2GhRE_QU6uh-cP-RKArGSyfEE2cxL3A",
"CmRSAAAA7t8RT2MVUpaxNWBPItFU3pTQwNqXDuRY28GxZ_cywaLqOFIJ7taeXDHvGg19Y0MoIerm94HrD0iZQ_iIyoUrCKueeUETgHUGf8LQJMl9mqu784B5iIIXdf9-YrylAFJ9EhCvaowPZkGZvaEVLENy8fywGhRSEtZOm86qEyOCksoFWNJp9L_daw",
"CmRRAAAA9hteDDXN0TehPTxfnx6LxRjZTVHZOyOgbE1GJ42XOzB2v6htFgfliFz39e6llMHSlqvFS3uBiIKuH9bNu9qIfeSD8LlrSW5UXKP4U7sN-Zi1-IWH5QJw5S2hQCZHaNfeEhAT4Y9Q2hC0uG0vVi7CJPuVGhQPb5J4VpGisRfvRfiqEyPgTR8F5w",
"CmRSAAAAyod24y6jY17fcd2b2mk6qgIoN_KWOCNxEN1zDniW9n7RHoWTm-MPXFN6N77XwYzORC-WarFmyP9jULhochuKcXcYP2y2ni7SWFviXVXOBtxvYVlHmfsyHctEvBy_GxKbEhB5ihU8my5kq4lQpQrwGIrQGhT-0cYHqRnMImt55xhU7CBDPPlA1w",
"CmRbAAAA_30BmNHIeCFJ7SVhLHWIbSa8llsfKQfYpVRC8X0feMrRlQ9ih9_vKtBfq-KRC6lcagEYQCBRdCfDob2divgzQEbrVkc9dR4v3oIfdyc5l9mlRyTnl1fOBxSeR8xMz78sEhD8FAIlIOza3aIMvqsdfjzrGhTj2R00Cv-lzmKIFEwLumKKd8g1cQ"
]
}
If your CSV file contain Header name, then just by reading csv which will create pandas DataFrame Object will convert Header into Dictionary key.
e.g;-
df = pd.read_csv(fname)
df['ColumnName']
import pandas as pd
import numpy as np
import numpy.random as rdm
import matplotlib.pyplot as plt
import math as m
df = pd.read_csv('test.csv', delimiter = ',',na_values="nan")
print(df['LOCATIONID']) # Gives you Columns Data for 'LOCATIONID'
print(df['LOCATION'])
You can read each row into a dictionary using csv.DictReader():
import csv
fieldnames = ['LOCATIONID', 'LOCATION', 'X', 'Y', 'LAT', 'LNG', 'REF', 'ID']
with open('data.csv') as in_file:
csv_reader = csv.DictReader(in_file, fieldnames=fieldnames)
for row in csv_reader:
# print out row info
# e.g. row['LOCATION']
Which basically maps each row to a collections.OrderedDict(), which is just an ordered dictionary.
If you want to map the row information to a final dictionary, you can use a collections.defaultdict():
import csv
from collections import defaultdict
fieldnames = ['LOCATIONID', 'LOCATION', 'X', 'Y', 'LAT', 'LNG', 'REF', 'ID']
row_map = defaultdict(list)
with open('data.csv') as in_file:
csv_reader = csv.DictReader(in_file, fieldnames=fieldnames)
for row in csv_reader:
for field in row:
row_map[field].append(row[field])
Related
I created this code where I am able to pull the data I want but not able to sort it as it should be. I am guessing it has to do with the way I am appending each item by ignoring index but I can't find my way around it.
This is my code:
import json
import pandas as pd
#load json object
with open("c:\Sample.json","r",encoding='utf-8') as file:
data = file.read()
data2 = json.loads(data)
print("Type:", type(data2))
cls=['Image', 'Email', 'User', 'Members', 'Time']
df = pd.DataFrame(columns = cls )
for d in data2['mydata']:
for k,v in d.items():
#print(k)
if k == 'attachments':
#print(d.get('attachments')[0]['id'])
image = (d.get('attachments')[0]['id'])
df=df.append({'Image':image},ignore_index = True)
#df['Message'] = image
if k == 'author_user_email':
#print(d.get('author_user_email'))
email = (d.get('author_user_email'))
df=df.append({'Email':email}, ignore_index = True)
#df['Email'] = email
if k == 'author_user_name':
#print(d.get('author_user_name'))
user = (d.get('author_user_name'))
df=df.append({'User':user}, ignore_index = True)
#df['User'] = user
if k == 'room_name':
#print(d.get('room_name'))
members = (d.get('room_name'))
df=df.append({'Members':members}, ignore_index = True)
#df['Members'] = members
if k == 'ts_iso':
#print(d.get('ts_iso'))
time = (d.get('ts_iso'))
df=df.append({'Time':time}, ignore_index = True)
#df['Time'] = time
df
print('Finished getting Data')
df1 = (df.head())
print(df)
print(df.head())
df.to_csv(r'c:\sample.csv', encoding='utf-8')
The code gives me this as the result
I am looking to get this
Data of the file is this:
{
"mydata": [
{
"attachments": [
{
"filename": "image.png",
"id": "888888888"
}
],
"author_user_email": "email#email.com",
"author_user_id": "91",
"author_user_name": "Marlone",
"message": "",
"room_id": "999",
"room_members": [
{
"room_member_id": "91",
"room_member_name": "Marlone"
},
{
"room_member_id": "9191",
"room_member_name": " +16309438985"
}
],
"room_name": "SMS [Marlone] [ +7777777777]",
"room_type": "sms",
"ts": 55,
"ts_iso": "2021-06-13T18:17:32.877369+00:00"
},
{
"author_user_email": "email#email.com",
"author_user_id": "21",
"author_user_name": "Chris",
"message": "Hi",
"room_id": "100",
"room_members": [
{
"room_member_id": "21",
"room_member_name": "Joe"
},
{
"room_member_id": "21",
"room_member_name": "Chris"
}
],
"room_name": "Direct [Chris] [Joe]",
"room_type": "direct",
"ts": 12345678910,
"ts_iso": "2021-06-14T14:42:07.572479+00:00"
}]}
Any help would be appreciated. I am new to python and am learning on my own.
Try:
import json
import pandas as pd
with open("your_data.json", "r") as f_in:
data = json.load(f_in)
tmp = []
for d in data["mydata"]:
image = d.get("attachments", [{"id": None}])[0]["id"]
email = d.get("author_user_email")
user = d.get("author_user_name")
members = d.get("room_name")
time = d.get("ts_iso")
tmp.append((image, email, user, members, time))
df = pd.DataFrame(tmp, columns=["Image", "Email", "User", "Members", "Time"])
print(df)
Prints:
Image Email User Members Time
0 888888888 email#email.com Marlone SMS [Marlone] [ +7777777777] 2021-06-13T18:17:32.877369+00:00
1 None email#email.com Chris Direct [Chris] [Joe] 2021-06-14T14:42:07.572479+00:00
Although the other answer does work, pandas has a built in reader for json files pd.read_json: https://pandas.pydata.org/pandas-docs/version/1.1.3/reference/api/pandas.read_json.html
It has the benefit of being able to handle very large datasets via chunking, as well as processing quite a few different formats. The other answer would not be performant for a large dataset.
This would get you started:
import pandas as pd
df = pd.read_json("c:\Sample.json")
The probblem is that append() adds a new row. So, you have to use at[] https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.at.html specifying the index/row. Se below. Some print/debug messages were left and path to input and output files was changed a little because I'm on Linux.
import json
import pandas as pd
import pprint as pp
#load json object
with open("Sample.json","r",encoding='utf-8') as file:
data = file.read()
data2 = json.loads(data)
#pp.pprint(data2)
cls=['Image', 'Email', 'User', 'Members', 'Time']
df = pd.DataFrame(columns = cls )
pp.pprint(df)
index = 0
for d in data2['mydata']:
for k,v in d.items():
#print(k)
if k == 'attachments':
#print(d.get('attachments')[0]['id'])
image = (d.get('attachments')[0]['id'])
df.at[index, 'Image'] = image
#df['Message'] = image
if k == 'author_user_email':
#print(d.get('author_user_email'))
email = (d.get('author_user_email'))
df.at[index, 'Email'] = email
#df['Email'] = email
if k == 'author_user_name':
#print(d.get('author_user_name'))
user = (d.get('author_user_name'))
df.at[index, 'User'] = user
#df['User'] = user
if k == 'room_name':
#print(d.get('room_name'))
members = (d.get('room_name'))
df.at[index, 'Members'] = members
#df['Members'] = members
if k == 'ts_iso':
#print(d.get('ts_iso'))
time = (d.get('ts_iso'))
df.at[index, 'Time'] = time
#df['Time'] = time
index += 1
# start indexing from 0
df.reset_index()
# replace empty str/cells witn None
df.fillna('None', inplace=True)
pp.pprint(df)
print('Finished getting Data')
df1 = (df.head())
print(df)
print(df.head())
df.to_csv(r'sample.csv', encoding='utf-8')
I am trying to compare two json and then write another json with columns names and with differences as yes or no. I am using pandas and numpy
The below is sample files i am including actually, these json are dynamic, that mean we dont know how many key will be there upfront
Input files:
fut.json
[
{
"AlarmName": "test",
"StateValue": "OK"
}
]
Curr.json:
[
{
"AlarmName": "test",
"StateValue": "OK"
}
]
Below code I have tried:
import pandas as pd
import numpy as np
with open(r"c:\csv\fut.json", 'r+') as f:
data_b = json.load(f)
with open(r"c:\csv\curr.json", 'r+') as f:
data_a = json.load(f)
df_a = pd.json_normalize(data_a)
df_b = pd.json_normalize(data_b)
_, df_a = df_b.align(df_a, fill_value=np.NaN)
_, df_b = df_a.align(df_b, fill_value=np.NaN)
with open(r"c:\csv\report.json", 'w') as _file:
for col in df_a.columns:
df_temp = pd.DataFrame()
df_temp[col + '_curr'], df_temp[col + '_fut'], df_temp[col + '_diff'] = df_a[col], df_b[col], np.where((df_a[col] == df_b[col]), 'No', 'Yes')
#[df_temp.rename(columns={c:'Missing'}, inplace=True) for c in df_temp.columns if df_temp[c].isnull().all()]
df_temp.fillna('Missing', inplace=True)
with pd.option_context('display.max_colwidth', -1):
_file.write(df_temp.to_json(orient='records'))
Expected output:
[
{
"AlarmName_curr": "test",
"AlarmName_fut": "test",
"AlarmName_diff": "No"
},
{
"StateValue_curr": "OK",
"StateValue_fut": "OK",
"StateValue_diff": "No"
}
]
Coming output: Not able to parse it in json validator, below is the problem, those [] should be replaed by ',' to get right json dont know why its printing like that
[{"AlarmName_curr":"test","AlarmName_fut":"test","AlarmName_diff":"No"}][{"StateValue_curr":"OK","StateValue_fut":"OK","StateValue_diff":"No"}]
Edit1:
Tried below as well
_file.write(df_temp.to_json(orient='records',lines=True))
now i get json which is again not parsable, ',' is missing and unless i add , between two dic and [ ] at beginning and end manually , its not parsing..
[{"AlarmName_curr":"test","AlarmName_fut":"test","AlarmName_diff":"No"}{"StateValue_curr":"OK","StateValue_fut":"OK","StateValue_diff":"No"}]
Honestly pandas is overkill for this... however
load dataframes as you did
concat them as columns. rename columns
do calcs and map boolean to desired Yes/No
to_json() returns a string so json.loads() to get it back into a list/dict. Filter columns to get to your required format
import json
data_b = [
{
"AlarmName": "test",
"StateValue": "OK"
}
]
data_a = [
{
"AlarmName": "test",
"StateValue": "OK"
}
]
df_a = pd.json_normalize(data_a)
df_b = pd.json_normalize(data_b)
df = pd.concat([df_a, df_b], axis=1)
df.columns = [c+"_curr" for c in df_a.columns] + [c+"_fut" for c in df_a.columns]
df["AlarmName_diff"] = df["AlarmName_curr"] == df["AlarmName_fut"]
df["StateValue_diff"] = df["StateValue_curr"] == df["StateValue_fut"]
df = df.replace({True:"Yes", False:"No"})
js = json.loads(df.loc[:,(c for c in df.columns if c.startswith("Alarm"))].to_json(orient="records"))
js += json.loads(df.loc[:,(c for c in df.columns if c.startswith("State"))].to_json(orient="records"))
js
output
[{'AlarmName_curr': 'test', 'AlarmName_fut': 'test', 'AlarmName_diff': 'Yes'},
{'StateValue_curr': 'OK', 'StateValue_fut': 'OK', 'StateValue_diff': 'Yes'}]
I have an excel file in which data is saved in csv format in such a way.This data is present in the excel file as shown below,under column A (The CSV File is generated by LabView Software code which i have written to generate data).I have also attached an image of the csv file for reference at the end of my question.
RPM,Load Current,Battery Output,Power Capacity
1200,30,12,37
1600,88,18,55
I want to create a Json file in such format
{
"power_capacity_data" :
{
"rpm" : ["1200","1600"],
"load_curr" : ["30","88"],
"batt_output" : ["12","18"],
"power_cap" : ["37","55"]
}
}
This is my code
import csv
import json
def main():
#created a dictionary so that i can append data to it afterwards
power_data = {"rpm":[],"load_curr":[],"batt_output":[],"power_cap":[]}
with open('power1.lvm') as f:
reader = csv.reader(f)
#trying to append the data of column "RPM" to dictionary
rowcount = 0
for row in reader:
if rowcount == 0:
#trying to skip the first row
rowcount = rowcount + 1
else:
power_data['rpm'].append(row[0])
print(row)
json_report = {}
json_report['pwr_capacity_data'] = power_data
with open('LVMJSON', "w") as f1:
f1.write(json.dumps(json_report, sort_keys=False, indent=4, separators=(',', ': '),encoding="utf-8",ensure_ascii=False))
f1.close()
if __name__ == "__main__":
main()
The output json file that i am getting is this:(please ignore the print(row) statement in my code)
{
"pwr_capacity_data":
{
"load_curr": [],
"rpm": [
"1200,30,12.62,37.88",
"1600,88,18.62,55.88"
],
"batt_output": [],
"power_cap": []
}
}
The whole row is getting saved in the list,but I just want the values under the column RPM to be saved .Can someone help me out with what I may be doing wrong.Thanks in advance.I have attached an image of csv file to just in case it helps
You could use Python's defaultdict to make it a bit easier. Also a dictionary to map all your header values.
from collections import defaultdict
import csv
import json
power_data = defaultdict(list)
header_mappings = {
'RPM' : 'rpm',
'Load Current' : 'load_curr',
'Battery Output' : 'batt_output',
'Power Capacity' : 'power_cap'}
with open('power1.lvm', newline='') as f_input:
csv_input = csv.DictReader(f_input)
for row in csv_input:
for key, value in row.items():
power_data[header_mappings[key]].append(value)
with open('LVMJSON.json', 'w') as f_output:
json.dump({'power_capacity_data' : power_data}, f_output, indent=2)
Giving you an output JSON file looking like:
{
"power_capacity_data": {
"batt_output": [
"12",
"18"
],
"power_cap": [
"37",
"55"
],
"load_curr": [
"30",
"88"
],
"rpm": [
"1200",
"1600"
]
}
}
with my code, i read the values of JSON data and insert into array
def retrive_json():
with open('t_v1.json') as json_data:
d = json.load(json_data)
array = []
for i in d['ride']:
origin_lat = i['origin']['lat']
origin_lng = i['origin']['lng']
destination_lat = i['destination']['lat']
destination_lng = i['destination']['lng']
array.append([origin_lat,origin_lng,destination_lat,destination_lng])
return array
the result array is this :
[[39.72417, -104.99984, 39.77446, -104.9379], [39.77481, -104.93618, 39.6984, -104.9652]]
how i can write each element of each array into specific field in csv?
i have try in this way:
wrt = csv.writer(open(t_.csv', 'w'), delimiter=',',lineterminator='\n')
for x in jjson:
wrt.writerow([x])
but the value of each array are store all in one field
How can solved it and write each in a field?
this is my json file:
{
"ride":[
{
"origin":{
"lat":39.72417,
"lng":-104.99984,
"eta_seconds":null,
"address":""
},
"destination":{
"lat":39.77446,
"lng":-104.9379,
"eta_seconds":null,
"address":null
}
},
{
"origin":{
"lat":39.77481,
"lng":-104.93618,
"eta_seconds":null,
"address":"10 Albion Street"
},
"destination":{
"lat":39.6984,
"lng":-104.9652,
"eta_seconds":null,
"address":null
}
}
]
}
Let's say we have this:
jsonstring = """{
"ride":[
{
"origin":{
"lat":39.72417,
"lng":-104.99984,
"eta_seconds":null,
"address":""
},
"destination":{
"lat":39.77446,
"lng":-104.9379,
"eta_seconds":null,
"address":null
}
},
{
"origin":{
"lat":39.77481,
"lng":-104.93618,
"eta_seconds":null,
"address":"10 Albion Street"
},
"destination":{
"lat":39.6984,
"lng":-104.9652,
"eta_seconds":null,
"address":null
}
}
]
}"""
Here is a pandas solution:
import pandas as pd
import json
# Load json to dataframe
df = pd.DataFrame(json.loads(jsonstring)["ride"])
# Create the new columns
df["o1"] = df["origin"].apply(lambda x: x["lat"])
df["o2"] = df["origin"].apply(lambda x: x["lng"])
df["d1"] = df["destination"].apply(lambda x: x["lat"])
df["d2"] = df["destination"].apply(lambda x: x["lng"])
#export
print(df.iloc[:,2:].to_csv(index=False, header=True))
#use below for file
#df.iloc[:,2:].to_csv("output.csv", index=False, header=True)
Returns:
o1,o2,d1,d2
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652
Condensed answer:
import pandas as pd
import json
with open('data.json') as json_data:
d = json.load(json_data)
df = pd.DataFrame(d["ride"])
df["o1"],df["o2"] = zip(*df["origin"].apply(lambda x: (x["lat"],x["lng"])))
df["d1"],df["d2"] = zip(*df["destination"].apply(lambda x: (x["lat"],x["lng"])))
df.iloc[:,2:].to_csv("t_.csv",index=False,header=False)
Or, maybe the most readable solution:
import json
from pandas.io.json import json_normalize
open('data.json') as json_data:
d = json.load(json_data)
df = json_normalize(d["ride"])
cols = ["origin.lat","origin.lng","destination.lat","destination.lng"]
df[cols].to_csv("output.csv",index=False,header=False)
This might help:
import json
import csv
def retrive_json():
with open('data.json') as json_data:
d = json.load(json_data)
array = []
for i in d['ride']:
origin_lat = i['origin']['lat']
origin_lng = i['origin']['lng']
destination_lat = i['destination']['lat']
destination_lng = i['destination']['lng']
array.append([origin_lat,origin_lng,destination_lat,destination_lng])
return array
res = retrive_json()
csv_cols = ["orgin_lat", "origin_lng", "dest_lat", "dest_lng"]
with open("output_csv.csv", 'w') as out:
writer = csv.DictWriter(out, fieldnames=csv_cols)
writer.writeheader()
for each_list in res:
d = dict(zip(csv_cols,each_list))
writer.writerow(d)
Output csv generated is:
orgin_lat,origin_lng,dest_lat,dest_lng
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652
To me it looks like you've got an array of arrays and you want the individual elements. Therefore you'll want to use a nested for loop. Your current for loop is getting each array, to then split up each array into it's elements you'll want to loop through those. I'd suggest something like this:
for x in jjson:
for y in x:
wrt.writerow([y])
Obviously you might want to update your bracketing etc this is just me giving you an idea of how to solve your issue.
Let me know how it goes!
Why the csv-Library?
array = [[1, 2, 3, 4], [5, 6, 7, 8]]
with open('test.csv', 'w') as csv_file :
csv_file.write("# Header Info\n" \
"# Value1, Value2, Value3, Value4\n") # The header might be optional
for row in array :
csv_file.write(",".join(row) + "\n")
I have parsed a json file in python and have the results printed on screen.
However, I would also like the results to be output to a csv file, exactly as they appear on screen.
Here is my code:
import json
with open('euroinc.json') as json_data:
d = json.load(json_data)
for p in d['results']:
print(p['sedol']+','+p['company']+','+p['name']+','+ p['unitType']+','+p['perf48t60m']+','+p['perf36t48m']+','+p['perf24t36m']+','+p['perf12t24m']+','+p['perf12m']+','+p['initialCharge']+','+p['netAnnualCharge'])
Any help would be much appreciated!
Thanks,
Craig
Update: here is the json sample:
{
"results": [
{
"sector": "Europe Including UK",
"perf48t60m": "n/a",
"discountedCode": "",
"price_buy": "0",
"plusFund": false,
"unitType": "Accumulation",
"perf6m": "6.35%",
"perf36t48m": "11.29%",
"loaded": "Unbundled",
"fundSize": "2940.1",
"annualCharge": "1.07",
"netAnnualCharge": "1.07",
"sedol": "B7BW9Y0",
"perf24t36m": "0.25%",
"annualSaving": "0.00",
"updated": "06/09/2017",
"incomeFrequency": "N/a",
"perf60m": "n/a",
"perf12t24m": "12.97%",
"company": "BlackRock",
"initialCharge": "0.00",
"paymentType": "Dividend",
"perf3m": "0.32%",
"name": "BlackRock Global European Value (D2 GBP)",
"perf12m": "19.37%",
"price_change": "-39.00",
"yield": "0.00",
"price_sell": "6569.00",
"perf36m": "35.19%",
"numHoldings": "51"
},
{
"sector": "Europe Including UK",
"perf48t60m": "22.01%",
"discountedCode": "",
"price_buy": "0",
"plusFund": false,
"unitType": "Income",
"perf6m": "7.81%",
"perf36t48m": "9.61%",
"loaded": "Unbundled",
"fundSize": "566.1",
"annualCharge": "0.30",
"netAnnualCharge": "0.30",
"sedol": "B76VTR5",
"perf24t36m": "-3.95%",
"annualSaving": "0.00",
"updated": "06/09/2017",
"incomeFrequency": "Quarterly",
"perf60m": "77.38%",
"perf12t24m": "15.38%",
"company": "Vanguard",
"initialCharge": "0.00",
"paymentType": "Dividend",
"perf3m": "0.74%",
"name": "Vanguard SRI European Stock",
"perf12m": "19.69%",
"price_change": "-21.37",
"yield": "2.79",
"price_sell": "15800.81",
"perf36m": "32.65%",
"numHoldings": "502"
}
]
}
This will write a CSV file with a header. Note fieldnames and extrasaction parameters are required to specify the order of columns and prevent an error when there are extra dictionary entries.
#!python2
import json
import csv
with open('euroinc.json') as json_data:
d = json.load(json_data)
# with open('out.csv','w',newline='') as f: # Python 3 version
with open('out.csv','wb') as f:
w = csv.DictWriter(f,
fieldnames='sedol company name unitType perf48t60m perf36t48m perf24t36m perf12t24m perf12m initialCharge netAnnualCharge'.split(),
extrasaction='ignore')
w.writeheader()
# Ways to use a different header.
# Note the direct write should use \r\n as that is the default 'excel' CSV dialect for line terminator.
# f.write('A,B,C,D,E,F,G,H,I,J,K\r\n')
# w.writerow(dict(zip(w.fieldnames,'col1 col2 col3 col4 col5 col6 col7 col8 col9 col10 col11'.split())))
w.writerows(d['results'])
Output:
sedol,company,name,unitType,perf48t60m,perf36t48m,perf24t36m,perf12t24m,perf12m,initialCharge,netAnnualCharge
B7BW9Y0,BlackRock,BlackRock Global European Value (D2 GBP),Accumulation,n/a,11.29%,0.25%,12.97%,19.37%,0.00,1.07
B76VTR5,Vanguard,Vanguard SRI European Stock,Income,22.01%,9.61%,-3.95%,15.38%,19.69%,0.00,0.30
I assume p is a dictionary
You could try:
for p in d['results']:
for key in p.keys():
result = a[i]+',',
print result
for the csv part you could try:
import csv
csv_file = open('your_csv.csv', 'wb')
csv_outp = csv.writer(csv_file, delimiter=',')
csv_outp.writerow(result)
I hope this help you