Changing multiple keys in a JSON file using Python - python

I am trying to update keys in my JSON files if certain conditions are met. The following is my Python code attempting to make multiple updates in a JSON file.
#!/usr/bin/env python
# Usage: update json file
#
# For every *.json file in json_dir, copy values to new keys according to
# update_map and write the result to json_dir_processed.
import json
import os

json_dir = "/opt/rdm/adggeth/ADGG-ETH-02/20181008/"
json_dir_processed = "/opt/rdm/adggeth/ADGG-ETH-02/20181008updated/"

# replacement mapping: key to create -> existing key whose value is copied.
# Built once, since it is the same for every file.
update_map = {
    "grp_farmerreg/farmerdetails/farmermobile": "grp_farmerdts/hh_id",
    "grp_farmerdts/hh_region": "grp_farmerdts/region",
    "grp_farmerdts/hh_district": "grp_farmerdts/district",
    "grp_farmerdts/hh_ward": "grp_farmerdts/ward",
    "grp_farmerdts/hh_village": "grp_farmerdts/village",
}

for json_file in os.listdir(json_dir):
    if json_file.endswith(".json"):
        processed_json = "%s%s" % (json_dir_processed, json_file)
        json_file = json_dir + json_file
        print("Processing %s -> %s" % (json_file, processed_json))
        with open(json_file, 'r') as f:
            json_data = json.load(f)
        # Keys of update_map that the document does not have yet.
        # set(...) - set(...) behaves the same on Python 2 and Python 3.
        diff_keys = set(update_map) - set(json_data)
        if not diff_keys:
            print("No Update to JSON keys")
        else:
            for k in diff_keys:
                # NOTE: raises KeyError when the source key update_map[k] is
                # absent from this document, and the old key is kept alongside
                # the new one (this adds keys, it does not rename them).
                json_data[k] = json_data[update_map[k]]
        with open(processed_json, 'w') as f:
            f.write(json.dumps(json_data, indent=4))
    else:
        print("%s not a JSON file" % json_file)
The JSON file I am trying to update is as follows:
{
....
"farmerregistrd": "1",
"grp_farmerdts/region": "5",
"datacollid": "0923678275",
"_status": "submitted_via_web",
"enumtype": "2",
"deviceid": "352948096845916",
"start_time": "2019-04-03T10:57:23.620+03",
"_uuid": "f1069eae-33f8-4850-a549-49fcde27f077",
"grp_farmerdts/village": "2852",
"_submitted_by": null,
"formhub/uuid": "42cb3fc351a74fd89702078160f849ca",
"grp_farmerdts/hh_id": "623",
"grp_farmerdts/ward": "136",
...
"_userform_id": "adggeth_ADGG-ETH-REG02-20181008",
"_id": 711097,
"grp_farmerdts/district": "31"
}
My expected output from running the above Python script is as follows:
{
....
"farmerregistrd": "1",
"grp_farmerdts/hh_region": "5",
"datacollid": "0923678275",
"_status": "submitted_via_web",
"enumtype": "2",
"deviceid": "352948096845916",
"start_time": "2019-04-03T10:57:23.620+03",
"_uuid": "f1069eae-33f8-4850-a549-49fcde27f077",
"grp_farmerdts/hh_village": "2852",
"_submitted_by": null,
"formhub/uuid": "42cb3fc351a74fd89702078160f849ca",
"grp_farmerdts/hh_id": "623",
"grp_farmerdts/hh_ward": "136",
...
"_userform_id": "adggeth_ADGG-ETH-REG02-20181008",
"_id": 711097,
"grp_farmerdts/hh_district": "31"
}

Using the re module and json.loads() with the object_hook= parameter (doc). This script adds the hh_ prefix to every grp_farmerdts/* key that does not already have it:
json_str = '''{
"farmerregistrd": "1",
"grp_farmerdts/region": "5",
"datacollid": "0923678275",
"_status": "submitted_via_web",
"enumtype": "2",
"deviceid": "352948096845916",
"start_time": "2019-04-03T10:57:23.620+03",
"_uuid": "f1069eae-33f8-4850-a549-49fcde27f077",
"grp_farmerdts/village": "2852",
"_submitted_by": null,
"formhub/uuid": "42cb3fc351a74fd89702078160f849ca",
"grp_farmerdts/hh_id": "623",
"grp_farmerdts/ward": "136",
"_userform_id": "adggeth_ADGG-ETH-REG02-20181008",
"_id": 711097,
"grp_farmerdts/district": "31"
}'''
import re
import json
# Matches "grp_farmerdts/<name>" where <name> does not already start with
# "hh_". Compiled once because change_keys runs for every decoded JSON object.
_FARMERDTS_KEY = re.compile(r'grp_farmerdts/(?!hh_)(\w+)')


def change_keys(d):
    """Return a copy of *d* with the ``hh_`` prefix added to farmer-detail keys.

    Intended as an ``object_hook`` for ``json.loads``: each key containing
    ``grp_farmerdts/<name>`` becomes ``grp_farmerdts/hh_<name>`` unless
    ``<name>`` already starts with ``hh_``. Values and all other keys are
    passed through unchanged.
    """
    return {_FARMERDTS_KEY.sub(r'grp_farmerdts/hh_\1', k): v for k, v in d.items()}
print(json.dumps(json.loads(json_str, object_hook=change_keys), indent=4))
Prints:
{
"farmerregistrd": "1",
"grp_farmerdts/hh_region": "5",
"datacollid": "0923678275",
"_status": "submitted_via_web",
"enumtype": "2",
"deviceid": "352948096845916",
"start_time": "2019-04-03T10:57:23.620+03",
"_uuid": "f1069eae-33f8-4850-a549-49fcde27f077",
"grp_farmerdts/hh_village": "2852",
"_submitted_by": null,
"formhub/uuid": "42cb3fc351a74fd89702078160f849ca",
"grp_farmerdts/hh_id": "623",
"grp_farmerdts/hh_ward": "136",
"_userform_id": "adggeth_ADGG-ETH-REG02-20181008",
"_id": 711097,
"grp_farmerdts/hh_district": "31"
}

According to your expected output all particular keys need to be checked (not one of them). Change your logic as shown below:
...
json_data = json.load(f)
# replacement mapping
update_map = {"grp_farmerreg/farmerdetails/farmermobile":"grp_farmerdts/hh_id",
"grp_farmerdts/hh_region":"grp_farmerdts/region",
"grp_farmerdts/hh_district":"grp_farmerdts/district",
"grp_farmerdts/hh_ward":"grp_farmerdts/ward", "grp_farmerdts/hh_village":"grp_farmerdts/village"}
diff_keys = update_map.keys() - json_data.keys()
if not diff_keys:
print("No Update to JSON keys")
else:
for k in diff_keys:
if update_map[k] in json_data:
json_data[k] = json_data[update_map[k]]

Related

How to convert nested JSON files to CSV in python

I am completely new to Python and am trying to convert nested JSON files to CSV. The code I am currently trying to use is:
import json
def read_json(filename: str) -> dict:
    """Load and return the JSON document stored in *filename*.

    Raises:
        Exception: if the file cannot be opened/read or does not contain
            valid JSON. The underlying error is chained as ``__cause__``.
    """
    try:
        with open(filename, "r") as f:
            return json.load(f)
    # OSError covers open/read failures; ValueError covers json.JSONDecodeError
    # and decoding errors. Anything else (e.g. KeyboardInterrupt) propagates.
    except (OSError, ValueError) as err:
        raise Exception(f"Reading {filename} file encountered an error") from err
def normalize_json(data: dict) -> dict:
    """Flatten exactly one level of nested dictionaries in *data*.

    A value that is a dict is replaced by one entry per inner key, named
    ``<outer>_<inner>``. Every other value is copied through unchanged; this
    includes lists (even lists of dicts) and dicts nested more than one level
    deep, which stay as they are.
    """
    new_data = dict()
    for key, value in data.items():
        if not isinstance(value, dict):
            # Scalars and lists are kept under their original key.
            new_data[key] = value
        else:
            for k, v in value.items():
                new_data[key + "_" + k] = v
    return new_data
def generate_csv_data(data: dict) -> str:
    """Render *data* as a two-line CSV document: a header row and one record.

    Columns follow the dict's key order and every value is converted with
    ``str()``. Rows end with ``"\\n"``. Fields are written by the ``csv``
    module, so a header or value containing a comma, quote or newline is
    quoted instead of silently producing extra columns.
    """
    import csv
    import io

    # Defining CSV columns in a list to maintain the order
    csv_columns = list(data.keys())
    buffer = io.StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    # Generate the first row of CSV
    writer.writerow(csv_columns)
    # Generate the single record present
    writer.writerow([str(data[col]) for col in csv_columns])
    return buffer.getvalue()
def write_to_file(data: str, filepath: str) -> bool:
    """Write *data* to *filepath*, replacing any existing content.

    Returns:
        True once the data has been written.

    Raises:
        Exception: if the file cannot be opened or written. The underlying
            error is chained as ``__cause__``.
    """
    try:
        with open(filepath, "w") as f:
            f.write(data)
    # OSError covers open/write failures; ValueError covers encoding errors.
    except (OSError, ValueError) as err:
        raise Exception(f"Saving data to {filepath} encountered an error") from err
    return True
def main():
    """Convert ``test2.json`` to ``data2.csv`` via the helper functions."""
    # Read the JSON file as python dictionary
    data = read_json(filename="test2.json")
    # Normalize the nested python dict
    new_data = normalize_json(data=data)
    # Pretty print the new dict object
    print("New dict:", new_data)
    # Generate the desired CSV data
    csv_data = generate_csv_data(data=new_data)
    # Save the generated CSV data to a CSV file
    write_to_file(data=csv_data, filepath="data2.csv")


if __name__ == '__main__':
    main()
It works partly: I get a CSV file that contains all values. However, for the nested key fields it only gives me the "highest" level (e.g. I get "currentEmployments" but not "currentEmployments_firmId").
Could someone help me with this?
Sample json file:
{
"basicInformation": {
"individualId": 10000,
"firstName": "Name",
"middleName": "middleName.",
"lastName": "lastName",
"bcScope": "Active",
"iaScope": "NotInScope",
"daysInIndustryCalculatedDate": "1/1/2000"
},
"currentEmployments": [
{
"firmId": 001,
"firmName": "firm1",
"iaOnly": "N",
"registrationBeginDate": "1/1/2005",
"firmBCScope": "ACTIVE",
"firmIAScope": "ACTIVE",
"iaSECNumber": "10000",
"iaSECNumberType": "100",
"bdSECNumber": "1000",
"branchOfficeLocations": [
{
"locatedAtFlag": "Y",
"supervisedFromFlag": "N",
"privateResidenceFlag": "N",
"branchOfficeId": "10000",
"street1": "street1",
"city": "city",
"state": "MD",
"country": "United States",
"zipCode": "10000"
}
]
}
],
"currentIAEmployments": [],
"previousEmployments": [
{
"iaOnly": "N",
"bdSECNumber": "20000",
"firmId": 200,
"firmName": "firm2",
"street1": "street",
"city": "city",
"state": "MD",
"country": "UNITED STATES",
"zipCode": "10000",
}
],
"examsCount": {
"stateExamCount": 0,
"principalExamCount": 0,
"productExamCount": 1
},
}

Python check value of key in element and return element in list

I have a json file, and I'm reading this file with json library
This is the json content (example)
{
"type": "champion",
"format": "standAloneComplex",
"version": "10.18.1",
"data": {
"Aatrox": {
"version": "10.18.1",
"id": "Aatrox",
"key": "266",
"name": "Aatrox"
},
"Ahri": {
"version": "10.18.1",
"id": "Ahri",
"key": "103",
"name": "Ahri",
},
}
Now how can I check if key is equal to 266 and return the value of name?
I was trying with something like this
import json

with open('./source/champion.json') as json_file:
    data_champs = json.load(json_file)['data']

for champ in data_champs:
    for champ_info in data_champs[champ]:
        # NOTE: iterating a dict yields its keys, so `champ` is a str here;
        # indexing it with 'key' is what raises the TypeError quoted below.
        if champ['key'] == 266:
            print(champ)
But it raises TypeError: string indices must be integers
Try the following:
import json

with open('./source/champion.json') as json_file:
    for name, info in json.load(json_file)['data'].items():
        # "key" is stored as a string in the JSON ("266"), so compare it with
        # a string; comparing with the int 266 never matches.
        if info['key'] == '266':
            print(info['name'])
Or even better, we can close the file after we get the data and not keep it open during processing:
import json

# Load everything first so the file is closed before the data is processed.
with open('./source/champion.json') as json_file:
    data = json.load(json_file)['data']

for name, info in data.items():
    # "key" is stored as a string in the JSON ("266"), so compare it with a
    # string; comparing with the int 266 never matches.
    if info['key'] == '266':
        print(info['name'])
Explanation
The easiest way to iterate over a dict's elements is by using its .items() method:
# `d` stands for any dictionary; .items() yields (key, value) pairs.
for key, value in d.items():
    print(key, "-->", value)
See below (iterating over the values only, since the keys are not important here):
import json

with open('data.json') as f:
    data = json.load(f)['data']

# Only the per-champion records are needed, so iterate over the values.
for v in data.values():
    if v['key'] == '266':
        print(v['name'])
        break  # stop at the first match
output
Aatrox
Here you go:
import json

with open('champion.json') as json_file:
    data_champs = json.load(json_file)['data']

# Iterate over the records directly instead of looking each one up by key.
for champ in data_champs.values():
    if champ['key'] == '266':
        print(champ['name'])
Prints:
Aatrox

Not able to display json value

I have an inventory which I collected from Ansible and I'm reading it with Python, but I have had no success printing the values. I have the following:
the content below is in a file called "localhost".
{
"ansible_facts": {
"facter_architecture": "x86_64",
"facter_bios_release_date": "12/01/2006",
"facter_bios_vendor": "innotek GmbH",
"facter_bios_version": "VirtualBox",
"facter_blockdevice_sda_model": "VBOX HARDDISK",
"facter_blockdevice_sda_size": 10737418240,
"facter_blockdevice_sda_vendor": "ATA",
"facter_blockdevice_sr0_model": "CD-ROM",
"facter_blockdevice_sr0_size": 1073741312,
"facter_blockdevice_sr0_vendor": "VBOX",
"facter_blockdevices": "sda,sr0",
"facter_boardmanufacturer": "Oracle Corporation",
"facter_boardproductname": "VirtualBox",
"facter_boardserialnumber": "0",
"facter_dhcp_servers": {
"enp0s3": "10.0.2.2",
"enp0s8": "192.168.1.1",
"system": "10.0.2.2"
},
"facter_domain": "home",
"facter_facterversion": "2.4.1",
"facter_filesystems": "xfs",
"facter_fqdn": "mylab.home",
"facter_gid": "root",
"facter_hardwareisa": "x86_64",
"facter_hardwaremodel": "x86_64",
"facter_hostname": "mylab",
"facter_id": "root",
"facter_interfaces": "enp0s3,enp0s8,lo",
"facter_ipaddress": "10.0.2.15",
"facter_ipaddress_enp0s3": "10.0.2.15",
"facter_ipaddress_enp0s8": "192.168.1.101",
"facter_ipaddress_lo": "127.0.0.1",
"facter_is_virtual": true,
"facter_kernel": "Linux",
"facter_kernelmajversion": "3.10",
"facter_kernelrelease": "3.10.0-1127.13.1.el7.x86_64",
"facter_kernelversion": "3.10.0",
"facter_macaddress": "08:00:27:dd:47:a8",
"facter_macaddress_enp0s3": "08:00:27:dd:47:a8",
"facter_macaddress_enp0s8": "08:00:27:12:ce:46",
"facter_manufacturer": "innotek GmbH",
"facter_memoryfree": "730.39 MB",
"facter_memoryfree_mb": "730.39",
"facter_memorysize": "990.98 MB",
"facter_memorysize_mb": "990.98",
"facter_mtu_enp0s3": 1500,
"facter_mtu_enp0s8": 1500,
"facter_mtu_lo": 65536,
"facter_netmask": "255.255.255.0",
"facter_netmask_enp0s3": "255.255.255.0",
"facter_netmask_enp0s8": "255.255.255.0",
"facter_netmask_lo": "255.0.0.0",
"facter_network_enp0s3": "10.0.2.0",
"facter_network_enp0s8": "192.168.1.0",
"facter_network_lo": "127.0.0.0",
"facter_operatingsystem": "CentOS",
"facter_operatingsystemmajrelease": "7",
"facter_operatingsystemrelease": "7.8.2003",
"facter_os": {
"family": "RedHat",
"name": "CentOS",
"release": {
"full": "7.8.2003",
"major": "7",
"minor": "8"
}
},
"facter_osfamily": "RedHat",
"facter_partitions": {
"sda1": {
"filesystem": "xfs",
"mount": "/boot",
"size": "2097152",
"uuid": "987fb5e2-f636-423b-997d-c2654993708c"
},
"sda2": {
"filesystem": "LVM2_member",
"size": "18872320"
}
},
"facter_path": "/root/.rbenv/shims:/root/.rbenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin/python3:/root/bin",
"facter_physicalprocessorcount": 1,
"facter_processor0": "Intel(R) Core(TM) i5-5350U CPU # 1.80GHz",
"facter_processorcount": 1,
"facter_processors": {
"count": 1,
"models": [
"Intel(R) Core(TM) i5-5350U CPU # 1.80GHz"
],
"physicalcount": 1
},
"facter_productname": "VirtualBox",
"facter_ps": "ps -ef",
"facter_rubyplatform": "x86_64-linux",
"facter_rubysitedir": "/usr/local/share/ruby/site_ruby/",
"facter_rubyversion": "2.0.0",
"facter_selinux": false,
"facter_serialnumber": "0",
"facter_sshecdsakey": "AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBE8u+HCceoSA9mrw1oboY4sAXhkgp4CBTe8QjuW2OAeYIQS7LblPztfQmJFkXpHTWhLvSBYglzcuZiJOzUbZQ/0=",
"facter_sshed25519key": "AAAAC3NzaC1lZDI1NTE5AAAAIDutjvzHFEKqc0bprmbkm9ZUoADflkan6dnCVcYsGOTT",
"facter_sshfp_ecdsa": "SSHFP 3 1 2190a1073f110b50ed6fb912cd04144603a85098\nSSHFP 3 2 3a708ee555593b0ad5a2f1992ae949d56d2f3556c37b201ef683d4a3ea850660",
"facter_sshfp_ed25519": "SSHFP 4 1 695978669c105b4a3a06c4b9a685020363b72c67\nSSHFP 4 2 794f24ad5bd7b91a1d3a0f484ceb166088ac8d57e3e1682e8b8fe006fde1c169",
"facter_sshfp_rsa": "SSHFP 1 1 07afd9583d0785ac923230bef3b0d0ffeefad097\nSSHFP 1 2 29158514b311cc7687fa1c3aab1fa1abee0f2f581eb3d607a6b4ffb8ff258d59",
"facter_sshrsakey": "AAAAB3NzaC1yc2EAAAADAQABAAABAQCtgyTEG+VnEnXiiaP4tFpIiWwWfqxdW8BCATa5W9QE0AsfY1OiFoLRXYGqhL72q0N+VTHQGB7eB1sd9Nas48erDzZXpgLoIDqM1pa/vT/j/SygQB2rwgo2wga0tw+zW1cw+sELjXHAYsi8DADKbGlX2cCeT3MKeWdkg+BQogf74Sy4NEPbYhILXPfvt3cJxCM02sIn/eQL+n06iSzesUIEy5n+AlRgACR3zHnk5rtHipj/RzmPv+J0V3du7+g6/3TiKDcBTNHtb8QSa4DSGkmbW7Wdhvnw9GyhO5ySGB2G3rmLiVIm9vdjB9L/X/L2g8TB5+/dO52UxUSX17nwzuJB",
"facter_swapfree": "1024.00 MB",
"facter_swapfree_mb": "1024.00",
"facter_swapsize": "1024.00 MB",
"facter_swapsize_mb": "1024.00",
"facter_system_uptime": {
"days": 0,
"hours": 3,
"seconds": 12984,
"uptime": "3:36 hours"
},
"facter_timezone": "CEST",
"facter_type": "Other",
"facter_uniqueid": "000a0f02",
"facter_uptime": "3:36 hours",
"facter_uptime_days": 0,
"facter_uptime_hours": 3,
"facter_uptime_seconds": 12984,
"facter_uuid": "b64ed9b0-7168-4e49-a34a-90e6ea6f751a",
"facter_virtual": "kvm"
},
"changed": false
}
I have the following code.
I tried a lot of different things, but somehow it complains about "list indices must be integers or slices, not str"
import json

dict = []  # NOTE: this is a list, and the name shadows the builtin `dict`
with open('localhost', 'r') as jsonfile:
    myfile = json.load(jsonfile)
    # json.dumps turns the parsed data back into a JSON string
    result = json.dumps(myfile, indent=2, sort_keys=True)
    dict.append(result)
# Indexing a list with a str raises the error quoted above.
print(dict['ansible_facts'])
From this list I need to extract
facter_system_uptime and facter_hostname only but I had no success.
I tried to put in a loop, but still same issue.
# `dict` is still the list built above, so this indexing fails the same way.
for data in dict['ansible_facts']:
    print(data)
What's wrong here?
you don't need to use json.dumps because you've already used json.load to deserialize the JSON document to a Python object.
import json

with open("localhost") as jsonfile:
    # json.load already returns Python objects; no json.dumps round-trip needed.
    myfile = json.load(jsonfile)

print(myfile["ansible_facts"]["facter_system_uptime"])
print(myfile["ansible_facts"]["facter_hostname"])
Don't use dict as the name for a variable - it shadows the builtin and will cause you problems.
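You created a list (dict = []) and then appended to it, so the element has to be reached by position: dict[0]. Note also that what you appended is result, the string returned by json.dumps, not the parsed data; append myfile instead, and dict[0]['ansible_facts'] will work.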
I do not know what Ansible is, but to my knowledge, your error is because you initialized dict as a list:
dict = []
which you can access its elements by numerical indices, but instead you gave it a string:
print(dict['ansible_facts'])
this is how you initialize a dictionary:
dict = {}
There are two issues here.
You have created a list instead of dict
You have used the name of a built-in type (dict) as a variable name, which shadows the built-in
So the final code should be something like
import json

with open('localhost', 'r') as jsonfile:
    myfile = json.load(jsonfile)

# Index the parsed data directly: first the top-level key, then the fact.
print(myfile['ansible_facts']['facter_system_uptime'])
print(myfile['ansible_facts']['facter_hostname'])
If you want to print all the data using loop
You can iterate on myfile['ansible_facts'] like this.
import json

with open('localhost', 'r') as jsonfile:
    myfile = json.load(jsonfile)

# Print every fact name together with its value.
for key, value in myfile['ansible_facts'].items():
    print(key, value)

How to write a variable in a json file?

I searched for a long time but I am not really familiar with python and json and I can't find the answer of my problem.
Here is my Python script
import json
# NOTE: the file is opened without a `with` block and is never closed here.
jsonFile = open("config.json", "r")
# For the config.json shown below (a JSON object) this yields a dictionary.
data = json.load(jsonFile)
# This is the failing line: a dictionary has no .format method.
# friendly, teaching, leader and target are not defined in this snippet;
# presumably they are set elsewhere in the asker's script.
data.format(friendly, teaching, leader, target)
print(data)
Here is json the file:
{
"commend": {
"friendly": {},
"teaching": {},
"leader": {}
},
"account": {
"username": "",
"password": "",
"sharedSecret": ""
},
"proxy": {
"enabled": false,
"file": "proxies.txt",
"switchProxyEveryXaccounts": 5
},
"type": "COMMEND",
"method": "SERVER",
"target": "https://steamcommunity.com/id/{}",
"perChunk": 20,
"betweenChunks": 300000,
"cooldown": 28800000,
"steamWebAPIKey": "{}",
"disableUpdateCheck": false
}
I tried .format, but we can't use this method with a dictionary.
With your help I managed to find the answer A big thank you for your speed and your help ! Here is what I did:
import json

# The with-block closes the file as soon as the config has been parsed.
with open("config.json", "r") as jsonFile:
    data = json.load(jsonFile)

# nbfriendly, nbteaching and nbleader are not defined in this snippet;
# presumably they are set earlier in the asker's script.
data['commend']['friendly'] = nbfriendly
data['commend']['teaching'] = nbteaching
data['commend']['leader'] = nbleader
print(data)
json.load returns a dictionary for a JSON object like this one, so you can use dict operations on it. Here is the code:
import json

with open("config.json", "r") as json_file:
    data = json.load(json_file)

# Let's say you want to add the string "Hello, World!" to the "password" key
data["account"]["password"] += "Hello, World!"
# Or you can use this way to overwrite anything already written inside the key
data["account"]["password"] = "Hello, World!"
print(data)
You can add data by traversing it like a dictionary:
data['key'] = value
Example:
dic["commend"]["friendly"]={'a':1}

Writing dictionary to JSON using Python

I'm new to Python programming, so do bear with me if I make any mistakes anywhere
I'm trying to write a json file using 2 dictionaries and dump the output to the file using the following code on Windows
import json
import sys
import string
from time import strftime

scan_results = open("scan_results.txt", "r")
saved = sys.stdout
# Everything printed below goes into report.json until stdout is restored.
f = open('report.json', 'w')
sys.stdout = f
for line in scan_results:
    if ".jpg" in line:
        lst = []
        result = line.split('\\')
        result_split = result[5].split(' ')
        filename = result_split[0]
        raw_status = result_split[3]
        if "OK" in raw_status:
            status = "Okay"
            status_code = "0"
            # NOTE(review): the original nesting was lost; the record is built
            # inside this branch because status/status_code are only set here.
            dict = {'FileName': filename, 'DateTime': strftime("%Y-%m-%d %H:%M:%S"), 'statusCode': status_code, 'Description': status}
            dict2 = {filename : dict}
            # One separate JSON object is dumped per file; this is the output
            # shape the question is asking about.
            print(json.dumps(dict2))
sys.stdout = saved
f.close()
print("JSON report written")
The problem is, the output that I have is
{
"car-30537.jpg": {
"statusCode": "0",
"DateTime": "2012-02-07 09:52:26",
"Description": "Okay",
"FileName": "car-30537.jpg"
}
}{
"car-30538.jpg": {
"statusCode": "0",
"DateTime": "2012-02-07 09:52:26",
"Description": "Okay",
"FileName": "car-30538.jpg"
}
}
whereas the output that I want is
{
"car-30537.jpg": {
"statusCode": "0",
"DateTime": "2012-02-07 09:52:26",
"Description": "Okay",
"FileName": "car-30537.jpg"
},
{
"car-30538.jpg": {
"statusCode": "0",
"DateTime": "2012-02-07 09:52:26",
"Description": "Okay",
"FileName": "car-30538.jpg"
}
}
Is there any ways to correct this problem? Thanks in advance
You are making lots of dicts, while you only need one main dict containing one entry per file:
import json
import sys
import string
from time import strftime

dict2 = {} #Create one output dict

# The with-block closes the scan log even if a line fails to parse.
with open("scan_results.txt", "r") as scan_results:
    for line in scan_results:
        if ".jpg" not in line:
            continue
        result = line.split('\\')
        result_split = result[5].split(' ')
        filename = result_split[0]
        raw_status = result_split[3]
        if "OK" in raw_status:
            status = "Okay"
            status_code = "0"
            # NOTE(review): the original nesting was lost; the record is added
            # inside this branch because status/status_code are only set here.
            dict2[filename] = {'FileName': filename, 'DateTime': strftime("%Y-%m-%d %H:%M:%S"), 'statusCode': status_code, 'Description': status} #Add to that dict.

# Write the single combined object straight to the report file instead of
# temporarily redirecting sys.stdout.
with open('report.json', 'w') as f:
    json.dump(dict2, f) #Write it out at the end.
    f.write("\n")
print("JSON report written")
I added comments to modified lines.

Categories

Resources