Ordering with default dict append - python

I have some Python code (see below) that takes a JSON file with the following structure:
{
"name":"Winking Entertainment",
"imports":"Translink Capital"
},
{
"name":"Wochacha",
"imports":"Sequoia Capital"
},
{
"name":"Wuhan Kindstar Diagnostics",
"imports":"Baird Venture Partners"
},
It aggregates repeated values in "imports" and collects the matching "name" strings into a single array for that entry (see snippet below):
import json
from collections import defaultdict
def map_names_to_imports(raw_data):
    """Group the 'name' of each row under that row's 'imports' value.

    Returns a defaultdict(list) keyed by row['imports']; every value is the
    list of row['name'] entries that shared that key, in input order.
    """
    grouped = defaultdict(list)
    for entry in raw_data:
        bucket = grouped[entry['imports']]
        bucket.append(entry['name'])
    return grouped
def reformat(name_to_imports):
    """Turn the grouped mapping into a list of {'name', 'imports'} dicts.

    Each key becomes 'name'; its values, de-duplicated through a set (so
    their order is not preserved), become 'imports'.
    """
    return [
        {'name': key, 'imports': list(set(values))}
        for key, values in name_to_imports.items()
    ]
def run(raw_data):
    """Aggregate *raw_data* and write the result to 'clean-data2.json'.

    The rows are grouped with map_names_to_imports, reshaped with reformat,
    and the resulting list is serialized as JSON.
    """
    name_to_imports = map_names_to_imports(raw_data)
    output = reformat(name_to_imports)
    # json.dumps returns text, so the file is opened in text mode; writing
    # that text to a 'wb' handle raises TypeError on Python 3.
    with open('clean-data2.json', 'w') as f:
        f.write(json.dumps(output))
if __name__ == '__main__':
    # Use a context manager so the input file is closed once it is parsed.
    with open('bricinvestors.json') as source:
        raw_data = json.load(source)
    run(raw_data)
The issue I am having is that my JSON file is not coming out the right way.
For some reason, the order of the name and imports keys is getting reversed, so my output looks like:
{"imports": ["SinoHub"], "name": "Iroquois Capital"}, {"imports": ["Qunar.com", "Lashou.com"], "name": "Tenaya Capital"}
In fact, I want to keep the {"name": "string", "imports": "string"} format -- and not the other way around.
What should I do?
Thanks.

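If you're using Python 2.7+, you could pass collections.OrderedDict as the object_pairs_hook argument to json.load() so that parsed objects keep their key order, and build your output entries as OrderedDicts instead of standard Python dicts. The standard library dict class doesn't guarantee the ordering of keys (prior to Python 3.7).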

Building on dano's answer, you could use the OrderedDict.setdefault method instead of using a defaultdict:
import json
import collections
OrderedDict = collections.OrderedDict
def map_names_to_imports(raw_data):
    """Group row names by their 'imports' value, keeping first-seen key order.

    Returns an OrderedDict mapping each distinct row['imports'] to the list
    of row['name'] values that carried it.
    """
    by_import = OrderedDict()
    for record in raw_data:
        members = by_import.setdefault(record['imports'], [])
        members.append(record['name'])
    return by_import
def reformat(name_to_imports):
    """Convert the grouped mapping into a list of ordered output records.

    Each key of *name_to_imports* becomes the record's 'name' and its values
    become 'imports'. Every record is an OrderedDict, so 'name' is always
    emitted before 'imports'.

    Duplicate values are removed while keeping first-seen order; set() would
    also remove them but returns the items in arbitrary order.
    """
    output = []
    for name, imports in name_to_imports.items():
        unique_imports = list(OrderedDict.fromkeys(imports))
        output.append(OrderedDict([('name', name),
                                   ('imports', unique_imports)]))
    return output
def run(raw_data):
    """Aggregate *raw_data* and write the result to 'clean-data2.json'.

    The rows are grouped with map_names_to_imports, reshaped with reformat,
    and the resulting list is serialized as JSON.
    """
    name_to_imports = map_names_to_imports(raw_data)
    output = reformat(name_to_imports)
    # json.dumps returns text; a 'wb' handle rejects it with TypeError on
    # Python 3, while text mode works on both Python 2 and 3.
    with open('clean-data2.json', 'w') as f:
        f.write(json.dumps(output))
if __name__ == '__main__':
    # object_pairs_hook=OrderedDict makes every parsed JSON object keep the
    # key order found in the file. The context manager closes the input file
    # as soon as parsing is done.
    with open('bricinvestors.json') as source:
        raw_data = json.load(source, object_pairs_hook=OrderedDict)
    run(raw_data)

Final version, which is based in large part on @unutbu's answer.
import json
import collections
OrderedDict = collections.OrderedDict
def map_names_to_imports(raw_data):
    """Collect the 'name' of every row under the row's 'imports' key.

    The returned OrderedDict lists its keys in the order they first appear
    in *raw_data*; each value is a list of names in input order.
    """
    grouped = OrderedDict()
    for item in raw_data:
        import_key = item['imports']
        if import_key not in grouped:
            grouped[import_key] = []
        grouped[import_key].append(item['name'])
    return grouped
def reformat(name_to_imports):
    """Build the list of output records from the grouped mapping.

    For every (key, values) pair an OrderedDict is produced with 'name' set
    to the key and 'imports' set to the de-duplicated values, so 'name' is
    always serialized first.

    OrderedDict.fromkeys removes duplicates while keeping first-seen order;
    going through set() would return the items in arbitrary order.
    """
    the_output = []
    for name, imports in name_to_imports.items():
        distinct = list(OrderedDict.fromkeys(imports))
        the_output.append(OrderedDict([('name', name), ('imports', distinct)]))
    return the_output
def run(raw_data):
    """Group and reshape *raw_data*, then save it as JSON.

    The result is written to 'data/clean-data2.json' using UTF-8.
    """
    grouped = map_names_to_imports(raw_data)
    entries = reformat(grouped)
    with open('data/clean-data2.json', 'w+', encoding='utf8') as out_file:
        payload = json.dumps(entries)
        out_file.write(payload)
if __name__ == '__main__':
    # object_pairs_hook=OrderedDict keeps the key order of every parsed JSON
    # object. The context manager closes the input file after parsing.
    with open('data/bricsinvestorsfirst.json') as source:
        raw_data = json.load(source, object_pairs_hook=OrderedDict)
    run(raw_data)

Related

How to replace value for a specific key in json file using python language

Json file
{
"payloadFormatVersion": "9.0",
"payload": {
"ServiceConfiguration": {
"LoggingSettings": {
"NumberOfLogFilesToKeep": 7,
"LogFileSizeBytes": 0,
"LogFolderPath": "C:\\demo\\logs\\feature\\",
"EnvironmentType": "testingenv",
"DataRelayLogSink": {
"PeriodInSeconds": 60,
"TargetAddress": "http://localhost:portNumber/dumm1",
"TargetTokenAddress": "http://localhost:portnumber/token",
"PayloadType": "somedata",
"TokenCredentials": {
"ClientId": "testclientid",
"ClientSecret": "testclientsecret",
"GrantType": "testgranttype"
}
}
},
}
}
JSON Content
def repalcejsonForSpecificKey(keyPath, fileName):
    """Load a configuration JSON file and print the value under keyPath[0].

    Args:
        keyPath: Sequence of keys; only the first element is looked up here.
        fileName: Base name used for both the sub-folder and the .json file
            under the hard-coded configuration directory.
    """
    filePath = "C:\\rajesh\\Configurations\\" + fileName + "\\" + fileName + ".json"
    print(filePath)
    with open(filePath) as f:
        superHeroSquad = json.load(f)
    # Pass the dict positionally. '**superHeroSquad' would spread its keys as
    # keyword arguments, which getDictonaryItems does not accept (TypeError).
    testDict = getDictonaryItems(keyPath[0], superHeroSquad)
    print(testDict)
def getDictonaryItems(searchKey, duplicatedict):
    """Return duplicatedict[searchKey], or None when the key is absent."""
    if searchKey not in duplicatedict:
        return None
    return duplicatedict[searchKey]
# Nested keys, outermost first, identifying the value of interest.
# NOTE(review): the sample JSON shown with this question has no
# "TokenSettings" key - confirm the path against the real file.
keyPath = ["payload","ServiceConfiguration", "TokenSettings", "ClientId"]
# Base name used for both the configuration sub-folder and the .json file.
fileName="vestas.sdh.dr.gateway"
repalcejsonForSpecificKey(keyPath,fileName)
Below is my plan:
Method 1 accepts two arguments: the key path whose value should be replaced, and the file name.
It loads the JSON file into a dictionary.
It calls method 2 recursively; method 2 accepts two arguments, a search key and a dictionary, and returns everything stored under the key passed in the call.
Method 2 is called recursively until the innermost key is reached, and the value is updated if the key is found.
I am trying to update a nested value in a JSON file using Python.
Note: I was able to update the value in the Json file directly with below line
superHeroSquad ['payload']['ServiceConfiguration']['TokenSettings']['ClientId'] = "text"
But not like below
superHeroSquad[keyPath[0][keyPath[1]][keyPath[2]][keyPath[3]] = "text"
You could traverse your json as a map and replace the specific values like this:
import json
def replace_json_for_specific_key(file: str, key_pairs: dict[str, any]):
    """Set one or more nested values in a JSON file, rewriting it in place.

    Args:
        file: Path of the JSON file to read and then overwrite.
        key_pairs: Maps a dot-separated key path (e.g. "a.b.c") to the value
            to store there. Missing intermediate objects are created.
    """
    # Read inside a context manager so the input handle is closed promptly.
    with open(file) as source:
        content = json.load(source)
    for dotted_path, value in key_pairs.items():
        *parents, leaf = dotted_path.split(".")
        element = content
        for key in parents:
            element = element.setdefault(key, {})
        element[leaf] = value
    # The context manager flushes and closes the file even if json.dump fails;
    # a bare open() + flush() leaves the handle open.
    with open(file, "w") as target:
        json.dump(content, target)
if __name__ == '__main__':
    # Dot-separated key paths mapped to their replacement values.
    replacements = {
        "payload.ServiceConfiguration.LoggingSettings.NumberOfLogFilesToKeep": 90,
        "payload.ServiceConfiguration.LoggingSettings.DataRelayLogSink.TokenCredentials.ClientId": "anothervalue",
    }
    replace_json_for_specific_key("input.json", replacements)
Notice it will allow you to replace several values at once. You'll need to pass the dot (.) separated path to the specific key.

python get data json value max

How can I extract the T3 Period, Year and maximum value?
file.json
[
{"Fecha":"2022-08-01T00:00:00.000+02:00", "T3_TipoDato":"Avance", "T3_Periodo":"M08", "Anyo":2022, "value":10.4},
{"Fecha":"2022-07-01T00:00:00.000+02:00", "T3_TipoDato":"Definitivo", "T3_Periodo":"M07", "Anyo":2022, "value":10.8},
{"Fecha":"2022-06-01T00:00:00.000+02:00", "T3_TipoDato":"Definitivo", "T3_Periodo":"M06", "Anyo":2022, "value":10.2}
]
My code:
import json
# Parse file.json and show the decoded content.
with open("file.json") as f:
    distros_dict = json.loads(f.read())
print(distros_dict)
Here is my suggestion:
Load the data from the file into a list.
Loop through every dict in the list to edit it.
(In my example, I delete two keys from every dict in the list.)
import json
# Load the records, drop the fields that are not needed, and keep only the
# record with the largest 'value'.
with open('file.json', 'r', encoding='utf-8') as f:
    distros_dict = json.load(f)
for item in distros_dict:
    item.pop('Fecha')
    item.pop('T3_TipoDato')
# max() finds the top record in a single pass; sorting the whole list just to
# take its first element is unnecessary. Ties resolve to the earliest record,
# as with the stable reverse sort. An empty list raises ValueError.
distros_dict = max(distros_dict, key=lambda i: i['value'])
Try this:
from json import load
# Pick the record with the highest "value" and report only the fields asked for.
with open("file.json") as f:
    dictionary_max = max(load(f), key=lambda record: record["value"])
result = {
    field: dictionary_max[field]
    for field in ("T3_Periodo", "Anyo", "value")
}
print(result)
output:
{'T3_Periodo': 'M07', 'Anyo': 2022, 'value': 10.8}

Open a JSON files and edit structure

I have produced a couple of json files after scraping a few elements. The structure for each file is as follows:
us.json
{'Pres': 'Biden', 'Vice': 'Harris', 'Secretary': 'Blinken'}
uk.json
{'1st Min': 'Johnson', 'Queen':'Elizabeth', 'Prince': 'Charles'}
I'd like to know how I could edit the structure of each dictionary inside the json file to get an output as it follows:
[
{"title": "Pres",
"name": "Biden"}
,
{"title": "Vice",
"name": "Harris"}
,
{"title": "Secretary",
"name": "Blinken"}
]
As far as I can tell (I'm a beginner and have only been studying for a few weeks), I first need to run a loop to open each file, then generate a list of dictionaries, and finally modify each dictionary to change the structure. This is what I have so far; it is NOT WORKING, because it always overwrites the same keys.
import os
import json
# Accumulates the parsed JSON object from every file in the folder.
list_of_dicts = []
for filename in os.listdir("DOCS/Countries Data"):
with open(os.path.join("DOCS/Countries Data", filename), 'r', encoding='utf-8') as f:
text = f.read()
country_json = json.loads(text)
list_of_dicts.append(country_json)
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loops below cannot be determined from here.
for country in list_of_dicts:
newdict = country
lastdict = {}
for key in newdict:
# A brand-new dict is bound to lastdict on every pass, discarding the previous one.
lastdict = {'Title': key}
for value in newdict.values():
# The same 'Name' key is assigned on every pass, so only the last value remains.
lastdict['Name'] = value
print(lastdict)
Extra bonus if you could also show me how to generate an ID number for each entry. Thank you very much.
This looks like a task for a list comprehension; I would do it the following way:
import json
# JSON text for one country: each key is a title, each value the holder's name.
us = '{"Pres": "Biden", "Vice": "Harris", "Secretary": "Blinken"}'
data = json.loads(us)
# Re-shape every (key, value) pair into its own {"title", "name"} record.
us2 = [dict(zip(("title", "name"), pair)) for pair in data.items()]
us2json = json.dumps(us2)
print(us2json)
output
[{"title": "Pres", "name": "Biden"}, {"title": "Vice", "name": "Harris"}, {"title": "Secretary", "name": "Blinken"}]
data is a dict; .items() provides key-value pairs, which I unpack into k and v (see tuple unpacking).
You can do this easily by writing a simple function like below
import uuid
def format_dict(data: dict):
    """Return one {'title', 'name', 'id'} record per item of *data*.

    Each key becomes 'title', each value 'name', and 'id' is a freshly
    generated random UUID rendered as a string.
    """
    records = []
    for title, name in data.items():
        records.append({"title": title, "name": name, "id": str(uuid.uuid4())})
    return records
This turns each item into its own object and adds a unique identifier to each one using uuid.
Full code can be modified like this
import uuid
import os
import json
def format_dict(data: dict):
    """Expand *data* into a list of records, one per key/value pair.

    Every record holds the key as 'title', the value as 'name', and a new
    random UUID string as 'id'.
    """
    return [
        {"title": key, "name": value, "id": str(uuid.uuid4())}
        for key, value in data.items()
    ]
# Collect one formatted record list per file found in the folder.
list_of_dicts = []
for filename in os.listdir("DOCS/Countries Data"):
    file_path = os.path.join("DOCS/Countries Data", filename)
    with open(file_path, 'r', encoding='utf-8') as f:
        country_json = json.load(f)
    list_of_dicts.append(format_dict(country_json))
# list_of_dicts contains all file contents

json to dictionary in python

This is my json file input.
{"Report":{"id":101,"type":"typeA","Replist":[{"rptid":"r001","subrpt":{"subid":74,"subname":"name1","subval":113},"RelsubList":[{"Relid":8,"Relsubdetails":{"Rel_subname":"name8","Rel_Subval":65}},{"Relid":5,"Relsubdetails":{"Rel_subname":"name5","Rel_Subval":40}}],"fldA":30,"fldB":23}]}}
...
I am writing python program to convert the input into the below format in my dictionary.
I am new to python.
Expected output:
out: {"id": "101", "type": "typeA", "rptid": "r001", "subrpt_subid": "74", "subrpt_subname": "name1", "subrpt_subval":"113","Relid":"8","Rel_subname":"name8","Rel_Subval":"65","Relid":"5","Rel_subname":"name5","Rel_Subval":"40","fldA":"30","fldB":"23"
I used the following logic to convert the output till subrpt.
Current output:
out: {'id': '101', 'type': 'typeA', 'rptid': 'r001', 'subrpt_subid': '74', 'subrpt_subname': 'name1', 'subrpt_subval': '113'}
But I am struggling with the logic for RelsubList (it looks like it contains both a list and dictionaries, i.e. [{}]).
Please help me work out the logic for it.
import json
# NOTE(review): list1 and dict2 are never used in this snippet.
list1 = []
dict1 = {}
dict2 = {}
data_file = "samp1.json"
# NOTE(review): the file is opened without a context manager and is never
# explicitly closed in this snippet.
file = open(data_file)
# Each line of the file is parsed as a separate JSON document.
for line in file:
json_line = json.loads(line)
# Descend into the top-level "Report" object.
json_line = json_line["Report"]
dict1["id"]=str(json_line["id"])
dict1["type"] = str(json_line["type"])
# Replist is indexed as a list below; only its first element ([0]) is read.
json_line = json_line["Replist"]
dict1["rptid"]= str(json_line[0]["rptid"])
dict1["subrpt_subid"] = str(json_line[0]["subrpt"]["subid"])
dict1["subrpt_subname"] = str(json_line[0]["subrpt"]["subname"])
dict1["subrpt_subval"] = str(json_line[0]["subrpt"]["subval"])
print("out:", dict1)
Some of your logic is confusing to me, e.g. why are you calling json.loads(line) on every iteration of the loop?
Anyway, the following should get you the logic for RelsubList:
import json
# Parse the whole file as one JSON document; the context manager closes it.
with open("data.json") as f:
    data = json.load(f)
# RelsubList sits inside the first (index 0) element of Report -> Replist.
# No loop is needed: iterating over `data` only walks its top-level keys and
# repeats the same lookup each time.
relsublist = data["Report"]["Replist"][0]["RelsubList"]
print(relsublist)
Results in:
[{'Relid': 8, 'Relsubdetails': {'Rel_subname': 'name8', 'Rel_Subval': 65}}, {'Relid': 5, 'Relsubdetails': {'Rel_subname': 'name5', 'Rel_Subval': 40}}]
The reason for the [0] index after ["Replist"] is that Replist contains an array of nested dictionaries, so you need to select an element by index. In this case the array has only a single element, so the index is 0.

How to build a nested ordered dict from a csv?

How can I get a nested dictionary, where both the keys and the subkeys are precisely in the same order as in the csv file?
I tried
import csv
from collections import OrderedDict
# Build a mapping from each row's "key" column to the rest of that row.
filename = "test.csv"
aDict = OrderedDict()
with open(filename, 'r') as f:
    for row in csv.DictReader(f):
        # pop() removes the "key" column so only the other fields remain.
        key = row.pop("key")
        aDict[key] = row
where test.csv looks like
key,number,letter
eins,1,a
zwei,2,b
drei,3,c
But the sub-dictionaries are not ordered (rows letter and number are changed). So how can I populate aDict[key] in an ordered manner?
You have to build the dictionaries and sub-dictionaries yourself from rows returned from csv.reader which are sequences, instead of using csv.DictReader.
Fortunately that's fairly easy:
import csv
from collections import OrderedDict
filename = 'test.csv'
aDict = OrderedDict()
# On Python 3 the csv module needs a text-mode file opened with newline='';
# a file opened with 'rb' yields bytes and csv.reader raises an error.
with open(filename, 'r', newline='') as f:
    csvReader = csv.reader(f)
    # The header row supplies the column names in file order.
    fields = next(csvReader)
    for row in csvReader:
        # zip() pairs names with values in column order, which OrderedDict keeps.
        temp = OrderedDict(zip(fields, row))
        key = temp.pop("key")
        aDict[key] = temp

import json  # just to create output
print(json.dumps(aDict, indent=4))
Output:
{
"eins": {
"number": "1",
"letter": "a"
},
"zwei": {
"number": "2",
"letter": "b"
},
"drei": {
"number": "3",
"letter": "c"
}
}
This is one way:
import csv
from collections import OrderedDict
filename = "test.csv"
aDict = OrderedDict()
with open(filename, 'r') as f:
    # Read the header once to learn the column order, skipping the first column.
    header = next(csv.reader(f))
    order = header[1:]
    # Rewind so DictReader sees the header row again.
    f.seek(0)
    for row in csv.DictReader(f):
        key = row.pop("key")
        ordered_row = OrderedDict()
        for column in order:
            ordered_row[column] = row[column]
        aDict[key] = ordered_row
csv.DictReader loads the rows into a regular dict and not an ordered one. You'll have to read the csv manually into an OrderedDict to get the order you need:
from collections import OrderedDict
filename = "test.csv"
dictRows = []
with open(filename, 'r') as f:
    # Lazily split each line on commas; no quoting rules are applied.
    rows = (line.strip().split(',') for line in f)
    # read column names from first row
    # The built-in next() works on Python 2.6+ and Python 3, whereas the
    # generator method rows.next() exists only on Python 2.
    columns = next(rows)
    for row in rows:
        dictRows.append(OrderedDict(zip(columns, row)))
You can take advantage of the existing csv.DictReader class, but alter the rows it returns. To do that, add the following class to the beginning of your script:
class OrderedDictReader(csv.DictReader):
    """csv.DictReader variant whose rows are OrderedDicts in header order."""

    def __next__(self):
        # Get a row using csv.DictReader
        row = csv.DictReader.__next__(self)
        # Create a new row using OrderedDict, following self.fieldnames order
        new_row = OrderedDict((k, row[k]) for k in self.fieldnames)
        return new_row

    # Python 3 iteration calls __next__, so overriding only next() is never
    # picked up by a for loop. The alias keeps reader.next() working.
    next = __next__
Then, use this class in place of csv.DictReader:
csvReader = OrderedDictReader(f)
The rest of your code remains the same.

Categories

Resources