The code below defines a dictionary used to transform field values. Data is read, some of the values are transformed based on this dictionary, and the result is written to a table. It works as-is. The problem is that I now want to move this configuration out of the .py file and into a JSON configuration file.
lookups = {
11: {
"ST1": ["ABC"],
"UNK01": ["125", "ACD"],
"A": ["52"],
"B": ["91"],
"C": ["92"],
"D": ["95"]
},
10: {
"XYZ01": ["91"],
"XYZ02": ["83"],
"XYZ03": ["27"]
}
}
According to jsonlint.com, in order for the above value being assigned to lookups to be valid JSON, I must quote the 11 and 10 keys. Doing so breaks my Python code and displays TypeError: list indices must be integers, not str.
How do I create valid JSON and minimize changes to my code?
If you want to dump it to a json file:
import json
with open("config.json","w") as f:
json.dump(lookups, f) # dump dict to file
with open("config.json") as f:
s = json.load(f) # load dict from file
print(s)
{'11': {'ST1': ['ABC'], 'A': ['52'], 'D': ['95'], 'UNK01': ['125', 'ACD'], 'B': ['91'], 'C': ['92']}, '10': {'XYZ01': ['91'], 'XYZ03': ['27'], 'XYZ02': ['83']}}
If you need keys as ints you can loop and cast as ints or use pickle:
import pickle
with open("in.pkl","wb") as f:
pickle.dump(lookups, f)
with open("in.pkl","rb") as f:
s = pickle.load(f)
print(s)
{10: {'XYZ03': ['27'], 'XYZ01': ['91'], 'XYZ02': ['83']}, 11: {'UNK01': ['125', 'ACD'], 'B': ['91'], 'D': ['95'], 'ST1': ['ABC'], 'C': ['92'], 'A': ['52']}}
If not just use as is.
If you know what type of data your keys are, a simple int on the keys would suffice:
# json.loads always produces string keys for JSON objects, so rebuild the
# mapping with the keys cast to int.
dictionary_from_json = json.loads(dumped)
# Iterate over .items(): iterating the dict itself yields only the keys, and
# unpacking a two-character key such as "11" into (key, val) silently splits
# it into its characters instead of producing the (key, value) pair.
newdict = {int(key): val for key, val in dictionary_from_json.items()}
You can extend json.decoder and convert all keys to int when it's possible.
import json
class Json(json.JSONDecoder):
    """JSON decoder that casts integer-like object keys back to ``int``.

    JSON object keys are always strings; after the normal decode step every
    dict key that ``int()`` accepts is replaced by the resulting integer.
    Keys that ``int()`` rejects are kept as they are.  Note that distinct
    string keys that map to the same integer (e.g. "1" and "01") collapse
    into a single entry.
    """

    def decode(self, json_string):
        """Decode *json_string* and return the result with int-like keys cast."""
        default_obj = super(Json, self).decode(json_string)
        return self._rec_serial(default_obj)

    def _rec_serial(self, default):
        """Return a copy of *default* with integer-like dict keys cast to int.

        Dicts and lists are walked recursively; any other value (string,
        number, bool, None) is returned unchanged, so documents whose top
        level is a list or a scalar decode without error.
        """
        if isinstance(default, list):
            return [self._rec_serial(element) for element in default]
        if not isinstance(default, dict):
            return default
        new_dict = {}
        for key, value in default.items():
            value = self._rec_serial(value)
            try:
                new_dict[int(key)] = value
            except ValueError:
                # Not an integer literal: keep the original string key.
                new_dict[key] = value
        return new_dict
json2= Json()
d = json2.decode(dumped)
Related
I am trying to map some values from data to a template. I want to fill in the values (with some manipulations) in the template only if they are already present in it. My template has hundreds of keys and my goal is to avoid the if statement before each manipulation and assignment.
The point of the if statements is to defer evaluation of the manipulations I am performing as they may be expensive to perform. Any solutions should take this into account.
data = {
'a':1,
'b':2,
'c':3,
'd':4,
'e':5
}
template1 = {
'p':'Nan',
'q':'Nan',
'r':'Nan'
}
template2 = {
'p':'Nan',
's':'Nan',
't':'Nan'
}
def func(template, data):
    """Fill the keys that already exist in *template* from *data*, in place.

    Each template key has its own rule for deriving a value from *data*.
    A rule is evaluated only when its key is present in the template, so
    manipulations for absent keys are never performed.
    """
    rules = (
        ('p', lambda: data['a']),
        ('q', lambda: data['b'][:2] + 'some manipulation'),
        ('r', lambda: data['c']),
        ('s', lambda: data['d'] + 'some mainpulation'),
        ('t', lambda: data['e']),
    )
    for field, compute in rules:
        if field in template:
            template[field] = compute()
I know I am missing something basic, my actual code and requirements are pretty complex and I tried to simplify them and bring them down to this simple structure.
Thanks for your help in advance!
You could also store manipulations directly in your data dict using lambda functions, then check whether any value retrieved from the data dict is callable() when using this dict to update the template. Assuming you can't modify the keys in the data dict, this approach could still work with the template_dict mapping approach suggested by Jlove.
data = {
'p': 1,
'q': 2,
'r': 3,
's': 4,
't': 5,
'u': lambda x: x * 2
}
template1 = {
'p':'Nan',
'q':'Nan',
'r':'Nan',
'u': 2
}
def func(template, data):
    """Overwrite every value in *template* from the same key in *data*, in place.

    When the data entry is callable it is invoked with the template's current
    value and the result is stored; otherwise the data entry itself is stored.
    """
    for key in template:
        entry = data[key]
        template[key] = entry(template[key]) if callable(entry) else entry
#driver
func(template1, data)
for k in template1.items():
print(k)
--- expanded solution based on comments ---
basically the same as the above, but shows how to use a mapping dict to direct how the data dict and an actions dict can be combined to modify the template dict. Also shows how to map keys to functions using a dict.
from collections import defaultdict
def qManipulation(x):
return x * 10
def sManipulation(x):
return x * 3
data = {
'a':1,
'b':2,
'c':3,
'd':4,
'e':5
}
actions = {
'q': qManipulation,
's': sManipulation,
'u': lambda x: x * 7
}
tempToDataMap = defaultdict(lambda: None, {
'p': 'a',
'q': 'b',
'r': 'c',
's': 'd',
't': 'e'
})
template1 = {
'p':'Nan',
'q':'Nan',
'r':'Nan',
'u': 2
}
def func(template, data):
    """Update *template* in place from *data*, applying registered actions.

    Uses the module-level ``tempToDataMap`` (template key -> data key) and
    ``actions`` (template key -> callable):

    * mapped key whose data key is present in *data*: the template receives
      the data value, passed through the key's action when one is registered;
    * mapped key whose data key is missing from *data*: left untouched;
    * unmapped key with a registered action: the action is applied to the
      template's current value.
    """
    for key in template:
        # .get() rather than [] so that probing an unmapped key does not
        # insert a None entry into the tempToDataMap defaultdict.
        dataKey = tempToDataMap.get(key)
        if dataKey is None:
            # No data source for this key: optionally transform the value the
            # template already holds (useful for non-'Nan' defaults).
            if key in actions:
                template[key] = actions[key](template[key])
            continue
        if dataKey not in data:
            # Mapped, but the data is absent: nothing to copy or transform.
            continue
        value = data[dataKey]
        template[key] = actions[key](value) if key in actions else value
func(template1, data)
for k in template1.items():
print(k)
If your manipulations can be expressed as a simple lambda, you could encapsulate the condition/assigment in a function to reduce the code clutter:
def func(template, data):
    """Fill the keys that already exist in *template* from *data*, in place.

    Every rule is wrapped in a lambda, so its manipulation is evaluated only
    when the target key is present in the template.
    """
    def assign_if_present(field, compute):
        # Skip absent fields without ever calling compute().
        if field not in template:
            return
        template[field] = compute()

    assign_if_present('p', lambda: data['a'])
    assign_if_present('q', lambda: data['b'][:2] + 'some manipulation')
    assign_if_present('r', lambda: data['c'])
    assign_if_present('s', lambda: data['d'] + 'some mainpulation')
    assign_if_present('t', lambda: data['e'])
This is probably not a great idea but you could subclass dict and override __setitem__.
class GuardDict(dict):
    """dict whose item assignment only updates keys that already exist.

    The assigned object must be a zero-argument callable.  It is called, and
    its result stored, only when the key is already present; assignments to
    unknown keys are ignored without calling it.
    """

    def __setitem__(self, key, callable_value):
        if key not in self:
            return
        super().__setitem__(key, callable_value())

    def to_dict(self):
        """Return the contents as a plain ``dict``."""
        plain = dict(self)
        return plain
data = {
'a': 1,
'b': '2',
'c': 3,
'd': '4',
'e': 5
}
template1 = {
'p':'Nan',
'q':'Nan',
'r':'Nan'
}
template2 = {
'p':'Nan',
's':'Nan',
't':'Nan'
}
def func(template, data):
    """Return a plain dict copy of *template* with its existing keys filled from *data*.

    The template is wrapped in a GuardDict, whose item assignment calls the
    given lambda only for keys that already exist, so rules for absent keys
    are never evaluated.  The dict passed in is not modified.
    """
    guarded = GuardDict(template)
    pending = {
        'p': lambda: data['a'],
        'q': lambda: data['b'] + 'some manipulation',
        'r': lambda: data['c'],
        's': lambda: data['d'] + 'some mainpulation',
        't': lambda: data['e'],
    }
    for field, compute in pending.items():
        guarded[field] = compute
    # Hand back an ordinary dict rather than the guarding subclass.
    return guarded.to_dict()
template1 = func(template1, data)
template2 = func(template2, data)
print(template1)
print(template2)
I should probably note if there are other users of your code they will probably hate you for this.
a dynamically functional approach might relieve you from all the ifs and elses, but might complicate the overall program structure.
data = {
'a':1,
'b':2,
'c':3,
'd':4,
'e':5
}
template1 = {
'p': 'Nan',
'q': 'Nan',
'r': 'Nan'
}
template2 = {
'p': 'Nan',
's': 'Nan',
't': 'Nan'
}
# first, define your complex logic in functions, accounting for every possible template key
def p_logic(data, x):
return data[x]
def q_logic(data, x):
return data[x][:2] + 'some manipulation'
# Then build a dict of every possible template key, the associated value and reference to one of the
# functions defined above
logic = {
'p': {
'value': 'a',
'logic': p_logic
},
'q': {
'value': 'b',
'logic': q_logic
},
}
def func(template, data):
    """Recompute every value in *template*, in place, via the ``logic`` table.

    For each template key, the module-level ``logic`` dict supplies the data
    key to read ('value') and the function to apply ('logic'); that function
    is called with *data* and the data key, and its result is stored.
    """
    for item in template:  # iterating a dict yields its keys
        rule = logic[item]
        transform = rule['logic']
        template[item] = transform(data, rule['value'])
The only thing I could think to do here would be to have some sort of dict and run your template through a for loop instead. Such as:
template_dict = {'p': 'a', 'q': 'b', 'r': 'c', 's': 'd', 't': 'e'}
def func(template, data):
    """Copy values from *data* into the keys that *template* already has, in place.

    The module-level ``template_dict`` maps each template key to the data key
    its value is read from.
    """
    for key, value in template_dict.items():
        if key not in template:
            continue
        template[key] = data[value]
Otherwise, I'm not sure how you might be able to avoid all those conditionals.
I have a dictionary that looks like this:
{"first": {"phone": 1900,"other": 1}, "second": {"adwords": 1419, "no_om_source": 1223}}
I convert this dict into JSON format. I want all the numbers within the dict to be changed to strings as well.
def convert(o):
    """Fallback hook for ``json.dumps(..., default=convert)``.

    NumPy scalar values (instances of ``np.generic``) are returned as the
    result of their ``.item()`` call; anything else raises ``TypeError``.
    """
    if not isinstance(o, np.generic):
        raise TypeError
    return o.item()
jsonContent = json.dumps(myDict, default=convert)
with open('data.json', 'w', encoding='utf-8') as f:
json.dump(jsonContent, f, ensure_ascii=False, indent=4)
return jsonContent
However, when I try to print the jsonContent, the values are still in numbers and not strings. How can I change this?
Try this before turning your dict into a json format.
myDict = {"first": {"phone": 1900,"other": 1}, "second": {"adwords": 1419, "no_om_source": 1223}}
# Replace every value of each inner dict with its string form, in place.
for inner in myDict.values():
    for field in inner:
        inner[field] = str(inner[field])
output
{'first': {'phone': '1900', 'other': '1'}, 'second': {'adwords': '1419', 'no_om_source': '1223'}}
This is my json file input.
{"Report":{"id":101,"type":"typeA","Replist":[{"rptid":"r001","subrpt":{"subid":74,"subname":"name1","subval":113},"RelsubList":[{"Relid":8,"Relsubdetails":{"Rel_subname":"name8","Rel_Subval":65}},{"Relid":5,"Relsubdetails":{"Rel_subname":"name5","Rel_Subval":40}}],"fldA":30,"fldB":23}]}}
...
I am writing python program to convert the input into the below format in my dictionary.
I am new to python.
Expected output:
out: {"id": "101", "type": "typeA", "rptid": "r001", "subrpt_subid": "74", "subrpt_subname": "name1", "subrpt_subval":"113","Relid":"8","Rel_subname":"name8","Rel_Subval":"65","Relid":"5","Rel_subname":"name5","Rel_Subval":"40","fldA":"30","fldB":"23"
I used the following logic to convert the output till subrpt.
Current output:
out: {'id': '101', 'type': 'typeA', 'rptid': 'r001', 'subrpt_subid': '74', 'subrpt_subname': 'name1', 'subrpt_subval': '113'}
But I am struggling to get the logic of RelsubList(it looks like it has both list and dictionary[{}] ).
please help me to get the logic for the same.
import json
list1 = []
dict1 = {}
dict2 = {}
data_file = "samp1.json"
# Each non-blank line of the file is parsed as one JSON document.  The context
# manager closes the file even if a line fails to parse.
with open(data_file) as file:
    for line in file:
        if not line.strip():
            # A blank line is not a JSON document; json.loads would raise on it.
            continue
        report = json.loads(line)["Report"]
        dict1["id"] = str(report["id"])
        dict1["type"] = str(report["type"])
        # Only the first entry of "Replist" is flattened.
        first_rep = report["Replist"][0]
        dict1["rptid"] = str(first_rep["rptid"])
        subrpt = first_rep["subrpt"]
        dict1["subrpt_subid"] = str(subrpt["subid"])
        dict1["subrpt_subname"] = str(subrpt["subname"])
        dict1["subrpt_subval"] = str(subrpt["subval"])
        print("out:", dict1)
Some of your logic is confusing to me, i.e. why are you doing json.loads(line) in every loop?
Anyway, the following should get you the logic for RelsubList:
import json
# Load the whole document once; the context manager closes the file.
with open("data.json") as f:
    data = json.load(f)
# "Replist" is a list, so pick its first element by index before reading
# "RelsubList".  No loop over the top-level keys is needed for this lookup.
relsublist = data["Report"]["Replist"][0]["RelsubList"]
print(relsublist)
Results in:
[{'Relid': 8, 'Relsubdetails': {'Rel_subname': 'name8', 'Rel_Subval': 65}}, {'Relid': 5, 'Relsubdetails': {'Rel_subname': 'name5', 'Rel_Subval': 40}}]
The reason for the [0] index after ["Replist"] is that Replist contains an array of nested dictionaries, so you need to select an element by index. In this case the array has only a single element, so the index is 0.
I am trying to create a nested dictionary, whereby the key to each nested dictionary is named from the value from a variable. My end result should look something like this:
data_dict = {
'jane': {'name': 'jane', 'email': 'jane#example.com'},
'jim': {'name': 'jim', 'email': 'jim#example.com'}
}
Here is what I am trying:
data_dict = {}
s = "jane"
data_dict[s][name] = 'jane'
To my surprise, this does not work. Is this possible?
You want something like:
data_dict = {}
s = "jane"
data_dict[s] = {}
data_dict[s]['name'] = s
That should work, though I would recommend instead of a nested dictionary that you use a dictionary of names to either namedtuples or instances of a class.
Try this:
s = ["jane", "jim"]
# One nested record per name, keyed by that name.
data_dict = {
    person: {'name': person, 'email': person + '#example.com'}
    for person in s
}
As #Milad mentioned in the comments, you first need to initialize the nested entry (data_dict[s] in the question) as an empty dictionary:
data={}
data['Tom']={}
data['Tom']['name'] = 'Tom Marvolo Riddle'
data['Tom']['email'] = 'iamlordvoldermort.com'
For existing dictionaries you can do dict[key] = value although if there is no dict that would raise an error. I think this is the code you want to have:
data_dict = {}
s = "jane"
data_dict[s] = {"name": s, "email": f"{s}#example.com"}
print(data_dict)
I just realized when I got a notification about this question:
data_dict = defaultdict(dict)
data_dict["jane"]["name"] = "jane"
Would be a better answer I think.
Is there a way in Python to serialize a dictionary that is using a tuple as key?
e.g.
a = {(1, 2): 'a'}
simply using json.dumps(a) raises this error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/json/__init__.py", line 230, in dumps
return _default_encoder.encode(obj)
File "/usr/lib/python2.6/json/encoder.py", line 367, in encode
chunks = list(self.iterencode(o))
File "/usr/lib/python2.6/json/encoder.py", line 309, in _iterencode
for chunk in self._iterencode_dict(o, markers):
File "/usr/lib/python2.6/json/encoder.py", line 268, in _iterencode_dict
raise TypeError("key {0!r} is not a string".format(key))
TypeError: key (1, 2) is not a string
You can't serialize that as json, json has a much less flexible idea about what counts as a dict key than python.
You could transform the mapping into a sequence of key, value pairs, something like this:
import json
def remap_keys(mapping):
    """Return *mapping* as a list of ``{'key': k, 'value': v}`` dicts.

    JSON object keys must be strings, so a dict with tuple keys cannot be
    serialized directly; a list of key/value records can, with each tuple
    key written as a JSON array.
    """
    # .items() works on both Python 2 and 3; .iteritems() exists only on 2.
    return [{'key': k, 'value': v} for k, v in mapping.items()]
...
json.dumps(remap_keys({(1, 2): 'foo'}))
>>> '[{"value": "foo", "key": [1, 2]}]'
from json import loads, dumps
from ast import literal_eval
x = {(0, 1): 'la-la la', (0, 2): 'extricate'}
# Save: JSON object keys must be strings, so store each tuple key as its
# str() form before dumping.
s = dumps(dict(zip(map(str, x), x.values())))
# Load, stage 1: parse the JSON object (its keys are still strings).
obj = loads(s)
# Load, stage 2: turn each string key back into the tuple it was written from.
d = dict((literal_eval(k), v) for k, v in obj.items())
See https://stackoverflow.com/a/12337657/2455413.
JSON only supports strings as keys. You'll need to choose a way to represent those tuples as strings.
You could just use str((1,2)) as key because json only expects the keys as strings but if you use this you'll have to use a[str((1,2))] to get the value.
json can only accept strings as keys for dict,
what you can do, is to replace the tuple keys with string like so
with open("file", "w") as f:
k = dic.keys()
v = dic.values()
k1 = [str(i) for i in k]
json.dump(json.dumps(dict(zip(*[k1,v]))),f)
And then when you want to read it, you can change the keys back to tuples using
with open("file", r) as f:
data = json.load(f)
dic = json.loads(data)
k = dic.keys()
v = dic.values()
k1 = [eval(i) for i in k]
return dict(zip(*[k1,v]))
This solution:
Avoids the security risk of eval().
Is short.
Is copy-pastable as save and load functions.
Keeps the structure of tuple as the key, in case you are editing the JSON by hand.
Adds ugly \" to the tuple representation, which is worse than the other str()/eval() methods here.
Can only handle tuples as keys at the first level for nested dicts (as of this writing no other solution here can do better)
def json_dumps_tuple_keys(mapping):
    """Serialize *mapping* to JSON, encoding each key as a JSON string first.

    Every key is passed through ``json.dumps`` on its own (a tuple becomes
    the text of a JSON array) and the resulting string is used as the object
    key.  Only the top-level keys are treated this way.
    """
    string_keys = {}
    for key, value in mapping.items():
        string_keys[json.dumps(key)] = value
    return json.dumps(string_keys)
def json_loads_tuple_keys(string):
    """Parse a JSON object whose keys are JSON-encoded arrays into a tuple-keyed dict.

    Each top-level key is itself decoded with ``json.loads`` and converted
    to a tuple; values are returned as decoded.
    """
    decoded = json.loads(string)
    restored = {}
    for raw_key, value in decoded.items():
        restored[tuple(json.loads(raw_key))] = value
    return restored
m = {(0,"a"): "first", (1, "b"): [9, 8, 7]}
print(m) # {(0, 'a'): 'first', (1, 'b'): [9, 8, 7]}
s = json_dumps_tuple_keys(m)
print(s) # {"[0, \"a\"]": "first", "[1, \"b\"]": [9, 8, 7]}
m2 = json_loads_tuple_keys(s)
print(m2) # {(0, 'a'): 'first', (1, 'b'): [9, 8, 7]}
print(m==m2) # True
Here is one way to do it. It will require the key to be json decoded after the main dictionary is decoded and the whole dictionary re-sequenced, but it is doable:
import json
def jsonEncodeTupleKeyDict(data):
    """Return *data* as a JSON object string, with each key JSON-encoded.

    Each key of *data* is serialized on its own with ``json.dumps`` (a tuple
    becomes the text of a JSON array) and that string is used as the object
    key; the values are serialized as usual.
    """
    # .items() works on both Python 2 and 3; .iteritems() exists only on 2.
    ndict = {json.dumps(key): value for key, value in data.items()}
    # Now encode the string-keyed dictionary and return that.
    return json.dumps(ndict)
def main():
    """Demonstrate that plain json.dumps rejects tuple keys while the helper does not."""
    tdict = {}
    for i in range(10):
        tdict[(i, "data", 5 * i)] = i * i
    try:
        # Tuple keys are not valid JSON object keys, so this raises TypeError.
        print(json.dumps(tdict))
    except TypeError as e:
        print("JSON Encode Failed!", e)
    print(jsonEncodeTupleKeyDict(tdict))
if __name__ == '__main__':
main()
I make no claim to any efficiency of this method. I needed this for saving some joystick mapping data to a file. I wanted to use something that would create a semi-human readable format so it could be edited if needed.
You can actually not serialize tuples as key to json, but you can convert the tuple to a string and recover it, after you have deserialized the file.
with_tuple = {(0.1, 0.1): 3.14} ## this will work in python but is not serializable in json
{(0.1, 0.1): 3.14}
But you cannot serialize it with json. However, you can use
with_string = {str((0.1, 0.1))[1:-1]: 3.14} ## the expression [1,-1] removes the parenthesis surrounding the tuples in python.
{'0.1, 0.1': 3.14} # This is serializable
With a bit of cheating, you will recover the original tuple (after having deserialized the whole file) by treating each key (as str) separately
tuple(json.loads("["+'0.1, 0.1'+"]")) ## will recover the tuple from string
(0.1, 0.1)
It is a bit of overload to convert a string to a tuple using json.loads, but it will work. Encapsulate it and you are done.
Peace out and happy coding!
Nicolas
Here are two functions you could use to convert a dict_having_tuple_as_key into a json_array_having_key_and_value_as_keys and then de-convert it the way back
import json
def json_dumps_dict_having_tuple_as_key(dict_having_tuple_as_key):
    """Serialize a dict as a JSON array of ``{"key": ..., "value": ...}`` records.

    Raises ``Exception`` when the argument is not a dict.
    """
    if not isinstance(dict_having_tuple_as_key, dict):
        raise Exception('Error using json_dumps_dict_having_tuple_as_key: The input variable is not a dictionary.')
    records = []
    for k, v in dict_having_tuple_as_key.items():
        records.append({'key': k, 'value': v})
    return json.dumps(records)
def json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(json_array_having_key_and_value_as_keys):
    """Rebuild a tuple-keyed dict from a JSON array of key/value records.

    The input is a JSON array of objects, each with a "key" entry (an array,
    converted to a tuple) and a "value" entry.

    Raises ``Exception`` when any record lacks "key" or "value".
    """
    list_of_dicts_having_key_and_value_as_keys = json.loads(json_array_having_key_and_value_as_keys)
    # Every record must carry BOTH entries.  The check is written per record
    # because `not all(a) and all(b)` parses as `(not all(a)) and all(b)` and
    # therefore lets records without "value" through.
    if not all('key' in diz and 'value' in diz for diz in list_of_dicts_having_key_and_value_as_keys):
        raise Exception('Error using json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps: at least one dictionary in list_of_dicts_having_key_and_value_as_keys ismissing key "key" or key "value".')
    return {
        tuple(diz['key']): diz['value']
        for diz in list_of_dicts_having_key_and_value_as_keys
    }
usage example:
my_dict = {
('1', '1001', '2021-12-21', '1', '484'): {"name": "Carl", "surname": "Black", "score": 0},
('1', '1001', '2021-12-22', '1', '485'): {"name": "Joe", "id_number": 134, "percentage": 11}
}
my_json = json_dumps_dict_having_tuple_as_key(my_dict)
print(my_json)
[{'key': ['1', '1001', '2021-12-21', '1', '484'], 'value': {'name': 'Carl', 'surname': 'Black', 'score': 0}},
{'key': ['1', '1001', '2021-12-22', '1', '485'], 'value': {'name': 'Joe', 'id_number': 134, 'percentage': 11}}]
my_dict_reconverted = json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(my_json)
print(my_dict_reconverted)
{('1', '1001', '2021-12-21', '1', '484'): {'name': 'Carl', 'surname': 'Black', 'score': 0},
('1', '1001', '2021-12-22', '1', '485'): {'name': 'Joe', 'id_number': 134, 'percentage': 11}}
# proof of working 1
my_dict == my_dict_reconverted
True
# proof of working 2
my_dict == json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(
json_dumps_dict_having_tuple_as_key(my_dict)
)
True
(Using concepts expressed by #SingleNegationElimination to answer #Kvothe comment)
Here's a complete example to encode/decode nested dictionaries with tuple keys and values into/from json. tuple key will be a string in JSON.
values of types tuple or set will be converted to list
def JSdecoded(item, dict_key=False):
    """Restore Python keys in data loaded from JSON written via ``JSencoded``.

    Lists and dicts are walked recursively.  Each dict key is parsed with
    ``literal_eval`` so that a stringified tuple such as "('a', 1)" becomes
    the tuple again; a key that is not a Python literal, or whose parsed
    value cannot be used as a dict key, is kept as the original string.
    Any other value is returned unchanged.

    *dict_key* is unused; it is accepted only for signature compatibility.
    """
    if isinstance(item, list):
        return [JSdecoded(e) for e in item]
    if isinstance(item, dict):
        decoded = {}
        for key, value in item.items():
            try:
                new_key = literal_eval(key)
                hash(new_key)  # e.g. "[1, 2]" parses to an unhashable list
            except (ValueError, SyntaxError, TypeError):
                # Ordinary string key such as "name": keep it as it is.
                new_key = key
            # Recurse so nested dicts get their keys restored as well.
            decoded[new_key] = JSdecoded(value)
        return decoded
    return item
def JSencoded(item, dict_key=False):
    """Convert *item* into a structure that ``json.dumps`` accepts.

    * a tuple used as a dict key (``dict_key=True``) becomes its ``str()``;
    * a tuple, set or frozenset used as a value becomes a list;
    * lists, dicts and the contents of tuples/sets are converted
      recursively, so tuple-keyed dicts nested anywhere are handled;
    * anything else is returned unchanged.
    """
    if isinstance(item, tuple):
        if dict_key:
            return str(item)
        return [JSencoded(e) for e in item]
    if isinstance(item, list):
        return [JSencoded(e) for e in item]
    if isinstance(item, dict):
        return {JSencoded(key, True): JSencoded(value) for key, value in item.items()}
    if isinstance(item, (set, frozenset)):
        return [JSencoded(e) for e in item]
    return item
usage
import json
pydata = [
{ ('Apple','Green') : "Tree",
('Orange','Yellow'):"Orchard",
('John Doe', 1945) : "New York" }
]
jsstr= json.dumps(JSencoded(pydata), indent='\t')
print(jsstr)
#[
# {
# "('Apple', 'Green')": "Tree",
# "('Orange', 'Yellow')": "Orchard",
# "('John Doe', 1945)": "New York"
# }
#]
data = json.loads(jsstr) #string keys
newdata = JSdecoded(data) #tuple keys
print(newdata)
#[{('Apple', 'Green'): 'Tree', ('Orange', 'Yellow'): 'Orchard', ('John Doe', 1945): 'New York'}]
def stringify_keys(d):
    """Return a copy of *d* in which every dict key is replaced by ``str(key)``.

    Dicts, lists and tuples are processed recursively (lists and tuples are
    rebuilt with their own type); any other value is returned unchanged.
    """
    if isinstance(d, dict):
        converted = {}
        for k, v in d.items():
            converted[str(k)] = stringify_keys(v)
        return converted
    if not isinstance(d, (list, tuple)):
        return d
    container = type(d)
    return container(stringify_keys(v) for v in d)
json.dumps(stringify_keys(mydict))