JSON serialize a dictionary with tuples as key - python

Is there a way in Python to serialize a dictionary that uses a tuple as a key?
e.g.
a = {(1, 2): 'a'}
simply using json.dumps(a) raises this error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/json/__init__.py", line 230, in dumps
return _default_encoder.encode(obj)
File "/usr/lib/python2.6/json/encoder.py", line 367, in encode
chunks = list(self.iterencode(o))
File "/usr/lib/python2.6/json/encoder.py", line 309, in _iterencode
for chunk in self._iterencode_dict(o, markers):
File "/usr/lib/python2.6/json/encoder.py", line 268, in _iterencode_dict
raise TypeError("key {0!r} is not a string".format(key))
TypeError: key (1, 2) is not a string

You can't serialize that as JSON; JSON has a much less flexible idea of what counts as a dict key than Python.
You could transform the mapping into a sequence of key, value pairs, something like this:
import json

def remap_keys(mapping):
    # each tuple key becomes a JSON-friendly {'key': ..., 'value': ...} pair
    return [{'key': k, 'value': v} for k, v in mapping.items()]

json.dumps(remap_keys({(1, 2): 'foo'}))
# '[{"key": [1, 2], "value": "foo"}]'

from json import loads, dumps
from ast import literal_eval
x = {(0, 1): 'la-la la', (0, 2): 'extricate'}
# save: convert each tuple key to a string before saving as json object
s = dumps({str(k): v for k, v in x.items()})
# load in two stages:
# (i) load json object
obj = loads(s)
# (ii) convert loaded keys from string back to tuple
d = {literal_eval(k): v for k, v in obj.items()}
See https://stackoverflow.com/a/12337657/2455413.

JSON only supports strings as keys. You'll need to choose a way to represent those tuples as strings.

You could just use str((1, 2)) as the key, since JSON only expects string keys, but if you do that you'll have to use a[str((1, 2))] to get the value back.
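A minimal sketch of that approach:
import json

a = {str((1, 2)): 'a'}   # the key becomes the string '(1, 2)'
json.dumps(a)            # '{"(1, 2)": "a"}'
a[str((1, 2))]           # 'a' -- lookups must go through str() as well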

JSON can only accept strings as dict keys.
What you can do is replace the tuple keys with strings, like so:
import json

with open("file", "w") as f:
    k = dic.keys()
    v = dic.values()
    k1 = [str(i) for i in k]
    json.dump(json.dumps(dict(zip(k1, v))), f)
And then when you want to read it, you can change the keys back to tuples using:
with open("file", "r") as f:
    data = json.load(f)
    dic = json.loads(data)
    k = dic.keys()
    v = dic.values()
    k1 = [eval(i) for i in k]
    return dict(zip(k1, v))

This solution:
Avoids the security risk of eval().
Is short.
Is copy-pastable as save and load functions.
Keeps the structure of tuple as the key, in case you are editing the JSON by hand.
Adds ugly \" to the tuple representation, which is worse than the other str()/eval() methods here.
Can only handle tuples as keys at the first level for nested dicts (as of this writing no other solution here can do better)
import json

def json_dumps_tuple_keys(mapping):
    # each tuple key is encoded as its JSON array representation, e.g. (0, "a") -> '[0, "a"]'
    string_keys = {json.dumps(k): v for k, v in mapping.items()}
    return json.dumps(string_keys)

def json_loads_tuple_keys(string):
    mapping = json.loads(string)
    return {tuple(json.loads(k)): v for k, v in mapping.items()}
m = {(0,"a"): "first", (1, "b"): [9, 8, 7]}
print(m) # {(0, 'a'): 'first', (1, 'b'): [9, 8, 7]}
s = json_dumps_tuple_keys(m)
print(s) # {"[0, \"a\"]": "first", "[1, \"b\"]": [9, 8, 7]}
m2 = json_loads_tuple_keys(s)
print(m2) # {(0, 'a'): 'first', (1, 'b'): [9, 8, 7]}
print(m==m2) # True

Here is one way to do it. It requires each key to be JSON-decoded after the main dictionary is decoded and the whole dictionary rebuilt, but it is doable:
import json

def jsonEncodeTupleKeyDict(data):
    ndict = dict()
    # create a new dictionary with each tuple key converted to a JSON string
    for key, value in data.items():
        nkey = json.dumps(key)
        ndict[nkey] = value
    # now encode the new dictionary and return that
    return json.dumps(ndict)

def main():
    tdict = dict()
    for i in range(10):
        key = (i, "data", 5 * i)
        tdict[key] = i * i
    try:
        print(json.dumps(tdict))
    except TypeError as e:
        print("JSON Encode Failed!", e)
    print(jsonEncodeTupleKeyDict(tdict))

if __name__ == '__main__':
    main()
I make no claim to any efficiency of this method. I needed this for saving some joystick mapping data to a file. I wanted to use something that would create a semi-human readable format so it could be edited if needed.
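For completeness, the matching decode step might look like the sketch below (jsonDecodeTupleKeyDict is an illustrative name, not part of the original answer); it decodes each key back into a tuple as described above:
def jsonDecodeTupleKeyDict(text):
    # decode the outer dictionary, then decode each string key back into a tuple
    return {tuple(json.loads(key)): value for key, value in json.loads(text).items()}

jsonDecodeTupleKeyDict(jsonEncodeTupleKeyDict({(1, "data", 5): 1}))
# {(1, 'data', 5): 1}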

You cannot actually serialize tuples as JSON keys, but you can convert each tuple to a string and recover it after you have deserialized the file.
with_tuple = {(0.1, 0.1): 3.14} ## this will work in python but is not serializable in json
{(0.1, 0.1): 3.14}
But you cannot serialize it with json. However, you can use
with_string = {str((0.1, 0.1))[1:-1]: 3.14} ## the slice [1:-1] removes the parentheses surrounding the tuple
{'0.1, 0.1': 3.14} # This is serializable
With a bit of cheating, you will recover the original tuple (after having deserialized the whole file) by treating each key (as str) separately
tuple(json.loads("["+'0.1, 0.1'+"]")) ## will recover the tuple from string
(0.1, 0.1)
It is a bit of overhead to convert a string to a tuple using json.loads, but it will work. Encapsulate it and you are done.
Peace out and happy coding!
Nicolas
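Encapsulated, that idea might look like the following sketch (the helper names are illustrative; note it only round-trips tuples whose elements are valid JSON literals, such as numbers):
import json

def dumps_tuple_keys(d):
    # '(0.1, 0.1)' -> '0.1, 0.1'
    return json.dumps({str(k)[1:-1]: v for k, v in d.items()})

def loads_tuple_keys(s):
    # '0.1, 0.1' -> (0.1, 0.1)
    return {tuple(json.loads("[" + k + "]")): v for k, v in json.loads(s).items()}

loads_tuple_keys(dumps_tuple_keys({(0.1, 0.1): 3.14}))
# {(0.1, 0.1): 3.14}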

Here are two functions you could use to convert a dict_having_tuple_as_key into a json_array_having_key_and_value_as_keys and then convert it back:
import json
def json_dumps_dict_having_tuple_as_key(dict_having_tuple_as_key):
    if not isinstance(dict_having_tuple_as_key, dict):
        raise Exception('Error using json_dumps_dict_having_tuple_as_key: The input variable is not a dictionary.')
    list_of_dicts_having_key_and_value_as_keys = [{'key': k, 'value': v} for k, v in dict_having_tuple_as_key.items()]
    json_array_having_key_and_value_as_keys = json.dumps(list_of_dicts_having_key_and_value_as_keys)
    return json_array_having_key_and_value_as_keys

def json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(json_array_having_key_and_value_as_keys):
    list_of_dicts_having_key_and_value_as_keys = json.loads(json_array_having_key_and_value_as_keys)
    if not all('key' in diz and 'value' in diz for diz in list_of_dicts_having_key_and_value_as_keys):
        raise Exception('Error using json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps: at least one dictionary in list_of_dicts_having_key_and_value_as_keys is missing key "key" or key "value".')
    dict_having_tuple_as_key = {}
    for dict_having_key_and_value_as_keys in list_of_dicts_having_key_and_value_as_keys:
        dict_having_tuple_as_key[tuple(dict_having_key_and_value_as_keys['key'])] = dict_having_key_and_value_as_keys['value']
    return dict_having_tuple_as_key
usage example:
my_dict = {
    ('1', '1001', '2021-12-21', '1', '484'): {"name": "Carl", "surname": "Black", "score": 0},
    ('1', '1001', '2021-12-22', '1', '485'): {"name": "Joe", "id_number": 134, "percentage": 11}
}
my_json = json_dumps_dict_having_tuple_as_key(my_dict)
print(my_json)
[{"key": ["1", "1001", "2021-12-21", "1", "484"], "value": {"name": "Carl", "surname": "Black", "score": 0}},
 {"key": ["1", "1001", "2021-12-22", "1", "485"], "value": {"name": "Joe", "id_number": 134, "percentage": 11}}]
my_dict_reconverted = json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(my_json)
print(my_dict_reconverted)
{('1', '1001', '2021-12-21', '1', '484'): {'name': 'Carl', 'surname': 'Black', 'score': 0},
('1', '1001', '2021-12-22', '1', '485'): {'name': 'Joe', 'id_number': 134, 'percentage': 11}}
# proof of working 1
my_dict == my_dict_reconverted
True
# proof of working 2
my_dict == json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(
json_dumps_dict_having_tuple_as_key(my_dict)
)
True
(Using concepts expressed by @SingleNegationElimination to answer @Kvothe's comment)

Here's a complete example to encode/decode nested dictionaries with tuple keys and values into/from JSON. A tuple key becomes a string in JSON, and values of type tuple or set are converted to lists.
from ast import literal_eval

def JSdecoded(item: dict, dict_key=False):
    if isinstance(item, list):
        return [JSdecoded(e) for e in item]
    elif isinstance(item, dict):
        # keys were stored as str(tuple); literal_eval turns them back into tuples
        return {literal_eval(key): value for key, value in item.items()}
    return item

def JSencoded(item, dict_key=False):
    if isinstance(item, tuple):
        if dict_key:
            return str(item)
        else:
            return list(item)
    elif isinstance(item, list):
        return [JSencoded(e) for e in item]
    elif isinstance(item, dict):
        return {JSencoded(key, True): JSencoded(value) for key, value in item.items()}
    elif isinstance(item, set):
        return list(item)
    return item
usage
import json
pydata = [
    {('Apple', 'Green'): "Tree",
     ('Orange', 'Yellow'): "Orchard",
     ('John Doe', 1945): "New York"}
]
jsstr= json.dumps(JSencoded(pydata), indent='\t')
print(jsstr)
#[
# {
# "('Apple', 'Green')": "Tree",
# "('Orange', 'Yellow')": "Orchard",
# "('John Doe', 1945)": "New York"
# }
#]
data = json.loads(jsstr) #string keys
newdata = JSdecoded(data) #tuple keys
print(newdata)
#[{('Apple', 'Green'): 'Tree', ('Orange', 'Yellow'): 'Orchard', ('John Doe', 1945): 'New York'}]

import json

def stringify_keys(d):
    # recursively convert every dict key to a string so the structure is JSON-serializable
    if isinstance(d, dict):
        return {str(k): stringify_keys(v) for k, v in d.items()}
    if isinstance(d, (list, tuple)):
        return type(d)(stringify_keys(v) for v in d)
    return d

json.dumps(stringify_keys(mydict))
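For example, with a hypothetical nested dict:
mydict = {(1, 2): {'a': 1}, 'plain': [{(3, 4): 'x'}]}
json.dumps(stringify_keys(mydict))
# '{"(1, 2)": {"a": 1}, "plain": [{"(3, 4)": "x"}]}'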

Related

Applying keys sequentially to a dict from a list

I'm scraping a website, which returns a dictionary:
person = {'name0':{'first0': 'John', 'last0':'Smith'},
'age0':'10',
'location0':{'city0':'Dublin'}
}
I'm trying to write a function that will return a dictionary {'name':'John', 'age':'10'} when passed the above dictionary.
I want to ideally put a try:... except KeyError around each item since sometimes keys will be missing.
def func(person):
    filters = [('age', 'age0'), ('name', ['name0', 'first0'])]
    result = {'name': None, 'age': None}
    for i in filters:
        try:
            result[i[0]] = person[i[1]]
        except KeyError:
            pass
    return result
The problem is that result[i[0]] = person[i[1]] doesn't work for 'name', since there are two keys that need to be followed sequentially and I don't know how to do that.
I want some way of telling it (in the loop) to go to person['name0']['first0'] (and so on to whatever depth the thing I want is).
I have lots of things to extract, so I'd rather do it in a loop instead of a try..except statement for each variable individually.
In order to follow several keys sequentially, you can use get and set the default value to {} (an empty dictionary) for the upper levels. Set the default value to None (or whatever suits you) for the last level:
def func(person):
    return {'name': person.get('name0', {}).get('first0', None),
            'age': person.get('age0', None)}
Best I could manage was using a for loop to iterate through the keys:
person = {'name0':{'first0': 'John', 'last0':'Smith'},
'age0':'10',
'location0':{'city0':'Dublin'}
}
Additionally, I used .get(key) rather than try..except, as suggested by @wiwi.
def func(person):
    filters = [('age', ['age0']), ('name', ['name0', 'first0'])]
    result = {'name': None, 'age': None}
    for filter in filters:
        temp = person.copy()
        for key in filter[1]:
            temp = temp.get(key)
            if not temp:  # NoneType doesn't have a .get method
                break
        result[filter[0]] = temp
    return result
func(person) then returns {'name': 'John', 'age': '10'}.
It handles missing input too:
person2 = {'age0':'10',
'location0':{'city0':'Dublin'}}
func(person2) returns {'name': None, 'age': '10'}
You can put the try...except in another loop, if there's a list of keys instead of a single key:
def getNestedVal(obj, kPath: list, defaultVal=None):
    if isinstance(kPath, str) or not hasattr(kPath, '__iter__'):
        kPath = [kPath]  ## if not iterable, wrap as list
    for k in kPath:
        try: obj = obj[k]
        except: return defaultVal
    return obj

def func(person):
    filters = [('age', 'age0'), ('name', ['name0', 'first0']),
               ('gender', ['gender0'], 'N/A')]  # includes a default value
    return {k[0]: getNestedVal(person, *k[1:3]) for k in filters}
[I added gender just to demonstrate how defaults can also be specified for missing values.]
With this, func(person) should return
{'age': '10', 'name': 'John', 'gender': 'N/A'}
I also have a flattenObj function, a version of which is defined below:
def flattenDict(orig: dict, kList=[], kSep='_', stripNum=True):
    if not isinstance(orig, dict): return [(kList, orig)]
    tList = []
    for k, v in orig.items():
        if isinstance(k, str) and stripNum: k = k.strip('0123456789')
        tList += flattenDict(v, kList + [str(k)], None)
    if not isinstance(kSep, str): return tList
    return {kSep.join(kl): v for kl, v in tList}
[I added stripNum just to get rid of the 0s in your keys...]
flattenDict(person) should return
{'name_first': 'John', 'name_last': 'Smith', 'age': '10', 'location_city': 'Dublin'}

change numbers to strings in json dictionaries

I have a dictionary that looks like this:
{"first": {"phone": 1900,"other": 1}, "second": {"adwords": 1419, "no_om_source": 1223}}
I convert this dict into JSON format. I want all the numbers within the dict to be changed to strings as well.
import json
import numpy as np

def convert(o):
    # json.dumps calls this for objects it can't serialize, e.g. numpy scalars
    if isinstance(o, np.generic): return o.item()
    raise TypeError

jsonContent = json.dumps(myDict, default=convert)
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(jsonContent, f, ensure_ascii=False, indent=4)
return jsonContent
However, when I print jsonContent, the values are still numbers and not strings. How can I change this?
Try this before turning your dict into a json format.
myDict = {"first": {"phone": 1900,"other": 1}, "second": {"adwords": 1419, "no_om_source": 1223}}
for x in myDict:
for k,v in myDict[x].items():
myDict[x][k] = str(v)
output
{'first': {'phone': '1900', 'other': '1'}, 'second': {'adwords': '1419', 'no_om_source': '1223'}}
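If the dictionary can be nested more deeply than two levels, a recursive variant along these lines should also work (a sketch, not part of the original answer; note that booleans are a subclass of int and would be converted too):
def stringify_numbers(obj):
    # recursively convert every int/float found in dicts and lists to a string
    if isinstance(obj, dict):
        return {k: stringify_numbers(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [stringify_numbers(v) for v in obj]
    if isinstance(obj, (int, float)):
        return str(obj)
    return obj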

how to convert a nested list containing key-value pairs to json format using python

I have a list in ['key', 'value'] format, which also contains sub-lists. How can I convert this nested list to JSON format in Python?
[[' key ', ' 1542633482511430199'
],
['value=>>>BasicData',
[['isConfirmAndOrder', '0'],['brmRequestId', 'BR-2018-0000124'],
['requestType','batch'],['projectId', 'PRJ-2018-0000477'],
['createdOn', 'Mon Nov 19 18:48:02 IST 2018']]
],
['createdBy=>>>BasicData',
[['userId', '999996279'], ['email', 'ITEST275#ITS.JNJ.com'],
['firstName', 'Iris'], ['lastName', 'TEST275'],
['ntId', 'itest275'], ['region', 'NA'],
[' LastAccessTime ', ' 1542639905785 ']]
]
]
The expected format is:
{
"key": "1542633482511430199",
"value=>>>BasicData": {
"isConfirmAndOrder": "0",
"brmRequestId": "BR-2018-0000124"
.
},
"createdBy=>>>BasicData": {
"userId": "999996279",
"email": "ITEST275#ITS.JNJ.com"
.
}
.
}
The actual format of the larger data is:
[
[
['key11','value11']
['key12',['key13','value13']]
['key14',['key15','value15']]
]
[
['key21','value21']
['key22',['key23','value23']]
['key24',['key25','value25']]
]
]
You can write a simple recursive function for this:
def to_dict_recursive(x):
    d = {}
    for key, value in x:
        if isinstance(value, list):
            value = to_dict_recursive(value)
        else:
            value = value.strip()  # get rid of unnecessary whitespace
        d[key.strip()] = value
    return d
to_dict_recursive(x)
# {'createdBy=>>>BasicData': {'displayName': 'Iris TEST275',
# 'email': 'ITEST275#ITS.JNJ.com',
# 'firstName': 'Iris',
# 'lastName': 'TEST275',
# 'ntId': 'itest275',
# 'region': 'NA',
# 'roles': '[0]CG510_DHF_AP_Role',
# 'userId': '999996279'},
# 'formulaDetails=>>>BasicData': {'CreationTime': '1542633482512',
# 'LastAccessTime': '1542639905785',
# 'batchSizeUnits': 'kg<<<<<<',
# 'hitCount': '1',
# 'version': '1'},
# 'key': '1542633482511430199',
# 'value=>>>BasicData': {'brmRequestId': 'BR-2018-0000124',
# 'createdMonth': 'Nov',
# 'createdOn': 'Mon Nov 19 18:48:02 IST 2018',
# 'department': 'Global Packaging',
# 'gxp': '1',
# 'id': '1542633482511430199',
# 'isConfirmAndOrder': '0',
# 'isFilling': 'false',
# 'projectId': 'PRJ-2018-0000477',
# 'projectName': 'Automation_Product_By_Admin',
# 'requestType': 'batch',
# 'status': 'New',
# 'statusDescription': 'Batch request created',
# 'updatedOn': 'Mon Nov 19 18:48:02 IST 2018'}}
(I ran this in Python 3.6 so the order of the keys in the dictionary representation is different than insertion order. In Python 3.7+ this would be different.)
You can even make this into a dict comprehension:
def to_dict_recursive(x):
    return {key.strip(): to_dict_recursive(value) if isinstance(value, list)
            else value.strip()
            for key, value in x}
Since apparently some elements in your object are not a two-element list of key and value, you can add a simple guard against that:
def to_dict_recursive(x):
    d = {}
    try:
        for key, value in x:
            if isinstance(value, list):
                value = to_dict_recursive(value)
            else:
                value = value.strip()
            d[key.strip()] = value
    except ValueError:
        return x
    return d
x = [[' key ', ' 1542633482511430199'],
["test", ["a", "b", "c"]]
]
to_dict_recursive(x)
# {'key': '1542633482511430199', 'test': ['a', 'b', 'c']}
Note that if mylist is a key-value pair list, then dict(mylist) simply returns a dictionary version of it. The tricky part is traversing deep into those nested lists to replace them with dictionaries. Here's a recursive function that does that:
# Where <kv> is your giant list-of-lists.
def kv_to_dict(kv):
    if isinstance(kv, list):
        kv = dict(kv)
        for k in kv:
            if isinstance(kv[k], list):
                kv[k] = kv_to_dict(kv[k])
    return kv
newdict = kv_to_dict(kvpairs)
Once you have things converted to a dictionary, you can just use json.dumps() to format it as JSON:
import json
as_json = json.dumps(newdict, indent=4)
print(as_json)
I see though that you've tried something similar and got an error. Are you sure that all of the lists in your data are really key-value pairs, and not for example a list of 3 strings?

Flatten a nested dict structure into a dataset

For some post-processing, I need to flatten a structure like this
a = {'foo': {
         'cat': {'name': 'Hodor', 'age': 7},
         'dog': {'name': 'Mordor', 'age': 5}},
     'bar': {'rat': {'name': 'Izidor', 'age': 3}}
     }
into this dataset:
[{'foobar': 'foo', 'animal': 'dog', 'name': 'Mordor', 'age': 5},
{'foobar': 'foo', 'animal': 'cat', 'name': 'Hodor', 'age': 7},
{'foobar': 'bar', 'animal': 'rat', 'name': 'Izidor', 'age': 3}]
So I wrote this function:
import copy

def flatten(data, primary_keys):
    out = []
    keys = copy.copy(primary_keys)
    keys.reverse()
    def visit(node, primary_values, prim):
        if len(prim):
            p = prim.pop()
            for key, child in node.items():
                primary_values[p] = key
                visit(child, primary_values, copy.copy(prim))
        else:
            new = copy.copy(node)
            new.update(primary_values)
            out.append(new)
    visit(data, {}, keys)
    return out
out = flatten(a, ['foobar', 'animal'])
I was not really satisfied because I have to use copy.copy to protect my inputs. Obviously, when using flatten one does not want the inputs to be altered.
Then I thought about one alternative that uses more global variables (at least global to flatten) and uses an index instead of directly passing primary_keys to visit. However, this does not really help me to get rid of the ugly initial copy:
keys = copy.copy(primary_keys)
keys.reverse()
So here is my final version:
def flatten(data, keys):
    data = copy.copy(data)
    keys = copy.copy(keys)
    keys.reverse()
    out = []
    values = {}
    def visit(node, id):
        if id:
            id -= 1
            for key, child in node.items():
                values[keys[id]] = key
                visit(child, id)
        else:
            node.update(values)
            out.append(node)
    visit(data, len(keys))
    return out
Is there a better implementation (that can avoid the use of copy.copy)?
Edit: modified to account for variable dictionary depth.
By using the merge function from my previous answer (below), you can avoid calling update which modifies the caller. There is then no need to copy the dictionary first.
def flatten(data, keys):
    out = []
    values = {}
    def visit(node, id):
        if id:
            id -= 1
            for key, child in node.items():
                values[keys[id]] = key
                visit(child, id)
        else:
            out.append(merge(node, values))  # use merge instead of update
    visit(data, len(keys))
    return out
One thing I don't understand is why you need to protect the keys input. I don't see them being modified anywhere.
Previous answer
How about list comprehension?
def merge(d1, d2):
    return dict(list(d1.items()) + list(d2.items()))

[[merge({'foobar': key, 'animal': sub_key}, sub_sub_dict)
  for sub_key, sub_sub_dict in sub_dict.items()]
 for key, sub_dict in a.items()]
The tricky part was merging the dictionaries without using update (which returns None).
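On Python 3.5+ the same merge can be written with dict unpacking, and on 3.9+ with the | operator, which avoids the helper entirely:
def merge(d1, d2):
    # equivalent to the list-concatenation version above, Python 3.5+
    return {**d1, **d2}

# or, on Python 3.9+:
# merged = d1 | d2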

How do I serialize / deserialize this entity?

The code below defines a dictionary used to transform field values. Data is read, some of the values are transformed based on this dictionary, and the result is written to a table. It works as-is. The problem: I now want to move this configuration out of the .py file into a JSON configuration file.
lookups = {
    11: {
        "ST1": ["ABC"],
        "UNK01": ["125", "ACD"],
        "A": ["52"],
        "B": ["91"],
        "C": ["92"],
        "D": ["95"]
    },
    10: {
        "XYZ01": ["91"],
        "XYZ02": ["83"],
        "XYZ03": ["27"]
    }
}
According to jsonlint.com, in order for the above value being assigned to lookups to be valid JSON, I must quote the 11 and 10 keys. Doing so breaks my Python code and displays TypeError: list indices must be integers, not str.
How do I create valid JSON and minimize changes to my code?
If you want to dump it to a json file:
import json

with open("config.json", "w") as f:
    json.dump(lookups, f)  # dump dict to file

with open("config.json") as f:
    s = json.load(f)  # load dict from file

print(s)
{'11': {'ST1': ['ABC'], 'A': ['52'], 'D': ['95'], 'UNK01': ['125', 'ACD'], 'B': ['91'], 'C': ['92']}, '10': {'XYZ01': ['91'], 'XYZ03': ['27'], 'XYZ02': ['83']}}
If you need keys as ints you can loop and cast as ints or use pickle:
import pickle

with open("in.pkl", "wb") as f:
    pickle.dump(lookups, f)

with open("in.pkl", "rb") as f:
    s = pickle.load(f)

print(s)
{10: {'XYZ03': ['27'], 'XYZ01': ['91'], 'XYZ02': ['83']}, 11: {'UNK01': ['125', 'ACD'], 'B': ['91'], 'D': ['95'], 'ST1': ['ABC'], 'C': ['92'], 'A': ['52']}}
If not, just use it as is.
If you know what type of data your keys are, a simple int on the keys would suffice:
dictionary_from_json = json.loads(dumped)
newdict = {}
for key, val in dictionary_from_json.items():
    newdict[int(key)] = val
You can extend json.JSONDecoder and convert all keys to int where possible.
import json

class Json(json.JSONDecoder):
    def decode(self, json_string):
        default_obj = super(Json, self).decode(json_string)
        new_obj = self._rec_serial(default_obj)
        return new_obj

    def _rec_serial(self, default):
        new_dict = {}
        for key, value in default.items():
            is_dict = isinstance(value, dict)
            value = self._rec_serial(value) if is_dict else value
            try:
                new_dict[int(key)] = value
            except ValueError:
                new_dict[key] = value
        return new_dict

json2 = Json()
d = json2.decode(dumped)
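An alternative to subclassing the decoder is the object_hook parameter of json.loads, which is called with every decoded JSON object; a minimal sketch, assuming keys that look like integers should become ints:
import json

def int_keys(obj):
    # convert keys that look like integers back to int, leave the rest alone
    return {int(k) if k.lstrip('-').isdigit() else k: v for k, v in obj.items()}

lookups = json.loads(dumped, object_hook=int_keys)  # 'dumped' as in the snippet above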
