Accessing wrong attribute when trying to reset value...? - python

In this example code I'm trying to make the whole pile of data callable:
class Data(object):
    """Expose the items of a (possibly nested) dict as instance attributes.

    Nested dicts become ``Data`` instances, or ``Dataset`` instances when
    they contain a ``'data'`` key. The dict that was passed in is kept by
    reference (not copied) and is available through ``raw``.
    """

    def __init__(self, data):
        self._raw_data = data
        for key, val in data.items():
            if isinstance(val, dict):
                if 'data' in val:
                    setattr(self, key, Dataset(val))
                else:
                    setattr(self, key, Data(val))
            else:
                # Non-dict values (lists included) are stored as-is, i.e. the
                # attribute and the raw dict share the same object.
                setattr(self, key, val)

    def __getattr__(self, name):
        # Only invoked when normal lookup fails: unknown attributes read as
        # None instead of raising AttributeError.
        return None

    @property
    def raw(self):
        """Return the dict this object was built from."""
        return self._raw_data


class Dataset(Data):
    """A ``Data`` whose ``data`` list entries are wrapped in ``Data`` objects."""

    def __init__(self, data):
        self._raw_data = data
        super().__init__(data)
        # NOTE: ``self.data`` is the very list object stored inside the raw
        # dict (``Data.__init__`` assigned it without copying), so replacing
        # its entries in place is also visible through ``raw``. This is the
        # behaviour the question is asking about.
        for entry in self.data:
            # need to debug this
            self.data[self.data.index(entry)] = Data(entry)
What ends up happening is this. I'll try:
>>> example = {'dataset': {'id': 1, 'data': [{'param1': 3, 'param2': 4}, {'param1': 5, 'param2': 6}]}}
>>> data1 = Data(example)
>>> print(data1.dataset.id)
1
Now that works as expected. The problem is the last line, which causes a change in the _raw_data attribute of the Dataset object, and I have no idea how (it also looks wrong). Clearly I don't understand something important about inheritance. What I am trying to do is put all data into callable attributes, but also keep the raw data for all kinds of purposes. This is what I get however:
>>> print(data1.raw)
{'dataset': {'id': 1, 'data': [<wtf.Data object at 0x102a8da58, <wtf.Data object at 0x102a8da90>]}}
It should however return raw data as it was passed:
>>> print(data1.raw)
{'dataset': {'id': 1, 'data': [{'param1': 3, 'param2': 4}, {'param1': 5, 'param2': 6}]}}
It works for Data objects:
>>> for item in data1.dataset.data:
... print(item.raw)
{'param2': 4, 'param1': 3}
{'param2': 6, 'param1': 5}
Thanks for help.

Related

Append changing all elements from list, python

I am working on something which should manage multiple water dispensers. I need to get some data from a json file and then load it into objects after that, append the objects to a list. For some reason list.append changes other object's parameters(more specific, location). Here is my code:
WaterDispenser.py
class WaterDispenser():
    """A water dispenser described by an id, a status flag and a location list."""

    # NOTE(review): the ``[-1, -1]`` default is built once, when the ``def``
    # is evaluated, so every instance created without an explicit
    # ``location`` stores the same list object.
    def __init__(self, id: int = -1, status: bool = False, location: list=[-1, -1]) -> None:
        self.id = id
        self.status = status
        self.location = location
    def Dump(self) -> dict:
        """Dumps the properties in a json dictionary
        Returns
        -------
        dict
        A dictionary with a collection of properties and their names
        """
        return {"id": self.id, "status": self.status, "location":[self.location[0], self.location[1]]}
    def Load(self, object: dict) -> None:
        """Loads the json dictionary in memory
        Parameters
        ----------
        object : dict, required
        The json dictionary with the properties of the dispenser
        Returns
        -------
        None
        """
        self.id = object["id"]
        self.status = object["status"]
        # Written element by element, i.e. the existing list held in
        # ``self.location`` is modified in place rather than replaced.
        self.location[0] = object["location"][0]
        self.location[1] = object["location"][1]
        return None
main.py
import json
from WaterDispenser import WaterDispenser
dispensers = []
def LoadDispensers(path: str = "dispensers.json") -> int:
    """Loads the json file in memory.
    Parameters
    ---------
    path : str, optional
    The path of the file to be loaded. Defaults to "dispensers.json".
    Returns
    -------
    int
    Count of dispensers data loaded
    """
    global dispensers
    dispensers = []
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(path, "r") as handle:
        data = json.load(handle)
    for d in data:
        x = WaterDispenser()
        x.Load(d)
        dispensers.append(x)
    return len(dispensers)
if __name__ == '__main__':
print(LoadDispensers())
print([o.Dump() for o in dispensers])
dispensers.json
[
{"id": 0, "status": true, "location": [0, 0]},
{"id": 1, "status": true, "location": [0, 1]},
{"id": 2, "status": false, "location": [1, 1]}
]
Output:
3
[{'id': 0, 'status': True, 'location': [1, 1]}, {'id': 1, 'status': True, 'location': [1, 1]}, {'id': 2, 'status': False, 'location': [1, 1]}]
The functional answer:
Change the init of WaterDispenser to
from typing import Optional
class WaterDispenser():
    """Water dispenser whose default location is a fresh list per instance."""

    def __init__(self, id: int = -1, status: bool = False, location: Optional[list] = None) -> None:
        self.id = id
        self.status = status
        # ``None`` (or any falsy value) selects a new ``[-1, -1]`` list built
        # on every call, so instances never share one default list.
        self.location = location or [-1, -1]
This should result in the expected response of
3
[{'id': 0, 'status': True, 'location': [0, 0]}, {'id': 1, 'status': True, 'location': [0, 1]}, {'id': 2, 'status': False, 'location': [1, 1]}]
The why:
Generally you want to avoid using mutable values as kwarg values because they're pre-computed (so your default location argument was technically the same object in memory across your WaterDispenser instances). append wasn't the culprit here and you can read more about this all via this SO discussion or read a succinct explanation via this answer to a similar question.
Design note:
It's worth noting that the way you are using Load in the above example could just be folded into WaterDispenser.__init__, so something like
from typing import Dict, Any
class WaterDispenser():
    """Water dispenser built directly from a JSON-style dictionary."""

    # ``Dict`` takes two type parameters (key, value); ``Dict[Any]`` raises
    # TypeError as soon as the ``def`` is evaluated.
    def __init__(self, data: Dict[str, Any]) -> None:
        # Missing keys fall back to the same defaults as the original class.
        self.id = data.get("id", -1)
        self.status = data.get("status", False)
        self.location = data.get("location", [-1, -1])
or if you want to avoid typing
class WaterDispenser():
    """Water dispenser built directly from a JSON-style dictionary."""

    def __init__(self, data: dict) -> None:
        # Missing keys fall back to the same defaults as the original class;
        # the default list literal is evaluated on every call.
        self.id = data.get("id", -1)
        self.status = data.get("status", False)
        self.location = data.get("location", [-1, -1])
That example still includes your default values but if you removed the secondary arguments from those get calls you could protect against missing data at runtime without having to check to see if you had, say, an impossible location data point like [-1, -1].

Recursively creates dataclasses based in nested dictionary

I have a dataclass called Config that is created through the properties and values of a dictionary. Since this dictionary can have nested dictionaries, i would like to make nested dictionaries as Config objects. Here is an example:
## Dummy example of a config dict
data = {
'a' : 1,
'b' : [2,2,2],
'c': {
'c_1' : 3.1
}
}
final_config = create_config(data)
# Expected result
Config(a=1, b=[2,2,2], c=Config(c_1=3.1) )
Here is what I've come up with, using dataclasses.make_dataclass:
def _Config(params_dict):
    # Creates a dataclass type named 'Config' with one field per key of
    # params_dict, then instantiates it with the dict's values.
    # NOTE(review): make_dataclass is not imported in this snippet;
    # presumably `from dataclasses import make_dataclass` - confirm.
    config = make_dataclass('Config', params_dict.keys())
    return config(**params_dict)


def get_inner_dict(d):
    # NOTE(review): both branches return, so the loop never gets past the
    # first item of d.
    for _, v in d.items():
        if isinstance(v, dict):
            return get_inner_dict(v)
        else:
            # NOTE(review): _Config declares a single positional parameter,
            # but here the dict is unpacked into keyword arguments.
            return _Config(**d)
Unfortunately, this doesn't work because the recursion will try to create a dataclass object when it finds a single value. I feel like i'm in the right way, but couldn't figure out what needs to change.
It looks like you (technically) don't need to use dataclasses or make_dataclass in this scenario.
You can implement a custom class with a __dict__ update approach as mentioned by @Stef. Check out the following example:
from __future__ import annotations
## Dummy example of a config dict
data = {
'a': 1,
'b': [2, 2, 2],
'c': {
'c_1': 3.1
},
'd': [
1,
'2',
{'k1': 'v1'}
]
}
# Values of these types are converted recursively by Config.create.
_CONTAINER_TYPES = (dict, list)


class Config:
    """Attribute-style view of a dictionary; nested dicts become Config objects."""

    def __init__(self, **kwargs):
        # The keyword arguments become the instance attributes directly.
        self.__dict__ = kwargs

    @classmethod
    def create(cls, data: "dict | list") -> "Config | list":
        """Recursively convert ``data`` into Config objects.

        A dict yields a Config; a list yields a list whose dict/list
        elements are converted in turn. Other values are kept unchanged.
        """
        if isinstance(data, list):
            return [cls.create(e) if isinstance(e, _CONTAINER_TYPES) else e
                    for e in data]

        new_data = {
            k: cls.create(v) if isinstance(v, _CONTAINER_TYPES) else v
            for k, v in data.items()
        }
        return cls(**new_data)

    def __repr__(self):
        fields = ', '.join(f'{name}={val!r}' for name, val in self.__dict__.items())
        return f"Config({fields})"
final_config = Config.create(data)
print(final_config)
# Prints:
# Config(a=1, b=[2, 2, 2], c=Config(c_1=3.1), d=[1, '2', Config(k1='v1')])

python-marshmallow: deserializing nested schema with only one exposed key

I am trying to serialize a list of nested objects as scalar values by taking only one field from the nested item. Instead of [{key: value}, ...] I want to receive [value1, value2, ...].
Code:
from marshmallow import *
class MySchema(Schema):
    # Nested item schema with a single required string field named 'key'.
    key = fields.String(required=True)


class ParentSchema(Schema):
    # List (many=True) of MySchema items restricted to the 'key' field via
    # only='key'; per the question this dumps each item as a bare value.
    items = fields.Nested(MySchema, only='key', many=True)
Given the above schemas, I want to serialize some data:
>>> data = {'items': [{'key': 1}, {'key': 2}, {'key': 3}]}
>>> result, errors = ParentSchema().dump(data)
>>> result
{'items': ['1', '2', '3']}
This works as expected, giving me the list of scalar values. However, when trying to deserialize the data using the models above, the data is suddenly invalid:
>>> data, errors = ParentSchema().load(result)
>>> data
{'items': [{}, {}, {}]}
>>> errors
{'items': {0: {}, '_schema': ['Invalid input type.', 'Invalid input type.', 'Invalid input type.'], 1: {}, 2: {}}}
Is there any configuration option I am missing or is this simply not possible?
For anyone stumbling across the same issue, this is the workaround I am using currently:
class MySchema(Schema):
    key = fields.String(required=True)

    def load(self, data, *args):
        # Workaround: re-wrap every bare string as {'key': <string>} before
        # delegating to the regular load; non-string items pass through.
        # NOTE(review): iterates ``data`` directly, so this presumably
        # expects the list form used with many=True - confirm.
        data = [
            {'key': item} if isinstance(item, str) else item
            for item in data
        ]
        return super().load(data, *args)


class ParentSchema(Schema):
    # Same declaration as in the question: list of MySchema exposed as 'key' only.
    items = fields.Nested(MySchema, only='key', many=True)

Flatten a nested dict structure into a dataset

For some post-processing, I need to flatten a structure like this
{'foo': {
'cat': {'name': 'Hodor', 'age': 7},
'dog': {'name': 'Mordor', 'age': 5}},
'bar': { 'rat': {'name': 'Izidor', 'age': 3}}
}
into this dataset:
[{'foobar': 'foo', 'animal': 'dog', 'name': 'Mordor', 'age': 5},
{'foobar': 'foo', 'animal': 'cat', 'name': 'Hodor', 'age': 7},
{'foobar': 'bar', 'animal': 'rat', 'name': 'Izidor', 'age': 3}]
So I wrote this function:
def flatten(data, primary_keys):
    # Flattens nested dicts into a list of leaf dicts; each leaf gets one
    # extra entry per nesting level, named after primary_keys (outermost
    # level first) and holding the dict key met at that level.
    out = []
    # Work on a reversed copy so that pop() yields the outermost name first
    # and the caller's list is left untouched.
    keys = copy.copy(primary_keys)
    keys.reverse()
    def visit(node, primary_values, prim):
        if len(prim):
            p = prim.pop()
            # NOTE(review): dict.iteritems() exists only on Python 2.
            for key, child in node.iteritems():
                primary_values[p] = key
                # Each child gets its own copy because visit() pops from it.
                visit(child, primary_values, copy.copy(prim))
        else:
            # Leaf reached: copy it so the caller's dict is not modified.
            new = copy.copy(node)
            new.update(primary_values)
            out.append(new)
    visit(data, { }, keys)
    return out
out = flatten(a, ['foo', 'bar'])
I was not really satisfied because I have to use copy.copy to protect my inputs. Obviously, when using flatten one does not want the inputs be altered.
Then I thought about one alternative that uses more global variables (at least global to flatten) and uses an index instead of directly passing primary_keys to visit. However, this does not really help me to get rid of the ugly initial copy:
keys = copy.copy(primary_keys)
keys.reverse()
So here is my final version:
def flatten(data, keys):
    # Same contract as the first version, using an index into ``keys``
    # instead of passing copies of the key list down the recursion.
    # NOTE(review): copy.copy is shallow, so the nested leaf dicts are still
    # the caller's objects and node.update() below modifies them in place.
    data = copy.copy(data)
    keys = copy.copy(keys)
    keys.reverse()
    out = []
    values = {}
    def visit(node, id):
        if id:
            id -= 1
            # NOTE(review): dict.iteritems() exists only on Python 2.
            for key, child in node.iteritems():
                # keys was reversed above, so the highest index names the
                # outermost level.
                values[keys[id]] = key
                visit(child, id)
        else:
            node.update(values)
            out.append(node)
    visit(data, len(keys))
    return out
Is there a better implementation (that can avoid the use of copy.copy)?
Edit: modified to account for variable dictionary depth.
By using the merge function from my previous answer (below), you can avoid calling update which modifies the caller. There is then no need to copy the dictionary first.
def flatten(data, keys):
    """Flatten nested dicts into a list of leaf dicts.

    ``keys`` names the nesting levels, outermost first. Every leaf dict is
    returned merged with one ``{level_name: dict_key}`` entry per level.
    Neither ``data`` nor ``keys`` is modified.
    """
    out = []
    values = {}

    def visit(node, level):
        # Index ``keys`` from the front: without the ``keys.reverse()`` of the
        # question's version, counting down from len(keys) would attach the
        # names to the levels in reverse order.
        if level < len(keys):
            for key, child in node.items():
                values[keys[level]] = key
                visit(child, level + 1)
        else:
            out.append(merge(node, values))  # use merge instead of update

    visit(data, 0)
    return out
One thing I don't understand is why you need to protect the keys input. I don't see them being modified anywhere.
Previous answer
How about list comprehension?
def merge(d1, d2):
    """Return a new dict holding the items of d1 overlaid with those of d2.

    Neither argument is modified; on duplicate keys the value from d2 wins.
    """
    return {**d1, **d2}
[[merge({'foobar': key, 'animal': sub_key}, sub_sub_dict)
for sub_key, sub_sub_dict in sub_dict.items()]
for key, sub_dict in a.items()]
The tricky part was merging the dictionaries without using update (which returns None).

JSON serialize a dictionary with tuples as key

Is there a way in Python to serialize a dictionary that is using a tuple as key?
e.g.
a = {(1, 2): 'a'}
simply using json.dumps(a) raises this error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/json/__init__.py", line 230, in dumps
return _default_encoder.encode(obj)
File "/usr/lib/python2.6/json/encoder.py", line 367, in encode
chunks = list(self.iterencode(o))
File "/usr/lib/python2.6/json/encoder.py", line 309, in _iterencode
for chunk in self._iterencode_dict(o, markers):
File "/usr/lib/python2.6/json/encoder.py", line 268, in _iterencode_dict
raise TypeError("key {0!r} is not a string".format(key))
TypeError: key (1, 2) is not a string
You can't serialize that as json, json has a much less flexible idea about what counts as a dict key than python.
You could transform the mapping into a sequence of key, value pairs, something like this:
import json
def remap_keys(mapping):
    """Turn a mapping into a list of ``{'key': k, 'value': v}`` dicts.

    This moves the keys into value position, so non-string keys such as
    tuples no longer have to be JSON object keys.
    """
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    return [{'key': k, 'value': v} for k, v in mapping.items()]
...
json.dumps(remap_keys({(1, 2): 'foo'}))
>>> '[{"value": "foo", "key": [1, 2]}]'
from json import loads, dumps
from ast import literal_eval
x = {(0, 1): 'la-la la', (0, 2): 'extricate'}
# save: convert each tuple key to a string before saving as json object
s = dumps({str(k): v for k, v in x.items()})
# load in two stages:
# (i) load json object
obj = loads(s)
# (ii) convert loaded keys from string back to tuple
d = {literal_eval(k): v for k, v in obj.items()}
See https://stackoverflow.com/a/12337657/2455413.
JSON only supports strings as keys. You'll need to choose a way to represent those tuples as strings.
You could just use str((1,2)) as key because json only expects the keys as strings but if you use this you'll have to use a[str((1,2))] to get the value.
json can only accept strings as keys for dict,
what you can do, is to replace the tuple keys with string like so
with open("file", "w") as f:
    # ``dic`` is the dict with tuple keys; it is defined outside this snippet.
    k = dic.keys()
    v = dic.values()
    # Replace every key by its str() form so the dict becomes serializable.
    k1 = [str(i) for i in k]
    # json.dumps() already produces a string and json.dump() then writes that
    # string as a JSON string literal, so the file content is encoded twice.
    json.dump(json.dumps(dict(zip(*[k1,v]))),f)
And then, when you want to read it, you can change the keys back to tuples using
with open("file", r) as f:
data = json.load(f)
dic = json.loads(data)
k = dic.keys()
v = dic.values()
k1 = [eval(i) for i in k]
return dict(zip(*[k1,v]))
This solution:
Avoids the security risk of eval().
Is short.
Is copy-pastable as save and load functions.
Keeps the structure of tuple as the key, in case you are editing the JSON by hand.
Adds ugly \" to the tuple representation, which is worse than the other str()/eval() methods here.
Can only handle tuples as keys at the first level for nested dicts (as of this writing no other solution here can do better)
def json_dumps_tuple_keys(mapping):
    """Serialize a dict with tuple keys to a JSON string.

    Each key is itself JSON-encoded (a tuple becomes the text of a JSON
    array) and used as the string key of the resulting JSON object. Only
    the keys of the top-level mapping are converted.
    """
    string_keys = {json.dumps(k): v for k, v in mapping.items()}
    return json.dumps(string_keys)
def json_loads_tuple_keys(string):
    """Parse a JSON object whose keys are JSON-encoded arrays.

    Every top-level key is decoded with ``json.loads`` and converted to a
    tuple; the values are returned as parsed.
    """
    mapping = json.loads(string)
    return {tuple(json.loads(k)): v for k, v in mapping.items()}
m = {(0,"a"): "first", (1, "b"): [9, 8, 7]}
print(m) # {(0, 'a'): 'first', (1, 'b'): [9, 8, 7]}
s = json_dumps_tuple_keys(m)
print(s) # {"[0, \"a\"]": "first", "[1, \"b\"]": [9, 8, 7]}
m2 = json_loads_tuple_keys(s)
print(m2) # {(0, 'a'): 'first', (1, 'b'): [9, 8, 7]}
print(m==m2) # True
Here is one way to do it. It will require the key to be json decoded after the main dictionary is decoded and the whole dictionary re-sequenced, but it is doable:
import json
def jsonEncodeTupleKeyDict(data):
    """Return ``data`` as a JSON string, with every key JSON-encoded first.

    A tuple key becomes the text of a JSON array, which is a valid string
    key for the resulting JSON object.
    """
    # creates new dictionary with the original tuple converted to json string
    ndict = {json.dumps(key): value for key, value in data.items()}
    # now encode the new dictionary and return that
    return json.dumps(ndict)


def main():
    """Show that plain json.dumps fails on tuple keys while the helper works."""
    tdict = dict()
    for i in range(10):
        key = (i, "data", 5*i)
        tdict[key] = i*i
    try:
        print(json.dumps(tdict))
    except TypeError as e:
        print("JSON Encode Failed!", e)
    print(jsonEncodeTupleKeyDict(tdict))
if __name__ == '__main__':
main()
I make no claim to any efficiency of this method. I needed this for saving some joystick mapping data to a file. I wanted to use something that would create a semi-human readable format so it could be edited if needed.
You can actually not serialize tuples as key to json, but you can convert the tuple to a string and recover it, after you have deserialized the file.
with_tuple = {(0.1, 0.1): 3.14} ## this will work in python but is not serializable in json
{(0.1, 0.1): 3.14}
But you cannot serialize it with json. However, you can use
with_string = {str((0.1, 0.1))[1:-1]: 3.14} ## the expression [1,-1] removes the parenthesis surrounding the tuples in python.
{'0.1, 0.1': 3.14} # This is serializable
With a bit of cheating, you will recover the original tuple (after having deserialized the whole file) by treating each key (as str) separately
tuple(json.loads("["+'0.1, 0.1'+"]")) ## will recover the tuple from string
(0.1, 0.1)
It is a bit of overload to convert a string to a tuple using json.loads, but it will work. Encapsulate it and you are done.
Peace out and happy coding!
Nicolas
Here are two functions you could use to convert a dict_having_tuple_as_key into a json_array_having_key_and_value_as_keys and then de-convert it the way back
import json
def json_dumps_dict_having_tuple_as_key(dict_having_tuple_as_key):
    """Serialize a dict with tuple keys as a JSON array of key/value records.

    Each item becomes ``{"key": <tuple as list>, "value": <value>}``.

    Raises
    ------
    Exception
        If the argument is not a dict.
    """
    if not isinstance(dict_having_tuple_as_key, dict):
        raise Exception('Error using json_dumps_dict_having_tuple_as_key: The input variable is not a dictionary.')
    list_of_dicts_having_key_and_value_as_keys = [
        {'key': k, 'value': v} for k, v in dict_having_tuple_as_key.items()
    ]
    return json.dumps(list_of_dicts_having_key_and_value_as_keys)
def json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(json_array_having_key_and_value_as_keys):
    """Rebuild a dict with tuple keys from a JSON array of key/value records.

    The input is a JSON array of ``{"key": [...], "value": ...}`` objects;
    each ``key`` list is converted back into a tuple.

    Raises
    ------
    Exception
        If any record lacks the "key" or the "value" entry.
    """
    list_of_dicts_having_key_and_value_as_keys = json.loads(json_array_having_key_and_value_as_keys)
    # Every record needs BOTH entries. The previous ``not all(..) and all(..)``
    # bound ``not`` to the first test only, so records without "value" slipped
    # through the check.
    if not all('key' in diz and 'value' in diz for diz in list_of_dicts_having_key_and_value_as_keys):
        raise Exception('Error using json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps: at least one dictionary in list_of_dicts_having_key_and_value_as_keys ismissing key "key" or key "value".')
    dict_having_tuple_as_key = {}
    for dict_having_key_and_value_as_keys in list_of_dicts_having_key_and_value_as_keys:
        dict_having_tuple_as_key[tuple(dict_having_key_and_value_as_keys['key'])] = dict_having_key_and_value_as_keys['value']
    return dict_having_tuple_as_key
usage example:
my_dict = {
('1', '1001', '2021-12-21', '1', '484'): {"name": "Carl", "surname": "Black", "score": 0},
('1', '1001', '2021-12-22', '1', '485'): {"name": "Joe", "id_number": 134, "percentage": 11}
}
my_json = json_dumps_dict_having_tuple_as_key(my_dict)
print(my_json)
[{'key': ['1', '1001', '2021-12-21', '1', '484'], 'value': {'name': 'Carl', 'surname': 'Black', 'score': 0}},
{'key': ['1', '1001', '2021-12-22', '1', '485'], 'value': {'name': 'Joe', 'id_number': 134, 'percentage': 11}}]
my_dict_reconverted = json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(my_json)
print(my_dict_reconverted)
{('1', '1001', '2021-12-21', '1', '484'): {'name': 'Carl', 'surname': 'Black', 'score': 0},
('1', '1001', '2021-12-22', '1', '485'): {'name': 'Joe', 'id_number': 134, 'percentage': 11}}
# proof of working 1
my_dict == my_dict_reconverted
True
# proof of working 2
my_dict == json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(
json_dumps_dict_having_tuple_as_key(my_dict)
)
True
(Using concepts expressed by @SingleNegationElimination to answer @Kvothe's comment)
Here's a complete example to encode/decode nested dictionaries with tuple keys and values into/from json. tuple key will be a string in JSON.
values of types tuple or set will be converted to list
def JSdecoded(item, dict_key=False):
    """Undo ``JSencoded`` on data returned by ``json.loads``.

    Lists and dicts are walked recursively. Dict keys that hold a Python
    literal (such as ``"('Apple', 'Green')"``) are converted back with
    ``ast.literal_eval``; keys that are not literals are kept as they are.
    ``dict_key`` is accepted for symmetry with ``JSencoded`` and is unused.
    """
    def decode_key(key):
        # Ordinary string keys such as "name" are not Python literals;
        # literal_eval rejects them, so keep them unchanged.
        try:
            return literal_eval(key)
        except (ValueError, SyntaxError):
            return key

    if isinstance(item, list):
        return [JSdecoded(e) for e in item]
    if isinstance(item, dict):
        # Recurse into the values as well, so nested dicts get their keys back.
        return {decode_key(key): JSdecoded(value) for key, value in item.items()}
    return item
def JSencoded(item, dict_key=False):
    """Convert ``item`` into a structure that ``json.dumps`` accepts.

    Tuples used as dict keys (``dict_key=True``) become their ``str()``
    form. Tuples and sets used as values become lists. Lists, dicts, tuples
    and sets are processed recursively; anything else is returned unchanged.
    """
    if isinstance(item, tuple):
        if dict_key:
            return str(item)
        # Encode the elements too: a tuple value may itself contain tuples,
        # sets or dicts with tuple keys.
        return [JSencoded(e) for e in item]
    if isinstance(item, (list, set)):
        return [JSencoded(e) for e in item]
    if isinstance(item, dict):
        return {JSencoded(key, True): JSencoded(value) for key, value in item.items()}
    return item
usage
import json
pydata = [
{ ('Apple','Green') : "Tree",
('Orange','Yellow'):"Orchard",
('John Doe', 1945) : "New York" }
]
jsstr= json.dumps(JSencoded(pydata), indent='\t')
print(jsstr)
#[
# {
# "('Apple', 'Green')": "Tree",
# "('Orange', 'Yellow')": "Orchard",
# "('John Doe', 1945)": "New York"
# }
#]
data = json.loads(jsstr) #string keys
newdata = JSdecoded(data) #tuple keys
print(newdata)
#[{('Apple', 'Green'): 'Tree', ('Orange', 'Yellow'): 'Orchard', ('John Doe', 1945): 'New York'}]
def stringify_keys(d):
    """Return a copy of ``d`` in which every dict key is replaced by ``str(key)``.

    Dicts, lists and tuples are processed recursively (lists stay lists,
    tuples stay tuples); any other value is returned unchanged.
    """
    if isinstance(d, dict):
        return {str(k): stringify_keys(v) for k, v in d.items()}
    if isinstance(d, (list, tuple)):
        # Rebuild the same sequence type from the converted elements.
        return type(d)(stringify_keys(v) for v in d)
    return d
json.dumps(stringify_keys(mydict))

Categories

Resources