How to safely unpack a dict in Python?

I have a wrapper class - it's an abstraction that I return from the backend to the frontend.

from datetime import datetime
from typing import NamedTuple

class NewsItem(NamedTuple):
    id: str
    title: str
    content: str
    service: str
    published_at: datetime

    @classmethod
    def from_payload(cls, payload) -> 'NewsItem':
        return cls(**payload)

For example, when I get data from elastic I convert it to NewsItem:

return [NewsItem.from_payload(hit['_source'])
        for hit in result['hits']['hits']]
The problem is that I don't want to fail because of unknown fields that may come from elastic. How can I ignore them (or put them into a separate dedicated attribute, NewsItem.extra)?

I think the most elegant way is to use ._fields of NewsItem:

@classmethod
def from_payload(cls, payload) -> 'NewsItem':
    return cls(*(payload[field] for field in cls._fields))

If you want to keep the extras, you need to do a bit more work (with the field extra declared as extra: dict = {}):

@classmethod
def from_payload(cls, payload) -> 'NewsItem':
    fields_no_extra = set(cls._fields) - {'extra'}
    extra_fields = payload.keys() - fields_no_extra
    extras = {field: payload[field] for field in extra_fields}
    data = {field: payload[field] for field in fields_no_extra}
    data['extra'] = extras
    return cls(**data)

You can optimize this further; there is a bit too much set computation ;)
Of course, my solutions do not handle the case where the payload doesn't contain all of the fields of the NewsItem.
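For completeness, a variant that also tolerates missing fields - a minimal sketch, assuming None is an acceptable placeholder for absent fields:

@classmethod
def from_payload(cls, payload) -> 'NewsItem':
    # .get() returns None for any declared field absent from the payload,
    # so both unknown and missing keys are tolerated
    return cls(**{field: payload.get(field) for field in cls._fields})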

You can use **kwargs to let your from_payload method take an arbitrary number of keyword arguments ("kwargs" means "keyword arguments") and discard the unnecessary ones:

class NewsItem(NamedTuple):
    id: str
    title: str
    content: str
    service: str
    published_at: datetime

    @classmethod
    def from_payload(cls, id=None, title=None, content=None, service=None,
                     published_at=None, **kwargs) -> 'NewsItem':
        return cls(id, title, content, service, published_at)
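Note that with this signature the payload dict has to be unpacked at the call site:

return [NewsItem.from_payload(**hit['_source'])
        for hit in result['hits']['hits']]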
An alternative solution is to introspect the NamedTuple class attributes (see @MOROZILnic's answer + comment).

Since your problem is with unknown keys, you can use the dictionary's get method to safely ignore them.
For the get method, the first argument is the key you are looking for and the second argument is the default value, which is returned when the key is not found.
So, do the following:

return [NewsItem.from_payload(hit['_source'])
        for hit in result.get('hits', {}).get('hits', [])]

The above is just an example; change the defaults to whatever you want returned when the result does not have the key you expect.

Related

Translation of message dict into msg enum

I'm dealing with refactoring code which extensively uses dicts in a circumstance where enums could be used. Unfortunately, to reduce typing the dict keys were abbreviated in a cryptic fashion.
In order to have more meaningful code and fewer string literals as well as a more advanced interface I translated the message dictionary based code into an Enum based code using the same messages.
The message dictionaries looked like the following:
MsgDictionary = {'none': None,
                 'STJ': 'start_job',
                 'RPS': 'report_status',
                 'KLJ': 'kill_job'}

ExecStates = {'none': None,
              'JCNS': 'job_could_not_start',
              'JSS': 'job_successfully_started',
              'JSF': 'job_successfully_finished'}
This, unfortunately, led to cluttered code:

...
self.send_message(id=MsgDictionary['STJ'], some_data)
...
msg = self.receive_msg()
if msg.id in (MsgDictionary['STJ'], MsgDictionary['KLJ']):
    self.toggle_job()
...
I would merely like to get rid of the string accesses, the cryptic names and the low-level interface, as in the following. send_message should send the str-typed value of the Enum, not the Enum instance itself.

...
self.send_message(id=MessagesEnum.START_JOB, some_data)
...
msg = self.receive_msg()
if msg.id in (MessagesEnum.START_JOB, MessagesEnum.KILL_JOB):
    self.toggle_job()
...

But as in the original case, undefined execution states should still be allowed, so as not to break existing code. This currently does not work:

e = ExecStates(None)
-> ValueError: None is not a valid ExecutionState
And I would like to be able to compare enum instances, e.g.:
e = ExecState[START_JOB]
if e == ExecState[START_JOB]:
    pass
if e == ExecState[KILL_JOB]:
    pass
Using the following definitions, I believe I'm almost there:
import enum

class _BaseEnum(str, enum.Enum):
    @classmethod
    def values(cls) -> DictValues:
        return cls.__members__.values()

    def _generate_next_value_(name: str, *args: object) -> str:
        return name.lower()

    def __str__(self):
        return str(self.value)  # Use stringification to cover the None value case

class MessageEnum(_BaseEnum):
    NONE = None
    START_JOB = enum.auto()
    REPORT_STATUS = enum.auto()
    KILL_JOB = enum.auto()

class ExecutionState(_BaseEnum):
    NONE = None
    JOB_COULD_NOT_START = enum.auto()
    JOB_SUCCESSFULLY_STARTED = enum.auto()
    JOB_SUCCESSFULLY_FINISHED = enum.auto()
However, one problem remains: how can I deal with a None value as well as strings in the enumerations? In my case, every enum item gets mapped to the lowercase version of its name, which is the intended functionality. However, None unintentionally gets mapped to the string 'None'. This leads to problems at other spots in the existing code, which initializes an ExecutionState instance with None. I would like to cover this case too, so as not to break existing code.
When I add a __new__ method to the _BaseEnum,

def __new__(cls, value):
    obj = str.__new__(cls)
    obj._value_ = value
    return obj

I lose the possibility to compare the enumeration instances, as all instances compare equal to '' (the empty string).
My question is whether, in order to solve my problem, I can special-case the None value either in the _generate_next_value_ or the __new__ method, or maybe by using a proxy pattern?
Two things that should help:
- In your __new__, the creation line should read obj = str.__new__(cls, value) -- that way each instance will compare equal to its lower-cased name.
- Export your enum members to the global namespace, and use is:

NONE, START_JOB, REPORT_STATUS, KILL_JOB = MessageEnum
...
if e is START_JOB: ...
...
if msg.id in (START_JOB, KILL_JOB): ...

(Note that NONE has to be included in the unpacking, since MessageEnum has four members.)
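Putting both suggestions together, a minimal runnable sketch; mapping None to the empty string is one possible way to special-case it, an assumption rather than something the answer prescribes:

import enum

class _BaseEnum(str, enum.Enum):
    def __new__(cls, value):
        # Pass the value through to str.__new__ so each member compares
        # equal to its string value; map None to '' so the NONE member
        # does not become the string 'None'.
        obj = str.__new__(cls, value if value is not None else '')
        obj._value_ = value  # keep the raw value (possibly None) for lookups
        return obj

    def _generate_next_value_(name, start, count, last_values):
        return name.lower()

class MessageEnum(_BaseEnum):
    NONE = None
    START_JOB = enum.auto()
    REPORT_STATUS = enum.auto()
    KILL_JOB = enum.auto()

NONE, START_JOB, REPORT_STATUS, KILL_JOB = MessageEnum

assert MessageEnum(None) is NONE        # lookup by None still works
assert START_JOB == 'start_job'         # members compare equal to their values
assert MessageEnum('kill_job') is KILL_JOB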

Inheritance from class and override method

I have two classes, one of which inherits from the other. When I inherit and redefine the function get_commandes_date, I receive the following error:

TypeError: BooksCommande.get_commandes_date() missing 1 required positional argument: 'key'

This is my code:
from abc import ABC
from typing import NoReturn

class BaseCommande(ABC):
    def __init__(self, list_of_commande: list) -> NoReturn:
        if list_of_commande:
            self.list_of_commande = list_of_commande
            self.commande_date = None
            self.comande_payed = None
            self.commande_price = None
            self.total_commandes = None
            self.process_commande(list_of_commande)
        super().__init__()

    def get_commandes_date(self, list_of_commande):
        return [commande['date_start'] for commande in list_of_commande]

    def process_commande(self, list_of_commande):
        self.commande_date = self.get_commandes_date(list_of_commande)

    def my_dict(self):
        return {
            "commende_date": self.commande_date}

class BooksCommande(BaseCommande):
    def __init__(self, list_of_commande: list) -> NoReturn:
        super().__init__(list_of_commande)
        self.commande_syplies = None
        self.commande_books = None
        self.process_books(list_of_commande)

    def get_commandes_date(self, list_of_commande, key):
        commande_date = []
        for commande in list_of_commande:
            cmd = {
                'date_start': commande['date_start'],
                'key': key,
                'date_end': commande['date_end'],
            }
            commande_date.append(cmd)
        return commande_date

    def get_commande_books(self, books: list):
        return 10

    def process_books(self, list_of_commande):
        self.books_list = self.get_commande_books(list_of_commande)

    def my_dict2(self):
        return {**super().my_dict(),
                "books": self.books_list
                }

commande_list = [{"date_start": "10/10/2021", "date_end": "12/15/2019"}]
print(BooksCommande(commande_list).my_dict2())
Is there a way to force BaseCommande to use the newly redefined function or not? I really don't know how or where to start.

The problem is that you're attempting to change the number of arguments that get passed to the get_commandes_date() method when overriding it, so the call the base class makes with the original signature no longer matches.
The workaround is to make the argument optional. So in class BaseCommande declare a key parameter:

def get_commandes_date(self, list_of_commande, key):
    return [commande['date_start'] for commande in list_of_commande]

And then give it a default value in the derived BooksCommande class's version of the method. (I'm not sure what default might make sense here, so I just made it None.)

def get_commandes_date(self, list_of_commande, key=None):
    commande_date = []
    for commande in list_of_commande:
        cmd = {
            'date_start': commande['date_start'],
            'key': key,
            'date_end': commande['date_end'],
        }
        commande_date.append(cmd)
    return commande_date
As others have explained, the issue with your code is that your subclass, BooksCommande, changes the signature of the get_commandes_date method from the version in the base class, BaseCommande. While that might be a bad idea in an abstract sense, it's not forbidden by Python. The real trouble is that one of BaseCommande's other methods, process_commande, tries to use the old signature, so everything breaks when it gets called.
There is a fairly direct way to fix this, if you want to do so without dramatically changing the code. The general idea is for the two BaseCommande methods to call each other through a private reference. Even if one is overridden in a subclass, the private reference will still point to the original implementation. Name mangling, with two leading underscores, is often useful for this:

class BaseCommande(ABC):
    ...
    def get_commandes_date(self, list_of_commande):  # this method will be overridden
        return [commande['date_start'] for commande in list_of_commande]

    __get_commandes_date = get_commandes_date  # private reference to previous method

    def process_commande(self, list_of_commande):
        self.commande_date = self.__get_commandes_date(list_of_commande)  # use it here

This kind of design won't always be correct, so you'll need to figure out whether it's appropriate for your specific classes. If the fact that process_commande calls get_commandes_date is supposed to be an implementation detail (so it should keep behaving the same way even when the latter method is overridden), then this is a good approach. If the relationship between the methods is part of the class's API, then you probably don't want to do this (since overriding the get_commandes_date method may be a deliberate way to change the results of process_commande in a subclass).
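To make the mechanism concrete, here is a minimal self-contained sketch of the same pattern (the names Base/Child/greet are illustrative, not from the question):

class Base:
    def greet(self):  # may be overridden in subclasses
        return 'hello from Base'

    __greet = greet  # name-mangled alias; always points at Base's version

    def announce(self):
        # uses the private alias, so overriding greet() does not affect this
        return self.__greet()

class Child(Base):
    def greet(self):
        return 'hello from Child'

c = Child()
print(c.greet())     # hello from Child
print(c.announce())  # hello from Base -- the original implementation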
I think you want the my_dict method to cover both my_dict and my_dict2, with a boolean to trigger whichever behaviour you want:

def my_dict(self, trigger=False):
    if not trigger:
        return {
            "commende_date": self.commande_date}
    else:
        return {**super().my_dict(),
                "books": self.books_list}

Put it in place of your old my_dict method, i.e. replace:

def my_dict(self):
    return {
        "commende_date": self.commande_date}

How to limit the permitted values that can be passed to a method parameter (Using Type Hinting to allow Static Code Analysis)

In Python 3, I want to limit the permitted values that are passed to this method:
my_request(protocol_type, url)
Using type hinting I can write:
my_request(protocol_type: str, url: str)
so the protocol and url are limited to strings, but how can I validate that protocol_type accepts only limited set of values, e.g. 'http' and 'https'?
One way is to write code in the method to validate that the value passed in is 'http' or 'https', something along the lines of:

if (protocol_type == 'http') or (protocol_type == 'https'):
    ...  # do something
else:
    ...  # throw an exception

That works fine at runtime, but it doesn't provide any indication of a problem while you are writing the code.
This is why I prefer using an Enum and the type-hinting mechanism that PyCharm and mypy implement.
With the code example below, PyCharm's code inspection warns you: if you pass a value that is not a member of the enum, you get an "Expected type: ..." warning.
Code:

"""Test of ENUM"""
from enum import Enum

class ProtocolEnum(Enum):
    """
    ENUM to hold the allowed values for protocol
    """
    HTTP: str = 'http'
    HTTPS: str = 'https'

def try_protocol_enum(protocol: ProtocolEnum) -> None:
    """
    Test of ProtocolEnum
    :rtype: None
    :param protocol: a ProtocolEnum value, allows for HTTP or HTTPS only
    :return:
    """
    print(type(protocol))
    print(protocol.value)
    print(protocol.name)

try_protocol_enum(ProtocolEnum.HTTP)
try_protocol_enum('https')  # flagged by the code inspection
Output:
<enum 'ProtocolEnum'>
http
HTTP
I guess you can use decorators; I had a similar situation, but I wanted to validate the parameter types:

def accepts(*types):
    """
    Enforce parameter types for function
    Modified from https://stackoverflow.com/questions/15299878/how-to-use-python-decorators-to-check-function-arguments
    :param types: int, (int, float); if False, None or [], the check will be skipped
    """
    def check_accepts(f):
        def new_f(*args, **kwds):
            for (a, t) in zip(args, types):
                if t:
                    assert isinstance(a, t), \
                        "arg %r does not match %s" % (a, t)
            return f(*args, **kwds)
        new_f.__name__ = f.__name__
        return new_f
    return check_accepts

And then use it as:

from decimal import Decimal

@accepts(Decimal)
def calculate_price(monthly_item_price):
    ...

You can modify my decorator to achieve what you want.
You can just check if the input is correct in the function:

def my_request(protocol_type: str, url: str):
    if protocol_type in ('http', 'https'):
        ...  # Do x
    else:
        return 'Invalid Input'  # or raise an error
Why not use a Literal for the method argument?

from typing import Literal

def my_request(protocol_type: Literal["http", "https"], url: str):
    ...
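With that annotation, a static type checker such as mypy or an IDE like PyCharm flags invalid literal values at the call site (the URLs here are illustrative):

my_request("https", "https://example.com")  # accepted
my_request("ftp", "ftp://example.com")      # rejected by the type checker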
Use an if statement that raises an exception if protocol_type isn't in a list of allowed values:

allowed_protocols = ['http', 'https']
if protocol_type not in allowed_protocols:
    raise ValueError()

How to return dynamic JSON as a response from a Google Cloud Endpoint in Python

I want the following JSON to be returned from a Google endpoint:

{"arts":[{"id":"4","name":"punjabi"},{"id":"5","name":"hindi"}],"Science":[{"id":"1","name":"MCA"},{"id":"2","name":"physics"},{"id":"3","name":"chemistry"}]}

Here is how I am declaring my endpoint:

@endpoints.method(TokenAsInput, GetDepartmentListOutput,
                  path='getdepartmentlist', http_method='GET',
                  name='GetDepartmentList')
def getDepartmentList(self, request):
    objResult = GetDepartmentListOutput()
    objResult.data = dynamicJson
    return objResult

But I don't know how to declare GetDepartmentListOutput so that it can map the above JSON. The objects 'arts' and 'Science' are dynamic; they may or may not exist.
I use ProtoRpc messages (which underlie Cloud Endpoints) and the following to return general JSON message data:

from protorpc import messages

class DetailMessage(messages.Message):
    """
    General-format JSON detail response
    """
    data = GeneralField(1)

To set the result:

data = {"arts":[{"id":"4","name":"punjabi"},{"id":"5","name":"hindi"}],"Science":[{"id":"1","name":"MCA"},{"id":"2","name":"physics"},{"id":"3","name":"chemistry"}]}
return DetailMessage(data=data)
GeneralField is defined as:
class GeneralField(messages.Field):
    """
    Allow for normal non-Message objects to be serialised to JSON.
    This allows for variable result objects or dictionaries to be
    returned (note: these objects must be JSON-serialisable).
    """
    VARIANTS = frozenset([messages.Variant.MESSAGE])
    DEFAULT_VARIANT = messages.Variant.MESSAGE

    def __init__(self,
                 number,
                 required=False,
                 repeated=False,
                 variant=None):
        """Constructor.

        Args:
          number: Number of field. Must be unique per message class.
          required: Whether or not field is required. Mutually exclusive to
            'repeated'.
          repeated: Whether or not field is repeated. Mutually exclusive to
            'required'.
          variant: Wire-format variant hint.

        Raises:
          FieldDefinitionError when invalid message_type is provided.
        """
        super(GeneralField, self).__init__(number,
                                           required=required,
                                           repeated=repeated,
                                           variant=variant)

    def __set__(self, message_instance, value):
        """Set value on message.

        Args:
          message_instance: Message instance to set value on.
          value: Value to set on message.
        """
        if isinstance(value, list):
            if len(value) > 0:
                self.type = type(value[0])
            else:
                self.type = type(self)
        else:
            self.type = type(value)
        self.__initialized = True
        super(GeneralField, self).__set__(message_instance, value)

    def __setattr__(self, name, value):
        """Setter overridden to allow assignment to fields after creation.

        Args:
          name: Name of attribute to set.
          value: Value to assign.
        """
        object.__setattr__(self, name, value)

    def value_from_message(self, message):
        """Convert a message to a value instance.

        Used by deserializers to convert from underlying messages to
        value of expected user type.

        Args:
          message: A message instance of type self.message_type.

        Returns:
          Value of self.message_type.
        """
        return message

    def value_to_message(self, value):
        """Convert a value instance to a message.

        Used by serializers to convert Python user types to underlying
        messages for transmission.

        Args:
          value: A value of type self.type.

        Returns:
          An instance of type self.message_type.
        """
        return value
Note: GeneralField is derived from other ProtoRpc Message code and overrides Field's __set__ and __setattr__ methods in order to allow normal (JSON-serialisable) objects or dictionaries to be used in ProtoRpc messages. You may need to adapt this approach to suit your purposes.
Note 2: I am unsure how Cloud Endpoints will like this, but it may be worth a shot.
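For what it's worth, wiring DetailMessage into the question's endpoint might look like the sketch below (TokenAsInput comes from the question; the hard-coded dict stands in for the real lookup):

@endpoints.method(TokenAsInput, DetailMessage,
                  path='getdepartmentlist', http_method='GET',
                  name='GetDepartmentList')
def getDepartmentList(self, request):
    # the top-level keys ('arts', 'Science', ...) can vary per response
    data = {"arts": [{"id": "4", "name": "punjabi"}],
            "Science": [{"id": "1", "name": "MCA"}]}
    return DetailMessage(data=data)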

Pythonic - How to initialize a constructor with multiple arguments and validate

I'm a Python noob and I'm trying to solve my problems the 'Pythonic' way. I have a class whose __init__ method takes 6 parameters. I need to validate each param and throw/raise an Exception if any fails to validate.
Is this the right way?

class DefinitionRunner:
    def __init__(self, canvasSize, flightId, domain, definitionPath, harPath):
        self.canvasSize = canvasSize
        self.flightId = flightId
        self.domain = domain
        self.harPath = harPath
        self.definitionPath = definitionPath
        # ... bunch of validation checks ...
        # ... if validation fails, raise ValueError ...
If you want the variables to be settable independently of __init__, you could use properties to implement validations in separate methods.
They only work for new-style classes, though, so you need to define the class as class DefinitionRunner(object).
So, for example:

@property
def canvasSize(self):
    return self._canvasSize

@canvasSize.setter
def canvasSize(self, value):
    # some validation here
    self._canvasSize = value
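As a concrete sketch, here's the property wired into the question's class; the actual validation rule is a made-up assumption, since the question doesn't say what a valid canvasSize looks like:

class DefinitionRunner(object):
    def __init__(self, canvasSize, flightId, domain, definitionPath, harPath):
        self.canvasSize = canvasSize  # routed through the setter below
        self.flightId = flightId
        self.domain = domain
        self.definitionPath = definitionPath
        self.harPath = harPath

    @property
    def canvasSize(self):
        return self._canvasSize

    @canvasSize.setter
    def canvasSize(self, value):
        # hypothetical rule: a (width, height) pair of positive ints
        if (not isinstance(value, tuple) or len(value) != 2
                or not all(isinstance(v, int) and v > 0 for v in value)):
            raise ValueError('canvasSize must be a (width, height) pair of positive ints')
        self._canvasSize = value

runner = DefinitionRunner((1920, 1080), 'f1', 'example.com', 'def.json', 'log.har')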
Broadly speaking, that looks like the way you'd do it. Though strictly speaking, you might as well do validation before rather than after assignment, especially if assignment could potentially be time or resource intensive. Also, style convention says not to align assignment blocks like you are.
I would do it like you did it, except for the validation: I would validate in a setter method and use it to set the attributes.
You could do something like this: make a validator for each type of input, and a helper function to run the validations:

def validate_and_assign(obj, items_d, validators):
    # validate all entries
    for key, validator in validators.items():
        if not validator(items_d[key]):
            raise ValueError("Validation for %s failed" % (key,))
    # set all entries
    for key, val in items_d.items():
        setattr(obj, key, val)
Which you'd use like this:

class DefinitionRunner:
    validators = {
        'canvasSize': canvasSize_validator,
        'flightId': flightId_validator,
        'domain': domain_validator,
        'definitionPath': definitionPath_validator,
        'harPath': harPath_validator,
    }

    def __init__(self, canvasSize, flightId, domain, definitionPath, harPath):
        validate_and_assign(self, {
            'canvasSize': canvasSize,
            'flightId': flightId,
            'domain': domain,
            'definitionPath': definitionPath,
            'harPath': harPath,
        }, DefinitionRunner.validators)
The validators might be the same function, of course, if the data type is the same.
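For illustration, a validator is just a callable that returns a truthy or falsy result; a hypothetical domain_validator might be:

import re

def domain_validator(value):
    # hypothetical rule: a non-empty string that looks like a hostname
    return isinstance(value, str) and re.fullmatch(r'[A-Za-z0-9.-]+', value) is not None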
I'm not sure if this is exactly "Pythonic", but I've defined a function decorator called require_type. (To be honest, I think I found it somewhere online.)
def require_type(my_arg, *valid_types):
    '''
    A simple decorator that performs type checking.

    @param my_arg: string indicating argument name
    @param valid_types: list of valid types
    '''
    def make_wrapper(func):
        if hasattr(func, 'wrapped_args'):
            wrapped = getattr(func, 'wrapped_args')
        else:
            body = func.__code__
            wrapped = list(body.co_varnames[:body.co_argcount])

        try:
            idx = wrapped.index(my_arg)
        except ValueError:
            raise NameError(my_arg)

        def wrapper(*args, **kwargs):
            def fail():
                all_types = ', '.join(str(typ) for typ in valid_types)
                raise TypeError("'%s' was type %s, expected to be in following list: %s"
                                % (my_arg, type(arg), all_types))

            if len(args) > idx:
                arg = args[idx]
                if not isinstance(arg, valid_types):
                    fail()
            else:
                if my_arg in kwargs:
                    arg = kwargs[my_arg]
                    if not isinstance(arg, valid_types):
                        fail()
            return func(*args, **kwargs)

        wrapper.wrapped_args = wrapped
        return wrapper
    return make_wrapper
Then, to use it:

import numpy

class SomeObject(object):
    @require_type("prop1", str)
    @require_type("prop2", numpy.complex128)
    def __init__(self, prop1, prop2):
        pass
