I'm trying to simplify endpoint data validation for a flask application, with a Swagger endpoint spec. I've avoided using the entire swagger-generated server code, because I find connexion too opaque. Instead I use the model objects generated by Swagger, but write the controllers myself. For example, this is the endpoint for searching for restaurants nearby. The user needs to provide either a zip_code or a combination of latitude and longitude, and those values need to be validated:
from flask import Blueprint, jsonify, request, Response
from models import RestaurantSearchRequest, RestaurantSearchResponse
app_blueprint = Blueprint(__name__, __name__, url_prefix='/restaurants')
@app_blueprint.route(
    utils.versioned_api_route('/search/'), methods=['GET'])
def find_restaurants_by_zip_or_lat_lng() -> Response:
    """Search for nearby restaurants by zip code or latitude/longitude.

    Query parameters are validated by RestaurantSearchRequest; a
    ValueError raised during validation is surfaced as a 400 response.
    """
    request_data = utils.multidict_to_dict(request.args)
    try:
        search_request = RestaurantSearchRequest.from_dict(request_data)
        search_response = RestaurantSearchResponse.from_lat_lng_zip(**search_request.to_dict())
        # jsonify() already builds a Response; wrapping it in Response()
        # would serialize the Response object itself as the body.
        response = jsonify(search_response.to_dict())
        response.status_code = 200
        return response
    except ValueError as ex:
        # BaseException.message was removed in Python 3; use str(ex).
        return Response(str(ex), 400)
Right now the data validation is being done in the model using Voluptuous, but it requires a lot of boilerplate:
from voluptuous.schema_builder import Schema
from lib import validators
from models.swagger import RestaurantSearchRequest as SwaggerRestaurantSearchRequest
# Inherit all swagger utils, such as `to_dict` and `from_dict`
class RestaurantSearchRequest(SwaggerRestaurantSearchRequest):
    """Validated search request.

    Inherits the Swagger utilities such as ``to_dict`` and ``from_dict``
    from the generated model.
    """

    def __init__(self: 'RestaurantSearchRequest', latitude: float=None, longitude: float=None, zip_code: str=None):
        # Per-field validation via Voluptuous.
        field_values = {
            'zip_code': zip_code,
            'latitude': latitude,
            'longitude': longitude,
        }
        field_schema = Schema({
            'zip_code': validators.validate_zip_code,
            'latitude': validators.validate_latitude,
            'longitude': validators.validate_longitude,
        })
        field_schema(field_values)
        super().__init__(latitude, longitude, zip_code)
        # Cross-field rule: a usable request needs either a zip code or
        # a complete lat/lng pair.
        has_lat_lng = self.latitude is not None and self.longitude is not None
        if not has_lat_lng and self.zip_code is None:
            raise ValueError('Must provide either zip code or lat/lng')
I'd also like to get the logic out of the model and move it to a decorator on the view function. The key to all of this is having the flexibility to validate individual fields, as well as to validate the combination of fields, like above.
Are there any existing libraries out there I can leverage, or is there a generic validator that I can use for something like this?
Related
I would like to achieve the following :
My application contains some "sub-domains" which correspond to different parts of the app.
Each domain has its own entities
I would like to write a single controller like so:
@app.get("/{domain}/entity/{entity}/{id}")
async def read_users(domain: Domain, entity: Entity, id: int):
    """Generic read endpoint: the set of valid Entity values should
    depend on the Domain path parameter (the goal of the question)."""
    pass
considering Entity would be an Enum that could change following the selected domain.
For instance, if the domain is "architecture", Entity could be defined like :
class Entity(str, Enum):
    # Entities that are valid for the "architecture" domain.
    building = "building"
    floor = "floor"
but if the selected domain is "vehicle", the matching Entity would be :
class Entity(str, Enum):
    # Entities that are valid for the "vehicle" domain.
    engine = "engine"
    wheels = "wheels"
More generally, I guess what I'm looking for is a way to make a path parameter validation dependent on another path parameter.
This way :
GET /architecture/entity/floor/1 is valid since, floor is a valid entity for domain architecture
GET /vehicle/entity/wheels/5 is valid since, wheels is a valid entity for domain vehicle
GET /architecture/entity/engine/1 is invalid since, engine is not a valid entity for domain architecture
Is there any way to achieve this ?
You can use closures. The following code does not use Enums for brevity :
from fastapi import FastAPI
app = FastAPI()
domains = {"architecture": ["building","floor"], "vehicle": ["engine","wheels"]}
def set_route(domain, entity):
    """Register a GET route for one (domain, entity) pair.

    The closure keeps `domain` and `entity` bound per registration, so
    each generated route formats its own values into the response.
    """
    url = "/{}/entity/{}/{{id}}".format(domain, entity)

    @app.get(url)
    async def read_users(id: int):
        return (f"Users of the {domain} {entity} #{id}")
# Register one route per (domain, entity) combination.
for domain_name, entity_names in domains.items():
    for entity_name in entity_names:
        set_route(domain_name, entity_name)
And it yields the desired API schema :
I'm not sure whether exactly what you want is possible, but you can certainly write a controller where you add pydantic validation yourself, and handle the exception if it throws a validation error:
from pydantic import BaseModel, ValidationError
from enum import Enum
from fastapi import FastAPI, Request, status
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from typing import Union, Literal
class ArchitectureEntity(BaseModel):
    # Only these literal values are accepted for the "architecture" domain.
    entity: Union[Literal['building'], Literal['floor']]
class VehicleEntity(BaseModel):
    # Only these literal values are accepted for the "vehicle" domain.
    entity: Union[Literal['wheels'], Literal['engine']]
@app.exception_handler(ValidationError)
async def validation_exception_handler(request: Request, exc: ValidationError):
    """Convert pydantic ValidationErrors raised inside handlers into
    422 responses with the validation details in the body."""
    return JSONResponse(
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        content=jsonable_encoder({"detail": exc.errors(), "Error": "Entity not permitted"}),
    )
@app.get("/{domain}/entity/{entity}/{id}")
async def read_users(domain: Domain, entity: Entity, id: int):
    """Validate `entity` against the pydantic model matching `domain`."""
    # NOTE(review): Domain and Entity are not defined in this snippet —
    # presumably broad enums covering all domains/entities; confirm.
    if domain == 'architecture':
        entity = ArchitectureEntity(entity=entity)
    elif domain == 'vehicle':
        entity = VehicleEntity(entity=entity)
    return {'architecture': entity}
However openapi docs will not show, that e.g architecture and engine are not allowed together.
I'm handling this request in my code (Python3.9, FastAPI, Pydantic):
https://myapi.com/api?params[A]=1&params[B]=2
I tried to make following model:
BaseModel for handling special get request
(for fastapi.Query and pydantic.Field is same)
I also set up aliases for it, but in swagger docs I see next field:
Snap of the swagger docs
There are fields that are specified as extra_data
So, if I specify query params in parameters of my endpoint like this:
@app.get('/')
def my_handler(a: str = Query(None, alias="params[A]")):
    """Declaring the alias directly on Query makes the bracketed
    parameter name appear correctly in the OpenAPI docs."""
    return None
Everything works fine. How can I fix it? I want to initialize my pydantic.BaseModel with special aliases this way and avoid the usage of query params in
class MyModel(BaseModel):
    # Aliases map the bracketed query-parameter names onto plain
    # attribute names.
    a: str = Field(alias="params[A]")
    b: str = Field(alias="params[B]")
def my_handler(model: MyModel = Depends()):
    # Depends() asks FastAPI to populate the model from the request's
    # query parameters.
    return model.dict()
I have an application running in production that I've built for a single client that I want to convert to support multiple "tenants".
Currently I am using a Postgres database where all my data resides in a single database in the default public schema. I would like to isolate each tenant to a separate Postgres schema. Ideally, my application's UI would make a call to my API using the tenant's subdomain. In before_request I would somehow be able to set all database queries during the current request context to only query that tenant's schema, is this possible?
I envisage an ideal solution to be something similar to this contrived example:
from flask import Flask, request, jsonify
from pony.orm import Database, Required
app = Flask(__name__)
db = Database(**{<db_connection_dict>})
class User(db.Entity):
email = Required(str)
password = Required(str)
#classmethod
def login(cls, email: str, password: str) -> str:
user = cls.get(lambda u: u.email.lower() == email.lower())
if not user:
return None
password_is_valid = <method_to_check_hashed_pasword>
if not password_is_valid:
return None
return <method_to_generate_jwt>
db.generate_mapping()
@app.before_request
def set_tenant():
    """Point the ORM at the tenant's Postgres schema for this request."""
    tenant_subdomain = request.host.split(".")[0]
    # MISSING STEP: set_schema is a fictitious method — does something
    # similar to this exist?
    db.set_schema(schema=tenant_subdomain)
# Flask URL rules must start with "/"; the original also lacked the
# closing parenthesis on the decorator.
@app.route("/auth/login", methods=["POST"])
def login_route():
    """Authenticate and return a JWT, or a 403 on bad credentials."""
    data = request.get_json()
    jwt = User.login(data["email"], data["password"])
    if not jwt:
        return make_response({}, 403)
    return make_response(jsonify(data=jwt), 200)
I've come across an interesting/simple example using SQLAlchemy. If not possible with PonyORM I may consider porting my models over to SQLAlchemy but would miss the simplicity of Pony :(
I thought about possibly using the Database.on_connect method to do something like this, but I'm not sure if anyone has any other ideas or if this would even work properly in production. I suspect not, because if I had two separate tenants querying the database they would overwrite the search path.
@db.on_connect()
def set_request_context_tenant_schema(db, connection) -> None:
    """Set the Postgres search_path to the tenant's schema on connect."""
    subdomain = request.host.split(".")[0]
    cursor = connection.cursor()
    # SECURITY: subdomain comes from the untrusted Host header and is
    # interpolated into SQL. Identifiers cannot be bound as parameters,
    # so validate/quote it before using this in production.
    cursor.execute(f"SET search_path TO {subdomain}, public;")
I have an endpoint where I need to make make a request to a third party API to get a list of items and return the results to the client.
Which of the following, or any other approach would be better suited to DRF?
Make input parameter validation and the call to the third party API and in the view method, pass the list of items in the response to a serializer for serialization and return serializer data to the client
Pass the request parameters to the serializer as write-only fields, make the field validation, api call and serialization in the serializer
A mixture of 1 and 2; use 2 different serializers, one that takes request parameters as write only fields, validates input parameters and makes the request to the 3rd party api, and another serializer that takes the resulting list from the first serializer and serializes the items for use of client
Since your question doesn't mention anything about writing data into the DB, you can undoubtedly go with Method-1.
Let's look into this sample api, which returns a list of items (a list api).
Case - 1 : We need to show the same response as we got from the third-party api
In that case, we don't need any serializer or serialization process, all we need is pass the data from third-party API to the client.
from rest_framework.decorators import api_view
from rest_framework.response import Response
import requests
@api_view()
def my_view(request):
    """Proxy the third-party list straight through to the client —
    no serializer needed when the payload is returned unchanged."""
    tp_api = "https://jsonplaceholder.typicode.com/posts"
    response = requests.get(tp_api)
    return Response(data=response.json())
Case - 2 : If you don't need complete data, but few parts (id and body)
In this particular situation, you can go with pythonic loop or DRF serializer.
# using DRF serializer
from rest_framework import serializers
# serializer class
# serializer class
class Myserializer(serializers.Serializer):
    # Only the two fields we want to expose from the third-party payload.
    id = serializers.CharField()
    body = serializers.CharField()
@api_view()
def my_view(request):
    """Project the third-party payload down to (id, body) via a serializer."""
    tp_api = "https://jsonplaceholder.typicode.com/posts"
    response_data = requests.get(tp_api).json()
    my_serializer = Myserializer(data=response_data, many=True)
    # raise_exception is keyword-only in current DRF; is_valid(True)
    # relied on it being positional.
    my_serializer.is_valid(raise_exception=True)
    return Response(data=my_serializer.data)
#Python loop way
@api_view()
def my_view(request):
    """Same (id, body) projection using a plain comprehension instead
    of a serializer."""
    tp_api = "https://jsonplaceholder.typicode.com/posts"
    response_data = requests.get(tp_api).json()
    data = [{"id": res['id'], "body": res['body']} for res in response_data]
    return Response(data=data)
In case-2, I would recommend using the DRF serializer, which does lots of things like validation, etc.
When it comes to your second approach, whether to validate the input data would depend on your requirement. As you said in the comments, you need to provide some inputs to the third-party api. So, the validation should be carried out before accessing the third-party api.
# Validation example
class MyInputSerializer(serializers.Serializer):
    # Validates the incoming query parameter before the third-party
    # API is called; postId is capped at 10.
    postId = serializers.IntegerField(max_value=10)
class MyOutputSerializer(serializers.Serializer):
    # Shapes the third-party response down to the fields the client needs.
    id = serializers.CharField()
    body = serializers.CharField()
@api_view()
def my_view(request):
    """Validate query params, call the third-party API, reshape output."""
    # Renamed from `input`, which shadows the builtin.
    input_serializer = MyInputSerializer(data=request.GET)
    input_serializer.is_valid(raise_exception=True)
    tp_api = "https://jsonplaceholder.typicode.com/comments?postId={}".format(input_serializer.data['postId'])
    response_data = requests.get(tp_api).json()
    my_serializer = MyOutputSerializer(data=response_data, many=True)
    # raise_exception is keyword-only in current DRF.
    my_serializer.is_valid(raise_exception=True)
    return Response(data=my_serializer.data)
Conclusion
The DRF is flexible enough to get desired output format as well as taking data into the system. In short, It all depends on your requirements
I'm using Flask-Restful to create an API to store geojson data. Geojson allows for storing 'properties', and makes no restrictions on what these parameters can be (could store a color, a nickname, etc.) I would like to transmit and store this data using flask-restful, but I'm not sure that I can do this with 'open-ended' data. It appears when I use 'marshal' for my data, I need to specify exactly the fields I expect.
from flask import Flask, abort
from flask_restful import Api, Resource, fields, marshal, reqparse  # the flask.ext.* namespace was removed in Flask 1.0
class GeoAPI(Resource):
    """Read-only resource returning a single marshalled Geo record."""

    def get(self, id):
        # Look the record up by primary key; 404 when absent.
        record = session.query(data.Geo).filter_by(id=id).first()
        if not record:
            abort(404)
        return {'geo': marshal(record, geo_fields)}
# Marshalling template used by GeoAPI.get.
geo_fields = {
    "name": fields.String,
    "coordinates": fields.List(fields.List(fields.List(fields.Float))),
    "parameters": fields.String, # String may be the wrong type, tried nested?
    'version': fields.String,
    'uri': fields.Url('geo')
}
api.add_resource(GeoAPI, '/pathopt/api/0.1/geos/<int:id>', endpoint = 'geo')
The data for Geo pulls from a SQLAlchemy query.
Is it possible to state that 'properties' is an object which can contain many different fields, or does this require me to explicitly state my field names?