I am experiencing latency when doing queries against the Google Admin API.
def get_user(self, email: str) -> dict:
    res = (
        self.service.users()
        .list(
            domain="gmail.com",
            projection="full",
            query="email={0}".format(email),
        )
        .execute()
    )
    if "users" not in res or len(res["users"]) != 1:
        msg = "Could not find user %s" % email
        logging.error(msg)
        raise GoogleAdminNonExistentUser(msg)
    return res["users"][0]
def create_user(
    self,
    email: str,
    first_name: str,
    last_name: str,
    org_unit_path: str,
    manager_email: str,
) -> dict:
    user_info = {...}
    try:
        res = self.service.users().insert(body=user_info).execute()
        return res
    except HttpError as error:
        exc = self._generate_error(error)
        logger.exception(exc.message)
        raise exc
Take these two calls, for example. In my test suite, one test creates a user and immediately deletes them. The next test creates the same user and updates custom attributes, then validates that those attributes were set.
def test_create_delete():
    create_user(EMAIL)
    delete_user(EMAIL)

def test_create_update():
    # This will variably error out if the delete_user() from the
    # previous test hasn't replicated throughout Google yet.
    create_user(EMAIL)
    update_user(EMAIL, UPDATE_INFO)
    user = get_user(EMAIL)
    # This assertion will variably fail if get_user() fetches stale data.
    assert all(user.get(k) == v for k, v in UPDATE_INFO.items())
I could litter the tests with sleeps, but build time is important. Is there a way to force the Google Admin API to return the freshest data possible?
This isn't possible, because the API sits in front of many dependent services. We do have ongoing projects to improve performance, though. (I'm one of the developers behind these services, and just ran across this question.)
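In the meantime, a common client-side workaround is to poll with a deadline rather than scattering fixed sleeps, so each test waits only as long as replication actually takes. A minimal sketch; the wait_until helper, its timeout values, and the admin client instance are my own illustration, not part of the Admin SDK:

import time

def wait_until(predicate, timeout=30.0, interval=0.5):
    """Poll predicate() until it returns a truthy value or the deadline passes."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        result = predicate()
        if result:
            return result
        time.sleep(interval)
    raise TimeoutError(f"condition not met within {timeout:.0f}s")

def _user_updated():
    # Hypothetical usage of the get_user() method above.
    try:
        user = admin.get_user(EMAIL)
    except GoogleAdminNonExistentUser:
        return None  # the create hasn't replicated yet
    # Only succeed once every updated attribute is visible.
    if all(user.get(k) == v for k, v in UPDATE_INFO.items()):
        return user
    return None

user = wait_until(_user_updated)

In the best case this costs one polling interval; in the worst case it degrades into the same wait a sleep would have imposed, but with a clear timeout failure instead of a flaky assertion.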
In some cases, my code looks a lot cleaner if I create my exceptions up front, then raise them later:
AUTHENTICATION_ERROR = HTTPException(
    status_code=fastapi.status.HTTP_401_UNAUTHORIZED,
    detail="Authentication failed",
    headers={"WWW-Authenticate": "Bearer"},
)
# ...
async def current_user(token: str = Depends(oauth2_scheme)) -> User:
    """FastAPI dependency that returns the current user.

    If the current user is not authenticated, raises an authentication error.
    """
    try:
        payload = jwt.decode(token, SECRET, algorithms=["HS256"])
    except JWTError:
        raise AUTHENTICATION_ERROR
    username = payload["sub"]
    if username not in stub_users:
        raise AUTHENTICATION_ERROR
    return stub_users[username]
However, I've never actually seen this done, and it feels quite wrong. It appears to work, though.
In Python, is it safe to create Exception instances once and raise them several times?
(Safe meaning that it works as expected, with no surprises.)
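To make "no surprises" concrete: the language itself doesn't forbid this, and an instance can be raised any number of times. The catch is that raising mutates the instance, most visibly its __traceback__, so a module-level exception accumulates traceback frames from every place it has ever been raised, and concurrent requests all share that mutable state. A small self-contained demo (the names are mine):

# Raising mutates the exception's __traceback__, so a shared instance
# drags frames from earlier raises along with it.
SHARED = RuntimeError("cached error")

def first():
    raise SHARED

def second():
    raise SHARED

for func in (first, second):
    try:
        func()
    except RuntimeError as exc:
        tb, frames = exc.__traceback__, []
        while tb:
            frames.append(tb.tb_frame.f_code.co_name)
            tb = tb.tb_next
        # On CPython, the second run still shows 'first' chained on.
        print(frames)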
I would create a custom exception and use it.
It makes the intention clear, is very "pythonic", and you can explicitly catch the (more specific) AuthenticationError in an outer function if needed.
import fastapi
from fastapi.exceptions import HTTPException

class AuthenticationError(HTTPException):
    def __init__(self):
        super().__init__(
            status_code=fastapi.status.HTTP_401_UNAUTHORIZED,
            detail="Authentication failed",
            headers={"WWW-Authenticate": "Bearer"},
        )
# ...
async def current_user(token: str = Depends(oauth2_scheme)) -> User:
    """FastAPI dependency that returns the current user.

    If the current user is not authenticated, raises AuthenticationError.
    """
    try:
        payload = jwt.decode(token, SECRET, algorithms=["HS256"])
    except JWTError:
        raise AuthenticationError
    username = payload["sub"]
    if username not in stub_users:
        raise AuthenticationError
    return stub_users[username]
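A side benefit of building a fresh instance per raise is that each traceback reflects only the current request, and callers can catch the narrower type without touching other HTTPExceptions. A hypothetical wrapper to illustrate:

async def current_user_or_none(token: str):
    """Resolve the current user, treating authentication failures as None."""
    try:
        return await current_user(token)
    except AuthenticationError:
        # Only authentication failures land here; other HTTPExceptions propagate.
        return None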
I think I am misunderstanding how dependency injection is used in FastAPI, specifically in the context of DB sessions.
My current setup is FastAPI, SQLAlchemy, and Alembic (although I am writing the raw SQL myself), Pydantic, etc. Pretty straightforward.
I have basic CRUD routes which communicate directly with my repository layer, and all is working. In these methods I am able to successfully use the DB dependency injection. See the example code below:
Dependencies
def get_database(request: Request) -> Database:
    return request.app.state._db

def get_repository(Repo_type: Type[BaseRepository]) -> Callable:
    # The inner factory returns an instance of the repository, not a type.
    def get_repo(db: Database = Depends(get_database)) -> BaseRepository:
        return Repo_type(db)
    return get_repo
Example GET by ID Route
@router.get("/{id}/", response_model=TablePub, name="Get Table by id")
async def get_table_by_id(
    id: UUID, table_repo: TableRepository = Depends(get_repository(TableRepository))
) -> TableInDB:
    table = await table_repo.get_table_by_id(id=id)
    if not table:
        raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail="No Table found with that id.")
    return table
Corresponding Repository
from typing import Optional

from databases import Database

class BaseRepository:
    def __init__(self, db: Database) -> None:
        self.db = db

class TableRepository(BaseRepository):
    async def get_table_by_id(self, *, id: UUID) -> Optional[TableInDB]:
        table = await self.db.fetch_one(
            query=GET_TABLE_BY_ID_QUERY,
            values={"id": id},
        )
        if not table:
            return None
        return TableInDB(**table)
Now I want to start doing some more complex operations and want to add a service layer to house all of the business logic.
What is the correct way to structure this so that I can reuse the repositories I have already written? For example, I want to return all Sales for a Table, but I need to get the table number from the DB before I can query the Sales table. The route takes table_id as a param -> service layer, where I fetch the table by ID (using the existing repo) -> from that object, get the table number, then make a request to an external API that requires the table number as a param.
What I have so far:
Route
@router.get("/{table_id}", response_model=SalesPub, name="Get Sale Entries by table id")
async def get_sales_by_table_id(
    table_id: UUID = Path(..., title="ID of the Table to get Sales Entries for")):
    response = await SalesService.get_sales_from_external_API(table_id=table_id)
    return response
Service Layer 'SalesService'
async def get_sales_from_external_API(
    table_id: UUID,
    table_repo: TableRepository = Depends(get_repository(TableRepository))
) -> TableInDB:
    table_data = await table_repo.get_table_by_id(id=table_id)
    if table_data is None:
        logger.info(f"No table with id:{table_id} could be found")
    table_number = table_data.number
    client_id = table_data.client_id
    sales = await salesGateway.call_external_API(table_number, client_id)
    return sales
The code breaks here: table_data = await table_repo.get_table_by_id(id=table_id)
with the error AttributeError: 'Depends' object has no attribute 'get_table_by_id'.
What I don't understand is that the code is almost identical to the route method that gets the table by ID. TableRepository does have a get_table_by_id method. What am I doing incorrectly, and is this the best way to split business logic from database actions?
Thanks in advance.
I seem to have found a solution to this, although I'm not sure it is the best way.
Depends only works in FastAPI routes and dependencies; I was trying to use it in a regular function.
I needed to resolve table_repo as a dependency in the route and pass it in as a parameter to the external API call function.
@router.get("/table/{table_id}/", response_model=SalePub, name="Get Sales by table id")
async def get_sales_by_table_id(
    table_id: UUID = Path(..., title="ID of the Table to get Sales Entries for"),
    table_repo: TableRepository = Depends(get_repository(TableRepository))):
    response = await get_sales_entries_from_pos(table_id=table_id, table_repo=table_repo)
    return response
The issue I foresee is that if I have a large service that needs access to many repos, I have to wire up all of that access on the router through Depends, which just seems a bit strange to me.
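One way to keep the router signature small is to make the service itself a dependency: FastAPI also resolves Depends declared in a class's __init__, so the service can declare whichever repositories it needs, and the route only depends on the service. A sketch, assuming get_repository, TableRepository, and salesGateway as defined above:

class SalesService:
    # FastAPI resolves the Depends() defaults in __init__ when the class
    # itself is used as a dependency, so repos never appear in the route.
    def __init__(
        self,
        table_repo: TableRepository = Depends(get_repository(TableRepository)),
    ):
        self.table_repo = table_repo

    async def get_sales_from_external_API(self, table_id: UUID):
        table_data = await self.table_repo.get_table_by_id(id=table_id)
        if table_data is None:
            raise HTTPException(status_code=404, detail="No Table found with that id.")
        return await salesGateway.call_external_API(
            table_data.number, table_data.client_id
        )

@router.get("/table/{table_id}/", response_model=SalePub)
async def get_sales_by_table_id(
    table_id: UUID,
    service: SalesService = Depends(SalesService),
):
    return await service.get_sales_from_external_API(table_id=table_id)

A service that needs five repos then lists them once in its own __init__ instead of on every route that uses it.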
I develop a Python application based on Flask that connects to a PostgreSQL database and exposes the API using Flasgger (Swagger UI).
I have already defined a basic API (handling entries by ID, etc.) as well as a query API to match different parameters (name == 'John Doe', for example).
I would like to expand this query API to support more complex queries such as lower than, higher than, between, contains, etc.
I searched the internet but couldn't find a proper way to do it. Any suggestions?
I found this article, which was useful but does not say anything about the implementation of the query: https://hackernoon.com/restful-api-designing-guidelines-the-best-practices-60e1d954e7c9
Here is briefly how it looks so far (some extracted code):
GET_query.yml:
Return account information
---
tags:
  - accounts
parameters:
  - name: name
    in: query
    type: string
    example: John Doe
  - name: number
    in: query
    type: string
    example: X
  - name: opened
    in: query
    type: boolean
    example: False
  - name: highlighted
    in: query
    type: boolean
    example: False
  - name: date_opened
    in: query
    type: Date
    example: 2018-01-01
Blueprint definition:
ACCOUNTS_BLUEPRINT = Blueprint('accounts', __name__)

Api(ACCOUNTS_BLUEPRINT).add_resource(
    AccountQueryResource,
    '/accounts/<query>',
    endpoint='accountq'
)
Api(ACCOUNTS_BLUEPRINT).add_resource(
    AccountResource,
    '/accounts/<int:id>',
    endpoint='account'
)
Api(ACCOUNTS_BLUEPRINT).add_resource(
    AccountListResource,
    '/accounts',
    endpoint='accounts'
)
Resource:
from flasgger import swag_from
from urllib import parse
from flask_restful import Resource
from flask_restful.reqparse import Argument
from flask import request as req
...

class AccountQueryResource(Resource):
    """ Verbs relative to the accounts """

    @staticmethod
    @swag_from('../swagger/accounts/GET_query.yml')
    def get(query):
        """ Handle complex queries """
        logger.debug('Recv %s:%s from %s', req.url, req.data, req.remote_addr)
        query = dict(parse.parse_qsl(parse.urlsplit(req.url).query))
        logger.debug('Get query: {}'.format(query))
        try:
            account = AccountRepository.filter(**query)
        except Exception as e:
            logger.error(e)
            return {'error': '{}'.format(e)}, 409
        if account:
            result = AccountSchema(many=True).dump(account)
            logger.debug('Get query returns: {}({})'.format(account, result))
            return {'account': result}, 200
        logger.debug('Get query returns: {}'.format(account))
        return {'message': 'No account corresponds to {}'.format(query)}, 404
And finally the repository:
class AccountRepository:
    """ The repository for the account model """

    @staticmethod
    def get(id):
        """ Query an account by ID """
        account = AccountModel.query.filter_by(id=id).first()
        logger.debug('Get ID %d: got:%s', id, account)
        return account

    @staticmethod
    def filter(**kwargs):
        """ Query an account """
        account = AccountModel.query.filter_by(**kwargs).all()
        logger.debug('Filter %s: found:%s', kwargs, account)
        return account

...
I don't know about your exact problem, but I had a problem similar to yours, and I fixed it with:
query = []
if location:
    query.append(obj.location == location)
Then I query with this list of conditions:
obj.query.filter(*query).all()
where obj is the name of a model you have created.
How does this help? It lets you build the conditions dynamically, and each one stands on its own, so you can use ==, !=, <=, etc.
Note that you should use filter and not filter_by; then you can use as many operators as you like.
You can read link1 and link2 for documentation on how to query SQLAlchemy.
edit:
name = request.args.get("name")
address = request.args.get("address")
age = request.args.get("age")

query = []
if name:
    query.append(Myobject.name == name)
if address:
    query.append(Myobject.address == address)
if age:
    query.append(Myobject.age >= age)  # look how we select people with age over the provided number!

query_result = Myobject.query.filter(*query).all()
The ifs help when the user provides no value: with get, any parameter the user omits comes back as None, so its condition is never appended to the list and never makes it into the main query.
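To cover the richer operators from the question (lower than, higher than, between, contains), one option is a suffix convention in the query string, e.g. date_opened__gte=2018-01-01, translated to the matching SQLAlchemy operator. A sketch; the suffix names and the parse_filters helper are my own convention, not a library feature:

# Map a suffix in the query-string key to a SQLAlchemy column operation.
OPERATORS = {
    'eq':       lambda col, v: col == v,
    'ne':       lambda col, v: col != v,
    'lt':       lambda col, v: col < v,
    'lte':      lambda col, v: col <= v,
    'gt':       lambda col, v: col > v,
    'gte':      lambda col, v: col >= v,
    'contains': lambda col, v: col.contains(v),
}

def parse_filters(model, args):
    """Translate e.g. ?date_opened__gte=2018-01-01&name__contains=John
    into a list of SQLAlchemy filter conditions."""
    conditions = []
    for key, value in args.items():
        field, _, op = key.partition('__')
        column = getattr(model, field)  # raises AttributeError for unknown fields
        conditions.append(OPERATORS[op or 'eq'](column, value))
    return conditions

# Inside AccountQueryResource.get:
# accounts = AccountModel.query.filter(*parse_filters(AccountModel, req.args)).all()

A between then falls out for free: it is just a gte and an lte condition on the same field in one request.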
I have a method that pulls the information about a specific gerrit revision, and there are two possible endpoints that it could live under. Given the following sample values:
revision_id = rev1
change_id = chg1
proj_branch_id = pbi1
The revision can either live under
/changes/chg1/revisions/rev1/commit
or
/changes/pbi1/revisions/rev1/commit
I'm trying to handle this cleanly, with minimal code repetition, as follows:
@retry(
    wait_exponential_multiplier=1000,
    stop_max_delay=3000
)
def get_data(
    self,
    api_obj: GerritAPI,
    writer: cu.LockingWriter = None,
    test: bool = False
):
    """
    Make an HTTP request using a pre-authed GerritAPI object to pull the
    JSON related to itself. It checks the endpoint using just the change
    id first, and if that doesn't return anything, will try the endpoint
    using the full id. It also cleans the data and adds ETL tracking
    fields.

    :param api_obj: An authed GerritAPI object.
    :param writer: A LockingWriter object.
    :param test: If True then the data will be returned instead of written
        out.
    :raises: If an HTTP Error occurs, an alternate endpoint will be
        attempted. If this also raises an HTTP Error then the error is
        re-raised and propagated to the caller.
    """
    logging.debug('Getting data for a commit for {f_id}'.format(
        f_id=self.proj_branch_id
    ))
    endpoint = (r'/changes/{c_id}/revisions/{r_id}/commit'.format(
        c_id=self.change_id,
        r_id=self.revision_id
    ))
    try:
        data = api_obj.get(endpoint)
    except HTTPError:
        try:
            endpoint = (r'/changes/{f_id}/revisions/{r_id}/commit'.format(
                f_id=self.proj_branch_id,
                r_id=self.revision_id
            ))
            data = api_obj.get(endpoint)
        except HTTPError:
            logging.debug('Neither endpoint returned data: {ep}'.format(
                ep=endpoint
            ))
            raise
        else:
            data['name'] = data.get('committer')['name']
            data['email'] = data.get('committer')['email']
            data['date'] = data.get('committer')['date']
            mu.clean_data(data)
    except ReadTimeout:
        logging.warning('Read Timeout occurred for a commit. Endpoint: '
                        '{ep}'.format(ep=endpoint))
    else:
        data['name'] = data.get('committer')['name']
        data['email'] = data.get('committer')['email']
        data['date'] = data.get('committer')['date']
        mu.clean_data(data)
    finally:
        try:
            data.update(self.__dict__)
        except NameError:
            data = self.__dict__
        if test:
            return data
        mu.add_etl_fields(data)
        self._write_data(data, writer)
I don't much like that I'm repeating the portion under the else, so I'm wondering if there is a way to handle this more cleanly. As a side note, as it stands my program will write out up to three times for every commit if it returns an HTTPError; is creating an instance variable self.written, which tracks whether the data has already been written out, a best-practice way to handle this?
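One way to remove the duplication is to iterate over the candidate endpoints so there is a single success path. A sketch that keeps the original helpers (mu.clean_data, mu.add_etl_fields, self._write_data) but simplifies the failure handling: when both endpoints fail, the HTTPError propagates immediately instead of falling through to the write:

def get_data(self, api_obj, writer=None, test=False):
    endpoints = [
        r'/changes/{}/revisions/{}/commit'.format(self.change_id, self.revision_id),
        r'/changes/{}/revisions/{}/commit'.format(self.proj_branch_id, self.revision_id),
    ]
    data = None
    for i, endpoint in enumerate(endpoints):
        try:
            data = api_obj.get(endpoint)
            break
        except HTTPError:
            # Only give up (and propagate) once the last endpoint has failed.
            if i == len(endpoints) - 1:
                logging.debug('Neither endpoint returned data: %s', endpoint)
                raise
        except ReadTimeout:
            logging.warning('Read Timeout occurred for a commit. Endpoint: %s', endpoint)
            break
    if data is not None:
        committer = data['committer']
        data['name'] = committer['name']
        data['email'] = committer['email']
        data['date'] = committer['date']
        mu.clean_data(data)
        data.update(self.__dict__)
    else:
        data = dict(self.__dict__)
    if test:
        return data
    mu.add_etl_fields(data)
    self._write_data(data, writer)

On the side note: since @retry re-invokes the whole method, a self.written flag does prevent duplicate output, but it is arguably cleaner to keep the retried portion free of side effects, e.g. by retrying only the HTTP fetch and writing once outside the retry.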
Is it possible to check if a particular AWS IAM key has permissions for a set of specific commands?
Essentially, is there an API for AWS's policy simulator?
So far I've been using hacks, such as executing a command with incorrect parameters that utilizes the permission in question, and watching what response I get back.
Example:
# Needed resource: 'elasticloadbalancer:SetLoadBalancerListenerSSLCertificate'
# Check:
try:
    elb.set_listener_SSL_certificate(443, 'fake')
except BotoServerError as e:
    if e.error_code == 'AccessDenied':
        print("You don't have access to "
              "elasticloadbalancer:SetLoadBalancerListenerSSLCertificate")
This is obviously hacky. Ideally I'd have some function call like iam.check_against(resource) or something. Any suggestions?
See boto3's simulate_principal_policy.
I've made this function to test for permissions (you'll need to modify it slightly, as it's not completely self-contained):
from typing import Dict, List, Optional

def blocked(
    actions: List[str],
    resources: Optional[List[str]] = None,
    context: Optional[Dict[str, List]] = None
) -> List[str]:
    """Test whether the IAM user is able to use specified AWS action(s).

    Args:
        actions (list): AWS action(s) to validate IAM user can use.
        resources (list): Check if action(s) can be used on resource(s).
            If None, action(s) must be usable on all resources ("*").
        context (dict): Check if action(s) can be used with context(s).
            If None, it is expected that no context restrictions were set.

    Returns:
        list: Actions denied by IAM due to insufficient permissions.
    """
    if not actions:
        return []
    actions = list(set(actions))

    if resources is None:
        resources = ["*"]

    _context: List[Dict] = [{}]
    if context is not None:
        # Convert the context dict to the list[dict] expected by ContextEntries.
        _context = [{
            'ContextKeyName': context_key,
            'ContextKeyValues': [str(val) for val in context_values],
            'ContextKeyType': "string"
        } for context_key, context_values in context.items()]

    # You'll need to create an IAM client here.
    results = aws.iam_client().simulate_principal_policy(
        PolicySourceArn=consts.IAM_ARN,  # Your IAM user's ARN goes here
        ActionNames=actions,
        ResourceArns=resources,
        ContextEntries=_context
    )['EvaluationResults']

    return sorted([result['EvalActionName'] for result in results
                   if result['EvalDecision'] != "allowed"])
You need to pass the permission's original action names to actions, like so:
blocked_actions = verify_perms.blocked(actions=[
    "iam:ListUsers",
    "iam:ListAccessKeys",
    "iam:DeleteAccessKey",
    "iam:ListGroupsForUser",
    "iam:RemoveUserFromGroup",
    "iam:DeleteUser"
])
Here's an example that uses the resources and context arguments as well:
def validate_type_and_size_allowed(instance_type, volume_size):
    """Validate user is allowed to create instance with type and size."""
    if validate_perms.blocked(actions=["ec2:RunInstances"],
                              resources=["arn:aws:ec2:*:*:instance/*"],
                              context={'ec2:InstanceType': [instance_type]}):
        halt.err(f"Instance type {instance_type} not permitted.")
    if validate_perms.blocked(actions=["ec2:RunInstances"],
                              resources=["arn:aws:ec2:*:*:volume/*"],
                              context={'ec2:VolumeSize': [volume_size]}):
        halt.err(f"Volume size {volume_size}GiB is too large.")
The IAM Policy Simulator provides an excellent UI for determining which users have access to particular API calls.
If you wish to test this programmatically, use the DryRun parameter when making an API call. The function will not actually execute, but you will be informed whether the caller has sufficient permissions to execute it. It will not, however, check whether the call itself would have succeeded (e.g. having an incorrect certificate name).
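Note that DryRun is only offered by some services (EC2 is the most notable). A minimal sketch with boto3; the AMI ID is a placeholder:

import boto3
from botocore.exceptions import ClientError

ec2 = boto3.client('ec2')

try:
    # With DryRun=True the request is validated, including IAM
    # permissions, but nothing is actually launched.
    ec2.run_instances(
        ImageId='ami-12345678',  # placeholder
        InstanceType='t3.micro',
        MinCount=1,
        MaxCount=1,
        DryRun=True,
    )
except ClientError as e:
    code = e.response['Error']['Code']
    if code == 'DryRunOperation':
        print('Permission granted: the call would have been attempted.')
    elif code == 'UnauthorizedOperation':
        print('Permission denied for ec2:RunInstances.')
    else:
        raise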