Serving resources with TurboGears using RestController - Python

I'm having a problem understanding how to serve my data best. I have two models, Record and Log, with a one-to-many relationship between them. I'd like to serve this using TG's RestController so I can do mysite.com/api/record_id/log
So far I have this:
class API(RestController):

    @expose('json')
    def get_all(self):
        records = DB.query(Record).all()
        return dict(records=records)

    @expose('json')
    def get_one(self, record_id):
        try:
            record = DB.query(Record).filter(
                Record.record_id == record_id).one()
        except NoResultFound:
            abort(404)
        return dict(record=record)

    @expose('json')
    def log(self, record_id):
        try:
            log = DB.query(Log).filter(
                Log.record_id == record_id).all()
        except NoResultFound:
            abort(404)
        return dict(log=log)
This works; however, if I go to mysite.com/api/log then it maps (as expected) to the log method and complains about the missing record_id argument. How can this be done so that the log method is only accessible after the record resource?
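One way to get that URL layout (a sketch only, assuming TurboGears 2's nested RestController dispatch, where the URL segment consumed by the parent resource is passed on to the nested controller's methods) is to move the log lookup into its own RestController and mount it on the parent:

from tg import RestController, expose, abort


class LogController(RestController):

    @expose('json')
    def get_all(self, record_id):
        # record_id comes from the parent URL segment: /api/<record_id>/log
        logs = DB.query(Log).filter(Log.record_id == record_id).all()
        return dict(log=logs)


class API(RestController):

    # /api/<record_id>/log now dispatches to LogController,
    # while a bare /api/log has no matching route.
    log = LogController()

    @expose('json')
    def get_all(self):
        return dict(records=DB.query(Record).all())

    @expose('json')
    def get_one(self, record_id):
        try:
            record = DB.query(Record).filter(
                Record.record_id == record_id).one()
        except NoResultFound:
            abort(404)
        return dict(record=record)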


Replacing def add_video(url, user_id=None, **kwargs): with a video service's API to embed videos in a Python web app. What do I need to learn?

I'm looking to be pointed in the right direction so I understand what exactly I need to learn next; there's so much info out there, but none of it is helping me at the moment, so I'm trying to get a gist of what I need to understand to get this part covered.
I learned a bit of Python from Justin Mitchel's 30 Days of Python and from his 11-hour Create a Video Membership Web App from Scratch with Python, NoSQL, & FastAPI video, and I understand more now, but I realized there are a few things I need help with.
I plan on learning more about htmx to get better with pages, but using the pages for specific videos via an API service has me baffled at the moment. The two APIs I plan on using are Streamtape & Doodstream.
https://github.com/wahyubiman/DoodStream
https://github.com/scaldings/streamtape-api-python
My current requirements.txt is:
fastapi
uvicorn
cassandra-driver
python-dotenv
email-validator
argon2-cffi
pytest
jinja2
python-multipart
python-jose[cryptography]
algoliasearch
doodstream
I had to manually install streamtape.
The current code in my model.py for videos is:
import uuid

from app.config import get_settings
from app.users.exceptions import InvalidUserIDException
from app.users.models import User
from app.shortcuts import templates
from cassandra.cqlengine import columns
from cassandra.cqlengine.models import Model
from cassandra.cqlengine.query import (DoesNotExist, MultipleObjectsReturned)

settings = get_settings()

from .exceptions import (
    InvalidVideoURLException,
    VideoAlreadyAddedException
)


# Unlisted Video -> video_id -> lock it down
class Video(Model):
    __keyspace__ = settings.keyspace
    host_id = columns.Text(primary_key=True)  # Streamtape, Doodstream
    db_id = columns.UUID(primary_key=True, default=uuid.uuid1)  # UUID1
    host_service = columns.Text(default='Doodstream')
    title = columns.Text()
    url = columns.Text()  # secure
    user_id = columns.UUID()

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"Video(title={self.title}, host_id={self.host_id}, host_service={self.host_service})"

    def render(self):
        basename = self.host_service  # streamtape, doodstream
        template_name = f"videos/renderers/{basename}.html"
        context = {"host_id": self.host_id}
        t = templates.get_template(template_name)
        return t.render(context)

    def as_data(self):
        return {f"{self.host_service}_id": self.host_id, "path": self.path, "title": self.title}

    @property
    def path(self):
        return f"/videos/{self.host_id}"

    @staticmethod
    def get_or_create(url, user_id=None, **kwargs):
        host_id = extract_video_id(url)
        obj = None
        created = False
        try:
            obj = Video.objects.get(host_id=host_id)
        except MultipleObjectsReturned:
            q = Video.objects.allow_filtering().filter(host_id=host_id)
            obj = q.first()
        except DoesNotExist:
            obj = Video.add_video(url, user_id=user_id, **kwargs)
            created = True
        except:
            raise Exception("Invalid Request")
        return obj, created

    @staticmethod
    def add_video(url, user_id=None, **kwargs):
        # extract video_id from url
        # video_id = host_id
        # Service API - Streamtape / Doodstream / etc
        host_id = extract_video_id(url)
        if host_id is None:
            raise InvalidVideoURLException("Invalid Video URL")
        user_exists = User.check_exists(user_id)
        if user_exists is None:
            raise InvalidUserIDException("Invalid user_id")
        # user_obj = User.by_user_id(user_id)
        # user_obj.display_name
        q = Video.objects.allow_filtering().filter(host_id=host_id)  # , user_id=user_id)
        if q.count() != 0:
            raise VideoAlreadyAddedException("Video already added")
        return Video.create(host_id=host_id, user_id=user_id, url=url, **kwargs)


# class PrivateVideo(Video):
#     pass
What I'm trying to figure out is:
How can I replace the static method with calls to the hosts' APIs so it can serve search requests, if that makes sense? For example, if someone searches for something specific, it brings them to a search page that pulls whatever is closest to their query from Streamtape/Doodstream.
Or, if I have preset tags for them to click on, it shows all the videos available to be played, and they can choose either the Streamtape server or the Doodstream server.
For the tags, would it be wise to make a separate HTML page per tag, where each page pulls from the specific folder of videos I want it to pull from? For example, the navigation bar has a Categories entry; clicking it shows categories such as Education, Travel, etc., and clicking one shows all the videos that match the corresponding folder on Streamtape/Doodstream.
What do I need to learn to get anywhere close to achieving something like that?
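One way to approach the first point (a sketch only; search_remote and the search.html template are hypothetical stand-ins, since the Streamtape/Doodstream wrappers each expose their own listing/search calls) is to wrap each host's API behind a small service function and call it from a FastAPI route, rendering the results with Jinja2 just like the existing pages:

from fastapi import FastAPI, Request
from fastapi.templating import Jinja2Templates

app = FastAPI()
templates = Jinja2Templates(directory="templates")


def search_remote(query: str):
    # Hypothetical helper: call the Streamtape/Doodstream wrappers here and
    # normalise their responses into dicts like
    # {"host_id": ..., "title": ..., "host_service": ...}.
    # The wrappers' actual method names differ, so check their READMEs.
    raise NotImplementedError


@app.get("/search")
def search(request: Request, q: str = ""):
    results = search_remote(q) if q else []
    # Reuse the same pattern as Video.render(): one template per host service.
    return templates.TemplateResponse(
        "videos/search.html",
        {"request": request, "query": q, "results": results},
    )

For the tags, a single /tags/{tag} route feeding one shared template is usually easier to maintain than a separate hand-written HTML page per tag; the tag simply selects which folder or query is sent to the host's API.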

mocked service call is not being used

I have a model object that looks like this:
class CatalogModel(BaseModel):

    @property
    def custom_service(self):
        return CustomService()

    async def get_offers(self, catalog_name):
        try:
            svc_response = self.custom_service.get_offers(catalog_name=catalog_name)()
        except BaseException:
            raise SalesForceException()
        return CustomOfferResponse().dump(svc_response)
I am trying to write a test for that get_offers function (which uses custom_service, which connects to Salesforce).
My test looks like this. I am using pytest, pytest-vcr, etc.
class TestCatalogModel:
    catalog_name = 'CATALOG_1'

    @freeze_time("2021-07-12")
    async def test_get_offers(self, loop, offers, offers_response):
        with MockUser(ident="test_model_get_offers"):
            with patch(
                "com.services.client.CustomService.get_offers", new=offers
            ):
                eo_offers = await CatalogModel().get_offers(self.catalog_name)
                assert offers_response == eo_offers
However, when executing the test it fails with this error:
E vcr.errors.CannotOverwriteExistingCassetteException: Can't overwrite existing cassette ('/test_api/recordings/2021-07-12/test_get_offers_model/salesforce/auth/client/services_oauth2_token.yaml') in your current record mode ('none').
E No match for the request (<Request (POST) https://server.salesforce.com/services/oauth2/token>) was found.
E No similar requests, that have not been played, found.
During handling of the above exception, another exception occurred:
...model.py:17: in test_get_offers
eo_offers = await CatalogModel().get_offers(self.catalog_name)
model.py:24: in get_offers
raise SalesForceException()
E ...SalesForceException: Error from Salesforce.
As far as I understand, it is trying to connect to the real Salesforce service rather than using the mock. What is the problem?
I would avoid trying to partially patch methods on classes. Instead I would use inversion of control to allow mock instances to be used during tests. This avoids having to do any patching at all.
class CatalogModel(BaseModel):
    def __init__(self, custom_service=None):
        if custom_service is None:
            custom_service = CustomService()
        self.custom_service = custom_service

    async def get_offers(self, catalog_name):
        try:
            svc_response = self.custom_service.get_offers(catalog_name=catalog_name)()
        except BaseException:
            raise SalesForceException()
        return CustomOfferResponse().dump(svc_response)
Now writing the test becomes much simpler.
from unittest import mock


class TestCatalogModel:
    catalog_name = 'CATALOG_1'

    @freeze_time("2021-07-12")
    async def test_get_offers(self, loop, offers, offers_response):
        with MockUser(ident="test_model_get_offers"):
            custom_service = mock.Mock()
            custom_service.get_offers.return_value = offers
            model = CatalogModel(custom_service)
            eo_offers = await model.get_offers(self.catalog_name)
            assert offers_response == eo_offers
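With the mock injected you can also assert how the service was used, for example:

custom_service.get_offers.assert_called_once_with(catalog_name='CATALOG_1')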

Why is this code reporting 'function' object is not subscriptable?

@pytest.fixture
def settings():
    with open('../config.yaml') as yaml_stream:
        return yaml.load(stream=yaml_stream)


@pytest.fixture
def viewers(settings):
    try:
        data = requests.get(settings['endpoints']['viewers']).json()
        return data[0]['viewers']
    except Exception:
        print('ERROR retrieving viewers')
        raise(SystemExit)


@pytest.fixture
def viewers_buffer_health(viewers):
    print(viewers)
    viewers_with_buffer_health = {}
    for viewer in viewers:
        try:
            data = requests.get(settings['endpoints']['node_buffer_health']).replace('<NODE_ID>', viewer)
        except Exception as e:
            print('ERROR retrieving buffer_health for {}'.format(viewer))
            raise(SystemExit)
        viewers_with_buffer_health[viewer] = data[0]['avg_buffer_health']
    return viewers_with_buffer_health
The fixture viewers_buffer_health is failing all the time on the requests call because 'function' object is not subscriptable.
Other times I have seen this error it has been because I was calling a variable and a function by the same name, but that's not the case here (or I'm completely blind).
Although it shouldn't matter, the output of viewers is a list like ['a4a6b1c0-e98a-42c8-abe9-f4289360c220', '152bff1c-e82e-49e1-92b6-f652c58d3145', '55a06a01-9956-4d7c-bfd0-5a2e6a27b62b']
Since viewers_buffer_health() doesn't have a local definition for settings it is using the function defined previously. If it is meant to work in the same manner as viewers() then you will need to add a settings argument to its current set of arguments.
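That is, declare the fixture dependency explicitly:

@pytest.fixture
def viewers_buffer_health(settings, viewers):
    ...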
settings is a function.
data = requests.get(settings()['endpoints']['node_buffer_health']).replace('<NODE_ID>', viewer)

@EndpointsAliasProperty and @Model.query_method cause BadRequestError (Key path element must not be incomplete: ...)

Hey, so right now I'm developing a backend API using Google ProtoRPC and Endpoints. I'm using the endpoints-proto-datastore library.
Strange things happen here. Here is the EndpointsModel class:
class AssetData(EndpointsModel):
    type = msgprop.EnumProperty(AssetType, indexed=True)

    def auth_id_set(self, value):
        if ApplicationID.get_by_id(value) is None:
            raise endpoints.UnauthorizedException('no auth_id')
        self._auth_id = value

    @EndpointsAliasProperty(required=True, setter=auth_id_set, property_type=messages.IntegerField)
    def auth_id(self):
        return self._auth_id

    def app_id_set(self, value):
        if ApplicationID.query(ApplicationID.app_id == value).get() is None:
            raise endpoints.UnauthorizedException('wrong app_id')
        self._app_id = value
        if self.check_auth_app_id_pair(self.auth_id, value):
            self._app_id = value
        else:
            raise endpoints.BadRequestException('auth_id and app_id mismatch')

    @EndpointsAliasProperty(required=True, setter=app_id_set)
    def app_id(self):
        return self._app_id

    @staticmethod
    def check_auth_app_id_pair(authen_id, applic_id):
        dat = ApplicationID.get_by_id(authen_id)
        if dat.app_id != applic_id:
            return False
        else:
            return True
and this is the API class
@endpoints.api(...)
class AssetDatabaseAPI(remote.Service):

    @AssetData.query_method(query_fields=('limit', 'order', 'pageToken', 'type', 'auth_id', 'app_id'),
                            path='assets', http_method='GET', name='assets.getAssetMultiple')
    def assets_get_multiple(self, query):
        return query
When I deploy this, every time I try to access assets.getAssetMultiple it just gives me this error:
raised BadRequestError(Key path element must not be incomplete: [ApplicationID: ]). Strangely enough, this only happens to methods using @Model.query_method; I have other methods using the same system but with @Model.method and they run OK.
If I try it on the development server, sometimes it just gives me RuntimeError: BadRequestError('missing key id/name',); then if I just re-save the .py file and retry, it works (sometimes not, and another re-save can make the error happen again).
Can anyone tell me my mistake?
Thanks
I think your problem is how you call this method - it's a static method, so you have to access it through the class, not the instance (self):
if AssetData.check_auth_app_id_pair(self.auth_id, value):
    self._app_id = value
else:
    raise endpoints.BadRequestException('auth_id and app_id mismatch')

How do I log multiple very similar events gracefully in Python?

With Python's logging module, is there a way to collect multiple events into one log entry? An ideal solution would be an extension of Python's logging module, or a custom formatter/filter for it, so that collecting logging events of the same kind happens in the background and nothing needs to be added to the code body (e.g. at every call of a logging function).
Here is an example that generates a large number of the same or very similar logging events:
import logging

for i in range(99999):
    try:
        asdf[i]  # not defined!
    except NameError:
        logging.exception('foo')  # generates large number of logging events
    else:
        pass

# ... more code with more logging ...
for i in range(88888):
    logging.info('more of the same %d' % i)
# ... and so on ...
So we have the same exception 99999 times and log it. It would be nice if the log just said something like:
ERROR:root:foo (occured 99999 times)
Traceback (most recent call last):
  File "./exceptionlogging.py", line 10, in <module>
    asdf[i] # not defined!
NameError: name 'asdf' is not defined
INFO:root:foo more of the same (occured 88888 times with various values)
You should probably be writing a message aggregation/statistics class rather than trying to hook into the logging system's singletons, but I guess you may have an existing code base that uses logging.
I'd also suggest that you instantiate your loggers rather than always using the default root. The Python Logging Cookbook has extensive explanations and examples.
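For instance, a named module-level logger is just:

import logging

logger = logging.getLogger(__name__)  # a named logger instead of the root logger

try:
    asdf[0]  # not defined!
except NameError:
    logger.exception('foo')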
The following class should do what you are asking.
import logging
import atexit
import pprint


class Aggregator(object):
    logs = {}

    @classmethod
    def _aggregate(cls, record):
        id = '{0[levelname]}:{0[name]}:{0[msg]}'.format(record.__dict__)
        if id not in cls.logs:  # first occurrence
            cls.logs[id] = [1, record]
        else:  # subsequent occurrence
            cls.logs[id][0] += 1

    @classmethod
    def _output(cls):
        for count, record in cls.logs.values():
            record.__dict__['msg'] += ' (occured {} times)'.format(count)
            logging.getLogger(record.__dict__['name']).handle(record)

    @staticmethod
    def filter(record):
        # pprint.pprint(record)
        Aggregator._aggregate(record)
        return False

    @staticmethod
    def exit():
        Aggregator._output()


logging.getLogger().addFilter(Aggregator)
atexit.register(Aggregator.exit)

for i in range(99999):
    try:
        asdf[i]  # not defined!
    except NameError:
        logging.exception('foo')  # generates large number of logging events
    else:
        pass

# ... more code with more logging ...
for i in range(88888):
    logging.error('more of the same')
# ... and so on ...
Note that you don't get any logs until the program exits.
The result of running it is:
ERROR:root:foo (occured 99999 times)
Traceback (most recent call last):
  File "C:\work\VEMS\python\logcount.py", line 38, in <module>
    asdf[i] # not defined!
NameError: name 'asdf' is not defined
ERROR:root:more of the same (occured 88888 times)
Your question hides a subliminal assumption about how "very similar" is defined.
Log records can either be const-only (whose instances are strictly identical), or a mix of consts and variables (no consts at all is also considered a mix).
An aggregator for const-only log records is a piece of cake. You just need to decide whether the aggregation should be split per process/thread or not.
For log records which include both consts and variables, you'll need to decide whether to split your aggregation based on the variables you have in your record.
A dictionary-style counter (from collections import Counter) can serve as a cache that counts your instances in O(1), but you may need some higher-level structure if you also want to write the variables down. Additionally, you'll have to handle writing the cache to a file yourself - every X seconds (binning) or once the program has exited (risky - you may lose all in-memory data if something gets stuck).
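As a minimal illustration of the Counter-as-cache idea (a sketch; the handler below is the more complete version):

import atexit
import logging
from collections import Counter

counts = Counter()


class CountingFilter(logging.Filter):
    def filter(self, record):
        # Count by (logger name, level, unformatted message) and swallow the record.
        counts[(record.name, record.levelname, record.msg)] += 1
        return False


logging.getLogger().addFilter(CountingFilter())
atexit.register(lambda: [print(key, '- occurred', n, 'times') for key, n in counts.items()])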
A framework for aggregation would look something like this (tested on Python v3.4):
import logging
from logging import Handler
from threading import RLock, Timer
from collections import defaultdict


class LogAggregatorHandler(Handler):

    _default_flush_timer = 300  # Number of seconds between flushes
    _default_separator = "\t"  # Separator char between metadata strings
    _default_metadata = ["filename", "name", "funcName", "lineno", "levelname"]  # metadata defining unique log records

    class LogAggregatorCache(object):
        """ Keeps whatever is interesting in log records aggregation. """

        def __init__(self, record=None):
            self.message = None
            self.counter = 0
            self.timestamp = list()
            self.args = list()
            if record is not None:
                self.cache(record)

        def cache(self, record):
            if self.message is None:  # Only the first message is kept
                self.message = record.msg
            assert self.message == record.msg, "Non-matching log record"  # note: will not work with string formatting for log records; e.g. "blah {}".format(i)
            self.timestamp.append(record.created)
            self.args.append(record.args)
            self.counter += 1

        def __str__(self):
            """ The string of this object is used as the default output of log records aggregation. For example: record message with occurrences. """
            return self.message + "\t (occurred {} times)".format(self.counter)

    def __init__(self, flush_timer=None, separator=None, add_process_thread=False):
        """
        Log record metadata will be concatenated to a unique string, separated by self._separator.
        Process and thread IDs will be added to the metadata if set to True; otherwise log records across processes/threads will be aggregated together.
        :param separator: str
        :param add_process_thread: bool
        """
        super().__init__()
        self._flush_timer = flush_timer or self._default_flush_timer
        self._cache = self.cache_factory()
        self._separator = separator or self._default_separator
        self._metadata = self._default_metadata
        if add_process_thread is True:
            self._metadata += ["process", "thread"]
        self._aggregation_lock = RLock()
        self._store_aggregation_timer = self.flush_timer_factory()
        self._store_aggregation_timer.start()
        # Demo logger which outputs aggregations through a StreamHandler:
        self.agg_log = logging.getLogger("aggregation_logger")
        self.agg_log.addHandler(logging.StreamHandler())
        self.agg_log.setLevel(logging.DEBUG)
        self.agg_log.propagate = False

    def cache_factory(self):
        """ Returns an instance of a new caching object. """
        return defaultdict(self.LogAggregatorCache)

    def flush_timer_factory(self):
        """ Returns a threading.Timer daemon object which flushes the Handler aggregations. """
        timer = Timer(self._flush_timer, self.flush)
        timer.daemon = True
        return timer

    def find_unique(self, record):
        """ Extracts a unique metadata string from log records. """
        metadata = ""
        for single_metadata in self._metadata:
            value = getattr(record, single_metadata, "missing " + str(single_metadata))
            metadata += str(value) + self._separator
        return metadata[:-len(self._separator)]

    def emit(self, record):
        try:
            with self._aggregation_lock:
                metadata = self.find_unique(record)
                self._cache[metadata].cache(record)
        except Exception:
            self.handleError(record)

    def flush(self):
        self.store_aggregation()

    def store_aggregation(self):
        """ Write the aggregation data to file. """
        self._store_aggregation_timer.cancel()
        del self._store_aggregation_timer
        with self._aggregation_lock:
            temp_aggregation = self._cache
            self._cache = self.cache_factory()
            # ---> handle temp_aggregation and write to file <--- #
            for key, value in sorted(temp_aggregation.items()):
                self.agg_log.info("{}\t{}".format(key, value))
            # ---> re-create the store_aggregation Timer object <--- #
            self._store_aggregation_timer = self.flush_timer_factory()
            self._store_aggregation_timer.start()
Testing this Handler class with random log severity in a for-loop:
if __name__ == "__main__":
    import random
    import logging

    logger = logging.getLogger()
    handler = LogAggregatorHandler()
    logger.addHandler(handler)
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)

    logger.info("entering logging loop")
    for i in range(25):
        # Randomly choose log severity:
        severity = random.choice([logging.DEBUG, logging.INFO, logging.WARN, logging.ERROR, logging.CRITICAL])
        logger.log(severity, "test message number %s", i)
    logger.info("end of test code")
If you want to add more stuff, this is what a Python log record looks like:
{'args': ['()'],
'created': ['1413747902.18'],
'exc_info': ['None'],
'exc_text': ['None'],
'filename': ['push_socket_log.py'],
'funcName': ['<module>'],
'levelname': ['DEBUG'],
'levelno': ['10'],
'lineno': ['17'],
'module': ['push_socket_log'],
'msecs': ['181.387901306'],
'msg': ['Test message.'],
'name': ['__main__'],
'pathname': ['./push_socket_log.py'],
'process': ['65486'],
'processName': ['MainProcess'],
'relativeCreated': ['12.6709938049'],
'thread': ['140735262810896'],
'threadName': ['MainThread']}
One more thing to think about:
Most features you run depend on a flow of several consecutive commands (which will ideally report log records accordingly); e.g. a client-server communication will typically depend on receiving a request, processing it, reading some data from the DB (which requires a connection and some read commands), some kind of parsing/processing, constructing the response packet and reporting the response code.
This highlights one of the main disadvantages of using an aggregation approach: by aggregating log records you lose track of the time and order of the actions that took place. It will be extremely difficult to figure out what request was incorrectly structured if you only have the aggregation at hand.
My advice in this case is that you keep both the raw data and the aggregation (using two file handlers or something similar), so that you can investigate a macro-level (aggregation) and a micro-level (normal logging).
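A minimal way to get both views, assuming the LogAggregatorHandler above, is simply to attach two handlers to the same logger:

import logging

root = logging.getLogger()
root.setLevel(logging.DEBUG)
root.addHandler(logging.FileHandler("raw.log"))  # micro-level: every record, in order
root.addHandler(LogAggregatorHandler())          # macro-level: periodic aggregated summary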
However, you are still left with the responsibility of finding out that things have gone wrong, and then manually investigating what caused it. When developing on your PC this is an easy enough task; but deploying your code to several production servers makes these tasks cumbersome and wastes a lot of your time.
Accordingly, there are several companies developing products specifically for log management. Most aggregate similar log records together, but others incorporate machine learning algorithms for automatic aggregation and learning your software's behavior. Outsourcing your log handling can then enable you to focus on your product, instead of on your bugs.
Disclaimer: I work for Coralogix, one such solution.
You can subclass the logger class and override the exception method to put your error types in a cache until they reach a certain count, at which point they are emitted to the log.
import logging
from collections import defaultdict

MAX_COUNT = 99999


class MyLogger(logging.getLoggerClass()):
    def __init__(self, name):
        super(MyLogger, self).__init__(name)
        self.cache = defaultdict(int)

    def exception(self, msg, *args, **kwargs):
        err = msg.__class__.__name__
        self.cache[err] += 1
        if self.cache[err] > MAX_COUNT:
            new_msg = "{err} occurred {count} times.\n{msg}"
            new_msg = new_msg.format(err=err, count=MAX_COUNT, msg=msg)
            self.log(logging.ERROR, new_msg, *args, **kwargs)
            self.cache[err] = 0  # reset the counter (storing None here would break later increments)


log = MyLogger('main')

try:
    raise TypeError("Useful error message")
except TypeError as err:
    log.exception(err)
Please note this isn't copy-paste code.
You need to add your handlers (I recommend a formatter, too) yourself.
https://docs.python.org/2/howto/logging.html#handlers
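For example, a minimal setup might be:

handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s"))
log.addHandler(handler)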
Have fun.
Create a counter and only log the exception when the count is 1, then just increment thereafter and write the total out in a finally block (to ensure it gets logged no matter how badly the application crashes and burns). This could of course pose an issue if you have the same exception for different reasons, but you could always search for the line number to verify it's the same issue, or something similar. A minimal example:
import logging

name_error_exception_count = 0
try:
    for i in range(99999):
        try:
            asdf[i]  # not defined!
        except NameError:
            name_error_exception_count += 1
            if name_error_exception_count == 1:
                logging.exception('foo')
        else:
            pass
except Exception:
    pass  # this is just to get the finally block; handle exceptions here too, maybe
finally:
    if name_error_exception_count > 0:
        logging.exception('NameError exception occurred {} times.'.format(name_error_exception_count))
