I am working on storing data in an in-memory cache using CherryPy. I am using the code below to put data into the cache:
import cherrypy
import datetime
import sys
from cherrypy.lib.caching import MemoryCache
cache = MemoryCache()
def putDataIntoCache(self, *args, **kwargs):
    data = cache.get()
    if not data:
        obj = kwargs
        size = sys.getsizeof(obj)
        cache.put(obj, size)
        data = obj
    return 'obj: %s, id: %s' % (cache.get(), id(cache.get()))
But the problem is that the cached data is cleared automatically after 10 seconds.
I found that delay = 600 is set in the cache.py class, and I believe this is why the data is being cleared after 10 seconds.
I just want the cached data to be cleared when the CherryPy server is restarted.
How can I solve this?
I think you shouldn't be using the cache tool directly, especially when you don't really need the finer-grained control over cache invalidation that it provides.
You can do something like this:
import cherrypy as cp
from cherrypy.process.plugins import SimplePlugin

class CachePlugin(SimplePlugin):

    def start(self):
        self.bus.log('Initializing cache')
        cp.cache = {}

    def stop(self):
        self.bus.log('Clearing cache')
        cp.cache = {}

class Root:

    @cp.expose
    def default(self, user=None):
        if user is not None:
            cp.cache['user'] = user
        return cp.cache.get('user', 'No user')

CachePlugin(cp.engine).subscribe()
cp.quickstart(Root())
Try it with: /, /?user=test, / and /?user=new_user
You could keep the cache inside the plugin and publish bus channels to modify it, but in general the idea is that you don't really need the caching tool; you just need to listen for when the server starts and stops (I'm assuming you are using the autoreloader). A sketch of that plugin-contained variant follows the link below.
The code above is simply a dictionary monkey-patched onto the cherrypy module. It's quick and simple. It's pretty late here in Mexico... I hope this helps.
More info about the plugins: http://cherrypy.readthedocs.org/en/latest/extend.html#server-wide-functions
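If you do want the plugin-contained variant, a minimal sketch could look like this; note that the channel names 'cache-get' and 'cache-put' are made up for the example, not part of CherryPy:

# Sketch: keep the cache inside the plugin and talk to it via bus channels.
import cherrypy as cp
from cherrypy.process.plugins import SimplePlugin

class ContainedCachePlugin(SimplePlugin):
    def start(self):
        self._cache = {}
        self.bus.subscribe('cache-get', self._get)
        self.bus.subscribe('cache-put', self._put)

    def stop(self):
        self.bus.unsubscribe('cache-get', self._get)
        self.bus.unsubscribe('cache-put', self._put)
        self._cache = {}

    def _get(self, key, default=None):
        return self._cache.get(key, default)

    def _put(self, key, value):
        self._cache[key] = value

# Usage from a handler (publish returns a list of subscriber results):
#   cp.engine.publish('cache-put', 'user', user)
#   cached = cp.engine.publish('cache-get', 'user', 'No user').pop()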
I've been struggling with this for a couple of months now and have tried a lot of different things to alleviate it, but I'm not sure what to do anymore. All the examples I see are different from what I need, and in my case they just wouldn't work.
To preface the problem: I have processor applications that get spawned by a manager as Docker containers. The processor is a single class that runs in a forever while loop, processing the same list of items over and over again and running a function on them. The code I'm working with is quite large, so I created a smaller version of the problem below.
This is how I create my engine:
db.py
from os import getpid

from pymongo import MongoClient

_mongo_client = None
_mongo_client_pid = None

def get_mongodb_uri(MONGO_DB_HOST, MONGO_DB_PORT) -> str:
    return 'mongodb://{}:{}/{}'.format(MONGO_DB_HOST, MONGO_DB_PORT, 'taskprocessor')

def get_db_engine():
    global _mongo_client, _mongo_client_pid
    curr_pid = getpid()
    if curr_pid != _mongo_client_pid:
        _mongo_client = MongoClient(get_mongodb_uri(), connect=False)
        _mongo_client_pid = curr_pid
    return _mongo_client

def get_db(name):
    return get_db_engine()['taskprocessor'][name]
These are my DB models
processor.py
from uuid import uuid4

from taskprocessor.db import get_db

class ProcessorModel():
    db = get_db("processors")

    def __init__(self, **kwargs):
        self.uid = kwargs.get('uid', str(uuid4()))
        self.exceptions = kwargs.get('exceptions', [])
        self.to_process = kwargs.get('to_process', [])
        self.functions = kwargs.get('functions', ["int", "round"])

    def save(self):
        return self.db.insert_one(self.__dict__).inserted_id is not None

    @classmethod
    def get(cls, uid):
        res = cls.db.find_one(dict(uid=uid))
        return ProcessorModel(**res)
result.py
from uuid import uuid4

from taskprocessor.db import get_db

class ResultModel():
    db = get_db("results")

    def __init__(self, **kwargs):
        self.uid = kwargs.get('uid', str(uuid4()))
        self.res = kwargs.get('res', dict())

    def save(self):
        return self.db.insert_one(self.__dict__).inserted_id is not None
And my main.py, which gets started as a Docker container to run a forever loop:
import os
from time import sleep
from taskprocessor.db.processor import ProcessorModel
from taskprocessor.db.result import ResultModel
from multiprocessing import Pool

class Processor:
    def __init__(self):
        self.id = os.getenv("PROCESSOR_ID")
        self.db_model = ProcessorModel.get(self.id)
        self.to_process = self.db_model.to_process  # list of floats [1.23, 1.535, 1.33499, 242.2352, 352.232]
        self.functions = self.db_model.functions    # list, e.g. ["round", "int"]

    def run(self):
        while True:
            try:
                pool = Pool(2)
                res = list(pool.map(self.analyse, self.to_process))
                print(res)
                sleep(100)
            except Exception as e:
                self.db_model = ProcessorModel.get(os.getenv("PROCESSOR_ID"))
                self.db_model.exceptions.append(f"exception {e}")
                self.db_model.save()
                print("Exception")

    def analyse(self, item):
        res = {}
        for func in self.functions:
            if func == "round":
                res['round'] = round(item)
            if func == "int":
                res['int'] = int(item)
        ResultModel(res=res).save()
        return res

if __name__ == "__main__":
    p = Processor()
    p.run()
I've tried setting connect=False, and even tried closing the connection after the configuration, but then I end up with connection-closed errors. I also tried a scheme of recognizing the PID and handing out a different client per process, but that still did not help.
Almost all the examples I see are ones where DB access is not needed before the multiprocessing fork. In my case the initial configuration is heavy, and it would not be efficient to do it every single time in the process loop. Furthermore, the items to process themselves depend on data from the DB.
I can live with not being able to save the exceptions to the DB object from the main PID.
I'm seeing error logs about fork safety, as well as hitting connection-pool-paused errors, as a symptom of this issue.
If anybody sees this: I was using pymongo 4.0.2, upgraded to 4.3.3, and am no longer seeing the errors I was previously seeing.
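As an aside (not part of the answer above), pymongo's fork-safety guidance is to create the MongoClient after the fork, for example in a Pool initializer so each worker process gets its own client. A rough sketch with illustrative names:

# Sketch: give each pool worker its own MongoClient, created post-fork.
from multiprocessing import Pool
from pymongo import MongoClient

_client = None  # per-process client, set by the initializer

def init_worker(uri):
    global _client
    _client = MongoClient(uri)  # created after the fork, so it is fork-safe

def analyse(item):
    _client['taskprocessor']['results'].insert_one({'res': {'int': int(item)}})
    return int(item)

if __name__ == '__main__':
    uri = 'mongodb://localhost:27017/taskprocessor'
    with Pool(2, initializer=init_worker, initargs=(uri,)) as pool:
        print(pool.map(analyse, [1.23, 1.535, 1.33499]))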
Upon looking up event handler modules, I came across pydispatcher, which seemed beginner friendly. My use case for the library is that I want to send a signal if my queue size is over a threshold. The handler function can then start processing and removing items from the queue (and subsequently do a bulk insert into the database).
I would like the handler function to run in the background. I am aware that I can simply overwrite the queue.append() method checking for the queue size and calling the handler function asynchronously, but I would like to implement the listener-dispatcher model to keep the logic clean and separated.
Does pydispatcher do this out of the box? If not, is there another module that can help me do this? Would I need to manage the access to the queue, since there might be multiple threads processing and appending to the queue at the same time?
Note that in my use case there is only a single dispatcher and event handler.
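For illustration, here is a rough sketch of the wiring I have in mind. Note that PyDispatcher's dispatcher.send() is synchronous, so this hands the actual work off to a thread; the signal name and threshold below are made up:

# Sketch: listener/dispatcher around a queue-size threshold.
import threading
from queue import Queue
from pydispatch import dispatcher

QUEUE_FULL = 'queue-over-threshold'
THRESHOLD = 100
work_queue = Queue()

def flush_queue(sender):
    # Drain the queue and (for example) bulk-insert the items into the database.
    items = []
    while not work_queue.empty():
        items.append(work_queue.get())
    print('flushing', len(items), 'items')

def on_queue_full(sender):
    # Run the real handler in the background so the producer is not blocked.
    threading.Thread(target=flush_queue, args=(sender,), daemon=True).start()

dispatcher.connect(on_queue_full, signal=QUEUE_FULL, sender=dispatcher.Any)

def append(item):
    work_queue.put(item)
    if work_queue.qsize() > THRESHOLD:
        dispatcher.send(signal=QUEUE_FULL, sender=append)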
I've recently released the Akuanduba module, which may help you with this task. There's a single example in the repository that may help you understand how it works, and it seems similar to what you want.
Anyway, I'll try to explain here a way of implementing your code with Akuanduba:
First you could make a data frame that would hold your queue:
# Mandatory imports
from Akuanduba.core.messenger.macros import *
from Akuanduba.core.constants import *
from Akuanduba.core import NotSet, AkuandubaDataframe
# Your imports go here:
from queue import Queue

class MyQueue (AkuandubaDataframe):

    def __init__(self, name):
        # Mandatory stuff
        AkuandubaDataframe.__init__(self, name)
        self.__queue = Queue ()

    def getQueue (self):
        return self.__queue

    def putQueue (self, val):
        self.__queue.put(val)

    def getQueueSize (self):
        return self.__queue.qsize()

    #
    # "toRawObj" method is a mandatory method that delivers a dict with the desired data
    # for file saving
    #
    def toRawObj(self):
        d = {
            "Queue" : self.getQueue(),
        }
        return d
Then you could make a TriggerCondition that would check the queue size:
from Akuanduba.core import StatusCode, NotSet, StatusTrigger
from Akuanduba.core.messenger.macros import *
from Akuanduba.core import TriggerCondition
import time

class CheckQueueSize (TriggerCondition):

    def __init__(self, name, maxSize):
        TriggerCondition.__init__(self, name)
        self._name = name
        self._maxSize = maxSize

    def initialize(self):
        return StatusCode.SUCCESS

    def execute (self):
        size = self.getContext().getHandler("MyQueue").getQueueSize()
        if (size > self._maxSize):
            return StatusTrigger.TRIGGERED
        else:
            return StatusTrigger.NOT_TRIGGERED

    def finalize(self):
        return StatusCode.SUCCESS
Make a tool that would be your handler function:
# Mandatory imports
from Akuanduba.core import AkuandubaTool, StatusCode, NotSet, retrieve_kw
# Your imports go here:

class SampleTool(AkuandubaTool):

    def __init__(self, name, **kw):
        # Mandatory stuff
        AkuandubaTool.__init__(self, name)

    def initialize(self):
        # Lock the initialization. After that, this tool can not be initialized once again
        self.init_lock()
        return StatusCode.SUCCESS

    def execute(self, context):
        #
        # DO SOMETHING HERE
        #
        # Always return SUCCESS
        return StatusCode.SUCCESS

    def finalize(self):
        self.fina_lock()
        return StatusCode.SUCCESS
And finally, make a main script in order to make it all work together:
# Akuanduba imports
from Akuanduba.core import Akuanduba, LoggingLevel, AkuandubaTrigger
from Akuanduba import ServiceManager, ToolManager, DataframeManager

# This sample's imports
import MyQueue, CheckQueueSize, SampleTool

# Creating your handler
your_handler = SampleTool ("Your Handler's name")

# Creating dataframes
queue = MyQueue ("MyQueue")

# Creating trigger
trigger = AkuandubaTrigger("Sample Trigger Name", triggerType = 'or')

# Append conditions and tools to the trigger just by adding them.
# Tools appended to the trigger will only run when the trigger is StatusTrigger.TRIGGERED,
# and will run in the order they've been appended
trigger += CheckQueueSize( "CheckQueueSize condition", MAX_QUEUE_SIZE )
trigger += your_handler

# Creating Akuanduba
manager = Akuanduba("Akuanduba", level=LoggingLevel.INFO)

# Appending tools
#
# ToolManager += TOOL_1
# ToolManager += TOOL_2
#
ToolManager += trigger

# Appending dataframes
DataframeManager += queue

# Initializing
manager.initialize()
manager.execute()
manager.finalize()
That way, you'd have clean and separated code.
I have a service that exposes an API which then feeds tasks; it is implemented with Falcon (API) and Celery (task management).
Specifically, my workers take a long time to load, and their code looks something like this:
class HeavyOp(celery.Task):

    def __init__(self):
        self._asset = get_heavy_asset()  # <-- takes long time

    @property
    def asset(self):
        return self._asset

@app.task(base=HeavyOp)
def my_task(data):
    return my_task.asset.do_something(data)
What actually goes on is that in the __init__ function some object is being read from disk and held in memory for as long as the worker lives.
Sometimes, I want to update that object.
Is there a way to reload the worker, without downtime? As this is all behind an API, I don't wish to have those few minutes of loading the heavy object as downtime.
We can assume the host has more than 1 core, but the solution must be a single host solution.
I don't think you need a custom base task class. What you want is a single, shared asset instance that gets loaded after the worker has initialised and that you can reload from a task.
This approach works:
# worker.py
import os
import sys
import time

from celery import Celery
from celery.signals import worker_ready

app = Celery(include=('tasks',))

class Asset:
    def __init__(self):
        self.time = time.time()

class AssetLoader:
    __shared_state = {}

    def __init__(self):
        self.__dict__ = self.__shared_state
        if '_value' not in self.__dict__:
            self.get_heavy_asset()

    def get_heavy_asset(self):
        self._value = Asset()

    @property
    def value(self):
        return self._value

@worker_ready.connect
def after_worker_ready(sender, **kwargs):
    AssetLoader()
Here, I made AssetLoader a Borg class, but you can choose any other pattern/strategy to share a single instance of Asset. For illustrative purposes, I just capture the timestamp when executing get_heavy_asset.
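As an illustration of "any other pattern", a plain module-level singleton would work too; this is just a sketch, not part of the worker code above:

# Alternative to the Borg class: a module-level singleton holding the asset.
_asset = None

def get_asset(reload=False):
    global _asset
    if _asset is None or reload:
        _asset = Asset()  # the expensive load happens here
    return _asset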
# tasks.py
from worker import app, AssetLoader

@app.task(bind=True)
def load(self):
    AssetLoader().get_heavy_asset()
    return AssetLoader().value.time

@app.task(bind=True)
def my_task(self):
    return AssetLoader().value.time
Bear in mind that the Asset is held per worker process, not shared across processes. If you run with concurrency=1 it doesn't make a difference, but for anything else it does. From what I gather of your use case, though, it should be fine either way.
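For example, once a worker is running (started with something like celery -A worker worker), triggering a reload is just a matter of calling the task; a minimal sketch:

# Sketch: any client/API process can ask a worker to rebuild the asset.
from tasks import load, my_task

load.delay()      # re-runs get_heavy_asset() inside the worker process
my_task.delay()   # later tasks served by that process use the refreshed asset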
traits_pickle_problem.py
from traits.api import HasTraits, List
import cPickle

class Client(HasTraits):
    data = List

class Person(object):
    def __init__(self):
        self.client = Client()

        # dynamic handler
        self.client.on_trait_event(self.report, 'data_items')

    def report(self, obj, name, old, new):
        print 'client added-- ', new.added

if __name__ == '__main__':
    p = Person()
    p.client.data = [1, 2, 3]
    p.client.data.append(10)
    cPickle.dump(p, open('testTraits.pkl', 'wb'))
The above code reports additions to a dynamic trait. Everything works as expected in this code. However, starting a new Python process and doing the following:
>>> from traits_pickle_problem import Person, Client
>>> p=cPickle.load(open('testTraits.pkl','rb'))
>>> p.client.data.append(1000)
causes no report of the list append. However, re-establishing the listener separately as follows:
>>> p.client.on_trait_event(p.report,'data_items')
>>> p.client.data.append(1000)
client added-- [1000]
makes it work again.
Am I missing something, or does the handler need to be re-established in __setstate__ during the unpickling process?
Any help appreciated. This is for Python 2.7 (32-bit) on Windows with Traits version 4.30.
Running pickletools.dis(cPickle.dumps(p)), you can see the handler object being referenced:
...
213: c GLOBAL 'traits.trait_handlers TraitListObject'
...
But there's no further information on how it should be wired up to the report method. So either the trait handler doesn't pickle itself properly, or it's an ephemeral thing, like a file handle, that can't be pickled in the first place.
In either case, your best option is to override __setstate__ and re-wire the event handler when the object is re-created. It's not ideal, but at least everything is contained within the object.
class Person(object):
    def __init__(self):
        self.client = Client()
        # dynamic handler
        self.client.on_trait_event(self.report, 'data_items')

    def __setstate__(self, d):
        self.client = d['client']
        self.client.on_trait_event(self.report, 'data_items')

    def report(self, obj, name, old, new):
        print 'client added-- ', new.added
Unpickling the file now correctly registers the event handler:
p=cPickle.load(open('testTraits.pkl','rb'))
p.client.data.append(1000)
>>> client added-- [1000]
You might find this talk Alex Gaynor did at PyCon interesting. It goes into the high points of how pickling works under the hood.
EDIT - initial response used on_trait_change - a typo that appears to work. Changed it back to on_trait_event for clarity.
I had the same problem but worked around it like this: imagine I want to pickle only parts of a quite big class, and some of the objects have been set to transient=True so they're not pickled because there is nothing important to save, e.g.
class LineSpectrum(HasTraits):
    andor_cam = Instance(ANDORiKonM, transient=True)
In contrast to objects which should be saved, e.g.
spectrometer = Instance(SomeNiceSpectrometer)
In my LineSpectrum class, I have a
def __init__(self, f):
    super(LineSpectrum, self).__init__()
    self.load_spectrum(f)

def __setstate__(self, state):  # WORKING!
    print("LineSpectrum: __setstate__ with super(...) call")
    self.__dict__.update(state)
    super(LineSpectrum, self).__init__()  # this has to be done, otherwise pickled sliders won't work; also, first update __dict__!
    self.from_pickle = True  # is not needed by traits, need it for myself
    self.andor_cam = ANDORiKonM(self.filename)
    self.load_spectrum(self.filename)
In my case this works perfectly: all sliders are working, and all values that were set at the time the object was pickled are restored.
Hope this works for you or anybody else who's having the same problem. I'm on Anaconda Python 2.7.11 with all packages updated.
PS: I know the thread is old, but didn't want to open a new one just for this.
I have a file containing some data, data.txt (located in an appropriate place). I would like the Django app to process this file before the app starts and to react to every change in it (without a restart). What is the best way to do this?
For startup, you can write a middleware that does what you want in its __init__ and afterwards raises django.core.exceptions.MiddlewareNotUsed from that __init__, so Django will not use it for any request processing (see the docs).
The middleware's __init__ will be called at startup, not at the first request.
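A minimal sketch of that middleware; do_startup_processing is a placeholder for whatever you do with data.txt, and the get_response argument keeps it compatible with newer middleware styles:

# Sketch: run startup code once, then opt out of request processing.
from django.core.exceptions import MiddlewareNotUsed

class StartupMiddleware(object):
    def __init__(self, get_response=None):
        do_startup_processing()    # placeholder: parse data.txt, warm caches, etc.
        raise MiddlewareNotUsed()  # Django then skips this middleware for requests

# Add 'yourapp.middleware.StartupMiddleware' to MIDDLEWARE
# (or MIDDLEWARE_CLASSES on older Django versions) in settings.py.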
As for reacting to file changes, you can use https://github.com/gorakhargosh/watchdog (an example of usage can be found here).
So you can either start it somewhere in the middleware too, or, if it's only DB updates, you can create a separate script (or Django management command) that is run via supervisor or something like that and monitors this file and updates the DB.
An option could be pyinotify, which monitors the filesystem for changes; it works only on Linux though (a small sketch follows below).
Otherwise, look at the code of the runserver command, which seems to do the same thing (the code of the autoreload module is here).
To run a command before starting an app, I suppose you can write some code in the settings module.
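A minimal pyinotify sketch of the file-watching suggestion above (Linux only; the path and callback are made up):

# Sketch: react whenever data.txt is modified.
import pyinotify

class DataFileHandler(pyinotify.ProcessEvent):
    def process_IN_MODIFY(self, event):
        print('data.txt changed, reprocessing:', event.pathname)

wm = pyinotify.WatchManager()
wm.add_watch('/path/to/data.txt', pyinotify.IN_MODIFY)
notifier = pyinotify.Notifier(wm, DataFileHandler())
notifier.loop()  # blocks; run it in a thread or management command in practice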
Maybe you could put an object in the settings which will look at the file on every access, i.e. make a class that loads the file and reloads it if it has been modified:
import time

class ExtraConfigWatcher(object):
    def __init__(self, file):
        self.file = file
        self.cached = dict()
        self.last_date_modified = None

    def update_config(self):
        """
        Update the config by reloading the file.
        """
        if has_been_modified(self.file, self.last_date_modified):
            # regenerate the config from the file.
            self.cached = get_dict_with_file(self.file)
            self.last_date_modified = time.time()

    def __getitem__(self, *args, **kwargs):
        self.update_config()
        return self.cached.__getitem__(*args, **kwargs)

    def __setitem__(self, *args, **kwargs):
        raise NotImplementedError("you can't set config into this")
in settings.py: initialize this object
EXTRA_CONFIG = ExtraConfigWatcher("path/to/the/file.dat")
in myapps/views.py: import settings and use EXTRA_CONFIG
from django.conf import settings

def dosomething(request):
    if settings.EXTRA_CONFIG["the_data_from_the_file"] == "foo":
        # bouhh
        pass
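The two helpers used above (has_been_modified and get_dict_with_file) are left undefined by the answer; hypothetical implementations might look like this, assuming the data file holds JSON:

# Sketch of the placeholder helpers; adapt the parsing to your file format.
import json
import os

def has_been_modified(path, last_date_modified):
    # Reload on first access, or when the file's mtime is newer than the last reload.
    if last_date_modified is None:
        return True
    return os.path.getmtime(path) > last_date_modified

def get_dict_with_file(path):
    with open(path) as fh:
        return json.load(fh)  # assumption: the data file is JSON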
A while ago I was trying to find a mechanism to "hot-swap" Python modules. While that is not exactly what you need, maybe you can use the implementation I proposed and monitor your configuration file for modifications, acting accordingly.
The code I proposed is the following (I did not use inotify because I am working on an NFS file system):
import imp
import time
import hashlib
import threading
import logging

logger = logging.getLogger("")

class MonitorThread(threading.Thread):
    def __init__(self, engine, frequency=1):
        super(MonitorThread, self).__init__()
        self.engine = engine
        self.frequency = frequency
        # daemonize the thread so that it ends with the master program
        self.daemon = True

    def run(self):
        while True:
            with open(self.engine.source, "rb") as fp:
                fingerprint = hashlib.sha1(fp.read()).hexdigest()
            if not fingerprint == self.engine.fingerprint:
                self.engine.notify(fingerprint)
            time.sleep(self.frequency)

class Engine(object):
    def __init__(self, source):
        # store the path to the engine source
        self.source = source
        # load the module for the first time and create a fingerprint
        # for the file
        self.mod = imp.load_source("source", self.source)
        with open(self.source, "rb") as fp:
            self.fingerprint = hashlib.sha1(fp.read()).hexdigest()
        # turn on monitoring thread
        monitor = MonitorThread(self)
        monitor.start()

    def notify(self, fingerprint):
        logger.info("received notification of fingerprint change ({0})".format(fingerprint))
        self.fingerprint = fingerprint
        self.mod = imp.load_source("source", self.source)

    def __getattr__(self, attr):
        return getattr(self.mod, attr)

def main():
    logging.basicConfig(level=logging.INFO, filename="hotswap.log")
    engine = Engine("engine.py")
    # this silly loop is a sample of how the program can be running in
    # one thread while the monitoring is performed in another.
    while True:
        engine.f1()
        engine.f2()
        time.sleep(1)

if __name__ == "__main__":
    main()
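For completeness, a minimal engine.py that main() above could load (purely illustrative; edit and save it while the program runs to see the hot-swap happen):

# engine.py -- illustrative module for the hot-swap example above.
def f1():
    print("f1: original behaviour")

def f2():
    print("f2: original behaviour")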