I am trying to cache the result of a time-consuming request.
First, here are my Flask routes:
#app.route("/")
#app.route("/tabs", methods=['GET','POST'])
def tab():
return render_template("tabs.html")
#app.route("/graph", methods=['GET','POST'])
def graph():
#Some code
return render_template("chart.html", the_div=div, the_script=script,
form=form, tables=table, titles = testyear)
#app.route("/prices", methods=['GET','POST'])
def prices():
#Some other code
return render_template("prices.html", PlotGroup=PlotGroup,
ScriptGroup=ScriptGroup, DivGroup=DivGroup)
At the top of my code I have initialized the app, the cache and a timeout:
# Checking if prod, to switch the server between ports 5000 and 5001
IS_PROD = sys.argv[1] == "prod"

# Setting up the cache timeout
CACHE_TIMEOUT = 20

# Defining the Flask app
app = Flask(__name__, template_folder='Template')

# Defining the cache config
app.config['CACHE_TYPE'] = 'simple'
app.cache = Cache(app)
I have also created a config class:
class Config(object):
    JOBS = [
        {
            'id': 'refresh_cache',
            'func': 'main:get_my_cache',
            'trigger': 'interval',
            'seconds': 5
        }
    ]
    SCHEDULER_API_ENABLED = True
The function get_my_cache() is defined as below:
@app.cache.cached(timeout=CACHE_TIMEOUT, key_prefix='my-cache')
def get_my_cache():
    cacheval = app.cache.get('my-cache')
    print(cacheval)
    if cacheval is None:
        # cacheval1, cacheval2 = DataHandling.extract_full_table()
        cacheval1, cacheval2 = DataHandling.offlinedata()
        cacheval = [cacheval1, cacheval2]
        print("Cache updated at : " + time.strftime("%b %d %Y - %H:%M:%S"))
        app.cache.set('my-cache', [cacheval1, cacheval2])
    return cacheval[0], cacheval[1]
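For reference, a purely illustrative sketch of how a view could consume those cached values; the /cache-demo route and its body are hypothetical and not part of the original app:

@app.route("/cache-demo", methods=['GET'])
def cache_demo():
    # Hypothetical route, only to illustrate reading the cached values in a view;
    # the real /graph and /prices views presumably do something similar in "Some code".
    tables, prices = get_my_cache()
    return render_template("tabs.html")  # a real view would pass tables/prices to its template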
In the main section I load everything:
if __name__ == '__main__':
    app.config.from_object(Config())
    scheduler = APScheduler()
    scheduler.init_app(app)
    scheduler.start()
    if IS_PROD:
        app.run(host='0.0.0.0', debug=False, port=5000)
    else:
        app.run(debug=True, port=5001)
So, if I understand correctly from the timeline below:
None
Cache updated at : Jun 19 2017 - 11:25:58
None
Cache updated at : Jun 19 2017 - 11:26:23
None
Cache updated at : Jun 19 2017 - 11:26:25
127.0.0.1 - - [19/Jun/2017 11:26:25] "GET /graph HTTP/1.1" 200 -
My scheduler checks my cache every 5 seconds (the timing is short for testing; it will be longer in reality), and I indeed see a cache update roughly every 25 seconds.
My problem is that when I refresh the page, I see a cache update about 2 seconds after the last one... From my understanding it seems there are two kinds of cache: one for the page (localhost/graph) and another set up by the scheduler, even though both refer to the same key_prefix...
I understand that this could be related to different threads. Could that be the issue?
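One suggested approach (sketched in the snippet below) is to run the scheduled task inside the Flask application context, so that the job and the request handlers end up talking to the same cache. Note the snippet uses a module-level cache object rather than app.cache, and the concrete names (task1, job1, the "t1" key) are just placeholders: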
def task1(app):
    with app.app_context():
        # cache.set("t1", "123")
        x = cache.get("t1")
        print(x)

class Config(object):
    JOBS = [{  # add task1
        'id': 'job1',
        'func': '__main__:task1',
        'args': (app,),
        'trigger': 'interval',
        'seconds': 5,
    }]
I am new to Dagster and I'm trying to pass a resource to a Dagster op through the job it's called from. I'm having issues even after following the docs; I'm not sure if I need to pass the config again to jobs, and nothing seems to work. Here is the code.
The error:
dagster.core.errors.DagsterInvalidConfigError: Error in config for job
Error 1: Missing required config entry "resources" at the root.
import os

import boto3
from dagster import job, op, resource, StringSource
from dotenv import load_dotenv

load_dotenv()

@op
def return_one(context):
    context.log.info(f'return_one {os.environ.get("BUCKET")}')
    return 1

@op(required_resource_keys={"boto3_connection"})
def add_two(context, i: int):
    context.log.info(f'##### {context.resources.boto3_connection.get_client()}')
    return i + 2

@op
def multi_three(i: int):
    return i * 3

class Boto3Connector(object):
    def __init__(self, aws_access_key_id, aws_secret_access_key):
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def get_client(self, resource="s3"):
        session = boto3.session.Session()
        session_client = session.client(
            service_name=resource,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
        )
        return session_client

@resource(
    config_schema={
        'aws_access_key_id': StringSource,
        'aws_secret_access_key': StringSource
    })
def boto3_connection(context):
    return Boto3Connector(
        context.resource_config['aws_access_key_id'],
        context.resource_config['aws_secret_access_key']
    )

@job(resource_defs={'boto3_connection': boto3_connection})
def my_job():
    multi_three(add_two(return_one()))
My problem was assuming that resource configs are passed down automatically, but you have to specify them in your job's config. So I just added the config:
@job(resource_defs={'boto3_connection': boto3_connection},
     config={
         'resources': {
             "boto3_connection": {
                 "config": {
                     "aws_access_key_id": {"env": "AWS_ACCESS_KEY_ID"},
                     "aws_secret_access_key": {"env": "AWS_SECRET_ACCESS_KEY"},
                 }
             }
         }
     })
def my_job():
    multi_three(add_two(return_one()))
I was pointed in the right direction on the Dagster Slack.
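For completeness, a hedged sketch of running the configured job locally. execute_in_process() runs a Dagster job synchronously in the current process; the AWS_* environment variables are assumed to be available, for example via the .env file loaded above:

if __name__ == "__main__":
    # Runs my_job in-process; the config attached to the job fills the
    # boto3_connection resource from AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY.
    result = my_job.execute_in_process()
    print(result.success)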
I have set up a job as below:
In settings.py:
CRON_CLASSES = [
    "kiteconnect_source.source.cron.MyCronJob",
]
CRONJOBS = [
    ('*/1 * * * *', 'kiteconnect_source.source.cron.MyCronJob')
]
In the cron job Python file:
from django_cron import CronJobBase, Schedule
import datetime

class MyCronJob(CronJobBase):
    ALLOW_PARALLEL_RUNS = False
    RUN_EVERY_MINS = 1
    schedule = Schedule(run_every_mins=RUN_EVERY_MINS)
    code = 'kiteconnect_source.my_cron_job'  # a unique code

    def do(self):
        # time = datetime.datetime.now()
        val = "This is cron job function new testing:"
        f = open('/dummy.txt', 'a')
        f.write(val)
        f.close()
        print("demo")
The issue is that it gets executed only once instead of at every one-minute interval.
Can anyone explain what I've missed? Thanks.
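For context, and as an assumption about the setup (the deployment side isn't shown here): django_cron only evaluates its jobs when the runcrons management command is invoked, so that command itself has to be triggered on a schedule, typically by the system cron. A quick way to exercise the job by hand is to call the command from Python:

# Hedged sketch: invoking django_cron's runcrons management command manually.
# MyCronJob.do() will run if RUN_EVERY_MINS has elapsed since its last run.
from django.core.management import call_command

call_command("runcrons")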
I am facing a Heroku 30s timeout error with my Django web app.
The reason it takes so long is the view's variables being built (Reddit API results loaded into the context).
Here is the code:
def home(request):
    reddit = praw.Reddit(client_id='myclientid', client_secret='mysecretcode',
                         user_agent='user agent name')

    hot_posts = reddit.subreddit('AskReddit').top(time_filter="day", limit=7)
    x = []
    y = []
    for post in hot_posts:
        x.append(post.title)
        y.append(post.url)
    print(x)
    print(y)

    z = []
    for url in y:
        comments = []
        submission = reddit.submission(url=url)
        submission.comments.replace_more(limit=0)
        for count in range(10):
            comments.append(submission.comments[count].body)
        z.append(comments)

    top_EarthPorn = reddit.subreddit('EarthPorn').top(limit=100)
    EarthPorn_links = []
    for post in top_EarthPorn:
        EarthPorn_links.append(post.url)
    request.session['EarthPorn_links'] = EarthPorn_links

    return render(request, template_name='base.html', context=context)
How do I make sure the context dict data is created every hour or so as a background process? Which libraries can one use to achieve this?
I think this should work:
Put this at the end of your settings.py file:
SESSION_EXPIRE_SECONDS = 1500 # 1500 seconds = 25 minutes
So the session will expire after 25 minutes.
I have made a REST service in Tornado. I tried a GET with JSON arguments and everything works fine, but when I try with parameters in the URL, Postman returns a "socket hang up" error.
This is the URL sent:
http://127.0.0.1:8080/scenarios/asyncexec?project_name=LBP22&scenario_name=6a27351e-e51f-4349-89d8-a3e326a5bd12
and this is the handler for the GET:
def get(self):
    # GET function for checking the status of execution
    project_name = self.get_argument('project_name')
    scenario_name = self.get_argument('scenario_name')
    Loggers.access.info("Polling for exec status")

    running = False
    save_exec_element = None
    for exec_element in strategy_lab_config.Scenarios.Execute.exec_list:
        if exec_element[1] == project_name and \
                exec_element[2] == scenario_name:
            exec_future = exec_element[0]
            if exec_future.running():
                self._generate_output_json_from_dict({"execution_status": "RET_OK_PROCESSING"})
                running = True
                break
            elif exec_future.done():
                save_exec_element = exec_element
                try:
                    output = exec_future.result()
                    scenario = {
                        'project_name': project_name,
                        'scenario_name': scenario_name,
                        "execution_status": 'RET_OK_DONE',
                        "output": output
                    }
                    self._generate_output_json_from_dict(scenario)
                    break
                except Exception as exec_exc:
                    scenario = {
                        'project_name': project_name,
                        'scenario_name': scenario_name,
                        "execution_status": 'RET_ERR_FAIL',
                        "error_message": str(exec_exc),
                        "traceback": "".join(traceback.TracebackException.from_exception(exec_exc).format())
                    }
                    self._generate_output_json_from_dict(scenario)
                    break
    else:
        self._generate_output_json_from_dict({"execution_status": "RET_ERR_NOT_EXIST"})
    return
Note that the previous version used JSON and it all worked fine.
Here are the handler definitions:
class Application(tornado.web.Application):
    def __init__(self):
        handlers = [
            ("/datasets/add", DatasetAdd),
            ("/projects/create", ProjectCreate),
            ("/projects/delete", ProjectDelete),
            ("/scenarios/execute", ScenarioExecute),
            ("/scenarios/asyncexec", AsyncScenarioExecute),
            ("/scenarios/tune", ScenarioTune),
            ("/scenarios/whatif", ScenarioWhatIfAnalysis)
        ]
        tornado.web.Application.__init__(self, handlers, debug=True)
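For completeness, a hedged sketch of how such an Application is typically started (the actual startup code isn't shown in the question; port 8080 is assumed from the URL above):

import tornado.ioloop

if __name__ == "__main__":
    app = Application()
    app.listen(8080)  # matches the http://127.0.0.1:8080/... URL used above
    tornado.ioloop.IOLoop.current().start()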
It turned out there was a fatal error in the prepare() function of the RequestHandler, so the server started correctly but was not receiving the POST requests.
I'm trying to run a unit test on a Celery task that I have set to run daily.
I have tried importing the function and calling it in my test, but this doesn't work.
The task is:
@shared_task
def create_a_notification_if_a_product_is_in_or_out_of_season():
    """
    Send a notification if a product is now in or out of season
    """
    julian_date = date.today().timetuple().tm_yday + 1
    active_products = Product.objects.filter(status='ACTIVE')
    for products in active_products:
        in_season_prd = ProductDescription.objects.filter(
            product=products,
            early_start_julian=julian_date
        )
        for prd in in_season_prd:
            notification = Notification()
            notification.type = notification_choices.PRODUCT_IN_SEASON
            notification.description = str(prd.product.name) + " will be in season from tomorrow."
            notification.save()
and here is an example of one of my tests:
def test_when_product_is_about_to_come_in_to_seasonality(self):
    """
    Make a notification when a product is due to come in to seasonality tomorrow
    """
    p = Product.objects.first()
    p.status = "ACTIVE"
    today = date.today().timetuple().tm_yday
    p.early_start_julian = today + 1

    create_a_notification_if_a_product_is_in_or_out_of_season()

    updated_notifications = Notification.objects.all().count()
    self.assertNotEqual(self.current_notifications, updated_notifications)
Any help would be appreciated!
Thanks
You can apply() your celery task to execute it synchronously:
def test_when_product_is_about_to_come_in_to_seasonality(self):
    """
    Make a notification when a product is due to come in to seasonality tomorrow
    """
    p = Product.objects.first()
    p.status = "ACTIVE"
    today = date.today().timetuple().tm_yday
    p.early_start_julian = today + 1

    create_a_notification_if_a_product_is_in_or_out_of_season.apply()

    updated_notifications = Notification.objects.all().count()
    self.assertNotEqual(self.current_notifications, updated_notifications)
I think you're looking for the CELERY_ALWAYS_EAGER setting. If set to True, it will run your tasks synchronously. You can set it in your test settings, or you can decorate only that test with @override_settings(CELERY_ALWAYS_EAGER=True).
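A minimal sketch of that decorator-based variant, assuming a Django TestCase and that the task and Notification model are imported from the project as in the question (the class and test names here are illustrative only; on Celery 4+ the setting is spelled CELERY_TASK_ALWAYS_EAGER):

from django.test import TestCase, override_settings

class NotificationTaskTests(TestCase):
    @override_settings(CELERY_ALWAYS_EAGER=True)
    def test_task_runs_eagerly(self):
        # With eager mode enabled, .delay() executes the task synchronously in-process.
        create_a_notification_if_a_product_is_in_or_out_of_season.delay()
        self.assertTrue(Notification.objects.exists())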