I wanted to write code that executes a function at a specific time, but
it keeps running the code immediately. I tried six different ways to code it, and they all failed.
I will paste the code below.
First I tried:
import schedule
import time
import m5
def run_f5():
    """Call ``m5.runF5()``; a plain wrapper so it can be registered as a job."""
    m5.runF5()


# NOTE(review): if m5 does its work at module top level, `import m5` alone
# would run it immediately, before any scheduling happens -- confirm that m5
# guards its entry point with `if __name__ == "__main__":`.

# Register the job for 02:05 every day; it is only ever invoked from
# run_pending() in the loop below.
schedule.every().day.at("02:05").do(run_f5)

while True:
    schedule.run_pending()
    # Poll once a second so the loop does not spin the CPU.
    time.sleep(1)
#this code was supposed to run 5 mins from now but it ran immediately
then i tried
import datetime
import time
import m5
# Poll the wall clock once a minute and call m5.runF5() whenever it reads 02:12.
while True:
    current_time = datetime.datetime.now()
    if (current_time.hour, current_time.minute) == (2, 12):
        m5.runF5()
    # Sleep on every pass, not only after a hit, so the loop never busy-spins
    # and the matching minute is not sampled twice.
    time.sleep(60)
#this code was supposed to run 5 mins from now but it ran immediately
then i tried
import datetime
import time
import m5
# Poll the wall clock once a minute and call m5.runF5() whenever it reads 02:24.
while True:
    current_time = datetime.datetime.now()
    if (current_time.hour, current_time.minute) == (2, 24):
        m5.runF5()
    # Sleep on every pass so the loop never busy-spins.
    time.sleep(60)
same result the code ran immediately then i tried
import datetime
import time
import m5
def seconds_until(hour, minute, now=None):
    """Return the number of seconds from *now* until the next HH:MM.

    Args:
        hour: target hour on the 24-hour clock.
        minute: target minute.
        now: reference ``datetime``; defaults to ``datetime.datetime.now()``.

    Returns:
        Seconds (float) until the target. If the target time has already
        passed on the day of *now*, the target is the same time on the
        following day. Returns 0 when *now* is exactly the target.
    """
    if now is None:
        now = datetime.datetime.now()
    target = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
    if target < now:
        # Already past today's occurrence: aim for tomorrow's.
        target += datetime.timedelta(days=1)
    return (target - now).total_seconds()


def main():
    """Sleep until the next 02:59, then call ``m5.runF5()`` once."""
    time.sleep(seconds_until(2, 59))
    m5.runF5()


# Only wait-and-run when executed as a script, never as a side effect of import.
if __name__ == "__main__":
    main()
same the code ran immediately then i tried
import datetime
import time
import m5
# Poll once a minute; run m5.runF5() a single time at 03:30 and then stop.
while True:
    current_time = datetime.datetime.now()
    if (current_time.hour, current_time.minute) == (3, 30):
        m5.runF5()
        break  # one-shot: leave the loop after the call
    # Not yet 03:30: wait a minute before checking again.
    time.sleep(60)
I tried the following 3 snippets, but each one failed:
import datetime
import time
import m5
current_time = datetime.datetime.now()
# Create a datetime object for the desired time
desired_time = datetime.datetime(current_time.year, current_time.month, current_time.day, 3, 40)
# Check if the desired time has passed for today
if desired_time < current_time:
    # If it has, set the desired time to tomorrow
    desired_time += datetime.timedelta(days=1)
# Calculate the time difference between the current time and the desired time
time_diff = desired_time - current_time
# Sleep for the whole difference. time.sleep() accepts fractional seconds;
# truncating with int() would wake up to one second before the desired time.
time.sleep(time_diff.total_seconds())
# Run your function
m5.runF5()
then
import datetime
import time
import m5
# Read the clock first: the target below is built from today's date.
current_time = datetime.datetime.now()
# Create a datetime object for the desired time
desired_time = datetime.datetime(current_time.year, current_time.month, current_time.day, 3, 56)
# If 03:56 has already passed today, wait for tomorrow's 03:56 instead of
# falling straight through the loop.
if desired_time < current_time:
    desired_time += datetime.timedelta(days=1)
# Check the clock once a second until the desired time is reached.
while datetime.datetime.now() < desired_time:
    time.sleep(1)
# Run your function
m5.runF5()
import schedule
import time
import m5
def run_at_specific_time():
    """Call ``m5.runF5()``; a plain wrapper so it can be registered as a job."""
    m5.runF5()


# Schedule the function to run every day at the HH:MM (24-hour) passed to .at()
schedule.every().day.at("12:00").do(run_at_specific_time)

while True:
    schedule.run_pending()
    # Poll once a second so the loop does not spin the CPU.
    time.sleep(1)
Please help: I have run out of ideas and have no clue why all of the above run at once rather than at the specified time.
I am importing the code from a separate file; maybe that is causing the issue.
So I made an Apache Airflow system in a Docker and so far it works perfectly well, with one problem, that persists through all dags: they activate on the previous iteration, not the current one.
For example, if I make a DAG that activates every minute, when it is 15:08, it will activate the DAG for 15:07. And if I make a DAG that activates every year, when it is 2023, it will activate the DAG for 2022, but not the current year.
Is there any way to fix this? Or is it supposed to be that way, and I should just account for this?
Here is the code for some of my dags as an example:
from datetime import datetime
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
import logging
import random
import pandas as pd
import sqlalchemy
from airflow.utils.log.logging_mixin import LoggingMixin
from dateutil.relativedelta import relativedelta
import requests
from datetime import datetime
def test_print(ds, foo, **kwargs):
    """Post the run's date stamp to the backend and check that it is echoed back.

    Args:
        ds: date stamp string in ``YYYY-MM-DD`` form (it is parsed with that
            format below).
        foo: accepted but not used by this function.
        **kwargs: any further keyword arguments; not used.

    Returns:
        The string ``'ok'``.

    Raises:
        AssertionError: if the response body does not contain ``ds``.
    """
    start_date = str(ds)
    one_year_later = datetime.strptime(ds, '%Y-%m-%d') + relativedelta(years=1)
    end_date = str(one_year_later.date())
    print('HOLIDAYS:')
    print('--------------')
    print('START DATE:' + start_date)
    print('END DATE:' + end_date)
    print('--------------')
    now = ds
    data2send = {'the_date_n_hour': now}
    r = requests.post("http://[BACKEND SERVER]:8199/do_work/", json=data2send)
    print(r.text)
    # Raise explicitly rather than via `assert`, which is stripped under
    # `python -O`; the exception type is kept the same.
    if now not in r.text:
        raise AssertionError(
            'backend response does not contain {!r}: {!r}'.format(now, r.text)
        )
    task_logger = logging.getLogger('airflow.task')
    task_logger.warning(r.text)
    return 'ok'
# DAG on a one-minute cron schedule, with catchup switched off.
dag = DAG(
    'test_test',
    description='test DAG',
    schedule_interval='*/1 * * * *',
    start_date=datetime(2017, 3, 20),
    catchup=False,
)

# Single task: run test_print with foo='bar' as an extra keyword argument.
test_operator = PythonOperator(
    task_id='test_task',
    python_callable=test_print,
    dag=dag,
    provide_context=True,
    op_kwargs={'foo': 'bar'},
)
test_operator
from __future__ import print_function
import time
from builtins import range
from pprint import pprint
import airflow
from airflow.models import DAG
from airflow.operators.python_operator import PythonOperator
import sqlalchemy
import pandas as pd
import datetime
import requests
from dateutil.relativedelta import relativedelta
# Default task arguments: up to 12 retries, one hour apart, with no
# dependency on the previous run.
args = {
    'owner': 'airflow',
    "depends_on_past": False,
    "retries": 12,
    "retry_delay": datetime.timedelta(hours=1),
}

# Cron '0 12 1 1 *' = 12:00 on 1 January; catchup is on, starting from 2013.
dag = DAG(
    dag_id='dag_holidays',
    default_args=args,
    schedule_interval='0 12 1 1 *',
    start_date=datetime.datetime(2013, 1, 1),
    catchup=True,
)
def get_holidays(ds, gtp_id, **kwargs):
    """Ask the backend to load holidays for *gtp_id* for one year from *ds*.

    Args:
        ds: start date string in ``YYYY-MM-DD`` form (it is parsed with that
            format below).
        gtp_id: identifier sent to the backend as ``gtp_id``.
        **kwargs: any further keyword arguments; not used.

    Returns:
        The response body text.

    Raises:
        Exception: if the response body contains ``'Error'``; the message is
            the response body.
    """
    holi_start_date = str(ds)
    # `datetime` is the module in this file (`import datetime`), so the class
    # has to be spelled out; `datetime.strptime` would raise AttributeError.
    one_year_later = datetime.datetime.strptime(ds, '%Y-%m-%d') + relativedelta(years=1)
    holi_end_date = str(one_year_later.date())
    print('HOLIDAYS:')
    print('--------------')
    print('GTP ID: {}'.format(str(gtp_id)))
    print('START DATE:' + holi_start_date)
    print('END DATE:' + holi_end_date)
    print('--------------')
    r = requests.post(
        "http://[BACKEND SERVER]/load_holidays/",
        data={'gtp_id': gtp_id, 'start_date': holi_start_date, 'end_date': holi_end_date},
    )
    if 'Error' in r.text:
        raise Exception(r.text)
    return r.text
# Read every gtp_id from the database and create one holiday-loading task per id.
# NOTE(review): this query runs at module import -- confirm that is acceptable
# for wherever this file gets loaded.
engine = sqlalchemy.create_engine('[SQL SERVER]')
query_string1 = """ select gtp_id from gtps"""
all_ids = list(pd.read_sql_query(query_string1, engine).gtp_id)
for gtp_id in all_ids:
    task = PythonOperator(
        task_id='holidays_' + str(gtp_id),
        python_callable=get_holidays,
        provide_context=True,
        op_kwargs={'gtp_id': gtp_id},
        dag=dag,
    )
    task
Yes, this is supposed to be this way and it can definitely be a bit confusing at first.
The reason for this behavior is that Airflow was used for a lot of ETL type processing when it was built and with that pattern you are running your DAG on the data of the previous interval.
For example, when your data processing DAG runs every day at 3am, the data it processes is the data that was collected since 3am the previous day.
This period is called the Data Interval in Airflow terms.
The start of the data interval is the Logical Date (in earlier versions called execution date), which is what is incorporated into the Run ID. I think this is what you are seeing as the previous iteration.
The end of the data interval is the Run After date, this is when the DAG actually will be scheduled to run.
When you hover over the Next Run: field in the Airflow UI for a given DAG you will see all of those dates and timestamps for the next run of a specific DAG.
This guide on scheduling DAGs might be helpful as a reference and it has some examples.
Disclaimer: I work for Astronomer, the company behind the guide I linked. :)
Here is the code (only this):
import pytz
from time import sleep
from datetime import datetime
dt_format = "%H:%M"
tz = pytz.timezone('Asia/Riyadh')
timed1 = 1530  # the time in 24h format (HHMM)
while True:
    # Re-read the clock on every pass; a value captured once before the loop
    # never changes, so the comparison could never become true later.
    time_now = datetime.now(tz).strftime(dt_format)
    # Compare like with like: "15:30" -> 1530, matching the integer target.
    if int(time_now.replace(":", "")) == timed1:
        print(time_now)
        print(timed1)
        print("its the time")
        # Skip past the matching minute so the message is printed only once.
        sleep(90)
    else:
        # Brief pause between checks so the loop does not spin the CPU.
        sleep(1)
I tried keeping the time in its normal format (15:30), but the result is still the same.
The replace() call is not required; you can delete it.
You just have to read the current time inside the loop, so that it is updated on every iteration, and then it will work. Thanks to @MatsLindh (check the comments).
I have this code that I'm using to build a report, and I'm trying to work with the date, but I can't because PyCharm says it can't work with the "Series" format. I'm trying to convert it to a plain datetime, but nothing works. Can you help me?
The "DATA" column comes in as "datetime64[ns]" and I need it to be a normal datetime. How can I do this?
import pyodbc
import pandas as pd
import matplotlib.pyplot as plt
import datetime
class generate_report():
    """Load the 'Aplicação' rows of CTP_EXTRATO_GERAL and dump them to a CSV file."""

    def __init__(self):
        """Connect to SQL Server, run the query, and write the result to ``self.csv``."""
        self.csv = "output.csv"
        self.sql_conn = pyodbc.connect('Trusted_Connection=yes', driver = '{SQL Server}',
                                       server = 'localhost', database = 'MPWJ_BI')
        self.query = "select * from CTP_EXTRATO_GERAL where HISTORICO = 'Aplicação' order by data"
        self.df = pd.read_sql(self.query, self.sql_conn)
        # Parse the DATA column into pandas datetimes before exporting.
        self.df['DATA'] = pd.to_datetime(self.df['DATA'])
        self.df.to_csv(self.csv)

    def analyze_data(self):
        """Print the column dtypes of the in-memory DataFrame."""
        # NOTE(review): the result of this read is discarded; the dtypes printed
        # below are those of self.df, not of the CSV -- confirm the intent.
        pd.read_csv(self.csv)
        print(self.df.dtypes)
It depends on how your date looks like
for example
from datetime import datetime
# Parse a textual timestamp; the format string mirrors the text piece by piece
# (%b month abbreviation, %d day, %Y year, %I:%M 12-hour time, %p AM/PM).
date_text = 'Jun 1 2020 7:31PM'
date_format = '%b %d %Y %I:%M%p'
datetime_object = datetime.strptime(date_text, date_format)
documentation
https://docs.python.org/3/library/datetime.html#datetime.datetime.strptime
EDIT:
To convert from datetime64 to datetime you can do the following:
import datetime
import numpy as np
# Current time in UTC, as a naive datetime (no tzinfo attached)
dt = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
# Convert to datetime64
dt64 = np.datetime64(dt)
# Convert to seconds since the Unix epoch
ts = (dt64 - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')
# Convert back to datetime. Pass tz explicitly: without it, fromtimestamp()
# renders the instant in the machine's local time, so the result would differ
# from dt by the local UTC offset.
restored = datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc)
print(restored)
I ran this when it was noon in Turkey. This is what I got:
2017-12-22 20:11:46.038218+03:00
import pytz
from pytz import timezone
from datetime import datetime
# NOTE: despite the variable name, datetime.now() returns the machine's local
# wall-clock time as a naive datetime, not UTC.
utc_now = datetime.now()
utc = pytz.timezone('UTC')
# localize() only attaches the UTC tzinfo to the naive value; it does not
# shift the clock reading.
aware_date = utc.localize(utc_now)
turkey = timezone('Europe/Istanbul')
# Convert the (labelled-as-UTC) instant to Istanbul time.
now_turkey = aware_date.astimezone(turkey)
Why did I get 20:11:46?
Because the base time is wrong: just change utc_now = datetime.now() to utc_now = datetime.utcnow() and then it works.
As @RemcoGerlich has said, you should use utcnow to get UTC.
Whole code:
import pytz
from pytz import timezone
from datetime import datetime
# Time zones involved: the source (UTC) and the destination (Istanbul).
utc = pytz.timezone('UTC')
turkey = timezone('Europe/Istanbul')

# Take the current time as a naive UTC reading, label it as UTC, then convert.
utc_now = datetime.utcnow()
aware_date = utc.localize(utc_now)
now_turkey = aware_date.astimezone(turkey)