# Airflow DAG syntax
from airflow import DAG
from airflow.operators.python import PythonOperator
from datetime import datetime, timedelta
default_args = {
'owner': 'airflow',
'depends_on_past': False,
'start_date': datetime(2023, 1, 1),
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=5),
}
def data_preprocessing(**kwargs):
... # preprocessing logic
def model_training(**kwargs):
... # model training logic
with DAG('ml_pipeline', default_args=default_args, schedule_interval=timedelta(days=1)) as dag:
preprocess_task = PythonOperator(
task_id='preprocess_data',
python_callable=data_preprocessing,
)
train_model_task = PythonOperator(
task_id='train_model',
python_callable=model_training,
)
preprocess_task >> train_model_task