diff --git a/encrypted_file/eugene_etl_pipeline.tar_encrypted.tar.gz b/encrypted_file/eugene_etl_pipeline.tar_encrypted.tar.gz index acbe27b..ea828a0 100644 Binary files a/encrypted_file/eugene_etl_pipeline.tar_encrypted.tar.gz and b/encrypted_file/eugene_etl_pipeline.tar_encrypted.tar.gz differ diff --git a/eugene/eugene_etl_pipeline.tar.gz b/eugene/eugene_etl_pipeline.tar.gz deleted file mode 100644 index 940ffcc..0000000 Binary files a/eugene/eugene_etl_pipeline.tar.gz and /dev/null differ diff --git a/eugene/project_solution.py b/eugene/project_solution.py deleted file mode 100644 index 0ec2a39..0000000 --- a/eugene/project_solution.py +++ /dev/null @@ -1,50 +0,0 @@ -from airflow import DAG -from airflow.utils.dates import days_ago -from airflow.providers.apache.spark.operators.spark_submit import SparkSubmitOperator -from datetime import datetime, timedelta - -# Налаштування за замовчуванням для DAG -default_args = { - 'owner': 'eugene', - 'start_date': datetime(2024, 8, 4), -} - -# Створення DAG -with DAG( - dag_id='multi_hop_datalake_eugene', - default_args=default_args, - schedule_interval=None, - catchup=False, # - tags=["eugene"], - description='Pipeline for multi-hop datalake: landing → bronze → silver → gold', - ) as dag: - - # Landing to bronze - landing_to_bronze_all = SparkSubmitOperator( - application='./landing_to_bronze.py', # Шлях до скрипту - task_id='landing_to_bronze_all_tables', # Унікальний ідентифікатор завдання - conn_id='spark-default', # Ідентифікатор з'єднання Spark - verbose=1, # Рівень деталізації логів - dag=dag - ) - - # Bronze to silver - bronze_to_silver_all = SparkSubmitOperator( - application='./bronze_to_silver.py', - task_id='bronze_to_silver_all_tables', - conn_id='spark-default', - verbose=1, - dag=dag - ) - - # Silver to gold (агрегація) - silver_to_gold_avg_stats = SparkSubmitOperator( - application='./silver_to_gold.py', - task_id='silver_to_gold_avg_stats', - conn_id='spark-default', - verbose=1, - dag=dag - ) - - # Визначення послідовності виконання завдань - landing_to_bronze_all >> bronze_to_silver_all >> silver_to_gold_avg_stats \ No newline at end of file diff --git a/multi_hop_datalake_eugene/dags/project_solution.py b/multi_hop_datalake_eugene/dags/project_solution.py new file mode 100644 index 0000000..ea262ef --- /dev/null +++ b/multi_hop_datalake_eugene/dags/project_solution.py @@ -0,0 +1,54 @@ +from airflow import DAG +from airflow.utils.dates import days_ago +from airflow.providers.apache.spark.operators.spark_submit import SparkSubmitOperator +from datetime import datetime +import os + +# Налаштування за замовчуванням +default_args = { + 'owner': 'eugene', + 'start_date': datetime(2024, 8, 4), +} + +# 🔍 Шлях до скриптів +SCRIPTS_DIR = "/root/airflow-docker/dags/scripts" + +# Створення DAG +with DAG( + dag_id='multi_hop_datalake_eugene', + default_args=default_args, + schedule_interval=None, + catchup=False, + tags=["eugene"], + description='Pipeline for multi-hop datalake: landing → bronze → silver → gold', +) as dag: + + # Landing to bronze + landing_to_bronze_all = SparkSubmitOperator( + application=f"{SCRIPTS_DIR}/landing_to_bronze.py", + task_id='landing_to_bronze_all_tables', + conn_id='spark-default', + verbose=1, + dag=dag + ) + + # Bronze to silver + bronze_to_silver_all = SparkSubmitOperator( + application=f"{SCRIPTS_DIR}/bronze_to_silver.py", + task_id='bronze_to_silver_all_tables', + conn_id='spark-default', + verbose=1, + dag=dag + ) + + # Silver to gold (агрегація) + silver_to_gold_avg_stats = SparkSubmitOperator( + application=f"{SCRIPTS_DIR}/silver_to_gold.py", + task_id='silver_to_gold_avg_stats', + conn_id='spark-default', + verbose=1, + dag=dag + ) + + # Послідовність виконання + landing_to_bronze_all >> bronze_to_silver_all >> silver_to_gold_avg_stats diff --git a/multi_hop_datalake_eugene/eugene_etl_pipeline.tar.gz b/multi_hop_datalake_eugene/eugene_etl_pipeline.tar.gz new file mode 100644 index 0000000..9cddefe Binary files /dev/null and b/multi_hop_datalake_eugene/eugene_etl_pipeline.tar.gz differ diff --git a/eugene/bronze_to_silver.py b/multi_hop_datalake_eugene/scripts/bronze_to_silver.py similarity index 100% rename from eugene/bronze_to_silver.py rename to multi_hop_datalake_eugene/scripts/bronze_to_silver.py diff --git a/eugene/landing_to_bronze.py b/multi_hop_datalake_eugene/scripts/landing_to_bronze.py similarity index 100% rename from eugene/landing_to_bronze.py rename to multi_hop_datalake_eugene/scripts/landing_to_bronze.py diff --git a/eugene/silver_to_gold.py b/multi_hop_datalake_eugene/scripts/silver_to_gold.py similarity index 100% rename from eugene/silver_to_gold.py rename to multi_hop_datalake_eugene/scripts/silver_to_gold.py