├── README.md ├── python ├── articles │ ├── rest_api__to_mysql │ │ ├── .gitignore │ │ ├── README.md │ │ └── demo.py │ └── web_scraping_with_data_contracts │ │ ├── .gitignore │ │ ├── check_robots │ │ ├── data │ │ └── transformed_data.csv │ │ ├── drivers │ │ └── chromedriver.exe │ │ ├── requirements.txt │ │ ├── src │ │ ├── __init__.py │ │ ├── etl_pipeline.py │ │ ├── extraction │ │ │ ├── __init__.py │ │ │ ├── alter_col_type.py │ │ │ ├── main.py │ │ │ └── scraping_bot.py │ │ ├── loading │ │ │ ├── __init__.py │ │ │ └── s3_uploader.py │ │ ├── transformation │ │ │ ├── __init__.py │ │ │ ├── add_columns.py │ │ │ ├── main.py │ │ │ └── transformations.py │ │ └── web_checker │ │ │ ├── __init__.py │ │ │ └── robots_txt_checker.py │ │ ├── tests │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-310.pyc │ │ ├── data_contracts │ │ │ ├── extraction_data_contract.yml │ │ │ └── transformation_data_contract.yml │ │ └── data_quality_checks │ │ │ ├── __pycache__ │ │ │ ├── scan_extraction_data_contract.cpython-310.pyc │ │ │ └── scan_transformation_data_contract.cpython-310.pyc │ │ │ ├── scan_extraction_data_contract.py │ │ │ └── scan_transformation_data_contract.py │ │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── aws_utils.cpython-310.pyc │ │ └── db_utils.cpython-310.pyc │ │ ├── aws_utils.py │ │ └── db_utils.py ├── long_vids │ └── README.md └── short_vids │ ├── airflow │ └── demo_1 │ │ ├── .env │ │ ├── dags │ │ ├── __pycache__ │ │ │ ├── dummy_dag_1.cpython-38.pyc │ │ │ ├── dummy_dag_2.cpython-38.pyc │ │ │ ├── quick_dag.cpython-38.pyc │ │ │ ├── test_dag_1.cpython-38.pyc │ │ │ ├── test_dag_2.cpython-38.pyc │ │ │ └── test_dag_3.cpython-38.pyc │ │ ├── airflow.cfg │ │ └── demo_dag.py │ │ ├── demo.py │ │ ├── docker-compose.yaml │ │ └── logs │ │ ├── dag_id=message_display_dag │ │ ├── run_id=manual__2024-02-01T005613.905830+0000 │ │ │ ├── task_id=display_first_message │ │ │ │ └── attempt=1.log │ │ │ ├── task_id=display_second_message │ │ │ │ └── attempt=1.log │ │ │ └── task_id=display_third_message │ │ │ │ └── attempt=1.log │ │ ├── run_id=manual__2024-02-01T125229.953764+0000 │ │ │ ├── task_id=display_first_message │ │ │ │ └── attempt=1.log │ │ │ ├── task_id=display_second_message │ │ │ │ └── attempt=1.log │ │ │ └── task_id=display_third_message │ │ │ │ └── attempt=1.log │ │ ├── run_id=manual__2024-02-01T125548.893644+0000 │ │ │ ├── task_id=display_first_message │ │ │ │ └── attempt=1.log │ │ │ ├── task_id=display_second_message │ │ │ │ └── attempt=1.log │ │ │ └── task_id=display_third_message │ │ │ │ └── attempt=1.log │ │ └── run_id=scheduled__2024-02-01T134800+0000 │ │ │ ├── task_id=display_first_message │ │ │ └── attempt=1.log │ │ │ ├── task_id=display_second_message │ │ │ └── attempt=1.log │ │ │ └── task_id=display_third_message │ │ │ └── attempt=1.log │ │ ├── dag_processor_manager │ │ └── dag_processor_manager.log │ │ └── scheduler │ │ ├── 2024-01-31 │ │ ├── native_dags │ │ │ └── example_dags │ │ │ │ ├── example_bash_operator.py.log │ │ │ │ ├── example_branch_datetime_operator.py.log │ │ │ │ ├── example_branch_day_of_week_operator.py.log │ │ │ │ ├── example_branch_labels.py.log │ │ │ │ ├── example_branch_operator.py.log │ │ │ │ ├── example_branch_operator_decorator.py.log │ │ │ │ ├── example_branch_python_dop_operator_3.py.log │ │ │ │ ├── example_complex.py.log │ │ │ │ ├── example_dag_decorator.py.log │ │ │ │ ├── example_datasets.py.log │ │ │ │ ├── example_dynamic_task_mapping.py.log │ │ │ │ ├── 
example_dynamic_task_mapping_with_no_taskflow_operators.py.log │ │ │ │ ├── example_external_task_marker_dag.py.log │ │ │ │ ├── example_kubernetes_executor.py.log │ │ │ │ ├── example_latest_only.py.log │ │ │ │ ├── example_latest_only_with_trigger.py.log │ │ │ │ ├── example_local_kubernetes_executor.py.log │ │ │ │ ├── example_nested_branch_dag.py.log │ │ │ │ ├── example_params_trigger_ui.py.log │ │ │ │ ├── example_params_ui_tutorial.py.log │ │ │ │ ├── example_passing_params_via_test_command.py.log │ │ │ │ ├── example_python_decorator.py.log │ │ │ │ ├── example_python_operator.py.log │ │ │ │ ├── example_sensor_decorator.py.log │ │ │ │ ├── example_sensors.py.log │ │ │ │ ├── example_setup_teardown.py.log │ │ │ │ ├── example_setup_teardown_taskflow.py.log │ │ │ │ ├── example_short_circuit_decorator.py.log │ │ │ │ ├── example_short_circuit_operator.py.log │ │ │ │ ├── example_skip_dag.py.log │ │ │ │ ├── example_sla_dag.py.log │ │ │ │ ├── example_subdag_operator.py.log │ │ │ │ ├── example_task_group.py.log │ │ │ │ ├── example_task_group_decorator.py.log │ │ │ │ ├── example_time_delta_sensor_async.py.log │ │ │ │ ├── example_trigger_controller_dag.py.log │ │ │ │ ├── example_trigger_target_dag.py.log │ │ │ │ ├── example_xcom.py.log │ │ │ │ ├── example_xcomargs.py.log │ │ │ │ ├── plugins │ │ │ │ ├── event_listener.py.log │ │ │ │ ├── listener_plugin.py.log │ │ │ │ └── workday.py.log │ │ │ │ ├── subdags │ │ │ │ └── subdag.py.log │ │ │ │ ├── tutorial.py.log │ │ │ │ ├── tutorial_dag.py.log │ │ │ │ ├── tutorial_objectstorage.py.log │ │ │ │ ├── tutorial_taskflow_api.py.log │ │ │ │ └── tutorial_taskflow_api_virtualenv.py.log │ │ └── quick_dag.py.log │ │ └── 2024-02-01 │ │ ├── dummy_dag_1.py.log │ │ ├── dummy_dag_2.py.log │ │ ├── quick_dag.py.log │ │ ├── test_dag_1.py.log │ │ ├── test_dag_2.py.log │ │ └── test_dag_3.py.log │ ├── deepface │ ├── README.md │ ├── detect_faces.ipynb │ └── photos │ │ ├── image_1.jpg │ │ ├── image_2.jpg │ │ ├── image_3.jpg │ │ ├── image_4.jpg │ │ └── man_utd_team.jpg │ ├── great_expectations │ ├── ge_script_1.py │ └── ge_script_2.py │ ├── pytest │ └── test_data_quality.py │ └── soda │ ├── checks.yml │ ├── configuration.yml │ ├── data_contract.yml │ ├── scan_checks.py │ └── scan_data_contract.py └── sql └── test.md /README.md: -------------------------------------------------------------------------------- 1 | # Code Examples Library 📚 2 | 3 | 4 | ## Why does this repo exist? 🤔 5 | 6 | 7 | For the past couple of months (Dec 2023-Jan 2024), my DMs have been flooded with requests for the scripts I made for the YouTube and TikTok shorts. I tried doing this one by one, but quickly realised this isn't sustainable... 8 | 9 | So...the easiest solution I could come up with was to just make it publicly available for everyone! 10 | 11 | Moving forward, I'll be storing the code snippets I use in my online content so you can replicate the examples in your own environments or use them as a learning resource in a way that makes sense for you! 12 | 13 | 14 | ## Who can use it? 🌍 15 | 16 | Anyone can use the code for any purpose, and it's designed for beginners and advanced developers alike. 17 | 18 | I try to write the code in a way that's easy for me to read and follow without compromising on best practices, and hopefully it's just as easy for you. 19 | 20 | 21 | ## Can I copy and paste your code for my project...? 📋 22 | 23 | Yeah of course! 24 | 25 | Feel free to use it as you need. 26 | 27 | 28 | ## How's the repo structured...?
📂 29 | 30 | At the moment, the code examples are split into these directories: 31 | 32 | - `/python` - for Python-related tutorials and examples. 33 | - `/sql` - for SQL-related tutorials and examples. 34 | 35 | 36 | ## Connect with Me 🤝 37 | 38 | If you have any questions, suggestions, or just want to connect, find me on: 39 | 40 | - [**LinkedIn**](https://www.linkedin.com/in/stephen-david-williams-860428123/) 41 | - [**TikTok**](https://www.tiktok.com/@sdw.online) 42 | - [**YouTube**](https://www.youtube.com/@sdw-online/) 43 | - [**Twitter/X**](https://www.twitter.com/sdw_online) 44 | - [**Medium**](https://www.medium.com/@sdw-online) 45 | - [**Instagram**](https://instagram.com/sdw.online) 46 | - [**Newsletter**](http://aianddatainsights.beehiiv.com/) 47 | - [**Hashnode/Blog**](http://www.stephendavidwilliams.com) 48 | 49 | I try to read and respond to as many messages and reach-outs as I can, so don't be offended/upset if I don't get back to you! 🙏 50 | 51 | 52 | ## Send your ideas 💡 53 | 54 | This entire repo is subject to revision, meaning it will change over time to optimize your experience here. 55 | 56 | Let me know if there's anything you want to see on here, or anything you'd like changed - all feedback and ideas are welcome! 57 | 58 | 59 | --- 60 | 61 | Stay productive + stay blessed! 👊🏽💻 62 | -------------------------------------------------------------------------------- /python/articles/rest_api__to_mysql/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | -------------------------------------------------------------------------------- /python/articles/rest_api__to_mysql/README.md: -------------------------------------------------------------------------------- 1 | # REST API to MySQL database using Python 2 | 3 | # Overview 📚 4 | 5 | A quick project for migrating data from a REST API to a MySQL database 6 | 7 | 8 | ## Resources 🎁 9 | 10 | - **[Blog](https://medium.com/@sdw-online/rest-api-to-mysql-database-using-python-eb7b4606d6c3)** 11 | - **[YouTube](https://www.youtube.com/@sdw-online)** 12 | - **[TikTok](https://www.tiktok.com/@sdw.online)** 13 | - **[LinkedIn](https://www.linkedin.com/in/stephen-david-williams-860428123/)** 14 | - **[Twitter/X](https://twitter.com/sdw_online)** 15 | -------------------------------------------------------------------------------- /python/articles/rest_api__to_mysql/demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import pandas as pd 4 | from datetime import datetime 5 | import mysql.connector 6 | from mysql.connector import Error 7 | from dotenv import load_dotenv 8 | 9 | # Load environment variables from .env file 10 | load_dotenv() 11 | 12 | 13 | # Load API key to make API requests 14 | RAPIDAPI_KEY = os.getenv('RAPIDAPI_KEY') 15 | 16 | 17 | # Set up API request headers to authenticate requests 18 | headers = { 19 | 'X-RapidAPI-Key': RAPIDAPI_KEY, 20 | 'X-RapidAPI-Host': 'api-football-v1.p.rapidapi.com' 21 | } 22 | 23 | # Set up API URL and parameters 24 | url = "https://api-football-v1.p.rapidapi.com/v3/players/topscorers" 25 | params = {"league":"39","season":"2023"} 26 | 27 | 28 | def check_rate_limits(): 29 | """ 30 | Check the API quota allocated to your account 31 | """ 32 | response = requests.get(url, headers=headers) 33 | response.raise_for_status() 34 | 35 | daily_limits = response.headers.get('x-ratelimit-requests-limit') 36 | daily_remaining =
response.headers.get('x-ratelimit-requests-remaining') 37 | calls_per_min_allowed = response.headers.get('X-RateLimit-Limit') 38 | calls_per_min_remaining = response.headers.get('X-RateLimit-Remaining') 39 | 40 | rate_limits = { 41 | 'daily_limit': daily_limits, 42 | 'daily_remaining': daily_remaining, 43 | 'minute_limit': calls_per_min_allowed, 44 | 'minute_remaining': calls_per_min_remaining 45 | } 46 | 47 | print(rate_limits) 48 | 49 | 50 | 51 | 52 | def get_top_scorers(url, headers, params): 53 | """ 54 | Fetch the top scorers using the API 55 | 56 | """ 57 | try: 58 | response = requests.get(url, headers=headers, params=params) 59 | response.raise_for_status() 60 | return response.json() 61 | 62 | except requests.exceptions.HTTPError as http_error_message: 63 | print (f"❌ [HTTP ERROR]: {http_error_message}") 64 | 65 | except requests.exceptions.ConnectionError as connection_error_message: 66 | print (f"❌ [CONNECTION ERROR]: {connection_error_message}") 67 | 68 | except requests.exceptions.Timeout as timeout_error_message: 69 | print (f"❌ [TIMEOUT ERROR]: {timeout_error_message}") 70 | 71 | except requests.exceptions.RequestException as other_error_message: 72 | print (f"❌ [UNKNOWN ERROR]: {other_error_message}") 73 | 74 | def process_top_scorers(data): 75 | """ 76 | Parse the JSON data required for the top scorers 77 | """ 78 | top_scorers = [] 79 | for scorer_data in data['response']: 80 | statistics = scorer_data['statistics'][0] 81 | 82 | # Set up constants for processing data 83 | player = scorer_data['player'] 84 | player_name = player['name'] 85 | club_name = statistics['team']['name'] 86 | total_goals = int(statistics['goals']['total']) 87 | penalty_goals = int(statistics['penalty']['scored']) 88 | assists = int(statistics['goals']['assists']) if statistics['goals']['assists'] else 0 89 | matches_played = int(statistics['games']['appearences']) 90 | minutes_played = int(statistics['games']['minutes']) 91 | dob = datetime.strptime(player['birth']['date'], '%Y-%m-%d') 92 | age = (datetime.now() - dob).days // 365 93 | 94 | # Append data 95 | top_scorers.append({ 96 | 'player': player_name, 97 | 'club': club_name, 98 | 'total_goals': total_goals, 99 | 'penalty_goals': penalty_goals, 100 | 'assists': assists, 101 | 'matches': matches_played, 102 | 'mins': minutes_played, 103 | 'age': age 104 | }) 105 | return top_scorers 106 | 107 | # Function to convert the list of dictionaries into a pandas DataFrame 108 | def create_dataframe(top_scorers): 109 | 110 | """ 111 | Convert list of dictionaries into a Pandas dataframe and process it 112 | """ 113 | 114 | df = pd.DataFrame(top_scorers) 115 | 116 | # Sort dataframe first by 'total_goals' in descending order, then by 'assists' in descending order 117 | df.sort_values(by=['total_goals', 'assists'], ascending=[False, False], inplace=True) 118 | 119 | # Reset index after sorting to reflect new order 120 | df.reset_index(drop=True, inplace=True) 121 | 122 | # Recalculate ranks based on the sorted order 123 | df['position'] = df['total_goals'].rank(method='dense', ascending=False).astype(int) 124 | 125 | # Specify the columns to include in the final dataframe in the desired order 126 | df = df[['position', 'player', 'club', 'total_goals', 'penalty_goals', 'assists', 'matches', 'mins', 'age']] 127 | 128 | return df 129 | 130 | 131 | 132 | # Environment variables for MySQL 133 | HOST = os.getenv('HOST') 134 | MYSQL_DATABASE = os.getenv('MYSQL_DATABASE') 135 | MYSQL_USERNAME = os.getenv('MYSQL_USERNAME') 136 | MYSQL_PASSWORD = 
os.getenv('MYSQL_PASSWORD') 137 | 138 | # Database connection 139 | def create_db_connection(host_name, user_name, user_password, db_name): 140 | """ 141 | Establish a connection to the MySQL database 142 | """ 143 | db_connection = None 144 | try: 145 | db_connection = mysql.connector.connect( 146 | host=host_name, 147 | user=user_name, 148 | passwd=user_password, 149 | database=db_name 150 | ) 151 | print("MySQL Database connection successful ✅") 152 | 153 | except Error as e: 154 | print(f"❌ [DATABASE CONNECTION ERROR]: '{e}'") 155 | 156 | return db_connection 157 | 158 | # Create table if not exists 159 | def create_table(db_connection): 160 | """ 161 | Create a table if it does not exist in the MySQL database 162 | 163 | """ 164 | 165 | CREATE_TABLE_SQL_QUERY = """ 166 | CREATE TABLE IF NOT EXISTS top_scorers ( 167 | `position` INT, 168 | `player` VARCHAR(255), 169 | `club` VARCHAR(255), 170 | `total_goals` INT, 171 | `penalty_goals` INT, 172 | `assists` INT, 173 | `matches` INT, 174 | `mins` INT, 175 | `age` INT, 176 | PRIMARY KEY (`player`, `club`) 177 | ); 178 | """ 179 | try: 180 | cursor = db_connection.cursor() 181 | cursor.execute(CREATE_TABLE_SQL_QUERY) 182 | db_connection.commit() 183 | print("Table created successfully ✅") 184 | 185 | except Error as e: 186 | print(f"❌ [CREATING TABLE ERROR]: '{e}'") 187 | 188 | 189 | # Insert data into the table 190 | def insert_into_table(db_connection, df): 191 | """ 192 | Insert or update the top scorers data in the database from the dataframe 193 | """ 194 | cursor = db_connection.cursor() 195 | 196 | INSERT_DATA_SQL_QUERY = """ 197 | INSERT INTO top_scorers (`position`, `player`, `club`, `total_goals`, `penalty_goals`, `assists`, `matches`, `mins`, `age`) 198 | VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) 199 | ON DUPLICATE KEY UPDATE 200 | `position` = VALUES(`position`), 201 | `total_goals` = VALUES(`total_goals`), 202 | `penalty_goals` = VALUES(`penalty_goals`), 203 | `assists` = VALUES(`assists`), 204 | `matches` = VALUES(`matches`), 205 | `mins` = VALUES(`mins`), 206 | `age` = VALUES(`age`) 207 | """ 208 | # Create a list of tuples from the dataframe values 209 | data_values_as_tuples = [tuple(x) for x in df.to_numpy()] 210 | 211 | # Execute the query 212 | cursor.executemany(INSERT_DATA_SQL_QUERY, data_values_as_tuples) 213 | db_connection.commit() 214 | print("Data inserted or updated successfully ✅") 215 | 216 | 217 | def run_data_pipeline(): 218 | """ 219 | Execute the ETL pipeline 220 | """ 221 | check_rate_limits() 222 | 223 | data = get_top_scorers(url, headers, params) 224 | 225 | # Exit early if the API call failed or returned an empty payload, so the 226 | # database steps below never reference a dataframe that was never built 227 | if not (data and 'response' in data and data['response']): 228 | print("No data available or an error occurred ❌") 229 | return 230 | 231 | top_scorers = process_top_scorers(data) 232 | df = create_dataframe(top_scorers) 233 | print(df.to_string(index=False)) 234 | 235 | db_connection = create_db_connection(HOST, MYSQL_USERNAME, MYSQL_PASSWORD, MYSQL_DATABASE) 236 | 237 | # If connection is successful, proceed with creating table and inserting data 238 | if db_connection is not None: 239 | create_table(db_connection) 240 | insert_into_table(db_connection, df) 241 | db_connection.close() 242 | 243 | 244 | if __name__ == "__main__": 245 | run_data_pipeline() 246 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | /config
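The `.env` file ignored above holds the scraping pipeline's credentials and settings. The `utils/db_utils.py` and `utils/aws_utils.py` helpers it feeds aren't shown in this section, but the `os.getenv` calls in `src/loading/s3_uploader.py` confirm the three S3 variables below; loading them is a one-liner with python-dotenv (pinned in `requirements.txt`):

import os
from dotenv import load_dotenv

load_dotenv()  # read the git-ignored .env file into the process environment

# S3 settings read by src/loading/s3_uploader.py
S3_BUCKET = os.getenv("S3_BUCKET")      # bucket that receives transformed_data.csv
REGION_NAME = os.getenv("REGION_NAME")  # AWS region used when creating the bucket
S3_FOLDER = os.getenv("S3_FOLDER")      # optional key prefix inside the bucket

Any database credentials the pipeline needs would live in the same file; their exact names depend on `utils/db_utils.py`, which this section doesn't include.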
-------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/check_robots: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/check_robots -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/data/transformed_data.csv: -------------------------------------------------------------------------------- 1 | position,team_name,games_played,wins,draws,losses,goals_for,goals_against,goal_difference,points,match_date 2 | 1,Liverpool,24,16,6,2,55,23,32,54,2024-02-13 3 | 2,Manchester City,23,16,4,3,56,25,31,52,2024-02-13 4 | 3,Arsenal,24,16,4,4,53,22,31,52,2024-02-13 5 | 4,Tottenham Hotspur,24,14,5,5,51,36,15,47,2024-02-13 6 | 5,Aston Villa,24,14,4,6,50,32,18,46,2024-02-13 7 | 6,Manchester United,24,13,2,9,33,33,0,41,2024-02-13 8 | 7,Newcastle United,24,11,3,10,51,39,12,36,2024-02-13 9 | 8,West Ham United,24,10,6,8,36,42,-6,36,2024-02-13 10 | 9,Brighton and Hove Albion,24,9,8,7,43,40,3,35,2024-02-13 11 | 10,Chelsea,24,10,4,10,41,40,1,34,2024-02-13 12 | 11,Wolverhampton Wanderers,24,9,5,10,37,39,-2,32,2024-02-13 13 | 12,Fulham,24,8,5,11,33,39,-6,29,2024-02-13 14 | 13,AFC Bournemouth,23,7,6,10,31,44,-13,27,2024-02-13 15 | 14,Brentford,23,7,4,12,34,39,-5,25,2024-02-13 16 | 15,Crystal Palace,24,6,6,12,27,43,-16,24,2024-02-13 17 | 16,Nottingham Forest,24,5,6,13,30,44,-14,21,2024-02-13 18 | 17,Luton Town,23,5,5,13,33,45,-12,20,2024-02-13 19 | 18,Everton,24,8,5,11,26,32,-6,19,2024-02-13 20 | 19,Burnley,24,3,4,17,25,50,-25,13,2024-02-13 21 | 20,Sheffield United,24,3,4,17,22,60,-38,13,2024-02-13 22 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/drivers/chromedriver.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/drivers/chromedriver.exe -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.9.1 2 | aiosignal==1.3.1 3 | alabaster @ file:///home/ktietz/src/ci/alabaster_1611921544520/work 4 | alembic==1.13.1 5 | amqp==5.2.0 6 | anaconda-client==1.7.2 7 | anaconda-navigator==2.1.1 8 | anaconda-project @ file:///tmp/build/80754af9/anaconda-project_1610472525955/work 9 | annotated-types==0.6.0 10 | antlr4-python3-runtime==4.11.1 11 | anyio==4.2.0 12 | apache-airflow==2.8.1 13 | apache-airflow-providers-celery==3.5.1 14 | apache-airflow-providers-common-io==1.2.0 15 | apache-airflow-providers-common-sql==1.10.0 16 | apache-airflow-providers-ftp==3.7.0 17 | apache-airflow-providers-http==4.8.0 18 | apache-airflow-providers-imap==3.5.0 19 | apache-airflow-providers-sqlite==3.7.0 20 | apispec==6.4.0 21 | appdirs==1.4.4 22 | argcomplete==3.2.1 23 | argh==0.26.2 24 | argon2-cffi @ file:///C:/ci/argon2-cffi_1613037959010/work 25 | asgiref==3.7.2 26 | asn1crypto @ file:///tmp/build/80754af9/asn1crypto_1596577642040/work 27 | astroid @ file:///C:/ci/astroid_1613501047216/work 28 | astropy @ file:///C:/ci/astropy_1617745647203/work 29 
| async-generator @ file:///home/ktietz/src/ci/async_generator_1611927993394/work 30 | async-timeout==4.0.3 31 | atomicwrites==1.4.0 32 | attrs==23.2.0 33 | autopep8 @ file:///tmp/build/80754af9/autopep8_1615918855173/work 34 | Babel==2.14.0 35 | backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work 36 | backoff==2.2.1 37 | backports.functools-lru-cache @ file:///tmp/build/80754af9/backports.functools_lru_cache_1618170165463/work 38 | backports.shutil-get-terminal-size @ file:///tmp/build/80754af9/backports.shutil_get_terminal_size_1608222128777/work 39 | backports.tempfile @ file:///home/linux1/recipes/ci/backports.tempfile_1610991236607/work 40 | backports.weakref==1.0.post1 41 | backports.zoneinfo==0.2.1 42 | bcrypt @ file:///C:/ci/bcrypt_1597936263757/work 43 | beautifulsoup4==4.8.2 44 | billiard==4.2.0 45 | bitarray @ file:///C:/ci/bitarray_1618435038389/work 46 | bkcharts==0.2 47 | black==19.10b0 48 | bleach @ file:///tmp/build/80754af9/bleach_1612211392645/work 49 | blinker==1.7.0 50 | bokeh @ file:///C:/ci/bokeh_1620784067744/work 51 | boto==2.49.0 52 | Bottleneck==1.3.2 53 | brotlipy==0.7.0 54 | cachelib==0.9.0 55 | cachetools==5.3.0 56 | celery==5.3.6 57 | certifi==2023.11.17 58 | cffi==1.16.0 59 | cfgv==3.3.1 60 | chardet==3.0.4 61 | charset-normalizer==3.3.2 62 | click==8.1.7 63 | click-didyoumean==0.3.0 64 | click-plugins==1.1.1 65 | click-repl==0.3.0 66 | clickclick==20.10.2 67 | cloudpickle @ file:///tmp/build/80754af9/cloudpickle_1598884132938/work 68 | clyent==1.2.2 69 | colorama==0.4.6 70 | colorlog==4.8.0 71 | compressed-rtf==1.0.6 72 | comtypes==1.1.9 73 | conda==4.14.0 74 | conda-build==3.21.4 75 | conda-content-trust @ file:///C:/Windows/TEMP/abs_4589313d-fc62-4ccc-81c0-b801b4449e833j1ajrwu/croots/recipe/conda-content-trust_1658126379362/work 76 | conda-package-handling @ file:///C:/b/abs_fcga8w0uem/croot/conda-package-handling_1672865024290/work 77 | conda-package-streaming @ file:///C:/b/abs_0e5n5hdal3/croot/conda-package-streaming_1670508162902/work 78 | conda-repo-cli==1.0.5 79 | conda-token @ file:///Users/paulyim/miniconda3/envs/c3i/conda-bld/conda-token_1662660369760/work 80 | conda-verify==3.4.2 81 | ConfigUpdater==3.2 82 | connexion==2.14.2 83 | contextlib2==0.6.0.post1 84 | coverage==7.2.3 85 | cron-descriptor==1.4.0 86 | croniter==2.0.1 87 | cryptography==41.0.7 88 | cssselect==1.2.0 89 | cycler==0.10.0 90 | Cython @ file:///C:/ci/cython_1618435363327/work 91 | cytoolz==0.11.0 92 | dask==2022.11.1 93 | dask-sql==2022.12.0 94 | dataclasses-json==0.5.8 95 | decorator @ file:///tmp/build/80754af9/decorator_1617916966915/work 96 | defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work 97 | Deprecated==1.2.14 98 | diff-match-patch @ file:///tmp/build/80754af9/diff-match-patch_1594828741838/work 99 | dill==0.3.1.1 100 | distlib==0.3.6 101 | distributed==2022.11.1 102 | Django==2.2.5 103 | dnspython==2.4.2 104 | docker==6.0.1 105 | docutils==0.20.1 106 | docx2txt==0.8 107 | duckduckgo-search==2.8.6 108 | ebcdic==1.1.1 109 | email-validator==1.3.1 110 | entrypoints==0.3 111 | et-xmlfile==1.0.1 112 | exceptiongroup==1.2.0 113 | extract-msg==0.28.7 114 | faiss-cpu==1.7.4 115 | fastapi==0.109.1 116 | fastcache==1.1.0 117 | filelock==3.11.0 118 | flake8 @ file:///tmp/build/80754af9/flake8_1615834841867/work 119 | Flask==2.2.5 120 | Flask-AppBuilder==4.3.10 121 | Flask-Babel==2.0.0 122 | Flask-Caching==2.1.0 123 | Flask-JWT-Extended==4.6.0 124 | Flask-Limiter==3.5.0 125 | Flask-Login==0.6.3 126 | flask-session==0.5.0 127 | 
Flask-SQLAlchemy==2.5.1 128 | flask-wtf==1.2.1 129 | flower==2.0.1 130 | frozenlist==1.4.1 131 | fsspec==2023.12.2 132 | future==0.18.2 133 | gevent @ file:///C:/ci/gevent_1616773090559/work 134 | gitdb==4.0.10 135 | GitPython==3.1.31 136 | glob2 @ file:///home/linux1/recipes/ci/glob2_1610991677669/work 137 | google-api-core==2.11.0 138 | google-api-python-client==2.85.0 139 | google-auth==2.17.3 140 | google-auth-httplib2==0.1.0 141 | google-re2==1.1 142 | googleapis-common-protos==1.62.0 143 | greenlet==3.0.3 144 | grpcio==1.60.0 145 | gTTS==2.3.1 146 | gunicorn==21.2.0 147 | h11==0.14.0 148 | h5py==2.10.0 149 | HeapDict==1.0.1 150 | html5lib @ file:///tmp/build/80754af9/html5lib_1593446221756/work 151 | httpcore==0.16.3 152 | httplib2==0.22.0 153 | httpx==0.23.3 154 | huggingface-hub==0.15.1 155 | humanize==4.9.0 156 | identify==2.5.22 157 | idna==3.6 158 | imagecodecs @ file:///C:/ci/imagecodecs_1617996768495/work 159 | imageio @ file:///tmp/build/80754af9/imageio_1617700267927/work 160 | imagesize @ file:///home/ktietz/src/ci/imagesize_1611921604382/work 161 | IMAPClient==2.1.0 162 | importlib-metadata==6.11.0 163 | importlib-resources==5.13.0 164 | inflect==7.0.0 165 | inflection==0.5.1 166 | iniconfig @ file:///home/linux1/recipes/ci/iniconfig_1610983019677/work 167 | intervaltree @ file:///tmp/build/80754af9/intervaltree_1598376443606/work 168 | ipykernel @ file:///C:/ci/ipykernel_1596190155316/work/dist/ipykernel-5.3.4-py3-none-any.whl 169 | ipython @ file:///C:/ci/ipython_1617121002983/work 170 | ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work 171 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1610481889018/work 172 | isort @ file:///tmp/build/80754af9/isort_1616355431277/work 173 | itsdangerous==2.1.2 174 | jdcal==1.4.1 175 | jedi @ file:///C:/ci/jedi_1606914528444/work 176 | Jinja2==3.1.3 177 | joblib @ file:///tmp/build/80754af9/joblib_1613502643832/work 178 | json5==0.9.5 179 | jsonschema==4.20.0 180 | jsonschema-specifications==2023.12.1 181 | jupyter==1.0.0 182 | jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1616770841739/work 183 | jupyter-console @ file:///tmp/build/80754af9/jupyter_console_1616615302928/work 184 | jupyter-core @ file:///C:/ci/jupyter_core_1612213356021/work 185 | jupyter-packaging @ file:///tmp/build/80754af9/jupyter-packaging_1613502826984/work 186 | jupyter-server @ file:///C:/ci/jupyter_server_1616084298403/work 187 | jupyterlab @ file:///tmp/build/80754af9/jupyterlab_1619133235951/work 188 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work 189 | jupyterlab-server @ file:///tmp/build/80754af9/jupyterlab_server_1617134334258/work 190 | jupyterlab-widgets @ file:///tmp/build/80754af9/jupyterlab_widgets_1609884341231/work 191 | keyring @ file:///C:/ci/keyring_1614616910860/work 192 | kiwisolver @ file:///C:/ci/kiwisolver_1612282606037/work 193 | kombu==5.3.5 194 | langchain==0.0.150 195 | lazy-object-proxy==1.10.0 196 | libarchive-c @ file:///tmp/build/80754af9/python-libarchive-c_1617780486945/work 197 | limits==3.7.0 198 | linkify-it-py==2.0.2 199 | llvmlite==0.36.0 200 | locket==1.0.0 201 | lockfile==0.12.2 202 | loguru==0.7.0 203 | lxml @ file:///C:/ci/lxml_1616443455957/work 204 | Mako==1.3.0 205 | Markdown==3.5.2 206 | markdown-it-py==3.0.0 207 | MarkupSafe==2.1.2 208 | marshmallow==3.20.2 209 | marshmallow-enum==1.5.1 210 | marshmallow-oneofschema==3.0.1 211 | marshmallow-sqlalchemy==0.26.1 212 | matplotlib @ 
file:///C:/ci/matplotlib-suite_1613408055530/work 213 | mccabe==0.6.1 214 | mdit-py-plugins==0.4.0 215 | mdurl==0.1.2 216 | menuinst==1.4.16 217 | mistune==0.8.4 218 | mkl-fft==1.3.0 219 | mkl-random @ file:///C:/ci/mkl_random_1618854156666/work 220 | mkl-service==2.3.0 221 | mock @ file:///tmp/build/80754af9/mock_1607622725907/work 222 | more-itertools @ file:///tmp/build/80754af9/more-itertools_1613676688952/work 223 | mpmath==1.2.1 224 | msgpack @ file:///C:/ci/msgpack-python_1612287368835/work 225 | multidict==6.0.4 226 | multipledispatch==0.6.0 227 | mypy-extensions==0.4.3 228 | navigator-updater==0.2.1 229 | nbclassic @ file:///tmp/build/80754af9/nbclassic_1616085367084/work 230 | nbclient @ file:///tmp/build/80754af9/nbclient_1614364831625/work 231 | nbconvert @ file:///C:/ci/nbconvert_1601914925608/work 232 | nbformat @ file:///tmp/build/80754af9/nbformat_1617383369282/work 233 | nest-asyncio @ file:///tmp/build/80754af9/nest-asyncio_1613680548246/work 234 | networkx @ file:///tmp/build/80754af9/networkx_1598376031484/work 235 | nltk @ file:///tmp/build/80754af9/nltk_1618327084230/work 236 | nodeenv==1.7.0 237 | nose @ file:///tmp/build/80754af9/nose_1606773131901/work 238 | notebook @ file:///C:/ci/notebook_1616443715883/work 239 | numba @ file:///C:/ci/numba_1616774458845/work 240 | numexpr==2.8.4 241 | numpy==1.24.4 242 | numpydoc @ file:///tmp/build/80754af9/numpydoc_1605117425582/work 243 | oauthlib==3.2.2 244 | olefile==0.46 245 | openai==0.27.2 246 | openapi-schema-pydantic==1.2.4 247 | openpyxl @ file:///tmp/build/80754af9/openpyxl_1615411699337/work 248 | opentelemetry-api==1.22.0 249 | opentelemetry-exporter-otlp==1.22.0 250 | opentelemetry-exporter-otlp-proto-common==1.22.0 251 | opentelemetry-exporter-otlp-proto-grpc==1.22.0 252 | opentelemetry-exporter-otlp-proto-http==1.22.0 253 | opentelemetry-proto==1.22.0 254 | opentelemetry-sdk==1.22.0 255 | opentelemetry-semantic-conventions==0.43b0 256 | ordered-set==4.1.0 257 | orjson==3.8.10 258 | outcome==1.2.0 259 | packaging==23.2 260 | pandas==2.0.3 261 | pandocfilters @ file:///C:/ci/pandocfilters_1605102497129/work 262 | paramiko @ file:///tmp/build/80754af9/paramiko_1598886428689/work 263 | parso==0.7.0 264 | partd @ file:///tmp/build/80754af9/partd_1618000087440/work 265 | path @ file:///C:/ci/path_1614022440181/work 266 | pathlib @ file:///Users/ktietz/demo/mc3/conda-bld/pathlib_1629713961906/work 267 | pathlib2 @ file:///C:/ci/pathlib2_1607025069150/work 268 | pathspec==0.12.1 269 | patsy==0.5.1 270 | pdfkit==1.0.0 271 | pdfminer.six==20191110 272 | pendulum==3.0.0 273 | pep8==1.7.1 274 | pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work 275 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work 276 | Pillow @ file:///C:/ci/pillow_1617386341487/work 277 | pinecone-client==2.2.1 278 | pkginfo==1.7.0 279 | pkgutil-resolve-name==1.3.10 280 | platformdirs==3.2.0 281 | playsound==1.2.2 282 | plotly @ file:///opt/conda/conda-bld/plotly_1646671701182/work 283 | pluggy==1.3.0 284 | ply==3.11 285 | pre-commit==3.2.2 286 | prison==0.2.1 287 | prometheus-client==0.19.0 288 | prompt-toolkit==3.0.43 289 | protobuf==4.25.2 290 | psutil==5.9.7 291 | ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl 292 | py @ file:///tmp/build/80754af9/py_1607971587848/work 293 | pyasn1==0.4.8 294 | pyasn1-modules==0.2.8 295 | pycodestyle==2.7.0 296 | pycosat==0.6.3 297 | pycparser==2.21 298 | pycryptodome==3.18.0 299 | pycurl==7.43.0.6 300 | 
pydantic==2.5.3 301 | pydantic-core==2.14.6 302 | pydocstyle @ file:///tmp/build/80754af9/pydocstyle_1616182067796/work 303 | pyerfa @ file:///C:/ci/pyerfa_1619391071834/work 304 | pyflakes==2.3.1 305 | pygments==2.17.2 306 | PyJWT==2.8.0 307 | pylint @ file:///C:/ci/pylint_1617136058775/work 308 | pyls-black @ file:///tmp/build/80754af9/pyls-black_1607553132291/work 309 | pyls-spyder @ file:///tmp/build/80754af9/pyls-spyder_1613849700860/work 310 | PyNaCl @ file:///C:/ci/pynacl_1595000047588/work 311 | pyodbc===4.0.0-unsupported 312 | pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1608057966937/work 313 | pyparsing @ file:///home/linux1/recipes/ci/pyparsing_1610983426697/work 314 | pypdf==3.11.0 315 | pyreadline==2.1 316 | pyrsistent @ file:///C:/ci/pyrsistent_1600141795814/work 317 | PySocks @ file:///C:/ci/pysocks_1605287845585/work 318 | pytest==6.2.3 319 | pytest-mock==3.10.0 320 | python-daemon==3.0.1 321 | python-dateutil==2.8.2 322 | python-dotenv==1.0.0 323 | python-jsonrpc-server @ file:///tmp/build/80754af9/python-jsonrpc-server_1600278539111/work 324 | python-language-server @ file:///tmp/build/80754af9/python-language-server_1607972495879/work 325 | python-nvd3==0.15.0 326 | python-pptx==0.6.21 327 | python-slugify==8.0.1 328 | pytz==2023.3.post1 329 | PyWavelets @ file:///C:/ci/pywavelets_1601658407916/work 330 | pywin32==306 331 | pywin32-ctypes==0.2.0 332 | pywinpty==0.5.7 333 | PyYAML==6.0.1 334 | pyzmq==20.0.0 335 | QDarkStyle==2.8.1 336 | QtAwesome @ file:///tmp/build/80754af9/qtawesome_1615991616277/work 337 | qtconsole @ file:///tmp/build/80754af9/qtconsole_1616775094278/work 338 | QtPy==1.9.0 339 | readability-lxml==0.8.1 340 | redis==4.5.4 341 | referencing==0.32.1 342 | regex==2023.3.23 343 | requests==2.31.0 344 | requests-oauthlib==1.3.1 345 | requests-toolbelt==1.0.0 346 | rfc3339-validator==0.1.4 347 | rfc3986==1.5.0 348 | rich==13.7.0 349 | rich-argparse==1.4.0 350 | rope @ file:///tmp/build/80754af9/rope_1602264064449/work 351 | rpds-py==0.17.1 352 | rsa==4.9 353 | Rtree @ file:///C:/ci/rtree_1618421009405/work 354 | ruamel-yaml-conda @ file:///C:/ci/ruamel_yaml_1616016967756/work 355 | ruamel.yaml==0.17.40 356 | ruamel.yaml.clib==0.2.8 357 | safetensors==0.3.1 358 | scikit-image==0.18.1 359 | scikit-learn @ file:///C:/ci/scikit-learn_1614446896245/work 360 | scipy @ file:///C:/ci/scipy_1618856128765/work 361 | seaborn @ file:///tmp/build/80754af9/seaborn_1608578541026/work 362 | selenium==4.8.3 363 | Send2Trash @ file:///tmp/build/80754af9/send2trash_1607525499227/work 364 | setproctitle==1.3.3 365 | simplegeneric==0.8.1 366 | singledispatch @ file:///tmp/build/80754af9/singledispatch_1614366001199/work 367 | sip==4.19.13 368 | six==1.16.0 369 | smmap==5.0.0 370 | sniffio==1.3.0 371 | snowballstemmer @ file:///tmp/build/80754af9/snowballstemmer_1611258885636/work 372 | soda-core==3.1.5 373 | soda-core-pandas-dask==3.1.5 374 | sortedcollections @ file:///tmp/build/80754af9/sortedcollections_1611172717284/work 375 | sortedcontainers @ file:///tmp/build/80754af9/sortedcontainers_1606865132123/work 376 | soupsieve @ file:///tmp/build/80754af9/soupsieve_1616183228191/work 377 | sourcery==1.2.0 378 | SpeechRecognition==3.8.1 379 | Sphinx @ file:///tmp/build/80754af9/sphinx_1620777493457/work 380 | sphinxcontrib-applehelp @ file:///home/ktietz/src/ci/sphinxcontrib-applehelp_1611920841464/work 381 | sphinxcontrib-devhelp @ file:///home/ktietz/src/ci/sphinxcontrib-devhelp_1611920923094/work 382 | sphinxcontrib-htmlhelp @ 
file:///home/ktietz/src/ci/sphinxcontrib-htmlhelp_1611920974801/work 383 | sphinxcontrib-jsmath @ file:///home/ktietz/src/ci/sphinxcontrib-jsmath_1611920942228/work 384 | sphinxcontrib-qthelp @ file:///home/ktietz/src/ci/sphinxcontrib-qthelp_1611921055322/work 385 | sphinxcontrib-serializinghtml @ file:///home/ktietz/src/ci/sphinxcontrib-serializinghtml_1611920755253/work 386 | sphinxcontrib-websupport @ file:///tmp/build/80754af9/sphinxcontrib-websupport_1597081412696/work 387 | spyder @ file:///C:/ci/spyder_1616776239898/work 388 | spyder-kernels @ file:///C:/ci/spyder-kernels_1614030842607/work 389 | SQLAlchemy==1.4.51 390 | SQLAlchemy-JSONField==1.0.2 391 | SQLAlchemy-Utils==0.41.1 392 | sqlparse==0.4.4 393 | starlette==0.35.1 394 | statsmodels==0.12.2 395 | sympy @ file:///C:/ci/sympy_1618255511605/work 396 | tables==3.6.1 397 | tabulate==0.9.0 398 | tblib @ file:///tmp/build/80754af9/tblib_1597928476713/work 399 | tenacity==8.2.3 400 | termcolor==2.4.0 401 | terminado==0.9.4 402 | testpath @ file:///home/ktietz/src/ci/testpath_1611930608132/work 403 | text-unidecode==1.3 404 | textdistance @ file:///tmp/build/80754af9/textdistance_1612461398012/work 405 | textract==1.6.5 406 | threadpoolctl @ file:///tmp/tmp9twdgx9k/threadpoolctl-2.1.0-py3-none-any.whl 407 | three-merge @ file:///tmp/build/80754af9/three-merge_1607553261110/work 408 | tifffile @ file:///tmp/build/80754af9/tifffile_1619636090847/work 409 | tiktoken==0.3.3 410 | time-machine==2.13.0 411 | tokenizers==0.13.3 412 | toml @ file:///tmp/build/80754af9/toml_1616166611790/work 413 | toolz @ file:///home/linux1/recipes/ci/toolz_1610987900194/work 414 | tornado==6.1 415 | tqdm==4.65.0 416 | traitlets @ file:///home/ktietz/src/ci/traitlets_1611929699868/work 417 | transformers==4.30.2 418 | trio==0.22.0 419 | trio-websocket==0.10.2 420 | tweepy==4.13.0 421 | typed-ast @ file:///C:/ci/typed-ast_1610466535590/work 422 | typing-extensions==4.9.0 423 | typing-inspect==0.9.0 424 | tzdata==2023.4 425 | tzlocal==5.0.1 426 | uc-micro-py==1.0.2 427 | ujson @ file:///C:/ci/ujson_1611241570789/work 428 | unicodecsv==0.14.1 429 | universal-pathlib==0.1.4 430 | uritemplate==4.1.1 431 | urllib3==1.26.18 432 | uvicorn==0.27.0.post1 433 | vine==5.1.0 434 | virtualenv==20.21.0 435 | watchdog @ file:///C:/ci/watchdog_1612471251191/work 436 | wcwidth==0.2.13 437 | webdriver-manager==3.8.6 438 | webencodings==0.5.1 439 | websocket-client==1.5.1 440 | Werkzeug==2.2.3 441 | widgetsnbextension==3.5.1 442 | win-inet-pton @ file:///C:/ci/win_inet_pton_1605306167264/work 443 | win-unicode-console==0.5 444 | win32-setctime==1.1.0 445 | wincertstore==0.2 446 | wrapt==1.16.0 447 | wsproto==1.2.0 448 | wtforms==3.1.2 449 | xlrd==1.2.0 450 | XlsxWriter @ file:///tmp/build/80754af9/xlsxwriter_1617224712951/work 451 | xlwings==0.23.0 452 | xlwt==1.3.0 453 | xmltodict @ file:///Users/ktietz/demo/mc3/conda-bld/xmltodict_1629301980723/work 454 | yapf @ file:///tmp/build/80754af9/yapf_1615749224965/work 455 | yarl==1.9.4 456 | zict==2.0.0 457 | zipp==3.17.0 458 | zope.event==4.5.0 459 | zope.interface @ file:///C:/ci/zope.interface_1616357322857/work 460 | zstandard==0.19.0 461 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/src/__init__.py -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/etl_pipeline.py: -------------------------------------------------------------------------------- 1 | from extraction.main import extraction_job 2 | from transformation.main import transformation_job 3 | from loading.s3_uploader import loading_job 4 | 5 | 6 | # Create a single point of entry to run the scraping ETL pipeline 7 | def run_etl_pipeline(): 8 | extraction_job() 9 | transformation_job() 10 | loading_job() 11 | 12 | 13 | # Execute the pipeline 14 | if __name__ == "__main__": 15 | run_etl_pipeline() -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/extraction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/src/extraction/__init__.py -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/extraction/alter_col_type.py: -------------------------------------------------------------------------------- 1 | from utils.db_utils import connect_to_db 2 | 3 | 4 | def alter_column_to_varchar(connection, schema_name, table_name, column_name): 5 | cursor = connection.cursor() 6 | alter_statement = f""" 7 | ALTER TABLE {schema_name}.{table_name} 8 | ALTER COLUMN "{column_name}" TYPE VARCHAR; 9 | """ 10 | 11 | try: 12 | cursor.execute(alter_statement) 13 | connection.commit() # Commit so the DDL change persists 14 | print(f"✅ Column {column_name} type changed to VARCHAR.") 15 | except Exception as e: 16 | print(f"❌ An error occurred: {e}") 17 | finally: 18 | cursor.close() 19 | 20 | if __name__ == "__main__": 21 | # Connect to the database 22 | connection = connect_to_db() 23 | 24 | # Alter the 'team' column to 'VARCHAR' 25 | alter_column_to_varchar(connection, 'raw', 'scraped_fb_data', 'team') 26 | 27 | # Close the database connection 28 | connection.close() 29 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/extraction/main.py: -------------------------------------------------------------------------------- 1 | try: 2 | from .scraping_bot import scrape_data 3 | from utils.db_utils import connect_to_db, create_extracted_schema_and_table, insert_extracted_data_to_table 4 | from tests.data_quality_checks.scan_extraction_data_contract import run_dq_checks_for_extraction_stage 5 | except ImportError: 6 | # Fall back to absolute imports when this file is run directly as a script 7 | from scraping_bot import scrape_data 8 | from utils.db_utils import connect_to_db, create_extracted_schema_and_table, insert_extracted_data_to_table 9 | from tests.data_quality_checks.scan_extraction_data_contract import run_dq_checks_for_extraction_stage 10 | 11 | 12 | def extraction_job(): 13 | # Flag for running the data quality checks only 14 | RUN_DQ_CHECKS_ONLY = True 15 | 16 | if not RUN_DQ_CHECKS_ONLY: 17 | # Connect to the database 18 | connection = connect_to_db() 19 | 20 | # Create schema and table if they don't exist 21 | schema_name = 'raw' 22 | table_name = 'scraped_fb_data' 23 | create_extracted_schema_and_table(connection, schema_name, table_name) 24 | 25 | # Scrape the data and store in a DataFrame 26 | dates = [ 27 | "2024-02-13", 28 | # "2024-03-31", 29 | # "2024-04-30", 30 | # "2024-05-31" 31 | ] 32 | df = scrape_data(dates, show_output=False) 33 | 34 | # Insert data into the database from the DataFrame 35 | insert_extracted_data_to_table(connection, schema_name, table_name, df) 36 | 37 | # Close the database connection 38 | connection.close() 39 | 40 | # Run DQ checks for extraction stage 41 | run_dq_checks_for_extraction_stage() 42 | 43 | else: 44 | run_dq_checks_for_extraction_stage() 45 | 46 | if __name__ == "__main__": 47 | extraction_job()
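`utils/db_utils.py` is imported throughout `src/`, but its source isn't part of this section, so the exact connection logic is unknown. As a minimal sketch of the interface the scripts above depend on - assuming a PostgreSQL backend (the `ALTER COLUMN ... TYPE VARCHAR` statement and the `raw`/`staging` schemas are Postgres-style) and psycopg2, with hypothetical environment-variable names - `connect_to_db` could look like this:

import os
import psycopg2  # assumed driver; not confirmed by this section
from dotenv import load_dotenv

load_dotenv()

def connect_to_db():
    """Sketch only: open a PostgreSQL connection from .env settings."""
    return psycopg2.connect(
        host=os.getenv("DB_HOST"),          # hypothetical names - the real ones
        port=os.getenv("DB_PORT", "5432"),  # live in the untracked .env file
        dbname=os.getenv("DB_NAME"),
        user=os.getenv("DB_USER"),
        password=os.getenv("DB_PASSWORD"),
    )

Whatever the real implementation, it returns a connection exposing the `cursor()`, `commit()`, and `close()` methods that `alter_col_type.py` and `main.py` above rely on.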
-------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/extraction/scraping_bot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from selenium import webdriver 3 | from selenium.webdriver.common.by import By 4 | from selenium.webdriver.chrome.service import Service 5 | from selenium.webdriver.support.ui import WebDriverWait 6 | from selenium.webdriver.support import expected_conditions as EC 7 | 8 | 9 | def scrape_data(dates, executable_path="drivers/chromedriver.exe", show_output=False): 10 | service = Service(executable_path=executable_path) 11 | driver = webdriver.Chrome(service=service) 12 | all_data = [] 13 | 14 | for match_date in dates: 15 | formatted_date = pd.to_datetime(match_date).strftime('%Y-%b-%d') 16 | football_url = f'https://www.twtd.co.uk/league-tables/competition:premier-league/daterange/fromdate:2023-Aug-01/todate:{formatted_date}/type:home-and-away/' 17 | driver.get(football_url) 18 | wait = WebDriverWait(driver, 10) 19 | table_container = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "leaguetable"))) 20 | rows = table_container.find_elements(By.TAG_NAME, "tr") 21 | 22 | # Skip the header row, then keep only the non-empty cell text from each row 23 | for idx, row in enumerate(rows[1:], start=1): 24 | cols = row.find_elements(By.TAG_NAME, "td") 25 | row_data = [col.text.strip() for col in cols if col.text.strip() != ''] 26 | if show_output: 27 | print(f"Row data {idx}: {row_data}") 28 | row_data.append(formatted_date) 29 | all_data.append(row_data) 30 | 31 | if show_output: 32 | print(f"Premier League Table Standings (as of {formatted_date}):") 33 | print('-'*60) 34 | for row_data in all_data: 35 | print(' '.join(row_data)) 36 | print('\n' + '-'*60) 37 | 38 | driver.implicitly_wait(2) 39 | 40 | driver.quit() 41 | 42 | # Lowercase column names matching the raw.scraped_fb_data table schema 43 | columns = ["pos", "team", "p", "w1", "d1", "l1", "gf1", "ga1", "w2", "d2", "l2", "gf2", "ga2", "gd", "pts", "date"] 44 | df = pd.DataFrame(all_data, columns=columns) 45 | return df 46 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/loading/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/src/loading/__init__.py -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/loading/s3_uploader.py: -------------------------------------------------------------------------------- 1 | from utils.aws_utils import connect_to_aws_s3, create_bucket_if_not_exists, upload_file_to_s3, validate_file_in_s3 2 | from utils.db_utils import
connect_to_db, fetch_transformed_data, convert_dataframe_to_csv 3 | import os 4 | 5 | def loading_job(): 6 | print("Starting data transfer process...") 7 | connection = None 8 | try: 9 | connection = connect_to_db() 10 | df = fetch_transformed_data(connection) 11 | local_filename = 'transformed_data.csv' 12 | convert_dataframe_to_csv(df, local_filename) 13 | 14 | s3_client = connect_to_aws_s3() 15 | bucket_name = create_bucket_if_not_exists(s3_client, os.getenv("S3_BUCKET"), os.getenv("REGION_NAME")) 16 | s3_folder = os.getenv("S3_FOLDER") 17 | upload_file_to_s3(s3_client, local_filename, bucket_name, s3_folder) 18 | 19 | s3_path = f"{s3_folder}/{local_filename}" if s3_folder else local_filename 20 | if validate_file_in_s3(s3_client, bucket_name, s3_path): 21 | print(f'✅ File {local_filename} successfully uploaded to bucket {bucket_name}/{s3_path}') 22 | else: 23 | print(f'❌ File {local_filename} not found in bucket {bucket_name}/{s3_path}') 24 | 25 | except Exception as e: 26 | print(f"❌ An error occurred: {e}") 27 | 28 | finally: 29 | if connection: 30 | print("Closing the database connection.") 31 | connection.close() 32 | 33 | if __name__ == "__main__": 34 | loading_job() 35 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/transformation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/src/transformation/__init__.py -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/transformation/add_columns.py: -------------------------------------------------------------------------------- 1 | from utils.db_utils import connect_to_db 2 | 3 | def add_column(connection, schema_name, table_name, column_name, data_type): 4 | cursor = connection.cursor() 5 | alter_statement = f""" 6 | ALTER TABLE {schema_name}.{table_name} 7 | ADD COLUMN IF NOT EXISTS "{column_name}" {data_type}; 8 | """ 9 | 10 | try: 11 | cursor.execute(alter_statement) 12 | connection.commit() 13 | print(f"✅ Column '{column_name}' added to table '{schema_name}.{table_name}'.") 14 | except Exception as e: 15 | print(f"❌ An error occurred when adding '{column_name}': {e}") 16 | finally: 17 | cursor.close() 18 | 19 | def add_new_columns(connection, schema_name, table_name, columns_info): 20 | for column_name, data_type in columns_info.items(): 21 | add_column(connection, schema_name, table_name, column_name, data_type) 22 | 23 | if __name__ == "__main__": 24 | # Connect to the database 25 | connection = connect_to_db() 26 | 27 | # Schema and table names 28 | schema_name = 'staging' 29 | table_name = 'transformed_fb_data' 30 | 31 | # New columns to add with their respective data types 32 | new_columns_info = { 33 | 'wins': 'INTEGER', 34 | 'draws': 'INTEGER', 35 | 'losses': 'INTEGER' 36 | } 37 | 38 | # Add new columns to the table 39 | add_new_columns(connection, schema_name, table_name, new_columns_info) 40 | 41 | # Close the database connection 42 | connection.close() 43 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/transformation/main.py: -------------------------------------------------------------------------------- 1 | try: 2 | from utils.db_utils import connect_to_db, 
create_transformed_schema_and_table, fetch_extraction_data, insert_transformed_data_to_table 3 | from tests.data_quality_checks.scan_transformation_data_contract import run_dq_checks_for_transformation_stage 4 | from transformations import transform_data 5 | except ImportError: 6 | from utils.db_utils import connect_to_db, create_transformed_schema_and_table, fetch_extraction_data, insert_transformed_data_to_table 7 | from tests.data_quality_checks.scan_transformation_data_contract import run_dq_checks_for_transformation_stage 8 | from .transformations import transform_data 9 | 10 | 11 | def transformation_job(): 12 | # Establish a connection to the database 13 | connection = connect_to_db() 14 | 15 | # Define schema and table names for extracted and transformed data 16 | extracted_schema_name = 'raw' 17 | extracted_table_name = 'scraped_fb_data' 18 | transformed_schema_name = 'staging' 19 | transformed_table_name = 'transformed_fb_data' 20 | 21 | # Create schema and table for the transformed data if they don't exist 22 | create_transformed_schema_and_table(connection, transformed_schema_name, transformed_table_name) 23 | 24 | # Fetch data from the extraction layer 25 | extracted_data = fetch_extraction_data(connection, extracted_schema_name, extracted_table_name) 26 | 27 | # Perform data transformation 28 | transformed_data = transform_data(extracted_data) 29 | 30 | # Insert transformed data into the transformed_data table 31 | insert_transformed_data_to_table(connection, transformed_schema_name, transformed_table_name, transformed_data) 32 | 33 | # Run data quality checks for the transformation stage 34 | run_dq_checks_for_transformation_stage() 35 | 36 | # Close the database connection 37 | connection.close() 38 | 39 | if __name__ == "__main__": 40 | transformation_job() 41 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/transformation/transformations.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def rename_fields(df): 4 | """ 5 | Rename each field name to a longer form version. 6 | """ 7 | df_renamed = df.rename(columns={ 8 | 'pos': 'position', 9 | 'team': 'team_name', 10 | 'p': 'games_played', 11 | 'w1': 'home_wins', 12 | 'd1': 'home_draws', 13 | 'l1': 'home_losses', 14 | 'gf1': 'home_goals_for', 15 | 'ga1': 'home_goals_against', 16 | 'w2': 'away_wins', 17 | 'd2': 'away_draws', 18 | 'l2': 'away_losses', 19 | 'gf2': 'away_goals_for', 20 | 'ga2': 'away_goals_against', 21 | 'gd': 'goal_difference', 22 | 'pts': 'points', 23 | 'date': 'match_date' 24 | }) 25 | return df_renamed 26 | 27 | def calculate_points(df): 28 | """ 29 | Use the home and away columns to calculate points, wins, draws, losses 30 | and goals. The 10-point deduction for Everton's PSR violations 31 | is applied separately in deduct_points_from_everton().
32 | """ 33 | # Calculate points normally for all rows 34 | df['points'] = ( 35 | df['home_wins'] * 3 + df['away_wins'] * 3 + 36 | df['home_draws'] + df['away_draws'] 37 | ) 38 | 39 | # Calculate total wins, draws, and losses 40 | df['wins'] = df['home_wins'] + df['away_wins'] 41 | df['draws'] = df['home_draws'] + df['away_draws'] 42 | df['losses'] = df['home_losses'] + df['away_losses'] 43 | 44 | df['goals_for'] = df['home_goals_for'] + df['away_goals_for'] 45 | df['goals_against'] = df['home_goals_against'] + df['away_goals_against'] 46 | 47 | # Convert the match_date from string to datetime for comparison 48 | df['match_date'] = pd.to_datetime(df['match_date']) 49 | 50 | 51 | return df 52 | 53 | 54 | 55 | def deduct_points_from_everton(df): 56 | """ 57 | Deduct points for Everton FC if the match_date is in or after November 2023 58 | """ 59 | psr_violation_start_date = pd.to_datetime('2023-11-01') 60 | everton_mask = (df['team_name'] == 'Everton') & (df['match_date'] >= psr_violation_start_date) 61 | df.loc[everton_mask, 'points'] -= 10 62 | 63 | return df 64 | 65 | def drop_home_away_columns(df): 66 | """ 67 | Drop/hide the home and away columns for user-friendliness 68 | (after we've calculated the points from them). 69 | """ 70 | columns_to_drop = [ 71 | 'home_wins', 'home_draws', 'home_losses', 72 | 'home_goals_for', 'home_goals_against', 73 | 'away_wins', 'away_draws', 'away_losses', 74 | 'away_goals_for', 'away_goals_against' 75 | ] 76 | df_dropped = df.drop(columns=columns_to_drop) 77 | return df_dropped 78 | 79 | def sort_and_reset_index(df): 80 | """ 81 | Sort the dataframe based on the Premier League table standings rules 82 | and reset the 'position' column to reflect the new ranking. 83 | 84 | 85 | """ 86 | # Sort by points, then goal difference, then goals for 87 | df_sorted = df.sort_values(by=['points', 'goal_difference', 'goals_for'], ascending=[False, False, False]) 88 | 89 | # Reset the index to reflect the new ranking 90 | df_sorted = df_sorted.reset_index(drop=True) 91 | 92 | # Update the 'position' column to match the new index 93 | df_sorted['position'] = df_sorted.index + 1 94 | 95 | return df_sorted 96 | 97 | def transform_data(df): 98 | """ 99 | Apply all the transformation intents on the dataframe. 
100 | """ 101 | df_renamed = rename_fields(df) 102 | df_points_calculated = calculate_points(df_renamed) 103 | df_points_deducted = deduct_points_from_everton(df_points_calculated) 104 | 105 | # # Drop the columns related to home and away metrics to clean up the dataframe 106 | # df_cleaned = drop_home_away_columns(df_points_deducted) 107 | 108 | # Create the final dataframe with desired columns only 109 | df_cleaned = df_points_deducted[['position', 'team_name', 'games_played', 'wins', 'draws', 'losses', 'goals_for', 'goals_against', 'goal_difference', 'points', 'match_date']] 110 | 111 | # Apply the league standings rules (points, goal difference, goals for) and re-rank 112 | df_final = sort_and_reset_index(df_cleaned) 113 | 114 | return df_final 115 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/web_checker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/src/web_checker/__init__.py -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/src/web_checker/robots_txt_checker.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import urllib.parse 3 | import urllib.request 4 | def fetch_robots_txt(url): 5 | """Fetches the robots.txt file from the specified URL.""" 6 | robots_url = urllib.parse.urljoin(url, '/robots.txt') 7 | request = urllib.request.Request(robots_url, headers={'User-Agent': 'Mozilla/5.0'}) 8 | try: 9 | response = urllib.request.urlopen(request) 10 | content = response.read().decode(response.headers.get_content_charset() or 'utf-8') 11 | print(f"✅ Successfully fetched robots.txt from {url} - 1/3 🟠") 12 | return content 13 | except Exception as e: 14 | print(f"❌ Failed to fetch robots.txt from {url}: {e}") 15 | return None 16 | 17 | def parse_robots_txt(robots_txt): 18 | """Parses the robots.txt content and returns sitemaps + directives.""" 19 | sitemaps = [] 20 | directives = [] 21 | for line in robots_txt.splitlines(): 22 | if line.startswith('Sitemap:'): 23 | sitemaps.append(line.split(':', 1)[1].strip()) 24 | elif line and not line.startswith('#'): 25 | parts = line.split(':', 1) 26 | if len(parts) == 2: 27 | directives.append(parts) 28 | print("✅ Parsed robots.txt content successfully - 2/3 🟡") 29 | return sitemaps, directives 30 | 31 | def convert_robots_to_dataframe(directives): 32 | """Converts robots.txt directives to a pandas DataFrame.""" 33 | df = pd.DataFrame(directives, columns=['Directive', 'Parameter']) 34 | print("✅ Converted robots.txt directives to dataframe - 3/3 🟢") 35 | return df 36 | 37 | 38 | 39 | # Fetch and parse robots.txt content 40 | robots_content = fetch_robots_txt("https://twtd.co.uk/") 41 | 42 | if robots_content: 43 | sitemaps, directives = parse_robots_txt(robots_content) 44 | df_directives = convert_robots_to_dataframe(directives) 45 | 46 | # Check if sitemaps were found and display them 47 | if sitemaps: 48 | print("\nSitemaps found in robots.txt:") 49 | for sitemap in sitemaps:
50 | print(f"- {sitemap}") 51 | else: 52 | print("\nNo sitemaps found in robots.txt.") 53 | 54 | # Check if directives were found and display the first 10 55 | if not df_directives.empty: 56 | print("\nDirectives from robots.txt:") 57 | print(df_directives.head(10)) 58 | else: 59 | print("\nNo directives found in robots.txt.") 60 | 61 | 62 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/tests/__init__.py -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/tests/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/tests/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/tests/data_contracts/extraction_data_contract.yml: -------------------------------------------------------------------------------- 1 | dataset: scraped_fb_data 2 | 3 | columns: 4 | - name: pos 5 | data_type: integer 6 | unique: true 7 | - name: team 8 | data_type: varchar 9 | not_null: true 10 | - name: p 11 | data_type: integer 12 | not_null: true 13 | - name: w1 14 | data_type: integer 15 | not_null: true 16 | - name: d1 17 | data_type: integer 18 | not_null: true 19 | - name: l1 20 | data_type: integer 21 | not_null: true 22 | - name: gf1 23 | data_type: integer 24 | not_null: true 25 | - name: ga1 26 | data_type: integer 27 | not_null: true 28 | - name: w2 29 | data_type: integer 30 | not_null: true 31 | - name: d2 32 | data_type: integer 33 | not_null: true 34 | - name: l2 35 | data_type: integer 36 | not_null: true 37 | - name: gf2 38 | data_type: integer 39 | not_null: true 40 | - name: ga2 41 | data_type: integer 42 | not_null: true 43 | - name: gd 44 | data_type: integer 45 | not_null: true 46 | - name: pts 47 | data_type: integer 48 | not_null: true 49 | - name: date 50 | data_type: date 51 | not_null: true 52 | 53 | checks: 54 | - row_count = 20 -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/tests/data_contracts/transformation_data_contract.yml: -------------------------------------------------------------------------------- 1 | dataset: transformed_fb_data 2 | 3 | columns: 4 | - name: position 5 | data_type: integer 6 | unique: true 7 | - name: team_name 8 | data_type: varchar 9 | not_null: true 10 | - name: games_played 11 | data_type: integer 12 | not_null: true 13 | - name: wins 14 | data_type: integer 15 | not_null: true 16 | - name: draws 17 | data_type: integer 18 | not_null: true 19 | - name: losses 20 | data_type: integer 21 | not_null: true 22 | - name: goals_for 23 | data_type: integer 24 | not_null: true 25 | - name: goals_against 26 | data_type: integer 27 | not_null: true 28 | - name: goal_difference 29 | data_type: integer 30 | not_null: true 31 | - name: points 32 | data_type: integer 33 | not_null: true 34 | valid_min: 0 35 | - name: match_date 36 | data_type: date 37 
| not_null: true 38 | 39 | checks: 40 | - row_count = 20 # The table must contain 20 rows 41 | - min(games_played) >= 0 # Games played must be non-negative 42 | - max(goal_difference) <= 100 # Replace 100 with your maximum goal difference value 43 | - missing_count(team_name) = 0 # Ensure no missing team names 44 | - failed rows: 45 | name: No negative points permitted 46 | fail query: | 47 | SELECT team_name, points 48 | FROM transformed_fb_data 49 | WHERE points < 0 50 | - failed rows: 51 | name: Check Everton's points post-PSR penalty 52 | fail query: | 53 | WITH PrePointsDeduction AS ( 54 | SELECT SUM(points) as pre_penalty_points 55 | FROM transformed_fb_data 56 | WHERE team_name = 'Everton' AND match_date < '2023-11-01' 57 | ), PostPointsDeduction AS ( 58 | SELECT SUM(points) as post_penalty_points 59 | FROM transformed_fb_data 60 | WHERE team_name = 'Everton' AND match_date >= '2023-11-01' 61 | ) 62 | SELECT 63 | (SELECT pre_penalty_points FROM PrePointsDeduction) as pre_penalty_points, 64 | (SELECT post_penalty_points FROM PostPointsDeduction) as post_penalty_points, 65 | (SELECT pre_penalty_points FROM PrePointsDeduction) - (SELECT post_penalty_points FROM PostPointsDeduction) as point_difference 66 | WHERE (SELECT pre_penalty_points FROM PrePointsDeduction) - (SELECT post_penalty_points FROM PostPointsDeduction) < 10 -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/tests/data_quality_checks/__pycache__/scan_extraction_data_contract.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/tests/data_quality_checks/__pycache__/scan_extraction_data_contract.cpython-310.pyc -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/tests/data_quality_checks/__pycache__/scan_transformation_data_contract.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/tests/data_quality_checks/__pycache__/scan_transformation_data_contract.cpython-310.pyc -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/tests/data_quality_checks/scan_extraction_data_contract.py: -------------------------------------------------------------------------------- 1 | from soda.contracts.data_contract_translator import DataContractTranslator 2 | from soda.scan import Scan 3 | import logging 4 | import os 5 | 6 | def run_dq_checks_for_extraction_stage(): 7 | # Correctly set the path to the project root directory 8 | project_root_directory = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | 10 | # Construct the full file paths for the YAML files 11 | extraction_data_contract_path = os.path.join(project_root_directory, 'tests', 'data_contracts', 'extraction_data_contract.yml') 12 | extraction_config_yaml_path = os.path.join(project_root_directory, 'config', 'extraction_config.yml') 13 | 14 | # Read the data contract file as a Python string 15 | with open(extraction_data_contract_path) as f: 16 | data_contract_yaml_str: str = f.read() 17 | 18 | # Translate the data 
contract standards into SodaCL 19 | data_contract_parser = DataContractTranslator() 20 | sodacl_yaml_str = data_contract_parser.translate_data_contract_yaml_str(data_contract_yaml_str) 21 | 22 | # Log or save the SodaCL checks file to help with debugging 23 | logging.debug(sodacl_yaml_str) 24 | 25 | # Execute the translated SodaCL checks in a scan 26 | scan = Scan() 27 | scan.set_data_source_name("scraped_fb_data") 28 | scan.add_configuration_yaml_file(file_path=extraction_config_yaml_path) 29 | scan.add_sodacl_yaml_str(sodacl_yaml_str) 30 | scan.execute() 31 | scan.assert_no_checks_fail() 32 | 33 | if __name__ == "__main__": 34 | run_dq_checks_for_extraction_stage() 35 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/tests/data_quality_checks/scan_transformation_data_contract.py: -------------------------------------------------------------------------------- 1 | from soda.contracts.data_contract_translator import DataContractTranslator 2 | from soda.scan import Scan 3 | import logging 4 | import os 5 | 6 | def run_dq_checks_for_transformation_stage(): 7 | # Correctly set the path to the project root directory 8 | project_root_directory = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | 10 | # Construct the full file paths for the YAML files 11 | transformation_config_yaml_path = os.path.join(project_root_directory, 'config', 'transformation_config.yml') 12 | transformation_data_contract_path = os.path.join(project_root_directory, 'tests', 'data_contracts', 'transformation_data_contract.yml') 13 | 14 | # Read the data contract file as a Python string 15 | with open(transformation_data_contract_path) as f: 16 | data_contract_yaml_str: str = f.read() 17 | 18 | # Translate the data contract standards into SodaCL 19 | data_contract_parser = DataContractTranslator() 20 | sodacl_yaml_str = data_contract_parser.translate_data_contract_yaml_str(data_contract_yaml_str) 21 | 22 | # Log or save the SodaCL checks file to help with debugging 23 | logging.debug(sodacl_yaml_str) 24 | 25 | # Execute the translated SodaCL checks in a scan 26 | scan = Scan() 27 | scan.set_data_source_name("transformed_fb_data") 28 | scan.add_configuration_yaml_file(file_path=transformation_config_yaml_path) 29 | scan.add_sodacl_yaml_str(sodacl_yaml_str) 30 | scan.execute() 31 | scan.assert_no_checks_fail() 32 | 33 | if __name__ == "__main__": 34 | run_dq_checks_for_transformation_stage() 35 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/utils/__init__.py -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- 
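The two scan scripts above follow the same pattern: read a YAML data contract, translate it to SodaCL with DataContractTranslator, then run a Scan that raises if any check fails (via assert_no_checks_fail()). Below is a minimal sketch of using them as quality gates around the pipeline stages. The tests.data_quality_checks import paths follow the layout shown here, but the extract/transform/load callables are hypothetical placeholders, and the config/*.yml files the scripts reference (not shown in this listing) must exist in your environment.

# Hedged sketch: gate each pipeline stage with its data-contract scan.
# Assumptions: the repo root is on sys.path so the tests.data_quality_checks
# imports resolve, and extract/transform/load are your own stage functions.
from tests.data_quality_checks.scan_extraction_data_contract import run_dq_checks_for_extraction_stage
from tests.data_quality_checks.scan_transformation_data_contract import run_dq_checks_for_transformation_stage

def run_pipeline_with_dq_gates(extract, transform, load):
    extract()                                 # scrape + land the raw table (scraped_fb_data)
    run_dq_checks_for_extraction_stage()      # gate 1: raises if the extraction contract is violated
    transform()                               # rename fields, calculate points, apply the Everton deduction
    run_dq_checks_for_transformation_stage()  # gate 2: raises if the transformation contract is violated
    load()                                    # only reached once both contracts hold

--------------------------------------------------------------------------------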
/python/articles/web_scraping_with_data_contracts/utils/__pycache__/aws_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/utils/__pycache__/aws_utils.cpython-310.pyc -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/utils/__pycache__/db_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/articles/web_scraping_with_data_contracts/utils/__pycache__/db_utils.cpython-310.pyc -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/utils/aws_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import boto3 3 | from dotenv import load_dotenv 4 | from botocore.exceptions import BotoCoreError, ClientError 5 | 6 | # Load environment variables from .env file 7 | load_dotenv() 8 | 9 | 10 | def connect_to_aws_s3(): 11 | print("Connecting to AWS S3...") 12 | try: 13 | s3_client = boto3.client( 14 | 's3', 15 | aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), 16 | aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), 17 | region_name=os.getenv("REGION_NAME") 18 | ) 19 | print("Connected to AWS S3 successfully.") 20 | return s3_client 21 | except (BotoCoreError, ClientError) as e: 22 | raise Exception(f"❌[ERROR - AWS S3 CONNECTION]: {e}") 23 | 24 | 25 | 26 | def create_bucket_if_not_exists(s3_client, bucket_name, region): 27 | print(f"Checking if the bucket {bucket_name} exists...") 28 | try: 29 | if bucket_name not in [bucket['Name'] for bucket in s3_client.list_buckets()['Buckets']]: 30 | s3_client.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={'LocationConstraint': region}) 31 | print(f"Bucket {bucket_name} created successfully.") 32 | else: 33 | print(f"Bucket {bucket_name} already exists.") 34 | return bucket_name 35 | except (BotoCoreError, ClientError) as e: 36 | raise Exception(f"❌[ERROR - BUCKET CREATION]: {e}") 37 | 38 | 39 | 40 | def upload_file_to_s3(s3_client, local_filename, bucket_name, s3_folder=None): 41 | print(f"Uploading {local_filename} to the bucket {bucket_name}...") 42 | try: 43 | csv_folder = "data/" 44 | full_csv_file_path = f"{csv_folder}{local_filename}" 45 | s3_path = f"{s3_folder}/{local_filename}" if s3_folder else local_filename 46 | s3_client.upload_file(full_csv_file_path, bucket_name, s3_path) 47 | print(f"File {local_filename} uploaded to {bucket_name}/{s3_path} successfully.") 48 | except Exception as e: 49 | raise Exception(f"❌[ERROR - FILE UPLOAD]: {e}") 50 | 51 | 52 | 53 | def validate_file_in_s3(s3_client, bucket_name, s3_path): 54 | print(f"Validating the presence of {s3_path} in the bucket {bucket_name}...") 55 | try: 56 | s3_client.head_object(Bucket=bucket_name, Key=s3_path) 57 | print(f"Validation successful: {s3_path} exists in {bucket_name}.") 58 | return True 59 | except (BotoCoreError, ClientError) as e: 60 | raise Exception(f"❌[ERROR - VALIDATE FILE]: {e}") 61 | 62 | -------------------------------------------------------------------------------- /python/articles/web_scraping_with_data_contracts/utils/db_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import psycopg2 3 |
import pandas as pd 4 | from dotenv import load_dotenv 5 | 6 | # Load environment variables from .env file 7 | load_dotenv() 8 | 9 | 10 | def connect_to_db(): 11 | HOST=os.getenv('HOST') 12 | PORT=os.getenv('PORT') 13 | DATABASE=os.getenv('DATABASE') 14 | POSTGRES_USERNAME=os.getenv('POSTGRES_USERNAME') 15 | POSTGRES_PASSWORD=os.getenv('POSTGRES_PASSWORD') 16 | # Use environment variables directly 17 | try: 18 | db_connection = psycopg2.connect( 19 | host=HOST, 20 | port=PORT, 21 | dbname=DATABASE, 22 | user=POSTGRES_USERNAME, 23 | password=POSTGRES_PASSWORD, 24 | ) 25 | db_connection.set_session(autocommit=True) 26 | print("✅ Connection to the database established successfully.") 27 | return db_connection 28 | except Exception as e: 29 | raise Exception(f"❌[ERROR - DB CONNECTION]: Error connecting to the database: {e}") 30 | 31 | 32 | 33 | 34 | 35 | 36 | def create_extracted_schema_and_table(db_connection, schema_name, table_name): 37 | cursor = db_connection.cursor() 38 | cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name}") 39 | db_connection.commit() 40 | 41 | create_table_query = f""" 42 | CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ( 43 | "pos" INTEGER, 44 | "team" TEXT NOT NULL, 45 | "p" INTEGER, 46 | "w1" INTEGER, 47 | "d1" INTEGER, 48 | "l1" INTEGER, 49 | "gf1" INTEGER, 50 | "ga1" INTEGER, 51 | "w2" INTEGER, 52 | "d2" INTEGER, 53 | "l2" INTEGER, 54 | "gf2" INTEGER, 55 | "ga2" INTEGER, 56 | "gd" INTEGER, 57 | "pts" INTEGER, 58 | "date" DATE 59 | ) 60 | """ 61 | cursor.execute(create_table_query) 62 | db_connection.commit() 63 | cursor.close() 64 | 65 | def insert_extracted_data_to_table(db_connection, schema_name, table_name, dataframe): 66 | cursor = db_connection.cursor() 67 | for index, row in dataframe.iterrows(): 68 | data = tuple(row) 69 | placeholders = ",".join(["%s"] * len(row)) 70 | insert_query = f""" 71 | INSERT INTO {schema_name}.{table_name} ( 72 | "pos", "team", "p", "w1", "d1", "l1", "gf1", "ga1", "w2", "d2", "l2", "gf2", "ga2", "gd", "pts", "date" 73 | ) 74 | VALUES ({placeholders}) 75 | """ 76 | try: 77 | cursor.execute(insert_query, data) 78 | db_connection.commit() 79 | except Exception as e: 80 | print(f"❌Failed to insert data: {data}❌. 
Error: {e}") 81 | cursor.close() 82 | 83 | 84 | 85 | 86 | 87 | # Transformation 88 | 89 | def create_transformed_schema_and_table(db_connection, schema_name, table_name): 90 | cursor = db_connection.cursor() 91 | cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name}") 92 | db_connection.commit() 93 | 94 | create_table_query = f""" 95 | CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ( 96 | "position" INTEGER, 97 | "team_name" VARCHAR, 98 | "games_played" INTEGER, 99 | "wins" INTEGER, 100 | "draws" INTEGER, 101 | "losses" INTEGER, 102 | "goals_for" INTEGER, 103 | "goals_against" INTEGER, 104 | "goal_difference" INTEGER, 105 | "points" INTEGER, 106 | "match_date" DATE 107 | ) 108 | """ 109 | cursor.execute(create_table_query) 110 | db_connection.commit() 111 | cursor.close() 112 | 113 | # Function to fetch data from the extraction layer 114 | def fetch_extraction_data(db_connection, schema_name, table_name): 115 | query = f"SELECT * FROM {schema_name}.{table_name};" 116 | return pd.read_sql(query, db_connection) 117 | 118 | 119 | def insert_transformed_data_to_table(db_connection, schema_name, table_name, dataframe): 120 | cursor = db_connection.cursor() 121 | 122 | # Check the dataframe columns before insertion 123 | # print(f"Dataframe columns: {dataframe.columns.tolist()}") 124 | 125 | # Building column names for the INSERT INTO statement 126 | columns = ', '.join([f'"{c}"' for c in dataframe.columns]) 127 | 128 | # Building placeholders for the VALUES part of the INSERT INTO statement 129 | placeholders = ', '.join(['%s' for _ in dataframe.columns]) 130 | 131 | # Construct the INSERT INTO statement 132 | insert_query = f"INSERT INTO {schema_name}.{table_name} ({columns}) VALUES ({placeholders})" 133 | 134 | # Execute the INSERT INTO statement for each row in the dataframe 135 | for index, row in dataframe.iterrows(): 136 | # Print the row to be inserted for debugging purposes 137 | # print(f"Row data: {tuple(row)}") 138 | try: 139 | cursor.execute(insert_query, tuple(row)) 140 | db_connection.commit() 141 | except Exception as e: 142 | print(f"❌Failed to insert transformed data: {tuple(row)}❌. 
Error: {e}") 143 | cursor.close() 144 | 145 | 146 | 147 | # Loading 148 | def fetch_transformed_data(db_connection): 149 | print("Fetching transformed data from the database...") 150 | try: 151 | query = "SELECT * FROM staging.transformed_fb_data;" 152 | df = pd.read_sql(query, db_connection) 153 | print("Data fetched successfully.") 154 | return df 155 | except Exception as e: 156 | raise Exception(f"❌[ERROR - FETCH DATA]: {e}") 157 | 158 | 159 | def convert_dataframe_to_csv(df, filename): 160 | target_destination = 'data/' 161 | full_file_path = f"{target_destination}{filename}" 162 | print(f"Converting dataframe to CSV ('{filename}')...") 163 | try: 164 | df.to_csv(full_file_path, index=False) 165 | print(f"CSV file '{filename}' created and saved to target destination successfully.") 166 | print(f"Full file path: '{full_file_path}'") 167 | except Exception as e: 168 | raise Exception(f"❌[ERROR - CSV CREATION]: {e}") -------------------------------------------------------------------------------- /python/long_vids/README.md: -------------------------------------------------------------------------------- 1 | "# README for Long Vids" 2 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/.env: -------------------------------------------------------------------------------- 1 | AIRFLOW_UID=1000 2 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/dags/__pycache__/dummy_dag_1.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/airflow/demo_1/dags/__pycache__/dummy_dag_1.cpython-38.pyc -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/dags/__pycache__/dummy_dag_2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/airflow/demo_1/dags/__pycache__/dummy_dag_2.cpython-38.pyc -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/dags/__pycache__/quick_dag.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/airflow/demo_1/dags/__pycache__/quick_dag.cpython-38.pyc -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/dags/__pycache__/test_dag_1.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/airflow/demo_1/dags/__pycache__/test_dag_1.cpython-38.pyc -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/dags/__pycache__/test_dag_2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/airflow/demo_1/dags/__pycache__/test_dag_2.cpython-38.pyc 
-------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/dags/__pycache__/test_dag_3.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/airflow/demo_1/dags/__pycache__/test_dag_3.cpython-38.pyc -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/dags/airflow.cfg: -------------------------------------------------------------------------------- 1 | [core] 2 | # The folder where your airflow pipelines live, most likely a 3 | # subfolder in a code repository 4 | dags_folder = /usr/local/airflow/dags 5 | load_examples = False 6 | 7 | [webserver] 8 | # The base url of your website as airflow cannot guess what domain or 9 | # cname you are using. This is used in automated emails that 10 | # airflow sends to point links to the right web server 11 | base_url = http://localhost:8081 12 | 13 | # The ip specified when starting the web server 14 | web_server_host = 0.0.0.0 15 | 16 | # The port on which to run the web server 17 | web_server_port = 8081 18 | 19 | [scheduler] 20 | # Task instances listen for external kill signal (when you clear 21 | # tasks) this is the frequency at which they should listen (in seconds). 22 | job_heartbeat_sec = 5 23 | 24 | # The scheduler constantly tries to trigger new tasks (look at the 25 | # scheduler section in the docs for more information). This defines 26 | # how often the scheduler should run (in seconds). 27 | scheduler_heartbeat_sec = 5 28 | 29 | [cli] 30 | # In case you want to do a reset of the database, this is the command 31 | # to use. 32 | api_client = airflow 33 | api_args = resetdb 34 | 35 | [celery] 36 | # This should contain a comma-separated list of queues to use 37 | # for tasks 38 | celery_queues = celery 39 | 40 | # The app name that will be used by celery 41 | celery_app_name = airflow.executors.celery_executor 42 | 43 | # The concurrency that will be used when starting workers with the 44 | # "airflow worker" command. This defines the number of task instances that 45 | # a worker will take, so size up your workers based on the resources on 46 | # your worker box and the nature of your tasks 47 | worker_concurrency = 16 48 | 49 | [sqlalchemy] 50 | # The SqlAlchemy connection string to the metadata database. 51 | # SqlAlchemy supports many different database engine, more information 52 | # their website 53 | sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres:5432/airflow 54 | 55 | # The SqlAlchemy pool size is the maximum number of database connections 56 | # in the pool. 57 | sql_alchemy_pool_size = 5 58 | 59 | # The SqlAlchemy pool recycle is the number of seconds a connection 60 | # can be idle in the pool before it is invalidated. This config does 61 | # not apply to sqlite. 62 | sql_alchemy_pool_recycle = 1800 63 | 64 | [atlas] 65 | sasl_enabled = True 66 | 67 | [mesos] 68 | # Mesos master address which MesosExecutor will connect to. 69 | master = localhost:5050 70 | 71 | # The framework name which Airflow scheduler will register itself as on mesos 72 | framework_name = Airflow 73 | 74 | # Number of cpu cores required for running one task instance using 75 | # 'airflow run ...' command 76 | task_cpu = 1 77 | 78 | # Memory in MB required for running one task instance using 79 | # 'airflow run ...' 
command 80 | task_memory = 256 81 | 82 | # Enable framework checkpointing for mesos 83 | checkpoint = False 84 | 85 | # Failover timeout in seconds. 86 | failover_timeout = 604800 87 | 88 | # Enable framework authentication for mesos 89 | authenticate = False 90 | 91 | # Mesos credentials, if authenticate is True 92 | # mesos_authenticate_to_registry = False 93 | # default_principal = admin 94 | # default_secret = admin -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/dags/demo_dag.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import dag, task 2 | from datetime import datetime, timedelta 3 | 4 | # Add default arguments for demo 5 | default_args = { 6 | 'owner': 'airflow', 7 | 'retries': 1, 8 | 'retry_delay': timedelta(minutes=1), # Retry a failed task after 1 minute 9 | } 10 | 11 | @dag(default_args=default_args, 12 | schedule_interval='*/2 * * * *', 13 | start_date=datetime(2024, 9, 28), 14 | # start_date=datetime.now() - timedelta(minutes=1), 15 | tags=['message_display']) 16 | 17 | def message_display_dag(): 18 | 19 | 20 | @task() 21 | def display_init_message(): 22 | print("--- Let's initiate this workflow...✅---") 23 | 24 | 25 | # Add task for displaying the 1st message 26 | @task() 27 | def display_first_message(): 28 | print("This is the 1st message (1/3)🔴") 29 | 30 | 31 | # Add task for displaying the 2nd message 32 | @task() 33 | def display_second_message(): 34 | print("This is the 2nd message (2/3)🟡") 35 | 36 | 37 | # Add task for displaying the 3rd message 38 | @task() 39 | def display_third_message(): 40 | print("This is the 3rd message (3/3)🟢") 41 | 42 | 43 | @task() 44 | def display_last_message(): 45 | print("--- SUCCESS: Ending this workflow🏁🚨 ---") 46 | 47 | # Define task dependencies to ensure the messages are displayed in order 48 | starting_message = display_init_message() 49 | first_message = display_first_message() 50 | second_message = display_second_message() 51 | third_message = display_third_message() 52 | ending_message = display_last_message() 53 | 54 | starting_message >> first_message >> second_message >> third_message >> ending_message # Set task execution order 55 | 56 | message_display_pipeline = message_display_dag() 57 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | ################################## PYTHON ################################## 3 | 4 | LinkedIn - Stephen David-Williams 5 | TikTok - www.tiktok.com/@sdw.online 6 | Twitter/X - www.twitter.com/sdw_online 7 | Medium - www.medium.com/@sdw-online 8 | 9 | """ 10 | 11 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | x-airflow-common: 3 | &airflow-common 4 | image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.8.1} 5 | environment: 6 | &airflow-common-env 7 | AIRFLOW__CORE__EXECUTOR: CeleryExecutor 8 | AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow 9 | AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow 10 | AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 11 | AIRFLOW__CORE__FERNET_KEY: '' 12 | AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' 13 | 
AIRFLOW__CORE__LOAD_EXAMPLES: 'false' 14 | AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' 15 | AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' 16 | _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} 17 | volumes: 18 | - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags 19 | - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs 20 | - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config 21 | - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins 22 | user: "${AIRFLOW_UID:-50000}:0" 23 | depends_on: 24 | &airflow-common-depends-on 25 | redis: 26 | condition: service_healthy 27 | postgres: 28 | condition: service_healthy 29 | 30 | services: 31 | postgres: 32 | image: postgres:13 33 | environment: 34 | POSTGRES_USER: airflow 35 | POSTGRES_PASSWORD: airflow 36 | POSTGRES_DB: airflow 37 | volumes: 38 | - postgres-db-volume:/var/lib/postgresql/data 39 | healthcheck: 40 | test: ["CMD", "pg_isready", "-U", "airflow"] 41 | interval: 10s 42 | retries: 5 43 | start_period: 5s 44 | restart: always 45 | 46 | redis: 47 | image: redis:latest 48 | expose: 49 | - 6379 50 | healthcheck: 51 | test: ["CMD", "redis-cli", "ping"] 52 | interval: 10s 53 | timeout: 30s 54 | retries: 50 55 | start_period: 30s 56 | restart: always 57 | 58 | airflow-webserver: 59 | <<: *airflow-common 60 | command: webserver 61 | ports: 62 | - "8084:8080" 63 | healthcheck: 64 | test: ["CMD", "curl", "--fail", "http://localhost:8080/health"] 65 | interval: 30s 66 | timeout: 10s 67 | retries: 5 68 | start_period: 30s 69 | restart: always 70 | depends_on: 71 | <<: *airflow-common-depends-on 72 | airflow-init: 73 | condition: service_completed_successfully 74 | 75 | airflow-scheduler: 76 | <<: *airflow-common 77 | command: scheduler 78 | healthcheck: 79 | test: ["CMD", "curl", "--fail", "http://localhost:8974/health"] 80 | interval: 30s 81 | timeout: 10s 82 | retries: 5 83 | start_period: 30s 84 | restart: always 85 | depends_on: 86 | <<: *airflow-common-depends-on 87 | airflow-init: 88 | condition: service_completed_successfully 89 | 90 | airflow-worker: 91 | <<: *airflow-common 92 | command: celery worker 93 | healthcheck: 94 | test: 95 | - "CMD-SHELL" 96 | - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}" || celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"' 97 | interval: 30s 98 | timeout: 10s 99 | retries: 5 100 | start_period: 30s 101 | environment: 102 | <<: *airflow-common-env 103 | DUMB_INIT_SETSID: "0" 104 | restart: always 105 | depends_on: 106 | <<: *airflow-common-depends-on 107 | airflow-init: 108 | condition: service_completed_successfully 109 | 110 | airflow-triggerer: 111 | <<: *airflow-common 112 | command: triggerer 113 | healthcheck: 114 | test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"'] 115 | interval: 30s 116 | timeout: 10s 117 | retries: 5 118 | start_period: 30s 119 | restart: always 120 | depends_on: 121 | <<: *airflow-common-depends-on 122 | airflow-init: 123 | condition: service_completed_successfully 124 | 125 | airflow-init: 126 | <<: *airflow-common 127 | entrypoint: /bin/bash 128 | command: 129 | - -c 130 | - | 131 | if [[ -z "${AIRFLOW_UID}" ]]; then 132 | echo 133 | echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" 134 | echo "If you are on Linux, you SHOULD follow the instructions below to set " 135 | echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." 
136 | echo "For other operating systems you can get rid of the warning with manually created .env file:" 137 | echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user" 138 | echo 139 | fi 140 | one_meg=1048576 141 | mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg)) 142 | cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat) 143 | disk_available=$$(df / | tail -1 | awk '{print $$4}') 144 | warning_resources="false" 145 | if (( mem_available < 4000 )) ; then 146 | echo 147 | echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m" 148 | echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))" 149 | echo 150 | warning_resources="true" 151 | fi 152 | if (( cpus_available < 2 )); then 153 | echo 154 | echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m" 155 | echo "At least 2 CPUs recommended. You have $${cpus_available}" 156 | echo 157 | warning_resources="true" 158 | fi 159 | if (( disk_available < one_meg * 10 )); then 160 | echo 161 | echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m" 162 | echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))" 163 | echo 164 | warning_resources="true" 165 | fi 166 | if [[ $${warning_resources} == "true" ]]; then 167 | echo 168 | echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m" 169 | echo "Please follow the instructions to increase amount of resources available:" 170 | echo " https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin" 171 | echo 172 | fi 173 | mkdir -p /sources/logs /sources/dags /sources/plugins 174 | chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins} 175 | exec /entrypoint airflow version 176 | 177 | environment: 178 | <<: *airflow-common-env 179 | _AIRFLOW_DB_MIGRATE: 'true' 180 | _AIRFLOW_WWW_USER_CREATE: 'true' 181 | _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} 182 | _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} 183 | _PIP_ADDITIONAL_REQUIREMENTS: '' 184 | user: "0:0" 185 | volumes: 186 | - ${AIRFLOW_PROJ_DIR:-.}:/sources 187 | 188 | airflow-cli: 189 | <<: *airflow-common 190 | profiles: 191 | - debug 192 | environment: 193 | <<: *airflow-common-env 194 | CONNECTION_CHECK_MAX_COUNT: "0" 195 | command: 196 | - bash 197 | - -c 198 | - airflow 199 | 200 | flower: 201 | <<: *airflow-common 202 | command: celery flower 203 | profiles: 204 | - flower 205 | ports: 206 | - "5555:5555" 207 | healthcheck: 208 | test: ["CMD", "curl", "--fail", "http://localhost:5555/"] 209 | interval: 30s 210 | timeout: 10s 211 | retries: 5 212 | start_period: 30s 213 | restart: always 214 | depends_on: 215 | <<: *airflow-common-depends-on 216 | airflow-init: 217 | condition: service_completed_successfully 218 | 219 | volumes: 220 | postgres-db-volume: 221 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=manual__2024-02-01T005613.905830+0000/task_id=display_first_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T00:56:33.712+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T00:56:33.746+0000] {taskinstance.py:1956} INFO - Dependencies all 
met for dep_context=requeueable deps ti= 3 | [2024-02-01T00:56:33.747+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T00:56:33.796+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 00:56:13.905830+00:00 5 | [2024-02-01T00:56:33.837+0000] {standard_task_runner.py:60} INFO - Started process 6135 to run task 6 | [2024-02-01T00:56:33.843+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_first_message', 'manual__2024-02-01T00:56:13.905830+00:00', '--job-id', '10', '--raw', '--subdir', 'DAGS_FOLDER/quick_dag.py', '--cfg-path', '/tmp/tmprd96i6he'] 7 | [2024-02-01T00:56:33.852+0000] {standard_task_runner.py:88} INFO - Job 10: Subtask display_first_message 8 | [2024-02-01T00:56:33.985+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T00:56:34.170+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_first_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T00:56:13.905830+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2024-02-01T00:56:13.905830+00:00' 10 | [2024-02-01T00:56:34.174+0000] {logging_mixin.py:188} INFO - This is the 1st message 🔴 11 | [2024-02-01T00:56:34.175+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T00:56:34.194+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. dag_id=message_display_dag, task_id=display_first_message, execution_date=20240201T005613, start_date=20240201T005633, end_date=20240201T005634 13 | [2024-02-01T00:56:34.257+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T00:56:34.305+0000] {taskinstance.py:3280} INFO - 1 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=manual__2024-02-01T005613.905830+0000/task_id=display_second_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T00:56:35.558+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T00:56:35.587+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T00:56:35.588+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T00:56:35.617+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 00:56:13.905830+00:00 5 | [2024-02-01T00:56:35.634+0000] {standard_task_runner.py:60} INFO - Started process 6137 to run task 6 | [2024-02-01T00:56:35.638+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_second_message', 'manual__2024-02-01T00:56:13.905830+00:00', '--job-id', '11', '--raw', '--subdir', 'DAGS_FOLDER/quick_dag.py', '--cfg-path', '/tmp/tmp7uuk556s'] 7 | [2024-02-01T00:56:35.639+0000] {standard_task_runner.py:88} INFO - Job 11: Subtask display_second_message 8 | [2024-02-01T00:56:35.725+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T00:56:35.841+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_second_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T00:56:13.905830+00:00' AIRFLOW_CTX_TRY_NUMBER='1' 
AIRFLOW_CTX_DAG_RUN_ID='manual__2024-02-01T00:56:13.905830+00:00' 10 | [2024-02-01T00:56:35.844+0000] {logging_mixin.py:188} INFO - This is the 2nd message 🟡 11 | [2024-02-01T00:56:35.845+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T00:56:35.872+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. dag_id=message_display_dag, task_id=display_second_message, execution_date=20240201T005613, start_date=20240201T005635, end_date=20240201T005635 13 | [2024-02-01T00:56:35.936+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T00:56:35.965+0000] {taskinstance.py:3280} INFO - 1 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=manual__2024-02-01T005613.905830+0000/task_id=display_third_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T00:56:36.825+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T00:56:36.850+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T00:56:36.851+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T00:56:36.875+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 00:56:13.905830+00:00 5 | [2024-02-01T00:56:36.895+0000] {standard_task_runner.py:60} INFO - Started process 6139 to run task 6 | [2024-02-01T00:56:36.907+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_third_message', 'manual__2024-02-01T00:56:13.905830+00:00', '--job-id', '12', '--raw', '--subdir', 'DAGS_FOLDER/quick_dag.py', '--cfg-path', '/tmp/tmpkies3fmu'] 7 | [2024-02-01T00:56:36.910+0000] {standard_task_runner.py:88} INFO - Job 12: Subtask display_third_message 8 | [2024-02-01T00:56:37.020+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T00:56:37.203+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_third_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T00:56:13.905830+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2024-02-01T00:56:13.905830+00:00' 10 | [2024-02-01T00:56:37.205+0000] {logging_mixin.py:188} INFO - This is the 3rd message 🟢 11 | [2024-02-01T00:56:37.206+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T00:56:37.229+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_third_message, execution_date=20240201T005613, start_date=20240201T005636, end_date=20240201T005637 13 | [2024-02-01T00:56:37.283+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T00:56:37.311+0000] {taskinstance.py:3280} INFO - 0 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=manual__2024-02-01T125229.953764+0000/task_id=display_first_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T12:52:48.654+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T12:52:48.729+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T12:52:48.733+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T12:52:48.825+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 12:52:29.953764+00:00 5 | [2024-02-01T12:52:48.845+0000] {standard_task_runner.py:60} INFO - Started process 16582 to run task 6 | [2024-02-01T12:52:48.897+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_first_message', 'manual__2024-02-01T12:52:29.953764+00:00', '--job-id', '13', '--raw', '--subdir', 'DAGS_FOLDER/quick_dag.py', '--cfg-path', '/tmp/tmpjo7aa3qr'] 7 | [2024-02-01T12:52:48.935+0000] {standard_task_runner.py:88} INFO - Job 13: Subtask display_first_message 8 | [2024-02-01T12:52:49.459+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T12:52:50.038+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_first_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T12:52:29.953764+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2024-02-01T12:52:29.953764+00:00' 10 | [2024-02-01T12:52:50.045+0000] {logging_mixin.py:188} INFO - This is the 1st message 🔴 11 | [2024-02-01T12:52:50.046+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T12:52:50.078+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_first_message, execution_date=20240201T125229, start_date=20240201T125248, end_date=20240201T125250 13 | [2024-02-01T12:52:50.156+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T12:52:50.211+0000] {taskinstance.py:3280} INFO - 1 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=manual__2024-02-01T125229.953764+0000/task_id=display_second_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T12:52:52.622+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T12:52:52.656+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T12:52:52.658+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T12:52:52.690+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 12:52:29.953764+00:00 5 | [2024-02-01T12:52:52.712+0000] {standard_task_runner.py:60} INFO - Started process 16584 to run task 6 | [2024-02-01T12:52:52.725+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_second_message', 'manual__2024-02-01T12:52:29.953764+00:00', '--job-id', '14', '--raw', '--subdir', 'DAGS_FOLDER/quick_dag.py', '--cfg-path', '/tmp/tmpgapj8lxw'] 7 | [2024-02-01T12:52:52.727+0000] {standard_task_runner.py:88} INFO - Job 14: Subtask display_second_message 8 | [2024-02-01T12:52:52.901+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T12:52:53.167+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_second_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T12:52:29.953764+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2024-02-01T12:52:29.953764+00:00' 10 | [2024-02-01T12:52:53.172+0000] {logging_mixin.py:188} INFO - This is the 2nd message 🟡 11 | [2024-02-01T12:52:53.174+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T12:52:53.195+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_second_message, execution_date=20240201T125229, start_date=20240201T125252, end_date=20240201T125253 13 | [2024-02-01T12:52:53.240+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T12:52:53.299+0000] {taskinstance.py:3280} INFO - 1 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=manual__2024-02-01T125229.953764+0000/task_id=display_third_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T12:52:54.513+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T12:52:54.531+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T12:52:54.532+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T12:52:54.550+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 12:52:29.953764+00:00 5 | [2024-02-01T12:52:54.560+0000] {standard_task_runner.py:60} INFO - Started process 16586 to run task 6 | [2024-02-01T12:52:54.564+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_third_message', 'manual__2024-02-01T12:52:29.953764+00:00', '--job-id', '15', '--raw', '--subdir', 'DAGS_FOLDER/quick_dag.py', '--cfg-path', '/tmp/tmpxui3rqk1'] 7 | [2024-02-01T12:52:54.566+0000] {standard_task_runner.py:88} INFO - Job 15: Subtask display_third_message 8 | [2024-02-01T12:52:54.646+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T12:52:54.774+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_third_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T12:52:29.953764+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2024-02-01T12:52:29.953764+00:00' 10 | [2024-02-01T12:52:54.776+0000] {logging_mixin.py:188} INFO - This is the 3rd message 🟢 11 | [2024-02-01T12:52:54.777+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T12:52:54.792+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_third_message, execution_date=20240201T125229, start_date=20240201T125254, end_date=20240201T125254 13 | [2024-02-01T12:52:54.821+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T12:52:54.846+0000] {taskinstance.py:3280} INFO - 0 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=manual__2024-02-01T125548.893644+0000/task_id=display_first_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T12:55:59.440+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T12:55:59.544+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T12:55:59.551+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T12:55:59.603+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 12:55:48.893644+00:00 5 | [2024-02-01T12:55:59.648+0000] {standard_task_runner.py:60} INFO - Started process 16629 to run task 6 | [2024-02-01T12:55:59.673+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_first_message', 'manual__2024-02-01T12:55:48.893644+00:00', '--job-id', '16', '--raw', '--subdir', 'DAGS_FOLDER/quick_dag.py', '--cfg-path', '/tmp/tmpino12ct9'] 7 | [2024-02-01T12:55:59.685+0000] {standard_task_runner.py:88} INFO - Job 16: Subtask display_first_message 8 | [2024-02-01T12:56:00.826+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T12:56:05.303+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_first_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T12:55:48.893644+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2024-02-01T12:55:48.893644+00:00' 10 | [2024-02-01T12:56:06.022+0000] {logging_mixin.py:188} INFO - This is the 1st message (1/3)🔴 11 | [2024-02-01T12:56:06.037+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T12:56:06.742+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_first_message, execution_date=20240201T125548, start_date=20240201T125559, end_date=20240201T125606 13 | [2024-02-01T12:56:07.878+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T12:56:08.698+0000] {taskinstance.py:3280} INFO - 0 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=manual__2024-02-01T125548.893644+0000/task_id=display_second_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T12:56:24.089+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T12:56:24.194+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T12:56:24.195+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T12:56:24.263+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 12:55:48.893644+00:00 5 | [2024-02-01T12:56:24.307+0000] {standard_task_runner.py:60} INFO - Started process 16631 to run task 6 | [2024-02-01T12:56:24.361+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_second_message', 'manual__2024-02-01T12:55:48.893644+00:00', '--job-id', '17', '--raw', '--subdir', 'DAGS_FOLDER/quick_dag.py', '--cfg-path', '/tmp/tmphh3m3cjw'] 7 | [2024-02-01T12:56:24.391+0000] {standard_task_runner.py:88} INFO - Job 17: Subtask display_second_message 8 | [2024-02-01T12:56:24.872+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T12:56:25.191+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_second_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T12:55:48.893644+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2024-02-01T12:55:48.893644+00:00' 10 | [2024-02-01T12:56:25.200+0000] {logging_mixin.py:188} INFO - This is the 2nd message (2/3)🟡 11 | [2024-02-01T12:56:25.201+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T12:56:25.242+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_second_message, execution_date=20240201T125548, start_date=20240201T125624, end_date=20240201T125625 13 | [2024-02-01T12:56:25.349+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T12:56:25.418+0000] {taskinstance.py:3280} INFO - 1 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=manual__2024-02-01T125548.893644+0000/task_id=display_third_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T12:56:29.521+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T12:56:29.558+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T12:56:29.560+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T12:56:29.586+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 12:55:48.893644+00:00 5 | [2024-02-01T12:56:29.599+0000] {standard_task_runner.py:60} INFO - Started process 16633 to run task 6 | [2024-02-01T12:56:29.607+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_third_message', 'manual__2024-02-01T12:55:48.893644+00:00', '--job-id', '18', '--raw', '--subdir', 'DAGS_FOLDER/quick_dag.py', '--cfg-path', '/tmp/tmp10mxxbtr'] 7 | [2024-02-01T12:56:29.613+0000] {standard_task_runner.py:88} INFO - Job 18: Subtask display_third_message 8 | [2024-02-01T12:56:29.874+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T12:56:30.123+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_third_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T12:55:48.893644+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2024-02-01T12:55:48.893644+00:00' 10 | [2024-02-01T12:56:30.129+0000] {logging_mixin.py:188} INFO - This is the 3rd message (3/3)🟢 11 | [2024-02-01T12:56:30.131+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T12:56:30.148+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_third_message, execution_date=20240201T125548, start_date=20240201T125629, end_date=20240201T125630 13 | [2024-02-01T12:56:30.205+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T12:56:30.234+0000] {taskinstance.py:3280} INFO - 0 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=scheduled__2024-02-01T134800+0000/task_id=display_first_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T13:50:07.642+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T13:50:07.676+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T13:50:07.678+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T13:50:07.714+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 13:48:00+00:00 5 | [2024-02-01T13:50:07.742+0000] {standard_task_runner.py:60} INFO - Started process 17342 to run task 6 | [2024-02-01T13:50:07.760+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_first_message', 'scheduled__2024-02-01T13:48:00+00:00', '--job-id', '19', '--raw', '--subdir', 'DAGS_FOLDER/test_dag_1.py', '--cfg-path', '/tmp/tmpbjkbnl0v'] 7 | [2024-02-01T13:50:07.769+0000] {standard_task_runner.py:88} INFO - Job 19: Subtask display_first_message 8 | [2024-02-01T13:50:07.919+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T13:50:08.386+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_first_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T13:48:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2024-02-01T13:48:00+00:00' 10 | [2024-02-01T13:50:08.390+0000] {logging_mixin.py:188} INFO - This is the 1st message (1/3)🔴 11 | [2024-02-01T13:50:08.391+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T13:50:08.410+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_first_message, execution_date=20240201T134800, start_date=20240201T135007, end_date=20240201T135008 13 | [2024-02-01T13:50:08.481+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T13:50:08.563+0000] {taskinstance.py:3280} INFO - 1 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=scheduled__2024-02-01T134800+0000/task_id=display_second_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T13:50:11.997+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T13:50:12.016+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T13:50:12.022+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T13:50:12.061+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 13:48:00+00:00 5 | [2024-02-01T13:50:12.115+0000] {standard_task_runner.py:60} INFO - Started process 17344 to run task 6 | [2024-02-01T13:50:12.124+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_second_message', 'scheduled__2024-02-01T13:48:00+00:00', '--job-id', '20', '--raw', '--subdir', 'DAGS_FOLDER/test_dag_1.py', '--cfg-path', '/tmp/tmp98fej8nt'] 7 | [2024-02-01T13:50:12.138+0000] {standard_task_runner.py:88} INFO - Job 20: Subtask display_second_message 8 | [2024-02-01T13:50:12.331+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T13:50:12.894+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_second_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T13:48:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2024-02-01T13:48:00+00:00' 10 | [2024-02-01T13:50:12.903+0000] {logging_mixin.py:188} INFO - This is the 2nd message (2/3)🟡 11 | [2024-02-01T13:50:12.905+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T13:50:12.946+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_second_message, execution_date=20240201T134800, start_date=20240201T135012, end_date=20240201T135012 13 | [2024-02-01T13:50:13.577+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T13:50:14.142+0000] {taskinstance.py:3280} INFO - 0 downstream tasks scheduled from follow-on schedule check 15 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/dag_id=message_display_dag/run_id=scheduled__2024-02-01T134800+0000/task_id=display_third_message/attempt=1.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T13:50:24.465+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=non-requeueable deps ti= 2 | [2024-02-01T13:50:24.508+0000] {taskinstance.py:1956} INFO - Dependencies all met for dep_context=requeueable deps ti= 3 | [2024-02-01T13:50:24.509+0000] {taskinstance.py:2170} INFO - Starting attempt 1 of 2 4 | [2024-02-01T13:50:24.560+0000] {taskinstance.py:2191} INFO - Executing on 2024-02-01 13:48:00+00:00 5 | [2024-02-01T13:50:24.579+0000] {standard_task_runner.py:60} INFO - Started process 17354 to run task 6 | [2024-02-01T13:50:24.670+0000] {standard_task_runner.py:87} INFO - Running: ['***', 'tasks', 'run', 'message_display_dag', 'display_third_message', 'scheduled__2024-02-01T13:48:00+00:00', '--job-id', '21', '--raw', '--subdir', 'DAGS_FOLDER/test_dag_1.py', '--cfg-path', '/tmp/tmp7yk7qxzg'] 7 | [2024-02-01T13:50:24.681+0000] {standard_task_runner.py:88} INFO - Job 21: Subtask display_third_message 8 | [2024-02-01T13:50:25.082+0000] {task_command.py:423} INFO - Running on host 66086c79257b 9 | [2024-02-01T13:50:25.595+0000] {taskinstance.py:2480} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='***' AIRFLOW_CTX_DAG_ID='message_display_dag' AIRFLOW_CTX_TASK_ID='display_third_message' AIRFLOW_CTX_EXECUTION_DATE='2024-02-01T13:48:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='scheduled__2024-02-01T13:48:00+00:00' 10 | [2024-02-01T13:50:25.599+0000] {logging_mixin.py:188} INFO - This is the 3rd message (3/3)🟢 11 | [2024-02-01T13:50:25.606+0000] {python.py:201} INFO - Done. Returned value was: None 12 | [2024-02-01T13:50:25.687+0000] {taskinstance.py:1138} INFO - Marking task as SUCCESS. 
dag_id=message_display_dag, task_id=display_third_message, execution_date=20240201T134800, start_date=20240201T135024, end_date=20240201T135025 13 | [2024-02-01T13:50:25.844+0000] {local_task_job_runner.py:234} INFO - Task exited with return code 0 14 | [2024-02-01T13:50:25.925+0000] {taskinstance.py:3280} INFO - 0 downstream tasks scheduled from follow-on schedule check 15 | --------------------------------------------------------------------------------
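The message_display_dag task logs above already pin down most of the DAG's shape: three PythonOperator tasks chained display_first_message -> display_second_message -> display_third_message (each callable returns None per the python.py:201 lines), one retry per task ("Starting attempt 1 of 2"), and a schedule whose next_dagrun advances in two-minute steps. The DAG file itself isn't reproduced in this excerpt (the runs reference DAGS_FOLDER/quick_dag.py and later DAGS_FOLDER/test_dag_1.py), so the sketch below is only a minimal reconstruction consistent with these logs; the start_date, catchup setting, and use of lambdas are assumptions, not the repo's actual code:

```python
# Hypothetical sketch of a DAG that would produce the task logs above.
# Grounded in the logs: dag_id, task_ids, printed messages, retries=1,
# and a 2-minute cadence. Everything else is an assumption.
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.python import PythonOperator

with DAG(
    dag_id="message_display_dag",
    start_date=datetime(2024, 2, 1),   # assumption
    schedule=timedelta(minutes=2),     # next_dagrun/run_after step by 2 minutes in the logs
    catchup=False,                     # assumption
    default_args={"retries": 1},       # "Starting attempt 1 of 2" => one retry configured
):
    first = PythonOperator(
        task_id="display_first_message",
        python_callable=lambda: print("This is the 1st message (1/3)🔴"),
    )
    second = PythonOperator(
        task_id="display_second_message",
        python_callable=lambda: print("This is the 2nd message (2/3)🟡"),
    )
    third = PythonOperator(
        task_id="display_third_message",
        python_callable=lambda: print("This is the 3rd message (3/3)🟢"),
    )

    first >> second >> third  # matches the 1/3 -> 2/3 -> 3/3 print order
```

On reading the run_ids: the manual__... runs were triggered by hand (e.g. with `airflow dags trigger message_display_dag` or from the UI), while scheduled__2024-02-01T13:48:00+00:00 was created by the scheduler after the 13:48-13:50 data interval had passed, which is why its tasks only start around 13:50:07.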
/python/short_vids/airflow/demo_1/logs/scheduler/2024-02-01/dummy_dag_1.py.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T13:46:46.005+0000] {processor.py:161} INFO - Started process (PID=16745) to work on /opt/airflow/dags/dummy_dag_1.py 2 | [2024-02-01T13:46:46.024+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_1.py for tasks to queue 3 | [2024-02-01T13:46:46.052+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:46.038+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_1.py 4 | [2024-02-01T13:46:46.442+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/dummy_dag_1.py 5 | [2024-02-01T13:46:47.752+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.751+0000] {override.py:1769} INFO - Created Permission View: can read on DAG:test_dag_1_dag 6 | [2024-02-01T13:46:47.812+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.810+0000] {override.py:1769} INFO - Created Permission View: can edit on DAG:test_dag_1_dag 7 | [2024-02-01T13:46:47.845+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.845+0000] {override.py:1769} INFO - Created Permission View: can delete on DAG:test_dag_1_dag 8 | [2024-02-01T13:46:47.848+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.848+0000] {dag.py:3036} INFO - Sync 1 DAGs 9 | [2024-02-01T13:46:47.974+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.972+0000] {dag.py:3058} INFO - Creating ORM DAG for test_dag_1_dag 10 | [2024-02-01T13:46:48.048+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:48.047+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:46:00+00:00, run_after=2024-02-01 13:48:00+00:00 11 | [2024-02-01T13:46:48.156+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/dummy_dag_1.py took 2.203 seconds 12 | [2024-02-01T13:47:18.513+0000] {processor.py:161} INFO - Started process (PID=16754) to work on /opt/airflow/dags/dummy_dag_1.py 13 | [2024-02-01T13:47:18.523+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_1.py for tasks to queue 14 | [2024-02-01T13:47:18.526+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:47:18.526+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_1.py 15 | [2024-02-01T13:47:18.605+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/dummy_dag_1.py 16 | [2024-02-01T13:47:19.640+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:47:19.640+0000] {dag.py:3036} INFO - Sync 1 DAGs 17 | [2024-02-01T13:47:19.823+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:47:19.822+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:48:00+00:00, run_after=2024-02-01 13:50:00+00:00 18 | [2024-02-01T13:47:20.008+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/dummy_dag_1.py took 1.506 seconds 19 | [2024-02-01T13:47:56.639+0000] {processor.py:161} INFO - Started process (PID=16764) to work on /opt/airflow/dags/dummy_dag_1.py 20
| [2024-02-01T13:47:56.726+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_1.py for tasks to queue 21 | [2024-02-01T13:47:57.079+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:47:57.066+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_1.py 22 | [2024-02-01T13:48:00.313+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/dummy_dag_1.py 23 | [2024-02-01T13:48:16.076+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:16.048+0000] {dag.py:3036} INFO - Sync 1 DAGs 24 | [2024-02-01T13:48:17.228+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:17.227+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:48:00+00:00, run_after=2024-02-01 13:50:00+00:00 25 | [2024-02-01T13:48:17.707+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/dummy_dag_1.py took 21.506 seconds 26 | [2024-02-01T13:48:48.702+0000] {processor.py:161} INFO - Started process (PID=16780) to work on /opt/airflow/dags/dummy_dag_1.py 27 | [2024-02-01T13:48:48.706+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_1.py for tasks to queue 28 | [2024-02-01T13:48:48.734+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:48.731+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_1.py 29 | [2024-02-01T13:48:48.956+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/dummy_dag_1.py 30 | [2024-02-01T13:48:49.491+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:49.491+0000] {dag.py:3036} INFO - Sync 1 DAGs 31 | [2024-02-01T13:48:49.572+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:49.572+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:48:00+00:00, run_after=2024-02-01 13:50:00+00:00 32 | [2024-02-01T13:48:49.691+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/dummy_dag_1.py took 1.020 seconds 33 | [2024-02-01T13:49:20.472+0000] {processor.py:161} INFO - Started process (PID=16788) to work on /opt/airflow/dags/dummy_dag_1.py 34 | [2024-02-01T13:49:20.486+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_1.py for tasks to queue 35 | [2024-02-01T13:49:20.496+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:49:20.494+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_1.py 36 | [2024-02-01T13:49:20.778+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/dummy_dag_1.py 37 | [2024-02-01T13:49:21.516+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:49:21.515+0000] {dag.py:3036} INFO - Sync 1 DAGs 38 | [2024-02-01T13:49:21.573+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:49:21.573+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:50:00+00:00, run_after=2024-02-01 13:52:00+00:00 39 | [2024-02-01T13:49:21.627+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/dummy_dag_1.py took 1.221 seconds 40 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/scheduler/2024-02-01/dummy_dag_2.py.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T13:46:45.960+0000] {processor.py:161} INFO - Started process (PID=16744) to work on /opt/airflow/dags/dummy_dag_2.py 2 | [2024-02-01T13:46:45.972+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_2.py for tasks to queue 3 |
[2024-02-01T13:46:46.024+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:46.021+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_2.py 4 | [2024-02-01T13:46:46.467+0000] {processor.py:840} INFO - DAG(s) 'test_dag_2_dag' retrieved from /opt/airflow/dags/dummy_dag_2.py 5 | [2024-02-01T13:46:47.716+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.705+0000] {override.py:1769} INFO - Created Permission View: can read on DAG:test_dag_2_dag 6 | [2024-02-01T13:46:47.810+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.809+0000] {override.py:1769} INFO - Created Permission View: can edit on DAG:test_dag_2_dag 7 | [2024-02-01T13:46:47.836+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.836+0000] {override.py:1769} INFO - Created Permission View: can delete on DAG:test_dag_2_dag 8 | [2024-02-01T13:46:47.838+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.838+0000] {dag.py:3036} INFO - Sync 1 DAGs 9 | [2024-02-01T13:46:47.917+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:47.916+0000] {dag.py:3058} INFO - Creating ORM DAG for test_dag_2_dag 10 | [2024-02-01T13:46:48.042+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:46:48.038+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_2_dag to 2024-02-01 13:46:00+00:00, run_after=2024-02-01 13:48:00+00:00 11 | [2024-02-01T13:46:48.159+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/dummy_dag_2.py took 2.559 seconds 12 | [2024-02-01T13:47:18.478+0000] {processor.py:161} INFO - Started process (PID=16753) to work on /opt/airflow/dags/dummy_dag_2.py 13 | [2024-02-01T13:47:18.482+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_2.py for tasks to queue 14 | [2024-02-01T13:47:18.489+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:47:18.488+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_2.py 15 | [2024-02-01T13:47:18.544+0000] {processor.py:840} INFO - DAG(s) 'test_dag_2_dag' retrieved from /opt/airflow/dags/dummy_dag_2.py 16 | [2024-02-01T13:47:19.574+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:47:19.574+0000] {dag.py:3036} INFO - Sync 1 DAGs 17 | [2024-02-01T13:47:19.760+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:47:19.742+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_2_dag to 2024-02-01 13:48:00+00:00, run_after=2024-02-01 13:50:00+00:00 18 | [2024-02-01T13:47:19.963+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/dummy_dag_2.py took 1.492 seconds 19 | [2024-02-01T13:47:56.639+0000] {processor.py:161} INFO - Started process (PID=16763) to work on /opt/airflow/dags/dummy_dag_2.py 20 | [2024-02-01T13:47:56.726+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_2.py for tasks to queue 21 | [2024-02-01T13:47:57.098+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:47:57.051+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_2.py 22 | [2024-02-01T13:48:00.303+0000] {processor.py:840} INFO - DAG(s) 'test_dag_2_dag' retrieved from /opt/airflow/dags/dummy_dag_2.py 23 | [2024-02-01T13:48:16.049+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:16.027+0000] {dag.py:3036} INFO - Sync 1 DAGs 24 | [2024-02-01T13:48:17.230+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:17.229+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_2_dag to 2024-02-01 13:48:00+00:00, run_after=2024-02-01 13:50:00+00:00 25 | [2024-02-01T13:48:17.713+0000] {processor.py:183} INFO - Processing 
/opt/airflow/dags/dummy_dag_2.py took 22.145 seconds 26 | [2024-02-01T13:48:48.702+0000] {processor.py:161} INFO - Started process (PID=16779) to work on /opt/airflow/dags/dummy_dag_2.py 27 | [2024-02-01T13:48:48.706+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_2.py for tasks to queue 28 | [2024-02-01T13:48:48.734+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:48.731+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_2.py 29 | [2024-02-01T13:48:48.956+0000] {processor.py:840} INFO - DAG(s) 'test_dag_2_dag' retrieved from /opt/airflow/dags/dummy_dag_2.py 30 | [2024-02-01T13:48:49.490+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:49.489+0000] {dag.py:3036} INFO - Sync 1 DAGs 31 | [2024-02-01T13:48:49.572+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:48:49.572+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_2_dag to 2024-02-01 13:48:00+00:00, run_after=2024-02-01 13:50:00+00:00 32 | [2024-02-01T13:48:49.690+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/dummy_dag_2.py took 1.028 seconds 33 | [2024-02-01T13:49:20.141+0000] {processor.py:161} INFO - Started process (PID=16787) to work on /opt/airflow/dags/dummy_dag_2.py 34 | [2024-02-01T13:49:20.160+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/dummy_dag_2.py for tasks to queue 35 | [2024-02-01T13:49:20.169+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:49:20.166+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/dummy_dag_2.py 36 | [2024-02-01T13:49:20.251+0000] {processor.py:840} INFO - DAG(s) 'test_dag_2_dag' retrieved from /opt/airflow/dags/dummy_dag_2.py 37 | [2024-02-01T13:49:21.057+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:49:21.056+0000] {dag.py:3036} INFO - Sync 1 DAGs 38 | [2024-02-01T13:49:21.250+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:49:21.238+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_2_dag to 2024-02-01 13:50:00+00:00, run_after=2024-02-01 13:52:00+00:00 39 | [2024-02-01T13:49:21.391+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/dummy_dag_2.py took 1.379 seconds 40 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/scheduler/2024-02-01/test_dag_1.py.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T13:51:48.193+0000] {processor.py:161} INFO - Started process (PID=16814) to work on /opt/airflow/dags/test_dag_1.py 2 | [2024-02-01T13:51:48.196+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 3 | [2024-02-01T13:51:48.198+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:51:48.198+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 4 | [2024-02-01T13:51:48.272+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 5 | [2024-02-01T13:51:48.644+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:51:48.644+0000] {dag.py:3036} INFO - Sync 1 DAGs 6 | [2024-02-01T13:51:48.709+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:51:48.708+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:52:00+00:00, run_after=2024-02-01 13:54:00+00:00 7 | [2024-02-01T13:51:48.777+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.591 seconds 8 | [2024-02-01T13:52:21.075+0000] {processor.py:161} INFO - Started process (PID=16829) 
to work on /opt/airflow/dags/test_dag_1.py 9 | [2024-02-01T13:52:21.078+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 10 | [2024-02-01T13:52:21.081+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:21.081+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 11 | [2024-02-01T13:52:21.129+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 12 | [2024-02-01T13:52:21.317+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:21.317+0000] {dag.py:3036} INFO - Sync 1 DAGs 13 | [2024-02-01T13:52:21.374+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:21.374+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:52:00+00:00, run_after=2024-02-01 13:54:00+00:00 14 | [2024-02-01T13:52:21.412+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.342 seconds 15 | [2024-02-01T13:52:52.304+0000] {processor.py:161} INFO - Started process (PID=16838) to work on /opt/airflow/dags/test_dag_1.py 16 | [2024-02-01T13:52:52.307+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 17 | [2024-02-01T13:52:52.310+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:52.309+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 18 | [2024-02-01T13:52:52.342+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 19 | [2024-02-01T13:52:52.542+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:52.542+0000] {dag.py:3036} INFO - Sync 1 DAGs 20 | [2024-02-01T13:52:52.584+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:52.584+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:52:00+00:00, run_after=2024-02-01 13:54:00+00:00 21 | [2024-02-01T13:52:52.619+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.318 seconds 22 | [2024-02-01T13:53:26.697+0000] {processor.py:161} INFO - Started process (PID=16846) to work on /opt/airflow/dags/test_dag_1.py 23 | [2024-02-01T13:53:26.702+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 24 | [2024-02-01T13:53:26.716+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:26.715+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 25 | [2024-02-01T13:53:26.854+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 26 | [2024-02-01T13:53:28.282+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:28.281+0000] {dag.py:3036} INFO - Sync 1 DAGs 27 | [2024-02-01T13:53:28.388+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:28.387+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:54:00+00:00, run_after=2024-02-01 13:56:00+00:00 28 | [2024-02-01T13:53:28.487+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 1.803 seconds 29 | [2024-02-01T13:53:59.429+0000] {processor.py:161} INFO - Started process (PID=16855) to work on /opt/airflow/dags/test_dag_1.py 30 | [2024-02-01T13:53:59.432+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 31 | [2024-02-01T13:53:59.435+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:59.434+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 32 | 
[2024-02-01T13:53:59.503+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 33 | [2024-02-01T13:53:59.732+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:59.731+0000] {dag.py:3036} INFO - Sync 1 DAGs 34 | [2024-02-01T13:53:59.821+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:59.820+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:54:00+00:00, run_after=2024-02-01 13:56:00+00:00 35 | [2024-02-01T13:53:59.893+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.490 seconds 36 | [2024-02-01T13:54:31.007+0000] {processor.py:161} INFO - Started process (PID=16863) to work on /opt/airflow/dags/test_dag_1.py 37 | [2024-02-01T13:54:31.022+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 38 | [2024-02-01T13:54:31.027+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:54:31.025+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 39 | [2024-02-01T13:54:31.104+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 40 | [2024-02-01T13:54:31.317+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:54:31.317+0000] {dag.py:3036} INFO - Sync 1 DAGs 41 | [2024-02-01T13:54:31.350+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:54:31.349+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:54:00+00:00, run_after=2024-02-01 13:56:00+00:00 42 | [2024-02-01T13:54:31.398+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.443 seconds 43 | [2024-02-01T13:55:01.859+0000] {processor.py:161} INFO - Started process (PID=16871) to work on /opt/airflow/dags/test_dag_1.py 44 | [2024-02-01T13:55:01.864+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 45 | [2024-02-01T13:55:01.868+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:01.867+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 46 | [2024-02-01T13:55:01.933+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 47 | [2024-02-01T13:55:02.163+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:02.163+0000] {dag.py:3036} INFO - Sync 1 DAGs 48 | [2024-02-01T13:55:02.280+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:02.280+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:56:00+00:00, run_after=2024-02-01 13:58:00+00:00 49 | [2024-02-01T13:55:02.380+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.527 seconds 50 | [2024-02-01T13:55:33.243+0000] {processor.py:161} INFO - Started process (PID=16880) to work on /opt/airflow/dags/test_dag_1.py 51 | [2024-02-01T13:55:33.245+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 52 | [2024-02-01T13:55:33.248+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:33.247+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 53 | [2024-02-01T13:55:33.291+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 54 | [2024-02-01T13:55:33.469+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:33.468+0000] {dag.py:3036} INFO - Sync 1 DAGs 55 | [2024-02-01T13:55:33.523+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:33.522+0000] 
{dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:56:00+00:00, run_after=2024-02-01 13:58:00+00:00 56 | [2024-02-01T13:55:33.567+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.328 seconds 57 | [2024-02-01T13:56:17.306+0000] {processor.py:161} INFO - Started process (PID=16889) to work on /opt/airflow/dags/test_dag_1.py 58 | [2024-02-01T13:56:17.314+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 59 | [2024-02-01T13:56:17.323+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:17.322+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 60 | [2024-02-01T13:56:17.438+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 61 | [2024-02-01T13:56:18.071+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:18.070+0000] {dag.py:3036} INFO - Sync 1 DAGs 62 | [2024-02-01T13:56:18.240+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:18.239+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:56:00+00:00, run_after=2024-02-01 13:58:00+00:00 63 | [2024-02-01T13:56:18.355+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 1.071 seconds 64 | [2024-02-01T13:56:48.784+0000] {processor.py:161} INFO - Started process (PID=16898) to work on /opt/airflow/dags/test_dag_1.py 65 | [2024-02-01T13:56:48.787+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 66 | [2024-02-01T13:56:48.791+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:48.790+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 67 | [2024-02-01T13:56:48.871+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 68 | [2024-02-01T13:56:49.317+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:49.316+0000] {dag.py:3036} INFO - Sync 1 DAGs 69 | [2024-02-01T13:56:49.411+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:49.410+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:56:00+00:00, run_after=2024-02-01 13:58:00+00:00 70 | [2024-02-01T13:56:49.514+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.741 seconds 71 | [2024-02-01T13:57:46.074+0000] {processor.py:161} INFO - Started process (PID=16914) to work on /opt/airflow/dags/test_dag_1.py 72 | [2024-02-01T13:57:46.096+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 73 | [2024-02-01T13:57:46.123+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:57:46.108+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 74 | [2024-02-01T13:57:46.978+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 75 | [2024-02-01T13:57:49.304+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:57:49.303+0000] {dag.py:3036} INFO - Sync 1 DAGs 76 | [2024-02-01T13:57:49.722+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:57:49.680+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:58:00+00:00, run_after=2024-02-01 14:00:00+00:00 77 | [2024-02-01T13:57:50.084+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 4.096 seconds 78 | [2024-02-01T13:58:34.945+0000] {processor.py:161} INFO - Started process (PID=16924) to work on 
/opt/airflow/dags/test_dag_1.py 79 | [2024-02-01T13:58:35.106+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 80 | [2024-02-01T13:58:35.117+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:58:35.115+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 81 | [2024-02-01T13:58:35.719+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 82 | [2024-02-01T13:58:36.941+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:58:36.940+0000] {dag.py:3036} INFO - Sync 1 DAGs 83 | [2024-02-01T13:58:37.070+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:58:37.069+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 13:58:00+00:00, run_after=2024-02-01 14:00:00+00:00 84 | [2024-02-01T13:58:37.182+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 2.315 seconds 85 | [2024-02-01T13:59:07.306+0000] {processor.py:161} INFO - Started process (PID=16933) to work on /opt/airflow/dags/test_dag_1.py 86 | [2024-02-01T13:59:07.308+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 87 | [2024-02-01T13:59:07.311+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:07.311+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 88 | [2024-02-01T13:59:07.364+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 89 | [2024-02-01T13:59:07.637+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:07.636+0000] {dag.py:3036} INFO - Sync 1 DAGs 90 | [2024-02-01T13:59:07.705+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:07.704+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:00:00+00:00, run_after=2024-02-01 14:02:00+00:00 91 | [2024-02-01T13:59:07.755+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.462 seconds 92 | [2024-02-01T13:59:38.155+0000] {processor.py:161} INFO - Started process (PID=16942) to work on /opt/airflow/dags/test_dag_1.py 93 | [2024-02-01T13:59:38.164+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 94 | [2024-02-01T13:59:38.173+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:38.172+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 95 | [2024-02-01T13:59:38.235+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 96 | [2024-02-01T13:59:38.440+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:38.439+0000] {dag.py:3036} INFO - Sync 1 DAGs 97 | [2024-02-01T13:59:38.492+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:38.492+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:00:00+00:00, run_after=2024-02-01 14:02:00+00:00 98 | [2024-02-01T13:59:38.528+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.389 seconds 99 | [2024-02-01T14:00:09.145+0000] {processor.py:161} INFO - Started process (PID=16951) to work on /opt/airflow/dags/test_dag_1.py 100 | [2024-02-01T14:00:09.148+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 101 | [2024-02-01T14:00:09.149+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:09.149+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 102 | 
[2024-02-01T14:00:09.185+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 103 | [2024-02-01T14:00:09.420+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:09.419+0000] {dag.py:3036} INFO - Sync 1 DAGs 104 | [2024-02-01T14:00:09.494+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:09.493+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:00:00+00:00, run_after=2024-02-01 14:02:00+00:00 105 | [2024-02-01T14:00:09.561+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.422 seconds 106 | [2024-02-01T14:00:40.000+0000] {processor.py:161} INFO - Started process (PID=16960) to work on /opt/airflow/dags/test_dag_1.py 107 | [2024-02-01T14:00:40.003+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 108 | [2024-02-01T14:00:40.007+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:40.006+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 109 | [2024-02-01T14:00:40.064+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 110 | [2024-02-01T14:00:40.234+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:40.234+0000] {dag.py:3036} INFO - Sync 1 DAGs 111 | [2024-02-01T14:00:40.274+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:40.274+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:00:00+00:00, run_after=2024-02-01 14:02:00+00:00 112 | [2024-02-01T14:00:40.321+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.324 seconds 113 | [2024-02-01T14:01:28.066+0000] {processor.py:161} INFO - Started process (PID=16975) to work on /opt/airflow/dags/test_dag_1.py 114 | [2024-02-01T14:01:28.088+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 115 | [2024-02-01T14:01:28.107+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:28.100+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 116 | [2024-02-01T14:01:28.405+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 117 | [2024-02-01T14:01:29.233+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:29.232+0000] {dag.py:3036} INFO - Sync 1 DAGs 118 | [2024-02-01T14:01:29.322+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:29.322+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:02:00+00:00, run_after=2024-02-01 14:04:00+00:00 119 | [2024-02-01T14:01:29.558+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 1.591 seconds 120 | [2024-02-01T14:01:59.743+0000] {processor.py:161} INFO - Started process (PID=16985) to work on /opt/airflow/dags/test_dag_1.py 121 | [2024-02-01T14:01:59.746+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 122 | [2024-02-01T14:01:59.749+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:59.748+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 123 | [2024-02-01T14:01:59.805+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 124 | [2024-02-01T14:02:00.054+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:02:00.053+0000] {dag.py:3036} INFO - Sync 1 DAGs 125 | [2024-02-01T14:02:00.101+0000] {logging_mixin.py:188} INFO - 
[2024-02-01T14:02:00.100+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:02:00+00:00, run_after=2024-02-01 14:04:00+00:00 126 | [2024-02-01T14:02:00.153+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.420 seconds 127 | [2024-02-01T14:02:30.716+0000] {processor.py:161} INFO - Started process (PID=16995) to work on /opt/airflow/dags/test_dag_1.py 128 | [2024-02-01T14:02:30.719+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 129 | [2024-02-01T14:02:30.722+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:02:30.721+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 130 | [2024-02-01T14:02:30.777+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 131 | [2024-02-01T14:02:31.034+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:02:31.033+0000] {dag.py:3036} INFO - Sync 1 DAGs 132 | [2024-02-01T14:02:31.080+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:02:31.080+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:02:00+00:00, run_after=2024-02-01 14:04:00+00:00 133 | [2024-02-01T14:02:31.137+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.427 seconds 134 | [2024-02-01T14:03:01.218+0000] {processor.py:161} INFO - Started process (PID=17004) to work on /opt/airflow/dags/test_dag_1.py 135 | [2024-02-01T14:03:01.220+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 136 | [2024-02-01T14:03:01.222+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:01.222+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 137 | [2024-02-01T14:03:01.263+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 138 | [2024-02-01T14:03:01.457+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:01.456+0000] {dag.py:3036} INFO - Sync 1 DAGs 139 | [2024-02-01T14:03:01.551+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:01.551+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:04:00+00:00, run_after=2024-02-01 14:06:00+00:00 140 | [2024-02-01T14:03:01.595+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.381 seconds 141 | [2024-02-01T14:03:32.169+0000] {processor.py:161} INFO - Started process (PID=17013) to work on /opt/airflow/dags/test_dag_1.py 142 | [2024-02-01T14:03:32.171+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 143 | [2024-02-01T14:03:32.174+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:32.173+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 144 | [2024-02-01T14:03:32.218+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 145 | [2024-02-01T14:03:32.443+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:32.442+0000] {dag.py:3036} INFO - Sync 1 DAGs 146 | [2024-02-01T14:03:32.490+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:32.489+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:04:00+00:00, run_after=2024-02-01 14:06:00+00:00 147 | [2024-02-01T14:03:32.552+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.390 seconds 148 | [2024-02-01T14:04:03.426+0000] {processor.py:161} INFO - 
Started process (PID=17022) to work on /opt/airflow/dags/test_dag_1.py 149 | [2024-02-01T14:04:03.428+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 150 | [2024-02-01T14:04:03.433+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:03.432+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 151 | [2024-02-01T14:04:03.496+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 152 | [2024-02-01T14:04:03.796+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:03.795+0000] {dag.py:3036} INFO - Sync 1 DAGs 153 | [2024-02-01T14:04:03.844+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:03.843+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:04:00+00:00, run_after=2024-02-01 14:06:00+00:00 154 | [2024-02-01T14:04:03.898+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.479 seconds 155 | [2024-02-01T14:04:34.929+0000] {processor.py:161} INFO - Started process (PID=17031) to work on /opt/airflow/dags/test_dag_1.py 156 | [2024-02-01T14:04:34.931+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 157 | [2024-02-01T14:04:34.933+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:34.933+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 158 | [2024-02-01T14:04:34.992+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 159 | [2024-02-01T14:04:35.196+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:35.195+0000] {dag.py:3036} INFO - Sync 1 DAGs 160 | [2024-02-01T14:04:35.246+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:35.245+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:04:00+00:00, run_after=2024-02-01 14:06:00+00:00 161 | [2024-02-01T14:04:35.289+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.368 seconds 162 | [2024-02-01T14:05:05.802+0000] {processor.py:161} INFO - Started process (PID=17041) to work on /opt/airflow/dags/test_dag_1.py 163 | [2024-02-01T14:05:05.811+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 164 | [2024-02-01T14:05:05.820+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:05.815+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 165 | [2024-02-01T14:05:05.877+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 166 | [2024-02-01T14:05:06.118+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:06.117+0000] {dag.py:3036} INFO - Sync 1 DAGs 167 | [2024-02-01T14:05:06.175+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:06.174+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:06:00+00:00, run_after=2024-02-01 14:08:00+00:00 168 | [2024-02-01T14:05:06.239+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.450 seconds 169 | [2024-02-01T14:05:36.588+0000] {processor.py:161} INFO - Started process (PID=17050) to work on /opt/airflow/dags/test_dag_1.py 170 | [2024-02-01T14:05:36.590+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 171 | [2024-02-01T14:05:36.599+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:36.591+0000] {dagbag.py:538} INFO - Filling up the DagBag from 
/opt/airflow/dags/test_dag_1.py 172 | [2024-02-01T14:05:36.673+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 173 | [2024-02-01T14:05:36.918+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:36.918+0000] {dag.py:3036} INFO - Sync 1 DAGs 174 | [2024-02-01T14:05:36.981+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:36.980+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:06:00+00:00, run_after=2024-02-01 14:08:00+00:00 175 | [2024-02-01T14:05:37.044+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.460 seconds 176 | [2024-02-01T14:06:11.004+0000] {processor.py:161} INFO - Started process (PID=17060) to work on /opt/airflow/dags/test_dag_1.py 177 | [2024-02-01T14:06:11.008+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 178 | [2024-02-01T14:06:11.011+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:06:11.010+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 179 | [2024-02-01T14:06:11.089+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 180 | [2024-02-01T14:06:12.164+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:06:12.162+0000] {dag.py:3036} INFO - Sync 1 DAGs 181 | [2024-02-01T14:06:12.291+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:06:12.290+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:06:00+00:00, run_after=2024-02-01 14:08:00+00:00 182 | [2024-02-01T14:06:12.453+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 1.459 seconds 183 | [2024-02-01T14:07:55.604+0000] {processor.py:161} INFO - Started process (PID=17076) to work on /opt/airflow/dags/test_dag_1.py 184 | [2024-02-01T14:07:55.685+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 185 | [2024-02-01T14:07:55.747+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:07:55.706+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 186 | [2024-02-01T14:07:59.019+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 187 | [2024-02-01T14:08:21.913+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:08:21.820+0000] {dag.py:3036} INFO - Sync 1 DAGs 188 | [2024-02-01T14:08:24.209+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:08:24.200+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:08:00+00:00, run_after=2024-02-01 14:10:00+00:00 189 | [2024-02-01T14:08:25.694+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 31.130 seconds 190 | [2024-02-01T14:09:05.624+0000] {processor.py:161} INFO - Started process (PID=17091) to work on /opt/airflow/dags/test_dag_1.py 191 | [2024-02-01T14:09:05.858+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 192 | [2024-02-01T14:09:06.460+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:09:06.203+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 193 | [2024-02-01T14:09:22.106+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 194 | [2024-02-01T14:11:07.959+0000] {processor.py:161} INFO - Started process (PID=17112) to work on /opt/airflow/dags/test_dag_1.py 195 | [2024-02-01T14:11:07.967+0000] 
{processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 196 | [2024-02-01T14:11:07.977+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:11:07.969+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 197 | [2024-02-01T14:11:08.074+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 198 | [2024-02-01T14:11:08.821+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:11:08.820+0000] {dag.py:3036} INFO - Sync 1 DAGs 199 | [2024-02-01T14:11:09.002+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:11:09.001+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:12:00+00:00, run_after=2024-02-01 14:14:00+00:00 200 | [2024-02-01T14:11:09.236+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 1.314 seconds 201 | [2024-02-01T14:12:52.229+0000] {processor.py:161} INFO - Started process (PID=17127) to work on /opt/airflow/dags/test_dag_1.py 202 | [2024-02-01T14:12:52.395+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 203 | [2024-02-01T14:12:52.587+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:12:52.538+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 204 | [2024-02-01T14:12:58.183+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 205 | [2024-02-01T14:13:08.788+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:13:08.743+0000] {dag.py:3036} INFO - Sync 1 DAGs 206 | [2024-02-01T14:13:12.463+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:13:12.459+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:12:00+00:00, run_after=2024-02-01 14:14:00+00:00 207 | [2024-02-01T14:13:14.429+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 24.238 seconds 208 | [2024-02-01T14:46:06.275+0000] {processor.py:161} INFO - Started process (PID=36) to work on /opt/airflow/dags/test_dag_1.py 209 | [2024-02-01T14:46:06.300+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 210 | [2024-02-01T14:46:06.325+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:46:06.321+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 211 | [2024-02-01T14:46:06.494+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 212 | [2024-02-01T14:46:07.169+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:46:07.165+0000] {dag.py:3036} INFO - Sync 1 DAGs 213 | [2024-02-01T14:46:07.371+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:46:07.370+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:46:00+00:00, run_after=2024-02-01 14:48:00+00:00 214 | [2024-02-01T14:46:07.514+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 1.245 seconds 215 | [2024-02-01T14:48:38.849+0000] {processor.py:161} INFO - Started process (PID=36) to work on /opt/airflow/dags/test_dag_1.py 216 | [2024-02-01T14:48:38.855+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 217 | [2024-02-01T14:48:38.866+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:48:38.865+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 218 | [2024-02-01T14:48:38.891+0000] {processor.py:840} INFO - DAG(s) 
'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 219 | [2024-02-01T14:48:39.084+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:48:39.083+0000] {dag.py:3036} INFO - Sync 1 DAGs 220 | [2024-02-01T14:48:39.138+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:48:39.137+0000] {dag.py:3823} INFO - Setting next_dagrun for message_display_dag to 2024-02-01 14:48:00+00:00, run_after=2024-02-01 14:50:00+00:00 221 | [2024-02-01T14:48:39.182+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_1.py took 0.337 seconds 222 | [2024-02-01T14:49:10.651+0000] {processor.py:161} INFO - Started process (PID=44) to work on /opt/airflow/dags/test_dag_1.py 223 | [2024-02-01T14:49:10.654+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 224 | [2024-02-01T14:49:10.656+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:49:10.656+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 225 | [2024-02-01T14:49:10.683+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 226 | [2024-02-01T14:49:10.817+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:49:10.816+0000] {dag.py:3036} INFO - Sync 1 DAGs 227 | [2024-02-01T14:50:32.107+0000] {processor.py:161} INFO - Started process (PID=67) to work on /opt/airflow/dags/test_dag_1.py 228 | [2024-02-01T14:50:32.109+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_1.py for tasks to queue 229 | [2024-02-01T14:50:32.112+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:50:32.112+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_1.py 230 | [2024-02-01T14:50:32.142+0000] {processor.py:840} INFO - DAG(s) 'message_display_dag' retrieved from /opt/airflow/dags/test_dag_1.py 231 | [2024-02-01T14:50:32.263+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:50:32.262+0000] {dag.py:3036} INFO - Sync 1 DAGs 232 | -------------------------------------------------------------------------------- /python/short_vids/airflow/demo_1/logs/scheduler/2024-02-01/test_dag_3.py.log: -------------------------------------------------------------------------------- 1 | [2024-02-01T13:51:46.499+0000] {processor.py:161} INFO - Started process (PID=16813) to work on /opt/airflow/dags/test_dag_3.py 2 | [2024-02-01T13:51:46.506+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 3 | [2024-02-01T13:51:46.517+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:51:46.514+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 4 | [2024-02-01T13:51:46.978+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 5 | [2024-02-01T13:51:47.889+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:51:47.888+0000] {dag.py:3036} INFO - Sync 1 DAGs 6 | [2024-02-01T13:51:47.990+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:51:47.989+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:52:00+00:00, run_after=2024-02-01 13:54:00+00:00 7 | [2024-02-01T13:51:48.079+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 1.651 seconds 8 | [2024-02-01T13:52:18.370+0000] {processor.py:161} INFO - Started process (PID=16822) to work on /opt/airflow/dags/test_dag_3.py 9 | [2024-02-01T13:52:18.380+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 10 | 
[2024-02-01T13:52:18.390+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:18.383+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 11 | [2024-02-01T13:52:18.550+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 12 | [2024-02-01T13:52:20.844+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:20.844+0000] {dag.py:3036} INFO - Sync 1 DAGs 13 | [2024-02-01T13:52:20.939+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:20.938+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:52:00+00:00, run_after=2024-02-01 13:54:00+00:00 14 | [2024-02-01T13:52:20.996+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 2.649 seconds 15 | [2024-02-01T13:52:51.554+0000] {processor.py:161} INFO - Started process (PID=16837) to work on /opt/airflow/dags/test_dag_3.py 16 | [2024-02-01T13:52:51.567+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 17 | [2024-02-01T13:52:51.571+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:51.571+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 18 | [2024-02-01T13:52:51.670+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 19 | [2024-02-01T13:52:52.067+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:52.066+0000] {dag.py:3036} INFO - Sync 1 DAGs 20 | [2024-02-01T13:52:52.141+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:52:52.141+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:52:00+00:00, run_after=2024-02-01 13:54:00+00:00 21 | [2024-02-01T13:52:52.229+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.692 seconds 22 | [2024-02-01T13:53:23.230+0000] {processor.py:161} INFO - Started process (PID=16845) to work on /opt/airflow/dags/test_dag_3.py 23 | [2024-02-01T13:53:23.255+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 24 | [2024-02-01T13:53:23.265+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:23.261+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 25 | [2024-02-01T13:53:23.673+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 26 | [2024-02-01T13:53:25.074+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:25.073+0000] {dag.py:3036} INFO - Sync 1 DAGs 27 | [2024-02-01T13:53:25.686+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:25.684+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:54:00+00:00, run_after=2024-02-01 13:56:00+00:00 28 | [2024-02-01T13:53:26.007+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 2.866 seconds 29 | [2024-02-01T13:53:57.438+0000] {processor.py:161} INFO - Started process (PID=16854) to work on /opt/airflow/dags/test_dag_3.py 30 | [2024-02-01T13:53:57.447+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 31 | [2024-02-01T13:53:57.463+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:57.458+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 32 | [2024-02-01T13:53:57.948+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 33 | [2024-02-01T13:53:58.996+0000] {logging_mixin.py:188} INFO - 
[2024-02-01T13:53:58.993+0000] {dag.py:3036} INFO - Sync 1 DAGs 34 | [2024-02-01T13:53:59.208+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:53:59.205+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:54:00+00:00, run_after=2024-02-01 13:56:00+00:00 35 | [2024-02-01T13:53:59.315+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 1.963 seconds 36 | [2024-02-01T13:54:29.965+0000] {processor.py:161} INFO - Started process (PID=16862) to work on /opt/airflow/dags/test_dag_3.py 37 | [2024-02-01T13:54:29.969+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 38 | [2024-02-01T13:54:29.976+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:54:29.973+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 39 | [2024-02-01T13:54:30.199+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 40 | [2024-02-01T13:54:30.633+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:54:30.633+0000] {dag.py:3036} INFO - Sync 1 DAGs 41 | [2024-02-01T13:54:30.694+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:54:30.693+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:54:00+00:00, run_after=2024-02-01 13:56:00+00:00 42 | [2024-02-01T13:54:30.762+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.855 seconds 43 | [2024-02-01T13:55:00.907+0000] {processor.py:161} INFO - Started process (PID=16869) to work on /opt/airflow/dags/test_dag_3.py 44 | [2024-02-01T13:55:00.909+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 45 | [2024-02-01T13:55:00.911+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:00.911+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 46 | [2024-02-01T13:55:00.973+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 47 | [2024-02-01T13:55:01.197+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:01.196+0000] {dag.py:3036} INFO - Sync 1 DAGs 48 | [2024-02-01T13:55:01.248+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:01.247+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:56:00+00:00, run_after=2024-02-01 13:58:00+00:00 49 | [2024-02-01T13:55:01.293+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.394 seconds 50 | [2024-02-01T13:55:32.041+0000] {processor.py:161} INFO - Started process (PID=16878) to work on /opt/airflow/dags/test_dag_3.py 51 | [2024-02-01T13:55:32.084+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 52 | [2024-02-01T13:55:32.096+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:32.095+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 53 | [2024-02-01T13:55:32.395+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 54 | [2024-02-01T13:55:32.868+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:32.868+0000] {dag.py:3036} INFO - Sync 1 DAGs 55 | [2024-02-01T13:55:32.963+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:55:32.963+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:56:00+00:00, run_after=2024-02-01 13:58:00+00:00 56 | [2024-02-01T13:55:33.116+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 
1.130 seconds 57 | [2024-02-01T13:56:06.072+0000] {processor.py:161} INFO - Started process (PID=16888) to work on /opt/airflow/dags/test_dag_3.py 58 | [2024-02-01T13:56:06.158+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 59 | [2024-02-01T13:56:06.223+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:06.198+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 60 | [2024-02-01T13:56:08.498+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 61 | [2024-02-01T13:56:16.093+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:16.092+0000] {dag.py:3036} INFO - Sync 1 DAGs 62 | [2024-02-01T13:56:16.711+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:16.711+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:56:00+00:00, run_after=2024-02-01 13:58:00+00:00 63 | [2024-02-01T13:56:17.090+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 11.451 seconds 64 | [2024-02-01T13:56:47.478+0000] {processor.py:161} INFO - Started process (PID=16897) to work on /opt/airflow/dags/test_dag_3.py 65 | [2024-02-01T13:56:47.481+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 66 | [2024-02-01T13:56:47.484+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:47.484+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 67 | [2024-02-01T13:56:47.617+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 68 | [2024-02-01T13:56:48.260+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:48.259+0000] {dag.py:3036} INFO - Sync 1 DAGs 69 | [2024-02-01T13:56:48.439+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:56:48.429+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:56:00+00:00, run_after=2024-02-01 13:58:00+00:00 70 | [2024-02-01T13:56:48.603+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 1.135 seconds 71 | [2024-02-01T13:57:23.817+0000] {processor.py:161} INFO - Started process (PID=16907) to work on /opt/airflow/dags/test_dag_3.py 72 | [2024-02-01T13:57:24.025+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 73 | [2024-02-01T13:57:24.123+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:57:24.061+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 74 | [2024-02-01T13:57:27.185+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 75 | [2024-02-01T13:57:43.467+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:57:43.456+0000] {dag.py:3036} INFO - Sync 1 DAGs 76 | [2024-02-01T13:57:44.585+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:57:44.584+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:58:00+00:00, run_after=2024-02-01 14:00:00+00:00 77 | [2024-02-01T13:57:45.251+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 22.391 seconds 78 | [2024-02-01T13:58:18.682+0000] {processor.py:161} INFO - Started process (PID=16922) to work on /opt/airflow/dags/test_dag_3.py 79 | [2024-02-01T13:58:18.707+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 80 | [2024-02-01T13:58:18.762+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:58:18.738+0000] {dagbag.py:538} INFO - 
Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 81 | [2024-02-01T13:58:20.899+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 82 | [2024-02-01T13:58:32.238+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:58:32.168+0000] {dag.py:3036} INFO - Sync 1 DAGs 83 | [2024-02-01T13:58:33.733+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:58:33.732+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 13:58:00+00:00, run_after=2024-02-01 14:00:00+00:00 84 | [2024-02-01T13:58:34.437+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 16.125 seconds 85 | [2024-02-01T13:59:05.310+0000] {processor.py:161} INFO - Started process (PID=16932) to work on /opt/airflow/dags/test_dag_3.py 86 | [2024-02-01T13:59:05.329+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 87 | [2024-02-01T13:59:05.340+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:05.338+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 88 | [2024-02-01T13:59:05.588+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 89 | [2024-02-01T13:59:06.280+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:06.280+0000] {dag.py:3036} INFO - Sync 1 DAGs 90 | [2024-02-01T13:59:06.382+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:06.382+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:00:00+00:00, run_after=2024-02-01 14:02:00+00:00 91 | [2024-02-01T13:59:06.459+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 1.182 seconds 92 | [2024-02-01T13:59:37.080+0000] {processor.py:161} INFO - Started process (PID=16941) to work on /opt/airflow/dags/test_dag_3.py 93 | [2024-02-01T13:59:37.092+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 94 | [2024-02-01T13:59:37.094+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:37.093+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 95 | [2024-02-01T13:59:37.266+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 96 | [2024-02-01T13:59:37.921+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:37.920+0000] {dag.py:3036} INFO - Sync 1 DAGs 97 | [2024-02-01T13:59:38.002+0000] {logging_mixin.py:188} INFO - [2024-02-01T13:59:38.002+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:00:00+00:00, run_after=2024-02-01 14:02:00+00:00 98 | [2024-02-01T13:59:38.122+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 1.050 seconds 99 | [2024-02-01T14:00:08.736+0000] {processor.py:161} INFO - Started process (PID=16950) to work on /opt/airflow/dags/test_dag_3.py 100 | [2024-02-01T14:00:08.739+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 101 | [2024-02-01T14:00:08.743+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:08.742+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 102 | [2024-02-01T14:00:08.783+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 103 | [2024-02-01T14:00:08.978+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:08.978+0000] {dag.py:3036} INFO - Sync 1 DAGs 104 | [2024-02-01T14:00:09.033+0000] {logging_mixin.py:188} INFO - 
[2024-02-01T14:00:09.032+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:00:00+00:00, run_after=2024-02-01 14:02:00+00:00 105 | [2024-02-01T14:00:09.099+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.369 seconds 106 | [2024-02-01T14:00:39.569+0000] {processor.py:161} INFO - Started process (PID=16959) to work on /opt/airflow/dags/test_dag_3.py 107 | [2024-02-01T14:00:39.572+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 108 | [2024-02-01T14:00:39.578+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:39.576+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 109 | [2024-02-01T14:00:39.621+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 110 | [2024-02-01T14:00:39.850+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:39.849+0000] {dag.py:3036} INFO - Sync 1 DAGs 111 | [2024-02-01T14:00:39.888+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:00:39.888+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:00:00+00:00, run_after=2024-02-01 14:02:00+00:00 112 | [2024-02-01T14:00:39.932+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.366 seconds 113 | [2024-02-01T14:01:14.765+0000] {processor.py:161} INFO - Started process (PID=16968) to work on /opt/airflow/dags/test_dag_3.py 114 | [2024-02-01T14:01:14.910+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 115 | [2024-02-01T14:01:15.011+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:14.996+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 116 | [2024-02-01T14:01:16.499+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 117 | [2024-02-01T14:01:24.659+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:24.497+0000] {dag.py:3036} INFO - Sync 1 DAGs 118 | [2024-02-01T14:01:25.970+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:25.969+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:02:00+00:00, run_after=2024-02-01 14:04:00+00:00 119 | [2024-02-01T14:01:26.930+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 12.716 seconds 120 | [2024-02-01T14:01:57.955+0000] {processor.py:161} INFO - Started process (PID=16984) to work on /opt/airflow/dags/test_dag_3.py 121 | [2024-02-01T14:01:57.959+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 122 | [2024-02-01T14:01:57.965+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:57.963+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 123 | [2024-02-01T14:01:58.104+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 124 | [2024-02-01T14:01:58.506+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:58.505+0000] {dag.py:3036} INFO - Sync 1 DAGs 125 | [2024-02-01T14:01:58.563+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:01:58.563+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:02:00+00:00, run_after=2024-02-01 14:04:00+00:00 126 | [2024-02-01T14:01:58.636+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.693 seconds 127 | [2024-02-01T14:02:29.313+0000] {processor.py:161} INFO - Started process (PID=16994) to 
work on /opt/airflow/dags/test_dag_3.py 128 | [2024-02-01T14:02:29.343+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 129 | [2024-02-01T14:02:29.356+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:02:29.354+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 130 | [2024-02-01T14:02:29.467+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 131 | [2024-02-01T14:02:29.700+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:02:29.699+0000] {dag.py:3036} INFO - Sync 1 DAGs 132 | [2024-02-01T14:02:29.738+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:02:29.738+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:02:00+00:00, run_after=2024-02-01 14:04:00+00:00 133 | [2024-02-01T14:02:29.773+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.580 seconds 134 | [2024-02-01T14:03:00.253+0000] {processor.py:161} INFO - Started process (PID=17003) to work on /opt/airflow/dags/test_dag_3.py 135 | [2024-02-01T14:03:00.258+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 136 | [2024-02-01T14:03:00.263+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:00.261+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 137 | [2024-02-01T14:03:00.336+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 138 | [2024-02-01T14:03:00.869+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:00.868+0000] {dag.py:3036} INFO - Sync 1 DAGs 139 | [2024-02-01T14:03:01.024+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:01.023+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:04:00+00:00, run_after=2024-02-01 14:06:00+00:00 140 | [2024-02-01T14:03:01.136+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.888 seconds 141 | [2024-02-01T14:03:31.644+0000] {processor.py:161} INFO - Started process (PID=17012) to work on /opt/airflow/dags/test_dag_3.py 142 | [2024-02-01T14:03:31.646+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 143 | [2024-02-01T14:03:31.648+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:31.648+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 144 | [2024-02-01T14:03:31.741+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 145 | [2024-02-01T14:03:31.943+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:31.943+0000] {dag.py:3036} INFO - Sync 1 DAGs 146 | [2024-02-01T14:03:32.003+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:03:32.001+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:04:00+00:00, run_after=2024-02-01 14:06:00+00:00 147 | [2024-02-01T14:03:32.078+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.442 seconds 148 | [2024-02-01T14:04:02.375+0000] {processor.py:161} INFO - Started process (PID=17021) to work on /opt/airflow/dags/test_dag_3.py 149 | [2024-02-01T14:04:02.384+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 150 | [2024-02-01T14:04:02.391+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:02.388+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 151 | 
[2024-02-01T14:04:02.578+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 152 | [2024-02-01T14:04:03.162+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:03.161+0000] {dag.py:3036} INFO - Sync 1 DAGs 153 | [2024-02-01T14:04:03.252+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:03.251+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:04:00+00:00, run_after=2024-02-01 14:06:00+00:00 154 | [2024-02-01T14:04:03.318+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.975 seconds 155 | [2024-02-01T14:04:33.883+0000] {processor.py:161} INFO - Started process (PID=17030) to work on /opt/airflow/dags/test_dag_3.py 156 | [2024-02-01T14:04:33.885+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 157 | [2024-02-01T14:04:33.888+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:33.887+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 158 | [2024-02-01T14:04:33.931+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 159 | [2024-02-01T14:04:34.181+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:34.180+0000] {dag.py:3036} INFO - Sync 1 DAGs 160 | [2024-02-01T14:04:34.225+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:04:34.225+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:04:00+00:00, run_after=2024-02-01 14:06:00+00:00 161 | [2024-02-01T14:04:34.265+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.390 seconds 162 | [2024-02-01T14:05:04.423+0000] {processor.py:161} INFO - Started process (PID=17040) to work on /opt/airflow/dags/test_dag_3.py 163 | [2024-02-01T14:05:04.428+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 164 | [2024-02-01T14:05:04.443+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:04.440+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 165 | [2024-02-01T14:05:04.501+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 166 | [2024-02-01T14:05:05.121+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:05.120+0000] {dag.py:3036} INFO - Sync 1 DAGs 167 | [2024-02-01T14:05:05.377+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:05.375+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:06:00+00:00, run_after=2024-02-01 14:08:00+00:00 168 | [2024-02-01T14:05:05.578+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 1.168 seconds 169 | [2024-02-01T14:05:35.721+0000] {processor.py:161} INFO - Started process (PID=17048) to work on /opt/airflow/dags/test_dag_3.py 170 | [2024-02-01T14:05:35.724+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 171 | [2024-02-01T14:05:35.727+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:35.726+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 172 | [2024-02-01T14:05:35.770+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 173 | [2024-02-01T14:05:35.965+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:35.964+0000] {dag.py:3036} INFO - Sync 1 DAGs 174 | [2024-02-01T14:05:36.006+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:05:36.005+0000] {dag.py:3823} INFO - 
Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:06:00+00:00, run_after=2024-02-01 14:08:00+00:00 175 | [2024-02-01T14:05:36.047+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.335 seconds 176 | [2024-02-01T14:06:06.375+0000] {processor.py:161} INFO - Started process (PID=17058) to work on /opt/airflow/dags/test_dag_3.py 177 | [2024-02-01T14:06:06.389+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 178 | [2024-02-01T14:06:06.395+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:06:06.392+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 179 | [2024-02-01T14:06:06.788+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 180 | [2024-02-01T14:06:09.962+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:06:09.961+0000] {dag.py:3036} INFO - Sync 1 DAGs 181 | [2024-02-01T14:06:10.381+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:06:10.380+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:06:00+00:00, run_after=2024-02-01 14:08:00+00:00 182 | [2024-02-01T14:06:10.761+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 4.470 seconds 183 | [2024-02-01T14:06:46.187+0000] {processor.py:161} INFO - Started process (PID=17068) to work on /opt/airflow/dags/test_dag_3.py 184 | [2024-02-01T14:06:46.294+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 185 | [2024-02-01T14:06:46.488+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:06:46.365+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 186 | [2024-02-01T14:06:55.217+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 187 | [2024-02-01T14:08:27.082+0000] {processor.py:161} INFO - Started process (PID=17083) to work on /opt/airflow/dags/test_dag_3.py 188 | [2024-02-01T14:08:27.125+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 189 | [2024-02-01T14:08:27.132+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:08:27.130+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 190 | [2024-02-01T14:08:27.983+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 191 | [2024-02-01T14:08:31.993+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:08:31.990+0000] {dag.py:3036} INFO - Sync 1 DAGs 192 | [2024-02-01T14:08:32.739+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:08:32.695+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:08:00+00:00, run_after=2024-02-01 14:10:00+00:00 193 | [2024-02-01T14:08:33.563+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 6.868 seconds 194 | [2024-02-01T14:10:39.499+0000] {processor.py:161} INFO - Started process (PID=17104) to work on /opt/airflow/dags/test_dag_3.py 195 | [2024-02-01T14:10:39.831+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 196 | [2024-02-01T14:10:40.201+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:10:40.085+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 197 | [2024-02-01T14:10:54.491+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 198 | [2024-02-01T14:11:05.629+0000] 
{logging_mixin.py:188} INFO - [2024-02-01T14:11:05.627+0000] {dag.py:3036} INFO - Sync 1 DAGs 199 | [2024-02-01T14:11:07.066+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:11:07.066+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:10:00+00:00, run_after=2024-02-01 14:12:00+00:00 200 | [2024-02-01T14:11:07.625+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 31.652 seconds 201 | [2024-02-01T14:12:27.533+0000] {processor.py:161} INFO - Started process (PID=17126) to work on /opt/airflow/dags/test_dag_3.py 202 | [2024-02-01T14:12:28.433+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 203 | [2024-02-01T14:12:29.101+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:12:28.927+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 204 | [2024-02-01T14:12:37.961+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 205 | [2024-02-01T14:46:03.379+0000] {processor.py:161} INFO - Started process (PID=35) to work on /opt/airflow/dags/test_dag_3.py 206 | [2024-02-01T14:46:03.396+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 207 | [2024-02-01T14:46:03.409+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:46:03.408+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 208 | [2024-02-01T14:46:03.617+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 209 | [2024-02-01T14:46:05.381+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:46:05.380+0000] {dag.py:3036} INFO - Sync 1 DAGs 210 | [2024-02-01T14:46:05.725+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:46:05.724+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:46:00+00:00, run_after=2024-02-01 14:48:00+00:00 211 | [2024-02-01T14:46:06.321+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 2.957 seconds 212 | [2024-02-01T14:48:37.501+0000] {processor.py:161} INFO - Started process (PID=35) to work on /opt/airflow/dags/test_dag_3.py 213 | [2024-02-01T14:48:37.514+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 214 | [2024-02-01T14:48:37.524+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:48:37.524+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 215 | [2024-02-01T14:48:37.572+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 216 | [2024-02-01T14:48:38.459+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:48:38.459+0000] {dag.py:3036} INFO - Sync 1 DAGs 217 | [2024-02-01T14:48:38.546+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:48:38.546+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:48:00+00:00, run_after=2024-02-01 14:50:00+00:00 218 | [2024-02-01T14:48:38.737+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 1.240 seconds 219 | [2024-02-01T14:49:09.655+0000] {processor.py:161} INFO - Started process (PID=43) to work on /opt/airflow/dags/test_dag_3.py 220 | [2024-02-01T14:49:09.668+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 221 | [2024-02-01T14:49:09.713+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:49:09.712+0000] {dagbag.py:538} INFO - Filling up the DagBag from 
/opt/airflow/dags/test_dag_3.py 222 | [2024-02-01T14:49:09.895+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 223 | [2024-02-01T14:49:10.476+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:49:10.475+0000] {dag.py:3036} INFO - Sync 1 DAGs 224 | [2024-02-01T14:49:10.548+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:49:10.548+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:50:00+00:00, run_after=2024-02-01 14:52:00+00:00 225 | [2024-02-01T14:49:10.600+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 1.029 seconds 226 | [2024-02-01T14:49:41.914+0000] {processor.py:161} INFO - Started process (PID=52) to work on /opt/airflow/dags/test_dag_3.py 227 | [2024-02-01T14:49:41.916+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 228 | [2024-02-01T14:49:41.919+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:49:41.919+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 229 | [2024-02-01T14:49:41.950+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 230 | [2024-02-01T14:49:42.120+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:49:42.119+0000] {dag.py:3036} INFO - Sync 1 DAGs 231 | [2024-02-01T14:49:42.152+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:49:42.152+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:50:00+00:00, run_after=2024-02-01 14:52:00+00:00 232 | [2024-02-01T14:49:42.191+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 0.280 seconds 233 | [2024-02-01T14:50:12.622+0000] {processor.py:161} INFO - Started process (PID=60) to work on /opt/airflow/dags/test_dag_3.py 234 | [2024-02-01T14:50:12.629+0000] {processor.py:830} INFO - Processing file /opt/airflow/dags/test_dag_3.py for tasks to queue 235 | [2024-02-01T14:50:12.635+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:50:12.633+0000] {dagbag.py:538} INFO - Filling up the DagBag from /opt/airflow/dags/test_dag_3.py 236 | [2024-02-01T14:50:12.743+0000] {processor.py:840} INFO - DAG(s) 'test_dag_1_dag' retrieved from /opt/airflow/dags/test_dag_3.py 237 | [2024-02-01T14:50:13.357+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:50:13.356+0000] {dag.py:3036} INFO - Sync 1 DAGs 238 | [2024-02-01T14:50:13.451+0000] {logging_mixin.py:188} INFO - [2024-02-01T14:50:13.451+0000] {dag.py:3823} INFO - Setting next_dagrun for test_dag_1_dag to 2024-02-01 14:50:00+00:00, run_after=2024-02-01 14:52:00+00:00 239 | [2024-02-01T14:50:13.803+0000] {processor.py:183} INFO - Processing /opt/airflow/dags/test_dag_3.py took 1.233 seconds 240 | -------------------------------------------------------------------------------- /python/short_vids/deepface/README.md: -------------------------------------------------------------------------------- 1 | # DeepFace 2 | 3 | # Overview 📚 4 | 5 | A quick AI project for detecting faces in a photo gallery using the **[DeepFace](https://github.com/serengil/deepface)** library. 
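For a feel of the detection step before opening the notebook, here is a minimal sketch (assumptions: `deepface` is installed via `pip install deepface`, and the image paths mirror this repo's `photos` folder; the full walkthrough lives in `detect_faces.ipynb`):

```python
from deepface import DeepFace

# Gallery photos, assuming paths relative to this folder
photos = [f"photos/image_{i}.jpg" for i in range(1, 5)] + ["photos/man_utd_team.jpg"]

for photo in photos:
    # Each detected face comes back as a dict holding the cropped face array,
    # its bounding box ("facial_area"), and a detection confidence score
    faces = DeepFace.extract_faces(img_path=photo, enforce_detection=False)
    print(f"{photo}: detected {len(faces)} face(s)")
```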
6 | 7 | # Resources 🎁 8 | 9 | You can find the vid of this via these handles: 10 | 11 | - **[YouTube](https://www.youtube.com/@sdw-online)** 12 | - **[TikTok](https://www.tiktok.com/@sdw.online)** 13 | - **[LinkedIn](https://www.linkedin.com/in/stephen-david-williams-860428123/)** 14 | - **[Twitter/X](https://twitter.com/sdw_online)** 15 | - **[Blog](https://medium.com/@sdw-online/)** 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /python/short_vids/deepface/photos/image_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/deepface/photos/image_1.jpg -------------------------------------------------------------------------------- /python/short_vids/deepface/photos/image_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/deepface/photos/image_2.jpg -------------------------------------------------------------------------------- /python/short_vids/deepface/photos/image_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/deepface/photos/image_3.jpg -------------------------------------------------------------------------------- /python/short_vids/deepface/photos/image_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/deepface/photos/image_4.jpg -------------------------------------------------------------------------------- /python/short_vids/deepface/photos/man_utd_team.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdw-online/code_examples_library/9c03ecbba527c3086ae56551370953dcc374c255/python/short_vids/deepface/photos/man_utd_team.jpg -------------------------------------------------------------------------------- /python/short_vids/great_expectations/ge_script_1.py: -------------------------------------------------------------------------------- 1 | import great_expectations as ge 2 | import pandas as pd 3 | 4 | # Data representing products in our online electronics store 5 | data = { 6 | 'product': ['phone case', 'phone charger', 'tablet cover', 'laptop sleeve'], 7 | 'category': ['Electronics', 'Accessories', 'Accessories', 'Laptops'], 8 | 'price': [15, 20, 12, 18], 9 | 'in_stock': [True, True, False, True] 10 | } 11 | 12 | # Create a dataframe from our dataset 13 | df = pd.DataFrame(data) 14 | 15 | # Convert the Pandas dataframe to a GX one 16 | ge_df = ge.dataset.PandasDataset(df) 17 | 18 | 19 | # DQ Test 1: The values in the 'category' column should be in the predefined list 20 | ge_df.expect_column_values_to_be_in_set('category', ['Electronics', 'Accessories', 'Fashion', 'Toys']) 21 | 22 | 23 | # DQ Test 2: The row count should be 4 & column count should be 5 24 | ge_df.expect_table_row_count_to_equal(4) 25 | ge_df.expect_table_column_count_to_equal(5) 26 | 27 | # Validate the data against the set expectations 28 | results = ge_df.validate() 29 | print(results) 
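As a quick usage note on the script above: the object returned by `validate()` is dict-like, so a pipeline can gate on the overall outcome programmatically. A minimal sketch, mirroring the `results['success']` pattern used in `ge_script_2.py` below:

```python
# Gate a downstream step on whether the whole expectation suite passed
if not results["success"]:
    raise ValueError(f"One or more data quality checks failed: {results}")
print("All expectations passed ✅")
```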
-------------------------------------------------------------------------------- /python/short_vids/great_expectations/ge_script_2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import great_expectations as ge 4 | 5 | 6 | # 1. Extract the data (E) 7 | def extract_data(file_path): 8 | return pd.read_csv(file_path) 9 | 10 | 11 | """ 12 | 13 | I corrected the error message that appeared from the initial extract_data function with this code block: 14 | 15 | def extract_data(file_path): 16 | try: 17 | df = pd.read_csv(file_path) 18 | # Ensure 'unit_price' is of type float 19 | if 'unit_price' in df.columns: 20 | df['unit_price'] = df['unit_price'].astype(float) 21 | return df 22 | except Exception as e: 23 | raise Exception(f"An error occurred during data extraction: {e}") 24 | """ 25 | 26 | 27 | def validate_extracted_data(df): 28 | ge_df = ge.from_pandas(df) 29 | 30 | # The following columns should exist in the dataframe 31 | ge_df.expect_column_to_exist('product_id') 32 | ge_df.expect_column_to_exist('product_name') 33 | ge_df.expect_column_to_exist('unit_price') 34 | ge_df.expect_column_to_exist('quantity_sold') 35 | 36 | # Each column should follow the schema specification provided 37 | ge_df.expect_column_values_to_be_of_type('product_id', 'int64') 38 | ge_df.expect_column_values_to_be_of_type('product_name', 'object') 39 | ge_df.expect_column_values_to_be_of_type('unit_price', 'float64') 40 | ge_df.expect_column_values_to_be_of_type('quantity_sold', 'int64') 41 | 42 | 43 | # Run the validation checks 44 | results = ge_df.validate() 45 | if not results['success']: 46 | raise ValueError("Validation failed for extracted data: " + str(results)) 47 | return results 48 | 49 | 50 | # 2. Transform the data (T) 51 | def transform_data(df): 52 | try: 53 | if 'unit_price' not in df or 'quantity_sold' not in df: 54 | raise KeyError("Required columns for transformation are missing.") 55 | df['total_sales'] = df['unit_price'] * df['quantity_sold'] 56 | return df 57 | except KeyError as e: 58 | raise e 59 | except Exception as e: 60 | raise Exception(f"An error occurred during transformation: {e}") 61 | 62 | 63 | 64 | def validate_transformed_data(df): 65 | ge_df = ge.from_pandas(df) 66 | 67 | # The 'total_sales' column should exist 68 | ge_df.expect_column_to_exist('total_sales') 69 | 70 | # The 'total_sales' column should be a float 71 | ge_df.expect_column_values_to_be_of_type('total_sales', 'float') 72 | 73 | # Run the validation checks 74 | results = ge_df.validate() 75 | if not results['success']: 76 | raise ValueError("Validation failed for transformed data") 77 | return results 78 | 79 | 80 | # 3. Load the data (L) 81 | def load_data(df, file_path): 82 | df.to_csv(file_path, index=False) 83 | 84 | 85 | 86 | def validate_loaded_data(file_path): 87 | if not os.path.exists(file_path) or os.path.getsize(file_path) == 0: 88 | raise FileNotFoundError(f"The file {file_path} was not found or is empty.") 89 | 90 | print(f"File {file_path} has been successfully validated.") 91 | 92 | 93 | # Run the data pipeline (ETL) 94 | def run_etl_pipeline(extract_file_path, load_file_path): 95 | 96 | # 1. Extract (E) 97 | extracted_data = extract_data(extract_file_path) 98 | if validate_extracted_data(extracted_data): 99 | print("Extracted data is valid.") 100 | 101 | # 2. 
Transform (T) 102 | transformed_data = transform_data(extracted_data) 103 | if validate_transformed_data(transformed_data): 104 | print("Transformed data is valid.") 105 | 106 | # 3. Load (L) 107 | load_data(transformed_data, load_file_path) 108 | validate_loaded_data(load_file_path) 109 | print("Data has been successfully loaded and validated.") 110 | 111 | 112 | run_etl_pipeline('source_sales_data.csv', 'target_sales_data.csv') -------------------------------------------------------------------------------- /python/short_vids/pytest/test_data_quality.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script Name: test_data_quality.py 3 | Description: This script performs data quality tests on car datasets, including checks for completeness, accuracy, duplication, and quantity validation for car models. 4 | 5 | Author: Stephen David-Williams 6 | Creation Date: 2024-01-22 7 | Last Updated: [N/A] 8 | 9 | 10 | You can find the short video demos here: 11 | YouTube - https://youtube.com/shorts/iAWoamvb7QU?si=eu0RJMUKrNxNHuh4 12 | TikTok - https://www.tiktok.com/@sdw.online/video/7326641648157412641?is_from_webapp=1&sender_device=pc&web_id=7213404854906652165 13 | 14 | 15 | 16 | How to run: 17 | -------------- 18 | 1. Ensure the file name begins with "test_". 19 | 2. To execute the tests, type any of the following in the terminal: 20 | - pytest file_name.py 21 | - pytest -v file_name.py (for more detailed output) 22 | 23 | Notes: 24 | ------ 25 | 26 | ⏳ I'll be working on better quality videos to break down how each component of the script works - 27 | 28 | ✅ Subscribe and follow my pages to be notified when the videos drop! 29 | 30 | 31 | Changelog: 32 | ---------- 33 | [Date]: [N/A] 34 | [Date]: [N/A] 35 | ... 36 | """ 37 | 38 | # test_data_quality.py 39 | 40 | import pytest 41 | import pandas as pd 42 | 43 | @pytest.fixture 44 | def cars_data(): 45 | data = { 46 | "order_id": [101, 102, 103, 104], 47 | "car_model": ['Aston Martin', 'Ferrari', 'Lamborghini', 'Rolls Royce'], 48 | 'quantity': [2, 3, 1, 4] 49 | } 50 | cars_df = pd.DataFrame(data) 51 | return cars_df 52 | 53 | 54 | # Completeness test 55 | def test_missing_values(cars_data): 56 | actual_missing_values_count = cars_data.isnull().sum().sum() 57 | assert actual_missing_values_count == 0 58 | 59 | 60 | # Accuracy test 61 | def test_data_accuracy(cars_data): 62 | trusted_data = { 63 | 'order_id': [101, 102, 103, 104], 64 | 'car_model': ['Aston Martin', 'Ferrari', 'Lamborghini', 'Rolls Royce'], 65 | 'quantity': [2, 3, 1, 4] 66 | } 67 | trusted_df = pd.DataFrame(trusted_data) 68 | pd.testing.assert_frame_equal(cars_data, trusted_df) 69 | 70 | 71 | # Deduplication test 72 | def test_duplicates(cars_data): 73 | actual_duplicates_count = cars_data.duplicated().sum() 74 | assert actual_duplicates_count == 0 75 | 76 | 77 | # Parametrized test for car model quantities 78 | @pytest.mark.parametrize("car_model, expected_quantity", [ 79 | ('Aston Martin', 2), 80 | ('Ferrari', 3), 81 | ('Lamborghini', 1), 82 | ('Rolls Royce', 4) 83 | ]) 84 | def test_car_model_quantity(cars_data, car_model, expected_quantity): 85 | actual_quantity = cars_data[cars_data['car_model'] == car_model]['quantity'].sum() 86 | assert actual_quantity == expected_quantity 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /python/short_vids/soda/checks.yml: -------------------------------------------------------------------------------- 1 | checks for sales: 2 | - row_count > 0 3 | - missing_count(sale_id) = 0 4 | 
- missing_count(product_id) = 0 5 | - missing_count(quantity) = 0 6 | - missing_count(sale_date) = 0 7 | - missing_count(total_amount) = 0 8 | - duplicate_count(sale_id) = 0 9 | - max(total_amount) < 10000 10 | - freshness(sale_date) < 2d 11 | 12 | - schema: 13 | name: Confirm the required columns are present 14 | fail: 15 | when required column missing: 16 | [sale_id, product_id, fake_column] 17 | 18 | - avg_amount >= 10: 19 | avg_amount query: | 20 | SELECT AVG(total_amount) 21 | FROM business_data.sales 22 | WHERE sale_date BETWEEN '2024-01-10' AND '2024-01-14' -------------------------------------------------------------------------------- /python/short_vids/soda/configuration.yml: -------------------------------------------------------------------------------- 1 | data_source my_business_info: 2 | type: postgres 3 | connection: 4 | host: localhost 5 | port: "5434" 6 | username: ${POSTGRES_USERNAME} 7 | password: ${POSTGRES_PASSWORD} 8 | database: test_db 9 | schema: business_data -------------------------------------------------------------------------------- /python/short_vids/soda/data_contract.yml: -------------------------------------------------------------------------------- 1 | dataset: sales 2 | columns: 3 | - name: sale_id 4 | data_type: integer 5 | unique: true 6 | not_null: true 7 | 8 | - name: product_id 9 | data_type: integer 10 | not_null: true 11 | reference: 12 | dataset: products 13 | column: product_id 14 | 15 | - name: quantity 16 | data_type: integer 17 | not_null: true 18 | 19 | - name: sale_date 20 | data_type: date 21 | not_null: true 22 | 23 | - name: total_amount 24 | data_type: decimal 25 | not_null: true 26 | valid_min: 0 27 | valid_max: 10000 28 | 29 | checks: 30 | - row_count > 0 31 | - missing_count(sale_id) = 0 32 | - missing_count(product_id) = 0 33 | - missing_count(quantity) = 0 34 | - missing_count(sale_date) = 0 35 | - missing_count(total_amount) = 0 36 | - duplicate_count(sale_id) = 0 37 | - max(total_amount) < 10000 38 | - freshness(sale_date) < 2d 39 | - avg_amount >= 10: 40 | avg_amount query: | 41 | SELECT AVG(total_amount) 42 | FROM business_data.sales 43 | WHERE sale_date BETWEEN '2024-01-10' AND '2024-01-14' -------------------------------------------------------------------------------- /python/short_vids/soda/scan_checks.py: -------------------------------------------------------------------------------- 1 | from soda.scan import Scan 2 | from datetime import datetime 3 | 4 | 5 | scan = Scan() 6 | 7 | 8 | # Add the data source's name 9 | scan.set_data_source_name("my_business_info") 10 | 11 | 12 | # Add configuration.yml file 13 | scan.add_configuration_yaml_file("configuration.yml") 14 | 15 | # Stamp the scan with a scan date 16 | scan_date = datetime.today() 17 | scan.add_variables({"date": scan_date}) 18 | 19 | 20 | # Add the checks.yml file 21 | scan.add_sodacl_yaml_file("checks.yml") 22 | 23 | 24 | 25 | # Run the scan 26 | scan.execute() 27 | scan.assert_no_error_logs() 28 | scan.assert_no_checks_fail() 29 | 30 | 31 | # Evaluate the scan results 32 | print(scan.get_scan_results()) -------------------------------------------------------------------------------- /python/short_vids/soda/scan_data_contract.py: -------------------------------------------------------------------------------- 1 | from soda.contracts.data_contract_translator import DataContractTranslator 2 | from soda.scan import Scan 3 | import logging 4 | 5 | # Read the data contract file as a Python str 6 | with open("data_contract.yml") as f: 7 | data_contract_yaml_str: str = 
f.read() 8 | 9 | # Translate the data contract standards into SodaCL 10 | data_contract_parser = DataContractTranslator() 11 | sodacl_yaml_str = data_contract_parser.translate_data_contract_yaml_str(data_contract_yaml_str) 12 | 13 | # Log or save the SodaCL checks file to help with debugging 14 | logging.debug(sodacl_yaml_str) 15 | 16 | # Execute the translated SodaCL checks in a scan 17 | scan = Scan() 18 | scan.set_data_source_name("my_business_info") 19 | scan.add_configuration_yaml_file(file_path="configuration.yml") 20 | scan.add_sodacl_yaml_str(sodacl_yaml_str) 21 | scan.execute() 22 | scan.assert_no_checks_fail() -------------------------------------------------------------------------------- /sql/test.md: -------------------------------------------------------------------------------- 1 | # SQL Test File 2 | --------------------------------------------------------------------------------