"""Airflow DAG: fetch current Portland weather from OpenWeatherMap and store it in S3 as CSV.

Project: data_engineering_project_openweathermap_api_airflow_etl_aws
(the repository contains README.md and this module, weather_dag.py).

Task order:
    is_weather_api_ready >> extract_weather_data >> transform_load_weather_data
"""
import json
from datetime import datetime, timedelta, timezone

import pandas as pd
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.http.operators.http import SimpleHttpOperator
from airflow.providers.http.sensors.http import HttpSensor


# Single definition of the request path shared by the sensor and the extract
# task, so the two can never drift apart.
# NOTE(review): the APPID below is a live-looking API key committed in
# plaintext. It should be rotated and moved into an Airflow Connection or
# Variable; the value is kept here unchanged so the DAG behaves as before.
WEATHER_ENDPOINT = '/data/2.5/weather?q=Portland&APPID=5031cde3d1a8b9469fd47e998d7aef79'

# Destination bucket for the generated CSV files.
S3_BUCKET_URL = "s3://weatherapiairflowyoutubebucket-yml"


def kelvin_to_fahrenheit(temp_in_kelvin):
    """Convert a temperature from Kelvin to degrees Fahrenheit.

    Args:
        temp_in_kelvin: Temperature in Kelvin (int or float).

    Returns:
        The equivalent temperature in degrees Fahrenheit as a float.
    """
    temp_in_fahrenheit = (temp_in_kelvin - 273.15) * (9/5) + 32
    return temp_in_fahrenheit


def _shifted_naive_datetime(epoch_seconds, utc_offset_seconds):
    """Return a naive datetime for ``epoch_seconds`` shifted by ``utc_offset_seconds``.

    Produces the same value the deprecated ``datetime.utcfromtimestamp`` did
    for ``epoch_seconds + utc_offset_seconds``. The tzinfo is dropped on
    purpose: the shifted value represents the city's wall-clock time, so
    labelling it as UTC would be wrong.
    """
    shifted = datetime.fromtimestamp(epoch_seconds + utc_offset_seconds, tz=timezone.utc)
    return shifted.replace(tzinfo=None)


def _build_weather_record(data):
    """Flatten one OpenWeatherMap "current weather" payload into a single CSV row.

    Args:
        data: Decoded JSON response as a dict. The keys read here are
            ``name``, ``weather[0].description``, ``main.{temp, feels_like,
            temp_min, temp_max, pressure, humidity}``, ``wind.speed``, ``dt``,
            ``timezone`` and ``sys.{sunrise, sunset}``.

    Returns:
        A dict mapping output column names to values.

    Raises:
        KeyError / IndexError: if the payload is missing an expected field.
    """
    main = data["main"]
    utc_offset = data['timezone']

    # NOTE(review): "Minimun Temp (F)" and "Humidty" are misspelled, but they
    # are the column headers already being written to S3. They are kept as-is
    # so existing consumers of the CSV files do not break.
    return {
        "City": data["name"],
        "Description": data["weather"][0]['description'],
        "Temperature (F)": kelvin_to_fahrenheit(main["temp"]),
        "Feels Like (F)": kelvin_to_fahrenheit(main["feels_like"]),
        "Minimun Temp (F)": kelvin_to_fahrenheit(main["temp_min"]),
        "Maximum Temp (F)": kelvin_to_fahrenheit(main["temp_max"]),
        "Pressure": main["pressure"],
        "Humidty": main["humidity"],
        "Wind Speed": data["wind"]["speed"],
        "Time of Record": _shifted_naive_datetime(data['dt'], utc_offset),
        "Sunrise (Local Time)": _shifted_naive_datetime(data['sys']['sunrise'], utc_offset),
        "Sunset (Local Time)": _shifted_naive_datetime(data['sys']['sunset'], utc_offset),
    }


def transform_load_data(task_instance):
    """Transform the extracted weather payload and upload it to S3 as a one-row CSV.

    Pulls the XCom pushed by the ``extract_weather_data`` task, flattens it
    into a single record, and writes it to the S3 bucket under a
    timestamped file name.

    Args:
        task_instance: The Airflow task instance, used to pull the XCom value.

    Raises:
        ValueError: if no XCom value is available from ``extract_weather_data``.
    """
    data = task_instance.xcom_pull(task_ids="extract_weather_data")
    if data is None:
        # Fail with an actionable message instead of an opaque
        # "'NoneType' object is not subscriptable".
        raise ValueError(
            "No XCom payload found for task 'extract_weather_data'; "
            "cannot transform weather data."
        )

    df_data = pd.DataFrame([_build_weather_record(data)])

    # NOTE(review): placeholder credentials hard-coded in source. Real values
    # should come from an Airflow Connection or the worker's IAM role rather
    # than being committed here.
    aws_credentials = {"key": "xxxxxxxxx", "secret": "xxxxxxxxxx", "token": "xxxxxxxxxxxxxx"}

    # The file name uses the worker's local clock (naive datetime.now()), in
    # day-month-year order.
    now = datetime.now()
    dt_string = now.strftime("%d%m%Y%H%M%S")
    dt_string = 'current_weather_data_portland_' + dt_string
    df_data.to_csv(f"{S3_BUCKET_URL}/{dt_string}.csv", index=False, storage_options=aws_credentials)


default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2023, 1, 8),
    'email': ['myemail@domain.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=2)
}


with DAG('weather_dag',
         default_args=default_args,
         schedule_interval='@daily',
         catchup=False) as dag:

    # Gate the run on the weather endpoint responding.
    is_weather_api_ready = HttpSensor(
        task_id='is_weather_api_ready',
        http_conn_id='weathermap_api',
        endpoint=WEATHER_ENDPOINT
    )

    # Fetch the payload; response_filter parses the JSON body so the XCom
    # value is a dict rather than raw response text.
    extract_weather_data = SimpleHttpOperator(
        task_id='extract_weather_data',
        http_conn_id='weathermap_api',
        endpoint=WEATHER_ENDPOINT,
        method='GET',
        response_filter=lambda r: json.loads(r.text),
        log_response=True
    )

    transform_load_weather_data = PythonOperator(
        task_id='transform_load_weather_data',
        python_callable=transform_load_data
    )

    is_weather_api_ready >> extract_weather_data >> transform_load_weather_data