├── README.md ├── Section 01 - Introduction about the Course └── Free Download - Course Material ├── Section 02 - Setting up Environment Using AWS Cloud9 └── [Commands] Setup Jupyter Lab on Cloud9 ├── Section 03 - Setting up Environment - Overview of GCP and Provision Ubuntu VM ├── Links │ └── Links └── Setting up Environment │ └── Setup Ubuntu VM on GCP │ ├── 02_signing_up_for_gcp.ipynb │ ├── 03_understanding_gcp_web_console.ipynb │ ├── 04_overview_of_gcp_pricing.ipynb │ ├── 05_provision_ubuntu_18.04_vm.ipynb │ ├── 06_setup_docker.ipynb │ ├── 07_validating_python.ipynb │ └── 08_setup_jupyter_lab.ipynb ├── Section 05 - Setting up Environment - Setup Postgres Database on Ubuntu VM ├── Links │ └── Links ├── Scripts │ ├── create_db_tables_pg.sql │ └── load_db_tables_pg.sql └── Setup Postgres Database │ ├── 02_docker_cheat_sheet.ipynb │ ├── 03_setup_postgres_using_docker.ipynb │ ├── 04_accessing_postgres_using_docker_cli.ipynb │ ├── 05_create_database_and_user.ipynb │ ├── 06_execute_sql_scripts.ipynb │ ├── 08_sql_workbench_and_postgres.ipynb │ ├── 11_jupyter_lab_and_postgresql.ipynb │ └── 12_jupyter_lab_and_postgresql_on_ubuntu_vm.ipynb ├── Section 06 - Database Essentials - Getting Started ├── Getting Started │ ├── 02_connecting_to_database.ipynb │ ├── 03_using_psql.ipynb │ ├── 04_setup_postgres_using_docker.ipynb │ ├── 05_setup_sql_workbench.ipynb │ ├── 06_sql_workbench_and_postgres.ipynb │ ├── 07_sql_workbench_features.ipynb │ ├── 08_data_loading_utilities.ipynb │ └── 09_loading_data_postgres_in_docker.ipynb ├── Links │ └── Links └── Script │ └── 01 - Create Table.sql ├── Section 07 - Database Essentials - Database Operations ├── DML or CRUD Operations │ ├── 04_overview_of_database_operations.ipynb │ ├── 05_crud_operations.ipynb │ ├── 06_creating_table.ipynb │ ├── 07_inserting_data.ipynb │ ├── 08_updating_data.ipynb │ ├── 09_deleting_data.ipynb │ ├── 10_overview_of_transactions.ipynb │ └── 11_exercises_database_operations.ipynb ├── Exercises │ ├── 01 - Create Table.sql │ ├── 02 - Insert Into.sql │ ├── 03 - Updating Data.sql │ ├── 04 - Deleting Data.sql │ └── 05 - Selecting.sql ├── Links │ └── Links └── Scripts │ ├── 01 - Information Schema Tables.sql │ ├── 02 - Information Schema Columns.sql │ ├── 03 - Users.sql │ ├── 04 - Truncate Table.sql │ ├── 05 - Insert Into.sql │ ├── 06 - Update.sql │ └── 07 - Delete.sql ├── Section 08 - Database Essentials - Writing Basic SQL Queries ├── Exercises │ ├── 01 - Customer Order Count.sql │ ├── 02 - Dormant Customers.sql │ ├── 03 - Revenue Per Customer.sql │ ├── 04 - Revenue Per Category.sql │ └── 05 - Product Count Per Department.sql ├── Links │ └── Links ├── Scripts │ ├── 01 - Selecting.sql │ ├── 02 - Distinct.sql │ ├── 03 - Filtering.sql │ ├── 04 - Inner Join.sql │ ├── 05 - Outher Join.sql │ ├── 06 - Aggregations.sql │ ├── 07 - Sorting Data.sql │ └── 08 - Daily.sql └── Writing Basic SQL Queries │ ├── 02_standard_transformations.ipynb │ ├── 03_overview_of_data_model.ipynb │ ├── 04_define_problem_statement.ipynb │ ├── 05_preparing_tables.ipynb │ ├── 06_selecting_or_projecting_data.ipynb │ ├── 07_filtering_data.ipynb │ ├── 08_joining_tables_inner.ipynb │ ├── 09_joining_tables_outer.ipynb │ ├── 10_performing_aggregations.ipynb │ ├── 11_sorting_data.ipynb │ ├── 12_solution_daily_product_revenue.ipynb │ └── 13_exercises_basic_sql_queries.ipynb ├── Section 09 - Database Essentials - Creating Tables and Indexes ├── Creating Tables and Indexes │ ├── 02_data_definition_language.ipynb │ ├── 03_overview_of_data_types.ipynb │ ├── 
04_adding_or_modifying_columns.ipynb │ ├── 05_different_types_of_constraints.ipynb │ ├── 06_managing_constraints.ipynb │ ├── 07_indexes_on_tables.ipynb │ ├── 08_indexes_for_constraints.ipynb │ ├── 09_overview_of_sequences.ipynb │ ├── 10_truncating_tables.ipynb │ ├── 11_dropping_tables.ipynb │ ├── 12_Exercises_Managing_Database_Objects_using_Postgresql.ipynb │ └── 13_Solutions_Managing_Database_Objects_in_Postgresql.ipynb ├── Links │ └── Links └── Scripts │ ├── 01 - DDL.sql │ ├── 02 - DT.sql │ ├── 03 - Adding or Modifying Columns.sql │ ├── 04 - Managing Constraints.sql │ ├── 05 - Indexes for Constraints.sql │ ├── 06 - Overview of Sequences.sql │ ├── 07 - Truncating Tables.sql │ └── 08 - Dropping Tables.sql ├── Section 10 - Database Essentials - Partitioning Tables and Indexes ├── Exercises │ ├── 01 - Partitioned tables.sql │ └── 02 - Load Partitioned Tables.sql ├── Links │ └── Links ├── Partitioning Tables and Indexes │ ├── 02_overview_of_partitioning.ipynb │ ├── 03_list_partitioning.ipynb │ ├── 04_managing_partitions_list.ipynb │ ├── 05_manipulating_data.ipynb │ ├── 06_range_partitioning.ipynb │ ├── 07_managing_partitions_range.ipynb │ ├── 08_repartitioning_range.ipynb │ ├── 09_hash_partitioning.ipynb │ ├── 10_managing_partitions_hash.ipynb │ ├── 11_usage_scenarios.ipynb │ ├── 12_sub_partitioning.ipynb │ └── 13_exercises_partitioning_tables.ipynb └── Scripts │ ├── 01 - List Partitioning.sql │ ├── 02 - Managing Partitions - List.sql │ ├── 03 - Manipulating Data.sql │ ├── 04 - Range Partitioning.sql │ ├── 05 - Managing Partitions - Range.sql │ ├── 06 - Repartitioning - Range.sql │ ├── 07 - Hash Partitioning.sql │ ├── 08 - Managing Partitions - Hash.sql │ ├── 09 - Usage Scenarios.sql │ └── 10 - Sub Partitioning.sql ├── Section 11 - Database Essentials - Predefined Functions ├── Links │ └── Links ├── Predefined Functions │ ├── 02_overview_of_predefined_functions.ipynb │ ├── 03_string_manipulation_functions.ipynb │ ├── 04_date_manipulation_functions.ipynb │ ├── 05_overview_of_numeric_functions.ipynb │ ├── 06_data_type_conversion.ipynb │ ├── 07_handling_null_values.ipynb │ └── 08_using_case_and_when.ipynb └── Scripts │ ├── 01 - Overview of Pre-Defined Functions.sql │ ├── 02 - Case Conversion and Length.sql │ ├── 03 - Extracting Data - substr and split_part.sql │ ├── 04 - Using position or strpos.sql │ ├── 05 - Trimming and Padding Functions.sql │ ├── 06 - Reverse and Concatenating multiple strings.sql │ ├── 07 - String Replacement.sql │ ├── 08 - Getting Current Date and Timestamp.sql │ ├── 09 - Date Arithmetic.sql │ ├── 10 - Beginning Date or Time - date_trunc.sql │ ├── 11 - Using to_char and to_date.sql │ ├── 12 - Extracting information - extract.sql │ ├── 13 - Dealing with Unix Timestamp.sql │ ├── 14 - Overview of Numeric Functions.sql │ ├── 15 - Data Type Conversion.sql │ ├── 16 - Handling NULL Values.sql │ └── 17 - Using CASE and WHEN.sql ├── Section 12 - Database Essentials - Writing Advanced SQL Queries ├── Links │ └── Links ├── Scripts │ ├── 01 - Overview of Views.sql │ ├── 02 - Named Queries - Using WITH Clause.sql │ ├── 03 - Overview of Sub Queries.sql │ ├── 04 - CTAS - Create Table as Select.sql │ ├── 05 - Advanced DML Operations.sql │ ├── 06 - Merging or Upserting Data.sql │ ├── 07 - Pivoting Rows into Columns.sql │ ├── 08 - Overview of Analytic Functions.sql │ ├── 09 - Analytic Functions – Aggregations.sql │ ├── 10 - Cumulative or Moving Aggregations.sql │ ├── 11 - Analytic Functions – Windowing.sql │ ├── 12 - Analytic Functions – Ranking.sql │ ├── 13 - Analytic Functions - 
Filtering.sql │ └── 14 - Ranking and Filtering - Recap.sql └── Writing Advanced SQL Queries │ ├── 02_overview_of_views.ipynb │ ├── 03_named_queries_using_with_clause.ipynb │ ├── 04_overview_of_sub_queries.ipynb │ ├── 05_create_table_as_select.ipynb │ ├── 06_advanced_dml_operations.ipynb │ ├── 07_merging_or_upserting_data.ipynb │ ├── 08_pivoting_rows_into_columns.ipynb │ ├── 09_overview_of_analytic_functions.ipynb │ ├── 10_analytic_functions_aggregations.ipynb │ ├── 11_cumulative_or_moving_aggregations.ipynb │ ├── 12_analytic_functions_windowing.ipynb │ ├── 13_analytic_functions_ranking.ipynb │ ├── 14_analytic_funcions_filtering.ipynb │ ├── 15_ranking_and_filtering_recap.ipynb │ ├── 16_Exercises_SQL_Analytics_or_Windowing_Functions_using_Postgresql.ipynb │ └── 17_Solutions_SQL_Analytics_or_Windowing_Functions_using_Postgres.ipynb ├── Section 13 - Programming Essentials Using Python - Perform Database Operations ├── Exercises │ ├── 01 - Create Table.sql │ ├── 02 - Insert Into.sql │ ├── 03 - Updating Data.sql │ ├── 04 - Deleting Data.sql │ └── 05 - Selecting.sql ├── Links │ └── Links ├── Postgres Database Operations │ ├── 02_overview_of_sql.ipynb │ ├── 03_create_database.ipynb │ ├── 04_ddl_data_definition_language.ipynb │ ├── 05_dml.ipynb │ ├── 06_dql_data_query_language.ipynb │ ├── 07_crud_operations_dml_and_dql.ipynb │ ├── 08_tcl.ipynb │ ├── 09_example_data_engineering.ipynb │ ├── 10_example_web_application.ipynb │ └── 11_exercise_database_operations.ipynb └── Scripts │ ├── 01 - Create Database and Users Table.sql │ ├── 02 - DDL – Data Definition Language.sql │ ├── 03 - Dml Data Manipulation Language.sql │ └── 04 - DQL – Data Query Language.sql ├── Section 14 - Programming Essentials Using Python - Getting Started with Python └── Links ├── Section 15 - Programming Essentials Using Python - Basic Programming Constructs ├── Links │ └── Links └── Python Code │ ├── 01_Basic_Programming_Constructs.ipynb │ ├── 02_getting_help.ipynb │ ├── 03_variables_and_objects.ipynb │ ├── 04_data_types_commonly_used.ipynb │ ├── 05_operators_in_python.ipynb │ ├── 06_tasks_data_types_and_operators.ipynb │ ├── 07_conditionals.ipynb │ ├── 08_all_about_for_loops.ipynb │ ├── 09_running_os_commands.ipynb │ ├── 10 - Exercises - Basic Programming Constructs.ipynb │ └── 11 - Eval and Exec.ipynb ├── Section 16 - Programming Essentials Using Python - Predefined Functions ├── Links │ └── Links └── Python Code │ ├── 01 - Pre-defined Functions.ipynb │ ├── 02_overview_of_pre-defined_functions.ipynb │ ├── 03_numeric_functions.ipynb │ ├── 04_overview_of_strings.ipynb │ ├── 05_string_manipulation_functions.ipynb │ ├── 06_formatting_strings.ipynb │ ├── 07_print_and_input_functions.ipynb │ ├── 08_date_manipulation_functions.ipynb │ ├── 09_special_functions.ipynb │ └── 10_exercises_pre_defined_functions.ipynb ├── Section 17 - Programming Essentials Using Python - User Defined Functions ├── Links │ └── Links └── Python Code │ ├── 01_user_defined_functions.ipynb │ ├── 02_defining_functions.ipynb │ ├── 03_doc_strings.ipynb │ ├── 04_returning_values.ipynb │ ├── 05_function_parameters_and_arguments.ipynb │ ├── 06_varying_arguments.ipynb │ ├── 07_keyword_arguments.ipynb │ ├── 08_recap_of_user_defined_functions.ipynb │ ├── 09_passing_functions_as_arguments.ipynb │ ├── 10_lambda_functions.ipynb │ ├── 11_usage_of_lambda_functions.ipynb │ └── 12_exercise_user_defined_functions.ipynb ├── Section 18 - Programming Essentials Using Python - Overview of Collections - List and Set ├── Links │ └── Links └── Python Code │ ├── 01_Overview of 
Collections - list and set.ipynb │ ├── 02_overview.ipynb │ ├── 03_common_operations.ipynb │ ├── 04_accessing_elements_from_list.ipynb │ ├── 05_adding_elements.ipynb │ ├── 06_updating_and_deleting_elements_list.ipynb │ ├── 07_other_list_operations.ipynb │ ├── 08_adding_and_deleting_elements_set.ipynb │ ├── 09_typical_set_operations.ipynb │ ├── 10_validating_set.ipynb │ ├── 11_list_and_set_usage.ipynb │ ├── 12_Overview of Strings.ipynb │ ├── 13_exercises_list_and_set.ipynb │ ├── 14_List_of_Delimited_strings.ipynb │ ├── 15_sorting_data_in_lists_and_tuples.ipynb │ ├── 16_sorting_list_of_delimete_strings.ipynb │ └── 17_Exercises_sorting_lists_and_sets.ipynb ├── Section 19 - Programming Essentials Using Python - Overview of Collections - Dict and Tuple ├── Links │ └── Links └── Python Code │ ├── 01_manipulating_collections_using_loops.ipynb │ ├── 02_overview_of_dict_and_tuple.ipynb │ ├── 03_common_operations.ipynb │ ├── 04_accessing_elements_tuples.ipynb │ ├── 05_accessing_elements_dict.ipynb │ ├── 06_manipulating_dict.ipynb │ ├── 07_common_examples_dict.ipynb │ ├── 08_list_of_tuples.ipynb │ ├── 09_list_of_dicts.ipynb │ ├── 10_process_dict_values.ipynb │ ├── 11_process_dict_items.ipynb │ ├── 12_sorting_dict_items.ipynb │ └── 13_exercises.ipynb ├── Section 20 - Programming Essentials Using Python - Manipulating Collections using Loops ├── Links │ └── Links └── Python Code │ ├── 01_manipulating_collections_using_loops.ipynb │ ├── 02_reading_files_into_collections.ipynb │ ├── 03_overview_of_standard_transformations.ipynb │ ├── 04_row_level_transformations.ipynb │ ├── 05_getting_unique_elements.ipynb │ ├── 06_filtering_data.ipynb │ ├── 07_preparing_data_sets.ipynb │ ├── 08_quick_recap_of_dict_operations.ipynb │ ├── 09_performing_total_aggregations.ipynb │ ├── 10_manipulate_collections_using_comprehensions.ipynb │ ├── 11_joining_data_sets.ipynb │ ├── 12_limitations_of_using_loops.ipynb │ ├── 13_overview_of_grouped_aggregations.ipynb │ ├── 14_get_order_count_by_status.ipynb │ ├── 15_get_revenue_details_per_order.ipynb │ ├── 16_get_order_count_by_month.ipynb │ ├── 17_list_comprehensions.ipynb │ ├── 18_set_comprehensions.ipynb │ ├── 19_dict_comprehensions.ipynb │ └── 20_exercises_manipulating_collections_using_loops.ipynb ├── Section 21 - Programming Essentials Using Python - Development of Map Reduce APIs ├── Links │ └── Links └── Python Code │ ├── 01_development_of_map_reduce_apis.ipynb │ ├── 02_develop_myFilter.ipynb │ ├── 03_validate_myFilter.ipynb │ ├── 04_develop_myMap.ipynb │ ├── 05_validate_myMap.ipynb │ ├── 06_develop_myReduce.ipynb │ ├── 07_validate_myReduce.ipynb │ ├── 08_develop_myReduceByKey.ipynb │ ├── 09_validate_myReduceByKey.ipynb │ ├── 10_develop_myJoin.ipynb │ ├── 11_validate_myJoin.ipynb │ └── 12_exercises_custom_map_reduce_functions.ipynb ├── Section 22 - Programming Essentials Using Python - Understanding Map Reduce Libraries ├── 01 - Preparing Data Sets.ipynb ├── 02 - Filtering Data using Filter.ipynb ├── 03 - Projecting Data using Map.ipynb ├── 04 - Row Level Transformations using Map.ipynb ├── 05 - Aggregations using Reduce.ipynb ├── 06 - Get Revenue for a Given Product Id.ipynb ├── 07 - Get total items sold and revenue.ipynb ├── 08 - Get Total Commission Amount.ipynb ├── 09 - Overview of Itertools.ipynb ├── 10 - Cumulative Operations using itertools.ipynb ├── 11 - Using itertools starmap.ipynb ├── 12 - Overview of itertools groupby.ipynb ├── 13 - Order Count by Status using itertools.ipynb ├── 14 - Revenue per Order using itertools.ipynb ├── 15 - Limitations of Map Reduce 
Libraries.ipynb └── 16 - Exercises - Map Reduce Functions.ipynb ├── Section 23 - Programming Essentials Using Python - Basics of File IO using Python ├── 01 - Overview of File IO.ipynb ├── 02 - Folders and Files.ipynb ├── 03 - File Paths and Names.ipynb ├── 04 - Ovewrview of Retail Data.ipynb ├── 05 - Read text file into string.ipynb ├── 06 - Write string to text file.ipynb ├── 07 - Overview of modes to write into files.ipynb ├── 08 - Overview of Delimited Strings.ipynb ├── 09 - Read csv into list of strings.ipynb ├── 10 - Basics of File IO using Python.ipynb ├── 11 - Write Strings to File in Append Mode.ipynb └── 12 - Managing Folders and Files using Python.ipynb ├── Section 24 - Programming Essentials Using Python - Delimited Files and Collections ├── 01 - Overview of Delimited Text Files.ipynb ├── 02 - Recap of Basic File IO.ipynb ├── 03 - Read Delimited Files into list of tuples.ipynb ├── 04 - Write Delimited Strings into Files.ipynb ├── 05 - Overview of CSV Module.ipynb ├── 06 - Read Delimited data using CSV.ipynb ├── 07 - Delimited File and Collections.ipynb ├── 08 - Write iterables to files using CSV.ipynb ├── 09 - Advantages of using CSV.ipynb └── 10 - Apply Schema on the lists from files.ipynb ├── Section 25 - Programming Essentials Using Python - Overview of Pandas Libraries ├── Links │ └── Links └── Python Code │ ├── 01_overview_of_pandas_libraries.ipynb │ ├── 02_pandas_data_structures_overview.ipynb │ ├── 03_overview_of_series.ipynb │ ├── 04_creating_data_frames_from_lists.ipynb │ ├── 05_data_frames_basic_operations.ipynb │ ├── 06_csv_to_pandas_data_frame.ipynb │ ├── 07_projecting_and_filtering.ipynb │ ├── 08_performing_total_aggregations.ipynb │ ├── 09_performing_grouped_aggregations.ipynb │ ├── 10_writing_data_frames_to_files.ipynb │ └── 11_joining_data_frames.ipynb ├── Section 26 - Programming Essentials Using Python - Database Programming CRUD Operations ├── Links │ └── Links ├── Postgre Code │ ├── 01 - Creating Table.sql │ ├── 02 - Inserting Data into Table.sql │ ├── 03 - Updating Existing Table Data.sql │ ├── 04 - Deleting Data from Table.sql │ └── 05 - Querying Data from Table.sql └── Python Code │ ├── 01_database_programming_crud_operations.ipynb │ ├── 02_overview_of_database_programming.ipynb │ ├── 03_recap_of_rdbms_concepts.ipynb │ ├── 04_setup_database_client_libraries.ipynb │ ├── 05_function_get_database_connection.ipynb │ ├── 06_creating_database_table.ipynb │ ├── 07_inserting_data_into_table.ipynb │ ├── 08_updating_existing_table_data.ipynb │ ├── 09_deleting_data_from_table.ipynb │ ├── 10_querying_data_from_table.ipynb │ └── 11_crud.ipynb ├── Section 27 - Programming Essentials Using Python - Database Programming Bath Operation ├── Links │ └── Links ├── Postgre Code │ ├── 01 - Insert.sql │ └── 02 - Select.sql └── Python Code │ ├── 01_database_programming_batch_operations.ipynb │ ├── 02_overview_of_database_programming.ipynb │ ├── 03_recap_of_rdbms_concepts.ipynb │ ├── 04_recap_of_insert.ipynb │ ├── 05_function_get_database_connection.ipynb │ ├── 06_creating_database_table.ipynb │ ├── 07_inserting_data_into_table.ipynb │ ├── 08_updating_existing_table_data.ipynb │ ├── 09_deleting_data_from_table.ipynb │ ├── 10_querying_data_from_table.ipynb │ ├── 11_crud.ipynb │ ├── 12_preparing_database.ipynb │ ├── 13_reading_data_from_file.ipynb │ ├── 14 - Batch Loading of data.ipynb │ └── 15 - Best Practices - Batch Loading.ipynb ├── Section 28 - Programming Essentials Using Python - Processing JSON Data ├── 01 - Overview of JSON.ipynb ├── 02 - JSON Data Types.ipynb ├── 03 - 
Create JSON String.ipynb ├── 04 - Process JSON String.ipynb ├── 05 - Single JSON in Files.ipynb ├── 06 - Multiple JSON Documents in files.ipynb ├── 07 - Process JSON using Pandas.ipynb ├── 08 - Differente JSON Formats supported by Pandas.ipynb ├── 09 - Common Use Cases for JSON.ipynb ├── 10 - Processing JSON Data.ipynb ├── 11 - Write to JSON Files using JSON.ipynb └── 12 - Write to JSON files using pandas.ipynb ├── Section 29 - Programming Essentials Using Python - Processing REST Payloads ├── 01_Overview_of_REST_APIs.ipynb ├── 02_Using_curl_command.ipynb ├── 03_Overview_of_Postman.ipynb ├── 04_Getting_Started_with_requests.ipynb ├── 05_Convert_REST_Payload_to_Python_Objects.ipynb ├── 06_Process_REST_Payload_using_Collection_Operations.ipynb └── 07_Process_REST_Payload_using_pandas.ipynb ├── Section 30 - Understanding Python Virtual Environments ├── 01_Accessing_Python_through_PowerShell.txt ├── 02_Create_Virtual_Environment_for_Web_Application.txt ├── 03_Reviewing_Dependencies_Virtual_Environment.txt ├── 04_Installing_Dependencies_Web_applications.txt ├── 05_Getting_Details_About_PIP.txt ├── 06_Uninstall_Packages_Using_PIP.txt ├── 07_Cleanup_Virtual_Environment.txt ├── 08_Recreate_and_Activate_Virtual_Environment_Web_Application.txt ├── 09_Define_Requeriments_File_Web_Application.txt ├── 10_Install_Dependencies_Web_Application.txt ├── 11_Create_Virtual_Environment_Data_Engineering.txt └── 12_Install_Dependencias_data_Engineering_Application.txt ├── Section 31 - Overview of Pycharm for Python Application Development ├── Link │ └── 01_Installation_of_Pycharm_Windowns.txt └── gettingStarted │ └── test.py ├── Section 32 - Data Copier - Getting Started ├── CMD Codes │ ├── 01_Codes_CMD │ ├── 02_Listing_the_images │ └── 03_Checking_all_docker_commands └── Manual │ ├── 01_Getting_Started.ipynb │ ├── 02_Problem_Statement_Data_Copier.ipynb │ ├── 03_Setup_Docker.ipynb │ ├── 04_Quick_Overview_of_Docker.ipynb │ ├── 05_Prepare_Dataset.ipynb │ ├── 06_Setup_Postgres_Database.ipynb │ ├── 07_Overview_of_Postgres_Database.ipynb │ ├── 08_Setup_Project_using_PyCharm.ipynb │ ├── 09_Managing_Dependencies.ipynb │ └── 10_Create_GitHub_Repository.ipynb ├── Section 33 - Data Copier - Reading Data using Pandas ├── 01_Reading_Data_using_Pandas_Introduction.ipynb ├── 02_Overview_of_Retail_Data.ipynb ├── 03_Adding_Pandas_to_Project.ipynb ├── 04_Reading_Data_using_Pandas.ipynb ├── 05_Previewing_Data_using_Pandas.ipynb ├── 06_Reading_Data_in Chunks_using_Pandas.ipynb └── 07_Dynamically_Read_Files.ipynb ├── Section 34 - Data Copier - Database Programming using Pandas ├── 01_Database_Programming_using_Pandas.ipynb ├── 02_Validate_Postgres_Setup_using_Docker.ipynb ├── 03_Add_Required_Dependencies.ipynb ├── 04_Create_Users_Table_in_retail_db_Database.ipynb ├── 05_Populating_Sample_Data_into_Users_Table.ipynb ├── 06_Reading_Data_from_Table_using_Pandas.ipynb ├── 07_Truncate_Users_Table.ipynb ├── 08_Writing_Pandas_Dataframe_to_Table.ipynb ├── 09_Validating_Users_Data.ipynb └── 10_Dropping_Users_Table.ipynb ├── Section 35 - Data Copier - Loading Data from Files to Tables ├── 01_Loading_Data_from_Files_to_Tables.ipynb ├── 02_Populating_Departments_Table.ipynb ├── 03_Validate_Departments_Table.ipynb ├── 04_Populating_Orders_Table.ipynb ├── 05_Validate_Orders_Table_in_Database.ipynb └── 06_Validate_Orders_Table_using_Pandas.ipynb ├── Section 36 - Data Copier - Modularizing the Application ├── Manual │ ├── 01_Modularizing_Data_Copier.ipynb │ ├── 02_Overview_of_main_Function.ipynb │ ├── 03_Overview_of_Environment_Variables.ipynb │ 
├── 04_Using_os_Module_for_Environment_Variables.ipynb │ ├── 05_Passing_Environment_Variables_using_Pycharm.ipynb │ ├── 06_Read_Logic_using_Pandas.ipynb │ ├── 07_Validate_Read_Logic.ipynb │ ├── 08_Write_Logic_using_Pandas.ipynb │ ├── 09_Validate_Write_Logic.ipynb │ ├── 10_Integrate_Read_and_Write_Logic.ipynb │ ├── 11_Validate_Integration_Logic.ipynb │ ├── 12_Develop_Logic_for_ultiple_Tables.ipynb │ └── 13_Validate_Logic_for_Multiple_Tables.ipynb └── Python Code │ ├── app.py │ ├── read.py │ ├── requirements.txt │ └── write.py ├── Section 37 - Data Copier - Dockerizing the Application ├── Manual │ ├── 01_Prepare_database_for_validation.txt │ ├── 02_Pull_and_validate_python_image.txt │ ├── 03_Create_and_attach_network_to_database.txt │ ├── 04_Quick_recap_about_docker_conatiners.txt │ ├── 05_Deploying_application_and_installing_docker │ ├── 06_Copy_source_data_file_into_container │ ├── 07_Add_data_copier_container │ ├── 08_Installing_os_libraries_docker │ ├── 09_Validate_network_connectivity │ ├── 10_Running_application_docker │ └── 11_Delete_docker_container └── Python Code │ ├── app.py │ ├── read.py │ ├── requirements.txt │ └── write.py ├── Section 38 - Data Copier - Using Custom Docker Image ├── Manual │ ├── 01_Getting_started_with_docker_image.txt │ ├── 02_Install_OS_modules.txt │ ├── 03_Copying_source_code.txt │ ├── 04_Adding_dependencies.txt │ ├── 05_Unsderstanding_docker.txt │ ├── 06_Mounting_data_folders.txt │ ├── 07_Add_data_copier_container.txt │ └── 08_Run_application_using_docker.txt └── Python Code │ └── app │ ├── Dockerfile.txt │ ├── app.py │ ├── read.py │ ├── requirements.txt │ └── write.py ├── Section 39 - Data Copier - Deploy and Validate Application on Remote Server ├── 01_Clone_application_on_remote.txt ├── 02_Setup_network_and_database.txt ├── 03_Setup_database_and tables_docker.txt ├── 04_Building_custom_docker.txt └── 05_Run_and_validate_dockerized_application.txt ├── Section 40 - Setup Single Node Hadoop and Spark Cluster or Lab using Docker ├── 01_setup_single_node_hadoop_cluster │ ├── 01_introduction_to_single_node_hadoop_cluster.ipynb │ ├── 02_setup_prerequisites.ipynb │ ├── 03_setup_passwordless_login.ipynb │ ├── 04_download_and_install_hadoop.ipynb │ ├── 05_configure_hadoop_hdfs.ipynb │ ├── 06_start_and_validate_hdfs.ipynb │ ├── 07_configure_hadoop_yarn.ipynb │ ├── 08_start_and_validate_yarn.ipynb │ ├── 09_managing_single_node_hadoop.ipynb │ └── 10_accessing_hadoop_uris.ipynb ├── 02_setup_hive_and_spark │ ├── 02_setup_data_sets_for_practice.ipynb │ ├── 03_download_and_install_hive.ipynb │ ├── 04_setup_database_for_hive_metastore.ipynb │ ├── 05_configure_and_setup_hive_metastore.ipynb │ ├── 06_launch_and_validate_hive.ipynb │ ├── 07_scripts_to_manage_single_node_cluster.ipynb │ ├── 08_download_and_install_spark2.ipynb │ ├── 09_configure_spark2.ipynb │ ├── 10_validate_spark2_using_clis.ipynb │ ├── 11_validate_jupyterlab_setup.ipynb │ ├── 12_integrate_spark2_with_jupyterlab.ipynb │ ├── 13_download_and_install_spark3.ipynb │ ├── 14_configure_spark3.ipynb │ ├── 15_validate_spark3_clis.ipynb │ └── 16_integrate_spark3_with_jupyterlab.ipynb └── 03_setup_single_node_kafka_cluster │ ├── 02_download_and_install_kafka.ipynb │ ├── 03_configure_and_start_zookeeper.ipynb │ ├── 04_configure_and_start_kafka_broker.ipynb │ ├── 05_scripts_to_manage_single_node_cluster.ipynb │ ├── 06_overview_of_kafka_cli.ipynb │ ├── 07_setup_retail_log_generator.ipynb │ └── 08_redirecting_logs_to_kafka_topic.ipynb ├── Section 41 - Introduction to Hadoop Eco System - Overview of HDFS ├── 
03_getting_help_or_usage.ipynb ├── 04_listing_hdfs_files.ipynb ├── 05_managing_hdfs_directories.ipynb ├── 06_copying_files_from_local_to_hdfs.ipynb ├── 07_copying_files_from_hdfs_to_local.ipynb ├── 09_previewing_data_in_hdfs_files.ipynb ├── 10_getting_file_metadata.ipynb ├── 11_hdfs_blocksize.ipynb ├── 12_hdfs_replication_factor.ipynb ├── 13_getting_hdfs_storage_usage.ipynb ├── 14_using_hdfs_stat_command.ipynb ├── 15_hdfs_file_permissions.ipynb └── 16_overriding_properties.ipynb ├── Section 42 - Data Engineering using Spark SQL - Getting Started ├── 01_getting_started.ipynb ├── 02_overview_of_spark_documentation.ipynb ├── 03_overview_of_spark_sql_cli.ipynb ├── 04_overview_of_spark_sql_properties.ipynb ├── 05_running_os_commands.ipynb ├── 06_understanding_warehouse_directory.ipynb ├── 07_managing_spark_metastore_databases.ipynb ├── 08_managing_spark_metastore_tables.ipynb ├── 09_retrieve_metadata_of_tables.ipynb ├── 10_role_of_spark_or_hive_metastore.ipynb └── 11_exercise_getting_started.ipynb ├── Section 43 - Data Engineering using Spark SQL - Basic Transformations ├── 01_basic_transformations.ipynb ├── 02_spark_sql_overview.ipynb ├── 03_define_problem_statement.ipynb ├── 04_preparing_tables.ipynb ├── 05_projecting_data.ipynb ├── 06_filtering_data.ipynb ├── 07_joining_tables_inner.ipynb ├── 08_joining_tables_outer.ipynb ├── 09_aggregating_data.ipynb ├── 10_sorting_data.ipynb └── 11_conclusion_final_solution.ipynb ├── Section 44 - Data Engineering using Spark SQL - Basic DDL and DML ├── 01_basic_ddl_and_dml.ipynb ├── 02_create_spark_metastore_tables.ipynb ├── 03_overview_of_data_types.ipynb ├── 04_adding_comments.ipynb ├── 05_loading_data_into_tables_local.ipynb ├── 06_loading_data_into_tables_hdfs.ipynb ├── 07_loading_data_append_and_overwrite.ipynb ├── 08_creating_external_tables.ipynb ├── 09_managed_vs_external_tables.ipynb ├── 10_overview_of_file_formats.ipynb ├── 11_dropping_tables_and_databases.ipynb ├── 12_truncating_tables.ipynb └── 13_exercises_managing_tables.ipynb ├── Section 45 - Data Engineering using Spark SQL - Managing Tables - DML and Partitioning ├── 01_dml_and_partitioning.ipynb ├── 02_introduction_to_partitioning.ipynb ├── 03_creating_tables_using_parquet.ipynb ├── 04_load_vs_insert.ipynb ├── 05_inserting_data_using_stage_table.ipynb ├── 06_creating_partitioned_tables.ipynb ├── 07_adding_partitions_to_tables.ipynb ├── 08_loading_data_into_partitions.ipynb ├── 09_inserting_data_into_partitions.ipynb ├── 10_using_dynamic_partition_mode.ipynb └── 11_exercises_partitioned_tables.ipynb ├── Section 46 - Data Engineering using Spark SQL - Overview of Spark SQL Functions ├── 01_predefined_functions.ipynb ├── 02_overview_of_functions.ipynb ├── 03_validating_functions.ipynb ├── 04_string_manipulation_functions.ipynb ├── 05_date_manipulation_functions.ipynb ├── 06_overview_of_numeric_functions.ipynb ├── 07_data_type_conversion.ipynb ├── 08_handling_null_values.ipynb ├── 09_using_case_and_when.ipynb └── 10_query_example_word_count.ipynb ├── Section 47 - Data Engineering using Spark SQL - Windowing Functions ├── 01_windowing_functions.ipynb ├── 02_prepare_database.ipynb ├── 03_overview_of_windowing_functions.ipynb ├── 04_aggregations_using_windowing_functions.ipynb ├── 05_using_lead_or_lag.ipynb ├── 06_getting_first_and_last_values.ipynb ├── 07_ranking_using_windowing_functions.ipynb ├── 08_order_of_execution_of_sql.ipynb ├── 09_overview_of_sub_queries.ipynb └── 10_filtering_window_function_results.ipynb ├── Section 48 - Apache Spark using Python - Data Processing Overview ├── 
03_starting_spark_context.ipynb ├── 04_overview_of_spark_read_apis.ipynb ├── 05_understand_airlines_data.ipynb ├── 06_inferring_schema.ipynb ├── 07_previewing_airlines_data.ipynb ├── 08_overview_of_dataframe_apis.ipynb ├── 09_overview_of_functions.ipynb └── 10_overview_of_spark_write_apis.ipynb ├── Section 49 - Apache Spark using Python - Processing Column Data ├── 02_predefined_functions.ipynb ├── 03_create_dummy_dataframes.ipynb ├── 04_categories_of_functions.ipynb ├── 05_special_functions_col_and_lit.ipynb ├── 06_common_string_manipulation_functions.ipynb ├── 07_extracting_strings_using_substring.ipynb ├── 08_extracting_strings_using_split.ipynb ├── 09_padding_characters_around_strings.ipynb ├── 10_trimming_characters_from_strings.ipynb ├── 11_date_and_time_manipulation_functions.ipynb ├── 12_date_and_time_arithmetic.ipynb ├── 13_using_date_and_time_trunc_functions.ipynb ├── 14_date_and_time_extract_functions.ipynb ├── 15_using_to_date_and_to_timestamp.ipynb ├── 16_using_date_format_function.ipynb ├── 17_dealing_with_unix_timestamp.ipynb ├── 18_dealing_with_nulls.ipynb └── 19_using_case_and_when.ipynb ├── Section 50 - Apache Spark using Python - Basic Transformations ├── 02_overview_of_basic_transformations.ipynb ├── 03_data_frame_for_basic_transformations.ipynb ├── 04_basic_filtering_of_data.ipynb ├── 05_filtering_example_using_dates.ipynb ├── 06_boolean_operators.ipynb ├── 07_using_in_operator_or_isin_function.ipynb ├── 08_using_like_operator_or_function.ipynb ├── 09_using_between_operator.ipynb ├── 10_dealing_with_nulls_while_filtering.ipynb ├── 11_total_aggregations.ipynb ├── 12_aggregate_data_using_groupby.ipynb ├── 13_aggregate_data_using_rollup.ipynb ├── 14_aggregate_data_using_cube.ipynb ├── 15_overview_of_sorting_data_frames.ipynb ├── 16_solution_problem_1.ipynb └── 17_solution_problem_2.ipynb ├── Section 51 - Apache Spark using Python - Joining Data Sets ├── 02_preparing_data_sets_for_joins.ipynb ├── 03_analyze_data_sets_for_joins.ipynb ├── 04_problem_statements_for_joins.ipynb ├── 05_overview_of_joins.ipynb ├── 06_using_inner_joins.ipynb ├── 07_left_or_right_outer_join.ipynb ├── 08_solutions_problem_1.ipynb ├── 09_solutions_problem_2.ipynb ├── 10_solutions_problem_3.ipynb ├── 11_solutions_problem_4.ipynb ├── 12_solutions_problem_5.ipynb ├── 13_solutions_problem_6.ipynb ├── 14_solutions_problem_7.ipynb └── 15_solutions_problem_8.ipynb ├── Section 52 - Apache Spark using Python - Spark Metastore ├── 02_overview_of_spark_metastore.ipynb ├── 03_exploring_spark_catalog.ipynb ├── 04_creating_metastore_tables_using_catalog.ipynb ├── 05_inferring_schema_for_tables.ipynb ├── 06_define_schema_for_tables_using_structtype.ipynb ├── 07_inserting_into_existing_tables.ipynb ├── 08_read_and_process_data_from_metastore_tables.ipynb ├── 09_creating_partitioned_tables.ipynb ├── 10_saving_as_partitioned_tables.ipynb ├── 11_creating_temp_views.ipynb └── 12_using_spark_sql.ipynb ├── Section 53 - Apache Spark - Development Life Cycle using Python ├── Getting Started ├── Process data using Spark APIs ├── Productionize Code ├── Read data from files ├── Write data to files └── set up a virtual environment └── Section 54 - Spark Application Execution Life Cycle and Spark UI ├── Develop Shell Script to run Spark Application ├── Develop Word Count Application └── Setup Data Set for Word Count Application /Section 02 - Setting up Environment Using AWS Cloud9/[Commands] Setup Jupyter Lab on Cloud9: -------------------------------------------------------------------------------- 1 | [Commands] Setup Jupyter 
Lab on Cloud9
2 | Steps to set up Jupyter Lab
3 | 
4 | Validate Python
5 | 
6 | Create Virtual Environment
7 | 
8 | Activate Virtual Environment
9 | 
10 | Install Jupyter Lab using pip
11 | 
12 | Start Jupyter Lab
13 | 
14 | Open the port in EC2 Security Groups for the Cloud9 Instance
15 | 
16 | Validate using Browser
17 | 
18 | mkdir delab
19 | cd delab
20 | python3 -m venv delab-venv
21 | source delab-venv/bin/activate
22 | pip install jupyterlab
23 | jupyter lab --ip 0.0.0.0
24 | # You can also run in the background using nohup
25 | nohup jupyter lab --ip 0.0.0.0 &
--------------------------------------------------------------------------------
/Section 03 - Setting up Environment - Overview of GCP and Provision Ubuntu VM/Links/Links:
--------------------------------------------------------------------------------
1 | Links
2 | 
3 | https://jupyterlab.itversity.com/02_setup_ubuntu_vm_on_gcp/02_signing_up_for_gcp.html
4 | 
5 | https://jupyterlab.itversity.com/02_setup_ubuntu_vm_on_gcp/03_understanding_gcp_web_console.html
6 | 
7 | https://jupyterlab.itversity.com/02_setup_ubuntu_vm_on_gcp/04_overview_of_gcp_pricing.html
8 | 
9 | https://jupyterlab.itversity.com/02_setup_ubuntu_vm_on_gcp/05_provision_ubuntu_18.04_vm.html
10 | 
11 | https://jupyterlab.itversity.com/02_setup_ubuntu_vm_on_gcp/06_setup_docker.html
12 | 
13 | https://jupyterlab.itversity.com/02_setup_ubuntu_vm_on_gcp/07_validating_python.html
14 | 
15 | https://jupyterlab.itversity.com/02_setup_ubuntu_vm_on_gcp/08_setup_jupyter_lab.html
--------------------------------------------------------------------------------
/Section 03 - Setting up Environment - Overview of GCP and Provision Ubuntu VM/Setting up Environment/Setup Ubuntu VM on GCP/02_signing_up_for_gcp.ipynb:
--------------------------------------------------------------------------------
1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Signing up for GCP\n", "\n", "GCP is one of the leading cloud providers. We will be primarily using it to get hands-on experience with several skills such as Linux, Python, and SQL over the duration of this course as well as other courses."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* Google provides USD 300 credit for one year to try out GCP.\n", "* Go to [Google Cloud](https://cloud.google.com) and complete the sign-up process.\n", "* Make sure to avail of the USD 300 credit.\n", "* Once you complete the sign-up process, you will get access to the GCP Web Console."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9"}}, "nbformat": 4, "nbformat_minor": 4}
--------------------------------------------------------------------------------
/Section 03 - Setting up Environment - Overview of GCP and Provision Ubuntu VM/Setting up Environment/Setup Ubuntu VM on GCP/03_understanding_gcp_web_console.ipynb:
--------------------------------------------------------------------------------
1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Understanding GCP Web Console\n", "\n", "Let us get a basic idea of the GCP Web Console."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* We can use the GCP Web Console to manage Google Cloud infrastructure. Here I am highlighting some of the capabilities.\n", " * Provision Virtual Machines\n", " * Deploy Applications\n", " * Leverage existing out-of-the-box services\n", "* We should also know how to get billing details. You can go to the Billing section, where you should be able to review the credits.\n", "* For this section, we will be primarily focusing on these.\n", " * Provision Virtual Machine\n", " * Stop and Start Virtual Machine\n", " * Make sure to configure firewalls for the virtual machine created."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9"}}, "nbformat": 4, "nbformat_minor": 4}
--------------------------------------------------------------------------------
/Section 03 - Setting up Environment - Overview of GCP and Provision Ubuntu VM/Setting up Environment/Setup Ubuntu VM on GCP/04_overview_of_gcp_pricing.ipynb:
--------------------------------------------------------------------------------
1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Overview of GCP Pricing\n", "\n", "It is very important to spend some time understanding GCP pricing for the virtual machine."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* GCP provides a Pricing Calculator to estimate the cost.\n", "* Here is the configuration for our Virtual Machine.\n", " * 16 GB RAM\n", " * At least 4 core CPU\n", " * 60 GB Storage\n", "* For Storage, we will have a fixed cost as long as it is provisioned.\n", "* For the Virtual Machine (CPU and Memory), we will be paying only for the time for which the Virtual Machine is up and running.\n", "* Best practices to get the most out of the credit or to reduce the cost of using GCP:\n", " * Ensure that you stop the Virtual Machine when you are not using it.\n", " * Make sure to use a static IP address (incurs a nominal fixed cost).\n", " * Make sure to open only those ports that are relevant to you (to avoid attacks)."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9"}}, "nbformat": 4, "nbformat_minor": 4}
--------------------------------------------------------------------------------
/Section 03 - Setting up Environment - Overview of GCP and Provision Ubuntu VM/Setting up Environment/Setup Ubuntu VM on GCP/05_provision_ubuntu_18.04_vm.ipynb:
--------------------------------------------------------------------------------
1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Provision Ubuntu 18.04 Virtual Machine\n", "\n", "Now that we have a GCP account and understand the pricing, it is time to provision an Ubuntu 18.04 Virtual Machine."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* Set up the Virtual Machine\n", "* Connect to the Virtual Machine using the Web Console.\n", "* Configure a static IP for the Virtual Machine so that the public IP does not change on reboot."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9"}}, "nbformat": 4, "nbformat_minor": 4}
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 03 - Setting up Environment - Overview of GCP and Provision Ubuntu VM/Setting up Environment/Setup Ubuntu VM on GCP/07_validating_python.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Validating Python\n", "\n", "Let us validate Python on Ubuntu VM that is provisioned."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* By default, Ubuntu 18.04 VM will have Python3 installed. You can run `python3` and launch Python CLI.\n", "* However, there might not be additional important modules such as pip, venv etc.\n", "* We need to validate and ensure that they are installed. If `pip` and `venv` are not installed you can install them using these commands.\n", "\n", "```shell\n", "sudo apt install python3-pip -y\n", "python3 -m pip install configparser\n", "\n", "sudo apt install python3-venv -y\n", "python3 -m venv testing\n", "ls -ltr\n", "rm -rf testing\n", "```"]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 03 - Setting up Environment - Overview of GCP and Provision Ubuntu VM/Setting up Environment/Setup Ubuntu VM on GCP/08_setup_jupyter_lab.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {"pycharm": {"name": "#%% md\n"}}, "source": ["## Setup Jupyter Lab\n", "\n", "Let us setup Jupyter Lab on Ubuntu VM. 
--------------------------------------------------------------------------------
/Section 03 - Setting up Environment - Overview of GCP and Provision Ubuntu VM/Setting up Environment/Setup Ubuntu VM on GCP/08_setup_jupyter_lab.ipynb:
--------------------------------------------------------------------------------
1 | {"cells": [{"cell_type": "markdown", "metadata": {"pycharm": {"name": "#%% md\n"}}, "source": ["## Setup Jupyter Lab\n", "\n", "Let us set up Jupyter Lab on the Ubuntu VM. This will also help you understand firewall configuration in GCP."]}, {"cell_type": "markdown", "metadata": {"pycharm": {"name": "#%% md\n"}}, "source": ["* Create a Python-based virtual environment - `python3 -m venv demojl`\n", "* Activate the virtual environment - `source demojl/bin/activate`\n", "* Install the required dependencies for Jupyter Lab - `pip install jupyterlab`\n", "* Launch Jupyter Lab - `jupyter lab --ip 0.0.0.0`\n", "* At this time, you will not be able to access Jupyter Lab\n", "* Go to the firewall and open the port using the GCP Web Console\n", "* Now enter the IP address and port number to access the Jupyter Lab UI."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9"}}, "nbformat": 4, "nbformat_minor": 4}
--------------------------------------------------------------------------------
/Section 05 - Setting up Environment - Setup Postgres Database on Ubuntu VM/Links/Links:
--------------------------------------------------------------------------------
1 | Links
2 | 
3 | https://jupyterlab.itversity.com/03_setup_postgres_database/03_setup_postgres_using_docker.html
4 | 
5 | 
6 | https://jupyterlab.itversity.com/03_setup_postgres_database/02_docker_cheat_sheet.html
7 | 
8 | 
9 | https://jupyterlab.itversity.com/03_setup_postgres_database/04_accessing_postgres_using_docker_cli.html
10 | 
11 | 
12 | https://jupyterlab.itversity.com/03_setup_postgres_database/05_create_database_and_user.html
13 | 
14 | 
15 | https://jupyterlab.itversity.com/03_setup_postgres_database/08_sql_workbench_and_postgres.html
16 | 
17 | 
18 | https://jupyterlab.itversity.com/03_setup_postgres_database/11_jupyter_lab_and_postgresql.html
19 | 
20 | 
21 | https://jupyterlab.itversity.com/03_setup_postgres_database/12_jupyter_lab_and_postgresql_on_ubuntu_vm.html
--------------------------------------------------------------------------------
/Section 05 - Setting up Environment - Setup Postgres Database on Ubuntu VM/Setup Postgres Database/04_accessing_postgres_using_docker_cli.ipynb:
--------------------------------------------------------------------------------
1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Accessing Postgres using Docker CLI\n", "\n", "Let us understand how to connect to the Postgres Database running as part of a Docker container using the Docker CLI."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* We can use `docker container exec` or `docker exec` to connect to the container.\n", "* You can attach to the container by running `bash` using `docker exec`.\n", "* You can also run single commands without attaching to the container - example: `docker exec -it itv_pg hostname -f`\n", "\n", "> You have to use the terminal to run these commands\n", "\n", "* Attach to the itv_pg container - `docker exec -it itv_pg bash`\n", "* Run a command to get the hostname - `hostname -f`\n", "* Run a command to connect to the Postgres Database - `psql -U postgres`\n", "* You can also directly connect to the Postgres Database using\n", "```\n", "docker exec -it itv_pg psql -U postgres\n", "```\n", "* Use `\\q` to come out of the Postgres CLI."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4}
"language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 06 - Database Essentials - Getting Started/Getting Started/02_connecting_to_database.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Connecting to Database\n", "\n", "We will be using JupyterHub based environment to master Postgresql. Let us go through the steps involved to get started using JupyterHub environment."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* We will use Python Kernel with sql magic command and for that we need to first load the sql extension.\n", "* Create environment variable `DATABASE_URL` using SQL Alchemy format.\n", "* Write a simple query to get data from information schema table to validate database connectivity.\n", "* Here is the information you can leverage to connect to the database.\n", " * **User Name:** YOUR_OS_USER_sms_user\n", " * **Database Name:** YOUR_OS_USER_sms_db\n", " * **Password:** Your lab password provided by us"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["%load_ext sql"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["%env DATABASE_URL=postgresql://itversity_sms_user:sms_password@localhost:5432/itversity_sms_db"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["%sql SELECT * FROM information_schema.tables LIMIT 10"]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 06 - Database Essentials - Getting Started/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://postgresql.itversity.com/01_getting_started/02_connecting_to_database.html# 4 | 5 | https://postgresql.itversity.com/01_getting_started/03_using_psql.html 6 | 7 | https://postgresql.itversity.com/01_getting_started/04_setup_postgres_using_docker.html 8 | 9 | https://postgresql.itversity.com/01_getting_started/05_setup_sql_workbench.html 10 | 11 | https://postgresql.itversity.com/01_getting_started/06_sql_workbench_and_postgres.html 12 | 13 | https://postgresql.itversity.com/01_getting_started/07_sql_workbench_features.html 14 | 15 | https://postgresql.itversity.com/01_getting_started/08_data_loading_utilities.html 16 | 17 | https://postgresql.itversity.com/01_getting_started/09_loading_data_postgres_in_docker.html -------------------------------------------------------------------------------- /Section 06 - Database Essentials - Getting Started/Script/01 - Create Table.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE users ( 2 | user_id SERIAL PRIMARY KEY, 3 | user_first_name VARCHAR(30) NOT NULL, 4 | user_last_name VARCHAR(30) NOT NULL, 5 | user_email_id VARCHAR(50) 
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/DML or CRUD Operations/04_overview_of_database_operations.ipynb:
--------------------------------------------------------------------------------
1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Database Operations - Overview\n", "\n", "Let us get an overview of the Database Operations we typically perform on a regular basis. They are broadly categorized into the following:"]}, {"cell_type": "markdown", "metadata": {}, "source": ["* DDL - Data Definition Language\n", " * CREATE/ALTER/DROP Tables\n", " * CREATE/ALTER/DROP Indexes\n", " * Add constraints to tables\n", " * CREATE/ALTER/DROP Views\n", " * CREATE/ALTER/DROP Sequences\n", "* DML - Data Manipulation Language\n", " * Inserting new data into the table\n", " * Updating existing data in the table\n", " * Deleting existing data from the table\n", "* DQL - Data Query Language\n", " * Read the data from the table\n", "\n", "On top of these we also use TCL (Transaction Control Language), which includes **COMMIT** and **ROLLBACK**.\n", "\n", "As part of this section, we will primarily focus on basic DDL and DML in the subsequent topics."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4}
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/DML or CRUD Operations/05_crud_operations.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "## CRUD Operations\n",
8 |     "\n",
9 |     "Let us get an overview of CRUD Operations. They are nothing but DML statements and queries to read the data while performing database operations."
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "markdown",
14 |    "metadata": {},
15 |    "source": [
16 |     "* CRUD is widely used from an application development perspective.\n",
17 |     "* C - CREATE (INSERT)\n",
18 |     "* R - READ (SELECT)\n",
19 |     "* U - UPDATE (UPDATE)\n",
20 |     "* D - DELETE (DELETE)\n",
21 |     "\n",
22 |     "As part of the application development process we perform CRUD Operations using REST APIs."
23 |    ]
24 |   }
25 |  ],
26 |  "metadata": {
27 |   "kernelspec": {
28 |    "display_name": "Python 3 (ipykernel)",
29 |    "language": "python",
30 |    "name": "python3"
31 |   },
32 |   "language_info": {
33 |    "codemirror_mode": {
34 |     "name": "ipython",
35 |     "version": 3
36 |    },
37 |    "file_extension": ".py",
38 |    "mimetype": "text/x-python",
39 |    "name": "python",
40 |    "nbconvert_exporter": "python",
41 |    "pygments_lexer": "ipython3",
42 |    "version": "3.9.7"
43 |   }
44 |  },
45 |  "nbformat": 4,
46 |  "nbformat_minor": 4
47 | }
48 | 
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Exercises/01 - Create Table.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Exercise 1 - Create Table
3 | 
4 | Create table - courses
5 | course_id - sequence generated integer and primary key
6 | course_name - which holds alpha numeric or string values up to 60 characters
7 | course_author - which holds the name of the author up to 40 characters
8 | course_status - which holds one of these values (published, draft, inactive).
9 | course_published_dt - which holds date type value.
10 | */
11 | CREATE TABLE courses
12 | (
13 |     course_id SERIAL PRIMARY KEY
14 |     , course_name VARCHAR(60) NOT NULL
15 |     , course_author VARCHAR(40) NOT NULL
16 |     , course_status VARCHAR(10) NOT NULL
17 |     , course_published_dt DATE
18 | );
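19 | 
20 | -- Note (not part of the original solution): a quick way to validate the table
21 | -- definition is to query information_schema.columns, in the same spirit as the
22 | -- information schema scripts elsewhere in this section.
23 | SELECT column_name, data_type, character_maximum_length
24 | FROM information_schema.columns
25 | WHERE table_name = 'courses'
26 | ORDER BY ordinal_position;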
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Exercises/02 - Insert Into.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Exercise 2 - Inserting Data
3 | Insert data into courses using the data provided. Make sure id is system generated.
4 | */
5 | INSERT INTO
6 |     courses
7 | (
8 |     course_name
9 |     , course_author
10 |     , course_status
11 |     , course_published_dt
12 | )
13 | VALUES (
14 |     'Programming using Python'
15 |     , 'Bob Dillon'
16 |     , 'published'
17 |     , '2020-09-30'
18 | ), (
19 |     'Data Engineering using Python'
20 |     , 'Bob Dillon'
21 |     , 'published'
22 |     , '2020-07-15'
23 | ), (
24 |     'Data Engineering using Scala'
25 |     , 'Elvis Presley'
26 |     , 'draft'
27 |     , NULL -- no published date yet; each row must supply all 4 listed columns
28 | ), (
29 |     'Programming using Scala'
30 |     , 'Elvis Presley'
31 |     , 'published'
32 |     , '2020-05-12'
33 | ), (
34 |     'Programming using Java'
35 |     , 'Mike Jack'
36 |     , 'inactive'
37 |     , '2020-08-10'
38 | ), (
39 |     'Web Applications - Python Flask'
40 |     , 'Bob Dillon'
41 |     , 'inactive'
42 |     , '2020-07-20'
43 | ), (
44 |     'Web Applications - Java Spring'
45 |     , 'Mike Jack'
46 |     , 'draft'
47 |     , NULL
48 | ), (
49 |     'Pipeline Orchestration - Python'
50 |     , 'Bob Dillon'
51 |     , 'draft'
52 |     , NULL
53 | ), (
54 |     'Streaming Pipelines - Python'
55 |     , 'Bob Dillon'
56 |     , 'published'
57 |     , '2020-10-05'
58 | ), (
59 |     'Web Applications - Scala Play'
60 |     , 'Elvis Presley'
61 |     , 'inactive'
62 |     , '2020-09-30'
63 | ), (
64 |     'Web Applications - Python Django'
65 |     , 'Bob Dillon'
66 |     , 'published'
67 |     , '2020-06-23'
68 | ), (
69 |     'Server Automation - Ansible'
70 |     , 'Uncle Sam'
71 |     , 'published'
72 |     , '2020-07-05'
73 | );
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Exercises/03 - Updating Data.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Exercise 3 - Updating Data
3 | Update the status of all the draft courses related to Python and Scala to published along with
4 | the course_published_dt using system date.
5 | Provide the update statement as answer for this exercise
6 | */
7 | UPDATE
8 |     courses
9 | SET
10 |     course_status = 'published'
11 |     , course_published_dt = CURRENT_DATE
12 | WHERE
13 |     course_status = 'draft'
14 |     AND (
15 |         course_name LIKE '%Python%'
16 |         OR course_name LIKE '%Scala%'
17 |     ); -- parentheses are required: AND binds tighter than OR
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Exercises/04 - Deleting Data.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Exercise 4 - Deleting Data
3 | Delete all the courses which are neither in draft mode nor published.
4 | Provide the delete statement as answer for this exercise.
5 | */
6 | DELETE FROM
7 |     courses
8 | WHERE
9 |     course_status NOT IN ('draft', 'published');
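10 | 
11 | -- Note (not part of the original solution): this section also covers
12 | -- transactions (COMMIT and ROLLBACK), so one way to try the delete safely is
13 | -- to wrap it in a transaction and inspect the result before committing.
14 | BEGIN;
15 | DELETE FROM courses WHERE course_status NOT IN ('draft', 'published');
16 | SELECT course_status, COUNT(1) FROM courses GROUP BY course_status;
17 | ROLLBACK; -- or COMMIT once the result looks right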
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Exercises/05 - Selecting.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Validation - Get count of all published courses by author and make sure output is sorted in descending order by count.
3 | */
4 | SELECT
5 |     course_author
6 |     , COUNT(1) AS course_count
7 | FROM
8 |     courses
9 | WHERE
10 |     course_status = 'published'
11 | GROUP BY
12 |     course_author
13 | ORDER BY
14 |     course_count DESC;
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Links/Links:
--------------------------------------------------------------------------------
1 | Links
2 | 
3 | https://postgresql.itversity.com/02_dml_or_crud_operations/04_overview_of_database_operations.html
4 | 
5 | https://postgresql.itversity.com/02_dml_or_crud_operations/05_crud_operations.html
6 | 
7 | https://postgresql.itversity.com/02_dml_or_crud_operations/06_creating_table.html
8 | 
9 | https://postgresql.itversity.com/02_dml_or_crud_operations/07_inserting_data.html
10 | 
11 | https://postgresql.itversity.com/02_dml_or_crud_operations/08_updating_data.html
12 | 
13 | https://postgresql.itversity.com/02_dml_or_crud_operations/09_deleting_data.html
14 | 
15 | https://postgresql.itversity.com/02_dml_or_crud_operations/10_overview_of_transactions.html
16 | 
17 | https://postgresql.itversity.com/02_dml_or_crud_operations/11_exercises_database_operations.html
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Scripts/01 - Information Schema Tables.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 |     *
3 | FROM
4 |     information_schema.tables
5 | WHERE
6 |     table_catalog = 'sms_db'
7 | AND
8 |     table_schema = 'public'
9 | LIMIT
10 |     10;
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Scripts/02 - Information Schema Columns.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 |     *
3 | FROM
4 |     information_schema.columns
5 | WHERE
6 |     table_catalog = 'sms_db'
7 | AND
8 |     table_schema = 'public'
9 | LIMIT
10 |     10;
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Scripts/03 - Users.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 |     *
3 | FROM
4 |     users;
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Scripts/04 - Truncate Table.sql:
--------------------------------------------------------------------------------
1 | TRUNCATE TABLE users;
--------------------------------------------------------------------------------
/Section 07 - Database Essentials - Database Operations/Scripts/05 - Insert Into.sql:
--------------------------------------------------------------------------------
1 | INSERT INTO users
2 | (
3 |     user_first_name
4 |     , user_last_name
5 |     , user_email_id
6 | )
7 | VALUES (
8 |     'Scott'
9 |     , 'Tiger'
10 |     , 'scott@tiger.com'
11 | );
12 | 
13 | 
14 | INSERT INTO users
15 | (
16 |     user_first_name
17 |     , user_last_name
18 |     , user_email_id
19 | )
20 | VALUES (
21 |     'Donald'
22 |     , 'Duck'
23 |     , 'donald@duck.com'
24 | );
25 | 
26 | 
27 | INSERT INTO users
28 | (
29 |     user_first_name
30 |     , user_last_name
31 |     , user_email_id
32 |     , user_role
33 |     , is_active
34 | )
35 | VALUES (
36 |     'Mickey'
37 |     , 'Mouse'
38 |     , 'mickey@mouse.com'
39 |     , 'U'
40 |     , true
41 | );
42 | 
43 | 
44 | INSERT INTO users
45 | (
46 |     user_first_name
47 |     , user_last_name
48 |     , user_email_id
49 |     , user_password
50 |     , user_role
51 |     , is_active
52 | )
53 | VALUES (
54 |     'Gordan'
55 |     , 'Bradock'
56 |     , 'gbradock0@barnesandnoble.com'
57 |     , 'h9LAz7p7ub'
58 |     , 'U'
59 |     , true
60 | ), (
61 |     'Tobe'
62 |     , 'Lyness'
63 |     , 'tlyness1@paginegialle.it'
64 |     , 'oEofndp'
65 |     , 'U'
66 |     , true
67 | ), (
68 |     'Addie'
69 |     , 'Mesias'
70 |     , 'amesias2@twitpic.com'
71 |     , 'ih7Y69u56'
72 |     , 'U'
73 |     , true
74 | );
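75 | 
76 | -- Note (not part of the original script): Postgres can return generated values,
77 | -- such as the SERIAL user_id, as part of the INSERT itself via the RETURNING
78 | -- clause. The sample row here is illustrative.
79 | INSERT INTO users
80 |     (user_first_name, user_last_name, user_email_id)
81 | VALUES
82 |     ('Daffy', 'Duck', 'daffy@duck.com')
83 | RETURNING user_id, created_dt;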
user_role 51 | , is_active 52 | ) 53 | VALUES ( 54 | 'Gordan' 55 | , 'Bradock' 56 | , 'gbradock0@barnesandnoble.com' 57 | , 'h9LAz7p7ub' 58 | , 'U' 59 | , true 60 | ), ( 61 | 'Tobe' 62 | , 'Lyness' 63 | , 'tlyness1@paginegialle.it' 64 | , 'oEofndp' 65 | , 'U' 66 | , true 67 | ), ( 68 | 'Addie' 69 | , 'Mesias' 70 | , 'amesias2@twitpic.com' 71 | , 'ih7Y69u56' 72 | , 'U' 73 | , true 74 | ); -------------------------------------------------------------------------------- /Section 07 - Database Essentials - Database Operations/Scripts/06 - Update.sql: -------------------------------------------------------------------------------- 1 | UPDATE 2 | users 3 | SET 4 | user_role = 'A' 5 | WHERE 6 | user_id = 1; 7 | 8 | 9 | UPDATE 10 | users 11 | SET 12 | user_email_validated = true, 13 | is_active = true; 14 | 15 | 16 | UPDATE 17 | users 18 | SET 19 | user_email_id = UPPER(user_email_id); 20 | 21 | 22 | ALTER TABLE 23 | users 24 | ADD COLUMN 25 | user_full_name VARCHAR(50); 26 | 27 | 28 | UPDATE 29 | users 30 | SET 31 | user_full_name = UPPER(CONCAT(user_first_name, ' ', user_last_name)); -------------------------------------------------------------------------------- /Section 07 - Database Essentials - Database Operations/Scripts/07 - Delete.sql: -------------------------------------------------------------------------------- 1 | DELETE FROM 2 | users 3 | WHERE 4 | user_password IS NULL; -------------------------------------------------------------------------------- /Section 08 - Database Essentials - Writing Basic SQL Queries/Exercises/01 - Customer Order Count.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 1 - Customer order count 3 | Get order count per customer for the month of 2014 January. 4 | 5 | Tables - orders and customers 6 | Data should be sorted in descending order by count and ascending order by customer id. 7 | Output should contain customer_id, customer_first_name, customer_last_name and customer_order_count. 8 | */ 9 | SELECT 10 | c.customer_id 11 | , c.customer_fname 12 | , c.customer_lname 13 | , COUNT(o.order_id) AS customer_order_count 14 | FROM 15 | orders AS o 16 | INNER JOIN 17 | customers AS c 18 | ON 19 | o.order_customer_id = c.customer_id 20 | WHERE 21 | TO_CHAR(o.order_date, 'yyyy-MM') = '2014-01' 22 | GROUP BY 23 | c.customer_id 24 | , c.customer_fname 25 | , c.customer_lname 26 | ORDER BY 27 | customer_order_count DESC 28 | , c.customer_id; -------------------------------------------------------------------------------- /Section 08 - Database Essentials - Writing Basic SQL Queries/Exercises/02 - Dormant Customers.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 2 - Dormant Customers 3 | Get the customer details who have not placed any order for the month of 2014 January.
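(Editorial note added here: "have not placed any order" calls for an anti-join rather than a plain inner join - take every customer, LEFT JOIN only the 2014 January orders, and keep the customers for whom no matching order was found, i.e. o.order_id IS NULL. A NOT EXISTS subquery is an equivalent formulation. The corrected solution below uses the LEFT JOIN form.)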
4 | 5 | Tables - orders and customers 6 | Data should be sorted in ascending order by customer_id 7 | Output should contain all the fields from customers 8 | */ 9 | SELECT 10 | c.* 11 | FROM 12 | customers AS c 13 | LEFT OUTER JOIN 14 | orders AS o 15 | ON 16 | c.customer_id = o.order_customer_id 17 | AND 18 | TO_CHAR(o.order_date, 'yyyy-MM') = '2014-01' 19 | WHERE 20 | o.order_id IS NULL 21 | ORDER BY 22 | c.customer_id; -------------------------------------------------------------------------------- /Section 08 - Database Essentials - Writing Basic SQL Queries/Exercises/03 - Revenue Per Customer.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 3 - Revenue Per Customer 3 | Get the revenue generated by each customer for the month of 2014 January 4 | 5 | Tables - orders, order_items and customers 6 | Data should be sorted in descending order by revenue and then ascending order by customer_id 7 | Output should contain customer_id, customer_first_name, customer_last_name, customer_revenue. 8 | If there are no orders placed by customer, then the corresponding revenue for a given customer should be 0. 9 | Consider only COMPLETE and CLOSED orders 10 | */ 11 | SELECT 12 | c.customer_id 13 | , c.customer_fname 14 | , c.customer_lname 15 | , COALESCE(ROUND(SUM(oi.order_item_subtotal::numeric), 2), 0) AS customer_revenue 16 | FROM 17 | customers AS c 18 | LEFT OUTER JOIN 19 | orders AS o 20 | ON 21 | o.order_customer_id = c.customer_id 22 | AND 23 | o.order_status IN ('CLOSED', 'COMPLETE') 24 | AND 25 | TO_CHAR(o.order_date, 'yyyy-MM') = '2014-01' 26 | LEFT OUTER JOIN 27 | order_items AS oi 28 | ON 29 | oi.order_item_order_id = o.order_id 30 | GROUP BY 31 | c.customer_id 32 | , c.customer_fname 33 | , c.customer_lname 34 | ORDER BY 35 | customer_revenue DESC 36 | , c.customer_id; -------------------------------------------------------------------------------- /Section 08 - Database Essentials - Writing Basic SQL Queries/Exercises/04 - Revenue Per Category.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 4 - Revenue Per Category 3 | Get the revenue generated for each category for the month of 2014 January 4 | 5 | Tables - orders, order_items, products and categories 6 | Data should be sorted in ascending order by category_id. 7 | Output should contain all the fields from category along with the revenue as category_revenue. 8 | Consider only COMPLETE and CLOSED orders 9 | */ 10 | SELECT 11 | c.* 12 | , ROUND(SUM(oi.order_item_subtotal::numeric), 2) AS category_revenue 13 | FROM 14 | order_items AS oi 15 | INNER JOIN 16 | orders AS o 17 | ON 18 | oi.order_item_order_id = o.order_id 19 | INNER JOIN 20 | products AS p 21 | ON 22 | p.product_id = oi.order_item_product_id 23 | INNER JOIN 24 | categories AS c 25 | ON 26 | c.category_id = p.product_category_id 27 | WHERE 28 | o.order_status IN ('CLOSED', 'COMPLETE') 29 | AND 30 | TO_CHAR(o.order_date, 'yyyy-MM') = '2014-01' 31 | GROUP BY 32 | c.category_id 33 | ORDER BY 34 | c.category_id; -------------------------------------------------------------------------------- /Section 08 - Database Essentials - Writing Basic SQL Queries/Exercises/05 - Product Count Per Department.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 5 - Product Count Per Department 3 | Get the product count for each department.
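(Editorial note added here: in this data model the join path runs departments -> categories -> products, i.e. departments.department_id = categories.category_department_id and categories.category_id = products.product_category_id. The corrected solution below follows that path and returns all department fields plus the count.)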
4 | 5 | Tables - departments, categories, products 6 | Data should be sorted in ascending order by department_id 7 | Output should contain all the fields from department and the product count as product_count 8 | */ 9 | SELECT 10 | d.* 11 | , COUNT(p.product_id) AS product_count 12 | FROM 13 | departments AS d 14 | INNER JOIN 15 | categories AS c 16 | ON 17 | c.category_department_id = d.department_id 18 | INNER JOIN 19 | products AS p 20 | ON 21 | p.product_category_id = c.category_id 22 | GROUP BY 23 | d.department_id 24 | ORDER BY 25 | d.department_id; -------------------------------------------------------------------------------- /Section 08 - Database Essentials - Writing Basic SQL Queries/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://postgresql.itversity.com/03_writing_basic_sql_queries/02_standard_transformations.html 4 | 5 | https://postgresql.itversity.com/03_writing_basic_sql_queries/03_overview_of_data_model.html 6 | 7 | https://postgresql.itversity.com/03_writing_basic_sql_queries/04_define_problem_statement.html 8 | 9 | https://postgresql.itversity.com/03_writing_basic_sql_queries/05_preparing_tables.html 10 | 11 | https://postgresql.itversity.com/03_writing_basic_sql_queries/06_selecting_or_projecting_data.html 12 | 13 | https://postgresql.itversity.com/03_writing_basic_sql_queries/07_filtering_data.html 14 | 15 | https://postgresql.itversity.com/03_writing_basic_sql_queries/08_joining_tables_inner.html 16 | 17 | https://postgresql.itversity.com/03_writing_basic_sql_queries/09_joining_tables_outer.html 18 | 19 | https://postgresql.itversity.com/03_writing_basic_sql_queries/10_performing_aggregations.html 20 | 21 | https://postgresql.itversity.com/03_writing_basic_sql_queries/11_sorting_data.html 22 | 23 | https://postgresql.itversity.com/03_writing_basic_sql_queries/12_solution_daily_product_revenue.html 24 | 25 | https://postgresql.itversity.com/03_writing_basic_sql_queries/13_exercises_basic_sql_queries.html -------------------------------------------------------------------------------- /Section 08 - Database Essentials - Writing Basic SQL Queries/Scripts/01 - Selecting.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | current_database(); 3 | 4 | 5 | SELECT 6 | * 7 | FROM 8 | information_schema.tables 9 | WHERE 10 | table_catalog = 'retail_db' 11 | AND 12 | table_schema = 'public' 13 | LIMIT 14 | 10; 15 | 16 | 17 | SELECT 18 | * 19 | FROM 20 | orders 21 | LIMIT 22 | 10; 23 | 24 | 25 | SELECT 26 | * 27 | FROM 28 | order_items 29 | LIMIT 30 | 10; 31 | 32 | 33 | SELECT 34 | * 35 | FROM 36 | products 37 | LIMIT 38 | 10; 39 | 40 | 41 | SELECT 42 | COUNT(1) 43 | FROM 44 | orders; 45 | 46 | 47 | SELECT 48 | COUNT(1) 49 | FROM 50 | order_items; 51 | 52 | 53 | SELECT 54 | COUNT(1) 55 | FROM 56 | products; -------------------------------------------------------------------------------- /Section 08 - Database Essentials - Writing Basic SQL Queries/Scripts/02 - Distinct.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | * 3 | FROM 4 | orders 5 | LIMIT 6 | 10; 7 | 8 | 9 | SELECT 10 | * 11 | FROM 12 | information_schema.columns 13 | WHERE 14 | table_catalog = 'retail_db' 15 | AND 16 | table_name = 'orders'; 17 | 18 | 19 | SELECT 20 | order_customer_id 21 | , order_date 22 | , order_status 23 | FROM 24 | orders 25 | LIMIT 26 | 10; 27 | 28 | 29 | SELECT 30 |
order_customer_id 31 | , TO_CHAR(order_date, 'yyyy-MM') 32 | , order_status 33 | FROM 34 | orders 35 | LIMIT 36 | 10; 37 | 38 | 39 | SELECT 40 | order_customer_id 41 | , TO_CHAR(order_date, 'yyyy-MM') AS order_month 42 | , order_status 43 | FROM 44 | orders 45 | LIMIT 46 | 10; 47 | 48 | 49 | SELECT 50 | DISTINCT TO_CHAR(order_date, 'yyyy-MM') AS order_month 51 | FROM 52 | orders; 53 | 54 | 55 | SELECT 56 | COUNT(1) 57 | FROM 58 | orders; 59 | 60 | 61 | SELECT 62 | COUNT(DISTINCT TO_CHAR(order_date, 'yyyy-MM')) AS distinct_month_count 63 | FROM 64 | orders; -------------------------------------------------------------------------------- /Section 08 - Database Essentials - Writing Basic SQL Queries/Writing Basic SQL Queries/02_standard_transformations.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Standard Transformations\n", "\n", "Here are some of the transformations we typically perform on a regular basis."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* Projection of data\n", "* Filtering data\n", "* Performing Aggregations\n", "* Joins\n", "* Sorting\n", "* Ranking (will be covered as part of advanced queries)"]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 09 - Database Essentials - Creating Tables and Indexes/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://postgresql.itversity.com/04_creating_tables_and_indexes/02_data_definition_language.html 4 | 5 | https://postgresql.itversity.com/04_creating_tables_and_indexes/03_overview_of_data_types.html 6 | 7 | https://postgresql.itversity.com/04_creating_tables_and_indexes/04_adding_or_modifying_columns.html 8 | 9 | https://postgresql.itversity.com/04_creating_tables_and_indexes/05_different_types_of_constraints.html 10 | 11 | https://postgresql.itversity.com/04_creating_tables_and_indexes/06_managing_constraints.html 12 | 13 | https://postgresql.itversity.com/04_creating_tables_and_indexes/07_indexes_on_tables.html 14 | 15 | https://postgresql.itversity.com/04_creating_tables_and_indexes/08_indexes_for_constraints.html 16 | 17 | https://postgresql.itversity.com/04_creating_tables_and_indexes/09_overview_of_sequences.html 18 | 19 | https://postgresql.itversity.com/04_creating_tables_and_indexes/10_truncating_tables.html 20 | 21 | https://postgresql.itversity.com/04_creating_tables_and_indexes/11_dropping_tables.html -------------------------------------------------------------------------------- /Section 09 - Database Essentials - Creating Tables and Indexes/Scripts/01 - DDL.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE 2 | users 3 | ADD COLUMN 4 | last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP; 5 | 6 | 7 | COMMENT ON TABLE 8 | users 9 | IS 10 | 'Stores all user details'; 11 | 12 | 13 | COMMENT ON COLUMN 14 | users.user_id 15 | IS 16 | 'Surrogate Key'; 17 | 18 | 19 | COMMENT ON COLUMN 20 | users.user_first_name 21 | IS 22 | 'User First Name'; 23 | 24 | 25 | COMMENT ON COLUMN 26 |
users.user_role 27 | IS 28 | 'U for user, A for admin'; 29 | 30 | 31 | SELECT 32 | * 33 | FROM 34 | information_schema.tables 35 | WHERE 36 | table_name = 'users'; 37 | 38 | 39 | SELECT 40 | * 41 | FROM 42 | information_schema.columns 43 | WHERE 44 | table_name = 'users' 45 | ORDER BY 46 | ordinal_position; -------------------------------------------------------------------------------- /Section 09 - Database Essentials - Creating Tables and Indexes/Scripts/02 - DT.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE users_01 2 | ( 3 | user_id INT 4 | , user_first_name VARCHAR(30) NOT NULL 5 | , user_last_name VARCHAR(30) NOT NULL 6 | , user_email_id VARCHAR(50) NOT NULL 7 | , user_email_validated BOOLEAN 8 | , user_password VARCHAR(200) 9 | , user_role VARCHAR(1) 10 | , is_active BOOLEAN 11 | , created_dt DATE DEFAULT CURRENT_DATE 12 | ); 13 | 14 | 15 | SELECT 16 | table_catalog 17 | , table_name 18 | , column_name 19 | , data_type 20 | , character_maximum_length 21 | , column_default 22 | , is_nullable 23 | , ordinal_position 24 | FROM 25 | information_schema.columns 26 | WHERE 27 | table_name = 'users_01' 28 | ORDER BY 29 | ordinal_position; -------------------------------------------------------------------------------- /Section 10 - Database Essentials - Partitioning Tables and Indexes/Exercises/02 - Load Partitioned Tables.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 2 3 | Let us load and validate data in the partitioned table. 4 | 5 | Load the data from orders into orders_part. 6 | 7 | Get count on orders_part as well as all the 14 partitions. You should get 0 for the default partition 8 | and all the records should be distributed across the other 13 partitions.
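A sketch of one possible load-and-validate flow (an editorial addition; it assumes the partitioned table from Exercise 1 is named orders_part and that its default partition is named orders_part_default - adjust the names to whatever your Exercise 1 solution used):

INSERT INTO orders_part
SELECT * FROM orders;

SELECT COUNT(1) FROM orders_part;
SELECT COUNT(1) FROM orders_part_default; -- should return 0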
9 | */ -------------------------------------------------------------------------------- /Section 10 - Database Essentials - Partitioning Tables and Indexes/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/02_overview_of_partitioning.html 4 | 5 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/03_list_partitioning.html 6 | 7 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/04_managing_partitions_list.html 8 | 9 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/05_manipulating_data.html 10 | 11 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/06_range_partitioning.html 12 | 13 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/07_managing_partitions_range.html 14 | 15 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/09_hash_partitioning.html 16 | 17 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/10_managing_partitions_hash.html 18 | 19 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/11_usage_scenarios.html 20 | 21 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/12_sub_partitioning.html 22 | 23 | https://postgresql.itversity.com/05_partitioning_tables_and_indexes/13_exercises_partitioning_tables.html -------------------------------------------------------------------------------- /Section 10 - Database Essentials - Partitioning Tables and Indexes/Partitioning Tables and Indexes/02_overview_of_partitioning.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Overview of Partitioning\n", "\n", "Most of the modern database technologies support a wide variety of partitioning strategies. However, here are the most commonly used ones."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* List Partitioning\n", "* Range Partitioning\n", "* Hash Partitioning\n", "* List and Range are more widely used compared to Hash Partitioning.\n", "* We can also mix and match these to have multi-level partitioning. It is known as sub partitioning.\n", "* We can either partition a table without a primary key or with a primary key when the partition column is a prime attribute (one of the primary key columns).\n", "* Indexes can be added to the partitioned table.
If we create an index on the main table, it is a global index; if we create an index on each partition, it is a partitioned index."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 10 - Database Essentials - Partitioning Tables and Indexes/Scripts/01 - List Partitioning.sql: -------------------------------------------------------------------------------- 1 | -- List Partitioning 2 | CREATE TABLE users 3 | ( 4 | user_id SERIAL PRIMARY KEY 5 | , user_first_name VARCHAR(30) NOT NULL 6 | , user_last_name VARCHAR(30) NOT NULL 7 | , user_email_id VARCHAR(50) NOT NULL 8 | , user_email_validated BOOLEAN DEFAULT FALSE 9 | , user_password VARCHAR(200) 10 | , user_role VARCHAR(1) NOT NULL DEFAULT 'U' 11 | , is_active BOOLEAN DEFAULT FALSE 12 | , created_dt DATE DEFAULT CURRENT_DATE 13 | , last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP 14 | ); 15 | 16 | 17 | DROP TABLE IF EXISTS 18 | users_part; 19 | 20 | 21 | CREATE TABLE users_part 22 | ( 23 | user_id SERIAL 24 | , user_first_name VARCHAR(30) NOT NULL 25 | , user_last_name VARCHAR(30) NOT NULL 26 | , user_email_id VARCHAR(50) NOT NULL 27 | , user_email_validated BOOLEAN DEFAULT FALSE 28 | , user_password VARCHAR(200) 29 | , user_role VARCHAR(1) NOT NULL DEFAULT 'U' 30 | , is_active BOOLEAN DEFAULT FALSE 31 | , created_dt DATE DEFAULT CURRENT_DATE 32 | , last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP 33 | , PRIMARY KEY (user_role, user_id) 34 | ) PARTITION BY LIST(user_role); 35 | 36 | 37 | CREATE INDEX 38 | users_part_email_id_idx 39 | ON 40 | users_part(user_email_id); 41 | 42 | 43 | INSERT INTO users 44 | ( 45 | user_first_name 46 | , user_last_name 47 | , user_email_id 48 | ) VALUES ( 49 | 'Scott' 50 | , 'Tiger' 51 | , 'scott@tiger.com' 52 | ), ( 53 | 'Donald' 54 | , 'Duck' 55 | , 'donald@duck.com' 56 | ), ( 57 | 'Mickey' 58 | , 'Mouse' 59 | , 'mickey@mouse.com' 60 | ); -------------------------------------------------------------------------------- /Section 10 - Database Essentials - Partitioning Tables and Indexes/Scripts/03 - Manipulating Data.sql: -------------------------------------------------------------------------------- 1 | -- Manipulating Data 2 | TRUNCATE TABLE 3 | users_part; 4 | 5 | 6 | INSERT INTO users_part 7 | ( 8 | user_first_name 9 | , user_last_name 10 | , user_email_id 11 | , user_role 12 | ) VALUES ( 13 | 'Scott' 14 | , 'Tiger' 15 | , 'scott@tiger.com' 16 | , 'U' 17 | ), ( 18 | 'Donald' 19 | , 'Duck' 20 | , 'donald@duck.com' 21 | , 'U' 22 | ), ( 23 | 'Mickey' 24 | , 'Mouse' 25 | , 'mickey@mouse.com' 26 | , 'U' 27 | ); 28 | 29 | SELECT * FROM 30 | users_part_u; 31 | 32 | 33 | INSERT INTO users_part 34 | ( 35 | user_first_name 36 | , user_last_name 37 | , user_email_id 38 | , user_role 39 | ) VALUES ( 40 | 'Matt' 41 | , 'Clarke' 42 | , 'matt@clarke.com' 43 | , 'A' 44 | ); 45 | 46 | 47 | SELECT 48 | * 49 | FROM 50 | users_part; 51 | 52 | 53 | UPDATE 54 | users_part 55 | SET 56 | user_role = 'A' 57 | WHERE 58 | user_email_id = 'donald@duck.com'; 59 | 60 | 61 | SELECT 62 | * 63 | FROM 64 | users_part_a; 65 | 66 | 67 | DELETE FROM 68 | users_part 69 | WHERE 70 | user_email_id = 'donald@duck.com'; 71 | 72 | 73 | DELETE FROM 74 |
users_part_u 75 | WHERE 76 | user_email_id = 'mickey@mouse.com'; 77 | 78 | 79 | SELECT 80 | * 81 | FROM 82 | users_part; -------------------------------------------------------------------------------- /Section 10 - Database Essentials - Partitioning Tables and Indexes/Scripts/04 - Range Partitioning.sql: -------------------------------------------------------------------------------- 1 | -- Range Partitioning 2 | DROP TABLE IF EXISTS 3 | users_range_part; 4 | 5 | 6 | CREATE TABLE users_range_part 7 | ( 8 | user_id SERIAL 9 | , user_first_name VARCHAR(30) NOT NULL 10 | , user_last_name VARCHAR(30) NOT NULL 11 | , user_email_id VARCHAR(50) NOT NULL 12 | , user_email_validated BOOLEAN DEFAULT FALSE 13 | , user_password VARCHAR(200) 14 | , user_role VARCHAR(1) NOT NULL DEFAULT 'U' 15 | , is_active BOOLEAN DEFAULT FALSE 16 | , created_dt DATE DEFAULT CURRENT_DATE 17 | , last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP 18 | , PRIMARY KEY (created_dt, user_id) 19 | ) PARTITION BY RANGE(created_dt); -------------------------------------------------------------------------------- /Section 10 - Database Essentials - Partitioning Tables and Indexes/Scripts/07 - Hash Partitioning.sql: -------------------------------------------------------------------------------- 1 | -- Hash Partitioning 2 | DROP TABLE IF EXISTS 3 | users_hash_part; 4 | 5 | 6 | CREATE TABLE users_hash_part 7 | ( 8 | user_id SERIAL 9 | , user_first_name VARCHAR(30) NOT NULL 10 | , user_last_name VARCHAR(30) NOT NULL 11 | , user_email_id VARCHAR(50) NOT NULL 12 | , user_email_validated BOOLEAN DEFAULT FALSE 13 | , user_password VARCHAR(200) 14 | , user_role VARCHAR(1) NOT NULL DEFAULT 'U' 15 | , is_active BOOLEAN DEFAULT FALSE 16 | , created_dt DATE DEFAULT CURRENT_DATE 17 | , last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP 18 | , PRIMARY KEY (user_id) 19 | ) PARTITION BY HASH(user_id); 20 | -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://postgresql.itversity.com/06_predefined_functions/02_overview_of_predefined_functions.html 4 | 5 | https://postgresql.itversity.com/06_predefined_functions/03_string_manipulation_functions.html 6 | 7 | https://postgresql.itversity.com/06_predefined_functions/04_date_manipulation_functions.html 8 | 9 | https://postgresql.itversity.com/06_predefined_functions/05_overview_of_numeric_functions.html 10 | 11 | https://postgresql.itversity.com/06_predefined_functions/06_data_type_conversion.html 12 | 13 | https://postgresql.itversity.com/06_predefined_functions/07_handling_null_values.html 14 | 15 | https://postgresql.itversity.com/06_predefined_functions/08_using_case_and_when.html -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Scripts/01 - Overview of Pre-Defined Functions.sql: -------------------------------------------------------------------------------- 1 | -- Overview of Pre-Defined Functions 2 | SELECT 3 | * 4 | FROM 5 | information_schema.routines LIMIT 10; 6 | 7 | 8 | SELECT 9 | * 10 | FROM 11 | information_schema.routines 12 | WHERE 13 | routine_name ~ 'str'; 14 | 15 | 16 | SELECT 17 | substring('Thomas' from 2 for 3); 18 | 19 | 20 | SELECT 21 | substring('Thomas', 2, 3); -------------------------------------------------------------------------------- /Section 11 - Database Essentials - 
Predefined Functions/Scripts/02 - Case Conversion and Length.sql: -------------------------------------------------------------------------------- 1 | -- Case Conversion and Length 2 | SELECT 3 | upper('hEllo wOrlD') AS upper_result 4 | , lower('hEllo wOrlD') AS lower_result 5 | , initcap('hEllo wOrlD') AS initcap_result; 6 | 7 | 8 | SELECT 9 | length('hEllo wOrlD') AS result; 10 | 11 | 12 | SELECT 13 | * 14 | FROM 15 | orders LIMIT 10; 16 | 17 | 18 | SELECT 19 | order_id 20 | , order_date 21 | , order_customer_id 22 | , lower(order_status) AS order_status 23 | , length(order_status) AS order_status_length 24 | FROM 25 | orders 26 | LIMIT 27 | 10; 28 | -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Scripts/05 - Trimming and Padding Functions.sql: -------------------------------------------------------------------------------- 1 | -- Trimming and Padding Functions 2 | SELECT 3 | LTRIM(' Hello World') AS result; 4 | 5 | 6 | SELECT 7 | LTRIM(' Hello World', ' ') AS result; 8 | 9 | 10 | SELECT 11 | RTRIM(' Hello World ') AS result; 12 | 13 | 14 | SELECT 15 | LENGTH(TRIM(' Hello World ')) AS result; 16 | 17 | 18 | SELECT 19 | LTRIM('----Hello World----', '-') AS result; 20 | 21 | 22 | SELECT 23 | RTRIM('----Hello World----', '-') AS result; 24 | 25 | 26 | SELECT 27 | TRIM('----Hello World----', '-') AS result; 28 | 29 | 30 | SELECT 31 | TRIM(LEADING '-' FROM '----Hello World----') AS result; 32 | 33 | 34 | SELECT 35 | TRIM(TRAILING '-' FROM '----Hello World----') AS result; 36 | 37 | 38 | SELECT 39 | TRIM(BOTH '-' FROM '----Hello World----') AS result; 40 | 41 | 42 | SELECT 43 | TRIM(BOTH '- ;' FROM '- -;-Hello World- - -') AS result; 44 | 45 | 46 | SELECT 47 | TRIM('- -;-Hello World- - -', '- ;') AS result; 48 | 49 | 50 | SELECT 51 | 2013 AS year 52 | , 7 AS month 53 | , 25 AS myDate; 54 | 55 | 56 | SELECT 57 | LPAD(7::varchar, 2, '0') AS result; 58 | 59 | 60 | SELECT 61 | LPAD(10::varchar, 2, '0') AS result; 62 | 63 | 64 | SELECT 65 | LPAD(100::varchar, 2, '0') AS result; -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Scripts/08 - Getting Current Date and Timestamp.sql: -------------------------------------------------------------------------------- 1 | -- Getting Current Date and Timestamp 2 | SELECT 3 | CURRENT_DATE AS CURRENT_DATE; 4 | 5 | 6 | SELECT 7 | CURRENT_TIMESTAMP AS CURRENT_TIMESTAMP; 8 | 9 | 10 | SELECT 11 | SUBSTRING(CURRENT_DATE::VARCHAR, 1, 4) AS CURRENT_DATE; -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Scripts/09 - Date Arithmetic.sql: -------------------------------------------------------------------------------- 1 | -- Date Arithmetic 2 | SELECT 3 | CURRENT_DATE + INTERVAL '32 DAYS' AS result; 4 | 5 | 6 | SELECT 7 | CURRENT_DATE + INTERVAL '730 DAYS' AS result; 8 | 9 | 10 | SELECT 11 | CURRENT_DATE + INTERVAL '-730 DAYS' AS result; 12 | 13 | 14 | SELECT 15 | CURRENT_DATE - INTERVAL '730 DAYS' AS result; 16 | 17 | 18 | SELECT 19 | CURRENT_DATE + INTERVAL '3 MONTHS' AS result; 20 | 21 | 22 | SELECT 23 | '2019-01-31'::DATE + INTERVAL '3 MONTHS' AS result; 24 | 25 | 26 | SELECT 27 | '2019-01-31'::DATE + INTERVAL '3 MONTHS 3 DAYS 3 HOURS' AS result; 28 | 29 | 30 | SELECT 31 | CURRENT_TIMESTAMP + INTERVAL '3 MONTHS' AS result; 32 | 33 | 34 | SELECT 35 | CURRENT_TIMESTAMP + INTERVAL '10 HOURS' AS result; 36 | 
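-- Editorial addition (not in the original script): intervals can also be built
-- programmatically with make_interval (available since PostgreSQL 9.4), which avoids
-- assembling interval strings when the offset comes from a variable or parameter.
SELECT CURRENT_TIMESTAMP + make_interval(hours => 10, mins => 10) AS result;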
37 | 38 | SELECT 39 | CURRENT_TIMESTAMP + INTERVAL '10 MINUTES' AS result; 40 | 41 | 42 | SELECT 43 | CURRENT_TIMESTAMP + INTERVAL '10 HOURS 10 MINUTES' AS result; 44 | 45 | 46 | SELECT 47 | '2019-03-30'::DATE - '2017-12-31'::DATE AS result; 48 | 49 | 50 | SELECT 51 | '2019-03-30'::DATE - TO_DATE('2017/31/12', 'yyyy/dd/MM') AS Result; 52 | 53 | 54 | SELECT 55 | '2017-12-31'::DATE - '2019-03-30'::DATE AS result; 56 | 57 | 58 | SELECT 59 | CURRENT_DATE - '2019-03-30'::DATE AS result; 60 | 61 | 62 | SELECT 63 | CURRENT_TIMESTAMP - '2019-03-30'::DATE AS result; -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Scripts/10 - Beginning Date or Time - date_trunc.sql: -------------------------------------------------------------------------------- 1 | -- Beginning Date or Time - date_trunc 2 | SELECT 3 | DATE_TRUNC('YEAR', CURRENT_DATE) AS year_beginning; 4 | 5 | 6 | SELECT 7 | DATE_TRUNC('MONTH', CURRENT_DATE) AS month_beginning; 8 | 9 | 10 | SELECT 11 | DATE_TRUNC('WEEK', CURRENT_DATE) AS week_beginning; 12 | 13 | 14 | SELECT 15 | DATE_TRUNC('DAY', CURRENT_DATE) AS day_beginning; 16 | 17 | 18 | SELECT 19 | CURRENT_DATE 20 | , DATE_TRUNC('DAY', CURRENT_DATE) AS Day_Beginning; 21 | 22 | 23 | SELECT 24 | DATE_TRUNC('HOUR', CURRENT_TIMESTAMP) AS hour_beginning; 25 | 26 | 27 | SELECT 28 | * 29 | FROM 30 | orders 31 | WHERE 32 | order_date BETWEEN DATE_TRUNC('MONTH', '2014-01-10'::DATE) AND '2014-01-10'::DATE 33 | ORDER BY 34 | order_date 35 | LIMIT 36 | 10; 37 | 38 | 39 | SELECT 40 | COUNT(1) 41 | FROM 42 | orders 43 | WHERE 44 | order_date BETWEEN DATE_TRUNC('YEAR', '2014-01-10'::DATE) AND '2014-01-10'; 45 | 46 | 47 | SELECT 48 | COUNT(1) 49 | , MIN(order_date) 50 | , MAX(order_date) 51 | FROM 52 | orders 53 | WHERE 54 | order_date BETWEEN DATE_TRUNC('YEAR', '2014-03-20'::DATE) AND '2014-03-20'; -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Scripts/12 - Extracting information - extract.sql: -------------------------------------------------------------------------------- 1 | -- Extracting information - extract 2 | SELECT 3 | EXTRACT(century FROM CURRENT_DATE) AS Century; 4 | 5 | 6 | SELECT 7 | DATE_PART('century', CURRENT_DATE) AS Century; 8 | 9 | 10 | SELECT 11 | EXTRACT(decade FROM CURRENT_DATE) AS Decade; 12 | 13 | 14 | SELECT 15 | DATE_PART('decade', CURRENT_DATE) AS Decade; 16 | 17 | 18 | SELECT 19 | EXTRACT(year FROM CURRENT_DATE) AS Year; 20 | 21 | 22 | SELECT 23 | EXTRACT(quarter FROM CURRENT_DATE) AS Quarter; 24 | 25 | 26 | SELECT 27 | EXTRACT(month FROM CURRENT_DATE) AS Month; 28 | 29 | 30 | SELECT 31 | EXTRACT(week FROM CURRENT_DATE) AS Week; 32 | 33 | 34 | SELECT 35 | EXTRACT(day FROM CURRENT_DATE) AS Day; 36 | 37 | 38 | SELECT 39 | EXTRACT(doy FROM CURRENT_DATE) AS Day_of_Year; 40 | 41 | 42 | SELECT 43 | EXTRACT(dow FROM CURRENT_DATE) AS Day_of_Week; 44 | 45 | 46 | SELECT 47 | EXTRACT(hour FROM current_timestamp) AS Hour; 48 | 49 | 50 | SELECT 51 | EXTRACT(minute FROM current_timestamp) AS Minute; 52 | 53 | 54 | SELECT 55 | EXTRACT(second FROM current_timestamp) AS Second; 56 | 57 | 58 | SELECT 59 | EXTRACT(milliseconds FROM current_timestamp) AS Millis; -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Scripts/13 - Dealing with Unix Timestamp.sql: -------------------------------------------------------------------------------- 1 | --
Dealing with Unix Timestamp 2 | SELECT 3 | EXTRACT(epoch FROM CURRENT_DATE) AS Date_Epoch; 4 | 5 | 6 | SELECT 7 | DATE_PART('epoch', CURRENT_DATE) AS Date_Epoch; 8 | 9 | 10 | SELECT 11 | EXTRACT(epoch FROM '2019-04-30 18:18:51'::TIMESTAMP) AS Unixtime; 12 | 13 | 14 | SELECT 15 | TO_TIMESTAMP(1556662731) AS Time_from_Epoch; 16 | 17 | 18 | SELECT 19 | TO_TIMESTAMP(1556662731)::DATE AS Time_from_Epoch; 20 | 21 | 22 | SELECT 23 | TO_CHAR(TO_TIMESTAMP(1556662731), 'yyyyMM')::INT AS yyyyMM_from_epoch; 24 | -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Scripts/15 - Data Type Conversion.sql: -------------------------------------------------------------------------------- 1 | -- Data Type Conversion 2 | SELECT 3 | '09'; 4 | 5 | 6 | SELECT 7 | '09'::INT; 8 | 9 | 10 | SELECT 11 | '09'::FLOAT; 12 | 13 | 14 | SELECT 15 | CURRENT_DATE AS Current_Date; 16 | 17 | 18 | SELECT 19 | SPLIT_PART('2020-09-30', '-', 2) AS Month; 20 | 21 | 22 | SELECT 23 | SPLIT_PART('2020-09-30', '-', 2)::INT AS Month; 24 | 25 | 26 | SELECT 27 | TO_CHAR('2020-09-30'::DATE, 'MM') AS Month; 28 | 29 | 30 | SELECT 31 | TO_CHAR('2020-09-30'::DATE, 'MM')::INT AS Month; 32 | 33 | 34 | SELECT 35 | TO_CHAR(CURRENT_DATE, 'MM')::INT AS Month; 36 | 37 | 38 | SELECT 39 | CAST('0.04000' AS FLOAT) AS Result; 40 | 41 | 42 | SELECT 43 | '0.04000'::FLOAT AS Result; 44 | 45 | 46 | SELECT 47 | CAST('09' AS INT) AS Result; 48 | 49 | 50 | SELECT 51 | '09'::INT AS Result; -------------------------------------------------------------------------------- /Section 11 - Database Essentials - Predefined Functions/Scripts/16 - Handling NULL Values.sql: -------------------------------------------------------------------------------- 1 | -- Handling NULL Values 2 | SELECT 3 | 1 + NULL AS Result; 4 | 5 | 6 | SELECT 7 | COALESCE(1, 0) AS Result; 8 | 9 | 10 | SELECT 11 | COALESCE(NULL, NULL, 2, NULL, 3) AS Result; 12 | 13 | 14 | DROP TABLE IF EXISTS 15 | sales; 16 | 17 | 18 | CREATE TABLE IF NOT EXISTS sales 19 | ( 20 | sales_person_id INT, 21 | sales_amount FLOAT, 22 | commission_pct INT 23 | ); 24 | 25 | 26 | INSERT INTO 27 | sales 28 | VALUES ( 29 | 1 30 | , 1000 31 | , 10 32 | ), ( 33 | 2 34 | , 1500 35 | , 8 36 | ), ( 37 | 3 38 | , 500 39 | , NULL 40 | ), ( 41 | 4 42 | , 800 43 | , 5 44 | ), ( 45 | 5 46 | , 250 47 | , NULL 48 | ); 49 | 50 | 51 | SELECT 52 | * 53 | FROM 54 | sales; 55 | 56 | 57 | SELECT 58 | s.* 59 | , ROUND((sales_amount * commission_pct / 100)::NUMERIC, 2) AS Incorrect_Commission_Amount 60 | FROM 61 | sales AS s; 62 | 63 | 64 | SELECT 65 | s.* 66 | , COALESCE(commission_pct, 0) AS Commission_Pct 67 | FROM 68 | sales AS s; 69 | 70 | 71 | SELECT 72 | s.* 73 | , ROUND((sales_amount * COALESCE(commission_pct, 0) / 100)::NUMERIC, 2) AS Commission_Amount 74 | FROM 75 | sales AS s; 76 | 77 | 78 | SELECT 79 | NULLIF(1, 0); 80 | 81 | 82 | SELECT 83 | NULLIF(1, 1); -------------------------------------------------------------------------------- /Section 12 - Database Essentials - Writing Advanced SQL Queries/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/02_overview_of_views.html 4 | 5 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/03_named_queries_using_with_clause.html 6 | 7 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/04_overview_of_sub_queries.html 8 | 9 |
https://postgresql.itversity.com/07_writing_advanced_sql_queries/05_create_table_as_select.html 10 | 11 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/06_advanced_dml_operations.html 12 | 13 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/07_merging_or_upserting_data.html 14 | 15 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/08_pivoting_rows_into_columns.html 16 | 17 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/09_overview_of_analytic_functions.html 18 | 19 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/10_analytic_functions_aggregations.html 20 | 21 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/11_cumulative_or_moving_aggregations.html 22 | 23 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/12_analytic_functions_windowing.html 24 | 25 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/13_analytic_functions_ranking.html 26 | 27 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/14_analytic_funcions_filtering.html 28 | 29 | https://postgresql.itversity.com/07_writing_advanced_sql_queries/15_ranking_and_filtering_recap.html -------------------------------------------------------------------------------- /Section 12 - Database Essentials - Writing Advanced SQL Queries/Scripts/02 - Named Queries - Using WITH Clause.sql: -------------------------------------------------------------------------------- 1 | -- Named Queries - Using WITH Clause 2 | WITH order_details_nq AS ( 3 | SELECT 4 | * 5 | FROM 6 | orders o 7 | INNER JOIN 8 | order_items oi 9 | ON 10 | o.order_id = oi.order_item_order_id 11 | ) SELECT 12 | * 13 | FROM 14 | order_details_nq 15 | LIMIT 16 | 10; 17 | 18 | 19 | -- Wrong 20 | SELECT 21 | * 22 | FROM 23 | order_details_nq 24 | LIMIT 25 | 10; 26 | 27 | 28 | WITH order_details_nq AS ( 29 | SELECT 30 | * 31 | FROM 32 | orders o 33 | INNER JOIN 34 | order_items oi 35 | ON 36 | o.order_id = oi.order_item_order_id 37 | ) SELECT 38 | order_date 39 | , order_item_product_id 40 | , ROUND(SUM(order_item_subtotal)::NUMERIC, 2) AS revenue 41 | FROM 42 | order_details_nq 43 | GROUP BY 44 | order_date 45 | , order_item_product_id 46 | ORDER BY 47 | order_date 48 | , revenue DESC 49 | LIMIT 50 | 10; 51 | 52 | 53 | CREATE OR REPLACE VIEW 54 | daily_product_revenue_v 55 | AS 56 | WITH order_details_nq AS ( 57 | SELECT 58 | * 59 | FROM 60 | orders o 61 | INNER JOIN 62 | order_items oi 63 | ON 64 | o.order_id = oi.order_item_order_id 65 | ) SELECT 66 | order_date 67 | , order_item_product_id 68 | , ROUND(SUM(order_item_subtotal)::NUMERIC, 2) AS Revenue 69 | FROM 70 | order_details_nq 71 | GROUP BY 72 | order_date 73 | , order_item_product_id; 74 | 75 | 76 | SELECT 77 | * 78 | FROM 79 | daily_product_revenue_v 80 | ORDER BY 81 | order_date 82 | , revenue DESC 83 | LIMIT 84 | 10; -------------------------------------------------------------------------------- /Section 12 - Database Essentials - Writing Advanced SQL Queries/Scripts/04 - CTAS - Create Table as Select.sql: -------------------------------------------------------------------------------- 1 | -- CTAS - Create Table as Select 2 | DROP TABLE IF EXISTS 3 | customers_backup; 4 | 5 | 6 | CREATE TABLE 7 | customers_backup 8 | AS 9 | SELECT 10 | * 11 | FROM 12 | customers; 13 | 14 | 15 | DROP TABLE IF EXISTS 16 | orders_backup; 17 | 18 | 19 | CREATE TABLE 20 | orders_backup 21 | AS 22 | SELECT 23 | order_id 24 | , TO_CHAR(order_date, 'yyyy')::INT AS Order_Year 25 | , 
TO_CHAR(order_date, 'MM')::INT AS order_month 26 | , TO_CHAR(order_date, 'dd')::INT AS order_day_of_month 27 | , TO_CHAR(order_date, 'DDD')::INT AS order_day_of_year 28 | , order_customer_id 29 | , order_status 30 | FROM 31 | orders; 32 | 33 | 34 | SELECT 35 | * 36 | FROM 37 | orders_backup 38 | LIMIT 39 | 10; 40 | 41 | 42 | DROP TABLE IF EXISTS 43 | order_items_empty; 44 | 45 | 46 | CREATE TABLE 47 | order_items_empty 48 | AS 49 | SELECT 50 | * 51 | FROM 52 | order_items 53 | WHERE 54 | 1 = 2; 55 | 56 | 57 | SELECT 58 | COUNT(1) 59 | FROM 60 | order_items_empty; 61 | 62 | 63 | DROP TABLE IF EXISTS 64 | customers_backup; 65 | 66 | 67 | DROP TABLE IF EXISTS 68 | orders_backup; 69 | 70 | 71 | DROP TABLE IF EXISTS 72 | order_items_empty; -------------------------------------------------------------------------------- /Section 12 - Database Essentials - Writing Advanced SQL Queries/Scripts/07 - Pivoting Rows into Columns.sql: -------------------------------------------------------------------------------- 1 | -- Pivoting Rows into Columns 2 | SELECT 3 | order_date 4 | , order_status 5 | , COUNT(1) 6 | FROM 7 | orders 8 | GROUP BY 9 | order_date 10 | , order_status 11 | ORDER BY 12 | order_date 13 | , order_status 14 | LIMIT 15 | 18; 16 | 17 | 18 | SELECT 19 | * 20 | FROM 21 | CROSSTAB( 22 | 'SELECT order_date, 23 | order_status, 24 | count(1) AS order_count 25 | FROM orders 26 | GROUP BY order_date, 27 | order_status', 28 | 'SELECT DISTINCT order_status FROM orders ORDER BY 1' 29 | ) AS ( 30 | order_date DATE, 31 | "CANCELED" INT, 32 | "CLOSED" INT, 33 | "COMPLETE" INT, 34 | "ON_HOLD" INT, 35 | "PAYMENT_REVIEW" INT, 36 | "PENDING" INT, 37 | "PENDING_PAYMENT" INT, 38 | "PROCESSING" INT, 39 | "SUSPECTED_FRAUD" INT 40 | ) 41 | LIMIT 42 | 10; -------------------------------------------------------------------------------- /Section 12 - Database Essentials - Writing Advanced SQL Queries/Scripts/08 - Overview of Analytic Functions.sql: -------------------------------------------------------------------------------- 1 | -- Overview of Analytic Functions 2 | DROP TABLE IF EXISTS 3 | daily_revenue; 4 | 5 | 6 | CREATE TABLE 7 | daily_revenue 8 | AS 9 | SELECT 10 | o.order_date 11 | , ROUND(SUM(oi.order_item_subtotal)::NUMERIC, 2) AS revenue 12 | FROM orders o JOIN order_items oi 13 | ON 14 | o.order_id = oi.order_item_order_id 15 | WHERE 16 | o.order_status IN ('COMPLETE', 'CLOSED') 17 | GROUP BY 18 | o.order_date; 19 | 20 | 21 | SELECT 22 | * 23 | FROM 24 | daily_revenue 25 | ORDER BY 26 | order_date 27 | LIMIT 28 | 10; 29 | 30 | 31 | DROP TABLE IF EXISTS 32 | daily_product_revenue; 33 | 34 | 35 | CREATE TABLE 36 | daily_product_revenue 37 | AS 38 | SELECT 39 | o.order_date 40 | , oi.order_item_product_id 41 | , ROUND(SUM(oi.order_item_subtotal)::numeric, 2) AS revenue 42 | FROM 43 | orders o 44 | INNER JOIN 45 | order_items oi 46 | ON 47 | o.order_id = oi.order_item_order_id 48 | WHERE 49 | o.order_status IN ('COMPLETE', 'CLOSED') 50 | GROUP BY 51 | o.order_date 52 | , oi.order_item_product_id; 53 | 54 | 55 | SELECT 56 | * 57 | FROM 58 | daily_product_revenue 59 | ORDER BY 60 | order_date 61 | , revenue DESC 62 | LIMIT 63 | 10; -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Exercises/01 - Create Table.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 1 - Create Table¶ 3 | Create table - courses 4 | course_id - sequence generated 
integer and primary key 5 | course_name - which holds alphanumeric or string values up to 60 characters 6 | course_author - which holds the name of the author up to 40 characters 7 | course_status - which holds one of these values (published, draft, inactive). 8 | course_published_dt - which holds a date type value. 9 | */ 10 | CREATE TABLE courses 11 | ( 12 | course_id SERIAL PRIMARY KEY 13 | , course_name VARCHAR(60) NOT NULL 14 | , course_author VARCHAR(40) NOT NULL 15 | , course_status VARCHAR(10) NOT NULL 16 | , course_published_dt DATE 17 | ); -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Exercises/02 - Insert Into.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 2 - Inserting Data 3 | Insert data into courses using the data provided. Make sure id is system generated. 4 | */ 5 | INSERT INTO 6 | courses 7 | ( 8 | course_name 9 | , course_author 10 | , course_status 11 | , course_published_dt 12 | ) 13 | VALUES ( 14 | 'Programming using Python' 15 | , 'Bob Dillon' 16 | , 'published' 17 | , '2020-09-30' 18 | ), ( 19 | 'Data Engineering using Python' 20 | , 'Bob Dillon' 21 | , 'published' 22 | , '2020-07-15' 23 | ), ( 24 | 'Data Engineering using Scala' 25 | , 'Elvis Presley' 26 | , 'draft' 27 | ), ( 28 | 'Programming using Scala' 29 | , 'Elvis Presley' 30 | , 'published' 31 | , '2020-05-12' 32 | ), ( 33 | 'Programming using Java' 34 | , 'Mike Jack' 35 | , 'inactive' 36 | , '2020-08-10' 37 | ), ( 38 | 'Web Applications - Python Flask' 39 | , 'Bob Dillon' 40 | , 'inactive' 41 | , '2020-07-20' 42 | ), ( 43 | 'Web Applications - Java Spring' 44 | , 'Mike Jack' 45 | , 'draft' 46 | ), ( 47 | 'Pipeline Orchestration - Python' 48 | , 'Bob Dillon' 49 | , 'draft' 50 | ), ( 51 | 'Streaming Pipelines - Python' 52 | , 'Bob Dillon' 53 | , 'published' 54 | , '2020-10-05' 55 | ), ( 56 | 'Web Applications - Scala Play' 57 | , 'Elvis Presley' 58 | , 'inactive' 59 | , '2020-09-30' 60 | ), ( 61 | 'Web Applications - Python Django' 62 | , 'Bob Dillon' 63 | , 'published' 64 | , '2020-06-23' 65 | ), ( 66 | 'Server Automation - Ansible' 67 | , 'Uncle Sam' 68 | , 'published' 69 | , '2020-07-05' 70 | ); -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Exercises/03 - Updating Data.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 3 - Updating Data 3 | Update the status of all the draft courses related to Python and Scala to published along with 4 | the course_published_dt using the system date. 5 | Provide the update statement as the answer for this exercise 6 | */ 7 | UPDATE 8 | courses 9 | SET 10 | course_status = 'published' 11 | , course_published_dt = CURRENT_DATE 12 | WHERE 13 | course_status = 'draft' 14 | AND 15 | ( 16 | course_name LIKE '%Python%' 17 | OR 18 | course_name LIKE '%Scala%' 19 | ); -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Exercises/04 - Deleting Data.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Exercise 4 - Deleting Data 3 | Delete all the courses which are neither in draft mode nor published. 4 | Provide the delete statement as the answer for this exercise.
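(Editorial note added here: "neither in draft mode nor published" translates to course_status NOT IN ('draft', 'published') rather than listing statuses to delete explicitly - the NOT IN form stays correct even if new status values are added later. The corrected statement below uses it.)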
5 | */ 6 | DELETE FROM 7 | courses 8 | WHERE 9 | course_status NOT IN ('draft', 'published'); -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Exercises/05 - Selecting.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Validation - Get count of all published courses by author and make sure output is sorted in descending order by count. 3 | */ 4 | SELECT 5 | course_author 6 | , COUNT(1) AS course_count 7 | FROM 8 | courses 9 | WHERE 10 | course_status = 'published' 11 | GROUP BY 12 | course_author 13 | ORDER BY 14 | course_count DESC; -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/04_postgres_database_operations/02_overview_of_sql.html 4 | 5 | https://python.itversity.com/04_postgres_database_operations/03_create_database_and_users_table.html 6 | 7 | https://python.itversity.com/04_postgres_database_operations/04_ddl_data_definition_language.html 8 | 9 | https://python.itversity.com/04_postgres_database_operations/05_dml_data_manipulation_language.html 10 | 11 | https://python.itversity.com/04_postgres_database_operations/06_dql_data_query_language.html 12 | 13 | https://python.itversity.com/04_postgres_database_operations/07_crud_operations_dml_and_dql.html 14 | 15 | https://python.itversity.com/04_postgres_database_operations/08_tcl_transaction_control_language.html 16 | 17 | https://python.itversity.com/04_postgres_database_operations/09_example_data_engineering.html 18 | 19 | https://python.itversity.com/04_postgres_database_operations/10_example_web_application.html 20 | 21 | https://python.itversity.com/04_postgres_database_operations/11_exercise_database_operations.html -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Postgres Database Operations/02_overview_of_sql.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Overview of SQL\n", "\n", "SQL stands for Structured Query Language and it is broadly categorized into the following:"]}, {"cell_type": "markdown", "metadata": {}, "source": ["* DDL - Data Definition Language\n", " * Creating Database Objects such as Tables, Indexes etc.\n", " * Define constraints such as not null, primary key, foreign key etc.\n", "* DML - Data Manipulation Language\n", " * Inserting or Updating data in the tables\n", " * Deleting data from the tables\n", "* DQL - Data Query Language\n", " * Project the data\n", " * Filter based upon the requirements\n", " * Join multiple tables\n", "* TCL - Transaction Control Language\n", " * Commit to persistently store the changes.\n", " * Rollback to revert changes to the prior state.\n", "\n", "Typically, as part of applications, we perform CRUD Operations, which are nothing but DML and DQL."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4,
"nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Postgres Database Operations/07_crud_operations_dml_and_dql.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## CRUD Operations \u2013 DML and DQL\n", "\n", "Let us get an overview of CRUD Operations. They are nothing but DML and queries to read the data while performing database operations via applications."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* CRUD is widely used from the application development perspective.\n", "* C - CREATE (INSERT)\n", "* R - READ (QUERY)\n", "* U - UPDATE (UPDATE)\n", "* D - DELETE (DELETE)\n", "\n", "As part of the application development process we perform CRUD Operations using REST APIs."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Postgres Database Operations/08_tcl.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## TCL \u2013 Transaction Control Language\n", "\n", "Let us go through the details related to TCL (Transaction Control Language)."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* We typically perform operations such as `COMMIT` and `ROLLBACK` via the applications.\n", "* `COMMIT` will persist the changes in the database.\n", "* `ROLLBACK` will revert the uncommitted changes in the database.\n", "* We typically rollback the uncommitted changes in a transaction if there is any exception as part of the application logic flow.\n", "* For example, once the order is placed, all the items that were added to the shopping cart will be rolled back if the payment using the credit card fails.\n", "* By default, every operation is typically committed in Postgres.
We will get into the details related to transactions as part of application development later."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Scripts/01 - Create Database and Users Table.sql: -------------------------------------------------------------------------------- 1 | -- Create Database and Users Table 2 | SELECT 3 | * 4 | FROM 5 | information_schema.tables 6 | LIMIT 7 | 10; 8 | 9 | 10 | CREATE TABLE IF NOT EXISTS users 11 | ( 12 | user_id SERIAL PRIMARY KEY 13 | , user_first_name VARCHAR(30) NOT NULL 14 | , user_last_name VARCHAR(30) NOT NULL 15 | , user_email_id VARCHAR(50) NOT NULL 16 | , user_email_validated BOOLEAN DEFAULT FALSE 17 | , user_password VARCHAR(200) 18 | , user_role VARCHAR(1) NOT NULL DEFAULT 'U' --U and A 19 | , is_active BOOLEAN DEFAULT FALSE 20 | , created_dt DATE DEFAULT CURRENT_DATE 21 | , last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP 22 | ); 23 | 24 | 25 | SELECT 26 | * 27 | FROM 28 | information_schema.tables 29 | WHERE 30 | table_name = 'users'; -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Scripts/02 - DDL – Data Definition Language.sql: -------------------------------------------------------------------------------- 1 | -- DDL – Data Definition Language 2 | CREATE TABLE users_02 3 | ( 4 | user_id SERIAL PRIMARY KEY 5 | , user_first_name VARCHAR(30) NOT NULL 6 | , user_last_name VARCHAR(30) NOT NULL 7 | , user_email_id VARCHAR(50) NOT NULL 8 | , user_email_validated BOOLEAN DEFAULT FALSE 9 | , user_password VARCHAR(200) 10 | , user_role VARCHAR(1) NOT NULL DEFAULT 'U' --U and A 11 | , is_active BOOLEAN DEFAULT FALSE 12 | , created_dt DATE DEFAULT CURRENT_DATE 13 | ); 14 | 15 | 16 | SELECT 17 | * 18 | FROM 19 | information_schema.columns 20 | WHERE 21 | table_name = 'users_02' 22 | ORDER BY 23 | ordinal_position; 24 | 25 | 26 | ALTER TABLE 27 | users_02 28 | ADD 29 | last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP; 30 | 31 | 32 | SELECT 33 | * 34 | FROM 35 | information_schema.columns 36 | WHERE 37 | table_name = 'users_02' 38 | ORDER BY 39 | ordinal_position; 40 | 41 | 42 | ALTER TABLE 43 | users_02 44 | ADD CHECK 45 | (user_role IN ('A', 'U')); 46 | 47 | 48 | ALTER TABLE 49 | users_02 50 | ADD UNIQUE 51 | (user_email_id); -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Scripts/03 - Dml Data Manipulation Language.sql: -------------------------------------------------------------------------------- 1 | -- DML - Data Manipulation Language 2 | INSERT INTO users 3 | ( 4 | user_first_name 5 | , user_last_name 6 | , user_email_id 7 | ) VALUES ( 8 | 'Gordan' 9 | , 'Bradock' 10 | , 'gbradock0@barnesandnoble.com' 11 | ); 12 | 13 | 14 | INSERT INTO users 15 | ( 16 | user_first_name 17 | , user_last_name 18 | , user_email_id 19 | ) VALUES ( 20 | 'Tobe' 21 | , 'Lyness' 22 | , 'tlyness1@paginegialle.it' 23 | ), ( 24 | 'Addie' 25 | ,
'Mesias' 26 | , 'amesias2@twitpic.com' 27 | ), ( 28 | 'Corene' 29 | , 'Kohrsen' 30 | , 'ckohrsen3@buzzfeed.com' 31 | ), ( 32 | 'Darill' 33 | , 'Halsall' 34 | , 'dhalsall4@intel.com' 35 | ); 36 | 37 | 38 | SELECT 39 | * 40 | FROM 41 | users; 42 | 43 | 44 | UPDATE 45 | users 46 | SET 47 | user_email_validated = true 48 | , is_active = true; 49 | 50 | 51 | SELECT 52 | * 53 | FROM 54 | users; 55 | 56 | 57 | UPDATE 58 | users 59 | SET 60 | user_role = 'C' 61 | WHERE 62 | user_id = 1; 63 | 64 | 65 | UPDATE 66 | users 67 | SET 68 | user_role = 'A' 69 | WHERE 70 | user_id = 1; 71 | 72 | 73 | SELECT 74 | * 75 | FROM 76 | users; 77 | 78 | 79 | DELETE FROM 80 | users 81 | WHERE 82 | user_role = 'U'; 83 | 84 | 85 | SELECT 86 | * 87 | FROM 88 | users; -------------------------------------------------------------------------------- /Section 13 - Programming Essentials Using Python - Perform Database Operations/Scripts/04 - DQL – Data Query Language.sql: -------------------------------------------------------------------------------- 1 | -- DQL – Data Query Language 2 | TRUNCATE TABLE 3 | users; 4 | 5 | 6 | INSERT INTO users 7 | ( 8 | user_first_name 9 | , user_last_name 10 | , user_email_id 11 | ) VALUES ( 12 | 'Gordan' 13 | , 'Bradock' 14 | , 'gbradock0@barnesandnoble.com' 15 | ); 16 | 17 | 18 | INSERT INTO users 19 | ( 20 | user_first_name 21 | , user_last_name 22 | , user_email_id 23 | ) VALUES ( 24 | 'Tobe' 25 | , 'Lyness' 26 | , 'tlyness1@paginegialle.it' 27 | ), ( 28 | 'Addie' 29 | , 'Mesias' 30 | , 'amesias2@twitpic.com' 31 | ), ( 32 | 'Corene' 33 | , 'Kohrsen' 34 | , 'ckohrsen3@buzzfeed.com' 35 | ), ( 36 | 'Darill' 37 | , 'Halsall' 38 | , 'dhalsall4@intel.com' 39 | ); 40 | 41 | 42 | SELECT 43 | * 44 | FROM 45 | users; 46 | 47 | 48 | SELECT 49 | * 50 | FROM 51 | users 52 | WHERE 53 | user_role = 'A' 54 | AND 55 | created_dt BETWEEN '2020-01-01' AND '2020-03-31'; 56 | 57 | 58 | SELECT 59 | user_first_name 60 | , user_last_name 61 | , user_email_id 62 | , user_role 63 | FROM 64 | users 65 | WHERE 66 | user_role = 'A' 67 | AND 68 | created_dt BETWEEN '2020-01-01' AND '2020-03-31'; 69 | 70 | 71 | SELECT 72 | * 73 | FROM 74 | users 75 | WHERE 76 | user_role != 'A' 77 | AND 78 | created_dt BETWEEN '2020-01-01' AND '2020-03-31'; 79 | 80 | 81 | SELECT 82 | user_role 83 | , COUNT(1) 84 | FROM 85 | users 86 | GROUP BY 87 | user_role 88 | ORDER BY 89 | user_role; -------------------------------------------------------------------------------- /Section 14 - Programming Essentials Using Python - Getting Started with Python/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/05_getting_started_with_python/02_installing_python.html 4 | 5 | https://python.itversity.com/05_getting_started_with_python/03_overview_of_anaconda.html 6 | 7 | https://python.itversity.com/05_getting_started_with_python/04_python_cli_and_jupyter_notebook.html 8 | 9 | https://python.itversity.com/05_getting_started_with_python/05_overview_of_jupyter_lab.html 10 | 11 | https://python.itversity.com/05_getting_started_with_python/06_using_ides_pycharm.html 12 | 13 | https://python.itversity.com/05_getting_started_with_python/07_overview_of_visual_studio_code.html 14 | 15 | https://python.itversity.com/05_getting_started_with_python/08_using_itversity_labs.html 16 | 17 | https://python.itversity.com/05_getting_started_with_python/09_leveraging_googles_colab.html -------------------------------------------------------------------------------- /Section 15 -
Programming Essentials Using Python - Basic Programming Constructs/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/06_basic_programming_constructs/02_getting_help.html 4 | 5 | https://python.itversity.com/06_basic_programming_constructs/03_variables_and_objects.html 6 | 7 | https://python.itversity.com/06_basic_programming_constructs/04_data_types_commonly_used.html 8 | 9 | https://python.itversity.com/06_basic_programming_constructs/05_operators_in_python.html 10 | 11 | https://python.itversity.com/06_basic_programming_constructs/07_conditionals.html 12 | 13 | https://python.itversity.com/06_basic_programming_constructs/08_all_about_for_loops.html 14 | 15 | https://python.itversity.com/06_basic_programming_constructs/09_running_os_commands.html -------------------------------------------------------------------------------- /Section 15 - Programming Essentials Using Python - Basic Programming Constructs/Python Code/01_Basic_Programming_Constructs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4433245e", 6 | "metadata": {}, 7 | "source": [ 8 | "## Basic Programming Constructs\n", 9 | "\n", 10 | "As part of this section we will see basic programming constructs in Python.\n", 11 | "\n", 12 | " * Getting Help\n", 13 | " * Variables and Objects\n", 14 | " * Data Types - Commonly used\n", 15 | " * Operators in Python\n", 16 | " * Comments and Doc Strings\n", 17 | " * Conditionals\n", 18 | " * All about for loops\n", 19 | " * Running OS Commands\n", 20 | " * Exercises" 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3 (ipykernel)", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.9.7" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 5 45 | } 46 | -------------------------------------------------------------------------------- /Section 15 - Programming Essentials Using Python - Basic Programming Constructs/Python Code/02_getting_help.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Getting Help\n", "\n", "We have already seen how to get help earlier. Let's dive deeper to understand more about getting help using Python."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* We can get help either in the Python CLI or in a Jupyter Notebook.\n", "* Help can be launched by calling help()\n", "* It will launch a CLI where we can enter a class name or function name.\n", "* We can type quit (or hit ctrl+d) to come out of help\n", "* We can also get help on a class or function by passing it to the help function directly.\n", "* We will be able to get help by passing objects as well. 
In the case of a str object, help will treat the value of the variable as the query and look up help for that name."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Tasks\n", "\n", "Here are some of the tasks we can perform to understand help better.\n", "* Launch help\n", "* Get help for str\n", "* Get help for str.lstrip function\n", "* Exit from help\n", "* Get help on str and str.lstrip directly\n", "* Create an integer object i=0 and get help by passing the object."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 16 - Programming Essentials Using Python - Predefined Functions/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/07_pre_defined_functions/02_overview_of_pre-defined_functions.html 4 | 5 | https://python.itversity.com/07_pre_defined_functions/03_numeric_functions.html 6 | 7 | https://python.itversity.com/07_pre_defined_functions/04_overview_of_strings.html 8 | 9 | https://python.itversity.com/07_pre_defined_functions/05_string_manipulation_functions.html 10 | 11 | https://python.itversity.com/07_pre_defined_functions/06_formatting_strings.html 12 | 13 | https://python.itversity.com/07_pre_defined_functions/07_print_and_input_functions.html 14 | 15 | https://python.itversity.com/07_pre_defined_functions/08_date_manipulation_functions.html -------------------------------------------------------------------------------- /Section 16 - Programming Essentials Using Python - Predefined Functions/Python Code/01 - Pre-defined Functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7f9aa588", 6 | "metadata": {}, 7 | "source": [ 8 | "## Pre-defined Functions\n", 9 | "\n", 10 | "Let us go through the list of commonly used Pre-defined Functions.\n", 11 | "\n", 12 | " * Overview of Pre-defined Functions.\n", 13 | " * Numeric Functions.\n", 14 | " * Overview of Strings\n", 15 | " * String Manipulation Functions.\n", 16 | " * Formatting Strings.\n", 17 | " * Print and Input Functions.\n", 18 | " * Date Manipulation Functions.\n", 19 | " * Special Functions." 
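A few of these pre-defined functions in action. This is a minimal illustrative sketch using only the Python standard library; the sample values are made up:

```python
from datetime import date, datetime

# String manipulation functions - trimming, case conversion, length
order_status = '   Complete '
print(order_status.strip())          # 'Complete' - surrounding spaces removed
print(order_status.strip().upper())  # 'COMPLETE'
print(len(order_status.strip()))     # 8

# Numeric functions
print(abs(-10), round(3.14159, 2), pow(2, 10))  # 10 3.14 1024

# Formatting strings along with the print function
print(f"Order marked as {order_status.strip().lower()} on {date.today()}")

# Date manipulation functions
print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
```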
20 | ] 21 | } 22 | ], 23 | "metadata": { 24 | "kernelspec": { 25 | "display_name": "Python 3 (ipykernel)", 26 | "language": "python", 27 | "name": "python3" 28 | }, 29 | "language_info": { 30 | "codemirror_mode": { 31 | "name": "ipython", 32 | "version": 3 33 | }, 34 | "file_extension": ".py", 35 | "mimetype": "text/x-python", 36 | "name": "python", 37 | "nbconvert_exporter": "python", 38 | "pygments_lexer": "ipython3", 39 | "version": "3.9.7" 40 | } 41 | }, 42 | "nbformat": 4, 43 | "nbformat_minor": 5 44 | } 45 | -------------------------------------------------------------------------------- /Section 17 - Programming Essentials Using Python - User Defined Functions/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/08_user_defined_functions/01_user_defined_functions.html 4 | 5 | https://python.itversity.com/08_user_defined_functions/02_defining_functions.html 6 | 7 | https://python.itversity.com/08_user_defined_functions/03_doc_strings.html 8 | 9 | https://python.itversity.com/08_user_defined_functions/04_returning_values.html 10 | 11 | https://python.itversity.com/08_user_defined_functions/05_function_parameters_and_arguments.html 12 | 13 | https://python.itversity.com/08_user_defined_functions/06_varying_arguments.html 14 | 15 | https://python.itversity.com/08_user_defined_functions/07_keyword_arguments.html 16 | 17 | https://python.itversity.com/08_user_defined_functions/08_recap_of_user_defined_functions.html 18 | 19 | https://python.itversity.com/08_user_defined_functions/09_passing_functions_as_arguments.html 20 | 21 | https://python.itversity.com/08_user_defined_functions/10_lambda_functions.html 22 | 23 | https://python.itversity.com/08_user_defined_functions/11_usage_of_lambda_functions.html 24 | 25 | https://python.itversity.com/08_user_defined_functions/12_exercise_user_defined_functions.html -------------------------------------------------------------------------------- /Section 18 - Programming Essentials Using Python - Overview of Collections - List and Set/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/09_overview_of_collections_list_and_set/02_overview_of_list_and_set.html 4 | 5 | https://python.itversity.com/09_overview_of_collections_list_and_set/03_common_operations.html 6 | 7 | https://python.itversity.com/09_overview_of_collections_list_and_set/04_accessing_elements_from_list.html 8 | 9 | https://python.itversity.com/09_overview_of_collections_list_and_set/05_adding_elements_to_list.html 10 | 11 | https://python.itversity.com/09_overview_of_collections_list_and_set/07_other_list_operations.html 12 | 13 | https://python.itversity.com/09_overview_of_collections_list_and_set/08_adding_and_deleting_elements_set.html 14 | 15 | https://python.itversity.com/09_overview_of_collections_list_and_set/09_typical_set_operations.html 16 | 17 | https://python.itversity.com/09_overview_of_collections_list_and_set/10_validating_set.html 18 | 19 | https://python.itversity.com/09_overview_of_collections_list_and_set/11_list_and_set_usage.html -------------------------------------------------------------------------------- /Section 18 - Programming Essentials Using Python - Overview of Collections - List and Set/Python Code/01_Overview of Collections - list and set.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"markdown", 5 | "id": "433b34db", 6 | "metadata": {}, 7 | "source": [ 8 | "## Overview of Collections - list and set\n", 9 | "\n", 10 | "Let us get an overview of list and set as part of the Python Collections.\n", 11 | "\n", 12 | " * Overview of list and set\n", 13 | " * Common Operations\n", 14 | " * Accessing Elements from Lists\n", 15 | " * Adding Elements to list\n", 16 | " * Updating and Deleting Elements - list\n", 17 | " * Otjer list operations\n", 18 | " * Adding and Deleting Elements - set\n", 19 | " * Typical set operations\n", 20 | " * Validating set\n", 21 | " * list and set - Usage\n", 22 | " * Exercises - list and set\n", 23 | " * list of delimited strings\n", 24 | " * Sorting data in lists and sets\n", 25 | " * Exercises - Sorting lists and sets" 26 | ] 27 | } 28 | ], 29 | "metadata": { 30 | "kernelspec": { 31 | "display_name": "Python 3 (ipykernel)", 32 | "language": "python", 33 | "name": "python3" 34 | }, 35 | "language_info": { 36 | "codemirror_mode": { 37 | "name": "ipython", 38 | "version": 3 39 | }, 40 | "file_extension": ".py", 41 | "mimetype": "text/x-python", 42 | "name": "python", 43 | "nbconvert_exporter": "python", 44 | "pygments_lexer": "ipython3", 45 | "version": "3.9.7" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 5 50 | } 51 | -------------------------------------------------------------------------------- /Section 18 - Programming Essentials Using Python - Overview of Collections - List and Set/Python Code/12_Overview of Strings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- /Section 19 - Programming Essentials Using Python - Overview of Collections - Dict and Tuple/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/10_overview_of_collections_dict_and_tuple/02_overview_of_dict_and_tuple.html 4 | 5 | https://python.itversity.com/10_overview_of_collections_dict_and_tuple/03_common_operations.html 6 | 7 | https://python.itversity.com/10_overview_of_collections_dict_and_tuple/04_accessing_elements_tuples.html 8 | 9 | https://python.itversity.com/10_overview_of_collections_dict_and_tuple/05_accessing_elements_dict.html 10 | 11 | https://python.itversity.com/10_overview_of_collections_dict_and_tuple/06_manipulating_dict.html 12 | 13 | https://python.itversity.com/10_overview_of_collections_dict_and_tuple/07_common_examples_dict.html 14 | 15 | https://python.itversity.com/10_overview_of_collections_dict_and_tuple/08_list_of_tuples.html 16 | 17 | https://python.itversity.com/10_overview_of_collections_dict_and_tuple/09_list_of_dicts.html -------------------------------------------------------------------------------- /Section 19 - Programming Essentials Using Python - Overview of Collections - Dict and Tuple/Python Code/01_manipulating_collections_using_loops.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "878054b5", 6 | "metadata": {}, 7 | "source": [ 8 | "## Manipulating Collections using Loops\n", 9 | "\n", 10 | "Let us understand how to manipulate collections using loops. 
We will be performing quite a lot of tasks to get enough programming practice.\n", 11 | "\n", 12 | " * Reading Files Into Collections.\n", 13 | " * Overview of Standard Transformations.\n", 14 | " * Row Level Transformations.\n", 15 | " * Getting Unique Elements.\n", 16 | " * Filtering Data.\n", 17 | " * Preparing Data Sets.\n", 18 | " * Quick Recap of Dict Operations.\n", 19 | " * Performing Total Aggregations.\n", 20 | " * Joining Data Sets.\n", 21 | " * Limitations of Using Loops.\n", 22 | " * Exercises - Manipulating Collections." 23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3 (ipykernel)", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.9.7" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 5 47 | } 48 | -------------------------------------------------------------------------------- /Section 20 - Programming Essentials Using Python - Manipulating Collections using Loops/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/11_manipulating_collections_using_loops/02_reading_files_into_collections.html 4 | 5 | https://python.itversity.com/11_manipulating_collections_using_loops/03_overview_of_standard_transformations.html 6 | 7 | https://python.itversity.com/11_manipulating_collections_using_loops/04_row_level_transformations.html 8 | 9 | https://python.itversity.com/11_manipulating_collections_using_loops/05_getting_unique_elements.html 10 | 11 | https://python.itversity.com/11_manipulating_collections_using_loops/06_filtering_data.html 12 | 13 | https://python.itversity.com/11_manipulating_collections_using_loops/07_preparing_data_sets.html 14 | 15 | https://python.itversity.com/11_manipulating_collections_using_loops/08_quick_recap_of_dict_operations.html 16 | 17 | https://python.itversity.com/11_manipulating_collections_using_loops/09_performing_total_aggregations.html 18 | 19 | https://python.itversity.com/11_manipulating_collections_using_loops/11_joining_data_sets.html 20 | 21 | https://python.itversity.com/11_manipulating_collections_using_loops/12_limitations_of_using_loops.html -------------------------------------------------------------------------------- /Section 20 - Programming Essentials Using Python - Manipulating Collections using Loops/Python Code/01_manipulating_collections_using_loops.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c5f8c096", 6 | "metadata": {}, 7 | "source": [ 8 | "## Manipulating Collections using Loops\n", 9 | "\n", 10 | "Let us understand how to manipulate collections using loops. 
We will be performing quite a lot of tasks to get enough programming practice.\n", 11 | "\n", 12 | " * Reading files into collections\n", 13 | " * Overview of Standard Transformations\n", 14 | " * Row level transformations\n", 15 | " * Getting unique elements\n", 16 | " * Filtering Data\n", 17 | " * Preparing Data Sets\n", 18 | " * Quick Recap of Dict Operations\n", 19 | " * Performing Total Aggregations\n", 20 | " * Performing Grouped Aggregations\n", 21 | " * Joining Data Sets\n", 22 | " * Manipulating collections using Comprehensions\n", 23 | " * Limitations of using Loops\n", 24 | " * Exercises - Manipulating Collections" 25 | ] 26 | } 27 | ], 28 | "metadata": { 29 | "kernelspec": { 30 | "display_name": "Python 3 (ipykernel)", 31 | "language": "python", 32 | "name": "python3" 33 | }, 34 | "language_info": { 35 | "codemirror_mode": { 36 | "name": "ipython", 37 | "version": 3 38 | }, 39 | "file_extension": ".py", 40 | "mimetype": "text/x-python", 41 | "name": "python", 42 | "nbconvert_exporter": "python", 43 | "pygments_lexer": "ipython3", 44 | "version": "3.9.7" 45 | } 46 | }, 47 | "nbformat": 4, 48 | "nbformat_minor": 5 49 | } 50 | -------------------------------------------------------------------------------- /Section 20 - Programming Essentials Using Python - Manipulating Collections using Loops/Python Code/10_manipulate_collections_using_comprehensions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5e830faa", 6 | "metadata": {}, 7 | "source": [ 8 | "## Manipulate Collections using Comprehensions\n", 9 | "\n", 10 | "Comprehensions are an alternative to manipulating collections using conventional loops.\n", 11 | "\n", 12 | " * Comprehensions reduce coding effort and improve readability when we want to apply basic transformations on the data.\n", 13 | " * We can apply comprehensions on lists, sets, as well as dicts.\n", 14 | " * We can create a new list using list comprehensions by applying simple row level transformations.\n", 15 | " * We can also perform simple aggregations using list comprehensions." 
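A minimal sketch of these points, using a small made-up list:

```python
numbers = [1, 2, 3, 4, 5, 6]

# Row level transformation with a condition: squares of the even numbers
squares_of_evens = [n * n for n in numbers if n % 2 == 0]
print(squares_of_evens)  # [4, 16, 36]

# Set and dict comprehensions follow the same syntax
unique_remainders = {n % 3 for n in numbers}     # {0, 1, 2}
squares_by_number = {n: n * n for n in numbers}  # {1: 1, 2: 4, 3: 9, ...}

# Simple aggregation - sum of the even numbers via a generator expression
print(sum(n for n in numbers if n % 2 == 0))     # 12
```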
16 | ] 17 | } 18 | ], 19 | "metadata": { 20 | "kernelspec": { 21 | "display_name": "Python 3 (ipykernel)", 22 | "language": "python", 23 | "name": "python3" 24 | }, 25 | "language_info": { 26 | "codemirror_mode": { 27 | "name": "ipython", 28 | "version": 3 29 | }, 30 | "file_extension": ".py", 31 | "mimetype": "text/x-python", 32 | "name": "python", 33 | "nbconvert_exporter": "python", 34 | "pygments_lexer": "ipython3", 35 | "version": "3.9.7" 36 | } 37 | }, 38 | "nbformat": 4, 39 | "nbformat_minor": 5 40 | } 41 | -------------------------------------------------------------------------------- /Section 21 - Programming Essentials Using Python - Development of Map Reduce APIs/Links/Links: -------------------------------------------------------------------------------- 1 | Links: 2 | 3 | https://python.itversity.com/12_development_of_map_reduce_apis/01_development_of_map_reduce_apis.html 4 | 5 | https://python.itversity.com/12_development_of_map_reduce_apis/02_develop_myFilter.html 6 | 7 | https://python.itversity.com/12_development_of_map_reduce_apis/03_validate_myFilter.html 8 | 9 | https://python.itversity.com/12_development_of_map_reduce_apis/04_develop_myMap.html 10 | 11 | https://python.itversity.com/12_development_of_map_reduce_apis/05_validate_myMap.html 12 | 13 | https://python.itversity.com/12_development_of_map_reduce_apis/06_develop_myReduce.html 14 | 15 | https://python.itversity.com/12_development_of_map_reduce_apis/07_validate_myReduce.html 16 | 17 | https://python.itversity.com/12_development_of_map_reduce_apis/08_develop_myReduceByKey.html 18 | 19 | https://python.itversity.com/12_development_of_map_reduce_apis/09_validate_myReduceByKey.html 20 | 21 | https://python.itversity.com/12_development_of_map_reduce_apis/10_develop_myJoin.html 22 | 23 | https://python.itversity.com/12_development_of_map_reduce_apis/11_validate_myJoin.html 24 | 25 | https://python.itversity.com/12_development_of_map_reduce_apis/12_exercises_custom_map_reduce_functions.html -------------------------------------------------------------------------------- /Section 21 - Programming Essentials Using Python - Development of Map Reduce APIs/Python Code/01_development_of_map_reduce_apis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md" 8 | } 9 | }, 10 | "source": [ 11 | "# Development of Map Reduce APIs\n", 12 | "\n", 13 | "* Develop myFilter\n", 14 | "* Validate myFilter Function\n", 15 | "* Develop myMap\n", 16 | "* Validate myMap Function\n", 17 | "* Develop myReduce\n", 18 | "* Validate myReduce\n", 19 | "* Develop myReduceByKey\n", 20 | "* Validate myReduceByKey\n", 21 | "* Exercises" 22 | ] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "Python 3", 28 | "language": "python", 29 | "name": "python3" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 3 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.6.12" 42 | } 43 | }, 44 | "nbformat": 4, 45 | "nbformat_minor": 4 46 | } 47 | -------------------------------------------------------------------------------- /Section 22 - Programming Essentials Using Python - Understanding Map Reduce Libraries/15 - Limitations of Map Reduce Libraries.ipynb: 
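The custom APIs outlined in the Section 21 notebook above can be sketched as follows. This is one possible minimal implementation for illustration; the course's own versions may differ in details:

```python
def myFilter(c, f):
    # Keep only the elements of collection c for which f returns True
    return [e for e in c if f(e)]


def myMap(c, f):
    # Apply the transformation f on every element of collection c
    return [f(e) for e in c]


def myReduce(c, f):
    # Combine the elements of collection c pairwise using f
    total = c[0]
    for e in c[1:]:
        total = f(total, e)
    return total


# Validate the functions
print(myFilter(list(range(1, 11)), lambda n: n % 2 == 0))  # [2, 4, 6, 8, 10]
print(myMap(list(range(1, 6)), lambda n: n * n))           # [1, 4, 9, 16, 25]
print(myReduce([1, 2, 3, 4], lambda x, y: x + y))          # 10
```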
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Limitations of Map Reduce Libraries\n", 8 | "\n", 9 | "Here are some of the limitations of using Map Reduce Libraries.\n", 10 | "* We cannot refer to attributes by name directly.\n", 11 | "* Functions are scattered and lack consistency.\n", 12 | "* Readability and maintainability are addressed using libraries such as Pandas.\n", 13 | "* Libraries such as PySpark take care of scalability.\n", 14 | "\n", 15 | "```{note}\n", 16 | "We use the approach of loops or itertools as part of mobile or web application development. For Data Engineering applications we prefer Pandas or PySpark.\n", 17 | "```" 18 | ] 19 | } 20 | ], 21 | "metadata": { 22 | "kernelspec": { 23 | "display_name": "Python 3 (ipykernel)", 24 | "language": "python", 25 | "name": "python3" 26 | }, 27 | "language_info": { 28 | "codemirror_mode": { 29 | "name": "ipython", 30 | "version": 3 31 | }, 32 | "file_extension": ".py", 33 | "mimetype": "text/x-python", 34 | "name": "python", 35 | "nbconvert_exporter": "python", 36 | "pygments_lexer": "ipython3", 37 | "version": "3.9.7" 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 4 42 | } 43 | -------------------------------------------------------------------------------- /Section 23 - Programming Essentials Using Python - Basics of File IO using Python/10 - Basics of File IO using Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0154b433", 6 | "metadata": {}, 7 | "source": [ 8 | "## Basics of File I/O using Python\n", 9 | "\n", 10 | "As part of this section or module we will go through some of the basics related to File I/O using Python as the programming language.\n", 11 | "\n", 12 | " * Overview of File I/O.\n", 13 | " * Folders and Files.\n", 14 | " * File Paths and Names.\n", 15 | " * Overview of Retail Data.\n", 16 | " * Read Text File into String.\n", 17 | " * Write String to Text File.\n", 18 | " * Overview of Modes to Write into Files.\n", 19 | " * Overview of Delimited Strings.\n", 20 | " * Read CSV into List of Strings.\n", 21 | " * Write Strings to File in Append Mode.\n", 22 | " * Managing Files and Folders using Python." 
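A minimal sketch of the basic read and write operations listed above; the file path is illustrative:

```python
import os

# Write a string to a text file ('w' overwrites, 'a' appends)
data = '1,2013-07-25 00:00:00.0,11599,CLOSED\n2,2013-07-25 00:00:00.0,256,PENDING_PAYMENT'
with open('/tmp/orders_sample.csv', 'w') as fp:
    fp.write(data)

# Read the text file back into a string (default mode is 'r')
with open('/tmp/orders_sample.csv') as fp:
    contents = fp.read()

# Read CSV contents into a list of strings, one element per line
lines = contents.splitlines()
print(len(lines))  # 2

# Managing files and folders using Python
print(os.path.exists('/tmp/orders_sample.csv'))  # True
```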
23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3 (ipykernel)", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.9.7" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 5 47 | } 48 | -------------------------------------------------------------------------------- /Section 24 - Programming Essentials Using Python - Delimited Files and Collections/07 - Delimited File and Collections.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "21d69690", 6 | "metadata": {}, 7 | "source": [ 8 | "## Delimited Files and Collections\n", 9 | "\n", 10 | "As part of this section or module, we will be going through the details about dealing with delimited files and collections.\n", 11 | "\n", 12 | " * Overview of Delimited Text Files.\n", 13 | " * Recap of Basic File I/O.\n", 14 | " * Read Delimited Files into List.\n", 15 | " * Write Delimited Strings into Files.\n", 16 | " * Overview of CSV Module.\n", 17 | " * Read Delimited Data using CSV.\n", 18 | " * Write Iterables to Files using CSV.\n", 19 | " * Advantages of using CSV Module.\n", 20 | " * Apply Schema on Lists from Files." 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3 (ipykernel)", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.9.7" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 5 45 | } 46 | -------------------------------------------------------------------------------- /Section 25 - Programming Essentials Using Python - Overview of Pandas Libraries/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/15_overview_of_pandas_libraries/02_pandas_data_structures_overview.html 4 | 5 | https://python.itversity.com/15_overview_of_pandas_libraries/03_overview_of_series.html 6 | 7 | https://python.itversity.com/15_overview_of_pandas_libraries/04_creating_data_frames_from_lists.html 8 | 9 | https://python.itversity.com/15_overview_of_pandas_libraries/05_data_frames_basic_operations.html 10 | 11 | https://python.itversity.com/15_overview_of_pandas_libraries/06_csv_to_pandas_data_frame.html 12 | 13 | https://python.itversity.com/15_overview_of_pandas_libraries/07_projecting_and_filtering.html 14 | 15 | https://python.itversity.com/15_overview_of_pandas_libraries/08_performing_total_aggregations.html 16 | 17 | https://python.itversity.com/15_overview_of_pandas_libraries/09_performing_grouped_aggregations.html 18 | 19 | https://python.itversity.com/15_overview_of_pandas_libraries/10_writing_data_frames_to_files.html 20 | 21 | https://python.itversity.com/15_overview_of_pandas_libraries/11_joining_data_frames.html -------------------------------------------------------------------------------- /Section 25 - Programming Essentials Using Python - Overview of Pandas Libraries/Python 
Code/01_overview_of_pandas_libraries.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md" 8 | } 9 | }, 10 | "source": [ 11 | "# Overview of Pandas Libraries\n", 12 | "\n", 13 | "* Pandas Data Structures – Overview\n", 14 | "* Overview of Series\n", 15 | "* Creating Data Frames from lists\n", 16 | "* Data Frames - Basic Operations\n", 17 | "* CSV to Pandas Data Frame\n", 18 | "* Projecting and Filtering\n", 19 | "* Performing Total Aggregations\n", 20 | "* Performing Grouped Aggregations\n", 21 | "* Writing Data Frames to Files\n", 22 | "* Joining Data Frames" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [] 31 | } 32 | ], 33 | "metadata": { 34 | "kernelspec": { 35 | "display_name": "Python 3", 36 | "language": "python", 37 | "name": "python3" 38 | }, 39 | "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 3 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython3", 49 | "version": "3.6.12" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 4 54 | } 55 | -------------------------------------------------------------------------------- /Section 26 - Programming Essentials Using Python - Database Programming CRUD Operations/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/17_database_programming_crud_operations/02_overview_of_database_programming.html 4 | 5 | https://python.itversity.com/17_database_programming_crud_operations/03_recap_of_rdbms_concepts.html 6 | 7 | https://python.itversity.com/17_database_programming_crud_operations/04_setup_database_client_libraries.html 8 | 9 | https://python.itversity.com/17_database_programming_crud_operations/05_function_get_database_connection.html 10 | 11 | https://python.itversity.com/17_database_programming_crud_operations/06_creating_database_table.html 12 | 13 | https://python.itversity.com/17_database_programming_crud_operations/07_inserting_data_into_table.html 14 | 15 | https://python.itversity.com/17_database_programming_crud_operations/08_updating_existing_table_data.html 16 | 17 | https://python.itversity.com/17_database_programming_crud_operations/09_deleting_data_from_table.html 18 | 19 | https://python.itversity.com/17_database_programming_crud_operations/10_querying_data_from_table.html 20 | 21 | https://python.itversity.com/17_database_programming_crud_operations/11_recap_crud_operations.html -------------------------------------------------------------------------------- /Section 26 - Programming Essentials Using Python - Database Programming CRUD Operations/Postgre Code/01 - Creating Table.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | * 3 | FROM 4 | information_schema.tables 5 | WHERE 6 | table_catalog = 'itversity_sms_db' 7 | AND 8 | table_schema = 'public' 9 | LIMIT 10 | 10; 11 | 12 | 13 | DROP TABLE IF EXISTS 14 | users CASCADE; 15 | 16 | 17 | SELECT 18 | * 19 | FROM 20 | information_schema.tables 21 | WHERE 22 | table_catalog = 'itversity_sms_db' 23 | AND 24 | table_schema = 'public' 25 | AND 26 | table_name = 'users' 27 | LIMIT 28 | 10; 29 | 30 | 31 | CREATE TABLE users 32 | ( 33 | user_id SERIAL PRIMARY KEY 34 
| , user_first_name VARCHAR(30) NOT NULL 35 | , user_last_name VARCHAR(30) NOT NULL 36 | , user_email_id VARCHAR(50) NOT NULL 37 | , user_email_validated BOOLEAN DEFAULT FALSE 38 | , user_password VARCHAR(200) 39 | , user_role VARCHAR(1) NOT NULL DEFAULT 'U' --U and A 40 | , is_active BOOLEAN DEFAULT FALSE 41 | , create_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP 42 | , last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP 43 | ); 44 | 45 | 46 | SELECT 47 | * 48 | FROM 49 | information_schema.tables 50 | WHERE 51 | table_catalog = 'itversity_sms_db' 52 | AND 53 | table_schema = 'public' 54 | AND 55 | table_name = 'users' 56 | LIMIT 57 | 10; 58 | 59 | 60 | SELECT 61 | * 62 | FROM 63 | information_schema.columns 64 | WHERE 65 | table_name = 'users' 66 | LIMIT 67 | 10; 68 | 69 | 70 | SELECT 71 | * 72 | FROM 73 | users; -------------------------------------------------------------------------------- /Section 26 - Programming Essentials Using Python - Database Programming CRUD Operations/Postgre Code/02 - Inserting Data into Table.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO users 2 | ( 3 | user_first_name 4 | , user_last_name 5 | , user_email_id 6 | ) VALUES ( 7 | 'Scott' 8 | , 'Tiger' 9 | , 'scott@tiger.com' 10 | ); 11 | 12 | 13 | SELECT 14 | * 15 | FROM 16 | users; 17 | 18 | 19 | INSERT INTO users 20 | ( 21 | user_first_name 22 | , user_last_name 23 | , user_email_id 24 | ) VALUES ( 25 | 'Donald' 26 | , 'Duck' 27 | , 'donald@duck.com' 28 | ); 29 | 30 | 31 | SELECT 32 | * 33 | FROM 34 | users; 35 | 36 | 37 | INSERT INTO users 38 | ( 39 | user_first_name 40 | , user_last_name 41 | , user_email_id 42 | , user_role 43 | , is_active 44 | ) VALUES ( 45 | 'Mickey' 46 | , 'Mouse' 47 | , 'mickey@mouse.com' 48 | , 'U' 49 | , True 50 | ); 51 | 52 | 53 | SELECT 54 | * 55 | FROM 56 | users; 57 | 58 | 59 | INSERT INTO users 60 | ( 61 | user_first_name 62 | , user_last_name 63 | , user_email_id 64 | , user_password 65 | , user_role 66 | , is_active 67 | ) VALUES ( 68 | 'Gordan' 69 | , 'Bradock' 70 | , 'gbradock0@barnesandnoble.com' 71 | , 'h9LAz7p7ub' 72 | , 'U' 73 | , True 74 | ), ( 75 | 'Tobe' 76 | , 'Lyness' 77 | , 'tlyness1@paginegialle.it' 78 | , 'oEofndp' 79 | , 'U' 80 | , True 81 | ), ( 82 | 'Addie' 83 | , 'Mesias' 84 | , 'amesias2@twitpic.com' 85 | , 'ih7Y69u56' 86 | , 'U' 87 | , True 88 | ); 89 | 90 | 91 | SELECT 92 | * 93 | FROM 94 | users; -------------------------------------------------------------------------------- /Section 26 - Programming Essentials Using Python - Database Programming CRUD Operations/Postgre Code/03 - Updating Existing Table Data.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | user_id 3 | , user_role 4 | FROM 5 | users 6 | WHERE 7 | user_id = 1; 8 | 9 | 10 | UPDATE 11 | users 12 | SET 13 | user_role = 'A' 14 | WHERE 15 | user_id = 1; 16 | 17 | 18 | SELECT 19 | user_id 20 | , user_role 21 | FROM 22 | users 23 | WHERE 24 | user_id = 1; -------------------------------------------------------------------------------- /Section 26 - Programming Essentials Using Python - Database Programming CRUD Operations/Postgre Code/04 - Deleting Data from Table.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | user_id 3 | , user_password 4 | FROM 5 | users; 6 | 7 | 8 | DELETE FROM 9 | users 10 | WHERE 11 | user_password IS NULL OR user_id = 4; 12 | 13 | 14 | SELECT 15 | user_id 16 | , user_password 17 | FROM 18 | users; 
-------------------------------------------------------------------------------- /Section 26 - Programming Essentials Using Python - Database Programming CRUD Operations/Postgre Code/05 - Querying Data from Table.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | * 3 | FROM 4 | users 5 | LIMIT 6 | 5; 7 | 8 | 9 | SELECT 10 | * 11 | FROM 12 | users 13 | WHERE 14 | user_id = 5; 15 | 16 | 17 | SELECT 18 | user_id 19 | , user_email_id 20 | , user_password 21 | FROM 22 | users 23 | WHERE 24 | user_password IS NOT NULL; -------------------------------------------------------------------------------- /Section 26 - Programming Essentials Using Python - Database Programming CRUD Operations/Python Code/11_crud.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Recap - CRUD Operations\n", 8 | "\n", 9 | "Let us recap the process of performing CRUD operations\n", 10 | "\n", 11 | "* Establish connection to the database - Create Connection Object\n", 12 | "* Create Cursor Object using Connection Object - `connection.cursor`.\n", 13 | "* Build Statement to perform one or more `INSERT`, `UPDATE` or `DELETE`.\n", 14 | "* When we perform DML operations as part of transactions such as `INSERT`, `UPDATE` or `DELETE` make sure transactions without exceptions are committed and transactions with exceptions are rolled back.\n", 15 | "* Make sure cursors as well as connections are closed\n", 16 | "* If the database operation involves select only, then we don't need to commit or rollback. `SELECT` only reads the data, it will not make any changes to the data in the database." 17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "Python 3 (ipykernel)", 23 | "language": "python", 24 | "name": "python3" 25 | }, 26 | "language_info": { 27 | "codemirror_mode": { 28 | "name": "ipython", 29 | "version": 3 30 | }, 31 | "file_extension": ".py", 32 | "mimetype": "text/x-python", 33 | "name": "python", 34 | "nbconvert_exporter": "python", 35 | "pygments_lexer": "ipython3", 36 | "version": "3.9.7" 37 | } 38 | }, 39 | "nbformat": 4, 40 | "nbformat_minor": 4 41 | } 42 | -------------------------------------------------------------------------------- /Section 27 - Programming Essentials Using Python - Database Programming Bath Operation/Links/Links: -------------------------------------------------------------------------------- 1 | Links 2 | 3 | https://python.itversity.com/18_database_programming_batch_operations/04_recap_of_insert.html 4 | 5 | https://python.itversity.com/18_database_programming_batch_operations/05_preparing_database.html 6 | 7 | https://python.itversity.com/18_database_programming_batch_operations/06_reading_data_from_file.html -------------------------------------------------------------------------------- /Section 27 - Programming Essentials Using Python - Database Programming Bath Operation/Postgre Code/01 - Insert.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO users 2 | ( 3 | user_first_name 4 | , user_last_name 5 | , user_email_id 6 | , user_password 7 | , user_role 8 | , is_active 9 | ) VALUES ( 10 | 'Gordan' 11 | , 'Bradock' 12 | , 'gbradock0@barnesandnoble.com' 13 | , 'h9LAz7p7ub' 14 | , 'U' 15 | , True 16 | ); 17 | 18 | 19 | SELECT 20 | * 21 | FROM 22 | users; 
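The CRUD recap above translates to code roughly as follows. A minimal sketch assuming psycopg2 is installed; host, port, database, user and password are placeholders to be replaced with your own environment's values:

```python
import psycopg2

# Placeholder connection details - substitute your own environment's values
connection = psycopg2.connect(
    host='localhost',
    port=5432,
    database='itversity_sms_db',
    user='itversity_sms_user',
    password='CHANGE_ME'
)
cursor = connection.cursor()

try:
    # Use bind variables (%s) rather than hard coded values
    cursor.execute(
        'INSERT INTO users (user_first_name, user_last_name, user_email_id) '
        'VALUES (%s, %s, %s)',
        ('Scott', 'Tiger', 'scott@tiger.com')
    )
    connection.commit()    # commit the transaction when there are no exceptions
except Exception:
    connection.rollback()  # roll back the transaction on failure
    raise
finally:
    cursor.close()         # make sure cursor and connection are closed
    connection.close()
```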
-------------------------------------------------------------------------------- /Section 27 - Programming Essentials Using Python - Database Programming Bath Operation/Postgre Code/02 - Select.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | * 3 | FROM 4 | orders 5 | LIMIT 6 | 10; 7 | 8 | 9 | SELECT 10 | * 11 | FROM 12 | order_items 13 | LIMIT 14 | 10; -------------------------------------------------------------------------------- /Section 27 - Programming Essentials Using Python - Database Programming Bath Operation/Python Code/01_database_programming_batch_operations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md" 8 | } 9 | }, 10 | "source": [ 11 | "# Database Programming – Batch Operations\n", 12 | "\n", 13 | "As part of this section, we will talk about how we can perform batch operations against database tables using Python.\n", 14 | "\n", 15 | "* Recap of Insert\n", 16 | "* Preparing Database\n", 17 | "* Reading Data from File\n", 18 | "* Processing Data using Pandas\n", 19 | "* Writing Data to Table\n", 20 | "* Batch Loading of Data\n", 21 | "* Best Practices - Batch Loading\n", 22 | "* Read Process Write Pattern" 23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.6.12" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 4 47 | } 48 | -------------------------------------------------------------------------------- /Section 27 - Programming Essentials Using Python - Database Programming Bath Operation/Python Code/11_crud.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Recap - CRUD Operations\n", 8 | "\n", 9 | "Let us recap the process of performing CRUD operations\n", 10 | "\n", 11 | "* Establish connection to the database - Create Connection Object\n", 12 | "* Create Cursor Object using Connection Object - `connection.cursor`.\n", 13 | "* Build Statement to perform one or more `INSERT`, `UPDATE` or `DELETE`.\n", 14 | "* When we perform DML operations as part of transactions such as `INSERT`, `UPDATE` or `DELETE` make sure transactions without exceptions are committed and transactions with exceptions are rolled back.\n", 15 | "* Make sure cursors as well as connections are closed\n", 16 | "* If the database operation involves select only, then we don't need to commit or rollback. `SELECT` only reads the data, it will not make any changes to the data in the database." 
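Building on the recap, here is a minimal sketch of batch loading with one commit per batch rather than per record. The connection object and the users table are assumed to exist as set up earlier:

```python
def load_users_in_batches(connection, users, batch_size=1000):
    """Insert (first_name, last_name, email_id) tuples in batches."""
    cursor = connection.cursor()
    query = ('INSERT INTO users (user_first_name, user_last_name, user_email_id) '
             'VALUES (%s, %s, %s)')
    for i in range(0, len(users), batch_size):
        # executemany sends one batch of records at a time
        cursor.executemany(query, users[i:i + batch_size])
        connection.commit()  # one commit per batch keeps commit overhead low
    cursor.close()
```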
17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "Python 3 (ipykernel)", 23 | "language": "python", 24 | "name": "python3" 25 | }, 26 | "language_info": { 27 | "codemirror_mode": { 28 | "name": "ipython", 29 | "version": 3 30 | }, 31 | "file_extension": ".py", 32 | "mimetype": "text/x-python", 33 | "name": "python", 34 | "nbconvert_exporter": "python", 35 | "pygments_lexer": "ipython3", 36 | "version": "3.9.7" 37 | } 38 | }, 39 | "nbformat": 4, 40 | "nbformat_minor": 4 41 | } 42 | -------------------------------------------------------------------------------- /Section 27 - Programming Essentials Using Python - Database Programming Bath Operation/Python Code/15 - Best Practices - Batch Loading.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0ae97642", 6 | "metadata": {}, 7 | "source": [ 8 | "## Best Practices - Batch Loading\n", 9 | "\n", 10 | "Let us go through some of the best practices to perform batch load.\n", 11 | "\n", 12 | "* We should minimize the number of connections to the database.\n", 13 | "* We should avoid executing queries using hard coded values. Rather, we should prefer bind variables.\n", 14 | "* Too much committing is bad as each commit incurs overhead.\n", 15 | " * If we have to load a considerable amount of data, we should consider committing every 1,000 records or 10,000 records or even more based upon the capacity of the database.\n", 16 | " * Most of the mainstream databases perform direct path I/O or batch load which might perform better compared to looping, inserting and committing data. In some cases, we can use those features." 17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "Python 3 (ipykernel)", 23 | "language": "python", 24 | "name": "python3" 25 | }, 26 | "language_info": { 27 | "codemirror_mode": { 28 | "name": "ipython", 29 | "version": 3 30 | }, 31 | "file_extension": ".py", 32 | "mimetype": "text/x-python", 33 | "name": "python", 34 | "nbconvert_exporter": "python", 35 | "pygments_lexer": "ipython3", 36 | "version": "3.9.7" 37 | } 38 | }, 39 | "nbformat": 4, 40 | "nbformat_minor": 5 41 | } 42 | -------------------------------------------------------------------------------- /Section 28 - Programming Essentials Using Python - Processing JSON Data/10 - Processing JSON Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "56389153", 6 | "metadata": {}, 7 | "source": [ 8 | "## Processing JSON Data\n", 9 | "\n", 10 | "As part of this section or module we will get an overview of processing JSON data. We will primarily focus on using `json` or `pandas` to process data in JSON format.\n", 11 | "\n", 12 | " * Overview of JSON.\n", 13 | " * JSON Data Types.\n", 14 | " * Create JSON String.\n", 15 | " * Process JSON String.\n", 16 | " * Single JSON Document in Files.\n", 17 | " * Multiple JSON Documents in Files.\n", 18 | " * Process JSON using Pandas.\n", 19 | " * Different JSON Formats supported by Pandas.\n", 20 | " * Common Use Cases for JSON.\n", 21 | " * Write to JSON Files using JSON module.\n", 22 | " * Write to JSON Files using Pandas." 
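A minimal sketch of the core operations listed above, using the json module and Pandas; the file paths are illustrative:

```python
import json
import pandas as pd

# Create and process a JSON string
user = {'user_id': 1, 'user_name': 'Scott, Tiger'}
user_json = json.dumps(user)              # dict -> JSON string
print(json.loads(user_json)['user_id'])   # JSON string -> dict

# Multiple JSON documents in a file, one per line (JSON Lines)
with open('/tmp/users.json', 'w') as fp:
    fp.write('{"user_id": 1, "user_name": "Scott, Tiger"}\n')
    fp.write('{"user_id": 2, "user_name": "Donald, Duck"}\n')

# Process JSON using Pandas - lines=True reads one document per line
users_df = pd.read_json('/tmp/users.json', lines=True)
print(users_df)

# Write the Data Frame back to JSON using Pandas
users_df.to_json('/tmp/users_copy.json', orient='records', lines=True)
```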
23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3 (ipykernel)", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.9.7" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 5 47 | } 48 | -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/01_Accessing_Python_through_PowerShell.txt: -------------------------------------------------------------------------------- 1 | # Accessing Python through PowerShell 2 | python 3 | 4 | # Exiting Python from PowerShell 5 | exit() -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/02_Create_Virtual_Environment_for_Web_Application.txt: -------------------------------------------------------------------------------- 1 | # Creating a folder 2 | mkdir flask_demo 3 | 4 | # Changing folder 5 | cd flask_demo 6 | 7 | # Creating the virtual environment 8 | python -m venv fd-venv 9 | 10 | # Activating the virtual environment 11 | fd-venv\Scripts\activate 12 | 13 | # Disabling the virtual environment 14 | deactivate -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/03_Reviewing_Dependencies_Virtual_Environment.txt: -------------------------------------------------------------------------------- 1 | # Activating the virtual environment 2 | fd-venv\Scripts\activate 3 | 4 | # Listing installed packages 5 | pip list 6 | 7 | # Upgrading pip 8 | pip install --upgrade pip 9 | 10 | # Listing installed packages 11 | pip list 12 | 13 | # Disabling the virtual environment 14 | deactivate -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/04_Installing_Dependencies_Web_applications.txt: -------------------------------------------------------------------------------- 1 | # Installing packages using pip 2 | pip install psycopg2 3 | 4 | pip install flask 5 | 6 | pip install sqlalchemy -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/05_Getting_Details_About_PIP.txt: -------------------------------------------------------------------------------- 1 | # Checking pip help 2 | pip 3 | 4 | # Listing all pip packages 5 | pip list 6 | 7 | # Looking at a specific package 8 | pip show SQLAlchemy 9 | 10 | # Checking packages and version 11 | pip freeze 12 | 13 | # Checking packages list 14 | pip list -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/06_Uninstall_Packages_Using_PIP.txt: -------------------------------------------------------------------------------- 1 | # Checking packages list 2 | pip list 3 | 4 | # Uninstalling a pip package 5 | pip uninstall psycopg2 6 | 7 | # Checking packages list 8 | pip list 9 | 10 | # Help with PIP uninstall commands 11 | pip uninstall -h 12 | 13 | # Uninstalling a pip package 14 | pip uninstall SQLAlchemy 15 | 16 | # Uninstalling a pip package 17 | pip uninstall Flask -------------------------------------------------------------------------------- 
/Section 30 - Understanding Python Virtual Environments/07_Cleanup_Virtual_Environment.txt: -------------------------------------------------------------------------------- 1 | # Listing the files inside the folder 2 | ls -ltr 3 | 4 | # Deleting all folders 5 | rm -rf fd-venv -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/08_Recreate_and_Activate_Virtual_Environment_Web_Application.txt: -------------------------------------------------------------------------------- 1 | # Changing folder 2 | cd flask_demo 3 | 4 | # Creating the virtual environment 5 | python -m venv fd-venv 6 | 7 | # Activating the virtual environment 8 | fd-venv\Scripts\activate 9 | 10 | # Changing folder 11 | cd fd-venv -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/09_Define_Requeriments_File_Web_Application.txt: -------------------------------------------------------------------------------- 1 | # Creating the requirements file using vi 2 | vi requirements.txt -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/10_Install_Dependencies_Web_Application.txt: -------------------------------------------------------------------------------- 1 | # Viewing the contents of the file 2 | type requirements.txt 3 | 4 | # Seeing all pip commands 5 | pip 6 | 7 | # Help with PIP commands 8 | pip install -h 9 | 10 | # Installing packages from the requirements file 11 | pip install -r requirements.txt 12 | 13 | # Uninstalling packages from the requirements file 14 | pip uninstall -r requirements.txt 15 | 16 | # Disabling the virtual environment 17 | deactivate 18 | 19 | # Deleting all folders 20 | del fd-ven* -------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/11_Create_Virtual_Environment_Data_Engineering.txt: -------------------------------------------------------------------------------- 1 | # Choosing a folder 2 | cd .\Desktop\ 3 | 4 | # Creating a folder 5 | mkdir de_demo 6 | 7 | # Changing folder 8 | cd de_demo 9 | 10 | # Creating the virtual environment 11 | python -m venv de-venv 12 | 13 | # Changing folder 14 | cd de-venv 15 | 16 | # Activating the virtual environment 17 | Scripts\activate 18 | 19 | # Listing installed packages 20 | pip list 21 | 22 | # Going back a folder 23 | cd .. 
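The contents of requirements.txt are never shown in these transcripts. Based on the packages installed earlier in this section, the web application version would plausibly look like this (version pins omitted; adjust to your needs):

```
flask
psycopg2
sqlalchemy
```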
-------------------------------------------------------------------------------- /Section 30 - Understanding Python Virtual Environments/12_Install_Dependencias_data_Engineering_Application.txt: -------------------------------------------------------------------------------- 1 | # Creating the requirements file using copy con 2 | copy con requirements.txt 3 | 4 | # Installing packages from the requirements file 5 | pip install -r requirements.txt 6 | 7 | # Listing installed packages 8 | pip list -------------------------------------------------------------------------------- /Section 31 - Overview of Pycharm for Python Application Development/Link/01_Installation_of_Pycharm_Windowns.txt: -------------------------------------------------------------------------------- 1 | # Link 2 | https://www.jetbrains.com/pycharm/download/#section=windows -------------------------------------------------------------------------------- /Section 31 - Overview of Pycharm for Python Application Development/gettingStarted/test.py: -------------------------------------------------------------------------------- 1 | print("Hello World Pycharm!!") 2 | -------------------------------------------------------------------------------- /Section 32 - Data Copier - Getting Started/CMD Codes/01_Codes_CMD: -------------------------------------------------------------------------------- 1 | # Choosing the folder 2 | cd Desktop 3 | 4 | # Creating a folder 5 | mkdir Projects 6 | 7 | # Choosing the folder 8 | cd Projects 9 | 10 | # Creating a folder 11 | mkdir Internal 12 | 13 | # Choosing the folder 14 | cd Internal 15 | 16 | # Creating a folder 17 | mkdir bootcamp 18 | 19 | # Choosing the folder 20 | cd bootcamp 21 | 22 | # Creating a folder 23 | mkdir data-copier 24 | 25 | # Choosing the folder 26 | cd data-copier -------------------------------------------------------------------------------- /Section 32 - Data Copier - Getting Started/CMD Codes/02_Listing_the_images: -------------------------------------------------------------------------------- 1 | # Listing the running containers 2 | docker ps 3 | 4 | # Listing all containers, including stopped ones 5 | docker container ps -a 6 | 7 | # Running a container from the hello-world image 8 | docker run hello-world -------------------------------------------------------------------------------- /Section 32 - Data Copier - Getting Started/CMD Codes/03_Checking_all_docker_commands: -------------------------------------------------------------------------------- 1 | # Checking all docker commands 2 | docker 3 | 4 | # Listing the images 5 | docker images 6 | 7 | docker image ls 8 | 9 | # Listing the containers 10 | docker container ls 11 | 12 | docker container ps -a 13 | 14 | # Listing docker image commands 15 | docker image -h 16 | 17 | # Deleting a container 18 | docker container rm hello-world -------------------------------------------------------------------------------- /Section 32 - Data Copier - Getting Started/Manual/01_Getting_Started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "b1e193f3", 6 | "metadata": {}, 7 | "source": [ 8 | "## Getting Started\n", 9 | "\n", 10 | "As part of this module we will set up the development environment to develop an application to copy data from JSON Files to a target database (Postgres).\n", 11 | "\n", 12 | " * Problem Statement - Data Copier.\n", 13 | " * Setup Docker.\n", 14 | " * Quick Overview of Docker.\n", 15 | " * Prepare Dataset.\n", 16 | " * Setup Postgres Database.\n", 17 | " * Overview of Postgres.\n", 18 | " * Setup Project using 
PyCharm.\n", 19 | " * Managing Dependencies.\n", 20 | " * Create GitHub Repository." 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3 (ipykernel)", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.9.7" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 5 45 | } 46 | -------------------------------------------------------------------------------- /Section 32 - Data Copier - Getting Started/Manual/02_Problem_Statement_Data_Copier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6f42dc9a", 6 | "metadata": {}, 7 | "source": [ 8 | "## Problem Statement - Data Copier\n", 9 | "\n", 10 | "Let us go through the problem statement for our project. We would like to develop the code to copy data which are in files using JSON Format to Postgres Database.\n", 11 | "\n", 12 | "Data is available in another repository called as retail_db_json. We will see how to setup the repository later. But we will review the data now." 13 | ] 14 | } 15 | ], 16 | "metadata": { 17 | "kernelspec": { 18 | "display_name": "Python 3 (ipykernel)", 19 | "language": "python", 20 | "name": "python3" 21 | }, 22 | "language_info": { 23 | "codemirror_mode": { 24 | "name": "ipython", 25 | "version": 3 26 | }, 27 | "file_extension": ".py", 28 | "mimetype": "text/x-python", 29 | "name": "python", 30 | "nbconvert_exporter": "python", 31 | "pygments_lexer": "ipython3", 32 | "version": "3.9.7" 33 | } 34 | }, 35 | "nbformat": 4, 36 | "nbformat_minor": 5 37 | } 38 | -------------------------------------------------------------------------------- /Section 32 - Data Copier - Getting Started/Manual/03_Setup_Docker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "647f797f", 6 | "metadata": {}, 7 | "source": [ 8 | "## Setup Docker\n", 9 | "\n", 10 | "Let us understand different options to setup Docker using different environments. We will also see how to set up Docker on Cloud9.\n", 11 | " * If your Mac or PC have 16 GB RAM and Quad Core, I would recommend to setup Docker Desktop. Just Google and Set it up.\n", 12 | " * For Windows, there might be some restrictions on Windows 10 Home and older versions.\n", 13 | " * If you do not want to install locally, you can setup the whole development environment using AWS Cloud9. Make sure to choose Ubuntu as OS while setting up AWS Cloud9." 
14 | ] 15 | } 16 | ], 17 | "metadata": { 18 | "kernelspec": { 19 | "display_name": "Python 3 (ipykernel)", 20 | "language": "python", 21 | "name": "python3" 22 | }, 23 | "language_info": { 24 | "codemirror_mode": { 25 | "name": "ipython", 26 | "version": 3 27 | }, 28 | "file_extension": ".py", 29 | "mimetype": "text/x-python", 30 | "name": "python", 31 | "nbconvert_exporter": "python", 32 | "pygments_lexer": "ipython3", 33 | "version": "3.9.7" 34 | } 35 | }, 36 | "nbformat": 4, 37 | "nbformat_minor": 5 38 | } 39 | -------------------------------------------------------------------------------- /Section 32 - Data Copier - Getting Started/Manual/04_Quick_Overview_of_Docker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "863a239d", 6 | "metadata": {}, 7 | "source": [ 8 | "## Quick Overview of Docker\n", 9 | "\n", 10 | "Docker is one of the key technologies to learn. Let us quickly review some of the key concepts related to Docker.\n", 11 | "\n", 12 | " * Overview of Docker Images.\n", 13 | " * Managing Docker Images.\n", 14 | " * Overview of Docker Containers.\n", 15 | " * Starting Containers using Images.\n", 16 | " * Managing Docker Containers.\n", 17 | " * Usage and Characteristics of Docker Containers.\n", 18 | " * Images are reusable.\n", 19 | " * Containers are ephemeral (stateless).\n", 20 | " * Production Databases should not be running on Docker Containers.\n", 21 | " * Production Applications are typically deployed using Docker Containers." 22 | ] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "Python 3 (ipykernel)", 28 | "language": "python", 29 | "name": "python3" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 3 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.9.7" 42 | } 43 | }, 44 | "nbformat": 4, 45 | "nbformat_minor": 5 46 | } 47 | -------------------------------------------------------------------------------- /Section 32 - Data Copier - Getting Started/Manual/05_Prepare_Dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a02ed126", 6 | "metadata": {}, 7 | "source": [ 8 | "## Prepare Dataset\n", 9 | "\n", 10 | "Let us prepare a dataset to play around with. The dataset is part of a GitHub Repository.\n", 11 | "\n", 12 | " * The data set is called retail_db. It is a hypothetical data set provided by Cloudera as part of the Cloudera QuickStart VM.\n", 13 | " * Clone the **retail_db_json** repository from GitHub. We will use this repository to set up tables and load data as part of the database.\n", 14 | "```shell script\n", 15 | "git clone https://www.github.com/itversity/retail_db_json.git\n", 16 | "```\n", 17 | "* It has SQL scripts as well as JSON data files.\n", 18 | "* As part of this use case we will use the scripts to create tables as well as load data sets.\n", 19 | "* **create_db_tables_pg.sql** is the script that creates the tables alone. It will not load data into the tables. 
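Once the repository is cloned, the data can be reviewed quickly. A minimal sketch assuming each dataset folder (for example orders) contains line-delimited JSON part files; adjust the glob pattern to the actual layout of the repository:

```python
import glob

import pandas as pd

# Pick up the part file(s) under the orders folder of the cloned repository
files = glob.glob('retail_db_json/orders/part-*')
orders = pd.read_json(files[0], lines=True)
print(orders.shape)
print(orders.head())
```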
" 20 | ] 21 | } 22 | ], 23 | "metadata": { 24 | "kernelspec": { 25 | "display_name": "Python 3 (ipykernel)", 26 | "language": "python", 27 | "name": "python3" 28 | }, 29 | "language_info": { 30 | "codemirror_mode": { 31 | "name": "ipython", 32 | "version": 3 33 | }, 34 | "file_extension": ".py", 35 | "mimetype": "text/x-python", 36 | "name": "python", 37 | "nbconvert_exporter": "python", 38 | "pygments_lexer": "ipython3", 39 | "version": "3.9.7" 40 | } 41 | }, 42 | "nbformat": 4, 43 | "nbformat_minor": 5 44 | } 45 | -------------------------------------------------------------------------------- /Section 32 - Data Copier - Getting Started/Manual/08_Setup_Project_using_PyCharm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "791a44ca", 6 | "metadata": {}, 7 | "source": [ 8 | "## Setup Project using PyCharm\n", 9 | "Let us setup project using PyCharm. I will be using PyCharm Enterprise Edition. However, you can use Community Edition as well.\n", 10 | "* Create New Project by name **data-copier**.\n", 11 | "* Make sure virtual environment is created with name **dc-venv**. It will make sure we are in appropriate virtual environment related to our project.\n", 12 | "* Create a program by name **app.py**.\n", 13 | "* Add below code to it and validate using PyCharm.\n", 14 | "```python\n", 15 | "def main():\n", 16 | " print(\"Hello World!\")\n", 17 | "\n", 18 | "\n", 19 | "if __name__ == \"__main__\":\n", 20 | " main()\n", 21 | "```\n" 22 | ] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "Python 3 (ipykernel)", 28 | "language": "python", 29 | "name": "python3" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 3 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.9.7" 42 | } 43 | }, 44 | "nbformat": 4, 45 | "nbformat_minor": 5 46 | } 47 | -------------------------------------------------------------------------------- /Section 33 - Data Copier - Reading Data using Pandas/01_Reading_Data_using_Pandas_Introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "56f4418c", 6 | "metadata": {}, 7 | "source": [ 8 | "# Reading Data\n", 9 | "As part of this section we will primarily see how to read the JSON data from files using Pandas.\n", 10 | "\n", 11 | "* Overview of Retail Data\n", 12 | "* Adding Pandas to the project\n", 13 | "* Reading Data using Pandas\n", 14 | "* Previewing Data using Pandas\n", 15 | "* Reading Data in Chunks\n", 16 | "* Dynamically read files\n", 17 | "* Create read program" 18 | ] 19 | } 20 | ], 21 | "metadata": { 22 | "kernelspec": { 23 | "display_name": "Python 3 (ipykernel)", 24 | "language": "python", 25 | "name": "python3" 26 | }, 27 | "language_info": { 28 | "codemirror_mode": { 29 | "name": "ipython", 30 | "version": 3 31 | }, 32 | "file_extension": ".py", 33 | "mimetype": "text/x-python", 34 | "name": "python", 35 | "nbconvert_exporter": "python", 36 | "pygments_lexer": "ipython3", 37 | "version": "3.9.7" 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 5 42 | } 43 | -------------------------------------------------------------------------------- /Section 33 - Data Copier - Reading Data using Pandas/03_Adding_Pandas_to_Project.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "16dfb101", 6 | "metadata": {}, 7 | "source": [ 8 | "## Adding Pandas to the project\n", 9 | "We will be using Pandas to read the data from JSON files. Let us see how we can install it and quickly validate.\n", 10 | "* Update **requirements.txt** with Pandas 1.3.2.\n", 11 | "```\n", 12 | "pandas==1.3.2\n", 13 | "```\n", 14 | "* Run `pip install -r requirements.txt` to ensure Pandas is installed as part of our project.\n", 15 | "* We can quickly validate by running this piece of code. This code will convert a list of dicts to a Pandas DataFrame.\n", 16 | "\n", 17 | "```python\n", 18 | "import pandas as pd\n", 19 | "users = [\n", 20 | " {'user_id': 1, 'user_name': 'Scott, Tiger'},\n", 21 | " {'user_id': 2, 'user_name': 'Donald, Duck'}\n", 22 | "]\n", 23 | "users_df = pd.DataFrame(users)\n", 24 | "users_df\n", 25 | "```\n" 26 | ] 27 | } 28 | ], 29 | "metadata": { 30 | "kernelspec": { 31 | "display_name": "Python 3 (ipykernel)", 32 | "language": "python", 33 | "name": "python3" 34 | }, 35 | "language_info": { 36 | "codemirror_mode": { 37 | "name": "ipython", 38 | "version": 3 39 | }, 40 | "file_extension": ".py", 41 | "mimetype": "text/x-python", 42 | "name": "python", 43 | "nbconvert_exporter": "python", 44 | "pygments_lexer": "ipython3", 45 | "version": "3.9.7" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 5 50 | } 51 | 
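As a quick sanity check that the install picked up the pinned version, here is a minimal sketch (run inside the dc-venv virtual environment):

```python
import pandas as pd

# Should print 1.3.2 if requirements.txt was installed as described above.
print(pd.__version__)
```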
-------------------------------------------------------------------------------- /Section 33 - Data Copier - Reading Data using Pandas/06_Reading_Data_in Chunks_using_Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6cc8c0b4", 6 | "metadata": {}, 7 | "source": [ 8 | "## Reading Data in Chunks using Pandas\n", 9 | "When the data is huge, it is recommended to read it in chunks, process each chunk, and then write it to the target. Let us understand how to read the data from JSON files using Pandas in chunks.\n", 10 | "\n", 11 | "```python\n", 12 | "fp = '/Users/itversity/Projects/Internal/bootcamp/data-copier/data/retail_db_json/order_items/part-r-00000-6b83977e-3f20-404b-9b5f-29376ab1419e'\n", 13 | "\n", 14 | "import pandas as pd\n", 15 | "\n", 16 | "# Here is the piece of code to read the content of the file as a reader.\n", 17 | "json_reader = pd.read_json(fp, lines=True, chunksize=1000)\n", 18 | "\n", 19 | "# Here is the piece of code to read each chunk as a DataFrame.\n", 20 | "for idx, df in enumerate(json_reader):\n", 21 | " print(f'Number of records in chunk with index {idx} is {df.shape[0]}')\n", 22 | "```\n"
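A small extension of the same pattern, in case you want to confirm that the chunks add up to the whole file; the path is a placeholder, as above:

```python
import pandas as pd

fp = '/path/to/retail_db_json/order_items/part-00000'  # placeholder path

total = 0
for idx, df in enumerate(pd.read_json(fp, lines=True, chunksize=1000)):
    total += df.shape[0]  # accumulate the record count chunk by chunk
print(f'Total records across all chunks: {total}')
```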
23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3 (ipykernel)", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.9.7" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 5 47 | } 48 | -------------------------------------------------------------------------------- /Section 34 - Data Copier - Database Programming using Pandas/01_Database_Programming_using_Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "bf65203f", 6 | "metadata": {}, 7 | "source": [ 8 | "# Database Programming using Pandas\n", 9 | "As part of this section, we will see how to write data to Postgres Database Tables using Pandas. Here are the topics which we are going to cover.\n", 10 | "\n", 11 | "* Validate Postgres Setup using Docker\n", 12 | "* Add required dependencies\n", 13 | "* Create users table in retail_db Database\n", 14 | "* Populate sample data into users table\n", 15 | "* Reading data from table using Pandas\n", 16 | "* Truncate users table\n", 17 | "* Writing Pandas Dataframe to table\n", 18 | "* Validating users data\n", 19 | "* Dropping users table" 20 | ] 21 | } 22 | ], 23 | "metadata": { 24 | "kernelspec": { 25 | "display_name": "Python 3 (ipykernel)", 26 | "language": "python", 27 | "name": "python3" 28 | }, 29 | "language_info": { 30 | "codemirror_mode": { 31 | "name": "ipython", 32 | "version": 3 33 | }, 34 | "file_extension": ".py", 35 | "mimetype": "text/x-python", 36 | "name": "python", 37 | "nbconvert_exporter": "python", 38 | "pygments_lexer": "ipython3", 39 | "version": "3.9.7" 40 | } 41 | }, 42 | "nbformat": 4, 43 | "nbformat_minor": 5 44 | } 45 | -------------------------------------------------------------------------------- /Section 34 - Data Copier - Database Programming using Pandas/02_Validate_Postgres_Setup_using_Docker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6374d620", 6 | "metadata": {}, 7 | "source": [ 8 | "## Validate Postgres Setup using Docker\n", 9 | "\n", 10 | "Let us make sure the Postgres database is set up.\n", 11 | "\n", 12 | "```shell\n", 13 | "docker ps -a # to list all the containers. We can check the status to see if the Postgres container is running.\n", 14 | "\n", 15 | "# Run the below command to start the postgres container if it is stopped\n", 16 | "docker start retail_pg\n", 17 | "\n", 18 | "# Validate connectivity to the database in the Postgres server\n", 19 | "docker exec -it retail_pg psql -U retail_user -d retail_db -W\n", 20 | "```\n", 21 | "\n", 22 | "We can also run commands such as `\\d` after connecting to validate the tables in the database.\n" 23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3 (ipykernel)", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.9.7" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 5 47 | } 48 | -------------------------------------------------------------------------------- /Section 34 - Data Copier - Database Programming using Pandas/03_Add_Required_Dependencies.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c0efba9a", 6 | "metadata": {}, 7 | "source": [ 8 | "## Add required dependencies\n", 9 | "\n", 10 | "Let us add the required dependencies to connect to the Postgres database and then perform read as well as write operations against Postgres database tables using Pandas.\n", 11 | "* `psycopg2` - Database driver to connect to the Postgres database.\n", 12 | "* `SQLAlchemy` - ORM wrapper on top of `psycopg2` and other database specific drivers.\n", 13 | "* Pandas database related functions use SQLAlchemy and hence we need to install SQLAlchemy as well.\n", 14 | "\n", 15 | "Here is the content of the **requirements.txt** file.\n", 16 | "\n", 17 | "```text\n", 18 | "psycopg2-binary==2.8.5\n", 19 | "SQLAlchemy==1.3.17\n", 20 | "pandas==1.3.2\n", 21 | "```\n" 22 | ] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "Python 3 (ipykernel)", 28 | "language": "python", 29 | "name": "python3" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 3 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.9.7" 42 | } 43 | }, 44 | "nbformat": 4, 45 | "nbformat_minor": 5 46 | } 47 | -------------------------------------------------------------------------------- /Section 34 - Data Copier - Database Programming using Pandas/04_Create_Users_Table_in_retail_db_Database.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a30eb05b", 6 | "metadata": {}, 7 | "source": [ 8 | "## Create users table in retail_db Database\n", 9 | "\n", 10 | "Let us go ahead and create a new table in the retail_db database. 
We will use this new table to understand how to use Pandas to read the data from database tables as well as to write data to database tables.\n", 11 | "\n", 12 | "```shell\n", 13 | "docker exec -it retail_pg psql -U retail_user -d retail_db -W\n", 14 | "```\n", 15 | "\n", 16 | "```sql\n", 17 | "CREATE TABLE users (\n", 18 | " user_id SERIAL PRIMARY KEY,\n", 19 | " user_first_name VARCHAR(30),\n", 20 | " user_last_name VARCHAR(30)\n", 21 | ");\n", 22 | "```" 23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3 (ipykernel)", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.9.7" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 5 47 | } 48 | -------------------------------------------------------------------------------- /Section 34 - Data Copier - Database Programming using Pandas/05_Populating_Sample_Data_into_Users_Table.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "2a7ea35e", 6 | "metadata": {}, 7 | "source": [ 8 | "## Populating Sample Data into users table\n", 9 | "\n", 10 | "Let us connect to the Postgres database retail_db and populate the table using the `INSERT` command. We will insert multiple records using one `INSERT` statement.\n", 11 | "\n", 12 | "* Connect to the database\n", 13 | "\n", 14 | "```shell\n", 15 | "docker exec -it retail_pg psql -U retail_user -d retail_db -W\n", 16 | "```\n", 17 | "\n", 18 | "* Insert data into the table\n", 19 | "\n", 20 | "```sql\n", 21 | "INSERT INTO users (user_first_name, user_last_name)\n", 22 | " VALUES ('Scott', 'Tiger'),\n", 23 | " ('Donald', 'Duck');\n", 24 | "\n", 25 | "SELECT * FROM users;\n", 26 | "```\n" 27 | ] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "Python 3 (ipykernel)", 33 | "language": "python", 34 | "name": "python3" 35 | }, 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.9.7" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 5 51 | } 52 | -------------------------------------------------------------------------------- /Section 34 - Data Copier - Database Programming using Pandas/06_Reading_Data_from_Table_using_Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "16a821bd", 6 | "metadata": {}, 7 | "source": [ 8 | "## Reading data from table using Pandas\n", 9 | "\n", 10 | "As we have inserted a couple of records into the table, let us understand how to read data from a Postgres database table using Pandas.\n", 11 | "\n", 12 | "* We can use Pandas `read_sql` to read the data from a database table by passing the query string and connection string as arguments.\n", 13 | "* It will result in a DataFrame.\n", 14 | "```python\n", 15 | "import pandas as pd\n", 16 | "query = 'SELECT * FROM users'\n", 17 | "conn = 'postgresql://retail_user:itversity@localhost:5452/retail_db'\n", 18 | "df = pd.read_sql(\n", 19 | " query,\n", 20 | "
conn\n", 21 | ")\n", 22 | "\n", 23 | "df\n", 24 | "\n", 25 | "df.count()\n", 26 | "```\n" 27 | ] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "Python 3 (ipykernel)", 33 | "language": "python", 34 | "name": "python3" 35 | }, 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.9.7" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 5 51 | } 52 | -------------------------------------------------------------------------------- /Section 34 - Data Copier - Database Programming using Pandas/07_Truncate_Users_Table.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "87ed85c1", 6 | "metadata": {}, 7 | "source": [ 8 | "## Truncate users table\n", 9 | "\n", 10 | "As we would like to populate the table using Python Pandas let's truncate the data which is currently populated in the table. We can use `TRUNCATE` statement to clean up the table completely.\n", 11 | "\n", 12 | "* Connect to the database.\n", 13 | "\n", 14 | "```shell\n", 15 | "docker exec -it retail psql -U retail_user -d retail_db\n", 16 | "```\n", 17 | "\n", 18 | "* Truncate the table using `TRUNCATE` Statement. `TRUNCATE` is DDL Statement.\n", 19 | "\n", 20 | "```sql\n", 21 | "TRUNCATE TABLE users;\n", 22 | "\\q\n", 23 | "```" 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Python 3 (ipykernel)", 30 | "language": "python", 31 | "name": "python3" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.9.7" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 5 48 | } 49 | -------------------------------------------------------------------------------- /Section 34 - Data Copier - Database Programming using Pandas/09_Validating_Users_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "9e61abbc", 6 | "metadata": {}, 7 | "source": [ 8 | "## Validating users data\n", 9 | "\n", 10 | "Let us also validate by running query by connecting to the database to confirm data is populated in the correct database table.\n", 11 | "\n", 12 | "```shell\n", 13 | "docker exec -it retail_pg psql -U retail_user -d retail_db -W\n", 14 | "```\n", 15 | "\n", 16 | "```sql\n", 17 | "SELECT * FROM users;\n", 18 | "\\q\n", 19 | "```\n" 20 | ] 21 | } 22 | ], 23 | "metadata": { 24 | "kernelspec": { 25 | "display_name": "Python 3 (ipykernel)", 26 | "language": "python", 27 | "name": "python3" 28 | }, 29 | "language_info": { 30 | "codemirror_mode": { 31 | "name": "ipython", 32 | "version": 3 33 | }, 34 | "file_extension": ".py", 35 | "mimetype": "text/x-python", 36 | "name": "python", 37 | "nbconvert_exporter": "python", 38 | "pygments_lexer": "ipython3", 39 | "version": "3.9.7" 40 | } 41 | }, 42 | "nbformat": 4, 43 | "nbformat_minor": 5 44 | } 45 | -------------------------------------------------------------------------------- /Section 34 - Data Copier - Database Programming using Pandas/10_Dropping_Users_Table.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "2d75d49d", 6 | "metadata": {}, 7 | "source": [ 8 | "## Dropping users table\n", 9 | "\n", 10 | "As we have gone through the steps related to reading from a database table as well as writing to a database table using Pandas, let us clean up the table by dropping it.\n", 11 | "\n", 12 | "```shell\n", 13 | "docker exec -it retail_pg psql -U retail_user -d retail_db -W\n", 14 | "```\n", 15 | "\n", 16 | "```sql\n", 17 | "DROP TABLE users;\n", 18 | "\\q\n", 19 | "```" 20 | ] 21 | } 22 | ], 23 | "metadata": { 24 | "kernelspec": { 25 | "display_name": "Python 3 (ipykernel)", 26 | "language": "python", 27 | "name": "python3" 28 | }, 29 | "language_info": { 30 | "codemirror_mode": { 31 | "name": "ipython", 32 | "version": 3 33 | }, 34 | "file_extension": ".py", 35 | "mimetype": "text/x-python", 36 | "name": "python", 37 | "nbconvert_exporter": "python", 38 | "pygments_lexer": "ipython3", 39 | "version": "3.9.7" 40 | } 41 | }, 42 | "nbformat": 4, 43 | "nbformat_minor": 5 44 | } 45 | -------------------------------------------------------------------------------- /Section 35 - Data Copier - Loading Data from Files to Tables/01_Loading_Data_from_Files_to_Tables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "cd458545", 6 | "metadata": {}, 7 | "source": [ 8 | "# Loading Data from files to tables\n", 9 | "\n", 10 | "As part of this section, let us get into the details of loading data from files into database tables using Pandas.\n", 11 | "\n", 12 | "* Populating departments table\n", 13 | "* Validate departments table\n", 14 | "* Populating orders table\n", 15 | "* Validate orders table in database\n", 16 | "* Validate orders table using Pandas" 17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "Python 3 (ipykernel)", 23 | "language": "python", 24 | "name": "python3" 25 | }, 26 | "language_info": { 27 | "codemirror_mode": { 28 | "name": "ipython", 29 | "version": 3 30 | }, 31 | "file_extension": ".py", 32 | "mimetype": "text/x-python", 33 | "name": "python", 34 | "nbconvert_exporter": "python", 35 | "pygments_lexer": "ipython3", 36 | "version": "3.9.7" 37 | } 38 | }, 39 | "nbformat": 4, 40 | "nbformat_minor": 5 41 | } 42 | -------------------------------------------------------------------------------- /Section 35 - Data Copier - Loading Data from Files to Tables/03_Validate_Departments_Table.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c94c9918", 6 | "metadata": {}, 7 | "source": [ 8 | "## Validate departments table\n", 9 | "\n", 10 | "As the table is populated, let us see if the data is now available in the table.\n", 11 | "\n", 12 | "```python\n", 13 | "import pandas as pd\n", 14 | "query = 'SELECT * FROM departments'\n", 15 | "conn = 'postgresql://retail_user:itversity@localhost:5452/retail_db'\n", 16 | "df = pd.read_sql(\n", 17 | " query,\n", 18 | " conn\n", 19 | ")\n", 20 | "\n", 21 | "df\n", 22 | "\n", 23 | "df.count()\n", 24 | "\n", 25 | "pd.read_sql(\n", 26 | "\t'SELECT count(1) FROM departments',\n", 27 | "\tconn\n", 28 | ")\n", 29 | "```\n" 30 | ] 31 | } 32 | ], 33 | "metadata": { 34 | "kernelspec": { 35 | "display_name": "Python 3 (ipykernel)", 36 | "language": "python", 37 | "name": "python3" 38 | }, 39 
| "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 3 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython3", 49 | "version": "3.9.7" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 5 54 | } 55 | -------------------------------------------------------------------------------- /Section 35 - Data Copier - Loading Data from Files to Tables/05_Validate_Orders_Table_in_Database.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "fd5ec1b7", 6 | "metadata": {}, 7 | "source": [ 8 | "## Validate orders table in database\n", 9 | "\n", 10 | "As data is loaded into the table using Pandas, let us connect to the database and validate to see if the data is populated into the right table as per our expectations.\n", 11 | "\n", 12 | "* Connecting to the database.\n", 13 | "\n", 14 | "```shell\n", 15 | "docker exec -it retail_pg psql -U retail_user -d retail_db -W\n", 16 | "```\n", 17 | "\n", 18 | "* Running queries to validate data in the table.\n", 19 | "```sql\n", 20 | "\\d orders\n", 21 | "\n", 22 | "SELECT * FROM orders LIMIT 10;\n", 23 | "\n", 24 | "SELECT count(*) FROM orders;\n", 25 | "\n", 26 | "SELECT order_status, count(1) FROM orders GROUP BY order_status;\n", 27 | "```\n" 28 | ] 29 | } 30 | ], 31 | "metadata": { 32 | "kernelspec": { 33 | "display_name": "Python 3 (ipykernel)", 34 | "language": "python", 35 | "name": "python3" 36 | }, 37 | "language_info": { 38 | "codemirror_mode": { 39 | "name": "ipython", 40 | "version": 3 41 | }, 42 | "file_extension": ".py", 43 | "mimetype": "text/x-python", 44 | "name": "python", 45 | "nbconvert_exporter": "python", 46 | "pygments_lexer": "ipython3", 47 | "version": "3.9.7" 48 | } 49 | }, 50 | "nbformat": 4, 51 | "nbformat_minor": 5 52 | } 53 | -------------------------------------------------------------------------------- /Section 36 - Data Copier - Modularizing the Application/Manual/01_Modularizing_Data_Copier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "92a78adb", 6 | "metadata": {}, 7 | "source": [ 8 | "# Modularizing Data Copier\n", 9 | "\n", 10 | "As part of this section we will see how to modularize the code so that it is easily manageable.\n", 11 | "\n", 12 | "* Overview of main function\n", 13 | "* Overview of Environment Variables\n", 14 | "* Using os module for Environment Variables\n", 15 | "* Passing Environment Variables using Pycharm\n", 16 | "* Read logic using Pandas\n", 17 | "* Validate read logic\n", 18 | "* Write logic using Pandas\n", 19 | "* Validate write logic\n", 20 | "* Create Driver Program\n", 21 | "* Validate Driver Program\n", 22 | "* Validate Application using Pycharm\n", 23 | "* Validate Application using Terminal\n", 24 | "* Passing Table List as argument\n", 25 | "* Passing Table List using file" 26 | ] 27 | } 28 | ], 29 | "metadata": { 30 | "kernelspec": { 31 | "display_name": "Python 3 (ipykernel)", 32 | "language": "python", 33 | "name": "python3" 34 | }, 35 | "language_info": { 36 | "codemirror_mode": { 37 | "name": "ipython", 38 | "version": 3 39 | }, 40 | "file_extension": ".py", 41 | "mimetype": "text/x-python", 42 | "name": "python", 43 | "nbconvert_exporter": "python", 44 | "pygments_lexer": "ipython3", 45 | "version": "3.9.7" 46 | } 47 | }, 48 | 
"nbformat": 4, 49 | "nbformat_minor": 5 50 | } 51 | -------------------------------------------------------------------------------- /Section 36 - Data Copier - Modularizing the Application/Manual/05_Passing_Environment_Variables_using_Pycharm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "58ea3f35", 6 | "metadata": {}, 7 | "source": [ 8 | "## Passing Environment Variables using Pycharm\n", 9 | "\n", 10 | "Let us understand how we can pass environment variables using Pycharm. We will use below logic as part of `app.py` to understand how to access environment variables using Pycharm\n", 11 | "\n", 12 | "```python\n", 13 | "import os\n", 14 | "\n", 15 | "\n", 16 | "def main():\n", 17 | " DB_NAME = os.environ.get('DB_NAME')\n", 18 | " print(f'Hello World from {DB_NAME}')\n", 19 | "```\n", 20 | "\n", 21 | "* Now we can run the application and then go to **Run -> \"Edit Configurations\"**.\n", 22 | "* We can set the environment variables as part of the popup as demonstrated.\n", 23 | "* We can also validate using Terminal by setting the environment variable and then by running the program using `python` command.\n" 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Python 3 (ipykernel)", 30 | "language": "python", 31 | "name": "python3" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.9.7" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 5 48 | } 49 | -------------------------------------------------------------------------------- /Section 36 - Data Copier - Modularizing the Application/Manual/07_Validate_Read_Logic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5b8e1320", 6 | "metadata": {}, 7 | "source": [ 8 | "## Validate read logic\n", 9 | "\n", 10 | "Let us validate the read logic that is developed as part of **read.py** program.\n", 11 | "* We can validate either by using Pycharm Run option or using Python CLI.\n", 12 | "* Here are the steps that are involved using Pycharm Run option.\n", 13 | " * Run the program and ignore any errors. It will create application under **Edit Configurations**.\n", 14 | " * Go to **Edit Configurations** and set required environment variables - `BASE_DIR` and `TABLE_NAME`.\n", 15 | " * Run the application and see the output.\n", 16 | "* Here are the steps that are involved using Python CLI (under Terminal).\n", 17 | " * Launch Terminal within Pycharm.\n", 18 | " * Set environment variables. We can use `export` on Linux or Mac and `$env` on Windows to set the environment variables.\n", 19 | " * Run using `python app.py` and review the output." 
20 | ] 21 | } 22 | ], 23 | "metadata": { 24 | "kernelspec": { 25 | "display_name": "Python 3 (ipykernel)", 26 | "language": "python", 27 | "name": "python3" 28 | }, 29 | "language_info": { 30 | "codemirror_mode": { 31 | "name": "ipython", 32 | "version": 3 33 | }, 34 | "file_extension": ".py", 35 | "mimetype": "text/x-python", 36 | "name": "python", 37 | "nbconvert_exporter": "python", 38 | "pygments_lexer": "ipython3", 39 | "version": "3.9.7" 40 | } 41 | }, 42 | "nbformat": 4, 43 | "nbformat_minor": 5 44 | } 45 | -------------------------------------------------------------------------------- /Section 36 - Data Copier - Modularizing the Application/Manual/11_Validate_Integration_Logic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "2aeba967", 6 | "metadata": {}, 7 | "source": [ 8 | "## Validate Integration logic\n", 9 | "\n", 10 | "As we are done with the integration of read and write logic as part of the driver program, let us go ahead and validate.\n", 11 | "* As we have seen before, you can validate either using PyCharm Run with proper configurations or using the `python` command from the CLI.\n", 12 | "* We just need to set all the required environment variables and run the driver program.\n", 13 | "* Also, we need to validate by running queries against the table by connecting to the database.\n", 14 | "* Let us get into the validation using PyCharm Run configurations." 15 | ] 16 | } 17 | ], 18 | "metadata": { 19 | "kernelspec": { 20 | "display_name": "Python 3 (ipykernel)", 21 | "language": "python", 22 | "name": "python3" 23 | }, 24 | "language_info": { 25 | "codemirror_mode": { 26 | "name": "ipython", 27 | "version": 3 28 | }, 29 | "file_extension": ".py", 30 | "mimetype": "text/x-python", 31 | "name": "python", 32 | "nbconvert_exporter": "python", 33 | "pygments_lexer": "ipython3", 34 | "version": "3.10.2" 35 | } 36 | }, 37 | "nbformat": 4, 38 | "nbformat_minor": 5 39 | } 40 | -------------------------------------------------------------------------------- /Section 36 - Data Copier - Modularizing the Application/Manual/12_Develop_Logic_for_ultiple_Tables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4c72da13", 6 | "metadata": {}, 7 | "source": [ 8 | "## Develop logic for multiple tables\n", 9 | "\n", 10 | "Let us go through the details of developing the logic to load the data from files into the respective tables, where we need to pass the list of tables as an argument.\n", 11 | "* We will pass the list of tables as a comma separated string.\n", 12 | "* For now we will pass the comma separated string as a run time argument. It can be accessed using the `sys` module.\n", 13 | "* Once the list of tables is passed as a comma separated string, we need to do the following (see the sketch below):\n", 14 | " * Split the comma separated string into a list of tables.\n", 15 | " * Iterate through the list and invoke the logic to read from the file and write to the table for each item in the list.\n", 16 | " * We will also modularize to make sure we have a function which will read and write for a table which is passed as an argument.\n"
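Here is a minimal sketch of that split-and-iterate pattern; the process_table body is stubbed, and the final program in this section wires it to the actual read and write logic (and switches to click for argument parsing):

```python
import sys


def process_table(table_name):
    # Stub standing in for: read the table's JSON file, then write it to Postgres.
    print(f'Processing {table_name}')


if __name__ == '__main__':
    tables = sys.argv[1]  # e.g. 'departments,categories'
    for table_name in tables.split(','):
        process_table(table_name)
```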
17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "Python 3 (ipykernel)", 23 | "language": "python", 24 | "name": "python3" 25 | }, 26 | "language_info": { 27 | "codemirror_mode": { 28 | "name": "ipython", 29 | "version": 3 30 | }, 31 | "file_extension": ".py", 32 | "mimetype": "text/x-python", 33 | "name": "python", 34 | "nbconvert_exporter": "python", 35 | "pygments_lexer": "ipython3", 36 | "version": "3.10.2" 37 | } 38 | }, 39 | "nbformat": 4, 40 | "nbformat_minor": 5 41 | } 42 | -------------------------------------------------------------------------------- /Section 36 - Data Copier - Modularizing the Application/Python Code/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import click 3 | from read import get_json_reader 4 | from write import load_db_table 5 | 6 | 7 | def process_table(BASE_DIR, conn, table_name): 8 | json_reader = get_json_reader(BASE_DIR, table_name) 9 | for df in json_reader: 10 | load_db_table(df, conn, table_name, df.columns[0]) 11 | 12 | 13 | @click.command() 14 | @click.option('--tables', help='Provide comma separated table list') 15 | def main(tables): 16 | BASE_DIR = os.environ.get('BASE_DIR') 17 | # Guard against a missing --tables option so table_list is always defined 18 | table_list = tables.split(',') if tables else [] 19 | 20 | configs = dict(os.environ.items()) 21 | conn = f'postgresql://{configs["DB_USER"]}:{configs["DB_PASS"]}@{configs["DB_HOST"]}:{configs["DB_PORT"]}/{configs["DB_NAME"]}' 22 | for table_name in table_list: 23 | process_table(BASE_DIR, conn, table_name) 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /Section 36 - Data Copier - Modularizing the Application/Python Code/read.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | 5 | def get_json_reader(BASE_DIR, table_name, chunksize=1000): 6 | file_name = os.listdir(f'{BASE_DIR}/{table_name}')[0] 7 | fp = f'{BASE_DIR}/{table_name}/{file_name}' 8 | return pd.read_json(fp, lines=True, chunksize=chunksize) 9 | 10 | 11 | if __name__ == '__main__': 12 | BASE_DIR = os.environ.get('BASE_DIR') 13 | table_name = os.environ.get('TABLE_NAME') 14 | json_reader = get_json_reader(BASE_DIR, table_name) 15 | for idx, df in enumerate(json_reader): 16 | print(f'Number of records in chunk with index {idx} is {df.shape[0]}') 17 | -------------------------------------------------------------------------------- /Section 36 - Data Copier - Modularizing the Application/Python Code/requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2-binary==2.8.5 2 | SQLAlchemy==1.3.17 3 | pandas==1.3.2 4 | -------------------------------------------------------------------------------- /Section 36 - Data Copier - Modularizing the Application/Python Code/write.py: -------------------------------------------------------------------------------- 1 | def load_db_table(df, conn, table_name, key): 2 | min_key = df[key].min() 3 | 
max_key = df[key].max() 4 | df.to_sql(table_name, conn, if_exists='append', index=False) 5 | print(f'Loaded data for {table_name} within the range of {min_key} and {max_key}') 6 | 7 | 8 | if __name__ == '__main__': 9 | import pandas as pd 10 | import os 11 | 12 | data = [ 13 | {'user_id': 1, 'user_first_name': 'Scott', 'user_last_name': 'Tiger'}, 14 | {'user_id': 2, 'user_first_name': 'Donald', 'user_last_name': 'Duck'} 15 | ] 16 | df = pd.DataFrame(data) 17 | configs = dict(os.environ.items()) 18 | conn = f'postgresql://{configs["DB_USER"]}:{configs["DB_PASS"]}@{configs["DB_HOST"]}:{configs["DB_PORT"]}/{configs["DB_NAME"]}' 19 | 20 | load_db_table(df, conn, 'users', 'user_id') 21 | -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/01_Prepare_database_for_validation.txt: -------------------------------------------------------------------------------- 1 | docker exec -it retail_pg psql -U retail_user -d retail_db -W 2 | 3 | docker ps 4 | 5 | # Commands inside the database 6 | DROP TABLE users; 7 | 8 | TRUNCATE TABLE departments; 9 | 10 | TRUNCATE TABLE categories; 11 | 12 | TRUNCATE TABLE products; 13 | 14 | TRUNCATE TABLE customers; 15 | 16 | TRUNCATE TABLE orders; 17 | 18 | TRUNCATE TABLE order_items; -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/02_Pull_and_validate_python_image.txt: -------------------------------------------------------------------------------- 1 | docker pull python 2 | 3 | docker run -it --name validate python bash 4 | 5 | python 6 | 7 | exit() 8 | 9 | docker ps -a 10 | 11 | docker rm validate 12 | 13 | docker rmi python 14 | 15 | docker pull python:3.7 16 | 17 | docker run -it --name validate python:3.7 bash 18 | 19 | python 20 | 21 | exit() 22 | 23 | exit 24 | 25 | docker ps 26 | 27 | docker ps -a 28 | 29 | docker rm validate 30 | 31 | docker images -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/03_Create_and_attach_network_to_database.txt: -------------------------------------------------------------------------------- 1 | docker ps 2 | 3 | docker inspect retail_pg 4 | 5 | docker exec -it retail_pg hostname -f 6 | 7 | docker network -h 8 | 9 | docker network ls 10 | 11 | docker network create data-copier-nw 12 | 13 | docker network connect -h 14 | 15 | docker network connect data-copier-nw retail_pg 16 | 17 | docker inspect retail_pg -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/04_Quick_recap_about_docker_conatiners.txt: -------------------------------------------------------------------------------- 1 | docker exec -it retail_pg bash 2 | 3 | ls 4 | 5 | uname -a 6 | 7 | psql -U postgres 8 | 9 | \q 10 | 11 | exit -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/05_Deploying_application_and_installing_docker: -------------------------------------------------------------------------------- 1 | docker ps -a 2 | 3 | docker images 4 | 5 | docker run -it --name data-copier python:3.7 bash 6 | 7 | python 8 | 9 | exit() 10 | 11 | pip list 12 | 13 | docker cp -h 14 | 15 | docker cp app.py data-copier:/data-copier 16 | 17 | docker cp read.py data-copier:/data-copier 18 | 19 | docker cp write.py 
data-copier:/data-copier 20 | 21 | docker cp requirements.txt data-copier:/data-copier 22 | 23 | ls -ltr data-copier/ 24 | 25 | pip install -r data-copier/requirements.txt -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/06_Copy_source_data_file_into_container: -------------------------------------------------------------------------------- 1 | docker cp 2 | 3 | ls -ltr 4 | 5 | ls -ltr retail_db_json -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/07_Add_data_copier_container: -------------------------------------------------------------------------------- 1 | docker ps 2 | 3 | docker inspect retail_pg 4 | 5 | docker inspect data-copier 6 | 7 | docker network connect data-copier-nw data-copier 8 | 9 | docker inspect data-copier 10 | 11 | docker inspect retail_pg -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/08_Installing_os_libraries_docker: -------------------------------------------------------------------------------- 1 | apt install telnet 2 | 3 | apt update 4 | 5 | apt install telnet 6 | 7 | rm -rf /var/lib/apt/lists/* -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/09_Validate_network_connectivity: -------------------------------------------------------------------------------- 1 | telnet 2 | 3 | quit 4 | 5 | docker inspect retail_pg 6 | 7 | telnet hostname 5432 8 | 9 | quit -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Manual/10_Running_application_docker: -------------------------------------------------------------------------------- 1 | ls -ltr retail_db_json/ 2 | 3 | ls -ltr data-copier 4 | 5 | pip list 6 | 7 | telnet 8 | 9 | cd data-copier 10 | 11 | ls -ltr 12 | 13 | python app.py --tables departments,categories 14 | 15 | export DB_HOST= 16 | 17 | export DB_PORT=5432 18 | 19 | export DB_NAME= 20 | 21 | export DB_USER= 22 | 23 | export DB_PASS= 24 | 25 | export BASE_DIR=/retail_db_json 26 | 27 | exit 28 | 29 | docker exec -it retail_pg psql -U retail_user -d retail_db -W 30 | 31 | \d 32 | 33 | SELECT * FROM departments; 34 | 35 | SELECT * FROM categories; -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Python Code/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import click 3 | from read import get_json_reader 4 | from write import load_db_table 5 | 6 | 7 | def process_table(BASE_DIR, conn, table_name): 8 | json_reader = get_json_reader(BASE_DIR, table_name) 9 | for df in json_reader: 10 | load_db_table(df, conn, table_name, df.columns[0]) 11 | 12 | 13 | 
@click.command() 14 | @click.option('--tables', help='Provide comma separated table list') 15 | def main(tables): 16 | BASE_DIR = os.environ.get('BASE_DIR') 17 | # Guard against a missing --tables option so table_list is always defined 18 | table_list = tables.split(',') if tables else [] 19 | 20 | configs = dict(os.environ.items()) 21 | conn = f'postgresql://{configs["DB_USER"]}:{configs["DB_PASS"]}@{configs["DB_HOST"]}:{configs["DB_PORT"]}/{configs["DB_NAME"]}' 22 | for table_name in table_list: 23 | process_table(BASE_DIR, conn, table_name) 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Python Code/read.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | 5 | def get_json_reader(BASE_DIR, table_name, chunksize=1000): 6 | file_name = os.listdir(f'{BASE_DIR}/{table_name}')[0] 7 | fp = f'{BASE_DIR}/{table_name}/{file_name}' 8 | return pd.read_json(fp, lines=True, chunksize=chunksize) 9 | 10 | 11 | if __name__ == '__main__': 12 | BASE_DIR = os.environ.get('BASE_DIR') 13 | table_name = os.environ.get('TABLE_NAME') 14 | json_reader = get_json_reader(BASE_DIR, table_name) 15 | for idx, df in enumerate(json_reader): 16 | print(f'Number of records in chunk with index {idx} is {df.shape[0]}') 17 | -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Python Code/requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2-binary==2.9.3 2 | SQLAlchemy==1.4.31 3 | pandas==2.0.0 4 | -------------------------------------------------------------------------------- /Section 37 - Data Copier - Dockerizing the Application/Python Code/write.py: -------------------------------------------------------------------------------- 1 | def load_db_table(df, conn, table_name, key): 2 | min_key = df[key].min() 3 | max_key = df[key].max() 4 | df.to_sql(table_name, conn, if_exists='append', index=False) 5 | print(f'Loaded data for {table_name} within the range of {min_key} and {max_key}') 6 | 7 | 8 | if __name__ == '__main__': 9 | import pandas as pd 10 | import os 11 | 12 | data = [ 13 | {'user_id': 1, 'user_first_name': 'Scott', 'user_last_name': 'Tiger'}, 14 | {'user_id': 2, 'user_first_name': 'Donald', 'user_last_name': 'Duck'} 15 | ] 16 | df = pd.DataFrame(data) 17 | configs = dict(os.environ.items()) 18 | conn = f'postgresql://{configs["DB_USER"]}:{configs["DB_PASS"]}@{configs["DB_HOST"]}:{configs["DB_PORT"]}/{configs["DB_NAME"]}' 19 | 20 | load_db_table(df, conn, 'users', 'user_id') 21 | -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Manual/01_Getting_started_with_docker_image.txt: -------------------------------------------------------------------------------- 1 | docker build -t data-copier . 2 | 3 | docker images 4 | 5 | docker run -it --name data-copier data-copier bash 6 | 7 | python 8 | 9 | exit() 10 | 11 | docker rm data-copier 12 | 13 | docker rmi data-copier -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Manual/02_Install_OS_modules.txt: -------------------------------------------------------------------------------- 1 | docker build -t data-copier . 
2 | 3 | docker run -it --name data-copier data-copier bash 4 | 5 | telnet 6 | 7 | quit 8 | 9 | quit 10 | 11 | exit 12 | 13 | docker ps -a 14 | 15 | docker images -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Manual/03_Copying_source_code.txt: -------------------------------------------------------------------------------- 1 | docker build -t data-copier . 2 | 3 | docker run -it --name data-copier --rm data-copier bash 4 | 5 | ls -ltr 6 | 7 | ls -ltr data-copier 8 | 9 | exit 10 | 11 | docker build -t data-copier . 12 | 13 | docker images 14 | 15 | docker run -it --name data-copier --rm data-copier bash 16 | 17 | ls -ltr 18 | 19 | ls -ltr data-copier 20 | 21 | exit 22 | 23 | docker build -t data-copier . 24 | 25 | docker images 26 | 27 | docker run -it --name data-copier --rm data-copier bash 28 | 29 | ls -ltr 30 | 31 | ls -ltr data-copier 32 | 33 | find data-copier/ 34 | 35 | exit 36 | 37 | docker ps -a -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Manual/04_Adding_dependencies.txt: -------------------------------------------------------------------------------- 1 | docker build -t data-copier . 2 | 3 | docker images 4 | 5 | docker run -it --name data-copier --rm data-copier bash 6 | 7 | python 8 | 9 | import pandas as pd 10 | 11 | exit() 12 | 13 | exit -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Manual/05_Unsderstanding_docker.txt: -------------------------------------------------------------------------------- 1 | docker images 2 | 3 | docker image prune -a 4 | 5 | docker images 6 | 7 | docker build -t data-copier . 
-------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Manual/06_Mounting_data_folders.txt: -------------------------------------------------------------------------------- 1 | docker run -it --name data-copier --rm -v fold path data-copier bash 2 | 3 | docker run -v -h 4 | 5 | docker run -h 6 | 7 | docker run --help 8 | 9 | docker run -it --name data-copier --rm -v fold path data-copier bash 10 | 11 | ls -ltr 12 | 13 | ls -ltr retail_db_json/ -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Manual/07_Add_data_copier_container.txt: -------------------------------------------------------------------------------- 1 | docker ps 2 | 3 | docker ps -a 4 | 5 | docker inspect retail_pg 6 | 7 | docker container --help 8 | 9 | docker container run --help -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Manual/08_Run_application_using_docker.txt: -------------------------------------------------------------------------------- 1 | docker exec -it retail_pg psql -U retail_user -d retail_db -W 2 | 3 | SELECT count(1) FROM departments; 4 | 5 | SELECT count(1) FROM categories; 6 | 7 | TRUNCATE TABLE departments; 8 | 9 | TRUNCATE TABLE categories; 10 | 11 | SELECT count(1) FROM departments; 12 | 13 | SELECT count(1) FROM categories; 14 | 15 | \q -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Python Code/app/Dockerfile.txt: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | 3 | # Install OS Modules 4 | RUN apt update -y && apt install telnet -y && rm -rf /var/lib/apt/lists/* 5 | 6 | # Copy source code 7 | RUN mkdir -p /data-copier 8 | COPY app /data-copier/app 9 | COPY requirements.txt /data-copier 10 | 11 | # Install application dependencies 12 | RUN pip install -r /data-copier/requirements.txt -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Python Code/app/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import click 3 | from read import get_json_reader 4 | from write import load_db_table 5 | 6 | 7 | def process_table(BASE_DIR, conn, table_name): 8 | json_reader = get_json_reader(BASE_DIR, table_name) 9 | for df in json_reader: 10 | load_db_table(df, conn, table_name, df.columns[0]) 11 | 12 | 13 | @click.command() 14 | @click.option('--tables', help='Provide comma separated table list') 15 | def main(tables): 16 | BASE_DIR = os.environ.get('BASE_DIR') 17 | # Guard against a missing --tables option so table_list is always defined 18 | table_list = tables.split(',') if tables else [] 19 | 20 | configs = dict(os.environ.items()) 21 | conn = f'postgresql://{configs["DB_USER"]}:{configs["DB_PASS"]}@{configs["DB_HOST"]}:{configs["DB_PORT"]}/{configs["DB_NAME"]}' 22 | for table_name in table_list: 23 | process_table(BASE_DIR, conn, table_name) 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Python Code/app/read.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | 5 | def get_json_reader(BASE_DIR, table_name, chunksize=1000): 6 | file_name = 
os.listdir(f'{BASE_DIR}/{table_name}')[0] 7 | fp = f'{BASE_DIR}/{table_name}/{file_name}' 8 | return pd.read_json(fp, lines=True, chunksize=chunksize) 9 | 10 | 11 | if __name__ == '__main__': 12 | BASE_DIR = os.environ.get('BASE_DIR') 13 | table_name = os.environ.get('TABLE_NAME') 14 | json_reader = get_json_reader(BASE_DIR, table_name) 15 | for idx, df in enumerate(json_reader): 16 | print(f'Number of records in chunk with index {idx} is {df.shape[0]}') 17 | -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Python Code/app/requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2-binary==2.9.3 2 | SQLAlchemy==1.4.31 3 | pandas==2.0.0 4 | -------------------------------------------------------------------------------- /Section 38 - Data Copier - Using Custom Docker Image/Python Code/app/write.py: -------------------------------------------------------------------------------- 1 | def load_db_table(df, conn, table_name, key): 2 | min_key = df[key].min() 3 | max_key = df[key].max() 4 | df.to_sql(table_name, conn, if_exists='append', index=False) 5 | print(f'Loaded data for {table_name} within the range of {min_key} and {max_key}') 6 | 7 | 8 | if __name__ == '__main__': 9 | import pandas as pd 10 | import os 11 | 12 | data = [ 13 | {'user_id': 1, 'user_first_name': 'Scott', 'user_last_name': 'Tiger'}, 14 | {'user_id': 2, 'user_first_name': 'Donald', 'user_last_name': 'Duck'} 15 | ] 16 | df = pd.DataFrame(data) 17 | configs = dict(os.environ.items()) 18 | conn = f'postgresql://{configs["DB_USER"]}:{configs["DB_PASS"]}@{configs["DB_HOST"]}:{configs["DB_PORT"]}/{configs["DB_NAME"]}' 19 | 20 | load_db_table(df, conn, 'users', 'user_id') 21 | -------------------------------------------------------------------------------- /Section 39 - Data Copier - Deploy and Validate Application on Remote Server/01_Clone_application_on_remote.txt: -------------------------------------------------------------------------------- 1 | git clone 2 | 3 | ls -ltr 4 | 5 | ls -ltr -------------------------------------------------------------------------------- /Section 39 - Data Copier - Deploy and Validate Application on Remote Server/02_Setup_network_and_database.txt: -------------------------------------------------------------------------------- 1 | docker network 2 | 3 | docker network create data-copier-nw 4 | 5 | docker network ls 6 | 7 | docker images -------------------------------------------------------------------------------- /Section 39 - Data Copier - Deploy and Validate Application on Remote Server/03_Setup_database_and tables_docker.txt: -------------------------------------------------------------------------------- 1 | docker exec -it retail_pg psql -U postgres 2 | 3 | CREATE DATABASE retail_db; 4 | 5 | CREATE USER retail_user WITH ENCRYPTED PASSWORD 'itversity'; 6 | 7 | GRANT ALL PRIVILEGES ON DATABASE retail_db TO retail_user; 8 | 9 | \q 10 | 11 | docker exec -it retail_pg psql -U retail_user -d retail_db -w -f /data/retail_db_json/create_db_tables_pg.sql 12 | 13 | docker exec -it retail_pg psql -U retail_user -d retail_db -w 14 | 15 | \d -------------------------------------------------------------------------------- /Section 39 - Data Copier - Deploy and Validate Application on Remote Server/04_Building_custom_docker.txt: -------------------------------------------------------------------------------- 1 | python3.6 2 | 3 | exit() 4 | 5 | python 6 | 7 | cat 
Dockerfile 8 | 9 | docker build -t data-copier . 10 | 11 | docker images -------------------------------------------------------------------------------- /Section 39 - Data Copier - Deploy and Validate Application on Remote Server/05_Run_and_validate_dockerized_application.txt: -------------------------------------------------------------------------------- 1 | cat dockercommand.sh 2 | 3 | docker run \ 4 | -it \ 5 | --name data-copier \ 6 | --hostname data-copier \ 7 | --network data-copier-nw \ 8 | --rm \ 9 | -v /home/ubuntu/environment/retail_db_json:/retail_db_json \ 10 | -e BASE_DIR=/retail_db_json \ 11 | -e DB_HOST=retail_pg \ 12 | -e DB_PORT=5432 \ 13 | -e DB_NAME=retail_db \ 14 | -e DB_USER=retail_user \ 15 | -e DB_PASS=itversity \ 16 | data-copier \ 17 | python /data-copier/app/app.py --tables departments,categories 18 | 19 | docker exec -it retail_pg psql -U retail_user -d retail_db -w 20 | 21 | SELECT * FROM categories; 22 | 23 | SELECT * FROM departments; 24 | 25 | SELECT COUNT(*) FROM categories; 26 | 27 | SELECT COUNT(*) FROM departments; 28 | 29 | SELECT * FROM orders; 30 | 31 | SELECT * FROM order_items; -------------------------------------------------------------------------------- /Section 40 - Setup Single Node Hadoop and Spark Cluster or Lab using Docker/01_setup_single_node_hadoop_cluster/02_setup_prerequisites.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Setup Prerequisites\n", 8 | "\n", 9 | "Let us set up the prerequisites that are required to set up Hadoop.\n", 10 | "\n", 11 | "* We need `wget` to download binaries.\n", 12 | "* Hadoop requires the JDK, and we can set up the JDK using `apt` on Ubuntu.\n", 13 | "\n", 14 | "```shell\n", 15 | "sudo apt-get install openjdk-8-jdk -y\n", 16 | "```\n", 17 | "\n", 18 | "* We can validate the JDK using the below commands.\n", 19 | "\n", 20 | "```shell\n", 21 | "java -version\n", 22 | "javac -version\n", 23 | "```" 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Pyspark 2", 30 | "language": "python", 31 | "name": "pyspark2" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.6.12" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 4 48 | } 49 | -------------------------------------------------------------------------------- /Section 40 - Setup Single Node Hadoop and Spark Cluster or Lab using Docker/01_setup_single_node_hadoop_cluster/03_setup_passwordless_login.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Setup Password-less Login\n", 8 | "\n", 9 | "Let us see how to set up password-less login within the host on which we are going to set up the single node Hadoop cluster.\n", 10 | "* We need password-less login to run the scripts to start services such as HDFS, YARN etc.\n", 11 | "* We can generate public and private keys using the `ssh-keygen` command. 
-------------------------------------------------------------------------------- /Section 40 - Setup Single Node Hadoop and Spark Cluster or Lab using Docker/01_setup_single_node_hadoop_cluster/02_setup_prerequisites.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Setup Prerequisites\n", 8 | "\n", 9 | "Let us set up the prerequisites that are required to set up Hadoop.\n", 10 | "\n", 11 | "* We need `wget` to download binaries.\n", 12 | "* Hadoop requires JDK and we can set up JDK using `apt` on Ubuntu.\n", 13 | "\n", 14 | "```shell\n", 15 | "sudo apt-get install openjdk-8-jdk -y\n", 16 | "```\n", 17 | "\n", 18 | "* We can validate the JDK using the commands below.\n", 19 | "\n", 20 | "```shell\n", 21 | "java -version\n", 22 | "javac -version\n", 23 | "```" 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Pyspark 2", 30 | "language": "python", 31 | "name": "pyspark2" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.6.12" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 4 48 | } -------------------------------------------------------------------------------- /Section 40 - Setup Single Node Hadoop and Spark Cluster or Lab using Docker/01_setup_single_node_hadoop_cluster/03_setup_passwordless_login.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Setup Passwordless Login\n", 8 | "\n", 9 | "Let us see how to set up passwordless login within the host on which we are going to set up the single node Hadoop Cluster.\n", 10 | "* We need passwordless login to run the scripts that start services such as HDFS, YARN etc.\n", 11 | "* We can generate public and private keys using the `ssh-keygen` command. It will generate 2 files.\n", 12 | " * **~/.ssh/id_rsa**\n", 13 | " * **~/.ssh/id_rsa.pub**\n", 14 | "* We need to run this command to append the contents of **~/.ssh/id_rsa.pub** to **~/.ssh/authorized_keys**.\n", 15 | "\n", 16 | "```shell\n", 17 | "cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys\n", 18 | "```\n", 19 | "\n", 20 | "* We can validate by running `ssh localhost`. It will log in as the same user within the host. We can use `exit` to come back to the previous session on the same host." 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3 (ipykernel)", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.10.2" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 4 45 | }
-------------------------------------------------------------------------------- /Section 40 - Setup Single Node Hadoop and Spark Cluster or Lab using Docker/01_setup_single_node_hadoop_cluster/08_start_and_validate_yarn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Start and Validate YARN\n", 8 | "\n", 9 | "Let us start YARN and validate whether the components related to YARN are running.\n", 10 | "\n", 11 | "* Ensure environment variables such as PATH are updated with the bin as well as sbin folders of Hadoop.\n", 12 | "\n", 13 | "```shell\n", 14 | "echo $PATH\n", 15 | "```\n", 16 | "\n", 17 | "* We can start YARN components using **start-yarn.sh**.\n", 18 | "\n", 19 | "```shell\n", 20 | "start-yarn.sh\n", 21 | "```\n", 22 | "\n", 23 | "* Run the `jps` command to ensure Resource Manager and Node Manager are running as daemons." 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Python 3 (ipykernel)", 30 | "language": "python", 31 | "name": "python3" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.10.2" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 4 48 | } -------------------------------------------------------------------------------- /Section 40 - Setup Single Node Hadoop and Spark Cluster or Lab using Docker/01_setup_single_node_hadoop_cluster/10_accessing_hadoop_uris.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Accessing Hadoop URIs\n", 8 | "\n", 9 | "As part of the Hadoop setup, we get a few web-based URIs which help us access files in HDFS as well as applications submitted using YARN.\n", 10 | "* HDFS URI\n", 11 | "* Resource Manager URI\n", 12 | "* Map Reduce Job History Server URI" 13 | ] 14 | } 15 | ], 16 | "metadata": { 17 | "kernelspec": { 18 | "display_name": "Python 3 (ipykernel)", 19 | "language": "python", 20 | "name": "python3" 21 | }, 22 | "language_info": { 23 | "codemirror_mode": { 24 | "name": "ipython", 25 | "version": 3 26 | }, 27 | "file_extension": ".py", 28 | "mimetype": "text/x-python", 29 | "name": "python", 30 | "nbconvert_exporter": "python", 31 | "pygments_lexer": "ipython3", 32 | "version": "3.10.2" 33 | } 34 | }, 35 | "nbformat": 4, 36 | "nbformat_minor": 4 37 | }
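The notebook does not list the actual addresses. On a default Hadoop 3.x install, the NameNode UI listens on port 9870, the Resource Manager UI on 8088, and the Job History Server UI on 19888. A quick reachability check from Python, assuming those default ports:

```python
# Probe the default Hadoop 3.x web UIs; adjust host and ports if your setup differs.
import urllib.request

uris = {
    'HDFS NameNode UI': 'http://localhost:9870',
    'YARN Resource Manager UI': 'http://localhost:8088',
    'MapReduce Job History Server UI': 'http://localhost:19888',
}

for name, uri in uris.items():
    try:
        with urllib.request.urlopen(uri, timeout=5) as response:
            print(f'{name}: HTTP {response.status}')
    except OSError as error:
        print(f'{name}: not reachable ({error})')
```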
-------------------------------------------------------------------------------- /Section 40 - Setup Single Node Hadoop and Spark Cluster or Lab using Docker/03_setup_single_node_kafka_cluster/03_configure_and_start_zookeeper.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Configure and Start Zookeeper\n", 8 | "\n", 9 | "Let us configure the Zookeeper that is embedded with Kafka, start it, and validate it.\n", 10 | "\n", 11 | "* You can review the **zookeeper.properties** under **/opt/kafka/config**.\n", 12 | "* For a single node Kafka cluster, you do not have to update anything in the zookeeper.properties file.\n", 13 | "* We can start **zookeeper** as a daemon using the following command.\n", 14 | "\n", 15 | "```shell\n", 16 | "zookeeper-server-start.sh -daemon /opt/kafka/config/zookeeper.properties\n", 17 | "```\n", 18 | "\n", 19 | "* You can validate by running the following commands.\n", 20 | "\n", 21 | "```shell\n", 22 | "ps -ef|grep zookeeper\n", 23 | "netstat -tulpn|grep 2181\n", 24 | "telnet localhost 2181\n", 25 | "```" 26 | ] 27 | } 28 | ], 29 | "metadata": { 30 | "kernelspec": { 31 | "display_name": "Python 3 (ipykernel)", 32 | "language": "python", 33 | "name": "python3" 34 | }, 35 | "language_info": { 36 | "codemirror_mode": { 37 | "name": "ipython", 38 | "version": 3 39 | }, 40 | "file_extension": ".py", 41 | "mimetype": "text/x-python", 42 | "name": "python", 43 | "nbconvert_exporter": "python", 44 | "pygments_lexer": "ipython3", 45 | "version": "3.10.2" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 4 50 | }
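If telnet is not available on the host, the same reachability check as `telnet localhost 2181` can be done with the Python standard library:

```python
# Python equivalent of `telnet localhost 2181`; success means Zookeeper is listening.
import socket

try:
    with socket.create_connection(('localhost', 2181), timeout=5):
        print('Zookeeper is accepting connections on port 2181')
except OSError as error:
    print(f'Zookeeper is not reachable: {error}')
```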
\n", "* We typically get documentation for latest version.\n", "* We can replace **latest** in the URL with the version of Spark to get specific version's official documentation.\n", "* Also we have resources provided by **databricks**."]}], "metadata": {"celltoolbar": "Tags", "kernelspec": {"display_name": "Scala", "language": "scala", "name": "scala"}, "language_info": {"codemirror_mode": "text/x-scala", "file_extension": ".scala", "mimetype": "", "name": "Scala", "nbconverter_exporter": "", "version": "2.11.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 42 - Data Engineering using Spark SQL - Getting Started/03_overview_of_spark_sql_cli.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Overview of Spark SQL CLI\n", "\n", "Let us understand how to launch Spark SQL CLI."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* Logon to the gateway node of the cluster.\n", "* We have 2 versions of Spark in our labs. One can use `spark-sql` to launch Spark SQL using 1.6.x and `spark2-sql` to launch Spark SQL using 2.3.x.\n", "* Launch Spark SQL CLI using `spark-sql`. In clustered mode we might have to add additional arguments. For example\n", "\n", "```\n", "spark2-sql \\\n", " --master yarn \\\n", " --conf spark.ui.port=0 \\\n", " --conf spark.sql.warehouse.dir=/user/${USER}/warehouse\n", "```\n", "* One can get help using `spark-sql --help`\n", "* For e. g.: we can use `spark-sql --database training_retail` to connect to specific database. Here is the example in clustered mode.\n", "\n", "```\n", "spark2-sql \\\n", " --master yarn \\\n", " --conf spark.ui.port=0 \\\n", " --conf spark.sql.warehouse.dir=/user/${USER}/warehouse \\\n", " --database ${USER}_retail\n", "```\n", "* Spark SQL CLI will be launched and will be connected to **${USER}_retail** database.\n", "* We can validate to which database we are connected to using `SELECT current_database()`"]}], "metadata": {"celltoolbar": "Tags", "kernelspec": {"display_name": "Apache Toree - Scala", "language": "scala", "name": "apache_toree_scala"}, "language_info": {"codemirror_mode": "text/x-scala", "file_extension": ".scala", "mimetype": "text/x-scala", "name": "scala", "pygments_lexer": "scala", "version": "2.11.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 42 - Data Engineering using Spark SQL - Getting Started/05_running_os_commands.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Running OS Commands\n", "\n", "Let us understand how to run OS commands using Spark SQL CLI."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* We can run OS commands using **!** at the beginning.\n", " * Listing local Files `!ls -ltr;`\n", " * Listing HDFS Files `!hdfs dfs -ls /public/retail_db;`"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["import sys.process._\n", "\n", "\"ls -ltr\"!"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["import sys.process._\n", "\n", "\"hdfs dfs -ls /public/retail_db\"!"]}], "metadata": {"celltoolbar": "Tags", "kernelspec": {"display_name": "Apache Toree - Scala", "language": "scala", "name": "apache_toree_scala"}, "language_info": 
{"codemirror_mode": "text/x-scala", "file_extension": ".scala", "mimetype": "text/x-scala", "name": "scala", "pygments_lexer": "scala", "version": "2.11.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 42 - Data Engineering using Spark SQL - Getting Started/10_role_of_spark_or_hive_metastore.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Role of Spark or Hive Metastore\n", "\n", "Let us understand the role of Spark Metastore or Hive Metasore. We need to first understand details related to Metadata generated for Spark Metastore tables."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* When we create a Spark Metastore table, there is metadata associated with it.\n", " * Table Name\n", " * Column Names and Data Types\n", " * Location\n", " * File Format\n", " * and more\n", "* This metadata has to be stored some where so that Query Engines such as Spark SQL can access the information to serve our queries.\n", "\n", "Let us understand where the metadata is stored.\n", "\n", "* Information is typically stored in relational database and it is called as metastore.\n", "* It is extensively used by Hive or Spark SQL engine for syntax and semantics check as well as execution of queries.\n", "* In our case it is stored in MySQL Database. Let us review the details by going through relevant properties."]}], "metadata": {"celltoolbar": "Tags", "kernelspec": {"display_name": "Apache Toree - Scala", "language": "scala", "name": "apache_toree_scala"}, "language_info": {"codemirror_mode": "text/x-scala", "file_extension": ".scala", "mimetype": "text/x-scala", "name": "scala", "pygments_lexer": "scala", "version": "2.11.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 43 - Data Engineering using Spark SQL - Basic Transformations/03_define_problem_statement.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Define Problem Statement\n", "\n", "Let us define problemt statement to get an overview of basic transformations using Spark SQL."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* Get Daily Product Revenue using orders and order_items data set.\n", "* We have following fields in **orders**.\n", " * order_id\n", " * order_date\n", " * order_customer_id\n", " * order_status\n", "* We have following fields in **order_items**.\n", " * order_item_id\n", " * order_item_order_id\n", " * order_item_product_id\n", " * order_item_quantity\n", " * order_item_subtotal\n", " * order_item_product_price\n", "* We have one to many relationship between orders and order_items.\n", "* **orders.order_id** is **primary key** and **order_items.order_item_order_id** is foreign key to **orders.order_id**.\n", "* By the end of this module we will explore all standard transformation and get daily product revenue using following fields.\n", " * **orders.order_date**\n", " * **order_items.order_item_product_id**\n", " * **order_items.order_item_subtotal** (aggregated using date and product_id).\n", "* We will consider only **COMPLETE** or **CLOSED** orders."]}], "metadata": {"celltoolbar": "Tags", "kernelspec": {"display_name": "Apache Toree - Scala", "language": "scala", "name": "apache_toree_scala"}, "language_info": 
{"codemirror_mode": "text/x-scala", "file_extension": ".scala", "mimetype": "text/x-scala", "name": "scala", "pygments_lexer": "scala", "version": "2.11.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 45 - Data Engineering using Spark SQL - Managing Tables - DML and Partitioning/02_introduction_to_partitioning.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Introduction to Partitioning\n", "\n", "Let us get an overview of partitioning of Spark Metastore tables."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* It is similar to list partitioning where each partition is equal to a particular value for a given column.\n", "* Spark Metastore does not support range partitioning and bucketing. Bucketing is supported in Hive which is similar to Hash Partitioning.\n", "* Once the table is created, we can add static partitions and then load or insert data into it.\n", "* Spark Metastore also support creation of partitions dynamically, where partitions will be created based up on the partition column value.\n", "* A Partitioned table can be managed or external."]}], "metadata": {"kernelspec": {"display_name": "Apache Toree - Scala", "language": "scala", "name": "apache_toree_scala"}, "language_info": {"name": ""}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 46 - Data Engineering using Spark SQL - Overview of Spark SQL Functions/01_predefined_functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Predefined Functions\n", 8 | "\n", 9 | "Let us go through the functions that can be used while processing the data. These are typically applied on columns to get derived values from existing column values.\n", 10 | "\n", 11 | "* Overview of Functions\n", 12 | "* Validating Functions\n", 13 | "* String Manipulation Functions\n", 14 | "* Date Manipulation Functions\n", 15 | "* Overview of Numeric Functions\n", 16 | "* Data Type Conversion\n", 17 | "* Handling NULL Values\n", 18 | "* Using CASE and WHEN\n", 19 | "* Query Example - Word Count" 20 | ] 21 | } 22 | ], 23 | "metadata": { 24 | "kernelspec": { 25 | "display_name": "Apache Toree - Scala", 26 | "language": "scala", 27 | "name": "apache_toree_scala" 28 | }, 29 | "language_info": { 30 | "codemirror_mode": "text/x-scala", 31 | "file_extension": ".scala", 32 | "mimetype": "text/x-scala", 33 | "name": "scala", 34 | "pygments_lexer": "scala", 35 | "version": "2.11.12" 36 | } 37 | }, 38 | "nbformat": 4, 39 | "nbformat_minor": 4 40 | } 41 | -------------------------------------------------------------------------------- /Section 47 - Data Engineering using Spark SQL - Windowing Functions/01_windowing_functions.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Windowing Functions\n", "\n", "As part of this section we will primarily talk about Windowing Functions. 
-------------------------------------------------------------------------------- /Section 45 - Data Engineering using Spark SQL - Managing Tables - DML and Partitioning/02_introduction_to_partitioning.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Introduction to Partitioning\n", "\n", "Let us get an overview of partitioning of Spark Metastore tables."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* It is similar to list partitioning, where each partition is equal to a particular value for a given column.\n", "* Spark Metastore does not support range partitioning and bucketing. Bucketing is supported in Hive, and it is similar to hash partitioning.\n", "* Once the table is created, we can add static partitions and then load or insert data into them.\n", "* Spark Metastore also supports creating partitions dynamically, where partitions are created based upon the partition column values.\n", "* A partitioned table can be managed or external."]}], "metadata": {"kernelspec": {"display_name": "Apache Toree - Scala", "language": "scala", "name": "apache_toree_scala"}, "language_info": {"name": ""}}, "nbformat": 4, "nbformat_minor": 4}
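A short Spark SQL sketch of the flow described above: create a partitioned table, add a static partition, then insert with dynamic partitioning. The table and column names are illustrative only, and `spark` is assumed to be a SparkSession with Hive support:

```python
# Illustrative partitioned table flow; names are examples, not the course's exact tables.
spark.sql("""
    CREATE TABLE orders_part (
        order_id INT,
        order_date STRING,
        order_customer_id INT
    ) PARTITIONED BY (order_status STRING)
""")

# Static partition: added explicitly, after which data can be loaded or inserted into it.
spark.sql("ALTER TABLE orders_part ADD PARTITION (order_status='COMPLETE')")

# Dynamic partitions: created from the values of the partition column during insert.
spark.sql("SET hive.exec.dynamic.partition.mode=nonstrict")
spark.sql("""
    INSERT INTO orders_part PARTITION (order_status)
    SELECT order_id, order_date, order_customer_id, order_status
    FROM orders
""")
```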
-------------------------------------------------------------------------------- /Section 46 - Data Engineering using Spark SQL - Overview of Spark SQL Functions/01_predefined_functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Predefined Functions\n", 8 | "\n", 9 | "Let us go through the functions that can be used while processing the data. These are typically applied on columns to get derived values from existing column values.\n", 10 | "\n", 11 | "* Overview of Functions\n", 12 | "* Validating Functions\n", 13 | "* String Manipulation Functions\n", 14 | "* Date Manipulation Functions\n", 15 | "* Overview of Numeric Functions\n", 16 | "* Data Type Conversion\n", 17 | "* Handling NULL Values\n", 18 | "* Using CASE and WHEN\n", 19 | "* Query Example - Word Count" 20 | ] 21 | } 22 | ], 23 | "metadata": { 24 | "kernelspec": { 25 | "display_name": "Apache Toree - Scala", 26 | "language": "scala", 27 | "name": "apache_toree_scala" 28 | }, 29 | "language_info": { 30 | "codemirror_mode": "text/x-scala", 31 | "file_extension": ".scala", 32 | "mimetype": "text/x-scala", 33 | "name": "scala", 34 | "pygments_lexer": "scala", 35 | "version": "2.11.12" 36 | } 37 | }, 38 | "nbformat": 4, 39 | "nbformat_minor": 4 40 | } -------------------------------------------------------------------------------- /Section 47 - Data Engineering using Spark SQL - Windowing Functions/01_windowing_functions.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Windowing Functions\n", "\n", "As part of this section, we will primarily talk about Windowing Functions. These are also known as Analytic Functions in databases like Oracle."]}, {"cell_type": "markdown", "metadata": {}, "source": ["* Prepare HR Database\n", "* Overview of Windowing Functions\n", "* Aggregations using Windowing Functions\n", "* Getting LEAD and LAG values\n", "* Getting first and last values\n", "* Ranking using Windowing Functions\n", "* Understanding order of execution of SQL\n", "* Overview of Nested Sub Queries\n", "* Filtering - Window Function Results"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["import org.apache.spark.sql.SparkSession\n", "\n", "val username = System.getProperty(\"user.name\")\n", "val spark = SparkSession.\n", " builder.\n", " config(\"spark.ui.port\", \"0\").\n", " config(\"spark.sql.warehouse.dir\", s\"/user/${username}/warehouse\").\n", " enableHiveSupport.\n", " appName(s\"${username} | Spark SQL - Windowing Functions\").\n", " master(\"yarn\").\n", " getOrCreate"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["%%sql\n", "\n", "SET spark.sql.shuffle.partitions=2"]}], "metadata": {"kernelspec": {"display_name": "Apache Toree - Scala", "language": "scala", "name": "apache_toree_scala"}, "language_info": {"codemirror_mode": "text/x-scala", "file_extension": ".scala", "mimetype": "text/x-scala", "name": "scala", "pygments_lexer": "scala", "version": "2.11.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 51 - Apache Spark using Python - Joining Data Sets/04_problem_statements_for_joins.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Problem Statements for Joins\n", 8 | "\n", 9 | "Let us understand how to join Data Frames by using some problem statements. We will use January 2008 airtraffic data along with Airport Codes.\n", 10 | "\n", 11 | "* Get the number of flights departed from each US airport.\n", 12 | "* Get the number of flights departed from each state.\n", 13 | "* Get the list of airports in the US from which no flights departed.\n", 14 | "* Check if there are any origins in the airtraffic data which do not have a record in airport-codes.\n", 15 | "* Get the total number of flights from the airports that do not have entries in airport-codes.\n", 16 | "* Get the total number of flights per airport, for airports that do not have entries in airport-codes." 17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "Python 3 (ipykernel)", 23 | "language": "python", 24 | "name": "python3" 25 | }, 26 | "language_info": { 27 | "codemirror_mode": { 28 | "name": "ipython", 29 | "version": 3 30 | }, 31 | "file_extension": ".py", 32 | "mimetype": "text/x-python", 33 | "name": "python", 34 | "nbconvert_exporter": "python", 35 | "pygments_lexer": "ipython3", 36 | "version": "3.10.2" 37 | } 38 | }, 39 | "nbformat": 4, 40 | "nbformat_minor": 4 41 | }
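A sketch for the first problem statement, assuming `airtraffic` and `airport_codes` Data Frames are already loaded. The column names (Origin, IATA, Country) are assumptions about those data sets and should be adjusted to the actual schemas:

```python
# Number of flights departed from each US airport (first problem statement above).
# Column names are assumptions; adjust them to the actual schemas of the data sets.
from pyspark.sql.functions import count, lit

flight_count_per_airport = airtraffic. \
    join(airport_codes, airtraffic['Origin'] == airport_codes['IATA']). \
    filter(airport_codes['Country'] == 'USA'). \
    groupBy('Origin'). \
    agg(count(lit(1)).alias('FlightCount')). \
    orderBy('FlightCount', ascending=False)

flight_count_per_airport.show()
```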
-------------------------------------------------------------------------------- /Section 51 - Apache Spark using Python - Joining Data Sets/05_overview_of_joins.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Overview of Joins\n", "\n", "Let us get an overview of joining Data Frames.\n", "* Our data typically cannot be stored in one table. It will be stored in multiple tables, and the tables might be related.\n", " * When it comes to transactional systems, we typically define tables based on Normalization Principles.\n", " * When it comes to data warehousing applications, we typically define tables using Dimensional Modeling.\n", " * In either approach, data is scattered across multiple tables, and relationships are defined between them.\n", " * Typically tables are related with one to one, one to many, or many to many relationships.\n", "* When we have 2 Data Sets that are related based on a common key, we typically perform a join.\n", "* There are different types of joins.\n", " * INNER JOIN\n", " * OUTER JOIN (LEFT or RIGHT)\n", " * FULL OUTER JOIN (a LEFT OUTER JOIN b UNION a RIGHT OUTER JOIN b)\n", " "]}], "metadata": {"kernelspec": {"display_name": "Pyspark 2", "language": "python", "name": "pyspark2"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 52 - Apache Spark using Python - Spark Metastore/02_overview_of_spark_metastore.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["## Overview of Spark Metastore\n", "\n", "Let us get an overview of Spark Metastore and how we can leverage it to manage databases and tables on top of Big Data based file systems such as HDFS, S3, etc.\n", "\n", "* Quite often we need to deal with structured data, and the most popular way of processing it is by using databases, tables, and SQL.\n", "* Spark Metastore (similar to Hive Metastore) facilitates managing databases and tables.\n", "* Typically the metastore is set up using traditional relational database technologies such as **Oracle**, **MySQL**, **Postgres**, etc."]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12"}}, "nbformat": 4, "nbformat_minor": 4} -------------------------------------------------------------------------------- /Section 53 - Apache Spark - Development Life Cycle using Python/Getting Started: -------------------------------------------------------------------------------- 1 | Getting Started 2 | 3 | Here is how we can get started with local development of data engineering pipelines using Spark. 4 | 5 | Create Virtual Environment - python3.7 -m venv itvg-venv 6 | 7 | Activate virtual environment - source itvg-venv/bin/activate 8 | 9 | Install PySpark for local development - pip install pyspark==2.4.* 10 | 11 | Open the project using PyCharm and make sure the virtual environment we have set up is used as the project interpreter. 12 | 13 | Create a program called app.py and enter this code. 14 | 15 | from pyspark.sql import SparkSession 16 | 17 | spark = SparkSession. \ 18 | builder. \ 19 | master('local'). \ 20 | appName('GitHub Activity - Getting Started'). \ 21 | getOrCreate() 22 | 23 | spark.sql('SELECT current_date').show()
-------------------------------------------------------------------------------- /Section 53 - Apache Spark - Development Life Cycle using Python/Process data using Spark APIs: -------------------------------------------------------------------------------- 1 | Process data using Spark APIs 2 | 3 | We will eventually partition the data by year, month and day of month while writing to the target directory. However, to partition the data we need to add new columns. 4 | 5 | Create a Python program by name process.py. We will create a function by name transform. It adds year, month and day columns derived from created_at, which will later be used to partition the data. 6 | 7 | from pyspark.sql.functions import year, \ 8 | month, dayofmonth 9 | 10 | 11 | def transform(df): 12 | return df.withColumn('year', year('created_at')). \ 13 | withColumn('month', month('created_at')). \ 14 | withColumn('day', dayofmonth('created_at')) 15 | Call the function from app.py. For now, review the schema and data. 16 | 17 | import os 18 | from util import get_spark_session 19 | from read import from_files 20 | from process import transform 21 | 22 | 23 | def main(): 24 | env = os.environ.get('ENVIRON') 25 | src_dir = os.environ.get('SRC_DIR') 26 | file_pattern = f"{os.environ.get('SRC_FILE_PATTERN')}-*" 27 | src_file_format = os.environ.get('SRC_FILE_FORMAT') 28 | spark = get_spark_session(env, 'GitHub Activity - Partitioning Data') 29 | df = from_files(spark, src_dir, file_pattern, src_file_format) 30 | df_transformed = transform(df) 31 | df_transformed.printSchema() 32 | df_transformed.select('year', 'month', 'day').show() 33 | 34 | 35 | if __name__ == '__main__': 36 | main() -------------------------------------------------------------------------------- /Section 53 - Apache Spark - Development Life Cycle using Python/Read data from files: -------------------------------------------------------------------------------- 1 | Read data from files 2 | 3 | Let us develop the code to read the data from files into Spark Dataframes. 4 | 5 | Create a directory for the data and download some files into it. 6 | 7 | mkdir -p data/itv-github/landing/ghactivity 8 | cd data/itv-github/landing/ghactivity 9 | wget https://data.gharchive.org/2021-01-13-0.json.gz 10 | wget https://data.gharchive.org/2021-01-14-0.json.gz 11 | wget https://data.gharchive.org/2021-01-15-0.json.gz 12 | Create a Python program by name read.py. We will create a function by name from_files. It reads the data from the files into a Dataframe and returns it. 13 | 14 | def from_files(spark, data_dir, file_pattern, file_format): 15 | df = spark. \ 16 | read. \ 17 | format(file_format). \ 18 | load(f'{data_dir}/{file_pattern}') 19 | return df 20 | Call the function from app.py. For now, review the schema and data. 21 | 22 | import os 23 | from util import get_spark_session 24 | from read import from_files 25 | 26 | 27 | def main(): 28 | env = os.environ.get('ENVIRON') 29 | src_dir = os.environ.get('SRC_DIR') 30 | file_pattern = f"{os.environ.get('SRC_FILE_PATTERN')}-*" 31 | src_file_format = os.environ.get('SRC_FILE_FORMAT') 32 | spark = get_spark_session(env, 'GitHub Activity - Reading Data') 33 | df = from_files(spark, src_dir, file_pattern, src_file_format) 34 | df.printSchema() 35 | df.select('repo.*').show() 36 | 37 | 38 | if __name__ == '__main__':
 39 | main()
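The Process data note above says the derived year, month and day columns will eventually drive partitioning while writing to the target directory. A sketch of that final write step, where TGT_DIR and the parquet format are assumptions in line with the other environment variables, and `df_transformed` is the Dataframe produced by `transform`:

```python
# Hypothetical final write step: partition the transformed data while writing.
import os

tgt_dir = os.environ.get('TGT_DIR')   # assumed variable, in line with SRC_DIR above

df_transformed. \
    write. \
    partitionBy('year', 'month', 'day'). \
    mode('append'). \
    parquet(tgt_dir)
```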
-------------------------------------------------------------------------------- /Section 53 - Apache Spark - Development Life Cycle using Python/set up a virtual environment: -------------------------------------------------------------------------------- 1 | Here are the commands used to set up a virtual environment and install PySpark. 2 | 3 | python3 -m venv deod-venv 4 | source deod-venv/bin/activate 5 | pip install pyspark -------------------------------------------------------------------------------- /Section 54 - Spark Application Execution Life Cycle and Spark UI/Develop Shell Script to run Spark Application: -------------------------------------------------------------------------------- 1 | Develop Shell Script to run Spark Application 2 | Paste this shell script into a file called runme.sh or a name of your choice. 3 | 4 | export PYSPARK_PYTHON=python3 5 | export SPARK_MAJOR_VERSION=3 6 | 7 | spark-submit \ 8 | --master yarn \ 9 | --conf spark.ui.port=0 \ 10 | /home/itversity/wordcount/word_count.py 11 | Once the shell script is created, it can be run using source runme.sh -------------------------------------------------------------------------------- /Section 54 - Spark Application Execution Life Cycle and Spark UI/Develop Word Count Application: -------------------------------------------------------------------------------- 1 | Develop Word Count Application 2 | Create a working directory by name wordcount and, in it, create a Python program file by name word_count.py. Paste the code below. Make sure to review both input and output paths; they should be valid for the environment you are running in. 3 | 4 | import getpass 5 | from pyspark.sql import SparkSession 6 | from pyspark.sql.functions import split, explode, count, lit 7 | 8 | username = getpass.getuser() 9 | spark = SparkSession. \ 10 | builder. \ 11 | appName(f'{username} - Word Count'). \ 12 | master('yarn'). \ 13 | getOrCreate() 14 | 15 | lines = spark.read.text('/public/randomtextwriter/part-m-0000*'). \ 16 | toDF('line') 17 | 18 | word_count = lines. \ 19 | select(explode(split('line', ' ')).alias('word')). \ 20 | groupBy('word'). \ 21 | agg(count(lit(1)).alias('word_count')) 22 | 23 | word_count. \ 24 | write. \ 25 | mode('overwrite'). \ 26 | csv(f'/user/{username}/word_count') -------------------------------------------------------------------------------- /Section 54 - Spark Application Execution Life Cycle and Spark UI/Setup Data Set for Word Count Application: -------------------------------------------------------------------------------- 1 | Setup Data Set for Word Count Application 2 | If you are using ITVersity labs, you can skip this and use /public/randomtextwriter to test the word count application. 3 | 4 | Step 1: Find the Hadoop MapReduce examples jar. Make sure to use the path relevant to your environment. 5 | 6 | find /opt/hadoop/ -name "hadoop*mapreduce*examples*.jar" 7 | 8 | Step 2: Run the relevant command to generate the test data for word count 9 | 10 | hadoop jar \ 11 | /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar \ 12 | randomtextwriter -Dmapreduce.randomtextwriter.mapsperhost=2 \ 13 | /user/`whoami`/randomtextwriter --------------------------------------------------------------------------------
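Once word_count.py has run via the shell script above, its output can be read back for validation from a PySpark session. A small sketch reusing the output path from the word count application:

```python
# Read back and inspect the CSV output written by word_count.py.
import getpass

from pyspark.sql import SparkSession

username = getpass.getuser()
spark = SparkSession.builder.appName(f'{username} - Validate Word Count').getOrCreate()

word_count = spark.read.csv(
    f'/user/{username}/word_count',
    schema='word STRING, word_count INT'
)
word_count.orderBy('word_count', ascending=False).show()
```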