├── Begin
│   ├── temp
│   │   └── temp
│   ├── CH_4_Requirements.txt
│   ├── Coffee_shop_data.xlsx
│   ├── CH_3_Requirements.txt
│   ├── CH_1_NLP_ChatBot.py
│   ├── CH_0_OOP_Refresher.py
│   ├── CH_4a_Challenge.py
│   ├── CH_3a_Challenge.py
│   ├── CH_4_DocuChat_Frontend.py
│   ├── CH_3_DocuChat_Backend.py
│   └── CH_2_Business_Prediction.ipynb
├── Finish
│   ├── temp
│   │   └── temp
│   ├── CH_4_Requirements.txt
│   ├── Coffee_shop_data.xlsx
│   ├── CH_3_Requirements.txt
│   ├── CH_4a_Challenge.py
│   ├── CH_1_NLP_ChatBot.py
│   ├── CH_0_OOP Refresher.py
│   ├── CH_3a_Challenge.py
│   ├── CH_4_DocuChat_Frontend.py
│   └── CH_3_DocuChat_Backend.py
├── .gitignore
├── .github
│   ├── CODEOWNERS
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── workflows
│   │   └── main.yml
│   └── ISSUE_TEMPLATE.md
├── requirements.txt
├── CONTRIBUTING.md
├── NOTICE
├── .vscode
│   └── settings.json
├── .devcontainer
│   └── devcontainer.json
├── README.md
└── LICENSE

/Begin/temp/temp:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/Finish/temp/temp:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/Begin/CH_4_Requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | 
--------------------------------------------------------------------------------
/Finish/CH_4_Requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules
3 | .tmp
4 | npm-debug.log
5 | 
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # Codeowners for these
exercise files: 2 | # * (asterisk) denotes "all files and folders" 3 | # Example: * @producer @instructor 4 | -------------------------------------------------------------------------------- /Begin/Coffee_shop_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInLearning/advanced-python-projects-build-ai-applications-4465602-1/main/Begin/Coffee_shop_data.xlsx -------------------------------------------------------------------------------- /Finish/Coffee_shop_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInLearning/advanced-python-projects-build-ai-applications-4465602-1/main/Finish/Coffee_shop_data.xlsx -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Specify Python package requirements for your project here (e.g., Mako==1.1.1). If your project doesn't require these, you can leave this file unchanged or delete it. 
2 | 
--------------------------------------------------------------------------------
/Begin/CH_3_Requirements.txt:
--------------------------------------------------------------------------------
1 | unstructured[all-docs]
2 | langchain
3 | openai
4 | tiktoken
5 | fastapi
6 | uvicorn
7 | pymongo
8 | faiss-cpu
9 | itsdangerous
10 | awswrangler
11 | html2text
12 | pdf2image
13 | python-dotenv
14 | langchain-community
15 | docx2txt
16 | langchain_openai
--------------------------------------------------------------------------------
/Finish/CH_3_Requirements.txt:
--------------------------------------------------------------------------------
1 | unstructured[all-docs]
2 | langchain
3 | openai
4 | tiktoken
5 | fastapi
6 | uvicorn
7 | pymongo
8 | faiss-cpu
9 | itsdangerous
10 | awswrangler
11 | html2text
12 | pdf2image
13 | python-dotenv
14 | langchain-community
15 | docx2txt
16 | langchain_openai
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: Copy To Branches
2 | on:
3 |   workflow_dispatch:
4 | jobs:
5 |   copy-to-branches:
6 |     runs-on: ubuntu-latest
7 |     steps:
8 |       - uses: actions/checkout@v2
9 |         with:
10 |           fetch-depth: 0
11 |       - name: Copy To Branches Action
12 |         uses: planetoftheweb/copy-to-branches@v1.2
13 |         env:
14 |           key: main
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | 
2 | Contribution Agreement
3 | ======================
4 | 
5 | This repository does not accept pull requests (PRs). All pull requests will be closed.
6 | 7 | However, if any contributions (through pull requests, issues, feedback or otherwise) are provided, as a contributor, you represent that the code you submit is your original work or that of your employer (in which case you represent you have the right to bind your employer). By submitting code (or otherwise providing feedback), you (and, if applicable, your employer) are licensing the submitted code (and/or feedback) to LinkedIn and the open source community subject to the BSD 2-Clause license. 8 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2025 LinkedIn Corporation 2 | All Rights Reserved. 3 | 4 | Licensed under the LinkedIn Learning Exercise File License (the "License"). 5 | See LICENSE in the project root for license information. 6 | 7 | Please note, this project may automatically load third party code from external 8 | repositories (for example, NPM modules, Composer packages, or other dependencies). 9 | If so, such third party code may be subject to other license terms than as set 10 | forth above. In addition, such third party code may also depend on and load 11 | multiple tiers of dependencies. Please review the applicable licenses of the 12 | additional dependencies. 
13 | -------------------------------------------------------------------------------- /Begin/CH_1_NLP_ChatBot.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Define intents and their corresponding responses based on keywords 4 | 5 | # Convert the message to lowercase for consistent keyword matching 6 | 7 | # Check if the message contains any keywords associated with defined intents 8 | 9 | 10 | # Analyze the sentiment of the message using TextBlob 11 | 12 | 13 | # Return a response based on the sentiment score 14 | 15 | # Greet the user and prompt for input 16 | 17 | # Continuously prompt the user for input until they choose to exit 18 | 19 | # Thank the user for chatting when they exit 20 | 21 | 22 | if __name__ == "__main__": 23 | chat() # Start the chat when the script is executed 24 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.bracketPairColorization.enabled": true, 3 | "editor.cursorBlinking": "solid", 4 | "editor.fontFamily": "ui-monospace, Menlo, Monaco, 'Cascadia Mono', 'Segoe UI Mono', 'Roboto Mono', 'Oxygen Mono', 'Ubuntu Monospace', 'Source Code Pro', 'Fira Mono', 'Droid Sans Mono', 'Courier New', monospace", 5 | "editor.fontLigatures": false, 6 | "editor.fontSize": 22, 7 | "editor.formatOnPaste": true, 8 | "editor.formatOnSave": true, 9 | "editor.lineNumbers": "on", 10 | "editor.matchBrackets": "always", 11 | "editor.minimap.enabled": false, 12 | "editor.smoothScrolling": true, 13 | "editor.tabSize": 2, 14 | "editor.useTabStops": true, 15 | "emmet.triggerExpansionOnTab": true, 16 | "files.autoSave": "afterDelay", 17 | "terminal.integrated.fontSize": 18, 18 | "workbench.colorTheme": "LinkedIn Learning Dark", 19 | "workbench.fontAliasing": "antialiased", 20 | "workbench.statusBar.visible": true 21 | } 
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "customizations": {
3 |     "codespaces": {
4 |       "openFiles": [
5 |         "README.md"
6 |       ]
7 |     },
8 |     "vscode": {
9 |       // Set *default* container specific settings.json values on container create.
10 |       "settings": {
11 |         "terminal.integrated.shell.linux": "/bin/bash"
12 |       },
13 |       // Add the IDs of extensions you want installed when the container is created.
14 |       "extensions": [
15 |         "linkedinlearning.linkedinlearning-vscode-theme",
16 |         "ms-toolsai.jupyter",
17 |         "ms-python.python"
18 |         // Additional Extensions Here
19 |       ]
20 |     }
21 |   },
22 |   // Update welcome text and set terminal prompt to '$ '
23 |   "onCreateCommand": "echo PS1='\"$ \"' >> ~/.bashrc",
24 |   // Pull all branches
25 |   "postAttachCommand": "git pull --all"
26 | }
27 | // DevContainer Reference: https://code.visualstudio.com/docs/remote/devcontainerjson-reference
--------------------------------------------------------------------------------
/Begin/CH_0_OOP_Refresher.py:
--------------------------------------------------------------------------------
1 | # Introduction to Object-Oriented Programming with Python: Creating and Using Classes
2 | 
3 | # Class Definition
4 | 
5 | # Constructor (Initialization) - __init__ method
6 | 
7 | # Encapsulation: Attributes (make and model) are encapsulated within the class.
8 | 
9 | 
10 | # Method - start_engine
11 | 
12 | # Encapsulation: Accessing attributes through self.
13 | 
14 | 
15 | # Creating instances (objects) of the Car class
16 | 
17 | # Instantiation: The Car class is used as a blueprint to create objects (instances).
18 | # Abstraction: We create objects without worrying about the internal details of the Car class.
19 | 20 | # Creating the first car (object) 21 | 22 | # Creating the second car (object) 23 | 24 | 25 | # Accessing object attributes 26 | 27 | # Encapsulation: Accessing object attributes (make and model) using dot notation. 28 | 29 | 30 | # Calling object methods 31 | 32 | # Polymorphism: Different objects (car1 and car2) can perform the same action (start_engine). 33 | 34 | 35 | # Method Call - start_engine 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 7 | 8 | ## Issue Overview 9 | 10 | 11 | ## Describe your environment 12 | 13 | 14 | ## Steps to Reproduce 15 | 16 | 1. 17 | 2. 18 | 3. 19 | 4. 20 | 21 | ## Expected Behavior 22 | 23 | 24 | ## Current Behavior 25 | 26 | 27 | ## Possible Solution 28 | 29 | 30 | ## Screenshots / Video 31 | 32 | 33 | ## Related Issues 34 | 35 | -------------------------------------------------------------------------------- /Begin/CH_4a_Challenge.py: -------------------------------------------------------------------------------- 1 | # Import necessary libraries 2 | 3 | 4 | # Set the FastAPI endpoint URL where the file will be uploaded 5 | FASTAPI_URL = "http://127.0.0.1:8000/uploadFile" 6 | 7 | # Function to handle file upload 8 | def upload_file(file): 9 | # Prepare the file to be sent in a POST request to the FastAPI backend 10 | files = {'data_file': file} 11 | 12 | # Send the POST request to the FastAPI server with the file 13 | 14 | 15 | # Check the response status code to ensure the upload was successful 16 | if 17 | # If successful, display a success message and the JSON response 18 | 19 | else: 20 | # If an error occurs, display an error message with the response text 21 | 22 | 23 | # Streamlit UI setup 24 | # Configure the page with a title, icon, and layout 25 | 26 | 27 | # Streamlit file uploader widget 28 | # Allows the user to select a file of specified types (docx, csv, 
txt, pdf) 29 | 30 | 31 | # If a file is selected and the "Upload" button is clicked 32 | if uploaded_file is not None: 33 | if st.button("Upload"): 34 | # Call the function to upload the selected file 35 | 36 | 37 | # Command to run the Streamlit app 38 | # streamlit run Begin/CH_4a_Challenge.py 39 | -------------------------------------------------------------------------------- /Finish/CH_4a_Challenge.py: -------------------------------------------------------------------------------- 1 | # Import necessary libraries 2 | import streamlit as st # Streamlit for building interactive web apps 3 | import requests # Requests library for sending HTTP requests 4 | 5 | # Set the FastAPI endpoint URL where the file will be uploaded 6 | FASTAPI_URL = "http://127.0.0.1:8000/uploadFile" 7 | 8 | # Function to handle file upload 9 | def upload_file(file): 10 | # Prepare the file to be sent in a POST request to the FastAPI backend 11 | files = {'data_file': file} 12 | 13 | # Send the POST request to the FastAPI server with the file 14 | response = requests.post(FASTAPI_URL, files=files) 15 | 16 | # Check the response status code to ensure the upload was successful 17 | if response.status_code == 200: 18 | # If successful, display a success message and the JSON response 19 | st.success("File uploaded successfully!") 20 | st.json(response.json()) # Display the JSON response in the app 21 | else: 22 | # If an error occurs, display an error message with the response text 23 | st.error("Error during file upload: " + response.text) 24 | 25 | # Streamlit UI setup 26 | # Configure the page with a title, icon, and layout 27 | st.set_page_config(page_title="Challenge", page_icon="📕", layout="wide") 28 | 29 | # Streamlit file uploader widget 30 | # Allows the user to select a file of specified types (docx, csv, txt, pdf) 31 | uploaded_file = st.file_uploader("Choose a file to upload", type=["docx", "csv", "txt", "pdf"]) 32 | 33 | # If a file is selected and the "Upload" button is clicked 
34 | if uploaded_file is not None:
35 |     if st.button("Upload"):
36 |         # Call the function to upload the selected file
37 |         upload_file(uploaded_file)
38 | 
39 | # Command to run the Streamlit app
40 | # streamlit run Finish/CH_4a_Challenge.py
41 | 
--------------------------------------------------------------------------------
/Finish/CH_1_NLP_ChatBot.py:
--------------------------------------------------------------------------------
1 | # Importing TextBlob to help the chatbot understand language nuances.
2 | from textblob import TextBlob
3 | 
4 | # Defining the ChatBot class for interaction.
5 | class ChatBot:
6 |     def __init__(self):
7 |         # Defining intents based on keywords
8 |         self.intents = {
9 |             "hours": {
10 |                 "keywords": ["hour", "open", "close"],
11 |                 "response": "We are open from 9 AM to 5 PM, Monday to Friday."
12 |             },
13 |             "return": {
14 |                 "keywords": ["refund", "money back", "return"],
15 |                 "response": "I'd be happy to help you process your return. Let me transfer you to a live agent."
16 |             }
17 |         }
18 |     # Generating a response: matching the message against known intents first.
19 |     def get_response(self, message):
20 |         message = message.lower()
21 |         for intent_data in self.intents.values():
22 |             if any(word in message for word in intent_data["keywords"]):
23 |                 return intent_data["response"]
24 | 
25 |         # Analyzing the sentiment of the user's message using TextBlob.
26 |         sentiment = TextBlob(message).sentiment.polarity
27 | 
28 |         # Returning a response based on the sentiment score.
29 |         return ("That's great to hear!" if sentiment > 0 else
30 |                 "I'm so sorry to hear that. How can I help?" if sentiment < 0 else
31 |                 "I see. Can you tell me more about that?")
32 | 
33 |     def chat(self):
34 |         print("ChatBot: Hi, how can I help you today?")
35 |         while (user_message := input("You: ").strip().lower()) not in ['exit', 'quit', 'bye']:
36 |             print(f"\nChatBot: {self.get_response(user_message)}")
37 |         print("ChatBot: Thank you for chatting. Have a great day!")
38 | 
39 | if __name__ == "__main__":
40 |     # Creating the chatbot and starting the chat loop.
41 |     ChatBot().chat()
42 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Advanced Python Projects: Build AI Applications
2 | This is the repository for the LinkedIn Learning course `Advanced Python Projects: Build AI Applications`. The full course is available from [LinkedIn Learning][lil-course-url].
3 | 
4 | ![lil-thumbnail-url]
5 | 
6 | Python is a versatile programming language that is widely used in a variety of industries, including data science, artificial intelligence, web development, and more. As the demand for Python developers continues to grow, having a portfolio of Python projects can significantly increase your job prospects and marketability. This course with instructor Priya Mohan is designed to equip you with the skills and knowledge needed to create a portfolio of Python-based applications and tools that can be showcased to employers or used to bring your own ideas to life. It’s ideal for anyone looking to enhance their Python knowledge by completing hands-on projects or for those seeking to create interesting solutions from scratch for fun.
7 | 
8 | This course is integrated with GitHub Codespaces, an instant cloud developer environment that offers all the functionality of your favorite IDE without the need for any local machine setup. With GitHub Codespaces, you can get hands-on practice from any machine, at any time—all while using a tool that you’ll likely encounter in the workplace. Check out the “Using GitHub Codespaces with this course” video to learn how to get started.
9 | 
10 | ## Instructions
11 | All of the course files are stored in the main branch, which contains two folders: "Begin" and "Finish".
The Begin folder contains semi-completed code files you can start working on while watching the LinkedIn Learning course. The Finish folder contains completed code files. The naming convention is `CHAPTER_#_ProjectName`. As an example, the first project is labeled "CH_1_NLP_ChatBot".
12 | 
13 | Happy Coding!
14 | 
15 | ### Instructor
16 | 
17 | Priya Mohan
18 | 
19 | Management Consultant, KPMG
20 | 
21 | Please follow me on LinkedIn: https://www.linkedin.com/in/priya123mohan
22 | 
23 | [0]: # (Replace these placeholder URLs with actual course URLs)
24 | 
25 | [lil-course-url]: https://www.linkedin.com/learning/advanced-python-projects-build-ai-applications
26 | [lil-thumbnail-url]: https://media.licdn.com/dms/image/D560DAQHIPR3VAGQGiQ/learning-public-crop_675_1200/0/1713466120470?e=2147483647&v=beta&t=on84QImWhMSkQjBq4E8OiW9BuJeJ7vP_Np1ZmCkhtzo
27 | 
28 | 
--------------------------------------------------------------------------------
/Begin/CH_3a_Challenge.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI, UploadFile, HTTPException
2 | from fastapi.responses import JSONResponse
3 | import os
4 | from datetime import datetime
5 | import boto3
6 | import awswrangler as wr
7 | from pymongo import MongoClient
8 | import pymongo
9 | 
10 | # Initialize FastAPI app
11 | 
12 | # Initialize S3 boto3 session and MongoDB client (adjust your connection as needed)
13 | aws_s3 = boto3.Session(
14 |     aws_access_key_id="",
15 |     aws_secret_access_key="",
16 |     region_name=""
17 | )
18 | 
19 | # Install MongoDB in the Codespace using the commands below
20 | # sudo apt-get update
21 | # sudo apt-get install -y mongodb
22 | 
23 | # Start the database
24 | # sudo service mongodb start
25 | 
26 | client = MongoClient("mongodb://localhost:27017/")
27 | db = client["local"]
28 | 
29 | # Define your S3 bucket and path
30 | S3_BUCKET = ""
31 | S3_PATH = ""
32 | 
33 | @app.post("/uploadFile")
34 | async def uploadtos3(data_file: UploadFile):
35 | 
36 |     try:
37 |         file_name = data_file.filename.split("/")[-1]
38 | 
39 |         # Save the file temporarily to the local file system
40 | 
41 | 
42 |         # Get the file size
43 |         file_size = os.path.getsize(file_name)
44 | 
45 |         # Get the upload timestamp
46 |         upload_time = str(datetime.now())
47 | 
48 |         # Upload file to AWS S3
49 | 
50 | 
51 |         # Remove the local file after upload
52 | 
53 | 
54 |         # Prepare metadata to store in MongoDB
55 |         metadata = {
56 |             "filename": file_name,
57 |             "file_size": file_size,
58 |             "upload_time": upload_time,
59 |             "s3_path": f"s3://{S3_BUCKET}/{S3_PATH}{file_name}",
60 |         }
61 | 
62 |         # Insert file metadata into MongoDB
63 |         result = db["file_metadata"].insert_one(metadata)
64 | 
65 | 
66 |         # Return response
67 |         response = {
68 |             "filename": file_name,
69 |             "file_size": file_size,
70 |             "upload_time": upload_time,
71 |             "file_path": f"s3://{S3_BUCKET}/{S3_PATH}{file_name}",
72 |             "mongo_insert_status": result.acknowledged,
73 |         }
74 | 
75 |     except FileNotFoundError:
76 | 
77 | 
78 |     except Exception as e:
79 |         print(f"Error during file upload: {e}")
80 |         raise HTTPException(status_code=500, detail="Error during file upload")
81 | 
82 |     return JSONResponse(content=response)
83 | 
84 | # Once completed, use the command below to run the file
85 | # uvicorn Begin.CH_3a_Challenge:app --reload
86 | 
--------------------------------------------------------------------------------
/Finish/CH_0_OOP Refresher.py:
--------------------------------------------------------------------------------
1 | # Introduction to Object-Oriented Programming with Python: Creating and Using Classes
2 | 
3 | # Here, we're defining a new "class" called "Car."
4 | # Think of it as a blueprint for making cars.
5 | # Class Definition
6 | class Car:
7 |     # This is a special function that gets called when a new car is created.
8 |     # It's like the instructions for what to do when making a new car.
9 |     # Constructor (Initialization) - __init__ method
10 |     def __init__(self, make, model):
11 |         # "self" refers to the car that's being created.
12 |         # We're storing the "make" and "model" information in the car.
13 | 
14 |         # Encapsulation: Attributes (make and model) are encapsulated within the class.
15 |         # Encapsulation hides the internal details of an object and exposes only what is necessary.
16 |         # Think of it as specifying the unique characteristics of each car.
17 |         self.make = make
18 |         self.model = model
19 | 
20 |     # This is a method, which is like a function inside the class.
21 |     # It's something the car can do, like starting its engine.
22 |     # Method - start_engine
23 |     def start_engine(self):
24 |         # Encapsulation: Accessing attributes through self.
25 |         # Analogously, this is like the car performing an action specific to itself.
26 |         print(f"The {self.make} {self.model}'s engine is running!")
27 | 
28 | # Creating instances (objects) of the Car class
29 | 
30 | # Instantiation: The Car class is used as a blueprint to create objects (instances).
31 | # Abstraction: We create objects without worrying about the internal details of the Car class.
32 | # Think of creating individual cars without needing to understand the intricacies of car manufacturing.
33 | 
34 | # Creating the first car (object)
35 | car1 = Car("Toyota", "Camry")
36 | 
37 | # Creating the second car (object)
38 | car2 = Car("Ford", "Mustang")
39 | 
40 | # Accessing object attributes
41 | 
42 | # Encapsulation: Accessing object attributes (make and model) using dot notation.
43 | # It's like checking the make and model of each car without knowing how they are implemented internally.
44 | print(f"I have a {car1.make} {car1.model}.")
45 | print(f"I also own a {car2.make} {car2.model}.")
46 | 
47 | # Calling object methods
48 | 
49 | # Polymorphism: Different objects (car1 and car2) can perform the same action (start_engine).
50 | # Method Overriding: The start_engine method may be customized in subclasses.
51 | # This is like both cars starting their engines, but each might do it in a way specific to its make and model.
52 | 
53 | # Method Call - start_engine
54 | car1.start_engine()  # Polymorphism: Car 1 starts its engine.
55 | car2.start_engine()  # Polymorphism: Car 2 starts its engine.
56 | # This is like instructing each car to engage its engine, and they follow their unique set of instructions.
--------------------------------------------------------------------------------
/Finish/CH_3a_Challenge.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI, UploadFile, HTTPException
2 | from fastapi.responses import JSONResponse
3 | import os
4 | from datetime import datetime
5 | import boto3
6 | import awswrangler as wr
7 | from pymongo import MongoClient
8 | import pymongo
9 | 
10 | # Initialize FastAPI app
11 | app = FastAPI()
12 | 
13 | # Initialize S3 boto3 session and MongoDB client (adjust your connection as needed)
14 | aws_s3 = boto3.Session(
15 |     aws_access_key_id="",
16 |     aws_secret_access_key="",
17 |     region_name=""
18 | )
19 | 
20 | # Install MongoDB in the Codespace using the commands below
21 | # sudo apt-get update
22 | # sudo apt-get install -y mongodb
23 | 
24 | # Start the database
25 | # sudo service mongodb start
26 | 
27 | client = MongoClient("mongodb://localhost:27017/")
28 | db = client["local"]
29 | 
30 | # Define your S3 bucket and path
31 | S3_BUCKET = ""
32 | S3_PATH = ""
33 | 
34 | @app.post("/uploadFile")
35 | async def uploadtos3(data_file: UploadFile):
36 |     """
37 |     Uploads a file to Amazon S3 storage and stores metadata in MongoDB.
38 | 
39 |     This route allows users to upload a file, which is saved temporarily, uploaded to Amazon S3,
40 |     and then removed from the local file system. It returns the filename and S3 file path
41 |     in the response JSON, while storing file metadata in MongoDB.
42 | 43 | Args: 44 | data_file (UploadFile): The file to be uploaded. 45 | 46 | Returns: 47 | JSONResponse: A JSON response containing the filename, file size, upload time, and S3 file path. 48 | 49 | Raises: 50 | HTTPException: If the file specified in `data_file` is not found (HTTP status code 404). 51 | """ 52 | try: 53 | file_name = data_file.filename.split("/")[-1] 54 | 55 | # Save the file temporarily to the local file system 56 | with open(f"{file_name}", "wb") as out_file: 57 | content = await data_file.read() # async read 58 | out_file.write(content) # write to disk 59 | 60 | # Get the file size 61 | file_size = os.path.getsize(file_name) 62 | 63 | # Get the upload timestamp 64 | upload_time = str(datetime.now()) 65 | 66 | # Upload file to AWS S3 67 | wr.s3.upload( 68 | local_file=file_name, 69 | path=f"s3://{S3_BUCKET}/{S3_PATH}{file_name}", 70 | boto3_session=aws_s3, 71 | ) 72 | 73 | # Remove the local file after upload 74 | os.remove(file_name) 75 | 76 | # Prepare metadata to store in MongoDB 77 | metadata = { 78 | "filename": file_name, 79 | "file_size": file_size, 80 | "upload_time": upload_time, 81 | "s3_path": f"s3://{S3_BUCKET}/{S3_PATH}{file_name}", 82 | } 83 | 84 | # Insert file metadata into MongoDB 85 | result = db["file_metadata"].insert_one(metadata) 86 | 87 | 88 | # Return response 89 | response = { 90 | "filename": file_name, 91 | "file_size": file_size, 92 | "upload_time": upload_time, 93 | "file_path": f"s3://{S3_BUCKET}/{S3_PATH}{file_name}", 94 | "mongo_insert_status": result.acknowledged, 95 | } 96 | 97 | except FileNotFoundError: 98 | raise HTTPException(status_code=404, detail="File not found") 99 | 100 | except Exception as e: 101 | print(f"Error during file upload: {e}") 102 | raise HTTPException(status_code=500, detail="Error during file upload") 103 | 104 | return JSONResponse(content=response) 105 | -------------------------------------------------------------------------------- /Begin/CH_4_DocuChat_Frontend.py: 
-------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | 4 | BACKEND_URL="https://psychic-adventure-p4w6pqpgxw9c6gp6-8000.app.github.dev" 5 | 6 | def chat(user_input, data, session_id=None): 7 | """ 8 | Sends a user input to a chat API and returns the response. 9 | 10 | Args: 11 | user_input (str): The user's input. 12 | data (str): The data source. 13 | session_id (str, optional): Session identifier. Defaults to None. 14 | 15 | Returns: 16 | tuple: A tuple containing the response answer and the updated session_id. 17 | """ 18 | # API endpoint for chat 19 | url = BACKEND_URL+"/chat" 20 | 21 | # Print inputs for debugging 22 | print("user ", user_input) 23 | print("data", data) 24 | print("session_id", session_id) 25 | 26 | # Prepare payload for the API request 27 | if session_id is None: 28 | payload = json.dumps({"user_input": user_input, "data_source": data}) 29 | else: 30 | payload = json.dumps( 31 | {"user_input": user_input, "data_source": data, "session_id": session_id} 32 | ) 33 | 34 | # Set headers for the API request 35 | headers = { 36 | "accept": "application/json", 37 | "Content-Type": "application/json", 38 | } 39 | 40 | # Make a POST request to the chat API 41 | response = requests.request("POST", url, headers=headers, data=payload) 42 | 43 | # Print the API response for debugging 44 | print(response.json()) 45 | 46 | # Check if the request was successful (status code 200) 47 | if response.status_code == 200: 48 | # Return the response answer and updated session_id 49 | return response.json()["response"]["answer"], response.json()["session_id"] 50 | 51 | 52 | def upload_file(file_path): 53 | """ 54 | Uploads a file to a specified API endpoint. 55 | 56 | Args: 57 | file_path (str): The path to the file to be uploaded. 58 | 59 | Returns: 60 | str: The file path returned by the API. 
61 |     """
62 |     # Print file path for debugging
63 |     print("path", file_path)
64 | 
65 |     # Extract the filename from the file path in an OS-independent way
66 |     filename = os.path.basename(file_path)
67 | 
68 |     # API endpoint for file upload
69 |     url = BACKEND_URL+"/uploadFile"
70 |     print(url)
71 | 
72 |     # Prepare payload for the file upload request
73 |     payload = {}
74 |     files = [
75 |         (
76 |             "data_file",
77 |             (filename, open(file_path, "rb"), "application/pdf"),
78 |         )
79 |     ]
80 | 
81 |     # Set headers for the file upload request
82 |     headers = {"accept": "application/json"}
83 | 
84 |     # Make a POST request to upload the file
85 |     response = requests.request("POST", url, headers=headers, data=payload, files=files)
86 |     print(response.status_code)
87 | 
88 |     # Check if the file upload was successful (status code 200)
89 |     if response.status_code == 200:
90 |         # Print the API response for debugging
91 |         print(response.json())
92 |         # Return the file path returned by the API
93 |         return response.json()["file_path"]
94 | 
95 | 
96 | import streamlit as st
97 | import time
98 | import os
99 | 
100 | # Set page configuration for the Streamlit app
101 | st.set_page_config(page_title="Document Chat", page_icon="📕", layout="wide")
102 | 
103 | # Initialize chat history and session variables
104 | if "messages" not in st.session_state:
105 |     st.session_state.messages = []
106 | if "sessionid" not in st.session_state:
107 |     st.session_state.sessionid = None
108 | 
109 | # Allow user to upload a file (PDF or DOCX)
110 | data_file = st.file_uploader(
111 |     label="Input file", accept_multiple_files=False, type=["pdf", "docx"]
112 | )
113 | st.divider()
114 | 
115 | # Process the uploaded file if available
116 | if data_file is not None:
117 |     # Save the file temporarily
118 |     file_path = os.path.join(os.getcwd(),"temp", data_file.name)
119 |     with open(file_path, "wb") as f:
120 |         f.write(data_file.getbuffer())
121 | 
122 |     # Upload the file to a specified API endpoint
123 |     s3_upload_url = upload_file(file_path=file_path)
124 | 
125 |     s3_upload_url = s3_upload_url.split("/")[-1]
126 | 
127 | 
128 | # Display chat messages from history on app rerun
129 | for message in st.session_state.messages:
130 |     with st.chat_message(message["role"]):
131 |         st.markdown(message["content"])
132 | 
133 | # Accept user input
134 | if prompt := st.chat_input("You can ask any question"):
135 |     # Add user message to chat history
136 |     st.session_state.messages.append({"role": "user", "content": prompt})
137 |     # Display user message in chat message container
138 |     with st.chat_message("user"):
139 |         st.markdown(prompt)
140 | 
141 |     # Display assistant response in chat message container
142 |     with st.chat_message("assistant"):
143 |         if st.session_state.sessionid is None:
144 |             # If no existing session ID, start a new session
145 |             assistant_response, session_id = chat(
146 |                 prompt, data=s3_upload_url, session_id=None
147 |             )
148 |             st.session_state.sessionid = session_id
149 |         else:
150 |             # If existing session ID, continue the session
151 |             assistant_response, session_id = chat(
152 |                 prompt, session_id=st.session_state.sessionid, data=s3_upload_url
153 |             )
154 | 
155 |         message_placeholder = st.empty()
156 |         full_response = ""
157 | 
158 |         # Simulate stream of response with milliseconds delay
159 |         for chunk in assistant_response.split():
160 |             full_response += chunk + " "
161 |             time.sleep(0.05)
162 | 
163 |             # Add a blinking cursor to simulate typing
164 |             message_placeholder.markdown(full_response + "▌")
165 | 
166 |         message_placeholder.markdown(full_response)
167 | 
168 |         # Add assistant response to chat history
169 |         st.session_state.messages.append(
170 |             {"role": "assistant", "content": full_response}
171 |         )
--------------------------------------------------------------------------------
/Finish/CH_4_DocuChat_Frontend.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | 
4 | 
BACKEND_URL="https://psychic-adventure-p4w6pqpgxw9c6gp6-8000.app.github.dev" 5 | 6 | def chat(user_input, data, session_id=None): 7 | """ 8 | Sends a user input to a chat API and returns the response. 9 | 10 | Args: 11 | user_input (str): The user's input. 12 | data (str): The data source. 13 | session_id (str, optional): Session identifier. Defaults to None. 14 | 15 | Returns: 16 | tuple: A tuple containing the response answer and the updated session_id. 17 | """ 18 | # API endpoint for chat 19 | url = BACKEND_URL+"/chat" 20 | 21 | # Print inputs for debugging 22 | print("user ", user_input) 23 | print("data", data) 24 | print("session_id", session_id) 25 | 26 | # Prepare payload for the API request 27 | if session_id is None: 28 | payload = json.dumps({"user_input": user_input, "data_source": data}) 29 | else: 30 | payload = json.dumps( 31 | {"user_input": user_input, "data_source": data, "session_id": session_id} 32 | ) 33 | 34 | # Set headers for the API request 35 | headers = { 36 | "accept": "application/json", 37 | "Content-Type": "application/json", 38 | } 39 | 40 | # Make a POST request to the chat API 41 | response = requests.post(url, headers=headers, data=payload) 42 | 43 | # Print the API response for debugging 44 | print(response.json()) 45 | 46 | # Raise on HTTP errors instead of silently returning None 47 | response.raise_for_status() 48 | # Return the response answer and updated session_id 49 | return response.json()["response"]["answer"], response.json()["session_id"] 50 | 51 | 52 | def upload_file(file_path): 53 | """ 54 | Uploads a file to a specified API endpoint. 55 | 56 | Args: 57 | file_path (str): The path to the file to be uploaded. 58 | 59 | Returns: 60 | str: The file path returned by the API. 
61 | """ 62 | # Print file path for debugging 63 | print("path", file_path) 64 | 65 | # Extract the filename from the file path (portable across operating systems) 66 | filename = os.path.basename(file_path) 67 | 68 | # API endpoint for file upload 69 | url = BACKEND_URL+"/uploadFile" 70 | print(url) 71 | 72 | # Prepare payload for the file upload request 73 | payload = {} 74 | files = [ 75 | ( 76 | "data_file", 77 | (filename, open(file_path, "rb"), "application/pdf"), 78 | ) 79 | ] 80 | 81 | # Set headers for the file upload request 82 | headers = {"accept": "application/json"} 83 | 84 | # Make a POST request to upload the file 85 | response = requests.request("POST", url, headers=headers, data=payload, files=files) 86 | print(response.status_code) 87 | 88 | # Check if the file upload was successful (status code 200) 89 | if response.status_code == 200: 90 | # Print the API response for debugging 91 | print(response.json()) 92 | # Return the file path returned by the API 93 | return response.json()["file_path"] 94 | 95 | 96 | import streamlit as st 97 | import time 98 | import os 99 | 100 | # Set page configuration for the Streamlit app 101 | st.set_page_config(page_title="Document Chat", page_icon="📕", layout="wide") 102 | 103 | # Initialize chat history and session variables 104 | if "messages" not in st.session_state: 105 | st.session_state.messages = [] 106 | if "sessionid" not in st.session_state: 107 | st.session_state.sessionid = None 108 | 109 | # Allow user to upload a file (PDF or DOCX) 110 | data_file = st.file_uploader( 111 | label="Input file", accept_multiple_files=False, type=["pdf", "docx"] 112 | ) 113 | st.divider() 114 | 115 | # Process the uploaded file if available 116 | if data_file is not None: 117 | # Save the file temporarily 118 | file_path = os.path.join(os.getcwd(),"temp", data_file.name) 119 | with open(file_path, "wb") as f: 120 | f.write(data_file.getbuffer()) 121 | 122 | # Upload the file to a specified API endpoint 123 | s3_upload_url = upload_file(file_path=file_path) 
124 | # Keep only the file name from the S3 path returned by the API 125 | s3_upload_url = s3_upload_url.split("/")[-1] 126 | 127 | 128 | # Display chat messages from history on app rerun 129 | for message in st.session_state.messages: 130 | with st.chat_message(message["role"]): 131 | st.markdown(message["content"]) 132 | 133 | # Accept user input 134 | if prompt := st.chat_input("You can ask any question"): 135 | # Add user message to chat history 136 | st.session_state.messages.append({"role": "user", "content": prompt}) 137 | # Display user message in chat message container 138 | with st.chat_message("user"): 139 | st.markdown(prompt) 140 | 141 | # Display assistant response in chat message container 142 | with st.chat_message("assistant"): 143 | if st.session_state.sessionid is None: 144 | # If no existing session ID, start a new session 145 | assistant_response, session_id = chat( 146 | prompt, data=s3_upload_url, session_id=None 147 | ) 148 | st.session_state.sessionid = session_id 149 | else: 150 | # If existing session ID, continue the session 151 | assistant_response, session_id = chat( 152 | prompt, session_id=st.session_state.sessionid, data=s3_upload_url 153 | ) 154 | 155 | message_placeholder = st.empty() 156 | full_response = "" 157 | 158 | # Simulate streaming the response with a short delay between words 159 | for chunk in assistant_response.split(): 160 | full_response += chunk + " " 161 | time.sleep(0.05) 162 | 163 | # Add a blinking cursor to simulate typing 164 | message_placeholder.markdown(full_response + "▌") 165 | 166 | message_placeholder.markdown(full_response) 167 | 168 | # Add assistant response to chat history 169 | st.session_state.messages.append( 170 | {"role": "assistant", "content": full_response} 171 | ) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | LinkedIn Learning Exercise Files License Agreement 2 | ================================================== 3 | 4 | This License 
Agreement (the "Agreement") is a binding legal agreement 5 | between you (as an individual or entity, as applicable) and LinkedIn 6 | Corporation (“LinkedIn”). By downloading or using the LinkedIn Learning 7 | exercise files in this repository (“Licensed Materials”), you agree to 8 | be bound by the terms of this Agreement. If you do not agree to these 9 | terms, do not download or use the Licensed Materials. 10 | 11 | 1. License. 12 | - a. Subject to the terms of this Agreement, LinkedIn hereby grants LinkedIn 13 | members during their LinkedIn Learning subscription a non-exclusive, 14 | non-transferable copyright license, for internal use only, to 1) make a 15 | reasonable number of copies of the Licensed Materials, and 2) make 16 | derivative works of the Licensed Materials for the sole purpose of 17 | practicing skills taught in LinkedIn Learning courses. 18 | - b. Distribution. Unless otherwise noted in the Licensed Materials, subject 19 | to the terms of this Agreement, LinkedIn hereby grants LinkedIn members 20 | with a LinkedIn Learning subscription a non-exclusive, non-transferable 21 | copyright license to distribute the Licensed Materials, except the 22 | Licensed Materials may not be included in any product or service (or 23 | otherwise used) to instruct or educate others. 24 | 25 | 2. Restrictions and Intellectual Property. 26 | - a. You may not to use, modify, copy, make derivative works of, publish, 27 | distribute, rent, lease, sell, sublicense, assign or otherwise transfer the 28 | Licensed Materials, except as expressly set forth above in Section 1. 29 | - b. Linkedin (and its licensors) retains its intellectual property rights 30 | in the Licensed Materials. Except as expressly set forth in Section 1, 31 | LinkedIn grants no licenses. 32 | - c. 
You indemnify LinkedIn and its licensors and affiliates for i) any 33 | alleged infringement or misappropriation of any intellectual property rights 34 | of any third party based on modifications you make to the Licensed Materials, 35 | ii) any claims arising from your use or distribution of all or part of the 36 | Licensed Materials and iii) a breach of this Agreement. You will defend, hold 37 | harmless, and indemnify LinkedIn and its affiliates (and our and their 38 | respective employees, shareholders, and directors) from any claim or action 39 | brought by a third party, including all damages, liabilities, costs and 40 | expenses, including reasonable attorneys’ fees, to the extent resulting from, 41 | alleged to have resulted from, or in connection with: (a) your breach of your 42 | obligations herein; or (b) your use or distribution of any Licensed Materials. 43 | 44 | 3. Open source. This code may include open source software, which may be 45 | subject to other license terms as provided in the files. 46 | 47 | 4. Warranty Disclaimer. LINKEDIN PROVIDES THE LICENSED MATERIALS ON AN “AS IS” 48 | AND “AS AVAILABLE” BASIS. LINKEDIN MAKES NO REPRESENTATION OR WARRANTY, 49 | WHETHER EXPRESS OR IMPLIED, ABOUT THE LICENSED MATERIALS, INCLUDING ANY 50 | REPRESENTATION THAT THE LICENSED MATERIALS WILL BE FREE OF ERRORS, BUGS OR 51 | INTERRUPTIONS, OR THAT THE LICENSED MATERIALS ARE ACCURATE, COMPLETE OR 52 | OTHERWISE VALID. TO THE FULLEST EXTENT PERMITTED BY LAW, LINKEDIN AND ITS 53 | AFFILIATES DISCLAIM ANY IMPLIED OR STATUTORY WARRANTY OR CONDITION, INCLUDING 54 | ANY IMPLIED WARRANTY OR CONDITION OF MERCHANTABILITY OR FITNESS FOR A 55 | PARTICULAR PURPOSE, AVAILABILITY, SECURITY, TITLE AND/OR NON-INFRINGEMENT. 56 | YOUR USE OF THE LICENSED MATERIALS IS AT YOUR OWN DISCRETION AND RISK, AND 57 | YOU WILL BE SOLELY RESPONSIBLE FOR ANY DAMAGE THAT RESULTS FROM USE OF THE 58 | LICENSED MATERIALS TO YOUR COMPUTER SYSTEM OR LOSS OF DATA. 
NO ADVICE OR 59 | INFORMATION, WHETHER ORAL OR WRITTEN, OBTAINED BY YOU FROM US OR THROUGH OR 60 | FROM THE LICENSED MATERIALS WILL CREATE ANY WARRANTY OR CONDITION NOT 61 | EXPRESSLY STATED IN THESE TERMS. 62 | 63 | 5. Limitation of Liability. LINKEDIN SHALL NOT BE LIABLE FOR ANY INDIRECT, 64 | INCIDENTAL, SPECIAL, PUNITIVE, CONSEQUENTIAL OR EXEMPLARY DAMAGES, INCLUDING 65 | BUT NOT LIMITED TO, DAMAGES FOR LOSS OF PROFITS, GOODWILL, USE, DATA OR OTHER 66 | INTANGIBLE LOSSES . IN NO EVENT WILL LINKEDIN'S AGGREGATE LIABILITY TO YOU 67 | EXCEED $100. THIS LIMITATION OF LIABILITY SHALL: 68 | - i. APPLY REGARDLESS OF WHETHER (A) YOU BASE YOUR CLAIM ON CONTRACT, TORT, 69 | STATUTE, OR ANY OTHER LEGAL THEORY, (B) WE KNEW OR SHOULD HAVE KNOWN ABOUT 70 | THE POSSIBILITY OF SUCH DAMAGES, OR (C) THE LIMITED REMEDIES PROVIDED IN THIS 71 | SECTION FAIL OF THEIR ESSENTIAL PURPOSE; AND 72 | - ii. NOT APPLY TO ANY DAMAGE THAT LINKEDIN MAY CAUSE YOU INTENTIONALLY OR 73 | KNOWINGLY IN VIOLATION OF THESE TERMS OR APPLICABLE LAW, OR AS OTHERWISE 74 | MANDATED BY APPLICABLE LAW THAT CANNOT BE DISCLAIMED IN THESE TERMS. 75 | 76 | 6. Termination. This Agreement automatically terminates upon your breach of 77 | this Agreement or termination of your LinkedIn Learning subscription. On 78 | termination, all licenses granted under this Agreement will terminate 79 | immediately and you will delete the Licensed Materials. Sections 2-7 of this 80 | Agreement survive any termination of this Agreement. LinkedIn may discontinue 81 | the availability of some or all of the Licensed Materials at any time for any 82 | reason. 83 | 84 | 7. Miscellaneous. This Agreement will be governed by and construed in 85 | accordance with the laws of the State of California without regard to conflict 86 | of laws principles. 
The exclusive forum for any disputes arising out of or 87 | relating to this Agreement shall be an appropriate federal or state court 88 | sitting in the County of Santa Clara, State of California. If LinkedIn does 89 | not act to enforce a breach of this Agreement, that does not mean that 90 | LinkedIn has waived its right to enforce this Agreement. The Agreement does 91 | not create a partnership, agency relationship, or joint venture between the 92 | parties. Neither party has the power or authority to bind the other or to 93 | create any obligation or responsibility on behalf of the other. You may not, 94 | without LinkedIn’s prior written consent, assign or delegate any rights or 95 | obligations under these terms, including in connection with a change of 96 | control. Any purported assignment and delegation shall be ineffective. The 97 | Agreement shall bind and inure to the benefit of the parties, their respective 98 | successors and permitted assigns. If any provision of the Agreement is 99 | unenforceable, that provision will be modified to render it enforceable to the 100 | extent possible to give effect to the parties’ intentions and the remaining 101 | provisions will not be affected. This Agreement is the only agreement between 102 | you and LinkedIn regarding the Licensed Materials, and supersedes all prior 103 | agreements relating to the Licensed Materials. 
104 | 105 | Last Updated: March 2019 106 | -------------------------------------------------------------------------------- /Finish/CH_3_DocuChat_Backend.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | import pymongo 3 | # Import traceback for error handling 4 | import traceback 5 | 6 | # Import os and sys for system-related operations 7 | import os, sys 8 | import traceback # Import traceback for error handling 9 | from fastapi import ( 10 | FastAPI, 11 | UploadFile, 12 | status, 13 | HTTPException, 14 | ) # Import FastAPI components for building the web application 15 | from fastapi.responses import JSONResponse # Import JSONResponse for returning JSON responses 16 | from fastapi.middleware.cors import CORSMiddleware # Import CORS middleware to handle Cross-Origin Resource Sharing 17 | from langchain.text_splitter import RecursiveCharacterTextSplitter 18 | from langchain_openai import OpenAIEmbeddings 19 | from langchain_community.vectorstores import FAISS 20 | # from langchain_community.document_loaders import S3FileLoader 21 | from langchain_community.document_loaders import Docx2txtLoader,PyPDFLoader 22 | 23 | 24 | from langchain_community.callbacks import get_openai_callback 25 | from langchain.chains import ConversationalRetrievalChain 26 | 27 | from langchain_openai import ChatOpenAI 28 | import gc 29 | 30 | import urllib.parse 31 | import awswrangler as wr # Import AWS Wrangler for working with AWS services 32 | 33 | import boto3 # Import the boto3 library for interacting with AWS services 34 | 35 | # Import the os module for system-related operations 36 | 37 | # Check if the operating system is Windows 38 | if os.name == "nt": # Windows 39 | # If it's Windows, import the `load_dotenv` function from the `dotenv` library 40 | from dotenv import load_dotenv 41 | 42 | # Load environment variables from a `.secrets.env` file (used for local development) 43 | load_dotenv(".secrets.env") 
44 | 45 | # Retrieve and assign environment variables to variables 46 | # S3_KEY = os.environ.get("S3_KEY") # AWS S3 access key 47 | # S3_SECRET = os.environ.get("S3_SECRET") # AWS S3 secret access key 48 | # S3_BUCKET = os.environ.get("S3_BUCKET") # AWS S3 bucket name 49 | # S3_REGION = os.environ.get("S3_REGION") # AWS S3 region 50 | # OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") # OpenAI API key 51 | # MONGO_URL = os.environ.get("MONGO_URL") # MongoDB connection URL 52 | # S3_PATH = os.environ.get("S3_PATH") # AWS S3 path 53 | 54 | os.environ['OPENAI_API_KEY']="" 55 | S3_KEY="" 56 | S3_SECRET="" 57 | S3_BUCKET="" 58 | S3_REGION="" 59 | S3_PATH="" 60 | 61 | 62 | try: 63 | MONGO_URL="Add your credentials" 64 | 65 | # Connect to the MongoDB using the provided MONGO_URL 66 | client = pymongo.MongoClient(MONGO_URL, uuidRepresentation="standard") 67 | # Access the "chat_with_doc" database 68 | db = client["chat_with_doc"] 69 | # Access the "chat-history" collection within the database 70 | conversationcol = db["chat-history"] 71 | 72 | # Create an index on the "session_id" field, ensuring uniqueness 73 | conversationcol.create_index([("session_id")], unique=True) 74 | except Exception: 75 | # Handle exceptions and print detailed error information 76 | print(traceback.format_exc()) 77 | 78 | exc_type, exc_obj, exc_tb = sys.exc_info() 79 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 80 | # Print information about the exception type, filename, and line number 81 | print(exc_type, fname, exc_tb.tb_lineno) 82 | 83 | 84 | 85 | 86 | 87 | # Import the necessary modules and libraries 88 | 89 | 90 | class ChatMessageSent(BaseModel): 91 | session_id: str | None = None 92 | user_input: str 93 | data_source: str 94 | 95 | def get_response( 96 | file_name: str, 97 | session_id: str, 98 | query: str, 99 | model: str = "gpt-3.5-turbo-16k", 100 | temperature: float = 0, 101 | ): 102 | print("file name is ", file_name) 103 | file_name=file_name.split("/")[-1] 104 | """ 105 | 
Generate a response using a conversational model. 106 | 107 | This function generates a response using a conversational model like GPT-3.5 Turbo. It takes 108 | a file name to load data, a session ID to track the conversation, a query or question, and 109 | optional parameters for model selection and temperature control. 110 | 111 | Args: 112 | file_name (str): The name of the file to load data from. 113 | session_id (str): The session ID for tracking the conversation history. 114 | query (str): The user's query or question to be used in the conversation. 115 | model (str, optional): The model name to use (default is "gpt-3.5-turbo-16k"). 116 | temperature (float, optional): Temperature parameter for response randomness (default is 0). 117 | 118 | Returns: 119 | dict: A dictionary containing the generated response and other information. 120 | The response is stored under the "answer" key in the dictionary. 121 | 122 | Note: 123 | This function relies on various components such as OpenAIEmbeddings, PyPDFLoader/Docx2txtLoader, 124 | RecursiveCharacterTextSplitter, and ConversationalRetrievalChain. It prints 125 | information about token usage and cost during the model interaction. 
126 | 127 | 128 | """ 129 | embeddings = OpenAIEmbeddings() # load embeddings 130 | # download file from s3 131 | wr.s3.download(path=f"s3://docchat/documents/{file_name}",local_file=file_name,boto3_session=aws_s3) 132 | 133 | # loader = S3FileLoader( 134 | # bucket=S3_BUCKET, 135 | # key=S3_PATH + file_name.split("/")[-1], 136 | # aws_access_key_id=S3_KEY, 137 | # aws_secret_access_key=S3_SECRET, 138 | # ) 139 | if file_name.endswith(".docx"): 140 | loader=Docx2txtLoader(file_path=file_name.split("/")[-1]) 141 | else: 142 | loader = PyPDFLoader(file_name) 143 | 144 | # 1. load data 145 | data = loader.load() 146 | # 2. split data so it can fit the GPT token limit 147 | print("splitting ..") 148 | text_splitter = RecursiveCharacterTextSplitter( 149 | chunk_size=1000, chunk_overlap=0, separators=["\n", " ", ""] 150 | ) 151 | 152 | all_splits = text_splitter.split_documents(data) 153 | # 3. store data in vector db to conduct search 154 | vectorstore = FAISS.from_documents(all_splits, embeddings) 155 | # 4. init OpenAI 156 | llm = ChatOpenAI(model_name=model, temperature=temperature) 157 | 158 | # 5. 
pass the data to the OpenAI chain using the vector db 159 | qa_chain = ConversationalRetrievalChain.from_llm( 160 | llm, 161 | retriever=vectorstore.as_retriever(), 162 | ) 163 | # use the function to determine tokens used 164 | with get_openai_callback() as cb: 165 | answer = qa_chain( 166 | { 167 | "question": query, # user query 168 | "chat_history": load_memory_to_pass( 169 | session_id=session_id 170 | ), # pass chat history for context 171 | } 172 | ) 173 | print(f"Total Tokens: {cb.total_tokens}") 174 | print(f"Prompt Tokens: {cb.prompt_tokens}") 175 | print(f"Completion Tokens: {cb.completion_tokens}") 176 | print(f"Total Cost (USD): ${cb.total_cost}") 177 | answer["total_tokens_used"] = cb.total_tokens 178 | gc.collect() # collect garbage from memory 179 | return answer 180 | import uuid 181 | from typing import List 182 | 183 | 184 | def load_memory_to_pass(session_id: str): 185 | """ 186 | Load conversation history for a given session ID. 187 | 188 | Args: 189 | session_id (str): The unique session ID to retrieve the conversation history. 190 | 191 | Returns: 192 | List: A list of conversation history as a list of tuples (user_message, bot_response). 193 | 194 | """ 195 | data = conversationcol.find_one( 196 | {"session_id": session_id} 197 | ) # find the document with the session id 198 | history = [] # create empty array (in case we do not have any history) 199 | if data: # check if data is not None 200 | data = data["conversation"] # get the conversation field 201 | 202 | for x in range(0, len(data), 2): # iterate over the field 203 | history.extend( 204 | [(data[x], data[x + 1])] 205 | ) # history's expected format is [(human_message, ai_message)]; even indexes hold human messages, odd ones AI responses 206 | print(history) 207 | return history # return history 208 | 209 | 210 | def get_session() -> str: 211 | """ 212 | Generate a new session ID. 213 | 214 | Returns: 215 | str: A newly generated session ID as a string. 
216 | """ 217 | return str(uuid.uuid4()) 218 | 219 | 220 | def add_session_history(session_id: str, new_values: List): 221 | """ 222 | Add conversation history to an existing session or create a new session. 223 | 224 | Args: 225 | session_id (str): The session ID to which the conversation history will be added. 226 | new_values (List): A list of conversation history to be added to the session. 227 | 228 | """ 229 | document = conversationcol.find_one( 230 | {"session_id": session_id} 231 | ) # find the document with the session id 232 | if document: # check if data is not None 233 | # Extract the conversation list 234 | conversation = document["conversation"] 235 | 236 | # Append new values 237 | conversation.extend(new_values) 238 | 239 | # Update the document with the modified conversation list (for old session), we use update_one 240 | conversationcol.update_one( 241 | {"session_id": session_id}, {"$set": {"conversation": conversation}} 242 | ) 243 | else: 244 | conversationcol.insert_one( 245 | { 246 | "session_id": session_id, 247 | "conversation": new_values, 248 | } # to initiate history under a new session, note we use insert_one 249 | ) 250 | 251 | 252 | # Create a FastAPI application 253 | app = FastAPI() 254 | 255 | # Add CORS middleware to handle Cross-Origin Resource Sharing 256 | app.add_middleware( 257 | CORSMiddleware, 258 | allow_origins=["*"], # Allow requests from any origin, e.g., https://www.facebook.com 259 | allow_credentials=False, # Do not allow credentials (e.g., cookies) 260 | allow_methods=["*"], # Allow all HTTP methods 261 | allow_headers=["*"], # Allow all HTTP headers 262 | ) 263 | 264 | # Create an AWS S3 session with provided access credentials 265 | aws_s3 = boto3.Session( 266 | aws_access_key_id=S3_KEY, # Set the AWS access key ID 267 | aws_secret_access_key=S3_SECRET, # Set the AWS secret access key 268 | region_name="us-east-2", # Set the AWS region 269 | ) 270 | 271 | 272 | @app.post("/chat") 273 | async def 
create_chat_message( 274 | chats: ChatMessageSent, 275 | ): 276 | """ 277 | Create a chat message and obtain a response based on user input and session. 278 | 279 | This route allows users to send chat messages, and it returns responses based on 280 | the provided input and the associated session. If a session ID is not provided 281 | in the request, a new session is created. The conversation history is updated, and 282 | the response, along with the session ID, is returned. 283 | 284 | Args: 285 | chats (ChatMessageSent): A Pydantic model representing the chat message, including 286 | session ID, user input, and data source. 287 | 288 | Returns: 289 | JSONResponse: A JSON response containing the response message and the session ID. 290 | 291 | Raises: 292 | HTTPException: If an unexpected error occurs during the chat message processing, 293 | it returns a 204 NO CONTENT HTTP status with an "error" detail. 294 | """ 295 | try: 296 | if chats.session_id is None: 297 | session_id = get_session() 298 | 299 | payload = ChatMessageSent( 300 | session_id=session_id, 301 | user_input=chats.user_input, 302 | data_source=chats.data_source, 303 | ) 304 | payload = payload.model_dump() 305 | 306 | response = get_response( 307 | file_name=payload.get("data_source"), 308 | session_id=payload.get("session_id"), 309 | query=payload.get("user_input"), 310 | ) 311 | 312 | add_session_history( 313 | session_id=session_id, 314 | new_values=[payload.get("user_input"), response["answer"]], 315 | ) 316 | 317 | return JSONResponse( 318 | content={ 319 | "response": response, 320 | "session_id": str(session_id), 321 | } 322 | ) 323 | 324 | else: 325 | payload = ChatMessageSent( 326 | session_id=str(chats.session_id), 327 | user_input=chats.user_input, 328 | data_source=chats.data_source, 329 | ) 330 | payload = payload.model_dump() 331 | 332 | response = get_response( 333 | file_name=payload.get("data_source"), 334 | session_id=payload.get("session_id"), 335 | query=payload.get("user_input"), 
336 | ) 337 | 338 | add_session_history( 339 | session_id=str(chats.session_id), 340 | new_values=[payload.get("user_input"), response["answer"]], 341 | ) 342 | 343 | return JSONResponse( 344 | content={ 345 | "response": response, 346 | "session_id": str(chats.session_id), 347 | } 348 | ) 349 | except Exception: 350 | print(traceback.format_exc()) 351 | 352 | exc_type, exc_obj, exc_tb = sys.exc_info() 353 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 354 | print(exc_type, fname, exc_tb.tb_lineno) 355 | raise HTTPException(status_code=status.HTTP_204_NO_CONTENT, detail="error") 356 | 357 | 358 | @app.post("/uploadFile") 359 | async def uploadtos3(data_file: UploadFile): 360 | """ 361 | Uploads a file to Amazon S3 storage. 362 | 363 | This route allows users to upload a file, which is saved temporarily, uploaded to Amazon S3, 364 | and then removed from the local file system. It returns the filename and S3 file path 365 | in the response JSON. 366 | 367 | Args: 368 | data_file (UploadFile): The file to be uploaded. 369 | 370 | Returns: 371 | JSONResponse: A JSON response containing the filename and S3 file path. 372 | 373 | Raises: 374 | HTTPException: If the file specified in `data_file` is not found (HTTP status code 404). 
375 | """ 376 | print(data_file.filename.split("/")[-1]) 377 | try: 378 | with open(f"{data_file.filename}", "wb") as out_file: 379 | content = await data_file.read() # async read 380 | out_file.write(content) # async write 381 | wr.s3.upload( 382 | local_file=data_file.filename, 383 | path=f"s3://{S3_BUCKET}/{S3_PATH}{data_file.filename.split('/')[-1]}", 384 | boto3_session=aws_s3, 385 | ) 386 | os.remove(data_file.filename) 387 | response = { 388 | "filename": data_file.filename.split("/")[-1], 389 | "file_path": f"s3://{S3_BUCKET}/{S3_PATH}{data_file.filename.split('/')[-1]}", 390 | } 391 | 392 | except FileNotFoundError: 393 | raise HTTPException(status_code=404, detail="Item not found") 394 | 395 | return JSONResponse(content=response) 396 | 397 | 398 | import uvicorn 399 | if __name__=="__main__": 400 | uvicorn.run(app) 401 | -------------------------------------------------------------------------------- /Begin/CH_3_DocuChat_Backend.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | import pymongo 3 | # Import traceback for error handling 4 | import traceback 5 | 6 | # Import os and sys for system-related operations 7 | import os, sys 8 | import traceback # Import traceback for error handling 9 | from fastapi import ( 10 | FastAPI, 11 | UploadFile, 12 | status, 13 | HTTPException, 14 | ) # Import FastAPI components for building the web application 15 | from fastapi.responses import JSONResponse # Import JSONResponse for returning JSON responses 16 | from fastapi.middleware.cors import CORSMiddleware # Import CORS middleware to handle Cross-Origin Resource Sharing 17 | from langchain.text_splitter import RecursiveCharacterTextSplitter 18 | from langchain_openai import OpenAIEmbeddings 19 | from langchain_community.vectorstores import FAISS 20 | # from langchain_community.document_loaders import S3FileLoader 21 | from langchain_community.document_loaders import Docx2txtLoader,PyPDFLoader 
22 | 23 | 24 | from langchain_community.callbacks import get_openai_callback 25 | from langchain.chains import ConversationalRetrievalChain 26 | 27 | from langchain_openai import ChatOpenAI 28 | import gc 29 | 30 | import urllib.parse 31 | import awswrangler as wr # Import AWS Wrangler for working with AWS services 32 | 33 | import boto3 # Import the boto3 library for interacting with AWS services 34 | 35 | # Import the OS module for system-related operations 36 | 37 | # Check if the operating system is Windows 38 | if os.name == "nt": # Windows 39 | # If it's Windows, import the `load_dotenv` function from the `dotenv` library 40 | from dotenv import load_dotenv 41 | 42 | # Load environment variables from a `.secrets.env` file (used for local development) 43 | load_dotenv(".secrets.env") 44 | 45 | # Retrieve and assign environment variables to variables 46 | # S3_KEY = os.environ.get("S3_KEY") # AWS S3 access key 47 | # S3_SECRET = os.environ.get("S3_SECRET") # AWS S3 secret access key 48 | # S3_BUCKET = os.environ.get("S3_BUCKET") # AWS S3 bucket name 49 | # S3_REGION = os.environ.get("S3_REGION") # AWS S3 region 50 | # OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") # OpenAI API key 51 | # MONGO_URL = os.environ.get("MONGO_URL") # MongoDB connection URL 52 | # S3_PATH = os.environ.get("S3_PATH") # AWS S3 path 53 | 54 | os.environ['OPENAI_API_KEY']="" 55 | S3_KEY="" 56 | S3_SECRET="" 57 | S3_BUCKET="" 58 | S3_REGION="" 59 | S3_PATH="" 60 | 61 | 62 | try: 63 | MONGO_URL="Add your credentials" 64 | 65 | # Connect to the MongoDB using the provided MONGO_URL 66 | client = pymongo.MongoClient(MONGO_URL, uuidRepresentation="standard") 67 | # Access the "chat_with_doc" database 68 | db = client["chat_with_doc"] 69 | # Access the "chat-history" collection within the database 70 | conversationcol = db["chat-history"] 71 | 72 | # Create an index on the "session_id" field, ensuring uniqueness 73 | conversationcol.create_index([("session_id")], unique=True) 74 | except Exception: 
75 | # Handle exceptions and print detailed error information 76 | print(traceback.format_exc()) 77 | 78 | exc_type, exc_obj, exc_tb = sys.exc_info() 79 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 80 | # Print information about the exception type, filename, and line number 81 | print(exc_type, fname, exc_tb.tb_lineno) 82 | 83 | 84 | 85 | 86 | 87 | # Import the necessary modules and libraries 88 | 89 | 90 | class ChatMessageSent(BaseModel): 91 | session_id: str | None = None 92 | user_input: str 93 | data_source: str 94 | 95 | def get_response( 96 | file_name: str, 97 | session_id: str, 98 | query: str, 99 | model: str = "gpt-3.5-turbo-16k", 100 | temperature: float = 0, 101 | ): 102 | print("file name is ", file_name) 103 | file_name=file_name.split("/")[-1] 104 | """ 105 | Generate a response using a conversational model. 106 | 107 | This function generates a response using a conversational model like GPT-3.5 Turbo. It takes 108 | a file name to load data, a session ID to track the conversation, a query or question, and 109 | optional parameters for model selection and temperature control. 110 | 111 | Args: 112 | file_name (str): The name of the file to load data from. 113 | session_id (str): The session ID for tracking the conversation history. 114 | query (str): The user's query or question to be used in the conversation. 115 | model (str, optional): The model name to use (default is "gpt-3.5-turbo-16k"). 116 | temperature (float, optional): Temperature parameter for response randomness (default is 0). 117 | 118 | Returns: 119 | dict: A dictionary containing the generated response and other information. 120 | The response is stored under the "answer" key in the dictionary. 121 | 122 | Note: 123 | This function relies on various components such as OpenAIEmbeddings, PyPDFLoader/Docx2txtLoader, 124 | RecursiveCharacterTextSplitter, and ConversationalRetrievalChain. It prints 125 | information about token usage and cost during the model interaction. 
126 | 
127 | 
128 |     """
129 |     embeddings = OpenAIEmbeddings()  # load embeddings
130 |     # download the file from S3 (same bucket/path that /uploadFile writes to)
131 |     wr.s3.download(path=f"s3://{S3_BUCKET}/{S3_PATH}{file_name}", local_file=file_name, boto3_session=aws_s3)
132 | 
133 |     # loader = S3FileLoader(
134 |     #     bucket=S3_BUCKET,
135 |     #     key=S3_PATH + file_name.split("/")[-1],
136 |     #     aws_access_key_id=S3_KEY,
137 |     #     aws_secret_access_key=S3_SECRET,
138 |     # )
139 |     if file_name.endswith(".docx"):
140 |         loader = Docx2txtLoader(file_path=file_name.split("/")[-1])
141 |     else:
142 |         loader = PyPDFLoader(file_name)
143 | 
144 |     # 1. load data
145 |     data = loader.load()
146 |     # 2. split data so it can fit the GPT token limit
147 |     print("splitting ..")
148 |     text_splitter = RecursiveCharacterTextSplitter(
149 |         chunk_size=1000, chunk_overlap=0, separators=["\n", " ", ""]
150 |     )
151 | 
152 |     all_splits = text_splitter.split_documents(data)
153 |     # 3. store data in a vector db to conduct search
154 |     vectorstore = FAISS.from_documents(all_splits, embeddings)
155 |     # 4. init OpenAI
156 |     llm = ChatOpenAI(model_name=model, temperature=temperature)
157 | 
158 |     # 5.
pass the data to the OpenAI chain using the vector db
159 |     qa_chain = ConversationalRetrievalChain.from_llm(
160 |         llm,
161 |         retriever=vectorstore.as_retriever(),
162 |     )
163 |     # use the callback to determine tokens used
164 |     with get_openai_callback() as cb:
165 |         answer = qa_chain(
166 |             {
167 |                 "question": query,  # user query
168 |                 "chat_history": load_memory_to_pass(
169 |                     session_id=session_id
170 |                 ),  # pass chat history for context
171 |             }
172 |         )
173 |         print(f"Total Tokens: {cb.total_tokens}")
174 |         print(f"Prompt Tokens: {cb.prompt_tokens}")
175 |         print(f"Completion Tokens: {cb.completion_tokens}")
176 |         print(f"Total Cost (USD): ${cb.total_cost}")
177 |     answer["total_tokens_used"] = cb.total_tokens
178 |     gc.collect()  # collect garbage from memory
179 |     return answer
180 | import uuid
181 | from typing import List
182 | 
183 | 
184 | def load_memory_to_pass(session_id: str):
185 |     """
186 |     Load conversation history for a given session ID.
187 | 
188 |     Args:
189 |         session_id (str): The unique session ID to retrieve the conversation history.
190 | 
191 |     Returns:
192 |         List: A list of conversation history as a list of tuples (user_message, bot_response).
193 | 
194 |     """
195 |     data = conversationcol.find_one(
196 |         {"session_id": session_id}
197 |     )  # find the document with the session id
198 |     history = []  # start with an empty list (in case we do not have any history)
199 |     if data:  # check if data is not None
200 |         data = data["conversation"]  # get the conversation field
201 | 
202 |         for x in range(0, len(data), 2):  # iterate over the field
203 |             history.extend(
204 |                 [(data[x], data[x + 1])]
205 |             )  # the expected history format is [(human_message, ai_message)]; even indices hold the human message, odd indices the AI response
206 |     print(history)
207 |     return history  # return history
208 | 
209 | 
210 | def get_session() -> str:
211 |     """
212 |     Generate a new session ID.
213 | 
214 |     Returns:
215 |         str: A newly generated session ID as a string.
216 | """ 217 | return str(uuid.uuid4()) 218 | 219 | 220 | def add_session_history(session_id: str, new_values: List): 221 | """ 222 | Add conversation history to an existing session or create a new session. 223 | 224 | Args: 225 | session_id (str): The session ID to which the conversation history will be added. 226 | new_values (List): A list of conversation history to be added to the session. 227 | 228 | """ 229 | document = conversationcol.find_one( 230 | {"session_id": session_id} 231 | ) # find the document with the session id 232 | if document: # check if data is not None 233 | # Extract the conversation list 234 | conversation = document["conversation"] 235 | 236 | # Append new values 237 | conversation.extend(new_values) 238 | 239 | # Update the document with the modified conversation list (for old session), we use update_one 240 | conversationcol.update_one( 241 | {"session_id": session_id}, {"$set": {"conversation": conversation}} 242 | ) 243 | else: 244 | conversationcol.insert_one( 245 | { 246 | "session_id": session_id, 247 | "conversation": new_values, 248 | } # to initiate a history under a new session, note we uses insert_one 249 | ) 250 | 251 | 252 | # Create a FastAPI application 253 | app = FastAPI() 254 | 255 | # Add CORS middleware to handle Cross-Origin Resource Sharing 256 | app.add_middleware( 257 | CORSMiddleware, 258 | allow_origins=["*"], # Allow requests from any origin Ex, https://www.facebook.com 259 | allow_credentials=False, # Allow sending credentials (e.g., cookies) 260 | allow_methods=["*"], # Allow all HTTP methods 261 | allow_headers=["*"], # Allow all HTTP headers 262 | ) 263 | 264 | # Create an AWS S3 session with provided access credentials 265 | aws_s3 = boto3.Session( 266 | aws_access_key_id=S3_KEY, # Set the AWS access key ID 267 | aws_secret_access_key=S3_SECRET, # Set the AWS secret access key 268 | region_name="us-east-2", # Set the AWS region 269 | ) 270 | 271 | 272 | @app.post("/chat") 273 | async def 
create_chat_message(
274 |     chats: ChatMessageSent,
275 | ):
276 |     """
277 |     Create a chat message and obtain a response based on user input and session.
278 | 
279 |     This route allows users to send chat messages, and it returns responses based on
280 |     the provided input and the associated session. If a session ID is not provided
281 |     in the request, a new session is created. The conversation history is updated, and
282 |     the response, along with the session ID, is returned.
283 | 
284 |     Args:
285 |         chats (ChatMessageSent): A Pydantic model representing the chat message, including
286 |             session ID, user input, and data source.
287 | 
288 |     Returns:
289 |         JSONResponse: A JSON response containing the response message and the session ID.
290 | 
291 |     Raises:
292 |         HTTPException: If an unexpected error occurs during the chat message processing,
293 |             it returns a 204 NO CONTENT HTTP status with an "error" detail.
294 |     """
295 |     try:
296 |         if chats.session_id is None:
297 |             session_id = get_session()
298 | 
299 |             payload = ChatMessageSent(
300 |                 session_id=session_id,
301 |                 user_input=chats.user_input,
302 |                 data_source=chats.data_source,
303 |             )
304 |             payload = payload.model_dump()
305 | 
306 |             response = get_response(
307 |                 file_name=payload.get("data_source"),
308 |                 session_id=payload.get("session_id"),
309 |                 query=payload.get("user_input"),
310 |             )
311 | 
312 |             add_session_history(
313 |                 session_id=session_id,
314 |                 new_values=[payload.get("user_input"), response["answer"]],
315 |             )
316 | 
317 |             return JSONResponse(
318 |                 content={
319 |                     "response": response,
320 |                     "session_id": str(session_id),
321 |                 }
322 |             )
323 | 
324 |         else:
325 |             payload = ChatMessageSent(
326 |                 session_id=str(chats.session_id),
327 |                 user_input=chats.user_input,
328 |                 data_source=chats.data_source,
329 |             )
330 |             payload = payload.model_dump()  # model_dump() for consistency; .dict() is deprecated in Pydantic v2
331 | 
332 |             response = get_response(
333 |                 file_name=payload.get("data_source"),
334 |                 session_id=payload.get("session_id"),
335 |                 query=payload.get("user_input"),
336 | ) 337 | 338 | add_session_history( 339 | session_id=str(chats.session_id), 340 | new_values=[payload.get("user_input"), response["answer"]], 341 | ) 342 | 343 | return JSONResponse( 344 | content={ 345 | "response": response, 346 | "session_id": str(chats.session_id), 347 | } 348 | ) 349 | except Exception: 350 | print(traceback.format_exc()) 351 | 352 | exc_type, exc_obj, exc_tb = sys.exc_info() 353 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 354 | print(exc_type, fname, exc_tb.tb_lineno) 355 | raise HTTPException(status_code=status.HTTP_204_NO_CONTENT, detail="error") 356 | 357 | 358 | @app.post("/uploadFile") 359 | async def uploadtos3(data_file: UploadFile): 360 | """ 361 | Uploads a file to Amazon S3 storage. 362 | 363 | This route allows users to upload a file, which is saved temporarily, uploaded to Amazon S3, 364 | and then removed from the local file system. It returns the filename and S3 file path 365 | in the response JSON. 366 | 367 | Args: 368 | data_file (UploadFile): The file to be uploaded. 369 | 370 | Returns: 371 | JSONResponse: A JSON response containing the filename and S3 file path. 372 | 373 | Raises: 374 | HTTPException: If the file specified in `data_file` is not found (HTTP status code 404). 
375 | """ 376 | print(data_file.filename.split("/")[-1]) 377 | try: 378 | with open(f"{data_file.filename}", "wb") as out_file: 379 | content = await data_file.read() # async read 380 | out_file.write(content) # async write 381 | wr.s3.upload( 382 | local_file=data_file.filename, 383 | path=f"s3://{S3_BUCKET}/{S3_PATH}{data_file.filename.split('/')[-1]}", 384 | boto3_session=aws_s3, 385 | ) 386 | os.remove(data_file.filename) 387 | response = { 388 | "filename": data_file.filename.split("/")[-1], 389 | "file_path": f"s3://{S3_BUCKET}/{S3_PATH}{data_file.filename.split('/')[-1]}", 390 | } 391 | 392 | except FileNotFoundError: 393 | raise HTTPException(status_code=404, detail="Item not found") 394 | 395 | return JSONResponse(content=response) 396 | 397 | 398 | import uvicorn 399 | if __name__=="__main__": 400 | uvicorn.run(app) 401 | -------------------------------------------------------------------------------- /Begin/CH_2_Business_Prediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "363bf99c", 6 | "metadata": { 7 | "id": "363bf99c" 8 | }, 9 | "source": [ 10 | "## Installing the required libraries" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "08081f18", 17 | "metadata": { 18 | "executionInfo": { 19 | "elapsed": 10, 20 | "status": "ok", 21 | "timestamp": 1709836774197, 22 | "user": { 23 | "displayName": "Priya Mohan", 24 | "userId": "10194897099303360694" 25 | }, 26 | "user_tz": 300 27 | }, 28 | "id": "08081f18" 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "#import all of the required libraries and classes right here\n", 33 | "\n", 34 | "\n", 35 | "import pandas as pd\n", 36 | "import re\n", 37 | "from sklearn.preprocessing import StandardScaler\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "import numpy as np\n", 40 | "import warnings\n", 41 | "from sklearn.metrics import mean_squared_error as 
mse\n", 42 | "from sklearn.model_selection import train_test_split, GridSearchCV\n", 43 | "from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n", 44 | "from sklearn.linear_model import LinearRegression\n", 45 | "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n", 46 | "\n", 47 | "warnings.simplefilter('ignore')" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "id": "85ac620e", 53 | "metadata": { 54 | "id": "85ac620e" 55 | }, 56 | "source": [ 57 | "## Data Ingestion" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "0961b773", 64 | "metadata": { 65 | "executionInfo": { 66 | "elapsed": 831, 67 | "status": "ok", 68 | "timestamp": 1709836775020, 69 | "user": { 70 | "displayName": "Priya Mohan", 71 | "userId": "10194897099303360694" 72 | }, 73 | "user_tz": 300 74 | }, 75 | "id": "0961b773" 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "df=pd.read_excel('CH_2_Coffee Shop data.xlsx')\n", 80 | "population=pd.read_csv('population.csv',skiprows=[0])" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "id": "60162560", 87 | "metadata": { 88 | "colab": { 89 | "base_uri": "https://localhost:8080/", 90 | "height": 365 91 | }, 92 | "executionInfo": { 93 | "elapsed": 34, 94 | "status": "ok", 95 | "timestamp": 1709836775020, 96 | "user": { 97 | "displayName": "Priya Mohan", 98 | "userId": "10194897099303360694" 99 | }, 100 | "user_tz": 300 101 | }, 102 | "id": "60162560", 103 | "outputId": "017aa4c8-79c7-4744-ce2b-f2e8e4e6e95e" 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "population.head()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "id": "b4048b6b", 114 | "metadata": { 115 | "colab": { 116 | "base_uri": "https://localhost:8080/", 117 | "height": 310 118 | }, 119 | "executionInfo": { 120 | "elapsed": 31, 121 | "status": "ok", 122 | "timestamp": 1709836775020, 123 | "user": { 124 | 
"displayName": "Priya Mohan", 125 | "userId": "10194897099303360694" 126 | }, 127 | "user_tz": 300 128 | }, 129 | "id": "b4048b6b", 130 | "outputId": "ab86eb91-7562-4e54-873f-01f9f498e9f8" 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "df.head()#checking first five rows" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "159e8131", 141 | "metadata": { 142 | "colab": { 143 | "base_uri": "https://localhost:8080/" 144 | }, 145 | "executionInfo": { 146 | "elapsed": 30, 147 | "status": "ok", 148 | "timestamp": 1709836775021, 149 | "user": { 150 | "displayName": "Priya Mohan", 151 | "userId": "10194897099303360694" 152 | }, 153 | "user_tz": 300 154 | }, 155 | "id": "159e8131", 156 | "outputId": "5b973ca1-375d-4a4b-964d-55b5c821d98d" 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "# check for data info\n", 161 | "df.info()\n" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "fadd613c", 168 | "metadata": { 169 | "colab": { 170 | "base_uri": "https://localhost:8080/" 171 | }, 172 | "executionInfo": { 173 | "elapsed": 27, 174 | "status": "ok", 175 | "timestamp": 1709836775021, 176 | "user": { 177 | "displayName": "Priya Mohan", 178 | "userId": "10194897099303360694" 179 | }, 180 | "user_tz": 300 181 | }, 182 | "id": "fadd613c", 183 | "outputId": "e18c28b2-41fb-4c66-ee2f-572ef4623d2d" 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "#check the number of records and features\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "id": "38f41ea9", 194 | "metadata": { 195 | "colab": { 196 | "base_uri": "https://localhost:8080/" 197 | }, 198 | "executionInfo": { 199 | "elapsed": 26, 200 | "status": "ok", 201 | "timestamp": 1709836775021, 202 | "user": { 203 | "displayName": "Priya Mohan", 204 | "userId": "10194897099303360694" 205 | }, 206 | "user_tz": 300 207 | }, 208 | "id": "38f41ea9", 209 | "outputId": 
"10beb0e0-9e18-4297-a7d0-1203385f10e3" 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "population.shape" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "id": "5590d579", 220 | "metadata": { 221 | "colab": { 222 | "base_uri": "https://localhost:8080/", 223 | "height": 300 224 | }, 225 | "executionInfo": { 226 | "elapsed": 23, 227 | "status": "ok", 228 | "timestamp": 1709836775021, 229 | "user": { 230 | "displayName": "Priya Mohan", 231 | "userId": "10194897099303360694" 232 | }, 233 | "user_tz": 300 234 | }, 235 | "id": "5590d579", 236 | "outputId": "670ef3c9-3bc2-4335-bb23-9ab8b7cc399f" 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "# get basic stats about the data\n" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "id": "2cdde9c6", 247 | "metadata": { 248 | "colab": { 249 | "base_uri": "https://localhost:8080/", 250 | "height": 583 251 | }, 252 | "executionInfo": { 253 | "elapsed": 452, 254 | "status": "ok", 255 | "timestamp": 1709836775451, 256 | "user": { 257 | "displayName": "Priya Mohan", 258 | "userId": "10194897099303360694" 259 | }, 260 | "user_tz": 300 261 | }, 262 | "id": "2cdde9c6", 263 | "outputId": "f45f5b65-9abf-4995-f29f-7f4be0123a27" 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "ax=df['City'].value_counts().head(5).plot(kind='bar')\n", 268 | "ax.set_title('Top 5 cities with most cofee shops')\n", 269 | "plt.show()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "id": "57d1c1d5", 276 | "metadata": { 277 | "colab": { 278 | "base_uri": "https://localhost:8080/", 279 | "height": 642 280 | }, 281 | "executionInfo": { 282 | "elapsed": 319, 283 | "status": "ok", 284 | "timestamp": 1709836775759, 285 | "user": { 286 | "displayName": "Priya Mohan", 287 | "userId": "10194897099303360694" 288 | }, 289 | "user_tz": 300 290 | }, 291 | "id": "57d1c1d5", 292 | "outputId": "484c78f7-92d4-4ecb-ed54-9a88dee76369" 293 | 
},
294 | "outputs": [],
295 | "source": [
296 | "ax=df['Business Name'].value_counts().head(10).plot(kind='bar')\n",
297 | "ax.set_title('Top 10 most famous brands')\n",
298 | "plt.show()"
299 | ]
300 | },
301 | {
302 | "cell_type": "markdown",
303 | "id": "22ea26fe",
304 | "metadata": {
305 | "id": "22ea26fe"
306 | },
307 | "source": [
308 | "## Data Preprocessing"
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": null,
314 | "id": "f9ac6d7d",
315 | "metadata": {
316 | "colab": {
317 | "base_uri": "https://localhost:8080/"
318 | },
319 | "executionInfo": {
320 | "elapsed": 358,
321 | "status": "ok",
322 | "timestamp": 1709836776106,
323 | "user": {
324 | "displayName": "Priya Mohan",
325 | "userId": "10194897099303360694"
326 | },
327 | "user_tz": 300
328 | },
329 | "id": "f9ac6d7d",
330 | "outputId": "bcca8809-1e3a-42e1-ce34-eb157bb36fbb"
331 | },
332 | "outputs": [],
333 | "source": [
334 | "df.isna().sum()\n",
335 | "# no null values\n",
336 | "# if we had null values we would impute them: numerical features with the mean, categorical features with the mode (the most frequently occurring value)"
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": null,
342 | "id": "b119ace2",
343 | "metadata": {
344 | "executionInfo": {
345 | "elapsed": 14,
346 | "status": "ok",
347 | "timestamp": 1709836776106,
348 | "user": {
349 | "displayName": "Priya Mohan",
350 | "userId": "10194897099303360694"
351 | },
352 | "user_tz": 300
353 | },
354 | "id": "b119ace2"
355 | },
356 | "outputs": [],
357 | "source": [
358 | "#converting zipcode to object data (str) - We need to join the zip code with the population data. Converting the coffee shop data.
Since a zip code is an alphanumeric identifier, it should be stored as a string.\n",
359 | "df['Zip Code']=df['Zip Code'].astype(str)"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": null,
365 | "id": "ce458280",
366 | "metadata": {
367 | "executionInfo": {
368 | "elapsed": 14,
369 | "status": "ok",
370 | "timestamp": 1709836776106,
371 | "user": {
372 | "displayName": "Priya Mohan",
373 | "userId": "10194897099303360694"
374 | },
375 | "user_tz": 300
376 | },
377 | "id": "ce458280"
378 | },
379 | "outputs": [],
380 | "source": [
381 | "#extract zip code from population\n",
382 | "# Find the zip codes that match a 5-digit pattern: take the last 5 digits of the population geography string and use them to create a new Zip Code column\n",
383 | "\n",
384 | "def find_zip_code(geocode):\n",
385 | "    pattern = r'\\d{5}$'\n",
386 | "\n",
387 | "    match = re.search(pattern, geocode)\n",
388 | "\n",
389 | "    if match:\n",
390 | "        zip_code = match.group(0)\n",
391 | "        return zip_code\n"
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": null,
397 | "id": "38881b0b",
398 | "metadata": {
399 | "executionInfo": {
400 | "elapsed": 14,
401 | "status": "ok",
402 | "timestamp": 1709836776106,
403 | "user": {
404 | "displayName": "Priya Mohan",
405 | "userId": "10194897099303360694"
406 | },
407 | "user_tz": 300
408 | },
409 | "id": "38881b0b"
410 | },
411 | "outputs": [],
412 | "source": [
413 | "# The actual conversion is below.
The above is the function\n",
414 | "\n",
415 | "population['Zip Code']=population['Geography'].apply(find_zip_code)"
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": null,
421 | "id": "1b22bb57",
422 | "metadata": {
423 | "executionInfo": {
424 | "elapsed": 14,
425 | "status": "ok",
426 | "timestamp": 1709836776107,
427 | "user": {
428 | "displayName": "Priya Mohan",
429 | "userId": "10194897099303360694"
430 | },
431 | "user_tz": 300
432 | },
433 | "id": "1b22bb57"
434 | },
435 | "outputs": [],
436 | "source": [
437 | "cafe_data=df.copy()\n",
438 | "# merging in the population via zip code, as population is an important feature for determining price by location\n",
439 | "df=pd.merge(cafe_data,population,on='Zip Code')\n",
440 | "#notice that the data size is reduced after the join"
441 | ]
442 | },
443 | {
444 | "cell_type": "code",
445 | "execution_count": null,
446 | "id": "1102897f",
447 | "metadata": {
448 | "executionInfo": {
449 | "elapsed": 14,
450 | "status": "ok",
451 | "timestamp": 1709836776107,
452 | "user": {
453 | "displayName": "Priya Mohan",
454 | "userId": "10194897099303360694"
455 | },
456 | "user_tz": 300
457 | },
458 | "id": "1102897f"
459 | },
460 | "outputs": [],
461 | "source": [
462 | "#keeping only Total from population.
From the population dataset we keep only the Total column, alongside the original cafe columns.\n",
463 | "columns=cafe_data.columns.values.tolist()+['Total']\n",
464 | "df=df[columns]\n",
465 | "#rename Total to Population\n",
466 | "df=df.rename(columns={\"Total\":\"Population\"})"
467 | ]
468 | },
469 | {
470 | "cell_type": "code",
471 | "execution_count": null,
472 | "id": "4a796f18",
473 | "metadata": {},
474 | "outputs": [],
475 | "source": [
476 | "df"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": null,
482 | "id": "9270e1ca",
483 | "metadata": {
484 | "colab": {
485 | "base_uri": "https://localhost:8080/"
486 | },
487 | "executionInfo": {
488 | "elapsed": 14,
489 | "status": "ok",
490 | "timestamp": 1709836776107,
491 | "user": {
492 | "displayName": "Priya Mohan",
493 | "userId": "10194897099303360694"
494 | },
495 | "user_tz": 300
496 | },
497 | "id": "9270e1ca",
498 | "outputId": "999db953-db2b-42b7-de5b-4a413050f02c",
499 | "scrolled": true
500 | },
501 | "outputs": [],
502 | "source": [
503 | "#keeping only relevant features\n",
504 | "df= df[['Zip Code','Rating','Median Salary','Latte Price','Population']]\n",
505 | "#df.shape\n"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": null,
511 | "id": "a9ed9e13",
512 | "metadata": {},
513 | "outputs": [],
514 | "source": [
515 | "df.columns "
516 | ]
517 | },
518 | {
519 | "cell_type": "code",
520 | "execution_count": null,
521 | "id": "d804d8c7",
522 | "metadata": {},
523 | "outputs": [],
524 | "source": [
525 | "# Calculate the total number of coffee shops for each zip code\n",
526 | "coffee_shop_counts = df['Zip Code'].value_counts().reset_index()\n",
527 | "coffee_shop_counts.columns = ['Zip Code', 'CoffeeShopCount']\n",
528 | "\n",
529 | "# Ensure 'Zip Code' is of type string in both DataFrames\n",
530 | "df['Zip Code'] = df['Zip Code'].astype(str)\n",
531 | "coffee_shop_counts['Zip Code'] = coffee_shop_counts['Zip Code'].astype(str)\n",
532 | "\n",
533 | "# Merge the
counts back into the original DataFrame\n",
534 | "df = df.merge(coffee_shop_counts, on='Zip Code', how='left')\n",
535 | "\n",
536 | "# Print the updated DataFrame\n",
537 | "print(df)\n",
538 | "\n",
539 | "# Criteria:\n",
540 | "# a. High population\n",
541 | "# b. Low total number of coffee shops\n",
542 | "# c. Low ratings\n",
543 | "# d. High median salary\n",
544 | "\n",
545 | "# Sorting the DataFrame based on the criteria\n",
546 | "sorted_df = df.sort_values(by=['Population', 'CoffeeShopCount', 'Rating', 'Median Salary'],\n",
547 | "                           ascending=[False, True, True, False]).reset_index(drop=True)\n"
548 | ]
549 | },
550 | {
551 | "cell_type": "code",
552 | "execution_count": null,
553 | "id": "9faa38fa",
554 | "metadata": {},
555 | "outputs": [],
556 | "source": [
557 | "# Build a list of up to 5 unique zip codes: skip zip codes already in the list and stop adding once it holds 5.\n",
558 | "# Dedupe the zip code column and display all of the records for the top 5.\n",
559 | "lst=[]\n",
560 | "for i in range(len(sorted_df)):\n",
561 | "    if len(lst)!=5:\n",
562 | "        if (sorted_df['Zip Code'][i]) not in lst:\n",
563 | "            lst.append(sorted_df['Zip Code'][i])\n",
564 | "    \n",
565 | "# Filter 'sorted_df' to include only rows where 'Zip Code' is in 'lst'\n",
566 | "top_5_zip_codes_df = sorted_df[sorted_df['Zip Code'].isin(lst)]\n",
567 | "\n",
568 | "top_5_zip_codes_df"
569 | ]
570 | },
571 | {
572 | "cell_type": "code",
573 | "execution_count": null,
574 | "id": "29329ce4",
575 | "metadata": {},
576 | "outputs": [],
577 | "source": [
578 | "X = df.drop(['Latte Price', 'Zip Code'], axis=1)  # Features excluding 'Latte Price' and 'Zip Code'\n",
579 | "y = df['Latte Price']  # Target variable\n"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": null,
585 | "id": "80681397",
586 | "metadata": {},
587 | "outputs": [],
588 | "source": [
589 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
590 | ]
591
| },
592 | {
593 | "cell_type": "code",
594 | "execution_count": null,
595 | "id": "239d6741",
596 | "metadata": {},
597 | "outputs": [],
598 | "source": [
599 | "#scaling\n",
600 | "sc = StandardScaler()\n",
601 | "\n",
602 | "X_train = sc.fit_transform(X_train)\n",
603 | "X_test=sc.transform(X_test)"
604 | ]
605 | },
606 | {
607 | "cell_type": "markdown",
608 | "id": "9ddbd359",
609 | "metadata": {},
610 | "source": [
611 | "## Model Selection"
612 | ]
613 | },
614 | {
615 | "cell_type": "code",
616 | "execution_count": null,
617 | "id": "3a70f8ea",
618 | "metadata": {},
619 | "outputs": [],
620 | "source": [
621 | "#Model Selection\n",
622 | "models = {\n",
623 | "    \n",
624 | "    \n",
625 | "    \n",
626 | "}\n"
627 | ]
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": null,
632 | "id": "85c80511",
633 | "metadata": {},
634 | "outputs": [],
635 | "source": [
636 | "#Hyperparameter Tuning\n",
637 | "param_grid = {\n",
638 | "    'Random Forest': {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20]},\n",
639 | "    'Gradient Boosting': {'n_estimators': [50, 100, 200], 'learning_rate': [0.01, 0.1, 0.2], 'max_depth': [3, 5, 10]},\n",
640 | "}\n"
641 | ]
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": null,
646 | "id": "db3a1c92",
647 | "metadata": {},
648 | "outputs": [],
649 | "source": [
650 | "for model_name, model in models.items():\n",
651 | "    if model_name in param_grid:\n",
652 | "        # Perform hyperparameter tuning using GridSearchCV\n",
653 | "        grid_search = GridSearchCV(model, param_grid[model_name], cv=5, scoring='neg_mean_squared_error')\n",
654 | "        grid_search.fit(X_train, y_train)  # tune on the training split only, so test data does not leak into model selection\n",
655 | "\n",
656 | "        # Set the best hyperparameters to the model\n",
657 | "        models[model_name] = grid_search.best_estimator_\n"
658 | ]
659 | },
660 | {
661 | "cell_type": "markdown",
662 | "id": "5ea0ec53",
663 | "metadata": {},
664 | "source": [
665 | "## Model Training and Evaluation"
666 | ]
667 | },
668 | {
669 | "cell_type": "code",
670 |
"execution_count": null, 671 | "id": "77649861", 672 | "metadata": {}, 673 | "outputs": [], 674 | "source": [ 675 | "#Model Training\n", 676 | "\n" 677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": null, 682 | "id": "0d643968", 683 | "metadata": {}, 684 | "outputs": [], 685 | "source": [ 686 | "# Model Evaluation\n", 687 | "for model_name, model in models.items():\n", 688 | " # Evaluate the model on the testing set\n", 689 | " y_pred = \n", 690 | " print(f\"{model_name} Metrics:\")\n", 691 | " print(\"Mean Absolute Error:\", mean_absolute_error(y_test, y_pred))\n", 692 | " print(\"Mean Squared Error:\", mean_squared_error(y_test, y_pred))\n", 693 | " print(\"R-squared:\", r2_score(y_test, y_pred))\n", 694 | " print()\n", 695 | "\n" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": null, 701 | "id": "ae165610", 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [ 705 | "#We want this dataframe to be same as the training data so that model can predict the value\n", 706 | "zip_codes_df= top_5_zip_codes_df.drop(['Zip Code', 'Latte Price'], axis=1)\n", 707 | "zip_codes_df= sc.transform(zip_codes_df)" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": null, 713 | "id": "0a9a4dab", 714 | "metadata": {}, 715 | "outputs": [], 716 | "source": [ 717 | "for model_name, model in models.items():\n", 718 | " # Predict the prices for lattes in the top 5 zip codes\n", 719 | " predicted_prices = model.predict(zip_codes_df)\n", 720 | " print(f\"{model_name} Predicted Prices for Top 5 Zip Codes:\")\n", 721 | " print(predicted_prices)\n", 722 | " print()" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": null, 728 | "id": "d137ea16", 729 | "metadata": {}, 730 | "outputs": [], 731 | "source": [ 732 | "predictions = {}\n", 733 | "\n", 734 | "for model_name, model in models.items():\n", 735 | " # Predict the prices for lattes in the top 5 zip codes\n", 736 | " 
predicted_prices = model.predict(zip_codes_df)\n", 737 | " predictions[model_name] = predicted_prices\n", 738 | "\n", 739 | "# Convert the predictions dictionary to a DataFrame\n", 740 | "predictions_df = pd.DataFrame(predictions)\n", 741 | "# Add the zip codes to the predictions DataFrame\n", 742 | "predictions_df['Zip Code'] = top_5_zip_codes_df['Zip Code'].values\n", 743 | "\n", 744 | "# Rearrange the columns to have 'Zip Code' as the first column\n", 745 | "cols = ['Zip Code'] + [col for col in predictions_df.columns if col != 'Zip Code']\n", 746 | "predictions_df = predictions_df[cols]\n", 747 | "\n", 748 | "predictions_df" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": null, 754 | "id": "be5892b9", 755 | "metadata": {}, 756 | "outputs": [], 757 | "source": [ 758 | "agg_df = predictions_df.groupby('Zip Code')['Gradient Boosting'].agg([(\"Highest\", \"max\"), (\"Lowest\", \"min\")]).reset_index()\n", 759 | "agg_df.columns = ['Zip Code', 'Highest', 'Lowest']\n", 760 | "print(agg_df)" 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": null, 766 | "id": "27956af9", 767 | "metadata": {}, 768 | "outputs": [], 769 | "source": [] 770 | } 771 | ], 772 | "metadata": { 773 | "colab": { 774 | "provenance": [] 775 | }, 776 | "kernelspec": { 777 | "display_name": "Python 3 (ipykernel)", 778 | "language": "python", 779 | "name": "python3" 780 | }, 781 | "language_info": { 782 | "codemirror_mode": { 783 | "name": "ipython", 784 | "version": 3 785 | }, 786 | "file_extension": ".py", 787 | "mimetype": "text/x-python", 788 | "name": "python", 789 | "nbconvert_exporter": "python", 790 | "pygments_lexer": "ipython3", 791 | "version": "3.10.13" 792 | } 793 | }, 794 | "nbformat": 4, 795 | "nbformat_minor": 5 796 | } 797 | --------------------------------------------------------------------------------
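A note on the backend's chat-history convention: CH_3_DocuChat_Backend.py stores each conversation in MongoDB as a flat list that alternates human and AI messages, and `load_memory_to_pass` folds that list into `(human_message, ai_message)` tuples before passing it to the chain. A minimal standalone sketch of that pairing logic (the sample conversation is made up for illustration):

```python
# Sketch of the history convention used by load_memory_to_pass in the backend:
# the stored list alternates [human, ai, human, ai, ...]; the retrieval chain
# expects a list of (human_message, ai_message) tuples.

def pair_history(flat_history):
    """Fold a flat [human, ai, ...] list into (human, ai) tuples."""
    return [
        (flat_history[i], flat_history[i + 1])
        for i in range(0, len(flat_history), 2)
    ]

# hypothetical sample conversation
flat = [
    "Hi, what is this PDF about?", "It is a rental agreement.",
    "Who are the parties?", "A landlord and a tenant.",
]
print(pair_history(flat))
```

Because `add_session_history` always appends `[user_input, answer]` in order, the even indices are guaranteed to be human messages and the odd indices AI responses, which is what makes this simple stride-2 fold safe.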
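Similarly, the notebook's `find_zip_code` helper extracts the trailing 5-digit zip code from a census-style `Geography` string with the regex `\d{5}$`. A self-contained version of the same idea, with hypothetical geography labels (the real `population.csv` values are not shown in the repo):

```python
import re


def find_zip_code(geocode):
    """Return the trailing 5-digit zip code in a geography string, or None."""
    match = re.search(r"\d{5}$", geocode)  # anchor at the end of the string
    return match.group(0) if match else None


# hypothetical census-style geography labels
print(find_zip_code("ZCTA5 10001"))    # a trailing 5-digit code is found
print(find_zip_code("United States"))  # no digits at the end, returns None
```

Anchoring with `$` matters: it picks up the zip code only when it ends the string, so labels without a trailing code simply yield `None`, which the notebook later relies on when merging on the `Zip Code` column.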