├── .aws
│   ├── task-definition-actions.json
│   └── task-definition.json
├── .devcontainer
│   └── devcontainer.json
├── .github
│   └── workflows
│       ├── cd.yml
│       └── ci.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── docs
│   ├── bedrock_tutorial.md
│   ├── github_actions_secret_keys.md
│   ├── microservice.md
│   ├── network.md
│   ├── s3.md
│   ├── terraform.md
│   └── vector_store.md
├── documents
│   ├── cnu
│   │   ├── edital-cpnu-bloco-1-10jan2024.pdf
│   │   ├── edital-cpnu-bloco-3-10jan2024.pdf
│   │   ├── edital-cpnu-bloco-4-10jan2024.pdf
│   │   ├── edital-cpnu-bloco-5-10jan2024.txt
│   │   ├── edital-cpnu-bloco-6-10jan2024.txt
│   │   ├── edital-cpnu-bloco-7-10jan2024.txt
│   │   └── edital-cpnu-bloco-8-10jan2024.txt
│   └── immigration
│       ├── welcome_ca.pdf
│       └── welcome_eua.pdf
├── files
│   └── lambda_payload.zip
├── format.sh
├── lambda_functions
│   ├── docker
│   │   └── Dockerfile
│   └── src
│       ├── create_vector_store.py
│       ├── main.py
│       └── utils.py
├── lint.sh
├── poetry.lock
├── pyproject.toml
├── scripts
│   ├── deploy.sh
│   ├── deploy_lambda.sh
│   ├── docker_run.sh
│   ├── install_aws_cli.sh
│   ├── install_graph_viz.sh
│   ├── install_poetry.sh
│   ├── install_terraform.sh
│   ├── package_lambda_layer.sh
│   ├── set_secrets.sh
│   ├── terraform_fmt.sh
│   ├── terraform_init.sh
│   ├── terraform_migrate.sh
│   └── upload_state.sh
├── src
│   ├── __init__.py
│   ├── app
│   │   └── main.py
│   └── cli
│       ├── __init__.py
│       ├── bedrock_cli.py
│       ├── qdrant_cli.py
│       └── utils.py
├── terraform
│   ├── .terraform.lock.hcl
│   ├── api_gateway
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   └── variables.tf
│   ├── ecr
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   └── variables.tf
│   ├── ecs
│   │   ├── ecs_task_executor_policy.tf
│   │   ├── ecs_task_policy.tf
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   └── variables.tf
│   ├── iam
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   ├── s3_state_policy.tf
│   │   └── variables.tf
│   ├── lambda_functions
│   │   ├── lambda_policy.tf
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   └── variables.tf
│   ├── load_balancer
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   └── variables.tf
│   ├── main.tf
│   ├── network
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   ├── variables.tf
│   │   └── vpc_endpoints.tf
│   ├── outputs.tf
│   ├── s3
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   └── variables.tf
│   ├── secrets_manager
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   └── variables.tf
│   ├── terraform_state
│   │   ├── main.tf
│   │   ├── outputs.tf
│   │   └── variables.tf
│   ├── variables.tf
│   └── visualize
│       ├── graph.svg
│       └── plan.json
└── tests
    ├── __init__.py
    ├── test_main.py
    └── test_utils.py

/.aws/task-definition-actions.json:
--------------------------------------------------------------------------------
{
  "containerDefinitions": [
    {
      "name": "{name}",
      "image": "{ecr}:{tag}",
      "cpu": 256,
      "memory": 1024,
      "portMappings": [
        {
          "containerPort": 80,
          "hostPort": 80,
          "protocol": "tcp"
        }
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "{logs_group_name}",
          "awslogs-region": "{region}",
          "awslogs-create-group": "true",
          "awslogs-stream-prefix": "ecs"
        }
      },
      "essential": true
    }
  ],
  "family": "{ecs_task_family_name}",
  "executionRoleArn": "arn:aws:iam::{account_id}:role/{ecs_execution_role_name}",
  "taskRoleArn": "arn:aws:iam::{account_id}:role/{ecs_task_role_name}",
  "networkMode": "awsvpc",
  "requiresCompatibilities": [
    "FARGATE"
  ],
  "cpu": "256",
  "memory": "1024",
  "runtimePlatform": {
    "cpuArchitecture": "X86_64",
    "operatingSystemFamily": "LINUX"
  }
}

/.aws/task-definition.json:
--------------------------------------------------------------------------------
[
  {
    "name": "${service_name}",
    "image": "${ecr}:${tag}",
    "cpu": 256,
    "memory": 1024,
    "portMappings": [
      {
        "containerPort": 80,
        "hostPort": 80,
        "protocol": "tcp"
      }
    ],
    "logConfiguration": {
      "logDriver": "awslogs",
      "options": {
        "awslogs-group": "${logs_group_name}",
        "awslogs-region": "${region}",
        "awslogs-create-group": "true",
        "awslogs-stream-prefix": "ecs"
      }
    },
    "essential": true
  }
]

/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
  "name": "Python 3",
  // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
  "image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
  "customizations": {
    "vscode": {
      "extensions": [
        "rangav.vscode-thunder-client",
        "amazonwebservices.aws-toolkit-vscode",
        "ms-azuretools.vscode-docker",
        "ms-python.vscode-pylance",
        "ms-python.python",
        "ms-python.isort",
        "hashicorp.terraform",
        "github.vscode-github-actions"
      ]
    }
  },
  "features": {
    "ghcr.io/devcontainers/features/aws-cli:1": {},
    "ghcr.io/devcontainers/features/docker-in-docker:2": {}
  },

  // Features to add to the dev container. More info: https://containers.dev/features.
  // "features": {},

  // Use 'forwardPorts' to make a list of ports inside the container available locally.
  // "forwardPorts": [8000],

  // Use 'postCreateCommand' to run commands after the container is created.
  "postCreateCommand": "make install-tools"

  // Configure tool-specific properties.
  // "customizations": {},

  // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
  // "remoteUser": "root"
}

/.github/workflows/cd.yml:
--------------------------------------------------------------------------------
name: 'Deploy'

# Trigger on CI workflow completion
on:
  workflow_run:
    workflows: ["CI"]
    types:
      - completed

env:
  AWS_REGION: us-east-1 # set this to your preferred AWS region, e.g. us-west-1
  ECR_REPOSITORY_ECS: ecs-repo # set this to your Amazon ECR repository name
  ECR_REPOSITORY_LAMBDA: lambda-repo
  QDRANT_URL: ${{ secrets.QDRANT_URL }}
  QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }}

permissions:
  contents: read

jobs:
  deploy-lambda:
    name: Deploy (Amazon ECR - Lambda Functions)
    runs-on: ubuntu-latest
    environment: production

    defaults:
      run:
        shell: bash

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v1

      - name: Create ECR repository
        run: |
          AWS_ECR_REPOSITORY_NAME=${{ env.ECR_REPOSITORY_LAMBDA }}
          if ! aws ecr describe-repositories --repository-names $AWS_ECR_REPOSITORY_NAME > /dev/null 2>&1; then
            aws ecr create-repository --repository-name $AWS_ECR_REPOSITORY_NAME --image-scanning-configuration scanOnPush=true
          fi

      - name: Build, tag, and push image to Amazon ECR (Lambda Functions)
        id: build-image
        uses: docker/build-push-action@v3
        with:
          context: .
          file: ./lambda_functions/docker/Dockerfile
          push: true
          tags: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY_LAMBDA }}:${{ github.sha }}

  terraform:
    name: 'Terraform (IaC)'
    needs: deploy-lambda
    runs-on: ubuntu-latest
    environment: production

    defaults:
      run:
        shell: bash

    steps:
      # Checkout the repository to the GitHub Actions runner
      - name: Checkout
        uses: actions/checkout@v3

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v1
        with:
          cli_config_credentials_token: ${{ secrets.TF_API_TOKEN }}

      - name: Terraform Init
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) && \
          cd terraform && terraform init \
            -backend-config="region=$AWS_REGION" \
            -backend-config='assume_role={"role_arn":"arn:aws:iam::'$AWS_ACCOUNT_ID':role/terraform_state_role"}'

      - name: Terraform Format
        run: make tf-fmt

      - name: Terraform Plan
        run: make tf-plan

      # On push to "main", build or change infrastructure according to Terraform configuration files
      # Note: It is recommended to set up a required "strict" status check in your repository for "Terraform Cloud". See the documentation on "strict" required status checks for more information: https://help.github.com/en/github/administering-a-repository/types-of-required-status-checks
      - name: Terraform Apply
        # if: github.ref == 'refs/heads/main' && github.event_name == 'push'
        run: make tf-deploy

  deploy-secrets:
    name: Deploy secrets to AWS Secrets Manager
    needs: [terraform, deploy-lambda]
    runs-on: ubuntu-latest
    environment: production

    defaults:
      run:
        shell: bash

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Upload Qdrant secrets to AWS Secrets Manager
        env:
          QDRANT_URL: ${{ secrets.QDRANT_URL }}
          QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }}
        run: |
          aws secretsmanager put-secret-value --secret-id prod/qdrant_url --secret-string "$QDRANT_URL"
          aws secretsmanager put-secret-value --secret-id prod/qdrant_api_key --secret-string "$QDRANT_API_KEY"

  deploy-ecs:
    name: Deploy (Amazon ECR - ECS)
    needs: [terraform, deploy-lambda, deploy-secrets]
    runs-on: ubuntu-latest
    environment: production

    defaults:
      run:
        shell: bash

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v1

      - name: Build, tag, and push image to Amazon ECR
        id: build-image
        env:
          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          IMAGE_TAG: ${{ github.sha }}
        run: |
          # Build a docker container and
          # push it to ECR so that it can
          # be deployed to ECS.
          docker build -t $ECR_REGISTRY/$ECR_REPOSITORY_ECS:$IMAGE_TAG .
          docker push $ECR_REGISTRY/$ECR_REPOSITORY_ECS:$IMAGE_TAG
          echo "image=$ECR_REGISTRY/$ECR_REPOSITORY_ECS:$IMAGE_TAG" >> $GITHUB_OUTPUT

/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
name: CI

env:
  AWS_REGION: us-east-1 # set this to your preferred AWS region, e.g. us-west-1
  QDRANT_URL: ${{ secrets.QDRANT_URL }} # set this to your Qdrant URL, e.g. https://qdrant.yourdomain.com
  QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} # set this to your Qdrant API key, e.g. 1234567890abcdef1234567890abcdef
on:
  push:
    branches:
      - main
    paths-ignore:
      - 'README.md'
      - 'docs/**'
      - 'files/**'
      - 'documents/**'
      - '.gitignore'
  pull_request:
    branches:
      - main
    paths-ignore:
      - 'README.md'
      - 'docs/**'
      - 'files/**'
      - 'documents/**'
      - '.gitignore'
  workflow_dispatch:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout sources
        uses: actions/checkout@v3

      - name: Configure AWS credentials (For test API calls to AWS Bedrock)
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Setup Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          pipx install poetry
          poetry install

      - name: Run format
        run: make format

      - name: Run lint
        run: make lint

      - name: Run tests
        run: make test

/.gitignore:
--------------------------------------------------------------------------------
# Ignore terraform
terraform/.terraform
terraform/terraform.tfstate
terraform/terraform.tfstate.backup
lambda/temp/python
files/*
terraform/lambda/temp/python
lambda/python

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.12-slim-bullseye

# Install curl and the poetry installer
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y curl && \
    curl -sSL https://install.python-poetry.org | python

# Sets the PATH to get the poetry bin
ENV PATH="/root/.local/bin:${PATH}"

# Set the working directory
WORKDIR /code

# Copy the files to the working directory
COPY ./pyproject.toml /code/pyproject.toml
COPY ./poetry.lock /code/poetry.lock
COPY ./README.md /code/README.md
COPY ./src/app /code/app

# Configure poetry to create virtualenvs inside the project
RUN poetry config virtualenvs.in-project true

# Install dependencies using poetry
RUN poetry install --no-root

# Defines the port that the application listens on
EXPOSE 80

# Run the application using uvicorn on port 80
CMD ["poetry", "run", "uvicorn", "--host", "0.0.0.0", "--port", "80", "app.main:app", "--reload"]

/LICENSE:
--------------------------------------------------------------------------------
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

/Makefile:
--------------------------------------------------------------------------------
setup:
	@echo "Setting up virtual environment"
	poetry shell

install:
	@echo "Installing dependencies"
	poetry install

format:
	@echo "Formatting code"
	chmod +x ./format.sh
	./format.sh

lint:
	@echo "Linting code"
	chmod +x ./lint.sh
	./lint.sh

test:
	@echo "Running tests"
	poetry run python -m pytest -vv tests/*.py --cov=tests

run-app:
	@echo "Running local app with uvicorn"
	poetry run uvicorn src.app.main:app --host 127.0.0.1 --port 8000

ask:
	@echo "Running local app with ask"
	curl -X POST http://localhost:8000/ask -H "Content-Type: application/json" -d '{"text":"What is Concurso Unificado?"}'

docker-inspect:
	@echo "Inspecting Docker container"
	docker inspect app

docker-build:
	@echo "Building Docker container"
	docker build -t app .

docker-run:
	@echo "Starting Docker container"
	chmod +x ./scripts/docker_run.sh
	./scripts/docker_run.sh

deploy-lambda:
	@echo "Building Lambda container"
	chmod +x ./scripts/deploy_lambda.sh
	./scripts/deploy_lambda.sh

lambda-test:
	@echo "Testing Lambda function"
	curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"payload":"hello world!"}'

deploy-ecs:
	@echo "Deploying to AWS"
	chmod +x ./scripts/deploy.sh
	./scripts/deploy.sh

hf-del-cache:
	@echo "Deleting downloaded models"
	huggingface-cli delete-cache

tf-init:
	@echo "Initializing Terraform"
	chmod +x ./scripts/terraform_init.sh
	./scripts/terraform_init.sh

tf-plan:
	@echo "Planning Terraform"
	cd terraform/ && terraform plan -input=false

tf-outp:
	@echo "Output Terraform"
	cd terraform && terraform output

tf-destroy:
	@echo "Destroying Terraform"
	cd terraform && terraform destroy -auto-approve

tf-fmt:
	@echo "Formatting Terraform"
	cd terraform && terraform fmt -recursive

tf-val:
	@echo "Validating Terraform"
	cd terraform && terraform validate

tf-graph:
	@echo "Graph Terraform"
	cd terraform && mkdir -p visualize && terraform graph | dot -Tsvg > visualize/graph.svg

tf-plan-json:
	@echo "Exporting Terraform plan as JSON"
	cd terraform && mkdir -p visualize && terraform plan -out=plan.out && terraform show -json plan.out > visualize/plan.json

tf-deploy:
	@echo "Deploying Terraform"
	cd terraform && terraform fmt -recursive && terraform validate && terraform apply -auto-approve -input=false

tf-upload:
	@echo "Uploading Terraform state"
	cd terraform && terraform init
	chmod +x ./scripts/upload_state.sh
	chmod +x ./scripts/terraform_migrate.sh
	./scripts/upload_state.sh
	./scripts/terraform_migrate.sh

tf-mgt:
	@echo "Migrating Terraform"
	chmod +x ./scripts/terraform_migrate.sh
	./scripts/terraform_migrate.sh

tf-refresh:
	@echo "Refreshing Terraform"
	cd terraform && terraform refresh

tf-st-list:
	@echo "List Terraform state"
	cd terraform && terraform state list

json-fmt:
	@echo "Formatting JSON"
	jq . .aws/task-definition.json > temp.json && mv temp.json .aws/task-definition.json
	jq . .aws/task-definition-actions.json > temp.json && mv temp.json .aws/task-definition-actions.json

aws-user:
	@echo "Check current AWS user signed in to AWS CLI"
	aws sts get-caller-identity

aws-region:
	@echo "Check current AWS region"
	aws configure get region

qdrant-create:
	@echo "Create Qdrant collection"
	poetry run python src/cli/qdrant_cli.py create

qdrant-delete:
	@echo "Delete Qdrant collection"
	poetry run python src/cli/qdrant_cli.py delete

qdrant-info:
	@echo "Info Qdrant collection"
	poetry run python src/cli/qdrant_cli.py info

upload_secrets:
	@echo "Uploading secrets to AWS Secrets Manager"
	chmod +x ./scripts/set_secrets.sh
	./scripts/set_secrets.sh

zip-lambda:
	@echo "Zipping Lambda function"
	chmod +x ./scripts/package_lambda_layer.sh
	./scripts/package_lambda_layer.sh

lambda-info:
	@echo "Info Lambda functions"
	aws lambda list-functions --max-items 10

install-tools:
	@echo "Installing tools"
	chmod +x scripts/install_poetry.sh
	chmod +x scripts/install_aws_cli.sh
	chmod +x scripts/install_terraform.sh
	@echo "Checking if Poetry is installed..."
	@if ! command -v poetry &> /dev/null; then scripts/install_poetry.sh; fi
	@echo "Checking if AWS CLI is installed..."
	@if ! command -v aws &> /dev/null; then scripts/install_aws_cli.sh; fi
	@echo "Checking if Terraform is installed..."
	@if ! command -v terraform &> /dev/null; then scripts/install_terraform.sh; fi

all: install format lint

/README.md:
--------------------------------------------------------------------------------
[![CI](https://github.com/mathewsrc/generativeai-questions-and-answers-app/actions/workflows/ci.yml/badge.svg)](https://github.com/mathewsrc/generativeai-questions-and-answers-app/actions/workflows/ci.yml)
[![Deploy](https://github.com/mathewsrc/generativeai-questions-and-answers-app/actions/workflows/cd.yml/badge.svg)](https://github.com/mathewsrc/generativeai-questions-and-answers-app/actions/workflows/cd.yml)

# Generative AI: Questions and Answers app for competition notices

Question and Answer application for competition notices using Amazon Bedrock, Langchain, Qdrant, AWS ECS, and FastAPI


## What is RAG (Retrieval Augmented Generation)?

The Retrieval Augmented Generation architecture combines the power of Large Language Models (LLMs) (the generation component) with an external
vector store (the retrieval component) that holds proprietary data, in order to produce more accurate answers. RAG combines existing
information with new content generated by LLMs. Grounding responses in existing information mitigates a well-known problem with LLMs: hallucinations (incorrect or fabricated results).

## How does the retrieval component work?

The retrieval component finds information that matches an input query by ranking the collection of documents stored in the vector store database and
returning the best matches. Therefore, RAG allows LLMs to generate new content about material they have never been trained on,
without updating their weights.
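
To make the flow concrete, here is a minimal sketch of the retrieve-then-generate loop using the same stack this project is built on (Langchain, Bedrock, and Qdrant). The collection name, model IDs, and `k` value are illustrative assumptions, not the exact values used in `src/app/main.py`:

```python
import os

from langchain.chains import RetrievalQA
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.llms import Bedrock
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient

# Connect to the existing Qdrant Cloud collection (credentials come from the
# same environment variables the app reads; the collection name is assumed).
client = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"])
embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1")
vector_store = Qdrant(client=client, collection_name="competition-notices", embeddings=embeddings)

# Retrieve: embed the query and fetch the top-k most similar document chunks.
retriever = vector_store.as_retriever(search_kwargs={"k": 4})

# Generate: let the Bedrock foundation model answer using the retrieved chunks as context.
llm = Bedrock(model_id="anthropic.claude-v2")
chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

print(chain.invoke({"query": "What is Concurso Unificado?"})["result"])
```

Note that the model never sees the whole document collection; it only sees the handful of chunks the retriever ranked highest for this query.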

Reference:

https://superlinked.com/vectorhub/retrieval-augmented-generation


## Objective

Many people give up on reading competition notices due to factors such as too much information, inaccessible font sizes, and difficulty of interpretation. This project aims to build a generative AI application to help candidates quickly and easily understand competition notices.

PT-BR

Muitas pessoas acabam por desistir de ler editais de concursos devido a diferentes fatores como: muitas informações, tamanho de letras não acessíveis e dificuldade de interpretação. Este projeto tem como objetivo construir uma aplicação de IA generativa para auxiliar candidatos a compreender de forma fácil e rápida editais de concursos.

## Overview

![Generative AI - RAG](https://github.com/mathewsrc/generativeai-questions-and-answers-app/assets/94936606/e5ea6499-e52e-4a44-a478-78355dd4839a)

## Step by step

1. Upload the Terraform state file to AWS S3.
2. Push the code and Terraform scripts to GitHub.
3. Trigger GitHub Actions.
4. Use GitHub Actions to leverage Terraform for creating the S3 and Lambda Function infrastructure, and for uploading documents.
5. Trigger the Lambda Function via S3 to process documents (see the ingestion sketch after this list).
6. Utilize a container image stored in ECR within the Lambda Function. This image contains all the necessary code to convert PDFs to embeddings using Langchain and the AWS Bedrock embeddings model.
7. Upload the embeddings to Qdrant Cloud using the Python API client.
8. Use GitHub Actions and the AWS CLI to upload the Qdrant URL and API key to AWS Secrets Manager.
9. Use GitHub Actions and Terraform to create an ECR repository and all other required resources such as the AWS network (VPC, subnets, internet gateway, NAT gateway, routes, security groups, etc.), AWS CloudWatch, Elastic Load Balancer, API Gateway, and VPC link. Log in to ECR and use AWS aws-actions to build, tag, and push the Docker image to ECR.
10. Pull the Docker image from ECR using ECS.
11. Make a call to the AWS API Gateway from the user's end.
12. Route the request from the AWS API Gateway to the VPC link, enabling communication between the API Gateway and the Amazon ECS service within the Amazon VPC.
13. Redirect traffic via the Elastic Load Balancer to the least used node, ensuring a balanced load across each container running the same service.
14. Retrieve the Qdrant Cloud credentials from AWS Secrets Manager using the ECS service.
15. Access Qdrant Cloud using its API to get the document collection via the ECS service.
16. Integrate the AWS Bedrock Foundation Model and the embeddings from Qdrant Cloud using Langchain.
17. Generate an answer about the documents for the user using the embeddings from Qdrant Cloud via the LLM.
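
Steps 5-7 are the ingestion side of the pipeline. The sketch below shows the rough shape of what `lambda_functions/src/create_vector_store.py` does, under stated assumptions: the bucket and key come from the S3 event, while the chunk sizes, collection name, and embedding model ID are illustrative rather than the project's exact values.

```python
import os

import boto3
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.vectorstores import Qdrant

s3 = boto3.client("s3")


def handler(event, context):
    # The S3 trigger (step 5) tells us which uploaded document to process.
    record = event["Records"][0]["s3"]
    bucket, key = record["bucket"]["name"], record["object"]["key"]
    local_path = f"/tmp/{os.path.basename(key)}"
    s3.download_file(bucket, key, local_path)

    # Step 6: split the PDF into chunks and embed them with Bedrock.
    pages = PyPDFLoader(local_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(pages)
    embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1")

    # Step 7: push the embedded chunks to the Qdrant Cloud collection.
    Qdrant.from_documents(
        chunks,
        embeddings,
        url=os.environ["QDRANT_URL"],
        api_key=os.environ["QDRANT_API_KEY"],
        collection_name="competition-notices",  # illustrative name
    )
    return {"status": "ok", "chunks": len(chunks)}
```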

## More details about this project

[Setup Bedrock](https://github.com/mathewsrc/generativeai-questions-and-answers-app/blob/main/docs/bedrock_tutorial.md)

[Setup GitHub Actions secrets](https://github.com/mathewsrc/generativeai-questions-and-answers-app/blob/main/docs/github_actions_secret_keys.md)

[Setup Terraform Cloud](https://github.com/mathewsrc/generativeai-questions-and-answers-app/blob/main/docs/terraform.md)

[AWS network details](https://github.com/mathewsrc/generativeai-questions-and-answers-app/blob/main/docs/network.md)

[How to create a vector store with Lambda, S3, Bedrock and Qdrant Cloud](https://github.com/mathewsrc/generativeai-questions-and-answers-app/blob/main/docs/vector_store.md)

[Microservice with ECS, Docker and FastAPI](https://github.com/mathewsrc/generativeai-questions-and-answers-app/blob/main/docs/microservice.md)

[How to upload the Terraform state file and PDFs](https://github.com/mathewsrc/generativeai-questions-and-answers-app/blob/main/docs/s3.md)

## Endpoint responses

/

![root_endpoint](https://github.com/mathewsrc/generativeai-questions-and-answers-app/assets/94936606/113d1f7c-206b-4836-b6db-42e0787507c4)

/ask

![ask_endpoint](https://github.com/mathewsrc/generativeai-questions-and-answers-app/assets/94936606/8cd84883-76f7-4cfb-9b73-121cadae7f03)


## Requirements
- [Python](https://www.python.org/downloads/)
- [Poetry](https://python-poetry.org/docs/#installation)
- [Docker](https://docs.docker.com/desktop/install/windows-install/)
- [AWS Account](https://aws.amazon.com/resources/create-account/)
- [Terraform](https://developer.hashicorp.com/terraform/install?product_intent=terraform)
- [Terraform API token](docs/terraform.md)
- [GitHub Actions](https://docs.github.com/en/actions)
- [Qdrant](https://cloud.qdrant.io/login)

## How to run this application

1. Install pipx (in GitHub Codespaces, jump to `Install Poetry`, as Codespaces already has pipx installed)

Linux
```bash
sudo apt update
sudo apt install pipx
pipx ensurepath
```

Windows
```
Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
Invoke-RestMethod -Uri https://get.scoop.sh | Invoke-Expression
scoop install pipx
pipx ensurepath
```
Now open a new terminal to use pipx

2. Install Poetry

```bash
# Install Poetry
pipx install --force poetry

# Enable tab completion for Bash
poetry completions bash >> ~/.bash_completion

# Init Poetry
poetry init

# Install Poetry dependencies
poetry install

# Check Poetry version
poetry --version
```

3. Install Terraform (Linux). For more information, see [Terraform](https://developer.hashicorp.com/terraform/install#Linux)

```bash
# Install Terraform by HashiCorp
wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg

# Add the official HashiCorp Linux repository
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list

# Update and install
sudo apt update && sudo apt install terraform

# Verify the installation
terraform --version
```

Alternatively, run the Bash script `install_terraform.sh` in the terminal.

4. Enable Bedrock Foundation Models

Navigate to the AWS console, open Amazon Bedrock, and go to Model access. Enable the foundation models that you wish to use. I created a short [tutorial](docs/bedrock_tutorial.md) on how to request model access.

5. Install AWS CLI

Finally, we need to install the AWS CLI to use Terraform with the AWS provider. Refer to [cliv2-linux-install](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html#cliv2-linux-install) for more information.

To install the AWS CLI, execute the following commands in the terminal:

```bash
# Download the AWS CLI
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"

# Unzip the AWS CLI
unzip awscliv2.zip

# Install the AWS CLI
sudo ./aws/install

# Clean up the files
rm -rf awscliv2.zip aws

# Verify the AWS CLI
aws --version
```

Alternatively, you can run the provided Bash script `install_aws_cli.sh` in the terminal to streamline the installation process.

### Configure AWS CLI

In the terminal, run the following command:

```bash
# Configure the AWS CLI
aws configure
```

To verify your credentials, you can use one of the following commands in the terminal:

```bash
aws sts get-caller-identity
make aws-user
```

This command will retrieve details about the user, including the user ID and account ID.

```
{
    "UserId": "##############",
    "Account": "############",
    "Arn": "arn:aws:iam::###########:user/##########"
}
```

## Setup Qdrant Cloud

To access Qdrant Cloud via the client SDK, you need to create a cluster in Qdrant Cloud and obtain a token and the cluster URL.

1. Follow the instructions on how to set up a free cluster by visiting the following link:

https://qdrant.tech/documentation/cloud/quickstart-cloud/

2. Export the Qdrant token and cluster URL

Use the following commands in the terminal to export the secrets:

```bash
export QDRANT_URL=""
export QDRANT_API_KEY=""
```

### Qdrant cluster

![image](https://github.com/mathewsrc/GenerativeAI-Questions-and-Answers-app-with-Bedrock-Langchain-and-FastAPI/assets/94936606/18216ebe-a6e7-4c82-9baf-da20f633c8d9)

3. (Optional) Run the app locally for testing

```bash
make run-app
```
Next, navigate to http://127.0.0.1:8000 or http://127.0.0.1:8000/docs in your web browser.
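
You can also exercise the `/ask` endpoint directly from the terminal. This mirrors the `make ask` target in the Makefile; the question itself is just an example:

```bash
curl -X POST http://localhost:8000/ask \
  -H "Content-Type: application/json" \
  -d '{"text":"What is Concurso Unificado?"}'
```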


## Deploy

This project offers two deployment options: manual execution in the terminal and CI/CD with GitHub Actions.

As the Terraform backend is configured to use a Terraform state file stored in AWS S3, the initial step is to upload the state file to S3.

1. Execute the following command to initialize Terraform:

```bash
make tf-init
```

2. In the terminal, execute the following command to upload the state file to AWS S3:

```bash
make tf-upload
```
### Manually deploy using the terminal

Follow the steps below to create the AWS infrastructure:

1. First, update the AWS region in the `src/app/main.py` file if you are using another region

Directory: `src/app/main.py`

```python
AWS_DEFAULT_REGION = "us-east-1"  # Set this to your preferred AWS region, e.g. us-west-1
```

2. Use the following command in the terminal to create all AWS resources using
Terraform. This command will invoke Terraform to configure all the necessary infrastructure.

```bash
make tf-deploy
```

3. Deploy the application to ECS using the make command:

```bash
make deploy-ecs
```

### Automatically deploy using GitHub Actions for Continuous Integration and Continuous Deployment (CI/CD)

If you want to deploy this application to AWS ECS using GitHub Actions, you will need to follow a few more steps:

1. Generate a Terraform API token and a secret key in GitHub. Refer to the [Terraform API token](docs/terraform.md) guide inside this project.
2. Save secret keys in GitHub Actions by providing your AWS credentials and Qdrant credentials. Check out the [GitHub Actions Secret Keys](docs/github_actions_secret_keys.md) guide.

3. Replace the following environment variables in the `.github/workflows/ci.yml`, `.github/workflows/cd.yml`, and `src/app/main.py` files if you are using a different
AWS region

Directory: `.github/workflows`

```yaml
env:
  AWS_REGION: us-east-1 # Set this to your preferred AWS region, e.g. us-west-1
```

Directory: `src/app/main.py`

```python
AWS_DEFAULT_REGION = "us-east-1"  # Set this to your preferred AWS region, e.g. us-west-1
```

Congratulations! You are now ready to deploy this application using CI/CD

## Tear down the AWS resources

Terraform excels in this aspect, eliminating the need for manual navigation through the console to locate each created resource. With Terraform, we can just use `terraform destroy` or `make tf-destroy` in the terminal:

```bash
cd terraform && terraform destroy
```

```bash
make tf-destroy
```

## Tools used in this project

### Bedrock

Amazon Bedrock is a fully managed service that provides a selection of high-performing foundation models (FMs) from leading AI companies such as AI21 Labs, Anthropic, Cohere, Meta, Stability AI, and Amazon. It offers a single API and a wide range of capabilities for building generative AI applications with a focus on security, privacy, and responsible AI.

Key benefits

- Offers a choice of high-performing FMs from leading AI companies, allowing users to experiment with and evaluate the best models for their use case.

- Provides the ability to privately customize FMs with user data using techniques such as fine-tuning and Retrieval Augmented Generation (RAG).

- As a serverless service, Amazon Bedrock eliminates the need for users to manage any infrastructure.

- Allows for secure integration and deployment of generative AI capabilities into user applications using familiar AWS services such
as Lambda Functions and Elastic Container Service (ECS).

### Terraform

Terraform is an open-source Infrastructure as Code (IaC) tool, crafted for provisioning and managing cloud resources.

Key benefits:

- Declarative approach
- Enables collaboration, versioning, and integration into CI/CD pipelines
- Reusable modules
- Multi-cloud deployment
- Automation and standardization

### Amazon ECS (Elastic Container Service)

Amazon ECS (Elastic Container Service) is a fully managed container orchestration service facilitating the effortless deployment and scaling of containerized applications on AWS.

Key benefits:

- Simplified operation: eliminates the need to install or manage your own container orchestration
- Auto-scaling configuration: easily configure auto-scaling to match application demands
- Multiple launch types, including EC2 and Fargate, to meet specific application requirements

Fargate

- Fargate is a serverless computing engine for containers. Fargate automatically scales in and out and manages the infrastructure
- It eliminates the need to choose EC2 instances, cluster capacity, and scaling
- Fargate has native integration with AWS VPC, which permits fine-grained control of connectivity

### Amazon ECR (Elastic Container Registry)

Amazon ECR is a managed container registry service designed to store Docker images, supporting public and private repositories.

Key benefits:

- Image scanning for vulnerabilities within your container images
- Effective management of image lifecycles with customizable policies
- Cross-region and cross-account replication: seamless replication of images across regions and accounts for enhanced accessibility and redundancy

### API Gateway

API Gateway is a fully managed service that supports containerized and web applications. API Gateway makes it easy for developers to create, publish, maintain, monitor, and secure APIs at any scale.

API: A set of rules that allow different software entities to communicate with each other.

Gateway: A point of entry into a system. It often serves as a proxy that forwards requests to multiple services.

Key benefits:
- Supports RESTful APIs and WebSocket APIs
- Handles traffic management and throttling
- Handles authorization and access control
- Monitoring and API version management

### GitHub Actions

GitHub Actions is a versatile CI/CD platform facilitating build, test, and deployment pipelines.

Key benefits:

- Support for automatic, manual, scheduled, and event-triggered workflows
- Compatibility with Linux, Windows, and macOS virtual machines for running workflows
- Intuitive visual workflow for efficient debugging and error resolution
- Seamless integration with AWS ECS and Terraform

### Docker

Docker is a platform that uses OS-level virtualization to deliver software in packages called containers. We can use Docker to create microservices applications using FastAPI and run them locally or on cloud services such as ECS.

Key benefits:

- Isolation
- Easy setup using a Dockerfile
- Portability (runs on on-premises servers and in the cloud)

### Qdrant

Qdrant Cloud offers managed Qdrant instances in the cloud, serving as a powerful similarity search engine.

Key benefits:

- Seamless integration with LangChain
- Software-as-a-Service (SaaS)
- Easy scalability
- Comprehensive monitoring and logging for cluster performance
- Availability on major cloud platforms: AWS, GCP, and Azure

### Lambda Functions

AWS Lambda is a serverless computing service that allows you to run code without provisioning or managing servers. It provides automatic scaling based on workload.

Key benefits

- Eliminates the need to provision or manage servers, allowing you to focus on writing code.

- Automatically scales your applications in response to incoming requests or events, handling any scale of traffic.

- Supports various programming languages including Python, Go, Java, and more.

- Works with serverless and container tools such as the Docker CLI for building, testing, and deploying functions.

### S3

Amazon S3 is an object storage service that offers industry-leading scalability, data availability, security, and performance. It caters to customers of all sizes and industries, providing storage solutions for a wide range of use cases.

Key benefits

- Offers industry-leading scalability to store and protect any amount of data.

- Provides cost-effective storage classes to help optimize costs.

### Secrets Manager

AWS Secrets Manager is a service that helps manage, retrieve, and rotate database credentials, API keys, and other secrets throughout their lifecycles.

Key benefits

- Provides a centralized service to manage secrets, such as database credentials and API keys.

- Allows for secure and easy retrieval of secrets when needed.

- Supports automatic rotation of secrets to enhance security.

### Elastic Load Balancer

Elastic Load Balancing is a service that automatically distributes incoming traffic across multiple targets in one or more Availability Zones, ensuring high availability and fault tolerance in your applications.

Key benefits

- Automatically distributes incoming traffic across multiple targets, such as EC2 instances, containers, and IP addresses.

- Monitors the health of its registered targets and routes traffic only to the healthy ones.

- Scales as incoming traffic changes over time.

### Langchain

LangChain is a robust framework designed for developing applications powered by language models. It enables the creation of context-aware applications that can reason based on the provided context.

Key benefits

- Allows the development of applications that can connect a language model to sources of context such as prompt instructions, few-shot examples, and content to ground its response in.

- Includes Python and JavaScript libraries and integrations (such as Qdrant) for a myriad of components, plus a basic runtime for combining these components into chains and agents.


/docs/bedrock_tutorial.md:
--------------------------------------------------------------------------------
# Bedrock setup

1. Sign in to AWS
2. Search for Bedrock -> Model access -> `Manage model access`

![image](https://github.com/mathewsrc/GenerativeAI-Questions-and-Answers-app-with-Bedrock-Langchain-and-FastAPI/assets/94936606/e54573f9-6eeb-4f36-897e-76ea20b74211)

3. Then request access to `Titan Embeddings G1 - Text` and `Claude`

![image](https://github.com/mathewsrc/GenerativeAI-Questions-and-Answers-app-with-Bedrock-Langchain-and-FastAPI/assets/94936606/b05d9f5a-c42c-4a20-a0d4-3f021e50a04f)


4. Wait until you are granted access to the models.
5. Congratulations! Now you have access to the Foundation Model and the Embedding Model.

/docs/github_actions_secret_keys.md:
--------------------------------------------------------------------------------
# Creating Secret Keys for GitHub Actions

1. Go to GitHub -> this project cloned -> Settings -> Secrets and variables -> Actions -> `New repository secret`

![image](https://github.com/mathewsrc/GenerativeAI-Questions-and-Answers-app-with-Bedrock-Langchain-and-FastAPI/assets/94936606/52cfed6d-c628-40ea-9150-28e569ab491c)

Figure 1. GitHub secrets required

2. You need to create the following secret keys: `AWS_ACCESS_KEY_ID`, `AWS_ACCOUNT_ID`, `AWS_SECRET_ACCESS_KEY`,
`QDRANT_API_KEY`, `QDRANT_URL`


![image](https://github.com/mathewsrc/GenerativeAI-Questions-and-Answers-app-with-Bedrock-Langchain-and-FastAPI/assets/94936606/1ee1ede6-0cad-4fa4-acdf-2e8c6758ee58)

Figure 2. Creating a new secret

3. Click `Add secret`
4. Congratulations! Now you can deploy this application to AWS ECS

/docs/microservice.md:
--------------------------------------------------------------------------------
# Microservice with ECS, Docker and FastAPI

This project's application employs the microservices architecture, featuring a REST API developed with the FastAPI library. The application is containerized using Docker and deployed in the cloud through AWS ECS.

![microservice drawio (6)](https://github.com/mathewsrc/generativeai-questions-and-answers-app/assets/94936606/1c915b53-d635-4bbe-898c-7c81e43491cf)

## ECR

Unlike Lambda functions, the ECS service is a Platform as a Service (PaaS), but it can be used serverlessly
via the Fargate launch type, enabling the execution of containers housed in the Elastic Container Registry (ECR).
ECR serves as a repository, facilitating the storage of both private and public container images.
12 | 13 | The `scan_on_push` option helps to identify software vulnerabilities in container images, and the `image_tag_mutability` option controls whether existing image tags can be overwritten. 14 | 15 | Directory: `terraform/ecr` 16 | 17 | ```terraform 18 | # Create an ECR repository 19 | resource "aws_ecr_repository" "ecr_repo" { 20 | name = var.ecr_name 21 | image_tag_mutability = "MUTABLE" 22 | 23 | image_scanning_configuration { 24 | scan_on_push = true 25 | } 26 | 27 | force_delete = true 28 | 29 | tags = { 30 | Environment = var.environment 31 | Application = var.application_name 32 | Name = var.ecr_name 33 | } 34 | } 35 | ``` 36 | 37 | ### ECR policy 38 | 39 | Set of actions required to create the ECR repository 40 | 41 | ```json 42 | { 43 | "Version": "2012-10-17", 44 | "Statement": [ 45 | { 46 | "Sid": "Statement1", 47 | "Effect": "Allow", 48 | "Action": [ 49 | "ecr:*" 50 | ], 51 | "Resource": [ 52 | "arn:aws:ecr:us-east-1:account-id:repository/*" 53 | ] 54 | }, 55 | { 56 | "Sid": "Statement2", 57 | "Effect": "Allow", 58 | "Action": [ 59 | "ecr:GetAuthorizationToken" 60 | ], 61 | "Resource": [ 62 | "*" 63 | ] 64 | } 65 | ] 66 | } 67 | ``` 68 | 69 | ## Secrets Manager 70 | 71 | In contrast to Lambda functions, the ECS service lacks the Environment feature for securely storing sensitive information like API keys. An alternative approach involves leveraging the AWS Secrets Manager service to securely store such confidential data. 72 | 73 | The following code snippet shows the two Terraform resources used to create the Qdrant secrets: 74 | 75 | Directory: `terraform/secrets_manager` 76 | 77 | ```terraform 78 | resource "aws_secretsmanager_secret" "qdrant_url" { 79 | name = var.qdrant_url_key 80 | description = "Qdrant URL Key" 81 | recovery_window_in_days = 0 # Force deletion without recovery 82 | force_overwrite_replica_secret = true # Force overwrite a secret with the same name in the destination Region. 83 | tags = { 84 | Name = "Qdrant URL" 85 | Environment = var.environment 86 | Application = var.application_name 87 | } 88 | } 89 | 90 | # Create a secret for the Qdrant API Key, so ECS can access it 91 | resource "aws_secretsmanager_secret" "qdrant_api_key" { 92 | name = var.qdrant_api_key 93 | description = "Qdrant API Key" 94 | recovery_window_in_days = 0 # Force deletion without recovery 95 | force_overwrite_replica_secret = true # Force overwrite a secret with the same name in the destination Region.
96 | tags = { 97 | Name = "Qdrant API Key" 98 | Environment = var.environment 99 | Application = var.application_name 100 | } 101 | } 102 | ``` 103 | 104 | ### Secrets Manager policy 105 | 106 | ```json 107 | { 108 | "Version": "2012-10-17", 109 | "Statement": [ 110 | { 111 | "Sid": "secretsmanager", 112 | "Effect": "Allow", 113 | "Action": [ 114 | "secretsmanager:CreateSecret", 115 | "secretsmanager:ListSecrets", 116 | "secretsmanager:BatchGetSecretValue", 117 | "secretsmanager:DescribeSecret", 118 | "secretsmanager:GetSecretValue", 119 | "secretsmanager:ListSecretVersionIds", 120 | "secretsmanager:DeleteSecret", 121 | "secretsmanager:PutSecretValue", 122 | "secretsmanager:RestoreSecret", 123 | "secretsmanager:UpdateSecret", 124 | "secretsmanager:TagResource", 125 | "secretsmanager:UntagResource", 126 | "secretsmanager:GetResourcePolicy", 127 | "secretsmanager:DeleteResourcePolicy", 128 | "secretsmanager:PutResourcePolicy", 129 | "secretsmanager:ValidateResourcePolicy" 130 | ], 131 | "Resource": [ 132 | "*" 133 | ] 134 | } 135 | ] 136 | } 137 | ``` 138 |
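With the secrets and the policy above in place, a quick sanity check from the CLI might look like this sketch; the secret name `QDRANT_URL` is an assumption standing in for the value of `var.qdrant_url_key`:

```bash
# Write a value into the secret, then read it back.
aws secretsmanager put-secret-value \
  --secret-id QDRANT_URL \
  --secret-string "https://your-cluster.qdrant.io:6333"   # placeholder URL

aws secretsmanager get-secret-value \
  --secret-id QDRANT_URL \
  --query SecretString \
  --output text
```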
139 | ## Load Balancer 140 | 141 | The Load Balancer redirects incoming traffic to the least-loaded healthy target, keeping the load balanced across the containers that run the same service. 142 | 143 | When the `internal` option is set to `true`, the load balancer cannot be reached directly from the public internet. As this project uses API Gateway 144 | to access the service, we can set it to `true`. 145 | 146 | The `load_balancer_type` argument has three options: application, gateway, or network. 147 | 148 | Directory: `terraform/load_balancer` 149 | 150 | ```terraform 151 | # Create an internal Application Load Balancer 152 | resource "aws_lb" "lb" { 153 | name = var.nlb_name 154 | internal = true 155 | load_balancer_type = "application" 156 | subnets = var.public_subnets 157 | security_groups = var.security_group_ids 158 | enable_deletion_protection = false 159 | 160 | tags = { 161 | Environment = var.environment 162 | Name = var.nlb_name 163 | Terraform = "true" 164 | Application = var.application_name 165 | } 166 | } 167 | ``` 168 | 169 | ## Load balancer listener 170 | 171 | The listener checks for connection requests from clients, using the protocol (HTTP) and port (80), and redirects the traffic from the load balancer to the target group. 172 | 173 | The `type` option defines the type of routing action. The `forward` type routes requests to one or more target groups. 174 | 175 | Directory: `terraform/load_balancer` 176 | 177 | ```terraform 178 | # Redirect traffic from the Load Balancer to the target group 179 | resource "aws_lb_listener" "listener" { 180 | load_balancer_arn = aws_lb.lb.arn 181 | port = var.container_port 182 | protocol = "HTTP" 183 | 184 | default_action { 185 | type = "forward" 186 | target_group_arn = aws_lb_target_group.target_group.arn 187 | } 188 | } 189 | ``` 190 | 191 | ### Load balancer and Load balancer listener policy 192 | 193 | ```json 194 | { 195 | "Version": "2012-10-17", 196 | "Statement": [ 197 | { 198 | "Sid": "ELBPermissions1", 199 | "Effect": "Allow", 200 | "Action": [ 201 | "iam:CreateServiceLinkedRole" 202 | ], 203 | "Resource": [ 204 | "arn:aws:iam::account-id:role/aws-service-role/elasticloadbalancing.amazonaws.com/AWSServiceRoleForElasticLoadBalancing" 205 | ] 206 | }, 207 | { 208 | "Sid": "ELBPermissions2", 209 | "Effect": "Allow", 210 | "Action": [ 211 | "elasticloadbalancing:DescribeLoadBalancerAttributes", 212 | "elasticloadbalancing:DescribeLoadBalancers", 213 | "elasticloadbalancing:DescribeTargetGroupAttributes", 214 | "elasticloadbalancing:DescribeListeners", 215 | "elasticloadbalancing:DescribeTags", 216 | "elasticloadbalancing:DescribeTargetGroups", 217 | "elasticloadbalancing:DescribeRules", 218 | "elasticloadbalancing:DescribeInstanceHealth" 219 | ], 220 | "Resource": "*" 221 | }, 222 | { 223 | "Sid": "ELBPermissions3", 224 | "Effect": "Allow", 225 | "Action": [ 226 | "elasticloadbalancing:SetSecurityGroups", 227 | "elasticloadbalancing:SetSubnets", 228 | "elasticloadbalancing:DeleteLoadBalancer", 229 | "elasticloadbalancing:CreateListener", 230 | "elasticloadbalancing:CreateLoadBalancer", 231 | "elasticloadbalancing:AddTags", 232 | "elasticloadbalancing:CreateTargetGroup", 233 | "elasticloadbalancing:CreateRule", 234 | "elasticloadbalancing:DeleteTargetGroup", 235 | "elasticloadbalancing:ModifyTargetGroupAttributes", 236 | "elasticloadbalancing:ModifyLoadBalancerAttributes", 237 | "elasticloadbalancing:DeleteListener" 238 | ], 239 | "Resource": [ 240 | "arn:aws:elasticloadbalancing:us-east-1:account-id:targetgroup/*/*", 241 | "arn:aws:elasticloadbalancing:us-east-1:account-id:loadbalancer/app/*/*", 242 | "arn:aws:elasticloadbalancing:us-east-1:account-id:listener/app/*/*/*", 243 | "arn:aws:elasticloadbalancing:us-east-1:account-id:loadbalancer/net/*/*", 244 | "arn:aws:elasticloadbalancing:us-east-1:account-id:listener/net/*/*/*" 245 | ] 246 | } 247 | ] 248 | } 249 | ``` 250 | 251 | ## Target group 252 | 253 | The target group routes requests to one or more registered targets, such as ECS tasks, Lambda Functions, or EC2 instances. 254 | 255 | The `vpc_id` argument specifies where the target group will be created. 256 | `protocol = "HTTP"` sets the protocol used for routing traffic to the targets, and `target_type = "ip"` sets the type of target the target group routes traffic to; in this case, 257 | the targets are specified by IP address. 258 | 259 | Directory: `terraform/load_balancer` 260 | 261 | ```terraform 262 | # Create a target group 263 | resource "aws_lb_target_group" "target_group" { 264 | depends_on = [aws_lb.lb] 265 | name = var.target_group_name 266 | port = var.container_port 267 | protocol = "HTTP" 268 | vpc_id = var.vpc_id 269 | target_type = "ip" 270 | } 271 | ``` 272 |
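After `terraform apply`, one way to confirm that the ECS tasks registered in the target group are passing health checks is the sketch below; the target group name is an assumption standing in for `var.target_group_name`:

```bash
# Resolve the target group ARN, then list the health state of its targets.
TG_ARN=$(aws elbv2 describe-target-groups \
  --names bedrock-qa-target-group \
  --query 'TargetGroups[0].TargetGroupArn' --output text)

aws elbv2 describe-target-health \
  --target-group-arn "$TG_ARN" \
  --query 'TargetHealthDescriptions[].TargetHealth.State'
```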
273 | ## ECS 274 | 275 | The ECS Terraform script creates the three required resources: cluster, task definition, and service. 276 | 277 | ### Cluster 278 | 279 | The cluster is a logical grouping of tasks or services. The cluster also contains the infrastructure capacity: 280 | Amazon EC2 instances, AWS Fargate, and networking (VPC and subnets). 281 | 282 | Directory: `terraform/ecs` 283 | 284 | ```terraform 285 | # Create an ECS cluster 286 | resource "aws_ecs_cluster" "ecs_cluster" { 287 | name = var.ecs_cluster_name 288 | 289 | tags = { 290 | Environment = var.environment 291 | Name = var.ecs_cluster_name 292 | Application = var.application_name 293 | } 294 | } 295 | ``` 296 | 297 | ### Task definition 298 | 299 | The task definition defines the container configuration: the ECR Docker image, runtime platform (OS), ports, 300 | network mode, maximum memory, and maximum CPU, as well as the specific CPU and memory resources allocated 301 | to each task. Additionally, the task definition outlines the IAM role utilized by the tasks 302 | and the chosen launch type, which determines the underlying infrastructure hosting the tasks. 303 | 304 | The following Terraform snippet is designed to fetch the latest Git commit hash, serving as a dynamic and version-specific tag for the container. 305 | 306 | Directory: `terraform/ecs` 307 | 308 | ```terraform 309 | # Get the latest git commit hash (feel free to add more variables) 310 | data "external" "envs" { 311 | program = ["sh", "-c", <<-EOSCRIPT 312 | # git rev-parse HEAD returns the current commit hash 313 | jq -n --arg SHA "$(git rev-parse HEAD)" '{ "sha": $SHA }' 314 | EOSCRIPT 315 | ] 316 | } 317 | 318 | # Create an ECS task definition 319 | resource "aws_ecs_task_definition" "ecs_task_definition" { 320 | 321 | #container_definitions = file("../.aws/task-definition.json") 322 | container_definitions = templatefile("${path.module}/../../.aws/task-definition.json", 323 | { tag = data.external.envs.result.sha, 324 | ecr = var.ecr_repository_url, 325 | service_name = var.ecs_service_name, 326 | region = var.region, 327 | logs_group_name = var.logs_group_name 328 | }) 329 | 330 | runtime_platform { 331 | operating_system_family = "LINUX" 332 | cpu_architecture = "X86_64" 333 | } 334 | 335 | family = var.ecs_task_family_name 336 | requires_compatibilities = ["FARGATE"] # use Fargate as the launch type 337 | network_mode = "awsvpc" # use the awsvpc network mode, which is required for Fargate 338 | memory = var.memory # Specify the memory the container requires 339 | cpu = var.cpu # Specify the CPU the container requires 340 | execution_role_arn = aws_iam_role.ecs_task_executor_role.arn 341 | task_role_arn = aws_iam_role.ecs_task_role.arn 342 | 343 | tags = { 344 | Environment = var.environment 345 | FamilyName = var.ecs_task_family_name 346 | Application = var.application_name 347 | } 348 | } 349 | ``` 350 | 351 | ### Service 352 | 353 | The following Terraform code snippet defines the ECS service using the task definition, the load balancer, and 354 | the network configuration: 355 | 356 | Directory: `terraform/ecs` 357 | 358 | ```terraform 359 | # Create an ECS service 360 | resource "aws_ecs_service" "ecs_service" { 361 | name = var.ecs_service_name 362 | cluster = aws_ecs_cluster.ecs_cluster.id 363 | task_definition = aws_ecs_task_definition.ecs_task_definition.arn 364 | launch_type = "FARGATE" 365 | desired_count = 2 # Number of containers 366 | depends_on = [] 367 | 368 | load_balancer { 369 | target_group_arn = var.lb_target_group_arn 370 | container_name = var.ecs_service_name 371 | container_port = 80 372 | } 373 | 374 | network_configuration { 375 | subnets = var.private_subnets # Instances in these subnets cannot be reached directly from the Internet 376 | # assign_public_ip = true
377 | security_groups = var.ecs_tasks_security_group_id 378 | } 379 | 380 | tags = { 381 | Environment = var.environment 382 | Application = var.application_name 383 | Name = var.ecs_service_name 384 | } 385 | } 386 | ``` 387 | 388 | ## Policies 389 | 390 | ### ECS task execution role policy 391 | 392 | ECS tasks need this role to pull container images and to publish container logs to Amazon CloudWatch on your behalf. 393 | 394 | Directory: `terraform/ecs` 395 | 396 | ```terraform 397 | # Generates an IAM policy document for the ECS task executor role 398 | data "aws_iam_policy_document" "ecs_task_executor_policy" { 399 | statement { 400 | sid = 1 401 | actions = [ 402 | "logs:CreateLogStream", 403 | "logs:PutLogEvents", 404 | "logs:CreateLogGroup" 405 | ] 406 | resources = [ 407 | "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:*"] 408 | } 409 | statement { 410 | sid = 2 411 | actions = [ 412 | "ecr:GetAuthorizationToken", 413 | "ecr:BatchCheckLayerAvailability", 414 | "ecr:GetDownloadUrlForLayer", 415 | "ecr:BatchGetImage" 416 | ] 417 | resources = ["*"] 418 | } 419 | } 420 | ``` 421 | 422 | ### Tasks role policy 423 | 424 | This role grants the services deployed in your ECS containers access to other AWS services. The following Terraform code snippet grants access to the AWS Bedrock, 425 | AWS S3, and AWS Secrets Manager services. 426 | 427 | Directory: `terraform/ecs` 428 | 429 | ```terraform 430 | data "aws_iam_policy_document" "ecs_task_policy" { 431 | statement { 432 | sid = 1 433 | actions = ["sts:AssumeRole"] 434 | resources = ["*"] 435 | } 436 | statement { 437 | sid = 2 438 | actions = ["bedrock:InvokeModel", "bedrock:ListCustomModels", 439 | "bedrock:ListFoundationModels", "bedrock:InvokeModelWithResponseStream"] 440 | resources = ["arn:aws:bedrock:*::foundation-model/*"] 441 | } 442 | statement { 443 | sid = 3 444 | actions = ["s3:GetObject"] 445 | resources = ["arn:aws:s3:::bedrock-qa-bucket-tf/*"] 446 | } 447 | statement { 448 | sid = 4 449 | actions = ["secretsmanager:GetSecretValue"] 450 | resources = var.secrets_manager_arns 451 | } 452 | } 453 | ``` 454 | 455 | ## Docker image 456 | 457 | The following Dockerfile defines the container that will be running in ECS: 458 | 459 | ```dockerfile 460 | FROM python:3.12-slim-bullseye 461 | 462 | # Install curl and download the poetry installer 463 | RUN apt-get update && \ 464 | DEBIAN_FRONTEND=noninteractive apt-get install -y curl && \ 465 | curl -sSL https://install.python-poetry.org | python 466 | 467 | # Sets the PATH to get the poetry bin 468 | ENV PATH="/root/.local/bin:${PATH}" 469 | 470 | # Set the working directory 471 | WORKDIR /code 472 | 473 | # Copy the files to the working directory 474 | COPY ./pyproject.toml /code/pyproject.toml 475 | COPY ./poetry.lock /code/poetry.lock 476 | COPY ./README.md /code/README.md 477 | COPY ./src/app /code/app 478 | 479 | # Configure poetry to create virtualenvs inside the project 480 | RUN poetry config virtualenvs.in-project true 481 | 482 | # Install dependencies using poetry 483 | RUN poetry install --no-root 484 | 485 | # Defines the port that the application listens on 486 | EXPOSE 80 487 | 488 | # Run the application using uvicorn on port 80 489 | CMD ["poetry", "run", "uvicorn", "--host", "0.0.0.0", "--port", "80", "app.main:app", "--reload"] 490 | ``` 491 | 492 |
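Before deploying to ECS, the image can be built and exercised locally. This is a sketch: the image name is arbitrary, and the AWS variables are only needed so the app can reach Bedrock and S3 from your machine.

```bash
# Build the image and run it on port 80.
docker build -t bedrock-qa-app .

docker run --rm -p 80:80 \
  -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION \
  bedrock-qa-app

# In another terminal, check that the API answers:
curl http://localhost:80/
```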
493 | ## REST API 494 | 495 | The following code snippets show the endpoints created with the FastAPI library. 497 | 498 | Directory: `src/app` 499 | 500 | ```python 501 | @app.get("/", response_class=HTMLResponse) 502 | async def root(): 503 | """ 504 | Endpoint for handling GET requests at the root path ("/"). 505 | Returns a welcome message. 506 | """ 507 | ... 508 | ``` 509 | 510 | ```python 511 | @app.post("/ask") 512 | async def question(body: Body): 513 | """ 514 | Endpoint for handling POST requests at the "/ask" path. 515 | Receives a request body parameter named 'body' of type 'Body'. 516 | Returns the model answer. 517 | """ 518 | ... 519 | ``` 520 | 521 | ```python 522 | @app.get("/collectioninfo") 523 | async def collection_info(): 524 | """ 525 | Endpoint for handling GET requests at the "/collectioninfo" path. 526 | Returns Qdrant collection information. 527 | """ 528 | ... 529 | ``` 530 |
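The endpoints can be exercised with curl against the locally running container. The exact request schema comes from the `Body` Pydantic model in `src/app`; a single `question` field is assumed here for illustration:

```bash
# Ask a question, then inspect the Qdrant collection.
curl -X POST http://localhost:80/ask \
  -H "Content-Type: application/json" \
  -d '{"question": "What documents are required?"}'   # field name is an assumption

curl http://localhost:80/collectioninfo
```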
531 | ### ECS policy 532 | 533 | Set of actions to create ECS resources 534 | 535 | ```json 536 | { 537 | "Version": "2012-10-17", 538 | "Statement": [ 539 | { 540 | "Sid": "ECSPermissions1", 541 | "Effect": "Allow", 542 | "Action": [ 543 | "ecs:DeregisterTaskDefinition", 544 | "ecs:RegisterTaskDefinition", 545 | "ecs:DescribeTaskDefinition", 546 | "ecs:DescribeClusters", 547 | "ecs:ListClusters" 548 | ], 549 | "Resource": "*" 550 | }, 551 | { 552 | "Sid": "ECSPermissions2", 553 | "Effect": "Allow", 554 | "Action": [ 555 | "ecs:UpdateCluster", 556 | "ecs:UpdateClusterSettings", 557 | "ecs:DeleteCluster", 558 | "ecs:CreateCluster" 559 | ], 560 | "Resource": "arn:aws:ecs:us-east-1:account-id:*" 561 | }, 562 | { 563 | "Sid": "ECSPermissions3", 564 | "Effect": "Allow", 565 | "Action": [ 566 | "ecs:UpdateService", 567 | "ecs:CreateService", 568 | "ecs:DeleteService", 569 | "ecs:DescribeServices", 570 | "ecs:ListServices", 571 | "ecs:ListServicesByNamespace" 572 | ], 573 | "Resource": "arn:aws:ecs:us-east-1:account-id:service/*/*" 574 | } 575 | ] 576 | } 577 | ``` 578 | 579 | ## VPC Link 580 | 581 | The VPC link allows API Gateway to reach the Amazon ECS service running within the Amazon VPC. Subsequently, you establish a REST or HTTP API that leverages the VPC link to establish a connection with the Amazon ECS service. 582 | 583 | Note that the HTTP API (v2) VPC link used below is configured with subnet and security group IDs; the `target_arns` argument, which receives a list of Network Load Balancer ARNs targeted by the VPC link, belongs to the REST API (v1) `aws_api_gateway_vpc_link` resource. 584 | 585 | Directory: `terraform/api_gateway` 586 | 587 | ```terraform 588 | # Create a VPC Link from the API Gateway to the Load Balancer 589 | resource "aws_apigatewayv2_vpc_link" "vpc_link" { 590 | name = var.vpc_link_name 591 | security_group_ids = var.security_group_ids 592 | subnet_ids = var.subnet_ids 593 | } 594 | ``` 595 | 596 | ### VPC link policy 597 | 598 | ```json 599 | { 600 | "Version": "2012-10-17", 601 | "Statement": [ 602 | { 603 | "Effect": "Allow", 604 | "Action": [ 605 | "apigateway:POST", 606 | "apigateway:GET", 607 | "apigateway:PATCH", 608 | "apigateway:DELETE" 609 | ], 610 | "Resource": [ 611 | "arn:aws:apigateway:us-east-1::/vpclinks", 612 | "arn:aws:apigateway:us-east-1::/vpclinks/*" 613 | ] 614 | }, 615 | { 616 | "Effect": "Allow", 617 | "Action": [ 618 | "elasticloadbalancing:DescribeLoadBalancers" 619 | ], 620 | "Resource": "*" 621 | }, 622 | { 623 | "Effect": "Allow", 624 | "Action": [ 625 | "ec2:CreateVpcEndpointServiceConfiguration", 626 | "ec2:DeleteVpcEndpointServiceConfigurations", 627 | "ec2:DescribeVpcEndpointServiceConfigurations", 628 | "ec2:ModifyVpcEndpointServicePermissions" 629 | ], 630 | "Resource": "*" 631 | } 632 | ] 633 | } 634 | ``` 635 | 636 | ## API Gateway 637 | 638 | The `aws_apigatewayv2_api` resource is used for creating and deploying HTTP APIs. 639 | 640 | Directory: `terraform/api_gateway` 641 | 642 | ```terraform 643 | resource "aws_apigatewayv2_api" "example" { 644 | name = var.api_name 645 | protocol_type = "HTTP" 646 | description = "HTTP API for Question and Answer App" 647 | version = "1.0" 648 | 649 | tags = { 650 | Environment = var.environment 651 | Application = var.application_name 652 | } 653 | } 654 | ``` 655 | 656 | ### Integration 657 | 658 | The integration connects the HTTP API to the Load Balancer. It also defines 659 | the `integration_type` (AWS, AWS_PROXY, HTTP, HTTP_PROXY, and MOCK), the HTTP method, the URI, 661 | and the connection type. 662 | 663 | The `integration_uri` argument expects the ARN of the Load Balancer listener. 664 | 665 | With the `HTTP_PROXY` type, API Gateway passes the incoming request from the client to the HTTP endpoint and passes the outgoing response from the HTTP endpoint back to the client. Setting up an integration request or integration response is not required when utilizing the HTTP proxy type. More information: https://docs.aws.amazon.com/apigateway/latest/developerguide/api-gateway-api-integration-types.html
666 | 667 | Directory: `terraform/api_gateway` 668 | 669 | ```terraform 670 | # Integration for POST /ask 671 | resource "aws_apigatewayv2_integration" "ask_integration" { 672 | api_id = aws_apigatewayv2_api.example.id 673 | integration_type = "HTTP_PROXY" 674 | integration_uri = var.lb_listener_arn 675 | integration_method = "POST" 676 | connection_type = "VPC_LINK" 677 | connection_id = aws_apigatewayv2_vpc_link.vpc_link.id 678 | timeout_milliseconds = 30000 # 30 seconds 679 | } 680 | ``` 681 | 682 | ### Routes 683 | 684 | The `aws_apigatewayv2_route` resource defines the HTTP method and the backend endpoint `/ask`. 685 | 686 | Directory: `terraform/api_gateway` 687 | 688 | ```terraform 689 | resource "aws_apigatewayv2_route" "ask_route" { 690 | api_id = aws_apigatewayv2_api.example.id 691 | route_key = "POST /ask" 692 | target = "integrations/${aws_apigatewayv2_integration.ask_integration.id}" 693 | } 694 | ``` 695 |
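Once deployed, the route can be invoked through the gateway. The sketch below looks up the API endpoint by name; `competition-notices` is taken from the API name pattern in the policy further below and may differ from your `var.api_name`:

```bash
# Find the HTTP API invoke URL, then call the POST /ask route through API Gateway.
API_ENDPOINT=$(aws apigatewayv2 get-apis \
  --query "Items[?Name=='competition-notices'].ApiEndpoint" --output text)

curl -X POST "$API_ENDPOINT/ask" \
  -H "Content-Type: application/json" \
  -d '{"question": "What documents are required?"}'   # request schema is an assumption
```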
696 | ### CloudWatch logs 697 | 698 | This resource creates a log group to store logs from API Gateway. 699 | 700 | Directory: `terraform/api_gateway` 701 | 702 | ```terraform 703 | resource "aws_cloudwatch_log_group" "apigateway" { 704 | name = "/aws/apigateway/${var.application_name}/${var.api_name}" 705 | retention_in_days = 7 706 | tags = { 707 | Environment = var.environment 708 | Application = var.application_name 709 | } 710 | } 711 | ``` 712 | 713 | ### CloudWatch policy 714 | 715 | ```json 716 | { 717 | "Version": "2012-10-17", 718 | "Statement": [ 719 | { 720 | "Sid": "CloudWatchLogsPermissions", 721 | "Effect": "Allow", 722 | "Action": [ 723 | "logs:DescribeLogGroups", 724 | "logs:FilterLogEvents", 725 | "logs:CreateLogGroup" 726 | ], 727 | "Resource": "*" 728 | } 729 | ] 730 | } 731 | ``` 732 | 733 | ### Stage 734 | 735 | Each stage is a named reference to a deployment of the API and is made available for client applications to call. 736 | 737 | Directory: `terraform/api_gateway` 738 | 739 | ```terraform 740 | resource "aws_apigatewayv2_stage" "example" { 741 | api_id = aws_apigatewayv2_api.example.id 742 | description = "Stage for HTTP API" 743 | name = "$default" # The $default stage is a special stage that's automatically associated with new deployments. 744 | auto_deploy = true # Whether updates to an API automatically trigger a new deployment. 745 | 746 | access_log_settings { 747 | destination_arn = aws_cloudwatch_log_group.apigateway.arn 748 | format = jsonencode({ 749 | requestId = "$context.requestId", 750 | ip = "$context.identity.sourceIp", 751 | user = "$context.identity.user", 752 | caller = "$context.identity.caller", 753 | request = "$context.requestTime", 754 | status = "$context.status", 755 | response = "$context.responseLength" 756 | }) 757 | } 758 | tags = { 759 | Environment = var.environment 760 | Application = var.application_name 761 | } 762 | } 763 | ``` 764 | 765 | ### API Gateway policy 766 | 767 | ```json 771 | { 772 | "Version": "2012-10-17", 773 | "Statement": [ 774 | { 775 | "Sid": "APIGatewayPermissions", 776 | "Effect": "Allow", 777 | "Action": [ 778 | "apigateway:DELETE", 779 | "apigateway:GET", 780 | "apigateway:PATCH", 781 | "apigateway:POST", 782 | "apigateway:PUT", 783 | "apigateway:TagResource" 784 | ], 785 | "Resource": [ 786 | "arn:aws:apigateway:us-east-1::/account", 787 | "arn:aws:apigateway:us-east-1::/apis", 788 | "arn:aws:apigateway:us-east-1::/apis/*", 789 | "arn:aws:apigateway:us-east-1::/tags/*", 790 | "arn:aws:apigateway:us-east-1::/usageplans", 791 | "arn:aws:apigateway:us-east-1::/usageplans/*" 792 | ], 793 | "Condition": { 794 | "StringLikeIfExists": { 795 | "apigateway:Request/apiName": "competition-notices*" 796 | } 797 | } 798 | }, 799 | { 800 | "Sid": "LogsPermissions", 801 | "Effect": "Allow", 802 | "Action": [ 803 | "logs:CreateLogDelivery", 804 | "logs:CreateLogGroup", 805 | "logs:DeleteLogDelivery", 806 | "logs:DeleteLogGroup", 807 | "logs:DescribeLogGroups", 808 | "logs:DescribeLogStreams", 809 | "logs:DescribeResourcePolicies", 810 | "logs:FilterLogEvents", 811 | "logs:GetLogDelivery", 812 | "logs:GetLogEvents", 813 | "logs:ListLogDeliveries", 814 | "logs:ListTagsLogGroup", 815 | "logs:PutResourcePolicy", 816 | "logs:PutRetentionPolicy", 817 | "logs:TagResource", 818 | "logs:UpdateLogDelivery" 819 | ], 820 | "Resource": [ 821 | "arn:aws:logs:us-east-1:account-id:log-group:*" 822 | ] 823 | } 824 | ] 825 | } 826 | ``` 827 | -------------------------------------------------------------------------------- /docs/network.md: -------------------------------------------------------------------------------- 1 | # AWS Network 2 | 3 | ## AWS VPC 4 | 5 | Directory: `terraform/network` 6 | 7 | ```terraform 8 | variable "aws_vpc_cidr_block" { 9 | type = string 10 | default = "10.0.0.0/16" 11 | description = "CIDR block for vpc" 12 | } 13 | ``` 14 |
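Before the address breakdown below, the capacity of a CIDR block can be sanity-checked with plain shell arithmetic:

```bash
# Number of addresses in a CIDR block: 2^(32 - prefix_length)
echo $(( 2 ** (32 - 16) ))   # 65536 addresses in 10.0.0.0/16
echo $(( 2 ** (32 - 24) ))   # 256 addresses in a /24, for comparison
```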
15 | IP address: `10.0.0.0`<br/> 16 | IP range with CIDR notation: `10.0.0.0/16`<br/>
17 | IP range: the first 16 bits of the IP address are fixed and the rest are flexible 18 | 19 | A single IP address is 32 bits, grouped into four 8-bit octets: 20 | 21 | ``` 22 | 10 0 0 0 23 | 00001010 00000000 00000000 00000000 24 | FIXED | FLEXIBLE 25 | ``` 26 | 27 | An IP range with the CIDR notation /16 creates 2^(32-16) = 2^16 = 65,536 possible IP addresses. 28 | 29 | 30 | ## Subnets 31 | 32 | The VPC has two private subnets and two public subnets. Each subnet has a CIDR block which must be 33 | a subset of the VPC CIDR `10.0.0.0/16`. Configuring the subnets in two different Availability Zones 34 | increases redundancy and fault tolerance. 35 | 36 | AWS reserves five IP addresses in each subnet for routing, Domain Name System (DNS), and network management. 37 | The remaining IP addresses are divided among the four subnets. 38 | 39 | Directory: `terraform/network` 40 | 41 | ```terraform 42 | # Create public subnets 43 | resource "aws_subnet" "public_subnets" { 44 | count = length(var.aws_public_subnet_cidr_blocks) 45 | vpc_id = aws_vpc.main.id 46 | cidr_block = element(var.aws_public_subnet_cidr_blocks, count.index) 47 | availability_zone = element(data.aws_availability_zones.available.names, count.index) 48 | map_public_ip_on_launch = true 49 | ... 50 | } 51 | ``` 52 | 53 | Directory: `terraform/network` 54 | 55 | ```terraform 56 | # Create private subnets 57 | resource "aws_subnet" "private_subnets" { 58 | count = length(var.aws_private_subnet_cidr_blocks) 59 | vpc_id = aws_vpc.main.id 60 | cidr_block = element(var.aws_private_subnet_cidr_blocks, count.index) 61 | availability_zone = element(data.aws_availability_zones.available.names, count.index) 62 | ... 63 | } 64 | ``` 65 | 66 | ## Internet gateway 67 | 68 | The Internet Gateway allows traffic to flow in and out of the VPC to the public internet. 69 | In this case, it will allow your ECS service to make outbound connections to the Qdrant service 70 | hosted on Google Cloud. 71 | 72 | Directory: `terraform/network` 73 | 74 | ```terraform 75 | # Create an Internet Gateway and attach it to the VPC 76 | resource "aws_internet_gateway" "main" { 77 | vpc_id = aws_vpc.main.id 78 | } 79 | ``` 80 | 81 | ## Route table 82 | 83 | The route table has a set of rules, called routes, that determine where network traffic 84 | is directed. The route table allows traffic between all subnets in the VPC.
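After applying the network module, the routes can be inspected from the CLI; the `Name` tag filters below match the tags assigned to the route tables in `terraform/network`:

```bash
# List the public and private route tables and their routes.
aws ec2 describe-route-tables \
  --filters "Name=tag:Name,Values=public-route-table,private-route-table-*" \
  --query 'RouteTables[].{Id:RouteTableId,Routes:Routes[].[DestinationCidrBlock,GatewayId,NatGatewayId]}'
```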
85 | 86 | ### Route table for public networks 87 | 88 | Directory: `terraform/network` 89 | 90 | ```terraform 91 | # Create a Route Table 92 | resource "aws_route_table" "public_route_table" { 93 | vpc_id = aws_vpc.main.id 94 | 95 | route { 96 | cidr_block = "0.0.0.0/0" 97 | gateway_id = aws_internet_gateway.main.id 98 | } 99 | 100 | tags = { 101 | Name = "public-route-table" 102 | Application = var.application_name 103 | Environment = var.environment 104 | } 105 | } 106 | 107 | resource "aws_route_table_association" "public_route_table_association" { 108 | count = length(var.aws_public_subnet_cidr_blocks) 109 | subnet_id = element(aws_subnet.public_subnets.*.id, count.index) 110 | route_table_id = element(aws_route_table.public_route_table.*.id, count.index) 111 | } 112 | ``` 113 | 114 | ### Route table for private subnets 115 | 116 | Directory: `terraform/network` 117 | 118 | ```terraform 119 | resource "aws_route_table" "private_route_table" { 120 | count = length(var.aws_private_subnet_cidr_blocks) 121 | vpc_id = aws_vpc.main.id 122 | 123 | route { 124 | cidr_block = "0.0.0.0/0" 125 | nat_gateway_id = element(aws_nat_gateway.nat_gateway.*.id, count.index) 126 | } 127 | 128 | tags = { 129 | Name = "private-route-table-${count.index}" 130 | Application = var.application_name 131 | Environment = var.environment 132 | } 133 | } 134 | 135 | resource "aws_route_table_association" "private_route_table_association" { 136 | count = length(var.aws_private_subnet_cidr_blocks) 137 | subnet_id = element(aws_subnet.private_subnets.*.id, count.index) 138 | route_table_id = element(aws_route_table.private_route_table.*.id, count.index) 139 | } 140 | ``` 141 | 142 | ## NAT gateway 143 | 144 |
Figure 2. Elastic Container Service communication with Qdrant Cloud using NAT gateway 149 | 150 | ### Elastic IP 151 | 152 | The Elastic IP address is a static IPv4 address designed for dynamic cloud computing. 153 | The Elastic IP provides a fixed, public IP address that routes to the NAT gateway. 154 | 155 | Directory: `terraform/network` 156 | 157 | ```terraform 158 | # Create an Elastic IP address for the NAT Gateway 159 | resource "aws_eip" "nat" { 160 | count = length(var.aws_public_subnet_cidr_blocks) 161 | domain = "vpc" 162 | depends_on = [aws_internet_gateway.main] 163 | } 164 | ``` 165 | ### NAT gateway 166 | The NAT gateway enables instances in a private subnet to connect to the internet, but prevents the internet 167 | from initiating a connection with those instances. We need a NAT gateway to connect to the Qdrant Cloud service. 168 | 169 | Directory: `terraform/network` 170 | 171 | ```terraform 172 | # Create a NAT Gateway for the public subnets 173 | # Required to allow the ECS tasks to access the internet and communicate with Qdrant Cloud 174 | resource "aws_nat_gateway" "nat_gateway" { 175 | count = length(var.aws_public_subnet_cidr_blocks) 176 | allocation_id = element(aws_eip.nat.*.id, count.index) 177 | subnet_id = element(aws_subnet.public_subnets.*.id, count.index) 178 | 179 | tags = { 180 | Name = "nat-gateway" 181 | Subnet = "public" 182 | Application = var.application_name 183 | Environment = var.environment 184 | } 185 | 186 | # To ensure proper ordering, it is recommended to add an explicit dependency 187 | # on the Internet Gateway for the VPC. 188 | depends_on = [aws_internet_gateway.main] 189 | } 190 | ``` 191 | 192 | ## Security groups 193 | 194 | 195 | The security groups control the inbound and outbound traffic of the Load Balancer and the 196 | ECS tasks. 197 | 198 | Directory: `terraform/network` 199 | 200 | ```terraform 201 | # Create a security group for the load balancer 202 | resource "aws_security_group" "lb" { 203 | vpc_id = aws_vpc.main.id 204 | name = var.security_group_name_lb 205 | 206 | # This ingress rule allows incoming HTTP traffic. 207 | ingress { 208 | from_port = 80 # Allow port 80 (HTTP) 209 | to_port = 80 # Allow port 80 (HTTP) 210 | protocol = "tcp" # The protocol that should be allowed. 211 | cidr_blocks = ["0.0.0.0/0"] # This allows traffic from any IP address. 212 | } 213 | 214 | # This egress rule allows all outgoing traffic. 215 | egress { 216 | from_port = 0 217 | to_port = 0 218 | protocol = "-1" # This allows all protocols. 219 | cidr_blocks = ["0.0.0.0/0"] # This allows traffic to any IP address. 220 | } 221 | ... 222 | } 223 | ``` 224 | 225 | ```terraform 226 | # Create a security group for the ECS tasks 227 | resource "aws_security_group" "ecs_tasks" { 228 | name = var.security_group_name_ecs_tasks 229 | vpc_id = aws_vpc.main.id 230 | 231 | # Allows incoming TCP traffic on the port specified by var.container_port from the IP 232 | # addresses in the CIDR block specified by var.aws_vpc_cidr_block. 233 | ingress { 234 | from_port = var.container_port 235 | to_port = var.container_port 236 | protocol = "tcp" 237 | cidr_blocks = [var.aws_vpc_cidr_block] 238 | } 239 | 240 | # Allows incoming TCP traffic on port 443 from the IP addresses in 241 | # the CIDR block specified by var.aws_vpc_cidr_block. 242 | # The security group attached to the VPC endpoint must allow incoming 243 | # connections on TCP port 443 from the private subnet of the VPC.
244 | ingress { 245 | protocol = "tcp" 246 | from_port = 443 # Allow port 443 (HTTPS) 247 | to_port = 443 # Allow port 443 (HTTPS) 248 | cidr_blocks = [var.aws_vpc_cidr_block] 249 | } 250 | 251 | # Allows all outgoing traffic to any IP address (0.0.0.0/0) and any protocol 252 | egress { 253 | from_port = 0 254 | to_port = 0 255 | protocol = "-1" 256 | cidr_blocks = ["0.0.0.0/0"] 257 | } 258 | 259 | # Allows outgoing TCP traffic on port 443 to the destinations specified by the 260 | # prefix list of your S3 VPC endpoint. 261 | egress { 262 | from_port = 443 # Allow port 443 (HTTPS) 263 | to_port = 443 # Allow port 443 (HTTPS) 264 | protocol = "tcp" 265 | prefix_list_ids = [ 266 | aws_vpc_endpoint.s3.prefix_list_id 267 | ] 268 | } 269 | 270 | # Allows outgoing TCP traffic on port 443 to the IP addresses in the CIDR block 271 | # specified by var.aws_vpc_cidr_block. 272 | egress { 273 | from_port = 443 # Allow port 443 (HTTPS) 274 | to_port = 443 # Allow port 443 (HTTPS) 275 | protocol = "tcp" 276 | cidr_blocks = [var.aws_vpc_cidr_block] 277 | } 278 | 279 | ... 280 | } 281 | ``` 282 | 283 | ### Network policy 284 | 285 | ```json 286 | { 287 | "Version": "2012-10-17", 288 | "Statement": [ 289 | { 290 | "Sid": "EC2Permissions", 291 | "Effect": "Allow", 292 | "Action": [ 293 | "ec2:AllocateAddress", 294 | "ec2:AssociateRouteTable", 295 | "ec2:AttachInternetGateway", 296 | "ec2:AuthorizeSecurityGroupEgress", 297 | "ec2:AuthorizeSecurityGroupIngress", 298 | "ec2:CreateDefaultSubnet", 299 | "ec2:CreateDefaultVpc", 300 | "ec2:CreateEgressOnlyInternetGateway", 301 | "ec2:CreateInternetGateway", 302 | "ec2:CreateLocalGatewayRouteTable", 303 | "ec2:CreateNatGateway", 304 | "ec2:CreateRoute", 305 | "ec2:CreateRouteTable", 306 | "ec2:CreateSecurityGroup", 307 | "ec2:CreateSubnet", 308 | "ec2:CreateTags", 309 | "ec2:CreateVpc", 310 | "ec2:DeleteEgressOnlyInternetGateway", 311 | "ec2:DeleteInternetGateway", 312 | "ec2:DeleteLocalGatewayRouteTable", 313 | "ec2:DeleteNatGateway", 314 | "ec2:DeleteRoute", 315 | "ec2:DeleteRouteTable", 316 | "ec2:DeleteSecurityGroup", 317 | "ec2:DeleteSubnet", 318 | "ec2:DeleteVpc", 319 | "ec2:Describe*", 320 | "ec2:DescribeNatGateways", 321 | "ec2:DescribeNetworkAcls", 322 | "ec2:DetachInternetGateway", 323 | "ec2:DisassociateAddress", 324 | "ec2:DisassociateRouteTable", 325 | "ec2:ModifySubnetAttribute", 326 | "ec2:ModifyVpcAttribute", 327 | "ec2:ReleaseAddress", 328 | "ec2:RevokeSecurityGroupEgress", 329 | "ec2:RunInstances" 330 | ], 331 | "Resource": "*" 332 | } 333 | ] 334 | } 335 | ``` 336 | 337 | ## VPC Endpoints 338 | 339 | VPC endpoints allow access to other AWS services from within the VPC without needing to traverse 340 | the public internet. 341 | 342 |
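A quick way to confirm which endpoints exist in the account after `terraform apply` (nothing assumed; this lists everything):

```bash
# List all VPC endpoints with their service name, type, and state.
aws ec2 describe-vpc-endpoints \
  --query 'VpcEndpoints[].{Service:ServiceName,Type:VpcEndpointType,State:State}' \
  --output table
```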
Figure 3. VPC endpoint example 347 | 348 | 349 | The ECR Docker endpoint permits ECS to pull Docker images. This endpoint's network interfaces are created in the private subnets, and the security group rules are the same as for the ECS tasks. More information: https://docs.aws.amazon.com/AmazonECR/latest/userguide/vpc-endpoints.html 350 | 351 | Directory: `terraform/network` 352 | 353 | ```terraform 354 | resource "aws_vpc_endpoint" "ecr_dkr" { 355 | vpc_id = aws_vpc.main.id 356 | service_name = "com.amazonaws.${var.region}.ecr.dkr" 357 | vpc_endpoint_type = "Interface" 358 | private_dns_enabled = true 359 | subnet_ids = aws_subnet.private_subnets.*.id 360 | 361 | security_group_ids = [ 362 | aws_security_group.ecs_tasks.id, 363 | ] 364 | ... 365 | } 366 | ``` 367 | 368 | The ECR API endpoint permits ECS to push and pull Docker images to and from ECR. This endpoint's network 369 | interfaces are created in the private subnets, and the security group rules are the same 370 | as for the ECS tasks. 371 | 372 | Directory: `terraform/network` 373 | 374 | ```terraform 375 | # Create a VPC Endpoint for ECR API 376 | resource "aws_vpc_endpoint" "ecr_api" { 377 | vpc_id = aws_vpc.main.id 378 | service_name = "com.amazonaws.${var.region}.ecr.api" 379 | vpc_endpoint_type = "Interface" 380 | private_dns_enabled = true 381 | subnet_ids = aws_subnet.private_subnets.*.id 382 | 383 | security_group_ids = [ 384 | aws_security_group.ecs_tasks.id, 385 | ] 386 | 387 | ... 388 | } 389 | ``` 390 | 391 | The Secrets Manager endpoint allows ECS to retrieve secrets without leaving the Amazon network. 392 | 393 | Directory: `terraform/network` 394 | 395 | ```terraform 396 | # Create a VPC Endpoint for Secrets Manager 397 | resource "aws_vpc_endpoint" "secretsmanager" { 398 | vpc_id = aws_vpc.main.id 399 | service_name = "com.amazonaws.${var.region}.secretsmanager" 400 | vpc_endpoint_type = "Interface" 401 | private_dns_enabled = true 402 | subnet_ids = aws_subnet.private_subnets.*.id 403 | 404 | security_group_ids = [ 405 | aws_security_group.ecs_tasks.id, 406 | ] 407 | 408 | tags = { 409 | Name = "Secrets Manager VPC Endpoint" 410 | Environment = var.environment 411 | } 412 | } 413 | ``` 414 | 415 | The CloudWatch endpoint permits resources within your VPC to send logs to CloudWatch. 416 | 417 | Directory: `terraform/network` 418 | 419 | ```terraform 420 | # Create a VPC Endpoint for CloudWatch 421 | resource "aws_vpc_endpoint" "cloudwatch" { 422 | vpc_id = aws_vpc.main.id 423 | service_name = "com.amazonaws.${var.region}.logs" 424 | vpc_endpoint_type = "Interface" 425 | subnet_ids = aws_subnet.private_subnets.*.id 426 | private_dns_enabled = true 427 | 428 | security_group_ids = [ 429 | aws_security_group.ecs_tasks.id, 430 | ] 431 | 432 | ... 433 | } 434 | ``` 435 | 436 | The S3 endpoint permits access to S3 from within your VPC without needing to traverse the public internet. 437 | The gateway endpoint is required because Amazon ECR uses Amazon S3 to store image layers. 438 | 439 | Directory: `terraform/network` 440 | 441 | ```terraform 442 | # Create a VPC Endpoint for S3 443 | resource "aws_vpc_endpoint" "s3" { 444 | vpc_id = aws_vpc.main.id 445 | service_name = "com.amazonaws.${var.region}.s3" 446 | vpc_endpoint_type = "Gateway" 447 | route_table_ids = [aws_vpc.main.default_route_table_id] 448 | 449 | ...
450 | } 451 | ``` 452 | 453 | The Bedrock endpoint allows ECS to access the Bedrock APIs. 454 | 455 | Directory: `terraform/network` 456 | 457 | ```terraform 458 | # Create a VPC endpoint for Bedrock 459 | resource "aws_vpc_endpoint" "bedrock" { 460 | vpc_id = aws_vpc.main.id 461 | service_name = "com.amazonaws.${var.region}.bedrock" 462 | vpc_endpoint_type = "Interface" 463 | private_dns_enabled = true 464 | subnet_ids = aws_subnet.private_subnets.*.id 465 | 466 | security_group_ids = [ 467 | aws_security_group.ecs_tasks.id, 468 | ] 469 | 470 | tags = { 471 | Name = "Bedrock VPC Endpoint" 472 | Environment = var.environment 473 | } 474 | } 475 | ``` 476 | 477 | The Bedrock runtime endpoint allows ECS to access the Bedrock inference API. 478 | 479 | Directory: `terraform/network` 480 | 481 | ```terraform 482 | # Create a VPC endpoint for Bedrock runtime 483 | resource "aws_vpc_endpoint" "bedrock_runtime" { 484 | vpc_id = aws_vpc.main.id 485 | service_name = "com.amazonaws.${var.region}.bedrock-runtime" 486 | vpc_endpoint_type = "Interface" 487 | private_dns_enabled = true 488 | subnet_ids = aws_subnet.private_subnets.*.id 489 | 490 | security_group_ids = [ 491 | aws_security_group.ecs_tasks.id, 492 | ] 493 | 494 | tags = { 495 | Name = "Bedrock Runtime VPC Endpoint" 496 | Environment = var.environment 497 | } 498 | } 499 | ``` 500 | 501 | ### VPC endpoint policy 502 | 503 | Set of actions to create VPC endpoints 504 | 505 | ```json 506 | { 507 | "Version": "2012-10-17", 508 | "Statement": [ 509 | { 510 | "Sid": "Statement1", 511 | "Effect": "Allow", 512 | "Action": [ 513 | "ec2:CreateVpcEndpoint", 514 | "ec2:DescribeVpcEndpoints", 515 | "ec2:DeleteVpcEndpoints", 516 | "ec2:ModifyVpcEndpoint" 517 | ], 518 | "Resource": [ 519 | "arn:aws:ec2:us-east-1:account-id:route-table/*", 520 | "arn:aws:ec2:us-east-1:account-id:vpc-endpoint/*", 521 | "arn:aws:ec2:us-east-1:account-id:vpc/*", 522 | "arn:aws:ec2:us-east-1:account-id:subnet/*", 523 | "arn:aws:ec2:us-east-1:account-id:security-group/*" 524 | ] 525 | }, 526 | { 527 | "Sid": "Statement2", 528 | "Effect": "Allow", 529 | "Action": [ 530 | "ec2:RevokeSecurityGroupIngress", 531 | "ec2:AuthorizeSecurityGroupIngress" 532 | ], 533 | "Resource": [ 534 | "arn:aws:ec2:us-east-1:account-id:security-group/*" 535 | ] 536 | } 537 | ] 538 | } 539 | ``` 540 | -------------------------------------------------------------------------------- /docs/s3.md: -------------------------------------------------------------------------------- 1 | # S3 buckets 2 | 3 | ## Terraform state bucket 4 | 5 | Script to upload the Terraform state file to AWS S3 6 | 7 | Directory: `scripts/upload_state.sh` 8 | 9 | ```bash 10 | #!/bin/bash 11 | 12 | # This script is used to upload the state file to the S3 bucket 13 | 14 | # Set the variables 15 | BUCKET_NAME="terraform-bucket-state-tf" 16 | STATE_FILE="terraform/terraform.tfstate" 17 | AWS_REGION=$(aws configure get region) 18 | 19 | # Create the S3 bucket if it doesn't exist 20 | aws s3api create-bucket --bucket "$BUCKET_NAME" --region "$AWS_REGION" 21 | 22 | # Upload the state file to the S3 bucket 23 | aws s3 cp "$STATE_FILE" "s3://$BUCKET_NAME/state/" 24 | ``` 25 |
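After running the script, the uploaded state can be verified directly against the bucket it creates:

```bash
# Confirm the state file landed under the state/ prefix.
aws s3 ls s3://terraform-bucket-state-tf/state/
```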
"Sid": "bucket" 46 | } 47 | ], 48 | "Version": "2012-10-17" 49 | } 50 | ``` 51 | 52 | ## Documents (PDF) bucket. 53 | 54 | Directory: `terraform/s3` 55 | 56 | Resource to create a new S3 bucket 57 | 58 | ```terraform 59 | # Create an S3 bucket 60 | resource "aws_s3_bucket" "bucket" { 61 | bucket = var.bucket_name 62 | force_destroy = true 63 | 64 | tags = { 65 | Name = "${var.bucket_name} Bucket" 66 | Environment = var.environment 67 | Application = var.application_name 68 | } 69 | } 70 | ``` 71 | 72 | Resource to upload documents from `documents` folder and its subfolders to S3 bucket 73 | 74 | Directory: `terraform/s3` 75 | 76 | ```terraform 77 | # Create an S3 bucket object for each PDF file in the documents directory 78 | resource "aws_s3_object" "object" { 79 | # Recursively look for pdf files inside documents/ 80 | bucket = aws_s3_bucket.bucket.id 81 | for_each = fileset("../documents/${var.subfolder}/", "**/*.pdf") 82 | key = each.value 83 | source = "../documents/${var.subfolder}/${each.value}" 84 | etag = filemd5("../documents/${var.subfolder}/${each.value}") 85 | 86 | tags = { 87 | Name = "${var.bucket_name} Bucket" 88 | Environment = var.environment 89 | Application = var.application_name 90 | } 91 | 92 | depends_on = [ 93 | aws_s3_bucket.bucket, 94 | var.wait_for_lambda_deployment 95 | ] 96 | } 97 | ``` 98 | 99 | ### Policy 100 | 101 | ```json 102 | { 103 | "Version": "2012-10-17", 104 | "Statement": [ 105 | { 106 | "Effect": "Allow", 107 | "Action": [ 108 | "s3:*", 109 | "s3-object-lambda:*" 110 | ], 111 | "Resource": "*" 112 | } 113 | ] 114 | } 115 | ``` -------------------------------------------------------------------------------- /docs/terraform.md: -------------------------------------------------------------------------------- 1 | This tutorial shows how to create a Terraform API Token for GitHub Actions 2 | 3 | 1. Go to https://app.terraform.io/public/signup/account and create an account if you do not have one 4 | 2. Create a new organization 5 | 6 | 3. Got to https://app.terraform.io/app/settings/tokens and create a new API Token 7 | 8 | ![image](https://github.com/mathewsrc/GenerativeAI-Questions-and-Answers-app-with-Bedrock-Langchain-and-FastAPI/assets/94936606/509f508b-ae7b-4165-90fb-c550c111f81f) 9 | 10 | ![image](https://github.com/mathewsrc/GenerativeAI-Questions-and-Answers-app-with-Bedrock-Langchain-and-FastAPI/assets/94936606/b76db7d4-3ebf-40d4-97c6-86110813f6db) 11 | 12 | 4. Go to GitHub -> this project -> Settings -> Secrets and variables -> actions -> `New repository secret` 13 | 14 | ![image](https://github.com/mathewsrc/GenerativeAI-Questions-and-Answers-app-with-Bedrock-Langchain-and-FastAPI/assets/94936606/1c4fe6cc-1f71-4476-b7c9-23e2e24b3670) 15 | 16 | ![image](https://github.com/mathewsrc/GenerativeAI-Questions-and-Answers-app-with-Bedrock-Langchain-and-FastAPI/assets/94936606/4c92ca76-4cc3-4732-9a1c-7fd325f97697) 17 | 18 | 5. Click on `Add secret` 19 | 20 | 21 | 6. Well done! You finished the Terraform setup for GitHub Actions 22 | -------------------------------------------------------------------------------- /docs/vector_store.md: -------------------------------------------------------------------------------- 1 | # Qdrant Vector Store with Lambda and S3 2 | 3 | This project utilizes a Lambda Function with S3 as a trigger to generate a vector store within a 4 | Qdrant cluster. The documents are located in the `documents/` directory. 5 | 6 |
## Docker Image 11 | 12 | The Lambda Function relies on a Docker image stored in ECR. Here are the details of the image 13 | employed by Lambda: 14 | 15 | Directory: `lambda_functions/docker` 16 | 17 | ```docker 18 | FROM public.ecr.aws/lambda/python:3.12 19 | 20 | # Install dependencies 21 | RUN pip3 install \ 22 | --no-cache-dir \ 23 | --platform manylinux2014_x86_64 \ 24 | --target "${LAMBDA_TASK_ROOT}" \ 25 | --implementation cp \ 26 | --python-version 3.12 \ 27 | --only-binary=:all: --upgrade boto3 \ 28 | langchain \ 29 | langchain-community \ 30 | qdrant-client \ 31 | python-dotenv \ 32 | pypdf 33 | 34 | # Copy function code 35 | COPY ./lambda_functions/src/main.py ${LAMBDA_TASK_ROOT} 36 | COPY ./lambda_functions/src/utils.py ${LAMBDA_TASK_ROOT} 37 | COPY ./lambda_functions/src/create_vector_store.py ${LAMBDA_TASK_ROOT} 38 | 39 | # Set the CMD to your handler 40 | CMD [ "main.lambda_handler" ] 41 | ``` 42 | 43 | This Docker image uses a base image provided by AWS which comes with `python 3.12` as the runtime. 44 | The dependencies are installed into a directory provided by the base image, as well as the required Python code. 45 | The CMD is set to call the `main.py` module and its entrypoint function `lambda_handler(event, context)`. 46 | The `lambda_handler(event, context)` function calls `create_vectorstore(...)` from the `create_vector_store` module. 47 | 48 | Directory: `lambda_functions/src` 49 | 50 | ```python 51 | import json 52 | import os 53 | from dotenv import load_dotenv 54 | from create_vector_store import create_vectorstore 55 | 56 | 57 | load_dotenv() 58 | 59 | EMBEDDING_MODEL = "amazon.titan-embed-text-v1" 60 | COLLECTION_NAME = "cnu" 61 | 62 | QDRANT_URL = os.environ.get("QDRANT_URL") 63 | QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY") 64 | BUCKET_NAME = os.environ.get("BUCKET_NAME") 65 | AWS_REGION = os.environ.get("AWS_REGION") 66 | 67 | 68 | def lambda_handler(event, context): 69 | bucket_name = event["Records"][0]["s3"]["bucket"]["name"] 70 | object_key = event["Records"][0]["s3"]["object"]["key"] 71 | 72 | create_vectorstore( 73 | url=QDRANT_URL, 74 | api_key=QDRANT_API_KEY, 75 | bucket_name=bucket_name, 76 | region_name=AWS_REGION, 77 | object_key=object_key, 78 | collection_name=COLLECTION_NAME, 79 | embedding_model=EMBEDDING_MODEL, 80 | ) 81 | return {"statusCode": 200, "body": json.dumps("Successful!")} 82 | ``` 83 | 84 | 85 | 86 | The `create_vectorstore(...)` function calls the `get_documents_from_pdf` function, which, in turn, downloads the uploaded PDF file from S3 to the Lambda `/tmp` directory (the only location with write permissions). Subsequently, it divides the document into smaller sections, referred to as chunks, converts these chunks into a vector representation, also known as an embedding, and uploads the resulting data to Qdrant Cloud via LangChain. 87 | 88 | Directory: `lambda_functions/src` 89 | 90 | ```python 91 | 92 | def create_vectorstore(...) -> None: 93 | 94 | docs = get_documents_from_pdf(...) 95 | 96 | ... 97 | 98 | embeddings = get_embeddings(...) 99 | 100 | vectorstore = Qdrant.from_documents( 101 | documents=docs, 102 | embedding=embeddings, 103 | url=url, 104 | prefer_grpc=True, 105 | api_key=api_key, 106 | collection_name=collection_name, 107 | force_recreate=False, 108 | ) 109 | 110 | def get_documents_from_pdf( 111 | bucket_name: str, key: str 112 | ) -> list: 113 | 114 | ...
115 | 116 | s3.download_file(bucket_name, key, f"/tmp/{s3_object_name}") 117 | 118 | loader = PyPDFLoader(f"/tmp/{s3_object_name}") 119 | documents = loader.load() 120 | 121 | # Split documents 122 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100) 123 | docs = text_splitter.split_documents(documents) 124 | logger.info(f"Number of documents after split: {len(docs)}") 125 | return docs 126 | ``` 127 | 128 | ## Lambda Function Policy 129 | 130 | The Lambda Function requires permission to interact with other services. These include permissions 131 | to access CloudWatch for monitoring, S3 for downloading PDF files, and Bedrock for invoking the 132 | Amazon `amazon.titan-embed-text-v1` model for embedding documents. 133 | 134 | **Terraform Policy Document** 135 | 136 | Directory: `terraform/lambda_functions` 137 | 138 | ```terraform 139 | data "aws_iam_policy_document" "lambda_policy" { 140 | statement { 141 | effect = "Allow" 142 | actions = ["sts:AssumeRole"] 143 | resources = ["*"] 144 | } 145 | 146 | statement { 147 | sid = "CloudWatchAccess" 148 | effect = "Allow" 149 | 150 | actions = [ 151 | "logs:CreateLogGroup", 152 | "logs:CreateLogStream", 153 | "logs:PutLogEvents", 154 | ] 155 | 156 | resources = ["arn:aws:logs:${data.aws_region.current.name}:*:*"] 157 | } 158 | 159 | statement { 160 | sid = "S3Access" 161 | 162 | effect = "Allow" 163 | 164 | actions = [ 165 | "s3:GetObject", 166 | "s3:PutObject", 167 | "s3:ListBucket", 168 | "s3:DeleteObject" 169 | ] 170 | 171 | resources = [ 172 | "arn:aws:s3:::*", 173 | ] 174 | } 175 | 176 | statement { 177 | sid = "BedrockAccess" 178 | actions = ["bedrock:InvokeModel", "bedrock:ListCustomModels", "bedrock:ListFoundationModels"] 179 | resources = ["arn:aws:bedrock:*::foundation-model/*"] 180 | } 181 | } 182 | ``` 183 | 184 | ## User Policy 185 | 186 | Set of actions to create Lambda Functions 187 | 188 | ```json 189 | { 190 | "Version": "2012-10-17", 191 | "Statement": [ 192 | { 193 | "Effect": "Allow", 194 | "Action": [ 195 | "cloudformation:DescribeStacks", 196 | "cloudformation:ListStackResources", 197 | "cloudwatch:ListMetrics", 198 | "cloudwatch:GetMetricData", 199 | "ec2:DescribeSecurityGroups", 200 | "ec2:DescribeSubnets", 201 | "ec2:DescribeVpcs", 202 | "kms:ListAliases", 203 | "iam:GetPolicy", 204 | "iam:GetPolicyVersion", 205 | "iam:GetRole", 206 | "iam:GetRolePolicy", 207 | "iam:ListAttachedRolePolicies", 208 | "iam:ListRolePolicies", 209 | "iam:ListRoles", 210 | "lambda:*", 211 | "logs:DescribeLogGroups", 212 | "states:DescribeStateMachine", 213 | "states:ListStateMachines", 214 | "tag:GetResources", 215 | "xray:GetTraceSummaries", 216 | "xray:BatchGetTraces" 217 | ], 218 | "Resource": "*" 219 | }, 220 | { 221 | "Effect": "Allow", 222 | "Action": "iam:PassRole", 223 | "Resource": "*", 224 | "Condition": { 225 | "StringEquals": { 226 | "iam:PassedToService": "lambda.amazonaws.com" 227 | } 228 | } 229 | }, 230 | { 231 | "Effect": "Allow", 232 | "Action": [ 233 | "logs:DescribeLogStreams", 234 | "logs:GetLogEvents", 235 | "logs:FilterLogEvents" 236 | ], 237 | "Resource": "arn:aws:logs:*:account-id:log-group:/aws/lambda/*" 238 | } 239 | ] 240 | } 241 | ``` 242 | -------------------------------------------------------------------------------- /documents/cnu/edital-cpnu-bloco-1-10jan2024.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/documents/cnu/edital-cpnu-bloco-1-10jan2024.pdf -------------------------------------------------------------------------------- /documents/cnu/edital-cpnu-bloco-3-10jan2024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/documents/cnu/edital-cpnu-bloco-3-10jan2024.pdf -------------------------------------------------------------------------------- /documents/cnu/edital-cpnu-bloco-4-10jan2024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/documents/cnu/edital-cpnu-bloco-4-10jan2024.pdf -------------------------------------------------------------------------------- /documents/cnu/edital-cpnu-bloco-5-10jan2024.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/documents/cnu/edital-cpnu-bloco-5-10jan2024.txt -------------------------------------------------------------------------------- /documents/cnu/edital-cpnu-bloco-6-10jan2024.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/documents/cnu/edital-cpnu-bloco-6-10jan2024.txt -------------------------------------------------------------------------------- /documents/cnu/edital-cpnu-bloco-7-10jan2024.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/documents/cnu/edital-cpnu-bloco-7-10jan2024.txt -------------------------------------------------------------------------------- /documents/cnu/edital-cpnu-bloco-8-10jan2024.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/documents/cnu/edital-cpnu-bloco-8-10jan2024.txt -------------------------------------------------------------------------------- /documents/immigration/welcome_ca.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/documents/immigration/welcome_ca.pdf -------------------------------------------------------------------------------- /documents/immigration/welcome_eua.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/documents/immigration/welcome_eua.pdf -------------------------------------------------------------------------------- /files/lambda_payload.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/files/lambda_payload.zip 
-------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for DIR in */; do 4 | DIRNAME=$(basename "$DIR") 5 | echo "==> $DIRNAME <==" 6 | (cd $DIR && poetry run ruff format .) 7 | done 8 | 9 | echo "Format complete." -------------------------------------------------------------------------------- /lambda_functions/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.12 2 | 3 | # Install dependencies 4 | RUN pip3 install \ 5 | --no-cache-dir \ 6 | --platform manylinux2014_x86_64 \ 7 | --target "${LAMBDA_TASK_ROOT}" \ 8 | --implementation cp \ 9 | --python-version 3.12 \ 10 | --only-binary=:all: --upgrade boto3 \ 11 | langchain \ 12 | langchain-community \ 13 | qdrant-client \ 14 | python-dotenv \ 15 | pypdf 16 | 17 | # Copy function code 18 | COPY ./lambda_functions/src/main.py ${LAMBDA_TASK_ROOT} 19 | COPY ./lambda_functions/src/utils.py ${LAMBDA_TASK_ROOT} 20 | COPY ./lambda_functions/src/create_vector_store.py ${LAMBDA_TASK_ROOT} 21 | 22 | # Set the CMD to your handler 23 | CMD [ "main.lambda_handler" ] -------------------------------------------------------------------------------- /lambda_functions/src/create_vector_store.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | import logging 4 | from langchain.text_splitter import RecursiveCharacterTextSplitter 5 | from langchain_community.document_loaders import PyPDFLoader 6 | from langchain_community.vectorstores.qdrant import Qdrant 7 | from qdrant_client import QdrantClient 8 | from utils import Embedding, Embeddings, get_embeddings 9 | 10 | # Constants 11 | AWS_EMBEDDINGS = ["amazon.titan-embed-text-v1"] 12 | 13 | # AWS S3 client 14 | s3 = boto3.client("s3") 15 | 16 | # Logging setup 17 | logger = logging.getLogger() 18 | logger.setLevel(logging.INFO) 19 | 20 | def get_client(url: str, api_key: str) -> QdrantClient: 21 | """Create and return a Qdrant client.""" 22 | return QdrantClient(url=url, api_key=api_key) 23 | 24 | def get_collection_info(url: str, api_key: str, collection_name: str) -> None: 25 | """Log the collection info.""" 26 | client = get_client(url, api_key) 27 | info = client.get_collection(collection_name=collection_name) 28 | logger.info(json.dumps(f"Collection info\n: {info}", indent=4)) 29 | 30 | def get_documents_from_pdf(bucket_name: str, key: str) -> list: 31 | """Download a PDF from S3, load it, and split it into documents.""" 32 | s3_object_name = key.split("/")[-1] 33 | 34 | logger.info(f"Downloading file from s3://{bucket_name}/{key}") 35 | logger.info(f"File name: {s3_object_name}") 36 | 37 | s3.download_file(bucket_name, key, f"/tmp/{s3_object_name}") 38 | 39 | loader = PyPDFLoader(f"/tmp/{s3_object_name}") 40 | documents = loader.load() 41 | 42 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100) 43 | docs = text_splitter.split_documents(documents) 44 | logger.info(f"Number of documents after split: {len(docs)}") 45 | return docs 46 | 47 | def create_vectorstore( 48 | url: str, 49 | api_key: str, 50 | bucket_name: str, 51 | object_key: str, 52 | collection_name: str, 53 | region_name: str, 54 | embedding_model: str, 55 | ) -> None: 56 | """Create a vector store from a PDF in an S3 bucket.""" 57 | docs = get_documents_from_pdf(bucket_name=bucket_name, key=object_key) 58 | 59 | 
embeddings = get_embeddings( 60 | embedding=Embedding(embeddings=Embeddings.BEDROCK, model_name=embedding_model), 61 | region_name=region_name, 62 | ) 63 | logger.info("Creating collection...") 64 | 65 | vectorstore = Qdrant.from_documents( 66 | documents=docs, 67 | embedding=embeddings, 68 | url=url, 69 | prefer_grpc=True, 70 | api_key=api_key, 71 | collection_name=collection_name, 72 | force_recreate=False, 73 | ) 74 | vectorstore.client.close() 75 | get_collection_info(url, api_key, collection_name) -------------------------------------------------------------------------------- /lambda_functions/src/main.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dotenv import load_dotenv 4 | from create_vector_store import create_vectorstore 5 | 6 | # Load environment variables 7 | load_dotenv() 8 | 9 | EMBEDDING_MODEL = "amazon.titan-embed-text-v1" 10 | COLLECTION_NAME = "cnu" 11 | 12 | # Environment variables 13 | QDRANT_URL = os.getenv("QDRANT_URL_AWS") 14 | QDRANT_API_KEY = os.getenv("QDRANT_API_KEY_AWS") 15 | BUCKET_NAME = os.getenv("BUCKET_NAME") 16 | AWS_REGION = os.getenv("REGION") 17 | 18 | def lambda_handler(event, context): 19 | """AWS Lambda function handler.""" 20 | # Extract bucket name and object key from event 21 | bucket_name = event["Records"][0]["s3"]["bucket"]["name"] 22 | object_key = event["Records"][0]["s3"]["object"]["key"] 23 | 24 | # Create vector store 25 | create_vectorstore( 26 | url=QDRANT_URL, 27 | api_key=QDRANT_API_KEY, 28 | bucket_name=bucket_name, 29 | region_name=AWS_REGION, 30 | object_key=object_key, 31 | collection_name=COLLECTION_NAME, 32 | embedding_model=EMBEDDING_MODEL, 33 | ) 34 | 35 | return {"statusCode": 200, "body": json.dumps("Successful!")} -------------------------------------------------------------------------------- /lambda_functions/src/utils.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | from dataclasses import dataclass 4 | from enum import Enum 5 | from langchain_community.embeddings import BedrockEmbeddings 6 | 7 | # Logging setup 8 | logger = logging.getLogger() 9 | logger.setLevel(logging.INFO) 10 | 11 | class Embeddings(Enum): 12 | """Enum for different types of embeddings.""" 13 | BEDROCK = "BedrockEmbeddings" 14 | 15 | @dataclass 16 | class Embedding: 17 | """Dataclass for embedding details.""" 18 | embeddings: Embeddings 19 | model_name: str 20 | 21 | def get_bedrock_embeddings(model_name: str, region_name: str) -> BedrockEmbeddings: 22 | """Create and return Bedrock embeddings.""" 23 | bedrock_runtime = boto3.client("bedrock-runtime", region_name=region_name) 24 | embeddings = BedrockEmbeddings(client=bedrock_runtime, model_id=model_name) 25 | logger.info("Embedding finished!") 26 | return embeddings 27 | 28 | def get_embeddings(embedding: Embedding, region_name: str): 29 | """Get embeddings based on the embedding type.""" 30 | logger.info(f"Embedding model: {embedding.model_name}") 31 | logger.info(f"Embedding type: {embedding.embeddings}") 32 | if embedding.embeddings == Embeddings.BEDROCK: 33 | return get_bedrock_embeddings(embedding.model_name, region_name=region_name) 34 | else: 35 | raise ValueError("Invalid embedding type") -------------------------------------------------------------------------------- /lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for DIR in */; do 4 | DIRNAME=$(basename "$DIR") 5 | echo 
"==> $DIRNAME <==" 6 | (cd $DIR && poetry run ruff check . --fix) 7 | done 8 | 9 | echo "Format complete." -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "generativeai-questions-and-answers-app-with-bedrock-langchain-and-fastapi" 3 | version = "0.1.0" 4 | description = "GenerativeAI Question and Answer application" 5 | authors = ["Matheus Ribeiro <94936606+mathewsrc@users.noreply.github.com>"] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.12" 10 | langchain = "^0.1.0" 11 | awscli = "^1.32.14" 12 | pytest = "^7.4.4" 13 | ruff = "^0.1.11" 14 | click = "^8.1.7" 15 | pypdf = "^3.17.4" 16 | fastapi = "^0.108.0" 17 | uvicorn = "^0.25.0" 18 | pytest-cov = "^4.1.0" 19 | joblib = "^1.3.2" 20 | pydantic = "^2.5.3" 21 | langchain-community = "^0.0.11" 22 | lancedb = "^0.4.4" 23 | qdrant-client = "^1.7.0" 24 | python-dotenv = "^1.0.1" 25 | boto3 = "^1.34.37" 26 | botocore = "^1.34.37" 27 | s3transfer = "^0.10.0" 28 | nltk = "^3.8.1" 29 | 30 | 31 | [build-system] 32 | requires = ["poetry-core"] 33 | build-backend = "poetry.core.masonry.api" 34 | 35 | [tool.ruff] 36 | line-length = 100 37 | 38 | [tool.ruff.lint] 39 | # 2. Avoid enforcing line-length violations (`E501`) 40 | #ignore = ["E501"] 41 | extend-safe-fixes = ["F541"] 42 | 43 | [tool.ruff.format] 44 | quote-style = "double" 45 | indent-style = "tab" 46 | docstring-code-format = true 47 | line-ending = "auto" 48 | -------------------------------------------------------------------------------- /scripts/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # Exit if any command fails 3 | 4 | # This script is used to deploy the application 5 | 6 | # Get region and account id using aws cli 7 | AWS_REGION=$(aws configure get region) 8 | AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) 9 | AWS_ECR_REPOSITORY_NAME=ecs-repo # Replace with your ECR repository name 10 | AWS_ECR_REPOSITORY_URL=$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$AWS_ECR_REPOSITORY_NAME 11 | TAG=$(git rev-parse HEAD) # Get current short commit hash 12 | 13 | # Login to AWS ECR 14 | echo "Logging in to AWS ECR..." 15 | aws ecr get-login-password \ 16 | --region $AWS_REGION | docker login \ 17 | --username AWS \ 18 | --password-stdin $AWS_ECR_REPOSITORY_URL 19 | 20 | # Build the Docker image 21 | echo "Building Docker image..." 22 | docker build -t $AWS_ECR_REPOSITORY_NAME . 23 | 24 | Check if the ECR repository exists 25 | echo "Checking if ECR repository exists..." 26 | if aws ecr describe-repositories \ 27 | --repository-names $AWS_ECR_REPOSITORY_NAME \ 28 | --region $AWS_REGION > /dev/null 2>&1; then 29 | echo "ECR repository exists, deleting..." 30 | aws ecr delete-repository \ 31 | --repository-name $AWS_ECR_REPOSITORY_NAME \ 32 | --region $AWS_REGION \ 33 | --force 34 | fi 35 | 36 | # Create the ECR repository 37 | echo "Creating ECR repository..." 38 | aws ecr create-repository \ 39 | --repository-name $AWS_ECR_REPOSITORY_NAME \ 40 | --region $AWS_REGION \ 41 | --image-scanning-configuration scanOnPush=true \ 42 | --image-tag-mutability MUTABLE \ 43 | --no-cli-pager 44 | 45 | # Tag the Docker image 46 | echo "Tagging Docker image..." 47 | docker tag $AWS_ECR_REPOSITORY_NAME:latest $AWS_ECR_REPOSITORY_URL:$TAG 48 | 49 | # Push the Docker image 50 | echo "Pushing Docker image..." 
51 | docker push $AWS_ECR_REPOSITORY_URL:$TAG 52 | 53 | echo $AWS_ECR_REPOSITORY_URL:$TAG 54 | echo "image=$AWS_ECR_REPOSITORY_URL:$TAG" >> $GITHUB_OUTPUT 55 | 56 | -------------------------------------------------------------------------------- /scripts/deploy_lambda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # Exit if any command fails 3 | 4 | # This script is used to deploy the Lambda function image 5 | 6 | # Get region and account id using aws cli 7 | AWS_REGION=$(aws configure get region) 8 | AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) 9 | AWS_ECR_REPOSITORY_NAME=lambda-repo # Replace with your ECR repository name 10 | AWS_ECR_REPOSITORY_URL=$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$AWS_ECR_REPOSITORY_NAME 11 | TAG=$(git rev-parse HEAD) # Get the current commit hash 12 | 13 | # Login to AWS ECR 14 | echo "Logging in to AWS ECR..." 15 | aws ecr get-login-password \ 16 | --region $AWS_REGION | docker login \ 17 | --username AWS \ 18 | --password-stdin $AWS_ECR_REPOSITORY_URL 19 | 20 | # Build the Docker image 21 | echo "Building Docker image..." 22 | docker build --platform linux/amd64 -t $AWS_ECR_REPOSITORY_NAME -f ./lambda_functions/docker/Dockerfile . 23 | 24 | # Check if the ECR repository exists 25 | echo "Checking if ECR repository exists..." 26 | if aws ecr describe-repositories \ 27 | --repository-names $AWS_ECR_REPOSITORY_NAME \ 28 | --region $AWS_REGION > /dev/null 2>&1; then 29 | echo "ECR repository exists, deleting..." 30 | aws ecr delete-repository \ 31 | --repository-name $AWS_ECR_REPOSITORY_NAME \ 32 | --region $AWS_REGION \ 33 | --force 34 | fi 35 | 36 | # Create the ECR repository 37 | echo "Creating ECR repository..." 38 | aws ecr create-repository \ 39 | --repository-name $AWS_ECR_REPOSITORY_NAME \ 40 | --region $AWS_REGION \ 41 | --image-scanning-configuration scanOnPush=true \ 42 | --image-tag-mutability MUTABLE \ 43 | --no-cli-pager 44 | 45 | # Tag the Docker image 46 | echo "Tagging Docker image..." 47 | docker tag $AWS_ECR_REPOSITORY_NAME:latest $AWS_ECR_REPOSITORY_URL:$TAG 48 | 49 | # Push the Docker image 50 | echo "Pushing Docker image..." 
51 | docker push $AWS_ECR_REPOSITORY_URL:$TAG 52 | -------------------------------------------------------------------------------- /scripts/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If environment variables are not set, ask the user to enter them 4 | if [ -z "$QDRANT_URL" ]; then 5 | echo "Please enter QDRANT_URL" 6 | read -r QDRANT_URL 7 | fi 8 | 9 | if [ -z "$QDRANT_API_KEY" ]; then 10 | echo "Please enter QDRANT_API_KEY" 11 | read -r QDRANT_API_KEY 12 | fi 13 | 14 | if [ -z "$AWS_ACCESS_KEY_ID" ]; then 15 | echo "Please enter AWS_ACCESS_KEY_ID" 16 | read -r AWS_ACCESS_KEY_ID 17 | fi 18 | 19 | if [ -z "$AWS_SECRET_ACCESS_KEY" ]; then 20 | echo "Please enter AWS_SECRET_ACCESS_KEY" 21 | read -r AWS_SECRET_ACCESS_KEY 22 | fi 23 | 24 | docker run -p 80:80 \ 25 | -e QDRANT_URL="$QDRANT_URL" \ 26 | -e QDRANT_API_KEY="$QDRANT_API_KEY" \ 27 | -e AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ 28 | -e AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ 29 | -e AWS_DEFAULT_REGION="$AWS_DEFAULT_REGION" \ 30 | app -------------------------------------------------------------------------------- /scripts/install_aws_cli.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is used to install the AWS CLI 4 | 5 | # Download the AWS CLI 6 | curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" 7 | 8 | # Unzip the AWS CLI 9 | unzip awscliv2.zip 10 | 11 | # Install the AWS CLI 12 | sudo ./aws/install 13 | 14 | # Clean up the files 15 | rm -rf awscliv2.zip aws 16 | 17 | # Verify the AWS CLI 18 | aws --version 19 | 20 | # Configure the AWS CLI 21 | #aws configure -------------------------------------------------------------------------------- /scripts/install_graph_viz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is used to install Graphviz 4 | 5 | sudo apt update && sudo apt install -y graphviz 6 | -------------------------------------------------------------------------------- /scripts/install_poetry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is used to install Poetry 4 | 5 | # Install Poetry 6 | pipx install --force poetry 7 | 8 | # Enable tab completion for Bash 9 | poetry completions bash >> ~/.bash_completion 10 | 11 | # Init Poetry (skip: this repository already has a pyproject.toml) 12 | # poetry init 13 | 14 | # Install Poetry dependencies 15 | poetry install 16 | 17 | # Check Poetry version 18 | poetry --version -------------------------------------------------------------------------------- /scripts/install_terraform.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is used to install Terraform 4 | 5 | # Install Terraform by HashiCorp 6 | wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg 7 | 8 | # Add the official HashiCorp Linux repository 9 | echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list 10 | 11 | # Update and install 12 | sudo apt update && sudo apt install -y terraform 13 | 14 | # Verify the installation 15 | terraform --version -------------------------------------------------------------------------------- /scripts/package_lambda_layer.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create a directory for the dependencies [Lambda layers expect libraries in /python or python/lib/python3.x/site-packages] 4 | mkdir -p temp/python 5 | 6 | # Install dependencies 7 | pip3 install \ 8 | --no-cache-dir \ 9 | --platform manylinux2014_x86_64 \ 10 | --target ./temp/python \ 11 | --implementation cp \ 12 | --python-version 3.12 \ 13 | --only-binary=:all: --upgrade boto3 \ 14 | langchain \ 15 | langchain-community \ 16 | qdrant-client \ 17 | python-dotenv \ 18 | numpy \ 19 | pypdf 20 | 21 | echo "Installed dependencies" 22 | 23 | # Create a layer .zip file with the python/ folder at the zip root [Not required if you are using Terraform] 24 | (cd temp && zip -r ../lambda_layer.zip python) 25 | -------------------------------------------------------------------------------- /scripts/set_secrets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Enter QDRANT_URL:" 3 | read -r QDRANT_URL 4 | echo "Enter QDRANT_API_KEY:" 5 | read -r QDRANT_API_KEY 6 | aws secretsmanager put-secret-value --secret-id prod/qdrant_url --secret-string "$QDRANT_URL" 7 | aws secretsmanager put-secret-value --secret-id prod/qdrant_api_key --secret-string "$QDRANT_API_KEY" -------------------------------------------------------------------------------- /scripts/terraform_fmt.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | for DIR in */; do 4 | DIRNAME=$(basename "$DIR") 5 | echo "==> $DIRNAME <==" 6 | (cd "$DIR" && terraform fmt) 7 | done 8 | 9 | echo "Format complete." -------------------------------------------------------------------------------- /scripts/terraform_init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | AWS_REGION=$(aws configure get region) && \ 3 | AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) && \ 4 | cd terraform && terraform init \ 5 | -backend-config="region=$AWS_REGION" \ 6 | -backend-config='assume_role={"role_arn":"arn:aws:iam::'$AWS_ACCOUNT_ID':role/terraform_state_role"}' 7 | -------------------------------------------------------------------------------- /scripts/terraform_migrate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | AWS_REGION=$(aws configure get region) && \ 4 | AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) && \ 5 | cd terraform && terraform init \ 6 | -backend-config="region=$AWS_REGION" \ 7 | -backend-config='assume_role={"role_arn":"arn:aws:iam::'$AWS_ACCOUNT_ID':role/terraform_state_role"}' \ 8 | -migrate-state && \ 9 | terraform refresh -------------------------------------------------------------------------------- /scripts/upload_state.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is used to upload the state file to the S3 bucket 4 | 5 | # Set the variables 6 | BUCKET_NAME="terraform-bucket-state-tf" 7 | STATE_FILE="terraform/terraform.tfstate" 8 | AWS_REGION=$(aws configure get region) 9 | 10 | # Create the S3 bucket if it doesn't exist 11 | aws s3api create-bucket --bucket $BUCKET_NAME --region $AWS_REGION 12 | 13 | # Upload the state file to the S3 bucket 14 | aws s3 cp $STATE_FILE s3://$BUCKET_NAME/state/ -------------------------------------------------------------------------------- /src/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/src/__init__.py -------------------------------------------------------------------------------- /src/app/main.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | import os 4 | import time 5 | from botocore.exceptions import ClientError 6 | from dotenv import load_dotenv 7 | from fastapi import FastAPI, HTTPException 8 | from fastapi.responses import HTMLResponse 9 | from langchain.chains import RetrievalQA 10 | from langchain.prompts import PromptTemplate 11 | from langchain_community.embeddings.bedrock import BedrockEmbeddings 12 | from langchain_community.llms.bedrock import Bedrock 13 | from langchain_community.vectorstores.qdrant import Qdrant 14 | from pydantic import BaseModel 15 | from qdrant_client import AsyncQdrantClient, QdrantClient 16 | 17 | COLLECTION_NAME = "cnu" 18 | BEDROCK_MODEL_NAME = "anthropic.claude-v2" 19 | BEDROCK_EMBEDDINGS_MODEL_NAME = "amazon.titan-embed-text-v1" 20 | AWS_DEFAULT_REGION = "us-east-1" 21 | 22 | # Logging setup 23 | logger = logging.getLogger() 24 | logger.setLevel(logging.INFO) 25 | 26 | app = FastAPI() 27 | 28 | class Body(BaseModel): 29 | text: str 30 | temperature: float = 0.5 31 | 32 | # Load environment variables 33 | load_dotenv() 34 | 35 | session = boto3.Session(region_name=AWS_DEFAULT_REGION) 36 | 37 | prompt_template = """ 38 | Use the following pieces of context to provide a concise answer to the question at the end. 39 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 40 | Human: {question} 41 | {context} 42 | Assistant: 43 | """ 44 | 45 | def get_secret(secret_name): 46 | """ Get the secret from AWS Secrets Manager.""" 47 | client = session.client(service_name="secretsmanager", region_name=AWS_DEFAULT_REGION) 48 | try: 49 | get_secret_value_response = client.get_secret_value(SecretId=secret_name) 50 | except ClientError as e: 51 | raise e 52 | secret = get_secret_value_response["SecretString"] 53 | if not isinstance(secret, str): 54 | secret = str(secret) 55 | return secret 56 | 57 | def get_bedrock_embeddings(model_name: str, bedrock_runtime) -> BedrockEmbeddings: 58 | """Get the Bedrock embeddings Model""" 59 | embeddings = BedrockEmbeddings(client=bedrock_runtime, model_id=model_name) 60 | return embeddings 61 | 62 | @app.get("/", response_class=HTMLResponse) 63 | async def root(): 64 | """Root endpoint.""" 65 | return HTMLResponse( 66 | """ 67 |
<h1>Welcome to our Question/Answering application where you can ask 68 | questions about the Concurso Nacional Unificado</h1> 69 | <p>Use the /ask endpoint to ask a question.</p>
70 | """ 71 | ) 72 | 73 | @app.get("/collectioninfo") 74 | async def collection_info(): 75 | """Get the collection info from Qdrant.""" 76 | try: 77 | qdrant_url = os.getenv("QDRANT_URL") or get_secret("prod/qdrant_url") 78 | qdrant_api_key = os.getenv("QDRANT_API_KEY") or get_secret("prod/qdrant_api_key") 79 | 80 | async_client = AsyncQdrantClient( 81 | url=qdrant_url, 82 | api_key=qdrant_api_key, 83 | port=6333, 84 | grpc_port=6334, 85 | timeout=10 86 | ) 87 | 88 | info = await async_client.get_collection(collection_name=COLLECTION_NAME) 89 | 90 | logger.info(f"Collection info: {info}") 91 | return {"collection_info": info} 92 | except Exception as e: 93 | logger.error(f"Error: {e}") 94 | raise HTTPException(status_code=500, detail=f"Error getting collection info:{e}") 95 | 96 | @app.post("/ask") 97 | async def question(body: Body): 98 | """Ask a question and get an answer from the model.""" 99 | try: 100 | start_time = time.time() 101 | 102 | qdrant_url = os.getenv("QDRANT_URL") or get_secret("prod/qdrant_url") 103 | qdrant_api_key = os.getenv("QDRANT_API_KEY") or get_secret("prod/qdrant_api_key") 104 | 105 | bedrock_runtime = boto3.client("bedrock-runtime", region_name=AWS_DEFAULT_REGION) 106 | 107 | async_client = AsyncQdrantClient( 108 | url=qdrant_url, 109 | api_key=qdrant_api_key, 110 | port=6333, 111 | grpc_port=6334 112 | ) 113 | 114 | client = QdrantClient( 115 | url=qdrant_url, 116 | api_key=qdrant_api_key, 117 | port=6333, 118 | grpc_port=6334 119 | ) 120 | 121 | logger.info("Qdrant client created successfully") 122 | 123 | embeddings = get_bedrock_embeddings(BEDROCK_EMBEDDINGS_MODEL_NAME, bedrock_runtime) 124 | 125 | qdrant = Qdrant( 126 | client=client, 127 | async_client=async_client, 128 | embeddings=embeddings, 129 | collection_name=COLLECTION_NAME, 130 | ).as_retriever(search_kwargs={"k": 2}) 131 | 132 | prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"]) 133 | 134 | inference_modifier = { 135 | "max_tokens_to_sample": 100, 136 | "temperature": body.temperature, 137 | "top_k": 250, 138 | "top_p": 1, 139 | "stop_sequences": ["\n\nHuman"], 140 | } 141 | 142 | llm = Bedrock( 143 | model_id=BEDROCK_MODEL_NAME, client=bedrock_runtime, model_kwargs=inference_modifier 144 | ) 145 | 146 | qa = RetrievalQA.from_chain_type( 147 | llm=llm, 148 | chain_type="stuff", 149 | retriever=qdrant, 150 | return_source_documents=False, 151 | chain_type_kwargs={"prompt": prompt, "verbose": False}, 152 | ) 153 | 154 | logger.info("Invoking the model") 155 | 156 | result = await qa.ainvoke(input={"query": body.text}) 157 | answer = result["result"] 158 | elapsed_time = time.time() - start_time 159 | 160 | logger.info(f"{elapsed_time:.2f} seconds to complete.") 161 | return {"answer": answer,"time": f"{elapsed_time:.2f} seconds."} 162 | except Exception as e: 163 | logger.error(f"Error: {e}") 164 | raise HTTPException(status_code=500, detail=str(e)) -------------------------------------------------------------------------------- /src/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/src/cli/__init__.py -------------------------------------------------------------------------------- /src/cli/bedrock_cli.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import click 3 | import json 4 | from langchain.llms.bedrock import Bedrock 5 | from 
langchain_community.vectorstores import Qdrant 6 | from langchain.prompts import PromptTemplate 7 | from langchain.chains import RetrievalQA 8 | import os 9 | from dotenv import load_dotenv 10 | from utils import ( 11 | get_embeddings, 12 | get_client, 13 | get_prompt, 14 | Embedding, 15 | Embeddings, 16 | aws_embeddings, 17 | aws_llm, 18 | ) 19 | 20 | load_dotenv() 21 | 22 | QDRANT_URL = os.environ.get("QDRANT_URL") 23 | QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY") 24 | AWS_REGION = os.environ.get("AWS_DEFAULT_REGION") 25 | COLLECTION_NAME = os.environ.get("COLLECTION_NAME") 26 | 27 | boto_session = boto3.Session(region_name=AWS_REGION) 28 | 29 | # bedrock 30 | bedrock_models = boto3.client("bedrock", region_name=AWS_REGION) 31 | bedrock_runtime = boto3.client("bedrock-runtime", region_name=AWS_REGION) 32 | 33 | 34 | @click.group() 35 | def cli(): 36 | pass 37 | 38 | 39 | @cli.command("list-models") 40 | @click.option("--by-provider", help="Filter by provider") 41 | @click.option( 42 | "--by-output-modality", 43 | help="Filter by output modality", 44 | type=click.Choice(["TEXT", "EMBEDDING"], case_sensitive=True), 45 | ) 46 | def list_foundation_models(by_provider=None, by_output_modality=None): 47 | if by_provider is not None and by_output_modality is None: 48 | models = bedrock_models.list_foundation_models(byProvider=by_provider) 49 | click.echo(click.style(f"Models: {json.dumps(models, indent=4)}", fg="green")) 50 | elif by_output_modality is not None and by_provider is None: 51 | models = bedrock_models.list_foundation_models(byOutputModality=by_output_modality) 52 | click.echo(click.style(f"Models: {json.dumps(models, indent=4)}", fg="green")) 53 | elif by_provider is not None and by_output_modality is not None: 54 | models = bedrock_models.list_foundation_models( 55 | byProvider=by_provider, byOutputModality=by_output_modality 56 | ) 57 | click.echo(click.style(f"Models: {json.dumps(models, indent=4)}", fg="green")) 58 | else: 59 | models = bedrock_models.list_foundation_models() 60 | click.echo(click.style(f"Models: {json.dumps(models, indent=4)}", fg="green")) 61 | 62 | 63 | @cli.command("ask") 64 | @click.option("--question", required=True, type=str, prompt=True, help="What would you like to ask?") 65 | @click.option("--url", default=QDRANT_URL, help="Qdrant server URL") 66 | @click.option("--api-key", default=QDRANT_API_KEY, help="Qdrant API key") 67 | @click.option("--collection-name", default=COLLECTION_NAME, help="Qdrant collection name") 68 | @click.option("--temperature", default=0.5, help="Randomness and creativity of the generated text") 69 | @click.option("--top_k", default=250, help="Top k") 70 | @click.option("--top_p", default=1, help="Top p") 71 | @click.option("--stop_sequences", default="Human", help="Stop sequences") 72 | @click.option( 73 | "--max_tokens_to_sample", 74 | default=200, 75 | help="Maximum number of tokens to sample from the model", 76 | ) 77 | @click.option( 78 | "--model_name", 79 | default="anthropic.claude-v2", 80 | type=click.Choice(aws_llm, case_sensitive=True), 81 | ) 82 | @click.option( 83 | "--embedding_model", 84 | default="amazon.titan-embed-text-v1", 85 | type=click.Choice(aws_embeddings), 86 | help="AWS embedding model name used to embed the documents", 87 | ) 88 | def question( 89 | question, 90 | url, 91 | api_key, 92 | collection_name, 93 | temperature, 94 | top_k, 95 | top_p, 96 | stop_sequences, 97 | max_tokens_to_sample, 98 | model_name, 99 | embedding_model, 100 | ): 101 | client = get_client(url, api_key) 102 | retriever
= Qdrant( 103 | client=client, 104 | embeddings=get_embeddings( 105 | embedding=Embedding(embeddings=Embeddings.BEDROCK, model_name=embedding_model) 106 | ), 107 | collection_name=collection_name, 108 | ).as_retriever(search_type="similarity", search_kwargs={"k": 2}) 109 | 110 | prompt_template = get_prompt() 111 | 112 | prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"]) 113 | 114 | # Bedrock Hyperparameters 115 | inference_modifier = { 116 | "max_tokens_to_sample": max_tokens_to_sample, 117 | "temperature": temperature, 118 | "top_k": top_k, 119 | "top_p": top_p, 120 | "stop_sequences": [f"\n\n{stop_sequences}"], 121 | } 122 | 123 | llm = Bedrock(model_id=model_name, client=bedrock_runtime, model_kwargs=inference_modifier) 124 | 125 | qa = RetrievalQA.from_chain_type( 126 | llm=llm, 127 | chain_type="stuff", 128 | retriever=retriever, 129 | return_source_documents=True, 130 | chain_type_kwargs={"prompt": prompt, "verbose": False}, 131 | ) 132 | 133 | result = qa.invoke({"query": question}) 134 | answer = result["result"] 135 | click.echo(click.style(f"\nAnswer: {answer}", fg="green")) 136 | 137 | 138 | if __name__ == "__main__": 139 | cli() 140 | -------------------------------------------------------------------------------- /src/cli/qdrant_cli.py: -------------------------------------------------------------------------------- 1 | from langchain_community.vectorstores.qdrant import Qdrant 2 | import click 3 | import boto3 4 | import os 5 | from dotenv import load_dotenv 6 | from utils import ( 7 | get_embeddings, 8 | get_documents_from_pdf, 9 | get_client, 10 | Embedding, 11 | Embeddings, 12 | huggingface_embeddings, 13 | aws_embeddings, 14 | ) 15 | 16 | load_dotenv() 17 | 18 | QDRANT_URL = os.environ.get("QDRANT_URL") 19 | QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY") 20 | AWS_REGION = os.environ.get("AWS_DEFAULT_REGION") 21 | AWS_S3_BUCKET = os.environ.get("AWS_S3_BUCKET") 22 | DOCUMENTS_PATH = os.environ.get("DOCUMENTS_PATH") 23 | COLLECTION_NAME = os.environ.get("COLLECTION_NAME") 24 | 25 | boto_session = boto3.Session(region_name=AWS_REGION) 26 | 27 | 28 | @click.group() 29 | def cli(): 30 | pass 31 | 32 | 33 | @cli.command("download-docs") 34 | @click.option("--collection-name", required=True, prompt=True, help="Collection name") 35 | @click.option("--bucket", help="S3 bucket") 36 | def download_files(collection_name, bucket=None): 37 | s3 = boto3.resource("s3") 38 | if bucket is None: 39 | bucket = AWS_S3_BUCKET 40 | bucket = s3.Bucket(bucket) 41 | for obj in bucket.objects.all(): 42 | key = obj.key 43 | if key.endswith(".pdf"): 44 | bucket.download_file(key, f"{DOCUMENTS_PATH}/{collection_name}/{key}") 45 | click.echo(click.style(f"Downloaded {key}", fg="green")) 46 | 47 | 48 | @cli.command("create") 49 | @click.option("--url", default=QDRANT_URL, help="Qdrant server URL") 50 | @click.option("--api-key", default=QDRANT_API_KEY, help="Qdrant API key") 51 | @click.option("--collection-name", required=True, prompt=True, help="Qdrant collection name") 52 | @click.option( 53 | "--embedding-model", 54 | required=True, 55 | prompt=True, 56 | type=click.Choice(huggingface_embeddings + aws_embeddings), 57 | help="Embedding model name", 58 | ) 59 | def create_vectorstore(url, api_key, collection_name, embedding_model): 60 | try: 61 | docs = get_documents_from_pdf(collection_name) 62 | 63 | if embedding_model in huggingface_embeddings: 64 | embedding = Embeddings.HUGGINGFACE 65 | model_name = embedding_model 66 | 67 | elif embedding_model in
aws_embeddings: 68 | embedding = Embeddings.BEDROCK 69 | model_name = embedding_model 70 | 71 | else: 72 | raise ValueError("Invalid embedding model name") 73 | 74 | embeddings = get_embeddings( 75 | embedding=Embedding(embeddings=embedding, model_name=model_name) 76 | ) 77 | click.echo(click.style("Creating collection... (15-35 minutes)", fg="green")) 78 | 79 | vectorstore = Qdrant.from_documents( 80 | documents=docs, 81 | embedding=embeddings, 82 | url=url, 83 | prefer_grpc=True, 84 | api_key=api_key, 85 | collection_name=collection_name, 86 | force_recreate=False, 87 | ) 88 | vectorstore.client.close() 89 | click.echo(click.style("Collection created!", fg="green")) 90 | except Exception as e: 91 | click.echo(click.style(f"Error: {e}", fg="red")) 92 | return 93 | return vectorstore 94 | 95 | 96 | @cli.command("delete") 97 | @click.option("--url", default=QDRANT_URL, help="Qdrant server URL") 98 | @click.option("--api-key", default=QDRANT_API_KEY, help="Qdrant API key") 99 | @click.option("--collection-name", default=COLLECTION_NAME, help="Qdrant collection name") 100 | def delete_collection(url, api_key, collection_name): 101 | client = get_client(url, api_key) 102 | client.delete_collection(collection_name=collection_name) 103 | click.echo(click.style("Collection deleted!", fg="green")) 104 | 105 | 106 | @cli.command("info") 107 | @click.option("--url", default=QDRANT_URL, help="Qdrant server URL") 108 | @click.option("--api-key", default=QDRANT_API_KEY, help="Qdrant API key") 109 | @click.option("--collection-name", required=True, prompt=True, help="Qdrant collection name") 110 | def info(url, api_key, collection_name): 111 | try: 112 | client = get_client(url, api_key) 113 | info = client.get_collection(collection_name=collection_name) 114 | click.echo(click.style(f"Collection info: {info}", fg="green")) 115 | except Exception as e: 116 | click.echo(click.style(f"Error: {e}", fg="red")) 117 | 118 | 119 | if __name__ == "__main__": 120 | cli() 121 | -------------------------------------------------------------------------------- /src/cli/utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from langchain_community.document_loaders import PyPDFDirectoryLoader 3 | from langchain.text_splitter import RecursiveCharacterTextSplitter 4 | import click 5 | from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings 6 | from langchain_community.embeddings import BedrockEmbeddings 7 | import boto3 8 | from dataclasses import dataclass 9 | from dotenv import load_dotenv 10 | import os 11 | from qdrant_client import QdrantClient 12 | 13 | load_dotenv() 14 | 15 | QDRANT_URL = os.environ.get("QDRANT_URL") 16 | QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY") 17 | AWS_REGION = os.environ.get("AWS_DEFAULT_REGION") 18 | 19 | boto_session = boto3.Session(region_name=AWS_REGION) 20 | credentials = boto_session.get_credentials() 21 | 22 | # bedrock 23 | bedrock_models = boto3.client("bedrock", region_name=AWS_REGION) 24 | bedrock_runtime = boto3.client("bedrock-runtime", region_name=AWS_REGION) 25 | 26 | 27 | class Embeddings(Enum): 28 | HUGGINGFACE = "HuggingFaceEmbeddings" 29 | BEDROCK = "BedrockEmbeddings" 30 | 31 | 32 | @dataclass() 33 | class Embedding: 34 | embeddings: Embeddings 35 | model_name: str 36 | 37 | 38 | huggingface_embeddings = [ 39 | "BAAI/bge-small-en", 40 | "sentence-transformers/all-MiniLM-L6-v2", 41 | "sentence-transformers/all-mpnet-base-v2", 42 | "sentence-transformers/all-distilroberta-v1", 43 | 
] 44 | 45 | huggingface_llm = [ 46 | "meta-llama/Llama-2-7b-chat-hf", 47 | "microsoft/phi-2", # 5G 48 | "TinyLlama/TinyLlama-1.1B-Chat-v1.0", # 2.2G 49 | ] 50 | 51 | aws_embeddings = ["amazon.titan-embed-text-v1"] 52 | 53 | aws_llm = ["anthropic.claude-v2"] 54 | 55 | 56 | def get_huggingface_embeddings(model_name: str) -> HuggingFaceEmbeddings: 57 | model_kwargs = {"device": "cpu"} 58 | encode_kwargs = {"normalize_embeddings": False} 59 | embeddings = HuggingFaceEmbeddings( 60 | model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs 61 | ) 62 | click.echo(click.style("Embedding finished!", fg="green")) 63 | return embeddings 64 | 65 | 66 | def get_bedrock_embeddings(model_name: str) -> BedrockEmbeddings: 67 | embeddings = BedrockEmbeddings(client=bedrock_runtime, model_id=model_name) 68 | click.echo(click.style("Embedding finished!", fg="green")) 69 | return embeddings 70 | 71 | 72 | def get_embeddings(embedding: Embedding): 73 | click.echo(click.style(f"Embedding model: {embedding.model_name}", fg="green")) 74 | click.echo(click.style(f"Embedding type: {embedding.embeddings}", fg="green")) 75 | if embedding.embeddings == Embeddings.HUGGINGFACE: 76 | return get_huggingface_embeddings(embedding.model_name) 77 | elif embedding.embeddings == Embeddings.BEDROCK: 78 | return get_bedrock_embeddings(embedding.model_name) 79 | else: 80 | raise ValueError("Invalid embedding type") 81 | 82 | 83 | def get_documents_from_pdf(collection_name: str) -> list: 84 | # Load documents 85 | loader = PyPDFDirectoryLoader(f"./documents/{collection_name}") 86 | documents = loader.load() 87 | 88 | # Split documents 89 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100) 90 | docs = text_splitter.split_documents(documents) 91 | click.echo(click.style(f"Number of documents after split: {len(docs)}", fg="green")) 92 | return docs 93 | 94 | 95 | def get_client(url: str, api_key: str) -> QdrantClient: 96 | return QdrantClient(url=url, api_key=api_key) 97 | 98 | 99 | def get_prompt() -> str: 100 | return """ 101 | Use the following pieces of context to provide a concise answer to the question at the end. 102 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 103 | 104 | Question: {question} 105 | 106 | {context} 107 | 108 | Answer: 109 | """ 110 | -------------------------------------------------------------------------------- /terraform/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.35.0" 6 | constraints = "~> 5.0" 7 | hashes = [ 8 | "h1:WIWNdmbE/n0fvlNA72hC+QWWyOHaZZPM1l+o3MUvnZg=", 9 | "zh:3a2a6f40db82d30ea8c5e3e251ca5e16b08e520570336e7e342be823df67e945", 10 | "zh:420a23b69b412438a15b8b2e2c9aac2cf2e4976f990f117e4bf8f630692d3949", 11 | "zh:4d8b887f6a71b38cff77ad14af9279528433e279eed702d96b81ea48e16e779c", 12 | "zh:4edd41f8e1c7d29931608a7b01a7ae3d89d6f95ef5502cf8200f228a27917c40", 13 | "zh:6337544e2ded5cf37b55a70aa6ce81c07fd444a2644ff3c5aad1d34680051bdc", 14 | "zh:668faa3faaf2e0758bf319ea40d2304340f4a2dc2cd24460ddfa6ab66f71b802", 15 | "zh:79ddc6d7c90e59fdf4a51e6ea822ba9495b1873d6a9d70daf2eeaf6fc4eb6ff3", 16 | "zh:885822027faf1aa57787f980ead7c26e7d0e55b4040d926b65709b764f804513", 17 | "zh:8c50a8f397b871388ff2e048f5eb280af107faa2e8926694f1ffd9f32a7a7cdf", 18 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 19 | "zh:a2f5d2553df5573a060641f18ee7585587047c25ba73fd80617f59b5893d22b4", 20 | "zh:c43833ae2a152213ee92eb5be7653f9493779eddbe0ce403ea49b5f1d87fd766", 21 | "zh:dab01527a3a55b4f0f958af6f46313d775e27f9ad9d10bedbbfea4a35a06dc5f", 22 | "zh:ed49c65620ec42718d681a7fc00c166c295ff2795db6cede2c690b83f9fb3e65", 23 | "zh:f0a358c0ae1087c466d0fbcc3b4da886f33f881a145c3836ec43149878b86a1a", 24 | ] 25 | } 26 | 27 | provider "registry.terraform.io/hashicorp/external" { 28 | version = "2.3.2" 29 | hashes = [ 30 | "h1:7F6FVQh7OcCgIH3YEJg1SJDSb1CU4qrCtGuI2EBHnL8=", 31 | "h1:XItlDFohFgCuUTo+5ML8OeTdNXWJoM1RkqMboSqmhL0=", 32 | "zh:020bf652739ecd841d696e6c1b85ce7dd803e9177136df8fb03aa08b87365389", 33 | "zh:0c7ea5a1cbf2e01a8627b8a84df69c93683f39fe947b288e958e72b9d12a827f", 34 | "zh:25a68604c7d6aa736d6e99225051279eaac3a7cf4cab33b00ff7eae7096166f6", 35 | "zh:34f46d82ca34604f6522de3b36eda19b7ad3be1e38947afc6ac31656eab58c8a", 36 | "zh:6959f8f2f3de93e61e0abb90dbec41e28a66daec1607c46f43976bd6da50bcfd", 37 | "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", 38 | "zh:a81e5d65a343da9caa6f1d17ae0aced9faecb36b4f8554bd445dbd4f8be21ab6", 39 | "zh:b1d3f1557214d652c9120862ce27e9a7b61cb5aec5537a28240a5a37bf0b1413", 40 | "zh:b71588d006471ae2d4a7eca2c51d69fd7c5dec9b088315599b794e2ad0cc5e90", 41 | "zh:cfdaae4028b644dff3530c77b49d31f7e6f4c4e2a9e5c8ac6a88e383c80c9e9c", 42 | "zh:dbde15154c2eb38a5f54d0e7646bc67510004179696f3cc2bc1d877cecacf83b", 43 | "zh:fb681b363f83fb5f64dfa6afbf32d100d0facd2a766cf3493b8ddb0398e1b0f7", 44 | ] 45 | } 46 | 47 | provider "registry.terraform.io/hashicorp/null" { 48 | version = "3.2.2" 49 | hashes = [ 50 | "h1:JViWrgF7Ks2GqB6UfcLDUbusXeSfhfhFymo4c0N5e+I=", 51 | "zh:3248aae6a2198f3ec8394218d05bd5e42be59f43a3a7c0b71c66ec0df08b69e7", 52 | "zh:32b1aaa1c3013d33c245493f4a65465eab9436b454d250102729321a44c8ab9a", 53 | "zh:38eff7e470acb48f66380a73a5c7cdd76cc9b9c9ba9a7249c7991488abe22fe3", 54 | "zh:4c2f1faee67af104f5f9e711c4574ff4d298afaa8a420680b0cb55d7bbc65606", 55 | "zh:544b33b757c0b954dbb87db83a5ad921edd61f02f1dc86c6186a5ea86465b546", 56 | "zh:696cf785090e1e8cf1587499516b0494f47413b43cb99877ad97f5d0de3dc539", 57 | "zh:6e301f34757b5d265ae44467d95306d61bef5e41930be1365f5a8dcf80f59452", 58 | "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", 59 | "zh:913a929070c819e59e94bb37a2a253c228f83921136ff4a7aa1a178c7cce5422", 60 | "zh:aa9015926cd152425dbf86d1abdbc74bfe0e1ba3d26b3db35051d7b9ca9f72ae", 61 | "zh:bb04798b016e1e1d49bcc76d62c53b56c88c63d6f2dfe38821afef17c416a0e1", 62 | "zh:c23084e1b23577de22603cff752e59128d83cfecc2e6819edadd8cf7a10af11e", 63 | ] 64 | } 65 
| -------------------------------------------------------------------------------- /terraform/api_gateway/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_apigatewayv2_api" "apigateway" { 2 | name = var.api_name 3 | protocol_type = "HTTP" 4 | description = "HTTP API for Question and Answer App" 5 | version = "1.0" 6 | 7 | tags = { 8 | Environment = var.environment 9 | Application = var.application_name 10 | } 11 | } 12 | 13 | resource "aws_apigatewayv2_vpc_link" "vpc_link" { 14 | name = var.vpc_link_name 15 | security_group_ids = var.security_group_ids 16 | subnet_ids = var.subnet_ids 17 | } 18 | 19 | resource "aws_apigatewayv2_integration" "root_integration" { 20 | api_id = aws_apigatewayv2_api.apigateway.id 21 | integration_type = "HTTP_PROXY" 22 | integration_uri = var.lb_listener_arn 23 | integration_method = "GET" 24 | connection_type = "VPC_LINK" 25 | connection_id = aws_apigatewayv2_vpc_link.vpc_link.id 26 | timeout_milliseconds = 30000 # 30 seconds 27 | } 28 | 29 | resource "aws_apigatewayv2_integration" "ask_integration" { 30 | api_id = aws_apigatewayv2_api.apigateway.id 31 | integration_type = "HTTP_PROXY" 32 | integration_uri = var.lb_listener_arn 33 | integration_method = "POST" 34 | connection_type = "VPC_LINK" 35 | connection_id = aws_apigatewayv2_vpc_link.vpc_link.id 36 | timeout_milliseconds = 30000 # 30 seconds 37 | } 38 | 39 | # Use {proxy+} to capture all requests to the API, and route them to the root integration. 40 | # Only for testing purposes 41 | # resource "aws_apigatewayv2_route" "root_route" { 42 | # api_id = aws_apigatewayv2_api.apigateway.id 43 | # route_key = "ANY /{proxy+}" 44 | # target = "integrations/${aws_apigatewayv2_integration.root_integration.id}" 45 | # } 46 | 47 | # Production route 48 | resource "aws_apigatewayv2_route" "root_route" { 49 | api_id = aws_apigatewayv2_api.apigateway.id 50 | route_key = "GET /" 51 | target = "integrations/${aws_apigatewayv2_integration.root_integration.id}" 52 | } 53 | 54 | # Production route 55 | resource "aws_apigatewayv2_route" "ask_route" { 56 | api_id = aws_apigatewayv2_api.apigateway.id 57 | route_key = "POST /ask" 58 | target = "integrations/${aws_apigatewayv2_integration.ask_integration.id}" 59 | } 60 | 61 | resource "aws_cloudwatch_log_group" "apigateway" { 62 | name = "/aws/apigateway/${var.application_name}/${var.api_name}" 63 | retention_in_days = 7 64 | tags = { 65 | Environment = var.environment 66 | Application = var.application_name 67 | } 68 | } 69 | 70 | resource "aws_apigatewayv2_stage" "apigateway" { 71 | api_id = aws_apigatewayv2_api.apigateway.id 72 | description = "Stage for HTTP API" 73 | name = "$default" # The $default stage is a special stage that's automatically associated with new deployments. 74 | auto_deploy = true # Whether updates to an API automatically trigger a new deployment. 
75 | 76 | access_log_settings { 77 | destination_arn = aws_cloudwatch_log_group.apigateway.arn 78 | format = jsonencode({ 79 | requestId = "$context.requestId", 80 | ip = "$context.identity.sourceIp", 81 | user = "$context.identity.user", 82 | caller = "$context.identity.caller", 83 | request = "$context.requestTime", 84 | status = "$context.status", 85 | response = "$context.responseLength" 86 | }) 87 | } 88 | tags = { 89 | Environment = var.environment 90 | Application = var.application_name 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /terraform/api_gateway/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/terraform/api_gateway/outputs.tf -------------------------------------------------------------------------------- /terraform/api_gateway/variables.tf: -------------------------------------------------------------------------------- 1 | variable "vpc_link_name" { 2 | description = "The name of the VPC link" 3 | default = "vpc-link" 4 | type = string 5 | } 6 | 7 | variable "api_name" { 8 | description = "The name of the API" 9 | default = "competition-notices-api" 10 | type = string 11 | } 12 | 13 | variable "api_stage_name" { 14 | description = "The name of the API stage" 15 | default = "prod" 16 | type = string 17 | } 18 | 19 | variable "logs_retantion_in_days" { 20 | description = "The number of days to retain the logs" 21 | default = 7 22 | type = number 23 | } 24 | 25 | variable "usage_plan_name" { 26 | description = "The name of the usage plan" 27 | default = "qa-usage-plan" 28 | type = string 29 | } 30 | 31 | variable "period" { 32 | description = "The period of the usage plan" 33 | default = "WEEK" 34 | type = string 35 | } 36 | 37 | variable "quota_limit" { 38 | description = "Maximum number of requests that can be made in a given time period." 39 | default = 100 40 | type = number 41 | } 42 | 43 | variable "quota_offset" { 44 | description = "Number of requests to subtract from the given limit." 45 | default = 12 46 | type = number 47 | } 48 | 49 | variable "burst_limit" { 50 | description = "The maximum request rate over a burst of one to a few seconds" 51 | default = 500 # AWS default is 5,000 requests per second 52 | type = number 53 | } 54 | 55 | variable "rate_limit" { 56 | description = "The API request steady-state rate limit." 
57 | default = 1000 # AWS default is 10,000 requests per second 58 | type = number 59 | } 60 | 61 | variable "application_name" { 62 | description = "Application name" 63 | } 64 | 65 | variable "region" { 66 | description = "The region where the resources will be created" 67 | } 68 | 69 | variable "environment" { 70 | description = "The environment the bucket is used in [DEV, STAG, PROD]" 71 | validation { 72 | condition = contains(["DEV", "STAG", "PROD"], var.environment) 73 | error_message = "Environment must be one of DEV, STAG, PROD" 74 | } 75 | } 76 | 77 | variable "container_port" { 78 | description = "The port the application is listening on" 79 | } 80 | 81 | variable "lb_arn" { 82 | type = string 83 | description = "The ARN of the internal NLB" 84 | } 85 | 86 | variable "lb_dns_name" { 87 | type = string 88 | description = "The DNS name of the internal NLB" 89 | } 90 | 91 | variable "api_timeout_milliseconds" { 92 | description = "The timeout for the API (25000=25s)" 93 | default = 25000 94 | type = number 95 | } 96 | 97 | variable "subnet_ids" { 98 | description = "The subnet IDs for the VPC link" 99 | type = list(string) 100 | } 101 | 102 | variable "security_group_ids" { 103 | description = "The security group IDs for the VPC link" 104 | type = list(string) 105 | } 106 | 107 | variable "throttling_burst_limit" { 108 | description = "The maximum request rate over a burst of one to a few seconds" 109 | default = 500 # AWS default is 5,000 requests per second 110 | type = number 111 | } 112 | 113 | variable "throttling_rate_limit" { 114 | description = "The API request steady-state rate limit." 115 | default = 1000 # AWS default is 10,000 requests per second 116 | type = number 117 | } 118 | 119 | variable "lb_listener_arn" { 120 | type = string 121 | description = "The ARN of the load balancer listener" 122 | } -------------------------------------------------------------------------------- /terraform/ecr/main.tf: -------------------------------------------------------------------------------- 1 | # Get the current AWS account ID 2 | data "aws_caller_identity" "current" {} 3 | 4 | # Get the current AWS region 5 | data "aws_region" "current" {} 6 | 7 | # Create an ECR repository for ECS 8 | resource "aws_ecr_repository" "ecr_repo" { 9 | name = var.ecr_name_ecs 10 | image_tag_mutability = "MUTABLE" 11 | 12 | image_scanning_configuration { 13 | scan_on_push = true 14 | } 15 | 16 | force_delete = false 17 | 18 | tags = { 19 | Environment = var.environment 20 | Application = var.application_name 21 | Name = var.ecr_name_ecs 22 | } 23 | } 24 | 25 | # Create an ECR repository for Lambda 26 | resource "aws_ecr_repository" "lambda_repo" { 27 | name = var.ecr_name_lambda 28 | image_tag_mutability = "MUTABLE" 29 | 30 | image_scanning_configuration { 31 | scan_on_push = true 32 | } 33 | 34 | force_delete = false 35 | 36 | tags = { 37 | Environment = var.environment 38 | Application = var.application_name 39 | Name = var.ecr_name_lambda 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /terraform/ecr/outputs.tf: -------------------------------------------------------------------------------- 1 | output "ecr_repository_name" { 2 | value = aws_ecr_repository.ecr_repo.name 3 | } 4 | 5 | output "ecr_repository_url" { 6 | value = aws_ecr_repository.ecr_repo.repository_url 7 | } -------------------------------------------------------------------------------- /terraform/ecr/variables.tf: 
-------------------------------------------------------------------------------- 1 | variable "region" { 2 | description = "The deployment region" 3 | } 4 | 5 | variable "application_name" { 6 | description = "Application name" 7 | } 8 | 9 | variable "environment" { 10 | description = "The environment the bucket is used in [DEV, STAG, PROD]" 11 | validation { 12 | condition = contains(["DEV", "STAG", "PROD"], var.environment) 13 | error_message = "Environment must be one of DEV, STAG, PROD" 14 | } 15 | } 16 | 17 | variable "ecr_name_ecs" { 18 | description = "The name of the ECR repository for the ECS image" 19 | default = "ecs-repo" 20 | } 21 | 22 | variable "ecr_name_lambda" { 23 | description = "The name of the ECR repository for the Lambda image" 24 | default = "lambda-repo" 25 | } -------------------------------------------------------------------------------- /terraform/ecs/ecs_task_executor_policy.tf: -------------------------------------------------------------------------------- 1 | 2 | # Generates an IAM policy document for the ECS task executor role 3 | data "aws_iam_policy_document" "ecs_task_executor_policy" { 4 | statement { 5 | sid = 1 6 | actions = [ 7 | "logs:CreateLogStream", 8 | "logs:PutLogEvents", 9 | "logs:CreateLogGroup" 10 | ] 11 | resources = [ 12 | "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:*"] 13 | } 14 | statement { 15 | sid = 2 16 | actions = [ 17 | "ecr:GetAuthorizationToken", 18 | "ecr:BatchCheckLayerAvailability", 19 | "ecr:GetDownloadUrlForLayer", 20 | "ecr:BatchGetImage" 21 | ] 22 | resources = ["*"] 23 | } 24 | } 25 | 26 | # IAM policy for ECS task executor role 27 | resource "aws_iam_policy" "ecs_task_executor_policy" { 28 | name = "ecs-task-executor-policy-tf" 29 | description = "Policy for ECS task executor role" 30 | 31 | policy = data.aws_iam_policy_document.ecs_task_executor_policy.json 32 | } 33 | 34 | # IAM role for ECS task executor 35 | resource "aws_iam_role" "ecs_task_executor_role" { 36 | name = var.ecs_execution_role_name 37 | assume_role_policy = <<EOF -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathewsrc/generativeai-questions-and-answers-app/9015e8ecdba13b11c4c0caa0953460abcff5d6c2/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | from src.app.main import app 3 | 4 | client = TestClient(app) 5 | 6 | 7 | def test_root_endpoint(): 8 | response = client.get("/") 9 | assert response.status_code == 200 10 | 11 | assert "<h1>Welcome to our Question/Answering application" in response.text 12 | 13 | 14 | def test_question_endpoint(): 15 | response = client.post("/ask", json={"text": "What is AWS?", "temperature": 0.5}) 16 | assert response.status_code == 200 17 | assert "answer" in response.json() 18 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from lambda_functions.src.utils import get_embeddings, Embedding, Embeddings 3 | from langchain_community.embeddings import BedrockEmbeddings 4 | 5 | 6 | def test_embedding_invalid(): 7 | with pytest.raises(ValueError, match="'INVALID' is not a valid Embeddings"): 8 | Embedding(Embeddings("INVALID"), "invalid_model") 9 | 10 | 11 | def test_embedding_valid(): 12 | embedding = Embedding(Embeddings.BEDROCK, "amazon.titan-embed-text-v1") 13 | assert embedding.model_name == "amazon.titan-embed-text-v1" 14 | assert embedding.embeddings == Embeddings.BEDROCK 15 | 16 | 17 | def test_get_embeddings_bedrock(): 18 | embedding = Embedding(Embeddings.BEDROCK, "amazon.titan-embed-text-v1") 19 | result = get_embeddings(embedding, "us-east-1") 20 | assert isinstance(result, BedrockEmbeddings) 21 | assert result.model_id == "amazon.titan-embed-text-v1" 22 | --------------------------------------------------------------------------------