├── .editorconfig ├── .flake8 ├── .gitattributes ├── .github ├── CONTRIBUTING.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── actions │ ├── merge-branch │ │ └── action.yml │ └── tests │ │ └── python │ │ └── action.yml ├── dependabot.yml └── workflows │ ├── auto-assign.yml │ ├── precommitVersionBumps.yml │ ├── pullRequestController.yml │ ├── pushMain.yml │ ├── semanticVersionBump.yml │ └── testsPython.yml ├── .gitignore ├── .mergify.yml ├── .pre-commit-config.yaml ├── .prettierignore ├── .prettierrc ├── .pylintrc ├── .vscode ├── extensions.json └── settings.json ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── codespell.txt ├── commitlint.config.js ├── models ├── __init__.py ├── __version__.py ├── conf.py ├── const.py ├── examples │ ├── __init__.py │ ├── certification_programs.py │ ├── load.py │ ├── online_courses.py │ ├── pinecone_init.py │ ├── prompt.py │ └── rag.py ├── exceptions.py ├── hybrid_search_retreiver.py ├── pinecone.py ├── prompt_templates.py ├── tests │ ├── __init__.py │ ├── mock_data │ │ ├── .env.test_01 │ │ ├── .env.test_illegal_nulls │ │ ├── .env.test_legal_nulls │ │ └── test_load.pdf │ ├── test_configuration.py │ ├── test_examples.py │ ├── test_hsr.py │ ├── test_openai.py │ ├── test_pinecone.py │ ├── test_prompt_templates.py │ └── test_prompts.py └── yt.py ├── package.json ├── pyproject.toml ├── release.config.js ├── requirements ├── base.txt └── local.txt ├── run_pylint.sh ├── setup.py ├── setup_test.py ├── setup_utils.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | # see http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 2 10 | charset = utf-8 11 | tab_width = 4 12 | 13 | [*.md] 14 | trim_trailing_whitespace = false 15 | 16 | [*.py] 17 | indent_size = 4 
18 | 19 | [go.mod] 20 | indent_style = tab 21 | indent_size = 1 22 | 23 | [*.go] 24 | indent_style = tab 25 | indent_size = 1 26 | 27 | [Makefile] 28 | indent_style = tab 29 | indent_size = 1 30 | 31 | [Makefile.*] 32 | indent_style = tab 33 | indent_size = 1 34 | 35 | [LICENSE] 36 | indent_size = none 37 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore=D205,D413,D400,D401 3 | max-line-length=120 4 | max-complexity=10 5 | exclude=venv 6 | extend-exclude="*__init__.py,*__version__.py,venv" 7 | select="C101" 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | # * text eol=lf 3 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | The repository is released under the GNU AFFERO GENERAL PUBLIC LICENSE license, and follows a standard Github development process, using Github tracker for issues and merging pull requests into master. 
4 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: FullStackWithLawrence 4 | patreon: FullStackWithLawrence 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | --- 5 | 6 | **Describe the bug** 7 | A clear and concise description of what the bug is. 8 | 9 | **Workflow** 10 | If applicable, provide a workflow file to help explain your problem. 11 | 12 | **Expected behavior** 13 | A clear and concise description of what you expected to happen. 14 | 15 | **Additional context** 16 | Add any other context about the problem here. 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | --- 5 | 6 | **Is your feature request related to a problem? Please describe.** 7 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 8 | 9 | **Describe the solution you'd like** 10 | A clear and concise description of what you want to happen. 11 | 12 | **Describe alternatives you've considered** 13 | A clear and concise description of any alternative solutions or features you've considered. 
14 | 15 | **Additional context** 16 | Add any other context or screenshots about the feature request here. 17 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Pull Request Template 2 | 3 | ## Type of Change 4 | 5 | 6 | 7 | - [ ] New feature 8 | - [ ] Bug fix 9 | - [ ] Documentation 10 | - [ ] Refactor 11 | - [ ] Chore 12 | 13 | ## Resolves 14 | 15 | - Fixes #[Add issue number here.] 16 | 17 | ## Changes 18 | 19 | 20 | 21 | _Describe what this Pull Request does_ 22 | 23 | ## Testing 24 | 25 | 26 | 27 | _Describe the testing that has been done or needs to be done_ 28 | 29 | ## Screenshots 30 | 31 | 32 | 33 | _Add any relevant screenshots_ 34 | 35 | ## Dependencies 36 | 37 | 38 | 39 | _List dependencies_ 40 | 41 | ## Breaking Changes 42 | 43 | 44 | 45 | _Describe any breaking changes_ 46 | -------------------------------------------------------------------------------- /.github/actions/merge-branch/action.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #------------------------------------------------------------------------------ 3 | # Run pre-commit 4 | #------------------------------------------------------------------------------ 5 | name: Merge 6 | branding: 7 | icon: "git-pull-request" 8 | color: "orange" 9 | inputs: 10 | github-token: 11 | description: "The GitHub token to use for authentication" 12 | required: true 13 | type: string 14 | source-branch: 15 | description: "The branch to merge from" 16 | required: false 17 | type: string 18 | default: "main" 19 | target-branch: 20 | description: "The branch to merge to" 21 | required: true 22 | type: string 23 | 24 | python-version: 25 | description: "The version of Python to use, such as 3.12" 26 | required: true 27 | type: string 28 | 29 | runs: 30 | using: "composite" 31 | steps: 32 | - name: Checkout code 
33 | id: checkout 34 | uses: actions/checkout@v4 35 | with: 36 | fetch-depth: 0 37 | persist-credentials: false 38 | 39 | - name: Remember current branch 40 | shell: bash 41 | run: | 42 | echo "CURRENT_BRANCH=$(git branch --show-current)" >> $GITHUB_ENV 43 | 44 | - name: Merge 45 | id: merge 46 | shell: bash 47 | run: | 48 | git config --local user.email "action@github.com" 49 | git config --local user.name "GitHub Action" 50 | git checkout ${{ inputs.source-branch }} 51 | git pull 52 | git checkout ${{ inputs.target-branch }} 53 | git merge -Xtheirs ${{ inputs.source-branch }} 54 | git push https://${{ inputs.github-token }}@github.com/${{ github.repository }}.git HEAD:${{ inputs.target-branch }} 55 | 56 | - name: Checkout current branch 57 | shell: bash 58 | run: | 59 | git checkout ${{ env.CURRENT_BRANCH }} 60 | -------------------------------------------------------------------------------- /.github/actions/tests/python/action.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #------------------------------------------------------------------------------ 3 | # Run Python unit tests 4 | #------------------------------------------------------------------------------ 5 | name: Test Python 6 | branding: 7 | icon: "git-pull-request" 8 | color: "orange" 9 | inputs: 10 | python-version: 11 | description: "The version of Python to use, such as 3.12" 12 | required: true 13 | type: string 14 | openai-api-organization: 15 | description: "The OpenAI API organization" 16 | required: true 17 | type: string 18 | openai-api-key: 19 | description: "The OpenAI API key" 20 | required: true 21 | type: string 22 | pinecone-api-key: 23 | description: "The Pinecone API key" 24 | required: true 25 | type: string 26 | pinecone-environment: 27 | description: "The Pinecone environment" 28 | required: true 29 | type: string 30 | 31 | runs: 32 | using: "composite" 33 | steps: 34 | - name: Checkout code 35 | id: checkout 36 | uses: 
actions/checkout@v4 37 | 38 | - name: Cache Python dependencies 39 | uses: actions/cache@v3 40 | with: 41 | path: ~/.cache/pip 42 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements/local.txt') }} 43 | restore-keys: | 44 | ${{ runner.os }}-pip 45 | 46 | - name: Set up Python 47 | uses: actions/setup-python@v4 48 | with: 49 | python-version: ${{ inputs.python-version }} 50 | 51 | - name: locate site-packages path 52 | shell: bash 53 | run: | 54 | echo "SITE_PACKAGES_PATH=$(python -c 'import site; print(site.getsitepackages()[0])')" >> $GITHUB_ENV 55 | 56 | - name: Install pip 57 | shell: bash 58 | run: | 59 | python -m pip install --upgrade pip 60 | 61 | - name: Install dependencies 62 | shell: bash 63 | run: | 64 | pip install -r ./requirements/local.txt 65 | env: 66 | SITE_PACKAGES_PATH: ${{ env.SITE_PACKAGES_PATH }} 67 | 68 | - name: Create .env 69 | shell: bash 70 | run: | 71 | touch ./.env 72 | echo "OPENAI_API_ORGANIZATION=${{ env.OPENAI_API_ORGANIZATION }}" >> ./.env 73 | echo "OPENAI_API_KEY=${{ env.OPENAI_API_KEY }}" >> ./.env 74 | echo "PINECONE_API_KEY=${{ env.PINECONE_API_KEY }}" >> ./.env 75 | echo "PINECONE_ENVIRONMENT=${{ env.PINECONE_ENVIRONMENT }}" >> ./.env 76 | echo "DEBUG_MODE=False" >> ./.env 77 | env: 78 | OPENAI_API_ORGANIZATION: ${{ inputs.openai-api-organization }} 79 | OPENAI_API_KEY: ${{ inputs.openai-api-key }} 80 | PINECONE_API_KEY: ${{ inputs.pinecone-api-key }} 81 | PINECONE_ENVIRONMENT: ${{ inputs.pinecone-environment }} 82 | 83 | - name: Run Python unit tests 84 | shell: bash 85 | run: | 86 | make test 87 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | assignees: 8 | - "lpm0073" 9 | reviewers: 10 | - "lpm0073" 11 | - package-ecosystem: "npm" 12 | directory: 
"/" 13 | schedule: 14 | interval: "monthly" 15 | labels: 16 | - "dependencies" 17 | - "javascript" 18 | assignees: 19 | - "FullStackWithLawrence" 20 | reviewers: 21 | - "FullStackWithLawrence" 22 | - package-ecosystem: "pip" 23 | directory: "/" 24 | schedule: 25 | interval: "monthly" 26 | labels: 27 | - "dependencies" 28 | - "python" 29 | assignees: 30 | - "lpm0073" 31 | reviewers: 32 | - "lpm0073" 33 | -------------------------------------------------------------------------------- /.github/workflows/auto-assign.yml: -------------------------------------------------------------------------------- 1 | name: Auto Assign 2 | on: 3 | issues: 4 | types: [opened] 5 | pull_request: 6 | types: [opened] 7 | jobs: 8 | run: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: write 12 | pull-requests: write 13 | steps: 14 | - name: "Auto-assign issue" 15 | uses: pozil/auto-assign-issue@v2 16 | with: 17 | repo-token: ${{ secrets.GITHUB_TOKEN }} 18 | assignees: lpm0073 19 | numOfAssignee: 1 20 | -------------------------------------------------------------------------------- /.github/workflows/precommitVersionBumps.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #------------------------------------------------------------------------------ 3 | # Lawrence McDaniel - https://lawrencemcdaniel.com 4 | # Version Bump Workflow for .pre-commit-config.yaml 5 | # 6 | # This workflow runs on a cron schedule and checks for updates to the 7 | # .pre-commit-config.yaml file. If updates are found, the workflow 8 | # commits the changes to the next branch and pushes the changes to GitHub. 9 | # 10 | # This is a workaround for the fact that the pre-commit autoupdate command 11 | # is not supported by Dependabot. 
12 | #------------------------------------------------------------------------------ 13 | name: pre-commit Version Bumps 14 | 15 | on: 16 | schedule: 17 | - cron: "0 0 * * 3" 18 | workflow_dispatch: 19 | 20 | jobs: 21 | evaluate_precommit_config: 22 | runs-on: ubuntu-latest 23 | 24 | steps: 25 | - uses: actions/checkout@v4 26 | with: 27 | persist-credentials: false 28 | 29 | - name: Checkout next branch 30 | run: | 31 | git fetch 32 | git checkout next 33 | git pull origin next 34 | 35 | - name: Cache NPM dependencies 36 | uses: actions/cache@v4 37 | with: 38 | path: ~/.npm 39 | key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} 40 | restore-keys: | 41 | ${{ runner.os }}-node 42 | 43 | - name: Cache Python dependencies 44 | uses: actions/cache@v4 45 | with: 46 | path: ~/.cache/pip 47 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements/local.txt') }} 48 | restore-keys: | 49 | ${{ runner.os }}-pip 50 | 51 | - name: Set up Python 52 | uses: actions/setup-python@v5 53 | with: 54 | python-version: "3.12" 55 | 56 | - name: locate site-packages path 57 | shell: bash 58 | run: | 59 | echo "SITE_PACKAGES_PATH=$(python -c 'import site; print(site.getsitepackages()[0])')" >> $GITHUB_ENV 60 | 61 | - name: Install pip 62 | shell: bash 63 | run: | 64 | python -m pip install --upgrade pip 65 | 66 | - name: Install dependencies 67 | shell: bash 68 | run: | 69 | pip install -r ./requirements/local.txt 70 | env: 71 | SITE_PACKAGES_PATH: ${{ env.SITE_PACKAGES_PATH }} 72 | 73 | - name: Setup Node.js environment 74 | uses: actions/setup-node@v4 75 | with: 76 | node-version: "20.9.0" 77 | 78 | - name: Install npm dev dependencies 79 | run: npm install 80 | 81 | - name: Update .pre-commit-config.yaml 82 | run: | 83 | pre-commit autoupdate 84 | 85 | - name: Check for unstaged changes 86 | id: check_changes 87 | run: | 88 | if [[ -n "$(git status --porcelain .pre-commit-config.yaml)" ]]; then 89 | echo "::set-output name=changes::true" 90 | else 91 | echo 
"::set-output name=changes::false" 92 | fi 93 | 94 | - name: Commit and push changes 95 | if: steps.check_changes.outputs.changes == 'true' 96 | shell: bash 97 | run: | 98 | git config --local user.email "action@github.com" 99 | git config --local user.name "GitHub Action" 100 | git add .pre-commit-config.yaml 101 | git commit -m "chore: [gh] version bumps in .pre-commit-config.yaml [skip ci]" 102 | git push https://${{ secrets.PAT }}@github.com/${{ github.repository }}.git HEAD:next 103 | -------------------------------------------------------------------------------- /.github/workflows/pullRequestController.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #------------------------------------------------------------------------------ 3 | # Pull Request Workflow Controller. 4 | # 5 | # Triggers: 6 | # - Called automatically on relevant actions performed on pull requests. 7 | # - Can also be run manually by clicking the "Run workflow" button. 8 | # 9 | # Actions: 10 | # - Use semantic release rules to determine if a new release will be published. 11 | # - run Python tests, but only if Python-related files have changed. 12 | # - run Terraform tests, but only if Terraform-related files have changed. 13 | # - run ReactJS tests, but only if ReactJS-related files have changed. 14 | # - run pre-commit hooks to ensure code is formatted correctly. 15 | # 16 | # To-Do: 17 | # If a new release is to be published then we want to consider running QA tests 18 | # to ensure formatting and documentation is correct. 19 | #------------------------------------------------------------------------------ 20 | name: Pull Request Controller 21 | 22 | on: 23 | workflow_dispatch: 24 | # GitHub Copilot: The `pull_request` and `pull_request_target` are two different 25 | # event types in GitHub Actions that trigger workflows when activity related 26 | # to pull requests occurs. 
27 | # - `pull_request`: This event triggers a workflow run whenever a pull 28 | # request is opened, synchronized, or closed. The workflow runs in the context of the 29 | # pull request, meaning it has access to the code and environment variables of the head 30 | # branch of the pull request. This is safe for pull requests within the same repository, 31 | # but for pull requests from a fork, this could potentially expose sensitive information. 32 | # 33 | # - `pull_request_target`: This event is similar to `pull_request`, but it runs in the context 34 | # of the base of the pull request, rather than the head. This means it has access to the code 35 | # and environment variables of the base branch, not the head branch. This is safer for 36 | # pull requests from forks, as it prevents the fork from accessing sensitive information 37 | # in the base repository. However, it means the workflow does not have access to the code 38 | # in the pull request by default. If you need to access the code in the pull request, 39 | # you can use the `actions/checkout` action with the `ref` input 40 | # set to `github.event.pull_request.head.ref`. 41 | # 42 | # In general, use `pull_request` for workflows that need to access the code in the pull request, 43 | # and `pull_request_target` for workflows that need to be safe for pull requests from forks. 
44 | pull_request_target: 45 | types: [opened, synchronize] 46 | paths: 47 | - "**.py" 48 | - "./requirements" 49 | - "**.package.json" 50 | - "./models/**" 51 | 52 | env: 53 | python-version: "3.12" 54 | 55 | jobs: 56 | check_for_pending_release: 57 | name: test-semantic-release 58 | runs-on: ubuntu-latest 59 | steps: 60 | - name: Checkout 61 | uses: actions/checkout@v4 62 | 63 | - name: Semantic Release 64 | uses: cycjimmy/semantic-release-action@v4 65 | id: semantic 66 | with: 67 | dry_run: true 68 | branches: | 69 | [ 70 | '+([0-9])?(.{+([0-9]),x}).x', 71 | 'main', 72 | 'next', 73 | 'next-major', 74 | { 75 | name: 'beta', 76 | prerelease: true 77 | }, 78 | { 79 | name: 'alpha', 80 | prerelease: true 81 | } 82 | ] 83 | extra_plugins: | 84 | @semantic-release/git 85 | @semantic-release/changelog 86 | env: 87 | GITHUB_TOKEN: ${{ secrets.PAT }} 88 | 89 | - name: Test Outputs 90 | if: steps.semantic.outputs.new_release_published == 'true' 91 | run: | 92 | echo ${{ steps.semantic.outputs.new_release_version }} 93 | echo ${{ steps.semantic.outputs.new_release_major_version }} 94 | echo ${{ steps.semantic.outputs.new_release_minor_version }} 95 | echo ${{ steps.semantic.outputs.new_release_patch_version }} 96 | -------------------------------------------------------------------------------- /.github/workflows/pushMain.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #--------------------------------------------------------- 3 | # - Create a semantical release 4 | # - Merge main into next, alpha, beta, and next-major 5 | #--------------------------------------------------------- 6 | name: Push to main 7 | 8 | on: 9 | workflow_dispatch: 10 | push: 11 | branches: 12 | - main 13 | jobs: 14 | merge-main-to-dev-branches: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.PAT }} 18 | 19 | steps: 20 | - name: Checkout code 21 | id: checkout 22 | uses: actions/checkout@v4 23 | 24 | - name: Merge main into next 
25 | uses: ./.github/actions/merge-branch 26 | with: 27 | github-token: ${{ env.GITHUB_TOKEN }} 28 | source-branch: main 29 | target-branch: next 30 | 31 | - name: Merge main into next-major 32 | uses: ./.github/actions/merge-branch 33 | with: 34 | github-token: ${{ env.GITHUB_TOKEN }} 35 | source-branch: main 36 | target-branch: next-major 37 | 38 | - name: Merge main into alpha 39 | uses: ./.github/actions/merge-branch 40 | with: 41 | github-token: ${{ env.GITHUB_TOKEN }} 42 | source-branch: main 43 | target-branch: alpha 44 | 45 | - name: Merge main into beta 46 | uses: ./.github/actions/merge-branch 47 | with: 48 | github-token: ${{ env.GITHUB_TOKEN }} 49 | source-branch: main 50 | target-branch: beta 51 | 52 | semantic-release: 53 | needs: merge-main-to-dev-branches 54 | runs-on: ubuntu-latest 55 | env: 56 | GITHUB_TOKEN: ${{ secrets.PAT }} 57 | 58 | steps: 59 | - uses: actions/checkout@v4 60 | id: checkout 61 | with: 62 | persist-credentials: false 63 | 64 | - name: Semantic Release 65 | uses: cycjimmy/semantic-release-action@v4 66 | id: semantic 67 | with: 68 | branches: | 69 | [ 70 | '+([0-9])?(.{+([0-9]),x}).x', 71 | 'main', 72 | 'next', 73 | 'next-major', 74 | { 75 | name: 'beta', 76 | prerelease: true 77 | }, 78 | { 79 | name: 'alpha', 80 | prerelease: true 81 | } 82 | ] 83 | extra_plugins: | 84 | @semantic-release/git 85 | @semantic-release/changelog 86 | env: 87 | GIT_COMMITTER_NAME: github-actions[bot] 88 | GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com 89 | GIT_AUTHOR_NAME: github-actions[bot] 90 | GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com 91 | 92 | - name: Publish To GitHub Package Registry 93 | id: publish 94 | if: steps.semantic.outputs.new_release_published == 'true' 95 | run: echo "new release was published" 96 | shell: bash 97 | 98 | - name: Push updates to branch for major version 99 | id: push_major 100 | if: steps.semantic.outputs.new_release_published == 'true' 101 | run: "git push 
https://x-access-token:${{ env.GITHUB_TOKEN }}@github.com/${GITHUB_REPOSITORY}.git HEAD:refs/heads/v${{steps.semantic.outputs.new_release_major_version}}" 102 | shell: bash 103 | -------------------------------------------------------------------------------- /.github/workflows/semanticVersionBump.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #------------------------------------------------------------------------------ 3 | # Lawrence McDaniel - https://lawrencemcdaniel.com 4 | # Version Bump Workflow for Python package. 5 | # 6 | # Calculate the version of the 'next' branch based on semantic-release rules. 7 | # Compares the existing value of __version__.py to the calculated value. 8 | # If they are different, it will update __version__.py and push the changes 9 | # to the main branch. 10 | #------------------------------------------------------------------------------ 11 | name: Semantic Version Bump (next) 12 | 13 | on: 14 | workflow_dispatch: 15 | push: 16 | branches: 17 | - alpha 18 | - beta 19 | - next 20 | - next-major 21 | 22 | jobs: 23 | bump-version-next: 24 | runs-on: ubuntu-latest 25 | env: 26 | VERSION_FILE: __version__.py 27 | PACKAGE_PATH: ${{ github.workspace }}/models/ 28 | 29 | steps: 30 | - uses: actions/checkout@v4 31 | with: 32 | persist-credentials: false 33 | 34 | - name: Cache NPM dependencies 35 | uses: actions/cache@v4 36 | with: 37 | path: ~/.npm 38 | key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} 39 | restore-keys: | 40 | ${{ runner.os }}-node 41 | 42 | - name: Set up Python 3.12 43 | uses: actions/setup-python@v5 44 | with: 45 | python-version: "3.12" 46 | 47 | - name: Setup Node.js environment 48 | uses: actions/setup-node@v4 49 | with: 50 | node-version: "20.9.0" 51 | 52 | - name: Install npm dev dependencies 53 | run: npm install 54 | 55 | - name: Get current version 56 | # step 1 57 | # the current version persisted to __version__.py 58 | id: current_version 
59 | run: | 60 | cd ${{ env.PACKAGE_PATH }} 61 | echo "CURRENT_VERSION=$(python -c 'from __version__ import __version__; print(__version__)')" >> $GITHUB_ENV 62 | env: 63 | GITHUB_TOKEN: ${{ secrets.PAT }} 64 | 65 | - name: null step 66 | id: null_step1 67 | run: echo "i ensure that CURRENT_VERSION is set." 68 | 69 | - name: Get next version 70 | # step 2 71 | # calculate the next version based on semantic-release rules 72 | # this will return a null string is there in fact is no version bump. 73 | # so set NEXT_VERSION to CURRENT_VERSION if there is no version bump. 74 | id: next_version 75 | run: | 76 | NEXT_VERSION=$(npx semantic-release --dry-run --no-ci | awk '/The next release version is/{print $NF}') 77 | echo "NEXT_VERSION=${NEXT_VERSION:-${{ env.CURRENT_VERSION }}}" >> $GITHUB_ENV 78 | env: 79 | GITHUB_TOKEN: ${{ secrets.PAT }} 80 | CURRENT_VERSION: ${{ env.CURRENT_VERSION }} 81 | 82 | - name: null step 83 | id: null_step2 84 | run: echo "i ensure that NEXT_VERSION is set." 85 | 86 | - name: Check versions 87 | # step 3 88 | # compare the current version to the next version. 89 | # if they are different, set VERSION_CHANGED to true 90 | id: check_versions 91 | run: | 92 | if [ "$CURRENT_VERSION" != "$NEXT_VERSION" ]; then 93 | echo "VERSION_CHANGED=true" >> $GITHUB_ENV 94 | else 95 | echo "VERSION_CHANGED=false" >> $GITHUB_ENV 96 | fi 97 | env: 98 | CURRENT_VERSION: ${{ env.CURRENT_VERSION }} 99 | NEXT_VERSION: ${{ env.NEXT_VERSION }} 100 | 101 | - name: another null step 102 | id: null_step3 103 | run: echo "i ensure that CURRENT_VERSION, NEXT_VERSION and VERSION_CHANGED are set." 104 | 105 | - name: Update __version__.py 106 | # step 4 107 | # if VERSION_CHANGED is true, update __version__.py and push the changes to the 108 | # branch that triggered this workflow. 109 | if: env.VERSION_CHANGED == 'true' 110 | id: update_version 111 | run: | 112 | echo "# -*- coding: utf-8 -*-" > ${{ env.VERSION_FILE }} 113 | echo "# DO NOT EDIT." 
>> ${{ env.VERSION_FILE }} 114 | echo "# Managed via automated CI/CD in .github/workflows/semanticVersionBump.yml." >> ${{ env.VERSION_FILE }}
}}" 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .env 3 | data 4 | .DS_Store 5 | *.zip 6 | 7 | # Python 8 | build 9 | __pycache__ 10 | .pytest_cache 11 | *.egg-info 12 | venv 13 | .venv 14 | .pytest_cache 15 | *.pyc 16 | *.pyo 17 | *.pyd 18 | *.swp 19 | *.log 20 | 21 | # npm 22 | node_modules 23 | package-lock.json 24 | -------------------------------------------------------------------------------- /.mergify.yml: -------------------------------------------------------------------------------- 1 | # see: 2 | # - https://docs.mergify.com/getting-started/ 3 | pull_request_rules: 4 | - name: automatic approve dependabot pull requests 5 | conditions: 6 | - "author~=dependabot[bot]|dependabot-preview[bot]|dependabot" 7 | actions: 8 | review: 9 | type: APPROVE 10 | 11 | - name: automatic merge dependabot pull requests 12 | conditions: 13 | - "author~=dependabot[bot]|dependabot-preview[bot]|dependabot" 14 | - "#approved-reviews-by>=1" 15 | - "base=main" # replace 'main' with the name of the branch you want to auto-merge into 16 | actions: 17 | merge: 18 | method: merge 19 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | # default language version for each language 3 | python: python3.12 4 | repos: 5 | - repo: https://github.com/codespell-project/codespell 6 | rev: v2.3.0 7 | hooks: 8 | - id: codespell 9 | args: ["--ignore-words=codespell.txt"] 10 | exclude: 'codespell.txt|\.svg$' 11 | - repo: https://github.com/pre-commit/mirrors-prettier 12 | rev: v4.0.0-alpha.8 13 | hooks: 14 | - id: prettier 15 | - repo: https://github.com/psf/black 16 | rev: 24.10.0 17 | hooks: 18 | - id: black 19 | - repo: https://github.com/PyCQA/flake8 20 | rev: 7.1.1 21 | hooks: 22 | - 
id: flake8 23 | - repo: https://github.com/PyCQA/isort 24 | rev: 5.13.2 25 | hooks: 26 | - id: isort 27 | args: ["--settings-path=pyproject.toml"] 28 | - repo: local 29 | hooks: 30 | - id: pylint 31 | name: pylint 32 | entry: ./run_pylint.sh 33 | language: script 34 | types: [python] 35 | - repo: https://github.com/PyCQA/bandit 36 | rev: 1.8.0 37 | hooks: 38 | - id: bandit 39 | args: ["-ll"] 40 | - repo: https://github.com/pre-commit/pre-commit-hooks 41 | rev: v5.0.0 42 | hooks: 43 | # See https://pre-commit.com/hooks.html for more hooks 44 | #- id: check-added-large-files 45 | - id: fix-byte-order-marker 46 | - id: fix-encoding-pragma 47 | - id: check-case-conflict 48 | - id: check-json 49 | - id: check-merge-conflict 50 | - id: check-symlinks 51 | - id: check-toml 52 | - id: check-xml 53 | - id: check-yaml 54 | - id: destroyed-symlinks 55 | - id: detect-aws-credentials 56 | - id: detect-private-key 57 | - id: end-of-file-fixer 58 | - id: forbid-new-submodules 59 | - id: trailing-whitespace 60 | - id: check-case-conflict 61 | - id: check-merge-conflict 62 | - id: debug-statements 63 | - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook 64 | rev: v9.20.0 65 | hooks: 66 | - id: commitlint 67 | stages: [commit-msg] 68 | additional_dependencies: ["@commitlint/config-angular"] 69 | ci: 70 | # for more information, see https://pre-commit.ci 71 | autofix_commit_msg: | 72 | [pre-commit.ci] auto fixes from pre-commit.com hooks 73 | autofix_prs: true 74 | autoupdate_branch: "" 75 | autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate" 76 | autoupdate_schedule: weekly 77 | skip: [shellcheck, markdown-link-check, commitlint] 78 | submodules: false 79 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/.prettierignore -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "tabWidth": 2 3 | } 4 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | init-hook='import sys; print(sys.executable); print(sys.path)' 3 | ignore-paths=venv 4 | ignore=__version__.py 5 | 6 | [FORMAT] 7 | max-line-length=120 8 | 9 | [MESSAGES CONTROL] 10 | disable=C0103 11 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": ["ms-python.black-formatter"] 3 | } 4 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cornflakes.linter.executablePath": "./venv/bin/flake8", 3 | "[python]": { 4 | "editor.defaultFormatter": "ms-python.black-formatter" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [1.3.8](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.7...v1.3.8) (2025-05-14) 2 | 3 | 4 | ### Bug Fixes 5 | 6 | * force a new release ([48d8a70](https://github.com/FullStackWithLawrence/openai-embeddings/commit/48d8a70b6f2c53733d05366040de9d2812428084)) 7 | 8 | ## [1.3.7](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.6...v1.3.7) (2025-02-07) 9 | 10 | 11 | ### Bug Fixes 12 | 13 | * broken yaml 
([db3ccfa](https://github.com/FullStackWithLawrence/openai-embeddings/commit/db3ccfa8a6310f04c24a72f49140d6eada7c8f18)) 14 | * remove superfluous checks ([716ede1](https://github.com/FullStackWithLawrence/openai-embeddings/commit/716ede136628193040f4d9863aa2a36b34e3e345)) 15 | 16 | ## [1.3.6](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.5...v1.3.6) (2025-02-07) 17 | 18 | 19 | ### Bug Fixes 20 | 21 | * breaking changes in unit tests ([90926a9](https://github.com/FullStackWithLawrence/openai-embeddings/commit/90926a95a30a30f12e98841ecce6ac910625be90)) 22 | 23 | # Change Log 24 | 25 | All notable changes to this project will be documented in this file. 26 | The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). 27 | 28 | ## [1.3.5](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.4...v1.3.5) (2025-02-05) 29 | 30 | ### Bug Fixes 31 | 32 | - LangChain breaking changes and deprecations ([ac7b57e](https://github.com/FullStackWithLawrence/openai-embeddings/commit/ac7b57e75705afdea1d563c6a9e929504d782e87)) 33 | 34 | ## [1.3.4](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.3...v1.3.4) (2025-02-05) 35 | 36 | ### Bug Fixes 37 | 38 | - deprecation warnings and breaking changes ([604353e](https://github.com/FullStackWithLawrence/openai-embeddings/commit/604353e60d1197a60c517b14c02dd02909754307)) 39 | 40 | ## [1.3.2](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.1...v1.3.2) (2024-04-12) 41 | 42 | ### Bug Fixes 43 | 44 | - fix deprecations and breaking changes in LangChain and Pinecone 45 | 46 | ## [1.3.0](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.2.2...v1.3.0) (2023-12-19) 47 | 48 | ### Features 49 | 50 | - add pydantic and refactor settings and credentials management 
([332e4da](https://github.com/FullStackWithLawrence/openai-embeddings/commit/332e4dab89924b6ac2436e6d260e645bed26a0b4)) 51 | 52 | ## [1.2.2](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.2.1...v1.2.2) (2023-12-19) 55 | 56 | ### Bug Fixes 57 | 58 | - force a new release ([6c04b0b](https://github.com/FullStackWithLawrence/openai-embeddings/commit/6c04b0b95486fa25b40c6f4d1954bd22b58df7c9)) 59 | 60 | ## [1.2.1](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.2.0...v1.2.1) (2023-12-04) 61 | 62 | ### Bug Fixes 63 | 64 | - force a new release ([e21f9c5](https://github.com/FullStackWithLawrence/openai-embeddings/commit/e21f9c56b6dc3be3320afb88a491b43fc04d365b)) 65 | 66 | ## [1.2.0](https://github.com/lpm0073/hybrid-search-retriever/compare/v1.1.3...v1.2.0) (2023-12-03) 67 | 68 | ### Features 69 | 70 | - refactor pinecone logic and add pinecone unit tests ([2b8585b](https://github.com/lpm0073/hybrid-search-retriever/commit/2b8585b36e400d04f22e2a5565ea96f4482fd5f4)) 71 | 72 | ## [1.1.3](https://github.com/lpm0073/hybrid-search-retriever/compare/v1.1.2...v1.1.3) (2023-12-02) 73 | 74 | ### Bug Fixes 75 | 76 | - add langchain-experimental for yt example ([f9d6d6d](https://github.com/lpm0073/hybrid-search-retriever/commit/f9d6d6d0b11ff9c1f06faf7eb69511bc5702066d)) 77 | - correct type error with DEBUG_MODE ([a96bdfd](https://github.com/lpm0073/hybrid-search-retriever/commit/a96bdfdb5a0b015740110e02f9f9b06917cd31c7)) 78 | - move retriever results to system_message ([203c8b3](https://github.com/lpm0073/hybrid-search-retriever/commit/203c8b300cda156ac44a0c6e02510c2ab6a2b074)) 79 | 80 | ## [1.1.2](https://github.com/lpm0073/hybrid-search-retriever/compare/v1.1.1...v1.1.2) (2023-12-01) 81 | 82 | ### Bug Fixes 83 | 84 | - syntax error in examples.prompt
([230b709](https://github.com/lpm0073/hybrid-search-retriever/commit/230b7090c96bdd4d7d8757b182f891ab1b82c6f4)) 85 | 86 | ## [1.1.1](https://github.com/lpm0073/netec-llm/compare/v1.1.0...v1.1.1) (2023-12-01) 87 | 88 | ### Bug Fixes 89 | 90 | - had to switch to bm25_encoder so that vector store is searchable ([bad6994](https://github.com/lpm0073/netec-llm/commit/bad699481d217dde81877d85124395529652dabe)) 91 | 92 | # [1.1.0](https://github.com/lpm0073/netec-llm/compare/v1.0.0...v1.1.0) (2023-12-01) 93 | 94 | ### Bug Fixes 95 | 96 | - fix load problem with existing index ([62cd18f](https://github.com/lpm0073/netec-llm/commit/62cd18f8088873a794ec363c4e18770dfdc41ea5)) 97 | 98 | ### Features 99 | 100 | - perfect load(). revert rag() to openai only calls ([8de793d](https://github.com/lpm0073/netec-llm/commit/8de793dcca77ec23f09e68ca9e8dba7f64623b3c)) 101 | - ssm.rag() w load, split, embed, store ([2335d22](https://github.com/lpm0073/netec-llm/commit/2335d22c5fd9092642ff1eb67a34fbcd712d7f9b)) 102 | 103 | # 1.0.0 (2023-11-30) 104 | 105 | ### Features 106 | 107 | - first commit ([9fe5fbb](https://github.com/lpm0073/netec-llm/commit/9fe5fbbd03d278a90a7351a4d907a74783e48684)) 108 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 
13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. 
Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. 
Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 
121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. 
You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 
184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 
217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 
244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 
305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 
374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | 633 | Copyright (C) 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published by 637 | the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see . 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | . 662 | 663 | EdX Inc. wishes to state, in clarification of the above license terms, that 664 | any public, independently available web service offered over the network and 665 | communicating with edX's copyrighted works by any form of inter-service 666 | communication, including but not limited to Remote Procedure Call (RPC) 667 | interfaces, is not a work based on our copyrighted work within the meaning 668 | of the license. 
"Corresponding Source" of this work, or works based on this 669 | work, as defined by the terms of this license do not include source code 670 | files for programs used solely to provide those public, independently 671 | available web services. 672 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash 2 | ifeq ($(OS),Windows_NT) 3 | PYTHON = python.exe 4 | ACTIVATE_VENV = venv\Scripts\activate 5 | else 6 | PYTHON = python3.12 7 | ACTIVATE_VENV = source venv/bin/activate 8 | endif 9 | PIP = $(PYTHON) -m pip 10 | 11 | ifneq ("$(wildcard .env)","") 12 | include .env 13 | else 14 | $(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\n\ 15 | OPENAI_API_KEY=PLEASE-ADD-ME\n\ 16 | PINECONE_API_KEY=PLEASE-ADD-ME\n\ 17 | PINECONE_ENVIRONMENT=gcp-starter\n\ 18 | PINECONE_INDEX_NAME=rag\n\ 19 | PINECONE_VECTORSTORE_TEXT_KEY=lc_id\n\ 20 | PINECONE_METRIC=dotproduct\n\ 21 | PINECONE_DIMENSIONS=1536\n\ 22 | OPENAI_CHAT_MODEL_NAME=gpt-4\n\ 23 | OPENAI_PROMPT_MODEL_NAME=gpt-4\n\ 24 | OPENAI_CHAT_TEMPERATURE=0.0\n\ 25 | OPENAI_CHAT_MAX_RETRIES=3\n\ 26 | DEBUG_MODE=True\n" >> .env) 27 | endif 28 | 29 | .PHONY: analyze init activate test lint clean 30 | 31 | # Default target executed when no arguments are given to make. 32 | all: help 33 | 34 | analyze: 35 | cloc . --exclude-ext=svg,json,zip --vcs=git 36 | 37 | init: 38 | make clean && \ 39 | $(PYTHON) -m venv venv && \ 40 | $(ACTIVATE_VENV) && \ 41 | $(PIP) install --upgrade pip && \ 42 | $(PIP) install -r requirements/local.txt && \ 43 | npm install && \ 44 | pre-commit install 45 | 46 | activate: 47 | . venv/bin/activate 48 | 49 | test: 50 | cd models && pytest -v -s tests/ 51 | python -m setup_test 52 | 53 | lint: 54 | pre-commit run --all-files && \ 55 | pylint models && \ 56 | flake8 . && \ 57 | isort . && \ 58 | black . 
59 | 60 | clean: 61 | rm -rf venv && rm -rf node_modules && \ 62 | find ./models/ -name __pycache__ -type d -exec rm -rf {} + 63 | 64 | release: 65 | git commit -m "fix: force a new release" --allow-empty && git push 66 | 67 | ###################### 68 | # HELP 69 | ###################### 70 | 71 | help: 72 | @echo '====================================================================' 73 | @echo 'analyze - generate code analysis report' 74 | @echo 'init - create a Python virtual environment and install dependencies' 75 | @echo 'activate - activate the Python virtual environment' 76 | @echo 'test - run Python unit tests' 77 | @echo 'lint - run Python linting' 78 | @echo 'clean - destroy the Python virtual environment' 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenAI Embeddings Example 2 | 3 | 🤖 Retrieval Augmented Generation and Hybrid Search 🤖 4 | 5 | [![FullStackWithLawrence](https://a11ybadges.com/badge?text=FullStackWithLawrence&badgeColor=orange&logo=youtube&logoColor=282828)](https://www.youtube.com/@FullStackWithLawrence)
6 | [![OpenAI](https://a11ybadges.com/badge?logo=openai)](https://platform.openai.com/) 7 | [![LangChain](https://a11ybadges.com/badge?text=LangChain&badgeColor=0834ac)](https://www.langchain.com/) 8 | [![Pinecone](https://a11ybadges.com/badge?text=Pinecone&badgeColor=000000)](https://www.pinecone.io/) 9 | [![Python](https://a11ybadges.com/badge?logo=python)](https://www.python.org/) 10 | [![Pydantic](https://a11ybadges.com/badge?text=Pydantic&badgeColor=E520E9)](https://pydantic.dev/)
11 | [![Release Notes](https://img.shields.io/github/release/FullStackWithLawrence/openai-embeddings)](https://github.com/FullStackWithLawrence/openai-embeddings/releases) 12 | ![GHA pushMain Status](https://img.shields.io/github/actions/workflow/status/FullStackWithLawrence/openai-embeddings/pushMain.yml?branch=main) 13 | [![AGPL License](https://img.shields.io/github/license/overhangio/tutor.svg?style=flat-square)](https://www.gnu.org/licenses/agpl-3.0.en.html) 14 | [![hack.d Lawrence McDaniel](https://img.shields.io/badge/hack.d-Lawrence%20McDaniel-orange.svg)](https://lawrencemcdaniel.com) 15 | 16 | A Hybrid Search and Augmented Generation prompting solution using Python [OpenAI API Embeddings](https://platform.openai.com/docs/guides/embeddings) persisted to a [Pinecone](https://docs.pinecone.io/docs/python-client) vector database index and managed by [LangChain](https://www.langchain.com/). Implements the following: 17 | 18 | - **PDF Loader**. a command-line pdf loader program that extracts text, vectorizes, and 19 | loads into a Pinecone dot product vector database that is dimensioned to match OpenAI embeddings. 20 | - **Retrieval Augmented Generation**. A chatGPT prompt based on a hybrid search retriever that locates relevant documents from the vector database and includes these in OpenAI prompts. 21 | 22 | Secondarily, I also use this repo for demonstrating how to setup [Pydantic](https://docs.pydantic.dev/latest/) to manage your project settings and how to safely work with sensitive credentials data inside your project. 23 | 24 | ## Installation 25 | 26 | ```console 27 | git clone https://github.com/FullStackWithLawrence/openai-embeddings.git 28 | cd openai-embeddings 29 | make init 30 | 31 | # Linux/macOS 32 | source venv/bin/activate 33 | 34 | # Windows Powershell (admin) 35 | venv\Scripts\activate 36 | ``` 37 | 38 | You'll also need to add your api keys to the .env file in the root of the repo. 
 39 | 40 | - Get your [OpenAI API key](https://platform.openai.com/api-keys) 41 | - Get your [Pinecone API Key](https://app.pinecone.io/) 42 | 43 | ```console 44 | OPENAI_API_ORGANIZATION=PLEASE-ADD-ME 45 | OPENAI_API_KEY=PLEASE-ADD-ME 46 | PINECONE_API_KEY=PLEASE-ADD-ME 47 | ``` 48 | 49 | ## Usage 50 | 51 | ```console 52 | # example 1 - generic assistant 53 | python3 -m models.examples.prompt "you are a helpful assistant" "What analytics and accounting courses does Wharton offer?" 54 | 55 | # example 2 - assistant with improved system prompting 56 | python3 -m models.examples.prompt "You are a student advisor at University of Pennsylvania. You provide concise answers of 100 words or less." "What analytics and accounting courses does Wharton offer?" 57 | 58 | # example 3 - templated assistant: Online courses 59 | python3 -m models.examples.online_courses "analytics and accounting" 60 | 61 | # example 4 - templated assistant: Certification programs 62 | python3 -m models.examples.certification_programs "analytics and accounting" 63 | 64 | # example 5 - Retrieval Augmented Generation 65 | python3 -m models.examples.load "/path/to/your/pdf/documents" 66 | python3 -m models.examples.rag "What analytics and accounting courses does Wharton offer?" 67 | ``` 68 | 69 | ### Retrieval Augmented Generation 70 | 71 | For the question, _"What analytics and accounting courses does Wharton offer?"_, an 72 | embedding can potentially dramatically alter the response generated by chatGPT. To illustrate, I uploaded a batch of 21 sets of lecture notes in PDF format for an online analytics course taught by Wharton professor [Brian Bushee](https://accounting.wharton.upenn.edu/profile/bushee/). You can download these from https://cdn.lawrencemcdaniel.com/fswl/openai-embeddings-data.zip to test whether your results are consistent.
73 | 74 | #### The control set 75 | 76 | Example 1 above, a generic chatGPT prompt with no additional guidance provided by a system prompt nor an embedding, generates the following response: 77 | 78 | ```console 79 | Wharton offers a variety of analytics and accounting courses. Some of the analytics courses include: 80 | 81 | 1. Introduction to Business Analytics: This course provides an overview of the fundamentals of business analytics, including data analysis, statistical modeling, and decision-making. 82 | 83 | 2. Data Visualization and Communication: This course focuses on the effective presentation and communication of data through visualizations and storytelling techniques. 84 | 85 | 3. Predictive Analytics: This course explores the use of statistical models and machine learning algorithms to predict future outcomes and make data-driven decisions. 86 | 87 | 4. Big Data Analytics: This course covers the analysis of large and complex datasets using advanced techniques and tools, such as Hadoop and Spark. 88 | 89 | In terms of accounting courses, Wharton offers: 90 | 91 | 1. Financial Accounting: This course provides an introduction to the principles and concepts of financial accounting, including the preparation and analysis of financial statements. 92 | 93 | 2. Managerial Accounting: This course focuses on the use of accounting information for internal decision-making and planning, including cost analysis and budgeting. 94 | 95 | 3. Advanced Financial Accounting: This course delves into more complex accounting topics, such as consolidations, partnerships, and international accounting standards. 96 | 97 | 4. Auditing and Assurance Services: This course covers the principles and practices of auditing, including risk assessment, internal controls, and audit procedures. 98 | 99 | These are just a few examples of the analytics and accounting courses offered at Wharton. 
The school offers a wide range of courses to cater to different interests and skill levels in these fields. 100 | (venv) (base) mcdaniel@MacBookAir-Lawrence openai-embeddings % python3 -m models.examples.online_courses "analytics and accounting" 101 | ``` 102 | 103 | #### Same prompt but with an embedding 104 | 105 | After creating an embedding from the sample set of pdf documents, you can prompt models.examples.rag with the same question, and it should provide a quite different response compared to the control from example 1. It should resemble the following: 106 | 107 | ```console 108 | Wharton offers a variety of analytics and accounting courses. Some of the courses offered include: 109 | 110 | 1. Accounting-Based Valuation: This course, taught by Professor Brian Bushee, focuses on using accounting information to value companies and make investment decisions. 111 | 112 | 2. Review of Financial Statements: Also taught by Professor Brian Bushee, this course provides an in-depth understanding of financial statements and how to analyze them for decision-making purposes. 113 | 114 | 3. Discretionary Accruals Model: Another course taught by Professor Brian Bushee, this course explores the concept of discretionary accruals and their impact on financial statements and financial analysis. 115 | 116 | 4. Discretionary Accruals Cases: This course, also taught by Professor Brian Bushee, provides practical applications of the discretionary accruals model through case studies and real-world examples. 117 | 118 | These are just a few examples of the analytics and accounting courses offered at Wharton. The school offers a wide range of courses in these areas to provide students with a comprehensive understanding of financial analysis and decision-making. 119 | ``` 120 | 121 | ## Requirements 122 | 123 | - [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git). _pre-installed on Linux and macOS_ 124 | - [make](https://gnuwin32.sourceforge.net/packages/make.htm). 
_pre-installed on Linux and macOS._ 125 | - [OpenAI platform API key](https://platform.openai.com/). 126 | _If you're new to OpenAI API then see [How to Get an OpenAI API Key](./doc/OPENAI_API_GETTING_STARTED_GUIDE.md)_ 127 | - [Pinecone](https://www.pinecone.io/) API key. A vector database for storing embedding results. 128 | - [Python 3.12](https://www.python.org/downloads/): for creating virtual environment. Also used by pre-commit linters and code formatters. 129 | - [NodeJS](https://nodejs.org/en/download): used with NPM for configuring/testing Semantic Release. 130 | 131 | ## Configuration defaults 132 | 133 | Set these as environment variables on the command line, or in a .env file that should be located in the root of the repo. 134 | 135 | ```console 136 | # OpenAI API 137 | OPENAI_API_ORGANIZATION=ADD-ME-PLEASE 138 | OPENAI_API_KEY=ADD-ME-PLEASE 139 | OPENAI_CHAT_MODEL_NAME=gpt-4 140 | OPENAI_PROMPT_MODEL_NAME=gpt-4 141 | OPENAI_CHAT_TEMPERATURE=0.0 142 | OPENAI_CHAT_MAX_RETRIES=3 143 | 144 | # Pinecone API 145 | PINECONE_API_KEY=ADD-ME-PLEASE 146 | PINECONE_ENVIRONMENT=gcp-starter 147 | PINECONE_INDEX_NAME=openai-embeddings 148 | PINECONE_VECTORSTORE_TEXT_KEY=lc_id 149 | PINECONE_METRIC=dotproduct 150 | PINECONE_DIMENSIONS=1536 151 | 152 | # This package 153 | DEBUG_MODE=False 154 | ``` 155 | 156 | ## Contributing 157 | 158 | This project uses a mostly automated pull request and unit testing process. See the resources in .github for additional details. You additionally should ensure that pre-commit is installed and working correctly on your dev machine by running the following command from the root of the repo. 
159 | 160 | ```console 161 | pre-commit run --all-files 162 | ``` 163 | 164 | Pull requests should pass these tests before being submitted: 165 | 166 | ```console 167 | make test 168 | ``` 169 | 170 | ### Developer setup 171 | 172 | ```console 173 | git clone https://github.com/lpm0073/automatic-models.git 174 | cd automatic-models 175 | make init 176 | make activate 177 | ``` 178 | 179 | ### Github Actions 180 | 181 | Actions requires the following secrets: 182 | 183 | ```console 184 | PAT: {{ secrets.PAT }} # a GitHub Personal Access Token 185 | OPENAI_API_ORGANIZATION: {{ secrets.OPENAI_API_ORGANIZATION }} 186 | OPENAI_API_KEY: {{ secrets.OPENAI_API_KEY }} 187 | PINECONE_API_KEY: {{ secrets.PINECONE_API_KEY }} 188 | PINECONE_ENVIRONMENT: {{ secrets.PINECONE_ENVIRONMENT }} 189 | PINECONE_INDEX_NAME: {{ secrets.PINECONE_INDEX_NAME }} 190 | ``` 191 | 192 | ## Additional reading 193 | 194 | - [Youtube - Vector Embeddings Tutorial – Code Your Own AI Assistant with GPT-4 API + LangChain + NLP](https://www.youtube.com/watch?v=yfHHvmaMkcA) 195 | - [Youtube - LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://www.youtube.com/watch?v=aywZrzNaKjs) 196 | - [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) 197 | - [What is a Vector Database?](https://www.pinecone.io/learn/vector-database/) 198 | - [LangChain RAG](https://python.langchain.com/docs/use_cases/question_answering/) 199 | - [LangChain Document Loaders](https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf) 200 | - [LanchChain Caching](https://python.langchain.com/docs/modules/model_io/llms/llm_caching) 201 | -------------------------------------------------------------------------------- /codespell.txt: -------------------------------------------------------------------------------- 1 | OCE 2 | -------------------------------------------------------------------------------- /commitlint.config.js: 
-------------------------------------------------------------------------------- 1 | const Configuration = { 2 | /* 3 | * Resolve and load @commitlint/config-conventional from node_modules. 4 | * Referenced packages must be installed 5 | */ 6 | extends: ["@commitlint/config-conventional", "@commitlint/config-angular"], 7 | /* 8 | * Resolve and load conventional-changelog-atom from node_modules. 9 | * Referenced packages must be installed 10 | */ 11 | parserPreset: "conventional-changelog-atom", 12 | /* 13 | * Resolve and load @commitlint/format from node_modules. 14 | * Referenced package must be installed 15 | */ 16 | formatter: "@commitlint/format", 17 | /* 18 | * Any rules defined here will override rules from @commitlint/config-conventional 19 | */ 20 | rules: {}, 21 | /* 22 | * Array of functions that return true if commitlint should ignore the given message. 23 | * Given array is merged with predefined functions, which consist of matchers like: 24 | * 25 | * - 'Merge pull request', 'Merge X into Y' or 'Merge branch X' 26 | * - 'Revert X' 27 | * - 'v1.2.3' (ie semver matcher) 28 | * - 'Automatic merge X' or 'Auto-merged X into Y' 29 | * 30 | * To see full list, check https://github.com/conventional-changelog/commitlint/blob/master/%40commitlint/is-ignored/src/defaults.ts. 31 | * To disable those ignores and run rules always, set `defaultIgnores: false` as shown below. 32 | */ 33 | /* 34 | ignores: [(commit) => commit === ''], 35 | * Whether commitlint uses the default ignore rules, see the description above. 
36 | */ 37 | defaultIgnores: true, 38 | /* 39 | * Custom URL to show upon failure 40 | */ 41 | helpUrl: 42 | "https://github.com/conventional-changelog/commitlint/#what-is-commitlint", 43 | /* 44 | * Custom prompt configs 45 | */ 46 | prompt: { 47 | messages: {}, 48 | questions: { 49 | type: { 50 | description: "please input type:", 51 | }, 52 | }, 53 | }, 54 | }; 55 | 56 | module.exports = Configuration; 57 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/models/__init__.py -------------------------------------------------------------------------------- /models/__version__.py: -------------------------------------------------------------------------------- 1 | # Managed via automated CI/CD in .github/workflows/semanticVersionBump.yml. 2 | __version__ = "1.3.7" 3 | -------------------------------------------------------------------------------- /models/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=no-member 3 | # pylint: disable=E0213,C0103 4 | """ 5 | Configuration for Lambda functions. 6 | 7 | This module is used to configure the Lambda functions. It uses the pydantic_settings 8 | library to validate the configuration values. 
The configuration values are read from 9 | any of the following sources: 10 | - constructor arguments 11 | - environment variables 12 | - terraform.tfvars 13 | - default values 14 | """ 15 | 16 | import importlib.util 17 | import os # library for interacting with the operating system 18 | import platform # library to view information about the server host this Lambda runs on 19 | import re 20 | from typing import Any, Dict, List, Optional 21 | 22 | from dotenv import load_dotenv 23 | from pydantic import Field, SecretStr, ValidationError, field_validator 24 | from pydantic_settings import BaseSettings 25 | 26 | from models.const import HERE 27 | from models.exceptions import ModelConfigurationError, ModelValueError 28 | 29 | 30 | DOT_ENV_LOADED = load_dotenv() 31 | 32 | 33 | def load_version() -> Dict[str, str]: 34 | """Stringify the __version__ module.""" 35 | version_file_path = os.path.join(HERE, "__version__.py") 36 | spec = importlib.util.spec_from_file_location("__version__", version_file_path) 37 | version_module = importlib.util.module_from_spec(spec) 38 | spec.loader.exec_module(version_module) 39 | return version_module.__dict__ 40 | 41 | 42 | VERSION = load_version() 43 | 44 | 45 | def get_semantic_version() -> str: 46 | """ 47 | Return the semantic version number. 48 | 49 | Example valid values of __version__.py are: 50 | 0.1.17 51 | 0.1.17-next.1 52 | 0.1.17-next.2 53 | 0.1.17-next.123456 54 | 0.1.17-next-major.1 55 | 0.1.17-next-major.2 56 | 0.1.17-next-major.123456 57 | 58 | Note: 59 | - pypi does not allow semantic version numbers to contain a dash. 60 | - pypi does not allow semantic version numbers to contain a 'v' prefix. 61 | - pypi does not allow semantic version numbers to contain a 'next' suffix. 
62 | """ 63 | version = VERSION["__version__"] 64 | version = re.sub(r"-next\.\d+", "", version) 65 | return re.sub(r"-next-major\.\d+", "", version) 66 | 67 | 68 | # pylint: disable=too-few-public-methods 69 | class SettingsDefaults: 70 | """Default values for Settings""" 71 | 72 | DEBUG_MODE = False 73 | DUMP_DEFAULTS = False 74 | 75 | LANGCHAIN_MEMORY_KEY = "chat_history" 76 | 77 | PINECONE_API_KEY: SecretStr = SecretStr(None) 78 | PINECONE_ENVIRONMENT = "gcp-starter" 79 | PINECONE_INDEX_NAME = "openai-embeddings" 80 | PINECONE_VECTORSTORE_TEXT_KEY = "lc_id" 81 | PINECONE_METRIC = "dotproduct" 82 | PINECONE_DIMENSIONS = 1536 83 | 84 | OPENAI_API_ORGANIZATION: str = None 85 | OPENAI_API_KEY: SecretStr = SecretStr(None) 86 | OPENAI_ENDPOINT_IMAGE_N = 4 87 | OPENAI_ENDPOINT_IMAGE_SIZE = "1024x768" 88 | OPENAI_CHAT_CACHE = True 89 | OPENAI_CHAT_MODEL_NAME = "gpt-4" 90 | OPENAI_PROMPT_MODEL_NAME = "gpt-4" 91 | OPENAI_CHAT_TEMPERATURE = 0.0 92 | OPENAI_CHAT_MAX_RETRIES = 3 93 | 94 | @classmethod 95 | def to_dict(cls): 96 | """Convert SettingsDefaults to dict""" 97 | return { 98 | key: value 99 | for key, value in SettingsDefaults.__dict__.items() 100 | if not key.startswith("__") and not callable(key) and key != "to_dict" 101 | } 102 | 103 | 104 | def empty_str_to_bool_default(v: str, default: bool) -> bool: 105 | """Convert empty string to default boolean value""" 106 | if v in [None, ""]: 107 | return default 108 | return v.lower() in ["true", "1", "t", "y", "yes"] 109 | 110 | 111 | def empty_str_to_int_default(v: str, default: int) -> int: 112 | """Convert empty string to default integer value""" 113 | if v in [None, ""]: 114 | return default 115 | try: 116 | return int(v) 117 | except ValueError: 118 | return default 119 | 120 | 121 | # pylint: disable=too-many-public-methods 122 | # pylint: disable=too-many-instance-attributes 123 | class Settings(BaseSettings): 124 | """Settings for Lambda functions""" 125 | 126 | _dump: dict = None 127 | 
_pinecone_api_key_source: str = "unset" 128 | _openai_api_key_source: str = "unset" 129 | _initialized: bool = False 130 | 131 | def __init__(self, **data: Any): 132 | super().__init__(**data) 133 | if "PINECONE_API_KEY" in os.environ: 134 | self._pinecone_api_key_source = "environment variable" 135 | elif data.get("pinecone_api_key"): 136 | self._pinecone_api_key_source = "init argument" 137 | if "OPENAI_API_KEY" in os.environ: 138 | self._openai_api_key_source = "environment variable" 139 | elif data.get("openai_api_key"): 140 | self._openai_api_key_source = "init argument" 141 | self._initialized = True 142 | 143 | debug_mode: Optional[bool] = Field( 144 | SettingsDefaults.DEBUG_MODE, 145 | env="DEBUG_MODE", 146 | pre=True, 147 | getter=lambda v: empty_str_to_bool_default(v, SettingsDefaults.DEBUG_MODE), 148 | ) 149 | dump_defaults: Optional[bool] = Field( 150 | SettingsDefaults.DUMP_DEFAULTS, 151 | env="DUMP_DEFAULTS", 152 | pre=True, 153 | getter=lambda v: empty_str_to_bool_default(v, SettingsDefaults.DUMP_DEFAULTS), 154 | ) 155 | 156 | langchain_memory_key: Optional[str] = Field(SettingsDefaults.LANGCHAIN_MEMORY_KEY, env="LANGCHAIN_MEMORY_KEY") 157 | 158 | openai_api_organization: Optional[str] = Field( 159 | SettingsDefaults.OPENAI_API_ORGANIZATION, env="OPENAI_API_ORGANIZATION" 160 | ) 161 | openai_api_key: Optional[SecretStr] = Field(SettingsDefaults.OPENAI_API_KEY, env="OPENAI_API_KEY") 162 | openai_endpoint_image_n: Optional[int] = Field( 163 | SettingsDefaults.OPENAI_ENDPOINT_IMAGE_N, env="OPENAI_ENDPOINT_IMAGE_N" 164 | ) 165 | openai_endpoint_image_size: Optional[str] = Field( 166 | SettingsDefaults.OPENAI_ENDPOINT_IMAGE_SIZE, env="OPENAI_ENDPOINT_IMAGE_SIZE" 167 | ) 168 | openai_chat_cache: Optional[bool] = Field( 169 | SettingsDefaults.OPENAI_CHAT_CACHE, 170 | env="OPENAI_CHAT_CACHE", 171 | pre=True, 172 | getter=lambda v: empty_str_to_bool_default(v, SettingsDefaults.OPENAI_CHAT_CACHE), 173 | ) 174 | openai_chat_model_name: Optional[str] = 
Field(SettingsDefaults.OPENAI_CHAT_MODEL_NAME, env="OPENAI_CHAT_MODEL_NAME") 175 | openai_prompt_model_name: Optional[str] = Field( 176 | SettingsDefaults.OPENAI_PROMPT_MODEL_NAME, env="OPENAI_PROMPT_MODEL_NAME" 177 | ) 178 | openai_chat_temperature: Optional[float] = Field( 179 | SettingsDefaults.OPENAI_CHAT_TEMPERATURE, 180 | env="OPENAI_CHAT_TEMPERATURE", 181 | ge=0.0, 182 | le=1.0, 183 | ) 184 | openai_chat_max_retries: Optional[int] = Field( 185 | SettingsDefaults.OPENAI_CHAT_MAX_RETRIES, 186 | env="OPENAI_CHAT_MAX_RETRIES", 187 | ge=0, 188 | ) 189 | 190 | pinecone_api_key: Optional[SecretStr] = Field(SettingsDefaults.PINECONE_API_KEY, env="PINECONE_API_KEY") 191 | pinecone_environment: Optional[str] = Field(SettingsDefaults.PINECONE_ENVIRONMENT, env="PINECONE_ENVIRONMENT") 192 | pinecone_index_name: Optional[str] = Field(SettingsDefaults.PINECONE_INDEX_NAME, env="PINECONE_INDEX_NAME") 193 | pinecone_vectorstore_text_key: Optional[str] = Field( 194 | SettingsDefaults.PINECONE_VECTORSTORE_TEXT_KEY, env="PINECONE_VECTORSTORE_TEXT_KEY" 195 | ) 196 | pinecone_metric: Optional[str] = Field(SettingsDefaults.PINECONE_METRIC, env="PINECONE_METRIC") 197 | pinecone_dimensions: Optional[int] = Field(SettingsDefaults.PINECONE_DIMENSIONS, env="PINECONE_DIMENSIONS", gt=0) 198 | 199 | @property 200 | def pinecone_api_key_source(self) -> str: 201 | """Pinecone API key source""" 202 | return self._pinecone_api_key_source 203 | 204 | @property 205 | def openai_api_key_source(self) -> str: 206 | """OpenAI API key source""" 207 | return self._openai_api_key_source 208 | 209 | @property 210 | def is_using_dotenv_file(self) -> bool: 211 | """Is the dotenv file being used?""" 212 | return DOT_ENV_LOADED 213 | 214 | @property 215 | def environment_variables(self) -> List[str]: 216 | """Environment variables""" 217 | return list(os.environ.keys()) 218 | 219 | @property 220 | def is_using_tfvars_file(self) -> bool: 221 | """Is the tfvars file being used?""" 222 | return False 223 | 224 
| @property 225 | def tfvars_variables(self) -> List[str]: 226 | """Terraform variables""" 227 | return [] 228 | 229 | @property 230 | def is_using_aws_rekognition(self) -> bool: 231 | """Future: Is the AWS Rekognition service being used?""" 232 | return False 233 | 234 | @property 235 | def is_using_aws_dynamodb(self) -> bool: 236 | """Future: Is the AWS DynamoDB service being used?""" 237 | return False 238 | 239 | @property 240 | def version(self) -> str: 241 | """OpenAI API version""" 242 | return get_semantic_version() 243 | 244 | @property 245 | def dump(self) -> dict: 246 | """Dump all settings.""" 247 | 248 | def recursive_sort_dict(d): 249 | return {k: recursive_sort_dict(v) if isinstance(v, dict) else v for k, v in sorted(d.items())} 250 | 251 | if self._dump and self._initialized: 252 | return self._dump 253 | 254 | self._dump = { 255 | "secrets": { 256 | "openai_api_source": self.openai_api_key_source, 257 | "pinecone_api_source": self.pinecone_api_key_source, 258 | }, 259 | "environment": { 260 | "is_using_tfvars_file": self.is_using_tfvars_file, 261 | "is_using_dotenv_file": self.is_using_dotenv_file, 262 | "os": os.name, 263 | "system": platform.system(), 264 | "release": platform.release(), 265 | "debug_mode": self.debug_mode, 266 | "dump_defaults": self.dump_defaults, 267 | "version": self.version, 268 | }, 269 | "langchain": { 270 | "langchain_memory_key": self.langchain_memory_key, 271 | }, 272 | "openai_api": { 273 | "openai_endpoint_image_n": self.openai_endpoint_image_n, 274 | "openai_endpoint_image_size": self.openai_endpoint_image_size, 275 | "openai_chat_cache": self.openai_chat_cache, 276 | "openai_chat_model_name": self.openai_chat_model_name, 277 | "openai_prompt_model_name": self.openai_prompt_model_name, 278 | "openai_chat_temperature": self.openai_chat_temperature, 279 | "openai_chat_max_retries": self.openai_chat_max_retries, 280 | }, 281 | "pinecone_api": { 282 | "pinecone_environment": self.pinecone_environment, 283 | 
"pinecone_index_name": self.pinecone_index_name, 284 | "pinecone_vectorstore_text_key": self.pinecone_vectorstore_text_key, 285 | "pinecone_metric": self.pinecone_metric, 286 | "pinecone_dimensions": self.pinecone_dimensions, 287 | }, 288 | } 289 | if self.dump_defaults: 290 | settings_defaults = SettingsDefaults.to_dict() 291 | self._dump["settings_defaults"] = settings_defaults 292 | 293 | if self.is_using_dotenv_file: 294 | self._dump["environment"]["dotenv"] = self.environment_variables 295 | 296 | if self.is_using_tfvars_file: 297 | self._dump["environment"]["tfvars"] = self.tfvars_variables 298 | 299 | self._dump = recursive_sort_dict(self._dump) 300 | return self._dump 301 | 302 | # pylint: disable=too-few-public-methods 303 | class Config: 304 | """Pydantic configuration""" 305 | 306 | frozen = True 307 | 308 | @field_validator("debug_mode") 309 | def parse_debug_mode(cls, v) -> bool: 310 | """Parse debug_mode""" 311 | if isinstance(v, bool): 312 | return v 313 | if v in [None, ""]: 314 | return SettingsDefaults.DEBUG_MODE 315 | return v.lower() in ["true", "1", "t", "y", "yes"] 316 | 317 | @field_validator("dump_defaults") 318 | def parse_dump_defaults(cls, v) -> bool: 319 | """Parse dump_defaults""" 320 | if isinstance(v, bool): 321 | return v 322 | if v in [None, ""]: 323 | return SettingsDefaults.DUMP_DEFAULTS 324 | return v.lower() in ["true", "1", "t", "y", "yes"] 325 | 326 | @field_validator("langchain_memory_key") 327 | def check_langchain_memory_key(cls, v) -> str: 328 | """Check langchain_memory_key""" 329 | if v in [None, ""]: 330 | return SettingsDefaults.LANGCHAIN_MEMORY_KEY 331 | return v 332 | 333 | @field_validator("openai_api_organization") 334 | def check_openai_api_organization(cls, v) -> str: 335 | """Check openai_api_organization""" 336 | if v in [None, ""]: 337 | return SettingsDefaults.OPENAI_API_ORGANIZATION 338 | return v 339 | 340 | @field_validator("openai_api_key") 341 | def check_openai_api_key(cls, v) -> SecretStr: 342 | 
"""Check openai_api_key""" 343 | if v in [None, ""]: 344 | return SettingsDefaults.OPENAI_API_KEY 345 | return v 346 | 347 | @field_validator("openai_endpoint_image_n") 348 | def check_openai_endpoint_image_n(cls, v) -> int: 349 | """Check openai_endpoint_image_n""" 350 | if isinstance(v, int): 351 | return v 352 | if v in [None, ""]: 353 | return SettingsDefaults.OPENAI_ENDPOINT_IMAGE_N 354 | return int(v) 355 | 356 | @field_validator("openai_endpoint_image_size") 357 | def check_openai_endpoint_image_size(cls, v) -> str: 358 | """Check openai_endpoint_image_size""" 359 | if v in [None, ""]: 360 | return SettingsDefaults.OPENAI_ENDPOINT_IMAGE_SIZE 361 | return v 362 | 363 | @field_validator("openai_chat_cache") 364 | def check_openai_chat_cache(cls, v) -> bool: 365 | """Check openai_chat_cache""" 366 | if isinstance(v, bool): 367 | return v 368 | if v in [None, ""]: 369 | return SettingsDefaults.OPENAI_CHAT_CACHE 370 | return v.lower() in ["true", "1", "t", "y", "yes"] 371 | 372 | @field_validator("openai_chat_model_name") 373 | def check_openai_chat_model_name(cls, v) -> str: 374 | """Check openai_chat_model_name""" 375 | if v in [None, ""]: 376 | return SettingsDefaults.OPENAI_CHAT_MODEL_NAME 377 | return v 378 | 379 | @field_validator("openai_prompt_model_name") 380 | def check_openai_prompt_model_name(cls, v) -> str: 381 | """Check openai_prompt_model_name""" 382 | if v in [None, ""]: 383 | return SettingsDefaults.OPENAI_PROMPT_MODEL_NAME 384 | return v 385 | 386 | @field_validator("openai_chat_temperature") 387 | def check_openai_chat_temperature(cls, v) -> float: 388 | """Check openai_chat_temperature""" 389 | if v in [None, ""]: 390 | return SettingsDefaults.OPENAI_CHAT_TEMPERATURE 391 | return float(v) 392 | 393 | @field_validator("openai_chat_max_retries") 394 | def check_openai_chat_max_retries(cls, v) -> int: 395 | """Check openai_chat_max_retries""" 396 | if v in [None, ""]: 397 | return SettingsDefaults.OPENAI_CHAT_MAX_RETRIES 398 | return int(v) 399 
| 400 | @field_validator("pinecone_api_key") 401 | def check_pinecone_api_key(cls, v) -> SecretStr: 402 | """Check pinecone_api_key""" 403 | if v in [None, ""]: 404 | return SettingsDefaults.PINECONE_API_KEY 405 | return v 406 | 407 | @field_validator("pinecone_environment") 408 | def check_pinecone_environment(cls, v) -> str: 409 | """Check pinecone_environment""" 410 | if v in [None, ""]: 411 | return SettingsDefaults.PINECONE_ENVIRONMENT 412 | return v 413 | 414 | @field_validator("pinecone_index_name") 415 | def check_pinecone_index_name(cls, v) -> str: 416 | """Check pinecone_index_name""" 417 | if v in [None, ""]: 418 | return SettingsDefaults.PINECONE_INDEX_NAME 419 | return v 420 | 421 | @field_validator("pinecone_vectorstore_text_key") 422 | def check_pinecone_vectorstore_text_key(cls, v) -> str: 423 | """Check pinecone_vectorstore_text_key""" 424 | if v in [None, ""]: 425 | return SettingsDefaults.PINECONE_VECTORSTORE_TEXT_KEY 426 | return v 427 | 428 | @field_validator("pinecone_metric") 429 | def check_pinecone_metric(cls, v) -> str: 430 | """Check pinecone_metric""" 431 | if v in [None, ""]: 432 | return SettingsDefaults.PINECONE_METRIC 433 | return v 434 | 435 | @field_validator("pinecone_dimensions") 436 | def check_pinecone_dimensions(cls, v) -> int: 437 | """Check pinecone_dimensions""" 438 | if v in [None, ""]: 439 | return SettingsDefaults.PINECONE_DIMENSIONS 440 | return int(v) 441 | 442 | 443 | settings = None 444 | try: 445 | settings = Settings() 446 | except (ValidationError, ValueError, ModelConfigurationError, ModelValueError) as e: 447 | raise ModelConfigurationError("Invalid configuration: " + str(e)) from e 448 | -------------------------------------------------------------------------------- /models/const.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=too-few-public-methods 3 | """Sales Support Model (hsr) for the LangChain project.""" 4 | 5 | 
import os 6 | from pathlib import Path 7 | 8 | 9 | MODULE_NAME = "models" 10 | HERE = os.path.abspath(os.path.dirname(__file__)) 11 | REPO_ROOT = str(Path(HERE).parent) 12 | IS_USING_TFVARS = False 13 | -------------------------------------------------------------------------------- /models/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/models/examples/__init__.py -------------------------------------------------------------------------------- /models/examples/certification_programs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Sales Support Model (hsr) for the LangChain project.""" 3 | import argparse 4 | 5 | from models.hybrid_search_retreiver import HybridSearchRetriever 6 | from models.prompt_templates import UofPennPromptTemplates 7 | 8 | 9 | hsr = HybridSearchRetriever() 10 | templates = UofPennPromptTemplates() 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser(description="Hybrid search retrieval - University of Pennsylvania examples") 14 | parser.add_argument("concept", type=str, help="A certification program.") 15 | args = parser.parse_args() 16 | 17 | prompt = templates.certification_programs 18 | result = hsr.prompt_with_template(prompt=prompt, concept=args.concept) 19 | print(result) 20 | -------------------------------------------------------------------------------- /models/examples/load.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Sales Support Model (hsr) Retrieval Augmented Generation (RAG)""" 3 | import argparse 4 | 5 | from models.hybrid_search_retreiver import HybridSearchRetriever 6 | 7 | 8 | hsr = HybridSearchRetriever() 9 | 10 | if __name__ == "__main__": 11 | parser = 
argparse.ArgumentParser(description="RAG example") 12 | parser.add_argument("filepath", type=str, help="Location of PDF documents") 13 | args = parser.parse_args() 14 | 15 | hsr.load(filepath=args.filepath) 16 | -------------------------------------------------------------------------------- /models/examples/online_courses.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Sales Support Model (hsr) for the LangChain project.""" 3 | import argparse 4 | 5 | from models.hybrid_search_retreiver import HybridSearchRetriever 6 | from models.prompt_templates import UofPennPromptTemplates 7 | 8 | 9 | hsr = HybridSearchRetriever() 10 | templates = UofPennPromptTemplates() 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser(description="Hybrid search retrieval - University of Pennsylvania examples") 14 | parser.add_argument("concept", type=str, help="A subject to study: accounting, finance, etc.") 15 | args = parser.parse_args() 16 | 17 | prompt = templates.online_courses 18 | result = hsr.prompt_with_template(prompt=prompt, concept=args.concept) 19 | print(result) 20 | -------------------------------------------------------------------------------- /models/examples/pinecone_init.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Sales Support Model (hsr) Retrieval Augmented Generation (RAG)""" 3 | 4 | import logging 5 | 6 | # this project 7 | from models.conf import settings 8 | from models.pinecone import PineconeIndex 9 | 10 | 11 | logging.basicConfig(level=logging.DEBUG if settings.debug_mode else logging.INFO) 12 | logger = logging.getLogger(__name__) 13 | 14 | pinecone = PineconeIndex() 15 | 16 | if __name__ == "__main__": 17 | pinecone.initialize() 18 | print("Pinecone index initialized. 
name: ", pinecone.index_name) 19 | print(pinecone.index.describe_index_stats()) 20 | -------------------------------------------------------------------------------- /models/examples/prompt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Sales Support Model (hsr)""" 3 | import argparse 4 | 5 | from langchain.schema import HumanMessage, SystemMessage 6 | 7 | from models.hybrid_search_retreiver import HybridSearchRetriever 8 | 9 | 10 | hsr = HybridSearchRetriever() 11 | 12 | 13 | if __name__ == "__main__": 14 | parser = argparse.ArgumentParser(description="hybrid search retrieval examples") 15 | parser.add_argument("system_message", type=str, help="A system prompt to send to the model.") 16 | parser.add_argument("human_message", type=str, help="A human prompt to send to the model.") 17 | args = parser.parse_args() 18 | 19 | system_message = SystemMessage(content=args.system_message) 20 | human_message = HumanMessage(content=args.human_message) 21 | result = hsr.cached_chat_request(system_message=system_message, human_message=human_message) 22 | print(result.content) 23 | -------------------------------------------------------------------------------- /models/examples/rag.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Sales Support Model (hsr) Retrieval Augmented Generation (RAG)""" 3 | import argparse 4 | 5 | from langchain.schema import HumanMessage 6 | 7 | from models.hybrid_search_retreiver import HybridSearchRetriever 8 | 9 | 10 | hsr = HybridSearchRetriever() 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser(description="Retrieval Augmented Generation (RAG)") 14 | parser.add_argument("prompt", type=str, help="A question about the vectorized PDF contents") 15 | args = parser.parse_args() 16 | 17 | human_message = HumanMessage(content=args.prompt) 18 | result = hsr.rag(human_message=human_message) 
19 | print(result) 20 | -------------------------------------------------------------------------------- /models/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Module exceptions.py""" 3 | 4 | import openai 5 | 6 | 7 | # pylint: disable=too-few-public-methods 8 | class OpenAIResponseCodes: 9 | """Http response codes from openai API""" 10 | 11 | HTTP_RESPONSE_OK = 200 12 | HTTP_RESPONSE_BAD_REQUEST = 400 13 | HTTP_RESPONSE_INTERNAL_SERVER_ERROR = 500 14 | 15 | 16 | class ModelConfigurationError(Exception): 17 | """Exception raised for errors in the configuration.""" 18 | 19 | def __init__(self, message): 20 | self.message = message 21 | super().__init__(self.message) 22 | 23 | 24 | class ModelValueError(Exception): 25 | """Exception raised for errors in the configuration.""" 26 | 27 | def __init__(self, message): 28 | self.message = message 29 | super().__init__(self.message) 30 | 31 | 32 | class ModelIlligalInvocationError(Exception): 33 | """Exception raised when the service is called by an unknown service.""" 34 | 35 | def __init__(self, message): 36 | self.message = message 37 | super().__init__(self.message) 38 | 39 | 40 | EXCEPTION_MAP = { 41 | ModelValueError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"), 42 | ModelConfigurationError: (OpenAIResponseCodes.HTTP_RESPONSE_INTERNAL_SERVER_ERROR, "InternalServerError"), 43 | ModelIlligalInvocationError: (OpenAIResponseCodes.HTTP_RESPONSE_INTERNAL_SERVER_ERROR, "InternalServerError"), 44 | openai.APIError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"), 45 | ValueError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"), 46 | TypeError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"), 47 | NotImplementedError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"), 48 | openai.OpenAIError: (OpenAIResponseCodes.HTTP_RESPONSE_INTERNAL_SERVER_ERROR, "InternalServerError"), 
49 | Exception: (OpenAIResponseCodes.HTTP_RESPONSE_INTERNAL_SERVER_ERROR, "InternalServerError"), 50 | } 51 | 52 | 53 | class ConfigurationError(Exception): 54 | """Exception raised for errors in the configuration.""" 55 | 56 | def __init__(self, message): 57 | self.message = message 58 | super().__init__(self.message) 59 | -------------------------------------------------------------------------------- /models/hybrid_search_retreiver.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=E0611,E1101 3 | """ 4 | Hybrid Search Retriever. A class that combines the following: 5 | - OpenAI prompting and ChatModel 6 | - PromptingWrapper 7 | - Vector embedding with Pinecone 8 | - Hybrid Retriever to combine vector embeddings with text search 9 | 10 | Provides a pdf loader program that extracts text, vectorizes, and 11 | loads into a Pinecone dot product vector database that is dimensioned 12 | to match OpenAI embeddings. 
13 | 14 | See: https://python.langchain.com/docs/modules/model_io/llms/llm_caching 15 | https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf 16 | https://python.langchain.com/docs/integrations/retrievers/pinecone_hybrid_search 17 | """ 18 | 19 | # general purpose imports 20 | import logging 21 | import textwrap 22 | from typing import Union 23 | 24 | # embedding 25 | from langchain.globals import set_llm_cache 26 | from langchain.prompts import PromptTemplate 27 | from langchain.schema import BaseMessage, HumanMessage, SystemMessage 28 | 29 | # pinecone integration 30 | from langchain_community.cache import InMemoryCache 31 | 32 | # hybrid search capability 33 | from langchain_community.retrievers.pinecone_hybrid_search import ( 34 | PineconeHybridSearchRetriever, 35 | ) 36 | 37 | # from langchain_community.chat_models import ChatOpenAI 38 | # prompting and chat 39 | from langchain_openai import ChatOpenAI 40 | from pinecone_text.sparse import BM25Encoder # pylint: disable=import-error 41 | 42 | # this project 43 | from models.conf import settings 44 | from models.pinecone import PineconeIndex 45 | 46 | 47 | logging.basicConfig(level=logging.DEBUG if settings.debug_mode else logging.INFO) 48 | logger = logging.getLogger(__name__) 49 | 50 | 51 | class HybridSearchRetriever: 52 | """Hybrid Search Retriever""" 53 | 54 | _chat: ChatOpenAI = None 55 | _b25_encoder: BM25Encoder = None 56 | _pinecone: PineconeIndex = None 57 | _retriever: PineconeHybridSearchRetriever = None 58 | 59 | def __init__(self): 60 | """Constructor""" 61 | set_llm_cache(InMemoryCache()) 62 | 63 | @property 64 | def pinecone(self) -> PineconeIndex: 65 | """PineconeIndex lazy read-only property.""" 66 | if self._pinecone is None: 67 | self._pinecone = PineconeIndex() 68 | return self._pinecone 69 | 70 | # prompting wrapper 71 | @property 72 | def chat(self) -> ChatOpenAI: 73 | """ChatOpenAI lazy read-only property.""" 74 | if self._chat is None: 75 | self._chat = 
ChatOpenAI( 76 | api_key=settings.openai_api_key.get_secret_value(), # pylint: disable=no-member 77 | organization=settings.openai_api_organization, 78 | cache=settings.openai_chat_cache, 79 | max_retries=settings.openai_chat_max_retries, 80 | model=settings.openai_chat_model_name, 81 | temperature=settings.openai_chat_temperature, 82 | ) 83 | return self._chat 84 | 85 | @property 86 | def bm25_encoder(self) -> BM25Encoder: 87 | """BM25Encoder lazy read-only property.""" 88 | if self._b25_encoder is None: 89 | self._b25_encoder = BM25Encoder().default() 90 | return self._b25_encoder 91 | 92 | @property 93 | def retriever(self) -> PineconeHybridSearchRetriever: 94 | """PineconeHybridSearchRetriever lazy read-only property.""" 95 | if self._retriever is None: 96 | self._retriever = PineconeHybridSearchRetriever( 97 | embeddings=self.pinecone.openai_embeddings, sparse_encoder=self.bm25_encoder, index=self.pinecone.index 98 | ) 99 | return self._retriever 100 | 101 | def cached_chat_request( 102 | self, system_message: Union[str, SystemMessage], human_message: Union[str, HumanMessage] 103 | ) -> BaseMessage: 104 | """Cached chat request.""" 105 | if not isinstance(system_message, SystemMessage): 106 | logger.info("Converting system message to SystemMessage") 107 | system_message = SystemMessage(content=str(system_message)) 108 | 109 | if not isinstance(human_message, HumanMessage): 110 | logger.info("Converting human message to HumanMessage") 111 | human_message = HumanMessage(content=str(human_message)) 112 | messages = [system_message, human_message] 113 | # pylint: disable=not-callable 114 | # retval = self.chat(messages) 115 | retval = self.chat.invoke(messages) 116 | return retval 117 | 118 | # pylint: disable=unused-argument 119 | def prompt_with_template( 120 | self, prompt: PromptTemplate, concept: str, model: str = settings.openai_prompt_model_name 121 | ) -> str: 122 | """Prompt with template.""" 123 | retval = self.chat.invoke(prompt.format(concept=concept)) 
124 | return str(retval.content) if retval else "no response" 125 | 126 | def load(self, filepath: str): 127 | """Pdf loader.""" 128 | self.pinecone.pdf_loader(filepath=filepath) 129 | 130 | def rag(self, human_message: Union[str, HumanMessage]): 131 | """ 132 | Retrieval Augmented Generation prompt. 133 | 1. Retrieve human message prompt: Given a user input, relevant splits are retrieved 134 | from storage using a Retriever. 135 | 2. Generate: A ChatModel / LLM produces an answer using a prompt that includes 136 | the question and the retrieved data 137 | 138 | To prompt OpenAI's GPT-3 model to consider the embeddings from the Pinecone 139 | vector database, you would typically need to convert the embeddings back 140 | into a format that GPT-3 can understand, such as text. However, GPT-3 does 141 | not natively support direct input of embeddings. 142 | 143 | The typical workflow is to use the embeddings to retrieve relevant documents, 144 | and then use the text of these documents as part of the prompt for GPT-3. 145 | """ 146 | if not isinstance(human_message, HumanMessage): 147 | logger.info("Converting human_message to HumanMessage") 148 | human_message = HumanMessage(content=human_message) 149 | 150 | # --------------------------------------------------------------------- 151 | # 1.) Retrieve relevant documents from Pinecone vector database 152 | # --------------------------------------------------------------------- 153 | documents = self.pinecone.vector_store.similarity_search(query=human_message.content) 154 | 155 | # Extract the text from the documents 156 | document_texts = [doc.page_content for doc in documents] 157 | leader = textwrap.dedent( 158 | """\n 159 | You are a helpful assistant. You should assume that all of the 160 | following bullet points that follow are completely factual. 
161 | You should prioritize these enumerated facts when formulating your response:""" 162 | ) 163 | system_message_content = f"{leader} {'\n\n'.join(document_texts)}" 164 | system_message_content = ( 165 | f"{leader} {''.join([f'\n\n{40 * "-"}\n{i + 1}.) {text}\n' for i, text in enumerate(document_texts)])}" 166 | ) 167 | system_message = SystemMessage(content=system_message_content) 168 | # --------------------------------------------------------------------- 169 | # finished with hybrid search setup 170 | # --------------------------------------------------------------------- 171 | star_line = 80 * "*" 172 | logger.info( 173 | "\n%s\n" 174 | "rag() Retrieval Augmented Generation prompt" 175 | "Diagnostic information:\n" 176 | " Retrieved %i related documents from Pinecone\n" 177 | " System messages contains %i words\n" 178 | " System Prompt:" 179 | "\n <============================ BEGIN ===============================>" 180 | "%s" 181 | "\n <============================= END ================================>\n\n", 182 | star_line, 183 | len(documents), 184 | len(system_message.content.split()), 185 | system_message.content, 186 | ) 187 | 188 | # 2.) 
get a response from the chat model 189 | response = self.cached_chat_request(system_message=system_message, human_message=human_message) 190 | 191 | return str(response.content) 192 | -------------------------------------------------------------------------------- /models/pinecone.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=E0611,E1101 3 | """A class to manage the lifecycle of Pinecone vector database indexes.""" 4 | 5 | # document loading 6 | import glob 7 | 8 | # general purpose imports 9 | import json 10 | import logging 11 | import os 12 | 13 | from langchain.text_splitter import RecursiveCharacterTextSplitter 14 | from langchain_community.document_loaders.pdf import PyPDFLoader 15 | from langchain_openai import OpenAIEmbeddings 16 | from langchain_pinecone import PineconeVectorStore 17 | 18 | # pinecone integration 19 | from pinecone import Pinecone, ServerlessSpec 20 | from pinecone.core.openapi.shared.exceptions import PineconeApiException 21 | from pinecone.models import IndexList 22 | 23 | # this project 24 | from models.conf import settings 25 | 26 | 27 | logging.basicConfig(level=logging.DEBUG if settings.debug_mode else logging.INFO) 28 | 29 | 30 | class PineconeIndex: 31 | """Pinecone helper class.""" 32 | 33 | _pinecone = None 34 | _index: Pinecone.Index = None 35 | _index_name: str = None 36 | _text_splitter: RecursiveCharacterTextSplitter = None 37 | _openai_embeddings: OpenAIEmbeddings = None 38 | _vector_store: PineconeVectorStore = None 39 | 40 | def __init__(self, index_name: str = None): 41 | self.init() 42 | self.index_name = index_name or settings.pinecone_index_name 43 | logging.debug("PineconeIndex initialized with index_name: %s", self.index_name) 44 | logging.debug(self.index_stats) 45 | 46 | @property 47 | def index_name(self) -> str: 48 | """index name.""" 49 | return self._index_name 50 | 51 | @index_name.setter 52 | def index_name(self, value: 
str) -> None: 53 | """Set index name.""" 54 | if self._index_name != value: 55 | self.init() 56 | self._index_name = value 57 | self.init_index() 58 | 59 | @property 60 | def index(self) -> Pinecone.Index: 61 | """pinecone.Index lazy read-only property.""" 62 | if self._index is None: 63 | self.init_index() 64 | self._index = self.pinecone.Index(name=self.index_name) 65 | return self._index 66 | 67 | @property 68 | def index_stats(self) -> dict: 69 | """index stats.""" 70 | retval = self.index.describe_index_stats() 71 | return json.dumps(retval.to_dict(), indent=4) 72 | 73 | @property 74 | def initialized(self) -> bool: 75 | """initialized read-only property.""" 76 | indexes = self.pinecone.list_indexes() 77 | return self.index_name in indexes.names() 78 | 79 | @property 80 | def vector_store(self) -> PineconeVectorStore: 81 | """Pinecone lazy read-only property.""" 82 | if self._vector_store is None: 83 | if not self.initialized: 84 | self.init_index() 85 | self._vector_store = PineconeVectorStore( 86 | index=self.index, 87 | embedding=self.openai_embeddings, 88 | text_key=settings.pinecone_vectorstore_text_key, 89 | ) 90 | return self._vector_store 91 | 92 | @property 93 | def openai_embeddings(self) -> OpenAIEmbeddings: 94 | """OpenAIEmbeddings lazy read-only property.""" 95 | if self._openai_embeddings is None: 96 | # pylint: disable=no-member 97 | self._openai_embeddings = OpenAIEmbeddings( 98 | api_key=settings.openai_api_key.get_secret_value(), 99 | organization=settings.openai_api_organization, 100 | ) 101 | return self._openai_embeddings 102 | 103 | @property 104 | def pinecone(self) -> Pinecone: 105 | """Pinecone lazy read-only property.""" 106 | if self._pinecone is None: 107 | print("Initializing Pinecone...") 108 | api_key = settings.pinecone_api_key.get_secret_value() 109 | print(f"API Key: {api_key[:12]}****------") 110 | self._pinecone = Pinecone(api_key=api_key) 111 | return self._pinecone 112 | 113 | @property 114 | def text_splitter(self) -> 
RecursiveCharacterTextSplitter: 115 | """lazy read-only property.""" 116 | if self._text_splitter is None: 117 | self._text_splitter = RecursiveCharacterTextSplitter() 118 | return self._text_splitter 119 | 120 | def init_index(self): 121 | """Verify that an index named self.index_name exists in Pinecone. If not, create it.""" 122 | indexes: IndexList = None 123 | indexes = self.pinecone.list_indexes() 124 | if self.index_name not in indexes.names(): 125 | logging.debug("Index does not exist.") 126 | self.create() 127 | 128 | # pylint: disable=no-member 129 | def init(self): 130 | """Initialize Pinecone.""" 131 | 132 | self._index = None 133 | self._index_name = None 134 | self._text_splitter = None 135 | self._openai_embeddings = None 136 | self._vector_store = None 137 | 138 | def delete(self): 139 | """Delete index.""" 140 | if not self.initialized: 141 | logging.debug("Index does not exist. Nothing to delete.") 142 | return 143 | print("Deleting index...") 144 | self.pinecone.delete_index(self.index_name) 145 | 146 | def create(self): 147 | """Create index.""" 148 | print("Creating index. This may take a few minutes...") 149 | serverless_spec = ServerlessSpec( 150 | cloud="aws", 151 | region="us-east-1", 152 | ) 153 | try: 154 | self.pinecone.create_index( 155 | name=self.index_name, 156 | dimension=settings.pinecone_dimensions, 157 | metric=settings.pinecone_metric, 158 | spec=serverless_spec, 159 | ) 160 | print("Index created.") 161 | except PineconeApiException: 162 | pass 163 | 164 | def initialize(self): 165 | """Initialize index.""" 166 | self.delete() 167 | self.create() 168 | 169 | def pdf_loader(self, filepath: str): 170 | """ 171 | Embed PDF. 172 | 1. Load PDF document text data 173 | 2. Split into pages 174 | 3. Embed each page 175 | 4. Store in Pinecone 176 | 177 | Note: it's important to make sure that the "context" field that holds the document text 178 | in the metadata is not indexed. 
Currently you need to specify explicitly the fields you 179 | do want to index. For more information checkout 180 | https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing 181 | """ 182 | self.initialize() 183 | 184 | pdf_files = glob.glob(os.path.join(filepath, "*.pdf")) 185 | i = 0 186 | for pdf_file in pdf_files: 187 | i += 1 188 | j = len(pdf_files) 189 | print(f"Loading PDF {i} of {j}: {pdf_file}") 190 | loader = PyPDFLoader(file_path=pdf_file) 191 | docs = loader.load() 192 | k = 0 193 | for doc in docs: 194 | k += 1 195 | print(k * "-", end="\r") 196 | documents = self.text_splitter.create_documents([doc.page_content]) 197 | document_texts = [doc.page_content for doc in documents] 198 | embeddings = self.openai_embeddings.embed_documents(document_texts) 199 | self.vector_store.add_documents(documents=documents, embeddings=embeddings) 200 | 201 | print("Finished loading PDFs. \n" + self.index_stats) 202 | -------------------------------------------------------------------------------- /models/prompt_templates.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=too-few-public-methods 3 | """Sales Support Model (hsr) prompt templates""" 4 | 5 | from langchain.prompts import PromptTemplate 6 | 7 | 8 | class UofPennPromptTemplates: 9 | """University of Pennsylvania (Wharton) student-advisor prompt templates.""" 10 | 11 | sales_role: str = """You are a helpful student advisor at Wharton School of the 12 | University of Pennsylvania.
You provide concise explanations to questions about 13 | the courses they offer in 100 words or less.""" 14 | 15 | @classmethod 16 | def get_properties(cls): 17 | """Return a list of the @property member names of this class.""" 18 | return [attr for attr in dir(cls) if isinstance(getattr(cls, attr), property)] 19 | 20 | @property 21 | def online_courses(self) -> PromptTemplate: 22 | """PromptTemplate: explain Wharton's online courses about {concept}.""" 23 | template = ( 24 | self.sales_role 25 | + """ 26 | Explain the online courses Wharton offers about {concept} 27 | """ 28 | ) 29 | return PromptTemplate(input_variables=["concept"], template=template) 30 | 31 | @property 32 | def certification_programs(self) -> PromptTemplate: 33 | """PromptTemplate: summarize Wharton certificate-granting programs for {concept}.""" 34 | template = ( 35 | self.sales_role 36 | + """ 37 | Summarize their executive and online programs in which learner 38 | can earns certificates for {concept} 39 | """ 40 | ) 41 | return PromptTemplate(input_variables=["concept"], template=template) 42 | 43 | 44 | class NetecPromptTemplates: 45 | """Netec Prompt Templates.""" 46 | 47 | sales_role: str = """You are a helpful sales assistant at Netec who sells 48 | specialized training and exam preparation services to existing customers.
49 | You provide concise explanations of the services that Netec offers in 100 50 | words or less.""" 51 | 52 | @classmethod 53 | def get_properties(cls): 54 | """Return a list of the @property member names of this class.""" 55 | return [attr for attr in dir(cls) if isinstance(getattr(cls, attr), property)] 56 | 57 | @property 58 | def training_services(self) -> PromptTemplate: 59 | """PromptTemplate: explain Netec's training services about {concept}.""" 60 | template = ( 61 | self.sales_role 62 | + """ 63 | Explain the training services that Netec offers about {concept} 64 | """ 65 | ) 66 | return PromptTemplate(input_variables=["concept"], template=template) 67 | 68 | @property 69 | def oracle_training_services(self) -> PromptTemplate: 70 | """PromptTemplate: summarize Netec's Oracle certification training programs for {concept}.""" 71 | template = ( 72 | self.sales_role 73 | + """ 74 | Note that Netec is the exclusive provider in Latin America of Oracle training services 75 | for the 6 levels of Oracle Certification credentials: Oracle Certified Junior Associate (OCJA), 76 | Oracle Certified Associate (OCA), Oracle Certified Professional (OCP), 77 | Oracle Certified Master (OCM), Oracle Certified Expert (OCE) and 78 | Oracle Certified Specialist (OCS).
79 | Summarize their programs for {concept} 80 | """ 81 | ) 82 | return PromptTemplate(input_variables=["concept"], template=template) 83 | -------------------------------------------------------------------------------- /models/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/models/tests/__init__.py -------------------------------------------------------------------------------- /models/tests/mock_data/.env.test_01: -------------------------------------------------------------------------------- 1 | DEBUG_MODE = True 2 | DUMP_DEFAULTS = True 3 | LANGCHAIN_MEMORY_KEY = "TEST_chat_history" 4 | PINECONE_ENVIRONMENT = "TEST_gcp-starter" 5 | PINECONE_INDEX_NAME = "TEST_rag" 6 | PINECONE_VECTORSTORE_TEXT_KEY = "TEST_lc_id" 7 | PINECONE_METRIC = "TEST_dotproduct" 8 | PINECONE_DIMENSIONS = 1 9 | OPENAI_ENDPOINT_IMAGE_N = 1 10 | OPENAI_ENDPOINT_IMAGE_SIZE = "TEST_1024x768" 11 | OPENAI_CHAT_CACHE = False 12 | OPENAI_CHAT_MODEL_NAME = "TEST_gpt-4" 13 | OPENAI_PROMPT_MODEL_NAME = "TEST_gpt-4" 14 | OPENAI_CHAT_TEMPERATURE = 1.0 15 | OPENAI_CHAT_MAX_RETRIES = 5 16 | -------------------------------------------------------------------------------- /models/tests/mock_data/.env.test_illegal_nulls: -------------------------------------------------------------------------------- 1 | DEBUG_MODE= 2 | AWS_REKOGNITION_FACE_DETECT_MAX_FACES_COUNT= 3 | AWS_REKOGNITION_FACE_DETECT_THRESHOLD= 4 | -------------------------------------------------------------------------------- /models/tests/mock_data/.env.test_legal_nulls: -------------------------------------------------------------------------------- 1 | LANGCHAIN_MEMORY_KEY= 2 | OPENAI_ENDPOINT_IMAGE_SIZE= 3 | -------------------------------------------------------------------------------- /models/tests/mock_data/test_load.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/models/tests/mock_data/test_load.pdf -------------------------------------------------------------------------------- /models/tests/test_configuration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # flake8: noqa: F401 3 | """ 4 | Test conf module. 5 | """ 6 | import os 7 | from unittest.mock import patch 8 | 9 | import pytest # pylint: disable=unused-import 10 | from dotenv import load_dotenv 11 | from pydantic import ValidationError as PydanticValidationError 12 | 13 | from models.conf import Settings, SettingsDefaults 14 | 15 | 16 | HERE = os.path.dirname(os.path.abspath(__file__)) 17 | 18 | 19 | class TestConfig: 20 | """Test config.settings.""" 21 | 22 | def env_path(self, filename): 23 | """Return the path to the .env file.""" 24 | return os.path.join(HERE, "mock_data", filename) 25 | 26 | def test_conf_defaults(self): 27 | """Test that settings == SettingsDefaults when no .env is in use.""" 28 | os.environ.clear() 29 | mock_settings = Settings() 30 | assert mock_settings.langchain_memory_key == SettingsDefaults.LANGCHAIN_MEMORY_KEY 31 | assert mock_settings.debug_mode == SettingsDefaults.DEBUG_MODE 32 | 33 | assert mock_settings.openai_api_key == SettingsDefaults.OPENAI_API_KEY 34 | assert mock_settings.openai_api_organization == SettingsDefaults.OPENAI_API_ORGANIZATION 35 | assert mock_settings.openai_chat_cache == SettingsDefaults.OPENAI_CHAT_CACHE 36 | assert mock_settings.openai_chat_max_retries == SettingsDefaults.OPENAI_CHAT_MAX_RETRIES 37 | assert mock_settings.openai_chat_model_name == SettingsDefaults.OPENAI_CHAT_MODEL_NAME 38 | assert mock_settings.openai_endpoint_image_n == SettingsDefaults.OPENAI_ENDPOINT_IMAGE_N 39 | assert mock_settings.openai_endpoint_image_size == 
SettingsDefaults.OPENAI_ENDPOINT_IMAGE_SIZE 40 | assert mock_settings.openai_prompt_model_name == SettingsDefaults.OPENAI_PROMPT_MODEL_NAME 41 | 42 | assert mock_settings.pinecone_api_key == SettingsDefaults.PINECONE_API_KEY 43 | assert mock_settings.pinecone_dimensions == SettingsDefaults.PINECONE_DIMENSIONS 44 | assert mock_settings.pinecone_environment == SettingsDefaults.PINECONE_ENVIRONMENT 45 | assert mock_settings.pinecone_index_name == SettingsDefaults.PINECONE_INDEX_NAME 46 | assert mock_settings.pinecone_metric == SettingsDefaults.PINECONE_METRIC 47 | assert mock_settings.pinecone_vectorstore_text_key == SettingsDefaults.PINECONE_VECTORSTORE_TEXT_KEY 48 | 49 | # pylint: disable=no-member 50 | def test_conf_defaults_secrets(self): 51 | """Test that settings secrets match the defaults.""" 52 | os.environ.clear() 53 | mock_settings = Settings() 54 | assert mock_settings.openai_api_key.get_secret_value() == SettingsDefaults.OPENAI_API_KEY.get_secret_value() 55 | assert mock_settings.pinecone_api_key.get_secret_value() == SettingsDefaults.PINECONE_API_KEY.get_secret_value() 56 | 57 | def test_env_legal_nulls(self): 58 | """Test that settings falls back to defaults for .env keys that may legally be null.""" 59 | os.environ.clear() 60 | env_path = self.env_path(".env.test_legal_nulls") 61 | print("env_path", env_path) 62 | loaded = load_dotenv(env_path) 63 | assert loaded 64 | 65 | mock_settings = Settings() 66 | assert mock_settings.langchain_memory_key == SettingsDefaults.LANGCHAIN_MEMORY_KEY 67 | assert mock_settings.openai_endpoint_image_size == SettingsDefaults.OPENAI_ENDPOINT_IMAGE_SIZE 68 | 69 | def test_env_illegal_nulls(self): 70 | """Test that nulled .env values for required keys raise a Pydantic validation error.""" 71 | os.environ.clear() 72 | env_path = self.env_path(".env.test_illegal_nulls") 73 | print("env_path", env_path) 74 | loaded = load_dotenv(env_path) 75 | assert loaded 76 | 77 | with pytest.raises(PydanticValidationError): 78 | Settings() 79 | 80 | def test_env_overrides(self): 81 | """Test that settings
takes custom .env values.""" 82 | os.environ.clear() 83 | env_path = self.env_path(".env.test_01") 84 | loaded = load_dotenv(env_path) 85 | assert loaded 86 | 87 | mock_settings = Settings() 88 | 89 | assert mock_settings.debug_mode is True 90 | assert mock_settings.dump_defaults is True 91 | assert mock_settings.langchain_memory_key == "TEST_chat_history" 92 | assert mock_settings.pinecone_environment == "TEST_gcp-starter" 93 | assert mock_settings.pinecone_index_name == "TEST_rag" 94 | assert mock_settings.pinecone_vectorstore_text_key == "TEST_lc_id" 95 | assert mock_settings.pinecone_metric == "TEST_dotproduct" 96 | assert mock_settings.pinecone_dimensions == 1 97 | assert mock_settings.openai_endpoint_image_n == 1 98 | assert mock_settings.openai_endpoint_image_size == "TEST_1024x768" 99 | assert mock_settings.openai_chat_cache is False 100 | assert mock_settings.openai_chat_model_name == "TEST_gpt-4" 101 | assert mock_settings.openai_prompt_model_name == "TEST_gpt-4" 102 | assert mock_settings.openai_chat_temperature == 1.0 103 | assert mock_settings.openai_chat_max_retries == 5 104 | 105 | @patch.dict(os.environ, {"OPENAI_CHAT_MAX_RETRIES": "-1"}) 106 | def test_invalid_chat_max_retries(self): 107 | """Test that Pydantic raises a validation error for environment variable w negative integer values.""" 108 | 109 | with pytest.raises(PydanticValidationError): 110 | Settings() 111 | 112 | @patch.dict(os.environ, {"OPENAI_CHAT_TEMPERATURE": "-1"}) 113 | def test_invalid_chat_temperature(self): 114 | """Test that Pydantic raises a validation error for environment variable w negative integer values.""" 115 | 116 | with pytest.raises(PydanticValidationError): 117 | Settings() 118 | 119 | @patch.dict(os.environ, {"PINECONE_DIMENSIONS": "-1"}) 120 | def test_invalid_pinecone_dimensions(self): 121 | """Test that Pydantic raises a validation error for environment variable w negative integer values.""" 122 | 123 | with pytest.raises(PydanticValidationError): 124 | 
Settings() 125 | 126 | def test_configure_with_class_constructor(self): 127 | """test that we can set values with the class constructor""" 128 | os.environ.clear() 129 | 130 | mock_settings = Settings( 131 | debug_mode=True, 132 | dump_defaults=True, 133 | langchain_memory_key="TEST_chat_history", 134 | pinecone_environment="TEST_gcp-starter", 135 | pinecone_index_name="TEST_rag", 136 | pinecone_vectorstore_text_key="TEST_lc_id", 137 | pinecone_metric="TEST_dotproduct", 138 | pinecone_dimensions=1, 139 | openai_endpoint_image_n=1, 140 | openai_endpoint_image_size="TEST_1024x768", 141 | openai_chat_cache=False, 142 | openai_chat_model_name="TEST_gpt-4", 143 | openai_prompt_model_name="TEST_text-davinci-003", 144 | openai_chat_temperature=1.0, 145 | openai_chat_max_retries=5, 146 | ) 147 | 148 | assert mock_settings.debug_mode is True 149 | assert mock_settings.dump_defaults is True 150 | assert mock_settings.langchain_memory_key == "TEST_chat_history" 151 | assert mock_settings.pinecone_environment == "TEST_gcp-starter" 152 | assert mock_settings.pinecone_index_name == "TEST_rag" 153 | assert mock_settings.pinecone_vectorstore_text_key == "TEST_lc_id" 154 | assert mock_settings.pinecone_metric == "TEST_dotproduct" 155 | assert mock_settings.pinecone_dimensions == 1 156 | assert mock_settings.openai_endpoint_image_n == 1 157 | assert mock_settings.openai_endpoint_image_size == "TEST_1024x768" 158 | assert mock_settings.openai_chat_cache is False 159 | assert mock_settings.openai_chat_model_name == "TEST_gpt-4" 160 | assert mock_settings.openai_prompt_model_name == "TEST_text-davinci-003" 161 | assert mock_settings.openai_chat_temperature == 1.0 162 | assert mock_settings.openai_chat_max_retries == 5 163 | 164 | def test_readonly_settings(self): 165 | """test that we can't set readonly values with the class constructor""" 166 | 167 | mock_settings = Settings() 168 | with pytest.raises(PydanticValidationError): 169 | mock_settings.langchain_memory_key = 
"TEST_chat_history" 170 | with pytest.raises(PydanticValidationError): 171 | mock_settings.pinecone_environment = "TEST_gcp-starter" 172 | with pytest.raises(PydanticValidationError): 173 | mock_settings.pinecone_index_name = "TEST_rag" 174 | with pytest.raises(PydanticValidationError): 175 | mock_settings.pinecone_vectorstore_text_key = "TEST_lc_id" 176 | with pytest.raises(PydanticValidationError): 177 | mock_settings.pinecone_metric = "TEST_dotproduct" 178 | with pytest.raises(PydanticValidationError): 179 | mock_settings.pinecone_dimensions = 1 180 | with pytest.raises(PydanticValidationError): 181 | mock_settings.openai_endpoint_image_n = 1 182 | with pytest.raises(PydanticValidationError): 183 | mock_settings.openai_endpoint_image_size = "TEST_1024x768" 184 | with pytest.raises(PydanticValidationError): 185 | mock_settings.openai_chat_cache = False 186 | with pytest.raises(PydanticValidationError): 187 | mock_settings.openai_chat_model_name = "TEST_gpt-4" 188 | with pytest.raises(PydanticValidationError): 189 | mock_settings.openai_prompt_model_name = "TEST_text-davinci-003" 190 | with pytest.raises(PydanticValidationError): 191 | mock_settings.openai_chat_temperature = 1.0 192 | with pytest.raises(PydanticValidationError): 193 | mock_settings.openai_chat_max_retries = 5 194 | 195 | def test_dump(self): 196 | """Test that dump is a dict.""" 197 | 198 | mock_settings = Settings() 199 | assert isinstance(mock_settings.dump, dict) 200 | 201 | def test_dump_keys(self): 202 | """Test that dump contains the expected keys.""" 203 | 204 | dump = Settings().dump 205 | assert "secrets" in dump.keys() 206 | assert "environment" in dump.keys() 207 | assert "langchain" in dump.keys() 208 | assert "openai_api" in dump.keys() 209 | assert "pinecone_api" in dump.keys() 210 | -------------------------------------------------------------------------------- /models/tests/test_examples.py: -------------------------------------------------------------------------------- 1 | # -*- 
coding: utf-8 -*- 2 | # flake8: noqa: F401 3 | """ 4 | Test command line example prompts. 5 | """ 6 | from unittest.mock import MagicMock, patch 7 | 8 | import pytest # pylint: disable=unused-import 9 | from langchain.schema import HumanMessage, SystemMessage 10 | 11 | from models.examples.certification_programs import hsr as uofpenn_certification_program 12 | from models.examples.online_courses import hsr as uofpenn_online_hsr 13 | from models.examples.prompt import hsr as prompt_hrs 14 | from models.examples.rag import hsr as rag_hsr 15 | from models.prompt_templates import NetecPromptTemplates 16 | 17 | 18 | HUMAN_MESSAGE = "this is a test" 19 | SYSTEM_PROMPT = """you are a helpful assistant. If you are prompted, 20 | 'this is a test', then return the word 'SUCCESS' in upper case. Return only 21 | this single word, in upper case. Do not embellish. do not further prompt 22 | the user for any reason.""" 23 | 24 | 25 | class TestExamples: 26 | """Test command line examples.""" 27 | 28 | @patch("argparse.ArgumentParser.parse_args") 29 | def test_prompt(self, mock_parse_args): 30 | """Test prompt example.""" 31 | 32 | mock_args = MagicMock() 33 | mock_args.system_prompt = SYSTEM_PROMPT 34 | mock_args.human_prompt = HUMAN_MESSAGE 35 | mock_parse_args.return_value = mock_args 36 | 37 | system_message = SystemMessage(content=SYSTEM_PROMPT) 38 | human_message = HumanMessage(content=HUMAN_MESSAGE) 39 | result = prompt_hrs.cached_chat_request(system_message=system_message, human_message=human_message) 40 | assert result.content == "SUCCESS" 41 | 42 | @patch("argparse.ArgumentParser.parse_args") 43 | def test_rag(self, mock_parse_args): 44 | """Test RAG example.""" 45 | mock_args = MagicMock() 46 | mock_args.human_message = HUMAN_MESSAGE 47 | mock_parse_args.return_value = mock_args 48 | 49 | human_message = HumanMessage(content=mock_args.human_message) 50 | result = rag_hsr.rag(human_message=human_message) 51 | assert isinstance(result, str) 52 | assert len(result) > 0 53 
| 54 | @patch("argparse.ArgumentParser.parse_args") 55 | def test_training_services(self, mock_parse_args): 56 | """Test training services templates.""" 57 | mock_args = MagicMock() 58 | mock_args.human_message = HUMAN_MESSAGE 59 | mock_parse_args.return_value = mock_args 60 | 61 | templates = NetecPromptTemplates() 62 | prompt = templates.training_services 63 | 64 | result = uofpenn_certification_program.prompt_with_template(prompt=prompt, concept=mock_args.human_message) 65 | assert isinstance(result, str) 66 | assert len(result) > 0 67 | 68 | @patch("argparse.ArgumentParser.parse_args") 69 | def test_oracle_training_services(self, mock_parse_args): 70 | """Test oracle training services.""" 71 | mock_args = MagicMock() 72 | mock_args.human_message = HUMAN_MESSAGE 73 | mock_parse_args.return_value = mock_args 74 | 75 | templates = NetecPromptTemplates() 76 | prompt = templates.oracle_training_services 77 | 78 | result = uofpenn_online_hsr.prompt_with_template(prompt=prompt, concept=mock_args.human_message) 79 | assert isinstance(result, str) 80 | assert len(result) > 0 81 | -------------------------------------------------------------------------------- /models/tests/test_hsr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=E0611,E1101 3 | # flake8: noqa: F401 4 | """ 5 | Test integrity of base class. 
6 | """ 7 | import pytest # pylint: disable=unused-import 8 | 9 | # from langchain_community.chat_models import ChatOpenAI 10 | from langchain_openai import ChatOpenAI 11 | 12 | from models.hybrid_search_retreiver import HybridSearchRetriever 13 | from models.pinecone import PineconeIndex 14 | 15 | 16 | class TestSalesSupportModel: 17 | """Test HybridSearchRetriever class.""" 18 | 19 | def test_01_basic(self): 20 | """Ensure that we can instantiate the class.""" 21 | 22 | # pylint: disable=broad-except 23 | try: 24 | HybridSearchRetriever() 25 | except Exception as e: 26 | assert False, f"initialization of HybridSearchRetriever() failed with exception: {e}" 27 | 28 | def test_02_class_aatribute_types(self): 29 | """ensure that class attributes are of the correct type""" 30 | 31 | hsr = HybridSearchRetriever() 32 | assert isinstance(hsr.chat, ChatOpenAI) 33 | assert isinstance(hsr.pinecone, PineconeIndex) 34 | -------------------------------------------------------------------------------- /models/tests/test_openai.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # flake8: noqa: F401 3 | # pylint: disable=too-few-public-methods 4 | """ 5 | Test integrity of base class. 6 | """ 7 | import pytest # pylint: disable=unused-import 8 | 9 | from models.hybrid_search_retreiver import HybridSearchRetriever 10 | 11 | 12 | class TestOpenAI: 13 | """Test HybridSearchRetriever class.""" 14 | 15 | def test_03_test_openai_connectivity(self): 16 | """Ensure that we have connectivity to OpenAI.""" 17 | 18 | hsr = HybridSearchRetriever() 19 | retval = hsr.cached_chat_request( 20 | "your are a helpful assistant", "please return the value 'CORRECT' in all upper case." 
21 | ) 22 | assert retval.content == "CORRECT" 23 | -------------------------------------------------------------------------------- /models/tests/test_pinecone.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # flake8: noqa: F401 3 | """ 4 | Test this model's Pinecone helper class. 5 | """ 6 | 7 | import os 8 | 9 | import pinecone as oem_pinecone 10 | import pytest # pylint: disable=unused-import 11 | from pinecone import Pinecone 12 | 13 | from models.conf import settings 14 | from models.pinecone import PineconeIndex 15 | 16 | 17 | class TestPinecone: 18 | """Test HybridSearchRetriever class.""" 19 | 20 | def test_01_can_instantiate(self): 21 | """Ensure that we instantiate the object.""" 22 | # pylint: disable=broad-except 23 | try: 24 | PineconeIndex() 25 | except Exception as e: 26 | assert False, f"Pinecone() failed with exception: {e}" 27 | 28 | def test_02_init(self): 29 | """Ensure that we can initialize Pinecone.""" 30 | pinecone = PineconeIndex() 31 | # pylint: disable=broad-except 32 | try: 33 | pinecone.init() 34 | except Exception as e: 35 | assert False, f"Pinecone.init() failed with exception: {e}" 36 | 37 | def test_03_index(self): 38 | """Test that the index name is correct.""" 39 | pinecone = PineconeIndex() 40 | assert pinecone.index_name == settings.pinecone_index_name 41 | 42 | def test_04_initialize(self): 43 | """Test that the index initializes.""" 44 | pinecone = PineconeIndex() 45 | # pylint: disable=broad-except 46 | try: 47 | pinecone.initialize() 48 | except Exception as e: 49 | assert False, f"Pinecone.initialize() failed with exception: {e}" 50 | assert isinstance(pinecone.index, oem_pinecone.Index) 51 | 52 | def test_05_delete(self): 53 | """Test that the index can be deleted.""" 54 | pinecone_index = PineconeIndex() 55 | 56 | # pylint: disable=E1101 57 | api_key = settings.pinecone_api_key.get_secret_value() 58 | pinecone = Pinecone(api_key=api_key) 59 | indexes = 
pinecone.list_indexes().names() 60 | assert pinecone_index.index_name in indexes 61 | # pylint: disable=broad-except 62 | try: 63 | pinecone_index.delete() 64 | except Exception as e: 65 | assert False, f"Pinecone.delete() failed with exception: {e}" 66 | 67 | def test_06_create(self): 68 | """Test that the index can be created.""" 69 | pinecone_index = PineconeIndex() 70 | 71 | # pylint: disable=E1101 72 | api_key = settings.pinecone_api_key.get_secret_value() 73 | pinecone = Pinecone(api_key=api_key) 74 | 75 | indexes = pinecone.list_indexes().names() 76 | if pinecone_index.index_name in indexes: 77 | pinecone_index.delete() 78 | 79 | # pylint: disable=broad-except 80 | try: 81 | pinecone_index.create() 82 | except Exception as e: 83 | assert False, f"Pinecone.create() failed with exception: {e}" 84 | assert isinstance(pinecone_index.index, oem_pinecone.Index) 85 | pinecone_index.delete() 86 | 87 | def test_07_load_pdf(self): 88 | """Test that we can load a PDF document to the index.""" 89 | HERE = os.path.dirname(os.path.abspath(__file__)) 90 | test_file = os.path.join(HERE, "mock_data", "test_load.pdf") 91 | 92 | if not os.path.exists(test_file): 93 | pytest.skip(f"File {test_file} does not exist") 94 | 95 | pinecone = PineconeIndex() 96 | # pylint: disable=broad-except 97 | try: 98 | pinecone.pdf_loader(filepath=test_file) 99 | except Exception as e: 100 | assert False, f"Pinecone.load_pdf() failed with exception: {e}" 101 | pinecone.delete() 102 | -------------------------------------------------------------------------------- /models/tests/test_prompt_templates.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # flake8: noqa: F401 3 | # pylint: disable=too-few-public-methods 4 | """ 5 | Test integrity of base class. 
6 | """ 7 | import pytest # pylint: disable=unused-import 8 | from langchain.prompts import PromptTemplate 9 | 10 | from models.prompt_templates import NetecPromptTemplates 11 | 12 | 13 | class TestPromptTemplates: 14 | """Test HybridSearchRetriever class.""" 15 | 16 | def test_01_prompt_with_template(self): 17 | """Ensure that all properties of the template class are PromptTemplate instances.""" 18 | templates = NetecPromptTemplates() 19 | for prop_name in templates.get_properties(): 20 | prop = getattr(templates, prop_name) 21 | assert isinstance(prop, PromptTemplate) 22 | -------------------------------------------------------------------------------- /models/tests/test_prompts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # flake8: noqa: F401 3 | """ 4 | Test integrity of base class. 5 | """ 6 | import pytest # pylint: disable=unused-import 7 | 8 | from models.hybrid_search_retreiver import HybridSearchRetriever 9 | from models.prompt_templates import NetecPromptTemplates 10 | 11 | 12 | class TestPrompts: 13 | """Test HybridSearchRetriever class.""" 14 | 15 | hsr = HybridSearchRetriever() 16 | templates = NetecPromptTemplates() 17 | 18 | def test_oracle_training_services(self): 19 | """Test a prompt with the Oracle training services template""" 20 | 21 | prompt = self.templates.oracle_training_services 22 | result = self.hsr.prompt_with_template(prompt=prompt, concept="Oracle database administrator") 23 | assert result 24 | assert "Oracle" in result 25 | assert "training" in result 26 | 27 | def test_training_services(self): 28 | """Test a prompt with the training services template""" 29 | 30 | prompt = self.templates.training_services 31 | result = self.hsr.prompt_with_template(prompt=prompt, concept="Microsoft certified Azure AI engineer associate") 32 | assert result 33 | assert "Microsoft" in result or "Azure" in result 34 | assert "training" in result 35 | 
-------------------------------------------------------------------------------- /models/yt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=E0611 3 | """ 4 | LangChain Quickstart 5 | ~~~~~~~~~~~~~~~~~~~~ 6 | LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners 7 | 8 | see: https://www.youtube.com/watch?v=aywZrzNaKjs 9 | https://github.com/rabbitmetrics/langchain-13-min 10 | """ 11 | import logging 12 | import os 13 | 14 | import pinecone 15 | from dotenv import find_dotenv, load_dotenv 16 | 17 | # 5.) sequential chains 18 | # 4.) chains 19 | from langchain.chains.llm import LLMChain 20 | from langchain.chains.sequential import SimpleSequentialChain 21 | 22 | # 3.) prompt templates 23 | from langchain.prompts import PromptTemplate 24 | 25 | # 2.) models and messages 26 | from langchain.schema import HumanMessage, SystemMessage # AIMessage (not used) 27 | 28 | # 6.) embeddings 29 | from langchain.text_splitter import RecursiveCharacterTextSplitter 30 | 31 | # 1.) wrappers 32 | from langchain_community.llms.openai import OpenAI 33 | 34 | # 8.) LangChain agents 35 | from langchain_experimental.agents.agent_toolkits.python.base import create_python_agent 36 | from langchain_experimental.utilities.python import PythonREPL 37 | 38 | # from langchain_community.chat_models import ChatOpenAI 39 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 40 | 41 | # 7.) 
pinecode client 42 | from langchain_pinecone import PineconeVectorStore as Pinecone 43 | 44 | from models.conf import settings 45 | 46 | 47 | logging.basicConfig(level=logging.DEBUG if settings.debug_mode else logging.INFO) 48 | logger = logging.getLogger(__name__) 49 | 50 | # Load environment variables from .env file in all folders 51 | # pylint: disable=duplicate-code 52 | dotenv_path = find_dotenv() 53 | if os.path.exists(dotenv_path): 54 | load_dotenv(dotenv_path=dotenv_path, verbose=True) 55 | OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] 56 | OPENAI_API_ORGANIZATION = os.environ["OPENAI_API_ORGANIZATION"] 57 | else: 58 | raise FileNotFoundError("No .env file found in root directory of repository") 59 | 60 | 61 | class LangChainDev: 62 | """LangChain Quickstart""" 63 | 64 | PINECONE_INDEX_NAME = "langchain-quickstart" 65 | 66 | multi_prompt_explanation = None 67 | texts_splitter_results = None 68 | pinecone_search = None 69 | openai_embedding = OpenAIEmbeddings(model_name="ada") # minute: 10:05 70 | query_result = None 71 | agent_executor = create_python_agent( # minute: 11:45 72 | llm=OpenAI(temperature=0, max_tokens=1000), 73 | tool=PythonREPL(), 74 | verbose=True, 75 | ) 76 | # pylint: disable=no-member 77 | pinecone.init( 78 | api_key=settings.pinecone_api_key.get_secret_value(), environment=settings.pinecone_environment 79 | ) # minute 10:43 80 | 81 | # LLM wrappers. minute 5:46 82 | def test_01_basic(self): 83 | """Test a basic request""" 84 | 85 | llm = OpenAI(model_name="gpt-4") 86 | retval = llm("explain large language models in one sentence") 87 | print(retval) 88 | 89 | # 2.) models and messages. 
minute 6:08 90 | def test_02_chat_model(self): 91 | """Test a chat model""" 92 | chat = ChatOpenAI(model_name="gpt-4", temperature=0.3) 93 | messages = [ 94 | SystemMessage(content="You are an expert data scientist"), 95 | HumanMessage(content="Write a Python script that trains a neural network on simulated data"), 96 | ] 97 | retval = chat(messages) 98 | print(retval.content, end="\n") 99 | 100 | # 3.) prompt templates. minute 6:56 101 | def get_prompt(self): 102 | """Get a prompt""" 103 | template = """ 104 | You are an expert data scientist with an expertise in building deep learning models. 105 | Explain the concept of {concept} in a couple of lines. 106 | """ 107 | prompt = PromptTemplate(input_variables=["concept"], template=template) 108 | return prompt 109 | 110 | def test_03_prompt_templates(self): 111 | """Test prompt templates""" 112 | llm = OpenAI(model_name="gpt-4") 113 | prompt = self.get_prompt() 114 | retval = llm(prompt.format(concept="regularization")) 115 | print(retval) 116 | 117 | # 4.) chains. minute 7:45 118 | def get_chain(self, llm, prompt): 119 | """Get a chain""" 120 | chain = LLMChain(llm=llm, prompt=prompt) 121 | return chain 122 | 123 | def test_04_chain(self): 124 | """Test a chain""" 125 | llm = OpenAI(model_name="gpt-4") 126 | prompt = self.get_prompt() 127 | chain = self.get_chain(llm=llm, prompt=prompt) 128 | print(chain.run("autoencoder")) 129 | 130 | # 5.) sequential chains. minute 8:06 131 | def get_overall_chain(self, chains): 132 | """Get an overall chain""" 133 | return SimpleSequentialChain(chains=chains, verbose=True) 134 | 135 | def get_prompt_two(self): 136 | """Get a second prompt""" 137 | second_prompt = PromptTemplate( 138 | input_variables=["ml_concept"], 139 | template=""" 140 | Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words. 
141 | """, 142 | ) 143 | return second_prompt 144 | 145 | def get_explanation(self): 146 | """Get an explanation""" 147 | llm = OpenAI(model_name="gpt-4") 148 | prompt = self.get_prompt() 149 | chain_one = self.get_chain(llm=llm, prompt=prompt) 150 | 151 | second_prompt = self.get_prompt_two() 152 | chain_two = self.get_chain(llm=llm, prompt=second_prompt) 153 | overall_chain = self.get_overall_chain(chains=[chain_one, chain_two]) 154 | return overall_chain.run("autoencoder") 155 | 156 | def test_05_chains(self): 157 | """Test chains""" 158 | self.multi_prompt_explanation = self.get_explanation() 159 | print(self.multi_prompt_explanation) 160 | 161 | # 6.) embeddings. minute 9:00 162 | def test_06_embeddings(self): 163 | """Test embeddings""" 164 | # minute 9:32 165 | text_splitter = RecursiveCharacterTextSplitter( 166 | chunk_size=100, 167 | chunk_overlap=0, 168 | ) 169 | self.multi_prompt_explanation = self.get_explanation() 170 | if not self.texts_splitter_results: 171 | self.texts_splitter_results = text_splitter.create_documents([self.multi_prompt_explanation]) 172 | print(self.texts_splitter_results[0].page_content) 173 | 174 | # minute 10:05 175 | def test_06_embeddings_b(self): 176 | """Test embeddings b""" 177 | if not self.query_result: 178 | self.query_result = self.openai_embedding.embed_query( # minute 10:21 179 | self.texts_splitter_results[0].page_content 180 | ) 181 | print(self.query_result) 182 | 183 | # 7.) pinecone client. minute 11:00 184 | self.pinecone_search = Pinecone.from_documents( 185 | documents=self.texts_splitter_results, 186 | embedding=self.openai_embedding, 187 | index_name=self.PINECONE_INDEX_NAME, 188 | ) 189 | 190 | # pinecone (continued). minute 11:12 191 | def test_07_pinecone_search(self): 192 | """Test pinecone search""" 193 | query = "What is magical about an autoencoder?" 194 | result = self.pinecone_search.similarity_search(query) 195 | print(result) 196 | 197 | # 8.) LangChain agents. minute 11:45 198 | # (unrelated.) 
199 | def test_08_agent_executor(self): 200 | """Test agent executor""" 201 | retval = self.agent_executor.run("Find the roots (zeros) of the quadratic function 3 * x**2 + 2*x -1") 202 | print(retval) 203 | 204 | def main(self): 205 | """Main function""" 206 | # self.test_06_embeddings() 207 | # self.test_06_embeddings_b() 208 | # self.test_07_pinecone_search() 209 | # self.test_08_agent_executor 210 | self.test_03_prompt_templates() 211 | 212 | 213 | def main(): 214 | """Main function""" 215 | pintcode_tests = LangChainDev() 216 | pintcode_tests.main() 217 | 218 | 219 | if __name__ == "__main__": 220 | main() 221 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "private": true, 3 | "scripts": { 4 | "test": "echo \"Error: no test specified\" && exit 1", 5 | "prettier": "prettier --write \"**/*.{js,jsx,ts,tsx,json,css,scss,md}\"" 6 | }, 7 | "devDependencies": { 8 | "@semantic-release/changelog": "^6.0.3", 9 | "@semantic-release/commit-analyzer": "^13.0.0", 10 | "@semantic-release/git": "^10.0.1", 11 | "@semantic-release/github": "^11.0.0", 12 | "@semantic-release/release-notes-generator": "^14.0.0", 13 | "prettier": "^3.1.1", 14 | "typescript": "^5.2.2" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | 4 | [tool.isort] 5 | profile = "black" 6 | lines_after_imports = 2 7 | 8 | [tool.black] 9 | line-length = 120 10 | target-version = ['py311'] 11 | include = '\.pyi?$' 12 | exclude = ''' 13 | /( 14 | \.git 15 | | \.hg 16 | | \.mypy_cache 17 | | \.tox 18 | | \.venv 19 | | venv 20 | | node_modules 21 | | build 22 | | buck-out 23 | | build 24 | | dist 25 | )/ 26 | ''' 27 | 28 | [tool.codespell] 29 | skip = 
'*.svg,models/prompt_templates.py'
ignore-words = 'codespell.txt'
--------------------------------------------------------------------------------
/release.config.js:
--------------------------------------------------------------------------------
module.exports = {
  dryRun: false,
  plugins: [
    "@semantic-release/commit-analyzer",
    "@semantic-release/release-notes-generator",
    [
      "@semantic-release/changelog",
      {
        changelogFile: "CHANGELOG.md",
      },
    ],
    "@semantic-release/github",
    [
      "@semantic-release/git",
      {
        assets: ["CHANGELOG.md", "requirements/base.txt"],
        message:
          "chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}",
      },
    ],
  ],
};
--------------------------------------------------------------------------------
/requirements/base.txt:
--------------------------------------------------------------------------------

python-decouple==3.8
langchainhub==0.1.21
langchain-openai==0.3.18
langchain-experimental
openai>=1.40.0
langchain
langchain-pinecone
# NOTE: removed a duplicate "langchain-experimental" entry that was listed twice
pinecone-client==5.0.1
pinecone-text==0.10.0
pydantic==2.10.4
pydantic-settings==2.9.1
python-dotenv==1.1.0
pypdf==5.6.0
tiktoken==0.9.0
--------------------------------------------------------------------------------
/requirements/local.txt:
--------------------------------------------------------------------------------

-r base.txt

# dev and test
# ------------
pytest==8.3.4
pytest_mock==3.14.0

# Code linters, formatters, and security scanners
# ------------
black==25.1.0
flake8==7.2.0
flake8-coding==1.3.2
pre-commit==4.0.1
isort==6.0.1
mypy==1.16.0
pylint==3.3.7
bandit==1.8.3
pydocstringformatter==0.7.3
tox==4.25.0
codespell==2.4.1

-------------------------------------------------------------------------------- /run_pylint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Called from pre-commit. Run pylint on all python files in the current directory 3 | python -m pylint "$@" 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Future use: setup for openai_embeddings package. I use this for instructional purposes, 4 | for demonstrating best practices on how to create a Python package. 5 | 6 | This package is not actually published to PyPi. 7 | """ 8 | import io 9 | import os 10 | from typing import List 11 | 12 | from setuptools import find_packages, setup 13 | 14 | from setup_utils import get_semantic_version # pylint: disable=import-error 15 | 16 | 17 | HERE = os.path.abspath(os.path.dirname(__file__)) 18 | 19 | 20 | def is_requirement(line: str) -> bool: 21 | """ 22 | True if line is a valid requirement line from a 23 | Python requirements file. 24 | """ 25 | return not (line.strip() == "" or line.startswith("#")) 26 | 27 | 28 | def load_requirements(filename: str) -> List[str]: 29 | """ 30 | Returns Python package requirements as a list of semantically 31 | versioned pip packages. 32 | 33 | Args: 34 | filename: The name of the requirements file to load. example: "base.txt" 35 | 36 | Returns: 37 | A list of package requirements. 38 | ['pytest==8.3.4', 'pytest_mock==3.14.0', 'black==25.1.0', ... 
more packages ] 39 | """ 40 | with io.open(os.path.join(HERE, "requirements", filename), "rt", encoding="utf-8") as f: 41 | return [line.strip() for line in f if is_requirement(line) and not line.startswith("-r")] 42 | 43 | 44 | setup( 45 | name="openai_embeddings", 46 | version=get_semantic_version(), 47 | description="""A Hybrid Search and Augmented Generation prompting solution using 48 | Python [OpenAI](https://openai.com/) embeddings sourced from 49 | [Pinecone](https://docs.pinecone.io/docs/python-client) vector database indexes and 50 | managed by [LangChain](https://www.langchain.com/).""", 51 | author="Lawrence McDaniel", 52 | author_email="lpm0073@gmail.com", 53 | url="https://lawrencemcdaniel.com/", 54 | packages=find_packages(), 55 | package_data={ 56 | "openai_embeddings": ["*.md"], 57 | }, 58 | install_requires=load_requirements("base.txt"), 59 | extras_require={ 60 | "dev": load_requirements("local.txt"), 61 | }, 62 | ) 63 | -------------------------------------------------------------------------------- /setup_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Test setup.py.""" 3 | import subprocess 4 | import unittest 5 | 6 | 7 | class TestSetup(unittest.TestCase): 8 | """Test setup.py.""" 9 | 10 | def test_setup_syntax(self): 11 | """Test setup.py syntax.""" 12 | result = subprocess.run(["python", "setup.py", "check"], capture_output=True, text=True, check=False) 13 | assert result.returncode == 0, f"setup.py failed with output:\n{result.stdout}\n{result.stderr}" 14 | assert not result.stderr, "Expected no error output" 15 | 16 | 17 | if __name__ == "__main__": 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /setup_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylint: disable=duplicate-code 3 | """Lawrence McDaniel 
https://lawrencemcdaniel.com."""
import importlib.util
import os
import re
from typing import Dict


MODULE_NAME = "models"
HERE = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.abspath(os.path.join(HERE, MODULE_NAME))

# allow setup.py to be run from any path
os.chdir(os.path.normpath(os.path.join(os.path.abspath(__file__), os.pardir)))


def load_version() -> Dict[str, str]:
    """Stringify the __version__ module.

    Loads models/__version__.py directly by file path (no package import
    required) and returns its module namespace dict; callers read the
    "__version__" key from the result.
    """
    version_file_path = os.path.join(PROJECT_ROOT, "__version__.py")
    spec = importlib.util.spec_from_file_location("__version__", version_file_path)
    version_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(version_module)
    return version_module.__dict__


VERSION = load_version()


def get_semantic_version() -> str:
    """
    Return the semantic version number.

    Example valid values of __version__.py are:
    0.1.17
    0.1.17-next.1
    0.1.17-next.2
    0.1.17-next.123456
    0.1.17-next-major.1
    0.1.17-next-major.2
    0.1.17-next-major.123456

    Note:
    - pypi does not allow semantic version numbers to contain a dash.
    - pypi does not allow semantic version numbers to contain a 'v' prefix.
    - pypi does not allow semantic version numbers to contain a 'next' suffix.
    """
    version = VERSION["__version__"]
    # Strip "-next.N" first; the pattern requires a literal "." right after
    # "next", so it cannot accidentally match "-next-major.N", which the
    # second substitution then removes.
    version = re.sub(r"-next\.\d+", "", version)
    return re.sub(r"-next-major\.\d+", "", version)
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
# setup a basic tox environment for flake8 with the default python3.11
# environment
[tox]
envlist = py3.11,flake8
skip_missing_interpreters = true

# NOTE(review): this section uses pyproject-style quoting; isort does not read
# tox.ini sections written this way, so it is presumably inert -- confirm.
[tool.isort]
profile = "black"
skip =venv,node_modules

# NOTE(review): 3.12 maps to the py311 env below -- envlist only defines a 3.11
# environment, so this looks like deliberate reuse; verify.
[gh-actions]
python =
    3.8: gitlint,py38,flake8
    3.9: gitlint,py39,flake8
    3.10: gitlint,py310,flake8
    3.11: gitlint,py311,flake8,mypy,black,pylint
    3.12: gitlint,py311,flake8,mypy,black,pylint

# NOTE(review): requirements live under requirements/ (base.txt, local.txt) in
# this repo; a top-level requirements.txt is not visible here -- confirm path.
[testenv]
deps = -rrequirements.txt
commands = pytest

[testenv:flake8]
skip_install = True
deps = flake8
commands = flake8

[testenv:gitlint]
skip_install = True
deps = gitlint
commands = gitlint {posargs}

[testenv:bumpversion]
skip_install = True
passenv =
    # Git can only find its global configuration if it knows where the
    # user's HOME is.
    HOME
    # We set sign_tags in .bumpversion.cfg, so pass in the GnuPG agent
    # reference to avoid having to retype the passphrase for an
    # already-cached private key.
    GPG_AGENT_INFO
deps = bump2version
commands = bump2version {posargs}

[testenv:pylint]
deps = pylint
commands =
    pylint .