├── .editorconfig
├── .flake8
├── .gitattributes
├── .github
│   ├── CONTRIBUTING.md
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   ├── config.yml
│   │   └── feature_request.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── actions
│   │   ├── merge-branch
│   │   │   └── action.yml
│   │   └── tests
│   │       └── python
│   │           └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── auto-assign.yml
│       ├── precommitVersionBumps.yml
│       ├── pullRequestController.yml
│       ├── pushMain.yml
│       ├── semanticVersionBump.yml
│       └── testsPython.yml
├── .gitignore
├── .mergify.yml
├── .pre-commit-config.yaml
├── .prettierignore
├── .prettierrc
├── .pylintrc
├── .vscode
│   ├── extensions.json
│   └── settings.json
├── CHANGELOG.md
├── LICENSE
├── Makefile
├── README.md
├── codespell.txt
├── commitlint.config.js
├── models
│   ├── __init__.py
│   ├── __version__.py
│   ├── conf.py
│   ├── const.py
│   ├── examples
│   │   ├── __init__.py
│   │   ├── certification_programs.py
│   │   ├── load.py
│   │   ├── online_courses.py
│   │   ├── pinecone_init.py
│   │   ├── prompt.py
│   │   └── rag.py
│   ├── exceptions.py
│   ├── hybrid_search_retreiver.py
│   ├── pinecone.py
│   ├── prompt_templates.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── mock_data
│   │   │   ├── .env.test_01
│   │   │   ├── .env.test_illegal_nulls
│   │   │   ├── .env.test_legal_nulls
│   │   │   └── test_load.pdf
│   │   ├── test_configuration.py
│   │   ├── test_examples.py
│   │   ├── test_hsr.py
│   │   ├── test_openai.py
│   │   ├── test_pinecone.py
│   │   ├── test_prompt_templates.py
│   │   └── test_prompts.py
│   └── yt.py
├── package.json
├── pyproject.toml
├── release.config.js
├── requirements
│   ├── base.txt
│   └── local.txt
├── run_pylint.sh
├── setup.py
├── setup_test.py
├── setup_utils.py
└── tox.ini
/.editorconfig:
--------------------------------------------------------------------------------
1 | # see http://editorconfig.org
2 | root = true
3 |
4 | [*]
5 | end_of_line = lf
6 | trim_trailing_whitespace = true
7 | insert_final_newline = true
8 | indent_style = space
9 | indent_size = 2
10 | charset = utf-8
11 | tab_width = 4
12 |
13 | [*.md]
14 | trim_trailing_whitespace = false
15 |
16 | [*.py]
17 | indent_size = 4
18 |
19 | [go.mod]
20 | indent_style = tab
21 | indent_size = 1
22 |
23 | [*.go]
24 | indent_style = tab
25 | indent_size = 1
26 |
27 | [Makefile]
28 | indent_style = tab
29 | indent_size = 1
30 |
31 | [Makefile.*]
32 | indent_style = tab
33 | indent_size = 1
34 |
35 | [LICENSE]
36 | indent_size = none
37 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore=D205,D413,D400,D401
3 | max-line-length=120
4 | max-complexity=10
5 | exclude=venv
6 | extend-exclude="*__init__.py,*__version__.py,venv"
7 | select="C101"
8 |
--------------------------------------------------------------------------------
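The flake8 settings above are discovered automatically when flake8 runs from the repository root; a minimal local check (a sketch, assuming flake8 is installed, e.g. from the requirements files):

    # run from the repo root; flake8 picks up .flake8 on its own
    flake8 .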
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | # * text eol=lf
3 |
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | The repository is released under the GNU Affero General Public License and follows a standard GitHub development process, using the GitHub issue tracker for issues and merging pull requests into main.
4 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: FullStackWithLawrence
4 | patreon: FullStackWithLawrence
5 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | ---
5 |
6 | **Describe the bug**
7 | A clear and concise description of what the bug is.
8 |
9 | **Workflow**
10 | If applicable, provide a workflow file to help explain your problem.
11 |
12 | **Expected behavior**
13 | A clear and concise description of what you expected to happen.
14 |
15 | **Additional context**
16 | Add any other context about the problem here.
17 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | ---
5 |
6 | **Is your feature request related to a problem? Please describe.**
7 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
8 |
9 | **Describe the solution you'd like**
10 | A clear and concise description of what you want to happen.
11 |
12 | **Describe alternatives you've considered**
13 | A clear and concise description of any alternative solutions or features you've considered.
14 |
15 | **Additional context**
16 | Add any other context or screenshots about the feature request here.
17 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # Pull Request Template
2 |
3 | ## Type of Change
4 |
5 |
6 |
7 | - [ ] New feature
8 | - [ ] Bug fix
9 | - [ ] Documentation
10 | - [ ] Refactor
11 | - [ ] Chore
12 |
13 | ## Resolves
14 |
15 | - Fixes #[Add issue number here.]
16 |
17 | ## Changes
18 |
19 |
20 |
21 | _Describe what this Pull Request does_
22 |
23 | ## Testing
24 |
25 |
26 |
27 | _Describe the testing that has been done or needs to be done_
28 |
29 | ## Screenshots
30 |
31 |
32 |
33 | _Add any relevant screenshots_
34 |
35 | ## Dependencies
36 |
37 |
38 |
39 | _List dependencies_
40 |
41 | ## Breaking Changes
42 |
43 |
44 |
45 | _Describe any breaking changes_
46 |
--------------------------------------------------------------------------------
/.github/actions/merge-branch/action.yml:
--------------------------------------------------------------------------------
1 | ---
2 | #------------------------------------------------------------------------------
3 | # Merge a source branch into a target branch
4 | #------------------------------------------------------------------------------
5 | name: Merge
6 | branding:
7 | icon: "git-pull-request"
8 | color: "orange"
9 | inputs:
10 | github-token:
11 | description: "The GitHub token to use for authentication"
12 | required: true
13 | type: string
14 | source-branch:
15 | description: "The branch to merge from"
16 | required: false
17 | type: string
18 | default: "main"
19 | target-branch:
20 | description: "The branch to merge to"
21 | required: true
22 | type: string
23 |
24 | python-version:
25 | description: "The version of Python to use, such as 3.12"
26 | required: true
27 | type: string
28 |
29 | runs:
30 | using: "composite"
31 | steps:
32 | - name: Checkout code
33 | id: checkout
34 | uses: actions/checkout@v4
35 | with:
36 | fetch-depth: 0
37 | persist-credentials: false
38 |
39 | - name: Remember current branch
40 | shell: bash
41 | run: |
42 | echo "CURRENT_BRANCH=$(git branch --show-current)" >> $GITHUB_ENV
43 |
44 | - name: Merge
45 | id: merge
46 | shell: bash
47 | run: |
48 | git config --local user.email "action@github.com"
49 | git config --local user.name "GitHub Action"
50 | git checkout ${{ inputs.source-branch }}
51 | git pull
52 | git checkout ${{ inputs.target-branch }}
53 | git merge -Xtheirs ${{ inputs.source-branch }}
54 | git push https://${{ inputs.github-token }}@github.com/${{ github.repository }}.git HEAD:${{ inputs.target-branch }}
55 |
56 | - name: Checkout current branch
57 | shell: bash
58 | run: |
59 | git checkout ${{ env.CURRENT_BRANCH }}
60 |
--------------------------------------------------------------------------------
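For reference, the merge step above reduces to a short git sequence that can be reproduced locally; a sketch assuming a clone with both branches fetched, using the default branch names from this repository:

    git checkout main && git pull   # source branch (inputs.source-branch)
    git checkout next               # target branch (inputs.target-branch)
    git merge -Xtheirs main         # on conflict, prefer the incoming (main) side
    git push origin next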
/.github/actions/tests/python/action.yml:
--------------------------------------------------------------------------------
1 | ---
2 | #------------------------------------------------------------------------------
3 | # Run Python unit tests
4 | #------------------------------------------------------------------------------
5 | name: Test Python
6 | branding:
7 | icon: "git-pull-request"
8 | color: "orange"
9 | inputs:
10 | python-version:
11 | description: "The version of Python to use, such as 3.12"
12 | required: true
13 | type: string
14 | openai-api-organization:
15 | description: "The OpenAI API organization"
16 | required: true
17 | type: string
18 | openai-api-key:
19 | description: "The OpenAI API key"
20 | required: true
21 | type: string
22 | pinecone-api-key:
23 | description: "The Pinecone API key"
24 | required: true
25 | type: string
26 | pinecone-environment:
27 | description: "The Pinecone environment"
28 | required: true
29 | type: string
30 |
31 | runs:
32 | using: "composite"
33 | steps:
34 | - name: Checkout code
35 | id: checkout
36 | uses: actions/checkout@v4
37 |
38 | - name: Cache Python dependencies
39 | uses: actions/cache@v3
40 | with:
41 | path: ~/.cache/pip
42 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements/local.txt') }}
43 | restore-keys: |
44 | ${{ runner.os }}-pip
45 |
46 | - name: Set up Python
47 | uses: actions/setup-python@v4
48 | with:
49 | python-version: ${{ inputs.python-version }}
50 |
51 | - name: locate site-packages path
52 | shell: bash
53 | run: |
54 | echo "SITE_PACKAGES_PATH=$(python -c 'import site; print(site.getsitepackages()[0])')" >> $GITHUB_ENV
55 |
56 | - name: Install pip
57 | shell: bash
58 | run: |
59 | python -m pip install --upgrade pip
60 |
61 | - name: Install dependencies
62 | shell: bash
63 | run: |
64 | pip install -r ./requirements/local.txt
65 | env:
66 | SITE_PACKAGES_PATH: ${{ env.SITE_PACKAGES_PATH }}
67 |
68 | - name: Create .env
69 | shell: bash
70 | run: |
71 | touch ./.env
72 | echo "OPENAI_API_ORGANIZATION=${{ env.OPENAI_API_ORGANIZATION }}" >> ./.env
73 | echo "OPENAI_API_KEY=${{ env.OPENAI_API_KEY }}" >> ./.env
74 | echo "PINECONE_API_KEY=${{ env.PINECONE_API_KEY }}" >> ./.env
75 | echo "PINECONE_ENVIRONMENT=${{ env.PINECONE_ENVIRONMENT }}" >> ./.env
76 | echo "DEBUG_MODE=False" >> ./.env
77 | env:
78 | OPENAI_API_ORGANIZATION: ${{ inputs.openai-api-organization }}
79 | OPENAI_API_KEY: ${{ inputs.openai-api-key }}
80 | PINECONE_API_KEY: ${{ inputs.pinecone-api-key }}
81 | PINECONE_ENVIRONMENT: ${{ inputs.pinecone-environment }}
82 |
83 | - name: Run Python unit tests
84 | shell: bash
85 | run: |
86 | make test
87 |
--------------------------------------------------------------------------------
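Outside of CI, the same test environment can be prepared by hand; a hedged sketch with placeholder credentials (the real values come from repository secrets):

    python -m pip install --upgrade pip
    pip install -r requirements/local.txt
    # placeholder values below are illustrative only
    cat > .env <<'EOF'
    OPENAI_API_ORGANIZATION=org-example
    OPENAI_API_KEY=sk-example
    PINECONE_API_KEY=example-key
    PINECONE_ENVIRONMENT=example-environment
    DEBUG_MODE=False
    EOF
    make test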
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 | directory: "/"
5 | schedule:
6 | interval: "monthly"
7 | assignees:
8 | - "lpm0073"
9 | reviewers:
10 | - "lpm0073"
11 | - package-ecosystem: "npm"
12 | directory: "/"
13 | schedule:
14 | interval: "monthly"
15 | labels:
16 | - "dependencies"
17 | - "javascript"
18 | assignees:
19 | - "FullStackWithLawrence"
20 | reviewers:
21 | - "FullStackWithLawrence"
22 | - package-ecosystem: "pip"
23 | directory: "/"
24 | schedule:
25 | interval: "monthly"
26 | labels:
27 | - "dependencies"
28 | - "python"
29 | assignees:
30 | - "lpm0073"
31 | reviewers:
32 | - "lpm0073"
33 |
--------------------------------------------------------------------------------
/.github/workflows/auto-assign.yml:
--------------------------------------------------------------------------------
1 | name: Auto Assign
2 | on:
3 | issues:
4 | types: [opened]
5 | pull_request:
6 | types: [opened]
7 | jobs:
8 | run:
9 | runs-on: ubuntu-latest
10 | permissions:
11 | issues: write
12 | pull-requests: write
13 | steps:
14 | - name: "Auto-assign issue"
15 | uses: pozil/auto-assign-issue@v2
16 | with:
17 | repo-token: ${{ secrets.GITHUB_TOKEN }}
18 | assignees: lpm0073
19 | numOfAssignee: 1
20 |
--------------------------------------------------------------------------------
/.github/workflows/precommitVersionBumps.yml:
--------------------------------------------------------------------------------
1 | ---
2 | #------------------------------------------------------------------------------
3 | # Lawrence McDaniel - https://lawrencemcdaniel.com
4 | # Version Bump Workflow for .pre-commit-config.yaml
5 | #
6 | # This workflow runs on a cron schedule and checks for updates to the
7 | # .pre-commit-config.yaml file. If updates are found, the workflow
8 | # commits the changes to the next branch and pushes the changes to GitHub.
9 | #
10 | # This is a workaround for the fact that the pre-commit autoupdate command
11 | # is not supported by Dependabot.
12 | #------------------------------------------------------------------------------
13 | name: pre-commit Version Bumps
14 |
15 | on:
16 | schedule:
17 | - cron: "0 0 * * 3"
18 | workflow_dispatch:
19 |
20 | jobs:
21 | evaluate_precommit_config:
22 | runs-on: ubuntu-latest
23 |
24 | steps:
25 | - uses: actions/checkout@v4
26 | with:
27 | persist-credentials: false
28 |
29 | - name: Checkout next branch
30 | run: |
31 | git fetch
32 | git checkout next
33 | git pull origin next
34 |
35 | - name: Cache NPM dependencies
36 | uses: actions/cache@v4
37 | with:
38 | path: ~/.npm
39 | key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
40 | restore-keys: |
41 | ${{ runner.os }}-node
42 |
43 | - name: Cache Python dependencies
44 | uses: actions/cache@v4
45 | with:
46 | path: ~/.cache/pip
47 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements/local.txt') }}
48 | restore-keys: |
49 | ${{ runner.os }}-pip
50 |
51 | - name: Set up Python
52 | uses: actions/setup-python@v5
53 | with:
54 | python-version: "3.12"
55 |
56 | - name: locate site-packages path
57 | shell: bash
58 | run: |
59 | echo "SITE_PACKAGES_PATH=$(python -c 'import site; print(site.getsitepackages()[0])')" >> $GITHUB_ENV
60 |
61 | - name: Install pip
62 | shell: bash
63 | run: |
64 | python -m pip install --upgrade pip
65 |
66 | - name: Install dependencies
67 | shell: bash
68 | run: |
69 | pip install -r ./requirements/local.txt
70 | env:
71 | SITE_PACKAGES_PATH: ${{ env.SITE_PACKAGES_PATH }}
72 |
73 | - name: Setup Node.js environment
74 | uses: actions/setup-node@v4
75 | with:
76 | node-version: "20.9.0"
77 |
78 | - name: Install npm dev dependencies
79 | run: npm install
80 |
81 | - name: Update .pre-commit-config.yaml
82 | run: |
83 | pre-commit autoupdate
84 |
85 | - name: Check for unstaged changes
86 | id: check_changes
87 | run: |
88 | if [[ -n "$(git status --porcelain .pre-commit-config.yaml)" ]]; then
89 | echo "changes=true" >> "$GITHUB_OUTPUT"
90 | else
91 | echo "changes=false" >> "$GITHUB_OUTPUT"
92 | fi
93 |
94 | - name: Commit and push changes
95 | if: steps.check_changes.outputs.changes == 'true'
96 | shell: bash
97 | run: |
98 | git config --local user.email "action@github.com"
99 | git config --local user.name "GitHub Action"
100 | git add .pre-commit-config.yaml
101 | git commit -m "chore: [gh] version bumps in .pre-commit-config.yaml [skip ci]"
102 | git push https://${{ secrets.PAT }}@github.com/${{ github.repository }}.git HEAD:next
103 |
--------------------------------------------------------------------------------
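The workflow's detection logic can be exercised locally before pushing; a minimal sketch of the same autoupdate-then-check sequence:

    pre-commit autoupdate
    # non-empty porcelain output means hook revisions were bumped
    if [[ -n "$(git status --porcelain .pre-commit-config.yaml)" ]]; then
      git add .pre-commit-config.yaml
      git commit -m "chore: version bumps in .pre-commit-config.yaml"
    fi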
/.github/workflows/pullRequestController.yml:
--------------------------------------------------------------------------------
1 | ---
2 | #------------------------------------------------------------------------------
3 | # Pull Request Workflow Controller.
4 | #
5 | # Triggers:
6 | # - Called automatically on relevant actions performed on pull requests.
7 | # - Can also be run manually by clicking the "Run workflow" button.
8 | #
9 | # Actions:
10 | # - Use semantic release rules to determine if a new release will be published.
11 | # - run Python tests, but only if Python-related files have changed.
12 | # - run Terraform tests, but only if Terraform-related files have changed.
13 | # - run ReactJS tests, but only if ReactJS-related files have changed.
14 | # - run pre-commit hooks to ensure code is formatted correctly.
15 | #
16 | # To-Do:
17 | # If a new release is to be published then we want to consider running QA tests
18 | # to ensure formatting and documentation is correct.
19 | #------------------------------------------------------------------------------
20 | name: Pull Request Controller
21 |
22 | on:
23 | workflow_dispatch:
24 | # GitHub Copilot: The `pull_request` and `pull_request_target` are two different
25 | # event types in GitHub Actions that trigger workflows when activity related
26 | # to pull requests occurs.
27 | # - `pull_request`: This event triggers a workflow run whenever a pull
28 | # request is opened, synchronized, or closed. The workflow runs in the context of the
29 | # pull request, meaning it has access to the code and environment variables of the head
30 | # branch of the pull request. This is safe for pull requests within the same repository,
31 | # but for pull requests from a fork, this could potentially expose sensitive information.
32 | #
33 | # - `pull_request_target`: This event is similar to `pull_request`, but it runs in the context
34 | # of the base of the pull request, rather than the head. This means it has access to the code
35 | # and environment variables of the base branch, not the head branch. This is safer for
36 | # pull requests from forks, as it prevents the fork from accessing sensitive information
37 | # in the base repository. However, it means the workflow does not have access to the code
38 | # in the pull request by default. If you need to access the code in the pull request,
39 | # you can use the `actions/checkout` action with the `ref` input
40 | # set to `github.event.pull_request.head.ref`.
41 | #
42 | # In general, use `pull_request` for workflows that need to access the code in the pull request,
43 | # and `pull_request_target` for workflows that need to be safe for pull requests from forks.
44 | pull_request_target:
45 | types: [opened, synchronize]
46 | paths:
47 | - "**.py"
48 | - "./requirements"
49 | - "**.package.json"
50 | - "./models/**"
51 |
52 | env:
53 | python-version: "3.12"
54 |
55 | jobs:
56 | check_for_pending_release:
57 | name: test-semantic-release
58 | runs-on: ubuntu-latest
59 | steps:
60 | - name: Checkout
61 | uses: actions/checkout@v4
62 |
63 | - name: Semantic Release
64 | uses: cycjimmy/semantic-release-action@v4
65 | id: semantic
66 | with:
67 | dry_run: true
68 | branches: |
69 | [
70 | '+([0-9])?(.{+([0-9]),x}).x',
71 | 'main',
72 | 'next',
73 | 'next-major',
74 | {
75 | name: 'beta',
76 | prerelease: true
77 | },
78 | {
79 | name: 'alpha',
80 | prerelease: true
81 | }
82 | ]
83 | extra_plugins: |
84 | @semantic-release/git
85 | @semantic-release/changelog
86 | env:
87 | GITHUB_TOKEN: ${{ secrets.PAT }}
88 |
89 | - name: Test Outputs
90 | if: steps.semantic.outputs.new_release_published == 'true'
91 | run: |
92 | echo ${{ steps.semantic.outputs.new_release_version }}
93 | echo ${{ steps.semantic.outputs.new_release_major_version }}
94 | echo ${{ steps.semantic.outputs.new_release_minor_version }}
95 | echo ${{ steps.semantic.outputs.new_release_patch_version }}
96 |
--------------------------------------------------------------------------------
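The release check in check_for_pending_release mirrors what the semantic-release CLI reports in a dry run; an approximate local equivalent, assuming the dev dependencies from package.json are installed and a suitably scoped token is exported:

    npm install
    export GITHUB_TOKEN=<personal-access-token>   # placeholder
    npx semantic-release --dry-run --no-ci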
/.github/workflows/pushMain.yml:
--------------------------------------------------------------------------------
1 | ---
2 | #---------------------------------------------------------
3 | # - Create a semantical release
4 | # - Merge main into next, alpha, beta, and next-major
5 | #---------------------------------------------------------
6 | name: Push to main
7 |
8 | on:
9 | workflow_dispatch:
10 | push:
11 | branches:
12 | - main
13 | jobs:
14 | merge-main-to-dev-branches:
15 | runs-on: ubuntu-latest
16 | env:
17 | GITHUB_TOKEN: ${{ secrets.PAT }}
18 |
19 | steps:
20 | - name: Checkout code
21 | id: checkout
22 | uses: actions/checkout@v4
23 |
24 | - name: Merge main into next
25 | uses: ./.github/actions/merge-branch
26 | with:
27 | github-token: ${{ env.GITHUB_TOKEN }}
28 | source-branch: main
29 | target-branch: next
30 |
31 | - name: Merge main into next-major
32 | uses: ./.github/actions/merge-branch
33 | with:
34 | github-token: ${{ env.GITHUB_TOKEN }}
35 | source-branch: main
36 | target-branch: next-major
37 |
38 | - name: Merge main into alpha
39 | uses: ./.github/actions/merge-branch
40 | with:
41 | github-token: ${{ env.GITHUB_TOKEN }}
42 | source-branch: main
43 | target-branch: alpha
44 |
45 | - name: Merge main into beta
46 | uses: ./.github/actions/merge-branch
47 | with:
48 | github-token: ${{ env.GITHUB_TOKEN }}
49 | source-branch: main
50 | target-branch: beta
51 |
52 | semantic-release:
53 | needs: merge-main-to-dev-branches
54 | runs-on: ubuntu-latest
55 | env:
56 | GITHUB_TOKEN: ${{ secrets.PAT }}
57 |
58 | steps:
59 | - uses: actions/checkout@v4
60 | id: checkout
61 | with:
62 | persist-credentials: false
63 |
64 | - name: Semantic Release
65 | uses: cycjimmy/semantic-release-action@v4
66 | id: semantic
67 | with:
68 | branches: |
69 | [
70 | '+([0-9])?(.{+([0-9]),x}).x',
71 | 'main',
72 | 'next',
73 | 'next-major',
74 | {
75 | name: 'beta',
76 | prerelease: true
77 | },
78 | {
79 | name: 'alpha',
80 | prerelease: true
81 | }
82 | ]
83 | extra_plugins: |
84 | @semantic-release/git
85 | @semantic-release/changelog
86 | env:
87 | GIT_COMMITTER_NAME: github-actions[bot]
88 | GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com
89 | GIT_AUTHOR_NAME: github-actions[bot]
90 | GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com
91 |
92 | - name: Publish To GitHub Package Registry
93 | id: publish
94 | if: steps.semantic.outputs.new_release_published == 'true'
95 | run: echo "new release was published"
96 | shell: bash
97 |
98 | - name: Push updates to branch for major version
99 | id: push_major
100 | if: steps.semantic.outputs.new_release_published == 'true'
101 | run: "git push https://x-access-token:${{ env.GITHUB_TOKEN }}@github.com/${GITHUB_REPOSITORY}.git HEAD:refs/heads/v${{steps.semantic.outputs.new_release_major_version}}"
102 | shell: bash
103 |
--------------------------------------------------------------------------------
/.github/workflows/semanticVersionBump.yml:
--------------------------------------------------------------------------------
1 | ---
2 | #------------------------------------------------------------------------------
3 | # Lawrence McDaniel - https://lawrencemcdaniel.com
4 | # Version Bump Workflow for Python package.
5 | #
6 | # Calculate the version of the 'next' branch based on semantic-release rules.
7 | # Compares the existing value of __version__.py to the calculated value.
8 | # If they are different, it will update __version__.py and push the changes
9 | # to the main branch.
10 | #------------------------------------------------------------------------------
11 | name: Semantic Version Bump (next)
12 |
13 | on:
14 | workflow_dispatch:
15 | push:
16 | branches:
17 | - alpha
18 | - beta
19 | - next
20 | - next-major
21 |
22 | jobs:
23 | bump-version-next:
24 | runs-on: ubuntu-latest
25 | env:
26 | VERSION_FILE: __version__.py
27 | PACKAGE_PATH: ${{ github.workspace }}/models/
28 |
29 | steps:
30 | - uses: actions/checkout@v4
31 | with:
32 | persist-credentials: false
33 |
34 | - name: Cache NPM dependencies
35 | uses: actions/cache@v4
36 | with:
37 | path: ~/.npm
38 | key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
39 | restore-keys: |
40 | ${{ runner.os }}-node
41 |
42 | - name: Set up Python 3.12
43 | uses: actions/setup-python@v5
44 | with:
45 | python-version: "3.12"
46 |
47 | - name: Setup Node.js environment
48 | uses: actions/setup-node@v4
49 | with:
50 | node-version: "20.9.0"
51 |
52 | - name: Install npm dev dependencies
53 | run: npm install
54 |
55 | - name: Get current version
56 | # step 1
57 | # the current version persisted to __version__.py
58 | id: current_version
59 | run: |
60 | cd ${{ env.PACKAGE_PATH }}
61 | echo "CURRENT_VERSION=$(python -c 'from __version__ import __version__; print(__version__)')" >> $GITHUB_ENV
62 | env:
63 | GITHUB_TOKEN: ${{ secrets.PAT }}
64 |
65 | - name: null step
66 | id: null_step1
67 | run: echo "i ensure that CURRENT_VERSION is set."
68 |
69 | - name: Get next version
70 | # step 2
71 | # calculate the next version based on semantic-release rules
72 | # this will return an empty string if there is in fact no version bump,
73 | # so set NEXT_VERSION to CURRENT_VERSION if there is no version bump.
74 | id: next_version
75 | run: |
76 | NEXT_VERSION=$(npx semantic-release --dry-run --no-ci | awk '/The next release version is/{print $NF}')
77 | echo "NEXT_VERSION=${NEXT_VERSION:-${{ env.CURRENT_VERSION }}}" >> $GITHUB_ENV
78 | env:
79 | GITHUB_TOKEN: ${{ secrets.PAT }}
80 | CURRENT_VERSION: ${{ env.CURRENT_VERSION }}
81 |
82 | - name: null step
83 | id: null_step2
84 | run: echo "i ensure that NEXT_VERSION is set."
85 |
86 | - name: Check versions
87 | # step 3
88 | # compare the current version to the next version.
89 | # if they are different, set VERSION_CHANGED to true
90 | id: check_versions
91 | run: |
92 | if [ "$CURRENT_VERSION" != "$NEXT_VERSION" ]; then
93 | echo "VERSION_CHANGED=true" >> $GITHUB_ENV
94 | else
95 | echo "VERSION_CHANGED=false" >> $GITHUB_ENV
96 | fi
97 | env:
98 | CURRENT_VERSION: ${{ env.CURRENT_VERSION }}
99 | NEXT_VERSION: ${{ env.NEXT_VERSION }}
100 |
101 | - name: another null step
102 | id: null_step3
103 | run: echo "i ensure that CURRENT_VERSION, NEXT_VERSION and VERSION_CHANGED are set."
104 |
105 | - name: Update __version__.py
106 | # step 4
107 | # if VERSION_CHANGED is true, update __version__.py and push the changes to the
108 | # branch that triggered this workflow.
109 | if: env.VERSION_CHANGED == 'true'
110 | id: update_version
111 | run: |
112 | echo "# -*- coding: utf-8 -*-" > ${{ env.VERSION_FILE }}
113 | echo "# DO NOT EDIT." > ${{ env.VERSION_FILE }}
114 | echo "# Managed via automated CI/CD in .github/workflows/semanticVersionBump.yml." > ${{ env.VERSION_FILE }}
115 | echo "__version__ = \"${{ env.NEXT_VERSION }}\"" >> ${{ env.VERSION_FILE }}
116 | git config --local user.email "action@github.com"
117 | git config --local user.name "GitHub Action"
118 | git add ${{ env.VERSION_FILE }}
119 | git commit -m "chore: [gh] Update __version__.py to ${{ env.NEXT_VERSION }} [skip ci]"
120 | git push https://${{ secrets.PAT }}@github.com/${{ github.repository }}.git HEAD:${{ github.ref }}
121 | env:
122 | VERSION_FILE: ${{ env.PACKAGE_PATH }}${{ env.VERSION_FILE }}
123 | GITHUB_TOKEN: ${{ secrets.PAT }}
124 | NEXT_VERSION: ${{ env.NEXT_VERSION }}
125 | VERSION_CHANGED: ${{ env.VERSION_CHANGED }}
126 |
--------------------------------------------------------------------------------
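Steps 1 through 3 of the workflow boil down to two shell commands; a local sketch, run from the repository root (this assumes models/ is importable from the root, since models/__init__.py exists):

    # current version persisted in models/__version__.py
    CURRENT_VERSION=$(python -c 'from models.__version__ import __version__; print(__version__)')
    # next version per semantic-release rules; empty when no release is due
    NEXT_VERSION=$(npx semantic-release --dry-run --no-ci | awk '/The next release version is/{print $NF}')
    echo "current=${CURRENT_VERSION} next=${NEXT_VERSION:-$CURRENT_VERSION}"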
/.github/workflows/testsPython.yml:
--------------------------------------------------------------------------------
1 | name: Python Unit Tests
2 |
3 | on:
4 | workflow_dispatch:
5 | pull_request:
6 | paths:
7 | - "**.py"
8 | push:
9 | paths:
10 | - "**.py"
11 | branches:
12 | - main
13 |
14 | env:
15 | python-version: "3.12"
16 |
17 | jobs:
18 | python-unit-tests:
19 | runs-on: ubuntu-latest
20 | steps:
21 | - name: Checkout code
22 | id: checkout
23 | uses: actions/checkout@v4
24 |
25 | - name: Configure AWS credentials
26 | uses: aws-actions/configure-aws-credentials@v4
27 | with:
28 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
29 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
30 | aws-region: ${{ secrets.AWS_REGION }}
31 |
32 | - name: Run Python tests
33 | uses: ./.github/actions/tests/python
34 | with:
35 | python-version: "${{ env.python-version }}"
36 | openai-api-organization: "${{ secrets.OPENAI_API_ORGANIZATION }}"
37 | openai-api-key: "${{ secrets.OPENAI_API_KEY }}"
38 | pinecone-api-key: "${{ secrets.PINECONE_API_KEY }}"
39 | pinecone-environment: "${{ secrets.PINECONE_ENVIRONMENT }}"
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | .env
3 | data
4 | .DS_Store
5 | *.zip
6 |
7 | # Python
8 | build
9 | __pycache__
10 | .pytest_cache
11 | *.egg-info
12 | venv
13 | .venv
15 | *.pyc
16 | *.pyo
17 | *.pyd
18 | *.swp
19 | *.log
20 |
21 | # npm
22 | node_modules
23 | package-lock.json
24 |
--------------------------------------------------------------------------------
/.mergify.yml:
--------------------------------------------------------------------------------
1 | # see:
2 | # - https://docs.mergify.com/getting-started/
3 | pull_request_rules:
4 | - name: automatic approve dependabot pull requests
5 | conditions:
6 | - "author~=dependabot[bot]|dependabot-preview[bot]|dependabot"
7 | actions:
8 | review:
9 | type: APPROVE
10 |
11 | - name: automatic merge dependabot pull requests
12 | conditions:
13 | - "author~=dependabot[bot]|dependabot-preview[bot]|dependabot"
14 | - "#approved-reviews-by>=1"
15 | - "base=main" # replace 'main' with the name of the branch you want to auto-merge into
16 | actions:
17 | merge:
18 | method: merge
19 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_language_version:
2 | # default language version for each language
3 | python: python3.12
4 | repos:
5 | - repo: https://github.com/codespell-project/codespell
6 | rev: v2.3.0
7 | hooks:
8 | - id: codespell
9 | args: ["--ignore-words=codespell.txt"]
10 | exclude: 'codespell.txt|\.svg$'
11 | - repo: https://github.com/pre-commit/mirrors-prettier
12 | rev: v4.0.0-alpha.8
13 | hooks:
14 | - id: prettier
15 | - repo: https://github.com/psf/black
16 | rev: 24.10.0
17 | hooks:
18 | - id: black
19 | - repo: https://github.com/PyCQA/flake8
20 | rev: 7.1.1
21 | hooks:
22 | - id: flake8
23 | - repo: https://github.com/PyCQA/isort
24 | rev: 5.13.2
25 | hooks:
26 | - id: isort
27 | args: ["--settings-path=pyproject.toml"]
28 | - repo: local
29 | hooks:
30 | - id: pylint
31 | name: pylint
32 | entry: ./run_pylint.sh
33 | language: script
34 | types: [python]
35 | - repo: https://github.com/PyCQA/bandit
36 | rev: 1.8.0
37 | hooks:
38 | - id: bandit
39 | args: ["-ll"]
40 | - repo: https://github.com/pre-commit/pre-commit-hooks
41 | rev: v5.0.0
42 | hooks:
43 | # See https://pre-commit.com/hooks.html for more hooks
44 | #- id: check-added-large-files
45 | - id: fix-byte-order-marker
46 | - id: fix-encoding-pragma
47 | - id: check-case-conflict
48 | - id: check-json
49 | - id: check-merge-conflict
50 | - id: check-symlinks
51 | - id: check-toml
52 | - id: check-xml
53 | - id: check-yaml
54 | - id: destroyed-symlinks
55 | - id: detect-aws-credentials
56 | - id: detect-private-key
57 | - id: end-of-file-fixer
58 | - id: forbid-new-submodules
59 | - id: trailing-whitespace
60 | - id: check-case-conflict
61 | - id: check-merge-conflict
62 | - id: debug-statements
63 | - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
64 | rev: v9.20.0
65 | hooks:
66 | - id: commitlint
67 | stages: [commit-msg]
68 | additional_dependencies: ["@commitlint/config-angular"]
69 | ci:
70 | # for more information, see https://pre-commit.ci
71 | autofix_commit_msg: |
72 | [pre-commit.ci] auto fixes from pre-commit.com hooks
73 | autofix_prs: true
74 | autoupdate_branch: ""
75 | autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
76 | autoupdate_schedule: weekly
77 | skip: [shellcheck, markdown-link-check, commitlint]
78 | submodules: false
79 |
--------------------------------------------------------------------------------
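With this configuration in place, the hooks can be installed and run across the whole tree; note that the commitlint hook declares stages: [commit-msg], so that hook type must be installed as well:

    pip install pre-commit
    pre-commit install                          # git pre-commit hook
    pre-commit install --hook-type commit-msg   # required for commitlint
    pre-commit run --all-files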
/.prettierignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/.prettierignore
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "tabWidth": 2
3 | }
4 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 | init-hook='import sys; print(sys.executable); print(sys.path)'
3 | ignore-paths=venv
4 | ignore=__version__.py
5 |
6 | [FORMAT]
7 | max-line-length=120
8 |
9 | [MESSAGES CONTROL]
10 | disable=C0103
11 |
--------------------------------------------------------------------------------
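Pylint reaches this configuration through the local run_pylint.sh hook in .pre-commit-config.yaml; the script itself is not shown in this section, but an equivalent manual invocation might look like (a sketch, not the actual script):

    pylint --rcfile=.pylintrc models/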
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": ["ms-python.black-formatter"]
3 | }
4 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "cornflakes.linter.executablePath": "./venv/bin/flake8",
3 | "[python]": {
4 | "editor.defaultFormatter": "ms-python.black-formatter"
5 | }
6 | }
7 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## [1.3.8](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.7...v1.3.8) (2025-05-14)
2 |
3 |
4 | ### Bug Fixes
5 |
6 | * force a new release ([48d8a70](https://github.com/FullStackWithLawrence/openai-embeddings/commit/48d8a70b6f2c53733d05366040de9d2812428084))
7 |
8 | ## [1.3.7](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.6...v1.3.7) (2025-02-07)
9 |
10 |
11 | ### Bug Fixes
12 |
13 | * broken yaml ([db3ccfa](https://github.com/FullStackWithLawrence/openai-embeddings/commit/db3ccfa8a6310f04c24a72f49140d6eada7c8f18))
14 | * remove superfluous checks ([716ede1](https://github.com/FullStackWithLawrence/openai-embeddings/commit/716ede136628193040f4d9863aa2a36b34e3e345))
15 |
16 | ## [1.3.6](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.5...v1.3.6) (2025-02-07)
17 |
18 |
19 | ### Bug Fixes
20 |
21 | * breaking changes in unit tests ([90926a9](https://github.com/FullStackWithLawrence/openai-embeddings/commit/90926a95a30a30f12e98841ecce6ac910625be90))
22 |
23 | # Change Log
24 |
25 | All notable changes to this project will be documented in this file.
26 | The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/).
27 |
28 | ## [1.3.5](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.4...v1.3.5) (2025-02-05)
29 |
30 | ### Bug Fixes
31 |
32 | - LangChain breaking changes and deprecations ([ac7b57e](https://github.com/FullStackWithLawrence/openai-embeddings/commit/ac7b57e75705afdea1d563c6a9e929504d782e87))
33 |
34 | ## [1.3.4](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.3...v1.3.4) (2025-02-05)
35 |
36 | ### Bug Fixes
37 |
38 | - deprecation warnings and breaking changes ([604353e](https://github.com/FullStackWithLawrence/openai-embeddings/commit/604353e60d1197a60c517b14c02dd02909754307))
39 |
40 | ## [1.3.2](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.3.1...v1.3.2) (2024-04-12)
41 |
42 | ### Bug Fixes
43 |
44 | - fix deprecations and breaking changes in LangChain and Pinecone
45 |
46 | ## [1.3.0](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.2.2...v1.3.0) (2023-12-19)
47 |
48 | ### Features
49 |
50 | - add pydantic and refactor settings and credentials management ([332e4da](https://github.com/FullStackWithLawrence/openai-embeddings/commit/332e4dab89924b6ac2436e6d260e645bed26a0b4))
51 |
52 | ## [1.2.2](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.2.1...v1.2.2) (2023-12-19)
53 |
54 | ## [1.2.2](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.2.1...v1.2.2) (2023-12-19)
55 |
56 | ### Bug Fixes
57 |
58 | - force a new release ([6c04b0b](https://github.com/FullStackWithLawrence/openai-embeddings/commit/6c04b0b95486fa25b40c6f4d1954bd22b58df7c9))
59 |
60 | ## [1.2.1](https://github.com/FullStackWithLawrence/openai-embeddings/compare/v1.2.0...v1.2.1) (2023-12-04)
61 |
62 | ### Bug Fixes
63 |
64 | - force a new release ([e21f9c5](https://github.com/FullStackWithLawrence/openai-embeddings/commit/e21f9c56b6dc3be3320afb88a491b43fc04d365b))
65 |
66 | ## [1.2.0](https://github.com/lpm0073/hybrid-search-retriever/compare/v1.1.3...v1.2.0) (2023-12-03)
67 |
68 | ### Features
69 |
70 | - refactor pinecone logic and add pinecone unit tests ([2b8585b](https://github.com/lpm0073/hybrid-search-retriever/commit/2b8585b36e400d04f22e2a5565ea96f4482fd5f4))
71 |
72 | ## [1.1.3](https://github.com/lpm0073/hybrid-search-retriever/compare/v1.1.2...v1.1.3) (2023-12-02)
73 |
74 | ### Bug Fixes
75 |
76 | - add langchain-experimental for yt example ([f9d6d6d](https://github.com/lpm0073/hybrid-search-retriever/commit/f9d6d6d0b11ff9c1f06faf7eb69511bc5702066d))
77 | - correct type error with DEBUG_MODE ([a96bdfd](https://github.com/lpm0073/hybrid-search-retriever/commit/a96bdfdb5a0b015740110e02f9f9b06917cd31c7))
78 | - move retriever results to system_message ([203c8b3](https://github.com/lpm0073/hybrid-search-retriever/commit/203c8b300cda156ac44a0c6e02510c2ab6a2b074))
79 |
80 | ## [1.1.2](https://github.com/lpm0073/hybrid-search-retriever/compare/v1.1.1...v1.1.2) (2023-12-01)
81 |
82 | ### Bug Fixes
83 |
84 | - syntax error in examples.prompt ([230b709](https://github.com/lpm0073/hybrid-search-retriever/commit/230b7090c96bdd4d7d8757b182f891ab1b82c6f4))
85 |
86 | ## [1.1.1](https://github.com/lpm0073/netec-llm/compare/v1.1.0...v1.1.1) (2023-12-01)
87 |
88 | ### Bug Fixes
89 |
90 | - had to switch to bm25_encoder so that vector store is searchable ([bad6994](https://github.com/lpm0073/netec-llm/commit/bad699481d217dde81877d85124395529652dabe))
91 |
92 | # [1.1.0](https://github.com/lpm0073/netec-llm/compare/v1.0.0...v1.1.0) (2023-12-01)
93 |
94 | ### Bug Fixes
95 |
96 | - fix load problem with existing index ([62cd18f](https://github.com/lpm0073/netec-llm/commit/62cd18f8088873a794ec363c4e18770dfdc41ea5))
97 |
98 | ### Features
99 |
100 | - perfect load(). revert rag() to openai only calls ([8de793d](https://github.com/lpm0073/netec-llm/commit/8de793dcca77ec23f09e68ca9e8dba7f64623b3c))
101 | - ssm.rag() w load, split, embed, store ([2335d22](https://github.com/lpm0073/netec-llm/commit/2335d22c5fd9092642ff1eb67a34fbcd712d7f9b))
102 |
103 | # 1.0.0 (2023-11-30)
104 |
105 | ### Features
106 |
107 | - first commit ([9fe5fbb](https://github.com/lpm0073/netec-llm/commit/9fe5fbbd03d278a90a7351a4d907a74783e48684))
108 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 | <one line to give the program's name and a brief idea of what it does.>
633 | Copyright (C) <year> <name of author>
634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published by
637 | the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 | along with this program. If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 |
663 | EdX Inc. wishes to state, in clarification of the above license terms, that
664 | any public, independently available web service offered over the network and
665 | communicating with edX's copyrighted works by any form of inter-service
666 | communication, including but not limited to Remote Procedure Call (RPC)
667 | interfaces, is not a work based on our copyrighted work within the meaning
668 | of the license. "Corresponding Source" of this work, or works based on this
669 | work, as defined by the terms of this license do not include source code
670 | files for programs used solely to provide those public, independently
671 | available web services.
672 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | SHELL := /bin/bash
2 | ifeq ($(OS),Windows_NT)
3 | PYTHON = python.exe
4 | ACTIVATE_VENV = venv\Scripts\activate
5 | else
6 | PYTHON = python3.12
7 | ACTIVATE_VENV = source venv/bin/activate
8 | endif
9 | PIP = $(PYTHON) -m pip
10 |
11 | ifneq ("$(wildcard .env)","")
12 | include .env
13 | else
14 | $(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\n\
15 | OPENAI_API_KEY=PLEASE-ADD-ME\n\
16 | PINECONE_API_KEY=PLEASE-ADD-ME\n\
17 | PINECONE_ENVIRONMENT=gcp-starter\n\
18 | PINECONE_INDEX_NAME=rag\n\
19 | PINECONE_VECTORSTORE_TEXT_KEY=lc_id\n\
20 | PINECONE_METRIC=dotproduct\n\
21 | PINECONE_DIMENSIONS=1536\n\
22 | OPENAI_CHAT_MODEL_NAME=gpt-4\n\
23 | OPENAI_PROMPT_MODEL_NAME=gpt-4\n\
24 | OPENAI_CHAT_TEMPERATURE=0.0\n\
25 | OPENAI_CHAT_MAX_RETRIES=3\n\
26 | DEBUG_MODE=True\n" >> .env)
27 | endif
28 |
29 | .PHONY: all analyze init activate test lint clean release help
30 |
31 | # Default target executed when no arguments are given to make.
32 | all: help
33 |
34 | analyze:
35 | cloc . --exclude-ext=svg,json,zip --vcs=git
36 |
37 | init:
38 | make clean && \
39 | $(PYTHON) -m venv venv && \
40 | $(ACTIVATE_VENV) && \
41 | $(PIP) install --upgrade pip && \
42 | $(PIP) install -r requirements/local.txt && \
43 | npm install && \
44 | pre-commit install
45 |
46 | activate:
47 | . venv/bin/activate
48 |
49 | test:
50 | cd models && pytest -v -s tests/
51 | $(PYTHON) -m setup_test
52 |
53 | lint:
54 | pre-commit run --all-files && \
55 | pylint models && \
56 | flake8 . && \
57 | isort . && \
58 | black .
59 |
60 | clean:
61 | rm -rf venv && rm -rf node_modules && \
62 | find ./models/ -name __pycache__ -type d -exec rm -rf {} +
63 |
64 | release:
65 | git commit -m "fix: force a new release" --allow-empty && git push
66 |
67 | ######################
68 | # HELP
69 | ######################
70 |
71 | help:
72 | @echo '===================================================================='
73 | @echo 'analyze - generate code analysis report'
74 | @echo 'init - create a Python virtual environment and install dependencies'
75 | @echo 'activate - activate the Python virtual environment'
76 | @echo 'test - run Python unit tests'
77 | @echo 'lint - run Python linting'
78 | @echo 'clean - destroy the Python virtual environment'
79 | @echo 'release - force a new semantic release'
79 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OpenAI Embeddings Example
2 |
3 | 🤖 Retrieval Augmented Generation and Hybrid Search 🤖
4 |
5 | [](https://www.youtube.com/@FullStackWithLawrence)
6 | [](https://platform.openai.com/)
7 | [](https://www.langchain.com/)
8 | [](https://www.pinecone.io/)
9 | [](https://www.python.org/)
10 | [](https://pydantic.dev/)
11 | [](https://github.com/FullStackWithLawrence/openai-embeddings/releases)
12 | 
13 | [](https://www.gnu.org/licenses/agpl-3.0.en.html)
14 | [](https://lawrencemcdaniel.com)
15 |
16 | A Hybrid Search and Retrieval Augmented Generation prompting solution using Python [OpenAI API Embeddings](https://platform.openai.com/docs/guides/embeddings) persisted to a [Pinecone](https://docs.pinecone.io/docs/python-client) vector database index and managed by [LangChain](https://www.langchain.com/). It implements the following:
17 |
18 | - **PDF Loader**. A command-line PDF loader program that extracts text, vectorizes, and
19 | loads it into a Pinecone dot-product vector database that is dimensioned to match OpenAI embeddings.
20 | - **Retrieval Augmented Generation**. A chatGPT prompt based on a hybrid search retriever that locates relevant documents from the vector database and includes these in OpenAI prompts.
21 |
22 | Secondarily, I also use this repo to demonstrate how to set up [Pydantic](https://docs.pydantic.dev/latest/) to manage your project settings and how to work safely with sensitive credential data inside your project. A minimal sketch of the pattern follows.
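
The sketch below is illustrative only; the field set is abbreviated, and models/conf.py is the full implementation.

```python
# Minimal sketch of the Pydantic settings pattern used in models/conf.py
# (abbreviated; the real module defines many more fields and validators).
from typing import Optional

from pydantic import Field, SecretStr
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Validated project settings, read from the environment or a .env file."""

    # SecretStr keeps the key out of logs and stack traces
    openai_api_key: Optional[SecretStr] = Field(None, env="OPENAI_API_KEY")
    openai_chat_temperature: Optional[float] = Field(0.0, env="OPENAI_CHAT_TEMPERATURE", ge=0.0, le=1.0)


settings = Settings()
```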
23 |
24 | ## Installation
25 |
26 | ```console
27 | git clone https://github.com/FullStackWithLawrence/openai-embeddings.git
28 | cd openai-embeddings
29 | make init
30 |
31 | # Linux/macOS
32 | source venv/bin/activate
33 |
34 | # Windows Powershell (admin)
35 | venv\Scripts\activate
36 | ```
37 |
38 | You'll also need to add your API keys to the .env file in the root of the repo.
39 |
40 | - Get your [OpenAI API key](https://platform.openai.com/api-keys)
41 | - Get your [Pinecone API Key](https://app.pinecone.io/)
42 |
43 | ```console
44 | OPENAI_API_ORGANIZATION=PLEASE-ADD-ME
45 | OPENAI_API_KEY=PLEASE-ADD-ME
46 | PINECONE_API_KEY=PLEASE-ADD-ME
47 | ```
48 |
49 | ## Usage
50 |
51 | ```console
52 | # example 1 - generic assistant
53 | python3 -m models.examples.prompt "you are a helpful assistant" "What analytics and accounting courses does Wharton offer?"
54 |
55 | # example 2 - assistant with improved system prompting
56 | python3 -m models.examples.prompt "You are a student advisor at University of Pennsylvania. You provide concise answers of 100 words or less." "What analytics and accounting courses does Wharton offer?"
57 |
58 | # example 3 - templated assistant: Online courses
59 | python3 -m models.examples.online_courses "analytics and accounting"
60 |
61 | # example 4 - templated assistant: Certification programs
62 | python3 -m models.examples.certification_programs "analytics and accounting"
63 |
64 | # example 5 - Retrieval Augmented Generation
65 | python3 -m models.examples.load "/path/to/your/pdf/documents"
66 | python3 -m models.examples.rag "What analytics and accounting courses does Wharton offer?"
67 | ```
68 |
69 | ### Retrieval Augmented Generation
70 |
71 | For the question, _"What analytics and accounting courses does Wharton offer?"_, an
72 | embedding can dramatically alter the response generated by chatGPT. To illustrate, I uploaded a batch of 21 sets of lecture notes in PDF format for an online analytics course taught by Wharton professor [Brian Bushee](https://accounting.wharton.upenn.edu/profile/bushee/). You can download these from https://cdn.lawrencemcdaniel.com/fswl/openai-embeddings-data.zip to test whether your results are consistent.
73 |
74 | #### The control set
75 |
76 | Example 1 above, a generic chatGPT prompt with no additional guidance from either a system prompt or an embedding, generates the following response:
77 |
78 | ```console
79 | Wharton offers a variety of analytics and accounting courses. Some of the analytics courses include:
80 |
81 | 1. Introduction to Business Analytics: This course provides an overview of the fundamentals of business analytics, including data analysis, statistical modeling, and decision-making.
82 |
83 | 2. Data Visualization and Communication: This course focuses on the effective presentation and communication of data through visualizations and storytelling techniques.
84 |
85 | 3. Predictive Analytics: This course explores the use of statistical models and machine learning algorithms to predict future outcomes and make data-driven decisions.
86 |
87 | 4. Big Data Analytics: This course covers the analysis of large and complex datasets using advanced techniques and tools, such as Hadoop and Spark.
88 |
89 | In terms of accounting courses, Wharton offers:
90 |
91 | 1. Financial Accounting: This course provides an introduction to the principles and concepts of financial accounting, including the preparation and analysis of financial statements.
92 |
93 | 2. Managerial Accounting: This course focuses on the use of accounting information for internal decision-making and planning, including cost analysis and budgeting.
94 |
95 | 3. Advanced Financial Accounting: This course delves into more complex accounting topics, such as consolidations, partnerships, and international accounting standards.
96 |
97 | 4. Auditing and Assurance Services: This course covers the principles and practices of auditing, including risk assessment, internal controls, and audit procedures.
98 |
99 | These are just a few examples of the analytics and accounting courses offered at Wharton. The school offers a wide range of courses to cater to different interests and skill levels in these fields.
101 | ```
102 |
103 | #### Same prompt but with an embedding
104 |
105 | After creating an embedding from the sample set of PDF documents, you can prompt models.examples.rag with the same question. The response should differ markedly from the control in example 1, resembling the following:
106 |
107 | ```console
108 | Wharton offers a variety of analytics and accounting courses. Some of the courses offered include:
109 |
110 | 1. Accounting-Based Valuation: This course, taught by Professor Brian Bushee, focuses on using accounting information to value companies and make investment decisions.
111 |
112 | 2. Review of Financial Statements: Also taught by Professor Brian Bushee, this course provides an in-depth understanding of financial statements and how to analyze them for decision-making purposes.
113 |
114 | 3. Discretionary Accruals Model: Another course taught by Professor Brian Bushee, this course explores the concept of discretionary accruals and their impact on financial statements and financial analysis.
115 |
116 | 4. Discretionary Accruals Cases: This course, also taught by Professor Brian Bushee, provides practical applications of the discretionary accruals model through case studies and real-world examples.
117 |
118 | These are just a few examples of the analytics and accounting courses offered at Wharton. The school offers a wide range of courses in these areas to provide students with a comprehensive understanding of financial analysis and decision-making.
119 | ```
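
You can also invoke the retriever programmatically rather than through the example modules. A minimal sketch, mirroring models/examples/rag.py:

```python
# Minimal sketch mirroring models/examples/rag.py
from langchain.schema import HumanMessage

from models.hybrid_search_retreiver import HybridSearchRetriever

hsr = HybridSearchRetriever()
result = hsr.rag(human_message=HumanMessage(content="What analytics and accounting courses does Wharton offer?"))
print(result)
```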
120 |
121 | ## Requirements
122 |
123 | - [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git). _pre-installed on Linux and macOS_
124 | - [make](https://gnuwin32.sourceforge.net/packages/make.htm). _pre-installed on Linux and macOS._
125 | - [OpenAI platform API key](https://platform.openai.com/).
126 | _If you're new to OpenAI API then see [How to Get an OpenAI API Key](./doc/OPENAI_API_GETTING_STARTED_GUIDE.md)_
127 | - [Pinecone](https://www.pinecone.io/) API key. A vector database for storing embedding results.
128 | - [Python 3.12](https://www.python.org/downloads/): for creating the virtual environment. Also used by pre-commit linters and code formatters.
129 | - [NodeJS](https://nodejs.org/en/download): used with NPM for configuring/testing Semantic Release.
130 |
131 | ## Configuration defaults
132 |
133 | Set these as environment variables on the command line, or in a .env file located in the root of the repo.
134 |
135 | ```console
136 | # OpenAI API
137 | OPENAI_API_ORGANIZATION=ADD-ME-PLEASE
138 | OPENAI_API_KEY=ADD-ME-PLEASE
139 | OPENAI_CHAT_MODEL_NAME=gpt-4
140 | OPENAI_PROMPT_MODEL_NAME=gpt-4
141 | OPENAI_CHAT_TEMPERATURE=0.0
142 | OPENAI_CHAT_MAX_RETRIES=3
143 |
144 | # Pinecone API
145 | PINECONE_API_KEY=ADD-ME-PLEASE
146 | PINECONE_ENVIRONMENT=gcp-starter
147 | PINECONE_INDEX_NAME=openai-embeddings
148 | PINECONE_VECTORSTORE_TEXT_KEY=lc_id
149 | PINECONE_METRIC=dotproduct
150 | PINECONE_DIMENSIONS=1536
151 |
152 | # This package
153 | DEBUG_MODE=False
154 | ```
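
These values are validated at import time by models/conf.py. A quick, illustrative way to inspect what was resolved (assuming your virtual environment is active):

```python
# Inspect the resolved, validated configuration (see models/conf.py)
from models.conf import settings

print(settings.version)                 # semantic version from models/__version__.py
print(settings.openai_chat_model_name)  # e.g. "gpt-4"
print(settings.dump)                    # nested dict of environment, openai_api and pinecone_api settings
```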
155 |
156 | ## Contributing
157 |
158 | This project uses a mostly automated pull request and unit testing process. See the resources in .github for additional details. You should also ensure that pre-commit is installed and working correctly on your dev machine by running the following command from the root of the repo.
159 |
160 | ```console
161 | pre-commit run --all-files
162 | ```
163 |
164 | Pull requests should pass these tests before being submitted:
165 |
166 | ```console
167 | make test
168 | ```
169 |
170 | ### Developer setup
171 |
172 | ```console
173 | git clone https://github.com/FullStackWithLawrence/openai-embeddings.git
174 | cd openai-embeddings
175 | make init
176 | make activate
177 | ```
178 |
179 | ### GitHub Actions
180 |
181 | GitHub Actions requires the following secrets:
182 |
183 | ```console
184 | PAT: {{ secrets.PAT }} # a GitHub Personal Access Token
185 | OPENAI_API_ORGANIZATION: {{ secrets.OPENAI_API_ORGANIZATION }}
186 | OPENAI_API_KEY: {{ secrets.OPENAI_API_KEY }}
187 | PINECONE_API_KEY: {{ secrets.PINECONE_API_KEY }}
188 | PINECONE_ENVIRONMENT: {{ secrets.PINECONE_ENVIRONMENT }}
189 | PINECONE_INDEX_NAME: {{ secrets.PINECONE_INDEX_NAME }}
190 | ```
191 |
192 | ## Additional reading
193 |
194 | - [Youtube - Vector Embeddings Tutorial – Code Your Own AI Assistant with GPT-4 API + LangChain + NLP](https://www.youtube.com/watch?v=yfHHvmaMkcA)
195 | - [Youtube - LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://www.youtube.com/watch?v=aywZrzNaKjs)
196 | - [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings)
197 | - [What is a Vector Database?](https://www.pinecone.io/learn/vector-database/)
198 | - [LangChain RAG](https://python.langchain.com/docs/use_cases/question_answering/)
199 | - [LangChain Document Loaders](https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf)
200 | - [LangChain Caching](https://python.langchain.com/docs/modules/model_io/llms/llm_caching)
201 |
--------------------------------------------------------------------------------
/codespell.txt:
--------------------------------------------------------------------------------
1 | OCE
2 |
--------------------------------------------------------------------------------
/commitlint.config.js:
--------------------------------------------------------------------------------
1 | const Configuration = {
2 | /*
3 | * Resolve and load @commitlint/config-conventional from node_modules.
4 | * Referenced packages must be installed
5 | */
6 | extends: ["@commitlint/config-conventional", "@commitlint/config-angular"],
7 | /*
8 | * Resolve and load conventional-changelog-atom from node_modules.
9 | * Referenced packages must be installed
10 | */
11 | parserPreset: "conventional-changelog-atom",
12 | /*
13 | * Resolve and load @commitlint/format from node_modules.
14 | * Referenced package must be installed
15 | */
16 | formatter: "@commitlint/format",
17 | /*
18 | * Any rules defined here will override rules from @commitlint/config-conventional
19 | */
20 | rules: {},
21 | /*
22 | * Array of functions that return true if commitlint should ignore the given message.
23 | * Given array is merged with predefined functions, which consist of matchers like:
24 | *
25 | * - 'Merge pull request', 'Merge X into Y' or 'Merge branch X'
26 | * - 'Revert X'
27 | * - 'v1.2.3' (ie semver matcher)
28 | * - 'Automatic merge X' or 'Auto-merged X into Y'
29 | *
30 | * To see full list, check https://github.com/conventional-changelog/commitlint/blob/master/%40commitlint/is-ignored/src/defaults.ts.
31 | * To disable those ignores and run rules always, set `defaultIgnores: false` as shown below.
32 | */
33 | // ignores: [(commit) => commit === ''],
34 | /*
35 | * Whether commitlint uses the default ignore rules, see the description above.
36 | */
37 | defaultIgnores: true,
38 | /*
39 | * Custom URL to show upon failure
40 | */
41 | helpUrl:
42 | "https://github.com/conventional-changelog/commitlint/#what-is-commitlint",
43 | /*
44 | * Custom prompt configs
45 | */
46 | prompt: {
47 | messages: {},
48 | questions: {
49 | type: {
50 | description: "please input type:",
51 | },
52 | },
53 | },
54 | };
55 |
56 | module.exports = Configuration;
57 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/models/__init__.py
--------------------------------------------------------------------------------
/models/__version__.py:
--------------------------------------------------------------------------------
1 | # Managed via automated CI/CD in .github/workflows/semanticVersionBump.yml.
2 | __version__ = "1.3.7"
3 |
--------------------------------------------------------------------------------
/models/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pylint: disable=no-member
3 | # pylint: disable=E0213,C0103
4 | """
5 | Configuration for the models package.
6 |
7 | This module uses the pydantic_settings library to validate the
8 | configuration values. The configuration values are read from
9 | any of the following sources:
10 | - constructor arguments
11 | - environment variables
12 | - terraform.tfvars
13 | - default values
14 | """
15 |
16 | import importlib.util
17 | import os # library for interacting with the operating system
18 | import platform  # library to view information about the host this module runs on
19 | import re
20 | from typing import Any, Dict, List, Optional
21 |
22 | from dotenv import load_dotenv
23 | from pydantic import Field, SecretStr, ValidationError, field_validator
24 | from pydantic_settings import BaseSettings
25 |
26 | from models.const import HERE
27 | from models.exceptions import ModelConfigurationError, ModelValueError
28 |
29 |
30 | DOT_ENV_LOADED = load_dotenv()
31 |
32 |
33 | def load_version() -> Dict[str, str]:
34 | """Stringify the __version__ module."""
35 | version_file_path = os.path.join(HERE, "__version__.py")
36 | spec = importlib.util.spec_from_file_location("__version__", version_file_path)
37 | version_module = importlib.util.module_from_spec(spec)
38 | spec.loader.exec_module(version_module)
39 | return version_module.__dict__
40 |
41 |
42 | VERSION = load_version()
43 |
44 |
45 | def get_semantic_version() -> str:
46 | """
47 | Return the semantic version number.
48 |
49 | Example valid values of __version__.py are:
50 | 0.1.17
51 | 0.1.17-next.1
52 | 0.1.17-next.2
53 | 0.1.17-next.123456
54 | 0.1.17-next-major.1
55 | 0.1.17-next-major.2
56 | 0.1.17-next-major.123456
57 |
58 | Note:
59 | - pypi does not allow semantic version numbers to contain a dash.
60 | - pypi does not allow semantic version numbers to contain a 'v' prefix.
61 | - pypi does not allow semantic version numbers to contain a 'next' suffix.
62 | """
63 | version = VERSION["__version__"]
64 | version = re.sub(r"-next\.\d+", "", version)
65 | return re.sub(r"-next-major\.\d+", "", version)
66 |
67 |
68 | # pylint: disable=too-few-public-methods
69 | class SettingsDefaults:
70 | """Default values for Settings"""
71 |
72 | DEBUG_MODE = False
73 | DUMP_DEFAULTS = False
74 |
75 | LANGCHAIN_MEMORY_KEY = "chat_history"
76 |
77 | PINECONE_API_KEY: SecretStr = SecretStr(None)
78 | PINECONE_ENVIRONMENT = "gcp-starter"
79 | PINECONE_INDEX_NAME = "openai-embeddings"
80 | PINECONE_VECTORSTORE_TEXT_KEY = "lc_id"
81 | PINECONE_METRIC = "dotproduct"
82 | PINECONE_DIMENSIONS = 1536
83 |
84 | OPENAI_API_ORGANIZATION: str = None
85 | OPENAI_API_KEY: SecretStr = SecretStr(None)
86 | OPENAI_ENDPOINT_IMAGE_N = 4
87 | OPENAI_ENDPOINT_IMAGE_SIZE = "1024x768"
88 | OPENAI_CHAT_CACHE = True
89 | OPENAI_CHAT_MODEL_NAME = "gpt-4"
90 | OPENAI_PROMPT_MODEL_NAME = "gpt-4"
91 | OPENAI_CHAT_TEMPERATURE = 0.0
92 | OPENAI_CHAT_MAX_RETRIES = 3
93 |
94 | @classmethod
95 | def to_dict(cls):
96 | """Convert SettingsDefaults to dict"""
97 | return {
98 | key: value
99 | for key, value in SettingsDefaults.__dict__.items()
100 | if not key.startswith("__") and not callable(key) and key != "to_dict"
101 | }
102 |
103 |
104 | def empty_str_to_bool_default(v: str, default: bool) -> bool:
105 | """Convert empty string to default boolean value"""
106 | if v in [None, ""]:
107 | return default
108 | return v.lower() in ["true", "1", "t", "y", "yes"]
109 |
110 |
111 | def empty_str_to_int_default(v: str, default: int) -> int:
112 | """Convert empty string to default integer value"""
113 | if v in [None, ""]:
114 | return default
115 | try:
116 | return int(v)
117 | except ValueError:
118 | return default
119 |
120 |
121 | # pylint: disable=too-many-public-methods
122 | # pylint: disable=too-many-instance-attributes
123 | class Settings(BaseSettings):
124 | """Settings for Lambda functions"""
125 |
126 | _dump: dict = None
127 | _pinecone_api_key_source: str = "unset"
128 | _openai_api_key_source: str = "unset"
129 | _initialized: bool = False
130 |
131 | def __init__(self, **data: Any):
132 | super().__init__(**data)
133 | if "PINECONE_API_KEY" in os.environ:
134 | self._pinecone_api_key_source = "environment variable"
135 | elif data.get("pinecone_api_key"):
136 | self._pinecone_api_key_source = "init argument"
137 | if "OPENAI_API_KEY" in os.environ:
138 | self._openai_api_key_source = "environment variable"
139 | elif data.get("openai_api_key"):
140 | self._openai_api_key_source = "init argument"
141 | self._initialized = True
142 |
143 | debug_mode: Optional[bool] = Field(
144 | SettingsDefaults.DEBUG_MODE,
145 | env="DEBUG_MODE",
146 | pre=True,
147 | getter=lambda v: empty_str_to_bool_default(v, SettingsDefaults.DEBUG_MODE),
148 | )
149 | dump_defaults: Optional[bool] = Field(
150 | SettingsDefaults.DUMP_DEFAULTS,
151 | env="DUMP_DEFAULTS",
152 | pre=True,
153 | getter=lambda v: empty_str_to_bool_default(v, SettingsDefaults.DUMP_DEFAULTS),
154 | )
155 |
156 | langchain_memory_key: Optional[str] = Field(SettingsDefaults.LANGCHAIN_MEMORY_KEY, env="LANGCHAIN_MEMORY_KEY")
157 |
158 | openai_api_organization: Optional[str] = Field(
159 | SettingsDefaults.OPENAI_API_ORGANIZATION, env="OPENAI_API_ORGANIZATION"
160 | )
161 | openai_api_key: Optional[SecretStr] = Field(SettingsDefaults.OPENAI_API_KEY, env="OPENAI_API_KEY")
162 | openai_endpoint_image_n: Optional[int] = Field(
163 | SettingsDefaults.OPENAI_ENDPOINT_IMAGE_N, env="OPENAI_ENDPOINT_IMAGE_N"
164 | )
165 | openai_endpoint_image_size: Optional[str] = Field(
166 | SettingsDefaults.OPENAI_ENDPOINT_IMAGE_SIZE, env="OPENAI_ENDPOINT_IMAGE_SIZE"
167 | )
168 | openai_chat_cache: Optional[bool] = Field(
169 | SettingsDefaults.OPENAI_CHAT_CACHE,
170 | env="OPENAI_CHAT_CACHE",
171 | pre=True,
172 | getter=lambda v: empty_str_to_bool_default(v, SettingsDefaults.OPENAI_CHAT_CACHE),
173 | )
174 | openai_chat_model_name: Optional[str] = Field(SettingsDefaults.OPENAI_CHAT_MODEL_NAME, env="OPENAI_CHAT_MODEL_NAME")
175 | openai_prompt_model_name: Optional[str] = Field(
176 | SettingsDefaults.OPENAI_PROMPT_MODEL_NAME, env="OPENAI_PROMPT_MODEL_NAME"
177 | )
178 | openai_chat_temperature: Optional[float] = Field(
179 | SettingsDefaults.OPENAI_CHAT_TEMPERATURE,
180 | env="OPENAI_CHAT_TEMPERATURE",
181 | ge=0.0,
182 | le=1.0,
183 | )
184 | openai_chat_max_retries: Optional[int] = Field(
185 | SettingsDefaults.OPENAI_CHAT_MAX_RETRIES,
186 | env="OPENAI_CHAT_MAX_RETRIES",
187 | ge=0,
188 | )
189 |
190 | pinecone_api_key: Optional[SecretStr] = Field(SettingsDefaults.PINECONE_API_KEY, env="PINECONE_API_KEY")
191 | pinecone_environment: Optional[str] = Field(SettingsDefaults.PINECONE_ENVIRONMENT, env="PINECONE_ENVIRONMENT")
192 | pinecone_index_name: Optional[str] = Field(SettingsDefaults.PINECONE_INDEX_NAME, env="PINECONE_INDEX_NAME")
193 | pinecone_vectorstore_text_key: Optional[str] = Field(
194 | SettingsDefaults.PINECONE_VECTORSTORE_TEXT_KEY, env="PINECONE_VECTORSTORE_TEXT_KEY"
195 | )
196 | pinecone_metric: Optional[str] = Field(SettingsDefaults.PINECONE_METRIC, env="PINECONE_METRIC")
197 | pinecone_dimensions: Optional[int] = Field(SettingsDefaults.PINECONE_DIMENSIONS, env="PINECONE_DIMENSIONS", gt=0)
198 |
199 | @property
200 | def pinecone_api_key_source(self) -> str:
201 | """Pinecone API key source"""
202 | return self._pinecone_api_key_source
203 |
204 | @property
205 | def openai_api_key_source(self) -> str:
206 | """OpenAI API key source"""
207 | return self._openai_api_key_source
208 |
209 | @property
210 | def is_using_dotenv_file(self) -> bool:
211 | """Is the dotenv file being used?"""
212 | return DOT_ENV_LOADED
213 |
214 | @property
215 | def environment_variables(self) -> List[str]:
216 | """Environment variables"""
217 | return list(os.environ.keys())
218 |
219 | @property
220 | def is_using_tfvars_file(self) -> bool:
221 | """Is the tfvars file being used?"""
222 | return False
223 |
224 | @property
225 | def tfvars_variables(self) -> List[str]:
226 | """Terraform variables"""
227 | return []
228 |
229 | @property
230 | def is_using_aws_rekognition(self) -> bool:
231 | """Future: Is the AWS Rekognition service being used?"""
232 | return False
233 |
234 | @property
235 | def is_using_aws_dynamodb(self) -> bool:
236 | """Future: Is the AWS DynamoDB service being used?"""
237 | return False
238 |
239 | @property
240 | def version(self) -> str:
241 | """OpenAI API version"""
242 | return get_semantic_version()
243 |
244 | @property
245 | def dump(self) -> dict:
246 | """Dump all settings."""
247 |
248 | def recursive_sort_dict(d):
249 | return {k: recursive_sort_dict(v) if isinstance(v, dict) else v for k, v in sorted(d.items())}
250 |
251 | if self._dump and self._initialized:
252 | return self._dump
253 |
254 | self._dump = {
255 | "secrets": {
256 | "openai_api_source": self.openai_api_key_source,
257 | "pinecone_api_source": self.pinecone_api_key_source,
258 | },
259 | "environment": {
260 | "is_using_tfvars_file": self.is_using_tfvars_file,
261 | "is_using_dotenv_file": self.is_using_dotenv_file,
262 | "os": os.name,
263 | "system": platform.system(),
264 | "release": platform.release(),
265 | "debug_mode": self.debug_mode,
266 | "dump_defaults": self.dump_defaults,
267 | "version": self.version,
268 | },
269 | "langchain": {
270 | "langchain_memory_key": self.langchain_memory_key,
271 | },
272 | "openai_api": {
273 | "openai_endpoint_image_n": self.openai_endpoint_image_n,
274 | "openai_endpoint_image_size": self.openai_endpoint_image_size,
275 | "openai_chat_cache": self.openai_chat_cache,
276 | "openai_chat_model_name": self.openai_chat_model_name,
277 | "openai_prompt_model_name": self.openai_prompt_model_name,
278 | "openai_chat_temperature": self.openai_chat_temperature,
279 | "openai_chat_max_retries": self.openai_chat_max_retries,
280 | },
281 | "pinecone_api": {
282 | "pinecone_environment": self.pinecone_environment,
283 | "pinecone_index_name": self.pinecone_index_name,
284 | "pinecone_vectorstore_text_key": self.pinecone_vectorstore_text_key,
285 | "pinecone_metric": self.pinecone_metric,
286 | "pinecone_dimensions": self.pinecone_dimensions,
287 | },
288 | }
289 | if self.dump_defaults:
290 | settings_defaults = SettingsDefaults.to_dict()
291 | self._dump["settings_defaults"] = settings_defaults
292 |
293 | if self.is_using_dotenv_file:
294 | self._dump["environment"]["dotenv"] = self.environment_variables
295 |
296 | if self.is_using_tfvars_file:
297 | self._dump["environment"]["tfvars"] = self.tfvars_variables
298 |
299 | self._dump = recursive_sort_dict(self._dump)
300 | return self._dump
301 |
302 | # pylint: disable=too-few-public-methods
303 | class Config:
304 | """Pydantic configuration"""
305 |
306 | frozen = True
307 |
308 | @field_validator("debug_mode")
309 | def parse_debug_mode(cls, v) -> bool:
310 | """Parse debug_mode"""
311 | if isinstance(v, bool):
312 | return v
313 | if v in [None, ""]:
314 | return SettingsDefaults.DEBUG_MODE
315 | return v.lower() in ["true", "1", "t", "y", "yes"]
316 |
317 | @field_validator("dump_defaults")
318 | def parse_dump_defaults(cls, v) -> bool:
319 | """Parse dump_defaults"""
320 | if isinstance(v, bool):
321 | return v
322 | if v in [None, ""]:
323 | return SettingsDefaults.DUMP_DEFAULTS
324 | return v.lower() in ["true", "1", "t", "y", "yes"]
325 |
326 | @field_validator("langchain_memory_key")
327 | def check_langchain_memory_key(cls, v) -> str:
328 | """Check langchain_memory_key"""
329 | if v in [None, ""]:
330 | return SettingsDefaults.LANGCHAIN_MEMORY_KEY
331 | return v
332 |
333 | @field_validator("openai_api_organization")
334 | def check_openai_api_organization(cls, v) -> str:
335 | """Check openai_api_organization"""
336 | if v in [None, ""]:
337 | return SettingsDefaults.OPENAI_API_ORGANIZATION
338 | return v
339 |
340 | @field_validator("openai_api_key")
341 | def check_openai_api_key(cls, v) -> SecretStr:
342 | """Check openai_api_key"""
343 | if v in [None, ""]:
344 | return SettingsDefaults.OPENAI_API_KEY
345 | return v
346 |
347 | @field_validator("openai_endpoint_image_n")
348 | def check_openai_endpoint_image_n(cls, v) -> int:
349 | """Check openai_endpoint_image_n"""
350 | if isinstance(v, int):
351 | return v
352 | if v in [None, ""]:
353 | return SettingsDefaults.OPENAI_ENDPOINT_IMAGE_N
354 | return int(v)
355 |
356 | @field_validator("openai_endpoint_image_size")
357 | def check_openai_endpoint_image_size(cls, v) -> str:
358 | """Check openai_endpoint_image_size"""
359 | if v in [None, ""]:
360 | return SettingsDefaults.OPENAI_ENDPOINT_IMAGE_SIZE
361 | return v
362 |
363 | @field_validator("openai_chat_cache")
364 | def check_openai_chat_cache(cls, v) -> bool:
365 | """Check openai_chat_cache"""
366 | if isinstance(v, bool):
367 | return v
368 | if v in [None, ""]:
369 | return SettingsDefaults.OPENAI_CHAT_CACHE
370 | return v.lower() in ["true", "1", "t", "y", "yes"]
371 |
372 | @field_validator("openai_chat_model_name")
373 | def check_openai_chat_model_name(cls, v) -> str:
374 | """Check openai_chat_model_name"""
375 | if v in [None, ""]:
376 | return SettingsDefaults.OPENAI_CHAT_MODEL_NAME
377 | return v
378 |
379 | @field_validator("openai_prompt_model_name")
380 | def check_openai_prompt_model_name(cls, v) -> str:
381 | """Check openai_prompt_model_name"""
382 | if v in [None, ""]:
383 | return SettingsDefaults.OPENAI_PROMPT_MODEL_NAME
384 | return v
385 |
386 | @field_validator("openai_chat_temperature")
387 | def check_openai_chat_temperature(cls, v) -> float:
388 | """Check openai_chat_temperature"""
389 | if v in [None, ""]:
390 | return SettingsDefaults.OPENAI_CHAT_TEMPERATURE
391 | return float(v)
392 |
393 | @field_validator("openai_chat_max_retries")
394 | def check_openai_chat_max_retries(cls, v) -> int:
395 | """Check openai_chat_max_retries"""
396 | if v in [None, ""]:
397 | return SettingsDefaults.OPENAI_CHAT_MAX_RETRIES
398 | return int(v)
399 |
400 | @field_validator("pinecone_api_key")
401 | def check_pinecone_api_key(cls, v) -> SecretStr:
402 | """Check pinecone_api_key"""
403 | if v in [None, ""]:
404 | return SettingsDefaults.PINECONE_API_KEY
405 | return v
406 |
407 | @field_validator("pinecone_environment")
408 | def check_pinecone_environment(cls, v) -> str:
409 | """Check pinecone_environment"""
410 | if v in [None, ""]:
411 | return SettingsDefaults.PINECONE_ENVIRONMENT
412 | return v
413 |
414 | @field_validator("pinecone_index_name")
415 | def check_pinecone_index_name(cls, v) -> str:
416 | """Check pinecone_index_name"""
417 | if v in [None, ""]:
418 | return SettingsDefaults.PINECONE_INDEX_NAME
419 | return v
420 |
421 | @field_validator("pinecone_vectorstore_text_key")
422 | def check_pinecone_vectorstore_text_key(cls, v) -> str:
423 | """Check pinecone_vectorstore_text_key"""
424 | if v in [None, ""]:
425 | return SettingsDefaults.PINECONE_VECTORSTORE_TEXT_KEY
426 | return v
427 |
428 | @field_validator("pinecone_metric")
429 | def check_pinecone_metric(cls, v) -> str:
430 | """Check pinecone_metric"""
431 | if v in [None, ""]:
432 | return SettingsDefaults.PINECONE_METRIC
433 | return v
434 |
435 | @field_validator("pinecone_dimensions")
436 | def check_pinecone_dimensions(cls, v) -> int:
437 | """Check pinecone_dimensions"""
438 | if v in [None, ""]:
439 | return SettingsDefaults.PINECONE_DIMENSIONS
440 | return int(v)
441 |
442 |
443 | settings = None
444 | try:
445 | settings = Settings()
446 | except (ValidationError, ValueError, ModelConfigurationError, ModelValueError) as e:
447 | raise ModelConfigurationError("Invalid configuration: " + str(e)) from e
448 |
--------------------------------------------------------------------------------
/models/const.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pylint: disable=too-few-public-methods
3 | """Sales Support Model (hsr) for the LangChain project."""
4 |
5 | import os
6 | from pathlib import Path
7 |
8 |
9 | MODULE_NAME = "models"
10 | HERE = os.path.abspath(os.path.dirname(__file__))
11 | REPO_ROOT = str(Path(HERE).parent)
12 | IS_USING_TFVARS = False
13 |
--------------------------------------------------------------------------------
/models/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/models/examples/__init__.py
--------------------------------------------------------------------------------
/models/examples/certification_programs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Sales Support Model (hsr) for the LangChain project."""
3 | import argparse
4 |
5 | from models.hybrid_search_retreiver import HybridSearchRetriever
6 | from models.prompt_templates import UofPennPromptTemplates
7 |
8 |
9 | hsr = HybridSearchRetriever()
10 | templates = UofPennPromptTemplates()
11 |
12 | if __name__ == "__main__":
13 | parser = argparse.ArgumentParser(description="Hybrid search retrieval - University of Pennsylvania examples")
14 | parser.add_argument("concept", type=str, help="A certification program.")
15 | args = parser.parse_args()
16 |
17 | prompt = templates.certification_programs
18 | result = hsr.prompt_with_template(prompt=prompt, concept=args.concept)
19 | print(result)
20 |
--------------------------------------------------------------------------------
/models/examples/load.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Sales Support Model (hsr) Retrieval Augmented Generation (RAG)"""
3 | import argparse
4 |
5 | from models.hybrid_search_retreiver import HybridSearchRetriever
6 |
7 |
8 | hsr = HybridSearchRetriever()
9 |
10 | if __name__ == "__main__":
11 | parser = argparse.ArgumentParser(description="RAG example")
12 | parser.add_argument("filepath", type=str, help="Location of PDF documents")
13 | args = parser.parse_args()
14 |
15 | hsr.load(filepath=args.filepath)
16 |
--------------------------------------------------------------------------------
/models/examples/online_courses.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Sales Support Model (hsr) for the LangChain project."""
3 | import argparse
4 |
5 | from models.hybrid_search_retreiver import HybridSearchRetriever
6 | from models.prompt_templates import UofPennPromptTemplates
7 |
8 |
9 | hsr = HybridSearchRetriever()
10 | templates = UofPennPromptTemplates()
11 |
12 | if __name__ == "__main__":
13 | parser = argparse.ArgumentParser(description="Hybrid search retrieval - University of Pennsylvania examples")
14 | parser.add_argument("concept", type=str, help="A subject to study: accounting, finance, etc.")
15 | args = parser.parse_args()
16 |
17 | prompt = templates.online_courses
18 | result = hsr.prompt_with_template(prompt=prompt, concept=args.concept)
19 | print(result)
20 |
--------------------------------------------------------------------------------
/models/examples/pinecone_init.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Sales Support Model (hsr) Retrieval Augmented Generation (RAG)"""
3 |
4 | import logging
5 |
6 | # this project
7 | from models.conf import settings
8 | from models.pinecone import PineconeIndex
9 |
10 |
11 | logging.basicConfig(level=logging.DEBUG if settings.debug_mode else logging.INFO)
12 | logger = logging.getLogger(__name__)
13 |
14 | pinecone = PineconeIndex()
15 |
16 | if __name__ == "__main__":
17 | pinecone.initialize()
18 | print("Pinecone index initialized. name: ", pinecone.index_name)
19 | print(pinecone.index.describe_index_stats())
20 |
--------------------------------------------------------------------------------
/models/examples/prompt.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Sales Support Model (hsr)"""
3 | import argparse
4 |
5 | from langchain.schema import HumanMessage, SystemMessage
6 |
7 | from models.hybrid_search_retreiver import HybridSearchRetriever
8 |
9 |
10 | hsr = HybridSearchRetriever()
11 |
12 |
13 | if __name__ == "__main__":
14 | parser = argparse.ArgumentParser(description="hybrid search retrieval examples")
15 | parser.add_argument("system_message", type=str, help="A system prompt to send to the model.")
16 | parser.add_argument("human_message", type=str, help="A human prompt to send to the model.")
17 | args = parser.parse_args()
18 |
19 | system_message = SystemMessage(content=args.system_message)
20 | human_message = HumanMessage(content=args.human_message)
21 | result = hsr.cached_chat_request(system_message=system_message, human_message=human_message)
22 | print(result.content)
23 |
--------------------------------------------------------------------------------
/models/examples/rag.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Sales Support Model (hsr) Retrieval Augmented Generation (RAG)"""
3 | import argparse
4 |
5 | from langchain.schema import HumanMessage
6 |
7 | from models.hybrid_search_retreiver import HybridSearchRetriever
8 |
9 |
10 | hsr = HybridSearchRetriever()
11 |
12 | if __name__ == "__main__":
13 | parser = argparse.ArgumentParser(description="Retrieval Augmented Generation (RAG)")
14 | parser.add_argument("prompt", type=str, help="A question about the vectorized PDF contents")
15 | args = parser.parse_args()
16 |
17 | human_message = HumanMessage(content=args.prompt)
18 | result = hsr.rag(human_message=human_message)
19 | print(result)
20 |
--------------------------------------------------------------------------------
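The example modules above are thin argparse CLIs over HybridSearchRetriever. A usage sketch (assumed invocations, inferred from each module's argparse configuration; all of them require a valid .env with OpenAI and Pinecone credentials):

# python -m models.examples.pinecone_init
# python -m models.examples.load "/path/to/pdf/directory/"
# python -m models.examples.prompt "you are a helpful assistant" "hello world"
# python -m models.examples.rag "What do the loaded PDF documents say?"
# python -m models.examples.online_courses "finance"
# python -m models.examples.certification_programs "accounting"

--------------------------------------------------------------------------------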
/models/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Module exceptions.py"""
3 |
4 | import openai
5 |
6 |
7 | # pylint: disable=too-few-public-methods
8 | class OpenAIResponseCodes:
9 | """Http response codes from openai API"""
10 |
11 | HTTP_RESPONSE_OK = 200
12 | HTTP_RESPONSE_BAD_REQUEST = 400
13 | HTTP_RESPONSE_INTERNAL_SERVER_ERROR = 500
14 |
15 |
16 | class ModelConfigurationError(Exception):
17 | """Exception raised for errors in the configuration."""
18 |
19 | def __init__(self, message):
20 | self.message = message
21 | super().__init__(self.message)
22 |
23 |
24 | class ModelValueError(Exception):
25 | """Exception raised for errors in the configuration."""
26 |
27 | def __init__(self, message):
28 | self.message = message
29 | super().__init__(self.message)
30 |
31 |
32 | class ModelIllegalInvocationError(Exception):
33 | """Exception raised when the service is called by an unknown service."""
34 |
35 | def __init__(self, message):
36 | self.message = message
37 | super().__init__(self.message)
38 |
39 |
40 | EXCEPTION_MAP = {
41 | ModelValueError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"),
42 | ModelConfigurationError: (OpenAIResponseCodes.HTTP_RESPONSE_INTERNAL_SERVER_ERROR, "InternalServerError"),
43 |     ModelIllegalInvocationError: (OpenAIResponseCodes.HTTP_RESPONSE_INTERNAL_SERVER_ERROR, "InternalServerError"),
44 | openai.APIError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"),
45 | ValueError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"),
46 | TypeError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"),
47 | NotImplementedError: (OpenAIResponseCodes.HTTP_RESPONSE_BAD_REQUEST, "BadRequest"),
48 | openai.OpenAIError: (OpenAIResponseCodes.HTTP_RESPONSE_INTERNAL_SERVER_ERROR, "InternalServerError"),
49 | Exception: (OpenAIResponseCodes.HTTP_RESPONSE_INTERNAL_SERVER_ERROR, "InternalServerError"),
50 | }
51 |
52 |
53 | class ConfigurationError(Exception):
54 | """Exception raised for errors in the configuration."""
55 |
56 | def __init__(self, message):
57 | self.message = message
58 | super().__init__(self.message)
59 |
--------------------------------------------------------------------------------
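EXCEPTION_MAP above associates exception classes with (HTTP status code, error type) pairs. A minimal sketch, not part of the repo, of how a caller might apply it; the helper name http_response_for is hypothetical:

from models.exceptions import EXCEPTION_MAP, OpenAIResponseCodes


def http_response_for(exc: Exception) -> tuple[int, str]:
    """Translate a raised exception into an (HTTP status code, error type) pair."""
    for exception_class, (status_code, error_type) in EXCEPTION_MAP.items():
        # specific classes precede Exception in the map, so the first
        # isinstance() match wins
        if isinstance(exc, exception_class):
            return status_code, error_type
    return OpenAIResponseCodes.HTTP_RESPONSE_INTERNAL_SERVER_ERROR, "InternalServerError"

--------------------------------------------------------------------------------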
/models/hybrid_search_retreiver.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pylint: disable=E0611,E1101
3 | """
4 | Hybrid Search Retriever. A class that combines the following:
5 | - OpenAI prompting and ChatModel
6 | - PromptingWrapper
7 | - Vector embedding with Pinecone
8 | - Hybrid Retriever to combine vector embeddings with text search
9 |
10 | Provides a PDF loader program that extracts text, vectorizes, and
11 | loads into a Pinecone dot product vector database that is dimensioned
12 | to match OpenAI embeddings.
13 |
14 | See: https://python.langchain.com/docs/modules/model_io/llms/llm_caching
15 | https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf
16 | https://python.langchain.com/docs/integrations/retrievers/pinecone_hybrid_search
17 | """
18 |
19 | # general purpose imports
20 | import logging
21 | import textwrap
22 | from typing import Union
23 |
24 | # embedding
25 | from langchain.globals import set_llm_cache
26 | from langchain.prompts import PromptTemplate
27 | from langchain.schema import BaseMessage, HumanMessage, SystemMessage
28 |
29 | # pinecone integration
30 | from langchain_community.cache import InMemoryCache
31 |
32 | # hybrid search capability
33 | from langchain_community.retrievers.pinecone_hybrid_search import (
34 | PineconeHybridSearchRetriever,
35 | )
36 |
37 | # from langchain_community.chat_models import ChatOpenAI
38 | # prompting and chat
39 | from langchain_openai import ChatOpenAI
40 | from pinecone_text.sparse import BM25Encoder # pylint: disable=import-error
41 |
42 | # this project
43 | from models.conf import settings
44 | from models.pinecone import PineconeIndex
45 |
46 |
47 | logging.basicConfig(level=logging.DEBUG if settings.debug_mode else logging.INFO)
48 | logger = logging.getLogger(__name__)
49 |
50 |
51 | class HybridSearchRetriever:
52 | """Hybrid Search Retriever"""
53 |
54 | _chat: ChatOpenAI = None
55 |     _bm25_encoder: BM25Encoder = None
56 | _pinecone: PineconeIndex = None
57 | _retriever: PineconeHybridSearchRetriever = None
58 |
59 | def __init__(self):
60 | """Constructor"""
61 | set_llm_cache(InMemoryCache())
62 |
63 | @property
64 | def pinecone(self) -> PineconeIndex:
65 | """PineconeIndex lazy read-only property."""
66 | if self._pinecone is None:
67 | self._pinecone = PineconeIndex()
68 | return self._pinecone
69 |
70 | # prompting wrapper
71 | @property
72 | def chat(self) -> ChatOpenAI:
73 | """ChatOpenAI lazy read-only property."""
74 | if self._chat is None:
75 | self._chat = ChatOpenAI(
76 | api_key=settings.openai_api_key.get_secret_value(), # pylint: disable=no-member
77 | organization=settings.openai_api_organization,
78 | cache=settings.openai_chat_cache,
79 | max_retries=settings.openai_chat_max_retries,
80 | model=settings.openai_chat_model_name,
81 | temperature=settings.openai_chat_temperature,
82 | )
83 | return self._chat
84 |
85 | @property
86 | def bm25_encoder(self) -> BM25Encoder:
87 | """BM25Encoder lazy read-only property."""
88 |         if self._bm25_encoder is None:
89 |             self._bm25_encoder = BM25Encoder().default()
90 |         return self._bm25_encoder
91 |
92 | @property
93 | def retriever(self) -> PineconeHybridSearchRetriever:
94 | """PineconeHybridSearchRetriever lazy read-only property."""
95 | if self._retriever is None:
96 | self._retriever = PineconeHybridSearchRetriever(
97 | embeddings=self.pinecone.openai_embeddings, sparse_encoder=self.bm25_encoder, index=self.pinecone.index
98 | )
99 | return self._retriever
100 |
101 | def cached_chat_request(
102 | self, system_message: Union[str, SystemMessage], human_message: Union[str, HumanMessage]
103 | ) -> BaseMessage:
104 | """Cached chat request."""
105 | if not isinstance(system_message, SystemMessage):
106 | logger.info("Converting system message to SystemMessage")
107 | system_message = SystemMessage(content=str(system_message))
108 |
109 | if not isinstance(human_message, HumanMessage):
110 | logger.info("Converting human message to HumanMessage")
111 | human_message = HumanMessage(content=str(human_message))
112 | messages = [system_message, human_message]
113 | # pylint: disable=not-callable
114 | # retval = self.chat(messages)
115 | retval = self.chat.invoke(messages)
116 | return retval
117 |
118 | # pylint: disable=unused-argument
119 | def prompt_with_template(
120 | self, prompt: PromptTemplate, concept: str, model: str = settings.openai_prompt_model_name
121 | ) -> str:
122 | """Prompt with template."""
123 | retval = self.chat.invoke(prompt.format(concept=concept))
124 | return str(retval.content) if retval else "no response"
125 |
126 | def load(self, filepath: str):
127 | """Pdf loader."""
128 | self.pinecone.pdf_loader(filepath=filepath)
129 |
130 | def rag(self, human_message: Union[str, HumanMessage]):
131 | """
132 | Retrieval Augmented Generation prompt.
133 | 1. Retrieve human message prompt: Given a user input, relevant splits are retrieved
134 | from storage using a Retriever.
135 | 2. Generate: A ChatModel / LLM produces an answer using a prompt that includes
136 |         the question and the retrieved data.
137 |
138 |         To prompt an OpenAI GPT model to consider the embeddings from the Pinecone
139 |         vector database, you would typically need to convert the embeddings back
140 |         into a format that the model can understand, such as text. The chat API does
141 |         not natively support direct input of embeddings.
142 |
143 |         The typical workflow is to use the embeddings to retrieve relevant documents,
144 |         and then use the text of these documents as part of the prompt for the model.
145 |         """
146 | if not isinstance(human_message, HumanMessage):
147 | logger.info("Converting human_message to HumanMessage")
148 | human_message = HumanMessage(content=human_message)
149 |
150 | # ---------------------------------------------------------------------
151 | # 1.) Retrieve relevant documents from Pinecone vector database
152 | # ---------------------------------------------------------------------
153 | documents = self.pinecone.vector_store.similarity_search(query=human_message.content)
154 |
155 | # Extract the text from the documents
156 | document_texts = [doc.page_content for doc in documents]
157 | leader = textwrap.dedent(
158 | """\n
159 | You are a helpful assistant. You should assume that all of the
160 |             bullet points that follow are completely factual.
161 | You should prioritize these enumerated facts when formulating your response:"""
162 | )
163 |         # enumerate the retrieved document texts as a delimited, numbered list of facts
164 |         delimiter = 40 * "-"
165 |         facts = "".join(f"\n\n{delimiter}\n{i + 1}.) {text}\n" for i, text in enumerate(document_texts))
166 |         system_message_content = f"{leader} {facts}"
167 | system_message = SystemMessage(content=system_message_content)
168 | # ---------------------------------------------------------------------
169 | # finished with hybrid search setup
170 | # ---------------------------------------------------------------------
171 | star_line = 80 * "*"
172 | logger.info(
173 | "\n%s\n"
174 | "rag() Retrieval Augmented Generation prompt"
175 | "Diagnostic information:\n"
176 | " Retrieved %i related documents from Pinecone\n"
177 | " System messages contains %i words\n"
178 | " System Prompt:"
179 | "\n <============================ BEGIN ===============================>"
180 | "%s"
181 | "\n <============================= END ================================>\n\n",
182 | star_line,
183 | len(documents),
184 | len(system_message.content.split()),
185 | system_message.content,
186 | )
187 |
188 | # 2.) get a response from the chat model
189 | response = self.cached_chat_request(system_message=system_message, human_message=human_message)
190 |
191 | return str(response.content)
192 |
--------------------------------------------------------------------------------
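A minimal usage sketch for the class above (not a file in this repo). It assumes a populated .env with OpenAI and Pinecone credentials and, for rag(), a Pinecone index already populated via load():

from langchain.schema import HumanMessage, SystemMessage

from models.hybrid_search_retreiver import HybridSearchRetriever

hsr = HybridSearchRetriever()

# plain chat request; responses are cached in-process by LangChain's InMemoryCache
reply = hsr.cached_chat_request(
    system_message=SystemMessage(content="You are a helpful assistant."),
    human_message=HumanMessage(content="What is hybrid search retrieval?"),
)
print(reply.content)

# Retrieval Augmented Generation against the Pinecone vector store
print(hsr.rag(human_message="Summarize the PDF documents that were loaded."))

--------------------------------------------------------------------------------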
/models/pinecone.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pylint: disable=E0611,E1101
3 | """A class to manage the lifecycle of Pinecone vector database indexes."""
4 |
5 | # document loading
6 | import glob
7 |
8 | # general purpose imports
9 | import json
10 | import logging
11 | import os
12 |
13 | from langchain.text_splitter import RecursiveCharacterTextSplitter
14 | from langchain_community.document_loaders.pdf import PyPDFLoader
15 | from langchain_openai import OpenAIEmbeddings
16 | from langchain_pinecone import PineconeVectorStore
17 |
18 | # pinecone integration
19 | from pinecone import Pinecone, ServerlessSpec
20 | from pinecone.core.openapi.shared.exceptions import PineconeApiException
21 | from pinecone.models import IndexList
22 |
23 | # this project
24 | from models.conf import settings
25 |
26 |
27 | logging.basicConfig(level=logging.DEBUG if settings.debug_mode else logging.INFO)
28 |
29 |
30 | class PineconeIndex:
31 | """Pinecone helper class."""
32 |
33 | _pinecone = None
34 | _index: Pinecone.Index = None
35 | _index_name: str = None
36 | _text_splitter: RecursiveCharacterTextSplitter = None
37 | _openai_embeddings: OpenAIEmbeddings = None
38 | _vector_store: PineconeVectorStore = None
39 |
40 | def __init__(self, index_name: str = None):
41 | self.init()
42 | self.index_name = index_name or settings.pinecone_index_name
43 | logging.debug("PineconeIndex initialized with index_name: %s", self.index_name)
44 | logging.debug(self.index_stats)
45 |
46 | @property
47 | def index_name(self) -> str:
48 | """index name."""
49 | return self._index_name
50 |
51 | @index_name.setter
52 | def index_name(self, value: str) -> None:
53 | """Set index name."""
54 | if self._index_name != value:
55 | self.init()
56 | self._index_name = value
57 | self.init_index()
58 |
59 | @property
60 | def index(self) -> Pinecone.Index:
61 | """pinecone.Index lazy read-only property."""
62 | if self._index is None:
63 | self.init_index()
64 | self._index = self.pinecone.Index(name=self.index_name)
65 | return self._index
66 |
67 | @property
68 |     def index_stats(self) -> str:
69 |         """index stats, as a JSON-formatted string."""
70 | retval = self.index.describe_index_stats()
71 | return json.dumps(retval.to_dict(), indent=4)
72 |
73 | @property
74 | def initialized(self) -> bool:
75 | """initialized read-only property."""
76 | indexes = self.pinecone.list_indexes()
77 | return self.index_name in indexes.names()
78 |
79 | @property
80 | def vector_store(self) -> PineconeVectorStore:
81 | """Pinecone lazy read-only property."""
82 | if self._vector_store is None:
83 | if not self.initialized:
84 | self.init_index()
85 | self._vector_store = PineconeVectorStore(
86 | index=self.index,
87 | embedding=self.openai_embeddings,
88 | text_key=settings.pinecone_vectorstore_text_key,
89 | )
90 | return self._vector_store
91 |
92 | @property
93 | def openai_embeddings(self) -> OpenAIEmbeddings:
94 | """OpenAIEmbeddings lazy read-only property."""
95 | if self._openai_embeddings is None:
96 | # pylint: disable=no-member
97 | self._openai_embeddings = OpenAIEmbeddings(
98 | api_key=settings.openai_api_key.get_secret_value(),
99 | organization=settings.openai_api_organization,
100 | )
101 | return self._openai_embeddings
102 |
103 | @property
104 | def pinecone(self) -> Pinecone:
105 | """Pinecone lazy read-only property."""
106 | if self._pinecone is None:
107 | print("Initializing Pinecone...")
108 | api_key = settings.pinecone_api_key.get_secret_value()
109 | print(f"API Key: {api_key[:12]}****------")
110 | self._pinecone = Pinecone(api_key=api_key)
111 | return self._pinecone
112 |
113 | @property
114 | def text_splitter(self) -> RecursiveCharacterTextSplitter:
115 | """lazy read-only property."""
116 | if self._text_splitter is None:
117 | self._text_splitter = RecursiveCharacterTextSplitter()
118 | return self._text_splitter
119 |
120 | def init_index(self):
121 | """Verify that an index named self.index_name exists in Pinecone. If not, create it."""
122 | indexes: IndexList = None
123 | indexes = self.pinecone.list_indexes()
124 | if self.index_name not in indexes.names():
125 | logging.debug("Index does not exist.")
126 | self.create()
127 |
128 | # pylint: disable=no-member
129 | def init(self):
130 | """Initialize Pinecone."""
131 |
132 | self._index = None
133 | self._index_name = None
134 | self._text_splitter = None
135 | self._openai_embeddings = None
136 | self._vector_store = None
137 |
138 | def delete(self):
139 | """Delete index."""
140 | if not self.initialized:
141 | logging.debug("Index does not exist. Nothing to delete.")
142 | return
143 | print("Deleting index...")
144 | self.pinecone.delete_index(self.index_name)
145 |
146 | def create(self):
147 | """Create index."""
148 | print("Creating index. This may take a few minutes...")
149 | serverless_spec = ServerlessSpec(
150 | cloud="aws",
151 | region="us-east-1",
152 | )
153 | try:
154 | self.pinecone.create_index(
155 | name=self.index_name,
156 | dimension=settings.pinecone_dimensions,
157 | metric=settings.pinecone_metric,
158 | spec=serverless_spec,
159 | )
160 | print("Index created.")
161 |         except PineconeApiException:
162 |             pass  # swallow the API error, e.g. when the index already exists
163 |
164 | def initialize(self):
165 | """Initialize index."""
166 | self.delete()
167 | self.create()
168 |
169 | def pdf_loader(self, filepath: str):
170 | """
171 | Embed PDF.
172 | 1. Load PDF document text data
173 | 2. Split into pages
174 | 3. Embed each page
175 | 4. Store in Pinecone
176 |
177 | Note: it's important to make sure that the "context" field that holds the document text
178 | in the metadata is not indexed. Currently you need to specify explicitly the fields you
179 |         do want to index. For more information, see
180 | https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing
181 | """
182 | self.initialize()
183 |
184 |         pdf_files = glob.glob(os.path.join(filepath, "*.pdf"))
185 |         for i, pdf_file in enumerate(pdf_files, start=1):
186 |             print(f"Loading PDF {i} of {len(pdf_files)}: {pdf_file}")
187 |             loader = PyPDFLoader(file_path=pdf_file)
188 |             docs = loader.load()
189 |             for k, doc in enumerate(docs, start=1):
190 |                 print(k * "-", end="\r")
191 |                 # split the page into chunks, embed each chunk, and upsert into Pinecone
192 |                 documents = self.text_splitter.create_documents([doc.page_content])
193 |                 document_texts = [document.page_content for document in documents]
194 |                 embeddings = self.openai_embeddings.embed_documents(document_texts)
195 |                 self.vector_store.add_documents(documents=documents, embeddings=embeddings)
196 |
197 |         print("Finished loading PDFs. \n" + self.index_stats)
198 |
--------------------------------------------------------------------------------
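A minimal lifecycle sketch for PineconeIndex (not a file in this repo; the ./data path is illustrative). Note that pdf_loader() calls initialize(), which deletes and recreates the index before re-embedding:

from models.pinecone import PineconeIndex

index = PineconeIndex()  # lazily authenticates with settings.pinecone_api_key
index.pdf_loader(filepath="./data")  # destructive: recreates the index, then embeds ./data/*.pdf
print(index.index_stats)  # JSON string from describe_index_stats()

--------------------------------------------------------------------------------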
/models/prompt_templates.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pylint: disable=too-few-public-methods
3 | """Sales Support Model (hsr) prompt templates"""
4 |
5 | from langchain.prompts import PromptTemplate
6 |
7 |
8 | class UofPennPromptTemplates:
9 | """Netec Prompt Templates."""
10 |
11 |     sales_role: str = """You are a helpful student advisor at the Wharton School of the
12 |     University of Pennsylvania. You provide concise explanations, in 100 words or less,
13 |     to questions about the courses that Wharton offers."""
14 |
15 | @classmethod
16 | def get_properties(cls):
17 | """return a list of properties of this class."""
18 | return [attr for attr in dir(cls) if isinstance(getattr(cls, attr), property)]
19 |
20 | @property
21 | def online_courses(self) -> PromptTemplate:
22 | """Get prompt."""
23 | template = (
24 | self.sales_role
25 | + """
26 | Explain the online courses Wharton offers about {concept}
27 | """
28 | )
29 | return PromptTemplate(input_variables=["concept"], template=template)
30 |
31 | @property
32 | def certification_programs(self) -> PromptTemplate:
33 | """Get prompt."""
34 | template = (
35 | self.sales_role
36 | + """
37 |             Summarize Wharton's executive and online programs in which learners
38 |             can earn certificates for {concept}
39 | """
40 | )
41 | return PromptTemplate(input_variables=["concept"], template=template)
42 |
43 |
44 | class NetecPromptTemplates:
45 | """Netec Prompt Templates."""
46 |
47 | sales_role: str = """You are a helpful sales assistant at Netec who sells
48 | specialized training and exam preparation services to existing customers.
49 | You provide concise explanations of the services that Netec offers in 100
50 | words or less."""
51 |
52 | @classmethod
53 | def get_properties(cls):
54 | """return a list of properties of this class."""
55 | return [attr for attr in dir(cls) if isinstance(getattr(cls, attr), property)]
56 |
57 | @property
58 | def training_services(self) -> PromptTemplate:
59 | """Get prompt."""
60 | template = (
61 | self.sales_role
62 | + """
63 | Explain the training services that Netec offers about {concept}
64 | """
65 | )
66 | return PromptTemplate(input_variables=["concept"], template=template)
67 |
68 | @property
69 | def oracle_training_services(self) -> PromptTemplate:
70 | """Get prompt."""
71 | template = (
72 | self.sales_role
73 | + """
74 | Note that Netec is the exclusive provider in Latin America of Oracle training services
75 | for the 6 levels of Oracle Certification credentials: Oracle Certified Junior Associate (OCJA),
76 | Oracle Certified Associate (OCA), Oracle Certified Professional (OCP),
77 | Oracle Certified Master (OCM), Oracle Certified Expert (OCE) and
78 | Oracle Certified Specialist (OCS).
79 | Summarize their programs for {concept}
80 | """
81 | )
82 | return PromptTemplate(input_variables=["concept"], template=template)
83 |
--------------------------------------------------------------------------------
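A short sketch (not part of the repo) of how a template property resolves its {concept} placeholder before the formatted string is sent to the chat model by prompt_with_template():

from models.prompt_templates import NetecPromptTemplates

templates = NetecPromptTemplates()
prompt = templates.training_services  # a PromptTemplate with one input variable
print(prompt.input_variables)  # ['concept']
print(prompt.format(concept="AWS certification"))
# prints the sales_role preamble followed by:
#   "Explain the training services that Netec offers about AWS certification"

--------------------------------------------------------------------------------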
/models/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/models/tests/__init__.py
--------------------------------------------------------------------------------
/models/tests/mock_data/.env.test_01:
--------------------------------------------------------------------------------
1 | DEBUG_MODE = True
2 | DUMP_DEFAULTS = True
3 | LANGCHAIN_MEMORY_KEY = "TEST_chat_history"
4 | PINECONE_ENVIRONMENT = "TEST_gcp-starter"
5 | PINECONE_INDEX_NAME = "TEST_rag"
6 | PINECONE_VECTORSTORE_TEXT_KEY = "TEST_lc_id"
7 | PINECONE_METRIC = "TEST_dotproduct"
8 | PINECONE_DIMENSIONS = 1
9 | OPENAI_ENDPOINT_IMAGE_N = 1
10 | OPENAI_ENDPOINT_IMAGE_SIZE = "TEST_1024x768"
11 | OPENAI_CHAT_CACHE = False
12 | OPENAI_CHAT_MODEL_NAME = "TEST_gpt-4"
13 | OPENAI_PROMPT_MODEL_NAME = "TEST_gpt-4"
14 | OPENAI_CHAT_TEMPERATURE = 1.0
15 | OPENAI_CHAT_MAX_RETRIES = 5
16 |
--------------------------------------------------------------------------------
/models/tests/mock_data/.env.test_illegal_nulls:
--------------------------------------------------------------------------------
1 | DEBUG_MODE=
2 | AWS_REKOGNITION_FACE_DETECT_MAX_FACES_COUNT=
3 | AWS_REKOGNITION_FACE_DETECT_THRESHOLD=
4 |
--------------------------------------------------------------------------------
/models/tests/mock_data/.env.test_legal_nulls:
--------------------------------------------------------------------------------
1 | LANGCHAIN_MEMORY_KEY=
2 | OPENAI_ENDPOINT_IMAGE_SIZE=
3 |
--------------------------------------------------------------------------------
/models/tests/mock_data/test_load.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FullStackWithLawrence/openai-embeddings/a72c8f78404f65938f59543d89072393717856f0/models/tests/mock_data/test_load.pdf
--------------------------------------------------------------------------------
/models/tests/test_configuration.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa: F401
3 | """
4 | Test conf module.
5 | """
6 | import os
7 | from unittest.mock import patch
8 |
9 | import pytest # pylint: disable=unused-import
10 | from dotenv import load_dotenv
11 | from pydantic import ValidationError as PydanticValidationError
12 |
13 | from models.conf import Settings, SettingsDefaults
14 |
15 |
16 | HERE = os.path.dirname(os.path.abspath(__file__))
17 |
18 |
19 | class TestConfig:
20 | """Test config.settings."""
21 |
22 | def env_path(self, filename):
23 | """Return the path to the .env file."""
24 | return os.path.join(HERE, "mock_data", filename)
25 |
26 | def test_conf_defaults(self):
27 | """Test that settings == SettingsDefaults when no .env is in use."""
28 | os.environ.clear()
29 | mock_settings = Settings()
30 | assert mock_settings.langchain_memory_key == SettingsDefaults.LANGCHAIN_MEMORY_KEY
31 | assert mock_settings.debug_mode == SettingsDefaults.DEBUG_MODE
32 |
33 | assert mock_settings.openai_api_key == SettingsDefaults.OPENAI_API_KEY
34 | assert mock_settings.openai_api_organization == SettingsDefaults.OPENAI_API_ORGANIZATION
35 | assert mock_settings.openai_chat_cache == SettingsDefaults.OPENAI_CHAT_CACHE
36 | assert mock_settings.openai_chat_max_retries == SettingsDefaults.OPENAI_CHAT_MAX_RETRIES
37 | assert mock_settings.openai_chat_model_name == SettingsDefaults.OPENAI_CHAT_MODEL_NAME
38 | assert mock_settings.openai_endpoint_image_n == SettingsDefaults.OPENAI_ENDPOINT_IMAGE_N
39 | assert mock_settings.openai_endpoint_image_size == SettingsDefaults.OPENAI_ENDPOINT_IMAGE_SIZE
40 | assert mock_settings.openai_prompt_model_name == SettingsDefaults.OPENAI_PROMPT_MODEL_NAME
41 |
42 | assert mock_settings.pinecone_api_key == SettingsDefaults.PINECONE_API_KEY
43 | assert mock_settings.pinecone_dimensions == SettingsDefaults.PINECONE_DIMENSIONS
44 | assert mock_settings.pinecone_environment == SettingsDefaults.PINECONE_ENVIRONMENT
45 | assert mock_settings.pinecone_index_name == SettingsDefaults.PINECONE_INDEX_NAME
46 | assert mock_settings.pinecone_metric == SettingsDefaults.PINECONE_METRIC
47 | assert mock_settings.pinecone_vectorstore_text_key == SettingsDefaults.PINECONE_VECTORSTORE_TEXT_KEY
48 |
49 | # pylint: disable=no-member
50 | def test_conf_defaults_secrets(self):
51 | """Test that settings secrets match the defaults."""
52 | os.environ.clear()
53 | mock_settings = Settings()
54 | assert mock_settings.openai_api_key.get_secret_value() == SettingsDefaults.OPENAI_API_KEY.get_secret_value()
55 | assert mock_settings.pinecone_api_key.get_secret_value() == SettingsDefaults.PINECONE_API_KEY.get_secret_value()
56 |
57 | def test_env_legal_nulls(self):
58 | """Test that settings handles missing .env values."""
59 | os.environ.clear()
60 | env_path = self.env_path(".env.test_legal_nulls")
61 | print("env_path", env_path)
62 | loaded = load_dotenv(env_path)
63 | assert loaded
64 |
65 | mock_settings = Settings()
66 | assert mock_settings.langchain_memory_key == SettingsDefaults.LANGCHAIN_MEMORY_KEY
67 | assert mock_settings.openai_endpoint_image_size == SettingsDefaults.OPENAI_ENDPOINT_IMAGE_SIZE
68 |
69 | def test_env_illegal_nulls(self):
70 | """Test that settings handles missing .env values."""
71 | os.environ.clear()
72 | env_path = self.env_path(".env.test_illegal_nulls")
73 | print("env_path", env_path)
74 | loaded = load_dotenv(env_path)
75 | assert loaded
76 |
77 | with pytest.raises(PydanticValidationError):
78 | Settings()
79 |
80 | def test_env_overrides(self):
81 | """Test that settings takes custom .env values."""
82 | os.environ.clear()
83 | env_path = self.env_path(".env.test_01")
84 | loaded = load_dotenv(env_path)
85 | assert loaded
86 |
87 | mock_settings = Settings()
88 |
89 | assert mock_settings.debug_mode is True
90 | assert mock_settings.dump_defaults is True
91 | assert mock_settings.langchain_memory_key == "TEST_chat_history"
92 | assert mock_settings.pinecone_environment == "TEST_gcp-starter"
93 | assert mock_settings.pinecone_index_name == "TEST_rag"
94 | assert mock_settings.pinecone_vectorstore_text_key == "TEST_lc_id"
95 | assert mock_settings.pinecone_metric == "TEST_dotproduct"
96 | assert mock_settings.pinecone_dimensions == 1
97 | assert mock_settings.openai_endpoint_image_n == 1
98 | assert mock_settings.openai_endpoint_image_size == "TEST_1024x768"
99 | assert mock_settings.openai_chat_cache is False
100 | assert mock_settings.openai_chat_model_name == "TEST_gpt-4"
101 | assert mock_settings.openai_prompt_model_name == "TEST_gpt-4"
102 | assert mock_settings.openai_chat_temperature == 1.0
103 | assert mock_settings.openai_chat_max_retries == 5
104 |
105 | @patch.dict(os.environ, {"OPENAI_CHAT_MAX_RETRIES": "-1"})
106 | def test_invalid_chat_max_retries(self):
107 | """Test that Pydantic raises a validation error for environment variable w negative integer values."""
108 |
109 | with pytest.raises(PydanticValidationError):
110 | Settings()
111 |
112 | @patch.dict(os.environ, {"OPENAI_CHAT_TEMPERATURE": "-1"})
113 | def test_invalid_chat_temperature(self):
114 | """Test that Pydantic raises a validation error for environment variable w negative integer values."""
115 |
116 | with pytest.raises(PydanticValidationError):
117 | Settings()
118 |
119 | @patch.dict(os.environ, {"PINECONE_DIMENSIONS": "-1"})
120 | def test_invalid_pinecone_dimensions(self):
121 | """Test that Pydantic raises a validation error for environment variable w negative integer values."""
122 |
123 | with pytest.raises(PydanticValidationError):
124 | Settings()
125 |
126 | def test_configure_with_class_constructor(self):
127 | """test that we can set values with the class constructor"""
128 | os.environ.clear()
129 |
130 | mock_settings = Settings(
131 | debug_mode=True,
132 | dump_defaults=True,
133 | langchain_memory_key="TEST_chat_history",
134 | pinecone_environment="TEST_gcp-starter",
135 | pinecone_index_name="TEST_rag",
136 | pinecone_vectorstore_text_key="TEST_lc_id",
137 | pinecone_metric="TEST_dotproduct",
138 | pinecone_dimensions=1,
139 | openai_endpoint_image_n=1,
140 | openai_endpoint_image_size="TEST_1024x768",
141 | openai_chat_cache=False,
142 | openai_chat_model_name="TEST_gpt-4",
143 | openai_prompt_model_name="TEST_text-davinci-003",
144 | openai_chat_temperature=1.0,
145 | openai_chat_max_retries=5,
146 | )
147 |
148 | assert mock_settings.debug_mode is True
149 | assert mock_settings.dump_defaults is True
150 | assert mock_settings.langchain_memory_key == "TEST_chat_history"
151 | assert mock_settings.pinecone_environment == "TEST_gcp-starter"
152 | assert mock_settings.pinecone_index_name == "TEST_rag"
153 | assert mock_settings.pinecone_vectorstore_text_key == "TEST_lc_id"
154 | assert mock_settings.pinecone_metric == "TEST_dotproduct"
155 | assert mock_settings.pinecone_dimensions == 1
156 | assert mock_settings.openai_endpoint_image_n == 1
157 | assert mock_settings.openai_endpoint_image_size == "TEST_1024x768"
158 | assert mock_settings.openai_chat_cache is False
159 | assert mock_settings.openai_chat_model_name == "TEST_gpt-4"
160 | assert mock_settings.openai_prompt_model_name == "TEST_text-davinci-003"
161 | assert mock_settings.openai_chat_temperature == 1.0
162 | assert mock_settings.openai_chat_max_retries == 5
163 |
164 | def test_readonly_settings(self):
165 | """test that we can't set readonly values with the class constructor"""
166 |
167 | mock_settings = Settings()
168 | with pytest.raises(PydanticValidationError):
169 | mock_settings.langchain_memory_key = "TEST_chat_history"
170 | with pytest.raises(PydanticValidationError):
171 | mock_settings.pinecone_environment = "TEST_gcp-starter"
172 | with pytest.raises(PydanticValidationError):
173 | mock_settings.pinecone_index_name = "TEST_rag"
174 | with pytest.raises(PydanticValidationError):
175 | mock_settings.pinecone_vectorstore_text_key = "TEST_lc_id"
176 | with pytest.raises(PydanticValidationError):
177 | mock_settings.pinecone_metric = "TEST_dotproduct"
178 | with pytest.raises(PydanticValidationError):
179 | mock_settings.pinecone_dimensions = 1
180 | with pytest.raises(PydanticValidationError):
181 | mock_settings.openai_endpoint_image_n = 1
182 | with pytest.raises(PydanticValidationError):
183 | mock_settings.openai_endpoint_image_size = "TEST_1024x768"
184 | with pytest.raises(PydanticValidationError):
185 | mock_settings.openai_chat_cache = False
186 | with pytest.raises(PydanticValidationError):
187 | mock_settings.openai_chat_model_name = "TEST_gpt-4"
188 | with pytest.raises(PydanticValidationError):
189 | mock_settings.openai_prompt_model_name = "TEST_text-davinci-003"
190 | with pytest.raises(PydanticValidationError):
191 | mock_settings.openai_chat_temperature = 1.0
192 | with pytest.raises(PydanticValidationError):
193 | mock_settings.openai_chat_max_retries = 5
194 |
195 | def test_dump(self):
196 | """Test that dump is a dict."""
197 |
198 | mock_settings = Settings()
199 | assert isinstance(mock_settings.dump, dict)
200 |
201 | def test_dump_keys(self):
202 | """Test that dump contains the expected keys."""
203 |
204 | dump = Settings().dump
205 | assert "secrets" in dump.keys()
206 | assert "environment" in dump.keys()
207 | assert "langchain" in dump.keys()
208 | assert "openai_api" in dump.keys()
209 | assert "pinecone_api" in dump.keys()
210 |
--------------------------------------------------------------------------------
/models/tests/test_examples.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa: F401
3 | """
4 | Test command line example prompts.
5 | """
6 | from unittest.mock import MagicMock, patch
7 |
8 | import pytest # pylint: disable=unused-import
9 | from langchain.schema import HumanMessage, SystemMessage
10 |
11 | from models.examples.certification_programs import hsr as uofpenn_certification_program
12 | from models.examples.online_courses import hsr as uofpenn_online_hsr
13 | from models.examples.prompt import hsr as prompt_hsr
14 | from models.examples.rag import hsr as rag_hsr
15 | from models.prompt_templates import NetecPromptTemplates
16 |
17 |
18 | HUMAN_MESSAGE = "this is a test"
19 | SYSTEM_PROMPT = """you are a helpful assistant. If you are prompted,
20 | 'this is a test', then return the word 'SUCCESS' in upper case. Return only
21 | this single word, in upper case. Do not embellish. Do not further prompt
22 | the user for any reason."""
23 |
24 |
25 | class TestExamples:
26 | """Test command line examples."""
27 |
28 | @patch("argparse.ArgumentParser.parse_args")
29 | def test_prompt(self, mock_parse_args):
30 | """Test prompt example."""
31 |
32 | mock_args = MagicMock()
33 | mock_args.system_prompt = SYSTEM_PROMPT
34 | mock_args.human_prompt = HUMAN_MESSAGE
35 | mock_parse_args.return_value = mock_args
36 |
37 | system_message = SystemMessage(content=SYSTEM_PROMPT)
38 | human_message = HumanMessage(content=HUMAN_MESSAGE)
39 |         result = prompt_hsr.cached_chat_request(system_message=system_message, human_message=human_message)
40 | assert result.content == "SUCCESS"
41 |
42 | @patch("argparse.ArgumentParser.parse_args")
43 | def test_rag(self, mock_parse_args):
44 | """Test RAG example."""
45 | mock_args = MagicMock()
46 | mock_args.human_message = HUMAN_MESSAGE
47 | mock_parse_args.return_value = mock_args
48 |
49 | human_message = HumanMessage(content=mock_args.human_message)
50 | result = rag_hsr.rag(human_message=human_message)
51 | assert isinstance(result, str)
52 | assert len(result) > 0
53 |
54 | @patch("argparse.ArgumentParser.parse_args")
55 | def test_training_services(self, mock_parse_args):
56 | """Test training services templates."""
57 | mock_args = MagicMock()
58 | mock_args.human_message = HUMAN_MESSAGE
59 | mock_parse_args.return_value = mock_args
60 |
61 | templates = NetecPromptTemplates()
62 | prompt = templates.training_services
63 |
64 | result = uofpenn_certification_program.prompt_with_template(prompt=prompt, concept=mock_args.human_message)
65 | assert isinstance(result, str)
66 | assert len(result) > 0
67 |
68 | @patch("argparse.ArgumentParser.parse_args")
69 | def test_oracle_training_services(self, mock_parse_args):
70 | """Test oracle training services."""
71 | mock_args = MagicMock()
72 | mock_args.human_message = HUMAN_MESSAGE
73 | mock_parse_args.return_value = mock_args
74 |
75 | templates = NetecPromptTemplates()
76 | prompt = templates.oracle_training_services
77 |
78 | result = uofpenn_online_hsr.prompt_with_template(prompt=prompt, concept=mock_args.human_message)
79 | assert isinstance(result, str)
80 | assert len(result) > 0
81 |
--------------------------------------------------------------------------------
/models/tests/test_hsr.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pylint: disable=E0611,E1101
3 | # flake8: noqa: F401
4 | """
5 | Test integrity of base class.
6 | """
7 | import pytest # pylint: disable=unused-import
8 |
9 | # from langchain_community.chat_models import ChatOpenAI
10 | from langchain_openai import ChatOpenAI
11 |
12 | from models.hybrid_search_retreiver import HybridSearchRetriever
13 | from models.pinecone import PineconeIndex
14 |
15 |
16 | class TestSalesSupportModel:
17 | """Test HybridSearchRetriever class."""
18 |
19 | def test_01_basic(self):
20 | """Ensure that we can instantiate the class."""
21 |
22 | # pylint: disable=broad-except
23 | try:
24 | HybridSearchRetriever()
25 | except Exception as e:
26 | assert False, f"initialization of HybridSearchRetriever() failed with exception: {e}"
27 |
28 |     def test_02_class_attribute_types(self):
29 |         """Ensure that class attributes are of the correct type."""
30 |
31 | hsr = HybridSearchRetriever()
32 | assert isinstance(hsr.chat, ChatOpenAI)
33 | assert isinstance(hsr.pinecone, PineconeIndex)
34 |
--------------------------------------------------------------------------------
/models/tests/test_openai.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa: F401
3 | # pylint: disable=too-few-public-methods
4 | """
5 | Test integrity of base class.
6 | """
7 | import pytest # pylint: disable=unused-import
8 |
9 | from models.hybrid_search_retreiver import HybridSearchRetriever
10 |
11 |
12 | class TestOpenAI:
13 | """Test HybridSearchRetriever class."""
14 |
15 | def test_03_test_openai_connectivity(self):
16 | """Ensure that we have connectivity to OpenAI."""
17 |
18 | hsr = HybridSearchRetriever()
19 | retval = hsr.cached_chat_request(
20 | "your are a helpful assistant", "please return the value 'CORRECT' in all upper case."
21 | )
22 | assert retval.content == "CORRECT"
23 |
--------------------------------------------------------------------------------
/models/tests/test_pinecone.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa: F401
3 | """
4 | Test this model's Pinecone helper class.
5 | """
6 |
7 | import os
8 |
9 | import pinecone as oem_pinecone
10 | import pytest # pylint: disable=unused-import
11 | from pinecone import Pinecone
12 |
13 | from models.conf import settings
14 | from models.pinecone import PineconeIndex
15 |
16 |
17 | class TestPinecone:
18 | """Test HybridSearchRetriever class."""
19 |
20 | def test_01_can_instantiate(self):
21 | """Ensure that we instantiate the object."""
22 | # pylint: disable=broad-except
23 | try:
24 | PineconeIndex()
25 | except Exception as e:
26 | assert False, f"Pinecone() failed with exception: {e}"
27 |
28 | def test_02_init(self):
29 | """Ensure that we can initialize Pinecone."""
30 | pinecone = PineconeIndex()
31 | # pylint: disable=broad-except
32 | try:
33 | pinecone.init()
34 | except Exception as e:
35 | assert False, f"Pinecone.init() failed with exception: {e}"
36 |
37 | def test_03_index(self):
38 | """Test that the index name is correct."""
39 | pinecone = PineconeIndex()
40 | assert pinecone.index_name == settings.pinecone_index_name
41 |
42 | def test_04_initialize(self):
43 | """Test that the index initializes."""
44 | pinecone = PineconeIndex()
45 | # pylint: disable=broad-except
46 | try:
47 | pinecone.initialize()
48 | except Exception as e:
49 | assert False, f"Pinecone.initialize() failed with exception: {e}"
50 | assert isinstance(pinecone.index, oem_pinecone.Index)
51 |
52 | def test_05_delete(self):
53 | """Test that the index can be deleted."""
54 | pinecone_index = PineconeIndex()
55 |
56 | # pylint: disable=E1101
57 | api_key = settings.pinecone_api_key.get_secret_value()
58 | pinecone = Pinecone(api_key=api_key)
59 | indexes = pinecone.list_indexes().names()
60 | assert pinecone_index.index_name in indexes
61 | # pylint: disable=broad-except
62 | try:
63 | pinecone_index.delete()
64 | except Exception as e:
65 | assert False, f"Pinecone.delete() failed with exception: {e}"
66 |
67 | def test_06_create(self):
68 | """Test that the index can be created."""
69 | pinecone_index = PineconeIndex()
70 |
71 | # pylint: disable=E1101
72 | api_key = settings.pinecone_api_key.get_secret_value()
73 | pinecone = Pinecone(api_key=api_key)
74 |
75 | indexes = pinecone.list_indexes().names()
76 | if pinecone_index.index_name in indexes:
77 | pinecone_index.delete()
78 |
79 | # pylint: disable=broad-except
80 | try:
81 | pinecone_index.create()
82 | except Exception as e:
83 | assert False, f"Pinecone.create() failed with exception: {e}"
84 | assert isinstance(pinecone_index.index, oem_pinecone.Index)
85 | pinecone_index.delete()
86 |
87 | def test_07_load_pdf(self):
88 | """Test that we can load a PDF document to the index."""
89 | HERE = os.path.dirname(os.path.abspath(__file__))
90 | test_file = os.path.join(HERE, "mock_data", "test_load.pdf")
91 |
92 | if not os.path.exists(test_file):
93 | pytest.skip(f"File {test_file} does not exist")
94 |
95 | pinecone = PineconeIndex()
96 | # pylint: disable=broad-except
97 | try:
98 |             pinecone.pdf_loader(filepath=os.path.dirname(test_file))  # pdf_loader expects a directory
99 | except Exception as e:
100 | assert False, f"Pinecone.load_pdf() failed with exception: {e}"
101 | pinecone.delete()
102 |
--------------------------------------------------------------------------------
/models/tests/test_prompt_templates.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa: F401
3 | # pylint: disable=too-few-public-methods
4 | """
5 | Test integrity of base class.
6 | """
7 | import pytest # pylint: disable=unused-import
8 | from langchain.prompts import PromptTemplate
9 |
10 | from models.prompt_templates import NetecPromptTemplates
11 |
12 |
13 | class TestPromptTemplates:
14 | """Test HybridSearchRetriever class."""
15 |
16 | def test_01_prompt_with_template(self):
17 | """Ensure that all properties of the template class are PromptTemplate instances."""
18 | templates = NetecPromptTemplates()
19 | for prop_name in templates.get_properties():
20 | prop = getattr(templates, prop_name)
21 | assert isinstance(prop, PromptTemplate)
22 |
--------------------------------------------------------------------------------
/models/tests/test_prompts.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa: F401
3 | """
4 | Test integrity of base class.
5 | """
6 | import pytest # pylint: disable=unused-import
7 |
8 | from models.hybrid_search_retreiver import HybridSearchRetriever
9 | from models.prompt_templates import NetecPromptTemplates
10 |
11 |
12 | class TestPrompts:
13 | """Test HybridSearchRetriever class."""
14 |
15 | hsr = HybridSearchRetriever()
16 | templates = NetecPromptTemplates()
17 |
18 | def test_oracle_training_services(self):
19 | """Test a prompt with the Oracle training services template"""
20 |
21 | prompt = self.templates.oracle_training_services
22 | result = self.hsr.prompt_with_template(prompt=prompt, concept="Oracle database administrator")
23 | assert result
24 | assert "Oracle" in result
25 | assert "training" in result
26 |
27 | def test_training_services(self):
28 | """Test a prompt with the training services template"""
29 |
30 | prompt = self.templates.training_services
31 | result = self.hsr.prompt_with_template(prompt=prompt, concept="Microsoft certified Azure AI engineer associate")
32 | assert result
33 | assert "Microsoft" in result or "Azure" in result
34 | assert "training" in result
35 |
--------------------------------------------------------------------------------
/models/yt.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pylint: disable=E0611
3 | """
4 | LangChain Quickstart
5 | ~~~~~~~~~~~~~~~~~~~~
6 | LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners
7 |
8 | see: https://www.youtube.com/watch?v=aywZrzNaKjs
9 | https://github.com/rabbitmetrics/langchain-13-min
10 | """
11 | import logging
12 | import os
13 |
14 | import pinecone
15 | from dotenv import find_dotenv, load_dotenv
16 |
17 | # 5.) sequential chains
18 | # 4.) chains
19 | from langchain.chains.llm import LLMChain
20 | from langchain.chains.sequential import SimpleSequentialChain
21 |
22 | # 3.) prompt templates
23 | from langchain.prompts import PromptTemplate
24 |
25 | # 2.) models and messages
26 | from langchain.schema import HumanMessage, SystemMessage # AIMessage (not used)
27 |
28 | # 6.) embeddings
29 | from langchain.text_splitter import RecursiveCharacterTextSplitter
30 |
31 | # 1.) wrappers
32 | from langchain_community.llms.openai import OpenAI
33 |
34 | # 8.) LangChain agents
35 | from langchain_experimental.agents.agent_toolkits.python.base import create_python_agent
36 | from langchain_experimental.utilities.python import PythonREPL
37 |
38 | # from langchain_community.chat_models import ChatOpenAI
39 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings
40 |
41 | # 7.) pinecone client
42 | from langchain_pinecone import PineconeVectorStore as Pinecone
43 |
44 | from models.conf import settings
45 |
46 |
47 | logging.basicConfig(level=logging.DEBUG if settings.debug_mode else logging.INFO)
48 | logger = logging.getLogger(__name__)
49 |
50 | # Load environment variables from .env file in all folders
51 | # pylint: disable=duplicate-code
52 | dotenv_path = find_dotenv()
53 | if os.path.exists(dotenv_path):
54 | load_dotenv(dotenv_path=dotenv_path, verbose=True)
55 | OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
56 | OPENAI_API_ORGANIZATION = os.environ["OPENAI_API_ORGANIZATION"]
57 | else:
58 | raise FileNotFoundError("No .env file found in root directory of repository")
59 |
60 |
61 | class LangChainDev:
62 | """LangChain Quickstart"""
63 |
64 | PINECONE_INDEX_NAME = "langchain-quickstart"
65 |
66 | multi_prompt_explanation = None
67 | texts_splitter_results = None
68 | pinecone_search = None
69 |     openai_embedding = OpenAIEmbeddings(model="text-embedding-ada-002")  # minute: 10:05
70 | query_result = None
71 | agent_executor = create_python_agent( # minute: 11:45
72 | llm=OpenAI(temperature=0, max_tokens=1000),
73 | tool=PythonREPL(),
74 | verbose=True,
75 | )
76 |     # pylint: disable=no-member
77 |     # note: pinecone-client v3+ removed pinecone.init(); instantiate a client instead. minute 10:43
78 |     # Pinecone.from_documents() below reads PINECONE_API_KEY from the environment.
79 |     pinecone_client = pinecone.Pinecone(api_key=settings.pinecone_api_key.get_secret_value())
80 |
81 | # LLM wrappers. minute 5:46
82 | def test_01_basic(self):
83 | """Test a basic request"""
84 |
85 | llm = OpenAI(model_name="gpt-4")
86 | retval = llm("explain large language models in one sentence")
87 | print(retval)
88 |
89 | # 2.) models and messages. minute 6:08
90 | def test_02_chat_model(self):
91 | """Test a chat model"""
92 | chat = ChatOpenAI(model_name="gpt-4", temperature=0.3)
93 | messages = [
94 | SystemMessage(content="You are an expert data scientist"),
95 | HumanMessage(content="Write a Python script that trains a neural network on simulated data"),
96 | ]
97 | retval = chat(messages)
98 | print(retval.content, end="\n")
99 |
100 | # 3.) prompt templates. minute 6:56
101 | def get_prompt(self):
102 | """Get a prompt"""
103 | template = """
104 | You are an expert data scientist with an expertise in building deep learning models.
105 | Explain the concept of {concept} in a couple of lines.
106 | """
107 | prompt = PromptTemplate(input_variables=["concept"], template=template)
108 | return prompt
109 |
110 | def test_03_prompt_templates(self):
111 | """Test prompt templates"""
112 | llm = OpenAI(model_name="gpt-4")
113 | prompt = self.get_prompt()
114 | retval = llm(prompt.format(concept="regularization"))
115 | print(retval)
116 |
117 | # 4.) chains. minute 7:45
118 | def get_chain(self, llm, prompt):
119 | """Get a chain"""
120 | chain = LLMChain(llm=llm, prompt=prompt)
121 | return chain
122 |
123 | def test_04_chain(self):
124 | """Test a chain"""
125 | llm = OpenAI(model_name="gpt-4")
126 | prompt = self.get_prompt()
127 | chain = self.get_chain(llm=llm, prompt=prompt)
128 | print(chain.run("autoencoder"))
129 |
130 | # 5.) sequential chains. minute 8:06
131 | def get_overall_chain(self, chains):
132 | """Get an overall chain"""
133 | return SimpleSequentialChain(chains=chains, verbose=True)
134 |
135 | def get_prompt_two(self):
136 | """Get a second prompt"""
137 | second_prompt = PromptTemplate(
138 | input_variables=["ml_concept"],
139 | template="""
140 | Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words.
141 | """,
142 | )
143 | return second_prompt
144 |
145 | def get_explanation(self):
146 | """Get an explanation"""
147 | llm = OpenAI(model_name="gpt-4")
148 | prompt = self.get_prompt()
149 | chain_one = self.get_chain(llm=llm, prompt=prompt)
150 |
151 | second_prompt = self.get_prompt_two()
152 | chain_two = self.get_chain(llm=llm, prompt=second_prompt)
153 | overall_chain = self.get_overall_chain(chains=[chain_one, chain_two])
154 | return overall_chain.run("autoencoder")
155 |
156 | def test_05_chains(self):
157 | """Test chains"""
158 | self.multi_prompt_explanation = self.get_explanation()
159 | print(self.multi_prompt_explanation)
160 |
161 | # 6.) embeddings. minute 9:00
162 | def test_06_embeddings(self):
163 | """Test embeddings"""
164 | # minute 9:32
165 | text_splitter = RecursiveCharacterTextSplitter(
166 | chunk_size=100,
167 | chunk_overlap=0,
168 | )
169 | self.multi_prompt_explanation = self.get_explanation()
170 | if not self.texts_splitter_results:
171 | self.texts_splitter_results = text_splitter.create_documents([self.multi_prompt_explanation])
172 | print(self.texts_splitter_results[0].page_content)
173 |
174 | # minute 10:05
175 | def test_06_embeddings_b(self):
176 | """Test embeddings b"""
177 | if not self.query_result:
178 | self.query_result = self.openai_embedding.embed_query( # minute 10:21
179 | self.texts_splitter_results[0].page_content
180 | )
181 | print(self.query_result)
182 |
183 | # 7.) pinecone client. minute 11:00
184 | self.pinecone_search = Pinecone.from_documents(
185 | documents=self.texts_splitter_results,
186 | embedding=self.openai_embedding,
187 | index_name=self.PINECONE_INDEX_NAME,
188 | )
189 |
190 | # pinecone (continued). minute 11:12
191 | def test_07_pinecone_search(self):
192 | """Test pinecone search"""
193 | query = "What is magical about an autoencoder?"
194 | result = self.pinecone_search.similarity_search(query)
195 | print(result)
196 |
197 | # 8.) LangChain agents. minute 11:45
198 | # (unrelated.)
199 | def test_08_agent_executor(self):
200 | """Test agent executor"""
201 | retval = self.agent_executor.run("Find the roots (zeros) of the quadratic function 3 * x**2 + 2*x -1")
202 | print(retval)
203 |
204 | def main(self):
205 | """Main function"""
206 | # self.test_06_embeddings()
207 | # self.test_06_embeddings_b()
208 | # self.test_07_pinecone_search()
209 |         # self.test_08_agent_executor()
210 | self.test_03_prompt_templates()
211 |
212 |
213 | def main():
214 | """Main function"""
215 |     quickstart = LangChainDev()
216 |     quickstart.main()
217 |
218 |
219 | if __name__ == "__main__":
220 | main()
221 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "private": true,
3 | "scripts": {
4 | "test": "echo \"Error: no test specified\" && exit 1",
5 | "prettier": "prettier --write \"**/*.{js,jsx,ts,tsx,json,css,scss,md}\""
6 | },
7 | "devDependencies": {
8 | "@semantic-release/changelog": "^6.0.3",
9 | "@semantic-release/commit-analyzer": "^13.0.0",
10 | "@semantic-release/git": "^10.0.1",
11 | "@semantic-release/github": "^11.0.0",
12 | "@semantic-release/release-notes-generator": "^14.0.0",
13 | "prettier": "^3.1.1",
14 | "typescript": "^5.2.2"
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 |
4 | [tool.isort]
5 | profile = "black"
6 | lines_after_imports = 2
7 |
8 | [tool.black]
9 | line-length = 120
10 | target-version = ['py311']
11 | include = '\.pyi?$'
12 | exclude = '''
13 | /(
14 | \.git
15 | | \.hg
16 | | \.mypy_cache
17 | | \.tox
18 | | \.venv
19 | | venv
20 | | node_modules
21 | | build
22 | | buck-out
23 |   | dist
24 | )/
25 | '''
26 |
27 | [tool.codespell]
28 | skip = '*.svg,models/prompt_templates.py'
29 | ignore-words = 'codespell.txt'
30 |
--------------------------------------------------------------------------------
/release.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | dryRun: false,
3 | plugins: [
4 | "@semantic-release/commit-analyzer",
5 | "@semantic-release/release-notes-generator",
6 | [
7 | "@semantic-release/changelog",
8 | {
9 | changelogFile: "CHANGELOG.md",
10 | },
11 | ],
12 | "@semantic-release/github",
13 | [
14 | "@semantic-release/git",
15 | {
16 | assets: ["CHANGELOG.md", "requirements/base.txt"],
17 | message:
18 | "chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}",
19 | },
20 | ],
21 | ],
22 | };
23 |
--------------------------------------------------------------------------------
/requirements/base.txt:
--------------------------------------------------------------------------------
1 |
2 | python-decouple==3.8
3 | langchainhub==0.1.21
4 | langchain-openai==0.3.18
5 | langchain-experimental
6 | openai>=1.40.0
7 | langchain
8 | langchain-pinecone
9 | pinecone-client==5.0.1
10 | pinecone-text==0.10.0
11 | pydantic==2.10.4
12 | pydantic-settings==2.9.1
13 | python-dotenv==1.1.0
14 | pypdf==5.6.0
15 | tiktoken==0.9.0
16 |
--------------------------------------------------------------------------------
/requirements/local.txt:
--------------------------------------------------------------------------------
1 |
2 | -r base.txt
3 |
4 | # dev and test
5 | # ------------
6 | pytest==8.3.4
7 | pytest_mock==3.14.0
8 |
9 | # Code linters, formatters, and security scanners
10 | # ------------
11 | black==25.1.0
12 | flake8==7.2.0
13 | flake8-coding==1.3.2
14 | pre-commit==4.0.1
15 | isort==6.0.1
16 | mypy==1.16.0
17 | pylint==3.3.7
18 | bandit==1.8.3
19 | pydocstringformatter==0.7.3
20 | tox==4.25.0
21 | codespell==2.4.1
22 |
--------------------------------------------------------------------------------
/run_pylint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Called from pre-commit. Run pylint on the Python files passed as arguments.
3 | python -m pylint "$@"
4 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Future use: setup for openai_embeddings package. I use this for instructional purposes,
4 | for demonstrating best practices on how to create a Python package.
5 |
6 | This package is not actually published to PyPi.
7 | """
8 | import io
9 | import os
10 | from typing import List
11 |
12 | from setuptools import find_packages, setup
13 |
14 | from setup_utils import get_semantic_version # pylint: disable=import-error
15 |
16 |
17 | HERE = os.path.abspath(os.path.dirname(__file__))
18 |
19 |
20 | def is_requirement(line: str) -> bool:
21 | """
22 | True if line is a valid requirement line from a
23 | Python requirements file.
24 | """
25 | return not (line.strip() == "" or line.startswith("#"))
26 |
27 |
28 | def load_requirements(filename: str) -> List[str]:
29 | """
30 | Returns Python package requirements as a list of semantically
31 | versioned pip packages.
32 |
33 | Args:
34 | filename: The name of the requirements file to load. example: "base.txt"
35 |
36 | Returns:
37 | A list of package requirements.
38 | ['pytest==8.3.4', 'pytest_mock==3.14.0', 'black==25.1.0', ... more packages ]
39 | """
40 | with io.open(os.path.join(HERE, "requirements", filename), "rt", encoding="utf-8") as f:
41 | return [line.strip() for line in f if is_requirement(line) and not line.startswith("-r")]
42 |
43 |
44 | setup(
45 | name="openai_embeddings",
46 | version=get_semantic_version(),
47 | description="""A Hybrid Search and Augmented Generation prompting solution using
48 | Python, [OpenAI](https://openai.com/) embeddings sourced from
49 | [Pinecone](https://docs.pinecone.io/docs/python-client) vector database indexes and
50 | managed by [LangChain](https://www.langchain.com/).""",
51 | author="Lawrence McDaniel",
52 | author_email="lpm0073@gmail.com",
53 | url="https://lawrencemcdaniel.com/",
54 | packages=find_packages(),
55 | package_data={
56 | "openai_embeddings": ["*.md"],
57 | },
58 | install_requires=load_requirements("base.txt"),
59 | extras_require={
60 | "dev": load_requirements("local.txt"),
61 | },
62 | )
63 |
--------------------------------------------------------------------------------
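
To make the filtering in load_requirements() concrete: blank lines, comments,
and "-r" include lines are dropped, and everything else is returned verbatim.
A minimal stand-alone illustration (the sample list and the _filter helper are
hypothetical, but the predicate mirrors is_requirement() plus the "-r"
exclusion above):

    from typing import List

    def _filter(lines: List[str]) -> List[str]:
        # Same test as is_requirement() above, plus the "-r" exclusion
        # applied by load_requirements().
        return [
            line.strip()
            for line in lines
            if line.strip() and not line.startswith("#") and not line.startswith("-r")
        ]

    sample = ["", "-r base.txt", "# dev and test", "pytest==8.3.4", "black==25.1.0"]
    assert _filter(sample) == ["pytest==8.3.4", "black==25.1.0"]
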
/setup_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test setup.py."""
3 | import subprocess
4 | import unittest
5 |
6 |
7 | class TestSetup(unittest.TestCase):
8 | """Test setup.py."""
9 |
10 | def test_setup_syntax(self):
11 | """Test setup.py syntax."""
12 | result = subprocess.run(["python", "setup.py", "check"], capture_output=True, text=True, check=False)
13 | assert result.returncode == 0, f"setup.py failed with output:\n{result.stdout}\n{result.stderr}"
14 | assert not result.stderr, "Expected no error output"
15 |
16 |
17 | if __name__ == "__main__":
18 | unittest.main()
19 |
--------------------------------------------------------------------------------
/setup_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pylint: disable=duplicate-code
3 | """Lawrence McDaniel https://lawrencemcdaniel.com."""
4 | import importlib.util
5 | import os
6 | import re
7 | from typing import Dict
8 |
9 |
10 | MODULE_NAME = "models"
11 | HERE = os.path.abspath(os.path.dirname(__file__))
12 | PROJECT_ROOT = os.path.abspath(os.path.join(HERE, MODULE_NAME))
13 |
14 | # allow setup.py to be run from any path
15 | os.chdir(os.path.normpath(os.path.join(os.path.abspath(__file__), os.pardir)))
16 |
17 |
18 | def load_version() -> Dict[str, str]:
19 | """Stringify the __version__ module."""
20 | version_file_path = os.path.join(PROJECT_ROOT, "__version__.py")
21 | spec = importlib.util.spec_from_file_location("__version__", version_file_path)
22 | version_module = importlib.util.module_from_spec(spec)
23 | spec.loader.exec_module(version_module)
24 | return version_module.__dict__
25 |
26 |
27 | VERSION = load_version()
28 |
29 |
30 | def get_semantic_version() -> str:
31 | """
32 | Return the semantic version number.
33 |
34 | Example valid values of __version__.py are:
35 | 0.1.17
36 | 0.1.17-next.1
37 | 0.1.17-next.2
38 | 0.1.17-next.123456
39 | 0.1.17-next-major.1
40 | 0.1.17-next-major.2
41 | 0.1.17-next-major.123456
42 |
43 | Note:
44 | - PyPI does not allow semantic version numbers to contain a dash.
45 | - PyPI does not allow semantic version numbers to contain a 'v' prefix.
46 | - PyPI does not allow semantic version numbers to contain a 'next' suffix.
47 | """
48 | version = VERSION["__version__"]
49 | version = re.sub(r"-next\.\d+", "", version)
50 | return re.sub(r"-next-major\.\d+", "", version)
51 |
--------------------------------------------------------------------------------
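
A quick sanity check of the suffix stripping performed by get_semantic_version(),
using the same regular expressions and the docstring's own example values
(strip_prerelease is a hypothetical stand-in for illustration):

    import re

    def strip_prerelease(version: str) -> str:
        # Mirrors get_semantic_version() above: drop "-next.N" first, then
        # "-next-major.N", leaving a PyPI-safe release number.
        version = re.sub(r"-next\.\d+", "", version)
        return re.sub(r"-next-major\.\d+", "", version)

    assert strip_prerelease("0.1.17") == "0.1.17"
    assert strip_prerelease("0.1.17-next.2") == "0.1.17"
    assert strip_prerelease("0.1.17-next-major.123456") == "0.1.17"
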
/tox.ini:
--------------------------------------------------------------------------------
1 | # Set up a basic tox environment for flake8 with the default Python 3.11
2 | # environment.
3 | [tox]
4 | envlist = py311,flake8
5 | skip_missing_interpreters = true
6 |
7 | [isort]
8 | profile = black
9 | skip = venv,node_modules
10 |
11 | [gh-actions]
12 | python =
13 | 3.8: gitlint,py38,flake8
14 | 3.9: gitlint,py39,flake8
15 | 3.10: gitlint,py310,flake8
16 | 3.11: gitlint,py311,flake8,mypy,black,pylint
17 | 3.12: gitlint,py312,flake8,mypy,black,pylint
18 |
19 | [testenv]
20 | deps = -rrequirements/local.txt
21 | commands = pytest
22 |
23 | [testenv:flake8]
24 | skip_install = True
25 | deps = flake8
26 | commands = flake8
27 |
28 | [testenv:gitlint]
29 | skip_install = True
30 | deps = gitlint
31 | commands = gitlint {posargs}
32 |
33 | [testenv:bumpversion]
34 | skip_install = True
35 | passenv =
36 | # Git can only find its global configuration if it knows where the
37 | # user's HOME is.
38 | HOME
39 | # We set sign_tags in .bumpversion.cfg, so pass in the GnuPG agent
40 | # reference to avoid having to retype the passphrase for an
41 | # already-cached private key.
42 | GPG_AGENT_INFO
43 | deps = bump2version
44 | commands = bump2version {posargs}
45 |
46 | [testenv:pylint]
47 | deps = pylint
48 | commands =
49 | pylint .
50 |
--------------------------------------------------------------------------------