├── .devcontainer ├── Dockerfile ├── README.md ├── devcontainer.json └── post_create_command.sh ├── FAQ.md ├── LICENSE ├── README.md ├── backend ├── .env.development ├── .env.docker ├── .flake8 ├── .gitignore ├── .pylintrc ├── .python-version ├── .vscode │ └── settings.json ├── Dockerfile ├── Makefile ├── README.md ├── alembic.ini ├── alembic │ ├── README │ ├── env.py │ ├── script.py.mako │ └── versions │ │ ├── 1b0b616e08c6_replace_value_within_.py │ │ ├── 477cee72edc4_init_tables.py │ │ ├── 663b3fea3024_update_sub_process_columns.py │ │ ├── 873c0c4616ea_add_foreign_key_indices.py │ │ ├── 90a1d6a26343_create_doc_tables.py │ │ └── c008bb4f3f48_update_sub_process_columns.py ├── app │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── api.py │ │ ├── crud.py │ │ ├── deps.py │ │ └── endpoints │ │ │ ├── __init__.py │ │ │ ├── conversation.py │ │ │ ├── documents.py │ │ │ └── health.py │ ├── chat │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── engine.py │ │ ├── messaging.py │ │ ├── pg_vector.py │ │ ├── qa_response_synth.py │ │ ├── tools.py │ │ └── utils.py │ ├── core │ │ ├── __init__.py │ │ └── config.py │ ├── db │ │ ├── base.py │ │ ├── session.py │ │ └── wait_for_db.py │ ├── llama_index_settings.py │ ├── loader_io.py │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ └── db.py │ └── schema.py ├── docker-compose.yml ├── localstack-cors-config.json ├── poetry.lock ├── pyproject.toml ├── scripts │ ├── build_vector_tables.py │ ├── chat_llama.py │ ├── dedupe_vector_store.py │ ├── download_sec_pdf.py │ ├── file_utils.py │ ├── seed_db.py │ ├── seed_storage_context.py │ ├── stock_utils.py │ ├── upsert_db_sec_documents.py │ └── upsert_document.py ├── tests │ └── app │ │ └── chat │ │ └── test_engine.py └── troubleshooting.md ├── frontend ├── .devcontainer │ └── devcontainer.json ├── .env.example ├── .eslintrc.cjs ├── .gitignore ├── README.md ├── next.config.mjs ├── package-lock.json ├── package.json ├── postcss.config.cjs ├── prettier.config.cjs ├── public │ ├── Gradient.png │ ├── chat-1.svg │ ├── chat-2.svg │ ├── chat-3.svg │ ├── chat-4.svg │ ├── chat-5.svg │ ├── citations.svg │ ├── doc-selector-no-highlight.svg │ ├── docs-with-highlight.svg │ ├── favicon.ico │ ├── full-chat.png │ ├── full-chat.svg │ ├── highlight-1.svg │ ├── highlight-2.svg │ ├── highlight-3.svg │ ├── highlight-4.svg │ ├── logo-black.svg │ ├── logo-white.svg │ ├── lyft-2021-10k.pdf │ ├── multi-doc-1.svg │ ├── multi-doc-2.svg │ ├── multi-doc-3.svg │ ├── multi-doc-4.svg │ └── uber-2021-10k.pdf ├── sentry.client.config.ts ├── sentry.edge.config.ts ├── sentry.server.config.ts ├── src │ ├── api │ │ ├── backend.tsx │ │ └── utils │ │ │ └── documents.tsx │ ├── components │ │ ├── Layout.tsx │ │ ├── basics │ │ │ ├── Loading.tsx │ │ │ ├── Modal.tsx │ │ │ └── ModalPortal.tsx │ │ ├── conversations │ │ │ └── RenderConversations.tsx │ │ ├── landing-page │ │ │ ├── AnimateSvg.tsx │ │ │ ├── MarketingSection.tsx │ │ │ ├── SelectTicker.tsx │ │ │ └── TitleAndDropdown.tsx │ │ ├── modals │ │ │ └── ShareLinkModal.tsx │ │ └── pdf-viewer │ │ │ ├── DisplayMultiplePdfs.tsx │ │ │ ├── PdfOptionsBar.tsx │ │ │ ├── ViewPdf.tsx │ │ │ ├── VirtualizedPdf.tsx │ │ │ └── pdfDisplayConstants.tsx │ ├── config.js │ ├── constants.tsx │ ├── context │ │ └── pdf.tsx │ ├── env.mjs │ ├── hooks │ │ ├── useDocumentSelector.tsx │ │ ├── useMessages.tsx │ │ ├── useMultiplePdfs.tsx │ │ ├── usePdfViewer.tsx │ │ └── utils │ │ │ ├── useFocus.tsx │ │ │ ├── useIsMobile.tsx │ │ │ ├── useLocalStorage.ts │ │ │ ├── useModal.tsx │ │ │ └── useScrollBreakpoint.tsx │ ├── modules │ │ 
└── react-pdf.d.ts
│   ├── pages
│   │   ├── _app.tsx
│   │   ├── _document.tsx
│   │   ├── _error.tsx
│   │   ├── conversation
│   │   │   └── [id].tsx
│   │   └── index.tsx
│   ├── styles
│   │   ├── globals.css
│   │   └── react-select.tsx
│   ├── svgs
│   │   ├── llama.tsx
│   │   └── right-arrow.tsx
│   ├── types
│   │   ├── backend
│   │   │   └── document.tsx
│   │   ├── conversation.tsx
│   │   ├── document.tsx
│   │   └── selection.tsx
│   └── utils
│   │   ├── colors.tsx
│   │   ├── documents.tsx
│   │   ├── landing-page-selection.tsx
│   │   ├── multi-line-highlight.tsx
│   │   └── timezone.tsx
├── tailwind.config.ts
└── tsconfig.json
└── render.yaml

/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
1 | # https://hub.docker.com/_/python
2 | FROM python:3.11.3-slim-bullseye
3 | 
4 | ENV PYTHONUNBUFFERED True
5 | # Install system-level backend deps
6 | RUN apt-get update
7 | RUN apt-get install libpq-dev gcc build-essential wkhtmltopdf s3fs -y
8 | RUN pip install poetry==1.6.1
9 | # Copy the project into the image
10 | ENV APP_HOME /app
11 | COPY . $APP_HOME
12 | 
13 | CMD ["/bin/bash"]
14 | 
--------------------------------------------------------------------------------
/.devcontainer/README.md:
--------------------------------------------------------------------------------
1 | # SEC Insights Dev Container
2 | 
3 | This dev container configuration sets up a development environment specifically tailored to this project.
4 | 
5 | This is useful for getting the project set up faster, since many of the system dependencies come pre-installed.
6 | 
7 | ## How do I use this?
8 | 
9 | You can either click this button to open the dev container in a GitHub Codespace:
10 | 
11 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/run-llama/sec-insights)
12 | 
13 | Or you can spin up the dev container locally using [VS Code's dev container feature](https://code.visualstudio.com/docs/devcontainers/create-dev-container#_create-a-devcontainerjson-file).
14 | 
15 | ## What are the benefits of using this?
16 | * System-level dependencies are pre-installed
17 | * Project-specific Python version
18 | * Other dependencies like `wkhtmltopdf` & `s3fs` are pre-installed
19 | * Uses the same base Docker image as the one used for the production service
20 |   * This means higher fidelity between your dev environment and your prod environment.
21 | 
22 | ## Are there any downsides to using this?
23 | One downside is that when you're using the dev container via GitHub Codespaces, that service isn't entirely free. There's a free-tier limit, after which GitHub Codespaces usage is paid.
24 | Also, if you're running the dev container locally via the VS Code dev container feature, you may find that Docker can take up quite a bit of storage space on your machine. Make sure you have the necessary storage space.
25 | 
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 |     "name": "sec_insights",
3 |     "build": {
4 |         "dockerfile": "./Dockerfile",
5 |         "context": ".."
6 |     },
7 |     "features": {
8 |         "ghcr.io/devcontainers-contrib/features/pipx-package:1": {},
9 |         "ghcr.io/devcontainers-contrib/features/poetry:2": {},
10 |         "ghcr.io/warrenbuckley/codespace-features/sqlite:1": {},
11 |         "ghcr.io/devcontainers/features/docker-in-docker:2": {},
12 |         "ghcr.io/devcontainers/features/aws-cli:1": {},
13 |         "ghcr.io/devcontainers/features/node:1": {}
14 |     },
15 |     "postCreateCommand": "bash .devcontainer/post_create_command.sh"
16 | }
--------------------------------------------------------------------------------
/.devcontainer/post_create_command.sh:
--------------------------------------------------------------------------------
1 | cd backend/
2 | 
3 | # install poetry dependencies
4 | poetry install
5 | 
6 | cp .env.development .env
7 | set -a
8 | source .env
9 | make migrate
10 | 
--------------------------------------------------------------------------------
/FAQ.md:
--------------------------------------------------------------------------------
1 | # Frequently Asked Questions 🔍
2 | 
3 | Here we will go over a list of commonly asked questions and concerns regarding this project. You may run into some of these questions yourself when reviewing the project!
4 | 
5 | ## How do I add more SEC documents beyond the selected pool of SEC filings?
6 | You can do this by using our [seed script](https://github.com/run-llama/sec-insights/tree/main/backend#seed-db-script-)!
7 | 
8 | You can run the seed script with the `--ciks` CLI arg *(e.g. `python scripts/seed_db.py --ciks '["1640147"]'`)*. The `ciks` arg allows you to define which companies you want to download SEC filings for. You can search for the CIK value for a given company using the SEC's search tool on [this website](https://www.sec.gov/edgar/searchedgar/companysearch).
9 | 
10 | Alternatively, you may also just add the CIKs you want to include in your project by modifying the `DEFAULT_CIKS` list [here](https://github.com/run-llama/sec-insights/blob/main/backend/scripts/download_sec_pdf.py#L12).
11 | 
12 | Just make sure you follow the setup instructions as a prerequisite to running the seed script :)
13 | 
14 | ## How do I use different types of documents besides SEC filings? e.g. Research papers, internal documents, etc.
15 | This can be done!
16 | 
17 | While our frontend is fairly specific to the SEC filing use case, our backend is set up to be very flexible in terms of the types of documents you can ingest and start asking questions about.
18 | 
19 | An in-depth walkthrough on doing this can be found in [our YouTube tutorial](https://youtu.be/2O52Tfj79T4?si=kiRxB2dLES0Gaad7&t=1311).
20 | 
21 | Here are some high-level steps (a sample session is sketched after the list):
22 | 1. Insert the PDF document into your database by using the script in `scripts/upsert_document.py`
23 |    * The script will print out the newly inserted document's UUID. Make sure to copy this to your clipboard for later!
24 | 1. Start the backend service locally using `make run`
25 | 1. Start the shell-based Chat REPL using `make chat`
26 | 1. Within the REPL:
27 |    1. First, run `pick_docs`
28 |    1. Then run `select_id <document UUID>` e.g. `select_id 421b8099-6155-2f6e-8c5b-674ee0ab0e7d`
29 |    1. Type `finish` to wrap up document selection
30 |    1. Create your conversation by typing `create`
31 |    1. Send a message within the newly created conversation with `message <your message>` e.g. `message What is the document about?`
32 |       * The first time a newly inserted document receives a message, the backend needs to run the embedding + indexing process for that document, which can take some time.
33 | 1. Start chatting away! The platform should now be ready to answer questions about this document within the Chat REPL.
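
A sample session following the steps above might look like this *(the document UUID is hypothetical, and the exact REPL prompt text may differ)*:

```
make chat
> pick_docs
> select_id 421b8099-6155-2f6e-8c5b-674ee0ab0e7d
> finish
> create
> message What is the document about?
```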
34 | 
35 | You will also find that some of the prompts used in the application are specific to the SEC Insights use case. These will need to be changed to fit your particular use case. Here's an initial list of places in the codebase where you may need to tune the prompts:
36 | * [Custom Response Synth prompt](https://github.com/run-llama/sec-insights/blob/e81c839/backend/app/chat/qa_response_synth.py#L15-L48)
37 | * [Vector Index tool descriptions](https://github.com/run-llama/sec-insights/blob/e81c83958a428e2aa02e8cb1280c3a17c55c4aa9/backend/app/chat/engine.py#L295-L296)
38 | * System Message ([template](https://github.com/run-llama/sec-insights/blob/e81c83958a428e2aa02e8cb1280c3a17c55c4aa9/backend/app/chat/constants.py#L3-L17) and [construction](https://github.com/run-llama/sec-insights/blob/e81c83958a428e2aa02e8cb1280c3a17c55c4aa9/backend/app/chat/engine.py#L336))
39 | * [User Message Prefix](https://github.com/run-llama/sec-insights/blob/e81c83958a428e2aa02e8cb1280c3a17c55c4aa9/backend/app/chat/messaging.py#L143-L145)
40 | 
41 | ## How do I completely refresh my database?
42 | During development, you may find it useful or necessary to completely wipe out your database and start fresh with empty tables.
43 | 
44 | To make this process simple, we have included a `make refresh_db` command in `backend/Makefile`. To use it, just do the following:
45 | - `cd` into the `backend/` folder if you're not already in it
46 | - Run `set -a` then `source .env`
47 |   - See the instructions in `README.md` for more information on what this step does
48 | - Run `make refresh_db`
49 |   - This will ask for confirmation first and only proceeds if you type `Y`; any other response aborts.
50 | 
51 | **What is this script doing?**
52 | 
53 | When you run the database in the `db` container using `docker compose` and the various `make` commands, the container shares a data volume with your local machine. This ensures that the data in this local database is persisted even as the `db` container is started and stopped. As such, to completely refresh this database, you first need to stop your DB container, delete these volumes, re-create the DB container, and re-apply the alembic migrations. That's exactly what `make refresh_db` does.
54 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2023 LlamaIndex
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /backend/.env.development: -------------------------------------------------------------------------------- 1 | DATABASE_URL=postgresql://user:password@127.0.0.1:5432/llama_app_db 2 | BACKEND_CORS_ORIGINS='["http://localhost", "http://localhost:8000", "http://localhost:3000", "http://127.0.0.1:3000", "https://llama-app-backend.onrender.com", "https://llama-app-frontend.vercel.app", "http://secinsights.ai", "http://www.secinsights.ai", "https://secinsights.ai", "https://www.secinsights.ai"]' 3 | OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXXXXXX 4 | LOG_LEVEL=debug 5 | RENDER=False 6 | S3_BUCKET_NAME=llama-app-backend-local 7 | S3_ASSET_BUCKET_NAME=llama-app-web-assets-local 8 | CDN_BASE_URL=http://llama-app-web-assets-local.s3-website.localhost.localstack.cloud:4566 9 | AWS_KEY=xxx 10 | AWS_SECRET=xxx 11 | POLYGON_IO_API_KEY=xxx 12 | SEC_EDGAR_COMPANY_NAME=YourOrgName 13 | SEC_EDGAR_EMAIL=you@example.com 14 | -------------------------------------------------------------------------------- /backend/.env.docker: -------------------------------------------------------------------------------- 1 | DATABASE_URL=postgresql://user:password@db:5432/llama_app_db 2 | -------------------------------------------------------------------------------- /backend/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | exclude = .git,__pycache__,__init__.py,.mypy_cache,.pytest_cache 4 | -------------------------------------------------------------------------------- /backend/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | storage/ 163 | # created by localstack 164 | volume/ 165 | -------------------------------------------------------------------------------- /backend/.python-version: -------------------------------------------------------------------------------- 1 | 3.11.3 2 | -------------------------------------------------------------------------------- /backend/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.formatting.provider": "black", 3 | "editor.formatOnSave": true, 4 | "editor.codeActionsOnSave": { 5 | "source.organizeImports": true, 6 | "source.fixAll": true 7 | }, 8 | } -------------------------------------------------------------------------------- /backend/Dockerfile: -------------------------------------------------------------------------------- 1 | # https://hub.docker.com/_/python 2 | FROM python:3.11.3-slim-bullseye 3 | 4 | ENV PYTHONUNBUFFERED True 5 | ENV APP_HOME /app 6 | WORKDIR $APP_HOME 7 | RUN pip install poetry 8 | COPY . ./ 9 | RUN apt-get update 10 | RUN apt-get install libpq-dev gcc build-essential wkhtmltopdf -y 11 | RUN poetry install 12 | 13 | ARG DATABASE_URL 14 | ENV DATABASE_URL=$DATABASE_URL 15 | 16 | ARG OPENAI_API_KEY 17 | ENV OPENAI_API_KEY=$OPENAI_API_KEY 18 | 19 | CMD ["poetry", "run", "start"] 20 | -------------------------------------------------------------------------------- /backend/Makefile: -------------------------------------------------------------------------------- 1 | run: 2 | echo "Running in local mode." 3 | docker compose create db localstack phoenix 4 | docker compose start db localstack phoenix 5 | poetry run start 6 | 7 | run_docker: 8 | echo "Running in local mode with docker." 9 | docker compose up 10 | 11 | migrate: 12 | echo "Running migrations." 13 | docker compose create db 14 | docker compose start db 15 | poetry run python -m alembic upgrade head 16 | # workaround for having PGVector create its tables 17 | poetry run python -m scripts.build_vector_tables 18 | 19 | refresh_db: 20 | # First ask for confirmation. 21 | @echo -n "Are you sure you want to refresh the local database? This will delete all data in your local db. [Y/n] "; \ 22 | read ans; \ 23 | if [ $${ans:-'N'} = 'Y' ]; then make confirmed_refresh_db; else echo "Aborting."; fi 24 | 25 | confirmed_refresh_db: 26 | echo "Refreshing database." 27 | docker compose down db 28 | docker volume rm backend_postgres_data 29 | make migrate 30 | 31 | test: 32 | poetry run python -m pytest tests/ 33 | 34 | chat: 35 | poetry run python -m scripts.chat_llama 36 | 37 | setup_localstack: 38 | docker compose create localstack 39 | docker compose start localstack 40 | echo "Waiting for localstack to start..." 41 | # Ping http://localhost:4566/health until we get a 200 response 42 | until $$(curl --output /dev/null --silent --head --fail http://localhost:4566/_localstack/health); do \ 43 | printf '.'; \ 44 | sleep 0.5; \ 45 | done 46 | # Check that S3_ASSET_BUCKET_NAME is set 47 | if [ -z ${S3_ASSET_BUCKET_NAME} ]; then \ 48 | echo "S3_ASSET_BUCKET_NAME is not set. 
Please set it and try again."; \ 49 | exit 1; \ 50 | fi 51 | awslocal s3 mb s3://${S3_ASSET_BUCKET_NAME} 52 | echo "LocalStack S3 bucket website is alive" > /tmp/index.html 53 | awslocal s3 cp /tmp/index.html s3://${S3_ASSET_BUCKET_NAME}/index.html 54 | rm /tmp/index.html 55 | awslocal s3 website s3://${S3_ASSET_BUCKET_NAME}/ --index-document index.html 56 | awslocal s3api put-bucket-cors --bucket ${S3_ASSET_BUCKET_NAME} --cors-configuration file://./localstack-cors-config.json 57 | echo "LocalStack S3 bucket website is ready. Open http://${S3_ASSET_BUCKET_NAME}.s3-website.localhost.localstack.cloud:4566 in your browser to verify." 58 | 59 | seed_db_based_on_env: 60 | # Call either seed_db or seed_db_preview, seed_db_local based on the environment 61 | # This is used by the CI/CD pipeline 62 | ENVIRONMENT=$$(poetry run python -c "from app.core.config import settings;print(settings.ENVIRONMENT.value)"); \ 63 | echo "Environment: $$ENVIRONMENT"; \ 64 | if [ "$$ENVIRONMENT" = "preview" ]; then \ 65 | make seed_db_preview; \ 66 | elif [ "$$ENVIRONMENT" = "production" ]; then \ 67 | make seed_db; \ 68 | else \ 69 | make seed_db_local; \ 70 | fi 71 | 72 | seed_db: 73 | echo "Seeding database." 74 | poetry run python scripts/seed_db.py 75 | 76 | seed_db_preview: 77 | echo "Seeding database for Preview." 78 | # only need to populate with two companies for Preview 79 | poetry run python scripts/seed_db.py --ciks '["0001018724", "1326801"]' 80 | 81 | seed_db_local: 82 | echo "Seeding database for local." 83 | docker compose create db 84 | docker compose start db 85 | make setup_localstack 86 | python scripts/seed_db.py --ciks '["0001018724", "1326801"]' --filing_types '["10-K"]' 87 | -------------------------------------------------------------------------------- /backend/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s 8 | # Uncomment the line below if you want the files to be prepended with date and time 9 | # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s 10 | 11 | # sys.path path, will be prepended to sys.path if present. 12 | # defaults to the current working directory. 13 | prepend_sys_path = . 14 | 15 | # timezone to use when rendering the date within the migration file 16 | # as well as the filename. 17 | # If specified, requires the python-dateutil library that can be 18 | # installed by adding `alembic[tz]` to the pip requirements 19 | # string value is passed to dateutil.tz.gettz() 20 | # leave blank for localtime 21 | # timezone = 22 | 23 | # max length of characters to apply to the 24 | # "slug" field 25 | # truncate_slug_length = 40 26 | 27 | # set to 'true' to run the environment during 28 | # the 'revision' command, regardless of autogenerate 29 | # revision_environment = false 30 | 31 | # set to 'true' to allow .pyc and .pyo files without 32 | # a source .py file to be detected as revisions in the 33 | # versions/ directory 34 | # sourceless = false 35 | 36 | # version location specification; This defaults 37 | # to alembic/versions. When using multiple version 38 | # directories, initial revisions must be specified with --version-path. 39 | # The path separator used here should be the separator specified by "version_path_separator" below. 
40 | # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions 41 | 42 | # version path separator; As mentioned above, this is the character used to split 43 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. 44 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. 45 | # Valid values for version_path_separator are: 46 | # 47 | # version_path_separator = : 48 | # version_path_separator = ; 49 | # version_path_separator = space 50 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects. 51 | 52 | # set to 'true' to search source files recursively 53 | # in each "version_locations" directory 54 | # new in Alembic version 1.10 55 | # recursive_version_locations = false 56 | 57 | # the output encoding used when revision files 58 | # are written from script.py.mako 59 | # output_encoding = utf-8 60 | 61 | sqlalchemy.url = postgresql://user:password@127.0.0.1:5432/llama_app_db 62 | 63 | 64 | [post_write_hooks] 65 | # post_write_hooks defines scripts or Python functions that are run 66 | # on newly generated revision scripts. See the documentation for further 67 | # detail and examples 68 | 69 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 70 | # hooks = black 71 | # black.type = console_scripts 72 | # black.entrypoint = black 73 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 74 | 75 | # Logging configuration 76 | [loggers] 77 | keys = root,sqlalchemy,alembic 78 | 79 | [handlers] 80 | keys = console 81 | 82 | [formatters] 83 | keys = generic 84 | 85 | [logger_root] 86 | level = WARN 87 | handlers = console 88 | qualname = 89 | 90 | [logger_sqlalchemy] 91 | level = WARN 92 | handlers = 93 | qualname = sqlalchemy.engine 94 | 95 | [logger_alembic] 96 | level = INFO 97 | handlers = 98 | qualname = alembic 99 | 100 | [handler_console] 101 | class = StreamHandler 102 | args = (sys.stderr,) 103 | level = NOTSET 104 | formatter = generic 105 | 106 | [formatter_generic] 107 | format = %(levelname)-5.5s [%(name)s] %(message)s 108 | datefmt = %H:%M:%S 109 | -------------------------------------------------------------------------------- /backend/alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration with an async dbapi. -------------------------------------------------------------------------------- /backend/alembic/env.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from logging.config import fileConfig 3 | 4 | from sqlalchemy import pool 5 | from sqlalchemy.engine import Connection 6 | from sqlalchemy.ext.asyncio import async_engine_from_config 7 | 8 | from alembic import context 9 | from app.core.config import settings 10 | 11 | # this is the Alembic Config object, which provides 12 | # access to the values within the .ini file in use. 13 | config = context.config 14 | 15 | # Interpret the config file for Python logging. 16 | # This line sets up loggers basically. 
17 | if config.config_file_name is not None: 18 | fileConfig(config.config_file_name) 19 | 20 | # add your model's MetaData object here 21 | # for 'autogenerate' support 22 | from app.db.base import Base # noqa: E402 23 | 24 | target_metadata = Base.metadata 25 | 26 | # other values from the config, defined by the needs of env.py, 27 | # can be acquired: 28 | # my_important_option = config.get_main_option("my_important_option") 29 | # ... etc. 30 | db_url = config.get_main_option("sqlalchemy.url") 31 | if settings.DATABASE_URL.strip(): 32 | db_url = settings.DATABASE_URL.strip() 33 | print(f"Using DATABASE_URL {db_url} from environment for migrations") 34 | config.set_main_option("sqlalchemy.url", db_url) 35 | 36 | 37 | def run_migrations_offline() -> None: 38 | """Run migrations in 'offline' mode. 39 | 40 | This configures the context with just a URL 41 | and not an Engine, though an Engine is acceptable 42 | here as well. By skipping the Engine creation 43 | we don't even need a DBAPI to be available. 44 | 45 | Calls to context.execute() here emit the given string to the 46 | script output. 47 | 48 | """ 49 | context.configure( 50 | url=config.get_main_option("sqlalchemy.url"), 51 | target_metadata=target_metadata, 52 | literal_binds=True, 53 | dialect_opts={"paramstyle": "named"}, 54 | transaction_per_migration=True, 55 | ) 56 | 57 | with context.begin_transaction(): 58 | context.run_migrations() 59 | 60 | 61 | def do_run_migrations(connection: Connection) -> None: 62 | context.configure( 63 | connection=connection, 64 | target_metadata=target_metadata, 65 | transaction_per_migration=True, 66 | ) 67 | 68 | with context.begin_transaction(): 69 | context.run_migrations() 70 | 71 | 72 | async def run_async_migrations() -> None: 73 | """In this scenario we need to create an Engine 74 | and associate a connection with the context. 75 | 76 | """ 77 | connectable = async_engine_from_config( 78 | config.get_section(config.config_ini_section, {}), 79 | prefix="sqlalchemy.", 80 | poolclass=pool.NullPool, 81 | ) 82 | 83 | async with connectable.connect() as connection: 84 | await connection.run_sync(do_run_migrations) 85 | 86 | await connectable.dispose() 87 | 88 | 89 | def run_migrations_online() -> None: 90 | """Run migrations in 'online' mode.""" 91 | 92 | asyncio.run(run_async_migrations()) 93 | 94 | 95 | if context.is_offline_mode(): 96 | run_migrations_offline() 97 | else: 98 | run_migrations_online() 99 | -------------------------------------------------------------------------------- /backend/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade() -> None: 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade() -> None: 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /backend/alembic/versions/1b0b616e08c6_replace_value_within_.py: -------------------------------------------------------------------------------- 1 | """replace value within MessageSubProcessSourceEnum 2 | 3 | Revision ID: 1b0b616e08c6 4 | Revises: 90a1d6a26343 5 | Create Date: 2023-07-28 19:39:03.256581 6 | 7 | """ 8 | from typing import Set 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = "1b0b616e08c6" 16 | down_revision = "90a1d6a26343" 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | existing_sub_process_source_enum_values = { 22 | "CHUNKING", 23 | "NODE_PARSING", 24 | "EMBEDDING", 25 | "LLM", 26 | "QUERY", 27 | "RETRIEVE", 28 | "SYNTHESIZE", 29 | "TREE", 30 | "CONSTRUCTED_QUERY_ENGINE", 31 | "SUB_QUESTIONS", 32 | } 33 | 34 | new_sub_process_source_enum_values = { 35 | *existing_sub_process_source_enum_values, 36 | "SUB_QUESTION", 37 | } 38 | 39 | 40 | def replace_enum_values(enum_name: str, table: str, new_values: Set[str]): 41 | """ 42 | Create a new type, add the value to it, update the column to use the new type and delete the old type 43 | """ 44 | op.execute(f'ALTER TYPE public."{enum_name}" RENAME TO "{enum_name}Old"') 45 | sa.Enum(*new_values, name=enum_name).create(op.get_bind()) 46 | op.execute( 47 | f'ALTER TABLE {table} ALTER COLUMN source TYPE public."{enum_name}" USING source::text::public."{enum_name}"' 48 | ) 49 | op.execute(f'DROP TYPE public."{enum_name}Old"') 50 | 51 | 52 | def upgrade() -> None: 53 | # Alter MessageSubProcessEnum to add "SUB_QUESTION" as a valid value 54 | replace_enum_values( 55 | "MessageSubProcessSourceEnum", 56 | "messagesubprocess", 57 | new_sub_process_source_enum_values, 58 | ) 59 | 60 | # ### end Alembic commands ### 61 | 62 | 63 | def downgrade() -> None: 64 | # ### commands auto generated by Alembic - please adjust! ### 65 | # revert back to the old enum type 66 | # Note that this won't work if the DB already has rows with the new enum values 67 | replace_enum_values( 68 | "MessageSubProcessSourceEnum", 69 | "messagesubprocess", 70 | existing_sub_process_source_enum_values, 71 | ) 72 | # ### end Alembic commands ### 73 | -------------------------------------------------------------------------------- /backend/alembic/versions/477cee72edc4_init_tables.py: -------------------------------------------------------------------------------- 1 | """init tables 2 | 3 | Revision ID: 477cee72edc4 4 | Revises: 5 | Create Date: 2023-06-15 20:55:49.318398 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects import postgresql 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "477cee72edc4" 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! 
### 21 | op.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') 22 | op.create_table( 23 | "conversation", 24 | sa.Column("id", sa.UUID(), nullable=False), 25 | sa.Column( 26 | "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 27 | ), 28 | sa.Column( 29 | "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 30 | ), 31 | sa.PrimaryKeyConstraint("id"), 32 | ) 33 | op.create_index(op.f("ix_conversation_id"), "conversation", ["id"], unique=False) 34 | op.create_table( 35 | "message", 36 | sa.Column("conversation_id", sa.UUID(), nullable=True), 37 | sa.Column("content", sa.String(), nullable=True), 38 | sa.Column( 39 | "role", 40 | postgresql.ENUM("user", "assistant", name="MessageRoleEnum"), 41 | nullable=True, 42 | ), 43 | sa.Column( 44 | "status", 45 | postgresql.ENUM("PENDING", "SUCCESS", "ERROR", name="MessageStatusEnum"), 46 | nullable=True, 47 | ), 48 | sa.Column("id", sa.UUID(), nullable=False), 49 | sa.Column( 50 | "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 51 | ), 52 | sa.Column( 53 | "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 54 | ), 55 | sa.ForeignKeyConstraint( 56 | ["conversation_id"], 57 | ["conversation.id"], 58 | ), 59 | sa.PrimaryKeyConstraint("id"), 60 | ) 61 | op.create_index(op.f("ix_message_id"), "message", ["id"], unique=False) 62 | op.create_table( 63 | "messagesubprocess", 64 | sa.Column("message_id", sa.UUID(), nullable=True), 65 | sa.Column("content", sa.String(), nullable=True), 66 | sa.Column( 67 | "source", 68 | postgresql.ENUM( 69 | "CHUNKING", 70 | "NODE_PARSING", 71 | "EMBEDDING", 72 | "LLM", 73 | "QUERY", 74 | "RETRIEVE", 75 | "SYNTHESIZE", 76 | "TREE", 77 | "CONSTRUCTED_QUERY_ENGINE", 78 | name="MessageSubProcessSourceEnum", 79 | ), 80 | nullable=True, 81 | ), 82 | sa.Column("id", sa.UUID(), nullable=False), 83 | sa.Column( 84 | "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 85 | ), 86 | sa.Column( 87 | "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 88 | ), 89 | sa.ForeignKeyConstraint( 90 | ["message_id"], 91 | ["message.id"], 92 | ), 93 | sa.PrimaryKeyConstraint("id"), 94 | ) 95 | op.create_index( 96 | op.f("ix_messagesubprocess_id"), "messagesubprocess", ["id"], unique=False 97 | ) 98 | # ### end Alembic commands ### 99 | 100 | 101 | def downgrade() -> None: 102 | # ### commands auto generated by Alembic - please adjust! 
### 103 | op.drop_index(op.f("ix_messagesubprocess_id"), table_name="messagesubprocess") 104 | op.drop_table("messagesubprocess") 105 | op.drop_index(op.f("ix_message_id"), table_name="message") 106 | op.drop_table("message") 107 | op.drop_index(op.f("ix_conversation_id"), table_name="conversation") 108 | op.drop_table("conversation") 109 | # remove enum types 110 | op.execute('DROP TYPE "MessageRoleEnum"') 111 | op.execute('DROP TYPE "MessageStatusEnum"') 112 | op.execute('DROP TYPE "MessageSubProcessSourceEnum"') 113 | # ### end Alembic commands ### 114 | -------------------------------------------------------------------------------- /backend/alembic/versions/663b3fea3024_update_sub_process_columns.py: -------------------------------------------------------------------------------- 1 | """update_sub_process_columns 2 | 3 | Revision ID: 663b3fea3024 4 | Revises: 873c0c4616ea 5 | Create Date: 2023-10-30 17:23:51.517821 6 | 7 | """ 8 | from typing import Set 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = '663b3fea3024' 16 | down_revision = '873c0c4616ea' 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | existing_sub_process_source_enum_values = { 22 | "CHUNKING", 23 | "NODE_PARSING", 24 | "EMBEDDING", 25 | "LLM", 26 | "QUERY", 27 | "RETRIEVE", 28 | "SYNTHESIZE", 29 | "TREE", 30 | "CONSTRUCTED_QUERY_ENGINE", 31 | "SUB_QUESTIONS", 32 | "SUB_QUESTION", 33 | } 34 | 35 | new_sub_process_source_enum_values = { 36 | *existing_sub_process_source_enum_values, 37 | "AGENT_STEP", 38 | "SUB_QUESTION", 39 | "TEMPLATING", 40 | "FUNCTION_CALL", 41 | "RERANKING", 42 | "EXCEPTION", 43 | "AGENT_STEP" 44 | } 45 | 46 | 47 | def replace_enum_values(enum_name: str, table: str, new_values: Set[str]): 48 | """ 49 | Create a new type, add the value to it, update the column to use the new type and delete the old type 50 | """ 51 | op.execute(f'ALTER TYPE public."{enum_name}" RENAME TO "{enum_name}Old"') 52 | sa.Enum(*new_values, name=enum_name).create(op.get_bind()) 53 | op.execute( 54 | f'ALTER TABLE {table} ALTER COLUMN source TYPE public."{enum_name}" USING source::text::public."{enum_name}"' 55 | ) 56 | op.execute(f'DROP TYPE public."{enum_name}Old"') 57 | 58 | 59 | def upgrade() -> None: 60 | # ### commands auto generated by Alembic - please adjust! ### 61 | # Alter MessageSubProcessEnum to add new CBEventType enum values as valid values 62 | replace_enum_values( 63 | "MessageSubProcessSourceEnum", 64 | "messagesubprocess", 65 | new_sub_process_source_enum_values, 66 | ) 67 | 68 | # ### end Alembic commands ### 69 | 70 | 71 | def downgrade() -> None: 72 | # ### commands auto generated by Alembic - please adjust! 
### 73 | # revert back to the old enum type 74 | # Note that this won't work if the DB already has rows with the new enum values 75 | replace_enum_values( 76 | "MessageSubProcessSourceEnum", 77 | "messagesubprocess", 78 | existing_sub_process_source_enum_values, 79 | ) 80 | # ### end Alembic commands ### 81 | -------------------------------------------------------------------------------- /backend/alembic/versions/873c0c4616ea_add_foreign_key_indices.py: -------------------------------------------------------------------------------- 1 | """add foreign key indices 2 | 3 | Revision ID: 873c0c4616ea 4 | Revises: 1b0b616e08c6 5 | Create Date: 2023-08-15 23:10:01.739927 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects import postgresql 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "873c0c4616ea" 14 | down_revision = "1b0b616e08c6" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_index( 22 | op.f("ix_conversationdocument_conversation_id"), 23 | "conversationdocument", 24 | ["conversation_id"], 25 | unique=False, 26 | ) 27 | op.create_index( 28 | op.f("ix_conversationdocument_document_id"), 29 | "conversationdocument", 30 | ["document_id"], 31 | unique=False, 32 | ) 33 | op.create_index( 34 | op.f("ix_message_conversation_id"), "message", ["conversation_id"], unique=False 35 | ) 36 | op.create_index( 37 | op.f("ix_messagesubprocess_message_id"), 38 | "messagesubprocess", 39 | ["message_id"], 40 | unique=False, 41 | ) 42 | # ### end Alembic commands ### 43 | 44 | 45 | def downgrade() -> None: 46 | # ### commands auto generated by Alembic - please adjust! ### 47 | op.drop_index( 48 | op.f("ix_messagesubprocess_message_id"), table_name="messagesubprocess" 49 | ) 50 | op.drop_index(op.f("ix_message_conversation_id"), table_name="message") 51 | op.drop_index( 52 | op.f("ix_conversationdocument_document_id"), table_name="conversationdocument" 53 | ) 54 | op.drop_index( 55 | op.f("ix_conversationdocument_conversation_id"), 56 | table_name="conversationdocument", 57 | ) 58 | # ### end Alembic commands ### 59 | -------------------------------------------------------------------------------- /backend/alembic/versions/90a1d6a26343_create_doc_tables.py: -------------------------------------------------------------------------------- 1 | """create doc tables 2 | 3 | Revision ID: 90a1d6a26343 4 | Revises: c008bb4f3f48 5 | Create Date: 2023-07-11 05:42:05.054926 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects import postgresql 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '90a1d6a26343' 14 | down_revision = 'c008bb4f3f48' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! 
### 21 | op.create_table('document', 22 | sa.Column('url', sa.String(), nullable=False), 23 | sa.Column('metadata_map', postgresql.JSONB(astext_type=sa.Text()), nullable=True), 24 | sa.Column('id', sa.UUID(), nullable=False), 25 | sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), 26 | sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), 27 | sa.PrimaryKeyConstraint('id'), 28 | sa.UniqueConstraint('url') 29 | ) 30 | op.create_index(op.f('ix_document_id'), 'document', ['id'], unique=False) 31 | op.create_table('conversationdocument', 32 | sa.Column('conversation_id', sa.UUID(), nullable=True), 33 | sa.Column('document_id', sa.UUID(), nullable=True), 34 | sa.Column('id', sa.UUID(), nullable=False), 35 | sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), 36 | sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), 37 | sa.ForeignKeyConstraint(['conversation_id'], ['conversation.id'], ), 38 | sa.ForeignKeyConstraint(['document_id'], ['document.id'], ), 39 | sa.PrimaryKeyConstraint('id') 40 | ) 41 | op.create_index(op.f('ix_conversationdocument_id'), 'conversationdocument', ['id'], unique=False) 42 | # ### end Alembic commands ### 43 | 44 | 45 | def downgrade() -> None: 46 | # ### commands auto generated by Alembic - please adjust! ### 47 | op.drop_index(op.f('ix_conversationdocument_id'), table_name='conversationdocument') 48 | op.drop_table('conversationdocument') 49 | op.drop_index(op.f('ix_document_id'), table_name='document') 50 | op.drop_table('document') 51 | # ### end Alembic commands ### 52 | -------------------------------------------------------------------------------- /backend/alembic/versions/c008bb4f3f48_update_sub_process_columns.py: -------------------------------------------------------------------------------- 1 | """update sub process columns 2 | 3 | Revision ID: c008bb4f3f48 4 | Revises: 477cee72edc4 5 | Create Date: 2023-07-07 18:11:35.087271 6 | 7 | """ 8 | from typing import Set 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "c008bb4f3f48" 15 | down_revision = "477cee72edc4" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | existing_sub_process_source_enum_values = { 20 | "CHUNKING", 21 | "NODE_PARSING", 22 | "EMBEDDING", 23 | "LLM", 24 | "QUERY", 25 | "RETRIEVE", 26 | "SYNTHESIZE", 27 | "TREE", 28 | "CONSTRUCTED_QUERY_ENGINE", 29 | } 30 | 31 | new_sub_process_source_enum_values = { 32 | *existing_sub_process_source_enum_values, 33 | "SUB_QUESTIONS", 34 | } 35 | 36 | 37 | def replace_enum_values(enum_name: str, table: str, new_values: Set[str]): 38 | """ 39 | Create a new type, add the value to it, update the column to use the new type and delete the old type 40 | """ 41 | op.execute(f'ALTER TYPE public."{enum_name}" RENAME TO "{enum_name}Old"') 42 | sa.Enum(*new_values, name=enum_name).create(op.get_bind()) 43 | op.execute( 44 | f'ALTER TABLE {table} ALTER COLUMN source TYPE public."{enum_name}" USING source::text::public."{enum_name}"' 45 | ) 46 | op.execute(f'DROP TYPE public."{enum_name}Old"') 47 | 48 | 49 | def upgrade() -> None: 50 | # ### commands auto generated by Alembic - please adjust! 
### 51 | enum = postgresql.ENUM("PENDING", "FINISHED", name="MessageSubProcessStatusEnum") 52 | enum.create(op.get_bind()) 53 | op.add_column( 54 | "messagesubprocess", 55 | sa.Column("status", enum, nullable=False, server_default="FINISHED"), 56 | ) 57 | op.add_column( 58 | "messagesubprocess", 59 | sa.Column( 60 | "metadata_map", postgresql.JSONB(astext_type=sa.Text()), nullable=True 61 | ), 62 | ) 63 | op.drop_column("messagesubprocess", "content") 64 | 65 | # Alter MessageSubProcessEnum to add "SUB_QUESTIONS" as a valid value 66 | replace_enum_values( 67 | "MessageSubProcessSourceEnum", 68 | "messagesubprocess", 69 | new_sub_process_source_enum_values, 70 | ) 71 | 72 | # ### end Alembic commands ### 73 | 74 | 75 | def downgrade() -> None: 76 | # ### commands auto generated by Alembic - please adjust! ### 77 | # revert back to the old enum type 78 | # Note that this won't work if the DB already has rows with the new enum values 79 | replace_enum_values( 80 | "MessageSubProcessSourceEnum", 81 | "messagesubprocess", 82 | existing_sub_process_source_enum_values, 83 | ) 84 | 85 | op.add_column( 86 | "messagesubprocess", 87 | sa.Column("content", sa.VARCHAR(), autoincrement=False, nullable=True), 88 | ) 89 | op.drop_column("messagesubprocess", "metadata_map") 90 | op.drop_column("messagesubprocess", "status") 91 | # ### end Alembic commands ### 92 | -------------------------------------------------------------------------------- /backend/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/__init__.py -------------------------------------------------------------------------------- /backend/app/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/api/__init__.py -------------------------------------------------------------------------------- /backend/app/api/api.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | from app.api.endpoints import conversation, health, documents 4 | 5 | api_router = APIRouter() 6 | api_router.include_router( 7 | conversation.router, prefix="/conversation", tags=["conversation"] 8 | ) 9 | api_router.include_router(documents.router, prefix="/document", tags=["document"]) 10 | api_router.include_router(health.router, prefix="/health", tags=["health"]) 11 | -------------------------------------------------------------------------------- /backend/app/api/crud.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, List 2 | from sqlalchemy.orm import joinedload 3 | from sqlalchemy.ext.asyncio import AsyncSession 4 | from app.models.db import Conversation, Message, Document, ConversationDocument 5 | from app import schema 6 | from sqlalchemy import select, delete 7 | from sqlalchemy.dialects.postgresql import insert 8 | 9 | 10 | async def fetch_conversation_with_messages( 11 | db: AsyncSession, conversation_id: str 12 | ) -> Optional[schema.Conversation]: 13 | """ 14 | Fetch a conversation with its messages + messagesubprocesses 15 | return None if the conversation with the given id does not exist 16 | """ 17 | # Eagerly load required relationships 18 | stmt = ( 19 | select(Conversation) 20 | 
        .options(joinedload(Conversation.messages).subqueryload(Message.sub_processes))
21 |         .options(
22 |             joinedload(Conversation.conversation_documents).subqueryload(
23 |                 ConversationDocument.document
24 |             )
25 |         )
26 |         .where(Conversation.id == conversation_id)
27 |     )
28 | 
29 |     result = await db.execute(stmt)  # execute the statement
30 |     conversation = result.scalars().first()  # get the first result
31 |     if conversation is not None:
32 |         convo_dict = {
33 |             **conversation.__dict__,
34 |             "documents": [
35 |                 convo_doc.document for convo_doc in conversation.conversation_documents
36 |             ],
37 |         }
38 |         return schema.Conversation(**convo_dict)
39 |     return None
40 | 
41 | 
42 | async def create_conversation(
43 |     db: AsyncSession, convo_payload: schema.ConversationCreate
44 | ) -> schema.Conversation:
45 |     conversation = Conversation()
46 |     convo_doc_db_objects = [
47 |         ConversationDocument(document_id=doc_id, conversation=conversation)
48 |         for doc_id in convo_payload.document_ids
49 |     ]
50 |     db.add(conversation)
51 |     db.add_all(convo_doc_db_objects)
52 |     await db.commit()
53 |     await db.refresh(conversation)
54 |     return await fetch_conversation_with_messages(db, conversation.id)
55 | 
56 | 
57 | async def delete_conversation(db: AsyncSession, conversation_id: str) -> bool:
58 |     stmt = delete(Conversation).where(Conversation.id == conversation_id)
59 |     result = await db.execute(stmt)
60 |     await db.commit()
61 |     return result.rowcount > 0
62 | 
63 | 
64 | async def fetch_message_with_sub_processes(
65 |     db: AsyncSession, message_id: str
66 | ) -> Optional[schema.Message]:
67 |     """
68 |     Fetch a message with its sub processes.
69 |     Returns None if the message with the given id does not exist.
70 |     """
71 |     # Eagerly load required relationships
72 |     stmt = (
73 |         select(Message)
74 |         .options(joinedload(Message.sub_processes))
75 |         .where(Message.id == message_id)
76 |     )
77 |     result = await db.execute(stmt)  # execute the statement
78 |     message = result.scalars().first()  # get the first result
79 |     if message is not None:
80 |         return schema.Message.model_validate(message, from_attributes=True)
81 |     return None
82 | 
83 | 
84 | async def fetch_documents(
85 |     db: AsyncSession,
86 |     id: Optional[str] = None,
87 |     ids: Optional[List[str]] = None,
88 |     url: Optional[str] = None,
89 |     limit: Optional[int] = None,
90 | ) -> Sequence[schema.Document]:
91 |     """
92 |     Fetch documents, optionally filtered by a single id, a list of ids, or a url.
93 |     Returns all documents when no filters are given.
94 |     """
95 |     stmt = select(Document)
96 |     if id is not None:
97 |         stmt = stmt.where(Document.id == id)
98 |         limit = 1
99 |     elif ids is not None:
100 |         stmt = stmt.where(Document.id.in_(ids))
101 |     if url is not None:
102 |         stmt = stmt.where(Document.url == url)
103 |     if limit is not None:
104 |         stmt = stmt.limit(limit)
105 |     result = await db.execute(stmt)
106 |     documents = result.scalars().all()
107 |     return [schema.Document.model_validate(doc, from_attributes=True) for doc in documents]
108 | 
109 | 
110 | async def upsert_document_by_url(
111 |     db: AsyncSession, document: schema.Document
112 | ) -> schema.Document:
113 |     """
114 |     Upsert a document, using its url as the conflict target.
115 |     """
116 |     stmt = insert(Document).values(**document.model_dump(exclude_none=True))
117 |     stmt = stmt.on_conflict_do_update(
118 |         index_elements=[Document.url],
119 |         set_=document.model_dump(mode="json", include={"metadata_map"}),
120 |     )
121 |     stmt = stmt.returning(Document)
122 |     result = await db.execute(stmt)
123 |     upserted_doc = schema.Document.model_validate(result.scalars().first(), from_attributes=True)
124 |     await db.commit()
125 |     return upserted_doc
126 | 
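# Illustrative usage sketch (not part of the original module). A minimal
# example of how the helpers above fit together: upsert a document by URL,
# then fetch it back by id. Assumes the local Postgres from `make migrate` is
# running and that `schema.Document` only requires `url` plus an optional
# `metadata_map`; the example URL is hypothetical.
if __name__ == "__main__":
    import asyncio

    from app.db.session import SessionLocal

    async def _demo() -> None:
        async with SessionLocal() as db:
            doc = schema.Document(
                url="https://example.com/filings/demo.pdf",  # hypothetical URL
                metadata_map={},
            )
            upserted = await upsert_document_by_url(db, doc)
            fetched = await fetch_documents(db, id=str(upserted.id))
            print(fetched)

    asyncio.run(_demo())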
--------------------------------------------------------------------------------
/backend/app/api/deps.py:
--------------------------------------------------------------------------------
1 | from typing import AsyncGenerator
2 | from sqlalchemy.ext.asyncio import AsyncSession
3 | from app.db.session import SessionLocal
4 | 
5 | 
6 | async def get_db() -> AsyncGenerator[AsyncSession, None]:
7 |     async with SessionLocal() as db:
8 |         yield db
9 | 
--------------------------------------------------------------------------------
/backend/app/api/endpoints/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/api/endpoints/__init__.py
--------------------------------------------------------------------------------
/backend/app/api/endpoints/documents.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | import logging
3 | from fastapi import Depends, APIRouter, HTTPException, Query
4 | from sqlalchemy.ext.asyncio import AsyncSession
5 | from uuid import UUID
6 | 
7 | from app.api.deps import get_db
8 | from app.api import crud
9 | from app import schema
10 | 
11 | router = APIRouter()
12 | logger = logging.getLogger(__name__)
13 | 
14 | 
15 | @router.get("/")
16 | async def get_documents(
17 |     document_ids: Optional[List[UUID]] = Query(None),
18 |     db: AsyncSession = Depends(get_db),
19 | ) -> List[schema.Document]:
20 |     """
21 |     Get all documents or documents by their ids
22 |     """
23 |     if document_ids is None:
24 |         # If no ids provided, fetch all documents
25 |         docs = await crud.fetch_documents(db)
26 |     else:
27 |         # If ids are provided, fetch documents by ids
28 |         docs = await crud.fetch_documents(db, ids=document_ids)
29 | 
30 |     if len(docs) == 0:
31 |         raise HTTPException(status_code=404, detail="Document(s) not found")
32 | 
33 |     return docs
34 | 
35 | 
36 | @router.get("/{document_id}")
37 | async def get_document(
38 |     document_id: UUID,
39 |     db: AsyncSession = Depends(get_db),
40 | ) -> schema.Document:
41 |     """
42 |     Get a single document by its id
43 |     """
44 |     docs = await crud.fetch_documents(db, id=document_id)
45 |     if len(docs) == 0:
46 |         raise HTTPException(status_code=404, detail="Document not found")
47 | 
48 |     return docs[0]
49 | 
--------------------------------------------------------------------------------
/backend/app/api/endpoints/health.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | 
3 | from fastapi import APIRouter, Depends
4 | from sqlalchemy.ext.asyncio import AsyncSession
5 | from sqlalchemy.sql import text
6 | from app.api import deps
7 | 
8 | router = APIRouter()
9 | 
10 | 
11 | @router.get("/")
12 | async def health(db: AsyncSession = Depends(deps.get_db)) -> Dict[str, str]:
13 |     """
14 |     Health check endpoint that also verifies database connectivity.
15 |     """
16 |     await db.execute(text("SELECT 1"))
17 |     return {"status": "alive"}
18 | 
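# Illustrative check of the route above (not part of the original module).
# Assumes the FastAPI app mounts `api_router` from app/api/api.py under "/api"
# (an assumption; adjust to the actual mount prefix in app/main.py), that the
# backend is running locally on port 8000 via `make run`, and that `httpx` is
# available in the environment.
if __name__ == "__main__":
    import asyncio

    import httpx

    async def _check() -> None:
        async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
            resp = await client.get("/api/health/")
            # Expect a 200 with {"status": "alive"} when the DB is reachable
            print(resp.status_code, resp.json())

    asyncio.run(_check())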
15 | """ 16 | await db.execute(text("SELECT 1")) 17 | return {"status": "alive"} 18 | -------------------------------------------------------------------------------- /backend/app/chat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/chat/__init__.py -------------------------------------------------------------------------------- /backend/app/chat/constants.py: -------------------------------------------------------------------------------- 1 | DB_DOC_ID_KEY = "db_document_id" 2 | 3 | SYSTEM_MESSAGE = """ 4 | You are an expert financial analyst that always answers questions with the most relevant information using the tools at your disposal. 5 | These tools have information regarding companies that the user has expressed interest in. 6 | Here are some guidelines that you must follow: 7 | * For financial questions, you must use the tools to find the answer and then write a response. 8 | * Even if it seems like your tools won't be able to answer the question, you must still use them to find the most relevant information and insights. Not using them will appear as if you are not doing your job. 9 | * You may assume that the users financial questions are related to the documents they've selected. 10 | * For any user message that isn't related to financial analysis, respectfully decline to respond and suggest that the user ask a relevant question. 11 | * If your tools are unable to find an answer, you should say that you haven't found an answer but still relay any useful information the tools found. 12 | 13 | The tools at your disposal have access to the following SEC documents that the user has selected to discuss with you: 14 | {doc_titles} 15 | 16 | The current date is: {curr_date} 17 | """.strip() 18 | 19 | NODE_PARSER_CHUNK_SIZE = 512 20 | NODE_PARSER_CHUNK_OVERLAP = 10 21 | -------------------------------------------------------------------------------- /backend/app/chat/messaging.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Optional, List 2 | import asyncio 3 | import logging 4 | from uuid import uuid4 5 | from anyio import ClosedResourceError 6 | from anyio.streams.memory import MemoryObjectSendStream 7 | 8 | from llama_index.core.callbacks.base import BaseCallbackHandler 9 | from llama_index.core.callbacks.schema import CBEventType, EventPayload 10 | from llama_index.core.query_engine.sub_question_query_engine import SubQuestionAnswerPair 11 | from llama_index.core.chat_engine.types import StreamingAgentChatResponse 12 | from pydantic import BaseModel 13 | 14 | from app import schema 15 | from app.schema import SubProcessMetadataKeysEnum, SubProcessMetadataMap 16 | from app.models.db import MessageSubProcessSourceEnum 17 | from app.chat.engine import get_chat_engine 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | class StreamedMessage(BaseModel): 23 | content: str 24 | 25 | 26 | class StreamedMessageSubProcess(BaseModel): 27 | source: MessageSubProcessSourceEnum 28 | has_ended: bool 29 | event_id: str 30 | metadata_map: Optional[SubProcessMetadataMap] = None 31 | 32 | 33 | class ChatCallbackHandler(BaseCallbackHandler): 34 | def __init__( 35 | self, 36 | send_chan: MemoryObjectSendStream, 37 | ): 38 | """Initialize the base callback handler.""" 39 | ignored_events = [CBEventType.CHUNKING, CBEventType.NODE_PARSING] 40 | 
super().__init__(ignored_events, ignored_events) 41 | self._send_chan = send_chan 42 | 43 | def on_event_start( 44 | self, 45 | event_type: CBEventType, 46 | payload: Optional[Dict[str, Any]] = None, 47 | event_id: str = "", 48 | **kwargs: Any, 49 | ) -> str: 50 | """Create the MessageSubProcess row for the event that started.""" 51 | asyncio.create_task( 52 | self.async_on_event( 53 | event_type, payload, event_id, is_start_event=True, **kwargs) 54 | ) 55 | return event_id 56 | 57 | def on_event_end( 58 | self, 59 | event_type: CBEventType, 60 | payload: Optional[Dict[str, Any]] = None, 61 | event_id: str = "", 62 | **kwargs: Any, 63 | ) -> None: 64 | """Create the MessageSubProcess row for the event that completed.""" 65 | asyncio.create_task( 66 | self.async_on_event( 67 | event_type, payload, event_id, is_start_event=False, **kwargs 68 | ) 69 | ) 70 | 71 | def get_metadata_from_event( 72 | self, 73 | event_type: CBEventType, 74 | payload: Optional[Dict[str, Any]] = None, 75 | is_start_event: bool = False, 76 | ) -> SubProcessMetadataMap: 77 | metadata_map = {} 78 | 79 | if ( 80 | event_type == CBEventType.SUB_QUESTION 81 | and EventPayload.SUB_QUESTION in (payload or {}) 82 | ): 83 | sub_q: SubQuestionAnswerPair = payload[EventPayload.SUB_QUESTION] 84 | metadata_map[ 85 | SubProcessMetadataKeysEnum.SUB_QUESTION.value 86 | ] = schema.QuestionAnswerPair.from_sub_question_answer_pair(sub_q).model_dump() 87 | return metadata_map 88 | 89 | async def async_on_event( 90 | self, 91 | event_type: CBEventType, 92 | payload: Optional[Dict[str, Any]] = None, 93 | event_id: str = "", 94 | is_start_event: bool = False, 95 | **kwargs: Any, 96 | ) -> None: 97 | metadata_map = self.get_metadata_from_event( 98 | event_type, payload=payload, is_start_event=is_start_event 99 | ) 100 | metadata_map = metadata_map or None 101 | source = MessageSubProcessSourceEnum[event_type.name] 102 | if self._send_chan._closed: 103 | logger.debug("Received event after send channel closed. Ignoring.") 104 | return 105 | try: 106 | await self._send_chan.send( 107 | StreamedMessageSubProcess( 108 | source=source, 109 | metadata_map=metadata_map, 110 | event_id=event_id, 111 | has_ended=not is_start_event, 112 | ) 113 | ) 114 | except ClosedResourceError: 115 | logger.exception("Tried sending SubProcess event %s after channel was closed", f"(source={source})") 116 | 117 | def start_trace(self, trace_id: Optional[str] = None) -> None: 118 | """No-op.""" 119 | 120 | def end_trace( 121 | self, 122 | trace_id: Optional[str] = None, 123 | trace_map: Optional[Dict[str, List[str]]] = None, 124 | ) -> None: 125 | """No-op.""" 126 | 127 | 128 | async def handle_chat_message( 129 | conversation: schema.Conversation, 130 | user_message: schema.UserMessageCreate, 131 | send_chan: MemoryObjectSendStream, 132 | ) -> None: 133 | async with send_chan: 134 | chat_engine = await get_chat_engine( 135 | ChatCallbackHandler(send_chan), conversation 136 | ) 137 | await send_chan.send( 138 | StreamedMessageSubProcess( 139 | event_id=str(uuid4()), 140 | has_ended=True, 141 | source=MessageSubProcessSourceEnum.CONSTRUCTED_QUERY_ENGINE, 142 | ) 143 | ) 144 | logger.debug("Engine received") 145 | templated_message = f""" 146 | Remember - if I have asked a relevant financial question, use your tools. 
147 | 148 | {user_message.content} 149 | """.strip() 150 | streaming_chat_response: StreamingAgentChatResponse = ( 151 | await chat_engine.astream_chat(templated_message) 152 | ) 153 | response_str = "" 154 | async for text in streaming_chat_response.async_response_gen(): 155 | response_str += text 156 | if send_chan._closed: 157 | logger.debug( 158 | "Received streamed token after send channel closed. Ignoring." 159 | ) 160 | return 161 | await send_chan.send(StreamedMessage(content=response_str)) 162 | 163 | if response_str.strip() == "": 164 | await send_chan.send( 165 | StreamedMessage( 166 | content="Sorry, I either wasn't able to understand your question or I don't have an answer for it." 167 | ) 168 | ) 169 | -------------------------------------------------------------------------------- /backend/app/chat/pg_vector.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.vector_stores.types import VectorStore 2 | from llama_index.vector_stores.postgres import PGVectorStore 3 | from sqlalchemy.engine import make_url 4 | from app.db.session import SessionLocal as AppSessionLocal, engine as app_engine 5 | import sqlalchemy 6 | from sqlalchemy import create_engine 7 | from sqlalchemy.orm import sessionmaker 8 | from app.core.config import settings 9 | 10 | singleton_instance = None 11 | did_run_setup = False 12 | 13 | 14 | class CustomPGVectorStore(PGVectorStore): 15 | """ 16 | Custom PGVectorStore that uses the same connection pool as the FastAPI app. 17 | """ 18 | 19 | def _connect(self) -> None: 20 | self._engine = create_engine(self.connection_string) 21 | self._session = sessionmaker(self._engine) 22 | 23 | # Use our existing app engine and session so we can use the same connection pool 24 | self._async_engine = app_engine 25 | self._async_session = AppSessionLocal 26 | 27 | async def close(self) -> None: 28 | self._session.close_all() 29 | self._engine.dispose() 30 | 31 | await self._async_engine.dispose() 32 | 33 | def _create_tables_if_not_exists(self) -> None: 34 | pass 35 | 36 | def _create_extension(self) -> None: 37 | pass 38 | 39 | async def run_setup(self) -> None: 40 | global did_run_setup 41 | if did_run_setup: 42 | return 43 | self._initialize() 44 | async with self._async_session() as session: 45 | async with session.begin(): 46 | statement = sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS vector") 47 | await session.execute(statement) 48 | await session.commit() 49 | 50 | async with self._async_session() as session: 51 | async with session.begin(): 52 | conn = await session.connection() 53 | await conn.run_sync(self._base.metadata.create_all) 54 | did_run_setup = True 55 | 56 | 57 | async def get_vector_store_singleton() -> VectorStore: 58 | global singleton_instance 59 | if singleton_instance is not None: 60 | return singleton_instance 61 | url = make_url(settings.DATABASE_URL) 62 | singleton_instance = CustomPGVectorStore.from_params( 63 | url.host, 64 | url.port or 5432, 65 | url.database, 66 | url.username, 67 | url.password, 68 | settings.VECTOR_STORE_TABLE_NAME, 69 | ) 70 | return singleton_instance 71 | -------------------------------------------------------------------------------- /backend/app/chat/qa_response_synth.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from llama_index.core.response_synthesizers import BaseSynthesizer 3 | from llama_index.core.prompts.prompts import RefinePrompt, QuestionAnswerPrompt 4 | from 
llama_index.core.prompts.prompt_type import PromptType 5 | from app.schema import Document as DocumentSchema 6 | from app.chat.utils import build_title_for_document 7 | from llama_index.core.callbacks import CallbackManager 8 | from llama_index.core.response_synthesizers.factory import get_response_synthesizer 9 | 10 | 11 | def get_custom_response_synth( 12 | callback_manager: CallbackManager, documents: List[DocumentSchema] 13 | ) -> BaseSynthesizer: 14 | doc_titles = "\n".join("- " + build_title_for_document(doc) for doc in documents) 15 | refine_template_str = f""" 16 | A user has selected a set of SEC filing documents and has asked a question about them. \ 17 | The SEC documents have the following titles: 18 | {doc_titles} 19 | The original query is as follows: {{query_str}} 20 | We have provided an existing answer: {{existing_answer}} 21 | We have the opportunity to refine the existing answer \ 22 | (only if needed) with some more context below. 23 | ------------ 24 | {{context_msg}} 25 | ------------ 26 | Given the new context, refine the original answer to better \ 27 | answer the query. \ 28 | If the context isn't useful, return the original answer. 29 | Refined Answer: 30 | """.strip() 31 | refine_prompt = RefinePrompt( 32 | template=refine_template_str, 33 | prompt_type=PromptType.REFINE, 34 | ) 35 | 36 | qa_template_str = f""" 37 | A user has selected a set of SEC filing documents and has asked a question about them. \ 38 | The SEC documents have the following titles: 39 | {doc_titles} 40 | Context information is below. 41 | --------------------- 42 | {{context_str}} 43 | --------------------- 44 | Given the context information and not prior knowledge, \ 45 | answer the query. 46 | Query: {{query_str}} 47 | Answer: 48 | """.strip() 49 | qa_prompt = QuestionAnswerPrompt( 50 | template=qa_template_str, 51 | prompt_type=PromptType.QUESTION_ANSWER, 52 | ) 53 | 54 | return get_response_synthesizer( 55 | callback_manager=callback_manager, 56 | refine_template=refine_prompt, 57 | text_qa_template=qa_prompt, 58 | # only useful for gpt-3.5 59 | structured_answer_filtering=False, 60 | ) 61 | -------------------------------------------------------------------------------- /backend/app/chat/tools.py: -------------------------------------------------------------------------------- 1 | from typing import List, cast 2 | import logging 3 | 4 | # This is from the unofficial polygon.io client: https://polygon.readthedocs.io/ 5 | from polygon.reference_apis import ReferenceClient 6 | from polygon.reference_apis.reference_api import AsyncReferenceClient 7 | 8 | # This is from the official polygon.io client: https://polygon-api-client.readthedocs.io/ 9 | from polygon.rest.models import StockFinancial 10 | 11 | from app.schema import ( 12 | Document as DocumentSchema, 13 | DocumentMetadataKeysEnum, 14 | SecDocumentMetadata, 15 | ) 16 | from llama_index.core.tools import FunctionTool, ToolMetadata, QueryEngineTool 17 | from llama_index.core.callbacks import CallbackManager 18 | from llama_index.core import Settings 19 | from llama_index.agent.openai import OpenAIAgent 20 | from app.core.config import settings 21 | from app.chat.utils import build_title_for_document 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | 28 | 29 | 30 | def describe_financials(financials: StockFinancial) -> str: 31 | sentences: List[str] = [] 32 | 33 | company = financials.company_name 34 | fiscal_year = financials.fiscal_year 35 | fiscal_period = financials.fiscal_period 36 
| 37 | sentences.append( 38 | f"For {company} in fiscal year {fiscal_year} covering the period {fiscal_period}:" 39 | ) 40 | 41 | income_statement = financials.financials.income_statement 42 | 43 | if income_statement: 44 | revenues = income_statement.revenues 45 | if revenues: 46 | revenue_str = f"{revenues.label}: {revenues.value} {revenues.unit}" 47 | sentences.append(f"Revenues were {revenue_str}.") 48 | 49 | expenses = income_statement.operating_expenses 50 | if expenses: 51 | expenses_str = f"{expenses.label}: {expenses.value} {expenses.unit}" 52 | sentences.append(f"Operating expenses were {expenses_str}.") 53 | 54 | gross_profit = income_statement.gross_profit 55 | if gross_profit: 56 | gross_profit_str = f"{gross_profit.value} {gross_profit.unit}" 57 | sentences.append(f"Gross profit was {gross_profit_str}.") 58 | 59 | net_income = ( 60 | financials.financials.comprehensive_income.comprehensive_income_loss_attributable_to_parent 61 | ) 62 | if net_income: 63 | net_income_str = f"{net_income.label}: {net_income.value} {net_income.unit}" 64 | sentences.append(f"Net income was {net_income_str}.") 65 | 66 | cash_flows = financials.financials.cash_flow_statement 67 | if cash_flows: 68 | operating_cash_flows = cash_flows.net_cash_flow 69 | if operating_cash_flows: 70 | operating_str = f"{operating_cash_flows.label}: {operating_cash_flows.value} {operating_cash_flows.unit}" 71 | sentences.append(f"Net cash from operating activities was {operating_str}.") 72 | 73 | financing_cash_flows = cash_flows.net_cash_flow_from_financing_activities 74 | if financing_cash_flows: 75 | financing_str = f"{financing_cash_flows.label}: {financing_cash_flows.value} {financing_cash_flows.unit}" 76 | sentences.append(f"Net cash from financing activities was {financing_str}.") 77 | 78 | return " ".join(sentences) 79 | 80 | 81 | def get_tool_metadata_for_document(doc: DocumentSchema) -> ToolMetadata: 82 | doc_title = build_title_for_document(doc) 83 | name = f"extract_json_from_sec_document[{doc_title}]" 84 | description = f"Returns basic financial data extracted from the SEC filing document {doc_title}" 85 | return ToolMetadata( 86 | name=name, 87 | description=description, 88 | ) 89 | 90 | 91 | def get_polygon_io_sec_tool(document: DocumentSchema) -> FunctionTool: 92 | sec_metadata = SecDocumentMetadata.parse_obj( 93 | document.metadata_map[DocumentMetadataKeysEnum.SEC_DOCUMENT] 94 | ) 95 | tool_metadata = get_tool_metadata_for_document(document) 96 | 97 | async def extract_data_from_sec_document(*args, **kwargs) -> List[str]: 98 | try: 99 | client = ReferenceClient( 100 | api_key=settings.POLYGON_IO_API_KEY, 101 | connect_timeout=10, 102 | read_timeout=10, 103 | max_connections=20, 104 | use_async=True, 105 | ) 106 | client = cast(AsyncReferenceClient, client) 107 | response_dict = await client.get_stock_financials_vx( 108 | ticker=sec_metadata.company_ticker, 109 | period_of_report_date=str(sec_metadata.period_of_report_date.date()), 110 | limit=100, # max limit is 100 111 | ) 112 | stock_financials = [] 113 | for result_dict in response_dict["results"]: 114 | stock_financials.append(StockFinancial.from_dict(result_dict)) 115 | 116 | descriptions = [] 117 | for stock_financial in stock_financials: 118 | description = describe_financials(stock_financial) 119 | logger.debug( 120 | "Built the following description for document_id=%s: %s", 121 | str(document.id), 122 | description, 123 | ) 124 | descriptions.append(description) 125 | return descriptions 126 | except Exception: 127 | logger.error( 128 | "Error retrieving 
data from polygon.io for document_id %s", 129 | str(document.id), 130 | exc_info=True, 131 | ) 132 | return ["No answer found."] 133 | 134 | def sync_func_placeholder(*args, **kwargs) -> None: 135 | raise NotImplementedError( 136 | "Sync function was called for document_id=" + str(document.id) 137 | ) 138 | 139 | return FunctionTool.from_defaults( 140 | fn=sync_func_placeholder, 141 | async_fn=extract_data_from_sec_document, 142 | description=tool_metadata.description, 143 | ) 144 | 145 | 146 | def get_api_query_engine_tool( 147 | document: DocumentSchema, callback_manager: CallbackManager, 148 | ) -> QueryEngineTool: 149 | polygon_io_tool = get_polygon_io_sec_tool(document) 150 | tool_metadata = get_tool_metadata_for_document(document) 151 | doc_title = build_title_for_document(document) 152 | llm = Settings.llm.model_copy( 153 | update={"callback_manager": callback_manager}, 154 | deep=True 155 | ) 156 | agent = OpenAIAgent.from_tools( 157 | [polygon_io_tool], 158 | llm=llm, 159 | callback_manager=callback_manager, 160 | system_prompt=f"You are an agent that is asked quantitative questions about a SEC filing named {doc_title} and you answer them by using your tools.", 161 | ) 162 | return QueryEngineTool.from_defaults( 163 | query_engine=agent, 164 | name=tool_metadata.name, 165 | description=tool_metadata.description, 166 | ) 167 | -------------------------------------------------------------------------------- /backend/app/chat/utils.py: -------------------------------------------------------------------------------- 1 | from app.schema import ( 2 | Document as DocumentSchema, 3 | DocumentMetadataKeysEnum, 4 | SecDocumentMetadata, 5 | ) 6 | 7 | 8 | def build_title_for_document(document: DocumentSchema) -> str: 9 | if DocumentMetadataKeysEnum.SEC_DOCUMENT not in document.metadata_map: 10 | return "No Title Document" 11 | 12 | sec_metadata = SecDocumentMetadata.parse_obj( 13 | document.metadata_map[DocumentMetadataKeysEnum.SEC_DOCUMENT] 14 | ) 15 | time_period = ( 16 | f"{sec_metadata.year} Q{sec_metadata.quarter}" 17 | if sec_metadata.quarter is not None 18 | else str(sec_metadata.year) 19 | ) 20 | return f"{sec_metadata.company_name} ({sec_metadata.company_ticker}) {sec_metadata.doc_type.value} ({time_period})" 21 | -------------------------------------------------------------------------------- /backend/app/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/core/__init__.py -------------------------------------------------------------------------------- /backend/app/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from enum import Enum 3 | from typing import List, Union, Optional, Literal 4 | from pydantic_settings import BaseSettings, SettingsConfigDict 5 | from pydantic import AnyHttpUrl, EmailStr, field_validator 6 | 7 | 8 | class AppEnvironment(str, Enum): 9 | """ 10 | Enum for app environments. 
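LOCAL is a developer machine, PREVIEW a PR preview deployment, and PRODUCTION the live service.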
11 | """ 12 | 13 | LOCAL = "local" 14 | PREVIEW = "preview" 15 | PRODUCTION = "production" 16 | 17 | 18 | is_pull_request: bool = os.environ.get("IS_PULL_REQUEST") == "true" 19 | is_preview_env: bool = os.environ.get("IS_PREVIEW_ENV") == "true" 20 | 21 | model_config = SettingsConfigDict( 22 | env_prefix="PREVIEW_" if is_pull_request or is_preview_env else "" 23 | ) 24 | 25 | class PreviewPrefixedSettings(BaseSettings): 26 | """ 27 | Settings class that uses a different env_prefix for PR Preview deployments. 28 | 29 | PR Preview deployments should source their secret environment variables with 30 | the `PREVIEW_` prefix, while regular deployments should source them from the 31 | environment variables with no prefix. 32 | 33 | Some environment variables (like `DATABASE_URL`) use Render.com's capability to 34 | automatically set environment variables to their preview value for PR Preview 35 | deployments, so they are not prefixed. 36 | """ 37 | 38 | OPENAI_API_KEY: str 39 | AWS_KEY: str 40 | AWS_SECRET: str 41 | POLYGON_IO_API_KEY: str 42 | 43 | model_config = model_config 44 | 45 | 46 | class Settings(PreviewPrefixedSettings): 47 | """ 48 | Application settings. 49 | """ 50 | 51 | PROJECT_NAME: str = "llama_app" 52 | API_PREFIX: str = "/api" 53 | DATABASE_URL: str 54 | LOG_LEVEL: str = "DEBUG" 55 | IS_PULL_REQUEST: bool = False 56 | RENDER: bool = False 57 | CODESPACES: bool = False 58 | CODESPACE_NAME: Optional[str] = None 59 | S3_BUCKET_NAME: str 60 | S3_ASSET_BUCKET_NAME: str 61 | CDN_BASE_URL: str 62 | VECTOR_STORE_TABLE_NAME: str = "pg_vector_store" 63 | SENTRY_DSN: Optional[str] = None 64 | RENDER_GIT_COMMIT: Optional[str] = None 65 | LOADER_IO_VERIFICATION_STR: str = "loaderio-e51043c635e0f4656473d3570ae5d9ec" 66 | SEC_EDGAR_COMPANY_NAME: str = "YourOrgName" 67 | SEC_EDGAR_EMAIL: EmailStr = "you@example.com" 68 | OPENAI_CHAT_LLM_NAME: str = "gpt-4o-mini" 69 | 70 | # BACKEND_CORS_ORIGINS is a JSON-formatted list of origins 71 | # e.g: '["http://localhost", "http://localhost:4200", "http://localhost:3000", \ 72 | # "http://localhost:8080", "http://local.dockertoolbox.tiangolo.com"]' 73 | BACKEND_CORS_ORIGINS: List[AnyHttpUrl | Literal["*"]] = [] 74 | 75 | @property 76 | def VERBOSE(self) -> bool: 77 | """ 78 | Used for setting verbose flag in LlamaIndex modules. 79 | """ 80 | return self.LOG_LEVEL == "DEBUG" or self.IS_PULL_REQUEST or not self.RENDER 81 | 82 | @property 83 | def S3_ENDPOINT_URL(self) -> str: 84 | """ 85 | Used for setting S3 endpoint URL in the s3fs module. 86 | When running locally, this should be set to the localstack endpoint. 
87 | """ 88 | return None if self.RENDER else "http://localhost:4566" 89 | 90 | @field_validator("BACKEND_CORS_ORIGINS", mode='before') 91 | def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]: 92 | if isinstance(v, str) and not v.startswith("["): 93 | return [i.strip() for i in v.split(",")] 94 | elif isinstance(v, (list, str)): 95 | return v 96 | raise ValueError(v) 97 | 98 | @field_validator("DATABASE_URL", mode='before') 99 | def assemble_db_url(cls, v: str) -> str: 100 | """Preprocesses the database URL to make it compatible with asyncpg.""" 101 | if not v or not v.startswith("postgres"): 102 | raise ValueError("Invalid database URL: " + str(v)) 103 | return ( 104 | v.replace("postgres://", "postgresql://") 105 | .replace("postgresql://", "postgresql+asyncpg://") 106 | .strip() 107 | ) 108 | 109 | @field_validator("LOG_LEVEL", mode='before') 110 | def assemble_log_level(cls, v: str) -> str: 111 | """Preprocesses the log level to ensure its validity.""" 112 | v = v.strip().upper() 113 | if v not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: 114 | raise ValueError("Invalid log level: " + str(v)) 115 | return v 116 | 117 | @field_validator("IS_PULL_REQUEST", mode='before') 118 | def assemble_is_pull_request(cls, v: str) -> bool: 119 | """Preprocesses the IS_PULL_REQUEST flag. 120 | 121 | See Render.com docs for more info: 122 | https://render.com/docs/pull-request-previews#how-pull-request-previews-work 123 | """ 124 | if isinstance(v, bool): 125 | return v 126 | return v.lower() == "true" 127 | 128 | @property 129 | def ENVIRONMENT(self) -> AppEnvironment: 130 | """Returns the app environment.""" 131 | if self.RENDER: 132 | if self.IS_PULL_REQUEST: 133 | return AppEnvironment.PREVIEW 134 | else: 135 | return AppEnvironment.PRODUCTION 136 | else: 137 | return AppEnvironment.LOCAL 138 | 139 | @property 140 | def UVICORN_WORKER_COUNT(self) -> int: 141 | if self.ENVIRONMENT == AppEnvironment.LOCAL: 142 | return 1 143 | # The recommended number of workers is (2 x $num_cores) + 1: 144 | # Source: https://docs.gunicorn.org/en/stable/design.html#how-many-workers 145 | # But the Render.com servers don't have enough memory to support that many workers, 146 | # so we instead go by the number of server instances that can be run given the memory 147 | return 3 148 | 149 | @property 150 | def SENTRY_SAMPLE_RATE(self) -> float: 151 | # TODO: before full release, set this to 0.1 for production 152 | return 0.07 if self.ENVIRONMENT == AppEnvironment.PRODUCTION else 1.0 153 | 154 | model_config = SettingsConfigDict(env_prefix="") 155 | 156 | 157 | settings = Settings() 158 | os.environ["OPENAI_API_KEY"] = settings.OPENAI_API_KEY 159 | -------------------------------------------------------------------------------- /backend/app/db/base.py: -------------------------------------------------------------------------------- 1 | # Import all the models, so that Base has them before being 2 | # imported by Alembic 3 | from app.models.base import Base # noqa 4 | from app.models.db import * # noqa 5 | -------------------------------------------------------------------------------- /backend/app/db/session.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.ext.asyncio import create_async_engine 2 | from sqlalchemy.ext.asyncio import async_sessionmaker 3 | 4 | from app.core.config import settings 5 | 6 | engine = create_async_engine( 7 | settings.DATABASE_URL, 8 | pool_pre_ping=True, 9 | pool_size=4, # Number of connections to 
keep open in the pool 10 | max_overflow=4, # Number of connections that can be opened beyond the pool_size 11 | pool_recycle=3600, # Recycle connections after 1 hour 12 | pool_timeout=120, # Raise an exception after 2 minutes if no connection is available from the pool 13 | ) 14 | SessionLocal = async_sessionmaker(autocommit=False, autoflush=False, bind=engine) 15 | -------------------------------------------------------------------------------- /backend/app/db/wait_for_db.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from app.db.session import SessionLocal 3 | from sqlalchemy.sql import text 4 | 5 | 6 | async def check_database_connection(max_attempts: int = 30, sleep_interval: int = 1) -> None: 7 | for attempt in range(1, max_attempts + 1): 8 | try: 9 | async with SessionLocal() as db: 10 | await db.execute(text("SELECT 1")) 11 | print(f"Connected to the database on attempt {attempt}.") 12 | return 13 | except Exception as e: 14 | print(f"Attempt {attempt}: Database is not yet available. Error: {e}") 15 | if attempt == max_attempts: 16 | raise ValueError( 17 | f"Couldn't connect to database after {max_attempts} attempts." 18 | ) from e 19 | await asyncio.sleep(sleep_interval) 20 | -------------------------------------------------------------------------------- /backend/app/llama_index_settings.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import Settings 2 | from llama_index.core.settings import _Settings 3 | from llama_index.llms.openai import OpenAI 4 | from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingMode, OpenAIEmbeddingModelType 5 | from app.core.config import settings 6 | from llama_index.core.node_parser import SentenceSplitter 7 | 8 | from app.chat.constants import ( 9 | NODE_PARSER_CHUNK_OVERLAP, 10 | NODE_PARSER_CHUNK_SIZE, 11 | ) 12 | 13 | def _setup_llama_index_settings() -> _Settings: 14 | Settings.llm = OpenAI( 15 | model=settings.OPENAI_CHAT_LLM_NAME, 16 | api_key=settings.OPENAI_API_KEY 17 | ) 18 | Settings.embed_model = OpenAIEmbedding( 19 | mode=OpenAIEmbeddingMode.SIMILARITY_MODE, 20 | model_type=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL, 21 | api_key=settings.OPENAI_API_KEY, 22 | ) 23 | Settings.node_parser = SentenceSplitter( 24 | chunk_size=NODE_PARSER_CHUNK_SIZE, 25 | chunk_overlap=NODE_PARSER_CHUNK_OVERLAP, 26 | ) 27 | return Settings 28 | -------------------------------------------------------------------------------- /backend/app/loader_io.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Response 2 | from app.core.config import settings 3 | 4 | loader_io_router = APIRouter() 5 | 6 | 7 | @loader_io_router.get("/") 8 | async def get_verification_file() -> Response: 9 | """ 10 | Verification string for loader.io 11 | """ 12 | return Response(settings.LOADER_IO_VERIFICATION_STR, media_type="text/plain") 13 | -------------------------------------------------------------------------------- /backend/app/main.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | import uvicorn 3 | import logging 4 | import sys 5 | import sentry_sdk 6 | from fastapi import FastAPI 7 | from starlette.middleware.cors import CORSMiddleware 8 | from alembic.config import Config 9 | import alembic.config 10 | from alembic import script 11 | from alembic.runtime import migration 12 | from sqlalchemy.engine import 
create_engine, Engine 13 | from llama_index.core.node_parser.text.utils import split_by_sentence_tokenizer 14 | import llama_index.core 15 | 16 | from app.api.api import api_router 17 | from app.db.wait_for_db import check_database_connection 18 | from app.core.config import settings, AppEnvironment 19 | from app.loader_io import loader_io_router 20 | from contextlib import asynccontextmanager 21 | from app.chat.pg_vector import get_vector_store_singleton, CustomPGVectorStore 22 | from app.llama_index_settings import _setup_llama_index_settings 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | def check_current_head(alembic_cfg: Config, connectable: Engine) -> bool: 28 | directory = script.ScriptDirectory.from_config(alembic_cfg) 29 | with connectable.begin() as connection: 30 | context = migration.MigrationContext.configure(connection) 31 | return set(context.get_current_heads()) == set(directory.get_heads()) 32 | 33 | 34 | def __setup_logging(log_level: str): 35 | log_level = getattr(logging, log_level.upper()) 36 | log_formatter = logging.Formatter( 37 | "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s" 38 | ) 39 | root_logger = logging.getLogger() 40 | root_logger.setLevel(log_level) 41 | 42 | stream_handler = logging.StreamHandler(sys.stdout) 43 | stream_handler.setFormatter(log_formatter) 44 | root_logger.addHandler(stream_handler) 45 | logger.info("Set up logging with log level %s", log_level) 46 | 47 | 48 | def __setup_sentry(): 49 | if settings.SENTRY_DSN: 50 | logger.info("Setting up Sentry") 51 | if settings.ENVIRONMENT == AppEnvironment.PRODUCTION: 52 | profiles_sample_rate = None 53 | else: 54 | profiles_sample_rate = settings.SENTRY_SAMPLE_RATE 55 | sentry_sdk.init( 56 | dsn=settings.SENTRY_DSN, 57 | environment=settings.ENVIRONMENT.value, 58 | release=settings.RENDER_GIT_COMMIT, 59 | debug=settings.VERBOSE, 60 | traces_sample_rate=settings.SENTRY_SAMPLE_RATE, 61 | profiles_sample_rate=profiles_sample_rate, 62 | ) 63 | else: 64 | logger.info("Skipping Sentry setup") 65 | 66 | 67 | @asynccontextmanager 68 | async def lifespan(app: FastAPI): 69 | # first wait for DB to be connectable 70 | await check_database_connection() 71 | cfg = Config("alembic.ini") 72 | # Change DB URL to use psycopg2 driver for this specific check 73 | db_url = settings.DATABASE_URL.replace( 74 | "postgresql+asyncpg://", "postgresql+psycopg2://" 75 | ) 76 | cfg.set_main_option("sqlalchemy.url", db_url) 77 | engine = create_engine(db_url, echo=True) 78 | if not check_current_head(cfg, engine): 79 | raise Exception( 80 | "Database is not up to date. Please run `poetry run alembic upgrade head`" 81 | ) 82 | # initialize pg vector store singleton 83 | vector_store = await get_vector_store_singleton() 84 | vector_store = cast(CustomPGVectorStore, vector_store) 85 | await vector_store.run_setup() 86 | 87 | try: 88 | # Some setup is required to initialize the llama-index sentence splitter 89 | split_by_sentence_tokenizer() 90 | except FileExistsError: 91 | # Sometimes seen in deployments, should be benign. 
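# (Likely a race between workers concurrently downloading the NLTK sentence-tokenizer data.)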
92 | logger.info("Tried to re-download NLTK files but they already exist.") 93 | 94 | if not settings.RENDER: 95 | llama_index.core.set_global_handler("arize_phoenix") 96 | 97 | yield 98 | # This section is run on app shutdown 99 | await vector_store.close() 100 | 101 | 102 | app = FastAPI( 103 | title=settings.PROJECT_NAME, 104 | openapi_url=f"{settings.API_PREFIX}/openapi.json", 105 | lifespan=lifespan, 106 | ) 107 | 108 | 109 | if settings.BACKEND_CORS_ORIGINS: 110 | origins = settings.BACKEND_CORS_ORIGINS.copy() 111 | if settings.CODESPACES and settings.CODESPACE_NAME and \ 112 | settings.ENVIRONMENT == AppEnvironment.LOCAL: 113 | # add codespace origin if running in a GitHub Codespace 114 | origins.append(f"https://{settings.CODESPACE_NAME}-3000.app.github.dev") 115 | # register the CORS middleware for the allowed origins 116 | app.add_middleware( 117 | CORSMiddleware, 118 | allow_origins=[str(origin) for origin in origins], 119 | allow_origin_regex=r"https://llama-app-frontend.*\.vercel\.app", 120 | allow_credentials=True, 121 | allow_methods=["*"], 122 | allow_headers=["*"], 123 | ) 124 | 125 | app.include_router(api_router, prefix=settings.API_PREFIX) 126 | app.mount(f"/{settings.LOADER_IO_VERIFICATION_STR}", loader_io_router) 127 | 128 | 129 | def start(): 130 | """Launched with `poetry run start` at root level.""" 131 | print("Running in AppEnvironment: " + settings.ENVIRONMENT.value) 132 | __setup_logging(settings.LOG_LEVEL) 133 | __setup_sentry() 134 | _setup_llama_index_settings() 135 | if settings.RENDER: 136 | # on render.com deployments, run migrations 137 | logger.debug("Running migrations") 138 | alembic_args = ["--raiseerr", "upgrade", "head"] 139 | alembic.config.main(argv=alembic_args) 140 | logger.debug("Migrations complete") 141 | else: 142 | logger.debug("Skipping migrations") 143 | live_reload = not settings.RENDER 144 | uvicorn.run( 145 | "app.main:app", 146 | host="0.0.0.0", 147 | port=8000, 148 | reload=live_reload, 149 | workers=settings.UVICORN_WORKER_COUNT, 150 | ) 151 | -------------------------------------------------------------------------------- /backend/app/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/models/__init__.py -------------------------------------------------------------------------------- /backend/app/models/base.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, DateTime, UUID 2 | 3 | from sqlalchemy.sql import func 4 | from sqlalchemy.ext.declarative import as_declarative, declared_attr 5 | 6 | 7 | @as_declarative() 8 | class Base: 9 | id = Column(UUID, primary_key=True, index=True, default=func.uuid_generate_v4()) 10 | created_at = Column(DateTime, server_default=func.now(), nullable=False) 11 | updated_at = Column( 12 | DateTime, server_default=func.now(), onupdate=func.now(), nullable=False 13 | ) 14 | 15 | __name__: str 16 | 17 | # Generate __tablename__ automatically 18 | @declared_attr 19 | def __tablename__(cls) -> str: 20 | return cls.__name__.lower() 21 | -------------------------------------------------------------------------------- /backend/app/models/db.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, String, ForeignKey 2 | from sqlalchemy.dialects.postgresql import UUID, ENUM, JSONB 3 | from sqlalchemy.orm import relationship 4 | from enum import Enum 5 | from 
llama_index.core.callbacks.schema import CBEventType 6 | from app.models.base import Base 7 | 8 | 9 | class MessageRoleEnum(str, Enum): 10 | user = "user" 11 | assistant = "assistant" 12 | 13 | 14 | class MessageStatusEnum(str, Enum): 15 | PENDING = "PENDING" 16 | SUCCESS = "SUCCESS" 17 | ERROR = "ERROR" 18 | 19 | 20 | class MessageSubProcessStatusEnum(str, Enum): 21 | PENDING = "PENDING" 22 | FINISHED = "FINISHED" 23 | 24 | 25 | # python doesn't allow enums to be extended, so we have to do this 26 | additional_message_subprocess_fields = { 27 | "CONSTRUCTED_QUERY_ENGINE": "constructed_query_engine", 28 | "SUB_QUESTIONS": "sub_questions", 29 | } 30 | MessageSubProcessSourceEnum = Enum( 31 | "MessageSubProcessSourceEnum", 32 | [(event_type.name, event_type.value) for event_type in CBEventType] 33 | + list(additional_message_subprocess_fields.items()), 34 | ) 35 | 36 | 37 | def to_pg_enum(enum_class) -> ENUM: 38 | return ENUM(enum_class, name=enum_class.__name__) 39 | 40 | 41 | class Document(Base): 42 | """ 43 | A document along with its metadata 44 | """ 45 | 46 | # URL to the actual document (e.g. a PDF) 47 | url = Column(String, nullable=False, unique=True) 48 | metadata_map = Column(JSONB, nullable=True) 49 | conversations = relationship("ConversationDocument", back_populates="document") 50 | 51 | 52 | class Conversation(Base): 53 | """ 54 | A conversation with messages and linked documents 55 | """ 56 | 57 | messages = relationship("Message", back_populates="conversation") 58 | conversation_documents = relationship( 59 | "ConversationDocument", back_populates="conversation" 60 | ) 61 | 62 | 63 | class ConversationDocument(Base): 64 | """ 65 | A many-to-many relationship between a conversation and a document 66 | """ 67 | 68 | conversation_id = Column( 69 | UUID(as_uuid=True), ForeignKey("conversation.id"), index=True 70 | ) 71 | document_id = Column(UUID(as_uuid=True), ForeignKey("document.id"), index=True) 72 | conversation = relationship("Conversation", back_populates="conversation_documents") 73 | document = relationship("Document", back_populates="conversations") 74 | 75 | 76 | class Message(Base): 77 | """ 78 | A message in a conversation 79 | """ 80 | 81 | conversation_id = Column( 82 | UUID(as_uuid=True), ForeignKey("conversation.id"), index=True 83 | ) 84 | content = Column(String) 85 | role = Column(to_pg_enum(MessageRoleEnum)) 86 | status = Column(to_pg_enum(MessageStatusEnum), default=MessageStatusEnum.PENDING) 87 | conversation = relationship("Conversation", back_populates="messages") 88 | sub_processes = relationship("MessageSubProcess", back_populates="message") 89 | 90 | 91 | class MessageSubProcess(Base): 92 | """ 93 | A record of a sub-process that occurred as part of the generation of a message from an AI assistant 94 | """ 95 | 96 | message_id = Column(UUID(as_uuid=True), ForeignKey("message.id"), index=True) 97 | source = Column(to_pg_enum(MessageSubProcessSourceEnum)) 98 | message = relationship("Message", back_populates="sub_processes") 99 | status = Column( 100 | to_pg_enum(MessageSubProcessStatusEnum), 101 | default=MessageSubProcessStatusEnum.FINISHED, 102 | nullable=False, 103 | ) 104 | metadata_map = Column(JSONB, nullable=True) 105 | -------------------------------------------------------------------------------- /backend/app/schema.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pydantic Schemas for the API 3 | """ 4 | from pydantic import BaseModel, Field, validator 5 | from enum import Enum 6 | from typing 
import List, Optional, Dict, Union, Any 7 | from uuid import UUID 8 | from datetime import datetime 9 | from llama_index.core.schema import BaseNode, NodeWithScore 10 | from llama_index.core.callbacks.schema import EventPayload 11 | from llama_index.core.query_engine.sub_question_query_engine import SubQuestionAnswerPair 12 | from app.models.db import ( 13 | MessageRoleEnum, 14 | MessageStatusEnum, 15 | MessageSubProcessSourceEnum, 16 | MessageSubProcessStatusEnum, 17 | ) 18 | from app.chat.constants import DB_DOC_ID_KEY 19 | 20 | 21 | def build_uuid_validator(*field_names: str): 22 | return validator(*field_names)(lambda x: str(x) if x else x) 23 | 24 | 25 | class Base(BaseModel): 26 | id: Optional[UUID] = Field(None, description="Unique identifier") 27 | created_at: Optional[datetime] = Field(None, description="Creation datetime") 28 | updated_at: Optional[datetime] = Field(None, description="Update datetime") 29 | 30 | class Config: 31 | from_attributes = True 32 | 33 | 34 | class BaseMetadataObject(BaseModel): 35 | class Config: 36 | from_attributes = True 37 | 38 | 39 | class Citation(BaseMetadataObject): 40 | document_id: UUID 41 | text: str 42 | page_number: int 43 | score: Optional[float] = None 44 | 45 | @validator("document_id") 46 | def validate_document_id(cls, value): 47 | if value: 48 | return str(value) 49 | return value 50 | 51 | @classmethod 52 | def from_node(cls, node_w_score: NodeWithScore) -> "Citation": 53 | node: BaseNode = node_w_score.node 54 | page_number = int(node.source_node.metadata["page_label"]) 55 | document_id = node.source_node.metadata[DB_DOC_ID_KEY] 56 | return cls( 57 | document_id=document_id, 58 | text=node.get_content(), 59 | page_number=page_number, 60 | score=node_w_score.score, 61 | ) 62 | 63 | 64 | class QuestionAnswerPair(BaseMetadataObject): 65 | """ 66 | A question-answer pair that is used to store the sub-questions and answers 67 | """ 68 | 69 | question: str 70 | answer: Optional[str] = None 71 | citations: Optional[List[Citation]] = None 72 | 73 | @classmethod 74 | def from_sub_question_answer_pair( 75 | cls, sub_question_answer_pair: SubQuestionAnswerPair 76 | ): 77 | if sub_question_answer_pair.sources is None: 78 | citations = None 79 | else: 80 | citations = [ 81 | Citation.from_node(node_w_score) 82 | for node_w_score in sub_question_answer_pair.sources 83 | if node_w_score.node.source_node is not None 84 | and DB_DOC_ID_KEY in node_w_score.node.source_node.metadata 85 | ] 86 | citations = citations or None 87 | return cls( 88 | question=sub_question_answer_pair.sub_q.sub_question, 89 | answer=sub_question_answer_pair.answer, 90 | citations=citations, 91 | ) 92 | 93 | 94 | # later will be Union[QuestionAnswerPair, more to add later... ] 95 | class SubProcessMetadataKeysEnum(str, Enum): 96 | SUB_QUESTION = EventPayload.SUB_QUESTION.value 97 | 98 | 99 | # keeping the typing pretty loose here, in case there are changes to the metadata data formats. 
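# Keys are normally SubProcessMetadataKeysEnum members, but plain strings are accepted too so older rows keep validating.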
100 | SubProcessMetadataMap = Dict[Union[SubProcessMetadataKeysEnum, str], Any] 101 | 102 | 103 | class MessageSubProcess(Base): 104 | message_id: UUID 105 | source: MessageSubProcessSourceEnum 106 | status: MessageSubProcessStatusEnum 107 | metadata_map: Optional[SubProcessMetadataMap] = None 108 | 109 | 110 | class Message(Base): 111 | conversation_id: UUID 112 | content: str 113 | role: MessageRoleEnum 114 | status: MessageStatusEnum 115 | sub_processes: List[MessageSubProcess] 116 | 117 | 118 | class UserMessageCreate(BaseModel): 119 | content: str 120 | 121 | 122 | class DocumentMetadataKeysEnum(str, Enum): 123 | """ 124 | Enum for the keys of the metadata map for a document 125 | """ 126 | 127 | SEC_DOCUMENT = "sec_document" 128 | 129 | 130 | class SecDocumentTypeEnum(str, Enum): 131 | """ 132 | Enum for the type of sec document 133 | """ 134 | 135 | TEN_K = "10-K" 136 | TEN_Q = "10-Q" 137 | 138 | 139 | class SecDocumentMetadata(BaseModel): 140 | """ 141 | Metadata for a document that is a sec document 142 | """ 143 | 144 | company_name: str 145 | company_ticker: str 146 | doc_type: SecDocumentTypeEnum 147 | year: int 148 | quarter: Optional[int] = None 149 | accession_number: Optional[str] = None 150 | cik: Optional[str] = None 151 | period_of_report_date: Optional[datetime] = None 152 | filed_as_of_date: Optional[datetime] = None 153 | date_as_of_change: Optional[datetime] = None 154 | 155 | 156 | DocumentMetadataMap = Dict[Union[DocumentMetadataKeysEnum, str], Any] 157 | 158 | 159 | class Document(Base): 160 | url: str 161 | metadata_map: Optional[DocumentMetadataMap] = None 162 | 163 | 164 | class Conversation(Base): 165 | messages: List[Message] 166 | documents: List[Document] 167 | 168 | 169 | class ConversationCreate(BaseModel): 170 | document_ids: List[UUID] 171 | -------------------------------------------------------------------------------- /backend/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | llama-app-fastapi: 4 | build: 5 | context: . 
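# no dockerfile key is given, so Compose builds from ./Dockerfile in this backend directory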
6 | volumes: 7 | # allows for live reloading of the app 8 | # when the code within the ./app directory changes 9 | - ./:/app 10 | ports: 11 | - "127.0.0.1:8000:8000" 12 | depends_on: 13 | - db 14 | - phoenix 15 | env_file: 16 | - .env 17 | - .env.docker 18 | environment: 19 | BACKEND_CORS_ORIGINS: '["http://localhost", "http://localhost:8000"]' 20 | 21 | db: 22 | image: ankane/pgvector:v0.5.0 23 | environment: 24 | POSTGRES_USER: user 25 | POSTGRES_PASSWORD: password 26 | POSTGRES_DB: llama_app_db 27 | ports: 28 | - "127.0.0.1:5432:5432" 29 | volumes: 30 | - postgres_data:/var/lib/postgresql/data/ 31 | 32 | localstack: 33 | container_name: "${LOCALSTACK_DOCKER_NAME-localstack_main}" 34 | image: localstack/localstack 35 | ports: 36 | - "127.0.0.1:4566:4566" # LocalStack Gateway 37 | - "127.0.0.1:4510-4559:4510-4559" # external services port range 38 | environment: 39 | - DEBUG=${DEBUG-} 40 | - DOCKER_HOST=unix:///var/run/docker.sock 41 | volumes: 42 | - "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack" 43 | - "/var/run/docker.sock:/var/run/docker.sock" 44 | 45 | # useful for local workflow debugging 46 | # taken from here: https://docs.arize.com/phoenix/deployment/docker#postgresql 47 | phoenix: 48 | image: arizephoenix/phoenix:latest # Must be greater than 4.0 version to work 49 | ports: 50 | - 6006:6006 # PHOENIX_PORT 51 | - 4317:4317 # PHOENIX_GRPC_PORT 52 | - 9090:9090 # [Optional] PROMETHEUS PORT IF ENABLED 53 | 54 | volumes: 55 | postgres_data: 56 | -------------------------------------------------------------------------------- /backend/localstack-cors-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "CORSRules": [ 3 | { 4 | "AllowedHeaders": [ 5 | "*" 6 | ], 7 | "AllowedMethods": [ 8 | "GET", 9 | "HEAD" 10 | ], 11 | "AllowedOrigins": [ 12 | "*" 13 | ], 14 | "ExposeHeaders": [ 15 | "ETag" 16 | ] 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /backend/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "llama-app-backend" 3 | version = "0.1.0" 4 | description = "Backend for Llama App" 5 | authors = ["Sourabh Desai "] 6 | readme = "README.md" 7 | packages = [{include = "app"}] 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11,<3.12" 11 | fastapi = "0.115.11" 12 | pydantic = "^2.9.2" 13 | uvicorn = "^0.22.0" 14 | sqlalchemy = {extras = ["async"], version = "^2.0.15"} 15 | aiosqlite = "^0.19.0" 16 | asyncpg = "^0.29.0" 17 | alembic = "^1.11.1" 18 | psycopg2 = {extras = ["binary"], version = "^2.9.6"} 19 | psycopg2-binary = "^2.9.6" 20 | sse-starlette = "^1.6.1" 21 | pypdf = "^5.3.1" 22 | anyio = "^3.7.0" 23 | s3fs = "^2023.6.0" 24 | fsspec = "^2023.6.0" 25 | pdfkit = "^1.0.0" 26 | pgvector = "^0.3.6" 27 | sentry-sdk = {extras = ["fastapi"], version = "^1.28.1"} 28 | llama-index-core = "0.12.23" 29 | polygon = "^1.2.6" 30 | polygon-api-client = "^1.14.4" 31 | nltk = "^3.8.1" 32 | cachetools = "^5.3.1" 33 | greenlet = "^2.0.2" 34 | email-validator = "^2.0.0.post2" 35 | pydantic-settings = "^2.8.1" 36 | openai = "^1.66.2" 37 | llama-index-vector-stores-postgres = "^0.4.2" 38 | llama-index-embeddings-openai = "^0.3.1" 39 | llama-index-readers-file = "^0.4.6" 40 | llama-index-llms-openai = "^0.3.25" 41 | llama-index-agent-openai = "^0.4.6" 42 | llama-index-question-gen-openai = "^0.3.0" 43 | 44 | 45 | [tool.poetry.group.dev.dependencies] 46 | pylint = "^2.17.4" 47 | pytest = "^7.3.2" 48 | 
sseclient-py = "^1.7.2" 49 | pdfkit = "^1.0.0" 50 | fire = "^0.5.0" 51 | sec-edgar-downloader = "~5.0" 52 | pytickersymbols = "^1.13.0" 53 | awscli-local = "^0.20" 54 | llama-index-callbacks-arize-phoenix = "^0.4.0" 55 | arize-phoenix = "^8.12.1" 56 | 57 | [tool.poetry.scripts] 58 | start = "app.main:start" 59 | migrate_db = "app.main:migrate_db" 60 | 61 | [build-system] 62 | requires = ["poetry-core"] 63 | build-backend = "poetry.core.masonry.api" 64 | -------------------------------------------------------------------------------- /backend/scripts/build_vector_tables.py: -------------------------------------------------------------------------------- 1 | from fire import Fire 2 | from app.chat.pg_vector import get_vector_store_singleton 3 | import asyncio 4 | 5 | async def build_vector_tables(): 6 | vector_store = await get_vector_store_singleton() 7 | await vector_store.run_setup() 8 | 9 | 10 | def main_build_vector_tables(): 11 | """ 12 | Script to build the PGVector table if they don't already exist 13 | """ 14 | asyncio.run(build_vector_tables()) 15 | 16 | if __name__ == "__main__": 17 | Fire(main_build_vector_tables) 18 | -------------------------------------------------------------------------------- /backend/scripts/chat_llama.py: -------------------------------------------------------------------------------- 1 | import cmd 2 | import requests 3 | from sseclient import SSEClient 4 | import json 5 | import random 6 | from urllib.parse import quote 7 | 8 | 9 | def sse_with_requests(url, headers) -> requests.Response: 10 | """Get a streaming response for the given event feed using requests.""" 11 | return requests.get(url, stream=True, headers=headers) 12 | 13 | 14 | class DocumentPickerCmd(cmd.Cmd): 15 | prompt = "(Pick📄) " 16 | 17 | def __init__(self, base_url): 18 | super().__init__() 19 | self.base_url = base_url 20 | self.documents = None 21 | self.selected_documents = [] 22 | 23 | def do_fetch(self, args): 24 | "Get 5 documents: fetch" 25 | response = requests.get(f"{self.base_url}/api/document/") 26 | if response.status_code == 200: 27 | self.documents = random.choices(response.json(), k=5) 28 | for idx, doc in enumerate(self.documents): 29 | print(f"[{idx}]: {doc['url']}") 30 | else: 31 | print(f"Error: {response.text}") 32 | 33 | def do_select(self, document_idx): 34 | "Select a document by its index: select " 35 | if self.documents is None: 36 | print("Please fetch documents first: fetch") 37 | return 38 | try: 39 | idx = int(document_idx) 40 | if idx < len(self.documents): 41 | self.selected_documents.append(self.documents[idx]) 42 | print(f"Selected document: {self.documents[idx]['url']}") 43 | else: 44 | print("Invalid index. Use the GET command to view available documents.") 45 | except ValueError: 46 | print("Invalid index. Please enter a number.") 47 | 48 | def do_select_id(self, document_id): 49 | "Select a document by it's ID" 50 | if not document_id: 51 | print("Please enter a valid document ID") 52 | else: 53 | self.selected_documents.append({"id": document_id}) 54 | print(f"Selected document ID {document_id}") 55 | 56 | def do_finish(self, args): 57 | "Finish the document selection process: FINISH" 58 | if len(self.selected_documents) > 0: 59 | return True 60 | else: 61 | print("No documents selected. Use the SELECT command to select documents.") 62 | 63 | def do_quit(self, args): 64 | "Quits the program." 
65 | print("Quitting document picker.") 66 | raise SystemExit 67 | 68 | 69 | class ConversationCmd(cmd.Cmd): 70 | prompt = "(Chat🦙) " 71 | 72 | def __init__(self, base_url): 73 | super().__init__() 74 | self.base_url = base_url 75 | self.conversation_id = None 76 | self.document_ids = [] 77 | 78 | def do_pick_docs(self, args): 79 | "Pick documents for the new conversation: pick_docs" 80 | picker = DocumentPickerCmd(self.base_url) 81 | try: 82 | picker.cmdloop() 83 | except KeyboardInterrupt: 84 | picker.do_quit("") 85 | except Exception as e: 86 | print(e) 87 | picker.do_quit("") 88 | self.document_ids = [doc["id"] for doc in picker.selected_documents] 89 | 90 | def do_create(self, args): 91 | "Create a new conversation: CREATE" 92 | req_body = {"document_ids": self.document_ids} 93 | response = requests.post(f"{self.base_url}/api/conversation/", json=req_body) 94 | if response.status_code == 200: 95 | self.conversation_id = response.json()["id"] 96 | print(f"Created conversation with ID {self.conversation_id}") 97 | else: 98 | print(f"Error: {response.text}") 99 | 100 | def do_detail(self, args): 101 | "Get the details of the current conversation: DETAIL" 102 | if not self.conversation_id: 103 | print("No active conversation. Use CREATE to start a new conversation.") 104 | return 105 | response = requests.get( 106 | f"{self.base_url}/api/conversation/{self.conversation_id}" 107 | ) 108 | if response.status_code == 200: 109 | print(json.dumps(response.json(), indent=4)) 110 | else: 111 | print(f"Error: {response.text}") 112 | 113 | def do_delete(self, args): 114 | "Delete the current conversation: DELETE" 115 | if not self.conversation_id: 116 | print("No active conversation to delete.") 117 | return 118 | response = requests.delete( 119 | f"{self.base_url}/api/conversation/{self.conversation_id}" 120 | ) 121 | if response.status_code == 204: 122 | print(f"Deleted conversation with ID {self.conversation_id}") 123 | self.conversation_id = None 124 | else: 125 | print(f"Error: {response.text}") 126 | 127 | def do_message(self, message): 128 | "Send a user message to the current conversation and get back the AI's response: MESSAGE " 129 | if not self.conversation_id: 130 | print("No active conversation. Use CREATE to start a new conversation.") 131 | return 132 | message = quote(message.strip()) # URI encode the message 133 | url = f"{self.base_url}/api/conversation/{self.conversation_id}/message?user_message={message}" 134 | headers = {"Accept": "text/event-stream"} 135 | response = sse_with_requests(url, headers) 136 | messages = SSEClient(response).events() 137 | message_idx = 0 138 | final_message = None 139 | for msg in messages: 140 | print(f"\n\n=== Message {message_idx} ===") 141 | msg_json = json.loads(msg.data) 142 | print(msg_json) 143 | final_message = msg_json.get("content") 144 | message_idx += 1 145 | 146 | if final_message is not None: 147 | print(f"\n\n====== Final Message ======") 148 | print(final_message) 149 | 150 | def do_quit(self, args): 151 | "Quits the program." 
152 | print("Quitting.") 153 | raise SystemExit 154 | 155 | 156 | if __name__ == "__main__": 157 | import argparse 158 | 159 | parser = argparse.ArgumentParser(description="Start the chat terminal.") 160 | parser.add_argument( 161 | "--base_url", 162 | type=str, 163 | default="http://localhost:8000", 164 | help="an optional base url for the API endpoints", 165 | ) 166 | args = parser.parse_args() 167 | 168 | cmd = ConversationCmd(args.base_url) 169 | try: 170 | cmd.cmdloop() 171 | except KeyboardInterrupt: 172 | cmd.do_quit("") 173 | except Exception as e: 174 | print(e) 175 | cmd.do_quit("") 176 | -------------------------------------------------------------------------------- /backend/scripts/dedupe_vector_store.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from fire import Fire 3 | from sqlalchemy import text 4 | from app.db.session import SessionLocal 5 | 6 | 7 | async def _async_dedupe_vectore_store(dry_run: bool = False): 8 | async with SessionLocal() as db: 9 | try: 10 | common_table_expression = """ 11 | WITH cte AS ( 12 | SELECT 13 | max(id) as max_id, 14 | text, 15 | (metadata_ ->> 'page_label'):: text as page_label, 16 | (metadata_ ->> 'db_document_id'):: text as db_document_id 17 | FROM 18 | data_pg_vector_store 19 | GROUP BY 20 | text, 21 | page_label, 22 | db_document_id 23 | ) 24 | """ 25 | # Count rows that would be deleted 26 | stmt = text( 27 | f""" 28 | {common_table_expression} 29 | SELECT COUNT(id) FROM data_pg_vector_store WHERE id NOT IN (SELECT max_id FROM cte); 30 | """ 31 | ) 32 | result = await db.execute(stmt) 33 | num_duplicate_rows = result.scalar() 34 | 35 | num_rows = ( 36 | await db.execute(text("SELECT COUNT(*) FROM data_pg_vector_store")) 37 | ).scalar() 38 | 39 | print(f"{num_duplicate_rows} duplicate rows found out of {num_rows} total.") 40 | print( 41 | f"{num_rows - num_duplicate_rows} rows would be remaining if deleted." 42 | ) 43 | if dry_run or num_duplicate_rows == 0: 44 | return 45 | 46 | # Ask for confirmation before deleting rows 47 | confirmation = input("Do you want to delete these rows? (y/n) ") 48 | if confirmation.lower() != "y": 49 | print("Aborted.") 50 | return 51 | 52 | # Delete the rows 53 | delete_stmt = text( 54 | f""" 55 | {common_table_expression} 56 | DELETE FROM data_pg_vector_store WHERE id NOT IN (SELECT max_id FROM cte); 57 | """ 58 | ) 59 | await db.execute(delete_stmt) 60 | await db.commit() # Explicitly commit the transaction 61 | print(f"{num_duplicate_rows} rows have been deleted.") 62 | except Exception as e: 63 | print(f"An error occurred: {e}") 64 | 65 | 66 | def dedupe_vectore_store(dry_run: bool = False): 67 | """ 68 | Deduplicate the vector store. 69 | 70 | :param dry_run: If True, do not commit changes to the database. 
71 | """ 72 | asyncio.run(_async_dedupe_vectore_store(dry_run=dry_run)) 73 | 74 | 75 | if __name__ == "__main__": 76 | Fire(dedupe_vectore_store) 77 | -------------------------------------------------------------------------------- /backend/scripts/download_sec_pdf.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Optional 3 | 4 | import pdfkit 5 | from file_utils import filing_exists 6 | from fire import Fire 7 | from sec_edgar_downloader import Downloader 8 | from distutils.spawn import find_executable 9 | from tqdm.contrib.itertools import product 10 | from app.core.config import settings 11 | 12 | DEFAULT_OUTPUT_DIR = "data/" 13 | # You can lookup the CIK for a company here: https://www.sec.gov/edgar/searchedgar/companysearch 14 | DEFAULT_CIKS = [ 15 | # AAPL 16 | "320193", 17 | # MSFT 18 | "789019", 19 | # AMZN 20 | "0001018724", 21 | # GOOGL 22 | "1652044", 23 | # META 24 | "1326801", 25 | # TSLA 26 | "1318605", 27 | # NVDA 28 | "1045810", 29 | # NFLX 30 | "1065280", 31 | # PYPL 32 | "0001633917", 33 | # PFE (Pfizer) 34 | "78003", 35 | # AZNCF (AstraZeneca) 36 | "901832", 37 | # LLY (Eli Lilly) 38 | "59478", 39 | # MRNA (Moderna) 40 | "1682852", 41 | # JNJ (Johnson & Johnson) 42 | "200406", 43 | ] 44 | DEFAULT_FILING_TYPES = [ 45 | "10-K", 46 | "10-Q", 47 | ] 48 | 49 | 50 | def _download_filing( 51 | cik: str, filing_type: str, output_dir: str, limit=None, before=None, after=None 52 | ): 53 | dl = Downloader(settings.SEC_EDGAR_COMPANY_NAME, settings.SEC_EDGAR_EMAIL, output_dir) 54 | dl.get(filing_type, cik, limit=limit, before=before, after=after, download_details=True) 55 | 56 | 57 | def _convert_to_pdf(output_dir: str): 58 | """Converts all html files in a directory to pdf files.""" 59 | 60 | # NOTE: directory structure is assumed to be: 61 | # output_dir 62 | # ├── sec-edgar-filings 63 | # │ ├── AAPL 64 | # │ │ ├── 10-K 65 | # │ │ │ ├── 0000320193-20-000096 66 | # │ │ │ │ ├── primary-document.html 67 | # │ │ │ │ ├── primary-document.pdf <-- this is what we want 68 | 69 | data_dir = Path(output_dir) / "sec-edgar-filings" 70 | 71 | for cik_dir in data_dir.iterdir(): 72 | for filing_type_dir in cik_dir.iterdir(): 73 | for filing_dir in filing_type_dir.iterdir(): 74 | filing_doc = filing_dir / "primary-document.html" 75 | filing_pdf = filing_dir / "primary-document.pdf" 76 | if filing_doc.exists() and not filing_pdf.exists(): 77 | print("- Converting {}".format(filing_doc)) 78 | input_path = str(filing_doc.absolute()) 79 | output_path = str(filing_pdf.absolute()) 80 | try: 81 | # fix for issue here: 82 | # https://github.com/wkhtmltopdf/wkhtmltopdf/issues/4460#issuecomment-661345113 83 | options = {'enable-local-file-access': None} 84 | pdfkit.from_file(input_path, output_path, options=options, verbose=True) 85 | except Exception as e: 86 | print(f"Error converting {input_path} to {output_path}: {e}") 87 | 88 | 89 | def main( 90 | output_dir: str = DEFAULT_OUTPUT_DIR, 91 | ciks: List[str] = DEFAULT_CIKS, 92 | file_types: List[str] = DEFAULT_FILING_TYPES, 93 | before: Optional[str] = None, 94 | after: Optional[str] = None, 95 | limit: Optional[int] = 3, 96 | convert_to_pdf: bool = True, 97 | ): 98 | print('Downloading filings to "{}"'.format(Path(output_dir).absolute())) 99 | print("File Types: {}".format(file_types)) 100 | if convert_to_pdf: 101 | if find_executable("wkhtmltopdf") is None: 102 | raise Exception( 103 | "ERROR: wkhtmltopdf (https://wkhtmltopdf.org/) not found, " 104 | "please 
install it to convert html to pdf, e.g. " 105 | "`sudo apt-get install wkhtmltopdf`" 106 | ) 107 | for symbol, file_type in product(ciks, file_types): 108 | try: 109 | if filing_exists(symbol, file_type, output_dir): 110 | print(f"- Filing for {symbol} {file_type} already exists, skipping") 111 | else: 112 | print(f"- Downloading filing for {symbol} {file_type}") 113 | _download_filing(symbol, file_type, output_dir, limit, before, after) 114 | except Exception as e: 115 | print( 116 | f"Error downloading filing for symbol={symbol} & file_type={file_type}: {e}" 117 | ) 118 | 119 | if convert_to_pdf: 120 | print("Converting html files to pdf files") 121 | _convert_to_pdf(output_dir) 122 | 123 | 124 | if __name__ == "__main__": 125 | Fire(main) 126 | -------------------------------------------------------------------------------- /backend/scripts/file_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Optional, Tuple 3 | import datetime 4 | 5 | import pandas as pd 6 | from pydantic import BaseModel 7 | 8 | 9 | class Filing(BaseModel): 10 | file_path: str 11 | symbol: str 12 | filing_type: str 13 | year: int 14 | quarter: Optional[int] = None 15 | cik: str 16 | accession_number: str 17 | period_of_report_date: datetime.datetime 18 | filed_as_of_date: datetime.datetime 19 | date_as_of_change: datetime.datetime 20 | 21 | 22 | def filing_exists(cik: str, filing_type: str, output_dir: str) -> bool: 23 | """Checks if a filing exists for a given cik and filing type.""" 24 | data_dir = Path(output_dir) / "sec-edgar-filings" 25 | filing_dir = data_dir / cik / filing_type 26 | return filing_dir.exists() 27 | 28 | 29 | def parse_quarter_from_full_submission_txt(full_submission_txt_file_path: Path) -> int: 30 | """ 31 | The full-submission.txt file contains a pair of lines like the following: 32 | Document Fiscal Period Focus 33 | Q1 34 | 35 | This method parses the quarter from that second line. 36 | """ 37 | with open(full_submission_txt_file_path) as f: 38 | try: 39 | line = next(f) 40 | while "Document Fiscal Period Focus" not in line: 41 | line = next(f) 42 | quarter_line = next(f) 43 | quarter_line = quarter_line.split(">")[1].split("<")[0] 44 | quarter = quarter_line.strip("Q ") 45 | return int(quarter) 46 | except StopIteration: 47 | raise ValueError( 48 | f"Could not find Document Fiscal Period Focus in file {full_submission_txt_file_path}" 49 | ) 50 | 51 | 52 | def get_line_with_substring_in_file(file_path: Path, substring: str) -> str: 53 | """Returns the first line in a file that contains a given substring.""" 54 | with open(file_path) as f: 55 | for line in f: 56 | if substring in line: 57 | return line 58 | raise ValueError(f"Could not find substring '{substring}' in file {file_path}") 59 | 60 | 61 | def parse_dates_from_full_submission_txt( 62 | full_submission_txt_file_path: Path, 63 | ) -> Tuple[datetime.datetime, datetime.datetime, datetime.datetime]: 64 | period_of_report_line = get_line_with_substring_in_file( 65 | full_submission_txt_file_path, "CONFORMED PERIOD OF REPORT:" 66 | ) 67 | period_of_report_line = period_of_report_line.split(":")[1].strip() 68 | # Example value for date format: 20220930 69 | period_of_report_date = datetime.datetime.strptime( 70 | period_of_report_line.strip(), "%Y%m%d" 71 | ) 72 | 73 | filed_as_of_date_line = get_line_with_substring_in_file( 74 | full_submission_txt_file_path, "FILED AS OF DATE:" 75 | ) 76 | filed_as_of_date_line = filed_as_of_date_line.split(":")[1].strip()
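# Illustrative example: a header line "FILED AS OF DATE:  20220930" splits on ":"
# to "20220930", which strptime("%Y%m%d") parses to datetime.datetime(2022, 9, 30).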
77 | filed_as_of_date = datetime.datetime.strptime( 78 | filed_as_of_date_line.strip(), "%Y%m%d" 79 | ) 80 | 81 | date_as_of_change_line = get_line_with_substring_in_file( 82 | full_submission_txt_file_path, "DATE AS OF CHANGE:" 83 | ) 84 | date_as_of_change_line = date_as_of_change_line.split(":")[1].strip() 85 | date_as_of_change = datetime.datetime.strptime( 86 | date_as_of_change_line.strip(), "%Y%m%d" 87 | ) 88 | return period_of_report_date, filed_as_of_date, date_as_of_change 89 | 90 | 91 | def parse_cik_from_full_submission_txt( 92 | full_submission_txt_file_path: Path, 93 | ) -> str: 94 | cik_line = get_line_with_substring_in_file( 95 | full_submission_txt_file_path, "CENTRAL INDEX KEY:" 96 | ) 97 | cik_line = cik_line.split(":")[1].strip() 98 | return cik_line 99 | 100 | 101 | def parse_ticker_symbol_from_full_submission_txt( 102 | full_submission_txt_file_path: Path, 103 | ) -> str: 104 | """ 105 | Very hacky approach to parsing the ticker symbol from the full-submission.txt file. 106 | The file usually has a line that reads something like "<FILENAME>amzn-20220930.htm". 107 | We can extract "amzn" from that line. 108 | """ 109 | ticker_symbol_line = get_line_with_substring_in_file( 110 | full_submission_txt_file_path, "<FILENAME>" 111 | ) 112 | ticker_symbol_line = ticker_symbol_line.split("<FILENAME>")[1].strip() 113 | ticker_symbol = ticker_symbol_line.split("-")[0].strip() 114 | return ticker_symbol.upper() 115 | 116 | 117 | def get_available_filings(output_dir: str) -> List[Filing]: 118 | data_dir = Path(output_dir) / "sec-edgar-filings" 119 | filings = [] 120 | for cik_dir in data_dir.iterdir(): 121 | for filing_type_dir in cik_dir.iterdir(): 122 | for filing_dir in filing_type_dir.iterdir(): 123 | filing_pdf = filing_dir / "primary-document.pdf" 124 | full_submission_txt = filing_dir / "full-submission.txt" 125 | if filing_pdf.exists(): 126 | filing_type = filing_type_dir.name 127 | file_path = str(filing_pdf.absolute()) 128 | quarter = None 129 | assert full_submission_txt.exists() 130 | if filing_type == "10-Q": 131 | quarter = parse_quarter_from_full_submission_txt( 132 | full_submission_txt 133 | ) 134 | ( 135 | period_of_report_date, 136 | filed_as_of_date, 137 | date_as_of_change, 138 | ) = parse_dates_from_full_submission_txt(full_submission_txt) 139 | accession_number = filing_dir.name.strip() 140 | cik = parse_cik_from_full_submission_txt(full_submission_txt) 141 | symbol = parse_ticker_symbol_from_full_submission_txt( 142 | full_submission_txt 143 | ) 144 | filing = Filing( 145 | file_path=file_path, 146 | symbol=symbol, 147 | filing_type=filing_type, 148 | year=period_of_report_date.year, 149 | quarter=quarter, 150 | accession_number=accession_number, 151 | cik=cik, 152 | period_of_report_date=period_of_report_date, 153 | filed_as_of_date=filed_as_of_date, 154 | date_as_of_change=date_as_of_change, 155 | ) 156 | filings.append(filing) 157 | return filings 158 | 159 | 160 | def get_available_filings_as_df(output_dir: str) -> pd.DataFrame: 161 | filings = get_available_filings(output_dir) 162 | return pd.DataFrame([filing.dict() for filing in filings]) 163 | -------------------------------------------------------------------------------- /backend/scripts/seed_db.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import asyncio 3 | from tempfile import TemporaryDirectory 4 | from pathlib import Path 5 | from fire import Fire 6 | import s3fs 7 | from app.core.config import settings 8 | 
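# Usage sketch (hypothetical invocation; assumes the AWS/S3 and database env vars
# consumed by app.core.config.settings are set):
#   python scripts/seed_db.py --ciks '["320193"]' --filing_types '["10-K"]'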
import upsert_db_sec_documents 9 | import download_sec_pdf 10 | from download_sec_pdf import DEFAULT_CIKS, DEFAULT_FILING_TYPES 11 | import seed_storage_context 12 | 13 | 14 | def copy_to_s3(dir_path: str, s3_bucket: str = settings.S3_ASSET_BUCKET_NAME): 15 | """ 16 | Copy all files in dir_path to S3. 17 | """ 18 | s3 = s3fs.S3FileSystem( 19 | key=settings.AWS_KEY, 20 | secret=settings.AWS_SECRET, 21 | endpoint_url=settings.S3_ENDPOINT_URL, 22 | ) 23 | 24 | if not (settings.RENDER or s3.exists(s3_bucket)): 25 | s3.mkdir(s3_bucket) 26 | 27 | s3.put(dir_path, s3_bucket, recursive=True) 28 | 29 | 30 | async def async_seed_db( 31 | ciks: List[str] = DEFAULT_CIKS, filing_types: List[str] = DEFAULT_FILING_TYPES 32 | ): 33 | with TemporaryDirectory() as temp_dir: 34 | print("Downloading SEC filings") 35 | download_sec_pdf.main( 36 | output_dir=temp_dir, 37 | ciks=ciks, 38 | file_types=filing_types, 39 | ) 40 | 41 | print("Copying downloaded SEC filings to S3") 42 | copy_to_s3(str(Path(temp_dir) / "sec-edgar-filings")) 43 | 44 | print("Upserting records of downloaded SEC filings into database") 45 | await upsert_db_sec_documents.async_upsert_documents_from_filings( 46 | url_base=settings.CDN_BASE_URL, 47 | doc_dir=temp_dir, 48 | ) 49 | 50 | print("Seeding storage context") 51 | await seed_storage_context.async_main_seed_storage_context() 52 | print( 53 | """ 54 | Done! 🏁 55 | \t- SEC PDF documents uploaded to the S3 assets bucket ✅ 56 | \t- Documents database table has been populated ✅ 57 | \t- Vector storage table has been seeded with embeddings ✅ 58 | """.strip() 59 | ) 60 | 61 | 62 | def seed_db( 63 | ciks: List[str] = DEFAULT_CIKS, filing_types: List[str] = DEFAULT_FILING_TYPES 64 | ): 65 | asyncio.run(async_seed_db(ciks, filing_types)) 66 | 67 | 68 | if __name__ == "__main__": 69 | Fire(seed_db) 70 | -------------------------------------------------------------------------------- /backend/scripts/seed_storage_context.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from fire import Fire 3 | import asyncio 4 | from app.db.session import SessionLocal 5 | from app.api import crud 6 | from llama_index.core.callbacks import CallbackManager 7 | from app.chat.engine import ( 8 | build_doc_id_to_index_map, 9 | get_s3_fs, 10 | ) 11 | 12 | 13 | async def async_main_seed_storage_context(): 14 | fs = get_s3_fs() 15 | async with SessionLocal() as db: 16 | docs = await crud.fetch_documents(db) 17 | callback_manager = CallbackManager([]) 18 | for doc in tqdm(docs, desc="Seeding storage with DB documents"): 19 | await build_doc_id_to_index_map(callback_manager, [doc], fs=fs) 20 | 21 | 22 | def main_seed_storage_context(): 23 | asyncio.run(async_main_seed_storage_context()) 24 | 25 | 26 | if __name__ == "__main__": 27 | Fire(main_seed_storage_context) 28 | -------------------------------------------------------------------------------- /backend/scripts/stock_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional 2 | 3 | from pydantic import BaseModel, ValidationError 4 | from pytickersymbols import PyTickerSymbols 5 | 6 | DEFAULT_INDICES = ["DOW JONES", "S&P 500", "NASDAQ 100"] 7 | 8 | 9 | class Stock(BaseModel): 10 | name: str 11 | symbol: str 12 | indices: List[str] 13 | 14 | 15 | def _parse_stock(stock: dict) -> Optional[Stock]: 16 | try: 17 | return Stock( 18 | name=stock["name"], 19 | symbol=stock["symbol"], 20 | indices=stock["indices"], 21 | ) 22 | except 
ValidationError: 23 | return None 24 | 25 | 26 | def get_stocks(indices: List[str] = DEFAULT_INDICES) -> List[Stock]: 27 | stock_data = PyTickerSymbols() 28 | if indices: 29 | # get stocks for given indices 30 | all_stocks = [] 31 | for index in indices: 32 | stocks = stock_data.get_stocks_by_index(index) 33 | all_stocks.extend(stocks) 34 | else: 35 | # get stocks for all indices 36 | all_stocks = stock_data.get_all_stocks() 37 | 38 | stocks = [_parse_stock(stock) for stock in all_stocks] 39 | return list(filter(None, stocks)) 40 | 41 | 42 | def get_stocks_by_symbol(indices: List[str] = DEFAULT_INDICES) -> Dict[str, Stock]: 43 | stocks = get_stocks(indices) 44 | return {stock.symbol: stock for stock in stocks} 45 | -------------------------------------------------------------------------------- /backend/scripts/upsert_db_sec_documents.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from fire import Fire 3 | from tqdm import tqdm 4 | import asyncio 5 | from pytickersymbols import PyTickerSymbols 6 | from file_utils import get_available_filings, Filing 7 | from stock_utils import get_stocks_by_symbol, Stock 8 | from fastapi.encoders import jsonable_encoder 9 | from app.models.db import Document 10 | from app.schema import ( 11 | SecDocumentMetadata, 12 | DocumentMetadataMap, 13 | DocumentMetadataKeysEnum, 14 | SecDocumentTypeEnum, 15 | Document, 16 | ) 17 | from app.db.session import SessionLocal 18 | from app.api import crud 19 | 20 | DEFAULT_URL_BASE = "https://dl94gqvzlh4k8.cloudfront.net" 21 | DEFAULT_DOC_DIR = "data/" 22 | 23 | 24 | async def upsert_document(doc_dir: str, stock: Stock, filing: Filing, url_base: str): 25 | # construct a string for just the document's sub-path after the doc_dir 26 | # e.g. "sec-edgar-filings/AAPL/10-K/0000320193-20-000096/primary-document.pdf" 27 | doc_path = Path(filing.file_path).relative_to(doc_dir) 28 | url_path = url_base.rstrip("/") + "/" + str(doc_path).lstrip("/") 29 | doc_type = ( 30 | SecDocumentTypeEnum.TEN_K 31 | if filing.filing_type == "10-K" 32 | else SecDocumentTypeEnum.TEN_Q 33 | ) 34 | sec_doc_metadata = SecDocumentMetadata( 35 | company_name=stock.name, 36 | company_ticker=stock.symbol, 37 | doc_type=doc_type, 38 | year=filing.year, 39 | quarter=filing.quarter, 40 | accession_number=filing.accession_number, 41 | cik=filing.cik, 42 | period_of_report_date=filing.period_of_report_date, 43 | filed_as_of_date=filing.filed_as_of_date, 44 | date_as_of_change=filing.date_as_of_change, 45 | ) 46 | metadata_map: DocumentMetadataMap = { 47 | DocumentMetadataKeysEnum.SEC_DOCUMENT: jsonable_encoder( 48 | sec_doc_metadata.dict(exclude_none=True) 49 | ) 50 | } 51 | doc = Document(url=str(url_path), metadata_map=metadata_map) 52 | async with SessionLocal() as db: 53 | await crud.upsert_document_by_url(db, doc) 54 | 55 | 56 | async def async_upsert_documents_from_filings(url_base: str, doc_dir: str): 57 | """ 58 | Upserts SEC documents into the database based on what has been downloaded to the filesystem. 59 | """ 60 | filings = get_available_filings(doc_dir) 61 | stocks_data = PyTickerSymbols() 62 | stocks_dict = get_stocks_by_symbol(stocks_data.get_all_indices()) 63 | for filing in tqdm(filings, desc="Upserting docs from filings"): 64 | if filing.symbol not in stocks_dict: 65 | print(f"Symbol {filing.symbol} not found in stocks_dict. 
Skipping.") 66 | continue 67 | stock = stocks_dict[filing.symbol] 68 | await upsert_document(doc_dir, stock, filing, url_base) 69 | 70 | 71 | def main_upsert_documents_from_filings( 72 | url_base: str = DEFAULT_URL_BASE, doc_dir: str = DEFAULT_DOC_DIR 73 | ): 74 | """ 75 | Upserts SEC documents into the database based on what has been downloaded to the filesystem. 76 | """ 77 | 78 | asyncio.run(async_upsert_documents_from_filings(url_base, doc_dir)) 79 | 80 | 81 | if __name__ == "__main__": 82 | Fire(main_upsert_documents_from_filings) 83 | -------------------------------------------------------------------------------- /backend/scripts/upsert_document.py: -------------------------------------------------------------------------------- 1 | from fire import Fire 2 | from app.schema import Document 3 | from app.db.session import SessionLocal 4 | from app.api import crud 5 | import asyncio 6 | 7 | async def upsert_single_document(doc_url: str): 8 | """ 9 | Upserts a single SEC document into the database using its URL. 10 | """ 11 | if not doc_url or not doc_url.startswith('http'): 12 | print("DOC_URL must be an http(s) based url value") 13 | return 14 | metadata_map = {} 15 | doc = Document(url=doc_url, metadata_map=metadata_map) 16 | 17 | async with SessionLocal() as db: 18 | document = await crud.upsert_document_by_url(db, doc) 19 | print(f"Upserted document. Database ID:\n{document.id}") 20 | 21 | 22 | def main_upsert_single_document(doc_url: str): 23 | """ 24 | Script to upsert a single document by URL. metada_map parameter will be empty dict ({}) 25 | This script is useful when trying to use your own PDF files. 26 | """ 27 | asyncio.run(upsert_single_document(doc_url)) 28 | 29 | if __name__ == "__main__": 30 | Fire(main_upsert_single_document) 31 | -------------------------------------------------------------------------------- /backend/troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | This file contains some solutions to common questions or pitfalls that may come up during development with this project. 3 | 4 | ## I'm seeing this error `pydantic.error_wrappers.ValidationError: 8 validation errors for Settings` 5 | You may have just run a command from the `Makefile` like `make migrate` and seen an error output like this: 6 | 7 | ``` 8 | pydantic.error_wrappers.ValidationError: 8 validation errors for Settings 9 | OPENAI_API_KEY 10 | field required (type=value_error.missing) 11 | AWS_KEY 12 | field required (type=value_error.missing) 13 | AWS_SECRET 14 | field required (type=value_error.missing) 15 | POLYGON_IO_API_KEY 16 | field required (type=value_error.missing) 17 | DATABASE_URL 18 | field required (type=value_error.missing) 19 | S3_BUCKET_NAME 20 | field required (type=value_error.missing) 21 | S3_ASSET_BUCKET_NAME 22 | field required (type=value_error.missing) 23 | CDN_BASE_URL 24 | field required (type=value_error.missing) 25 | make: *** [migrate] Error 1 26 | ``` 27 | 28 | This happens when you haven't set all the environment variables in your shell environment. 29 | You can remedy this quickly by doing the following: 30 | 1. Create a `.env` file and source it. 31 | - The `.env.development` file is a good template so you can just do `cp .env.development .env` 32 | 1. `set -a` 33 | 1. 
35 | -------------------------------------------------------------------------------- /frontend/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "mcr.microsoft.com/devcontainers/universal:2", 3 | "features": { 4 | "ghcr.io/devcontainers/features/node:1": {}, 5 | "ghcr.io/devcontainers-contrib/features/typescript:2": {} 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /frontend/.env.example: -------------------------------------------------------------------------------- 1 | # Since the ".env" file is gitignored, you can use the ".env.example" file to 2 | # build a new ".env" file when you clone the repo. Keep this file up-to-date 3 | # when you add new variables to `.env`. 4 | 5 | # This file will be committed to version control, so make sure not to have any 6 | # secrets in it. If you are cloning this repo, create a copy of this file named 7 | # ".env" and populate it with your secrets. 8 | 9 | # When adding additional environment variables, the schema in "/src/env.mjs" 10 | # should be updated accordingly. 11 | 12 | # Example: 13 | # SERVERVAR="foo" 14 | # NEXT_PUBLIC_CLIENTVAR="bar" 15 | NEXT_PUBLIC_BACKEND_URL=http://localhost:8000/ 16 | -------------------------------------------------------------------------------- /frontend/.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | // eslint-disable-next-line @typescript-eslint/no-var-requires 2 | const path = require("path"); 3 | 4 | /** @type {import("eslint").Linter.Config} */ 5 | const config = { 6 | overrides: [ 7 | { 8 | extends: [ 9 | "plugin:@typescript-eslint/recommended-requiring-type-checking", 10 | ], 11 | files: ["*.ts", "*.tsx"], 12 | parserOptions: { 13 | project: path.join(__dirname, "tsconfig.json"), 14 | }, 15 | }, 16 | ], 17 | parser: "@typescript-eslint/parser", 18 | parserOptions: { 19 | project: path.join(__dirname, "tsconfig.json"), 20 | }, 21 | plugins: ["@typescript-eslint"], 22 | extends: ["next/core-web-vitals", "plugin:@typescript-eslint/recommended"], 23 | rules: { 24 | "@typescript-eslint/consistent-type-imports": [ 25 | "warn", 26 | { 27 | prefer: "type-imports", 28 | fixStyle: "inline-type-imports", 29 | }, 30 | ], 31 | "@typescript-eslint/no-unused-vars": ["warn", { argsIgnorePattern: "^_" }], 32 | }, 33 | }; 34 | 35 | module.exports = config; 36 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # database 12 | /prisma/db.sqlite 13 | /prisma/db.sqlite-journal 14 | 15 | # next.js 16 | /.next/ 17 | /out/ 18 | next-env.d.ts 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # local env files 34 | # do not commit any .env files to git, except for the .env.example file. 
https://create.t3.gg/en/usage/env-variables#using-environment-variables 35 | .env 36 | .env*.local 37 | 38 | # vercel 39 | .vercel 40 | 41 | # typescript 42 | *.tsbuildinfo 43 | 44 | # Sentry Auth Token 45 | .sentryclirc 46 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # SEC Insights Frontend 2 | 3 | This is SEC Insights, a tool that lets you analyze multiple financial documents, powered by LlamaIndex. [Live URL](https://secinsights.ai/) 4 | 5 | ## Technical Details 6 | 7 | Built with `next.js`, `tailwindcss`, and `typescript react`, based on the [T3 starter kit](https://create.t3.gg/en/usage/next-js). 8 | 9 | ## Architecture 10 | 11 | This app consists of two main routes: 12 | 13 | 1. `/`, located in `src/pages/index.tsx`. This route is the landing page, and consists of the document selector and a marketing section. 14 | 2. `/conversation/{conversation_id}`, located in `src/pages/conversation/[id].tsx`. This page consists of the chat window on the left hand side, and the pdf viewer on the right hand side. 15 | 16 | - PDFs are rendered using `react-pdf`; a single pdf is rendered by the `VirtualizedPdf.tsx` component 17 | - The Chat component is located in `RenderConversations.tsx` 18 | 19 | ## How to develop locally 20 | 21 | 1. `npm i` 22 | 2. `npm run dev` 23 | 24 | 3. Before pushing to the repo, run `npm run build` to catch any TypeScript errors (TODO: pre-commit hook) 25 | 26 | Follow our deployment guides for [Vercel](https://create.t3.gg/en/deployment/vercel), [Netlify](https://create.t3.gg/en/deployment/netlify) and [Docker](https://create.t3.gg/en/deployment/docker) for more information. 27 | -------------------------------------------------------------------------------- /frontend/next.config.mjs: -------------------------------------------------------------------------------- 1 | import {withSentryConfig} from "@sentry/nextjs"; 2 | /** 3 | * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation. This is especially useful 4 | * for Docker builds. 5 | */ 6 | await import("./src/env.mjs"); 7 | 8 | /** @type {import("next").NextConfig} */ 9 | const config = { 10 | reactStrictMode: true, 11 | 12 | /** 13 | * If you have `experimental: { appDir: true }` set, then you must comment the below `i18n` config 14 | * out. 
15 | * 16 | * @see https://github.com/vercel/next.js/issues/41980 17 | */ 18 | i18n: { 19 | locales: ["en"], 20 | defaultLocale: "en", 21 | }, 22 | }; 23 | export default withSentryConfig(config, { 24 | // For all available options, see: 25 | // https://github.com/getsentry/sentry-webpack-plugin#options 26 | 27 | // Suppresses source map uploading logs during build 28 | silent: true, 29 | 30 | org: "llama-test", 31 | project: "javascript-nextjs", 32 | }, { 33 | // For all available options, see: 34 | // https://docs.sentry.io/platforms/javascript/guides/nextjs/manual-setup/ 35 | 36 | // Upload a larger set of source maps for prettier stack traces (increases build time) 37 | widenClientFileUpload: true, 38 | 39 | // Transpiles SDK to be compatible with IE11 (increases bundle size) 40 | transpileClientSDK: true, 41 | 42 | // Routes browser requests to Sentry through a Next.js rewrite to circumvent ad-blockers (increases server load) 43 | tunnelRoute: "/monitoring", 44 | 45 | // Hides source maps from generated client bundles 46 | hideSourceMaps: true, 47 | 48 | // Automatically tree-shake Sentry logger statements to reduce bundle size 49 | disableLogger: true, 50 | }); -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "llama-app-frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "build": "next build", 7 | "dev": "next dev", 8 | "lint": "next lint", 9 | "start": "next start" 10 | }, 11 | "dependencies": { 12 | "@headlessui/react": "1.7.15", 13 | "@heroicons/react": "2.0.18", 14 | "@sentry/nextjs": "^7.57.0", 15 | "@t3-oss/env-nextjs": "^0.3.1", 16 | "@tailwindcss/forms": "0.5.3", 17 | "@wojtekmaj/react-hooks": "1.17.2", 18 | "classnames": "^2.3.2", 19 | "downshift": "^7.6.0", 20 | "fuse.js": "^6.6.2", 21 | "lodash": "^4.17.21", 22 | "lodash.debounce": "^4.0.8", 23 | "md5": "2.3.0", 24 | "next": "^13.4.2", 25 | "react": "18.2.0", 26 | "react-dom": "18.2.0", 27 | "react-ga4": "^2.1.0", 28 | "react-github-btn": "^1.4.0", 29 | "react-icons": "^4.10.1", 30 | "react-intersection-observer": "9.5.1", 31 | "react-pdf": "6.2.2", 32 | "react-select": "^5.7.3", 33 | "react-use-intercom": "^5.1.4", 34 | "react-window": "1.8.9", 35 | "uuid": "^9.0.0", 36 | "zod": "^3.21.4" 37 | }, 38 | "devDependencies": { 39 | "@tailwindcss/forms": "^0.5.3", 40 | "@types/eslint": "^8.37.0", 41 | "@types/lodash": "^4.14.195", 42 | "@types/lodash.debounce": "^4.0.7", 43 | "@types/md5": "^2.3.2", 44 | "@types/node": "^18.16.0", 45 | "@types/prettier": "^2.7.2", 46 | "@types/react": "^18.2.6", 47 | "@types/react-dom": "^18.2.4", 48 | "@types/react-window": "^1.8.5", 49 | "@types/uuid": "^9.0.2", 50 | "@typescript-eslint/eslint-plugin": "^5.59.6", 51 | "@typescript-eslint/parser": "^5.59.6", 52 | "autoprefixer": "^10.4.14", 53 | "eslint": "^8.43.0", 54 | "eslint-config-next": "^13.4.2", 55 | "eslint-config-prettier": "^8.8.0", 56 | "postcss": "^8.4.21", 57 | "prettier": "^2.8.8", 58 | "prettier-plugin-tailwindcss": "^0.2.8", 59 | "tailwindcss": "^3.3.0", 60 | "typescript": "^5.0.4" 61 | }, 62 | "ct3aMetadata": { 63 | "initVersion": "7.13.1" 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /frontend/postcss.config.cjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | 8 | 
module.exports = config; 9 | -------------------------------------------------------------------------------- /frontend/prettier.config.cjs: -------------------------------------------------------------------------------- 1 | /** @type {import("prettier").Config} */ 2 | const config = { 3 | plugins: [require.resolve("prettier-plugin-tailwindcss")], 4 | }; 5 | 6 | module.exports = config; 7 | -------------------------------------------------------------------------------- /frontend/public/Gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/Gradient.png -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/public/full-chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/full-chat.png -------------------------------------------------------------------------------- /frontend/public/lyft-2021-10k.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/lyft-2021-10k.pdf -------------------------------------------------------------------------------- /frontend/public/uber-2021-10k.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/uber-2021-10k.pdf -------------------------------------------------------------------------------- /frontend/sentry.client.config.ts: -------------------------------------------------------------------------------- 1 | // This file configures the initialization of Sentry on the client. 2 | // The config you add here will be used whenever a user loads a page in their browser. 3 | // https://docs.sentry.io/platforms/javascript/guides/nextjs/ 4 | 5 | import * as Sentry from "@sentry/nextjs"; 6 | import { SENTRY_DSN } from "~/constants"; 7 | 8 | Sentry.init({ 9 | dsn: SENTRY_DSN, 10 | 11 | // Adjust this value in production, or use tracesSampler for greater control 12 | tracesSampleRate: 1, 13 | 14 | // Setting this option to true will print useful information to the console while you're setting up Sentry. 15 | debug: false, 16 | 17 | replaysOnErrorSampleRate: 1.0, 18 | 19 | // This sets the sample rate to be 10%. 
You may want this to be 100% while 20 | // in development and sample at a lower rate in production 21 | replaysSessionSampleRate: 0.1, 22 | 23 | // You can remove this option if you're not planning to use the Sentry Session Replay feature: 24 | integrations: [ 25 | new Sentry.Replay({ 26 | // Additional Replay configuration goes in here, for example: 27 | maskAllText: true, 28 | blockAllMedia: true, 29 | }), 30 | ], 31 | }); 32 | -------------------------------------------------------------------------------- /frontend/sentry.edge.config.ts: -------------------------------------------------------------------------------- 1 | // This file configures the initialization of Sentry for edge features (middleware, edge routes, and so on). 2 | // The config you add here will be used whenever one of the edge features is loaded. 3 | // Note that this config is unrelated to the Vercel Edge Runtime and is also required when running locally. 4 | // https://docs.sentry.io/platforms/javascript/guides/nextjs/ 5 | 6 | import * as Sentry from "@sentry/nextjs"; 7 | import { SENTRY_DSN } from "~/constants"; 8 | 9 | Sentry.init({ 10 | dsn: SENTRY_DSN, 11 | 12 | // Adjust this value in production, or use tracesSampler for greater control 13 | tracesSampleRate: 1, 14 | 15 | // Setting this option to true will print useful information to the console while you're setting up Sentry. 16 | debug: false, 17 | }); 18 | -------------------------------------------------------------------------------- /frontend/sentry.server.config.ts: -------------------------------------------------------------------------------- 1 | // This file configures the initialization of Sentry on the server. 2 | // The config you add here will be used whenever the server handles a request. 3 | // https://docs.sentry.io/platforms/javascript/guides/nextjs/ 4 | 5 | import * as Sentry from "@sentry/nextjs"; 6 | import { SENTRY_DSN } from "~/constants"; 7 | 8 | Sentry.init({ 9 | dsn: SENTRY_DSN, 10 | 11 | // Adjust this value in production, or use tracesSampler for greater control 12 | tracesSampleRate: 1, 13 | 14 | // Setting this option to true will print useful information to the console while you're setting up Sentry. 15 | debug: false, 16 | }); 17 | -------------------------------------------------------------------------------- /frontend/src/api/backend.tsx: -------------------------------------------------------------------------------- 1 | import { backendUrl } from "~/config"; 2 | import type { Message } from "~/types/conversation"; 3 | import type { BackendDocument } from "~/types/backend/document"; 4 | import type { SecDocument } from "~/types/document"; 5 | import { fromBackendDocumentToFrontend } from "./utils/documents"; 6 | 7 | interface CreateConversationPayload { 8 | id: string; 9 | } 10 | 11 | interface GetConversationPayload { 12 | id: string; 13 | messages: Message[]; 14 | documents: BackendDocument[]; 15 | } 16 | 17 | interface GetConversationReturnType { 18 | messages: Message[]; 19 | documents: SecDocument[]; 20 | } 21 | 22 | class BackendClient { 23 | private async get(endpoint: string) { 24 | const url = backendUrl + endpoint; 25 | const res = await fetch(url); 26 | 27 | if (!res.ok) { 28 | throw new Error(`HTTP error! status: ${res.status}`); 29 | } 30 | return res; 31 | } 32 | 
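// Usage sketch (hypothetical; assumes the backend is reachable at backendUrl):
//   const docs = await backendClient.fetchDocuments();
//   const conversationId = await backendClient.createConversation(docs.map((d) => d.id));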
33 | private async post(endpoint: string, body?: unknown) { 34 | const url = backendUrl + endpoint; 35 | const res = await fetch(url, { 36 | method: "POST", 37 | headers: { "Content-Type": "application/json" }, 38 | body: JSON.stringify(body), 39 | }); 40 | 41 | if (!res.ok) { 42 | throw new Error(`HTTP error! status: ${res.status}`); 43 | } 44 | return res; 45 | } 46 | 47 | public async createConversation(documentIds: string[]): Promise<string> { 48 | const endpoint = "api/conversation/"; 49 | const payload = { document_ids: documentIds }; 50 | const res = await this.post(endpoint, payload); 51 | const data = (await res.json()) as CreateConversationPayload; 52 | 53 | return data.id; 54 | } 55 | 56 | public async fetchConversation( 57 | id: string 58 | ): Promise<GetConversationReturnType> { 59 | const endpoint = `api/conversation/${id}`; 60 | const res = await this.get(endpoint); 61 | const data = (await res.json()) as GetConversationPayload; 62 | 63 | return { 64 | messages: data.messages, 65 | documents: fromBackendDocumentToFrontend(data.documents), 66 | }; 67 | } 68 | 69 | public async fetchDocuments(): Promise<SecDocument[]> { 70 | const endpoint = `api/document/`; 71 | const res = await this.get(endpoint); 72 | const data = (await res.json()) as BackendDocument[]; 73 | const docs = fromBackendDocumentToFrontend(data); 74 | return docs; 75 | } 76 | } 77 | 78 | export const backendClient = new BackendClient(); 79 | -------------------------------------------------------------------------------- /frontend/src/api/utils/documents.tsx: -------------------------------------------------------------------------------- 1 | import { MAX_NUMBER_OF_SELECTED_DOCUMENTS } from "~/hooks/useDocumentSelector"; 2 | import { BackendDocument, BackendDocumentType } from "~/types/backend/document"; 3 | import { SecDocument, DocumentType } from "~/types/document"; 4 | import { documentColors } from "~/utils/colors"; 5 | import _ from "lodash"; 6 | 7 | export const fromBackendDocumentToFrontend = ( 8 | backendDocuments: BackendDocument[] 9 | ) => { 10 | // sort by created_at so that de-dupe filter later keeps oldest duplicate docs 11 | backendDocuments = _.sortBy(backendDocuments, 'created_at'); 12 | let frontendDocs: SecDocument[] = backendDocuments 13 | .filter((backendDoc) => 'sec_document' in backendDoc.metadata_map) 14 | .map((backendDoc, index) => { 15 | const backendDocType = backendDoc.metadata_map.sec_document.doc_type; 16 | const frontendDocType = 17 | backendDocType === BackendDocumentType.TenK 18 | ? DocumentType.TenK 19 | : DocumentType.TenQ; 20 | 21 | // we have 10 colors for 10 documents 22 | const colorIndex = index < 10 ? index : 0;
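// Illustrative: with 12 documents, the docs at index 10 and 11 both fall back to documentColors[0].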
23 | return { 24 | id: backendDoc.id, 25 | url: backendDoc.url, 26 | ticker: backendDoc.metadata_map.sec_document.company_ticker, 27 | fullName: backendDoc.metadata_map.sec_document.company_name, 28 | year: String(backendDoc.metadata_map.sec_document.year), 29 | docType: frontendDocType, 30 | color: documentColors[colorIndex], 31 | quarter: backendDoc.metadata_map.sec_document.quarter || "", 32 | } as SecDocument; 33 | }); 34 | // de-dupe hotfix 35 | const getDocDeDupeKey = (doc: SecDocument) => `${doc.ticker}-${doc.year}-${doc.quarter || ''}`; 36 | frontendDocs = _.chain(frontendDocs).sortBy(getDocDeDupeKey).sortedUniqBy(getDocDeDupeKey).value(); 37 | 38 | return frontendDocs; 39 | }; 40 | -------------------------------------------------------------------------------- /frontend/src/components/Layout.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import type { PropsWithChildren } from "react"; 3 | const Layout = ({ children }: PropsWithChildren) => { 4 | return <>{children}</>; 5 | }; 6 | export default Layout; 7 | -------------------------------------------------------------------------------- /frontend/src/components/basics/Loading.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | export const LoadingSpinner: React.FC = () => { 4 | return ( 5 | <div className="loader" /> /* hypothetical stand-in for the spinner element */
6 | ); 7 | }; 8 | -------------------------------------------------------------------------------- /frontend/src/components/basics/Modal.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ModalPortal from "./ModalPortal"; 3 | import { AiOutlineClose } from "react-icons/ai"; 4 | interface ModalProps { 5 | isOpen: boolean; 6 | toggleModal: () => void; 7 | title: string; 8 | children: React.ReactNode; 9 | } 10 | 11 | const Modal: React.FC = ({ 12 | isOpen, 13 | toggleModal, 14 | title, 15 | children, 16 | }) => { 17 | if (!isOpen) return null; 18 | 19 | return ( 20 | 21 |
22 |
26 |
27 |

{title}

28 | {children} 29 | 35 |
36 |
37 |
38 | ); 39 | }; 40 | 41 | export default Modal; 42 | -------------------------------------------------------------------------------- /frontend/src/components/basics/ModalPortal.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ReactDOM from "react-dom"; 3 | 4 | interface ModalPortalProps { 5 | children: React.ReactNode; 6 | } 7 | 8 | const ModalPortal = ({ children }: ModalPortalProps) => { 9 | const domNode = document.getElementById("modal-root"); 10 | return domNode ? ReactDOM.createPortal(children, domNode) : null; 11 | }; 12 | 13 | export default ModalPortal; 14 | -------------------------------------------------------------------------------- /frontend/src/components/landing-page/AnimateSvg.tsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useState } from "react"; 2 | 3 | interface ScrollSVGProps { 4 | breakpoint: number; 5 | increment: number; 6 | svgs: JSX.Element[]; 7 | } 8 | 9 | export const AnimateSvg: React.FC = ({ 10 | breakpoint, 11 | increment, 12 | svgs, 13 | }) => { 14 | const [scrollPosition, setScrollPosition] = useState(0); 15 | 16 | // Listen to scroll event 17 | useEffect(() => { 18 | const handleScroll = () => { 19 | const currentScrollPos = window.pageYOffset; 20 | if (currentScrollPos > breakpoint) { 21 | setScrollPosition( 22 | Math.floor((currentScrollPos - breakpoint) / increment) 23 | ); 24 | } 25 | }; 26 | 27 | window.addEventListener("scroll", handleScroll); 28 | 29 | // Clean up event listener 30 | return () => { 31 | window.removeEventListener("scroll", handleScroll); 32 | }; 33 | }, [breakpoint, increment]); 34 | 35 | // Function to render SVGs 36 | const renderSVG = () => { 37 | // If we've scrolled past all SVGs, keep showing the last one 38 | if (scrollPosition >= svgs.length) { 39 | return svgs[svgs.length - 1]; 40 | } 41 | 42 | // Otherwise, show the SVG for the current scroll position 43 | return svgs[scrollPosition]; 44 | }; 45 | 46 | return
<div>{renderSVG()}</div> /* hypothetical wrapper element */
; 47 | }; 48 | 49 | export default AnimateSvg; 50 | -------------------------------------------------------------------------------- /frontend/src/components/landing-page/SelectTicker.tsx: -------------------------------------------------------------------------------- 1 | import React, { Dispatch, SetStateAction, useEffect, useState } from "react"; 2 | 3 | import type { Ticker } from "~/types/document"; 4 | import { useCombobox } from "downshift"; 5 | import cx from "classnames"; 6 | import { HiOutlineBuildingOffice2 } from "react-icons/hi2"; 7 | import useFocus from "~/hooks/utils/useFocus"; 8 | 9 | function getTickerFilter(inputValue: string) { 10 | const lowerCasedInputValue = inputValue.toLowerCase(); 11 | 12 | return function tickerFilter(ticker: Ticker) { 13 | return ( 14 | !inputValue || 15 | ticker.fullName.toLowerCase().includes(lowerCasedInputValue) || 16 | ticker.ticker.toLowerCase().includes(lowerCasedInputValue) 17 | ); 18 | }; 19 | } 20 | 21 | interface DocumentSelectComboboxProps { 22 | selectedItem: Ticker | null; 23 | setSelectedItem: (ticker: Ticker) => void; 24 | availableDocuments: Ticker[]; 25 | shouldFocusTicker: boolean; 26 | setFocusState: Dispatch>; 27 | } 28 | 29 | export const DocumentSelectCombobox: React.FC = ({ 30 | selectedItem, 31 | availableDocuments, 32 | setSelectedItem, 33 | shouldFocusTicker, 34 | setFocusState, 35 | }) => { 36 | const [focusRef, setFocus] = useFocus(); 37 | 38 | useEffect(() => { 39 | if (shouldFocusTicker) { 40 | setInputValue(""); 41 | setFocus(); 42 | setFocusState(false); 43 | } 44 | }, [shouldFocusTicker]); 45 | 46 | const [filteredDocuments, setFilteredDocuments] = 47 | useState(availableDocuments); 48 | 49 | useEffect(() => { 50 | setFilteredDocuments(availableDocuments); 51 | }, [availableDocuments]); 52 | 53 | const { 54 | isOpen, 55 | getMenuProps, 56 | getInputProps, 57 | highlightedIndex, 58 | getItemProps, 59 | setInputValue, 60 | } = useCombobox({ 61 | onInputValueChange({ inputValue }) { 62 | if (inputValue) { 63 | setFilteredDocuments( 64 | availableDocuments.filter(getTickerFilter(inputValue)) 65 | ); 66 | } else { 67 | setFilteredDocuments(availableDocuments); 68 | } 69 | }, 70 | items: filteredDocuments, 71 | itemToString(item) { 72 | return item ? item.ticker : ""; 73 | }, 74 | selectedItem, 75 | onSelectedItemChange: ({ selectedItem: newSelectedItem }) => { 76 | if (newSelectedItem) { 77 | setSelectedItem(newSelectedItem); 78 | } 79 | }, 80 | }); 81 | return ( 82 |
83 |
84 |
85 |
86 | 87 |
88 | 94 |
95 |
96 |
    103 | {isOpen && 104 | filteredDocuments.map((item, index) => ( 105 |
  • 114 | {item.fullName} 115 | {item.ticker} 116 |
  • 117 | ))} 118 |
119 |
120 | ); 121 | }; 122 | -------------------------------------------------------------------------------- /frontend/src/components/modals/ShareLinkModal.tsx: -------------------------------------------------------------------------------- 1 | import React, { useRef, useEffect } from "react"; 2 | import Modal from "../basics/Modal"; 3 | 4 | interface ShareLinkModalProps { 5 | isOpen: boolean; 6 | toggleModal: () => void; 7 | } 8 | 9 | const ShareLinkModal: React.FC = ({ 10 | isOpen, 11 | toggleModal, 12 | }) => { 13 | const inputRef = useRef(null); 14 | 15 | const copyToClipboard = (e: React.MouseEvent) => { 16 | e.preventDefault(); 17 | inputRef.current?.select(); 18 | document.execCommand("copy"); 19 | }; 20 | 21 | useEffect(() => { 22 | if (isOpen) { 23 | inputRef.current?.select(); 24 | } 25 | }, [isOpen]); 26 | 27 | return ( 28 | 29 |

30 | Note: this is a public page. Anyone with this link can view the 31 | contents of the page. This statement is for informational purposes only 32 | and does not serve as professional financial advice. 33 |

34 | 35 |
36 | 43 | 49 |
50 |
51 | ); 52 | }; 53 | 54 | export default ShareLinkModal; 55 | -------------------------------------------------------------------------------- /frontend/src/components/pdf-viewer/DisplayMultiplePdfs.tsx: -------------------------------------------------------------------------------- 1 | import { ViewPdf } from "~/components/pdf-viewer/ViewPdf"; 2 | import { useMultiplePdfs } from "../../hooks/useMultiplePdfs"; 3 | import { SecDocument } from "~/types/document"; 4 | import cx from "classnames"; 5 | import { borderColors } from "~/utils/colors"; 6 | 7 | interface DisplayMultiplePdfsProps { 8 | pdfs: SecDocument[]; 9 | } 10 | 11 | export const DisplayMultiplePdfs: React.FC = ({ 12 | pdfs, 13 | }) => { 14 | const { isActivePdf, handlePdfFocus } = useMultiplePdfs(pdfs); 15 | 16 | return ( 17 | <> 18 |
19 | {pdfs.map((file) => { 20 | return ( 21 |
25 | 26 |
27 | ); 28 | })} 29 | 30 |
31 |
32 | {pdfs.map((file, index) => ( 33 |
34 | 57 |
58 | ))} 59 |
60 |
61 |
62 | 63 | ); 64 | }; 65 | 66 | export default DisplayMultiplePdfs; 67 | -------------------------------------------------------------------------------- /frontend/src/components/pdf-viewer/PdfOptionsBar.tsx: -------------------------------------------------------------------------------- 1 | // PDFOptionsBar.tsx 2 | import { useEffect, useState } from "react"; 3 | import { 4 | HiMiniMagnifyingGlassMinus, 5 | HiMiniMagnifyingGlassPlus, 6 | } from "react-icons/hi2"; 7 | import { PiCaretDownBold, PiCaretUpBold } from "react-icons/pi"; 8 | import { zoomLevels } from "~/hooks/usePdfViewer"; 9 | import { SecDocument } from "~/types/document"; 10 | import { borderColors } from "~/utils/colors"; 11 | 12 | interface PDFOptionsBarProps { 13 | file: SecDocument; 14 | scrolledIndex: number; 15 | numPages: number; 16 | scaleText: string; 17 | nextPage: () => void; 18 | prevPage: () => void; 19 | handleZoomIn: () => void; 20 | handleZoomOut: () => void; 21 | goToPage: (n: number) => void; 22 | setZoomLevel: (percent: string) => void; 23 | zoomInEnabled: boolean; 24 | zoomOutEnabled: boolean; 25 | } 26 | 27 | export const PDFOptionsBar: React.FC = ({ 28 | file, 29 | scrolledIndex, 30 | numPages, 31 | scaleText, 32 | nextPage, 33 | prevPage, 34 | handleZoomIn, 35 | handleZoomOut, 36 | goToPage, 37 | setZoomLevel, 38 | zoomInEnabled, 39 | zoomOutEnabled, 40 | }) => { 41 | const [zoomPopoverOpen, setZoomPopoverOpen] = useState(false); 42 | 43 | const handleZoomSelection = (zoom: string) => { 44 | setZoomLevel(zoom); 45 | setZoomPopoverOpen(false); 46 | }; 47 | 48 | const [inputValue, setInputValue] = useState(`${scrolledIndex + 1}`); 49 | 50 | useEffect(() => { 51 | setInputValue(`${scrolledIndex + 1}`); 52 | }, [scrolledIndex]); 53 | 54 | const handleChange = (e: React.ChangeEvent) => { 55 | setInputValue(e.target.value); 56 | }; 57 | 58 | const handleKeyDown = (e: React.KeyboardEvent) => { 59 | if (e.key === "Enter") { 60 | const value = parseInt(inputValue, 10); 61 | if (!isNaN(value) && value > 0) { 62 | scrollToPage(value - 1); 63 | } 64 | } 65 | }; 66 | 67 | const scrollToPage = (page: number) => { 68 | goToPage(page); 69 | }; 70 | 71 | return ( 72 |
75 |
76 |
81 |
{file.ticker}
82 |
83 | {" "} 84 | {file.year} {file.quarter && `Q${file.quarter}`} 85 |
86 |
87 |
88 |
89 |
90 |
91 | 98 |
99 | 105 |
106 |
/ {numPages}
107 | 114 |
115 |
{" "} 116 |
117 |
118 | 125 |
setZoomPopoverOpen(!zoomPopoverOpen)} 128 | > 129 |
130 | {scaleText} 131 | {!zoomPopoverOpen ? ( 132 | 133 | ) : ( 134 | 135 | )} 136 |
137 |
138 | {zoomPopoverOpen && ( 139 |
140 | {zoomLevels.map((zoom, index) => ( 141 | 148 | ))} 149 |
150 | )} 151 | 158 |
159 |
160 |
161 |
162 |
163 | ); 164 | }; 165 | -------------------------------------------------------------------------------- /frontend/src/components/pdf-viewer/ViewPdf.tsx: -------------------------------------------------------------------------------- 1 | // ViewPdf.tsx 2 | import usePDFViewer from "~/hooks/usePdfViewer"; 3 | import { PDFOptionsBar } from "./PdfOptionsBar"; 4 | import React from "react"; 5 | import MemoizedVirtualizedPDF from "./VirtualizedPdf"; 6 | import { SecDocument } from "~/types/document"; 7 | 8 | interface ViewPdfProps { 9 | file: SecDocument; 10 | } 11 | 12 | export const ViewPdf: React.FC = ({ file }) => { 13 | const { 14 | scrolledIndex, 15 | setCurrentPageNumber, 16 | scale, 17 | setScaleFit, 18 | numPages, 19 | setNumPages, 20 | handleZoomIn, 21 | handleZoomOut, 22 | nextPage, 23 | prevPage, 24 | scaleText, 25 | pdfFocusRef, 26 | goToPage, 27 | setZoomLevel, 28 | zoomInEnabled, 29 | zoomOutEnabled, 30 | } = usePDFViewer(file); 31 | 32 | return ( 33 |
34 | {scaleText && ( 35 | 49 | )} 50 | 51 | 60 |
61 | ); 62 | }; 63 | -------------------------------------------------------------------------------- /frontend/src/components/pdf-viewer/pdfDisplayConstants.tsx: -------------------------------------------------------------------------------- 1 | export const VERTICAL_GUTTER_SIZE_PX = 20; 2 | export const HORIZONTAL_GUTTER_SIZE_PX = 20; 3 | export const PAGE_HEIGHT = 792; 4 | export const PDF_WIDTH_PERCENTAGE = 56; 5 | export const PDF_HEIGHT_PERCENTAGE = 94; 6 | export const OBSERVER_THRESHOLD_PERCENTAGE = 0.4; 7 | 8 | export const PDF_HEADER_SIZE_PX = 44; 9 | export const PDF_SIDEBAR_SIZE_PX = 80; 10 | -------------------------------------------------------------------------------- /frontend/src/config.js: -------------------------------------------------------------------------------- 1 | import { env } from "~/env.mjs"; 2 | 3 | if (env.NEXT_PUBLIC_CODESPACES === 'true' && env.NEXT_PUBLIC_CODESPACE_NAME) { 4 | const suggestedUrl = `https://${env.NEXT_PUBLIC_CODESPACE_NAME}-8000.app.github.dev/`; 5 | if (!env.NEXT_PUBLIC_BACKEND_URL.startsWith(suggestedUrl)) { 6 | console.warn(`It looks like you're running on a Github codespace. You may want to set the NEXT_PUBLIC_BACKEND_URL environment variable to ${suggestedUrl}`); 7 | } 8 | } 9 | 10 | export const backendUrl = env.NEXT_PUBLIC_BACKEND_URL; 11 | 12 | -------------------------------------------------------------------------------- /frontend/src/constants.tsx: -------------------------------------------------------------------------------- 1 | export const GOOGLE_ANALYTICS_ID = "G-LGHB46ZGWR"; 2 | export const INTERCOM_ID = "rx71g1uo"; 3 | // TODO: Populate with your own Sentry DSN: 4 | // https://docs.sentry.io/product/sentry-basics/concepts/dsn-explainer/ 5 | export const SENTRY_DSN: string | undefined = undefined; 6 | -------------------------------------------------------------------------------- /frontend/src/context/pdf.tsx: -------------------------------------------------------------------------------- 1 | import React, { createContext, useState, useContext } from "react"; 2 | import type { Citation } from "~/types/conversation"; 3 | 4 | interface PdfFocusState { 5 | documentId: string; 6 | pageNumber: number; 7 | citation?: Citation; 8 | } 9 | 10 | interface PdfFocusContextProps { 11 | pdfFocusState: PdfFocusState; 12 | setPdfFocusState: React.Dispatch>; 13 | } 14 | 15 | // Initialize Context 16 | const PdfFocusContext = createContext( 17 | undefined 18 | ); 19 | 20 | interface PdfFocusProviderProps { 21 | children: React.ReactNode; 22 | } 23 | // PDF Provider 24 | export const PdfFocusProvider: React.FC = ({ 25 | children, 26 | }) => { 27 | const [pdfFocusState, setPdfFocusState] = useState({ 28 | documentId: "", 29 | pageNumber: 0, 30 | }); 31 | 32 | return ( 33 | 39 | {children} 40 | 41 | ); 42 | }; 43 | 44 | // Custom Hook to use PDF Context 45 | export const usePdfFocus = (): PdfFocusContextProps => { 46 | const context = useContext(PdfFocusContext); 47 | if (context === undefined) { 48 | throw new Error("usePDF must be used within a PDFProvider"); 49 | } 50 | return context; 51 | }; 52 | -------------------------------------------------------------------------------- /frontend/src/env.mjs: -------------------------------------------------------------------------------- 1 | import { createEnv } from "@t3-oss/env-nextjs"; 2 | import { z } from "zod"; 3 | 4 | export const env = createEnv({ 5 | /** 6 | * Specify your server-side environment variables schema here. 
This way you can ensure the app 7 | * isn't built with invalid env vars. 8 | */ 9 | server: { 10 | NODE_ENV: z.enum(["development", "test", "production"]), 11 | }, 12 | 13 | /** 14 | * Specify your client-side environment variables schema here. This way you can ensure the app 15 | * isn't built with invalid env vars. To expose them to the client, prefix them with 16 | * `NEXT_PUBLIC_`. 17 | */ 18 | client: { 19 | NEXT_PUBLIC_BACKEND_URL: z.string().min(1), 20 | NEXT_PUBLIC_CODESPACES: z.string().default("false").optional(), 21 | NEXT_PUBLIC_CODESPACE_NAME: z.string().optional(), 22 | }, 23 | 24 | /** 25 | * You can't destruct `process.env` as a regular object in the Next.js edge runtimes (e.g. 26 | * middlewares) or client-side so we need to destruct manually. 27 | */ 28 | runtimeEnv: { 29 | NODE_ENV: process.env.NODE_ENV, 30 | NEXT_PUBLIC_BACKEND_URL: process.env.NEXT_PUBLIC_BACKEND_URL, 31 | NEXT_PUBLIC_CODESPACES: process.env.CODESPACES, 32 | NEXT_PUBLIC_CODESPACE_NAME: process.env.CODESPACE_NAME, 33 | }, 34 | /** 35 | * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation. 36 | * This is especially useful for Docker builds. 37 | */ 38 | skipValidation: !!process.env.SKIP_ENV_VALIDATION, 39 | }); 40 | -------------------------------------------------------------------------------- /frontend/src/hooks/useDocumentSelector.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect, useRef } from "react"; 2 | import { GroupBase } from "react-select"; 3 | import Select from "react-select/dist/declarations/src/Select"; 4 | import { SecDocument, DocumentType, Ticker } from "~/types/document"; 5 | import type { SelectOption } from "~/types/selection"; 6 | import { 7 | findDocumentById, 8 | getAllTickers, 9 | sortDocuments, 10 | sortSelectOptions, 11 | } from "~/utils/documents"; 12 | import { 13 | documentTypeOptions, 14 | getAvailableYears, 15 | } from "~/utils/landing-page-selection"; 16 | import useLocalStorage from "./utils/useLocalStorage"; 17 | import { backendClient } from "~/api/backend"; 18 | 19 | export const MAX_NUMBER_OF_SELECTED_DOCUMENTS = 10; 20 | 21 | export const useDocumentSelector = () => { 22 | const [availableDocuments, setAvailableDocuments] = useState( 23 | [] 24 | ); 25 | const [availableTickers, setAvailableTickers] = useState([]); 26 | const availableDocumentTypes = documentTypeOptions; 27 | const [availableYears, setAvailableYears] = useState( 28 | null 29 | ); 30 | 31 | const sortedAvailableYears = sortSelectOptions(availableYears); 32 | 33 | useEffect(() => { 34 | setAvailableTickers(getAllTickers(availableDocuments)); 35 | }, [availableDocuments]); 36 | 37 | useEffect(() => { 38 | async function getDocuments() { 39 | const docs = await backendClient.fetchDocuments(); 40 | setAvailableDocuments(docs); 41 | } 42 | getDocuments().catch(() => console.error("could not fetch documents")); 43 | }, []); 44 | 45 | const [selectedDocuments, setSelectedDocuments] = useLocalStorage< 46 | SecDocument[] 47 | >("selectedDocuments", []); 48 | const sortedSelectedDocuments = sortDocuments(selectedDocuments); 49 | 50 | const [selectedTicker, setSelectedTicker] = useState(null); 51 | const [selectedDocumentType, setSelectedDocumentType] = 52 | useState(null); 53 | const [selectedYear, setSelectedYear] = useState(null); 54 | 55 | const handleAddDocument = () => { 56 | if (selectedTicker && selectedDocumentType && selectedYear) { 57 | setSelectedDocuments((prevDocs = []) => { 58 | if (prevDocs.find((doc) => 
59 |           return prevDocs;
60 |         }
61 |         const newDoc = findDocumentById(selectedYear.value, availableDocuments);
62 |         return newDoc ? [newDoc, ...prevDocs] : prevDocs;
63 |       });
64 |       setSelectedTicker(null);
65 |       setSelectedDocumentType(null);
66 |       setSelectedYear(null);
67 |       setShouldFocusCompanySelect(true);
68 |     }
69 |   };
70 | 
71 |   const handleRemoveDocument = (documentIndex: number) => {
72 |     setSelectedDocuments((prevDocs) =>
73 |       prevDocs.filter((_, index) => index !== documentIndex)
74 |     );
75 |   };
76 | 
77 |   useEffect(() => {
78 |     setSelectedDocumentType(null);
79 |     setSelectedYear(null);
80 |   }, [selectedTicker]);
81 | 
82 |   useEffect(() => {
83 |     setSelectedYear(null);
84 |   }, [selectedDocumentType]);
85 | 
86 |   useEffect(() => {
87 |     if (selectedTicker && selectedDocumentType) {
88 |       setAvailableYears(
89 |         getAvailableYears(
90 |           selectedTicker?.ticker,
91 |           selectedDocumentType?.value as DocumentType,
92 |           availableDocuments
93 |         )
94 |       );
95 |     }
96 |   }, [selectedTicker, selectedDocumentType, availableDocuments]);
97 | 
98 |   useEffect(() => {
99 |     const handleKeyDown = (event: KeyboardEvent) => {
100 |       if (
101 |         (event.key === "Enter" && event.shiftKey) ||
102 |         (event.key === "Enter" && event.metaKey)
103 |       ) {
104 |         handleAddDocument();
105 |       }
106 |       if (event.key === "k" && event.metaKey) {
107 |         setShouldFocusCompanySelect(true);
108 |       }
109 |     };
110 |     document.addEventListener("keydown", handleKeyDown);
111 |     return () => {
112 |       document.removeEventListener("keydown", handleKeyDown);
113 |     };
114 |   }, [handleAddDocument]);
115 | 
116 |   const isDocumentSelectionEnabled =
117 |     selectedDocuments.length < MAX_NUMBER_OF_SELECTED_DOCUMENTS;
118 | 
119 |   const isStartConversationButtonEnabled = selectedDocuments.length > 0;
120 | 
121 |   const selectTicker = (ticker: Ticker) => {
122 |     setSelectedTicker(ticker);
123 |     setFocusDocumentType(true);
124 |   };
125 | 
126 |   const selectDocumentType = (docType: SelectOption | null) => {
127 |     setSelectedDocumentType(docType);
128 |     setFocusYear(true);
129 |   };
130 | 
131 |   const [shouldFocusCompanySelect, setShouldFocusCompanySelect] =
132 |     useState(false);
133 | 
134 |   const [focusYear, setFocusYear] = useState(false);
135 |   const yearFocusRef = useRef<Select<
136 |     SelectOption,
137 |     false,
138 |     GroupBase<SelectOption>
139 |   > | null>(null);
140 | 
141 |   useEffect(() => {
142 |     if (focusYear && yearFocusRef.current) {
143 |       yearFocusRef.current?.focus();
144 |       setFocusYear(false);
145 |     }
146 |   }, [focusYear]);
147 | 
148 |   const [focusDocumentType, setFocusDocumentType] = useState(false);
149 |   const documentTypeFocusRef = useRef<Select<
150 |     SelectOption,
151 |     false,
152 |     GroupBase<SelectOption>
153 |   > | null>(null);
154 | 
155 |   useEffect(() => {
156 |     if (focusDocumentType && documentTypeFocusRef.current) {
157 |       documentTypeFocusRef.current?.focus();
158 |       setFocusDocumentType(false);
159 |     }
160 |   }, [focusDocumentType]);
161 | 
162 |   return {
163 |     availableDocuments,
164 |     availableTickers,
165 |     availableDocumentTypes,
166 |     availableYears,
167 |     sortedAvailableYears,
168 |     selectedDocuments,
169 |     sortedSelectedDocuments,
170 |     selectedTicker,
171 |     selectedDocumentType,
172 |     selectedYear,
173 |     setSelectedYear,
174 |     handleAddDocument,
175 |     handleRemoveDocument,
176 |     isDocumentSelectionEnabled,
177 |     isStartConversationButtonEnabled,
178 |     yearFocusRef,
179 |     documentTypeFocusRef,
180 |     selectTicker,
181 |     selectDocumentType,
182 |     shouldFocusCompanySelect,
183 |     setShouldFocusCompanySelect,
184 |   };
185 | };
186 | 
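A minimal sketch of wiring this hook into a component — hypothetical markup; the real consumer in this repo is TitleAndDropdown:

// DocumentPicker.tsx — illustrative only; TitleAndDropdown is the actual consumer
import React from "react";
import { useDocumentSelector } from "~/hooks/useDocumentSelector";

export const DocumentPicker: React.FC = () => {
  const {
    availableTickers,
    selectedDocuments,
    selectTicker,
    handleAddDocument,
    isDocumentSelectionEnabled,
  } = useDocumentSelector();

  return (
    <div>
      {/* Picking a ticker also focuses the document-type select via setFocusDocumentType */}
      {availableTickers.map((ticker) => (
        <button key={ticker.ticker} onClick={() => selectTicker(ticker)}>
          {ticker.fullName}
        </button>
      ))}
      {/* Shift+Enter / Cmd+Enter triggers the same add via the keydown listener */}
      <button onClick={handleAddDocument} disabled={!isDocumentSelectionEnabled}>
        Add document ({selectedDocuments.length} selected)
      </button>
    </div>
  );
};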
--------------------------------------------------------------------------------
/frontend/src/hooks/useMessages.tsx:
--------------------------------------------------------------------------------
1 | // hooks/useMessages.js
2 | import { useState } from "react";
3 | import { v4 as uuidv4 } from "uuid";
4 | import { ROLE, MESSAGE_STATUS } from "~/types/conversation";
5 | import type { Message } from "~/types/conversation";
6 | import { getDateWithUTCOffset } from "~/utils/timezone";
7 | 
8 | const useMessages = (conversationId: string) => {
9 |   const [messages, setMessages] = useState<Message[]>([]);
10 | 
11 |   const userSendMessage = (content: string) => {
12 |     setMessages((prevMessages) => [
13 |       ...prevMessages,
14 | 
15 |       {
16 |         id: uuidv4(),
17 |         conversationId,
18 |         content,
19 |         role: ROLE.USER,
20 |         status: MESSAGE_STATUS.PENDING,
21 |         created_at: getDateWithUTCOffset(),
22 |       },
23 |     ]);
24 |   };
25 | 
26 |   const systemSendMessage = (message: Message) => {
27 |     setMessages((prevMessages) => {
28 |       const existingMessageIndex = prevMessages.findIndex(
29 |         (msg) => msg.id === message.id
30 |       );
31 | 
32 |       // Update the existing message
33 |       if (existingMessageIndex > -1) {
34 |         const updatedMessages = [...prevMessages];
35 |         updatedMessages[existingMessageIndex] = message;
36 |         return updatedMessages;
37 |       }
38 | 
39 |       // Add a new message if it doesn't exist
40 |       return [...prevMessages, message];
41 |     });
42 |   };
43 | 
44 |   return {
45 |     messages,
46 |     userSendMessage,
47 |     setMessages,
48 |     systemSendMessage,
49 |   };
50 | };
51 | 
52 | export default useMessages;
53 | 
--------------------------------------------------------------------------------
/frontend/src/hooks/useMultiplePdfs.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useState } from "react";
2 | import { usePdfFocus } from "~/context/pdf";
3 | import { SecDocument } from "~/types/document";
4 | 
5 | export const useMultiplePdfs = (pdfs: SecDocument[]) => {
6 |   const [activePdfUrl, setActivePdfUrl] = useState("");
7 |   const { pdfFocusState } = usePdfFocus();
8 | 
9 |   useEffect(() => {
10 |     if (pdfs && pdfs[0]) {
11 |       setActivePdfUrl(pdfs[0].url);
12 |     }
13 |   }, [pdfs]);
14 | 
15 |   useEffect(() => {
16 |     if (pdfFocusState.documentId) {
17 |       const selectedPdf = pdfs.find(
18 |         (doc) => doc.id == pdfFocusState.documentId
19 |       );
20 |       if (selectedPdf) {
21 |         setActivePdfUrl(selectedPdf.url);
22 |       }
23 |     }
24 |   }, [pdfFocusState.pageNumber, pdfFocusState.documentId, setActivePdfUrl]);
25 | 
26 |   const isActivePdf = (file: SecDocument) => {
27 |     return file.url == activePdfUrl;
28 |   };
29 | 
30 |   const handlePdfFocus = (file: SecDocument) => {
31 |     setActivePdfUrl(file.url);
32 |   };
33 | 
34 |   return {
35 |     activePdfUrl,
36 |     isActivePdf,
37 |     handlePdfFocus,
38 |   };
39 | };
40 | 
--------------------------------------------------------------------------------
/frontend/src/hooks/usePdfViewer.tsx:
--------------------------------------------------------------------------------
1 | // usePDFViewer.ts
2 | import { useState, useEffect, useCallback } from "react";
3 | import { usePdfFocus } from "~/context/pdf";
4 | 
5 | import type { PdfFocusHandler } from "~/components/pdf-viewer/VirtualizedPdf";
6 | import React from "react";
7 | import { SecDocument } from "~/types/document";
8 | 
9 | export const zoomLevels = [
10 |   "50%",
11 |   "80%",
12 |   "100%",
13 |   "130%",
14 |   "200%",
15 |   "300%",
16 |   "400%",
17 | ];
18 | const startZoomLevelIdx = 2;
19 | 
20 | const usePDFViewer = (file: SecDocument) => {
21 |   const [scrolledIndex, setScrolledIndex] = useState(1);
22 |   const [scale, setScale] =
useState(1.0); 23 | const [scaleFit, setScaleFit] = useState(1.0); 24 | const [numPages, setNumPages] = useState(0); 25 | const [isPdfRendered, setIsPdfRendered] = useState(false); 26 | const [zoomLevelIdx, setZoomLevelIdx] = useState(startZoomLevelIdx); 27 | 28 | const { pdfFocusState } = usePdfFocus(); 29 | 30 | const pdfFocusRef = React.useRef(null); 31 | 32 | const goToPage = (page: number) => { 33 | if (pdfFocusRef.current) { 34 | pdfFocusRef.current.scrollToPage(page); 35 | } 36 | }; 37 | 38 | useEffect(() => { 39 | const activeDocumentId = pdfFocusState.documentId; 40 | if (activeDocumentId === file.id) { 41 | if (pdfFocusState.pageNumber) { 42 | goToPage(pdfFocusState.pageNumber - 1); 43 | } 44 | } 45 | }, [file, pdfFocusState]); 46 | 47 | const setCurrentPageNumber = useCallback((n: number) => { 48 | setScrolledIndex(n); 49 | }, []); 50 | 51 | const handleZoomIn = useCallback(() => { 52 | const nextLevel = zoomLevelIdx + 1; 53 | if (nextLevel >= zoomLevels.length) { 54 | return; 55 | } 56 | setZoomLevel(zoomLevels[nextLevel] || "100%"); 57 | }, [zoomLevelIdx, scrolledIndex, pdfFocusRef]); 58 | 59 | const handleZoomOut = useCallback(() => { 60 | const nextLevel = zoomLevelIdx - 1; 61 | if (nextLevel < 0) { 62 | return; 63 | } 64 | setZoomLevel(zoomLevels[nextLevel] || "100%"); 65 | }, [zoomLevelIdx, scrolledIndex, pdfFocusRef]); 66 | 67 | const nextPage = () => { 68 | goToPage(scrolledIndex + 1); 69 | }; 70 | 71 | const prevPage = () => { 72 | goToPage(scrolledIndex - 1); 73 | }; 74 | 75 | const toPercentPlusBase = (n: number) => { 76 | return `${100 + n * 100}%`; 77 | }; 78 | 79 | const setZoomLevel = useCallback( 80 | (zoomLevel: string) => { 81 | const newZoomLevelIdx = zoomLevels.indexOf(zoomLevel); 82 | const newScale = percentToScale(zoomLevel) + scaleFit - 1; 83 | setScale(newScale); 84 | setTimeout(() => { 85 | goToPage(scrolledIndex); 86 | }, 30); 87 | setZoomLevelIdx(newZoomLevelIdx); 88 | }, 89 | [scrolledIndex] 90 | ); 91 | 92 | function percentToScale(percent: string): number { 93 | const number = parseInt(percent, 10); 94 | return number / 100; 95 | } 96 | 97 | const scaleDiff = Math.round((scale - scaleFit) * 10) / 10; 98 | const scaleText = toPercentPlusBase(scaleDiff); 99 | 100 | useEffect(() => { 101 | setScale(scaleFit); 102 | }, [scaleFit]); 103 | 104 | const zoomInEnabled = zoomLevelIdx < zoomLevels.length - 1; 105 | const zoomOutEnabled = zoomLevelIdx > 0; 106 | 107 | return { 108 | scrolledIndex, 109 | setCurrentPageNumber, 110 | scale, 111 | setScaleFit, 112 | numPages, 113 | setNumPages, 114 | handleZoomIn, 115 | handleZoomOut, 116 | nextPage, 117 | prevPage, 118 | scaleText, 119 | isPdfRendered, 120 | setIsPdfRendered, 121 | pdfFocusRef, 122 | goToPage, 123 | setZoomLevel, 124 | zoomInEnabled, 125 | zoomOutEnabled, 126 | }; 127 | }; 128 | 129 | export default usePDFViewer; 130 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useFocus.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | // https://gist.github.com/carpben/de968e377cbac0ffbdefe1ab56237573 4 | export default function useFocus() { 5 | const ref = React.useRef(null); 6 | const setFocus = () => ref?.current?.focus?.(); 7 | 8 | return [ref, setFocus] as const; 9 | } 10 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useIsMobile.tsx: 
-------------------------------------------------------------------------------- 1 | import { useWindowWidth } from "@wojtekmaj/react-hooks"; 2 | import { useEffect, useState } from "react"; 3 | 4 | export const MOBILE_BREAKPOINT = 768; 5 | export default function useIsMobile() { 6 | const windowWidth = useWindowWidth(); 7 | const [isMobile, setIsMobile] = useState(false); 8 | useEffect(() => { 9 | if ((windowWidth || 0) < MOBILE_BREAKPOINT) { 10 | setIsMobile(true); 11 | } else { 12 | setIsMobile(false); 13 | } 14 | }, [windowWidth]); 15 | 16 | return { isMobile }; 17 | } 18 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useLocalStorage.ts: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from "react"; 2 | 3 | function useLocalStorage( 4 | key: string, 5 | initialValue: T 6 | ): [T, (value: T | ((val: T) => T)) => void] { 7 | const [storedValue, setStoredValue] = useState(initialValue); 8 | 9 | useEffect(() => { 10 | try { 11 | const item = window.localStorage.getItem(key); 12 | if (item) { 13 | setStoredValue(JSON.parse(item) as T); 14 | } 15 | } catch (error) { 16 | console.error(error); 17 | } 18 | }, [key]); 19 | 20 | const setValue = (value: T | ((val: T) => T)) => { 21 | try { 22 | const valueToStore = 23 | value instanceof Function ? value(storedValue) : value; 24 | setStoredValue(valueToStore); 25 | window.localStorage.setItem(key, JSON.stringify(valueToStore)); 26 | } catch (error) { 27 | console.error(error); 28 | } 29 | }; 30 | 31 | return [storedValue, setValue]; 32 | } 33 | 34 | export default useLocalStorage; 35 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useModal.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | 3 | export const useModal = () => { 4 | const [isOpen, setIsOpen] = useState(false); 5 | 6 | const toggleModal = () => { 7 | setIsOpen(!isOpen); 8 | }; 9 | 10 | return { 11 | isOpen, 12 | toggleModal, 13 | }; 14 | }; 15 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useScrollBreakpoint.tsx: -------------------------------------------------------------------------------- 1 | import { useRef, useEffect, useState } from "react"; 2 | 3 | export const useScrollBreakpoint = (offset = 0) => { 4 | const ref = useRef(null); 5 | const [breakpoint, setBreakpoint] = useState(0); 6 | 7 | useEffect(() => { 8 | const setTop = () => { 9 | if (ref.current) { 10 | const rect = ref.current.getBoundingClientRect(); 11 | setBreakpoint(rect.top + window.scrollY - rect.height + offset); 12 | } 13 | }; 14 | 15 | window.addEventListener("load", setTop); 16 | window.addEventListener("resize", setTop); 17 | 18 | return () => { 19 | window.removeEventListener("load", setTop); 20 | window.removeEventListener("resize", setTop); 21 | }; 22 | }, []); 23 | 24 | return { ref, breakpoint }; 25 | }; 26 | 27 | export default useScrollBreakpoint; 28 | -------------------------------------------------------------------------------- /frontend/src/modules/react-pdf.d.ts: -------------------------------------------------------------------------------- 1 | // we have to use react-pdf 6.2.2 instead of 2 | // 7.^ because of a known text-layer issue. 
3 | // There are no types for this early version,
4 | // so we need to declare a module file to get
5 | // rid of type compilation issues
6 | declare module "react-pdf";
7 | 
--------------------------------------------------------------------------------
/frontend/src/pages/_app.tsx:
--------------------------------------------------------------------------------
1 | import { type AppType } from "next/dist/shared/lib/utils";
2 | import Layout from "~/components/Layout";
3 | import "~/styles/globals.css";
4 | import ReactGA from "react-ga4";
5 | 
6 | import { IntercomProvider } from "react-use-intercom";
7 | import { GOOGLE_ANALYTICS_ID, INTERCOM_ID } from "~/constants";
8 | 
9 | ReactGA.initialize(GOOGLE_ANALYTICS_ID);
10 | 
11 | const MyApp: AppType = ({ Component, pageProps }) => {
12 |   return (
13 |     <>
14 |       <IntercomProvider appId={INTERCOM_ID}>
15 |         <Layout>
16 |           <Component {...pageProps} />
17 |         </Layout>
18 |       </IntercomProvider>
19 |     </>
20 |   );
21 | };
22 | 
23 | export default MyApp;
24 | 
--------------------------------------------------------------------------------
/frontend/src/pages/_document.tsx:
--------------------------------------------------------------------------------
1 | // pages/_document.js
2 | import { Html, Head, Main, NextScript } from "next/document";
3 | 
4 | export default function Document() {
5 |   return (
6 |     <Html lang="en">
7 |       <Head>
8 |         <link
9 |           rel="preconnect"
10 |           href="https://fonts.googleapis.com"
11 |         />
12 |         <link
13 |           href="https://fonts.googleapis.com/css2?family=Lora&family=Nunito+Sans&display=swap"
14 |           rel="stylesheet"
15 |         />
16 |       </Head>
17 | 
18 |       <body>
19 |         <Main />
20 |         <NextScript />
21 |       </body>
22 |     </Html>
23 |   );
24 | }
25 | 
--------------------------------------------------------------------------------
/frontend/src/pages/_error.tsx:
--------------------------------------------------------------------------------
1 | import { NextPageContext } from "next";
2 | import React from "react";
3 | 
4 | interface ErrorProps {
5 |   statusCode?: number;
6 | }
7 | 
8 | const ErrorPage = ({ statusCode }: ErrorProps): JSX.Element => {
9 |   return (
10 |     <div>
11 |       {statusCode
12 |         ? `An error ${statusCode} occurred on server`
13 |         : "An error occurred on client"}
14 |     </div>
15 |   );
16 | };
17 | 
18 | ErrorPage.getInitialProps = ({ res, err }: NextPageContext) => {
19 |   const statusCode = res ? res.statusCode : err ? err.statusCode : 404;
20 |   return { statusCode };
21 | };
22 | 
23 | export default ErrorPage;
24 | 
--------------------------------------------------------------------------------
/frontend/src/pages/index.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | 
3 | import type { NextPage } from "next";
4 | import { MarketingSection } from "~/components/landing-page/MarketingSection";
5 | import { TitleAndDropdown } from "~/components/landing-page/TitleAndDropdown";
6 | 
7 | const LandingPage: NextPage = () => {
8 |   return (
9 |     <>
10 |       <TitleAndDropdown />
11 |       <MarketingSection />
12 |     </>
13 |   );
14 | };
15 | export default LandingPage;
16 | 
--------------------------------------------------------------------------------
/frontend/src/styles/globals.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 | 
5 | *,
6 | *::before,
7 | *::after {
8 |   box-sizing: border-box;
9 | }
10 | 
11 | @keyframes spin {
12 |   0% {
13 |     transform: rotate(0deg);
14 |   }
15 |   100% {
16 |     transform: rotate(360deg);
17 |   }
18 | }
19 | 
20 | .loader {
21 |   border-top-color: #9ca3af; /* The color of your spinner */
22 |   animation: spin 1s linear infinite;
23 | }
24 | 
25 | @layer components {
26 |   .landing-page-gradient-1 {
27 |     background-color: hsla(0,0%,100%,1);
28 |     background-image:
29 |       radial-gradient(at 21% 11%, hsla(240,51%,82%,0.53) 0px, transparent 50%),
30 |       radial-gradient(at 85% 0%, hsla(46,57%,78%,0.52) 0px, transparent 50%),
31 |       radial-gradient(at 91% 36%, hsla(221,100%,88%,0.68) 0px, transparent 50%),
32 |       radial-gradient(at 8% 40%, hsla(323,82%,92%,0.46) 0px, transparent 50%);
33 |   }
34 | }
35 | 
36 | @layer components {
37 |   .landing-page-gradient-2 {
38 |     background-color: hsla(41, 0%, 100%, .5);
39 |     background-image:
40 |       radial-gradient(at 68% 65%, hsla(207, 75%, 92%, .5) 0px, transparent 20%),
41 |       radial-gradient(at 100% 38%, hsla(257, 98%, 92%, .5) 0px, transparent 20%),
42 |       radial-gradient(at 85% 56%, hsla(219, 0%, 100%, 0.7) 0px, transparent 20%),
43 |       radial-gradient(at 67% 82%, hsla(323, 0%, 100%, .5) 0px, transparent 20%),
44 |       radial-gradient(at 73% 46%, hsla(176, 72%, 92%, .5) 0px, transparent 20%),
45 |       radial-gradient(at 51% 53%, hsla(317, 60%, 92%, .5) 0px, transparent 20%);
46 |   }
47 | }
48 | 
49 | @layer components {
50 |   .landing-page-gradient-3 {
51 |     background-color: hsla(0,0%,100%,1);
52 |     background-image:
53 |       radial-gradient(at 26% 56%, hsla(207,40%,91%,.5) 0px, transparent 20%),
54 |       radial-gradient(at 19% 43%, hsla(257,40%,91%,.51) 0px, transparent 20%),
55 |       radial-gradient(at 56% 54%, hsla(323,40%,91%,.51) 0px, transparent 20%),
56 |       radial-gradient(at 44% 62%, hsla(176,40%,91%,.51) 0px, transparent 20%),
57 |       radial-gradient(at 57% 45%, hsla(317,40%,91%,.51) 0px, transparent 20%);
58 |   }
59 | }
60 | 
61 | @layer components {
62 |   .landing-page-gradient-4 {
63 |     background-color: hsla(0,0%,100%,1);
64 |     background-image:
65 |       radial-gradient(at 79% 89%, hsla(240,51%,82%,0.23) 0px, transparent 20%), /* 21% 11% -> 79% 89% */
66 |       radial-gradient(at 15% 100%, hsla(46,57%,78%,0.22) 0px, transparent 20%), /* 85% 0% -> 15% 100% */
67 |       radial-gradient(at 9% 64%, hsla(221,100%,88%,0.28) 0px, transparent 20%), /* 91% 36% -> 9% 64% */
68 |       radial-gradient(at 92% 60%, hsla(323,82%,92%,0.26) 0px, transparent 20%); /*
8% 40% -> 92% 60% */ 69 | } 70 | } 71 | 72 | 73 | @layer components { 74 | .landing-page-gradient-5 { 75 | background-color: hsla(0,0%,100%,1); 76 | background-image: 77 | radial-gradient(at 21% 11%, hsla(240,51%,82%,0.83) 0px, transparent 20%), 78 | radial-gradient(at 85% 0%, hsla(46,57%,78%,0.82) 0px, transparent 20%), 79 | radial-gradient(at 91% 36%, hsla(221,100%,88%,0.88) 0px, transparent 20%), 80 | radial-gradient(at 8% 40%, hsla(323,82%,92%,0.86) 0px, transparent 20%); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /frontend/src/styles/react-select.tsx: -------------------------------------------------------------------------------- 1 | export const customReactSelectStyles = { 2 | // eslint-disable-next-line @typescript-eslint/no-unsafe-return 3 | control: (base: any, state: { isFocused: any }) => ({ 4 | ...base, 5 | background: "#F7F7F7", 6 | borderRadius: 0, 7 | borderWidth: 0, 8 | boxShadow: state.isFocused ? 0 : 0, 9 | "&:hover": { 10 | border: "0", 11 | }, 12 | }), 13 | option: (styles: any, { isFocused, isSelected }: any) => { 14 | // eslint-disable-next-line @typescript-eslint/no-unsafe-return 15 | return { 16 | ...styles, 17 | backgroundColor: isSelected ? "#3B3775" : isFocused ? "#817AF2" : null, 18 | color: isFocused ? "white" : isSelected ? "white" : "black", 19 | }; 20 | }, 21 | }; 22 | -------------------------------------------------------------------------------- /frontend/src/svgs/right-arrow.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | const RightArrow = ( 3 | props: React.JSX.IntrinsicAttributes & React.SVGProps 4 | ) => ( 5 | 12 | 16 | 17 | ); 18 | export default RightArrow; 19 | -------------------------------------------------------------------------------- /frontend/src/types/backend/document.tsx: -------------------------------------------------------------------------------- 1 | export enum BackendDocumentType { 2 | TenK = "10-K", 3 | TenQ = "10-Q", 4 | } 5 | 6 | export interface BackendDocument { 7 | created_at: string; 8 | id: string; 9 | updated_at: string; 10 | metadata_map: BackendMetadataMap; 11 | url: string; 12 | } 13 | 14 | export interface BackendMetadataMap { 15 | sec_document: BackendSecDocument; 16 | } 17 | 18 | export interface BackendSecDocument { 19 | company_name: string; 20 | company_ticker: string; 21 | doc_type: BackendDocumentType; 22 | year: number; 23 | quarter: number; 24 | } 25 | -------------------------------------------------------------------------------- /frontend/src/types/conversation.tsx: -------------------------------------------------------------------------------- 1 | import { DocumentColorEnum } from "~/utils/colors"; 2 | 3 | export enum MESSAGE_STATUS { 4 | PENDING = "PENDING", 5 | SUCCESS = "SUCCESS", 6 | ERROR = "ERROR", 7 | } 8 | 9 | export enum ROLE { 10 | USER = "user", 11 | ASSISTANT = "assistant", 12 | } 13 | 14 | export enum MessageSubprocessSource { 15 | PLACEHOLDER = "placeholder", 16 | } 17 | 18 | export interface hasId { 19 | id: string; 20 | } 21 | 22 | export interface Citation { 23 | documentId: string; 24 | snippet: string; 25 | pageNumber: number; 26 | ticker: string; 27 | displayDate: string; 28 | color: DocumentColorEnum; 29 | } 30 | 31 | export interface Conversation extends hasId { 32 | messages?: Message[]; 33 | } 34 | 35 | export interface Message extends hasId { 36 | content: string; 37 | role: ROLE; 38 | status: MESSAGE_STATUS; 39 | conversationId: string; 40 | 
sub_processes?: MessageSubProcess[]; 41 | created_at: Date; 42 | } 43 | export interface MessageSubProcess extends hasId { 44 | messageId: string; 45 | content: string; 46 | source: MessageSubprocessSource; 47 | metadata_map?: MetaDataMap; 48 | } 49 | 50 | export interface ParsedData { 51 | content?: string; 52 | status?: string; 53 | } 54 | 55 | export interface MetaDataMap { 56 | sub_question?: SubQuestion; 57 | sub_questions?: SubQuestion[]; 58 | } 59 | 60 | export interface SubQuestion { 61 | question: string; 62 | answer?: string; 63 | citations?: BackendCitation[]; 64 | } 65 | 66 | export interface BackendCitation { 67 | document_id: string; 68 | page_number: number; 69 | score: number; 70 | text: string; 71 | } 72 | -------------------------------------------------------------------------------- /frontend/src/types/document.tsx: -------------------------------------------------------------------------------- 1 | import { DocumentColorEnum } from "~/utils/colors"; 2 | 3 | export enum DocumentType { 4 | TenK = "Form 10K", 5 | TenQ = "Form 10Q", 6 | } 7 | 8 | export type Ticker = { 9 | ticker: string; 10 | fullName: string; 11 | }; 12 | 13 | export interface SecDocument extends Ticker { 14 | id: string; 15 | url: string; 16 | year: string; 17 | docType: DocumentType; 18 | quarter?: string; 19 | color: DocumentColorEnum; 20 | } 21 | -------------------------------------------------------------------------------- /frontend/src/types/selection.tsx: -------------------------------------------------------------------------------- 1 | export interface SelectOption { 2 | value: string; 3 | label: string; 4 | } 5 | -------------------------------------------------------------------------------- /frontend/src/utils/colors.tsx: -------------------------------------------------------------------------------- 1 | export enum DocumentColorEnum { 2 | purple = "llama-purple", 3 | magenta = "llama-magenta", 4 | red = "llama-red", 5 | orange = "llama-orange", 6 | yellow = "llama-yellow", 7 | lime = "llama-lime", 8 | teal = "llama-teal", 9 | cyan = "llama-cyan", 10 | blue = "llama-blue", 11 | indigo = "llama-indigo", 12 | } 13 | 14 | // order matters! 
must be high contrast 15 | export const documentColors = [ 16 | DocumentColorEnum.lime, 17 | DocumentColorEnum.orange, 18 | DocumentColorEnum.cyan, 19 | DocumentColorEnum.yellow, 20 | DocumentColorEnum.magenta, 21 | DocumentColorEnum.red, 22 | DocumentColorEnum.purple, 23 | DocumentColorEnum.teal, 24 | DocumentColorEnum.indigo, 25 | DocumentColorEnum.blue, 26 | ]; 27 | 28 | // need this because tailwind doesn't support dynamic template literals 29 | 30 | export const borderColors: { [key in DocumentColorEnum]: string } = { 31 | [DocumentColorEnum.purple]: "border-llama-purple", 32 | [DocumentColorEnum.magenta]: "border-llama-magenta", 33 | [DocumentColorEnum.red]: "border-llama-red", 34 | [DocumentColorEnum.indigo]: "border-llama-indigo", 35 | [DocumentColorEnum.lime]: "border-llama-lime", 36 | [DocumentColorEnum.orange]: "border-llama-orange", 37 | [DocumentColorEnum.blue]: "border-llama-blue", 38 | [DocumentColorEnum.yellow]: "border-llama-yellow", 39 | [DocumentColorEnum.teal]: "border-llama-teal", 40 | [DocumentColorEnum.cyan]: "border-llama-cyan", 41 | }; 42 | 43 | export const highlightColors: { [key in DocumentColorEnum]: string } = { 44 | [DocumentColorEnum.purple]: "bg-llama-purple-light", 45 | [DocumentColorEnum.magenta]: "bg-llama-magenta-light", 46 | [DocumentColorEnum.red]: "bg-llama-red-light", 47 | [DocumentColorEnum.indigo]: "bg-llama-indigo-light", 48 | [DocumentColorEnum.lime]: "bg-llama-lime-light", 49 | [DocumentColorEnum.orange]: "bg-llama-orange-light", 50 | [DocumentColorEnum.blue]: "bg-llama-blue-light", 51 | [DocumentColorEnum.yellow]: "bg-llama-yellow-light", 52 | [DocumentColorEnum.teal]: "bg-llama-teal-light", 53 | [DocumentColorEnum.cyan]: "bg-llama-cyan-light", 54 | }; 55 | -------------------------------------------------------------------------------- /frontend/src/utils/documents.tsx: -------------------------------------------------------------------------------- 1 | import type { SecDocument, Ticker, DocumentType } from "~/types/document"; 2 | import { SelectOption } from "~/types/selection"; 3 | 4 | export function getAllTickers(documents: SecDocument[]): Ticker[] { 5 | const result: Ticker[] = []; 6 | const seen: { [key: string]: boolean } = {}; 7 | 8 | for (const doc of documents) { 9 | // Skip if we've seen this ticker before 10 | if (seen[doc.ticker]) { 11 | continue; 12 | } 13 | 14 | seen[doc.ticker] = true; 15 | result.push({ 16 | fullName: doc.fullName, 17 | ticker: doc.ticker, 18 | }); 19 | } 20 | 21 | return result; 22 | } 23 | 24 | export function filterByTickerAndType( 25 | ticker: string, 26 | docType: DocumentType, 27 | documents: SecDocument[] 28 | ): SecDocument[] { 29 | if (!ticker) { 30 | return []; 31 | } 32 | return documents.filter( 33 | (document) => document.ticker === ticker && document.docType === docType 34 | ); 35 | } 36 | 37 | export function findDocumentById( 38 | id: string, 39 | documents: SecDocument[] 40 | ): SecDocument | null { 41 | return documents.find((val) => val.id === id) || null; 42 | } 43 | 44 | export function sortDocuments(selectedDocuments: SecDocument[]): SecDocument[] { 45 | return selectedDocuments.sort((a, b) => { 46 | // Sort by fullName 47 | const nameComparison = a.fullName.localeCompare(b.fullName); 48 | if (nameComparison !== 0) return nameComparison; 49 | 50 | // If fullNames are equal, sort by year 51 | return a.year.localeCompare(b.year); 52 | }); 53 | } 54 | 55 | export function sortSelectOptions( 56 | options: SelectOption[] | null = [] 57 | ): SelectOption[] { 58 | if (!options) { 59 | return 
[]; 60 | } 61 | 62 | return options.sort((a, b) => parseInt(a.label) - parseInt(b.label)); 63 | } 64 | -------------------------------------------------------------------------------- /frontend/src/utils/landing-page-selection.tsx: -------------------------------------------------------------------------------- 1 | import { DocumentType } from "~/types/document"; 2 | import type { SecDocument } from "~/types/document"; 3 | 4 | import type { SelectOption } from "~/types/selection"; 5 | import { filterByTickerAndType } from "./documents"; 6 | 7 | export const documentTypeOptions = [ 8 | { value: DocumentType.TenK, label: DocumentType.TenK }, 9 | { value: DocumentType.TenQ, label: DocumentType.TenQ }, 10 | ] as SelectOption[]; 11 | 12 | function documentToYearOption(document: SecDocument): SelectOption { 13 | if (document.quarter) { 14 | return { 15 | value: document.id, 16 | label: document.year + " Q" + document.quarter, 17 | }; 18 | } 19 | return { 20 | value: document.id, 21 | label: document.year, 22 | }; 23 | } 24 | 25 | export function getAvailableYears( 26 | ticker: string, 27 | type: DocumentType, 28 | documents: SecDocument[] 29 | ): SelectOption[] { 30 | const docs = filterByTickerAndType(ticker, type, documents); 31 | const yearOptions: SelectOption[] = docs.map(documentToYearOption); 32 | return yearOptions; 33 | } 34 | -------------------------------------------------------------------------------- /frontend/src/utils/timezone.tsx: -------------------------------------------------------------------------------- 1 | export const getDateWithUTCOffset = () => { 2 | const now = new Date(); 3 | const offsetInMilliseconds = now.getTimezoneOffset() * 60 * 1000; 4 | const utcDate = new Date(now.getTime() + offsetInMilliseconds); 5 | return utcDate; 6 | }; 7 | 8 | export const formatDisplayDate = (dateToDisplay: Date) => { 9 | // Create a regular expression to match the time portion up to the milliseconds. 10 | const regex = /(\d{2}:\d{2}:\d{2}\.\d{3})\d*/; 11 | 12 | // Extract the time portion up to the milliseconds. 13 | const matchedDateTimeString = String(dateToDisplay).replace(regex, "$1"); 14 | 15 | // Create a new Date object from the matched string. 16 | const datetime = new Date(matchedDateTimeString); 17 | 18 | // Convert it to the local time 19 | datetime.setMinutes(datetime.getMinutes() - datetime.getTimezoneOffset()); 20 | 21 | // Get user's timezone 22 | const userTimezone = Intl.DateTimeFormat().resolvedOptions().timeZone; 23 | 24 | // Create an options object for formatting the time. 25 | const options: Intl.DateTimeFormatOptions = { 26 | hour: "2-digit", 27 | minute: "2-digit", 28 | hour12: true, 29 | timeZone: userTimezone, // use the user's timezone 30 | }; 31 | 32 | // Convert the date to the desired format. 33 | const formattedTime = new Intl.DateTimeFormat("en-US", options).format( 34 | datetime 35 | ); 36 | return formattedTime; 37 | }; 38 | -------------------------------------------------------------------------------- /frontend/tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import { type Config } from "tailwindcss"; 2 | 3 | export default { 4 | content: ["./src/**/*.{js,ts,jsx,tsx}"], 5 | 6 | theme: { 7 | extend: { 8 | fontFamily: { 9 | lora: ["Lora", "serif"], // The second font is a fallback. 10 | nunito: ["Nunito Sans", "sans-serif"], // The second font is a fallback. 11 | }, 12 | colors: { 13 | "gradient-start": "rgba(255, 255, 204, 0.2)", // Change this with your color. 
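// NOTE (annotation, not in the original file): the `llama-*` entries below are
// what make utility classes like `border-llama-purple` and
// `bg-llama-purple-light` resolve. Those class names are composed in static
// lookup tables in src/utils/colors.tsx because Tailwind cannot see
// dynamically built template literals. Illustrative usage only:
//   <div className={borderColors[doc.color]}>…</div>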
14 | "gradient-end": "rgba(204, 153, 255, 0.2)", // Change this with your color. 15 | "gradient-start-light": "rgba(255, 255, 204, 0.1)", // Change this with your color. 16 | "gradient-end-light": "rgba(204, 153, 255, 0.1)", // Change this with your color. 17 | "gray-00": "#F9F9FA", 18 | "gray-15": "#E9E9ED", 19 | "gray-30": "#D2D2DC", 20 | "gray-60": "#9EA2B0", 21 | "gray-90": "#3F3F46", 22 | "gray-pdf": "#F7F7F7", 23 | "llama-purple-light": "#EDDDFC", 24 | "llama-purple": "#D09FF6", 25 | "llama-magenta-light": "#FBD7F9", 26 | "llama-magenta": "#F48FEF", 27 | "llama-red-light": "#FBDBD9", 28 | "llama-red": "#F49B95", 29 | "llama-orange-light": "#FAE9D3", 30 | "llama-orange": "#F1BA72", 31 | "llama-yellow-light": "#FDF6DD", 32 | "llama-yellow": "#F8EC78", 33 | "llama-lime-light": "#E5FAD2", 34 | "llama-lime": "#A1E66D", 35 | "llama-teal-light": "#D9FBEC", 36 | "llama-teal": "#66D8A7", 37 | "llama-cyan-light": "#DAFAFB", 38 | "llama-cyan": "#70E4EC", 39 | "llama-blue-light": "#EDF5FD", 40 | "llama-blue": "#87B6F3", 41 | "llama-indigo-light": "#EDECFD", 42 | "llama-indigo": "#817AF2", 43 | }, 44 | backgroundImage: (theme) => ({ 45 | gradient: "url('https://llama-app-frontend.vercel.app/Gradient.png')", 46 | }), 47 | backgroundSize: { 48 | "100%": "100%", 49 | }, 50 | backgroundPosition: { 51 | center: "center", 52 | }, 53 | backgroundRepeat: { 54 | "no-repeat": "no-repeat", 55 | }, 56 | }, 57 | }, 58 | plugins: [], 59 | } satisfies Config; 60 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "checkJs": true, 7 | "skipLibCheck": true, 8 | "strict": true, 9 | "forceConsistentCasingInFileNames": true, 10 | "noEmit": true, 11 | "esModuleInterop": true, 12 | "module": "esnext", 13 | "moduleResolution": "node", 14 | "resolveJsonModule": true, 15 | "isolatedModules": true, 16 | "jsx": "preserve", 17 | "incremental": true, 18 | "noUncheckedIndexedAccess": true, 19 | "baseUrl": ".", 20 | "paths": { 21 | "~/*": ["./src/*"] 22 | } 23 | }, 24 | "include": [ 25 | ".eslintrc.cjs", 26 | "next-env.d.ts", 27 | "**/*.ts", 28 | "**/*.tsx", 29 | "**/*.cjs", 30 | "**/*.mjs" 31 | ], 32 | "exclude": ["node_modules"] 33 | } 34 | -------------------------------------------------------------------------------- /render.yaml: -------------------------------------------------------------------------------- 1 | previewsEnabled: true 2 | databases: 3 | - name: llama-app-db 4 | databaseName: llama_app_db 5 | plan: pro 6 | previewPlan: starter 7 | 8 | services: 9 | # A Docker web service 10 | # Docs for Render blueprints: 11 | # https://render.com/docs/blueprint-spec 12 | - type: web 13 | name: llama-app-backend 14 | runtime: docker 15 | repo: https://github.com/run-llama/sec-insights.git 16 | region: oregon 17 | plan: standard 18 | rootDir: ./backend 19 | # https://render.com/docs/blueprint-spec#scaling 20 | scaling: 21 | minInstances: 2 22 | maxInstances: 10 23 | targetMemoryPercent: 75 # optional if targetCPUPercent is set (valid: 1-90) 24 | targetCPUPercent: 75 # optional if targetMemory is set (valid: 1-90) 25 | healthCheckPath: /api/health/ 26 | initialDeployHook: make seed_db_based_on_env 27 | envVars: 28 | - key: DATABASE_URL 29 | fromDatabase: 30 | name: llama-app-db 31 | property: connectionString 32 | - fromGroup: general-settings 33 | - fromGroup: 
prod-web-secrets 34 | - fromGroup: preview-web-secrets 35 | # A Docker cron service 36 | # Runs the seed_db job which should only be upserts and otherwise idempotent 37 | - type: cron 38 | name: llama-app-cron 39 | runtime: docker 40 | repo: https://github.com/run-llama/sec-insights.git 41 | region: oregon 42 | plan: standard 43 | rootDir: ./backend 44 | # set to the fake date of Feb 31st so it never runs. Meant to be manually triggered. 45 | schedule: "0 5 31 2 ?" 46 | dockerCommand: make seed_db_based_on_env 47 | envVars: 48 | - key: DATABASE_URL 49 | fromDatabase: 50 | name: llama-app-db 51 | property: connectionString 52 | - fromGroup: general-settings 53 | - fromGroup: prod-web-secrets 54 | - fromGroup: preview-web-secrets 55 | envVarGroups: 56 | - name: general-settings 57 | envVars: 58 | - key: IS_PREVIEW_ENV 59 | value: false 60 | previewValue: true 61 | - key: LOG_LEVEL 62 | value: INFO 63 | previewValue: DEBUG 64 | - key: BACKEND_CORS_ORIGINS 65 | value: '["http://localhost", "http://localhost:8000", "http://localhost:3000", "http://127.0.0.1:3000", "https://llama-app-backend.onrender.com", "https://llama-app-frontend.vercel.app", "http://secinsights.ai", "http://www.secinsights.ai", "https://secinsights.ai", "https://www.secinsights.ai"]' 66 | # S3_BUCKET_NAME is the bucket used for the StorageContext of the backend's LlamaIndex chat engine 67 | - key: S3_BUCKET_NAME 68 | value: llama-app-backend-prod 69 | previewValue: llama-app-backend-preview 70 | # S3_ASSET_BUCKET_NAME is the bucket used for app assets (e.g. document PDFs) 71 | - key: S3_ASSET_BUCKET_NAME 72 | value: llama-app-web-assets-prod 73 | previewValue: llama-app-web-assets-preview 74 | - key: CDN_BASE_URL 75 | value: https://d687lz8k56fia.cloudfront.net 76 | previewValue: https://dl94gqvzlh4k8.cloudfront.net 77 | - key: SENTRY_DSN 78 | sync: false 79 | - name: prod-web-secrets 80 | envVars: 81 | # Manually add a prod value for OPENAI_API_KEY in Render dashboard 82 | - key: OPENAI_API_KEY 83 | sync: false 84 | - key: AWS_KEY 85 | sync: false 86 | - key: AWS_SECRET 87 | sync: false 88 | - key: POLYGON_IO_API_KEY 89 | sync: false 90 | - name: preview-web-secrets 91 | envVars: 92 | # All env vars in this group should be prefixed with "PREVIEW_" 93 | # Manually add a preview value for PREVIEW_OPENAI_API_KEY in Render dashboard 94 | - key: PREVIEW_OPENAI_API_KEY 95 | sync: false 96 | - key: PREVIEW_AWS_KEY 97 | sync: false 98 | - key: PREVIEW_AWS_SECRET 99 | sync: false 100 | - key: PREVIEW_POLYGON_IO_API_KEY 101 | sync: false 102 | --------------------------------------------------------------------------------
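The `PREVIEW_` prefix convention above implies that, at startup, the service selects the preview variant of each secret whenever `IS_PREVIEW_ENV` is true, and the unprefixed prod variant otherwise. A minimal sketch of that resolution logic, in TypeScript for illustration only — the actual backend is Python, and the function name here (`resolveSecret`) is hypothetical:

// envResolver.ts — illustrative sketch, not the repo's implementation
const IS_PREVIEW_ENV = process.env.IS_PREVIEW_ENV === "true";

// For preview deploys, prefer the "PREVIEW_"-prefixed variant of a secret
// (e.g. PREVIEW_OPENAI_API_KEY); for prod, use the unprefixed name.
function resolveSecret(name: string): string | undefined {
  if (IS_PREVIEW_ENV) {
    return process.env[`PREVIEW_${name}`] ?? process.env[name];
  }
  return process.env[name];
}

// Usage:
const openAiApiKey = resolveSecret("OPENAI_API_KEY");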