├── .devcontainer ├── Dockerfile ├── README.md ├── devcontainer.json └── post_create_command.sh ├── FAQ.md ├── LICENSE ├── README.md ├── backend ├── .env.development ├── .env.docker ├── .flake8 ├── .gitignore ├── .pylintrc ├── .python-version ├── .vscode │ └── settings.json ├── Dockerfile ├── Makefile ├── README.md ├── alembic.ini ├── alembic │ ├── README │ ├── env.py │ ├── script.py.mako │ └── versions │ │ ├── 1b0b616e08c6_replace_value_within_.py │ │ ├── 477cee72edc4_init_tables.py │ │ ├── 663b3fea3024_update_sub_process_columns.py │ │ ├── 873c0c4616ea_add_foreign_key_indices.py │ │ ├── 90a1d6a26343_create_doc_tables.py │ │ └── c008bb4f3f48_update_sub_process_columns.py ├── app │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── api.py │ │ ├── crud.py │ │ ├── deps.py │ │ └── endpoints │ │ │ ├── __init__.py │ │ │ ├── conversation.py │ │ │ ├── documents.py │ │ │ └── health.py │ ├── chat │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── engine.py │ │ ├── messaging.py │ │ ├── pg_vector.py │ │ ├── qa_response_synth.py │ │ ├── tools.py │ │ └── utils.py │ ├── core │ │ ├── __init__.py │ │ └── config.py │ ├── db │ │ ├── base.py │ │ ├── session.py │ │ └── wait_for_db.py │ ├── llama_index_settings.py │ ├── loader_io.py │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ └── db.py │ └── schema.py ├── docker-compose.yml ├── localstack-cors-config.json ├── poetry.lock ├── pyproject.toml ├── scripts │ ├── build_vector_tables.py │ ├── chat_llama.py │ ├── dedupe_vector_store.py │ ├── download_sec_pdf.py │ ├── file_utils.py │ ├── seed_db.py │ ├── seed_storage_context.py │ ├── stock_utils.py │ ├── upsert_db_sec_documents.py │ └── upsert_document.py ├── tests │ └── app │ │ └── chat │ │ └── test_engine.py └── troubleshooting.md ├── frontend ├── .devcontainer │ └── devcontainer.json ├── .env.example ├── .eslintrc.cjs ├── .gitignore ├── README.md ├── next.config.mjs ├── package-lock.json ├── package.json ├── postcss.config.cjs ├── prettier.config.cjs ├── public │ ├── Gradient.png │ ├── chat-1.svg │ ├── chat-2.svg │ ├── chat-3.svg │ ├── chat-4.svg │ ├── chat-5.svg │ ├── citations.svg │ ├── doc-selector-no-highlight.svg │ ├── docs-with-highlight.svg │ ├── favicon.ico │ ├── full-chat.png │ ├── full-chat.svg │ ├── highlight-1.svg │ ├── highlight-2.svg │ ├── highlight-3.svg │ ├── highlight-4.svg │ ├── logo-black.svg │ ├── logo-white.svg │ ├── lyft-2021-10k.pdf │ ├── multi-doc-1.svg │ ├── multi-doc-2.svg │ ├── multi-doc-3.svg │ ├── multi-doc-4.svg │ └── uber-2021-10k.pdf ├── sentry.client.config.ts ├── sentry.edge.config.ts ├── sentry.server.config.ts ├── src │ ├── api │ │ ├── backend.tsx │ │ └── utils │ │ │ └── documents.tsx │ ├── components │ │ ├── Layout.tsx │ │ ├── basics │ │ │ ├── Loading.tsx │ │ │ ├── Modal.tsx │ │ │ └── ModalPortal.tsx │ │ ├── conversations │ │ │ └── RenderConversations.tsx │ │ ├── landing-page │ │ │ ├── AnimateSvg.tsx │ │ │ ├── MarketingSection.tsx │ │ │ ├── SelectTicker.tsx │ │ │ └── TitleAndDropdown.tsx │ │ ├── modals │ │ │ └── ShareLinkModal.tsx │ │ └── pdf-viewer │ │ │ ├── DisplayMultiplePdfs.tsx │ │ │ ├── PdfOptionsBar.tsx │ │ │ ├── ViewPdf.tsx │ │ │ ├── VirtualizedPdf.tsx │ │ │ └── pdfDisplayConstants.tsx │ ├── config.js │ ├── constants.tsx │ ├── context │ │ └── pdf.tsx │ ├── env.mjs │ ├── hooks │ │ ├── useDocumentSelector.tsx │ │ ├── useMessages.tsx │ │ ├── useMultiplePdfs.tsx │ │ ├── usePdfViewer.tsx │ │ └── utils │ │ │ ├── useFocus.tsx │ │ │ ├── useIsMobile.tsx │ │ │ ├── useLocalStorage.ts │ │ │ ├── useModal.tsx │ │ │ └── useScrollBreakpoint.tsx │ ├── modules │ │ 
└── react-pdf.d.ts
│   ├── pages
│   │   ├── _app.tsx
│   │   ├── _document.tsx
│   │   ├── _error.tsx
│   │   ├── conversation
│   │   │   └── [id].tsx
│   │   └── index.tsx
│   ├── styles
│   │   ├── globals.css
│   │   └── react-select.tsx
│   ├── svgs
│   │   ├── llama.tsx
│   │   └── right-arrow.tsx
│   ├── types
│   │   ├── backend
│   │   │   └── document.tsx
│   │   ├── conversation.tsx
│   │   ├── document.tsx
│   │   └── selection.tsx
│   └── utils
│   │   ├── colors.tsx
│   │   ├── documents.tsx
│   │   ├── landing-page-selection.tsx
│   │   ├── multi-line-highlight.tsx
│   │   └── timezone.tsx
├── tailwind.config.ts
└── tsconfig.json
└── render.yaml

/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
1 | # https://hub.docker.com/_/python
2 | FROM python:3.11.3-slim-bullseye
3 | 
4 | ENV PYTHONUNBUFFERED True
5 | # Install system-level backend deps
6 | RUN apt-get update
7 | RUN apt-get install libpq-dev gcc build-essential wkhtmltopdf s3fs -y
8 | RUN pip install poetry==1.6.1
9 | # Copy the project into the image
10 | ENV APP_HOME /app
11 | COPY . $APP_HOME
12 | 
13 | CMD ["/bin/bash"]
14 | 
--------------------------------------------------------------------------------
/.devcontainer/README.md:
--------------------------------------------------------------------------------
1 | # SEC Insights Dev Container
2 | 
3 | This dev container configuration sets up a development environment specifically tailored to this project.
4 | 
5 | This is useful for getting the project set up faster, since many of the system dependencies come pre-installed.
6 | 
7 | ## How do I use this?
8 | 
9 | You can either click this button to open the dev container in a GitHub Codespace:
10 | 
11 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/run-llama/sec-insights)
12 | 
13 | Or you can spin up the dev container locally using [VS Code's dev container feature](https://code.visualstudio.com/docs/devcontainers/create-dev-container#_create-a-devcontainerjson-file).
14 | 
15 | ## What are the benefits of using this?
16 | * System-level dependencies are pre-installed
17 | * Project-specific Python version
18 | * Other dependencies like `wkhtmltopdf` & `s3fs` are pre-installed
19 | * Uses the same base Docker image as the one used for the production service
20 |   * This means higher fidelity between your dev environment and your prod environment.
21 | 
22 | ## Are there any downsides to using this?
23 | One downside is that when you're using the dev container via GitHub Codespaces, that service isn't entirely free. There's a free-tier limit, after which GitHub Codespaces usage is paid.
24 | Also, if you're running the dev container locally via the VS Code dev container feature, you may find that Docker can take up quite a bit of storage space on your machine. Make sure you have the necessary storage space.
25 | 
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 |     "name": "sec_insights",
3 |     "build": {
4 |         "dockerfile": "./Dockerfile",
5 |         "context": ".."
6 |     },
7 |     "features": {
8 |         "ghcr.io/devcontainers-contrib/features/pipx-package:1": {},
9 |         "ghcr.io/devcontainers-contrib/features/poetry:2": {},
10 |         "ghcr.io/warrenbuckley/codespace-features/sqlite:1": {},
11 |         "ghcr.io/devcontainers/features/docker-in-docker:2": {},
12 |         "ghcr.io/devcontainers/features/aws-cli:1": {},
13 |         "ghcr.io/devcontainers/features/node:1": {}
14 |     },
15 |     "postCreateCommand": "bash .devcontainer/post_create_command.sh"
16 | }
--------------------------------------------------------------------------------
/.devcontainer/post_create_command.sh:
--------------------------------------------------------------------------------
1 | cd backend/
2 | 
3 | # install poetry dependencies
4 | poetry install
5 | 
6 | cp .env.development .env
7 | set -a
8 | source .env
9 | make migrate
10 | 
--------------------------------------------------------------------------------
/FAQ.md:
--------------------------------------------------------------------------------
1 | # Frequently Asked Questions 🔍
2 | 
3 | Here we will go over a list of commonly asked questions and concerns regarding this project. You may run into some of these questions yourself when reviewing the project!
4 | 
5 | ## How do I add more SEC documents beyond the selected pool of SEC filings?
6 | You can do this by using our [seed script](https://github.com/run-llama/sec-insights/tree/main/backend#seed-db-script-)!
7 | 
8 | You can run the seed script with the `--ciks` CLI arg *(e.g. `python scripts/seed_db.py --ciks '["1640147"]'`)*. The `ciks` arg allows you to define which companies you want to download SEC filings for. You can search for the CIK value for a given company using the SEC's search tool on [this website](https://www.sec.gov/edgar/searchedgar/companysearch).
9 | 
10 | Alternatively, you may also just add the CIKs you want to include in your project by modifying the `DEFAULT_CIKS` list [here](https://github.com/run-llama/sec-insights/blob/main/backend/scripts/download_sec_pdf.py#L12).
11 | 
12 | Just make sure you follow the setup instructions as a prerequisite to running the seed script :)
13 | 
14 | ## How do I use different types of documents besides SEC filings? e.g. Research papers, internal documents, etc.
15 | This can be done!
16 | 
17 | While our frontend is fairly specific to the SEC filing use case, our backend is set up to be very flexible in terms of the types of documents you can ingest and start asking questions about.
18 | 
19 | An in-depth walkthrough on doing this can be found in [our YouTube tutorial](https://youtu.be/2O52Tfj79T4?si=kiRxB2dLES0Gaad7&t=1311).
20 | 
21 | Here are some high-level steps (a sample session is sketched after the list):
22 | 1. Insert the PDF document into your database by using the script in `scripts/upsert_document.py`
23 |    * The script will print out the newly inserted document's UUID. Make sure to copy this to your clipboard for later!
24 | 1. Start the backend service locally using `make run`
25 | 1. Start the shell-based Chat REPL using `make chat`
26 | 1. Within the REPL:
27 |    1. First, run `pick_docs`
28 |    1. Then run `select_id <document UUID>` e.g. `select_id 421b8099-6155-2f6e-8c5b-674ee0ab0e7d`
29 |    1. Type `finish` to wrap up document selection
30 |    1. Create your conversation by typing `create`
31 |    1. Send a message within the newly created conversation with `message <your message>` e.g. `message What is the document about?`
32 |       * The first time a newly inserted document receives a message, the backend needs to run the embedding + indexing process for that document, which can take some time.
33 | 1. Start chatting away! The platform should now be ready to answer questions about this document within the Chat REPL.
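
A sample session following the steps above might look like this *(the document UUID is hypothetical, and the exact REPL prompt text may differ)*:

```
make chat
> pick_docs
> select_id 421b8099-6155-2f6e-8c5b-674ee0ab0e7d
> finish
> create
> message What is the document about?
```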
34 | 
35 | You will also find that some of the prompts used in the application are specific to the SEC Insights use case. These will need to be changed to fit your particular use case. Here's an initial list of places in the codebase where you may need to tune the prompts:
36 | * [Custom Response Synth prompt](https://github.com/run-llama/sec-insights/blob/e81c839/backend/app/chat/qa_response_synth.py#L15-L48)
37 | * [Vector Index tool descriptions](https://github.com/run-llama/sec-insights/blob/e81c83958a428e2aa02e8cb1280c3a17c55c4aa9/backend/app/chat/engine.py#L295-L296)
38 | * System Message ([template](https://github.com/run-llama/sec-insights/blob/e81c83958a428e2aa02e8cb1280c3a17c55c4aa9/backend/app/chat/constants.py#L3-L17) and [construction](https://github.com/run-llama/sec-insights/blob/e81c83958a428e2aa02e8cb1280c3a17c55c4aa9/backend/app/chat/engine.py#L336))
39 | * [User Message Prefix](https://github.com/run-llama/sec-insights/blob/e81c83958a428e2aa02e8cb1280c3a17c55c4aa9/backend/app/chat/messaging.py#L143-L145)
40 | 
41 | ## How do I completely refresh my database?
42 | During development, you may find it useful or necessary to completely wipe out your database and start fresh with empty tables.
43 | 
44 | To make this process simple, we have included a `make refresh_db` command in `backend/Makefile`. To use it, just do the following:
45 | - `cd` into the `backend/` folder if you're not already in it
46 | - Run `set -a` then `source .env`
47 |   - See the instructions in `README.md` for more information on what this step does
48 | - Run `make refresh_db`
49 |   - This will ask for confirmation first and only proceeds if you type `Y`; any other response aborts.
50 | 
51 | **What is this script doing?**
52 | 
53 | When you run the database in the `db` container using `docker compose` and the various `make` commands, the container shares a data volume with your local machine. This ensures that the data in this local database is persisted even as the `db` container is started and stopped. As such, to completely refresh this database, you first need to stop your DB container, delete these volumes, re-create the DB container, and re-apply the alembic migrations. That's exactly what `make refresh_db` does.
54 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2023 LlamaIndex
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /backend/.env.development: -------------------------------------------------------------------------------- 1 | DATABASE_URL=postgresql://user:password@127.0.0.1:5432/llama_app_db 2 | BACKEND_CORS_ORIGINS='["http://localhost", "http://localhost:8000", "http://localhost:3000", "http://127.0.0.1:3000", "https://llama-app-backend.onrender.com", "https://llama-app-frontend.vercel.app", "http://secinsights.ai", "http://www.secinsights.ai", "https://secinsights.ai", "https://www.secinsights.ai"]' 3 | OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXXXXXX 4 | LOG_LEVEL=debug 5 | RENDER=False 6 | S3_BUCKET_NAME=llama-app-backend-local 7 | S3_ASSET_BUCKET_NAME=llama-app-web-assets-local 8 | CDN_BASE_URL=http://llama-app-web-assets-local.s3-website.localhost.localstack.cloud:4566 9 | AWS_KEY=xxx 10 | AWS_SECRET=xxx 11 | POLYGON_IO_API_KEY=xxx 12 | SEC_EDGAR_COMPANY_NAME=YourOrgName 13 | SEC_EDGAR_EMAIL=you@example.com 14 | -------------------------------------------------------------------------------- /backend/.env.docker: -------------------------------------------------------------------------------- 1 | DATABASE_URL=postgresql://user:password@db:5432/llama_app_db 2 | -------------------------------------------------------------------------------- /backend/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | exclude = .git,__pycache__,__init__.py,.mypy_cache,.pytest_cache 4 | -------------------------------------------------------------------------------- /backend/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | storage/ 163 | # created by localstack 164 | volume/ 165 | -------------------------------------------------------------------------------- /backend/.python-version: -------------------------------------------------------------------------------- 1 | 3.11.3 2 | -------------------------------------------------------------------------------- /backend/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.formatting.provider": "black", 3 | "editor.formatOnSave": true, 4 | "editor.codeActionsOnSave": { 5 | "source.organizeImports": true, 6 | "source.fixAll": true 7 | }, 8 | } -------------------------------------------------------------------------------- /backend/Dockerfile: -------------------------------------------------------------------------------- 1 | # https://hub.docker.com/_/python 2 | FROM python:3.11.3-slim-bullseye 3 | 4 | ENV PYTHONUNBUFFERED True 5 | ENV APP_HOME /app 6 | WORKDIR $APP_HOME 7 | RUN pip install poetry 8 | COPY . ./ 9 | RUN apt-get update 10 | RUN apt-get install libpq-dev gcc build-essential wkhtmltopdf -y 11 | RUN poetry install 12 | 13 | ARG DATABASE_URL 14 | ENV DATABASE_URL=$DATABASE_URL 15 | 16 | ARG OPENAI_API_KEY 17 | ENV OPENAI_API_KEY=$OPENAI_API_KEY 18 | 19 | CMD ["poetry", "run", "start"] 20 | -------------------------------------------------------------------------------- /backend/Makefile: -------------------------------------------------------------------------------- 1 | run: 2 | echo "Running in local mode." 3 | docker compose create db localstack phoenix 4 | docker compose start db localstack phoenix 5 | poetry run start 6 | 7 | run_docker: 8 | echo "Running in local mode with docker." 9 | docker compose up 10 | 11 | migrate: 12 | echo "Running migrations." 13 | docker compose create db 14 | docker compose start db 15 | poetry run python -m alembic upgrade head 16 | # workaround for having PGVector create its tables 17 | poetry run python -m scripts.build_vector_tables 18 | 19 | refresh_db: 20 | # First ask for confirmation. 21 | @echo -n "Are you sure you want to refresh the local database? This will delete all data in your local db. [Y/n] "; \ 22 | read ans; \ 23 | if [ $${ans:-'N'} = 'Y' ]; then make confirmed_refresh_db; else echo "Aborting."; fi 24 | 25 | confirmed_refresh_db: 26 | echo "Refreshing database." 27 | docker compose down db 28 | docker volume rm backend_postgres_data 29 | make migrate 30 | 31 | test: 32 | poetry run python -m pytest tests/ 33 | 34 | chat: 35 | poetry run python -m scripts.chat_llama 36 | 37 | setup_localstack: 38 | docker compose create localstack 39 | docker compose start localstack 40 | echo "Waiting for localstack to start..." 41 | # Ping http://localhost:4566/health until we get a 200 response 42 | until $$(curl --output /dev/null --silent --head --fail http://localhost:4566/_localstack/health); do \ 43 | printf '.'; \ 44 | sleep 0.5; \ 45 | done 46 | # Check that S3_ASSET_BUCKET_NAME is set 47 | if [ -z ${S3_ASSET_BUCKET_NAME} ]; then \ 48 | echo "S3_ASSET_BUCKET_NAME is not set. 
Please set it and try again."; \ 49 | exit 1; \ 50 | fi 51 | awslocal s3 mb s3://${S3_ASSET_BUCKET_NAME} 52 | echo "LocalStack S3 bucket website is alive" > /tmp/index.html 53 | awslocal s3 cp /tmp/index.html s3://${S3_ASSET_BUCKET_NAME}/index.html 54 | rm /tmp/index.html 55 | awslocal s3 website s3://${S3_ASSET_BUCKET_NAME}/ --index-document index.html 56 | awslocal s3api put-bucket-cors --bucket ${S3_ASSET_BUCKET_NAME} --cors-configuration file://./localstack-cors-config.json 57 | echo "LocalStack S3 bucket website is ready. Open http://${S3_ASSET_BUCKET_NAME}.s3-website.localhost.localstack.cloud:4566 in your browser to verify." 58 | 59 | seed_db_based_on_env: 60 | # Call either seed_db or seed_db_preview, seed_db_local based on the environment 61 | # This is used by the CI/CD pipeline 62 | ENVIRONMENT=$$(poetry run python -c "from app.core.config import settings;print(settings.ENVIRONMENT.value)"); \ 63 | echo "Environment: $$ENVIRONMENT"; \ 64 | if [ "$$ENVIRONMENT" = "preview" ]; then \ 65 | make seed_db_preview; \ 66 | elif [ "$$ENVIRONMENT" = "production" ]; then \ 67 | make seed_db; \ 68 | else \ 69 | make seed_db_local; \ 70 | fi 71 | 72 | seed_db: 73 | echo "Seeding database." 74 | poetry run python scripts/seed_db.py 75 | 76 | seed_db_preview: 77 | echo "Seeding database for Preview." 78 | # only need to populate with two companies for Preview 79 | poetry run python scripts/seed_db.py --ciks '["0001018724", "1326801"]' 80 | 81 | seed_db_local: 82 | echo "Seeding database for local." 83 | docker compose create db 84 | docker compose start db 85 | make setup_localstack 86 | python scripts/seed_db.py --ciks '["0001018724", "1326801"]' --filing_types '["10-K"]' 87 | -------------------------------------------------------------------------------- /backend/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s 8 | # Uncomment the line below if you want the files to be prepended with date and time 9 | # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s 10 | 11 | # sys.path path, will be prepended to sys.path if present. 12 | # defaults to the current working directory. 13 | prepend_sys_path = . 14 | 15 | # timezone to use when rendering the date within the migration file 16 | # as well as the filename. 17 | # If specified, requires the python-dateutil library that can be 18 | # installed by adding `alembic[tz]` to the pip requirements 19 | # string value is passed to dateutil.tz.gettz() 20 | # leave blank for localtime 21 | # timezone = 22 | 23 | # max length of characters to apply to the 24 | # "slug" field 25 | # truncate_slug_length = 40 26 | 27 | # set to 'true' to run the environment during 28 | # the 'revision' command, regardless of autogenerate 29 | # revision_environment = false 30 | 31 | # set to 'true' to allow .pyc and .pyo files without 32 | # a source .py file to be detected as revisions in the 33 | # versions/ directory 34 | # sourceless = false 35 | 36 | # version location specification; This defaults 37 | # to alembic/versions. When using multiple version 38 | # directories, initial revisions must be specified with --version-path. 39 | # The path separator used here should be the separator specified by "version_path_separator" below. 
40 | # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions 41 | 42 | # version path separator; As mentioned above, this is the character used to split 43 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. 44 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. 45 | # Valid values for version_path_separator are: 46 | # 47 | # version_path_separator = : 48 | # version_path_separator = ; 49 | # version_path_separator = space 50 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects. 51 | 52 | # set to 'true' to search source files recursively 53 | # in each "version_locations" directory 54 | # new in Alembic version 1.10 55 | # recursive_version_locations = false 56 | 57 | # the output encoding used when revision files 58 | # are written from script.py.mako 59 | # output_encoding = utf-8 60 | 61 | sqlalchemy.url = postgresql://user:password@127.0.0.1:5432/llama_app_db 62 | 63 | 64 | [post_write_hooks] 65 | # post_write_hooks defines scripts or Python functions that are run 66 | # on newly generated revision scripts. See the documentation for further 67 | # detail and examples 68 | 69 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 70 | # hooks = black 71 | # black.type = console_scripts 72 | # black.entrypoint = black 73 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 74 | 75 | # Logging configuration 76 | [loggers] 77 | keys = root,sqlalchemy,alembic 78 | 79 | [handlers] 80 | keys = console 81 | 82 | [formatters] 83 | keys = generic 84 | 85 | [logger_root] 86 | level = WARN 87 | handlers = console 88 | qualname = 89 | 90 | [logger_sqlalchemy] 91 | level = WARN 92 | handlers = 93 | qualname = sqlalchemy.engine 94 | 95 | [logger_alembic] 96 | level = INFO 97 | handlers = 98 | qualname = alembic 99 | 100 | [handler_console] 101 | class = StreamHandler 102 | args = (sys.stderr,) 103 | level = NOTSET 104 | formatter = generic 105 | 106 | [formatter_generic] 107 | format = %(levelname)-5.5s [%(name)s] %(message)s 108 | datefmt = %H:%M:%S 109 | -------------------------------------------------------------------------------- /backend/alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration with an async dbapi. -------------------------------------------------------------------------------- /backend/alembic/env.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from logging.config import fileConfig 3 | 4 | from sqlalchemy import pool 5 | from sqlalchemy.engine import Connection 6 | from sqlalchemy.ext.asyncio import async_engine_from_config 7 | 8 | from alembic import context 9 | from app.core.config import settings 10 | 11 | # this is the Alembic Config object, which provides 12 | # access to the values within the .ini file in use. 13 | config = context.config 14 | 15 | # Interpret the config file for Python logging. 16 | # This line sets up loggers basically. 
17 | if config.config_file_name is not None: 18 | fileConfig(config.config_file_name) 19 | 20 | # add your model's MetaData object here 21 | # for 'autogenerate' support 22 | from app.db.base import Base # noqa: E402 23 | 24 | target_metadata = Base.metadata 25 | 26 | # other values from the config, defined by the needs of env.py, 27 | # can be acquired: 28 | # my_important_option = config.get_main_option("my_important_option") 29 | # ... etc. 30 | db_url = config.get_main_option("sqlalchemy.url") 31 | if settings.DATABASE_URL.strip(): 32 | db_url = settings.DATABASE_URL.strip() 33 | print(f"Using DATABASE_URL {db_url} from environment for migrations") 34 | config.set_main_option("sqlalchemy.url", db_url) 35 | 36 | 37 | def run_migrations_offline() -> None: 38 | """Run migrations in 'offline' mode. 39 | 40 | This configures the context with just a URL 41 | and not an Engine, though an Engine is acceptable 42 | here as well. By skipping the Engine creation 43 | we don't even need a DBAPI to be available. 44 | 45 | Calls to context.execute() here emit the given string to the 46 | script output. 47 | 48 | """ 49 | context.configure( 50 | url=config.get_main_option("sqlalchemy.url"), 51 | target_metadata=target_metadata, 52 | literal_binds=True, 53 | dialect_opts={"paramstyle": "named"}, 54 | transaction_per_migration=True, 55 | ) 56 | 57 | with context.begin_transaction(): 58 | context.run_migrations() 59 | 60 | 61 | def do_run_migrations(connection: Connection) -> None: 62 | context.configure( 63 | connection=connection, 64 | target_metadata=target_metadata, 65 | transaction_per_migration=True, 66 | ) 67 | 68 | with context.begin_transaction(): 69 | context.run_migrations() 70 | 71 | 72 | async def run_async_migrations() -> None: 73 | """In this scenario we need to create an Engine 74 | and associate a connection with the context. 75 | 76 | """ 77 | connectable = async_engine_from_config( 78 | config.get_section(config.config_ini_section, {}), 79 | prefix="sqlalchemy.", 80 | poolclass=pool.NullPool, 81 | ) 82 | 83 | async with connectable.connect() as connection: 84 | await connection.run_sync(do_run_migrations) 85 | 86 | await connectable.dispose() 87 | 88 | 89 | def run_migrations_online() -> None: 90 | """Run migrations in 'online' mode.""" 91 | 92 | asyncio.run(run_async_migrations()) 93 | 94 | 95 | if context.is_offline_mode(): 96 | run_migrations_offline() 97 | else: 98 | run_migrations_online() 99 | -------------------------------------------------------------------------------- /backend/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade() -> None: 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade() -> None: 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /backend/alembic/versions/1b0b616e08c6_replace_value_within_.py: -------------------------------------------------------------------------------- 1 | """replace value within MessageSubProcessSourceEnum 2 | 3 | Revision ID: 1b0b616e08c6 4 | Revises: 90a1d6a26343 5 | Create Date: 2023-07-28 19:39:03.256581 6 | 7 | """ 8 | from typing import Set 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = "1b0b616e08c6" 16 | down_revision = "90a1d6a26343" 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | existing_sub_process_source_enum_values = { 22 | "CHUNKING", 23 | "NODE_PARSING", 24 | "EMBEDDING", 25 | "LLM", 26 | "QUERY", 27 | "RETRIEVE", 28 | "SYNTHESIZE", 29 | "TREE", 30 | "CONSTRUCTED_QUERY_ENGINE", 31 | "SUB_QUESTIONS", 32 | } 33 | 34 | new_sub_process_source_enum_values = { 35 | *existing_sub_process_source_enum_values, 36 | "SUB_QUESTION", 37 | } 38 | 39 | 40 | def replace_enum_values(enum_name: str, table: str, new_values: Set[str]): 41 | """ 42 | Create a new type, add the value to it, update the column to use the new type and delete the old type 43 | """ 44 | op.execute(f'ALTER TYPE public."{enum_name}" RENAME TO "{enum_name}Old"') 45 | sa.Enum(*new_values, name=enum_name).create(op.get_bind()) 46 | op.execute( 47 | f'ALTER TABLE {table} ALTER COLUMN source TYPE public."{enum_name}" USING source::text::public."{enum_name}"' 48 | ) 49 | op.execute(f'DROP TYPE public."{enum_name}Old"') 50 | 51 | 52 | def upgrade() -> None: 53 | # Alter MessageSubProcessEnum to add "SUB_QUESTION" as a valid value 54 | replace_enum_values( 55 | "MessageSubProcessSourceEnum", 56 | "messagesubprocess", 57 | new_sub_process_source_enum_values, 58 | ) 59 | 60 | # ### end Alembic commands ### 61 | 62 | 63 | def downgrade() -> None: 64 | # ### commands auto generated by Alembic - please adjust! ### 65 | # revert back to the old enum type 66 | # Note that this won't work if the DB already has rows with the new enum values 67 | replace_enum_values( 68 | "MessageSubProcessSourceEnum", 69 | "messagesubprocess", 70 | existing_sub_process_source_enum_values, 71 | ) 72 | # ### end Alembic commands ### 73 | -------------------------------------------------------------------------------- /backend/alembic/versions/477cee72edc4_init_tables.py: -------------------------------------------------------------------------------- 1 | """init tables 2 | 3 | Revision ID: 477cee72edc4 4 | Revises: 5 | Create Date: 2023-06-15 20:55:49.318398 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects import postgresql 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "477cee72edc4" 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! 
### 21 | op.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') 22 | op.create_table( 23 | "conversation", 24 | sa.Column("id", sa.UUID(), nullable=False), 25 | sa.Column( 26 | "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 27 | ), 28 | sa.Column( 29 | "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 30 | ), 31 | sa.PrimaryKeyConstraint("id"), 32 | ) 33 | op.create_index(op.f("ix_conversation_id"), "conversation", ["id"], unique=False) 34 | op.create_table( 35 | "message", 36 | sa.Column("conversation_id", sa.UUID(), nullable=True), 37 | sa.Column("content", sa.String(), nullable=True), 38 | sa.Column( 39 | "role", 40 | postgresql.ENUM("user", "assistant", name="MessageRoleEnum"), 41 | nullable=True, 42 | ), 43 | sa.Column( 44 | "status", 45 | postgresql.ENUM("PENDING", "SUCCESS", "ERROR", name="MessageStatusEnum"), 46 | nullable=True, 47 | ), 48 | sa.Column("id", sa.UUID(), nullable=False), 49 | sa.Column( 50 | "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 51 | ), 52 | sa.Column( 53 | "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 54 | ), 55 | sa.ForeignKeyConstraint( 56 | ["conversation_id"], 57 | ["conversation.id"], 58 | ), 59 | sa.PrimaryKeyConstraint("id"), 60 | ) 61 | op.create_index(op.f("ix_message_id"), "message", ["id"], unique=False) 62 | op.create_table( 63 | "messagesubprocess", 64 | sa.Column("message_id", sa.UUID(), nullable=True), 65 | sa.Column("content", sa.String(), nullable=True), 66 | sa.Column( 67 | "source", 68 | postgresql.ENUM( 69 | "CHUNKING", 70 | "NODE_PARSING", 71 | "EMBEDDING", 72 | "LLM", 73 | "QUERY", 74 | "RETRIEVE", 75 | "SYNTHESIZE", 76 | "TREE", 77 | "CONSTRUCTED_QUERY_ENGINE", 78 | name="MessageSubProcessSourceEnum", 79 | ), 80 | nullable=True, 81 | ), 82 | sa.Column("id", sa.UUID(), nullable=False), 83 | sa.Column( 84 | "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 85 | ), 86 | sa.Column( 87 | "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False 88 | ), 89 | sa.ForeignKeyConstraint( 90 | ["message_id"], 91 | ["message.id"], 92 | ), 93 | sa.PrimaryKeyConstraint("id"), 94 | ) 95 | op.create_index( 96 | op.f("ix_messagesubprocess_id"), "messagesubprocess", ["id"], unique=False 97 | ) 98 | # ### end Alembic commands ### 99 | 100 | 101 | def downgrade() -> None: 102 | # ### commands auto generated by Alembic - please adjust! 
### 103 | op.drop_index(op.f("ix_messagesubprocess_id"), table_name="messagesubprocess") 104 | op.drop_table("messagesubprocess") 105 | op.drop_index(op.f("ix_message_id"), table_name="message") 106 | op.drop_table("message") 107 | op.drop_index(op.f("ix_conversation_id"), table_name="conversation") 108 | op.drop_table("conversation") 109 | # remove enum types 110 | op.execute('DROP TYPE "MessageRoleEnum"') 111 | op.execute('DROP TYPE "MessageStatusEnum"') 112 | op.execute('DROP TYPE "MessageSubProcessSourceEnum"') 113 | # ### end Alembic commands ### 114 | -------------------------------------------------------------------------------- /backend/alembic/versions/663b3fea3024_update_sub_process_columns.py: -------------------------------------------------------------------------------- 1 | """update_sub_process_columns 2 | 3 | Revision ID: 663b3fea3024 4 | Revises: 873c0c4616ea 5 | Create Date: 2023-10-30 17:23:51.517821 6 | 7 | """ 8 | from typing import Set 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = '663b3fea3024' 16 | down_revision = '873c0c4616ea' 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | existing_sub_process_source_enum_values = { 22 | "CHUNKING", 23 | "NODE_PARSING", 24 | "EMBEDDING", 25 | "LLM", 26 | "QUERY", 27 | "RETRIEVE", 28 | "SYNTHESIZE", 29 | "TREE", 30 | "CONSTRUCTED_QUERY_ENGINE", 31 | "SUB_QUESTIONS", 32 | "SUB_QUESTION", 33 | } 34 | 35 | new_sub_process_source_enum_values = { 36 | *existing_sub_process_source_enum_values, 37 | "AGENT_STEP", 38 | "SUB_QUESTION", 39 | "TEMPLATING", 40 | "FUNCTION_CALL", 41 | "RERANKING", 42 | "EXCEPTION", 43 | "AGENT_STEP" 44 | } 45 | 46 | 47 | def replace_enum_values(enum_name: str, table: str, new_values: Set[str]): 48 | """ 49 | Create a new type, add the value to it, update the column to use the new type and delete the old type 50 | """ 51 | op.execute(f'ALTER TYPE public."{enum_name}" RENAME TO "{enum_name}Old"') 52 | sa.Enum(*new_values, name=enum_name).create(op.get_bind()) 53 | op.execute( 54 | f'ALTER TABLE {table} ALTER COLUMN source TYPE public."{enum_name}" USING source::text::public."{enum_name}"' 55 | ) 56 | op.execute(f'DROP TYPE public."{enum_name}Old"') 57 | 58 | 59 | def upgrade() -> None: 60 | # ### commands auto generated by Alembic - please adjust! ### 61 | # Alter MessageSubProcessEnum to add new CBEventType enum values as valid values 62 | replace_enum_values( 63 | "MessageSubProcessSourceEnum", 64 | "messagesubprocess", 65 | new_sub_process_source_enum_values, 66 | ) 67 | 68 | # ### end Alembic commands ### 69 | 70 | 71 | def downgrade() -> None: 72 | # ### commands auto generated by Alembic - please adjust! 
### 73 | # revert back to the old enum type 74 | # Note that this won't work if the DB already has rows with the new enum values 75 | replace_enum_values( 76 | "MessageSubProcessSourceEnum", 77 | "messagesubprocess", 78 | existing_sub_process_source_enum_values, 79 | ) 80 | # ### end Alembic commands ### 81 | -------------------------------------------------------------------------------- /backend/alembic/versions/873c0c4616ea_add_foreign_key_indices.py: -------------------------------------------------------------------------------- 1 | """add foreign key indices 2 | 3 | Revision ID: 873c0c4616ea 4 | Revises: 1b0b616e08c6 5 | Create Date: 2023-08-15 23:10:01.739927 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects import postgresql 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "873c0c4616ea" 14 | down_revision = "1b0b616e08c6" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_index( 22 | op.f("ix_conversationdocument_conversation_id"), 23 | "conversationdocument", 24 | ["conversation_id"], 25 | unique=False, 26 | ) 27 | op.create_index( 28 | op.f("ix_conversationdocument_document_id"), 29 | "conversationdocument", 30 | ["document_id"], 31 | unique=False, 32 | ) 33 | op.create_index( 34 | op.f("ix_message_conversation_id"), "message", ["conversation_id"], unique=False 35 | ) 36 | op.create_index( 37 | op.f("ix_messagesubprocess_message_id"), 38 | "messagesubprocess", 39 | ["message_id"], 40 | unique=False, 41 | ) 42 | # ### end Alembic commands ### 43 | 44 | 45 | def downgrade() -> None: 46 | # ### commands auto generated by Alembic - please adjust! ### 47 | op.drop_index( 48 | op.f("ix_messagesubprocess_message_id"), table_name="messagesubprocess" 49 | ) 50 | op.drop_index(op.f("ix_message_conversation_id"), table_name="message") 51 | op.drop_index( 52 | op.f("ix_conversationdocument_document_id"), table_name="conversationdocument" 53 | ) 54 | op.drop_index( 55 | op.f("ix_conversationdocument_conversation_id"), 56 | table_name="conversationdocument", 57 | ) 58 | # ### end Alembic commands ### 59 | -------------------------------------------------------------------------------- /backend/alembic/versions/90a1d6a26343_create_doc_tables.py: -------------------------------------------------------------------------------- 1 | """create doc tables 2 | 3 | Revision ID: 90a1d6a26343 4 | Revises: c008bb4f3f48 5 | Create Date: 2023-07-11 05:42:05.054926 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects import postgresql 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '90a1d6a26343' 14 | down_revision = 'c008bb4f3f48' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! 
### 21 | op.create_table('document', 22 | sa.Column('url', sa.String(), nullable=False), 23 | sa.Column('metadata_map', postgresql.JSONB(astext_type=sa.Text()), nullable=True), 24 | sa.Column('id', sa.UUID(), nullable=False), 25 | sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), 26 | sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), 27 | sa.PrimaryKeyConstraint('id'), 28 | sa.UniqueConstraint('url') 29 | ) 30 | op.create_index(op.f('ix_document_id'), 'document', ['id'], unique=False) 31 | op.create_table('conversationdocument', 32 | sa.Column('conversation_id', sa.UUID(), nullable=True), 33 | sa.Column('document_id', sa.UUID(), nullable=True), 34 | sa.Column('id', sa.UUID(), nullable=False), 35 | sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), 36 | sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), 37 | sa.ForeignKeyConstraint(['conversation_id'], ['conversation.id'], ), 38 | sa.ForeignKeyConstraint(['document_id'], ['document.id'], ), 39 | sa.PrimaryKeyConstraint('id') 40 | ) 41 | op.create_index(op.f('ix_conversationdocument_id'), 'conversationdocument', ['id'], unique=False) 42 | # ### end Alembic commands ### 43 | 44 | 45 | def downgrade() -> None: 46 | # ### commands auto generated by Alembic - please adjust! ### 47 | op.drop_index(op.f('ix_conversationdocument_id'), table_name='conversationdocument') 48 | op.drop_table('conversationdocument') 49 | op.drop_index(op.f('ix_document_id'), table_name='document') 50 | op.drop_table('document') 51 | # ### end Alembic commands ### 52 | -------------------------------------------------------------------------------- /backend/alembic/versions/c008bb4f3f48_update_sub_process_columns.py: -------------------------------------------------------------------------------- 1 | """update sub process columns 2 | 3 | Revision ID: c008bb4f3f48 4 | Revises: 477cee72edc4 5 | Create Date: 2023-07-07 18:11:35.087271 6 | 7 | """ 8 | from typing import Set 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "c008bb4f3f48" 15 | down_revision = "477cee72edc4" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | existing_sub_process_source_enum_values = { 20 | "CHUNKING", 21 | "NODE_PARSING", 22 | "EMBEDDING", 23 | "LLM", 24 | "QUERY", 25 | "RETRIEVE", 26 | "SYNTHESIZE", 27 | "TREE", 28 | "CONSTRUCTED_QUERY_ENGINE", 29 | } 30 | 31 | new_sub_process_source_enum_values = { 32 | *existing_sub_process_source_enum_values, 33 | "SUB_QUESTIONS", 34 | } 35 | 36 | 37 | def replace_enum_values(enum_name: str, table: str, new_values: Set[str]): 38 | """ 39 | Create a new type, add the value to it, update the column to use the new type and delete the old type 40 | """ 41 | op.execute(f'ALTER TYPE public."{enum_name}" RENAME TO "{enum_name}Old"') 42 | sa.Enum(*new_values, name=enum_name).create(op.get_bind()) 43 | op.execute( 44 | f'ALTER TABLE {table} ALTER COLUMN source TYPE public."{enum_name}" USING source::text::public."{enum_name}"' 45 | ) 46 | op.execute(f'DROP TYPE public."{enum_name}Old"') 47 | 48 | 49 | def upgrade() -> None: 50 | # ### commands auto generated by Alembic - please adjust! 
### 51 | enum = postgresql.ENUM("PENDING", "FINISHED", name="MessageSubProcessStatusEnum") 52 | enum.create(op.get_bind()) 53 | op.add_column( 54 | "messagesubprocess", 55 | sa.Column("status", enum, nullable=False, server_default="FINISHED"), 56 | ) 57 | op.add_column( 58 | "messagesubprocess", 59 | sa.Column( 60 | "metadata_map", postgresql.JSONB(astext_type=sa.Text()), nullable=True 61 | ), 62 | ) 63 | op.drop_column("messagesubprocess", "content") 64 | 65 | # Alter MessageSubProcessEnum to add "SUB_QUESTIONS" as a valid value 66 | replace_enum_values( 67 | "MessageSubProcessSourceEnum", 68 | "messagesubprocess", 69 | new_sub_process_source_enum_values, 70 | ) 71 | 72 | # ### end Alembic commands ### 73 | 74 | 75 | def downgrade() -> None: 76 | # ### commands auto generated by Alembic - please adjust! ### 77 | # revert back to the old enum type 78 | # Note that this won't work if the DB already has rows with the new enum values 79 | replace_enum_values( 80 | "MessageSubProcessSourceEnum", 81 | "messagesubprocess", 82 | existing_sub_process_source_enum_values, 83 | ) 84 | 85 | op.add_column( 86 | "messagesubprocess", 87 | sa.Column("content", sa.VARCHAR(), autoincrement=False, nullable=True), 88 | ) 89 | op.drop_column("messagesubprocess", "metadata_map") 90 | op.drop_column("messagesubprocess", "status") 91 | # ### end Alembic commands ### 92 | -------------------------------------------------------------------------------- /backend/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/__init__.py -------------------------------------------------------------------------------- /backend/app/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/api/__init__.py -------------------------------------------------------------------------------- /backend/app/api/api.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | from app.api.endpoints import conversation, health, documents 4 | 5 | api_router = APIRouter() 6 | api_router.include_router( 7 | conversation.router, prefix="/conversation", tags=["conversation"] 8 | ) 9 | api_router.include_router(documents.router, prefix="/document", tags=["document"]) 10 | api_router.include_router(health.router, prefix="/health", tags=["health"]) 11 | -------------------------------------------------------------------------------- /backend/app/api/crud.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, List 2 | from sqlalchemy.orm import joinedload 3 | from sqlalchemy.ext.asyncio import AsyncSession 4 | from app.models.db import Conversation, Message, Document, ConversationDocument 5 | from app import schema 6 | from sqlalchemy import select, delete 7 | from sqlalchemy.dialects.postgresql import insert 8 | 9 | 10 | async def fetch_conversation_with_messages( 11 | db: AsyncSession, conversation_id: str 12 | ) -> Optional[schema.Conversation]: 13 | """ 14 | Fetch a conversation with its messages + messagesubprocesses 15 | return None if the conversation with the given id does not exist 16 | """ 17 | # Eagerly load required relationships 18 | stmt = ( 19 | select(Conversation) 20 | 
        .options(joinedload(Conversation.messages).subqueryload(Message.sub_processes))
21 |         .options(
22 |             joinedload(Conversation.conversation_documents).subqueryload(
23 |                 ConversationDocument.document
24 |             )
25 |         )
26 |         .where(Conversation.id == conversation_id)
27 |     )
28 | 
29 |     result = await db.execute(stmt)  # execute the statement
30 |     conversation = result.scalars().first()  # get the first result
31 |     if conversation is not None:
32 |         convo_dict = {
33 |             **conversation.__dict__,
34 |             "documents": [
35 |                 convo_doc.document for convo_doc in conversation.conversation_documents
36 |             ],
37 |         }
38 |         return schema.Conversation(**convo_dict)
39 |     return None
40 | 
41 | 
42 | async def create_conversation(
43 |     db: AsyncSession, convo_payload: schema.ConversationCreate
44 | ) -> schema.Conversation:
45 |     conversation = Conversation()
46 |     convo_doc_db_objects = [
47 |         ConversationDocument(document_id=doc_id, conversation=conversation)
48 |         for doc_id in convo_payload.document_ids
49 |     ]
50 |     db.add(conversation)
51 |     db.add_all(convo_doc_db_objects)
52 |     await db.commit()
53 |     await db.refresh(conversation)
54 |     return await fetch_conversation_with_messages(db, conversation.id)
55 | 
56 | 
57 | async def delete_conversation(db: AsyncSession, conversation_id: str) -> bool:
58 |     stmt = delete(Conversation).where(Conversation.id == conversation_id)
59 |     result = await db.execute(stmt)
60 |     await db.commit()
61 |     return result.rowcount > 0
62 | 
63 | 
64 | async def fetch_message_with_sub_processes(
65 |     db: AsyncSession, message_id: str
66 | ) -> Optional[schema.Message]:
67 |     """
68 |     Fetch a message with its sub processes.
69 |     Returns None if the message with the given id does not exist.
70 |     """
71 |     # Eagerly load required relationships
72 |     stmt = (
73 |         select(Message)
74 |         .options(joinedload(Message.sub_processes))
75 |         .where(Message.id == message_id)
76 |     )
77 |     result = await db.execute(stmt)  # execute the statement
78 |     message = result.scalars().first()  # get the first result
79 |     if message is not None:
80 |         return schema.Message.model_validate(message, from_attributes=True)
81 |     return None
82 | 
83 | 
84 | async def fetch_documents(
85 |     db: AsyncSession,
86 |     id: Optional[str] = None,
87 |     ids: Optional[List[str]] = None,
88 |     url: Optional[str] = None,
89 |     limit: Optional[int] = None,
90 | ) -> Sequence[schema.Document]:
91 |     """
92 |     Fetch documents, optionally filtered by a single id, a list of ids, or a url.
93 |     Returns all documents when no filters are given.
94 |     """
95 |     stmt = select(Document)
96 |     if id is not None:
97 |         stmt = stmt.where(Document.id == id)
98 |         limit = 1
99 |     elif ids is not None:
100 |         stmt = stmt.where(Document.id.in_(ids))
101 |     if url is not None:
102 |         stmt = stmt.where(Document.url == url)
103 |     if limit is not None:
104 |         stmt = stmt.limit(limit)
105 |     result = await db.execute(stmt)
106 |     documents = result.scalars().all()
107 |     return [schema.Document.model_validate(doc, from_attributes=True) for doc in documents]
108 | 
109 | 
110 | async def upsert_document_by_url(
111 |     db: AsyncSession, document: schema.Document
112 | ) -> schema.Document:
113 |     """
114 |     Upsert a document, using its url as the conflict target.
115 |     """
116 |     stmt = insert(Document).values(**document.model_dump(exclude_none=True))
117 |     stmt = stmt.on_conflict_do_update(
118 |         index_elements=[Document.url],
119 |         set_=document.model_dump(mode="json", include={"metadata_map"}),
120 |     )
121 |     stmt = stmt.returning(Document)
122 |     result = await db.execute(stmt)
123 |     upserted_doc = schema.Document.model_validate(result.scalars().first(), from_attributes=True)
124 |     await db.commit()
125 |     return upserted_doc
126 | 
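# Illustrative usage sketch (not part of the original module). A minimal
# example of how the helpers above fit together: upsert a document by URL,
# then fetch it back by id. Assumes the local Postgres from `make migrate` is
# running and that `schema.Document` only requires `url` plus an optional
# `metadata_map`; the example URL is hypothetical.
if __name__ == "__main__":
    import asyncio

    from app.db.session import SessionLocal

    async def _demo() -> None:
        async with SessionLocal() as db:
            doc = schema.Document(
                url="https://example.com/filings/demo.pdf",  # hypothetical URL
                metadata_map={},
            )
            upserted = await upsert_document_by_url(db, doc)
            fetched = await fetch_documents(db, id=str(upserted.id))
            print(fetched)

    asyncio.run(_demo())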
--------------------------------------------------------------------------------
/backend/app/api/deps.py:
--------------------------------------------------------------------------------
1 | from typing import AsyncGenerator
2 | from sqlalchemy.ext.asyncio import AsyncSession
3 | from app.db.session import SessionLocal
4 | 
5 | 
6 | async def get_db() -> AsyncGenerator[AsyncSession, None]:
7 |     async with SessionLocal() as db:
8 |         yield db
9 | 
--------------------------------------------------------------------------------
/backend/app/api/endpoints/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/api/endpoints/__init__.py
--------------------------------------------------------------------------------
/backend/app/api/endpoints/documents.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | import logging
3 | from fastapi import Depends, APIRouter, HTTPException, Query
4 | from sqlalchemy.ext.asyncio import AsyncSession
5 | from uuid import UUID
6 | 
7 | from app.api.deps import get_db
8 | from app.api import crud
9 | from app import schema
10 | 
11 | router = APIRouter()
12 | logger = logging.getLogger(__name__)
13 | 
14 | 
15 | @router.get("/")
16 | async def get_documents(
17 |     document_ids: Optional[List[UUID]] = Query(None),
18 |     db: AsyncSession = Depends(get_db),
19 | ) -> List[schema.Document]:
20 |     """
21 |     Get all documents or documents by their ids
22 |     """
23 |     if document_ids is None:
24 |         # If no ids provided, fetch all documents
25 |         docs = await crud.fetch_documents(db)
26 |     else:
27 |         # If ids are provided, fetch documents by ids
28 |         docs = await crud.fetch_documents(db, ids=document_ids)
29 | 
30 |     if len(docs) == 0:
31 |         raise HTTPException(status_code=404, detail="Document(s) not found")
32 | 
33 |     return docs
34 | 
35 | 
36 | @router.get("/{document_id}")
37 | async def get_document(
38 |     document_id: UUID,
39 |     db: AsyncSession = Depends(get_db),
40 | ) -> schema.Document:
41 |     """
42 |     Get a single document by its id
43 |     """
44 |     docs = await crud.fetch_documents(db, id=document_id)
45 |     if len(docs) == 0:
46 |         raise HTTPException(status_code=404, detail="Document not found")
47 | 
48 |     return docs[0]
49 | 
--------------------------------------------------------------------------------
/backend/app/api/endpoints/health.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | 
3 | from fastapi import APIRouter, Depends
4 | from sqlalchemy.ext.asyncio import AsyncSession
5 | from sqlalchemy.sql import text
6 | from app.api import deps
7 | 
8 | router = APIRouter()
9 | 
10 | 
11 | @router.get("/")
12 | async def health(db: AsyncSession = Depends(deps.get_db)) -> Dict[str, str]:
13 |     """
14 |     Health check endpoint that also verifies database connectivity.
15 |     """
16 |     await db.execute(text("SELECT 1"))
17 |     return {"status": "alive"}
18 | 
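# Illustrative check of the route above (not part of the original module).
# Assumes the FastAPI app mounts `api_router` from app/api/api.py under "/api"
# (an assumption; adjust to the actual mount prefix in app/main.py), that the
# backend is running locally on port 8000 via `make run`, and that `httpx` is
# available in the environment.
if __name__ == "__main__":
    import asyncio

    import httpx

    async def _check() -> None:
        async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
            resp = await client.get("/api/health/")
            # Expect a 200 with {"status": "alive"} when the DB is reachable
            print(resp.status_code, resp.json())

    asyncio.run(_check())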
15 | """ 16 | await db.execute(text("SELECT 1")) 17 | return {"status": "alive"} 18 | -------------------------------------------------------------------------------- /backend/app/chat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/chat/__init__.py -------------------------------------------------------------------------------- /backend/app/chat/constants.py: -------------------------------------------------------------------------------- 1 | DB_DOC_ID_KEY = "db_document_id" 2 | 3 | SYSTEM_MESSAGE = """ 4 | You are an expert financial analyst that always answers questions with the most relevant information using the tools at your disposal. 5 | These tools have information regarding companies that the user has expressed interest in. 6 | Here are some guidelines that you must follow: 7 | * For financial questions, you must use the tools to find the answer and then write a response. 8 | * Even if it seems like your tools won't be able to answer the question, you must still use them to find the most relevant information and insights. Not using them will appear as if you are not doing your job. 9 | * You may assume that the users financial questions are related to the documents they've selected. 10 | * For any user message that isn't related to financial analysis, respectfully decline to respond and suggest that the user ask a relevant question. 11 | * If your tools are unable to find an answer, you should say that you haven't found an answer but still relay any useful information the tools found. 12 | 13 | The tools at your disposal have access to the following SEC documents that the user has selected to discuss with you: 14 | {doc_titles} 15 | 16 | The current date is: {curr_date} 17 | """.strip() 18 | 19 | NODE_PARSER_CHUNK_SIZE = 512 20 | NODE_PARSER_CHUNK_OVERLAP = 10 21 | -------------------------------------------------------------------------------- /backend/app/chat/messaging.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Optional, List 2 | import asyncio 3 | import logging 4 | from uuid import uuid4 5 | from anyio import ClosedResourceError 6 | from anyio.streams.memory import MemoryObjectSendStream 7 | 8 | from llama_index.core.callbacks.base import BaseCallbackHandler 9 | from llama_index.core.callbacks.schema import CBEventType, EventPayload 10 | from llama_index.core.query_engine.sub_question_query_engine import SubQuestionAnswerPair 11 | from llama_index.core.chat_engine.types import StreamingAgentChatResponse 12 | from pydantic import BaseModel 13 | 14 | from app import schema 15 | from app.schema import SubProcessMetadataKeysEnum, SubProcessMetadataMap 16 | from app.models.db import MessageSubProcessSourceEnum 17 | from app.chat.engine import get_chat_engine 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | class StreamedMessage(BaseModel): 23 | content: str 24 | 25 | 26 | class StreamedMessageSubProcess(BaseModel): 27 | source: MessageSubProcessSourceEnum 28 | has_ended: bool 29 | event_id: str 30 | metadata_map: Optional[SubProcessMetadataMap] = None 31 | 32 | 33 | class ChatCallbackHandler(BaseCallbackHandler): 34 | def __init__( 35 | self, 36 | send_chan: MemoryObjectSendStream, 37 | ): 38 | """Initialize the base callback handler.""" 39 | ignored_events = [CBEventType.CHUNKING, CBEventType.NODE_PARSING] 40 | 
super().__init__(ignored_events, ignored_events) 41 | self._send_chan = send_chan 42 | 43 | def on_event_start( 44 | self, 45 | event_type: CBEventType, 46 | payload: Optional[Dict[str, Any]] = None, 47 | event_id: str = "", 48 | **kwargs: Any, 49 | ) -> str: 50 | """Create the MessageSubProcess row for the event that started.""" 51 | asyncio.create_task( 52 | self.async_on_event( 53 | event_type, payload, event_id, is_start_event=True, **kwargs) 54 | ) 55 | return event_id 56 | 57 | def on_event_end( 58 | self, 59 | event_type: CBEventType, 60 | payload: Optional[Dict[str, Any]] = None, 61 | event_id: str = "", 62 | **kwargs: Any, 63 | ) -> None: 64 | """Create the MessageSubProcess row for the event that completed.""" 65 | asyncio.create_task( 66 | self.async_on_event( 67 | event_type, payload, event_id, is_start_event=False, **kwargs 68 | ) 69 | ) 70 | 71 | def get_metadata_from_event( 72 | self, 73 | event_type: CBEventType, 74 | payload: Optional[Dict[str, Any]] = None, 75 | is_start_event: bool = False, 76 | ) -> SubProcessMetadataMap: 77 | metadata_map = {} 78 | 79 | if ( 80 | event_type == CBEventType.SUB_QUESTION 81 | and EventPayload.SUB_QUESTION in (payload or {}) 82 | ): 83 | sub_q: SubQuestionAnswerPair = payload[EventPayload.SUB_QUESTION] 84 | metadata_map[ 85 | SubProcessMetadataKeysEnum.SUB_QUESTION.value 86 | ] = schema.QuestionAnswerPair.from_sub_question_answer_pair(sub_q).model_dump() 87 | return metadata_map 88 | 89 | async def async_on_event( 90 | self, 91 | event_type: CBEventType, 92 | payload: Optional[Dict[str, Any]] = None, 93 | event_id: str = "", 94 | is_start_event: bool = False, 95 | **kwargs: Any, 96 | ) -> None: 97 | metadata_map = self.get_metadata_from_event( 98 | event_type, payload=payload, is_start_event=is_start_event 99 | ) 100 | metadata_map = metadata_map or None 101 | source = MessageSubProcessSourceEnum[event_type.name] 102 | if self._send_chan._closed: 103 | logger.debug("Received event after send channel closed. Ignoring.") 104 | return 105 | try: 106 | await self._send_chan.send( 107 | StreamedMessageSubProcess( 108 | source=source, 109 | metadata_map=metadata_map, 110 | event_id=event_id, 111 | has_ended=not is_start_event, 112 | ) 113 | ) 114 | except ClosedResourceError: 115 | logger.exception("Tried sending SubProcess event %s after channel was closed", f"(source={source})") 116 | 117 | def start_trace(self, trace_id: Optional[str] = None) -> None: 118 | """No-op.""" 119 | 120 | def end_trace( 121 | self, 122 | trace_id: Optional[str] = None, 123 | trace_map: Optional[Dict[str, List[str]]] = None, 124 | ) -> None: 125 | """No-op.""" 126 | 127 | 128 | async def handle_chat_message( 129 | conversation: schema.Conversation, 130 | user_message: schema.UserMessageCreate, 131 | send_chan: MemoryObjectSendStream, 132 | ) -> None: 133 | async with send_chan: 134 | chat_engine = await get_chat_engine( 135 | ChatCallbackHandler(send_chan), conversation 136 | ) 137 | await send_chan.send( 138 | StreamedMessageSubProcess( 139 | event_id=str(uuid4()), 140 | has_ended=True, 141 | source=MessageSubProcessSourceEnum.CONSTRUCTED_QUERY_ENGINE, 142 | ) 143 | ) 144 | logger.debug("Engine received") 145 | templated_message = f""" 146 | Remember - if I have asked a relevant financial question, use your tools. 
147 | 148 | {user_message.content} 149 | """.strip() 150 | streaming_chat_response: StreamingAgentChatResponse = ( 151 | await chat_engine.astream_chat(templated_message) 152 | ) 153 | response_str = "" 154 | async for text in streaming_chat_response.async_response_gen(): 155 | response_str += text 156 | if send_chan._closed: 157 | logger.debug( 158 | "Received streamed token after send channel closed. Ignoring." 159 | ) 160 | return 161 | await send_chan.send(StreamedMessage(content=response_str)) 162 | 163 | if response_str.strip() == "": 164 | await send_chan.send( 165 | StreamedMessage( 166 | content="Sorry, I either wasn't able to understand your question or I don't have an answer for it." 167 | ) 168 | ) 169 | -------------------------------------------------------------------------------- /backend/app/chat/pg_vector.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.vector_stores.types import VectorStore 2 | from llama_index.vector_stores.postgres import PGVectorStore 3 | from sqlalchemy.engine import make_url 4 | from app.db.session import SessionLocal as AppSessionLocal, engine as app_engine 5 | import sqlalchemy 6 | from sqlalchemy import create_engine 7 | from sqlalchemy.orm import sessionmaker 8 | from app.core.config import settings 9 | 10 | singleton_instance = None 11 | did_run_setup = False 12 | 13 | 14 | class CustomPGVectorStore(PGVectorStore): 15 | """ 16 | Custom PGVectorStore that uses the same connection pool as the FastAPI app. 17 | """ 18 | 19 | def _connect(self) -> None: 20 | self._engine = create_engine(self.connection_string) 21 | self._session = sessionmaker(self._engine) 22 | 23 | # Use our existing app engine and session so we can use the same connection pool 24 | self._async_engine = app_engine 25 | self._async_session = AppSessionLocal 26 | 27 | async def close(self) -> None: 28 | self._session.close_all() 29 | self._engine.dispose() 30 | 31 | await self._async_engine.dispose() 32 | 33 | def _create_tables_if_not_exists(self) -> None: 34 | pass 35 | 36 | def _create_extension(self) -> None: 37 | pass 38 | 39 | async def run_setup(self) -> None: 40 | global did_run_setup 41 | if did_run_setup: 42 | return 43 | self._initialize() 44 | async with self._async_session() as session: 45 | async with session.begin(): 46 | statement = sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS vector") 47 | await session.execute(statement) 48 | await session.commit() 49 | 50 | async with self._async_session() as session: 51 | async with session.begin(): 52 | conn = await session.connection() 53 | await conn.run_sync(self._base.metadata.create_all) 54 | did_run_setup = True 55 | 56 | 57 | async def get_vector_store_singleton() -> VectorStore: 58 | global singleton_instance 59 | if singleton_instance is not None: 60 | return singleton_instance 61 | url = make_url(settings.DATABASE_URL) 62 | singleton_instance = CustomPGVectorStore.from_params( 63 | url.host, 64 | url.port or 5432, 65 | url.database, 66 | url.username, 67 | url.password, 68 | settings.VECTOR_STORE_TABLE_NAME, 69 | ) 70 | return singleton_instance 71 | -------------------------------------------------------------------------------- /backend/app/chat/qa_response_synth.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from llama_index.core.response_synthesizers import BaseSynthesizer 3 | from llama_index.core.prompts.prompts import RefinePrompt, QuestionAnswerPrompt 4 | from 
llama_index.core.prompts.prompt_type import PromptType 5 | from app.schema import Document as DocumentSchema 6 | from app.chat.utils import build_title_for_document 7 | from llama_index.core.callbacks import CallbackManager 8 | from llama_index.core.response_synthesizers.factory import get_response_synthesizer 9 | 10 | 11 | def get_custom_response_synth( 12 | callback_manager: CallbackManager, documents: List[DocumentSchema] 13 | ) -> BaseSynthesizer: 14 | doc_titles = "\n".join("- " + build_title_for_document(doc) for doc in documents) 15 | refine_template_str = f""" 16 | A user has selected a set of SEC filing documents and has asked a question about them. \ 17 | The SEC documents have the following titles: 18 | {doc_titles} 19 | The original query is as follows: {{query_str}} 20 | We have provided an existing answer: {{existing_answer}} 21 | We have the opportunity to refine the existing answer \ 22 | (only if needed) with some more context below. 23 | ------------ 24 | {{context_msg}} 25 | ------------ 26 | Given the new context, refine the original answer to better \ 27 | answer the query. \ 28 | If the context isn't useful, return the original answer. 29 | Refined Answer: 30 | """.strip() 31 | refine_prompt = RefinePrompt( 32 | template=refine_template_str, 33 | prompt_type=PromptType.REFINE, 34 | ) 35 | 36 | qa_template_str = f""" 37 | A user has selected a set of SEC filing documents and has asked a question about them. \ 38 | The SEC documents have the following titles: 39 | {doc_titles} 40 | Context information is below. 41 | --------------------- 42 | {{context_str}} 43 | --------------------- 44 | Given the context information and not prior knowledge, \ 45 | answer the query. 46 | Query: {{query_str}} 47 | Answer: 48 | """.strip() 49 | qa_prompt = QuestionAnswerPrompt( 50 | template=qa_template_str, 51 | prompt_type=PromptType.QUESTION_ANSWER, 52 | ) 53 | 54 | return get_response_synthesizer( 55 | callback_manager=callback_manager, 56 | refine_template=refine_prompt, 57 | text_qa_template=qa_prompt, 58 | # only useful for gpt-3.5 59 | structured_answer_filtering=False, 60 | ) 61 | -------------------------------------------------------------------------------- /backend/app/chat/tools.py: -------------------------------------------------------------------------------- 1 | from typing import List, cast 2 | import logging 3 | 4 | # This is from the unofficial polygon.io client: https://polygon.readthedocs.io/ 5 | from polygon.reference_apis import ReferenceClient 6 | from polygon.reference_apis.reference_api import AsyncReferenceClient 7 | 8 | # This is from the official polygon.io client: https://polygon-api-client.readthedocs.io/ 9 | from polygon.rest.models import StockFinancial 10 | 11 | from app.schema import ( 12 | Document as DocumentSchema, 13 | DocumentMetadataKeysEnum, 14 | SecDocumentMetadata, 15 | ) 16 | from llama_index.core.tools import FunctionTool, ToolMetadata, QueryEngineTool 17 | from llama_index.core.callbacks import CallbackManager 18 | from llama_index.core import Settings 19 | from llama_index.agent.openai import OpenAIAgent 20 | from app.core.config import settings 21 | from app.chat.utils import build_title_for_document 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | 28 | 29 | 30 | def describe_financials(financials: StockFinancial) -> str: 31 | sentences: List[str] = [] 32 | 33 | company = financials.company_name 34 | fiscal_year = financials.fiscal_year 35 | fiscal_period = financials.fiscal_period 36 
| 37 | sentences.append( 38 | f"For {company} in fiscal year {fiscal_year} covering the period {fiscal_period}:" 39 | ) 40 | 41 | income_statement = financials.financials.income_statement 42 | 43 | if income_statement: 44 | revenues = income_statement.revenues 45 | if revenues: 46 | revenue_str = f"{revenues.label}: {revenues.value} {revenues.unit}" 47 | sentences.append(f"Revenues were {revenue_str}.") 48 | 49 | expenses = income_statement.operating_expenses 50 | if expenses: 51 | expenses_str = f"{expenses.label}: {expenses.value} {expenses.unit}" 52 | sentences.append(f"Operating expenses were {expenses_str}.") 53 | 54 | gross_profit = income_statement.gross_profit 55 | if gross_profit: 56 | gross_profit_str = f"{gross_profit.value} {gross_profit.unit}" 57 | sentences.append(f"Gross profit was {gross_profit_str}.") 58 | 59 | net_income = ( 60 | financials.financials.comprehensive_income.comprehensive_income_loss_attributable_to_parent 61 | ) 62 | if net_income: 63 | net_income_str = f"{net_income.label}: {net_income.value} {net_income.unit}" 64 | sentences.append(f"Net income was {net_income_str}.") 65 | 66 | cash_flows = financials.financials.cash_flow_statement 67 | if cash_flows: 68 | operating_cash_flows = cash_flows.net_cash_flow 69 | if operating_cash_flows: 70 | operating_str = f"{operating_cash_flows.label}: {operating_cash_flows.value} {operating_cash_flows.unit}" 71 | sentences.append(f"Net cash from operating activities was {operating_str}.") 72 | 73 | financing_cash_flows = cash_flows.net_cash_flow_from_financing_activities 74 | if financing_cash_flows: 75 | financing_str = f"{financing_cash_flows.label}: {financing_cash_flows.value} {financing_cash_flows.unit}" 76 | sentences.append(f"Net cash from financing activities was {financing_str}.") 77 | 78 | return " ".join(sentences) 79 | 80 | 81 | def get_tool_metadata_for_document(doc: DocumentSchema) -> ToolMetadata: 82 | doc_title = build_title_for_document(doc) 83 | name = f"extract_json_from_sec_document[{doc_title}]" 84 | description = f"Returns basic financial data extracted from the SEC filing document {doc_title}" 85 | return ToolMetadata( 86 | name=name, 87 | description=description, 88 | ) 89 | 90 | 91 | def get_polygon_io_sec_tool(document: DocumentSchema) -> FunctionTool: 92 | sec_metadata = SecDocumentMetadata.parse_obj( 93 | document.metadata_map[DocumentMetadataKeysEnum.SEC_DOCUMENT] 94 | ) 95 | tool_metadata = get_tool_metadata_for_document(document) 96 | 97 | async def extract_data_from_sec_document(*args, **kwargs) -> List[str]: 98 | try: 99 | client = ReferenceClient( 100 | api_key=settings.POLYGON_IO_API_KEY, 101 | connect_timeout=10, 102 | read_timeout=10, 103 | max_connections=20, 104 | use_async=True, 105 | ) 106 | client = cast(AsyncReferenceClient, client) 107 | response_dict = await client.get_stock_financials_vx( 108 | ticker=sec_metadata.company_ticker, 109 | period_of_report_date=str(sec_metadata.period_of_report_date.date()), 110 | limit=100, # max limit is 100 111 | ) 112 | stock_financials = [] 113 | for result_dict in response_dict["results"]: 114 | stock_financials.append(StockFinancial.from_dict(result_dict)) 115 | 116 | descriptions = [] 117 | for stock_financial in stock_financials: 118 | description = describe_financials(stock_financial) 119 | logger.debug( 120 | "Built the following description for document_id=%s: %s", 121 | str(document.id), 122 | description, 123 | ) 124 | descriptions.append(description) 125 | return descriptions 126 | except Exception: 127 | logger.error( 128 | "Error retrieving 
data from polygon.io for document_id %s", 129 | str(document.id), 130 | exc_info=True, 131 | ) 132 | return ["No answer found."] 133 | 134 | def sync_func_placeholder(*args, **kwargs) -> None: 135 | raise NotImplementedError( 136 | "Sync function was called for document_id=" + str(document.id) 137 | ) 138 | 139 | return FunctionTool.from_defaults( 140 | fn=sync_func_placeholder, 141 | async_fn=extract_data_from_sec_document, 142 | description=tool_metadata.description, 143 | ) 144 | 145 | 146 | def get_api_query_engine_tool( 147 | document: DocumentSchema, callback_manager: CallbackManager, 148 | ) -> QueryEngineTool: 149 | polygon_io_tool = get_polygon_io_sec_tool(document) 150 | tool_metadata = get_tool_metadata_for_document(document) 151 | doc_title = build_title_for_document(document) 152 | llm = Settings.llm.model_copy( 153 | update={"callback_manager": callback_manager}, 154 | deep=True 155 | ) 156 | agent = OpenAIAgent.from_tools( 157 | [polygon_io_tool], 158 | llm=llm, 159 | callback_manager=callback_manager, 160 | system_prompt=f"You are an agent that is asked quantitative questions about a SEC filing named {doc_title} and you answer them by using your tools.", 161 | ) 162 | return QueryEngineTool.from_defaults( 163 | query_engine=agent, 164 | name=tool_metadata.name, 165 | description=tool_metadata.description, 166 | ) 167 | -------------------------------------------------------------------------------- /backend/app/chat/utils.py: -------------------------------------------------------------------------------- 1 | from app.schema import ( 2 | Document as DocumentSchema, 3 | DocumentMetadataKeysEnum, 4 | SecDocumentMetadata, 5 | ) 6 | 7 | 8 | def build_title_for_document(document: DocumentSchema) -> str: 9 | if DocumentMetadataKeysEnum.SEC_DOCUMENT not in document.metadata_map: 10 | return "No Title Document" 11 | 12 | sec_metadata = SecDocumentMetadata.parse_obj( 13 | document.metadata_map[DocumentMetadataKeysEnum.SEC_DOCUMENT] 14 | ) 15 | time_period = ( 16 | f"{sec_metadata.year} Q{sec_metadata.quarter}" 17 | if sec_metadata.quarter is not None 18 | else str(sec_metadata.year) 19 | ) 20 | return f"{sec_metadata.company_name} ({sec_metadata.company_ticker}) {sec_metadata.doc_type.value} ({time_period})" 21 | -------------------------------------------------------------------------------- /backend/app/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/core/__init__.py -------------------------------------------------------------------------------- /backend/app/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from enum import Enum 3 | from typing import List, Union, Optional, Literal 4 | from pydantic_settings import BaseSettings, SettingsConfigDict 5 | from pydantic import AnyHttpUrl, EmailStr, field_validator 6 | 7 | 8 | class AppEnvironment(str, Enum): 9 | """ 10 | Enum for app environments. 
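LOCAL is a developer machine, PREVIEW a PR preview deployment, and PRODUCTION the live service.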
11 | """ 12 | 13 | LOCAL = "local" 14 | PREVIEW = "preview" 15 | PRODUCTION = "production" 16 | 17 | 18 | is_pull_request: bool = os.environ.get("IS_PULL_REQUEST") == "true" 19 | is_preview_env: bool = os.environ.get("IS_PREVIEW_ENV") == "true" 20 | 21 | model_config = SettingsConfigDict( 22 | env_prefix="PREVIEW_" if is_pull_request or is_preview_env else "" 23 | ) 24 | 25 | class PreviewPrefixedSettings(BaseSettings): 26 | """ 27 | Settings class that uses a different env_prefix for PR Preview deployments. 28 | 29 | PR Preview deployments should source their secret environment variables with 30 | the `PREVIEW_` prefix, while regular deployments should source them from the 31 | environment variables with no prefix. 32 | 33 | Some environment variables (like `DATABASE_URL`) use Render.com's capability to 34 | automatically set environment variables to their preview value for PR Preview 35 | deployments, so they are not prefixed. 36 | """ 37 | 38 | OPENAI_API_KEY: str 39 | AWS_KEY: str 40 | AWS_SECRET: str 41 | POLYGON_IO_API_KEY: str 42 | 43 | model_config = model_config 44 | 45 | 46 | class Settings(PreviewPrefixedSettings): 47 | """ 48 | Application settings. 49 | """ 50 | 51 | PROJECT_NAME: str = "llama_app" 52 | API_PREFIX: str = "/api" 53 | DATABASE_URL: str 54 | LOG_LEVEL: str = "DEBUG" 55 | IS_PULL_REQUEST: bool = False 56 | RENDER: bool = False 57 | CODESPACES: bool = False 58 | CODESPACE_NAME: Optional[str] = None 59 | S3_BUCKET_NAME: str 60 | S3_ASSET_BUCKET_NAME: str 61 | CDN_BASE_URL: str 62 | VECTOR_STORE_TABLE_NAME: str = "pg_vector_store" 63 | SENTRY_DSN: Optional[str] = None 64 | RENDER_GIT_COMMIT: Optional[str] = None 65 | LOADER_IO_VERIFICATION_STR: str = "loaderio-e51043c635e0f4656473d3570ae5d9ec" 66 | SEC_EDGAR_COMPANY_NAME: str = "YourOrgName" 67 | SEC_EDGAR_EMAIL: EmailStr = "you@example.com" 68 | OPENAI_CHAT_LLM_NAME: str = "gpt-4o-mini" 69 | 70 | # BACKEND_CORS_ORIGINS is a JSON-formatted list of origins 71 | # e.g: '["http://localhost", "http://localhost:4200", "http://localhost:3000", \ 72 | # "http://localhost:8080", "http://local.dockertoolbox.tiangolo.com"]' 73 | BACKEND_CORS_ORIGINS: List[AnyHttpUrl | Literal["*"]] = [] 74 | 75 | @property 76 | def VERBOSE(self) -> bool: 77 | """ 78 | Used for setting verbose flag in LlamaIndex modules. 79 | """ 80 | return self.LOG_LEVEL == "DEBUG" or self.IS_PULL_REQUEST or not self.RENDER 81 | 82 | @property 83 | def S3_ENDPOINT_URL(self) -> str: 84 | """ 85 | Used for setting S3 endpoint URL in the s3fs module. 86 | When running locally, this should be set to the localstack endpoint. 
87 | """ 88 | return None if self.RENDER else "http://localhost:4566" 89 | 90 | @field_validator("BACKEND_CORS_ORIGINS", mode='before') 91 | def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]: 92 | if isinstance(v, str) and not v.startswith("["): 93 | return [i.strip() for i in v.split(",")] 94 | elif isinstance(v, (list, str)): 95 | return v 96 | raise ValueError(v) 97 | 98 | @field_validator("DATABASE_URL", mode='before') 99 | def assemble_db_url(cls, v: str) -> str: 100 | """Preprocesses the database URL to make it compatible with asyncpg.""" 101 | if not v or not v.startswith("postgres"): 102 | raise ValueError("Invalid database URL: " + str(v)) 103 | return ( 104 | v.replace("postgres://", "postgresql://") 105 | .replace("postgresql://", "postgresql+asyncpg://") 106 | .strip() 107 | ) 108 | 109 | @field_validator("LOG_LEVEL", mode='before') 110 | def assemble_log_level(cls, v: str) -> str: 111 | """Preprocesses the log level to ensure its validity.""" 112 | v = v.strip().upper() 113 | if v not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: 114 | raise ValueError("Invalid log level: " + str(v)) 115 | return v 116 | 117 | @field_validator("IS_PULL_REQUEST", mode='before') 118 | def assemble_is_pull_request(cls, v: str) -> bool: 119 | """Preprocesses the IS_PULL_REQUEST flag. 120 | 121 | See Render.com docs for more info: 122 | https://render.com/docs/pull-request-previews#how-pull-request-previews-work 123 | """ 124 | if isinstance(v, bool): 125 | return v 126 | return v.lower() == "true" 127 | 128 | @property 129 | def ENVIRONMENT(self) -> AppEnvironment: 130 | """Returns the app environment.""" 131 | if self.RENDER: 132 | if self.IS_PULL_REQUEST: 133 | return AppEnvironment.PREVIEW 134 | else: 135 | return AppEnvironment.PRODUCTION 136 | else: 137 | return AppEnvironment.LOCAL 138 | 139 | @property 140 | def UVICORN_WORKER_COUNT(self) -> int: 141 | if self.ENVIRONMENT == AppEnvironment.LOCAL: 142 | return 1 143 | # The recommended number of workers is (2 x $num_cores) + 1: 144 | # Source: https://docs.gunicorn.org/en/stable/design.html#how-many-workers 145 | # But the Render.com servers don't have enough memory to support that many workers, 146 | # so we instead go by the number of server instances that can be run given the memory 147 | return 3 148 | 149 | @property 150 | def SENTRY_SAMPLE_RATE(self) -> float: 151 | # TODO: before full release, set this to 0.1 for production 152 | return 0.07 if self.ENVIRONMENT == AppEnvironment.PRODUCTION else 1.0 153 | 154 | model_config = SettingsConfigDict(env_prefix="") 155 | 156 | 157 | settings = Settings() 158 | os.environ["OPENAI_API_KEY"] = settings.OPENAI_API_KEY 159 | -------------------------------------------------------------------------------- /backend/app/db/base.py: -------------------------------------------------------------------------------- 1 | # Import all the models, so that Base has them before being 2 | # imported by Alembic 3 | from app.models.base import Base # noqa 4 | from app.models.db import * # noqa 5 | -------------------------------------------------------------------------------- /backend/app/db/session.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.ext.asyncio import create_async_engine 2 | from sqlalchemy.ext.asyncio import async_sessionmaker 3 | 4 | from app.core.config import settings 5 | 6 | engine = create_async_engine( 7 | settings.DATABASE_URL, 8 | pool_pre_ping=True, 9 | pool_size=4, # Number of connections to 
keep open in the pool 10 | max_overflow=4, # Number of connections that can be opened beyond the pool_size 11 | pool_recycle=3600, # Recycle connections after 1 hour 12 | pool_timeout=120, # Raise an exception after 2 minutes if no connection is available from the pool 13 | ) 14 | SessionLocal = async_sessionmaker(autocommit=False, autoflush=False, bind=engine) 15 | -------------------------------------------------------------------------------- /backend/app/db/wait_for_db.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from app.db.session import SessionLocal 3 | from sqlalchemy.sql import text 4 | 5 | 6 | async def check_database_connection(max_attempts: int = 30, sleep_interval: int = 1) -> None: 7 | for attempt in range(1, max_attempts + 1): 8 | try: 9 | async with SessionLocal() as db: 10 | await db.execute(text("SELECT 1")) 11 | print(f"Connected to the database on attempt {attempt}.") 12 | return 13 | except Exception as e: 14 | print(f"Attempt {attempt}: Database is not yet available. Error: {e}") 15 | if attempt == max_attempts: 16 | raise ValueError( 17 | f"Couldn't connect to database after {max_attempts} attempts." 18 | ) from e 19 | await asyncio.sleep(sleep_interval) 20 | -------------------------------------------------------------------------------- /backend/app/llama_index_settings.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import Settings 2 | from llama_index.core.settings import _Settings 3 | from llama_index.llms.openai import OpenAI 4 | from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingMode, OpenAIEmbeddingModelType 5 | from app.core.config import settings 6 | from llama_index.core.node_parser import SentenceSplitter 7 | 8 | from app.chat.constants import ( 9 | NODE_PARSER_CHUNK_OVERLAP, 10 | NODE_PARSER_CHUNK_SIZE, 11 | ) 12 | 13 | def _setup_llama_index_settings() -> _Settings: 14 | Settings.llm = OpenAI( 15 | model=settings.OPENAI_CHAT_LLM_NAME, 16 | api_key=settings.OPENAI_API_KEY 17 | ) 18 | Settings.embed_model = OpenAIEmbedding( 19 | mode=OpenAIEmbeddingMode.SIMILARITY_MODE, 20 | model_type=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL, 21 | api_key=settings.OPENAI_API_KEY, 22 | ) 23 | Settings.node_parser = SentenceSplitter( 24 | chunk_size=NODE_PARSER_CHUNK_SIZE, 25 | chunk_overlap=NODE_PARSER_CHUNK_OVERLAP, 26 | ) 27 | return Settings 28 | -------------------------------------------------------------------------------- /backend/app/loader_io.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Response 2 | from app.core.config import settings 3 | 4 | loader_io_router = APIRouter() 5 | 6 | 7 | @loader_io_router.get("/") 8 | async def get_verification_file() -> Response: 9 | """ 10 | Verification string for loader.io 11 | """ 12 | return Response(settings.LOADER_IO_VERIFICATION_STR, media_type="text/plain") 13 | -------------------------------------------------------------------------------- /backend/app/main.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | import uvicorn 3 | import logging 4 | import sys 5 | import sentry_sdk 6 | from fastapi import FastAPI 7 | from starlette.middleware.cors import CORSMiddleware 8 | from alembic.config import Config 9 | import alembic.config 10 | from alembic import script 11 | from alembic.runtime import migration 12 | from sqlalchemy.engine import 
create_engine, Engine 13 | from llama_index.core.node_parser.text.utils import split_by_sentence_tokenizer 14 | import llama_index.core 15 | 16 | from app.api.api import api_router 17 | from app.db.wait_for_db import check_database_connection 18 | from app.core.config import settings, AppEnvironment 19 | from app.loader_io import loader_io_router 20 | from contextlib import asynccontextmanager 21 | from app.chat.pg_vector import get_vector_store_singleton, CustomPGVectorStore 22 | from app.llama_index_settings import _setup_llama_index_settings 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | def check_current_head(alembic_cfg: Config, connectable: Engine) -> bool: 28 | directory = script.ScriptDirectory.from_config(alembic_cfg) 29 | with connectable.begin() as connection: 30 | context = migration.MigrationContext.configure(connection) 31 | return set(context.get_current_heads()) == set(directory.get_heads()) 32 | 33 | 34 | def __setup_logging(log_level: str): 35 | log_level = getattr(logging, log_level.upper()) 36 | log_formatter = logging.Formatter( 37 | "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s" 38 | ) 39 | root_logger = logging.getLogger() 40 | root_logger.setLevel(log_level) 41 | 42 | stream_handler = logging.StreamHandler(sys.stdout) 43 | stream_handler.setFormatter(log_formatter) 44 | root_logger.addHandler(stream_handler) 45 | logger.info("Set up logging with log level %s", log_level) 46 | 47 | 48 | def __setup_sentry(): 49 | if settings.SENTRY_DSN: 50 | logger.info("Setting up Sentry") 51 | if settings.ENVIRONMENT == AppEnvironment.PRODUCTION: 52 | profiles_sample_rate = None 53 | else: 54 | profiles_sample_rate = settings.SENTRY_SAMPLE_RATE 55 | sentry_sdk.init( 56 | dsn=settings.SENTRY_DSN, 57 | environment=settings.ENVIRONMENT.value, 58 | release=settings.RENDER_GIT_COMMIT, 59 | debug=settings.VERBOSE, 60 | traces_sample_rate=settings.SENTRY_SAMPLE_RATE, 61 | profiles_sample_rate=profiles_sample_rate, 62 | ) 63 | else: 64 | logger.info("Skipping Sentry setup") 65 | 66 | 67 | @asynccontextmanager 68 | async def lifespan(app: FastAPI): 69 | # first wait for DB to be connectable 70 | await check_database_connection() 71 | cfg = Config("alembic.ini") 72 | # Change DB URL to use psycopg2 driver for this specific check 73 | db_url = settings.DATABASE_URL.replace( 74 | "postgresql+asyncpg://", "postgresql+psycopg2://" 75 | ) 76 | cfg.set_main_option("sqlalchemy.url", db_url) 77 | engine = create_engine(db_url, echo=True) 78 | if not check_current_head(cfg, engine): 79 | raise Exception( 80 | "Database is not up to date. Please run `poetry run alembic upgrade head`" 81 | ) 82 | # initialize pg vector store singleton 83 | vector_store = await get_vector_store_singleton() 84 | vector_store = cast(CustomPGVectorStore, vector_store) 85 | await vector_store.run_setup() 86 | 87 | try: 88 | # Some setup is required to initialize the llama-index sentence splitter 89 | split_by_sentence_tokenizer() 90 | except FileExistsError: 91 | # Sometimes seen in deployments, should be benign. 
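# (Likely a race between workers concurrently downloading the NLTK sentence-tokenizer data.)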
92 | logger.info("Tried to re-download NLTK files but they already exist.") 93 | 94 | if not settings.RENDER: 95 | llama_index.core.set_global_handler("arize_phoenix") 96 | 97 | yield 98 | # This section is run on app shutdown 99 | await vector_store.close() 100 | 101 | 102 | app = FastAPI( 103 | title=settings.PROJECT_NAME, 104 | openapi_url=f"{settings.API_PREFIX}/openapi.json", 105 | lifespan=lifespan, 106 | ) 107 | 108 | 109 | if settings.BACKEND_CORS_ORIGINS: 110 | origins = settings.BACKEND_CORS_ORIGINS.copy() 111 | if settings.CODESPACES and settings.CODESPACE_NAME and \ 112 | settings.ENVIRONMENT == AppEnvironment.LOCAL: 113 | # add codespace origin if running in a GitHub Codespace 114 | origins.append(f"https://{settings.CODESPACE_NAME}-3000.app.github.dev") 115 | # register the CORS middleware for the allowed origins 116 | app.add_middleware( 117 | CORSMiddleware, 118 | allow_origins=[str(origin) for origin in origins], 119 | allow_origin_regex=r"https://llama-app-frontend.*\.vercel\.app", 120 | allow_credentials=True, 121 | allow_methods=["*"], 122 | allow_headers=["*"], 123 | ) 124 | 125 | app.include_router(api_router, prefix=settings.API_PREFIX) 126 | app.mount(f"/{settings.LOADER_IO_VERIFICATION_STR}", loader_io_router) 127 | 128 | 129 | def start(): 130 | """Launched with `poetry run start` at root level.""" 131 | print("Running in AppEnvironment: " + settings.ENVIRONMENT.value) 132 | __setup_logging(settings.LOG_LEVEL) 133 | __setup_sentry() 134 | _setup_llama_index_settings() 135 | if settings.RENDER: 136 | # on render.com deployments, run migrations 137 | logger.debug("Running migrations") 138 | alembic_args = ["--raiseerr", "upgrade", "head"] 139 | alembic.config.main(argv=alembic_args) 140 | logger.debug("Migrations complete") 141 | else: 142 | logger.debug("Skipping migrations") 143 | live_reload = not settings.RENDER 144 | uvicorn.run( 145 | "app.main:app", 146 | host="0.0.0.0", 147 | port=8000, 148 | reload=live_reload, 149 | workers=settings.UVICORN_WORKER_COUNT, 150 | ) 151 | -------------------------------------------------------------------------------- /backend/app/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/backend/app/models/__init__.py -------------------------------------------------------------------------------- /backend/app/models/base.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, DateTime, UUID 2 | 3 | from sqlalchemy.sql import func 4 | from sqlalchemy.ext.declarative import as_declarative, declared_attr 5 | 6 | 7 | @as_declarative() 8 | class Base: 9 | id = Column(UUID, primary_key=True, index=True, default=func.uuid_generate_v4()) 10 | created_at = Column(DateTime, server_default=func.now(), nullable=False) 11 | updated_at = Column( 12 | DateTime, server_default=func.now(), onupdate=func.now(), nullable=False 13 | ) 14 | 15 | __name__: str 16 | 17 | # Generate __tablename__ automatically 18 | @declared_attr 19 | def __tablename__(cls) -> str: 20 | return cls.__name__.lower() 21 | -------------------------------------------------------------------------------- /backend/app/models/db.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, String, ForeignKey 2 | from sqlalchemy.dialects.postgresql import UUID, ENUM, JSONB 3 | from sqlalchemy.orm import relationship 4 | from enum import Enum 5 | from 
llama_index.core.callbacks.schema import CBEventType 6 | from app.models.base import Base 7 | 8 | 9 | class MessageRoleEnum(str, Enum): 10 | user = "user" 11 | assistant = "assistant" 12 | 13 | 14 | class MessageStatusEnum(str, Enum): 15 | PENDING = "PENDING" 16 | SUCCESS = "SUCCESS" 17 | ERROR = "ERROR" 18 | 19 | 20 | class MessageSubProcessStatusEnum(str, Enum): 21 | PENDING = "PENDING" 22 | FINISHED = "FINISHED" 23 | 24 | 25 | # python doesn't allow enums to be extended, so we have to do this 26 | additional_message_subprocess_fields = { 27 | "CONSTRUCTED_QUERY_ENGINE": "constructed_query_engine", 28 | "SUB_QUESTIONS": "sub_questions", 29 | } 30 | MessageSubProcessSourceEnum = Enum( 31 | "MessageSubProcessSourceEnum", 32 | [(event_type.name, event_type.value) for event_type in CBEventType] 33 | + list(additional_message_subprocess_fields.items()), 34 | ) 35 | 36 | 37 | def to_pg_enum(enum_class) -> ENUM: 38 | return ENUM(enum_class, name=enum_class.__name__) 39 | 40 | 41 | class Document(Base): 42 | """ 43 | A document along with its metadata 44 | """ 45 | 46 | # URL to the actual document (e.g. a PDF) 47 | url = Column(String, nullable=False, unique=True) 48 | metadata_map = Column(JSONB, nullable=True) 49 | conversations = relationship("ConversationDocument", back_populates="document") 50 | 51 | 52 | class Conversation(Base): 53 | """ 54 | A conversation with messages and linked documents 55 | """ 56 | 57 | messages = relationship("Message", back_populates="conversation") 58 | conversation_documents = relationship( 59 | "ConversationDocument", back_populates="conversation" 60 | ) 61 | 62 | 63 | class ConversationDocument(Base): 64 | """ 65 | A many-to-many relationship between a conversation and a document 66 | """ 67 | 68 | conversation_id = Column( 69 | UUID(as_uuid=True), ForeignKey("conversation.id"), index=True 70 | ) 71 | document_id = Column(UUID(as_uuid=True), ForeignKey("document.id"), index=True) 72 | conversation = relationship("Conversation", back_populates="conversation_documents") 73 | document = relationship("Document", back_populates="conversations") 74 | 75 | 76 | class Message(Base): 77 | """ 78 | A message in a conversation 79 | """ 80 | 81 | conversation_id = Column( 82 | UUID(as_uuid=True), ForeignKey("conversation.id"), index=True 83 | ) 84 | content = Column(String) 85 | role = Column(to_pg_enum(MessageRoleEnum)) 86 | status = Column(to_pg_enum(MessageStatusEnum), default=MessageStatusEnum.PENDING) 87 | conversation = relationship("Conversation", back_populates="messages") 88 | sub_processes = relationship("MessageSubProcess", back_populates="message") 89 | 90 | 91 | class MessageSubProcess(Base): 92 | """ 93 | A record of a sub-process that occurred as part of the generation of a message from an AI assistant 94 | """ 95 | 96 | message_id = Column(UUID(as_uuid=True), ForeignKey("message.id"), index=True) 97 | source = Column(to_pg_enum(MessageSubProcessSourceEnum)) 98 | message = relationship("Message", back_populates="sub_processes") 99 | status = Column( 100 | to_pg_enum(MessageSubProcessStatusEnum), 101 | default=MessageSubProcessStatusEnum.FINISHED, 102 | nullable=False, 103 | ) 104 | metadata_map = Column(JSONB, nullable=True) 105 | -------------------------------------------------------------------------------- /backend/app/schema.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pydantic Schemas for the API 3 | """ 4 | from pydantic import BaseModel, Field, validator 5 | from enum import Enum 6 | from typing 
import List, Optional, Dict, Union, Any 7 | from uuid import UUID 8 | from datetime import datetime 9 | from llama_index.core.schema import BaseNode, NodeWithScore 10 | from llama_index.core.callbacks.schema import EventPayload 11 | from llama_index.core.query_engine.sub_question_query_engine import SubQuestionAnswerPair 12 | from app.models.db import ( 13 | MessageRoleEnum, 14 | MessageStatusEnum, 15 | MessageSubProcessSourceEnum, 16 | MessageSubProcessStatusEnum, 17 | ) 18 | from app.chat.constants import DB_DOC_ID_KEY 19 | 20 | 21 | def build_uuid_validator(*field_names: str): 22 | return validator(*field_names)(lambda x: str(x) if x else x) 23 | 24 | 25 | class Base(BaseModel): 26 | id: Optional[UUID] = Field(None, description="Unique identifier") 27 | created_at: Optional[datetime] = Field(None, description="Creation datetime") 28 | updated_at: Optional[datetime] = Field(None, description="Update datetime") 29 | 30 | class Config: 31 | from_attributes = True 32 | 33 | 34 | class BaseMetadataObject(BaseModel): 35 | class Config: 36 | from_attributes = True 37 | 38 | 39 | class Citation(BaseMetadataObject): 40 | document_id: UUID 41 | text: str 42 | page_number: int 43 | score: Optional[float] = None 44 | 45 | @validator("document_id") 46 | def validate_document_id(cls, value): 47 | if value: 48 | return str(value) 49 | return value 50 | 51 | @classmethod 52 | def from_node(cls, node_w_score: NodeWithScore) -> "Citation": 53 | node: BaseNode = node_w_score.node 54 | page_number = int(node.source_node.metadata["page_label"]) 55 | document_id = node.source_node.metadata[DB_DOC_ID_KEY] 56 | return cls( 57 | document_id=document_id, 58 | text=node.get_content(), 59 | page_number=page_number, 60 | score=node_w_score.score, 61 | ) 62 | 63 | 64 | class QuestionAnswerPair(BaseMetadataObject): 65 | """ 66 | A question-answer pair that is used to store the sub-questions and answers 67 | """ 68 | 69 | question: str 70 | answer: Optional[str] = None 71 | citations: Optional[List[Citation]] = None 72 | 73 | @classmethod 74 | def from_sub_question_answer_pair( 75 | cls, sub_question_answer_pair: SubQuestionAnswerPair 76 | ): 77 | if sub_question_answer_pair.sources is None: 78 | citations = None 79 | else: 80 | citations = [ 81 | Citation.from_node(node_w_score) 82 | for node_w_score in sub_question_answer_pair.sources 83 | if node_w_score.node.source_node is not None 84 | and DB_DOC_ID_KEY in node_w_score.node.source_node.metadata 85 | ] 86 | citations = citations or None 87 | return cls( 88 | question=sub_question_answer_pair.sub_q.sub_question, 89 | answer=sub_question_answer_pair.answer, 90 | citations=citations, 91 | ) 92 | 93 | 94 | # later will be Union[QuestionAnswerPair, more to add later... ] 95 | class SubProcessMetadataKeysEnum(str, Enum): 96 | SUB_QUESTION = EventPayload.SUB_QUESTION.value 97 | 98 | 99 | # keeping the typing pretty loose here, in case there are changes to the metadata data formats. 
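# Keys are normally SubProcessMetadataKeysEnum members, but plain strings are accepted too so older rows keep validating.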
100 | SubProcessMetadataMap = Dict[Union[SubProcessMetadataKeysEnum, str], Any] 101 | 102 | 103 | class MessageSubProcess(Base): 104 | message_id: UUID 105 | source: MessageSubProcessSourceEnum 106 | status: MessageSubProcessStatusEnum 107 | metadata_map: Optional[SubProcessMetadataMap] = None 108 | 109 | 110 | class Message(Base): 111 | conversation_id: UUID 112 | content: str 113 | role: MessageRoleEnum 114 | status: MessageStatusEnum 115 | sub_processes: List[MessageSubProcess] 116 | 117 | 118 | class UserMessageCreate(BaseModel): 119 | content: str 120 | 121 | 122 | class DocumentMetadataKeysEnum(str, Enum): 123 | """ 124 | Enum for the keys of the metadata map for a document 125 | """ 126 | 127 | SEC_DOCUMENT = "sec_document" 128 | 129 | 130 | class SecDocumentTypeEnum(str, Enum): 131 | """ 132 | Enum for the type of sec document 133 | """ 134 | 135 | TEN_K = "10-K" 136 | TEN_Q = "10-Q" 137 | 138 | 139 | class SecDocumentMetadata(BaseModel): 140 | """ 141 | Metadata for a document that is a sec document 142 | """ 143 | 144 | company_name: str 145 | company_ticker: str 146 | doc_type: SecDocumentTypeEnum 147 | year: int 148 | quarter: Optional[int] = None 149 | accession_number: Optional[str] = None 150 | cik: Optional[str] = None 151 | period_of_report_date: Optional[datetime] = None 152 | filed_as_of_date: Optional[datetime] = None 153 | date_as_of_change: Optional[datetime] = None 154 | 155 | 156 | DocumentMetadataMap = Dict[Union[DocumentMetadataKeysEnum, str], Any] 157 | 158 | 159 | class Document(Base): 160 | url: str 161 | metadata_map: Optional[DocumentMetadataMap] = None 162 | 163 | 164 | class Conversation(Base): 165 | messages: List[Message] 166 | documents: List[Document] 167 | 168 | 169 | class ConversationCreate(BaseModel): 170 | document_ids: List[UUID] 171 | -------------------------------------------------------------------------------- /backend/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | llama-app-fastapi: 4 | build: 5 | context: . 
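# no dockerfile key is given, so Compose builds from ./Dockerfile in this backend directory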
6 | volumes: 7 | # allows for live reloading of the app 8 | # when the code within the ./app directory changes 9 | - ./:/app 10 | ports: 11 | - "127.0.0.1:8000:8000" 12 | depends_on: 13 | - db 14 | - phoenix 15 | env_file: 16 | - .env 17 | - .env.docker 18 | environment: 19 | BACKEND_CORS_ORIGINS: '["http://localhost", "http://localhost:8000"]' 20 | 21 | db: 22 | image: ankane/pgvector:v0.5.0 23 | environment: 24 | POSTGRES_USER: user 25 | POSTGRES_PASSWORD: password 26 | POSTGRES_DB: llama_app_db 27 | ports: 28 | - "127.0.0.1:5432:5432" 29 | volumes: 30 | - postgres_data:/var/lib/postgresql/data/ 31 | 32 | localstack: 33 | container_name: "${LOCALSTACK_DOCKER_NAME-localstack_main}" 34 | image: localstack/localstack 35 | ports: 36 | - "127.0.0.1:4566:4566" # LocalStack Gateway 37 | - "127.0.0.1:4510-4559:4510-4559" # external services port range 38 | environment: 39 | - DEBUG=${DEBUG-} 40 | - DOCKER_HOST=unix:///var/run/docker.sock 41 | volumes: 42 | - "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack" 43 | - "/var/run/docker.sock:/var/run/docker.sock" 44 | 45 | # useful for local workflow debugging 46 | # taken from here: https://docs.arize.com/phoenix/deployment/docker#postgresql 47 | phoenix: 48 | image: arizephoenix/phoenix:latest # Must be greater than 4.0 version to work 49 | ports: 50 | - 6006:6006 # PHOENIX_PORT 51 | - 4317:4317 # PHOENIX_GRPC_PORT 52 | - 9090:9090 # [Optional] PROMETHEUS PORT IF ENABLED 53 | 54 | volumes: 55 | postgres_data: 56 | -------------------------------------------------------------------------------- /backend/localstack-cors-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "CORSRules": [ 3 | { 4 | "AllowedHeaders": [ 5 | "*" 6 | ], 7 | "AllowedMethods": [ 8 | "GET", 9 | "HEAD" 10 | ], 11 | "AllowedOrigins": [ 12 | "*" 13 | ], 14 | "ExposeHeaders": [ 15 | "ETag" 16 | ] 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /backend/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "llama-app-backend" 3 | version = "0.1.0" 4 | description = "Backend for Llama App" 5 | authors = ["Sourabh Desai "] 6 | readme = "README.md" 7 | packages = [{include = "app"}] 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11,<3.12" 11 | fastapi = "0.115.11" 12 | pydantic = "^2.9.2" 13 | uvicorn = "^0.22.0" 14 | sqlalchemy = {extras = ["async"], version = "^2.0.15"} 15 | aiosqlite = "^0.19.0" 16 | asyncpg = "^0.29.0" 17 | alembic = "^1.11.1" 18 | psycopg2 = {extras = ["binary"], version = "^2.9.6"} 19 | psycopg2-binary = "^2.9.6" 20 | sse-starlette = "^1.6.1" 21 | pypdf = "^5.3.1" 22 | anyio = "^3.7.0" 23 | s3fs = "^2023.6.0" 24 | fsspec = "^2023.6.0" 25 | pdfkit = "^1.0.0" 26 | pgvector = "^0.3.6" 27 | sentry-sdk = {extras = ["fastapi"], version = "^1.28.1"} 28 | llama-index-core = "0.12.23" 29 | polygon = "^1.2.6" 30 | polygon-api-client = "^1.14.4" 31 | nltk = "^3.8.1" 32 | cachetools = "^5.3.1" 33 | greenlet = "^2.0.2" 34 | email-validator = "^2.0.0.post2" 35 | pydantic-settings = "^2.8.1" 36 | openai = "^1.66.2" 37 | llama-index-vector-stores-postgres = "^0.4.2" 38 | llama-index-embeddings-openai = "^0.3.1" 39 | llama-index-readers-file = "^0.4.6" 40 | llama-index-llms-openai = "^0.3.25" 41 | llama-index-agent-openai = "^0.4.6" 42 | llama-index-question-gen-openai = "^0.3.0" 43 | 44 | 45 | [tool.poetry.group.dev.dependencies] 46 | pylint = "^2.17.4" 47 | pytest = "^7.3.2" 48 | 
sseclient-py = "^1.7.2" 49 | pdfkit = "^1.0.0" 50 | fire = "^0.5.0" 51 | sec-edgar-downloader = "~5.0" 52 | pytickersymbols = "^1.13.0" 53 | awscli-local = "^0.20" 54 | llama-index-callbacks-arize-phoenix = "^0.4.0" 55 | arize-phoenix = "^8.12.1" 56 | 57 | [tool.poetry.scripts] 58 | start = "app.main:start" 59 | migrate_db = "app.main:migrate_db" 60 | 61 | [build-system] 62 | requires = ["poetry-core"] 63 | build-backend = "poetry.core.masonry.api" 64 | -------------------------------------------------------------------------------- /backend/scripts/build_vector_tables.py: -------------------------------------------------------------------------------- 1 | from fire import Fire 2 | from app.chat.pg_vector import get_vector_store_singleton 3 | import asyncio 4 | 5 | async def build_vector_tables(): 6 | vector_store = await get_vector_store_singleton() 7 | await vector_store.run_setup() 8 | 9 | 10 | def main_build_vector_tables(): 11 | """ 12 | Script to build the PGVector table if they don't already exist 13 | """ 14 | asyncio.run(build_vector_tables()) 15 | 16 | if __name__ == "__main__": 17 | Fire(main_build_vector_tables) 18 | -------------------------------------------------------------------------------- /backend/scripts/chat_llama.py: -------------------------------------------------------------------------------- 1 | import cmd 2 | import requests 3 | from sseclient import SSEClient 4 | import json 5 | import random 6 | from urllib.parse import quote 7 | 8 | 9 | def sse_with_requests(url, headers) -> requests.Response: 10 | """Get a streaming response for the given event feed using requests.""" 11 | return requests.get(url, stream=True, headers=headers) 12 | 13 | 14 | class DocumentPickerCmd(cmd.Cmd): 15 | prompt = "(Pick📄) " 16 | 17 | def __init__(self, base_url): 18 | super().__init__() 19 | self.base_url = base_url 20 | self.documents = None 21 | self.selected_documents = [] 22 | 23 | def do_fetch(self, args): 24 | "Get 5 documents: fetch" 25 | response = requests.get(f"{self.base_url}/api/document/") 26 | if response.status_code == 200: 27 | self.documents = random.choices(response.json(), k=5) 28 | for idx, doc in enumerate(self.documents): 29 | print(f"[{idx}]: {doc['url']}") 30 | else: 31 | print(f"Error: {response.text}") 32 | 33 | def do_select(self, document_idx): 34 | "Select a document by its index: select " 35 | if self.documents is None: 36 | print("Please fetch documents first: fetch") 37 | return 38 | try: 39 | idx = int(document_idx) 40 | if idx < len(self.documents): 41 | self.selected_documents.append(self.documents[idx]) 42 | print(f"Selected document: {self.documents[idx]['url']}") 43 | else: 44 | print("Invalid index. Use the GET command to view available documents.") 45 | except ValueError: 46 | print("Invalid index. Please enter a number.") 47 | 48 | def do_select_id(self, document_id): 49 | "Select a document by it's ID" 50 | if not document_id: 51 | print("Please enter a valid document ID") 52 | else: 53 | self.selected_documents.append({"id": document_id}) 54 | print(f"Selected document ID {document_id}") 55 | 56 | def do_finish(self, args): 57 | "Finish the document selection process: FINISH" 58 | if len(self.selected_documents) > 0: 59 | return True 60 | else: 61 | print("No documents selected. Use the SELECT command to select documents.") 62 | 63 | def do_quit(self, args): 64 | "Quits the program." 
65 | print("Quitting document picker.") 66 | raise SystemExit 67 | 68 | 69 | class ConversationCmd(cmd.Cmd): 70 | prompt = "(Chat🦙) " 71 | 72 | def __init__(self, base_url): 73 | super().__init__() 74 | self.base_url = base_url 75 | self.conversation_id = None 76 | self.document_ids = [] 77 | 78 | def do_pick_docs(self, args): 79 | "Pick documents for the new conversation: pick_docs" 80 | picker = DocumentPickerCmd(self.base_url) 81 | try: 82 | picker.cmdloop() 83 | except KeyboardInterrupt: 84 | picker.do_quit("") 85 | except Exception as e: 86 | print(e) 87 | picker.do_quit("") 88 | self.document_ids = [doc["id"] for doc in picker.selected_documents] 89 | 90 | def do_create(self, args): 91 | "Create a new conversation: CREATE" 92 | req_body = {"document_ids": self.document_ids} 93 | response = requests.post(f"{self.base_url}/api/conversation/", json=req_body) 94 | if response.status_code == 200: 95 | self.conversation_id = response.json()["id"] 96 | print(f"Created conversation with ID {self.conversation_id}") 97 | else: 98 | print(f"Error: {response.text}") 99 | 100 | def do_detail(self, args): 101 | "Get the details of the current conversation: DETAIL" 102 | if not self.conversation_id: 103 | print("No active conversation. Use CREATE to start a new conversation.") 104 | return 105 | response = requests.get( 106 | f"{self.base_url}/api/conversation/{self.conversation_id}" 107 | ) 108 | if response.status_code == 200: 109 | print(json.dumps(response.json(), indent=4)) 110 | else: 111 | print(f"Error: {response.text}") 112 | 113 | def do_delete(self, args): 114 | "Delete the current conversation: DELETE" 115 | if not self.conversation_id: 116 | print("No active conversation to delete.") 117 | return 118 | response = requests.delete( 119 | f"{self.base_url}/api/conversation/{self.conversation_id}" 120 | ) 121 | if response.status_code == 204: 122 | print(f"Deleted conversation with ID {self.conversation_id}") 123 | self.conversation_id = None 124 | else: 125 | print(f"Error: {response.text}") 126 | 127 | def do_message(self, message): 128 | "Send a user message to the current conversation and get back the AI's response: MESSAGE " 129 | if not self.conversation_id: 130 | print("No active conversation. Use CREATE to start a new conversation.") 131 | return 132 | message = quote(message.strip()) # URI encode the message 133 | url = f"{self.base_url}/api/conversation/{self.conversation_id}/message?user_message={message}" 134 | headers = {"Accept": "text/event-stream"} 135 | response = sse_with_requests(url, headers) 136 | messages = SSEClient(response).events() 137 | message_idx = 0 138 | final_message = None 139 | for msg in messages: 140 | print(f"\n\n=== Message {message_idx} ===") 141 | msg_json = json.loads(msg.data) 142 | print(msg_json) 143 | final_message = msg_json.get("content") 144 | message_idx += 1 145 | 146 | if final_message is not None: 147 | print(f"\n\n====== Final Message ======") 148 | print(final_message) 149 | 150 | def do_quit(self, args): 151 | "Quits the program." 
152 | print("Quitting.") 153 | raise SystemExit 154 | 155 | 156 | if __name__ == "__main__": 157 | import argparse 158 | 159 | parser = argparse.ArgumentParser(description="Start the chat terminal.") 160 | parser.add_argument( 161 | "--base_url", 162 | type=str, 163 | default="http://localhost:8000", 164 | help="an optional base url for the API endpoints", 165 | ) 166 | args = parser.parse_args() 167 | 168 | cmd = ConversationCmd(args.base_url) 169 | try: 170 | cmd.cmdloop() 171 | except KeyboardInterrupt: 172 | cmd.do_quit("") 173 | except Exception as e: 174 | print(e) 175 | cmd.do_quit("") 176 | -------------------------------------------------------------------------------- /backend/scripts/dedupe_vector_store.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from fire import Fire 3 | from sqlalchemy import text 4 | from app.db.session import SessionLocal 5 | 6 | 7 | async def _async_dedupe_vectore_store(dry_run: bool = False): 8 | async with SessionLocal() as db: 9 | try: 10 | common_table_expression = """ 11 | WITH cte AS ( 12 | SELECT 13 | max(id) as max_id, 14 | text, 15 | (metadata_ ->> 'page_label'):: text as page_label, 16 | (metadata_ ->> 'db_document_id'):: text as db_document_id 17 | FROM 18 | data_pg_vector_store 19 | GROUP BY 20 | text, 21 | page_label, 22 | db_document_id 23 | ) 24 | """ 25 | # Count rows that would be deleted 26 | stmt = text( 27 | f""" 28 | {common_table_expression} 29 | SELECT COUNT(id) FROM data_pg_vector_store WHERE id NOT IN (SELECT max_id FROM cte); 30 | """ 31 | ) 32 | result = await db.execute(stmt) 33 | num_duplicate_rows = result.scalar() 34 | 35 | num_rows = ( 36 | await db.execute(text("SELECT COUNT(*) FROM data_pg_vector_store")) 37 | ).scalar() 38 | 39 | print(f"{num_duplicate_rows} duplicate rows found out of {num_rows} total.") 40 | print( 41 | f"{num_rows - num_duplicate_rows} rows would be remaining if deleted." 42 | ) 43 | if dry_run or num_duplicate_rows == 0: 44 | return 45 | 46 | # Ask for confirmation before deleting rows 47 | confirmation = input("Do you want to delete these rows? (y/n) ") 48 | if confirmation.lower() != "y": 49 | print("Aborted.") 50 | return 51 | 52 | # Delete the rows 53 | delete_stmt = text( 54 | f""" 55 | {common_table_expression} 56 | DELETE FROM data_pg_vector_store WHERE id NOT IN (SELECT max_id FROM cte); 57 | """ 58 | ) 59 | await db.execute(delete_stmt) 60 | await db.commit() # Explicitly commit the transaction 61 | print(f"{num_duplicate_rows} rows have been deleted.") 62 | except Exception as e: 63 | print(f"An error occurred: {e}") 64 | 65 | 66 | def dedupe_vectore_store(dry_run: bool = False): 67 | """ 68 | Deduplicate the vector store. 69 | 70 | :param dry_run: If True, do not commit changes to the database. 
71 | """ 72 | asyncio.run(_async_dedupe_vectore_store(dry_run=dry_run)) 73 | 74 | 75 | if __name__ == "__main__": 76 | Fire(dedupe_vectore_store) 77 | -------------------------------------------------------------------------------- /backend/scripts/download_sec_pdf.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Optional 3 | 4 | import pdfkit 5 | from file_utils import filing_exists 6 | from fire import Fire 7 | from sec_edgar_downloader import Downloader 8 | from distutils.spawn import find_executable 9 | from tqdm.contrib.itertools import product 10 | from app.core.config import settings 11 | 12 | DEFAULT_OUTPUT_DIR = "data/" 13 | # You can lookup the CIK for a company here: https://www.sec.gov/edgar/searchedgar/companysearch 14 | DEFAULT_CIKS = [ 15 | # AAPL 16 | "320193", 17 | # MSFT 18 | "789019", 19 | # AMZN 20 | "0001018724", 21 | # GOOGL 22 | "1652044", 23 | # META 24 | "1326801", 25 | # TSLA 26 | "1318605", 27 | # NVDA 28 | "1045810", 29 | # NFLX 30 | "1065280", 31 | # PYPL 32 | "0001633917", 33 | # PFE (Pfizer) 34 | "78003", 35 | # AZNCF (AstraZeneca) 36 | "901832", 37 | # LLY (Eli Lilly) 38 | "59478", 39 | # MRNA (Moderna) 40 | "1682852", 41 | # JNJ (Johnson & Johnson) 42 | "200406", 43 | ] 44 | DEFAULT_FILING_TYPES = [ 45 | "10-K", 46 | "10-Q", 47 | ] 48 | 49 | 50 | def _download_filing( 51 | cik: str, filing_type: str, output_dir: str, limit=None, before=None, after=None 52 | ): 53 | dl = Downloader(settings.SEC_EDGAR_COMPANY_NAME, settings.SEC_EDGAR_EMAIL, output_dir) 54 | dl.get(filing_type, cik, limit=limit, before=before, after=after, download_details=True) 55 | 56 | 57 | def _convert_to_pdf(output_dir: str): 58 | """Converts all html files in a directory to pdf files.""" 59 | 60 | # NOTE: directory structure is assumed to be: 61 | # output_dir 62 | # ├── sec-edgar-filings 63 | # │ ├── AAPL 64 | # │ │ ├── 10-K 65 | # │ │ │ ├── 0000320193-20-000096 66 | # │ │ │ │ ├── primary-document.html 67 | # │ │ │ │ ├── primary-document.pdf <-- this is what we want 68 | 69 | data_dir = Path(output_dir) / "sec-edgar-filings" 70 | 71 | for cik_dir in data_dir.iterdir(): 72 | for filing_type_dir in cik_dir.iterdir(): 73 | for filing_dir in filing_type_dir.iterdir(): 74 | filing_doc = filing_dir / "primary-document.html" 75 | filing_pdf = filing_dir / "primary-document.pdf" 76 | if filing_doc.exists() and not filing_pdf.exists(): 77 | print("- Converting {}".format(filing_doc)) 78 | input_path = str(filing_doc.absolute()) 79 | output_path = str(filing_pdf.absolute()) 80 | try: 81 | # fix for issue here: 82 | # https://github.com/wkhtmltopdf/wkhtmltopdf/issues/4460#issuecomment-661345113 83 | options = {'enable-local-file-access': None} 84 | pdfkit.from_file(input_path, output_path, options=options, verbose=True) 85 | except Exception as e: 86 | print(f"Error converting {input_path} to {output_path}: {e}") 87 | 88 | 89 | def main( 90 | output_dir: str = DEFAULT_OUTPUT_DIR, 91 | ciks: List[str] = DEFAULT_CIKS, 92 | file_types: List[str] = DEFAULT_FILING_TYPES, 93 | before: Optional[str] = None, 94 | after: Optional[str] = None, 95 | limit: Optional[int] = 3, 96 | convert_to_pdf: bool = True, 97 | ): 98 | print('Downloading filings to "{}"'.format(Path(output_dir).absolute())) 99 | print("File Types: {}".format(file_types)) 100 | if convert_to_pdf: 101 | if find_executable("wkhtmltopdf") is None: 102 | raise Exception( 103 | "ERROR: wkhtmltopdf (https://wkhtmltopdf.org/) not found, " 104 | "please 
install it to convert html to pdf, e.g. " 105 | "`sudo apt-get install wkhtmltopdf`" 106 | ) 107 | for symbol, file_type in product(ciks, file_types): 108 | try: 109 | if filing_exists(symbol, file_type, output_dir): 110 | print(f"- Filing for {symbol} {file_type} already exists, skipping") 111 | else: 112 | print(f"- Downloading filing for {symbol} {file_type}") 113 | _download_filing(symbol, file_type, output_dir, limit, before, after) 114 | except Exception as e: 115 | print( 116 | f"Error downloading filing for symbol={symbol} & file_type={file_type}: {e}" 117 | ) 118 | 119 | if convert_to_pdf: 120 | print("Converting html files to pdf files") 121 | _convert_to_pdf(output_dir) 122 | 123 | 124 | if __name__ == "__main__": 125 | Fire(main) 126 | -------------------------------------------------------------------------------- /backend/scripts/file_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Optional, Tuple 3 | import datetime 4 | 5 | import pandas as pd 6 | from pydantic import BaseModel 7 | 8 | 9 | class Filing(BaseModel): 10 | file_path: str 11 | symbol: str 12 | filing_type: str 13 | year: int 14 | quarter: Optional[int] = None 15 | cik: str 16 | accession_number: str 17 | period_of_report_date: datetime.datetime 18 | filed_as_of_date: datetime.datetime 19 | date_as_of_change: datetime.datetime 20 | 21 | 22 | def filing_exists(cik: str, filing_type: str, output_dir: str) -> bool: 23 | """Checks if a filing exists for a given cik and filing type.""" 24 | data_dir = Path(output_dir) / "sec-edgar-filings" 25 | filing_dir = data_dir / cik / filing_type 26 | return filing_dir.exists() 27 | 28 | 29 | def parse_quarter_from_full_submission_txt(full_submission_txt_file_path: Path) -> int: 30 | """ 31 | The full-submission.txt file contains a pair of lines like the following: 32 | Document Fiscal Period Focus 33 | Q1 34 | 35 | This method parses the quarter from that second line. 36 | """ 37 | with open(full_submission_txt_file_path) as f: 38 | try: 39 | line = next(f) 40 | while "Document Fiscal Period Focus" not in line: 41 | line = next(f) 42 | quarter_line = next(f) 43 | quarter_line = quarter_line.split(">")[1].split("<")[0] 44 | quarter = quarter_line.strip("Q ") 45 | return int(quarter) 46 | except StopIteration: 47 | raise ValueError( 48 | f"Could not find Document Fiscal Period Focus in file {full_submission_txt_file_path}" 49 | ) 50 | 51 | 52 | def get_line_with_substring_in_file(file_path: Path, substring: str) -> str: 53 | """Returns the first line in a file that contains a given substring.""" 54 | with open(file_path) as f: 55 | for line in f: 56 | if substring in line: 57 | return line 58 | raise ValueError(f"Could not find substring '{substring}' in file {file_path}") 59 | 60 | 61 | def parse_dates_from_full_submission_txt( 62 | full_submission_txt_file_path: Path, 63 | ) -> Tuple[datetime.datetime, datetime.datetime, datetime.datetime]: 64 | period_of_report_line = get_line_with_substring_in_file( 65 | full_submission_txt_file_path, "CONFORMED PERIOD OF REPORT:" 66 | ) 67 | period_of_report_line = period_of_report_line.split(":")[1].strip() 68 | # Example value for date format: 20220930 69 | period_of_report_date = datetime.datetime.strptime( 70 | period_of_report_line.strip(), "%Y%m%d" 71 | ) 72 | 73 | filed_as_of_date_line = get_line_with_substring_in_file( 74 | full_submission_txt_file_path, "FILED AS OF DATE:" 75 | ) 76 | filed_as_of_date_line = filed_as_of_date_line.split(":")[1].strip()
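# Illustrative example: a header line "FILED AS OF DATE:  20220930" splits on ":"
# to "20220930", which strptime("%Y%m%d") parses to datetime.datetime(2022, 9, 30).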
77 | filed_as_of_date = datetime.datetime.strptime( 78 | filed_as_of_date_line.strip(), "%Y%m%d" 79 | ) 80 | 81 | date_as_of_change_line = get_line_with_substring_in_file( 82 | full_submission_txt_file_path, "DATE AS OF CHANGE:" 83 | ) 84 | date_as_of_change_line = date_as_of_change_line.split(":")[1].strip() 85 | date_as_of_change = datetime.datetime.strptime( 86 | date_as_of_change_line.strip(), "%Y%m%d" 87 | ) 88 | return period_of_report_date, filed_as_of_date, date_as_of_change 89 | 90 | 91 | def parse_cik_from_full_submission_txt( 92 | full_submission_txt_file_path: Path, 93 | ) -> str: 94 | cik_line = get_line_with_substring_in_file( 95 | full_submission_txt_file_path, "CENTRAL INDEX KEY:" 96 | ) 97 | cik_line = cik_line.split(":")[1].strip() 98 | return cik_line 99 | 100 | 101 | def parse_ticker_symbol_from_full_submission_txt( 102 | full_submission_txt_file_path: Path, 103 | ) -> str: 104 | """ 105 | Very hacky approach to parsing the ticker symbol from the full-submission.txt file. 106 | The file usually has a line that reads something like "<FILENAME>amzn-20220930.htm". 107 | We can extract "amzn" from that line. 108 | """ 109 | ticker_symbol_line = get_line_with_substring_in_file( 110 | full_submission_txt_file_path, "<FILENAME>" 111 | ) 112 | ticker_symbol_line = ticker_symbol_line.split("<FILENAME>")[1].strip() 113 | ticker_symbol = ticker_symbol_line.split("-")[0].strip() 114 | return ticker_symbol.upper() 115 | 116 | 117 | def get_available_filings(output_dir: str) -> List[Filing]: 118 | data_dir = Path(output_dir) / "sec-edgar-filings" 119 | filings = [] 120 | for cik_dir in data_dir.iterdir(): 121 | for filing_type_dir in cik_dir.iterdir(): 122 | for filing_dir in filing_type_dir.iterdir(): 123 | filing_pdf = filing_dir / "primary-document.pdf" 124 | full_submission_txt = filing_dir / "full-submission.txt" 125 | if filing_pdf.exists(): 126 | filing_type = filing_type_dir.name 127 | file_path = str(filing_pdf.absolute()) 128 | quarter = None 129 | assert full_submission_txt.exists() 130 | if filing_type == "10-Q": 131 | quarter = parse_quarter_from_full_submission_txt( 132 | full_submission_txt 133 | ) 134 | ( 135 | period_of_report_date, 136 | filed_as_of_date, 137 | date_as_of_change, 138 | ) = parse_dates_from_full_submission_txt(full_submission_txt) 139 | accession_number = filing_dir.name.strip() 140 | cik = parse_cik_from_full_submission_txt(full_submission_txt) 141 | symbol = parse_ticker_symbol_from_full_submission_txt( 142 | full_submission_txt 143 | ) 144 | filing = Filing( 145 | file_path=file_path, 146 | symbol=symbol, 147 | filing_type=filing_type, 148 | year=period_of_report_date.year, 149 | quarter=quarter, 150 | accession_number=accession_number, 151 | cik=cik, 152 | period_of_report_date=period_of_report_date, 153 | filed_as_of_date=filed_as_of_date, 154 | date_as_of_change=date_as_of_change, 155 | ) 156 | filings.append(filing) 157 | return filings 158 | 159 | 160 | def get_available_filings_as_df(output_dir: str) -> pd.DataFrame: 161 | filings = get_available_filings(output_dir) 162 | return pd.DataFrame([filing.dict() for filing in filings]) 163 | -------------------------------------------------------------------------------- /backend/scripts/seed_db.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import asyncio 3 | from tempfile import TemporaryDirectory 4 | from pathlib import Path 5 | from fire import Fire 6 | import s3fs 7 | from app.core.config import settings 8 | 
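# Usage sketch (hypothetical invocation; assumes the AWS/S3 and database env vars
# consumed by app.core.config.settings are set):
#   python scripts/seed_db.py --ciks '["320193"]' --filing_types '["10-K"]'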
import upsert_db_sec_documents 9 | import download_sec_pdf 10 | from download_sec_pdf import DEFAULT_CIKS, DEFAULT_FILING_TYPES 11 | import seed_storage_context 12 | 13 | 14 | def copy_to_s3(dir_path: str, s3_bucket: str = settings.S3_ASSET_BUCKET_NAME): 15 | """ 16 | Copy all files in dir_path to S3. 17 | """ 18 | s3 = s3fs.S3FileSystem( 19 | key=settings.AWS_KEY, 20 | secret=settings.AWS_SECRET, 21 | endpoint_url=settings.S3_ENDPOINT_URL, 22 | ) 23 | 24 | if not (settings.RENDER or s3.exists(s3_bucket)): 25 | s3.mkdir(s3_bucket) 26 | 27 | s3.put(dir_path, s3_bucket, recursive=True) 28 | 29 | 30 | async def async_seed_db( 31 | ciks: List[str] = DEFAULT_CIKS, filing_types: List[str] = DEFAULT_FILING_TYPES 32 | ): 33 | with TemporaryDirectory() as temp_dir: 34 | print("Downloading SEC filings") 35 | download_sec_pdf.main( 36 | output_dir=temp_dir, 37 | ciks=ciks, 38 | file_types=filing_types, 39 | ) 40 | 41 | print("Copying downloaded SEC filings to S3") 42 | copy_to_s3(str(Path(temp_dir) / "sec-edgar-filings")) 43 | 44 | print("Upserting records of downloaded SEC filings into database") 45 | await upsert_db_sec_documents.async_upsert_documents_from_filings( 46 | url_base=settings.CDN_BASE_URL, 47 | doc_dir=temp_dir, 48 | ) 49 | 50 | print("Seeding storage context") 51 | await seed_storage_context.async_main_seed_storage_context() 52 | print( 53 | """ 54 | Done! 🏁 55 | \t- SEC PDF documents uploaded to the S3 assets bucket ✅ 56 | \t- Documents database table has been populated ✅ 57 | \t- Vector storage table has been seeded with embeddings ✅ 58 | """.strip() 59 | ) 60 | 61 | 62 | def seed_db( 63 | ciks: List[str] = DEFAULT_CIKS, filing_types: List[str] = DEFAULT_FILING_TYPES 64 | ): 65 | asyncio.run(async_seed_db(ciks, filing_types)) 66 | 67 | 68 | if __name__ == "__main__": 69 | Fire(seed_db) 70 | -------------------------------------------------------------------------------- /backend/scripts/seed_storage_context.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from fire import Fire 3 | import asyncio 4 | from app.db.session import SessionLocal 5 | from app.api import crud 6 | from llama_index.core.callbacks import CallbackManager 7 | from app.chat.engine import ( 8 | build_doc_id_to_index_map, 9 | get_s3_fs, 10 | ) 11 | 12 | 13 | async def async_main_seed_storage_context(): 14 | fs = get_s3_fs() 15 | async with SessionLocal() as db: 16 | docs = await crud.fetch_documents(db) 17 | callback_manager = CallbackManager([]) 18 | for doc in tqdm(docs, desc="Seeding storage with DB documents"): 19 | await build_doc_id_to_index_map(callback_manager, [doc], fs=fs) 20 | 21 | 22 | def main_seed_storage_context(): 23 | asyncio.run(async_main_seed_storage_context()) 24 | 25 | 26 | if __name__ == "__main__": 27 | Fire(main_seed_storage_context) 28 | -------------------------------------------------------------------------------- /backend/scripts/stock_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional 2 | 3 | from pydantic import BaseModel, ValidationError 4 | from pytickersymbols import PyTickerSymbols 5 | 6 | DEFAULT_INDICES = ["DOW JONES", "S&P 500", "NASDAQ 100"] 7 | 8 | 9 | class Stock(BaseModel): 10 | name: str 11 | symbol: str 12 | indices: List[str] 13 | 14 | 15 | def _parse_stock(stock: dict) -> Optional[Stock]: 16 | try: 17 | return Stock( 18 | name=stock["name"], 19 | symbol=stock["symbol"], 20 | indices=stock["indices"], 21 | ) 22 | except 
ValidationError: 23 | return None 24 | 25 | 26 | def get_stocks(indices: List[str] = DEFAULT_INDICES) -> List[Stock]: 27 | stock_data = PyTickerSymbols() 28 | if indices: 29 | # get stocks for given indices 30 | all_stocks = [] 31 | for index in indices: 32 | stocks = stock_data.get_stocks_by_index(index) 33 | all_stocks.extend(stocks) 34 | else: 35 | # get stocks for all indices 36 | all_stocks = stock_data.get_all_stocks() 37 | 38 | stocks = [_parse_stock(stock) for stock in all_stocks] 39 | return list(filter(None, stocks)) 40 | 41 | 42 | def get_stocks_by_symbol(indices: List[str] = DEFAULT_INDICES) -> Dict[str, Stock]: 43 | stocks = get_stocks(indices) 44 | return {stock.symbol: stock for stock in stocks} 45 | -------------------------------------------------------------------------------- /backend/scripts/upsert_db_sec_documents.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from fire import Fire 3 | from tqdm import tqdm 4 | import asyncio 5 | from pytickersymbols import PyTickerSymbols 6 | from file_utils import get_available_filings, Filing 7 | from stock_utils import get_stocks_by_symbol, Stock 8 | from fastapi.encoders import jsonable_encoder 9 | from app.models.db import Document 10 | from app.schema import ( 11 | SecDocumentMetadata, 12 | DocumentMetadataMap, 13 | DocumentMetadataKeysEnum, 14 | SecDocumentTypeEnum, 15 | Document, 16 | ) 17 | from app.db.session import SessionLocal 18 | from app.api import crud 19 | 20 | DEFAULT_URL_BASE = "https://dl94gqvzlh4k8.cloudfront.net" 21 | DEFAULT_DOC_DIR = "data/" 22 | 23 | 24 | async def upsert_document(doc_dir: str, stock: Stock, filing: Filing, url_base: str): 25 | # construct a string for just the document's sub-path after the doc_dir 26 | # e.g. "sec-edgar-filings/AAPL/10-K/0000320193-20-000096/primary-document.pdf" 27 | doc_path = Path(filing.file_path).relative_to(doc_dir) 28 | url_path = url_base.rstrip("/") + "/" + str(doc_path).lstrip("/") 29 | doc_type = ( 30 | SecDocumentTypeEnum.TEN_K 31 | if filing.filing_type == "10-K" 32 | else SecDocumentTypeEnum.TEN_Q 33 | ) 34 | sec_doc_metadata = SecDocumentMetadata( 35 | company_name=stock.name, 36 | company_ticker=stock.symbol, 37 | doc_type=doc_type, 38 | year=filing.year, 39 | quarter=filing.quarter, 40 | accession_number=filing.accession_number, 41 | cik=filing.cik, 42 | period_of_report_date=filing.period_of_report_date, 43 | filed_as_of_date=filing.filed_as_of_date, 44 | date_as_of_change=filing.date_as_of_change, 45 | ) 46 | metadata_map: DocumentMetadataMap = { 47 | DocumentMetadataKeysEnum.SEC_DOCUMENT: jsonable_encoder( 48 | sec_doc_metadata.dict(exclude_none=True) 49 | ) 50 | } 51 | doc = Document(url=str(url_path), metadata_map=metadata_map) 52 | async with SessionLocal() as db: 53 | await crud.upsert_document_by_url(db, doc) 54 | 55 | 56 | async def async_upsert_documents_from_filings(url_base: str, doc_dir: str): 57 | """ 58 | Upserts SEC documents into the database based on what has been downloaded to the filesystem. 59 | """ 60 | filings = get_available_filings(doc_dir) 61 | stocks_data = PyTickerSymbols() 62 | stocks_dict = get_stocks_by_symbol(stocks_data.get_all_indices()) 63 | for filing in tqdm(filings, desc="Upserting docs from filings"): 64 | if filing.symbol not in stocks_dict: 65 | print(f"Symbol {filing.symbol} not found in stocks_dict. 
Skipping.") 66 | continue 67 | stock = stocks_dict[filing.symbol] 68 | await upsert_document(doc_dir, stock, filing, url_base) 69 | 70 | 71 | def main_upsert_documents_from_filings( 72 | url_base: str = DEFAULT_URL_BASE, doc_dir: str = DEFAULT_DOC_DIR 73 | ): 74 | """ 75 | Upserts SEC documents into the database based on what has been downloaded to the filesystem. 76 | """ 77 | 78 | asyncio.run(async_upsert_documents_from_filings(url_base, doc_dir)) 79 | 80 | 81 | if __name__ == "__main__": 82 | Fire(main_upsert_documents_from_filings) 83 | -------------------------------------------------------------------------------- /backend/scripts/upsert_document.py: -------------------------------------------------------------------------------- 1 | from fire import Fire 2 | from app.schema import Document 3 | from app.db.session import SessionLocal 4 | from app.api import crud 5 | import asyncio 6 | 7 | async def upsert_single_document(doc_url: str): 8 | """ 9 | Upserts a single SEC document into the database using its URL. 10 | """ 11 | if not doc_url or not doc_url.startswith('http'): 12 | print("DOC_URL must be an http(s) based url value") 13 | return 14 | metadata_map = {} 15 | doc = Document(url=doc_url, metadata_map=metadata_map) 16 | 17 | async with SessionLocal() as db: 18 | document = await crud.upsert_document_by_url(db, doc) 19 | print(f"Upserted document. Database ID:\n{document.id}") 20 | 21 | 22 | def main_upsert_single_document(doc_url: str): 23 | """ 24 | Script to upsert a single document by URL. metada_map parameter will be empty dict ({}) 25 | This script is useful when trying to use your own PDF files. 26 | """ 27 | asyncio.run(upsert_single_document(doc_url)) 28 | 29 | if __name__ == "__main__": 30 | Fire(main_upsert_single_document) 31 | -------------------------------------------------------------------------------- /backend/troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | This file contains some solutions to common questions or pitfalls that may come up during development with this project. 3 | 4 | ## I'm seeing this error `pydantic.error_wrappers.ValidationError: 8 validation errors for Settings` 5 | You may have just run a command from the `Makefile` like `make migrate` and seen an error output like this: 6 | 7 | ``` 8 | pydantic.error_wrappers.ValidationError: 8 validation errors for Settings 9 | OPENAI_API_KEY 10 | field required (type=value_error.missing) 11 | AWS_KEY 12 | field required (type=value_error.missing) 13 | AWS_SECRET 14 | field required (type=value_error.missing) 15 | POLYGON_IO_API_KEY 16 | field required (type=value_error.missing) 17 | DATABASE_URL 18 | field required (type=value_error.missing) 19 | S3_BUCKET_NAME 20 | field required (type=value_error.missing) 21 | S3_ASSET_BUCKET_NAME 22 | field required (type=value_error.missing) 23 | CDN_BASE_URL 24 | field required (type=value_error.missing) 25 | make: *** [migrate] Error 1 26 | ``` 27 | 28 | This happens when you haven't set all the environment variables in your shell environment. 29 | You can remedy this quickly by doing the following: 30 | 1. Create a `.env` file and source it. 31 | - The `.env.development` file is a good template so you can just do `cp .env.development .env` 32 | 1. `set -a` 33 | 1. 
35 | -------------------------------------------------------------------------------- /frontend/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "mcr.microsoft.com/devcontainers/universal:2", 3 | "features": { 4 | "ghcr.io/devcontainers/features/node:1": {}, 5 | "ghcr.io/devcontainers-contrib/features/typescript:2": {} 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /frontend/.env.example: -------------------------------------------------------------------------------- 1 | # Since the ".env" file is gitignored, you can use the ".env.example" file to 2 | # build a new ".env" file when you clone the repo. Keep this file up-to-date 3 | # when you add new variables to `.env`. 4 | 5 | # This file will be committed to version control, so make sure not to have any 6 | # secrets in it. If you are cloning this repo, create a copy of this file named 7 | # ".env" and populate it with your secrets. 8 | 9 | # When adding additional environment variables, the schema in "/src/env.mjs" 10 | # should be updated accordingly. 11 | 12 | # Example: 13 | # SERVERVAR="foo" 14 | # NEXT_PUBLIC_CLIENTVAR="bar" 15 | NEXT_PUBLIC_BACKEND_URL=http://localhost:8000/ 16 | -------------------------------------------------------------------------------- /frontend/.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | // eslint-disable-next-line @typescript-eslint/no-var-requires 2 | const path = require("path"); 3 | 4 | /** @type {import("eslint").Linter.Config} */ 5 | const config = { 6 | overrides: [ 7 | { 8 | extends: [ 9 | "plugin:@typescript-eslint/recommended-requiring-type-checking", 10 | ], 11 | files: ["*.ts", "*.tsx"], 12 | parserOptions: { 13 | project: path.join(__dirname, "tsconfig.json"), 14 | }, 15 | }, 16 | ], 17 | parser: "@typescript-eslint/parser", 18 | parserOptions: { 19 | project: path.join(__dirname, "tsconfig.json"), 20 | }, 21 | plugins: ["@typescript-eslint"], 22 | extends: ["next/core-web-vitals", "plugin:@typescript-eslint/recommended"], 23 | rules: { 24 | "@typescript-eslint/consistent-type-imports": [ 25 | "warn", 26 | { 27 | prefer: "type-imports", 28 | fixStyle: "inline-type-imports", 29 | }, 30 | ], 31 | "@typescript-eslint/no-unused-vars": ["warn", { argsIgnorePattern: "^_" }], 32 | }, 33 | }; 34 | 35 | module.exports = config; 36 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # database 12 | /prisma/db.sqlite 13 | /prisma/db.sqlite-journal 14 | 15 | # next.js 16 | /.next/ 17 | /out/ 18 | next-env.d.ts 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # local env files 34 | # do not commit any .env files to git, except for the .env.example file. 
https://create.t3.gg/en/usage/env-variables#using-environment-variables 35 | .env 36 | .env*.local 37 | 38 | # vercel 39 | .vercel 40 | 41 | # typescript 42 | *.tsbuildinfo 43 | 44 | # Sentry Auth Token 45 | .sentryclirc 46 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # SEC Insights Frontend 2 | 3 | This is SEC Insights, a tool that lets you analyze multiple financial documents, powered by LlamaIndex. [Live URL](https://secinsights.ai/) 4 | 5 | ## Technical Details 6 | 7 | Built with `next.js`, `tailwindcss`, and `typescript react`, based on the [T3 starter kit](https://create.t3.gg/en/usage/next-js). 8 | 9 | ## Architecture 10 | 11 | This app consists of two main routes: 12 | 13 | 1. `/`, located in `src/pages/index.tsx`. This route is the landing page, and consists of the document selector and a marketing section. 14 | 2. `/conversation/{conversation_id}`, located in `src/pages/conversation/[id].tsx`. This page consists of the chat window on the left hand side, and the pdf viewer on the right hand side. 15 | 16 | - PDFs are rendered using `react-pdf`; a single pdf is rendered by the `VirtualizedPdf.tsx` component 17 | - The Chat component is located in `RenderConversations.tsx` 18 | 19 | ## How to develop locally 20 | 21 | 1. `npm i` 22 | 2. `npm run dev` 23 | 24 | 3. Before pushing to the repo, run `npm run build` to catch any TypeScript errors (TODO: pre-commit hook) 25 | 26 | Follow our deployment guides for [Vercel](https://create.t3.gg/en/deployment/vercel), [Netlify](https://create.t3.gg/en/deployment/netlify) and [Docker](https://create.t3.gg/en/deployment/docker) for more information. 27 | -------------------------------------------------------------------------------- /frontend/next.config.mjs: -------------------------------------------------------------------------------- 1 | import {withSentryConfig} from "@sentry/nextjs"; 2 | /** 3 | * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation. This is especially useful 4 | * for Docker builds. 5 | */ 6 | await import("./src/env.mjs"); 7 | 8 | /** @type {import("next").NextConfig} */ 9 | const config = { 10 | reactStrictMode: true, 11 | 12 | /** 13 | * If you have `experimental: { appDir: true }` set, then you must comment the below `i18n` config 14 | * out. 
15 | * 16 | * @see https://github.com/vercel/next.js/issues/41980 17 | */ 18 | i18n: { 19 | locales: ["en"], 20 | defaultLocale: "en", 21 | }, 22 | }; 23 | export default withSentryConfig(config, { 24 | // For all available options, see: 25 | // https://github.com/getsentry/sentry-webpack-plugin#options 26 | 27 | // Suppresses source map uploading logs during build 28 | silent: true, 29 | 30 | org: "llama-test", 31 | project: "javascript-nextjs", 32 | }, { 33 | // For all available options, see: 34 | // https://docs.sentry.io/platforms/javascript/guides/nextjs/manual-setup/ 35 | 36 | // Upload a larger set of source maps for prettier stack traces (increases build time) 37 | widenClientFileUpload: true, 38 | 39 | // Transpiles SDK to be compatible with IE11 (increases bundle size) 40 | transpileClientSDK: true, 41 | 42 | // Routes browser requests to Sentry through a Next.js rewrite to circumvent ad-blockers (increases server load) 43 | tunnelRoute: "/monitoring", 44 | 45 | // Hides source maps from generated client bundles 46 | hideSourceMaps: true, 47 | 48 | // Automatically tree-shake Sentry logger statements to reduce bundle size 49 | disableLogger: true, 50 | }); -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "llama-app-frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "build": "next build", 7 | "dev": "next dev", 8 | "lint": "next lint", 9 | "start": "next start" 10 | }, 11 | "dependencies": { 12 | "@headlessui/react": "1.7.15", 13 | "@heroicons/react": "2.0.18", 14 | "@sentry/nextjs": "^7.57.0", 15 | "@t3-oss/env-nextjs": "^0.3.1", 16 | "@tailwindcss/forms": "0.5.3", 17 | "@wojtekmaj/react-hooks": "1.17.2", 18 | "classnames": "^2.3.2", 19 | "downshift": "^7.6.0", 20 | "fuse.js": "^6.6.2", 21 | "lodash": "^4.17.21", 22 | "lodash.debounce": "^4.0.8", 23 | "md5": "2.3.0", 24 | "next": "^13.4.2", 25 | "react": "18.2.0", 26 | "react-dom": "18.2.0", 27 | "react-ga4": "^2.1.0", 28 | "react-github-btn": "^1.4.0", 29 | "react-icons": "^4.10.1", 30 | "react-intersection-observer": "9.5.1", 31 | "react-pdf": "6.2.2", 32 | "react-select": "^5.7.3", 33 | "react-use-intercom": "^5.1.4", 34 | "react-window": "1.8.9", 35 | "uuid": "^9.0.0", 36 | "zod": "^3.21.4" 37 | }, 38 | "devDependencies": { 39 | "@tailwindcss/forms": "^0.5.3", 40 | "@types/eslint": "^8.37.0", 41 | "@types/lodash": "^4.14.195", 42 | "@types/lodash.debounce": "^4.0.7", 43 | "@types/md5": "^2.3.2", 44 | "@types/node": "^18.16.0", 45 | "@types/prettier": "^2.7.2", 46 | "@types/react": "^18.2.6", 47 | "@types/react-dom": "^18.2.4", 48 | "@types/react-window": "^1.8.5", 49 | "@types/uuid": "^9.0.2", 50 | "@typescript-eslint/eslint-plugin": "^5.59.6", 51 | "@typescript-eslint/parser": "^5.59.6", 52 | "autoprefixer": "^10.4.14", 53 | "eslint": "^8.43.0", 54 | "eslint-config-next": "^13.4.2", 55 | "eslint-config-prettier": "^8.8.0", 56 | "postcss": "^8.4.21", 57 | "prettier": "^2.8.8", 58 | "prettier-plugin-tailwindcss": "^0.2.8", 59 | "tailwindcss": "^3.3.0", 60 | "typescript": "^5.0.4" 61 | }, 62 | "ct3aMetadata": { 63 | "initVersion": "7.13.1" 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /frontend/postcss.config.cjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | 8 | 
module.exports = config; 9 | -------------------------------------------------------------------------------- /frontend/prettier.config.cjs: -------------------------------------------------------------------------------- 1 | /** @type {import("prettier").Config} */ 2 | const config = { 3 | plugins: [require.resolve("prettier-plugin-tailwindcss")], 4 | }; 5 | 6 | module.exports = config; 7 | -------------------------------------------------------------------------------- /frontend/public/Gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/Gradient.png -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/public/full-chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/full-chat.png -------------------------------------------------------------------------------- /frontend/public/lyft-2021-10k.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/lyft-2021-10k.pdf -------------------------------------------------------------------------------- /frontend/public/uber-2021-10k.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/sec-insights/a9b6da0f5c4bff52437a5285954ff17bc713f14f/frontend/public/uber-2021-10k.pdf -------------------------------------------------------------------------------- /frontend/sentry.client.config.ts: -------------------------------------------------------------------------------- 1 | // This file configures the initialization of Sentry on the client. 2 | // The config you add here will be used whenever a user loads a page in their browser. 3 | // https://docs.sentry.io/platforms/javascript/guides/nextjs/ 4 | 5 | import * as Sentry from "@sentry/nextjs"; 6 | import { SENTRY_DSN } from "~/constants"; 7 | 8 | Sentry.init({ 9 | dsn: SENTRY_DSN, 10 | 11 | // Adjust this value in production, or use tracesSampler for greater control 12 | tracesSampleRate: 1, 13 | 14 | // Setting this option to true will print useful information to the console while you're setting up Sentry. 15 | debug: false, 16 | 17 | replaysOnErrorSampleRate: 1.0, 18 | 19 | // This sets the sample rate to be 10%. 
You may want this to be 100% while 20 | // in development and sample at a lower rate in production 21 | replaysSessionSampleRate: 0.1, 22 | 23 | // You can remove this option if you're not planning to use the Sentry Session Replay feature: 24 | integrations: [ 25 | new Sentry.Replay({ 26 | // Additional Replay configuration goes in here, for example: 27 | maskAllText: true, 28 | blockAllMedia: true, 29 | }), 30 | ], 31 | }); 32 | -------------------------------------------------------------------------------- /frontend/sentry.edge.config.ts: -------------------------------------------------------------------------------- 1 | // This file configures the initialization of Sentry for edge features (middleware, edge routes, and so on). 2 | // The config you add here will be used whenever one of the edge features is loaded. 3 | // Note that this config is unrelated to the Vercel Edge Runtime and is also required when running locally. 4 | // https://docs.sentry.io/platforms/javascript/guides/nextjs/ 5 | 6 | import * as Sentry from "@sentry/nextjs"; 7 | import { SENTRY_DSN } from "~/constants"; 8 | 9 | Sentry.init({ 10 | dsn: SENTRY_DSN, 11 | 12 | // Adjust this value in production, or use tracesSampler for greater control 13 | tracesSampleRate: 1, 14 | 15 | // Setting this option to true will print useful information to the console while you're setting up Sentry. 16 | debug: false, 17 | }); 18 | -------------------------------------------------------------------------------- /frontend/sentry.server.config.ts: -------------------------------------------------------------------------------- 1 | // This file configures the initialization of Sentry on the server. 2 | // The config you add here will be used whenever the server handles a request. 3 | // https://docs.sentry.io/platforms/javascript/guides/nextjs/ 4 | 5 | import * as Sentry from "@sentry/nextjs"; 6 | import { SENTRY_DSN } from "~/constants"; 7 | 8 | Sentry.init({ 9 | dsn: SENTRY_DSN, 10 | 11 | // Adjust this value in production, or use tracesSampler for greater control 12 | tracesSampleRate: 1, 13 | 14 | // Setting this option to true will print useful information to the console while you're setting up Sentry. 15 | debug: false, 16 | }); 17 | -------------------------------------------------------------------------------- /frontend/src/api/backend.tsx: -------------------------------------------------------------------------------- 1 | import { backendUrl } from "~/config"; 2 | import type { Message } from "~/types/conversation"; 3 | import type { BackendDocument } from "~/types/backend/document"; 4 | import type { SecDocument } from "~/types/document"; 5 | import { fromBackendDocumentToFrontend } from "./utils/documents"; 6 | 7 | interface CreateConversationPayload { 8 | id: string; 9 | } 10 | 11 | interface GetConversationPayload { 12 | id: string; 13 | messages: Message[]; 14 | documents: BackendDocument[]; 15 | } 16 | 17 | interface GetConversationReturnType { 18 | messages: Message[]; 19 | documents: SecDocument[]; 20 | } 21 | 22 | class BackendClient { 23 | private async get(endpoint: string) { 24 | const url = backendUrl + endpoint; 25 | const res = await fetch(url); 26 | 27 | if (!res.ok) { 28 | throw new Error(`HTTP error! status: ${res.status}`); 29 | } 30 | return res; 31 | } 32 | 
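// Usage sketch (hypothetical; assumes the backend is reachable at backendUrl):
//   const docs = await backendClient.fetchDocuments();
//   const conversationId = await backendClient.createConversation(docs.map((d) => d.id));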
33 | private async post(endpoint: string, body?: unknown) { 34 | const url = backendUrl + endpoint; 35 | const res = await fetch(url, { 36 | method: "POST", 37 | headers: { "Content-Type": "application/json" }, 38 | body: JSON.stringify(body), 39 | }); 40 | 41 | if (!res.ok) { 42 | throw new Error(`HTTP error! status: ${res.status}`); 43 | } 44 | return res; 45 | } 46 | 47 | public async createConversation(documentIds: string[]): Promise<string> { 48 | const endpoint = "api/conversation/"; 49 | const payload = { document_ids: documentIds }; 50 | const res = await this.post(endpoint, payload); 51 | const data = (await res.json()) as CreateConversationPayload; 52 | 53 | return data.id; 54 | } 55 | 56 | public async fetchConversation( 57 | id: string 58 | ): Promise<GetConversationReturnType> { 59 | const endpoint = `api/conversation/${id}`; 60 | const res = await this.get(endpoint); 61 | const data = (await res.json()) as GetConversationPayload; 62 | 63 | return { 64 | messages: data.messages, 65 | documents: fromBackendDocumentToFrontend(data.documents), 66 | }; 67 | } 68 | 69 | public async fetchDocuments(): Promise<SecDocument[]> { 70 | const endpoint = `api/document/`; 71 | const res = await this.get(endpoint); 72 | const data = (await res.json()) as BackendDocument[]; 73 | const docs = fromBackendDocumentToFrontend(data); 74 | return docs; 75 | } 76 | } 77 | 78 | export const backendClient = new BackendClient(); 79 | -------------------------------------------------------------------------------- /frontend/src/api/utils/documents.tsx: -------------------------------------------------------------------------------- 1 | import { MAX_NUMBER_OF_SELECTED_DOCUMENTS } from "~/hooks/useDocumentSelector"; 2 | import { BackendDocument, BackendDocumentType } from "~/types/backend/document"; 3 | import { SecDocument, DocumentType } from "~/types/document"; 4 | import { documentColors } from "~/utils/colors"; 5 | import _ from "lodash"; 6 | 7 | export const fromBackendDocumentToFrontend = ( 8 | backendDocuments: BackendDocument[] 9 | ) => { 10 | // sort by created_at so that de-dupe filter later keeps oldest duplicate docs 11 | backendDocuments = _.sortBy(backendDocuments, 'created_at'); 12 | let frontendDocs: SecDocument[] = backendDocuments 13 | .filter((backendDoc) => 'sec_document' in backendDoc.metadata_map) 14 | .map((backendDoc, index) => { 15 | const backendDocType = backendDoc.metadata_map.sec_document.doc_type; 16 | const frontendDocType = 17 | backendDocType === BackendDocumentType.TenK 18 | ? DocumentType.TenK 19 | : DocumentType.TenQ; 20 | 21 | // we have 10 colors for 10 documents 22 | const colorIndex = index < 10 ? index : 0;
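// Illustrative: with 12 documents, the docs at index 10 and 11 both fall back to documentColors[0].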
23 | return { 24 | id: backendDoc.id, 25 | url: backendDoc.url, 26 | ticker: backendDoc.metadata_map.sec_document.company_ticker, 27 | fullName: backendDoc.metadata_map.sec_document.company_name, 28 | year: String(backendDoc.metadata_map.sec_document.year), 29 | docType: frontendDocType, 30 | color: documentColors[colorIndex], 31 | quarter: backendDoc.metadata_map.sec_document.quarter || "", 32 | } as SecDocument; 33 | }); 34 | // de-dupe hotfix 35 | const getDocDeDupeKey = (doc: SecDocument) => `${doc.ticker}-${doc.year}-${doc.quarter || ''}`; 36 | frontendDocs = _.chain(frontendDocs).sortBy(getDocDeDupeKey).sortedUniqBy(getDocDeDupeKey).value(); 37 | 38 | return frontendDocs; 39 | }; 40 | -------------------------------------------------------------------------------- /frontend/src/components/Layout.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import type { PropsWithChildren } from "react"; 3 | const Layout = ({ children }: PropsWithChildren) => { 4 | return <>{children}</>; 5 | }; 6 | export default Layout; 7 | -------------------------------------------------------------------------------- /frontend/src/components/basics/Loading.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | export const LoadingSpinner: React.FC = () => { 4 | return ( 5 | <div className="loader" /> /* hypothetical stand-in for the spinner element */
6 | ); 7 | }; 8 | -------------------------------------------------------------------------------- /frontend/src/components/basics/Modal.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ModalPortal from "./ModalPortal"; 3 | import { AiOutlineClose } from "react-icons/ai"; 4 | interface ModalProps { 5 | isOpen: boolean; 6 | toggleModal: () => void; 7 | title: string; 8 | children: React.ReactNode; 9 | } 10 | 11 | const Modal: React.FC = ({ 12 | isOpen, 13 | toggleModal, 14 | title, 15 | children, 16 | }) => { 17 | if (!isOpen) return null; 18 | 19 | return ( 20 | 21 |
22 |
26 |
27 |

{title}

28 | {children} 29 | 35 |
36 |
37 |
38 | ); 39 | }; 40 | 41 | export default Modal; 42 | -------------------------------------------------------------------------------- /frontend/src/components/basics/ModalPortal.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ReactDOM from "react-dom"; 3 | 4 | interface ModalPortalProps { 5 | children: React.ReactNode; 6 | } 7 | 8 | const ModalPortal = ({ children }: ModalPortalProps) => { 9 | const domNode = document.getElementById("modal-root"); 10 | return domNode ? ReactDOM.createPortal(children, domNode) : null; 11 | }; 12 | 13 | export default ModalPortal; 14 | -------------------------------------------------------------------------------- /frontend/src/components/landing-page/AnimateSvg.tsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useState } from "react"; 2 | 3 | interface ScrollSVGProps { 4 | breakpoint: number; 5 | increment: number; 6 | svgs: JSX.Element[]; 7 | } 8 | 9 | export const AnimateSvg: React.FC = ({ 10 | breakpoint, 11 | increment, 12 | svgs, 13 | }) => { 14 | const [scrollPosition, setScrollPosition] = useState(0); 15 | 16 | // Listen to scroll event 17 | useEffect(() => { 18 | const handleScroll = () => { 19 | const currentScrollPos = window.pageYOffset; 20 | if (currentScrollPos > breakpoint) { 21 | setScrollPosition( 22 | Math.floor((currentScrollPos - breakpoint) / increment) 23 | ); 24 | } 25 | }; 26 | 27 | window.addEventListener("scroll", handleScroll); 28 | 29 | // Clean up event listener 30 | return () => { 31 | window.removeEventListener("scroll", handleScroll); 32 | }; 33 | }, [breakpoint, increment]); 34 | 35 | // Function to render SVGs 36 | const renderSVG = () => { 37 | // If we've scrolled past all SVGs, keep showing the last one 38 | if (scrollPosition >= svgs.length) { 39 | return svgs[svgs.length - 1]; 40 | } 41 | 42 | // Otherwise, show the SVG for the current scroll position 43 | return svgs[scrollPosition]; 44 | }; 45 | 46 | return
<div>{renderSVG()}</div> /* hypothetical wrapper element */
; 47 | }; 48 | 49 | export default AnimateSvg; 50 | -------------------------------------------------------------------------------- /frontend/src/components/landing-page/SelectTicker.tsx: -------------------------------------------------------------------------------- 1 | import React, { Dispatch, SetStateAction, useEffect, useState } from "react"; 2 | 3 | import type { Ticker } from "~/types/document"; 4 | import { useCombobox } from "downshift"; 5 | import cx from "classnames"; 6 | import { HiOutlineBuildingOffice2 } from "react-icons/hi2"; 7 | import useFocus from "~/hooks/utils/useFocus"; 8 | 9 | function getTickerFilter(inputValue: string) { 10 | const lowerCasedInputValue = inputValue.toLowerCase(); 11 | 12 | return function tickerFilter(ticker: Ticker) { 13 | return ( 14 | !inputValue || 15 | ticker.fullName.toLowerCase().includes(lowerCasedInputValue) || 16 | ticker.ticker.toLowerCase().includes(lowerCasedInputValue) 17 | ); 18 | }; 19 | } 20 | 21 | interface DocumentSelectComboboxProps { 22 | selectedItem: Ticker | null; 23 | setSelectedItem: (ticker: Ticker) => void; 24 | availableDocuments: Ticker[]; 25 | shouldFocusTicker: boolean; 26 | setFocusState: Dispatch>; 27 | } 28 | 29 | export const DocumentSelectCombobox: React.FC = ({ 30 | selectedItem, 31 | availableDocuments, 32 | setSelectedItem, 33 | shouldFocusTicker, 34 | setFocusState, 35 | }) => { 36 | const [focusRef, setFocus] = useFocus(); 37 | 38 | useEffect(() => { 39 | if (shouldFocusTicker) { 40 | setInputValue(""); 41 | setFocus(); 42 | setFocusState(false); 43 | } 44 | }, [shouldFocusTicker]); 45 | 46 | const [filteredDocuments, setFilteredDocuments] = 47 | useState(availableDocuments); 48 | 49 | useEffect(() => { 50 | setFilteredDocuments(availableDocuments); 51 | }, [availableDocuments]); 52 | 53 | const { 54 | isOpen, 55 | getMenuProps, 56 | getInputProps, 57 | highlightedIndex, 58 | getItemProps, 59 | setInputValue, 60 | } = useCombobox({ 61 | onInputValueChange({ inputValue }) { 62 | if (inputValue) { 63 | setFilteredDocuments( 64 | availableDocuments.filter(getTickerFilter(inputValue)) 65 | ); 66 | } else { 67 | setFilteredDocuments(availableDocuments); 68 | } 69 | }, 70 | items: filteredDocuments, 71 | itemToString(item) { 72 | return item ? item.ticker : ""; 73 | }, 74 | selectedItem, 75 | onSelectedItemChange: ({ selectedItem: newSelectedItem }) => { 76 | if (newSelectedItem) { 77 | setSelectedItem(newSelectedItem); 78 | } 79 | }, 80 | }); 81 | return ( 82 |
83 |
84 |
85 |
86 | 87 |
88 | 94 |
95 |
96 |
    103 | {isOpen && 104 | filteredDocuments.map((item, index) => ( 105 |
  • 114 | {item.fullName} 115 | {item.ticker} 116 |
  • 117 | ))} 118 |
119 |
120 | ); 121 | }; 122 | -------------------------------------------------------------------------------- /frontend/src/components/modals/ShareLinkModal.tsx: -------------------------------------------------------------------------------- 1 | import React, { useRef, useEffect } from "react"; 2 | import Modal from "../basics/Modal"; 3 | 4 | interface ShareLinkModalProps { 5 | isOpen: boolean; 6 | toggleModal: () => void; 7 | } 8 | 9 | const ShareLinkModal: React.FC = ({ 10 | isOpen, 11 | toggleModal, 12 | }) => { 13 | const inputRef = useRef(null); 14 | 15 | const copyToClipboard = (e: React.MouseEvent) => { 16 | e.preventDefault(); 17 | inputRef.current?.select(); 18 | document.execCommand("copy"); 19 | }; 20 | 21 | useEffect(() => { 22 | if (isOpen) { 23 | inputRef.current?.select(); 24 | } 25 | }, [isOpen]); 26 | 27 | return ( 28 | 29 |

30 | Note: this is a public page. Anyone with this link can view the 31 | contents of the page. This statement is for informational purposes only 32 | and does not serve as professional financial advice. 33 |

34 | 35 |
36 | 43 | 49 |
50 |
51 | ); 52 | }; 53 | 54 | export default ShareLinkModal; 55 | -------------------------------------------------------------------------------- /frontend/src/components/pdf-viewer/DisplayMultiplePdfs.tsx: -------------------------------------------------------------------------------- 1 | import { ViewPdf } from "~/components/pdf-viewer/ViewPdf"; 2 | import { useMultiplePdfs } from "../../hooks/useMultiplePdfs"; 3 | import { SecDocument } from "~/types/document"; 4 | import cx from "classnames"; 5 | import { borderColors } from "~/utils/colors"; 6 | 7 | interface DisplayMultiplePdfsProps { 8 | pdfs: SecDocument[]; 9 | } 10 | 11 | export const DisplayMultiplePdfs: React.FC = ({ 12 | pdfs, 13 | }) => { 14 | const { isActivePdf, handlePdfFocus } = useMultiplePdfs(pdfs); 15 | 16 | return ( 17 | <> 18 |
19 | {pdfs.map((file) => { 20 | return ( 21 |
25 | 26 |
27 | ); 28 | })} 29 | 30 |
31 |
32 | {pdfs.map((file, index) => ( 33 |
34 | 57 |
58 | ))} 59 |
60 |
61 |
62 | 63 | ); 64 | }; 65 | 66 | export default DisplayMultiplePdfs; 67 | -------------------------------------------------------------------------------- /frontend/src/components/pdf-viewer/PdfOptionsBar.tsx: -------------------------------------------------------------------------------- 1 | // PDFOptionsBar.tsx 2 | import { useEffect, useState } from "react"; 3 | import { 4 | HiMiniMagnifyingGlassMinus, 5 | HiMiniMagnifyingGlassPlus, 6 | } from "react-icons/hi2"; 7 | import { PiCaretDownBold, PiCaretUpBold } from "react-icons/pi"; 8 | import { zoomLevels } from "~/hooks/usePdfViewer"; 9 | import { SecDocument } from "~/types/document"; 10 | import { borderColors } from "~/utils/colors"; 11 | 12 | interface PDFOptionsBarProps { 13 | file: SecDocument; 14 | scrolledIndex: number; 15 | numPages: number; 16 | scaleText: string; 17 | nextPage: () => void; 18 | prevPage: () => void; 19 | handleZoomIn: () => void; 20 | handleZoomOut: () => void; 21 | goToPage: (n: number) => void; 22 | setZoomLevel: (percent: string) => void; 23 | zoomInEnabled: boolean; 24 | zoomOutEnabled: boolean; 25 | } 26 | 27 | export const PDFOptionsBar: React.FC = ({ 28 | file, 29 | scrolledIndex, 30 | numPages, 31 | scaleText, 32 | nextPage, 33 | prevPage, 34 | handleZoomIn, 35 | handleZoomOut, 36 | goToPage, 37 | setZoomLevel, 38 | zoomInEnabled, 39 | zoomOutEnabled, 40 | }) => { 41 | const [zoomPopoverOpen, setZoomPopoverOpen] = useState(false); 42 | 43 | const handleZoomSelection = (zoom: string) => { 44 | setZoomLevel(zoom); 45 | setZoomPopoverOpen(false); 46 | }; 47 | 48 | const [inputValue, setInputValue] = useState(`${scrolledIndex + 1}`); 49 | 50 | useEffect(() => { 51 | setInputValue(`${scrolledIndex + 1}`); 52 | }, [scrolledIndex]); 53 | 54 | const handleChange = (e: React.ChangeEvent) => { 55 | setInputValue(e.target.value); 56 | }; 57 | 58 | const handleKeyDown = (e: React.KeyboardEvent) => { 59 | if (e.key === "Enter") { 60 | const value = parseInt(inputValue, 10); 61 | if (!isNaN(value) && value > 0) { 62 | scrollToPage(value - 1); 63 | } 64 | } 65 | }; 66 | 67 | const scrollToPage = (page: number) => { 68 | goToPage(page); 69 | }; 70 | 71 | return ( 72 |
75 |
76 |
81 |
{file.ticker}
82 |
83 | {" "} 84 | {file.year} {file.quarter && `Q${file.quarter}`} 85 |
86 |
87 |
88 |
89 |
90 |
91 | 98 |
99 | 105 |
106 |
/ {numPages}
107 | 114 |
115 |
{" "} 116 |
117 |
118 | 125 |
setZoomPopoverOpen(!zoomPopoverOpen)} 128 | > 129 |
130 | {scaleText} 131 | {!zoomPopoverOpen ? ( 132 | 133 | ) : ( 134 | 135 | )} 136 |
137 |
138 | {zoomPopoverOpen && ( 139 |
140 | {zoomLevels.map((zoom, index) => ( 141 | 148 | ))} 149 |
150 | )} 151 | 158 |
159 |
160 |
161 |
162 |
163 | ); 164 | }; 165 | -------------------------------------------------------------------------------- /frontend/src/components/pdf-viewer/ViewPdf.tsx: -------------------------------------------------------------------------------- 1 | // ViewPdf.tsx 2 | import usePDFViewer from "~/hooks/usePdfViewer"; 3 | import { PDFOptionsBar } from "./PdfOptionsBar"; 4 | import React from "react"; 5 | import MemoizedVirtualizedPDF from "./VirtualizedPdf"; 6 | import { SecDocument } from "~/types/document"; 7 | 8 | interface ViewPdfProps { 9 | file: SecDocument; 10 | } 11 | 12 | export const ViewPdf: React.FC = ({ file }) => { 13 | const { 14 | scrolledIndex, 15 | setCurrentPageNumber, 16 | scale, 17 | setScaleFit, 18 | numPages, 19 | setNumPages, 20 | handleZoomIn, 21 | handleZoomOut, 22 | nextPage, 23 | prevPage, 24 | scaleText, 25 | pdfFocusRef, 26 | goToPage, 27 | setZoomLevel, 28 | zoomInEnabled, 29 | zoomOutEnabled, 30 | } = usePDFViewer(file); 31 | 32 | return ( 33 |
34 | {scaleText && ( 35 | 49 | )} 50 | 51 | 60 |
61 | ); 62 | }; 63 | -------------------------------------------------------------------------------- /frontend/src/components/pdf-viewer/pdfDisplayConstants.tsx: -------------------------------------------------------------------------------- 1 | export const VERTICAL_GUTTER_SIZE_PX = 20; 2 | export const HORIZONTAL_GUTTER_SIZE_PX = 20; 3 | export const PAGE_HEIGHT = 792; 4 | export const PDF_WIDTH_PERCENTAGE = 56; 5 | export const PDF_HEIGHT_PERCENTAGE = 94; 6 | export const OBSERVER_THRESHOLD_PERCENTAGE = 0.4; 7 | 8 | export const PDF_HEADER_SIZE_PX = 44; 9 | export const PDF_SIDEBAR_SIZE_PX = 80; 10 | -------------------------------------------------------------------------------- /frontend/src/config.js: -------------------------------------------------------------------------------- 1 | import { env } from "~/env.mjs"; 2 | 3 | if (env.NEXT_PUBLIC_CODESPACES === 'true' && env.NEXT_PUBLIC_CODESPACE_NAME) { 4 | const suggestedUrl = `https://${env.NEXT_PUBLIC_CODESPACE_NAME}-8000.app.github.dev/`; 5 | if (!env.NEXT_PUBLIC_BACKEND_URL.startsWith(suggestedUrl)) { 6 | console.warn(`It looks like you're running on a Github codespace. You may want to set the NEXT_PUBLIC_BACKEND_URL environment variable to ${suggestedUrl}`); 7 | } 8 | } 9 | 10 | export const backendUrl = env.NEXT_PUBLIC_BACKEND_URL; 11 | 12 | -------------------------------------------------------------------------------- /frontend/src/constants.tsx: -------------------------------------------------------------------------------- 1 | export const GOOGLE_ANALYTICS_ID = "G-LGHB46ZGWR"; 2 | export const INTERCOM_ID = "rx71g1uo"; 3 | // TODO: Populate with your own Sentry DSN: 4 | // https://docs.sentry.io/product/sentry-basics/concepts/dsn-explainer/ 5 | export const SENTRY_DSN: string | undefined = undefined; 6 | -------------------------------------------------------------------------------- /frontend/src/context/pdf.tsx: -------------------------------------------------------------------------------- 1 | import React, { createContext, useState, useContext } from "react"; 2 | import type { Citation } from "~/types/conversation"; 3 | 4 | interface PdfFocusState { 5 | documentId: string; 6 | pageNumber: number; 7 | citation?: Citation; 8 | } 9 | 10 | interface PdfFocusContextProps { 11 | pdfFocusState: PdfFocusState; 12 | setPdfFocusState: React.Dispatch>; 13 | } 14 | 15 | // Initialize Context 16 | const PdfFocusContext = createContext( 17 | undefined 18 | ); 19 | 20 | interface PdfFocusProviderProps { 21 | children: React.ReactNode; 22 | } 23 | // PDF Provider 24 | export const PdfFocusProvider: React.FC = ({ 25 | children, 26 | }) => { 27 | const [pdfFocusState, setPdfFocusState] = useState({ 28 | documentId: "", 29 | pageNumber: 0, 30 | }); 31 | 32 | return ( 33 | 39 | {children} 40 | 41 | ); 42 | }; 43 | 44 | // Custom Hook to use PDF Context 45 | export const usePdfFocus = (): PdfFocusContextProps => { 46 | const context = useContext(PdfFocusContext); 47 | if (context === undefined) { 48 | throw new Error("usePDF must be used within a PDFProvider"); 49 | } 50 | return context; 51 | }; 52 | -------------------------------------------------------------------------------- /frontend/src/env.mjs: -------------------------------------------------------------------------------- 1 | import { createEnv } from "@t3-oss/env-nextjs"; 2 | import { z } from "zod"; 3 | 4 | export const env = createEnv({ 5 | /** 6 | * Specify your server-side environment variables schema here. 
This way you can ensure the app 7 | * isn't built with invalid env vars. 8 | */ 9 | server: { 10 | NODE_ENV: z.enum(["development", "test", "production"]), 11 | }, 12 | 13 | /** 14 | * Specify your client-side environment variables schema here. This way you can ensure the app 15 | * isn't built with invalid env vars. To expose them to the client, prefix them with 16 | * `NEXT_PUBLIC_`. 17 | */ 18 | client: { 19 | NEXT_PUBLIC_BACKEND_URL: z.string().min(1), 20 | NEXT_PUBLIC_CODESPACES: z.string().default("false").optional(), 21 | NEXT_PUBLIC_CODESPACE_NAME: z.string().optional(), 22 | }, 23 | 24 | /** 25 | * You can't destruct `process.env` as a regular object in the Next.js edge runtimes (e.g. 26 | * middlewares) or client-side so we need to destruct manually. 27 | */ 28 | runtimeEnv: { 29 | NODE_ENV: process.env.NODE_ENV, 30 | NEXT_PUBLIC_BACKEND_URL: process.env.NEXT_PUBLIC_BACKEND_URL, 31 | NEXT_PUBLIC_CODESPACES: process.env.CODESPACES, 32 | NEXT_PUBLIC_CODESPACE_NAME: process.env.CODESPACE_NAME, 33 | }, 34 | /** 35 | * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation. 36 | * This is especially useful for Docker builds. 37 | */ 38 | skipValidation: !!process.env.SKIP_ENV_VALIDATION, 39 | }); 40 | -------------------------------------------------------------------------------- /frontend/src/hooks/useDocumentSelector.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect, useRef } from "react"; 2 | import { GroupBase } from "react-select"; 3 | import Select from "react-select/dist/declarations/src/Select"; 4 | import { SecDocument, DocumentType, Ticker } from "~/types/document"; 5 | import type { SelectOption } from "~/types/selection"; 6 | import { 7 | findDocumentById, 8 | getAllTickers, 9 | sortDocuments, 10 | sortSelectOptions, 11 | } from "~/utils/documents"; 12 | import { 13 | documentTypeOptions, 14 | getAvailableYears, 15 | } from "~/utils/landing-page-selection"; 16 | import useLocalStorage from "./utils/useLocalStorage"; 17 | import { backendClient } from "~/api/backend"; 18 | 19 | export const MAX_NUMBER_OF_SELECTED_DOCUMENTS = 10; 20 | 21 | export const useDocumentSelector = () => { 22 | const [availableDocuments, setAvailableDocuments] = useState( 23 | [] 24 | ); 25 | const [availableTickers, setAvailableTickers] = useState([]); 26 | const availableDocumentTypes = documentTypeOptions; 27 | const [availableYears, setAvailableYears] = useState( 28 | null 29 | ); 30 | 31 | const sortedAvailableYears = sortSelectOptions(availableYears); 32 | 33 | useEffect(() => { 34 | setAvailableTickers(getAllTickers(availableDocuments)); 35 | }, [availableDocuments]); 36 | 37 | useEffect(() => { 38 | async function getDocuments() { 39 | const docs = await backendClient.fetchDocuments(); 40 | setAvailableDocuments(docs); 41 | } 42 | getDocuments().catch(() => console.error("could not fetch documents")); 43 | }, []); 44 | 45 | const [selectedDocuments, setSelectedDocuments] = useLocalStorage< 46 | SecDocument[] 47 | >("selectedDocuments", []); 48 | const sortedSelectedDocuments = sortDocuments(selectedDocuments); 49 | 50 | const [selectedTicker, setSelectedTicker] = useState(null); 51 | const [selectedDocumentType, setSelectedDocumentType] = 52 | useState(null); 53 | const [selectedYear, setSelectedYear] = useState(null); 54 | 55 | const handleAddDocument = () => { 56 | if (selectedTicker && selectedDocumentType && selectedYear) { 57 | setSelectedDocuments((prevDocs = []) => { 58 | if (prevDocs.find((doc) => 
59 |           return prevDocs;
60 |         }
61 |         const newDoc = findDocumentById(selectedYear.value, availableDocuments);
62 |         return newDoc ? [newDoc, ...prevDocs] : prevDocs;
63 |       });
64 |       setSelectedTicker(null);
65 |       setSelectedDocumentType(null);
66 |       setSelectedYear(null);
67 |       setShouldFocusCompanySelect(true);
68 |     }
69 |   };
70 | 
71 |   const handleRemoveDocument = (documentIndex: number) => {
72 |     setSelectedDocuments((prevDocs) =>
73 |       prevDocs.filter((_, index) => index !== documentIndex)
74 |     );
75 |   };
76 | 
77 |   useEffect(() => {
78 |     setSelectedDocumentType(null);
79 |     setSelectedYear(null);
80 |   }, [selectedTicker]);
81 | 
82 |   useEffect(() => {
83 |     setSelectedYear(null);
84 |   }, [selectedDocumentType]);
85 | 
86 |   useEffect(() => {
87 |     if (selectedTicker && selectedDocumentType) {
88 |       setAvailableYears(
89 |         getAvailableYears(
90 |           selectedTicker?.ticker,
91 |           selectedDocumentType?.value as DocumentType,
92 |           availableDocuments
93 |         )
94 |       );
95 |     }
96 |   }, [selectedTicker, selectedDocumentType, availableDocuments]);
97 | 
98 |   useEffect(() => {
99 |     const handleKeyDown = (event: KeyboardEvent) => {
100 |       if (
101 |         (event.key === "Enter" && event.shiftKey) ||
102 |         (event.key === "Enter" && event.metaKey)
103 |       ) {
104 |         handleAddDocument();
105 |       }
106 |       if (event.key === "k" && event.metaKey) {
107 |         setShouldFocusCompanySelect(true);
108 |       }
109 |     };
110 |     document.addEventListener("keydown", handleKeyDown);
111 |     return () => {
112 |       document.removeEventListener("keydown", handleKeyDown);
113 |     };
114 |   }, [handleAddDocument]);
115 | 
116 |   const isDocumentSelectionEnabled =
117 |     selectedDocuments.length < MAX_NUMBER_OF_SELECTED_DOCUMENTS;
118 | 
119 |   const isStartConversationButtonEnabled = selectedDocuments.length > 0;
120 | 
121 |   const selectTicker = (ticker: Ticker) => {
122 |     setSelectedTicker(ticker);
123 |     setFocusDocumentType(true);
124 |   };
125 | 
126 |   const selectDocumentType = (docType: SelectOption | null) => {
127 |     setSelectedDocumentType(docType);
128 |     setFocusYear(true);
129 |   };
130 | 
131 |   const [shouldFocusCompanySelect, setShouldFocusCompanySelect] =
132 |     useState(false);
133 | 
134 |   const [focusYear, setFocusYear] = useState(false);
135 |   const yearFocusRef = useRef<Select<
136 |     SelectOption,
137 |     false,
138 |     GroupBase<SelectOption>
139 |   > | null>(null);
140 | 
141 |   useEffect(() => {
142 |     if (focusYear && yearFocusRef.current) {
143 |       yearFocusRef.current?.focus();
144 |       setFocusYear(false);
145 |     }
146 |   }, [focusYear]);
147 | 
148 |   const [focusDocumentType, setFocusDocumentType] = useState(false);
149 |   const documentTypeFocusRef = useRef<Select<
150 |     SelectOption,
151 |     false,
152 |     GroupBase<SelectOption>
153 |   > | null>(null);
154 | 
155 |   useEffect(() => {
156 |     if (focusDocumentType && documentTypeFocusRef.current) {
157 |       documentTypeFocusRef.current?.focus();
158 |       setFocusDocumentType(false);
159 |     }
160 |   }, [focusDocumentType]);
161 | 
162 |   return {
163 |     availableDocuments,
164 |     availableTickers,
165 |     availableDocumentTypes,
166 |     availableYears,
167 |     sortedAvailableYears,
168 |     selectedDocuments,
169 |     sortedSelectedDocuments,
170 |     selectedTicker,
171 |     selectedDocumentType,
172 |     selectedYear,
173 |     setSelectedYear,
174 |     handleAddDocument,
175 |     handleRemoveDocument,
176 |     isDocumentSelectionEnabled,
177 |     isStartConversationButtonEnabled,
178 |     yearFocusRef,
179 |     documentTypeFocusRef,
180 |     selectTicker,
181 |     selectDocumentType,
182 |     shouldFocusCompanySelect,
183 |     setShouldFocusCompanySelect,
184 |   };
185 | };
186 | 
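A minimal sketch of wiring this hook into a component — hypothetical markup; the real consumer in this repo is TitleAndDropdown:

// DocumentPicker.tsx — illustrative only; TitleAndDropdown is the actual consumer
import React from "react";
import { useDocumentSelector } from "~/hooks/useDocumentSelector";

export const DocumentPicker: React.FC = () => {
  const {
    availableTickers,
    selectedDocuments,
    selectTicker,
    handleAddDocument,
    isDocumentSelectionEnabled,
  } = useDocumentSelector();

  return (
    <div>
      {/* Picking a ticker also focuses the document-type select via setFocusDocumentType */}
      {availableTickers.map((ticker) => (
        <button key={ticker.ticker} onClick={() => selectTicker(ticker)}>
          {ticker.fullName}
        </button>
      ))}
      {/* Shift+Enter / Cmd+Enter triggers the same add via the keydown listener */}
      <button onClick={handleAddDocument} disabled={!isDocumentSelectionEnabled}>
        Add document ({selectedDocuments.length} selected)
      </button>
    </div>
  );
};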
--------------------------------------------------------------------------------
/frontend/src/hooks/useMessages.tsx:
--------------------------------------------------------------------------------
1 | // hooks/useMessages.js
2 | import { useState } from "react";
3 | import { v4 as uuidv4 } from "uuid";
4 | import { ROLE, MESSAGE_STATUS } from "~/types/conversation";
5 | import type { Message } from "~/types/conversation";
6 | import { getDateWithUTCOffset } from "~/utils/timezone";
7 | 
8 | const useMessages = (conversationId: string) => {
9 |   const [messages, setMessages] = useState<Message[]>([]);
10 | 
11 |   const userSendMessage = (content: string) => {
12 |     setMessages((prevMessages) => [
13 |       ...prevMessages,
14 | 
15 |       {
16 |         id: uuidv4(),
17 |         conversationId,
18 |         content,
19 |         role: ROLE.USER,
20 |         status: MESSAGE_STATUS.PENDING,
21 |         created_at: getDateWithUTCOffset(),
22 |       },
23 |     ]);
24 |   };
25 | 
26 |   const systemSendMessage = (message: Message) => {
27 |     setMessages((prevMessages) => {
28 |       const existingMessageIndex = prevMessages.findIndex(
29 |         (msg) => msg.id === message.id
30 |       );
31 | 
32 |       // Update the existing message
33 |       if (existingMessageIndex > -1) {
34 |         const updatedMessages = [...prevMessages];
35 |         updatedMessages[existingMessageIndex] = message;
36 |         return updatedMessages;
37 |       }
38 | 
39 |       // Add a new message if it doesn't exist
40 |       return [...prevMessages, message];
41 |     });
42 |   };
43 | 
44 |   return {
45 |     messages,
46 |     userSendMessage,
47 |     setMessages,
48 |     systemSendMessage,
49 |   };
50 | };
51 | 
52 | export default useMessages;
53 | 
--------------------------------------------------------------------------------
/frontend/src/hooks/useMultiplePdfs.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useState } from "react";
2 | import { usePdfFocus } from "~/context/pdf";
3 | import { SecDocument } from "~/types/document";
4 | 
5 | export const useMultiplePdfs = (pdfs: SecDocument[]) => {
6 |   const [activePdfUrl, setActivePdfUrl] = useState("");
7 |   const { pdfFocusState } = usePdfFocus();
8 | 
9 |   useEffect(() => {
10 |     if (pdfs && pdfs[0]) {
11 |       setActivePdfUrl(pdfs[0].url);
12 |     }
13 |   }, [pdfs]);
14 | 
15 |   useEffect(() => {
16 |     if (pdfFocusState.documentId) {
17 |       const selectedPdf = pdfs.find(
18 |         (doc) => doc.id == pdfFocusState.documentId
19 |       );
20 |       if (selectedPdf) {
21 |         setActivePdfUrl(selectedPdf.url);
22 |       }
23 |     }
24 |   }, [pdfFocusState.pageNumber, pdfFocusState.documentId, setActivePdfUrl]);
25 | 
26 |   const isActivePdf = (file: SecDocument) => {
27 |     return file.url == activePdfUrl;
28 |   };
29 | 
30 |   const handlePdfFocus = (file: SecDocument) => {
31 |     setActivePdfUrl(file.url);
32 |   };
33 | 
34 |   return {
35 |     activePdfUrl,
36 |     isActivePdf,
37 |     handlePdfFocus,
38 |   };
39 | };
40 | 
--------------------------------------------------------------------------------
/frontend/src/hooks/usePdfViewer.tsx:
--------------------------------------------------------------------------------
1 | // usePDFViewer.ts
2 | import { useState, useEffect, useCallback } from "react";
3 | import { usePdfFocus } from "~/context/pdf";
4 | 
5 | import type { PdfFocusHandler } from "~/components/pdf-viewer/VirtualizedPdf";
6 | import React from "react";
7 | import { SecDocument } from "~/types/document";
8 | 
9 | export const zoomLevels = [
10 |   "50%",
11 |   "80%",
12 |   "100%",
13 |   "130%",
14 |   "200%",
15 |   "300%",
16 |   "400%",
17 | ];
18 | const startZoomLevelIdx = 2;
19 | 
20 | const usePDFViewer = (file: SecDocument) => {
21 |   const [scrolledIndex, setScrolledIndex] = useState(1);
22 |   const [scale, setScale] =
useState(1.0); 23 | const [scaleFit, setScaleFit] = useState(1.0); 24 | const [numPages, setNumPages] = useState(0); 25 | const [isPdfRendered, setIsPdfRendered] = useState(false); 26 | const [zoomLevelIdx, setZoomLevelIdx] = useState(startZoomLevelIdx); 27 | 28 | const { pdfFocusState } = usePdfFocus(); 29 | 30 | const pdfFocusRef = React.useRef(null); 31 | 32 | const goToPage = (page: number) => { 33 | if (pdfFocusRef.current) { 34 | pdfFocusRef.current.scrollToPage(page); 35 | } 36 | }; 37 | 38 | useEffect(() => { 39 | const activeDocumentId = pdfFocusState.documentId; 40 | if (activeDocumentId === file.id) { 41 | if (pdfFocusState.pageNumber) { 42 | goToPage(pdfFocusState.pageNumber - 1); 43 | } 44 | } 45 | }, [file, pdfFocusState]); 46 | 47 | const setCurrentPageNumber = useCallback((n: number) => { 48 | setScrolledIndex(n); 49 | }, []); 50 | 51 | const handleZoomIn = useCallback(() => { 52 | const nextLevel = zoomLevelIdx + 1; 53 | if (nextLevel >= zoomLevels.length) { 54 | return; 55 | } 56 | setZoomLevel(zoomLevels[nextLevel] || "100%"); 57 | }, [zoomLevelIdx, scrolledIndex, pdfFocusRef]); 58 | 59 | const handleZoomOut = useCallback(() => { 60 | const nextLevel = zoomLevelIdx - 1; 61 | if (nextLevel < 0) { 62 | return; 63 | } 64 | setZoomLevel(zoomLevels[nextLevel] || "100%"); 65 | }, [zoomLevelIdx, scrolledIndex, pdfFocusRef]); 66 | 67 | const nextPage = () => { 68 | goToPage(scrolledIndex + 1); 69 | }; 70 | 71 | const prevPage = () => { 72 | goToPage(scrolledIndex - 1); 73 | }; 74 | 75 | const toPercentPlusBase = (n: number) => { 76 | return `${100 + n * 100}%`; 77 | }; 78 | 79 | const setZoomLevel = useCallback( 80 | (zoomLevel: string) => { 81 | const newZoomLevelIdx = zoomLevels.indexOf(zoomLevel); 82 | const newScale = percentToScale(zoomLevel) + scaleFit - 1; 83 | setScale(newScale); 84 | setTimeout(() => { 85 | goToPage(scrolledIndex); 86 | }, 30); 87 | setZoomLevelIdx(newZoomLevelIdx); 88 | }, 89 | [scrolledIndex] 90 | ); 91 | 92 | function percentToScale(percent: string): number { 93 | const number = parseInt(percent, 10); 94 | return number / 100; 95 | } 96 | 97 | const scaleDiff = Math.round((scale - scaleFit) * 10) / 10; 98 | const scaleText = toPercentPlusBase(scaleDiff); 99 | 100 | useEffect(() => { 101 | setScale(scaleFit); 102 | }, [scaleFit]); 103 | 104 | const zoomInEnabled = zoomLevelIdx < zoomLevels.length - 1; 105 | const zoomOutEnabled = zoomLevelIdx > 0; 106 | 107 | return { 108 | scrolledIndex, 109 | setCurrentPageNumber, 110 | scale, 111 | setScaleFit, 112 | numPages, 113 | setNumPages, 114 | handleZoomIn, 115 | handleZoomOut, 116 | nextPage, 117 | prevPage, 118 | scaleText, 119 | isPdfRendered, 120 | setIsPdfRendered, 121 | pdfFocusRef, 122 | goToPage, 123 | setZoomLevel, 124 | zoomInEnabled, 125 | zoomOutEnabled, 126 | }; 127 | }; 128 | 129 | export default usePDFViewer; 130 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useFocus.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | // https://gist.github.com/carpben/de968e377cbac0ffbdefe1ab56237573 4 | export default function useFocus() { 5 | const ref = React.useRef(null); 6 | const setFocus = () => ref?.current?.focus?.(); 7 | 8 | return [ref, setFocus] as const; 9 | } 10 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useIsMobile.tsx: 
-------------------------------------------------------------------------------- 1 | import { useWindowWidth } from "@wojtekmaj/react-hooks"; 2 | import { useEffect, useState } from "react"; 3 | 4 | export const MOBILE_BREAKPOINT = 768; 5 | export default function useIsMobile() { 6 | const windowWidth = useWindowWidth(); 7 | const [isMobile, setIsMobile] = useState(false); 8 | useEffect(() => { 9 | if ((windowWidth || 0) < MOBILE_BREAKPOINT) { 10 | setIsMobile(true); 11 | } else { 12 | setIsMobile(false); 13 | } 14 | }, [windowWidth]); 15 | 16 | return { isMobile }; 17 | } 18 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useLocalStorage.ts: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from "react"; 2 | 3 | function useLocalStorage( 4 | key: string, 5 | initialValue: T 6 | ): [T, (value: T | ((val: T) => T)) => void] { 7 | const [storedValue, setStoredValue] = useState(initialValue); 8 | 9 | useEffect(() => { 10 | try { 11 | const item = window.localStorage.getItem(key); 12 | if (item) { 13 | setStoredValue(JSON.parse(item) as T); 14 | } 15 | } catch (error) { 16 | console.error(error); 17 | } 18 | }, [key]); 19 | 20 | const setValue = (value: T | ((val: T) => T)) => { 21 | try { 22 | const valueToStore = 23 | value instanceof Function ? value(storedValue) : value; 24 | setStoredValue(valueToStore); 25 | window.localStorage.setItem(key, JSON.stringify(valueToStore)); 26 | } catch (error) { 27 | console.error(error); 28 | } 29 | }; 30 | 31 | return [storedValue, setValue]; 32 | } 33 | 34 | export default useLocalStorage; 35 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useModal.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | 3 | export const useModal = () => { 4 | const [isOpen, setIsOpen] = useState(false); 5 | 6 | const toggleModal = () => { 7 | setIsOpen(!isOpen); 8 | }; 9 | 10 | return { 11 | isOpen, 12 | toggleModal, 13 | }; 14 | }; 15 | -------------------------------------------------------------------------------- /frontend/src/hooks/utils/useScrollBreakpoint.tsx: -------------------------------------------------------------------------------- 1 | import { useRef, useEffect, useState } from "react"; 2 | 3 | export const useScrollBreakpoint = (offset = 0) => { 4 | const ref = useRef(null); 5 | const [breakpoint, setBreakpoint] = useState(0); 6 | 7 | useEffect(() => { 8 | const setTop = () => { 9 | if (ref.current) { 10 | const rect = ref.current.getBoundingClientRect(); 11 | setBreakpoint(rect.top + window.scrollY - rect.height + offset); 12 | } 13 | }; 14 | 15 | window.addEventListener("load", setTop); 16 | window.addEventListener("resize", setTop); 17 | 18 | return () => { 19 | window.removeEventListener("load", setTop); 20 | window.removeEventListener("resize", setTop); 21 | }; 22 | }, []); 23 | 24 | return { ref, breakpoint }; 25 | }; 26 | 27 | export default useScrollBreakpoint; 28 | -------------------------------------------------------------------------------- /frontend/src/modules/react-pdf.d.ts: -------------------------------------------------------------------------------- 1 | // we have to use react-pdf 6.2.2 instead of 2 | // 7.^ because of a known text-layer issue. 
3 | // There are no types for this early version,
4 | // so we need to declare a module file to get
5 | // rid of type compilation issues
6 | declare module "react-pdf";
7 | 
--------------------------------------------------------------------------------
/frontend/src/pages/_app.tsx:
--------------------------------------------------------------------------------
1 | import { type AppType } from "next/dist/shared/lib/utils";
2 | import Layout from "~/components/Layout";
3 | import "~/styles/globals.css";
4 | import ReactGA from "react-ga4";
5 | 
6 | import { IntercomProvider } from "react-use-intercom";
7 | import { GOOGLE_ANALYTICS_ID, INTERCOM_ID } from "~/constants";
8 | 
9 | ReactGA.initialize(GOOGLE_ANALYTICS_ID);
10 | 
11 | const MyApp: AppType = ({ Component, pageProps }) => {
12 |   return (
13 |     <>
14 |       <IntercomProvider appId={INTERCOM_ID}>
15 |         <Layout>
16 |           <Component {...pageProps} />
17 |         </Layout>
18 |       </IntercomProvider>
19 |     </>
20 |   );
21 | };
22 | 
23 | export default MyApp;
24 | 
--------------------------------------------------------------------------------
/frontend/src/pages/_document.tsx:
--------------------------------------------------------------------------------
1 | // pages/_document.js
2 | import { Html, Head, Main, NextScript } from "next/document";
3 | 
4 | export default function Document() {
5 |   return (
6 |     <Html lang="en">
7 |       <Head>
8 |         <link
9 |           rel="preconnect"
10 |           href="https://fonts.googleapis.com"
11 |         />
12 |         <link
13 |           href="https://fonts.googleapis.com/css2?family=Lora&family=Nunito+Sans&display=swap"
14 |           rel="stylesheet"
15 |         />
16 |       </Head>
17 | 
18 |       <body>
19 |         <Main />
20 |         <NextScript />
21 |       </body>
22 |     </Html>
23 |   );
24 | }
25 | 
--------------------------------------------------------------------------------
/frontend/src/pages/_error.tsx:
--------------------------------------------------------------------------------
1 | import { NextPageContext } from "next";
2 | import React from "react";
3 | 
4 | interface ErrorProps {
5 |   statusCode?: number;
6 | }
7 | 
8 | const ErrorPage = ({ statusCode }: ErrorProps): JSX.Element => {
9 |   return (
10 |     <div>
11 |       {statusCode
12 |         ? `An error ${statusCode} occurred on server`
13 |         : "An error occurred on client"}
14 |     </div>
15 |   );
16 | };
17 | 
18 | ErrorPage.getInitialProps = ({ res, err }: NextPageContext) => {
19 |   const statusCode = res ? res.statusCode : err ? err.statusCode : 404;
20 |   return { statusCode };
21 | };
22 | 
23 | export default ErrorPage;
24 | 
--------------------------------------------------------------------------------
/frontend/src/pages/index.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | 
3 | import type { NextPage } from "next";
4 | import { MarketingSection } from "~/components/landing-page/MarketingSection";
5 | import { TitleAndDropdown } from "~/components/landing-page/TitleAndDropdown";
6 | 
7 | const LandingPage: NextPage = () => {
8 |   return (
9 |     <>
10 |       <TitleAndDropdown />
11 |       <MarketingSection />
12 |     </>
13 |   );
14 | };
15 | export default LandingPage;
16 | 
--------------------------------------------------------------------------------
/frontend/src/styles/globals.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 | 
5 | *,
6 | *::before,
7 | *::after {
8 |   box-sizing: border-box;
9 | }
10 | 
11 | @keyframes spin {
12 |   0% {
13 |     transform: rotate(0deg);
14 |   }
15 |   100% {
16 |     transform: rotate(360deg);
17 |   }
18 | }
19 | 
20 | .loader {
21 |   border-top-color: #9ca3af; /* The color of your spinner */
22 |   animation: spin 1s linear infinite;
23 | }
24 | 
25 | @layer components {
26 |   .landing-page-gradient-1 {
27 |     background-color: hsla(0,0%,100%,1);
28 |     background-image:
29 |       radial-gradient(at 21% 11%, hsla(240,51%,82%,0.53) 0px, transparent 50%),
30 |       radial-gradient(at 85% 0%, hsla(46,57%,78%,0.52) 0px, transparent 50%),
31 |       radial-gradient(at 91% 36%, hsla(221,100%,88%,0.68) 0px, transparent 50%),
32 |       radial-gradient(at 8% 40%, hsla(323,82%,92%,0.46) 0px, transparent 50%);
33 |   }
34 | }
35 | 
36 | @layer components {
37 |   .landing-page-gradient-2 {
38 |     background-color: hsla(41, 0%, 100%, .5);
39 |     background-image:
40 |       radial-gradient(at 68% 65%, hsla(207, 75%, 92%, .5) 0px, transparent 20%),
41 |       radial-gradient(at 100% 38%, hsla(257, 98%, 92%, .5) 0px, transparent 20%),
42 |       radial-gradient(at 85% 56%, hsla(219, 0%, 100%, 0.7) 0px, transparent 20%),
43 |       radial-gradient(at 67% 82%, hsla(323, 0%, 100%, .5) 0px, transparent 20%),
44 |       radial-gradient(at 73% 46%, hsla(176, 72%, 92%, .5) 0px, transparent 20%),
45 |       radial-gradient(at 51% 53%, hsla(317, 60%, 92%, .5) 0px, transparent 20%);
46 |   }
47 | }
48 | 
49 | @layer components {
50 |   .landing-page-gradient-3 {
51 |     background-color: hsla(0,0%,100%,1);
52 |     background-image:
53 |       radial-gradient(at 26% 56%, hsla(207,40%,91%,.5) 0px, transparent 20%),
54 |       radial-gradient(at 19% 43%, hsla(257,40%,91%,.51) 0px, transparent 20%),
55 |       radial-gradient(at 56% 54%, hsla(323,40%,91%,.51) 0px, transparent 20%),
56 |       radial-gradient(at 44% 62%, hsla(176,40%,91%,.51) 0px, transparent 20%),
57 |       radial-gradient(at 57% 45%, hsla(317,40%,91%,.51) 0px, transparent 20%);
58 |   }
59 | }
60 | 
61 | @layer components {
62 |   .landing-page-gradient-4 {
63 |     background-color: hsla(0,0%,100%,1);
64 |     background-image:
65 |       radial-gradient(at 79% 89%, hsla(240,51%,82%,0.23) 0px, transparent 20%), /* 21% 11% -> 79% 89% */
66 |       radial-gradient(at 15% 100%, hsla(46,57%,78%,0.22) 0px, transparent 20%), /* 85% 0% -> 15% 100% */
67 |       radial-gradient(at 9% 64%, hsla(221,100%,88%,0.28) 0px, transparent 20%), /* 91% 36% -> 9% 64% */
68 |       radial-gradient(at 92% 60%, hsla(323,82%,92%,0.26) 0px, transparent 20%); /*
8% 40% -> 92% 60% */ 69 | } 70 | } 71 | 72 | 73 | @layer components { 74 | .landing-page-gradient-5 { 75 | background-color: hsla(0,0%,100%,1); 76 | background-image: 77 | radial-gradient(at 21% 11%, hsla(240,51%,82%,0.83) 0px, transparent 20%), 78 | radial-gradient(at 85% 0%, hsla(46,57%,78%,0.82) 0px, transparent 20%), 79 | radial-gradient(at 91% 36%, hsla(221,100%,88%,0.88) 0px, transparent 20%), 80 | radial-gradient(at 8% 40%, hsla(323,82%,92%,0.86) 0px, transparent 20%); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /frontend/src/styles/react-select.tsx: -------------------------------------------------------------------------------- 1 | export const customReactSelectStyles = { 2 | // eslint-disable-next-line @typescript-eslint/no-unsafe-return 3 | control: (base: any, state: { isFocused: any }) => ({ 4 | ...base, 5 | background: "#F7F7F7", 6 | borderRadius: 0, 7 | borderWidth: 0, 8 | boxShadow: state.isFocused ? 0 : 0, 9 | "&:hover": { 10 | border: "0", 11 | }, 12 | }), 13 | option: (styles: any, { isFocused, isSelected }: any) => { 14 | // eslint-disable-next-line @typescript-eslint/no-unsafe-return 15 | return { 16 | ...styles, 17 | backgroundColor: isSelected ? "#3B3775" : isFocused ? "#817AF2" : null, 18 | color: isFocused ? "white" : isSelected ? "white" : "black", 19 | }; 20 | }, 21 | }; 22 | -------------------------------------------------------------------------------- /frontend/src/svgs/right-arrow.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | const RightArrow = ( 3 | props: React.JSX.IntrinsicAttributes & React.SVGProps 4 | ) => ( 5 | 12 | 16 | 17 | ); 18 | export default RightArrow; 19 | -------------------------------------------------------------------------------- /frontend/src/types/backend/document.tsx: -------------------------------------------------------------------------------- 1 | export enum BackendDocumentType { 2 | TenK = "10-K", 3 | TenQ = "10-Q", 4 | } 5 | 6 | export interface BackendDocument { 7 | created_at: string; 8 | id: string; 9 | updated_at: string; 10 | metadata_map: BackendMetadataMap; 11 | url: string; 12 | } 13 | 14 | export interface BackendMetadataMap { 15 | sec_document: BackendSecDocument; 16 | } 17 | 18 | export interface BackendSecDocument { 19 | company_name: string; 20 | company_ticker: string; 21 | doc_type: BackendDocumentType; 22 | year: number; 23 | quarter: number; 24 | } 25 | -------------------------------------------------------------------------------- /frontend/src/types/conversation.tsx: -------------------------------------------------------------------------------- 1 | import { DocumentColorEnum } from "~/utils/colors"; 2 | 3 | export enum MESSAGE_STATUS { 4 | PENDING = "PENDING", 5 | SUCCESS = "SUCCESS", 6 | ERROR = "ERROR", 7 | } 8 | 9 | export enum ROLE { 10 | USER = "user", 11 | ASSISTANT = "assistant", 12 | } 13 | 14 | export enum MessageSubprocessSource { 15 | PLACEHOLDER = "placeholder", 16 | } 17 | 18 | export interface hasId { 19 | id: string; 20 | } 21 | 22 | export interface Citation { 23 | documentId: string; 24 | snippet: string; 25 | pageNumber: number; 26 | ticker: string; 27 | displayDate: string; 28 | color: DocumentColorEnum; 29 | } 30 | 31 | export interface Conversation extends hasId { 32 | messages?: Message[]; 33 | } 34 | 35 | export interface Message extends hasId { 36 | content: string; 37 | role: ROLE; 38 | status: MESSAGE_STATUS; 39 | conversationId: string; 40 | 
sub_processes?: MessageSubProcess[]; 41 | created_at: Date; 42 | } 43 | export interface MessageSubProcess extends hasId { 44 | messageId: string; 45 | content: string; 46 | source: MessageSubprocessSource; 47 | metadata_map?: MetaDataMap; 48 | } 49 | 50 | export interface ParsedData { 51 | content?: string; 52 | status?: string; 53 | } 54 | 55 | export interface MetaDataMap { 56 | sub_question?: SubQuestion; 57 | sub_questions?: SubQuestion[]; 58 | } 59 | 60 | export interface SubQuestion { 61 | question: string; 62 | answer?: string; 63 | citations?: BackendCitation[]; 64 | } 65 | 66 | export interface BackendCitation { 67 | document_id: string; 68 | page_number: number; 69 | score: number; 70 | text: string; 71 | } 72 | -------------------------------------------------------------------------------- /frontend/src/types/document.tsx: -------------------------------------------------------------------------------- 1 | import { DocumentColorEnum } from "~/utils/colors"; 2 | 3 | export enum DocumentType { 4 | TenK = "Form 10K", 5 | TenQ = "Form 10Q", 6 | } 7 | 8 | export type Ticker = { 9 | ticker: string; 10 | fullName: string; 11 | }; 12 | 13 | export interface SecDocument extends Ticker { 14 | id: string; 15 | url: string; 16 | year: string; 17 | docType: DocumentType; 18 | quarter?: string; 19 | color: DocumentColorEnum; 20 | } 21 | -------------------------------------------------------------------------------- /frontend/src/types/selection.tsx: -------------------------------------------------------------------------------- 1 | export interface SelectOption { 2 | value: string; 3 | label: string; 4 | } 5 | -------------------------------------------------------------------------------- /frontend/src/utils/colors.tsx: -------------------------------------------------------------------------------- 1 | export enum DocumentColorEnum { 2 | purple = "llama-purple", 3 | magenta = "llama-magenta", 4 | red = "llama-red", 5 | orange = "llama-orange", 6 | yellow = "llama-yellow", 7 | lime = "llama-lime", 8 | teal = "llama-teal", 9 | cyan = "llama-cyan", 10 | blue = "llama-blue", 11 | indigo = "llama-indigo", 12 | } 13 | 14 | // order matters! 
must be high contrast 15 | export const documentColors = [ 16 | DocumentColorEnum.lime, 17 | DocumentColorEnum.orange, 18 | DocumentColorEnum.cyan, 19 | DocumentColorEnum.yellow, 20 | DocumentColorEnum.magenta, 21 | DocumentColorEnum.red, 22 | DocumentColorEnum.purple, 23 | DocumentColorEnum.teal, 24 | DocumentColorEnum.indigo, 25 | DocumentColorEnum.blue, 26 | ]; 27 | 28 | // need this because tailwind doesn't support dynamic template literals 29 | 30 | export const borderColors: { [key in DocumentColorEnum]: string } = { 31 | [DocumentColorEnum.purple]: "border-llama-purple", 32 | [DocumentColorEnum.magenta]: "border-llama-magenta", 33 | [DocumentColorEnum.red]: "border-llama-red", 34 | [DocumentColorEnum.indigo]: "border-llama-indigo", 35 | [DocumentColorEnum.lime]: "border-llama-lime", 36 | [DocumentColorEnum.orange]: "border-llama-orange", 37 | [DocumentColorEnum.blue]: "border-llama-blue", 38 | [DocumentColorEnum.yellow]: "border-llama-yellow", 39 | [DocumentColorEnum.teal]: "border-llama-teal", 40 | [DocumentColorEnum.cyan]: "border-llama-cyan", 41 | }; 42 | 43 | export const highlightColors: { [key in DocumentColorEnum]: string } = { 44 | [DocumentColorEnum.purple]: "bg-llama-purple-light", 45 | [DocumentColorEnum.magenta]: "bg-llama-magenta-light", 46 | [DocumentColorEnum.red]: "bg-llama-red-light", 47 | [DocumentColorEnum.indigo]: "bg-llama-indigo-light", 48 | [DocumentColorEnum.lime]: "bg-llama-lime-light", 49 | [DocumentColorEnum.orange]: "bg-llama-orange-light", 50 | [DocumentColorEnum.blue]: "bg-llama-blue-light", 51 | [DocumentColorEnum.yellow]: "bg-llama-yellow-light", 52 | [DocumentColorEnum.teal]: "bg-llama-teal-light", 53 | [DocumentColorEnum.cyan]: "bg-llama-cyan-light", 54 | }; 55 | -------------------------------------------------------------------------------- /frontend/src/utils/documents.tsx: -------------------------------------------------------------------------------- 1 | import type { SecDocument, Ticker, DocumentType } from "~/types/document"; 2 | import { SelectOption } from "~/types/selection"; 3 | 4 | export function getAllTickers(documents: SecDocument[]): Ticker[] { 5 | const result: Ticker[] = []; 6 | const seen: { [key: string]: boolean } = {}; 7 | 8 | for (const doc of documents) { 9 | // Skip if we've seen this ticker before 10 | if (seen[doc.ticker]) { 11 | continue; 12 | } 13 | 14 | seen[doc.ticker] = true; 15 | result.push({ 16 | fullName: doc.fullName, 17 | ticker: doc.ticker, 18 | }); 19 | } 20 | 21 | return result; 22 | } 23 | 24 | export function filterByTickerAndType( 25 | ticker: string, 26 | docType: DocumentType, 27 | documents: SecDocument[] 28 | ): SecDocument[] { 29 | if (!ticker) { 30 | return []; 31 | } 32 | return documents.filter( 33 | (document) => document.ticker === ticker && document.docType === docType 34 | ); 35 | } 36 | 37 | export function findDocumentById( 38 | id: string, 39 | documents: SecDocument[] 40 | ): SecDocument | null { 41 | return documents.find((val) => val.id === id) || null; 42 | } 43 | 44 | export function sortDocuments(selectedDocuments: SecDocument[]): SecDocument[] { 45 | return selectedDocuments.sort((a, b) => { 46 | // Sort by fullName 47 | const nameComparison = a.fullName.localeCompare(b.fullName); 48 | if (nameComparison !== 0) return nameComparison; 49 | 50 | // If fullNames are equal, sort by year 51 | return a.year.localeCompare(b.year); 52 | }); 53 | } 54 | 55 | export function sortSelectOptions( 56 | options: SelectOption[] | null = [] 57 | ): SelectOption[] { 58 | if (!options) { 59 | return 
[]; 60 | } 61 | 62 | return options.sort((a, b) => parseInt(a.label) - parseInt(b.label)); 63 | } 64 | -------------------------------------------------------------------------------- /frontend/src/utils/landing-page-selection.tsx: -------------------------------------------------------------------------------- 1 | import { DocumentType } from "~/types/document"; 2 | import type { SecDocument } from "~/types/document"; 3 | 4 | import type { SelectOption } from "~/types/selection"; 5 | import { filterByTickerAndType } from "./documents"; 6 | 7 | export const documentTypeOptions = [ 8 | { value: DocumentType.TenK, label: DocumentType.TenK }, 9 | { value: DocumentType.TenQ, label: DocumentType.TenQ }, 10 | ] as SelectOption[]; 11 | 12 | function documentToYearOption(document: SecDocument): SelectOption { 13 | if (document.quarter) { 14 | return { 15 | value: document.id, 16 | label: document.year + " Q" + document.quarter, 17 | }; 18 | } 19 | return { 20 | value: document.id, 21 | label: document.year, 22 | }; 23 | } 24 | 25 | export function getAvailableYears( 26 | ticker: string, 27 | type: DocumentType, 28 | documents: SecDocument[] 29 | ): SelectOption[] { 30 | const docs = filterByTickerAndType(ticker, type, documents); 31 | const yearOptions: SelectOption[] = docs.map(documentToYearOption); 32 | return yearOptions; 33 | } 34 | -------------------------------------------------------------------------------- /frontend/src/utils/timezone.tsx: -------------------------------------------------------------------------------- 1 | export const getDateWithUTCOffset = () => { 2 | const now = new Date(); 3 | const offsetInMilliseconds = now.getTimezoneOffset() * 60 * 1000; 4 | const utcDate = new Date(now.getTime() + offsetInMilliseconds); 5 | return utcDate; 6 | }; 7 | 8 | export const formatDisplayDate = (dateToDisplay: Date) => { 9 | // Create a regular expression to match the time portion up to the milliseconds. 10 | const regex = /(\d{2}:\d{2}:\d{2}\.\d{3})\d*/; 11 | 12 | // Extract the time portion up to the milliseconds. 13 | const matchedDateTimeString = String(dateToDisplay).replace(regex, "$1"); 14 | 15 | // Create a new Date object from the matched string. 16 | const datetime = new Date(matchedDateTimeString); 17 | 18 | // Convert it to the local time 19 | datetime.setMinutes(datetime.getMinutes() - datetime.getTimezoneOffset()); 20 | 21 | // Get user's timezone 22 | const userTimezone = Intl.DateTimeFormat().resolvedOptions().timeZone; 23 | 24 | // Create an options object for formatting the time. 25 | const options: Intl.DateTimeFormatOptions = { 26 | hour: "2-digit", 27 | minute: "2-digit", 28 | hour12: true, 29 | timeZone: userTimezone, // use the user's timezone 30 | }; 31 | 32 | // Convert the date to the desired format. 33 | const formattedTime = new Intl.DateTimeFormat("en-US", options).format( 34 | datetime 35 | ); 36 | return formattedTime; 37 | }; 38 | -------------------------------------------------------------------------------- /frontend/tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import { type Config } from "tailwindcss"; 2 | 3 | export default { 4 | content: ["./src/**/*.{js,ts,jsx,tsx}"], 5 | 6 | theme: { 7 | extend: { 8 | fontFamily: { 9 | lora: ["Lora", "serif"], // The second font is a fallback. 10 | nunito: ["Nunito Sans", "sans-serif"], // The second font is a fallback. 11 | }, 12 | colors: { 13 | "gradient-start": "rgba(255, 255, 204, 0.2)", // Change this with your color. 
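// NOTE (annotation, not in the original file): the `llama-*` entries below are
// what make utility classes like `border-llama-purple` and
// `bg-llama-purple-light` resolve. Those class names are composed in static
// lookup tables in src/utils/colors.tsx because Tailwind cannot see
// dynamically built template literals. Illustrative usage only:
//   <div className={borderColors[doc.color]}>…</div>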
14 | "gradient-end": "rgba(204, 153, 255, 0.2)", // Change this with your color. 15 | "gradient-start-light": "rgba(255, 255, 204, 0.1)", // Change this with your color. 16 | "gradient-end-light": "rgba(204, 153, 255, 0.1)", // Change this with your color. 17 | "gray-00": "#F9F9FA", 18 | "gray-15": "#E9E9ED", 19 | "gray-30": "#D2D2DC", 20 | "gray-60": "#9EA2B0", 21 | "gray-90": "#3F3F46", 22 | "gray-pdf": "#F7F7F7", 23 | "llama-purple-light": "#EDDDFC", 24 | "llama-purple": "#D09FF6", 25 | "llama-magenta-light": "#FBD7F9", 26 | "llama-magenta": "#F48FEF", 27 | "llama-red-light": "#FBDBD9", 28 | "llama-red": "#F49B95", 29 | "llama-orange-light": "#FAE9D3", 30 | "llama-orange": "#F1BA72", 31 | "llama-yellow-light": "#FDF6DD", 32 | "llama-yellow": "#F8EC78", 33 | "llama-lime-light": "#E5FAD2", 34 | "llama-lime": "#A1E66D", 35 | "llama-teal-light": "#D9FBEC", 36 | "llama-teal": "#66D8A7", 37 | "llama-cyan-light": "#DAFAFB", 38 | "llama-cyan": "#70E4EC", 39 | "llama-blue-light": "#EDF5FD", 40 | "llama-blue": "#87B6F3", 41 | "llama-indigo-light": "#EDECFD", 42 | "llama-indigo": "#817AF2", 43 | }, 44 | backgroundImage: (theme) => ({ 45 | gradient: "url('https://llama-app-frontend.vercel.app/Gradient.png')", 46 | }), 47 | backgroundSize: { 48 | "100%": "100%", 49 | }, 50 | backgroundPosition: { 51 | center: "center", 52 | }, 53 | backgroundRepeat: { 54 | "no-repeat": "no-repeat", 55 | }, 56 | }, 57 | }, 58 | plugins: [], 59 | } satisfies Config; 60 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "checkJs": true, 7 | "skipLibCheck": true, 8 | "strict": true, 9 | "forceConsistentCasingInFileNames": true, 10 | "noEmit": true, 11 | "esModuleInterop": true, 12 | "module": "esnext", 13 | "moduleResolution": "node", 14 | "resolveJsonModule": true, 15 | "isolatedModules": true, 16 | "jsx": "preserve", 17 | "incremental": true, 18 | "noUncheckedIndexedAccess": true, 19 | "baseUrl": ".", 20 | "paths": { 21 | "~/*": ["./src/*"] 22 | } 23 | }, 24 | "include": [ 25 | ".eslintrc.cjs", 26 | "next-env.d.ts", 27 | "**/*.ts", 28 | "**/*.tsx", 29 | "**/*.cjs", 30 | "**/*.mjs" 31 | ], 32 | "exclude": ["node_modules"] 33 | } 34 | -------------------------------------------------------------------------------- /render.yaml: -------------------------------------------------------------------------------- 1 | previewsEnabled: true 2 | databases: 3 | - name: llama-app-db 4 | databaseName: llama_app_db 5 | plan: pro 6 | previewPlan: starter 7 | 8 | services: 9 | # A Docker web service 10 | # Docs for Render blueprints: 11 | # https://render.com/docs/blueprint-spec 12 | - type: web 13 | name: llama-app-backend 14 | runtime: docker 15 | repo: https://github.com/run-llama/sec-insights.git 16 | region: oregon 17 | plan: standard 18 | rootDir: ./backend 19 | # https://render.com/docs/blueprint-spec#scaling 20 | scaling: 21 | minInstances: 2 22 | maxInstances: 10 23 | targetMemoryPercent: 75 # optional if targetCPUPercent is set (valid: 1-90) 24 | targetCPUPercent: 75 # optional if targetMemory is set (valid: 1-90) 25 | healthCheckPath: /api/health/ 26 | initialDeployHook: make seed_db_based_on_env 27 | envVars: 28 | - key: DATABASE_URL 29 | fromDatabase: 30 | name: llama-app-db 31 | property: connectionString 32 | - fromGroup: general-settings 33 | - fromGroup: 
prod-web-secrets 34 | - fromGroup: preview-web-secrets 35 | # A Docker cron service 36 | # Runs the seed_db job which should only be upserts and otherwise idempotent 37 | - type: cron 38 | name: llama-app-cron 39 | runtime: docker 40 | repo: https://github.com/run-llama/sec-insights.git 41 | region: oregon 42 | plan: standard 43 | rootDir: ./backend 44 | # set to the fake date of Feb 31st so it never runs. Meant to be manually triggered. 45 | schedule: "0 5 31 2 ?" 46 | dockerCommand: make seed_db_based_on_env 47 | envVars: 48 | - key: DATABASE_URL 49 | fromDatabase: 50 | name: llama-app-db 51 | property: connectionString 52 | - fromGroup: general-settings 53 | - fromGroup: prod-web-secrets 54 | - fromGroup: preview-web-secrets 55 | envVarGroups: 56 | - name: general-settings 57 | envVars: 58 | - key: IS_PREVIEW_ENV 59 | value: false 60 | previewValue: true 61 | - key: LOG_LEVEL 62 | value: INFO 63 | previewValue: DEBUG 64 | - key: BACKEND_CORS_ORIGINS 65 | value: '["http://localhost", "http://localhost:8000", "http://localhost:3000", "http://127.0.0.1:3000", "https://llama-app-backend.onrender.com", "https://llama-app-frontend.vercel.app", "http://secinsights.ai", "http://www.secinsights.ai", "https://secinsights.ai", "https://www.secinsights.ai"]' 66 | # S3_BUCKET_NAME is the bucket used for the StorageContext of the backend's LlamaIndex chat engine 67 | - key: S3_BUCKET_NAME 68 | value: llama-app-backend-prod 69 | previewValue: llama-app-backend-preview 70 | # S3_ASSET_BUCKET_NAME is the bucket used for app assets (e.g. document PDFs) 71 | - key: S3_ASSET_BUCKET_NAME 72 | value: llama-app-web-assets-prod 73 | previewValue: llama-app-web-assets-preview 74 | - key: CDN_BASE_URL 75 | value: https://d687lz8k56fia.cloudfront.net 76 | previewValue: https://dl94gqvzlh4k8.cloudfront.net 77 | - key: SENTRY_DSN 78 | sync: false 79 | - name: prod-web-secrets 80 | envVars: 81 | # Manually add a prod value for OPENAI_API_KEY in Render dashboard 82 | - key: OPENAI_API_KEY 83 | sync: false 84 | - key: AWS_KEY 85 | sync: false 86 | - key: AWS_SECRET 87 | sync: false 88 | - key: POLYGON_IO_API_KEY 89 | sync: false 90 | - name: preview-web-secrets 91 | envVars: 92 | # All env vars in this group should be prefixed with "PREVIEW_" 93 | # Manually add a preview value for PREVIEW_OPENAI_API_KEY in Render dashboard 94 | - key: PREVIEW_OPENAI_API_KEY 95 | sync: false 96 | - key: PREVIEW_AWS_KEY 97 | sync: false 98 | - key: PREVIEW_AWS_SECRET 99 | sync: false 100 | - key: PREVIEW_POLYGON_IO_API_KEY 101 | sync: false 102 | --------------------------------------------------------------------------------
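The `PREVIEW_` prefix convention above implies that, at startup, the service selects the preview variant of each secret whenever `IS_PREVIEW_ENV` is true, and the unprefixed prod variant otherwise. A minimal sketch of that resolution logic, in TypeScript for illustration only — the actual backend is Python, and the function name here (`resolveSecret`) is hypothetical:

// envResolver.ts — illustrative sketch, not the repo's implementation
const IS_PREVIEW_ENV = process.env.IS_PREVIEW_ENV === "true";

// For preview deploys, prefer the "PREVIEW_"-prefixed variant of a secret
// (e.g. PREVIEW_OPENAI_API_KEY); for prod, use the unprefixed name.
function resolveSecret(name: string): string | undefined {
  if (IS_PREVIEW_ENV) {
    return process.env[`PREVIEW_${name}`] ?? process.env[name];
  }
  return process.env[name];
}

// Usage:
const openAiApiKey = resolveSecret("OPENAI_API_KEY");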