├── .circleci └── config.yml ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CODEOWNERS ├── LICENSE ├── airflow ├── .astro │ ├── config.yaml │ └── test_dag_integrity_default.py ├── .dockerignore ├── Dockerfile ├── dags │ ├── feedback │ │ └── find_example_runs.py │ ├── ingestion │ │ ├── ask-astro-forum-load.py │ │ ├── ask-astro-load-airflow-docs.py │ │ ├── ask-astro-load-astro-cli.py │ │ ├── ask-astro-load-astro-sdk.py │ │ ├── ask-astro-load-astronomer-docs.py │ │ ├── ask-astro-load-astronomer-provider.py │ │ ├── ask-astro-load-blogs.py │ │ ├── ask-astro-load-cosmos-docs.py │ │ ├── ask-astro-load-github.py │ │ ├── ask-astro-load-registry.py │ │ ├── ask-astro-load-slack.py │ │ ├── ask-astro-load-stackoverflow.py │ │ └── ask-astro-load.py │ ├── metrcis │ │ └── load_firestore_to_snowflake.py │ └── monitor │ │ ├── evaluate_rag_quality.py │ │ ├── external_trigger.py │ │ ├── monitor.py │ │ └── monitor_ingestion_dags.py ├── docker-compose.override.yml ├── include │ ├── __init__.py │ ├── data │ │ ├── OpenLineage │ │ │ ├── OpenLineage │ │ │ │ └── .gitinclude │ │ │ └── docs │ │ │ │ └── .gitinclude │ │ ├── apache │ │ │ └── airflow │ │ │ │ └── .gitinclude │ │ ├── astronomer │ │ │ ├── blogs │ │ │ │ └── .gitinclude │ │ │ ├── cosmos │ │ │ │ └── .gitinclude │ │ │ ├── docs │ │ │ │ └── .gitinclude │ │ │ └── registry │ │ │ │ └── .gitinclude │ │ ├── schema.json │ │ ├── slack │ │ │ └── .gitinclude │ │ └── stack_overflow │ │ │ └── base.parquet │ ├── streamlit │ │ ├── combine_docs_chat_prompt.txt │ │ ├── logo.png │ │ ├── logo1.png │ │ └── streamlit_app.py │ ├── tasks │ │ ├── __init__.py │ │ ├── chunking_utils.py │ │ └── extract │ │ │ ├── __init__.py │ │ │ ├── airflow_docs.py │ │ │ ├── astro_cli_docs.py │ │ │ ├── astro_docs.py │ │ │ ├── astro_forum_docs.py │ │ │ ├── astro_sdk_docs.py │ │ │ ├── astronomer_providers_docs.py │ │ │ ├── blogs.py │ │ │ ├── cosmos_docs.py │ │ │ ├── github.py │ │ │ ├── registry.py │ │ │ ├── slack.py │ │ │ ├── stack_overflow.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── evaluate_helpers.py │ │ │ ├── html_utils.py │ │ │ ├── slack_helpers.py │ │ │ └── stack_overflow_helpers.py │ └── utils │ │ ├── __init__.py │ │ └── slack.py ├── packages.txt ├── requirements.txt └── tests │ └── test_baseline.py ├── api ├── .gitignore ├── Dockerfile ├── ask_astro │ ├── .gitignore │ ├── __init__.py │ ├── app.py │ ├── chains │ │ ├── __init__.py │ │ ├── answer_question.py │ │ ├── custom_llm_filter_prompt.py │ │ └── custom_llm_output_lines_parser.py │ ├── clients │ │ ├── __init__.py │ │ ├── firestore.py │ │ ├── langsmith_.py │ │ └── weaviate_.py │ ├── config.py │ ├── models │ │ ├── __init__.py │ │ └── request.py │ ├── rest │ │ ├── __init__.py │ │ └── controllers │ │ │ ├── __init__.py │ │ │ ├── get_request.py │ │ │ ├── health_status.py │ │ │ ├── list_recent_requests.py │ │ │ ├── post_request.py │ │ │ └── submit_feedback.py │ ├── services │ │ ├── __init__.py │ │ ├── feedback.py │ │ └── questions.py │ ├── settings.py │ ├── slack │ │ ├── __init__.py │ │ ├── app.py │ │ ├── controllers │ │ │ ├── __init__.py │ │ │ ├── feedback │ │ │ │ ├── __init__.py │ │ │ │ ├── bad.py │ │ │ │ └── good.py │ │ │ └── mention.py │ │ └── utils.py │ ├── stores │ │ ├── __init__.py │ │ ├── installation_store.py │ │ └── oauth_state_store.py │ └── templates │ │ ├── app_home.jinja2 │ │ ├── combine_docs_sys_prompt_slack.txt │ │ ├── combine_docs_sys_prompt_webapp.txt │ │ └── message.jinja2 ├── cloudbuild.yaml ├── poetry.lock └── 
pyproject.toml ├── docs ├── Makefile ├── README.md ├── _static │ ├── DAG.png │ ├── feedback-loops.png │ ├── images │ │ ├── monitoring │ │ │ ├── airflow_dags.png │ │ │ ├── api_swagger.png │ │ │ ├── langsmith1.png │ │ │ ├── latency.png │ │ │ └── slack_alerts.png │ │ └── task_help_message │ │ │ ├── airflow-run.svg │ │ │ ├── airflow-stop.svg │ │ │ ├── api-init-poetry-env.svg │ │ │ ├── api-run-with-docker.svg │ │ │ ├── api-run-with-poetry.svg │ │ │ ├── api-stop-container.svg │ │ │ ├── api-test.svg │ │ │ ├── docs-build.svg │ │ │ ├── docs-generate-tasks-help-screenshot.svg │ │ │ ├── docs-serve.svg │ │ │ ├── list-tasks.svg │ │ │ ├── run-pre-commit.svg │ │ │ ├── ui-init.svg │ │ │ └── ui-run.svg │ ├── ingestion.png │ ├── logo.svg │ └── prompt-orchestration.png ├── airflow │ └── README.md ├── api │ ├── README.md │ ├── cloudbuild_and_run.md │ ├── google_firestore.md │ ├── setup_slack_bot.md │ └── static │ │ ├── 1-create-new-app.png │ │ ├── 2-create-an-app-from-scratch.png │ │ ├── 3-name-app.png │ │ ├── 4-app-home-page.png │ │ ├── 5-credentials.png │ │ ├── 6-redirect-url.png │ │ ├── 7-scope.png │ │ ├── 8-event-subscription.png │ │ └── 9-slack-install.png ├── conf.py ├── images │ ├── cloud_build_config1.png │ ├── cloud_build_config2.png │ ├── cloud_build_config3.png │ ├── cloud_build_setting.png │ ├── cloud_build_trigger.png │ ├── cloud_run_add_env.png │ ├── cloud_run_build_config.png │ ├── cloud_run_ci_deploy.png │ ├── cloud_run_config1.png │ ├── cloud_run_create.png │ ├── cloud_run_edit.png │ └── cloud_run_home.png ├── index.md ├── local_development.md ├── make.bat ├── monitoring.md └── ui │ └── README.md ├── poetry.lock ├── pyproject.toml ├── scripts └── local_dev.py ├── tasks ├── __init__.py ├── airflow.py ├── api.py ├── common.py ├── docs.py └── ui.py ├── tests ├── __init__.py ├── api │ ├── __init__.py │ └── ask_astro │ │ ├── __init__.py │ │ ├── chains │ │ ├── __init__.py │ │ └── test_answer_questions.py │ │ ├── clients │ │ ├── __init__.py │ │ ├── test_firestore.py │ │ └── test_langsmith_.py │ │ ├── models │ │ ├── __init__.py │ │ └── test_request.py │ │ └── rest │ │ ├── __init__.py │ │ └── controllers │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_get_requests.py │ │ ├── test_health_status.py │ │ ├── test_list_recent_requests.py │ │ ├── test_post_request.py │ │ └── test_submit_feedback.py └── test_nothing.py └── ui ├── .gitignore ├── .npmrc ├── components.json ├── package-lock.json ├── package.json ├── postcss.config.cjs ├── src ├── app.d.ts ├── app.html ├── app.postcss ├── lib │ ├── components │ │ ├── custom │ │ │ ├── ConsentManager.svelte │ │ │ ├── InfoIcon.svelte │ │ │ ├── MessageCard.svelte │ │ │ ├── RequestCard.svelte │ │ │ ├── SourceCard.svelte │ │ │ ├── StarsIcon.svelte │ │ │ └── XIcon.svelte │ │ └── ui │ │ │ ├── alert │ │ │ ├── alert-description.svelte │ │ │ ├── alert-title.svelte │ │ │ ├── alert.svelte │ │ │ └── index.ts │ │ │ ├── button │ │ │ ├── button.svelte │ │ │ └── index.ts │ │ │ ├── card │ │ │ ├── card-content.svelte │ │ │ ├── card-description.svelte │ │ │ ├── card-footer.svelte │ │ │ ├── card-header.svelte │ │ │ ├── card-title.svelte │ │ │ ├── card.svelte │ │ │ └── index.ts │ │ │ ├── collapsible │ │ │ ├── collapsible-content.svelte │ │ │ └── index.ts │ │ │ ├── hover-card │ │ │ ├── hover-card-content.svelte │ │ │ └── index.ts │ │ │ ├── input │ │ │ ├── index.ts │ │ │ └── input.svelte │ │ │ ├── progress │ │ │ ├── index.ts │ │ │ └── progress.svelte │ │ │ └── skeleton │ │ │ ├── index.ts │ │ │ └── skeleton.svelte │ └── utils.ts └── routes │ ├── +error.svelte │ ├── +layout.svelte │ 
├── +page.server.ts │ ├── +page.svelte │ ├── requests │ └── [request_id] │ │ ├── +page.server.ts │ │ └── +page.svelte │ └── styles.css ├── static ├── analyticsInit.js ├── ask_astro.jpg ├── consentManagerConfig.js ├── cornerGraphic.svg ├── favicon.svg ├── fonts │ ├── IntelOneMono-Light.ttf │ └── Inter-VariableFont_slnt,wght.ttf ├── robots.txt ├── starsBackground.png └── starsBackground.svg ├── svelte.config.js ├── tailwind.config.js ├── tsconfig.json └── vite.config.ts /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | 13 | **Version** 14 | * OS: 15 | 16 | **To Reproduce** 17 | Steps to reproduce the behavior: 18 | 24 | 25 | **Expected behavior** 26 | 27 | 28 | **Screenshots** 29 | 30 | 31 | 32 | **Additional context** 33 | 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | contact_links: 3 | - name: Ask a question or get support (Forum) 4 | url: https://github.com/astronomer/ask-astro/discussions/ 5 | about: Ask a question or request support for using Ask Astro 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request for Ask Astro 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: feature 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Please describe the feature you'd like to see** 11 | 15 | 16 | **Describe the solution you'd like** 17 | 18 | 19 | **Are there any alternatives to this feature?** 20 | 21 | 22 | **Additional context** 23 | 24 | 25 | **Acceptance Criteria** 26 | 27 | - [ ] All checks and tests in the CI should pass 28 | - [ ] Unit tests 29 | - [ ] Integration tests (if the feature relates to a new database or external service) 30 | - [ ] Example DAG 31 | - [ ] Docstrings in [reStructuredText](https://peps.python.org/pep-0287/) for each of the methods, classes, functions and module-level attributes (including an example DAG showing how it should be used) 32 | - [ ] Exception handling in case of errors 33 | - [ ] Logging (are we exposing useful information to the user? e.g.
source and destination) 34 | - [ ] Improve the documentation (README, Sphinx, and any other relevant) 35 | - [ ] How to use Guide for the feature ([example](https://airflow.apache.org/docs/apache-airflow-providers-postgres/stable/operators/postgres_operator_howto_guide.html)) 36 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | default_stages: [commit, push] 3 | default_language_version: 4 | python: python3 # force all unspecified python hooks to run python3 5 | minimum_pre_commit_version: "1.20.0" 6 | exclude: ^.*poetry.lock|ui/package-lock.json$ 7 | repos: 8 | - repo: meta 9 | hooks: 10 | - id: identity 11 | - id: check-hooks-apply 12 | 13 | - repo: https://github.com/pre-commit/pre-commit-hooks 14 | rev: v4.5.0 15 | hooks: 16 | - id: check-merge-conflict 17 | - id: check-yaml 18 | - id: debug-statements 19 | types: [python] 20 | - id: end-of-file-fixer 21 | - id: mixed-line-ending 22 | - id: trailing-whitespace 23 | exclude: ^docs/_static/images/.*$ 24 | 25 | - repo: https://github.com/pre-commit/pygrep-hooks 26 | rev: v1.10.0 27 | hooks: 28 | - id: python-no-log-warn 29 | types: [python] 30 | - id: python-check-mock-methods 31 | types: [python] 32 | 33 | - repo: https://github.com/Lucas-C/pre-commit-hooks 34 | rev: v1.5.4 35 | hooks: 36 | - id: forbid-crlf 37 | - id: remove-crlf 38 | - id: forbid-tabs 39 | exclude: ^mk/|^docs/Makefile|^Makefile$ 40 | - id: remove-tabs 41 | exclude: ^mk/|^docs/Makefile|^Makefile$ 42 | 43 | - repo: https://github.com/psf/black 44 | rev: 23.10.1 45 | hooks: 46 | - id: black 47 | args: ["--config", "pyproject.toml"] 48 | types: [python] 49 | 50 | - repo: https://github.com/asottile/blacken-docs 51 | rev: 1.16.0 52 | hooks: 53 | - id: blacken-docs 54 | alias: black 55 | additional_dependencies: [black>=22.10.0] 56 | types: [markdown] 57 | 58 | - repo: https://github.com/astral-sh/ruff-pre-commit 59 | rev: 'v0.1.6' 60 | hooks: 61 | # Run the linter. 62 | - id: ruff 63 | args: [ --fix ] 64 | # Run the formatter. 
65 | - id: ruff-format 66 | 67 | - repo: https://github.com/codespell-project/codespell 68 | rev: v2.2.6 69 | hooks: 70 | - id: codespell 71 | name: Run codespell to check for common misspellings in files 72 | language: python 73 | types: [text] 74 | exclude: ^mk/.*\.mk$|^tests/modified_constraint_file.txt$ 75 | 76 | - repo: https://github.com/asottile/pyupgrade 77 | rev: v3.15.0 78 | hooks: 79 | - id: pyupgrade 80 | args: [--py39-plus] 81 | types: [python] 82 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | 4 | build: 5 | os: "ubuntu-22.04" 6 | tools: 7 | python: "3.11" 8 | jobs: 9 | post_create_environment: 10 | - python -m pip install poetry 11 | - python -m poetry config virtualenvs.create false 12 | post_install: 13 | # Install dependencies with 'docs' dependency group 14 | # https://python-poetry.org/docs/managing-dependencies/#dependency-groups 15 | - poetry install --only=docs,dev 16 | 17 | sphinx: 18 | configuration: docs/conf.py 19 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @Lee-W @pankajastro @sunank200 @davidgxue @jlaneve 2 | -------------------------------------------------------------------------------- /airflow/.astro/config.yaml: -------------------------------------------------------------------------------- 1 | project: 2 | name: airflow-ask-astro 3 | -------------------------------------------------------------------------------- /airflow/.astro/test_dag_integrity_default.py: -------------------------------------------------------------------------------- 1 | """Test the validity of all DAGs. 
**USED BY DEV PARSE COMMAND DO NOT EDIT**""" 2 | import logging 3 | import os 4 | from contextlib import contextmanager 5 | 6 | import pytest 7 | 8 | from airflow.hooks.base import BaseHook 9 | from airflow.models import Connection, DagBag, Variable 10 | 11 | # The following code patches errors caused by missing OS Variables, Airflow Connections, and Airflow Variables 12 | 13 | 14 | # =========== MONKEYPATCH BaseHook.get_connection() =========== 15 | def basehook_get_connection_monkeypatch(key: str, *args, **kwargs): 16 | print(f"Attempted to fetch connection during parse returning an empty Connection object for {key}") 17 | return Connection(key) 18 | 19 | 20 | BaseHook.get_connection = basehook_get_connection_monkeypatch 21 | # # =========== /MONKEYPATCH BASEHOOK.GET_CONNECTION() =========== 22 | 23 | 24 | # =========== MONKEYPATCH OS.GETENV() =========== 25 | def os_getenv_monkeypatch(key: str, *args, default=None, **kwargs): 26 | print(f"Attempted to fetch os environment variable during parse, returning a mocked value for {key}") 27 | if key == "JENKINS_HOME" and default is None: # fix https://github.com/astronomer/astro-cli/issues/601 28 | return None 29 | if default: 30 | return default 31 | return "NON_DEFAULT_OS_ENV_VALUE" 32 | 33 | 34 | os.getenv = os_getenv_monkeypatch 35 | # # =========== /MONKEYPATCH OS.GETENV() =========== 36 | 37 | # =========== MONKEYPATCH VARIABLE.GET() =========== 38 | 39 | 40 | class magic_dict(dict): 41 | def __init__(self, *args, **kwargs): 42 | self.update(*args, **kwargs) 43 | 44 | def __getitem__(self, key): 45 | return {}.get(key, "MOCKED_KEY_VALUE") 46 | 47 | 48 | def variable_get_monkeypatch(key: str, default_var=None, deserialize_json=False): 49 | print(f"Attempted to get Variable value during parse, returning a mocked value for {key}") 50 | 51 | if default_var: 52 | return default_var 53 | if deserialize_json: 54 | return magic_dict() 55 | return "NON_DEFAULT_MOCKED_VARIABLE_VALUE" 56 | 57 | 58 | Variable.get = variable_get_monkeypatch 59 | # # =========== /MONKEYPATCH VARIABLE.GET() =========== 60 | 61 | 62 | @contextmanager 63 | def suppress_logging(namespace): 64 | """ 65 | Suppress logging within a specific namespace to keep tests "clean" during build 66 | """ 67 | logger = logging.getLogger(namespace) 68 | old_value = logger.disabled 69 | logger.disabled = True 70 | try: 71 | yield 72 | finally: 73 | logger.disabled = old_value 74 | 75 | 76 | def get_import_errors(): 77 | """ 78 | Generate a tuple for import errors in the dag bag 79 | """ 80 | with suppress_logging("airflow"): 81 | dag_bag = DagBag(include_examples=False) 82 | 83 | def strip_path_prefix(path): 84 | return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) 85 | 86 | # we prepend "(None,None)" to ensure that a test object is always created even if its a no op. 
87 | return [(None, None)] + [(strip_path_prefix(k), v.strip()) for k, v in dag_bag.import_errors.items()] 88 | 89 | 90 | @pytest.mark.parametrize("rel_path,rv", get_import_errors(), ids=[x[0] for x in get_import_errors()]) 91 | def test_file_imports(rel_path, rv): 92 | """Test for import errors on a file""" 93 | if rel_path and rv: # Make sure our no op test doesn't raise an error 94 | raise Exception(f"{rel_path} failed to import with message \n {rv}") 95 | -------------------------------------------------------------------------------- /airflow/.dockerignore: -------------------------------------------------------------------------------- 1 | astro 2 | .git 3 | .env 4 | airflow_settings.yaml 5 | logs/ 6 | -------------------------------------------------------------------------------- /airflow/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM quay.io/astronomer/astro-runtime:10.1.0 2 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-forum-load.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | blog_cutoff_date = datetime.date(2022, 1, 1) 15 | 16 | default_args = {"retries": 3, "retry_delay": 30} 17 | 18 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 19 | 20 | 21 | @task 22 | def get_astro_forum_content(): 23 | from include.tasks.extract.astro_forum_docs import get_forum_df 24 | 25 | return get_forum_df() 26 | 27 | 28 | @dag( 29 | schedule_interval=schedule_interval, 30 | start_date=datetime.datetime(2023, 9, 27), 31 | catchup=False, 32 | is_paused_upon_creation=True, 33 | default_args=default_args, 34 | on_failure_callback=send_failure_notification( 35 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 36 | ), 37 | ) 38 | def ask_astro_load_astro_forum(): 39 | from include.tasks import chunking_utils 40 | 41 | split_docs = task(chunking_utils.split_html).expand(dfs=[get_astro_forum_content()]) 42 | 43 | _import_data = WeaviateDocumentIngestOperator.partial( 44 | class_name=WEAVIATE_CLASS, 45 | existing="replace", 46 | document_column="docLink", 47 | batch_config_params={"batch_size": 7, "dynamic": False}, 48 | verbose=True, 49 | conn_id=_WEAVIATE_CONN_ID, 50 | task_id="WeaviateDocumentIngestOperator", 51 | ).expand(input_data=[split_docs]) 52 | 53 | 54 | ask_astro_load_astro_forum() 55 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-airflow-docs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = 
os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | 15 | airflow_docs_base_url = "https://airflow.apache.org/docs/" 16 | 17 | default_args = {"retries": 3, "retry_delay": 30} 18 | 19 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 20 | 21 | 22 | @dag( 23 | schedule_interval=schedule_interval, 24 | start_date=datetime(2023, 9, 27), 25 | catchup=False, 26 | is_paused_upon_creation=True, 27 | default_args=default_args, 28 | on_failure_callback=send_failure_notification( 29 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 30 | ), 31 | ) 32 | def ask_astro_load_airflow_docs(): 33 | """ 34 | This DAG performs incremental load for any new Airflow docs. Initial load via ask_astro_load_bulk imported 35 | data from a point-in-time data capture. By using the upsert logic of the weaviate_import decorator 36 | any existing documents that have been updated will be removed and re-added. 37 | """ 38 | from include.tasks import chunking_utils 39 | from include.tasks.extract import airflow_docs 40 | 41 | extracted_airflow_docs = task(chunking_utils.split_html).expand( 42 | dfs=[airflow_docs.extract_airflow_docs(docs_base_url=airflow_docs_base_url)] 43 | ) 44 | 45 | _import_data = WeaviateDocumentIngestOperator.partial( 46 | class_name=WEAVIATE_CLASS, 47 | existing="replace", 48 | document_column="docLink", 49 | batch_config_params={"batch_size": 7, "dynamic": False}, 50 | verbose=True, 51 | conn_id=_WEAVIATE_CONN_ID, 52 | task_id="WeaviateDocumentIngestOperator", 53 | ).expand(input_data=[extracted_airflow_docs]) 54 | 55 | 56 | ask_astro_load_airflow_docs() 57 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-astro-cli.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | default_args = {"retries": 3, "retry_delay": 30} 15 | 16 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 17 | 18 | 19 | @dag( 20 | schedule_interval=schedule_interval, 21 | start_date=datetime.datetime(2023, 9, 27), 22 | catchup=False, 23 | is_paused_upon_creation=True, 24 | default_args=default_args, 25 | on_failure_callback=send_failure_notification( 26 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 27 | ), 28 | ) 29 | def ask_astro_load_astro_cli_docs(): 30 | """ 31 | This DAG performs incremental load for any new docs. Initial load via ask_astro_load_bulk imported 32 | data from a point-in-time data capture. By using the upsert logic of the weaviate_import decorator 33 | any existing documents that have been updated will be removed and re-added. 
34 | """ 35 | from include.tasks import chunking_utils 36 | from include.tasks.extract import astro_cli_docs 37 | 38 | extract_astro_cli_docs = task(astro_cli_docs.extract_astro_cli_docs)() 39 | split_md_docs = task(chunking_utils.split_html).expand(dfs=[extract_astro_cli_docs]) 40 | 41 | _import_data = WeaviateDocumentIngestOperator.partial( 42 | class_name=WEAVIATE_CLASS, 43 | existing="replace", 44 | document_column="docLink", 45 | batch_config_params={"batch_size": 7, "dynamic": False}, 46 | verbose=True, 47 | conn_id=_WEAVIATE_CONN_ID, 48 | task_id="WeaviateDocumentIngestOperator", 49 | ).expand(input_data=[split_md_docs]) 50 | 51 | 52 | ask_astro_load_astro_cli_docs() 53 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-astro-sdk.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | blog_cutoff_date = datetime.date(2023, 1, 19) 15 | 16 | default_args = {"retries": 3, "retry_delay": 30} 17 | 18 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 19 | 20 | 21 | @task 22 | def get_astro_sdk_content(): 23 | from include.tasks.extract.astro_sdk_docs import extract_astro_sdk_docs 24 | 25 | dfs = extract_astro_sdk_docs() 26 | return dfs 27 | 28 | 29 | @dag( 30 | schedule_interval=schedule_interval, 31 | start_date=datetime.datetime(2023, 9, 27), 32 | catchup=False, 33 | is_paused_upon_creation=True, 34 | default_args=default_args, 35 | on_failure_callback=send_failure_notification( 36 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 37 | ), 38 | ) 39 | def ask_astro_load_astro_sdk(): 40 | from include.tasks import chunking_utils 41 | 42 | split_docs = task(chunking_utils.split_html).expand(dfs=[get_astro_sdk_content()]) 43 | 44 | _import_data = WeaviateDocumentIngestOperator.partial( 45 | class_name=WEAVIATE_CLASS, 46 | existing="replace", 47 | document_column="docLink", 48 | batch_config_params={"batch_size": 7, "dynamic": False}, 49 | verbose=True, 50 | conn_id=_WEAVIATE_CONN_ID, 51 | task_id="WeaviateDocumentIngestOperator", 52 | ).expand(input_data=[split_docs]) 53 | 54 | 55 | ask_astro_load_astro_sdk() 56 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-astronomer-docs.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | 15 | default_args = {"retries": 3, "retry_delay": 30} 16 | 17 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 18 | 19 | 20 | @dag( 21 | 
schedule_interval=schedule_interval, 22 | start_date=datetime.datetime(2023, 9, 27), 23 | catchup=False, 24 | is_paused_upon_creation=True, 25 | default_args=default_args, 26 | on_failure_callback=send_failure_notification( 27 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 28 | ), 29 | ) 30 | def ask_astro_load_astronomer_docs(): 31 | """ 32 | This DAG performs incremental load for any new docs in astronomer docs. 33 | """ 34 | from include.tasks import chunking_utils 35 | from include.tasks.extract.astro_docs import extract_astro_docs 36 | 37 | astro_docs = task(extract_astro_docs)() 38 | 39 | split_html_docs = task(chunking_utils.split_html).expand(dfs=[astro_docs]) 40 | 41 | _import_data = WeaviateDocumentIngestOperator.partial( 42 | class_name=WEAVIATE_CLASS, 43 | existing="replace", 44 | document_column="docLink", 45 | batch_config_params={"batch_size": 7, "dynamic": False}, 46 | verbose=True, 47 | conn_id=_WEAVIATE_CONN_ID, 48 | task_id="WeaviateDocumentIngestOperator", 49 | ).expand(input_data=[split_html_docs]) 50 | 51 | 52 | ask_astro_load_astronomer_docs() 53 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-astronomer-provider.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | blog_cutoff_date = datetime.date(2023, 1, 19) 15 | 16 | default_args = {"retries": 3, "retry_delay": 30} 17 | 18 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 19 | 20 | 21 | @task 22 | def get_provider_content(): 23 | from include.tasks.extract.astronomer_providers_docs import extract_provider_docs 24 | 25 | dfs = extract_provider_docs() 26 | return dfs 27 | 28 | 29 | @dag( 30 | schedule_interval=schedule_interval, 31 | start_date=datetime.datetime(2023, 9, 27), 32 | catchup=False, 33 | is_paused_upon_creation=True, 34 | default_args=default_args, 35 | on_failure_callback=send_failure_notification( 36 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 37 | ), 38 | ) 39 | def ask_astro_load_astronomer_providers(): 40 | """ 41 | This DAG performs incremental load for any new docs. Initial load via ask_astro_load_bulk imported 42 | data from a point-in-time data capture. By using the upsert logic of the weaviate_import decorator 43 | any existing documents that have been updated will be removed and re-added. 
44 | """ 45 | 46 | from include.tasks import chunking_utils 47 | 48 | split_docs = task(chunking_utils.split_html).expand(dfs=[get_provider_content()]) 49 | 50 | _import_data = WeaviateDocumentIngestOperator.partial( 51 | class_name=WEAVIATE_CLASS, 52 | existing="replace", 53 | document_column="docLink", 54 | batch_config_params={"batch_size": 7, "dynamic": False}, 55 | verbose=True, 56 | conn_id=_WEAVIATE_CONN_ID, 57 | task_id="WeaviateDocumentIngestOperator", 58 | ).expand(input_data=[split_docs]) 59 | 60 | 61 | ask_astro_load_astronomer_providers() 62 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-blogs.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | blog_cutoff_date = datetime.date(2023, 1, 19) 15 | 16 | default_args = {"retries": 3, "retry_delay": 30} 17 | 18 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 19 | 20 | 21 | @dag( 22 | schedule_interval=schedule_interval, 23 | start_date=datetime.datetime(2023, 9, 27), 24 | catchup=False, 25 | is_paused_upon_creation=True, 26 | default_args=default_args, 27 | on_failure_callback=send_failure_notification( 28 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 29 | ), 30 | ) 31 | def ask_astro_load_blogs(): 32 | """ 33 | This DAG performs incremental load for any new docs. Initial load via ask_astro_load_bulk imported 34 | data from a point-in-time data capture. By using the upsert logic of the weaviate_import decorator 35 | any existing documents that have been updated will be removed and re-added. 
36 | """ 37 | from include.tasks import chunking_utils 38 | from include.tasks.extract import blogs 39 | 40 | blogs_docs = task(blogs.extract_astro_blogs)(blog_cutoff_date=blog_cutoff_date) 41 | 42 | split_md_docs = task(chunking_utils.split_markdown).expand(dfs=[blogs_docs]) 43 | 44 | _import_data = WeaviateDocumentIngestOperator.partial( 45 | class_name=WEAVIATE_CLASS, 46 | existing="replace", 47 | document_column="docLink", 48 | batch_config_params={"batch_size": 7, "dynamic": False}, 49 | verbose=True, 50 | conn_id=_WEAVIATE_CONN_ID, 51 | task_id="WeaviateDocumentIngestOperator", 52 | ).expand(input_data=[split_md_docs]) 53 | 54 | 55 | ask_astro_load_blogs() 56 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-cosmos-docs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | from include.tasks.extract.cosmos_docs import extract_cosmos_docs 5 | from include.utils.slack import send_failure_notification 6 | 7 | from airflow.decorators import dag, task 8 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 9 | 10 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 11 | 12 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 13 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 14 | 15 | 16 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 17 | 18 | 19 | @dag( 20 | schedule=schedule_interval, 21 | start_date=datetime(2023, 9, 27), 22 | catchup=False, 23 | is_paused_upon_creation=True, 24 | default_args={"retries": 3, "retry_delay": 30}, 25 | on_failure_callback=send_failure_notification( 26 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 27 | ), 28 | ) 29 | def ask_astro_load_cosmos_docs(): 30 | """ 31 | This DAG performs incremental load for any new Cosmos docs. Initial load via ask_astro_load_bulk imported 32 | data from a point-in-time data capture. By using the upsert logic of the weaviate_import decorator 33 | any existing documents that have been updated will be removed and re-added. 
34 | """ 35 | 36 | from include.tasks import chunking_utils 37 | 38 | split_docs = task(chunking_utils.split_html).expand(dfs=[extract_cosmos_docs()]) 39 | 40 | _import_data = WeaviateDocumentIngestOperator.partial( 41 | class_name=WEAVIATE_CLASS, 42 | existing="replace", 43 | document_column="docLink", 44 | batch_config_params={"batch_size": 1000}, 45 | verbose=True, 46 | conn_id=_WEAVIATE_CONN_ID, 47 | task_id="load_cosmos_docs_to_weaviate", 48 | ).expand(input_data=[split_docs]) 49 | 50 | 51 | ask_astro_load_cosmos_docs() 52 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-github.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | _GITHUB_CONN_ID = "github_ro" 13 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 14 | _GITHUB_ISSUE_CUTOFF_DATE = os.environ.get("GITHUB_ISSUE_CUTOFF_DATE", "2022-1-1") 15 | 16 | markdown_docs_sources = [ 17 | {"doc_dir": "", "repo_base": "OpenLineage/docs"}, 18 | {"doc_dir": "", "repo_base": "OpenLineage/OpenLineage"}, 19 | ] 20 | 21 | issues_docs_sources = [ 22 | "apache/airflow", 23 | ] 24 | 25 | default_args = {"retries": 3, "retry_delay": 30} 26 | 27 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 28 | 29 | 30 | @dag( 31 | schedule_interval=schedule_interval, 32 | start_date=datetime.datetime(2023, 9, 27), 33 | catchup=False, 34 | is_paused_upon_creation=True, 35 | default_args=default_args, 36 | on_failure_callback=send_failure_notification( 37 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 38 | ), 39 | ) 40 | def ask_astro_load_github(): 41 | """ 42 | This DAG performs incremental load for any new docs. Initial load via ask_astro_load_bulk imported 43 | data from a point-in-time data capture. By using the upsert logic of the weaviate_import decorator 44 | any existing documents that have been updated will be removed and re-added. 
45 | """ 46 | from include.tasks import chunking_utils 47 | from include.tasks.extract import github 48 | 49 | md_docs = ( 50 | task(github.extract_github_markdown) 51 | .partial(github_conn_id=_GITHUB_CONN_ID) 52 | .expand(source=markdown_docs_sources) 53 | ) 54 | 55 | split_md_docs = task(chunking_utils.split_markdown).expand(dfs=[md_docs]) 56 | 57 | _import_data = WeaviateDocumentIngestOperator.partial( 58 | class_name=WEAVIATE_CLASS, 59 | existing="replace", 60 | document_column="docLink", 61 | batch_config_params={"batch_size": 7, "dynamic": False}, 62 | verbose=True, 63 | conn_id=_WEAVIATE_CONN_ID, 64 | task_id="WeaviateDocumentIngestOperator", 65 | ).expand(input_data=[split_md_docs]) 66 | 67 | 68 | ask_astro_load_github() 69 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-registry.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | default_args = {"retries": 3, "retry_delay": 30} 15 | 16 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 17 | 18 | 19 | @dag( 20 | schedule_interval=schedule_interval, 21 | start_date=datetime(2023, 9, 27), 22 | catchup=False, 23 | is_paused_upon_creation=True, 24 | default_args=default_args, 25 | on_failure_callback=send_failure_notification( 26 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 27 | ), 28 | ) 29 | def ask_astro_load_registry(): 30 | """ 31 | This DAG performs incremental load for any new docs. Initial load via ask_astro_load_bulk imported 32 | data from a point-in-time data capture. By using the upsert logic of the weaviate_import decorator 33 | any existing documents that have been updated will be removed and re-added. 
34 | """ 35 | from include.tasks import chunking_utils 36 | from include.tasks.extract import registry 37 | 38 | registry_cells_docs = task(registry.extract_astro_registry_cell_types)() 39 | 40 | registry_dags_docs = task(registry.extract_astro_registry_dags)() 41 | 42 | split_md_docs = task(chunking_utils.split_markdown).expand(dfs=[registry_cells_docs]) 43 | 44 | split_code_docs = task(chunking_utils.split_python).expand(dfs=[registry_dags_docs]) 45 | 46 | _import_data = WeaviateDocumentIngestOperator.partial( 47 | class_name=WEAVIATE_CLASS, 48 | existing="replace", 49 | document_column="docLink", 50 | batch_config_params={"batch_size": 7, "dynamic": False}, 51 | verbose=True, 52 | conn_id=_WEAVIATE_CONN_ID, 53 | task_id="WeaviateDocumentIngestOperator", 54 | ).expand(input_data=[split_md_docs, split_code_docs]) 55 | 56 | 57 | ask_astro_load_registry() 58 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-slack.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | slack_channel_sources = [ 15 | { 16 | "channel_name": "troubleshooting", 17 | "channel_id": "CCQ7EGB1P", 18 | "team_id": "TCQ18L22Z", 19 | "team_name": "Airflow Slack Community", 20 | "slack_api_conn_id": "slack_api_ro", 21 | } 22 | ] 23 | 24 | default_args = {"retries": 3, "retry_delay": 30} 25 | 26 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 27 | 28 | 29 | @dag( 30 | schedule_interval=schedule_interval, 31 | start_date=datetime(2023, 9, 27), 32 | catchup=False, 33 | is_paused_upon_creation=True, 34 | default_args=default_args, 35 | on_failure_callback=send_failure_notification( 36 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 37 | ), 38 | ) 39 | def ask_astro_load_slack(): 40 | """ 41 | This DAG performs incremental load for any new slack threads. The slack archive is a point-in-time capture. This 42 | DAG should run nightly to capture threads between archive periods. By using the upsert logic of the 43 | weaviate_import decorator any existing documents that have been updated will be removed and re-added. 
44 | """ 45 | from include.tasks import chunking_utils 46 | from include.tasks.extract import slack 47 | 48 | slack_docs = task(slack.extract_slack).expand(source=slack_channel_sources) 49 | 50 | split_md_docs = task(chunking_utils.split_markdown).expand(dfs=[slack_docs]) 51 | 52 | _import_data = WeaviateDocumentIngestOperator.partial( 53 | class_name=WEAVIATE_CLASS, 54 | existing="replace", 55 | document_column="docLink", 56 | batch_config_params={"batch_size": 7, "dynamic": False}, 57 | verbose=True, 58 | conn_id=_WEAVIATE_CONN_ID, 59 | task_id="WeaviateDocumentIngestOperator", 60 | ).expand(input_data=[split_md_docs]) 61 | 62 | 63 | ask_astro_load_slack() 64 | -------------------------------------------------------------------------------- /airflow/dags/ingestion/ask-astro-load-stackoverflow.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | from include.utils.slack import send_failure_notification 5 | 6 | from airflow.decorators import dag, task 7 | from airflow.providers.weaviate.operators.weaviate import WeaviateDocumentIngestOperator 8 | 9 | ask_astro_env = os.environ.get("ASK_ASTRO_ENV", "dev") 10 | 11 | _WEAVIATE_CONN_ID = f"weaviate_{ask_astro_env}" 12 | WEAVIATE_CLASS = os.environ.get("WEAVIATE_CLASS", "DocsDev") 13 | 14 | stackoverflow_cutoff_date = os.environ.get("STACKOVERFLOW_CUTOFF_DATE", "2021-09-01") 15 | 16 | stackoverflow_tags = [ 17 | "airflow", 18 | ] 19 | 20 | default_args = {"retries": 3, "retry_delay": 30} 21 | 22 | schedule_interval = os.environ.get("INGESTION_SCHEDULE", "0 5 * * 2") if ask_astro_env == "prod" else None 23 | 24 | 25 | @dag( 26 | schedule_interval=schedule_interval, 27 | start_date=datetime(2023, 9, 27), 28 | catchup=False, 29 | is_paused_upon_creation=True, 30 | default_args=default_args, 31 | on_failure_callback=send_failure_notification( 32 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 33 | ), 34 | ) 35 | def ask_astro_load_stackoverflow(): 36 | """ 37 | This DAG performs incremental load for any new docs. Initial load via ask_astro_load_bulk imported 38 | data from a point-in-time data capture. By using the upsert logic of the weaviate_import decorator 39 | any existing documents that have been updated will be removed and re-added. 
40 | """ 41 | from include.tasks import chunking_utils 42 | from include.tasks.extract import stack_overflow 43 | 44 | stack_overflow_docs = ( 45 | task(stack_overflow.extract_stack_overflow) 46 | .partial(stackoverflow_cutoff_date=stackoverflow_cutoff_date) 47 | .expand(tag=stackoverflow_tags) 48 | ) 49 | 50 | split_md_docs = task(chunking_utils.split_markdown).expand(dfs=[stack_overflow_docs]) 51 | 52 | _import_data = WeaviateDocumentIngestOperator.partial( 53 | class_name=WEAVIATE_CLASS, 54 | existing="replace", 55 | document_column="docLink", 56 | batch_config_params={"batch_size": 7, "dynamic": False}, 57 | verbose=True, 58 | conn_id=_WEAVIATE_CONN_ID, 59 | task_id="WeaviateDocumentIngestOperator", 60 | ).expand(input_data=[split_md_docs]) 61 | 62 | 63 | ask_astro_load_stackoverflow() 64 | -------------------------------------------------------------------------------- /airflow/dags/monitor/external_trigger.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from include.utils.slack import send_failure_notification 4 | 5 | from airflow.decorators import dag 6 | from airflow.operators.trigger_dagrun import TriggerDagRunOperator 7 | 8 | 9 | @dag( 10 | schedule_interval="@daily", 11 | start_date=datetime(2023, 9, 27), 12 | catchup=False, 13 | is_paused_upon_creation=True, 14 | on_failure_callback=send_failure_notification( 15 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 16 | ), 17 | ) 18 | def external_trigger_monitoring_dag(): 19 | TriggerDagRunOperator( 20 | task_id="run_monitoring_dag", 21 | trigger_dag_id="monitoring_dag", 22 | ) 23 | 24 | 25 | external_trigger_monitoring_dag() 26 | -------------------------------------------------------------------------------- /airflow/dags/monitor/monitor_ingestion_dags.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import os 5 | from datetime import datetime 6 | from typing import Any 7 | 8 | from include.utils.slack import send_failure_notification 9 | 10 | from airflow.decorators import dag, task 11 | from airflow.models import DagBag 12 | from airflow.providers.slack.operators.slack_webhook import SlackWebhookOperator 13 | from airflow.utils.cli import process_subdir 14 | 15 | logger = logging.getLogger("airflow.task") 16 | 17 | slack_webhook_conn = os.environ.get("SLACK_WEBHOOK_CONN", "slack_webhook_default") 18 | 19 | 20 | ingestion_dags = [ 21 | "ask_astro_load_airflow_docs", 22 | "ask_astro_load_astro_cli_docs", 23 | "ask_astro_load_astronomer_providers", 24 | "ask_astro_load_astro_sdk", 25 | "ask_astro_load_blogs", 26 | "ask_astro_load_bulk", 27 | "ask_astro_load_github", 28 | "ask_astro_load_registry", 29 | # "ask_astro_load_slack", 30 | "ask_astro_load_stackoverflow", 31 | "ask_astro_load_astronomer_docs", 32 | ] 33 | 34 | 35 | @task 36 | def check_ingestion_dags(**context: Any): 37 | airflow_home = os.environ.get("AIRFLOW_HOME") 38 | dagbag = DagBag(process_subdir(f"{airflow_home}/dags")) 39 | data = [] 40 | for filename, errors in dagbag.import_errors.items(): 41 | data.append({"filepath": filename, "error": errors}) 42 | 43 | if data: 44 | logger.info("************DAG Import Error*************") 45 | logger.error(data) 46 | logger.info("******************************") 47 | message = ":red_circle: Import Error in DAG" 48 | 49 | ingestion_dag_exist = False 50 | if set(ingestion_dags).issubset(set(dagbag.dag_ids)): 51 | 
ingestion_dag_exist = True 52 | else: 53 | message = ":red_circle: Some ingestion DAGs are missing" 54 | 55 | if not ingestion_dag_exist or data:  # alert when ingestion DAGs are missing or any DAG failed to import 56 | SlackWebhookOperator( 57 | task_id="slack_alert", 58 | slack_webhook_conn_id=slack_webhook_conn, 59 | message=message, 60 | ).execute(context=context) 61 | 62 | 63 | @dag( 64 | schedule_interval="@daily", 65 | start_date=datetime(2023, 9, 27), 66 | catchup=False, 67 | is_paused_upon_creation=True, 68 | on_failure_callback=send_failure_notification( 69 | dag_id="{{ dag.dag_id }}", execution_date="{{ dag_run.execution_date }}" 70 | ), 71 | ) 72 | def monitor_ingestion_dags(): 73 | check_ingestion_dags() 74 | 75 | 76 | monitor_ingestion_dags() 77 | -------------------------------------------------------------------------------- /airflow/docker-compose.override.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | services: 3 | webserver: 4 | ports: 5 | - 8501:8501 6 | networks: 7 | - airflow 8 | weaviate: 9 | image: cr.weaviate.io/semitechnologies/weaviate:1.23.10 10 | command: "--host 0.0.0.0 --port '8081' --scheme http" 11 | volumes: 12 | - ${PWD}/include/weaviate/backup:/var/lib/weaviate/backup 13 | environment: 14 | QUERY_DEFAULTS_LIMIT: 25 15 | AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' 16 | PERSISTENCE_DATA_PATH: '/var/lib/weaviate' 17 | DEFAULT_VECTORIZER_MODULE: 'text2vec-openai' 18 | ENABLE_MODULES: 'text2vec-openai, backup-filesystem, qna-openai, generative-openai, text2vec-cohere, reranker-cohere' 19 | BACKUP_FILESYSTEM_PATH: '/var/lib/weaviate/backup' 20 | CLUSTER_HOSTNAME: 'node1' 21 | ports: 22 | - 8081:8081 23 | networks: 24 | - airflow 25 | -------------------------------------------------------------------------------- /airflow/include/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/__init__.py -------------------------------------------------------------------------------- /airflow/include/data/OpenLineage/OpenLineage/.gitinclude: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/data/OpenLineage/OpenLineage/.gitinclude -------------------------------------------------------------------------------- /airflow/include/data/OpenLineage/docs/.gitinclude: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/data/OpenLineage/docs/.gitinclude -------------------------------------------------------------------------------- /airflow/include/data/apache/airflow/.gitinclude: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/data/apache/airflow/.gitinclude -------------------------------------------------------------------------------- /airflow/include/data/astronomer/blogs/.gitinclude: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/data/astronomer/blogs/.gitinclude --------------------------------------------------------------------------------
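A note on the local development setup above: docker-compose.override.yml attaches a Weaviate 1.23.10 container to the Astro project on port 8081 with anonymous access enabled, and the schema.json file shown below defines the "Docs" class that the ingestion DAGs write to. A minimal sanity-check sketch (not part of the repository) for loading that schema into the local instance, assuming the v3 weaviate-client Python package and that the compose stack is running:

import json

import weaviate  # assumes weaviate-client v3.x, matching the server version above

# Anonymous access is enabled in docker-compose.override.yml, so no auth config is needed.
client = weaviate.Client("http://localhost:8081")

# Load the class definitions from airflow/include/data/schema.json (shown below)
# and create the "Docs" class only if it is not already present.
with open("airflow/include/data/schema.json") as f:
    schema = json.load(f)

if not client.schema.contains(schema):
    client.schema.create(schema)

print([c["class"] for c in client.schema.get()["classes"]])  # expect ["Docs"]
--------------------------------------------------------------------------------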
/airflow/include/data/astronomer/cosmos/.gitinclude: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/data/astronomer/cosmos/.gitinclude -------------------------------------------------------------------------------- /airflow/include/data/astronomer/docs/.gitinclude: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/data/astronomer/docs/.gitinclude -------------------------------------------------------------------------------- /airflow/include/data/astronomer/registry/.gitinclude: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/data/astronomer/registry/.gitinclude -------------------------------------------------------------------------------- /airflow/include/data/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "classes": [ 3 | { 4 | "class": "Docs", 5 | "description": "Document from github or stackoverflow", 6 | "vectorizer": "text2vec-openai", 7 | "moduleConfig": { 8 | "text2vec-openai": { 9 | "model": "text-embedding-3-small", 10 | "type": "text", 11 | "vectorizeClassName": "False" 12 | }, 13 | "reranker-cohere": { 14 | "model": "rerank-multilingual-v2.0" 15 | }, 16 | "generative-openai": { 17 | "model": "gpt-4" 18 | }, 19 | "qna-openai": { 20 | "model": "text-davinci-003", 21 | "maxTokens": 100, 22 | "temperature": 0.0, 23 | "topP": 1, 24 | "frequencyPenalty": 0.0, 25 | "presencePenalty": 0.0 26 | } 27 | }, 28 | "properties": [ 29 | { 30 | "name": "docSource", 31 | "description": "Type of document ('learn', 'astro', 'airflow', 'stackoverflow', 'code_samples')", 32 | "dataType": ["text"], 33 | "moduleConfig": { 34 | "text2vec-openai": { 35 | "skip": "False", 36 | "vectorizePropertyName": "False" 37 | } 38 | } 39 | }, 40 | { 41 | "name": "docLink", 42 | "description": "The url of source data", 43 | "dataType": ["text"], 44 | "tokenization": "field", 45 | "moduleConfig": { 46 | "text2vec-openai": { 47 | "skip": "True", 48 | "vectorizePropertyName": "False" 49 | } 50 | } 51 | }, 52 | { 53 | "name": "content", 54 | "description": "Document content", 55 | "dataType": ["text"], 56 | "tokenization": "word", 57 | "moduleConfig": { 58 | "text2vec-openai": { 59 | "skip": "False", 60 | "vectorizePropertyName": "False" 61 | } 62 | } 63 | }, 64 | { 65 | "name": "sha", 66 | "description": "sha digest of content for checking changes", 67 | "dataType": ["text"], 68 | "moduleConfig": { 69 | "text2vec-openai": { 70 | "skip": "True", 71 | "vectorizePropertyName": "False" 72 | } 73 | } 74 | } 75 | ] 76 | } 77 | ] 78 | } 79 | -------------------------------------------------------------------------------- /airflow/include/data/slack/.gitinclude: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/data/slack/.gitinclude -------------------------------------------------------------------------------- /airflow/include/data/stack_overflow/base.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/data/stack_overflow/base.parquet -------------------------------------------------------------------------------- /airflow/include/streamlit/combine_docs_chat_prompt.txt: -------------------------------------------------------------------------------- 1 | You are Ask Astro, a friendly and helpful bot. 2 | Only answer questions related to Astronomer, the Astro platform and Apache Airflow. 3 | If you don't know the answer, just say that you don't know and ask the user to contact support, don't try to make up an answer. 4 | Be concise and precise in your answers and do not apologize. 5 | Format your response using Slack syntax. 6 | Surround text with SINGLE * to format it in bold or provide emphasis. Examples: GOOD: *This is bold!*. BAD: **This is bold!**. 7 | Surround text with _ to format it in italic. Example: _This is italic._ 8 | Use the • character for unnumbered lists. 9 | Use the ` character to surround inline code. Example: This is a sentence with some `inline *code*` in it. 10 | Use ``` to surround multi-line code blocks. Do not specify a language in code blocks. Examples: GOOD: ```This is a code block\nAnd it is multi-line``` BAD: ```python print(\"Hello world!\")```. 11 | Format links using this format: \\<LINK|TEXT\\>. Examples: GOOD: \\<https://www.example.com|This message *is* a link\\>. BAD: [This message *is* a link](https://www.example.com). 12 | 12 character words that start with \"<@U\" and end with \">\" are usernames. Example: <@U024BE7LH>. 13 | Use the following pieces of context to answer the user's question. 14 | ---------------- 15 | Context: {{content}} 16 | Question: {question} 17 | Answer: 18 | -------------------------------------------------------------------------------- /airflow/include/streamlit/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/streamlit/logo.png -------------------------------------------------------------------------------- /airflow/include/streamlit/logo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/streamlit/logo1.png -------------------------------------------------------------------------------- /airflow/include/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/tasks/__init__.py -------------------------------------------------------------------------------- /airflow/include/tasks/extract/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/tasks/extract/__init__.py -------------------------------------------------------------------------------- /airflow/include/tasks/extract/airflow_docs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | import urllib.parse 5 | 6 | import pandas as pd 7 | from bs4 import BeautifulSoup 8 | from weaviate.util import generate_uuid5 9 | 10 | from airflow.decorators import task 11 | from include.tasks.extract.utils.html_utils import fetch_page_content,
get_internal_links 12 | 13 | 14 | @task 15 | def extract_airflow_docs(docs_base_url: str) -> list[pd.DataFrame]: 16 | """ 17 | This task returns all internal URLs for the Airflow docs. 18 | """ 19 | 20 | # we exclude the following docs which are not useful and/or too large for easy processing. 21 | exclude_docs = [ 22 | "changelog.html", 23 | "commits.html", 24 | "docs/apache-airflow/stable/release_notes.html", 25 | "docs/stable/release_notes.html", 26 | "_api", 27 | "_modules", 28 | "installing-providers-from-sources.html", 29 | "apache-airflow/1.", 30 | "apache-airflow/2.", 31 | "example", 32 | "cli-and-env-variables-ref.html", 33 | ] 34 | 35 | all_links = get_internal_links(docs_base_url, exclude_literal=exclude_docs) 36 | 37 | docs_url_parts = urllib.parse.urlsplit(docs_base_url) 38 | docs_url_base = f"{docs_url_parts.scheme}://{docs_url_parts.netloc}" 39 | # make sure we didn't accidentally pick up any unrelated links in recursion 40 | old_version_doc_pattern = r"/(\d+\.)*\d+/" 41 | non_doc_links = { 42 | link if (docs_url_base not in link) or re.search(old_version_doc_pattern, link) else "" for link in all_links 43 | } 44 | docs_links = all_links - non_doc_links 45 | 46 | df = pd.DataFrame(docs_links, columns=["docLink"]) 47 | 48 | df["html_content"] = df["docLink"].apply(fetch_page_content) 49 | 50 | df["content"] = df["html_content"].apply( 51 | lambda x: str(BeautifulSoup(x, "html.parser").find(class_="body", role="main")) 52 | ) 53 | df["content"] = df["content"].apply(lambda x: re.sub("¶", "", x)) 54 | 55 | df["sha"] = df["content"].apply(generate_uuid5) 56 | df["docSource"] = "apache/airflow/docs" 57 | df.reset_index(drop=True, inplace=True) 58 | 59 | # column order matters for uuid generation 60 | df = df[["docSource", "sha", "content", "docLink"]] 61 | 62 | return [df] 63 | -------------------------------------------------------------------------------- /airflow/include/tasks/extract/astro_cli_docs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | 5 | import pandas as pd 6 | import requests 7 | from bs4 import BeautifulSoup 8 | from weaviate.util import generate_uuid5 9 | 10 | 11 | def extract_astro_cli_docs() -> list[pd.DataFrame]: 12 | """ 13 | This task downloads pages from the Astro CLI documentation website and returns a list of pandas dataframes. 14 | Return type is a list in order to map to upstream dynamic tasks. 
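A minimal usage sketch (illustrative; the function takes no arguments and returns a one-element list)::

    dfs = extract_astro_cli_docs()
    print(dfs[0][["docSource", "docLink"]].head())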
15 | 16 | The returned data includes the following fields: 17 | 'docSource': 'astronomer/docs/astro-cli' 18 | 'docLink': URL for the page 19 | 'content': HTML content of the page 20 | 'sha': A UUID from the other fields 21 | """ 22 | astronomer_base_url = "https://www.astronomer.io/docs" 23 | astro_cli_overview_endpoint = "/astro/cli/overview" 24 | 25 | response = requests.get(f"{astronomer_base_url}{astro_cli_overview_endpoint}") 26 | soup = BeautifulSoup(response.text, "lxml") 27 | astro_cli_links = { 28 | f"{astronomer_base_url}{link.get('href')}" 29 | for link in soup.find_all("a") 30 | if link.get("href").startswith("/docs/astro/cli") 31 | } 32 | 33 | df = pd.DataFrame(astro_cli_links, columns=["docLink"]) 34 | df["html_content"] = df["docLink"].apply(lambda x: requests.get(x).content) 35 | 36 | df["content"] = df["html_content"].apply(lambda x: str(BeautifulSoup(x, "html.parser").find("body"))) 37 | df["content"] = df["content"].apply(lambda x: re.sub("¶", "", x)) 38 | 39 | df["sha"] = df["content"].apply(generate_uuid5) 40 | df["docSource"] = "astronomer/docs/astro-cli" 41 | df.reset_index(drop=True, inplace=True) 42 | 43 | # column order matters for uuid generation 44 | df = df[["docSource", "sha", "content", "docLink"]] 45 | 46 | return [df] 47 | -------------------------------------------------------------------------------- /airflow/include/tasks/extract/astro_docs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | 5 | import pandas as pd 6 | from bs4 import BeautifulSoup 7 | from weaviate.util import generate_uuid5 8 | 9 | from include.tasks.extract.utils.html_utils import fetch_page_content, get_internal_links 10 | 11 | base_url = "https://www.astronomer.io/docs" 12 | 13 | 14 | def process_astro_doc_page_content(page_content: str) -> str: 15 | soup = BeautifulSoup(page_content, "html.parser") 16 | 17 | # Find the main article container 18 | main_container = soup.find("main", class_="docMainContainer_TBSr") 19 | 20 | content_of_interest = main_container if main_container else soup 21 | for nav_tag in content_of_interest.find_all("nav"): 22 | nav_tag.decompose() 23 | 24 | for script_or_style in content_of_interest.find_all(["script", "style", "button", "img", "svg"]): 25 | script_or_style.decompose() 26 | 27 | feedback_widget = content_of_interest.find("div", id="feedbackWidget") 28 | if feedback_widget: 29 | feedback_widget.decompose() 30 | 31 | newsletter_form = content_of_interest.find("form", id="newsletterForm") 32 | if newsletter_form: 33 | newsletter_form.decompose() 34 | 35 | sidebar = content_of_interest.find("ul", class_=lambda value: value and "table-of-contents" in value) 36 | if sidebar: 37 | sidebar.decompose() 38 | 39 | footers = content_of_interest.find_all("footer") 40 | for footer in footers: 41 | footer.decompose() 42 | 43 | # The actual article on almost all pages of the Astro Docs website is in the following HTML container 44 | container_div = content_of_interest.find("div", class_=lambda value: value and "container" in value) 45 | 46 | if container_div: 47 | row_div = container_div.find("div", class_="row") 48 | 49 | if row_div: 50 | col_div = row_div.find("div", class_=lambda value: value and "col" in value) 51 | 52 | if col_div: 53 | content_of_interest = str(col_div) 54 | 55 | return str(content_of_interest).strip() 56 | 57 | 58 | def extract_astro_docs(base_url: str = base_url) -> list[pd.DataFrame]: 59 | """ 60 | Extract documentation pages from www.astronomer.io/docs 
and its subdomains. 61 | 62 | :return: A list of pandas dataframes with extracted data. 63 | """ 64 | all_links = get_internal_links(base_url=base_url, exclude_literal=["learn/tags"], prefix_url=base_url) 65 | 66 | # for software references we only want the latest docs; older versioned ones are removed 67 | old_version_doc_pattern = r"^https://www\.astronomer\.io/docs/software/\d+\.\d+/.+$" 68 | # remove xml files; we only want html pages 69 | non_doc_links = { 70 | link if link.endswith("xml") or re.match(old_version_doc_pattern, link) or not link.startswith(base_url) else "" 71 | for link in all_links 72 | } 73 | docs_links = all_links - non_doc_links 74 | 75 | df = pd.DataFrame(docs_links, columns=["docLink"]) 76 | 77 | df["html_content"] = df["docLink"].apply(lambda url: fetch_page_content(url)) 78 | 79 | # Only keep the main article content 80 | df["content"] = df["html_content"].apply(process_astro_doc_page_content) 81 | 82 | df["sha"] = df["content"].apply(generate_uuid5) 83 | df["docSource"] = "astro docs" 84 | df.reset_index(drop=True, inplace=True) 85 | 86 | df = df[["docSource", "sha", "content", "docLink"]] 87 | return [df] 88 | -------------------------------------------------------------------------------- /airflow/include/tasks/extract/astro_sdk_docs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | 5 | import pandas as pd 6 | 7 | from include.tasks.extract.utils.html_utils import get_internal_links, urls_to_dataframe 8 | 9 | logger = logging.getLogger("airflow.task") 10 | 11 | 12 | def extract_astro_sdk_docs() -> list[pd.DataFrame]: 13 | exclude_docs = ["autoapi", "genindex.html", "py-modindex.html", ".md", ".py"] 14 | base_url = "https://astro-sdk-python.readthedocs.io/en/stable/" 15 | 16 | urls = get_internal_links(base_url, exclude_docs) 17 | 18 | new_urls = [url for url in urls if "stable" in url] 19 | logger.info("******ingesting****") 20 | logger.info(new_urls) 21 | logger.info("*********************") 22 | df = urls_to_dataframe(new_urls, "astro-sdk") 23 | 24 | return [df] 25 | -------------------------------------------------------------------------------- /airflow/include/tasks/extract/astronomer_providers_docs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | import urllib.parse 5 | 6 | import pandas as pd 7 | from bs4 import BeautifulSoup 8 | from weaviate.util import generate_uuid5 9 | 10 | from include.tasks.extract.utils.html_utils import fetch_page_content, get_internal_links 11 | 12 | 13 | def extract_provider_docs() -> list[pd.DataFrame]: 14 | exclude_docs = ["_api", "_modules", "_sources", "changelog.html", "genindex.html", "py-modindex.html", "#"] 15 | base_url = "https://astronomer-providers.readthedocs.io/en/stable/" 16 | 17 | all_links = get_internal_links(base_url, exclude_docs) 18 | 19 | docs_url_parts = urllib.parse.urlsplit(base_url) 20 | docs_url_base = f"{docs_url_parts.scheme}://{docs_url_parts.netloc}" 21 | 22 | # make sure we didn't accidentally pick up any unrelated links in recursion 23 | # get rid of older versions of the docs, only care about "stable" version docs 24 | 25 | old_version_doc_pattern = r"/(\d+\.)*\d+/" 26 | 27 | def is_doc_link_invalid(link): 28 | return (docs_url_base not in link) or re.search(old_version_doc_pattern, link) or "/latest/" in link 29 | 30 | invalid_doc_links = {link if is_doc_link_invalid(link) else "" 
for link in all_links} 31 | docs_links = all_links - invalid_doc_links 32 | 33 | df = pd.DataFrame(docs_links, columns=["docLink"]) 34 | 35 | df["html_content"] = df["docLink"].apply(fetch_page_content) 36 | 37 | df["content"] = df["html_content"].apply( 38 | lambda x: str(BeautifulSoup(x, "html.parser").find(class_="body", role="main")) 39 | ) 40 | df["content"] = df["content"].apply(lambda x: re.sub("¶", "", x)) 41 | 42 | df["sha"] = df["content"].apply(generate_uuid5) 43 | df["docSource"] = "astronomer-providers" 44 | df.reset_index(drop=True, inplace=True) 45 | 46 | # column order matters for uuid generation 47 | df = df[["docSource", "sha", "content", "docLink"]] 48 | return [df] 49 | -------------------------------------------------------------------------------- /airflow/include/tasks/extract/blogs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import datetime 4 | 5 | import pandas as pd 6 | import requests 7 | from bs4 import BeautifulSoup 8 | from weaviate.util import generate_uuid5 9 | 10 | blog_format = "# {title}\n\n## {content}" 11 | 12 | base_url = "https://www.astronomer.io" 13 | page_url = base_url + "/blog/{page}/#archive" 14 | 15 | 16 | def extract_astro_blogs(blog_cutoff_date: datetime) -> list[pd.DataFrame]: 17 | """ 18 | This task downloads Blogs from the Astronomer website and returns a list of pandas dataframes. Return 19 | type is a list in order to map to upstream dynamic tasks. 20 | 21 | param blog_cutoff_date: Blog posts dated before this date will not be ingested. 22 | type blog_cutoff_date: datetime 23 | 24 | The returned data includes the following fields: 25 | 'docSource': 'astro blog' 26 | 'docLink': URL for the blog post 27 | 'content': Markdown encoded content of the blog. 
28 | 'sha': A UUID from the other fields 29 | """ 30 | 31 | links: list[str] = [] 32 | page = 1 33 | 34 | response = requests.get(page_url.format(page=page), headers={}) 35 | while response.ok: 36 | soup = BeautifulSoup(response.text, "lxml") 37 | 38 | articles = soup.find_all("article") 39 | 40 | card_links = [ 41 | f"{base_url}{article.find('a', href=True)['href']}" 42 | for article in articles 43 | if datetime.fromisoformat(article.find("time")["datetime"]).date() > blog_cutoff_date 44 | ] 45 | links.extend(card_links) 46 | if len(articles) != len(card_links): 47 | break 48 | 49 | page = page + 1 50 | response = requests.get(page_url.format(page=page), headers={}) 51 | 52 | df = pd.DataFrame(links, columns=["docLink"]) 53 | df.drop_duplicates(inplace=True) 54 | df["content"] = df["docLink"].apply(lambda x: requests.get(x).content) 55 | df["title"] = df["content"].apply(lambda x: BeautifulSoup(x, "html").find(class_="hero__title").get_text()) 56 | 57 | df["content"] = df["content"].apply(lambda x: BeautifulSoup(x, "lxml").find(class_="prose").get_text()) 58 | df["content"] = df.apply(lambda x: blog_format.format(title=x.title, content=x.content), axis=1) 59 | 60 | df.drop("title", axis=1, inplace=True) 61 | df["sha"] = df["content"].apply(generate_uuid5) 62 | df["docSource"] = "astro blog" 63 | df.reset_index(drop=True, inplace=True) 64 | 65 | # column order matters for uuid generation 66 | df = df[["docSource", "sha", "content", "docLink"]] 67 | 68 | return [df] 69 | -------------------------------------------------------------------------------- /airflow/include/tasks/extract/cosmos_docs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import urllib.parse 4 | 5 | import pandas as pd 6 | import requests 7 | from bs4 import BeautifulSoup 8 | from weaviate.util import generate_uuid5 9 | 10 | from airflow.decorators import task 11 | from include.tasks.extract.utils.html_utils import get_internal_links 12 | 13 | 14 | @task 15 | def extract_cosmos_docs(docs_base_url: str = "https://astronomer.github.io/astronomer-cosmos/") -> list[pd.DataFrame]: 16 | """ 17 | This task returns a dataframe containing the extracted data for the Cosmos docs. 18 | """ 19 | 20 | # we exclude the following docs which are not useful and/or too large for easy processing. 
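# ("_sources" matches the raw reStructuredText sources that Sphinx publishes alongside the rendered HTML; this reading is an assumption based on the standard Sphinx site layout.)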
21 | exclude_docs = [ 22 | "_sources", 23 | ] 24 | 25 | all_links = get_internal_links(docs_base_url, exclude_literal=exclude_docs) 26 | html_links = {url for url in all_links if url.endswith(".html")} 27 | 28 | docs_url_parts = urllib.parse.urlsplit(docs_base_url) 29 | docs_url_base = f"{docs_url_parts.scheme}://{docs_url_parts.netloc}" 30 | # make sure we didn't accidentally pick up any unrelated links in recursion 31 | non_doc_links = {link if docs_url_base not in link else "" for link in html_links} 32 | docs_links = html_links - non_doc_links 33 | 34 | df = pd.DataFrame(docs_links, columns=["docLink"]) 35 | 36 | df["html_content"] = df["docLink"].apply(lambda url: requests.get(url).content) 37 | 38 | # Only keep the main article content 39 | df["content"] = df["html_content"].apply(lambda x: str(BeautifulSoup(x, "html.parser").find(name="article"))) 40 | 41 | df["sha"] = df["content"].apply(generate_uuid5) 42 | df["docSource"] = "cosmos docs" 43 | df.reset_index(drop=True, inplace=True) 44 | 45 | # column order matters for uuid generation 46 | df = df[["docSource", "sha", "content", "docLink"]] 47 | 48 | return [df] 49 | -------------------------------------------------------------------------------- /airflow/include/tasks/extract/stack_overflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pandas as pd 4 | 5 | from include.tasks.extract.utils.stack_overflow_helpers import ( 6 | combine_stack_dfs, 7 | fetch_questions_through_stack_api, 8 | process_stack_api_answers, 9 | process_stack_api_posts, 10 | process_stack_api_questions, 11 | ) 12 | 13 | 14 | def extract_stack_overflow( 15 | tag: str, stackoverflow_cutoff_date: str, *, page_size: int = 100, max_pages: int = 10000000 16 | ) -> pd.DataFrame: 17 | """ 18 | This task generates stack overflow documents as a single markdown document per question with associated comments 19 | and answers. The task returns a pandas dataframe with all documents. 20 | 21 | param tag: The tag name to include when extracting from Stack Overflow. 22 | This is used for populating the 'docSource' field. 23 | type tag: str 24 | 25 | param stackoverflow_cutoff_date: Only messages from after this date will be extracted. 26 | type stackoverflow_cutoff_date: str 27 | 28 | returned dataframe fields are: 29 | 'docSource': 'stackoverflow' plus the tag name (i.e. 'airflow') 30 | 'docLink': URL for the base question. 31 | 'content': The question (plus answers) in markdown format. 32 | 'sha': a UUID based on the other fields. This is for compatibility with other document types. 
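A hedged usage sketch (argument values are illustrative)::

    df = extract_stack_overflow(
        tag="airflow",
        stackoverflow_cutoff_date="2021-09-01",
        page_size=100,
        max_pages=10,
    )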
33 | """ 34 | 35 | questions = fetch_questions_through_stack_api( 36 | tag=tag, 37 | stackoverflow_cutoff_date=stackoverflow_cutoff_date, 38 | page_size=page_size, 39 | max_pages=max_pages, 40 | ) 41 | posts_df = process_stack_api_posts(questions) 42 | questions_df = process_stack_api_questions(posts_df=posts_df, tag=tag) 43 | answers_df = process_stack_api_answers(posts_df=posts_df) 44 | return combine_stack_dfs(questions_df=questions_df, answers_df=answers_df, tag=tag) 45 | -------------------------------------------------------------------------------- /airflow/include/tasks/extract/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/tasks/extract/utils/__init__.py -------------------------------------------------------------------------------- /airflow/include/tasks/extract/utils/slack_helpers.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | 3 | import pandas as pd 4 | from slack_sdk import errors as slack_errors 5 | from slack_sdk.web.client import WebClient 6 | 7 | from airflow.exceptions import AirflowException 8 | 9 | 10 | def get_slack_replies(df: pd.DataFrame, channel_id: str, slack_client: WebClient) -> list: 11 | """ 12 | This helper function reads replies for each thread in df. Unlike the archive these need to 13 | be pulled separately. 14 | 15 | param df: Dataframe of slack messages. These are the base messages before pulling the 16 | associated replies. 17 | type df: pd.DataFrame 18 | 19 | param channel_id: The channel ID to search for replies. 20 | type channel_id: str 21 | 22 | param slack_client: A slack client to use for reading. It should be instantiated with a 23 | slack ID with read permissions for channel_id. 24 | type slack_client: slack_sdk.web.client.WebClient 25 | 26 | """ 27 | replies = [] 28 | 29 | threads_with_replies = df[df.reply_count > 0].thread_ts.to_list() 30 | 31 | for ts in threads_with_replies: 32 | print(f"Fetching replies for thread {ts}") 33 | for attempt in range(10): 34 | try: 35 | reply = slack_client.conversations_replies(channel=channel_id, ts=ts) 36 | replies.extend(reply.data["messages"]) 37 | except Exception as e: 38 | if isinstance(e, slack_errors.SlackApiError) and e.response.get("error") == "ratelimited": 39 | sleep_time = e.response.headers.get("retry-after") 40 | print(f"Received ratelimit. 
Sleeping {sleep_time}") 41 | sleep(int(sleep_time)) 42 | else: 43 | raise e 44 | else: 45 | break 46 | else: 47 | raise AirflowException("Retry count exceeded fetching replies.") 48 | 49 | return replies 50 | -------------------------------------------------------------------------------- /airflow/include/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/airflow/include/utils/__init__.py -------------------------------------------------------------------------------- /airflow/include/utils/slack.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from airflow.providers.slack.notifications.slack import send_slack_notification 4 | 5 | AIRFLOW__WEBSERVER__BASE_URL = os.environ.get("AIRFLOW__WEBSERVER__BASE_URL", "http://localhost:8080") 6 | ASK_ASTRO_ALERT_SLACK_CHANNEL_NAME = os.environ.get("ASK_ASTRO_ALERT_SLACK_CHANNEL_NAME", "#ask-astro-alert") 7 | ASK_ASTRO_ALERT_SLACK_CONN_ID = os.environ.get("ASK_ASTRO_ALERT_SLACK_CONN_ID", "slack_api_default") 8 | 9 | 10 | def send_failure_notification(dag_id, execution_date): 11 | dag_link = f"{AIRFLOW__WEBSERVER__BASE_URL}/dags/{dag_id}/grid?search={dag_id}" 12 | notification_text = f":red_circle: The DAG <{dag_link}|{dag_id}> with execution date `{execution_date}` failed." 13 | return send_slack_notification( 14 | slack_conn_id=ASK_ASTRO_ALERT_SLACK_CONN_ID, channel=ASK_ASTRO_ALERT_SLACK_CHANNEL_NAME, text=notification_text 15 | ) 16 | -------------------------------------------------------------------------------- /airflow/packages.txt: -------------------------------------------------------------------------------- 1 | build-essential 2 | pandoc 3 | git 4 | -------------------------------------------------------------------------------- /airflow/requirements.txt: -------------------------------------------------------------------------------- 1 | apache-airflow-providers-weaviate==1.3.0 2 | apache-airflow-providers-github==2.3.1 3 | apache-airflow-providers-slack==7.3.2 4 | streamlit==1.25.0 5 | html2text==2020.1.16 6 | pypandoc==1.11 7 | langchain==0.0.329 8 | langchain-community==0.0.6 9 | markdownify==0.11.6 10 | stackapi==0.3.0 11 | protobuf==3.20.2 12 | google-cloud-firestore==2.12.0 13 | openai==0.28.1 14 | bs4==0.0.1 15 | lxml==4.9.3 16 | tiktoken==0.5.1 17 | firebase-admin==6.2.0 18 | snowflake-connector-python==3.6.0 19 | tenacity==8.2.3 20 | -------------------------------------------------------------------------------- /api/.gitignore: -------------------------------------------------------------------------------- 1 | /embeddings/ 2 | /data/ 3 | /db/index.* 4 | /.DS_Store 5 | -------------------------------------------------------------------------------- /api/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-buster as builder 2 | 3 | ENV PYTHONUNBUFFERED=1 \ 4 | PYTHONDONTWRITEBYTECODE=1 \ 5 | PIP_NO_CACHE_DIR=off \ 6 | PIP_DISABLE_PIP_VERSION_CHECK=on \ 7 | PIP_DEFAULT_TIMEOUT=100 \ 8 | POETRY_VIRTUALENVS_IN_PROJECT=1 \ 9 | POETRY_VIRTUALENVS_CREATE=1 \ 10 | POETRY_NO_INTERACTION=1 \ 11 | POETRY_CACHE_DIR="/tmp/poetry_cache" 12 | 13 | RUN pip install poetry==1.6.1 14 | 15 | # copy project requirement files here to ensure they will be cached. 
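# (Illustrative note: with this layout, "docker build -t ask-astro-api ." reuses the cached
# dependency layer on rebuilds unless poetry.lock or pyproject.toml change; the image tag is a placeholder.)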
16 | WORKDIR /app 17 | COPY poetry.lock pyproject.toml ./ 18 | RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR 19 | 20 | 21 | FROM python:3.11-slim as runtime 22 | ENV VIRTUAL_ENV=/app/.venv \ 23 | PATH="/app/.venv/bin:$PATH" 24 | COPY --from=builder $VIRTUAL_ENV $VIRTUAL_ENV 25 | COPY . . 26 | 27 | # GCP credential for local dev 28 | # COPY gcp.json ./ 29 | 30 | EXPOSE 8080 31 | ENTRYPOINT python -m ask_astro.app 32 | -------------------------------------------------------------------------------- /api/ask_astro/.gitignore: -------------------------------------------------------------------------------- 1 | /__pycache__/ 2 | /.chroma/ 3 | -------------------------------------------------------------------------------- /api/ask_astro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/api/ask_astro/__init__.py -------------------------------------------------------------------------------- /api/ask_astro/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | Initialize the Sanic app and route requests to the Slack app. 3 | """ 4 | import logging 5 | import os 6 | from logging import getLogger 7 | 8 | from sanic import Request, Sanic 9 | 10 | from ask_astro.rest.controllers import register_routes 11 | from ask_astro.slack.app import app_handler, slack_app 12 | from ask_astro.slack.controllers import register_controllers 13 | 14 | # set the logging level based on an env var 15 | logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO")) 16 | 17 | logger = getLogger(__name__) 18 | 19 | api = Sanic(name="ask_astro") 20 | 21 | 22 | # route slack requests to the slack app 23 | @api.get("/slack/oauth_redirect", name="oauth_redirect") 24 | @api.get("/slack/install", name="install") 25 | @api.post("/slack/events", name="events") 26 | async def endpoint(req: Request): 27 | """Forward requests to the Slack bolt handler.""" 28 | return await app_handler.handle(req) 29 | 30 | 31 | server_port = int(os.environ.get("PORT", 8080)) 32 | 33 | register_controllers(slack_app) 34 | register_routes(api) 35 | 36 | if __name__ == "__main__": 37 | api.run(host="0.0.0.0", port=server_port) 38 | -------------------------------------------------------------------------------- /api/ask_astro/chains/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/api/ask_astro/chains/__init__.py -------------------------------------------------------------------------------- /api/ask_astro/chains/custom_llm_filter_prompt.py: -------------------------------------------------------------------------------- 1 | from langchain_core.output_parsers import BaseOutputParser 2 | from langchain_core.prompts import PromptTemplate 3 | 4 | prompt_template = """Given the following question starting with "Question:", and context starting with "Context:" surrounded by >>> symbols, return YES if the context can be used to answer the question and NO if it cannot. 5 | 6 | > Question: {question} 7 | > Context: 8 | >>> 9 | {context} 10 | >>> 11 | > Context surrounded by >>> is helpful and can be used to answer the question (YES / NO):""" 12 | 13 | 14 | class CustomBooleanOutputParser(BaseOutputParser[bool]): 15 | """Parse the output of an LLM call to a boolean. 
Default to True if the response is not formatted correctly.""" 16 | 17 | true_val: str = "YES" 18 | """The string value that should be parsed as True.""" 19 | false_val: str = "NO" 20 | """The string value that should be parsed as False.""" 21 | 22 | def parse(self, text: str) -> bool: 23 | """Parse the output of an LLM call to a boolean by checking if YES/NO is contained in the output. 24 | 25 | Args: 26 | text: output of a language model. 27 | 28 | Returns: 29 | boolean 30 | 31 | """ 32 | cleaned_text = text.strip().upper() 33 | return self.false_val not in cleaned_text 34 | 35 | @property 36 | def _type(self) -> str: 37 | """Snake-case string identifier for an output parser type.""" 38 | return "custom_boolean_output_parser" 39 | 40 | 41 | custom_llm_chain_filter_prompt_template = PromptTemplate( 42 | template=prompt_template, 43 | input_variables=["question", "context"], 44 | output_parser=CustomBooleanOutputParser(), 45 | ) 46 | -------------------------------------------------------------------------------- /api/ask_astro/chains/custom_llm_output_lines_parser.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from langchain.output_parsers.pydantic import PydanticOutputParser 4 | from langchain.retrievers.multi_query import LineList 5 | 6 | 7 | class CustomLineListOutputParser(PydanticOutputParser): 8 | """ 9 | Output parser for a list of lines with additional cleaning and fail checks. 10 | This is a modified, less error-prone implementation of the LineListOutputParser from LangChain. 11 | """ 12 | 13 | max_lines: int = 2 14 | max_line_len: int | None = None 15 | 16 | def __init__(self, max_lines: int = 2, max_line_len: int | None = None) -> None: 17 | """ 18 | Initialize CustomLineListOutputParser. 19 | 20 | :param max_lines: maximum number of lines, default is 2 21 | :param max_line_len: maximum length of lines, default is None 22 | """ 23 | super().__init__(pydantic_object=LineList) 24 | self.max_lines = max_lines 25 | self.max_line_len = max_line_len 26 | 27 | def _is_output_line_valid(self, line: str) -> bool: 28 | """ 29 | Check if a line of LLM output is valid. 30 | 31 | :param line: LLM output line to be used as input 32 | :return: True if the line is valid, False otherwise 33 | """ 34 | return line != "" and (self.max_line_len is None or len(line) <= self.max_line_len) 35 | 36 | def parse(self, text: str) -> LineList: 37 | """ 38 | Parse the input text into LineList. 39 | 40 | :param text: input text to parse 41 | :return: parsed LineList 42 | """ 43 | lines = text.strip().split("\n") 44 | lines = [s for s in lines if self._is_output_line_valid(s)][: self.max_lines] 45 | return LineList(lines=lines) 46 | -------------------------------------------------------------------------------- /api/ask_astro/clients/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/api/ask_astro/clients/__init__.py -------------------------------------------------------------------------------- /api/ask_astro/clients/firestore.py: -------------------------------------------------------------------------------- 1 | """ 2 | Firestore client to handle database operations. 3 | 4 | This module provides an asynchronous Firestore client instance for use across the application. 5 | The client will authenticate using the credentials set in the environment. 
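A minimal usage sketch (collection and document names are placeholders; must run inside an event loop)::

    from ask_astro.clients.firestore import firestore_client

    doc = await firestore_client.collection("requests").document("some-id").get()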
6 | """ 7 | 8 | from google.cloud import firestore 9 | 10 | # auth is handled implicitly by the environment. 11 | # Singleton instance of the Firestore AsyncClient. 12 | firestore_client = firestore.AsyncClient() 13 | 14 | __all__ = ["firestore_client"] 15 | -------------------------------------------------------------------------------- /api/ask_astro/clients/langsmith_.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module initializes a client instance from the LangSmith library. 3 | 4 | The client provides functionalities related to the LangSmith package, and it's instantiated on module import. 5 | """ 6 | from langsmith import Client 7 | 8 | # Singleton instance of the LangSmith Client. 9 | langsmith_client = Client() 10 | -------------------------------------------------------------------------------- /api/ask_astro/clients/weaviate_.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides configurations and initializations for the Weaviate client, 3 | as well as text embeddings using the OpenAIEmbeddings from the LangChain library. 4 | """ 5 | import weaviate 6 | from langchain.vectorstores import Weaviate 7 | from weaviate import Client as WeaviateClient 8 | 9 | from ask_astro.config import WeaviateConfig 10 | 11 | # Configure and initialize the Weaviate client. 12 | client = WeaviateClient( 13 | url=WeaviateConfig.url, 14 | auth_client_secret=weaviate.AuthApiKey(api_key=WeaviateConfig.api_key), 15 | additional_headers={ 16 | "X-Openai-Api-Key": WeaviateConfig.OpenAIApiKey, 17 | }, 18 | ) 19 | 20 | # Create a Weaviate instance for search functionality using the initialized client. 21 | docsearch = Weaviate( 22 | client=client, 23 | index_name=WeaviateConfig.index_name, 24 | text_key=WeaviateConfig.text_key, 25 | attributes=WeaviateConfig.attributes, 26 | ) 27 | -------------------------------------------------------------------------------- /api/ask_astro/config.py: -------------------------------------------------------------------------------- 1 | """Exports config variables that are used throughout the code.""" 2 | import json 3 | import os 4 | 5 | 6 | class FirestoreCollections: 7 | """Contains the names of the collections in the Firestore database.""" 8 | 9 | installation_store = os.environ.get("FIRESTORE_INSTALLATION_STORE_COLLECTION") 10 | state_store = os.environ.get("FIRESTORE_STATE_STORE_COLLECTION") 11 | messages = os.environ.get("FIRESTORE_MESSAGES_COLLECTION") 12 | mentions = os.environ.get("FIRESTORE_MENTIONS_COLLECTION") 13 | actions = os.environ.get("FIRESTORE_ACTIONS_COLLECTION") 14 | responses = os.environ.get("FIRESTORE_RESPONSES_COLLECTION") 15 | reactions = os.environ.get("FIRESTORE_REACTIONS_COLLECTION") 16 | shortcuts = os.environ.get("FIRESTORE_SHORTCUTS_COLLECTION") 17 | teams = os.environ.get("FIRESTORE_TEAMS_COLLECTION") 18 | requests = os.environ.get("FIRESTORE_REQUESTS_COLLECTION") 19 | 20 | 21 | class AzureOpenAIParams: 22 | """Contains the parameters for the Azure OpenAI API.""" 23 | 24 | us_east2_raw = os.environ.get("AZURE_OPENAI_USEAST2_PARAMS") 25 | us_east2 = json.loads(us_east2_raw) if us_east2_raw else {} 26 | 27 | 28 | class ZendeskConfig: 29 | """Contains the config variables for the Zendesk API.""" 30 | 31 | credentials = os.environ.get("ZENDESK_CREDENTIALS") 32 | assignee_group_id = os.environ.get("ZENDESK_ASSIGNEE_GROUP_ID") 33 | 34 | 35 | class SlackAppConfig: 36 | "Contains the config variables for the Slack app." 
37 | 38 | client_id = os.environ.get("SLACK_CLIENT_ID") 39 | client_secret = os.environ.get("SLACK_CLIENT_SECRET") 40 | signing_secret = os.environ.get("SLACK_SIGNING_SECRET") 41 | 42 | 43 | class LangSmithConfig: 44 | """Contains the config variables for the Langsmith API.""" 45 | 46 | project_name = os.environ.get("LANGCHAIN_PROJECT") 47 | tracing_v2 = os.environ.get("LANGCHAIN_TRACING_V2") 48 | endpoint = os.environ.get("LANGCHAIN_ENDPOINT") 49 | api_key = os.environ.get("LANGCHAIN_API_KEY") 50 | 51 | 52 | class WeaviateConfig: 53 | """Contains the config variables for the Weaviate API.""" 54 | 55 | OpenAIApiKey = os.environ.get("OPENAI_API_KEY") 56 | url = os.environ.get("WEAVIATE_URL") 57 | api_key = os.environ.get("WEAVIATE_API_KEY") 58 | index_name = os.environ.get("WEAVIATE_INDEX_NAME") 59 | text_key = os.environ.get("WEAVIATE_TEXT_KEY") 60 | attributes = os.environ.get("WEAVIATE_ATTRIBUTES", "").split(",") 61 | k = int(os.environ.get("WEAVIATE_HYBRID_SEARCH_TOP_K", 100)) 62 | alpha = float(os.environ.get("WEAVIATE_HYBRID_SEARCH_ALPHA", 0.5)) 63 | create_schema_if_missing = bool(os.environ.get("WEAVIATE_CREATE_SCHEMA_IF_MISSING", "").lower() == "true") 64 | 65 | 66 | class CohereConfig: 67 | """Contains the config variables for the Cohere API.""" 68 | 69 | rerank_top_n = int(os.environ.get("COHERE_RERANK_TOP_N", 8)) 70 | 71 | 72 | class PromptPreprocessingConfig: 73 | """Contains the config variables for the user prompt preprocessing function.""" 74 | 75 | max_char = int(os.environ.get("PROMPT_PREPROCESSING_MAX_CHAR", 20000)) 76 | max_chat_history_len = int(os.environ.get("PROMPT_PREPROCESSING_MAX_CHAT_HISTORY_LEN", 10)) 77 | -------------------------------------------------------------------------------- /api/ask_astro/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/api/ask_astro/models/__init__.py -------------------------------------------------------------------------------- /api/ask_astro/rest/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/api/ask_astro/rest/__init__.py -------------------------------------------------------------------------------- /api/ask_astro/rest/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | """Contains a function to register all controllers with the app.""" 2 | from __future__ import annotations 3 | 4 | from dataclasses import dataclass 5 | from logging import getLogger 6 | from typing import Callable 7 | 8 | from sanic import Sanic, response 9 | 10 | from ask_astro.rest.controllers.get_request import on_get_request 11 | from ask_astro.rest.controllers.health_status import on_get_health_status 12 | from ask_astro.rest.controllers.list_recent_requests import on_list_recent_requests 13 | from ask_astro.rest.controllers.post_request import on_post_request 14 | from ask_astro.rest.controllers.submit_feedback import on_submit_feedback 15 | 16 | logger = getLogger(__name__) 17 | 18 | 19 | @dataclass 20 | class RouteConfig: 21 | handler: Callable[..., response.BaseHTTPResponse] 22 | uri: str 23 | methods: list[str] 24 | name: str 25 | 26 | 27 | def register_routes(api: Sanic): 28 | """Registers all controllers with the app.""" 29 | 30 | routes: list[RouteConfig] = [ 31 | RouteConfig(on_list_recent_requests, "/requests", 
["GET"], "list_recent_requests"), 32 | RouteConfig(on_get_request, "/requests/", ["GET"], "get_request"), 33 | RouteConfig(on_post_request, "/requests", ["POST"], "post_request"), 34 | RouteConfig(on_submit_feedback, "/requests//feedback", ["POST"], "submit_feedback"), 35 | RouteConfig(on_get_health_status, "/health_status", ["GET"], "health_status"), 36 | ] 37 | 38 | for route_config in routes: 39 | api.add_route( 40 | handler=route_config.handler, 41 | uri=route_config.uri, 42 | methods=route_config.methods, 43 | name=route_config.name, 44 | ) 45 | logger.info("Registered %s %s controller", route_config.methods[0], route_config.uri) 46 | -------------------------------------------------------------------------------- /api/ask_astro/rest/controllers/get_request.py: -------------------------------------------------------------------------------- 1 | """ 2 | Handles GET requests to the /ask/{question_id} endpoint. 3 | """ 4 | from __future__ import annotations 5 | 6 | import re 7 | from logging import getLogger 8 | from uuid import UUID 9 | 10 | from sanic import Request, json 11 | from sanic_ext import openapi 12 | 13 | from ask_astro.clients.firestore import firestore_client 14 | from ask_astro.config import FirestoreCollections 15 | from ask_astro.models.request import AskAstroRequest 16 | 17 | logger = getLogger(__name__) 18 | 19 | 20 | def replace_single_newline_pattern_with_double_newline(text): 21 | return re.sub( 22 | r"(.+?)\n•", 23 | lambda match: f"{match.group(1)}\n\n•" if "\n\n" not in match.group(1) else match.group(0), 24 | text, 25 | ) 26 | 27 | 28 | @openapi.definition(response=AskAstroRequest.schema_json()) 29 | async def on_get_request(request: Request, request_id: UUID) -> json: 30 | """ 31 | Handles GET requests to the /requests/{request_id} endpoint. 32 | 33 | :param request: The Sanic request object. 34 | :param request_id: The unique identifier for the AskAstro request. 35 | """ 36 | try: 37 | logger.info("Received GET request for request %s", request_id) 38 | request = await firestore_client.collection(FirestoreCollections.requests).document(str(request_id)).get() 39 | 40 | logger.info("Request %s exists: %s", request_id, request.exists) 41 | 42 | if not request.exists: 43 | return json({"error": "Question not found"}, status=404) 44 | 45 | request_dict = request.to_dict() 46 | if request_dict and "response" in request_dict and request_dict["response"] is not None: 47 | request_dict["response"] = replace_single_newline_pattern_with_double_newline(request_dict["response"]) 48 | return json(request_dict, status=200) 49 | except Exception as e: 50 | logger.error("Error fetching data for request %s: %s", request_id, e) 51 | return json({"error": "Internal Server Error"}, status=500) 52 | -------------------------------------------------------------------------------- /api/ask_astro/rest/controllers/health_status.py: -------------------------------------------------------------------------------- 1 | """ 2 | Handles GET requests to the /ask/{question_id} endpoint. 3 | """ 4 | from __future__ import annotations 5 | 6 | from logging import getLogger 7 | 8 | from sanic import Request, json 9 | from sanic_ext import openapi 10 | 11 | from ask_astro import settings 12 | from ask_astro.models.request import HealthStatus 13 | 14 | logger = getLogger(__name__) 15 | 16 | 17 | @openapi.definition(response=HealthStatus.schema_json()) 18 | async def on_get_health_status(request: Request) -> json: 19 | """ 20 | Handles GET requests to the /health_status endpoint. 
21 | 22 | :param request: The Sanic request object. 23 | """ 24 | if settings.SHOW_SERVICE_MAINTENANCE_BANNER: 25 | return json({"status": "maintenance"}, status=200) 26 | return json({"status": "healthy"}, status=200) 27 | -------------------------------------------------------------------------------- /api/ask_astro/rest/controllers/list_recent_requests.py: -------------------------------------------------------------------------------- 1 | """ 2 | Handles GET requests to the /requests endpoint. 3 | """ 4 | from __future__ import annotations 5 | 6 | from logging import getLogger 7 | 8 | from pydantic.v1 import BaseModel, Field 9 | from sanic import Request, json 10 | from sanic_ext import openapi 11 | 12 | from ask_astro.clients.firestore import firestore_client 13 | from ask_astro.config import FirestoreCollections 14 | from ask_astro.models.request import AskAstroRequest 15 | 16 | logger = getLogger(__name__) 17 | 18 | 19 | class RecentRequestsResponse(BaseModel): 20 | """Data model for the list of recent requests returned in the API response.""" 21 | 22 | requests: list[AskAstroRequest] = Field( 23 | default_factory=list, 24 | description="The requests", 25 | ) 26 | 27 | def to_dict(self): 28 | """Convert the RecentRequestsResponse model to a dictionary.""" 29 | return { 30 | "requests": [request.to_firestore() for request in self.requests], 31 | } 32 | 33 | 34 | @openapi.definition( 35 | response=RecentRequestsResponse.schema_json(), 36 | ) 37 | async def on_list_recent_requests(_: Request) -> json: 38 | """Handle GET requests to retrieve a list of recent completed requests marked as examples.""" 39 | try: 40 | # Query the Firestore for the most recent 12 completed example requests 41 | query_results = await ( 42 | firestore_client.collection(FirestoreCollections.requests) 43 | .order_by("sent_at", direction="DESCENDING") 44 | .where("status", "==", "complete") 45 | .where("is_example", "==", True) 46 | .limit(12) 47 | .get() 48 | ) 49 | recent_requests = [AskAstroRequest.from_dict(request_doc.to_dict()) for request_doc in query_results] 50 | 51 | return json(RecentRequestsResponse(requests=recent_requests).to_dict(), status=200) 52 | except Exception as e: 53 | logger.error("Error while fetching recent requests: %s", e) 54 | return json({"error": "An error occurred while processing your request."}, status=500) 55 | -------------------------------------------------------------------------------- /api/ask_astro/rest/controllers/submit_feedback.py: -------------------------------------------------------------------------------- 1 | """ 2 | Handles POST requests to the /requests/{request_id}/feedback endpoint. 3 | """ 4 | from __future__ import annotations 5 | 6 | from logging import getLogger 7 | from uuid import UUID 8 | 9 | from pydantic.v1 import BaseModel, Field 10 | from sanic import HTTPResponse, Request 11 | from sanic_ext import openapi 12 | 13 | from ask_astro.services.feedback import submit_feedback 14 | 15 | logger = getLogger(__name__) 16 | 17 | 18 | class PostRequestBody(BaseModel): 19 | """Model for the body of a POST feedback request.""" 20 | 21 | positive: bool = Field(..., description="Whether the feedback is positive") 22 | 23 | 24 | @openapi.definition( 25 | body=PostRequestBody.schema(), 26 | ) 27 | async def on_submit_feedback(request: Request, request_id: UUID) -> HTTPResponse: 28 | """ 29 | Handles POST requests to the /requests/{request_id}/feedback endpoint. 30 | 31 | :param request: The Sanic request object. 
32 | :param request_id: The unique identifier for the AskAstro request. 33 | """ 34 | try: 35 | positive = request.json["positive"] 36 | 37 | await submit_feedback(str(request_id), positive, {"source": "api"}) 38 | 39 | return HTTPResponse(status=200) 40 | except Exception as e: 41 | logger.error("Error occurred while processing feedback for request %s: %s", request_id, e) 42 | return HTTPResponse(text="An internal error occurred.", status=500) 43 | -------------------------------------------------------------------------------- /api/ask_astro/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/api/ask_astro/services/__init__.py -------------------------------------------------------------------------------- /api/ask_astro/services/feedback.py: -------------------------------------------------------------------------------- 1 | """Handles feedback submission for requests; writes to Firestore and LangSmith.""" 2 | from __future__ import annotations 3 | 4 | import asyncio 5 | from logging import getLogger 6 | from typing import Any 7 | 8 | from ask_astro.clients.firestore import firestore_client 9 | from ask_astro.clients.langsmith_ import langsmith_client 10 | from ask_astro.config import FirestoreCollections 11 | 12 | logger = getLogger(__name__) 13 | 14 | 15 | class FeedbackSubmissionError(Exception): 16 | """Exception raised when there's an error submitting feedback.""" 17 | 18 | 19 | async def submit_feedback(request_id: str, correct: bool, source_info: dict[str, Any] | None) -> None: 20 | """ 21 | Submits feedback for a request. Writes to Firestore and LangSmith. 22 | 23 | :param request_id: The ID of the request for which feedback is provided. 24 | :param correct: Boolean indicating if the feedback is positive or not. 25 | :param source_info: Additional source information for the feedback. 
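A hedged usage sketch (the request ID is a placeholder; must run inside an async context)::

    await submit_feedback("some-request-id", correct=True, source_info={"type": "api"})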
26 | """ 27 | logger.info("Submitting feedback for request %s: %s", request_id, correct) 28 | 29 | try: 30 | # first, get the request from the database 31 | request = await firestore_client.collection(FirestoreCollections.requests).document(request_id).get() 32 | 33 | if not request.exists: 34 | raise ValueError("Request %s does not exist", request_id) 35 | 36 | langchain_run_id = request.to_dict().get("langchain_run_id") 37 | if not langchain_run_id: 38 | raise ValueError("Request %s does not have a langchain run id", request_id) 39 | 40 | # update the db and langsmith 41 | async with asyncio.TaskGroup() as tg: 42 | # update just the score field 43 | tg.create_task( 44 | firestore_client.collection(FirestoreCollections.requests) 45 | .document(request_id) 46 | .update({"score": 1 if correct else 0}) 47 | ) 48 | 49 | tg.create_task( 50 | asyncio.to_thread( 51 | lambda: langsmith_client.create_feedback( 52 | key="correctness", 53 | run_id=langchain_run_id, 54 | score=1 if correct else 0, 55 | source_info=source_info, 56 | ) 57 | ) 58 | ) 59 | except Exception as e: 60 | logger.error("Error occurred while processing feedback for request %s: %s", request_id, e) 61 | raise FeedbackSubmissionError("Failed to submit feedback for request %s.", request_id) from e 62 | -------------------------------------------------------------------------------- /api/ask_astro/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Environment variable for MultiQueryRetriever 4 | MULTI_QUERY_RETRIEVER_TEMPERATURE = float(os.environ.get("MULTI_QUERY_RETRIEVER_TEMPERATURE", "0.0")) 5 | MULTI_QUERY_RETRIEVER_DEPLOYMENT_NAME = os.environ.get("MULTI_QUERY_RETRIEVER_DEPLOYMENT_NAME", "gpt-35-turbo") 6 | 7 | 8 | # Environment variables for ConversationalRetrievalChain LLMChain 9 | CONVERSATIONAL_RETRIEVAL_LLM_CHAIN_TEMPERATURE = float( 10 | os.environ.get("CONVERSATIONAL_RETRIEVAL_CHAIN_TEMPERATURE", "0.3") 11 | ) 12 | CONVERSATIONAL_RETRIEVAL_LLM_CHAIN_DEPLOYMENT_NAME = os.environ.get( 13 | "CONVERSATIONAL_RETRIEVAL_LLM_CHAIN_DEPLOYMENT_NAME", "gpt-35-turbo-16k" 14 | ) 15 | 16 | # Environment variables for ConversationalRetrievalChain Load QA Chain 17 | CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_TEMPERATURE = float( 18 | os.environ.get("CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_TEMPERATURE", "0.3") 19 | ) 20 | CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_DEPLOYMENT_NAME = os.environ.get( 21 | "CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_DEPLOYMENT_NAME", "gpt-4o" 22 | ) 23 | 24 | SHOW_SERVICE_MAINTENANCE_BANNER = os.environ.get("SHOW_SERVICE_MAINTENANCE_BANNER", "False").upper() == "TRUE" 25 | -------------------------------------------------------------------------------- /api/ask_astro/slack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/api/ask_astro/slack/__init__.py -------------------------------------------------------------------------------- /api/ask_astro/slack/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates the Slack app and the Slack app handler. 3 | 4 | This module sets up the Slack app's OAuth settings and creates an instance 5 | of the Slack app and its handler. 6 | 7 | .. note:: 8 | **Scopes Required for Slack:** 9 | 10 | - `commands`: Add shortcuts and/or slash commands. 
11 | - `app_mentions:read`: Read messages that directly mention the app in conversations. 12 | - `channels:read`: View basic information about public channels in the workspace. 13 | - `channels:history`: View messages and other content in public channels. 14 | - `groups:read`: View basic information about private channels. 15 | - `groups:history`: View messages and other content in private channels. 16 | - `chat:write`: Send messages as the app. 17 | - `reactions:read`: View emoji reactions and their associated messages in channels and conversations. 18 | - `reactions:write`: Add and remove emoji reactions to/from messages. 19 | - `users:read`: View people in the workspace. 20 | - `users:read.email`: View email addresses of people in the workspace. 21 | - `team:read`: View name, email domain, and icon for the workspace. 22 | - `im:history`: View messages and other content in direct messages. 23 | - `mpim:history`: View messages and other content in group direct messages. 24 | - `files:read`: View files shared in channels and conversations the app has access to. 25 | """ 26 | 27 | from slack_bolt.adapter.sanic import AsyncSlackRequestHandler 28 | from slack_bolt.app.async_app import AsyncApp 29 | from slack_bolt.oauth.async_oauth_settings import AsyncOAuthSettings 30 | 31 | from ask_astro.config import FirestoreCollections, SlackAppConfig 32 | from ask_astro.stores.installation_store import AsyncFirestoreInstallationStore 33 | from ask_astro.stores.oauth_state_store import AsyncFirestoreOAuthStateStore 34 | 35 | oauth_settings = AsyncOAuthSettings( 36 | client_id=SlackAppConfig.client_id, 37 | client_secret=SlackAppConfig.client_secret, 38 | scopes=[ 39 | "commands", 40 | "app_mentions:read", 41 | "channels:read", 42 | "channels:history", 43 | "groups:read", 44 | "groups:history", 45 | "chat:write", 46 | "reactions:read", 47 | "reactions:write", 48 | "users:read", 49 | "users:read.email", 50 | "team:read", 51 | "im:history", 52 | "mpim:history", 53 | "files:read", 54 | ], 55 | installation_store=AsyncFirestoreInstallationStore( 56 | collection=FirestoreCollections.installation_store, 57 | ), 58 | state_store=AsyncFirestoreOAuthStateStore( 59 | expiration_seconds=600, 60 | collection=FirestoreCollections.state_store, 61 | ), 62 | ) 63 | 64 | slack_app = AsyncApp( 65 | signing_secret=SlackAppConfig.signing_secret, 66 | oauth_settings=oauth_settings, 67 | ) 68 | app_handler = AsyncSlackRequestHandler(slack_app) 69 | -------------------------------------------------------------------------------- /api/ask_astro/slack/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | """Contains a function to register all controllers with the app.""" 2 | from __future__ import annotations 3 | 4 | from logging import getLogger 5 | 6 | from slack_bolt.async_app import AsyncApp 7 | 8 | from ask_astro.slack.controllers.feedback.bad import handle_feedback_bad 9 | from ask_astro.slack.controllers.feedback.good import handle_feedback_good 10 | from ask_astro.slack.controllers.mention import on_mention 11 | 12 | logger = getLogger(__name__) 13 | 14 | 15 | def register_controllers(app: AsyncApp): 16 | """ 17 | Registers all controllers with the app. 18 | 19 | :param app: The Slack AsyncApp instance where controllers need to be registered. 
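A minimal usage sketch (mirrors the wiring in ask_astro.app; the token and secret are placeholders)::

    from slack_bolt.async_app import AsyncApp

    app = AsyncApp(token="xoxb-...", signing_secret="...")
    register_controllers(app)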
20 | """ 21 | 22 | handlers = { 23 | "event:app_mention": on_mention, 24 | "action:feedback_good": handle_feedback_good, 25 | "action:feedback_bad": handle_feedback_bad, 26 | } 27 | 28 | for event_action, handler in handlers.items(): 29 | event_type, identifier = event_action.split(":") 30 | 31 | if event_type == "event": 32 | app.event(identifier)(handler) 33 | elif event_type == "action": 34 | app.action(identifier)(handler) 35 | 36 | logger.info("Registered %s:%s controller", event_type, identifier) 37 | -------------------------------------------------------------------------------- /api/ask_astro/slack/controllers/feedback/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/api/ask_astro/slack/controllers/feedback/__init__.py -------------------------------------------------------------------------------- /api/ask_astro/slack/controllers/feedback/bad.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from asyncio import TaskGroup 4 | from logging import getLogger 5 | from typing import Any 6 | 7 | from slack_bolt.async_app import AsyncAck, AsyncRespond 8 | from slack_sdk.errors import SlackApiError 9 | from slack_sdk.web.async_client import AsyncWebClient 10 | 11 | from ask_astro.services.feedback import submit_feedback 12 | 13 | logger = getLogger(__name__) 14 | 15 | 16 | def extract_feedback_details(body: dict[str, Any]) -> dict[str, str]: 17 | """ 18 | Extract necessary details from Slack body for feedback processing. 19 | 20 | :param body: The slack event body. 21 | """ 22 | try: 23 | return { 24 | "user": body["user"]["id"], 25 | "channel": body["channel"]["id"], 26 | "thread_ts": body["message"]["thread_ts"], 27 | "message_ts": body["message"]["ts"], 28 | "value": body["actions"][0]["value"], 29 | } 30 | except KeyError as e: 31 | logger.error("Missing key: %s", e) 32 | return {} 33 | 34 | 35 | async def handle_feedback_bad( 36 | body: dict[str, Any], ack: AsyncAck, respond: AsyncRespond, client: AsyncWebClient 37 | ) -> None: 38 | """ 39 | Handle feedback received from Slack and send appropriate responses. 40 | 41 | :param body: The slack event body. 42 | :param ack: Acknowledgement object from slack_bolt. 43 | :param respond: Response object from slack_bolt. 44 | :param client: Slack API client. 45 | """ 46 | await ack() 47 | 48 | details = extract_feedback_details(body) 49 | if not details: 50 | return 51 | 52 | request_id = details["value"].split(":")[0] 53 | await submit_feedback(request_id, False, source_info={"type": "slack", "user": details["user"]}) 54 | 55 | async with TaskGroup() as tg: 56 | tg.create_task(_send_response(details, respond)) 57 | tg.create_task(_update_reaction(details, client)) 58 | 59 | 60 | async def _send_response(details: dict[str, str], respond: AsyncRespond) -> None: 61 | """ 62 | Send a response back to the user in Slack. 63 | 64 | :param details: The details extracted from the Slack body. 65 | :param respond: Response object from slack_bolt. 
66 | """ 67 | await respond( 68 | f"😥 Thank you for your feedback, <@{details['user']}>!", 69 | thread_ts=details["thread_ts"], 70 | replace_original=False, 71 | response_type="in_channel", 72 | ) 73 | 74 | 75 | async def _update_reaction(details: dict[str, str], client: AsyncWebClient) -> None: 76 | """ 77 | Add a 'thumbsdown' reaction and remove the 'thumbsup' reaction from the original message. 78 | 79 | :param details: The details extracted from the Slack body. 80 | :param client: Slack API client. 81 | """ 82 | try: 83 | await client.reactions_add(name="thumbsdown", channel=details["channel"], timestamp=details["message_ts"]) 84 | except SlackApiError as e: 85 | # ignore the error if the reaction already exists 86 | if e.response["error"] != "already_reacted": 87 | raise e 88 | 89 | try: 90 | await client.reactions_remove(name="thumbsup", channel=details["channel"], timestamp=details["message_ts"]) 91 | except Exception as e: 92 | logger.debug("Failed to remove thumbsup reaction: %s", e) 93 | -------------------------------------------------------------------------------- /api/ask_astro/slack/controllers/feedback/good.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from asyncio import TaskGroup 4 | from logging import getLogger 5 | from typing import Any 6 | 7 | from slack_bolt.async_app import AsyncAck, AsyncRespond 8 | from slack_sdk.errors import SlackApiError 9 | from slack_sdk.web.async_client import AsyncWebClient 10 | 11 | from ask_astro.services.feedback import submit_feedback 12 | 13 | logger = getLogger(__name__) 14 | 15 | 16 | def extract_feedback_details(body: dict[str, Any]) -> dict[str, str] | None: 17 | """ 18 | Extract necessary details from Slack body for feedback processing. 19 | 20 | :param body: The slack event body. 21 | """ 22 | try: 23 | return { 24 | "user": body["user"]["id"], 25 | "channel": body["channel"]["id"], 26 | "thread_ts": body["message"]["thread_ts"], 27 | "message_ts": body["message"]["ts"], 28 | "value": body["actions"][0]["value"], 29 | } 30 | except KeyError as e: 31 | logger.error("Missing key: %s", e) 32 | return None 33 | 34 | 35 | async def handle_feedback_good( 36 | body: dict[str, Any], ack: AsyncAck, respond: AsyncRespond, client: AsyncWebClient 37 | ) -> None: 38 | """ 39 | Handle positive feedback received from Slack and send appropriate responses. 40 | 41 | :param body: The slack event body. 42 | :param ack: Acknowledgement object from slack_bolt. 43 | :param respond: Response object from slack_bolt. 44 | :param client: Slack API client. 45 | """ 46 | await ack() 47 | 48 | details = extract_feedback_details(body) 49 | if not details: 50 | return 51 | 52 | request_id = details["value"].split(":")[0] 53 | await submit_feedback(request_id, True, source_info={"type": "slack", "user": details["user"]}) 54 | 55 | async with TaskGroup() as tg: 56 | tg.create_task(_send_response(details, respond)) 57 | tg.create_task(_update_reaction(details, client)) 58 | 59 | 60 | async def _send_response(details: dict[str, str], respond: AsyncRespond) -> None: 61 | """ 62 | Send a positive response back to the user in Slack. 63 | 64 | :param details: The details extracted from the Slack body. 65 | :param respond: Response object from slack_bolt. 
66 | """ 67 | await respond( 68 | f"☺️ Thank you for your feedback, <@{details['user']}>!", 69 | thread_ts=details["thread_ts"], 70 | replace_original=False, 71 | response_type="in_channel", 72 | ) 73 | 74 | 75 | async def _update_reaction(details: dict[str, str], client: AsyncWebClient) -> None: 76 | """ 77 | Add a 'thumbsup' reaction and remove the 'thumbsdown' reaction from the original message. 78 | 79 | :param details: The details extracted from the Slack body. 80 | :param client: Slack API client. 81 | """ 82 | try: 83 | await client.reactions_add(name="thumbsup", channel=details["channel"], timestamp=details["message_ts"]) 84 | except SlackApiError as e: 85 | # ignore the error if the reaction already exists 86 | if e.response["error"] != "already_reacted": 87 | raise e 88 | 89 | try: 90 | await client.reactions_remove(name="thumbsdown", channel=details["channel"], timestamp=details["message_ts"]) 91 | except Exception as e: 92 | logger.debug("Failed to remove thumbsdown reaction: %s", e) 93 | -------------------------------------------------------------------------------- /api/ask_astro/slack/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import re 5 | from typing import Any 6 | 7 | import jinja2 8 | 9 | 10 | def markdown_to_slack(message: str) -> str: 11 | """ 12 | Convert markdown formatted text to Slack's message format. 13 | 14 | :param message: A string containing markdown formatted text. 15 | """ 16 | 17 | regexp_replacements = ( 18 | (re.compile("^- ", flags=re.M), "• "), 19 | (re.compile("^ - ", flags=re.M), " ◦ "), 20 | (re.compile("^ - ", flags=re.M), " ⬩ "), # ◆ 21 | (re.compile("^ - ", flags=re.M), " ◽ "), 22 | (re.compile("^#+ (.+)$", flags=re.M), r"*\1*"), 23 | (re.compile(r"\*\*"), "*"), 24 | (re.compile(r"\[(.+)]\((.+)\)"), r"<\2|\1>"), 25 | (re.compile("```\\S+\\n"), r"```\n"), 26 | ) 27 | 28 | for regex, replacement in regexp_replacements: 29 | message = regex.sub(replacement, message) 30 | 31 | return message 32 | 33 | 34 | def get_blocks(block: str, **kwargs: Any) -> list[dict[str, Any]]: 35 | """ 36 | Retrieve a list of Slack blocks by rendering a Jinja2 template. 37 | 38 | :param block: Name of the Jinja2 template to render. 39 | :param kwargs: Arguments to be passed to the Jinja2 template. 
40 | """ 41 | env = jinja2.Environment(loader=jinja2.FileSystemLoader("ask_astro/templates"), autoescape=True) 42 | return json.loads(env.get_template(block).render(kwargs))["blocks"] 43 | -------------------------------------------------------------------------------- /api/ask_astro/stores/__init__.py: -------------------------------------------------------------------------------- 1 | """Re-exports stores for easier importing.""" 2 | -------------------------------------------------------------------------------- /api/ask_astro/stores/oauth_state_store.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import datetime 4 | import logging 5 | from datetime import timedelta 6 | from logging import Logger 7 | 8 | import google.cloud.firestore 9 | from slack_sdk.oauth.state_store.async_state_store import AsyncOAuthStateStore 10 | 11 | 12 | class AsyncFirestoreOAuthStateStore(AsyncOAuthStateStore): 13 | """An async state store backed by Firestore for Slack OAuth flows.""" 14 | 15 | def __init__( 16 | self, 17 | *, 18 | collection: str, 19 | expiration_seconds: int, 20 | client_id: str | None = None, 21 | logger: Logger = logging.getLogger(__name__), 22 | ): 23 | """ 24 | Initialize the state store with given parameters. 25 | 26 | :param collection: Firestore collection name. 27 | :param expiration_seconds: Duration in seconds before a state becomes expired. 28 | :param client_id: The client ID for Slack OAuth. Default is None. 29 | :param logger: Logger instance. Defaults to the module's logger. 30 | """ 31 | firestore_client = google.cloud.firestore.AsyncClient() 32 | self.collection = firestore_client.collection(collection) 33 | self.expiration_seconds = expiration_seconds 34 | 35 | self.client_id = client_id 36 | self._logger = logger 37 | 38 | @property 39 | def logger(self) -> Logger: 40 | """Logger property. If `_logger` is None, it initializes a new logger.""" 41 | if self._logger is None: 42 | self._logger = logging.getLogger(__name__) 43 | return self._logger 44 | 45 | async def async_issue(self, *args, **kwargs) -> str: 46 | """Issue a new OAuth state and store it in Firestore.""" 47 | doc_ref = self.collection.document() 48 | await doc_ref.set({"timestamp": datetime.datetime.now().astimezone()}) 49 | return doc_ref.id 50 | 51 | async def async_consume(self, state: str) -> bool: 52 | """ 53 | Consume the OAuth state by verifying its validity. 54 | 55 | :param state: The state string to verify. 56 | """ 57 | doc_ref = self.collection.document(state) 58 | created = (await doc_ref.get()).get("timestamp") 59 | 60 | if created: 61 | expiration = created + timedelta(seconds=self.expiration_seconds) 62 | still_valid: bool = datetime.datetime.now().astimezone() < expiration 63 | return still_valid 64 | 65 | return False 66 | -------------------------------------------------------------------------------- /api/ask_astro/templates/app_home.jinja2: -------------------------------------------------------------------------------- 1 | { 2 | "type": "home", 3 | "blocks": [ 4 | { 5 | "type": "header", 6 | "text": { 7 | "type": "plain_text", 8 | "text": "Welcome to Ask Astro!" 9 | } 10 | }, 11 | { 12 | "type": "section", 13 | "text": { 14 | "type": "mrkdwn", 15 | "text": "Ask Astro is a powerful LLM-powered chat bot designed to provide you with instant answers to your questions about Astronomer, Airflow, and Astro. 
Ask Astro leverages cutting-edge language models to deliver accurate and helpful responses, enhancing customer support and enabling efficient self-service." 16 | } 17 | }, 18 | { 19 | "type": "divider" 20 | }, 21 | { 22 | "type": "header", 23 | "text": { 24 | "type": "plain_text", 25 | "text": "Key Features" 26 | } 27 | }, 28 | { 29 | "type": "section", 30 | "text": { 31 | "type": "mrkdwn", 32 | "text": "*Instant Responses*\n\nAsk Astro uses natural language processing to understand customer queries and provide real-time responses, eliminating the need for customers to wait for human assistance.\n\n*Extensive Knowledge Base*\n\nAsk Astro is trained on a vast dataset, allowing it to answer a wide range of questions about Astronomer, Airflow, and Astro, including features, use cases, troubleshooting, and best practices.Seamless Integration: The chat bot is integrated directly into Slack, our popular collaboration platform, ensuring customers can access Ask Astro effortlessly within their existing workflow.\n\n*Intuitive User Experience*\n\nAsk Astro is designed with a user-friendly interface, making it easy for customers to interact with the chat bot and find the information they need quickly and efficiently.\n\n*Continuous Learning*\n\nThe LLM powering Ask Astro is constantly updated and fine-tuned based on customer interactions and feedback, ensuring improved accuracy and relevance over time." 33 | } 34 | }, 35 | { 36 | "type": "divider" 37 | }, 38 | { 39 | "type": "header", 40 | "text": { 41 | "type": "plain_text", 42 | "text": "Disclaimer & Limitations" 43 | } 44 | }, 45 | { 46 | "type": "section", 47 | "text": { 48 | "type": "mrkdwn", 49 | "text": "*Contextual Understanding*\n\nWhile Ask Astro excels at understanding context to a significant extent, it may occasionally misinterpret complex or ambiguous queries. In such cases, it’s recommended to provide additional clarifications or reach out to our human support team for further assistance.\n\n*Bias and Inaccuracy*\n\nLanguage models can be influenced by biases present in the training data. Despite our efforts to mitigate biases, Ask Astro may inadvertently reflect biases in its responses. We continuously monitor and address these issues, but user feedback is invaluable in helping us improve.\n\n*Non-Expert Advice*\n\nWhile Ask Astro is designed to provide accurate and helpful information, it should not be considered a substitute for expert advice or professional consultation. In complex or critical scenarios, it’s always recommended to consult our support team or relevant experts." 50 | } 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /api/ask_astro/templates/combine_docs_sys_prompt_slack.txt: -------------------------------------------------------------------------------- 1 | You are Ask Astro, a friendy and helpful bot. 2 | Only answer questions related to Astronomer, the Astro platform and Apache Airflow. If the question is not related to these topics, answer "Sorry I can only help with questions regarding Airflow, Astronomer and the Astro platform". 3 | If the question relates to pricing, licensing, or commercial usage, ask the user to contact support at www.astronomer.io/contact. 4 | Only include hyperlinks or URLs if they from the supplied context. 5 | Be concise and precise in your answers and do not apologize. 6 | Format your response using Slack syntax. 7 | Surround text with SINGLE * to format it in bold or provide emphasis. Examples: GOOD: *This is bold!*. 
BAD: **This is bold!**. 8 | Surround text with _ to format it in italic. Example: _This is italic._ 9 | Use the • character for unnumbered lists. 10 | Use the ` character to surround inline code. Example: This is a sentence with some `inline *code*` in it. 11 | Use ``` to surround multi-line code blocks. Do not specify a language in code blocks. Examples: GOOD: ```This is a code block\nAnd it is multi-line``` BAD: ```python print("Hello world!")```. 12 | Format links using this format: <URL|Text to display>. Examples: GOOD: <https://www.example.com|This message *is* a link>. BAD: [This message *is* a link](https://www.example.com). 13 | 14 | You must refer to the following pieces of context documents below "----------------" to answer the user's question. Each document starts with "Document X" where X is the document number and ends with "===End of Document===". 15 | If the context documents are not helpful enough, you came up with any parts of your answers on your own, or your answer contains information not directly in the context documents, prefix the answer with "I cannot find documents that are directly helpful with your question, but I provided my best guess below. Please use caution as the answer below is more likely to contain incorrect information and you should always verify the answers with Astronomer support at www.astronomer.io/contact". 16 | For example, if the question is "Does astronomer have a CLI", with context documents all about airflow CLI instead, the correct response would be "I cannot find documents that are directly helpful with your question, but I provided my best guess below. Please use caution as the answer below is more likely to contain incorrect information and you should always verify the answers with Astronomer support at www.astronomer.io/contact.\n Yes, astro does have a CLI.". 17 | 18 | To ensure your response answers are factual, for each statement in your response that is from a document in the context provided below, cite the source by adding an embedded Slack hyperlink to "[document number]" at the end of the statement. For Slack syntax, this would look like <document link|[document number]>. 19 | Do not add citations to the answer if your answer does not use the document's information. 20 | For example, if the statement in the context is "The sky is blue.", the source document is located at https://www.example.com, and the document number is 1, the response would be "The sky is blue <https://www.example.com|[1]>.". 21 | A bad example would be if the answer generated is "The sky is red" without any citations to the context documents. 22 | Another bad example would be if the answer generated is "The sky is blue" with a citation to document 2, as the statement is not present in document 2. 23 | 24 | 12 character words that start with "<@U" and end with ">" are usernames. Example: <@U024BE7LH>. 25 | ---------------- 26 | {context} 27 | -------------------------------------------------------------------------------- /api/ask_astro/templates/combine_docs_sys_prompt_webapp.txt: -------------------------------------------------------------------------------- 1 | You are Ask Astro, a friendly and helpful bot. 2 | Only answer questions related to Astronomer, the Astro platform and Apache Airflow. If the question is not related to these topics, answer "Sorry I can only help with questions regarding Airflow, Astronomer and the Astro platform". 3 | If the question relates to pricing, licensing, or commercial usage, ask the user to contact support at www.astronomer.io/contact. 4 | Only include hyperlinks or URLs if they are from the supplied context.
5 | Be concise and precise in your answers, and do not apologize. 6 | Format your response using Markdown syntax. 7 | Use the ` character to surround inline code. Use ``` to surround multi-line code blocks. Do not specify a language in code blocks. Examples: GOOD: ```This is a code block\nAnd it is multi-line``` BAD: ```python print("Hello world!")```. 8 | Format links using this format: [Text to display](URL). Examples: GOOD: [This message **is** a link](https://www.example.com). BAD: <https://www.example.com|This message **is** a link>. 9 | 10 | You must refer to the following pieces of context documents below "----------------" to answer the user's question. Each document starts with "Document X" where X is the document number and ends with "===End of Document===". 11 | If the context documents are not helpful enough, you came up with any parts of your answers on your own, or your answer contains information not directly in the context documents, prefix the answer with "I cannot find documents that are directly helpful with your question, but I provided my best guess below. Please use caution as the answer below is more likely to contain incorrect information and you should always verify the answers with Astronomer support at www.astronomer.io/contact". 12 | For example, if the question is "Does astronomer have a CLI", with context documents all about airflow CLI instead, the correct response would be "I cannot find documents that are directly helpful with your question, but I provided my best guess below. Please use caution as the answer below is more likely to contain incorrect information and you should always verify the answers with Astronomer support at www.astronomer.io/contact.\n Yes, astro does have a CLI.". 13 | 14 | To ensure your response answers are factual, for each statement in your response that is from a document in the context provided below, cite the source by adding [document number](document link) at the end of the statement. The citation document number should be double wrapped in [] for markdown syntax. 15 | Do not add citations to the answer if your answer does not use the document's information. 16 | For example, if the statement in the context is "The sky is blue.", the source document is located at https://www.example.com, and the document number is 1, the response would be "The sky is blue [[1]](https://www.example.com).". 17 | A bad example would be if the answer generated is "The sky is red" without any citations to the context documents. 18 | Another bad example would be if the answer generated is "The sky is blue" with a citation to document 2, as the statement is not present in document 2. 19 | 20 | 12 character words that start with "<@U" and end with ">" are usernames. Example: <@U024BE7LH>.
21 | ---------------- 22 | {context} 23 | -------------------------------------------------------------------------------- /api/ask_astro/templates/message.jinja2: -------------------------------------------------------------------------------- 1 | { 2 | "blocks": [ 3 | { 4 | "type": "section", 5 | "text": { 6 | "type": "mrkdwn", 7 | "text": {{ message | tojson }} 8 | } 9 | }, 10 | {% if sources %} 11 | { 12 | "type": "section", 13 | "text": { 14 | "type": "mrkdwn", 15 | "text": {{ sources | tojson }} 16 | } 17 | }, 18 | {% endif %} 19 | { 20 | "type": "context", 21 | "elements": [ 22 | { 23 | "type": "plain_text", 24 | "text": "⚠️ AI-generated content may be incorrect", 25 | "emoji": true 26 | } 27 | ] 28 | }, 29 | { 30 | "type": "actions", 31 | "elements": [ 32 | { 33 | "type": "button", 34 | "style": "primary", 35 | "text": { 36 | "type": "plain_text", 37 | "text": "👍", 38 | "emoji": true 39 | }, 40 | "value": "{{ feedback_value }}", 41 | "action_id": "feedback_good" 42 | }, 43 | { 44 | "type": "button", 45 | "style": "danger", 46 | "text": { 47 | "type": "plain_text", 48 | "text": "👎", 49 | "emoji": true 50 | }, 51 | "value": "{{ feedback_value }}", 52 | "action_id": "feedback_bad" 53 | }{% if show_customer_actions %}, 54 | { 55 | "type": "button", 56 | "text": { 57 | "type": "plain_text", 58 | "text": "🔧 Open a support case", 59 | "emoji": true 60 | }, 61 | "value": "feedback_support", 62 | "action_id": "feedback_support" 63 | }, 64 | { 65 | "type": "button", 66 | "text": { 67 | "type": "plain_text", 68 | "text": "📅 Book an office hour", 69 | "emoji": true 70 | }, 71 | "value": "feedback_morehelp", 72 | "action_id": "feedback_morehelp", 73 | "url": "https://calendly.com/d/yy2-tvp-xtv/astro-data-engineering-office-hours-ade" 74 | }{% endif %} 75 | ] 76 | } 77 | ] 78 | } 79 | -------------------------------------------------------------------------------- /api/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: gcr.io/kaniko-project/executor 3 | args: 4 | - --context=/workspace/api/ 5 | - --destination=$_AR_HOSTNAME/$PROJECT_ID/cloud-run-source-deploy/$REPO_NAME/$_SERVICE_NAME:$COMMIT_SHA 6 | - --cache=true 7 | - --cache-ttl=8h 8 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk:slim" 9 | args: 10 | - run 11 | - services 12 | - update 13 | - $_SERVICE_NAME 14 | - "--platform=managed" 15 | - >- 16 | --image=$_AR_HOSTNAME/$PROJECT_ID/cloud-run-source-deploy/$REPO_NAME/$_SERVICE_NAME:$COMMIT_SHA 17 | - >- 18 | --labels=managed-by=gcp-cloud-build-deploy-cloud-run,commit-sha=$COMMIT_SHA,gcb-build-id=$BUILD_ID,gcb-trigger-id=$_TRIGGER_ID 19 | - "--region=$_DEPLOY_REGION" 20 | - "--quiet" 21 | id: Deploy 22 | entrypoint: gcloud 23 | options: 24 | substitutionOption: ALLOW_LOOSE 25 | substitutions: 26 | _TRIGGER_ID: 38d07b2e-552a-4bda-9cc4-4ab31fef8938 27 | _PLATFORM: managed 28 | _SERVICE_NAME: ask-astro-dev 29 | _DEPLOY_REGION: us-central1 30 | _AR_HOSTNAME: us-central1-docker.pkg.dev 31 | tags: 32 | - gcp-cloud-build-deploy-cloud-run 33 | - gcp-cloud-build-deploy-cloud-run-managed 34 | - ask-astro-dev 35 | -------------------------------------------------------------------------------- /api/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "ask-astro" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Philippe Gagnon <12717218+pgagnon@users.noreply.github.com>"] 6 | packages = [{include = "ask_astro"}] 7 | 8 | [tool.poetry.dependencies] 9 
| python = "^3.11" 10 | typing-extensions = "*" 11 | pydash = "*" 12 | aiohttp = {version = "*", extras=["speedups"]} 13 | sanic = {extras = ["ext"], version = "^23.6.0"} 14 | slack_bolt = "*" 15 | langchain = "*" 16 | openai = "*" 17 | faiss-cpu = "*" 18 | firebase-admin = "*" 19 | jinja2 = "*" 20 | zenpy = "*" 21 | asyncstdlib = "*" 22 | aiostream = "*" 23 | tiktoken = "*" 24 | aiofiles = "^23.2.1" 25 | weaviate-client = "*" 26 | pydantic = "^2.3.0" 27 | gunicorn = "^21.2.0" 28 | uvicorn = "^0.23.2" 29 | tenacity = "^8.2.3" 30 | cohere = "==4.39" 31 | sanic_limiter = { git = "https://github.com/Omegastick/sanic-limiter.git"} 32 | 33 | [tool.poetry.group.dev.dependencies] 34 | pytest = "^7.4.2" 35 | pytest-asyncio = "^0.21.1" 36 | pytest-sanic = "^1.0.0" 37 | sanic-testing = "^23.5.0" 38 | 39 | [build-system] 40 | requires = ["poetry-core"] 41 | build-backend = "poetry.core.masonry.api" 42 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/DAG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/_static/DAG.png -------------------------------------------------------------------------------- /docs/_static/feedback-loops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/_static/feedback-loops.png -------------------------------------------------------------------------------- /docs/_static/images/monitoring/airflow_dags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/_static/images/monitoring/airflow_dags.png -------------------------------------------------------------------------------- /docs/_static/images/monitoring/api_swagger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/_static/images/monitoring/api_swagger.png -------------------------------------------------------------------------------- /docs/_static/images/monitoring/langsmith1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/_static/images/monitoring/langsmith1.png 
-------------------------------------------------------------------------------- /docs/_static/images/monitoring/latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/_static/images/monitoring/latency.png -------------------------------------------------------------------------------- /docs/_static/images/monitoring/slack_alerts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/_static/images/monitoring/slack_alerts.png -------------------------------------------------------------------------------- /docs/_static/ingestion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/_static/ingestion.png -------------------------------------------------------------------------------- /docs/_static/prompt-orchestration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/_static/prompt-orchestration.png -------------------------------------------------------------------------------- /docs/api/README.md: -------------------------------------------------------------------------------- 1 | # Ask Astro Backend API 2 | 3 | ## Prerequisites 4 | 5 | - Install [Docker](https://docs.docker.com/engine/install/) 6 | - Access to Firestore 7 | - Access to LangChain 8 | - [Setup Local Development Tools](../local_development.md) 9 | 10 | ### Setup local development environment 11 | 12 | Generate the environment variable template and fill in the appropriate values: 13 | 14 | ```bash 15 | python3 scripts/local_dev.py api-env-template 16 | ``` 17 | 18 | ### Run with Poetry 19 | * Initialize the ask-astro API local Poetry environment 20 | 21 | ![api.init-poetry-env](../_static/images/task_help_message/api-init-poetry-env.svg) 22 | 23 | * Run ask-astro API server with Poetry 24 | 25 | ![api.run-with-poetry](../_static/images/task_help_message/api-run-with-poetry.svg) 26 | 27 | ### Run with Docker 28 | 29 | * Run ask-astro API server with Docker 30 | 31 | ![api.run-with-docker](../_static/images/task_help_message/api-run-with-docker.svg) 32 | 33 | * Stop ask-astro API server container 34 | 35 | ![api.stop-container](../_static/images/task_help_message/api-stop-container.svg) 36 | -------------------------------------------------------------------------------- /docs/api/cloudbuild_and_run.md: -------------------------------------------------------------------------------- 1 | # Backend API CI/CD 2 | 3 | Currently, Ask Astro uses [Google Cloud Run](https://cloud.google.com/run/docs/overview/what-is-cloud-run) to run the API server, 4 | and deploys the backend API with [Cloud Build](https://cloud.google.com/build/docs/overview). You can find the Cloud 5 | Build configuration in [`/api/cloudbuild.yaml`](../../api/cloudbuild.yaml). 6 | 7 | ## Create a Cloud Run service 8 | 9 | These steps create a Cloud Run service and a Cloud Build template on the Google Cloud Platform. 10 | 11 | 1. Go to `https://console.cloud.google.com/`, and search for the **Cloud Run** service. 12 | 2. On the **Cloud Run** page, click on **CREATE SERVICE**.
13 | 14 | ![create cloud run service](../images/cloud_run_home.png) 15 | 16 | 3. Select **Continuously deploy new revision from source repository** and **Set up with Cloud Build**. 17 | 4. Select **GitHub** as the repository provider and click **Next**. 18 | 19 | ![create cloud build config](../images/cloud_run_ci_deploy.png) 20 | 21 | 5. Set up the build configuration: 22 | - Branch name: `main` 23 | - Build type: Python via buildpacks 24 | - Build context directory: `/api` 25 | 26 | ![create cloud build config](../images/cloud_run_build_config.png) 27 | 28 | 6. Add **Service name**, **Region**, **CPU allocation and pricing**, **Autoscaling**, and **Ingress**. 29 | 30 | ![create cloud build config](../images/cloud_run_config1.png) 31 | 32 | 7. Allow **Unauthenticated invocation** and click on **Create**. 33 | 34 | ![create cloud build config](../images/cloud_run_create.png) 35 | 36 | 8. Add the environment variables required to run the backend API. 37 | 1. After you create the service, visit the **Created service** page. 38 | 2. Click on **EDIT AND DEPLOY NEW REVISION**. 39 | 3. Add your environment variables and click **Deploy**. 40 | 41 | ![cloud run edit](../images/cloud_run_edit.png) 42 | 43 | ![create cloud build config](../images/cloud_run_add_env.png) 44 | 45 | ## Update the Cloud Build configuration 46 | 47 | 1. Go to `https://console.cloud.google.com/` and search for the **Cloud Build** service. 48 | 49 | ![cloud build trigger](../images/cloud_build_trigger.png) 50 | 51 | 2. Click on **Triggers**. When you created a **Cloud Run Service**, it also created a **Cloud Build trigger template** that you can edit. 52 | 3. Click on the Cloud Build trigger template and update the following information: 53 | 1. **Name**, **Description**, **Tag**, and **Event** 54 | ![cloud build config1](../images/cloud_build_config1.png) 55 | 2. Select your **Source** and **Branch**. 56 | ![cloud build config2](../images/cloud_build_config2.png) 57 | 3. In Configuration, select **Cloud Build configuration file** and define the Cloud Build configuration file location as `api/cloudbuild.yaml`. 58 | 4. Don't make any changes to the advanced settings and click **Save**. 59 | ![cloud build config3](../images/cloud_build_config3.png) 60 | 61 | 4. In settings, enable **Cloud Build** and **Cloud Run**. 62 | 63 | ![cloud build setting](../images/cloud_build_setting.png) 64 | -------------------------------------------------------------------------------- /docs/api/setup_slack_bot.md: -------------------------------------------------------------------------------- 1 | # Create an ask-astro Slack bot 2 | 3 | 1. Go to `https://api.slack.com/apps`. 4 | 2. Click **Create New App**. 5 | ![1-create-new-app](static/1-create-new-app.png) 6 | 3. On the **Create an app** page, click **From scratch**. 7 | 4. Name the app and pick a Slack workspace. 8 | ![3-name-app](static/3-name-app.png) 9 | After creating the Slack app, Slack redirects you to the home page of this app. 10 | ![4-app-home-page](static/4-app-home-page.png) 11 | 5. Scroll down to find the credentials for the ask-astro API backend: 12 | ![5-credentials](static/5-credentials.png) 13 | * `SLACK_CLIENT_ID`: Client ID 14 | * `SLACK_CLIENT_SECRET`: Client Secret 15 | * `SLACK_SIGNING_SECRET`: Signing Secret 16 | 6. Start the ask-astro API backend on a publicly reachable host, for example by using [ngrok](https://ngrok.com/). 17 | 7. Go to the [OAuth & Permissions](https://api.slack.com/apps/<your-app-id>/oauth) page. 18 | 8. Add `https://<your-host>/slack/oauth_redirect` to **Redirect URLs**.
19 | ![6-redirect-url](static/6-redirect-url.png) 20 | 9. Add the following scopes: 21 | * commands 22 | * app_mentions:read 23 | * channels:read 24 | * channels:history 25 | * groups:read 26 | * groups:history 27 | * chat:write 28 | * reactions:read 29 | * reactions:write 30 | * users:read 31 | * users:read.email 32 | * team:read 33 | * im:history 34 | * mpim:history 35 | * files:read 36 | ![7-scope](static/7-scope.png) 37 | 10. Go to the **Event Subscriptions** page for your app (`https://api.slack.com/apps/<your-app-id>/event-subscriptions`). 38 | 11. Set **Request URL** to `https://<your-host>/slack/events`. 39 | ![8-event-subscription](static/8-event-subscription.png) 40 | 12. Go to the **Interactivity & Shortcuts** page. 41 | 13. Set **Request URL** to `https://<your-host>/slack/events`. 42 | 14. Go to `https://<your-host>/slack/install` and click **Add to Slack**. 43 | ![9-slack-install](static/9-slack-install.png) 44 | 45 | The ask-astro bot is now available to use in your Slack workspace! 46 | -------------------------------------------------------------------------------- /docs/api/static/1-create-new-app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/api/static/1-create-new-app.png -------------------------------------------------------------------------------- /docs/api/static/2-create-an-app-from-scratch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/api/static/2-create-an-app-from-scratch.png -------------------------------------------------------------------------------- /docs/api/static/3-name-app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/api/static/3-name-app.png -------------------------------------------------------------------------------- /docs/api/static/4-app-home-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/api/static/4-app-home-page.png -------------------------------------------------------------------------------- /docs/api/static/5-credentials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/api/static/5-credentials.png -------------------------------------------------------------------------------- /docs/api/static/6-redirect-url.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/api/static/6-redirect-url.png -------------------------------------------------------------------------------- /docs/api/static/7-scope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/api/static/7-scope.png -------------------------------------------------------------------------------- /docs/api/static/8-event-subscription.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/api/static/8-event-subscription.png -------------------------------------------------------------------------------- /docs/api/static/9-slack-install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/api/static/9-slack-install.png -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = "ask-astro" 10 | copyright = "2023, astronomer.io" # noqa: A001 11 | author = "astronomer.io" 12 | release = "0.0.1" 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | root_doc = "index" 17 | extensions = ["myst_parser"] 18 | 19 | templates_path = ["_templates"] 20 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 21 | 22 | 23 | # -- Options for HTML output ------------------------------------------------- 24 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 25 | 26 | html_theme = "sphinx_book_theme" 27 | html_static_path = ["_static"] 28 | -------------------------------------------------------------------------------- /docs/images/cloud_build_config1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_build_config1.png -------------------------------------------------------------------------------- /docs/images/cloud_build_config2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_build_config2.png -------------------------------------------------------------------------------- /docs/images/cloud_build_config3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_build_config3.png -------------------------------------------------------------------------------- /docs/images/cloud_build_setting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_build_setting.png -------------------------------------------------------------------------------- /docs/images/cloud_build_trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_build_trigger.png -------------------------------------------------------------------------------- 
/docs/images/cloud_run_add_env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_run_add_env.png -------------------------------------------------------------------------------- /docs/images/cloud_run_build_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_run_build_config.png -------------------------------------------------------------------------------- /docs/images/cloud_run_ci_deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_run_ci_deploy.png -------------------------------------------------------------------------------- /docs/images/cloud_run_config1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_run_config1.png -------------------------------------------------------------------------------- /docs/images/cloud_run_create.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_run_create.png -------------------------------------------------------------------------------- /docs/images/cloud_run_edit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_run_edit.png -------------------------------------------------------------------------------- /docs/images/cloud_run_home.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/docs/images/cloud_run_home.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} README.md 2 | ``` 3 | 4 | ```{toctree} 5 | :hidden: 6 | 7 | local_development.md 8 | monitoring.md 9 | ``` 10 | 11 | 12 | ```{toctree} 13 | :hidden: 14 | :caption: 💻 Develop UI 15 | 16 | ui/README.md 17 | ``` 18 | 19 | ```{toctree} 20 | :hidden: 21 | :caption: 💻 Develop Backend Server 22 | 23 | api/README.md 24 | api/cloudbuild_and_run.md 25 | api/google_firestore.md 26 | api/setup_slack_bot.md 27 | ``` 28 | 29 | ```{toctree} 30 | :hidden: 31 | :caption: 💻 Develop Apache Airflow® DAGs 32 | 33 | airflow/README.md 34 | ``` 35 | -------------------------------------------------------------------------------- /docs/local_development.md: -------------------------------------------------------------------------------- 1 | # Setup Local Development Tools 2 | 3 | [TOC] 4 | 5 | ## Prerequisites 6 | 7 | - [Python 3.11](https://www.python.org/downloads/release/python-3116/) 8 | - [Install poetry](https://python-poetry.org/docs/#installation) 9 | 10 | 11 | ## Setup project root poetry environment for development tools 12 | 13 | ```sh 14 | $ pwd 15 | 16 | /.../ask-astro 17 | 18 | $ python --version 19 | 20 | Python 3.11.x 21 | 22 | # 
install poetry (https://python-poetry.org/docs/#installation) 23 | $ python -m pip install poetry 24 | $ poetry install 25 | ``` 26 | 27 | ![list_tasks](_static/images/task_help_message/list-tasks.svg) 28 | 29 | ### Documentation Tasks 30 | 31 | - Build Sphinx docs 32 | 33 | ![docs.build](_static/images/task_help_message/docs-build.svg) 34 | 35 | - Serve the docs locally (http://127.0.0.1:8000) 36 | 37 | ![docs.serve](_static/images/task_help_message/docs-serve.svg) 38 | 39 | - Generate the screenshot of the help message for each task 40 | 41 | ![docs.generate-tasks-help-screenshot](_static/images/task_help_message/docs-generate-tasks-help-screenshot.svg) 42 | 43 | ### Apache Airflow® DAGs Tasks 44 | 45 | - Run ask-astro airflow 46 | 47 | ![airflow.run](_static/images/task_help_message/airflow-run.svg) 48 | 49 | - Stop ask-astro airflow 50 | 51 | ![airflow.stop](_static/images/task_help_message/airflow-stop.svg) 52 | 53 | ### Backend API Tasks 54 | 55 | - Go to [Ask Astro Backend API](./api/README.md) 56 | 57 | ### UI Tasks 58 | 59 | - Go to [Ask-Astro UI](./ui/README.md) 60 | 61 | 62 | ### Run linters 63 | 64 | - Run pre-commit checks 65 | 66 | ![run-pre-commit](_static/images/task_help_message/run-pre-commit.svg) 67 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/ui/README.md: -------------------------------------------------------------------------------- 1 | # Ask-Astro UI 2 | 3 | ## Developing 4 | 5 | ### Initialize UI server dependencies 6 | 7 | ![ui.init](../_static/images/task_help_message/ui-init.svg) 8 | 9 | ### Run UI server 10 | 11 | Add the backend API server URL in the `ui/.env` file: 12 | 13 | ```bash 14 | ASK_ASTRO_API_URL=http://0.0.0.0:8080 15 | ``` 16 | 17 | Run the development server: 18 | 19 | 20 | ![ui.run](../_static/images/task_help_message/ui-run.svg) 21 | 22 | ## Building 23 | 24 | To create a production version of your app: 25 | 26 | ```bash 27 | npm run build 28 | ``` 29 | 30 | You can preview the production build with `npm run preview`.
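31 | 32 | Taken together, a typical local workflow might look like this (a sketch, assuming the `dev`, `build`, and `preview` scripts defined in `ui/package.json`): 33 | 34 | ```bash 35 | npm install      # install UI dependencies 36 | npm run dev      # start the local development server 37 | npm run build    # create a production build 38 | npm run preview  # preview the production build locally 39 | ```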
40 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "ask-astro" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Wei Lee "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.11" 10 | 11 | [tool.poetry.group.dev.dependencies] 12 | invoke = "^2.2.0" 13 | 14 | [tool.poetry.group.docs.dependencies] 15 | sphinx-book-theme = "^1.0.1" 16 | rich = "^13.6.0" 17 | myst-parser = "^2.0.0" 18 | 19 | [tool.poetry.group.pre-commit.dependencies] 20 | pre-commit = "^3.5.0" 21 | 22 | [build-system] 23 | requires = ["poetry-core"] 24 | build-backend = "poetry.core.masonry.api" 25 | 26 | 27 | [tool.black] 28 | line-length = 120 29 | target-version = ['py39'] 30 | 31 | 32 | [tool.ruff] 33 | line-length = 120 34 | 35 | # Enable Pyflakes `E` and `F` codes by default. 36 | extend-select = [ 37 | "W", # pycodestyle warnings 38 | "I", # isort 39 | # "C90", # Complexity # TODO: enable this and the score below 40 | # "B", # flake8-bugbear 41 | "C", # flake8-comprehensions 42 | # "ANN", # flake8-annotations 43 | "ISC", # flake8-implicit-str-concat 44 | "T10", # flake8-debugger 45 | "A", # flake8-builtins 46 | "UP", # pyupgrade 47 | ] 48 | extend-ignore = ["A002", "C901", "ISC001"] 49 | 50 | 51 | # Exclude a variety of commonly ignored directories. 52 | extend-exclude = [ 53 | "__pycache__", 54 | "docs/source/conf.py", 55 | ] 56 | target-version = "py39" 57 | fix = true 58 | 59 | [tool.ruff.per-file-ignores] 60 | 61 | # TODO: Decrease the complexity and fix the errors. 62 | # [mccabe] 63 | # max-complexity = 6 64 | 65 | [tool.ruff.isort] 66 | combine-as-imports = true 67 | -------------------------------------------------------------------------------- /tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from invoke import Collection, task 2 | from invoke.context import Context 3 | 4 | from tasks import airflow, api, docs, ui 5 | from tasks.common import project_root 6 | 7 | 8 | @task 9 | def run_pre_commit(ctx: Context) -> None: 10 | """Run pre-commit""" 11 | with ctx.cd(project_root): 12 | ctx.run("pre-commit run --all-files") 13 | 14 | 15 | ns = Collection() 16 | ns.add_collection(api) 17 | ns.add_collection(ui) 18 | ns.add_collection(airflow) 19 | ns.add_collection(docs) 20 | ns.add_task(run_pre_commit) 21 | -------------------------------------------------------------------------------- /tasks/airflow.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from invoke import task 4 | from invoke.context import Context 5 | 6 | from tasks.common import project_root 7 | 8 | airflow_root = project_root / Path("airflow") 9 | 10 | 11 | @task 12 | def run(ctx: Context) -> None: 13 | """Run ask-astro airflow""" 14 | with ctx.cd(airflow_root): 15 | print("Starting ask-astro airflow") 16 | ctx.run("astro dev start") 17 | 18 | 19 | @task 20 | def stop(ctx: Context) -> None: 21 | """Stop ask-astro airflow""" 22 | with ctx.cd(airflow_root): 23 | print("Stopping ask-astro airflow") 24 | ctx.run("astro dev stop") 25 | -------------------------------------------------------------------------------- /tasks/common.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | project_root = Path(__file__).parent.parent.absolute() 4 | 
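5 | # Note: the other task modules derive their component roots from this path, 6 | # e.g. `airflow_root = project_root / Path("airflow")` in tasks/airflow.py.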
-------------------------------------------------------------------------------- /tasks/docs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from invoke import task 4 | from invoke.context import Context 5 | from rich.console import Console 6 | 7 | from tasks.common import project_root 8 | 9 | docs_root = project_root / Path("docs") 10 | 11 | 12 | @task(help={"clean": "clean the docs before building"}) 13 | def build(ctx: Context, clean: bool = False) -> None: 14 | """Build Sphinx docs""" 15 | with ctx.cd(docs_root): 16 | if clean: 17 | ctx.run("make clean") 18 | ctx.run("make html") 19 | 20 | 21 | @task(help={"rebuild": "clean and build the doc before serving"}) 22 | def serve(ctx: Context, rebuild: bool = False) -> None: 23 | """Serve the docs locally (http://127.0.0.1:8000)""" 24 | with ctx.cd(docs_root / Path("_build/html")): 25 | if rebuild: 26 | build(ctx, clean=True) 27 | ctx.run("python -m http.server") 28 | 29 | 30 | def _export_cmd_as_svg(ctx: Context, cmd: str, file_name: str) -> None: 31 | stdout = ctx.run(cmd, hide="both").stdout 32 | console = Console(record=True, width=80) 33 | console.print(f"$ {cmd}\n{stdout}") 34 | console.save_svg(file_name, title="") 35 | 36 | 37 | @task 38 | def generate_tasks_help_screenshot(ctx: Context) -> None: 39 | """Generate the screenshot of the help message for each task""" 40 | with ctx.cd(project_root): 41 | image_dir = f"{project_root}/docs/_static/images/task_help_message/" 42 | list_tasks_cmd = "poetry run inv -l" 43 | _export_cmd_as_svg(ctx, list_tasks_cmd, f"{image_dir}/list-tasks.svg") 44 | 45 | stdout = ctx.run(list_tasks_cmd, hide="both").stdout 46 | task_names = [ 47 | message.strip().split()[0] 48 | for message in filter(lambda s: not s.startswith(" "), stdout.split("\n")[2:-2]) 49 | ] 50 | for task_name in task_names: 51 | cmd = f"poetry run inv {task_name} -h" 52 | file_name = task_name.replace(".", "-") 53 | _export_cmd_as_svg(ctx, cmd, f"{image_dir}/{file_name}.svg") 54 | -------------------------------------------------------------------------------- /tasks/ui.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from invoke import task 4 | from invoke.context import Context 5 | 6 | from tasks.common import project_root 7 | 8 | ui_root = project_root / Path("ui") 9 | 10 | 11 | @task(name="init") 12 | def init_task(ctx: Context) -> None: 13 | """Initialize UI server dependencies""" 14 | with ctx.cd(ui_root): 15 | ctx.run("npm install") 16 | 17 | 18 | @task(help={"init": "init UI dev env", "open_browser": "open the browser after running the server"}) 19 | def run(ctx: Context, init: bool = False, open_browser: bool = False) -> None: 20 | """Run UI server""" 21 | with ctx.cd(ui_root): 22 | if init: 23 | init_task(ctx) 24 | cmd = "npm run dev" 25 | if open_browser: 26 | cmd = f"{cmd} -- --open" 27 | ctx.run(cmd) 28 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/tests/__init__.py -------------------------------------------------------------------------------- /tests/api/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/tests/api/__init__.py -------------------------------------------------------------------------------- /tests/api/ask_astro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/tests/api/ask_astro/__init__.py -------------------------------------------------------------------------------- /tests/api/ask_astro/chains/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/tests/api/ask_astro/chains/__init__.py -------------------------------------------------------------------------------- /tests/api/ask_astro/chains/test_answer_questions.py: -------------------------------------------------------------------------------- 1 | from langchain.prompts import SystemMessagePromptTemplate 2 | 3 | 4 | def test_system_prompt_loading(): 5 | """Test if the system prompt is loaded correctly""" 6 | with open("ask_astro/templates/combine_docs_sys_prompt_webapp.txt") as fd: 7 | expected_template = fd.read() 8 | template_instance = SystemMessagePromptTemplate.from_template(expected_template) 9 | assert template_instance.prompt.template == expected_template 10 | -------------------------------------------------------------------------------- /tests/api/ask_astro/clients/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/tests/api/ask_astro/clients/__init__.py -------------------------------------------------------------------------------- /tests/api/ask_astro/clients/test_firestore.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | 4 | @patch("google.cloud.firestore.AsyncClient") 5 | def test_firestore_client_instance(mocked_async_client): 6 | """ 7 | Test that firestore_client is an instance of the AsyncClient class from the google.cloud.firestore library. 8 | """ 9 | from ask_astro.clients.firestore import firestore_client # noqa 10 | 11 | assert mocked_async_client.called, "AsyncClient was not instantiated" 12 | -------------------------------------------------------------------------------- /tests/api/ask_astro/clients/test_langsmith_.py: -------------------------------------------------------------------------------- 1 | from ask_astro.clients.langsmith_ import langsmith_client 2 | from langsmith import Client 3 | 4 | 5 | def test_langsmith_client_instance(): 6 | """ 7 | Test that the langsmith_client is an instance of the Client class from the langsmith library. 
8 | """ 9 | assert isinstance( 10 | langsmith_client, Client 11 | ), "langsmith_client is not an instance of Client from the langsmith library" 12 | -------------------------------------------------------------------------------- /tests/api/ask_astro/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/tests/api/ask_astro/models/__init__.py -------------------------------------------------------------------------------- /tests/api/ask_astro/models/test_request.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | 3 | import pytest 4 | from ask_astro.models.request import AskAstroRequest, HumanMessage, Source 5 | 6 | 7 | # Define a fixture for AskAstroRequest 8 | @pytest.fixture 9 | def ask_astro_request_fixture(): 10 | """ 11 | Provides a fresh instance of AskAstroRequest with preset data 12 | for each test that needs it. 13 | """ 14 | request = AskAstroRequest( 15 | uuid=uuid4(), 16 | prompt="Test prompt", 17 | messages=[HumanMessage(content="Test message")], 18 | sources=[Source(name="Test Source", snippet="Test Snippet")], 19 | status="Test Status", 20 | client="Test", 21 | ) 22 | return request 23 | 24 | 25 | # Now use the fixture in your test functions 26 | def test_ask_astro_request_creation(ask_astro_request_fixture): 27 | request = ask_astro_request_fixture 28 | assert request is not None 29 | assert request.prompt == "Test prompt" 30 | assert request.status == "Test Status" 31 | 32 | 33 | def test_to_firestore(ask_astro_request_fixture): 34 | request = ask_astro_request_fixture 35 | firestore_dict = request.to_firestore() 36 | 37 | assert firestore_dict["prompt"] == "Test prompt" 38 | assert firestore_dict["status"] == "Test Status" 39 | assert len(firestore_dict["messages"]) == 1 40 | assert len(firestore_dict["sources"]) == 1 41 | 42 | 43 | def test_from_dict(): 44 | data = { 45 | "uuid": str(uuid4()), 46 | "prompt": "Test prompt", 47 | "messages": [{"content": "Test message", "type": "human"}], 48 | "sources": [{"name": "Test Source", "snippet": "Test Snippet"}], 49 | "status": "Test Status", 50 | "langchain_run_id": str(uuid4()), 51 | "score": 5, 52 | "sent_at": 123456789, 53 | "response": None, 54 | "client": "Test", 55 | } 56 | request = AskAstroRequest.from_dict(data) 57 | 58 | assert request is not None 59 | assert request.prompt == "Test prompt" 60 | assert request.status == "Test Status" 61 | assert len(request.messages) == 1 62 | assert isinstance(request.messages[0], HumanMessage) 63 | assert len(request.sources) == 1 64 | assert isinstance(request.sources[0], Source) 65 | assert request.client == "Test" 66 | -------------------------------------------------------------------------------- /tests/api/ask_astro/rest/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/tests/api/ask_astro/rest/__init__.py -------------------------------------------------------------------------------- /tests/api/ask_astro/rest/controllers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/ask-astro/7ed98ac98bc6ade6fa1f9cff93e87fe952f7d244/tests/api/ask_astro/rest/controllers/__init__.py -------------------------------------------------------------------------------- 
/tests/api/ask_astro/rest/controllers/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sanic import Sanic 3 | from sanic_testing import TestManager 4 | 5 | 6 | @pytest.fixture 7 | def app() -> Sanic: 8 | """Fixture to create a new Sanic application for testing.""" 9 | sanitized_name = __name__.replace(".", "_") 10 | app_instance = Sanic(sanitized_name) 11 | TestManager(app_instance) 12 | 13 | from ask_astro.rest.controllers import register_routes 14 | 15 | register_routes(app_instance) 16 | 17 | return app_instance 18 | -------------------------------------------------------------------------------- /tests/api/ask_astro/rest/controllers/test_get_requests.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, Mock, patch 2 | from uuid import uuid4 3 | 4 | import pytest 5 | 6 | 7 | @pytest.mark.asyncio 8 | @pytest.mark.parametrize( 9 | "mock_exists, mock_data, expected_status, expected_response", 10 | [ 11 | (True, {"title": "Sample Question", "response": "abc"}, 200, {"title": "Sample Question", "response": "abc"}), 12 | (False, None, 404, {"error": "Question not found"}), 13 | (None, None, 500, {"error": "Internal Server Error"}), 14 | ], 15 | ) 16 | async def test_on_get_request(app, mock_exists, mock_data, expected_status, expected_response): 17 | """Test to validate get request behavior based on different Firestore responses.""" 18 | with patch("ask_astro.rest.controllers.get_request.firestore_client") as mock_firestore: 19 | request_id = uuid4() 20 | 21 | mock_get = Mock() 22 | mock_get.exists = mock_exists if mock_exists is not None else True # Ensure it's True for the error scenario 23 | mock_get.to_dict.return_value = mock_data 24 | 25 | mock_document = Mock() 26 | 27 | # Mock the async get() method behavior 28 | async def mock_get_async(): 29 | if mock_exists is not None: 30 | return mock_get 31 | else: 32 | # Simulate an exception for 500 status code 33 | raise Exception("Simulated Error") 34 | 35 | mock_document.get = AsyncMock(side_effect=mock_get_async) 36 | 37 | mock_collection = Mock() 38 | mock_collection.document.return_value = mock_document 39 | 40 | mock_firestore.collection.return_value = mock_collection 41 | 42 | request, response = await app.asgi_client.get(f"/requests/{request_id}") 43 | 44 | assert response.status == expected_status 45 | assert response.json == expected_response 46 | -------------------------------------------------------------------------------- /tests/api/ask_astro/rest/controllers/test_health_status.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | 6 | @pytest.mark.asyncio 7 | @pytest.mark.parametrize( 8 | "show_maintenance_banner, expected_response", 9 | [ 10 | (True, {"status": "maintenance"}), 11 | (False, {"status": "healthy"}), 12 | ], 13 | ) 14 | async def test_health_status(app, show_maintenance_banner, expected_response): 15 | """ 16 | Test the /health_status endpoint by mocking banner status environment variable 17 | """ 18 | 19 | with patch("ask_astro.settings.SHOW_SERVICE_MAINTENANCE_BANNER", new=show_maintenance_banner): 20 | _, response = await app.asgi_client.get("/health_status") 21 | 22 | # Validating the response status code and content 23 | assert response.status == 200 24 | assert response.json == expected_response 25 | 
--------------------------------------------------------------------------------
/tests/api/ask_astro/rest/controllers/test_list_recent_requests.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import AsyncMock, Mock, PropertyMock, patch
 2 | 
 3 | import pytest
 4 | 
 5 | 
 6 | def generate_mock_document(data):
 7 |     mock_document = Mock()
 8 |     mock_document.to_dict.return_value = data
 9 |     return mock_document
10 | 
11 | 
12 | @pytest.mark.asyncio
13 | @pytest.mark.parametrize(
14 |     "mock_data, expected_status, expected_response",
15 |     [
16 |         ([], 500, {"error": "An error occurred while processing your request."}),
17 |     ],
18 | )
19 | async def test_on_list_recent_requests(app, mock_data, expected_status, expected_response):
20 |     with patch("ask_astro.config.FirestoreCollections.requests", new_callable=PropertyMock) as mock_collection:
21 |         mock_collection.return_value = "mock_collection_name"
22 |         with patch("google.cloud.firestore_v1.Client", new=AsyncMock()) as MockFirestoreClient:
23 |             # Here, MockFirestoreClient will replace the actual Firestore Client everywhere in the code.
24 |             mock_client_instance = MockFirestoreClient.return_value
25 | 
26 |             # Create the final result (from Firestore's get() method)
27 |             mock_get = AsyncMock()
28 |             mock_get.return_value = [generate_mock_document(doc) for doc in mock_data]
29 | 
30 |             # Mock the Firestore query methods
31 |             mock_query = AsyncMock()
32 |             mock_query.order_by.return_value = mock_query
33 |             mock_query.where.return_value = mock_query
34 |             mock_query.limit.return_value = mock_query
35 |             mock_query.get = mock_get
36 | 
37 |             # Mock the Firestore collection call
38 |             mock_client_instance.collection.return_value = mock_query
39 | 
40 |             _, response = await app.asgi_client.get("/requests")
41 | 
42 |             assert response.status == expected_status
43 |             assert response.json == expected_response
44 | 
--------------------------------------------------------------------------------
/tests/api/ask_astro/rest/controllers/test_post_request.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import AsyncMock, patch
 2 | 
 3 | import pytest
 4 | 
 5 | 
 6 | @pytest.mark.asyncio
 7 | @pytest.mark.parametrize(
 8 |     "request_payload,expected_status,expected_response",
 9 |     [
10 |         ({"prompt": "Tell me about space"}, 200, {"request_uuid": "test-uuid"}),
11 |         ({"prompt": "What is quantum mechanics?"}, 200, {"request_uuid": "test-uuid"}),
12 |         ({}, 400, {"error": "prompt is required"}),
13 |     ],
14 | )
15 | async def test_on_post_request(app, request_payload, expected_status, expected_response):
16 |     """Test the POST request endpoint behavior based on different input payloads."""
17 |     with patch("ask_astro.services.questions.answer_question") as mock_answer_question, patch(
18 |         "ask_astro.clients.firestore.firestore.AsyncClient"
19 |     ) as mock_firestore, patch("google.cloud.firestore_v1.Client", new=AsyncMock()):
20 |         mock_firestore.collection.return_value.document.return_value.get.return_value = AsyncMock()
21 |         mock_answer_question.return_value = AsyncMock()
22 | 
23 |         request, response = await app.asgi_client.post("/requests", json=request_payload)
24 | 
25 |         assert response.status == expected_status
26 |         # If expecting a 200 status
27 |         if expected_status == 200:
28 |             assert "request_uuid" in response.json
29 |             assert isinstance(response.json.get("request_uuid"), str)
30 | 
31 |         # If expecting a 400 status
32 |         elif expected_status == 400:
33 |             assert "error" in response.json
34 |             assert response.json["error"] == expected_response["error"]
35 | 
--------------------------------------------------------------------------------
/tests/api/ask_astro/rest/controllers/test_submit_feedback.py:
--------------------------------------------------------------------------------
 1 | import uuid
 2 | from unittest.mock import AsyncMock, patch
 3 | 
 4 | import pytest
 5 | from httpx import Response
 6 | from pytest_sanic.utils import TestClient
 7 | from sanic import Sanic
 8 | 
 9 | 
10 | def create_sanic_app(name: str) -> Sanic:
11 |     """Create a new instance of a Sanic application with the provided name."""
12 |     Sanic.test_mode = True
13 |     return Sanic(name)
14 | 
15 | 
16 | @pytest.mark.asyncio
17 | @pytest.mark.parametrize(
18 |     "request_data, expected_status, expected_response_text",
19 |     [
20 |         ({"positive": True}, 200, None),
21 |         ({"positive": False}, 200, None),
22 |         ({}, 500, "An internal error occurred."),  # Missing positive key
23 |     ],
24 | )
25 | async def test_on_submit_feedback(request_data, expected_status, expected_response_text):
26 |     """
27 |     Test the behavior of the on_submit_feedback route. This test validates the responses of the feedback submission
28 |     route for various inputs.
29 |     """
30 |     from ask_astro.rest.controllers.submit_feedback import on_submit_feedback
31 | 
32 |     app_name = f"test_sanic_app_{uuid.uuid4().hex}"
33 |     app = create_sanic_app(app_name)
34 |     app.add_route(on_submit_feedback, "/requests/<request_id>/feedback", methods=["POST"])
35 | 
36 |     async def mock_post(*args, **kwargs):
37 |         """Mock the POST request by returning a dummy response."""
38 |         return Response(status_code=expected_status, text=expected_response_text or "")
39 | 
40 |     with patch("pytest_sanic.utils.TestClient.post", new=mock_post):
41 |         test_manager = TestClient(app)
42 | 
43 |         with patch("ask_astro.rest.controllers.submit_feedback.submit_feedback", new_callable=AsyncMock):
44 |             response = await test_manager.post("/requests/test_request_id/feedback", json=request_data)
45 | 
46 |             assert response.status_code == expected_status
47 |             assert response.text == (expected_response_text or "")
48 | 
--------------------------------------------------------------------------------
/tests/test_nothing.py:
--------------------------------------------------------------------------------
 1 | def test_nothing():
 2 |     assert 0 == 0
 3 | 
--------------------------------------------------------------------------------
/ui/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | node_modules
 3 | /build
 4 | /.svelte-kit
 5 | /package
 6 | .env
 7 | .env.*
 8 | !.env.example
 9 | .vercel
10 | .output
11 | vite.config.js.timestamp-*
12 | vite.config.ts.timestamp-*
--------------------------------------------------------------------------------
/ui/.npmrc:
--------------------------------------------------------------------------------
 1 | engine-strict=true
 2 | resolution-mode=highest
--------------------------------------------------------------------------------
/ui/components.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://shadcn-svelte.com/schema.json",
 3 |   "style": "new-york",
 4 |   "tailwind": {
 5 |     "config": "tailwind.config.js",
 6 |     "css": "src/app.postcss",
 7 |     "baseColor": "slate"
 8 |   },
 9 |   "aliases": {
10 |     "components": "$lib/components",
11 |     "utils": "$lib/utils"
12 |   }
13 | }
14 | 
--------------------------------------------------------------------------------
/ui/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "ui",
 3 |   "version": "0.0.1",
 4 |   "scripts": {
 5 |     "dev": "vite dev",
 6 |     "build": "vite build",
 7 |     "preview": "vite preview",
 8 |     "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
 9 |     "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch"
10 |   },
11 |   "devDependencies": {
12 |     "@fontsource/fira-mono": "^4.5.10",
13 |     "@neoconfetti/svelte": "^1.0.0",
14 |     "@sveltejs/adapter-auto": "^2.0.0",
15 |     "@sveltejs/adapter-cloudflare": "^2.3.3",
16 |     "@sveltejs/kit": "^1.27.2",
17 |     "@types/cookie": "^0.5.1",
18 |     "autoprefixer": "^10.4.14",
19 |     "postcss": "^8.4.31",
20 |     "postcss-load-config": "^4.0.1",
21 |     "svelte": "^4.0.5",
22 |     "svelte-check": "^3.4.3",
23 |     "sveltekit-rate-limiter": "^0.4.3",
24 |     "tailwindcss": "^3.3.2",
25 |     "tslib": "^2.4.1",
26 |     "typescript": "^5.0.0",
27 |     "vite": "^4.4.12"
28 |   },
29 |   "type": "module",
30 |   "dependencies": {
31 |     "@segment/in-eu": "astronomer/in-eu",
32 |     "bits-ui": "^0.5.7",
33 |     "clsx": "^2.0.0",
34 |     "radix-icons-svelte": "^1.2.1",
35 |     "svelte-markdown": "^0.4.0",
36 |     "tailwind-merge": "^1.14.0",
37 |     "tailwind-variants": "^0.1.14"
38 |   }
39 | }
40 | 
--------------------------------------------------------------------------------
/ui/postcss.config.cjs:
--------------------------------------------------------------------------------
 1 | const tailwindcss = require("tailwindcss");
 2 | const autoprefixer = require("autoprefixer");
 3 | 
 4 | const config = {
 5 |   plugins: [tailwindcss(), autoprefixer],
 6 | };
 7 | 
 8 | module.exports = config;
 9 | 
--------------------------------------------------------------------------------
/ui/src/app.d.ts:
--------------------------------------------------------------------------------
 1 | // See https://kit.svelte.dev/docs/types#app
 2 | // for information about these interfaces
 3 | declare global {
 4 | 	namespace App {
 5 | 		// interface Error {}
 6 | 		// interface Locals {}
 7 | 		// interface PageData {}
 8 | 		// interface Platform {}
 9 | 	}
10 | }
11 | 
12 | export {};
13 | 
--------------------------------------------------------------------------------
/ui/src/app.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | 	<head>
 4 | 
 5 | 
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
11 | 
12 | 
13 | 
14 | 
15 | 		%sveltekit.head%
16 | 	</head>
17 | 
18 | 	<body>
19 | 		<div style="display: contents">%sveltekit.body%</div>
20 | 	</body>
21 | </html>
22 | 
23 | 
--------------------------------------------------------------------------------
/ui/src/app.postcss:
--------------------------------------------------------------------------------
 1 | @tailwind base;
 2 | @tailwind components;
 3 | @tailwind utilities;
 4 | 
 5 | @layer base {
 6 | 	:root {
 7 | 		--background: 0 0% 100%;
 8 | 		--foreground: 222.2 84% 4.9%;
 9 | 
10 | 		--muted: 210 40% 96.1%;
11 | 		--muted-foreground: 215.4 16.3% 46.9%;
12 | 
13 | 		--popover: 0 0% 100%;
14 | 		--popover-foreground: 222.2 84% 4.9%;
15 | 
16 | 		--card: 0 0% 100%;
17 | 		--card-foreground: 222.2 84% 4.9%;
18 | 
19 | 		--border: 214.3 31.8% 91.4%;
20 | 		--input: 214.3 31.8% 91.4%;
21 | 
22 | 		--primary: 222.2 47.4% 11.2%;
23 | 		--primary-foreground: 210 40% 98%;
24 | 
25 | 		--secondary: 210 40% 96.1%;
26 | 		--secondary-foreground: 222.2 47.4% 11.2%;
27 | 
28 | 		--accent: 210 40% 96.1%;
29 | 		--accent-foreground: 222.2 47.4% 11.2%;
30 | 
31 | 		--destructive: 0 84.2% 60.2%;
32 | 		--destructive-foreground: 210 40% 98%;
33 | 
34 | 		--ring: 222.2 84% 4.9%;
35 | 
36 | 		--radius: 0.5rem;
37 | 	}
38 | 
39 | 	.dark {
40 | 		--background: 222.2 84% 4.9%;
41 | 		--foreground: 210 40% 98%;
42 | 
43 | 		--muted: 217.2 32.6% 17.5%;
44 | 		--muted-foreground: 215 20.2% 65.1%;
45 | 
46 | 		--popover: 222.2 84% 4.9%;
47 | 		--popover-foreground: 210 40% 98%;
48 | 
49 | 		--card: 222.2 84% 4.9%;
50 | 		--card-foreground: 210 40% 98%;
51 | 
52 | 		--border: 217.2 32.6% 17.5%;
53 | 		--input: 217.2 32.6% 17.5%;
54 | 
55 | 		--primary: 210 40% 98%;
56 | 		--primary-foreground: 222.2 47.4% 11.2%;
57 | 
58 | 		--secondary: 217.2 32.6% 17.5%;
59 | 		--secondary-foreground: 210 40% 98%;
60 | 
61 | 		--accent: 217.2 32.6% 17.5%;
62 | 		--accent-foreground: 210 40% 98%;
63 | 
64 | 		--destructive: 0 62.8% 30.6%;
65 | 		--destructive-foreground: 210 40% 98%;
66 | 
67 | 		--ring: 212.7 26.8% 83.9%;
68 | 	}
69 | }
70 | 
71 | @layer base {
72 | 	* {
73 | 		@apply border-border;
74 | 	}
75 | 	body {
76 | 		@apply bg-background text-foreground;
77 | 	}
78 | 	ul {
79 | 		@apply list-disc list-inside;
80 | 	}
81 | 	ol {
82 | 		@apply list-decimal list-inside;
83 | 	}
84 | 	li {
85 | 		@apply mb-2;
86 | 	}
87 | }
88 | 
--------------------------------------------------------------------------------
/ui/src/lib/components/custom/ConsentManager.svelte:
--------------------------------------------------------------------------------
 1 | <script>
 9 | </script>
10 | 