├── .github └── workflows │ ├── codeql.yml │ ├── dockerimage.yml │ └── test.yml ├── .gitignore ├── Dockerfile ├── Dockerfile.metadata_service ├── Dockerfile.migration_service ├── Dockerfile.service.test ├── Dockerfile.ui_service ├── LICENSE ├── MANIFEST.in ├── README.md ├── RELEASE.md ├── docker-compose.development.yml ├── docker-compose.test.yml ├── docker-compose.yml ├── migration_tools.py ├── pytest.ini ├── requirements.dev.txt ├── requirements.txt ├── run_goose.py ├── services ├── __init__.py ├── data │ ├── __init__.py │ ├── db_utils.py │ ├── models.py │ ├── postgres_async_db.py │ ├── service_configs.py │ └── tagging_utils.py ├── metadata_service │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── admin.py │ │ ├── artifact.py │ │ ├── flow.py │ │ ├── metadata.py │ │ ├── run.py │ │ ├── step.py │ │ ├── task.py │ │ └── utils.py │ ├── requirements.txt │ ├── server.py │ └── tests │ │ ├── __init__.py │ │ ├── integration_tests │ │ ├── __init__.py │ │ ├── artifact_test.py │ │ ├── flow_test.py │ │ ├── metadata_test.py │ │ ├── run_test.py │ │ ├── step_test.py │ │ ├── task_test.py │ │ └── utils.py │ │ └── unit_tests │ │ ├── __init__.py │ │ ├── api_util_test.py │ │ └── task_test.py ├── migration_service │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── admin.py │ │ └── utils.py │ ├── data │ │ ├── __init__.py │ │ └── postgres_async_db.py │ ├── get_virtual_env.py │ ├── migration_config.py │ ├── migration_files │ │ ├── 1_create_tables.sql │ │ ├── 20200603104139_add_str_id_cols.sql │ │ ├── 20201002000616_update_metadata_primary_key.sql │ │ ├── 20210202145952_add_runs_idx_ts_epoch_flow_id.sql │ │ ├── 20210260056859_add_tasks_idx_on_.sql │ │ ├── 20211202100726_add_str_id_indices.sql │ │ ├── 20220503175500_add_run_epoch_index.sql │ │ └── 20230118020300_drop_partial_indexes.sql │ ├── migration_server.py │ ├── requirements.txt │ └── run_script.py ├── ui_backend_service │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── admin.py │ │ ├── artifact.py │ │ ├── autocomplete.py │ │ ├── card.py │ │ ├── config.py │ │ ├── dag.py │ │ ├── features.py │ │ ├── flow.py │ │ ├── heartbeat_monitor.py │ │ ├── log.py │ │ ├── metadata.py │ │ ├── notify.py │ │ ├── plugins.py │ │ ├── run.py │ │ ├── search.py │ │ ├── step.py │ │ ├── tag.py │ │ ├── task.py │ │ ├── utils.py │ │ └── ws.py │ ├── data │ │ ├── __init__.py │ │ ├── cache │ │ │ ├── __init__.py │ │ │ ├── card_cache_manager.py │ │ │ ├── card_cache_service.py │ │ │ ├── client │ │ │ │ ├── __init__.py │ │ │ │ ├── cache_action.py │ │ │ │ ├── cache_async_client.py │ │ │ │ ├── cache_client.py │ │ │ │ ├── cache_server.py │ │ │ │ ├── cache_store.py │ │ │ │ └── cache_worker.py │ │ │ ├── custom_flowgraph.py │ │ │ ├── generate_dag_action.py │ │ │ ├── get_artifacts_action.py │ │ │ ├── get_data_action.py │ │ │ ├── get_log_file_action.py │ │ │ ├── get_parameters_action.py │ │ │ ├── get_task_action.py │ │ │ ├── search_artifacts_action.py │ │ │ ├── store.py │ │ │ └── utils.py │ │ ├── db │ │ │ ├── __init__.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── artifact_row.py │ │ │ │ ├── base_row.py │ │ │ │ ├── flow_row.py │ │ │ │ ├── metadata_row.py │ │ │ │ ├── run_row.py │ │ │ │ ├── step_row.py │ │ │ │ └── task_row.py │ │ │ ├── postgres_async_db.py │ │ │ ├── tables │ │ │ │ ├── __init__.py │ │ │ │ ├── artifact.py │ │ │ │ ├── base.py │ │ │ │ ├── flow.py │ │ │ │ ├── metadata.py │ │ │ │ ├── run.py │ │ │ │ ├── step.py │ │ │ │ └── task.py │ │ │ └── utils.py │ │ └── refiner │ │ │ ├── __init__.py │ │ │ ├── artifact_refiner.py │ │ │ ├── 
parameter_refiner.py │ │ │ ├── refinery.py │ │ │ └── task_refiner.py │ ├── doc.py │ ├── docs │ │ ├── README.md │ │ ├── api.md │ │ ├── architecture.md │ │ ├── environment.md │ │ ├── images │ │ │ ├── cache_architecture.png │ │ │ ├── heartbeat_monitoring.png │ │ │ └── websocket_communication.png │ │ ├── plugins.md │ │ └── websockets.md │ ├── download_ui.sh │ ├── example.custom_quicklinks.json │ ├── example.notifications.json │ ├── example.plugins.json │ ├── features.py │ ├── frontend.py │ ├── plugins │ │ ├── __init__.py │ │ ├── installed │ │ │ └── .gitignore │ │ └── plugin.py │ ├── requirements.txt │ ├── tests │ │ ├── __init__.py │ │ ├── integration_tests │ │ │ ├── __init__.py │ │ │ ├── admin_test.py │ │ │ ├── artifact_test.py │ │ │ ├── autocomplete_test.py │ │ │ ├── card_test.py │ │ │ ├── features_test.py │ │ │ ├── flows_test.py │ │ │ ├── grouped_runs_test.py │ │ │ ├── log_test.py │ │ │ ├── metadata_test.py │ │ │ ├── notify_test.py │ │ │ ├── plugins_test.py │ │ │ ├── runs_test.py │ │ │ ├── status_attempts_test.py │ │ │ ├── status_runs_test.py │ │ │ ├── status_tasks_test.py │ │ │ ├── steps_test.py │ │ │ ├── tasks_test.py │ │ │ ├── utils.py │ │ │ └── ws_test.py │ │ └── unit_tests │ │ │ ├── __init__.py │ │ │ ├── cache_utils_test.py │ │ │ ├── custom_flowgraph_test.py │ │ │ ├── data_test.py │ │ │ ├── get_artifacts_action_test.py │ │ │ ├── get_log_file_action_test.py │ │ │ ├── search_artifacts_action_test.py │ │ │ ├── search_test.py │ │ │ └── utils_test.py │ ├── ui │ │ ├── .dockerignore │ │ ├── .gitignore │ │ └── static │ │ │ └── .gitignore │ └── ui_server.py └── utils │ ├── __init__.py │ └── tests │ ├── __init__.py │ └── unit_tests │ └── utils_test.py ├── setup.cfg ├── setup.py ├── tox.ini └── wait-for-postgres.sh /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "master" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "master" ] 20 | schedule: 21 | - cron: '22 12 * * 5' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Use only 'java' to analyze code written in Java, Kotlin or both 38 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 39 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 40 | 41 | steps: 42 | - name: Checkout repository 43 | uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2 44 | 45 | # Initializes the CodeQL tools for scanning. 
46 | - name: Initialize CodeQL 47 | uses: github/codeql-action/init@7df0ce34898d659f95c0c4a09eaa8d4e32ee64db # v2.2.12 48 | with: 49 | languages: ${{ matrix.language }} 50 | # If you wish to specify custom queries, you can do so here or in a config file. 51 | # By default, queries listed here will override any specified in a config file. 52 | # Prefix the list here with "+" to use these queries and those in the config file. 53 | 54 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 55 | # queries: security-extended,security-and-quality 56 | 57 | 58 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 59 | # If this step fails, then you should remove it and run the build manually (see below) 60 | - name: Autobuild 61 | uses: github/codeql-action/autobuild@7df0ce34898d659f95c0c4a09eaa8d4e32ee64db # v2.2.12 62 | 63 | # ℹ️ Command-line programs to run using the OS shell. 64 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 65 | 66 | # If the Autobuild fails above, remove it and uncomment the following three lines. 67 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 68 | 69 | # - run: | 70 | # echo "Run, Build Application using script" 71 | # ./location_of_script_within_repo/buildscript.sh 72 | 73 | - name: Perform CodeQL Analysis 74 | uses: github/codeql-action/analyze@7df0ce34898d659f95c0c4a09eaa8d4e32ee64db # v2.2.12 75 | with: 76 | category: "/language:${{matrix.language}}" 77 | -------------------------------------------------------------------------------- /.github/workflows/dockerimage.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | release: 5 | branches: [ master ] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - 12 | name: Checkout 13 | uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 14 | - 15 | name: Docker meta 16 | id: meta 17 | uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175 # v4.6.0 18 | with: 19 | images: | 20 | netflixoss/metaflow_metadata_service 21 | tags: | 22 | type=semver,pattern={{raw}} 23 | type=sha 24 | type=raw,value=latest 25 | - 26 | name: Login to Docker Hub 27 | uses: docker/login-action@465a07811f14bebb1938fbed4728c6a1ff8901fc # v2.2.0 28 | with: 29 | username: ${{ secrets.DOCKER_USERNAME_NETFLIX_OSS }} 30 | password: ${{ secrets.DOCKER_AUTH_TOKEN_NETFLIX_OSS }} 31 | - 32 | name: Build and push # We have a single-platform build, so use of setup-buildx-action is currently omitted. 33 | uses: docker/build-push-action@2eb1c1961a95fc15694676618e422e8ba1d63825 # v4.1.1 34 | with: 35 | context: . 
36 | push: true 37 | tags: ${{ steps.meta.outputs.tags }} 38 | labels: ${{ steps.meta.outputs.labels }} 39 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | codestyle: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | python-version: [3.11] 16 | 17 | steps: 18 | - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@75f3110429a8c05be0e1bf360334e4cced2b63fa # v2.3.3 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install Python ${{ matrix.python-version }} dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | python -m pip install pycodestyle 27 | - name: Run Python PEP8 code style checks 28 | run: pycodestyle 29 | 30 | pylint: 31 | runs-on: ubuntu-latest 32 | 33 | strategy: 34 | matrix: 35 | python-version: [3.11] 36 | 37 | steps: 38 | - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 39 | - name: Set up Python ${{ matrix.python-version }} 40 | uses: actions/setup-python@75f3110429a8c05be0e1bf360334e4cced2b63fa # v2.3.3 41 | with: 42 | python-version: ${{ matrix.python-version }} 43 | - name: Install Python ${{ matrix.python-version }} dependencies 44 | run: | 45 | python -m pip install --upgrade pip 46 | python -m pip install tox pylint 47 | - name: Run Tox (pylint) 48 | run: tox -e pylint 49 | 50 | unit: 51 | runs-on: ubuntu-latest 52 | 53 | strategy: 54 | matrix: 55 | python-version: [3.11] 56 | 57 | steps: 58 | - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 59 | - name: Set up Python ${{ matrix.python-version }} 60 | uses: actions/setup-python@75f3110429a8c05be0e1bf360334e4cced2b63fa # v2.3.3 61 | with: 62 | python-version: ${{ matrix.python-version }} 63 | - name: Install Python ${{ matrix.python-version }} dependencies 64 | run: | 65 | python -m pip install --upgrade pip 66 | python -m pip install tox 67 | - name: Run Tox 68 | run: tox -e unit 69 | 70 | integration: 71 | runs-on: ubuntu-latest 72 | 73 | services: 74 | db_test: # This will be the hostname 75 | image: postgres:11 76 | env: 77 | POSTGRES_USER: test 78 | POSTGRES_PASSWORD: test 79 | POSTGRES_DB: test 80 | options: >- 81 | --health-cmd "pg_isready -d test -U test" 82 | --health-interval 10s 83 | --health-timeout 5s 84 | --health-retries 5 85 | ports: 86 | - 5432:5432 87 | 88 | strategy: 89 | matrix: 90 | python-version: [3.11] 91 | golang-version: ["^1.14.5"] 92 | 93 | env: 94 | MF_METADATA_DB_HOST: db_test 95 | MF_METADATA_DB_PORT: 5432 96 | MF_METADATA_DB_USER: test 97 | MF_METADATA_DB_PSWD: test 98 | MF_METADATA_DB_NAME: test 99 | 100 | steps: 101 | - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 102 | - uses: actions/setup-go@bfdd3570ce990073878bf10f6b2d79082de49492 # v2.2.0 103 | with: 104 | go-version: ${{ matrix.golang-version }} 105 | - name: Install goose migration tool 106 | run: go install github.com/pressly/goose/v3/cmd/goose@v3.5.3 107 | - name: Set up Python ${{ matrix.python-version }} 108 | uses: actions/setup-python@75f3110429a8c05be0e1bf360334e4cced2b63fa # v2.3.3 109 | with: 110 | python-version: ${{ matrix.python-version }} 111 | - name: Install Python ${{ matrix.python-version }} dependencies 112 | 
run: | 113 | python -m pip install --upgrade pip 114 | python -m pip install tox 115 | - name: Add required test DB alias name for localhost 116 | run: echo "127.0.0.1 db_test" | sudo tee -a /etc/hosts 117 | - name: Run Tox 118 | run: tox -v -e integration 119 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Distribution / packaging 2 | .Python 3 | env3/ 4 | env/ 5 | bin/ 6 | build/ 7 | develop-eggs/ 8 | dist/ 9 | eggs/ 10 | lib/ 11 | lib64/ 12 | parts/ 13 | sdist/ 14 | var/ 15 | *.egg-info/ 16 | .installed.cfg 17 | *.egg 18 | *.eggs 19 | *.zip 20 | 21 | # Test artifacts 22 | .tox/ 23 | .coverage 24 | 25 | # Byte-compiled / optimized / DLL files 26 | __pycache__/ 27 | *.py[cod] 28 | 29 | # C extensions 30 | *.so 31 | 32 | # jetbrains 33 | .idea 34 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.20.2-buster as amd64-golang 2 | FROM arm64v8/golang:1.20.2-buster as arm64-golang 3 | 4 | FROM ${TARGETARCH}-golang as goose 5 | RUN go install github.com/pressly/goose/v3/cmd/goose@v3.9.0 6 | 7 | FROM python:3.11.6-slim-bookworm 8 | COPY --from=goose /go/bin/goose /usr/local/bin/ 9 | 10 | ARG BUILD_TIMESTAMP 11 | ARG BUILD_COMMIT_HASH 12 | ENV BUILD_TIMESTAMP=$BUILD_TIMESTAMP 13 | ENV BUILD_COMMIT_HASH=$BUILD_COMMIT_HASH 14 | 15 | ARG UI_ENABLED="1" 16 | ARG UI_VERSION="v1.3.13" 17 | ENV UI_ENABLED=$UI_ENABLED 18 | ENV UI_VERSION=$UI_VERSION 19 | 20 | ENV FEATURE_RUN_GROUPS=0 21 | ENV FEATURE_DEBUG_VIEW=1 22 | 23 | RUN apt-get update -y \ 24 | && apt-get -y install libpq-dev unzip gcc curl 25 | 26 | RUN pip3 install virtualenv requests 27 | 28 | # TODO: possibly unused virtualenv. See if it can be removed 29 | RUN virtualenv /opt/v_1_0_1 -p python3 30 | # All of the official deployment templates reference this virtualenv for launching services. 31 | RUN virtualenv /opt/latest -p python3 32 | 33 | RUN /opt/v_1_0_1/bin/pip install https://github.com/Netflix/metaflow-service/archive/1.0.1.zip 34 | 35 | ADD services/__init__.py /root/services/ 36 | ADD services/data/service_configs.py /root/services/ 37 | ADD services/data /root/services/data 38 | ADD services/metadata_service /root/services/metadata_service 39 | ADD services/ui_backend_service /root/services/ui_backend_service 40 | ADD services/utils /root/services/utils 41 | ADD setup.py setup.cfg run_goose.py /root/ 42 | WORKDIR /root 43 | RUN /opt/latest/bin/pip install . 
44 | 45 | # Install Netflix/metaflow-ui release artifact 46 | RUN /root/services/ui_backend_service/download_ui.sh 47 | 48 | # Migration Service 49 | ADD services/migration_service /root/services/migration_service 50 | RUN pip3 install -r /root/services/migration_service/requirements.txt 51 | 52 | RUN chmod 777 /root/services/migration_service/run_script.py 53 | CMD python3 services/migration_service/run_script.py 54 | -------------------------------------------------------------------------------- /Dockerfile.metadata_service: -------------------------------------------------------------------------------- 1 | FROM python:3.11.7-bookworm 2 | 3 | RUN apt-get update -y \ 4 | && apt-get -y install libpq-dev gcc 5 | 6 | ADD services/__init__.py /root/services/ 7 | ADD services/data /root/services/data 8 | ADD services/utils /root/services/utils 9 | ADD services/metadata_service /root/services/metadata_service 10 | ADD setup.py setup.cfg /root/ 11 | WORKDIR /root 12 | RUN pip install --editable . 13 | CMD metadata_service -------------------------------------------------------------------------------- /Dockerfile.migration_service: -------------------------------------------------------------------------------- 1 | FROM golang:1.20.2 AS goose 2 | RUN go install github.com/pressly/goose/v3/cmd/goose@v3.9.0 3 | 4 | FROM python:3.11.7-bookworm 5 | COPY --from=goose /go/bin/goose /usr/local/bin/ 6 | 7 | RUN apt-get update -y \ 8 | && apt-get -y install libpq-dev 9 | 10 | ADD services/__init__.py /root/services/__init__.py 11 | ADD services/utils /root/services/utils 12 | ADD services/migration_service /root/services/migration_service 13 | ADD setup.py setup.cfg run_goose.py /root/ 14 | WORKDIR /root 15 | RUN pip install --editable . 16 | CMD migration_service -------------------------------------------------------------------------------- /Dockerfile.service.test: -------------------------------------------------------------------------------- 1 | FROM golang:1.20.2 AS goose 2 | RUN go install github.com/pressly/goose/v3/cmd/goose@v3.9.0 3 | 4 | FROM python:3.11.7-bookworm 5 | COPY --from=goose /go/bin/goose /usr/local/bin/ 6 | 7 | RUN apt-get update -y \ 8 | && apt-get -y install libpq-dev gcc 9 | 10 | RUN pip install tox 11 | 12 | COPY . /app 13 | WORKDIR /app 14 | 15 | CMD /app/wait-for-postgres.sh tox -------------------------------------------------------------------------------- /Dockerfile.ui_service: -------------------------------------------------------------------------------- 1 | FROM python:3.11.7-bookworm 2 | 3 | ARG UI_ENABLED="1" 4 | ARG UI_VERSION="v1.3.13" 5 | ENV UI_ENABLED=$UI_ENABLED 6 | ENV UI_VERSION=$UI_VERSION 7 | 8 | ARG BUILD_TIMESTAMP 9 | ARG BUILD_COMMIT_HASH 10 | 11 | ENV BUILD_TIMESTAMP=$BUILD_TIMESTAMP 12 | ENV BUILD_COMMIT_HASH=$BUILD_COMMIT_HASH 13 | 14 | ARG CUSTOM_QUICKLINKS 15 | 16 | ENV CUSTOM_QUICKLINKS=$CUSTOM_QUICKLINKS 17 | 18 | RUN apt-get update -y \ 19 | && apt-get -y install libpq-dev unzip gcc curl 20 | 21 | ADD services/__init__.py /root/services/__init__.py 22 | ADD services/data /root/services/data 23 | ADD services/utils /root/services/utils 24 | ADD services/metadata_service /root/services/metadata_service 25 | ADD services/ui_backend_service /root/services/ui_backend_service 26 | ADD setup.py setup.cfg /root/ 27 | 28 | WORKDIR /root 29 | 30 | # Install Netflix/metaflow-ui release artifact 31 | RUN /root/services/ui_backend_service/download_ui.sh 32 | 33 | RUN pip install --editable . 
34 | 35 | CMD ui_backend_service 36 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include services/metadata_service/requirements.txt 2 | include services/migration_service/requirements.txt 3 | include services/ui_backend_service/requirements.txt 4 | include requirements.dev.txt 5 | include requirements.txt 6 | include README.md -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Release process 2 | 3 | We follow [Semantic Versioning Specification 2.0.0](https://semver.org/spec/v2.0.0.html). 4 | 5 | In short, given a version number MAJOR.MINOR.PATCH, increment the: 6 | 7 | 1. MAJOR version when you make incompatible API changes, 8 | 2. MINOR version when you add functionality in a backwards compatible manner, and 9 | 3. PATCH version when you make backwards compatible bug fixes. 10 | 11 | Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format. 12 | 13 | ## Shipping a new version 14 | 15 | The release process is mostly automated via Github Actions, however a few manual steps are required: 16 | 17 | - [ ] [Edit `setup.py`](https://github.com/Netflix/metaflow-service/edit/master/setup.py) version in `master` branch (e.g. `"version": "1.0.0"`) 18 | - [ ] [Edit `Dockerfile.ui_service`](https://github.com/Netflix/metaflow-service/edit/master/Dockerfile.ui_service) and [edit `Dockerfile`](https://github.com/Netflix/metaflow-service/edit/master/Dockerfile) to set `ARG UI_VERSION="v7.7.7"` to the _latest version of `metaflow-ui`_ (if changed) 19 | - [ ] Create new tag from `master` branch (e.g. `git tag v1.0.0`, note the `v` -prefix) 20 | - [ ] Push tag to remote (e.g. `git push origin v1.0.0`) 21 | - [ ] Create a new release draft in [releases](https://github.com/Netflix/metaflow-service/releases) 22 | - [ ] Edit release draft 23 | - [ ] Make sure current and previous version are correct 24 | - [ ] Edit `Compatibility` section (Correct [Netflix/metaflow-service](https://github.com/Netflix/metaflow-service/releases) release versions) 25 | - [ ] Edit/remove `Additional resources` section 26 | - [ ] Make sure release artifact is uploaded 27 | - [ ] Publish release draft 28 | 29 | GitHub Actions will automatically publish the docker image to [netflixoss/metaflow_metadata_service](https://hub.docker.com/r/netflixoss/metaflow_metadata_service) 30 | -------------------------------------------------------------------------------- /docker-compose.development.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | ui_backend: 4 | platform: linux/amd64 5 | build: 6 | context: . 7 | dockerfile: Dockerfile.ui_service 8 | args: 9 | UI_ENABLED: 1 10 | ports: 11 | - "${MF_UI_METADATA_PORT:-8083}:${MF_UI_METADATA_PORT:-8083}" 12 | volumes: 13 | - ./services:/root/services 14 | - ${HOME}/.aws:/root/.aws 15 | # Add container capability for benchmarking processes. 
required for py-spy 16 | cap_add: 17 | - SYS_PTRACE 18 | environment: 19 | - MF_METADATA_DB_HOST=db 20 | - MF_METADATA_DB_PORT=5432 21 | - MF_METADATA_DB_USER=postgres 22 | - MF_METADATA_DB_PSWD=postgres 23 | - MF_METADATA_DB_NAME=postgres 24 | - MF_UI_METADATA_PORT=${MF_UI_METADATA_PORT:-8083} 25 | - MF_UI_METADATA_HOST=${MF_UI_METADATA_HOST:-0.0.0.0} 26 | - MF_METADATA_DB_POOL_MIN=1 27 | - MF_METADATA_DB_POOL_MAX=10 28 | - METAFLOW_S3_RETRY_COUNT=0 29 | - LOGLEVEL=INFO 30 | - AIOPG_ECHO=0 31 | - UI_ENABLED=0 32 | - PREFETCH_RUNS_SINCE=2592000 # 30 days in seconds 33 | - PREFETCH_RUNS_LIMIT=1 # Prefetch only one run 34 | - S3_NUM_WORKERS=2 35 | - CACHE_ARTIFACT_MAX_ACTIONS=1 36 | - CACHE_DAG_MAX_ACTIONS=1 37 | - CACHE_LOG_MAX_ACTIONS=1 38 | - CACHE_ARTIFACT_STORAGE_LIMIT=16000000 39 | - CACHE_DAG_STORAGE_LIMIT=16000000 40 | - WS_POSTPROCESS_CONCURRENCY_LIMIT=8 41 | - FEATURE_PREFETCH_DISABLE=0 42 | - FEATURE_CACHE_DISABLE=0 43 | - FEATURE_S3_DISABLE=0 44 | - FEATURE_REFINE_DISABLE=0 45 | - FEATURE_WS_DISABLE=0 46 | - FEATURE_HEARTBEAT_DISABLE=0 47 | - FEATURE_DB_LISTEN_DISABLE=0 48 | - FEATURE_ARTIFACT_SEARCH=1 49 | - FEATURE_FOREACH_VAR_SEARCH=1 50 | - FEATURE_ARTIFACT_TABLE=1 51 | - CUSTOM_QUICKLINKS=$CUSTOM_QUICKLINKS 52 | - NOTIFICATIONS=$NOTIFICATIONS 53 | - GA_TRACKING_ID=none 54 | - PLUGINS=$PLUGINS 55 | - AWS_PROFILE=$AWS_PROFILE 56 | depends_on: 57 | - migration 58 | metadata: 59 | platform: linux/amd64 60 | build: 61 | context: . 62 | dockerfile: Dockerfile.metadata_service 63 | ports: 64 | - "${MF_METADATA_PORT:-8080}:${MF_METADATA_PORT:-8080}" 65 | volumes: 66 | - ./services:/root/services 67 | environment: 68 | - LOGLEVEL=WARNING 69 | - MF_METADATA_DB_HOST=db 70 | - MF_METADATA_DB_PORT=5432 71 | - MF_METADATA_DB_USER=postgres 72 | - MF_METADATA_DB_PSWD=postgres 73 | - MF_METADATA_DB_NAME=postgres 74 | - MF_METADATA_PORT=${MF_METADATA_PORT:-8080} 75 | - MF_METADATA_HOST=${MF_METADATA_HOST:-0.0.0.0} 76 | - MF_MIGRATION_PORT=${MF_MIGRATION_PORT:-8082} 77 | depends_on: 78 | - migration 79 | migration: 80 | command: ["python", "/root/run_goose.py"] 81 | platform: linux/amd64 82 | build: 83 | context: . 84 | dockerfile: Dockerfile.migration_service 85 | volumes: 86 | - ./services:/root/services 87 | environment: 88 | - MF_METADATA_DB_HOST=db 89 | - MF_METADATA_DB_PORT=5432 90 | - MF_METADATA_DB_USER=postgres 91 | - MF_METADATA_DB_PSWD=postgres 92 | - MF_METADATA_DB_NAME=postgres 93 | - MF_METADATA_PORT=${MF_METADATA_PORT:-8080} 94 | - MF_METADATA_HOST=${MF_METADATA_HOST:-0.0.0.0} 95 | - MF_MIGRATION_ENDPOINTS_ENABLED=1 96 | - MF_MIGRATION_PORT=${MF_MIGRATION_PORT:-8082} 97 | depends_on: 98 | - db 99 | db: 100 | image: "postgres:11" 101 | command: ["postgres", "-c", "log_statement=none", "-c", "wal_level=logical"] 102 | environment: 103 | POSTGRES_USER: postgres 104 | POSTGRES_PASSWORD: postgres 105 | POSTGRES_DB: postgres 106 | ports: 107 | - "5432:5432" 108 | volumes: 109 | - db_dev_data:/var/lib/postgresql/data2 110 | healthcheck: 111 | test: ["CMD-SHELL", "pg_isready -U postgres"] 112 | interval: 10s 113 | timeout: 5s 114 | retries: 5 115 | volumes: 116 | db_dev_data: 117 | -------------------------------------------------------------------------------- /docker-compose.test.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | service_test: 4 | container_name: service_test 5 | build: 6 | context: . 
7 | dockerfile: Dockerfile.service.test 8 | volumes: 9 | - .:/app 10 | environment: 11 | - MF_METADATA_DB_HOST=db_test 12 | - MF_METADATA_DB_PORT=5432 13 | - MF_METADATA_DB_USER=test 14 | - MF_METADATA_DB_PSWD=test 15 | - MF_METADATA_DB_NAME=test 16 | - MF_MIGRATION_ENDPOINTS_ENABLED=1 17 | depends_on: 18 | - db_test 19 | db_test: 20 | container_name: db_test 21 | image: "postgres:11" 22 | environment: 23 | POSTGRES_USER: test 24 | POSTGRES_PASSWORD: test 25 | POSTGRES_DB: test 26 | ports: 27 | - "5432:5432" 28 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | metadata: 4 | image: "metadata_service:latest" 5 | restart: always 6 | container_name: "metadata_service" 7 | ports: 8 | - "${MF_MIGRATION_PORT:-8082}:${MF_MIGRATION_PORT:-8082}" 9 | - "${MF_METADATA_PORT:-8080}:${MF_METADATA_PORT:-8080}" 10 | volumes: 11 | - .:/code 12 | environment: 13 | - MF_METADATA_DB_HOST=db 14 | - MF_METADATA_DB_PORT=5432 15 | - MF_METADATA_DB_USER=postgres 16 | - MF_METADATA_DB_PSWD=postgres 17 | - MF_METADATA_DB_NAME=postgres 18 | - MF_MIGRATION_ENDPOINTS_ENABLED=1 19 | - MF_METADATA_PORT=${MF_METADATA_PORT:-8080} 20 | - MF_METADATA_HOST=${MF_METADATA_HOST:-0.0.0.0} 21 | - MF_MIGRATION_PORT=${MF_MIGRATION_PORT:-8082} 22 | links: 23 | - db 24 | db: 25 | image: "postgres:11" 26 | restart: always 27 | container_name: "my_postgres" 28 | environment: 29 | POSTGRES_USER: postgres 30 | POSTGRES_PASSWORD: postgres 31 | POSTGRES_DB: postgres 32 | ports: 33 | - "5432:5432" 34 | volumes: 35 | - my_dbdata:/var/lib/postgresql/data2 36 | volumes: 37 | my_dbdata: 38 | -------------------------------------------------------------------------------- /migration_tools.py: -------------------------------------------------------------------------------- 1 | import click 2 | import requests 3 | 4 | 5 | @click.group() 6 | def tools(): 7 | pass 8 | 9 | 10 | @tools.command() 11 | @click.option('--base-url', 12 | default=None, 13 | required=True, 14 | help='url to migration service ex: http://localhost:8082') 15 | def upgrade(base_url): 16 | """Upgrade to latest db schema""" 17 | url = base_url + "/upgrade" 18 | response = requests.patch(url) 19 | print(response.text) 20 | 21 | 22 | @tools.command() 23 | @click.option('--base-url', 24 | default=None, 25 | required=True, 26 | help='url to migration service ex: http://localhost:8082') 27 | def db_status(base_url): 28 | """Get the status of the current db schema""" 29 | url = base_url + "/db_schema_status" 30 | response = requests.get(url) 31 | print(response.json()) 32 | 33 | 34 | @tools.command() 35 | @click.option('--base-url', 36 | default=None, 37 | required=True, 38 | help='url to metadata service ex: http://localhost:8080') 39 | def metadata_service_version(base_url): 40 | """Get the version of the metadata service""" 41 | url = base_url + "/version" 42 | response = requests.get(url) 43 | print(response.text) 44 | 45 | 46 | cli = click.CommandCollection(sources=[tools]) 47 | 48 | 49 | if __name__ == "__main__": 50 | cli() 51 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | unit_tests: Unit tests (deselect with '-m "not unit_tests"') 4 | integration_tests: Integration tests (deselect with '-m "not integration_tests"') 5 | asyncio_mode=auto 6 |
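# The markers above can be selected directly with pytest's -m flag; a local sketch
# (CI drives the same split through `tox -e unit` and `tox -e integration`):
#   pytest -m unit_tests                 # run only the unit test suite
#   pytest -m "not integration_tests"    # skip tests that need a running Postgres instance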
-------------------------------------------------------------------------------- /requirements.dev.txt: -------------------------------------------------------------------------------- 1 | tox 2 | pylint 3 | pytest 4 | pytest-cov 5 | pytest-aiohttp >= 1.0.3, < 2 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r services/metadata_service/requirements.txt 2 | -r services/migration_service/requirements.txt 3 | -r services/ui_backend_service/requirements.txt 4 | -------------------------------------------------------------------------------- /run_goose.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import argparse 5 | from subprocess import Popen 6 | from urllib.parse import quote 7 | import psycopg2 8 | import psycopg2.errorcodes 9 | 10 | 11 | DB_SCHEMA_NAME = os.environ.get("DB_SCHEMA_NAME", "public") 12 | 13 | 14 | def check_if_goose_table_exists(db_connection_string: str): 15 | conn = psycopg2.connect(db_connection_string) 16 | cur = conn.cursor() 17 | try: 18 | cur.execute("SELECT schemaname,tablename FROM pg_tables") 19 | tables = [name for schema, name in cur.fetchall() if schema == DB_SCHEMA_NAME] 20 | if "goose_db_version" not in tables: 21 | print( 22 | f"Goose migration table not found among tables in schema {DB_SCHEMA_NAME}. Found: {', '.join(tables)}", 23 | file=sys.stderr, 24 | ) 25 | return False 26 | else: 27 | print(f"Goose migration table found in schema {DB_SCHEMA_NAME}", file=sys.stderr) 28 | return True 29 | finally: 30 | conn.close() 31 | 32 | 33 | def wait_for_postgres(db_connection_string: str, timeout_seconds: int): 34 | deadline = time.time() + timeout_seconds 35 | while True: 36 | try: 37 | conn = psycopg2.connect(db_connection_string) 38 | conn.close() 39 | return 40 | except psycopg2.OperationalError as e: 41 | if time.time() < deadline: 42 | print(f"Failed to connect to postgres ({e}), sleeping", file=sys.stderr) 43 | time.sleep(.5) 44 | else: 45 | raise 46 | 47 | 48 | def main(): 49 | parser = argparse.ArgumentParser(description="Run goose migrations") 50 | parser.add_argument("--only-if-empty-db", default=False, action="store_true") 51 | parser.add_argument("--wait", type=int, default=30, help="Wait for connection for X seconds") 52 | args = parser.parse_args() 53 | 54 | db_connection_string = f'postgresql://{quote(os.environ["MF_METADATA_DB_USER"])}:'\ 55 | f'{quote(os.environ["MF_METADATA_DB_PSWD"])}@{os.environ["MF_METADATA_DB_HOST"]}:'\ 56 | f'{os.environ["MF_METADATA_DB_PORT"]}/{os.environ["MF_METADATA_DB_NAME"]}' 57 | 58 | ssl_mode = os.environ.get("MF_METADATA_DB_SSL_MODE") 59 | ssl_cert_path = os.environ.get("MF_METADATA_DB_SSL_CERT_PATH") 60 | ssl_key_path = os.environ.get("MF_METADATA_DB_SSL_KEY_PATH") 61 | ssl_root_cert_path = os.environ.get("MF_METADATA_DB_SSL_ROOT_CERT") 62 | 63 | if ssl_mode in ['allow', 'prefer', 'require', 'verify-ca', 'verify-full']: 64 | ssl_query = f'sslmode={ssl_mode}' 65 | if ssl_cert_path is not None: 66 | ssl_query = f'{ssl_query}&sslcert={ssl_cert_path}' 67 | if ssl_key_path is not None: 68 | ssl_query = f'{ssl_query}&sslkey={ssl_key_path}' 69 | if ssl_root_cert_path is not None: 70 | ssl_query = f'{ssl_query}&sslrootcert={ssl_root_cert_path}' 71 | else: 72 | ssl_query = f'sslmode=disable' 73 | 74 | db_connection_string = f'{db_connection_string}?{ssl_query}' 75 | 76 | if args.wait: 77 | 
wait_for_postgres(db_connection_string, timeout_seconds=args.wait) 78 | 79 | if args.only_if_empty_db: 80 | if check_if_goose_table_exists(db_connection_string): 81 | print( 82 | f"Skipping migrations since --only-if-empty-db flag is used", 83 | file=sys.stderr, 84 | ) 85 | sys.exit(0) 86 | 87 | p = Popen( 88 | [ 89 | "goose", 90 | "-dir", 91 | "/root/services/migration_service/migration_files/", 92 | "postgres", 93 | db_connection_string, 94 | "up", 95 | ] 96 | ) 97 | if p.wait() != 0: 98 | raise Exception("Failed to run initial migration") 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/__init__.py -------------------------------------------------------------------------------- /services/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import FlowRow, RunRow, StepRow, TaskRow, ArtifactRow, MetadataRow 2 | -------------------------------------------------------------------------------- /services/data/db_utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import List, Dict, Any 3 | import psycopg2 4 | import collections 5 | import datetime 6 | import time 7 | import json 8 | 9 | 10 | DBResponse = collections.namedtuple("DBResponse", "response_code body") 11 | 12 | DBPagination = collections.namedtuple("DBPagination", "limit offset count page") 13 | 14 | 15 | def aiopg_exception_handling(exception): 16 | err_msg = str(exception) 17 | body = {"err_msg": err_msg} 18 | if isinstance(exception, asyncio.TimeoutError): 19 | body = { 20 | "err_msg": { 21 | "type": "timeout error", 22 | } 23 | } 24 | elif isinstance(exception, psycopg2.Error): 25 | # this means that this is a psycopg2 exception 26 | # since this is of type `psycopg2.Error` we can use https://www.psycopg.org/docs/module.html#psycopg2.Error 27 | body = { 28 | "err_msg": { 29 | "pgerror": exception.pgerror, 30 | "pgcode": exception.pgcode, 31 | "diag": None 32 | if exception.diag is None 33 | else { 34 | "message_primary": exception.diag.message_primary, 35 | "severity": exception.diag.severity, 36 | }, 37 | } 38 | } 39 | 40 | if isinstance(exception, psycopg2.IntegrityError): 41 | if "duplicate key" in err_msg: 42 | return DBResponse(response_code=409, body=json.dumps(body)) 43 | elif "foreign key" in err_msg: 44 | return DBResponse(response_code=404, body=json.dumps(body)) 45 | else: 46 | return DBResponse(response_code=500, body=json.dumps(body)) 47 | elif isinstance(exception, psycopg2.errors.UniqueViolation): 48 | return DBResponse(response_code=409, body=json.dumps(body)) 49 | elif isinstance(exception, IndexError): 50 | return DBResponse(response_code=404, body={}) 51 | else: 52 | return DBResponse(response_code=500, body=json.dumps(body)) 53 | 54 | 55 | def get_db_ts_epoch_str(): 56 | return str(int(round(time.time() * 1000))) 57 | 58 | 59 | def new_heartbeat_ts(): 60 | return int(datetime.datetime.utcnow().timestamp()) 61 | 62 | 63 | def translate_run_key(v: str): 64 | value = str(v) 65 | return "run_number" if value.isnumeric() else "run_id", value 66 | 67 | 68 | def translate_task_key(v: str): 69 | value = str(v) 70 | return "task_id" if value.isnumeric() else "task_name", value 71 
| 72 | 73 | def get_exposed_run_id(run_number, run_id): 74 | if run_id is not None: 75 | return run_id 76 | return run_number 77 | 78 | 79 | def get_exposed_task_id(task_id, task_name): 80 | if task_name is not None: 81 | return task_name 82 | return task_id 83 | 84 | 85 | def get_latest_attempt_id_for_tasks(artifacts): 86 | attempt_ids = {} 87 | for artifact in artifacts: 88 | attempt_ids[artifact["task_id"]] = max( 89 | artifact["attempt_id"], attempt_ids.get(artifact["task_id"], 0) 90 | ) 91 | return attempt_ids 92 | 93 | 94 | def filter_artifacts_for_latest_attempt( 95 | artifacts: List[Dict[str, Any]] 96 | ) -> List[Dict[str, Any]]: 97 | # `artifacts` is a `list` of dictionaries where each item in the list 98 | # consists of `ArtifactRow` in a dictionary form 99 | attempt_ids = get_latest_attempt_id_for_tasks(artifacts) 100 | return filter_artifacts_by_attempt_id_for_tasks(artifacts, attempt_ids) 101 | 102 | 103 | def filter_artifacts_by_attempt_id_for_tasks( 104 | artifacts: List[Dict[str, Any]], attempt_for_tasks: Dict[str, Any] 105 | ) -> List[dict]: 106 | # `artifacts` is a `list` of dictionaries where each item in the list 107 | # consists of `ArtifactRow` in a dictionary form 108 | # `attempt_for_tasks` is a dictionary for form : {task_id:attempt_id} 109 | result = [] 110 | for artifact in artifacts: 111 | if artifact["attempt_id"] == attempt_for_tasks[artifact["task_id"]]: 112 | result.append(artifact) 113 | return result 114 | -------------------------------------------------------------------------------- /services/data/service_configs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | max_connection_retires = int(os.environ.get("MF_SERVICE_CONNECTION_RETRIES", 3)) 4 | connection_retry_wait_time_seconds = int(os.environ.get("MF_SERVICE_CONNECTION_RETRY_WAITTIME_SECONDS", 1)) 5 | max_startup_retries = int(os.environ.get("MF_SERVICE_STARTUP_RETRIES", 5)) 6 | startup_retry_wait_time_seconds = int(os.environ.get("MF_SERVICE_STARTUP_WAITTIME_SECONDS", 1)) 7 | -------------------------------------------------------------------------------- /services/data/tagging_utils.py: -------------------------------------------------------------------------------- 1 | from services.data.db_utils import DBResponse 2 | import copy 3 | 4 | 5 | async def apply_run_tags_to_db_response(flow_id, run_number, run_table_postgres, db_response: DBResponse) -> DBResponse: 6 | """ 7 | We want read APIs to return steps, tasks and artifact objects with tags 8 | and system_tags set to their ancestral Run. 9 | 10 | This is a prerequisite for supporting Run-based tag mutation. 
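For example (hypothetical values): if the ancestral run carries tags=["a_tag"] and system_tags=["runtime:test"], every step, task and artifact dict in the returned body will expose exactly those two lists, regardless of the tags stored on the child rows themselves.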
11 | """ 12 | # we will return a modified copy of db_response 13 | new_db_response = copy.deepcopy(db_response) 14 | # Only replace tags if response code is legit 15 | # Object creation ought to return 201 (let's prepare for that) 16 | if new_db_response.response_code not in (200, 201): 17 | return new_db_response 18 | if isinstance(new_db_response.body, list): 19 | items_to_modify = new_db_response.body 20 | else: 21 | items_to_modify = [new_db_response.body] 22 | if not items_to_modify: 23 | return new_db_response 24 | # items_to_modify now references all the items we want to modify 25 | 26 | # The ancestral run must be successfully read from DB 27 | db_response_for_run = await run_table_postgres.get_run(flow_id, run_number) 28 | if db_response_for_run.response_code != 200: 29 | return DBResponse(response_code=500, body=db_response_for_run.body) 30 | run = db_response_for_run.body 31 | for item_as_dict in items_to_modify: 32 | item_as_dict['tags'] = run['tags'] 33 | item_as_dict['system_tags'] = run['system_tags'] 34 | return new_db_response 35 | -------------------------------------------------------------------------------- /services/metadata_service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/metadata_service/__init__.py -------------------------------------------------------------------------------- /services/metadata_service/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/metadata_service/api/__init__.py -------------------------------------------------------------------------------- /services/metadata_service/api/flow.py: -------------------------------------------------------------------------------- 1 | from services.data import FlowRow 2 | from services.data.postgres_async_db import AsyncPostgresDB 3 | from services.utils import read_body 4 | from services.metadata_service.api.utils import format_response, \ 5 | handle_exceptions 6 | import asyncio 7 | 8 | 9 | class FlowApi(object): 10 | _flow_table = None 11 | lock = asyncio.Lock() 12 | 13 | def __init__(self, app): 14 | app.router.add_route("GET", "/flows", self.get_all_flows) 15 | app.router.add_route("GET", "/flows/{flow_id}", self.get_flow) 16 | app.router.add_route("POST", "/flows/{flow_id}", self.create_flow) 17 | self._async_table = AsyncPostgresDB.get_instance().flow_table_postgres 18 | 19 | @format_response 20 | @handle_exceptions 21 | async def create_flow(self, request): 22 | """ 23 | --- 24 | description: create/register a flow 25 | tags: 26 | - Flow 27 | parameters: 28 | - name: "flow_id" 29 | in: "path" 30 | description: "flow_id" 31 | required: true 32 | type: "string" 33 | - name: "body" 34 | in: "body" 35 | description: "body" 36 | required: true 37 | schema: 38 | type: object 39 | properties: 40 | user_name: 41 | type: string 42 | tags: 43 | type: object 44 | system_tags: 45 | type: object 46 | 47 | produces: 48 | - 'text/plain' 49 | responses: 50 | "200": 51 | description: successfully created flow row 52 | "409": 53 | description: CONFLICT record exists 54 | """ 55 | flow_name = request.match_info.get("flow_id") 56 | 57 | body = await read_body(request.content) 58 | user = body.get("user_name") 59 | tags = body.get("tags") 60 | system_tags = body.get("system_tags") 61 | flow = FlowRow( 62 | 
flow_id=flow_name, user_name=user, tags=tags, system_tags=system_tags 63 | ) 64 | return await self._async_table.add_flow(flow) 65 | 66 | @format_response 67 | @handle_exceptions 68 | async def get_flow(self, request): 69 | """ 70 | --- 71 | description: Get flow by id 72 | tags: 73 | - Flow 74 | parameters: 75 | - name: "flow_id" 76 | in: "path" 77 | description: "flow_id" 78 | required: true 79 | type: "string" 80 | produces: 81 | - text/plain 82 | responses: 83 | "200": 84 | description: successful operation. Return flow 85 | "404": 86 | description: flow not found 87 | "405": 88 | description: invalid HTTP Method 89 | """ 90 | 91 | flow_name = request.match_info.get("flow_id") 92 | return await self._async_table.get_flow(flow_name) 93 | 94 | @format_response 95 | @handle_exceptions 96 | async def get_all_flows(self, request): 97 | """ 98 | --- 99 | description: Get all flows 100 | tags: 101 | - Flow 102 | produces: 103 | - text/plain 104 | responses: 105 | "200": 106 | description: successful operation. Returned all registered flows 107 | "405": 108 | description: invalid HTTP Method 109 | """ 110 | return await self._async_table.get_all_flows() 111 | -------------------------------------------------------------------------------- /services/metadata_service/api/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import wraps 3 | 4 | import collections 5 | from aiohttp import web 6 | from multidict import MultiDict 7 | from importlib import metadata 8 | 9 | from services.utils import get_traceback_str 10 | 11 | version = metadata.version("metadata_service") 12 | METADATA_SERVICE_VERSION = version 13 | METADATA_SERVICE_HEADER = 'METADATA_SERVICE_VERSION' 14 | 15 | ServiceResponse = collections.namedtuple("ServiceResponse", "response_code body") 16 | 17 | 18 | def format_response(func): 19 | """handle formatting""" 20 | 21 | @wraps(func) 22 | async def wrapper(*args, **kwargs): 23 | db_response = await func(*args, **kwargs) 24 | return web.Response(status=db_response.response_code, 25 | body=json.dumps(db_response.body), 26 | headers=MultiDict( 27 | {METADATA_SERVICE_HEADER: METADATA_SERVICE_VERSION})) 28 | 29 | return wrapper 30 | 31 | 32 | def web_response(status: int, body): 33 | return web.Response(status=status, 34 | body=json.dumps(body), 35 | headers=MultiDict( 36 | {"Content-Type": "application/json", 37 | METADATA_SERVICE_HEADER: METADATA_SERVICE_VERSION})) 38 | 39 | 40 | def http_500(msg, traceback_str=None): 41 | # NOTE: worth considering if we want to expose tracebacks in the future in the api messages. 
42 | if traceback_str is None: 43 | traceback_str = get_traceback_str() 44 | body = { 45 | 'traceback': traceback_str, 46 | 'detail': msg, 47 | 'status': 500, 48 | 'title': 'Internal Server Error', 49 | 'type': 'about:blank' 50 | } 51 | 52 | return ServiceResponse(500, body) 53 | 54 | 55 | def handle_exceptions(func): 56 | """Catch exceptions and return appropriate HTTP error.""" 57 | 58 | @wraps(func) 59 | async def wrapper(*args, **kwargs): 60 | try: 61 | return await func(*args, **kwargs) 62 | except web.HTTPClientError as ex: 63 | return ServiceResponse(ex.status_code, ex.reason) 64 | except Exception as err: 65 | return http_500(str(err)) 66 | 67 | return wrapper 68 | -------------------------------------------------------------------------------- /services/metadata_service/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp >= 3.8.1, < 4 2 | packaging 3 | psycopg2 4 | boto3 5 | aiopg 6 | -------------------------------------------------------------------------------- /services/metadata_service/server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from aiohttp import web 5 | 6 | from .api.run import RunApi 7 | from .api.flow import FlowApi 8 | 9 | from .api.step import StepApi 10 | from .api.task import TaskApi 11 | from .api.artifact import ArtificatsApi 12 | from .api.admin import AuthApi 13 | 14 | from .api.metadata import MetadataApi 15 | from services.data.postgres_async_db import AsyncPostgresDB 16 | from services.utils import DBConfiguration 17 | 18 | PATH_PREFIX = os.environ.get("PATH_PREFIX", "") 19 | 20 | 21 | def app(loop=None, db_conf: DBConfiguration = None, middlewares=None, path_prefix=""): 22 | 23 | loop = loop or asyncio.get_event_loop() 24 | 25 | _app = web.Application(loop=loop) 26 | app = web.Application(loop=loop) if path_prefix else _app 27 | async_db = AsyncPostgresDB() 28 | loop.run_until_complete(async_db._init(db_conf)) 29 | FlowApi(app) 30 | RunApi(app) 31 | StepApi(app) 32 | TaskApi(app) 33 | MetadataApi(app) 34 | ArtificatsApi(app) 35 | AuthApi(app) 36 | 37 | if path_prefix: 38 | _app.add_subapp(path_prefix, app) 39 | if middlewares: 40 | _app.middlewares.extend(middlewares) 41 | return _app 42 | 43 | 44 | def main(): 45 | loop = asyncio.get_event_loop() 46 | the_app = app(loop, DBConfiguration(), path_prefix=PATH_PREFIX) 47 | handler = web.AppRunner(the_app) 48 | loop.run_until_complete(handler.setup()) 49 | 50 | port = os.environ.get("MF_METADATA_PORT", 8080) 51 | host = str(os.environ.get("MF_METADATA_HOST", "0.0.0.0")) 52 | f = loop.create_server(handler.server, host, port) 53 | 54 | srv = loop.run_until_complete(f) 55 | print("serving on", srv.sockets[0].getsockname()) 56 | try: 57 | loop.run_forever() 58 | except KeyboardInterrupt: 59 | pass 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /services/metadata_service/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/metadata_service/tests/__init__.py -------------------------------------------------------------------------------- /services/metadata_service/tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # we need to register 
the utils helper for assert rewriting in order to get descriptive assertion errors. 4 | pytest.register_assert_rewrite("services.metadata_service.tests.integration_tests.utils") 5 | -------------------------------------------------------------------------------- /services/metadata_service/tests/integration_tests/flow_test.py: -------------------------------------------------------------------------------- 1 | from .utils import ( 2 | cli, db, 3 | assert_api_get_response, assert_api_post_response, compare_partial, 4 | add_flow 5 | ) 6 | import pytest 7 | 8 | pytestmark = [pytest.mark.integration_tests] 9 | 10 | 11 | async def test_flows_post(cli, db): 12 | payload = { 13 | "user_name": "test_user", 14 | "tags": ["a_tag", "b_tag"], 15 | "system_tags": ["runtime:test"] 16 | } 17 | await assert_api_post_response( 18 | cli, 19 | path="/flows/{}".format("TestFlow"), 20 | payload=payload, 21 | status=200 # why 200 instead of 201? 22 | ) 23 | 24 | # Record should be found in DB 25 | _flow = (await db.flow_table_postgres.get_flow(flow_id="TestFlow")).body 26 | 27 | compare_partial(_flow, payload) 28 | 29 | # Second post should fail as flow already exists. 30 | await assert_api_post_response( 31 | cli, 32 | path="/flows/{}".format("TestFlow"), 33 | payload=payload, 34 | status=409 35 | ) 36 | 37 | 38 | async def test_flows_get(cli, db): 39 | # create a few flows for test 40 | _first_flow = (await add_flow(db, flow_id="TestFlow", user_name="test_user-1", tags=["a_tag", "b_tag"], system_tags=["runtime:test"])).body 41 | _second_flow = (await add_flow(db, flow_id="AnotherTestFlow", user_name="test_user-1")).body 42 | 43 | # try to get all the created flows 44 | await assert_api_get_response(cli, "/flows", data=[_first_flow, _second_flow], data_is_unordered_list_of_dicts=True) 45 | 46 | 47 | async def test_flow_get(cli, db): 48 | # create flow for test 49 | _flow = (await add_flow(db, flow_id="TestFlow", user_name="test_user-1")).body 50 | 51 | # try to get created flow 52 | await assert_api_get_response(cli, "/flows/TestFlow", data=_flow) 53 | 54 | # non-existent flow should return 404 55 | await assert_api_get_response(cli, "/flows/AnotherFlow", status=404) 56 | -------------------------------------------------------------------------------- /services/metadata_service/tests/integration_tests/step_test.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from .utils import ( 4 | cli, db, 5 | assert_api_get_response, assert_api_post_response, compare_partial, 6 | add_flow, add_run, add_step, update_objects_with_run_tags 7 | ) 8 | import pytest 9 | 10 | pytestmark = [pytest.mark.integration_tests] 11 | 12 | 13 | async def test_step_post(cli, db): 14 | # create flow and run to add steps for. 15 | _flow = (await add_flow(db)).body 16 | _run = (await add_run(db, flow_id=_flow["flow_id"])).body 17 | 18 | payload = { 19 | "user_name": "test_user", 20 | "tags": ["a_tag", "b_tag"], 21 | "system_tags": ["runtime:test"] 22 | } 23 | 24 | # Check all fields from payload match what we get back from POST, 25 | # except for tags, which should match run tags instead. 
26 | def _check_response_body(body): 27 | payload_cp = copy.deepcopy(payload) 28 | payload_cp["tags"] = _run["tags"] 29 | payload_cp["system_tags"] = _run["system_tags"] 30 | compare_partial(body, payload_cp) 31 | 32 | _step = await assert_api_post_response( 33 | cli, 34 | path="/flows/{flow_id}/runs/{run_number}/steps/test_step/step".format(**_run), 35 | payload=payload, 36 | status=200, # why 200 instead of 201? 37 | check_fn=_check_response_body 38 | ) 39 | 40 | # Record should be found in DB 41 | _found = (await db.step_table_postgres.get_step(_step["flow_id"], _step["run_number"], _step["step_name"])).body 42 | 43 | compare_partial(_found, {"step_name": "test_step", **payload}) 44 | 45 | # Duplicate step names should not be accepted for a run 46 | await assert_api_post_response( 47 | cli, 48 | path="/flows/{flow_id}/runs/{run_number}/steps/test_step/step".format(**_run), 49 | payload=payload, 50 | status=409 51 | ) 52 | 53 | # Posting on a non-existent flow_id should result in error 54 | await assert_api_post_response( 55 | cli, 56 | path="/flows/NonExistentFlow/runs/{run_number}/steps/test_step/step".format(**_run), 57 | payload=payload, 58 | status=500 59 | ) 60 | 61 | # posting on a non-existent run number should result in an error 62 | await assert_api_post_response( 63 | cli, 64 | path="/flows/{flow_id}/runs/1234/steps/test_step/step".format(**_run), 65 | payload=payload, 66 | status=500 67 | ) 68 | 69 | 70 | async def test_steps_get(cli, db): 71 | # create a flow and run for the test 72 | _flow = (await add_flow(db, "TestFlow", "test_user-1", ["a_tag", "b_tag"], ["runtime:test"])).body 73 | _run = (await add_run(db, flow_id=_flow["flow_id"])).body 74 | 75 | # add steps to the run 76 | _first_step = (await add_step(db, flow_id=_run["flow_id"], run_number=_run["run_number"], step_name="first_step")).body 77 | _second_step = (await add_step(db, flow_id=_run["flow_id"], run_number=_run["run_number"], step_name="second_step")).body 78 | 79 | # expect steps' tags to be overridden by tags of their ancestral run 80 | update_objects_with_run_tags('step', [_first_step, _second_step], _run) 81 | 82 | # try to get all the created steps 83 | await assert_api_get_response(cli, "/flows/{flow_id}/runs/{run_number}/steps".format(**_first_step), 84 | data=[_first_step, _second_step], data_is_unordered_list_of_dicts=True) 85 | 86 | # getting steps for non-existent flow should return empty list 87 | await assert_api_get_response(cli, "/flows/NonExistentFlow/runs/{run_number}/steps".format(**_first_step), status=200, data=[]) 88 | 89 | # getting steps for non-existent run should return empty list 90 | await assert_api_get_response(cli, "/flows/{flow_id}/runs/1234/steps".format(**_first_step), status=200, data=[]) 91 | 92 | 93 | async def test_step_get(cli, db): 94 | # create flow for test 95 | _flow = (await add_flow(db, "TestFlow", "test_user-1", ["a_tag", "b_tag"], ["runtime:test"])).body 96 | _run = (await add_run(db, flow_id=_flow["flow_id"])).body 97 | 98 | # add step to run for testing 99 | _step = (await add_step(db, flow_id=_run["flow_id"], run_number=_run["run_number"], step_name="first_step")).body 100 | 101 | # expect step's tags to be overridden by tags of their ancestral run 102 | update_objects_with_run_tags('step', [_step], _run) 103 | 104 | # try to get created step 105 | await assert_api_get_response(cli, "/flows/{flow_id}/runs/{run_number}/steps/{step_name}".format(**_step), data=_step) 106 | 107 | # non-existent flow, run, or step should return 404 108 | await 
assert_api_get_response(cli, "/flows/NonExistentFlow/runs/{run_number}/steps/{step_name}".format(**_step), status=404) 109 | await assert_api_get_response(cli, "/flows/{flow_id}/runs/1234/steps/{step_name}".format(**_step), status=404) 110 | await assert_api_get_response(cli, "/flows/{flow_id}/runs/{run_number}/steps/nonexistent_step".format(**_step), status=404) 111 | -------------------------------------------------------------------------------- /services/metadata_service/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/metadata_service/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /services/metadata_service/tests/unit_tests/api_util_test.py: -------------------------------------------------------------------------------- 1 | import json 2 | from services.metadata_service.api.utils import handle_exceptions, format_response 3 | 4 | async def test_handle_exceptions(): 5 | 6 | @handle_exceptions 7 | async def do_not_raise(): 8 | return True 9 | 10 | @format_response 11 | @handle_exceptions 12 | async def raise_without_id(): 13 | raise Exception("test") 14 | 15 | # wrapper should not touch successful calls. 16 | assert (await do_not_raise()) 17 | 18 | # NOTE: aiohttp Response StringPayload only has the internal property _value for accessing the payload value. 19 | 20 | response_without_id = await raise_without_id() 21 | assert response_without_id.status == 500 22 | _body = json.loads(response_without_id.body._value) 23 | assert _body['traceback'] is not None 24 | -------------------------------------------------------------------------------- /services/metadata_service/tests/unit_tests/task_test.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | from services.utils import has_heartbeat_capable_version_tag 4 | 5 | 6 | expectations = [ 7 | ([], False), 8 | (["2.2.12"], False), 9 | (["metaflow_version:0.5"], False), 10 | (["metaflow_version:1.13"], False), 11 | (["metaflow_version:1"], False), 12 | (["metaflow_version:1.14.0"], True), 13 | (["metaflow_version:1.22.1"], True), 14 | (["metaflow_version:2.0.0"], False), 15 | (["metaflow_version:2.0"], False), 16 | (["metaflow_version:2"], False), 17 | (["metaflow_version:2.0.5"], False), 18 | (["metaflow_version:2.2.11"], False), 19 | (["metaflow_version:2.2.12"], True), 20 | (["metaflow_version:2.2.12+ab1234"], True), 21 | (["metaflow_version:2.3"], True), 22 | (["metaflow_version:2.3.1"], True), 23 | (["metaflow_version:2.4.1"], True), 24 | (["metaflow_version:2.12.24.post9-git2a5367b+ob(v1)"], True), 25 | (["metaflow_version:2.12.24+inconsequential+trailing-string"], True), 26 | (["metaflow_version:2.12.24.break"], True), 27 | (["metaflow_version:3"], True), 28 | (["metaflow_version:custom-1"], True), 29 | ] 30 | 31 | 32 | @pytest.mark.parametrize("system_tags, expected_boolean", expectations) 33 | async def test_has_heartbeat_capable_version_tag(system_tags, expected_boolean): 34 | _result_bool = has_heartbeat_capable_version_tag(system_tags) 35 | 36 | assert expected_boolean == _result_bool 37 | -------------------------------------------------------------------------------- /services/migration_service/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/migration_service/__init__.py -------------------------------------------------------------------------------- /services/migration_service/api/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shlex 3 | 4 | version_dict = { 5 | '0': 'v_1_0_1', 6 | '1': 'v_1_0_1', 7 | '20200603104139': '20200603104139', 8 | '20201002000616': '20201002000616', 9 | '20210202145952': '20210202145952', 10 | '20210260056859': '20210260056859', 11 | '20211202100726': '20211202100726', 12 | '20220503175500': '20220503175500', 13 | '20230118020300': 'latest', 14 | } 15 | 16 | latest = "latest" 17 | 18 | 19 | def make_goose_template(conn_str, command): 20 | return ' '.join(shlex.quote(arg) for arg in [ 21 | "goose", 22 | "postgres", 23 | f"{conn_str}", 24 | f"{command}" 25 | ]) 26 | 27 | 28 | path = os.path.dirname(__file__) + "/../migration_files" 29 | 30 | 31 | def make_goose_migration_template(conn_str, command): 32 | return ' '.join(shlex.quote(arg) for arg in [ 33 | "goose", 34 | "-dir", 35 | path, 36 | "postgres", 37 | f"{conn_str}", 38 | f"{command}" 39 | ]) 40 | -------------------------------------------------------------------------------- /services/migration_service/api/admin.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from aiohttp import web 4 | from subprocess import Popen 5 | from multidict import MultiDict 6 | from .utils import ApiUtils 7 | from . import make_goose_migration_template 8 | from services.migration_service.migration_config import db_conf 9 | 10 | 11 | class AdminApi(object): 12 | def __init__(self, app): 13 | app.router.add_route("GET", "/version", self.version) 14 | app.router.add_route("GET", "/ping", self.ping) 15 | app.router.add_route("GET", "/db_schema_status", self.db_schema_status) 16 | 17 | endpoints_enabled = int(os.environ.get("MF_MIGRATION_ENDPOINTS_ENABLED", 18 | 1)) 19 | if endpoints_enabled: 20 | app.router.add_route("PATCH", "/upgrade", self.upgrade) 21 | 22 | async def ping(self, request): 23 | """ 24 | --- 25 | description: This end-point allow to test that service is up. 26 | tags: 27 | - Admin 28 | produces: 29 | - 'text/plain' 30 | responses: 31 | "202": 32 | description: successful operation. Return "pong" text 33 | "405": 34 | description: invalid HTTP Method 35 | """ 36 | return web.Response(text="pong") 37 | 38 | async def version(self, request): 39 | """ 40 | --- 41 | description: This end-point returns the latest compatible version of the 42 | metadata service 43 | tags: 44 | - Admin 45 | produces: 46 | - 'application/json' 47 | responses: 48 | "200": 49 | description: successful operation. Return version text 50 | "405": 51 | description: invalid HTTP Method 52 | """ 53 | version = await ApiUtils.get_latest_compatible_version() 54 | return web.Response(text=version) 55 | 56 | async def upgrade(self, request): 57 | """ 58 | --- 59 | description: This end-point upgrades to the latest available version of 60 | of the schema 61 | tags: 62 | - Admin 63 | produces: 64 | - 'text/plain' 65 | responses: 66 | "200": 67 | description: successful operation. 
Return text 68 | "500": 69 | description: could not upgrade 70 | """ 71 | goose_version_cmd = make_goose_migration_template( 72 | db_conf.connection_string_url(), 73 | "up" 74 | ) 75 | p = Popen(goose_version_cmd, shell=True, 76 | close_fds=True) 77 | p.wait() 78 | if p.returncode == 0: 79 | return web.Response(text="upgrade success") 80 | else: 81 | return web.Response(text="upgrade failed", status=500) 82 | 83 | async def db_schema_status(self, request): 84 | """ 85 | --- 86 | description: This end-point returns varius stats around 87 | tags: 88 | - Admin 89 | produces: 90 | - 'application/json' 91 | responses: 92 | "200": 93 | description: successful operation. returns status of db schema and migrations 94 | "500": 95 | description: could not upgrade 96 | """ 97 | try: 98 | version = await ApiUtils.get_goose_version() 99 | migration_in_progress = await ApiUtils.is_migration_in_progress() 100 | unapplied_migrations = ApiUtils.get_unapplied_migrations(version) 101 | body = { 102 | "is_up_to_date": len(unapplied_migrations) == 0, 103 | "current_version": version, 104 | "migration_in_progress": migration_in_progress, 105 | "db_schema_versions": ApiUtils.list_migrations(), 106 | "unapplied_migrations": unapplied_migrations 107 | } 108 | return web.Response(body=json.dumps(body), 109 | headers=MultiDict({"Content-Type": "application/json"})) 110 | 111 | except Exception as e: 112 | body = { 113 | "detail": repr(e) 114 | } 115 | return web.Response(status=500, body=json.dumps(body), 116 | headers=MultiDict({"Content-Type": "application/json"})) 117 | -------------------------------------------------------------------------------- /services/migration_service/api/utils.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen, PIPE 2 | from ..data.postgres_async_db import PostgresUtils 3 | from . 
import version_dict, latest, \ 4 | make_goose_migration_template, make_goose_template 5 | from services.migration_service.migration_config import db_conf 6 | import sys 7 | 8 | 9 | class ApiUtils(object): 10 | @staticmethod 11 | def list_migrations(): 12 | migrations_list = list((version_dict.keys())) 13 | migrations_list.sort(key=int) 14 | return migrations_list[1:] 15 | 16 | @staticmethod 17 | def get_unapplied_migrations(current_version): 18 | try: 19 | migrations_list = ApiUtils.list_migrations() 20 | index_version = migrations_list.index(current_version) 21 | return migrations_list[index_version + 1:] 22 | except: 23 | return migrations_list 24 | 25 | @staticmethod 26 | async def get_goose_version(): 27 | # if tables exist but goose doesn't find version table then 28 | goose_version_cmd = make_goose_template(db_conf.connection_string_url(), 'version') 29 | 30 | p = Popen(goose_version_cmd, stdout=PIPE, stderr=PIPE, shell=True, 31 | close_fds=True) 32 | p.wait() 33 | 34 | version = None 35 | std_err = p.stderr.read() 36 | lines_err = std_err.decode("utf-8").split("\n") 37 | for line in lines_err: 38 | if "goose: version" in line: 39 | s = line.split("goose: version ") 40 | version = s[1] 41 | print(line) 42 | break 43 | 44 | if version: 45 | return version 46 | else: 47 | raise Exception( 48 | "unable to get db version via goose: " + std_err.decode("utf-8")) 49 | 50 | @staticmethod 51 | async def get_latest_compatible_version(): 52 | is_present = await PostgresUtils.is_present("flows_v3") 53 | if is_present: 54 | version = await ApiUtils.get_goose_version() 55 | return version_dict[version] 56 | else: 57 | print("Running initial migration..", file=sys.stderr) 58 | goose_version_cmd = make_goose_migration_template(db_conf.connection_string_url(), 'up') 59 | p = Popen(goose_version_cmd, shell=True, 60 | close_fds=True) 61 | if p.wait() != 0: 62 | raise Exception("Failed to run initial migration") 63 | return latest 64 | 65 | @staticmethod 66 | async def is_migration_in_progress(): 67 | goose_version_cmd = make_goose_template( 68 | db_conf.connection_string_url(), "status" 69 | ) 70 | 71 | p = Popen(goose_version_cmd, stdout=PIPE, stderr=PIPE, shell=True, 72 | close_fds=True) 73 | p.wait() 74 | 75 | std_err = p.stderr.read() 76 | lines_err = std_err.decode("utf-8") 77 | if "Pending" in lines_err: 78 | return True 79 | 80 | return False 81 | -------------------------------------------------------------------------------- /services/migration_service/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/migration_service/data/__init__.py -------------------------------------------------------------------------------- /services/migration_service/data/postgres_async_db.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import aiopg 4 | 5 | from services.utils import DBConfiguration 6 | 7 | 8 | class PostgresUtils(object): 9 | @staticmethod 10 | async def is_present(table_name): 11 | with (await AsyncPostgresDB.get_instance().pool.cursor()) as cur: 12 | await cur.execute( 13 | "select * from information_schema.tables where table_name=%s", 14 | (table_name,), 15 | ) 16 | return bool(cur.rowcount) 17 | 18 | 19 | class AsyncPostgresDB(object): 20 | connection = None 21 | __instance = None 22 | 23 | pool = None 24 | 25 | @staticmethod 26 | def get_instance(): 27 | if 
AsyncPostgresDB.__instance is None: 28 | AsyncPostgresDB() 29 | return AsyncPostgresDB.__instance 30 | 31 | def __init__(self): 32 | if self.__instance is not None: 33 | return 34 | 35 | AsyncPostgresDB.__instance = self 36 | 37 | async def _init(self, db_conf: DBConfiguration): 38 | # todo make poolsize min and max configurable as well as timeout 39 | # todo add retry and better error message 40 | retries = 3 41 | for i in range(retries): 42 | try: 43 | self.pool = await aiopg.create_pool(db_conf.get_dsn(), timeout=db_conf.timeout) 44 | except Exception as e: 45 | print("printing connection exception: " + str(e)) 46 | if retries - i < 1: 47 | raise e 48 | time.sleep(1) 49 | continue 50 | -------------------------------------------------------------------------------- /services/migration_service/get_virtual_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import requests 4 | import socket 5 | import time 6 | from services.data.service_configs import max_startup_retries, \ 7 | startup_retry_wait_time_seconds 8 | 9 | port = int(os.environ.get("MF_MIGRATION_PORT", 8082)) 10 | 11 | try: 12 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 13 | retry_count = max_startup_retries 14 | while retry_count > 0: 15 | print(retry_count) 16 | try: 17 | print("connecting") 18 | s.connect(('localhost', port)) 19 | print("Port reachable", port) 20 | break 21 | except socket.error as e: 22 | print("booting...") 23 | print(e) 24 | time.sleep(startup_retry_wait_time_seconds) 25 | except Exception: 26 | print("something broke") 27 | finally: 28 | retry_count = retry_count - 1 29 | # continue 30 | s.close() 31 | if retry_count == 0: 32 | print("ran out of retries to get migration version, exiting") 33 | sys.exit(1) 34 | except Exception as e: 35 | print(e) 36 | sys.exit(1) 37 | 38 | r = requests.get('http://localhost:{0}/version'.format(port)) 39 | r.raise_for_status() 40 | 41 | conf_file = open('/root/services/migration_service/config', 'w') 42 | print(r.text, file=conf_file) 43 | conf_file.close() 44 | -------------------------------------------------------------------------------- /services/migration_service/migration_config.py: -------------------------------------------------------------------------------- 1 | from services.utils import DBConfiguration 2 | db_conf = DBConfiguration() 3 | -------------------------------------------------------------------------------- /services/migration_service/migration_files/1_create_tables.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- +goose StatementBegin 3 | SELECT 'up SQL query'; 4 | CREATE TABLE IF NOT EXISTS flows_v3 ( 5 | flow_id VARCHAR(255) PRIMARY KEY, 6 | user_name VARCHAR(255), 7 | ts_epoch BIGINT NOT NULL, 8 | tags JSONB, 9 | system_tags JSONB 10 | ); 11 | 12 | CREATE TABLE IF NOT EXISTS runs_v3 ( 13 | flow_id VARCHAR(255) NOT NULL, 14 | run_number SERIAL NOT NULL, 15 | user_name VARCHAR(255), 16 | ts_epoch BIGINT NOT NULL, 17 | tags JSONB, 18 | system_tags JSONB, 19 | PRIMARY KEY(flow_id, run_number), 20 | FOREIGN KEY(flow_id) REFERENCES flows_v3 (flow_id) 21 | ); 22 | 23 | CREATE TABLE IF NOT EXISTS steps_v3 ( 24 | flow_id VARCHAR(255) NOT NULL, 25 | run_number BIGINT NOT NULL, 26 | step_name VARCHAR(255) NOT NULL, 27 | user_name VARCHAR(255), 28 | ts_epoch BIGINT NOT NULL, 29 | tags JSONB, 30 | system_tags JSONB, 31 | PRIMARY KEY(flow_id, run_number, step_name), 32 | FOREIGN KEY(flow_id, run_number) REFERENCES 
runs_v3 (flow_id, run_number) 33 | ); 34 | 35 | 36 | CREATE TABLE IF NOT EXISTS tasks_v3 ( 37 | flow_id VARCHAR(255) NOT NULL, 38 | run_number BIGINT NOT NULL, 39 | step_name VARCHAR(255) NOT NULL, 40 | task_id BIGSERIAL PRIMARY KEY, 41 | user_name VARCHAR(255), 42 | ts_epoch BIGINT NOT NULL, 43 | tags JSONB, 44 | system_tags JSONB, 45 | FOREIGN KEY(flow_id, run_number, step_name) REFERENCES steps_v3 (flow_id, run_number, step_name) 46 | ); 47 | 48 | CREATE TABLE IF NOT EXISTS metadata_v3 ( 49 | flow_id VARCHAR(255), 50 | run_number BIGINT NOT NULL, 51 | step_name VARCHAR(255) NOT NULL, 52 | task_id BIGINT NOT NULL, 53 | id BIGSERIAL NOT NULL, 54 | field_name VARCHAR(255) NOT NULL, 55 | value TEXT NOT NULL, 56 | type VARCHAR(255) NOT NULL, 57 | user_name VARCHAR(255), 58 | ts_epoch BIGINT NOT NULL, 59 | tags JSONB, 60 | system_tags JSONB, 61 | PRIMARY KEY(flow_id, run_number, step_name, task_id, field_name) 62 | ); 63 | 64 | CREATE TABLE IF NOT EXISTS artifact_v3 ( 65 | flow_id VARCHAR(255) NOT NULL, 66 | run_number BIGINT NOT NULL, 67 | step_name VARCHAR(255) NOT NULL, 68 | task_id BIGINT NOT NULL, 69 | name VARCHAR(255) NOT NULL, 70 | location VARCHAR(255) NOT NULL, 71 | ds_type VARCHAR(255) NOT NULL, 72 | sha VARCHAR(255), 73 | type VARCHAR(255), 74 | content_type VARCHAR(255), 75 | user_name VARCHAR(255), 76 | attempt_id SMALLINT NOT NULL, 77 | ts_epoch BIGINT NOT NULL, 78 | tags JSONB, 79 | system_tags JSONB, 80 | PRIMARY KEY(flow_id, run_number, step_name, task_id, attempt_id, name) 81 | ); 82 | 83 | -- +goose StatementEnd 84 | 85 | -- +goose Down 86 | -- +goose StatementBegin 87 | SELECT 'down SQL query'; 88 | 89 | -- +goose StatementEnd 90 | -------------------------------------------------------------------------------- /services/migration_service/migration_files/20200603104139_add_str_id_cols.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- +goose StatementBegin 3 | SELECT 'up SQL query'; 4 | ALTER TABLE runs_v3 5 | ADD COLUMN run_id VARCHAR(255); 6 | 7 | ALTER TABLE runs_v3 8 | ADD COLUMN last_heartbeat_ts BIGINT; 9 | 10 | ALTER TABLE runs_v3 11 | ADD CONSTRAINT runs_v3_flow_id_run_id_key UNIQUE (flow_id, run_id); 12 | 13 | ALTER TABLE steps_v3 14 | ADD COLUMN run_id VARCHAR(255); 15 | 16 | ALTER TABLE steps_v3 17 | ADD CONSTRAINT steps_v3_flow_id_run_id_step_name_key UNIQUE (flow_id, run_id, step_name); 18 | 19 | ALTER TABLE tasks_v3 20 | ADD COLUMN run_id VARCHAR(255); 21 | 22 | ALTER TABLE tasks_v3 23 | ADD COLUMN task_name VARCHAR(255); 24 | 25 | ALTER TABLE tasks_v3 26 | ADD COLUMN last_heartbeat_ts BIGINT; 27 | 28 | ALTER TABLE tasks_v3 29 | ADD CONSTRAINT tasks_v3_flow_id_run_number_step_name_task_name_key UNIQUE (flow_id, run_number, step_name, task_name); 30 | 31 | ALTER TABLE metadata_v3 32 | ADD COLUMN run_id VARCHAR(255); 33 | 34 | ALTER TABLE metadata_v3 35 | ADD COLUMN task_name VARCHAR(255); 36 | 37 | ALTER TABLE artifact_v3 38 | ADD COLUMN run_id VARCHAR(255); 39 | 40 | ALTER TABLE artifact_v3 41 | ADD COLUMN task_name VARCHAR(255); 42 | 43 | -- +goose StatementEnd 44 | 45 | -- +goose Down 46 | -- +goose StatementBegin 47 | SELECT 'down SQL query'; 48 | ALTER TABLE artifact_v3 49 | DROP COLUMN task_name; 50 | 51 | ALTER TABLE artifact_v3 52 | DROP COLUMN run_id; 53 | 54 | ALTER TABLE metadata_v3 55 | DROP COLUMN run_id; 56 | 57 | ALTER TABLE metadata_v3 58 | DROP COLUMN task_name; 59 | 60 | ALTER TABLE tasks_v3 61 | DROP CONSTRAINT tasks_v3_flow_id_run_number_step_name_task_name_key; 62 | 63 | ALTER 
TABLE tasks_v3 64 | DROP COLUMN run_id; 65 | 66 | ALTER TABLE tasks_v3 67 | DROP COLUMN task_name; 68 | 69 | ALTER TABLE tasks_v3 70 | DROP COLUMN last_heartbeat_ts; 71 | 72 | ALTER TABLE steps_v3 73 | DROP CONSTRAINT steps_v3_flow_id_run_id_step_name_key; 74 | 75 | ALTER TABLE steps_v3 76 | DROP COLUMN run_id; 77 | 78 | ALTER TABLE runs_v3 79 | DROP CONSTRAINT runs_v3_flow_id_run_id_key; 80 | 81 | ALTER TABLE runs_v3 82 | DROP COLUMN last_heartbeat_ts; 83 | 84 | ALTER TABLE runs_v3 85 | DROP COLUMN run_id; 86 | 87 | -- +goose StatementEnd 88 | -------------------------------------------------------------------------------- /services/migration_service/migration_files/20201002000616_update_metadata_primary_key.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- +goose StatementBegin 3 | SELECT 'up SQL query'; 4 | 5 | ALTER TABLE metadata_v3 6 | ADD CONSTRAINT metadata_v3_primary_key UNIQUE (id,flow_id, run_number, step_name, task_id, field_name); 7 | 8 | CREATE INDEX metadata_v3_akey ON metadata_v3(flow_id, run_number, step_name, task_id, field_name); 9 | 10 | ALTER TABLE metadata_v3 11 | DROP CONSTRAINT metadata_v3_pkey; 12 | 13 | ALTER TABLE metadata_v3 14 | ADD PRIMARY KEY (id,flow_id, run_number, step_name, task_id, field_name); 15 | 16 | ALTER TABLE metadata_v3 17 | DROP CONSTRAINT metadata_v3_primary_key; 18 | -- +goose StatementEnd 19 | 20 | -- +goose Down 21 | -- +goose StatementBegin 22 | SELECT 'down SQL query'; 23 | 24 | -- create index that will become the primary key 25 | ALTER TABLE metadata_v3 26 | ADD CONSTRAINT metadata_v3_primary_key UNIQUE (flow_id, run_number, step_name, task_id, field_name); 27 | 28 | -- drop index created for optimized access 29 | ALTER TABLE metadata_v3 30 | DROP metadata_v3 metadata_v3_akey; 31 | 32 | -- drop primary key 33 | ALTER TABLE metadata_v3 34 | DROP CONSTRAINT metadata_v3_pkey; 35 | 36 | -- set index as primary key 37 | ALTER TABLE metadata_v3 38 | ADD PRIMARY KEY (flow_id, run_number, step_name, task_id, field_name); 39 | 40 | -- drop index 41 | ALTER TABLE metadata_v3 42 | DROP CONSTRAINT metadata_v3_primary_key; 43 | -- +goose StatementEnd 44 | -------------------------------------------------------------------------------- /services/migration_service/migration_files/20210202145952_add_runs_idx_ts_epoch_flow_id.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- +goose StatementBegin 3 | SELECT 'up SQL query'; 4 | 5 | -- Others 6 | 7 | CREATE INDEX IF NOT EXISTS runs_v3_idx_ts_epoch ON runs_v3 (ts_epoch); 8 | 9 | CREATE INDEX IF NOT EXISTS runs_v3_idx_gin_tags_combined ON runs_v3 USING gin ((tags || system_tags)); 10 | 11 | -- flow_id + ts_epoch 12 | 13 | CREATE INDEX IF NOT EXISTS runs_v3_idx_flow_id_asc_ts_epoch_desc ON runs_v3 (flow_id ASC, ts_epoch DESC); 14 | 15 | -- user && ts_epoch 16 | 17 | CREATE INDEX IF NOT EXISTS runs_v3_idx_user_asc_ts_epoch_desc ON runs_v3 ( 18 | (CASE 19 | WHEN system_tags ? 
('user:' || user_name) 20 | THEN user_name 21 | ELSE NULL 22 | END) ASC, ts_epoch DESC 23 | ); 24 | 25 | -- +goose StatementEnd 26 | 27 | -- +goose Down 28 | -- +goose StatementBegin 29 | SELECT 'down SQL query'; 30 | 31 | DROP INDEX IF EXISTS runs_v3_idx_user_asc_ts_epoch_desc; 32 | 33 | DROP INDEX IF EXISTS runs_v3_idx_flow_id_asc_ts_epoch_desc; 34 | 35 | DROP INDEX IF EXISTS runs_v3_idx_gin_tags_combined; 36 | 37 | DROP INDEX IF EXISTS runs_v3_idx_ts_epoch; 38 | 39 | 40 | -- +goose StatementEnd 41 | -------------------------------------------------------------------------------- /services/migration_service/migration_files/20210260056859_add_tasks_idx_on_.sql: -------------------------------------------------------------------------------- 1 | -- +goose NO TRANSACTION 2 | -- +goose Up 3 | -- +goose StatementBegin 4 | 5 | -- tasks on flow_id, run_id, step_name and task_name 6 | CREATE INDEX CONCURRENTLY IF NOT EXISTS tasks_v3_idx_flow_id_run_id_step_name_task_name ON tasks_v3 ( 7 | flow_id, run_id, step_name, task_name) WHERE run_id IS NOT NULL AND task_name IS NOT NULL; 8 | 9 | -- +goose StatementEnd 10 | 11 | -- +goose Down 12 | -- +goose StatementBegin 13 | DROP INDEX IF EXISTS tasks_v3_idx_flow_id_run_id_step_name_task_name; 14 | 15 | -- +goose StatementEnd 16 | -------------------------------------------------------------------------------- /services/migration_service/migration_files/20211202100726_add_str_id_indices.sql: -------------------------------------------------------------------------------- 1 | -- +goose NO TRANSACTION 2 | -- +goose Up 3 | -- runs idx on flow_id, run_id 4 | CREATE INDEX CONCURRENTLY IF NOT EXISTS runs_v3_idx_str_ids_primary_key ON runs_v3 (flow_id, run_id) 5 | WHERE 6 | run_id IS NOT NULL; 7 | 8 | -- steps idx on flow_id, run_id 9 | CREATE INDEX CONCURRENTLY IF NOT EXISTS steps_v3_idx_str_ids_primary_key ON steps_v3 (flow_id, run_id, step_name) 10 | WHERE 11 | run_id IS NOT NULL; 12 | 13 | -- metadata idx on id, flow_id, run_id, step_name and task_name, field_name 14 | CREATE INDEX CONCURRENTLY IF NOT EXISTS metadata_v3_idx_str_ids_primary_key ON metadata_v3 ( 15 | id, 16 | flow_id, 17 | run_id, 18 | step_name, 19 | task_name, 20 | field_name 21 | ) 22 | WHERE 23 | run_id IS NOT NULL 24 | AND task_name IS NOT NULL; 25 | 26 | -- artifact idx on flow_id, run_id, step_name and task_name, attempt_id, name 27 | CREATE INDEX CONCURRENTLY IF NOT EXISTS artifact_v3_idx_str_ids_primary_key ON artifact_v3 ( 28 | flow_id, 29 | run_id, 30 | step_name, 31 | task_name, 32 | attempt_id, 33 | name 34 | ) 35 | WHERE 36 | run_id IS NOT NULL 37 | AND task_name IS NOT NULL; 38 | 39 | -- +goose Down 40 | -- +goose StatementBegin 41 | DROP INDEX IF EXISTS runs_v3_idx_str_ids_primary_key; 42 | 43 | DROP INDEX IF EXISTS steps_v3_idx_str_ids_primary_key; 44 | 45 | DROP INDEX IF EXISTS metadata_v3_idx_str_ids_primary_key; 46 | 47 | DROP INDEX IF EXISTS artifact_v3_idx_str_ids_primary_key; 48 | 49 | -- +goose StatementEnd -------------------------------------------------------------------------------- /services/migration_service/migration_files/20220503175500_add_run_epoch_index.sql: -------------------------------------------------------------------------------- 1 | -- +goose NO TRANSACTION 2 | -- +goose Up 3 | -- +goose StatementBegin 4 | 5 | -- UI requests recent runs a lot, this index helps make those queries go faster. 
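-- (Illustration only, not part of this migration: a typical query of that kind might be SELECT * FROM runs_v3 ORDER BY ts_epoch DESC LIMIT 25, which can read this index in order and stop early.)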
6 | -- (it seems to help it push down LIMITs even if there aren't too many runs in the db) 7 | CREATE INDEX CONCURRENTLY IF NOT EXISTS runs_v3_idx_epoch_ts_desc ON runs_v3 (ts_epoch DESC); 8 | 9 | -- +goose StatementEnd 10 | 11 | -- +goose Down 12 | -- +goose StatementBegin 13 | 14 | DROP INDEX IF EXISTS runs_v3_idx_epoch_ts_desc; 15 | 16 | -- +goose StatementEnd -------------------------------------------------------------------------------- /services/migration_service/migration_files/20230118020300_drop_partial_indexes.sql: -------------------------------------------------------------------------------- 1 | -- +goose NO TRANSACTION 2 | -- +goose Up 3 | 4 | -- Drop partial str_ids indexes created with 5 | -- 20211202100726_add_str_id_indices.sql and 20210260056859_add_tasks_idx_on_.sql 6 | -- and recreate them without the constraining WHERE clause. 7 | -- This is being done as the psql query planner is not using these indexes many times. 8 | -- To avoid perf downtime we first create the new indexes and then drop the old ones. 9 | CREATE INDEX CONCURRENTLY IF NOT EXISTS runs_v3_idx_str_ids_primary_key_v2 10 | ON runs_v3 (flow_id, run_id); 11 | 12 | DROP INDEX CONCURRENTLY IF EXISTS runs_v3_idx_str_ids_primary_key; 13 | 14 | CREATE INDEX CONCURRENTLY IF NOT EXISTS steps_v3_idx_str_ids_primary_key_v2 15 | ON steps_v3 (flow_id, run_id, step_name); 16 | 17 | DROP INDEX CONCURRENTLY IF EXISTS steps_v3_idx_str_ids_primary_key; 18 | 19 | CREATE INDEX CONCURRENTLY IF NOT EXISTS tasks_v3_idx_flow_id_run_id_step_name_task_name_v2 20 | ON tasks_v3(flow_id, run_id, step_name, task_name); 21 | 22 | DROP INDEX CONCURRENTLY IF EXISTS tasks_v3_idx_flow_id_run_id_step_name_task_name; 23 | 24 | CREATE INDEX CONCURRENTLY IF NOT EXISTS metadata_v3_idx_str_ids_a_key 25 | ON metadata_v3 ( 26 | flow_id, 27 | run_id, 28 | step_name, 29 | task_name, 30 | field_name 31 | ); 32 | 33 | CREATE INDEX CONCURRENTLY IF NOT EXISTS metadata_v3_idx_str_ids_a_key_with_task_id 34 | ON metadata_v3 ( 35 | flow_id, 36 | run_id, 37 | step_name, 38 | task_id, 39 | field_name 40 | ); 41 | 42 | DROP INDEX CONCURRENTLY IF EXISTS metadata_v3_idx_str_ids_primary_key; 43 | 44 | CREATE INDEX CONCURRENTLY IF NOT EXISTS artifact_v3_idx_str_ids_primary_key_v2 ON artifact_v3 ( 45 | flow_id, 46 | run_id, 47 | step_name, 48 | task_name, 49 | attempt_id, 50 | name 51 | ); 52 | 53 | CREATE INDEX CONCURRENTLY IF NOT EXISTS artifact_v3_idx_str_ids_primary_key_with_task_id ON artifact_v3 ( 54 | flow_id, 55 | run_id, 56 | step_name, 57 | task_id, 58 | attempt_id, 59 | name 60 | ); 61 | 62 | DROP INDEX CONCURRENTLY IF EXISTS artifact_v3_idx_str_ids_primary_key; 63 | 64 | 65 | -- +goose Down 66 | 67 | -- copy of 20211202100726_add_str_id_indices.sql and 20210260056859_add_tasks_idx_on_.sql 68 | -- runs idx on flow_id, run_id 69 | CREATE INDEX CONCURRENTLY IF NOT EXISTS runs_v3_idx_str_ids_primary_key ON runs_v3 (flow_id, run_id) 70 | WHERE 71 | run_id IS NOT NULL; 72 | 73 | -- steps idx on flow_id, run_id 74 | CREATE INDEX CONCURRENTLY IF NOT EXISTS steps_v3_idx_str_ids_primary_key ON steps_v3 (flow_id, run_id, step_name) 75 | WHERE 76 | run_id IS NOT NULL; 77 | 78 | -- metadata idx on id, flow_id, run_id, step_name and task_name, field_name 79 | CREATE INDEX CONCURRENTLY IF NOT EXISTS metadata_v3_idx_str_ids_primary_key ON metadata_v3 ( 80 | id, 81 | flow_id, 82 | run_id, 83 | step_name, 84 | task_name, 85 | field_name 86 | ) 87 | WHERE 88 | run_id IS NOT NULL 89 | AND task_name IS NOT NULL; 90 | 91 | -- artifact idx on flow_id, run_id, step_name and 
task_name, attempt_id, name 92 | CREATE INDEX CONCURRENTLY IF NOT EXISTS artifact_v3_idx_str_ids_primary_key ON artifact_v3 ( 93 | flow_id, 94 | run_id, 95 | step_name, 96 | task_name, 97 | attempt_id, 98 | name 99 | ) 100 | WHERE 101 | run_id IS NOT NULL 102 | AND task_name IS NOT NULL; 103 | 104 | -- tasks on flow_id, run_id, step_name and task_name 105 | CREATE INDEX CONCURRENTLY IF NOT EXISTS tasks_v3_idx_flow_id_run_id_step_name_task_name ON tasks_v3 ( 106 | flow_id, run_id, step_name, task_name) WHERE run_id IS NOT NULL AND task_name IS NOT NULL; 107 | 108 | 109 | DROP INDEX CONCURRENTLY IF EXISTS runs_v3_idx_str_ids_primary_key_v2; 110 | DROP INDEX CONCURRENTLY IF EXISTS steps_v3_idx_str_ids_primary_key_v2; 111 | DROP INDEX CONCURRENTLY IF EXISTS tasks_v3_idx_flow_id_run_id_step_name_task_name_v2; 112 | DROP INDEX CONCURRENTLY IF EXISTS metadata_v3_idx_str_ids_a_key; 113 | DROP INDEX CONCURRENTLY IF EXISTS metadata_v3_idx_str_ids_a_key_with_task_id; 114 | DROP INDEX CONCURRENTLY IF EXISTS artifact_v3_idx_str_ids_primary_key_v2; 115 | DROP INDEX CONCURRENTLY IF EXISTS artifact_v3_idx_str_ids_primary_key_with_task_id; 116 | -------------------------------------------------------------------------------- /services/migration_service/migration_server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from aiohttp import web 5 | 6 | from .api.admin import AdminApi 7 | 8 | from .data.postgres_async_db import AsyncPostgresDB 9 | from services.utils import DBConfiguration 10 | from .migration_config import db_conf 11 | 12 | 13 | def app(loop=None, db_conf: DBConfiguration = None): 14 | 15 | loop = loop or asyncio.get_event_loop() 16 | app = web.Application(loop=loop) 17 | async_db = AsyncPostgresDB() 18 | loop.run_until_complete(async_db._init(db_conf)) 19 | AdminApi(app) 20 | return app 21 | 22 | 23 | def main(): 24 | loop = asyncio.get_event_loop() 25 | the_app = app(loop, db_conf) 26 | handler = web.AppRunner(the_app) 27 | loop.run_until_complete(handler.setup()) 28 | 29 | port = os.environ.get("MF_MIGRATION_PORT", 8082) 30 | host = str(os.environ.get("MF_METADATA_HOST", "0.0.0.0")) 31 | f = loop.create_server(handler.server, host, port) 32 | 33 | srv = loop.run_until_complete(f) 34 | 35 | print("serving on", srv.sockets[0].getsockname()) 36 | try: 37 | loop.run_forever() 38 | except KeyboardInterrupt: 39 | pass 40 | 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /services/migration_service/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp >= 3.8.1, < 4 2 | packaging 3 | psycopg2 4 | aiopg 5 | -------------------------------------------------------------------------------- /services/migration_service/run_script.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen 2 | import os 3 | import sys 4 | 5 | 6 | def setup_env(version_value: str): 7 | _env = os.environ 8 | virtual_env_path = '/opt/' + version_value 9 | _env['VIRTUAL_ENV'] = virtual_env_path 10 | path = _env['PATH'] 11 | _env['PATH'] = virtual_env_path + "/bin:" + path 12 | return _env 13 | 14 | 15 | if __name__ == "__main__": 16 | try: 17 | migration_server_process = Popen( 18 | "PYTHONPATH=/ python3 -m services.migration_service.migration_server", 19 | shell=True, 20 | close_fds=True, 21 | env=setup_env('latest') 22 | ) 23 | 24 | get_env_version = 
Popen( 25 | "python3 -m services.migration_service.get_virtual_env", 26 | shell=True, 27 | close_fds=True 28 | ) 29 | 30 | if get_env_version.wait() != 0: 31 | print("Failed to get env version", file=sys.stderr) 32 | sys.exit(1) 33 | 34 | # read in version of metadata service to load 35 | version_value_file = open('/root/services/migration_service/config', 'r') 36 | version_value = str(version_value_file.read()).strip() 37 | 38 | # start proper version of metadata service 39 | metadata_server_process = Popen( 40 | "metadata_service", 41 | shell=True, 42 | close_fds=True, 43 | env=setup_env(version_value) 44 | ) 45 | 46 | rc = metadata_server_process.wait() 47 | if rc != 0: 48 | print("Metadata server exited with non zero status") 49 | sys.exit(rc) 50 | rc = migration_server_process.wait() 51 | if rc != 0: 52 | print("Migration server exited with non zero status") 53 | sys.exit(rc) 54 | except Exception as e: 55 | print(e) 56 | sys.exit(1) 57 | -------------------------------------------------------------------------------- /services/ui_backend_service/.gitignore: -------------------------------------------------------------------------------- 1 | config.* -------------------------------------------------------------------------------- /services/ui_backend_service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/ui_backend_service/__init__.py -------------------------------------------------------------------------------- /services/ui_backend_service/api/__init__.py: -------------------------------------------------------------------------------- 1 | # api routes 2 | from .admin import AdminApi 3 | from .autocomplete import AutoCompleteApi 4 | from .artifact import ArtificatsApi 5 | from .search import SearchApi 6 | from .dag import DagApi 7 | from .flow import FlowApi 8 | from .run import RunApi 9 | from .step import StepApi 10 | from .task import TaskApi 11 | from .log import LogApi 12 | from .tag import TagApi 13 | from .metadata import MetadataApi 14 | from .features import FeaturesApi 15 | from .config import ConfigApi 16 | from .plugins import PluginsApi 17 | from .card import CardsApi 18 | 19 | # service processes 20 | from .notify import ListenNotify 21 | from .heartbeat_monitor import RunHeartbeatMonitor, TaskHeartbeatMonitor 22 | from .ws import Websocket 23 | -------------------------------------------------------------------------------- /services/ui_backend_service/api/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from services.utils import handle_exceptions, web_response 3 | 4 | # These environment values will be available to the frontend 5 | ALLOWED_CONFIG_KEYS = [ 6 | 'GA_TRACKING_ID' 7 | ] 8 | 9 | 10 | class ConfigApi(object): 11 | """ 12 | Adds an Api endpoint for fetching required configuration variables for the frontend. 13 | """ 14 | def __init__(self, app): 15 | app.router.add_route('GET', '/config', self.get_config) 16 | 17 | @handle_exceptions 18 | async def get_config(self, request): 19 | """ 20 | --- 21 | description: Get all frontend configuration key-value pairs. 22 | tags: 23 | - Admin 24 | produces: 25 | - application/json 26 | responses: 27 | "200": 28 | description: Returns all allowed configuration key-value pairs for the frontend. 
29 | schema: 30 | type: object 31 | properties: 32 | "ALLOWED_CONFIG_VARIABLE": 33 | type: string 34 | example: "value-to-pass-frontend-1234" 35 | description: "A frontend configuration variable from the server environment. These are exposed based on a whitelist on the server." 36 | "405": 37 | description: invalid HTTP Method 38 | """ 39 | config = {} 40 | for key in ALLOWED_CONFIG_KEYS: 41 | val = os.environ.get(key, None) 42 | if val: 43 | config[key] = val 44 | return web_response(200, config) 45 | -------------------------------------------------------------------------------- /services/ui_backend_service/api/dag.py: -------------------------------------------------------------------------------- 1 | from services.data.db_utils import DBResponse, translate_run_key 2 | from services.utils import handle_exceptions 3 | from .utils import format_response, web_response, query_param_enabled 4 | from services.ui_backend_service.data.db.utils import get_run_dag_data 5 | 6 | 7 | class DagApi(object): 8 | def __init__(self, app, db, cache=None): 9 | self.db = db 10 | app.router.add_route( 11 | "GET", "/flows/{flow_id}/runs/{run_number}/dag", self.get_run_dag 12 | ) 13 | self._dag_store = getattr(cache, "dag_cache", None) 14 | 15 | @handle_exceptions 16 | async def get_run_dag(self, request): 17 | """ 18 | --- 19 | description: Get DAG structure for a run. 20 | tags: 21 | - Run 22 | parameters: 23 | - $ref: '#/definitions/Params/Path/flow_id' 24 | - $ref: '#/definitions/Params/Path/run_number' 25 | - $ref: '#/definitions/Params/Custom/invalidate' 26 | produces: 27 | - application/json 28 | responses: 29 | "200": 30 | description: Return DAG structure for a specific run 31 | schema: 32 | $ref: '#/definitions/ResponsesDag' 33 | "405": 34 | description: invalid HTTP Method 35 | schema: 36 | $ref: '#/definitions/ResponsesError405' 37 | "404": 38 | description: necessary data for DAG generation Not Found 39 | schema: 40 | $ref: '#/definitions/ResponsesError404' 41 | "500": 42 | description: Internal Server Error (with error id) 43 | schema: 44 | $ref: '#/definitions/ResponsesDagError500' 45 | """ 46 | flow_name = request.match_info['flow_id'] 47 | run_number = request.match_info.get("run_number") 48 | # Before running the cache action, we make sure that the run has 49 | # the necessary data to generate a DAG. 50 | db_response = await get_run_dag_data(self.db, flow_name, run_number) 51 | 52 | if not db_response.response_code == 200: 53 | # DAG data was not found, return with the corresponding status. 54 | status, body = format_response(request, db_response) 55 | return web_response(status, body) 56 | 57 | # Prefer run_id over run_number 58 | flow_name = db_response.body['flow_id'] 59 | run_id = db_response.body.get('run_id') or db_response.body['run_number'] 60 | invalidate_cache = query_param_enabled(request, "invalidate") 61 | 62 | dag = await self._dag_store.cache.GenerateDag( 63 | flow_name, run_id, invalidate_cache=invalidate_cache) 64 | 65 | if dag.has_pending_request(): 66 | async for event in dag.stream(): 67 | if event["type"] == "error": 68 | # raise error, there was an exception during processing. 
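# (Streamed events appear to be plain dicts; error events carry at least "message", "id" and "traceback", the fields consumed in the raise below.)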
69 | raise GenerateDAGFailed(event["message"], event["id"], event["traceback"]) 70 | await dag.wait() # wait until results are ready 71 | dag = dag.get() 72 | response = DBResponse(200, dag) 73 | status, body = format_response(request, response) 74 | 75 | return web_response(status, body) 76 | 77 | 78 | class GenerateDAGFailed(Exception): 79 | def __init__(self, msg="Failed to process DAG", id="failed-to-process-dag", traceback_str=None): 80 | self.message = msg 81 | self.id = id 82 | self.traceback_str = traceback_str 83 | 84 | def __str__(self): 85 | return self.message 86 | -------------------------------------------------------------------------------- /services/ui_backend_service/api/features.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from services.utils import handle_exceptions, web_response 4 | 5 | from ..features import get_features 6 | 7 | 8 | class FeaturesApi(object): 9 | """ 10 | Adds an Api endpoint that returns a list of enabled/disabled features for the UI Backend Service 11 | """ 12 | 13 | def __init__(self, app): 14 | app.router.add_route("GET", "/features", self.get_all_features) 15 | 16 | @handle_exceptions 17 | async def get_all_features(self, request): 18 | """ 19 | --- 20 | description: Get all of enabled/disabled features as key-value pairs. 21 | tags: 22 | - Admin 23 | produces: 24 | - application/json 25 | responses: 26 | "200": 27 | description: Returns all features to be enabled or disabled by the frontend. 28 | schema: 29 | type: object 30 | properties: 31 | "FEATURE_*": 32 | type: boolean 33 | example: true 34 | description: "An environment variable from the server with a FEATURE_ prefix, and its value as a boolean" 35 | "405": 36 | description: invalid HTTP Method 37 | """ 38 | return web_response(200, get_features()) 39 | -------------------------------------------------------------------------------- /services/ui_backend_service/api/flow.py: -------------------------------------------------------------------------------- 1 | from services.utils import handle_exceptions 2 | from .utils import find_records 3 | 4 | 5 | class FlowApi(object): 6 | def __init__(self, app, db): 7 | self.db = db 8 | app.router.add_route("GET", "/flows", self.get_all_flows) 9 | app.router.add_route("GET", "/flows/{flow_id}", self.get_flow) 10 | self._async_table = self.db.flow_table_postgres 11 | 12 | @handle_exceptions 13 | async def get_flow(self, request): 14 | """ 15 | --- 16 | description: Get one flow 17 | tags: 18 | - Flow 19 | parameters: 20 | - $ref: '#/definitions/Params/Path/flow_id' 21 | produces: 22 | - application/json 23 | responses: 24 | "200": 25 | description: Returns one flow 26 | schema: 27 | $ref: '#/definitions/ResponsesFlow' 28 | "405": 29 | description: invalid HTTP Method 30 | schema: 31 | $ref: '#/definitions/ResponsesError405' 32 | """ 33 | 34 | flow_name = request.match_info.get("flow_id") 35 | 36 | return await find_records(request, 37 | self._async_table, 38 | fetch_single=True, 39 | initial_conditions=["flow_id = %s"], 40 | initial_values=[flow_name]) 41 | 42 | @handle_exceptions 43 | async def get_all_flows(self, request): 44 | """ 45 | --- 46 | description: Get all flows 47 | tags: 48 | - Flow 49 | parameters: 50 | - $ref: '#/definitions/Params/Builtin/_page' 51 | - $ref: '#/definitions/Params/Builtin/_limit' 52 | - $ref: '#/definitions/Params/Builtin/_order' 53 | - $ref: '#/definitions/Params/Builtin/_tags' 54 | - $ref: '#/definitions/Params/Builtin/_group' 55 | - $ref: 
'#/definitions/Params/Custom/flow_id' 56 | - $ref: '#/definitions/Params/Custom/user_name' 57 | - $ref: '#/definitions/Params/Custom/ts_epoch' 58 | produces: 59 | - application/json 60 | responses: 61 | "200": 62 | description: Returns all flows 63 | schema: 64 | $ref: '#/definitions/ResponsesFlowList' 65 | "405": 66 | description: invalid HTTP Method 67 | schema: 68 | $ref: '#/definitions/ResponsesError405' 69 | """ 70 | 71 | return await find_records(request, 72 | self._async_table, 73 | initial_conditions=[], 74 | initial_values=[], 75 | allowed_order=self._async_table.keys, 76 | allowed_group=self._async_table.keys, 77 | allowed_filters=self._async_table.keys 78 | ) 79 | -------------------------------------------------------------------------------- /services/ui_backend_service/api/plugins.py: -------------------------------------------------------------------------------- 1 | from services.utils import handle_exceptions, web_response 2 | from ..plugins import list_plugins 3 | 4 | 5 | class PluginsApi(object): 6 | """ 7 | Adds an Api endpoint for fetching UI plugins. 8 | """ 9 | 10 | def __init__(self, app): 11 | app.router.add_route('GET', '/plugin', self.get_plugins) 12 | app.router.add_route("GET", "/plugin/{plugin_name}", self.get_plugin) 13 | app.router.add_route("GET", "/plugin/{plugin_name}/{filename:.+}", self.get_plugin_asset) 14 | 15 | @handle_exceptions 16 | async def get_plugins(self, request): 17 | """ 18 | --- 19 | description: List all plugins 20 | tags: 21 | - Plugin 22 | produces: 23 | - application/json 24 | responses: 25 | "200": 26 | description: Returns list of all plugins 27 | schema: 28 | $ref: '#/definitions/ResponsesPluginList' 29 | "405": 30 | description: invalid HTTP Method 31 | schema: 32 | $ref: '#/definitions/ResponsesError405' 33 | """ 34 | plugins = [] 35 | for plugin in list_plugins(): 36 | plugins.append(dict(plugin)) 37 | 38 | return web_response(200, plugins) 39 | 40 | @handle_exceptions 41 | async def get_plugin(self, request): 42 | """ 43 | --- 44 | description: Get one plugin 45 | tags: 46 | - Plugin 47 | parameters: 48 | - $ref: '#/definitions/Params/Path/plugin_name' 49 | produces: 50 | - application/json 51 | responses: 52 | "200": 53 | description: Returns one plugin 54 | schema: 55 | $ref: '#/definitions/ResponsesPlugin' 56 | "405": 57 | description: invalid HTTP Method 58 | schema: 59 | $ref: '#/definitions/ResponsesError405' 60 | """ 61 | plugin = _get_plugin_from_request(request) 62 | if not plugin: 63 | return web_response(404, "Plugin not found") 64 | 65 | return web_response(200, dict(plugin)) 66 | 67 | @handle_exceptions 68 | async def get_plugin_asset(self, request): 69 | """ 70 | --- 71 | description: Serve plugin asset 72 | tags: 73 | - Plugin 74 | parameters: 75 | - $ref: '#/definitions/Params/Path/plugin_name' 76 | - $ref: '#/definitions/Params/Path/plugin_filename' 77 | produces: 78 | - application/json 79 | responses: 80 | "200": 81 | description: Serve plugin asset, e.g. 
dist/index.html 82 | "405": 83 | description: invalid HTTP Method 84 | schema: 85 | $ref: '#/definitions/ResponsesError405' 86 | """ 87 | plugin = _get_plugin_from_request(request) 88 | if not plugin: 89 | return web_response(404, "Plugin not found") 90 | 91 | filename = request.match_info.get("filename") 92 | try: 93 | return plugin.serve(filename) 94 | except: 95 | return web_response(500, "Internal server error") 96 | 97 | 98 | def _get_plugin_from_request(request): 99 | _plugins = list_plugins() 100 | plugin_name = request.match_info.get("plugin_name") 101 | for plugin in _plugins: 102 | if plugin.name == plugin_name: 103 | return plugin 104 | return None 105 | -------------------------------------------------------------------------------- /services/ui_backend_service/api/step.py: -------------------------------------------------------------------------------- 1 | from services.data.db_utils import translate_run_key 2 | from services.utils import handle_exceptions 3 | from .utils import find_records, apply_run_tags_postprocess 4 | 5 | 6 | class StepApi(object): 7 | def __init__(self, app, db): 8 | self.db = db 9 | app.router.add_route( 10 | "GET", "/flows/{flow_id}/runs/{run_number}/steps", self.get_steps 11 | ) 12 | app.router.add_route( 13 | "GET", "/flows/{flow_id}/runs/{run_number}/steps/{step_name}", self.get_step 14 | ) 15 | self._async_table = self.db.step_table_postgres 16 | self._async_run_table = self.db.run_table_postgres 17 | 18 | @handle_exceptions 19 | async def get_steps(self, request): 20 | """ 21 | --- 22 | description: Get all steps of specified run 23 | tags: 24 | - Step 25 | parameters: 26 | - $ref: '#/definitions/Params/Path/flow_id' 27 | - $ref: '#/definitions/Params/Path/run_number' 28 | - $ref: '#/definitions/Params/Builtin/_page' 29 | - $ref: '#/definitions/Params/Builtin/_limit' 30 | - $ref: '#/definitions/Params/Builtin/_order' 31 | - $ref: '#/definitions/Params/Builtin/_tags' 32 | - $ref: '#/definitions/Params/Builtin/_group' 33 | - $ref: '#/definitions/Params/Custom/flow_id' 34 | - $ref: '#/definitions/Params/Custom/run_number' 35 | - $ref: '#/definitions/Params/Custom/step_name' 36 | - $ref: '#/definitions/Params/Custom/user_name' 37 | - $ref: '#/definitions/Params/Custom/ts_epoch' 38 | produces: 39 | - application/json 40 | responses: 41 | "200": 42 | description: Returns all steps of specified run 43 | schema: 44 | $ref: '#/definitions/ResponsesStepList' 45 | "405": 46 | description: invalid HTTP Method 47 | schema: 48 | $ref: '#/definitions/ResponsesError405' 49 | """ 50 | 51 | flow_name = request.match_info.get("flow_id") 52 | run_number = request.match_info.get("run_number") 53 | run_id_key, run_id_value = translate_run_key(run_number) 54 | 55 | return await find_records(request, 56 | self._async_table, 57 | initial_conditions=[ 58 | "flow_id = %s", 59 | "{run_id_key} = %s".format(run_id_key=run_id_key)], 60 | initial_values=[flow_name, run_id_value], 61 | allowed_order=self._async_table.keys, 62 | allowed_group=self._async_table.keys, 63 | allowed_filters=self._async_table.keys, 64 | enable_joins=True, 65 | postprocess=apply_run_tags_postprocess(flow_name, run_number, self._async_run_table)) 66 | 67 | @handle_exceptions 68 | async def get_step(self, request): 69 | """ 70 | --- 71 | description: Get one step 72 | tags: 73 | - Step 74 | parameters: 75 | - $ref: '#/definitions/Params/Path/flow_id' 76 | - $ref: '#/definitions/Params/Path/run_number' 77 | - $ref: '#/definitions/Params/Path/step_name' 78 | produces: 79 | - application/json 80 | responses: 
81 | "200": 82 | description: Returns one step 83 | schema: 84 | $ref: '#/definitions/ResponsesStep' 85 | "405": 86 | description: invalid HTTP Method 87 | schema: 88 | $ref: '#/definitions/ResponsesError405' 89 | """ 90 | 91 | flow_name = request.match_info.get("flow_id") 92 | run_number = request.match_info.get("run_number") 93 | run_id_key, run_id_value = translate_run_key(run_number) 94 | step_name = request.match_info.get("step_name") 95 | 96 | return await find_records(request, 97 | self._async_table, 98 | fetch_single=True, 99 | initial_conditions=[ 100 | "flow_id = %s", 101 | "{run_id_key} = %s".format( 102 | run_id_key=run_id_key), 103 | "step_name = %s"], 104 | initial_values=[ 105 | flow_name, run_id_value, step_name], 106 | enable_joins=True, 107 | postprocess=apply_run_tags_postprocess(flow_name, run_number, self._async_run_table) 108 | ) 109 | -------------------------------------------------------------------------------- /services/ui_backend_service/api/tag.py: -------------------------------------------------------------------------------- 1 | from services.utils import handle_exceptions, web_response 2 | 3 | 4 | class TagApi(object): 5 | def __init__(self, app, db): 6 | self.db = db 7 | app.router.add_route("GET", "/tags", self.get_all_tags) 8 | self._async_table = self.db.run_table_postgres 9 | 10 | @handle_exceptions 11 | async def get_all_tags(self, request): 12 | db_response, _ = await self._async_table.get_tags() 13 | return web_response(db_response.response_code, db_response.body) 14 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional 2 | 3 | 4 | # Shared helpers 5 | 6 | 7 | def unpack_processed_value(value) -> Tuple[bool, Optional[str], Optional[str], Optional[str]]: 8 | ''' 9 | Unpack cached value returning tuple of: success, value, detail, stacktrace 10 | 11 | Defaults to None in case values are not defined. 
12 | 13 | Success example: 14 | True, 'foo', None 15 | 16 | Failure examples: 17 | False, 'failure-id', 'error-details', None 18 | False, 'failure-id-without-details', None, None 19 | False, None, None, None 20 | False, 'CustomError', 'Custom failure description', 'stacktrace of error' 21 | 22 | Returns 23 | ------- 24 | tuple : (bool, optional(str), optional(str), optional(str)) 25 | success, value, description, stacktrace 26 | ''' 27 | return (list(value) + [None] * 4)[:4] 28 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/cache/__init__.py: -------------------------------------------------------------------------------- 1 | from .store import CacheStore 2 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/cache/client/__init__.py: -------------------------------------------------------------------------------- 1 | # This module is a copy of an implementation of a cache store 2 | # originally from https://github.com/Netflix/metaflow/pull/316 3 | # TODO: use the metaflow cli cache implementation if the aforementioned PR gets merged 4 | from .cache_action import CacheAction 5 | from .cache_async_client import CacheAsyncClient 6 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/cache/client/cache_action.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import importlib 3 | 4 | LO_PRIO = 'lo_prio' 5 | HI_PRIO = 'hi_prio' 6 | 7 | 8 | class CacheServerInitFailed(Exception): 9 | pass 10 | 11 | 12 | def import_action_class_spec(action_spec): 13 | parts = action_spec.split('.') 14 | package = '.'.join(action_spec.split('.')[:-1]) 15 | action_name = action_spec.split('.')[-1] 16 | return import_action_class('.'.join(parts[:-1]), parts[-1]) 17 | 18 | 19 | def import_action_class(mod, cls): 20 | return getattr(importlib.import_module(mod), cls) 21 | 22 | 23 | class CacheAction(object): 24 | 25 | PRIORITY = LO_PRIO 26 | 27 | @classmethod 28 | def format_request(cls, *args, **kwargs): 29 | """ 30 | Encode the given arguments as a request. This method 31 | is proxied by `cache_client` as a client-facing API 32 | of the action. 33 | 34 | Function returns a tuple: 35 | 1. `message`: an arbitrary JSON-encodable payload that 36 | is passed to `execute`. 37 | 2. `obj_keys`: a list of keys that the action promises 38 | to produce in `execute`. 39 | 3. `stream_key`: an optional key name for a streaming 40 | result of the action. May be `None` if the action 41 | doesn't have any streaming results. 42 | 4. `disposable_keys`: a subset of `obj_keys` that will 43 | be purged from the cache before other objects. 44 | 5. `invalidate_cache`: boolean to indicate if existing 45 | cache keys should be invalidated. 46 | 6. `ephemeral_storage_path` : optional path for persisting files across cache action invocations 47 | """ 48 | # return message, obj_keys, stream_key, disposable_keys, invalidate_cache, ephemeral_storage_path 49 | raise NotImplementedError 50 | 51 | @classmethod 52 | def response(cls, keys_objs): 53 | """ 54 | Decodes and refines `execute` output before it is returned 55 | to the client. The argument `keys_objs` is the return value 56 | of `execute`. This method is called by `cache_client` to 57 | convert serialized, cached results to a client-facing object. 58 | 59 | The function may return anything. 
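For example, an action whose cached values are JSON blobs can simply json.loads them here; GenerateDag.response in generate_dag_action.py does this for its dag:result key.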
60 | """ 61 | raise NotImplementedError 62 | 63 | @classmethod 64 | def stream_response(cls, it): 65 | """ 66 | Iterator that iterates over streamed events in `it`. This 67 | generator is the reader counterpart to the `stream_output` 68 | writer in `execute`. This method is called by `cache_client` 69 | to convert serialized events to client-facing objects. 70 | 71 | If the event is `None`, it should be yield as-is. For other 72 | events, the function may perform any stateful manipulation and 73 | yield zero or more refined objects. 74 | """ 75 | raise NotImplementedError 76 | 77 | @classmethod 78 | def execute(cls, 79 | message=None, 80 | keys=[], 81 | existing_keys={}, 82 | stream_output=None, 83 | invalidate_cache=False): 84 | """ 85 | Execute an action. This method is called by `cache_worker` to 86 | execute the action as a subprocess. 87 | 88 | - `message` is an arbitrary payload produced by format_request. 89 | - `keys` is a list of objects that the action needs to produce. 90 | - `existing_keys` refers to existing values of caches keys, if 91 | available. 92 | - `stream_output` is a function that can be called to produce 93 | an output event to the stream object. 94 | - `invalidate_cache` boolean to indicate whether to invalidate 95 | existing cache keys. 96 | 97 | Returns a dictionary that includes a string/byte result 98 | per key that will be stored in the cache. 99 | """ 100 | raise NotImplementedError 101 | 102 | 103 | class Check(CacheAction): 104 | 105 | PRIORITY = HI_PRIO 106 | 107 | @classmethod 108 | def format_request(cls, *args, **kwargs): 109 | key = 'check-%s' % uuid.uuid4() 110 | return None, [key], None, [key], False, None 111 | 112 | @classmethod 113 | def response(cls, keys_objs): 114 | for key, blob in keys_objs.items(): 115 | if blob != b'works: %s' % key.encode('utf-8'): 116 | raise CacheServerInitFailed() 117 | return True 118 | 119 | @classmethod 120 | def stream_response(cls, it): 121 | pass 122 | 123 | @classmethod 124 | def execute(cls, keys=[], **kwargs): 125 | return {key: 'works: %s' % key for key in keys} 126 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/cache/client/cache_async_client.py: -------------------------------------------------------------------------------- 1 | from json.decoder import JSONDecodeError 2 | import time 3 | import asyncio 4 | import json 5 | from asyncio.subprocess import PIPE, STDOUT 6 | 7 | from .cache_client import CacheClient, CacheServerUnreachable, CacheClientTimeout 8 | 9 | from services.utils import logging 10 | 11 | OP_WORKER_CREATE = "worker_create" 12 | OP_WORKER_TERMINATE = "worker_terminate" 13 | 14 | WAIT_FREQUENCY = 0.2 15 | HEARTBEAT_FREQUENCY = 1 16 | 17 | 18 | class CacheAsyncClient(CacheClient): 19 | _drain_lock = asyncio.Lock() 20 | _restart_requested = False 21 | 22 | async def start_server(self, cmdline, env): 23 | self.logger = logging.getLogger( 24 | "CacheAsyncClient:{root}".format(root=self._root) 25 | ) 26 | 27 | self._proc = await asyncio.create_subprocess_exec( 28 | *cmdline, env=env, stdin=PIPE, stdout=PIPE, stderr=STDOUT, limit=1024000 29 | ) # 1024KB 30 | 31 | asyncio.gather(self._heartbeat(), self.read_stdout()) 32 | 33 | async def _read_pipe(self, src): 34 | while self._is_alive: 35 | line = await src.readline() 36 | if not line: 37 | await asyncio.sleep(WAIT_FREQUENCY) 38 | break 39 | yield line.rstrip().decode("utf-8") 40 | 41 | async def read_stdout(self): 42 | async for line in self._read_pipe(self._proc.stdout): 43 | 
await self.read_message(line) 44 | 45 | async def read_message(self, line: str): 46 | try: 47 | # We check for isEnabledFor because some things may be very long to print 48 | # (in particularly pending_requests) 49 | message = json.loads(line) 50 | if self.logger.isEnabledFor(logging.INFO): 51 | self.logger.info(message) 52 | if message["op"] == OP_WORKER_CREATE: 53 | self.pending_requests.add(message["stream_key"]) 54 | elif message["op"] == OP_WORKER_TERMINATE: 55 | self.pending_requests.remove(message["stream_key"]) 56 | 57 | if self.logger.isEnabledFor(logging.INFO): 58 | self.logger.info( 59 | "Pending stream keys: {}".format(len(list(self.pending_requests))) 60 | ) 61 | except JSONDecodeError as ex: 62 | if self.logger.isEnabledFor(logging.INFO): 63 | self.logger.info("Message: {}".format(line)) 64 | except Exception as ex: 65 | self.logger.exception(ex) 66 | 67 | async def check(self): 68 | ret = await self.Check() # pylint: disable=no-member 69 | await ret.wait() 70 | ret.get() 71 | 72 | async def stop_server(self): 73 | if self._is_alive: 74 | self._is_alive = False 75 | self._proc.terminate() 76 | self.logger.info("Waiting for cache server to terminate") 77 | await self._proc.wait() 78 | 79 | async def send_request(self, blob): 80 | try: 81 | self._proc.stdin.write(blob) 82 | async with self._drain_lock: 83 | await asyncio.wait_for(self._proc.stdin.drain(), timeout=WAIT_FREQUENCY) 84 | except asyncio.TimeoutError: 85 | self.logger.warning( 86 | "StreamWriter.drain timeout, request restart: {}".format( 87 | repr(self._proc.stdin) 88 | ) 89 | ) 90 | # Drain timeout error indicates unrecoverable critical issue, 91 | # essentially the cache functionality remains broken after the first asyncio.TimeoutError. 92 | # Request restart from CacheStore so that normal operation can be resumed. 93 | self._restart_requested = True 94 | except ConnectionResetError: 95 | self._is_alive = False 96 | # This could indicate that the cache worker pool has unexpectedly crashed. 97 | # Request restart from CacheStore so that normal operation can be resumed. 
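# (Unlike the drain-timeout branch above, this branch also re-raises, so the caller sees the failure immediately.)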
98 | self._restart_requested = True 99 | raise CacheServerUnreachable() 100 | 101 | async def wait_iter(self, it, timeout): 102 | end = time.time() + timeout 103 | for obj in it: 104 | if obj is None: 105 | await asyncio.sleep(WAIT_FREQUENCY) 106 | if not self._is_alive: 107 | raise CacheServerUnreachable() 108 | elif time.time() > end: 109 | raise CacheClientTimeout() 110 | else: 111 | yield obj 112 | 113 | async def wait(self, fun, timeout): 114 | def _repeat(): 115 | while True: 116 | yield fun() 117 | 118 | async for obj in self.wait_iter(_repeat(), timeout): 119 | return obj 120 | 121 | async def request_and_return(self, reqs, ret): 122 | for req in reqs: 123 | await req 124 | return ret 125 | 126 | async def _heartbeat(self): 127 | while self._is_alive: 128 | try: 129 | await self.ping() 130 | except CacheServerUnreachable: 131 | self._is_alive = False 132 | await asyncio.sleep(HEARTBEAT_FREQUENCY) 133 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/cache/client/cache_worker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import signal 5 | 6 | from .cache_action import import_action_class_spec 7 | 8 | 9 | def best_effort_read(key_paths): 10 | for key, path in key_paths: 11 | try: 12 | with open(path, 'rb') as f: 13 | yield key, f.read() 14 | except: 15 | pass 16 | 17 | 18 | def execute_action(tempdir, action_spec, request_file, timeout=0): 19 | def timeout_handler(signum, frame): 20 | raise WorkerTimeoutException() 21 | 22 | signal.signal(signal.SIGALRM, timeout_handler) 23 | signal.alarm(timeout) # Activate timeout, 0 = no timeout 24 | 25 | action_cls = import_action_class_spec(action_spec) 26 | with open(os.path.join(tempdir, request_file)) as f: 27 | request = json.load(f) 28 | 29 | execute(tempdir, action_cls, request) 30 | 31 | signal.alarm(0) # Disable timeout 32 | 33 | 34 | def execute(tempdir, action_cls, req): 35 | try: 36 | # prepare stream 37 | stream = None 38 | if req['stream_key']: 39 | stream = open(os.path.join(tempdir, req['stream_key']), 'a', buffering=1) 40 | 41 | def stream_output(obj): 42 | stream.write(json.dumps(obj) + '\n') 43 | else: 44 | stream_output = None 45 | 46 | # prepare keys 47 | keys = list(req['keys']) 48 | ex_keys = dict(best_effort_read(req['existing_keys'].items())) 49 | 50 | # execute action 51 | res = action_cls.execute( 52 | message=req['message'], 53 | keys=keys, 54 | existing_keys=ex_keys, 55 | stream_output=stream_output, 56 | invalidate_cache=req.get('invalidate_cache', False)) 57 | 58 | # write outputs to keys 59 | for key, val in res.items(): 60 | if key in ex_keys and ex_keys[key] == val: 61 | # Reduce disk churn by not unnecessarily writing existing keys 62 | # that have identical values to the newly produced ones. 
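# Note: values in `ex_keys` are raw bytes read from disk, so this comparison only skips rewrites for actions that return bytes; str results are always written.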
63 | continue 64 | blob = val if isinstance(val, bytes) else val.encode('utf-8') 65 | with open(os.path.join(tempdir, req['keys'][key]), 'wb') as f: 66 | f.write(blob) 67 | finally: 68 | # make sure the stream is finalized so clients won't hang even if 69 | # the worker crashes 70 | if stream: 71 | stream.write('\n\n') 72 | stream.close() 73 | 74 | 75 | class WorkerTimeoutException(Exception): 76 | pass 77 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/cache/generate_dag_action.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import json 3 | 4 | from .client import CacheAction 5 | from .utils import streamed_errors, DAGParsingFailed, DAGUnsupportedFlowLanguage 6 | 7 | from .custom_flowgraph import FlowGraph 8 | 9 | from metaflow import Run, Step, DataArtifact, namespace 10 | from metaflow.exception import MetaflowNotFound 11 | namespace(None) # Always use global namespace by default 12 | 13 | 14 | class GenerateDag(CacheAction): 15 | """ 16 | Generates a DAG for a given Run. 17 | 18 | Parameters 19 | ---------- 20 | flow_id : str 21 | The flow id that this codepackage belongs to. 22 | Required for finding the correct class inside the parser logic. 23 | run_number : str 24 | Run number to construct rest of the pathspec 25 | 26 | Returns 27 | -------- 28 | List or None 29 | example: 30 | [ 31 | boolean, 32 | { 33 | "step_name": { 34 | 'type': string, 35 | 'box_next': boolean, 36 | 'box_ends': string, 37 | 'next': list, 38 | 'doc': string 39 | }, 40 | ... 41 | } 42 | ] 43 | First field conveys whether dag generation was successful. 44 | Second field contains the actual DAG. 45 | """ 46 | 47 | @classmethod 48 | def format_request(cls, flow_id, run_number, invalidate_cache=False): 49 | msg = { 50 | 'flow_id': flow_id, 51 | 'run_number': run_number 52 | } 53 | key_identifier = "{}/{}".format(flow_id, run_number) 54 | result_key = 'dag:result:%s' % hashlib.sha1((key_identifier).encode('utf-8')).hexdigest() 55 | stream_key = 'dag:stream:%s' % hashlib.sha1((key_identifier).encode('utf-8')).hexdigest() 56 | 57 | return msg, \ 58 | [result_key], \ 59 | stream_key, \ 60 | [stream_key], \ 61 | invalidate_cache, \ 62 | None 63 | 64 | @classmethod 65 | def response(cls, keys_objs): 66 | ''' 67 | Returns the generated DAG result 68 | ''' 69 | return [json.loads(val) for key, val in keys_objs.items() if key.startswith('dag:result')][0] 70 | 71 | @classmethod 72 | def stream_response(cls, it): 73 | for msg in it: 74 | yield msg 75 | 76 | @classmethod 77 | def execute(cls, 78 | message=None, 79 | keys=None, 80 | existing_keys={}, 81 | stream_output=None, 82 | invalidate_cache=False, 83 | **kwargs): 84 | results = {} 85 | flow_id = message['flow_id'] 86 | run_number = message['run_number'] 87 | 88 | result_key = [key for key in keys if key.startswith('dag:result')][0] 89 | 90 | with streamed_errors(stream_output): 91 | run = Run("{}/{}".format(flow_id, run_number)) 92 | param_step = Step("{}/_parameters".format(run.pathspec)) 93 | try: 94 | dag = DataArtifact("{}/_graph_info".format(param_step.task.pathspec)).data 95 | except MetaflowNotFound: 96 | dag = generate_dag(run) 97 | 98 | results[result_key] = json.dumps(dag) 99 | 100 | return results 101 | 102 | # Utilities 103 | 104 | 105 | def generate_dag(run: Run): 106 | try: 107 | # Initialize a FlowGraph object 108 | graph = FlowGraph(source=run.code.flowspec, name=run.parent.id) 109 | # Build the DAG based on the DAGNodes given by 
the FlowGraph for the found FlowSpec class. 110 | steps_info, graph_structure = graph.output_steps() 111 | graph_info = { 112 | "steps": steps_info, 113 | "graph_structure": graph_structure, 114 | "doc": graph.doc 115 | } 116 | 117 | return graph_info 118 | except Exception as ex: 119 | if ex.__class__.__name__ == 'KeyError' and "python" in str(ex): 120 | raise DAGUnsupportedFlowLanguage( 121 | 'DAG parsing is not supported for the language used in this Flow.' 122 | ) from None 123 | else: 124 | raise DAGParsingFailed(f"DAG Parsing failed: {str(ex)}") 125 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/cache/get_artifacts_action.py: -------------------------------------------------------------------------------- 1 | from typing import List, Callable 2 | 3 | from .get_data_action import GetData 4 | from .utils import unpack_pathspec_with_attempt_id, artifact_value 5 | 6 | from metaflow import DataArtifact 7 | 8 | 9 | class GetArtifacts(GetData): 10 | @classmethod 11 | def format_request(cls, pathspecs: List[str], invalidate_cache=False): 12 | """ 13 | Cache Action to fetch Artifact values 14 | 15 | Parameters 16 | ---------- 17 | pathspecs : List[str] 18 | List of Artifact pathspecs with attempt id as last component: 19 | ["FlowId/RunNumber/StepName/TaskId/ArtifactName/0"] 20 | invalidate_cache : bool 21 | Force cache invalidation, defaults to False 22 | """ 23 | return super().format_request(targets=pathspecs, invalidate_cache=invalidate_cache) 24 | 25 | @classmethod 26 | def fetch_data(cls, pathspec: str, stream_output: Callable[[str], None]): 27 | """ 28 | Fetch data using Metaflow Client. 29 | 30 | Parameters 31 | ---------- 32 | pathspec : str 33 | Artifact pathspec with attempt id as last component: 34 | "FlowId/RunNumber/StepName/TaskId/ArtifactName/0" 35 | stream_output : Callable[[object], None] 36 | Stream output callable from execute() that accepts a JSON serializable object. 37 | Used for generic messaging. 38 | 39 | Errors can be streamed to cache client using `stream_output` in combination with 40 | the error_event_msg helper. This way failures won't be cached for individual artifacts, 41 | thus making it necessary to retry fetching during next attempt. 42 | (Will add significant overhead/delay). 43 | 44 | Stream error example: 45 | stream_output(error_event_msg(str(ex), "s3-not-found", get_traceback_str())) 46 | """ 47 | pathspec_without_attempt, attempt_id = unpack_pathspec_with_attempt_id(pathspec) 48 | 49 | artifact = DataArtifact(pathspec_without_attempt, attempt=attempt_id) 50 | return artifact_value(artifact) 51 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/cache/get_parameters_action.py: -------------------------------------------------------------------------------- 1 | from typing import List, Callable 2 | 3 | from .get_data_action import GetData 4 | from .utils import MAX_S3_SIZE, streamed_errors 5 | 6 | from metaflow import Step 7 | 8 | 9 | class GetParameters(GetData): 10 | @classmethod 11 | def format_request(cls, pathspecs: List[str], invalidate_cache=False): 12 | """ 13 | Cache Action to fetch Run parameters for list of runs. 
14 | 15 | Parameters 16 | ---------- 17 | pathspecs : List[str] 18 | List of Run pathspecs: ["FlowId/RunNumber"] 19 | invalidate_cache : bool 20 | Force cache invalidation, defaults to False 21 | """ 22 | return super().format_request(targets=pathspecs, invalidate_cache=invalidate_cache) 23 | 24 | @classmethod 25 | def fetch_data(cls, pathspec: str, stream_output: Callable[[object], None]): 26 | """ 27 | Fetch data using Metaflow Client. 28 | 29 | Parameters 30 | ---------- 31 | pathspec : str 32 | Run pathspec: "FlowId/RunNumber" 33 | stream_output : Callable[[object], None] 34 | Stream output callable from execute() that accepts a JSON serializable object. 35 | Used for generic messaging. 36 | 37 | Errors can be streamed to cache client using `stream_output` in combination with 38 | the error_event_msg helper. This way failures won't be cached for individual artifacts, 39 | thus making it necessary to retry fetching during next attempt. 40 | (Will add significant overhead/delay). 41 | 42 | Stream error example: 43 | stream_output(error_event_msg(str(ex), "s3-not-found", get_traceback_str())) 44 | """ 45 | try: 46 | with streamed_errors(stream_output): 47 | step = Step("{}/_parameters".format(pathspec)) 48 | except Exception as ex: 49 | # NOTE: return false in order not to cache this 50 | # since parameters might be available later 51 | return False 52 | 53 | values = {} 54 | for artifact_name, artifact in step.task.artifacts._asdict().items(): 55 | # Exclude following internal only artifacts from results: 56 | # - Artifacts prefixed with underscore (_) 57 | # - Artifacts with 'name' or 'script_name' 58 | if artifact_name.startswith('_') or artifact_name in ['name', 'script_name']: 59 | continue 60 | try: 61 | if artifact.size < MAX_S3_SIZE: 62 | values[artifact_name] = artifact.data 63 | else: 64 | values[artifact_name] = "Artifact too large: {} bytes".format(artifact.size) 65 | except Exception as ex: 66 | values[artifact_name] = str(ex) 67 | 68 | return [True, values] 69 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/cache/get_task_action.py: -------------------------------------------------------------------------------- 1 | from typing import List, Callable 2 | 3 | from .get_data_action import GetData 4 | from .utils import unpack_pathspec_with_attempt_id, MAX_S3_SIZE 5 | 6 | from metaflow import Task 7 | from metaflow.exception import MetaflowNotFound 8 | 9 | 10 | class GetTask(GetData): 11 | @classmethod 12 | def format_request(cls, pathspecs: List[str], invalidate_cache=False): 13 | """ 14 | Cache Action to fetch Task status and foreach labels. 15 | 16 | Parameters 17 | ---------- 18 | pathspecs : List[str] 19 | List of Task pathspecs with attempt id as last component: 20 | ["FlowId/RunNumber/StepName/TaskId/0"] 21 | invalidate_cache : bool 22 | Force cache invalidation, defaults to False 23 | """ 24 | return super().format_request(targets=pathspecs, invalidate_cache=invalidate_cache) 25 | 26 | @classmethod 27 | def fetch_data(cls, pathspec: str, stream_output: Callable[[object], None]): 28 | """ 29 | Fetch data using Metaflow Client. 30 | 31 | Parameters 32 | ---------- 33 | pathspec : str 34 | Task pathspec with attempt id as last component: 35 | "FlowId/RunNumber/StepName/TaskId/0" 36 | stream_output : Callable[[object], None] 37 | Stream output callable from execute() that accepts a JSON serializable object. 38 | Used for generic messaging. 
39 | 40 | Errors can be streamed to cache client using `stream_output` in combination with 41 | the error_event_msg helper. This way failures won't be cached for individual artifacts, 42 | thus making it necessary to retry fetching during next attempt. 43 | (Will add significant overhead/delay). 44 | 45 | Stream error example: 46 | stream_output(error_event_msg(str(ex), "s3-not-found", get_traceback_str())) 47 | """ 48 | try: 49 | pathspec_without_attempt, attempt_id = unpack_pathspec_with_attempt_id(pathspec) 50 | task = Task(pathspec_without_attempt, attempt=attempt_id) 51 | except MetaflowNotFound: 52 | return False # Skip cache persist if Task cannot be found 53 | 54 | if '_task_ok' not in task: 55 | # Skip cache persist if _task_ok artifact cannot be found 56 | return False 57 | 58 | values = {} 59 | for artifact_name in ['_task_ok', '_foreach_stack']: 60 | if artifact_name in task: 61 | artifact = task[artifact_name] 62 | if artifact.size < MAX_S3_SIZE: 63 | values[artifact_name] = artifact.data 64 | else: 65 | return [False, 'artifact-too-large', "{}: {} bytes".format(artifact.pathspec, artifact.size)] 66 | 67 | return [True, values] 68 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/__init__.py: -------------------------------------------------------------------------------- 1 | from .postgres_async_db import AsyncPostgresDB 2 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow_row import FlowRow 2 | from .run_row import RunRow 3 | from .step_row import StepRow 4 | from .task_row import TaskRow 5 | from .artifact_row import ArtifactRow 6 | from .metadata_row import MetadataRow 7 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/models/artifact_row.py: -------------------------------------------------------------------------------- 1 | from services.data.db_utils import get_exposed_run_id, get_exposed_task_id 2 | from .base_row import BaseRow 3 | import time 4 | 5 | 6 | class ArtifactRow(BaseRow): 7 | flow_id: str = None 8 | run_number: int = None 9 | run_id: str = None 10 | step_name: str = None 11 | task_id: int = None 12 | task_name: str = None 13 | name: str = None 14 | location: str = None 15 | sha: str = None 16 | type: str = None 17 | content_type: str = None 18 | user_name: str = None 19 | attempt_id: int = 0 20 | ts_epoch: int = 0 21 | 22 | def __init__( 23 | self, 24 | flow_id, 25 | run_number, 26 | run_id, 27 | step_name, 28 | task_id, 29 | task_name, 30 | name, 31 | location, 32 | ds_type, 33 | sha, 34 | type, 35 | content_type, 36 | user_name, 37 | attempt_id, 38 | ts_epoch=None, 39 | tags=None, 40 | system_tags=None, 41 | **kwargs 42 | ): 43 | self.flow_id = flow_id 44 | self.run_number = run_number 45 | self.run_id = run_id 46 | self.step_name = step_name 47 | self.task_id = task_id 48 | self.task_name = task_name 49 | self.name = name 50 | self.location = location 51 | self.ds_type = ds_type 52 | self.sha = sha 53 | self.type = type 54 | self.content_type = content_type 55 | self.user_name = user_name 56 | self.attempt_id = attempt_id 57 | if ts_epoch is None: 58 | ts_epoch = int(round(time.time() * 1000)) 59 | 60 | self.ts_epoch = ts_epoch 61 | self.tags = tags 62 | self.system_tags = system_tags 63 | 64 | def serialize(self, expanded: bool = 
False): 65 | if expanded: 66 | return { 67 | "flow_id": self.flow_id, 68 | "run_number": self.run_number, 69 | "run_id": self.run_id, 70 | "step_name": self.step_name, 71 | "task_id": self.task_id, 72 | "task_name": self.task_name, 73 | "name": self.name, 74 | "location": self.location, 75 | "ds_type": self.ds_type, 76 | "sha": self.sha, 77 | "type": self.type, 78 | "content_type": self.content_type, 79 | "user_name": self.user_name, 80 | "attempt_id": self.attempt_id, 81 | "ts_epoch": self.ts_epoch, 82 | "tags": self.tags, 83 | "system_tags": self.system_tags, 84 | } 85 | else: 86 | return { 87 | "flow_id": self.flow_id, 88 | "run_number": str(get_exposed_run_id(self.run_number, self.run_id)), 89 | "step_name": self.step_name, 90 | "task_id": str(get_exposed_task_id(self.task_id, self.task_name)), 91 | "name": self.name, 92 | "location": self.location, 93 | "ds_type": self.ds_type, 94 | "sha": self.sha, 95 | "type": self.type, 96 | "content_type": self.content_type, 97 | "user_name": self.user_name, 98 | "attempt_id": self.attempt_id, 99 | "ts_epoch": self.ts_epoch, 100 | "tags": self.tags, 101 | "system_tags": self.system_tags, 102 | } 103 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/models/base_row.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | 4 | class BaseRow(object): 5 | """ 6 | Base class for Row serialization of database query results. 7 | Inherited by all row classes and ensures that serialize() is implemented. 8 | """ 9 | 10 | def serialize(self) -> Dict: 11 | raise NotImplementedError("Row model needs to define a serialize function") 12 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/models/flow_row.py: -------------------------------------------------------------------------------- 1 | from .base_row import BaseRow 2 | import time 3 | 4 | 5 | class FlowRow(BaseRow): 6 | flow_id: str = None 7 | user_name: str = None 8 | ts_epoch: int = 0 9 | 10 | def __init__(self, flow_id, user_name, ts_epoch=None, tags=None, system_tags=None, **kwargs): 11 | self.flow_id = flow_id 12 | self.user_name = user_name 13 | if ts_epoch is None: 14 | ts_epoch = int(round(time.time() * 1000)) 15 | self.ts_epoch = ts_epoch 16 | self.tags = tags 17 | self.system_tags = system_tags 18 | 19 | def serialize(self, expanded: bool = False): 20 | return { 21 | "flow_id": self.flow_id, 22 | "user_name": self.user_name, 23 | "ts_epoch": self.ts_epoch, 24 | "tags": self.tags, 25 | "system_tags": self.system_tags, 26 | } 27 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/models/metadata_row.py: -------------------------------------------------------------------------------- 1 | from .base_row import BaseRow 2 | from services.data.db_utils import get_exposed_run_id, get_exposed_task_id 3 | import time 4 | 5 | 6 | class MetadataRow(BaseRow): 7 | flow_id: str = None 8 | run_number: int = None 9 | run_id: str = None 10 | step_name: str = None 11 | task_id: int = None 12 | task_name: str = None 13 | attempt_id: int = None 14 | id: int = None # autoincrement 15 | field_name: str = None 16 | value: dict = None 17 | type: str = None 18 | user_name: str = None 19 | ts_epoch: int = 0 20 | tags = None 21 | system_tags = None 22 | 23 | def __init__( 24 | self, 25 | flow_id, 26 | run_number, 27 | run_id, 28 | step_name, 29 | task_id, 30 | 
task_name, 31 | id, 32 | field_name, 33 | value, 34 | type, 35 | user_name, 36 | attempt_id=None, 37 | ts_epoch=None, 38 | tags=None, 39 | system_tags=None, 40 | **kwargs 41 | ): 42 | self.flow_id = flow_id 43 | self.run_number = run_number 44 | self.run_id = run_id 45 | self.step_name = step_name 46 | self.task_id = task_id 47 | self.task_name = task_name 48 | self.attempt_id = attempt_id 49 | self.field_name = field_name 50 | self.value = value 51 | self.type = type 52 | self.user_name = user_name 53 | if ts_epoch is None: 54 | ts_epoch = int(round(time.time() * 1000)) 55 | 56 | self.ts_epoch = ts_epoch 57 | self.id = id 58 | self.tags = tags 59 | self.system_tags = system_tags 60 | 61 | def serialize(self, expanded: bool = False): 62 | if expanded: 63 | return { 64 | "id": self.id, 65 | "flow_id": self.flow_id, 66 | "run_number": self.run_number, 67 | "run_id": self.run_id, 68 | "step_name": self.step_name, 69 | "task_id": self.task_id, 70 | "task_name": self.task_name, 71 | "attempt_id": self.attempt_id, 72 | "field_name": self.field_name, 73 | "value": self.value, 74 | "type": self.type, 75 | "user_name": self.user_name, 76 | "ts_epoch": self.ts_epoch, 77 | "tags": self.tags, 78 | "system_tags": self.system_tags, 79 | } 80 | else: 81 | return { 82 | "id": self.id, 83 | "flow_id": self.flow_id, 84 | "run_number": str(get_exposed_run_id(self.run_number, self.run_id)), 85 | "step_name": self.step_name, 86 | "task_id": str(get_exposed_task_id(self.task_id, self.task_name)), 87 | "attempt_id": self.attempt_id, 88 | "field_name": self.field_name, 89 | "value": self.value, 90 | "type": self.type, 91 | "user_name": self.user_name, 92 | "ts_epoch": self.ts_epoch, 93 | "tags": self.tags, 94 | "system_tags": self.system_tags, 95 | } 96 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/models/run_row.py: -------------------------------------------------------------------------------- 1 | from .base_row import BaseRow 2 | import time 3 | from services.data.db_utils import get_exposed_run_id 4 | 5 | 6 | class RunRow(BaseRow): 7 | flow_id: str = None 8 | run_number: int = None 9 | run_id: str = None 10 | run: str = None 11 | user_name: str = None 12 | user: str = None 13 | status: str = None 14 | ts_epoch: int = 0 15 | finished_at: int = None 16 | duration: int = None 17 | 18 | def __init__( 19 | self, 20 | flow_id, 21 | user_name, 22 | user=None, 23 | run_number=None, 24 | run_id=None, 25 | run=None, 26 | status=None, 27 | ts_epoch=None, 28 | finished_at=None, 29 | duration=None, 30 | tags=None, 31 | system_tags=None, 32 | last_heartbeat_ts=None, 33 | **kwargs 34 | ): 35 | self.flow_id = flow_id 36 | self.user_name = user_name 37 | self.user = user 38 | self.run_number = run_number 39 | self.run_id = run_id 40 | self.run = run 41 | self.status = status 42 | self.tags = tags 43 | self.system_tags = system_tags 44 | if ts_epoch is None: 45 | ts_epoch = int(round(time.time() * 1000)) 46 | 47 | self.ts_epoch = ts_epoch 48 | self.last_heartbeat_ts = last_heartbeat_ts 49 | self.finished_at = finished_at 50 | self.duration = duration 51 | self.last_heartbeat_ts = last_heartbeat_ts 52 | 53 | def serialize(self, expanded: bool = False): 54 | if expanded: 55 | return { 56 | "flow_id": self.flow_id, 57 | "run_number": self.run_number, 58 | "run_id": self.run_id, 59 | "user_name": self.user_name, 60 | "user": self.user, 61 | "run": self.run, 62 | "status": self.status, 63 | "ts_epoch": self.ts_epoch, 64 | "finished_at": self.finished_at, 65 
| "duration": self.duration, 66 | "last_heartbeat_ts": self.last_heartbeat_ts, 67 | "tags": self.tags, 68 | "system_tags": self.system_tags 69 | } 70 | else: 71 | return { 72 | "flow_id": self.flow_id, 73 | "run_number": str(get_exposed_run_id(self.run_number, self.run_id)), 74 | "user_name": self.user_name, 75 | "status": self.status, 76 | "ts_epoch": self.ts_epoch, 77 | "finished_at": self.finished_at, 78 | "duration": self.duration, 79 | "last_heartbeat_ts": self.last_heartbeat_ts, 80 | "tags": self.tags, 81 | "system_tags": self.system_tags 82 | } 83 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/models/step_row.py: -------------------------------------------------------------------------------- 1 | from .base_row import BaseRow 2 | import time 3 | from services.data.db_utils import get_exposed_run_id 4 | 5 | 6 | class StepRow(BaseRow): 7 | flow_id: str = None 8 | run_number: int = None 9 | run_id: str = None 10 | step_name: str = None 11 | user_name: str = None 12 | ts_epoch: int = 0 13 | duration: int = 0 14 | tags = None 15 | system_tags = None 16 | 17 | def __init__( 18 | self, 19 | flow_id, 20 | run_number, 21 | run_id, 22 | user_name, 23 | step_name, 24 | ts_epoch=None, 25 | duration=None, 26 | tags=None, 27 | system_tags=None, 28 | **kwargs 29 | ): 30 | self.flow_id = flow_id 31 | self.run_number = run_number 32 | 33 | if run_id is None: 34 | run_id = str(run_number) 35 | self.run_id = run_id 36 | 37 | self.step_name = step_name 38 | self.user_name = user_name 39 | if ts_epoch is None: 40 | ts_epoch = int(round(time.time() * 1000)) 41 | 42 | self.ts_epoch = ts_epoch 43 | self.duration = duration 44 | self.tags = tags 45 | self.system_tags = system_tags 46 | 47 | def serialize(self, expanded: bool = False): 48 | if expanded: 49 | return { 50 | "flow_id": self.flow_id, 51 | "run_number": self.run_number, 52 | "run_id": self.run_id, 53 | "step_name": self.step_name, 54 | "user_name": self.user_name, 55 | "ts_epoch": self.ts_epoch, 56 | "duration": self.duration, 57 | "tags": self.tags, 58 | "system_tags": self.system_tags, 59 | } 60 | else: 61 | return { 62 | "flow_id": self.flow_id, 63 | "run_number": str(get_exposed_run_id(self.run_number, self.run_id)), 64 | "step_name": self.step_name, 65 | "user_name": self.user_name, 66 | "ts_epoch": self.ts_epoch, 67 | "duration": self.duration, 68 | "tags": self.tags, 69 | "system_tags": self.system_tags, 70 | } 71 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/models/task_row.py: -------------------------------------------------------------------------------- 1 | from .base_row import BaseRow 2 | import time 3 | from services.data.db_utils import get_exposed_task_id, get_exposed_run_id 4 | 5 | 6 | class TaskRow(BaseRow): 7 | flow_id: str = None 8 | run_number: int = None 9 | run_id: str = None 10 | step_name: str = None 11 | task_id: int = None 12 | task_name: str = None 13 | user_name: str = None 14 | status: str = None 15 | task_ok: str = None 16 | ts_epoch: int = 0 17 | started_at: int = None 18 | finished_at: int = None 19 | duration: int = None 20 | attempt_id: int = 0 21 | tags = None 22 | system_tags = None 23 | 24 | def __init__( 25 | self, 26 | flow_id, 27 | run_number, 28 | run_id, 29 | user_name, 30 | step_name, 31 | task_id=None, 32 | task_name=None, 33 | status=None, 34 | task_ok=None, 35 | ts_epoch=None, 36 | started_at=None, 37 | finished_at=None, 38 | duration=None, 39 | 
attempt_id=0, 40 | tags=None, 41 | system_tags=None, 42 | last_heartbeat_ts=None, 43 | **kwargs 44 | ): 45 | self.flow_id = flow_id 46 | self.run_number = run_number 47 | self.run_id = run_id 48 | self.step_name = step_name 49 | self.task_id = task_id 50 | self.task_name = task_name 51 | 52 | self.user_name = user_name 53 | if ts_epoch is None: 54 | ts_epoch = int(round(time.time() * 1000)) 55 | 56 | self.status = status 57 | self.task_ok = task_ok 58 | self.ts_epoch = ts_epoch 59 | self.started_at = started_at 60 | self.finished_at = finished_at 61 | self.duration = duration 62 | self.attempt_id = attempt_id 63 | self.tags = tags 64 | self.system_tags = system_tags 65 | self.last_heartbeat_ts = last_heartbeat_ts 66 | 67 | def serialize(self, expanded: bool = False): 68 | if expanded: 69 | return { 70 | "flow_id": self.flow_id, 71 | "run_number": self.run_number, 72 | "run_id": self.run_id, 73 | "step_name": self.step_name, 74 | "task_id": self.task_id, 75 | "task_name": self.task_name, 76 | "user_name": self.user_name, 77 | "status": self.status, 78 | "task_ok": self.task_ok, 79 | "ts_epoch": self.ts_epoch, 80 | "started_at": self.started_at, 81 | "finished_at": self.finished_at, 82 | "duration": self.duration, 83 | "attempt_id": self.attempt_id, 84 | "tags": self.tags, 85 | "system_tags": self.system_tags, 86 | "last_heartbeat_ts": self.last_heartbeat_ts 87 | } 88 | else: 89 | return { 90 | "flow_id": self.flow_id, 91 | "run_number": str(get_exposed_run_id(self.run_number, self.run_id)), 92 | "step_name": self.step_name, 93 | "task_id": str(get_exposed_task_id(self.task_id, self.task_name)), 94 | "user_name": self.user_name, 95 | "status": self.status, 96 | "task_ok": self.task_ok, 97 | "ts_epoch": self.ts_epoch, 98 | "started_at": self.started_at, 99 | "finished_at": self.finished_at, 100 | "duration": self.duration, 101 | "attempt_id": self.attempt_id, 102 | "tags": self.tags, 103 | "system_tags": self.system_tags, 104 | "last_heartbeat_ts": self.last_heartbeat_ts 105 | } 106 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/postgres_async_db.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | from typing import List 5 | 6 | import aiopg 7 | import psycopg2 8 | import psycopg2.extras 9 | # baselevel classes from shared data adapter to inherit from. 10 | from services.data.postgres_async_db import \ 11 | _AsyncPostgresDB as BaseAsyncPostgresDB 12 | from services.utils import DBConfiguration, logging 13 | 14 | from .tables import (AsyncArtifactTablePostgres, AsyncFlowTablePostgres, 15 | AsyncMetadataTablePostgres, AsyncRunTablePostgres, 16 | AsyncStepTablePostgres, AsyncTaskTablePostgres) 17 | 18 | 19 | class AsyncPostgresDB(BaseAsyncPostgresDB): 20 | """ 21 | UI Backend specific database adapter. 22 | Basic functionality is inherited from the classes provided by the shared services.data.postgres_async_db module. 23 | 24 | Parameters 25 | ---------- 26 | name : str (optional) 27 | name for the DB Adapter instance. Used primarily for naming the associated logger. 
28 | """ 29 | connection = None 30 | flow_table_postgres = None 31 | run_table_postgres = None 32 | step_table_postgres = None 33 | task_table_postgres = None 34 | artifact_table_postgres = None 35 | metadata_table_postgres = None 36 | 37 | pool = None 38 | reader_pool = None 39 | db_conf: DBConfiguration = None 40 | 41 | def __init__(self, name='global'): 42 | self.name = name 43 | self.logger = logging.getLogger("AsyncPostgresDB:{name}".format(name=self.name)) 44 | 45 | tables = [] 46 | self.flow_table_postgres = AsyncFlowTablePostgres(self) 47 | self.run_table_postgres = AsyncRunTablePostgres(self) 48 | self.step_table_postgres = AsyncStepTablePostgres(self) 49 | self.task_table_postgres = AsyncTaskTablePostgres(self) 50 | self.artifact_table_postgres = AsyncArtifactTablePostgres(self) 51 | self.metadata_table_postgres = AsyncMetadataTablePostgres(self) 52 | tables.append(self.flow_table_postgres) 53 | tables.append(self.run_table_postgres) 54 | tables.append(self.step_table_postgres) 55 | tables.append(self.task_table_postgres) 56 | tables.append(self.artifact_table_postgres) 57 | tables.append(self.metadata_table_postgres) 58 | self.tables = tables 59 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/tables/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow import AsyncFlowTablePostgres 2 | from .run import AsyncRunTablePostgres 3 | from .step import AsyncStepTablePostgres 4 | from .task import AsyncTaskTablePostgres 5 | from .metadata import AsyncMetadataTablePostgres 6 | from .artifact import AsyncArtifactTablePostgres 7 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/tables/artifact.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from .base import AsyncPostgresTable 3 | from .task import AsyncTaskTablePostgres 4 | from ..models import ArtifactRow 5 | # use schema constants from the .data module to keep things consistent 6 | from services.data.postgres_async_db import AsyncArtifactTablePostgres as MetadataArtifactTable 7 | from services.data.db_utils import translate_run_key, DBResponse, DBPagination 8 | 9 | 10 | class AsyncArtifactTablePostgres(AsyncPostgresTable): 11 | _row_type = ArtifactRow 12 | table_name = MetadataArtifactTable.table_name 13 | task_table_name = AsyncTaskTablePostgres.table_name 14 | ordering = ["attempt_id DESC"] 15 | keys = MetadataArtifactTable.keys 16 | primary_keys = MetadataArtifactTable.primary_keys 17 | trigger_keys = None 18 | trigger_operations = None 19 | select_columns = keys 20 | 21 | async def get_run_parameter_artifacts(self, flow_name, run_number, postprocess=None, invalidate_cache=False): 22 | run_id_key, run_id_value = translate_run_key(run_number) 23 | 24 | # '_parameters' step has all the parameters as artifacts. only pick the 25 | # public parameters (no underscore prefix) 26 | return await self.find_records( 27 | conditions=[ 28 | "flow_id = %s", 29 | "{run_id_key} = %s".format(run_id_key=run_id_key), 30 | "step_name = %s", 31 | "name NOT LIKE %s", 32 | "name <> %s", 33 | "name <> %s" 34 | ], 35 | values=[ 36 | flow_name, 37 | run_id_value, 38 | "_parameters", 39 | r"\_%", 40 | "name", # exclude the 'name' parameter as this always exists, and contains the FlowName 41 | "script_name" # exclude the internally used 'script_name' parameter. 
42 | ], 43 | fetch_single=False, 44 | expanded=True, 45 | postprocess=postprocess, 46 | invalidate_cache=invalidate_cache 47 | ) 48 | 49 | async def get_artifact_names(self, conditions: List[str] = [], 50 | values: List[str] = [], limit: int = 0, offset: int = 0) -> Tuple[DBResponse, DBPagination]: 51 | """ 52 | Get a paginated set of artifact names. 53 | 54 | Parameters 55 | ---------- 56 | conditions : List[str] 57 | list of conditions to pass the sql execute, with %s placeholders for values 58 | values : List[str] 59 | list of values to be passed for the sql execute. 60 | limit : int (optional) (default 0) 61 | limit for the number of results 62 | offset : int (optional) (default 0) 63 | offset for the results. 64 | 65 | Returns 66 | ------- 67 | (DBResponse, DBPagination) 68 | """ 69 | sql_template = """ 70 | SELECT name FROM ( 71 | SELECT DISTINCT name, flow_id, run_number, run_id 72 | FROM {table_name} 73 | ) T 74 | {conditions} 75 | {limit} 76 | {offset} 77 | """ 78 | select_sql = sql_template.format( 79 | table_name=self.table_name, 80 | keys=",".join(self.select_columns), 81 | conditions=("WHERE {}".format(" AND ".join(conditions)) if conditions else ""), 82 | limit="LIMIT {}".format(limit) if limit else "", 83 | offset="OFFSET {}".format(offset) if offset else "" 84 | ) 85 | 86 | res, pag = await self.execute_sql(select_sql=select_sql, values=values, fetch_single=False, 87 | expanded=False, 88 | limit=limit, offset=offset, serialize=False) 89 | # process the unserialized DBResponse 90 | _body = [row[0] for row in res.body] 91 | 92 | return DBResponse(res.response_code, _body), pag 93 | 94 | async def get_run_graph_info_artifact(self, flow_name: str, run_id: str) -> DBResponse: 95 | """ 96 | Tries to locate '_graph_info' in run artifacts 97 | """ 98 | run_id_key, run_id_value = translate_run_key(run_id) 99 | 100 | db_response, *_ = await self.find_records( 101 | conditions=[ 102 | "flow_id = %s", 103 | "{run_id_key} = %s".format( 104 | run_id_key=run_id_key), 105 | "step_name = %s", 106 | "name = %s" 107 | ], 108 | values=[ 109 | flow_name, run_id_value, "_parameters", 110 | "_graph_info", 111 | ], 112 | fetch_single=True, expanded=True 113 | ) 114 | 115 | return db_response 116 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/tables/flow.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .base import AsyncPostgresTable 3 | from ..models import FlowRow 4 | from services.data.db_utils import DBResponse, DBPagination 5 | # use schema constants from the .data module to keep things consistent 6 | from services.data.postgres_async_db import AsyncFlowTablePostgres as MetadataFlowTable 7 | 8 | 9 | class AsyncFlowTablePostgres(AsyncPostgresTable): 10 | table_name = MetadataFlowTable.table_name 11 | keys = MetadataFlowTable.keys 12 | primary_keys = MetadataFlowTable.primary_keys 13 | trigger_keys = MetadataFlowTable.trigger_keys 14 | select_columns = keys 15 | _row_type = FlowRow 16 | 17 | async def get_flow_ids(self, conditions: List[str] = [], 18 | values: List[str] = [], limit: int = 0, offset: int = 0) -> (DBResponse, DBPagination): 19 | """ 20 | Get a paginated set of flow ids. 21 | 22 | Parameters 23 | ---------- 24 | conditions : List[str] 25 | list of conditions to pass the sql execute, with %s placeholders for values 26 | values : List[str] 27 | list of values to be passed for the sql execute. 
28 | limit : int (optional) (default 0) 29 | limit for the number of results 30 | offset : int (optional) (default 0) 31 | offset for the results. 32 | 33 | Returns 34 | ------- 35 | (DBResponse, DBPagination) 36 | """ 37 | sql_template = """ 38 | SELECT DISTINCT flow_id 39 | FROM {table_name} 40 | {conditions} 41 | {limit} 42 | {offset} 43 | """ 44 | select_sql = sql_template.format( 45 | table_name=self.table_name, 46 | keys=",".join(self.select_columns), 47 | conditions=("WHERE {}".format(" AND ".join(conditions)) if conditions else ""), 48 | limit="LIMIT {}".format(limit) if limit else "", 49 | offset="OFFSET {}".format(offset) if offset else "" 50 | ) 51 | 52 | res, pag = await self.execute_sql(select_sql=select_sql, values=values, fetch_single=False, 53 | expanded=False, 54 | limit=limit, offset=offset, serialize=False) 55 | # process the unserialized DBResponse 56 | _body = [row[0] for row in res.body] 57 | 58 | return DBResponse(res.response_code, _body), pag 59 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/tables/metadata.py: -------------------------------------------------------------------------------- 1 | from services.data.db_utils import DBResponse, translate_run_key 2 | from .base import AsyncPostgresTable 3 | from .task import AsyncTaskTablePostgres 4 | from ..models import MetadataRow 5 | # use schema constants from the .data module to keep things consistent 6 | from services.data.postgres_async_db import AsyncMetadataTablePostgres as MetaserviceMetadataTable 7 | 8 | 9 | class AsyncMetadataTablePostgres(AsyncPostgresTable): 10 | _row_type = MetadataRow 11 | table_name = MetaserviceMetadataTable.table_name 12 | task_table_name = AsyncTaskTablePostgres.table_name 13 | keys = MetaserviceMetadataTable.keys 14 | primary_keys = MetaserviceMetadataTable.primary_keys 15 | trigger_keys = MetaserviceMetadataTable.trigger_keys 16 | trigger_operations = ["INSERT"] 17 | trigger_conditions = [ 18 | "NEW.field_name = 'attempt'", 19 | "NEW.field_name = 'attempt_ok'", 20 | "NEW.field_name = 'code-package'", 21 | "NEW.field_name = 'code-package-url'", 22 | ] 23 | 24 | @property 25 | def select_columns(self): 26 | keys = ["{table_name}.{col} AS {col}".format(table_name=self.table_name, col=k) for k in self.keys] 27 | 28 | # Must use SELECT on the regexp matches in order to include non-matches as well, otherwise 29 | # we won't be able to fill attempt_id with NULL in case no id has been recorded 30 | # (f.ex. run-level metadata) 31 | keys.append( 32 | "(SELECT regexp_matches(tags::text, 'attempt_id:(\\d+)'))[1]::int as attempt_id" 33 | ) 34 | return keys 35 | 36 | async def get_run_codepackage_metadata(self, flow_name: str, run_id: str) -> DBResponse: 37 | """ 38 | Tries to locate 'code-package' or 'code-package-url' in run metadata. 
39 | """ 40 | run_id_key, run_id_value = translate_run_key(run_id) 41 | # 'code-package' value contains json with dstype, sha1 hash and location 42 | # 'code-package-url' value contains only location as a string 43 | db_response, *_ = await self.find_records( 44 | conditions=[ 45 | "flow_id = %s", 46 | "{run_id_key} = %s".format( 47 | run_id_key=run_id_key), 48 | "(field_name = %s OR field_name = %s)" 49 | ], 50 | values=[ 51 | flow_name, run_id_value, 52 | "code-package", "code-package-url" 53 | ], 54 | fetch_single=True, expanded=True 55 | ) 56 | 57 | return db_response 58 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/db/utils.py: -------------------------------------------------------------------------------- 1 | 2 | from services.data.db_utils import DBResponse 3 | from services.ui_backend_service.data.db.postgres_async_db import AsyncPostgresDB 4 | 5 | 6 | async def get_run_dag_data(db: AsyncPostgresDB, flow_name: str, run_number: str) -> DBResponse: 7 | """ 8 | Fetches either a _graph_info artifact, or a code-package metadata entry if the artifact is missing. 9 | Used to determine whether a run can display a DAG. 10 | """ 11 | db_response = await db.artifact_table_postgres.get_run_graph_info_artifact(flow_name, run_number) 12 | if not db_response.response_code == 200: 13 | # Try to look for codepackage if graph artifact is missing 14 | db_response = await db.metadata_table_postgres.get_run_codepackage_metadata(flow_name, run_number) 15 | 16 | return db_response 17 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/refiner/__init__.py: -------------------------------------------------------------------------------- 1 | from .task_refiner import TaskRefiner 2 | from .parameter_refiner import ParameterRefiner 3 | from .artifact_refiner import ArtifactRefiner 4 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/refiner/artifact_refiner.py: -------------------------------------------------------------------------------- 1 | from .refinery import Refinery 2 | 3 | 4 | class ArtifactRefiner(Refinery): 5 | """ 6 | Refiner class for postprocessing Artifact rows. 7 | 8 | Uses Metaflow Client API to refine Artifact's actual content from Metaflow Service and Datastore. 9 | 10 | Parameters 11 | ----------- 12 | cache : AsyncCacheClient 13 | An instance of a cache that implements the GetArtifacts action. 
14 | """ 15 | 16 | def __init__(self, cache): 17 | super().__init__(cache=cache) 18 | 19 | def _action(self): 20 | return self.cache_store.cache.GetArtifacts 21 | 22 | def _record_to_action_input(self, record): 23 | # Prefer run_id over run_number 24 | # Prefer task_name over task_id 25 | return "{flow_id}/{run_id}/{step_name}/{task_name}/{name}/{attempt_id}".format( 26 | flow_id=record['flow_id'], 27 | run_id=record.get('run_id') or record['run_number'], 28 | step_name=record['step_name'], 29 | task_name=record.get('task_name') or record['task_id'], 30 | name=record['name'], 31 | attempt_id=record['attempt_id']) 32 | 33 | async def refine_record(self, record, values): 34 | record['content'] = str(values) 35 | return record 36 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/refiner/parameter_refiner.py: -------------------------------------------------------------------------------- 1 | from .refinery import Refinery 2 | 3 | 4 | class ParameterRefiner(Refinery): 5 | """ 6 | Refiner class for postprocessing Run parameters. 7 | 8 | Uses Metaflow Client API to refine Run parameters from Metaflow Datastore. 9 | 10 | Parameters 11 | ----------- 12 | cache : AsyncCacheClient 13 | An instance of a cache that implements the GetParameters action. 14 | """ 15 | 16 | def __init__(self, cache): 17 | super().__init__(cache=cache) 18 | 19 | def _action(self): 20 | return self.cache_store.cache.GetParameters 21 | 22 | async def fetch_data(self, targets, event_stream=None, invalidate_cache=False): 23 | _res = await self._action()(targets, invalidate_cache=invalidate_cache) 24 | if _res.has_pending_request(): 25 | async for event in _res.stream(): 26 | if event["type"] == "error": 27 | # raise error, there was an exception during processing. 28 | raise GetParametersFailed(event["message"], event["id"], event["traceback"]) 29 | await _res.wait() # wait for results to be ready 30 | return _res.get() or {} # cache get() might return None if no keys are produced. 31 | 32 | def _record_to_action_input(self, record): 33 | # Prefer run_id over run_number 34 | return "{flow_id}/{run_id}".format( 35 | flow_id=record['flow_id'], 36 | run_id=record.get('run_id') or record['run_number']) 37 | 38 | async def refine_record(self, record, values): 39 | return {k: {'value': v} for k, v in values.items()} 40 | 41 | 42 | class GetParametersFailed(Exception): 43 | def __init__(self, msg="Failed to Get Parameters", id="failed-to-get-parameters", traceback_str=None): 44 | self.message = msg 45 | self.id = id 46 | self.traceback_str = traceback_str 47 | 48 | def __str__(self): 49 | return self.message 50 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/refiner/refinery.py: -------------------------------------------------------------------------------- 1 | from services.data.db_utils import DBResponse 2 | from services.ui_backend_service.features import FEATURE_REFINE_DISABLE 3 | from services.ui_backend_service.data import unpack_processed_value 4 | from services.utils import logging 5 | 6 | 7 | class Refinery(object): 8 | """ 9 | Refiner class for postprocessing database rows. 10 | 11 | Uses predefined cache actions to refine database responses with Metaflow Datastore artifacts. 12 | 13 | Parameters 14 | ----------- 15 | cache : AsyncCacheClient 16 | An instance of a cache that implements the GetArtifacts action. 
17 | """ 18 | 19 | def __init__(self, cache): 20 | self.cache_store = cache 21 | self.logger = logging.getLogger(self.__class__.__name__) 22 | 23 | def _action(self): 24 | return self.cache_store.cache.GetData 25 | 26 | async def fetch_data(self, targets, event_stream=None, invalidate_cache=False): 27 | _res = await self._action()(targets, invalidate_cache=invalidate_cache) 28 | if _res.has_pending_request(): 29 | async for event in _res.stream(): 30 | if event["type"] == "error": 31 | if event_stream: 32 | event_stream(event) 33 | await _res.wait() # wait for results to be ready 34 | return _res.get() or {} # cache get() might return None if no keys are produced. 35 | 36 | async def refine_record(self, record, values): 37 | """No refinement necessary here""" 38 | return record 39 | 40 | def _response_to_action_input(self, response: DBResponse): 41 | if isinstance(response.body, list): 42 | return [self._record_to_action_input(task) for task in response.body] 43 | else: 44 | return [self._record_to_action_input(response.body)] 45 | 46 | def _record_to_action_input(self, record): 47 | return "{flow_id}/{run_number}/{step_name}/{task_id}".format(**record) 48 | 49 | async def postprocess(self, response: DBResponse, invalidate_cache=False): 50 | """ 51 | Calls the refiner postprocessing to fetch Metaflow artifacts. 52 | 53 | Parameters 54 | ---------- 55 | response : DBResponse 56 | The DBResponse to be refined 57 | 58 | Returns 59 | ------- 60 | A refined DBResponse, or in case of errors, the original DBResponse 61 | """ 62 | if FEATURE_REFINE_DISABLE: 63 | return response 64 | 65 | if response.response_code != 200 or not response.body: 66 | return response 67 | 68 | input = self._response_to_action_input(response) 69 | 70 | errors = {} 71 | 72 | def _event_stream(event): 73 | if event.get("type") == "error" and event.get("key"): 74 | # Get last element from cache key which usually translates to "target" 75 | target = event["key"].split(':')[-1:][0] 76 | errors[target] = event 77 | 78 | data = await self.fetch_data( 79 | input, event_stream=_event_stream, invalidate_cache=invalidate_cache) 80 | 81 | async def _process(record): 82 | target = self._record_to_action_input(record) 83 | 84 | if target in errors: 85 | # Add streamed postprocess errors if any 86 | record["postprocess_error"] = format_error_body( 87 | errors[target].get("id"), 88 | errors[target].get("message"), 89 | errors[target].get("traceback") 90 | ) 91 | 92 | if target in data: 93 | success, value, detail, trace = unpack_processed_value(data[target]) 94 | if success: 95 | record = await self.refine_record(record, value) 96 | else: 97 | record['postprocess_error'] = format_error_body( 98 | value if value else "artifact-handle-failed", 99 | detail if detail else "Unknown error during postprocessing", 100 | trace 101 | ) 102 | else: 103 | record['postprocess_error'] = format_error_body( 104 | "artifact-value-not-found", 105 | "Artifact value not found" 106 | ) 107 | 108 | return record 109 | 110 | if isinstance(response.body, list): 111 | body = [await _process(task) for task in response.body] 112 | else: 113 | body = await _process(response.body) 114 | 115 | return DBResponse(response_code=response.response_code, body=body) 116 | 117 | 118 | def format_error_body(id=None, detail=None, traceback=None): 119 | ''' 120 | formatter for the "postprocess_error" key added to refined items in case of errors. 
121 | ''' 122 | return { 123 | "id": id or "artifact-refine-failure", 124 | "detail": detail, 125 | "traceback": traceback 126 | } 127 | -------------------------------------------------------------------------------- /services/ui_backend_service/data/refiner/task_refiner.py: -------------------------------------------------------------------------------- 1 | from .refinery import Refinery 2 | 3 | 4 | class TaskRefiner(Refinery): 5 | """ 6 | Refiner class for postprocessing Task rows. 7 | 8 | Uses Metaflow Client API to refine Task's actual status from Metaflow Service and Datastore. 9 | 10 | Parameters 11 | ----------- 12 | cache : AsyncCacheClient 13 | An instance of a cache that implements the GetTask action. 14 | """ 15 | 16 | def __init__(self, cache): 17 | super().__init__(cache=cache) 18 | 19 | def _action(self): 20 | return self.cache_store.cache.GetTask 21 | 22 | def _record_to_action_input(self, record): 23 | # Prefer run_id over run_number 24 | # Prefer task_name over task_id 25 | return "{flow_id}/{run_id}/{step_name}/{task_name}/{attempt_id}".format( 26 | flow_id=record['flow_id'], 27 | run_id=record.get('run_id') or record['run_number'], 28 | step_name=record['step_name'], 29 | task_name=record.get('task_name') or record['task_id'], 30 | attempt_id=record['attempt_id']) 31 | 32 | async def refine_record(self, record, values): 33 | if record['status'] == 'unknown' and values.get('_task_ok') is not None: 34 | value = values['_task_ok'] 35 | if value is False: 36 | record['status'] = 'failed' 37 | elif value is True: 38 | record['status'] = 'completed' 39 | 40 | if values.get('_foreach_stack'): 41 | value = values['_foreach_stack'] 42 | if len(value) > 0 and len(value[0]) >= 4: 43 | # The third one in the tuple is the foreach index. We access this way for backwards compatibility. 44 | record['foreach_label'] = "{}[{}]".format(record['task_id'], value[0][3]) 45 | 46 | return record 47 | -------------------------------------------------------------------------------- /services/ui_backend_service/docs/README.md: -------------------------------------------------------------------------------- 1 | # Metaflow UI Service Documentation 2 | 3 | ## Table of Contents 4 | 5 | - Optional configuration 6 | - [Configurable environment variables](environment.md) 7 | - [Plugin system](plugins.md) 8 | - API Documentation 9 | - [REST API routes](api.md) 10 | - [Realtime resource subscriptions](websockets.md#realtime-state-subscriptions-for-resources) 11 | - [Artifact Search](websockets.md#search-api) 12 | - Architecture descriptions 13 | - [Disk Cache structure](architecture.md#cache) 14 | - [Heartbeat Monitoring of active resources](architecture.md#heartbeat-monitoring) 15 | - [Realtime events through websockets](architecture.md#realtime-events-over-web-sockets) 16 | -------------------------------------------------------------------------------- /services/ui_backend_service/docs/api.md: -------------------------------------------------------------------------------- 1 | # API documentation 2 | 3 | A thorough documentation of the RESTful API routes, responses and types can be accessed through the Swagger docs that the backend serves. 
4 | These are accessible at `example.com/api/doc` 5 | 6 | ## Examples 7 | 8 | ``` 9 | /flows/HelloFlow/runs?_page=4 List page 4 10 | /flows/HelloFlow/runs?_page=2&_limit=10 List page 4, each page contains 10 items 11 | 12 | /flows/HelloFlow/runs?_order=run_number Order by `run_number` in descending order 13 | /flows/HelloFlow/runs?_order=+run_number Order by `run_number` in ascending order 14 | /flows/HelloFlow/runs?_order=-run_number Order by `run_number` in descending order 15 | /flows/HelloFlow/runs?_order=run_number,ts_epoch Order by `run_number` and `ts_epoch` in descending order 16 | 17 | /runs?_tags=user:dipper Filter by one tag 18 | /runs?_tags=user:dipper,runtime:dev Filter by multiple tags (AND) 19 | /runs?_tags:all=user:dipper,runtime:dev Filter by multiple tags (AND) 20 | /runs?_tags:any=user:dipper,runtime:dev Filter by multiple tags (OR) 21 | /runs?_tags:likeall=user:dip,untime:de Filter by multiple tags that contains string (AND) 22 | /runs?_tags:likeany=user:,untime:de Filter by multiple tags that contains string (OR) 23 | 24 | /runs?_group=flow_id Group by `flow_id` 25 | /runs?_group=flow_id,user_name Group by `flow_id` and `user_name` 26 | /runs?_group=user_name&_limit=2 Group by `user_name` and limit each group to `2` runs 27 | /runs?_group=flow_id&_order=flow_id,run_number Group by `flow_id` and order by `flow_id & run_number` 28 | /runs?_group=flow_id&user_name=dipper List runs by `dipper` and group by `flow_id` 29 | /runs?user=null `user` is NULL 30 | 31 | /flows/HelloFlow/runs?run_number=40 `run_number` equals `40` 32 | /flows/HelloFlow/runs?run_number:eq=40 `run_number` equals `40` 33 | /flows/HelloFlow/runs?run_number:ne=40 `run_number` not equals `40` 34 | /flows/HelloFlow/runs?run_number:lt=40 `run_number` less than `40` 35 | /flows/HelloFlow/runs?run_number:le=40 `run_number` less than or equals `40` 36 | /flows/HelloFlow/runs?run_number:gt=40 `run_number` greater than `40` 37 | /flows/HelloFlow/runs?run_number:ge=40 `run_number` greater than equals `40` 38 | 39 | /flows/HelloFlow/runs?user_name:co=atia `user_name` contains `atia` 40 | /flows/HelloFlow/runs?user_name:sw=mati `user_name` starts with `mati` 41 | /flows/HelloFlow/runs?user_name:ew=tias `user_name` ends with `tias` 42 | 43 | /flows?user_name=dipper,mabel `user_name` is either `dipper` OR `mabel` 44 | 45 | /flows/HelloFlow/runs?run_number:lt=60&run_number:gt=40 `run_number` less than 60 and greater than 40 46 | ``` 47 | 48 | ## Available operators 49 | 50 | | URL operator | Description | SQL operator | 51 | |--------------|-------------------------|--------------| 52 | | `eq` | equals | `=` | 53 | | `ne` | not equals | `!=` | 54 | | `lt` | less than | `<` | 55 | | `le` | less than equals | `<=` | 56 | | `gt` | greater than | `>` | 57 | | `ge` | greater than equals | `>=` | 58 | | `co` | contains | `*string*` | 59 | | `sw` | starts with | `^string*` | 60 | | `ew` | ends with | `*string$` | 61 | | `is` | is | `IS` | 62 | | `li` | is like (use with %val) | `ILIKE` | -------------------------------------------------------------------------------- /services/ui_backend_service/docs/architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture documentation for UI Service 2 | 3 | ## Cache 4 | 5 | ![Cache architecture diagram](images/cache_architecture.png) 6 | 7 | The Cache system is split into three main components 8 | - [Async Cache client](#async-cache-client) (cache interface) 9 | - [CacheFuture](#cachefuture) (awaitable cache result) 10 | - [Cache 
server](#cache-server) (worker queueing) 11 | - [Cache worker](#cache-worker) (execution of cache requests) 12 | 13 | ### Async Cache Client 14 | 15 | The Cache client is the interface for accessing cached content. It is responsible for setting up and starting the Cache server as a *subprocess*, and for setting the necessary configuration variables, such as the maximum allowed disk space and the number of cache workers. 16 | 17 | The cache client instance exposes a number of Cache Actions once the server subprocess has successfully started. These actions are the entry points for requesting cached content. The response of a cache action is an awaitable `CacheFuture`. 18 | 19 | #### CacheFuture 20 | 21 | The inner workings of the cache future are best explained with an example. Take the following cache action 22 | 23 | ```python 24 | result = await cache_client_instance.GetArtifacts("s3://location") 25 | ``` 26 | The `result` will be a CacheFuture instance, which checks whether all cache keys required by the request are present on disk (a cache hit). 27 | 28 | In case of a cache miss, the CacheFuture sends a cache request through the Cache Client instance and then waits before checking for the keys on disk again. The cache keys finally appear once a worker has finished processing the action. The future has a very generous timeout, so if the worker, server or client runs into an issue, it will take a while for the future to time out. 29 | 30 | ### Cache Server 31 | 32 | The cache server is responsible for receiving cache requests from the subprocess stdin, queueing the requests, and starting cache workers to process the queue, up to a limit. Note that the cache workers run their cache actions as *subprocesses* of the cache server. 33 | 34 | Each cache server is responsible for maintaining a non-ephemeral cache worker pool. The UI Service has multiple cache worker pools for different types of resources, such as DAGs and artifacts. The size of each pool can be controlled [via environment variables](./environment.md). 35 | 36 | To start a cache worker, the server writes the request payload to disk as a `request.json` tempfile, which the worker process then reads at start. 37 | 38 | ### Cache Worker 39 | 40 | The cache worker is a subprocess whose sole responsibility is to read the request payload from `request.json`, execute the corresponding cache action with the inputs contained in the request, and persist the produced cache keys to disk. 41 | 42 | ## Heartbeat monitoring 43 | 44 | ![Heartbeat monitoring architecture diagram](images/heartbeat_monitoring.png) 45 | 46 | Heartbeat monitoring is required to track in-flight resources that might stop executing without producing any trace of failure. 47 | 48 | ### Basic structure 49 | A heartbeat monitor keeps a list of resources along with their latest heartbeat timestamps. The list is iterated through periodically (heartbeat interval + buffer), and further processing is done on items whose timestamp has expired, for example broadcasting them as failures. 50 | 51 | Adding items for tracking is implemented with the `PyEE` event emitter internally. A `HeartbeatMonitor` class sets up its event listeners for adding and removing tracked items; a minimal sketch of this pattern is shown below.
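To make the add/expire cycle concrete, the following is a minimal sketch of the pattern, not the service's actual `HeartbeatMonitor`: the event names, the interval and the `on_expired` callback are illustrative assumptions, and the `AsyncIOEventEmitter` import path varies between pyee versions.

```python
import asyncio
import time

from pyee import AsyncIOEventEmitter  # newer pyee releases expose this under pyee.asyncio


class MinimalHeartbeatMonitor:
    """Tracks `key -> latest heartbeat (epoch seconds)` and reports expired items."""

    def __init__(self, emitter: AsyncIOEventEmitter, on_expired, interval: int = 10):
        self.watched = {}
        self.on_expired = on_expired
        self.interval = interval
        # Listeners mirror the add/update and remove broadcasts sent by ListenNotify.
        emitter.on("heartbeat-add", self.heartbeat)
        emitter.on("heartbeat-remove", self.remove)

    def heartbeat(self, key, timestamp=None):
        # Add a new item or refresh the timestamp of an already tracked one.
        self.watched[key] = timestamp or time.time()

    def remove(self, key):
        self.watched.pop(key, None)

    async def check_loop(self):
        while True:
            await asyncio.sleep(self.interval)
            cutoff = time.time() - 2 * self.interval  # heartbeat interval + buffer
            for key, ts in list(self.watched.items()):
                if ts < cutoff:
                    self.remove(key)
                    self.on_expired(key)  # e.g. broadcast the resource as failed
```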
Monitoring responsibilities are shared with the `ListenNotify` component as follows: 52 | 53 | [`HeartbeatMonitor`](../api/heartbeat_monitor.py) 54 | - periodically checks for expired heartbeats on tracked items 55 | - manages the list of tracked items (add/update/remove) 56 | 57 | [`ListenNotify`](../api/notify.py) 58 | - broadcasts resources whose heartbeats should be added or updated for tracking 59 | - broadcasts when a resource should be removed from heartbeat tracking (completion events) 60 | 61 | ## Realtime events over web sockets 62 | 63 | ![Websocket architecture diagram](images/websocket_communication.png) 64 | 65 | ### Basic structure 66 | For receiving realtime events regarding specific resources, there are two distinct components that interact over `PyEE` events: `Websocket` and `ListenNotify`. Their respective responsibilities are as follows: 67 | 68 | [`Websocket`](../api/ws.py) 69 | - handles opening web sockets and subscribing to resources 70 | - receives resources for broadcasting to subscribers 71 | - handles loading (from the database) and broadcasting of resources to affected subscriptions 72 | 73 | [`ListenNotify`](../api/notify.py) 74 | - broadcasts resources received from database events -------------------------------------------------------------------------------- /services/ui_backend_service/docs/images/cache_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/ui_backend_service/docs/images/cache_architecture.png -------------------------------------------------------------------------------- /services/ui_backend_service/docs/images/heartbeat_monitoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/ui_backend_service/docs/images/heartbeat_monitoring.png -------------------------------------------------------------------------------- /services/ui_backend_service/docs/images/websocket_communication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/ui_backend_service/docs/images/websocket_communication.png -------------------------------------------------------------------------------- /services/ui_backend_service/docs/websockets.md: -------------------------------------------------------------------------------- 1 | # Documentation for Web Socket endpoints 2 | 3 | - Realtime state subscriptions for resources 4 | - [Subscribing and unsubscribing](#subscribing-and-unsubscribing) 5 | - [Resources](#resources) 6 | - Search API 7 | - [Searching](#searching) 8 | - [Search responses](#search-responses) 9 | 10 | ## Realtime state subscriptions for resources 11 | 12 | ### Subscribing and unsubscribing 13 | Subscribing to a RESTful resource's realtime events is done by sending the following message to the 14 | `ws://HOSTNAME/ws` endpoint.
15 | 16 | ```json 17 | { 18 | "type": "SUBSCRIBE", 19 | "resource": "path-to-subscribable-restful-resource", 20 | "uuid": "client-generated-uuid" 21 | } 22 | ``` 23 | 24 | Subscribe to future events and return past data since unix time (seconds): 25 | ```json 26 | { 27 | "type": "SUBSCRIBE", 28 | "resource": "path-to-subscribable-restful-resource", 29 | "uuid": "client-generated-uuid", 30 | "since": 1602752197 31 | } 32 | ``` 33 | 34 | Unsubscribing is done through the same endpoint with the message: 35 | ```json 36 | { 37 | "type": "UNSUBSCRIBE", 38 | "uuid": "existing-client-generated-uuid" 39 | } 40 | ``` 41 | 42 | ### Resources 43 | The subscribable resource endpoints are listed below. All subscriptions also adhere to the corresponding RESTful route's query parameters to further filter received messages. 44 | 45 | ``` 46 | /flow_name/runs/ 47 | /flow_name/runs/run_number 48 | /flow_name/runs/run_number/steps 49 | /flow_name/runs/run_number/steps/step_name 50 | /flow_name/runs/run_number/steps/step_name/tasks 51 | /flow_name/runs/run_number/steps/step_name/tasks/task_id 52 | /flow_name/runs/run_number/steps/step_name/tasks/task_id/logs/out 53 | /flow_name/runs/run_number/steps/step_name/tasks/task_id/logs/err 54 | ``` 55 | 56 | ### Received messages 57 | The web socket client can receive three types of messages for its subscription: 58 | 59 | ```json 60 | { 61 | "type": "type-of-event", 62 | "resource": "path/of/subscribed/resource", 63 | "data": {}, 64 | "uuid": "uuid-of-subscription" 65 | } 66 | ``` 67 | The type can be one of `INSERT`, `UPDATE` or `DELETE`, corresponding to the respective database actions. 68 | The `data` property contains the complete object of the subscribed resource, as it would be received from a basic GET request. 69 | 70 | # Search API 71 | 72 | The Search API provides a way to search which tasks have matching artifacts for a given run. Searching is performed through a websocket connection. 73 | 74 | ## Searching 75 | 76 | The endpoint to perform searches for a given run looks like: 77 | ``` 78 | ws://HOSTNAME/flows/flow_id/runs/run_number/search?key=ARTIFACT_NAME&value=VALUE 79 | ``` 80 | where `ARTIFACT_NAME` is the name of the artifact to look for, and `VALUE` is the artifact content that we are searching for. 81 | 82 | ### Search Responses 83 | When the web socket opens for the search, the client starts receiving messages. These include progress, possible errors, and eventually the results. 84 | 85 | Progress message example: 86 | ```json 87 | { 88 | "event": { 89 | "type": "progress", 90 | "fraction": 1 91 | } 92 | } 93 | ``` 94 | The fraction is the proportion of objects loaded for the search so far, with `1` meaning everything has been loaded. 95 | 96 | Error example: 97 | ```json 98 | { 99 | "event": { 100 | "type": "error", 101 | "message": "error message", 102 | "id": "uniqueErrorId" 103 | } 104 | } 105 | ``` 106 | The unique id is either the class name of the exception or a custom id.
Here are some of the most common ones: 107 | | Error ID | Description | 108 | |---------------------------|-------------------------------------------------------------------| 109 | | `MetaflowS3AccessDenied` | server does not have access to s3 bucket | 110 | | `MetaflowS3NotFound` | s3 404 response | 111 | | `MetaflowS3URLException` | malformed s3 url | 112 | | `MetaflowS3Exception` | something went wrong with s3 access | 113 | | `artifact-handle-failed` | something went wrong with processing the artifact | 114 | 115 | Results example: 116 | ```json 117 | { 118 | "event": { 119 | "type": "result", 120 | "matches": [ 121 | { 122 | "flow_id": "FlowName", 123 | "run_number": 123, 124 | "step_name": "some_step", 125 | "task_id": 456, 126 | "searchable": true 127 | } 128 | ] 129 | } 130 | } 131 | ``` 132 | The `searchable` boolean of a single task conveys whether the task had an artifact that could be included in the search process. 133 | 134 | -------------------------------------------------------------------------------- /services/ui_backend_service/download_ui.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 6 | 7 | FILENAME="metaflow-ui.zip" 8 | DEST=${1:-$DIR/ui} 9 | 10 | UI_RELEASE_URL="https://github.com/Netflix/metaflow-ui/releases/download/${UI_VERSION}/metaflow-ui-${UI_VERSION}.zip" 11 | 12 | if [ $UI_ENABLED = "1" ] 13 | then 14 | echo "Download UI version ${UI_VERSION} from $UI_RELEASE_URL to $DEST" 15 | curl -L $UI_RELEASE_URL -o $FILENAME 16 | unzip -o $FILENAME -d $DEST 17 | rm $FILENAME 18 | else 19 | echo "UI not enabled, skip download." 20 | fi -------------------------------------------------------------------------------- /services/ui_backend_service/example.custom_quicklinks.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "href": "https://docs.metaflow.org/", 4 | "label": "Metaflow documentation" 5 | }, 6 | { 7 | "href": "https://github.com/Netflix/metaflow", 8 | "label": "GitHub" 9 | } 10 | ] -------------------------------------------------------------------------------- /services/ui_backend_service/example.notifications.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "created": 1618404534000, 4 | "message": "Upcoming service maintenance" 5 | } 6 | ] -------------------------------------------------------------------------------- /services/ui_backend_service/example.plugins.json: -------------------------------------------------------------------------------- 1 | { 2 | "plugin-example": "git@github.com:Netflix/metaflow-ui-plugin-example.git" 3 | } -------------------------------------------------------------------------------- /services/ui_backend_service/features.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | FEATURE_ENV_PREFIX = 'FEATURE_' 4 | 5 | 6 | def get_features(): 7 | """ 8 | Get a dict of features that are enabled or disabled for the process 9 | 10 | Returns 11 | ------- 12 | Dict 13 | example: 14 | { 15 | "FEATURE_SOME_FEAT": True 16 | } 17 | """ 18 | features = {} 19 | for key, val in os.environ.items(): 20 | if key.startswith(FEATURE_ENV_PREFIX): 21 | val = val.lower() 22 | features[key] = val != '0' and val != 'false' and val != 'f' 23 | return features 24 | 25 | 26 | FEATURES = get_features() 27 | 28 | FEATURE_PREFETCH_DISABLE = 
FEATURES.get('FEATURE_PREFETCH_DISABLE', False) 29 | FEATURE_CACHE_DISABLE = FEATURES.get('FEATURE_CACHE_DISABLE', False) 30 | FEATURE_S3_DISABLE = FEATURES.get('FEATURE_S3_DISABLE', False) 31 | FEATURE_REFINE_DISABLE = FEATURES.get('FEATURE_REFINE_DISABLE', False) 32 | 33 | FEATURE_PREFETCH_ENABLE = not FEATURE_PREFETCH_DISABLE 34 | FEATURE_CACHE_ENABLE = not FEATURE_CACHE_DISABLE 35 | FEATURE_S3_ENABLE = not FEATURE_S3_DISABLE 36 | FEATURE_REFINE_ENABLE = not FEATURE_REFINE_DISABLE 37 | 38 | FEATURE_WS_DISABLE = FEATURES.get('FEATURE_WS_DISABLE', False) 39 | FEATURE_DB_LISTEN_DISABLE = FEATURES.get('FEATURE_DB_LISTEN_DISABLE', False) 40 | FEATURE_HEARTBEAT_DISABLE = FEATURES.get('FEATURE_HEARTBEAT_DISABLE', False) 41 | 42 | FEATURE_WS_ENABLE = not FEATURE_WS_DISABLE 43 | FEATURE_DB_LISTEN_ENABLE = not FEATURE_DB_LISTEN_DISABLE 44 | FEATURE_HEARTBEAT_ENABLE = not FEATURE_HEARTBEAT_DISABLE 45 | 46 | if FEATURE_S3_DISABLE: 47 | os.environ["AWS_ACCESS_KEY_ID"] = "None" 48 | os.environ["AWS_SECRET_ACCESS_KEY"] = "None" 49 | os.environ["AWS_DEFAULT_REGION"] = "None" 50 | -------------------------------------------------------------------------------- /services/ui_backend_service/frontend.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | from aiohttp import web 5 | 6 | dirname = os.path.dirname(os.path.realpath(__file__)) 7 | static_ui_path = os.path.join(dirname, "ui") 8 | 9 | 10 | METAFLOW_SERVICE = os.environ.get("METAFLOW_SERVICE", "/") 11 | 12 | METAFLOW_HEAD = os.environ.get("METAFLOW_HEAD", None) 13 | METAFLOW_BODY_BEFORE = os.environ.get("METAFLOW_BODY_BEFORE", None) 14 | METAFLOW_BODY_AFTER = os.environ.get("METAFLOW_BODY_AFTER", None) 15 | 16 | 17 | class Frontend(object): 18 | """ 19 | Provides routes for the static UI webpage. 20 | Require this as the last Api, as it is a catch-all route. 21 | """ 22 | 23 | def __init__(self, app): 24 | app.router.add_static('/static', 25 | path=os.path.join(static_ui_path, "static"), 26 | name='static') 27 | 28 | # serve the root static files separately. 29 | static_files = glob.glob(os.path.join(static_ui_path, "*.*")) 30 | for filepath in static_files: 31 | filename = filepath[len(static_ui_path) + 1:] 32 | app.router.add_route( 33 | 'GET', f'/{filename}', self.serve_file(filename)) 34 | 35 | # catch-all route that unfortunately messes with root static file serving. 36 | # Refreshing SPA pages won't work without the tail. 37 | app.router.add_route('GET', '/{tail:.*}', self.serve_index_html) 38 | 39 | def serve_file(self, filename: str): 40 | "Generator for single static file serving handlers" 41 | async def filehandler(request): 42 | return web.FileResponse(os.path.join(static_ui_path, filename)) 43 | return filehandler 44 | 45 | async def serve_index_html(self, request): 46 | "Serve index.html by injecting `METAFLOW_SERVICE` variable to define API base url." 
47 | try: 48 | with open(os.path.join(static_ui_path, "index.html")) as f: 49 | content = f.read() \ 50 | .replace("</head>", 51 | "<script>window.METAFLOW_SERVICE='{METAFLOW_SERVICE}'</script></head>".format(METAFLOW_SERVICE=METAFLOW_SERVICE)) 52 | 53 | if METAFLOW_HEAD: 54 | content = content.replace("</head>", "{METAFLOW_HEAD}</head>" 55 | .format(METAFLOW_HEAD=METAFLOW_HEAD)) 56 | 57 | if METAFLOW_BODY_BEFORE: 58 | content = content.replace("<body>", "<body>{METAFLOW_BODY_BEFORE}" 59 | .format(METAFLOW_BODY_BEFORE=METAFLOW_BODY_BEFORE)) 60 | 61 | if METAFLOW_BODY_AFTER: 62 | content = content.replace("</body>", "{METAFLOW_BODY_AFTER}</body>" 63 | .format(METAFLOW_BODY_AFTER=METAFLOW_BODY_AFTER)) 64 | 65 | return web.Response(text=content, content_type='text/html') 66 | except Exception as err: 67 | return web.Response(text=str(err), status=500, content_type='text/plain') 68 | -------------------------------------------------------------------------------- /services/ui_backend_service/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from services.utils import logging 2 | from ..api.utils import get_json_config 3 | 4 | from .plugin import (Plugin, PluginException) 5 | 6 | _PLUGINS = [] 7 | 8 | logger = logging.getLogger("Plugin") 9 | 10 | 11 | def list_plugins(): 12 | global _PLUGINS 13 | return _PLUGINS 14 | 15 | 16 | def init_plugins(): 17 | global _PLUGINS 18 | 19 | logger.info("Init plugins") 20 | 21 | plugins = get_json_config("plugins") 22 | if plugins: 23 | global_auth = None 24 | if "auth" in plugins and isinstance(plugins["auth"], dict): 25 | global_auth = plugins["auth"] 26 | 27 | for identifier, value in plugins.items(): 28 | if isinstance(value, str): 29 | repository = value 30 | ref = None 31 | parameters = {} 32 | paths = None 33 | auth = global_auth 34 | elif identifier == "auth": 35 | continue 36 | elif isinstance(value, dict): 37 | repository = value.get("repository", None) 38 | ref = value.get("ref", None) 39 | parameters = value.get("parameters", {}) 40 | paths = value.get("paths", None) 41 | if "auth" in value: 42 | auth = value.get("auth", None) 43 | else: 44 | auth = global_auth 45 | else: 46 | logger.warning(" [{}] Invalid plugin format, skipping".format(identifier)) 47 | continue 48 | 49 | if paths and isinstance(paths, list): 50 | for path in paths: 51 | _load_plugin(identifier=identifier, repository=repository, ref=ref, parameters=parameters, path=path, auth=auth) 52 | else: 53 | _load_plugin(identifier=identifier, repository=repository, ref=ref, parameters=parameters, auth=auth) 54 | 55 | logger.info("Plugins ready: {}".format(list(map(lambda p: p.identifier, _PLUGINS)))) 56 | 57 | 58 | def _load_plugin(identifier: str, repository: str = None, ref: str = None, parameters: dict = {}, path: str = None, auth: dict = {}): 59 | global _PLUGINS 60 | try: 61 | plugin = Plugin(identifier=identifier, repository=repository, ref=ref, parameters=parameters, path=path, auth=auth) 62 | _PLUGINS.append(plugin.init()) 63 | except PluginException as err: 64 | logger.error(" [{}:{}] PluginException: {}".format(identifier, path, err)) 65 | except Exception as err: 66 | logger.error(" [{}:{}] Unknown error loading plugin {}".format(identifier, path, err)) 67 | 68 | 69 | def _reset_plugins(): 70 | global _PLUGINS 71 | _PLUGINS = [] 72 | -------------------------------------------------------------------------------- /services/ui_backend_service/plugins/installed/.gitignore: -------------------------------------------------------------------------------- 1 | * --------------------------------------------------------------------------------
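Judging from the `init_plugins` parsing logic above, an entry in the plugins config can be either a plain repository string (as in `example.plugins.json`) or an object with `repository`, `ref`, `parameters`, `paths` and `auth` keys, with an optional top-level `auth` object acting as a shared fallback. A hypothetical config combining both forms could look like the sketch below; the field values and the shape of the `auth` objects are illustrative assumptions.

```json
{
  "auth": { "token": "shared-access-token" },
  "plugin-example": "git@github.com:Netflix/metaflow-ui-plugin-example.git",
  "another-plugin": {
    "repository": "https://example.com/org/metaflow-ui-plugins.git",
    "ref": "v1.0.0",
    "paths": ["plugins/first", "plugins/second"],
    "parameters": { "color": "blue" },
    "auth": { "token": "overrides-the-shared-token" }
  }
}
```

Entries listed under `paths` cause `_load_plugin` to be called once per path, so a single repository can provide several plugins.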
/services/ui_backend_service/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp >= 3.8.1, < 4 2 | pyee==8.0.1 3 | throttler==1.2.0 4 | packaging 5 | psycopg2 6 | aiopg 7 | pygit2==1.12.1 8 | aiohttp_cors==0.7.0 9 | metaflow>=2.11.4 10 | click==8.0.3 11 | azure-storage-blob==12.13.1 12 | azure-identity==1.16.1 13 | google-cloud-storage~=2.10.0 14 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/ui_backend_service/tests/__init__.py -------------------------------------------------------------------------------- /services/ui_backend_service/tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # we need to register the utils helper for assert rewriting in order to get descriptive assertion errors. 4 | pytest.register_assert_rewrite("services.ui_backend_service.tests.integration_tests.utils") 5 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/integration_tests/features_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .utils import ( 3 | init_app, set_env, cli 4 | ) 5 | pytestmark = [pytest.mark.integration_tests] 6 | 7 | 8 | async def get_features(cli): 9 | return await (await cli.get('/features')).json() 10 | 11 | 12 | async def expect_features(cli, expected_features={}): 13 | assert await get_features(cli) == expected_features 14 | 15 | 16 | async def test_features_none(cli): 17 | with set_env(): 18 | assert await get_features(cli) == {} 19 | 20 | 21 | async def test_features_true(cli): 22 | with set_env({ 23 | 'FEATURE_ONE': 'true', 24 | 'FEATURE_SECOND': 'foo', 25 | 'FEATURE_THIRD': '1', 26 | 'FEATURE_FOURTH': '', 27 | 'FEATURE_FIFTH': ' ' 28 | }): 29 | await expect_features(cli, { 30 | 'FEATURE_ONE': True, 31 | 'FEATURE_SECOND': True, 32 | 'FEATURE_THIRD': True, 33 | 'FEATURE_FOURTH': True, 34 | 'FEATURE_FIFTH': True 35 | }) 36 | 37 | 38 | async def test_features_false(cli): 39 | with set_env({'FEATURE_ONE': 'false'}): 40 | await expect_features(cli, { 41 | 'FEATURE_ONE': False 42 | }) 43 | 44 | 45 | async def test_features_f(cli): 46 | with set_env({'FEATURE_ONE': 'f'}): 47 | await expect_features(cli, { 48 | 'FEATURE_ONE': False 49 | }) 50 | 51 | 52 | async def test_features_0(cli): 53 | with set_env({'FEATURE_ONE': '0'}): 54 | await expect_features(cli, { 55 | 'FEATURE_ONE': False 56 | }) 57 | 58 | 59 | async def test_features_only(cli): 60 | with set_env({ 61 | 'FEATURE_FOO': 'true', 62 | 'ANOTHER_ENV_VAR': 'bar' 63 | }): 64 | await expect_features(cli, { 65 | 'FEATURE_FOO': True 66 | }) 67 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/integration_tests/flows_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .utils import ( 3 | cli, db, 4 | add_flow, 5 | _test_list_resources, _test_single_resource 6 | ) 7 | pytestmark = [pytest.mark.integration_tests] 8 | 9 | 10 | async def test_list_flows(cli, db): 11 | await _test_list_resources(cli, db, "/flows", 200, []) 12 | 13 | _flow = (await add_flow(db, 
flow_id="HelloFlow")).body 14 | 15 | await _test_list_resources(cli, db, "/flows", 200, [_flow]) 16 | 17 | 18 | async def test_single_flow(cli, db): 19 | await _test_single_resource(cli, db, "/flows/HelloFlow", 404, {}) 20 | 21 | _flow = (await add_flow(db, flow_id="HelloFlow")).body 22 | 23 | await _test_single_resource(cli, db, "/flows/{flow_id}".format(**_flow), 200, _flow) 24 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/integration_tests/grouped_runs_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import time 3 | from .utils import ( 4 | cli, db, 5 | add_flow, add_run, add_artifact, 6 | add_step, add_task, add_metadata, 7 | _test_list_resources, _test_single_resource, get_heartbeat_ts 8 | ) 9 | pytestmark = [pytest.mark.integration_tests] 10 | 11 | @pytest.mark.skip("Test failing due to refactor. TODO: fix later if applicable") 12 | async def test_list_runs_group_by_flow_id(cli, db): 13 | await _test_list_resources(cli, db, "/runs", 200, []) 14 | await _test_list_resources(cli, db, "/runs?_group=flow_id", 200, []) 15 | 16 | first_runs = await create_n_runs(db, 11, "A-FirstFlow") 17 | second_runs = await create_n_runs(db, 11, "B-SecondFlow") 18 | 19 | # default per-group limit should be 10 20 | await _test_list_resources(cli, db, "/runs?_group=flow_id", 200, [*first_runs[:10], *second_runs[:10]], approx_keys=["duration"]) 21 | 22 | # _group_limit should limit number of records returned per group 23 | await _test_list_resources(cli, db, "/runs?_group=flow_id&_group_limit=1", 200, [first_runs[0], second_runs[0]], approx_keys=["duration"]) 24 | 25 | # _limit should limit number of groups, not number of rows. 26 | await _test_list_resources(cli, db, "/runs?_group=flow_id&_group_limit=2&_limit=1&_order=%2Brun_number", 200, first_runs[:2], approx_keys=["duration"]) 27 | 28 | # _order should order within groups. 29 | await _test_list_resources(cli, db, "/runs?_group=flow_id&_order=run_number", 200, [*first_runs[::-1][:10], *second_runs[::-1][:10]], approx_keys=["duration"]) 30 | 31 | @pytest.mark.skip("Test failing due to refactor. TODO: fix later if applicable") 32 | async def test_list_runs_group_by_user(cli, db): 33 | await _test_list_resources(cli, db, "/runs", 200, []) 34 | await _test_list_resources(cli, db, "/runs?_group=user", 200, []) 35 | 36 | first_runs = await create_n_runs(db, 11, "A-Flow", "B-user") 37 | second_runs = await create_n_runs(db, 11, "B-Flow", "A-user") 38 | 39 | # default per-group should be 10. ordering by run_number ASC within group to test sorting, 40 | # and to retain order of test runs list. 41 | await _test_list_resources(cli, db, "/runs?_group=user&_order=%2Brun", 200, [*second_runs[:10], *first_runs[:10]], approx_keys=["duration"]) 42 | 43 | # _group_limit should limit number of records returned per group 44 | await _test_list_resources(cli, db, "/runs?_group=user&&_order=%2Brun&_group_limit=1", 200, [second_runs[0], first_runs[0]], approx_keys=["duration"]) 45 | 46 | # _limit should limit number of groups, not number of rows. 
47 | await _test_list_resources(cli, db, "/runs?_group=user&&_order=%2Brun&_group_limit=2&_limit=1", 200, second_runs[:2], approx_keys=["duration"]) 48 | 49 | 50 | async def create_n_runs(db, n=1, flow_id="TestFlow", user="TestUser"): 51 | await add_flow(db, flow_id=flow_id) 52 | created_runs = [] 53 | for _ in range(n): 54 | _run = (await add_run(db, flow_id=flow_id, user_name=user, system_tags=["runtime:dev", "user:{}".format(user)])).body 55 | _run["run"] = _run["run_number"] 56 | _run["status"] = "running" 57 | _run["duration"] = max(int(round(time.time() * 1000)) - _run["ts_epoch"], 1) # approx assert breaks in the odd case when duration==0 58 | _run["user"] = user 59 | created_runs.append(_run) 60 | return created_runs 61 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/integration_tests/steps_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .utils import ( 3 | cli, db, 4 | add_flow, add_run, add_step, add_task, 5 | add_artifact, get_heartbeat_ts, 6 | _test_list_resources, _test_single_resource, update_objects_with_run_tags 7 | ) 8 | pytestmark = [pytest.mark.integration_tests] 9 | 10 | 11 | async def test_list_steps(cli, db): 12 | _flow = (await add_flow(db, flow_id="HelloFlow")).body 13 | _run = (await add_run(db, flow_id=_flow.get("flow_id"))).body 14 | 15 | await _test_list_resources(cli, db, "/flows/{flow_id}/runs/{run_number}/steps".format(**_run), 200, []) 16 | 17 | _step = (await add_step(db, flow_id=_run.get("flow_id"), step_name="step", run_number=_run.get("run_number"), run_id=_run.get("run_id"))).body 18 | 19 | _, data = await _test_list_resources(cli, db, "/flows/{flow_id}/runs/{run_number}/steps".format(**_step), 200, None) 20 | 21 | assert len(data) == 1 22 | assert data[0]['run_number'] == int(_run.get('run_number')) 23 | assert data[0]['step_name'] == 'step' 24 | 25 | 26 | async def test_single_step(cli, db): 27 | await _test_single_resource(cli, db, "/flows/HelloFlow/runs/404/steps/none", 404, {}) 28 | 29 | _flow = (await add_flow(db, flow_id="HelloFlow")).body 30 | _run = (await add_run(db, flow_id=_flow.get("flow_id"))).body 31 | _step = (await add_step(db, flow_id=_run.get("flow_id"), step_name="step", run_number=_run.get("run_number"))).body 32 | 33 | _, data = await _test_single_resource(cli, db, "/flows/{flow_id}/runs/{run_number}/steps/{step_name}".format(**_step), 200, None) 34 | 35 | assert data['run_number'] == int(_run.get('run_number')) 36 | assert data['step_name'] == 'step' 37 | 38 | 39 | async def test_step_duration(cli, db): 40 | _flow = (await add_flow(db, flow_id="HelloFlow")).body 41 | _run = (await add_run(db, flow_id=_flow.get("flow_id"))).body 42 | _step = (await add_step(db, flow_id=_run.get("flow_id"), step_name="step", run_number=_run.get("run_number"))).body 43 | _step['run_id'] = _run['run_number'] 44 | _step['duration'] = 1 # approx step duration for started step 45 | update_objects_with_run_tags('step', [_step], _run) 46 | 47 | # step duration should fallback to current time when no tasks exist. 
48 | await _test_single_resource(cli, db, "/flows/{flow_id}/runs/{run_number}/steps/{step_name}".format(**_step), 200, _step, approx_keys=["duration"]) 49 | 50 | # existing task should have an effect on step duration 51 | _task = (await add_task( 52 | db, 53 | flow_id=_flow.get("flow_id"), 54 | run_number=_run.get("run_number"), 55 | step_name=_step.get("step_name"), 56 | last_heartbeat_ts=get_heartbeat_ts(offset=10) 57 | )).body 58 | 59 | # if only task heartbeat exists, this should be used for the step duration 60 | _step['duration'] = _task['last_heartbeat_ts'] * 1000 - _step['ts_epoch'] 61 | 62 | await _test_single_resource(cli, db, "/flows/{flow_id}/runs/{run_number}/steps/{step_name}".format(**_step), 200, _step) 63 | 64 | # more recent _task_ok artifact timestamp should be used in favor of last_heartbeat if exists. 65 | 66 | _task_ok = (await add_artifact( 67 | db, 68 | flow_id=_flow.get("flow_id"), 69 | run_number=_run.get("run_number"), 70 | step_name=_step.get("step_name"), 71 | task_id=_task.get("task_id"), 72 | artifact={ 73 | "name": "_task_ok", 74 | "location": "location", 75 | "ds_type": "ds_type", 76 | "sha": "sha", 77 | "type": "type", 78 | "content_type": "content_type", 79 | "attempt_id": 0 80 | } 81 | )).body 82 | 83 | # update ts_epoch to be newer than the task heartbeat. 84 | _new_ts = _task['last_heartbeat_ts'] * 1000 + 10 85 | await db.artifact_table_postgres.update_row( 86 | filter_dict={ 87 | "flow_id": _task_ok.get("flow_id"), 88 | "run_number": _task_ok.get("run_number"), 89 | "step_name": _task_ok.get("step_name"), 90 | "task_id": _task_ok.get("task_id") 91 | }, 92 | update_dict={ 93 | "ts_epoch": _new_ts 94 | } 95 | ) 96 | 97 | # _task_ok should be used in favor of heartbeat_ts for step duration. 98 | _step['duration'] = _new_ts - _step['ts_epoch'] 99 | 100 | await _test_single_resource(cli, db, "/flows/{flow_id}/runs/{run_number}/steps/{step_name}".format(**_step), 200, _step) 101 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow-service/9e47d2d85e127d2673d457dde7ae535a3341de0f/services/ui_backend_service/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /services/ui_backend_service/tests/unit_tests/cache_utils_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from services.ui_backend_service.data.cache.utils import ( 4 | error_event_msg, progress_event_msg, search_result_event_msg, 5 | artifact_location_from_key, artifact_cache_id, unpack_pathspec_with_attempt_id, 6 | streamed_errors, cacheable_artifact_value, artifact_value 7 | ) 8 | 9 | pytestmark = [pytest.mark.unit_tests] 10 | 11 | 12 | def test_error_event_msg(): 13 | assert error_event_msg("test message", "test-id") == \ 14 | {"type": "error", "message": "test message", "id": "test-id", "traceback": None, "key": None} 15 | 16 | assert error_event_msg("test message", "test-id", "test-traceback") == \ 17 | {"type": "error", "message": "test message", "id": "test-id", "traceback": "test-traceback", "key": None} 18 | 19 | assert error_event_msg("test message", "test-id", "test-traceback", "search:artifact:s3://etc") == \ 20 | {"type": "error", "message": "test message", "id": "test-id", "traceback": "test-traceback", "key": "search:artifact:s3://etc"} 21 | 22 | 
23 | def test_progress_event_msg(): 24 | assert progress_event_msg(0.5) == {"type": "progress", "fraction": 0.5} 25 | 26 | 27 | def test_search_result_event_msg(): 28 | assert search_result_event_msg([1, 2, 3]) == {"type": "result", "matches": [1, 2, 3]} 29 | 30 | 31 | def test_artifact_cache_key_and_location_from_key(): 32 | # first generate an artifact cache key with any location 33 | _loc = "s3://test-s3-locations/artifact_location/for/cache/1" 34 | 35 | key = artifact_cache_id(_loc) 36 | 37 | assert _loc in key 38 | 39 | # We need to be able to extract the location from a cache key, to form correctly keyed responses 40 | _extracted_loc = artifact_location_from_key(key) 41 | 42 | assert _extracted_loc == _loc 43 | 44 | 45 | def test_unpack_pathspec_with_attempt_id(): 46 | pathspec = "FlowName/RunNumber/StepName/TaskId/4" 47 | pathspec_without_attempt_id, attempt_id = unpack_pathspec_with_attempt_id(pathspec) 48 | assert pathspec_without_attempt_id == "FlowName/RunNumber/StepName/TaskId" 49 | assert attempt_id == 4 50 | 51 | 52 | def test_streamed_errors_no_op(): 53 | # if nothing raised, callable should not be called 54 | def _called(): 55 | # should not have been called 56 | assert False 57 | try: 58 | with streamed_errors(_called): 59 | pass 60 | except Exception as ex: 61 | assert False # Should not have raised any exception 62 | 63 | 64 | 65 | def test_streamed_errors_exception_output(): 66 | # raised errors should be written to output callable. 67 | def _raised(output): 68 | assert output['type'] == 'error' 69 | assert output['id'] == 'Exception' 70 | assert output['message'] == 'Custom exception' 71 | assert output['traceback'] is not None 72 | 73 | try: 74 | with streamed_errors(_raised): 75 | raise Exception("Custom exception") 76 | assert False # Should never get here due to re-raising of the exception 77 | except Exception as ex: 78 | assert str(ex) == "Custom exception" 79 | 80 | 81 | def test_streamed_errors_exception_output_no_re_raise(): 82 | # should not raise any exception with re_raise set to false. 
83 | def _re_raise(output): 84 | pass 85 | 86 | try: 87 | with streamed_errors(_re_raise, re_raise=False): 88 | raise Exception("Should not be reraised") 89 | except Exception as ex: 90 | assert False # Should not have re-raised exception 91 | 92 | 93 | def test_cacheable_artifact_value(): 94 | artifact = MockArtifact("pathspec/to", 1, "test") 95 | big_artifact = MockArtifact("pathspec/to", 123456789, "test") 96 | 97 | assert cacheable_artifact_value(artifact) == '[true, "test"]' 98 | assert cacheable_artifact_value(big_artifact) == '[false, "artifact-too-large", "pathspec/to: 123456789 bytes"]' 99 | 100 | 101 | def test_artifact_value(): 102 | artifact = MockArtifact("pathspec/to", 1, "test") 103 | big_artifact = MockArtifact("pathspec/to", 123456789, "test") 104 | 105 | assert artifact_value(artifact) == (True, "test") 106 | assert artifact_value(big_artifact) == (False, "artifact-too-large", "pathspec/to: 123456789 bytes") 107 | 108 | 109 | class MockArtifact(): 110 | def __init__(self, pathspec, size, data): 111 | self.pathspec = pathspec 112 | self.size = size 113 | self.data = data 114 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/unit_tests/data_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from services.ui_backend_service.data import unpack_processed_value 4 | 5 | pytestmark = [pytest.mark.unit_tests] 6 | 7 | 8 | @pytest.mark.parametrize("value, expected", [ 9 | ([True, "test_value"], [True, 'test_value', None, None]), 10 | ([False, "CustomException"], [False, 'CustomException', None, None]), 11 | ([False, "CustomException", "error details"], [False, 'CustomException', "error details", None]), 12 | ([False, "CustomException", "error details", "stacktrace"], [False, 'CustomException', "error details", "stacktrace"]), 13 | ]) 14 | def test_unpack_processed_value_padding(value, expected): 15 | # test that the helper pads the output list with enough None items by default. 
16 | assert unpack_processed_value(value) == expected 17 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/unit_tests/get_artifacts_action_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from services.ui_backend_service.data.cache.get_data_action import lookup_id 4 | 5 | pytestmark = [pytest.mark.unit_tests] 6 | 7 | 8 | async def test_cache_key_independent_of_location_order(): 9 | locs = ["a", "b", "c"] 10 | a = lookup_id(locs) 11 | b = lookup_id(reversed(locs)) 12 | 13 | assert a == b 14 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/unit_tests/search_artifacts_action_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from services.ui_backend_service.data.cache.search_artifacts_action import lookup_id 4 | 5 | pytestmark = [pytest.mark.unit_tests] 6 | 7 | 8 | async def test_cache_key_independent_of_location_order(): 9 | locs = ["a", "b", "c"] 10 | a = lookup_id(locs, "test", "eq") 11 | b = lookup_id(reversed(locs), "test", "eq") 12 | 13 | assert a == b 14 | 15 | 16 | async def test_cache_key_dependent_on_searchterm(): 17 | locs = ["a", "b", "c"] 18 | a = lookup_id(locs, "test", "eq") 19 | b = lookup_id(locs, "another test", "eq") 20 | c = lookup_id(locs, "another test", "co") 21 | 22 | assert not a == b 23 | assert not b == c 24 | -------------------------------------------------------------------------------- /services/ui_backend_service/tests/unit_tests/search_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from services.ui_backend_service.api.search import _parse_search_term 4 | 5 | pytestmark = [pytest.mark.unit_tests] 6 | 7 | 8 | async def test_search_term_parsing(): 9 | 10 | op, term = _parse_search_term("\"test term\"") 11 | 12 | assert op == "eq" 13 | assert term == "test term" 14 | 15 | op, term = _parse_search_term("test term") 16 | 17 | assert op == "co" 18 | assert term == "test term" 19 | 20 | op, term = _parse_search_term("test \"term\"") 21 | 22 | assert op == "co" 23 | assert term == "test \"term\"" 24 | -------------------------------------------------------------------------------- /services/ui_backend_service/ui/.dockerignore: -------------------------------------------------------------------------------- 1 | * -------------------------------------------------------------------------------- /services/ui_backend_service/ui/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !.dockerignore 4 | !static/ -------------------------------------------------------------------------------- /services/ui_backend_service/ui/static/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /services/utils/tests/__init__.py: -------------------------------------------------------------------------------- 1 | from services.utils import DBConfiguration 2 | import pytest 3 | 4 | 5 | def get_test_dbconf(): 6 | """ 7 | Returns a DBConfiguration suitable for the test environment, or exits pytest completely upon failure 8 | """ 9 | db_conf = DBConfiguration(timeout=1) 10 | 11 | if db_conf.get_dsn() != "dbname=test user=test host=db_test port=5432 
password=test": 12 | pytest.exit("The test suite should only be run in a test environment. \n \ 13 | Configured database host is not suited for running tests. \n \ 14 | expected DSN to be: dbname=test user=test host=db_test port=5432 password=test") 15 | 16 | return db_conf 17 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_files = LICENSE 3 | 4 | [pycodestyle] 5 | count = False 6 | exclude = *_test.py,.svn,CVS,.bzr,.hg,.git,__pycache__,.tox,env 7 | ignore = E722,W503 8 | max-line-length = 160 9 | statistics = True 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, join, exists 2 | from setuptools import setup, find_packages 3 | 4 | 5 | def open_and_read_if_exists(path: str): 6 | try: 7 | with open(join(dirname(__file__), path)) as f: 8 | return f.read() 9 | except: 10 | return "" 11 | 12 | 13 | requirements = [] 14 | for service in ["metadata_service", "migration_service", "ui_backend_service"]: 15 | requirements += open_and_read_if_exists( 16 | "services/{}/requirements.txt".format(service) 17 | ).splitlines() 18 | 19 | requirements_tests = open_and_read_if_exists("requirements.dev.txt").splitlines() 20 | 21 | long_description = open_and_read_if_exists("README.md") 22 | 23 | setup( 24 | name="metadata_service", 25 | version="2.5.0", 26 | license="Apache License 2.0", 27 | description="Metadata Service: backend service for Metaflow", 28 | long_description=long_description, 29 | author="Machine Learning Infrastructure Team at Netflix", 30 | author_email="help@metaflow.org", 31 | url="https://github.com/Netflix/metaflow-service", 32 | keywords=["metaflow", "machinelearning", "ml"], 33 | py_modules=["services.metadata_service"], 34 | packages=find_packages(exclude=("tests",)), 35 | entry_points=""" 36 | [console_scripts] 37 | metadata_service=services.metadata_service.server:main 38 | migration_service=services.migration_service.migration_server:main 39 | ui_backend_service=services.ui_backend_service.ui_server:main 40 | """, 41 | install_requires=requirements, 42 | tests_require=requirements + requirements_tests, 43 | extras_require={"test": requirements + requirements_tests}, 44 | classifiers=[ 45 | "Development Status :: 5 - Production/Stable", 46 | "Intended Audience :: Developers", 47 | "Topic :: Software Development :: Build Tools", 48 | "License :: OSI Approved :: Apache Software License", 49 | "Programming Language :: Python :: 3.11", 50 | ], 51 | ) 52 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py311 3 | 4 | [testenv] 5 | deps = 6 | -rrequirements.txt 7 | -rrequirements.dev.txt 8 | commands = pytest --cov=services 9 | passenv = MF_METADATA_DB_HOST,MF_METADATA_DB_PORT,MF_METADATA_DB_USER,MF_METADATA_DB_PSWD,MF_METADATA_DB_NAME,MF_UI_METADATA_PORT,MF_UI_METADATA_HOST 10 | extras = tests 11 | 12 | [testenv:pylint] 13 | commands = pylint -E services --ignored-modules=psycopg2,pygit2 14 | 15 | [testenv:unit] 16 | commands = pytest --cov=services -m unit_tests 17 | 18 | [testenv:integration] 19 | commands = pytest --cov=services -m integration_tests 20 | 21 | 
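For reference, the environments above correspond to the following invocations, assuming `tox` is installed; the `unit` and `integration` environments only select the matching pytest markers, and the integration tests additionally expect the test database configuration checked in `services/utils/tests/__init__.py`.

```
tox                  # default py311 environment: full pytest run with coverage
tox -e pylint        # pylint error checking only
tox -e unit          # tests marked unit_tests
tox -e integration   # tests marked integration_tests
```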
-------------------------------------------------------------------------------- /wait-for-postgres.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | RETRIES=1; 3 | MAX_RETRIES=${POSTGRES_WAIT_MAX_RETRIES:=5}; 4 | SLEEP_SECONDS=${POSTGRES_WAIT_SLEEP_SECONDS:=1}; 5 | 6 | # Retry loop for postgres server. 7 | while !