├── .dockerignore ├── .github ├── titleLint.yml ├── CODEOWNERS ├── workflows │ ├── license.yml │ ├── pull_request.yml │ └── pypipublish.yml ├── stale.yml └── PULL_REQUEST_TEMPLATE.md ├── tests ├── __init__.py ├── unit │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── table │ │ │ ├── __init__.py │ │ │ ├── table_test_case.py │ │ │ ├── test_table_owner_api.py │ │ │ ├── test_table_badge_api.py │ │ │ ├── test_table_tag_api.py │ │ │ ├── test_table_description_api.py │ │ │ ├── test_dashboards_using_table_api.py │ │ │ └── test_table_detail_api.py │ │ ├── tag │ │ │ ├── __init__.py │ │ │ └── test_tag_common.py │ │ ├── dashboard │ │ │ ├── __init__.py │ │ │ ├── dashboard_test_case.py │ │ │ ├── test_dashboard_badge_api.py │ │ │ └── test_dashboard_tag_api.py │ │ ├── test_redshit_disable_comment_edit.py │ │ ├── test_popular_tables_api.py │ │ ├── badge │ │ │ └── test_badge_common.py │ │ ├── test_column_description_api.py │ │ └── test_user.py │ ├── proxy │ │ ├── __init__.py │ │ ├── fixtures │ │ │ ├── __init__.py │ │ │ └── atlas_test_data.py │ │ ├── roundtrip │ │ │ ├── __init__.py │ │ │ ├── roundtrip_janusgraph_proxy.py │ │ │ ├── test_janus_graph_proxy.py │ │ │ ├── roundtrip_base_proxy.py │ │ │ ├── roundtrip_neptune_proxy.py │ │ │ ├── test_neptune_proxy.py │ │ │ └── abstract_proxy_tests.py │ │ └── test_statsd_utilities.py │ ├── test_basics.py │ └── test_swagger.py └── conftest.py ├── NOTICE ├── metadata_service ├── entity │ ├── __init__.py │ ├── resource_type.py │ ├── description.py │ ├── badge.py │ ├── tag_detail.py │ ├── dashboard_summary.py │ ├── dashboard_query.py │ └── dashboard_detail.py ├── util.py ├── exception.py ├── api │ ├── healthcheck.py │ ├── swagger_doc │ │ ├── healthcheck_get.yml │ │ ├── badge │ │ │ ├── badge_get.yml │ │ │ ├── badge_delete.yml │ │ │ └── badge_put.yml │ │ ├── tag │ │ │ ├── tag_get.yml │ │ │ ├── tag_delete.yml │ │ │ └── tag_put.yml │ │ ├── neo4j │ │ │ └── detail_get.yml │ │ ├── popular_tables_get.yml │ │ ├── user │ │ │ ├── detail_get.yml │ │ │ 
├── read_get.yml │ │ │ ├── own_put.yml │ │ │ ├── follow_delete.yml │ │ │ ├── follow_put.yml │ │ │ ├── own_delete.yml │ │ │ ├── own_get.yml │ │ │ └── follow_get.yml │ │ ├── table │ │ │ ├── detail_get.yml │ │ │ ├── owner_put.yml │ │ │ ├── dashboards_using_table_get.yml │ │ │ └── owner_delete.yml │ │ ├── dashboard │ │ │ └── detail_get.yml │ │ ├── common │ │ │ ├── description_put.yml │ │ │ └── description_get.yml │ │ └── column │ │ │ ├── description_put.yml │ │ │ └── description_get.yml │ ├── system.py │ ├── popular_tables.py │ ├── __init__.py │ ├── column.py │ ├── badge.py │ ├── tag.py │ ├── dashboard.py │ └── table.py ├── metadata_wsgi.py ├── proxy │ ├── __init__.py │ ├── janus_graph_proxy.py │ ├── shared.py │ ├── statsd_utilities.py │ └── base_proxy.py ├── config.py └── __init__.py ├── MANIFEST.in ├── .dependabot └── config.yml ├── CODE_OF_CONDUCT.md ├── .gitignore ├── docs ├── proxy │ ├── atlas_proxy.md │ ├── neptune.md │ ├── gremlin.md │ └── atlas │ │ └── popular_tables.md ├── configurations.md └── structure.md ├── public.Dockerfile ├── setup.cfg ├── setup.py ├── Makefile ├── requirements.txt ├── README.md └── LICENSE /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | -------------------------------------------------------------------------------- /.github/titleLint.yml: -------------------------------------------------------------------------------- 1 | regex: (build|ci|docs|feat|fix|perf|refactor|style|test|chore|other): .* 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/unit/api/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/unit/api/table/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/unit/api/tag/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/unit/proxy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | amundsenmetadatalibrary 2 | Copyright 2018-2019 Lyft Inc. 3 | 4 | This product includes software developed at Lyft Inc. 
5 | -------------------------------------------------------------------------------- /metadata_service/entity/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/unit/api/dashboard/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/unit/proxy/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/unit/proxy/roundtrip/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include metadata_service/api/swagger_doc/*.yml 3 | include metadata_service/api/swagger_doc/*/*.yml 4 | -------------------------------------------------------------------------------- /.dependabot/config.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | update_configs: 3 | - package_manager: "python" 4 | directory: "/" 5 | update_schedule: "monthly" 6 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | This project is governed by [Amundsen's code of conduct](https://github.com/amundsen-io/amundsen/blob/master/CODE_OF_CONDUCT.md). 2 | All contributors and participants agree to abide by its terms. 3 | -------------------------------------------------------------------------------- /metadata_service/util.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from collections import namedtuple 5 | 6 | 7 | UserResourceRel = namedtuple('UserResourceRel', 'follow, own, read') 8 | -------------------------------------------------------------------------------- /metadata_service/exception.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | 5 | class NotFoundException(Exception): 6 | def __init__(self, message: str) -> None: 7 | super().__init__(message) 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.pyo 4 | *.pyt 5 | *.pytc 6 | *.egg-info 7 | .*.swp 8 | .DS_Store 9 | build/ 10 | dist/ 11 | venv/ 12 | venv3/ 13 | .cache/ 14 | .idea/ 15 | .vscode/ 16 | .coverage 17 | .mypy_cache 18 | .pytest_cache 19 | **/coverage.xml 20 | **/htmlcov/** 21 | 22 | -------------------------------------------------------------------------------- /metadata_service/api/healthcheck.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from typing import Tuple 5 | from flasgger import swag_from 6 | 7 | 8 | @swag_from('swagger_doc/healthcheck_get.yml') 9 | def healthcheck() -> Tuple[str, int]: 10 | return '', 200 11 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/healthcheck_get.yml: -------------------------------------------------------------------------------- 1 | Simple healthcheck that returns an empty response 2 | --- 3 | tags: 4 | - 'healthcheck' 5 | responses: 6 | 200: 7 | description: 'Empty response' 8 | content: 9 | application/json: 10 | schema: 11 | $ref: '#/components/schemas/EmptyResponse' 12 | -------------------------------------------------------------------------------- /metadata_service/entity/resource_type.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from enum import Enum, auto 5 | 6 | 7 | class ResourceType(Enum): 8 | Table = auto() 9 | Dashboard = auto() 10 | User = auto() 11 | 12 | 13 | def to_resource_type(*, label: str) -> ResourceType: 14 | return ResourceType[label.title()] 15 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/badge/badge_get.yml: -------------------------------------------------------------------------------- 1 | Get badges 2 | --- 3 | tags: 4 | - 'badge' 5 | responses: 6 | 200: 7 | description: 'Badges with category' 8 | content: 9 | application/json: 10 | schema: 11 | type: object 12 | properties: 13 | badges: 14 | type: array 15 | items: 16 | $ref: '#/components/schemas/Badge' 17 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/tag/tag_get.yml: -------------------------------------------------------------------------------- 1 | Get tags 2 | --- 3 | tags: 4 | - 'tag' 5 | responses: 6 | 200: 7 | description: 'The tags and their usage' 8 | content: 9 | application/json: 10 | schema: 11 | type: object 12 | properties: 13 | tag_usages: 14 | type: array 15 | items: 16 | $ref: '#/components/schemas/TagUsage' 17 | -------------------------------------------------------------------------------- /metadata_service/entity/description.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import attr 5 | from marshmallow_annotations.ext.attrs import AttrsSchema 6 | 7 | 8 | @attr.s(auto_attribs=True, kw_only=True) 9 | class Description: 10 | description: str = attr.ib() 11 | 12 | 13 | class DescriptionSchema(AttrsSchema): 14 | class Meta: 15 | target = Description 16 | register_as_scheme = True 17 | -------------------------------------------------------------------------------- /metadata_service/entity/badge.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import attr 5 | from marshmallow_annotations.ext.attrs import AttrsSchema 6 | 7 | 8 | @attr.s(auto_attribs=True, kw_only=True) 9 | class Badge: 10 | badge_name: str = attr.ib() 11 | category: str = attr.ib() 12 | 13 | 14 | class BadgeSchema(AttrsSchema): 15 | class Meta: 16 | target = Badge 17 | register_as_scheme = True 18 | -------------------------------------------------------------------------------- /tests/unit/proxy/roundtrip/roundtrip_janusgraph_proxy.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0import json 3 | 4 | 5 | import logging 6 | from .roundtrip_gremlin_proxy import RoundtripGremlinProxy 7 | from metadata_service.proxy.janus_graph_proxy import JanusGraphGremlinProxy 8 | 9 | LOGGER = logging.getLogger(__name__) 10 | 11 | 12 | class RoundtripJanusGraphProxy(JanusGraphGremlinProxy, RoundtripGremlinProxy): 13 | pass 14 | -------------------------------------------------------------------------------- /metadata_service/metadata_wsgi.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import os 5 | 6 | from metadata_service import create_app 7 | 8 | ''' 9 | Entry point to flask. 10 | ''' 11 | 12 | application = create_app( 13 | config_module_class=os.getenv('METADATA_SVC_CONFIG_MODULE_CLASS') 14 | or 'metadata_service.config.LocalConfig') 15 | 16 | if __name__ == '__main__': 17 | application.run(host='0.0.0.0', port=5002) 18 | -------------------------------------------------------------------------------- /metadata_service/entity/tag_detail.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import attr 5 | from marshmallow_annotations.ext.attrs import AttrsSchema 6 | 7 | 8 | @attr.s(auto_attribs=True, kw_only=True) 9 | class TagDetail: 10 | tag_name: str = attr.ib() 11 | tag_count: int = attr.ib() 12 | 13 | 14 | class TagDetailSchema(AttrsSchema): 15 | class Meta: 16 | target = TagDetail 17 | register_as_scheme = True 18 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/neo4j/detail_get.yml: -------------------------------------------------------------------------------- 1 | Gets system information for neo4j 2 | --- 3 | tags: 4 | - 'neo4j' 5 | responses: 6 | 200: 7 | description: 'Neo4j system information' 8 | content: 9 | application/json: 10 | schema: 11 | $ref: '#/components/schemas/Neo4jDetail' 12 | 204: 13 | description: 'Neo4j was not updated/indexed' 14 | content: 15 | application/json: 16 | schema: 17 | $ref: '#/components/schemas/ErrorResponse' 18 | -------------------------------------------------------------------------------- /tests/unit/api/table/table_test_case.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from unittest.mock import patch, Mock 5 | 6 | from tests.unit.test_basics import BasicTestCase 7 | 8 | 9 | class TableTestCase(BasicTestCase): 10 | def setUp(self) -> None: 11 | super().setUp() 12 | self.mock_client = patch('metadata_service.api.table.get_proxy_client') 13 | self.mock_proxy = self.mock_client.start().return_value = Mock() 14 | 15 | def tearDown(self) -> None: 16 | super().tearDown() 17 | self.mock_client.stop() 18 | -------------------------------------------------------------------------------- /metadata_service/entity/dashboard_summary.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from typing import List 5 | 6 | import attr 7 | from amundsen_common.models.dashboard import DashboardSummary as Summary 8 | from marshmallow_annotations.ext.attrs import AttrsSchema 9 | 10 | 11 | @attr.s(auto_attribs=True, kw_only=True) 12 | class DashboardSummary: 13 | dashboards: List[Summary] = attr.ib(factory=list) 14 | 15 | 16 | class DashboardSummarySchema(AttrsSchema): 17 | class Meta: 18 | target = DashboardSummary 19 | register_as_scheme = True 20 | -------------------------------------------------------------------------------- /tests/unit/api/dashboard/dashboard_test_case.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from unittest.mock import patch, Mock 5 | 6 | from tests.unit.test_basics import BasicTestCase 7 | 8 | 9 | class DashboardTestCase(BasicTestCase): 10 | def setUp(self) -> None: 11 | super().setUp() 12 | self.mock_client = patch('metadata_service.api.dashboard.get_proxy_client') 13 | self.mock_proxy = self.mock_client.start().return_value = Mock() 14 | 15 | def tearDown(self) -> None: 16 | super().tearDown() 17 | self.mock_client.stop() 18 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/popular_tables_get.yml: -------------------------------------------------------------------------------- 1 | Gets information about popular tables 2 | --- 3 | tags: 4 | - 'popular_tables' 5 | parameters: 6 | - in: query 7 | name: limit 8 | required: false 9 | type: integer 10 | schema: 11 | type: integer 12 | default: 10 13 | responses: 14 | 200: 15 | description: 'Popular tables' 16 | content: 17 | application/json: 18 | schema: 19 | type: object 20 | properties: 21 | popular_tables: 22 | type: array 23 | items: 24 | $ref: '#/components/schemas/PopularTables' 25 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/user/detail_get.yml: -------------------------------------------------------------------------------- 1 | Gets the user details 2 | --- 3 | tags: 4 | - 'user' 5 | parameters: 6 | - name: id 7 | in: path 8 | example: 'roald9@example.org' 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | responses: 14 | 200: 15 | description: 'User description' 16 | content: 17 | application/json: 18 | schema: 19 | $ref: '#/components/schemas/UserDetailFields' 20 | 404: 21 | description: 'User not found' 22 | content: 23 | application/json: 24 | schema: 25 | $ref: '#/components/schemas/ErrorResponse' 26 | -------------------------------------------------------------------------------- 
/metadata_service/entity/dashboard_query.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from typing import Optional 5 | 6 | import attr 7 | from marshmallow_annotations.ext.attrs import AttrsSchema 8 | 9 | 10 | @attr.s(auto_attribs=True, kw_only=True) 11 | class DashboardQuery: 12 | name: Optional[str] = attr.ib(default=None) 13 | url: Optional[str] = attr.ib(default=None) 14 | query_text: Optional[str] = attr.ib(default=None) 15 | 16 | 17 | class DashboardQuerySchema(AttrsSchema): 18 | class Meta: 19 | target = DashboardQuery 20 | register_as_scheme = True 21 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/table/detail_get.yml: -------------------------------------------------------------------------------- 1 | Gets table descriptions 2 | --- 3 | tags: 4 | - 'table' 5 | parameters: 6 | - name: table_uri 7 | in: path 8 | type: string 9 | schema: 10 | type: string 11 | required: true 12 | example: 'dynamo://gold.test_schema/test_table2' 13 | responses: 14 | 200: 15 | description: 'Table details' 16 | content: 17 | application/json: 18 | schema: 19 | $ref: '#/components/schemas/TableDetail' 20 | 404: 21 | description: 'Table not found' 22 | content: 23 | application/json: 24 | schema: 25 | $ref: '#/components/schemas/ErrorResponse' 26 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Codeowners file by GitHub 2 | # Reference: https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners 3 | # Each line is a file pattern followed by one or more owners. 4 | # Order is important; the last matching pattern takes the most 5 | # precedence. 
6 | 7 | # These owners will be the default owners for everything in 8 | # the repo. Unless a later match takes precedence, 9 | # @amundsen-io/amundsen-committerswill be requested for 10 | # review when someone opens a pull request. 11 | * @amundsen-io/amundsen-committers 12 | 13 | *.py @feng-tao @jinhyukchang @allisonsuarez @dikshathakur3119 @verdan @bolkedebruin @mgorsk1 14 | -------------------------------------------------------------------------------- /.github/workflows/license.yml: -------------------------------------------------------------------------------- 1 | name: license 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Golang 18 | uses: actions/setup-go@v2 19 | - name: Install addlicense 20 | run: | 21 | export PATH=${PATH}:`go env GOPATH`/bin 22 | go get -v -u github.com/google/addlicense 23 | - name: Check license 24 | run: | 25 | export PATH=${PATH}:`go env GOPATH`/bin 26 | addlicense -check -l mit -c "Amundsen" $(find $PWD -type f -name '*.py') -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/dashboard/detail_get.yml: -------------------------------------------------------------------------------- 1 | Gets dashboard descriptions 2 | --- 3 | tags: 4 | - 'dashboard' 5 | parameters: 6 | - name: dashboard_uri 7 | in: path 8 | type: string 9 | schema: 10 | type: string 11 | required: true 12 | example: 'mode_dashboard://gold.abcdefg/1234567' 13 | responses: 14 | 200: 15 | description: 'Dashboard details' 16 | content: 17 | application/json: 18 | schema: 19 | $ref: '#/components/schemas/DashboardDetail' 20 | 404: 21 | description: 'Dashboard not found' 22 | content: 23 | application/json: 24 | schema: 25 | $ref: '#/components/schemas/ErrorResponse' 26 | 
-------------------------------------------------------------------------------- /tests/unit/proxy/roundtrip/test_janus_graph_proxy.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from typing import Any, Mapping 5 | import unittest 6 | 7 | from .abstract_gremlin_proxy_tests import abstract_gremlin_proxy_test_class 8 | from .roundtrip_janusgraph_proxy import RoundtripJanusGraphProxy 9 | 10 | 11 | class JanusGraphGremlinProxyTest( 12 | abstract_gremlin_proxy_test_class(), unittest.TestCase): # type: ignore 13 | def _create_gremlin_proxy(self, config: Mapping[str, Any]) -> RoundtripJanusGraphProxy: 14 | # Don't use PROXY_HOST, PROXY_PORT, PROXY_PASSWORD. They might not be JanusGraph 15 | return RoundtripJanusGraphProxy(host=config['JANUS_GRAPH_URL']) 16 | -------------------------------------------------------------------------------- /tests/unit/test_basics.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import unittest 5 | 6 | from flask import current_app 7 | 8 | from metadata_service import create_app 9 | 10 | 11 | class BasicTestCase(unittest.TestCase): 12 | """ 13 | Test the service if it can standup 14 | """ 15 | 16 | def setUp(self) -> None: 17 | self.app = create_app( 18 | config_module_class='metadata_service.config.LocalConfig') 19 | self.app_context = self.app.app_context() 20 | self.app_context.push() 21 | 22 | def tearDown(self) -> None: 23 | self.app_context.pop() 24 | 25 | def test_app_exists(self) -> None: 26 | self.assertFalse(current_app is None) 27 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/table/owner_put.yml: -------------------------------------------------------------------------------- 1 | Updates table owner 2 | --- 3 | tags: 4 | - 'table' 5 | parameters: 6 | - name: table_uri 7 | in: path 8 | type: string 9 | schema: 10 | type: string 11 | required: true 12 | example: 'dynamo://gold.test_schema/test_table2' 13 | - name: owner 14 | in: path 15 | type: string 16 | schema: 17 | type: string 18 | required: true 19 | example: 'roald9@example.org' 20 | responses: 21 | 200: 22 | description: 'Owner updated' 23 | content: 24 | application/json: 25 | schema: 26 | $ref: '#/components/schemas/MessageResponse' 27 | 500: 28 | description: 'Owner was not added / updated successfully' 29 | content: 30 | application/json: 31 | schema: 32 | $ref: '#/components/schemas/ErrorResponse' 33 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 14 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 21 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - keep fresh 
8 | # Label to use when marking an issue as stale 9 | staleLabel: stale 10 | # Comment to post when marking an issue as stale. Set to `false` to disable 11 | markComment: > 12 | This issue has been automatically marked as stale because it has not had 13 | recent activity. It will be closed if no further activity occurs. 14 | # Comment to post when closing a stale issue. Set to `false` to disable 15 | closeComment: > 16 | This issue has been automatically closed for inactivity. If you still wish to 17 | make these changes, please open a new pull request or reopen this one. 18 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/table/dashboards_using_table_get.yml: -------------------------------------------------------------------------------- 1 | Gets Dashboards that is using this table 2 | --- 3 | tags: 4 | - 'table' 5 | parameters: 6 | - name: id 7 | in: path 8 | type: string 9 | schema: 10 | type: string 11 | required: true 12 | example: 'hive://gold.test_schema/test_table2' 13 | responses: 14 | 200: 15 | description: 'List of dashboards that table is used' 16 | content: 17 | application/json: 18 | schema: 19 | type: object 20 | properties: 21 | dashboards: 22 | type: array 23 | items: 24 | $ref: '#/components/schemas/DashboardSummary' 25 | 404: 26 | description: 'Table not found' 27 | content: 28 | application/json: 29 | schema: 30 | $ref: '#/components/schemas/ErrorResponse' 31 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/table/owner_delete.yml: -------------------------------------------------------------------------------- 1 | Deletes an owner from a table 2 | --- 3 | tags: 4 | - 'table' 5 | parameters: 6 | - name: table_uri 7 | in: path 8 | type: string 9 | schema: 10 | type: string 11 | required: true 12 | example: 'dynamo://gold.test_schema/test_table2' 13 | - name: owner 14 | in: path 15 | type: string 16 | schema: 17 | 
type: string 18 | required: true 19 | example: 'roald9@example.org' 20 | responses: 21 | 200: 22 | description: 'Owner deleted' 23 | content: 24 | application/json: 25 | schema: 26 | $ref: '#/components/schemas/MessageResponse' 27 | 500: 28 | description: 'Owner was not deleted successfully' 29 | content: 30 | application/json: 31 | schema: 32 | $ref: '#/components/schemas/ErrorResponse' 33 | -------------------------------------------------------------------------------- /docs/proxy/atlas_proxy.md: -------------------------------------------------------------------------------- 1 | # Atlas Proxy 2 | 3 | In order to make the Atlas-Amundsen integration smooth, we've developed a python package, 4 | [amundsenatlastypes](https://github.com/dwarszawski/amundsen-atlas-types) that has all the required entity definitions along with helper functions needed to make Atlas compatible with Amundsen. 5 | 6 | Usage and Installation of `amundsenatlastypes` can be found [here](https://github.com/dwarszawski/amundsen-atlas-types/blob/master/README.md) 7 | 8 | Minimum Requirements: 9 | - amundsenatlastypes==1.1.4 10 | - pyatlasclient==1.0.4 11 | 12 | ### Configurations 13 | 14 | Once you are done with setting up required entity definitions using [amundsenatlastypes](https://github.com/dwarszawski/amundsen-atlas-types), you are all set to use Atlas with Amundsen. 
15 | 16 | Other things to configure: 17 | 18 | - [Popular Tables](/docs/proxy/atlas/popular_tables.md) -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/common/description_put.yml: -------------------------------------------------------------------------------- 1 | Upserts description of a resource 2 | --- 3 | tags: 4 | - 'table' 5 | - 'dashboard' 6 | parameters: 7 | - name: id 8 | in: path 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | example: 'dynamo://gold.test_schema/test_table2' 14 | requestBody: 15 | content: 16 | application/json: 17 | schema: 18 | $ref: '#/components/schemas/Description' 19 | description: Resource description 20 | required: true 21 | responses: 22 | 200: 23 | description: 'Empty response' 24 | content: 25 | application/json: 26 | schema: 27 | $ref: '#/components/schemas/EmptyResponse' 28 | 404: 29 | description: 'Resource not found' 30 | content: 31 | application/json: 32 | schema: 33 | $ref: '#/components/schemas/ErrorResponse' 34 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/common/description_get.yml: -------------------------------------------------------------------------------- 1 | Gets the description of a resource 2 | --- 3 | tags: 4 | - 'table' 5 | - 'description' 6 | parameters: 7 | - name: id 8 | in: path 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | example: 'dynamo://gold.test_schema/test_table2' 14 | responses: 15 | 200: 16 | description: 'Resource description' 17 | content: 18 | application/json: 19 | schema: 20 | $ref: '#/components/schemas/Description' 21 | description: 'Resource description' 22 | 404: 23 | description: 'Resource not found' 24 | content: 25 | application/json: 26 | schema: 27 | $ref: '#/components/schemas/ErrorResponse' 28 | 500: 29 | description: 'Internal server error' 30 | content: 31 | application/json: 32 | schema: 
33 | $ref: '#/components/schemas/ErrorResponse' 34 | -------------------------------------------------------------------------------- /public.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim as base 2 | WORKDIR /app 3 | RUN pip3 install gunicorn 4 | 5 | COPY requirements.txt /app/requirements.txt 6 | RUN pip3 install -r requirements.txt 7 | 8 | COPY . /app 9 | 10 | CMD [ "python3", "metadata_service/metadata_wsgi.py" ] 11 | 12 | FROM base as oidc-release 13 | 14 | RUN pip3 install .[oidc] 15 | RUN python3 setup.py install 16 | ENV FLASK_APP_MODULE_NAME flaskoidc 17 | ENV FLASK_APP_CLASS_NAME FlaskOIDC 18 | ENV FLASK_OIDC_WHITELISTED_ENDPOINTS status,healthcheck,health 19 | ENV SQLALCHEMY_DATABASE_URI sqlite:///sessions.db 20 | 21 | # You will need to set these environment variables in order to use the oidc image 22 | # FLASK_OIDC_CLIENT_SECRETS - a path to a client_secrets.json file 23 | # FLASK_OIDC_SECRET_KEY - A secret key from your oidc provider 24 | # You will also need to mount a volume for the clients_secrets.json file. 
25 | 26 | FROM base as release 27 | RUN python3 setup.py install 28 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/user/read_get.yml: -------------------------------------------------------------------------------- 1 | Get tables a user read 2 | --- 3 | tags: 4 | - 'user' 5 | parameters: 6 | - name: user_id 7 | in: path 8 | example: 'roald9@example.org' 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | responses: 14 | 200: 15 | description: 'List of tables the user has read' 16 | content: 17 | application/json: 18 | schema: 19 | type: object 20 | properties: 21 | table: 22 | type: array 23 | items: 24 | $ref: '#/components/schemas/PopularTables' 25 | 404: 26 | description: 'User not found' 27 | content: 28 | application/json: 29 | schema: 30 | $ref: '#/components/schemas/ErrorResponse' 31 | 500: 32 | description: 'Internal server error' 33 | content: 34 | application/json: 35 | schema: 36 | $ref: '#/components/schemas/ErrorResponse' 37 | -------------------------------------------------------------------------------- /metadata_service/api/system.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from http import HTTPStatus 5 | from typing import Iterable, Union, Mapping 6 | 7 | from flask_restful import Resource 8 | from flasgger import swag_from 9 | 10 | from metadata_service.proxy import get_proxy_client 11 | 12 | 13 | class Neo4jDetailAPI(Resource): 14 | """ 15 | API to fetch system information for neo4j 16 | """ 17 | 18 | def __init__(self) -> None: 19 | self.client = get_proxy_client() 20 | 21 | @swag_from('swagger_doc/neo4j/detail_get.yml') 22 | def get(self) -> Iterable[Union[Mapping, int, None]]: 23 | last_updated_ts = self.client.get_latest_updated_ts() 24 | if last_updated_ts is not None: 25 | return {'neo4j_latest_timestamp': int(last_updated_ts)}, HTTPStatus.OK 26 | else: 27 | return {'message': 'neo4j / es hasnt been updated / indexed.'}, HTTPStatus.NO_CONTENT 28 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | format = pylint 3 | exclude = .svc,CVS,.bzr,.hg,.git,__pycache__,venv 4 | max-complexity = 10 5 | max-line-length = 120 6 | ignore = I201,W503,E999 7 | 8 | [pep8] 9 | max-line-length = 120 10 | 11 | [tool:pytest] 12 | addopts = --cov=metadata_service --cov-fail-under=65 --cov-report=term-missing:skip-covered --cov-report=xml --cov-report=html -vvv 13 | 14 | [coverage:run] 15 | branch = True 16 | # These are only tested roundtrip 17 | omit = 18 | metadata_service/proxy/gremlin_proxy.py 19 | metadata_service/proxy/janus_graph_proxy.py 20 | metadata_service/proxy/neptune_proxy.py 21 | 22 | [coverage:xml] 23 | output = build/coverage.xml 24 | 25 | [coverage:html] 26 | directory = build/coverage_html 27 | 28 | [coverage:report] 29 | exclude_lines = 30 | pragma: no cover 31 | from * 32 | import * 33 | 34 | [mypy] 35 | python_version = 3.6 36 | disallow_untyped_defs = True 37 | ignore_missing_imports = True 38 | strict_optional = True 39 | 
warn_no_return = True 40 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/badge/badge_delete.yml: -------------------------------------------------------------------------------- 1 | Delete badges of a resource 2 | --- 3 | tags: 4 | - 'table' 5 | - 'dashboard' 6 | parameters: 7 | - name: id 8 | in: path 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | example: 'hive://gold.test_schema/test_table1' 14 | - name: badge_name 15 | in: path 16 | type: string 17 | schema: 18 | type: string 19 | required: true 20 | example: 'beta' 21 | - name: category 22 | in: query 23 | type: string 24 | schema: 25 | type: string 26 | required: true 27 | example: 'table_status' 28 | responses: 29 | 200: 30 | description: 'The badge was deleted successfully' 31 | content: 32 | application/json: 33 | schema: 34 | $ref: '#/components/schemas/MessageResponse' 35 | 404: 36 | description: 'Table or badge not found' 37 | content: 38 | application/json: 39 | schema: 40 | $ref: '#/components/schemas/ErrorResponse' 41 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/badge/badge_put.yml: -------------------------------------------------------------------------------- 1 | Add badge to a resource 2 | --- 3 | tags: 4 | - 'table' 5 | - 'dashboard' 6 | parameters: 7 | - name: id 8 | in: path 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | example: 'hive://gold.test_schema/test_table1' 14 | - name: badge 15 | in: path 16 | type: string 17 | schema: 18 | type: string 19 | required: true 20 | example: 'beta' 21 | - name: category 22 | in: query 23 | type: string 24 | schema: 25 | type: string 26 | required: true 27 | example: 'table_status' 28 | responses: 29 | 200: 30 | description: 'The badge was added successfully' 31 | content: 32 | application/json: 33 | schema: 34 | $ref: '#/components/schemas/MessageResponse' 35 | 
404: 36 | description: 'Table not found, or badge is not whitelisted' 37 | content: 38 | application/json: 39 | schema: 40 | $ref: '#/components/schemas/ErrorResponse' 41 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/tag/tag_delete.yml: -------------------------------------------------------------------------------- 1 | Delete tags of a resource 2 | --- 3 | tags: 4 | - 'table' 5 | - 'dashboard' 6 | parameters: 7 | - name: id 8 | in: path 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | example: 'hive://gold.test_schema/test_table1' 14 | - name: tag 15 | in: path 16 | type: string 17 | schema: 18 | type: string 19 | required: true 20 | example: 'tagw' 21 | - name: tag_type 22 | in: query 23 | type: string 24 | schema: 25 | type: string 26 | required: false 27 | default: 'default' 28 | example: 'badge' 29 | responses: 30 | 200: 31 | description: 'The tag was deleted successfully' 32 | content: 33 | application/json: 34 | schema: 35 | $ref: '#/components/schemas/MessageResponse' 36 | 404: 37 | description: 'Table or tag not found' 38 | content: 39 | application/json: 40 | schema: 41 | $ref: '#/components/schemas/ErrorResponse' 42 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/tag/tag_put.yml: -------------------------------------------------------------------------------- 1 | Adds tag on a resource 2 | --- 3 | tags: 4 | - 'table' 5 | - 'dashboard' 6 | parameters: 7 | - name: id 8 | in: path 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | example: 'hive://gold.test_schema/test_table1' 14 | - name: tag 15 | in: path 16 | type: string 17 | schema: 18 | type: string 19 | required: true 20 | example: 'tag3' 21 | - name: tag_type 22 | in: query 23 | type: string 24 | schema: 25 | type: string 26 | required: false 27 | default: 'default' 28 | example: 'badge' 29 | responses: 30 | 200: 31 
| description: 'The tag was added successfully' 32 | content: 33 | application/json: 34 | schema: 35 | $ref: '#/components/schemas/MessageResponse' 36 | 404: 37 | description: 'Table not found, or tag has type badge and is not whitelisted' 38 | content: 39 | application/json: 40 | schema: 41 | $ref: '#/components/schemas/ErrorResponse' 42 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/user/own_put.yml: -------------------------------------------------------------------------------- 1 | Update the user owner information 2 | --- 3 | tags: 4 | - 'user' 5 | parameters: 6 | - name: user_id 7 | in: path 8 | example: 'roald9@example.org' 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | - name: resource_type 14 | in: path 15 | example: 'table' 16 | description: 'resource_type is ignored at the moment' 17 | type: string 18 | schema: 19 | type: string 20 | required: true 21 | - name: table_uri 22 | in: path 23 | example: 'hive://gold.test_schema/test_table1' 24 | type: string 25 | schema: 26 | type: string 27 | required: true 28 | responses: 29 | 200: 30 | description: 'User was added as owner successfully' 31 | content: 32 | application/json: 33 | schema: 34 | $ref: '#/components/schemas/MessageResponse' 35 | 500: 36 | description: 'Internal server error' 37 | content: 38 | application/json: 39 | schema: 40 | $ref: '#/components/schemas/ErrorResponse' 41 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/column/description_put.yml: -------------------------------------------------------------------------------- 1 | Updates column description (passed as a request body) 2 | --- 3 | tags: 4 | - 'column' 5 | parameters: 6 | - name: table_uri 7 | in: path 8 | type: string 9 | schema: 10 | type: string 11 | required: true 12 | example: 'dynamo://gold.test_schema/test_table2' 13 | - name: column_name 14 | in: path 15 | 
type: string 16 | schema: 17 | type: string 18 | required: true 19 | example: 'col2' 20 | requestBody: 21 | content: 22 | application/json: 23 | schema: 24 | $ref: '#/components/schemas/Description' 25 | description: 'Column description' 26 | required: true 27 | responses: 28 | 200: 29 | description: 'Empty json response' 30 | content: 31 | application/json: 32 | schema: 33 | $ref: '#/components/schemas/EmptyResponse' 34 | 404: 35 | description: 'Table uri with this column not found' 36 | content: 37 | application/json: 38 | schema: 39 | $ref: '#/components/schemas/ErrorResponse' 40 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/user/follow_delete.yml: -------------------------------------------------------------------------------- 1 | Delete the user following information 2 | --- 3 | tags: 4 | - 'user' 5 | parameters: 6 | - name: user_id 7 | in: path 8 | example: 'roald9@example.org' 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | - name: resource_type 14 | in: path 15 | example: 'table' 16 | description: 'resource_type is ignored at the moment' 17 | type: string 18 | schema: 19 | type: string 20 | required: true 21 | - name: resource_id 22 | in: path 23 | example: 'hive://gold.test_schema/test_table1' 24 | type: string 25 | schema: 26 | type: string 27 | required: true 28 | responses: 29 | 200: 30 | description: 'User following for table removed' 31 | content: 32 | application/json: 33 | schema: 34 | $ref: '#/components/schemas/MessageResponse' 35 | 500: 36 | description: 'Internal server error' 37 | content: 38 | application/json: 39 | schema: 40 | $ref: '#/components/schemas/ErrorResponse' 41 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/user/follow_put.yml: -------------------------------------------------------------------------------- 1 | Updates the user following information 2 | --- 3 | tags: 
4 | - 'user' 5 | parameters: 6 | - name: user_id 7 | in: path 8 | example: 'roald9@example.org' 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | - name: resource_type 14 | in: path 15 | example: 'table' 16 | description: 'resource_type is ignored at the moment' 17 | type: string 18 | schema: 19 | type: string 20 | required: true 21 | - name: resource_id 22 | in: path 23 | example: 'hive://gold.test_schema/test_table1' 24 | type: string 25 | schema: 26 | type: string 27 | required: true 28 | responses: 29 | 200: 30 | description: 'Added user as follower of the table' 31 | content: 32 | application/json: 33 | schema: 34 | $ref: '#/components/schemas/MessageResponse' 35 | 500: 36 | description: 'Internal server error' 37 | content: 38 | application/json: 39 | schema: 40 | $ref: '#/components/schemas/ErrorResponse' 41 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/user/own_delete.yml: -------------------------------------------------------------------------------- 1 | Delete the user owner information 2 | --- 3 | tags: 4 | - 'user' 5 | parameters: 6 | - name: user_id 7 | in: path 8 | example: 'roald9@example.org' 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | - name: resource_type 14 | in: path 15 | example: 'table' 16 | description: 'resource_type is ignored at the moment' 17 | type: string 18 | schema: 19 | type: string 20 | required: true 21 | - name: table_uri 22 | in: path 23 | example: 'hive://gold.test_schema/test_table1' 24 | type: string 25 | schema: 26 | type: string 27 | required: true 28 | responses: 29 | 200: 30 | description: 'User owner information deleted successfully' 31 | content: 32 | application/json: 33 | schema: 34 | $ref: '#/components/schemas/MessageResponse' 35 | 500: 36 | description: 'Internal server error' 37 | content: 38 | application/json: 39 | schema: 40 | $ref: '#/components/schemas/ErrorResponse' 41 |
-------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/user/own_get.yml: -------------------------------------------------------------------------------- 1 | Gets the user owner information 2 | --- 3 | tags: 4 | - 'user' 5 | parameters: 6 | - name: user_id 7 | in: path 8 | example: 'roald9@example.org' 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | responses: 14 | 200: 15 | description: 'List of tables the user has owned' 16 | content: 17 | application/json: 18 | schema: 19 | type: object 20 | properties: 21 | table: 22 | type: array 23 | items: 24 | $ref: '#/components/schemas/PopularTables' 25 | dashboard: 26 | type: array 27 | items: 28 | $ref: '#/components/schemas/DashboardSummary' 29 | 404: 30 | description: 'User not found' 31 | content: 32 | application/json: 33 | schema: 34 | $ref: '#/components/schemas/ErrorResponse' 35 | 500: 36 | description: 'Internal server error' 37 | content: 38 | application/json: 39 | schema: 40 | $ref: '#/components/schemas/ErrorResponse' 41 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/user/follow_get.yml: -------------------------------------------------------------------------------- 1 | Gets the user following information 2 | --- 3 | tags: 4 | - 'user' 5 | parameters: 6 | - name: user_id 7 | in: path 8 | example: 'roald9@example.org' 9 | type: string 10 | schema: 11 | type: string 12 | required: true 13 | responses: 14 | 200: 15 | description: 'List of resources that user has followed' 16 | content: 17 | application/json: 18 | schema: 19 | type: object 20 | properties: 21 | table: 22 | type: array 23 | items: 24 | $ref: '#/components/schemas/PopularTables' 25 | dashboard: 26 | type: array 27 | items: 28 | $ref: '#/components/schemas/DashboardSummary' 29 | 404: 30 | description: 'User not found' 31 | content: 32 | application/json: 33 | schema: 34 | $ref:
'#/components/schemas/ErrorResponse' 35 | 500: 36 | description: 'Internal server error' 37 | content: 38 | application/json: 39 | schema: 40 | $ref: '#/components/schemas/ErrorResponse' 41 | -------------------------------------------------------------------------------- /metadata_service/api/popular_tables.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from http import HTTPStatus 5 | from typing import Iterable, List, Mapping, Union 6 | 7 | from amundsen_common.models.popular_table import (PopularTable, 8 | PopularTableSchema) 9 | from flasgger import swag_from 10 | from flask import request 11 | from flask_restful import Resource 12 | from metadata_service.proxy import get_proxy_client 13 | 14 | 15 | class PopularTablesAPI(Resource): 16 | """ 17 | PopularTables API 18 | """ 19 | 20 | def __init__(self) -> None: 21 | self.client = get_proxy_client() 22 | 23 | @swag_from('swagger_doc/popular_tables_get.yml') 24 | def get(self) -> Iterable[Union[Mapping, int, None]]: 25 | limit = request.args.get('limit', 10, type=int) 26 | popular_tables: List[PopularTable] = self.client.get_popular_tables(num_entries=limit) 27 | popular_tables_json: str = PopularTableSchema(many=True).dump(popular_tables).data 28 | return {'popular_tables': popular_tables_json}, HTTPStatus.OK 29 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import os 5 | 6 | from setuptools import setup, find_packages 7 | 8 | __version__ = '3.0.0' 9 | 10 | 11 | requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt') 12 | with open(requirements_path) as requirements_file: 13 | requirements = requirements_file.readlines() 14 | 15 | setup( 16 | name='amundsen-metadata', 17 | version=__version__, 18 | description='Metadata service for Amundsen', 19 | url='https://www.github.com/amundsen-io/amundsenmetadatalibrary', 20 | maintainer='Amundsen TSC', 21 | maintainer_email='amundsen-tsc@lists.lfai.foundation', 22 | packages=find_packages(exclude=['tests*']), 23 | include_package_data=True, 24 | zip_safe=False, 25 | dependency_links=[], 26 | install_requires=requirements, 27 | extras_require={ 28 | 'oidc': ['flaskoidc==0.1.1'] 29 | }, 30 | python_requires=">=3.6", 31 | classifiers=[ 32 | 'Programming Language :: Python :: 3.6', 33 | 'Programming Language :: Python :: 3.7', 34 | ], 35 | ) 36 | -------------------------------------------------------------------------------- /metadata_service/api/swagger_doc/column/description_get.yml: -------------------------------------------------------------------------------- 1 | Gets column description using table_uri and column_name 2 | --- 3 | tags: 4 | - 'column' 5 | parameters: 6 | - name: table_uri 7 | in: path 8 | type: string 9 | schema: 10 | type: string 11 | required: true 12 | example: 'dynamo://gold.test_schema/test_table2' 13 | - name: column_name 14 | in: path 15 | type: string 16 | schema: 17 | type: string 18 | required: true 19 | example: 'col2' 20 | responses: 21 | 200: 22 | description: 'Column description' 23 | content: 24 | application/json: 25 | schema: 26 | type: object 27 | properties: 28 | description: 29 | type: string 30 | example: 'Identifies a user' 31 | 404: 32 | description: 'Table uri with this column not found' 33 | content: 34 | application/json: 35 | schema: 36 | 
$ref: '#/components/schemas/ErrorResponse' 37 | 500: 38 | description: 'Internal error fetching the column description' 39 | content: 40 | application/json: 41 | schema: 42 | $ref: '#/components/schemas/ErrorResponse' 43 | -------------------------------------------------------------------------------- /.github/workflows/pull_request.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | on: pull_request 5 | jobs: 6 | pre-commit: 7 | runs-on: ubuntu-18.04 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v1 11 | - name: Setup python 3.6 12 | uses: actions/setup-python@v1 13 | with: 14 | python-version: 3.6 15 | test-unit: 16 | runs-on: ubuntu-18.04 17 | strategy: 18 | matrix: 19 | python-version: ['3.6.x', '3.7.x'] 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v1 23 | - name: Setup python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v1 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: pip3 install -r requirements.txt && pip3 install .[all] && pip3 install codecov 29 | - name: Run python unit tests 30 | run: make test 31 | - name: Codecov 32 | uses: codecov/codecov-action@v1 33 | -------------------------------------------------------------------------------- /docs/proxy/neptune.md: -------------------------------------------------------------------------------- 1 | # Neptune 2 | 3 | ## Documentation 4 | 5 | In particular, see [Gremlin differences](https://docs.aws.amazon.com/neptune/latest/userguide/access-graph-gremlin-differences.html), 6 | and [Gremlin sessions](https://docs.aws.amazon.com/neptune/latest/userguide/access-graph-gremlin-sessions.html). 
7 | 8 | And any time you see docs from Kelvin (like the PRACTICAL GREMLIN book or lots of stackoverflow) pay 9 | attention, he works for AWS on Neptune. 10 | 11 | ## IAM authentication 12 | 13 | The gremlin transport is usually websockets, and the requests-aws4auth library we use elsewhere is 14 | for requests, which does not support websockets at all. So we rolled our own in `aws4authwebsocket`. 15 | The saving grace of websockets and IAM is that the IAM authentication really only applies to the 16 | initialization request and the rest of the data flows over the existing TCP connection. The usual 17 | gremlin-python transport is Tornado, which was a huge pain to try and insinuate the aws4 18 | authentication in to, so we use the websockets-client library instead. 19 | 20 | ## How to get a gremlin console for AWS 21 | 22 | They have pretty decent recipe [here](https://docs.aws.amazon.com/neptune/latest/userguide/iam-auth-connecting-gremlin-java.html) 23 | -------------------------------------------------------------------------------- /tests/unit/api/test_redshit_disable_comment_edit.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project.
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import json 5 | import unittest 6 | from http import HTTPStatus 7 | from unittest.mock import patch 8 | from tests.unit.test_basics import BasicTestCase 9 | 10 | 11 | class RedshiftCommentEditDisableTest(BasicTestCase): 12 | 13 | def test_table_comment_edit(self) -> None: 14 | with patch('metadata_service.api.table.get_proxy_client'): 15 | table_uri = 'hive://gold.test_schema/test_table' 16 | url = '/table/' + table_uri + '/description' 17 | response = self.app.test_client().put(url, data=json.dumps({'description': 'test table'})) 18 | self.assertEqual(response.status_code, HTTPStatus.OK) 19 | 20 | def test_column_comment_edit(self) -> None: 21 | with patch('metadata_service.api.column.get_proxy_client'): 22 | table_uri = 'hive://gold.test_schema/test_table' 23 | column_name = 'foo' 24 | url = '/table/' + table_uri + '/column/' + column_name + '/description' 25 | response = self.app.test_client().put(url, data=json.dumps({'description': 'test column'})) 26 | self.assertEqual(response.status_code, HTTPStatus.OK) 27 | 28 | 29 | if __name__ == '__main__': 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /.github/workflows/pypipublish.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Build and Deploy 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - '*' 9 | jobs: 10 | build-and-publish-python-module: 11 | name: Build and publish python module to pypi 12 | runs-on: ubuntu-18.04 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v1 16 | - name: Setup python 3.6 17 | uses: actions/setup-python@v1 18 | with: 19 | python-version: 3.6 20 | - name: Add wheel dependency 21 | run: pip install wheel 22 | - name: Generate dist 23 | run: python setup.py sdist bdist_wheel 24 | - name: Publish to PyPI 25 | if: startsWith(github.event.ref, 'refs/tags') 26 | uses: pypa/gh-action-pypi-publish@master 27 | 
with: 28 | user: __token__ 29 | password: ${{ secrets.pypi_password }} 30 | build-and-publish-docker-image: 31 | name: Build and publish docker image 32 | runs-on: ubuntu-18.04 33 | steps: 34 | - name: Checkout 35 | uses: actions/checkout@v1 36 | - name: Login with docker 37 | run: docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} 38 | - name: Publish to Registry for latest 39 | if: success() 40 | run: make build-push-image-latest 41 | - name: Publish to Registry for version 42 | if: startsWith(github.event.ref, 'refs/tags') 43 | run: make build-push-image-version 44 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | IMAGE := amundsendev/amundsen-metadata 2 | OIDC_IMAGE := ${IMAGE}-oidc 3 | VERSION:= $(shell grep -m 1 '__version__' setup.py | cut -d '=' -f 2 | tr -d "'" | tr -d '[:space:]') 4 | 5 | .PHONY: clean 6 | clean: 7 | find . -name \*.pyc -delete 8 | find . -name __pycache__ -delete 9 | rm -rf dist/ 10 | 11 | .PHONY: test_unit 12 | test_unit: 13 | python3 -b -m pytest tests 14 | 15 | lint: 16 | python3 -m flake8 17 | 18 | .PHONY: mypy 19 | mypy: 20 | mypy --ignore-missing-imports --follow-imports=skip --strict-optional --warn-no-return . 21 | 22 | .PHONY: test 23 | test: test_unit lint mypy 24 | 25 | .PHONY: image 26 | image: 27 | docker build -f public.Dockerfile -t ${IMAGE}:latest . 28 | 29 | .PHONY: image-version 30 | image-version: 31 | docker build -f public.Dockerfile -t ${IMAGE}:${VERSION} . 32 | 33 | .PHONY: push-image-version 34 | push-image-version: 35 | docker push ${IMAGE}:${VERSION} 36 | 37 | .PHONY: push-image 38 | push-image: 39 | docker push ${IMAGE}:latest 40 | 41 | .PHONY: oidc-image 42 | oidc-image: 43 | docker build -f public.Dockerfile --target=oidc-release -t ${OIDC_IMAGE}:${VERSION} . 
44 | docker tag ${OIDC_IMAGE}:${VERSION} ${OIDC_IMAGE}:latest 45 | 46 | .PHONY: push-oidc-image 47 | push-oidc-image: 48 | docker push ${OIDC_IMAGE}:${VERSION} 49 | docker push ${OIDC_IMAGE}:latest 50 | 51 | 52 | 53 | .PHONY: build-push-image 54 | build-push-image-latest: image oidc-image push-image push-oidc-image 55 | build-push-image-version: image-version push-image-version 56 | -------------------------------------------------------------------------------- /tests/unit/proxy/roundtrip/roundtrip_base_proxy.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from metadata_service.proxy import BaseProxy 5 | from abc import abstractmethod 6 | from amundsen_common.models.table import Table, Application, Column, ProgrammaticDescription 7 | from amundsen_common.models.user import User 8 | from typing import List 9 | 10 | 11 | class RoundtripBaseProxy(BaseProxy): 12 | """ 13 | A base proxy that supports roundtrip tests 14 | """ 15 | @abstractmethod 16 | def put_user(self, *, data: User) -> None: 17 | pass 18 | 19 | @abstractmethod 20 | def post_users(self, *, data: List[User]) -> None: 21 | pass 22 | 23 | @abstractmethod 24 | def put_app(self, *, data: Application) -> None: 25 | pass 26 | 27 | @abstractmethod 28 | def post_apps(self, *, data: List[Application]) -> None: 29 | pass 30 | 31 | @abstractmethod 32 | def put_table(self, *, table: Table) -> None: 33 | pass 34 | 35 | @abstractmethod 36 | def post_tables(self, *, tables: List[Table]) -> None: 37 | pass 38 | 39 | @abstractmethod 40 | def put_column(self, *, table_uri: str, column: Column) -> None: 41 | pass 42 | 43 | @abstractmethod 44 | def put_programmatic_table_description(self, *, table_uri: str, description: ProgrammaticDescription) -> None: 45 | pass 46 | 47 | @abstractmethod 48 | def add_read_count(self, *, table_uri: str, user_id: str, read_count: int) -> None: 49 |
pass 50 | -------------------------------------------------------------------------------- /tests/unit/api/table/test_table_owner_api.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from http import HTTPStatus 5 | 6 | from tests.unit.api.table.table_test_case import TableTestCase 7 | 8 | TABLE_URI = 'wizards' 9 | OWNER = 'harry' 10 | 11 | 12 | class TestTableOwnerAPI(TableTestCase): 13 | 14 | def test_should_update_table_owner(self) -> None: 15 | response = self.app.test_client().put(f'/table/{TABLE_URI}/owner/{OWNER}') 16 | 17 | self.assertEqual(response.status_code, HTTPStatus.OK) 18 | self.mock_proxy.add_owner.assert_called_with(table_uri=TABLE_URI, owner=OWNER) 19 | 20 | def test_should_fail_when_owner_update_fails(self) -> None: 21 | self.mock_proxy.add_owner.side_effect = RuntimeError() 22 | 23 | response = self.app.test_client().put(f'/table/{TABLE_URI}/owner/{OWNER}') 24 | 25 | self.assertEqual(response.status_code, HTTPStatus.INTERNAL_SERVER_ERROR) 26 | 27 | def test_should_delete_table_owner(self) -> None: 28 | response = self.app.test_client().delete(f'/table/{TABLE_URI}/owner/{OWNER}') 29 | 30 | self.assertEqual(response.status_code, HTTPStatus.OK) 31 | self.mock_proxy.delete_owner.assert_called_with(table_uri=TABLE_URI, owner=OWNER) 32 | 33 | def test_should_fail_when_delete_owner_fails(self) -> None: 34 | self.mock_proxy.delete_owner.side_effect = RuntimeError() 35 | 36 | response = self.app.test_client().delete(f'/table/{TABLE_URI}/owner/{OWNER}') 37 | 38 | self.assertEqual(response.status_code, HTTPStatus.INTERNAL_SERVER_ERROR) 39 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import pytest 5 | 6 | from _pytest.config import Config, Parser 7 | from _pytest.nodes import Item 8 | from typing import List 9 | 10 | # This file configures the roundtrip pytest option and skips roundtrip tests without it 11 | 12 | 13 | def pytest_addoption(parser: Parser) -> None: 14 | parser.addoption( 15 | "--roundtrip-neptune", action="store_true", default=False, help="Run roundtrip tests. These tests are slow and require \ 16 | a configured neptune instance." 17 | ) 18 | parser.addoption( 19 | "--roundtrip-janusgraph", action="store_true", default=False, help="Run roundtrip tests. These tests are slow and require \ 20 | a configured janusgraph instance." 21 | ) 22 | 23 | 24 | def pytest_configure(config: Config) -> None: 25 | config.addinivalue_line("markers", "roundtrip: mark test as roundtrip") 26 | 27 | 28 | def pytest_collection_modifyitems(config: Config, items: List[Item]) -> None: 29 | roundtrip_neptune: bool = config.getoption("--roundtrip-neptune") 30 | roundtrip_janusgraph: bool = config.getoption("--roundtrip-janusgraph") 31 | skip_roundtrip = pytest.mark.skip(reason="need the approprirate --roundtrip-[neptune|janus] option to run") 32 | for item in items: 33 | if "NeptuneGremlinProxyTest" in item.keywords and not roundtrip_neptune: 34 | item.add_marker(skip_roundtrip) 35 | if "JanusGraphGremlinProxyTest" in item.keywords and not roundtrip_janusgraph: 36 | item.add_marker(skip_roundtrip) 37 | -------------------------------------------------------------------------------- /docs/proxy/gremlin.md: -------------------------------------------------------------------------------- 1 | # Gremlin Proxy 2 | 3 | ## What the heck is Gremlin? Why is it named Gremlin? 4 | 5 | [Gremin](https://tinkerpop.apache.org/gremlin.html) is the graph traversal language of 6 | [Apache TinkerPop](https://tinkerpop.apache.org/). Why not Gremlin? 
7 | 8 | ## Documentation 9 | 10 | The docs linked from [Gremlin](https://tinkerpop.apache.org/gremlin.html) are a good start. For 11 | example, the [Getting Started](http://tinkerpop.apache.org/docs/current/tutorials/getting-started/) 12 | and the [PRACTICAL GREMLIN book](http://kelvinlawrence.net/book/Gremlin-Graph-Guide.html) 13 | 14 | ## How to target a new Gremlin backend 15 | 16 | This is not an exhaustive list, but some issues we've found along the way: 17 | - Are there restricted property names? For example JanusGraph does not allow a property named 18 | `key`, so the base Gremlin proxy has a property named `key_property_name` which is set to `_key` 19 | for JanusGraph but `key` for others. 20 | - Is there database management required? For example AWS Neptune does not allow explicit creation 21 | of indexes, nor assigning data types to properties, but JanusGraph does and practically requires 22 | the creation of indexes. 23 | - Are there restrictions on the methods? For example, JanusGraph accepts any of the Java or Groovy 24 | names, but Neptune accepts a strict subset. JanusGraph can install any script engine, e.g. to 25 | allow Python lambdas but Neptune only allows Groovy lambdas. 26 | 27 | Other differences between Janusgraph and Neptune can be found here: 28 | https://docs.aws.amazon.com/neptune/latest/userguide/access-graph-gremlin-differences.html 29 | -------------------------------------------------------------------------------- /metadata_service/proxy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project.
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from threading import Lock

from flask import current_app
from werkzeug.utils import import_string

from metadata_service import config
from metadata_service.proxy.base_proxy import BaseProxy

# Module-level singleton and the lock guarding its one-time construction.
_proxy_client = None
_proxy_client_lock = Lock()


def get_proxy_client() -> BaseProxy:
    """
    Provides a lazily-created singleton proxy client based on the app config.

    Uses double-checked locking: the first, unlocked check is the fast path
    once the client exists; the second check under the lock prevents two
    threads from both constructing a client. Checks are against ``None``
    (not truthiness) so an unusual falsy client instance is not rebuilt.

    :return: Proxy instance of any subclass of BaseProxy
    """
    global _proxy_client

    if _proxy_client is not None:
        return _proxy_client

    with _proxy_client_lock:
        # Re-check under the lock: another thread may have built the client
        # while this one was waiting.
        if _proxy_client is not None:
            return _proxy_client

        # Gather all the configuration to create a Proxy Client
        host = current_app.config[config.PROXY_HOST]
        port = current_app.config[config.PROXY_PORT]
        user = current_app.config[config.PROXY_USER]
        password = current_app.config[config.PROXY_PASSWORD]
        encrypted = current_app.config[config.PROXY_ENCRYPTED]
        validate_ssl = current_app.config[config.PROXY_VALIDATE_SSL]

        # PROXY_CLIENT holds a dotted import path; resolve it to the class.
        client = import_string(current_app.config[config.PROXY_CLIENT])
        _proxy_client = client(host=host,
                               port=port,
                               user=user,
                               password=password,
                               encrypted=encrypted,
                               validate_ssl=validate_ssl)

    return _proxy_client
Add any relevant links then remove this line_ 28 | 29 | ### CheckList 30 | 31 | Make sure you have checked **all** steps below to ensure a timely review. 32 | 33 | - [ ] PR title addresses the issue accurately and concisely. Example: "Updates the version of Flask to v1.0.2" 34 | - In case you are adding a dependency, check if the license complies with the [ASF 3rd Party License Policy](https://www.apache.org/legal/resolved.html#category-x). 35 | - [ ] PR includes a summary of changes. 36 | - [ ] PR adds unit tests, updates existing unit tests, **OR** documents why no test additions or modifications are needed. 37 | - [ ] In case of new functionality, my PR adds documentation that describes how to use it. 38 | - All the public functions and the classes in the PR contain docstrings that explain what it does 39 | - [ ] PR passes `make test` 40 | -------------------------------------------------------------------------------- /metadata_service/api/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import logging
from http import HTTPStatus
from typing import Iterable, Union, Mapping, Any, Optional, List

from flask_restful import Resource

from metadata_service.proxy import BaseProxy

LOGGER = logging.getLogger(__name__)


class BaseAPI(Resource):
    """
    Base REST resource that fetches one or all objects of a given type from
    the proxy client and serializes them with a marshmallow schema.
    """

    def __init__(self, schema: Any, str_type: str, client: BaseProxy) -> None:
        """
        :param schema: marshmallow schema class used to serialize results
        :param str_type: object type name; the proxy is expected to expose
            ``get_<str_type>`` and ``get_<str_type>s`` methods
        :param client: proxy used to fetch the objects
        """
        self.schema = schema
        self.client = client
        self.str_type = str_type
        self.allow_empty_upload = False

    def get(self, *, id: Optional[str] = None) -> Iterable[Union[Mapping, int, None]]:
        """
        Gets a single or multiple objects
        """
        return self.get_with_kwargs(id=id)

    def get_with_kwargs(self, *, id: Optional[str] = None, **kwargs: Optional[Any]) \
            -> Iterable[Union[Mapping, int, None]]:
        """
        Gets a single object by id (forwarding kwargs to the proxy), or all
        objects when id is None.

        :return: (serialized payload, HTTP status) tuple
        """
        if id is not None:
            get_object = getattr(self.client, f'get_{self.str_type}')
            try:
                # Purely numeric ids are handed to the proxy as ints.
                actual_id: Union[str, int] = int(id) if id.isdigit() else id
                # `obj` instead of `object`: avoid shadowing the builtin.
                obj = get_object(id=actual_id, **kwargs)
                if obj is not None:
                    return self.schema().dump(obj).data, HTTPStatus.OK
                return None, HTTPStatus.NOT_FOUND
            except ValueError as e:
                # The proxy signals malformed requests with ValueError.
                return {'message': f'exception:{e}'}, HTTPStatus.BAD_REQUEST
        else:
            get_objects = getattr(self.client, f'get_{self.str_type}s')
            objects: List[Any] = get_objects()
            return self.schema(many=True).dump(objects).data, HTTPStatus.OK
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from typing import List
from typing import Optional

import attr
from amundsen_common.models.popular_table import PopularTable
from amundsen_common.models.table import Tag
from amundsen_common.models.table import Badge
from amundsen_common.models.user import User
from marshmallow_annotations.ext.attrs import AttrsSchema

from metadata_service.entity.dashboard_query import DashboardQuery


@attr.s(auto_attribs=True, kw_only=True)
class DashboardDetail:
    """
    Detail payload for a single dashboard.

    All fields are keyword-only (kw_only=True). List-valued fields default to
    fresh empty lists via factories so instances never share a mutable default.
    """
    uri: str = attr.ib()  # unique identifier of the dashboard
    cluster: str = attr.ib()
    group_name: str = attr.ib()  # dashboard group this dashboard belongs to
    group_url: str = attr.ib()
    product: str = attr.ib()  # presumably the BI product/tool name — confirm with producers of this model
    name: str = attr.ib()
    url: str = attr.ib()
    description: Optional[str] = attr.ib()
    # Timestamps and run state are optional: not every backend provides them.
    created_timestamp: Optional[int] = attr.ib()
    updated_timestamp: Optional[int] = attr.ib()
    last_successful_run_timestamp: Optional[int] = attr.ib()
    last_run_timestamp: Optional[int] = attr.ib()
    last_run_state: Optional[str] = attr.ib()
    owners: List[User] = attr.ib(factory=list)
    frequent_users: List[User] = attr.ib(factory=list)
    chart_names: List[str] = attr.ib(factory=list)
    query_names: List[str] = attr.ib(factory=list)  # DEPRECATED: superseded by `queries`
    queries: List[DashboardQuery] = attr.ib(factory=list)
    tables: List[PopularTable] = attr.ib(factory=list)  # associated tables, as PopularTable summaries
    tags: List[Tag] = attr.ib(factory=list)
    badges: List[Badge] = attr.ib(factory=list)
    recent_view_count: Optional[int] = attr.ib(default=0)


class DashboardSchema(AttrsSchema):
    """Marshmallow schema auto-derived from DashboardDetail's attrs fields."""
    class Meta:
        target = DashboardDetail
        register_as_scheme = True
http://bitbucket.org/tarek/flake8 4 | flake8==3.5.0 5 | 6 | # A flake8 plugin that helps you write tidier imports. 7 | # License: ISCL 8 | # Upstream url: https://pypi.python.org/pypi/flake8-tidy-imports 9 | flake8-tidy-imports==1.1.0 10 | 11 | # A mature full-featured Python testing tool. 12 | # License: MIT 13 | # Upstream url: http://pytest.org/ 14 | pytest==3.5.1 15 | 16 | # Pytest plugin for measuring coverage. 17 | # License: MIT 18 | # Upstream url: https://github.com/pytest-dev/pytest-cov 19 | pytest-cov==2.5.1 20 | 21 | # Mypy is an optional static type checker for Python. 22 | # License: MIT 23 | # Upstream url: https://github.com/python/mypy 24 | mypy==0.782 25 | 26 | # Thin-wrapper around the mock package for easier use with py.test. 27 | # License: MIT 28 | # Upstream url: https://pypi.python.org/pypi/pytest-mock 29 | pytest-mock==1.1 30 | 31 | # PEP 484 32 | # License: PSF 33 | # Upstream url: https://github.com/python/typing 34 | typing-extensions==3.7.4 35 | 36 | # A common package that holds the models definition and schemas that are used 37 | # across different amundsen repositories. 
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0


import logging
from typing import List
from amundsen_common.models.table import Table, Application
from amundsen_common.models.user import User
from amundsen_gremlin.neptune_bulk_loader.gremlin_model_converter import (
    GetGraph
)
from overrides import overrides
from .roundtrip_gremlin_proxy import RoundtripGremlinProxy
from metadata_service.proxy.neptune_proxy import NeptuneGremlinProxy

LOGGER = logging.getLogger(__name__)


class RoundtripNeptuneGremlinProxy(NeptuneGremlinProxy, RoundtripGremlinProxy):
    """
    Neptune proxy variant for roundtrip tests: every write path converts the
    models to graph entities and pushes them through the Neptune bulk loader.
    Single-object `put_*` calls delegate to their batch `post_*` counterparts.
    """

    @overrides
    def post_users(self, *, data: List[User]) -> None:
        """Bulk-load a batch of users."""
        g = self.neptune_graph_traversal_source_factory()
        payload = GetGraph.user_entities(user_data=data, g=g)
        self.neptune_bulk_loader_api.bulk_load_entities(entities=payload)

    @overrides
    def put_user(self, *, data: User) -> None:
        """Write a single user via the batch path."""
        self.post_users(data=[data])

    @overrides
    def put_app(self, *, data: Application) -> None:
        """Write a single application via the batch path."""
        self.post_apps(data=[data])

    @overrides
    def post_apps(self, *, data: List[Application]) -> None:
        """Bulk-load a batch of applications."""
        g = self.neptune_graph_traversal_source_factory()
        payload = GetGraph.app_entities(app_data=data, g=g)
        self.neptune_bulk_loader_api.bulk_load_entities(entities=payload)

    @overrides
    def put_table(self, *, table: Table) -> None:
        """Write a single table via the batch path."""
        self.post_tables(tables=[table])

    @overrides
    def post_tables(self, *, tables: List[Table]) -> None:
        """Bulk-load a batch of tables."""
        g = self.neptune_graph_traversal_source_factory()
        payload = GetGraph.table_entities(table_data=tables, g=g)
        self.neptune_bulk_loader_api.bulk_load_entities(entities=payload)
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from http import HTTPStatus

from unittest.mock import patch, Mock

from tests.unit.test_basics import BasicTestCase

API_RESPONSE = [{'database': 'ministry',
                 'cluster': 'postgres',
                 'schema': 'ministry',
                 'name': 'wizards',
                 'description': 'all wizards'}]

CLIENT_RESPONSE = [{'database': 'ministry',
                    'cluster': 'postgres',
                    'schema': 'ministry',
                    'name': 'wizards',
                    'description': 'all wizards'}]


class TestPopularTablesAPI(BasicTestCase):
    """Tests for the popular_tables endpoint, with the proxy patched out."""

    def setUp(self) -> None:
        super().setUp()
        # Replace the proxy factory so no real metadata store is needed.
        self.mock_client = patch('metadata_service.api.popular_tables.get_proxy_client')
        self.mock_proxy = self.mock_client.start().return_value = Mock()

    def tearDown(self) -> None:
        super().tearDown()
        self.mock_client.stop()

    def test_should_get_popular_tables_with_default_limits(self) -> None:
        """Omitting ?limit falls back to the default of 10 entries."""
        self.mock_proxy.get_popular_tables.return_value = CLIENT_RESPONSE

        response = self.app.test_client().get('popular_tables/')

        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.assertEqual(response.json, {'popular_tables': API_RESPONSE})
        self.mock_proxy.get_popular_tables.assert_called_with(num_entries=10)

    def test_should_get_popular_tables_with_requested_limits(self) -> None:
        """?limit=N is forwarded to the proxy as num_entries."""
        self.mock_proxy.get_popular_tables.return_value = CLIENT_RESPONSE

        self.app.test_client().get('popular_tables/?limit=90')

        self.mock_proxy.get_popular_tables.assert_called_with(num_entries=90)
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import unittest
from http import HTTPStatus

from unittest.mock import patch, Mock

from tests.unit.test_basics import BasicTestCase
from metadata_service.entity.badge import Badge

TABLE_NAME = 'magic'
BADGE_NAME = 'alpha'


class TestTableBadgeAPI(BasicTestCase):
    """Tests for the whitelist rules of the table badge PUT endpoint."""

    def setUp(self) -> None:
        super().setUp()
        # Patch the proxy factory so no real metadata store is touched.
        self.mock_client = patch('metadata_service.api.table.get_proxy_client')
        self.mock_proxy = self.mock_client.start().return_value = Mock()

    def tearDown(self) -> None:
        super().tearDown()
        self.mock_client.stop()

    def test_block_bad_badge_name(self) -> None:
        """A badge missing from the whitelist is rejected with 404."""
        self.app.config['WHITELIST_BADGES'] = []

        url = f'/table/{TABLE_NAME}/badge/{BADGE_NAME}?category=table_status'
        response = self.app.test_client().put(url)

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)

    def test_block_badge_missing_category(self) -> None:
        """Even a whitelisted badge needs the category query parameter."""
        self.app.config['WHITELIST_BADGES'] = [Badge(badge_name='alpha',
                                                     category='table_status')]

        response = self.app.test_client().put(f'/table/{TABLE_NAME}/badge/{BADGE_NAME}')

        self.assertEqual(response.status_code, HTTPStatus.BAD_REQUEST)

    def test_badge_with_category(self) -> None:
        """A whitelisted badge with a matching category succeeds."""
        self.app.config['WHITELIST_BADGES'] = [Badge(badge_name='alpha',
                                                     category='table_status')]

        url = f'/table/{TABLE_NAME}/badge/{BADGE_NAME}?category=table_status'
        response = self.app.test_client().put(url)

        self.assertEqual(response.status_code, HTTPStatus.OK)


if __name__ == '__main__':
    unittest.main()
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import unittest
from http import HTTPStatus

from unittest.mock import patch, Mock

from tests.unit.test_basics import BasicTestCase
from metadata_service.entity.badge import Badge

DASHBOARD_NAME = 'magic'
BADGE_NAME = 'alpha'


class TestDashboardBadgeAPI(BasicTestCase):
    """Tests for the whitelist rules of the dashboard badge PUT endpoint."""

    def setUp(self) -> None:
        super().setUp()
        # Patch the proxy factory so no real metadata store is touched.
        self.mock_client = patch('metadata_service.api.dashboard.get_proxy_client')
        self.mock_proxy = self.mock_client.start().return_value = Mock()

    def tearDown(self) -> None:
        super().tearDown()
        self.mock_client.stop()

    def test_block_bad_badge_name(self) -> None:
        """A badge missing from the whitelist is rejected with 404."""
        self.app.config['WHITELIST_BADGES'] = []

        url = f'/dashboard/{DASHBOARD_NAME}/badge/{BADGE_NAME}?category=table_status'
        response = self.app.test_client().put(url)

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)

    def test_block_badge_missing_category(self) -> None:
        """Even a whitelisted badge needs the category query parameter."""
        self.app.config['WHITELIST_BADGES'] = [Badge(badge_name='alpha',
                                                     category='table_status')]

        response = self.app.test_client().put(f'/dashboard/{DASHBOARD_NAME}/badge/{BADGE_NAME}')

        self.assertEqual(response.status_code, HTTPStatus.BAD_REQUEST)

    def test_badge_with_category(self) -> None:
        """A whitelisted badge with a matching category succeeds."""
        self.app.config['WHITELIST_BADGES'] = [Badge(badge_name='alpha',
                                                     category='table_status')]

        url = f'/dashboard/{DASHBOARD_NAME}/badge/{BADGE_NAME}?category=table_status'
        response = self.app.test_client().put(url)

        self.assertEqual(response.status_code, HTTPStatus.OK)


if __name__ == '__main__':
    unittest.main()
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from http import HTTPStatus

from metadata_service.exception import NotFoundException
from metadata_service.entity.resource_type import ResourceType

from tests.unit.api.dashboard.dashboard_test_case import DashboardTestCase

ID = 'wizards'
TAG = 'underage_wizards'


class TestDashboardTagAPI(DashboardTestCase):
    """Tests for adding and removing dashboard tags."""

    def test_should_update_tag(self) -> None:
        """PUT forwards the tag to the proxy with the Dashboard resource type."""
        response = self.app.test_client().put(f'/dashboard/{ID}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.mock_proxy.add_tag.assert_called_with(id=ID,
                                                   tag=TAG,
                                                   tag_type='default',
                                                   resource_type=ResourceType.Dashboard)

    def test_should_fail_to_update_tag_when_table_not_found(self) -> None:
        """A NotFoundException from the proxy surfaces as 404."""
        self.mock_proxy.add_tag.side_effect = NotFoundException(message='foo')

        response = self.app.test_client().put(f'/dashboard/{ID}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)

    def test_should_delete_tag(self) -> None:
        """DELETE forwards the tag removal to the proxy."""
        response = self.app.test_client().delete(f'/dashboard/{ID}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.mock_proxy.delete_tag.assert_called_with(id=ID,
                                                      tag=TAG,
                                                      tag_type='default',
                                                      resource_type=ResourceType.Dashboard)

    def test_should_fail_to_delete_tag_when_table_not_found(self) -> None:
        """A NotFoundException from the proxy surfaces as 404."""
        self.mock_proxy.delete_tag.side_effect = NotFoundException(message='foo')

        response = self.app.test_client().delete(f'/dashboard/{ID}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from http import HTTPStatus

from metadata_service.exception import NotFoundException
from metadata_service.entity.resource_type import ResourceType

from tests.unit.api.table.table_test_case import TableTestCase

TABLE_URI = 'wizards'
TAG = 'underage_wizards'


class TestTableTagAPI(TableTestCase):
    """Tests for adding and removing table tags."""

    def test_should_update_tag(self) -> None:
        """PUT forwards the tag to the proxy with the Table resource type."""
        response = self.app.test_client().put(f'/table/{TABLE_URI}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.mock_proxy.add_tag.assert_called_with(id=TABLE_URI,
                                                   tag=TAG,
                                                   tag_type='default',
                                                   resource_type=ResourceType.Table)

    def test_should_fail_to_update_tag_when_table_not_found(self) -> None:
        """A NotFoundException from the proxy surfaces as 404."""
        self.mock_proxy.add_tag.side_effect = NotFoundException(message='cannot find table')

        response = self.app.test_client().put(f'/table/{TABLE_URI}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)

    def test_should_delete_tag(self) -> None:
        """DELETE forwards the tag removal to the proxy."""
        response = self.app.test_client().delete(f'/table/{TABLE_URI}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.mock_proxy.delete_tag.assert_called_with(id=TABLE_URI,
                                                      tag=TAG,
                                                      tag_type='default',
                                                      resource_type=ResourceType.Table)

    def test_should_fail_to_delete_tag_when_table_not_found(self) -> None:
        """A NotFoundException from the proxy surfaces as 404."""
        self.mock_proxy.delete_tag.side_effect = NotFoundException(message='cannot find table')

        response = self.app.test_client().delete(f'/table/{TABLE_URI}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from http import HTTPStatus

from flask import current_app
from unittest.mock import MagicMock

from metadata_service import create_app
from metadata_service.api.badge import BadgeCommon
from metadata_service.entity.resource_type import ResourceType
from metadata_service.entity.badge import Badge
from tests.unit.api.dashboard.dashboard_test_case import DashboardTestCase

BADGE_NAME = 'alpha'
CATEGORY = 'table_status'


class TestBadgeCommon(DashboardTestCase):
    """Tests for BadgeCommon.put whitelist enforcement."""

    def setUp(self) -> None:
        # A bare app context is enough; BadgeCommon is exercised directly.
        self.app = create_app(
            config_module_class='metadata_service.config.LocalConfig')
        self.app_context = self.app.app_context()
        self.app_context.push()

    def tearDown(self) -> None:
        self.app_context.pop()

    def test_app_exists(self) -> None:
        self.assertIsNotNone(current_app)

    def test_badge_on_reserved_badge_name(self) -> None:
        """A whitelisted badge name with its category is accepted."""
        self.app.config['WHITELIST_BADGES'] = [Badge(badge_name='alpha',
                                                     category='table_status')]

        badge_common = BadgeCommon(client=MagicMock())
        result = badge_common.put(id='',
                                  resource_type=ResourceType.Dashboard,
                                  badge_name=BADGE_NAME,
                                  category=CATEGORY)

        self.assertEqual(result[1], HTTPStatus.OK)

    def test_badge_on_not_reserved_badge_name(self) -> None:
        """A badge name outside the whitelist is rejected with 404."""
        self.app.config['WHITELIST_BADGES'] = []

        badge_common = BadgeCommon(client=MagicMock())
        result = badge_common.put(id='',
                                  resource_type=ResourceType.Dashboard,
                                  badge_name=BADGE_NAME,
                                  category=CATEGORY)

        self.assertEqual(result[1], HTTPStatus.NOT_FOUND)
class JanusGraphGremlinProxy(AbstractGremlinProxy):
    """
    A proxy to a JanusGraph instance over the Gremlin protocol.

    TODO: HTTP proxy support. This does *NOT* work through an HTTP proxy
    as-is: gremlin_python's default transport factory is tornado.websocket,
    whose WebSocketClientConnection is hardcoded to simple_httpclient; even
    if curl_httpclient could be substituted, it needs pycurl and therefore
    libcurl and other native libraries, which are a pain to install.
    """
    def __init__(self, *, host: str, port: Optional[int] = None, user: Optional[str] = None,
                 password: Optional[str] = None, traversal_source: 'str' = 'g',
                 driver_remote_connection_options: Mapping[str, Any] = {}) -> None:
        # Copy first so neither the caller's mapping nor the shared default
        # dict is ever mutated.
        options = dict(driver_remote_connection_options)

        # As in the other gremlin proxies, `host` is repurposed as a full URL
        # (it may even be an HTTPRequest); a separate port is therefore not
        # accepted here.
        self.url = host
        if port is not None:
            raise NotImplementedError(f'port is not allowed! port={port}')

        if user is not None:
            options['username'] = user
        if password is not None:
            options['password'] = password
        options['traversal_source'] = traversal_source

        # JanusGraph reserves the property name `key`, so `_key` is used.
        AbstractGremlinProxy.__init__(self, key_property_name='_key',
                                      driver_remote_connection_options=options)

    @classmethod
    @overrides
    def script_translator(cls) -> Type[ScriptTranslatorTargetJanusgraph]:
        """The script translator dialect targeting JanusGraph."""
        return ScriptTranslatorTargetJanusgraph

    @overrides
    def possibly_signed_ws_client_request_or_url(self) -> str:
        """JanusGraph needs no request signing; return the URL unchanged."""
        return self.url
20 | """ 21 | def __init__(self, *, host: str, port: Optional[int] = None, user: Optional[str] = None, 22 | password: Optional[str] = None, traversal_source: 'str' = 'g', 23 | driver_remote_connection_options: Mapping[str, Any] = {}) -> None: 24 | driver_remote_connection_options = dict(driver_remote_connection_options) 25 | 26 | # as others, we repurpose host a url, and url can be an HTTPRequest 27 | self.url = host 28 | 29 | # port should be part of that url 30 | if port is not None: 31 | raise NotImplementedError(f'port is not allowed! port={port}') 32 | 33 | if user is not None: 34 | driver_remote_connection_options.update(username=user) 35 | if password is not None: 36 | driver_remote_connection_options.update(password=password) 37 | 38 | driver_remote_connection_options.update(traversal_source=traversal_source) 39 | 40 | # use _key 41 | AbstractGremlinProxy.__init__(self, key_property_name='_key', 42 | driver_remote_connection_options=driver_remote_connection_options) 43 | 44 | @classmethod 45 | @overrides 46 | def script_translator(cls) -> Type[ScriptTranslatorTargetJanusgraph]: 47 | return ScriptTranslatorTargetJanusgraph 48 | 49 | @overrides 50 | def possibly_signed_ws_client_request_or_url(self) -> str: 51 | return self.url 52 | -------------------------------------------------------------------------------- /tests/unit/api/table/test_table_description_api.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from http import HTTPStatus

from metadata_service.exception import NotFoundException
from tests.unit.api.table.table_test_case import TableTestCase

TABLE_URI = 'wizards'
DESCRIPTION = 'magical people'


class TestTableDescriptionAPI(TableTestCase):
    """Tests for reading and writing table descriptions."""

    def test_should_get_table_description(self) -> None:
        """GET returns the description supplied by the proxy."""
        self.mock_proxy.get_table_description.return_value = DESCRIPTION

        response = self.app.test_client().get(f'/table/{TABLE_URI}/description')

        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.assertEqual(response.json, {'description': DESCRIPTION})
        self.mock_proxy.get_table_description.assert_called_with(table_uri=TABLE_URI)

    def test_should_fail_when_cannot_get_description(self) -> None:
        """An unexpected proxy error surfaces as 500."""
        self.mock_proxy.get_table_description.side_effect = RuntimeError()

        response = self.app.test_client().get(f'/table/{TABLE_URI}/description')

        self.assertEqual(response.status_code, HTTPStatus.INTERNAL_SERVER_ERROR)

    def test_should_fail_when_cannot_find_table(self) -> None:
        """A NotFoundException from the proxy surfaces as 404."""
        self.mock_proxy.get_table_description.side_effect = NotFoundException(message='cannot find table')

        response = self.app.test_client().get(f'/table/{TABLE_URI}/description')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)

    def test_should_update_table_description(self) -> None:
        """PUT forwards the new description to the proxy."""
        response = self.app.test_client().put(f'/table/{TABLE_URI}/description',
                                              json={'description': DESCRIPTION})

        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.mock_proxy.put_table_description.assert_called_with(table_uri=TABLE_URI, description=DESCRIPTION)

    def test_should_fail_to_update_description_when_table_not_found(self) -> None:
        """A NotFoundException on update surfaces as 404."""
        self.mock_proxy.put_table_description.side_effect = NotFoundException(message='cannot find table')

        response = self.app.test_client().put(f'/table/{TABLE_URI}/description',
                                              json={'description': DESCRIPTION})

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import json
from http import HTTPStatus
from typing import Iterable, Union

from flask import request
from flasgger import swag_from
from flask_restful import Resource

from metadata_service.exception import NotFoundException
from metadata_service.proxy import get_proxy_client


class ColumnDescriptionAPI(Resource):
    """
    REST resource exposing GET and PUT to read and upsert the description of
    a single table column.
    """

    def __init__(self) -> None:
        self.client = get_proxy_client()
        super().__init__()

    @swag_from('swagger_doc/column/description_put.yml')
    def put(self,
            table_uri: str,
            column_name: str) -> Iterable[Union[dict, tuple, int, None]]:
        """
        Upserts the column description taken from the JSON request body.

        :param table_uri: key of the table owning the column
        :param column_name: name of the column to describe
        :return: (payload, HTTP status) tuple
        """
        try:
            new_description = json.loads(request.data).get('description')
            self.client.put_column_description(table_uri=table_uri,
                                               column_name=column_name,
                                               description=new_description)
            return None, HTTPStatus.OK
        except NotFoundException:
            message = 'table_uri {} with column {} does not exist'.format(table_uri, column_name)
            return {'message': message}, HTTPStatus.NOT_FOUND

    @swag_from('swagger_doc/column/description_get.yml')
    def get(self, table_uri: str, column_name: str) -> Union[tuple, int, None]:
        """
        Fetches the current description of a column.
        """
        try:
            found = self.client.get_column_description(table_uri=table_uri,
                                                       column_name=column_name)
            return {'description': found}, HTTPStatus.OK
        except NotFoundException:
            message = 'table_uri {} with column {} does not exist'.format(table_uri, column_name)
            return {'message': message}, HTTPStatus.NOT_FOUND
        except Exception:
            # Deliberate catch-all: any other proxy failure maps to a 500.
            return {'message': 'Internal server error!'}, HTTPStatus.INTERNAL_SERVER_ERROR
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import logging
from random import randint
from time import sleep
from typing import (
    Callable, Optional, TypeVar
)

LOGGER = logging.getLogger(__name__)

X = TypeVar('X')


def checkNotNone(x: Optional[X], *, message: str = 'is None') -> X:
    """
    Return ``x`` unchanged, raising RuntimeError(message) when it is None.

    Falsy-but-not-None values pass through untouched:

    >>> checkNotNone('a string')
    'a string'
    >>> checkNotNone(31337)
    31337
    >>> checkNotNone('')
    ''
    >>> checkNotNone(False)
    False
    >>> checkNotNone({})
    {}
    >>> checkNotNone(None)
    Traceback (most recent call last):
        ...
    RuntimeError: is None
    >>> checkNotNone(None, message='thing is None')
    Traceback (most recent call last):
        ...
    RuntimeError: thing is None
    """
    if x is None:
        raise RuntimeError(message)
    return x


V = TypeVar('V')
K = TypeVar('K')


def make_wait_exponential_with_jitter(base: int, jitter: int) -> Callable[[int], int]:
    """
    Build a wait-time function for retrying(): retry ``n`` waits ``base**n``
    milliseconds plus a uniformly random jitter in [0, jitter].

    :param base: exponent base for the backoff (previously ignored: the
        implementation hard-coded 10, so non-default bases had no effect)
    :param jitter: maximum random extra wait in milliseconds
    """
    def wait(retry: int) -> int:
        assert retry > 0
        return base**retry + randint(0, jitter)
    return wait


CallableV = TypeVar('CallableV')


def retrying(callable: Callable[[], CallableV], *,
             is_retryable: Callable[[Exception], bool],
             maximum_number_of_retries: int = 4,
             wait_millis: Callable[[int], int] = make_wait_exponential_with_jitter(10, 20)) -> CallableV:
    """
    Invoke ``callable`` until it succeeds, retrying retryable failures.

    :param callable: zero-argument function to invoke
    :param is_retryable: predicate deciding whether an exception may be retried
    :param maximum_number_of_retries: retries allowed beyond the first attempt
    :param wait_millis: maps the retry number (1-based) to a wait in millis
    :return: the value returned by ``callable``
    :raises Exception: the original exception once it is deemed not retryable
        or retries are exhausted
    """
    assert maximum_number_of_retries >= 0, f'maximum_number_of_retries ({maximum_number_of_retries}) must be >= 0!'
    retry = 0
    while True:
        try:
            return callable()
        except Exception as e:
            retry += 1
            try:
                if not is_retryable(e):
                    LOGGER.info(f'exception {e} is not retryable')
                elif retry > maximum_number_of_retries:
                    LOGGER.info(f'retry = {retry} exceeds {maximum_number_of_retries}')
                else:
                    millis = wait_millis(retry)
                    LOGGER.info(f'waiting {millis}ms on retry {retry} of {maximum_number_of_retries}')
                    sleep(millis / 1000)
                    continue
            except Exception as e2:
                # ignore this, assume our exception is not retryable
                LOGGER.warning(f'got exception {e2} while handling original exception {e}')
            raise  # the original exception
2 | 3 | #### BADGES 4 | In order to add a badge to a resource you should first add the combination of badge name and category to the in WHITELIST_BADGES [Config Class](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/config.py). 5 | 6 | Example: 7 | ```python 8 | 9 | WHITELIST_BADGES: List[Badge] = [Badge(badge_name='beta', 10 | category='table_status')] 11 | ``` 12 | 13 | Once this is done users will be able to add badge the badges in the whitelist by running: 14 | 15 | ```curl -X PUT https://{amundsen metadata url}/table/"{table key}"/badge/{badge_name}?category={category}``` 16 | 17 | #### USER_DETAIL_METHOD `OPTIONAL` 18 | This is a method that can be used to get the user details from any third-party or custom system. 19 | This custom function takes user_id as a parameter, and returns a dictionary consisting user details' fields defined in [UserSchema](https://github.com/amundsen-io/amundsencommon/blob/master/amundsen_common/models/user.py). 20 | 21 | Example: 22 | ```python 23 | 24 | def get_user_details(user_id): 25 | user_info = { 26 | 'email': 'test@email.com', 27 | 'user_id': user_id, 28 | 'first_name': 'Firstname', 29 | 'last_name': 'Lastname', 30 | 'full_name': 'Firstname Lastname', 31 | } 32 | return user_info 33 | 34 | USER_DETAIL_METHOD = get_user_details 35 | ``` 36 | 37 | #### STATISTICS_FORMAT_SPEC `OPTIONAL` 38 | 39 | This is a variable enabling possibility to reformat statistics displayed in UI. 
40 | 41 | The key is name of statistic and a value is a dictionary with optional keys: 42 | * **new_name** - how to rename statistic (if absent proxy should default to old name) 43 | * **format** - how to format numerical statistics (if absent, proxy should default to original format) 44 | * **drop** - should given statistic not be displayed in UI (if absent, proxy should keep it) 45 | 46 | Example (if you're using [deeque](https://aws.amazon.com/blogs/big-data/test-data-quality-at-scale-with-deequ/) library), you might want to: 47 | ```python 48 | STATISTICS_FORMAT_SPEC = { 49 | 'stdDev': dict(new_name='standard deviation', format='{:,.2f}'), 50 | 'mean': dict(format='{:,.2f}'), 51 | 'maximum': dict(format='{:,.2f}'), 52 | 'minimum': dict(format='{:,.2f}'), 53 | 'completeness': dict(format='{:.2%}'), 54 | 'approximateNumDistinctValues': dict(new_name='distinct values', format='{:,.0f}', ), 55 | 'sum': dict(drop=True) 56 | } 57 | ``` 58 | -------------------------------------------------------------------------------- /tests/unit/api/table/test_dashboards_using_table_api.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import unittest
from http import HTTPStatus

from metadata_service.entity.resource_type import ResourceType
from tests.unit.api.table.table_test_case import TableTestCase

# Table key used in the request path below.
TABLE_URI = 'wizards'

# Raw proxy payload: two dashboards, the second with null description and
# timestamp to exercise optional-field marshalling.
QUERY_RESPONSE = {
    'dashboards': [
        {
            'uri': 'foo_dashboard://gold.foo/bar1',
            'cluster': 'gold',
            'group_name': 'foo',
            'group_url': 'https://foo',
            'product': 'foo',
            'name': 'test dashboard 1',
            'url': 'https://foo.bar',
            'description': 'test dashboard description 1',
            'last_successful_run_timestamp': 1234567890
        },
        {
            'uri': 'foo_dashboard://gold.foo/bar1',
            'cluster': 'gold',
            'group_name': 'foo',
            'group_url': 'https://foo',
            'product': 'foo',
            'name': 'test dashboard 1',
            'url': 'https://foo.bar',
            'description': None,
            'last_successful_run_timestamp': None
        }
    ]
}

# The same dashboards after API serialization (key order differs; None values
# must be preserved, not dropped).
API_RESPONSE = {
    'dashboards':
        [
            {
                'group_url': 'https://foo', 'uri': 'foo_dashboard://gold.foo/bar1',
                'last_successful_run_timestamp': 1234567890, 'group_name': 'foo', 'name': 'test dashboard 1',
                'url': 'https://foo.bar', 'description': 'test dashboard description 1', 'cluster': 'gold',
                'product': 'foo'
            },
            {
                'group_url': 'https://foo', 'uri': 'foo_dashboard://gold.foo/bar1',
                'last_successful_run_timestamp': None,
                'group_name': 'foo', 'name': 'test dashboard 1', 'url': 'https://foo.bar', 'description': None,
                'cluster': 'gold', 'product': 'foo'
            }
        ]
}


class TestTableDashboardAPI(TableTestCase):
    """Tests GET /table/{uri}/dashboard/ against the mocked proxy from TableTestCase."""

    def test_get_dashboards_using_table(self) -> None:
        # Arrange: the proxy returns the raw dashboard payload.
        self.mock_proxy.get_resources_using_table.return_value = QUERY_RESPONSE

        response = self.app.test_client().get(f'/table/{TABLE_URI}/dashboard/')
        self.assertEqual(response.json, API_RESPONSE)
        self.assertEqual(response.status_code, HTTPStatus.OK)
        # The API must query Dashboard resources specifically for this table key.
        self.mock_proxy.get_resources_using_table.assert_called_with(id=TABLE_URI,
                                                                     resource_type=ResourceType.Dashboard)


if __name__ == '__main__':
    unittest.main()
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import unittest
from unittest.mock import patch, MagicMock
from statsd import StatsClient
from metadata_service.proxy import statsd_utilities
from metadata_service.proxy.statsd_utilities import _get_statsd_client

from flask import current_app

from metadata_service import create_app
from neo4j import GraphDatabase
from metadata_service.proxy.neo4j_proxy import Neo4jProxy


class TestStatsdUtilities(unittest.TestCase):
    """Tests the statsd client pool and the timer_with_counter decorator wiring."""

    def setUp(self) -> None:
        self.app = create_app(config_module_class='metadata_service.config.LocalConfig')
        self.app_context = self.app.app_context()
        self.app_context.push()

    def tearDown(self) -> None:
        # Bug fix: the pushed app context was never popped, leaking it across
        # tests; every sibling test case in this suite pops in tearDown.
        self.app_context.pop()

    def test_no_statsd_client(self) -> None:
        # With IS_STATSD_ON at its default (False), no client should be created.
        with patch.object(StatsClient, '__init__'):
            statsd_client = _get_statsd_client(prefix='foo')
            self.assertIsNone(statsd_client)

    def test_get_statsd_client(self) -> None:
        with patch.object(current_app, 'config') as mock_config, \
                patch.object(StatsClient, '__init__', return_value=None) as mock_statsd_init:
            # Make the config lookup truthy so _get_statsd_client proceeds.
            mock_config.return_value.single.return_value = True

            # Same prefix must reuse the pooled client (one construction only).
            statsd_client1 = _get_statsd_client(prefix='foo')
            self.assertIsNotNone(statsd_client1)
            statsd_client2 = _get_statsd_client(prefix='foo')
            self.assertIsNotNone(statsd_client2)
            self.assertEqual(statsd_client1, statsd_client2)

            self.assertEqual(mock_statsd_init.call_count, 1)

            # A different prefix gets its own pooled client.
            statsd_client3 = _get_statsd_client(prefix='bar')
            self.assertIsNotNone(statsd_client3)
            statsd_client4 = _get_statsd_client(prefix='bar')
            self.assertIsNotNone(statsd_client4)
            self.assertEqual(statsd_client3, statsd_client4)

            self.assertNotEqual(statsd_client1, statsd_client3)
            self.assertEqual(mock_statsd_init.call_count, 2)

    def test_with_neo4j_proxy(self) -> None:
        # End-to-end: a decorated proxy method should emit one success counter.
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jProxy, '_execute_cypher_query'), \
                patch.object(statsd_utilities, '_get_statsd_client') as mock_statsd_client:

            mock_success_incr = MagicMock()
            mock_statsd_client.return_value.incr = mock_success_incr

            neo4j_proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
            neo4j_proxy.add_owner(table_uri='bogus_uri', owner='foo')

            self.assertEqual(mock_success_incr.call_count, 1)


if __name__ == '__main__':
    unittest.main()
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from http import HTTPStatus

from flask import current_app
from unittest.mock import MagicMock

from metadata_service import create_app
from metadata_service.api.tag import TagCommon
from metadata_service.entity.resource_type import ResourceType
from metadata_service.entity.badge import Badge
from tests.unit.api.dashboard.dashboard_test_case import DashboardTestCase

# Name placed on the badge whitelist below -- reserved, so it must be rejected
# when used as a plain tag.
BADGE_NAME = 'foo'
# Ordinary tag name that is NOT on the badge whitelist.
TAG_NAME = 'bar'


class TestDashboardTagAPI(DashboardTestCase):
    """
    Tests TagCommon's whitelist handling: plain tags may not collide with
    whitelisted badge names, and badges may not be created through the tag path.

    NOTE(review): although this file is test_tag_common.py, the class is named
    TestDashboardTagAPI (same name as the class in
    tests/unit/api/dashboard/test_dashboard_tag_api.py), and setUp/tearDown
    fully replace DashboardTestCase's lifecycle without calling super() --
    confirm the base class is actually needed here.
    """

    def setUp(self) -> None:
        self.app = create_app(
            config_module_class='metadata_service.config.LocalConfig')
        self.app_context = self.app.app_context()
        self.app_context.push()

    def tearDown(self) -> None:
        self.app_context.pop()

    def test_app_exists(self) -> None:
        # Sanity check that the app context pushed in setUp is active.
        self.assertFalse(current_app is None)

    def test_block_tag_on_reserved_badge_value(self) -> None:
        # A tag whose name matches a whitelisted badge must be rejected (409).
        self.app.config['WHITELIST_BADGES'] = [Badge(badge_name=BADGE_NAME,
                                                     category='table_status')]

        mock_proxy = MagicMock()

        tag_common = TagCommon(client=mock_proxy)
        response = tag_common.put(id='',
                                  resource_type=ResourceType.Dashboard,
                                  tag=BADGE_NAME,
                                  tag_type='default')

        self.assertEqual(response[1], HTTPStatus.CONFLICT)

    def test_tag_on_unreserved_badge_value(self) -> None:
        # A tag that does not collide with the whitelist is accepted (200).
        self.app.config['WHITELIST_BADGES'] = [Badge(badge_name=BADGE_NAME,
                                                     category='table_status')]

        mock_proxy = MagicMock()

        tag_common = TagCommon(client=mock_proxy)
        response = tag_common.put(id='',
                                  resource_type=ResourceType.Dashboard,
                                  tag=TAG_NAME,
                                  tag_type='default')

        self.assertEqual(response[1], HTTPStatus.OK)

    def test_badge_on_reserved_badge_value(self) -> None:
        # Badges must go through the badge API, not the tag API, even for
        # whitelisted names (406).
        self.app.config['WHITELIST_BADGES'] = [Badge(badge_name=BADGE_NAME,
                                                     category='table_status')]

        mock_proxy = MagicMock()
        tag_common = TagCommon(client=mock_proxy)
        response = tag_common.put(id='',
                                  resource_type=ResourceType.Dashboard,
                                  tag=BADGE_NAME,
                                  tag_type='badge')
        self.assertEqual(response[1], HTTPStatus.NOT_ACCEPTABLE)
def timer_with_counter(f: Callable) -> Any:
    """
    A function decorator that adds a statsd timer and statsd counters on success or fail.
    The statsd prefix comes from the function's module and the metric name from the
    function name itself.
    Note that config.IS_STATSD_ON needs to be True to emit metrics

    e.g: decorating function neo4j_proxy.get_table will emit:
     - metadata_service.proxy.neo4j_proxy.get_table.success.count
     - metadata_service.proxy.neo4j_proxy.get_table.fail.count
     - metadata_service.proxy.neo4j_proxy.get_table.timer

    More information on statsd: https://statsd.readthedocs.io/en/v3.2.1/index.html
    For statsd daemon not following default settings, refer to doc above to configure environment variables

    NOTE(review): the wrapper does not apply functools.wraps, so the decorated
    function's __name__/__doc__ are shadowed by the wrapper's -- confirm
    whether any caller relies on them before changing.

    :param f: function to decorate
    :return: wrapped function emitting a timer plus success/fail counters
    """
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        # No client (statsd disabled or no app context): call through untouched.
        statsd_client = _get_statsd_client(prefix=f.__module__)
        if not statsd_client:
            return f(*args, **kwargs)

        with statsd_client.timer(f.__name__):
            if LOGGER.isEnabledFor(logging.DEBUG):
                LOGGER.debug('Calling function with emitting statsd metrics on prefix {}'.format(f.__name__))
            try:
                result = f(*args, **kwargs)
                statsd_client.incr('{}.success'.format(f.__name__))
                return result
            except Exception as e:
                # Count the failure, then let the original exception propagate.
                statsd_client.incr('{}.fail'.format(f.__name__))
                raise e

    return wrapper


def _get_statsd_client(*, prefix: str) -> StatsClient:
    """
    Object pool method that reuses an already created StatsClient based on prefix.

    NOTE(review): this returns None when there is no app context or statsd is
    off, so the return annotation should really be Optional[StatsClient]
    (``Optional`` is not currently imported in this module, hence left as-is).

    :param prefix: statsd metric prefix, also used as the pool key
    :return: a pooled StatsClient, or None when statsd is disabled
    """
    if not has_app_context() or not current_app.config[config.IS_STATSD_ON]:
        return None
    else:
        if prefix not in __STATSD_POOL:
            # Double-checked locking: re-test under the lock before instantiating
            # so concurrent callers create at most one client per prefix.
            with __STATSD_POOL_LOCK:
                if prefix not in __STATSD_POOL:
                    LOGGER.info('Instantiate StatsClient with prefix {}'.format(prefix))
                    statsd_client = StatsClient(prefix=prefix)
                    __STATSD_POOL[prefix] = statsd_client
                    return statsd_client

        if LOGGER.isEnabledFor(logging.DEBUG):
            LOGGER.debug('Reuse StatsClient with prefix {}'.format(prefix))
        return __STATSD_POOL[prefix]
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from http import HTTPStatus
import pytest

from metadata_service.exception import NotFoundException
from tests.unit.api.table.table_test_case import TableTestCase

# Table key used in the request path below.
TABLE_URI = 'wizards'

# Column usage statistics attached to the single test column.
STATS = [{'stat_type': 'requests', 'stat_val': '10', 'start_epoch': 1570581861, 'end_epoch': 1570581861}]
READER = {'email': 'ssnape@hogwarts.com', 'first_name': 'severus', 'last_name': 'snape'}
# Fields shared verbatim between the proxy payload and the API response.
BASE = {
    'database': 'postgres',
    'cluster': 'postgres',
    'schema': 'hogwarts',
    'tags': [{'tag_type': 'table', 'tag_name': 'wizards'}],
    'badges': [{'badge_name': 'badge', 'category': 'table_status'}],
    'owners': [{'email': 'mmcgonagall@hogwarts.com', 'first_name': 'minerva', 'last_name': 'mcgonagall'}],
    'watermarks': [
        {'watermark_type': 'type', 'partition_key': 'key', 'partition_value': 'value', 'create_time': '1570581861'}],
    'table_writer': {'application_url': 'table_writer_rul', 'name': 'table_writer_name', 'id': 'table_writer_id',
                     'description': 'table_writer_description'},
    'last_updated_timestamp': 1570581861,
    'source': {'source_type': 'type', 'source': 'source'},
    'is_view': True
}

QUERY_RESPONSE = {
    **BASE,
    'name': 'wizards',
    'description': 'all wizards at hogwarts',
    'table_readers': [{
        'user': READER,
        'read_count': 10
    }],
    'columns': [{
        'name': 'wizard_name',
        'description': 'full name of wizard',
        'col_type': 'String',
        'sort_order': 0,
        'stats': STATS
    }],
    'programmatic_descriptions': []
}

API_RESPONSE = {
    **BASE,
    'name': 'wizards',
    'description': 'all wizards at hogwarts',
    'table_readers': [{
        'user': READER,
        'read_count': 10
    }],
    'columns': [{
        'name': 'wizard_name',
        'description': 'full name of wizard',
        'col_type': 'String',
        'sort_order': 0,
        'stats': STATS
    }],
    'programmatic_descriptions': []
}


class TestTableDetailAPI(TableTestCase):
    """Tests GET /table/{table_uri} against the mocked proxy from TableTestCase."""

    @pytest.mark.skip(reason='The test is flaky in CI')
    def test_should_get_table_details(self) -> None:
        # Renamed from test_should_get_column_details: this exercises the
        # table-detail endpoint, not the column one.
        self.mock_proxy.get_table.return_value = QUERY_RESPONSE

        response = self.app.test_client().get(f'/table/{TABLE_URI}')
        self.assertEqual(response.json, API_RESPONSE)
        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.mock_proxy.get_table.assert_called_with(table_uri=TABLE_URI)

    def test_should_fail_to_get_table_details_when_table_not_found(self) -> None:
        # Renamed: fixes the 'not_foubd' typo and the column/table mix-up.
        self.mock_proxy.get_table.side_effect = NotFoundException(message='table not found')

        response = self.app.test_client().get(f'/table/{TABLE_URI}')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
# Fixture values used in the request paths and bodies below.
DESCRIPTION = 'This is the name of the spell.'
COLUMN_NAME = 'spell'
TABLE_NAME = 'magic'


class TestColumnDescriptionAPI(BasicTestCase):
    """Tests PUT/GET /table/{table}/column/{column}/description against a mocked proxy."""

    def setUp(self) -> None:
        super().setUp()

        # Patch the proxy factory so the API never touches a real backend.
        self.mock_client = patch('metadata_service.api.column.get_proxy_client')
        self.mock_proxy = self.mock_client.start().return_value = Mock()

    def tearDown(self) -> None:
        super().tearDown()

        self.mock_client.stop()

    def test_should_update_column_description(self) -> None:
        # Successful PUT returns an empty 200 body and forwards all three fields.
        response = self.app.test_client().put(f'/table/{TABLE_NAME}/column/{COLUMN_NAME}/description',
                                              json={"description": DESCRIPTION})

        self.assertEqual(response.json, None)
        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.mock_proxy.put_column_description.assert_called_with(table_uri=TABLE_NAME, column_name=COLUMN_NAME,
                                                                  description=DESCRIPTION)

    def test_should_fail_to_update_column_description_when_table_does_not_exist(self) -> None:
        # Proxy NotFoundException is mapped to HTTP 404.
        self.mock_proxy.put_column_description.side_effect = NotFoundException(message="table does not exist")

        response = self.app.test_client().put(f'/table/{TABLE_NAME}/column/{COLUMN_NAME}/description',
                                              json={"description": DESCRIPTION})

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)

    def test_should_get_column_description(self) -> None:
        self.mock_proxy.get_column_description.return_value = DESCRIPTION

        response = self.app.test_client().get(f'/table/{TABLE_NAME}/column/{COLUMN_NAME}/description')

        self.assertEqual(response.json, {'description': DESCRIPTION})
        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.mock_proxy.get_column_description.assert_called_with(table_uri=TABLE_NAME, column_name=COLUMN_NAME)

    def test_should_fail_to_get_column_description_when_table_is_not_found(self) -> None:
        self.mock_proxy.get_column_description.side_effect = NotFoundException(message="table does not exist")

        response = self.app.test_client().get(f'/table/{TABLE_NAME}/column/{COLUMN_NAME}/description')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)

    def test_should_fail_to_get_column_description(self) -> None:
        # Any unexpected proxy error is mapped to HTTP 500.
        self.mock_proxy.get_column_description.side_effect = RuntimeError

        response = self.app.test_client().get(f'/table/{TABLE_NAME}/column/{COLUMN_NAME}/description')

        self.assertEqual(response.status_code, HTTPStatus.INTERNAL_SERVER_ERROR)
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import unittest

from typing import Any, Dict

from metadata_service import create_app


class TestSwagger(unittest.TestCase):
    """
    Validates the generated swagger spec: the docs pages render, every $ref
    resolves to a component, query parameters are typed, and every routed
    endpoint is documented.
    """

    def setUp(self) -> None:
        config_module_class = 'metadata_service.config.LocalConfig'
        self.app = create_app(config_module_class=config_module_class)
        self.app_context = self.app.app_context()
        self.app_context.push()

    def tearDown(self) -> None:
        self.app_context.pop()

    def test_should_get_swagger_docs(self) -> None:
        response = self.app.test_client().get('/apidocs/')
        self.assertEqual(response.status_code, 200)

    def test_should_get_swagger_json(self) -> None:
        response = self.app.test_client().get('/apispec_1.json')

        self.assertEqual(response.status_code, 200)

    def test_should_have_a_component_from_each_reference(self) -> None:
        response = self.app.test_client().get('/apispec_1.json')

        for reference in list(TestSwagger.find('$ref', response.json)):
            # '$ref' values look like '#/components/schemas/Foo'; walk the path.
            path_to_component = reference[2:].split('/')

            json_response_to_reduce = response.json
            for key in path_to_component:
                try:
                    json_response_to_reduce = json_response_to_reduce[key]
                except KeyError:
                    self.fail(f'The following $ref does not have a valid component to reference. $ref: {reference}')

    # This is a requirement from Flasgger not Swagger
    def test_should_have_type_for_each_query_parameter(self) -> None:
        response = self.app.test_client().get('/apispec_1.json')

        for request_params in list(TestSwagger.find('parameters', response.json)):
            for param in request_params:
                if param['in'] == 'query' and 'type' not in param.keys():
                    self.fail(f'The following query parameter is missing a type: {param}')

    def test_should_have_all_endpoints_in_swagger(self) -> None:
        # Flask-internal and flasgger-internal routes that legitimately carry no
        # swagger documentation.
        # NOTE(review): the '<path:filename>' entries and the '<'/'>' replacement
        # below were stripped by an HTML-unsafe extraction of this file; they are
        # reconstructed here and the exclusion check is done on the raw flask
        # rule so the static routes actually match -- confirm against history.
        paths_excluded_from_swagger = ['/apidocs/index.html', '/apispec_1.json', '/apidocs/',
                                       '/static/<path:filename>', '/flasgger_static/<path:filename>']

        response = self.app.test_client().get('/apispec_1.json')

        paths_in_swagger = response.json.get('paths').keys()
        for rule in self.app.url_map.iter_rules():
            if rule.rule in paths_excluded_from_swagger:
                continue
            # Convert flask-style '<param>' placeholders to swagger-style '{param}'.
            endpoint = rule.rule.replace('<', '{').replace('>', '}')
            if endpoint not in paths_in_swagger:
                self.fail(f'The following endpoint is not in swagger: {endpoint}')

    @staticmethod
    def find(key: str, json_response: Dict[str, Any]) -> Any:
        """Recursively yield every value stored under ``key`` in a nested dict."""
        for json_key, json_value in json_response.items():
            if json_key == key:
                yield json_value
            elif isinstance(json_value, dict):
                for result in TestSwagger.find(key, json_value):
                    yield result
class NeptuneGremlinProxyTest(
        abstract_gremlin_proxy_test_class(), unittest.TestCase):  # type: ignore
    """
    Roundtrip tests against a live Neptune instance (configured via NEPTUNE_*
    keys), layered on the shared abstract gremlin proxy test suite.
    """

    def _create_gremlin_proxy(self, config: Mapping[str, Any]) -> RoundtripNeptuneGremlinProxy:
        # Don't use PROXY_HOST, PROXY_PORT, PROXY_PASSWORD. They might not be neptune
        return RoundtripNeptuneGremlinProxy(host=config['NEPTUNE_URL'], password=config['NEPTUNE_SESSION'],
                                            neptune_bulk_loader_s3_bucket_name=config['NEPTUNE_BULK_LOADER_S3_BUCKET_NAME'])  # noqa E501

    def test_is_retryable(self) -> None:
        # ConcurrentModificationException is transient and retryable; an
        # InternalError or a non-Gremlin exception is not.
        exception = gremlin_python.driver.protocol.GremlinServerError(dict(
            code=408, attributes=(), message=json.dumps(dict(code='ConcurrentModificationException'))))
        self.assertTrue(NeptuneGremlinProxy._is_retryable_exception(method_name=None, exception=exception))
        exception = gremlin_python.driver.protocol.GremlinServerError(dict(
            code=408, attributes=(), message=json.dumps(dict(code='InternalError'))))
        self.assertFalse(NeptuneGremlinProxy._is_retryable_exception(method_name=None, exception=exception))
        exception = RuntimeError()
        self.assertFalse(NeptuneGremlinProxy._is_retryable_exception(method_name=None, exception=exception))

    def test_gremlin_status(self) -> None:
        # Smoke test: the status endpoints just need to answer.
        proxy = self.get_proxy()
        results = proxy._gremlin_status()
        self.assertIsNotNone(results)

    def test_sparql_status(self) -> None:
        proxy = self.get_proxy()
        results = proxy._sparql_status()
        self.assertIsNotNone(results)

    def test_explain(self) -> None:
        # Build an upsert traversal for a 'jack' User vertex, translate it to a
        # script, run it, and make sure _explain doesn't blow up on the script.
        proxy = self.get_proxy()
        g = proxy.g.V().has(VertexTypes.User.value.label, proxy.key_property_name, 'jack').fold().coalesce(
            __.unfold(),
            __.addV(VertexTypes.User.value.label).property(Cardinality.single, proxy.key_property_name, 'jack'))
        g = g.property(Cardinality.single, 'email', 'jack@squareup.com')
        query = ScriptTranslator.translateT(g)
        g.iterate()
        # just enough to not explode
        proxy._explain(query)

    def test_profile(self) -> None:
        # Vertex absent before the upsert...
        count = self._get(label=VertexTypes.User, key='jack', extra_traversal=__.count())
        self.assertEqual(count, 0)
        # just enough to not explode
        self._upsert(label=VertexTypes.User, key='jack', email='jack@squareup.com')
        # ...and present afterwards.
        count = self._get(label=VertexTypes.User, key='jack', extra_traversal=__.count())
        self.assertEqual(count, 1)
6 | 7 | ### [Proxy package](https://github.com/amundsen-io/amundsenmetadatalibrary/tree/master/metadata_service/proxy "Proxy package") 8 | Proxy package contains proxy modules that talks dependencies of Metadata service. There are currently three modules in Proxy package, 9 | [Neo4j](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/proxy/neo4j_proxy.py "Neo4j"), 10 | [Statsd](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/proxy/statsd_utilities.py "Statsd") 11 | and [Atlas](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/proxy/atlas_proxy.py "Atlas") 12 | 13 | Selecting the appropriate proxy (Neo4j or Atlas) is configurable using a config variable `PROXY_CLIENT`, 14 | which takes the path to class name of proxy module available [here](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/config.py#L11). 15 | 16 | _Note: Proxy's host and port are configured using config variables `PROXY_HOST` and `PROXY_PORT` respectively. 17 | Both of these variables can be set using environment variables._ 18 | 19 | ##### [Neo4j proxy module](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/proxy/neo4j_proxy.py "Neo4j proxy module") 20 | [Neo4j](https://neo4j.com/docs/ "Neo4j") proxy module serves various use case of getting metadata or updating metadata from or into Neo4j. Most of the methods have [Cypher query](https://neo4j.com/developer/cypher/ "Cypher query") for the use case, execute the query and transform into [entity](https://github.com/amundsen-io/amundsenmetadatalibrary/tree/master/metadata_service/entity "entity"). 
21 | 22 | ##### [Apache Atlas proxy module](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/proxy/atlas_proxy.py "Apache Atlas proxy module") 23 | [Apache Atlas](https://atlas.apache.org/ "Apache Atlas") proxy module serves all of the metadata from Apache Atlas, using [pyatlasclient](https://pyatlasclient.readthedocs.io/en/latest/index.html). 24 | More information on how to setup Apache Atlas to make it compatible with Amundsen can be found [here](proxy/atlas_proxy.md) 25 | 26 | ##### [Statsd utilities module](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/proxy/statsd_utilities.py "Statsd utilities module") 27 | [Statsd](https://github.com/etsy/statsd/wiki "Statsd") utilities module has methods / functions to support statsd to publish metrics. By default, statsd integration is disabled and you can turn in on from [Metadata service configuration](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/config.py "Metadata service configuration"). 28 | For specific configuration related to statsd, you can configure it through [environment variable.](https://statsd.readthedocs.io/en/latest/configure.html#from-the-environment "environment variable.") 29 | 30 | ### [Entity package](https://github.com/amundsen-io/amundsenmetadatalibrary/tree/master/metadata_service/entity "Entity package") 31 | Entity package contains many modules where each module has many Python classes in it. These Python classes are being used as a schema and a data holder. All data exchange within Amundsen Metadata service use classes in Entity to ensure validity of itself and improve readability and mainatability. 32 | 33 | 34 | ## [Configurations](configurations.md) 35 | There are different settings you might want to change depending on the application environment like toggling the debug mode, setting the proxy, and other such environment-specific things. 
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import distutils.util  # NOTE: deprecated since Python 3.10 (PEP 632); replace with a local str->bool parser when upgrading
import os
from typing import List, Dict, Optional, Set  # noqa: F401
from metadata_service.entity.badge import Badge
from amundsen_gremlin.config import (
    LocalGremlinConfig
)

# PROXY configuration keys (the names of the environment variables / config entries)
PROXY_HOST = 'PROXY_HOST'
PROXY_PORT = 'PROXY_PORT'
PROXY_USER = 'PROXY_USER'
PROXY_PASSWORD = 'PROXY_PASSWORD'
PROXY_ENCRYPTED = 'PROXY_ENCRYPTED'
PROXY_VALIDATE_SSL = 'PROXY_VALIDATE_SSL'
PROXY_CLIENT = 'PROXY_CLIENT'

# Maps the PROXY_CLIENT environment value to the dotted path of the proxy class to load.
PROXY_CLIENTS = {
    'NEO4J': 'metadata_service.proxy.neo4j_proxy.Neo4jProxy',
    'ATLAS': 'metadata_service.proxy.atlas_proxy.AtlasProxy',
    'NEPTUNE': 'metadata_service.proxy.neptune_proxy.NeptuneGremlinProxy'
}

IS_STATSD_ON = 'IS_STATSD_ON'
USER_OTHER_KEYS = 'USER_OTHER_KEYS'


class Config:
    """Base configuration shared by every deployment flavour of the metadata service."""

    LOG_FORMAT = '%(asctime)s.%(msecs)03d [%(levelname)s] %(module)s.%(funcName)s:%(lineno)d (%(process)d:' \
                 '%(threadName)s) - %(message)s'
    LOG_DATE_FORMAT = '%Y-%m-%dT%H:%M:%S%z'
    LOG_LEVEL = 'INFO'

    # Path to the logging configuration file to be used by `fileConfig()` method
    # https://docs.python.org/3.7/library/logging.config.html#logging.config.fileConfig
    # LOG_CONFIG_FILE = 'metadata_service/logging.conf'
    LOG_CONFIG_FILE = None

    PROXY_USER = os.environ.get('CREDENTIALS_PROXY_USER', 'neo4j')
    PROXY_PASSWORD = os.environ.get('CREDENTIALS_PROXY_PASSWORD', 'test')

    PROXY_ENCRYPTED = True
    """Whether the connection to the proxy should use SSL/TLS encryption."""

    # Prior to enable PROXY_VALIDATE_SSL, you need to configure SSL.
    # https://neo4j.com/docs/operations-manual/current/security/ssl-framework/
    PROXY_VALIDATE_SSL = False
    """Whether the SSL/TLS certificate presented by the user should be validated against the system's trusted CAs."""

    IS_STATSD_ON = False

    # Used to differentiate tables with other entities in Atlas. For more details:
    # https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/docs/proxy/atlas_proxy.md
    ATLAS_TABLE_ENTITY = 'Table'

    # The relationalAttribute name of Atlas Entity that identifies the database entity.
    ATLAS_DB_ATTRIBUTE = 'db'

    # Configurable dictionary to influence format of column statistics displayed in UI
    STATISTICS_FORMAT_SPEC: Dict[str, Dict] = {}

    # whitelist badges
    WHITELIST_BADGES: List[Badge] = []

    # BUG FIX: os.environ.get() returns a *string* whenever the variable is set, and any
    # non-empty string (including 'False') is truthy -- so SWAGGER_ENABLED=False in the
    # environment used to enable swagger.  Parse it to a real boolean, mirroring how
    # LocalConfig already parses PROXY_ENCRYPTED / IS_STATSD_ON below.
    SWAGGER_ENABLED = bool(distutils.util.strtobool(os.environ.get('SWAGGER_ENABLED', 'False')))

    USER_DETAIL_METHOD = None  # type: Optional[function]

    RESOURCE_REPORT_CLIENT = None  # type: Optional[function]

    # On User detail method, these keys will be added into amundsen_common.models.user.User.other_key_values
    USER_OTHER_KEYS = {'mode_user_id'}  # type: Set[str]

    # Number of minimum reader count to qualify for popular table
    POPULAR_TABLE_MINIMUM_READER_COUNT = 10  # type: int

    # List of regexes which will exclude certain parameters from appearing as Programmatic Descriptions
    PROGRAMMATIC_DESCRIPTIONS_EXCLUDE_FILTERS = []  # type: list

    # List of accepted date formats for AtlasProxy Watermarks. With this we allow more than one datetime partition
    # format to be used in tables
    WATERMARK_DATE_FORMATS = ['%Y%m%d']


# NB: If you're using the gremlin proxy, the appropriate GremlinConfig must be added to any other configs
class LocalConfig(LocalGremlinConfig, Config):
    """Configuration for local development: debug logging, swagger on, local proxy endpoints."""

    DEBUG = True
    TESTING = False
    LOG_LEVEL = 'DEBUG'
    LOCAL_HOST = '0.0.0.0'

    PROXY_HOST = os.environ.get('PROXY_HOST', f'bolt://{LOCAL_HOST}')
    PROXY_PORT = os.environ.get('PROXY_PORT', 7687)
    PROXY_CLIENT = PROXY_CLIENTS[os.environ.get('PROXY_CLIENT', 'NEO4J')]
    PROXY_ENCRYPTED = bool(distutils.util.strtobool(os.environ.get(PROXY_ENCRYPTED, 'True')))
    PROXY_VALIDATE_SSL = bool(distutils.util.strtobool(os.environ.get(PROXY_VALIDATE_SSL, 'False')))

    JANUS_GRAPH_URL = None

    IS_STATSD_ON = bool(distutils.util.strtobool(os.environ.get(IS_STATSD_ON, 'False')))

    SWAGGER_ENABLED = True
    SWAGGER_TEMPLATE_PATH = os.path.join('api', 'swagger_doc', 'template.yml')
    SWAGGER = {
        'openapi': '3.0.2',
        'title': 'Metadata Service',
        'uiversion': 3
    }
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from flask_restful import Resource, fields, marshal

from http import HTTPStatus
from typing import Iterable, Union, Mapping, Tuple, Any

from flasgger import swag_from
from flask import current_app as app

from metadata_service.entity.resource_type import ResourceType
from metadata_service.entity.badge import Badge
from metadata_service.exception import NotFoundException
from metadata_service.proxy import get_proxy_client
from metadata_service.proxy.base_proxy import BaseProxy

# Marshalling schemas for badge responses.
badge_fields = {
    'badge_name': fields.String,
    'category': fields.String,
}

badges_fields = {
    'badges': fields.List(fields.Nested(badge_fields))
}


class BadgeAPI(Resource):
    """Read-only API returning every badge known to the metadata store."""

    def __init__(self) -> None:
        self.client = get_proxy_client()
        super(BadgeAPI, self).__init__()

    @swag_from('swagger_doc/badge/badge_get.yml')
    def get(self) -> Iterable[Union[Mapping, int, None]]:
        """
        API to get all existing badges
        """
        badges = self.client.get_badges()
        return marshal({'badges': badges}, badges_fields), HTTPStatus.OK


class BadgeCommon:
    """Shared add/delete badge logic reused by the table and dashboard badge APIs."""

    def __init__(self, client: BaseProxy) -> None:
        self.client = client

    def put(self, id: str, resource_type: ResourceType,
            badge_name: str,
            category: str = '') -> Tuple[Any, HTTPStatus]:
        """
        Add a badge to a resource, provided the (badge_name, category) pair is
        part of the configured WHITELIST_BADGES.

        :param id: resource identifier (e.g. table or dashboard URI)
        :param resource_type: type of the resource being badged
        :param badge_name: name of the badge to attach
        :param category: badge category; required, the request is rejected when empty
        :return: (response payload, HTTP status)
        """
        if category == '':
            # NOTE(review): NOT_FOUND is kept for backward compatibility with existing
            # clients, although BAD_REQUEST would describe a missing parameter better.
            return \
                {'message': f'The badge {badge_name} for resource id {id} is not added successfully because '
                            f'category `{category}` parameter is required '
                            'for badges'}, \
                HTTPStatus.NOT_FOUND

        # TODO check resource type is column when adding a badge of category column after
        # implementing column level badges
        whitelist_badges = app.config.get('WHITELIST_BADGES', [])
        # fixed typo: was `incomimg_badge`
        incoming_badge = Badge(badge_name=badge_name,
                               category=category)
        # Only badge (name, category) combinations that are explicitly whitelisted may be added.
        in_whitelist = any(incoming_badge.badge_name == badge.badge_name
                           and incoming_badge.category == badge.category
                           for badge in whitelist_badges)
        if not in_whitelist:
            return \
                {'message': f'The badge {badge_name} with category {category} for resource '
                            f'id {id} and resource_type {resource_type.name} is not added successfully because '
                            'this combination of values is not part of the whitelist'}, \
                HTTPStatus.NOT_FOUND

        try:
            self.client.add_badge(id=id,
                                  badge_name=badge_name,
                                  category=category,
                                  resource_type=resource_type)
            # fixed message typo: was 'resurce'
            return {'message': f'The badge {badge_name} with category {category} was '
                               f'added successfully to resource with id {id}'}, HTTPStatus.OK
        except Exception:
            # removed unused `as e` binding; the proxy failure is reported to the caller
            return {'message': f'The badge {badge_name} with category {category} '
                               f'for resource id {id} and resource_type {resource_type.name} failed to '
                               'be added'}, \
                HTTPStatus.NOT_FOUND

    def delete(self, id: str, badge_name: str,
               category: str,
               resource_type: ResourceType) -> Tuple[Any, HTTPStatus]:
        """
        Remove a badge from a resource.

        :param id: resource identifier
        :param badge_name: name of the badge to detach
        :param category: category of the badge to detach
        :param resource_type: type of the resource
        :return: (response payload, HTTP status)
        """
        try:
            self.client.delete_badge(id=id,
                                     resource_type=resource_type,
                                     badge_name=badge_name,
                                     category=category)
            return \
                {'message': f'The badge {badge_name} with category {category} for resource '
                            f'id {id} and resource_type {resource_type.name} was deleted successfully'}, \
                HTTPStatus.OK
        except NotFoundException:
            return \
                {'message': f'The badge {badge_name} with category {category} for resource '
                            f'id {id} and resource_type {resource_type.name} was not deleted successfully'}, \
                HTTPStatus.NOT_FOUND
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from abc import ABCMeta, abstractmethod
from typing import Any, Dict, List, Union

from amundsen_common.models.popular_table import PopularTable
from amundsen_common.models.table import Table
from amundsen_common.models.user import User
from amundsen_common.models.dashboard import DashboardSummary

from metadata_service.entity.dashboard_detail import DashboardDetail as DashboardDetailEntity
from metadata_service.entity.description import Description
from metadata_service.entity.resource_type import ResourceType
from metadata_service.util import UserResourceRel


class BaseProxy(metaclass=ABCMeta):
    """
    Base Proxy, which behaves like an interface for all
    the proxy clients available in the amundsen metadata service
    """

    @abstractmethod
    def get_user(self, *, id: str) -> Union[User, None]:
        """Return the User for the given id, or None when no such user exists."""
        pass

    @abstractmethod
    def get_users(self) -> List[User]:
        """Return all users known to the metadata store."""
        pass

    @abstractmethod
    def get_table(self, *, table_uri: str) -> Table:
        """Return the Table identified by table_uri."""
        pass

    @abstractmethod
    def delete_owner(self, *, table_uri: str, owner: str) -> None:
        """Remove `owner` from the owners of the table identified by table_uri."""
        pass

    @abstractmethod
    def add_owner(self, *, table_uri: str, owner: str) -> None:
        """Add `owner` to the owners of the table identified by table_uri."""
        pass

    @abstractmethod
    def get_table_description(self, *,
                              table_uri: str) -> Union[str, None]:
        """Return the table description, or None when none is set."""
        pass

    @abstractmethod
    def put_table_description(self, *,
                              table_uri: str,
                              description: str) -> None:
        """Create or update the description of the table."""
        pass

    @abstractmethod
    def add_tag(self, *, id: str, tag: str, tag_type: str, resource_type: ResourceType) -> None:
        """Attach `tag` of `tag_type` to the resource identified by id."""
        pass

    @abstractmethod
    def add_badge(self, *, id: str, badge_name: str, category: str = '',
                  resource_type: ResourceType) -> None:
        """Attach the (badge_name, category) badge to the resource identified by id."""
        pass

    @abstractmethod
    def delete_tag(self, *, id: str, tag: str, tag_type: str, resource_type: ResourceType) -> None:
        """Detach `tag` of `tag_type` from the resource identified by id."""
        pass

    @abstractmethod
    def delete_badge(self, *, id: str, badge_name: str, category: str,
                     resource_type: ResourceType) -> None:
        """Detach the (badge_name, category) badge from the resource identified by id."""
        pass

    @abstractmethod
    def put_column_description(self, *,
                               table_uri: str,
                               column_name: str,
                               description: str) -> None:
        """Create or update the description of one column of the table."""
        pass

    @abstractmethod
    def get_column_description(self, *,
                               table_uri: str,
                               column_name: str) -> Union[str, None]:
        """Return the description of one column, or None when none is set."""
        pass

    @abstractmethod
    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """Return up to num_entries popular tables."""
        pass

    @abstractmethod
    def get_latest_updated_ts(self) -> int:
        """Return the timestamp of the most recent metadata update."""
        pass

    @abstractmethod
    def get_tags(self) -> List:
        """Return all tags (with usage counts -- see the tag API marshalling)."""
        pass

    @abstractmethod
    def get_badges(self) -> List:
        """Return all badges known to the metadata store."""
        pass

    @abstractmethod
    def get_dashboard_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) \
            -> Dict[str, List[DashboardSummary]]:
        """Return dashboards related to the user via relation_type (e.g. follow/own)."""
        pass

    @abstractmethod
    def get_table_by_user_relation(self, *, user_email: str,
                                   relation_type: UserResourceRel) -> Dict[str, Any]:
        """Return tables related to the user via relation_type (e.g. follow/own)."""
        pass

    @abstractmethod
    def get_frequently_used_tables(self, *, user_email: str) -> Dict[str, Any]:
        """Return tables the user reads frequently."""
        pass

    @abstractmethod
    def add_resource_relation_by_user(self, *,
                                      id: str,
                                      user_id: str,
                                      relation_type: UserResourceRel,
                                      resource_type: ResourceType) -> None:
        """Create a user->resource relation (e.g. follow) for the given resource."""
        pass

    @abstractmethod
    def delete_resource_relation_by_user(self, *,
                                         id: str,
                                         user_id: str,
                                         relation_type: UserResourceRel,
                                         resource_type: ResourceType) -> None:
        """Delete a user->resource relation (e.g. follow) for the given resource."""
        pass

    @abstractmethod
    def get_dashboard(self,
                      dashboard_uri: str,
                      ) -> DashboardDetailEntity:
        """Return the dashboard detail identified by dashboard_uri."""
        pass

    @abstractmethod
    def get_dashboard_description(self, *,
                                  id: str) -> Description:
        """Return the description of the dashboard identified by id."""
        pass

    @abstractmethod
    def put_dashboard_description(self, *,
                                  id: str,
                                  description: str) -> None:
        """Create or update the description of the dashboard identified by id."""
        pass

    @abstractmethod
    def get_resources_using_table(self, *,
                                  id: str,
                                  resource_type: ResourceType) -> Dict[str, List[DashboardSummary]]:
        """Return resources of resource_type (currently dashboards) that use the given table."""
        pass
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from http import HTTPStatus
from typing import Iterable, Union, Mapping, Tuple, Any

from flasgger import swag_from
from flask import current_app as app
from flask_restful import Resource, fields, marshal

from metadata_service.entity.resource_type import ResourceType
from metadata_service.exception import NotFoundException
from metadata_service.proxy import get_proxy_client
from metadata_service.proxy.base_proxy import BaseProxy

tag_fields = {
    'tag_name': fields.String,
    'tag_count': fields.Integer
}

tag_usage_fields = {
    'tag_usages': fields.List(fields.Nested(tag_fields))
}


BADGE_TYPE = 'badge'


class TagAPI(Resource):
    def __init__(self) -> None:
        self.client = get_proxy_client()
        super(TagAPI, self).__init__()

    @swag_from('swagger_doc/tag/tag_get.yml')
    def get(self) -> Iterable[Union[Mapping, int, None]]:
        """
        API to fetch all the existing tags with usage.
38 | """ 39 | tag_usages = self.client.get_tags() 40 | return marshal({'tag_usages': tag_usages}, tag_usage_fields), HTTPStatus.OK 41 | 42 | 43 | class TagCommon: 44 | def __init__(self, client: BaseProxy) -> None: 45 | self.client = client 46 | 47 | def put(self, id: str, resource_type: ResourceType, 48 | tag: str, tag_type: str = 'default') -> Tuple[Any, HTTPStatus]: 49 | """ 50 | Method to add a tag to existing resource. 51 | 52 | :param id: 53 | :param resource_type: 54 | :param tag: 55 | :param tag_type: 56 | :return: 57 | """ 58 | 59 | whitelist_badges = app.config.get('WHITELIST_BADGES', []) 60 | if tag_type == BADGE_TYPE: 61 | return \ 62 | {'message': 'Badges should be added using /badges/, tag_type=badge no longer valid'}, \ 63 | HTTPStatus.NOT_ACCEPTABLE 64 | 65 | else: 66 | for badge in whitelist_badges: 67 | if tag == badge.badge_name: 68 | return \ 69 | {'message': 'The tag {} for id {} with type {} and resource_type {} ' 70 | 'is not added successfully as tag ' 71 | 'for it is reserved for badge'.format(tag, 72 | id, 73 | tag_type, 74 | resource_type.name)}, \ 75 | HTTPStatus.CONFLICT 76 | 77 | try: 78 | self.client.add_tag(id=id, 79 | tag=tag, 80 | tag_type=tag_type, 81 | resource_type=resource_type) 82 | return {'message': 'The tag {} for id {} with type {} and resource_type {} ' 83 | 'is added successfully'.format(tag, 84 | id, 85 | tag_type, 86 | resource_type.name)}, HTTPStatus.OK 87 | except NotFoundException: 88 | return \ 89 | {'message': 'The tag {} for table_uri {} with type {} and resource_type {} ' 90 | 'is not added successfully'.format(tag, 91 | id, 92 | tag_type, 93 | resource_type.name)}, \ 94 | HTTPStatus.NOT_FOUND 95 | 96 | def delete(self, id: str, tag: str, 97 | resource_type: ResourceType, tag_type: str = 'default') -> Tuple[Any, HTTPStatus]: 98 | """ 99 | Method to remove a association between a given tag and a resource. 
100 | 101 | :param id: 102 | :param resource_type: 103 | :param tag: 104 | :param tag_type: 105 | :return: 106 | """ 107 | 108 | try: 109 | self.client.delete_tag(id=id, 110 | tag=tag, 111 | tag_type=tag_type, 112 | resource_type=resource_type) 113 | return {'message': 'The tag {} for id {} with type {} and resource_type {} ' 114 | 'is deleted successfully'.format(tag, 115 | id, 116 | tag_type, 117 | resource_type.name)}, HTTPStatus.OK 118 | except NotFoundException: 119 | return \ 120 | {'message': 'The tag {} for id {} with type {} and resource_type {} ' 121 | 'is not deleted successfully'.format(tag, 122 | id, 123 | tag_type, 124 | resource_type.name)}, \ 125 | HTTPStatus.NOT_FOUND 126 | -------------------------------------------------------------------------------- /metadata_service/api/dashboard.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import json 5 | from http import HTTPStatus 6 | from typing import Iterable, Mapping, Optional, Union 7 | 8 | from flasgger import swag_from 9 | from flask import request 10 | from flask_restful import Resource, reqparse 11 | 12 | from metadata_service.api import BaseAPI 13 | from metadata_service.api.tag import TagCommon 14 | from metadata_service.api.badge import BadgeCommon 15 | from metadata_service.entity.dashboard_detail import DashboardSchema 16 | from metadata_service.entity.description import DescriptionSchema 17 | from metadata_service.entity.resource_type import ResourceType 18 | from metadata_service.exception import NotFoundException 19 | from metadata_service.proxy import get_proxy_client 20 | 21 | 22 | class DashboardDetailAPI(BaseAPI): 23 | """ 24 | Dashboard detail API 25 | """ 26 | 27 | def __init__(self) -> None: 28 | self.client = get_proxy_client() 29 | super().__init__(DashboardSchema, 'dashboard', self.client) 30 | 31 | 
    @swag_from('swagger_doc/dashboard/detail_get.yml')
    def get(self, *, id: Optional[str] = None) -> Iterable[Union[Mapping, int, None]]:
        """Return the dashboard detail for `id`, or 404 when it does not exist."""
        try:
            return super().get(id=id)
        except NotFoundException:
            return {'message': 'dashboard_id {} does not exist'.format(id)}, HTTPStatus.NOT_FOUND


class DashboardDescriptionAPI(BaseAPI):
    """
    DashboardDescriptionAPI supports PUT and GET operation to upsert table description
    """

    def __init__(self) -> None:
        self.client = get_proxy_client()
        super().__init__(DescriptionSchema, 'dashboard_description', self.client)

    @swag_from('swagger_doc/common/description_get.yml')
    def get(self, *, id: Optional[str] = None) -> Iterable[Union[Mapping, int, None]]:
        """
        Returns description

        :param id: dashboard identifier
        """
        try:
            return super().get(id=id)

        except NotFoundException:
            return {'message': 'Dashboard {} does not exist'.format(id)}, HTTPStatus.NOT_FOUND

        except Exception:
            # Catch-all boundary: any unexpected proxy failure is reported as a 500.
            return {'message': 'Internal server error!'}, HTTPStatus.INTERNAL_SERVER_ERROR

    @swag_from('swagger_doc/common/description_put.yml')
    def put(self, id: str) -> Iterable[Union[Mapping, int, None]]:
        """
        Updates Dashboard description (passed as a request body)

        :param id: dashboard identifier
        :return: None with 200 on success; message with 404 when id is unknown
        """
        try:
            # The request body is expected to be a JSON object with a 'description' key.
            description = json.loads(request.data).get('description')
            self.client.put_dashboard_description(id=id, description=description)
            return None, HTTPStatus.OK

        except NotFoundException:
            return {'message': 'id {} does not exist'.format(id)}, HTTPStatus.NOT_FOUND


class DashboardBadgeAPI(Resource):
    """
    DashboardBadgeAPI that supports PUT and DELETE operation to add or delete badges
    on Dashboard
    """
    def __init__(self) -> None:
        self.client = get_proxy_client()
        self.parser = reqparse.RequestParser()
        # `category` is a required query/body argument for both PUT and DELETE.
        self.parser.add_argument('category', type=str, required=True)
        super(DashboardBadgeAPI, self).__init__()

        self._badge_common = BadgeCommon(client=self.client)

    @swag_from('swagger_doc/badge/badge_put.yml')
    def put(self, id: str, badge: str) -> Iterable[Union[Mapping, int, None]]:
        """
        API to add a badge to an existing Dashboard.

        :param id: dashboard identifier
        :param badge: badge name
        """
        args = self.parser.parse_args()

        category = args.get('category', '')

        return self._badge_common.put(id=id,
                                      resource_type=ResourceType.Dashboard,
                                      badge_name=badge,
                                      category=category)

    @swag_from('swagger_doc/badge/badge_delete.yml')
    def delete(self, id: str, badge: str) -> Iterable[Union[Mapping, int, None]]:
        """
        API to remove a badge from a Dashboard.

        :param id: dashboard identifier
        :param badge: badge name
        """
        args = self.parser.parse_args()
        category = args.get('category', '')

        return self._badge_common.delete(id=id,
                                         resource_type=ResourceType.Dashboard,
                                         badge_name=badge,
                                         category=category)


class DashboardTagAPI(Resource):
    """
    DashboardTagAPI that supports PUT and DELETE operation to add or delete tag
    on Dashboard
    """

    def __init__(self) -> None:
        self.client = get_proxy_client()
        self.parser = reqparse.RequestParser()
        self.parser.add_argument('tag_type', type=str, required=False, default='default')
        super(DashboardTagAPI, self).__init__()

        self._tag_common = TagCommon(client=self.client)

    @swag_from('swagger_doc/tag/tag_put.yml')
    def put(self, id: str, tag: str) -> Iterable[Union[Mapping, int, None]]:
        """
        API to add a tag to existing Dashboard.

        :param id: dashboard identifier
        :param tag: tag name
        :return:
        """
        args = self.parser.parse_args()
        tag_type = args.get('tag_type', 'default')

        return self._tag_common.put(id=id,
                                    resource_type=ResourceType.Dashboard,
                                    tag=tag,
                                    tag_type=tag_type)

    @swag_from('swagger_doc/tag/tag_delete.yml')
    def delete(self, id: str, tag: str) -> Iterable[Union[Mapping, int, None]]:
        """
        API to remove a association between a given tag and a Dashboard.

        :param id: dashboard identifier
        :param tag: tag name
        :return:
        """
        args = self.parser.parse_args()
        tag_type = args.get('tag_type', 'default')

        return self._tag_common.delete(id=id,
                                       resource_type=ResourceType.Dashboard,
                                       tag=tag,
                                       tag_type=tag_type)
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import ast
import importlib
import logging
import logging.config
import os
import sys
from typing import Dict, Any  # noqa: F401
from flask_cors import CORS

from flasgger import Swagger
from flask import Flask, Blueprint
from flask_restful import Api

from metadata_service.api.column import ColumnDescriptionAPI
from metadata_service.api.dashboard import (DashboardDetailAPI, DashboardDescriptionAPI,
                                            DashboardTagAPI, DashboardBadgeAPI)
from metadata_service.api.healthcheck import healthcheck
from metadata_service.api.popular_tables import PopularTablesAPI
from metadata_service.api.system import Neo4jDetailAPI
from metadata_service.api.table \
    import TableDetailAPI, TableOwnerAPI, TableTagAPI, TableBadgeAPI, TableDescriptionAPI, TableDashboardAPI
from metadata_service.api.tag import TagAPI
from metadata_service.api.badge import BadgeAPI
from metadata_service.api.user import (UserDetailAPI, UserFollowAPI,
                                       UserFollowsAPI, UserOwnsAPI,
                                       UserOwnAPI, UserReadsAPI)

# For customized flask use below arguments to override.
FLASK_APP_MODULE_NAME = os.getenv('FLASK_APP_MODULE_NAME')
FLASK_APP_CLASS_NAME = os.getenv('FLASK_APP_CLASS_NAME')
FLASK_APP_KWARGS_DICT_STR = os.getenv('FLASK_APP_KWARGS_DICT')
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# Environment Variable to enable cors.
# BUG FIX: os.environ.get() returns a string whenever the variable is set, and any
# non-empty string (including 'False') is truthy -- so CORS_ENABLED=False used to
# *enable* CORS.  Parse the common boolean spellings explicitly instead.
CORS_ENABLED = os.environ.get('CORS_ENABLED', 'False').lower() in ('true', '1', 'yes')


def create_app(*, config_module_class: str) -> Flask:
    """
    Creates app in function so that flask with flask extensions can be
    initialized with specific config. Here it defines the route of APIs
    so that it can be seen in one place where implementation is separated.

    Config is being fetched via module.class name where module.class name
    can be passed through environment variable.
    This is to make config fetched through runtime PYTHON_PATH so that
    Config class can be easily injected.
    More on: http://flask.pocoo.org/docs/1.0/config/

    :param config_module_class: name of the config (TODO: Implement config.py)
    :return: Flask
    """
    if FLASK_APP_MODULE_NAME and FLASK_APP_CLASS_NAME:
        # A custom Flask subclass was requested via environment variables.
        print('Using requested Flask module {module_name} and class {class_name}'
              .format(module_name=FLASK_APP_MODULE_NAME, class_name=FLASK_APP_CLASS_NAME), file=sys.stderr)
        class_obj = getattr(importlib.import_module(FLASK_APP_MODULE_NAME), FLASK_APP_CLASS_NAME)

        flask_kwargs_dict = {}  # type: Dict[str, Any]
        if FLASK_APP_KWARGS_DICT_STR:
            print('Using kwargs {kwargs} to instantiate Flask'.format(kwargs=FLASK_APP_KWARGS_DICT_STR),
                  file=sys.stderr)
            # literal_eval (not eval) so only Python literals are accepted from the env.
            flask_kwargs_dict = ast.literal_eval(FLASK_APP_KWARGS_DICT_STR)

        app = class_obj(__name__, **flask_kwargs_dict)

    else:
        app = Flask(__name__)

    if CORS_ENABLED:
        CORS(app)
    # Environment variable wins over the argument so deployments can inject config.
    config_module_class = \
        os.getenv('METADATA_SVC_CONFIG_MODULE_CLASS') or config_module_class
    app.config.from_object(config_module_class)

    if app.config.get('LOG_CONFIG_FILE'):
        logging.config.fileConfig(app.config.get('LOG_CONFIG_FILE'), disable_existing_loggers=False)
    else:
        logging.basicConfig(format=app.config.get('LOG_FORMAT'), datefmt=app.config.get('LOG_DATE_FORMAT'))
        logging.getLogger().setLevel(app.config.get('LOG_LEVEL'))
    logging.info('Created app with config name {}'.format(config_module_class))
    logging.info('Using backend {}'.format(app.config.get('PROXY_CLIENT')))

    api_bp = Blueprint('api', __name__)
    api_bp.add_url_rule('/healthcheck', 'healthcheck', healthcheck)

    api = Api(api_bp)

    # NOTE(review): the URL parameter converters (e.g. `<path:id>`) appear to have
    # been stripped from the route strings in this copy -- note the double slashes
    # such as '/table//description'.  Restore them from upstream before deploying;
    # strings are intentionally left untouched here.  TODO confirm against upstream.
    api.add_resource(PopularTablesAPI, '/popular_tables/')
    api.add_resource(TableDetailAPI, '/table/')
    api.add_resource(TableDescriptionAPI,
                     '/table//description')
    api.add_resource(TableTagAPI,
                     '/table//tag/')
    api.add_resource(TableBadgeAPI,
                     '/table//badge/')
    api.add_resource(TableOwnerAPI,
                     '/table//owner/')
    api.add_resource(TableDashboardAPI,
                     '/table//dashboard/')
    api.add_resource(ColumnDescriptionAPI,
                     '/table//column//description')
    api.add_resource(Neo4jDetailAPI,
                     '/latest_updated_ts')
    api.add_resource(TagAPI,
                     '/tags/')
    api.add_resource(BadgeAPI,
                     '/badges/')
    api.add_resource(UserDetailAPI,
                     '/user',
                     '/user/')
    api.add_resource(UserFollowsAPI,
                     '/user//follow/')
    api.add_resource(UserFollowAPI,
                     '/user//follow//')
    api.add_resource(UserOwnsAPI,
                     '/user//own/')
    api.add_resource(UserOwnAPI,
                     '/user//own//')
    api.add_resource(UserReadsAPI,
                     '/user//read/')
    api.add_resource(DashboardDetailAPI,
                     '/dashboard/')
    api.add_resource(DashboardDescriptionAPI,
                     '/dashboard//description')
    api.add_resource(DashboardTagAPI,
                     '/dashboard//tag/')
    api.add_resource(DashboardBadgeAPI,
                     '/dashboard//badge/')
    app.register_blueprint(api_bp)

    if app.config.get('SWAGGER_ENABLED'):
        Swagger(app, template_file=os.path.join(ROOT_DIR, app.config.get('SWAGGER_TEMPLATE_PATH')), parse=True)
    return app
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import unittest

from http import HTTPStatus
from unittest import mock
from unittest.mock import MagicMock
from metadata_service import create_app

from metadata_service.api.user import (UserDetailAPI, UserFollowAPI, UserFollowsAPI,
                                       UserOwnsAPI, UserOwnAPI, UserReadsAPI)

from metadata_service.util import UserResourceRel
from metadata_service.entity.resource_type import ResourceType


class UserDetailAPITest(unittest.TestCase):
    # Proxy creation is patched so no real backend (Neo4j/Atlas/...) is contacted.
    @mock.patch('metadata_service.api.user.get_proxy_client')
    def setUp(self, mock_get_proxy_client: MagicMock) -> None:
        self.app = create_app(config_module_class='metadata_service.config.LocalConfig')
        self.app_context = self.app.app_context()
        self.app_context.push()

        self.mock_client = mock.Mock()
        mock_get_proxy_client.return_value = self.mock_client
        self.api = UserDetailAPI()

    def test_get(self) -> None:
        # Single-user lookup delegates to the proxy's get_user.
        self.mock_client.get_user.return_value = {}
        response = self.api.get(id='username')
        # Responses are (payload, status) tuples; index 1 is the HTTP status.
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        self.mock_client.get_user.assert_called_once_with(id='username')

    def test_gets(self) -> None:
        # Listing all users delegates to the proxy's get_users.
        self.mock_client.get_users.return_value = []
        response = self.api.get()
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        self.mock_client.get_users.assert_called_once()


class UserFollowsAPITest(unittest.TestCase):

    @mock.patch('metadata_service.api.user.get_proxy_client')
    def setUp(self, mock_get_proxy_client: MagicMock) -> None:
        self.mock_client = mock.Mock()
        mock_get_proxy_client.return_value = self.mock_client
        self.api = UserFollowsAPI()

    def test_get(self) -> None:
        self.mock_client.get_table_by_user_relation.return_value = {'table': []}
        self.mock_client.get_dashboard_by_user_relation.return_value = {'dashboard': []}

        response = self.api.get(user_id='username')
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        # NOTE(review): only the table relation call is asserted here, although the
        # dashboard relation is mocked too; UserOwnsAPITest below asserts both.
        # Confirm whether UserFollowsAPI.get also queries dashboards and, if so,
        # add the matching assertion.
        self.mock_client.get_table_by_user_relation.assert_called_once()


class UserFollowAPITest(unittest.TestCase):

    @mock.patch('metadata_service.api.user.get_proxy_client')
    def setUp(self, mock_get_proxy_client: MagicMock) -> None:
        self.mock_client = mock.Mock()
        mock_get_proxy_client.return_value = self.mock_client
        self.api = UserFollowAPI()

    def test_table_put(self) -> None:
        response = self.api.put(user_id='username', resource_type='table', resource_id='3')
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        self.mock_client.add_resource_relation_by_user.assert_called_with(id='3',
                                                                          user_id='username',
                                                                          relation_type=UserResourceRel.follow,
                                                                          resource_type=ResourceType.Table)

    def test_dashboard_put(self) -> None:
        response = self.api.put(user_id='username', resource_type='dashboard', resource_id='3')
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        self.mock_client.add_resource_relation_by_user.assert_called_with(id='3',
                                                                          user_id='username',
                                                                          relation_type=UserResourceRel.follow,
                                                                          resource_type=ResourceType.Dashboard)

    def test_table_delete(self) -> None:
        response = self.api.delete(user_id='username', resource_type='table', resource_id='3')
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        self.mock_client.delete_resource_relation_by_user.assert_called_with(id='3',
                                                                             user_id='username',
                                                                             relation_type=UserResourceRel.follow,
                                                                             resource_type=ResourceType.Table)

    def test_dashboard_delete(self) -> None:
        response = self.api.delete(user_id='username', resource_type='dashboard', resource_id='3')
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        self.mock_client.delete_resource_relation_by_user.assert_called_with(id='3',
                                                                             user_id='username',
                                                                             relation_type=UserResourceRel.follow,
                                                                             resource_type=ResourceType.Dashboard)


class UserOwnsAPITest(unittest.TestCase):

    @mock.patch('metadata_service.api.user.get_proxy_client')
    def setUp(self, mock_get_proxy_client: MagicMock) -> None:
        self.mock_client = mock.Mock()
        mock_get_proxy_client.return_value = self.mock_client
        self.api = UserOwnsAPI()

    def test_get(self) -> None:
        # Both table and dashboard ownership lookups are expected to happen.
        self.mock_client.get_table_by_user_relation.return_value = {'table': []}
        self.mock_client.get_dashboard_by_user_relation.return_value = {'dashboard': []}
        response = self.api.get(user_id='username')
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        self.mock_client.get_table_by_user_relation.assert_called_once()
        self.mock_client.get_dashboard_by_user_relation.assert_called_once()


class UserOwnAPITest(unittest.TestCase):

    @mock.patch('metadata_service.api.user.get_proxy_client')
    def setUp(self, mock_get_proxy_client: MagicMock) -> None:
        self.mock_client = mock.Mock()
        mock_get_proxy_client.return_value = self.mock_client
        self.api = UserOwnAPI()

    def test_put(self) -> None:
        response = self.api.put(user_id='username', resource_type='2', table_uri='3')
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        self.mock_client.add_owner.assert_called_once()

    def test_delete(self) -> None:
        response = self.api.delete(user_id='username', resource_type='2', table_uri='3')
        self.assertEqual(list(response)[1], HTTPStatus.OK)
        self.mock_client.delete_owner.assert_called_once()


class UserReadsAPITest(unittest.TestCase):
    # Patch applied per-test here (no setUp) since the class has a single test.
    @mock.patch('metadata_service.api.user.get_proxy_client')
    def test_get(self, mock_get_proxy_client: MagicMock) -> None:
        mock_client = mock.Mock()
        mock_get_proxy_client.return_value = mock_client
        mock_client.get_frequently_used_tables.return_value = {'table': []}
        api = UserReadsAPI()
143 | response = api.get(user_id='username') 144 | self.assertEqual(list(response)[1], HTTPStatus.OK) 145 | mock_client.get_frequently_used_tables.assert_called_once() 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amundsen Metadata Service 2 | [![PyPI version](https://badge.fury.io/py/amundsen-metadata.svg)](https://badge.fury.io/py/amundsen-metadata) 3 | [![Build Status](https://api.travis-ci.com/lyft/amundsenmetadatalibrary.svg?branch=master)](https://travis-ci.com/lyft/amundsenmetadatalibrary) 4 | [![Coverage Status](https://img.shields.io/codecov/c/github/lyft/amundsenmetadatalibrary/master.svg)](https://codecov.io/github/lyft/amundsenmetadatalibrary?branch=master) 5 | [![License](https://img.shields.io/:license-Apache%202-blue.svg)](LICENSE) 6 | [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](#developer-guide) 7 | [![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://amundsenworkspace.slack.com/join/shared_invite/enQtNTk2ODQ1NDU1NDI0LTc3MzQyZmM0ZGFjNzg5MzY1MzJlZTg4YjQ4YTU0ZmMxYWU2MmVlMzhhY2MzMTc1MDg0MzRjNTA4MzRkMGE0Nzk) 8 | 9 | Amundsen Metadata service serves Restful API and is responsible for providing and also updating metadata, such as table & column description, and tags. Metadata service can use Neo4j or Apache Atlas as a persistent layer. 10 | 11 | For information about Amundsen and our other services, visit the [main repository](https://github.com/amundsen-io/amundsen#amundsen) `README.md`. Please also see our instructions for a [quick start](https://github.com/amundsen-io/amundsen/blob/master/docs/installation.md#bootstrap-a-default-version-of-amundsen-using-docker) setup of Amundsen with dummy data, and an [overview of the architecture](https://github.com/amundsen-io/amundsen/blob/master/docs/architecture.md#architecture). 
12 | 13 | ## Requirements 14 | - Python >= 3.7 15 | 16 | ## Doc 17 | - https://www.amundsen.io/amundsen/ 18 | 19 | ## Instructions to start the Metadata service from distribution 20 | ```bash 21 | $ venv_path=[path_for_virtual_environment] 22 | $ python3 -m venv $venv_path 23 | $ source $venv_path/bin/activate 24 | $ pip3 install amundsen-metadata 25 | $ python3 metadata_service/metadata_wsgi.py 26 | 27 | -- In a different terminal, verify getting HTTP/1.0 200 OK 28 | $ curl -v http://localhost:5002/healthcheck 29 | ``` 30 | 31 | ## Instructions to start the Metadata service from the source 32 | ```bash 33 | $ git clone https://github.com/amundsen-io/amundsenmetadatalibrary.git 34 | $ cd amundsenmetadatalibrary 35 | $ python3 -m venv venv 36 | $ source venv/bin/activate 37 | $ pip3 install -r requirements.txt 38 | $ python3 setup.py install 39 | $ python3 metadata_service/metadata_wsgi.py 40 | 41 | -- In a different terminal, verify getting HTTP/1.0 200 OK 42 | $ curl -v http://localhost:5002/healthcheck 43 | ``` 44 | 45 | ## Instructions to start the service from Docker 46 | 47 | ```bash 48 | $ docker pull amundsendev/amundsen-metadata:latest 49 | $ docker run -p 5002:5002 amundsendev/amundsen-metadata 50 | # - alternative, for production environment with Gunicorn (see its homepage link below) 51 | $ ## docker run -p 5002:5002 amundsendev/amundsen-metadata gunicorn --bind 0.0.0.0:5002 metadata_service.metadata_wsgi 52 | 53 | -- In a different terminal, verify getting HTTP/1.0 200 OK 54 | $ curl -v http://localhost:5002/healthcheck 55 | ``` 56 | 57 | 58 | ## Production environment 59 | By default, Flask comes with Werkzeug webserver, which is for development. For production environment use production grade web server such as [Gunicorn](https://gunicorn.org/ "Gunicorn"). 
60 | 61 | ```bash 62 | $ pip install gunicorn 63 | $ gunicorn metadata_service.metadata_wsgi 64 | ``` 65 | Here is [documentation](https://docs.gunicorn.org/en/latest/run.html "documentation") of gunicorn configuration. 66 | 67 | ### Configuration outside local environment 68 | By default, Metadata service uses [LocalConfig](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/config.py "LocalConfig") that looks for Neo4j running in localhost. 69 | In order to use different end point, you need to create [Config](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/config.py "Config") suitable for your use case. Once config class has been created, it can be referenced by [environment variable](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/metadata_wsgi.py "environment variable"): `METADATA_SVC_CONFIG_MODULE_CLASS` 70 | 71 | For example, in order to have different config for production, you can inherit Config class, create Production config and passing production config class into environment variable. Let's say class name is ProdConfig and it's in metadata_service.config module. then you can set as below: 72 | 73 | `METADATA_SVC_CONFIG_MODULE_CLASS=metadata_service.config.ProdConfig` 74 | 75 | This way Metadata service will use production config in production environment. For more information on how the configuration is being loaded and used, here's reference from Flask [doc](http://flask.pocoo.org/docs/1.0/config/#development-production "doc"). 76 | 77 | # Apache Atlas 78 | Amundsen Metadata service can use Apache Atlas as a backend. Some of the benefits of using Apache Atlas instead of Neo4j is that Apache Atlas offers plugins to several services (e.g. Apache Hive, Apache Spark) that allow for push based updates. It also allows to set policies on what metadata is accesible and editable by means of Apache Ranger. 
79 | 80 | If you would like to use Apache Atlas as a backend for Metadata service you will need to create a [Config](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/master/metadata_service/config.py "Config") as mentioned above. Make sure to include the following: 81 | 82 | ```python 83 | PROXY_CLIENT = PROXY_CLIENTS['ATLAS'] # or env PROXY_CLIENT='ATLAS' 84 | PROXY_PORT = 21000 # or env PROXY_PORT 85 | PROXY_USER = 'atlasuser' # or env CREDENTIALS_PROXY_USER 86 | PROXY_PASSWORD = 'password' # or env CREDENTIALS_PROXY_PASSWORD 87 | ``` 88 | 89 | To start the service with Atlas from Docker. Make sure you have `atlasserver` configured in DNS (or docker-compose) 90 | 91 | ```bash 92 | $ docker run -p 5002:5002 --env PROXY_CLIENT=ATLAS --env PROXY_PORT=21000 --env PROXY_HOST=atlasserver --env CREDENTIALS_PROXY_USER=atlasuser --env CREDENTIALS_PROXY_PASSWORD=password amundsen-metadata:latest 93 | ``` 94 | 95 | --- 96 | **NOTE** 97 | 98 | The support for Apache Atlas is work in progress. For example, while Apache Atlas supports fine grained access, Amundsen does not support this yet. 99 | 100 | # Developer guide 101 | ## Code style 102 | - PEP 8: Amundsen Metadata service follows [PEP8 - Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/ "PEP8 - Style Guide for Python Code"). 103 | - Typing hints: Amundsen Metadata service also utilizes [Typing hint](https://docs.python.org/3/library/typing.html "Typing hint") for better readability. 104 | 105 | ## API documentation 106 | 107 | We have Swagger documentation setup with OpenApi 3.0.2. This documentation is generated via Flasgger. When adding or updating an API please make sure to update the documentation. To see the documentation run the application locally and go to localhost:5002/apidocs/. Currently the documentation only works with local configuration. 
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import json
from http import HTTPStatus
from typing import Any, Iterable, Mapping, Optional, Union

from amundsen_common.models.table import TableSchema
from flasgger import swag_from
from flask import request
from flask_restful import Resource, reqparse

from metadata_service.api import BaseAPI
from metadata_service.api.badge import BadgeCommon
from metadata_service.api.tag import TagCommon
from metadata_service.entity.dashboard_summary import DashboardSummarySchema
from metadata_service.entity.resource_type import ResourceType
from metadata_service.exception import NotFoundException
from metadata_service.proxy import get_proxy_client


class TableDetailAPI(Resource):
    """
    TableDetail API: serves the full detail of a single table.
    """

    def __init__(self) -> None:
        self.client = get_proxy_client()

    @swag_from('swagger_doc/table/detail_get.yml')
    def get(self, table_uri: str) -> Iterable[Union[Mapping, int, None]]:
        """
        Return the serialized table detail for the given table URI.

        :param table_uri: unique identifier of the table in the backend
        :return: (serialized table, 200) or (error message, 404)
        """
        try:
            table = self.client.get_table(table_uri=table_uri)
            # marshmallow 2.x style: dump() returns a MarshalResult whose
            # .data attribute holds the serialized dict.
            schema = TableSchema(strict=True)
            return schema.dump(table).data, HTTPStatus.OK

        except NotFoundException:
            return {'message': 'table_uri {} does not exist'.format(table_uri)}, HTTPStatus.NOT_FOUND


class TableOwnerAPI(Resource):
    """
    TableOwner API to add / delete owner info
    """

    def __init__(self) -> None:
        self.client = get_proxy_client()

    @swag_from('swagger_doc/table/owner_put.yml')
    def put(self, table_uri: str, owner: str) -> Iterable[Union[Mapping, int, None]]:
        """
        Add an owner to a table.

        :param table_uri: unique identifier of the table
        :param owner: owner (user id) to attach to the table
        :return: (message, 200) on success, (message, 500) on any failure
        """
        try:
            self.client.add_owner(table_uri=table_uri, owner=owner)
            return {'message': 'The owner {} for table_uri {} '
                               'is added successfully'.format(owner,
                                                              table_uri)}, HTTPStatus.OK
        except Exception:
            # Deliberately broad: any proxy failure is reported as a 500
            # rather than propagating to the client.
            return {'message': 'The owner {} for table_uri {} '
                               'is not added successfully'.format(owner,
                                                                  table_uri)}, HTTPStatus.INTERNAL_SERVER_ERROR

    @swag_from('swagger_doc/table/owner_delete.yml')
    def delete(self, table_uri: str, owner: str) -> Iterable[Union[Mapping, int, None]]:
        """
        Remove an owner from a table.

        :param table_uri: unique identifier of the table
        :param owner: owner (user id) to detach from the table
        :return: (message, 200) on success, (message, 500) on any failure
        """
        try:
            self.client.delete_owner(table_uri=table_uri, owner=owner)
            return {'message': 'The owner {} for table_uri {} '
                               'is deleted successfully'.format(owner,
                                                                table_uri)}, HTTPStatus.OK
        except Exception:
            # Deliberately broad: any proxy failure is reported as a 500.
            return {'message': 'The owner {} for table_uri {} '
                               'is not deleted successfully'.format(owner,
                                                                    table_uri)}, HTTPStatus.INTERNAL_SERVER_ERROR


class TableDescriptionAPI(Resource):
    """
    TableDescriptionAPI supports PUT and GET operation to upsert table description
    """

    def __init__(self) -> None:
        self.client = get_proxy_client()
        super(TableDescriptionAPI, self).__init__()

    @swag_from('swagger_doc/common/description_get.yml')
    def get(self, id: str) -> Iterable[Any]:
        """
        Return the table description stored in the backend.

        :param id: table URI
        :return: (description payload, 200), (message, 404) when the table
                 is unknown, or (message, 500) on any other failure
        """
        try:
            description = self.client.get_table_description(table_uri=id)
            return {'description': description}, HTTPStatus.OK

        except NotFoundException:
            return {'message': 'table_uri {} does not exist'.format(id)}, HTTPStatus.NOT_FOUND

        except Exception:
            return {'message': 'Internal server error!'}, HTTPStatus.INTERNAL_SERVER_ERROR

    @swag_from('swagger_doc/common/description_put.yml')
    def put(self, id: str) -> Iterable[Any]:
        """
        Update the table description (passed as a JSON request body with a
        'description' field).

        :param id: table URI
        :return: (None, 200) on success or (message, 404) when the table
                 is unknown
        """
        try:
            description = json.loads(request.data).get('description')
            self.client.put_table_description(table_uri=id, description=description)
            return None, HTTPStatus.OK

        except NotFoundException:
            return {'message': 'table_uri {} does not exist'.format(id)}, HTTPStatus.NOT_FOUND


class TableTagAPI(Resource):
    """
    TableTagAPI that supports GET, PUT and DELETE operation to add or delete tag
    on table
    """

    def __init__(self) -> None:
        self.client = get_proxy_client()
        self.parser = reqparse.RequestParser()
        self.parser.add_argument('tag_type', type=str, required=False, default='default')
        super(TableTagAPI, self).__init__()

        self._tag_common = TagCommon(client=self.client)

    @swag_from('swagger_doc/tag/tag_put.yml')
    def put(self, id: str, tag: str) -> Iterable[Union[Mapping, int, None]]:
        """
        API to add a tag to an existing table.

        :param id: table URI
        :param tag: tag name to attach
        :return: delegated to TagCommon.put
        """
        args = self.parser.parse_args()
        # use tag_type to distinguish between tag and badge
        tag_type = args.get('tag_type', 'default')

        return self._tag_common.put(id=id,
                                    resource_type=ResourceType.Table,
                                    tag=tag,
                                    tag_type=tag_type)

    @swag_from('swagger_doc/tag/tag_delete.yml')
    def delete(self, id: str, tag: str) -> Iterable[Union[Mapping, int, None]]:
        """
        API to remove an association between a given tag and a table.

        :param id: table URI
        :param tag: tag name to detach
        :return: delegated to TagCommon.delete
        """
        args = self.parser.parse_args()
        tag_type = args.get('tag_type', 'default')

        return self._tag_common.delete(id=id,
                                       resource_type=ResourceType.Table,
                                       tag=tag,
                                       tag_type=tag_type)


class TableBadgeAPI(Resource):
    """
    TableBadgeAPI that supports PUT and DELETE operation to add or delete a
    badge (with a required 'category' argument) on a table.
    """

    def __init__(self) -> None:
        self.client = get_proxy_client()
        self.parser = reqparse.RequestParser()
        self.parser.add_argument('category', type=str, required=True)
        super(TableBadgeAPI, self).__init__()

        self._badge_common = BadgeCommon(client=self.client)

    @swag_from('swagger_doc/badge/badge_put.yml')
    def put(self, id: str, badge: str) -> Iterable[Union[Mapping, int, None]]:
        """
        Attach a badge to the table identified by id.

        :param id: table URI
        :param badge: badge name to attach
        :return: delegated to BadgeCommon.put
        """
        args = self.parser.parse_args()
        category = args.get('category', '')

        return self._badge_common.put(id=id,
                                      resource_type=ResourceType.Table,
                                      badge_name=badge,
                                      category=category)

    @swag_from('swagger_doc/badge/badge_delete.yml')
    def delete(self, id: str, badge: str) -> Iterable[Union[Mapping, int, None]]:
        """
        Detach a badge from the table identified by id.

        :param id: table URI
        :param badge: badge name to detach
        :return: delegated to BadgeCommon.delete
        """
        args = self.parser.parse_args()
        category = args.get('category', '')

        return self._badge_common.delete(id=id,
                                         resource_type=ResourceType.Table,
                                         badge_name=badge,
                                         category=category)


class TableDashboardAPI(BaseAPI):
    """
    TableDashboard API that supports GET operation providing list of Dashboards using a table.
    """

    def __init__(self) -> None:
        self.client = get_proxy_client()
        super().__init__(DashboardSummarySchema, 'resources_using_table', self.client)

    @swag_from('swagger_doc/table/dashboards_using_table_get.yml')
    def get(self, *, id: Optional[str] = None) -> Iterable[Union[Mapping, int, None]]:
        """
        Supports GET operation providing list of Dashboards using a table.
        :param id: Table URI
        :return: See Swagger doc for the schema. swagger_doc/table/dashboards_using_table_get.yml
        """
        try:
            return super().get_with_kwargs(id=id, resource_type=ResourceType.Dashboard)
        except NotFoundException:
            return {'message': 'table_id {} does not exist'.format(id)}, HTTPStatus.NOT_FOUND
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "{}" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2018 Lyft, Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /tests/unit/proxy/roundtrip/abstract_proxy_tests.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import logging 5 | import time 6 | import unittest 7 | from abc import ABC, abstractmethod 8 | from typing import ( 9 | Any, Callable, Dict, Generic, List, Type, TypeVar, 10 | no_type_check 11 | ) 12 | 13 | from amundsen_common.tests.fixtures import Fixtures 14 | from amundsen_common.models.table import ( 15 | ProgrammaticDescription, Table 16 | ) 17 | 18 | from amundsen_common.models.popular_table import PopularTable 19 | from metadata_service.entity.tag_detail import TagDetail 20 | from metadata_service.proxy.shared import checkNotNone 21 | from metadata_service.util import UserResourceRel 22 | from metadata_service.entity.resource_type import ResourceType 23 | from .roundtrip_base_proxy import RoundtripBaseProxy 24 | 25 | __all__ = ['abstract_proxy_test_class'] 26 | 27 | T = TypeVar('T', bound=RoundtripBaseProxy) 28 | 29 | LOGGER = logging.getLogger(__name__) 30 | 31 | 32 | class AbstractProxyTest(ABC, Generic[T], unittest.TestCase): 33 | """ 34 | Proxy integration testing 35 | 36 | use abstract_proxy_test_class() to get the class, e.g. 37 | 38 | class YourProxyTest(abstract_proxy_test_class(), unittest.TestCase): 39 | def get_proxy(self) -> YourProxy: 40 | return self.your_proxy 41 | ... 
42 | """ 43 | 44 | @abstractmethod 45 | def setUp(self) -> None: 46 | """ 47 | this is for implementing classes (if they need it) 48 | """ 49 | pass 50 | 51 | @abstractmethod 52 | def tearDown(self) -> None: 53 | """ 54 | this is for implementing classes (if they need it) 55 | """ 56 | pass 57 | 58 | @abstractmethod 59 | def get_proxy(self) -> T: 60 | pass 61 | 62 | @abstractmethod 63 | def get_relationship(self, *, node_type1: str, node_key1: str, node_type2: str, 64 | node_key2: str) -> List[Any]: 65 | pass 66 | 67 | def test_rt_table(self) -> None: 68 | """ 69 | it'd be nice to check that the result could be deserialized as a client of the metadata_service would 70 | """ 71 | expected = Fixtures.next_table() 72 | expected.description = '"hello!" said no one' 73 | expected.tags.sort() 74 | 75 | self.get_proxy().put_table(table=expected) 76 | actual: Table = self.get_proxy().get_table(table_uri=checkNotNone(expected.key)) 77 | actual.last_updated_timestamp = None 78 | actual.tags.sort() 79 | 80 | self.assertEqual(expected, actual) 81 | 82 | def test_rt_table_with_owner(self) -> None: 83 | user = Fixtures.next_user(is_active=True) 84 | self.get_proxy().put_user(data=user) 85 | application = Fixtures.next_application(application_id=user.user_id) 86 | expected = Fixtures.next_table(application=application) 87 | self.get_proxy().put_table(table=expected) 88 | 89 | actual: Table = self.get_proxy().get_table(table_uri=checkNotNone(expected.key)) 90 | 91 | self.assertEqual(user.user_id, actual.owners[0].user_id) 92 | 93 | def test_rt_table_with_non_existent_app(self) -> None: 94 | application = Fixtures.next_application() 95 | # purposefully don't insert application 96 | expected_table = Fixtures.next_table(application=application) 97 | 98 | self.get_proxy().put_table(table=expected_table) 99 | actual_table: Table = self.get_proxy().get_table(table_uri=checkNotNone(expected_table.key)) 100 | 101 | self.assertEqual(actual_table.table_writer, None) 102 | 
self.assertEqual(actual_table.owners, []) 103 | 104 | def test_get_popular_tables(self) -> None: 105 | application = Fixtures.next_application() 106 | self.get_proxy().put_app(data=application) 107 | # Add 10 tables 108 | tables: List[Table] = [Fixtures.next_table(application=application) for _ in range(10)] 109 | self.get_proxy().post_tables(tables=tables) 110 | 111 | user = Fixtures.next_user() 112 | self.get_proxy().put_user(data=user) 113 | 114 | # add reads to 6 of them, expecting that only the top five will be "popular" 115 | expected_popular_tables = [] 116 | reads = 0 117 | for i in range(6): 118 | table_name: str = checkNotNone(tables[i].name) 119 | table_uri: str = checkNotNone(tables[i].key) 120 | self.get_proxy().add_read_count(table_uri=table_uri, user_id=f'{user.user_id}', read_count=reads) 121 | if reads > 0: 122 | expected_popular_tables.append(table_name) 123 | reads += 1000 124 | 125 | # ensure popular tables returns those 5 we added 126 | actual_popular_tables = self.get_proxy().get_popular_tables(num_entries=5) 127 | self.assertEqual(len(actual_popular_tables), 5) 128 | 129 | popular_tables = [] 130 | for table in tables: 131 | if table.name in expected_popular_tables: 132 | popular_tables.append( 133 | PopularTable(database=table.database, 134 | cluster=table.cluster, 135 | schema=table.schema, 136 | name=table.name, 137 | description=table.description)) 138 | self.assertEqual(sorted(actual_popular_tables), sorted(popular_tables)) 139 | 140 | def test_put_programmatic_table_description(self) -> None: 141 | table: Table = Fixtures.next_table() 142 | table.programmatic_descriptions = [] 143 | self.get_proxy().put_table(table=table) 144 | expected_description: ProgrammaticDescription = Fixtures.next_description() 145 | self.get_proxy().put_programmatic_table_description(table_uri=checkNotNone(table.key), 146 | description=expected_description) 147 | actual_table = self.get_proxy().get_table(table_uri=checkNotNone(table.key)) 148 | 
self.assertEqual([expected_description], actual_table.programmatic_descriptions) 149 | 150 | # confirm that this runs without failing 151 | self.get_proxy().put_programmatic_table_description(table_uri=checkNotNone(Fixtures.next_table().key), 152 | description=Fixtures.next_description()) 153 | 154 | def test_add_delete_user_relation(self) -> None: 155 | table = Fixtures.next_table() 156 | self.get_proxy().put_table(table=table) 157 | user = Fixtures.next_user() 158 | self.get_proxy().put_user(data=user) 159 | 160 | self.get_proxy().add_resource_relation_by_user(id=f'{table.key}', user_id=f'{user.user_id}', 161 | relation_type=UserResourceRel.read, 162 | resource_type=ResourceType.Table) 163 | 164 | res: Dict[str, List[Table]] = self.get_proxy().get_table_by_user_relation(user_email=f'{user.user_id}', 165 | relation_type=UserResourceRel.read) 166 | self.assertEqual(1, len(res['table'])) 167 | relations = self.get_relationship(node_type1='User', 168 | node_key1=f'{user.user_id}', 169 | node_type2='Table', 170 | node_key2=checkNotNone(table.key)) 171 | self.assertEqual(1, len(relations)) 172 | 173 | # Now delete the relation 174 | self.get_proxy().delete_resource_relation_by_user(id=f'{table.key}', user_id=f'{user.user_id}', 175 | relation_type=UserResourceRel.read, 176 | resource_type=ResourceType.Table) 177 | res2: Dict[str, List[Table]] = self.get_proxy().get_table_by_user_relation(user_email=f'{user.user_id}', 178 | relation_type=UserResourceRel.read) 179 | self.assertEqual(0, len(res2['table'])) 180 | 181 | def test_owner_rt(self) -> None: 182 | application = Fixtures.next_application() 183 | self.get_proxy().put_app(data=application) 184 | table = Fixtures.next_table(application=application) 185 | self.get_proxy().put_table(table=table) 186 | user = Fixtures.next_user() 187 | self.get_proxy().put_user(data=user) 188 | user_id: str = user.user_id or 'test' 189 | self.get_proxy().add_owner(table_uri=checkNotNone(table.key), owner=user_id) 190 | table = 
self.get_proxy().get_table(table_uri=checkNotNone(table.key)) 191 | self.assertEqual([user_id], [u.user_id for u in table.owners]) 192 | self.get_proxy().delete_owner(table_uri=checkNotNone(table.key), owner=user_id) 193 | no_owner_table: Table = self.get_proxy().get_table(table_uri=checkNotNone(table.key)) 194 | self.assertEqual([], no_owner_table.owners) 195 | relations = self.get_relationship(node_type1='User', 196 | node_key1=user_id, 197 | node_type2='Table', 198 | node_key2=checkNotNone(table.key)) 199 | self.assertEqual(0, len(relations)) 200 | 201 | def test_tag_rt(self) -> None: 202 | table = Fixtures.next_table() 203 | self.get_proxy().put_table(table=table) 204 | test_tag_detail = TagDetail(tag_name='a', tag_count=1) 205 | self.get_proxy().add_tag(id=checkNotNone(table.key), tag=test_tag_detail.tag_name, 206 | tag_type='default', resource_type=ResourceType.Table) 207 | tags_added = self.get_proxy().get_tags() 208 | self.assertIn(test_tag_detail, tags_added) 209 | self.get_proxy().delete_tag(id=checkNotNone(table.key), tag=test_tag_detail.tag_name, 210 | tag_type='default', resource_type=ResourceType.Table) 211 | tags_removed = self.get_proxy().get_tags() 212 | self.assertNotIn(test_tag_detail, tags_removed) 213 | relations = self.get_relationship(node_type1='Table', 214 | node_key1=checkNotNone(table.key), 215 | node_type2='Tag', 216 | node_key2=test_tag_detail.tag_name) 217 | self.assertEqual(0, len(relations)) 218 | 219 | def test_get_latest_updated_ts(self) -> None: 220 | application = Fixtures.next_application() 221 | self.get_proxy().put_app(data=application) 222 | table = Fixtures.next_table(application=application) 223 | table_uri: str = checkNotNone(table.key) 224 | self.get_proxy().put_table(table=table) 225 | res = self.get_proxy().get_latest_updated_ts() 226 | self.assertEqual(type(res), int) 227 | actual: Table = self.get_proxy().get_table(table_uri=table_uri) 228 | self.assertEqual(actual.last_updated_timestamp, res) 229 | 230 | # try 
posting the same table again and make sure the timestamp updates 231 | time.sleep(1) 232 | self.get_proxy().put_table(table=table) 233 | res2 = self.get_proxy().get_latest_updated_ts() 234 | self.assertNotEqual(res, res2) 235 | actual = self.get_proxy().get_table(table_uri=table_uri) 236 | self.assertEqual(actual.last_updated_timestamp, res2) 237 | 238 | 239 | @no_type_check 240 | def class_getter_closure() -> Callable[[], Type[AbstractProxyTest]]: # noqa: F821 241 | the_class: Type[AbstractProxyTest[Any]] = AbstractProxyTest # noqa: F821 242 | 243 | def abstract_proxy_test_class() -> Type[AbstractProxyTest]: # noqa: F821 244 | return the_class 245 | return abstract_proxy_test_class 246 | 247 | 248 | abstract_proxy_test_class: Callable[[], Type[AbstractProxyTest]] = class_getter_closure() 249 | del AbstractProxyTest 250 | del class_getter_closure 251 | -------------------------------------------------------------------------------- /tests/unit/proxy/fixtures/atlas_test_data.py: -------------------------------------------------------------------------------- 1 | # Copyright Contributors to the Amundsen project. 
# SPDX-License-Identifier: Apache-2.0

import copy
from typing import List, Dict


class DottedDict(dict):
    """dot.notation access to dictionary attributes"""
    # NOTE: dict.get as __getattr__ means a missing attribute yields None
    # instead of raising AttributeError — convenient for fixtures, but don't
    # rely on attribute errors with this class.
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


class Data:
    # Canned Apache Atlas REST-style entity payloads used by the Atlas proxy
    # unit tests. Everything here is class-level constant data; values mirror
    # the JSON shapes Atlas returns (guid/typeName/attributes/
    # relationshipAttributes/...).
    entity_type = 'hive_table'
    column_type = 'hive_column'
    cluster = 'TEST_CLUSTER'
    db = 'TEST_DB'
    name = 'TEST_TABLE'
    table_uri = f'{entity_type}://{cluster}.{db}/{name}'

    # entity1's relationshipAttributes carries this many columns of each status.
    active_columns = 4
    inactive_columns = 7

    classification_entity = {
        'classifications': [
            {'typeName': 'PII_DATA', 'name': 'PII_DATA'},
        ]
    }

    test_column = {
        'guid': 'COLUMN_GUID',
        'typeName': 'COLUMN',
        'entityStatus': 'ACTIVE',
        'attributes': {
            'name': 'column name',
            'qualifiedName': 'column@name',
            'type': 'Managed',
            'description': 'column description',
            'position': 1,
            'statistics': [
                {'attributes': {
                    'stat_name': 'max',
                    'stat_val': 100.1234,
                    'start_epoch': '100',
                    'end_epoch': '200',
                }},
                {'attributes': {
                    'stat_name': 'min',
                    'stat_val': 0.5678,
                    'start_epoch': '100',
                    'end_epoch': '200',
                }},
            ]
        },
    }

    # Same column but marked INACTIVE, for status-filtering tests.
    test_column_inactive = copy.deepcopy(test_column)
    test_column_inactive['entityStatus'] = 'INACTIVE'

    # Raw column statistics as strings (pre-formatting input).
    test_exp_col_stats_raw = [
        {'attributes': {
            'stat_name': 'max',
            'stat_val': '100.1234',
            'start_epoch': '100',
            'end_epoch': '200',
        }},
        {'attributes': {
            'stat_name': 'min',
            'stat_val': '0.5678',
            'start_epoch': '100',
            'end_epoch': '200',
        }},
    ]

    # Expected output after stat formatting ('min' -> 'minimum', value rounded).
    test_exp_col_stats_formatted = [
        {'attributes': {
            'stat_name': 'minimum',
            'stat_val': '0.57',
            'start_epoch': '100',
            'end_epoch': '200',
        }},
    ]

    db_entity = {
        'guid': '-100',
        'updateTime': 2345678901234,
        'typeName': entity_type,
        'attributes': {
            'qualifiedName': db,
            'name': 'db',
            'description': 'Dummy DB Description',
            'owner': 'dummy@email.com',
        }
    }

    # Partition fixtures: one INACTIVE, two plain ACTIVE, and one with a
    # non-date name ('2020,8') to exercise name parsing.
    partition_entity_1 = {
        'typeName': 'table_partition',
        'status': 'INACTIVE',
        'attributes': {
            'name': '20200908'
        },
        'createTime': 1599723564000
    }

    partition_entity_2 = {
        'typeName': 'table_partition',
        'status': 'ACTIVE',
        'attributes': {
            'name': '20200909'
        },
        'createTime': 1599723564000
    }

    partition_entity_3 = {
        'typeName': 'table_partition',
        'status': 'ACTIVE',
        'attributes': {
            'name': '20200910'
        },
        'createTime': 1599723564000
    }

    partition_entity_4 = {
        'typeName': 'table_partition',
        'status': 'ACTIVE',
        'attributes': {
            'name': '2020,8'
        },
        'createTime': 1599723564000
    }

    partitions: List[Dict] = [partition_entity_1, partition_entity_2, partition_entity_3, partition_entity_4]

    # A managed table with columns, owners (one ACTIVE, one DELETED
    # relationship), reports, parameters and partitions attached.
    entity1 = {
        'guid': '1',
        'typeName': entity_type,
        'updateTime': 1234567890123,
        'attributes': {
            'qualifiedName': '{}.{}@{}'.format(db, 'Table1', cluster),
            'name': 'Table1',
            'description': 'Dummy Description',
            'owner': 'dummy@email.com',
            'db': db_entity,
            'popularityScore': 100,
            'partitions': list(),
            'parameters': {
                'testParameterKeyB': 'testParameterValueB',
                'testParameterKeyA': 'testParameterValueA',
                'spark.sql.param': 1
            },
            'reports': [{'guid': '23'}, {'guid': '121212'}, {'guid': '2344'}],
            'tableType': 'MANAGED_TABLE'
        },
        'relationshipAttributes': {
            'db': db_entity,
            'columns': ([test_column_inactive] * inactive_columns) + ([test_column] * active_columns),
            'ownedBy': [
                {
                    "entityStatus": "ACTIVE",
                    "relationshipStatus": "ACTIVE",
                    "guid": "000",
                    "displayText": "active_owned_by"
                },
                {
                    "entityStatus": "ACTIVE",
                    "relationshipStatus": "DELETED",
                    "guid": "111",
                    "displayText": "deleted_owned_by"
                }
            ],
            'partitions': [dict(displayText=p.get('attributes', dict()).get('name'),
                                entityStatus=p.get('status'),
                                relationshipStatus='ACTIVE') for p in partitions]
        },
    }
    entity1.update(classification_entity)

    # A minimal virtual view with no columns/owners, for contrast with entity1.
    entity2 = {
        'guid': '2',
        'updateTime': 234,
        'typeName': entity_type,
        'attributes': {
            'qualifiedName': '{}.{}@{}'.format(db, 'Table2', cluster),
            'name': 'Table2',
            'description': 'Dummy Description',
            'owner': 'dummy@email.com',
            'db': db_entity,
            'popularityScore': 100,
            'partitions': list(),
            'tableType': 'VIRTUAL_VIEW'
        },
        'relationshipAttributes': {
            'db': db_entity
        },
    }
    entity2.update(classification_entity)

    entities = {
        'entities': [
            entity1,
            entity2,
        ]
    }

    bookmark_entity1 = {
        "typeName": "Bookmark",
        "attributes": {
            "active": True,
            "qualifiedName": '{}.{}.{}.{}.bookmark@{}'.format(db, name, 'hive_table', 'test_user_id', cluster),
            "entityUri": table_uri,
        },
        "guid": "0fa40fd5-016c-472e-a72f-25a5013cc818",
        "status": "ACTIVE",
        "displayText": '{}.{}.{}.{}.bookmark@{}'.format(db, name, 'hive_table', 'test_user_id', cluster),
        "classificationNames": [],
        "meaningNames": [],
        "meanings": []
    }

    bookmark_entity2 = {
        "typeName": "Bookmark",
        "attributes": {
            "active": True,
            "qualifiedName": '{}.{}.{}.{}.bookmark@{}'.format(db, 'Table2', 'hive_table', 'test_user_id', cluster),
            "entityUri": table_uri,
        },
        "guid": "0fa40fd5-016c-472e-a72f-a72ffa40fd5",
        "status": "ACTIVE",
        "displayText": '{}.{}.{}.{}.bookmark@{}'.format(db, 'Table2', 'hive_table', 'test_user_id', cluster),
        "classificationNames": [],
        "meaningNames": [],
        "meanings": []
    }

    bookmark_entities = {
        'entities': [
            bookmark_entity1,
            bookmark_entity2,
        ]
    }

    user_entity_1 = {
        "typeName": "User",
        "attributes": {
            "qualifiedName": "test_user_1"
        },
        "guid": "",
        "status": "ACTIVE",
        "displayText": 'test_user_1',
        "classificationNames": [],
        "meaningNames": [],
        "meanings": []
    }

    # A user carrying relationship data: reads with mixed entity/relationship
    # statuses and owned tables (one ACTIVE, one DELETED relationship), to
    # exercise status filtering on both edges.
    user_entity_2 = {
        "typeName": "User",
        "attributes": {
            "qualifiedName": "test_user_2"
        },
        "guid": "",
        "status": "ACTIVE",
        "displayText": 'test_user_2',
        "classificationNames": [],
        "meaningNames": [],
        "meanings": [],
        "relationshipAttributes": {
            "entityReads": [
                {
                    "entityStatus": "ACTIVE",
                    "relationshipStatus": "ACTIVE",
                    "guid": "1"
                },
                {
                    "entityStatus": "INACTIVE",
                    "relationshipStatus": "ACTIVE",
                    "guid": "2"
                },
                {
                    "entityStatus": "ACTIVE",
                    "relationshipStatus": "INACTIVE",
                    "guid": "3"
                }
            ],
            "owns": [
                {
                    "entityStatus": "ACTIVE",
                    "relationshipStatus": "ACTIVE",
                    "typeName": entity_type,
                    "guid": entity1["guid"]
                },
                {
                    "entityStatus": "ACTIVE",
                    "relationshipStatus": "DELETED",
                    "typeName": entity_type,
                    "guid": entity2["guid"]
                }]
        }
    }

    reader_entity_1 = {
        "typeName": "Reader",
        "attributes": {
            "count": 5,
            "qualifiedName": '{}.{}.{}.reader@{}'.format(db, 'Table1', 'test_user_1', cluster),
            "entityUri": f"hive_table://{cluster}.{db}/Table1",
        },
        "guid": "1",
        "status": "ACTIVE",
        # NOTE(review): displayText uses 'test_user' (not 'test_user_1') —
        # preserved as-is; presumably intentional or harmless to the tests.
        "displayText": '{}.{}.{}.reader@{}'.format(db, 'Table1', 'test_user', cluster),
        "classificationNames": [],
        "meaningNames": [],
        "meanings": [],
        "relationshipAttributes": {"user": user_entity_1}
    }

    reader_entity_2 = {
        "typeName": "Reader",
        "attributes": {
            "count": 150,
            "qualifiedName": '{}.{}.{}.reader@{}'.format(db, 'Table1', 'test_user_2', cluster),
            "entityUri": f"hive_table://{cluster}.{db}/Table1",
        },
        "guid": "2",
        "status": "ACTIVE",
        "displayText": '{}.{}.{}.reader@{}'.format(db, 'Table1', 'test_user_2', cluster),
        "classificationNames": [],
        "meaningNames": [],
        "meanings": [],
        "relationshipAttributes": {"user": user_entity_2}
    }

    # Wrapped in DottedDict so tests can use attribute-style access.
    reader_entities = [DottedDict(reader_entity) for reader_entity in [reader_entity_1, reader_entity_2]]

    # Reports with mixed statuses; the DELETED one should be filtered out by
    # consumers of this fixture.
    report_entity_1 = {
        'typeName': 'Report',
        'status': 'ACTIVE',
        'attributes': {
            'name': "test_report",
            'url': "http://test"
        }}
    report_entity_2 = {
        'typeName': 'Report',
        'status': 'DELETED',
        'attributes': {
            'name': "test_report2",
            'url': "http://test2"
        }}
    report_entity_3 = {
        'typeName': 'Report',
        'status': 'ACTIVE',
        'attributes': {
            'name': "test_report3",
            'url': "http://test3"
        }}

    report_entities = [report_entity_1, report_entity_2, report_entity_3]

    # Admin-metrics payload shape used for latest-updated-timestamp tests.
    metrics_data = [DottedDict({
        'general': {
            'stats': {
                'Notification:lastMessageProcessedTime': 1598342400000
            }
        }
    })]
# --------------------------------------------------------------------------------