├── docs ├── common ├── index.md ├── CONTRIBUTING.md ├── databuilder ├── search ├── frontend ├── metadata ├── k8s_install.md ├── img │ ├── graph_model.png │ ├── neo4j-debug.png │ ├── search-page.png │ ├── column_details.png │ ├── data_preview.png │ ├── landing_page.png │ ├── search_preview.png │ ├── search-exact-match.png │ ├── tutorials │ │ ├── postgres.png │ │ ├── table-badge.png │ │ ├── column-badge.png │ │ ├── table-postgres.png │ │ ├── search-postgres.png │ │ ├── superset-add-db.png │ │ ├── superset-welcome.png │ │ ├── amundsen-preview1.png │ │ ├── amundsen-preview2.png │ │ └── superset-sqllab-verify.png │ ├── Amundsen_Architecture.png │ ├── issue_process_diagram.png │ ├── logos │ │ ├── amundsen_mark_circle.png │ │ ├── amundsen_mark_tan_bg.png │ │ ├── amundsen_mark_transparent_bg.png │ │ ├── amundsen_mark_blue.svg │ │ ├── amundsen_mark_orange.svg │ │ ├── amundsen_logo_on_light.svg │ │ └── amundsen_logo_on_blue.svg │ └── table_detail_page_with_badges.png ├── installation-aws-ecs │ ├── userData.sh │ ├── ecs-params.yml │ ├── aws-ecs-deployment.md │ └── docker-ecs-amundsen.yml ├── css │ └── app.css ├── tutorials │ ├── index-postgres.md │ ├── user-profiles.md │ ├── badges.md │ ├── data-preview-with-superset.md │ ├── how-to-track-user-metric.md │ └── how-to-search-effective.md ├── architecture.md ├── issue_labeling.md ├── faq.md ├── installation.md ├── roadmap.md ├── authentication │ └── oidc.md └── developer_guide.md ├── requirements.txt ├── .github ├── titleLint.yml ├── CODEOWNERS ├── workflows │ └── deploy_docs.yml ├── ISSUE_TEMPLATE │ ├── feature-request.md │ └── bug-report.md └── PULL_REQUEST_TEMPLATE.md ├── NOTICE ├── .gitignore ├── CODE_OF_CONDUCT.md ├── .dependabot └── config.yml ├── amundsen-kube-helm ├── templates │ ├── helm │ │ ├── requirements.yaml │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── pv-neo4j.yaml │ │ │ ├── pvc-neo4j.yaml │ │ │ ├── service-search.yaml │ │ │ ├── service-frontend.yaml │ │ │ ├── service-metadata.yaml │ │ │ ├── service-neo4j.yaml │ │ │ ├── secret-oidc-config.yaml │ │ │ ├── _helpers.tpl │ │ │ ├── configmap-neo4j.yaml │ │ │ ├── deployment-search.yaml │ │ │ ├── cronjob-neo4j-s3-backup.yaml │ │ │ ├── deployment-metadata.yaml │ │ │ ├── deployment-neo4j.yaml │ │ │ └── deployment-frontend.yaml │ │ └── values.yaml │ └── restore-backup │ │ ├── README.md │ │ └── restore-neo4j-pod.yaml └── README.md ├── .gitmodules ├── OWNERS.md ├── SECURITY.md ├── .all-contributorsrc ├── deploy_website.sh ├── docker-amundsen-atlas.yml ├── CONTRIBUTORS.md ├── docker-amundsen-local.yml ├── docker-amundsen.yml ├── CONTRIBUTING.md ├── MAINTAINING.md ├── mkdocs.yml ├── LICENSE ├── GOVERNANCE.md └── example └── docker └── neo4j └── conf └── neo4j.conf /docs/common: -------------------------------------------------------------------------------- 1 | ../amundsencommon -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/databuilder: -------------------------------------------------------------------------------- 1 | ../amundsendatabuilder -------------------------------------------------------------------------------- /docs/search: 
-------------------------------------------------------------------------------- 1 | ../amundsensearchlibrary -------------------------------------------------------------------------------- /docs/frontend: -------------------------------------------------------------------------------- 1 | ../amundsenfrontendlibrary -------------------------------------------------------------------------------- /docs/metadata: -------------------------------------------------------------------------------- 1 | ../amundsenmetadatalibrary -------------------------------------------------------------------------------- /docs/k8s_install.md: -------------------------------------------------------------------------------- 1 | ../amundsen-kube-helm/README.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs==1.1 2 | mkdocs-material==4.6.3 3 | mkdocs-redirects==1.0.0 4 | -------------------------------------------------------------------------------- /.github/titleLint.yml: -------------------------------------------------------------------------------- 1 | regex: (build|ci|docs|feat|fix|perf|refactor|style|test|chore|other): .* 2 | -------------------------------------------------------------------------------- /docs/img/graph_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/graph_model.png -------------------------------------------------------------------------------- /docs/img/neo4j-debug.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/neo4j-debug.png -------------------------------------------------------------------------------- /docs/img/search-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/search-page.png -------------------------------------------------------------------------------- /docs/img/column_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/column_details.png -------------------------------------------------------------------------------- /docs/img/data_preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/data_preview.png -------------------------------------------------------------------------------- /docs/img/landing_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/landing_page.png -------------------------------------------------------------------------------- /docs/img/search_preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/search_preview.png -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | amundsen 2 | Copyright 2018-2019 Lyft Inc. 3 | 4 | This product includes software developed at Lyft Inc. 
5 | -------------------------------------------------------------------------------- /docs/img/search-exact-match.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/search-exact-match.png -------------------------------------------------------------------------------- /docs/img/tutorials/postgres.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/postgres.png -------------------------------------------------------------------------------- /docs/img/Amundsen_Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/Amundsen_Architecture.png -------------------------------------------------------------------------------- /docs/img/issue_process_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/issue_process_diagram.png -------------------------------------------------------------------------------- /docs/img/tutorials/table-badge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/table-badge.png -------------------------------------------------------------------------------- /docs/img/tutorials/column-badge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/column-badge.png -------------------------------------------------------------------------------- /docs/img/tutorials/table-postgres.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/table-postgres.png -------------------------------------------------------------------------------- /docs/installation-aws-ecs/userData.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # For ElasticSearch 4 | sysctl -w vm.max_map_count=262144 5 | 6 | -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_circle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/logos/amundsen_mark_circle.png -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_tan_bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/logos/amundsen_mark_tan_bg.png -------------------------------------------------------------------------------- /docs/img/tutorials/search-postgres.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/search-postgres.png -------------------------------------------------------------------------------- /docs/img/tutorials/superset-add-db.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/superset-add-db.png 
-------------------------------------------------------------------------------- /docs/img/tutorials/superset-welcome.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/superset-welcome.png -------------------------------------------------------------------------------- /docs/img/table_detail_page_with_badges.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/table_detail_page_with_badges.png -------------------------------------------------------------------------------- /docs/img/tutorials/amundsen-preview1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/amundsen-preview1.png -------------------------------------------------------------------------------- /docs/img/tutorials/amundsen-preview2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/amundsen-preview2.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | example/backup/ 2 | example/docker/neo4j/plugins/ 3 | example/docker/es_data* 4 | .local/ 5 | 6 | .idea/ 7 | venv/ 8 | site/ 9 | -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_transparent_bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/logos/amundsen_mark_transparent_bg.png -------------------------------------------------------------------------------- /docs/img/tutorials/superset-sqllab-verify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/amundsen/master/docs/img/tutorials/superset-sqllab-verify.png -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | This project is governed by [Linux Foundation's code of conduct](https://www.linuxfoundation.org/code-of-conduct/). 2 | All contributors and participants agree to abide by its terms. 
3 | -------------------------------------------------------------------------------- /.dependabot/config.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | update_configs: 3 | - package_manager: "submodules" 4 | directory: "/" 5 | update_schedule: "weekly" 6 | automerged_updates: 7 | - match: 8 | update_type: all 9 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/requirements.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | # - name: neo4j 3 | # version: 1.2.2 4 | # repository: https://kubernetes-charts.storage.googleapis.com/ 5 | - name: elasticsearch 6 | version: 1.32.0 7 | repository: https://kubernetes-charts.storage.googleapis.com/ 8 | condition: elasticsearch.enabled 9 | -------------------------------------------------------------------------------- /docs/css/app.css: -------------------------------------------------------------------------------- 1 | @import "theme.css"; 2 | 3 | /* Splits a long line descriptions in tables in to multiple lines */ 4 | .wy-table-responsive table td, .wy-table-responsive table th { 5 | white-space: normal !important; 6 | } 7 | 8 | /* align multi line csv table columns */ 9 | table.docutils div.line-block { 10 | margin-left: 0; 11 | } 12 | -------------------------------------------------------------------------------- /docs/installation-aws-ecs/ecs-params.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | task_definition: 3 | services: 4 | neo4j: 5 | cpu_shares: 100 6 | mem_limit: 3GB 7 | elasticsearch: 8 | cpu_shares: 100 9 | mem_limit: 3GB 10 | amundsensearch: 11 | cpu_shares: 100 12 | mem_limit: 500MB 13 | amundsenmetadata: 14 | cpu_shares: 100 15 | mem_limit: 500MB 16 | amundsenfrontend: 17 | cpu_shares: 100 18 | mem_limit: 500MB 19 | -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_blue.svg: -------------------------------------------------------------------------------- 1 | amundsen_mark_blue -------------------------------------------------------------------------------- /docs/img/logos/amundsen_mark_orange.svg: -------------------------------------------------------------------------------- 1 | amundsen_mark_orange -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | description: Amundsen is a metadata driven application for improving the productivity of data analysts, data scientists and engineers when interacting with data. 
3 | name: amundsen 4 | version: 2.0.0 5 | icon: https://github.com/amundsen-io/amundsen/blob/master/docs/img/logos/amundsen_logo_on_light.svg 6 | home: https://github.com/amundsen-io/amundsen 7 | maintainers: 8 | - name: Amundsen TSC 9 | email: amundsen-tsc@lists.lfai.foundation 10 | sources: 11 | - https://github.com/amundsen-io/amundsen 12 | keywords: 13 | - metadata 14 | - data -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Codeowners file by GitHub 2 | # Reference: https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners 3 | # Each line is a file pattern followed by one or more owners. 4 | # Order is important; the last matching pattern takes the most 5 | # precedence. 6 | 7 | # These owners will be the default owners for everything in 8 | # the repo. Unless a later match takes precedence, 9 | # @amundsen-io/amundsen-committers will be requested for 10 | # review when someone opens a pull request. 11 | * @amundsen-io/amundsen-committers 12 | 13 | /amundsen-kube-helm/ @feng-tao @jornh @javamonkey79 14 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/pv-neo4j.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.neo4j.enabled .Values.neo4j.persistence .Values.neo4j.persistence.efs }} 2 | apiVersion: v1 3 | kind: PersistentVolume 4 | metadata: 5 | name: neo4j-pv 6 | labels: 7 | app: {{ template "amundsen.fullname" . }} 8 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 9 | release: "{{ .Release.Name }}" 10 | heritage: "{{ .Release.Service }}" 11 | spec: 12 | accessModes: 13 | - ReadWriteMany 14 | capacity: 15 | storage: {{ default "3Gi" .Values.neo4j.persistence.size }} 16 | nfs: 17 | server: {{ .Values.neo4j.persistence.efs.dns }} 18 | path: "/" 19 | {{- end }} -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/restore-backup/README.md: -------------------------------------------------------------------------------- 1 | # Restoring neo4j Backups 2 | 3 | The Amundsen Helm chart includes a Kubernetes CronJob that backs up the neo4j database to S3. If you need to restore from one of these backups, use the one-off pod in this directory. 4 | 5 | ## Create the Pod 6 | 7 | You should have set up `kubectl` for the Kubernetes cluster you wish to restore into before running these commands. 8 | 9 | Update the YAML file with the S3 bucket for the backup you wish to restore and then apply the pod. 10 | 11 | ```shell 12 | kubectl apply -n <namespace> -f restore-neo4j-pod.yaml 13 | ``` 14 | 15 | Once the pod has been created, it will automatically run the restore. You can check the pod's logs to see whether it has succeeded or failed. 16 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/pvc-neo4j.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.neo4j.enabled .Values.neo4j.persistence }} 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: neo4j-pvc 6 | namespace: {{ .Release.Namespace }} 7 | annotations: 8 | helm.sh/resource-policy: "keep" 9 | labels: 10 | app: {{ template "amundsen.fullname" .
}} 11 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 12 | release: "{{ .Release.Name }}" 13 | heritage: "{{ .Release.Service }}" 14 | spec: 15 | accessModes: 16 | - {{ default "ReadWriteOnce" .Values.neo4j.persistence.accessMode }} 17 | storageClassName: "{{ default "" .Values.neo4j.persistence.storageClass }}" 18 | resources: 19 | requests: 20 | storage: {{ default "3Gi" .Values.neo4j.persistence.size }} 21 | {{- end }} 22 | -------------------------------------------------------------------------------- /.github/workflows/deploy_docs.yml: -------------------------------------------------------------------------------- 1 | name: Publish docs via GitHub Pages 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 7 | jobs: 8 | build: 9 | name: Deploy docs 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout master 13 | uses: actions/checkout@v1 14 | - name: Checkout submodules using a PAT 15 | run: | 16 | git config --file .gitmodules --get-regexp url | while read url; do 17 | git config --file=.gitmodules $(echo "$url" | sed -E "s/git@github.com:|https:\/\/github.com\//https:\/\/${{ secrets.CI_PAT }}:${{ secrets.CI_PAT }}@github.com\//") 18 | done 19 | git submodule sync 20 | git submodule update --init --recursive 21 | - name: Deploy docs 22 | uses: mhausenblas/mkdocs-deploy-gh-pages@master 23 | env: 24 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 25 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "amundsendatabuilder"] 2 | path = amundsendatabuilder 3 | url = https://github.com/amundsen-io/amundsendatabuilder 4 | branch = master 5 | [submodule "amundsenfrontendlibrary"] 6 | path = amundsenfrontendlibrary 7 | url = https://github.com/amundsen-io/amundsenfrontendlibrary 8 | branch = master 9 | [submodule "amundsenmetadatalibrary"] 10 | path = amundsenmetadatalibrary 11 | url = https://github.com/amundsen-io/amundsenmetadatalibrary 12 | branch = master 13 | [submodule "amundsensearchlibrary"] 14 | path = amundsensearchlibrary 15 | url = https://github.com/amundsen-io/amundsensearchlibrary 16 | branch = master 17 | [submodule "amundsencommon"] 18 | path = amundsencommon 19 | url = https://github.com/amundsen-io/amundsencommon 20 | branch = master 21 | [submodule "amundsengremlin"] 22 | path = amundsengremlin 23 | url = https://github.com/amundsen-io/amundsengremlin 24 | branch = master 25 | -------------------------------------------------------------------------------- /OWNERS.md: -------------------------------------------------------------------------------- 1 | * See [CONTRIBUTING.md](CONTRIBUTING.md) for general contribution guidelines. 2 | * See [GOVERNANCE.md](GOVERNANCE.md) for governance guidelines. 3 | 4 | This page lists all the maintainers for Amundsen. This can be used for 5 | routing PRs, questions, etc. to the right place. 
6 | 7 | # Amundsen committers 8 | - Tao Feng (https://github.com/feng-tao) 9 | - Jin Hyuk Chang (https://github.com/jinhyukchang) 10 | - Tamika Tannis (https://github.com/ttannis) 11 | - Daniel Won (https://github.com/danwom) 12 | - Marcos Iglesias (https://github.com/golodhros) 13 | - Diksha Thakur (https://github.com/dikshathakur3119) 14 | - Allison Suarez Miranda (https://github.com/allisonsuarez) 15 | - Shenghu Yang (https://github.com/shenghuy) 16 | - Mark Grover (https://github.com/markgrover) 17 | - Verdan Mahmood (https://github.com/verdan) 18 | - Bolke de Bruin (https://github.com/bolkedebruin) 19 | - Mariusz Gorski (https://github.com/mgorsk1) 20 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | If you think you have found a security vulnerability, please send a report to amundsen-security@lists.lfaidata.foundation. Please do not post security vulnerabilities on Slack. 5 | 6 | We don't currently have a PGP key, unfortunately. 7 | 8 | An Amundsen committer will send you a response indicating the next steps in handling your report. After the initial reply to your report, the committer will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance. 9 | 10 | *Important:* Please don't disclose the vulnerability before it has been fixed and announced, to protect our users. 11 | 12 | ## Security announcements 13 | 14 | Please subscribe to [the announcements mailing list](https://lists.lfai.foundation/g/amundsen-announce), where we post notifications and remediation details for security vulnerabilities. 15 | -------------------------------------------------------------------------------- /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "files": ["CONTRIBUTORS.md"], 3 | "imageSize": 150, 4 | "commit": false, 5 | "contributors": [ 6 | { 7 | "login": "ttannis", 8 | "name": "Tamika Tannis", 9 | "avatar_url": "https://avatars2.githubusercontent.com/u/1790900?v=4", 10 | "profile": "https://www.linkedin.com/in/tamika-tannis/", 11 | "contributions": [ 12 | "bug", 13 | "code", 14 | "content", 15 | "doc", 16 | "example", 17 | "ideas", 18 | "infra", 19 | "maintenance", 20 | "platform", 21 | "plugin", 22 | "projectManagement", 23 | "question", 24 | "review", 25 | "security", 26 | "tool", 27 | "test", 28 | "tutorial" 29 | ] } 30 | ], 31 | "contributorsPerLine": 6, 32 | "projectName": "amundsen", 33 | "projectOwner": "amundsen-io", 34 | "repoType": "github", 35 | "repoHost": "https://github.com", 36 | "skipCi": true 37 | } 38 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/service-search.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "amundsen.name" . }}-{{ .Values.search.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | run: {{ .Chart.Name }}-{{ .Values.search.serviceName }} 9 | component: {{ .Values.search.serviceName }} 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | {{- with .Values.search.annotations }} 14 | annotations: 15 | {{ toYaml .
| indent 4 }} 16 | {{- end}} 17 | spec: 18 | type: {{ .Values.search.serviceType }} 19 | selector: 20 | app: {{ template "amundsen.name" . }} 21 | component: {{ .Values.search.serviceName }} 22 | release: {{ .Release.Name }} 23 | ports: 24 | - name: {{ .Chart.Name }}-{{ .Values.search.serviceName }}-{{ .Values.environment }}-http 25 | port: 5001 26 | targetPort: 5001 27 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/service-frontend.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "amundsen.name" . }}-{{ .Values.frontEnd.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | run: {{ .Chart.Name }}-{{ .Values.frontEnd.serviceName }} 9 | component: {{ .Values.frontEnd.serviceName }} 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | {{- with .Values.frontEnd.annotations }} 14 | annotations: 15 | {{ toYaml . | indent 4 }} 16 | {{- end}} 17 | spec: 18 | type: {{ .Values.frontEnd.serviceType }} 19 | selector: 20 | app: {{ template "amundsen.name" . }} 21 | component: {{ .Values.frontEnd.serviceName }} 22 | release: {{ .Release.Name }} 23 | ports: 24 | - name: {{ .Chart.Name }}-{{ .Values.frontEnd.serviceName }}-{{ .Values.environment }}-http 25 | port: 5000 26 | targetPort: 5000 27 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/service-metadata.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "amundsen.name" . }}-{{ .Values.metadata.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | run: {{ .Chart.Name }}-{{ .Values.metadata.serviceName }} 9 | component: {{ .Values.metadata.serviceName }} 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | {{- with .Values.metadata.annotations }} 14 | annotations: 15 | {{ toYaml . | indent 4 }} 16 | {{- end}} 17 | spec: 18 | type: {{ .Values.metadata.serviceType }} 19 | selector: 20 | app: {{ template "amundsen.name" . 
}} 21 | component: {{ .Values.metadata.serviceName }} 22 | release: {{ .Release.Name }} 23 | ports: 24 | - name: {{ .Chart.Name }}-{{ .Values.metadata.serviceName }}-{{ .Values.environment }}-http 25 | port: 5002 26 | targetPort: 5002 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Create a feature request 4 | title: Feature Proposal 5 | labels: feature proposal 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | 13 | ## Expected Behavior or Use Case 14 | 15 | 16 | ## Service or Ingestion ETL 17 | 18 | 19 | ## Possible Implementation 20 | 21 | 22 | ## Example Screenshots (if appropriate): 23 | 24 | ## Context 25 | 26 | -------------------------------------------------------------------------------- /deploy_website.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | 3 | #REPO="git@github.com:amundsen-io/amundsen.git" 4 | #DIR=temp-clone 5 | # We use https://www.mkdocs.org/user-guide/deploying-your-docs/ to build/deploy docs 6 | # Currently the doc is built/deployed manually. We should first build and deploy locally and verify it. 7 | # Here are some basic steps: 8 | # 1. virtualenv venv 9 | # 2. source venv/bin/activate 10 | # 3. pip3 install -r requirements.txt 11 | # 4. brew install mkdocs 12 | # 5. mkdocs serve # build locally and serve it in localhost:8000 . On mac OS, you may face ImportError and you may need to downgrade openssl by $ brew switch openssl 1.0.2r 13 | # 6. mkdocs gh-deploy # deploy to gh page 14 | 15 | # Delete any existing temporary website clone. 16 | #rm -rf $DIR 17 | 18 | # Clone the current repo into temp folder. 19 | #git clone $REPO $DIR 20 | 21 | # Move working directory into temp folder. 22 | #cd $DIR 23 | 24 | # Build the site and push the new files up to GitHub. 25 | mkdocs gh-deploy 26 | git checkout gh-pages 27 | git push 28 | 29 | # Delete our temp folder. 30 | #cd .. 31 | #rm -rf $DIR 32 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/service-neo4j.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.neo4j.enabled }} 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: neo4j 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | run: neo4j 9 | component: neo4j 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | {{- with .Values.neo4j.annotations }} 14 | annotations: 15 | {{ toYaml . | indent 4 }} 16 | {{- end}} 17 | spec: 18 | type: {{ .Values.neo4j.serviceType }} 19 | selector: 20 | app: {{ template "amundsen.name" . 
}} 21 | component: neo4j 22 | release: {{ .Release.Name }} 23 | ports: 24 | - port: 7473 25 | name: neo4j-{{ .Values.environment }}-https 26 | targetPort: 7473 27 | - port: 7474 28 | name: neo4j-{{ .Values.environment }}-http 29 | targetPort: 7474 30 | - port: 7687 31 | name: neo4j-{{ .Values.environment }}-bolt 32 | targetPort: 7687 33 | - port: 1337 34 | name: neo4j-{{ .Values.environment }}-shell 35 | targetPort: 1337 36 | {{ end }} 37 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | ### Summary of Changes 20 | 21 | 22 | 23 | ### Documentation 24 | 25 | 26 | 27 | ### CheckList 28 | 29 | Make sure you have checked **all** steps below to ensure a timely review. 30 | 31 | - [ ] PR title addresses the issue accurately and concisely, including a title prefix. 32 | - [ ] PR includes a summary of changes. 33 | - [ ] My commits follow the guidelines from "[How to write a good git commit message](http://chris.beams.io/posts/git-commit/)" 34 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/secret-oidc-config.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.frontEnd.createOidcSecret }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: oidc-config 6 | namespace: {{ .Release.Namespace }} 7 | stringData: 8 | OIDC_CLIENT_SECRET: {{ .Values.frontEnd.OIDC_CLIENT_SECRET }} 9 | client_secrets.json: |- 10 | { 11 | "web": { 12 | "client_id": "{{ .Values.frontEnd.OIDC_CLIENT_ID }}", 13 | "client_secret": "{{ .Values.frontEnd.OIDC_CLIENT_SECRET }}", 14 | "auth_uri": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}/v1/authorize", 15 | "token_uri": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}/v1/token", 16 | "issuer": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}", 17 | "userinfo_uri": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}/v1/userinfo", 18 | "redirect_uris": [ 19 | "http://localhost/oidc_callback" 20 | ], 21 | "token_introspection_uri": "{{ .Values.frontEnd.OIDC_ORG_URL }}/oauth2/{{ .Values.frontEnd.OIDC_AUTH_SERVER_ID }}/v1/introspect" 22 | } 23 | } 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Create a bug report 4 | title: Bug Report 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | 13 | ## Expected Behavior 14 | 15 | 16 | ## Current Behavior 17 | 18 | 19 | ## Possible Solution 20 | 21 | 22 | ## Steps to Reproduce 23 | 24 | 25 | 1. 26 | 2. 27 | 3. 28 | 4. 
29 | 30 | ## Screenshots (if appropriate) 31 | 32 | ## Context 33 | 34 | 35 | 36 | ## Your Environment 37 | 38 | * Amundsen version used: 39 | * Data warehouse stores: 40 | * Deployment (k8s or native): 41 | * Link to your fork or repository: -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/restore-backup/restore-neo4j-pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: restore-neo4j-from-latest 5 | annotations: 6 | fill_in_here_with_correct_key: fill_in_here_with_correct_value 7 | spec: 8 | containers: 9 | - name: restore-neo4j-from-latest 10 | image: neo4j:3.3.0 11 | command: 12 | - "/bin/sh" 13 | - "-c" 14 | - | 15 | apk -v --update add --no-cache --quiet curl python py-pip && pip install awscli -q 16 | latest_backup=$(aws s3api list-objects-v2 --bucket "$BUCKET" --query 'reverse(sort_by(Contents, &LastModified))[:1].Key' --output=text) 17 | aws s3 cp s3://$BUCKET/$latest_backup /tmp 18 | tar -xf /tmp/$latest_backup -C / 19 | data_file=`ls /data|grep \.data` 20 | schema_file=`ls /data|grep \.schema` 21 | ./bin/neo4j-shell -host neo4j -file /data/$schema_file 22 | echo "CALL apoc.import.graphml('/data/$data_file', {useTypes: true, readLabels: true});" | /var/lib/neo4j/bin/neo4j-shell -host neo4j 23 | env: 24 | - name: BUCKET 25 | value: s3://dev/null 26 | volumeMounts: 27 | - name: data 28 | mountPath: /data 29 | restartPolicy: OnFailure 30 | volumes: 31 | - name: data 32 | persistentVolumeClaim: 33 | claimName: neo4j-pvc 34 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "amundsen.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "amundsen.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "amundsen.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "amundsen.labels" -}} 38 | app.kubernetes.io/name: {{ include "amundsen.name" . }} 39 | helm.sh/chart: {{ include "amundsen.chart" .
}} 40 | app.kubernetes.io/instance: {{ .Release.Name }} 41 | {{- if .Chart.AppVersion }} 42 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 43 | {{- end }} 44 | app.kubernetes.io/managed-by: {{ .Release.Service }} 45 | {{- end -}} 46 | -------------------------------------------------------------------------------- /docs/tutorials/index-postgres.md: -------------------------------------------------------------------------------- 1 | # How to index metadata for real life databases 2 | 3 | In the previous [doc](../installation.md), we indexed tables from a CSV file. In real production cases, 4 | the table metadata is stored in data warehouses (e.g. Hive, Postgres, MySQL, Snowflake, BigQuery, etc.) for which Amundsen 5 | provides extractors for metadata extraction. 6 | 7 | In this tutorial, we use Postgres as an example to walk through how to index metadata for a Postgres database. 8 | The doc won't cover how to set up a Postgres database. 9 | 10 | 1. In this example, we have a Postgres table named `films` in a local Postgres instance. 11 |  12 | 13 | 2. We leverage the [postgres metadata extractor](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/extractor/postgres_metadata_extractor.py) 14 | to extract the metadata of the Postgres database. We can call the metadata extractor 15 | from an ad hoc Python script, as in this [example](https://github.com/amundsen-io/amundsendatabuilder/pull/248/commits/f5064e58a19a5bfa380b333cfc657ebb34702a2c), 16 | or from an Airflow DAG. 17 | 18 | 3. Once we run the script, we can search for the `films` table using Amundsen Search. 19 |  20 | 21 | 4. We can also find and view the `films` table on the table detail page. 22 |  23 | 24 | This tutorial uses Postgres as an example, but you can apply the same approach to your other data warehouses. If Amundsen 25 | doesn't provide an extractor for your source, you can build one based on the API and contribute it back to us! 26 | -------------------------------------------------------------------------------- /docs/tutorials/user-profiles.md: -------------------------------------------------------------------------------- 1 | 2 | ## People resources 3 | 4 | ### What can I do with User Resources? 5 | User profile pages, along with the ability to bookmark/favorite and search for users, are also available. See a demo of what they feel like from an end-user viewpoint starting around the 36-minute mark of [this September 2019 talk](https://youtu.be/Gr3-RfWn49A?t=36m00s) - this video snippet can effectively serve as an end-user guide. 6 | 7 | ### How do I enable User pages? 8 | 9 | The configuration to have `Users` available consists of: 10 | 11 | 1. Enable the user profile page indexing and display feature by performing [this frontend configuration](../../frontend/docs/application_config#index-users) 12 | 13 | 2. There are two alternative ways to populate user profile data. You can either: 14 | 15 | * Configure the Metadata service to do a [live lookup](../../metadata/docs/configurations#user_detail_method-optional) in some directory service, like LDAP or an HR system. 16 | 17 | * Set up ongoing ingestion of user profile data into Neo4j and Elasticsearch as users onboard/change/offboard, effectively caching it with the pros/cons of that (similar to what the Databuilder sample loader does from a user CSV; see the “pre-cooked demo data” link in the [Architecture overview](../../architecture#databuilder)) 18 | 19 | !!!
note 20 | Currently, for both of these options Amundsen _only_ provides these hooks/interfaces to add your own implementation. If you build something you think is generally useful, contributions are welcome! 21 | 22 | 3. Configure login, according to the [Authentication guide](../../authentication/oidc) 23 | 24 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/configmap-neo4j.yaml: -------------------------------------------------------------------------------- 1 | 2 | {{ if .Values.neo4j.enabled }} 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 6 | name: neo4j-configmap 7 | labels: 8 | app: {{ template "amundsen.name" . }} 9 | component: neo4j 10 | chart: {{ template "amundsen.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | data: 14 | neo4j.conf: |- 15 | apoc.export.file.enabled=true 16 | apoc.import.file.enabled=true 17 | cypher.forbid_shortestpath_common_nodes=false 18 | dbms.connector.bolt.enabled=true 19 | dbms.connector.bolt.listen_address=:7687 20 | dbms.connector.bolt.tls_level=OPTIONAL 21 | dbms.connector.http.enabled=true 22 | dbms.connector.https.enabled=true 23 | dbms.connectors.default_listen_address=0.0.0.0 24 | dbms.directories.import=/mnt 25 | dbms.jvm.additional=-Djdk.tls.ephemeralDHKeySize=2048 26 | dbms.jvm.additional=-Dunsupported.dbms.udc.source=tarball 27 | dbms.jvm.additional=-XX:+AlwaysPreTouch 28 | dbms.jvm.additional=-XX:+DisableExplicitGC 29 | dbms.jvm.additional=-XX:+UseG1GC 30 | dbms.logs.query.enabled=true 31 | dbms.logs.query.rotation.keep_number=7 32 | dbms.logs.query.rotation.size=20m 33 | dbms.memory.heap.initial_size={{ .Values.neo4j.config.dbms.heap_initial_size }} 34 | dbms.memory.heap.max_size={{ .Values.neo4j.config.dbms.heap_max_size }} 35 | dbms.memory.pagecache.size={{ .Values.neo4j.config.dbms.pagecache_size }} 36 | dbms.security.allow_csv_import_from_file_urls=true 37 | dbms.security.auth_enabled=false 38 | dbms.security.procedures.unrestricted=algo.*,apoc.* 39 | dbms.shell.enabled=true 40 | dbms.shell.host=0.0.0.0 41 | dbms.windows_service_name=neo4j 42 | {{ end }} 43 | -------------------------------------------------------------------------------- /docker-amundsen-atlas.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | atlas: 4 | # the image comes from https://github.com/ing-bank/rokku-dev-apache-atlas 5 | # it might take some time for Atlas to start 6 | image: wbaa/rokku-dev-apache-atlas:0.1.5 7 | container_name: atlas 8 | ports: 9 | - 21000:21000 10 | networks: 11 | - amundsennet 12 | environment: 13 | - ATLAS_KICKSTART_AMUNDSEN=true 14 | amundsensearch: 15 | build: 16 | context: ./amundsensearchlibrary 17 | dockerfile: public.Dockerfile 18 | container_name: amundsensearch 19 | ports: 20 | - 5001:5001 21 | environment: 22 | - CREDENTIALS_PROXY_USER=admin 23 | - CREDENTIALS_PROXY_PASSWORD=admin 24 | - PROXY_ENDPOINT=http://atlas:21000 25 | - PROXY_CLIENT=ATLAS 26 | networks: 27 | - amundsennet 28 | amundsenmetadata: 29 | build: 30 | context: ./amundsenmetadatalibrary 31 | dockerfile: public.Dockerfile 32 | container_name: amundsenmetadata 33 | ports: 34 | - 5002:5002 35 | networks: 36 | - amundsennet 37 | environment: 38 | - CREDENTIALS_PROXY_USER=admin 39 | - CREDENTIALS_PROXY_PASSWORD=admin 40 | - PROXY_HOST=http://atlas 41 | - PROXY_PORT=21000 42 | - PROXY_CLIENT=ATLAS 43 | amundsenfrontend: 44 | build: 45 | context: ./amundsenfrontendlibrary 46 | 
args: 47 | SEARCHSERVICE_BASE: http://amundsensearch:5001 48 | METADATASERVICE_BASE: http://amundsenmetadata:5002 49 | dockerfile: local.Dockerfile 50 | container_name: amundsenfrontend 51 | depends_on: 52 | - amundsenmetadata 53 | - amundsensearch 54 | ports: 55 | - 5000:5000 56 | networks: 57 | - amundsennet 58 | environment: 59 | - METADATASERVICE_BASE=http://amundsenmetadata:5002 60 | - SEARCHSERVICE_BASE=http://amundsensearch:5001 61 | networks: 62 | amundsennet: 63 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | Here is a list (WIP) of contributors to Amundsen, listing their contributions following this [emoji system](https://allcontributors.org/docs/en/emoji-key). This list is in progress, so feel free to use the [All Contributors bot](https://allcontributors.org/docs/en/bot/usage) to update it with the missing contributors! 4 | 5 | 6 | 7 | 8 | 9 | 10 | Tamika Tannis🐛 💻 🖋 📖 💡 🤔 🚇 🚧 📦 🔌 📆 💬 👀 🛡️ 🔧 ⚠️ ✅ 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /docker-amundsen-local.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | neo4j: 4 | image: neo4j:3.3.0 5 | container_name: neo4j_amundsen 6 | environment: 7 | - NEO4J_AUTH=neo4j/test 8 | ulimits: 9 | nofile: 10 | soft: 40000 11 | hard: 40000 12 | ports: 13 | - 7474:7474 14 | - 7687:7687 15 | volumes: 16 | - ./example/docker/neo4j/conf:/conf 17 | - ./.local/neo4j/data:/neo4j/data 18 | networks: 19 | - amundsennet 20 | elasticsearch: 21 | image: elasticsearch:6.7.0 22 | container_name: es_amundsen 23 | ports: 24 | - 9200:9200 25 | networks: 26 | - amundsennet 27 | ulimits: 28 | nofile: 29 | soft: 65536 30 | hard: 65536 31 | volumes: 32 | - ./.local/elasticsearch/data:/usr/share/elasticsearch/data 33 | amundsensearch: 34 | build: 35 | context: ./amundsensearchlibrary 36 | dockerfile: public.Dockerfile 37 | container_name: amundsensearch 38 | ports: 39 | - 5001:5001 40 | depends_on: 41 | - elasticsearch 42 | networks: 43 | - amundsennet 44 | environment: 45 | - PROXY_ENDPOINT=es_amundsen 46 | amundsenmetadata: 47 | build: 48 | context: ./amundsenmetadatalibrary 49 | dockerfile: public.Dockerfile 50 | container_name: amundsenmetadata 51 | depends_on: 52 | - neo4j 53 | ports: 54 | - 5002:5002 55 | networks: 56 | - amundsennet 57 | environment: 58 | - PROXY_HOST=bolt://neo4j_amundsen 59 | amundsenfrontend: 60 | build: 61 | context: ./amundsenfrontendlibrary 62 | args: 63 | SEARCHSERVICE_BASE: http://amundsensearch:5001 64 | METADATASERVICE_BASE: http://amundsenmetadata:5002 65 | dockerfile: local.Dockerfile 66 | container_name: amundsenfrontend 67 | depends_on: 68 | - amundsenmetadata 69 | - amundsensearch 70 | ports: 71 | - 5000:5000 72 | networks: 73 | - amundsennet 74 | environment: 75 | - SEARCHSERVICE_BASE=http://amundsensearch:5001 76 | - METADATASERVICE_BASE=http://amundsenmetadata:5002 77 | 78 | networks: 79 | amundsennet: 80 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | 3 | The following diagram shows the overall architecture for Amundsen. 
4 |  5 | 6 | ## Frontend 7 | 8 | The [frontend service](https://github.com/amundsen-io/amundsenfrontendlibrary#amundsen-frontend-service) serves as the web UI portal for user interaction. 9 | It is a Flask-based web app whose presentation layer is built with React, Redux, Bootstrap, Webpack, and Babel. 10 | 11 | ## Search 12 | 13 | The [search service](https://github.com/amundsen-io/amundsensearchlibrary#amundsen-search-service) proxy leverages Elasticsearch's search functionality (or Apache Atlas's search API, if that's the backend you picked) and 14 | provides a RESTful API to serve search requests from the frontend service. This [API is documented and live explorable](https://github.com/amundsen-io/amundsensearchlibrary#api-documentation) through OpenAPI, aka "Swagger". 15 | Currently only [table resources](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/models/elasticsearch_document.py) are indexed and searchable. 16 | The search index is built with the [databuilder elasticsearch publisher](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/publisher/elasticsearch_publisher.py). 17 | 18 | ## Metadata 19 | 20 | The [metadata service](https://github.com/amundsen-io/amundsenmetadatalibrary#amundsen-metadata-service) currently uses a Neo4j proxy to interact with the Neo4j graph database and serves metadata to the frontend service. 21 | The metadata is represented as a graph model: 22 |  23 | The above diagram shows how metadata is modeled in Amundsen. 24 | 25 | ## Databuilder 26 | 27 | Amundsen provides a [data ingestion library](https://github.com/amundsen-io/amundsendatabuilder) for building the metadata. At Lyft, we build the metadata once a day 28 | using an Airflow DAG ([examples](https://github.com/amundsen-io/amundsendatabuilder/tree/master/example/dags)). 29 | 30 | In addition to "real" use, the databuilder is also employed as a handy tool to ingest some ["pre-cooked" demo data](https://github.com/amundsen-io/amundsendatabuilder/blob/master/example/sample_data/) used in the Quickstart guide. This gives you a small sample of data to explore, so many of Amundsen's features are lit up without you even having to set up any connections to databases to ingest real data. 31 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/deployment-search.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: {{ template "amundsen.fullname" . }}-{{ .Values.search.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | component: {{ .Values.search.serviceName }} 9 | chart: {{ template "amundsen.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | selector: 14 | matchLabels: 15 | app: {{ template "amundsen.name" . }} 16 | component: {{ .Values.search.serviceName }} 17 | release: {{ .Release.Name }} 18 | replicas: {{ default 1 .Values.search.replicas }} 19 | template: 20 | metadata: 21 | {{- with default .Values.podAnnotations .Values.search.podAnnotations }} 22 | annotations: 23 | {{ toYaml . | indent 8 }} 24 | {{- end }} 25 | labels: 26 | app: {{ template "amundsen.name" . }} 27 | component: {{ .Values.search.serviceName }} 28 | release: {{ .Release.Name }} 29 | spec: 30 | {{- with default .Values.nodeSelector .Values.search.nodeSelector }} 31 | nodeSelector: 32 | {{ toYaml .
| indent 8 }} 33 | {{- end }} 34 | {{- with default .Values.affinity .Values.search.affinity }} 35 | affinity: 36 | {{ toYaml . | indent 8 }} 37 | {{- end }} 38 | {{- with default .Values.tolerations .Values.search.tolerations }} 39 | tolerations: 40 | {{ toYaml . | indent 8 }} 41 | {{- end }} 42 | containers: 43 | - name: {{ .Chart.Name }}-{{ .Values.search.serviceName }} 44 | image: {{ .Values.search.image }}:{{ .Values.search.imageTag }} 45 | ports: 46 | - containerPort: 5001 47 | env: 48 | - name: PROXY_ENDPOINT 49 | value: {{ if .Values.search.elasticsearchEndpoint }}{{ .Values.search.elasticsearchEndpoint }}{{ else }}{{ .Release.Name }}-elasticsearch-client.{{ .Release.Namespace }}.svc.cluster.local{{ end }} 50 | livenessProbe: 51 | httpGet: 52 | path: "/healthcheck" 53 | port: 5001 54 | initialDelaySeconds: 60 55 | periodSeconds: 60 56 | timeoutSeconds: 1 57 | successThreshold: 1 58 | failureThreshold: 5 59 | {{- with .Values.search.resources }} 60 | resources: 61 | {{ toYaml . | indent 10 }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /docs/img/logos/amundsen_logo_on_light.svg: -------------------------------------------------------------------------------- 1 | amundsen_logo_on_light -------------------------------------------------------------------------------- /docs/img/logos/amundsen_logo_on_blue.svg: -------------------------------------------------------------------------------- 1 | amundsen_logo_on_blue -------------------------------------------------------------------------------- /docker-amundsen.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | neo4j: 4 | image: neo4j:3.3.0 5 | container_name: neo4j_amundsen 6 | environment: 7 | - NEO4J_AUTH=neo4j/test 8 | ulimits: 9 | nofile: 10 | soft: 40000 11 | hard: 40000 12 | ports: 13 | - 7474:7474 14 | - 7687:7687 15 | volumes: 16 | - ./example/docker/neo4j/conf:/conf 17 | - ./example/docker/neo4j/plugins:/plugins 18 | - ./example/backup:/backup 19 | - neo4j_data:/neo4j/data 20 | networks: 21 | - amundsennet 22 | elasticsearch: 23 | image: elasticsearch:6.7.0 24 | container_name: es_amundsen 25 | ports: 26 | - 9200:9200 27 | volumes: 28 | - es_data:/usr/share/elasticsearch/data 29 | networks: 30 | - amundsennet 31 | ulimits: 32 | nofile: 33 | soft: 65536 34 | hard: 65536 35 | amundsensearch: 36 | image: amundsendev/amundsen-search:2.4.1 37 | container_name: amundsensearch 38 | ports: 39 | - 5001:5000 40 | depends_on: 41 | - elasticsearch 42 | networks: 43 | - amundsennet 44 | environment: 45 | - PROXY_ENDPOINT=es_amundsen 46 | command: gunicorn -w 2 --bind :5000 search_service.search_wsgi 47 | amundsenmetadata: 48 | image: amundsendev/amundsen-metadata:3.0.0 49 | container_name: amundsenmetadata 50 | depends_on: 51 | - neo4j 52 | ports: 53 | - 5002:5000 54 | networks: 55 | - amundsennet 56 | environment: 57 | - PROXY_HOST=bolt://neo4j_amundsen 58 | command: gunicorn -w 2 --bind :5000 metadata_service.metadata_wsgi 59 | amundsenfrontend: 60 | image: amundsendev/amundsen-frontend:3.0.0 61 | container_name: amundsenfrontend 62 | depends_on: 63 | - amundsenmetadata 64 | - amundsensearch 65 | ports: 66 | - 5000:5000 67 | networks: 68 | - amundsennet 69 | environment: 70 | - SEARCHSERVICE_BASE=http://amundsensearch:5000 71 | - METADATASERVICE_BASE=http://amundsenmetadata:5000 72 | # Only for easy config-less Quickstart bookmark evalutation. 
`TestConfig` extends ordinary `LocalConfig` by 73 | # defining `AUTH_USER_METHOD` to a hardcoded dummy user in `amundsen_application.tests.test_utils.get_test_user()` 74 | # See further docs in https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/docs/configuration.md#flask 75 | # and https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/docs/configuration.md#authentication 76 | - FRONTEND_SVC_CONFIG_MODULE_CLASS=amundsen_application.config.TestConfig 77 | command: gunicorn -w 2 --bind :5000 amundsen_application.wsgi 78 | 79 | networks: 80 | amundsennet: 81 | 82 | volumes: 83 | es_data: 84 | neo4j_data: 85 | -------------------------------------------------------------------------------- /docs/installation-aws-ecs/aws-ecs-deployment.md: -------------------------------------------------------------------------------- 1 | # Deployment of non-production Amundsen on AWS ECS using aws-cli 2 | 3 | The following is a set of instructions to run Amundsen on AWS Elastic Container Service. The current configuration is very basic but it is working. It is a migration of the docker-amundsen.yml to run on AWS ECS. 4 | 5 | ## Install ECS CLI 6 | 7 | The first step is to install ECS CLI, please follow the instructions from AWS [documentation](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ECS_CLI_installation.html) 8 | 9 | ### Get your access and secret keys from IAM 10 | 11 | ```bash 12 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 13 | $ export AWS_ACCESS_KEY_ID=xxxxxxxx 14 | $ export AWS_SECRET_ACCESS_KEY=xxxxxx 15 | $ export AWS_PROFILE=profilename 16 | ``` 17 | 18 | For the purpose of this instruction we used the [tutorial](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-cli-tutorial-ec2.html#ECS_CLI_tutorial_compose_create) on AWS documentation 19 | 20 | 21 | ## STEP 1: Create a cluster configuration: 22 | 23 | ```bash 24 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 25 | $ ecs-cli configure --cluster amundsen --region us-west-2 --default-launch-type EC2 --config-name amundsen 26 | ``` 27 | 28 | ### STEP 2: Create a profile using your access key and secret key: 29 | 30 | ```bash 31 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 32 | $ ecs-cli configure profile --access-key $AWS_ACCESS_KEY_ID --secret-key $AWS_SECRET_ACCESS_KEY --profile-name amundsen 33 | ``` 34 | 35 | ### STEP 3: Create the Cluster Use profile name from \~/.aws/credentials 36 | 37 | ```bash 38 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 39 | $ ecs-cli up --keypair JoaoCorreia --extra-user-data userData.sh --capability-iam --size 1 --instance-type t2.large --cluster-config amundsen --verbose --force --aws-profile $AWS_PROFILE 40 | ``` 41 | 42 | ### STEP 4: Deploy the Compose File to a Cluster 43 | 44 | ```bash 45 | # in ~//amundsenfrontendlibrary/docs/instalation-aws-ecs 46 | $ ecs-cli compose --cluster-config amundsen --file docker-ecs-amundsen.yml up --create-log-groups 47 | ``` 48 | 49 | You can use the ECS CLI to see what tasks are running. 
50 | 51 | ```bash 52 | $ ecs-cli ps 53 | ``` 54 | 55 | ### STEP 5: Open the EC2 Instance 56 | 57 | Edit the Security Group to allow traffic from your IP. You should then be able to see the frontend, Elasticsearch and Neo4j by visiting the URLs: 58 | 59 | - http://xxxxxxx:5000/ 60 | - http://xxxxxxx:9200/ 61 | - http://xxxxxxx:7474/browser/ 62 | 63 | ## TODO 64 | 65 | - Configuration sent to services not working properly (amundsen.db vs graph.db) 66 | - Create a persistent volume for graph/metadata storage. [See this](https://aws.amazon.com/blogs/compute/amazon-ecs-and-docker-volume-drivers-amazon-ebs/) 67 | - Refactor the VPC and default security group permissions 68 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | ## Reporting an Issue 4 | 5 | The easiest way you can contribute to Amundsen is by creating issues. For that, please use the [issues][issues] section of the Amundsen repository and search for a similar problem. If you don't find it, submit your bug, question, proposal or feature request. 6 | 7 | In the case of bugs, please be descriptive and, if possible, include a screenshot of the issue. 8 | 9 | ## Creating Pull Requests 10 | 11 | Before sending a Pull Request with significant changes, please use the [issue tracker][issues] to discuss the potential improvements you want to make. 12 | 13 | ## First-Time Contributors 14 | 15 | If this is your first contribution to open source, you can [follow this tutorial][contributionTutorial] or check [this video series][contributionVideos] to learn about the contribution workflow with GitHub. 16 | 17 | We always have tickets labeled ['good first issue'][goodFirstIssues] and ['help wanted'][helpWantedIssues]. These are a great starting point if you want to contribute. Don't hesitate to ask questions about the issue if you are not sure about the strategy to follow. 18 | 19 | ## Requesting a Feature 20 | 21 | We have created a [Roadmap][roadmap] document with our plans for the next releases; however, we are open to hearing your ideas for new features! 22 | 23 | For that, you can create an issue and select the "Feature Proposal" template. Fill in as much information as possible, and if you can, add responses to the following questions: 24 | 25 | - Will we need to add a new model or change any existing models? 26 | - What would the Migration Plan look like? Will it be backwards-compatible? 27 | - Which alternatives did you consider? 28 | 29 | ## Setup 30 | 31 | To start contributing to Amundsen, you need to set up your machine to develop with the project. For that, we have prepared a [Developer Guide][developerGuide] that will guide you through setting up your environment to develop locally with Amundsen. 32 | 33 | ## Get Recognition 34 | 35 | You can add yourself or somebody else to the contributors list by using the [All Contributors bot][allContributorsBot]. 36 | 37 | ## Next Steps 38 | 39 | Once you have your environment set up and ready to go, you can check our [documentation][documentationHomepage] and the project's [Roadmap][roadmap] to see what's coming.
40 | 41 | [issues]: https://github.com/amundsen-io/amundsen/issues 42 | [allContributorsBot]: https://allcontributors.org/docs/en/bot/usage 43 | [contributionTutorial]: https://github.com/firstcontributions/first-contributions#first-contributions 44 | [contributionVideos]: https://egghead.io/courses/how-to-contribute-to-an-open-source-project-on-github 45 | [goodFirstIssues]: https://github.com/amundsen-io/amundsen/labels/good%20first%20issue 46 | [helpWantedIssues]: https://github.com/amundsen-io/amundsen/labels/help%20wanted 47 | [developerGuide]: https://www.amundsen.io/amundsen/developer_guide/ 48 | [roadmap]: https://www.amundsen.io/amundsen/roadmap/ 49 | [documentationHomepage]: https://www.amundsen.io/amundsen/ 50 | -------------------------------------------------------------------------------- /docs/installation-aws-ecs/docker-ecs-amundsen.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | neo4j: 4 | image: neo4j:3.3.0 5 | container_name: neo4j_amundsen 6 | environment: 7 | - NEO4J_AUTH=neo4j/test 8 | # These dont seem to be working though! 9 | - NEO4J_dbms.active_database=amundsen.db 10 | - NEO4J_dbms.directories.data=/neo4j/data 11 | - NEO4J_dbms.directories.logs=/var/log/neo4j 12 | - NEO4J_dbms.directories.import=/var/lib/neo4j/import 13 | - NEO4J_dbms.security.auth_enabled=false 14 | - NEO4J_dbms.connectors.default_listen_address=0.0.0.0 15 | ulimits: 16 | nofile: 17 | soft: 40000 18 | hard: 40000 19 | ports: 20 | - 7474:7474 21 | - 7687:7687 22 | logging: 23 | driver: awslogs 24 | options: 25 | awslogs-group: amundsen-neo4j 26 | awslogs-region: us-west-2 27 | awslogs-stream-prefix: amundsen-neo4j 28 | 29 | elasticsearch: 30 | image: elasticsearch:6.7.0 31 | container_name: es_amundsen 32 | ports: 33 | - 9200:9200 34 | ulimits: 35 | nofile: 36 | soft: 65536 37 | hard: 65536 38 | logging: 39 | driver: awslogs 40 | options: 41 | awslogs-group: amundsen-elasticsearch 42 | awslogs-region: us-west-2 43 | awslogs-stream-prefix: amundsen-elasticsearch 44 | 45 | amundsensearch: 46 | image: amundsendev/amundsen-search:1.1.1 47 | container_name: amundsensearch 48 | ports: 49 | - 5001:5000 50 | depends_on: 51 | - elasticsearch 52 | environment: 53 | - PROXY_ENDPOINT=es_amundsen 54 | logging: 55 | driver: awslogs 56 | options: 57 | awslogs-group: amundsensearch 58 | awslogs-region: us-west-2 59 | awslogs-stream-prefix: amundsensearch 60 | 61 | amundsenmetadata: 62 | image: amundsendev/amundsen-metadata:1.0.7 63 | container_name: amundsenmetadata 64 | depends_on: 65 | - neo4j 66 | ports: 67 | - 5002:5000 68 | environment: 69 | - PROXY_HOST=bolt://neo4j_amundsen 70 | logging: 71 | driver: awslogs 72 | options: 73 | awslogs-group: amundsenmetadata 74 | awslogs-region: us-west-2 75 | awslogs-stream-prefix: amundsenmetadata 76 | 77 | amundsenfrontend: 78 | image: amundsendev/amundsen-frontend:1.0.5 79 | container_name: amundsenfrontend 80 | depends_on: 81 | - amundsenmetadata 82 | - amundsensearch 83 | ports: 84 | - 5000:5000 85 | environment: 86 | - SEARCHSERVICE_BASE=http://amundsensearch:5000 87 | - METADATASERVICE_BASE=http://amundsenmetadata:5000 88 | logging: 89 | driver: awslogs 90 | options: 91 | awslogs-group: amundsenfrontend 92 | awslogs-region: us-west-2 93 | awslogs-stream-prefix: amundsenfrontend 94 | 95 | 96 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/cronjob-neo4j-s3-backup.yaml: 
-------------------------------------------------------------------------------- 1 | {{ if and .Values.neo4j.enabled (and .Values.neo4j.backup.enabled .Values.neo4j.backup.s3Path .Values.neo4j.persistence) }} 2 | apiVersion: batch/v1beta1 3 | kind: CronJob 4 | metadata: 5 | name: neo4j-s3-backup 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | component: neo4j-s3-backup 9 | chart: {{ template "amundsen.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | schedule: {{ .Values.neo4j.backup.schedule | quote }} 14 | concurrencyPolicy: Forbid 15 | jobTemplate: 16 | spec: 17 | template: 18 | metadata: 19 | labels: 20 | app: {{ template "amundsen.name" . }} 21 | component: neo4j-s3-backup 22 | release: {{ .Release.Name }} 23 | {{- with .Values.neo4j.backup.podAnnotations }} 24 | annotations: 25 | {{ toYaml . | indent 12 }} 26 | {{- end }} 27 | spec: 28 | restartPolicy: Never 29 | containers: 30 | - name: backup-neo4j 31 | image: neo4j:{{ .Values.neo4j.version }} 32 | command: 33 | - "/bin/sh" 34 | - "-c" 35 | - | 36 | apk -v --update add --no-cache --quiet curl python py-pip && 37 | pip install awscli -q && 38 | NOW="$(date "+%Y-%m-%d-%H:%M:%S")" && 39 | BACKUP_SCHEMA_NAME="graph.db-backup-$NOW.schema" && 40 | BACKUP_DATA_NAME="graph.db-backup-$NOW.data" && 41 | BACKUP_NAME="graph.db-backup-$NOW" && 42 | echo "CALL apoc.export.cypher.schema('/var/lib/neo4j/data/$BACKUP_SCHEMA_NAME', {});" | /var/lib/neo4j/bin/neo4j-shell -host neo4j && 43 | echo "CALL apoc.export.graphml.all('/var/lib/neo4j/data/$BACKUP_DATA_NAME', {useTypes: true, readLabels: true});" | /var/lib/neo4j/bin/neo4j-shell -host neo4j && 44 | printf "\nTarring -> /data/$BACKUP_SCHEMA_NAME and /data/$BACKUP_DATA_NAME to /data/$BACKUP_NAME.tar" && 45 | while [ ! -f /data/$BACKUP_DATA_NAME ]; do echo "backup data file does not exist: [/data/$BACKUP_DATA_NAME] sleeping..." && ls "/data/" && sleep 30; done && 46 | tar -cvf "/data/$BACKUP_NAME.tar" "/data/$BACKUP_SCHEMA_NAME" "/data/$BACKUP_DATA_NAME" && 47 | printf "\nZipping -> /data/$BACKUP_NAME.tar.gz\n" && 48 | gzip -9 "/data/$BACKUP_NAME.tar" && 49 | printf "Pushing /data/$BACKUP_NAME.tar.gz -> $BUCKET" && 50 | aws s3 cp "/data/$BACKUP_NAME.tar.gz" "$BUCKET" && 51 | printf "Cleaning up /data/graph.db-backup*" && 52 | rm /data/graph.db-backup* 53 | env: 54 | - name: BUCKET 55 | value: {{ .Values.neo4j.backup.s3Path }} 56 | volumeMounts: 57 | - name: data 58 | mountPath: /data 59 | volumes: 60 | - name: data 61 | persistentVolumeClaim: 62 | claimName: neo4j-pvc 63 | {{- end}} 64 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/deployment-metadata.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "amundsen.fullname" . }}-{{ .Values.metadata.serviceName }} 5 | labels: 6 | app: {{ template "amundsen.name" . }} 7 | component: {{ .Values.metadata.serviceName }} 8 | chart: {{ template "amundsen.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | spec: 12 | selector: 13 | matchLabels: 14 | app: {{ template "amundsen.name" . }} 15 | component: {{ .Values.metadata.serviceName }} 16 | release: {{ .Release.Name }} 17 | template: 18 | metadata: 19 | {{- with default .Values.podAnnotations .Values.metadata.podAnnotations }} 20 | annotations: 21 | {{ toYaml . 
| indent 8 }} 22 | {{- end }} 23 | labels: 24 | app: {{ template "amundsen.name" . }} 25 | component: {{ .Values.metadata.serviceName }} 26 | release: {{ .Release.Name }} 27 | spec: 28 | {{- with default .Values.nodeSelector .Values.metadata.nodeSelector }} 29 | nodeSelector: 30 | {{ toYaml . | indent 8 }} 31 | {{- end }} 32 | {{- with default .Values.affinity .Values.metadata.affinity }} 33 | affinity: 34 | {{ toYaml . | indent 8 }} 35 | {{- end }} 36 | {{- with default .Values.tolerations .Values.metadata.tolerations }} 37 | tolerations: 38 | {{ toYaml . | indent 8 }} 39 | {{- end }} 40 | volumes: 41 | {{- if .Values.frontEnd.oidcEnabled }} 42 | - name: oidc-config 43 | secret: 44 | secretName: oidc-config 45 | {{- end }} 46 | containers: 47 | - name: {{ .Chart.Name }}-{{ .Values.metadata.serviceName }} 48 | image: {{ .Values.metadata.image }}:{{ .Values.metadata.imageTag }} 49 | imagePullPolicy: Always 50 | ports: 51 | - containerPort: 5002 52 | env: 53 | - name: PROXY_HOST 54 | value: {{ if .Values.metadata.neo4jEndpoint }}{{ .Values.metadata.neo4jEndpoint }}{{ else }}bolt://neo4j.{{ .Release.Namespace }}.svc.cluster.local{{ end }} 55 | {{- if .Values.frontEnd.oidcEnabled }} 56 | - name: FLASK_OIDC_CLIENT_SECRETS 57 | value: /etc/client_secrets.json 58 | - name: FLASK_OIDC_SECRET_KEY 59 | valueFrom: 60 | secretKeyRef: 61 | name: oidc-config 62 | key: OIDC_CLIENT_SECRET 63 | {{- end }} 64 | livenessProbe: 65 | httpGet: 66 | path: "/healthcheck" 67 | port: 5002 68 | initialDelaySeconds: 60 69 | periodSeconds: 60 70 | timeoutSeconds: 1 71 | successThreshold: 1 72 | failureThreshold: 5 73 | volumeMounts: 74 | {{- if .Values.frontEnd.oidcEnabled }} 75 | - name: oidc-config 76 | mountPath: /etc/client_secrets.json 77 | subPath: client_secrets.json 78 | {{- end }} 79 | {{- with .Values.metadata.resources }} 80 | resources: 81 | {{ toYaml . | indent 10 }} 82 | {{- end }} 83 | -------------------------------------------------------------------------------- /docs/tutorials/badges.md: -------------------------------------------------------------------------------- 1 | # How to add table level and column level badges 2 | Amundsen supports use of clickable badges on tables, and non clickable badges for columns. Clickable badges trigger a search for all of the resources with the given badge name as a filter. 
3 | 4 |  5 | *Table badge* 6 | 7 |  8 | *Column badge* 9 | ## Badges configuration 10 | 11 | In order for Amundsen to accept new badges via metadata and to change their style in the UI, there are two configs that need to be set up: 12 | 13 | On the [amundsen metadata library](https://github.com/amundsen-io/amundsenmetadatalibrary/blob/3c9a55e6af4cac9b342803c34cfe81851470e7f5/metadata_service/config.py) you should add your badges to the whitelist within your custom configuration file, following the format of this example: 14 | 15 | ``` 16 | # whitelist badges 17 | WHITELIST_BADGES: List[Badge] = [ 18 | Badge(badge_name='alpha', 19 | category='table_status'), 20 | Badge(badge_name='beta', 21 | category='table_status'), 22 | ] 23 | ``` 24 | 25 | In order to set up the color and display name on the [amundsen frontend library](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/amundsen_application/static/js/config/config-custom.ts) you should add the desired badge styles as follows: 26 | 27 | ``` 28 | const configCustom: AppConfigCustom = { 29 | badges: { 30 | 'alpha': { 31 | style: BadgeStyle.DEFAULT, 32 | displayName: 'Alpha', 33 | }, 34 | 'partition column': { 35 | style: BadgeStyle.DEFAULT, 36 | displayName: 'Partition Column', 37 | }, 38 | } 39 | } 40 | ``` 41 | 42 | **Note:** any badges that are not defined in this configuration will show up with `BadgeStyle.DEFAULT`. 43 | 44 | 45 | ## Adding table badges through metadata library 46 | To manually add a badge to a particular table, the metadata API can be used. Here are the available requests: 47 | 48 | To add a badge on a table: 49 | ``` 50 | curl -X PUT https://{your metadata url}/table/{table key}/badge/{badge name}?category={badge category} 51 | ``` 52 | 53 | To delete a badge on a table: 54 | ``` 55 | curl -X DELETE https://{your metadata url}/table/{table key}/badge/{badge name}?category={badge category} 56 | ``` 57 | 58 | ## Adding badges through databuilder (and column level badges) 59 | 60 | To add badges using databuilder, you can use the [BadgeMetadata](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/models/badge.py) class and pass in the entity you want to create a badge relationship for. For an example of how this is done, search for badge in [TableMetadata](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/models/table_metadata.py) to see how we add badge nodes and relationships to neo4j. 61 | In [hive_table_metadata_extractor.py](https://github.com/amundsen-io/amundsendatabuilder/blob/8655338725bf279ea0332e5e6ab0592c8c7459ae/databuilder/extractor/hive_table_metadata_extractor.py#L106) you can see how the partition column is obtained and added to a column so the badge node can be created and related to the correct column. -------------------------------------------------------------------------------- /docs/issue_labeling.md: -------------------------------------------------------------------------------- 1 | # Issue and Feature Labeling 2 | > On Amundsen, we aim to be methodical in using issue labels, offering our community a way to understand what the issues are about and their status within our development process. 3 | 4 | We use a bunch of GitHub labels. They are a mix of custom labels and the default GitHub labels for open-source projects. We base these labels on four main types: **status labels**, **issue type labels**, **project labels**, and the **“other” category**. Read on to learn more about them.
5 | 6 | ## Status Labels 7 | * They show at a glance the status and progress of each issue 8 | * Prefixed with "Status:", followed by the label 9 | * Only *one status label* will be applied to any particular issue 10 | 11 | ### Labels 12 | - **Status: Needs Reproducing** – For bugs that need to be reproduced in order to get fixed 13 | - **Status: Review Needed** – Issue that needs review to be considered 14 | - **Status: Accepted** – Feature that we want to implement going forward 15 | - **Status: In Progress** – Issue that is being worked on right now. 16 | - **Status: Completed** – Issue is completed and on master 17 | - **Status: Abandoned** – Issue we won’t go ahead and implement, or that needs a “champion” to take it through 18 | - **Status: Blocked** – Issue blocked by any reason (dependencies, previous work, etc.) 19 | - **Status: On Hold** – Issue that is being considered but stopped due to lack of resources or changes in the roadmap 20 | 21 | Here is a diagram representing these states within the lifecycles: 22 |  23 | 24 | ## Type Labels 25 | * They show the type of the issue 26 | * Prefixed with "Type:", followed by the label 27 | 28 | ### Labels 29 | - **Type: Bug** – An unexpected problem or unintended behavior 30 | - **Type: Feature** – A new feature request 31 | - **Type: Maintenance** – A regular maintenance chore or task, including refactors, build system, CI, performance improvements 32 | - **Type: Documentation** – A documentation improvement task 33 | - **Type: Question** – An issue or PR that needs more information or a user question 34 | 35 | ## Project Labels 36 | * They indicate which project the issue refers to 37 | * Prefixed with "Project:", followed by the name of the project 38 | 39 | ### Labels 40 | - **Project: Common** – From amundsencommon 41 | - **Project: Databuilder** – From amundsendatabuilder 42 | - **Project: Frontend** – From amundsenfrontendlibrary 43 | - **Project: Metadata** – From amundsenmetadatalibrary 44 | - **Project: Search** – From amundsensearchlibrary 45 | - **Project: k8s** – Related to the Kubernetes helm chart 46 | - **Project: All** – Related to all the projects above 47 | 48 | ## Other Labels 49 | * Some of these are part of the standard GitHub labels and intended for OSS contributors 50 | * Some are related to the tools we use to maintain the library 51 | * They are not prefixed 52 | 53 | ### Labels 54 | - **help wanted** – Indicates we are looking for contributors on this issue 55 | - **good first issue** – Indicates the issue is a great one to tackle by newcomers to the project or OSS in general 56 | - **keep fresh** – Avoids getting the issue archived by our stale bot 57 | -------------------------------------------------------------------------------- /docs/tutorials/data-preview-with-superset.md: -------------------------------------------------------------------------------- 1 | # How to setup a preview client with Apache Superset 2 | 3 | In the previous [tutorial](./index-postgres.md), we talked about how to index the table metadata 4 | for a postgres database. In this tutorial, we will walk through how to configure data preview for this `films` table 5 | using Apache Superset. 6 | 7 | Amundsen provides an integration between Amundsen and BI Viz tool for data preview. It is not necessary to use Apache Superset 8 | as long as the BI Viz tool provides endpoint to do querying and get the results back from the BI tool. 
9 | [Apache Superset](https://superset.apache.org/) is an open-source business intelligence tool 10 | that can be used for data exploration and it is what we leverage internally at Lyft to support the feature. 11 | 12 | 1. Please setup Apache Superset following its official installation 13 | [guide](https://superset.apache.org/installation.html#superset-installation-and-initialization): 14 | ```bash 15 | # Install superset 16 | pip install apache-superset 17 | 18 | # Initialize the database 19 | superset db upgrade 20 | 21 | # Create an admin user (you will be prompted to set a username, first and last name before setting a password) 22 | $ export FLASK_APP=superset 23 | superset fab create-admin 24 | 25 | # Load some data to play with 26 | superset load_examples 27 | 28 | # Create default roles and permissions 29 | superset init 30 | 31 | # To start a development web server on port 8088, use -p to bind to another port 32 | superset run -p 8088 --with-threads --reload --debugger 33 | ``` 34 | 35 | Once setup properly, you could view the superset UI as following: 36 |  37 | 38 | 2. We need to add the postgres database to superset as the following: 39 |  40 | 41 | 3. We could verify the content of the `films` table using superset's sqlab feature: 42 |  43 | 44 | 4. Next, We need to build a preview client following this [guide](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/docs/examples/superset_preview_client.md) 45 | and the [example client code](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/amundsen_application/base/examples/example_superset_preview_client.py). 46 | There are a couple of things to keep in mind: 47 | - We could start with an unauthenticated Superset([example superset config](https://gist.github.com/feng-tao/b89e6faf7236372cef70a44f13615c39)), 48 | but in production, we will need to send the impersonate info to Superset 49 | to properly verify whether the given user could view the data. 50 | - When we build the client, we could need to configure the database id instead of the database name when send the request to superset. 51 | 52 | 5. Once we configure the preview client, put it in the frontend service entry point ([example](https://github.com/lyft/amundsenfrontendlibrary/blob/master/docs/configuration.md#python-entry-points)) and restart the frontend. 53 | 54 | 6. We could now view the preview data for the `films` table in Amundsen. 55 |  56 | From the above figure, the preview button on the table page is clickable. 57 | Once it clicked, you could see the actual data queried 58 | from Apache Superset: 59 |  60 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/deployment-neo4j.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.neo4j.enabled }} 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: neo4j 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | component: neo4j 9 | chart: {{ template "amundsen.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | selector: 14 | matchLabels: 15 | app: {{ template "amundsen.name" . }} 16 | component: neo4j 17 | release: {{ .Release.Name }} 18 | replicas: 1 19 | template: 20 | metadata: 21 | {{- with default .Values.podAnnotations .Values.neo4j.podAnnotations }} 22 | annotations: 23 | {{ toYaml . | indent 8 }} 24 | {{- end }} 25 | labels: 26 | app: {{ template "amundsen.name" . 
}} 27 | component: neo4j 28 | release: {{ .Release.Name }} 29 | spec: 30 | {{- with .Values.neo4j.nodeSelector }} 31 | nodeSelector: 32 | {{ toYaml . | indent 8 }} 33 | {{- end }} 34 | {{- with .Values.neo4j.affinity }} 35 | affinity: 36 | {{ toYaml . | indent 8 }} 37 | {{- end }} 38 | {{- with .Values.neo4j.tolerations }} 39 | tolerations: 40 | {{ toYaml . | indent 8 }} 41 | {{- end }} 42 | initContainers: 43 | - name: init-neo4j-plugins 44 | image: "appropriate/curl:latest" 45 | imagePullPolicy: "IfNotPresent" 46 | command: 47 | - "/bin/sh" 48 | - "-c" 49 | - | 50 | curl -L https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/3.3.0.4/apoc-3.3.0.4-all.jar -O 51 | curl -L https://github.com/neo4j-contrib/neo4j-graph-algorithms/releases/download/3.3.5.0/graph-algorithms-algo-3.3.5.0.jar -O 52 | curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.11.250/aws-java-sdk-core-1.11.250.jar -O 53 | curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-s3/1.11.250/aws-java-sdk-s3-1.11.250.jar -O 54 | curl -L https://repo1.maven.org/maven2/org/apache/httpcomponents/httpclient/4.5.4/httpclient-4.5.4.jar -O 55 | curl -L https://repo1.maven.org/maven2/org/apache/httpcomponents/httpcore/4.4.8/httpcore-4.4.8.jar -O 56 | curl -L https://repo1.maven.org/maven2/joda-time/joda-time/2.9.9/joda-time-2.9.9.jar -O 57 | chmod 755 *.jar 58 | mv *.jar /var/lib/neo4j/plugins 59 | volumeMounts: 60 | - name: plugins 61 | mountPath: /var/lib/neo4j/plugins 62 | containers: 63 | - name: neo4j 64 | image: neo4j:{{ .Values.neo4j.version }} 65 | ports: 66 | - containerPort: 7474 67 | - containerPort: 7687 68 | - containerPort: 1337 69 | env: 70 | - name: NEO4J_CONF 71 | value: "/conf" 72 | volumeMounts: 73 | - name: conf 74 | mountPath: /conf 75 | {{- if .Values.neo4j.persistence }} 76 | - name: data 77 | mountPath: /var/lib/neo4j/data 78 | {{- end}} 79 | - name: plugins 80 | mountPath: /var/lib/neo4j/plugins 81 | {{- with .Values.neo4j.resources }} 82 | resources: 83 | {{ toYaml . | indent 10 }} 84 | {{- end}} 85 | volumes: 86 | - name: conf 87 | configMap: 88 | name: neo4j-configmap 89 | {{- if .Values.neo4j.persistence }} 90 | - name: data 91 | persistentVolumeClaim: 92 | claimName: neo4j-pvc 93 | {{- end}} 94 | - name: plugins 95 | emptyDir: {} 96 | {{ end }} 97 | -------------------------------------------------------------------------------- /MAINTAINING.md: -------------------------------------------------------------------------------- 1 | # Maintaining Amundsen 2 | 3 | As maintainers of the project, this is our guide. Most of the steps and guidelines 4 | in the [Contributing](CONTRIBUTING.md) document apply here, including how to set 5 | up your environment, write code to fit the code style, run tests, craft commits 6 | and manage branches. 7 | 8 | Beyond this, this document provides some details that would 9 | be too low-level for contributors. 10 | 11 | ## Table of Contents 12 | 13 | - [Communication](#communication) 14 | - [Managing the community](#managing-the-community) 15 | - [Workflow](#workflow) 16 | - [Architecture](#architecture) 17 | - [Updating the changelog](#updating-the-changelog) 18 | - [Documentation](#documentation) 19 | - [Labels](#labels) 20 | - [Adding new projects](#adding-new-projects) 21 | - [Related Documents](#related-documents) 22 | 23 | ## Communication 24 | 25 | We have several ways that we can communicate with each other: 26 | 27 | - To show our direction and next steps, the [**roadmap**][roadmap] is the best place. 
28 | - To track progress on the movement of issues, [**labels**](#labels) 29 | are useful. 30 | - To learn about what the community has been working lately, our [community meeting] is a great event. It happens the first Thursday of every month at 9AM PST, and you can watch past meeting recordings [here][cmeetingrecordings] 31 | - To chat with the maintainers team, get support or connect with Amundsen's community, join our Slack 32 | 33 | [roadmap]: https://www.amundsen.io/amundsen/roadmap/ 34 | [cmeeting]: meet.google.com/mqz-ndck-jmj 35 | [cmeetingrecordings]: https://www.youtube.com/channel/UCgOyzG0sEoolxuC9YXDYPeg 36 | [slack]: amundsenworkspace.slack.com 37 | 38 | ## Managing the community 39 | 40 | We try to create and foster a community around Amundsen. We do this by: 41 | 42 | - Answering questions from members of the community 43 | - Triaging Github issues, adding the proper [labels][labels] to new tickets 44 | - Closing stale issues and feature requests 45 | - Keeping the community informed by ensuring that we add communications regularly with the new features 46 | - Ensuring that the documentation, as well as the documentation site, is kept up to 47 | date 48 | - Doing code reviews for other maintainers and the community 49 | - Reviewing [RFCs][rfcs] and shaping the future of the project 50 | 51 | [labels]: https://github.com/amundsen-io/amundsen/labels 52 | [rfcs]: https://github.com/amundsen-io/rfcs 53 | 54 | ## Workflow 55 | 56 | We generally follow [GitHub Flow]. The `master` branch is the main line, and all 57 | branches are cut from and get merged back into this branch. Generally, the 58 | workflow is as follows: 59 | 60 | [github flow]: https://help.github.com/articles/github-flow/ 61 | 62 | - Cut a feature or bugfix branch from this branch 63 | - Upon completing a branch, create a PR and ask another maintainer to approve 64 | it 65 | - Try to keep the commit history as clean as possible. Before merging, squash 66 | "WIP" or related commits together and rebase as needed 67 | - Once your PR is approved, and you've cleaned up your branch, you're free to 68 | merge it in 69 | 70 | ## Architecture 71 | 72 | We have covered Amundsen's architecture in our [docs](https://lyft.github.io/amundsen/architecture/). 73 | 74 | ## Documentation 75 | 76 | We use [mkdocs] for creating our documentation from Markdown files. This system is configured from the 'mkdocs.yml' file in the root of this repository. 77 | 78 | Currently, our docs are built and deployed automatically with a GitHub action, so we shouldn't need to do anything. 79 | 80 | [mkdocs]: https://www.mkdocs.org/ 81 | 82 | ## Labels 83 | 84 | We've found labels to be useful for cataloging and marking progress on features and bugs. You can read about our labels on the [issue_labeling](https://lyft.github.io/amundsen/issue_labeling/) document. 85 | 86 | ## Adding new projects 87 | 88 | To add new projects to the amundsen-io organization, we will first discuss it through a GitHub issue. Once we discuss it thoroughly (~3-5 business days, depending on the volume of conversation), the maintainers will decide whether the new project should be added. 
89 | 90 | ## Related Documents 91 | 92 | - [Contributing Guide](https://www.amundsen.io/amundsen/CONTRIBUTING/) 93 | - [Governance Document](https://github.com/amundsen-io/amundsen/blob/master/GOVERNANCE.md) 94 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Amundsen 2 | repo_name: Amundsen 3 | repo_url: https://github.com/amundsen-io/amundsen 4 | site_description: "Amundsen is a metadata driven application for improving the productivity of data analysts, data scientists and engineers when interacting with data." 5 | site_author: Amundsen Project Authors. 6 | site_url: https://www.amundsen.io/amundsen/ 7 | remote_branch: gh-pages 8 | 9 | copyright: 'Copyright © 2018-2020 Amundsen Project Authors.' 10 | 11 | theme: 12 | name: 'material' 13 | logo: img/logos/amundsen_mark_orange.svg 14 | favicon: 'img/logos/amundsen_mark_orange.svg' 15 | palette: 16 | primary: '#2B1B81' 17 | accent: '#2B1B81' 18 | feature: 19 | tabs: true 20 | 21 | -extra_css: 22 | - 'css/app.css' 23 | 24 | 25 | markdown_extensions: 26 | - admonition 27 | - smarty 28 | - codehilite: 29 | guess_lang: false 30 | linenums: True 31 | - footnotes 32 | - meta 33 | - toc: 34 | permalink: true 35 | - pymdownx.betterem: 36 | smart_enable: all 37 | - pymdownx.caret 38 | - pymdownx.details 39 | - pymdownx.inlinehilite 40 | - pymdownx.magiclink 41 | - pymdownx.smartsymbols 42 | - pymdownx.superfences 43 | - tables 44 | 45 | 46 | extra: 47 | # type is the name of the FontAwesome icon without the fa- prefix. 48 | social: 49 | - type: globe 50 | link: https://www.amundsen.io/ 51 | - type: github-alt 52 | link: https://github.com/amundsen-io 53 | - type: twitter 54 | link: https://twitter.com/amundsenio 55 | - type: linkedin 56 | link: https://www.linkedin.com/company/the-linux-foundation/ 57 | 58 | nav: 59 | - 'Overview': index.md 60 | - 'Architecture': architecture.md 61 | - 'Developer Guide': 62 | - 'Overview': developer_guide.md 63 | - 'Issue and Feature Labeling': issue_labeling.md 64 | - 'Contributing Guide': CONTRIBUTING.md 65 | - 'User Guide': 66 | - 'Quick Start': 'installation.md' 67 | - 'Tutorials': 68 | - 'How to index metadata for real life databases': 'tutorials/index-postgres.md' 69 | - 'How to setup a preview client with Apache Superset': 'tutorials/data-preview-with-superset.md' 70 | - 'How to setup user profiles': 'tutorials/user-profiles.md' 71 | - 'How to ingest Dashboard': 'databuilder/docs/dashboard_ingestion_guide.md' 72 | - 'How to track user metric for Amundsen': 'tutorials/how-to-track-user-metric.md' 73 | - 'How to add table level and column level badges': 'tutorials/badges.md' 74 | - 'How to search Amundsen effectively': 'tutorials/how-to-search-effective.md' 75 | - 'Deployment': 76 | - 'Authentication': 'authentication/oidc.md' 77 | - 'AWS ECS Installation': 'installation-aws-ecs/aws-ecs-deployment.md' 78 | - 'K8S Installation': 'k8s_install.md' 79 | - 'Components': 80 | - 'Frontend': 81 | - 'Overview': 'frontend/README.md' 82 | - 'Configuration': 83 | - 'Application Config': 'frontend/docs/application_config.md' 84 | - 'React Configuration': 'frontend/docs/configuration.md' 85 | - 'Flask Configuration': 'frontend/docs/flask_config.md' 86 | - 'Preview Client Setup': 'frontend/docs/examples/superset_preview_client.md' 87 | - 'FE Developer Guide': 'frontend/docs/developer_guide.md' 88 | - 'FE Installation Guide': 'frontend/docs/installation.md' 89 | - 'Recommended 
Practices': 'frontend/docs/recommended_practices.md' 90 | - 'Search': 91 | - 'Overview': 'search/README.md' 92 | - 'Proxy': 93 | - 'Atlas Backend': 'search/docs/atlas-search.md' 94 | - 'Metadata': 95 | - 'Overview': 'metadata/README.md' 96 | - 'Configuration': 97 | - 'Overview': 'metadata/docs/configurations.md' 98 | - 'Metadata API Structure': 'metadata/docs/structure.md' 99 | - 'Proxy': 100 | - 'Atlas Backend': 101 | - 'Overview': 'metadata/docs/proxy/atlas_proxy.md' 102 | - 'Popular Table': 'metadata/docs/proxy/atlas/popular_tables.md' 103 | - 'Gremlin Backend': 'metadata/docs/proxy/gremlin.md' 104 | - 'Neptune Backend': 'metadata/docs/proxy/neptune.md' 105 | - 'Databuilder': 106 | - 'Overview': 'databuilder/README.md' 107 | - 'Models': 'databuilder/docs/models.md' 108 | - 'Dashboard Ingestion guide': 'databuilder/docs/dashboard_ingestion_guide.md' 109 | - 'Common': 110 | - 'Overview': 'common/README.md' 111 | - 'FAQ': faq.md 112 | - 'Roadmap': roadmap.md 113 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/templates/deployment-frontend.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: {{ template "amundsen.fullname" . }}-{{ .Values.frontEnd.serviceName }} 6 | labels: 7 | app: {{ template "amundsen.name" . }} 8 | component: {{ .Values.frontEnd.serviceName }} 9 | chart: {{ template "amundsen.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | spec: 13 | selector: 14 | matchLabels: 15 | app: {{ template "amundsen.name" . }} 16 | component: {{ .Values.frontEnd.serviceName }} 17 | release: {{ .Release.Name }} 18 | replicas: {{ default 1 .Values.frontEnd.replicas }} 19 | template: 20 | metadata: 21 | {{- with default .Values.podAnnotations .Values.frontEnd.podAnnotations }} 22 | annotations: 23 | {{ toYaml . | indent 8 }} 24 | {{- end }} 25 | labels: 26 | app: {{ template "amundsen.name" . }} 27 | component: {{ .Values.frontEnd.serviceName }} 28 | release: {{ .Release.Name }} 29 | spec: 30 | {{- with default .Values.nodeSelector .Values.frontEnd.nodeSelector }} 31 | nodeSelector: 32 | {{ toYaml . | indent 8 }} 33 | {{- end }} 34 | {{- with default .Values.affinity .Values.frontEnd.affinity }} 35 | affinity: 36 | {{ toYaml . | indent 8 }} 37 | {{- end }} 38 | {{- with default .Values.tolerations .Values.frontEnd.tolerations }} 39 | tolerations: 40 | {{ toYaml . 
| indent 8 }} 41 | {{- end }} 42 | volumes: 43 | {{- if .Values.frontEnd.oidcEnabled }} 44 | - name: oidc-config 45 | secret: 46 | secretName: oidc-config 47 | {{- end }} 48 | containers: 49 | - name: {{ .Chart.Name }}-{{ .Values.frontEnd.serviceName }} 50 | image: {{ .Values.frontEnd.image }}:{{ .Values.frontEnd.imageTag }} 51 | imagePullPolicy: Always 52 | ports: 53 | - containerPort: 5000 54 | env: 55 | - name: FRONTEND_BASE 56 | value: {{ .Values.frontEnd.baseUrl }} 57 | - name: SEARCHSERVICE_BASE 58 | value: http://{{ .Chart.Name }}-{{ .Values.search.serviceName }}:5001 59 | - name: METADATASERVICE_BASE 60 | value: http://{{ .Chart.Name }}-{{ .Values.metadata.serviceName }}:5002 61 | - name: LONG_RANDOM_STRING 62 | value: {{ quote .Values.LONG_RANDOM_STRING }} 63 | {{- if .Values.frontEnd.oidcEnabled }} 64 | - name: FRONTEND_SVC_CONFIG_MODULE_CLASS 65 | value: amundsen_application.oidc_config.OidcConfig 66 | - name: FLASK_OIDC_WHITELISTED_ENDPOINTS 67 | value: status,healthcheck,health,logout 68 | - name: SQLALCHEMY_DATABASE_URI 69 | value: sqlite:///sessions.db 70 | - name: APP_WRAPPER 71 | value: flaskoidc 72 | - name: APP_WRAPPER_CLASS 73 | value: FlaskOIDC 74 | - name: OIDC_CLIENT_SECRETS 75 | value: /etc/client_secrets.json 76 | {{- if .Values.frontEnd.OVERWRITE_REDIRECT_URI }} 77 | - name: OVERWRITE_REDIRECT_URI 78 | value: {{ .Values.frontEnd.OVERWRITE_REDIRECT_URI }} 79 | {{- end }} 80 | - name: OIDC_SECRET_KEY 81 | valueFrom: 82 | secretKeyRef: 83 | name: oidc-config 84 | key: OIDC_CLIENT_SECRET 85 | {{- end }} 86 | command: ["gunicorn"] 87 | args: ['-w', '4', '--bind', ':5000', 'amundsen_application.wsgi'] 88 | readinessProbe: 89 | httpGet: 90 | path: "/healthcheck" 91 | port: 5000 92 | initialDelaySeconds: 10 93 | periodSeconds: 60 94 | timeoutSeconds: 1 95 | successThreshold: 1 96 | failureThreshold: 5 97 | livenessProbe: 98 | httpGet: 99 | path: "/healthcheck" 100 | port: 5000 101 | initialDelaySeconds: 10 102 | periodSeconds: 60 103 | timeoutSeconds: 1 104 | successThreshold: 1 105 | failureThreshold: 5 106 | volumeMounts: 107 | {{- if .Values.frontEnd.oidcEnabled }} 108 | - name: oidc-config 109 | mountPath: /etc/client_secrets.json 110 | subPath: client_secrets.json 111 | {{- end }} 112 | {{- with .Values.metadata.resources }} 113 | resources: 114 | {{ toYaml . | indent 10 }} 115 | {{- end }} 116 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## How to select between Neo4j and Atlas as backend for Amundsen? 4 | 5 | ### Why Neo4j? 6 | 1. Amundsen has direct influence over the data model if you use neo4j. This, at least initially, will benefit the speed by which new features in amundsen can arrive 7 | 2. Atlas is developed with data governance in mind and not with data discovery. You could view "slapping amundsen on top of Atlas" as a kind of Frankenstein: never able to properly able to cater to your audience 8 | 3. Atlas seems to have a slow development cycle and it's community is not very responsive although some small improvements have been made 9 | 4. Atlas has the "Hadoop" era "smell" which isn't considered very sexy nowadays 10 | 5. Neo4j for it is the market leader in Graph database and also was proven by Airbnb’s Data portal on their Data discovery tool. 11 | 12 | ### Why Atlas? 13 | 1. Atlas has lineage support already available. It's been tried and tested. 14 | 2. Tag propagation is supported 15 | 3. 
It has a robust authentication and authorization system 16 | 4. Atlas does data governance; adding Amundsen for discovery makes it the best of both worlds 17 | 5. It has support for push-based ingestion due to its many plugins 18 | 6. The free version of Neo4j does not have authorization support (the Enterprise version does). Your question should actually be why use "neo4j over janusgraph", because that is the right level of comparison. Atlas adds a whole bunch on top of the graph database. 19 | 20 | ## What are the prerequisites to use Apache Atlas as backend for Amundsen? 21 | To run Amundsen with Atlas, the latest versions of the following components should be used: 22 | 1. [Apache Atlas](https://github.com/apache/atlas/) - built from `master` branch. Ref [`103e867cc126ddb84e64bf262791a01a55bee6e5`](https://github.com/apache/atlas/commit/103e867cc126ddb84e64bf262791a01a55bee6e5) (or higher). 23 | 2. [amundsenatlastypes](https://pypi.org/project/amundsenatlastypes/) - library for installing Atlas entity definitions specific to Amundsen integration. Version `1.1.0` (or higher). 24 | 25 | ## How to migrate from Amundsen 1.x -> 2.x? 26 | 27 | v2.0 renames a handful of fields in the services to be more consistent. Unfortunately, one side effect is that the 2.0 versions of the services will need to be deployed simultaneously, as they are not interoperable with the 1.x versions. 28 | 29 | Additionally, some indexed field names in the Elasticsearch document change as well, so if you're using Elasticsearch, you'll need to republish the Elasticsearch index via a Databuilder job. 30 | 31 | The data in the metadata store, however, can be preserved when migrating from 1.x to 2.0. 32 | 33 | A v2.0 deployment consists of deploying all three services along with republishing the Elasticsearch table documents with the v2.0 Databuilder. 34 | 35 | Keep in mind there is likely to be some downtime with v2.0.0 between deploying the 3 services and re-seeding the Elasticsearch indexes, so it might be ideal to stage the rollout by datacenter/environment if uptime is key. 36 | 37 | ## How to avoid certain metadata in Amundsen being erased by databuilder ingestion? 38 | 39 | By default, databuilder always upserts the metadata. If you want to prevent that from happening for certain types of metadata, you could add the following 40 | config to your databuilder job's config: 41 | 42 | ```python 43 | 'publisher.neo4j.{}'.format(neo4j_csv_publisher.NEO4J_CREATE_ONLY_NODES): [DESCRIPTION_NODE_LABEL], 44 | ``` 45 | 46 | This config means that databuilder will only update the table / column description if it doesn't exist yet, for example when the table is newly created. 47 | This is useful when we treat the Amundsen graph as the source of truth for certain types of metadata (e.g. description). 48 | 49 | ## How to capture all Google Analytics? 50 | 51 | Users are likely to have some sort of adblocker installed, making your Google Analytics less accurate. 52 | 53 | To put a proxy in place to bypass any adblockers and capture all analytics, follow these steps: 54 | 55 | 1. Follow https://github.com/ZitRos/save-analytics-from-content-blockers#setup to set up your own proxy server. 56 | 2. In the same repository, run `npm run mask www.googletagmanager.com/gtag/js?id=UA-XXXXXXXXX` and save the output. 57 | 3. In your custom frontend, override https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/amundsen_application/static/templates/fragments/google-analytics-loader.html#L6 to load the analytics script from your masked proxy endpoint instead. 58 | 4.
Now, note that network requests to www.googletagmanager.com will be sent from behind your masked proxy endpoint, saving your analytics from content blockers! -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Bootstrap a default version of Amundsen using Docker 4 | The following instructions are for setting up a version of Amundsen using Docker. 5 | 6 | 1. Make sure you have at least 3GB available to Docker. Install `docker` and `docker-compose`. 7 | 2. Clone [this repo](https://github.com/amundsen-io/amundsen) and its submodules by running: 8 | ```bash 9 | $ git clone --recursive git@github.com:amundsen-io/amundsen.git 10 | ``` 11 | 3. Enter the cloned directory and run: 12 | ```bash 13 | # For Neo4j Backend 14 | $ docker-compose -f docker-amundsen.yml up 15 | 16 | # For Atlas 17 | $ docker-compose -f docker-amundsen-atlas.yml up 18 | ``` 19 | 4. Ingest the provided sample data into Neo4j by doing the following: _(Please skip if you are using the Atlas backend)_ 20 | 21 | * In a separate terminal window, change directory to the [amundsendatabuilder](https://github.com/amundsen-io/amundsendatabuilder) submodule. 22 | * The `sample_data_loader` python script included in the `examples/` directory uses the _elasticsearch client_, _pyhocon_ and other libraries. Install the dependencies in a virtual env and run the script by following the commands below: 23 | ```bash 24 | $ python3 -m venv venv 25 | $ source venv/bin/activate 26 | $ pip3 install -r requirements.txt 27 | $ python3 setup.py install 28 | $ python3 example/scripts/sample_data_loader.py 29 | ``` 30 | 5. View the UI at [`http://localhost:5000`](http://localhost:5000) and try searching for `test`; it should return some results. 31 |  32 | 33 | 6. We could also do an exact-match search for a table entity. For example: search `test_table1` in the table field and 34 | it returns the records that matched. 35 |  36 | 37 | **Atlas Note:** Atlas takes some time to boot properly. So you may not be able to see the results immediately 38 | after the `docker-compose up` command. 39 | Atlas is ready once you see the following line in the docker output: `Amundsen Entity Definitions Created...` 40 | 41 | ### Verify setup 42 | 43 | 1. You can verify dummy data has been ingested into Neo4j by visiting [`http://localhost:7474/browser/`](http://localhost:7474/browser/) and running `MATCH (n:Table) RETURN n LIMIT 25` in the query box. You should see two tables: 44 | 1. `hive.test_schema.test_table1` 45 | 2. `hive.test_schema.test_table2` 46 |  47 | 2. You can verify the data has been loaded into the metadataservice by visiting: 48 | 1. [`http://localhost:5000/table_detail/gold/hive/test_schema/test_table1`](http://localhost:5000/table_detail/gold/hive/test_schema/test_table1) 49 | 2. [`http://localhost:5000/table_detail/gold/dynamo/test_schema/test_table2`](http://localhost:5000/table_detail/gold/dynamo/test_schema/test_table2) 50 | 51 | ### Troubleshooting 52 | 53 | 1. If the docker container doesn't have enough heap memory for Elasticsearch, `es_amundsen` will fail during `docker-compose`. 54 | 1. docker-compose error: `es_amundsen | [1]: max virtual memory areas vm.max_map_count [65530] is too low, increase to at least [262144]` 55 | 2. Increase the heap memory [detailed instructions here](https://www.elastic.co/guide/en/elasticsearch/reference/7.1/docker.html#docker-cli-run-prod-mode) 56 | 1.
Edit `/etc/sysctl.conf` 57 | 2. Make entry `vm.max_map_count=262144`. Save and exit. 58 | 3. Reload settings `$ sysctl -p` 59 | 4. Restart `docker-compose` 60 | 61 | 2. If `docker-amundsen-local.yml` stops because of `org.elasticsearch.bootstrap.StartupException: java.lang.IllegalStateException: Failed to create node environment`, then `es_amundsen` [cannot write](https://discuss.elastic.co/t/elastic-elasticsearch-docker-not-assigning-permissions-to-data-directory-on-run/65812/4) to `.local/elasticsearch`. 62 | 1. `chown -R 1000:1000 .local/elasticsearch` 63 | 2. Restart `docker-compose` 64 | 3. If, when running the sample data loader, you receive a connection error related to Elasticsearch, or one like this for Neo4j: 65 | ``` 66 | Traceback (most recent call last): 67 | File "/home/ubuntu/amundsen/amundsendatabuilder/venv/lib/python3.6/site-packages/neobolt/direct.py", line 831, in _connect 68 | s.connect(resolved_address) 69 | ConnectionRefusedError: [Errno 111] Connection refused 70 | ``` 71 | 4. If the `elastic search` container stops with an error `max file descriptors [4096] for elasticsearch process is too low, increase to at least [65535]`, then add the below code to the `elasticsearch` definition in the file `docker-amundsen-local.yml`. 72 | ``` 73 | ulimits: 74 | nofile: 75 | soft: 65535 76 | hard: 65535 77 | ``` 78 | Then check whether all 5 Amundsen-related containers are running with `docker ps`. Can you connect to the Neo4j UI at http://localhost:7474/browser/ and, similarly, the raw ES API at http://localhost:9200? Do the Docker logs reveal any serious issues? -------------------------------------------------------------------------------- /docs/roadmap.md: -------------------------------------------------------------------------------- 1 | # Amundsen Roadmap 2 | The following roadmap gives an overview of what we are currently working on and what we want to tackle next. This helps potential contributors understand the current status of the project and where it's going next, as well as giving them a chance to be part of the planning. 3 | 4 | ## Amundsen Mission 5 | > *To organize all information about data and make it universally actionable* 6 | 7 | ## Vision for 2020 8 | > *Centralize a comprehensive and actionable map of all our data resources that can be leveraged to solve a growing number of use cases and workflows* 9 | 10 | ## Short Term - Our Current focus 11 | 12 | #### Provide rich metadata to make data trustworthy 13 | *What*: Enrich the table detail page with additional structured metadata / programmatic descriptions. 14 | 15 | *Status*: tech spec WIP 16 | 17 | #### Native lineage integration 18 | *What*: We want to create a native lineage integration in Amundsen, to better surface how data assets interact with each other. 19 | 20 | *Status*: tech spec out 21 | 22 | #### Integrate with Data Quality system 23 | *What*: Integrate with different data quality systems to provide a quality score. 24 | 25 | *Status*: planning 26 | 27 | ## Mid Term - Our Next steps 28 | #### Improve search ranking 29 | *What*: Update search ranking to be informed by "badges" that may exist on data sets, e.g. deprecated, etc. 30 | 31 | *Status*: planning 32 | 33 | #### Notifications when a table evolves 34 | *What*: Notify users in Amundsen (akin to Facebook notifications or similar) when a table evolves. Owners of data and consumers of data will likely need to be notified of different things.
35 | 36 | *Status*: planning has not started 37 | 38 | #### Commonly joined tables / browsing the data model 39 | *What*: As a data user, I would like to see commonly joined tables and how to join them. 40 | One option would be to show commonly joined tables along with example join queries. Another option would be to provide a navigational experience for the data model, showing foreign keys and which tables they come from. 41 | 42 | *Status*: planning has not started 43 | 44 | #### Curated navigation experience 45 | *What*: Currently, Amundsen's experience is very focussed on search. However, especially for new users, an experience where they are able to navigate through the data hierarchy is very important. This item proposes to revamp the navigational experience in Amundsen (currently barebones - based on tags) to do justice to the users' need to browse through data sets when they don't even know what to search for. 46 | 47 | *Status*: planning 48 | 49 | #### Push ingest API 50 | *What*: We want to create a push API so that it is as easy as possible for a new data resource type to be ingested 51 | 52 | *Status*: implementation has started (around 80% complete) 53 | 54 | #### GET Rest API 55 | *What*: enable users to access our data map programmatically through a Rest API 56 | 57 | *Status*: implementation has started 58 | 59 | #### Granular Access Control 60 | *What*: we want to have more granular control of access. For example, only certain types of people would be able to see certain types of metadata/functionality 61 | 62 | *Status*: implementation has not started 63 | 64 | #### Show distinct column values 65 | *What*: When a column has a limited set of possible values, we want to make them easily discoverable 66 | 67 | *Status*: implementation has not started 68 | 69 | #### “Order by” for columns 70 | *What*: we want to help users make sense of which columns people use in the tables we index. Within a frequently used table, a column might not be used anymore because it is known to be deprecated 71 | 72 | *Status*: implementation has not started 73 | 74 | #### Versioning system 75 | *What*: We want to create a versioning system for our indexed resources, to be able to index different versions of the same resource. This is especially required for machine learning purposes. 76 | 77 | *Status*: implementation has not started 78 | 79 | #### Index Processes 80 | *What*: we want to index ETLs and pipelines from our Machine Learning Engine 81 | 82 | *Status*: implementation has not started 83 | 84 | #### Index Teams 85 | *What*: We want to add teams pages to enable users to see the important tables and dashboards a team uses 86 | 87 | *Status*: implementation has not started 88 | 89 | #### Index Services 90 | *What*: With our microservices architecture, we want to index services and show how these services interact with data artifacts 91 | 92 | *Status*: implementation has not started 93 | 94 | #### Index S3 buckets 95 | *What*: add these new resource types to our data map and create resource pages for them 96 | 97 | *Status*: implementation has not started 98 | 99 | #### Index Pub/Sub systems 100 | *What*: We want to make our pub/sub systems discoverable 101 | 102 | *Status*: implementation has not started 103 | 104 | ## How to Get Involved 105 | Let us know in the [Slack channel](https://app.slack.com/client/TGFR0CZM3/CGFBVT23V) if you are interested in taking a stab at leading the development of one of these features.
106 | 107 | You can also jump right in by tackling one of our issues labeled as ['help wanted'](https://github.com/amundsen-io/amundsen/labels/help%20wanted) or, if you are new to Amundsen, try one of our ['good first issue'](https://github.com/amundsen-io/amundsen/labels/good%20first%20issue) tickets. 108 | -------------------------------------------------------------------------------- /docs/tutorials/how-to-track-user-metric.md: -------------------------------------------------------------------------------- 1 | # How to track Amundsen user metric 2 | 3 | After you have deployed Amundsen into production, you want to track how user interacts with Amundsen for various reasons. 4 | 5 | The easier way is to leverage [Google Analytics](https://analytics.google.com/analytics/web/) for basic user tracking. You could first 6 | get the analytics token for your domain and put it as the [frontend config](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/54de01bdc574665316f0517aefbd55cf7ca37ef0/amundsen_application/static/js/config/config-default.ts#L22) 7 | 8 | 9 | Besides implementing Google Analytics, we provide a way called `action_logging` to do fine grained user action tracking. 10 | The `action_logging` is a decorator to allow you to integrate user info and pipe it to your inhouse event tracking system(e.g Kafka). 11 | 12 | You need to put the custom method into entry_points following this 13 | [example](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/54de01bdc574665316f0517aefbd55cf7ca37ef0/docs/configuration.md#action-logging). 14 | 15 | And here is the IDL proto we used at Lyft to send the event message: 16 | ```bash 17 | message UserAction { 18 | // Sending host name 19 | google.protobuf.StringValue host_name = 1; 20 | // start time in epoch ms 21 | google.protobuf.Int64Value start_epoch_ms = 2; 22 | // end time in epoch ms 23 | google.protobuf.Int64Value end_epoch_ms = 3; 24 | // json array contains positional arguments 25 | common.LongString pos_args_json = 4; 26 | // json object contains key word arguments 27 | common.LongString keyword_args_json = 5; 28 | // json object contains output of command 29 | common.LongString output = 6; 30 | // an error message or exception stacktrace 31 | common.LongString error = 7; 32 | // `user` 33 | google.protobuf.StringValue user = 8; 34 | } 35 | ``` 36 | 37 | It matches the action log model defined in [here](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/ccfd2d6b82957fef347e956b243e4048c191fc0d/amundsen_application/log/action_log_model.py). 38 | 39 | Once you have the event in your data warehouse, you could start building different KPI user metric: 40 | 41 | 1. WAU 42 | 43 | Sample query if the event table named as `default.event_amundsenfrontend_user_action` 44 | ``` 45 | SELECT date_trunc('week', CAST("ds" AS TIMESTAMP)) AS "__timestamp", 46 | COUNT(DISTINCT user_value) AS "count_distinct_active_users" 47 | FROM 48 | (SELECT * 49 | FROM default.event_amundsenfrontend_user_action 50 | WHERE ds > '2019-09-01') AS "expr_qry" 51 | WHERE "ds" >= '2020-04-21 00:00:00.000000' 52 | AND "ds" <= '2020-10-21 05:31:14.000000' 53 | GROUP BY date_trunc('week', CAST("ds" AS TIMESTAMP)) 54 | ORDER BY "count_distinct_active_users" DESC 55 | LIMIT 10000 56 | ``` 57 | 58 | 2. 
DAU 59 | 60 | Sample query if the event table named as `default.event_amundsenfrontend_user_action` 61 | ``` 62 | SELECT date_trunc('day', CAST("ds" AS TIMESTAMP)) AS "__timestamp", 63 | COUNT(DISTINCT user_value) AS "count_distinct_active_users" 64 | FROM 65 | (SELECT * 66 | FROM default.event_amundsenfrontend_user_action 67 | WHERE ds > '2019-09-01') AS "expr_qry" 68 | WHERE "ds" >= '2020-07-21 00:00:00.000000' 69 | AND "ds" <= '2020-10-21 00:00:00.000000' 70 | GROUP BY date_trunc('day', CAST("ds" AS TIMESTAMP)) 71 | ORDER BY "count_distinct_active_users" DESC 72 | LIMIT 50000 73 | ``` 74 | 75 | You could also exclude weekends: 76 | ``` 77 | SELECT date_trunc('day', CAST("ds" AS TIMESTAMP)) AS "__timestamp", 78 | COUNT(DISTINCT user_value) AS "count_distinct_active_users" 79 | FROM 80 | (SELECT * 81 | FROM default.event_amundsenfrontend_user_action 82 | WHERE ds > '2019-09-01') AS "expr_qry" 83 | WHERE "ds" >= '2020-04-21 00:00:00.000000' 84 | AND "ds" <= '2020-10-21 05:33:11.000000' 85 | AND day_of_week(logged_at) NOT IN (6, 86 | 7) 87 | GROUP BY date_trunc('day', CAST("ds" AS TIMESTAMP)) 88 | ORDER BY "count_distinct_active_users" DESC 89 | LIMIT 50000 90 | ``` 91 | 92 | 3. User Penetration per role 93 | 94 | Sample query if the event table named as `default.event_amundsenfrontend_user_action` and a table for user: 95 | ``` 96 | SELECT "title" AS "title", 97 | COUNT(DISTINCT email) * 100 / MAX(role_count) AS "penetration_percent" 98 | FROM 99 | (SELECT e.occurred_at, 100 | u.email, 101 | u.title, 102 | tmp.role_count 103 | FROM default.family_user u 104 | JOIN default.event_amundsenfrontend_user_action e ON u.email = e.user_value 105 | JOIN 106 | (SELECT title, 107 | count(*) role_count 108 | FROM default.family_user 109 | GROUP BY 1) as tmp ON u.title = tmp.title 110 | where ds is not NULL) AS "expr_qry" 111 | WHERE "occurred_at" >= from_iso8601_timestamp('2020-10-14T00:00:00.000000') 112 | AND "occurred_at" <= from_iso8601_timestamp('2020-10-21T00:00:00.000000') 113 | AND "role_count" > 20 114 | GROUP BY "title" 115 | ORDER BY "penetration_percent" DESC 116 | LIMIT 100 117 | ``` 118 | 119 | 4. Usage breakdown per role_count 120 | 121 | sample query: 122 | ``` 123 | SELECT "title" AS "title", 124 | count("email") AS "COUNT(email)" 125 | FROM 126 | (SELECT e.occurred_at, 127 | u.email, 128 | u.title, 129 | tmp.role_count 130 | FROM default.family_user u 131 | JOIN default.event_amundsenfrontend_user_action e ON u.email = e.user_value 132 | JOIN 133 | (SELECT title, 134 | count(*) role_count 135 | FROM default.family_user 136 | GROUP BY 1) as tmp ON u.title = tmp.title 137 | where ds is not NULL) AS "expr_qry" 138 | WHERE "occurred_at" >= from_iso8601_timestamp('2020-10-14T00:00:00.000000') 139 | AND "occurred_at" <= from_iso8601_timestamp('2020-10-21T00:00:00.000000') 140 | GROUP BY "title" 141 | ORDER BY "COUNT(email)" DESC 142 | LIMIT 15 143 | ``` 144 | 145 | 5. 
Search click-through rate
146 | 
147 | Sample query:
148 | ```
149 | SELECT date_trunc('day', CAST("occurred_at" AS TIMESTAMP)) AS "__timestamp",
150 |        SUM(CASE
151 |                WHEN CAST(json_extract_scalar(keyword_args_json, '$.index') AS BIGINT) <= 3 THEN 1
152 |                ELSE 0
153 |            END) * 100 / COUNT(*) AS "click_through_rate"
154 | FROM
155 |   (SELECT *
156 |    FROM default.event_amundsenfrontend_user_action
157 |    WHERE ds > '2019-09-01') AS "expr_qry"
158 | WHERE "occurred_at" >= from_iso8601_timestamp('2020-09-21T00:00:00.000000')
159 |   AND "occurred_at" <= from_iso8601_timestamp('2020-10-21T00:00:00.000000')
160 |   AND "command" IN ('_get_table_metadata',
161 |                     '_get_dashboard_metadata',
162 |                     '_log_get_user')
163 |   AND json_extract_scalar(keyword_args_json, '$.source') IN ('search_results',
164 |                                                              'inline_search')
165 | GROUP BY date_trunc('day', CAST("occurred_at" AS TIMESTAMP))
166 | ORDER BY "click_through_rate" DESC
167 | LIMIT 10000
168 | ```
169 | 
170 | 6. Top 50 active users
171 | 
172 | 7. Top search terms
173 | 
174 | 8. Most popular tables
175 | 
176 | 9. Search click index
177 | 
178 | 10. Metadata edits
179 | 
180 | 11. Metadata edit leaders
181 | 
182 | 12. Amundsen users per role (by joining with employee data)
183 | 
184 | 13. ...
185 | -------------------------------------------------------------------------------- /docs/tutorials/how-to-search-effective.md: --------------------------------------------------------------------------------
1 | # How to search Amundsen effectively
2 | 
3 | The goal of this tutorial is to provide a few tips on how to search for datasets effectively in Amundsen.
4 | 
5 | ## Overview
6 | 
7 | Amundsen currently indexes three types of entities: tables, people, and dashboards. This tutorial mostly covers how to search for a table entity effectively.
8 | 
9 | We will cover other entities in the future.
10 | 
11 | ## General Search
12 | 
13 | Once users are on the Amundsen home page, they can search for anything in the search bar. In the backend, the search system takes the user's query term, searches across the three entities (tables, people, and dashboards), and returns the results with the highest ranking. For table search, it searches across different fields, including table name, schema name, table or column descriptions, tags, etc. Amundsen also supports typeahead search, which queries the backend as soon as users enter new characters.
14 | 
15 | 
16 | Tips:
17 | 
18 | - If you know the full table name (e.g. schema.table), try to search with that full table name, which will generally return it as the top result.
19 | - If you are unsure of the table name, search with `word1 word2` (with a space in between). For example, if your table's name is `test.test_rides` but you only know that the table name includes test and rides, search with `test rides` (space in between). In this case, Amundsen will return tables that match either test or rides and union the results together based on the search algorithm ranking.
20 | - If you know your table name but don't know its schema, you could search with `word1_word2`. For example, if you know your table name is `test_rides`, search with `test_rides`, which will only return tables matching that given name.
21 | 
22 | ## Advanced Search
23 | 
24 | If you want to do traditional faceted search, which allows users to apply multiple filters, you could try out the advanced search.
Currently, only the table entity is supported by the advanced search, but we plan to add support for the dashboard entity in the near future.
25 | 
26 | You can use wildcards in the search boxes as well. In the example above, the user puts `rides*` in the table box. This will search across all the tables that have `rides*` as the table name in the different databases, including bigquery/druid/hive/presto/rs, etc.
27 | 
28 | If you want to narrow down the search results, you can add more filters. In the example above, the user searches for a table name matching `rides*` that has beta as a badge. Once the search finishes, you can see that only one table matches the criteria (test.rides in this case).
29 | 
30 | ## Search Ranking Algorithm Demystified
31 | 
32 | Currently, Amundsen provides the same search ranking for all the different personas. It ranks tables based on the query count in the Presto query log from the past 90 days at Lyft. It could be different based on your company's setup.
33 | 
34 | ## Try out different search heuristics
35 | 
36 | You can always try out different search heuristics using the Kibana Dev Tools.
37 | 
38 | For example, for tables you could use:
39 | ```
40 | GET table_search_index/_search
41 | {
42 |   "query": {
43 |     "function_score": {
44 |       "query": {
45 |         "multi_match": {
46 |           "query": "$term",
47 |           "fields": ["display_name^1000",
48 |                      "name.raw^75",
49 |                      "name^5",
50 |                      "schema^3",
51 |                      "description^3",
52 |                      "column_names^2",
53 |                      "column_descriptions",
54 |                      "tags",
55 |                      "badges",
56 |                      "programmatic_descriptions"]
57 |         }
58 |       },
59 |       "field_value_factor": {
60 |         "field": "total_usage",
61 |         "modifier": "log2p"
62 |       }
63 |     }
64 |   }
65 | }
66 | ```
67 | 
68 | The results will be weighted based on total usage. It is equivalent to the following using a Painless script:
69 | ```
70 | "function_score": {
71 |   "query": {
72 |     "multi_match": {
73 |       "query": query_term,
74 |       "fields": ["display_name^1000",
75 |                  "name.raw^75",
76 |                  "name^5",
77 |                  "schema^3",
78 |                  "description^3",
79 |                  "column_names^2",
80 |                  "column_descriptions",
81 |                  "tags",
82 |                  "badges",
83 |                  "programmatic_descriptions"]
84 |     }
85 |   },
86 |   "functions": [
87 |     {
88 |       "script_score": {
89 |         "script": "def scores = 0; scores = doc['total_usage'].value; return _score * Math.log10(2 + scores);"
90 | 
91 |       }
92 |     }
93 |   ]
94 | }
95 | ```
96 | 
97 | If you want to boost the search results that have a certain badge:
98 | ```
99 | "function_score": {
100 |   "query": {
101 |     "multi_match": {
102 |       "query": query_term,
103 |       "fields": ["display_name^1000",
104 |                  "name.raw^75",
105 |                  "name^5",
106 |                  "schema^3",
107 |                  "description^3",
108 |                  "column_names^2",
109 |                  "column_descriptions",
110 |                  "tags",
111 |                  "badges",
112 |                  "programmatic_descriptions"]
113 |     }
114 |   },
115 |   "functions": [
116 |     {
117 |       "script_score": {
118 |         "script": "def scores = 0; scores = doc['total_usage'].value; if (doc['badges'].value == "
119 |                   "'$badge_for_boost') {return _score * Math.log10(2 + scores) "
120 |                   "* 1.5} else { return _score * Math.log10(2 + scores); }"
121 | 
122 |       }
123 |     }
124 |   ]
125 | }
126 | ```
127 | 
128 | In this case, tables with the given badge ($badge_for_boost, or replace it with your own badge) will get a boosted search ranking score.
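If you would rather iterate on these queries from a script instead of the Kibana Dev Tools, here is a minimal sketch using the `elasticsearch` Python client. This is only an illustration, not part of Amundsen itself: it assumes a 6.x/7.x client, a cluster reachable on `localhost:9200`, and the `table_search_index` alias and field names shown above; `query_term` is a hypothetical search term. Adjust these for your own deployment.

```python
from elasticsearch import Elasticsearch

# Assumption: Elasticsearch is reachable locally; point this at your own cluster.
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

query_term = 'test rides'  # hypothetical search term

# Same idea as the first query above: multi_match across the table fields,
# weighted by log2p of total_usage.
body = {
    'query': {
        'function_score': {
            'query': {
                'multi_match': {
                    'query': query_term,
                    'fields': ['display_name^1000', 'name.raw^75', 'name^5',
                               'schema^3', 'description^3', 'column_names^2',
                               'column_descriptions', 'tags', 'badges',
                               'programmatic_descriptions'],
                }
            },
            'field_value_factor': {'field': 'total_usage', 'modifier': 'log2p'},
        }
    }
}

response = es.search(index='table_search_index', body=body, size=10)
for hit in response['hits']['hits']:
    print(hit['_score'], hit['_source'].get('name'))
```

Comparing the hit order and `_score` values across variations of the query is a quick way to sanity-check a new heuristic before changing the search service itself.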
129 | 
130 | For dashboards, you could try out the following:
131 | ```
132 | GET dashboard_search_index/_search
133 | {
134 |   "query": {
135 |     "function_score": {
136 |       "query": {
137 |         "multi_match": {
138 |           "query": "$search-term",
139 |           "fields": ["name.raw^75",
140 |                      "name^7",
141 |                      "group_name.raw^15",
142 |                      "group_name^7",
143 |                      "description^3",
144 |                      "query_names^3"]
145 |         }
146 |       },
147 |       "field_value_factor": {
148 |         "field": "total_usage",
149 |         "modifier": "log2p"
150 |       }
151 |     }
152 |   }
153 | }
154 | ```
155 | 
156 | We hope this tutorial gives you some ideas of how the search works.
157 | -------------------------------------------------------------------------------- /docs/authentication/oidc.md: --------------------------------------------------------------------------------
1 | # OIDC (Keycloak) Authentication
2 | 
3 | Setting up end-to-end authentication using OIDC is fairly simple and can be done using a Flask wrapper, i.e., [flaskoidc](https://github.com/verdan/flaskoidc).
4 | 
5 | `flaskoidc` leverages Flask's `before_request` functionality to authenticate each request before passing it to
6 | the views. It also accepts headers on each request, if available, in order to validate the bearer token of incoming requests.
7 | 
8 | ## Installation
9 | 
10 | Please refer to the [flaskoidc documentation](https://github.com/verdan/flaskoidc/blob/master/README.md)
11 | for installation and configuration.
12 | 
13 | Note: You need to install and configure `flaskoidc` for each microservice of Amundsen,
14 | i.e., for frontendlibrary, metadatalibrary and searchlibrary, in order to secure each of them.
15 | 
16 | ## Amundsen Configuration
17 | 
18 | Once you have `flaskoidc` installed and configured for each microservice, please set the following environment variables:
19 | 
20 | - amundsenfrontendlibrary:
21 | ```bash
22 | APP_WRAPPER: flaskoidc
23 | APP_WRAPPER_CLASS: FlaskOIDC
24 | ```
25 | 
26 | - amundsenmetadatalibrary:
27 | ```bash
28 | FLASK_APP_MODULE_NAME: flaskoidc
29 | FLASK_APP_CLASS_NAME: FlaskOIDC
30 | ```
31 | 
32 | - amundsensearchlibrary: _(Needs to be implemented)_
33 | ```bash
34 | FLASK_APP_MODULE_NAME: flaskoidc
35 | FLASK_APP_CLASS_NAME: FlaskOIDC
36 | ```
37 | 
38 | By default, `flaskoidc` whitelists the healthcheck URLs so that they are not authenticated. In the case of metadatalibrary and searchlibrary,
39 | we may want to whitelist the healthcheck APIs explicitly using the following environment variable.
40 | 
41 | ```bash
42 | FLASK_OIDC_WHITELISTED_ENDPOINTS: 'api.healthcheck'
43 | ```
44 | 
45 | ## Setting Up Request Headers
46 | 
47 | To communicate securely between the microservices, you need to pass the bearer token from the frontend in each request
48 | to metadatalibrary and searchlibrary. This should be done using the `REQUEST_HEADERS_METHOD` config variable in frontendlibrary.
49 | 
50 | - Define a function to add the bearer token to each request in your config.py:
51 | ```python
52 | def get_access_headers(app):
53 |     """
54 |     Function to retrieve and format the Authorization Headers
55 |     that can be passed to the various microservices that expect them.
56 |     :param app: The current app instance, holding the OIDC authorization information
57 |     :return: A formatted dictionary containing the access token
58 |     as the Authorization header.
59 |     """
60 |     try:
61 |         access_token = app.oidc.get_access_token()
62 |         return {'Authorization': 'Bearer {}'.format(access_token)}
63 |     except Exception:
64 |         return None
65 | ```
66 | 
67 | - Set the method as the request header method in your config.py:
68 | ```python
69 | REQUEST_HEADERS_METHOD = get_access_headers
70 | ```
71 | 
72 | This function will be called with the current `app` instance to add the headers to each request when calling any endpoint of
73 | metadatalibrary and searchlibrary, as done [here](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/amundsen_application/api/utils/request_utils.py).
74 | 
75 | ## Setting Up Auth User Method
76 | 
77 | In order to get the current authenticated user (which is used in Amundsen for many operations), we need to set
78 | the `AUTH_USER_METHOD` config variable in frontendlibrary.
79 | This function should return the email address, user id and any other required information.
80 | 
81 | - Define a function to fetch the user information in your config.py:
82 | ```python
83 | def get_auth_user(app):
84 |     """
85 |     Retrieves the user information from the OIDC token, and then builds
86 |     a 'UserInfo' class from the token information dictionary.
87 |     We need to convert it to a class in order to use the information
88 |     in the rest of the Amundsen application.
89 |     :param app: The instance of the current app.
90 |     :return: A UserInfo class
91 |     """
92 |     from flask import g
93 |     user_info = type('UserInfo', (object,), g.oidc_id_token)
94 |     # noinspection PyUnresolvedReferences
95 |     user_info.user_id = user_info.preferred_username
96 |     return user_info
97 | ```
98 | 
99 | - Set the method as the auth user method in your config.py:
100 | ```python
101 | AUTH_USER_METHOD = get_auth_user
102 | ```
103 | 
104 | Once done, you'll have end-to-end authentication in Amundsen without any proxy or code changes.
105 | 
106 | ## Using Okta with Amundsen on K8s
107 | 
108 | Assumptions:
109 | 
110 | - You have access to Okta (you can create a developer account for free!)
111 | - You are using k8s to set up Amundsen. See [amundsen-kube-helm](../../amundsen-kube-helm/README.md)
112 | 
113 | 1. You need to have a stable DNS entry for amundsen-frontend that can be registered in Okta.
114 |    - for example, in AWS you can set up Route 53.
115 |    For the rest of this tutorial, I will assume that your stable URI is "http://amundsen-frontend".
116 | 2. You need to register Amundsen in Okta as an app. More info [here](https://developer.okta.com/blog/2018/07/12/flask-tutorial-simple-user-registration-and-login).
117 |    But here are the Amundsen-specific instructions:
118 |    - At this time, I have only successfully tested the integration after ALL grants were checked.
119 |    - Set the Login redirect URIs to: `http://amundsen-frontend/oidc_callback`
120 |    - No need to set a logout redirect URI
121 |    - Set the Initiate login URI to: `http://amundsen-frontend/`
122 |      (This is where Okta will take users if they click on Amundsen via the Okta landing page.)
123 |    - Copy the Client ID and Client secret as you will need these later.
124 | 3. At present, there is no OIDC build of the frontend, so you will need to build an OIDC-enabled image yourself and upload it to, for example, ECR for use by k8s.
125 | You can then specify which image you want to use as a property override for your helm install like so: 126 | 127 | ```yaml 128 | frontEndServiceImage: 123.dkr.ecr.us-west-2.amazonaws.com/edmunds/amundsen-frontend:oidc-test 129 | ``` 130 | 131 | Please see further down in this doc for more instructions on how to build frontend. 132 | 4. When you start up helm you will need to provide some properties. Here are the properties that need to be overridden for oidc to work: 133 | 134 | ```yaml 135 | oidcEnabled: true 136 | createOidcSecret: true 137 | OIDC_CLIENT_ID: YOUR_CLIENT_ID 138 | OIDC_CLIENT_SECRET: YOUR_SECRET_ID 139 | OIDC_ORG_URL: https://edmunds.okta.com 140 | OIDC_AUTH_SERVER_ID: default 141 | # You also will need a custom oidc frontend build too 142 | frontEndServiceImage: 123.dkr.ecr.us-west-2.amazonaws.com/edmunds/amundsen-frontend:oidc-test 143 | ``` 144 | 145 | ## Building frontend with OIDC 146 | 147 | 1. Please look at [this guide](../developer_guide.md) for instructions on how to build a custom frontend docker image. 148 | 2. The only difference to above is that in your docker file you will want to add the following at the end. This will make sure its ready to go for oidc. 149 | You can take alook at the public.Dockerfile as a reference. 150 | 151 | ```dockerfile 152 | RUN pip3 install .[oidc] 153 | ENV FRONTEND_SVC_CONFIG_MODULE_CLASS amundsen_application.oidc_config.OidcConfig 154 | ENV APP_WRAPPER flaskoidc 155 | ENV APP_WRAPPER_CLASS FlaskOIDC 156 | ENV FLASK_OIDC_WHITELISTED_ENDPOINTS status,healthcheck,health 157 | ENV SQLALCHEMY_DATABASE_URI sqlite:///sessions.db 158 | ``` 159 | 160 | 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "{}" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2018 Lyft, Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /amundsen-kube-helm/README.md: -------------------------------------------------------------------------------- 1 | # Amundsen K8s Helm Charts 2 | 3 | Source code can be found [here](https://github.com/amundsen-io/amundsen) 4 | 5 | ## What is this? 6 | 7 | This is setup templates for deploying [amundsen](https://github.com/amundsen-io/amundsen) on [k8s (kubernetes)](https://kubernetes.io/), using [helm.](https://helm.sh/) 8 | 9 | ## How do I get started? 10 | 11 | 1. 
Make sure you have the following command line clients setup: 12 | - k8s (kubectl) 13 | - helm 14 | 2. Build out a cloud based k8s cluster, such as [Amazon EKS](https://aws.amazon.com/eks/) 15 | 3. Ensure you can connect to your cluster with cli tools in step 1. 16 | 17 | ## Prerequisites 18 | 19 | 1. Helm 2.14+ 20 | 2. Kubernetes 1.14+ 21 | 22 | ## Chart Requirements 23 | 24 | | Repository | Name | Version | 25 | |------------|------|---------| 26 | | https://kubernetes-charts.storage.googleapis.com/ | elasticsearch | 1.32.0 | 27 | 28 | ## Chart Values 29 | 30 | The following table lists the configurable parameters of the Amundsen charts and their default values. 31 | 32 | | Key | Type | Default | Description | 33 | |-----|------|---------|-------------| 34 | | LONG_RANDOM_STRING | int | `1234` | A long random string. You should probably provide your own. This is needed for OIDC. | 35 | | affinity | object | `{}` | amundsen application wide configuration of affinity. This applies to search, metadata, frontend and neo4j. Elasticsearch has it's own configuation properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) | 36 | | dnsZone | string | `"teamname.company.com"` | **DEPRECATED - its not standard to pre construct urls this way.** The dns zone (e.g. group-qa.myaccount.company.com) the app is running in. Used to construct dns hostnames (on aws only). | 37 | | dockerhubImagePath | string | `"amundsendev"` | **DEPRECATED - this is not useful, it would be better to just allow the whole image to be swapped instead.** The image path for dockerhub. | 38 | | elasticsearch.client.replicas | int | `1` | only running amundsen on 1 client replica | 39 | | elasticsearch.cluster.env.EXPECTED_MASTER_NODES | int | `1` | required to match master.replicas | 40 | | elasticsearch.cluster.env.MINIMUM_MASTER_NODES | int | `1` | required to match master.replicas | 41 | | elasticsearch.cluster.env.RECOVER_AFTER_MASTER_NODES | int | `1` | required to match master.replicas | 42 | | elasticsearch.data.replicas | int | `1` | only running amundsen on 1 data replica | 43 | | elasticsearch.enabled | bool | `true` | set this to false, if you want to provide your own ES instance. | 44 | | elasticsearch.master.replicas | int | `1` | only running amundsen on 1 master replica | 45 | | environment | string | `"dev"` | **DEPRECATED - its not standard to pre construct urls this way.** The environment the app is running in. Used to construct dns hostnames (on aws only) and ports. | 46 | | frontEnd.OIDC_AUTH_SERVER_ID | string | `nil` | The authorization server id for OIDC. | 47 | | frontEnd.OIDC_CLIENT_ID | string | `nil` | The client id for OIDC. | 48 | | frontEnd.OIDC_CLIENT_SECRET | string | `""` | The client secret for OIDC. | 49 | | frontEnd.OIDC_ORG_URL | string | `nil` | The organization URL for OIDC. | 50 | | frontEnd.affinity | object | `{}` | Frontend pod specific affinity. | 51 | | frontEnd.annotations | object | `{}` | Frontend service specific tolerations. | 52 | | frontEnd.baseUrl | string | `"http://localhost"` | used by notifications util to provide links to amundsen pages in emails. | 53 | | frontEnd.createOidcSecret | bool | `false` | OIDC needs some configuration. If you want the chart to make your secrets, set this to true and set the next four values. 
If you don't want to configure your secrets via helm, you can still use the amundsen-oidc-config.yaml as a template | 54 | | frontEnd.image | string | `"amundsendev/amundsen-frontend"` | The image of the frontend container. | 55 | | frontEnd.imageTag | string | `"2.0.0"` | The image tag of the frontend container. | 56 | | frontEnd.nodeSelector | object | `{}` | Frontend pod specific nodeSelector. | 57 | | frontEnd.oidcEnabled | bool | `false` | To enable auth via OIDC, set this to true. | 58 | | frontEnd.podAnnotations | object | `{}` | Frontend pod specific annotations. | 59 | | frontEnd.replicas | int | `1` | How many replicas of the frontend service to run. | 60 | | frontEnd.resources | object | `{}` | See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | 61 | | frontEnd.serviceName | string | `"frontend"` | The frontend service name. | 62 | | frontEnd.servicePort | int | `80` | The port the frontend service will be exposed on via the loadbalancer. | 63 | | frontEnd.serviceType | string | `"ClusterIP"` | The frontend service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) | 64 | | frontEnd.tolerations | list | `[]` | Frontend pod specific tolerations. | 65 | | metadata.affinity | object | `{}` | Metadata pod specific affinity. | 66 | | metadata.annotations | object | `{}` | Metadata service specific tolerations. | 67 | | metadata.image | string | `"amundsendev/amundsen-metadata"` | The image of the metadata container. | 68 | | metadata.imageTag | string | `"2.0.0"` | The image tag of the metadata container. | 69 | | metadata.neo4jEndpoint | string | `nil` | The name of the service hosting neo4j on your cluster, if you bring your own. You should only need to change this, if you don't use the version in this chart. | 70 | | metadata.nodeSelector | object | `{}` | Metadata pod specific nodeSelector. | 71 | | metadata.podAnnotations | object | `{}` | Metadata pod specific annotations. | 72 | | metadata.replicas | int | `1` | How many replicas of the metadata service to run. | 73 | | metadata.resources | object | `{}` | See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | 74 | | metadata.serviceName | string | `"metadata"` | The metadata service name. | 75 | | metadata.serviceType | string | `"ClusterIP"` | The metadata service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) | 76 | | metadata.tolerations | list | `[]` | Metadata pod specific tolerations. | 77 | | neo4j.affinity | object | `{}` | neo4j specific affinity. | 78 | | neo4j.annotations | object | `{}` | neo4j service specific tolerations. | 79 | | neo4j.backup | object | `{"enabled":false,"s3Path":"s3://dev/null","schedule":"0 * * * *"}` | If enabled is set to true, make sure and set the s3 path as well. | 80 | | neo4j.backup.s3Path | string | `"s3://dev/null"` | The s3path to write to for backups. | 81 | | neo4j.backup.schedule | string | `"0 * * * *"` | The schedule to run backups on. Defaults to hourly. | 82 | | neo4j.config | object | `{"dbms":{"heap_initial_size":"23000m","heap_max_size":"23000m","pagecache_size":"26600m"}}` | Neo4j application specific configuration. This type of configuration is why the charts/stable version is not used. 
See [ref](https://github.com/helm/charts/issues/21439) | 83 | | neo4j.config.dbms | object | `{"heap_initial_size":"23000m","heap_max_size":"23000m","pagecache_size":"26600m"}` | dbms config for neo4j | 84 | | neo4j.config.dbms.heap_initial_size | string | `"23000m"` | the initial java heap for neo4j | 85 | | neo4j.config.dbms.heap_max_size | string | `"23000m"` | the max java heap for neo4j | 86 | | neo4j.config.dbms.pagecache_size | string | `"26600m"` | the page cache size for neo4j | 87 | | neo4j.enabled | bool | `true` | If neo4j is enabled as part of this chart, or not. Set this to false if you want to provide your own version. | 88 | | neo4j.nodeSelector | object | `{}` | neo4j specific nodeSelector. | 89 | | neo4j.persistence | object | `{}` | Neo4j persistence. Turn this on to keep your data between pod crashes, etc. This is also needed for backups. | 90 | | neo4j.podAnnotations | object | `{}` | neo4j pod specific annotations. | 91 | | neo4j.resources | object | `{}` | See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | 92 | | neo4j.tolerations | list | `[]` | neo4j specific tolerations. | 93 | | neo4j.version | string | `"3.3.0"` | The neo4j application version used by amundsen. | 94 | | nodeSelector | object | `{}` | amundsen application wide configuration of nodeSelector. This applies to search, metadata, frontend and neo4j. Elasticsearch has it's own configuation properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector) | 95 | | podAnnotations | object | `{}` | amundsen application wide configuration of podAnnotations. This applies to search, metadata, frontend and neo4j. Elasticsearch has it's own configuation properties for this. [ref](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) | 96 | | provider | string | `"aws"` | The cloud provider the app is running in. Used to construct dns hostnames (on aws only). | 97 | | search.affinity | object | `{}` | Search pod specific affinity. | 98 | | search.annotations | object | `{}` | Search service specific tolerations. | 99 | | search.elasticsearchEndpoint | string | `nil` | The name of the service hosting elasticsearch on your cluster, if you bring your own. You should only need to change this, if you don't use the version in this chart. | 100 | | search.image | string | `"amundsendev/amundsen-search"` | The image of the search container. | 101 | | search.imageTag | string | `"2.0.0"` | The image tag of the search container. | 102 | | search.nodeSelector | object | `{}` | Search pod specific nodeSelector. | 103 | | search.podAnnotations | object | `{}` | Search pod specific annotations. | 104 | | search.replicas | int | `1` | How many replicas of the search service to run. | 105 | | search.resources | object | `{}` | See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | 106 | | search.serviceName | string | `"search"` | The search service name. | 107 | | search.serviceType | string | `"ClusterIP"` | The search service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) | 108 | | search.tolerations | list | `[]` | Search pod specific tolerations. | 109 | | tolerations | list | `[]` | amundsen application wide configuration of tolerations. This applies to search, metadata, frontend and neo4j. Elasticsearch has it's own configuation properties for this. 
[ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#taints-and-tolerations-beta-feature) | 110 | 111 | ## Neo4j DBMS Config? 112 | 113 | You may want to override the default memory usage for Neo4J. In particular, if you're just test-driving a deployment and your node exits with status 137, you should set the usage to smaller values: 114 | 115 | ``` yaml 116 | config: 117 | dbms: 118 | heap_initial_size: 1G 119 | heap_max_size: 2G 120 | pagecache_size: 2G 121 | ``` 122 | 123 | With this values file, you can then install Amundsen using Helm 2 with: 124 | 125 | ``` shell 126 | helm install ./templates/helm --values impl/helm/dev/values.yaml 127 | ``` 128 | 129 | For Helm 3 it's now mandatory to specify a [chart reference name](https://helm.sh/docs/intro/using_helm/#helm-install-installing-a-package) e.g. `my-amundsen`: 130 | 131 | ``` shell 132 | helm install my-amundsen ./templates/helm --values impl/helm/dev/values.yaml 133 | ``` 134 | 135 | ## Other Notes 136 | 137 | - For aws setup, you will also need to setup the [external-dns plugin](https://github.com/kubernetes-incubator/external-dns) 138 | - There is an existing helm chart for neo4j, but, it is missing some features necessary to for use such as: 139 | - [\[stable/neo4j\] make neo4j service definition more extensible](https://github.com/helm/charts/issues/21441); without this, it is not possible to setup external load balancers, external-dns, etc 140 | - [\[stable/neo4j\] allow custom configuration of neo4j](https://github.com/helm/charts/issues/21439); without this, custom configuration is not possible which includes setting configmap based settings, which also includes turning on apoc. 141 | -------------------------------------------------------------------------------- /docs/developer_guide.md: -------------------------------------------------------------------------------- 1 | # Developer Guide 2 | 3 | This repository uses `git submodules` to link the code for all of Amundsen's libraries into a central location. This document offers guidance on how to develop locally with this setup. 4 | 5 | This workflow leverages `docker` and `docker-compose` in a very similar manner to our [installation documentation](https://github.com/amundsen-io/amundsen/blob/master/docs/installation.md#bootstrap-a-default-version-of-amundsen-using-docker), to spin up instances of all 3 of Amundsen's services connected with an instances of Neo4j and ElasticSearch which ingest dummy data. 6 | 7 | ## Cloning the Repository 8 | 9 | If cloning the repository for the first time, run the following command to clone the repository and pull the submodules: 10 | 11 | ```bash 12 | $ git clone --recursive git@github.com:amundsen-io/amundsen.git 13 | ``` 14 | 15 | If you have already cloned the repository but your submodules are empty, from your cloned `amundsen` directory run: 16 | 17 | ```bash 18 | $ git submodule init 19 | $ git submodule update 20 | ``` 21 | 22 | After cloning the repository you can change directories into any of the upstream folders and work in those directories as you normally would. You will have full access to all of the git features, and working in the upstream directories will function the same as if you were working in a cloned version of that repository. 
23 | 
24 | ## Local Development
25 | 
26 | ### Ensure you have the latest code
27 | 
28 | Beyond running `git pull origin master` in your local `amundsen` directory, the submodules for our libraries also have to be manually updated to point to the latest version of each library's code. When creating a new branch on `amundsen` to begin local work, ensure your local submodules are pointing to the latest code for each library by running:
29 | 
30 | ```bash
31 | $ git submodule update --remote
32 | ```
33 | 
34 | ### Building local changes
35 | 
36 | 1. First, be sure that you have followed the [installation documentation](https://github.com/amundsen-io/amundsen/blob/master/docs/installation.md#bootstrap-a-default-version-of-amundsen-using-docker) and can spin up a default version of Amundsen without any issues. If you have already completed this step, be sure to have stopped and removed those containers by running:
37 | ```bash
38 | $ docker-compose -f docker-amundsen.yml down
39 | ```
40 | 
41 | 2. Launch the containers needed for local development (the `-d` option launches them in the background):
42 | ```bash
43 | $ docker-compose -f docker-amundsen-local.yml up -d
44 | ```
45 | 
46 | 3. After making local changes, rebuild and relaunch the modified containers:
47 | ```bash
48 | $ docker-compose -f docker-amundsen-local.yml build \
49 |   && docker-compose -f docker-amundsen-local.yml up -d
50 | ```
51 | 
52 | 4. Optionally, to tail the logs, in a different terminal you can run:
53 | ```bash
54 | $ docker-compose -f docker-amundsen-local.yml logs --tail=3 -f
55 | ## - or just tail single container(s):
56 | $ docker logs amundsenmetadata --tail 10 -f
57 | ```
58 | 
59 | ### Local data
60 | 
61 | Local data is persisted under .local/ (at the root of the project); clean up the following directories to reset the databases:
62 | 
63 | ```bash
64 | # reset elasticsearch
65 | rm -rf .local/elasticsearch
66 | 
67 | # reset neo4j
68 | rm -rf .local/neo4j
69 | ```
70 | 
71 | 
72 | ### Troubleshooting
73 | 
74 | 1. If you have made a change in `amundsen/amundsenfrontendlibrary` and do not see your changes, this could be due to your browser's caching behaviors. Either execute a hard refresh (recommended) or clear your browser cache (last resort).
75 | 
76 | ### Testing Amundsen frontend locally
77 | 
78 | Amundsen has instructions for launching the frontend locally [here](https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/docs/installation.md)
79 | 
80 | Here are some additional changes you might need for Windows (Windows 10):
81 | 
82 | - amundsen_application/config.py, set LOCAL_HOST = '127.0.0.1'
83 | - amundsen_application/wsgi.py, set host='127.0.0.1'
84 |   (for other microservices you also need to change the `port` here, because the default is 5000)
85 | 
86 | (Using that approach you can also run other microservices locally if needed.)
87 | 
88 | Once you have a running frontend microservice, the rest of the Amundsen components can be launched with docker-compose
89 | from the root Amundsen project (don't forget to remove the frontend microservice section from docker-amundsen.yml):
90 | `docker-compose -f docker-amundsen.yml up`
91 | https://github.com/amundsen-io/amundsen/blob/master/docs/installation.md
92 | 
93 | ### Developing the Docker build file
94 | 
95 | When making edits to the Docker build file (docker-amundsen-local.yml), it is useful to see what you are getting wrong locally.
96 | To do that, build it with `docker build .`
97 | 
98 | The output should then include a line like the following at the step right before it failed:
99 | 
100 | ```bash
101 | Step 3/20 : RUN git clone --recursive git://github.com/amundsen-io/amundsenfrontendlibrary.git && cd amundsenfrontendlibrary && git submodule foreach git pull origin master
102 | ---> Using cache
103 | ---> ec052612747e
104 | ```
105 | 
106 | You can then launch a container from this image like so:
107 | 
108 | ```bash
109 | docker container run -it --name=debug ec052612747e /bin/sh
110 | ```
111 | 
112 | ### Building and Testing Amundsen Frontend Docker Image (or any other service)
113 | 
114 | 1. Build your image with
115 |    `docker build --no-cache .` It is recommended that you use --no-cache so you aren't accidentally using an old version of an image.
116 | 2. Determine the hash of your image by running `docker images` and getting the id of your most recent image.
117 | 3. Go to your locally cloned amundsen repo and edit the docker compose file "docker-amundsen.yml" to have
118 |    the amundsenfrontend image point to the hash of the image that you built.
119 | 
120 | ```yaml
121 | amundsenfrontend:
122 |   #image: amundsendev/amundsen-frontend:1.0.9
123 |   #image: 1234.dkr.ecr.us-west-2.amazonaws.com/edmunds/amundsen-frontend:2020-01-21
124 |   image: 0312d0ac3938
125 | ```
126 | 
127 | ### Pushing image to ECR and using in K8s
128 | 
129 | Assumptions:
130 | 
131 | - You have an AWS account
132 | - You have the AWS command line set up and ready to go
133 | 
134 | 1. Choose an ECR repository you'd like to push to (or create a new one):
135 |    https://us-west-2.console.aws.amazon.com/ecr/repositories
136 | 2. Click on the repository name and open the "View push commands" cheat sheet.
137 |    2b. Log in
138 | 
139 |    It would look something like this:
140 | 
141 |    `aws ecr get-login --no-include-email --region us-west-2`
142 |    Then execute what is returned by the above.
143 | 
144 | 3. Follow the instructions (you may first need to install the AWS CLI and aws-okta, and configure your AWS credentials if you haven't done so before).
145 |    Given the image name amundsen-frontend, the build, tag and push commands will be the following.
146 |    Here the tag is YYYY-MM-dd, but you should choose whatever you like.
147 | ```
148 | docker build -t amundsen-frontend:{YYYY-MM-dd} .
149 | docker tag amundsen-frontend:{YYYY-MM-dd} <aws-account-id>.dkr.ecr.<region>.amazonaws.com/amundsen-frontend:{YYYY-MM-dd}
150 | docker push <aws-account-id>.dkr.ecr.<region>.amazonaws.com/amundsen-frontend:{YYYY-MM-dd}
151 | ```
152 | 
153 | 4. Go to `helm/{env}/amundsen/values.yaml` and modify it to the image tag that you want to use.
154 | 
155 | 5. When updating amundsen-frontend, make sure to do a hard refresh of Amundsen and empty the cache,
156 |    otherwise you will see a stale version of the webpage.
157 | 
158 | ### Test the search service locally using staging or production data
159 | 
160 | To test locally, we need to stand up Elasticsearch, publish the index data, and stand up the Search service.
161 | 
162 | #### Standup Elasticsearch
163 | 
164 | Run Elasticsearch via Docker. To install Docker, go [here](https://hub.docker.com/editions/community/docker-ce-desktop-mac).
165 | Example:
166 | 
167 |     docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:6.2.4
168 | 
169 | ##### (Optional) Standup Kibana
170 | 
171 |     docker run --link ecstatic_edison:elasticsearch -p 5601:5601 docker.elastic.co/kibana/kibana:6.2.4
172 | 
173 | *Note that `ecstatic_edison` is the container name of the Elasticsearch container.
Update it if it's different by looking at `docker ps` 174 | 175 | #### Publish Table index through Databuilder 176 | 177 | ##### Install Databuilder 178 | 179 | cd ~/src/ 180 | git clone git@github.com:amundsen-io/amundsendatabuilder.git 181 | cd ~/src/amundsendatabuilder 182 | virtualenv venv 183 | source venv/bin/activate 184 | python setup.py install 185 | pip install -r requirements.txt 186 | 187 | ##### Publish Table index 188 | 189 | First fill this two environment variables: `NEO4J_ENDPOINT` , `CREDENTIALS_NEO4J_PASSWORD` 190 | 191 | $ python 192 | 193 | import logging 194 | import os 195 | import uuid 196 | 197 | from elasticsearch import Elasticsearch 198 | from pyhocon import ConfigFactory 199 | 200 | from databuilder.extractor.neo4j_extractor import Neo4jExtractor 201 | from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor 202 | from databuilder.job.job import DefaultJob 203 | from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader 204 | from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher 205 | from databuilder.task.task import DefaultTask 206 | 207 | logging.basicConfig(level=logging.INFO) 208 | 209 | neo4j_user = 'neo4j' 210 | neo4j_password = os.getenv('CREDENTIALS_NEO4J_PASSWORD') 211 | neo4j_endpoint = os.getenv('NEO4J_ENDPOINT') 212 | 213 | elasticsearch_client = Elasticsearch([ 214 | {'host': 'localhost'}, 215 | ]) 216 | 217 | data_file_path = '/var/tmp/amundsen/elasticsearch_upload/es_data.json' 218 | 219 | elasticsearch_new_index = 'table_search_index_{hex_str}'.format(hex_str=uuid.uuid4().hex) 220 | logging.info("Elasticsearch new index: " + elasticsearch_new_index) 221 | 222 | elasticsearch_doc_type = 'table' 223 | elasticsearch_index_alias = 'table_search_index' 224 | 225 | job_config = ConfigFactory.from_dict({ 226 | 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.GRAPH_URL_CONFIG_KEY): 227 | neo4j_endpoint, 228 | 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.MODEL_CLASS_CONFIG_KEY): 229 | 'databuilder.models.table_elasticsearch_document.TableESDocument', 230 | 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_USER): 231 | neo4j_user, 232 | 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_PW): 233 | neo4j_password, 234 | 'loader.filesystem.elasticsearch.{}'.format(FSElasticsearchJSONLoader.FILE_PATH_CONFIG_KEY): 235 | data_file_path, 236 | 'loader.filesystem.elasticsearch.{}'.format(FSElasticsearchJSONLoader.FILE_MODE_CONFIG_KEY): 237 | 'w', 238 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.FILE_PATH_CONFIG_KEY): 239 | data_file_path, 240 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.FILE_MODE_CONFIG_KEY): 241 | 'r', 242 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_CLIENT_CONFIG_KEY): 243 | elasticsearch_client, 244 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_NEW_INDEX_CONFIG_KEY): 245 | elasticsearch_new_index, 246 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_DOC_TYPE_CONFIG_KEY): 247 | elasticsearch_doc_type, 248 | 'publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_ALIAS_CONFIG_KEY): 249 | elasticsearch_index_alias, 250 | }) 251 | 252 | job = DefaultJob(conf=job_config, 253 | task=DefaultTask(extractor=Neo4jSearchDataExtractor(), 254 | loader=FSElasticsearchJSONLoader()), 255 | publisher=ElasticsearchPublisher()) 256 | if neo4j_password: 
257 | job.launch() 258 | else: 259 | raise ValueError('Add environment variable CREDENTIALS_NEO4J_PASSWORD') 260 | 261 | #### Standup Search service 262 | 263 | Follow this [instruction](https://github.com/amundsen-io/amundsensearchlibrary#instructions-to-start-the-search-service-from-source) 264 | 265 | Test the search API with this command: 266 | 267 | curl -vv "http://localhost:5001/search?query_term=test&page_index=0" 268 | -------------------------------------------------------------------------------- /GOVERNANCE.md: -------------------------------------------------------------------------------- 1 | # Governance 2 | 3 | At Amundsen, we want to produce an environment of fairness that people can rely on. A formal governance structure helps us resolve debates, invite in (or out) new developers and plan new features. 4 | 5 | With the following governance system, we want to facilitate project permanence, supporting it with healthy habits and processes that are well understood by everyone. 6 | 7 | ## Amundsen Governance Model 8 | 9 | Amundsen is a meritocratic, consensus-based community project. Anyone interested in the project can join the community, contribute to the project design, and participate in the decision-making process. This document describes how that participation occurs and how to set about earning merit within the project community. 10 | 11 | ## Roles And Responsibilities 12 | 13 | ### Users 14 | 15 | Users are community members who need the data discovery features of Amundsen. They are the most important community members, and without them, the project would have no purpose. Anyone can be a user; there are no special requirements. 16 | 17 | Amundsen asks its users to participate in the project and community as much as possible. User contributions enable the project team to ensure that they satisfy the needs of those users. Frequent user contributions include (but are not limited to): 18 | 19 | - Evangelizing about the project (e.g., a link on a website and word-of-mouth awareness raising) 20 | - Informing developers of strengths and weaknesses from a new user perspective 21 | - Providing moral support (a ‘thank you’ goes a long way) 22 | - Providing financial support (the software is open source, but its developers need to eat) 23 | 24 | Users who continue to engage with the project and its community will often become more and more involved. Such users may find themselves becoming contributors, as described in the next section. 25 | 26 | ### Contributors 27 | 28 | Contributors are community members who contribute in concrete ways to the project. Anyone can become a contributor, and contributions can take many forms, as detailed in the [all-contributors project](https://allcontributors.org/docs/en/emoji-key#table). There is no expectation of commitment to the project, no specific skill requirements, and no selection process. 29 | 30 | In addition to their actions as users, contributors may also find themselves doing one or more of the following: 31 | 32 | - Supporting new users (existing users are often the best people to help new users) 33 | - Creating, triaging or commenting on Issues 34 | - Doing code reviews or commenting on technical documents 35 | - Writing, editing, translating or reviewing the documentation 36 | - Organizing events or evangelizing the project 37 | 38 | Contributors engage with the project through the issue tracker and slack community, or by writing or editing documentation. 
They submit changes to the project itself via Pull Requests (PRs), which will be considered for inclusion in the project by existing maintainers (see next section). Contributors follow the [Contributing guide](https://www.amundsen.io/amundsen/CONTRIBUTING/) when creating PRs.
39 | 
40 | As contributors gain experience and familiarity with the project, their profile and commitment within the community will increase. At some stage, they may find themselves being nominated to become a maintainer.
41 | 
42 | ### Maintainers
43 | 
44 | Maintainers are community members who have shown that they are committed to Amundsen's continued development through ongoing engagement with the community. Because of this, maintainers have the right to merge PRs and triage issues.
45 | 
46 | Note that any change to resources in Amundsen must be through pull requests. This applies to all changes to documentation, code, configuration, etc. Even maintainers must use pull requests, as they are key to providing transparency and attracting new contributors to the project. Additionally, no pull request can be merged without being reviewed.
47 | 
48 | Anyone can become a maintainer. Typically, a potential maintainer will need to show that they understand the project, its objectives, and its strategy. They will also have provided valuable contributions to the project over a period of time. Read the sections below to know how to become an Amundsen maintainer.
49 | 
50 | ## Becoming a Maintainer
51 | 
52 | Any existing maintainer can nominate new maintainers. Once they have been nominated, there will be a vote by the rest of the maintainers. Maintainer voting is one of the few activities that take place on a private channel. This is to allow maintainers to freely express their opinions about a nominee without causing embarrassment. The approval requires **three +1 votes from maintainers** and **no -1 votes**.
53 | 
54 | Once the vote has been held, the aggregated voting results are published on the #amundsen channel. The nominee is entitled to request an explanation of any ‘no’ votes against them, regardless of the vote's outcome. This explanation will be provided by the maintainers and will be anonymous and constructive.
55 | 
56 | Nominees may decline their appointment as a maintainer. Becoming a maintainer means that they will be spending a substantial amount of time working on Amundsen for the foreseeable future. It is essential to recognize that being a maintainer is a privilege, not a right. That privilege must be earned, and once earned, the rest of the maintainers can remove it in extreme circumstances.
57 | 
58 | ### Earning a Nomination
59 | 
60 | There is no single path to earning a nomination for maintainer at Amundsen; however, we can give some guidance on actions that would help:
61 | 
62 | - Start by telling the maintainers that you are interested in becoming a maintainer.
63 | - You can start tackling issues labeled as ['help wanted'](https://github.com/amundsen-io/amundsen/labels/help%20wanted), or if you are new to the project, some of the ['good first issue'](https://github.com/amundsen-io/amundsen/labels/good%20first%20issue) tickets.
64 | - As you gain experience with the codebase and our standards, we will ask you to do code reviews for incoming PRs (i.e., all maintainers are expected to shoulder a proportional share of community reviews).
65 | - We will expect you to start contributing increasingly complicated PRs, under the guidance of the existing maintainers.
66 | - After approximately 2-3 months of working together, an existing maintainer will be able to nominate you for maintainer status. 67 | 68 | We make no guarantees about the length of time this will take, but 2-3 months is the approximate goal. 69 | 70 | ### Maintainer Responsibilities 71 | 72 | The project maintainers are those individuals identified as ‘project owners’ on the development site. Maintainers have many responsibilities that ensure the smooth running of the project. Among them: 73 | 74 | - Monitor email aliases and our Slack (delayed response is perfectly acceptable). 75 | - Perform code reviews for other maintainers and the community. The areas of specialization listed in [OWNERS.md](OWNERS.md) can be used to help with routing an issue/question to the right person. 76 | - Triage GitHub issues, applying [labels](https://github.com/amundsen-io/amundsen/labels) to each new item. Labels are extremely useful for future issue follow-ups. Adding labels is somewhat subjective, so please use your best judgment. Read more about our labels in [this document](https://www.amundsen.io/amundsen/issue_labeling/). 77 | - Triage build issues: file issues for known flaky builds or bugs, and fix (or find someone to fix) any master build breakages. 78 | - Make sure that ongoing PRs are moving forward at the right pace, or close them. 79 | - Continue to spend at least 25% of your time working on Amundsen (~1.25 business days per week). 80 | - Participate in strategic planning, approve changes to the governance model, and manage the copyrights within the project outputs. 81 | 82 | ## Losing Maintainer Status 83 | 84 | If a maintainer is no longer interested in the project or cannot perform the duties listed above, they can volunteer to be moved to emeritus status; otherwise, maintainer status is held for life. An emeritus maintainer may request reinstatement of commit access from the rest of the maintainers. Such reinstatement is subject to lazy consensus approval by the active maintainers. 85 | 86 | In extreme cases, maintainers can lose their status by a vote of the maintainers per the voting process below. 87 | 88 | ## Decision Making Process 89 | 90 | Decisions about the future of Amundsen are made through discussion with all community members, from the newest user to the most experienced maintainer. All non-sensitive project management discussion takes place in the project's issue tracker. Occasionally, sensitive discussion occurs on a private channel of our Slack. 91 | 92 | To ensure that the project is not bogged down by endless discussion and continual voting, the project operates a policy of lazy consensus. This allows the majority of decisions to be made without resorting to a formal vote. 93 | 94 | ### Lazy consensus 95 | 96 | Decision making typically involves the following steps: 97 | 98 | - Proposal 99 | - Discussion 100 | - Vote (if consensus is not reached through discussion) 101 | - Decision 102 | 103 | Any community member can make a proposal for consideration by the community. To initiate a discussion about a new idea, they should create an issue in the issue tracker or submit a PR implementing the idea. This will prompt a review and, if necessary, a discussion of the idea. The goal of this review and discussion is to gain approval for the contribution. Since most people in the project community have a shared vision, there is often little discussion to reach consensus.
104 | 105 | In general, as long as nobody explicitly opposes a proposal or PR, it is recognized as having the support of the community. This is called lazy consensus - that is, those who have not stated their opinion explicitly have implicitly agreed to the proposal's implementation. 106 | 107 | Lazy consensus is a fundamental concept within the project. This process allows a large group of people to reach consensus efficiently, since someone with no objections to a proposal need not spend time stating their position. 108 | 109 | For lazy consensus to be effective, it is necessary to allow at least 48 hours before assuming that there are no objections to the proposal. This requirement ensures that everyone is given enough time to read, digest, and respond to the proposal. This time period is chosen to be as inclusive as possible of all participants, regardless of their location and time commitments. 110 | 111 | ### Voting 112 | 113 | Not all decisions can be made using lazy consensus. Issues such as those affecting the strategic direction or legal standing of the project must gain explicit approval in the form of a vote. Every member of the community is encouraged to express their opinions in all discussions and all votes. However, only project maintainers have binding votes for the purposes of decision making. 114 | 115 | ## Roadmap Creation 116 | 117 | Our [roadmap](https://www.amundsen.io/amundsen/roadmap/) gives an overview of what we are currently working on and what we want to tackle next. This helps potential contributors understand the project's current status and where it is going next, and gives them a chance to be part of the planning. 118 | 119 | In this section, we describe the process we follow to create it, using request-for-comments (RFC) documents. 120 | 121 | ### RFCs Process 122 | 123 | Most of the issues we see can be handled with regular GitHub issues. However, some changes are "substantial", and we ask that these go through a design process and reach consensus among the Amundsen community. 124 | 125 | The "RFC" (request for comments) process is intended to provide a consistent and controlled path for new features to enter the roadmap. The high-level process looks like this: 126 | 127 | 1. A contributor creates an RFC draft in the repository 128 | 2. Users, Contributors, and Maintainers discuss and upvote the draft 129 | 3. If confident in its success, the contributor completes the RFC with more detailed technical specifications 130 | 4. Maintainers approve the RFC when it is ready 131 | 5. Maintainers meet every quarter and choose three to five items based on popularity and alignment with the project's vision and goals 132 | 6.
Those selected items become part of the mid-term goals 133 | 134 | #### When to Use RFCs 135 | 136 | What constitutes a "substantial" change is evolving based on the community, but may include the following: 137 | 138 | - New features that require configuration options to activate/deactivate 139 | - Removing features 140 | - Architecture changes 141 | - Examples: 142 | - Adding lineage features 143 | - Dashboards integration 144 | 145 | Some changes do not require an RFC: 146 | 147 | - Reorganizing or refactoring code or documentation 148 | - Improvements that tackle objective quality criteria (speedup, better browser support) 149 | - Changes noticeable only by contributors or maintainers 150 | - Examples: 151 | - Adding programmatic descriptions 152 | - Adding support for tags at a column level 153 | 154 | If you submit a pull request to implement a new feature without going through the RFC process, it may be closed with a polite request to submit an RFC first. That said, if most of the work is already done, we may accelerate the process. 155 | 156 | We will keep our RFC documents in a separate repository in the amundsen-io organization, where a detailed step-by-step process will be documented. 157 | 158 | ## References 159 | 160 | - [Envoy’s Governance Document](https://github.com/envoyproxy/envoy/blob/master/GOVERNANCE.md) 161 | - [OSS Watch, Meritocratic Governance](http://oss-watch.ac.uk/resources/meritocraticgovernancemodel) 162 | - [The Apache Software Foundation meritocratic model](http://www.apache.org/foundation/how-it-works.html#meritocracy) 163 | - [Ember RFCs](https://github.com/emberjs/rfcs) 164 | -------------------------------------------------------------------------------- /amundsen-kube-helm/templates/helm/values.yaml: -------------------------------------------------------------------------------- 1 | # Duplicate this file and put your customization here 2 |
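# An illustrative sketch (not part of the chart defaults): rather than editing
# this file in place, you would typically copy it, or create a smaller override
# file containing only the values you want to change, and pass that file to
# helm with the -f flag when installing or upgrading the chart. A hypothetical
# my-values.yaml overriding a couple of the settings defined below might look
# like:
#
#   frontEnd:
#     baseUrl: https://amundsen.mycompany.com
#   neo4j:
#     persistence:
#       storageClass: gp2
#       size: 10Gi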
3 | ## 4 | ## common settings for all apps 5 | ## 6 | 7 | ## NOTE - README table was generated with https://github.com/norwoodj/helm-docs 8 | 9 | ## 10 | ## environment -- **DEPRECATED - it's not standard to pre-construct urls this way.** The environment the app is running in. Used to construct dns hostnames (on aws only) and ports. 11 | ## 12 | environment: "dev" 13 | ## 14 | ## DEPRECATED - it's not standard to pre-construct urls this way 15 | ## provider -- The cloud provider the app is running in. Used to construct dns hostnames (on aws only). 16 | ## 17 | provider: aws 18 | ## 19 | ## dnsZone -- **DEPRECATED - it's not standard to pre-construct urls this way.** The dns zone (e.g. group-qa.myaccount.company.com) the app is running in. Used to construct dns hostnames (on aws only). 20 | ## 21 | dnsZone: teamname.company.com 22 | ## 23 | ## dockerhubImagePath -- **DEPRECATED - this is not useful, it would be better to just allow the whole image to be swapped instead.** The image path for dockerhub. 24 | ## 25 | dockerhubImagePath: amundsendev 26 | ## 27 | ## LONG_RANDOM_STRING -- A long random string. You should probably provide your own. This is needed for OIDC. 28 | ## 29 | LONG_RANDOM_STRING: 1234 30 | 31 | ## 32 | ## nodeSelector -- amundsen application-wide configuration of nodeSelector. This applies to search, metadata, frontend and neo4j. Elasticsearch has its own configuration properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector) 33 | ## 34 | nodeSelector: {} 35 | ## 36 | ## affinity -- amundsen application-wide configuration of affinity. This applies to search, metadata, frontend and neo4j. Elasticsearch has its own configuration properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) 37 | ## 38 | affinity: {} 39 | ## 40 | ## tolerations -- amundsen application-wide configuration of tolerations. This applies to search, metadata, frontend and neo4j. Elasticsearch has its own configuration properties for this. [ref](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#taints-and-tolerations-beta-feature) 41 | ## 42 | tolerations: [] 43 | ## 44 | ## podAnnotations -- amundsen application-wide configuration of podAnnotations. This applies to search, metadata, frontend and neo4j. Elasticsearch has its own configuration properties for this. [ref](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) 45 | ## 46 | podAnnotations: {} 47 | 48 | ## 49 | ## Configuration related to the search service. 50 | ## 51 | search: 52 | ## 53 | ## search.serviceName -- The search service name. 54 | ## 55 | serviceName: search 56 | ## 57 | ## search.serviceType -- The search service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) 58 | ## 59 | serviceType: ClusterIP 60 | ## 61 | ## search.elasticsearchEndpoint -- The name of the service hosting elasticsearch on your cluster, if you bring your own. You should only need to change this if you don't use the version in this chart. 62 | ## 63 | elasticsearchEndpoint: 64 | ## 65 | ## search.image -- The image of the search container. 66 | ## 67 | image: amundsendev/amundsen-search 68 | ## 69 | ## search.imageTag -- The image tag of the search container. 70 | ## 71 | imageTag: 2.4.0 72 | ## 73 | ## search.replicas -- How many replicas of the search service to run. 74 | ## 75 | replicas: 1 76 | ## 77 | ## search.resources -- See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) 78 | ## 79 | resources: {} 80 | # limits: 81 | # cpu: 2 82 | # memory: 2Gi 83 | # requests: 84 | # cpu: 1 85 | # memory: 1Gi 86 | 87 | ## 88 | ## search.nodeSelector -- Search pod specific nodeSelector. 89 | ## 90 | nodeSelector: {} 91 | ## 92 | ## search.affinity -- Search pod specific affinity. 93 | ## 94 | affinity: {} 95 | ## 96 | ## search.tolerations -- Search pod specific tolerations. 97 | ## 98 | tolerations: [] 99 | ## 100 | ## search.annotations -- Search service specific annotations. 101 | ## 102 | annotations: {} 103 | ## 104 | ## search.podAnnotations -- Search pod specific annotations. 105 | ## 106 | podAnnotations: {} 107 | 108 | ## 109 | ## Configuration related to the metadata service. 110 | ## 111 | metadata: 112 | ## 113 | ## metadata.serviceName -- The metadata service name. 114 | ## 115 | serviceName: metadata 116 | ## 117 | ## metadata.serviceType -- The metadata service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) 118 | ## 119 | serviceType: ClusterIP 120 | ## 121 | ## metadata.neo4jEndpoint -- The name of the service hosting neo4j on your cluster, if you bring your own. You should only need to change this if you don't use the version in this chart. 122 | ## 123 | neo4jEndpoint: 124 | ## 125 | ## metadata.image -- The image of the metadata container. 126 | ## 127 | image: amundsendev/amundsen-metadata 128 | ## 129 | ## metadata.imageTag -- The image tag of the metadata container.
130 | ## 131 | imageTag: 2.5.5 132 | ## 133 | ## metadata.replicas -- How many replicas of the metadata service to run. 134 | ## 135 | replicas: 1 136 | ## 137 | ## metadata.resources -- See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) 138 | ## 139 | resources: {} 140 | # limits: 141 | # cpu: 2 142 | # memory: 2Gi 143 | # requests: 144 | # cpu: 1 145 | # memory: 1Gi 146 | 147 | ## 148 | ## metadata.nodeSelector -- Metadata pod specific nodeSelector. 149 | ## 150 | nodeSelector: {} 151 | ## 152 | ## metadata.affinity -- Metadata pod specific affinity. 153 | ## 154 | affinity: {} 155 | ## 156 | ## metadata.tolerations -- Metadata pod specific tolerations. 157 | ## 158 | tolerations: [] 159 | ## 160 | ## metadata.annotations -- Metadata service specific tolerations. 161 | ## 162 | annotations: {} 163 | ## 164 | ## metadata.podAnnotations -- Metadata pod specific annotations. 165 | ## 166 | podAnnotations: {} 167 | 168 | ## 169 | ## Configuration related to the frontEnd service. 170 | ## 171 | frontEnd: 172 | ## 173 | ## frontEnd.serviceName -- The frontend service name. 174 | ## 175 | serviceName: frontend 176 | ## 177 | ## frontEnd.serviceType -- The frontend service type. See service types [ref](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) 178 | ## 179 | serviceType: ClusterIP 180 | ## 181 | ## frontEnd.image -- The image of the frontend container. 182 | ## 183 | image: amundsendev/amundsen-frontend 184 | ## 185 | ## frontEnd.imageTag -- The image tag of the frontend container. 186 | ## 187 | imageTag: 2.3.0 188 | ## 189 | ## frontEnd.servicePort -- The port the frontend service will be exposed on via the loadbalancer. 190 | ## 191 | servicePort: 80 192 | ## 193 | ## frontEnd.replicas -- How many replicas of the frontend service to run. 194 | ## 195 | replicas: 1 196 | ## 197 | ## frontEnd.baseUrl -- used by notifications util to provide links to amundsen pages in emails. 198 | ## 199 | baseUrl: http://localhost 200 | ## 201 | ## frontEnd.oidcEnabled -- To enable auth via OIDC, set this to true. 202 | ## 203 | oidcEnabled: false 204 | ## 205 | ## frontEnd.createOidcSecret -- OIDC needs some configuration. If you want the chart to make your secrets, set this to true and set the next four values. If you don't want to configure your secrets via helm, you can still use the amundsen-oidc-config.yaml as a template 206 | ## 207 | createOidcSecret: false 208 | 209 | ## 210 | ## frontEnd.OIDC_CLIENT_ID -- The client id for OIDC. 211 | ## 212 | OIDC_CLIENT_ID: 213 | ## 214 | ## frontEnd.OIDC_CLIENT_SECRET -- The client secret for OIDC. 215 | ## 216 | OIDC_CLIENT_SECRET: "" 217 | ## 218 | ## frontEnd.OIDC_ORG_URL -- The organization URL for OIDC. 219 | ## 220 | OIDC_ORG_URL: 221 | ## 222 | ## frontEnd.OIDC_AUTH_SERVER_ID -- The authorization server id for OIDC. 223 | ## 224 | OIDC_AUTH_SERVER_ID: 225 | ## 226 | ## frontEnd.OVERWRITE_REDIRECT_URI -- The redirect uri for OIDC. 227 | ## 228 | OVERWRITE_REDIRECT_URI: 229 | 230 | ## 231 | ## frontEnd.resources -- See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) 232 | ## 233 | resources: {} 234 | # limits: 235 | # cpu: 2 236 | # memory: 2Gi 237 | # requests: 238 | # cpu: 1 239 | # memory: 1Gi 240 | 241 | ## 242 | ## frontEnd.nodeSelector -- Frontend pod specific nodeSelector. 243 | ## 244 | nodeSelector: {} 245 | ## 246 | ## frontEnd.affinity -- Frontend pod specific affinity. 
247 | ## 248 | affinity: {} 249 | ## 250 | ## frontEnd.tolerations -- Frontend pod specific tolerations. 251 | ## 252 | tolerations: [] 253 | ## 254 | ## frontEnd.annotations -- Frontend service specific tolerations. 255 | ## 256 | annotations: {} 257 | ## 258 | ## frontEnd.podAnnotations -- Frontend pod specific annotations. 259 | ## 260 | podAnnotations: {} 261 | 262 | ## 263 | ## Configuration related to neo4j. 264 | ## 265 | neo4j: 266 | ## 267 | ## neo4j.enabled -- If neo4j is enabled as part of this chart, or not. Set this to false if you want to provide your own version. 268 | ## 269 | enabled: true 270 | ## 271 | ## neo4j.version -- The neo4j application version used by amundsen. 272 | ## 273 | version: 3.3.0 274 | 275 | ## 276 | ## neo4j.resources -- See pod resourcing [ref](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) 277 | ## 278 | resources: {} 279 | #resources: 280 | # limits: 281 | # cpu: 2 282 | # memory: 2Gi 283 | # requests: 284 | # cpu: 1 285 | # memory: 1Gi 286 | 287 | ## 288 | ## neo4j.config -- Neo4j application specific configuration. This type of configuration is why the charts/stable version is not used. See [ref](https://github.com/helm/charts/issues/21439) 289 | ## 290 | config: 291 | ## 292 | ## neo4j.config.dbms -- dbms config for neo4j 293 | ## 294 | dbms: 295 | ## neo4j.config.dbms.heap_initial_size -- the initial java heap for neo4j 296 | heap_initial_size: 1G 297 | ## neo4j.config.dbms.heap_max_size -- the max java heap for neo4j 298 | heap_max_size: 2G 299 | ## neo4j.config.dbms.pagecache_size -- the page cache size for neo4j 300 | pagecache_size: 2G 301 | 302 | ## 303 | ## neo4j.persistence -- Neo4j persistence. Turn this on to keep your data between pod crashes, etc. This is also needed for backups. 304 | ## 305 | persistence: {} 306 | # storageClass: gp2 307 | # size: 10Gi 308 | # accessMode: ReadWriteMany 309 | # efs: 310 | # dns: 311 | 312 | ## 313 | ## neo4j.backup -- If enabled is set to true, make sure and set the s3 path as well. 314 | ## 315 | backup: 316 | # neo4j.backup.enabled - Whether to include the backup neo4j cron pod. If set to true, s3Path is required. 317 | enabled: false 318 | ## 319 | ## neo4j.backup.s3Path -- The s3path to write to for backups. 320 | ## 321 | s3Path: "s3://dev/null" 322 | ## 323 | ## neo4j.backup.schedule -- The schedule to run backups on. Defaults to hourly. 324 | ## 325 | schedule: "0 * * * *" 326 | podAnnotations: {} 327 | 328 | ## 329 | ## neo4j.nodeSelector -- neo4j specific nodeSelector. 330 | ## 331 | nodeSelector: {} 332 | ## 333 | ## neo4j.affinity -- neo4j specific affinity. 334 | ## 335 | affinity: {} 336 | ## 337 | ## neo4j.tolerations -- neo4j specific tolerations. 338 | ## 339 | tolerations: [] 340 | ## 341 | ## neo4j.annotations -- neo4j service specific tolerations. 342 | ## 343 | annotations: {} 344 | ## 345 | ## neo4j.podAnnotations -- neo4j pod specific annotations. 346 | ## 347 | podAnnotations: {} 348 | 349 | ## 350 | ## Configuration related to elasticsearch. 351 | ## 352 | ## To add values to dependent charts, prefix the value with the chart name (e.g. elasticsearch) 353 | ## By default, the ES chart runs with 3,3,2 nodes for master, data, client. Amundsen likely does not need so much, 354 | ## so, this has been tuned down to 1,1,1. 355 | ## 356 | elasticsearch: 357 | # elasticsearch.enabled -- set this to false, if you want to provide your own ES instance. 
358 | enabled: true 359 | cluster: 360 | env: 361 | ## elasticsearch.cluster.env.MINIMUM_MASTER_NODES -- required to match master.replicas 362 | MINIMUM_MASTER_NODES: 1 363 | ## elasticsearch.cluster.env.EXPECTED_MASTER_NODES -- required to match master.replicas 364 | EXPECTED_MASTER_NODES: 1 365 | ## elasticsearch.cluster.env.RECOVER_AFTER_MASTER_NODES -- required to match master.replicas 366 | RECOVER_AFTER_MASTER_NODES: 1 367 | master: 368 | ## elasticsearch.master.replicas -- only running amundsen on 1 master replica 369 | replicas: 1 370 | data: 371 | ## elasticsearch.data.replicas -- only running amundsen on 1 data replica 372 | replicas: 1 373 | client: 374 | ## elasticsearch.client.replicas -- only running amundsen on 1 client replica 375 | replicas: 1 376 | # serviceType: LoadBalancer 377 | # serviceAnnotations: 378 | # external-dns.alpha.kubernetes.io/hostname: amundsen-elasticsearch.dev.teamname.company.com 379 | # service.beta.kubernetes.io/aws-load-balancer-internal: 0.0.0.0/0 380 | # service.beta.kubernetes.io/aws-load-balancer-type: nlb 381 | # nodeAffinity: high 382 | # resources: 383 | # limits: 384 | # cpu: 2 385 | # memory: 2Gi 386 | -------------------------------------------------------------------------------- /example/docker/neo4j/conf/neo4j.conf: -------------------------------------------------------------------------------- 1 | #***************************************************************** 2 | # Neo4j configuration 3 | # 4 | # For more details and a complete list of settings, please see 5 | # https://neo4j.com/docs/operations-manual/current/reference/configuration-settings/ 6 | #***************************************************************** 7 | 8 | # The name of the database to mount 9 | dbms.active_database=amundsen.db 10 | 11 | # Paths of directories in the installation. 12 | dbms.directories.data=/neo4j/data 13 | #dbms.directories.plugins=/var/lib/neo4j/plugins 14 | #dbms.directories.certificates=/var/lib/neo4j/certificates 15 | dbms.directories.logs=/var/log/neo4j 16 | #dbms.directories.lib=/usr/share/neo4j/lib 17 | #dbms.directories.run=/var/run/neo4j 18 | 19 | # This setting constrains all `LOAD CSV` import files to be under the `import` directory. Remove or comment it out to 20 | # allow files to be loaded from anywhere in the filesystem; this introduces possible security problems. See the 21 | # `LOAD CSV` section of the manual for details. 22 | dbms.directories.import=/var/lib/neo4j/import 23 | 24 | # Whether requests to Neo4j are authenticated. 25 | # To disable authentication, uncomment this line 26 | dbms.security.auth_enabled=false 27 | 28 | # Enable this to be able to upgrade a store from an older version. 29 | #dbms.allow_upgrade=true 30 | 31 | # Java Heap Size: by default the Java heap size is dynamically 32 | # calculated based on available system resources. 33 | # Uncomment these lines to set specific initial and maximum 34 | # heap size. 35 | #dbms.memory.heap.initial_size=512m 36 | #dbms.memory.heap.max_size=512m 37 | 38 | # The amount of memory to use for mapping the store files, in bytes (or 39 | # kilobytes with the 'k' suffix, megabytes with 'm' and gigabytes with 'g'). 40 | # If Neo4j is running on a dedicated server, then it is generally recommended 41 | # to leave about 2-4 gigabytes for the operating system, give the JVM enough 42 | # heap to hold all your transaction state and query context, and then leave the 43 | # rest for the page cache. 
44 | # The default page cache memory assumes the machine is dedicated to running 45 | # Neo4j, and is heuristically set to 50% of RAM minus the max Java heap size. 46 | #dbms.memory.pagecache.size=10g 47 | 48 | #***************************************************************** 49 | # Network connector configuration 50 | #***************************************************************** 51 | 52 | # With default configuration Neo4j only accepts local connections. 53 | # To accept non-local connections, uncomment this line: 54 | dbms.connectors.default_listen_address=0.0.0.0 55 | # You can also choose a specific network interface, and configure a non-default 56 | # port for each connector, by setting their individual listen_address. 57 | 58 | # The address at which this server can be reached by its clients. This may be the server's IP address or DNS name, or 59 | # it may be the address of a reverse proxy which sits in front of the server. This setting may be overridden for 60 | # individual connectors below. 61 | #dbms.connectors.default_advertised_address=localhost 62 | 63 | # You can also choose a specific advertised hostname or IP address, and 64 | # configure an advertised port for each connector, by setting their 65 | # individual advertised_address. 66 | 67 | # Bolt connector 68 | dbms.connector.bolt.enabled=true 69 | #dbms.connector.bolt.tls_level=OPTIONAL 70 | #dbms.connector.bolt.listen_address=:7687 71 | 72 | # HTTP Connector. There must be exactly one HTTP connector. 73 | dbms.connector.http.enabled=true 74 | #dbms.connector.http.listen_address=:7474 75 | 76 | # HTTPS Connector. There can be zero or one HTTPS connectors. 77 | dbms.connector.https.enabled=true 78 | #dbms.connector.https.listen_address=:7473 79 | 80 | # Number of Neo4j worker threads. 81 | #dbms.threads.worker_count= 82 | 83 | #***************************************************************** 84 | # SSL system configuration 85 | #***************************************************************** 86 | 87 | # Names of the SSL policies to be used for the respective components. 88 | 89 | # The legacy policy is a special policy which is not defined in 90 | # the policy configuration section, but rather derives from 91 | # dbms.directories.certificates and associated files 92 | # (by default: neo4j.key and neo4j.cert). Its use will be deprecated. 93 | 94 | # The policies to be used for connectors. 95 | # 96 | # N.B: Note that a connector must be configured to support/require 97 | # SSL/TLS for the policy to actually be utilized. 98 | # 99 | # see: dbms.connector.*.tls_level 100 | 101 | #bolt.ssl_policy=legacy 102 | #https.ssl_policy=legacy 103 | 104 | #***************************************************************** 105 | # SSL policy configuration 106 | #***************************************************************** 107 | 108 | # Each policy is configured under a separate namespace, e.g. 109 | # dbms.ssl.policy..* 110 | # 111 | # The example settings below are for a new policy named 'default'. 112 | 113 | # The base directory for cryptographic objects. Each policy will by 114 | # default look for its associated objects (keys, certificates, ...) 115 | # under the base directory. 116 | # 117 | # Every such setting can be overriden using a full path to 118 | # the respective object, but every policy will by default look 119 | # for cryptographic objects in its base location. 
120 | # 121 | # Mandatory setting 122 | 123 | #dbms.ssl.policy.default.base_directory=certificates/default 124 | 125 | # Allows the generation of a fresh private key and a self-signed 126 | # certificate if none are found in the expected locations. It is 127 | # recommended to turn this off again after keys have been generated. 128 | # 129 | # Keys should in general be generated and distributed offline 130 | # by a trusted certificate authority (CA) and not by utilizing 131 | # this mode. 132 | 133 | #dbms.ssl.policy.default.allow_key_generation=false 134 | 135 | # Enabling this makes it so that this policy ignores the contents 136 | # of the trusted_dir and simply resorts to trusting everything. 137 | # 138 | # Use of this mode is discouraged. It would offer encryption but no security. 139 | 140 | #dbms.ssl.policy.default.trust_all=false 141 | 142 | # The private key for the default SSL policy. By default a file 143 | # named private.key is expected under the base directory of the policy. 144 | # It is mandatory that a key can be found or generated. 145 | 146 | #dbms.ssl.policy.default.private_key= 147 | 148 | # The private key for the default SSL policy. By default a file 149 | # named public.crt is expected under the base directory of the policy. 150 | # It is mandatory that a certificate can be found or generated. 151 | 152 | #dbms.ssl.policy.default.public_certificate= 153 | 154 | # The certificates of trusted parties. By default a directory named 155 | # 'trusted' is expected under the base directory of the policy. It is 156 | # mandatory to create the directory so that it exists, because it cannot 157 | # be auto-created (for security purposes). 158 | # 159 | # To enforce client authentication client_auth must be set to 'require'! 160 | 161 | #dbms.ssl.policy.default.trusted_dir= 162 | 163 | # Client authentication setting. Values: none, optional, require 164 | # The default is to require client authentication. 165 | # 166 | # Servers are always authenticated unless explicitly overridden 167 | # using the trust_all setting. In a mutual authentication setup this 168 | # should be kept at the default of require and trusted certificates 169 | # must be installed in the trusted_dir. 170 | 171 | #dbms.ssl.policy.default.client_auth=require 172 | 173 | # A comma-separated list of allowed TLS versions. 174 | # By default TLSv1, TLSv1.1 and TLSv1.2 are allowed. 175 | 176 | #dbms.ssl.policy.default.tls_versions= 177 | 178 | # A comma-separated list of allowed ciphers. 179 | # The default ciphers are the defaults of the JVM platform. 180 | 181 | #dbms.ssl.policy.default.ciphers= 182 | 183 | #***************************************************************** 184 | # Logging configuration 185 | #***************************************************************** 186 | 187 | # To enable HTTP logging, uncomment this line 188 | #dbms.logs.http.enabled=true 189 | 190 | # Number of HTTP logs to keep. 191 | #dbms.logs.http.rotation.keep_number=5 192 | 193 | # Size of each HTTP log that is kept. 194 | #dbms.logs.http.rotation.size=20m 195 | 196 | # To enable GC Logging, uncomment this line 197 | #dbms.logs.gc.enabled=true 198 | 199 | # GC Logging Options 200 | # see http://docs.oracle.com/cd/E19957-01/819-0084-10/pt_tuningjava.html#wp57013 for more information. 201 | #dbms.logs.gc.options=-XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCApplicationStoppedTime -XX:+PrintPromotionFailure -XX:+PrintTenuringDistribution 202 | 203 | # Number of GC logs to keep. 
204 | #dbms.logs.gc.rotation.keep_number=5 205 | 206 | # Size of each GC log that is kept. 207 | #dbms.logs.gc.rotation.size=20m 208 | 209 | # Size threshold for rotation of the debug log. If set to zero then no rotation will occur. Accepts a binary suffix "k", 210 | # "m" or "g". 211 | #dbms.logs.debug.rotation.size=20m 212 | 213 | # Maximum number of history files for the internal log. 214 | #dbms.logs.debug.rotation.keep_number=7 215 | 216 | #***************************************************************** 217 | # Miscellaneous configuration 218 | #***************************************************************** 219 | 220 | # Enable this to specify a parser other than the default one. 221 | #cypher.default_language_version=3.0 222 | 223 | # Determines if Cypher will allow using file URLs when loading data using 224 | # `LOAD CSV`. Setting this value to `false` will cause Neo4j to fail `LOAD CSV` 225 | # clauses that load data from the file system. 226 | #dbms.security.allow_csv_import_from_file_urls=true 227 | 228 | # Retention policy for transaction logs needed to perform recovery and backups. 229 | dbms.tx_log.rotation.retention_policy=1 days 230 | 231 | # Enable a remote shell server which Neo4j Shell clients can log in to. 232 | dbms.shell.enabled=true 233 | # The network interface IP the shell will listen on (use 0.0.0.0 for all interfaces). 234 | #dbms.shell.host=127.0.0.1 235 | # The port the shell will listen on, default is 1337. 236 | #dbms.shell.port=1337 237 | 238 | # Only allow read operations from this Neo4j instance. This mode still requires 239 | # write access to the directory for lock purposes. 240 | #dbms.read_only=false 241 | 242 | # Comma separated list of JAX-RS packages containing JAX-RS resources, one 243 | # package name for each mountpoint. The listed package names will be loaded 244 | # under the mountpoints specified. Uncomment this line to mount the 245 | # org.neo4j.examples.server.unmanaged.HelloWorldResource.java from 246 | # neo4j-server-examples under /examples/unmanaged, resulting in a final URL of 247 | # http://localhost:7474/examples/unmanaged/helloworld/{nodeId} 248 | #dbms.unmanaged_extension_classes=org.neo4j.examples.server.unmanaged=/examples/unmanaged 249 | 250 | #******************************************************************** 251 | # JVM Parameters 252 | #******************************************************************** 253 | 254 | # G1GC generally strikes a good balance between throughput and tail 255 | # latency, without too much tuning. 256 | dbms.jvm.additional=-XX:+UseG1GC 257 | 258 | # Have common exceptions keep producing stack traces, so they can be 259 | # debugged regardless of how often logs are rotated. 260 | dbms.jvm.additional=-XX:-OmitStackTraceInFastThrow 261 | 262 | # Make sure that `initmemory` is not only allocated, but committed to 263 | # the process, before starting the database. This reduces memory 264 | # fragmentation, increasing the effectiveness of transparent huge 265 | # pages. It also reduces the possibility of seeing performance drop 266 | # due to heap-growing GC events, where a decrease in available page 267 | # cache leads to an increase in mean IO response time. 268 | # Try reducing the heap memory, if this flag degrades performance. 269 | dbms.jvm.additional=-XX:+AlwaysPreTouch 270 | 271 | # Trust that non-static final fields are really final. 272 | # This allows more optimizations and improves overall performance. 
273 | # NOTE: Disable this if you use embedded mode, or have extensions or dependencies that may use reflection or 274 | # serialization to change the value of final fields! 275 | dbms.jvm.additional=-XX:+UnlockExperimentalVMOptions 276 | dbms.jvm.additional=-XX:+TrustFinalNonStaticFields 277 | 278 | # Disable explicit garbage collection, which is occasionally invoked by the JDK itself. 279 | dbms.jvm.additional=-XX:+DisableExplicitGC 280 | 281 | # Remote JMX monitoring, uncomment and adjust the following lines as needed. Absolute paths to jmx.access and 282 | # jmx.password files are required. 283 | # Also make sure to update the jmx.access and jmx.password files with appropriate permission roles and passwords, 284 | # the shipped configuration contains only a read only role called 'monitor' with password 'Neo4j'. 285 | # For more details, see: http://download.oracle.com/javase/8/docs/technotes/guides/management/agent.html 286 | # On Unix based systems the jmx.password file needs to be owned by the user that will run the server, 287 | # and have permissions set to 0600. 288 | # For details on setting these file permissions on Windows see: 289 | # http://docs.oracle.com/javase/8/docs/technotes/guides/management/security-windows.html 290 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.port=3637 291 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.authenticate=true 292 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.ssl=false 293 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.password.file=/absolute/path/to/conf/jmx.password 294 | #dbms.jvm.additional=-Dcom.sun.management.jmxremote.access.file=/absolute/path/to/conf/jmx.access 295 | 296 | # Some systems cannot discover host name automatically, and need this line configured: 297 | #dbms.jvm.additional=-Djava.rmi.server.hostname=$THE_NEO4J_SERVER_HOSTNAME 298 | 299 | # Expand Diffie Hellman (DH) key size from default 1024 to 2048 for DH-RSA cipher suites used in server TLS handshakes. 300 | # This is to protect the server from any potential passive eavesdropping. 301 | dbms.jvm.additional=-Djdk.tls.ephemeralDHKeySize=2048 302 | 303 | # This mitigates a DDoS vector. 304 | dbms.jvm.additional=-Djdk.tls.rejectClientInitiatedRenegotiation=true 305 | 306 | #******************************************************************** 307 | # Wrapper Windows NT/2000/XP Service Properties 308 | #******************************************************************** 309 | # WARNING - Do not modify any of these properties when an application 310 | # using this configuration file has been installed as a service. 311 | # Please uninstall the service before modifying this section. The 312 | # service can then be reinstalled. 313 | 314 | # Name of the service 315 | dbms.windows_service_name=neo4j 316 | 317 | #******************************************************************** 318 | # Other Neo4j system properties 319 | #******************************************************************** 320 | dbms.jvm.additional=-Dunsupported.dbms.udc.source=debian 321 | 322 | 323 | # Allow Neo4j APOC to import and export for backup 324 | dbms.security.procedures.unrestricted=apoc.export.*,apoc.import.* 325 | apoc.export.file.enabled=true 326 | apoc.import.file.enabled=true --------------------------------------------------------------------------------